commit c8b3c3c44f8820dcdc1703ef2685b7ea17b089ae
Author: Soma Nakamura
Date: Thu Jul 24 02:24:10 2025 +0900
initial
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..ed768f3
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,2 @@
+/target/
+Cargo.lock
\ No newline at end of file
diff --git a/Cargo.lock b/Cargo.lock
new file mode 100644
index 0000000..6d05c54
--- /dev/null
+++ b/Cargo.lock
@@ -0,0 +1,457 @@
+# This file is automatically @generated by Cargo.
+# It is not intended for manual editing.
+version = 4
+
+[[package]]
+name = "aho-corasick"
+version = "1.1.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916"
+dependencies = [
+ "memchr",
+]
+
+[[package]]
+name = "bindgen"
+version = "0.69.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "271383c67ccabffb7381723dea0672a673f292304fcb45c01cc648c7a8d58088"
+dependencies = [
+ "bitflags",
+ "cexpr",
+ "clang-sys",
+ "itertools",
+ "lazy_static",
+ "lazycell",
+ "log",
+ "prettyplease",
+ "proc-macro2",
+ "quote",
+ "regex",
+ "rustc-hash",
+ "shlex",
+ "syn",
+ "which",
+]
+
+[[package]]
+name = "bitflags"
+version = "2.9.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1b8e56985ec62d17e9c1001dc89c88ecd7dc08e47eba5ec7c29c7b5eeecde967"
+
+[[package]]
+name = "cc"
+version = "1.2.30"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "deec109607ca693028562ed836a5f1c4b8bd77755c4e132fc5ce11b0b6211ae7"
+dependencies = [
+ "shlex",
+]
+
+[[package]]
+name = "cexpr"
+version = "0.6.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6fac387a98bb7c37292057cffc56d62ecb629900026402633ae9160df93a8766"
+dependencies = [
+ "nom",
+]
+
+[[package]]
+name = "cfg-if"
+version = "1.0.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9555578bc9e57714c812a1f84e4fc5b4d21fcb063490c624de019f7464c91268"
+
+[[package]]
+name = "clang-sys"
+version = "1.8.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0b023947811758c97c59bf9d1c188fd619ad4718dcaa767947df1cadb14f39f4"
+dependencies = [
+ "glob",
+ "libc",
+ "libloading",
+]
+
+[[package]]
+name = "either"
+version = "1.15.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719"
+
+[[package]]
+name = "errno"
+version = "0.3.13"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "778e2ac28f6c47af28e4907f13ffd1e1ddbd400980a9abd7c8df189bf578a5ad"
+dependencies = [
+ "libc",
+ "windows-sys 0.60.2",
+]
+
+[[package]]
+name = "glob"
+version = "0.3.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a8d1add55171497b4705a648c6b583acafb01d58050a51727785f0b2c8e0a2b2"
+
+[[package]]
+name = "home"
+version = "0.5.11"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "589533453244b0995c858700322199b2becb13b627df2851f64a2775d024abcf"
+dependencies = [
+ "windows-sys 0.59.0",
+]
+
+[[package]]
+name = "itertools"
+version = "0.12.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ba291022dbbd398a455acf126c1e341954079855bc60dfdda641363bd6922569"
+dependencies = [
+ "either",
+]
+
+[[package]]
+name = "lazy_static"
+version = "1.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe"
+
+[[package]]
+name = "lazycell"
+version = "1.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55"
+
+[[package]]
+name = "libc"
+version = "0.2.174"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1171693293099992e19cddea4e8b849964e9846f4acee11b3948bcc337be8776"
+
+[[package]]
+name = "libloading"
+version = "0.8.8"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "07033963ba89ebaf1584d767badaa2e8fcec21aedea6b8c0346d487d49c28667"
+dependencies = [
+ "cfg-if",
+ "windows-targets 0.53.2",
+]
+
+[[package]]
+name = "linux-raw-sys"
+version = "0.4.15"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d26c52dbd32dccf2d10cac7725f8eae5296885fb5703b261f7d0a0739ec807ab"
+
+[[package]]
+name = "log"
+version = "0.4.27"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "13dc2df351e3202783a1fe0d44375f7295ffb4049267b0f3018346dc122a1d94"
+
+[[package]]
+name = "marisa-rs"
+version = "0.1.0"
+dependencies = [
+ "bindgen",
+ "cc",
+ "libc",
+ "pkg-config",
+]
+
+[[package]]
+name = "memchr"
+version = "2.7.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "32a282da65faaf38286cf3be983213fcf1d2e2a58700e808f83f4ea9a4804bc0"
+
+[[package]]
+name = "minimal-lexical"
+version = "0.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a"
+
+[[package]]
+name = "nom"
+version = "7.1.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a"
+dependencies = [
+ "memchr",
+ "minimal-lexical",
+]
+
+[[package]]
+name = "once_cell"
+version = "1.21.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d"
+
+[[package]]
+name = "pkg-config"
+version = "0.3.32"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c"
+
+[[package]]
+name = "prettyplease"
+version = "0.2.35"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "061c1221631e079b26479d25bbf2275bfe5917ae8419cd7e34f13bfc2aa7539a"
+dependencies = [
+ "proc-macro2",
+ "syn",
+]
+
+[[package]]
+name = "proc-macro2"
+version = "1.0.95"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "02b3e5e68a3a1a02aad3ec490a98007cbc13c37cbe84a3cd7b8e406d76e7f778"
+dependencies = [
+ "unicode-ident",
+]
+
+[[package]]
+name = "quote"
+version = "1.0.40"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1885c039570dc00dcb4ff087a89e185fd56bae234ddc7f056a945bf36467248d"
+dependencies = [
+ "proc-macro2",
+]
+
+[[package]]
+name = "regex"
+version = "1.11.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b544ef1b4eac5dc2db33ea63606ae9ffcfac26c1416a2806ae0bf5f56b201191"
+dependencies = [
+ "aho-corasick",
+ "memchr",
+ "regex-automata",
+ "regex-syntax",
+]
+
+[[package]]
+name = "regex-automata"
+version = "0.4.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "809e8dc61f6de73b46c85f4c96486310fe304c434cfa43669d7b40f711150908"
+dependencies = [
+ "aho-corasick",
+ "memchr",
+ "regex-syntax",
+]
+
+[[package]]
+name = "regex-syntax"
+version = "0.8.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c"
+
+[[package]]
+name = "rustc-hash"
+version = "1.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2"
+
+[[package]]
+name = "rustix"
+version = "0.38.44"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "fdb5bc1ae2baa591800df16c9ca78619bf65c0488b41b96ccec5d11220d8c154"
+dependencies = [
+ "bitflags",
+ "errno",
+ "libc",
+ "linux-raw-sys",
+ "windows-sys 0.59.0",
+]
+
+[[package]]
+name = "shlex"
+version = "1.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64"
+
+[[package]]
+name = "syn"
+version = "2.0.104"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "17b6f705963418cdb9927482fa304bc562ece2fdd4f616084c50b7023b435a40"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "unicode-ident",
+]
+
+[[package]]
+name = "unicode-ident"
+version = "1.0.18"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512"
+
+[[package]]
+name = "which"
+version = "4.4.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "87ba24419a2078cd2b0f2ede2691b6c66d8e47836da3b6db8265ebad47afbfc7"
+dependencies = [
+ "either",
+ "home",
+ "once_cell",
+ "rustix",
+]
+
+[[package]]
+name = "windows-sys"
+version = "0.59.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b"
+dependencies = [
+ "windows-targets 0.52.6",
+]
+
+[[package]]
+name = "windows-sys"
+version = "0.60.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f2f500e4d28234f72040990ec9d39e3a6b950f9f22d3dba18416c35882612bcb"
+dependencies = [
+ "windows-targets 0.53.2",
+]
+
+[[package]]
+name = "windows-targets"
+version = "0.52.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973"
+dependencies = [
+ "windows_aarch64_gnullvm 0.52.6",
+ "windows_aarch64_msvc 0.52.6",
+ "windows_i686_gnu 0.52.6",
+ "windows_i686_gnullvm 0.52.6",
+ "windows_i686_msvc 0.52.6",
+ "windows_x86_64_gnu 0.52.6",
+ "windows_x86_64_gnullvm 0.52.6",
+ "windows_x86_64_msvc 0.52.6",
+]
+
+[[package]]
+name = "windows-targets"
+version = "0.53.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c66f69fcc9ce11da9966ddb31a40968cad001c5bedeb5c2b82ede4253ab48aef"
+dependencies = [
+ "windows_aarch64_gnullvm 0.53.0",
+ "windows_aarch64_msvc 0.53.0",
+ "windows_i686_gnu 0.53.0",
+ "windows_i686_gnullvm 0.53.0",
+ "windows_i686_msvc 0.53.0",
+ "windows_x86_64_gnu 0.53.0",
+ "windows_x86_64_gnullvm 0.53.0",
+ "windows_x86_64_msvc 0.53.0",
+]
+
+[[package]]
+name = "windows_aarch64_gnullvm"
+version = "0.52.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3"
+
+[[package]]
+name = "windows_aarch64_gnullvm"
+version = "0.53.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "86b8d5f90ddd19cb4a147a5fa63ca848db3df085e25fee3cc10b39b6eebae764"
+
+[[package]]
+name = "windows_aarch64_msvc"
+version = "0.52.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469"
+
+[[package]]
+name = "windows_aarch64_msvc"
+version = "0.53.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c7651a1f62a11b8cbd5e0d42526e55f2c99886c77e007179efff86c2b137e66c"
+
+[[package]]
+name = "windows_i686_gnu"
+version = "0.52.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b"
+
+[[package]]
+name = "windows_i686_gnu"
+version = "0.53.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c1dc67659d35f387f5f6c479dc4e28f1d4bb90ddd1a5d3da2e5d97b42d6272c3"
+
+[[package]]
+name = "windows_i686_gnullvm"
+version = "0.52.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66"
+
+[[package]]
+name = "windows_i686_gnullvm"
+version = "0.53.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9ce6ccbdedbf6d6354471319e781c0dfef054c81fbc7cf83f338a4296c0cae11"
+
+[[package]]
+name = "windows_i686_msvc"
+version = "0.52.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66"
+
+[[package]]
+name = "windows_i686_msvc"
+version = "0.53.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "581fee95406bb13382d2f65cd4a908ca7b1e4c2f1917f143ba16efe98a589b5d"
+
+[[package]]
+name = "windows_x86_64_gnu"
+version = "0.52.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78"
+
+[[package]]
+name = "windows_x86_64_gnu"
+version = "0.53.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2e55b5ac9ea33f2fc1716d1742db15574fd6fc8dadc51caab1c16a3d3b4190ba"
+
+[[package]]
+name = "windows_x86_64_gnullvm"
+version = "0.52.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d"
+
+[[package]]
+name = "windows_x86_64_gnullvm"
+version = "0.53.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0a6e035dd0599267ce1ee132e51c27dd29437f63325753051e71dd9e42406c57"
+
+[[package]]
+name = "windows_x86_64_msvc"
+version = "0.52.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec"
+
+[[package]]
+name = "windows_x86_64_msvc"
+version = "0.53.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "271414315aff87387382ec3d271b52d7ae78726f5d44ac98b4f4030c91880486"
diff --git a/Cargo.toml b/Cargo.toml
new file mode 100644
index 0000000..f166ced
--- /dev/null
+++ b/Cargo.toml
@@ -0,0 +1,22 @@
+[package]
+name = "marisa-rs"
+version = "0.1.0"
+edition = "2021"
+description = "Safe Rust wrapper for the marisa-trie C++ library - a static memory-efficient trie data structure. Requires marisa-trie system library."
+license = "MIT OR Apache-2.0"
+readme = "README.md"
+homepage = "https://crates.io/crates/marisa-rs"
+keywords = ["trie", "string", "search", "marisa"]
+categories = ["data-structures", "text-processing"]
+
+[lib]
+name = "marisa_rs"
+crate-type = ["cdylib", "rlib"]
+
+[dependencies]
+libc = "0.2"
+
+[build-dependencies]
+cc = "1.0"
+pkg-config = "0.3"
+bindgen = "0.69"
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..1f906bb
--- /dev/null
+++ b/README.md
@@ -0,0 +1,178 @@
+# marisa-rs
+
+Safe Rust wrapper for the [marisa-trie](https://github.com/s-yata/marisa-trie) C++ library.
+
+marisa-trie is a static and space-efficient trie data structure library. This crate provides safe Rust bindings to the C++ library.
+
+## Installation
+
+Add this to your `Cargo.toml`:
+
+```toml
+[dependencies]
+marisa-rs = "0.1"
+```
+
+## Requirements
+
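+This crate requires the marisa-trie C++ library to be installed on your system. Building it also requires a C++ compiler (the wrapper in `wrapper.cpp` is compiled by the build script) and libclang (used by `bindgen` to generate the bindings).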
+
+### Ubuntu/Debian
+```bash
+sudo apt-get install libmarisa-dev
+```
+
+### macOS
+```bash
+brew install marisa-trie
+```
+
+## Quick Start
+
+```rust
+use marisa_rs::{Keyset, Trie};
+
+fn main() {
+ // Create a keyset and add words
+ let mut keyset = Keyset::new();
+ keyset.push("apple");
+ keyset.push("application");
+ keyset.push("apply");
+
+ // Build the trie
+ let mut trie = Trie::new();
+ trie.build(&mut keyset).unwrap();
+
+ // Lookup a word
+ if let Some(id) = trie.lookup("apple") {
+ println!("Found 'apple' with ID: {}", id);
+ }
+
+ // Search for words starting with "app"
+ trie.predictive_search("app", |word, id| {
+ println!("Found: {} (ID: {})", word, id);
+ });
+}
+```
+
+## Basic Usage
+
+### Creating and Building a Trie
+
+```rust
+use marisa_rs::{Keyset, Trie};
+
+// Create a keyset
+let mut keyset = Keyset::new();
+
+// Add words to the keyset
+keyset.push("cat");
+keyset.push("car");
+keyset.push("card");
+keyset.push("care");
+
+// Build the trie
+let mut trie = Trie::new();
+trie.build(&mut keyset)?;
+```
+
+### Lookup Operations
+
+```rust
+// Exact lookup
+match trie.lookup("car") {
+ Some(id) => println!("Found with ID: {}", id),
+ None => println!("Not found"),
+}
+
+// Reverse lookup (get word by ID)
+match trie.reverse_lookup(0) {
+ Ok(word) => println!("ID 0 corresponds to: {}", word),
+ Err(_) => println!("Invalid ID"),
+}
+```
+
+### Search Operations
+
+```rust
+// Find all prefixes of a word
+trie.common_prefix_search("cards", |word, id| {
+ println!("Prefix: {} (ID: {})", word, id);
+ // Output: "car", "card"
+});
+
+// Find all words starting with a prefix
+trie.predictive_search("car", |word, id| {
+ println!("Word: {} (ID: {})", word, id);
+ // Output: "car", "card", "care"
+});
+```
+
+### Working with Weights
+
+```rust
+let mut keyset = Keyset::new();
+
+// Add words with custom weights
+keyset.push_back("important", 10.0);
+keyset.push_back("normal", 1.0);
+keyset.push_back("less_important", 0.1);
+
+let mut trie = Trie::new();
+trie.build(&mut keyset)?;
+```
+
+## API Reference
+
+### Keyset
+
+- `Keyset::new()` - Create a new empty keyset
+- `keyset.push(key)` - Add a key with default weight (1.0)
+- `keyset.push_back(key, weight)` - Add a key with specified weight
+- `keyset.size()` - Get the number of keys
+- `keyset.is_empty()` - Check if the keyset is empty
+
+### Trie
+
+- `Trie::new()` - Create a new empty trie
+- `trie.build(&mut keyset)` - Build the trie from a keyset
+- `trie.lookup(key)` - Find the ID of a key (returns `Option`)
+- `trie.reverse_lookup(id)` - Find the key for an ID (returns `Result`)
+- `trie.common_prefix_search(query, callback)` - Find all keys that are prefixes of query
+- `trie.predictive_search(query, callback)` - Find all keys that start with query
+- `trie.size()` - Get the number of keys in the trie
+- `trie.is_empty()` - Check if the trie is empty
+
+## Japanese Text Example
+
+```rust
+use marisa_rs::{Keyset, Trie};
+
+let mut keyset = Keyset::new();
+keyset.push("あ"); // a
+keyset.push("あい"); // ai (love)
+keyset.push("あいて"); // aite (partner)
+
+let mut trie = Trie::new();
+trie.build(&mut keyset).unwrap();
+
+// Works with UTF-8 strings
+if let Some(id) = trie.lookup("あい") {
+ println!("Found Japanese word with ID: {}", id);
+}
+```
+
+## Thread Safety
+
+All types (`Keyset`, `Trie`, `Agent`) implement `Send` and can be transferred between threads. However, they are not `Sync` and cannot be shared between threads without additional synchronization.
+
+## License
+
+This project is licensed under either of
+
+ * Apache License, Version 2.0
+ * MIT license
+
+at your option.
+
+This crate is built on top of the excellent [marisa-trie](https://github.com/s-yata/marisa-trie) library by Susumu Yata.
\ No newline at end of file
diff --git a/build.rs b/build.rs
new file mode 100644
index 0000000..e5e226a
--- /dev/null
+++ b/build.rs
@@ -0,0 +1,54 @@
+use std::env;
+use std::path::PathBuf;
+
+// Build script: links libmarisa, compiles the C++ shim in wrapper.cpp, and
+// generates Rust bindings for wrapper.h into $OUT_DIR/bindings.rs.
+fn main() {
+ println!("cargo:rerun-if-changed=wrapper.cpp");
+ println!("cargo:rerun-if-changed=wrapper.h");
+ // CPATH is read below, so a change to it must re-run this script.
+ println!("cargo:rerun-if-env-changed=CPATH");
+
+ let out_path = PathBuf::from(env::var("OUT_DIR").unwrap());
+
+ if pkg_config::Config::new()
+ .atleast_version("0.2.0")
+ .probe("marisa")
+ .is_ok()
+ {
+ println!("cargo:rustc-link-lib=static=marisa");
+ } else {
+ // pkg-config could not locate marisa: fall back to conventional system paths.
+ println!("cargo:rustc-link-lib=static=marisa");
+ println!("cargo:rustc-link-search=native=/usr/local/lib");
+ println!("cargo:rustc-link-search=native=/usr/lib");
+ }
+
+ let mut build = cc::Build::new();
+ build.cpp(true).file("wrapper.cpp").flag_if_supported("-std=c++17");
+
+ // Forward every CPATH entry as an include directory. `split_paths` applies the
+ // platform's list separator (':' on Unix, ';' on Windows) and `var_os` also
+ // accepts values that are not valid UTF-8.
+ if let Some(cpath) = env::var_os("CPATH") {
+ for path in env::split_paths(&cpath) {
+ if !path.as_os_str().is_empty() {
+ build.include(path);
+ }
+ }
+ }
+
+ build.include("/usr/local/include").include("/usr/include");
+ build.compile("marisa_wrapper");
+
+ let bindings = bindgen::Builder::default()
+ .header("wrapper.h")
+ .parse_callbacks(Box::new(bindgen::CargoCallbacks::new()))
+ .rust_target(bindgen::RustTarget::Stable_1_47)
+ .generate_inline_functions(false)
+ .generate_comments(false)
+ .generate()
+ .expect("Unable to generate bindings");
+
+ bindings.write_to_file(out_path.join("bindings.rs")).expect("Couldn't write bindings!");
+}
\ No newline at end of file
diff --git a/flake.lock b/flake.lock
new file mode 100644
index 0000000..fd3bdc5
--- /dev/null
+++ b/flake.lock
@@ -0,0 +1,96 @@
+{
+ "nodes": {
+ "flake-utils": {
+ "inputs": {
+ "systems": "systems"
+ },
+ "locked": {
+ "lastModified": 1731533236,
+ "narHash": "sha256-l0KFg5HjrsfsO/JpG+r7fRrqm12kzFHyUHqHCVpMMbI=",
+ "owner": "numtide",
+ "repo": "flake-utils",
+ "rev": "11707dc2f618dd54ca8739b309ec4fc024de578b",
+ "type": "github"
+ },
+ "original": {
+ "owner": "numtide",
+ "repo": "flake-utils",
+ "type": "github"
+ }
+ },
+ "nixpkgs": {
+ "locked": {
+ "lastModified": 1752950548,
+ "narHash": "sha256-NS6BLD0lxOrnCiEOcvQCDVPXafX1/ek1dfJHX1nUIzc=",
+ "owner": "NixOS",
+ "repo": "nixpkgs",
+ "rev": "c87b95e25065c028d31a94f06a62927d18763fdf",
+ "type": "github"
+ },
+ "original": {
+ "owner": "NixOS",
+ "ref": "nixos-unstable",
+ "repo": "nixpkgs",
+ "type": "github"
+ }
+ },
+ "nixpkgs_2": {
+ "locked": {
+ "lastModified": 1744536153,
+ "narHash": "sha256-awS2zRgF4uTwrOKwwiJcByDzDOdo3Q1rPZbiHQg/N38=",
+ "owner": "NixOS",
+ "repo": "nixpkgs",
+ "rev": "18dd725c29603f582cf1900e0d25f9f1063dbf11",
+ "type": "github"
+ },
+ "original": {
+ "owner": "NixOS",
+ "ref": "nixpkgs-unstable",
+ "repo": "nixpkgs",
+ "type": "github"
+ }
+ },
+ "root": {
+ "inputs": {
+ "flake-utils": "flake-utils",
+ "nixpkgs": "nixpkgs",
+ "rust-overlay": "rust-overlay"
+ }
+ },
+ "rust-overlay": {
+ "inputs": {
+ "nixpkgs": "nixpkgs_2"
+ },
+ "locked": {
+ "lastModified": 1753238793,
+ "narHash": "sha256-jmQeEpgX+++MEgrcikcwoSiI7vDZWLP0gci7XiWb9uQ=",
+ "owner": "oxalica",
+ "repo": "rust-overlay",
+ "rev": "0ad7ab4ca8e83febf147197e65c006dff60623ab",
+ "type": "github"
+ },
+ "original": {
+ "owner": "oxalica",
+ "repo": "rust-overlay",
+ "type": "github"
+ }
+ },
+ "systems": {
+ "locked": {
+ "lastModified": 1681028828,
+ "narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=",
+ "owner": "nix-systems",
+ "repo": "default",
+ "rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e",
+ "type": "github"
+ },
+ "original": {
+ "owner": "nix-systems",
+ "repo": "default",
+ "type": "github"
+ }
+ }
+ },
+ "root": "root",
+ "version": 7
+}
diff --git a/flake.nix b/flake.nix
new file mode 100644
index 0000000..1562d9c
--- /dev/null
+++ b/flake.nix
@@ -0,0 +1,85 @@
+{
+ description = "Rust wrapper for marisa-trie C++ library";
+
+ inputs = {
+ nixpkgs.url = "github:NixOS/nixpkgs/nixos-unstable";
+ rust-overlay.url = "github:oxalica/rust-overlay";
+ flake-utils.url = "github:numtide/flake-utils";
+ };
+
+ outputs = { self, nixpkgs, rust-overlay, flake-utils }:
+ flake-utils.lib.eachDefaultSystem (system:
+ let
+ overlays = [ (import rust-overlay) ];
+ pkgs = import nixpkgs {
+ inherit system overlays;
+ };
+
+ rustToolchain = pkgs.rust-bin.stable.latest.default.override {
+ extensions = [ "rust-src" "clippy" "rustfmt" ];
+ };
+
+ # Use nixpkgs marisa-trie package
+ marisa-trie = pkgs.marisa;
+
+ in
+ {
+ devShells.default = pkgs.mkShell {
+ buildInputs = with pkgs; [
+ rustToolchain
+ marisa-trie
+
+ # Build tools
+ cmake
+ gcc
+ pkg-config
+ clang
+ llvmPackages.libclang.lib
+
+ # Development tools
+ rust-analyzer
+ clippy
+ rustfmt
+
+ # C++ development
+ gdb
+ valgrind
+ ];
+
+ shellHook = ''
+ echo "Rust marisa-trie development environment"
+ echo "Rust version: $(rustc --version)"
+ echo "Cargo version: $(cargo --version)"
+ echo "marisa-trie library available at: ${marisa-trie}"
+
+ export PKG_CONFIG_PATH="${marisa-trie}/lib/pkgconfig:$PKG_CONFIG_PATH"
+ export LD_LIBRARY_PATH="${marisa-trie}/lib:$LD_LIBRARY_PATH"
+ export LIBRARY_PATH="${marisa-trie}/lib:$LIBRARY_PATH"
+ export CPATH="${marisa-trie}/include:$CPATH"
+ export LIBCLANG_PATH="${pkgs.llvmPackages.libclang.lib}/lib"
+ '';
+
+ RUST_SRC_PATH = "${rustToolchain}/lib/rustlib/src/rust/library";
+ };
+
+ packages.default = pkgs.rustPlatform.buildRustPackage {
+ pname = "marisa-rs";
+ version = "0.1.0";
+
+ src = ./.;
+
+ cargoLock = {
+ lockFile = ./Cargo.lock;
+ };
+ # build.rs runs bindgen, which needs libclang; bindgenHook supplies it inside the build sandbox.
+ buildInputs = [ marisa-trie ];
+ nativeBuildInputs = with pkgs; [ cmake gcc pkg-config rustPlatform.bindgenHook ];
+
+ meta = with pkgs.lib; {
+ description = "Rust wrapper for marisa-trie C++ library";
+ license = with licenses; [ mit asl20 ];
+ platforms = platforms.unix;
+ };
+ };
+ });
+}
diff --git a/src/lib.rs b/src/lib.rs
new file mode 100644
index 0000000..4b1a656
--- /dev/null
+++ b/src/lib.rs
@@ -0,0 +1,333 @@
+//! # marisa-rs
+//!
+//! Safe Rust wrapper for the marisa-trie C++ library.
+//!
+//! marisa-trie is a static and space-efficient trie data structure library.
+//! This crate provides safe Rust bindings to the C++ library.
+//!
+//! ## Example
+//!
+//! ```rust
+//! use marisa_rs::{Keyset, Trie};
+//!
+//! let mut keyset = Keyset::new();
+//! keyset.push("apple");
+//! keyset.push("application");
+//! keyset.push("apply");
+//!
+//! let mut trie = Trie::new();
+//! trie.build(&mut keyset).unwrap();
+//!
+//! // Lookup
+//! assert!(trie.lookup("apple").is_some());
+//! assert!(trie.lookup("orange").is_none());
+//!
+//! // Common prefix search
+//! trie.common_prefix_search("application", |key, id| {
+//! println!("Found: {} (ID: {})", key, id);
+//! });
+//! ```
+
+use std::slice;
+
+mod bindings {
+ include!(concat!(env!("OUT_DIR"), "/bindings.rs"));
+}
+
+use bindings::*;
+
+/// A keyset for building a trie.
+///
+/// Collects keys, each with a weight, before they are handed to `Trie::build`.
+/// Wraps a raw handle obtained from `marisa_keyset_new`; the `Drop` impl releases it.
+pub struct Keyset {
+ inner: *mut MarisaKeyset,
+}
+
+impl Keyset {
+ /// Creates a new empty keyset.
+ pub fn new() -> Self {
+ unsafe {
+ let inner = marisa_keyset_new();
+ Keyset { inner }
+ }
+ }
+
+ /// Adds a key with the specified weight to the keyset.
+ ///
+ /// The key's bytes are passed to the C++ side as a pointer/length pair, so no
+ /// NUL terminator is required. The pointer is converted with `.cast()` rather
+ /// than `as *const i8` because `c_char` is `u8` on some targets (for example
+ /// aarch64 Linux), where a hard-coded `i8` cast does not type-check.
+ pub fn push_back(&mut self, key: &str, weight: f32) {
+ let bytes = key.as_bytes();
+ unsafe {
+ marisa_keyset_push_back(self.inner, bytes.as_ptr().cast(), bytes.len(), weight);
+ }
+ }
+
+ /// Adds a key with default weight (1.0) to the keyset.
+ pub fn push(&mut self, key: &str) {
+ self.push_back(key, 1.0);
+ }
+
+ /// Returns the number of keys in the keyset.
+ pub fn size(&self) -> usize {
+ unsafe { marisa_keyset_size(self.inner) }
+ }
+
+ /// Returns true if the keyset is empty.
+ pub fn is_empty(&self) -> bool {
+ self.size() == 0
+ }
+}
+
+// Releases the underlying C++ keyset when the wrapper goes out of scope.
+impl Drop for Keyset {
+ fn drop(&mut self) {
+ // Hand the raw handle back to the C++ side for destruction.
+ unsafe { marisa_keyset_delete(self.inner) };
+ }
+}
+
+/// A trie data structure for efficient string lookups.
+///
+/// The trie must be built from a keyset before it can be used for lookups.
+pub struct Trie {
+ inner: *mut MarisaTrie, // raw handle from `marisa_trie_new`; released by the `Drop` impl
+}
+
+impl Trie {
+ /// Creates a new empty trie.
+ pub fn new() -> Self {
+ unsafe {
+ let inner = marisa_trie_new();
+ Trie { inner }
+ }
+ }
+
+ /// Builds the trie from the given keyset.
+ ///
+ /// # Errors
+ ///
+ /// Returns an error if the trie cannot be built from the keyset.
+ pub fn build(&mut self, keyset: &mut Keyset) -> Result<(), &'static str> {
+ unsafe {
+ if marisa_trie_build(self.inner, keyset.inner) == 1 {
+ Ok(())
+ } else {
+ Err("Failed to build trie")
+ }
+ }
+ }
+
+ /// Looks up a key in the trie and returns its ID if found.
+ ///
+ /// # Returns
+ ///
+ /// - `Some(id)` if the key is found in the trie
+ /// - `None` if the key is not found
+ pub fn lookup(&self, key: &str) -> Option<usize> {
+ let mut agent = Agent::new();
+ agent.set_query(key);
+
+ unsafe {
+ if marisa_trie_lookup(self.inner, agent.inner) == 1 {
+ Some(agent.key_id())
+ } else {
+ None
+ }
+ }
+ }
+
+ /// Performs reverse lookup to get the key corresponding to the given ID.
+ ///
+ /// # Errors
+ ///
+ /// Returns an error if the ID is not valid.
+ pub fn reverse_lookup(&self, id: usize) -> Result<String, &'static str> {
+ let mut agent = Agent::new();
+ agent.set_query_by_id(id);
+
+ unsafe {
+ if marisa_trie_reverse_lookup(self.inner, agent.inner) == 1 {
+ Ok(agent.key_string())
+ } else {
+ Err("Failed to reverse lookup")
+ }
+ }
+ }
+
+ /// Searches for all keys that are prefixes of the given query.
+ ///
+ /// The callback function is called for each matching key with the key and its ID.
+ pub fn common_prefix_search<F>(&self, query: &str, mut callback: F)
+ where
+ F: FnMut(&str, usize),
+ {
+ let mut agent = Agent::new();
+ agent.set_query(query);
+
+ unsafe {
+ while marisa_trie_common_prefix_search(self.inner, agent.inner) == 1 {
+ let key = agent.key_string();
+ let id = agent.key_id();
+ callback(&key, id);
+ }
+ }
+ }
+
+ /// Searches for all keys that have the given query as a prefix.
+ ///
+ /// The callback function is called for each matching key with the key and its ID.
+ pub fn predictive_search<F>(&self, query: &str, mut callback: F)
+ where
+ F: FnMut(&str, usize),
+ {
+ let mut agent = Agent::new();
+ agent.set_query(query);
+
+ unsafe {
+ while marisa_trie_predictive_search(self.inner, agent.inner) == 1 {
+ let key = agent.key_string();
+ let id = agent.key_id();
+ callback(&key, id);
+ }
+ }
+ }
+
+ /// Returns the number of keys stored in the trie.
+ pub fn size(&self) -> usize {
+ unsafe { marisa_trie_size(self.inner) }
+ }
+
+ /// Returns true if the trie is empty.
+ pub fn is_empty(&self) -> bool {
+ self.size() == 0
+ }
+}
+
+// Releases the underlying C++ trie when the wrapper goes out of scope.
+impl Drop for Trie {
+ fn drop(&mut self) {
+ // Hand the raw handle back to the C++ side for destruction.
+ unsafe { marisa_trie_delete(self.inner) };
+ }
+}
+
+/// An agent for performing trie operations.
+///
+/// Carries the query into a trie operation and the resulting key and ID back out.
+/// `Trie` creates one per call, so it should not be needed directly in most cases.
+pub struct Agent {
+ inner: *mut MarisaAgent,
+}
+
+impl Agent {
+ /// Creates an agent backed by a handle from `marisa_agent_new`.
+ pub fn new() -> Self {
+ unsafe { Agent { inner: marisa_agent_new() } }
+ }
+
+ /// Sets a string query. The pointer goes through `.cast()` rather than
+ /// `as *const i8` so the call also type-checks where `c_char` is `u8`.
+ pub fn set_query(&mut self, query: &str) {
+ let bytes = query.as_bytes();
+ unsafe {
+ marisa_agent_set_query(self.inner, bytes.as_ptr().cast(), bytes.len());
+ }
+ }
+
+ /// Sets a key-ID query, as used by `Trie::reverse_lookup`.
+ pub fn set_query_by_id(&mut self, id: usize) {
+ unsafe {
+ marisa_agent_set_query_by_id(self.inner, id);
+ }
+ }
+
+ /// Returns the agent's current key as an owned `String` (invalid UTF-8 is replaced).
+ /// Yields an empty string when the key pointer is null or the length is zero,
+ /// because `slice::from_raw_parts` must never be handed a null pointer.
+ pub fn key_string(&self) -> String {
+ let (ptr, len) = unsafe { (marisa_agent_key_ptr(self.inner) as *const u8, marisa_agent_key_length(self.inner)) };
+ if ptr.is_null() || len == 0 {
+ return String::new();
+ }
+ unsafe { String::from_utf8_lossy(slice::from_raw_parts(ptr, len)).into_owned() }
+ }
+
+ /// Returns the ID of the agent's current key.
+ pub fn key_id(&self) -> usize {
+ unsafe { marisa_agent_key_id(self.inner) }
+ }
+}
+
+// Releases the underlying C++ agent when the wrapper goes out of scope.
+impl Drop for Agent {
+ fn drop(&mut self) {
+ // Hand the raw handle back to the C++ side for destruction.
+ unsafe { marisa_agent_delete(self.inner) };
+ }
+}
+
+unsafe impl Send for Keyset {} // NOTE(review): assumes the C++ object has no thread affinity — confirm
+unsafe impl Send for Trie {} // NOTE(review): same assumption as for Keyset — confirm
+unsafe impl Send for Agent {} // NOTE(review): same assumption as for Keyset — confirm
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ #[test]
+ fn test_basic_operations() {
+ let mut keyset = Keyset::new();
+ keyset.push("apple");
+ keyset.push("application");
+ keyset.push("apply");
+ keyset.push("apricot");
+
+ assert_eq!(keyset.size(), 4);
+
+ let mut trie = Trie::new();
+ trie.build(&mut keyset).expect("Failed to build trie");
+
+ assert_eq!(trie.size(), 4);
+
+ // Test lookup
+ assert!(trie.lookup("apple").is_some());
+ assert!(trie.lookup("banana").is_none());
+
+ // Test reverse lookup: the ID returned for a key must map back
+ // to that same key.
+ let id = trie.lookup("apple").expect("apple was inserted");
+ assert_eq!(trie.reverse_lookup(id).unwrap(), "apple");
+
+ // Test common prefix search: only "application" itself is a prefix of the query
+ let mut prefixes = Vec::new();
+ trie.common_prefix_search("application", |key, _id| {
+ prefixes.push(key.to_string());
+ });
+ assert_eq!(prefixes, ["application"]);
+
+ // Test predictive search: "apricot" does not start with "app".
+ // The result order is not relied upon, so sort before comparing.
+ let mut matches = Vec::new();
+ trie.predictive_search("app", |key, _id| matches.push(key.to_string()));
+ matches.sort();
+ assert_eq!(matches, ["apple", "application", "apply"]);
+ }
+
+ #[test]
+ fn test_empty_keyset() {
+ let keyset = Keyset::new();
+ assert!(keyset.is_empty());
+ assert_eq!(keyset.size(), 0);
+ }
+
+ #[test]
+ fn test_empty_trie() {
+ let trie = Trie::new();
+ assert!(trie.is_empty());
+ assert_eq!(trie.size(), 0);
+ }
+}
\ No newline at end of file
diff --git a/src/main.rs b/src/main.rs
new file mode 100644
index 0000000..4bffc74
--- /dev/null
+++ b/src/main.rs
@@ -0,0 +1,50 @@
+use marisa_rs::{Keyset, Trie};
+
+/// Demo entry point: builds a trie from a fixed word list and prints the
+/// results of lookup, reverse lookup, common-prefix and predictive searches.
+fn main() {
+    println!("Marisa Trie Demo");
+    println!("================");
+
+    let words = ["apple", "application", "apply", "apricot"];
+    let mut keyset = Keyset::new();
+
+    println!("Adding words to keyset:");
+    for word in &words {
+        keyset.push(word);
+        println!(" - {}", word);
+    }
+
+    // Stop early if the trie cannot be built; nothing below works without it.
+    let mut trie = Trie::new();
+    if let Err(e) = trie.build(&mut keyset) {
+        println!("Failed to build trie: {}", e);
+        return;
+    }
+    println!("\nTrie built successfully! Size: {}", trie.size());
+
+    println!("\n--- Lookup Test ---");
+    for word in &words {
+        match trie.lookup(word) {
+            Some(id) => println!("'{}' found with ID: {}", word, id),
+            None => println!("'{}' not found", word),
+        }
+    }
+
+    println!("\n--- Reverse Lookup Test ---");
+    let key_count = trie.size();
+    for id in 0..key_count {
+        match trie.reverse_lookup(id) {
+            Ok(word) => println!("ID {} -> '{}'", id, word),
+            Err(e) => println!("ID {}: {}", id, e),
+        }
+    }
+
+    println!("\n--- Common Prefix Search for 'application' ---");
+    trie.common_prefix_search("application", |matched, id| {
+        println!(" Found: '{}' (ID: {})", matched, id);
+    });
+
+    println!("\n--- Predictive Search for 'app' ---");
+    trie.predictive_search("app", |matched, id| {
+        println!(" Found: '{}' (ID: {})", matched, id);
+    });
+}
diff --git a/wrapper.cpp b/wrapper.cpp
new file mode 100644
index 0000000..7fea9d2
--- /dev/null
+++ b/wrapper.cpp
@@ -0,0 +1,123 @@
+#include "wrapper.h"
+#include <marisa.h>
+#include <string>
+
+extern "C" {
+
+// Allocates an empty marisa::Keyset and returns it as an opaque handle.
+// Release the handle with marisa_keyset_delete().
+MarisaKeyset* marisa_keyset_new() {
+    return reinterpret_cast<MarisaKeyset*>(new marisa::Keyset());
+}
+
+// Destroys a keyset previously returned by marisa_keyset_new().
+// Passing a null handle is harmless because deleting a null pointer is a no-op.
+void marisa_keyset_delete(MarisaKeyset* keyset) {
+    delete reinterpret_cast<marisa::Keyset*>(keyset);
+}
+
+// Appends the `length` bytes starting at `key` to the keyset with the given
+// weight. The length is passed explicitly, so `key` need not be NUL-terminated.
+// NOTE(review): exceptions from marisa are not caught here and the function
+// has no way to report failure; confirm this is acceptable for callers.
+void marisa_keyset_push_back(MarisaKeyset* keyset, const char* key, size_t length, float weight) {
+    marisa::Keyset* ks = reinterpret_cast<marisa::Keyset*>(keyset);
+    // Use the pointer/length overload directly; no temporary std::string is needed.
+    ks->push_back(key, length, weight);
+}
+
+// Returns the value of marisa::Keyset::size() for the given keyset.
+size_t marisa_keyset_size(const MarisaKeyset* keyset) {
+    const marisa::Keyset* ks = reinterpret_cast<const marisa::Keyset*>(keyset);
+    return ks->size();
+}
+
+// Allocates a default-constructed marisa::Trie and returns it as an opaque
+// handle. Release the handle with marisa_trie_delete().
+MarisaTrie* marisa_trie_new() {
+    return reinterpret_cast<MarisaTrie*>(new marisa::Trie());
+}
+
+// Destroys a trie previously returned by marisa_trie_new().
+// Passing a null handle is harmless because deleting a null pointer is a no-op.
+void marisa_trie_delete(MarisaTrie* trie) {
+    delete reinterpret_cast<marisa::Trie*>(trie);
+}
+
+// Builds `trie` from the keys held in `keyset`.
+// Returns 1 on success and 0 if marisa throws. The exception is swallowed so
+// that it never crosses the C boundary.
+int marisa_trie_build(MarisaTrie* trie, MarisaKeyset* keyset) {
+    try {
+        marisa::Trie* tr = reinterpret_cast<marisa::Trie*>(trie);
+        marisa::Keyset* ks = reinterpret_cast<marisa::Keyset*>(keyset);
+        tr->build(*ks);
+        return 1;
+    } catch (...) {
+        return 0;
+    }
+}
+
+// Runs marisa::Trie::lookup() with the query currently set on `agent`.
+// Returns 1 when the lookup reports a match, 0 when it reports none or when
+// marisa throws.
+int marisa_trie_lookup(const MarisaTrie* trie, MarisaAgent* agent) {
+    try {
+        const marisa::Trie* tr = reinterpret_cast<const marisa::Trie*>(trie);
+        marisa::Agent* ag = reinterpret_cast<marisa::Agent*>(agent);
+        return tr->lookup(*ag) ? 1 : 0;
+    } catch (...) {
+        return 0;
+    }
+}
+
+// Runs marisa::Trie::reverse_lookup() with the query currently set on `agent`
+// (presumably an ID query set via marisa_agent_set_query_by_id()).
+// Returns 1 if the call completes and 0 if marisa throws.
+int marisa_trie_reverse_lookup(const MarisaTrie* trie, MarisaAgent* agent) {
+    try {
+        const marisa::Trie* tr = reinterpret_cast<const marisa::Trie*>(trie);
+        marisa::Agent* ag = reinterpret_cast<marisa::Agent*>(agent);
+        tr->reverse_lookup(*ag);
+        return 1;
+    } catch (...) {
+        return 0;
+    }
+}
+
+// Runs one step of marisa::Trie::common_prefix_search() using `agent`.
+// Returns 1 when the search reports a match, 0 when it reports none or when
+// marisa throws.
+// NOTE(review): presumably callers invoke this repeatedly until it returns 0
+// to enumerate all matches; confirm against the Rust caller.
+int marisa_trie_common_prefix_search(const MarisaTrie* trie, MarisaAgent* agent) {
+    try {
+        const marisa::Trie* tr = reinterpret_cast<const marisa::Trie*>(trie);
+        marisa::Agent* ag = reinterpret_cast<marisa::Agent*>(agent);
+        return tr->common_prefix_search(*ag) ? 1 : 0;
+    } catch (...) {
+        return 0;
+    }
+}
+
+// Runs one step of marisa::Trie::predictive_search() using `agent`.
+// Returns 1 when the search reports a match, 0 when it reports none or when
+// marisa throws.
+// NOTE(review): presumably callers invoke this repeatedly until it returns 0
+// to enumerate all matches; confirm against the Rust caller.
+int marisa_trie_predictive_search(const MarisaTrie* trie, MarisaAgent* agent) {
+    try {
+        const marisa::Trie* tr = reinterpret_cast<const marisa::Trie*>(trie);
+        marisa::Agent* ag = reinterpret_cast<marisa::Agent*>(agent);
+        return tr->predictive_search(*ag) ? 1 : 0;
+    } catch (...) {
+        return 0;
+    }
+}
+
+// Returns the value of marisa::Trie::size() for the given trie, or 0 if marisa
+// throws.
+// NOTE(review): marisa::Trie::size() is understood to throw for a trie that
+// has not been built or loaded; verify against the marisa-trie version in use.
+size_t marisa_trie_size(const MarisaTrie* trie) {
+    try {
+        const marisa::Trie* tr = reinterpret_cast<const marisa::Trie*>(trie);
+        return tr->size();
+    } catch (...) {
+        // Never let an exception unwind across the C boundary; a trie that
+        // cannot report its size is reported as holding no keys.
+        return 0;
+    }
+}
+
+// Allocates a default-constructed marisa::Agent and returns it as an opaque
+// handle. Release the handle with marisa_agent_delete().
+MarisaAgent* marisa_agent_new() {
+    return reinterpret_cast<MarisaAgent*>(new marisa::Agent());
+}
+
+// Destroys an agent previously returned by marisa_agent_new().
+// Passing a null handle is harmless because deleting a null pointer is a no-op.
+void marisa_agent_delete(MarisaAgent* agent) {
+    delete reinterpret_cast<marisa::Agent*>(agent);
+}
+
+// Sets the agent's query to the `length` bytes starting at `query`.
+// NOTE(review): marisa::Agent is understood to keep the pointer rather than
+// copy the bytes, so the buffer should outlive the agent's use of the query;
+// confirm against the marisa-trie documentation.
+void marisa_agent_set_query(MarisaAgent* agent, const char* query, size_t length) {
+    marisa::Agent* ag = reinterpret_cast<marisa::Agent*>(agent);
+    ag->set_query(query, length);
+}
+
+// Sets the agent's query to a key ID, via the marisa::Agent::set_query(id)
+// overload.
+void marisa_agent_set_query_by_id(MarisaAgent* agent, size_t id) {
+    marisa::Agent* ag = reinterpret_cast<marisa::Agent*>(agent);
+    ag->set_query(id);
+}
+
+// Returns a pointer to the first byte of the agent's current key.
+// The pointer is not a copy; pair it with marisa_agent_key_length(), because
+// the bytes are not guaranteed to be NUL-terminated.
+// NOTE(review): the pointer is presumably invalidated by the next query or
+// search on the agent, and may be null before any key is set; confirm.
+const char* marisa_agent_key_ptr(const MarisaAgent* agent) {
+    const marisa::Agent* ag = reinterpret_cast<const marisa::Agent*>(agent);
+    // Read the raw pointer directly instead of going through key().str().
+    return ag->key().ptr();
+}
+
+// Returns the length in bytes of the agent's current key.
+size_t marisa_agent_key_length(const MarisaAgent* agent) {
+    const marisa::Agent* ag = reinterpret_cast<const marisa::Agent*>(agent);
+    // Read the length directly instead of going through key().str().
+    return ag->key().length();
+}
+
+// Returns the ID stored in the agent's current key.
+size_t marisa_agent_key_id(const MarisaAgent* agent) {
+    const marisa::Agent* ag = reinterpret_cast<const marisa::Agent*>(agent);
+    return ag->key().id();
+}
+
+}
\ No newline at end of file
diff --git a/wrapper.h b/wrapper.h
new file mode 100644
index 0000000..9289af5
--- /dev/null
+++ b/wrapper.h
@@ -0,0 +1,41 @@
+#pragma once
+
+// C-callable interface over the marisa-trie C++ classes. All objects are
+// exposed as opaque handles that are created and destroyed through the
+// *_new / *_delete pairs below.
+
+// Standard headers are included before the extern "C" block so that their
+// declarations are not wrapped in C linkage when compiled as C++.
+// NOTE(review): <stddef.h> is required for size_t; <stdint.h> is assumed for
+// the second include and should be confirmed.
+#include <stddef.h>
+#include <stdint.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// Opaque handle types; their layout is never visible to callers.
+typedef struct MarisaTrie MarisaTrie;
+typedef struct MarisaKeyset MarisaKeyset;
+typedef struct MarisaAgent MarisaAgent;
+
+// Keyset functions
+
+// Creates an empty keyset. Release it with marisa_keyset_delete().
+MarisaKeyset* marisa_keyset_new(void);
+// Destroys a keyset created by marisa_keyset_new().
+void marisa_keyset_delete(MarisaKeyset* keyset);
+// Appends the `length` bytes at `key` to the keyset with the given weight.
+void marisa_keyset_push_back(MarisaKeyset* keyset, const char* key, size_t length, float weight);
+// Returns the keyset's size as reported by marisa.
+size_t marisa_keyset_size(const MarisaKeyset* keyset);
+
+// Trie functions
+
+// Creates an unbuilt trie. Release it with marisa_trie_delete().
+MarisaTrie* marisa_trie_new(void);
+// Destroys a trie created by marisa_trie_new().
+void marisa_trie_delete(MarisaTrie* trie);
+// Builds `trie` from `keyset`. Returns 1 on success, 0 on failure.
+int marisa_trie_build(MarisaTrie* trie, MarisaKeyset* keyset);
+// Looks up the agent's query. Returns 1 on a match, 0 on no match or failure.
+int marisa_trie_lookup(const MarisaTrie* trie, MarisaAgent* agent);
+// Resolves the agent's query via reverse lookup. Returns 1 on success, 0 on failure.
+int marisa_trie_reverse_lookup(const MarisaTrie* trie, MarisaAgent* agent);
+// Runs one common-prefix search step. Returns 1 on a match, 0 on no match or failure.
+int marisa_trie_common_prefix_search(const MarisaTrie* trie, MarisaAgent* agent);
+// Runs one predictive search step. Returns 1 on a match, 0 on no match or failure.
+int marisa_trie_predictive_search(const MarisaTrie* trie, MarisaAgent* agent);
+// Returns the trie's size as reported by marisa.
+size_t marisa_trie_size(const MarisaTrie* trie);
+
+// Agent functions
+
+// Creates an agent. Release it with marisa_agent_delete().
+MarisaAgent* marisa_agent_new(void);
+// Destroys an agent created by marisa_agent_new().
+void marisa_agent_delete(MarisaAgent* agent);
+// Sets the agent's query to the `length` bytes at `query`.
+void marisa_agent_set_query(MarisaAgent* agent, const char* query, size_t length);
+// Sets the agent's query to a key ID.
+void marisa_agent_set_query_by_id(MarisaAgent* agent, size_t id);
+// Returns a pointer to the agent's current key bytes; pair it with
+// marisa_agent_key_length().
+const char* marisa_agent_key_ptr(const MarisaAgent* agent);
+// Returns the length in bytes of the agent's current key.
+size_t marisa_agent_key_length(const MarisaAgent* agent);
+// Returns the ID of the agent's current key.
+size_t marisa_agent_key_id(const MarisaAgent* agent);
+
+#ifdef __cplusplus
+}
+#endif
\ No newline at end of file