initial
This commit is contained in:
commit
c8b3c3c44f
11 changed files with 1441 additions and 0 deletions
2
.gitignore
vendored
Normal file
2
.gitignore
vendored
Normal file
|
|
@ -0,0 +1,2 @@
|
||||||
|
/target/
|
||||||
|
Cargo.lock
|
||||||
457
Cargo.lock
generated
Normal file
457
Cargo.lock
generated
Normal file
|
|
@ -0,0 +1,457 @@
|
||||||
|
# This file is automatically @generated by Cargo.
|
||||||
|
# It is not intended for manual editing.
|
||||||
|
version = 4
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "aho-corasick"
|
||||||
|
version = "1.1.3"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916"
|
||||||
|
dependencies = [
|
||||||
|
"memchr",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "bindgen"
|
||||||
|
version = "0.69.5"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "271383c67ccabffb7381723dea0672a673f292304fcb45c01cc648c7a8d58088"
|
||||||
|
dependencies = [
|
||||||
|
"bitflags",
|
||||||
|
"cexpr",
|
||||||
|
"clang-sys",
|
||||||
|
"itertools",
|
||||||
|
"lazy_static",
|
||||||
|
"lazycell",
|
||||||
|
"log",
|
||||||
|
"prettyplease",
|
||||||
|
"proc-macro2",
|
||||||
|
"quote",
|
||||||
|
"regex",
|
||||||
|
"rustc-hash",
|
||||||
|
"shlex",
|
||||||
|
"syn",
|
||||||
|
"which",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "bitflags"
|
||||||
|
version = "2.9.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "1b8e56985ec62d17e9c1001dc89c88ecd7dc08e47eba5ec7c29c7b5eeecde967"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "cc"
|
||||||
|
version = "1.2.30"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "deec109607ca693028562ed836a5f1c4b8bd77755c4e132fc5ce11b0b6211ae7"
|
||||||
|
dependencies = [
|
||||||
|
"shlex",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "cexpr"
|
||||||
|
version = "0.6.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "6fac387a98bb7c37292057cffc56d62ecb629900026402633ae9160df93a8766"
|
||||||
|
dependencies = [
|
||||||
|
"nom",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "cfg-if"
|
||||||
|
version = "1.0.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "9555578bc9e57714c812a1f84e4fc5b4d21fcb063490c624de019f7464c91268"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "clang-sys"
|
||||||
|
version = "1.8.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "0b023947811758c97c59bf9d1c188fd619ad4718dcaa767947df1cadb14f39f4"
|
||||||
|
dependencies = [
|
||||||
|
"glob",
|
||||||
|
"libc",
|
||||||
|
"libloading",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "either"
|
||||||
|
version = "1.15.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "errno"
|
||||||
|
version = "0.3.13"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "778e2ac28f6c47af28e4907f13ffd1e1ddbd400980a9abd7c8df189bf578a5ad"
|
||||||
|
dependencies = [
|
||||||
|
"libc",
|
||||||
|
"windows-sys 0.60.2",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "glob"
|
||||||
|
version = "0.3.2"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "a8d1add55171497b4705a648c6b583acafb01d58050a51727785f0b2c8e0a2b2"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "home"
|
||||||
|
version = "0.5.11"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "589533453244b0995c858700322199b2becb13b627df2851f64a2775d024abcf"
|
||||||
|
dependencies = [
|
||||||
|
"windows-sys 0.59.0",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "itertools"
|
||||||
|
version = "0.12.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "ba291022dbbd398a455acf126c1e341954079855bc60dfdda641363bd6922569"
|
||||||
|
dependencies = [
|
||||||
|
"either",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "lazy_static"
|
||||||
|
version = "1.5.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "lazycell"
|
||||||
|
version = "1.3.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "libc"
|
||||||
|
version = "0.2.174"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "1171693293099992e19cddea4e8b849964e9846f4acee11b3948bcc337be8776"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "libloading"
|
||||||
|
version = "0.8.8"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "07033963ba89ebaf1584d767badaa2e8fcec21aedea6b8c0346d487d49c28667"
|
||||||
|
dependencies = [
|
||||||
|
"cfg-if",
|
||||||
|
"windows-targets 0.53.2",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "linux-raw-sys"
|
||||||
|
version = "0.4.15"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "d26c52dbd32dccf2d10cac7725f8eae5296885fb5703b261f7d0a0739ec807ab"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "log"
|
||||||
|
version = "0.4.27"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "13dc2df351e3202783a1fe0d44375f7295ffb4049267b0f3018346dc122a1d94"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "marisa-rs"
|
||||||
|
version = "0.1.0"
|
||||||
|
dependencies = [
|
||||||
|
"bindgen",
|
||||||
|
"cc",
|
||||||
|
"libc",
|
||||||
|
"pkg-config",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "memchr"
|
||||||
|
version = "2.7.5"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "32a282da65faaf38286cf3be983213fcf1d2e2a58700e808f83f4ea9a4804bc0"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "minimal-lexical"
|
||||||
|
version = "0.2.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "nom"
|
||||||
|
version = "7.1.3"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a"
|
||||||
|
dependencies = [
|
||||||
|
"memchr",
|
||||||
|
"minimal-lexical",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "once_cell"
|
||||||
|
version = "1.21.3"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "pkg-config"
|
||||||
|
version = "0.3.32"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "prettyplease"
|
||||||
|
version = "0.2.35"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "061c1221631e079b26479d25bbf2275bfe5917ae8419cd7e34f13bfc2aa7539a"
|
||||||
|
dependencies = [
|
||||||
|
"proc-macro2",
|
||||||
|
"syn",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "proc-macro2"
|
||||||
|
version = "1.0.95"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "02b3e5e68a3a1a02aad3ec490a98007cbc13c37cbe84a3cd7b8e406d76e7f778"
|
||||||
|
dependencies = [
|
||||||
|
"unicode-ident",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "quote"
|
||||||
|
version = "1.0.40"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "1885c039570dc00dcb4ff087a89e185fd56bae234ddc7f056a945bf36467248d"
|
||||||
|
dependencies = [
|
||||||
|
"proc-macro2",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "regex"
|
||||||
|
version = "1.11.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "b544ef1b4eac5dc2db33ea63606ae9ffcfac26c1416a2806ae0bf5f56b201191"
|
||||||
|
dependencies = [
|
||||||
|
"aho-corasick",
|
||||||
|
"memchr",
|
||||||
|
"regex-automata",
|
||||||
|
"regex-syntax",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "regex-automata"
|
||||||
|
version = "0.4.9"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "809e8dc61f6de73b46c85f4c96486310fe304c434cfa43669d7b40f711150908"
|
||||||
|
dependencies = [
|
||||||
|
"aho-corasick",
|
||||||
|
"memchr",
|
||||||
|
"regex-syntax",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "regex-syntax"
|
||||||
|
version = "0.8.5"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "rustc-hash"
|
||||||
|
version = "1.1.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "rustix"
|
||||||
|
version = "0.38.44"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "fdb5bc1ae2baa591800df16c9ca78619bf65c0488b41b96ccec5d11220d8c154"
|
||||||
|
dependencies = [
|
||||||
|
"bitflags",
|
||||||
|
"errno",
|
||||||
|
"libc",
|
||||||
|
"linux-raw-sys",
|
||||||
|
"windows-sys 0.59.0",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "shlex"
|
||||||
|
version = "1.3.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "syn"
|
||||||
|
version = "2.0.104"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "17b6f705963418cdb9927482fa304bc562ece2fdd4f616084c50b7023b435a40"
|
||||||
|
dependencies = [
|
||||||
|
"proc-macro2",
|
||||||
|
"quote",
|
||||||
|
"unicode-ident",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "unicode-ident"
|
||||||
|
version = "1.0.18"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "which"
|
||||||
|
version = "4.4.2"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "87ba24419a2078cd2b0f2ede2691b6c66d8e47836da3b6db8265ebad47afbfc7"
|
||||||
|
dependencies = [
|
||||||
|
"either",
|
||||||
|
"home",
|
||||||
|
"once_cell",
|
||||||
|
"rustix",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "windows-sys"
|
||||||
|
version = "0.59.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b"
|
||||||
|
dependencies = [
|
||||||
|
"windows-targets 0.52.6",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "windows-sys"
|
||||||
|
version = "0.60.2"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "f2f500e4d28234f72040990ec9d39e3a6b950f9f22d3dba18416c35882612bcb"
|
||||||
|
dependencies = [
|
||||||
|
"windows-targets 0.53.2",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "windows-targets"
|
||||||
|
version = "0.52.6"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973"
|
||||||
|
dependencies = [
|
||||||
|
"windows_aarch64_gnullvm 0.52.6",
|
||||||
|
"windows_aarch64_msvc 0.52.6",
|
||||||
|
"windows_i686_gnu 0.52.6",
|
||||||
|
"windows_i686_gnullvm 0.52.6",
|
||||||
|
"windows_i686_msvc 0.52.6",
|
||||||
|
"windows_x86_64_gnu 0.52.6",
|
||||||
|
"windows_x86_64_gnullvm 0.52.6",
|
||||||
|
"windows_x86_64_msvc 0.52.6",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "windows-targets"
|
||||||
|
version = "0.53.2"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "c66f69fcc9ce11da9966ddb31a40968cad001c5bedeb5c2b82ede4253ab48aef"
|
||||||
|
dependencies = [
|
||||||
|
"windows_aarch64_gnullvm 0.53.0",
|
||||||
|
"windows_aarch64_msvc 0.53.0",
|
||||||
|
"windows_i686_gnu 0.53.0",
|
||||||
|
"windows_i686_gnullvm 0.53.0",
|
||||||
|
"windows_i686_msvc 0.53.0",
|
||||||
|
"windows_x86_64_gnu 0.53.0",
|
||||||
|
"windows_x86_64_gnullvm 0.53.0",
|
||||||
|
"windows_x86_64_msvc 0.53.0",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "windows_aarch64_gnullvm"
|
||||||
|
version = "0.52.6"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "windows_aarch64_gnullvm"
|
||||||
|
version = "0.53.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "86b8d5f90ddd19cb4a147a5fa63ca848db3df085e25fee3cc10b39b6eebae764"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "windows_aarch64_msvc"
|
||||||
|
version = "0.52.6"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "windows_aarch64_msvc"
|
||||||
|
version = "0.53.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "c7651a1f62a11b8cbd5e0d42526e55f2c99886c77e007179efff86c2b137e66c"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "windows_i686_gnu"
|
||||||
|
version = "0.52.6"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "windows_i686_gnu"
|
||||||
|
version = "0.53.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "c1dc67659d35f387f5f6c479dc4e28f1d4bb90ddd1a5d3da2e5d97b42d6272c3"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "windows_i686_gnullvm"
|
||||||
|
version = "0.52.6"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "windows_i686_gnullvm"
|
||||||
|
version = "0.53.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "9ce6ccbdedbf6d6354471319e781c0dfef054c81fbc7cf83f338a4296c0cae11"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "windows_i686_msvc"
|
||||||
|
version = "0.52.6"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "windows_i686_msvc"
|
||||||
|
version = "0.53.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "581fee95406bb13382d2f65cd4a908ca7b1e4c2f1917f143ba16efe98a589b5d"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "windows_x86_64_gnu"
|
||||||
|
version = "0.52.6"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "windows_x86_64_gnu"
|
||||||
|
version = "0.53.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "2e55b5ac9ea33f2fc1716d1742db15574fd6fc8dadc51caab1c16a3d3b4190ba"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "windows_x86_64_gnullvm"
|
||||||
|
version = "0.52.6"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "windows_x86_64_gnullvm"
|
||||||
|
version = "0.53.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "0a6e035dd0599267ce1ee132e51c27dd29437f63325753051e71dd9e42406c57"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "windows_x86_64_msvc"
|
||||||
|
version = "0.52.6"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "windows_x86_64_msvc"
|
||||||
|
version = "0.53.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "271414315aff87387382ec3d271b52d7ae78726f5d44ac98b4f4030c91880486"
|
||||||
22
Cargo.toml
Normal file
22
Cargo.toml
Normal file
|
|
@ -0,0 +1,22 @@
|
||||||
|
[package]
|
||||||
|
name = "marisa-rs"
|
||||||
|
version = "0.1.0"
|
||||||
|
edition = "2021"
|
||||||
|
description = "Safe Rust wrapper for the marisa-trie C++ library - a static memory-efficient trie data structure. Requires marisa-trie system library."
|
||||||
|
license = "MIT OR Apache-2.0"
|
||||||
|
readme = "README.md"
|
||||||
|
homepage = "https://crates.io/crates/marisa-rs"
|
||||||
|
keywords = ["trie", "string", "search", "marisa"]
|
||||||
|
categories = ["data-structures", "text-processing"]
|
||||||
|
|
||||||
|
[lib]
|
||||||
|
name = "marisa_rs"
|
||||||
|
crate-type = ["cdylib", "rlib"]
|
||||||
|
|
||||||
|
[dependencies]
|
||||||
|
libc = "0.2"
|
||||||
|
|
||||||
|
[build-dependencies]
|
||||||
|
cc = "1.0"
|
||||||
|
pkg-config = "0.3"
|
||||||
|
bindgen = "0.69"
|
||||||
178
README.md
Normal file
178
README.md
Normal file
|
|
@ -0,0 +1,178 @@
|
||||||
|
# marisa-rs
|
||||||
|
|
||||||
|
Safe Rust wrapper for the [marisa-trie](https://github.com/s-yata/marisa-trie) C++ library.
|
||||||
|
|
||||||
|
marisa-trie is a static and space-efficient trie data structure library. This crate provides safe Rust bindings to the C++ library.
|
||||||
|
|
||||||
|
## Installation
|
||||||
|
|
||||||
|
Add this to your `Cargo.toml`:
|
||||||
|
|
||||||
|
```toml
|
||||||
|
[dependencies]
|
||||||
|
marisa-rs = "0.1"
|
||||||
|
```
|
||||||
|
|
||||||
|
## Requirements
|
||||||
|
|
||||||
|
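This crate requires the marisa-trie C++ library to be installed on your system. Building it also needs a C++ compiler and libclang, because the build script compiles a small C++ wrapper and generates the Rust bindings with bindgen.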
|
||||||
|
|
||||||
|
### Ubuntu/Debian
|
||||||
|
```bash
|
||||||
|
sudo apt-get install libmarisa-dev
|
||||||
|
```
|
||||||
|
|
||||||
|
### macOS
|
||||||
|
```bash
|
||||||
|
brew install marisa-trie
|
||||||
|
```
|
||||||
|
|
||||||
|
## Quick Start
|
||||||
|
|
||||||
|
```rust
|
||||||
|
use marisa_rs::{Keyset, Trie};
|
||||||
|
|
||||||
|
fn main() {
|
||||||
|
// Create a keyset and add words
|
||||||
|
let mut keyset = Keyset::new();
|
||||||
|
keyset.push("apple");
|
||||||
|
keyset.push("application");
|
||||||
|
keyset.push("apply");
|
||||||
|
|
||||||
|
// Build the trie
|
||||||
|
let mut trie = Trie::new();
|
||||||
|
trie.build(&mut keyset).unwrap();
|
||||||
|
|
||||||
|
// Lookup a word
|
||||||
|
if let Some(id) = trie.lookup("apple") {
|
||||||
|
println!("Found 'apple' with ID: {}", id);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Search for words starting with "app"
|
||||||
|
trie.predictive_search("app", |word, id| {
|
||||||
|
println!("Found: {} (ID: {})", word, id);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
## Basic Usage
|
||||||
|
|
||||||
|
### Creating and Building a Trie
|
||||||
|
|
||||||
|
```rust
|
||||||
|
use marisa_rs::{Keyset, Trie};
|
||||||
|
|
||||||
|
// Create a keyset
|
||||||
|
let mut keyset = Keyset::new();
|
||||||
|
|
||||||
|
// Add words to the keyset
|
||||||
|
keyset.push("cat");
|
||||||
|
keyset.push("car");
|
||||||
|
keyset.push("card");
|
||||||
|
keyset.push("care");
|
||||||
|
|
||||||
|
// Build the trie
|
||||||
|
let mut trie = Trie::new();
|
||||||
|
trie.build(&mut keyset)?;
|
||||||
|
```
|
||||||
|
|
||||||
|
### Lookup Operations
|
||||||
|
|
||||||
|
```rust
|
||||||
|
// Exact lookup
|
||||||
|
match trie.lookup("car") {
|
||||||
|
Some(id) => println!("Found with ID: {}", id),
|
||||||
|
None => println!("Not found"),
|
||||||
|
}
|
||||||
|
|
||||||
|
// Reverse lookup (get word by ID)
|
||||||
|
match trie.reverse_lookup(0) {
|
||||||
|
Ok(word) => println!("ID 0 corresponds to: {}", word),
|
||||||
|
Err(_) => println!("Invalid ID"),
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
### Search Operations
|
||||||
|
|
||||||
|
```rust
|
||||||
|
// Find all prefixes of a word
|
||||||
|
trie.common_prefix_search("cards", |word, id| {
|
||||||
|
println!("Prefix: {} (ID: {})", word, id);
|
||||||
|
// Output: "car", "card"
|
||||||
|
});
|
||||||
|
|
||||||
|
// Find all words starting with a prefix
|
||||||
|
trie.predictive_search("car", |word, id| {
|
||||||
|
println!("Word: {} (ID: {})", word, id);
|
||||||
|
// Output: "car", "card", "care"
|
||||||
|
});
|
||||||
|
```
|
||||||
|
|
||||||
|
### Working with Weights
|
||||||
|
|
||||||
|
```rust
|
||||||
|
let mut keyset = Keyset::new();
|
||||||
|
|
||||||
|
// Add words with custom weights
|
||||||
|
keyset.push_back("important", 10.0);
|
||||||
|
keyset.push_back("normal", 1.0);
|
||||||
|
keyset.push_back("less_important", 0.1);
|
||||||
|
|
||||||
|
let mut trie = Trie::new();
|
||||||
|
trie.build(&mut keyset)?;
|
||||||
|
```
|
||||||
|
|
||||||
|
## API Reference
|
||||||
|
|
||||||
|
### Keyset
|
||||||
|
|
||||||
|
- `Keyset::new()` - Create a new empty keyset
|
||||||
|
- `keyset.push(key)` - Add a key with default weight (1.0)
|
||||||
|
- `keyset.push_back(key, weight)` - Add a key with specified weight
|
||||||
|
- `keyset.size()` - Get the number of keys
|
||||||
|
- `keyset.is_empty()` - Check if the keyset is empty
|
||||||
|
|
||||||
|
### Trie
|
||||||
|
|
||||||
|
- `Trie::new()` - Create a new empty trie
|
||||||
|
- `trie.build(&mut keyset)` - Build the trie from a keyset
|
||||||
|
- `trie.lookup(key)` - Find the ID of a key (returns `Option<usize>`)
|
||||||
|
- `trie.reverse_lookup(id)` - Find the key for an ID (returns `Result<String, &str>`)
|
||||||
|
- `trie.common_prefix_search(query, callback)` - Find all keys that are prefixes of query
|
||||||
|
- `trie.predictive_search(query, callback)` - Find all keys that start with query
|
||||||
|
- `trie.size()` - Get the number of keys in the trie
|
||||||
|
- `trie.is_empty()` - Check if the trie is empty
|
||||||
|
|
||||||
|
## Japanese Text Example
|
||||||
|
|
||||||
|
```rust
|
||||||
|
use marisa_rs::{Keyset, Trie};
|
||||||
|
|
||||||
|
let mut keyset = Keyset::new();
|
||||||
|
keyset.push("あ"); // a
|
||||||
|
keyset.push("あい"); // ai (love)
|
||||||
|
keyset.push("あいて"); // aite (partner)
|
||||||
|
|
||||||
|
let mut trie = Trie::new();
|
||||||
|
trie.build(&mut keyset).unwrap();
|
||||||
|
|
||||||
|
// Works with UTF-8 strings
|
||||||
|
if let Some(id) = trie.lookup("あい") {
|
||||||
|
println!("Found Japanese word with ID: {}", id);
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
## Thread Safety
|
||||||
|
|
||||||
|
All types (`Keyset`, `Trie`, `Agent`) implement `Send` and can be transferred between threads. However, they are not `Sync` and cannot be shared between threads without additional synchronization.
|
||||||
|
|
||||||
|
## License
|
||||||
|
|
||||||
|
This project is licensed under either of
|
||||||
|
|
||||||
|
* Apache License, Version 2.0
|
||||||
|
* MIT license
|
||||||
|
|
||||||
|
at your option.
|
||||||
|
|
||||||
|
This crate is built on top of the excellent [marisa-trie](https://github.com/s-yata/marisa-trie) library by Susumu Yata.
|
||||||
54
build.rs
Normal file
54
build.rs
Normal file
|
|
@ -0,0 +1,54 @@
|
||||||
|
use std::env;
|
||||||
|
use std::path::PathBuf;
|
||||||
|
|
||||||
|
fn main() {
|
||||||
|
println!("cargo:rerun-if-changed=wrapper.cpp");
|
||||||
|
println!("cargo:rerun-if-changed=wrapper.h");
|
||||||
|
|
||||||
|
let out_path = PathBuf::from(env::var("OUT_DIR").unwrap());
|
||||||
|
|
||||||
|
if pkg_config::Config::new()
|
||||||
|
.atleast_version("0.2.0")
|
||||||
|
.probe("marisa")
|
||||||
|
.is_ok()
|
||||||
|
{
|
||||||
|
println!("cargo:rustc-link-lib=static=marisa");
|
||||||
|
} else {
|
||||||
|
println!("cargo:rustc-link-lib=static=marisa");
|
||||||
|
println!("cargo:rustc-link-search=native=/usr/local/lib");
|
||||||
|
println!("cargo:rustc-link-search=native=/usr/lib");
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut build = cc::Build::new();
|
||||||
|
build
|
||||||
|
.cpp(true)
|
||||||
|
.file("wrapper.cpp")
|
||||||
|
.flag_if_supported("-std=c++17");
|
||||||
|
|
||||||
|
if let Ok(cpath) = env::var("CPATH") {
|
||||||
|
for path in cpath.split(':') {
|
||||||
|
if !path.is_empty() {
|
||||||
|
build.include(path);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
build
|
||||||
|
.include("/usr/local/include")
|
||||||
|
.include("/usr/include");
|
||||||
|
|
||||||
|
build.compile("marisa_wrapper");
|
||||||
|
|
||||||
|
let bindings = bindgen::Builder::default()
|
||||||
|
.header("wrapper.h")
|
||||||
|
.parse_callbacks(Box::new(bindgen::CargoCallbacks::new()))
|
||||||
|
.rust_target(bindgen::RustTarget::Stable_1_47)
|
||||||
|
.generate_inline_functions(false)
|
||||||
|
.generate_comments(false)
|
||||||
|
.generate()
|
||||||
|
.expect("Unable to generate bindings");
|
||||||
|
|
||||||
|
bindings
|
||||||
|
.write_to_file(out_path.join("bindings.rs"))
|
||||||
|
.expect("Couldn't write bindings!");
|
||||||
|
}
|
||||||
96
flake.lock
generated
Normal file
96
flake.lock
generated
Normal file
|
|
@ -0,0 +1,96 @@
|
||||||
|
{
|
||||||
|
"nodes": {
|
||||||
|
"flake-utils": {
|
||||||
|
"inputs": {
|
||||||
|
"systems": "systems"
|
||||||
|
},
|
||||||
|
"locked": {
|
||||||
|
"lastModified": 1731533236,
|
||||||
|
"narHash": "sha256-l0KFg5HjrsfsO/JpG+r7fRrqm12kzFHyUHqHCVpMMbI=",
|
||||||
|
"owner": "numtide",
|
||||||
|
"repo": "flake-utils",
|
||||||
|
"rev": "11707dc2f618dd54ca8739b309ec4fc024de578b",
|
||||||
|
"type": "github"
|
||||||
|
},
|
||||||
|
"original": {
|
||||||
|
"owner": "numtide",
|
||||||
|
"repo": "flake-utils",
|
||||||
|
"type": "github"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nixpkgs": {
|
||||||
|
"locked": {
|
||||||
|
"lastModified": 1752950548,
|
||||||
|
"narHash": "sha256-NS6BLD0lxOrnCiEOcvQCDVPXafX1/ek1dfJHX1nUIzc=",
|
||||||
|
"owner": "NixOS",
|
||||||
|
"repo": "nixpkgs",
|
||||||
|
"rev": "c87b95e25065c028d31a94f06a62927d18763fdf",
|
||||||
|
"type": "github"
|
||||||
|
},
|
||||||
|
"original": {
|
||||||
|
"owner": "NixOS",
|
||||||
|
"ref": "nixos-unstable",
|
||||||
|
"repo": "nixpkgs",
|
||||||
|
"type": "github"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nixpkgs_2": {
|
||||||
|
"locked": {
|
||||||
|
"lastModified": 1744536153,
|
||||||
|
"narHash": "sha256-awS2zRgF4uTwrOKwwiJcByDzDOdo3Q1rPZbiHQg/N38=",
|
||||||
|
"owner": "NixOS",
|
||||||
|
"repo": "nixpkgs",
|
||||||
|
"rev": "18dd725c29603f582cf1900e0d25f9f1063dbf11",
|
||||||
|
"type": "github"
|
||||||
|
},
|
||||||
|
"original": {
|
||||||
|
"owner": "NixOS",
|
||||||
|
"ref": "nixpkgs-unstable",
|
||||||
|
"repo": "nixpkgs",
|
||||||
|
"type": "github"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"root": {
|
||||||
|
"inputs": {
|
||||||
|
"flake-utils": "flake-utils",
|
||||||
|
"nixpkgs": "nixpkgs",
|
||||||
|
"rust-overlay": "rust-overlay"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"rust-overlay": {
|
||||||
|
"inputs": {
|
||||||
|
"nixpkgs": "nixpkgs_2"
|
||||||
|
},
|
||||||
|
"locked": {
|
||||||
|
"lastModified": 1753238793,
|
||||||
|
"narHash": "sha256-jmQeEpgX+++MEgrcikcwoSiI7vDZWLP0gci7XiWb9uQ=",
|
||||||
|
"owner": "oxalica",
|
||||||
|
"repo": "rust-overlay",
|
||||||
|
"rev": "0ad7ab4ca8e83febf147197e65c006dff60623ab",
|
||||||
|
"type": "github"
|
||||||
|
},
|
||||||
|
"original": {
|
||||||
|
"owner": "oxalica",
|
||||||
|
"repo": "rust-overlay",
|
||||||
|
"type": "github"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"systems": {
|
||||||
|
"locked": {
|
||||||
|
"lastModified": 1681028828,
|
||||||
|
"narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=",
|
||||||
|
"owner": "nix-systems",
|
||||||
|
"repo": "default",
|
||||||
|
"rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e",
|
||||||
|
"type": "github"
|
||||||
|
},
|
||||||
|
"original": {
|
||||||
|
"owner": "nix-systems",
|
||||||
|
"repo": "default",
|
||||||
|
"type": "github"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"root": "root",
|
||||||
|
"version": 7
|
||||||
|
}
|
||||||
85
flake.nix
Normal file
85
flake.nix
Normal file
|
|
@ -0,0 +1,85 @@
|
||||||
|
{
|
||||||
|
description = "Rust wrapper for marisa-trie C++ library";
|
||||||
|
|
||||||
|
inputs = {
|
||||||
|
nixpkgs.url = "github:NixOS/nixpkgs/nixos-unstable";
|
||||||
|
rust-overlay.url = "github:oxalica/rust-overlay";
|
||||||
|
flake-utils.url = "github:numtide/flake-utils";
|
||||||
|
};
|
||||||
|
|
||||||
|
outputs = { self, nixpkgs, rust-overlay, flake-utils }:
|
||||||
|
flake-utils.lib.eachDefaultSystem (system:
|
||||||
|
let
|
||||||
|
overlays = [ (import rust-overlay) ];
|
||||||
|
pkgs = import nixpkgs {
|
||||||
|
inherit system overlays;
|
||||||
|
};
|
||||||
|
|
||||||
|
rustToolchain = pkgs.rust-bin.stable.latest.default.override {
|
||||||
|
extensions = [ "rust-src" "clippy" "rustfmt" ];
|
||||||
|
};
|
||||||
|
|
||||||
|
# Use nixpkgs marisa-trie package
|
||||||
|
marisa-trie = pkgs.marisa;
|
||||||
|
|
||||||
|
in
|
||||||
|
{
|
||||||
|
devShells.default = pkgs.mkShell {
  # Standard-library sources shipped with the toolchain's `rust-src` extension.
  RUST_SRC_PATH = "${rustToolchain}/lib/rustlib/src/rust/library";

  buildInputs = [
    # Rust toolchain and the native library being wrapped
    rustToolchain
    marisa-trie

    # Build tools
    pkgs.cmake
    pkgs.gcc
    pkgs.pkg-config
    pkgs.clang
    pkgs.llvmPackages.libclang.lib

    # Development tools
    pkgs.rust-analyzer
    pkgs.clippy
    pkgs.rustfmt

    # C++ development
    pkgs.gdb
    pkgs.valgrind
  ];

  # Print a short banner, then export the paths that pkg-config, the linker,
  # the C/C++ preprocessor and bindgen use to find marisa-trie and libclang.
  shellHook = ''
    echo "Rust marisa-trie development environment"
    echo "Rust version: $(rustc --version)"
    echo "Cargo version: $(cargo --version)"
    echo "marisa-trie library available at: ${marisa-trie}"

    export PKG_CONFIG_PATH="${marisa-trie}/lib/pkgconfig:$PKG_CONFIG_PATH"
    export LD_LIBRARY_PATH="${marisa-trie}/lib:$LD_LIBRARY_PATH"
    export LIBRARY_PATH="${marisa-trie}/lib:$LIBRARY_PATH"
    export CPATH="${marisa-trie}/include:$CPATH"
    export LIBCLANG_PATH="${pkgs.llvmPackages.libclang.lib}/lib"
  '';
};
|
||||||
|
|
||||||
|
packages.default = pkgs.rustPlatform.buildRustPackage {
|
||||||
|
pname = "marisa-rs";
|
||||||
|
version = "0.1.0";
|
||||||
|
|
||||||
|
src = ./.;
|
||||||
|
|
||||||
|
cargoLock = {
|
||||||
|
lockFile = ./Cargo.lock;
|
||||||
|
};
|
||||||
|
|
||||||
|
buildInputs = [ marisa-trie ];
|
||||||
|
nativeBuildInputs = with pkgs; [ cmake gcc pkg-config ];
|
||||||
|
|
||||||
|
meta = with pkgs.lib; {
|
||||||
|
description = "Rust wrapper for marisa-trie C++ library";
|
||||||
|
license = with licenses; [ mit asl20 ];
|
||||||
|
platforms = platforms.unix;
|
||||||
|
};
|
||||||
|
};
|
||||||
|
});
|
||||||
|
}
|
||||||
333
src/lib.rs
Normal file
333
src/lib.rs
Normal file
|
|
@ -0,0 +1,333 @@
|
||||||
|
//! # marisa-rs
|
||||||
|
//!
|
||||||
|
//! Safe Rust wrapper for the marisa-trie C++ library.
|
||||||
|
//!
|
||||||
|
//! marisa-trie is a static and space-efficient trie data structure library.
|
||||||
|
//! This crate provides safe Rust bindings to the C++ library.
|
||||||
|
//!
|
||||||
|
//! ## Example
|
||||||
|
//!
|
||||||
|
//! ```rust
|
||||||
|
//! use marisa_rs::{Keyset, Trie};
|
||||||
|
//!
|
||||||
|
//! let mut keyset = Keyset::new();
|
||||||
|
//! keyset.push("apple");
|
||||||
|
//! keyset.push("application");
|
||||||
|
//! keyset.push("apply");
|
||||||
|
//!
|
||||||
|
//! let mut trie = Trie::new();
|
||||||
|
//! trie.build(&mut keyset).unwrap();
|
||||||
|
//!
|
||||||
|
//! // Lookup
|
||||||
|
//! assert!(trie.lookup("apple").is_some());
|
||||||
|
//! assert!(trie.lookup("orange").is_none());
|
||||||
|
//!
|
||||||
|
//! // Common prefix search
|
||||||
|
//! trie.common_prefix_search("application", |key, id| {
|
||||||
|
//! println!("Found: {} (ID: {})", key, id);
|
||||||
|
//! });
|
||||||
|
//! ```
|
||||||
|
|
||||||
|
use std::slice;
|
||||||
|
|
||||||
|
mod bindings {
|
||||||
|
include!(concat!(env!("OUT_DIR"), "/bindings.rs"));
|
||||||
|
}
|
||||||
|
|
||||||
|
use bindings::*;
|
||||||
|
|
||||||
|
/// A keyset for building a trie.
|
||||||
|
///
|
||||||
|
/// Keyset is used to store a collection of keys before building a trie.
|
||||||
|
/// Keys can be added with different weights.
|
||||||
|
pub struct Keyset {
|
||||||
|
inner: *mut MarisaKeyset,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Keyset {
|
||||||
|
/// Creates a new empty keyset.
|
||||||
|
pub fn new() -> Self {
|
||||||
|
unsafe {
|
||||||
|
let inner = marisa_keyset_new();
|
||||||
|
Keyset { inner }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Adds a key with the specified weight to the keyset.
|
||||||
|
pub fn push_back(&mut self, key: &str, weight: f32) {
|
||||||
|
let key_bytes = key.as_bytes();
|
||||||
|
unsafe {
|
||||||
|
marisa_keyset_push_back(
|
||||||
|
self.inner,
|
||||||
|
key_bytes.as_ptr() as *const i8,
|
||||||
|
key_bytes.len(),
|
||||||
|
weight,
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Adds a key with default weight (1.0) to the keyset.
|
||||||
|
pub fn push(&mut self, key: &str) {
|
||||||
|
self.push_back(key, 1.0);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns the number of keys in the keyset.
|
||||||
|
pub fn size(&self) -> usize {
|
||||||
|
unsafe { marisa_keyset_size(self.inner) }
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns true if the keyset is empty.
|
||||||
|
pub fn is_empty(&self) -> bool {
|
||||||
|
self.size() == 0
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Drop for Keyset {
|
||||||
|
fn drop(&mut self) {
|
||||||
|
unsafe {
|
||||||
|
marisa_keyset_delete(self.inner);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// A trie data structure for efficient string lookups.
|
||||||
|
///
|
||||||
|
/// The trie must be built from a keyset before it can be used for lookups.
|
||||||
|
pub struct Trie {
|
||||||
|
inner: *mut MarisaTrie,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Trie {
|
||||||
|
/// Creates a new empty trie.
|
||||||
|
pub fn new() -> Self {
|
||||||
|
unsafe {
|
||||||
|
let inner = marisa_trie_new();
|
||||||
|
Trie { inner }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Builds the trie from the given keyset.
|
||||||
|
///
|
||||||
|
/// # Errors
|
||||||
|
///
|
||||||
|
/// Returns an error if the trie cannot be built from the keyset.
|
||||||
|
pub fn build(&mut self, keyset: &mut Keyset) -> Result<(), &'static str> {
|
||||||
|
unsafe {
|
||||||
|
if marisa_trie_build(self.inner, keyset.inner) == 1 {
|
||||||
|
Ok(())
|
||||||
|
} else {
|
||||||
|
Err("Failed to build trie")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Looks up a key in the trie and returns its ID if found.
|
||||||
|
///
|
||||||
|
/// # Returns
|
||||||
|
///
|
||||||
|
/// - `Some(id)` if the key is found in the trie
|
||||||
|
/// - `None` if the key is not found
|
||||||
|
pub fn lookup(&self, key: &str) -> Option<usize> {
|
||||||
|
let mut agent = Agent::new();
|
||||||
|
agent.set_query(key);
|
||||||
|
|
||||||
|
unsafe {
|
||||||
|
if marisa_trie_lookup(self.inner, agent.inner) == 1 {
|
||||||
|
Some(agent.key_id())
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Performs reverse lookup to get the key corresponding to the given ID.
|
||||||
|
///
|
||||||
|
/// # Errors
|
||||||
|
///
|
||||||
|
/// Returns an error if the ID is not valid.
|
||||||
|
pub fn reverse_lookup(&self, id: usize) -> Result<String, &'static str> {
|
||||||
|
let mut agent = Agent::new();
|
||||||
|
agent.set_query_by_id(id);
|
||||||
|
|
||||||
|
unsafe {
|
||||||
|
if marisa_trie_reverse_lookup(self.inner, agent.inner) == 1 {
|
||||||
|
Ok(agent.key_string())
|
||||||
|
} else {
|
||||||
|
Err("Failed to reverse lookup")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Searches for all keys that are prefixes of the given query.
|
||||||
|
///
|
||||||
|
/// The callback function is called for each matching key with the key and its ID.
|
||||||
|
pub fn common_prefix_search<F>(&self, query: &str, mut callback: F)
|
||||||
|
where
|
||||||
|
F: FnMut(&str, usize),
|
||||||
|
{
|
||||||
|
let mut agent = Agent::new();
|
||||||
|
agent.set_query(query);
|
||||||
|
|
||||||
|
unsafe {
|
||||||
|
while marisa_trie_common_prefix_search(self.inner, agent.inner) == 1 {
|
||||||
|
let key = agent.key_string();
|
||||||
|
let id = agent.key_id();
|
||||||
|
callback(&key, id);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Searches for all keys that have the given query as a prefix.
|
||||||
|
///
|
||||||
|
/// The callback function is called for each matching key with the key and its ID.
|
||||||
|
pub fn predictive_search<F>(&self, query: &str, mut callback: F)
|
||||||
|
where
|
||||||
|
F: FnMut(&str, usize),
|
||||||
|
{
|
||||||
|
let mut agent = Agent::new();
|
||||||
|
agent.set_query(query);
|
||||||
|
|
||||||
|
unsafe {
|
||||||
|
while marisa_trie_predictive_search(self.inner, agent.inner) == 1 {
|
||||||
|
let key = agent.key_string();
|
||||||
|
let id = agent.key_id();
|
||||||
|
callback(&key, id);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns the number of keys stored in the trie.
|
||||||
|
pub fn size(&self) -> usize {
|
||||||
|
unsafe { marisa_trie_size(self.inner) }
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns true if the trie is empty.
|
||||||
|
pub fn is_empty(&self) -> bool {
|
||||||
|
self.size() == 0
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Drop for Trie {
|
||||||
|
fn drop(&mut self) {
|
||||||
|
unsafe {
|
||||||
|
marisa_trie_delete(self.inner);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// An agent for performing trie operations.
|
||||||
|
///
|
||||||
|
/// Agent is used internally for trie operations and should not be used directly
|
||||||
|
/// in most cases.
|
||||||
|
pub struct Agent {
|
||||||
|
inner: *mut MarisaAgent,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Agent {
|
||||||
|
pub fn new() -> Self {
|
||||||
|
unsafe {
|
||||||
|
let inner = marisa_agent_new();
|
||||||
|
Agent { inner }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn set_query(&mut self, query: &str) {
|
||||||
|
let query_bytes = query.as_bytes();
|
||||||
|
unsafe {
|
||||||
|
marisa_agent_set_query(
|
||||||
|
self.inner,
|
||||||
|
query_bytes.as_ptr() as *const i8,
|
||||||
|
query_bytes.len(),
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn set_query_by_id(&mut self, id: usize) {
|
||||||
|
unsafe {
|
||||||
|
marisa_agent_set_query_by_id(self.inner, id);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn key_string(&self) -> String {
|
||||||
|
unsafe {
|
||||||
|
let ptr = marisa_agent_key_ptr(self.inner);
|
||||||
|
let len = marisa_agent_key_length(self.inner);
|
||||||
|
let slice = slice::from_raw_parts(ptr as *const u8, len);
|
||||||
|
String::from_utf8_lossy(slice).into_owned()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn key_id(&self) -> usize {
|
||||||
|
unsafe { marisa_agent_key_id(self.inner) }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Drop for Agent {
|
||||||
|
fn drop(&mut self) {
|
||||||
|
unsafe {
|
||||||
|
marisa_agent_delete(self.inner);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// SAFETY (applies to all three impls below): each wrapper holds the only
// pointer to its heap-allocated C++ object and frees it in `Drop`, so moving
// the wrapper to another thread moves sole ownership with it.
// NOTE(review): this presumes the marisa objects have no thread-affine state
// (thread-locals, thread-bound allocators) — confirm against the library.
// `Sync` is deliberately not implemented.
unsafe impl Send for Keyset {}
|
||||||
|
unsafe impl Send for Trie {}
|
||||||
|
unsafe impl Send for Agent {}
|
||||||
|
|
||||||
|
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a small trie and exercises every public query method on it.
    #[test]
    fn test_basic_operations() {
        let mut keyset = Keyset::new();
        keyset.push("apple");
        keyset.push("application");
        keyset.push("apply");
        keyset.push("apricot");

        assert_eq!(keyset.size(), 4);

        let mut trie = Trie::new();
        trie.build(&mut keyset).expect("Failed to build trie");

        assert_eq!(trie.size(), 4);

        // Test lookup
        assert!(trie.lookup("apple").is_some());
        assert!(trie.lookup("banana").is_none());

        // Test reverse lookup: fail loudly instead of silently skipping the
        // check when the forward lookup does not find the key.
        let apple_id = trie.lookup("apple").expect("\"apple\" should be in the trie");
        assert_eq!(trie.reverse_lookup(apple_id).unwrap(), "apple");

        // Test common prefix search: of the four keys, only "application"
        // itself is a prefix of the query "application".
        let mut results = Vec::new();
        trie.common_prefix_search("application", |key, id| {
            results.push((key.to_string(), id));
        });
        let keys: Vec<&str> = results.iter().map(|(key, _)| key.as_str()).collect();
        assert_eq!(keys, ["application"]);
        for (key, id) in &results {
            assert_eq!(trie.lookup(key), Some(*id));
        }

        // Test predictive search: every key starting with "app", in whatever
        // order the trie reports them ("apricot" must not appear).
        let mut results = Vec::new();
        trie.predictive_search("app", |key, id| {
            results.push((key.to_string(), id));
        });
        let mut keys: Vec<&str> = results.iter().map(|(key, _)| key.as_str()).collect();
        keys.sort_unstable();
        assert_eq!(keys, ["apple", "application", "apply"]);
        for (key, id) in &results {
            assert_eq!(trie.lookup(key), Some(*id));
        }
    }

    /// A freshly created keyset holds no keys.
    #[test]
    fn test_empty_keyset() {
        let keyset = Keyset::new();
        assert!(keyset.is_empty());
        assert_eq!(keyset.size(), 0);
    }

    /// A trie that was never built reports itself as empty.
    #[test]
    fn test_empty_trie() {
        let trie = Trie::new();
        assert!(trie.is_empty());
        assert_eq!(trie.size(), 0);
    }
}
|
||||||
50
src/main.rs
Normal file
50
src/main.rs
Normal file
|
|
@ -0,0 +1,50 @@
|
||||||
|
use marisa_rs::{Keyset, Trie};
|
||||||
|
|
||||||
|
fn main() {
|
||||||
|
println!("Marisa Trie Demo");
|
||||||
|
println!("================");
|
||||||
|
|
||||||
|
let mut keyset = Keyset::new();
|
||||||
|
let words = vec!["apple", "application", "apply", "apricot"];
|
||||||
|
|
||||||
|
println!("Adding words to keyset:");
|
||||||
|
for word in &words {
|
||||||
|
keyset.push(word);
|
||||||
|
println!(" - {}", word);
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut trie = Trie::new();
|
||||||
|
match trie.build(&mut keyset) {
|
||||||
|
Ok(()) => println!("\nTrie built successfully! Size: {}", trie.size()),
|
||||||
|
Err(e) => {
|
||||||
|
println!("Failed to build trie: {}", e);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
println!("\n--- Lookup Test ---");
|
||||||
|
for word in &words {
|
||||||
|
match trie.lookup(word) {
|
||||||
|
Some(id) => println!("'{}' found with ID: {}", word, id),
|
||||||
|
None => println!("'{}' not found", word),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
println!("\n--- Reverse Lookup Test ---");
|
||||||
|
for i in 0..trie.size() {
|
||||||
|
match trie.reverse_lookup(i) {
|
||||||
|
Ok(word) => println!("ID {} -> '{}'", i, word),
|
||||||
|
Err(e) => println!("ID {}: {}", i, e),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
println!("\n--- Common Prefix Search for 'application' ---");
|
||||||
|
trie.common_prefix_search("application", |key, id| {
|
||||||
|
println!(" Found: '{}' (ID: {})", key, id);
|
||||||
|
});
|
||||||
|
|
||||||
|
println!("\n--- Predictive Search for 'app' ---");
|
||||||
|
trie.predictive_search("app", |key, id| {
|
||||||
|
println!(" Found: '{}' (ID: {})", key, id);
|
||||||
|
});
|
||||||
|
}
|
||||||
123
wrapper.cpp
Normal file
123
wrapper.cpp
Normal file
|
|
@ -0,0 +1,123 @@
|
||||||
|
#include "wrapper.h"
|
||||||
|
#include <marisa.h>
|
||||||
|
#include <string>
|
||||||
|
|
||||||
|
extern "C" {
|
||||||
|
|
||||||
|
MarisaKeyset* marisa_keyset_new() {
|
||||||
|
return reinterpret_cast<MarisaKeyset*>(new marisa::Keyset());
|
||||||
|
}
|
||||||
|
|
||||||
|
void marisa_keyset_delete(MarisaKeyset* keyset) {
|
||||||
|
delete reinterpret_cast<marisa::Keyset*>(keyset);
|
||||||
|
}
|
||||||
|
|
||||||
|
void marisa_keyset_push_back(MarisaKeyset* keyset, const char* key, size_t length, float weight) {
|
||||||
|
marisa::Keyset* ks = reinterpret_cast<marisa::Keyset*>(keyset);
|
||||||
|
ks->push_back(std::string(key, length), weight);
|
||||||
|
}
|
||||||
|
|
||||||
|
size_t marisa_keyset_size(const MarisaKeyset* keyset) {
|
||||||
|
const marisa::Keyset* ks = reinterpret_cast<const marisa::Keyset*>(keyset);
|
||||||
|
return ks->size();
|
||||||
|
}
|
||||||
|
|
||||||
|
MarisaTrie* marisa_trie_new() {
|
||||||
|
return reinterpret_cast<MarisaTrie*>(new marisa::Trie());
|
||||||
|
}
|
||||||
|
|
||||||
|
void marisa_trie_delete(MarisaTrie* trie) {
|
||||||
|
delete reinterpret_cast<marisa::Trie*>(trie);
|
||||||
|
}
|
||||||
|
|
||||||
|
int marisa_trie_build(MarisaTrie* trie, MarisaKeyset* keyset) {
|
||||||
|
try {
|
||||||
|
marisa::Trie* tr = reinterpret_cast<marisa::Trie*>(trie);
|
||||||
|
marisa::Keyset* ks = reinterpret_cast<marisa::Keyset*>(keyset);
|
||||||
|
tr->build(*ks);
|
||||||
|
return 1;
|
||||||
|
} catch (...) {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
int marisa_trie_lookup(const MarisaTrie* trie, MarisaAgent* agent) {
|
||||||
|
try {
|
||||||
|
const marisa::Trie* tr = reinterpret_cast<const marisa::Trie*>(trie);
|
||||||
|
marisa::Agent* ag = reinterpret_cast<marisa::Agent*>(agent);
|
||||||
|
return tr->lookup(*ag) ? 1 : 0;
|
||||||
|
} catch (...) {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
int marisa_trie_reverse_lookup(const MarisaTrie* trie, MarisaAgent* agent) {
|
||||||
|
try {
|
||||||
|
const marisa::Trie* tr = reinterpret_cast<const marisa::Trie*>(trie);
|
||||||
|
marisa::Agent* ag = reinterpret_cast<marisa::Agent*>(agent);
|
||||||
|
tr->reverse_lookup(*ag);
|
||||||
|
return 1;
|
||||||
|
} catch (...) {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
int marisa_trie_common_prefix_search(const MarisaTrie* trie, MarisaAgent* agent) {
|
||||||
|
try {
|
||||||
|
const marisa::Trie* tr = reinterpret_cast<const marisa::Trie*>(trie);
|
||||||
|
marisa::Agent* ag = reinterpret_cast<marisa::Agent*>(agent);
|
||||||
|
return tr->common_prefix_search(*ag) ? 1 : 0;
|
||||||
|
} catch (...) {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
int marisa_trie_predictive_search(const MarisaTrie* trie, MarisaAgent* agent) {
|
||||||
|
try {
|
||||||
|
const marisa::Trie* tr = reinterpret_cast<const marisa::Trie*>(trie);
|
||||||
|
marisa::Agent* ag = reinterpret_cast<marisa::Agent*>(agent);
|
||||||
|
return tr->predictive_search(*ag) ? 1 : 0;
|
||||||
|
} catch (...) {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
size_t marisa_trie_size(const MarisaTrie* trie) {
|
||||||
|
const marisa::Trie* tr = reinterpret_cast<const marisa::Trie*>(trie);
|
||||||
|
return tr->size();
|
||||||
|
}
|
||||||
|
|
||||||
|
MarisaAgent* marisa_agent_new() {
|
||||||
|
return reinterpret_cast<MarisaAgent*>(new marisa::Agent());
|
||||||
|
}
|
||||||
|
|
||||||
|
void marisa_agent_delete(MarisaAgent* agent) {
|
||||||
|
delete reinterpret_cast<marisa::Agent*>(agent);
|
||||||
|
}
|
||||||
|
|
||||||
|
void marisa_agent_set_query(MarisaAgent* agent, const char* query, size_t length) {
|
||||||
|
marisa::Agent* ag = reinterpret_cast<marisa::Agent*>(agent);
|
||||||
|
ag->set_query(query, length);
|
||||||
|
}
|
||||||
|
|
||||||
|
void marisa_agent_set_query_by_id(MarisaAgent* agent, size_t id) {
|
||||||
|
marisa::Agent* ag = reinterpret_cast<marisa::Agent*>(agent);
|
||||||
|
ag->set_query(id);
|
||||||
|
}
|
||||||
|
|
||||||
|
const char* marisa_agent_key_ptr(const MarisaAgent* agent) {
|
||||||
|
const marisa::Agent* ag = reinterpret_cast<const marisa::Agent*>(agent);
|
||||||
|
return ag->key().str().data();
|
||||||
|
}
|
||||||
|
|
||||||
|
size_t marisa_agent_key_length(const MarisaAgent* agent) {
|
||||||
|
const marisa::Agent* ag = reinterpret_cast<const marisa::Agent*>(agent);
|
||||||
|
return ag->key().str().length();
|
||||||
|
}
|
||||||
|
|
||||||
|
size_t marisa_agent_key_id(const MarisaAgent* agent) {
|
||||||
|
const marisa::Agent* ag = reinterpret_cast<const marisa::Agent*>(agent);
|
||||||
|
return ag->key().id();
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
41
wrapper.h
Normal file
41
wrapper.h
Normal file
|
|
@ -0,0 +1,41 @@
|
||||||
|
#pragma once

// C interface to the marisa-trie C++ library.
//
// All objects are exposed as opaque handles. Every *_new function has a
// matching *_delete function; the caller owns the handle in between.
// Functions returning int use 1 for success/match and 0 otherwise.

// System headers are included before the linkage block is opened so that
// their contents are never wrapped in extern "C".
#include <stddef.h>
#include <stdint.h>

#ifdef __cplusplus
extern "C" {
#endif

typedef struct MarisaTrie MarisaTrie;
typedef struct MarisaKeyset MarisaKeyset;
typedef struct MarisaAgent MarisaAgent;

// Keyset functions
// (void) makes these real prototypes in C; an empty list would leave the
// parameters unspecified.
MarisaKeyset* marisa_keyset_new(void);
void marisa_keyset_delete(MarisaKeyset* keyset);
// Adds `length` bytes starting at `key` with the given weight.
void marisa_keyset_push_back(MarisaKeyset* keyset, const char* key, size_t length, float weight);
size_t marisa_keyset_size(const MarisaKeyset* keyset);

// Trie functions
MarisaTrie* marisa_trie_new(void);
void marisa_trie_delete(MarisaTrie* trie);
int marisa_trie_build(MarisaTrie* trie, MarisaKeyset* keyset);
int marisa_trie_lookup(const MarisaTrie* trie, MarisaAgent* agent);
int marisa_trie_reverse_lookup(const MarisaTrie* trie, MarisaAgent* agent);
// The two search functions are called repeatedly with the same agent; each
// call that returns 1 leaves the next match in the agent.
int marisa_trie_common_prefix_search(const MarisaTrie* trie, MarisaAgent* agent);
int marisa_trie_predictive_search(const MarisaTrie* trie, MarisaAgent* agent);
size_t marisa_trie_size(const MarisaTrie* trie);

// Agent functions
MarisaAgent* marisa_agent_new(void);
void marisa_agent_delete(MarisaAgent* agent);
void marisa_agent_set_query(MarisaAgent* agent, const char* query, size_t length);
void marisa_agent_set_query_by_id(MarisaAgent* agent, size_t id);
// The key is returned as pointer + length; it is not NUL-terminated.
const char* marisa_agent_key_ptr(const MarisaAgent* agent);
size_t marisa_agent_key_length(const MarisaAgent* agent);
size_t marisa_agent_key_id(const MarisaAgent* agent);

#ifdef __cplusplus
}
#endif
|
||||||
Loading…
Add table
Reference in a new issue