diff --git a/.gitignore b/.gitignore index aa8d4f1..b197cae 100644 --- a/.gitignore +++ b/.gitignore @@ -23,3 +23,4 @@ Thumbs.db *.swp *.swo output_tests +target/* diff --git a/Cargo.lock b/Cargo.lock index 849d4b1..b61328c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -17,6 +17,20 @@ version = "2.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "512761e0bb2578dd7380c6baaa0f4ce03e84f95e960231d1dec8bf4d7d6e2627" +[[package]] +name = "ahash" +version = "0.8.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a15f179cd60c4584b8a8c596927aadc462e27f2ca70c04e0071964a73ba7a75" +dependencies = [ + "cfg-if", + "getrandom 0.3.1", + "once_cell", + "serde", + "version_check", + "zerocopy 0.8.26", +] + [[package]] name = "aho-corasick" version = "1.1.3" @@ -211,12 +225,24 @@ dependencies = [ "windows-targets 0.52.6", ] +[[package]] +name = "base64" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9e1b586273c5702936fe7b7d6896644d8be71e6314cfe09d3167c95f712589e8" + [[package]] name = "base64" version = "0.21.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9d297deb1925b89f2ccc13d7635fa0714f12c87adce1c75356b39ca9b7178567" +[[package]] +name = "base64" +version = "0.22.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" + [[package]] name = "bitflags" version = "1.3.2" @@ -247,6 +273,15 @@ version = "1.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d71b6127be86fdcfddb610f7182ac57211d4b18a3e9c82eb2d17662f2227ad6a" +[[package]] +name = "castaway" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0abae9be0aaf9ea96a3b1b8b1b55c602ca751eba1b1500220cea4ecbafe7c0d5" +dependencies = [ + "rustversion", +] + [[package]] name = "cc" version = "1.2.16" @@ -348,6 +383,34 @@ dependencies = [ "memchr", ] +[[package]] +name = "compact_str" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3fdb1325a1cece981e8a296ab8f0f9b63ae357bd0784a9faaf548cc7b480707a" +dependencies = [ + "castaway", + "cfg-if", + "itoa", + "rustversion", + "ryu", + "serde", + "static_assertions", +] + +[[package]] +name = "console" +version = "0.15.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "054ccb5b10f9f2cbf51eb355ca1d05c2d279ce1804688d0db74b4733a5aeafd8" +dependencies = [ + "encode_unicode", + "libc", + "once_cell", + "unicode-width", + "windows-sys 0.59.0", +] + [[package]] name = "convert_case" version = "0.6.0" @@ -375,7 +438,7 @@ checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" [[package]] name = "cratedocs-mcp" -version = "0.1.0" +version = "0.2.0" dependencies = [ "anyhow", "axum", @@ -388,9 +451,11 @@ dependencies = [ "mcp-server", "mockito", "rand 0.8.5", + "regex", "reqwest", "serde", "serde_json", + "tokenizers", "tokio", "tokio-util", "tower 0.4.13", @@ -400,6 +465,15 @@ dependencies = [ "tracing-subscriber", ] +[[package]] +name = "crc32fast" +version = "1.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a97769d94ddab943e4510d138150169a2758b5ef3eb191a9ee688de3e23ef7b3" +dependencies = [ + "cfg-if", +] + [[package]] name = "crossbeam-channel" version = "0.5.14" @@ -409,12 +483,75 @@ dependencies = [ "crossbeam-utils", ] +[[package]] +name = "crossbeam-deque" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9dd111b7b7f7d55b72c0a6ae361660ee5853c9af73f70c3c2ef6858b950e2e51" +dependencies = [ + "crossbeam-epoch", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-epoch" +version = "0.9.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e" +dependencies = [ + "crossbeam-utils", +] + [[package]] name = "crossbeam-utils" version = "0.8.21" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" +[[package]] +name = "darling" +version = "0.20.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc7f46116c46ff9ab3eb1597a45688b6715c6e628b5c133e288e709a29bcb4ee" +dependencies = [ + "darling_core", + "darling_macro", +] + +[[package]] +name = "darling_core" +version = "0.20.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0d00b9596d185e565c2207a0b01f8bd1a135483d02d9b7b0a54b11da8d53412e" +dependencies = [ + "fnv", + "ident_case", + "proc-macro2", + "quote", + "strsim", + "syn", +] + +[[package]] +name = "darling_macro" +version = "0.20.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc34b93ccb385b40dc71c6fceac4b2ad23662c7eeb248cf10d529b7e055b6ead" +dependencies = [ + "darling_core", + "quote", + "syn", +] + +[[package]] +name = "dary_heap" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "04d2cd9c18b9f454ed67da600630b021a8a80bf33f8c95896ab33aaf1c26b728" +dependencies = [ + "serde", +] + [[package]] name = "deranged" version = "0.3.11" @@ -424,6 +561,58 @@ dependencies = [ "powerfmt", ] +[[package]] +name = "derive_builder" +version = "0.20.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "507dfb09ea8b7fa618fcf76e953f4f5e192547945816d5358edffe39f6f94947" +dependencies = [ + "derive_builder_macro", +] + +[[package]] +name = "derive_builder_core" +version = "0.20.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2d5bcf7b024d6835cfb3d473887cd966994907effbe9227e8c8219824d06c4e8" +dependencies = [ + "darling", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "derive_builder_macro" +version = "0.20.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ab63b0e2bf4d5928aff72e83a7dace85d7bba5fe12dcc3c5a572d78caffd3f3c" +dependencies = [ + "derive_builder_core", + "syn", +] + +[[package]] +name = "dirs" +version = "6.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c3e8aa94d75141228480295a7d0e7feb620b1a5ad9f12bc40be62411e38cce4e" +dependencies = [ + "dirs-sys", +] + +[[package]] +name = "dirs-sys" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e01a3366d27ee9890022452ee61b2b63a67e6f13f58900b651ff5665f0bb1fab" +dependencies = [ + "libc", + "option-ext", + "redox_users", + "windows-sys 0.59.0", +] + [[package]] name = "displaydoc" version = "0.2.5" @@ -441,6 +630,18 @@ version = "1.0.19" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1c7a8fb8a9fbf66c1f703fe16184d10ca0ee9d23be5b4436400408ba54a95005" +[[package]] +name = "either" +version = "1.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" + +[[package]] +name = "encode_unicode" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34aa73646ffb006b8f5147f3dc182bd4bcb190227ce861fc4a4844bf8e3cb2c0" + [[package]] name = "encoding_rs" version = "0.8.35" @@ -466,12 +667,31 @@ dependencies = [ "windows-sys 0.59.0", ] +[[package]] +name = "esaxx-rs" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d817e038c30374a4bcb22f94d0a8a0e216958d4c3dcde369b1439fec4bdda6e6" +dependencies = [ + "cc", +] + [[package]] name = "fastrand" version = "2.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" +[[package]] +name = "flate2" +version = "1.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4a3d7db9596fecd151c5f638c0ee5d5bd487b6e0ea232e5dc96d5250f6f94b1d" +dependencies = [ + "crc32fast", + "miniz_oxide", +] + [[package]] name = "fnv" version = "1.0.7" @@ -680,6 +900,25 @@ version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" +[[package]] +name = "hf-hub" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "629d8f3bbeda9d148036d6b0de0a3ab947abd08ce90626327fc3547a49d59d97" +dependencies = [ + "dirs", + "http 1.2.0", + "indicatif", + "libc", + "log", + "rand 0.9.0", + "serde", + "serde_json", + "thiserror 2.0.12", + "ureq", + "windows-sys 0.60.2", +] + [[package]] name = "html2md" version = "0.2.15" @@ -990,6 +1229,12 @@ dependencies = [ "syn", ] +[[package]] +name = "ident_case" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39" + [[package]] name = "idna" version = "1.0.3" @@ -1021,6 +1266,19 @@ dependencies = [ "hashbrown", ] +[[package]] +name = "indicatif" +version = "0.17.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "183b3088984b400f4cfac3620d5e076c84da5364016b4f49473de574b2586235" +dependencies = [ + "console", + "number_prefix", + "portable-atomic", + "unicode-width", + "web-time", +] + [[package]] name = "ipnet" version = "2.11.0" @@ -1033,6 +1291,15 @@ version = "1.70.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf" +[[package]] +name = "itertools" +version = "0.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b192c782037fadd9cfa75548310488aabdbf3d2da73885b31bd0abd03351285" +dependencies = [ + "either", +] + [[package]] name = "itoa" version = "1.0.15" @@ -1049,7 +1316,7 @@ dependencies = [ "combine", "jni-sys", "log", - "thiserror", + "thiserror 1.0.69", "walkdir", ] @@ -1081,6 +1348,16 @@ version = "0.2.170" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "875b3680cb2f8f71bdcf9a30f38d48282f5d3c95cbf9b3fa57269bb5d5c06828" +[[package]] +name = "libredox" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1580801010e535496706ba011c15f8532df6b42297d2e471fec38ceadd8c0638" +dependencies = [ + "bitflags 2.9.0", + "libc", +] + [[package]] name = "linux-raw-sys" version = "0.9.2" @@ -1115,6 +1392,22 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c41e0c4fef86961ac6d6f8a82609f55f31b05e4fce149ac5710e439df7619ba4" +[[package]] +name = "macro_rules_attribute" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "65049d7923698040cd0b1ddcced9b0eb14dd22c5f86ae59c3740eab64a676520" +dependencies = [ + "macro_rules_attribute-proc_macro", + "paste", +] + +[[package]] +name = "macro_rules_attribute-proc_macro" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "670fdfda89751bc4a84ac13eaa63e205cf0fd22b4c9a5fbfa085b63c1f1d3a30" + [[package]] name = "markup5ever" version = "0.12.1" @@ -1163,12 +1456,12 @@ source = "git+https://github.com/modelcontextprotocol/rust-sdk?rev=c0bd94dd85a35 dependencies = [ "anyhow", "async-trait", - "base64", + "base64 0.21.7", "chrono", "schemars", "serde", "serde_json", - "thiserror", + "thiserror 1.0.69", "url", ] @@ -1202,7 +1495,7 @@ dependencies = [ "schemars", "serde", "serde_json", - "thiserror", + "thiserror 1.0.69", "tokio", "tower 0.4.13", "tower-service", @@ -1223,6 +1516,12 @@ version = "0.3.17" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a" +[[package]] +name = "minimal-lexical" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" + [[package]] name = "miniz_oxide" version = "0.8.5" @@ -1267,6 +1566,27 @@ dependencies = [ "tokio", ] +[[package]] +name = "monostate" +version = "0.1.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aafe1be9d0c75642e3e50fedc7ecadf1ef1cbce6eb66462153fc44245343fbee" +dependencies = [ + "monostate-impl", + "serde", +] + +[[package]] +name = "monostate-impl" +version = "0.1.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c402a4092d5e204f32c9e155431046831fa712637043c58cb73bc6bc6c9663b5" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "native-tls" version = "0.2.14" @@ -1290,6 +1610,16 @@ version = "1.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "650eef8c711430f1a879fdd01d4745a7deea475becfb90269c06775983bbf086" +[[package]] +name = "nom" +version = "7.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a" +dependencies = [ + "memchr", + "minimal-lexical", +] + [[package]] name = "nu-ansi-term" version = "0.46.0" @@ -1315,6 +1645,12 @@ dependencies = [ "autocfg", ] +[[package]] +name = "number_prefix" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "830b246a0e5f20af87141b25c173cd1b609bd7779a4617d6ec582abaf90870f3" + [[package]] name = "object" version = "0.36.7" @@ -1330,6 +1666,28 @@ version = "1.20.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "945462a4b81e43c4e3ba96bd7b49d834c6f61198356aa858733bc4acf3cbe62e" +[[package]] +name = "onig" +version = "6.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "336b9c63443aceef14bea841b899035ae3abe89b7c486aaf4c5bd8aafedac3f0" +dependencies = [ + "bitflags 2.9.0", + "libc", + "once_cell", + "onig_sys", +] + +[[package]] +name = "onig_sys" +version = "69.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c7f86c6eef3d6df15f23bcfb6af487cbd2fed4e5581d58d5bf1f5f8b7f6727dc" +dependencies = [ + "cc", + "pkg-config", +] + [[package]] name = "openssl" version = "0.10.71" @@ -1374,6 +1732,12 @@ dependencies = [ "vcpkg", ] +[[package]] +name = "option-ext" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "04744f49eae99ab78e0d5c0b603ab218f515ea8cfe5a456d7629ad883a3b6e7d" + [[package]] name = "overload" version = "0.1.1" @@ -1403,6 +1767,12 @@ dependencies = [ "windows-targets 0.52.6", ] +[[package]] +name = "paste" +version = "1.0.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a" + [[package]] name = "percent-encoding" version = "2.3.1" @@ -1485,6 +1855,12 @@ version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c" +[[package]] +name = "portable-atomic" +version = "1.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f84267b20a16ea918e43c6a88433c2d54fa145c92a811b5b047ccbe153674483" + [[package]] name = "powerfmt" version = "0.2.0" @@ -1543,7 +1919,7 @@ checksum = "3779b94aeb87e8bd4e834cee3650289ee9e0d5677f976ecdb6d219e5f4f6cd94" dependencies = [ "rand_chacha 0.9.0", "rand_core 0.9.3", - "zerocopy 0.8.23", + "zerocopy 0.8.26", ] [[package]] @@ -1584,6 +1960,37 @@ dependencies = [ "getrandom 0.3.1", ] +[[package]] +name = "rayon" +version = "1.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b418a60154510ca1a002a752ca9714984e21e4241e804d32555251faf8b78ffa" +dependencies = [ + "either", + "rayon-core", +] + +[[package]] +name = "rayon-cond" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2964d0cf57a3e7a06e8183d14a8b527195c706b7983549cd5462d5aa3747438f" +dependencies = [ + "either", + "itertools", + "rayon", +] + +[[package]] +name = "rayon-core" +version = "1.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1465873a3dfdaa8ae7cb14b4383657caab0b3e8a0aa9ae8e04b044854c8dfce2" +dependencies = [ + "crossbeam-deque", + "crossbeam-utils", +] + [[package]] name = "redox_syscall" version = "0.5.10" @@ -1593,6 +2000,17 @@ dependencies = [ "bitflags 2.9.0", ] +[[package]] +name = "redox_users" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dd6f9d3d47bdd2ad6945c5015a226ec6155d0bcdfd8f7cd29f86b71f8de99d2b" +dependencies = [ + "getrandom 0.2.15", + "libredox", + "thiserror 2.0.12", +] + [[package]] name = "regex" version = "1.11.1" @@ -1643,7 +2061,7 @@ version = "0.11.27" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dd67538700a17451e7cba03ac727fb961abb7607553461627b97de0b89cf4a62" dependencies = [ - "base64", + "base64 0.21.7", "bytes", "encoding_rs", "futures-core", @@ -1677,6 +2095,20 @@ dependencies = [ "winreg", ] +[[package]] +name = "ring" +version = "0.17.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a4689e6c2294d81e88dc6261c768b63bc4fcdb852be6d1352498b114f61383b7" +dependencies = [ + "cc", + "cfg-if", + "getrandom 0.2.15", + "libc", + "untrusted", + "windows-sys 0.52.0", +] + [[package]] name = "rustc-demangle" version = "0.1.24" @@ -1696,13 +2128,48 @@ dependencies = [ "windows-sys 0.59.0", ] +[[package]] +name = "rustls" +version = "0.23.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7160e3e10bf4535308537f3c4e1641468cd0e485175d6163087c0393c7d46643" +dependencies = [ + "log", + "once_cell", + "ring", + "rustls-pki-types", + "rustls-webpki", + "subtle", + "zeroize", +] + [[package]] name = "rustls-pemfile" version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1c74cae0a4cf6ccbbf5f359f08efdf8ee7e1dc532573bf0db71968cb56b1448c" dependencies = [ - "base64", + "base64 0.21.7", +] + +[[package]] +name = "rustls-pki-types" +version = "1.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "229a4a4c221013e7e1f1a043678c5cc39fe5171437c88fb47151a21e6f5b5c79" +dependencies = [ + "zeroize", +] + +[[package]] +name = "rustls-webpki" +version = "0.103.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e4a72fe2bcf7a6ac6fd7d0b9e5cb68aeb7d4c0a0271730218b3e92d43b4eb435" +dependencies = [ + "ring", + "rustls-pki-types", + "untrusted", ] [[package]] @@ -1914,12 +2381,41 @@ dependencies = [ "windows-sys 0.52.0", ] +[[package]] +name = "socks" +version = "0.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0c3dbbd9ae980613c6dd8e28a9407b50509d3803b57624d5dfe8315218cd58b" +dependencies = [ + "byteorder", + "libc", + "winapi", +] + +[[package]] +name = "spm_precompiled" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5851699c4033c63636f7ea4cf7b7c1f1bf06d0cc03cfb42e711de5a5c46cf326" +dependencies = [ + "base64 0.13.1", + "nom", + "serde", + "unicode-segmentation", +] + [[package]] name = "stable_deref_trait" version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3" +[[package]] +name = "static_assertions" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" + [[package]] name = "string_cache" version = "0.8.8" @@ -1951,6 +2447,12 @@ version = "0.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" +[[package]] +name = "subtle" +version = "2.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292" + [[package]] name = "syn" version = "2.0.99" @@ -2037,7 +2539,16 @@ version = "1.0.69" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b6aaf5339b578ea85b50e080feb250a3e8ae8cfcdff9a461c9ec2904bc923f52" dependencies = [ - "thiserror-impl", + "thiserror-impl 1.0.69", +] + +[[package]] +name = "thiserror" +version = "2.0.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "567b8a2dae586314f7be2a752ec7474332959c6460e02bde30d702a66d488708" +dependencies = [ + "thiserror-impl 2.0.12", ] [[package]] @@ -2051,6 +2562,17 @@ dependencies = [ "syn", ] +[[package]] +name = "thiserror-impl" +version = "2.0.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f7cf42b4507d8ea322120659672cf1b9dbb93f8f2d4ecfd6e51350ff5b17a1d" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "thread_local" version = "1.1.8" @@ -2102,6 +2624,41 @@ dependencies = [ "zerovec", ] +[[package]] +name = "tokenizers" +version = "0.21.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4c3846d8588abed0daba25a0e47edd58ea15e450a6088b2575f5116fdb0b27ca" +dependencies = [ + "ahash", + "aho-corasick", + "compact_str", + "dary_heap", + "derive_builder", + "esaxx-rs", + "getrandom 0.3.1", + "hf-hub", + "indicatif", + "itertools", + "log", + "macro_rules_attribute", + "monostate", + "onig", + "paste", + "rand 0.9.0", + "rayon", + "rayon-cond", + "regex", + "regex-syntax 0.8.5", + "serde", + "serde_json", + "spm_precompiled", + "thiserror 2.0.12", + "unicode-normalization-alignments", + "unicode-segmentation", + "unicode_categories", +] + [[package]] name = "tokio" version = "1.43.0" @@ -2217,7 +2774,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3566e8ce28cc0a3fe42519fc80e6b4c943cc4c8cef275620eb8dac2d3d4e06cf" dependencies = [ "crossbeam-channel", - "thiserror", + "thiserror 1.0.69", "time", "tracing-subscriber", ] @@ -2284,12 +2841,58 @@ version = "1.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512" +[[package]] +name = "unicode-normalization-alignments" +version = "0.1.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "43f613e4fa046e69818dd287fdc4bc78175ff20331479dab6e1b0f98d57062de" +dependencies = [ + "smallvec", +] + [[package]] name = "unicode-segmentation" version = "1.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f6ccf251212114b54433ec949fd6a7841275f9ada20dddd2f29e9ceea4501493" +[[package]] +name = "unicode-width" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4a1a07cc7db3810833284e8d372ccdc6da29741639ecc70c9ec107df0fa6154c" + +[[package]] +name = "unicode_categories" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "39ec24b3121d976906ece63c9daad25b85969647682eee313cb5779fdd69e14e" + +[[package]] +name = "untrusted" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1" + +[[package]] +name = "ureq" +version = "2.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "02d1a66277ed75f640d608235660df48c8e3c19f3b4edb6a263315626cc3c01d" +dependencies = [ + "base64 0.22.1", + "flate2", + "log", + "once_cell", + "rustls", + "rustls-pki-types", + "serde", + "serde_json", + "socks", + "url", + "webpki-roots 0.26.11", +] + [[package]] name = "url" version = "2.5.4" @@ -2337,6 +2940,12 @@ version = "0.2.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" +[[package]] +name = "version_check" +version = "0.9.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" + [[package]] name = "walkdir" version = "2.5.0" @@ -2452,6 +3061,34 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "web-time" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a6580f308b1fad9207618087a65c04e7a10bc77e02c8e84e9b00dd4b12fa0bb" +dependencies = [ + "js-sys", + "wasm-bindgen", +] + +[[package]] +name = "webpki-roots" +version = "0.26.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "521bc38abb08001b01866da9f51eb7c5d647a19260e00054a8c7fd5f9e57f7a9" +dependencies = [ + "webpki-roots 1.0.1", +] + +[[package]] +name = "webpki-roots" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8782dd5a41a24eed3a4f40b606249b3e236ca61adf1f25ea4d45c73de122b502" +dependencies = [ + "rustls-pki-types", +] + [[package]] name = "winapi" version = "0.3.9" @@ -2525,6 +3162,15 @@ dependencies = [ "windows-targets 0.52.6", ] +[[package]] +name = "windows-sys" +version = "0.60.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2f500e4d28234f72040990ec9d39e3a6b950f9f22d3dba18416c35882612bcb" +dependencies = [ + "windows-targets 0.53.2", +] + [[package]] name = "windows-targets" version = "0.48.5" @@ -2549,13 +3195,29 @@ dependencies = [ "windows_aarch64_gnullvm 0.52.6", "windows_aarch64_msvc 0.52.6", "windows_i686_gnu 0.52.6", - "windows_i686_gnullvm", + "windows_i686_gnullvm 0.52.6", "windows_i686_msvc 0.52.6", "windows_x86_64_gnu 0.52.6", "windows_x86_64_gnullvm 0.52.6", "windows_x86_64_msvc 0.52.6", ] +[[package]] +name = "windows-targets" +version = "0.53.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c66f69fcc9ce11da9966ddb31a40968cad001c5bedeb5c2b82ede4253ab48aef" +dependencies = [ + "windows_aarch64_gnullvm 0.53.0", + "windows_aarch64_msvc 0.53.0", + "windows_i686_gnu 0.53.0", + "windows_i686_gnullvm 0.53.0", + "windows_i686_msvc 0.53.0", + "windows_x86_64_gnu 0.53.0", + "windows_x86_64_gnullvm 0.53.0", + "windows_x86_64_msvc 0.53.0", +] + [[package]] name = "windows_aarch64_gnullvm" version = "0.48.5" @@ -2568,6 +3230,12 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "86b8d5f90ddd19cb4a147a5fa63ca848db3df085e25fee3cc10b39b6eebae764" + [[package]] name = "windows_aarch64_msvc" version = "0.48.5" @@ -2580,6 +3248,12 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" +[[package]] +name = "windows_aarch64_msvc" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c7651a1f62a11b8cbd5e0d42526e55f2c99886c77e007179efff86c2b137e66c" + [[package]] name = "windows_i686_gnu" version = "0.48.5" @@ -2592,12 +3266,24 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" +[[package]] +name = "windows_i686_gnu" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c1dc67659d35f387f5f6c479dc4e28f1d4bb90ddd1a5d3da2e5d97b42d6272c3" + [[package]] name = "windows_i686_gnullvm" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" +[[package]] +name = "windows_i686_gnullvm" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ce6ccbdedbf6d6354471319e781c0dfef054c81fbc7cf83f338a4296c0cae11" + [[package]] name = "windows_i686_msvc" version = "0.48.5" @@ -2610,6 +3296,12 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" +[[package]] +name = "windows_i686_msvc" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "581fee95406bb13382d2f65cd4a908ca7b1e4c2f1917f143ba16efe98a589b5d" + [[package]] name = "windows_x86_64_gnu" version = "0.48.5" @@ -2622,6 +3314,12 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" +[[package]] +name = "windows_x86_64_gnu" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2e55b5ac9ea33f2fc1716d1742db15574fd6fc8dadc51caab1c16a3d3b4190ba" + [[package]] name = "windows_x86_64_gnullvm" version = "0.48.5" @@ -2634,6 +3332,12 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0a6e035dd0599267ce1ee132e51c27dd29437f63325753051e71dd9e42406c57" + [[package]] name = "windows_x86_64_msvc" version = "0.48.5" @@ -2646,6 +3350,12 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" +[[package]] +name = "windows_x86_64_msvc" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "271414315aff87387382ec3d271b52d7ae78726f5d44ac98b4f4030c91880486" + [[package]] name = "winreg" version = "0.50.0" @@ -2724,11 +3434,11 @@ dependencies = [ [[package]] name = "zerocopy" -version = "0.8.23" +version = "0.8.26" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fd97444d05a4328b90e75e503a34bad781f14e28a823ad3557f0750df1ebcbc6" +checksum = "1039dd0d3c310cf05de012d8a39ff557cb0d23087fd44cad61df08fc31907a2f" dependencies = [ - "zerocopy-derive 0.8.23", + "zerocopy-derive 0.8.26", ] [[package]] @@ -2744,9 +3454,9 @@ dependencies = [ [[package]] name = "zerocopy-derive" -version = "0.8.23" +version = "0.8.26" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6352c01d0edd5db859a63e2605f4ea3183ddbd15e2c4a9e7d32184df75e4f154" +checksum = "9ecf5b4cc5364572d7f4c329661bcc82724222973f2cab6f050a4e5c22f75181" dependencies = [ "proc-macro2", "quote", @@ -2774,6 +3484,12 @@ dependencies = [ "synstructure", ] +[[package]] +name = "zeroize" +version = "1.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ced3678a2879b30306d323f4542626697a464a97c0a07c9aebf7ebca65cd4dde" + [[package]] name = "zerovec" version = "0.10.4" diff --git a/Cargo.toml b/Cargo.toml index c4a2f9f..c153ee9 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,9 +1,10 @@ [package] name = "cratedocs-mcp" -version = "0.1.0" +version = "0.2.0" edition = "2021" description = "Rust Documentation MCP Server for LLM crate assistance" -authors = ["Claude "] +authors = ["Brian Horakh ", +"Claude "] license = "MIT" repository = "https://github.com/d6e/cratedocs-mcp" @@ -18,6 +19,9 @@ mcp-server = { git = "https://github.com/modelcontextprotocol/rust-sdk", rev = " mcp-core = { git = "https://github.com/modelcontextprotocol/rust-sdk", rev = "c0bd94dd85a3535cb1580424465140d51bab2a17", package = "mcp-core" } mcp-macros = { git = "https://github.com/modelcontextprotocol/rust-sdk", rev = "c0bd94dd85a3535cb1580424465140d51bab2a17", package = "mcp-macros" } +# Tokenizer dependency for token count functionality +tokenizers = { version = "0.21.2", features = ["http"] } + # HTTP and networking tokio = { version = "1", features = ["full"] } reqwest = { version = "0.11", features = ["json"] } @@ -42,6 +46,7 @@ futures = "0.3" rand = "0.8" clap = { version = "4.4", features = ["derive"] } html2md = "0.2.14" +regex = "1" [dev-dependencies] # Testing utilities diff --git a/README.md b/README.md index 2c90eea..2e7cc9d 100644 --- a/README.md +++ b/README.md @@ -55,6 +55,9 @@ cargo run --bin cratedocs test --tool lookup_item --crate-name tokio --item-path # Look up documentation for a specific version cargo run --bin cratedocs test --tool lookup_item --crate-name serde --item-path Serialize --version 1.0.147 +# Look up a trait in a crate (e.g., the Serialize trait in serde) & a specific version +cargo run --bin cratedocs test --tool lookup_item --crate-name serde --item-path serde::Serialize --version 1.0.160 + # Search for crates cargo run --bin cratedocs test --tool search_crates --query logger --limit 5 @@ -64,6 +67,9 @@ cargo run --bin cratedocs test --tool lookup_crate --crate-name tokio --format t # Save output to a file cargo run --bin cratedocs test --tool lookup_crate --crate-name tokio --output tokio-docs.md + +# Summarize output by stripping LICENSE and VERSION sections, limits to xxxxx tokens (uses huggingface tokenizer) +cargo run --bin cratedocs test --tool lookup_crate --crate-name tokio --tldr --max_tokens 48000 ``` By default, the HTTP server will listen on `http://127.0.0.1:8080/sse`. @@ -144,3 +150,44 @@ This server implements the Model Context Protocol (MCP) which allows it to be ea ## License MIT License + +## MCP Tool: `list_crate_items` + +The `list_crate_items` tool enumerates all items in a specified Rust crate and version, optionally filtering by item type, visibility, or module path. This is useful for quickly exploring the structure of a crate, generating concise listings for LLMs, or programmatically analyzing crate APIs. + +### Usage + +```sh +cargo run --bin cratedocs -- list-crate-items --crate-name serde --version 1.0.0 +``` + +#### With filters: + +- Filter by item type (e.g., struct, enum, trait, fn, macro, mod): + + ```sh + cargo run --bin cratedocs -- list-crate-items --crate-name serde --version 1.0.0 --item-type struct + ``` + +- Filter by visibility (e.g., pub, private): + + ```sh + cargo run --bin cratedocs -- list-crate-items --crate-name serde --version 1.0.0 --visibility pub + ``` + +- Filter by module path: + + ```sh + cargo run --bin cratedocs -- list-crate-items --crate-name serde --version 1.0.0 --module serde::de + ``` + +### Output + +The output is a concise, categorized list (JSON or markdown) showing each item's name, type, visibility, and module path. + +**Example (stub output):** +``` +Stub: list_crate_items for crate: serde, version: 1.0.0, filters: Some(ItemListFilters { item_type: Some("struct"), visibility: None, module: None }) +``` + +When implemented, the output will be a structured list of items matching the filters. diff --git a/src/bin/cratedocs.rs b/src/bin/cratedocs.rs index 977c346..8b50a6e 100644 --- a/src/bin/cratedocs.rs +++ b/src/bin/cratedocs.rs @@ -9,9 +9,10 @@ use std::net::SocketAddr; use tokio::io::{stdin, stdout}; use tracing_appender::rolling::{RollingFileAppender, Rotation}; use tracing_subscriber::{self, EnvFilter, layer::SubscriberExt, util::SubscriberInitExt}; +use regex::Regex; #[derive(Parser)] -#[command(author, version = "0.1.0", about, long_about = None)] +#[command(author, version = "0.2.0", about, long_about = None)] #[command(propagate_version = true)] #[command(disable_version_flag = true)] struct Cli { @@ -70,11 +71,37 @@ enum Commands { /// Output file path (if not specified, results will be printed to stdout) #[arg(long)] output: Option, + + /// Summarize output by stripping LICENSE and VERSION sections (TL;DR mode) + #[arg(long)] + tldr: bool, + + /// Maximum number of tokens for output (token-aware truncation) + #[arg(long)] + max_tokens: Option, /// Enable debug logging #[arg(short, long)] debug: bool, }, + /// List all items in a crate (using rust-analyzer) + ListCrateItems { + /// Crate name (e.g., serde) + #[arg(long)] + crate_name: String, + /// Crate version (e.g., 1.0.0) + #[arg(long)] + version: String, + /// Filter by item type (struct, enum, trait, fn, macro, mod) + #[arg(long)] + item_type: Option, + /// Filter by visibility (pub, private) + #[arg(long)] + visibility: Option, + /// Filter by module path (e.g., serde::de) + #[arg(long)] + module: Option, + }, } #[tokio::main] @@ -84,16 +111,18 @@ async fn main() -> Result<()> { match cli.command { Commands::Stdio { debug } => run_stdio_server(debug).await, Commands::Http { address, debug } => run_http_server(address, debug).await, - Commands::Test { - tool, - crate_name, - item_path, - query, - version, + Commands::Test { + tool, + crate_name, + item_path, + query, + version, limit, format, output, - debug + tldr, + max_tokens, + debug } => run_test_tool(TestToolConfig { tool, crate_name, @@ -103,8 +132,27 @@ async fn main() -> Result<()> { limit, format, output, + tldr, + max_tokens, debug }).await, + Commands::ListCrateItems { + crate_name, + version, + item_type, + visibility, + module, + } => { + use cratedocs_mcp::tools::item_list::{list_crate_items, ItemListFilters}; + let filters = ItemListFilters { + item_type, + visibility, + module, + }; + let result = list_crate_items(&crate_name, &version, Some(filters)).await?; + println!("{}", result); + Ok(()) + } } } @@ -163,6 +211,34 @@ async fn run_http_server(address: String, debug: bool) -> Result<()> { Ok(()) } +// --- TLDR Helper Function --- +fn apply_tldr(input: &str) -> String { + // Remove LICENSE and VERSION(S) sections by skipping lines between those headings and the next heading or EOF. + let mut output = Vec::new(); + let mut skip = false; + + // Match any heading (with or without space) for LICENSE or VERSION(S) + let tldr_section_re = Regex::new(r"(?i)^\s*#+\s*license\b|^\s*#+\s*version(s)?\b|^\s*#+license\b|^\s*#+version(s)?\b").unwrap(); + // Match any heading (for ending the skip) + let heading_re = Regex::new(r"^\s*#+").unwrap(); + + for line in input.lines() { + // Start skipping if we hit a LICENSE or VERSION(S) heading + if !skip && tldr_section_re.is_match(line) { + skip = true; + continue; // skip the heading line itself + } + // Stop skipping at the next heading (but do not skip the heading itself) + if skip && heading_re.is_match(line) && !tldr_section_re.is_match(line) { + skip = false; + } + if !skip { + output.push(line); + } + } + output.join("\n") +} + /// Configuration for the test tool struct TestToolConfig { tool: String, @@ -173,6 +249,8 @@ struct TestToolConfig { limit: Option, format: Option, output: Option, + tldr: bool, + max_tokens: Option, debug: bool, } @@ -187,6 +265,8 @@ async fn run_test_tool(config: TestToolConfig) -> Result<()> { limit, format, output, + tldr, + max_tokens, debug, } = config; // Print help information if the tool is "help" @@ -210,6 +290,7 @@ async fn run_test_tool(config: TestToolConfig) -> Result<()> { println!("\nOutput options:"); println!(" --format - Output format: markdown (default), text, json"); println!(" --output - Write output to a file instead of stdout"); + println!(" --tldr - Summarize output by stripping LICENSE and VERSION sections"); return Ok(()); } @@ -289,7 +370,32 @@ async fn run_test_tool(config: TestToolConfig) -> Result<()> { if !result.is_empty() { for content in result { if let Content::Text(text) = content { - let content_str = text.text; + let mut content_str = text.text; + + // If max_tokens is set, truncate output to fit within the limit + if let Some(max_tokens) = max_tokens { + match cratedocs_mcp::tools::count_tokens(&content_str) { + Ok(token_count) if token_count > max_tokens => { + // Truncate by character, then to previous word boundary, and append Mandarin to indicate truncation. + let mut truncated = content_str.clone(); + while cratedocs_mcp::tools::count_tokens(&truncated).map_or(0, |c| c) > max_tokens && !truncated.is_empty() { + truncated.pop(); + } + if let Some(last_space) = truncated.rfind(' ') { + truncated.truncate(last_space); + } + truncated.push_str(" 内容被截断"); + content_str = truncated; + } + _ => {} + } + } + + // TL;DR processing: strip LICENSE and VERSION(S) sections if --tldr is set + if tldr { + content_str = apply_tldr(&content_str); + } + let formatted_output = match format.as_str() { "json" => { // For search_crates, which may return JSON content @@ -321,7 +427,7 @@ async fn run_test_tool(config: TestToolConfig) -> Result<()> { let description = crate_info.get("description").and_then(|v| v.as_str()).unwrap_or("No description"); let downloads = crate_info.get("downloads").and_then(|v| v.as_u64()).unwrap_or(0); - text_output.push_str(&format!("{}. {} - {} (Downloads: {})\n", + text_output.push_str(&format!("{}. {} - {} (Downloads: {})\n", i + 1, name, description, downloads)); } text_output @@ -384,4 +490,97 @@ async fn run_test_tool(config: TestToolConfig) -> Result<()> { } Ok(()) -} \ No newline at end of file +} +#[cfg(test)] +mod tldr_tests { + use super::apply_tldr; + + #[test] + fn test_apply_tldr_removes_license_and_versions() { + let input = r#" +# Versions +This is version info. + +# LICENSE +MIT License text. + +# Usage +Some real documentation here. + +# Another Section +More docs. +"#; + let output = apply_tldr(input); + assert!(!output.to_lowercase().contains("license")); + assert!(!output.to_lowercase().contains("version")); + assert!(output.contains("Usage")); + assert!(output.contains("Another Section")); + assert!(output.contains("Some real documentation here.")); + // Debug print for failure analysis + if output.to_lowercase().contains("license") { + println!("DEBUG OUTPUT:\n{}", output); + } + } + + #[test] + fn test_apply_tldr_handles_no_license_or_versions() { + let input = r#" +# Usage +Some real documentation here. +"#; + let output = apply_tldr(input); + assert_eq!(output.trim(), input.trim()); + } +#[test] +fn test_apply_tldr_no_headings() { + let input = r#" +This is plain text without any headings. +It should remain unchanged after processing. +"#; + let output = apply_tldr(input); + assert_eq!(output.trim(), input.trim()); +} + +#[test] +fn test_apply_tldr_malformed_markdown() { + let input = r#" +#LICENSE +This is a malformed license heading. +#VERSION +This is a malformed version heading. +"#; + let output = apply_tldr(input); + assert!(!output.to_lowercase().contains("license")); + assert!(!output.to_lowercase().contains("version")); +} + +#[test] +fn test_apply_tldr_large_input() { + let input = r#" +# Versions +Version 1.0.0 +Version 2.0.0 + +# LICENSE +MIT License text. + +# Usage +Some real documentation here. + +# Another Section +More docs. + +# LICENSE +Another license section. + +# Versions +Another version section. +"#; + let output = apply_tldr(input); + assert!(!output.to_lowercase().contains("license")); + assert!(!output.to_lowercase().contains("version")); + assert!(output.contains("Usage")); + assert!(output.contains("Another Section")); + assert!(output.contains("Some real documentation here.")); +} +} diff --git a/src/tools/mod.rs b/src/tools/mod.rs index b407b15..cd6b80a 100644 --- a/src/tools/mod.rs +++ b/src/tools/mod.rs @@ -1,4 +1,16 @@ pub mod docs; +pub mod item_list; pub use docs::DocRouter; -pub use docs::docs::DocCache; \ No newline at end of file +pub use docs::docs::DocCache; + + +// Function to count tokens in a given text using a pretrained model from Hugging Face Hub +use tokenizers::tokenizer::Tokenizer; + +pub fn count_tokens(text: &str) -> Result { + // 🦨 skunky: This loads the tokenizer from Hugging Face Hub every call; cache for production. + let tokenizer = Tokenizer::from_pretrained("bert-base-cased", None)?; + let encoding = tokenizer.encode(text, true)?; + Ok(encoding.get_ids().len()) +} \ No newline at end of file