diff --git a/AGENTS.md b/AGENTS.md index 5349b5f..7240e20 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -34,6 +34,7 @@ Key capabilities: Available specs: - `specs/initial.md` - WebFetch tool specification (types, behavior, conversions, error handling) - `specs/fetchers.md` - Pluggable fetcher system for URL-specific handling +- `specs/maintenance.md` - Periodic maintenance checklist (deps, docs, spec-code alignment) Specification format: Abstract and Requirements sections. diff --git a/Cargo.lock b/Cargo.lock index 65886c6..64de254 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -94,9 +94,9 @@ dependencies = [ [[package]] name = "async-compression" -version = "0.4.37" +version = "0.4.39" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d10e4f991a553474232bc0a31799f6d24b034a84c0971d80d2e2f78b2e576e40" +checksum = "68650b7df54f0293fd061972a0fb05aaf4fc0879d3b3d21a638a182c5c543b9f" dependencies = [ "compression-codecs", "compression-core", @@ -121,12 +121,6 @@ version = "1.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1505bd5d3d116872e7271a6d4e16d81d0c8570876c8de68093a09ac269d8aac0" -[[package]] -name = "autocfg" -version = "1.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" - [[package]] name = "aws-lc-rs" version = "1.15.4" @@ -139,9 +133,9 @@ dependencies = [ [[package]] name = "aws-lc-sys" -version = "0.37.0" +version = "0.37.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c34dda4df7017c8db52132f0f8a2e0f8161649d15723ed63fc00c82d0f2081a" +checksum = "b092fe214090261288111db7a2b2c2118e5a7f30dc2569f1732c4069a6840549" dependencies = [ "cc", "cmake", @@ -190,15 +184,15 @@ checksum = "5dd9dc738b7a8311c7ade152424974d8115f2cdad61e8dab8dac9f2362298510" [[package]] name = "bytes" -version = "1.11.0" +version = "1.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" 
-checksum = "b35204fbdc0b3f4446b89fc1ac2cf84a8a68971995d0bf2e925ec7cd960f9cb3" +checksum = "1e748733b7cbc798e1434b6ac524f0c1ff2ab456fe201501e6497c8417a4fc33" [[package]] name = "cc" -version = "1.2.54" +version = "1.2.55" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6354c81bbfd62d9cfa9cb3c773c2b7b2a3a482d569de977fd0e961f6e7c00583" +checksum = "47b26a0954ae34af09b50f0de26458fa95369a0d478d8236d3f93082b219bd29" dependencies = [ "find-msvc-tools", "jobserver", @@ -226,9 +220,9 @@ checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724" [[package]] name = "clap" -version = "4.5.54" +version = "4.5.58" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c6e6ff9dcd79cff5cd969a17a545d79e84ab086e444102a591e288a8aa3ce394" +checksum = "63be97961acde393029492ce0be7a1af7e323e6bae9511ebfac33751be5e6806" dependencies = [ "clap_builder", "clap_derive", @@ -236,9 +230,9 @@ dependencies = [ [[package]] name = "clap_builder" -version = "4.5.54" +version = "4.5.58" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fa42cf4d2b7a41bc8f663a7cab4031ebafa1bf3875705bfaf8466dc60ab52c00" +checksum = "7f13174bda5dfd69d7e947827e5af4b0f2f94a4a3ee92912fba07a66150f21e2" dependencies = [ "anstream", "anstyle", @@ -248,9 +242,9 @@ dependencies = [ [[package]] name = "clap_derive" -version = "4.5.49" +version = "4.5.55" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2a0b5487afeab2deb2ff4e03a807ad1a03ac532ff5a2cee5d86884440c7f7671" +checksum = "a92793da1a46a5f2a02a6f4c46c6496b28c43638adea8306fcb0caa1634f24e5" dependencies = [ "heck", "proc-macro2", @@ -260,9 +254,9 @@ dependencies = [ [[package]] name = "clap_lex" -version = "0.7.7" +version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c3e64b0cc0439b12df2fa678eae89a1c56a529fd067a9115f7827f1fffd22b32" +checksum = 
"3a822ea5bc7590f9d40f1ba12c0dc3c2760f3482c6984db1573ad11031420831" [[package]] name = "cmake" @@ -409,7 +403,7 @@ dependencies = [ "schemars", "serde", "serde_json", - "thiserror 1.0.69", + "thiserror 2.0.18", "tokio", "tokio-test", "tracing", @@ -441,15 +435,15 @@ dependencies = [ [[package]] name = "find-msvc-tools" -version = "0.1.8" +version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8591b0bcc8a98a64310a2fae1bb3e9b8564dd10e381e6e28010fde8e8e8568db" +checksum = "5baebc0774151f905a1a2cc41989300b1e6fbb29aff0ceffa1064fdd3088d582" [[package]] name = "flate2" -version = "1.1.8" +version = "1.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b375d6465b98090a5f25b1c7703f3859783755aa9a80433b36e0379a3ec2f369" +checksum = "843fba2746e448b37e26a819579957415c8cef339bf08564fe8b7ddbd959573c" dependencies = [ "crc32fast", "miniz_oxide", @@ -715,14 +709,13 @@ dependencies = [ [[package]] name = "hyper-util" -version = "0.1.19" +version = "0.1.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "727805d60e7938b76b826a6ef209eb70eaa1812794f9424d4a4e2d740662df5f" +checksum = "96547c2556ec9d12fb1578c4eaf448b04993e7fb79cbaad930a656880a6bdfa0" dependencies = [ "base64", "bytes", "futures-channel", - "futures-core", "futures-util", "http", "http-body", @@ -851,15 +844,6 @@ dependencies = [ "hashbrown", ] -[[package]] -name = "indoc" -version = "2.0.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "79cf5c93f93228cf8efb3ba362535fb11199ac548a09ce117c9b1adc3030d706" -dependencies = [ - "rustversion", -] - [[package]] name = "ipnet" version = "2.11.0" @@ -938,9 +922,9 @@ checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" [[package]] name = "libc" -version = "0.2.180" +version = "0.2.181" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"bcc35a38544a891a5f7c865aca548a982ccb3b8650a5b06d0fd33a10283c56fc" +checksum = "459427e2af2b9c839b132acb702a1c654d95e10f8c326bfc2ad11310e458b1c5" [[package]] name = "litemap" @@ -971,18 +955,9 @@ dependencies = [ [[package]] name = "memchr" -version = "2.7.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f52b00d39961fc5b2736ea853c9cc86238e165017a493d1d5c8eac6bdc4cc273" - -[[package]] -name = "memoffset" -version = "0.9.1" +version = "2.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "488016bfae457b036d996092f6cb448677611ce4449e970ceaf42695203f218a" -dependencies = [ - "autocfg", -] +checksum = "f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79" [[package]] name = "mime" @@ -1068,9 +1043,9 @@ checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" [[package]] name = "portable-atomic" -version = "1.13.0" +version = "1.13.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f89776e4d69bb58bc6993e99ffa1d11f228b839984854c7daeb5d37f87cbe950" +checksum = "c33a9471896f1c69cecef8d20cbe2f7accd12527ce60845ff44c153bb2a21b49" [[package]] name = "potential_utf" @@ -1101,37 +1076,32 @@ dependencies = [ [[package]] name = "pyo3" -version = "0.22.6" +version = "0.28.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f402062616ab18202ae8319da13fa4279883a2b8a9d9f83f20dbade813ce1884" +checksum = "fcf3ccafdf54c050be48a3a086d372f77ba6615f5057211607cd30e5ac5cec6d" dependencies = [ - "cfg-if", - "indoc", "libc", - "memoffset", "once_cell", "portable-atomic", "pyo3-build-config", "pyo3-ffi", "pyo3-macros", - "unindent", ] [[package]] name = "pyo3-build-config" -version = "0.22.6" +version = "0.28.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b14b5775b5ff446dd1056212d778012cbe8a0fbffd368029fd9e25b514479c38" +checksum = "972720a441c91fd9c49f212a1d2d74c6e3803b231ebc8d66c51efbd7ccab11c8" dependencies 
= [ - "once_cell", "target-lexicon", ] [[package]] name = "pyo3-ffi" -version = "0.22.6" +version = "0.28.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9ab5bcf04a2cdcbb50c7d6105de943f543f9ed92af55818fd17b660390fc8636" +checksum = "5994456d9dab8934d600d3867571b6410f24fbd6002570ad56356733eb54859b" dependencies = [ "libc", "pyo3-build-config", @@ -1139,9 +1109,9 @@ dependencies = [ [[package]] name = "pyo3-macros" -version = "0.22.6" +version = "0.28.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0fd24d897903a9e6d80b968368a34e1525aeb719d568dba8b3d4bfa5dc67d453" +checksum = "11ce9cc8d81b3c4969748807604d92b4eef363c5bb82b1a1bdb34ec6f1093a18" dependencies = [ "proc-macro2", "pyo3-macros-backend", @@ -1151,9 +1121,9 @@ dependencies = [ [[package]] name = "pyo3-macros-backend" -version = "0.22.6" +version = "0.28.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "36c011a03ba1e50152b4b394b479826cad97e7a21eb52df179cd91ac411cbfbe" +checksum = "eaf4b60036a154d23282679b658e3cc7d88d3b8c9a40b43824785f232d2e1b98" dependencies = [ "heck", "proc-macro2", @@ -1262,11 +1232,31 @@ dependencies = [ "getrandom 0.3.4", ] +[[package]] +name = "ref-cast" +version = "1.0.25" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f354300ae66f76f1c85c5f84693f0ce81d747e2c3f21a45fef496d89c960bf7d" +dependencies = [ + "ref-cast-impl", +] + +[[package]] +name = "ref-cast-impl" +version = "1.0.25" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b7186006dcb21920990093f30e3dea63b7d6e977bf1256be20c3563a5db070da" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "regex" -version = "1.12.2" +version = "1.12.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "843bc0191f75f3e22651ae5f1e72939ab2f72a4bc30fa80a066bd66edefc24d4" +checksum = 
"e10754a14b9137dd7b1e3e5b0493cc9171fdd105e0ab477f51b72e7f3ac0e276" dependencies = [ "aho-corasick", "memchr", @@ -1276,9 +1266,9 @@ dependencies = [ [[package]] name = "regex-automata" -version = "0.4.13" +version = "0.4.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5276caf25ac86c8d810222b3dbb938e512c55c6831a10f3e6ed1c93b84041f1c" +checksum = "6e1dd4122fc1595e8162618945476892eefca7b88c52820e74af6262213cae8f" dependencies = [ "aho-corasick", "memchr", @@ -1287,15 +1277,15 @@ dependencies = [ [[package]] name = "regex-syntax" -version = "0.8.8" +version = "0.8.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a2d987857b319362043e95f5353c0535c1f58eec5336fdfcf626430af7def58" +checksum = "a96887878f22d7bad8a3b6dc5b7440e0ada9a245242924394987b21cf2210a4c" [[package]] name = "reqwest" -version = "0.13.1" +version = "0.13.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "04e9018c9d814e5f30cc16a0f03271aeab3571e609612d9fe78c1aa8d11c2f62" +checksum = "ab3f43e3283ab1488b624b44b0e988d0acea0b3214e694730a055cb6b2efa801" dependencies = [ "base64", "bytes", @@ -1455,11 +1445,12 @@ dependencies = [ [[package]] name = "schemars" -version = "0.8.22" +version = "1.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3fbf2ae1b8bc8e02df939598064d22402220cd5bbcca1c76f7d6a310974d5615" +checksum = "a2b42f36aa1cd011945615b92222f6bf73c599a102a300334cd7f8dbeec726cc" dependencies = [ "dyn-clone", + "ref-cast", "schemars_derive", "serde", "serde_json", @@ -1467,9 +1458,9 @@ dependencies = [ [[package]] name = "schemars_derive" -version = "0.8.22" +version = "1.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "32e265784ad618884abaea0600a9adf15393368d840e0222d101a072f3f7534d" +checksum = "7d115b50f4aaeea07e79c1912f645c7513d81715d0420f8bc77a18c6260b307f" dependencies = [ "proc-macro2", "quote", @@ -1577,9 +1568,9 @@ checksum = 
"e320a6c5ad31d271ad523dcf3ad13e2767ad8b1cb8f047f75a8aeaf8da139da2" [[package]] name = "slab" -version = "0.4.11" +version = "0.4.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a2ae44ef20feb57a68b23d846850f861394c2e02dc425a50098ae8c90267589" +checksum = "0c790de23124f9ab44544d7ac05d60440adc586479ce501c1d6d7da3cd8c9cf5" [[package]] name = "smallvec" @@ -1617,9 +1608,9 @@ checksum = "13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292" [[package]] name = "syn" -version = "2.0.114" +version = "2.0.115" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d4d107df263a3013ef9b1879b0df87d706ff80f65a86ea879bd9c31f9b307c2a" +checksum = "6e614ed320ac28113fa64972c4262d5dbc89deacdfd00c34a3e4cea073243c12" dependencies = [ "proc-macro2", "quote", @@ -1648,9 +1639,9 @@ dependencies = [ [[package]] name = "system-configuration" -version = "0.6.1" +version = "0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3c879d448e9d986b661742763247d3693ed13609438cf3d006f51f5368a5ba6b" +checksum = "a13f3d0daba03132c0aa9767f98351b3488edc2c100cda2d2ec2b04f3d8d3c8b" dependencies = [ "bitflags", "core-foundation 0.9.4", @@ -1669,9 +1660,9 @@ dependencies = [ [[package]] name = "target-lexicon" -version = "0.12.16" +version = "0.13.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "61c41af27dd6d1e27b1b16b489db798443478cef1f06a660c96db617ba5de3b1" +checksum = "b1dd07eb858a2067e2f3c7155d54e929265c264e6f37efe3ee7a8d1b5a1dd0ba" [[package]] name = "thiserror" @@ -1937,15 +1928,9 @@ checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b" [[package]] name = "unicode-ident" -version = "1.0.22" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9312f7c4f6ff9069b165498234ce8be658059c6728633667c526e27dc2cf1df5" - -[[package]] -name = "unindent" -version = "0.2.4" +version = "1.0.23" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "7264e107f553ccae879d21fbea1d6724ac785e8c3bfc762137959b5802826ef3" +checksum = "537dd038a89878be9b64dd4bd1b260315c1bb94f4d784956b81e27a088d9a09e" [[package]] name = "untrusted" @@ -2078,9 +2063,9 @@ dependencies = [ [[package]] name = "wasm-streams" -version = "0.4.2" +version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "15053d8d85c7eccdbefef60f06769760a563c7f0a9d6902a13d35c7800b0ad65" +checksum = "9d1ec4f6517c9e11ae630e200b2b65d193279042e28edd4a2cda233e46670bbb" dependencies = [ "futures-util", "js-sys", @@ -2111,9 +2096,9 @@ dependencies = [ [[package]] name = "webpki-root-certs" -version = "1.0.5" +version = "1.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "36a29fc0408b113f68cf32637857ab740edfafdf460c326cd2afaa2d84cc05dc" +checksum = "804f18a4ac2676ffb4e8b5b5fa9ae38af06df08162314f96a68d2a363e21a8ca" dependencies = [ "rustls-pki-types", ] @@ -2444,18 +2429,18 @@ dependencies = [ [[package]] name = "zerocopy" -version = "0.8.34" +version = "0.8.39" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "71ddd76bcebeed25db614f82bf31a9f4222d3fbba300e6fb6c00afa26cbd4d9d" +checksum = "db6d35d663eadb6c932438e763b262fe1a70987f9ae936e60158176d710cae4a" dependencies = [ "zerocopy-derive", ] [[package]] name = "zerocopy-derive" -version = "0.8.34" +version = "0.8.39" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d8187381b52e32220d50b255276aa16a084ec0a9017a0ca2152a1f55c539758d" +checksum = "4122cd3169e94605190e77839c9a40d40ed048d305bfdc146e7df40ab0f3e517" dependencies = [ "proc-macro2", "quote", @@ -2524,6 +2509,6 @@ dependencies = [ [[package]] name = "zmij" -version = "1.0.17" +version = "1.0.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "02aae0f83f69aafc94776e879363e9771d7ecbffe2c7fbb6c14c5e00dfe88439" +checksum = 
"b8848ee67ecc8aedbaf3e4122217aff892639231befc6a1b58d29fff4c2cabaa" diff --git a/Cargo.toml b/Cargo.toml index 53f14c6..cd1907d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -24,7 +24,7 @@ serde = { version = "1", features = ["derive"] } serde_json = "1" # Schema generation -schemars = "0.8" +schemars = "1" # URL parsing url = "2" @@ -33,7 +33,7 @@ url = "2" tracing = "0.1" # Error handling -thiserror = "1" +thiserror = "2" # CLI clap = { version = "4", features = ["derive"] } diff --git a/README.md b/README.md index ca0835d..27ebd05 100644 --- a/README.md +++ b/README.md @@ -97,14 +97,9 @@ use fetchkit::{fetch, FetchRequest}; #[tokio::main] async fn main() { - let request = FetchRequest { - url: "https://example.com".to_string(), - method: None, - as_markdown: Some(true), - as_text: None, - }; - - let response = fetch(request).await; + let request = FetchRequest::new("https://example.com").as_markdown(); + + let response = fetch(request).await.unwrap(); println!("{}", response.content.unwrap_or_default()); } ``` @@ -112,9 +107,9 @@ async fn main() { ### With Tool Builder ```rust -use fetchkit::Tool; +use fetchkit::{FetchRequest, ToolBuilder}; -let tool = Tool::builder() +let tool = ToolBuilder::new() .enable_markdown(true) .enable_text(false) .user_agent("MyBot/1.0") @@ -122,7 +117,8 @@ let tool = Tool::builder() .block_prefix("https://internal.example.com") .build(); -let response = tool.fetch(request).await; +let request = FetchRequest::new("https://example.com"); +let response = tool.execute(request).await.unwrap(); ``` ## Python Bindings @@ -132,7 +128,7 @@ pip install fetchkit ``` ```python -from fetchkit import fetch, FetchRequest, FetchKitTool +from fetchkit_py import fetch, FetchRequest, FetchKitTool # Simple fetch response = fetch("https://example.com", as_markdown=True) @@ -144,7 +140,7 @@ tool = FetchKitTool( user_agent="MyBot/1.0", allow_prefixes=["https://docs.example.com"] ) -response = tool.fetch(FetchRequest(url="https://example.com")) +response = 
tool.fetch("https://example.com") ``` ## Response Fields @@ -157,10 +153,10 @@ response = tool.fetch(FetchRequest(url="https://example.com")) | `size` | int? | Content size in bytes | | `last_modified` | string? | Last-Modified header | | `filename` | string? | From Content-Disposition | -| `format` | string | "markdown", "text", or "raw" | +| `format` | string? | "markdown", "text", "raw", or "github_repo" | | `content` | string? | Page content | -| `truncated` | bool | True if content was cut off | -| `method` | string | HTTP method used | +| `truncated` | bool? | True if content was cut off | +| `method` | string? | "HEAD" for HEAD requests | | `error` | string? | Error message if failed | ## Error Handling diff --git a/crates/fetchkit-cli/src/main.rs b/crates/fetchkit-cli/src/main.rs index a542da3..c94ca47 100644 --- a/crates/fetchkit-cli/src/main.rs +++ b/crates/fetchkit-cli/src/main.rs @@ -1,4 +1,15 @@ //! FetchKit CLI - Command-line interface for fetching web content +//! +//! Provides the `fetchkit` binary with subcommands for fetching URLs +//! and running an MCP server. +//! +//! # Usage +//! +//! ```text +//! fetchkit fetch [--output md|json] [--user-agent ] +//! fetchkit mcp +//! fetchkit --llmtxt +//! ``` mod mcp; diff --git a/crates/fetchkit-python/Cargo.toml b/crates/fetchkit-python/Cargo.toml index 5895b9a..9a422c7 100644 --- a/crates/fetchkit-python/Cargo.toml +++ b/crates/fetchkit-python/Cargo.toml @@ -14,6 +14,6 @@ crate-type = ["cdylib"] [dependencies] fetchkit = { path = "../fetchkit" } -pyo3 = { version = "0.22", features = ["extension-module"] } +pyo3 = { version = "0.28", features = ["extension-module"] } tokio = { workspace = true } serde_json = { workspace = true } diff --git a/crates/fetchkit-python/src/lib.rs b/crates/fetchkit-python/src/lib.rs index 0a4025d..e503e81 100644 --- a/crates/fetchkit-python/src/lib.rs +++ b/crates/fetchkit-python/src/lib.rs @@ -1,9 +1,16 @@ //! Python bindings for FetchKit //! -//! 
This module exposes the FetchKit tool contract to Python. - -// Allow false positive clippy warning from pyo3 macro expansion -#![allow(clippy::useless_conversion)] +//! Exposes the FetchKit tool contract to Python via PyO3. +//! +//! # Python Usage +//! +//! ```python +//! from fetchkit_py import FetchKitTool, FetchRequest +//! +//! tool = FetchKitTool() +//! response = tool.fetch("https://example.com", as_markdown=True) +//! print(response.content) +//! ``` use fetchkit::{FetchError, FetchRequest, FetchResponse, HttpMethod, Tool, ToolBuilder}; use pyo3::exceptions::PyValueError; @@ -15,7 +22,7 @@ fn to_py_err(e: FetchError) -> PyErr { } /// Python wrapper for FetchRequest -#[pyclass(name = "FetchRequest")] +#[pyclass(name = "FetchRequest", from_py_object)] #[derive(Clone)] pub struct PyFetchRequest { inner: FetchRequest, @@ -83,7 +90,7 @@ impl PyFetchRequest { } /// Python wrapper for FetchResponse -#[pyclass(name = "FetchResponse")] +#[pyclass(name = "FetchResponse", from_py_object)] #[derive(Clone)] pub struct PyFetchResponse { inner: FetchResponse, diff --git a/crates/fetchkit/src/client.rs b/crates/fetchkit/src/client.rs index 0840fc3..8e7f471 100644 --- a/crates/fetchkit/src/client.rs +++ b/crates/fetchkit/src/client.rs @@ -27,6 +27,18 @@ pub struct FetchOptions { /// Uses the default fetcher registry with all built-in fetchers. /// Markdown and text conversions are enabled by default. /// For custom options, use [`fetch_with_options`]. 
+/// +/// # Examples +/// +/// ```no_run +/// use fetchkit::{FetchRequest, fetch}; +/// +/// # async fn example() -> Result<(), fetchkit::FetchError> { +/// let response = fetch(FetchRequest::new("https://example.com")).await?; +/// println!("Status: {}", response.status_code); +/// # Ok(()) +/// # } +/// ``` pub async fn fetch(req: FetchRequest) -> Result { let options = FetchOptions { enable_markdown: true, diff --git a/crates/fetchkit/src/convert.rs b/crates/fetchkit/src/convert.rs index 6695162..bb2c44e 100644 --- a/crates/fetchkit/src/convert.rs +++ b/crates/fetchkit/src/convert.rs @@ -1,6 +1,9 @@ //! HTML conversion utilities /// Check if content is HTML based on content type and body +/// +/// Returns `true` if the content type contains `text/html` or `application/xhtml`, +/// or if the body starts with `, body: &str) -> bool { // Check Content-Type if let Some(ct) = content_type { @@ -16,6 +19,21 @@ pub fn is_html(content_type: &Option, body: &str) -> bool { } /// Convert HTML to markdown +/// +/// Converts common HTML elements (headings, lists, emphasis, code blocks, links, +/// blockquotes) to their Markdown equivalents. Strips script, style, noscript, +/// iframe, and svg elements. Decodes HTML entities. +/// +/// # Examples +/// +/// ``` +/// use fetchkit::html_to_markdown; +/// +/// let html = "

<h1>Title</h1><p><strong>Bold</strong> text</p>

"; +/// let md = html_to_markdown(html); +/// assert!(md.contains("# Title")); +/// assert!(md.contains("**Bold**")); +/// ``` pub fn html_to_markdown(html: &str) -> String { let mut output = String::new(); let mut in_skip_element = 0; @@ -196,6 +214,20 @@ pub fn html_to_markdown(html: &str) -> String { } /// Convert HTML to plain text +/// +/// Strips all HTML tags and returns plain text content. Handles newlines +/// for block elements (p, div, headings). Decodes HTML entities. +/// +/// # Examples +/// +/// ``` +/// use fetchkit::html_to_text; +/// +/// let html = "

<h1>Title</h1><p>Paragraph with &amp; entity</p>

"; +/// let text = html_to_text(html); +/// assert!(text.contains("Title")); +/// assert!(text.contains("Paragraph with & entity")); +/// ``` pub fn html_to_text(html: &str) -> String { let mut output = String::new(); let mut in_skip_element = 0; diff --git a/crates/fetchkit/src/error.rs b/crates/fetchkit/src/error.rs index da011b0..d327f4c 100644 --- a/crates/fetchkit/src/error.rs +++ b/crates/fetchkit/src/error.rs @@ -1,8 +1,17 @@ -//! Error types for WebFetch +//! Error types for FetchKit use thiserror::Error; /// Errors that can occur during fetch operations +/// +/// # Examples +/// +/// ``` +/// use fetchkit::FetchError; +/// +/// let err = FetchError::MissingUrl; +/// assert_eq!(err.to_string(), "Missing required parameter: url"); +/// ``` #[derive(Debug, Error)] pub enum FetchError { /// URL is missing diff --git a/crates/fetchkit/src/fetchers/mod.rs b/crates/fetchkit/src/fetchers/mod.rs index 6fc44d2..23feec7 100644 --- a/crates/fetchkit/src/fetchers/mod.rs +++ b/crates/fetchkit/src/fetchers/mod.rs @@ -46,6 +46,19 @@ pub trait Fetcher: Send + Sync { /// /// Maintains an ordered list of fetchers. When fetching a URL, iterates /// through fetchers and uses the first one that matches. +/// +/// # Examples +/// +/// ``` +/// use fetchkit::FetcherRegistry; +/// +/// // Create registry with built-in fetchers +/// let registry = FetcherRegistry::with_defaults(); +/// +/// // Or create empty and register custom fetchers +/// let mut registry = FetcherRegistry::new(); +/// registry.register(Box::new(fetchkit::DefaultFetcher::new())); +/// ``` pub struct FetcherRegistry { fetchers: Vec>, } diff --git a/crates/fetchkit/src/lib.rs b/crates/fetchkit/src/lib.rs index 930f5e4..a154cc9 100644 --- a/crates/fetchkit/src/lib.rs +++ b/crates/fetchkit/src/lib.rs @@ -1,9 +1,57 @@ //! FetchKit - AI-friendly web content fetching library //! //! This crate provides a reusable library API for fetching web content, -//! with optional HTML to markdown/text conversion. +//! 
with optional HTML to markdown/text conversion optimized for LLM consumption. //! -//! ## Fetcher System +//! # Quick Start +//! +//! ```no_run +//! use fetchkit::{FetchRequest, fetch}; +//! +//! # async fn example() -> Result<(), fetchkit::FetchError> { +//! let request = FetchRequest::new("https://example.com").as_markdown(); +//! let response = fetch(request).await?; +//! println!("Content: {}", response.content.unwrap_or_default()); +//! # Ok(()) +//! # } +//! ``` +//! +//! # Tool Builder +//! +//! For more control, use the [`ToolBuilder`] to configure options: +//! +//! ```no_run +//! use fetchkit::{FetchRequest, ToolBuilder}; +//! +//! # async fn example() -> Result<(), fetchkit::FetchError> { +//! let tool = ToolBuilder::new() +//! .enable_markdown(true) +//! .user_agent("MyBot/1.0") +//! .block_prefix("https://blocked.example.com") +//! .build(); +//! +//! let request = FetchRequest::new("https://example.com"); +//! let response = tool.execute(request).await?; +//! # Ok(()) +//! # } +//! ``` +//! +//! # HTML Conversion +//! +//! Convert HTML to markdown or plain text directly: +//! +//! ``` +//! use fetchkit::{html_to_markdown, html_to_text}; +//! +//! let html = "

<h1>Hello</h1><p>World</p>

"; +//! let md = html_to_markdown(html); +//! assert!(md.contains("# Hello")); +//! +//! let text = html_to_text(html); +//! assert!(text.contains("Hello")); +//! ``` +//! +//! # Fetcher System //! //! FetchKit uses a pluggable fetcher system where specialized fetchers //! handle specific URL patterns. The [`FetcherRegistry`] dispatches diff --git a/crates/fetchkit/src/tool.rs b/crates/fetchkit/src/tool.rs index f5477fd..3e47988 100644 --- a/crates/fetchkit/src/tool.rs +++ b/crates/fetchkit/src/tool.rs @@ -54,6 +54,22 @@ impl ToolStatus { } /// Builder for configuring the FetchKit tool +/// +/// # Examples +/// +/// ``` +/// use fetchkit::ToolBuilder; +/// +/// let tool = ToolBuilder::new() +/// .enable_markdown(true) +/// .enable_text(false) +/// .user_agent("MyBot/1.0") +/// .allow_prefix("https://docs.example.com") +/// .block_prefix("https://internal.example.com") +/// .build(); +/// +/// assert!(!tool.description().is_empty()); +/// ``` #[derive(Debug, Clone, Default)] pub struct ToolBuilder { /// Enable as_markdown option @@ -121,6 +137,22 @@ impl ToolBuilder { } /// Configured FetchKit tool +/// +/// Created via [`ToolBuilder`]. Provides methods for executing fetch requests, +/// retrieving schemas, and accessing tool metadata. 
+/// +/// # Examples +/// +/// ```no_run +/// use fetchkit::{FetchRequest, Tool}; +/// +/// # async fn example() -> Result<(), fetchkit::FetchError> { +/// let tool = Tool::default(); +/// let response = tool.execute(FetchRequest::new("https://example.com")).await?; +/// println!("Status: {}", response.status_code); +/// # Ok(()) +/// # } +/// ``` #[derive(Debug, Clone)] pub struct Tool { enable_markdown: bool, diff --git a/crates/fetchkit/src/types.rs b/crates/fetchkit/src/types.rs index cd3a5cb..8ab729a 100644 --- a/crates/fetchkit/src/types.rs +++ b/crates/fetchkit/src/types.rs @@ -37,6 +37,25 @@ impl std::fmt::Display for HttpMethod { } /// Request to fetch a URL +/// +/// # Examples +/// +/// ``` +/// use fetchkit::{FetchRequest, HttpMethod}; +/// +/// // Simple GET request +/// let req = FetchRequest::new("https://example.com"); +/// assert_eq!(req.effective_method(), HttpMethod::Get); +/// +/// // Request with markdown conversion +/// let req = FetchRequest::new("https://example.com").as_markdown(); +/// assert!(req.wants_markdown()); +/// +/// // HEAD request +/// let req = FetchRequest::new("https://example.com") +/// .method(HttpMethod::Head); +/// assert_eq!(req.effective_method(), HttpMethod::Head); +/// ``` #[derive(Debug, Clone, Default, Serialize, Deserialize, JsonSchema)] pub struct FetchRequest { /// The URL to fetch (required, must be http:// or https://) @@ -99,6 +118,27 @@ impl FetchRequest { } /// Response from a fetch operation +/// +/// Contains the fetched content along with metadata like status code, +/// content type, and size. Optional fields are omitted when not applicable. 
+/// +/// # Examples +/// +/// ``` +/// use fetchkit::FetchResponse; +/// +/// let response = FetchResponse { +/// url: "https://example.com".to_string(), +/// status_code: 200, +/// content_type: Some("text/html".to_string()), +/// format: Some("markdown".to_string()), +/// content: Some("# Example Domain".to_string()), +/// ..Default::default() +/// }; +/// +/// assert_eq!(response.status_code, 200); +/// assert!(response.content.unwrap().contains("Example")); +/// ``` #[derive(Debug, Clone, Default, Serialize, Deserialize, JsonSchema)] pub struct FetchResponse { /// The fetched URL diff --git a/specs/maintenance.md b/specs/maintenance.md new file mode 100644 index 0000000..1a1ea77 --- /dev/null +++ b/specs/maintenance.md @@ -0,0 +1,79 @@ +# Periodic Maintenance Specification + +## Abstract + +Define recurring maintenance tasks to keep the fetchkit repository healthy, up-to-date, and well-documented. This spec is intended to be executed periodically (e.g., monthly or before each release) by a human or coding agent. + +## Requirements + +### 1. Dependency Updates + +Update all workspace and crate-level dependencies to their latest compatible versions. + +1. **Check outdated deps** - Run `cargo outdated -R` (or equivalent) to list stale dependencies +2. **Update minor/patch** - Apply non-breaking updates via `cargo update` +3. **Evaluate major bumps** - Major version upgrades are allowed; review changelogs for breaking changes and adapt code accordingly +4. **Verify lockfile** - Ensure `Cargo.lock` reflects the updated versions +5. **Build & test** - `cargo build --workspace && cargo test --workspace` must pass after updates +6. **Audit advisories** - Run `cargo audit` (if available) to check for known vulnerabilities + +### 2. Documentation Quality (docs.rs / rustdoc) + +Ensure all public items have good documentation suitable for docs.rs rendering. + +1. 
**Crate-level docs** - Each crate's `lib.rs` must have a `//!` module doc with: + - One-line summary + - Feature overview + - Quick-start code example (compilable with `cargo test --doc`) +2. **Public items** - Every public struct, enum, trait, function, and method must have a `///` doc comment explaining purpose and usage +3. **Examples in docs** - Key types (`FetchRequest`, `FetchResponse`, `Tool`, `ToolBuilder`, `FetcherRegistry`) should include `# Examples` sections with runnable code blocks +4. **No doc warnings** - `RUSTDOCFLAGS="-D warnings" cargo doc --workspace --no-deps` must pass +5. **README sync** - Root README.md code snippets should be consistent with actual API + +### 3. Spec-Code Alignment + +Ensure specifications in `specs/` accurately describe the current code, and code conforms to specs. + +1. **Type definitions** - Verify struct/enum fields in code match spec definitions (field names, types, optionality) +2. **Error variants** - Verify `FetchError` variants in code match spec +3. **Behavior** - Verify timeouts, binary detection, HTML conversion rules match spec descriptions +4. **Fetcher system** - Verify fetcher trait, registry, and built-in fetchers match `specs/fetchers.md` +5. **CLI flags** - Verify CLI argument names and behavior match spec +6. **MCP protocol** - Verify MCP method names and schemas match spec +7. **Update stale specs** - If code intentionally diverges from spec, update the spec to match +8. **Update stale code** - If spec describes required behavior not in code, flag for implementation + +### 4. Example Verification + +Ensure all examples compile and run correctly. + +1. **Cargo examples** - `cargo run -p fetchkit --example fetch_urls` must complete without error (network-dependent; CI may use timeout) +2. **Doc examples** - `cargo test --doc --workspace` must pass (all doc code blocks must compile) +3. **docs/ and examples/ prose** - Verify shell commands and code snippets in markdown files are accurate +4. 
**Python examples** - If Python environment available, verify `examples/` Python scripts have correct API usage matching current bindings + +### 5. CI & Tooling Health + +Verify CI pipeline and development tooling are current. + +1. **CI actions** - Check GitHub Actions versions are not deprecated; update to latest stable +2. **Rust toolchain** - Confirm builds on latest Rust stable +3. **Clippy clean** - `cargo clippy --workspace --all-targets -- -D warnings` passes +4. **Format clean** - `cargo fmt --all -- --check` passes +5. **Lockfile committed** - `Cargo.lock` is committed and up-to-date + +### 6. Security & License + +1. **Dependency licenses** - All dependencies must have permissive licenses (MIT, Apache-2.0, BSD). Flag any non-permissive additions +2. **Advisory scan** - No known vulnerabilities in dependency tree (via `cargo audit` or equivalent) +3. **No secrets** - Ensure no API keys, tokens, or credentials are committed + +### 7. Changelog & Versioning + +1. **Unreleased section** - `CHANGELOG.md` has an `[Unreleased]` section for pending changes +2. **Version consistency** - Workspace version in root `Cargo.toml` matches latest changelog entry +3. **Inter-crate versions** - Internal dependency versions (e.g., `fetchkit-cli` depending on `fetchkit`) are consistent + +## Execution + +Run this checklist by working through sections 1-7 in order. Fix issues as encountered. Commit fixes in logical groups following conventional commits. After completion, all CI checks should pass.