diff --git a/Cargo.lock b/Cargo.lock index 19cea592..474425f8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -32,6 +32,15 @@ version = "1.0.100" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a23eb6b1614318a8071c9b2521f36b424b2c83db5eb3a0fead4a6c0809af6e61" +[[package]] +name = "ascii-canvas" +version = "3.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8824ecca2e851cec16968d54a01dd372ef8f95b244fb84b84e70128be347c3c6" +dependencies = [ + "term", +] + [[package]] name = "askama" version = "0.14.0" @@ -39,7 +48,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f75363874b771be265f4ffe307ca705ef6f3baa19011c149da8674a87f1b75c4" dependencies = [ "askama_derive", - "itoa", + "itoa 1.0.15", "percent-encoding", "serde", "serde_json", @@ -60,7 +69,7 @@ dependencies = [ "rustc-hash", "serde", "serde_derive", - "syn", + "syn 2.0.104", ] [[package]] @@ -75,6 +84,209 @@ dependencies = [ "winnow", ] +[[package]] +name = "assert-json-diff" +version = "2.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "47e4f2b81832e72834d7518d8487a0396a28cc408186a2e8854c0f98011faf12" +dependencies = [ + "serde", + "serde_json", +] + +[[package]] +name = "async-attributes" +version = "1.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a3203e79f4dd9bdda415ed03cf14dae5a2bf775c683a00f94e9cd1faf0f596e5" +dependencies = [ + "quote", + "syn 1.0.109", +] + +[[package]] +name = "async-channel" +version = "1.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "81953c529336010edd6d8e358f886d9581267795c61b19475b71314bffa46d35" +dependencies = [ + "concurrent-queue", + "event-listener 2.5.3", + "futures-core", +] + +[[package]] +name = "async-channel" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "924ed96dd52d1b75e9c1a3e6275715fd320f5f9439fb5a4a11fa51f4221158d2" +dependencies = [ + "concurrent-queue", + "event-listener-strategy", + "futures-core", + "pin-project-lite", +] + +[[package]] +name = "async-compression" +version = "0.4.39" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68650b7df54f0293fd061972a0fb05aaf4fc0879d3b3d21a638a182c5c543b9f" +dependencies = [ + "compression-codecs", + "compression-core", + "pin-project-lite", + "tokio", +] + +[[package]] +name = "async-executor" +version = "1.13.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "497c00e0fd83a72a79a39fcbd8e3e2f055d6f6c7e025f3b3d91f4f8e76527fb8" +dependencies = [ + "async-task", + "concurrent-queue", + "fastrand", + "futures-lite", + "pin-project-lite", + "slab", +] + +[[package]] +name = "async-global-executor" +version = "2.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05b1b633a2115cd122d73b955eadd9916c18c8f510ec9cd1686404c60ad1c29c" +dependencies = [ + "async-channel 2.5.0", + "async-executor", + "async-io", + "async-lock", + "blocking", + "futures-lite", + "once_cell", +] + +[[package]] +name = "async-io" +version = "2.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "456b8a8feb6f42d237746d4b3e9a178494627745c3c56c6ea55d92ba50d026fc" +dependencies = [ + "autocfg", + "cfg-if", + "concurrent-queue", + "futures-io", + "futures-lite", + "parking", + "polling", + "rustix", + "slab", + "windows-sys 0.61.2", +] + +[[package]] +name = "async-lock" +version = "3.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "290f7f2596bd5b78a9fec8088ccd89180d7f9f55b94b0576823bbbdc72ee8311" +dependencies = [ + "event-listener 5.4.1", + "event-listener-strategy", + "pin-project-lite", +] + +[[package]] +name = "async-object-pool" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "333c456b97c3f2d50604e8b2624253b7f787208cb72eb75e64b0ad11b221652c" +dependencies = [ + "async-std", +] + +[[package]] +name = "async-process" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc50921ec0055cdd8a16de48773bfeec5c972598674347252c0399676be7da75" +dependencies = [ + "async-channel 2.5.0", + "async-io", + "async-lock", + "async-signal", + "async-task", + "blocking", + "cfg-if", + "event-listener 5.4.1", + "futures-lite", + "rustix", +] + +[[package]] +name = "async-signal" +version = "0.2.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "43c070bbf59cd3570b6b2dd54cd772527c7c3620fce8be898406dd3ed6adc64c" +dependencies = [ + "async-io", + "async-lock", + "atomic-waker", + "cfg-if", + "futures-core", + "futures-io", + "rustix", + "signal-hook-registry", + "slab", + "windows-sys 0.61.2", +] + +[[package]] +name = "async-std" +version = "1.13.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2c8e079a4ab67ae52b7403632e4618815d6db36d2a010cfe41b02c1b1578f93b" +dependencies = [ + "async-attributes", + "async-channel 1.9.0", + "async-global-executor", + "async-io", + "async-lock", + "async-process", + "crossbeam-utils", + "futures-channel", + "futures-core", + "futures-io", + "futures-lite", + "gloo-timers", + "kv-log-macro", + "log", + "memchr", + "once_cell", + "pin-project-lite", + "pin-utils", + "slab", + "wasm-bindgen-futures", +] + +[[package]] +name = "async-task" +version = "4.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b75356056920673b02621b35afd0f7dda9306d03c79a30f5c56c44cf256e3de" + +[[package]] +name = "async-trait" +version = "0.1.89" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9035ad2d096bed7955a320ee7e2230574d28fd3c3a0f186cbea1ff3c7eed5dbb" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.104", +] + [[package]] name = "atomic" version = "0.6.1" @@ -107,12 +319,12 @@ dependencies = [ "bytes", "form_urlencoded", "futures-util", - "http", - "http-body", + "http 1.3.1", + "http-body 1.0.1", "http-body-util", - "hyper", + "hyper 1.6.0", "hyper-util", - "itoa", + "itoa 1.0.15", "matchit", "memchr", "mime", @@ -139,8 +351,8 @@ checksum = "68464cd0412f486726fb3373129ef5d2993f90c34bc2bc1c1e9943b2f4fc7ca6" dependencies = [ "bytes", "futures-core", - "http", - "http-body", + "http 1.3.1", + "http-body 1.0.1", "http-body-util", "mime", "pin-project-lite", @@ -162,8 +374,8 @@ dependencies = [ "bytes", "form_urlencoded", "futures-util", - "http", - "http-body", + "http 1.3.1", + "http-body 1.0.1", "http-body-util", "mime", "pin-project-lite", @@ -184,7 +396,7 @@ checksum = "604fde5e028fea851ce1d8570bbdc034bec850d157f7569d10f347d06808c05c" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.104", ] [[package]] @@ -202,12 +414,29 @@ dependencies = [ "windows-targets 0.52.6", ] +[[package]] +name = "base64" +version = "0.21.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9d297deb1925b89f2ccc13d7635fa0714f12c87adce1c75356b39ca9b7178567" + [[package]] name = "base64" version = "0.22.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" +[[package]] +name = "basic-cookies" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67bd8fd42c16bdb08688243dc5f0cc117a3ca9efeeaba3a345a18a6159ad96f7" +dependencies = [ + "lalrpop", + "lalrpop-util", + "regex", +] + [[package]] name = "basic-toml" version = "0.1.10" @@ -226,6 +455,21 @@ dependencies = [ "serde", ] +[[package]] +name = "bit-set" +version = "0.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0700ddab506f33b20a03b13996eccd309a48e5ff77d0d95926aa0210fb4e95f1" +dependencies = [ + "bit-vec", +] + +[[package]] +name = "bit-vec" +version = "0.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "349f9b6a179ed607305526ca489b34ad0a41aed5f7980fa90eb03160b69598fb" + [[package]] name = "bitflags" version = "1.3.2" @@ -253,6 +497,19 @@ dependencies = [ "generic-array", ] +[[package]] +name = "blocking" +version = "1.6.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e83f8d02be6967315521be875afa792a316e28d57b5a2d401897e2a7921b7f21" +dependencies = [ + "async-channel 2.5.0", + "async-task", + "futures-io", + "futures-lite", + "piper", +] + [[package]] name = "bumpalo" version = "3.18.1" @@ -329,6 +586,12 @@ version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9555578bc9e57714c812a1f84e4fc5b4d21fcb063490c624de019f7464c91268" +[[package]] +name = "cfg_aliases" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724" + [[package]] name = "cocoa" version = "0.25.0" @@ -359,6 +622,32 @@ dependencies = [ "objc", ] +[[package]] +name = "compression-codecs" +version = "0.4.36" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "00828ba6fd27b45a448e57dbfe84f1029d4c9f26b368157e9a448a5f49a2ec2a" +dependencies = [ + "compression-core", + "flate2", + "memchr", +] + +[[package]] +name = "compression-core" +version = "0.4.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "75984efb6ed102a0d42db99afb6c1948f0380d1d91808d5529916e6c08b49d8d" + +[[package]] +name = "concurrent-queue" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ca0197aee26d1ae37445ee532fefce43251d24cc7c166799f4d46817f1d3973" +dependencies = [ + "crossbeam-utils", +] + [[package]] name = "conv" version = "0.3.3" @@ -368,6 +657,12 @@ dependencies = [ "custom_derive", ] +[[package]] +name = "convert_case" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6245d59a3e82a7fc217c5828a6692dbc6dfb63a0c8c90495621f7b9d79704a0e" + [[package]] name = "cookie" version = "0.18.1" @@ -446,6 +741,15 @@ dependencies = [ "libc", ] +[[package]] +name = "crc32fast" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9481c1c90cbf2ac953f07c8d4a58aa3945c425b7185c9154d67a65e4230da511" +dependencies = [ + "cfg-if", +] + [[package]] name = "crossbeam-channel" version = "0.5.15" @@ -480,6 +784,12 @@ version = "0.8.21" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" +[[package]] +name = "crunchy" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "460fbee9c2c2f33933d720630a6a0bac33ba7053db5344fac858d4b8952d77d5" + [[package]] name = "crypto-common" version = "0.1.6" @@ -490,6 +800,23 @@ dependencies = [ "typenum", ] +[[package]] +name = "cssparser" +version = "0.27.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "754b69d351cdc2d8ee09ae203db831e005560fc6030da058f86ad60c92a9cb0a" +dependencies = [ + "cssparser-macros", + "dtoa-short", + "itoa 0.4.8", + "matches", + "phf 0.8.0", + "proc-macro2", + "quote", + "smallvec", + "syn 1.0.109", +] + [[package]] name = "cssparser" version = "0.34.0" @@ -498,8 +825,8 @@ checksum = "b7c66d1cd8ed61bf80b38432613a7a2f09401ab8d0501110655f8b341484a3e3" dependencies = [ "cssparser-macros", "dtoa-short", - "itoa", - "phf", + "itoa 1.0.15", + "phf 0.11.3", "smallvec", ] @@ -510,7 +837,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "13b588ba4ac1a99f7f2964d24b3d896ddc6bf847ee3855dbd4366f058cfcd331" dependencies = [ "quote", - "syn", + "syn 2.0.104", ] [[package]] @@ -540,7 +867,7 @@ dependencies = [ "proc-macro2", "quote", "strsim", - "syn", + "syn 2.0.104", ] [[package]] @@ -551,7 +878,7 @@ checksum = "fc34b93ccb385b40dc71c6fceac4b2ad23662c7eeb248cf10d529b7e055b6ead" dependencies = [ "darling_core", "quote", - "syn", + "syn 2.0.104", ] [[package]] @@ -582,7 +909,7 @@ dependencies = [ "darling", "proc-macro2", "quote", - "syn", + "syn 2.0.104", ] [[package]] @@ -592,7 +919,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ab63b0e2bf4d5928aff72e83a7dace85d7bba5fe12dcc3c5a572d78caffd3f3c" dependencies = [ "derive_builder_core", - "syn", + "syn 2.0.104", ] [[package]] @@ -601,9 +928,11 @@ version = "0.99.20" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6edb4b64a43d977b8e99788fe3a04d483834fba1215a7e02caa415b626497f7f" dependencies = [ + "convert_case", "proc-macro2", "quote", - "syn", + "rustc_version", + "syn 2.0.104", ] [[package]] @@ -625,6 +954,16 @@ dependencies = [ "dirs-sys", ] +[[package]] +name = "dirs-next" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b98cf8ebf19c3d1b223e151f99a4f9f0690dca41414773390fc824184ac833e1" +dependencies = [ + "cfg-if", + "dirs-sys-next", +] + [[package]] name = "dirs-sys" version = "0.5.0" @@ -633,10 +972,21 @@ checksum = "e01a3366d27ee9890022452ee61b2b63a67e6f13f58900b651ff5665f0bb1fab" dependencies = [ "libc", "option-ext", - "redox_users", + "redox_users 0.5.0", "windows-sys 0.60.2", ] +[[package]] +name = "dirs-sys-next" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ebda144c4fe02d1f7ea1a7d9641b6fc6b580adcfa024ae48797ecdeb6825b4d" +dependencies = [ + "libc", + "redox_users 0.4.6", + "winapi", +] + [[package]] name = "displaydoc" version = "0.2.5" @@ -645,7 +995,7 @@ checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.104", ] [[package]] @@ -672,6 +1022,12 @@ dependencies = [ "dtoa", ] +[[package]] +name = "ego-tree" +version = "0.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "12a0bb14ac04a9fcf170d0bbbef949b44cc492f4452bd20c095636956f653642" + [[package]] name = "ego-tree" version = "0.10.0" @@ -698,6 +1054,15 @@ dependencies = [ "winreg", ] +[[package]] +name = "ena" +version = "0.14.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d248bdd43ce613d87415282f69b9bb99d947d290b10962dd6c56233312c2ad5" +dependencies = [ + "log", +] + [[package]] name = "encoding_rs" version = "0.8.35" @@ -732,6 +1097,33 @@ dependencies = [ "version_check", ] +[[package]] +name = "event-listener" +version = "2.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0206175f82b8d6bf6652ff7d71a1e27fd2e4efde587fd368662814d6ec1d9ce0" + +[[package]] +name = "event-listener" +version = "5.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e13b66accf52311f30a0db42147dadea9850cb48cd070028831ae5f5d4b856ab" +dependencies = [ + "concurrent-queue", + "parking", + "pin-project-lite", +] + +[[package]] +name = "event-listener-strategy" +version = "0.5.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8be9f3dfaaffdae2972880079a491a1a8bb7cbed0b8dd7a347f668b4150a3b93" +dependencies = [ + "event-listener 5.4.1", + "pin-project-lite", +] + [[package]] name = "fastrand" version = "2.3.0" @@ -761,6 +1153,22 @@ dependencies = [ "windows-sys 0.52.0", ] +[[package]] +name = "fixedbitset" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ce7134b9999ecaf8bcd65542e436736ef32ddca1b3e06094cb6ec5755203b80" + +[[package]] +name = "flate2" +version = "1.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "843fba2746e448b37e26a819579957415c8cef339bf08564fe8b7ddbd959573c" +dependencies = [ + "crc32fast", + "miniz_oxide", +] + [[package]] name = "fnv" version = "1.0.7" @@ -794,7 +1202,7 @@ checksum = "1a5c6c585bc94aaf2c7b51dd4c2ba22680844aba4c687be581871a6f518c5742" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.104", ] [[package]] @@ -876,6 +1284,19 @@ version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9e5c1b78ca4aae1ac06c48a526a655760685149f0d465d21f37abfe57ce075c6" +[[package]] +name = "futures-lite" +version = "2.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f78e10609fe0e0b3f4157ffab1876319b5b0db102a2c60dc4626306dc46b44ad" +dependencies = [ + "fastrand", + "futures-core", + "futures-io", + "parking", + "pin-project-lite", +] + [[package]] name = "futures-macro" version = "0.3.31" @@ -884,7 +1305,7 @@ checksum = "162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.104", ] [[package]] @@ -945,6 +1366,17 @@ dependencies = [ "unicode-width", ] +[[package]] +name = "getrandom" +version = "0.1.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8fc3cb4d91f53b50155bdcfd23f6a4c39ae1969c2ae85982b135750cccaf5fce" +dependencies = [ + "cfg-if", + "libc", + "wasi 0.9.0+wasi-snapshot-preview1", +] + [[package]] name = "getrandom" version = "0.2.16" @@ -952,8 +1384,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "335ff9f135e4384c8150d6f27c6daed433577f86b4750418338c01a1a2528592" dependencies = [ "cfg-if", + "js-sys", "libc", "wasi 0.11.1+wasi-snapshot-preview1", + "wasm-bindgen", ] [[package]] @@ -963,9 +1397,11 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "26145e563e54f2cadc477553f1ec5ee650b00862f0a58bcd12cbdc5f0ea2d2f4" dependencies = [ "cfg-if", + "js-sys", "libc", "r-efi", "wasi 0.14.2+wasi-0.2.4", + "wasm-bindgen", ] [[package]] @@ -980,6 +1416,18 @@ version = "0.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a8d1add55171497b4705a648c6b583acafb01d58050a51727785f0b2c8e0a2b2" +[[package]] +name = "gloo-timers" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbb143cf96099802033e0d4f4963b19fd2e0b728bcf076cd9cf7f6634f092994" +dependencies = [ + "futures-channel", + "futures-core", + "js-sys", + "wasm-bindgen", +] + [[package]] name = "h2" version = "0.4.10" @@ -991,7 +1439,7 @@ dependencies = [ "fnv", "futures-core", "futures-sink", - "http", + "http 1.3.1", "indexmap", "slab", "tokio", @@ -1011,6 +1459,20 @@ version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fc0fef456e4baa96da950455cd02c081ca953b141298e41db3fc7e36b1da849c" +[[package]] +name = "html5ever" +version = "0.26.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bea68cab48b8459f17cf1c944c67ddc572d272d9f2b274140f223ecb1da4a3b7" +dependencies = [ + "log", + "mac", + "markup5ever 0.11.0", + "proc-macro2", + "quote", + "syn 1.0.109", +] + [[package]] name = "html5ever" version = "0.29.1" @@ -1019,7 +1481,7 @@ checksum = "3b7410cae13cbc75623c98ac4cbfd1f0bedddf3227afc24f370cf0f50a44a11c" dependencies = [ "log", "mac", - "markup5ever", + "markup5ever 0.14.1", "match_token", ] @@ -1034,6 +1496,17 @@ dependencies = [ "thiserror 1.0.69", ] +[[package]] +name = "http" +version = "0.2.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "601cbb57e577e2f5ef5be8e7b83f0f63994f25aa94d673e54a92d5c516d101f1" +dependencies = [ + "bytes", + "fnv", + "itoa 1.0.15", +] + [[package]] name = "http" version = "1.3.1" @@ -1042,7 +1515,18 @@ checksum = "f4a85d31aea989eead29a3aaf9e1115a180df8282431156e533de47660892565" dependencies = [ "bytes", "fnv", - "itoa", + "itoa 1.0.15", +] + +[[package]] +name = "http-body" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7ceab25649e9960c0311ea418d17bee82c0dcec1bd053b5f9a66e265a693bed2" +dependencies = [ + "bytes", + "http 0.2.12", + "pin-project-lite", ] [[package]] @@ -1052,7 +1536,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1efedce1fb8e6913f23e0c92de8e62cd5b772a67e7b3946df930a62566c93184" dependencies = [ "bytes", - "http", + "http 1.3.1", ] [[package]] @@ -1063,8 +1547,8 @@ checksum = "b021d93e26becf5dc7e1b75b1bed1fd93124b374ceb73f43d4d4eafec896a64a" dependencies = [ "bytes", "futures-core", - "http", - "http-body", + "http 1.3.1", + "http-body 1.0.1", "pin-project-lite", ] @@ -1086,6 +1570,57 @@ version = "1.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9" +[[package]] +name = "httpmock" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08ec9586ee0910472dec1a1f0f8acf52f0fdde93aea74d70d4a3107b4be0fd5b" +dependencies = [ + "assert-json-diff", + "async-object-pool", + "async-std", + "async-trait", + "base64 0.21.7", + "basic-cookies", + "crossbeam-utils", + "form_urlencoded", + "futures-util", + "hyper 0.14.32", + "lazy_static", + "levenshtein", + "log", + "regex", + "serde", + "serde_json", + "serde_regex", + "similar", + "tokio", + "url", +] + +[[package]] +name = "hyper" +version = "0.14.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41dfc780fdec9373c01bae43289ea34c972e40ee3c9f6b3c8801a35f35586ce7" +dependencies = [ + "bytes", + "futures-channel", + "futures-core", + "futures-util", + "http 0.2.12", + "http-body 0.4.6", + "httparse", + "httpdate", + "itoa 1.0.15", + "pin-project-lite", + "socket2 0.5.10", + "tokio", + "tower-service", + "tracing", + "want", +] + [[package]] name = "hyper" version = "1.6.0" @@ -1096,11 +1631,11 @@ dependencies = [ "futures-channel", "futures-util", "h2", - "http", - "http-body", + "http 1.3.1", + "http-body 1.0.1", "httparse", "httpdate", - "itoa", + "itoa 1.0.15", "pin-project-lite", "smallvec", "tokio", @@ -1113,14 +1648,15 @@ version = "0.27.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e3c93eb611681b207e1fe55d5a71ecf91572ec8a6705cdb6857f7d8d5242cf58" dependencies = [ - "http", - "hyper", + "http 1.3.1", + "hyper 1.6.0", "hyper-util", "rustls", "rustls-pki-types", "tokio", "tokio-rustls", "tower-service", + "webpki-roots", ] [[package]] @@ -1131,7 +1667,7 @@ checksum = "70206fc6890eaca9fde8a0bf71caa2ddfc9fe045ac9e5c70df101a7dbde866e0" dependencies = [ "bytes", "http-body-util", - "hyper", + "hyper 1.6.0", "hyper-util", "native-tls", "tokio", @@ -1145,14 +1681,14 @@ version = "0.1.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dc2fdfdbff08affe55bb779f33b053aa1fe5dd5b54c257343c17edfa55711bdb" dependencies = [ - "base64", + "base64 0.22.1", "bytes", "futures-channel", "futures-core", "futures-util", - "http", - "http-body", - "hyper", + "http 1.3.1", + "http-body 1.0.1", + "hyper 1.6.0", "ipnet", "libc", "percent-encoding", @@ -1349,6 +1885,15 @@ dependencies = [ "either", ] +[[package]] +name = "itertools" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b1c173a5686ce8bfa551b3563d0c2170bf24ca44da99c7ca4bfdab5418c3fe57" +dependencies = [ + "either", +] + [[package]] name = "itertools" version = "0.14.0" @@ -1358,6 +1903,12 @@ dependencies = [ "either", ] +[[package]] +name = "itoa" +version = "0.4.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b71991ff56294aa922b450139ee08b3bfc70982c6b2c7562771375cf73542dd4" + [[package]] name = "itoa" version = "1.0.15" @@ -1374,6 +1925,46 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "kv-log-macro" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0de8b303297635ad57c9f5059fd9cee7a47f8e8daa09df0fcd07dd39fb22977f" +dependencies = [ + "log", +] + +[[package]] +name = "lalrpop" +version = "0.20.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "55cb077ad656299f160924eb2912aa147d7339ea7d69e1b5517326fdcec3c1ca" +dependencies = [ + "ascii-canvas", + "bit-set", + "ena", + "itertools 0.11.0", + "lalrpop-util", + "petgraph", + "pico-args", + "regex", + "regex-syntax 0.8.5", + "string_cache", + "term", + "tiny-keccak", + "unicode-xid", + "walkdir", +] + +[[package]] +name = "lalrpop-util" +version = "0.20.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "507460a910eb7b32ee961886ff48539633b788a36b65692b95f225b844c82553" +dependencies = [ + "regex-automata 0.4.13", +] + [[package]] name = "lava_torrent" version = "0.11.1" @@ -1394,6 +1985,12 @@ version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" +[[package]] +name = "levenshtein" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "db13adb97ab515a3691f56e4dbab09283d0b86cb45abd991d8634a9d6f501760" + [[package]] name = "libc" version = "0.2.180" @@ -1443,6 +2040,15 @@ name = "log" version = "0.4.29" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897" +dependencies = [ + "value-bag", +] + +[[package]] +name = "lru-slab" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "112b39cec0b298b6c1999fee3e31427f74f676e4cb9879ed1a121b43661a4154" [[package]] name = "mac" @@ -1459,6 +2065,20 @@ dependencies = [ "libc", ] +[[package]] +name = "markup5ever" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a2629bb1404f3d34c2e921f21fd34ba00b206124c81f65c50b43b6aaefeb016" +dependencies = [ + "log", + "phf 0.10.1", + "phf_codegen 0.10.0", + "string_cache", + "string_cache_codegen", + "tendril", +] + [[package]] name = "markup5ever" version = "0.14.1" @@ -1466,8 +2086,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c7a7213d12e1864c0f002f52c2923d4556935a43dec5e71355c2760e0f6e7a18" dependencies = [ "log", - "phf", - "phf_codegen", + "phf 0.11.3", + "phf_codegen 0.11.3", "string_cache", "string_cache_codegen", "tendril", @@ -1481,7 +2101,7 @@ checksum = "88a9689d8d44bf9964484516275f5cd4c9b59457a6940c1d5d0ecbb94510a36b" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.104", ] [[package]] @@ -1493,6 +2113,12 @@ dependencies = [ "regex-automata 0.1.10", ] +[[package]] +name = "matches" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2532096657941c2fea9c289d370a250971c689d4f143798ff67113ec042024a5" + [[package]] name = "matchit" version = "0.8.4" @@ -1534,6 +2160,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1fa76a2c86f704bdb222d66965fb3d63269ce38518b83cb0575fca855ebb6316" dependencies = [ "adler2", + "simd-adler32", ] [[package]] @@ -1553,6 +2180,7 @@ version = "0.4.6" dependencies = [ "anyhow", "askama", + "async-trait", "axum", "axum-extra", "bytes", @@ -1569,6 +2197,7 @@ dependencies = [ "matchr", "mlm_db", "mlm_mam", + "mlm_meta", "mlm_parse", "native_db", "native_model", @@ -1581,7 +2210,7 @@ dependencies = [ "reqwest", "reqwest_cookie_store", "sanitize-filename", - "scraper", + "scraper 0.23.1", "serde", "serde-nested-json", "serde_derive", @@ -1602,6 +2231,7 @@ dependencies = [ "tracing-subscriber", "tray-item", "unidecode", + "url", "urlencoding", "uuid", "winsafe", @@ -1641,7 +2271,6 @@ dependencies = [ "native_db", "native_model", "once_cell", - "openssl", "reqwest", "reqwest_cookie_store", "serde", @@ -1654,6 +2283,26 @@ dependencies = [ "tracing", ] +[[package]] +name = "mlm_meta" +version = "0.1.0" +dependencies = [ + "anyhow", + "async-trait", + "httpmock", + "mlm_db", + "mlm_parse", + "reqwest", + "scraper 0.14.0", + "serde", + "serde_json", + "strsim", + "tokio", + "tracing", + "url", + "urlencoding", +] + [[package]] name = "mlm_parse" version = "0.1.0" @@ -1704,7 +2353,7 @@ source = "git+https://github.com/StirlingMouse/native_db.git?branch=0.8.x#cddaaf dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.104", ] [[package]] @@ -1730,7 +2379,7 @@ checksum = "2f385f3d57adaea8d8868e65a0bc821bcb8ba2228bbf87a1c3c6144ac48f3791" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.104", ] [[package]] @@ -1739,6 +2388,12 @@ version = "1.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "650eef8c711430f1a879fdd01d4745a7deea475becfb90269c06775983bbf086" +[[package]] +name = "nodrop" +version = "0.1.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72ef4a56884ca558e5ddb05a1d1e7e1bfd9a68d9ed024c21704cc98872dae1bb" + [[package]] name = "nom" version = "8.0.0" @@ -1861,7 +2516,7 @@ checksum = "a948666b637a0f465e8564c73e89d4dde00d72d4d473cc972f390fc3dcee7d9c" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.104", ] [[package]] @@ -1910,6 +2565,12 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c10569378a1dacd9f30dbe7ae49e054d2c45dc2f8ee49899903e09c3924e8b6f" +[[package]] +name = "parking" +version = "2.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f38d5652c16fde515bb1ecef450ab0f6a219d619a7274976324d5e377f7dceba" + [[package]] name = "parking_lot" version = "0.12.4" @@ -1959,7 +2620,7 @@ dependencies = [ "proc-macro2", "proc-macro2-diagnostics", "quote", - "syn", + "syn 2.0.104", ] [[package]] @@ -1968,14 +2629,64 @@ version = "2.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9b4f627cb1b25917193a259e49bdad08f671f8d9708acfd5fe0a8c1455d87220" +[[package]] +name = "petgraph" +version = "0.6.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b4c5cc86750666a3ed20bdaf5ca2a0344f9c67674cae0515bec2da16fbaa47db" +dependencies = [ + "fixedbitset", + "indexmap", +] + +[[package]] +name = "phf" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3dfb61232e34fcb633f43d12c58f83c1df82962dcdfa565a4e866ffc17dafe12" +dependencies = [ + "phf_macros 0.8.0", + "phf_shared 0.8.0", + "proc-macro-hack", +] + +[[package]] +name = "phf" +version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fabbf1ead8a5bcbc20f5f8b939ee3f5b0f6f281b6ad3468b84656b658b455259" +dependencies = [ + "phf_shared 0.10.0", +] + [[package]] name = "phf" version = "0.11.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1fd6780a80ae0c52cc120a26a1a42c1ae51b247a253e4e06113d23d2c2edd078" dependencies = [ - "phf_macros", - "phf_shared", + "phf_macros 0.11.3", + "phf_shared 0.11.3", +] + +[[package]] +name = "phf_codegen" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cbffee61585b0411840d3ece935cce9cb6321f01c45477d30066498cd5e1a815" +dependencies = [ + "phf_generator 0.8.0", + "phf_shared 0.8.0", +] + +[[package]] +name = "phf_codegen" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4fb1c3a8bc4dd4e5cfce29b44ffc14bedd2ee294559a294e2a4d4c9e9a6a13cd" +dependencies = [ + "phf_generator 0.10.0", + "phf_shared 0.10.0", ] [[package]] @@ -1984,8 +2695,28 @@ version = "0.11.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "aef8048c789fa5e851558d709946d6d79a8ff88c0440c587967f8e94bfb1216a" dependencies = [ - "phf_generator", - "phf_shared", + "phf_generator 0.11.3", + "phf_shared 0.11.3", +] + +[[package]] +name = "phf_generator" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "17367f0cc86f2d25802b2c26ee58a7b23faeccf78a396094c13dced0d0182526" +dependencies = [ + "phf_shared 0.8.0", + "rand 0.7.3", +] + +[[package]] +name = "phf_generator" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5d5285893bb5eb82e6aaf5d59ee909a06a16737a8970984dd7746ba9283498d6" +dependencies = [ + "phf_shared 0.10.0", + "rand 0.8.5", ] [[package]] @@ -1994,8 +2725,22 @@ version = "0.11.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3c80231409c20246a13fddb31776fb942c38553c51e871f8cbd687a4cfb5843d" dependencies = [ - "phf_shared", - "rand", + "phf_shared 0.11.3", + "rand 0.8.5", +] + +[[package]] +name = "phf_macros" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f6fde18ff429ffc8fe78e2bf7f8b7a5a5a6e2a8b58bc5a9ac69198bbda9189c" +dependencies = [ + "phf_generator 0.8.0", + "phf_shared 0.8.0", + "proc-macro-hack", + "proc-macro2", + "quote", + "syn 1.0.109", ] [[package]] @@ -2004,11 +2749,29 @@ version = "0.11.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f84ac04429c13a7ff43785d75ad27569f2951ce0ffd30a3321230db2fc727216" dependencies = [ - "phf_generator", - "phf_shared", + "phf_generator 0.11.3", + "phf_shared 0.11.3", "proc-macro2", "quote", - "syn", + "syn 2.0.104", +] + +[[package]] +name = "phf_shared" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c00cf8b9eafe68dde5e9eaa2cef8ee84a9336a47d566ec55ca16589633b65af7" +dependencies = [ + "siphasher 0.3.11", +] + +[[package]] +name = "phf_shared" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6796ad771acdc0123d2a88dc428b5e38ef24456743ddb1744ed628f9815c096" +dependencies = [ + "siphasher 0.3.11", ] [[package]] @@ -2017,9 +2780,15 @@ version = "0.11.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "67eabc2ef2a60eb7faa00097bd1ffdb5bd28e62bf39990626a582201b7a754e5" dependencies = [ - "siphasher", + "siphasher 1.0.1", ] +[[package]] +name = "pico-args" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5be167a7af36ee22fe3115051bc51f6e6c7054c9348e28deb4f49bd6f705a315" + [[package]] name = "pin-project-lite" version = "0.2.16" @@ -2032,12 +2801,37 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" +[[package]] +name = "piper" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "96c8c490f422ef9a4efd2cb5b42b76c8613d7e7dfc1caf667b8a3350a5acc066" +dependencies = [ + "atomic-waker", + "fastrand", + "futures-io", +] + [[package]] name = "pkg-config" version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c" +[[package]] +name = "polling" +version = "3.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5d0e4f59085d47d8241c88ead0f274e8a0cb551f3625263c05eb8dd897c34218" +dependencies = [ + "cfg-if", + "concurrent-queue", + "hermit-abi", + "pin-project-lite", + "rustix", + "windows-sys 0.61.2", +] + [[package]] name = "potential_utf" version = "0.1.2" @@ -2053,12 +2847,27 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391" +[[package]] +name = "ppv-lite86" +version = "0.2.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85eae3c4ed2f50dcfe72643da4befc30deadb458a9b590d720cde2f2b1e97da9" +dependencies = [ + "zerocopy", +] + [[package]] name = "precomputed-hash" version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "925383efa346730478fb4838dbe9137d2a47675ad789c546d150a6e1dd4ab31c" +[[package]] +name = "proc-macro-hack" +version = "0.5.20+deprecated" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc375e1527247fe1a97d8b7156678dfe7c1af2fc075c9a4db3690ecd2a148068" + [[package]] name = "proc-macro2" version = "1.0.95" @@ -2076,7 +2885,7 @@ checksum = "af066a9c399a26e020ada66a034357a868728e72cd426f3adcd35f80d88d88c8" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.104", "version_check", "yansi", ] @@ -2144,6 +2953,61 @@ dependencies = [ "serde", ] +[[package]] +name = "quinn" +version = "0.11.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9e20a958963c291dc322d98411f541009df2ced7b5a4f2bd52337638cfccf20" +dependencies = [ + "bytes", + "cfg_aliases", + "pin-project-lite", + "quinn-proto", + "quinn-udp", + "rustc-hash", + "rustls", + "socket2 0.5.10", + "thiserror 2.0.17", + "tokio", + "tracing", + "web-time", +] + +[[package]] +name = "quinn-proto" +version = "0.11.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "434b42fec591c96ef50e21e886936e66d3cc3f737104fdb9b737c40ffb94c098" +dependencies = [ + "bytes", + "getrandom 0.3.3", + "lru-slab", + "rand 0.9.2", + "ring", + "rustc-hash", + "rustls", + "rustls-pki-types", + "slab", + "thiserror 2.0.17", + "tinyvec", + "tracing", + "web-time", +] + +[[package]] +name = "quinn-udp" +version = "0.5.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "addec6a0dcad8a8d96a771f815f0eaf55f9d1805756410b39f5fa81332574cbd" +dependencies = [ + "cfg_aliases", + "libc", + "once_cell", + "socket2 0.5.10", + "tracing", + "windows-sys 0.52.0", +] + [[package]] name = "quote" version = "1.0.40" @@ -2159,13 +3023,78 @@ version = "5.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f" +[[package]] +name = "rand" +version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a6b1679d49b24bbfe0c803429aa1874472f50d9b363131f0e89fc356b544d03" +dependencies = [ + "getrandom 0.1.16", + "libc", + "rand_chacha 0.2.2", + "rand_core 0.5.1", + "rand_hc", + "rand_pcg", +] + [[package]] name = "rand" version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" dependencies = [ - "rand_core", + "libc", + "rand_chacha 0.3.1", + "rand_core 0.6.4", +] + +[[package]] +name = "rand" +version = "0.9.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6db2770f06117d490610c7488547d543617b21bfa07796d7a12f6f1bd53850d1" +dependencies = [ + "rand_chacha 0.9.0", + "rand_core 0.9.5", +] + +[[package]] +name = "rand_chacha" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f4c8ed856279c9737206bf725bf36935d8666ead7aa69b52be55af369d193402" +dependencies = [ + "ppv-lite86", + "rand_core 0.5.1", +] + +[[package]] +name = "rand_chacha" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" +dependencies = [ + "ppv-lite86", + "rand_core 0.6.4", +] + +[[package]] +name = "rand_chacha" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3022b5f1df60f26e1ffddd6c66e8aa15de382ae63b3a0c1bfc0e4d3e3f325cb" +dependencies = [ + "ppv-lite86", + "rand_core 0.9.5", +] + +[[package]] +name = "rand_core" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "90bde5296fc891b0cef12a6d03ddccc162ce7b2aff54160af9338f8d40df6d19" +dependencies = [ + "getrandom 0.1.16", ] [[package]] @@ -2173,6 +3102,36 @@ name = "rand_core" version = "0.6.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" +dependencies = [ + "getrandom 0.2.16", +] + +[[package]] +name = "rand_core" +version = "0.9.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "76afc826de14238e6e8c374ddcc1fa19e374fd8dd986b0d2af0d02377261d83c" +dependencies = [ + "getrandom 0.3.3", +] + +[[package]] +name = "rand_hc" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ca3129af7b92a17112d59ad498c6f81eaf463253766b90396d39ea7a39d6613c" +dependencies = [ + "rand_core 0.5.1", +] + +[[package]] +name = "rand_pcg" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "16abd0c1b639e9eb4d7c50c0b8100b0d0f849be2349829c740fe8e6eb4816429" +dependencies = [ + "rand_core 0.5.1", +] [[package]] name = "rayon" @@ -2221,6 +3180,17 @@ dependencies = [ "bitflags 2.9.1", ] +[[package]] +name = "redox_users" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba009ff324d1fc1b900bd1fdb31564febe58a8ccc8a6fdbb93b543d33b13ca43" +dependencies = [ + "getrandom 0.2.16", + "libredox", + "thiserror 1.0.69", +] + [[package]] name = "redox_users" version = "0.5.0" @@ -2282,7 +3252,8 @@ version = "0.12.24" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9d0946410b9f7b082a427e4ef5c8ff541a88b357bc6c637c40db3a68ac70a36f" dependencies = [ - "base64", + "async-compression", + "base64 0.22.1", "bytes", "cookie", "cookie_store", @@ -2290,10 +3261,10 @@ dependencies = [ "futures-core", "futures-util", "h2", - "http", - "http-body", + "http 1.3.1", + "http-body 1.0.1", "http-body-util", - "hyper", + "hyper 1.6.0", "hyper-rustls", "hyper-tls", "hyper-util", @@ -2304,6 +3275,8 @@ dependencies = [ "native-tls", "percent-encoding", "pin-project-lite", + "quinn", + "rustls", "rustls-pki-types", "serde", "serde_json", @@ -2311,6 +3284,8 @@ dependencies = [ "sync_wrapper", "tokio", "tokio-native-tls", + "tokio-rustls", + "tokio-util", "tower", "tower-http", "tower-service", @@ -2318,6 +3293,7 @@ dependencies = [ "wasm-bindgen", "wasm-bindgen-futures", "web-sys", + "webpki-roots", ] [[package]] @@ -2387,6 +3363,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7160e3e10bf4535308537f3c4e1641468cd0e485175d6163087c0393c7d46643" dependencies = [ "once_cell", + "ring", "rustls-pki-types", "rustls-webpki", "subtle", @@ -2399,6 +3376,7 @@ version = "1.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "229a4a4c221013e7e1f1a043678c5cc39fe5171437c88fb47151a21e6f5b5c79" dependencies = [ + "web-time", "zeroize", ] @@ -2454,18 +3432,34 @@ version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" +[[package]] +name = "scraper" +version = "0.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc7cb4dae083699a22a65aa9d2699c27f525e35dffaec38b10801e958ed4cf27" +dependencies = [ + "cssparser 0.27.2", + "ego-tree 0.6.3", + "getopts", + "html5ever 0.26.0", + "matches", + "selectors 0.22.0", + "smallvec", + "tendril", +] + [[package]] name = "scraper" version = "0.23.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "527e65d9d888567588db4c12da1087598d0f6f8b346cc2c5abc91f05fc2dffe2" dependencies = [ - "cssparser", - "ego-tree", + "cssparser 0.34.0", + "ego-tree 0.10.0", "getopts", - "html5ever", + "html5ever 0.29.1", "precomputed-hash", - "selectors", + "selectors 0.26.0", "tendril", ] @@ -2492,6 +3486,26 @@ dependencies = [ "libc", ] +[[package]] +name = "selectors" +version = "0.22.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df320f1889ac4ba6bc0cdc9c9af7af4bd64bb927bccdf32d81140dc1f9be12fe" +dependencies = [ + "bitflags 1.3.2", + "cssparser 0.27.2", + "derive_more", + "fxhash", + "log", + "matches", + "phf 0.8.0", + "phf_codegen 0.8.0", + "precomputed-hash", + "servo_arc 0.1.1", + "smallvec", + "thin-slice", +] + [[package]] name = "selectors" version = "0.26.0" @@ -2499,15 +3513,15 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fd568a4c9bb598e291a08244a5c1f5a8a6650bee243b5b0f8dbb3d9cc1d87fe8" dependencies = [ "bitflags 2.9.1", - "cssparser", + "cssparser 0.34.0", "derive_more", "fxhash", "log", "new_debug_unreachable", - "phf", - "phf_codegen", + "phf 0.11.3", + "phf_codegen 0.11.3", "precomputed-hash", - "servo_arc", + "servo_arc 0.4.1", "smallvec", ] @@ -2557,7 +3571,7 @@ checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.104", ] [[package]] @@ -2568,7 +3582,7 @@ checksum = "9d2de91cf02bbc07cde38891769ccd5d4f073d22a40683aa4bc7a95781aaa2c4" dependencies = [ "form_urlencoded", "indexmap", - "itoa", + "itoa 1.0.15", "ryu", "serde", ] @@ -2579,7 +3593,7 @@ version = "1.0.145" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "402a6f66d8c709116cf22f558eab210f5a50187f702eb4d7e5ef38d9a7f1c79c" dependencies = [ - "itoa", + "itoa 1.0.15", "memchr", "ryu", "serde", @@ -2592,7 +3606,17 @@ version = "0.1.17" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "59fab13f937fa393d08645bf3a84bdfe86e296747b506ada67bb15f10f218b2a" dependencies = [ - "itoa", + "itoa 1.0.15", + "serde", +] + +[[package]] +name = "serde_regex" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a8136f1a4ea815d7eac4101cfd0b16dc0cb5e1fe1b8609dfd728058656b7badf" +dependencies = [ + "regex", "serde", ] @@ -2604,7 +3628,7 @@ checksum = "175ee3e80ae9982737ca543e96133087cbd9a485eecc3bc4de9c1a37b47ea59c" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.104", ] [[package]] @@ -2632,11 +3656,21 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d3491c14715ca2294c4d6a88f15e84739788c1d030eed8c110436aafdaa2f3fd" dependencies = [ "form_urlencoded", - "itoa", + "itoa 1.0.15", "ryu", "serde", ] +[[package]] +name = "servo_arc" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d98238b800e0d1576d8b6e3de32827c2d74bee68bb97748dcf5071fb53965432" +dependencies = [ + "nodrop", + "stable_deref_trait", +] + [[package]] name = "servo_arc" version = "0.4.1" @@ -2681,6 +3715,24 @@ dependencies = [ "libc", ] +[[package]] +name = "simd-adler32" +version = "0.3.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e320a6c5ad31d271ad523dcf3ad13e2767ad8b1cb8f047f75a8aeaf8da139da2" + +[[package]] +name = "similar" +version = "2.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbbb5d9659141646ae647b42fe094daf6c6192d1620870b449d9557f748b2daa" + +[[package]] +name = "siphasher" +version = "0.3.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "38b58827f4464d87d377d175e90bf58eb00fd8716ff0a62f80356b5e61555d0d" + [[package]] name = "siphasher" version = "1.0.1" @@ -2748,7 +3800,7 @@ checksum = "bf776ba3fa74f83bf4b63c3dcbbf82173db2632ed8452cb2d891d33f459de70f" dependencies = [ "new_debug_unreachable", "parking_lot", - "phf_shared", + "phf_shared 0.11.3", "precomputed-hash", "serde", ] @@ -2759,8 +3811,8 @@ version = "0.5.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c711928715f1fe0fe509c53b43e993a9a557babc2d0a3567d0a3006f1ac931a0" dependencies = [ - "phf_generator", - "phf_shared", + "phf_generator 0.11.3", + "phf_shared 0.11.3", "proc-macro2", "quote", ] @@ -2783,6 +3835,17 @@ version = "2.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292" +[[package]] +name = "syn" +version = "1.0.109" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + [[package]] name = "syn" version = "2.0.104" @@ -2811,7 +3874,7 @@ checksum = "728a70f3dbaf5bab7f0c4b1ac8d7ae5ea60a4b5549c8a5914361c99147a709d2" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.104", ] [[package]] @@ -2859,6 +3922,23 @@ dependencies = [ "utf-8", ] +[[package]] +name = "term" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c59df8ac95d96ff9bede18eb7300b0fda5e5d8d90960e76f8e14ae765eedbf1f" +dependencies = [ + "dirs-next", + "rustversion", + "winapi", +] + +[[package]] +name = "thin-slice" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8eaa81235c7058867fa8c0e7314f33dcce9c215f535d1913822a2b3f5e289f3c" + [[package]] name = "thiserror" version = "1.0.69" @@ -2885,7 +3965,7 @@ checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.104", ] [[package]] @@ -2896,7 +3976,7 @@ checksum = "3ff15c8ecd7de3849db632e14d18d2571fa09dfc5ed93479bc4485c7a517c913" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.104", ] [[package]] @@ -2915,7 +3995,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "91e7d9e3bb61134e77bde20dd4825b97c010155709965fedf0f49bb138e52a9d" dependencies = [ "deranged", - "itoa", + "itoa 1.0.15", "libc", "num-conv", "num_threads", @@ -2941,6 +4021,15 @@ dependencies = [ "time-core", ] +[[package]] +name = "tiny-keccak" +version = "2.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2c9d3793400a45f954c52e73d068316d76b6f4e36977e3fcebb13a2721e80237" +dependencies = [ + "crunchy", +] + [[package]] name = "tinystr" version = "0.8.1" @@ -2951,6 +4040,21 @@ dependencies = [ "zerovec", ] +[[package]] +name = "tinyvec" +version = "1.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bfa5fdc3bce6191a1dbc8c02d5c8bffcf557bafa17c124c5264a458f1b0613fa" +dependencies = [ + "tinyvec_macros", +] + +[[package]] +name = "tinyvec_macros" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" + [[package]] name = "tokio" version = "1.47.1" @@ -2979,7 +4083,7 @@ checksum = "6e06d43f1345a3bcd39f6a56dbb7dcab2ba47e68e8ac134855e7e2bdbaf8cab8" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.104", ] [[package]] @@ -3133,8 +4237,8 @@ dependencies = [ "bytes", "futures-core", "futures-util", - "http", - "http-body", + "http 1.3.1", + "http-body 1.0.1", "http-body-util", "http-range-header", "httpdate", @@ -3195,7 +4299,7 @@ checksum = "81383ab64e72a7a8b8e13130c49e3dab29def6d0c7d76a03087b3cf71c5c6903" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.104", ] [[package]] @@ -3303,6 +4407,12 @@ version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4a1a07cc7db3810833284e8d372ccdc6da29741639ecc70c9ec107df0fa6154c" +[[package]] +name = "unicode-xid" +version = "0.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ebc1c04c71510c7f702b52b7c350734c9ff1295c464a03335b00bb84fc54f853" + [[package]] name = "unidecode" version = "0.3.0" @@ -3363,6 +4473,12 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ba73ea9cf16a25df0c8caa16c51acb937d5712a8429db78a3ee29d5dcacd3a65" +[[package]] +name = "value-bag" +version = "1.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7ba6f5989077681266825251a52748b8c1d8a4ad098cc37e440103d0ea717fc0" + [[package]] name = "vcpkg" version = "0.2.15" @@ -3414,6 +4530,12 @@ dependencies = [ "try-lock", ] +[[package]] +name = "wasi" +version = "0.9.0+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cccddf32554fecc6acb585f82a32a72e28b48f8c4c1883ddfeeeaa96f7d8e519" + [[package]] name = "wasi" version = "0.11.1+wasi-snapshot-preview1" @@ -3451,7 +4573,7 @@ dependencies = [ "log", "proc-macro2", "quote", - "syn", + "syn 2.0.104", "wasm-bindgen-shared", ] @@ -3486,7 +4608,7 @@ checksum = "8ae87ea40c9f689fc23f209965b6fb8a99ad69aeeb0231408be24920604395de" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.104", "wasm-bindgen-backend", "wasm-bindgen-shared", ] @@ -3510,6 +4632,25 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "web-time" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a6580f308b1fad9207618087a65c04e7a10bc77e02c8e84e9b00dd4b12fa0bb" +dependencies = [ + "js-sys", + "wasm-bindgen", +] + +[[package]] +name = "webpki-roots" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22cfaf3c063993ff62e73cb4311efde4db1efb31ab78a3e5c457939ad5cc0bed" +dependencies = [ + "rustls-pki-types", +] + [[package]] name = "winapi" version = "0.3.9" @@ -3547,13 +4688,19 @@ version = "0.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5e6ad25900d524eaabdbbb96d20b4311e1e7ae1699af4fb28c17ae66c80d798a" +[[package]] +name = "windows-link" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5" + [[package]] name = "windows-registry" version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b3bab093bdd303a1240bb99b8aba8ea8a69ee19d34c9e2ef9594e708a4878820" dependencies = [ - "windows-link", + "windows-link 0.1.3", "windows-result", "windows-strings", ] @@ -3564,7 +4711,7 @@ version = "0.3.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "56f42bd332cc6c8eac5af113fc0c1fd6a8fd2aa08a0119358686e5160d0586c6" dependencies = [ - "windows-link", + "windows-link 0.1.3", ] [[package]] @@ -3573,7 +4720,7 @@ version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "56e6c93f3a0c3b36176cb1327a4958a0353d5d166c2a35cb268ace15e91d3b57" dependencies = [ - "windows-link", + "windows-link 0.1.3", ] [[package]] @@ -3603,6 +4750,15 @@ dependencies = [ "windows-targets 0.53.2", ] +[[package]] +name = "windows-sys" +version = "0.61.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ae137229bcbd6cdf0f7b80a31df61766145077ddf49416a728b02cb3921ff3fc" +dependencies = [ + "windows-link 0.2.1", +] + [[package]] name = "windows-targets" version = "0.52.6" @@ -3797,7 +4953,7 @@ checksum = "38da3c9736e16c5d3c8c597a9aaa5d1fa565d0532ae05e27c24aa62fb32c0ab6" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.104", "synstructure", ] @@ -3818,7 +4974,7 @@ checksum = "9ecf5b4cc5364572d7f4c329661bcc82724222973f2cab6f050a4e5c22f75181" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.104", ] [[package]] @@ -3838,7 +4994,7 @@ checksum = "d71e5d6e06ab090c67b5e44993ec16b72dcbaabc526db883a360057678b48502" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.104", "synstructure", ] @@ -3878,5 +5034,5 @@ checksum = "5b96237efa0c878c64bd89c436f661be4e46b2f3eff1ebb976f7ef2321d2f58f" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.104", ] diff --git a/Cargo.toml b/Cargo.toml index d0cbb5c4..bf5b68df 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,4 +1,3 @@ [workspace] resolver = "3" -members = ["server", "mlm_db", "mlm_parse", "mlm_mam"] - +members = ["server", "mlm_db", "mlm_parse", "mlm_mam", "mlm_meta"] diff --git a/mlm_db/src/lib.rs b/mlm_db/src/lib.rs index ff16f855..b1094cc7 100644 --- a/mlm_db/src/lib.rs +++ b/mlm_db/src/lib.rs @@ -22,8 +22,8 @@ use std::collections::HashMap; use anyhow::Result; use mlm_parse::normalize_title; -use native_db::Models; pub use native_db::Database; +use native_db::Models; use native_db::transaction::RwTransaction; use native_db::{ToInput, db_type}; use once_cell::sync::Lazy; diff --git a/mlm_db/src/v03.rs b/mlm_db/src/v03.rs index 06828eb0..42b67abf 100644 --- a/mlm_db/src/v03.rs +++ b/mlm_db/src/v03.rs @@ -1,6 +1,6 @@ use super::{v01, v02, v04, v05, v06}; -use native_db::{native_db, Key, ToKey}; -use native_model::{native_model, Model}; +use native_db::{Key, ToKey, native_db}; +use native_model::{Model, native_model}; use serde::{Deserialize, Serialize}; use std::path::PathBuf; use time::{OffsetDateTime, UtcDateTime}; diff --git a/mlm_db/src/v09.rs b/mlm_db/src/v09.rs index 65a6a9d9..0241f41d 100644 --- a/mlm_db/src/v09.rs +++ b/mlm_db/src/v09.rs @@ -1,6 +1,6 @@ use super::{v01, v03, v04, v06, v08, v10}; -use native_db::{native_db, ToKey}; -use native_model::{native_model, Model}; +use native_db::{ToKey, native_db}; +use native_model::{Model, native_model}; use serde::{Deserialize, Serialize}; use std::path::PathBuf; use tracing::warn; diff --git a/mlm_db/src/v13.rs b/mlm_db/src/v13.rs index 4d20a4ff..bc46f4d6 100644 --- a/mlm_db/src/v13.rs +++ b/mlm_db/src/v13.rs @@ -1,6 +1,6 @@ use super::{v03, v04, v06, v08, v09, v10, v11, v12, v14}; -use native_db::{native_db, ToKey}; -use native_model::{native_model, Model}; +use native_db::{ToKey, native_db}; +use native_model::{Model, native_model}; use serde::{Deserialize, Serialize}; use std::path::PathBuf; diff --git a/mlm_db/src/v18.rs b/mlm_db/src/v18.rs index 66ab5abf..5ceb8d05 100644 --- a/mlm_db/src/v18.rs +++ b/mlm_db/src/v18.rs @@ -1,9 +1,9 @@ use crate::ids; use super::{v01, v03, v04, v05, v06, v08, v09, v10, v11, v12, v13, v15, v16, v17}; -use mlm_parse::{normalize_title, parse_edition}; -use native_db::{native_db, ToKey}; -use native_model::{native_model, Model}; +use mlm_parse::normalize_title; +use native_db::{ToKey, native_db}; +use native_model::{Model, native_model}; use serde::{Deserialize, Serialize}; use std::{collections::BTreeMap, path::PathBuf}; diff --git a/mlm_mam/Cargo.toml b/mlm_mam/Cargo.toml index e336b77b..d0a7e033 100644 --- a/mlm_mam/Cargo.toml +++ b/mlm_mam/Cargo.toml @@ -13,8 +13,10 @@ mlm_parse = { path = "../mlm_parse" } native_db = { git = "https://github.com/StirlingMouse/native_db.git", branch = "0.8.x" } native_model = "0.4.20" once_cell = "1.21.3" -openssl = { version = "0.10.73", features = ["vendored"] } -reqwest = { version = "0.12.20", features = ["json"] } +reqwest = { version = "0.12.20", default-features = false, features = [ + "json", + "rustls-tls", +] } reqwest_cookie_store = "0.8.0" serde = "1.0.136" serde_derive = "1.0.136" diff --git a/mlm_meta/Cargo.toml b/mlm_meta/Cargo.toml new file mode 100644 index 00000000..bc6d4c0f --- /dev/null +++ b/mlm_meta/Cargo.toml @@ -0,0 +1,23 @@ +[package] +name = "mlm_meta" +version = "0.1.0" +edition = "2024" + +[dependencies] +anyhow = "1.0" +async-trait = "0.1" +serde = { version = "1.0", features = ["derive"] } +reqwest = { version = "0.12.20", default-features = false, features = ["json", "rustls-tls", "gzip"] } +tokio = { version = "1", features = ["rt-multi-thread", "sync", "macros"] } +serde_json = "1.0" +scraper = "0.14" +mlm_db = { path = "../mlm_db" } +mlm_parse = { path = "../mlm_parse" } +strsim = "0.11" +tracing = "0.1" + +urlencoding = "2.1" +url = "2.4" + +[dev-dependencies] +httpmock = "0.7" diff --git a/mlm_meta/README.md b/mlm_meta/README.md new file mode 100644 index 00000000..fe4d92fd --- /dev/null +++ b/mlm_meta/README.md @@ -0,0 +1,14 @@ +mlm_meta +======== + +Small crate defining the Provider trait and helper types for external +metadata providers (Goodreads, Hardcover, ...). + +Purpose +- Provide a stable trait so server can query multiple providers and map + results into existing `TorrentMeta`. + +How to add a provider +- Implement `mlm_meta::Provider` and return `TorrentMeta` from `fetch`. +- Register the provider in server's `MetadataService` and map fields into + `TorrentMeta` before persisting. diff --git a/mlm_meta/src/helpers.rs b/mlm_meta/src/helpers.rs new file mode 100644 index 00000000..fa94b59e --- /dev/null +++ b/mlm_meta/src/helpers.rs @@ -0,0 +1,169 @@ +use mlm_parse::{clean_name, normalize_title}; + +pub use anyhow; +pub use tracing::{Level, debug, enabled, trace}; + +/// Search query with optional author. Providers can decide how to use these fields. +#[derive(Debug, Clone)] +pub struct SearchQuery { + pub title: String, + pub author: Option, +} + +impl SearchQuery { + pub fn new(title: String, author: Option) -> Self { + Self { title, author } + } + + /// Build a combined search string for providers that use a single query string. + pub fn to_combined_string(&self) -> String { + match &self.author { + Some(author) if !self.title.is_empty() && !author.is_empty() => { + format!("{} {}", self.title, author) + } + _ if !self.title.is_empty() => self.title.clone(), + _ => String::new(), + } + } +} + +/// Build SearchQuery with author included +pub fn query_with_author(title: &str, authors: &[String]) -> SearchQuery { + let author = authors + .iter() + .map(|a| a.trim()) + .find(|a| !a.is_empty()) + .map(|a| a.to_string()); + SearchQuery::new(title.to_string(), author) +} + +/// Build SearchQuery without author (title-only search) +pub fn query_title_only(title: &str) -> SearchQuery { + SearchQuery::new(title.to_string(), None) +} + +/// Normalized string similarity 0.0..1.0 +pub fn token_similarity(a: &str, b: &str) -> f64 { + strsim::normalized_levenshtein(a, b) +} + +/// Normalize author names (clean and lowercase) +pub fn normalize_authors(auths: &[String]) -> Vec { + auths + .iter() + .map(|a| { + let mut s = a.clone(); + let _ = clean_name(&mut s); + s.to_lowercase() + }) + .collect() +} + +/// Score a candidate by title and author similarity. Candidate title and +/// candidate authors are provided directly as strings (the caller extracts +/// them from JSON). The query title/authors are the original query values. +pub fn score_candidate( + cand_title: Option<&str>, + cand_auths: &[String], + q_title: &Option, + q_auths: &[String], +) -> f64 { + let q_title_norm = q_title.as_ref().map(|t| normalize_title(t)); + + let mut title_score = 0.0f64; + if let Some(qt_norm) = q_title_norm.as_ref() + && let Some(ct) = cand_title + { + let cand = normalize_title(ct); + if cand == *qt_norm { + title_score = 1.0; + } else if cand.contains(qt_norm.as_str()) || qt_norm.contains(cand.as_str()) { + title_score = 0.9; + } else { + title_score = token_similarity(&cand, qt_norm); + } + } + + let mut author_score = 0.0f64; + if !q_auths.is_empty() { + let q_auths_norm = normalize_authors(q_auths); + let mut best = 0.0f64; + for a in cand_auths { + let mut n = a.clone(); + let _ = clean_name(&mut n); + let n = n.to_lowercase(); + for qa in &q_auths_norm { + if n.contains(qa) || qa.contains(&n) { + best = best.max(1.0); + } else { + best = best.max(token_similarity(&n, qa)); + } + } + } + author_score = best; + } + + // Require minimum author match score when query has authors. + // This prevents false positives from exact title matches with wrong authors + // (e.g., "Boss of the Year" by Nicole French matching "Boss of the Year" by T. Funny) + // and prevents loose title matches (e.g., "Book Title" matching "Book Title: A Novel") + // when the author doesn't match at all. + if !q_auths.is_empty() && author_score < 0.5 { + return 0.0; + } + + if q_title_norm.is_some() && !q_auths.is_empty() { + 0.7 * title_score + 0.3 * author_score + } else if q_title_norm.is_some() { + title_score + } else { + author_score + } +} + +#[cfg(test)] +mod tests { + use super::*; + use mlm_parse::normalize_title; + + #[test] + fn test_token_similarity() { + assert!(token_similarity("great adventure", "great adventure") > 0.999); + assert!(token_similarity("great adventure", "great adventures") > 0.8); + assert!(token_similarity("great adventure", "completely different") < 0.3); + } + + #[test] + fn test_score_candidate_title_pref() { + let q_title = Some(normalize_title("The Great Adventure")); + let q_auths: Vec = vec![]; + + let cand_exact_title = Some("The Great Adventure"); + let cand_sim_title = Some("Great Adventure"); + let cand_auths_exact: Vec = vec!["Alice".to_string()]; + let cand_auths_sim: Vec = vec!["Bob Smith".to_string()]; + + let s_exact = score_candidate(cand_exact_title, &cand_auths_exact, &q_title, &q_auths); + let s_sim = score_candidate(cand_sim_title, &cand_auths_sim, &q_title, &q_auths); + assert!(s_exact >= s_sim, "expected exact title to score >= similar"); + } + + #[test] + fn test_score_candidate_author_influence() { + let q_title = Some(normalize_title("Great Adventure")); + let q_auths: Vec = vec!["bob smith".to_string()]; + + let cand_title_only = Some("Great Adventure"); + let cand_both = Some("Great Adventur"); + let cand_auths_title_only: Vec = vec!["Alice".to_string()]; + let cand_auths_both: Vec = vec!["Bob Smith".to_string()]; + + let s_title_only = + score_candidate(cand_title_only, &cand_auths_title_only, &q_title, &q_auths); + let s_both = score_candidate(cand_both, &cand_auths_both, &q_title, &q_auths); + assert!( + s_both > s_title_only, + "expected candidate with matching author to score higher" + ); + } +} diff --git a/mlm_meta/src/http.rs b/mlm_meta/src/http.rs new file mode 100644 index 00000000..7d03177d --- /dev/null +++ b/mlm_meta/src/http.rs @@ -0,0 +1,100 @@ +use anyhow::Result; +use async_trait::async_trait; +use reqwest::Client; + +#[async_trait] +pub trait HttpClient: Send + Sync { + async fn get(&self, url: &str) -> Result; + + async fn post(&self, url: &str, body: Option<&str>, headers: &[(&str, &str)]) + -> Result; +} + +pub struct ReqwestClient { + client: Client, +} + +impl ReqwestClient { + pub fn new() -> Self { + use reqwest::header::{ + ACCEPT, ACCEPT_LANGUAGE, CONNECTION, HeaderMap, HeaderName, HeaderValue, + }; + + let mut headers = HeaderMap::new(); + headers.insert( + ACCEPT, + HeaderValue::from_static( + "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", + ), + ); + headers.insert( + ACCEPT_LANGUAGE, + HeaderValue::from_static("en,en-US;q=0.9,en-GB;q=0.8,sv;q=0.7"), + ); + headers.insert(CONNECTION, HeaderValue::from_static("keep-alive")); + headers.insert( + HeaderName::from_static("dnt"), + HeaderValue::from_static("1"), + ); + headers.insert( + HeaderName::from_static("priority"), + HeaderValue::from_static("u=0, i"), + ); + headers.insert( + HeaderName::from_static("sec-fetch-dest"), + HeaderValue::from_static("document"), + ); + headers.insert( + HeaderName::from_static("sec-fetch-mode"), + HeaderValue::from_static("navigate"), + ); + headers.insert( + HeaderName::from_static("sec-fetch-site"), + HeaderValue::from_static("none"), + ); + headers.insert( + HeaderName::from_static("sec-fetch-user"), + HeaderValue::from_static("?1"), + ); + + Self { + client: Client::builder() + .user_agent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/143.0.0.0 Safari/537.36") + .default_headers(headers) + .gzip(true) + .build() + .unwrap() + } + } +} + +impl Default for ReqwestClient { + fn default() -> Self { + Self::new() + } +} + +#[async_trait] +impl HttpClient for ReqwestClient { + async fn get(&self, url: &str) -> Result { + let res = self.client.get(url).send().await?.text().await?; + Ok(res) + } + + async fn post( + &self, + url: &str, + body: Option<&str>, + headers: &[(&str, &str)], + ) -> Result { + let mut req = self.client.post(url); + for (k, v) in headers { + req = req.header(*k, *v); + } + if let Some(b) = body { + req = req.body(b.to_string()); + } + let res = req.send().await?.text().await?; + Ok(res) + } +} diff --git a/mlm_meta/src/lib.rs b/mlm_meta/src/lib.rs new file mode 100644 index 00000000..8640a0b5 --- /dev/null +++ b/mlm_meta/src/lib.rs @@ -0,0 +1,11 @@ +pub mod helpers; +pub mod http; +pub mod providers; +pub mod tag_category_map; +pub mod traits; + +pub use helpers::*; +pub use http::*; +pub use providers::*; +pub use tag_category_map::*; +pub use traits::*; diff --git a/mlm_meta/src/providers/fake.rs b/mlm_meta/src/providers/fake.rs new file mode 100644 index 00000000..e05291cb --- /dev/null +++ b/mlm_meta/src/providers/fake.rs @@ -0,0 +1,32 @@ +use crate::traits::Provider; +use anyhow::Result; +use async_trait::async_trait; +use mlm_db::TorrentMeta; + +pub struct FakeProvider { + pub id_str: String, + pub meta: Option, +} + +impl FakeProvider { + pub fn new(id: &str, meta: Option) -> Self { + Self { + id_str: id.to_string(), + meta, + } + } +} + +#[async_trait] +impl Provider for FakeProvider { + fn id(&self) -> &str { + &self.id_str + } + + async fn fetch(&self, _query: &TorrentMeta) -> Result { + match &self.meta { + Some(m) => Ok(m.clone()), + None => Err(anyhow::anyhow!("not found")), + } + } +} diff --git a/mlm_meta/src/providers/hardcover.rs b/mlm_meta/src/providers/hardcover.rs new file mode 100644 index 00000000..b6f82916 --- /dev/null +++ b/mlm_meta/src/providers/hardcover.rs @@ -0,0 +1,738 @@ +use anyhow::{Context, Result}; +use async_trait::async_trait; +use tracing::{debug, instrument, warn}; + +use crate::providers::MetadataProvider; +use crate::traits::Provider; +use crate::{helpers, http::HttpClient, map_tag_to_category}; +use mlm_db::TorrentMeta; +use mlm_parse::parse_edition; + +use std::sync::Arc; + +const DEFAULT_ENDPOINT: &str = "https://api.hardcover.app/v1/graphql"; + +pub struct Hardcover { + endpoint: String, + client: Arc, + api_key: Option, +} + +impl Hardcover { + pub fn new(api_key: Option) -> Self { + Self { + endpoint: DEFAULT_ENDPOINT.to_string(), + client: Arc::new(crate::http::ReqwestClient::new()), + api_key, + } + } + + pub fn with_client( + endpoint: &str, + client: Arc, + api_key: Option, + ) -> Self { + Self { + endpoint: endpoint.to_string(), + client, + api_key, + } + } + + #[instrument(skip_all, fields(query = %query))] + async fn post_graphql( + &self, + query: &str, + variables: serde_json::Value, + ) -> Result { + let body_v = serde_json::json!({ "query": query, "variables": variables }); + let body = serde_json::to_string(&body_v)?; + debug!(url = %self.endpoint, "posting GraphQL request"); + + let headers = if let Some(ref key) = self.api_key { + vec![ + ("content-type", "application/json"), + ("authorization", key.as_str()), + ] + } else { + vec![("content-type", "application/json")] + }; + + let s = self + .client + .post(&self.endpoint, Some(&body), &headers) + .await + .context("post graphql")?; + let v: serde_json::Value = serde_json::from_str(&s).context("parse graphql json")?; + Ok(v) + } + + fn parse_results(&self, v: &serde_json::Value) -> Vec { + let hits = v + .get("data") + .and_then(|d| d.get("search")) + .and_then(|s| s.get("results")) + .and_then(|r| r.get("hits")) + .and_then(|h| h.as_array()) + .cloned() + .unwrap_or_default(); + + hits.iter() + .filter_map(|hit| hit.get("document").cloned()) + .collect() + } + + fn result_id(result: &serde_json::Value) -> Option { + result.get("id").and_then(|v| v.as_i64()) + } + + #[instrument(skip_all, fields(book_id = book_id))] + async fn fetch_book_by_id(&self, book_id: i64) -> Result { + let gql = r#" + query BookById($id: Int!) { + books_by_pk(id: $id) { + id + title + subtitle + headline + description + pages + images { + height + ratio + url + } + contributions { + author { + name + } + contributable_type + contribution + } + book_series { + position + details + series { + name + } + } + taggings(distinct_on: tag_id) { + id + spoiler + taggable_type + tag { + tag + tag_category { + category + } + } + } + editions { + language { + language + } + asin + isbn_10 + isbn_13 + edition_format + contributions { + contribution + author { + name + } + } + } + } + } + "#; + + let vars = serde_json::json!({ "id": book_id }); + let v = self.post_graphql(gql, vars).await?; + let book = v + .get("data") + .and_then(|d| d.get("books_by_pk")) + .cloned() + .context("missing books_by_pk in hardcover response")?; + Ok(book) + } + + fn parse_series_entries(position: Option, details: Option<&str>) -> mlm_db::SeriesEntries { + if let Some(pos) = position { + return mlm_db::SeriesEntries::new(vec![mlm_db::SeriesEntry::Num(pos as f32)]); + } + + if let Some(details) = details { + let cleaned = details.trim(); + if let Ok(num) = cleaned.parse::() { + return mlm_db::SeriesEntries::new(vec![mlm_db::SeriesEntry::Num(num)]); + } + } + + mlm_db::SeriesEntries::new(vec![]) + } + + fn normalize_identifier(value: &str) -> String { + value + .chars() + .filter(|c| c.is_ascii_alphanumeric()) + .collect::() + .to_ascii_uppercase() + } + + fn normalize_name(value: &str) -> String { + value.trim().to_ascii_lowercase() + } + + fn parse_contributions( + contributions: Option<&Vec>, + ) -> (Vec, Vec) { + let mut authors = Vec::new(); + let mut narrators = Vec::new(); + + if let Some(contribs) = contributions { + for c in contribs { + let name = c + .get("author") + .and_then(|a| a.get("name")) + .and_then(|n| n.as_str()) + .map(str::trim) + .filter(|s| !s.is_empty()) + .map(ToString::to_string); + + if let Some(name) = name { + let contribution = c + .get("contribution") + .and_then(|v| v.as_str()) + .unwrap_or("") + .to_ascii_lowercase(); + + if contribution.contains("narrat") { + if !narrators.contains(&name) { + narrators.push(name); + } + } else if (contribution.is_empty() || contribution.contains("author")) + && !authors.contains(&name) + { + authors.push(name); + } + } + } + } + + (authors, narrators) + } + + fn edition_language(edition: &serde_json::Value) -> Option { + edition + .get("language") + .and_then(|l| l.get("language")) + .and_then(|v| v.as_str()) + .and_then(|s| s.parse::().ok()) + } + + fn edition_isbn(edition: &serde_json::Value) -> Option { + edition + .get("isbn_13") + .and_then(|v| v.as_str()) + .or_else(|| edition.get("isbn_10").and_then(|v| v.as_str())) + .map(str::trim) + .filter(|s| !s.is_empty()) + .map(ToString::to_string) + } + + fn edition_asin(edition: &serde_json::Value) -> Option { + edition + .get("asin") + .and_then(|v| v.as_str()) + .map(str::trim) + .filter(|s| !s.is_empty()) + .map(ToString::to_string) + } + + fn edition_format(edition: &serde_json::Value) -> Option { + edition + .get("edition_format") + .and_then(|v| v.as_str()) + .map(str::trim) + .filter(|s| !s.is_empty()) + .map(ToString::to_string) + } + + fn is_audiobook_format(fmt: &str) -> bool { + let f = fmt.to_ascii_lowercase(); + f.contains("audio") || f.contains("audible") || f.contains("hör") + } + + fn is_ebook_format(fmt: &str) -> bool { + let f = fmt.to_ascii_lowercase(); + f.contains("ebook") || f.contains("e-book") || f.contains("epub") || f.contains("kindle") + } + + fn score_edition( + edition: &serde_json::Value, + result: &serde_json::Value, + query: Option<&TorrentMeta>, + ) -> i32 { + let mut score = 0_i32; + + let query_isbn = query + .and_then(|q| q.ids.get(mlm_db::ids::ISBN)) + .map(|s| Self::normalize_identifier(s)); + let search_isbn = result + .get("isbns") + .and_then(|i| i.as_array()) + .and_then(|arr| arr.iter().filter_map(|v| v.as_str()).next()) + .map(Self::normalize_identifier); + + if let Some(query_isbn) = query_isbn + && let Some(edition_isbn) = Self::edition_isbn(edition) + && Self::normalize_identifier(&edition_isbn) == query_isbn + { + score += 140; + } else if let Some(search_isbn) = search_isbn + && let Some(edition_isbn) = Self::edition_isbn(edition) + && Self::normalize_identifier(&edition_isbn) == search_isbn + { + score += 20; + } + + if let Some(query_asin) = query + .and_then(|q| q.ids.get(mlm_db::ids::ASIN)) + .map(|s| Self::normalize_identifier(s)) + && let Some(edition_asin) = Self::edition_asin(edition) + && Self::normalize_identifier(&edition_asin) == query_asin + { + score += 140; + } + + if let Some(query_lang) = query.and_then(|q| q.language) + && let Some(edition_lang) = Self::edition_language(edition) + { + if edition_lang == query_lang { + score += 30; + } else { + score -= 10; + } + } + + if let Some(query) = query { + let format = Self::edition_format(edition); + match query.media_type { + mlm_db::MediaType::Audiobook | mlm_db::MediaType::PeriodicalAudiobook => { + if let Some(format) = format { + if Self::is_audiobook_format(&format) { + score += 25; + } else { + score -= 8; + } + } + } + mlm_db::MediaType::Ebook | mlm_db::MediaType::PeriodicalEbook => { + if let Some(format) = format { + if Self::is_ebook_format(&format) { + score += 25; + } else if Self::is_audiobook_format(&format) { + score -= 8; + } + } + } + _ => {} + } + } + + let query_authors = query + .map(|q| { + q.authors + .iter() + .map(|a| Self::normalize_name(a)) + .collect::>() + }) + .unwrap_or_default(); + if !query_authors.is_empty() { + let (edition_authors, _) = + Self::parse_contributions(edition.get("contributions").and_then(|c| c.as_array())); + let edition_author_names = edition_authors + .iter() + .map(|a| Self::normalize_name(a)) + .collect::>(); + + if edition_author_names + .iter() + .any(|a| query_authors.iter().any(|q| q == a)) + { + score += 20; + } + } + + score + } + + fn select_best_edition<'a>( + editions: &'a [serde_json::Value], + result: &serde_json::Value, + query: Option<&TorrentMeta>, + ) -> Option<&'a serde_json::Value> { + editions + .iter() + .enumerate() + .max_by_key(|(idx, edition)| { + (Self::score_edition(edition, result, query), -(*idx as i32)) + }) + .map(|(_, edition)| edition) + } + + fn score_result( + &self, + result: &serde_json::Value, + scoring_query: &helpers::SearchQuery, + ) -> f64 { + let q_title = Some(scoring_query.title.clone()); + let q_auths = scoring_query.author.iter().cloned().collect::>(); + crate::helpers::score_candidate( + self.result_title(result), + &self.result_authors(result), + &q_title, + &q_auths, + ) + } + + fn select_best_result( + &self, + results: &[serde_json::Value], + scoring_query: &helpers::SearchQuery, + threshold: f64, + ) -> Option<(usize, f64)> { + let mut best_idx = None; + let mut best_score = -1.0_f64; + for (i, item) in results.iter().enumerate() { + let score = self.score_result(item, scoring_query); + if score > best_score { + best_score = score; + best_idx = Some(i); + } + } + + if best_score >= threshold { + best_idx.map(|idx| (idx, best_score)) + } else { + None + } + } + + async fn search_best_result( + &self, + title: &str, + authors: &[String], + ) -> Result<(serde_json::Value, f64)> { + if title.trim().is_empty() { + return Err(anyhow::anyhow!("title is required for search")); + } + + let threshold = self.min_score_threshold(); + let q_with_author = helpers::query_with_author(title, authors); + let q_title_only = helpers::query_title_only(title); + + let tried_with_author = if q_with_author.author.is_some() { + match self.search(&q_with_author).await { + Ok(results) => { + if !results.is_empty() + && let Some((idx, score)) = + self.select_best_result(&results, &q_with_author, threshold) + { + return Ok((results[idx].clone(), score)); + } + } + Err(e) => warn!("hardcover search with author failed: {e}"), + } + true + } else { + false + }; + + if (!tried_with_author || !authors.is_empty()) && !q_title_only.title.is_empty() { + match self.search(&q_title_only).await { + Ok(results) => { + if !results.is_empty() + && let Some((idx, score)) = + self.select_best_result(&results, &q_with_author, threshold) + { + return Ok((results[idx].clone(), score)); + } + } + Err(e) => warn!("hardcover title-only search failed: {e}"), + } + } + + Err(anyhow::anyhow!("no result above score threshold")) + } + + async fn result_to_meta_with_query( + &self, + result: &serde_json::Value, + query: Option<&TorrentMeta>, + ) -> Result { + let id = Self::result_id(result).context("missing hardcover result id")?; + let book = self.fetch_book_by_id(id).await?; + + let title = book + .get("title") + .and_then(|t| t.as_str()) + .unwrap_or("") + .to_string(); + + let subtitle = book + .get("subtitle") + .and_then(|s| s.as_str()) + .map(str::trim) + .filter(|s| !s.is_empty()) + .map(ToString::to_string); + let headline = book + .get("headline") + .and_then(|h| h.as_str()) + .map(str::trim) + .filter(|s| !s.is_empty()) + .map(ToString::to_string); + let body = book + .get("description") + .and_then(|d| d.as_str()) + .map(str::trim) + .filter(|s| !s.is_empty()) + .map(ToString::to_string); + let mut description_parts = Vec::new(); + if let Some(h) = headline { + description_parts.push(h); + } + if let Some(b) = body { + description_parts.push(b); + } + let description = description_parts.join("\n\n"); + + let (book_authors, book_narrators) = + Self::parse_contributions(book.get("contributions").and_then(|c| c.as_array())); + + let selected_edition = book + .get("editions") + .and_then(|e| e.as_array()) + .and_then(|editions| Self::select_best_edition(editions, result, query)); + + let (authors, narrators) = if let Some(edition) = selected_edition { + let (edition_authors, edition_narrators) = + Self::parse_contributions(edition.get("contributions").and_then(|c| c.as_array())); + + let authors = if edition_authors.is_empty() { + book_authors.clone() + } else { + edition_authors + }; + let narrators = if edition_narrators.is_empty() { + book_narrators.clone() + } else { + edition_narrators + }; + + (authors, narrators) + } else { + (book_authors, book_narrators) + }; + + let mut tm = TorrentMeta { + title: subtitle + .map(|s| format!("{title}: {s}")) + .unwrap_or_else(|| title.clone()), + description, + authors, + narrators, + media_type: query.map(|q| q.media_type).unwrap_or_default(), + language: query.and_then(|q| q.language), + ..Default::default() + }; + + let mut tags = Vec::new(); + let mut categories = Vec::new(); + if let Some(taggings) = book.get("taggings").and_then(|t| t.as_array()) { + for tagging in taggings { + let tag_text = tagging + .get("tag") + .and_then(|t| t.get("tag")) + .and_then(|v| v.as_str()) + .map(str::trim) + .filter(|s| !s.is_empty()); + + let Some(tag_text) = tag_text else { + continue; + }; + + let normalized_tag = tag_text.to_ascii_lowercase(); + if !normalized_tag.is_empty() && !tags.contains(&normalized_tag) { + tags.push(normalized_tag); + } + + let tag_category = tagging + .get("tag") + .and_then(|t| t.get("tag_category")) + .and_then(|tc| tc.get("category")) + .and_then(|v| v.as_str()) + .map(|s| s.trim().to_ascii_lowercase()); + + if matches!(tag_category.as_deref(), Some("genre" | "mood")) { + for category in map_tag_to_category(tag_text) { + if !categories.contains(&category) { + categories.push(category); + } + } + } + } + } + tm.tags = tags; + tm.categories = categories; + tm.ids.insert("hardcover".to_string(), id.to_string()); + + if let Some(edition) = selected_edition { + if let Some(lang) = Self::edition_language(edition) { + tm.language = Some(lang); + } + if let Some(asin) = Self::edition_asin(edition) { + tm.ids.insert(mlm_db::ids::ASIN.to_string(), asin); + } + if let Some(isbn) = Self::edition_isbn(edition) { + tm.ids.insert(mlm_db::ids::ISBN.to_string(), isbn); + } + if let Some(format) = Self::edition_format(edition) { + let lower = format.to_ascii_lowercase(); + if Self::is_audiobook_format(&lower) { + tm.media_type = mlm_db::MediaType::Audiobook; + } else if Self::is_ebook_format(&lower) { + tm.media_type = mlm_db::MediaType::Ebook; + } + + let (_t, ed_parsed) = parse_edition(&tm.title, &format); + if ed_parsed.is_some() { + tm.edition = ed_parsed; + } + } + } + + // Fallback ISBN support from search payload when edition doesn't provide one. + if !tm.ids.contains_key(mlm_db::ids::ISBN) + && let Some(isbns_arr) = result.get("isbns").and_then(|i| i.as_array()) + && let Some(first) = isbns_arr.iter().filter_map(|v| v.as_str()).next() + { + let s = first.trim().to_string(); + if !s.is_empty() { + tm.ids.insert(mlm_db::ids::ISBN.to_string(), s); + } + } + + // Legacy edition fallback from selected search document. + if tm.edition.is_none() + && let Some(ed_str) = result + .get("edition") + .and_then(|v| v.as_str()) + .or(result.get("edition_string").and_then(|v| v.as_str())) + { + let (_t, ed_parsed) = parse_edition(&tm.title, ed_str); + if ed_parsed.is_some() { + tm.edition = ed_parsed; + } + } + + if let Some(series_arr) = book.get("book_series").and_then(|v| v.as_array()) { + for s in series_arr { + let name = s + .get("series") + .and_then(|series| series.get("name")) + .and_then(|v| v.as_str()) + .map(str::trim) + .filter(|n| !n.is_empty()); + + if let Some(name) = name { + let position = s.get("position").and_then(|v| v.as_f64()); + let details = s.get("details").and_then(|v| v.as_str()); + tm.series.push(mlm_db::Series { + name: name.to_string(), + entries: Self::parse_series_entries(position, details), + }); + } + } + } + + debug!( + title = %tm.title, + authors = ?tm.authors, + language = ?tm.language, + tags_count = tm.tags.len(), + categories_count = tm.categories.len(), + "returning hardcover metadata" + ); + Ok(tm) + } +} + +impl Default for Hardcover { + fn default() -> Self { + Self::new(None) + } +} + +impl MetadataProvider for Hardcover { + type SearchResult = serde_json::Value; + + fn id(&self) -> &str { + "hardcover" + } + + async fn search(&self, query: &helpers::SearchQuery) -> Result> { + let gql = r#" + query Search($q: String!, $type: String!, $per_page: Int, $page: Int) { + search(query: $q, query_type: $type, per_page: $per_page, page: $page) { + results + } + } + "#; + + let qstr = query.to_combined_string(); + let vars = serde_json::json!({"q": qstr, "type": "Book", "per_page": 10, "page": 1}); + debug!(query = %qstr, "searching hardcover"); + let v = self.post_graphql(gql, vars).await?; + let results = self.parse_results(&v); + debug!(count = results.len(), "hardcover search results"); + Ok(results) + } + + fn result_title<'a>(&self, result: &'a Self::SearchResult) -> Option<&'a str> { + result.get("title")?.as_str() + } + + fn result_authors(&self, result: &Self::SearchResult) -> Vec { + result + .get("author_names") + .and_then(|a| a.as_array()) + .map(|arr| { + arr.iter() + .filter_map(|v| v.as_str().map(|s| s.to_string())) + .collect() + }) + .unwrap_or_default() + } + + async fn result_to_meta(&self, result: &Self::SearchResult) -> Result { + self.result_to_meta_with_query(result, None).await + } +} + +#[async_trait] +impl Provider for Hardcover { + fn id(&self) -> &str { + MetadataProvider::id(self) + } + + async fn fetch(&self, query: &TorrentMeta) -> Result { + let (best_result, _score) = self + .search_best_result(&query.title, &query.authors) + .await?; + let meta = self + .result_to_meta_with_query(&best_result, Some(query)) + .await?; + Ok(meta) + } +} diff --git a/mlm_meta/src/providers/mod.rs b/mlm_meta/src/providers/mod.rs new file mode 100644 index 00000000..fe582942 --- /dev/null +++ b/mlm_meta/src/providers/mod.rs @@ -0,0 +1,140 @@ +pub mod fake; +pub mod hardcover; +pub mod openlibrary; +pub mod romanceio; + +pub use fake::FakeProvider; +pub use hardcover::Hardcover; +pub use openlibrary::OpenLibrary; +pub use romanceio::RomanceIo; + +use crate::helpers::SearchQuery; +use anyhow::Result; +use mlm_db::TorrentMeta; + +/// Metadata provider trait for searching and fetching book metadata. +/// Implement this trait to add a new provider. +#[allow(async_fn_in_trait)] +pub trait MetadataProvider: Send + Sync { + /// Provider's search result type (e.g., serde_json::Value for JSON APIs) + type SearchResult; + + /// Unique identifier for this provider (e.g., "hardcover", "romanceio") + fn id(&self) -> &str; + + /// Minimum score threshold for accepting a match. Default 0.5. + fn min_score_threshold(&self) -> f64 { + 0.5 + } + + /// Perform a search query. Receives title and optional author. + async fn search(&self, query: &SearchQuery) -> Result>; + + /// Extract title from a search result + fn result_title<'a>(&self, result: &'a Self::SearchResult) -> Option<&'a str>; + + /// Extract authors from a search result + fn result_authors(&self, result: &Self::SearchResult) -> Vec; + + /// Convert a search result to TorrentMeta. May fetch additional data (e.g., romanceio). + async fn result_to_meta(&self, result: &Self::SearchResult) -> Result; +} + +/// `search_query` - the query sent to the provider (may have no author for title-only fallback) +/// `scoring_query` - the query used for scoring (always includes author if provided) +fn select_best( + provider: &P, + results: &[P::SearchResult], + _search_query: &SearchQuery, + scoring_query: &SearchQuery, + threshold: f64, +) -> Result> { + let q_title = Some(scoring_query.title.clone()); + let q_auths = scoring_query.author.iter().cloned().collect::>(); + + let mut best_idx: Option = None; + let mut best_score = -1.0f64; + + for (i, item) in results.iter().enumerate() { + let title = provider.result_title(item); + let authors = provider.result_authors(item); + + let score = crate::helpers::score_candidate(title, &authors, &q_title, &q_auths); + + if score > best_score { + best_score = score; + best_idx = Some(i); + } + } + + if best_score >= threshold { + Ok(best_idx.map(|idx| (idx, best_score))) + } else { + Ok(None) + } +} + +/// Run a search with fallback: try title+author first, then title-only if needed. +/// Returns the matched metadata and score if found above threshold. +pub async fn search_with_fallback( + provider: &P, + title: &str, + authors: &[String], +) -> Result<(TorrentMeta, f64)> { + if title.trim().is_empty() { + return Err(anyhow::anyhow!("title is required for search")); + } + + let threshold = provider.min_score_threshold(); + + // Build queries + let q_with_author = crate::helpers::query_with_author(title, authors); + let q_title_only = crate::helpers::query_title_only(title); + + // If we have authors, try with author first + let tried_with_author = if q_with_author.author.is_some() { + match provider.search(&q_with_author).await { + Ok(results) => { + if !results.is_empty() + && let Some((idx, score)) = select_best( + provider, + &results, + &q_with_author, + &q_with_author, + threshold, + )? + { + let meta = provider.result_to_meta(&results[idx]).await?; + return Ok((meta, score)); + } + } + Err(e) => { + tracing::warn!("search with author failed: {}", e); + } + } + true + } else { + false + }; + + // If authors was provided but didn't yield results above threshold, try title-only + // Or if no authors were provided, do title-only search + if (!tried_with_author || !authors.is_empty()) && !q_title_only.title.is_empty() { + match provider.search(&q_title_only).await { + Ok(results) => { + if !results.is_empty() + && let Some((idx, score)) = + select_best(provider, &results, &q_title_only, &q_with_author, threshold)? + { + let meta = provider.result_to_meta(&results[idx]).await?; + return Ok((meta, score)); + } + } + Err(e) => { + tracing::warn!("title-only search failed: {}", e); + } + } + } + + Err(anyhow::anyhow!("no result above score threshold")) +} diff --git a/mlm_meta/src/providers/openlibrary.rs b/mlm_meta/src/providers/openlibrary.rs new file mode 100644 index 00000000..290c1fb2 --- /dev/null +++ b/mlm_meta/src/providers/openlibrary.rs @@ -0,0 +1,162 @@ +use std::sync::Arc; + +use anyhow::{Context, Result}; +use async_trait::async_trait; +use tracing::{debug, instrument}; +use url::Url; + +use crate::http::ReqwestClient; +use crate::providers::{MetadataProvider, search_with_fallback}; +use crate::traits::Provider; +use crate::{helpers, http::HttpClient}; +use mlm_db::TorrentMeta; + +pub struct OpenLibrary { + pub client: Arc, +} + +impl OpenLibrary { + pub fn new() -> Self { + Self { + client: Arc::new(ReqwestClient::new()), + } + } + + pub fn with_client(client: Arc) -> Self { + Self { client } + } + + #[instrument(skip_all, fields(url = %url))] + async fn fetch_json(&self, url: &str) -> Result { + debug!("fetching Open Library JSON"); + self.client.get(url).await + } +} + +impl Default for OpenLibrary { + fn default() -> Self { + Self::new() + } +} + +impl MetadataProvider for OpenLibrary { + type SearchResult = serde_json::Value; + + fn id(&self) -> &str { + "openlibrary" + } + + async fn search(&self, query: &helpers::SearchQuery) -> Result> { + let base = Url::parse("https://openlibrary.org").unwrap(); + let qstr = query.to_combined_string(); + + let mut search_url = base.join("/search.json").unwrap(); + if !qstr.is_empty() { + search_url.query_pairs_mut().append_pair("q", &qstr); + } + + let url = search_url.to_string(); + debug!(query = %qstr, url = %url, "searching Open Library"); + + let body = self.fetch_json(&url).await.context("fetch search json")?; + let v: serde_json::Value = serde_json::from_str(&body).context("parse search json")?; + + let docs = v.get("docs").and_then(|d| d.as_array()).cloned(); + debug!( + count = docs.as_ref().map(|a| a.len()).unwrap_or(0), + "Open Library search results" + ); + Ok(docs.unwrap_or_default()) + } + + fn result_title<'a>(&self, result: &'a Self::SearchResult) -> Option<&'a str> { + result.get("title").and_then(|t| t.as_str()) + } + + fn result_authors(&self, result: &Self::SearchResult) -> Vec { + result + .get("author_name") + .and_then(|a| a.as_array()) + .map(|arr| { + arr.iter() + .filter_map(|a| a.as_str().map(|s| s.to_string())) + .collect() + }) + .unwrap_or_default() + } + + async fn result_to_meta(&self, result: &Self::SearchResult) -> Result { + let title = result + .get("title") + .and_then(|t| t.as_str()) + .unwrap_or("") + .to_string(); + + let authors = self.result_authors(result); + + let first_publish_year = result + .get("first_publish_year") + .and_then(|y| y.as_i64()) + .map(|y| y.to_string()); + + let edition_count = result + .get("edition_count") + .and_then(|e| e.as_i64()) + .map(|e| e as u32); + + let subjects: Vec = result + .get("subject") + .and_then(|s| s.as_array()) + .map(|arr| { + arr.iter() + .filter_map(|s| s.as_str()) + .filter(|s| s.len() > 2 && s.len() < 50) + .take(20) + .map(|s| s.to_lowercase()) + .collect() + }) + .unwrap_or_default(); + + let mut tm = TorrentMeta { + title: title.clone(), + description: String::new(), + authors: authors.clone(), + ..Default::default() + }; + + if let Some(year) = first_publish_year { + tm.description + .push_str(&format!("First published: {}\n", year)); + } + if let Some(count) = edition_count { + tm.description.push_str(&format!("{} editions\n", count)); + } + + tm.tags = subjects; + + if let Some(isbns) = result.get("isbn").and_then(|i| i.as_array()) { + for isbn in isbns.iter().take(3) { + if let Some(isbn_str) = isbn.as_str() { + tm.ids + .insert(mlm_db::ids::ISBN.to_string(), isbn_str.to_string()); + break; + } + } + } + + debug!(title = %tm.title, authors = ?tm.authors, tags_count = tm.tags.len(), "returning Open Library metadata"); + Ok(tm) + } +} + +#[async_trait] +impl Provider for OpenLibrary { + fn id(&self) -> &str { + MetadataProvider::id(self) + } + + async fn fetch(&self, query: &TorrentMeta) -> Result { + let (meta, _score) = search_with_fallback(self, &query.title, &query.authors).await?; + Ok(meta) + } +} diff --git a/mlm_meta/src/providers/romanceio.rs b/mlm_meta/src/providers/romanceio.rs new file mode 100644 index 00000000..51ef74b4 --- /dev/null +++ b/mlm_meta/src/providers/romanceio.rs @@ -0,0 +1,259 @@ +use std::sync::Arc; + +use anyhow::{Context, Result}; +use async_trait::async_trait; +use scraper::{Html, Selector}; +use tracing::{debug, instrument}; +use url::Url; + +use crate::http::ReqwestClient; +use crate::providers::{MetadataProvider, search_with_fallback}; +use crate::traits::Provider; +use crate::{helpers, http::HttpClient, map_tag_to_category}; +use mlm_db::TorrentMeta; + +pub struct RomanceIo { + pub client: Arc, +} + +impl RomanceIo { + pub fn new() -> Self { + Self { + client: Arc::new(ReqwestClient::new()), + } + } + + pub fn with_client(client: Arc) -> Self { + Self { client } + } + + #[instrument(skip_all, fields(url = %url))] + async fn fetch_html(&self, url: &str) -> Result { + debug!("fetching romance.io HTML"); + self.client.get(url).await + } + + async fn fetch_book(&self, book_url: &str) -> Result { + let book_html = self.fetch_html(book_url).await.context("fetch book page")?; + self.parse_book_html(&book_html) + } + + pub fn parse_book_html(&self, html: &str) -> Result { + let doc = Html::parse_document(html); + + let script_sel = Selector::parse("script[type=\"application/ld+json\"]").unwrap(); + if let Some(script) = doc.select(&script_sel).next() { + let json_text = script.inner_html(); + let v: serde_json::Value = serde_json::from_str(&json_text).context("parse json-ld")?; + let book = v.get("@graph").and_then(|g| g.get(0)).unwrap_or(&v); + let title = book + .get("name") + .and_then(|n| n.as_str()) + .unwrap_or("") + .to_string(); + let authors: Vec = book + .get("author") + .and_then(|a| a.as_array()) + .map(|arr| { + arr.iter() + .filter_map(|p| { + p.get("name") + .and_then(|n| n.as_str()) + .map(|s| s.to_string()) + }) + .collect() + }) + .unwrap_or_default(); + let description = book + .get("description") + .and_then(|d| d.as_str()) + .map(|s| s.to_string()); + + let mut tm = TorrentMeta { + title: title.clone(), + description: description.clone().unwrap_or_default(), + authors: authors.clone(), + ..Default::default() + }; + + let mut topics = Vec::new(); + let topics_sel = Selector::parse("#valid-topics-list a.topic").unwrap(); + for t in doc.select(&topics_sel) { + let text = t.text().collect::>().join(" ").trim().to_lowercase(); + if text.len() > 2 && !topics.contains(&text) { + topics.push(text); + } + } + + if let Some(desc) = description.as_ref() { + for part in desc.split(&[',', '\n'][..]) { + let s = part.trim().to_lowercase(); + if s.len() > 2 && !topics.contains(&s) { + topics.push(s); + } + } + } + + let mut categories = Vec::new(); + let mut tags = Vec::new(); + for t in topics { + let mapped = topic_to_category(&t); + if !mapped.is_empty() { + for cat in mapped { + if !categories.contains(&cat) { + categories.push(cat); + } + } + } else if !tags.contains(&t) { + tags.push(t); + } + } + tm.categories = categories; + tm.tags = tags; + + return Ok(tm); + } + Err(anyhow::anyhow!("no json-ld found")) + } +} + +impl Default for RomanceIo { + fn default() -> Self { + Self::new() + } +} + +impl MetadataProvider for RomanceIo { + type SearchResult = serde_json::Value; + + fn id(&self) -> &str { + "romanceio" + } + + async fn search(&self, query: &helpers::SearchQuery) -> Result> { + let base = Url::parse("https://www.romance.io").unwrap(); + let qstr = query.to_combined_string(); + + let mut json_url = base.join("/json/search_books").unwrap(); + json_url.query_pairs_mut().append_pair("search", &qstr); + + debug!(query = %qstr, url = %json_url, "searching romance.io"); + let body = self + .fetch_html(json_url.as_str()) + .await + .context("fetch search json")?; + + let v: serde_json::Value = match serde_json::from_str(&body) { + Ok(v) => v, + Err(e) => { + let preview = if body.len() > 50000 { + format!("{}...", &body[..50000]) + } else { + body.clone() + }; + tracing::warn!( + url = %json_url, + response_preview = %preview, + "failed to parse romance.io search response: {}", + e + ); + return Err(anyhow::anyhow!("parse search json: {}", e)) + .context("parse search json"); + } + }; + + let books = v.get("books").and_then(|b| b.as_array()).cloned(); + debug!( + count = books.as_ref().map(|a| a.len()).unwrap_or(0), + "romance.io search results" + ); + Ok(books.unwrap_or_default()) + } + + fn result_title<'a>(&self, result: &'a Self::SearchResult) -> Option<&'a str> { + result + .get("info") + .and_then(|info| info.get("title")) + .and_then(|t| t.as_str()) + .or_else(|| result.get("url").and_then(|u| u.as_str())) + } + + fn result_authors(&self, result: &Self::SearchResult) -> Vec { + result + .get("authors") + .and_then(|a| a.as_array()) + .map(|arr| { + arr.iter() + .filter_map(|a| { + a.get("name") + .and_then(|n| n.as_str()) + .map(|s| s.to_string()) + }) + .collect() + }) + .unwrap_or_default() + } + + async fn result_to_meta(&self, result: &Self::SearchResult) -> Result { + // RomanceIo fetches the full book page for verification, so this method + // extracts the URL and fetches the book page + let url = result + .get("url") + .and_then(|u| u.as_str()) + .context("no URL in search result")?; + + let base = Url::parse("https://www.romance.io").unwrap(); + let book_url = base.join(url).context("invalid book URL")?; + + debug!(url = %book_url, "fetching romance.io book page"); + let meta = self.fetch_book(book_url.as_str()).await?; + + // Verify title matches (case-insensitive substring) + // Note: The caller should handle verification, but we do a quick check here + Ok(meta) + } +} + +#[async_trait] +impl Provider for RomanceIo { + fn id(&self) -> &str { + MetadataProvider::id(self) + } + + async fn fetch(&self, query: &TorrentMeta) -> Result { + let (meta, _score) = search_with_fallback(self, &query.title, &query.authors).await?; + + // Additional verification: ensure title contains query title + let query_title_lower = query.title.to_lowercase(); + let meta_title_lower = meta.title.to_lowercase(); + if !meta_title_lower.contains(&query_title_lower) { + return Err(anyhow::anyhow!( + "matched title does not contain query title" + )); + } + + // Additional verification: if query has authors, at least one should match + if !query.authors.is_empty() { + let query_authors_lower: Vec = + query.authors.iter().map(|a| a.to_lowercase()).collect(); + let meta_authors_lower: Vec = + meta.authors.iter().map(|a| a.to_lowercase()).collect(); + let any_match = query_authors_lower.iter().any(|qa| { + meta_authors_lower + .iter() + .any(|ma| ma.contains(qa) || qa.contains(ma)) + }); + if !any_match { + return Err(anyhow::anyhow!( + "matched author does not contain any query author" + )); + } + } + + Ok(meta) + } +} + +fn topic_to_category(topic: &str) -> Vec { + map_tag_to_category(topic) +} diff --git a/mlm_meta/src/tag_category_map.rs b/mlm_meta/src/tag_category_map.rs new file mode 100644 index 00000000..6ad1d6cd --- /dev/null +++ b/mlm_meta/src/tag_category_map.rs @@ -0,0 +1,494 @@ +use mlm_db::Category; + +fn normalize_tag(tag: &str) -> String { + let trimmed = tag.trim(); + let value = match trimmed.split_once(':') { + Some((prefix, rest)) => { + let p = prefix.trim().to_ascii_lowercase(); + if matches!( + p.as_str(), + "genre" | "enre" | "mood" | "tag" | "pace" | "content warning" | "general" + ) { + rest.trim() + } else { + trimmed + } + } + None => trimmed, + }; + + value + .to_ascii_lowercase() + .replace('&', " and ") + .replace(['/', '-', '|'], " ") + .replace('\'', "") + .replace([',', '.', '(', ')'], " ") + .split_whitespace() + .collect::>() + .join(" ") +} + +/// Map external string tags into the internal category taxonomy. +/// +/// Returns an empty list for broad/ambiguous/noisy tags that do not map cleanly. +pub fn map_tag_to_category(tag: &str) -> Vec { + let key = normalize_tag(tag); + + // Explicit multi-category mappings for compound tags. + match key.as_str() { + "contemporary romance" => return vec![Category::Contemporary, Category::Romance], + "historical romance" => return vec![Category::Historical, Category::Romance], + "fantasy romance" => return vec![Category::Fantasy, Category::Romance], + "science fiction and fantasy" | "science fiction fantasy" => { + return vec![Category::ScienceFiction, Category::Fantasy]; + } + _ => {} + } + + if key.contains("programming language") { + return vec![Category::ComputerScience]; + } + let mapped: &[Category] = match key.as_str() { + // Kept mappings + "fantasy" | "magic" | "fairies" | "fantasy games" => &[Category::Fantasy], + "young adult" | "young adult fiction" | "adolescence" => &[Category::YoungAdult], + "adventure" | "adventurous" => &[Category::ActionAdventure], + "science fiction" + | "aliens" + | "extraterrestrial beings" + | "life on other planets" + | "human alien encounters" => &[Category::ScienceFiction], + "strong character development" | "character driven" | "literary" => { + &[Category::CharacterDriven] + } + "comics" | "graphic novels" | "comics and graphic novels" => { + &[Category::GraphicNovelsComics] + } + "history" | "histoire" | "civilization" | "holocaust" | "world war ii" | "1914 1918" => { + &[Category::History] + } + "emotional" | "sad" | "heartfelt" | "introspective" | "depressing" | "grief" => { + &[Category::Emotional] + } + "lgbtq" | "lgbtqia" | "lgbtqia+" => &[Category::Lgbtqia], + "dark" => &[Category::Dark], + "romance" | "love stories" | "romance fiction" | "romantic" | "love" | "marriage" + | "arranged marriage" | "regency romance" | "romantic suspense" => &[Category::Romance], + "war" | "world war" | "1939 1945" | "imaginary wars and battles" => &[Category::Military], + "mysterious" => &[Category::Mystery], + "juvenile fiction" + | "children" + | "childrens stories" + | "childrens literature" + | "board books" + | "picture book" + | "boys" => &[Category::Children], + "tense" | "suspense" | "mystery thriller" => &[Category::Thriller], + "reflective" | "thought provoking" => &[Category::CharacterDriven], + "funny" | "exciting" => &[Category::Funny], + "biography" | "biography and autobiography" | "autobiography" => &[Category::Biography], + "lighthearted" | "hopeful" | "inspiring" => &[Category::Wholesome], + "mystery" | "detective and mystery stories" => &[Category::Mystery], + "dystopian" => &[Category::Dystopian], + "religion" | "spirituality" => &[Category::ReligionSpirituality], + "juvenile nonfiction" | "education" => &[Category::Textbook], + "space" | "astronauts" => &[Category::Space], + "business and economics" | "business" => &[Category::Business], + "philosophy" => &[Category::Philosophy], + "science" | "physics" | "cosmology" | "genetic engineering" => &[Category::Science], + "thriller" | "thriller and suspense" | "suspenseful" => &[Category::Thriller], + "computers" | "programming" => &[Category::ComputerScience], + "psychology" => &[Category::Psychology], + "poetry" | "childrens poetry" | "english poetry" => &[Category::Poetry], + "relaxing" => &[Category::Cozy], + "humor" + | "comedy" + | "humorous" + | "humorous stories" + | "humour" + | "american wit and humor" + | "witty" + | "comedians" => &[Category::Humor], + "politics" + | "social science" + | "political science" + | "politique" + | "feminism" + | "capitalism" + | "communism" + | "leadership" + | "presidents" + | "spies and politics" + | "sociologie" + | "political" + | "sociology" + | "anarchism" + | "arab israeli conflict" => &[Category::PoliticsSociety], + "travel" | "air pilots" => &[Category::Travel], + "mathematics" | "algebra" | "calculus" => &[Category::Mathematics], + "cooking" | "food" => &[Category::CookingFood], + "murder" | "police" => &[Category::Crime], + "art" | "painters" | "architects" | "drawing" | "beauty" => &[Category::ArtPhotography], + "self help" => &[Category::SelfHelp], + "short stories" => &[Category::ShortStories], + "literary criticism" => &[Category::LiteraryCriticism], + "body" | "mind and spirit" => &[Category::HealthWellness], + "health and fitness" + | "health" + | "cancer" + | "self actualization psychology" + | "happiness" + | "emotions" + | "aging" => &[Category::HealthWellness], + "literary collections" => &[Category::Anthology], + "historical fiction" => &[Category::Historical], + "contemporary" + | "english fiction" + | "domestic fiction" + | "slice of life" + | "genre fiction" + | "literature and fiction" + | "literary fiction" + | "classique" + | "realistic fiction" + | "french fiction" + | "afrikaans fiction" => &[Category::ContemporaryRealist], + "language arts and disciplines" + | "foreign language study" + | "spanish" + | "spanish language" + | "english" + | "french" + | "french language" + | "german language" + | "turkish" + | "italian" + | "speech" + | "communication" => &[Category::LanguageLinguistics], + "nature" | "animals" | "bears" | "birds" | "dinosaurs" => &[Category::NatureEnvironment], + "folklore" | "fairy tales" | "mythology" => &[Category::MythologyFolklore], + "sports and recreation" | "sports" | "soccer" | "horses" | "hiking" | "baseball" => { + &[Category::SportsOutdoors] + } + "fast paced" => &[Category::ActionAdventure], + "medical" => &[Category::Medicine], + "performing arts" | "drama" | "english drama" | "verse novel" | "plays" => { + &[Category::DramaPlays] + } + "manga" => &[Category::Manga], + "cyberpunk" => &[Category::Cyberpunk], + "crime" | "true crime" => &[Category::TrueCrime], + "music" => &[Category::Music], + "technology and engineering" | "aeronautics" | "automobiles" => &[Category::Engineering], + "horror" | "horror tales" | "scary" | "horreur" => &[Category::Horror], + "architecture" | "design" | "photography" => &[Category::ArtPhotography], + "fantasy fiction" => &[Category::Fantasy], + "crafts and hobbies" => &[Category::CraftsDiy], + "adventure stories" + | "action and adventure" + | "adventure and adventurers" + | "action" + | "aventure" => &[Category::ActionAdventure], + "reference" => &[Category::Reference], + "urban fantasy" | "paranormal and urban" => &[Category::UrbanFantasy], + "games and activities" | "games" | "roleplaying games" => &[Category::SportsOutdoors], + "audiobook" | "audio book" | "audiobooks" | "kinder hörbücher" | "childrens audiobooks" => { + &[Category::Audiobook] + } + "electronic books" => &[Category::Ebook], + "holiday" | "christmas" => &[Category::Wholesome], + "great britain" | "england" | "europe" | "british" | "germany" | "london england" + | "ireland" | "greece" | "italy" | "scotland" | "rome" | "portugal" | "poland" + | "berlin germany" | "soviet union" | "russia" => &[Category::Europe], + "gardening" | "house and home" => &[Category::HomeGarden], + "memoir" => &[Category::Memoir], + "bible" | "bibles" | "christian life" | "christian fiction" => { + &[Category::ReligionSpirituality] + } + "epic fantasy" => &[Category::EpicFantasy], + "military" => &[Category::Military], + "boys love" | "bl" | "yaoi" | "gay men" => &[Category::Lgbtqia], + "young adult nonfiction" | "teen and young adult" | "jeune adulte" => { + &[Category::YoungAdult] + } + "american" | "native americans" | "american fiction" | "americans" => { + &[Category::NorthAmerica] + } + "authors" => &[Category::LiteraryCriticism], + "english language" | "fiction in english" => &[Category::LanguageLinguistics], + "historical" => &[Category::Historical], + "middle grade" => &[Category::MiddleGrade], + "australia" | "australian fiction" | "australian" => &[Category::Oceania], + "american poetry" => &[Category::Poetry], + "france" => &[Category::Europe], + "china" | "chinese" => &[Category::EastAsia], + "japan" | "japanese" => &[Category::EastAsia], + "india" => &[Category::SouthAsia], + "egypt" | "arabic fiction" => &[Category::MiddleEast], + "africa" => &[Category::Africa], + "united states" | "california" | "canada" | "canadian" | "colorado" | "new york n y" + | "arizona" | "alaska" | "america" => &[Category::NorthAmerica], + "brazil" => &[Category::LatinAmerica], + "christmas stories" => &[Category::Wholesome], + "occult" => &[Category::OccultEsotericism], + "demonology" => &[Category::OccultEsotericism], + "erotic stories" => &[Category::Erotica], + "erotica" => &[Category::Erotica], + "romantasy" => &[Category::Fantasy, Category::Romance], + "dragons" => &[Category::Fantasy], + "conduct of life" => &[Category::SelfHelp], + "modern" => &[Category::Contemporary], + "transportation" => &[Category::Travel], + "space opera" | "first contact" => &[Category::SpaceOpera], + "assassins" | "mafia" | "missing persons" | "kidnapping" | "abduction" => { + &[Category::Crime] + } + "dark romance" | "dark romance kink" => &[Category::DarkRomance], + "study aids" => &[Category::Workbook], + "adult" | "adulte" => &[Category::Adult], + "paranormal romance" | "omegaverse" | "amish romance" => &[Category::ParanormalRomance], + "monster romance" => &[Category::ParanormalRomance], + "espionage" => &[Category::PoliticalIntrigue], + "conspiracies" => &[Category::PoliticalIntrigue], + "artists" => &[Category::ArtPhotography], + "actors" + | "actresses" + | "motion picture actors and actresses" + | "motion pictures" + | "motion picture producers and directors" => &[Category::FilmTelevision], + "paranormal" | "paranormal fiction" | "supernatural" | "vampires" | "ghost stories" + | "ghosts" | "angels" => &[Category::ParanormalHorror], + "slow paced" => &[Category::Cozy], + "time travel" => &[Category::TimeTravel], + "magical realism" => &[Category::MagicalRealism], + "dystopias" => &[Category::Dystopian], + "criminals" | "crime fiction" => &[Category::Crime], + "thrillers" | "suspense fiction" | "crime thrillers" => &[Category::Thriller], + "philosophie" => &[Category::Philosophy], + "found family" => &[Category::FoundFamily], + "chick lit" => &[Category::RomanticComedy], + "women sleuths" => &[Category::Detective], + "city and town life" => &[Category::Urban], + "college students" => &[Category::NewAdult], + "caricatures and cartoons" | "fantasy comic books" | "pictorial" => { + &[Category::GraphicNovelsComics] + } + "artificial intelligence" => &[Category::DataAi], + "businessmen" | "economics" | "business enterprises" | "businesswomen" => { + &[Category::Business] + } + "country life" | "frontier and pioneer life" => &[Category::Rural], + "coming of age" | "bildungsromans" => &[Category::ComingOfAge], + "high fantasy" => &[Category::EpicFantasy], + "psychological" | "amnesia" | "psychological thriller" => { + &[Category::PsychologicalThriller] + } + "books and reading" | "authorship" => &[Category::LiteraryCriticism], + "anthology" => &[Category::Anthology], + "essays" => &[Category::Essays], + "novella" => &[Category::Novella], + "java" | "javascript" | "c++" | "python" | "application software" => { + &[Category::ComputerScience] + } + "sapphic" | "queer" | "lesbians" | "shounen ai" => &[Category::Lgbtqia], + "rock musicians" => &[Category::Music], + "christianity" | "buddhism" | "amish" => &[Category::ReligionSpirituality], + "criminal investigation" + | "private investigators" + | "cold cases criminal investigation" + | "mystery and detective" + | "detective" => &[Category::Detective], + "hard science fiction" + | "speculative fiction" + | "sci fi" + | "doctor who fictitious character" => &[Category::ScienceFiction], + "imaginary places" => &[Category::Fantasy], + "astronomy" | "interplanetary voyages" => &[Category::Space], + "dark fantasy" => &[Category::Fantasy], + "litrpg" => &[Category::ProgressionFantasy], + "mental health" | "brain" | "ability" => &[Category::HealthWellness], + "cowboys" | "american western romance" => &[Category::Western], + "cookbooks" => &[Category::CookingFood], + "dreams" => &[Category::Psychology], + "blessing and cursing" => &[Category::ReligionSpirituality], + "high school students" => &[Category::AcademySchool], + "mythical" + | "dragons and mythical creatures" + | "curiosities and wonders" + | "gods" + | "arthurian romances" => &[Category::MythologyFolklore], + "african american women" => &[Category::PocRepresentation], + "indians of north america" => &[Category::NorthAmerica], + "audio theater" | "hörspiel" => &[Category::DramatizedAdaptation], + "cozy" | "bed and breakfast accommodations" | "birthdays" => &[Category::Cozy], + "war and military" | "battle of" | "soldiers" | "guerre" => &[Category::Military], + "chicago ill" | "boston mass" => &[Category::NorthAmerica], + "literature and fiction science fiction and fantasy" => { + &[Category::ScienceFiction, Category::Fantasy] + } + "batman fictitious character" | "superheroes" | "science fiction comic books" => { + &[Category::GraphicNovelsComics] + } + "german" | "greek" | "russian" => &[Category::LanguageLinguistics], + "jewish 1939 1945" => &[Category::History], + "german fiction" | "japanese fiction" | "chinese fiction" => { + &[Category::ContemporaryRealist] + } + "light novel" => &[Category::LightNovel], + "computer networks" | "technology" | "computer games" | "computer adventure games" => { + &[Category::Technology] + } + "diaries" => &[Category::Memoir], + "retellings" | "retelling" => &[Category::Retelling], + "gothic" => &[Category::GothicHorror], + "artistic" | "aesthetics" => &[Category::ArtPhotography], + "ethics" => &[Category::Philosophy], + "series" | "anthologies" | "anthologies and short stories" => &[Category::Anthology], + "islam" | "spiritual life" => &[Category::ReligionSpirituality], + "urban" | "cities and towns" => &[Category::Urban], + "ancient" => &[Category::Ancient], + "medieval" | "castles" => &[Category::Medieval], + "child rearing" => &[Category::ParentingFamily], + "psychological fiction" => &[Category::PsychologicalThriller], + "traditional detectives" | "amateur sleuths" | "police procedural" => { + &[Category::Detective] + } + "cozy mystery" => &[Category::CozyMystery], + "heroes" => &[Category::ActionAdventure], + "alphabet" + | "bedtime" + | "picture books for children" + | "readers" + | "girls" + | "babysitters" => &[Category::Children], + "intelligence officers" => &[Category::PoliticalIntrigue], + "cults" => &[Category::OccultEsotericism], + "dungeons and dragons game" => &[Category::ProgressionFantasy], + "sexy" => &[Category::Erotica], + "climatic changes" | "agriculture" | "farm life" | "dwellings" => { + &[Category::NatureEnvironment] + } + "romantic comedy" => &[Category::RomanticComedy], + "post apocalyptic" | "end of the world" => &[Category::PostApocalyptic], + "satire" => &[Category::Satire], + "democracy" => &[Category::PoliticsSociety], + "monsters" => &[Category::Horror], + "biology" => &[Category::Science], + "clothing and dress" => &[Category::CraftsDiy], + "adult fiction" => &[Category::Adult], + "western" => &[Category::Western], + "jews" => &[Category::ReligionSpirituality], + "businesspeople" | "entrepreneurship" => &[Category::Business], + "new zealand" => &[Category::Oceania], + "anxiety" => &[Category::Psychology], + "literature and fiction mystery" => &[Category::Mystery], + "bandes dessinées" => &[Category::GraphicNovelsComics], + "afghanistan" | "iran" => &[Category::MiddleEast], + "technothrillers" => &[Category::Thriller], + "gothic horror" => &[Category::GothicHorror], + "computer science" => &[Category::ComputerScience], + "slow burn" => &[Category::SlowBurn], + "blind" => &[Category::DisabilityRepresentation], + "novelists" | "journalism" | "poets" | "college teachers" | "composers" => { + &[Category::LiteraryCriticism] + } + "pirates" => &[Category::ActionAdventure], + "christian biography" => &[Category::Biography], + "courtship" => &[Category::Romance], + "mexico" => &[Category::LatinAmerica], + "alternative histories fiction" => &[Category::AlternateHistory], + "historical fantasy" => &[Category::Historical], + "magical" => &[Category::Fantasy], + "antiques and collectibles" => &[Category::CraftsDiy], + + _ => &[], + }; + + mapped.to_vec() +} + +#[cfg(test)] +mod tests { + use super::map_tag_to_category; + use mlm_db::Category; + + #[test] + fn maps_selected_tags() { + assert_eq!(map_tag_to_category("Fantasy"), vec![Category::Fantasy]); + assert_eq!(map_tag_to_category(" Fantasy "), vec![Category::Fantasy]); + assert_eq!( + map_tag_to_category("Character driven"), + vec![Category::CharacterDriven] + ); + assert_eq!( + map_tag_to_category("Comics & Graphic Novels"), + vec![Category::GraphicNovelsComics] + ); + assert_eq!( + map_tag_to_category("Business & Economics"), + vec![Category::Business] + ); + assert_eq!(map_tag_to_category("LGBTQ"), vec![Category::Lgbtqia]); + assert_eq!(map_tag_to_category("Boy's Love"), vec![Category::Lgbtqia]); + assert_eq!(map_tag_to_category("Manga"), vec![Category::Manga]); + assert_eq!( + map_tag_to_category("Technology & Engineering"), + vec![Category::Engineering] + ); + assert_eq!(map_tag_to_category("Audio book"), vec![Category::Audiobook]); + assert_eq!( + map_tag_to_category("English language"), + vec![Category::LanguageLinguistics] + ); + assert_eq!( + map_tag_to_category("Paranormal Romance"), + vec![Category::ParanormalRomance] + ); + assert_eq!( + map_tag_to_category("Genre: Rust (Programming Language)"), + vec![Category::ComputerScience] + ); + assert_eq!( + map_tag_to_category("Genre: C# (Programming Language)"), + vec![Category::ComputerScience] + ); + assert_eq!( + map_tag_to_category("Genre: Light Novel"), + vec![Category::LightNovel] + ); + assert_eq!(map_tag_to_category("enre: Ireland"), vec![Category::Europe]); + } + + #[test] + fn maps_compound_tags_to_multiple_categories() { + assert_eq!( + map_tag_to_category("Contemporary Romance"), + vec![Category::Contemporary, Category::Romance] + ); + assert_eq!( + map_tag_to_category("Historical Romance"), + vec![Category::Historical, Category::Romance] + ); + assert_eq!( + map_tag_to_category("Fantasy Romance"), + vec![Category::Fantasy, Category::Romance] + ); + } + + #[test] + fn drops_ambiguous_tags() { + assert_eq!(map_tag_to_category("Fiction"), Vec::::new()); + assert_eq!(map_tag_to_category("Nonfiction"), Vec::::new()); + assert_eq!(map_tag_to_category("medium"), Vec::::new()); + assert_eq!(map_tag_to_category("A mix driven"), Vec::::new()); + assert_eq!(map_tag_to_category("etc"), Vec::::new()); + assert_eq!(map_tag_to_category("Rape"), Vec::::new()); + assert_eq!( + map_tag_to_category("Sexual violence"), + Vec::::new() + ); + assert_eq!(map_tag_to_category("Finance"), Vec::::new()); + assert_eq!(map_tag_to_category("Law"), Vec::::new()); + assert_eq!(map_tag_to_category("Asia"), Vec::::new()); + } +} diff --git a/mlm_meta/src/traits.rs b/mlm_meta/src/traits.rs new file mode 100644 index 00000000..077bc677 --- /dev/null +++ b/mlm_meta/src/traits.rs @@ -0,0 +1,15 @@ +use anyhow::Result; +use async_trait::async_trait; +use mlm_db::TorrentMeta; + +/// Implementations should populate and return a `TorrentMeta` containing as +/// much normalized metadata as possible. +#[async_trait] +pub trait Provider: Send + Sync { + /// Short stable id for the provider, e.g. "goodreads" + fn id(&self) -> &str; + + /// Fetch metadata for the given `TorrentMeta` query. Return Ok(TorrentMeta) + /// on success. + async fn fetch(&self, query: &TorrentMeta) -> Result; +} diff --git a/mlm_meta/tests/hardcover_tests.rs b/mlm_meta/tests/hardcover_tests.rs new file mode 100644 index 00000000..f3603cb5 --- /dev/null +++ b/mlm_meta/tests/hardcover_tests.rs @@ -0,0 +1,422 @@ +use std::sync::Arc; + +use mlm_db::TorrentMeta; +use mlm_meta::Provider; +use mlm_meta::providers::Hardcover; + +mod helper { + use anyhow::Result; + use async_trait::async_trait; + use mlm_meta::http::HttpClient; + + pub struct MockClient { + resps: std::sync::Mutex>, + } + + impl MockClient { + pub fn new(resp: &str) -> Self { + Self { + resps: std::sync::Mutex::new(vec![resp.to_string()]), + } + } + + pub fn new_many(resps: &[&str]) -> Self { + Self { + resps: std::sync::Mutex::new(resps.iter().map(|s| s.to_string()).collect()), + } + } + } + + #[async_trait] + impl HttpClient for MockClient { + async fn get(&self, _url: &str) -> Result { + Ok(String::new()) + } + + async fn post( + &self, + _url: &str, + _body: Option<&str>, + _headers: &[(&str, &str)], + ) -> Result { + let mut guard = self.resps.lock().unwrap(); + if guard.is_empty() { + return Ok(String::new()); + } + Ok(guard.remove(0)) + } + } +} + +#[tokio::test] +async fn hardcover_selects_best_candidate() { + use helper::MockClient; + + let search = r#"{ "data": { "search": { "results": { "hits": [ + { "document": { "id": 10, "title": "The Great Adventure", "author_names": ["Alice Author"], "description": "A" } }, + { "document": { "id": 11, "title": "Great Adventure", "author_names": ["Bob Smith"], "description": "B" } } + ] } } } }"#; + let detail = r#"{ "data": { "books_by_pk": { + "id": 11, + "title": "Great Adventure", + "subtitle": null, + "headline": null, + "description": "B", + "contributions": [{ "author": { "name": "Bob Smith" }, "contribution": null }], + "book_series": [], + "taggings": [] + } } }"#; + + let client = Arc::new(MockClient::new_many(&[search, detail])); + let prov = Hardcover::with_client("http://example/graphql", client, None); + + let query_meta = TorrentMeta { + title: "Great Adventure".to_string(), + authors: vec!["Bob Smith".to_string()], + ..Default::default() + }; + + let m = prov + .fetch(&query_meta) + .await + .expect("should select best candidate"); + assert!(m.authors.iter().any(|a| a.to_lowercase().contains("bob"))); + assert!(m.title.to_lowercase().contains("great")); +} + +#[tokio::test] +async fn hardcover_parses_tags_and_isbn() { + use helper::MockClient; + + let search = r#"{ "data": { "search": { "results": { "hits": [ + { "document": { "id": 20, "title": "Unique Book", "author_names": ["Unique Author"], "description": "desc", "isbns": ["9781234567897"] } } + ] } } } }"#; + let detail = r#"{ "data": { "books_by_pk": { + "id": 20, + "title": "Unique Book", + "subtitle": null, + "headline": null, + "description": "desc", + "contributions": [{ "author": { "name": "Unique Author" }, "contribution": null }], + "book_series": [], + "taggings": [ + { "id": 1, "tag": { "tag": "Tropes", "tag_category": { "category": "Tag" } } }, + { "id": 2, "tag": { "tag": "Romance", "tag_category": { "category": "Genre" } } } + ] + } } }"#; + + let client = Arc::new(MockClient::new_many(&[search, detail])); + let prov = Hardcover::with_client("http://example/graphql", client, None); + + let query_meta = TorrentMeta { + title: "Unique Book".to_string(), + ..Default::default() + }; + + let m = prov + .fetch(&query_meta) + .await + .expect("should parse tags and isbn"); + assert!(m.tags.iter().any(|t| t == "tropes")); + assert!(m.tags.iter().any(|t| t == "romance")); + assert!(m.categories.contains(&mlm_db::Category::Romance)); + assert!(!m.categories.contains(&mlm_db::Category::CharacterDriven)); + assert_eq!(m.ids.get("isbn").map(|s| s.as_str()), Some("9781234567897")); +} + +#[tokio::test] +async fn hardcover_empty_results_returns_err() { + use helper::MockClient; + + let data = r#"{ "data": { "search": { "results": { "hits": [] } } } }"#; + let client = Arc::new(MockClient::new(data)); + let prov = Hardcover::with_client("http://example/graphql", client, None); + + let query_meta = TorrentMeta { + title: "Does Not Exist".to_string(), + ..Default::default() + }; + let res = prov.fetch(&query_meta).await; + assert!(res.is_err(), "expected error for empty results"); +} + +#[tokio::test] +async fn hardcover_handles_malformed_fields_gracefully() { + use helper::MockClient; + + let search = r#"{ "data": { "search": { "results": { "hits": [ + { "document": { "id": 30, "title": "Any Title", "description": "only desc" } } + ] } } } }"#; + let detail = r#"{ "data": { "books_by_pk": { + "id": 30, + "title": "Any Title", + "subtitle": null, + "headline": null, + "description": "only desc", + "contributions": [], + "book_series": [], + "taggings": [] + } } }"#; + let client = Arc::new(MockClient::new_many(&[search, detail])); + let prov = Hardcover::with_client("http://example/graphql", client, None); + + let query_meta = TorrentMeta { + title: "Any Title".to_string(), + ..Default::default() + }; + let m = prov + .fetch(&query_meta) + .await + .expect("should handle malformed fields"); + assert_eq!(m.title, "Any Title"); + assert_eq!(m.description, "only desc"); + assert!(m.tags.is_empty()); + assert!(!m.ids.contains_key("isbn")); +} + +#[tokio::test] +async fn hardcover_uses_first_isbn_when_multiple_present() { + use helper::MockClient; + + let search = r#"{ "data": { "search": { "results": { "hits": [ + { "document": { "id": 40, "title": "Multi ISBN", "author_names": ["A"], "isbns": ["FIRSTISBN","SECONDISBN"] } } + ] } } } }"#; + let detail = r#"{ "data": { "books_by_pk": { + "id": 40, + "title": "Multi ISBN", + "subtitle": null, + "headline": null, + "description": "", + "contributions": [{ "author": { "name": "A" }, "contribution": null }], + "book_series": [], + "taggings": [] + } } }"#; + let client = Arc::new(MockClient::new_many(&[search, detail])); + let prov = Hardcover::with_client("http://example/graphql", client, None); + + let query_meta = TorrentMeta { + title: "Multi ISBN".to_string(), + ..Default::default() + }; + let m = prov + .fetch(&query_meta) + .await + .expect("should parse multiple isbns"); + assert_eq!(m.ids.get("isbn").map(|s| s.as_str()), Some("FIRSTISBN")); +} + +#[tokio::test] +async fn hardcover_tie_breaker_prefers_first_result() { + use helper::MockClient; + + let search = r#"{ "data": { "search": { "results": { "hits": [ + { "document": { "id": 50, "title": "Tie Book", "author_names": ["Author One"], "description": "first" } }, + { "document": { "id": 51, "title": "Tie Book", "author_names": ["Author One"], "description": "second" } } + ] } } } }"#; + let detail = r#"{ "data": { "books_by_pk": { + "id": 50, + "title": "Tie Book", + "subtitle": null, + "headline": null, + "description": "first", + "contributions": [{ "author": { "name": "Author One" }, "contribution": null }], + "book_series": [], + "taggings": [] + } } }"#; + + let client = Arc::new(MockClient::new_many(&[search, detail])); + let prov = Hardcover::with_client("http://example/graphql", client, None); + + let query_meta = TorrentMeta { + title: "Tie Book".to_string(), + ..Default::default() + }; + let m = prov + .fetch(&query_meta) + .await + .expect("should return first result on tie"); + assert!(m.description == "first"); +} + +#[tokio::test] +async fn hardcover_handles_minor_typos() { + use helper::MockClient; + + let search = r#"{ "data": { "search": { "results": { "hits": [ + { "document": { "id": 60, "title": "Great Adventure", "author_names": ["Bob Smith"], "description": "B" } } + ] } } } }"#; + let detail = r#"{ "data": { "books_by_pk": { + "id": 60, + "title": "Great Adventure", + "subtitle": null, + "headline": null, + "description": "B", + "contributions": [{ "author": { "name": "Bob Smith" }, "contribution": null }], + "book_series": [], + "taggings": [] + } } }"#; + + let client = Arc::new(MockClient::new_many(&[search, detail])); + let prov = Hardcover::with_client("http://example/graphql", client, None); + + let query_meta = TorrentMeta { + title: "Gret Adventure".to_string(), + authors: vec!["Bob Smith".to_string()], + ..Default::default() + }; + let m = prov + .fetch(&query_meta) + .await + .expect("should match despite typo"); + assert!(m.title.to_lowercase().contains("great adventure")); +} + +#[tokio::test] +async fn hardcover_parses_isbn_from_search_results() { + use helper::MockClient; + + let search = r#"{ "data": { "search": { "results": { "hits": [ + { "document": { "id": 123, "title": "Detailed Book", "author_names": ["Detail Author"], "description": "short desc", "isbns": ["9781111111111"], "series_names": ["Series A"] } } + ] } } } }"#; + let detail = r#"{ "data": { "books_by_pk": { + "id": 123, + "title": "Detailed Book", + "subtitle": null, + "headline": null, + "description": "short desc", + "contributions": [{ "author": { "name": "Detail Author" }, "contribution": null }], + "book_series": [{ "position": 1, "details": "1", "series": { "name": "Series A" } }], + "taggings": [] + } } }"#; + + let client = Arc::new(MockClient::new_many(&[search, detail])); + let prov = Hardcover::with_client("http://example/graphql", client, None); + + let query_meta = TorrentMeta { + title: "Detailed Book".to_string(), + ..Default::default() + }; + let m = prov + .fetch(&query_meta) + .await + .expect("should parse search results"); + + assert_eq!(m.ids.get("isbn").map(|s| s.as_str()), Some("9781111111111")); + assert!(m.series.iter().any(|s| s.name == "Series A")); + assert_eq!(m.description, "short desc"); +} + +#[tokio::test] +async fn hardcover_title_only_fallback_still_scores_with_author() { + use helper::MockClient; + + // Query for "Boss of the Year" by "Nicole French" + // Results include a similar title by a different author + // The fallback to title-only should NOT match because author doesn't match + let search = r#"{ "data": { "search": { "results": { "hits": [ + { "document": { "id": 70, "title": "Not the Boss of the Year", "author_names": ["J.S. Cooper"], "description": "wrong author" } }, + { "document": { "id": 71, "title": "Boss of the Year", "author_names": ["Nicole French"], "description": "correct" } } + ] } } } }"#; + let detail = r#"{ "data": { "books_by_pk": { + "id": 71, + "title": "Boss of the Year", + "subtitle": null, + "headline": null, + "description": "correct", + "contributions": [{ "author": { "name": "Nicole French" }, "contribution": null }], + "book_series": [], + "taggings": [] + } } }"#; + + let client = Arc::new(MockClient::new_many(&[search, detail])); + let prov = Hardcover::with_client("http://example/graphql", client, None); + + let query_meta = TorrentMeta { + title: "Boss of the Year".to_string(), + authors: vec!["Nicole French".to_string()], + ..Default::default() + }; + + // Should NOT match "Not the Boss of the Year" by J.S. Cooper + // Should either match the correct one OR return error + let m = prov + .fetch(&query_meta) + .await + .expect("should find correct match"); + assert!( + m.title.to_lowercase().contains("boss of the year"), + "title should contain 'Boss of the Year'" + ); + assert!( + m.authors + .iter() + .any(|a| a.to_lowercase().contains("nicole")), + "author should be Nicole French, got: {:?}", + m.authors + ); +} + +#[tokio::test] +async fn hardcover_prefers_best_matching_edition_and_edition_specific_fields() { + use helper::MockClient; + + let search = r#"{ "data": { "search": { "results": { "hits": [ + { "document": { "id": 1421303, "title": "Quicksilver", "author_names": ["Callie Hart"], "isbns": ["9781399745420"] } } + ] } } } }"#; + + let detail = r#"{ "data": { "books_by_pk": { + "id": 1421303, + "title": "Quicksilver", + "subtitle": null, + "headline": null, + "description": "Book description", + "contributions": [{ "author": { "name": "Callie Hart" }, "contribution": null }], + "book_series": [], + "taggings": [], + "editions": [ + { + "language": { "language": "English" }, + "asin": null, + "isbn_10": "1399745425", + "isbn_13": "9781399745420", + "edition_format": "Paperback", + "contributions": [{ "contribution": null, "author": { "name": "Callie Hart" } }] + }, + { + "language": { "language": "English" }, + "asin": "B0DBJBFHGT", + "isbn_10": null, + "isbn_13": null, + "edition_format": "Audible", + "contributions": [ + { "contribution": null, "author": { "name": "Callie Hart" } }, + { "contribution": "Narrator", "author": { "name": "Stella Bloom" } } + ] + } + ] + } } }"#; + + let client = Arc::new(MockClient::new_many(&[search, detail])); + let prov = Hardcover::with_client("http://example/graphql", client, None); + + let query_meta = TorrentMeta { + title: "Quicksilver".to_string(), + authors: vec!["Callie Hart".to_string()], + media_type: mlm_db::MediaType::Audiobook, + language: Some(mlm_db::Language::English), + ..Default::default() + }; + + let m = prov + .fetch(&query_meta) + .await + .expect("should choose audible edition"); + + assert_eq!( + m.ids.get(mlm_db::ids::ASIN).map(|s| s.as_str()), + Some("B0DBJBFHGT") + ); + assert_eq!(m.media_type, mlm_db::MediaType::Audiobook); + assert_eq!(m.language, Some(mlm_db::Language::English)); + assert!(m.narrators.iter().any(|n| n == "Stella Bloom")); +} diff --git a/mlm_meta/tests/mock_fetcher.rs b/mlm_meta/tests/mock_fetcher.rs new file mode 100644 index 00000000..d1e1809c --- /dev/null +++ b/mlm_meta/tests/mock_fetcher.rs @@ -0,0 +1,71 @@ +use anyhow::Result; +use mlm_meta::http::HttpClient; +use std::sync::Arc; + +pub struct MockClient; + +#[async_trait::async_trait] +impl HttpClient for MockClient { + async fn get(&self, url: &str) -> Result { + let u = url::Url::parse(url).map_err(|e| anyhow::anyhow!(e))?; + if !u.host_str().is_some_and(|h| h.contains("romance.io")) { + return Err(anyhow::anyhow!("unexpected host in test fetch")); + } + + if u.path().starts_with("/json/search_books") { + return Ok(r#"{ + "success": true, + "books": [ + { + "_id":"68b95a390bc0cee156edaf2b", + "info":{"title":"Of Ink and Alchemy"}, + "authors":[{"name":"Sloane St. James"}], + "url":"/books/68b95a390bc0cee156edaf2b/of-ink-and-alchemy-sloane-st-james" + } + ] +}"# + .to_string()); + } + if u.path().starts_with("/json/search_authors") { + return Ok(r#"{ "success": true, "authors": [] }"#.to_string()); + } + if u.path().starts_with("/search") { + return Ok("search".to_string()); + } + + Ok(r#" + + + + + +"# + .to_string()) + } + + async fn post( + &self, + _url: &str, + _body: Option<&str>, + _headers: &[(&str, &str)], + ) -> Result { + Err(anyhow::anyhow!("post not implemented in mock")) + } +} + +pub fn boxed() -> Arc { + Arc::new(MockClient) +} diff --git a/mlm_meta/tests/mock_openlibrary.rs b/mlm_meta/tests/mock_openlibrary.rs new file mode 100644 index 00000000..2067fc20 --- /dev/null +++ b/mlm_meta/tests/mock_openlibrary.rs @@ -0,0 +1,55 @@ +use anyhow::Result; +use mlm_meta::http::HttpClient; +use std::sync::Arc; + +fn resolve_plan_file(rel: &str) -> std::io::Result { + let mut dir = std::env::current_dir()?; + loop { + let candidate = dir.join(rel); + if candidate.exists() { + return Ok(candidate); + } + if !dir.pop() { + break; + } + } + Err(std::io::Error::new( + std::io::ErrorKind::NotFound, + format!("could not find {}", rel), + )) +} + +pub struct MockOpenLibraryClient; + +#[async_trait::async_trait] +impl HttpClient for MockOpenLibraryClient { + async fn get(&self, url: &str) -> Result { + let u = url::Url::parse(url).map_err(|e| anyhow::anyhow!(e))?; + let rel = if u.host_str().is_some_and(|h| h.contains("openlibrary.org")) { + if u.path().starts_with("/search.json") { + "plan/openlibrary/search.json" + } else { + return Err(anyhow::anyhow!("unexpected path: {}", u.path())); + } + } else { + return Err(anyhow::anyhow!("unexpected host in test fetch")); + }; + + let p = resolve_plan_file(rel).map_err(|e| anyhow::anyhow!(e))?; + let s = std::fs::read_to_string(p).map_err(|e| anyhow::anyhow!(e))?; + Ok(s) + } + + async fn post( + &self, + _url: &str, + _body: Option<&str>, + _headers: &[(&str, &str)], + ) -> Result { + Err(anyhow::anyhow!("post not implemented in mock")) + } +} + +pub fn boxed() -> Arc { + Arc::new(MockOpenLibraryClient) +} diff --git a/mlm_meta/tests/openlibrary_tests.rs b/mlm_meta/tests/openlibrary_tests.rs new file mode 100644 index 00000000..db05a6fe --- /dev/null +++ b/mlm_meta/tests/openlibrary_tests.rs @@ -0,0 +1,118 @@ +use mlm_db::TorrentMeta; +use mlm_meta::Provider; +use mlm_meta::http::HttpClient; +use mlm_meta::providers::OpenLibrary; + +mod mock_openlibrary; + +#[tokio::test] +async fn openlibrary_parses_search_results() { + let prov = OpenLibrary::with_client(mock_openlibrary::boxed()); + let query_meta = TorrentMeta { + title: "The Lord of the Rings".to_string(), + ..Default::default() + }; + let m = prov + .fetch(&query_meta) + .await + .expect("should fetch metadata"); + assert!(m.title.contains("Lord of the Rings")); + assert!(!m.authors.is_empty()); +} + +#[tokio::test] +async fn openlibrary_matches_title_and_author() { + let prov = OpenLibrary::with_client(mock_openlibrary::boxed()); + let query_meta = TorrentMeta { + title: "The Lord of the Rings".to_string(), + authors: vec!["J.R.R. Tolkien".to_string()], + ..Default::default() + }; + let m = prov + .fetch(&query_meta) + .await + .expect("should match title+author"); + assert!(m.title.to_lowercase().contains("lord of the rings")); + assert!( + m.authors + .iter() + .any(|a| a.to_lowercase().contains("tolkien")) + ); +} + +#[tokio::test] +async fn openlibrary_extracts_isbn() { + let prov = OpenLibrary::with_client(mock_openlibrary::boxed()); + let query_meta = TorrentMeta { + title: "The Lord of the Rings".to_string(), + ..Default::default() + }; + let m = prov + .fetch(&query_meta) + .await + .expect("should fetch metadata"); + assert!( + m.ids.values().any(|v| v.starts_with("978")), + "should have ISBN" + ); +} + +#[tokio::test] +async fn openlibrary_extracts_subjects_as_tags() { + let prov = OpenLibrary::with_client(mock_openlibrary::boxed()); + let query_meta = TorrentMeta { + title: "The Lord of the Rings".to_string(), + ..Default::default() + }; + let m = prov + .fetch(&query_meta) + .await + .expect("should fetch metadata"); + assert!(!m.tags.is_empty(), "should have subject tags"); +} + +#[tokio::test] +async fn openlibrary_title_only_search() { + let prov = OpenLibrary::with_client(mock_openlibrary::boxed()); + let query_meta = TorrentMeta { + title: "The Lord of the Rings".to_string(), + authors: vec![], + ..Default::default() + }; + let m = prov + .fetch(&query_meta) + .await + .expect("should find result with title only"); + assert!(m.title.to_lowercase().contains("lord of the rings")); +} + +#[tokio::test] +async fn openlibrary_no_results() { + use std::sync::Arc; + + struct EmptyClient; + + #[async_trait::async_trait] + impl HttpClient for EmptyClient { + async fn get(&self, _url: &str) -> anyhow::Result { + Ok(r#"{"numFound": 0, "docs": []}"#.to_string()) + } + + async fn post( + &self, + _url: &str, + _body: Option<&str>, + _headers: &[(&str, &str)], + ) -> anyhow::Result { + anyhow::bail!("post not implemented") + } + } + + let prov = OpenLibrary::with_client(Arc::new(EmptyClient)); + let query_meta = TorrentMeta { + title: "Nonexistent Title XYZ123".to_string(), + ..Default::default() + }; + let res = prov.fetch(&query_meta).await; + assert!(res.is_err(), "expected no results for nonexistent title"); +} diff --git a/mlm_meta/tests/provider_tests.rs b/mlm_meta/tests/provider_tests.rs new file mode 100644 index 00000000..de3a394c --- /dev/null +++ b/mlm_meta/tests/provider_tests.rs @@ -0,0 +1,32 @@ +use mlm_db::TorrentMeta; +use mlm_meta::providers::FakeProvider; +use mlm_meta::traits::Provider; + +#[tokio::test] +async fn fake_provider_returns_meta() { + let meta = TorrentMeta { + title: "The Test Book".to_string(), + authors: vec!["Jane Doe".to_string()], + description: "desc".to_string(), + ..Default::default() + }; + + let provider = FakeProvider::new("fake", Some(meta.clone())); + let mut q: TorrentMeta = Default::default(); + q.ids + .insert("isbn".to_string(), "9781234567897".to_string()); + let got = provider.fetch(&q).await.expect("should return meta"); + assert_eq!(got.title, meta.title); + assert_eq!(got.authors, meta.authors); +} + +#[tokio::test] +async fn fake_provider_not_found() { + let provider = FakeProvider::new("fake", None); + let q = TorrentMeta { + title: "nope".to_string(), + ..Default::default() + }; + let res = provider.fetch(&q).await; + assert!(res.is_err()); +} diff --git a/mlm_meta/tests/romanceio_tests.rs b/mlm_meta/tests/romanceio_tests.rs new file mode 100644 index 00000000..7658840e --- /dev/null +++ b/mlm_meta/tests/romanceio_tests.rs @@ -0,0 +1,165 @@ +use mlm_db::{Category, TorrentMeta}; +use mlm_meta::Provider; +use mlm_meta::http::HttpClient; +use mlm_meta::providers::RomanceIo; + +mod mock_fetcher; + +#[tokio::test] +async fn romanceio_parses_book() { + let prov = RomanceIo::with_client(mock_fetcher::boxed()); + let query_meta = TorrentMeta { + title: "Of Ink and Alchemy".to_string(), + ..Default::default() + }; + let m = prov.fetch(&query_meta).await.expect("should parse book"); + assert!(m.title.contains("Of Ink and Alchemy")); + assert!(m.authors.iter().any(|a| a.contains("Sloane"))); + assert!(!m.description.is_empty()); +} + +#[tokio::test] +async fn romanceio_matches_title_and_author() { + let prov = RomanceIo::with_client(mock_fetcher::boxed()); + let query_meta = TorrentMeta { + title: "Of Ink and Alchemy".to_string(), + authors: vec!["Sloane St. James".to_string()], + ..Default::default() + }; + let m = prov + .fetch(&query_meta) + .await + .expect("should match title+author"); + assert!(m.title.to_lowercase().contains("of ink and alchemy")); + assert!( + m.authors + .iter() + .any(|a| a.to_lowercase().contains("sloane")) + ); +} + +#[tokio::test] +async fn romanceio_rejects_title_with_nonmatching_author() { + let prov = RomanceIo::with_client(mock_fetcher::boxed()); + let query_meta = TorrentMeta { + title: "Of Ink and Alchemy".to_string(), + authors: vec!["Some Other Author".to_string()], + ..Default::default() + }; + let res = prov.fetch(&query_meta).await; + assert!(res.is_err(), "expected no result for non-matching author"); +} + +#[tokio::test] +async fn romanceio_rejects_different_title_same_author() { + let prov = RomanceIo::with_client(mock_fetcher::boxed()); + let query_meta = TorrentMeta { + title: "A Title That Does Not Exist".to_string(), + authors: vec!["Sloane St. James".to_string()], + ..Default::default() + }; + let res = prov.fetch(&query_meta).await; + assert!( + res.is_err(), + "expected no result for different title even if author matches" + ); +} + +#[tokio::test] +async fn romanceio_finds_late_result_in_json_array() { + use anyhow::Result; + use std::sync::Arc; + + struct CustomClient; + + #[async_trait::async_trait] + impl HttpClient for CustomClient { + async fn get(&self, url: &str) -> Result { + if url.contains("/json/search_books") { + let data = r#"{ + "success": true, + "books": [ + {"_id":"x1","info":{"title":"Unrelated Book"},"url":"/books/x1/unrelated"}, + {"_id":"x2","info":{"title":"Another Irrelevant"},"url":"/books/x2/irrelevant"}, + {"_id":"68b95a390bc0cee156edaf2b","info":{"title":"Of Ink and Alchemy"},"authors":[{"name":"Sloane St. James"}],"url":"/books/68b95a390bc0cee156edaf2b/of-ink-and-alchemy-sloane-st-james"} + ] + }"#; + return Ok(data.to_string()); + } + if url.contains("/books/68b95a390bc0cee156edaf2b") { + let html = r#" + + + + + +"#; + return Ok(html.to_string()); + } + Err(anyhow::anyhow!("unexpected url")) + } + + async fn post( + &self, + _url: &str, + _body: Option<&str>, + _headers: &[(&str, &str)], + ) -> Result { + Err(anyhow::anyhow!("post not implemented")) + } + } + + let prov = RomanceIo::with_client(Arc::new(CustomClient)); + let query_meta = TorrentMeta { + title: "Of Ink and Alchemy".to_string(), + authors: vec!["Sloane St. James".to_string()], + ..Default::default() + }; + let m = prov + .fetch(&query_meta) + .await + .expect("should find late result"); + assert!(m.title.to_lowercase().contains("of ink and alchemy")); +} + +#[tokio::test] +async fn parse_book_html_extracts_categories_and_tags() { + let prov = RomanceIo::with_client(mock_fetcher::boxed()); + let query_meta = TorrentMeta { + title: "Of Ink and Alchemy".to_string(), + authors: vec!["Sloane St. James".to_string()], + ..Default::default() + }; + let m = prov.fetch(&query_meta).await.expect("should parse book"); + + assert!(m.title.to_lowercase().contains("of ink and alchemy")); + assert!( + m.authors + .iter() + .any(|a| a.to_lowercase().contains("sloane")) + ); + + // categories should include contemporary and dark romance (derived from topics) + assert!( + m.categories + .iter() + .any(|c| c == &Category::ContemporaryRealist) + ); + assert!(m.categories.iter().any(|c| c == &Category::DarkRomance)); + + // tags should include some of the romance-specific tropes + let tags = m.tags.join(","); + assert!(tags.contains("age difference") || tags.contains("age gap")); + assert!(tags.contains("friends to lovers")); +} diff --git a/mlm_meta/tests/scoring_tests.rs b/mlm_meta/tests/scoring_tests.rs new file mode 100644 index 00000000..761c6fa7 --- /dev/null +++ b/mlm_meta/tests/scoring_tests.rs @@ -0,0 +1,49 @@ +use std::sync::Arc; + +use mlm_meta::{HttpClient, providers::romanceio::RomanceIo}; + +const SAMPLE_ROMANCE_HTML: &str = r#" + + + + + +"#; + +#[test] +fn parse_book_html_smoke() { + struct DummyClient; + #[async_trait::async_trait] + impl HttpClient for DummyClient { + async fn get(&self, _url: &str) -> anyhow::Result { + anyhow::bail!("not used") + } + async fn post( + &self, + _url: &str, + _body: Option<&str>, + _headers: &[(&str, &str)], + ) -> anyhow::Result { + anyhow::bail!("not used") + } + } + + let provider = RomanceIo::with_client(Arc::new(DummyClient)); + let meta = provider.parse_book_html(SAMPLE_ROMANCE_HTML).unwrap(); + + assert!(!meta.title.is_empty()); + assert!(!meta.authors.is_empty()); +} diff --git a/server/Cargo.toml b/server/Cargo.toml index ab92c832..9ac21718 100644 --- a/server/Cargo.toml +++ b/server/Cargo.toml @@ -32,6 +32,7 @@ matchr = "0.2.5" mlm_db = { path = "../mlm_db" } mlm_mam = { path = "../mlm_mam" } mlm_parse = { path = "../mlm_parse" } +mlm_meta = { path = "../mlm_meta" } native_db = { git = "https://github.com/StirlingMouse/native_db.git", branch = "0.8.x" } native_model = "0.4.20" once_cell = "1.21.3" @@ -85,3 +86,5 @@ embed-resource = "3.0.5" [dev-dependencies] tempfile = "3.24.0" +async-trait = "0.1" +url = "2.4" diff --git a/server/src/autograbber.rs b/server/src/autograbber.rs index 2d8d77d9..16498e4f 100644 --- a/server/src/autograbber.rs +++ b/server/src/autograbber.rs @@ -731,7 +731,8 @@ pub async fn update_torrent_meta( } } - if linker_is_owner && torrent.linker.is_none() + if linker_is_owner + && torrent.linker.is_none() && let Some(mam_torrent) = mam_torrent { torrent.linker = Some(mam_torrent.owner_name.clone()); diff --git a/server/src/config.rs b/server/src/config.rs index ddca8874..a40b85bb 100644 --- a/server/src/config.rs +++ b/server/src/config.rs @@ -1,8 +1,8 @@ use std::{collections::BTreeMap, path::PathBuf}; use mlm_db::{ - impls::{parse, parse_opt, parse_vec}, Flags, Language, MediaType, OldDbMainCat, Size, + impls::{parse, parse_opt, parse_vec}, }; use mlm_mam::{ enums::{Categories, SearchIn, SnatchlistType}, @@ -11,6 +11,72 @@ use mlm_mam::{ use serde::{Deserialize, Serialize}; use time::Date; +#[derive(Clone, Debug, Deserialize)] +#[serde(tag = "id", rename_all = "lowercase")] +pub enum ProviderConfig { + Hardcover(HardcoverConfig), + RomanceIo(RomanceIoConfig), + OpenLibrary(OpenLibraryConfig), +} + +#[derive(Clone, Debug, Deserialize)] +#[serde(deny_unknown_fields)] +pub struct HardcoverConfig { + #[serde(default = "default_provider_enabled")] + pub enabled: bool, + #[serde(default)] + pub timeout_secs: Option, + pub api_key: Option, +} + +#[derive(Clone, Debug, Deserialize)] +#[serde(deny_unknown_fields)] +pub struct RomanceIoConfig { + #[serde(default = "default_provider_enabled")] + pub enabled: bool, + #[serde(default)] + pub timeout_secs: Option, +} + +#[derive(Clone, Debug, Deserialize)] +#[serde(deny_unknown_fields)] +pub struct OpenLibraryConfig { + #[serde(default = "default_provider_enabled")] + pub enabled: bool, + #[serde(default)] + pub timeout_secs: Option, +} + +impl ProviderConfig { + pub fn id(&self) -> &str { + match self { + ProviderConfig::Hardcover(_) => "hardcover", + ProviderConfig::RomanceIo(_) => "romanceio", + ProviderConfig::OpenLibrary(_) => "openlibrary", + } + } + + pub fn enabled(&self) -> bool { + match self { + ProviderConfig::Hardcover(c) => c.enabled, + ProviderConfig::RomanceIo(c) => c.enabled, + ProviderConfig::OpenLibrary(c) => c.enabled, + } + } + + pub fn timeout_secs(&self) -> Option { + match self { + ProviderConfig::Hardcover(c) => c.timeout_secs, + ProviderConfig::RomanceIo(c) => c.timeout_secs, + ProviderConfig::OpenLibrary(c) => c.timeout_secs, + } + } +} + +fn default_provider_enabled() -> bool { + true +} + #[derive(Clone, Debug, Deserialize)] #[serde(deny_unknown_fields)] pub struct Config { @@ -74,6 +140,8 @@ pub struct Config { #[serde(default)] #[serde(rename = "library")] pub libraries: Vec, + #[serde(default)] + pub metadata_providers: Vec, } #[derive(Clone, Debug, Default, Deserialize)] @@ -424,3 +492,43 @@ fn default_music_types() -> Vec { fn default_radio_types() -> Vec { ["mp3"].iter().map(ToString::to_string).collect() } + +impl Default for Config { + fn default() -> Self { + Self { + mam_id: String::new(), + web_host: default_host(), + web_port: default_port(), + min_ratio: default_min_ratio(), + unsat_buffer: default_unsat_buffer(), + wedge_buffer: 0, + add_torrents_stopped: false, + exclude_narrator_in_library_dir: false, + search_interval: default_search_interval(), + link_interval: default_link_interval(), + import_interval: default_import_interval(), + ignore_torrents: vec![], + + audio_types: default_audio_types(), + ebook_types: default_ebook_types(), + music_types: default_music_types(), + radio_types: default_radio_types(), + + search: Default::default(), + audiobookshelf: None, + + autograbs: vec![], + snatchlist: vec![], + + goodreads_lists: vec![], + notion_lists: vec![], + + tags: vec![], + + qbittorrent: vec![], + + libraries: vec![], + metadata_providers: vec![], + } + } +} diff --git a/server/src/config_impl.rs b/server/src/config_impl.rs index b09cb514..e872a3ed 100644 --- a/server/src/config_impl.rs +++ b/server/src/config_impl.rs @@ -162,6 +162,7 @@ impl TorrentFilter { } } +#[allow(dead_code)] impl EditionFilter { pub fn matches(&self, torrent: &MaMTorrent) -> bool { if !self.media_type.is_empty() diff --git a/server/src/lib.rs b/server/src/lib.rs index 1e09b56d..40eba759 100644 --- a/server/src/lib.rs +++ b/server/src/lib.rs @@ -7,6 +7,7 @@ pub mod exporter; pub mod linker; pub mod lists; pub mod logging; +pub mod metadata; pub mod qbittorrent; pub mod snatchlist; pub mod stats; diff --git a/server/src/linker/common.rs b/server/src/linker/common.rs index e5cd4fec..7aded018 100644 --- a/server/src/linker/common.rs +++ b/server/src/linker/common.rs @@ -209,11 +209,22 @@ mod tests { #[test] fn test_select_format() { - struct F { name: String } + struct F { + name: String, + } impl HasFileName for F { - fn name_lower(&self) -> String { self.name.to_lowercase() } + fn name_lower(&self) -> String { + self.name.to_lowercase() + } } - let files = vec![F { name: "book.M4B".to_string() }, F { name: "cover.jpg".to_string() }]; + let files = vec![ + F { + name: "book.M4B".to_string(), + }, + F { + name: "cover.jpg".to_string(), + }, + ]; let wanted = vec!["m4b".to_string(), "mp3".to_string()]; let sel = select_format(&Some(vec!["m4b".to_string()]), &wanted, &files); assert_eq!(sel.unwrap(), ".m4b".to_string()); @@ -223,9 +234,17 @@ mod tests { #[test] fn test_select_format_leading_dot_in_override() { - struct F { name: String } - impl HasFileName for F { fn name_lower(&self) -> String { self.name.to_lowercase() } } - let files = vec![F { name: "track.FLAC".to_string() }]; + struct F { + name: String, + } + impl HasFileName for F { + fn name_lower(&self) -> String { + self.name.to_lowercase() + } + } + let files = vec![F { + name: "track.FLAC".to_string(), + }]; let wanted = vec!["mp3".to_string(), "flac".to_string()]; // override contains leading dot let sel = select_format(&Some(vec![".flac".to_string()]), &wanted, &files); @@ -234,9 +253,17 @@ mod tests { #[test] fn test_select_format_uppercase_extension() { - struct F { name: String } - impl HasFileName for F { fn name_lower(&self) -> String { self.name.to_lowercase() } } - let files = vec![F { name: "ALBUM.MP3".to_string() }]; + struct F { + name: String, + } + impl HasFileName for F { + fn name_lower(&self) -> String { + self.name.to_lowercase() + } + } + let files = vec![F { + name: "ALBUM.MP3".to_string(), + }]; let wanted = vec!["mp3".to_string()]; let sel = select_format(&None, &wanted, &files); assert_eq!(sel.unwrap(), ".mp3".to_string()); @@ -244,9 +271,17 @@ mod tests { #[test] fn test_select_format_missing_extension_returns_none() { - struct F { name: String } - impl HasFileName for F { fn name_lower(&self) -> String { self.name.to_lowercase() } } - let files = vec![F { name: "README".to_string() }]; + struct F { + name: String, + } + impl HasFileName for F { + fn name_lower(&self) -> String { + self.name.to_lowercase() + } + } + let files = vec![F { + name: "README".to_string(), + }]; let wanted = vec!["m4b".to_string()]; let sel = select_format(&None, &wanted, &files); assert!(sel.is_none()); @@ -254,9 +289,17 @@ mod tests { #[test] fn test_select_format_overridden_empty_vector() { - struct F { name: String } - impl HasFileName for F { fn name_lower(&self) -> String { self.name.to_lowercase() } } - let files = vec![F { name: "song.mp3".to_string() }]; + struct F { + name: String, + } + impl HasFileName for F { + fn name_lower(&self) -> String { + self.name.to_lowercase() + } + } + let files = vec![F { + name: "song.mp3".to_string(), + }]; let wanted = vec!["mp3".to_string()]; // override provided but empty -> should produce no selection let sel = select_format(&Some(vec![]), &wanted, &files); @@ -265,9 +308,17 @@ mod tests { #[test] fn test_select_format_wanted_empty_then_none() { - struct F { name: String } - impl HasFileName for F { fn name_lower(&self) -> String { self.name.to_lowercase() } } - let files = vec![F { name: "file.mp3".to_string() }]; + struct F { + name: String, + } + impl HasFileName for F { + fn name_lower(&self) -> String { + self.name.to_lowercase() + } + } + let files = vec![F { + name: "file.mp3".to_string(), + }]; let wanted: Vec = vec![]; let sel = select_format(&None, &wanted, &files); assert!(sel.is_none()); diff --git a/server/src/linker/duplicates.rs b/server/src/linker/duplicates.rs index cf6f5997..511adfbc 100644 --- a/server/src/linker/duplicates.rs +++ b/server/src/linker/duplicates.rs @@ -67,11 +67,15 @@ pub fn rank_torrents(config: &Config, batch: Vec) -> Vec { #[cfg(test)] mod tests { use super::*; + use mlm_db::{Language, MainCat, MediaType, MetadataSource, Size, Timestamp, TorrentMeta}; use std::collections::BTreeMap; - use mlm_db::{MediaType, Size, TorrentMeta, Timestamp, MetadataSource, MainCat, Language}; - use crate::config::SearchConfig; - fn create_test_torrent(id: &str, title: &str, filetypes: Vec, size_bytes: u64) -> Torrent { + fn create_test_torrent( + id: &str, + title: &str, + filetypes: Vec, + size_bytes: u64, + ) -> Torrent { let meta = TorrentMeta { title: title.to_string(), filetypes, @@ -116,43 +120,20 @@ mod tests { fn create_test_config() -> Config { Config { mam_id: "test".to_string(), - web_host: "0.0.0.0".to_string(), - web_port: 3157, - min_ratio: 2.0, - unsat_buffer: 10, - wedge_buffer: 0, - add_torrents_stopped: false, - exclude_narrator_in_library_dir: false, - search_interval: 30, - link_interval: 10, - import_interval: 135, - ignore_torrents: vec![], - audio_types: vec!["m4b".to_string(), "mp3".to_string()], - ebook_types: vec!["epub".to_string(), "pdf".to_string()], - music_types: vec!["flac".to_string(), "mp3".to_string()], - radio_types: vec!["mp3".to_string()], - search: SearchConfig::default(), - audiobookshelf: None, - autograbs: vec![], - snatchlist: vec![], - goodreads_lists: vec![], - notion_lists: vec![], - tags: vec![], - qbittorrent: vec![], - libraries: vec![], + ..Default::default() } } #[test] fn test_rank_torrents_preference() { let config = create_test_config(); - + let t1 = create_test_torrent("1", "Title", vec!["mp3".to_string()], 100); let t2 = create_test_torrent("2", "Title", vec!["m4b".to_string()], 100); - + let batch = vec![t1.clone(), t2.clone()]; let ranked = rank_torrents(&config, batch); - + assert_eq!(ranked[0].id, "2"); // m4b is preferred over mp3 assert_eq!(ranked[1].id, "1"); } @@ -160,31 +141,32 @@ mod tests { #[test] fn test_rank_torrents_size_tie_break() { let config = create_test_config(); - + let t1 = create_test_torrent("1", "Title", vec!["m4b".to_string()], 100); let t2 = create_test_torrent("2", "Title", vec!["m4b".to_string()], 200); - + let batch = vec![t1.clone(), t2.clone()]; let ranked = rank_torrents(&config, batch); - + assert_eq!(ranked[0].id, "2"); // Larger size wins tie assert_eq!(ranked[1].id, "1"); } #[tokio::test] async fn test_find_matches() -> Result<()> { - let tmp_dir = std::env::temp_dir().join(format!("mlm_test_duplicates_{}", std::process::id())); + let tmp_dir = + std::env::temp_dir().join(format!("mlm_test_duplicates_{}", std::process::id())); let _ = fs::remove_dir_all(&tmp_dir); fs::create_dir_all(&tmp_dir)?; let db_path = tmp_dir.join("test.db"); - + let db = native_db::Builder::new().create(&mlm_db::MODELS, &db_path)?; mlm_db::migrate(&db)?; - + let t1 = create_test_torrent("1", "My Book", vec!["m4b".to_string()], 100); let t2 = create_test_torrent("2", "My Book", vec!["mp3".to_string()], 150); let t3 = create_test_torrent("3", "Other Book", vec!["m4b".to_string()], 100); - + { let rw = db.rw_transaction()?; rw.insert(t1.clone())?; @@ -192,14 +174,13 @@ mod tests { rw.insert(t3.clone())?; rw.commit()?; } - + let matches = find_matches(&db, &t1)?; assert_eq!(matches.len(), 1); assert_eq!(matches[0].id, "2"); - + drop(db); let _ = fs::remove_dir_all(tmp_dir); Ok(()) } } - diff --git a/server/src/linker/torrent.rs b/server/src/linker/torrent.rs index 9b51b31e..32f5f0d9 100644 --- a/server/src/linker/torrent.rs +++ b/server/src/linker/torrent.rs @@ -271,6 +271,7 @@ where if torrent.progress < 1.0 { continue; } + let library = find_library(&config, &torrent); let r = db.r_transaction()?; let mut existing_torrent: Option = r.get().primary(torrent.hash.clone())?; @@ -340,6 +341,9 @@ where ) .await .context("match_torrent"); + if let Err(e) = &result { + debug!("match_torrent error for {}: {:#}", torrent.hash, e); + } update_errored_torrent( &db, ErroredTorrentId::Linker(torrent.hash.clone()), @@ -713,6 +717,8 @@ async fn link_torrent( ) -> Result<()> { let mut library_files = vec![]; + // Removed temporary debug prints that were used during investigation. + let library_path = if library.options().method != LibraryLinkMethod::NoLink { let Some(mut dir) = library_dir(config.exclude_narrator_in_library_dir, library, meta) else { @@ -876,43 +882,18 @@ mod tests { fn test_find_library_by_download_dir() { let cfg = Config { mam_id: "m".to_string(), - web_host: "".to_string(), - web_port: 0, - min_ratio: 0.0, - unsat_buffer: 0, - wedge_buffer: 0, - add_torrents_stopped: false, - exclude_narrator_in_library_dir: false, - search_interval: 0, - link_interval: 0, - import_interval: 0, - ignore_torrents: vec![], - audio_types: vec![], - ebook_types: vec![], - music_types: vec![], - radio_types: vec![], - search: crate::config::SearchConfig::default(), - audiobookshelf: None, - autograbs: vec![], - snatchlist: vec![], - goodreads_lists: vec![], - notion_lists: vec![], - tags: vec![], - qbittorrent: vec![], libraries: vec![Library::ByDownloadDir(LibraryByDownloadDir { download_dir: PathBuf::from("/downloads"), options: LibraryOptions { name: None, - library_dir: PathBuf::from("/library"), + library_dir: PathBuf::from("/lib"), method: LibraryLinkMethod::Hardlink, audio_types: None, ebook_types: None, }, - tag_filters: LibraryTagFilters { - allow_tags: vec![], - deny_tags: vec![], - }, + tag_filters: LibraryTagFilters::default(), })], + ..Default::default() }; let qbit_torrent = qbit::models::Torrent { @@ -932,29 +913,6 @@ mod tests { fn test_find_library_by_category() { let cfg = Config { mam_id: "m".to_string(), - web_host: "".to_string(), - web_port: 0, - min_ratio: 0.0, - unsat_buffer: 0, - wedge_buffer: 0, - add_torrents_stopped: false, - exclude_narrator_in_library_dir: false, - search_interval: 0, - link_interval: 0, - import_interval: 0, - ignore_torrents: vec![], - audio_types: vec![], - ebook_types: vec![], - music_types: vec![], - radio_types: vec![], - search: crate::config::SearchConfig::default(), - audiobookshelf: None, - autograbs: vec![], - snatchlist: vec![], - goodreads_lists: vec![], - notion_lists: vec![], - tags: vec![], - qbittorrent: vec![], libraries: vec![Library::ByCategory(LibraryByCategory { category: "audiobooks".to_string(), options: LibraryOptions { @@ -969,6 +927,7 @@ mod tests { deny_tags: vec![], }, })], + ..Default::default() }; let qbit_torrent = qbit::models::Torrent { @@ -988,29 +947,6 @@ mod tests { fn test_find_library_skips_rip_dir() { let cfg = Config { mam_id: "m".to_string(), - web_host: "".to_string(), - web_port: 0, - min_ratio: 0.0, - unsat_buffer: 0, - wedge_buffer: 0, - add_torrents_stopped: false, - exclude_narrator_in_library_dir: false, - search_interval: 0, - link_interval: 0, - import_interval: 0, - ignore_torrents: vec![], - audio_types: vec![], - ebook_types: vec![], - music_types: vec![], - radio_types: vec![], - search: crate::config::SearchConfig::default(), - audiobookshelf: None, - autograbs: vec![], - snatchlist: vec![], - goodreads_lists: vec![], - notion_lists: vec![], - tags: vec![], - qbittorrent: vec![], libraries: vec![Library::ByRipDir(crate::config::LibraryByRipDir { rip_dir: PathBuf::from("/rip"), options: LibraryOptions { @@ -1022,6 +958,7 @@ mod tests { }, filter: crate::config::EditionFilter::default(), })], + ..Default::default() }; let qbit_torrent = qbit::models::Torrent { @@ -1175,30 +1112,7 @@ mod tests { }; let cfg = Config { mam_id: "m".to_string(), - web_host: "".to_string(), - web_port: 0, - min_ratio: 0.0, - unsat_buffer: 0, - wedge_buffer: 0, - add_torrents_stopped: false, - exclude_narrator_in_library_dir: false, - search_interval: 0, - link_interval: 0, - import_interval: 0, - ignore_torrents: vec![], - audio_types: vec![], - ebook_types: vec![], - music_types: vec![], - radio_types: vec![], - search: crate::config::SearchConfig::default(), - audiobookshelf: None, - autograbs: vec![], - snatchlist: vec![], - goodreads_lists: vec![], - notion_lists: vec![], - tags: vec![], - qbittorrent: vec![], - libraries: vec![], + ..Default::default() }; let update = check_torrent_updates(&mut torrent, &qbit_torrent, None, &cfg, &[]); @@ -1485,30 +1399,7 @@ mod tests { fn mock_config() -> Config { Config { mam_id: "m".to_string(), - web_host: "".to_string(), - web_port: 0, - min_ratio: 0.0, - unsat_buffer: 0, - wedge_buffer: 0, - add_torrents_stopped: false, - exclude_narrator_in_library_dir: false, - search_interval: 0, - link_interval: 0, - import_interval: 0, - ignore_torrents: vec![], - audio_types: vec![], - ebook_types: vec![], - music_types: vec![], - radio_types: vec![], - search: crate::config::SearchConfig::default(), - audiobookshelf: None, - autograbs: vec![], - snatchlist: vec![], - goodreads_lists: vec![], - notion_lists: vec![], - tags: vec![], - qbittorrent: vec![], - libraries: vec![], + ..Default::default() } } diff --git a/server/src/main.rs b/server/src/main.rs index 75020420..6034e56d 100644 --- a/server/src/main.rs +++ b/server/src/main.rs @@ -38,6 +38,7 @@ use mlm::{ config::Config, linker::{folder::link_folders_to_library, torrent::link_torrents_to_library}, lists::{get_lists, run_list_import}, + metadata::MetadataService, snatchlist::run_snatchlist_search, stats::{Context, Stats, Triggers}, torrent_downloader::grab_selected_torrents, @@ -209,6 +210,37 @@ async fn app_main() -> Result<()> { let stats = Stats::new(); + // Instantiate metadata service from config provider settings + let default_timeout = Duration::from_secs(5); + // Convert Config's ProviderConfig -> metadata::ProviderSetting + let provider_settings: Vec = config + .metadata_providers + .iter() + .map(|p| match p { + mlm::config::ProviderConfig::Hardcover(c) => { + mlm::metadata::ProviderSetting::Hardcover { + enabled: c.enabled, + timeout_secs: c.timeout_secs, + api_key: c.api_key.clone(), + } + } + mlm::config::ProviderConfig::RomanceIo(c) => { + mlm::metadata::ProviderSetting::RomanceIo { + enabled: c.enabled, + timeout_secs: c.timeout_secs, + } + } + mlm::config::ProviderConfig::OpenLibrary(c) => { + mlm::metadata::ProviderSetting::OpenLibrary { + enabled: c.enabled, + timeout_secs: c.timeout_secs, + } + } + }) + .collect(); + let metadata_service = MetadataService::from_settings(&provider_settings, default_timeout); + let metadata_service = Arc::new(metadata_service); + let (mut search_tx, mut search_rx) = (BTreeMap::new(), BTreeMap::new()); let (mut import_tx, mut import_rx) = (BTreeMap::new(), BTreeMap::new()); let (torrent_linker_tx, torrent_linker_rx) = watch::channel(()); @@ -704,6 +736,7 @@ async fn app_main() -> Result<()> { db, mam: Arc::new(mam), stats, + metadata: metadata_service, triggers, }; diff --git a/server/src/metadata/mam_meta.rs b/server/src/metadata/mam_meta.rs new file mode 100644 index 00000000..f7280417 --- /dev/null +++ b/server/src/metadata/mam_meta.rs @@ -0,0 +1,104 @@ +use crate::stats::Context; +use anyhow::Result; +use mlm_db::TorrentMeta; + +/// Match metadata for a given original `TorrentMeta` using the selected +/// provider id. This function does NOT persist changes to the database; it +/// performs the provider query and returns the new metadata and the list of +/// diffed fields so the caller can decide how to persist/apply them. +pub async fn match_meta( + ctx: &Context, + orig: &TorrentMeta, + provider_id: &str, +) -> Result<(TorrentMeta, String, Vec)> { + // Build a small query meta for providers to consume. Providers accept + // a TorrentMeta and may read any fields they need. + let mut query: TorrentMeta = Default::default(); + if let Some(isbn) = orig.ids.get(mlm_db::ids::ISBN) { + query + .ids + .insert(mlm_db::ids::ISBN.to_string(), isbn.clone()); + } + query.title = orig.title.clone(); + query.authors = orig.authors.clone(); + + // Delegate provider selection and request-timeout handling to the + // centralized MetadataService attached to the Context. This keeps + // provider configuration in one place and avoids duplicating instantiation + // logic here. + let fetched = ctx.metadata.fetch_provider(ctx, query, provider_id).await?; + + // Merge fetched metadata into original meta: only overwrite fields when + // the provider supplied non-empty / non-default values. This preserves + // DB-only fields (sizes, upload timestamps, internal IDs) when providers + // don't populate them. + let merged = merge_meta(orig, &fetched); + + let fields = orig.diff(&merged); + + Ok((merged, provider_id.to_string(), fields)) +} + +fn merge_meta(orig: &TorrentMeta, incoming: &TorrentMeta) -> TorrentMeta { + let mut out = orig.clone(); + + // ids: overlay incoming entries (non-empty) on top of existing ids + for (k, v) in &incoming.ids { + if !v.is_empty() { + out.ids.insert(k.clone(), v.clone()); + } + } + + if !incoming.title.is_empty() { + out.title = incoming.title.clone(); + } + if !incoming.description.is_empty() { + out.description = incoming.description.clone(); + } + + if !incoming.authors.is_empty() { + out.authors = incoming.authors.clone(); + } + if !incoming.narrators.is_empty() { + out.narrators = incoming.narrators.clone(); + } + if !incoming.series.is_empty() { + out.series = incoming.series.clone(); + } + + if !incoming.categories.is_empty() { + out.categories = incoming.categories.clone(); + } + if !incoming.tags.is_empty() { + out.tags = incoming.tags.clone(); + } + + // Simple scalar/option overlays + if incoming.main_cat.is_some() { + out.main_cat = incoming.main_cat; + } + if incoming.language.is_some() { + out.language = incoming.language; + } + if incoming.flags.is_some() { + out.flags = incoming.flags; + } + if !incoming.filetypes.is_empty() { + out.filetypes = incoming.filetypes.clone(); + } + if incoming.num_files != 0 { + out.num_files = incoming.num_files; + } + // size: only overwrite when provider returned a non-zero size + if incoming.size.bytes() > 0 { + out.size = incoming.size; + } + if incoming.edition.is_some() { + out.edition = incoming.edition.clone(); + } + + // Always set source to Match for provider-updated data + out.source = mlm_db::MetadataSource::Match; + + out +} diff --git a/server/src/metadata/mod.rs b/server/src/metadata/mod.rs new file mode 100644 index 00000000..1e56db25 --- /dev/null +++ b/server/src/metadata/mod.rs @@ -0,0 +1,205 @@ +use crate::stats::Context; +use anyhow::Result; +use mlm_db::DatabaseExt as _; +use mlm_db::{Event, EventType, MetadataSource, TorrentMeta}; +use mlm_meta::providers::{Hardcover, OpenLibrary, RomanceIo}; +use mlm_meta::traits::Provider; +use std::sync::Arc; +use tokio::time::{Duration, timeout}; +use tracing::instrument; +pub mod mam_meta; + +pub struct MetadataService { + // Each provider can have its own request timeout + providers: Vec<(Arc, Duration)>, + #[allow(dead_code)] + default_timeout: Duration, +} + +/// Simple provider configuration used by the server. +pub enum ProviderSetting { + Hardcover { + enabled: bool, + timeout_secs: Option, + api_key: Option, + }, + RomanceIo { + enabled: bool, + timeout_secs: Option, + }, + OpenLibrary { + enabled: bool, + timeout_secs: Option, + }, +} + +impl MetadataService { + pub fn new(providers: Vec<(Arc, Duration)>, default_timeout: Duration) -> Self { + Self { + providers, + default_timeout, + } + } + + /// Build a MetadataService from a list of ProviderSetting. + pub fn from_settings(settings: &[ProviderSetting], default_timeout: Duration) -> Self { + let mut providers: Vec<(Arc, Duration)> = Vec::new(); + for s in settings { + match s { + ProviderSetting::Hardcover { + enabled, + timeout_secs, + api_key, + } => { + if !enabled { + continue; + } + let to = timeout_secs + .map(Duration::from_secs) + .unwrap_or(default_timeout); + providers.push((Arc::new(Hardcover::new(api_key.clone())), to)); + } + ProviderSetting::RomanceIo { + enabled, + timeout_secs, + } => { + if !enabled { + continue; + } + let to = timeout_secs + .map(Duration::from_secs) + .unwrap_or(default_timeout); + providers.push((Arc::new(RomanceIo::new()), to)); + } + ProviderSetting::OpenLibrary { + enabled, + timeout_secs, + } => { + if !enabled { + continue; + } + let to = timeout_secs + .map(Duration::from_secs) + .unwrap_or(default_timeout); + providers.push((Arc::new(OpenLibrary::new()), to)); + } + } + } + Self::new(providers, default_timeout) + } + + pub fn enabled_providers(&self) -> Vec { + self.providers + .iter() + .map(|(p, _)| p.id().to_string()) + .collect() + } + + #[instrument(skip(self, ctx))] + pub async fn fetch_and_persist( + &self, + ctx: &Context, + query: TorrentMeta, + ) -> Result { + // Query providers in parallel with timeout and pick first successful + let mut handles = vec![]; + for (p, to) in &self.providers { + let p = p.clone(); + let q = query.clone(); + let to = *to; + handles.push(tokio::spawn(async move { + let r = timeout(to, p.fetch(&q)).await; + match r { + Ok(Ok(m)) => Ok((p.id().to_string(), m)), + Ok(Err(e)) => Err(anyhow::anyhow!(e)), + Err(_) => Err(anyhow::anyhow!("timeout")), + } + })); + } + + let mut best: Option<(String, TorrentMeta)> = None; + + for h in handles { + match h.await { + Ok(Ok((id, meta))) => { + // pick first for now + best = Some((id, meta)); + break; + } + Ok(Err(e)) => { + tracing::debug!(error=?e, "provider task returned error"); + } + Err(join_err) => { + tracing::debug!(error=?join_err, "provider task panicked or was cancelled"); + } + } + } + + let (provider_id, meta): (String, TorrentMeta) = match best { + Some(v) => v, + None => return Err(anyhow::anyhow!("no provider matched")), + }; + + // Provider already returns a TorrentMeta; use it and mark source + let mut tmeta: TorrentMeta = meta; + tmeta.source = MetadataSource::Match; + + // Persist: write a SelectedTorrent or Torrent depending on context. + // Here we insert an Event to record metadata update and return the meta. + let ev = Event { + id: mlm_db::Uuid::new(), + torrent_id: None, + mam_id: None, + created_at: mlm_db::Timestamp::now(), + event: EventType::Updated { + fields: vec![], + source: (MetadataSource::Match, provider_id.clone()), + }, + }; + + // Insert event into DB using async rw transaction helper from mlm_db + let (guard, rw) = ctx.db.rw_async().await?; + rw.insert(ev)?; + rw.commit()?; + drop(guard); + + Ok(tmeta) + } + + /// Fetch using an explicit provider id. This looks up the provider in the + /// registered list and executes it with its configured timeout. Returns + /// the provider-provided TorrentMeta on success. + #[instrument(skip(self, _ctx))] + pub async fn fetch_provider( + &self, + _ctx: &Context, + query: TorrentMeta, + provider_id: &str, + ) -> Result { + // find provider + let mut found: Option<(Arc, Duration)> = None; + for (p, to) in &self.providers { + if p.id() == provider_id { + found = Some((p.clone(), *to)); + break; + } + } + + let (p, to) = match found { + Some(v) => v, + None => anyhow::bail!("unknown provider id: {}", provider_id), + }; + + // run with timeout + let r = timeout(to, p.fetch(&query)).await; + let meta = match r { + Ok(Ok(m)) => m, + Ok(Err(e)) => return Err(anyhow::anyhow!(e)), + Err(_) => return Err(anyhow::anyhow!("timeout")), + }; + + let mut tmeta: TorrentMeta = meta; + tmeta.source = MetadataSource::Match; + Ok(tmeta) + } +} diff --git a/server/src/stats.rs b/server/src/stats.rs index 3bda86bf..e22e7784 100644 --- a/server/src/stats.rs +++ b/server/src/stats.rs @@ -11,6 +11,7 @@ use tokio::sync::{ }; use crate::config::Config; +use crate::metadata::MetadataService; #[derive(Default)] pub struct StatsValues { @@ -55,6 +56,12 @@ impl Stats { } } +impl Default for Stats { + fn default() -> Self { + Self::new() + } +} + #[derive(Clone)] pub struct Events { pub event: (Sender>, Receiver>), @@ -76,6 +83,7 @@ pub struct Context { pub db: Arc>, pub mam: Arc>>>, pub stats: Stats, + pub metadata: Arc, // pub events: Events, pub triggers: Triggers, } diff --git a/server/src/web/pages/torrent.rs b/server/src/web/pages/torrent.rs index 7fb70985..93147bb3 100644 --- a/server/src/web/pages/torrent.rs +++ b/server/src/web/pages/torrent.rs @@ -29,6 +29,7 @@ use serde::Deserialize; use time::UtcDateTime; use tokio_util::io::ReaderStream; +use crate::metadata::mam_meta::match_meta; use crate::{ audiobookshelf::{Abs, LibraryItemMinified}, cleaner::clean_torrent, @@ -45,6 +46,7 @@ use crate::{ time, }, }; +use mlm_db::MetadataSource; pub async fn torrent_file( State(context): State, @@ -173,8 +175,8 @@ async fn torrent_page_id( .db .r_transaction()? .scan() - .secondary::(EventKey::mam_id)?; - let events = events.range(Some(torrent.mam_id)..=Some(torrent.mam_id))?; + .secondary::(EventKey::torrent_id)?; + let events = events.range(Some(torrent.id.clone())..=Some(torrent.id.clone()))?; let mut events = events.collect::, _>>()?; events.sort_by(|a, b| b.created_at.cmp(&a.created_at)); @@ -184,19 +186,24 @@ async fn torrent_page_id( } else { None }; - let mam_meta = mam_torrent.as_ref().map(|t| t.as_meta()).transpose()?; + let mut mam_meta = mam_torrent.as_ref().map(|t| t.as_meta()).transpose()?; - if let Some(mam_meta) = &mam_meta - && torrent + if let Some(mam_meta) = &mut mam_meta { + let mut ids = torrent.meta.ids.clone(); + ids.append(&mut mam_meta.ids); // MaM adds its IDs + mam_meta.ids = ids; + + if torrent .meta .uploaded_at .as_ref() .is_none_or(|t| t.0 == UtcDateTime::UNIX_EPOCH) - { - let (_guard, rw) = context.db.rw_async().await?; - torrent.meta.uploaded_at = mam_meta.uploaded_at; - rw.upsert(torrent.clone())?; - rw.commit()?; + { + let (_guard, rw) = context.db.rw_async().await?; + torrent.meta.uploaded_at = mam_meta.uploaded_at; + rw.upsert(torrent.clone())?; + rw.commit()?; + } } let mut qbit_data = None; @@ -266,6 +273,7 @@ async fn torrent_page_id( wanted_path, qbit_files, other_torrents, + metadata_providers: context.metadata.enabled_providers(), }; Ok::<_, AppError>(Html(template.to_string())) } @@ -345,6 +353,51 @@ pub async fn torrent_page_post_id( let mam = context.mam()?; refresh_metadata_relink(&config, &context.db, &mam, id).await?; } + "match" => { + // Build a query from existing torrent metadata + let Some(mut torrent) = context.db.r_transaction()?.get().primary::(id)? + else { + return Err(anyhow::Error::msg("Could not find torrent").into()); + }; + + let provider_id = match &form.provider { + Some(p) => p.as_str(), + None => { + tracing::error!("metadata match failed: no provider selected"); + return Err(anyhow::Error::msg("no provider selected").into()); + } + }; + + match match_meta(&context, &torrent.meta, provider_id).await { + Ok((new_meta, pid, fields)) => { + let ev = Event { + id: mlm_db::Uuid::new(), + torrent_id: Some(torrent.id.clone()), + mam_id: torrent.mam_id, + created_at: mlm_db::Timestamp::now(), + event: EventType::Updated { + fields: fields.clone(), + source: (MetadataSource::Match, pid.clone()), + }, + }; + + let (_guard, rw) = context.db.rw_async().await?; + // apply meta updates + let mut meta = new_meta; + meta.source = MetadataSource::Match; + torrent.meta = meta; + // update title_search to normalized title + torrent.title_search = mlm_parse::normalize_title(&torrent.meta.title); + + rw.upsert(torrent)?; + rw.insert(ev)?; + rw.commit()?; + } + Err(e) => { + tracing::error!("metadata match failed for provider {}: {}", provider_id, e) + } + } + } "remove" => { let (_guard, rw) = context.db.rw_async().await?; let Some(torrent) = rw.get().primary::(id)? else { @@ -440,6 +493,8 @@ pub async fn torrent_page_post_id( pub struct TorrentPageForm { action: String, #[serde(default)] + provider: Option, + #[serde(default)] category: String, #[serde(default)] tags: Vec, @@ -460,6 +515,7 @@ struct TorrentPageTemplate { wanted_path: Option, qbit_files: Vec, other_torrents: MaMTorrentsTemplate, + metadata_providers: Vec, } impl TorrentPageTemplate { diff --git a/server/src/web/tables.rs b/server/src/web/tables.rs index a911b544..2cec7123 100644 --- a/server/src/web/tables.rs +++ b/server/src/web/tables.rs @@ -254,6 +254,7 @@ pub trait HidableColumns: Sortable { pub trait Size { fn style(&self) -> String; + #[allow(dead_code)] fn px(&self) -> u64; } impl Size for u64 { @@ -266,7 +267,7 @@ impl Size for u64 { } } -pub struct Flex(pub u64, pub u64); +pub struct Flex(pub u64, #[allow(dead_code)] pub u64); impl Size for Flex { fn style(&self) -> String { format!("{}fr", self.0) diff --git a/server/templates/pages/torrent.html b/server/templates/pages/torrent.html index 9fe08900..1d15fa92 100644 --- a/server/templates/pages/torrent.html +++ b/server/templates/pages/torrent.html @@ -30,7 +30,7 @@

Replaced with: {{ torrent.meta.title }} {% endif %}
-
+
diff --git a/server/tests/cleaner_test.rs b/server/tests/cleaner_test.rs index ca47d127..769d343c 100644 --- a/server/tests/cleaner_test.rs +++ b/server/tests/cleaner_test.rs @@ -1,15 +1,18 @@ mod common; -use common::{TestDb, MockFs, mock_config, MockTorrentBuilder}; +use common::{MockFs, MockTorrentBuilder, TestDb, mock_config}; use mlm::cleaner::run_library_cleaner; +use mlm_db::{DatabaseExt, Torrent}; use std::sync::Arc; -use mlm_db::{Torrent, DatabaseExt}; #[tokio::test] async fn test_run_library_cleaner() -> anyhow::Result<()> { let test_db = TestDb::new()?; let mock_fs = MockFs::new()?; - let config = Arc::new(mock_config(mock_fs.rip_dir.clone(), mock_fs.library_dir.clone())); + let config = Arc::new(mock_config( + mock_fs.rip_dir.clone(), + mock_fs.library_dir.clone(), + )); // Create two versions of the same book let lib_path1 = mock_fs.library_dir.join("Author 1").join("Book 1 (v1)"); @@ -51,11 +54,20 @@ async fn test_run_library_cleaner() -> anyhow::Result<()> { // t1 should be replaced_with t2 assert!(t1_after.replaced_with.is_some(), "t1 should be replaced"); assert_eq!(t1_after.replaced_with.unwrap().0, "ID2"); - assert!(t1_after.library_path.is_none(), "t1 library path should be cleared"); + assert!( + t1_after.library_path.is_none(), + "t1 library path should be cleared" + ); // t2 should still be there - assert!(t2_after.replaced_with.is_none(), "t2 should not be replaced"); - assert!(t2_after.library_path.is_some(), "t2 library path should still be set"); + assert!( + t2_after.replaced_with.is_none(), + "t2 should not be replaced" + ); + assert!( + t2_after.library_path.is_some(), + "t2 library path should still be set" + ); // Files for t1 should be deleted assert!(!lib_path1.exists(), "t1 files should be deleted"); diff --git a/server/tests/common/mod.rs b/server/tests/common/mod.rs index ecfb4da3..3acf9462 100644 --- a/server/tests/common/mod.rs +++ b/server/tests/common/mod.rs @@ -1,8 +1,8 @@ use anyhow::Result; use mlm::config::{Config, Library, LibraryByRipDir, LibraryLinkMethod, LibraryOptions}; use mlm_db::{ - migrate, Database, MainCat, MediaType, MetadataSource, Size, Timestamp, Torrent, TorrentMeta, - MODELS, + Database, MODELS, MainCat, MediaType, MetadataSource, Size, Timestamp, Torrent, TorrentMeta, + migrate, }; use native_db::Builder; use std::path::PathBuf; @@ -112,6 +112,7 @@ impl MockTorrentBuilder { } } +#[allow(dead_code)] pub struct MockFs { #[allow(dead_code)] pub root: TempDir, @@ -120,6 +121,7 @@ pub struct MockFs { } impl MockFs { + #[allow(dead_code)] pub fn new() -> Result { let root = tempfile::tempdir()?; let rip_dir = root.path().join("rip"); @@ -259,29 +261,6 @@ impl MockFs { pub fn mock_config(rip_dir: PathBuf, library_dir: PathBuf) -> Config { Config { mam_id: "test".to_string(), - web_host: "127.0.0.1".to_string(), - web_port: 3157, - min_ratio: 2.0, - unsat_buffer: 10, - wedge_buffer: 0, - add_torrents_stopped: false, - exclude_narrator_in_library_dir: false, - search_interval: 30, - link_interval: 10, - import_interval: 135, - ignore_torrents: vec![], - audio_types: vec!["m4b".to_string(), "m4a".to_string()], - ebook_types: vec!["epub".to_string()], - music_types: vec!["mp3".to_string()], - radio_types: vec!["mp3".to_string()], - search: Default::default(), - audiobookshelf: None, - autograbs: vec![], - snatchlist: vec![], - goodreads_lists: vec![], - notion_lists: vec![], - tags: vec![], - qbittorrent: vec![], libraries: vec![Library::ByRipDir(LibraryByRipDir { rip_dir, options: LibraryOptions { @@ -293,5 +272,12 @@ pub fn mock_config(rip_dir: PathBuf, library_dir: PathBuf) -> Config { }, filter: Default::default(), })], + metadata_providers: vec![mlm::config::ProviderConfig::RomanceIo( + mlm::config::RomanceIoConfig { + enabled: true, + timeout_secs: None, + }, + )], + ..Default::default() } } diff --git a/server/tests/linker_torrent_test.rs b/server/tests/linker_torrent_test.rs index eb053760..a7b48d1e 100644 --- a/server/tests/linker_torrent_test.rs +++ b/server/tests/linker_torrent_test.rs @@ -2,12 +2,12 @@ mod common; use anyhow::Result; use common::{MockFs, TestDb, mock_config}; -use mlm_db::DatabaseExt as _; use mlm::config::{ Library, LibraryByDownloadDir, LibraryLinkMethod, LibraryOptions, LibraryTagFilters, QbitConfig, }; use mlm::linker::torrent::{MaMApi, link_torrents_to_library}; use mlm::qbittorrent::QbitApi; +use mlm_db::DatabaseExt as _; use mlm_mam::search::MaMTorrent; use qbit::models::{Torrent as QbitTorrent, TorrentContent, Tracker}; use qbit::parameters::TorrentListParams; @@ -56,28 +56,34 @@ impl MaMApi for MockMaM { } } -fn mock_meta(title: &str, author: &str) -> mlm_db::TorrentMeta { - mlm_db::TorrentMeta { - ids: BTreeMap::new(), - vip_status: None, - cat: None, - media_type: mlm_db::MediaType::Audiobook, - main_cat: None, - categories: vec![], - tags: vec![], - language: None, - flags: None, - filetypes: vec![], - num_files: 0, - size: mlm_db::Size::from_bytes(0), +#[allow(clippy::too_many_arguments)] +/// Helper to build a MaMTorrent with sensible defaults for tests. +fn make_mam_torrent( + id: u64, + title: &str, + mediatype: u8, + maincat: u8, + category: u64, + catname: &str, + language: u8, + lang_code: &str, + numfiles: u64, + filetype: &str, +) -> MaMTorrent { + MaMTorrent { + id, title: title.to_string(), - edition: None, - description: "".to_string(), - authors: vec![author.to_string()], - narrators: vec![], - series: vec![], - source: mlm_db::MetadataSource::Mam, - uploaded_at: mlm_db::Timestamp::now(), + added: "2024-01-01 12:00:00".to_string(), + size: format!("{} B", 100), + mediatype, + maincat, + catname: catname.to_string(), + category, + language, + lang_code: lang_code.to_string(), + numfiles, + filetype: filetype.to_string(), + ..Default::default() } } @@ -135,21 +141,18 @@ async fn test_link_torrent_audiobook() -> anyhow::Result<()> { }; // Setup mock MaM - let mut mam_torrent = MaMTorrent { - id: 1, - title: "Test Title".to_string(), - added: "2024-01-01 12:00:00".to_string(), - size: "100 B".to_string(), - mediatype: 1, // Audiobook - maincat: 1, // Fiction - catname: "General Fiction".to_string(), - category: 42, // General Fiction in AudiobookCategory - language: 1, // English - lang_code: "en".to_string(), - numfiles: 1, - filetype: "m4b".to_string(), - ..Default::default() - }; + let mut mam_torrent = make_mam_torrent( + 1, + "Test Title", + 1, + 1, + 42, + "General Fiction", + 1, + "en", + 1, + "m4b", + ); mam_torrent.author_info.insert(1, "Test Author".to_string()); let mock_mam = MockMaM { @@ -379,18 +382,21 @@ async fn test_link_torrent_ebook() -> anyhow::Result<()> { files: HashMap::from([(torrent_hash.to_string(), vec![qbit_content])]), }; - let mut mam_torrent = MaMTorrent { - id: 2, - title: "Ebook Title".to_string(), - added: "2024-01-02 12:00:00".to_string(), - size: "200 B".to_string(), - mediatype: 2, // Ebook - category: 46, - language: 1, - lang_code: "en".to_string(), - ..Default::default() - }; - mam_torrent.author_info.insert(2, "Ebook Author".to_string()); + let mut mam_torrent = make_mam_torrent( + 2, + "Ebook Title", + 2, + 2, + 64, + "General Fiction", + 1, + "en", + 1, + "epub", + ); + mam_torrent + .author_info + .insert(2, "Ebook Author".to_string()); let mock_mam = MockMaM { torrents: HashMap::from([(torrent_hash.to_string(), mam_torrent)]), @@ -595,18 +601,11 @@ async fn test_refresh_metadata_relink() -> anyhow::Result<()> { )]), }; - let mut mam_torrent = MaMTorrent { - id: 2, - title: "Title".to_string(), - added: "2024-01-01 12:00:00".to_string(), - size: "100 B".to_string(), - mediatype: 1, // Audiobook - category: 42, - language: 1, - lang_code: "en".to_string(), - ..Default::default() - }; - mam_torrent.author_info.insert(2, "Refreshed Author".to_string()); + let mut mam_torrent = + make_mam_torrent(2, "Title", 1, 1, 42, "General Fiction", 1, "en", 1, "m4b"); + mam_torrent + .author_info + .insert(2, "Refreshed Author".to_string()); let mock_mam = MockMaM { torrents: HashMap::from([(torrent_hash.to_string(), mam_torrent)]), diff --git a/server/tests/metadata_integration.rs b/server/tests/metadata_integration.rs new file mode 100644 index 00000000..a50b69fc --- /dev/null +++ b/server/tests/metadata_integration.rs @@ -0,0 +1,187 @@ +mod common; + +use anyhow::Result; +use std::sync::Arc; +use std::time::Duration as StdDuration; + +use mlm_db::{Event, EventKey, EventType, TorrentMeta as MetadataQuery}; + +use async_trait::async_trait; +use common::{TestDb, mock_config}; +use mlm::metadata::MetadataService; +use mlm::stats::Context; +use url::Url; + +// Simple mock fetcher that returns inline mock data for tests. +struct MockFetcher; + +#[async_trait] +impl mlm_meta::http::HttpClient for MockFetcher { + async fn get(&self, url: &str) -> anyhow::Result { + let u = Url::parse(url).map_err(|e| anyhow::anyhow!(e))?; + if !u.host_str().is_some_and(|h| h.contains("romance.io")) { + return Err(anyhow::anyhow!("unexpected host in test fetch")); + } + if u.path().starts_with("/json/search_books") { + return Ok(r#"{ + "success": true, + "books": [ + { + "_id":"68b95a390bc0cee156edaf2b", + "info":{"title":"Of Ink and Alchemy"}, + "authors":[{"name":"Sloane St. James"}], + "url":"/books/68b95a390bc0cee156edaf2b/of-ink-and-alchemy-sloane-st-james" + } + ] +}"# + .to_string()); + } + if u.path().starts_with("/json/search_authors") { + return Ok(r#"{ "success": true, "authors": [] }"#.to_string()); + } + if u.path().starts_with("/search") { + return Ok("search".to_string()); + } + + Ok(r#" + + + + + +"# + .to_string()) + } + + async fn post( + &self, + _url: &str, + _body: Option<&str>, + _headers: &[(&str, &str)], + ) -> anyhow::Result { + Err(anyhow::anyhow!("post not implemented in mock fetcher")) + } +} + +#[tokio::test] +async fn test_metadata_fetch_and_persist_romanceio() -> Result<()> { + let test_db = TestDb::new()?; + + // minimal config/context + let temp = tempfile::tempdir()?; + let rip = temp.path().join("rip"); + let lib = temp.path().join("library"); + std::fs::create_dir_all(&rip)?; + std::fs::create_dir_all(&lib)?; + let cfg = mock_config(rip, lib); + + let _default_timeout = StdDuration::from_secs(5); + let providers = cfg.metadata_providers.clone(); + // convert provider config to server metadata provider settings + let provider_settings: Vec = providers + .iter() + .map(|p| match p { + mlm::config::ProviderConfig::Hardcover(c) => { + mlm::metadata::ProviderSetting::Hardcover { + enabled: c.enabled, + timeout_secs: c.timeout_secs, + api_key: c.api_key.clone(), + } + } + mlm::config::ProviderConfig::RomanceIo(c) => { + mlm::metadata::ProviderSetting::RomanceIo { + enabled: c.enabled, + timeout_secs: c.timeout_secs, + } + } + mlm::config::ProviderConfig::OpenLibrary(c) => { + mlm::metadata::ProviderSetting::OpenLibrary { + enabled: c.enabled, + timeout_secs: c.timeout_secs, + } + } + }) + .collect(); + let metadata = + MetadataService::from_settings(&provider_settings, std::time::Duration::from_secs(5)); + let metadata = Arc::new(metadata); + + let ctx = Context { + config: Arc::new(tokio::sync::Mutex::new(Arc::new(cfg))), + db: test_db.db.clone(), + mam: Arc::new(Err(anyhow::anyhow!("no mam"))), + stats: mlm::stats::Stats::new(), + metadata: metadata.clone(), + triggers: mlm::stats::Triggers { + search_tx: std::collections::BTreeMap::new(), + import_tx: std::collections::BTreeMap::new(), + torrent_linker_tx: tokio::sync::watch::channel(()).0, + folder_linker_tx: tokio::sync::watch::channel(()).0, + downloader_tx: tokio::sync::watch::channel(()).0, + audiobookshelf_tx: tokio::sync::watch::channel(()).0, + }, + }; + + // Use a title known to the plan/romanceio mock. Inject the test fetcher + // implementation into the RomanceIo provider so we don't make network + // requests during tests. + // Replace the RomanceIo provider in the metadata service with one that + // uses the MockFetcher. + let mock_fetcher = std::sync::Arc::new(MockFetcher); + // Rebuild a metadata service with a RomanceIo using the mock fetcher. + let rom = mlm_meta::providers::RomanceIo::with_client(mock_fetcher.clone()); + let svc = mlm::metadata::MetadataService::new( + vec![(std::sync::Arc::new(rom), std::time::Duration::from_secs(5))], + std::time::Duration::from_secs(5), + ); + let metadata = Arc::new(svc); + + let ctx = Context { + metadata: metadata.clone(), + ..ctx + }; + + // Use a title known to the plan/romanceio mock + let mut q: MetadataQuery = Default::default(); + q.title = "Of Ink and Alchemy".to_string(); + let meta = metadata.fetch_and_persist(&ctx, q).await?; + + // Expect meta to contain some categories/tags + assert!( + meta.title.to_lowercase().contains("ink") + || !meta.categories.is_empty() + || !meta.tags.is_empty() + ); + + // Ensure an Event::Updated was inserted + let r = test_db.db.r_transaction()?; + let events = r.scan().secondary::(EventKey::created_at)?; + let events = events.all()?; + let mut found = false; + for ev in events { + let ev = ev?; + if let EventType::Updated { source, .. } = ev.event + && source.0 == mlm_db::MetadataSource::Match + && source.1 == "romanceio" + { + found = true; + break; + } + } + assert!(found, "Expected Event::Updated from romanceio provider"); + + Ok(()) +}