From 15f5d23db9f276a93637b36693a4d43c1b44dc4c Mon Sep 17 00:00:00 2001 From: Stirling Mouse <181794392+StirlingMouse@users.noreply.github.com> Date: Sat, 14 Feb 2026 09:48:35 +0100 Subject: [PATCH 1/4] Add hardcover and romance.io as metadata providers --- Cargo.lock | 1512 +++++++++++++++++++++++--- Cargo.toml | 3 +- mlm_db/src/lib.rs | 2 +- mlm_db/src/v03.rs | 4 +- mlm_db/src/v09.rs | 4 +- mlm_db/src/v13.rs | 4 +- mlm_db/src/v18.rs | 6 +- mlm_meta/Cargo.toml | 24 + mlm_meta/README.md | 14 + mlm_meta/src/helpers.rs | 171 +++ mlm_meta/src/http.rs | 54 + mlm_meta/src/lib.rs | 9 + mlm_meta/src/providers/fake.rs | 32 + mlm_meta/src/providers/hardcover.rs | 240 ++++ mlm_meta/src/providers/mod.rs | 138 +++ mlm_meta/src/providers/romanceio.rs | 247 +++++ mlm_meta/src/traits.rs | 15 + mlm_meta/tests/hardcover_tests.rs | 266 +++++ mlm_meta/tests/mock_fetcher.rs | 70 ++ mlm_meta/tests/provider_tests.rs | 32 + mlm_meta/tests/romanceio_tests.rs | 161 +++ mlm_meta/tests/scoring_tests.rs | 50 + server/Cargo.toml | 3 + server/src/autograbber.rs | 3 +- server/src/config.rs | 97 +- server/src/config_impl.rs | 1 + server/src/lib.rs | 1 + server/src/linker/common.rs | 87 +- server/src/linker/duplicates.rs | 61 +- server/src/linker/torrent.rs | 135 +-- server/src/main.rs | 27 + server/src/metadata/mam_meta.rs | 104 ++ server/src/metadata/mod.rs | 189 ++++ server/src/stats.rs | 8 + server/src/web/pages/torrent.rs | 49 +- server/src/web/tables.rs | 3 +- server/templates/pages/torrent.html | 9 + server/tests/cleaner_test.rs | 24 +- server/tests/common/mod.rs | 36 +- server/tests/linker_torrent_test.rs | 121 +-- server/tests/metadata_integration.rs | 181 +++ 41 files changed, 3786 insertions(+), 411 deletions(-) create mode 100644 mlm_meta/Cargo.toml create mode 100644 mlm_meta/README.md create mode 100644 mlm_meta/src/helpers.rs create mode 100644 mlm_meta/src/http.rs create mode 100644 mlm_meta/src/lib.rs create mode 100644 mlm_meta/src/providers/fake.rs create mode 100644 
mlm_meta/src/providers/hardcover.rs create mode 100644 mlm_meta/src/providers/mod.rs create mode 100644 mlm_meta/src/providers/romanceio.rs create mode 100644 mlm_meta/src/traits.rs create mode 100644 mlm_meta/tests/hardcover_tests.rs create mode 100644 mlm_meta/tests/mock_fetcher.rs create mode 100644 mlm_meta/tests/provider_tests.rs create mode 100644 mlm_meta/tests/romanceio_tests.rs create mode 100644 mlm_meta/tests/scoring_tests.rs create mode 100644 server/src/metadata/mam_meta.rs create mode 100644 server/src/metadata/mod.rs create mode 100644 server/tests/metadata_integration.rs diff --git a/Cargo.lock b/Cargo.lock index 19cea592..c2662486 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -32,6 +32,15 @@ version = "1.0.100" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a23eb6b1614318a8071c9b2521f36b424b2c83db5eb3a0fead4a6c0809af6e61" +[[package]] +name = "ascii-canvas" +version = "3.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8824ecca2e851cec16968d54a01dd372ef8f95b244fb84b84e70128be347c3c6" +dependencies = [ + "term", +] + [[package]] name = "askama" version = "0.14.0" @@ -39,7 +48,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f75363874b771be265f4ffe307ca705ef6f3baa19011c149da8674a87f1b75c4" dependencies = [ "askama_derive", - "itoa", + "itoa 1.0.15", "percent-encoding", "serde", "serde_json", @@ -60,7 +69,7 @@ dependencies = [ "rustc-hash", "serde", "serde_derive", - "syn", + "syn 2.0.104", ] [[package]] @@ -75,6 +84,209 @@ dependencies = [ "winnow", ] +[[package]] +name = "assert-json-diff" +version = "2.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "47e4f2b81832e72834d7518d8487a0396a28cc408186a2e8854c0f98011faf12" +dependencies = [ + "serde", + "serde_json", +] + +[[package]] +name = "async-attributes" +version = "1.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"a3203e79f4dd9bdda415ed03cf14dae5a2bf775c683a00f94e9cd1faf0f596e5" +dependencies = [ + "quote", + "syn 1.0.109", +] + +[[package]] +name = "async-channel" +version = "1.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "81953c529336010edd6d8e358f886d9581267795c61b19475b71314bffa46d35" +dependencies = [ + "concurrent-queue", + "event-listener 2.5.3", + "futures-core", +] + +[[package]] +name = "async-channel" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "924ed96dd52d1b75e9c1a3e6275715fd320f5f9439fb5a4a11fa51f4221158d2" +dependencies = [ + "concurrent-queue", + "event-listener-strategy", + "futures-core", + "pin-project-lite", +] + +[[package]] +name = "async-compression" +version = "0.4.39" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68650b7df54f0293fd061972a0fb05aaf4fc0879d3b3d21a638a182c5c543b9f" +dependencies = [ + "compression-codecs", + "compression-core", + "pin-project-lite", + "tokio", +] + +[[package]] +name = "async-executor" +version = "1.13.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "497c00e0fd83a72a79a39fcbd8e3e2f055d6f6c7e025f3b3d91f4f8e76527fb8" +dependencies = [ + "async-task", + "concurrent-queue", + "fastrand", + "futures-lite", + "pin-project-lite", + "slab", +] + +[[package]] +name = "async-global-executor" +version = "2.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05b1b633a2115cd122d73b955eadd9916c18c8f510ec9cd1686404c60ad1c29c" +dependencies = [ + "async-channel 2.5.0", + "async-executor", + "async-io", + "async-lock", + "blocking", + "futures-lite", + "once_cell", +] + +[[package]] +name = "async-io" +version = "2.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "456b8a8feb6f42d237746d4b3e9a178494627745c3c56c6ea55d92ba50d026fc" +dependencies = [ + "autocfg", + "cfg-if", + "concurrent-queue", + "futures-io", + 
"futures-lite", + "parking", + "polling", + "rustix", + "slab", + "windows-sys 0.61.2", +] + +[[package]] +name = "async-lock" +version = "3.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "290f7f2596bd5b78a9fec8088ccd89180d7f9f55b94b0576823bbbdc72ee8311" +dependencies = [ + "event-listener 5.4.1", + "event-listener-strategy", + "pin-project-lite", +] + +[[package]] +name = "async-object-pool" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "333c456b97c3f2d50604e8b2624253b7f787208cb72eb75e64b0ad11b221652c" +dependencies = [ + "async-std", +] + +[[package]] +name = "async-process" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc50921ec0055cdd8a16de48773bfeec5c972598674347252c0399676be7da75" +dependencies = [ + "async-channel 2.5.0", + "async-io", + "async-lock", + "async-signal", + "async-task", + "blocking", + "cfg-if", + "event-listener 5.4.1", + "futures-lite", + "rustix", +] + +[[package]] +name = "async-signal" +version = "0.2.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "43c070bbf59cd3570b6b2dd54cd772527c7c3620fce8be898406dd3ed6adc64c" +dependencies = [ + "async-io", + "async-lock", + "atomic-waker", + "cfg-if", + "futures-core", + "futures-io", + "rustix", + "signal-hook-registry", + "slab", + "windows-sys 0.61.2", +] + +[[package]] +name = "async-std" +version = "1.13.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2c8e079a4ab67ae52b7403632e4618815d6db36d2a010cfe41b02c1b1578f93b" +dependencies = [ + "async-attributes", + "async-channel 1.9.0", + "async-global-executor", + "async-io", + "async-lock", + "async-process", + "crossbeam-utils", + "futures-channel", + "futures-core", + "futures-io", + "futures-lite", + "gloo-timers", + "kv-log-macro", + "log", + "memchr", + "once_cell", + "pin-project-lite", + "pin-utils", + "slab", + "wasm-bindgen-futures", +] 
+ +[[package]] +name = "async-task" +version = "4.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b75356056920673b02621b35afd0f7dda9306d03c79a30f5c56c44cf256e3de" + +[[package]] +name = "async-trait" +version = "0.1.89" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9035ad2d096bed7955a320ee7e2230574d28fd3c3a0f186cbea1ff3c7eed5dbb" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.104", +] + [[package]] name = "atomic" version = "0.6.1" @@ -107,12 +319,12 @@ dependencies = [ "bytes", "form_urlencoded", "futures-util", - "http", - "http-body", + "http 1.3.1", + "http-body 1.0.1", "http-body-util", - "hyper", + "hyper 1.6.0", "hyper-util", - "itoa", + "itoa 1.0.15", "matchit", "memchr", "mime", @@ -123,7 +335,7 @@ dependencies = [ "serde_json", "serde_path_to_error", "serde_urlencoded", - "sync_wrapper", + "sync_wrapper 1.0.2", "tokio", "tower", "tower-layer", @@ -139,13 +351,13 @@ checksum = "68464cd0412f486726fb3373129ef5d2993f90c34bc2bc1c1e9943b2f4fc7ca6" dependencies = [ "bytes", "futures-core", - "http", - "http-body", + "http 1.3.1", + "http-body 1.0.1", "http-body-util", "mime", "pin-project-lite", "rustversion", - "sync_wrapper", + "sync_wrapper 1.0.2", "tower-layer", "tower-service", "tracing", @@ -162,8 +374,8 @@ dependencies = [ "bytes", "form_urlencoded", "futures-util", - "http", - "http-body", + "http 1.3.1", + "http-body 1.0.1", "http-body-util", "mime", "pin-project-lite", @@ -184,7 +396,7 @@ checksum = "604fde5e028fea851ce1d8570bbdc034bec850d157f7569d10f347d06808c05c" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.104", ] [[package]] @@ -202,12 +414,29 @@ dependencies = [ "windows-targets 0.52.6", ] +[[package]] +name = "base64" +version = "0.21.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9d297deb1925b89f2ccc13d7635fa0714f12c87adce1c75356b39ca9b7178567" + [[package]] name = "base64" version = "0.22.1" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" +[[package]] +name = "basic-cookies" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67bd8fd42c16bdb08688243dc5f0cc117a3ca9efeeaba3a345a18a6159ad96f7" +dependencies = [ + "lalrpop", + "lalrpop-util", + "regex", +] + [[package]] name = "basic-toml" version = "0.1.10" @@ -226,6 +455,21 @@ dependencies = [ "serde", ] +[[package]] +name = "bit-set" +version = "0.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0700ddab506f33b20a03b13996eccd309a48e5ff77d0d95926aa0210fb4e95f1" +dependencies = [ + "bit-vec", +] + +[[package]] +name = "bit-vec" +version = "0.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "349f9b6a179ed607305526ca489b34ad0a41aed5f7980fa90eb03160b69598fb" + [[package]] name = "bitflags" version = "1.3.2" @@ -253,6 +497,19 @@ dependencies = [ "generic-array", ] +[[package]] +name = "blocking" +version = "1.6.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e83f8d02be6967315521be875afa792a316e28d57b5a2d401897e2a7921b7f21" +dependencies = [ + "async-channel 2.5.0", + "async-task", + "futures-io", + "futures-lite", + "piper", +] + [[package]] name = "bumpalo" version = "3.18.1" @@ -359,6 +616,32 @@ dependencies = [ "objc", ] +[[package]] +name = "compression-codecs" +version = "0.4.36" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "00828ba6fd27b45a448e57dbfe84f1029d4c9f26b368157e9a448a5f49a2ec2a" +dependencies = [ + "compression-core", + "flate2", + "memchr", +] + +[[package]] +name = "compression-core" +version = "0.4.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "75984efb6ed102a0d42db99afb6c1948f0380d1d91808d5529916e6c08b49d8d" + +[[package]] +name = "concurrent-queue" +version = "2.5.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ca0197aee26d1ae37445ee532fefce43251d24cc7c166799f4d46817f1d3973" +dependencies = [ + "crossbeam-utils", +] + [[package]] name = "conv" version = "0.3.3" @@ -368,6 +651,12 @@ dependencies = [ "custom_derive", ] +[[package]] +name = "convert_case" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6245d59a3e82a7fc217c5828a6692dbc6dfb63a0c8c90495621f7b9d79704a0e" + [[package]] name = "cookie" version = "0.18.1" @@ -446,6 +735,15 @@ dependencies = [ "libc", ] +[[package]] +name = "crc32fast" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9481c1c90cbf2ac953f07c8d4a58aa3945c425b7185c9154d67a65e4230da511" +dependencies = [ + "cfg-if", +] + [[package]] name = "crossbeam-channel" version = "0.5.15" @@ -480,6 +778,12 @@ version = "0.8.21" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" +[[package]] +name = "crunchy" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "460fbee9c2c2f33933d720630a6a0bac33ba7053db5344fac858d4b8952d77d5" + [[package]] name = "crypto-common" version = "0.1.6" @@ -490,6 +794,23 @@ dependencies = [ "typenum", ] +[[package]] +name = "cssparser" +version = "0.27.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "754b69d351cdc2d8ee09ae203db831e005560fc6030da058f86ad60c92a9cb0a" +dependencies = [ + "cssparser-macros", + "dtoa-short", + "itoa 0.4.8", + "matches", + "phf 0.8.0", + "proc-macro2", + "quote", + "smallvec", + "syn 1.0.109", +] + [[package]] name = "cssparser" version = "0.34.0" @@ -498,8 +819,8 @@ checksum = "b7c66d1cd8ed61bf80b38432613a7a2f09401ab8d0501110655f8b341484a3e3" dependencies = [ "cssparser-macros", "dtoa-short", - "itoa", - "phf", + "itoa 1.0.15", + "phf 0.11.3", "smallvec", ] @@ -510,7 +831,7 @@ 
source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "13b588ba4ac1a99f7f2964d24b3d896ddc6bf847ee3855dbd4366f058cfcd331" dependencies = [ "quote", - "syn", + "syn 2.0.104", ] [[package]] @@ -540,7 +861,7 @@ dependencies = [ "proc-macro2", "quote", "strsim", - "syn", + "syn 2.0.104", ] [[package]] @@ -551,7 +872,7 @@ checksum = "fc34b93ccb385b40dc71c6fceac4b2ad23662c7eeb248cf10d529b7e055b6ead" dependencies = [ "darling_core", "quote", - "syn", + "syn 2.0.104", ] [[package]] @@ -582,7 +903,7 @@ dependencies = [ "darling", "proc-macro2", "quote", - "syn", + "syn 2.0.104", ] [[package]] @@ -592,7 +913,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ab63b0e2bf4d5928aff72e83a7dace85d7bba5fe12dcc3c5a572d78caffd3f3c" dependencies = [ "derive_builder_core", - "syn", + "syn 2.0.104", ] [[package]] @@ -601,9 +922,11 @@ version = "0.99.20" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6edb4b64a43d977b8e99788fe3a04d483834fba1215a7e02caa415b626497f7f" dependencies = [ + "convert_case", "proc-macro2", "quote", - "syn", + "rustc_version", + "syn 2.0.104", ] [[package]] @@ -625,6 +948,16 @@ dependencies = [ "dirs-sys", ] +[[package]] +name = "dirs-next" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b98cf8ebf19c3d1b223e151f99a4f9f0690dca41414773390fc824184ac833e1" +dependencies = [ + "cfg-if", + "dirs-sys-next", +] + [[package]] name = "dirs-sys" version = "0.5.0" @@ -633,10 +966,21 @@ checksum = "e01a3366d27ee9890022452ee61b2b63a67e6f13f58900b651ff5665f0bb1fab" dependencies = [ "libc", "option-ext", - "redox_users", + "redox_users 0.5.0", "windows-sys 0.60.2", ] +[[package]] +name = "dirs-sys-next" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ebda144c4fe02d1f7ea1a7d9641b6fc6b580adcfa024ae48797ecdeb6825b4d" +dependencies = [ + "libc", + "redox_users 0.4.6", + "winapi", +] + [[package]] name = 
"displaydoc" version = "0.2.5" @@ -645,7 +989,7 @@ checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.104", ] [[package]] @@ -672,6 +1016,12 @@ dependencies = [ "dtoa", ] +[[package]] +name = "ego-tree" +version = "0.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "12a0bb14ac04a9fcf170d0bbbef949b44cc492f4452bd20c095636956f653642" + [[package]] name = "ego-tree" version = "0.10.0" @@ -695,7 +1045,16 @@ dependencies = [ "rustc_version", "toml 0.9.2", "vswhom", - "winreg", + "winreg 0.55.0", +] + +[[package]] +name = "ena" +version = "0.14.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d248bdd43ce613d87415282f69b9bb99d947d290b10962dd6c56233312c2ad5" +dependencies = [ + "log", ] [[package]] @@ -732,6 +1091,33 @@ dependencies = [ "version_check", ] +[[package]] +name = "event-listener" +version = "2.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0206175f82b8d6bf6652ff7d71a1e27fd2e4efde587fd368662814d6ec1d9ce0" + +[[package]] +name = "event-listener" +version = "5.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e13b66accf52311f30a0db42147dadea9850cb48cd070028831ae5f5d4b856ab" +dependencies = [ + "concurrent-queue", + "parking", + "pin-project-lite", +] + +[[package]] +name = "event-listener-strategy" +version = "0.5.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8be9f3dfaaffdae2972880079a491a1a8bb7cbed0b8dd7a347f668b4150a3b93" +dependencies = [ + "event-listener 5.4.1", + "pin-project-lite", +] + [[package]] name = "fastrand" version = "2.3.0" @@ -761,6 +1147,22 @@ dependencies = [ "windows-sys 0.52.0", ] +[[package]] +name = "fixedbitset" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ce7134b9999ecaf8bcd65542e436736ef32ddca1b3e06094cb6ec5755203b80" + 
+[[package]] +name = "flate2" +version = "1.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "843fba2746e448b37e26a819579957415c8cef339bf08564fe8b7ddbd959573c" +dependencies = [ + "crc32fast", + "miniz_oxide", +] + [[package]] name = "fnv" version = "1.0.7" @@ -794,7 +1196,7 @@ checksum = "1a5c6c585bc94aaf2c7b51dd4c2ba22680844aba4c687be581871a6f518c5742" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.104", ] [[package]] @@ -876,6 +1278,19 @@ version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9e5c1b78ca4aae1ac06c48a526a655760685149f0d465d21f37abfe57ce075c6" +[[package]] +name = "futures-lite" +version = "2.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f78e10609fe0e0b3f4157ffab1876319b5b0db102a2c60dc4626306dc46b44ad" +dependencies = [ + "fastrand", + "futures-core", + "futures-io", + "parking", + "pin-project-lite", +] + [[package]] name = "futures-macro" version = "0.3.31" @@ -884,7 +1299,7 @@ checksum = "162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.104", ] [[package]] @@ -945,6 +1360,17 @@ dependencies = [ "unicode-width", ] +[[package]] +name = "getrandom" +version = "0.1.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8fc3cb4d91f53b50155bdcfd23f6a4c39ae1969c2ae85982b135750cccaf5fce" +dependencies = [ + "cfg-if", + "libc", + "wasi 0.9.0+wasi-snapshot-preview1", +] + [[package]] name = "getrandom" version = "0.2.16" @@ -980,6 +1406,37 @@ version = "0.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a8d1add55171497b4705a648c6b583acafb01d58050a51727785f0b2c8e0a2b2" +[[package]] +name = "gloo-timers" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbb143cf96099802033e0d4f4963b19fd2e0b728bcf076cd9cf7f6634f092994" +dependencies = [ + 
"futures-channel", + "futures-core", + "js-sys", + "wasm-bindgen", +] + +[[package]] +name = "h2" +version = "0.3.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0beca50380b1fc32983fc1cb4587bfa4bb9e78fc259aad4a0032d2080309222d" +dependencies = [ + "bytes", + "fnv", + "futures-core", + "futures-sink", + "futures-util", + "http 0.2.12", + "indexmap", + "slab", + "tokio", + "tokio-util", + "tracing", +] + [[package]] name = "h2" version = "0.4.10" @@ -991,7 +1448,7 @@ dependencies = [ "fnv", "futures-core", "futures-sink", - "http", + "http 1.3.1", "indexmap", "slab", "tokio", @@ -1011,6 +1468,20 @@ version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fc0fef456e4baa96da950455cd02c081ca953b141298e41db3fc7e36b1da849c" +[[package]] +name = "html5ever" +version = "0.26.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bea68cab48b8459f17cf1c944c67ddc572d272d9f2b274140f223ecb1da4a3b7" +dependencies = [ + "log", + "mac", + "markup5ever 0.11.0", + "proc-macro2", + "quote", + "syn 1.0.109", +] + [[package]] name = "html5ever" version = "0.29.1" @@ -1019,7 +1490,7 @@ checksum = "3b7410cae13cbc75623c98ac4cbfd1f0bedddf3227afc24f370cf0f50a44a11c" dependencies = [ "log", "mac", - "markup5ever", + "markup5ever 0.14.1", "match_token", ] @@ -1034,6 +1505,17 @@ dependencies = [ "thiserror 1.0.69", ] +[[package]] +name = "http" +version = "0.2.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "601cbb57e577e2f5ef5be8e7b83f0f63994f25aa94d673e54a92d5c516d101f1" +dependencies = [ + "bytes", + "fnv", + "itoa 1.0.15", +] + [[package]] name = "http" version = "1.3.1" @@ -1042,7 +1524,18 @@ checksum = "f4a85d31aea989eead29a3aaf9e1115a180df8282431156e533de47660892565" dependencies = [ "bytes", "fnv", - "itoa", + "itoa 1.0.15", +] + +[[package]] +name = "http-body" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum 
= "7ceab25649e9960c0311ea418d17bee82c0dcec1bd053b5f9a66e265a693bed2" +dependencies = [ + "bytes", + "http 0.2.12", + "pin-project-lite", ] [[package]] @@ -1052,7 +1545,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1efedce1fb8e6913f23e0c92de8e62cd5b772a67e7b3946df930a62566c93184" dependencies = [ "bytes", - "http", + "http 1.3.1", ] [[package]] @@ -1063,8 +1556,8 @@ checksum = "b021d93e26becf5dc7e1b75b1bed1fd93124b374ceb73f43d4d4eafec896a64a" dependencies = [ "bytes", "futures-core", - "http", - "http-body", + "http 1.3.1", + "http-body 1.0.1", "pin-project-lite", ] @@ -1086,6 +1579,58 @@ version = "1.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9" +[[package]] +name = "httpmock" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08ec9586ee0910472dec1a1f0f8acf52f0fdde93aea74d70d4a3107b4be0fd5b" +dependencies = [ + "assert-json-diff", + "async-object-pool", + "async-std", + "async-trait", + "base64 0.21.7", + "basic-cookies", + "crossbeam-utils", + "form_urlencoded", + "futures-util", + "hyper 0.14.32", + "lazy_static", + "levenshtein", + "log", + "regex", + "serde", + "serde_json", + "serde_regex", + "similar", + "tokio", + "url", +] + +[[package]] +name = "hyper" +version = "0.14.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41dfc780fdec9373c01bae43289ea34c972e40ee3c9f6b3c8801a35f35586ce7" +dependencies = [ + "bytes", + "futures-channel", + "futures-core", + "futures-util", + "h2 0.3.27", + "http 0.2.12", + "http-body 0.4.6", + "httparse", + "httpdate", + "itoa 1.0.15", + "pin-project-lite", + "socket2 0.5.10", + "tokio", + "tower-service", + "tracing", + "want", +] + [[package]] name = "hyper" version = "1.6.0" @@ -1095,34 +1640,61 @@ dependencies = [ "bytes", "futures-channel", "futures-util", - "h2", - "http", - "http-body", + "h2 0.4.10", + 
"http 1.3.1", + "http-body 1.0.1", "httparse", "httpdate", - "itoa", + "itoa 1.0.15", "pin-project-lite", "smallvec", "tokio", "want", ] +[[package]] +name = "hyper-rustls" +version = "0.24.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec3efd23720e2049821a693cbc7e65ea87c72f1c58ff2f9522ff332b1491e590" +dependencies = [ + "futures-util", + "http 0.2.12", + "hyper 0.14.32", + "rustls 0.21.12", + "tokio", + "tokio-rustls 0.24.1", +] + [[package]] name = "hyper-rustls" version = "0.27.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e3c93eb611681b207e1fe55d5a71ecf91572ec8a6705cdb6857f7d8d5242cf58" dependencies = [ - "http", - "hyper", + "http 1.3.1", + "hyper 1.6.0", "hyper-util", - "rustls", + "rustls 0.23.28", "rustls-pki-types", "tokio", - "tokio-rustls", + "tokio-rustls 0.26.2", "tower-service", ] +[[package]] +name = "hyper-tls" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d6183ddfa99b85da61a140bea0efc93fdf56ceaa041b37d553518030827f9905" +dependencies = [ + "bytes", + "hyper 0.14.32", + "native-tls", + "tokio", + "tokio-native-tls", +] + [[package]] name = "hyper-tls" version = "0.6.0" @@ -1131,7 +1703,7 @@ checksum = "70206fc6890eaca9fde8a0bf71caa2ddfc9fe045ac9e5c70df101a7dbde866e0" dependencies = [ "bytes", "http-body-util", - "hyper", + "hyper 1.6.0", "hyper-util", "native-tls", "tokio", @@ -1145,20 +1717,20 @@ version = "0.1.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dc2fdfdbff08affe55bb779f33b053aa1fe5dd5b54c257343c17edfa55711bdb" dependencies = [ - "base64", + "base64 0.22.1", "bytes", "futures-channel", "futures-core", "futures-util", - "http", - "http-body", - "hyper", + "http 1.3.1", + "http-body 1.0.1", + "hyper 1.6.0", "ipnet", "libc", "percent-encoding", "pin-project-lite", "socket2 0.5.10", - "system-configuration", + "system-configuration 0.6.1", "tokio", "tower-service", "tracing", @@ -1349,6 
+1921,15 @@ dependencies = [ "either", ] +[[package]] +name = "itertools" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b1c173a5686ce8bfa551b3563d0c2170bf24ca44da99c7ca4bfdab5418c3fe57" +dependencies = [ + "either", +] + [[package]] name = "itertools" version = "0.14.0" @@ -1358,6 +1939,12 @@ dependencies = [ "either", ] +[[package]] +name = "itoa" +version = "0.4.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b71991ff56294aa922b450139ee08b3bfc70982c6b2c7562771375cf73542dd4" + [[package]] name = "itoa" version = "1.0.15" @@ -1374,6 +1961,46 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "kv-log-macro" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0de8b303297635ad57c9f5059fd9cee7a47f8e8daa09df0fcd07dd39fb22977f" +dependencies = [ + "log", +] + +[[package]] +name = "lalrpop" +version = "0.20.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "55cb077ad656299f160924eb2912aa147d7339ea7d69e1b5517326fdcec3c1ca" +dependencies = [ + "ascii-canvas", + "bit-set", + "ena", + "itertools 0.11.0", + "lalrpop-util", + "petgraph", + "pico-args", + "regex", + "regex-syntax 0.8.5", + "string_cache", + "term", + "tiny-keccak", + "unicode-xid", + "walkdir", +] + +[[package]] +name = "lalrpop-util" +version = "0.20.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "507460a910eb7b32ee961886ff48539633b788a36b65692b95f225b844c82553" +dependencies = [ + "regex-automata 0.4.13", +] + [[package]] name = "lava_torrent" version = "0.11.1" @@ -1394,6 +2021,12 @@ version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" +[[package]] +name = "levenshtein" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"db13adb97ab515a3691f56e4dbab09283d0b86cb45abd991d8634a9d6f501760" + [[package]] name = "libc" version = "0.2.180" @@ -1443,6 +2076,9 @@ name = "log" version = "0.4.29" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897" +dependencies = [ + "value-bag", +] [[package]] name = "mac" @@ -1459,6 +2095,20 @@ dependencies = [ "libc", ] +[[package]] +name = "markup5ever" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a2629bb1404f3d34c2e921f21fd34ba00b206124c81f65c50b43b6aaefeb016" +dependencies = [ + "log", + "phf 0.10.1", + "phf_codegen 0.10.0", + "string_cache", + "string_cache_codegen", + "tendril", +] + [[package]] name = "markup5ever" version = "0.14.1" @@ -1466,8 +2116,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c7a7213d12e1864c0f002f52c2923d4556935a43dec5e71355c2760e0f6e7a18" dependencies = [ "log", - "phf", - "phf_codegen", + "phf 0.11.3", + "phf_codegen 0.11.3", "string_cache", "string_cache_codegen", "tendril", @@ -1481,7 +2131,7 @@ checksum = "88a9689d8d44bf9964484516275f5cd4c9b59457a6940c1d5d0ecbb94510a36b" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.104", ] [[package]] @@ -1493,6 +2143,12 @@ dependencies = [ "regex-automata 0.1.10", ] +[[package]] +name = "matches" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2532096657941c2fea9c289d370a250971c689d4f143798ff67113ec042024a5" + [[package]] name = "matchit" version = "0.8.4" @@ -1534,6 +2190,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1fa76a2c86f704bdb222d66965fb3d63269ce38518b83cb0575fca855ebb6316" dependencies = [ "adler2", + "simd-adler32", ] [[package]] @@ -1553,6 +2210,7 @@ version = "0.4.6" dependencies = [ "anyhow", "askama", + "async-trait", "axum", "axum-extra", "bytes", @@ -1569,6 +2227,7 @@ dependencies = [ 
"matchr", "mlm_db", "mlm_mam", + "mlm_meta", "mlm_parse", "native_db", "native_model", @@ -1578,10 +2237,10 @@ dependencies = [ "qbit", "quick-xml", "regex", - "reqwest", + "reqwest 0.12.24", "reqwest_cookie_store", "sanitize-filename", - "scraper", + "scraper 0.23.1", "serde", "serde-nested-json", "serde_derive", @@ -1602,6 +2261,7 @@ dependencies = [ "tracing-subscriber", "tray-item", "unidecode", + "url", "urlencoding", "uuid", "winsafe", @@ -1642,7 +2302,7 @@ dependencies = [ "native_model", "once_cell", "openssl", - "reqwest", + "reqwest 0.12.24", "reqwest_cookie_store", "serde", "serde-nested-json", @@ -1654,6 +2314,27 @@ dependencies = [ "tracing", ] +[[package]] +name = "mlm_meta" +version = "0.1.0" +dependencies = [ + "anyhow", + "async-trait", + "httpmock", + "mlm_db", + "mlm_parse", + "openssl", + "reqwest 0.11.27", + "scraper 0.14.0", + "serde", + "serde_json", + "strsim", + "tokio", + "tracing", + "url", + "urlencoding", +] + [[package]] name = "mlm_parse" version = "0.1.0" @@ -1704,7 +2385,7 @@ source = "git+https://github.com/StirlingMouse/native_db.git?branch=0.8.x#cddaaf dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.104", ] [[package]] @@ -1730,7 +2411,7 @@ checksum = "2f385f3d57adaea8d8868e65a0bc821bcb8ba2228bbf87a1c3c6144ac48f3791" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.104", ] [[package]] @@ -1739,6 +2420,12 @@ version = "1.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "650eef8c711430f1a879fdd01d4745a7deea475becfb90269c06775983bbf086" +[[package]] +name = "nodrop" +version = "0.1.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72ef4a56884ca558e5ddb05a1d1e7e1bfd9a68d9ed024c21704cc98872dae1bb" + [[package]] name = "nom" version = "8.0.0" @@ -1861,7 +2548,7 @@ checksum = "a948666b637a0f465e8564c73e89d4dde00d72d4d473cc972f390fc3dcee7d9c" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.104", ] [[package]] @@ -1910,6 +2597,12 @@ 
version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c10569378a1dacd9f30dbe7ae49e054d2c45dc2f8ee49899903e09c3924e8b6f" +[[package]] +name = "parking" +version = "2.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f38d5652c16fde515bb1ecef450ab0f6a219d619a7274976324d5e377f7dceba" + [[package]] name = "parking_lot" version = "0.12.4" @@ -1959,7 +2652,7 @@ dependencies = [ "proc-macro2", "proc-macro2-diagnostics", "quote", - "syn", + "syn 2.0.104", ] [[package]] @@ -1968,14 +2661,64 @@ version = "2.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9b4f627cb1b25917193a259e49bdad08f671f8d9708acfd5fe0a8c1455d87220" +[[package]] +name = "petgraph" +version = "0.6.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b4c5cc86750666a3ed20bdaf5ca2a0344f9c67674cae0515bec2da16fbaa47db" +dependencies = [ + "fixedbitset", + "indexmap", +] + +[[package]] +name = "phf" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3dfb61232e34fcb633f43d12c58f83c1df82962dcdfa565a4e866ffc17dafe12" +dependencies = [ + "phf_macros 0.8.0", + "phf_shared 0.8.0", + "proc-macro-hack", +] + +[[package]] +name = "phf" +version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fabbf1ead8a5bcbc20f5f8b939ee3f5b0f6f281b6ad3468b84656b658b455259" +dependencies = [ + "phf_shared 0.10.0", +] + [[package]] name = "phf" version = "0.11.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1fd6780a80ae0c52cc120a26a1a42c1ae51b247a253e4e06113d23d2c2edd078" dependencies = [ - "phf_macros", - "phf_shared", + "phf_macros 0.11.3", + "phf_shared 0.11.3", +] + +[[package]] +name = "phf_codegen" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cbffee61585b0411840d3ece935cce9cb6321f01c45477d30066498cd5e1a815" +dependencies = [ + 
"phf_generator 0.8.0", + "phf_shared 0.8.0", +] + +[[package]] +name = "phf_codegen" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4fb1c3a8bc4dd4e5cfce29b44ffc14bedd2ee294559a294e2a4d4c9e9a6a13cd" +dependencies = [ + "phf_generator 0.10.0", + "phf_shared 0.10.0", ] [[package]] @@ -1984,8 +2727,28 @@ version = "0.11.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "aef8048c789fa5e851558d709946d6d79a8ff88c0440c587967f8e94bfb1216a" dependencies = [ - "phf_generator", - "phf_shared", + "phf_generator 0.11.3", + "phf_shared 0.11.3", +] + +[[package]] +name = "phf_generator" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "17367f0cc86f2d25802b2c26ee58a7b23faeccf78a396094c13dced0d0182526" +dependencies = [ + "phf_shared 0.8.0", + "rand 0.7.3", +] + +[[package]] +name = "phf_generator" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5d5285893bb5eb82e6aaf5d59ee909a06a16737a8970984dd7746ba9283498d6" +dependencies = [ + "phf_shared 0.10.0", + "rand 0.8.5", ] [[package]] @@ -1994,8 +2757,22 @@ version = "0.11.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3c80231409c20246a13fddb31776fb942c38553c51e871f8cbd687a4cfb5843d" dependencies = [ - "phf_shared", - "rand", + "phf_shared 0.11.3", + "rand 0.8.5", +] + +[[package]] +name = "phf_macros" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f6fde18ff429ffc8fe78e2bf7f8b7a5a5a6e2a8b58bc5a9ac69198bbda9189c" +dependencies = [ + "phf_generator 0.8.0", + "phf_shared 0.8.0", + "proc-macro-hack", + "proc-macro2", + "quote", + "syn 1.0.109", ] [[package]] @@ -2004,11 +2781,29 @@ version = "0.11.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f84ac04429c13a7ff43785d75ad27569f2951ce0ffd30a3321230db2fc727216" dependencies = [ - "phf_generator", - 
"phf_shared", + "phf_generator 0.11.3", + "phf_shared 0.11.3", "proc-macro2", "quote", - "syn", + "syn 2.0.104", +] + +[[package]] +name = "phf_shared" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c00cf8b9eafe68dde5e9eaa2cef8ee84a9336a47d566ec55ca16589633b65af7" +dependencies = [ + "siphasher 0.3.11", +] + +[[package]] +name = "phf_shared" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6796ad771acdc0123d2a88dc428b5e38ef24456743ddb1744ed628f9815c096" +dependencies = [ + "siphasher 0.3.11", ] [[package]] @@ -2017,9 +2812,15 @@ version = "0.11.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "67eabc2ef2a60eb7faa00097bd1ffdb5bd28e62bf39990626a582201b7a754e5" dependencies = [ - "siphasher", + "siphasher 1.0.1", ] +[[package]] +name = "pico-args" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5be167a7af36ee22fe3115051bc51f6e6c7054c9348e28deb4f49bd6f705a315" + [[package]] name = "pin-project-lite" version = "0.2.16" @@ -2032,12 +2833,37 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" +[[package]] +name = "piper" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "96c8c490f422ef9a4efd2cb5b42b76c8613d7e7dfc1caf667b8a3350a5acc066" +dependencies = [ + "atomic-waker", + "fastrand", + "futures-io", +] + [[package]] name = "pkg-config" version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c" +[[package]] +name = "polling" +version = "3.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5d0e4f59085d47d8241c88ead0f274e8a0cb551f3625263c05eb8dd897c34218" +dependencies = [ + "cfg-if", + 
"concurrent-queue", + "hermit-abi", + "pin-project-lite", + "rustix", + "windows-sys 0.61.2", +] + [[package]] name = "potential_utf" version = "0.1.2" @@ -2053,12 +2879,27 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391" +[[package]] +name = "ppv-lite86" +version = "0.2.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85eae3c4ed2f50dcfe72643da4befc30deadb458a9b590d720cde2f2b1e97da9" +dependencies = [ + "zerocopy", +] + [[package]] name = "precomputed-hash" version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "925383efa346730478fb4838dbe9137d2a47675ad789c546d150a6e1dd4ab31c" +[[package]] +name = "proc-macro-hack" +version = "0.5.20+deprecated" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc375e1527247fe1a97d8b7156678dfe7c1af2fc075c9a4db3690ecd2a148068" + [[package]] name = "proc-macro2" version = "1.0.95" @@ -2076,7 +2917,7 @@ checksum = "af066a9c399a26e020ada66a034357a868728e72cd426f3adcd35f80d88d88c8" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.104", "version_check", "yansi", ] @@ -2126,7 +2967,7 @@ source = "git+https://github.com/StirlingMouse/qbittorrent-webui-api.git#ce47d16 dependencies = [ "bytes", "derive_builder", - "reqwest", + "reqwest 0.12.24", "serde", "serde_json", "serde_repr", @@ -2160,19 +3001,85 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f" [[package]] -name = "rand" -version = "0.8.5" +name = "rand" +version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a6b1679d49b24bbfe0c803429aa1874472f50d9b363131f0e89fc356b544d03" +dependencies = [ + "getrandom 0.1.16", + "libc", + "rand_chacha 0.2.2", + "rand_core 0.5.1", + "rand_hc", + "rand_pcg", +] + +[[package]] +name = "rand" 
+version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" +dependencies = [ + "libc", + "rand_chacha 0.3.1", + "rand_core 0.6.4", +] + +[[package]] +name = "rand_chacha" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f4c8ed856279c9737206bf725bf36935d8666ead7aa69b52be55af369d193402" +dependencies = [ + "ppv-lite86", + "rand_core 0.5.1", +] + +[[package]] +name = "rand_chacha" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" +dependencies = [ + "ppv-lite86", + "rand_core 0.6.4", +] + +[[package]] +name = "rand_core" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "90bde5296fc891b0cef12a6d03ddccc162ce7b2aff54160af9338f8d40df6d19" +dependencies = [ + "getrandom 0.1.16", +] + +[[package]] +name = "rand_core" +version = "0.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" +dependencies = [ + "getrandom 0.2.16", +] + +[[package]] +name = "rand_hc" +version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" +checksum = "ca3129af7b92a17112d59ad498c6f81eaf463253766b90396d39ea7a39d6613c" dependencies = [ - "rand_core", + "rand_core 0.5.1", ] [[package]] -name = "rand_core" -version = "0.6.4" +name = "rand_pcg" +version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" +checksum = "16abd0c1b639e9eb4d7c50c0b8100b0d0f849be2349829c740fe8e6eb4816429" +dependencies = [ + "rand_core 0.5.1", +] [[package]] name = "rayon" @@ -2221,6 +3128,17 @@ dependencies = [ 
"bitflags 2.9.1", ] +[[package]] +name = "redox_users" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba009ff324d1fc1b900bd1fdb31564febe58a8ccc8a6fdbb93b543d33b13ca43" +dependencies = [ + "getrandom 0.2.16", + "libredox", + "thiserror 1.0.69", +] + [[package]] name = "redox_users" version = "0.5.0" @@ -2276,26 +3194,72 @@ version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" +[[package]] +name = "reqwest" +version = "0.11.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dd67538700a17451e7cba03ac727fb961abb7607553461627b97de0b89cf4a62" +dependencies = [ + "async-compression", + "base64 0.21.7", + "bytes", + "encoding_rs", + "futures-core", + "futures-util", + "h2 0.3.27", + "http 0.2.12", + "http-body 0.4.6", + "hyper 0.14.32", + "hyper-rustls 0.24.2", + "hyper-tls 0.5.0", + "ipnet", + "js-sys", + "log", + "mime", + "native-tls", + "once_cell", + "percent-encoding", + "pin-project-lite", + "rustls 0.21.12", + "rustls-pemfile", + "serde", + "serde_json", + "serde_urlencoded", + "sync_wrapper 0.1.2", + "system-configuration 0.5.1", + "tokio", + "tokio-native-tls", + "tokio-rustls 0.24.1", + "tokio-util", + "tower-service", + "url", + "wasm-bindgen", + "wasm-bindgen-futures", + "web-sys", + "webpki-roots", + "winreg 0.50.0", +] + [[package]] name = "reqwest" version = "0.12.24" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9d0946410b9f7b082a427e4ef5c8ff541a88b357bc6c637c40db3a68ac70a36f" dependencies = [ - "base64", + "base64 0.22.1", "bytes", "cookie", "cookie_store", "encoding_rs", "futures-core", "futures-util", - "h2", - "http", - "http-body", + "h2 0.4.10", + "http 1.3.1", + "http-body 1.0.1", "http-body-util", - "hyper", - "hyper-rustls", - "hyper-tls", + "hyper 1.6.0", + "hyper-rustls 0.27.7", + "hyper-tls 0.6.0", "hyper-util", "js-sys", 
"log", @@ -2308,7 +3272,7 @@ dependencies = [ "serde", "serde_json", "serde_urlencoded", - "sync_wrapper", + "sync_wrapper 1.0.2", "tokio", "tokio-native-tls", "tower", @@ -2328,7 +3292,7 @@ checksum = "a0b36498c7452f11b1833900f31fbb01fc46be20992a50269c88cf59d79f54e9" dependencies = [ "bytes", "cookie_store", - "reqwest", + "reqwest 0.12.24", "url", ] @@ -2380,6 +3344,18 @@ dependencies = [ "windows-sys 0.60.2", ] +[[package]] +name = "rustls" +version = "0.21.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f56a14d1f48b391359b22f731fd4bd7e43c97f3c50eee276f3aa09c94784d3e" +dependencies = [ + "log", + "ring", + "rustls-webpki 0.101.7", + "sct", +] + [[package]] name = "rustls" version = "0.23.28" @@ -2388,11 +3364,20 @@ checksum = "7160e3e10bf4535308537f3c4e1641468cd0e485175d6163087c0393c7d46643" dependencies = [ "once_cell", "rustls-pki-types", - "rustls-webpki", + "rustls-webpki 0.103.3", "subtle", "zeroize", ] +[[package]] +name = "rustls-pemfile" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1c74cae0a4cf6ccbbf5f359f08efdf8ee7e1dc532573bf0db71968cb56b1448c" +dependencies = [ + "base64 0.21.7", +] + [[package]] name = "rustls-pki-types" version = "1.12.0" @@ -2402,6 +3387,16 @@ dependencies = [ "zeroize", ] +[[package]] +name = "rustls-webpki" +version = "0.101.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b6275d1ee7a1cd780b64aca7726599a1dbc893b1e64144529e55c3c2f745765" +dependencies = [ + "ring", + "untrusted", +] + [[package]] name = "rustls-webpki" version = "0.103.3" @@ -2454,21 +3449,47 @@ version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" +[[package]] +name = "scraper" +version = "0.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"fc7cb4dae083699a22a65aa9d2699c27f525e35dffaec38b10801e958ed4cf27" +dependencies = [ + "cssparser 0.27.2", + "ego-tree 0.6.3", + "getopts", + "html5ever 0.26.0", + "matches", + "selectors 0.22.0", + "smallvec", + "tendril", +] + [[package]] name = "scraper" version = "0.23.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "527e65d9d888567588db4c12da1087598d0f6f8b346cc2c5abc91f05fc2dffe2" dependencies = [ - "cssparser", - "ego-tree", + "cssparser 0.34.0", + "ego-tree 0.10.0", "getopts", - "html5ever", + "html5ever 0.29.1", "precomputed-hash", - "selectors", + "selectors 0.26.0", "tendril", ] +[[package]] +name = "sct" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "da046153aa2352493d6cb7da4b6e5c0c057d8a1d0a9aa8560baffdd945acd414" +dependencies = [ + "ring", + "untrusted", +] + [[package]] name = "security-framework" version = "2.11.1" @@ -2492,6 +3513,26 @@ dependencies = [ "libc", ] +[[package]] +name = "selectors" +version = "0.22.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df320f1889ac4ba6bc0cdc9c9af7af4bd64bb927bccdf32d81140dc1f9be12fe" +dependencies = [ + "bitflags 1.3.2", + "cssparser 0.27.2", + "derive_more", + "fxhash", + "log", + "matches", + "phf 0.8.0", + "phf_codegen 0.8.0", + "precomputed-hash", + "servo_arc 0.1.1", + "smallvec", + "thin-slice", +] + [[package]] name = "selectors" version = "0.26.0" @@ -2499,15 +3540,15 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fd568a4c9bb598e291a08244a5c1f5a8a6650bee243b5b0f8dbb3d9cc1d87fe8" dependencies = [ "bitflags 2.9.1", - "cssparser", + "cssparser 0.34.0", "derive_more", "fxhash", "log", "new_debug_unreachable", - "phf", - "phf_codegen", + "phf 0.11.3", + "phf_codegen 0.11.3", "precomputed-hash", - "servo_arc", + "servo_arc 0.4.1", "smallvec", ] @@ -2557,7 +3598,7 @@ checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" dependencies = [ 
"proc-macro2", "quote", - "syn", + "syn 2.0.104", ] [[package]] @@ -2568,7 +3609,7 @@ checksum = "9d2de91cf02bbc07cde38891769ccd5d4f073d22a40683aa4bc7a95781aaa2c4" dependencies = [ "form_urlencoded", "indexmap", - "itoa", + "itoa 1.0.15", "ryu", "serde", ] @@ -2579,7 +3620,7 @@ version = "1.0.145" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "402a6f66d8c709116cf22f558eab210f5a50187f702eb4d7e5ef38d9a7f1c79c" dependencies = [ - "itoa", + "itoa 1.0.15", "memchr", "ryu", "serde", @@ -2592,7 +3633,17 @@ version = "0.1.17" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "59fab13f937fa393d08645bf3a84bdfe86e296747b506ada67bb15f10f218b2a" dependencies = [ - "itoa", + "itoa 1.0.15", + "serde", +] + +[[package]] +name = "serde_regex" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a8136f1a4ea815d7eac4101cfd0b16dc0cb5e1fe1b8609dfd728058656b7badf" +dependencies = [ + "regex", "serde", ] @@ -2604,7 +3655,7 @@ checksum = "175ee3e80ae9982737ca543e96133087cbd9a485eecc3bc4de9c1a37b47ea59c" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.104", ] [[package]] @@ -2632,11 +3683,21 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d3491c14715ca2294c4d6a88f15e84739788c1d030eed8c110436aafdaa2f3fd" dependencies = [ "form_urlencoded", - "itoa", + "itoa 1.0.15", "ryu", "serde", ] +[[package]] +name = "servo_arc" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d98238b800e0d1576d8b6e3de32827c2d74bee68bb97748dcf5071fb53965432" +dependencies = [ + "nodrop", + "stable_deref_trait", +] + [[package]] name = "servo_arc" version = "0.4.1" @@ -2681,6 +3742,24 @@ dependencies = [ "libc", ] +[[package]] +name = "simd-adler32" +version = "0.3.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e320a6c5ad31d271ad523dcf3ad13e2767ad8b1cb8f047f75a8aeaf8da139da2" + +[[package]] 
+name = "similar" +version = "2.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbbb5d9659141646ae647b42fe094daf6c6192d1620870b449d9557f748b2daa" + +[[package]] +name = "siphasher" +version = "0.3.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "38b58827f4464d87d377d175e90bf58eb00fd8716ff0a62f80356b5e61555d0d" + [[package]] name = "siphasher" version = "1.0.1" @@ -2748,7 +3827,7 @@ checksum = "bf776ba3fa74f83bf4b63c3dcbbf82173db2632ed8452cb2d891d33f459de70f" dependencies = [ "new_debug_unreachable", "parking_lot", - "phf_shared", + "phf_shared 0.11.3", "precomputed-hash", "serde", ] @@ -2759,8 +3838,8 @@ version = "0.5.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c711928715f1fe0fe509c53b43e993a9a557babc2d0a3567d0a3006f1ac931a0" dependencies = [ - "phf_generator", - "phf_shared", + "phf_generator 0.11.3", + "phf_shared 0.11.3", "proc-macro2", "quote", ] @@ -2783,6 +3862,17 @@ version = "2.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292" +[[package]] +name = "syn" +version = "1.0.109" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + [[package]] name = "syn" version = "2.0.104" @@ -2794,6 +3884,12 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "sync_wrapper" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2047c6ded9c721764247e62cd3b03c09ffc529b2ba5b10ec482ae507a4a70160" + [[package]] name = "sync_wrapper" version = "1.0.2" @@ -2811,7 +3907,18 @@ checksum = "728a70f3dbaf5bab7f0c4b1ac8d7ae5ea60a4b5549c8a5914361c99147a709d2" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.104", +] + +[[package]] +name = "system-configuration" 
+version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba3a3adc5c275d719af8cb4272ea1c4a6d668a777f37e115f6d11ddbc1c8e0e7" +dependencies = [ + "bitflags 1.3.2", + "core-foundation", + "system-configuration-sys 0.5.0", ] [[package]] @@ -2822,7 +3929,17 @@ checksum = "3c879d448e9d986b661742763247d3693ed13609438cf3d006f51f5368a5ba6b" dependencies = [ "bitflags 2.9.1", "core-foundation", - "system-configuration-sys", + "system-configuration-sys 0.6.0", +] + +[[package]] +name = "system-configuration-sys" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a75fb188eb626b924683e3b95e3a48e63551fcfb51949de2f06a9d91dbee93c9" +dependencies = [ + "core-foundation-sys", + "libc", ] [[package]] @@ -2859,6 +3976,23 @@ dependencies = [ "utf-8", ] +[[package]] +name = "term" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c59df8ac95d96ff9bede18eb7300b0fda5e5d8d90960e76f8e14ae765eedbf1f" +dependencies = [ + "dirs-next", + "rustversion", + "winapi", +] + +[[package]] +name = "thin-slice" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8eaa81235c7058867fa8c0e7314f33dcce9c215f535d1913822a2b3f5e289f3c" + [[package]] name = "thiserror" version = "1.0.69" @@ -2885,7 +4019,7 @@ checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.104", ] [[package]] @@ -2896,7 +4030,7 @@ checksum = "3ff15c8ecd7de3849db632e14d18d2571fa09dfc5ed93479bc4485c7a517c913" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.104", ] [[package]] @@ -2915,7 +4049,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "91e7d9e3bb61134e77bde20dd4825b97c010155709965fedf0f49bb138e52a9d" dependencies = [ "deranged", - "itoa", + "itoa 1.0.15", "libc", "num-conv", "num_threads", @@ -2941,6 +4075,15 @@ dependencies = [ 
"time-core", ] +[[package]] +name = "tiny-keccak" +version = "2.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2c9d3793400a45f954c52e73d068316d76b6f4e36977e3fcebb13a2721e80237" +dependencies = [ + "crunchy", +] + [[package]] name = "tinystr" version = "0.8.1" @@ -2979,7 +4122,7 @@ checksum = "6e06d43f1345a3bcd39f6a56dbb7dcab2ba47e68e8ac134855e7e2bdbaf8cab8" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.104", ] [[package]] @@ -2992,13 +4135,23 @@ dependencies = [ "tokio", ] +[[package]] +name = "tokio-rustls" +version = "0.24.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c28327cf380ac148141087fbfb9de9d7bd4e84ab5d2c28fbc911d753de8a7081" +dependencies = [ + "rustls 0.21.12", + "tokio", +] + [[package]] name = "tokio-rustls" version = "0.26.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8e727b36a1a0e8b74c376ac2211e40c2c8af09fb4013c60d910495810f008e9b" dependencies = [ - "rustls", + "rustls 0.23.28", "tokio", ] @@ -3116,7 +4269,7 @@ dependencies = [ "futures-core", "futures-util", "pin-project-lite", - "sync_wrapper", + "sync_wrapper 1.0.2", "tokio", "tower-layer", "tower-service", @@ -3133,8 +4286,8 @@ dependencies = [ "bytes", "futures-core", "futures-util", - "http", - "http-body", + "http 1.3.1", + "http-body 1.0.1", "http-body-util", "http-range-header", "httpdate", @@ -3195,7 +4348,7 @@ checksum = "81383ab64e72a7a8b8e13130c49e3dab29def6d0c7d76a03087b3cf71c5c6903" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.104", ] [[package]] @@ -3303,6 +4456,12 @@ version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4a1a07cc7db3810833284e8d372ccdc6da29741639ecc70c9ec107df0fa6154c" +[[package]] +name = "unicode-xid" +version = "0.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ebc1c04c71510c7f702b52b7c350734c9ff1295c464a03335b00bb84fc54f853" + [[package]] name = 
"unidecode" version = "0.3.0" @@ -3363,6 +4522,12 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ba73ea9cf16a25df0c8caa16c51acb937d5712a8429db78a3ee29d5dcacd3a65" +[[package]] +name = "value-bag" +version = "1.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7ba6f5989077681266825251a52748b8c1d8a4ad098cc37e440103d0ea717fc0" + [[package]] name = "vcpkg" version = "0.2.15" @@ -3414,6 +4579,12 @@ dependencies = [ "try-lock", ] +[[package]] +name = "wasi" +version = "0.9.0+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cccddf32554fecc6acb585f82a32a72e28b48f8c4c1883ddfeeeaa96f7d8e519" + [[package]] name = "wasi" version = "0.11.1+wasi-snapshot-preview1" @@ -3451,7 +4622,7 @@ dependencies = [ "log", "proc-macro2", "quote", - "syn", + "syn 2.0.104", "wasm-bindgen-shared", ] @@ -3486,7 +4657,7 @@ checksum = "8ae87ea40c9f689fc23f209965b6fb8a99ad69aeeb0231408be24920604395de" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.104", "wasm-bindgen-backend", "wasm-bindgen-shared", ] @@ -3510,6 +4681,12 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "webpki-roots" +version = "0.25.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5f20c57d8d7db6d3b86154206ae5d8fba62dd39573114de97c2cb0578251f8e1" + [[package]] name = "winapi" version = "0.3.9" @@ -3547,13 +4724,19 @@ version = "0.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5e6ad25900d524eaabdbbb96d20b4311e1e7ae1699af4fb28c17ae66c80d798a" +[[package]] +name = "windows-link" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5" + [[package]] name = "windows-registry" version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"b3bab093bdd303a1240bb99b8aba8ea8a69ee19d34c9e2ef9594e708a4878820" dependencies = [ - "windows-link", + "windows-link 0.1.3", "windows-result", "windows-strings", ] @@ -3564,7 +4747,7 @@ version = "0.3.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "56f42bd332cc6c8eac5af113fc0c1fd6a8fd2aa08a0119358686e5160d0586c6" dependencies = [ - "windows-link", + "windows-link 0.1.3", ] [[package]] @@ -3573,7 +4756,16 @@ version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "56e6c93f3a0c3b36176cb1327a4958a0353d5d166c2a35cb268ace15e91d3b57" dependencies = [ - "windows-link", + "windows-link 0.1.3", +] + +[[package]] +name = "windows-sys" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9" +dependencies = [ + "windows-targets 0.48.5", ] [[package]] @@ -3603,6 +4795,30 @@ dependencies = [ "windows-targets 0.53.2", ] +[[package]] +name = "windows-sys" +version = "0.61.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ae137229bcbd6cdf0f7b80a31df61766145077ddf49416a728b02cb3921ff3fc" +dependencies = [ + "windows-link 0.2.1", +] + +[[package]] +name = "windows-targets" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a2fa6e2155d7247be68c096456083145c183cbbbc2764150dda45a87197940c" +dependencies = [ + "windows_aarch64_gnullvm 0.48.5", + "windows_aarch64_msvc 0.48.5", + "windows_i686_gnu 0.48.5", + "windows_i686_msvc 0.48.5", + "windows_x86_64_gnu 0.48.5", + "windows_x86_64_gnullvm 0.48.5", + "windows_x86_64_msvc 0.48.5", +] + [[package]] name = "windows-targets" version = "0.52.6" @@ -3635,6 +4851,12 @@ dependencies = [ "windows_x86_64_msvc 0.53.0", ] +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8" + [[package]] name = "windows_aarch64_gnullvm" version = "0.52.6" @@ -3647,6 +4869,12 @@ version = "0.53.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "86b8d5f90ddd19cb4a147a5fa63ca848db3df085e25fee3cc10b39b6eebae764" +[[package]] +name = "windows_aarch64_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc" + [[package]] name = "windows_aarch64_msvc" version = "0.52.6" @@ -3659,6 +4887,12 @@ version = "0.53.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c7651a1f62a11b8cbd5e0d42526e55f2c99886c77e007179efff86c2b137e66c" +[[package]] +name = "windows_i686_gnu" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e" + [[package]] name = "windows_i686_gnu" version = "0.52.6" @@ -3683,6 +4917,12 @@ version = "0.53.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9ce6ccbdedbf6d6354471319e781c0dfef054c81fbc7cf83f338a4296c0cae11" +[[package]] +name = "windows_i686_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406" + [[package]] name = "windows_i686_msvc" version = "0.52.6" @@ -3695,6 +4935,12 @@ version = "0.53.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "581fee95406bb13382d2f65cd4a908ca7b1e4c2f1917f143ba16efe98a589b5d" +[[package]] +name = "windows_x86_64_gnu" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e" + [[package]] name = "windows_x86_64_gnu" version = "0.52.6" @@ -3707,6 +4953,12 @@ version = "0.53.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "2e55b5ac9ea33f2fc1716d1742db15574fd6fc8dadc51caab1c16a3d3b4190ba" +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc" + [[package]] name = "windows_x86_64_gnullvm" version = "0.52.6" @@ -3719,6 +4971,12 @@ version = "0.53.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0a6e035dd0599267ce1ee132e51c27dd29437f63325753051e71dd9e42406c57" +[[package]] +name = "windows_x86_64_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538" + [[package]] name = "windows_x86_64_msvc" version = "0.52.6" @@ -3740,6 +4998,16 @@ dependencies = [ "memchr", ] +[[package]] +name = "winreg" +version = "0.50.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "524e57b2c537c0f9b1e69f1965311ec12182b4122e45035b1508cd24d2adadb1" +dependencies = [ + "cfg-if", + "windows-sys 0.48.0", +] + [[package]] name = "winreg" version = "0.55.0" @@ -3797,7 +5065,7 @@ checksum = "38da3c9736e16c5d3c8c597a9aaa5d1fa565d0532ae05e27c24aa62fb32c0ab6" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.104", "synstructure", ] @@ -3818,7 +5086,7 @@ checksum = "9ecf5b4cc5364572d7f4c329661bcc82724222973f2cab6f050a4e5c22f75181" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.104", ] [[package]] @@ -3838,7 +5106,7 @@ checksum = "d71e5d6e06ab090c67b5e44993ec16b72dcbaabc526db883a360057678b48502" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.104", "synstructure", ] @@ -3878,5 +5146,5 @@ checksum = "5b96237efa0c878c64bd89c436f661be4e46b2f3eff1ebb976f7ef2321d2f58f" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.104", ] diff --git a/Cargo.toml b/Cargo.toml index d0cbb5c4..bf5b68df 
100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,4 +1,3 @@ [workspace] resolver = "3" -members = ["server", "mlm_db", "mlm_parse", "mlm_mam"] - +members = ["server", "mlm_db", "mlm_parse", "mlm_mam", "mlm_meta"] diff --git a/mlm_db/src/lib.rs b/mlm_db/src/lib.rs index ff16f855..b1094cc7 100644 --- a/mlm_db/src/lib.rs +++ b/mlm_db/src/lib.rs @@ -22,8 +22,8 @@ use std::collections::HashMap; use anyhow::Result; use mlm_parse::normalize_title; -use native_db::Models; pub use native_db::Database; +use native_db::Models; use native_db::transaction::RwTransaction; use native_db::{ToInput, db_type}; use once_cell::sync::Lazy; diff --git a/mlm_db/src/v03.rs b/mlm_db/src/v03.rs index 06828eb0..42b67abf 100644 --- a/mlm_db/src/v03.rs +++ b/mlm_db/src/v03.rs @@ -1,6 +1,6 @@ use super::{v01, v02, v04, v05, v06}; -use native_db::{native_db, Key, ToKey}; -use native_model::{native_model, Model}; +use native_db::{Key, ToKey, native_db}; +use native_model::{Model, native_model}; use serde::{Deserialize, Serialize}; use std::path::PathBuf; use time::{OffsetDateTime, UtcDateTime}; diff --git a/mlm_db/src/v09.rs b/mlm_db/src/v09.rs index 65a6a9d9..0241f41d 100644 --- a/mlm_db/src/v09.rs +++ b/mlm_db/src/v09.rs @@ -1,6 +1,6 @@ use super::{v01, v03, v04, v06, v08, v10}; -use native_db::{native_db, ToKey}; -use native_model::{native_model, Model}; +use native_db::{ToKey, native_db}; +use native_model::{Model, native_model}; use serde::{Deserialize, Serialize}; use std::path::PathBuf; use tracing::warn; diff --git a/mlm_db/src/v13.rs b/mlm_db/src/v13.rs index 4d20a4ff..bc46f4d6 100644 --- a/mlm_db/src/v13.rs +++ b/mlm_db/src/v13.rs @@ -1,6 +1,6 @@ use super::{v03, v04, v06, v08, v09, v10, v11, v12, v14}; -use native_db::{native_db, ToKey}; -use native_model::{native_model, Model}; +use native_db::{ToKey, native_db}; +use native_model::{Model, native_model}; use serde::{Deserialize, Serialize}; use std::path::PathBuf; diff --git a/mlm_db/src/v18.rs b/mlm_db/src/v18.rs index 
66ab5abf..5ceb8d05 100644 --- a/mlm_db/src/v18.rs +++ b/mlm_db/src/v18.rs @@ -1,9 +1,9 @@ use crate::ids; use super::{v01, v03, v04, v05, v06, v08, v09, v10, v11, v12, v13, v15, v16, v17}; -use mlm_parse::{normalize_title, parse_edition}; -use native_db::{native_db, ToKey}; -use native_model::{native_model, Model}; +use mlm_parse::normalize_title; +use native_db::{ToKey, native_db}; +use native_model::{Model, native_model}; use serde::{Deserialize, Serialize}; use std::{collections::BTreeMap, path::PathBuf}; diff --git a/mlm_meta/Cargo.toml b/mlm_meta/Cargo.toml new file mode 100644 index 00000000..7f72f54a --- /dev/null +++ b/mlm_meta/Cargo.toml @@ -0,0 +1,24 @@ +[package] +name = "mlm_meta" +version = "0.1.0" +edition = "2024" + +[dependencies] +anyhow = "1.0" +async-trait = "0.1" +openssl = { version = "0.10.73", features = ["vendored"] } +serde = { version = "1.0", features = ["derive"] } +reqwest = { version = "0.11", features = ["json", "gzip", "rustls-tls"] } +tokio = { version = "1", features = ["rt-multi-thread", "macros"] } +serde_json = "1.0" +scraper = "0.14" +mlm_db = { path = "../mlm_db" } +mlm_parse = { path = "../mlm_parse" } +strsim = "0.11" +tracing = "0.1" + +urlencoding = "2.1" +url = "2.4" + +[dev-dependencies] +httpmock = "0.7" diff --git a/mlm_meta/README.md b/mlm_meta/README.md new file mode 100644 index 00000000..fe4d92fd --- /dev/null +++ b/mlm_meta/README.md @@ -0,0 +1,14 @@ +mlm_meta +======== + +Small crate defining the Provider trait and helper types for external +metadata providers (Goodreads, Hardcover, ...). + +Purpose +- Provide a stable trait so server can query multiple providers and map + results into existing `TorrentMeta`. + +How to add a provider +- Implement `mlm_meta::Provider` and return `TorrentMeta` from `fetch`. +- Register the provider in server's `MetadataService` and map fields into + `TorrentMeta` before persisting. 
diff --git a/mlm_meta/src/helpers.rs b/mlm_meta/src/helpers.rs new file mode 100644 index 00000000..0152f7c4 --- /dev/null +++ b/mlm_meta/src/helpers.rs @@ -0,0 +1,171 @@ +use mlm_parse::{clean_name, normalize_title}; + +pub use anyhow; +pub use tracing::{Level, debug, enabled, trace}; + +/// Search query with optional author. Providers can decide how to use these fields. +#[derive(Debug, Clone)] +pub struct SearchQuery { + pub title: String, + pub author: Option<String>, +} + +impl SearchQuery { + pub fn new(title: String, author: Option<String>) -> Self { + Self { title, author } + } + + /// Build a combined search string for providers that use a single query string. + pub fn to_combined_string(&self) -> String { + match &self.author { + Some(author) if !self.title.is_empty() && !author.is_empty() => { + format!("{} {}", self.title, author) + } + _ if !self.title.is_empty() => self.title.clone(), + _ => String::new(), + } + } +} + +/// Build a SearchQuery using the first non-blank author (trimmed), if there is one +pub fn query_with_author(title: &str, authors: &[String]) -> SearchQuery { + let author = authors + .iter() + .map(|a| a.trim()) + .find(|a| !a.is_empty()) + .map(|a| a.to_string()); + SearchQuery::new(title.to_string(), author) +} + +/// Build SearchQuery without author (title-only search) +pub fn query_title_only(title: &str) -> SearchQuery { + SearchQuery::new(title.to_string(), None) +} + +/// Normalized Levenshtein similarity in 0.0..=1.0 (1.0 means identical) +pub fn token_similarity(a: &str, b: &str) -> f64 { + strsim::normalized_levenshtein(a, b) +} + +/// Normalize author names (clean and lowercase) +pub fn normalize_authors(auths: &[String]) -> Vec<String> { + auths + .iter() + .map(|a| { + let mut s = a.clone(); + let _ = clean_name(&mut s); + s.to_lowercase() + }) + .collect() +} + +/// Score a candidate by title and author similarity. Candidate title and +/// candidate authors are provided directly as strings (the caller extracts +/// them from JSON). The query title/authors are the original query values. 
+pub fn score_candidate( + cand_title: Option<&str>, + cand_auths: &[String], + q_title: &Option<String>, + q_auths: &[String], +) -> f64 { + let q_title_norm = q_title.as_ref().map(|t| normalize_title(t)); + + let mut title_score = 0.0f64; + let mut title_exact = false; + if let Some(qt_norm) = q_title_norm.as_ref() + && let Some(ct) = cand_title + { + let cand = normalize_title(ct); + if cand == *qt_norm { + title_score = 1.0; + title_exact = true; + } else if !cand.is_empty() && (cand.contains(qt_norm.as_str()) || qt_norm.contains(cand.as_str())) { // an empty title is a substring of everything, so it must not count + title_score = 0.9; + } else { + title_score = token_similarity(&cand, qt_norm); + } + } + + let mut author_score = 0.0f64; + let mut authors_match = false; + if !q_auths.is_empty() { + let q_auths_norm = normalize_authors(q_auths); + let mut best = 0.0f64; + for a in cand_auths { + let mut n = a.clone(); + let _ = clean_name(&mut n); + let n = n.to_lowercase(); + for qa in &q_auths_norm { + if !n.is_empty() && (n.contains(qa) || qa.contains(&n)) { // an empty candidate name would otherwise match every author + best = best.max(1.0); + authors_match = true; + } else { + best = best.max(token_similarity(&n, qa)); + } + } + } + author_score = best; + } + + // Penalize heavily if title is not exact AND no author match + // This prevents "Not the Boss of the Year" from matching "Boss of the Year" + // when authors don't match + if !title_exact && !authors_match && q_title_norm.is_some() && !q_auths.is_empty() { + return 0.0; + } + + if q_title_norm.is_some() && !q_auths.is_empty() { + 0.7 * title_score + 0.3 * author_score + } else if q_title_norm.is_some() { + title_score + } else { + author_score + } +} + +#[cfg(test)] +mod tests { + use super::*; + use mlm_parse::normalize_title; + + #[test] + fn test_token_similarity() { + assert!(token_similarity("great adventure", "great adventure") > 0.999); + assert!(token_similarity("great adventure", "great 
adventures") > 0.8); + assert!(token_similarity("great adventure", "completely different") < 0.3); + } + + #[test] + fn test_score_candidate_title_pref() { + let q_title = 
Some(normalize_title("The Great Adventure")); + let q_auths: Vec<String> = vec![]; + + let cand_exact_title = Some("The Great Adventure"); + let cand_sim_title = Some("Great Adventure"); + let cand_auths_exact: Vec<String> = vec!["Alice".to_string()]; + let cand_auths_sim: Vec<String> = vec!["Bob Smith".to_string()]; + + let s_exact = score_candidate(cand_exact_title, &cand_auths_exact, &q_title, &q_auths); + let s_sim = score_candidate(cand_sim_title, &cand_auths_sim, &q_title, &q_auths); + assert!(s_exact >= s_sim, "expected exact title to score >= similar"); + } + + #[test] + fn test_score_candidate_author_influence() { + let q_title = Some(normalize_title("Great Adventure")); + let q_auths: Vec<String> = vec!["bob smith".to_string()]; + + let cand_title_only = Some("Great Adventure"); + let cand_both = Some("Great Adventur"); + let cand_auths_title_only: Vec<String> = vec!["Alice".to_string()]; + let cand_auths_both: Vec<String> = vec!["Bob Smith".to_string()]; + + let s_title_only = + score_candidate(cand_title_only, &cand_auths_title_only, &q_title, &q_auths); + let s_both = score_candidate(cand_both, &cand_auths_both, &q_title, &q_auths); + assert!( + s_both > s_title_only, + "expected candidate with matching author to score higher" + ); + } +} diff --git a/mlm_meta/src/http.rs b/mlm_meta/src/http.rs new file mode 100644 index 00000000..c511e512 --- /dev/null +++ b/mlm_meta/src/http.rs @@ -0,0 +1,54 @@ +use anyhow::Result; +use async_trait::async_trait; +use reqwest::Client; + +#[async_trait] +pub trait HttpClient: Send + Sync { + async fn get(&self, url: &str) -> Result<String>; + + async fn post(&self, url: &str, body: Option<&str>, headers: &[(&str, &str)]) + -> Result<String>; +} + +pub struct ReqwestClient { + client: Client, +} + +impl ReqwestClient { + pub fn new() -> Self { + Self { + client: Client::new(), + } + } +} + +impl Default for ReqwestClient { + fn default() -> Self { + Self::new() + } +} + +#[async_trait] +impl HttpClient for ReqwestClient { + async 
fn get(&self, url: &str) -> Result<String> { + let res = 
self.client.get(url).send().await?.text().await?; + Ok(res) + } + + async fn post( + &self, + url: &str, + body: Option<&str>, + headers: &[(&str, &str)], + ) -> Result<String> { + let mut req = self.client.post(url); + for (k, v) in headers { + req = req.header(*k, *v); + } + if let Some(b) = body { + req = req.body(b.to_string()); + } + let res = req.send().await?.text().await?; + Ok(res) + } +} diff --git a/mlm_meta/src/lib.rs b/mlm_meta/src/lib.rs new file mode 100644 index 00000000..bc4d80ef --- /dev/null +++ b/mlm_meta/src/lib.rs @@ -0,0 +1,9 @@ +pub mod helpers; +pub mod http; +pub mod providers; +pub mod traits; + +pub use helpers::*; +pub use http::*; +pub use providers::*; +pub use traits::*; diff --git a/mlm_meta/src/providers/fake.rs b/mlm_meta/src/providers/fake.rs new file mode 100644 index 00000000..e05291cb --- /dev/null +++ b/mlm_meta/src/providers/fake.rs @@ -0,0 +1,32 @@ +use crate::traits::Provider; +use anyhow::Result; +use async_trait::async_trait; +use mlm_db::TorrentMeta; + +pub struct FakeProvider { + pub id_str: String, + pub meta: Option<TorrentMeta>, +} + +impl FakeProvider { + pub fn new(id: &str, meta: Option<TorrentMeta>) -> Self { + Self { + id_str: id.to_string(), + meta, + } + } +} + +#[async_trait] +impl Provider for FakeProvider { + fn id(&self) -> &str { + &self.id_str + } + + async fn fetch(&self, _query: &TorrentMeta) -> Result<TorrentMeta> { + match &self.meta { + Some(m) => Ok(m.clone()), + None => Err(anyhow::anyhow!("not found")), + } + } +} diff --git a/mlm_meta/src/providers/hardcover.rs b/mlm_meta/src/providers/hardcover.rs new file mode 100644 index 00000000..b02a0c50 --- /dev/null +++ b/mlm_meta/src/providers/hardcover.rs @@ -0,0 +1,240 @@ +use anyhow::{Context, Result}; +use async_trait::async_trait; +use tracing::{debug, instrument}; + +use crate::providers::{MetadataProvider, search_with_fallback}; +use crate::traits::Provider; +use crate::{helpers, http::HttpClient}; +use mlm_db::TorrentMeta; +use 
mlm_parse::parse_edition; + +use std::sync::Arc; + +const 
DEFAULT_ENDPOINT: &str = "https://api.hardcover.app/v1/graphql"; + +pub struct Hardcover { + endpoint: String, + client: Arc<dyn HttpClient>, + api_key: Option<String>, +} + +impl Hardcover { + pub fn new(api_key: Option<String>) -> Self { + Self { + endpoint: DEFAULT_ENDPOINT.to_string(), + client: Arc::new(crate::http::ReqwestClient::new()), + api_key, + } + } + + pub fn with_client( + endpoint: &str, + client: Arc<dyn HttpClient>, + api_key: Option<String>, + ) -> Self { + Self { + endpoint: endpoint.to_string(), + client, + api_key, + } + } + + #[instrument(skip_all, fields(query = %query))] + async fn post_graphql( + &self, + query: &str, + variables: serde_json::Value, + ) -> Result<serde_json::Value> { + let body_v = serde_json::json!({ "query": query, "variables": variables }); + let body = serde_json::to_string(&body_v)?; + debug!(url = %self.endpoint, "posting GraphQL request"); + + let headers = if let Some(ref key) = self.api_key { + vec![ + ("content-type", "application/json"), + ("authorization", key.as_str()), + ] + } else { + vec![("content-type", "application/json")] + }; + + let s = self + .client + .post(&self.endpoint, Some(&body), &headers) + .await + .context("post graphql")?; + let v: serde_json::Value = serde_json::from_str(&s).context("parse graphql json")?; + Ok(v) + } + + fn parse_results(&self, v: &serde_json::Value) -> Vec<serde_json::Value> { + let hits = v + .get("data") + .and_then(|d| d.get("search")) + .and_then(|s| s.get("results")) + .and_then(|r| r.get("hits")) + .and_then(|h| h.as_array()) + .cloned() + .unwrap_or_default(); + + hits.iter() + .filter_map(|hit| hit.get("document").cloned()) + .collect() + } +} + +impl Default for Hardcover { + fn default() -> Self { + Self::new(None) + } +} + +impl MetadataProvider for Hardcover { + type SearchResult = serde_json::Value; + + fn id(&self) -> &str { + "hardcover" + } + + async fn search(&self, query: &helpers::SearchQuery) -> Result<Vec<Self::SearchResult>> { + let gql = r#" 
+ query Search($q: String!, $type: String!, $per_page: Int, $page: Int) { + search(query: $q, query_type: $type, per_page: 
$per_page, page: $page) { + results + } + } + "#; + + let qstr = query.to_combined_string(); + let vars = serde_json::json!({"q": qstr, "type": "Book", "per_page": 10, "page": 1}); + debug!(query = %qstr, "searching hardcover"); + let v = self.post_graphql(gql, vars).await?; + let results = self.parse_results(&v); + debug!(count = results.len(), "hardcover search results"); + Ok(results) + } + + fn result_title<'a>(&self, result: &'a Self::SearchResult) -> Option<&'a str> { + result.get("title")?.as_str() + } + + fn result_authors(&self, result: &Self::SearchResult) -> Vec<String> { + result + .get("author_names") + .and_then(|a| a.as_array()) + .map(|arr| { + arr.iter() + .filter_map(|v| v.as_str().map(|s| s.to_string())) + .collect() + }) + .unwrap_or_default() + } + + async fn result_to_meta(&self, result: &Self::SearchResult) -> Result<TorrentMeta> { + let title = result + .get("title") + .and_then(|t| t.as_str()) + .unwrap_or("") + .to_string(); + let authors: Vec<String> = self.result_authors(result); + let description = result + .get("description") + .and_then(|d| d.as_str()) + .map(|s| s.to_string()); + + let mut tm = TorrentMeta { + title, + description: description.unwrap_or_default(), + authors, + ..Default::default() + }; + + // tags/genres + let mut tags = Vec::new(); + if let Some(tarr) = result.get("tags").and_then(|t| t.as_array()) { + for t in tarr { + if let Some(s) = t.as_str() { + let s = s.trim().to_lowercase(); + if !s.is_empty() && !tags.contains(&s) { + tags.push(s); + } + } + } + } + if let Some(genres) = result.get("genres").and_then(|g| g.as_array()) { + for g in genres { + if let Some(s) = g.as_str() { + let s = s.trim().to_lowercase(); + if !s.is_empty() && !tags.contains(&s) { + tags.push(s); + } + } + } + } + tm.tags = tags; + + // ISBNs + if let Some(isbns_arr) = result.get("isbns").and_then(|i| i.as_array()) + && let Some(first) = isbns_arr.iter().find_map(|v| v.as_str()) + { + let s = first.trim().to_string(); + 
if !s.is_empty() { + tm.ids.insert(mlm_db::ids::ISBN.to_string(), s); + } + } + + // edition + if let Some(ed_str) = result + .get("edition") + .and_then(|v| v.as_str()) + .or_else(|| result.get("edition_string").and_then(|v| v.as_str())) + { + let (_t, ed_parsed) = parse_edition(&tm.title, ed_str); + if ed_parsed.is_some() { + tm.edition = ed_parsed; + } + } + + // series + if let Some(series_arr) = result.get("series_names").and_then(|v| v.as_array()) { + for s in series_arr { + if let Some(name) = s.as_str() { + tm.series.push(mlm_db::Series { + name: name.to_string(), + entries: mlm_db::SeriesEntries::new(vec![]), + }); + } else if let Some(obj) = s.as_object() + && let Some(name) = obj.get("name").and_then(|v| v.as_str()) + { + if let Some(idx) = obj.get("index").and_then(|v| v.as_f64()) { + let entry = mlm_db::SeriesEntry::Num(idx as f32); + tm.series.push(mlm_db::Series { + name: name.to_string(), + entries: mlm_db::SeriesEntries::new(vec![entry]), + }); + } else { + tm.series.push(mlm_db::Series { + name: name.to_string(), + entries: mlm_db::SeriesEntries::new(vec![]), + }); + } + } + } + } + + debug!(title = %tm.title, authors = ?tm.authors, tags_count = tm.tags.len(), "returning hardcover metadata"); + Ok(tm) + } +} + +#[async_trait] +impl Provider for Hardcover { + fn id(&self) -> &str { + MetadataProvider::id(self) + } + + async fn fetch(&self, query: &TorrentMeta) -> Result<TorrentMeta> { + let (meta, _score) = search_with_fallback(self, &query.title, &query.authors).await?; + Ok(meta) + } +} diff --git a/mlm_meta/src/providers/mod.rs b/mlm_meta/src/providers/mod.rs new file mode 100644 index 00000000..ffd646a0 --- /dev/null +++ b/mlm_meta/src/providers/mod.rs @@ -0,0 +1,138 @@ +pub mod fake; +pub mod hardcover; +pub mod romanceio; + +pub use fake::FakeProvider; +pub use hardcover::Hardcover; +pub use romanceio::RomanceIo; + +use crate::helpers::SearchQuery; +use anyhow::Result; +use mlm_db::TorrentMeta; + +/// Metadata provider trait for searching 
and fetching book 
metadata. +/// Implement this trait to add a new provider. +#[allow(async_fn_in_trait)] +pub trait MetadataProvider: Send + Sync { + /// Provider's search result type (e.g., serde_json::Value for JSON APIs) + type SearchResult; + + /// Unique identifier for this provider (e.g., "hardcover", "romanceio") + fn id(&self) -> &str; + + /// Minimum score threshold for accepting a match. Default 0.5. + fn min_score_threshold(&self) -> f64 { + 0.5 + } + + /// Perform a search query. Receives title and optional author. + async fn search(&self, query: &SearchQuery) -> Result<Vec<Self::SearchResult>>; + + /// Extract title from a search result + fn result_title<'a>(&self, result: &'a Self::SearchResult) -> Option<&'a str>; + + /// Extract authors from a search result + fn result_authors(&self, result: &Self::SearchResult) -> Vec<String>; + + /// Convert a search result to TorrentMeta. May fetch additional data (e.g., romanceio). + async fn result_to_meta(&self, result: &Self::SearchResult) -> Result<TorrentMeta>; +} + +/// `search_query` - the query sent to the provider (may have no author for title-only fallback) +/// `scoring_query` - the query used for scoring (always includes author if provided) +fn select_best<P: MetadataProvider>( + provider: &P, + results: &[P::SearchResult], + _search_query: &SearchQuery, + scoring_query: &SearchQuery, + threshold: f64, +) -> Result<Option<(usize, f64)>> { + let q_title = Some(scoring_query.title.clone()); + let q_auths = scoring_query.author.iter().cloned().collect::<Vec<String>>(); + + let mut best_idx: Option<usize> = None; + let mut best_score = -1.0f64; + + for (i, item) in results.iter().enumerate() { + let title = provider.result_title(item); + let authors = provider.result_authors(item); + + let score = crate::helpers::score_candidate(title, &authors, &q_title, &q_auths); + + if score > best_score { + best_score = score; + best_idx = Some(i); + } + } + + if best_score >= threshold { + Ok(best_idx.map(|idx| (idx, best_score))) 
+ } else { + Ok(None) + } +} + +/// Run a search with fallback: try title+author first, then title-only if 
needed. +/// Returns the matched metadata and score if found above threshold. +pub async fn search_with_fallback<P: MetadataProvider>( + provider: &P, + title: &str, + authors: &[String], +) -> Result<(TorrentMeta, f64)> { + if title.trim().is_empty() { + return Err(anyhow::anyhow!("title is required for search")); + } + + let threshold = provider.min_score_threshold(); + + // Build queries + let q_with_author = crate::helpers::query_with_author(title, authors); + let q_title_only = crate::helpers::query_title_only(title); + + // If we have authors, try with author first + let tried_with_author = if q_with_author.author.is_some() { + match provider.search(&q_with_author).await { + Ok(results) => { + if !results.is_empty() + && let Some((idx, score)) = select_best( + provider, + &results, + &q_with_author, + &q_with_author, + threshold, + )? + { + let meta = provider.result_to_meta(&results[idx]).await?; + return Ok((meta, score)); + } + } + Err(e) => { + tracing::warn!("search with author failed: {}", e); + } + } + true + } else { + false + }; + + // If authors was provided but didn't yield results above threshold, try title-only + // Or if no authors were provided, do title-only search + if (!tried_with_author || !authors.is_empty()) && !q_title_only.title.is_empty() { + match provider.search(&q_title_only).await { + Ok(results) => { + if !results.is_empty() + && let Some((idx, score)) = + select_best(provider, &results, &q_title_only, &q_with_author, threshold)? 
+ { + let meta = provider.result_to_meta(&results[idx]).await?; + return Ok((meta, score)); + } + } + Err(e) => { + tracing::warn!("title-only search failed: {}", e); + } + } + } + + Err(anyhow::anyhow!("no result above score threshold")) +} diff --git a/mlm_meta/src/providers/romanceio.rs b/mlm_meta/src/providers/romanceio.rs new file mode 100644 index 00000000..509a1f26 --- /dev/null +++ b/mlm_meta/src/providers/romanceio.rs @@ -0,0 +1,247 @@ +use std::sync::Arc; + +use anyhow::{Context, Result}; +use async_trait::async_trait; +use scraper::{Html, Selector}; +use tracing::{debug, instrument}; +use url::Url; + +use crate::http::ReqwestClient; +use crate::providers::{MetadataProvider, search_with_fallback}; +use crate::traits::Provider; +use crate::{helpers, http::HttpClient}; +use mlm_db::TorrentMeta; + +pub struct RomanceIo { + pub client: Arc<dyn HttpClient>, +} + +impl RomanceIo { + pub fn new() -> Self { + Self { + client: Arc::new(ReqwestClient::new()), + } + } + + pub fn with_client(client: Arc<dyn HttpClient>) -> Self { + Self { client } + } + + #[instrument(skip_all, fields(url = %url))] + async fn fetch_html(&self, url: &str) -> Result<String> { + debug!("fetching romance.io HTML"); + self.client.get(url).await + } + + async fn fetch_book(&self, book_url: &str) -> Result<TorrentMeta> { + let book_html = self.fetch_html(book_url).await.context("fetch book page")?; + self.parse_book_html(&book_html) + } + + pub fn parse_book_html(&self, html: &str) -> Result<TorrentMeta> { + let doc = Html::parse_document(html); + + let script_sel = Selector::parse("script[type=\"application/ld+json\"]").unwrap(); + if let Some(script) = doc.select(&script_sel).next() { + let json_text = script.inner_html(); + let v: serde_json::Value = serde_json::from_str(&json_text).context("parse json-ld")?; + let book = v.get("@graph").and_then(|g| g.get(0)).unwrap_or(&v); + let title = book + .get("name") + .and_then(|n| n.as_str()) + .unwrap_or("") + .to_string(); + let authors: 
Vec<String> = book + .get("author") + .and_then(|a| a.as_array()) + .map(|arr| 
{ + arr.iter() + .filter_map(|p| { + p.get("name") + .and_then(|n| n.as_str()) + .map(|s| s.to_string()) + }) + .collect() + }) + .unwrap_or_default(); + let description = book + .get("description") + .and_then(|d| d.as_str()) + .map(|s| s.to_string()); + + let mut tm = TorrentMeta { + title, + description: description.clone().unwrap_or_default(), + authors, + ..Default::default() + }; + + let mut topics = Vec::new(); + let topics_sel = Selector::parse("#valid-topics-list a.topic").unwrap(); + for t in doc.select(&topics_sel) { + let text = t.text().collect::<Vec<_>>().join(" ").trim().to_lowercase(); + if text.len() > 2 && !topics.contains(&text) { + topics.push(text); + } + } + + if let Some(desc) = description.as_ref() { + for part in desc.split(&[',', '\n'][..]) { + let s = part.trim().to_lowercase(); + if s.len() > 2 && !topics.contains(&s) { + topics.push(s); + } + } + } + + let mut categories = Vec::new(); + let mut tags = Vec::new(); + for t in topics { + if let Some(cat) = topic_to_category(&t) { + if !categories.contains(&cat) { + categories.push(cat); + } + } else if !tags.contains(&t) { + tags.push(t); + } + } + tm.categories = categories; + tm.tags = tags; + + return Ok(tm); + } + Err(anyhow::anyhow!("no json-ld found")) + } +} + +impl Default for RomanceIo { + fn default() -> Self { + Self::new() + } +} + +impl MetadataProvider for RomanceIo { + type SearchResult = serde_json::Value; + + fn id(&self) -> &str { + "romanceio" + } + + async fn search(&self, query: &helpers::SearchQuery) -> Result<Vec<Self::SearchResult>> { + let base = Url::parse("https://www.romance.io").unwrap(); + let qstr = query.to_combined_string(); + + let mut json_url = base.join("/json/search_books").unwrap(); + json_url.query_pairs_mut().append_pair("search", &qstr); + + debug!(query = %qstr, url = %json_url, "searching romance.io"); + let body = self + .fetch_html(json_url.as_str()) + .await + .context("fetch search json")?; + let v: serde_json::Value = 
serde_json::from_str(&body).context("parse search json")?; + + let books = v.get("books").and_then(|b| b.as_array()).cloned(); + debug!( + count = books.as_ref().map(|a| a.len()).unwrap_or(0), + "romance.io search results" + ); + Ok(books.unwrap_or_default()) + } + + fn result_title<'a>(&self, result: &'a Self::SearchResult) -> Option<&'a str> { + result + .get("info") + .and_then(|info| info.get("title")) + .and_then(|t| t.as_str()) + .or_else(|| result.get("url").and_then(|u| u.as_str())) + } + + fn result_authors(&self, result: &Self::SearchResult) -> Vec<String> { + result + .get("authors") + .and_then(|a| a.as_array()) + .map(|arr| { + arr.iter() + .filter_map(|a| { + a.get("name") + .and_then(|n| n.as_str()) + .map(|s| s.to_string()) + }) + .collect() + }) + .unwrap_or_default() + } + + async fn result_to_meta(&self, result: &Self::SearchResult) -> Result<TorrentMeta> { + // RomanceIo fetches the full book page for verification, so this method + // extracts the URL and fetches the book page + let url = result + .get("url") + .and_then(|u| u.as_str()) + .context("no URL in search result")?; + + let base = Url::parse("https://www.romance.io").unwrap(); + let book_url = base.join(url).context("invalid book URL")?; + + debug!(url = %book_url, "fetching romance.io book page"); + let meta = self.fetch_book(book_url.as_str()).await?; + + // No title/author verification is performed here; the parsed page is returned as-is. + // Note: `Provider::fetch` verifies the title and authors against the query. 
+ Ok(meta) + } +} + +#[async_trait] +impl Provider for RomanceIo { + fn id(&self) -> &str { + MetadataProvider::id(self) + } + + async fn fetch(&self, query: &TorrentMeta) -> Result<TorrentMeta> { + let (meta, _score) = search_with_fallback(self, &query.title, &query.authors).await?; + + // Additional verification: ensure title contains query title + let query_title_lower = query.title.to_lowercase(); + let meta_title_lower = meta.title.to_lowercase(); + if !meta_title_lower.contains(&query_title_lower) { + return Err(anyhow::anyhow!( + 
"matched title does not contain query title" + )); + } + + // Additional verification: if query has authors, at least one should match + if !query.authors.is_empty() { + let query_authors_lower: Vec<String> = + query.authors.iter().map(|a| a.to_lowercase()).collect(); + let meta_authors_lower: Vec<String> = + meta.authors.iter().map(|a| a.to_lowercase()).collect(); + let any_match = query_authors_lower.iter().any(|qa| { + meta_authors_lower + .iter() + .any(|ma| ma.contains(qa) || qa.contains(ma)) + }); + if !any_match { + return Err(anyhow::anyhow!( + "matched author does not contain any query author" + )); + } + } + + Ok(meta) + } +} + +fn topic_to_category(topic: &str) -> Option<String> { + let t = topic.trim().to_lowercase(); + match t.as_str() { + "contemporary" | "contemporary romance" => Some("contemporary".to_string()), + "romance" => Some("romance".to_string()), + "dark" | "dark romance" => Some("dark romance".to_string()), + "suspense" | "romantic suspense" => Some("suspense".to_string()), + "erotic" | "erotic romance" | "steam" | "explicit" => Some("erotic".to_string()), + "office" | "workplace" | "boss & employee" => Some("contemporary".to_string()), + _ => None, + } +} diff --git a/mlm_meta/src/traits.rs b/mlm_meta/src/traits.rs new file mode 100644 index 00000000..077bc677 --- /dev/null +++ b/mlm_meta/src/traits.rs @@ -0,0 +1,15 @@ +use anyhow::Result; +use async_trait::async_trait; +use mlm_db::TorrentMeta; + +/// Implementations should populate and return a `TorrentMeta` containing as +/// much normalized metadata as possible. +#[async_trait] +pub trait Provider: Send + Sync { + /// Short stable id for the provider, e.g. "goodreads" + fn id(&self) -> &str; + + /// Fetch metadata for the given `TorrentMeta` query. Return Ok(TorrentMeta) + /// on success. 
+ async fn fetch(&self, query: &TorrentMeta) -> Result<TorrentMeta>; +} diff --git a/mlm_meta/tests/hardcover_tests.rs b/mlm_meta/tests/hardcover_tests.rs new file mode 100644 index 00000000..ca871987 --- /dev/null +++ b/mlm_meta/tests/hardcover_tests.rs @@ -0,0 +1,266 @@ +use std::sync::Arc; + +use mlm_db::TorrentMeta; +use mlm_meta::Provider; +use mlm_meta::providers::Hardcover; + +mod helper { + use anyhow::Result; + use async_trait::async_trait; + use mlm_meta::http::HttpClient; + + pub struct MockClient { + resps: std::sync::Mutex<Vec<String>>, + } + + impl MockClient { + pub fn new(resp: &str) -> Self { + Self { + resps: std::sync::Mutex::new(vec![resp.to_string()]), + } + } + } + + #[async_trait] + impl HttpClient for MockClient { + async fn get(&self, _url: &str) -> Result<String> { + Ok(String::new()) + } + + async fn post( + &self, + _url: &str, + _body: Option<&str>, + _headers: &[(&str, &str)], + ) -> Result<String> { + let mut guard = self.resps.lock().unwrap(); + if guard.is_empty() { + return Ok(String::new()); + } + Ok(guard.remove(0)) + } + } +} + +#[tokio::test] +async fn hardcover_selects_best_candidate() { + use helper::MockClient; + + let data = r#"{ "data": { "search": { "results": { "hits": [ + { "document": { "title": "The Great Adventure", "author_names": ["Alice Author"], "description": "A" } }, + { "document": { "title": "Great Adventure", "author_names": ["Bob Smith"], "description": "B" } } + ] } } } }"#; + + let client = Arc::new(MockClient::new(data)); + let prov = Hardcover::with_client("http://example/graphql", client, None); + + let query_meta = TorrentMeta { + title: "Great Adventure".to_string(), + authors: vec!["Bob Smith".to_string()], + ..Default::default() + }; + + let m = prov + .fetch(&query_meta) + .await + .expect("should select best candidate"); + assert!(m.authors.iter().any(|a| a.to_lowercase().contains("bob"))); + assert!(m.title.to_lowercase().contains("great")); +} + +#[tokio::test] +async fn 
hardcover_parses_tags_and_isbn() { + use helper::MockClient; + + 
let data = r#"{ "data": { "search": { "results": { "hits": [ + { "document": { "title": "Unique Book", "author_names": ["Unique Author"], "description": "desc", "tags": ["Tropes"], "genres": ["Romance"], "isbns": ["9781234567897"] } } + ] } } } }"#; + + let client = Arc::new(MockClient::new(data)); + let prov = Hardcover::with_client("http://example/graphql", client, None); + + let query_meta = TorrentMeta { + title: "Unique Book".to_string(), + ..Default::default() + }; + + let m = prov + .fetch(&query_meta) + .await + .expect("should parse tags and isbn"); + assert!(m.tags.iter().any(|t| t == "tropes")); + assert!(m.tags.iter().any(|t| t == "romance")); + assert_eq!(m.ids.get("isbn").map(|s| s.as_str()), Some("9781234567897")); +} + +#[tokio::test] +async fn hardcover_empty_results_returns_err() { + use helper::MockClient; + + let data = r#"{ "data": { "search": { "results": { "hits": [] } } } }"#; + let client = Arc::new(MockClient::new(data)); + let prov = Hardcover::with_client("http://example/graphql", client, None); + + let query_meta = TorrentMeta { + title: "Does Not Exist".to_string(), + ..Default::default() + }; + let res = prov.fetch(&query_meta).await; + assert!(res.is_err(), "expected error for empty results"); +} + +#[tokio::test] +async fn hardcover_handles_malformed_fields_gracefully() { + use helper::MockClient; + + let data = r#"{ "data": { "search": { "results": { "hits": [ + { "document": { "title": "Any Title", "description": "only desc", "tags": null, "genres": 123 } } + ] } } } }"#; + let client = Arc::new(MockClient::new(data)); + let prov = Hardcover::with_client("http://example/graphql", client, None); + + let query_meta = TorrentMeta { + title: "Any Title".to_string(), + ..Default::default() + }; + let m = prov + .fetch(&query_meta) + .await + .expect("should handle malformed fields"); + assert_eq!(m.title, "Any Title"); + assert_eq!(m.description, "only desc"); + assert!(m.tags.is_empty()); + assert!(!m.ids.contains_key("isbn")); +} + 
+#[tokio::test] +async fn hardcover_uses_first_isbn_when_multiple_present() { + use helper::MockClient; + + let data = r#"{ "data": { "search": { "results": { "hits": [ + { "document": { "title": "Multi ISBN", "author_names": ["A"], "isbns": ["FIRSTISBN","SECONDISBN"] } } + ] } } } }"#; + let client = Arc::new(MockClient::new(data)); + let prov = Hardcover::with_client("http://example/graphql", client, None); + + let query_meta = TorrentMeta { + title: "Multi ISBN".to_string(), + ..Default::default() + }; + let m = prov + .fetch(&query_meta) + .await + .expect("should parse multiple isbns"); + assert_eq!(m.ids.get("isbn").map(|s| s.as_str()), Some("FIRSTISBN")); +} + +#[tokio::test] +async fn hardcover_tie_breaker_prefers_first_result() { + use helper::MockClient; + + let data = r#"{ "data": { "search": { "results": { "hits": [ + { "document": { "title": "Tie Book", "author_names": ["Author One"], "description": "first" } }, + { "document": { "title": "Tie Book", "author_names": ["Author One"], "description": "second" } } + ] } } } }"#; + + let client = Arc::new(MockClient::new(data)); + let prov = Hardcover::with_client("http://example/graphql", client, None); + + let query_meta = TorrentMeta { + title: "Tie Book".to_string(), + ..Default::default() + }; + let m = prov + .fetch(&query_meta) + .await + .expect("should return first result on tie"); + assert!(m.description == "first"); +} + +#[tokio::test] +async fn hardcover_handles_minor_typos() { + use helper::MockClient; + + let data = r#"{ "data": { "search": { "results": { "hits": [ + { "document": { "title": "Great Adventure", "author_names": ["Bob Smith"], "description": "B" } } + ] } } } }"#; + + let client = Arc::new(MockClient::new(data)); + let prov = Hardcover::with_client("http://example/graphql", client, None); + + let query_meta = TorrentMeta { + title: "Gret Adventure".to_string(), + authors: vec!["Bob Smith".to_string()], + ..Default::default() + }; + let m = prov + .fetch(&query_meta) + .await + 
.expect("should match despite typo"); + assert!(m.title.to_lowercase().contains("great adventure")); +} + +#[tokio::test] +async fn hardcover_parses_isbn_from_search_results() { + use helper::MockClient; + + let search = r#"{ "data": { "search": { "results": { "hits": [ + { "document": { "id": 123, "title": "Detailed Book", "author_names": ["Detail Author"], "description": "short desc", "isbns": ["9781111111111"], "series_names": ["Series A"] } } + ] } } } }"#; + + let client = Arc::new(MockClient::new(search)); + let prov = Hardcover::with_client("http://example/graphql", client, None); + + let query_meta = TorrentMeta { + title: "Detailed Book".to_string(), + ..Default::default() + }; + let m = prov + .fetch(&query_meta) + .await + .expect("should parse search results"); + + assert_eq!(m.ids.get("isbn").map(|s| s.as_str()), Some("9781111111111")); + assert!(m.series.iter().any(|s| s.name == "Series A")); + assert_eq!(m.description, "short desc"); +} + +#[tokio::test] +async fn hardcover_title_only_fallback_still_scores_with_author() { + use helper::MockClient; + + // Query for "Boss of the Year" by "Nicole French" + // Results include a similar title by a different author + // The fallback to title-only should NOT match because author doesn't match + let data = r#"{ "data": { "search": { "results": { "hits": [ + { "document": { "title": "Not the Boss of the Year", "author_names": ["J.S. Cooper"], "description": "wrong author" } }, + { "document": { "title": "Boss of the Year", "author_names": ["Nicole French"], "description": "correct" } } + ] } } } }"#; + + let client = Arc::new(MockClient::new(data)); + let prov = Hardcover::with_client("http://example/graphql", client, None); + + let query_meta = TorrentMeta { + title: "Boss of the Year".to_string(), + authors: vec!["Nicole French".to_string()], + ..Default::default() + }; + + // Should NOT match "Not the Boss of the Year" by J.S. 
Cooper + // Should either match the correct one OR return error + let m = prov + .fetch(&query_meta) + .await + .expect("should find correct match"); + assert!( + m.title.to_lowercase().contains("boss of the year"), + "title should contain 'Boss of the Year'" + ); + assert!( + m.authors + .iter() + .any(|a| a.to_lowercase().contains("nicole")), + "author should be Nicole French, got: {:?}", + m.authors + ); +} diff --git a/mlm_meta/tests/mock_fetcher.rs b/mlm_meta/tests/mock_fetcher.rs new file mode 100644 index 00000000..69a03bf8 --- /dev/null +++ b/mlm_meta/tests/mock_fetcher.rs @@ -0,0 +1,70 @@ +use anyhow::Result; +use mlm_meta::http::HttpClient; +use std::sync::Arc; + +pub struct MockClient; + +#[async_trait::async_trait] +impl HttpClient for MockClient { + async fn get(&self, url: &str) -> Result { + let u = url::Url::parse(url).map_err(|e| anyhow::anyhow!(e))?; + if !u.host_str().is_some_and(|h| h.contains("romance.io")) { + return Err(anyhow::anyhow!("unexpected host in test fetch")); + } + if u.path().starts_with("/json/search_books") { + return Ok(r#"{ + "success": true, + "books": [ + { + "_id":"68b95a390bc0cee156edaf2b", + "info":{"title":"Of Ink and Alchemy"}, + "authors":[{"name":"Sloane St. 
James"}], + "url":"/books/68b95a390bc0cee156edaf2b/of-ink-and-alchemy-sloane-st-james" + } + ] +}"# + .to_string()); + } + if u.path().starts_with("/json/search_authors") { + return Ok(r#"{ "success": true, "authors": [] }"#.to_string()); + } + if u.path().starts_with("/search") { + return Ok("search".to_string()); + } + + Ok(r#" + + + + + +"# + .to_string()) + } + + async fn post( + &self, + _url: &str, + _body: Option<&str>, + _headers: &[(&str, &str)], + ) -> Result { + Err(anyhow::anyhow!("post not implemented in mock")) + } +} + +pub fn boxed() -> Arc { + Arc::new(MockClient) +} diff --git a/mlm_meta/tests/provider_tests.rs b/mlm_meta/tests/provider_tests.rs new file mode 100644 index 00000000..de3a394c --- /dev/null +++ b/mlm_meta/tests/provider_tests.rs @@ -0,0 +1,32 @@ +use mlm_db::TorrentMeta; +use mlm_meta::providers::FakeProvider; +use mlm_meta::traits::Provider; + +#[tokio::test] +async fn fake_provider_returns_meta() { + let meta = TorrentMeta { + title: "The Test Book".to_string(), + authors: vec!["Jane Doe".to_string()], + description: "desc".to_string(), + ..Default::default() + }; + + let provider = FakeProvider::new("fake", Some(meta.clone())); + let mut q: TorrentMeta = Default::default(); + q.ids + .insert("isbn".to_string(), "9781234567897".to_string()); + let got = provider.fetch(&q).await.expect("should return meta"); + assert_eq!(got.title, meta.title); + assert_eq!(got.authors, meta.authors); +} + +#[tokio::test] +async fn fake_provider_not_found() { + let provider = FakeProvider::new("fake", None); + let q = TorrentMeta { + title: "nope".to_string(), + ..Default::default() + }; + let res = provider.fetch(&q).await; + assert!(res.is_err()); +} diff --git a/mlm_meta/tests/romanceio_tests.rs b/mlm_meta/tests/romanceio_tests.rs new file mode 100644 index 00000000..5deee935 --- /dev/null +++ b/mlm_meta/tests/romanceio_tests.rs @@ -0,0 +1,161 @@ +use mlm_db::TorrentMeta; +use mlm_meta::Provider; +use mlm_meta::http::HttpClient; +use 
mlm_meta::providers::RomanceIo; + +mod mock_fetcher; + +#[tokio::test] +async fn romanceio_parses_book() { + let prov = RomanceIo::with_client(mock_fetcher::boxed()); + let query_meta = TorrentMeta { + title: "Of Ink and Alchemy".to_string(), + ..Default::default() + }; + let m = prov.fetch(&query_meta).await.expect("should parse book"); + assert!(m.title.contains("Of Ink and Alchemy")); + assert!(m.authors.iter().any(|a| a.contains("Sloane"))); + assert!(!m.description.is_empty()); +} + +#[tokio::test] +async fn romanceio_matches_title_and_author() { + let prov = RomanceIo::with_client(mock_fetcher::boxed()); + let query_meta = TorrentMeta { + title: "Of Ink and Alchemy".to_string(), + authors: vec!["Sloane St. James".to_string()], + ..Default::default() + }; + let m = prov + .fetch(&query_meta) + .await + .expect("should match title+author"); + assert!(m.title.to_lowercase().contains("of ink and alchemy")); + assert!( + m.authors + .iter() + .any(|a| a.to_lowercase().contains("sloane")) + ); +} + +#[tokio::test] +async fn romanceio_rejects_title_with_nonmatching_author() { + let prov = RomanceIo::with_client(mock_fetcher::boxed()); + let query_meta = TorrentMeta { + title: "Of Ink and Alchemy".to_string(), + authors: vec!["Some Other Author".to_string()], + ..Default::default() + }; + let res = prov.fetch(&query_meta).await; + assert!(res.is_err(), "expected no result for non-matching author"); +} + +#[tokio::test] +async fn romanceio_rejects_different_title_same_author() { + let prov = RomanceIo::with_client(mock_fetcher::boxed()); + let query_meta = TorrentMeta { + title: "A Title That Does Not Exist".to_string(), + authors: vec!["Sloane St. 
James".to_string()], + ..Default::default() + }; + let res = prov.fetch(&query_meta).await; + assert!( + res.is_err(), + "expected no result for different title even if author matches" + ); +} + +#[tokio::test] +async fn romanceio_finds_late_result_in_json_array() { + use anyhow::Result; + use std::sync::Arc; + + struct CustomClient; + + #[async_trait::async_trait] + impl HttpClient for CustomClient { + async fn get(&self, url: &str) -> Result { + if url.contains("/json/search_books") { + let data = r#"{ + "success": true, + "books": [ + {"_id":"x1","info":{"title":"Unrelated Book"},"url":"/books/x1/unrelated"}, + {"_id":"x2","info":{"title":"Another Irrelevant"},"url":"/books/x2/irrelevant"}, + {"_id":"68b95a390bc0cee156edaf2b","info":{"title":"Of Ink and Alchemy"},"authors":[{"name":"Sloane St. James"}],"url":"/books/68b95a390bc0cee156edaf2b/of-ink-and-alchemy-sloane-st-james"} + ] + }"#; + return Ok(data.to_string()); + } + if url.contains("/books/68b95a390bc0cee156edaf2b") { + let html = r#" + + + + + +"#; + return Ok(html.to_string()); + } + Err(anyhow::anyhow!("unexpected url")) + } + + async fn post( + &self, + _url: &str, + _body: Option<&str>, + _headers: &[(&str, &str)], + ) -> Result { + Err(anyhow::anyhow!("post not implemented")) + } + } + + let prov = RomanceIo::with_client(Arc::new(CustomClient)); + let query_meta = TorrentMeta { + title: "Of Ink and Alchemy".to_string(), + authors: vec!["Sloane St. James".to_string()], + ..Default::default() + }; + let m = prov + .fetch(&query_meta) + .await + .expect("should find late result"); + assert!(m.title.to_lowercase().contains("of ink and alchemy")); +} + +#[tokio::test] +async fn parse_book_html_extracts_categories_and_tags() { + let prov = RomanceIo::with_client(mock_fetcher::boxed()); + let query_meta = TorrentMeta { + title: "Of Ink and Alchemy".to_string(), + authors: vec!["Sloane St. 
James".to_string()], + ..Default::default() + }; + let m = prov.fetch(&query_meta).await.expect("should parse book"); + + assert!(m.title.to_lowercase().contains("of ink and alchemy")); + assert!( + m.authors + .iter() + .any(|a| a.to_lowercase().contains("sloane")) + ); + + // categories should include contemporary and dark romance (derived from topics) + assert!(m.categories.iter().any(|c| c == "contemporary")); + assert!(m.categories.iter().any(|c| c == "dark romance")); + + // tags should include some of the romance-specific tropes + let tags = m.tags.join(","); + assert!(tags.contains("age difference") || tags.contains("age gap")); + assert!(tags.contains("friends to lovers")); +} diff --git a/mlm_meta/tests/scoring_tests.rs b/mlm_meta/tests/scoring_tests.rs new file mode 100644 index 00000000..8a550790 --- /dev/null +++ b/mlm_meta/tests/scoring_tests.rs @@ -0,0 +1,50 @@ +use std::sync::Arc; + +use mlm_meta::{HttpClient, providers::romanceio::RomanceIo}; + +const SAMPLE_ROMANCE_HTML: &str = r#" + + + + + +"#; + +#[test] +fn parse_book_html_smoke() { + struct DummyClient; + #[async_trait::async_trait] + impl HttpClient for DummyClient { + async fn get(&self, _url: &str) -> anyhow::Result { + anyhow::bail!("not used") + } + async fn post( + &self, + _url: &str, + _body: Option<&str>, + _headers: &[(&str, &str)], + ) -> anyhow::Result { + anyhow::bail!("not used") + } + } + + let provider = RomanceIo::with_client(Arc::new(DummyClient)); + let meta = provider.parse_book_html(SAMPLE_ROMANCE_HTML).unwrap(); + assert!(!meta.title.is_empty()); + + assert!(!meta.title.is_empty()); + assert!(!meta.authors.is_empty()); +} diff --git a/server/Cargo.toml b/server/Cargo.toml index ab92c832..9ac21718 100644 --- a/server/Cargo.toml +++ b/server/Cargo.toml @@ -32,6 +32,7 @@ matchr = "0.2.5" mlm_db = { path = "../mlm_db" } mlm_mam = { path = "../mlm_mam" } mlm_parse = { path = "../mlm_parse" } +mlm_meta = { path = "../mlm_meta" } native_db = { git = 
"https://github.com/StirlingMouse/native_db.git", branch = "0.8.x" } native_model = "0.4.20" once_cell = "1.21.3" @@ -85,3 +86,5 @@ embed-resource = "3.0.5" [dev-dependencies] tempfile = "3.24.0" +async-trait = "0.1" +url = "2.4" diff --git a/server/src/autograbber.rs b/server/src/autograbber.rs index 2d8d77d9..16498e4f 100644 --- a/server/src/autograbber.rs +++ b/server/src/autograbber.rs @@ -731,7 +731,8 @@ pub async fn update_torrent_meta( } } - if linker_is_owner && torrent.linker.is_none() + if linker_is_owner + && torrent.linker.is_none() && let Some(mam_torrent) = mam_torrent { torrent.linker = Some(mam_torrent.owner_name.clone()); diff --git a/server/src/config.rs b/server/src/config.rs index ddca8874..33dffcc5 100644 --- a/server/src/config.rs +++ b/server/src/config.rs @@ -1,8 +1,8 @@ use std::{collections::BTreeMap, path::PathBuf}; use mlm_db::{ - impls::{parse, parse_opt, parse_vec}, Flags, Language, MediaType, OldDbMainCat, Size, + impls::{parse, parse_opt, parse_vec}, }; use mlm_mam::{ enums::{Categories, SearchIn, SnatchlistType}, @@ -11,6 +11,59 @@ use mlm_mam::{ use serde::{Deserialize, Serialize}; use time::Date; +#[derive(Clone, Debug, Deserialize)] +#[serde(tag = "id", rename_all = "lowercase")] +pub enum ProviderConfig { + Hardcover(HardcoverConfig), + RomanceIo(RomanceIoConfig), +} + +#[derive(Clone, Debug, Deserialize)] +#[serde(deny_unknown_fields)] +pub struct HardcoverConfig { + #[serde(default = "default_provider_enabled")] + pub enabled: bool, + #[serde(default)] + pub timeout_secs: Option, + pub api_key: Option, +} + +#[derive(Clone, Debug, Deserialize)] +#[serde(deny_unknown_fields)] +pub struct RomanceIoConfig { + #[serde(default = "default_provider_enabled")] + pub enabled: bool, + #[serde(default)] + pub timeout_secs: Option, +} + +impl ProviderConfig { + pub fn id(&self) -> &str { + match self { + ProviderConfig::Hardcover(_) => "hardcover", + ProviderConfig::RomanceIo(_) => "romanceio", + } + } + + pub fn enabled(&self) -> bool { 
+ match self { + ProviderConfig::Hardcover(c) => c.enabled, + ProviderConfig::RomanceIo(c) => c.enabled, + } + } + + pub fn timeout_secs(&self) -> Option { + match self { + ProviderConfig::Hardcover(c) => c.timeout_secs, + ProviderConfig::RomanceIo(c) => c.timeout_secs, + } + } +} + +fn default_provider_enabled() -> bool { + true +} + #[derive(Clone, Debug, Deserialize)] #[serde(deny_unknown_fields)] pub struct Config { @@ -74,6 +127,8 @@ pub struct Config { #[serde(default)] #[serde(rename = "library")] pub libraries: Vec, + #[serde(default)] + pub metadata_providers: Vec, } #[derive(Clone, Debug, Default, Deserialize)] @@ -424,3 +479,43 @@ fn default_music_types() -> Vec { fn default_radio_types() -> Vec { ["mp3"].iter().map(ToString::to_string).collect() } + +impl Default for Config { + fn default() -> Self { + Self { + mam_id: String::new(), + web_host: default_host(), + web_port: default_port(), + min_ratio: default_min_ratio(), + unsat_buffer: default_unsat_buffer(), + wedge_buffer: 0, + add_torrents_stopped: false, + exclude_narrator_in_library_dir: false, + search_interval: default_search_interval(), + link_interval: default_link_interval(), + import_interval: default_import_interval(), + ignore_torrents: vec![], + + audio_types: default_audio_types(), + ebook_types: default_ebook_types(), + music_types: default_music_types(), + radio_types: default_radio_types(), + + search: Default::default(), + audiobookshelf: None, + + autograbs: vec![], + snatchlist: vec![], + + goodreads_lists: vec![], + notion_lists: vec![], + + tags: vec![], + + qbittorrent: vec![], + + libraries: vec![], + metadata_providers: vec![], + } + } +} diff --git a/server/src/config_impl.rs b/server/src/config_impl.rs index b09cb514..e872a3ed 100644 --- a/server/src/config_impl.rs +++ b/server/src/config_impl.rs @@ -162,6 +162,7 @@ impl TorrentFilter { } } +#[allow(dead_code)] impl EditionFilter { pub fn matches(&self, torrent: &MaMTorrent) -> bool { if !self.media_type.is_empty() diff 
--git a/server/src/lib.rs b/server/src/lib.rs index 1e09b56d..40eba759 100644 --- a/server/src/lib.rs +++ b/server/src/lib.rs @@ -7,6 +7,7 @@ pub mod exporter; pub mod linker; pub mod lists; pub mod logging; +pub mod metadata; pub mod qbittorrent; pub mod snatchlist; pub mod stats; diff --git a/server/src/linker/common.rs b/server/src/linker/common.rs index e5cd4fec..7aded018 100644 --- a/server/src/linker/common.rs +++ b/server/src/linker/common.rs @@ -209,11 +209,22 @@ mod tests { #[test] fn test_select_format() { - struct F { name: String } + struct F { + name: String, + } impl HasFileName for F { - fn name_lower(&self) -> String { self.name.to_lowercase() } + fn name_lower(&self) -> String { + self.name.to_lowercase() + } } - let files = vec![F { name: "book.M4B".to_string() }, F { name: "cover.jpg".to_string() }]; + let files = vec![ + F { + name: "book.M4B".to_string(), + }, + F { + name: "cover.jpg".to_string(), + }, + ]; let wanted = vec!["m4b".to_string(), "mp3".to_string()]; let sel = select_format(&Some(vec!["m4b".to_string()]), &wanted, &files); assert_eq!(sel.unwrap(), ".m4b".to_string()); @@ -223,9 +234,17 @@ mod tests { #[test] fn test_select_format_leading_dot_in_override() { - struct F { name: String } - impl HasFileName for F { fn name_lower(&self) -> String { self.name.to_lowercase() } } - let files = vec![F { name: "track.FLAC".to_string() }]; + struct F { + name: String, + } + impl HasFileName for F { + fn name_lower(&self) -> String { + self.name.to_lowercase() + } + } + let files = vec![F { + name: "track.FLAC".to_string(), + }]; let wanted = vec!["mp3".to_string(), "flac".to_string()]; // override contains leading dot let sel = select_format(&Some(vec![".flac".to_string()]), &wanted, &files); @@ -234,9 +253,17 @@ mod tests { #[test] fn test_select_format_uppercase_extension() { - struct F { name: String } - impl HasFileName for F { fn name_lower(&self) -> String { self.name.to_lowercase() } } - let files = vec![F { name: 
"ALBUM.MP3".to_string() }]; + struct F { + name: String, + } + impl HasFileName for F { + fn name_lower(&self) -> String { + self.name.to_lowercase() + } + } + let files = vec![F { + name: "ALBUM.MP3".to_string(), + }]; let wanted = vec!["mp3".to_string()]; let sel = select_format(&None, &wanted, &files); assert_eq!(sel.unwrap(), ".mp3".to_string()); @@ -244,9 +271,17 @@ mod tests { #[test] fn test_select_format_missing_extension_returns_none() { - struct F { name: String } - impl HasFileName for F { fn name_lower(&self) -> String { self.name.to_lowercase() } } - let files = vec![F { name: "README".to_string() }]; + struct F { + name: String, + } + impl HasFileName for F { + fn name_lower(&self) -> String { + self.name.to_lowercase() + } + } + let files = vec![F { + name: "README".to_string(), + }]; let wanted = vec!["m4b".to_string()]; let sel = select_format(&None, &wanted, &files); assert!(sel.is_none()); @@ -254,9 +289,17 @@ mod tests { #[test] fn test_select_format_overridden_empty_vector() { - struct F { name: String } - impl HasFileName for F { fn name_lower(&self) -> String { self.name.to_lowercase() } } - let files = vec![F { name: "song.mp3".to_string() }]; + struct F { + name: String, + } + impl HasFileName for F { + fn name_lower(&self) -> String { + self.name.to_lowercase() + } + } + let files = vec![F { + name: "song.mp3".to_string(), + }]; let wanted = vec!["mp3".to_string()]; // override provided but empty -> should produce no selection let sel = select_format(&Some(vec![]), &wanted, &files); @@ -265,9 +308,17 @@ mod tests { #[test] fn test_select_format_wanted_empty_then_none() { - struct F { name: String } - impl HasFileName for F { fn name_lower(&self) -> String { self.name.to_lowercase() } } - let files = vec![F { name: "file.mp3".to_string() }]; + struct F { + name: String, + } + impl HasFileName for F { + fn name_lower(&self) -> String { + self.name.to_lowercase() + } + } + let files = vec![F { + name: "file.mp3".to_string(), + }]; let wanted: 
Vec = vec![]; let sel = select_format(&None, &wanted, &files); assert!(sel.is_none()); diff --git a/server/src/linker/duplicates.rs b/server/src/linker/duplicates.rs index cf6f5997..511adfbc 100644 --- a/server/src/linker/duplicates.rs +++ b/server/src/linker/duplicates.rs @@ -67,11 +67,15 @@ pub fn rank_torrents(config: &Config, batch: Vec) -> Vec { #[cfg(test)] mod tests { use super::*; + use mlm_db::{Language, MainCat, MediaType, MetadataSource, Size, Timestamp, TorrentMeta}; use std::collections::BTreeMap; - use mlm_db::{MediaType, Size, TorrentMeta, Timestamp, MetadataSource, MainCat, Language}; - use crate::config::SearchConfig; - fn create_test_torrent(id: &str, title: &str, filetypes: Vec, size_bytes: u64) -> Torrent { + fn create_test_torrent( + id: &str, + title: &str, + filetypes: Vec, + size_bytes: u64, + ) -> Torrent { let meta = TorrentMeta { title: title.to_string(), filetypes, @@ -116,43 +120,20 @@ mod tests { fn create_test_config() -> Config { Config { mam_id: "test".to_string(), - web_host: "0.0.0.0".to_string(), - web_port: 3157, - min_ratio: 2.0, - unsat_buffer: 10, - wedge_buffer: 0, - add_torrents_stopped: false, - exclude_narrator_in_library_dir: false, - search_interval: 30, - link_interval: 10, - import_interval: 135, - ignore_torrents: vec![], - audio_types: vec!["m4b".to_string(), "mp3".to_string()], - ebook_types: vec!["epub".to_string(), "pdf".to_string()], - music_types: vec!["flac".to_string(), "mp3".to_string()], - radio_types: vec!["mp3".to_string()], - search: SearchConfig::default(), - audiobookshelf: None, - autograbs: vec![], - snatchlist: vec![], - goodreads_lists: vec![], - notion_lists: vec![], - tags: vec![], - qbittorrent: vec![], - libraries: vec![], + ..Default::default() } } #[test] fn test_rank_torrents_preference() { let config = create_test_config(); - + let t1 = create_test_torrent("1", "Title", vec!["mp3".to_string()], 100); let t2 = create_test_torrent("2", "Title", vec!["m4b".to_string()], 100); - + let batch = 
vec![t1.clone(), t2.clone()]; let ranked = rank_torrents(&config, batch); - + assert_eq!(ranked[0].id, "2"); // m4b is preferred over mp3 assert_eq!(ranked[1].id, "1"); } @@ -160,31 +141,32 @@ mod tests { #[test] fn test_rank_torrents_size_tie_break() { let config = create_test_config(); - + let t1 = create_test_torrent("1", "Title", vec!["m4b".to_string()], 100); let t2 = create_test_torrent("2", "Title", vec!["m4b".to_string()], 200); - + let batch = vec![t1.clone(), t2.clone()]; let ranked = rank_torrents(&config, batch); - + assert_eq!(ranked[0].id, "2"); // Larger size wins tie assert_eq!(ranked[1].id, "1"); } #[tokio::test] async fn test_find_matches() -> Result<()> { - let tmp_dir = std::env::temp_dir().join(format!("mlm_test_duplicates_{}", std::process::id())); + let tmp_dir = + std::env::temp_dir().join(format!("mlm_test_duplicates_{}", std::process::id())); let _ = fs::remove_dir_all(&tmp_dir); fs::create_dir_all(&tmp_dir)?; let db_path = tmp_dir.join("test.db"); - + let db = native_db::Builder::new().create(&mlm_db::MODELS, &db_path)?; mlm_db::migrate(&db)?; - + let t1 = create_test_torrent("1", "My Book", vec!["m4b".to_string()], 100); let t2 = create_test_torrent("2", "My Book", vec!["mp3".to_string()], 150); let t3 = create_test_torrent("3", "Other Book", vec!["m4b".to_string()], 100); - + { let rw = db.rw_transaction()?; rw.insert(t1.clone())?; @@ -192,14 +174,13 @@ mod tests { rw.insert(t3.clone())?; rw.commit()?; } - + let matches = find_matches(&db, &t1)?; assert_eq!(matches.len(), 1); assert_eq!(matches[0].id, "2"); - + drop(db); let _ = fs::remove_dir_all(tmp_dir); Ok(()) } } - diff --git a/server/src/linker/torrent.rs b/server/src/linker/torrent.rs index 9b51b31e..32f5f0d9 100644 --- a/server/src/linker/torrent.rs +++ b/server/src/linker/torrent.rs @@ -271,6 +271,7 @@ where if torrent.progress < 1.0 { continue; } + let library = find_library(&config, &torrent); let r = db.r_transaction()?; let mut existing_torrent: Option = 
r.get().primary(torrent.hash.clone())?; @@ -340,6 +341,9 @@ where ) .await .context("match_torrent"); + if let Err(e) = &result { + debug!("match_torrent error for {}: {:#}", torrent.hash, e); + } update_errored_torrent( &db, ErroredTorrentId::Linker(torrent.hash.clone()), @@ -713,6 +717,8 @@ async fn link_torrent( ) -> Result<()> { let mut library_files = vec![]; + // Removed temporary debug prints that were used during investigation. + let library_path = if library.options().method != LibraryLinkMethod::NoLink { let Some(mut dir) = library_dir(config.exclude_narrator_in_library_dir, library, meta) else { @@ -876,43 +882,18 @@ mod tests { fn test_find_library_by_download_dir() { let cfg = Config { mam_id: "m".to_string(), - web_host: "".to_string(), - web_port: 0, - min_ratio: 0.0, - unsat_buffer: 0, - wedge_buffer: 0, - add_torrents_stopped: false, - exclude_narrator_in_library_dir: false, - search_interval: 0, - link_interval: 0, - import_interval: 0, - ignore_torrents: vec![], - audio_types: vec![], - ebook_types: vec![], - music_types: vec![], - radio_types: vec![], - search: crate::config::SearchConfig::default(), - audiobookshelf: None, - autograbs: vec![], - snatchlist: vec![], - goodreads_lists: vec![], - notion_lists: vec![], - tags: vec![], - qbittorrent: vec![], libraries: vec![Library::ByDownloadDir(LibraryByDownloadDir { download_dir: PathBuf::from("/downloads"), options: LibraryOptions { name: None, - library_dir: PathBuf::from("/library"), + library_dir: PathBuf::from("/lib"), method: LibraryLinkMethod::Hardlink, audio_types: None, ebook_types: None, }, - tag_filters: LibraryTagFilters { - allow_tags: vec![], - deny_tags: vec![], - }, + tag_filters: LibraryTagFilters::default(), })], + ..Default::default() }; let qbit_torrent = qbit::models::Torrent { @@ -932,29 +913,6 @@ mod tests { fn test_find_library_by_category() { let cfg = Config { mam_id: "m".to_string(), - web_host: "".to_string(), - web_port: 0, - min_ratio: 0.0, - unsat_buffer: 0, - 
wedge_buffer: 0, - add_torrents_stopped: false, - exclude_narrator_in_library_dir: false, - search_interval: 0, - link_interval: 0, - import_interval: 0, - ignore_torrents: vec![], - audio_types: vec![], - ebook_types: vec![], - music_types: vec![], - radio_types: vec![], - search: crate::config::SearchConfig::default(), - audiobookshelf: None, - autograbs: vec![], - snatchlist: vec![], - goodreads_lists: vec![], - notion_lists: vec![], - tags: vec![], - qbittorrent: vec![], libraries: vec![Library::ByCategory(LibraryByCategory { category: "audiobooks".to_string(), options: LibraryOptions { @@ -969,6 +927,7 @@ mod tests { deny_tags: vec![], }, })], + ..Default::default() }; let qbit_torrent = qbit::models::Torrent { @@ -988,29 +947,6 @@ mod tests { fn test_find_library_skips_rip_dir() { let cfg = Config { mam_id: "m".to_string(), - web_host: "".to_string(), - web_port: 0, - min_ratio: 0.0, - unsat_buffer: 0, - wedge_buffer: 0, - add_torrents_stopped: false, - exclude_narrator_in_library_dir: false, - search_interval: 0, - link_interval: 0, - import_interval: 0, - ignore_torrents: vec![], - audio_types: vec![], - ebook_types: vec![], - music_types: vec![], - radio_types: vec![], - search: crate::config::SearchConfig::default(), - audiobookshelf: None, - autograbs: vec![], - snatchlist: vec![], - goodreads_lists: vec![], - notion_lists: vec![], - tags: vec![], - qbittorrent: vec![], libraries: vec![Library::ByRipDir(crate::config::LibraryByRipDir { rip_dir: PathBuf::from("/rip"), options: LibraryOptions { @@ -1022,6 +958,7 @@ mod tests { }, filter: crate::config::EditionFilter::default(), })], + ..Default::default() }; let qbit_torrent = qbit::models::Torrent { @@ -1175,30 +1112,7 @@ mod tests { }; let cfg = Config { mam_id: "m".to_string(), - web_host: "".to_string(), - web_port: 0, - min_ratio: 0.0, - unsat_buffer: 0, - wedge_buffer: 0, - add_torrents_stopped: false, - exclude_narrator_in_library_dir: false, - search_interval: 0, - link_interval: 0, - 
import_interval: 0, - ignore_torrents: vec![], - audio_types: vec![], - ebook_types: vec![], - music_types: vec![], - radio_types: vec![], - search: crate::config::SearchConfig::default(), - audiobookshelf: None, - autograbs: vec![], - snatchlist: vec![], - goodreads_lists: vec![], - notion_lists: vec![], - tags: vec![], - qbittorrent: vec![], - libraries: vec![], + ..Default::default() }; let update = check_torrent_updates(&mut torrent, &qbit_torrent, None, &cfg, &[]); @@ -1485,30 +1399,7 @@ mod tests { fn mock_config() -> Config { Config { mam_id: "m".to_string(), - web_host: "".to_string(), - web_port: 0, - min_ratio: 0.0, - unsat_buffer: 0, - wedge_buffer: 0, - add_torrents_stopped: false, - exclude_narrator_in_library_dir: false, - search_interval: 0, - link_interval: 0, - import_interval: 0, - ignore_torrents: vec![], - audio_types: vec![], - ebook_types: vec![], - music_types: vec![], - radio_types: vec![], - search: crate::config::SearchConfig::default(), - audiobookshelf: None, - autograbs: vec![], - snatchlist: vec![], - goodreads_lists: vec![], - notion_lists: vec![], - tags: vec![], - qbittorrent: vec![], - libraries: vec![], + ..Default::default() } } diff --git a/server/src/main.rs b/server/src/main.rs index 75020420..667effe3 100644 --- a/server/src/main.rs +++ b/server/src/main.rs @@ -38,6 +38,7 @@ use mlm::{ config::Config, linker::{folder::link_folders_to_library, torrent::link_torrents_to_library}, lists::{get_lists, run_list_import}, + metadata::MetadataService, snatchlist::run_snatchlist_search, stats::{Context, Stats, Triggers}, torrent_downloader::grab_selected_torrents, @@ -209,6 +210,31 @@ async fn app_main() -> Result<()> { let stats = Stats::new(); + // Instantiate metadata service from config provider settings + let default_timeout = Duration::from_secs(5); + // Convert Config's ProviderConfig -> metadata::ProviderSetting + let provider_settings: Vec = config + .metadata_providers + .iter() + .map(|p| match p { + 
mlm::config::ProviderConfig::Hardcover(c) => { + mlm::metadata::ProviderSetting::Hardcover { + enabled: c.enabled, + timeout_secs: c.timeout_secs, + api_key: c.api_key.clone(), + } + } + mlm::config::ProviderConfig::RomanceIo(c) => { + mlm::metadata::ProviderSetting::RomanceIo { + enabled: c.enabled, + timeout_secs: c.timeout_secs, + } + } + }) + .collect(); + let metadata_service = MetadataService::from_settings(&provider_settings, default_timeout); + let metadata_service = Arc::new(metadata_service); + let (mut search_tx, mut search_rx) = (BTreeMap::new(), BTreeMap::new()); let (mut import_tx, mut import_rx) = (BTreeMap::new(), BTreeMap::new()); let (torrent_linker_tx, torrent_linker_rx) = watch::channel(()); @@ -704,6 +730,7 @@ async fn app_main() -> Result<()> { db, mam: Arc::new(mam), stats, + metadata: metadata_service, triggers, }; diff --git a/server/src/metadata/mam_meta.rs b/server/src/metadata/mam_meta.rs new file mode 100644 index 00000000..f7280417 --- /dev/null +++ b/server/src/metadata/mam_meta.rs @@ -0,0 +1,104 @@ +use crate::stats::Context; +use anyhow::Result; +use mlm_db::TorrentMeta; + +/// Match metadata for a given original `TorrentMeta` using the selected +/// provider id. This function does NOT persist changes to the database; it +/// performs the provider query and returns the new metadata and the list of +/// diffed fields so the caller can decide how to persist/apply them. +pub async fn match_meta( + ctx: &Context, + orig: &TorrentMeta, + provider_id: &str, +) -> Result<(TorrentMeta, String, Vec)> { + // Build a small query meta for providers to consume. Providers accept + // a TorrentMeta and may read any fields they need. 
+ let mut query: TorrentMeta = Default::default(); + if let Some(isbn) = orig.ids.get(mlm_db::ids::ISBN) { + query + .ids + .insert(mlm_db::ids::ISBN.to_string(), isbn.clone()); + } + query.title = orig.title.clone(); + query.authors = orig.authors.clone(); + + // Delegate provider selection and request-timeout handling to the + // centralized MetadataService attached to the Context. This keeps + // provider configuration in one place and avoids duplicating instantiation + // logic here. + let fetched = ctx.metadata.fetch_provider(ctx, query, provider_id).await?; + + // Merge fetched metadata into original meta: only overwrite fields when + // the provider supplied non-empty / non-default values. This preserves + // DB-only fields (sizes, upload timestamps, internal IDs) when providers + // don't populate them. + let merged = merge_meta(orig, &fetched); + + let fields = orig.diff(&merged); + + Ok((merged, provider_id.to_string(), fields)) +} + +fn merge_meta(orig: &TorrentMeta, incoming: &TorrentMeta) -> TorrentMeta { + let mut out = orig.clone(); + + // ids: overlay incoming entries (non-empty) on top of existing ids + for (k, v) in &incoming.ids { + if !v.is_empty() { + out.ids.insert(k.clone(), v.clone()); + } + } + + if !incoming.title.is_empty() { + out.title = incoming.title.clone(); + } + if !incoming.description.is_empty() { + out.description = incoming.description.clone(); + } + + if !incoming.authors.is_empty() { + out.authors = incoming.authors.clone(); + } + if !incoming.narrators.is_empty() { + out.narrators = incoming.narrators.clone(); + } + if !incoming.series.is_empty() { + out.series = incoming.series.clone(); + } + + if !incoming.categories.is_empty() { + out.categories = incoming.categories.clone(); + } + if !incoming.tags.is_empty() { + out.tags = incoming.tags.clone(); + } + + // Simple scalar/option overlays + if incoming.main_cat.is_some() { + out.main_cat = incoming.main_cat; + } + if incoming.language.is_some() { + out.language = 
incoming.language; + } + if incoming.flags.is_some() { + out.flags = incoming.flags; + } + if !incoming.filetypes.is_empty() { + out.filetypes = incoming.filetypes.clone(); + } + if incoming.num_files != 0 { + out.num_files = incoming.num_files; + } + // size: only overwrite when provider returned a non-zero size + if incoming.size.bytes() > 0 { + out.size = incoming.size; + } + if incoming.edition.is_some() { + out.edition = incoming.edition.clone(); + } + + // Always set source to Match for provider-updated data + out.source = mlm_db::MetadataSource::Match; + + out +} diff --git a/server/src/metadata/mod.rs b/server/src/metadata/mod.rs new file mode 100644 index 00000000..351eb88f --- /dev/null +++ b/server/src/metadata/mod.rs @@ -0,0 +1,189 @@ +use crate::stats::Context; +use anyhow::Result; +use mlm_db::DatabaseExt as _; +use mlm_db::{Event, EventType, MetadataSource, TorrentMeta}; +use mlm_meta::providers::{Hardcover, RomanceIo}; +use mlm_meta::traits::Provider; +use std::sync::Arc; +use tokio::time::{Duration, timeout}; +use tracing::instrument; +pub mod mam_meta; + +pub struct MetadataService { + // Each provider can have its own request timeout + providers: Vec<(Arc, Duration)>, + #[allow(dead_code)] + default_timeout: Duration, +} + +/// Simple provider configuration used by the server. +pub enum ProviderSetting { + Hardcover { + enabled: bool, + timeout_secs: Option, + api_key: Option, + }, + RomanceIo { + enabled: bool, + timeout_secs: Option, + }, +} + +impl MetadataService { + pub fn new(providers: Vec<(Arc, Duration)>, default_timeout: Duration) -> Self { + Self { + providers, + default_timeout, + } + } + + /// Build a MetadataService from a list of ProviderSetting. 
+ pub fn from_settings(settings: &[ProviderSetting], default_timeout: Duration) -> Self { + let mut providers: Vec<(Arc, Duration)> = Vec::new(); + for s in settings { + match s { + ProviderSetting::Hardcover { + enabled, + timeout_secs, + api_key, + } => { + if !enabled { + continue; + } + let to = timeout_secs + .map(Duration::from_secs) + .unwrap_or(default_timeout); + providers.push((Arc::new(Hardcover::new(api_key.clone())), to)); + } + ProviderSetting::RomanceIo { + enabled, + timeout_secs, + } => { + if !enabled { + continue; + } + let to = timeout_secs + .map(Duration::from_secs) + .unwrap_or(default_timeout); + providers.push((Arc::new(RomanceIo::new()), to)); + } + } + } + Self::new(providers, default_timeout) + } + + pub fn enabled_providers(&self) -> Vec { + self.providers + .iter() + .map(|(p, _)| p.id().to_string()) + .collect() + } + + #[instrument(skip(self, ctx))] + pub async fn fetch_and_persist( + &self, + ctx: &Context, + query: TorrentMeta, + ) -> Result { + // Query providers in parallel with timeout and pick first successful + let mut handles = vec![]; + for (p, to) in &self.providers { + let p = p.clone(); + let q = query.clone(); + let to = *to; + handles.push(tokio::spawn(async move { + let r = timeout(to, p.fetch(&q)).await; + match r { + Ok(Ok(m)) => Ok((p.id().to_string(), m)), + Ok(Err(e)) => Err(anyhow::anyhow!(e)), + Err(_) => Err(anyhow::anyhow!("timeout")), + } + })); + } + + let mut best: Option<(String, TorrentMeta)> = None; + + for h in handles { + match h.await { + Ok(Ok((id, meta))) => { + // pick first for now + best = Some((id, meta)); + break; + } + Ok(Err(e)) => { + tracing::debug!(error=?e, "provider task returned error"); + } + Err(join_err) => { + tracing::debug!(error=?join_err, "provider task panicked or was cancelled"); + } + } + } + + let (provider_id, meta): (String, TorrentMeta) = match best { + Some(v) => v, + None => return Err(anyhow::anyhow!("no provider matched")), + }; + + // Provider already returns a 
TorrentMeta; use it and mark source + let mut tmeta: TorrentMeta = meta; + tmeta.source = MetadataSource::Match; + + // Persist: write a SelectedTorrent or Torrent depending on context. + // Here we insert an Event to record metadata update and return the meta. + let ev = Event { + id: mlm_db::Uuid::new(), + torrent_id: None, + mam_id: None, + created_at: mlm_db::Timestamp::now(), + event: EventType::Updated { + fields: vec![], + source: (MetadataSource::Match, provider_id.clone()), + }, + }; + + // Insert event into DB using async rw transaction helper from mlm_db + let (guard, rw) = ctx.db.rw_async().await?; + rw.insert(ev)?; + rw.commit()?; + drop(guard); + + Ok(tmeta) + } + + /// Fetch using an explicit provider id. This looks up the provider in the + /// registered list and executes it with its configured timeout. Returns + /// the provider-provided TorrentMeta on success. + #[instrument(skip(self, _ctx))] + pub async fn fetch_provider( + &self, + _ctx: &Context, + query: TorrentMeta, + provider_id: &str, + ) -> Result { + // find provider + let mut found: Option<(Arc, Duration)> = None; + for (p, to) in &self.providers { + if p.id() == provider_id { + found = Some((p.clone(), *to)); + break; + } + } + + let (p, to) = match found { + Some(v) => v, + None => anyhow::bail!("unknown provider id: {}", provider_id), + }; + + // run with timeout + let r = timeout(to, p.fetch(&query)).await; + let meta = match r { + Ok(Ok(m)) => m, + Ok(Err(e)) => return Err(anyhow::anyhow!(e)), + Err(_) => return Err(anyhow::anyhow!("timeout")), + }; + + let mut tmeta: TorrentMeta = meta; + tmeta.source = MetadataSource::Match; + Ok(tmeta) + } +} diff --git a/server/src/stats.rs b/server/src/stats.rs index 3bda86bf..e22e7784 100644 --- a/server/src/stats.rs +++ b/server/src/stats.rs @@ -11,6 +11,7 @@ use tokio::sync::{ }; use crate::config::Config; +use crate::metadata::MetadataService; #[derive(Default)] pub struct StatsValues { @@ -55,6 +56,12 @@ impl Stats { } } +impl Default for 
Stats { + fn default() -> Self { + Self::new() + } +} + #[derive(Clone)] pub struct Events { pub event: (Sender>, Receiver>), @@ -76,6 +83,7 @@ pub struct Context { pub db: Arc>, pub mam: Arc>>>, pub stats: Stats, + pub metadata: Arc, // pub events: Events, pub triggers: Triggers, } diff --git a/server/src/web/pages/torrent.rs b/server/src/web/pages/torrent.rs index 7fb70985..f3bd9bd7 100644 --- a/server/src/web/pages/torrent.rs +++ b/server/src/web/pages/torrent.rs @@ -29,6 +29,7 @@ use serde::Deserialize; use time::UtcDateTime; use tokio_util::io::ReaderStream; +use crate::metadata::mam_meta::match_meta; use crate::{ audiobookshelf::{Abs, LibraryItemMinified}, cleaner::clean_torrent, @@ -45,6 +46,7 @@ use crate::{ time, }, }; +use mlm_db::MetadataSource; pub async fn torrent_file( State(context): State, @@ -173,8 +175,8 @@ async fn torrent_page_id( .db .r_transaction()? .scan() - .secondary::(EventKey::mam_id)?; - let events = events.range(Some(torrent.mam_id)..=Some(torrent.mam_id))?; + .secondary::(EventKey::torrent_id)?; + let events = events.range(Some(torrent.id.clone())..=Some(torrent.id.clone()))?; let mut events = events.collect::, _>>()?; events.sort_by(|a, b| b.created_at.cmp(&a.created_at)); @@ -266,6 +268,7 @@ async fn torrent_page_id( wanted_path, qbit_files, other_torrents, + metadata_providers: context.metadata.enabled_providers(), }; Ok::<_, AppError>(Html(template.to_string())) } @@ -345,6 +348,47 @@ pub async fn torrent_page_post_id( let mam = context.mam()?; refresh_metadata_relink(&config, &context.db, &mam, id).await?; } + "match-hardcover" | "match-romanceio" => { + // Build a query from existing torrent metadata + let Some(mut torrent) = context.db.r_transaction()?.get().primary::(id)? 
+ else { + return Err(anyhow::Error::msg("Could not find torrent").into()); + }; + + let provider_id = if form.action == "match-hardcover" { + "hardcover" + } else { + "romanceio" + }; + + match match_meta(&context, &torrent.meta, provider_id).await { + Ok((new_meta, pid, fields)) => { + let ev = Event { + id: mlm_db::Uuid::new(), + torrent_id: Some(torrent.id.clone()), + mam_id: torrent.mam_id, + created_at: mlm_db::Timestamp::now(), + event: EventType::Updated { + fields: fields.clone(), + source: (MetadataSource::Match, pid.clone()), + }, + }; + + let (_guard, rw) = context.db.rw_async().await?; + // apply meta updates + let mut meta = new_meta; + meta.source = MetadataSource::Match; + torrent.meta = meta; + // update title_search to normalized title + torrent.title_search = mlm_parse::normalize_title(&torrent.meta.title); + + rw.upsert(torrent)?; + rw.insert(ev)?; + rw.commit()?; + } + Err(e) => tracing::error!("metadata match failed: {e}"), + } + } "remove" => { let (_guard, rw) = context.db.rw_async().await?; let Some(torrent) = rw.get().primary::(id)? 
else { @@ -460,6 +504,7 @@ struct TorrentPageTemplate { wanted_path: Option, qbit_files: Vec, other_torrents: MaMTorrentsTemplate, + metadata_providers: Vec, } impl TorrentPageTemplate { diff --git a/server/src/web/tables.rs b/server/src/web/tables.rs index a911b544..2cec7123 100644 --- a/server/src/web/tables.rs +++ b/server/src/web/tables.rs @@ -254,6 +254,7 @@ pub trait HidableColumns: Sortable { pub trait Size { fn style(&self) -> String; + #[allow(dead_code)] fn px(&self) -> u64; } impl Size for u64 { @@ -266,7 +267,7 @@ impl Size for u64 { } } -pub struct Flex(pub u64, pub u64); +pub struct Flex(pub u64, #[allow(dead_code)] pub u64); impl Size for Flex { fn style(&self) -> String { format!("{}fr", self.0) diff --git a/server/templates/pages/torrent.html b/server/templates/pages/torrent.html index 9fe08900..a7096c2e 100644 --- a/server/templates/pages/torrent.html +++ b/server/templates/pages/torrent.html @@ -76,6 +76,15 @@

Replaced with: {{ torrent.meta.title }}clean torrent +
+ {% for provider in metadata_providers %} + {% if provider == "hardcover" %} + + {% else if provider == "romanceio" %} + + {% endif %} + {% endfor %} +
{% endif %} {% if wanted_path != torrent.library_path %} {% if let Some(wanted_path) = wanted_path %} diff --git a/server/tests/cleaner_test.rs b/server/tests/cleaner_test.rs index ca47d127..769d343c 100644 --- a/server/tests/cleaner_test.rs +++ b/server/tests/cleaner_test.rs @@ -1,15 +1,18 @@ mod common; -use common::{TestDb, MockFs, mock_config, MockTorrentBuilder}; +use common::{MockFs, MockTorrentBuilder, TestDb, mock_config}; use mlm::cleaner::run_library_cleaner; +use mlm_db::{DatabaseExt, Torrent}; use std::sync::Arc; -use mlm_db::{Torrent, DatabaseExt}; #[tokio::test] async fn test_run_library_cleaner() -> anyhow::Result<()> { let test_db = TestDb::new()?; let mock_fs = MockFs::new()?; - let config = Arc::new(mock_config(mock_fs.rip_dir.clone(), mock_fs.library_dir.clone())); + let config = Arc::new(mock_config( + mock_fs.rip_dir.clone(), + mock_fs.library_dir.clone(), + )); // Create two versions of the same book let lib_path1 = mock_fs.library_dir.join("Author 1").join("Book 1 (v1)"); @@ -51,11 +54,20 @@ async fn test_run_library_cleaner() -> anyhow::Result<()> { // t1 should be replaced_with t2 assert!(t1_after.replaced_with.is_some(), "t1 should be replaced"); assert_eq!(t1_after.replaced_with.unwrap().0, "ID2"); - assert!(t1_after.library_path.is_none(), "t1 library path should be cleared"); + assert!( + t1_after.library_path.is_none(), + "t1 library path should be cleared" + ); // t2 should still be there - assert!(t2_after.replaced_with.is_none(), "t2 should not be replaced"); - assert!(t2_after.library_path.is_some(), "t2 library path should still be set"); + assert!( + t2_after.replaced_with.is_none(), + "t2 should not be replaced" + ); + assert!( + t2_after.library_path.is_some(), + "t2 library path should still be set" + ); // Files for t1 should be deleted assert!(!lib_path1.exists(), "t1 files should be deleted"); diff --git a/server/tests/common/mod.rs b/server/tests/common/mod.rs index ecfb4da3..3acf9462 100644 --- 
a/server/tests/common/mod.rs +++ b/server/tests/common/mod.rs @@ -1,8 +1,8 @@ use anyhow::Result; use mlm::config::{Config, Library, LibraryByRipDir, LibraryLinkMethod, LibraryOptions}; use mlm_db::{ - migrate, Database, MainCat, MediaType, MetadataSource, Size, Timestamp, Torrent, TorrentMeta, - MODELS, + Database, MODELS, MainCat, MediaType, MetadataSource, Size, Timestamp, Torrent, TorrentMeta, + migrate, }; use native_db::Builder; use std::path::PathBuf; @@ -112,6 +112,7 @@ impl MockTorrentBuilder { } } +#[allow(dead_code)] pub struct MockFs { #[allow(dead_code)] pub root: TempDir, @@ -120,6 +121,7 @@ pub struct MockFs { } impl MockFs { + #[allow(dead_code)] pub fn new() -> Result { let root = tempfile::tempdir()?; let rip_dir = root.path().join("rip"); @@ -259,29 +261,6 @@ impl MockFs { pub fn mock_config(rip_dir: PathBuf, library_dir: PathBuf) -> Config { Config { mam_id: "test".to_string(), - web_host: "127.0.0.1".to_string(), - web_port: 3157, - min_ratio: 2.0, - unsat_buffer: 10, - wedge_buffer: 0, - add_torrents_stopped: false, - exclude_narrator_in_library_dir: false, - search_interval: 30, - link_interval: 10, - import_interval: 135, - ignore_torrents: vec![], - audio_types: vec!["m4b".to_string(), "m4a".to_string()], - ebook_types: vec!["epub".to_string()], - music_types: vec!["mp3".to_string()], - radio_types: vec!["mp3".to_string()], - search: Default::default(), - audiobookshelf: None, - autograbs: vec![], - snatchlist: vec![], - goodreads_lists: vec![], - notion_lists: vec![], - tags: vec![], - qbittorrent: vec![], libraries: vec![Library::ByRipDir(LibraryByRipDir { rip_dir, options: LibraryOptions { @@ -293,5 +272,12 @@ pub fn mock_config(rip_dir: PathBuf, library_dir: PathBuf) -> Config { }, filter: Default::default(), })], + metadata_providers: vec![mlm::config::ProviderConfig::RomanceIo( + mlm::config::RomanceIoConfig { + enabled: true, + timeout_secs: None, + }, + )], + ..Default::default() } } diff --git a/server/tests/linker_torrent_test.rs 
b/server/tests/linker_torrent_test.rs index eb053760..a7b48d1e 100644 --- a/server/tests/linker_torrent_test.rs +++ b/server/tests/linker_torrent_test.rs @@ -2,12 +2,12 @@ mod common; use anyhow::Result; use common::{MockFs, TestDb, mock_config}; -use mlm_db::DatabaseExt as _; use mlm::config::{ Library, LibraryByDownloadDir, LibraryLinkMethod, LibraryOptions, LibraryTagFilters, QbitConfig, }; use mlm::linker::torrent::{MaMApi, link_torrents_to_library}; use mlm::qbittorrent::QbitApi; +use mlm_db::DatabaseExt as _; use mlm_mam::search::MaMTorrent; use qbit::models::{Torrent as QbitTorrent, TorrentContent, Tracker}; use qbit::parameters::TorrentListParams; @@ -56,28 +56,34 @@ impl MaMApi for MockMaM { } } -fn mock_meta(title: &str, author: &str) -> mlm_db::TorrentMeta { - mlm_db::TorrentMeta { - ids: BTreeMap::new(), - vip_status: None, - cat: None, - media_type: mlm_db::MediaType::Audiobook, - main_cat: None, - categories: vec![], - tags: vec![], - language: None, - flags: None, - filetypes: vec![], - num_files: 0, - size: mlm_db::Size::from_bytes(0), +#[allow(clippy::too_many_arguments)] +/// Helper to build a MaMTorrent with sensible defaults for tests. 
+fn make_mam_torrent( + id: u64, + title: &str, + mediatype: u8, + maincat: u8, + category: u64, + catname: &str, + language: u8, + lang_code: &str, + numfiles: u64, + filetype: &str, +) -> MaMTorrent { + MaMTorrent { + id, title: title.to_string(), - edition: None, - description: "".to_string(), - authors: vec![author.to_string()], - narrators: vec![], - series: vec![], - source: mlm_db::MetadataSource::Mam, - uploaded_at: mlm_db::Timestamp::now(), + added: "2024-01-01 12:00:00".to_string(), + size: format!("{} B", 100), + mediatype, + maincat, + catname: catname.to_string(), + category, + language, + lang_code: lang_code.to_string(), + numfiles, + filetype: filetype.to_string(), + ..Default::default() } } @@ -135,21 +141,18 @@ async fn test_link_torrent_audiobook() -> anyhow::Result<()> { }; // Setup mock MaM - let mut mam_torrent = MaMTorrent { - id: 1, - title: "Test Title".to_string(), - added: "2024-01-01 12:00:00".to_string(), - size: "100 B".to_string(), - mediatype: 1, // Audiobook - maincat: 1, // Fiction - catname: "General Fiction".to_string(), - category: 42, // General Fiction in AudiobookCategory - language: 1, // English - lang_code: "en".to_string(), - numfiles: 1, - filetype: "m4b".to_string(), - ..Default::default() - }; + let mut mam_torrent = make_mam_torrent( + 1, + "Test Title", + 1, + 1, + 42, + "General Fiction", + 1, + "en", + 1, + "m4b", + ); mam_torrent.author_info.insert(1, "Test Author".to_string()); let mock_mam = MockMaM { @@ -379,18 +382,21 @@ async fn test_link_torrent_ebook() -> anyhow::Result<()> { files: HashMap::from([(torrent_hash.to_string(), vec![qbit_content])]), }; - let mut mam_torrent = MaMTorrent { - id: 2, - title: "Ebook Title".to_string(), - added: "2024-01-02 12:00:00".to_string(), - size: "200 B".to_string(), - mediatype: 2, // Ebook - category: 46, - language: 1, - lang_code: "en".to_string(), - ..Default::default() - }; - mam_torrent.author_info.insert(2, "Ebook Author".to_string()); + let mut mam_torrent = 
make_mam_torrent( + 2, + "Ebook Title", + 2, + 2, + 64, + "General Fiction", + 1, + "en", + 1, + "epub", + ); + mam_torrent + .author_info + .insert(2, "Ebook Author".to_string()); let mock_mam = MockMaM { torrents: HashMap::from([(torrent_hash.to_string(), mam_torrent)]), @@ -595,18 +601,11 @@ async fn test_refresh_metadata_relink() -> anyhow::Result<()> { )]), }; - let mut mam_torrent = MaMTorrent { - id: 2, - title: "Title".to_string(), - added: "2024-01-01 12:00:00".to_string(), - size: "100 B".to_string(), - mediatype: 1, // Audiobook - category: 42, - language: 1, - lang_code: "en".to_string(), - ..Default::default() - }; - mam_torrent.author_info.insert(2, "Refreshed Author".to_string()); + let mut mam_torrent = + make_mam_torrent(2, "Title", 1, 1, 42, "General Fiction", 1, "en", 1, "m4b"); + mam_torrent + .author_info + .insert(2, "Refreshed Author".to_string()); let mock_mam = MockMaM { torrents: HashMap::from([(torrent_hash.to_string(), mam_torrent)]), diff --git a/server/tests/metadata_integration.rs b/server/tests/metadata_integration.rs new file mode 100644 index 00000000..809a5857 --- /dev/null +++ b/server/tests/metadata_integration.rs @@ -0,0 +1,181 @@ +mod common; + +use anyhow::Result; +use std::sync::Arc; +use std::time::Duration as StdDuration; + +use mlm_db::{Event, EventKey, EventType, TorrentMeta as MetadataQuery}; + +use async_trait::async_trait; +use common::{TestDb, mock_config}; +use mlm::metadata::MetadataService; +use mlm::stats::Context; +use url::Url; + +// Simple mock fetcher that returns inline mock data for tests. 
+struct MockFetcher; + +#[async_trait] +impl mlm_meta::http::HttpClient for MockFetcher { + async fn get(&self, url: &str) -> anyhow::Result { + let u = Url::parse(url).map_err(|e| anyhow::anyhow!(e))?; + if !u.host_str().is_some_and(|h| h.contains("romance.io")) { + return Err(anyhow::anyhow!("unexpected host in test fetch")); + } + if u.path().starts_with("/json/search_books") { + return Ok(r#"{ + "success": true, + "books": [ + { + "_id":"68b95a390bc0cee156edaf2b", + "info":{"title":"Of Ink and Alchemy"}, + "authors":[{"name":"Sloane St. James"}], + "url":"/books/68b95a390bc0cee156edaf2b/of-ink-and-alchemy-sloane-st-james" + } + ] +}"# + .to_string()); + } + if u.path().starts_with("/json/search_authors") { + return Ok(r#"{ "success": true, "authors": [] }"#.to_string()); + } + if u.path().starts_with("/search") { + return Ok("search".to_string()); + } + + Ok(r#" + + + +
+ +"# + .to_string()) + } + + async fn post( + &self, + _url: &str, + _body: Option<&str>, + _headers: &[(&str, &str)], + ) -> anyhow::Result { + Err(anyhow::anyhow!("post not implemented in mock fetcher")) + } +} + +#[tokio::test] +async fn test_metadata_fetch_and_persist_romanceio() -> Result<()> { + let test_db = TestDb::new()?; + + // minimal config/context + let temp = tempfile::tempdir()?; + let rip = temp.path().join("rip"); + let lib = temp.path().join("library"); + std::fs::create_dir_all(&rip)?; + std::fs::create_dir_all(&lib)?; + let cfg = mock_config(rip, lib); + + let _default_timeout = StdDuration::from_secs(5); + let providers = cfg.metadata_providers.clone(); + // convert provider config to server metadata provider settings + let provider_settings: Vec = providers + .iter() + .map(|p| match p { + mlm::config::ProviderConfig::Hardcover(c) => { + mlm::metadata::ProviderSetting::Hardcover { + enabled: c.enabled, + timeout_secs: c.timeout_secs, + api_key: c.api_key.clone(), + } + } + mlm::config::ProviderConfig::RomanceIo(c) => { + mlm::metadata::ProviderSetting::RomanceIo { + enabled: c.enabled, + timeout_secs: c.timeout_secs, + } + } + }) + .collect(); + let metadata = + MetadataService::from_settings(&provider_settings, std::time::Duration::from_secs(5)); + let metadata = Arc::new(metadata); + + let ctx = Context { + config: Arc::new(tokio::sync::Mutex::new(Arc::new(cfg))), + db: test_db.db.clone(), + mam: Arc::new(Err(anyhow::anyhow!("no mam"))), + stats: mlm::stats::Stats::new(), + metadata: metadata.clone(), + triggers: mlm::stats::Triggers { + search_tx: std::collections::BTreeMap::new(), + import_tx: std::collections::BTreeMap::new(), + torrent_linker_tx: tokio::sync::watch::channel(()).0, + folder_linker_tx: tokio::sync::watch::channel(()).0, + downloader_tx: tokio::sync::watch::channel(()).0, + audiobookshelf_tx: tokio::sync::watch::channel(()).0, + }, + }; + + // Use a title known to the plan/romanceio mock. 
Inject the test fetcher + // implementation into the RomanceIo provider so we don't make network + // requests during tests. + // Replace the RomanceIo provider in the metadata service with one that + // uses the MockFetcher. + let mock_fetcher = std::sync::Arc::new(MockFetcher); + // Rebuild a metadata service with a RomanceIo using the mock fetcher. + let rom = mlm_meta::providers::RomanceIo::with_client(mock_fetcher.clone()); + let svc = mlm::metadata::MetadataService::new( + vec![(std::sync::Arc::new(rom), std::time::Duration::from_secs(5))], + std::time::Duration::from_secs(5), + ); + let metadata = Arc::new(svc); + + let ctx = Context { + metadata: metadata.clone(), + ..ctx + }; + + // Use a title known to the plan/romanceio mock + let mut q: MetadataQuery = Default::default(); + q.title = "Of Ink and Alchemy".to_string(); + let meta = metadata.fetch_and_persist(&ctx, q).await?; + + // Expect meta to contain some categories/tags + assert!( + meta.title.to_lowercase().contains("ink") + || !meta.categories.is_empty() + || !meta.tags.is_empty() + ); + + // Ensure an Event::Updated was inserted + let r = test_db.db.r_transaction()?; + let events = r.scan().secondary::(EventKey::created_at)?; + let events = events.all()?; + let mut found = false; + for ev in events { + let ev = ev?; + if let EventType::Updated { source, .. 
} = ev.event + && source.0 == mlm_db::MetadataSource::Match + && source.1 == "romanceio" + { + found = true; + break; + } + } + assert!(found, "Expected Event::Updated from romanceio provider"); + + Ok(()) +} From 15f667d9dc4de04e3ed86f4b02361c6ffd4aad5c Mon Sep 17 00:00:00 2001 From: Stirling Mouse <181794392+StirlingMouse@users.noreply.github.com> Date: Sat, 14 Feb 2026 15:11:08 +0100 Subject: [PATCH 2/4] Add openlibrary provider --- mlm_meta/src/helpers.rs | 14 +-- mlm_meta/src/http.rs | 48 +++++++- mlm_meta/src/providers/mod.rs | 2 + mlm_meta/src/providers/openlibrary.rs | 162 ++++++++++++++++++++++++++ mlm_meta/src/providers/romanceio.rs | 20 +++- mlm_meta/tests/mock_openlibrary.rs | 55 +++++++++ mlm_meta/tests/openlibrary_tests.rs | 118 +++++++++++++++++++ server/src/config.rs | 13 +++ server/src/main.rs | 6 + server/src/metadata/mod.rs | 18 ++- server/src/web/pages/torrent.rs | 39 ++++--- server/templates/pages/torrent.html | 17 ++- server/tests/metadata_integration.rs | 6 + 13 files changed, 483 insertions(+), 35 deletions(-) create mode 100644 mlm_meta/src/providers/openlibrary.rs create mode 100644 mlm_meta/tests/mock_openlibrary.rs create mode 100644 mlm_meta/tests/openlibrary_tests.rs diff --git a/mlm_meta/src/helpers.rs b/mlm_meta/src/helpers.rs index 0152f7c4..fa94b59e 100644 --- a/mlm_meta/src/helpers.rs +++ b/mlm_meta/src/helpers.rs @@ -71,14 +71,12 @@ pub fn score_candidate( let q_title_norm = q_title.as_ref().map(|t| normalize_title(t)); let mut title_score = 0.0f64; - let mut title_exact = false; if let Some(qt_norm) = q_title_norm.as_ref() && let Some(ct) = cand_title { let cand = normalize_title(ct); if cand == *qt_norm { title_score = 1.0; - title_exact = true; } else if cand.contains(qt_norm.as_str()) || qt_norm.contains(cand.as_str()) { title_score = 0.9; } else { @@ -87,7 +85,6 @@ pub fn score_candidate( } let mut author_score = 0.0f64; - let mut authors_match = false; if !q_auths.is_empty() { let q_auths_norm = normalize_authors(q_auths); 
let mut best = 0.0f64; @@ -98,7 +95,6 @@ pub fn score_candidate( for qa in &q_auths_norm { if n.contains(qa) || qa.contains(&n) { best = best.max(1.0); - authors_match = true; } else { best = best.max(token_similarity(&n, qa)); } @@ -107,10 +103,12 @@ pub fn score_candidate( author_score = best; } - // Penalize heavily if title is not exact AND no author match - // This prevents "Not the Boss of the Year" from matching "Boss of the Year" - // when authors don't match - if !title_exact && !authors_match && q_title_norm.is_some() && !q_auths.is_empty() { + // Require minimum author match score when query has authors. + // This prevents false positives from exact title matches with wrong authors + // (e.g., "Boss of the Year" by Nicole French matching "Boss of the Year" by T. Funny) + // and prevents loose title matches (e.g., "Book Title" matching "Book Title: A Novel") + // when the author doesn't match at all. + if !q_auths.is_empty() && author_score < 0.5 { return 0.0; } diff --git a/mlm_meta/src/http.rs b/mlm_meta/src/http.rs index c511e512..7d03177d 100644 --- a/mlm_meta/src/http.rs +++ b/mlm_meta/src/http.rs @@ -16,8 +16,54 @@ pub struct ReqwestClient { impl ReqwestClient { pub fn new() -> Self { + use reqwest::header::{ + ACCEPT, ACCEPT_LANGUAGE, CONNECTION, HeaderMap, HeaderName, HeaderValue, + }; + + let mut headers = HeaderMap::new(); + headers.insert( + ACCEPT, + HeaderValue::from_static( + "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", + ), + ); + headers.insert( + ACCEPT_LANGUAGE, + HeaderValue::from_static("en,en-US;q=0.9,en-GB;q=0.8,sv;q=0.7"), + ); + headers.insert(CONNECTION, HeaderValue::from_static("keep-alive")); + headers.insert( + HeaderName::from_static("dnt"), + HeaderValue::from_static("1"), + ); + headers.insert( + HeaderName::from_static("priority"), + HeaderValue::from_static("u=0, i"), + ); + headers.insert( + HeaderName::from_static("sec-fetch-dest"), + HeaderValue::from_static("document"), + ); + headers.insert( + 
HeaderName::from_static("sec-fetch-mode"), + HeaderValue::from_static("navigate"), + ); + headers.insert( + HeaderName::from_static("sec-fetch-site"), + HeaderValue::from_static("none"), + ); + headers.insert( + HeaderName::from_static("sec-fetch-user"), + HeaderValue::from_static("?1"), + ); + Self { - client: Client::new(), + client: Client::builder() + .user_agent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/143.0.0.0 Safari/537.36") + .default_headers(headers) + .gzip(true) + .build() + .unwrap() } } } diff --git a/mlm_meta/src/providers/mod.rs b/mlm_meta/src/providers/mod.rs index ffd646a0..fe582942 100644 --- a/mlm_meta/src/providers/mod.rs +++ b/mlm_meta/src/providers/mod.rs @@ -1,9 +1,11 @@ pub mod fake; pub mod hardcover; +pub mod openlibrary; pub mod romanceio; pub use fake::FakeProvider; pub use hardcover::Hardcover; +pub use openlibrary::OpenLibrary; pub use romanceio::RomanceIo; use crate::helpers::SearchQuery; diff --git a/mlm_meta/src/providers/openlibrary.rs b/mlm_meta/src/providers/openlibrary.rs new file mode 100644 index 00000000..290c1fb2 --- /dev/null +++ b/mlm_meta/src/providers/openlibrary.rs @@ -0,0 +1,162 @@ +use std::sync::Arc; + +use anyhow::{Context, Result}; +use async_trait::async_trait; +use tracing::{debug, instrument}; +use url::Url; + +use crate::http::ReqwestClient; +use crate::providers::{MetadataProvider, search_with_fallback}; +use crate::traits::Provider; +use crate::{helpers, http::HttpClient}; +use mlm_db::TorrentMeta; + +pub struct OpenLibrary { + pub client: Arc, +} + +impl OpenLibrary { + pub fn new() -> Self { + Self { + client: Arc::new(ReqwestClient::new()), + } + } + + pub fn with_client(client: Arc) -> Self { + Self { client } + } + + #[instrument(skip_all, fields(url = %url))] + async fn fetch_json(&self, url: &str) -> Result { + debug!("fetching Open Library JSON"); + self.client.get(url).await + } +} + +impl Default for OpenLibrary { + fn default() -> Self { + 
Self::new() + } +} + +impl MetadataProvider for OpenLibrary { + type SearchResult = serde_json::Value; + + fn id(&self) -> &str { + "openlibrary" + } + + async fn search(&self, query: &helpers::SearchQuery) -> Result> { + let base = Url::parse("https://openlibrary.org").unwrap(); + let qstr = query.to_combined_string(); + + let mut search_url = base.join("/search.json").unwrap(); + if !qstr.is_empty() { + search_url.query_pairs_mut().append_pair("q", &qstr); + } + + let url = search_url.to_string(); + debug!(query = %qstr, url = %url, "searching Open Library"); + + let body = self.fetch_json(&url).await.context("fetch search json")?; + let v: serde_json::Value = serde_json::from_str(&body).context("parse search json")?; + + let docs = v.get("docs").and_then(|d| d.as_array()).cloned(); + debug!( + count = docs.as_ref().map(|a| a.len()).unwrap_or(0), + "Open Library search results" + ); + Ok(docs.unwrap_or_default()) + } + + fn result_title<'a>(&self, result: &'a Self::SearchResult) -> Option<&'a str> { + result.get("title").and_then(|t| t.as_str()) + } + + fn result_authors(&self, result: &Self::SearchResult) -> Vec { + result + .get("author_name") + .and_then(|a| a.as_array()) + .map(|arr| { + arr.iter() + .filter_map(|a| a.as_str().map(|s| s.to_string())) + .collect() + }) + .unwrap_or_default() + } + + async fn result_to_meta(&self, result: &Self::SearchResult) -> Result { + let title = result + .get("title") + .and_then(|t| t.as_str()) + .unwrap_or("") + .to_string(); + + let authors = self.result_authors(result); + + let first_publish_year = result + .get("first_publish_year") + .and_then(|y| y.as_i64()) + .map(|y| y.to_string()); + + let edition_count = result + .get("edition_count") + .and_then(|e| e.as_i64()) + .map(|e| e as u32); + + let subjects: Vec = result + .get("subject") + .and_then(|s| s.as_array()) + .map(|arr| { + arr.iter() + .filter_map(|s| s.as_str()) + .filter(|s| s.len() > 2 && s.len() < 50) + .take(20) + .map(|s| s.to_lowercase()) + 
.collect() + }) + .unwrap_or_default(); + + let mut tm = TorrentMeta { + title: title.clone(), + description: String::new(), + authors: authors.clone(), + ..Default::default() + }; + + if let Some(year) = first_publish_year { + tm.description + .push_str(&format!("First published: {}\n", year)); + } + if let Some(count) = edition_count { + tm.description.push_str(&format!("{} editions\n", count)); + } + + tm.tags = subjects; + + if let Some(isbns) = result.get("isbn").and_then(|i| i.as_array()) { + for isbn in isbns.iter().take(3) { + if let Some(isbn_str) = isbn.as_str() { + tm.ids + .insert(mlm_db::ids::ISBN.to_string(), isbn_str.to_string()); + break; + } + } + } + + debug!(title = %tm.title, authors = ?tm.authors, tags_count = tm.tags.len(), "returning Open Library metadata"); + Ok(tm) + } +} + +#[async_trait] +impl Provider for OpenLibrary { + fn id(&self) -> &str { + MetadataProvider::id(self) + } + + async fn fetch(&self, query: &TorrentMeta) -> Result { + let (meta, _score) = search_with_fallback(self, &query.title, &query.authors).await?; + Ok(meta) + } +} diff --git a/mlm_meta/src/providers/romanceio.rs b/mlm_meta/src/providers/romanceio.rs index 509a1f26..8e3f1aa9 100644 --- a/mlm_meta/src/providers/romanceio.rs +++ b/mlm_meta/src/providers/romanceio.rs @@ -139,7 +139,25 @@ impl MetadataProvider for RomanceIo { .fetch_html(json_url.as_str()) .await .context("fetch search json")?; - let v: serde_json::Value = serde_json::from_str(&body).context("parse search json")?; + + let v: serde_json::Value = match serde_json::from_str(&body) { + Ok(v) => v, + Err(e) => { + let preview = if body.len() > 50000 { + format!("{}...", &body[..50000]) + } else { + body.clone() + }; + tracing::warn!( + url = %json_url, + response_preview = %preview, + "failed to parse romance.io search response: {}", + e + ); + return Err(anyhow::anyhow!("parse search json: {}", e)) + .context("parse search json"); + } + }; let books = v.get("books").and_then(|b| b.as_array()).cloned(); 
debug!( diff --git a/mlm_meta/tests/mock_openlibrary.rs b/mlm_meta/tests/mock_openlibrary.rs new file mode 100644 index 00000000..2067fc20 --- /dev/null +++ b/mlm_meta/tests/mock_openlibrary.rs @@ -0,0 +1,55 @@ +use anyhow::Result; +use mlm_meta::http::HttpClient; +use std::sync::Arc; + +fn resolve_plan_file(rel: &str) -> std::io::Result { + let mut dir = std::env::current_dir()?; + loop { + let candidate = dir.join(rel); + if candidate.exists() { + return Ok(candidate); + } + if !dir.pop() { + break; + } + } + Err(std::io::Error::new( + std::io::ErrorKind::NotFound, + format!("could not find {}", rel), + )) +} + +pub struct MockOpenLibraryClient; + +#[async_trait::async_trait] +impl HttpClient for MockOpenLibraryClient { + async fn get(&self, url: &str) -> Result { + let u = url::Url::parse(url).map_err(|e| anyhow::anyhow!(e))?; + let rel = if u.host_str().is_some_and(|h| h.contains("openlibrary.org")) { + if u.path().starts_with("/search.json") { + "plan/openlibrary/search.json" + } else { + return Err(anyhow::anyhow!("unexpected path: {}", u.path())); + } + } else { + return Err(anyhow::anyhow!("unexpected host in test fetch")); + }; + + let p = resolve_plan_file(rel).map_err(|e| anyhow::anyhow!(e))?; + let s = std::fs::read_to_string(p).map_err(|e| anyhow::anyhow!(e))?; + Ok(s) + } + + async fn post( + &self, + _url: &str, + _body: Option<&str>, + _headers: &[(&str, &str)], + ) -> Result { + Err(anyhow::anyhow!("post not implemented in mock")) + } +} + +pub fn boxed() -> Arc { + Arc::new(MockOpenLibraryClient) +} diff --git a/mlm_meta/tests/openlibrary_tests.rs b/mlm_meta/tests/openlibrary_tests.rs new file mode 100644 index 00000000..db05a6fe --- /dev/null +++ b/mlm_meta/tests/openlibrary_tests.rs @@ -0,0 +1,118 @@ +use mlm_db::TorrentMeta; +use mlm_meta::Provider; +use mlm_meta::http::HttpClient; +use mlm_meta::providers::OpenLibrary; + +mod mock_openlibrary; + +#[tokio::test] +async fn openlibrary_parses_search_results() { + let prov = 
OpenLibrary::with_client(mock_openlibrary::boxed()); + let query_meta = TorrentMeta { + title: "The Lord of the Rings".to_string(), + ..Default::default() + }; + let m = prov + .fetch(&query_meta) + .await + .expect("should fetch metadata"); + assert!(m.title.contains("Lord of the Rings")); + assert!(!m.authors.is_empty()); +} + +#[tokio::test] +async fn openlibrary_matches_title_and_author() { + let prov = OpenLibrary::with_client(mock_openlibrary::boxed()); + let query_meta = TorrentMeta { + title: "The Lord of the Rings".to_string(), + authors: vec!["J.R.R. Tolkien".to_string()], + ..Default::default() + }; + let m = prov + .fetch(&query_meta) + .await + .expect("should match title+author"); + assert!(m.title.to_lowercase().contains("lord of the rings")); + assert!( + m.authors + .iter() + .any(|a| a.to_lowercase().contains("tolkien")) + ); +} + +#[tokio::test] +async fn openlibrary_extracts_isbn() { + let prov = OpenLibrary::with_client(mock_openlibrary::boxed()); + let query_meta = TorrentMeta { + title: "The Lord of the Rings".to_string(), + ..Default::default() + }; + let m = prov + .fetch(&query_meta) + .await + .expect("should fetch metadata"); + assert!( + m.ids.values().any(|v| v.starts_with("978")), + "should have ISBN" + ); +} + +#[tokio::test] +async fn openlibrary_extracts_subjects_as_tags() { + let prov = OpenLibrary::with_client(mock_openlibrary::boxed()); + let query_meta = TorrentMeta { + title: "The Lord of the Rings".to_string(), + ..Default::default() + }; + let m = prov + .fetch(&query_meta) + .await + .expect("should fetch metadata"); + assert!(!m.tags.is_empty(), "should have subject tags"); +} + +#[tokio::test] +async fn openlibrary_title_only_search() { + let prov = OpenLibrary::with_client(mock_openlibrary::boxed()); + let query_meta = TorrentMeta { + title: "The Lord of the Rings".to_string(), + authors: vec![], + ..Default::default() + }; + let m = prov + .fetch(&query_meta) + .await + .expect("should find result with title only"); + 
assert!(m.title.to_lowercase().contains("lord of the rings")); +} + +#[tokio::test] +async fn openlibrary_no_results() { + use std::sync::Arc; + + struct EmptyClient; + + #[async_trait::async_trait] + impl HttpClient for EmptyClient { + async fn get(&self, _url: &str) -> anyhow::Result { + Ok(r#"{"numFound": 0, "docs": []}"#.to_string()) + } + + async fn post( + &self, + _url: &str, + _body: Option<&str>, + _headers: &[(&str, &str)], + ) -> anyhow::Result { + anyhow::bail!("post not implemented") + } + } + + let prov = OpenLibrary::with_client(Arc::new(EmptyClient)); + let query_meta = TorrentMeta { + title: "Nonexistent Title XYZ123".to_string(), + ..Default::default() + }; + let res = prov.fetch(&query_meta).await; + assert!(res.is_err(), "expected no results for nonexistent title"); +} diff --git a/server/src/config.rs b/server/src/config.rs index 33dffcc5..a40b85bb 100644 --- a/server/src/config.rs +++ b/server/src/config.rs @@ -16,6 +16,7 @@ use time::Date; pub enum ProviderConfig { Hardcover(HardcoverConfig), RomanceIo(RomanceIoConfig), + OpenLibrary(OpenLibraryConfig), } #[derive(Clone, Debug, Deserialize)] @@ -37,11 +38,21 @@ pub struct RomanceIoConfig { pub timeout_secs: Option, } +#[derive(Clone, Debug, Deserialize)] +#[serde(deny_unknown_fields)] +pub struct OpenLibraryConfig { + #[serde(default = "default_provider_enabled")] + pub enabled: bool, + #[serde(default)] + pub timeout_secs: Option, +} + impl ProviderConfig { pub fn id(&self) -> &str { match self { ProviderConfig::Hardcover(_) => "hardcover", ProviderConfig::RomanceIo(_) => "romanceio", + ProviderConfig::OpenLibrary(_) => "openlibrary", } } @@ -49,6 +60,7 @@ impl ProviderConfig { match self { ProviderConfig::Hardcover(c) => c.enabled, ProviderConfig::RomanceIo(c) => c.enabled, + ProviderConfig::OpenLibrary(c) => c.enabled, } } @@ -56,6 +68,7 @@ impl ProviderConfig { match self { ProviderConfig::Hardcover(c) => c.timeout_secs, ProviderConfig::RomanceIo(c) => c.timeout_secs, + 
ProviderConfig::OpenLibrary(c) => c.timeout_secs, } } } diff --git a/server/src/main.rs b/server/src/main.rs index 667effe3..6034e56d 100644 --- a/server/src/main.rs +++ b/server/src/main.rs @@ -230,6 +230,12 @@ async fn app_main() -> Result<()> { timeout_secs: c.timeout_secs, } } + mlm::config::ProviderConfig::OpenLibrary(c) => { + mlm::metadata::ProviderSetting::OpenLibrary { + enabled: c.enabled, + timeout_secs: c.timeout_secs, + } + } }) .collect(); let metadata_service = MetadataService::from_settings(&provider_settings, default_timeout); diff --git a/server/src/metadata/mod.rs b/server/src/metadata/mod.rs index 351eb88f..1e56db25 100644 --- a/server/src/metadata/mod.rs +++ b/server/src/metadata/mod.rs @@ -2,7 +2,7 @@ use crate::stats::Context; use anyhow::Result; use mlm_db::DatabaseExt as _; use mlm_db::{Event, EventType, MetadataSource, TorrentMeta}; -use mlm_meta::providers::{Hardcover, RomanceIo}; +use mlm_meta::providers::{Hardcover, OpenLibrary, RomanceIo}; use mlm_meta::traits::Provider; use std::sync::Arc; use tokio::time::{Duration, timeout}; @@ -27,6 +27,10 @@ pub enum ProviderSetting { enabled: bool, timeout_secs: Option, }, + OpenLibrary { + enabled: bool, + timeout_secs: Option, + }, } impl MetadataService { @@ -67,6 +71,18 @@ impl MetadataService { .unwrap_or(default_timeout); providers.push((Arc::new(RomanceIo::new()), to)); } + ProviderSetting::OpenLibrary { + enabled, + timeout_secs, + } => { + if !enabled { + continue; + } + let to = timeout_secs + .map(Duration::from_secs) + .unwrap_or(default_timeout); + providers.push((Arc::new(OpenLibrary::new()), to)); + } } } Self::new(providers, default_timeout) diff --git a/server/src/web/pages/torrent.rs b/server/src/web/pages/torrent.rs index f3bd9bd7..93147bb3 100644 --- a/server/src/web/pages/torrent.rs +++ b/server/src/web/pages/torrent.rs @@ -186,19 +186,24 @@ async fn torrent_page_id( } else { None }; - let mam_meta = mam_torrent.as_ref().map(|t| t.as_meta()).transpose()?; + let mut mam_meta = 
mam_torrent.as_ref().map(|t| t.as_meta()).transpose()?; - if let Some(mam_meta) = &mam_meta - && torrent + if let Some(mam_meta) = &mut mam_meta { + let mut ids = torrent.meta.ids.clone(); + ids.append(&mut mam_meta.ids); // MaM adds its IDs + mam_meta.ids = ids; + + if torrent .meta .uploaded_at .as_ref() .is_none_or(|t| t.0 == UtcDateTime::UNIX_EPOCH) - { - let (_guard, rw) = context.db.rw_async().await?; - torrent.meta.uploaded_at = mam_meta.uploaded_at; - rw.upsert(torrent.clone())?; - rw.commit()?; + { + let (_guard, rw) = context.db.rw_async().await?; + torrent.meta.uploaded_at = mam_meta.uploaded_at; + rw.upsert(torrent.clone())?; + rw.commit()?; + } } let mut qbit_data = None; @@ -348,17 +353,19 @@ pub async fn torrent_page_post_id( let mam = context.mam()?; refresh_metadata_relink(&config, &context.db, &mam, id).await?; } - "match-hardcover" | "match-romanceio" => { + "match" => { // Build a query from existing torrent metadata let Some(mut torrent) = context.db.r_transaction()?.get().primary::(id)? 
else { return Err(anyhow::Error::msg("Could not find torrent").into()); }; - let provider_id = if form.action == "match-hardcover" { - "hardcover" - } else { - "romanceio" + let provider_id = match &form.provider { + Some(p) => p.as_str(), + None => { + tracing::error!("metadata match failed: no provider selected"); + return Err(anyhow::Error::msg("no provider selected").into()); + } }; match match_meta(&context, &torrent.meta, provider_id).await { @@ -386,7 +393,9 @@ pub async fn torrent_page_post_id( rw.insert(ev)?; rw.commit()?; } - Err(e) => tracing::error!("metadata match failed: {e}"), + Err(e) => { + tracing::error!("metadata match failed for provider {}: {}", provider_id, e) + } } } "remove" => { @@ -484,6 +493,8 @@ pub async fn torrent_page_post_id( pub struct TorrentPageForm { action: String, #[serde(default)] + provider: Option, + #[serde(default)] category: String, #[serde(default)] tags: Vec, diff --git a/server/templates/pages/torrent.html b/server/templates/pages/torrent.html index a7096c2e..1d15fa92 100644 --- a/server/templates/pages/torrent.html +++ b/server/templates/pages/torrent.html @@ -30,7 +30,7 @@

Replaced with: {{ torrent.meta.title }} {% endif %}
-
+
@@ -76,15 +82,6 @@

Replaced with: {{ torrent.meta.title }}clean torrent

-
- {% for provider in metadata_providers %} - {% if provider == "hardcover" %} - - {% else if provider == "romanceio" %} - - {% endif %} - {% endfor %} -
{% endif %} {% if wanted_path != torrent.library_path %} {% if let Some(wanted_path) = wanted_path %} diff --git a/server/tests/metadata_integration.rs b/server/tests/metadata_integration.rs index 809a5857..a50b69fc 100644 --- a/server/tests/metadata_integration.rs +++ b/server/tests/metadata_integration.rs @@ -107,6 +107,12 @@ async fn test_metadata_fetch_and_persist_romanceio() -> Result<()> { timeout_secs: c.timeout_secs, } } + mlm::config::ProviderConfig::OpenLibrary(c) => { + mlm::metadata::ProviderSetting::OpenLibrary { + enabled: c.enabled, + timeout_secs: c.timeout_secs, + } + } }) .collect(); let metadata = From 90ee38968337b705b20f9d03a60d286dfd4e3330 Mon Sep 17 00:00:00 2001 From: Stirling Mouse <181794392+StirlingMouse@users.noreply.github.com> Date: Mon, 2 Mar 2026 15:07:45 +0100 Subject: [PATCH 3/4] Rust tls normalization --- mlm_mam/Cargo.toml | 6 ++++-- mlm_meta/Cargo.toml | 5 ++--- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/mlm_mam/Cargo.toml b/mlm_mam/Cargo.toml index e336b77b..d0a7e033 100644 --- a/mlm_mam/Cargo.toml +++ b/mlm_mam/Cargo.toml @@ -13,8 +13,10 @@ mlm_parse = { path = "../mlm_parse" } native_db = { git = "https://github.com/StirlingMouse/native_db.git", branch = "0.8.x" } native_model = "0.4.20" once_cell = "1.21.3" -openssl = { version = "0.10.73", features = ["vendored"] } -reqwest = { version = "0.12.20", features = ["json"] } +reqwest = { version = "0.12.20", default-features = false, features = [ + "json", + "rustls-tls", +] } reqwest_cookie_store = "0.8.0" serde = "1.0.136" serde_derive = "1.0.136" diff --git a/mlm_meta/Cargo.toml b/mlm_meta/Cargo.toml index 7f72f54a..bc6d4c0f 100644 --- a/mlm_meta/Cargo.toml +++ b/mlm_meta/Cargo.toml @@ -6,10 +6,9 @@ edition = "2024" [dependencies] anyhow = "1.0" async-trait = "0.1" -openssl = { version = "0.10.73", features = ["vendored"] } serde = { version = "1.0", features = ["derive"] } -reqwest = { version = "0.11", features = ["json", "gzip", "rustls-tls"] } -tokio = 
{ version = "1", features = ["rt-multi-thread", "macros"] } +reqwest = { version = "0.12.20", default-features = false, features = ["json", "rustls-tls", "gzip"] } +tokio = { version = "1", features = ["rt-multi-thread", "sync", "macros"] } serde_json = "1.0" scraper = "0.14" mlm_db = { path = "../mlm_db" } From ac1957f35b5da3905ba7561d713c14005c13347e Mon Sep 17 00:00:00 2001 From: Stirling Mouse <181794392+StirlingMouse@users.noreply.github.com> Date: Wed, 11 Mar 2026 23:13:00 +0100 Subject: [PATCH 4/4] New categories for metadata providers --- Cargo.lock | 430 +++++++----------- mlm_meta/src/lib.rs | 2 + mlm_meta/src/providers/hardcover.rs | 672 ++++++++++++++++++++++++---- mlm_meta/src/providers/romanceio.rs | 24 +- mlm_meta/src/tag_category_map.rs | 494 ++++++++++++++++++++ mlm_meta/tests/hardcover_tests.rs | 218 +++++++-- mlm_meta/tests/mock_fetcher.rs | 1 + mlm_meta/tests/romanceio_tests.rs | 10 +- mlm_meta/tests/scoring_tests.rs | 1 - 9 files changed, 1444 insertions(+), 408 deletions(-) create mode 100644 mlm_meta/src/tag_category_map.rs diff --git a/Cargo.lock b/Cargo.lock index c2662486..474425f8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -335,7 +335,7 @@ dependencies = [ "serde_json", "serde_path_to_error", "serde_urlencoded", - "sync_wrapper 1.0.2", + "sync_wrapper", "tokio", "tower", "tower-layer", @@ -357,7 +357,7 @@ dependencies = [ "mime", "pin-project-lite", "rustversion", - "sync_wrapper 1.0.2", + "sync_wrapper", "tower-layer", "tower-service", "tracing", @@ -586,6 +586,12 @@ version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9555578bc9e57714c812a1f84e4fc5b4d21fcb063490c624de019f7464c91268" +[[package]] +name = "cfg_aliases" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724" + [[package]] name = "cocoa" version = "0.25.0" @@ -1045,7 +1051,7 @@ dependencies = [ "rustc_version", "toml 0.9.2", 
"vswhom", - "winreg 0.55.0", + "winreg", ] [[package]] @@ -1378,8 +1384,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "335ff9f135e4384c8150d6f27c6daed433577f86b4750418338c01a1a2528592" dependencies = [ "cfg-if", + "js-sys", "libc", "wasi 0.11.1+wasi-snapshot-preview1", + "wasm-bindgen", ] [[package]] @@ -1389,9 +1397,11 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "26145e563e54f2cadc477553f1ec5ee650b00862f0a58bcd12cbdc5f0ea2d2f4" dependencies = [ "cfg-if", + "js-sys", "libc", "r-efi", "wasi 0.14.2+wasi-0.2.4", + "wasm-bindgen", ] [[package]] @@ -1418,25 +1428,6 @@ dependencies = [ "wasm-bindgen", ] -[[package]] -name = "h2" -version = "0.3.27" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0beca50380b1fc32983fc1cb4587bfa4bb9e78fc259aad4a0032d2080309222d" -dependencies = [ - "bytes", - "fnv", - "futures-core", - "futures-sink", - "futures-util", - "http 0.2.12", - "indexmap", - "slab", - "tokio", - "tokio-util", - "tracing", -] - [[package]] name = "h2" version = "0.4.10" @@ -1617,7 +1608,6 @@ dependencies = [ "futures-channel", "futures-core", "futures-util", - "h2 0.3.27", "http 0.2.12", "http-body 0.4.6", "httparse", @@ -1640,7 +1630,7 @@ dependencies = [ "bytes", "futures-channel", "futures-util", - "h2 0.4.10", + "h2", "http 1.3.1", "http-body 1.0.1", "httparse", @@ -1652,20 +1642,6 @@ dependencies = [ "want", ] -[[package]] -name = "hyper-rustls" -version = "0.24.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ec3efd23720e2049821a693cbc7e65ea87c72f1c58ff2f9522ff332b1491e590" -dependencies = [ - "futures-util", - "http 0.2.12", - "hyper 0.14.32", - "rustls 0.21.12", - "tokio", - "tokio-rustls 0.24.1", -] - [[package]] name = "hyper-rustls" version = "0.27.7" @@ -1675,24 +1651,12 @@ dependencies = [ "http 1.3.1", "hyper 1.6.0", "hyper-util", - "rustls 0.23.28", + "rustls", "rustls-pki-types", "tokio", - "tokio-rustls 0.26.2", + 
"tokio-rustls", "tower-service", -] - -[[package]] -name = "hyper-tls" -version = "0.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d6183ddfa99b85da61a140bea0efc93fdf56ceaa041b37d553518030827f9905" -dependencies = [ - "bytes", - "hyper 0.14.32", - "native-tls", - "tokio", - "tokio-native-tls", + "webpki-roots", ] [[package]] @@ -1730,7 +1694,7 @@ dependencies = [ "percent-encoding", "pin-project-lite", "socket2 0.5.10", - "system-configuration 0.6.1", + "system-configuration", "tokio", "tower-service", "tracing", @@ -2080,6 +2044,12 @@ dependencies = [ "value-bag", ] +[[package]] +name = "lru-slab" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "112b39cec0b298b6c1999fee3e31427f74f676e4cb9879ed1a121b43661a4154" + [[package]] name = "mac" version = "0.1.1" @@ -2237,7 +2207,7 @@ dependencies = [ "qbit", "quick-xml", "regex", - "reqwest 0.12.24", + "reqwest", "reqwest_cookie_store", "sanitize-filename", "scraper 0.23.1", @@ -2301,8 +2271,7 @@ dependencies = [ "native_db", "native_model", "once_cell", - "openssl", - "reqwest 0.12.24", + "reqwest", "reqwest_cookie_store", "serde", "serde-nested-json", @@ -2323,8 +2292,7 @@ dependencies = [ "httpmock", "mlm_db", "mlm_parse", - "openssl", - "reqwest 0.11.27", + "reqwest", "scraper 0.14.0", "serde", "serde_json", @@ -2967,7 +2935,7 @@ source = "git+https://github.com/StirlingMouse/qbittorrent-webui-api.git#ce47d16 dependencies = [ "bytes", "derive_builder", - "reqwest 0.12.24", + "reqwest", "serde", "serde_json", "serde_repr", @@ -2985,6 +2953,61 @@ dependencies = [ "serde", ] +[[package]] +name = "quinn" +version = "0.11.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9e20a958963c291dc322d98411f541009df2ced7b5a4f2bd52337638cfccf20" +dependencies = [ + "bytes", + "cfg_aliases", + "pin-project-lite", + "quinn-proto", + "quinn-udp", + "rustc-hash", + "rustls", + "socket2 0.5.10", + "thiserror 2.0.17", + 
"tokio", + "tracing", + "web-time", +] + +[[package]] +name = "quinn-proto" +version = "0.11.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "434b42fec591c96ef50e21e886936e66d3cc3f737104fdb9b737c40ffb94c098" +dependencies = [ + "bytes", + "getrandom 0.3.3", + "lru-slab", + "rand 0.9.2", + "ring", + "rustc-hash", + "rustls", + "rustls-pki-types", + "slab", + "thiserror 2.0.17", + "tinyvec", + "tracing", + "web-time", +] + +[[package]] +name = "quinn-udp" +version = "0.5.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "addec6a0dcad8a8d96a771f815f0eaf55f9d1805756410b39f5fa81332574cbd" +dependencies = [ + "cfg_aliases", + "libc", + "once_cell", + "socket2 0.5.10", + "tracing", + "windows-sys 0.52.0", +] + [[package]] name = "quote" version = "1.0.40" @@ -3025,6 +3048,16 @@ dependencies = [ "rand_core 0.6.4", ] +[[package]] +name = "rand" +version = "0.9.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6db2770f06117d490610c7488547d543617b21bfa07796d7a12f6f1bd53850d1" +dependencies = [ + "rand_chacha 0.9.0", + "rand_core 0.9.5", +] + [[package]] name = "rand_chacha" version = "0.2.2" @@ -3045,6 +3078,16 @@ dependencies = [ "rand_core 0.6.4", ] +[[package]] +name = "rand_chacha" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3022b5f1df60f26e1ffddd6c66e8aa15de382ae63b3a0c1bfc0e4d3e3f325cb" +dependencies = [ + "ppv-lite86", + "rand_core 0.9.5", +] + [[package]] name = "rand_core" version = "0.5.1" @@ -3063,6 +3106,15 @@ dependencies = [ "getrandom 0.2.16", ] +[[package]] +name = "rand_core" +version = "0.9.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "76afc826de14238e6e8c374ddcc1fa19e374fd8dd986b0d2af0d02377261d83c" +dependencies = [ + "getrandom 0.3.3", +] + [[package]] name = "rand_hc" version = "0.2.0" @@ -3194,58 +3246,13 @@ version = "0.8.5" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" -[[package]] -name = "reqwest" -version = "0.11.27" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dd67538700a17451e7cba03ac727fb961abb7607553461627b97de0b89cf4a62" -dependencies = [ - "async-compression", - "base64 0.21.7", - "bytes", - "encoding_rs", - "futures-core", - "futures-util", - "h2 0.3.27", - "http 0.2.12", - "http-body 0.4.6", - "hyper 0.14.32", - "hyper-rustls 0.24.2", - "hyper-tls 0.5.0", - "ipnet", - "js-sys", - "log", - "mime", - "native-tls", - "once_cell", - "percent-encoding", - "pin-project-lite", - "rustls 0.21.12", - "rustls-pemfile", - "serde", - "serde_json", - "serde_urlencoded", - "sync_wrapper 0.1.2", - "system-configuration 0.5.1", - "tokio", - "tokio-native-tls", - "tokio-rustls 0.24.1", - "tokio-util", - "tower-service", - "url", - "wasm-bindgen", - "wasm-bindgen-futures", - "web-sys", - "webpki-roots", - "winreg 0.50.0", -] - [[package]] name = "reqwest" version = "0.12.24" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9d0946410b9f7b082a427e4ef5c8ff541a88b357bc6c637c40db3a68ac70a36f" dependencies = [ + "async-compression", "base64 0.22.1", "bytes", "cookie", @@ -3253,13 +3260,13 @@ dependencies = [ "encoding_rs", "futures-core", "futures-util", - "h2 0.4.10", + "h2", "http 1.3.1", "http-body 1.0.1", "http-body-util", "hyper 1.6.0", - "hyper-rustls 0.27.7", - "hyper-tls 0.6.0", + "hyper-rustls", + "hyper-tls", "hyper-util", "js-sys", "log", @@ -3268,13 +3275,17 @@ dependencies = [ "native-tls", "percent-encoding", "pin-project-lite", + "quinn", + "rustls", "rustls-pki-types", "serde", "serde_json", "serde_urlencoded", - "sync_wrapper 1.0.2", + "sync_wrapper", "tokio", "tokio-native-tls", + "tokio-rustls", + "tokio-util", "tower", "tower-http", "tower-service", @@ -3282,6 +3293,7 @@ dependencies = [ "wasm-bindgen", "wasm-bindgen-futures", 
"web-sys", + "webpki-roots", ] [[package]] @@ -3292,7 +3304,7 @@ checksum = "a0b36498c7452f11b1833900f31fbb01fc46be20992a50269c88cf59d79f54e9" dependencies = [ "bytes", "cookie_store", - "reqwest 0.12.24", + "reqwest", "url", ] @@ -3344,18 +3356,6 @@ dependencies = [ "windows-sys 0.60.2", ] -[[package]] -name = "rustls" -version = "0.21.12" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3f56a14d1f48b391359b22f731fd4bd7e43c97f3c50eee276f3aa09c94784d3e" -dependencies = [ - "log", - "ring", - "rustls-webpki 0.101.7", - "sct", -] - [[package]] name = "rustls" version = "0.23.28" @@ -3363,40 +3363,23 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7160e3e10bf4535308537f3c4e1641468cd0e485175d6163087c0393c7d46643" dependencies = [ "once_cell", + "ring", "rustls-pki-types", - "rustls-webpki 0.103.3", + "rustls-webpki", "subtle", "zeroize", ] -[[package]] -name = "rustls-pemfile" -version = "1.0.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1c74cae0a4cf6ccbbf5f359f08efdf8ee7e1dc532573bf0db71968cb56b1448c" -dependencies = [ - "base64 0.21.7", -] - [[package]] name = "rustls-pki-types" version = "1.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "229a4a4c221013e7e1f1a043678c5cc39fe5171437c88fb47151a21e6f5b5c79" dependencies = [ + "web-time", "zeroize", ] -[[package]] -name = "rustls-webpki" -version = "0.101.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8b6275d1ee7a1cd780b64aca7726599a1dbc893b1e64144529e55c3c2f745765" -dependencies = [ - "ring", - "untrusted", -] - [[package]] name = "rustls-webpki" version = "0.103.3" @@ -3480,16 +3463,6 @@ dependencies = [ "tendril", ] -[[package]] -name = "sct" -version = "0.7.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "da046153aa2352493d6cb7da4b6e5c0c057d8a1d0a9aa8560baffdd945acd414" -dependencies = [ - "ring", - "untrusted", -] - 
[[package]] name = "security-framework" version = "2.11.1" @@ -3884,12 +3857,6 @@ dependencies = [ "unicode-ident", ] -[[package]] -name = "sync_wrapper" -version = "0.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2047c6ded9c721764247e62cd3b03c09ffc529b2ba5b10ec482ae507a4a70160" - [[package]] name = "sync_wrapper" version = "1.0.2" @@ -3910,17 +3877,6 @@ dependencies = [ "syn 2.0.104", ] -[[package]] -name = "system-configuration" -version = "0.5.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ba3a3adc5c275d719af8cb4272ea1c4a6d668a777f37e115f6d11ddbc1c8e0e7" -dependencies = [ - "bitflags 1.3.2", - "core-foundation", - "system-configuration-sys 0.5.0", -] - [[package]] name = "system-configuration" version = "0.6.1" @@ -3929,17 +3885,7 @@ checksum = "3c879d448e9d986b661742763247d3693ed13609438cf3d006f51f5368a5ba6b" dependencies = [ "bitflags 2.9.1", "core-foundation", - "system-configuration-sys 0.6.0", -] - -[[package]] -name = "system-configuration-sys" -version = "0.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a75fb188eb626b924683e3b95e3a48e63551fcfb51949de2f06a9d91dbee93c9" -dependencies = [ - "core-foundation-sys", - "libc", + "system-configuration-sys", ] [[package]] @@ -4094,6 +4040,21 @@ dependencies = [ "zerovec", ] +[[package]] +name = "tinyvec" +version = "1.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bfa5fdc3bce6191a1dbc8c02d5c8bffcf557bafa17c124c5264a458f1b0613fa" +dependencies = [ + "tinyvec_macros", +] + +[[package]] +name = "tinyvec_macros" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" + [[package]] name = "tokio" version = "1.47.1" @@ -4135,23 +4096,13 @@ dependencies = [ "tokio", ] -[[package]] -name = "tokio-rustls" -version = "0.24.1" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "c28327cf380ac148141087fbfb9de9d7bd4e84ab5d2c28fbc911d753de8a7081" -dependencies = [ - "rustls 0.21.12", - "tokio", -] - [[package]] name = "tokio-rustls" version = "0.26.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8e727b36a1a0e8b74c376ac2211e40c2c8af09fb4013c60d910495810f008e9b" dependencies = [ - "rustls 0.23.28", + "rustls", "tokio", ] @@ -4269,7 +4220,7 @@ dependencies = [ "futures-core", "futures-util", "pin-project-lite", - "sync_wrapper 1.0.2", + "sync_wrapper", "tokio", "tower-layer", "tower-service", @@ -4681,11 +4632,24 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "web-time" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a6580f308b1fad9207618087a65c04e7a10bc77e02c8e84e9b00dd4b12fa0bb" +dependencies = [ + "js-sys", + "wasm-bindgen", +] + [[package]] name = "webpki-roots" -version = "0.25.4" +version = "1.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f20c57d8d7db6d3b86154206ae5d8fba62dd39573114de97c2cb0578251f8e1" +checksum = "22cfaf3c063993ff62e73cb4311efde4db1efb31ab78a3e5c457939ad5cc0bed" +dependencies = [ + "rustls-pki-types", +] [[package]] name = "winapi" @@ -4759,15 +4723,6 @@ dependencies = [ "windows-link 0.1.3", ] -[[package]] -name = "windows-sys" -version = "0.48.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9" -dependencies = [ - "windows-targets 0.48.5", -] - [[package]] name = "windows-sys" version = "0.52.0" @@ -4804,21 +4759,6 @@ dependencies = [ "windows-link 0.2.1", ] -[[package]] -name = "windows-targets" -version = "0.48.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9a2fa6e2155d7247be68c096456083145c183cbbbc2764150dda45a87197940c" -dependencies = [ - "windows_aarch64_gnullvm 0.48.5", - 
"windows_aarch64_msvc 0.48.5", - "windows_i686_gnu 0.48.5", - "windows_i686_msvc 0.48.5", - "windows_x86_64_gnu 0.48.5", - "windows_x86_64_gnullvm 0.48.5", - "windows_x86_64_msvc 0.48.5", -] - [[package]] name = "windows-targets" version = "0.52.6" @@ -4851,12 +4791,6 @@ dependencies = [ "windows_x86_64_msvc 0.53.0", ] -[[package]] -name = "windows_aarch64_gnullvm" -version = "0.48.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8" - [[package]] name = "windows_aarch64_gnullvm" version = "0.52.6" @@ -4869,12 +4803,6 @@ version = "0.53.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "86b8d5f90ddd19cb4a147a5fa63ca848db3df085e25fee3cc10b39b6eebae764" -[[package]] -name = "windows_aarch64_msvc" -version = "0.48.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc" - [[package]] name = "windows_aarch64_msvc" version = "0.52.6" @@ -4887,12 +4815,6 @@ version = "0.53.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c7651a1f62a11b8cbd5e0d42526e55f2c99886c77e007179efff86c2b137e66c" -[[package]] -name = "windows_i686_gnu" -version = "0.48.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e" - [[package]] name = "windows_i686_gnu" version = "0.52.6" @@ -4917,12 +4839,6 @@ version = "0.53.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9ce6ccbdedbf6d6354471319e781c0dfef054c81fbc7cf83f338a4296c0cae11" -[[package]] -name = "windows_i686_msvc" -version = "0.48.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406" - [[package]] name = "windows_i686_msvc" version = "0.52.6" @@ -4935,12 +4851,6 @@ version = "0.53.0" 
source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "581fee95406bb13382d2f65cd4a908ca7b1e4c2f1917f143ba16efe98a589b5d" -[[package]] -name = "windows_x86_64_gnu" -version = "0.48.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e" - [[package]] name = "windows_x86_64_gnu" version = "0.52.6" @@ -4953,12 +4863,6 @@ version = "0.53.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2e55b5ac9ea33f2fc1716d1742db15574fd6fc8dadc51caab1c16a3d3b4190ba" -[[package]] -name = "windows_x86_64_gnullvm" -version = "0.48.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc" - [[package]] name = "windows_x86_64_gnullvm" version = "0.52.6" @@ -4971,12 +4875,6 @@ version = "0.53.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0a6e035dd0599267ce1ee132e51c27dd29437f63325753051e71dd9e42406c57" -[[package]] -name = "windows_x86_64_msvc" -version = "0.48.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538" - [[package]] name = "windows_x86_64_msvc" version = "0.52.6" @@ -4998,16 +4896,6 @@ dependencies = [ "memchr", ] -[[package]] -name = "winreg" -version = "0.50.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "524e57b2c537c0f9b1e69f1965311ec12182b4122e45035b1508cd24d2adadb1" -dependencies = [ - "cfg-if", - "windows-sys 0.48.0", -] - [[package]] name = "winreg" version = "0.55.0" diff --git a/mlm_meta/src/lib.rs b/mlm_meta/src/lib.rs index bc4d80ef..8640a0b5 100644 --- a/mlm_meta/src/lib.rs +++ b/mlm_meta/src/lib.rs @@ -1,9 +1,11 @@ pub mod helpers; pub mod http; pub mod providers; +pub mod tag_category_map; pub mod traits; pub use helpers::*; pub use http::*; pub use providers::*; +pub use 
tag_category_map::*; pub use traits::*; diff --git a/mlm_meta/src/providers/hardcover.rs b/mlm_meta/src/providers/hardcover.rs index b02a0c50..b6f82916 100644 --- a/mlm_meta/src/providers/hardcover.rs +++ b/mlm_meta/src/providers/hardcover.rs @@ -1,10 +1,10 @@ use anyhow::{Context, Result}; use async_trait::async_trait; -use tracing::{debug, instrument}; +use tracing::{debug, instrument, warn}; -use crate::providers::{MetadataProvider, search_with_fallback}; +use crate::providers::MetadataProvider; use crate::traits::Provider; -use crate::{helpers, http::HttpClient}; +use crate::{helpers, http::HttpClient, map_tag_to_category}; use mlm_db::TorrentMeta; use mlm_parse::parse_edition; @@ -81,100 +81,540 @@ impl Hardcover { .filter_map(|hit| hit.get("document").cloned()) .collect() } -} - -impl Default for Hardcover { - fn default() -> Self { - Self::new(None) - } -} -impl MetadataProvider for Hardcover { - type SearchResult = serde_json::Value; - - fn id(&self) -> &str { - "hardcover" + fn result_id(result: &serde_json::Value) -> Option { + result.get("id").and_then(|v| v.as_i64()) } - async fn search(&self, query: &helpers::SearchQuery) -> Result> { + #[instrument(skip_all, fields(book_id = book_id))] + async fn fetch_book_by_id(&self, book_id: i64) -> Result { let gql = r#" - query Search($q: String!, $type: String!, $per_page: Int, $page: Int) { - search(query: $q, query_type: $type, per_page: $per_page, page: $page) { - results + query BookById($id: Int!) 
{ + books_by_pk(id: $id) { + id + title + subtitle + headline + description + pages + images { + height + ratio + url + } + contributions { + author { + name + } + contributable_type + contribution + } + book_series { + position + details + series { + name + } + } + taggings(distinct_on: tag_id) { + id + spoiler + taggable_type + tag { + tag + tag_category { + category + } + } + } + editions { + language { + language + } + asin + isbn_10 + isbn_13 + edition_format + contributions { + contribution + author { + name + } + } + } } } "#; - let qstr = query.to_combined_string(); - let vars = serde_json::json!({"q": qstr, "type": "Book", "per_page": 10, "page": 1}); - debug!(query = %qstr, "searching hardcover"); + let vars = serde_json::json!({ "id": book_id }); let v = self.post_graphql(gql, vars).await?; - let results = self.parse_results(&v); - debug!(count = results.len(), "hardcover search results"); - Ok(results) + let book = v + .get("data") + .and_then(|d| d.get("books_by_pk")) + .cloned() + .context("missing books_by_pk in hardcover response")?; + Ok(book) } - fn result_title<'a>(&self, result: &'a Self::SearchResult) -> Option<&'a str> { - result.get("title")?.as_str() + fn parse_series_entries(position: Option, details: Option<&str>) -> mlm_db::SeriesEntries { + if let Some(pos) = position { + return mlm_db::SeriesEntries::new(vec![mlm_db::SeriesEntry::Num(pos as f32)]); + } + + if let Some(details) = details { + let cleaned = details.trim(); + if let Ok(num) = cleaned.parse::() { + return mlm_db::SeriesEntries::new(vec![mlm_db::SeriesEntry::Num(num)]); + } + } + + mlm_db::SeriesEntries::new(vec![]) } - fn result_authors(&self, result: &Self::SearchResult) -> Vec { - result - .get("author_names") - .and_then(|a| a.as_array()) - .map(|arr| { - arr.iter() - .filter_map(|v| v.as_str().map(|s| s.to_string())) - .collect() + fn normalize_identifier(value: &str) -> String { + value + .chars() + .filter(|c| c.is_ascii_alphanumeric()) + .collect::() + 
.to_ascii_uppercase() + } + + fn normalize_name(value: &str) -> String { + value.trim().to_ascii_lowercase() + } + + fn parse_contributions( + contributions: Option<&Vec>, + ) -> (Vec, Vec) { + let mut authors = Vec::new(); + let mut narrators = Vec::new(); + + if let Some(contribs) = contributions { + for c in contribs { + let name = c + .get("author") + .and_then(|a| a.get("name")) + .and_then(|n| n.as_str()) + .map(str::trim) + .filter(|s| !s.is_empty()) + .map(ToString::to_string); + + if let Some(name) = name { + let contribution = c + .get("contribution") + .and_then(|v| v.as_str()) + .unwrap_or("") + .to_ascii_lowercase(); + + if contribution.contains("narrat") { + if !narrators.contains(&name) { + narrators.push(name); + } + } else if (contribution.is_empty() || contribution.contains("author")) + && !authors.contains(&name) + { + authors.push(name); + } + } + } + } + + (authors, narrators) + } + + fn edition_language(edition: &serde_json::Value) -> Option { + edition + .get("language") + .and_then(|l| l.get("language")) + .and_then(|v| v.as_str()) + .and_then(|s| s.parse::().ok()) + } + + fn edition_isbn(edition: &serde_json::Value) -> Option { + edition + .get("isbn_13") + .and_then(|v| v.as_str()) + .or_else(|| edition.get("isbn_10").and_then(|v| v.as_str())) + .map(str::trim) + .filter(|s| !s.is_empty()) + .map(ToString::to_string) + } + + fn edition_asin(edition: &serde_json::Value) -> Option { + edition + .get("asin") + .and_then(|v| v.as_str()) + .map(str::trim) + .filter(|s| !s.is_empty()) + .map(ToString::to_string) + } + + fn edition_format(edition: &serde_json::Value) -> Option { + edition + .get("edition_format") + .and_then(|v| v.as_str()) + .map(str::trim) + .filter(|s| !s.is_empty()) + .map(ToString::to_string) + } + + fn is_audiobook_format(fmt: &str) -> bool { + let f = fmt.to_ascii_lowercase(); + f.contains("audio") || f.contains("audible") || f.contains("hör") + } + + fn is_ebook_format(fmt: &str) -> bool { + let f = 
fmt.to_ascii_lowercase(); + f.contains("ebook") || f.contains("e-book") || f.contains("epub") || f.contains("kindle") + } + + fn score_edition( + edition: &serde_json::Value, + result: &serde_json::Value, + query: Option<&TorrentMeta>, + ) -> i32 { + let mut score = 0_i32; + + let query_isbn = query + .and_then(|q| q.ids.get(mlm_db::ids::ISBN)) + .map(|s| Self::normalize_identifier(s)); + let search_isbn = result + .get("isbns") + .and_then(|i| i.as_array()) + .and_then(|arr| arr.iter().filter_map(|v| v.as_str()).next()) + .map(Self::normalize_identifier); + + if let Some(query_isbn) = query_isbn + && let Some(edition_isbn) = Self::edition_isbn(edition) + && Self::normalize_identifier(&edition_isbn) == query_isbn + { + score += 140; + } else if let Some(search_isbn) = search_isbn + && let Some(edition_isbn) = Self::edition_isbn(edition) + && Self::normalize_identifier(&edition_isbn) == search_isbn + { + score += 20; + } + + if let Some(query_asin) = query + .and_then(|q| q.ids.get(mlm_db::ids::ASIN)) + .map(|s| Self::normalize_identifier(s)) + && let Some(edition_asin) = Self::edition_asin(edition) + && Self::normalize_identifier(&edition_asin) == query_asin + { + score += 140; + } + + if let Some(query_lang) = query.and_then(|q| q.language) + && let Some(edition_lang) = Self::edition_language(edition) + { + if edition_lang == query_lang { + score += 30; + } else { + score -= 10; + } + } + + if let Some(query) = query { + let format = Self::edition_format(edition); + match query.media_type { + mlm_db::MediaType::Audiobook | mlm_db::MediaType::PeriodicalAudiobook => { + if let Some(format) = format { + if Self::is_audiobook_format(&format) { + score += 25; + } else { + score -= 8; + } + } + } + mlm_db::MediaType::Ebook | mlm_db::MediaType::PeriodicalEbook => { + if let Some(format) = format { + if Self::is_ebook_format(&format) { + score += 25; + } else if Self::is_audiobook_format(&format) { + score -= 8; + } + } + } + _ => {} + } + } + + let query_authors = query 
+ .map(|q| { + q.authors + .iter() + .map(|a| Self::normalize_name(a)) + .collect::>() }) - .unwrap_or_default() + .unwrap_or_default(); + if !query_authors.is_empty() { + let (edition_authors, _) = + Self::parse_contributions(edition.get("contributions").and_then(|c| c.as_array())); + let edition_author_names = edition_authors + .iter() + .map(|a| Self::normalize_name(a)) + .collect::>(); + + if edition_author_names + .iter() + .any(|a| query_authors.iter().any(|q| q == a)) + { + score += 20; + } + } + + score } - async fn result_to_meta(&self, result: &Self::SearchResult) -> Result { - let title = result + fn select_best_edition<'a>( + editions: &'a [serde_json::Value], + result: &serde_json::Value, + query: Option<&TorrentMeta>, + ) -> Option<&'a serde_json::Value> { + editions + .iter() + .enumerate() + .max_by_key(|(idx, edition)| { + (Self::score_edition(edition, result, query), -(*idx as i32)) + }) + .map(|(_, edition)| edition) + } + + fn score_result( + &self, + result: &serde_json::Value, + scoring_query: &helpers::SearchQuery, + ) -> f64 { + let q_title = Some(scoring_query.title.clone()); + let q_auths = scoring_query.author.iter().cloned().collect::>(); + crate::helpers::score_candidate( + self.result_title(result), + &self.result_authors(result), + &q_title, + &q_auths, + ) + } + + fn select_best_result( + &self, + results: &[serde_json::Value], + scoring_query: &helpers::SearchQuery, + threshold: f64, + ) -> Option<(usize, f64)> { + let mut best_idx = None; + let mut best_score = -1.0_f64; + for (i, item) in results.iter().enumerate() { + let score = self.score_result(item, scoring_query); + if score > best_score { + best_score = score; + best_idx = Some(i); + } + } + + if best_score >= threshold { + best_idx.map(|idx| (idx, best_score)) + } else { + None + } + } + + async fn search_best_result( + &self, + title: &str, + authors: &[String], + ) -> Result<(serde_json::Value, f64)> { + if title.trim().is_empty() { + return Err(anyhow::anyhow!("title is 
required for search")); + } + + let threshold = self.min_score_threshold(); + let q_with_author = helpers::query_with_author(title, authors); + let q_title_only = helpers::query_title_only(title); + + let tried_with_author = if q_with_author.author.is_some() { + match self.search(&q_with_author).await { + Ok(results) => { + if !results.is_empty() + && let Some((idx, score)) = + self.select_best_result(&results, &q_with_author, threshold) + { + return Ok((results[idx].clone(), score)); + } + } + Err(e) => warn!("hardcover search with author failed: {e}"), + } + true + } else { + false + }; + + if (!tried_with_author || !authors.is_empty()) && !q_title_only.title.is_empty() { + match self.search(&q_title_only).await { + Ok(results) => { + if !results.is_empty() + && let Some((idx, score)) = + self.select_best_result(&results, &q_with_author, threshold) + { + return Ok((results[idx].clone(), score)); + } + } + Err(e) => warn!("hardcover title-only search failed: {e}"), + } + } + + Err(anyhow::anyhow!("no result above score threshold")) + } + + async fn result_to_meta_with_query( + &self, + result: &serde_json::Value, + query: Option<&TorrentMeta>, + ) -> Result { + let id = Self::result_id(result).context("missing hardcover result id")?; + let book = self.fetch_book_by_id(id).await?; + + let title = book .get("title") .and_then(|t| t.as_str()) .unwrap_or("") .to_string(); - let authors: Vec = self.result_authors(result); - let description = result + + let subtitle = book + .get("subtitle") + .and_then(|s| s.as_str()) + .map(str::trim) + .filter(|s| !s.is_empty()) + .map(ToString::to_string); + let headline = book + .get("headline") + .and_then(|h| h.as_str()) + .map(str::trim) + .filter(|s| !s.is_empty()) + .map(ToString::to_string); + let body = book .get("description") .and_then(|d| d.as_str()) - .map(|s| s.to_string()); + .map(str::trim) + .filter(|s| !s.is_empty()) + .map(ToString::to_string); + let mut description_parts = Vec::new(); + if let Some(h) = headline { 
+ description_parts.push(h); + } + if let Some(b) = body { + description_parts.push(b); + } + let description = description_parts.join("\n\n"); + + let (book_authors, book_narrators) = + Self::parse_contributions(book.get("contributions").and_then(|c| c.as_array())); + + let selected_edition = book + .get("editions") + .and_then(|e| e.as_array()) + .and_then(|editions| Self::select_best_edition(editions, result, query)); + + let (authors, narrators) = if let Some(edition) = selected_edition { + let (edition_authors, edition_narrators) = + Self::parse_contributions(edition.get("contributions").and_then(|c| c.as_array())); + + let authors = if edition_authors.is_empty() { + book_authors.clone() + } else { + edition_authors + }; + let narrators = if edition_narrators.is_empty() { + book_narrators.clone() + } else { + edition_narrators + }; + + (authors, narrators) + } else { + (book_authors, book_narrators) + }; let mut tm = TorrentMeta { - title: title.clone(), - description: description.clone().unwrap_or_default(), - authors: authors.clone(), + title: subtitle + .map(|s| format!("{title}: {s}")) + .unwrap_or_else(|| title.clone()), + description, + authors, + narrators, + media_type: query.map(|q| q.media_type).unwrap_or_default(), + language: query.and_then(|q| q.language), ..Default::default() }; - // tags/genres let mut tags = Vec::new(); - if let Some(tarr) = result.get("tags").and_then(|t| t.as_array()) { - for t in tarr { - if let Some(s) = t.as_str() { - let s = s.trim().to_lowercase(); - if !s.is_empty() && !tags.contains(&s) { - tags.push(s); + let mut categories = Vec::new(); + if let Some(taggings) = book.get("taggings").and_then(|t| t.as_array()) { + for tagging in taggings { + let tag_text = tagging + .get("tag") + .and_then(|t| t.get("tag")) + .and_then(|v| v.as_str()) + .map(str::trim) + .filter(|s| !s.is_empty()); + + let Some(tag_text) = tag_text else { + continue; + }; + + let normalized_tag = tag_text.to_ascii_lowercase(); + if 
!normalized_tag.is_empty() && !tags.contains(&normalized_tag) { + tags.push(normalized_tag); + } + + let tag_category = tagging + .get("tag") + .and_then(|t| t.get("tag_category")) + .and_then(|tc| tc.get("category")) + .and_then(|v| v.as_str()) + .map(|s| s.trim().to_ascii_lowercase()); + + if matches!(tag_category.as_deref(), Some("genre" | "mood")) { + for category in map_tag_to_category(tag_text) { + if !categories.contains(&category) { + categories.push(category); + } } } } } - if let Some(genres) = result.get("genres").and_then(|g| g.as_array()) { - for g in genres { - if let Some(s) = g.as_str() { - let s = s.trim().to_lowercase(); - if !s.is_empty() && !tags.contains(&s) { - tags.push(s); - } + tm.tags = tags; + tm.categories = categories; + tm.ids.insert("hardcover".to_string(), id.to_string()); + + if let Some(edition) = selected_edition { + if let Some(lang) = Self::edition_language(edition) { + tm.language = Some(lang); + } + if let Some(asin) = Self::edition_asin(edition) { + tm.ids.insert(mlm_db::ids::ASIN.to_string(), asin); + } + if let Some(isbn) = Self::edition_isbn(edition) { + tm.ids.insert(mlm_db::ids::ISBN.to_string(), isbn); + } + if let Some(format) = Self::edition_format(edition) { + let lower = format.to_ascii_lowercase(); + if Self::is_audiobook_format(&lower) { + tm.media_type = mlm_db::MediaType::Audiobook; + } else if Self::is_ebook_format(&lower) { + tm.media_type = mlm_db::MediaType::Ebook; + } + + let (_t, ed_parsed) = parse_edition(&tm.title, &format); + if ed_parsed.is_some() { + tm.edition = ed_parsed; } } } - tm.tags = tags; - // ISBNs - if let Some(isbns_arr) = result.get("isbns").and_then(|i| i.as_array()) + // Fallback ISBN support from search payload when edition doesn't provide one. 
+ if !tm.ids.contains_key(mlm_db::ids::ISBN) + && let Some(isbns_arr) = result.get("isbns").and_then(|i| i.as_array()) && let Some(first) = isbns_arr.iter().filter_map(|v| v.as_str()).next() { let s = first.trim().to_string(); @@ -183,11 +623,12 @@ impl MetadataProvider for Hardcover { } } - // edition - if let Some(ed_str) = result - .get("edition") - .and_then(|v| v.as_str()) - .or(result.get("edition_string").and_then(|v| v.as_str())) + // Legacy edition fallback from selected search document. + if tm.edition.is_none() + && let Some(ed_str) = result + .get("edition") + .and_then(|v| v.as_str()) + .or(result.get("edition_string").and_then(|v| v.as_str())) { let (_t, ed_parsed) = parse_edition(&tm.title, ed_str); if ed_parsed.is_some() { @@ -195,38 +636,90 @@ impl MetadataProvider for Hardcover { } } - // series - if let Some(series_arr) = result.get("series_names").and_then(|v| v.as_array()) { + if let Some(series_arr) = book.get("book_series").and_then(|v| v.as_array()) { for s in series_arr { - if let Some(name) = s.as_str() { + let name = s + .get("series") + .and_then(|series| series.get("name")) + .and_then(|v| v.as_str()) + .map(str::trim) + .filter(|n| !n.is_empty()); + + if let Some(name) = name { + let position = s.get("position").and_then(|v| v.as_f64()); + let details = s.get("details").and_then(|v| v.as_str()); tm.series.push(mlm_db::Series { name: name.to_string(), - entries: mlm_db::SeriesEntries::new(vec![]), + entries: Self::parse_series_entries(position, details), }); - } else if let Some(obj) = s.as_object() - && let Some(name) = obj.get("name").and_then(|v| v.as_str()) - { - if let Some(idx) = obj.get("index").and_then(|v| v.as_f64()) { - let entry = mlm_db::SeriesEntry::Num(idx as f32); - tm.series.push(mlm_db::Series { - name: name.to_string(), - entries: mlm_db::SeriesEntries::new(vec![entry]), - }); - } else { - tm.series.push(mlm_db::Series { - name: name.to_string(), - entries: mlm_db::SeriesEntries::new(vec![]), - }); - } } } } - 
debug!(title = %tm.title, authors = ?tm.authors, tags_count = tm.tags.len(), "returning hardcover metadata"); + debug!( + title = %tm.title, + authors = ?tm.authors, + language = ?tm.language, + tags_count = tm.tags.len(), + categories_count = tm.categories.len(), + "returning hardcover metadata" + ); Ok(tm) } } +impl Default for Hardcover { + fn default() -> Self { + Self::new(None) + } +} + +impl MetadataProvider for Hardcover { + type SearchResult = serde_json::Value; + + fn id(&self) -> &str { + "hardcover" + } + + async fn search(&self, query: &helpers::SearchQuery) -> Result> { + let gql = r#" + query Search($q: String!, $type: String!, $per_page: Int, $page: Int) { + search(query: $q, query_type: $type, per_page: $per_page, page: $page) { + results + } + } + "#; + + let qstr = query.to_combined_string(); + let vars = serde_json::json!({"q": qstr, "type": "Book", "per_page": 10, "page": 1}); + debug!(query = %qstr, "searching hardcover"); + let v = self.post_graphql(gql, vars).await?; + let results = self.parse_results(&v); + debug!(count = results.len(), "hardcover search results"); + Ok(results) + } + + fn result_title<'a>(&self, result: &'a Self::SearchResult) -> Option<&'a str> { + result.get("title")?.as_str() + } + + fn result_authors(&self, result: &Self::SearchResult) -> Vec { + result + .get("author_names") + .and_then(|a| a.as_array()) + .map(|arr| { + arr.iter() + .filter_map(|v| v.as_str().map(|s| s.to_string())) + .collect() + }) + .unwrap_or_default() + } + + async fn result_to_meta(&self, result: &Self::SearchResult) -> Result { + self.result_to_meta_with_query(result, None).await + } +} + #[async_trait] impl Provider for Hardcover { fn id(&self) -> &str { @@ -234,7 +727,12 @@ impl Provider for Hardcover { } async fn fetch(&self, query: &TorrentMeta) -> Result { - let (meta, _score) = search_with_fallback(self, &query.title, &query.authors).await?; + let (best_result, _score) = self + .search_best_result(&query.title, &query.authors) + .await?; 
+ let meta = self + .result_to_meta_with_query(&best_result, Some(query)) + .await?; Ok(meta) } } diff --git a/mlm_meta/src/providers/romanceio.rs b/mlm_meta/src/providers/romanceio.rs index 8e3f1aa9..51ef74b4 100644 --- a/mlm_meta/src/providers/romanceio.rs +++ b/mlm_meta/src/providers/romanceio.rs @@ -9,7 +9,7 @@ use url::Url; use crate::http::ReqwestClient; use crate::providers::{MetadataProvider, search_with_fallback}; use crate::traits::Provider; -use crate::{helpers, http::HttpClient}; +use crate::{helpers, http::HttpClient, map_tag_to_category}; use mlm_db::TorrentMeta; pub struct RomanceIo { @@ -97,9 +97,12 @@ impl RomanceIo { let mut categories = Vec::new(); let mut tags = Vec::new(); for t in topics { - if let Some(cat) = topic_to_category(&t) { - if !categories.contains(&cat) { - categories.push(cat); + let mapped = topic_to_category(&t); + if !mapped.is_empty() { + for cat in mapped { + if !categories.contains(&cat) { + categories.push(cat); + } } } else if !tags.contains(&t) { tags.push(t); @@ -251,15 +254,6 @@ impl Provider for RomanceIo { } } -fn topic_to_category(topic: &str) -> Option { - let t = topic.trim().to_lowercase(); - match t.as_str() { - "contemporary" | "contemporary romance" => Some("contemporary".to_string()), - "romance" => Some("romance".to_string()), - "dark" | "dark romance" => Some("dark romance".to_string()), - "suspense" | "romantic suspense" => Some("suspense".to_string()), - "erotic" | "erotic romance" | "steam" | "explicit" => Some("erotic".to_string()), - "office" | "workplace" | "boss & employee" => Some("contemporary".to_string()), - _ => None, - } +fn topic_to_category(topic: &str) -> Vec { + map_tag_to_category(topic) } diff --git a/mlm_meta/src/tag_category_map.rs b/mlm_meta/src/tag_category_map.rs new file mode 100644 index 00000000..6ad1d6cd --- /dev/null +++ b/mlm_meta/src/tag_category_map.rs @@ -0,0 +1,494 @@ +use mlm_db::Category; + +fn normalize_tag(tag: &str) -> String { + let trimmed = tag.trim(); + let value = 
match trimmed.split_once(':') { + Some((prefix, rest)) => { + let p = prefix.trim().to_ascii_lowercase(); + if matches!( + p.as_str(), + "genre" | "enre" | "mood" | "tag" | "pace" | "content warning" | "general" + ) { + rest.trim() + } else { + trimmed + } + } + None => trimmed, + }; + + value + .to_ascii_lowercase() + .replace('&', " and ") + .replace(['/', '-', '|'], " ") + .replace('\'', "") + .replace([',', '.', '(', ')'], " ") + .split_whitespace() + .collect::>() + .join(" ") +} + +/// Map external string tags into the internal category taxonomy. +/// +/// Returns an empty list for broad/ambiguous/noisy tags that do not map cleanly. +pub fn map_tag_to_category(tag: &str) -> Vec { + let key = normalize_tag(tag); + + // Explicit multi-category mappings for compound tags. + match key.as_str() { + "contemporary romance" => return vec![Category::Contemporary, Category::Romance], + "historical romance" => return vec![Category::Historical, Category::Romance], + "fantasy romance" => return vec![Category::Fantasy, Category::Romance], + "science fiction and fantasy" | "science fiction fantasy" => { + return vec![Category::ScienceFiction, Category::Fantasy]; + } + _ => {} + } + + if key.contains("programming language") { + return vec![Category::ComputerScience]; + } + let mapped: &[Category] = match key.as_str() { + // Kept mappings + "fantasy" | "magic" | "fairies" | "fantasy games" => &[Category::Fantasy], + "young adult" | "young adult fiction" | "adolescence" => &[Category::YoungAdult], + "adventure" | "adventurous" => &[Category::ActionAdventure], + "science fiction" + | "aliens" + | "extraterrestrial beings" + | "life on other planets" + | "human alien encounters" => &[Category::ScienceFiction], + "strong character development" | "character driven" | "literary" => { + &[Category::CharacterDriven] + } + "comics" | "graphic novels" | "comics and graphic novels" => { + &[Category::GraphicNovelsComics] + } + "history" | "histoire" | "civilization" | "holocaust" | 
"world war ii" | "1914 1918" => { + &[Category::History] + } + "emotional" | "sad" | "heartfelt" | "introspective" | "depressing" | "grief" => { + &[Category::Emotional] + } + "lgbtq" | "lgbtqia" | "lgbtqia+" => &[Category::Lgbtqia], + "dark" => &[Category::Dark], + "romance" | "love stories" | "romance fiction" | "romantic" | "love" | "marriage" + | "arranged marriage" | "regency romance" | "romantic suspense" => &[Category::Romance], + "war" | "world war" | "1939 1945" | "imaginary wars and battles" => &[Category::Military], + "mysterious" => &[Category::Mystery], + "juvenile fiction" + | "children" + | "childrens stories" + | "childrens literature" + | "board books" + | "picture book" + | "boys" => &[Category::Children], + "tense" | "suspense" | "mystery thriller" => &[Category::Thriller], + "reflective" | "thought provoking" => &[Category::CharacterDriven], + "funny" | "exciting" => &[Category::Funny], + "biography" | "biography and autobiography" | "autobiography" => &[Category::Biography], + "lighthearted" | "hopeful" | "inspiring" => &[Category::Wholesome], + "mystery" | "detective and mystery stories" => &[Category::Mystery], + "dystopian" => &[Category::Dystopian], + "religion" | "spirituality" => &[Category::ReligionSpirituality], + "juvenile nonfiction" | "education" => &[Category::Textbook], + "space" | "astronauts" => &[Category::Space], + "business and economics" | "business" => &[Category::Business], + "philosophy" => &[Category::Philosophy], + "science" | "physics" | "cosmology" | "genetic engineering" => &[Category::Science], + "thriller" | "thriller and suspense" | "suspenseful" => &[Category::Thriller], + "computers" | "programming" => &[Category::ComputerScience], + "psychology" => &[Category::Psychology], + "poetry" | "childrens poetry" | "english poetry" => &[Category::Poetry], + "relaxing" => &[Category::Cozy], + "humor" + | "comedy" + | "humorous" + | "humorous stories" + | "humour" + | "american wit and humor" + | "witty" + | "comedians" => 
&[Category::Humor], + "politics" + | "social science" + | "political science" + | "politique" + | "feminism" + | "capitalism" + | "communism" + | "leadership" + | "presidents" + | "spies and politics" + | "sociologie" + | "political" + | "sociology" + | "anarchism" + | "arab israeli conflict" => &[Category::PoliticsSociety], + "travel" | "air pilots" => &[Category::Travel], + "mathematics" | "algebra" | "calculus" => &[Category::Mathematics], + "cooking" | "food" => &[Category::CookingFood], + "murder" | "police" => &[Category::Crime], + "art" | "painters" | "architects" | "drawing" | "beauty" => &[Category::ArtPhotography], + "self help" => &[Category::SelfHelp], + "short stories" => &[Category::ShortStories], + "literary criticism" => &[Category::LiteraryCriticism], + "body" | "mind and spirit" => &[Category::HealthWellness], + "health and fitness" + | "health" + | "cancer" + | "self actualization psychology" + | "happiness" + | "emotions" + | "aging" => &[Category::HealthWellness], + "literary collections" => &[Category::Anthology], + "historical fiction" => &[Category::Historical], + "contemporary" + | "english fiction" + | "domestic fiction" + | "slice of life" + | "genre fiction" + | "literature and fiction" + | "literary fiction" + | "classique" + | "realistic fiction" + | "french fiction" + | "afrikaans fiction" => &[Category::ContemporaryRealist], + "language arts and disciplines" + | "foreign language study" + | "spanish" + | "spanish language" + | "english" + | "french" + | "french language" + | "german language" + | "turkish" + | "italian" + | "speech" + | "communication" => &[Category::LanguageLinguistics], + "nature" | "animals" | "bears" | "birds" | "dinosaurs" => &[Category::NatureEnvironment], + "folklore" | "fairy tales" | "mythology" => &[Category::MythologyFolklore], + "sports and recreation" | "sports" | "soccer" | "horses" | "hiking" | "baseball" => { + &[Category::SportsOutdoors] + } + "fast paced" => &[Category::ActionAdventure], + "medical" 
=> &[Category::Medicine], + "performing arts" | "drama" | "english drama" | "verse novel" | "plays" => { + &[Category::DramaPlays] + } + "manga" => &[Category::Manga], + "cyberpunk" => &[Category::Cyberpunk], + "crime" | "true crime" => &[Category::TrueCrime], + "music" => &[Category::Music], + "technology and engineering" | "aeronautics" | "automobiles" => &[Category::Engineering], + "horror" | "horror tales" | "scary" | "horreur" => &[Category::Horror], + "architecture" | "design" | "photography" => &[Category::ArtPhotography], + "fantasy fiction" => &[Category::Fantasy], + "crafts and hobbies" => &[Category::CraftsDiy], + "adventure stories" + | "action and adventure" + | "adventure and adventurers" + | "action" + | "aventure" => &[Category::ActionAdventure], + "reference" => &[Category::Reference], + "urban fantasy" | "paranormal and urban" => &[Category::UrbanFantasy], + "games and activities" | "games" | "roleplaying games" => &[Category::SportsOutdoors], + "audiobook" | "audio book" | "audiobooks" | "kinder hörbücher" | "childrens audiobooks" => { + &[Category::Audiobook] + } + "electronic books" => &[Category::Ebook], + "holiday" | "christmas" => &[Category::Wholesome], + "great britain" | "england" | "europe" | "british" | "germany" | "london england" + | "ireland" | "greece" | "italy" | "scotland" | "rome" | "portugal" | "poland" + | "berlin germany" | "soviet union" | "russia" => &[Category::Europe], + "gardening" | "house and home" => &[Category::HomeGarden], + "memoir" => &[Category::Memoir], + "bible" | "bibles" | "christian life" | "christian fiction" => { + &[Category::ReligionSpirituality] + } + "epic fantasy" => &[Category::EpicFantasy], + "military" => &[Category::Military], + "boys love" | "bl" | "yaoi" | "gay men" => &[Category::Lgbtqia], + "young adult nonfiction" | "teen and young adult" | "jeune adulte" => { + &[Category::YoungAdult] + } + "american" | "native americans" | "american fiction" | "americans" => { + &[Category::NorthAmerica] + } 
+ "authors" => &[Category::LiteraryCriticism], + "english language" | "fiction in english" => &[Category::LanguageLinguistics], + "historical" => &[Category::Historical], + "middle grade" => &[Category::MiddleGrade], + "australia" | "australian fiction" | "australian" => &[Category::Oceania], + "american poetry" => &[Category::Poetry], + "france" => &[Category::Europe], + "china" | "chinese" => &[Category::EastAsia], + "japan" | "japanese" => &[Category::EastAsia], + "india" => &[Category::SouthAsia], + "egypt" | "arabic fiction" => &[Category::MiddleEast], + "africa" => &[Category::Africa], + "united states" | "california" | "canada" | "canadian" | "colorado" | "new york n y" + | "arizona" | "alaska" | "america" => &[Category::NorthAmerica], + "brazil" => &[Category::LatinAmerica], + "christmas stories" => &[Category::Wholesome], + "occult" => &[Category::OccultEsotericism], + "demonology" => &[Category::OccultEsotericism], + "erotic stories" => &[Category::Erotica], + "erotica" => &[Category::Erotica], + "romantasy" => &[Category::Fantasy, Category::Romance], + "dragons" => &[Category::Fantasy], + "conduct of life" => &[Category::SelfHelp], + "modern" => &[Category::Contemporary], + "transportation" => &[Category::Travel], + "space opera" | "first contact" => &[Category::SpaceOpera], + "assassins" | "mafia" | "missing persons" | "kidnapping" | "abduction" => { + &[Category::Crime] + } + "dark romance" | "dark romance kink" => &[Category::DarkRomance], + "study aids" => &[Category::Workbook], + "adult" | "adulte" => &[Category::Adult], + "paranormal romance" | "omegaverse" | "amish romance" => &[Category::ParanormalRomance], + "monster romance" => &[Category::ParanormalRomance], + "espionage" => &[Category::PoliticalIntrigue], + "conspiracies" => &[Category::PoliticalIntrigue], + "artists" => &[Category::ArtPhotography], + "actors" + | "actresses" + | "motion picture actors and actresses" + | "motion pictures" + | "motion picture producers and directors" => 
&[Category::FilmTelevision], + "paranormal" | "paranormal fiction" | "supernatural" | "vampires" | "ghost stories" + | "ghosts" | "angels" => &[Category::ParanormalHorror], + "slow paced" => &[Category::Cozy], + "time travel" => &[Category::TimeTravel], + "magical realism" => &[Category::MagicalRealism], + "dystopias" => &[Category::Dystopian], + "criminals" | "crime fiction" => &[Category::Crime], + "thrillers" | "suspense fiction" | "crime thrillers" => &[Category::Thriller], + "philosophie" => &[Category::Philosophy], + "found family" => &[Category::FoundFamily], + "chick lit" => &[Category::RomanticComedy], + "women sleuths" => &[Category::Detective], + "city and town life" => &[Category::Urban], + "college students" => &[Category::NewAdult], + "caricatures and cartoons" | "fantasy comic books" | "pictorial" => { + &[Category::GraphicNovelsComics] + } + "artificial intelligence" => &[Category::DataAi], + "businessmen" | "economics" | "business enterprises" | "businesswomen" => { + &[Category::Business] + } + "country life" | "frontier and pioneer life" => &[Category::Rural], + "coming of age" | "bildungsromans" => &[Category::ComingOfAge], + "high fantasy" => &[Category::EpicFantasy], + "psychological" | "amnesia" | "psychological thriller" => { + &[Category::PsychologicalThriller] + } + "books and reading" | "authorship" => &[Category::LiteraryCriticism], + "anthology" => &[Category::Anthology], + "essays" => &[Category::Essays], + "novella" => &[Category::Novella], + "java" | "javascript" | "c++" | "python" | "application software" => { + &[Category::ComputerScience] + } + "sapphic" | "queer" | "lesbians" | "shounen ai" => &[Category::Lgbtqia], + "rock musicians" => &[Category::Music], + "christianity" | "buddhism" | "amish" => &[Category::ReligionSpirituality], + "criminal investigation" + | "private investigators" + | "cold cases criminal investigation" + | "mystery and detective" + | "detective" => &[Category::Detective], + "hard science fiction" + | 
"speculative fiction" + | "sci fi" + | "doctor who fictitious character" => &[Category::ScienceFiction], + "imaginary places" => &[Category::Fantasy], + "astronomy" | "interplanetary voyages" => &[Category::Space], + "dark fantasy" => &[Category::Fantasy], + "litrpg" => &[Category::ProgressionFantasy], + "mental health" | "brain" | "ability" => &[Category::HealthWellness], + "cowboys" | "american western romance" => &[Category::Western], + "cookbooks" => &[Category::CookingFood], + "dreams" => &[Category::Psychology], + "blessing and cursing" => &[Category::ReligionSpirituality], + "high school students" => &[Category::AcademySchool], + "mythical" + | "dragons and mythical creatures" + | "curiosities and wonders" + | "gods" + | "arthurian romances" => &[Category::MythologyFolklore], + "african american women" => &[Category::PocRepresentation], + "indians of north america" => &[Category::NorthAmerica], + "audio theater" | "hörspiel" => &[Category::DramatizedAdaptation], + "cozy" | "bed and breakfast accommodations" | "birthdays" => &[Category::Cozy], + "war and military" | "battle of" | "soldiers" | "guerre" => &[Category::Military], + "chicago ill" | "boston mass" => &[Category::NorthAmerica], + "literature and fiction science fiction and fantasy" => { + &[Category::ScienceFiction, Category::Fantasy] + } + "batman fictitious character" | "superheroes" | "science fiction comic books" => { + &[Category::GraphicNovelsComics] + } + "german" | "greek" | "russian" => &[Category::LanguageLinguistics], + "jewish 1939 1945" => &[Category::History], + "german fiction" | "japanese fiction" | "chinese fiction" => { + &[Category::ContemporaryRealist] + } + "light novel" => &[Category::LightNovel], + "computer networks" | "technology" | "computer games" | "computer adventure games" => { + &[Category::Technology] + } + "diaries" => &[Category::Memoir], + "retellings" | "retelling" => &[Category::Retelling], + "gothic" => &[Category::GothicHorror], + "artistic" | "aesthetics" => 
&[Category::ArtPhotography], + "ethics" => &[Category::Philosophy], + "series" | "anthologies" | "anthologies and short stories" => &[Category::Anthology], + "islam" | "spiritual life" => &[Category::ReligionSpirituality], + "urban" | "cities and towns" => &[Category::Urban], + "ancient" => &[Category::Ancient], + "medieval" | "castles" => &[Category::Medieval], + "child rearing" => &[Category::ParentingFamily], + "psychological fiction" => &[Category::PsychologicalThriller], + "traditional detectives" | "amateur sleuths" | "police procedural" => { + &[Category::Detective] + } + "cozy mystery" => &[Category::CozyMystery], + "heroes" => &[Category::ActionAdventure], + "alphabet" + | "bedtime" + | "picture books for children" + | "readers" + | "girls" + | "babysitters" => &[Category::Children], + "intelligence officers" => &[Category::PoliticalIntrigue], + "cults" => &[Category::OccultEsotericism], + "dungeons and dragons game" => &[Category::ProgressionFantasy], + "sexy" => &[Category::Erotica], + "climatic changes" | "agriculture" | "farm life" | "dwellings" => { + &[Category::NatureEnvironment] + } + "romantic comedy" => &[Category::RomanticComedy], + "post apocalyptic" | "end of the world" => &[Category::PostApocalyptic], + "satire" => &[Category::Satire], + "democracy" => &[Category::PoliticsSociety], + "monsters" => &[Category::Horror], + "biology" => &[Category::Science], + "clothing and dress" => &[Category::CraftsDiy], + "adult fiction" => &[Category::Adult], + "western" => &[Category::Western], + "jews" => &[Category::ReligionSpirituality], + "businesspeople" | "entrepreneurship" => &[Category::Business], + "new zealand" => &[Category::Oceania], + "anxiety" => &[Category::Psychology], + "literature and fiction mystery" => &[Category::Mystery], + "bandes dessinées" => &[Category::GraphicNovelsComics], + "afghanistan" | "iran" => &[Category::MiddleEast], + "technothrillers" => &[Category::Thriller], + "gothic horror" => &[Category::GothicHorror], + "computer 
science" => &[Category::ComputerScience], + "slow burn" => &[Category::SlowBurn], + "blind" => &[Category::DisabilityRepresentation], + "novelists" | "journalism" | "poets" | "college teachers" | "composers" => { + &[Category::LiteraryCriticism] + } + "pirates" => &[Category::ActionAdventure], + "christian biography" => &[Category::Biography], + "courtship" => &[Category::Romance], + "mexico" => &[Category::LatinAmerica], + "alternative histories fiction" => &[Category::AlternateHistory], + "historical fantasy" => &[Category::Historical], + "magical" => &[Category::Fantasy], + "antiques and collectibles" => &[Category::CraftsDiy], + + _ => &[], + }; + + mapped.to_vec() +} + +#[cfg(test)] +mod tests { + use super::map_tag_to_category; + use mlm_db::Category; + + #[test] + fn maps_selected_tags() { + assert_eq!(map_tag_to_category("Fantasy"), vec![Category::Fantasy]); + assert_eq!(map_tag_to_category(" Fantasy "), vec![Category::Fantasy]); + assert_eq!( + map_tag_to_category("Character driven"), + vec![Category::CharacterDriven] + ); + assert_eq!( + map_tag_to_category("Comics & Graphic Novels"), + vec![Category::GraphicNovelsComics] + ); + assert_eq!( + map_tag_to_category("Business & Economics"), + vec![Category::Business] + ); + assert_eq!(map_tag_to_category("LGBTQ"), vec![Category::Lgbtqia]); + assert_eq!(map_tag_to_category("Boy's Love"), vec![Category::Lgbtqia]); + assert_eq!(map_tag_to_category("Manga"), vec![Category::Manga]); + assert_eq!( + map_tag_to_category("Technology & Engineering"), + vec![Category::Engineering] + ); + assert_eq!(map_tag_to_category("Audio book"), vec![Category::Audiobook]); + assert_eq!( + map_tag_to_category("English language"), + vec![Category::LanguageLinguistics] + ); + assert_eq!( + map_tag_to_category("Paranormal Romance"), + vec![Category::ParanormalRomance] + ); + assert_eq!( + map_tag_to_category("Genre: Rust (Programming Language)"), + vec![Category::ComputerScience] + ); + assert_eq!( + map_tag_to_category("Genre: C# 
(Programming Language)"), + vec![Category::ComputerScience] + ); + assert_eq!( + map_tag_to_category("Genre: Light Novel"), + vec![Category::LightNovel] + ); + assert_eq!(map_tag_to_category("enre: Ireland"), vec![Category::Europe]); + } + + #[test] + fn maps_compound_tags_to_multiple_categories() { + assert_eq!( + map_tag_to_category("Contemporary Romance"), + vec![Category::Contemporary, Category::Romance] + ); + assert_eq!( + map_tag_to_category("Historical Romance"), + vec![Category::Historical, Category::Romance] + ); + assert_eq!( + map_tag_to_category("Fantasy Romance"), + vec![Category::Fantasy, Category::Romance] + ); + } + + #[test] + fn drops_ambiguous_tags() { + assert_eq!(map_tag_to_category("Fiction"), Vec::<Category>::new()); + assert_eq!(map_tag_to_category("Nonfiction"), Vec::<Category>::new()); + assert_eq!(map_tag_to_category("medium"), Vec::<Category>::new()); + assert_eq!(map_tag_to_category("A mix driven"), Vec::<Category>::new()); + assert_eq!(map_tag_to_category("etc"), Vec::<Category>::new()); + assert_eq!(map_tag_to_category("Rape"), Vec::<Category>::new()); + assert_eq!( + map_tag_to_category("Sexual violence"), + Vec::<Category>::new() + ); + assert_eq!(map_tag_to_category("Finance"), Vec::<Category>::new()); + assert_eq!(map_tag_to_category("Law"), Vec::<Category>::new()); + assert_eq!(map_tag_to_category("Asia"), Vec::<Category>::new()); + } +} diff --git a/mlm_meta/tests/hardcover_tests.rs b/mlm_meta/tests/hardcover_tests.rs index ca871987..f3603cb5 100644 --- a/mlm_meta/tests/hardcover_tests.rs +++ b/mlm_meta/tests/hardcover_tests.rs @@ -19,6 +19,12 @@ mod helper { resps: std::sync::Mutex::new(vec![resp.to_string()]), } } + + pub fn new_many(resps: &[&str]) -> Self { + Self { + resps: std::sync::Mutex::new(resps.iter().map(|s| s.to_string()).collect()), + } + } } #[async_trait] @@ -46,12 +52,22 @@ mod helper { async fn hardcover_selects_best_candidate() { use helper::MockClient; - let data = r#"{ "data": { "search": { "results": { "hits": [ - { "document": { "title": "The Great Adventure", "author_names": ["Alice Author"],
"description": "A" } }, - { "document": { "title": "Great Adventure", "author_names": ["Bob Smith"], "description": "B" } } + let search = r#"{ "data": { "search": { "results": { "hits": [ + { "document": { "id": 10, "title": "The Great Adventure", "author_names": ["Alice Author"], "description": "A" } }, + { "document": { "id": 11, "title": "Great Adventure", "author_names": ["Bob Smith"], "description": "B" } } ] } } } }"#; - - let client = Arc::new(MockClient::new(data)); + let detail = r#"{ "data": { "books_by_pk": { + "id": 11, + "title": "Great Adventure", + "subtitle": null, + "headline": null, + "description": "B", + "contributions": [{ "author": { "name": "Bob Smith" }, "contribution": null }], + "book_series": [], + "taggings": [] + } } }"#; + + let client = Arc::new(MockClient::new_many(&[search, detail])); let prov = Hardcover::with_client("http://example/graphql", client, None); let query_meta = TorrentMeta { @@ -72,11 +88,24 @@ async fn hardcover_selects_best_candidate() { async fn hardcover_parses_tags_and_isbn() { use helper::MockClient; - let data = r#"{ "data": { "search": { "results": { "hits": [ - { "document": { "title": "Unique Book", "author_names": ["Unique Author"], "description": "desc", "tags": ["Tropes"], "genres": ["Romance"], "isbns": ["9781234567897"] } } + let search = r#"{ "data": { "search": { "results": { "hits": [ + { "document": { "id": 20, "title": "Unique Book", "author_names": ["Unique Author"], "description": "desc", "isbns": ["9781234567897"] } } ] } } } }"#; - - let client = Arc::new(MockClient::new(data)); + let detail = r#"{ "data": { "books_by_pk": { + "id": 20, + "title": "Unique Book", + "subtitle": null, + "headline": null, + "description": "desc", + "contributions": [{ "author": { "name": "Unique Author" }, "contribution": null }], + "book_series": [], + "taggings": [ + { "id": 1, "tag": { "tag": "Tropes", "tag_category": { "category": "Tag" } } }, + { "id": 2, "tag": { "tag": "Romance", "tag_category": { 
"category": "Genre" } } } + ] + } } }"#; + + let client = Arc::new(MockClient::new_many(&[search, detail])); let prov = Hardcover::with_client("http://example/graphql", client, None); let query_meta = TorrentMeta { @@ -90,6 +119,8 @@ async fn hardcover_parses_tags_and_isbn() { .expect("should parse tags and isbn"); assert!(m.tags.iter().any(|t| t == "tropes")); assert!(m.tags.iter().any(|t| t == "romance")); + assert!(m.categories.contains(&mlm_db::Category::Romance)); + assert!(!m.categories.contains(&mlm_db::Category::CharacterDriven)); assert_eq!(m.ids.get("isbn").map(|s| s.as_str()), Some("9781234567897")); } @@ -113,10 +144,20 @@ async fn hardcover_empty_results_returns_err() { async fn hardcover_handles_malformed_fields_gracefully() { use helper::MockClient; - let data = r#"{ "data": { "search": { "results": { "hits": [ - { "document": { "title": "Any Title", "description": "only desc", "tags": null, "genres": 123 } } + let search = r#"{ "data": { "search": { "results": { "hits": [ + { "document": { "id": 30, "title": "Any Title", "description": "only desc" } } ] } } } }"#; - let client = Arc::new(MockClient::new(data)); + let detail = r#"{ "data": { "books_by_pk": { + "id": 30, + "title": "Any Title", + "subtitle": null, + "headline": null, + "description": "only desc", + "contributions": [], + "book_series": [], + "taggings": [] + } } }"#; + let client = Arc::new(MockClient::new_many(&[search, detail])); let prov = Hardcover::with_client("http://example/graphql", client, None); let query_meta = TorrentMeta { @@ -137,10 +178,20 @@ async fn hardcover_handles_malformed_fields_gracefully() { async fn hardcover_uses_first_isbn_when_multiple_present() { use helper::MockClient; - let data = r#"{ "data": { "search": { "results": { "hits": [ - { "document": { "title": "Multi ISBN", "author_names": ["A"], "isbns": ["FIRSTISBN","SECONDISBN"] } } + let search = r#"{ "data": { "search": { "results": { "hits": [ + { "document": { "id": 40, "title": "Multi ISBN", 
"author_names": ["A"], "isbns": ["FIRSTISBN","SECONDISBN"] } } ] } } } }"#; - let client = Arc::new(MockClient::new(data)); + let detail = r#"{ "data": { "books_by_pk": { + "id": 40, + "title": "Multi ISBN", + "subtitle": null, + "headline": null, + "description": "", + "contributions": [{ "author": { "name": "A" }, "contribution": null }], + "book_series": [], + "taggings": [] + } } }"#; + let client = Arc::new(MockClient::new_many(&[search, detail])); let prov = Hardcover::with_client("http://example/graphql", client, None); let query_meta = TorrentMeta { @@ -158,12 +209,22 @@ async fn hardcover_uses_first_isbn_when_multiple_present() { async fn hardcover_tie_breaker_prefers_first_result() { use helper::MockClient; - let data = r#"{ "data": { "search": { "results": { "hits": [ - { "document": { "title": "Tie Book", "author_names": ["Author One"], "description": "first" } }, - { "document": { "title": "Tie Book", "author_names": ["Author One"], "description": "second" } } + let search = r#"{ "data": { "search": { "results": { "hits": [ + { "document": { "id": 50, "title": "Tie Book", "author_names": ["Author One"], "description": "first" } }, + { "document": { "id": 51, "title": "Tie Book", "author_names": ["Author One"], "description": "second" } } ] } } } }"#; - - let client = Arc::new(MockClient::new(data)); + let detail = r#"{ "data": { "books_by_pk": { + "id": 50, + "title": "Tie Book", + "subtitle": null, + "headline": null, + "description": "first", + "contributions": [{ "author": { "name": "Author One" }, "contribution": null }], + "book_series": [], + "taggings": [] + } } }"#; + + let client = Arc::new(MockClient::new_many(&[search, detail])); let prov = Hardcover::with_client("http://example/graphql", client, None); let query_meta = TorrentMeta { @@ -181,11 +242,21 @@ async fn hardcover_tie_breaker_prefers_first_result() { async fn hardcover_handles_minor_typos() { use helper::MockClient; - let data = r#"{ "data": { "search": { "results": { "hits": [ - { 
"document": { "title": "Great Adventure", "author_names": ["Bob Smith"], "description": "B" } } + let search = r#"{ "data": { "search": { "results": { "hits": [ + { "document": { "id": 60, "title": "Great Adventure", "author_names": ["Bob Smith"], "description": "B" } } ] } } } }"#; - - let client = Arc::new(MockClient::new(data)); + let detail = r#"{ "data": { "books_by_pk": { + "id": 60, + "title": "Great Adventure", + "subtitle": null, + "headline": null, + "description": "B", + "contributions": [{ "author": { "name": "Bob Smith" }, "contribution": null }], + "book_series": [], + "taggings": [] + } } }"#; + + let client = Arc::new(MockClient::new_many(&[search, detail])); let prov = Hardcover::with_client("http://example/graphql", client, None); let query_meta = TorrentMeta { @@ -207,8 +278,18 @@ async fn hardcover_parses_isbn_from_search_results() { let search = r#"{ "data": { "search": { "results": { "hits": [ { "document": { "id": 123, "title": "Detailed Book", "author_names": ["Detail Author"], "description": "short desc", "isbns": ["9781111111111"], "series_names": ["Series A"] } } ] } } } }"#; - - let client = Arc::new(MockClient::new(search)); + let detail = r#"{ "data": { "books_by_pk": { + "id": 123, + "title": "Detailed Book", + "subtitle": null, + "headline": null, + "description": "short desc", + "contributions": [{ "author": { "name": "Detail Author" }, "contribution": null }], + "book_series": [{ "position": 1, "details": "1", "series": { "name": "Series A" } }], + "taggings": [] + } } }"#; + + let client = Arc::new(MockClient::new_many(&[search, detail])); let prov = Hardcover::with_client("http://example/graphql", client, None); let query_meta = TorrentMeta { @@ -232,12 +313,22 @@ async fn hardcover_title_only_fallback_still_scores_with_author() { // Query for "Boss of the Year" by "Nicole French" // Results include a similar title by a different author // The fallback to title-only should NOT match because author doesn't match - let data = r#"{ 
"data": { "search": { "results": { "hits": [ - { "document": { "title": "Not the Boss of the Year", "author_names": ["J.S. Cooper"], "description": "wrong author" } }, - { "document": { "title": "Boss of the Year", "author_names": ["Nicole French"], "description": "correct" } } + let search = r#"{ "data": { "search": { "results": { "hits": [ + { "document": { "id": 70, "title": "Not the Boss of the Year", "author_names": ["J.S. Cooper"], "description": "wrong author" } }, + { "document": { "id": 71, "title": "Boss of the Year", "author_names": ["Nicole French"], "description": "correct" } } ] } } } }"#; - - let client = Arc::new(MockClient::new(data)); + let detail = r#"{ "data": { "books_by_pk": { + "id": 71, + "title": "Boss of the Year", + "subtitle": null, + "headline": null, + "description": "correct", + "contributions": [{ "author": { "name": "Nicole French" }, "contribution": null }], + "book_series": [], + "taggings": [] + } } }"#; + + let client = Arc::new(MockClient::new_many(&[search, detail])); let prov = Hardcover::with_client("http://example/graphql", client, None); let query_meta = TorrentMeta { @@ -264,3 +355,68 @@ async fn hardcover_title_only_fallback_still_scores_with_author() { m.authors ); } + +#[tokio::test] +async fn hardcover_prefers_best_matching_edition_and_edition_specific_fields() { + use helper::MockClient; + + let search = r#"{ "data": { "search": { "results": { "hits": [ + { "document": { "id": 1421303, "title": "Quicksilver", "author_names": ["Callie Hart"], "isbns": ["9781399745420"] } } + ] } } } }"#; + + let detail = r#"{ "data": { "books_by_pk": { + "id": 1421303, + "title": "Quicksilver", + "subtitle": null, + "headline": null, + "description": "Book description", + "contributions": [{ "author": { "name": "Callie Hart" }, "contribution": null }], + "book_series": [], + "taggings": [], + "editions": [ + { + "language": { "language": "English" }, + "asin": null, + "isbn_10": "1399745425", + "isbn_13": "9781399745420", + 
"edition_format": "Paperback", + "contributions": [{ "contribution": null, "author": { "name": "Callie Hart" } }] + }, + { + "language": { "language": "English" }, + "asin": "B0DBJBFHGT", + "isbn_10": null, + "isbn_13": null, + "edition_format": "Audible", + "contributions": [ + { "contribution": null, "author": { "name": "Callie Hart" } }, + { "contribution": "Narrator", "author": { "name": "Stella Bloom" } } + ] + } + ] + } } }"#; + + let client = Arc::new(MockClient::new_many(&[search, detail])); + let prov = Hardcover::with_client("http://example/graphql", client, None); + + let query_meta = TorrentMeta { + title: "Quicksilver".to_string(), + authors: vec!["Callie Hart".to_string()], + media_type: mlm_db::MediaType::Audiobook, + language: Some(mlm_db::Language::English), + ..Default::default() + }; + + let m = prov + .fetch(&query_meta) + .await + .expect("should choose audible edition"); + + assert_eq!( + m.ids.get(mlm_db::ids::ASIN).map(|s| s.as_str()), + Some("B0DBJBFHGT") + ); + assert_eq!(m.media_type, mlm_db::MediaType::Audiobook); + assert_eq!(m.language, Some(mlm_db::Language::English)); + assert!(m.narrators.iter().any(|n| n == "Stella Bloom")); +} diff --git a/mlm_meta/tests/mock_fetcher.rs b/mlm_meta/tests/mock_fetcher.rs index 69a03bf8..d1e1809c 100644 --- a/mlm_meta/tests/mock_fetcher.rs +++ b/mlm_meta/tests/mock_fetcher.rs @@ -11,6 +11,7 @@ impl HttpClient for MockClient { if !u.host_str().is_some_and(|h| h.contains("romance.io")) { return Err(anyhow::anyhow!("unexpected host in test fetch")); } + if u.path().starts_with("/json/search_books") { return Ok(r#"{ "success": true, diff --git a/mlm_meta/tests/romanceio_tests.rs b/mlm_meta/tests/romanceio_tests.rs index 5deee935..7658840e 100644 --- a/mlm_meta/tests/romanceio_tests.rs +++ b/mlm_meta/tests/romanceio_tests.rs @@ -1,4 +1,4 @@ -use mlm_db::TorrentMeta; +use mlm_db::{Category, TorrentMeta}; use mlm_meta::Provider; use mlm_meta::http::HttpClient; use mlm_meta::providers::RomanceIo; @@ -151,8 
+151,12 @@ async fn parse_book_html_extracts_categories_and_tags() { ); // categories should include contemporary and dark romance (derived from topics) - assert!(m.categories.iter().any(|c| c == "contemporary")); - assert!(m.categories.iter().any(|c| c == "dark romance")); + assert!( + m.categories + .iter() + .any(|c| c == &Category::ContemporaryRealist) + ); + assert!(m.categories.iter().any(|c| c == &Category::DarkRomance)); // tags should include some of the romance-specific tropes let tags = m.tags.join(","); diff --git a/mlm_meta/tests/scoring_tests.rs b/mlm_meta/tests/scoring_tests.rs index 8a550790..761c6fa7 100644 --- a/mlm_meta/tests/scoring_tests.rs +++ b/mlm_meta/tests/scoring_tests.rs @@ -43,7 +43,6 @@ fn parse_book_html_smoke() { let provider = RomanceIo::with_client(Arc::new(DummyClient)); let meta = provider.parse_book_html(SAMPLE_ROMANCE_HTML).unwrap(); - assert!(!meta.title.is_empty()); assert!(!meta.title.is_empty()); assert!(!meta.authors.is_empty());