Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1,352 changes: 1,254 additions & 98 deletions Cargo.lock

Large diffs are not rendered by default.

3 changes: 1 addition & 2 deletions Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
[workspace]
resolver = "3"
members = ["server", "mlm_db", "mlm_parse", "mlm_mam"]

members = ["server", "mlm_db", "mlm_parse", "mlm_mam", "mlm_meta"]
2 changes: 1 addition & 1 deletion mlm_db/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,8 @@ use std::collections::HashMap;

use anyhow::Result;
use mlm_parse::normalize_title;
use native_db::Models;
pub use native_db::Database;
use native_db::Models;
use native_db::transaction::RwTransaction;
use native_db::{ToInput, db_type};
use once_cell::sync::Lazy;
Expand Down
4 changes: 2 additions & 2 deletions mlm_db/src/v03.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use super::{v01, v02, v04, v05, v06};
use native_db::{native_db, Key, ToKey};
use native_model::{native_model, Model};
use native_db::{Key, ToKey, native_db};
use native_model::{Model, native_model};
use serde::{Deserialize, Serialize};
use std::path::PathBuf;
use time::{OffsetDateTime, UtcDateTime};
Expand Down
4 changes: 2 additions & 2 deletions mlm_db/src/v09.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use super::{v01, v03, v04, v06, v08, v10};
use native_db::{native_db, ToKey};
use native_model::{native_model, Model};
use native_db::{ToKey, native_db};
use native_model::{Model, native_model};
use serde::{Deserialize, Serialize};
use std::path::PathBuf;
use tracing::warn;
Expand Down
4 changes: 2 additions & 2 deletions mlm_db/src/v13.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use super::{v03, v04, v06, v08, v09, v10, v11, v12, v14};
use native_db::{native_db, ToKey};
use native_model::{native_model, Model};
use native_db::{ToKey, native_db};
use native_model::{Model, native_model};
use serde::{Deserialize, Serialize};
use std::path::PathBuf;

Expand Down
6 changes: 3 additions & 3 deletions mlm_db/src/v18.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
use crate::ids;

use super::{v01, v03, v04, v05, v06, v08, v09, v10, v11, v12, v13, v15, v16, v17};
use mlm_parse::{normalize_title, parse_edition};
use native_db::{native_db, ToKey};
use native_model::{native_model, Model};
use mlm_parse::normalize_title;
use native_db::{ToKey, native_db};
use native_model::{Model, native_model};
use serde::{Deserialize, Serialize};
use std::{collections::BTreeMap, path::PathBuf};

Expand Down
6 changes: 4 additions & 2 deletions mlm_mam/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,10 @@ mlm_parse = { path = "../mlm_parse" }
native_db = { git = "https://github.com/StirlingMouse/native_db.git", branch = "0.8.x" }
native_model = "0.4.20"
once_cell = "1.21.3"
openssl = { version = "0.10.73", features = ["vendored"] }
reqwest = { version = "0.12.20", features = ["json"] }
reqwest = { version = "0.12.20", default-features = false, features = [
"json",
"rustls-tls",
] }
reqwest_cookie_store = "0.8.0"
serde = "1.0.136"
serde_derive = "1.0.136"
Expand Down
23 changes: 23 additions & 0 deletions mlm_meta/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
[package]
name = "mlm_meta"
version = "0.1.0"
edition = "2024"

[dependencies]
anyhow = "1.0"
async-trait = "0.1"
serde = { version = "1.0", features = ["derive"] }
reqwest = { version = "0.12.20", default-features = false, features = ["json", "rustls-tls", "gzip"] }
tokio = { version = "1", features = ["rt-multi-thread", "sync", "macros"] }
serde_json = "1.0"
scraper = "0.14"
mlm_db = { path = "../mlm_db" }
mlm_parse = { path = "../mlm_parse" }
strsim = "0.11"
tracing = "0.1"

urlencoding = "2.1"
url = "2.4"

[dev-dependencies]
httpmock = "0.7"
14 changes: 14 additions & 0 deletions mlm_meta/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
mlm_meta
========

Small crate defining the Provider trait and helper types for external
metadata providers (Goodreads, Hardcover, ...).

Comment on lines +4 to +6
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor

Update provider examples to match implemented providers.

Line 5 still cites Goodreads, but this PR wires Hardcover/OpenLibrary/RomanceIo. Keeping this aligned avoids confusion for new provider contributors.

📝 Suggested doc update
-Small crate defining the Provider trait and helper types for external
-metadata providers (Goodreads, Hardcover, ...).
+Small crate defining the Provider trait and helper types for external
+metadata providers (Hardcover, OpenLibrary, RomanceIo, ...).
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
Small crate defining the Provider trait and helper types for external
metadata providers (Goodreads, Hardcover, ...).
Small crate defining the Provider trait and helper types for external
metadata providers (Hardcover, OpenLibrary, RomanceIo, ...).
🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@mlm_meta/README.md` around lines 4 - 6, The README text referencing example
providers is out of date: update the brief crate description that mentions
"Goodreads" to instead list the actual implemented providers (Hardcover,
OpenLibrary, RomanceIo) and ensure any example usages align with the Provider
trait and helper types defined in this crate (e.g., references to Provider,
Metadata, and provider-specific types); replace the old provider name and adjust
example snippets to show usage for Hardcover/OpenLibrary/RomanceIo so new
contributors see correct, current examples.

Purpose
- Provide a stable trait so server can query multiple providers and map
results into existing `TorrentMeta`.

How to add a provider
- Implement `mlm_meta::Provider` and return `TorrentMeta` from `fetch`.
- Register the provider in server's `MetadataService` and map fields into
`TorrentMeta` before persisting.
169 changes: 169 additions & 0 deletions mlm_meta/src/helpers.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,169 @@
use mlm_parse::{clean_name, normalize_title};

pub use anyhow;
pub use tracing::{Level, debug, enabled, trace};

/// A metadata lookup request: a title plus, optionally, one author name.
///
/// Providers are free to use the two fields separately or to collapse them
/// into a single string via [`SearchQuery::to_combined_string`].
#[derive(Debug, Clone)]
pub struct SearchQuery {
    /// Title to search for.
    pub title: String,
    /// Author used to narrow the search, when one is known.
    pub author: Option<String>,
}

impl SearchQuery {
    /// Creates a query from an owned title and an optional author.
    pub fn new(title: String, author: Option<String>) -> Self {
        Self { title, author }
    }

    /// Renders the query as one string for providers with a single search field.
    ///
    /// Yields `"<title> <author>"` when both parts are non-empty and the bare
    /// title otherwise. An empty title always produces an empty string, even
    /// when an author is set.
    pub fn to_combined_string(&self) -> String {
        let author = self.author.as_deref().unwrap_or_default();
        // When the title is empty the result is empty as well, so the title
        // itself serves as the fallback for every "not both present" case.
        if self.title.is_empty() || author.is_empty() {
            return self.title.clone();
        }
        [self.title.as_str(), author].join(" ")
    }
}

/// Creates a [`SearchQuery`] for `title` that carries an author when possible.
///
/// The author is the first entry of `authors` that is non-empty after
/// trimming surrounding whitespace, stored in its trimmed form. If no entry
/// qualifies, the query has no author.
pub fn query_with_author(title: &str, authors: &[String]) -> SearchQuery {
    let first_named = authors.iter().find_map(|raw| {
        let trimmed = raw.trim();
        if trimmed.is_empty() {
            None
        } else {
            Some(trimmed.to_owned())
        }
    });
    SearchQuery::new(title.to_owned(), first_named)
}

/// Creates a [`SearchQuery`] that searches by `title` alone, with no author set.
pub fn query_title_only(title: &str) -> SearchQuery {
    let owned_title = String::from(title);
    SearchQuery::new(owned_title, None)
}

/// Similarity of two strings as a score between 0.0 and 1.0.
///
/// Thin wrapper over `strsim::normalized_levenshtein`: the inputs are passed
/// through unchanged and its result is returned as-is. Despite the name, no
/// tokenization is performed in this function.
pub fn token_similarity(a: &str, b: &str) -> f64 {
strsim::normalized_levenshtein(a, b)
}

/// Produces a cleaned, lowercased copy of every author name.
///
/// Each input is copied, passed through `clean_name`, and then lowercased.
/// The output has one entry per input, in the same order; the inputs are
/// left untouched.
pub fn normalize_authors(auths: &[String]) -> Vec<String> {
    let mut normalized = Vec::with_capacity(auths.len());
    for raw in auths {
        let mut name = raw.to_owned();
        // The value returned by `clean_name` is deliberately discarded:
        // whatever it left in `name` is what gets lowercased.
        let _ = clean_name(&mut name);
        normalized.push(name.to_lowercase());
    }
    normalized
}

/// Score a candidate by title and author similarity. Candidate title and
/// candidate authors are provided directly as strings (the caller extracts
/// them from JSON). The query title/authors are the original query values.
pub fn score_candidate(
cand_title: Option<&str>,
cand_auths: &[String],
q_title: &Option<String>,
q_auths: &[String],
) -> f64 {
let q_title_norm = q_title.as_ref().map(|t| normalize_title(t));

let mut title_score = 0.0f64;
if let Some(qt_norm) = q_title_norm.as_ref()
&& let Some(ct) = cand_title
{
let cand = normalize_title(ct);
if cand == *qt_norm {
title_score = 1.0;
} else if cand.contains(qt_norm.as_str()) || qt_norm.contains(cand.as_str()) {
title_score = 0.9;
} else {
title_score = token_similarity(&cand, qt_norm);
}
}

let mut author_score = 0.0f64;
if !q_auths.is_empty() {
let q_auths_norm = normalize_authors(q_auths);
let mut best = 0.0f64;
Comment on lines +87 to +90
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🧹 Nitpick | 🔵 Trivial

Avoid repeated query-author normalization inside per-candidate scoring.

Line 89 recomputes normalized query authors on every score_candidate call. For batch candidate scoring, pre-normalizing once per query will reduce repeated allocations/work.

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@mlm_meta/src/helpers.rs` around lines 87 - 90, The code currently calls
normalize_authors(q_auths) inside the per-candidate scoring loop (see q_auths
and normalize_authors) causing repeated allocations; refactor so q_auths is
normalized once before iterating candidates and then pass the precomputed
q_auths_norm into the per-candidate scorer (e.g., change score_candidate to
accept a &q_auths_norm or provide an overload/use closure capturing
q_auths_norm) and remove the normalize_authors call from within score_candidate
to avoid recomputation.

for a in cand_auths {
let mut n = a.clone();
let _ = clean_name(&mut n);
let n = n.to_lowercase();
for qa in &q_auths_norm {
if n.contains(qa) || qa.contains(&n) {
best = best.max(1.0);
} else {
best = best.max(token_similarity(&n, qa));
}
}
}
author_score = best;
}

// Require minimum author match score when query has authors.
// This prevents false positives from exact title matches with wrong authors
// (e.g., "Boss of the Year" by Nicole French matching "Boss of the Year" by T. Funny)
// and prevents loose title matches (e.g., "Book Title" matching "Book Title: A Novel")
// when the author doesn't match at all.
if !q_auths.is_empty() && author_score < 0.5 {
return 0.0;
}

if q_title_norm.is_some() && !q_auths.is_empty() {
0.7 * title_score + 0.3 * author_score
} else if q_title_norm.is_some() {
title_score
} else {
author_score
}
}

// Unit tests for the similarity and candidate-scoring helpers in this file.
#[cfg(test)]
mod tests {
use super::*;
use mlm_parse::normalize_title;

// Identical strings score (almost) 1.0, a one-character difference stays
// above 0.8, and unrelated strings fall below 0.3.
#[test]
fn test_token_similarity() {
assert!(token_similarity("great adventure", "great adventure") > 0.999);
assert!(token_similarity("great adventure", "great adventures") > 0.8);
assert!(token_similarity("great adventure", "completely different") < 0.3);
}

// With no query authors, only the title contributes to the score: a
// candidate with the exact query title must not score below one whose title
// merely resembles it.
#[test]
fn test_score_candidate_title_pref() {
let q_title = Some(normalize_title("The Great Adventure"));
let q_auths: Vec<String> = vec![];

let cand_exact_title = Some("The Great Adventure");
let cand_sim_title = Some("Great Adventure");
let cand_auths_exact: Vec<String> = vec!["Alice".to_string()];
let cand_auths_sim: Vec<String> = vec!["Bob Smith".to_string()];

let s_exact = score_candidate(cand_exact_title, &cand_auths_exact, &q_title, &q_auths);
let s_sim = score_candidate(cand_sim_title, &cand_auths_sim, &q_title, &q_auths);
assert!(s_exact >= s_sim, "expected exact title to score >= similar");
}

// When the query names an author, a candidate with that author and a
// slightly misspelled title must outrank a candidate with the exact title
// but a different author.
#[test]
fn test_score_candidate_author_influence() {
let q_title = Some(normalize_title("Great Adventure"));
let q_auths: Vec<String> = vec!["bob smith".to_string()];

let cand_title_only = Some("Great Adventure");
// Title deliberately misspelled (missing the final "e").
let cand_both = Some("Great Adventur");
let cand_auths_title_only: Vec<String> = vec!["Alice".to_string()];
let cand_auths_both: Vec<String> = vec!["Bob Smith".to_string()];

let s_title_only =
score_candidate(cand_title_only, &cand_auths_title_only, &q_title, &q_auths);
let s_both = score_candidate(cand_both, &cand_auths_both, &q_title, &q_auths);
assert!(
s_both > s_title_only,
"expected candidate with matching author to score higher"
);
}
}
100 changes: 100 additions & 0 deletions mlm_meta/src/http.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
use anyhow::Result;
use async_trait::async_trait;
use reqwest::Client;

/// Asynchronous HTTP interface whose operations resolve to a `String`.
///
/// Implementors must be `Send + Sync`. `ReqwestClient` in this file is an
/// implementation that returns the response body text.
#[async_trait]
pub trait HttpClient: Send + Sync {
/// Performs a GET request against `url`.
async fn get(&self, url: &str) -> Result<String>;

/// Performs a POST request against `url`.
///
/// `body` is the optional request payload and `headers` are extra
/// name/value pairs supplied for this request.
async fn post(&self, url: &str, body: Option<&str>, headers: &[(&str, &str)])
-> Result<String>;
}

/// [`HttpClient`] implementation that sends requests through a `reqwest::Client`.
pub struct ReqwestClient {
// Underlying client; `ReqwestClient::new` builds it with a fixed user agent,
// a set of default headers, and gzip enabled.
client: Client,
}

impl ReqwestClient {
pub fn new() -> Self {
use reqwest::header::{
ACCEPT, ACCEPT_LANGUAGE, CONNECTION, HeaderMap, HeaderName, HeaderValue,
};

let mut headers = HeaderMap::new();
headers.insert(
ACCEPT,
HeaderValue::from_static(
"text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
),
);
headers.insert(
ACCEPT_LANGUAGE,
HeaderValue::from_static("en,en-US;q=0.9,en-GB;q=0.8,sv;q=0.7"),
);
headers.insert(CONNECTION, HeaderValue::from_static("keep-alive"));
headers.insert(
HeaderName::from_static("dnt"),
HeaderValue::from_static("1"),
);
headers.insert(
HeaderName::from_static("priority"),
HeaderValue::from_static("u=0, i"),
);
headers.insert(
HeaderName::from_static("sec-fetch-dest"),
HeaderValue::from_static("document"),
);
headers.insert(
HeaderName::from_static("sec-fetch-mode"),
HeaderValue::from_static("navigate"),
);
headers.insert(
HeaderName::from_static("sec-fetch-site"),
HeaderValue::from_static("none"),
);
headers.insert(
HeaderName::from_static("sec-fetch-user"),
HeaderValue::from_static("?1"),
);

Self {
client: Client::builder()
.user_agent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/143.0.0.0 Safari/537.36")
.default_headers(headers)
.gzip(true)
.build()
.unwrap()
}
Comment on lines +60 to +67
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor

Potential panic on client construction.

Client::builder().build().unwrap() can panic if the TLS backend fails to initialize (e.g., missing system certificates in constrained environments). Consider using ? or returning a Result from new().

🛡️ Proposed fix
-impl ReqwestClient {
-    pub fn new() -> Self {
+impl ReqwestClient {
+    pub fn new() -> Self {
+        Self::try_new().expect("failed to build HTTP client")
+    }
+
+    pub fn try_new() -> Result<Self> {
         // ... header setup ...
 
-        Self {
-            client: Client::builder()
+        Ok(Self {
+            client: Client::builder()
                 .user_agent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/143.0.0.0 Safari/537.36")
                 .default_headers(headers)
                 .gzip(true)
-                .build()
-                .unwrap()
-        }
+                .build()?
+        })
     }
 }
🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@mlm_meta/src/http.rs` around lines 60 - 67, The
Client::builder().build().unwrap() can panic; modify the constructor (the new()
function that creates Self) to return Result<Self, reqwest::Error> (or a
suitable error type) and replace unwrap() with the fallible build()? call: let
client = Client::builder()...build()?; then return Ok(Self { client }); ensuring
callers handle the propagated error instead of allowing a panic from unwrap().

}
}

impl Default for ReqwestClient {
fn default() -> Self {
Self::new()
}
}

#[async_trait]
impl HttpClient for ReqwestClient {
/// Sends a GET request to `url` and returns the response body as text.
///
/// Failures while sending the request or while reading the body are
/// propagated to the caller. No status check is performed in this method.
async fn get(&self, url: &str) -> Result<String> {
    let response = self.client.get(url).send().await?;
    let body = response.text().await?;
    Ok(body)
}
Comment on lines +79 to +82
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🧹 Nitpick | 🔵 Trivial

Consider adding error context for debugging.

Adding context to HTTP errors helps identify which URL failed when debugging production issues.

♻️ Proposed improvement
+use anyhow::Context;
+
     async fn get(&self, url: &str) -> Result<String> {
-        let res = self.client.get(url).send().await?.text().await?;
+        let res = self.client.get(url).send().await
+            .with_context(|| format!("GET request failed: {}", url))?
+            .text().await
+            .with_context(|| format!("failed to read response body from: {}", url))?;
         Ok(res)
     }
🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@mlm_meta/src/http.rs` around lines 79 - 82, The get function currently
returns raw reqwest errors without URL context; update async fn get(&self, url:
&str) -> Result<String> to attach context to the network calls (the
self.client.get(url).send().await and/or the .text().await) so failures include
which URL failed—for example, use anyhow::Context (or map_err with a closure) to
.with_context(|| format!("failed to send GET request to {}", url)) and
.with_context(|| format!("failed to read response body from {}", url)) around
the await points; reference the get function and the
self.client.get(url).send().await and .text().await calls when applying the
change.


/// Sends a POST request to `url` and returns the response body as text.
///
/// Every `(name, value)` pair in `headers` is attached to the request, in
/// order. When `body` is `Some`, an owned copy of it becomes the request
/// payload; when it is `None`, no body is set. Failures while sending the
/// request or reading the response are propagated to the caller.
async fn post(
    &self,
    url: &str,
    body: Option<&str>,
    headers: &[(&str, &str)],
) -> Result<String> {
    let with_headers = headers
        .iter()
        .fold(self.client.post(url), |builder, (name, value)| {
            builder.header(*name, *value)
        });
    let request = match body {
        Some(payload) => with_headers.body(payload.to_string()),
        None => with_headers,
    };
    let response = request.send().await?;
    Ok(response.text().await?)
}
}
11 changes: 11 additions & 0 deletions mlm_meta/src/lib.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
pub mod helpers;
pub mod http;
pub mod providers;
pub mod tag_category_map;
pub mod traits;

pub use helpers::*;
pub use http::*;
pub use providers::*;
pub use tag_category_map::*;
pub use traits::*;
Loading