From bebbaf99fd0f3fede2236753fb036aba37322277 Mon Sep 17 00:00:00 2001 From: Nachtalb Date: Mon, 20 Oct 2025 01:25:01 +0200 Subject: [PATCH 1/3] fix: Parsing IQDB correctly --- src/source/iqdb.rs | 52 ++++++---------------------------------------- 1 file changed, 6 insertions(+), 46 deletions(-) diff --git a/src/source/iqdb.rs b/src/source/iqdb.rs index df98924..855a967 100644 --- a/src/source/iqdb.rs +++ b/src/source/iqdb.rs @@ -96,30 +96,17 @@ impl Source for Iqdb { impl Iqdb { fn harvest_page(page: &ElementRef) -> Option { - let dom = page; - debug!("selecting .image a"); - let link = dom.select(sel!(".image a")).next()?; + let link = page.select(sel!(".image a")).next()?; debug!("grabbing href"); let url = link.value().attr("href")?; - debug!("collecting trs"); - let score = dom.select(sel!("tr")).collect::>(); - - if score.len() != 5 { - return Some(Item { - link: url.to_string(), - similarity: -1.0, - }); - } - debug!("grabbing score"); - let score = score[3]; - debug!("grabbing td"); - let td = score.select(sel!("td")).next()?; + let score = page.select(sel!("tr:last-child > td")).next()?; - let score = td.text().collect::(); + debug!("parsing score"); + let score = score.text().collect::(); let score = score.split_once('%')?.0.parse::().ok()? / 100.0; Some(Item { @@ -128,34 +115,7 @@ impl Iqdb { }) } - fn harvest_best_match(pages: &ElementRef) -> Option { - debug!("selecting .image a"); - let link = pages.select(sel!(".image a")).next()?; - - debug!("grabbing href"); - let url = link.value().attr("href")?; - - debug!("collecting trs"); - let score = pages.select(sel!("tr")).collect::>(); - - if score.len() != 5 { - return Some(Item { - link: url.to_string(), - similarity: -1.0, - }); - } - - debug!("grabbing score"); - let score = score[3]; - debug!("grabbing td"); - let td = score.select(sel!("td")).next()?; - - let score = td.text().collect::(); - let score = score.split_once('%')?.0.parse::().ok()? / 100.0; - - Some(Item { - link: url.to_string(), - similarity: score, - }) + fn harvest_best_match(page: &ElementRef) -> Option { + Self::harvest_page(page) } } From d12c4ab33a1474164026941f4525951dcbd2211f Mon Sep 17 00:00:00 2001 From: Nachtalb Date: Mon, 20 Oct 2025 01:25:01 +0200 Subject: [PATCH 2/3] refactor: Simpler IQDB processing workflow IQDB always shows results in the same order: uploaded image, best, others. Thus we can just iterate over all results excluding the uploaded one and we have our final list. --- src/source/iqdb.rs | 30 ++++++------------------------ 1 file changed, 6 insertions(+), 24 deletions(-) diff --git a/src/source/iqdb.rs b/src/source/iqdb.rs index 855a967..16807f8 100644 --- a/src/source/iqdb.rs +++ b/src/source/iqdb.rs @@ -57,25 +57,11 @@ impl Source for Iqdb { let html = scraper::Html::parse_document(&text); - let pages = html.select(sel!("#pages > div")).collect::>(); - - let best_match = if pages.len() > 2 { - Self::harvest_best_match(&pages[0]) - } else { - None - }; - - let mut items = Vec::new(); - - if let Some(best_match) = best_match { - items.push(best_match); - } - - for page in pages.into_iter().skip(2) { - let page = Self::harvest_page(&page); - - items.extend(page); - } + let mut items: Vec = html + .select(sel!("#pages > div")) + .skip(1) + .filter_map(Self::harvest_page) + .collect(); for item in &mut items { if item.link.starts_with("//") { @@ -95,7 +81,7 @@ impl Source for Iqdb { } impl Iqdb { - fn harvest_page(page: &ElementRef) -> Option { + fn harvest_page(page: ElementRef) -> Option { debug!("selecting .image a"); let link = page.select(sel!(".image a")).next()?; @@ -114,8 +100,4 @@ impl Iqdb { similarity: score, }) } - - fn harvest_best_match(page: &ElementRef) -> Option { - Self::harvest_page(page) - } } From c7b0831ec132e13db7630d00778b6b6fdf11246b Mon Sep 17 00:00:00 2001 From: Nachtalb Date: Mon, 20 Oct 2025 01:25:02 +0200 Subject: [PATCH 3/3] refactor: Fix IQDB urls on the fly Rather than reiterating over all results again --- src/source/iqdb.rs | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/source/iqdb.rs b/src/source/iqdb.rs index 16807f8..aef8cce 100644 --- a/src/source/iqdb.rs +++ b/src/source/iqdb.rs @@ -57,18 +57,12 @@ impl Source for Iqdb { let html = scraper::Html::parse_document(&text); - let mut items: Vec = html + let items: Vec = html .select(sel!("#pages > div")) .skip(1) .filter_map(Self::harvest_page) .collect(); - for item in &mut items { - if item.link.starts_with("//") { - item.link = format!("https:{}", item.link); - } - } - Ok(Output { original_url: url.to_string(), items, @@ -87,6 +81,12 @@ impl Iqdb { debug!("grabbing href"); let url = link.value().attr("href")?; + debug!("fix broken url if needed"); + let url = if url.starts_with("//") { + format!("https:{url}") + } else { + url.to_string() + }; debug!("grabbing score"); let score = page.select(sel!("tr:last-child > td")).next()?; @@ -96,7 +96,7 @@ impl Iqdb { let score = score.split_once('%')?.0.parse::().ok()? / 100.0; Some(Item { - link: url.to_string(), + link: url, similarity: score, }) }