diff --git a/apps/labrinth/src/search/backend/typesense/mod.rs b/apps/labrinth/src/search/backend/typesense/mod.rs index 026b940b76..dec7923e2c 100644 --- a/apps/labrinth/src/search/backend/typesense/mod.rs +++ b/apps/labrinth/src/search/backend/typesense/mod.rs @@ -105,18 +105,25 @@ impl Default for RequestConfig { } fn default_query_by() -> Vec { - ["indexed_title", "slug", "summary", "indexed_author"] - .into_iter() - .map(str::to_string) - .collect() + [ + "name", + "indexed_name", + "slug", + "author", + "indexed_author", + "summary", + ] + .into_iter() + .map(str::to_string) + .collect() } fn default_query_by_weights() -> Vec { - vec![15, 5, 2, 1] + vec![15, 15, 10, 3, 3, 1] } fn default_prefix() -> Vec { - vec![true, true, true, true] + vec![true, true, true, true, true, true] } const fn default_prioritize_exact_match() -> bool { @@ -491,7 +498,7 @@ impl Typesense { let mut fields = vec![ json!({"name": "summary", "type": "string", "facet": false}), json!({"name": "slug", "type": "string", "facet": false}), - json!({"name": "indexed_title", "type": "string", "facet": false, "stem": true}), + json!({"name": "indexed_name", "type": "string", "facet": false, "stem": true}), json!({"name": "indexed_author", "type": "string", "facet": false}), json!({"name": "log_downloads", "type": "float", "sort": true}), json!({"name": "follows", "type": "int32", "facet": true, "sort": true}), diff --git a/apps/labrinth/src/search/indexing.rs b/apps/labrinth/src/search/indexing.rs index cf4a92f931..b081ce9aae 100644 --- a/apps/labrinth/src/search/indexing.rs +++ b/apps/labrinth/src/search/indexing.rs @@ -4,8 +4,10 @@ use eyre::Result; use futures::TryStreamExt; use heck::ToKebabCase; use itertools::Itertools; +use regex::Regex; use std::collections::HashMap; -use tracing::info; +use std::sync::LazyLock; +use tracing::{info, warn}; use crate::database::PgPool; use crate::database::models::loader_fields::{ @@ -25,6 +27,13 @@ use crate::routes::v2_reroute; use crate::search::UploadSearchProject; use crate::util::error::Context; +fn normalize_for_search(s: &str) -> String { + static SPECIAL_CHARS_RE: LazyLock = + LazyLock::new(|| Regex::new(r"[^a-zA-Z0-9-.\s]").expect("valid regex")); + + SPECIAL_CHARS_RE.replace_all(s, "").to_kebab_case() +} + pub async fn index_local( pool: &PgPool, redis: &RedisPool, @@ -262,7 +271,7 @@ pub async fn index_local( { team_owner } else { - println!( + warn!( "org owner not found for project {} id: {}!", project.name, project.id.0 ); @@ -427,7 +436,7 @@ pub async fn index_local( project_id: crate::models::ids::ProjectId::from(project.id) .to_string(), name: project.name.clone(), - indexed_title: project.name.to_kebab_case(), + indexed_name: normalize_for_search(&project.name), summary: project.summary.clone(), categories: categories.clone(), display_categories: display_categories.clone(), @@ -436,7 +445,7 @@ pub async fn index_local( log_downloads: (project.downloads.max(1) as f64).ln(), icon_url: project.icon_url.clone(), author: owner.clone(), - indexed_author: owner.to_kebab_case(), + indexed_author: normalize_for_search(&owner), date_created: project.approved, created_timestamp: project.approved.timestamp(), date_modified: project.updated, @@ -614,3 +623,31 @@ async fn index_versions( Ok(res_versions) } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_normalize_for_search_removes_special_chars() { + assert_eq!(normalize_for_search("Xaero's Minimap"), "xaeros-minimap"); + assert_eq!(normalize_for_search("JourneyMap"), "journey-map"); + assert_eq!(normalize_for_search("journey-map"), "journey-map"); + assert_eq!(normalize_for_search("SomeUserName"), "some-user-name"); + } + + #[test] + fn test_normalize_for_search_handles_whitespace() { + assert_eq!( + normalize_for_search("Some Project Name"), + "some-project-name" + ); + assert_eq!(normalize_for_search(" padded "), "padded"); + } + + #[test] + fn test_normalize_for_search_handles_numbers() { + assert_eq!(normalize_for_search("Project 123"), "project-123"); + assert_eq!(normalize_for_search("Test 1.0"), "test-1-0"); + } +} diff --git a/apps/labrinth/src/search/mod.rs b/apps/labrinth/src/search/mod.rs index 545a0c4fc6..843a6519ac 100644 --- a/apps/labrinth/src/search/mod.rs +++ b/apps/labrinth/src/search/mod.rs @@ -230,7 +230,7 @@ pub struct UploadSearchProject { pub author: String, pub indexed_author: String, pub name: String, - pub indexed_title: String, + pub indexed_name: String, pub summary: String, pub categories: Vec, pub display_categories: Vec,