diff --git a/apps/tray-ui/src/components/kb/TrayEntityLink.tsx b/apps/tray-ui/src/components/kb/TrayEntityLink.tsx
index e80db05..eb59aab 100644
--- a/apps/tray-ui/src/components/kb/TrayEntityLink.tsx
+++ b/apps/tray-ui/src/components/kb/TrayEntityLink.tsx
@@ -24,7 +24,7 @@ import type {
ReferenceCategory,
ReferenceEntry,
} from '../../lib/reference';
-import { webKbUrl } from '../../lib/reference';
+import { resolveReferenceEntry, webKbUrl } from '../../lib/reference';
import { TierChip } from './TierChip';
interface Props {
@@ -69,8 +69,10 @@ export function TrayEntityLink({
return {label ?? ''};
}
- const entry: ReferenceEntry | undefined = catalog?.get(
- classKey.toLowerCase(),
+ const entry: ReferenceEntry | undefined = resolveReferenceEntry(
+ category,
+ classKey,
+ catalog,
);
const text = label ?? entry?.display_name ?? classKey;
diff --git a/apps/tray-ui/src/lib/reference.test.ts b/apps/tray-ui/src/lib/reference.test.ts
new file mode 100644
index 0000000..f3aee0b
--- /dev/null
+++ b/apps/tray-ui/src/lib/reference.test.ts
@@ -0,0 +1,101 @@
+import { describe, expect, it } from 'vitest';
+
+import {
+ findEntityInBundles,
+ isCosmeticItemPort,
+ isNonLinkableItemClass,
+ resolveReferenceEntry,
+ type AllReferenceBundles,
+ type CategoryBundle,
+ type ReferenceCatalog,
+ type ReferenceCategory,
+ type ReferenceEntry,
+} from './reference';
+
+function refEntry(
+ category: ReferenceCategory,
+ class_name: string,
+ display_name: string,
+ slug: string | null = null,
+): ReferenceEntry {
+ return { category, class_name, display_name, slug, summary: { category } };
+}
+
+function makeCatalog(entries: ReferenceEntry[]): ReferenceCatalog {
+ const m = new Map();
+ for (const e of entries) m.set(e.class_name.toLowerCase(), e);
+ return m;
+}
+
+function bundle(entries: ReferenceEntry[]): CategoryBundle {
+ return { map: new Map(), catalog: makeCatalog(entries), list: entries };
+}
+
+describe('resolveReferenceEntry — tray mirror', () => {
+ const vehicles = makeCatalog([
+ refEntry('vehicle', 'ARGO_MOLE', 'ARGO MOLE', 'argo-mole'),
+ ]);
+
+ it('strips _Teach loaner suffix', () => {
+ expect(
+ resolveReferenceEntry('vehicle', 'ARGO_MOLE_Teach', vehicles)?.slug,
+ ).toBe('argo-mole');
+ });
+
+ it('resolves exact + case-insensitive', () => {
+ expect(resolveReferenceEntry('vehicle', 'argo_mole', vehicles)?.slug).toBe(
+ 'argo-mole',
+ );
+ });
+
+ it('filters avatar/structural item noise', () => {
+ const items = makeCatalog([
+ refEntry('item', 'grin_multitool_01', 'Greycat Multi-Tool', 'multitool'),
+ ]);
+ expect(
+ resolveReferenceEntry('item', 'Head_Eyelashes', items),
+ ).toBeUndefined();
+ expect(
+ resolveReferenceEntry('item', 'grin_multitool_01', items)?.slug,
+ ).toBe('multitool');
+ });
+});
+
+describe('isNonLinkableItemClass / isCosmeticItemPort — tray mirror', () => {
+ it('flags noise classes but not equipment', () => {
+ expect(isNonLinkableItemClass('Default')).toBe(true);
+ expect(isNonLinkableItemClass('Shared_Scalp_Unified')).toBe(true);
+ expect(isNonLinkableItemClass('grin_multitool_01')).toBe(false);
+ });
+
+ it('flags cosmetic ports but not equipment ports', () => {
+ expect(isCosmeticItemPort('Hair_ItemPort')).toBe(true);
+ expect(isCosmeticItemPort('weapon_attach_hand_right')).toBe(false);
+ expect(isCosmeticItemPort(null)).toBe(false);
+ });
+});
+
+describe('findEntityInBundles — applies noise + suffix logic', () => {
+ const bundles: AllReferenceBundles = {
+ vehicle: bundle([refEntry('vehicle', 'ARGO_MOLE', 'ARGO MOLE', 'argo-mole')]),
+ weapon: bundle([]),
+ item: bundle([
+ refEntry('item', 'grin_multitool_01', 'Greycat Multi-Tool', 'multitool'),
+ ]),
+ location: bundle([]),
+ };
+
+ it('finds a loaner variant via suffix strip', () => {
+ const hit = findEntityInBundles('ARGO_MOLE_Teach', bundles);
+ expect(hit?.category).toBe('vehicle');
+ expect(hit?.entry.slug).toBe('argo-mole');
+ });
+
+ it('does not bind avatar noise even though it probes the item catalog', () => {
+ expect(findEntityInBundles('Head_Eyelashes', bundles)).toBeNull();
+ });
+
+ it('returns null for a genuinely unknown identifier', () => {
+ expect(findEntityInBundles('NOPE_Unknown_Thing', bundles)).toBeNull();
+ });
+});
diff --git a/apps/tray-ui/src/lib/reference.ts b/apps/tray-ui/src/lib/reference.ts
index 2b0891e..6f2f364 100644
--- a/apps/tray-ui/src/lib/reference.ts
+++ b/apps/tray-ui/src/lib/reference.ts
@@ -328,6 +328,74 @@ export function webKbUrl(
return `${base}/kb/${category}`;
}
+/**
+ * Variant / loaner suffixes appended to a base class name in some
+ * event payloads but absent from the wiki catalogue. Mirrors the web
+ * `apps/web/src/lib/reference-types.ts`. Stripped as a second lookup
+ * attempt so `ARGO_MOLE_Teach` resolves to `ARGO_MOLE`. Lowercased.
+ */
+const VARIANT_SUFFIXES: readonly string[] = ['_teach', '_loaner'];
+
+/**
+ * Item class identifiers that are character-avatar parts, structural
+ * placeholders, or engine defaults — never catalogued equipment. Keep
+ * in sync with the web mirror. Match is case-insensitive.
+ */
+const NON_LINKABLE_ITEM_PATTERNS: readonly RegExp[] = [
+ /^default(_|$)/i,
+ /^head_/i,
+ /^body_/i,
+ /^shared_scalp/i,
+ /^pu_protos/i,
+ /^fp_visor$/i,
+ /^fps_default/i,
+ /lensdisplay/i,
+];
+
+/** True when an item class is avatar/structural noise. Pure. */
+export function isNonLinkableItemClass(classKey: string): boolean {
+ return NON_LINKABLE_ITEM_PATTERNS.some((re) => re.test(classKey));
+}
+
+const COSMETIC_ITEM_PORTS: readonly RegExp[] = [
+ /^(eyes|hair|eyelashes|eyebrow|beard|teeth|head|face)_itemport$/i,
+ /^body_itemport$/i,
+ /_scalp/i,
+];
+
+/** True when an item PORT is avatar customisation / structural. Pure. */
+export function isCosmeticItemPort(port: string | null | undefined): boolean {
+ if (!port) return false;
+ return COSMETIC_ITEM_PORTS.some((re) => re.test(port));
+}
+
+/**
+ * Resolve a raw class identifier within a single category's catalog,
+ * applying the item-noise filter and variant-suffix strip. Mirror of
+ * the web `resolveReferenceEntry`. Pure; returns undefined on miss.
+ */
+export function resolveReferenceEntry(
+ category: ReferenceCategory,
+ classKey: string | null | undefined,
+ catalog: ReferenceCatalog | undefined,
+): ReferenceEntry | undefined {
+ if (!classKey || !catalog) return undefined;
+ if (category === 'item' && isNonLinkableItemClass(classKey)) {
+ return undefined;
+ }
+ const key = classKey.toLowerCase();
+ const direct = catalog.get(key);
+ if (direct) return direct;
+ if (category === 'location') return undefined;
+ for (const suffix of VARIANT_SUFFIXES) {
+ if (key.endsWith(suffix) && key.length > suffix.length) {
+ const stripped = catalog.get(key.slice(0, -suffix.length));
+ if (stripped) return stripped;
+ }
+ }
+ return undefined;
+}
+
/** Locate a class identifier across all four catalogues. Used by
* the ReactNode prettifier (`prettifySummaryReact`) — the regex
* picks tokens out of a server-rendered summary string without
@@ -337,6 +405,10 @@ export function webKbUrl(
* practice because the wiki sync namespaces by category, but the
* iteration order is deterministic if they ever did.
*
+ * Applies the item-noise filter + variant-suffix strip per category
+ * via `resolveReferenceEntry`, so loaner variants resolve and avatar
+ * noise doesn't bind.
+ *
* Returns `null` when no catalogue claims the identifier — the
* caller falls back to the raw string in that case (same
* behaviour as the legacy `prettifySummary`). */
@@ -344,9 +416,12 @@ export function findEntityInBundles(
classKey: string,
bundles: AllReferenceBundles,
): { category: ReferenceCategory; entry: ReferenceEntry } | null {
- const key = classKey.toLowerCase();
for (const category of REFERENCE_CATEGORIES) {
- const entry = bundles[category].catalog.get(key);
+ const entry = resolveReferenceEntry(
+ category,
+ classKey,
+ bundles[category].catalog,
+ );
if (entry) return { category, entry };
}
return null;
diff --git a/apps/web/src/components/kb/EntityLink.tsx b/apps/web/src/components/kb/EntityLink.tsx
index c03a5f4..36233b7 100644
--- a/apps/web/src/components/kb/EntityLink.tsx
+++ b/apps/web/src/components/kb/EntityLink.tsx
@@ -26,6 +26,7 @@ import type {
ReferenceCatalog,
ReferenceCategory,
} from '@/lib/reference-types';
+import { resolveReferenceEntry } from '@/lib/reference-types';
import { toFriendlyName } from '@/lib/heuristic-name';
import { EntityHoverCard } from './EntityHoverCard';
import { TierChip } from './TierChip';
@@ -71,7 +72,7 @@ export function EntityLink({
return {label ?? ''};
}
- const entry = catalog?.get(classKey.toLowerCase());
+ const entry = resolveReferenceEntry(category, classKey, catalog);
const text = label ?? entry?.display_name ?? toFriendlyName(classKey);
// Tier chip is opt-in via `showTier` and only meaningful for
diff --git a/apps/web/src/lib/reference-types.test.ts b/apps/web/src/lib/reference-types.test.ts
index 83d26cc..69ad97c 100644
--- a/apps/web/src/lib/reference-types.test.ts
+++ b/apps/web/src/lib/reference-types.test.ts
@@ -1,11 +1,16 @@
import { describe, expect, it } from 'vitest';
import {
+ isCosmeticItemPort,
+ isNonLinkableItemClass,
placementLabel,
+ resolveReferenceEntry,
subtypeLabel,
tierLabel,
type LocationSummary,
type Placement,
+ type ReferenceCatalog,
+ type ReferenceEntry,
} from './reference-types';
describe('tierLabel', () => {
@@ -90,3 +95,125 @@ describe('LocationSummary backward compat', () => {
}
});
});
+
+function refEntry(
+ category: ReferenceEntry['category'],
+ class_name: string,
+ display_name: string,
+ slug: string | null = null,
+): ReferenceEntry {
+ return { category, class_name, display_name, slug, summary: { category } };
+}
+
+/** Catalog keyed by lowercased class_name, mirroring getCategoryBundle. */
+function makeCatalog(entries: ReferenceEntry[]): ReferenceCatalog {
+ const m = new Map();
+ for (const e of entries) m.set(e.class_name.toLowerCase(), e);
+ return m;
+}
+
+describe('resolveReferenceEntry — variant-suffix strip (workstream A)', () => {
+ const vehicles = makeCatalog([
+ refEntry('vehicle', 'ARGO_MOLE', 'ARGO MOLE', 'argo-mole'),
+ refEntry('vehicle', 'DRAK_Vulture', 'Drake Vulture', 'drake-vulture'),
+ ]);
+
+ it('resolves an exact (and case-insensitive) class name', () => {
+ expect(resolveReferenceEntry('vehicle', 'ARGO_MOLE', vehicles)?.slug).toBe(
+ 'argo-mole',
+ );
+ expect(resolveReferenceEntry('vehicle', 'argo_mole', vehicles)?.slug).toBe(
+ 'argo-mole',
+ );
+ });
+
+ it('strips the _Teach loaner suffix to the base class', () => {
+ // The two real misses found in the live tray DB (93 + 13 events).
+ expect(
+ resolveReferenceEntry('vehicle', 'ARGO_MOLE_Teach', vehicles)?.slug,
+ ).toBe('argo-mole');
+ expect(
+ resolveReferenceEntry('vehicle', 'DRAK_Vulture_Teach', vehicles)?.slug,
+ ).toBe('drake-vulture');
+ });
+
+ it('does not over-strip when there is no catalogued base', () => {
+ expect(
+ resolveReferenceEntry('vehicle', 'SOME_Unknown_Teach', vehicles),
+ ).toBeUndefined();
+ });
+
+ it('returns undefined when no catalog is supplied', () => {
+ expect(
+ resolveReferenceEntry('vehicle', 'ARGO_MOLE', undefined),
+ ).toBeUndefined();
+ });
+});
+
+describe('isNonLinkableItemClass — item noise filter (workstream D)', () => {
+ it('flags avatar / structural / default classes', () => {
+ for (const c of [
+ 'Default',
+ 'Default_LensDisplay_PU',
+ 'Head_Eyelashes',
+ 'Head_Teeth',
+ 'body_01_noMagicPocket',
+ 'Shared_Scalp_Unified',
+ 'PU_Protos_Head',
+ 'FP_Visor',
+ 'FPS_DefaultRadar_Lens',
+ ]) {
+ expect(isNonLinkableItemClass(c), c).toBe(true);
+ }
+ });
+
+ it('does NOT flag genuine equipment', () => {
+ for (const c of [
+ 'grin_multitool_01',
+ 'klwe_pistol_energy_01_mag',
+ 'crlf_consumable_healing_01',
+ 'behr_gren_frag_01',
+ ]) {
+ expect(isNonLinkableItemClass(c), c).toBe(false);
+ }
+ });
+
+ it('keeps noise item classes from resolving (renders plain text)', () => {
+ const items = makeCatalog([
+ refEntry('item', 'Head_Eyelashes', 'Eyelashes', 'eyelashes'),
+ refEntry('item', 'grin_multitool_01', 'Greycat Multi-Tool', 'multitool'),
+ ]);
+ expect(
+ resolveReferenceEntry('item', 'Head_Eyelashes', items),
+ ).toBeUndefined();
+ expect(
+ resolveReferenceEntry('item', 'grin_multitool_01', items)?.slug,
+ ).toBe('multitool');
+ });
+});
+
+describe('isCosmeticItemPort', () => {
+ it('flags avatar / structural ports, not equipment ports', () => {
+ for (const p of [
+ 'Eyes_ItemPort',
+ 'Hair_ItemPort',
+ 'Eyelashes_ItemPort',
+ 'Body_ItemPort',
+ ]) {
+ expect(isCosmeticItemPort(p), p).toBe(true);
+ }
+ for (const p of [
+ 'weapon_attach_hand_right',
+ 'magazine_attach',
+ 'Armor_Helmet',
+ 'utility_attach_1',
+ ]) {
+ expect(isCosmeticItemPort(p), p).toBe(false);
+ }
+ });
+
+ it('handles null / undefined', () => {
+ expect(isCosmeticItemPort(null)).toBe(false);
+ expect(isCosmeticItemPort(undefined)).toBe(false);
+ });
+});
diff --git a/apps/web/src/lib/reference-types.ts b/apps/web/src/lib/reference-types.ts
index 1a30507..66b224d 100644
--- a/apps/web/src/lib/reference-types.ts
+++ b/apps/web/src/lib/reference-types.ts
@@ -346,6 +346,97 @@ export function prettyClass(
return map.get(raw.toLowerCase()) ?? toFriendlyName(raw);
}
+/**
+ * Variant / loaner suffixes appended to a base class name in some
+ * event payloads but ABSENT from the wiki catalogue. Stripped as a
+ * second lookup attempt so e.g. `ARGO_MOLE_Teach` (the tutorial
+ * loaner) and `DRAK_Vulture_Teach` resolve to the catalogued
+ * `ARGO_MOLE` / `DRAK_Vulture`. Lowercased; matched as a suffix.
+ */
+const VARIANT_SUFFIXES: readonly string[] = ['_teach', '_loaner'];
+
+/**
+ * Item class identifiers that are character-avatar parts, structural
+ * placeholders, or engine defaults — never catalogued equipment. The
+ * `attachment_received` stream is dominated by these (avatar assembly
+ * on spawn), so without a filter the catalogue is consulted for
+ * thousands of `Head_Eyelashes` / `Default` / `body_*` "items". Match
+ * is case-insensitive; a hit means "render as plain text, never a
+ * link". Conservative — only patterns confirmed against real logs.
+ */
+const NON_LINKABLE_ITEM_PATTERNS: readonly RegExp[] = [
+ /^default(_|$)/i, // "Default", "Default_LensDisplay_PU"
+ /^head_/i, // Head_Eyelashes, Head_Teeth, Head_Eyedetail
+ /^body_/i, // body_01_noMagicPocket (corpse / avatar body)
+ /^shared_scalp/i, // Shared_Scalp_Unified
+ /^pu_protos/i, // PU_Protos_Head
+ /^fp_visor$/i, // FP_Visor
+ /^fps_default/i, // FPS_DefaultRadar_Lens
+ /lensdisplay/i, // *_LensDisplay_* HUD glass
+];
+
+/**
+ * True when an item class is avatar/structural noise rather than a
+ * catalogued, linkable piece of equipment. Pure.
+ */
+export function isNonLinkableItemClass(classKey: string): boolean {
+ return NON_LINKABLE_ITEM_PATTERNS.some((re) => re.test(classKey));
+}
+
+/**
+ * Item *ports* that hold avatar customisation or structural sockets
+ * rather than meaningful equipment (`Eyes_ItemPort`, `Hair_ItemPort`,
+ * `Body_ItemPort`). Exposed so event-rendering surfaces can suppress
+ * `attachment_received` noise by port. Pure.
+ */
+const COSMETIC_ITEM_PORTS: readonly RegExp[] = [
+ /^(eyes|hair|eyelashes|eyebrow|beard|teeth|head|face)_itemport$/i,
+ /^body_itemport$/i,
+ /_scalp/i,
+];
+
+export function isCosmeticItemPort(port: string | null | undefined): boolean {
+ if (!port) return false;
+ return COSMETIC_ITEM_PORTS.some((re) => re.test(port));
+}
+
+/**
+ * Resolve a raw class identifier to a catalog entry. Two rules apply
+ * around the exact case-insensitive key lookup:
+ * 1. **Item noise filter** — avatar / structural item classes never
+ * resolve, even when catalogued: the filter runs before the lookup,
+ * so they render as plain text. Only applies to `category === 'item'`.
+ * 2. **Variant-suffix strip** — `_Teach` / `_loaner` loaner variants
+ * fall back to their base class (`ARGO_MOLE_Teach` → `ARGO_MOLE`).
+ * Applies to vehicle / weapon / item.
+ *
+ * Locations are looked up by exact key only here — their richer
+ * multi-index / classifier resolution lives in the location catalog
+ * path, not in this generic catalog. Pure; returns undefined on miss.
+ */
+export function resolveReferenceEntry(
+ category: ReferenceCategory,
+ classKey: string | null | undefined,
+ catalog: ReferenceCatalog | undefined,
+): ReferenceEntry | undefined {
+ if (!classKey || !catalog) return undefined;
+ if (category === 'item' && isNonLinkableItemClass(classKey)) {
+ return undefined;
+ }
+ const key = classKey.toLowerCase();
+ const direct = catalog.get(key);
+ if (direct) return direct;
+ // Locations don't carry loaner-style variant suffixes.
+ if (category === 'location') return undefined;
+ for (const suffix of VARIANT_SUFFIXES) {
+ if (key.endsWith(suffix) && key.length > suffix.length) {
+ const stripped = catalog.get(key.slice(0, -suffix.length));
+ if (stripped) return stripped;
+ }
+ }
+ return undefined;
+}
+
// -- Location catalog types (Wave 1: catalog-driven hierarchy) ---------
/** Trimmed shape of a wiki location entry — only the fields we use
diff --git a/crates/starstats-core/src/location_catalog.rs b/crates/starstats-core/src/location_catalog.rs
index 04846ed..12156a2 100644
--- a/crates/starstats-core/src/location_catalog.rs
+++ b/crates/starstats-core/src/location_catalog.rs
@@ -94,8 +94,61 @@ pub struct LocationCatalog {
by_engine_tag: HashMap>,
by_slug: HashMap>,
by_normalized_name: HashMap>,
+ /// Content tokens of every entry's display name, aligned by index
+ /// with `entries`. Used by [`LocationCatalog::fuzzy_match`] to score
+ /// token overlap. Built once; never mutated after construction.
+ entry_tokens: Vec>,
+ /// Document frequency of each content token across all display
+ /// names. Drives the inverse-document-frequency weighting — a rare
+ /// word (`"kaltag"`, df 1) dominates a common one (`"outpost"`,
+ /// df ~200) so a match resting on filler words scores near zero.
+ name_token_df: HashMap,
+ /// Inverted index `token → entry indices`, restricted to tokens
+ /// rare enough to be worth gathering candidates on (df ≤
+ /// [`MAX_INDEX_DF`]). Common filler tokens are deliberately absent —
+ /// they still contribute to *scoring* (via `name_token_df`) but
+ /// never *seed* a candidate set.
+ name_token_index: HashMap>,
}
+/// A token must appear in at most this many display names to seed a
+/// fuzzy-match candidate set. Above this it's filler (`"outpost"`,
+/// `"station"`, `"research"`) and gathering on it would scan hundreds
+/// of rows for no precision gain.
+const MAX_INDEX_DF: u32 = 40;
+
+/// The *anchor* requirement: to accept a fuzzy match, the matched entry
+/// must share a non-digit content token this rare with the query. Set
+/// deliberately low (≤ 4) so the anchor is a near-unique *place* name
+/// (`"kaltag"`, `"goldenrod"`, df 1), never a corporate operator
+/// shared across a dozen sibling outposts (`"rayari"`, df 6). This is
+/// the guard that rejects `RayariHydro_McGarth` (no catalogued
+/// `mcgarth`) instead of letting it land on a random Rayari outpost.
+/// Trade-off: digit-discriminated families whose only non-digit token
+/// is a shared operator (Shubin `SAL-2`/`SAL-5`) fall through to the
+/// system heuristic rather than risk a wrong sibling.
+const FUZZY_ANCHOR_DF: u32 = 4;
+
+/// Operator / utility words that appear in engine *affiliation*
+/// segments (`RayariHydro_…`) and happen to also be rare words in some
+/// unrelated wiki name. Excluded from anchor eligibility so a
+/// coincidental affiliation-word overlap can't carry a match — e.g.
+/// `hydro` must never bind `RayariHydro_McGarth` to `Terra Mills
+/// HydroFarm`. They still contribute to *scoring* once a real anchor
+/// exists; they just can't be the anchor themselves.
+const AFFILIATION_NOISE: &[&str] = &[
+ "hydro",
+ "dynamics",
+ "corp",
+ "corporation",
+ "industries",
+ "industrial",
+ "manufacturing",
+ "security",
+ "logistics",
+ "aerospace",
+];
+
impl LocationCatalog {
/// Build the catalogue + all three indices in a single pass.
/// Collisions on any index are resolved last-write-wins; the
@@ -127,12 +180,145 @@ impl LocationCatalog {
.by_normalized_name
.insert(normalize_name(&arc.display_name), arc.clone());
+ // Content tokens of the display name feed the fuzzy
+ // matcher. Dedup per-entry so a name like "Pyro2 M Trdp 01"
+ // counts each token once toward document frequency.
+ let mut toks = content_tokens(&arc.display_name);
+ toks.sort();
+ toks.dedup();
+ for tok in &toks {
+ *catalog.name_token_df.entry(tok.clone()).or_insert(0) += 1;
+ }
+ catalog.entry_tokens.push(toks);
catalog.entries.push(arc);
}
+ // Second pass: build the inverted index now that every token's
+ // document frequency is known, skipping filler tokens.
+ for (idx, toks) in catalog.entry_tokens.iter().enumerate() {
+ for tok in toks {
+ if catalog.name_token_df.get(tok).copied().unwrap_or(0) <= MAX_INDEX_DF {
+ catalog
+ .name_token_index
+ .entry(tok.clone())
+ .or_default()
+ .push(idx);
+ }
+ }
+ }
+
catalog
}
+ /// Fuzzy fallback used by the classifier when no exact engine-tag,
+ /// slug, or normalized-name key matched. Scores catalog entries by
+ /// inverse-document-frequency-weighted token overlap against the
+ /// query tokens (the segments the classifier has already split the
+ /// raw string into), and returns the single best entry — or `None` when
+ /// no candidate clears the precision bar.
+ ///
+ /// Two guards keep precision high:
+ /// * **Distinctive-token requirement** — the winner must share a
+ /// non-digit, non-[`AFFILIATION_NOISE`] token with df ≤ [`FUZZY_ANCHOR_DF`].
+ /// A match resting only on filler (`"research"`, `"outpost"`) is rejected.
+ /// * **System consistency** — when the caller knows the system
+ /// (parsed from the engine string) and a candidate declares a
+ /// *different* system, that candidate is discarded. A
+ /// `Stanton…` engine string can never resolve to a Pyro row.
+ ///
+ /// Deterministic: ranked by score; ties break by shared-token count,
+ /// then lowest slug — never by `HashMap` iteration order.
+ pub fn fuzzy_match(
+ &self,
+ query_tokens: &[String],
+ system_hint: Option<&str>,
+ ) -> Option<&LocationCatalogEntry> {
+ // Expand the query into its content-token set.
+ let mut query: Vec = Vec::new();
+ for t in query_tokens {
+ for tok in content_tokens(t) {
+ if !query.contains(&tok) {
+ query.push(tok);
+ }
+ }
+ }
+ if query.is_empty() {
+ return None;
+ }
+
+ // Gather candidate entries: any entry sharing a non-filler
+ // token with the query.
+ let mut candidates: Vec = Vec::new();
+ for tok in &query {
+ if let Some(idxs) = self.name_token_index.get(tok) {
+ candidates.extend_from_slice(idxs);
+ }
+ }
+ candidates.sort_unstable();
+ candidates.dedup();
+
+ // Score each candidate over the FULL shared-token set (including
+ // common tokens, weighted near-zero), and apply both guards.
+ struct Scored {
+ idx: usize,
+ score: f32,
+ shared: u32,
+ }
+ let mut best: Option = None;
+ for &i in &candidates {
+ let entry = &self.entries[i];
+ if let (Some(hint), Some(sys)) = (system_hint, entry.system.as_deref()) {
+ if !hint.eq_ignore_ascii_case(sys) {
+ continue;
+ }
+ }
+ let mut score = 0.0f32;
+ let mut shared = 0u32;
+ let mut has_anchor = false;
+ for tok in &self.entry_tokens[i] {
+ if !query.contains(tok) {
+ continue;
+ }
+ let df = self.name_token_df.get(tok).copied().unwrap_or(1).max(1);
+ // Every shared token counts toward the ranking score
+ // (so a shared digit still breaks SAL-2 from SAL-5)…
+ score += 1.0 / df as f32;
+ shared += 1;
+ // …but only a rare, non-digit, non-affiliation token
+ // qualifies as the *anchor* that licenses the match.
+ if df <= FUZZY_ANCHOR_DF
+ && tok.len() >= 3
+ && !tok.chars().all(|c| c.is_ascii_digit())
+ && !AFFILIATION_NOISE.contains(&tok.as_str())
+ {
+ has_anchor = true;
+ }
+ }
+ if !has_anchor {
+ continue;
+ }
+ let better = match &best {
+ None => true,
+ Some(b) => {
+ score > b.score
+ || (score == b.score && shared > b.shared)
+ || (score == b.score
+ && shared == b.shared
+ && entry.slug < self.entries[b.idx].slug)
+ }
+ };
+ if better {
+ best = Some(Scored {
+ idx: i,
+ score,
+ shared,
+ });
+ }
+ }
+
+ best.map(|b| self.entries[b.idx].as_ref())
+ }
+
/// Number of entries in the catalogue (post-dedup of empty rows).
pub fn len(&self) -> usize {
self.entries.len()
@@ -194,6 +380,73 @@ fn normalize_name(name: &str) -> String {
out
}
+/// System names are dropped from content tokens — the classifier
+/// tracks the system separately, and including it would let any two
+/// same-system locations share a (useless) token.
+const SYSTEM_TOKENS: &[&str] = &["stanton", "pyro", "nyx", "castra", "terra", "sol"];
+
+/// Split a name or engine identifier into lowercase content tokens for
+/// fuzzy matching. Boundaries: non-ASCII-alphanumerics, camelCase humps, and
+/// letter↔digit transitions. Drops system names and sub-3-char
+/// non-numeric noise (single letters like a stray `b` from `1b`).
+///
+/// * `"RayariHydro_Deltana"` → `["rayari", "hydro", "deltana"]`
+/// * `"Shubin Mining SAL-2"` → `["shubin", "mining", "sal", "2"]`
+/// * `"Stanton4a_Shubin_SM0_13"` → `["4", "shubin", "0", "13"]` (`stanton`, `a`, `sm` dropped)
+pub fn content_tokens(s: &str) -> Vec {
+ #[derive(PartialEq, Clone, Copy)]
+ enum Kind {
+ Upper,
+ Lower,
+ Digit,
+ Other,
+ }
+ fn kind(c: char) -> Kind {
+ if c.is_ascii_digit() {
+ Kind::Digit
+ } else if c.is_ascii_uppercase() {
+ Kind::Upper
+ } else if c.is_ascii_lowercase() {
+ Kind::Lower
+ } else {
+ Kind::Other
+ }
+ }
+
+ let mut tokens: Vec = Vec::new();
+ let mut cur = String::new();
+ let mut prev: Option = None;
+ for c in s.chars() {
+ let k = kind(c);
+ let boundary = matches!(
+ (prev, k),
+ (_, Kind::Other)
+ | (Some(Kind::Other), _)
+ | (Some(Kind::Lower), Kind::Upper)
+ | (Some(Kind::Digit), Kind::Upper)
+ | (Some(Kind::Lower), Kind::Digit)
+ | (Some(Kind::Upper), Kind::Digit)
+ | (Some(Kind::Digit), Kind::Lower)
+ );
+ if boundary && !cur.is_empty() {
+ tokens.push(std::mem::take(&mut cur));
+ }
+ if k != Kind::Other {
+ cur.push(c.to_ascii_lowercase());
+ }
+ prev = Some(k);
+ }
+ if !cur.is_empty() {
+ tokens.push(cur);
+ }
+
+ tokens.retain(|t| {
+ let all_digit = t.chars().all(|c| c.is_ascii_digit());
+ (t.len() >= 3 || all_digit) && !SYSTEM_TOKENS.contains(&t.as_str())
+ });
+ tokens
+}
+
#[cfg(test)]
mod tests {
use super::*;
diff --git a/crates/starstats-core/src/location_classifier.rs b/crates/starstats-core/src/location_classifier.rs
index e5142d4..937f048 100644
--- a/crates/starstats-core/src/location_classifier.rs
+++ b/crates/starstats-core/src/location_classifier.rs
@@ -20,20 +20,29 @@
//! Resolution order (first hit wins):
//!
//! 1. **Synthetic** — engine patterns the wiki doesn't model
-//! (jump points, comm arrays, crash sites, caves, bunkers).
-//! These map to a synthetic `AnonymousPoi` tier with a
-//! derived subtype.
-//! 2. **Catalog** — `LocationCatalog::lookup_by_token` against
-//! every token in the stripped raw string. The strongest
+//! (jump points, comm arrays, crash sites, caves, bunkers), plus
+//! *noise* patterns (procedural mining/cluster nodes, dynamic
+//! mission/nav markers) which get an honest generic label and a
+//! suppressible subtype instead of being title-cased into fake
+//! proper-noun places. All map to a synthetic `AnonymousPoi` tier
+//! with a derived subtype.
+//! 2. **Catalog (exact)** — `LocationCatalog::lookup_by_token`
+//! against every token in the stripped raw string. The strongest
//! binding because it pulls real wiki taxonomy.
-//! 3. **System fallback** — engine string contains a known
+//! 3. **Catalog (fuzzy)** — `LocationCatalog::fuzzy_match`: idf-
+//! weighted distinctive-token overlap, guarded by a rarity floor
+//! and system consistency. Recovers real wiki rows the engine
+//! names differently (`Stanton4a_RayariHydro_Kaltag` →
+//! `Rayari Kaltag Research Outpost`). Runs before the heuristic
+//! so a real row beats a bare-system guess.
+//! 4. **System fallback** — engine string contains a known
//! system token (`Stanton`/`Pyro`/`Nyx`/…) but no catalogue
//! hit. Tier left as `AnonymousPoi`; system populated.
-//! 4. **Body short-code fallback** — engine emits Lagrange
+//! 5. **Body short-code fallback** — engine emits Lagrange
//! prefixes like `HUR_L1` or affiliation short codes like
//! `HDMS_*` / `Shubin_*`. Mapped to the parent system + a
//! synthetic body name.
-//! 5. **Last-resort title-case** — none of the above matched.
+//! 6. **Last-resort title-case** — none of the above matched.
//! Display name is the title-cased raw; tier `AnonymousPoi`.
use std::collections::HashMap;
@@ -90,8 +99,13 @@ pub struct LocationClassification {
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum ClassificationSource {
- /// Matched against `LocationCatalog`.
+ /// Matched against `LocationCatalog` by an exact key (engine tag,
+ /// slug, or normalized name).
Catalog,
+ /// Matched `LocationCatalog` via the distinctive-token fuzzy
+ /// fallback. A real wiki binding, but lower-confidence than an
+ /// exact key hit — kept distinct for coverage-quality telemetry.
+ Fuzzy,
/// Matched a `SYNTHETIC_MATCHER` (engine-only pattern with no wiki entry).
Synthetic,
/// Matched a system / body short-code dictionary.
@@ -119,24 +133,72 @@ pub fn classify(raw: &str, catalog: &LocationCatalog) -> LocationClassification
// `Stanton1b`-style engine tags), then each segment.
let joined = parts.join("");
if let Some(hit) = catalog.lookup_by_token(&joined) {
- return from_catalog(hit, raw);
+ return from_catalog(hit, raw, ClassificationSource::Catalog);
+ }
+ // Exact per-token, but skip bare *system* tokens on this pass. The
+ // system itself is a catalogued location (slug `stanton`), and the
+ // engine emits the system token FIRST (`OOC_Stanton_2b_Daymar`),
+ // so matching it here would shadow the specific body/place that
+ // follows — collapsing every planet/moon to its system name.
+ // Deferred to a second pass below so a *bare* system identifier
+ // still resolves.
+ for token in &parts {
+ if KNOWN_SYSTEMS.contains_key(token.to_ascii_lowercase().as_str()) {
+ continue;
+ }
+ if let Some(hit) = catalog.lookup_by_token(token) {
+ return from_catalog(hit, raw, ClassificationSource::Catalog);
+ }
+ }
+
+ // 3. Distinctive-token fuzzy match. Runs BEFORE the system
+ // heuristic: a real wiki row (`Rayari Kaltag Research Outpost`)
+ // beats a bare-system guess (`Stanton`). System hint, parsed
+ // from the same parts, prevents cross-system false positives.
+ let hint = system_hint(&parts);
+ if let Some(hit) = catalog.fuzzy_match(&parts, hint) {
+ return from_catalog(hit, raw, ClassificationSource::Fuzzy);
}
+
+ // 4. Deferred system-token exact match — a bare system identifier
+ // (just `Stanton`) still resolves to the system row with its
+ // full taxonomy now that specific tokens have had priority.
for token in &parts {
if let Some(hit) = catalog.lookup_by_token(token) {
- return from_catalog(hit, raw);
+ return from_catalog(hit, raw, ClassificationSource::Catalog);
}
}
- // 3-4. System / body short-code heuristics.
+ // 5. System / body short-code heuristics.
if let Some(c) = system_or_body_heuristic(&parts, raw) {
return c;
}
- // 5. Last-resort title-case.
+ // 6. Last-resort title-case.
fallback(&title_case_segments(&parts), raw)
}
-fn from_catalog(hit: &LocationCatalogEntry, raw: &str) -> LocationClassification {
+/// Best-effort system parse from already-stripped parts, reusing the same
+/// dictionaries the heuristic tier uses: the first part that hits either one
+/// (ASCII case-insensitively) wins. Feeds the fuzzy matcher's system-consistency guard.
+fn system_hint(parts: &[String]) -> Option<&'static str> {
+ for p in parts {
+ let key = p.to_ascii_lowercase();
+ if let Some(meta) = KNOWN_BODY_SHORT_CODES.get(key.as_str()) {
+ return Some(meta.system);
+ }
+ if let Some(sys) = KNOWN_SYSTEMS.get(key.as_str()) {
+ return Some(sys);
+ }
+ }
+ None
+}
+
+fn from_catalog(
+ hit: &LocationCatalogEntry,
+ raw: &str,
+ source: ClassificationSource,
+) -> LocationClassification {
LocationClassification {
display_name: hit.display_name.clone(),
slug: Some(hit.slug.clone()),
@@ -153,7 +215,7 @@ fn from_catalog(hit: &LocationCatalogEntry, raw: &str) -> LocationClassification
operator: hit.taxonomy.operator.clone(),
faction: hit.taxonomy.faction.clone(),
raw: raw.to_string(),
- source: ClassificationSource::Catalog,
+ source,
}
}
@@ -277,6 +339,14 @@ static SYNTHETIC_MATCHERS: &[SyntheticMatcher] = &[
match_rest_stop_engine,
match_rest_stop_generic,
match_orbital_marker,
+ // Noise patterns — engine-only dynamic / procedural identifiers
+ // that have no catalogued wiki page. Classified with an honest
+ // generic label + a suppressible subtype so they stop being
+ // title-cased into fake proper-noun "places" (e.g.
+ // `ab_mine_stanton2_med_010` → "Asteroid mining node", not
+ // "Ab Mine Stanton 2 Med 010").
+ match_dynamic_marker,
+ match_procedural_node,
];
/// Jump-point detection. The shape gating the matcher is simply the
@@ -645,6 +715,51 @@ fn match_orbital_marker(parts: &[String], raw: &str) -> Option Option {
+ let lower = raw.to_ascii_lowercase();
+ let (display, subtype) =
+ if lower.contains("navpoint_dynamic") || lower.contains("dynamic_navpoint") {
+ ("Dynamic nav point", "nav_marker")
+ } else if lower.contains("mission_qt") || lower.contains("quantum_beacon") {
+ ("Mission marker", "mission_marker")
+ } else {
+ return None;
+ };
+ Some(synthetic(display.to_string(), subtype, None, raw))
+}
+
+/// Procedural / instanced resource sites: asteroid mining and gas
+/// collection nodes, asteroid clusters (`*.socpak` object containers),
+/// and static race tracks. Real places players visit, but not
+/// catalogued wiki entities — so we give an honest category label and
+/// attach the system when the engine string carries one.
+///
+/// * `parts` — the split identifier tokens; only used to derive the
+///   system via `system_hint`.
+/// * `raw` — the untouched engine string; matched case-insensitively
+///   and forwarded to `synthetic` unchanged.
+///
+/// Returns `None` when `raw` contains none of the recognised markers,
+/// so the caller can fall through to its remaining tiers.
+fn match_procedural_node(parts: &[String], raw: &str) -> Option<LocationClassification> {
+    let lower = raw.to_ascii_lowercase();
+    // The checks run top to bottom and the first hit wins, so a string
+    // carrying several markers takes the earliest label listed here.
+    let (display, subtype) = if lower.contains("ab_mine") {
+        ("Asteroid mining node", "mining_node")
+    } else if lower.contains("ab_collector") {
+        ("Gas collection node", "gas_node")
+    } else if lower.contains("_cluster_") || lower.ends_with(".socpak") {
+        ("Asteroid cluster", "asteroid_cluster")
+    } else if lower.contains("racing_static") {
+        ("Race track", "race_track")
+    } else {
+        return None;
+    };
+    Some(synthetic(
+        display.to_string(),
+        subtype,
+        // Best-effort: stays `None` when no part names a known system
+        // or body short-code.
+        system_hint(parts).map(str::to_string),
+        raw,
+    ))
+}
+
+
fn synthetic(
display: String,
subtype: &str,
@@ -1254,4 +1369,294 @@ mod tests {
assert_eq!(c.source, ClassificationSource::Catalog);
assert_eq!(c.display_name, "Aberdeen");
}
+
+ #[test]
+ fn specific_body_wins_over_bare_system_token() {
+     // Shadowing regression guard. The catalogue holds both the
+     // Stanton system row and the Daymar moon row; the engine string
+     // `OOC_Stanton_2b_Daymar` has to come back as "Daymar" instead
+     // of being captured by its leading `Stanton` token. Before the
+     // fix, 91% of real location events collapsed to their system
+     // name this way.
+     let system_row = LocationCatalogEntry {
+         slug: "stanton".into(),
+         display_name: "Stanton".into(),
+         class_name: "stanton".into(),
+         engine_tag: None,
+         system: Some("Stanton".into()),
+         parent_body: None,
+         classification: Some("System".into()),
+         taxonomy: LocationTaxonomy::default(),
+     };
+     let moon_taxonomy = LocationTaxonomy {
+         tier: Some(LocationTier::AstronomicalObject),
+         subtype: Some("moon".into()),
+         ..LocationTaxonomy::default()
+     };
+     let moon_row = LocationCatalogEntry {
+         slug: "daymar".into(),
+         display_name: "Daymar".into(),
+         class_name: "daymar".into(),
+         engine_tag: None,
+         system: Some("Stanton".into()),
+         parent_body: Some("Crusader".into()),
+         classification: Some("Moon".into()),
+         taxonomy: moon_taxonomy,
+     };
+     // The system row is listed ahead of the moon row.
+     let catalog = catalog_with(vec![system_row, moon_row]);
+     let classified = classify("OOC_Stanton_2b_Daymar", &catalog);
+     assert_eq!(classified.display_name, "Daymar");
+     assert_eq!(classified.source, ClassificationSource::Catalog);
+     assert_eq!(classified.subtype.as_deref(), Some("moon"));
+ }
+
+ #[test]
+ fn bare_system_identifier_still_resolves_to_system_row() {
+     // Counterpart to the shadowing fix: when the engine string is
+     // nothing but a system name (no specific body token), the
+     // deferred second pass must still land on the system row and
+     // keep its slug + System tier.
+     let system_taxonomy = LocationTaxonomy {
+         tier: Some(LocationTier::System),
+         ..LocationTaxonomy::default()
+     };
+     let catalog = catalog_with(vec![LocationCatalogEntry {
+         slug: "stanton".into(),
+         display_name: "Stanton".into(),
+         class_name: "stanton".into(),
+         engine_tag: None,
+         system: Some("Stanton".into()),
+         parent_body: None,
+         classification: Some("System".into()),
+         taxonomy: system_taxonomy,
+     }]);
+     let classified = classify("OOC_Stanton", &catalog);
+     assert_eq!(classified.display_name, "Stanton");
+     assert_eq!(classified.source, ClassificationSource::Catalog);
+     assert_eq!(classified.tier, LocationTier::System);
+ }
+
+ // ---- distinctive-token fuzzy matcher ---------------------------
+ //
+ // Every engine identifier below is verbatim from a real LIVE tray
+ // DB (2026-05-31); the wiki names are the real
+ // api.star-citizen.wiki display names they should resolve to.
+ // These are the "real but unmatched by exact keys" locations that
+ // motivated the fuzzy tier.
+
+ /// Test fixture: builds a Landmark-tier "Outpost" catalogue row with
+ /// the given slug, display name and system. The class name is the
+ /// display name with its spaces removed; no engine tag or parent
+ /// body is set.
+ fn outpost(slug: &str, name: &str, system: &str) -> LocationCatalogEntry {
+     let taxonomy = LocationTaxonomy {
+         tier: Some(LocationTier::Landmark),
+         subtype: Some("outpost".into()),
+         ..LocationTaxonomy::default()
+     };
+     let class_name = name.replace(' ', "");
+     LocationCatalogEntry {
+         slug: slug.into(),
+         display_name: name.into(),
+         class_name,
+         engine_tag: None,
+         system: Some(system.into()),
+         parent_body: None,
+         classification: Some("Outpost".into()),
+         taxonomy,
+     }
+ }
+
+ /// Shared fixture for the fuzzy-matcher tests: eleven named Stanton
+ /// outposts followed by twelve `Filler{i} Research Outpost` rows.
+ ///
+ /// The filler rows exist to push the document frequency of the
+ /// generic words `research` / `outpost` up, so that an overlap on
+ /// those words alone is meant to fall short of `FUZZY_ANCHOR_DF`
+ /// (a stand-in for the real ~1955-row catalogue).
+ fn fuzzy_catalog() -> LocationCatalog {
+     // (slug, display name) pairs; every row lives in Stanton.
+     let named_rows: &[(&str, &str)] = &[
+         ("rayari-kaltag-research-outpost", "Rayari Kaltag Research Outpost"),
+         ("rayari-deltana-research-outpost", "Rayari Deltana Research Outpost"),
+         ("rayari-cantwell-research-outpost", "Rayari Cantwell Research Outpost"),
+         ("rayari-anvik-research-outpost", "Rayari Anvik Research Outpost"),
+         // Fifth Rayari row: takes `rayari` to df 5, which the tests
+         // rely on being above FUZZY_ANCHOR_DF so the operator name
+         // alone cannot anchor a match.
+         ("rayari-hickes-research-outpost", "Rayari Hickes Research Outpost"),
+         // Decoy with a coincidental rare `hydro` token; it must NOT
+         // capture `RayariHydro_*` engine strings.
+         ("terra-mills-hydrofarm", "Terra Mills HydroFarm"),
+         ("shubin-mining-facility-sal-2", "Shubin Mining Facility SAL-2"),
+         ("shubin-mining-facility-sal-5", "Shubin Mining Facility SAL-5"),
+         ("sakura-sun-goldenrod-workcenter", "Sakura Sun Goldenrod Workcenter"),
+         ("benson-mining-outpost", "Benson Mining Outpost"),
+         ("deakins-research-outpost", "Deakins Research Outpost"),
+     ];
+     let named = named_rows
+         .iter()
+         .map(|&(slug, name)| outpost(slug, name, "Stanton"));
+     // Padding rows, appended after the named ones.
+     let filler = (0..12).map(|i| {
+         outpost(
+             &format!("filler-{i}-research-outpost"),
+             &format!("Filler{i} Research Outpost"),
+             "Stanton",
+         )
+     });
+     let entries: Vec<LocationCatalogEntry> = named.chain(filler).collect();
+     catalog_with(entries)
+ }
+
+ #[test]
+ fn fuzzy_recovers_rayari_kaltag() {
+     // No exact key matches this engine string; the fuzzy tier is
+     // expected to bind it to the Kaltag outpost row.
+     let catalog = fuzzy_catalog();
+     let classified = classify("Stanton4a_RayariHydro_Kaltag", &catalog);
+     assert_eq!(classified.source, ClassificationSource::Fuzzy);
+     assert_eq!(
+         classified.slug.as_deref(),
+         Some("rayari-kaltag-research-outpost")
+     );
+     assert_eq!(classified.system.as_deref(), Some("Stanton"));
+ }
+
+ #[test]
+ fn fuzzy_recovers_sakura_sun_goldenrod() {
+     // The engine string says `DistributionCentre`, the catalogue row
+     // says `Workcenter`; the fuzzy tier should still pair them.
+     let catalog = fuzzy_catalog();
+     let engine_id = "Stanton4_DistributionCentre_SakuraSun_Goldenrod";
+     let classified = classify(engine_id, &catalog);
+     assert_eq!(classified.source, ClassificationSource::Fuzzy);
+     assert_eq!(
+         classified.slug.as_deref(),
+         Some("sakura-sun-goldenrod-workcenter")
+     );
+ }
+
+ #[test]
+ fn fuzzy_disambiguates_shubin_sal2_from_sal5() {
+     // The two Shubin rows agree on `shubin`+`mining`+`facility`+`sal`
+     // and differ only in the trailing digit, so the idf score has to
+     // favour the row that also shares that digit. In this fixture
+     // `shubin` has df 2 (within FUZZY_ANCHOR_DF) and can anchor; in
+     // the full production catalogue `shubin` is more common and this
+     // family falls back to the system heuristic — a deliberate
+     // precision-over-recall trade for digit-only discriminators.
+     let catalog = fuzzy_catalog();
+     let cases = [
+         ("Stanton3a_Shubin_SAL2", "shubin-mining-facility-sal-2"),
+         ("Stanton3a_Shubin_SAL5", "shubin-mining-facility-sal-5"),
+     ];
+     for (engine_id, expected_slug) in cases {
+         let classified = classify(engine_id, &catalog);
+         assert_eq!(classified.slug.as_deref(), Some(expected_slug));
+     }
+ }
+
+ #[test]
+ fn fuzzy_rejects_filler_only_overlap() {
+     // The only words this engine string shares with the catalogue
+     // are `research` / `outpost`, both far above FUZZY_ANCHOR_DF.
+     // With no distinctive anchor there must be no fuzzy hit; the
+     // system heuristic takes over rather than a fabricated wiki
+     // link.
+     let catalog = fuzzy_catalog();
+     let classified = classify("Stanton2a_Unmapped_Research_Outpost", &catalog);
+     assert_ne!(classified.source, ClassificationSource::Fuzzy);
+     assert_eq!(classified.system.as_deref(), Some("Stanton"));
+ }
+
+ #[test]
+ fn fuzzy_rejects_uncatalogued_place_with_only_operator_overlap() {
+     // There is no `McGarth` row. `RayariHydro_McGarth` overlaps the
+     // catalogue only through the operator `rayari` (df 5, above the
+     // anchor bar) and the affiliation word `hydro` (denylisted).
+     // Neither is allowed to anchor, so the string must NOT bind to
+     // some Rayari sibling or to Terra Mills HydroFarm.
+     let catalog = fuzzy_catalog();
+     let classified = classify("Stanton4b_RayariHydro_McGarth", &catalog);
+     assert_ne!(
+         classified.source,
+         ClassificationSource::Fuzzy,
+         "unexpected fuzzy bind to {}",
+         classified.display_name
+     );
+ }
+
+ #[test]
+ fn fuzzy_respects_system_consistency_guard() {
+     // The catalogue's single `Kaltag` row belongs to Pyro, so an
+     // engine string that names Stanton must NOT cross-match it.
+     let pyro_kaltag = outpost(
+         "rayari-kaltag-research-outpost",
+         "Rayari Kaltag Research Outpost",
+         "Pyro",
+     );
+     let catalog = catalog_with(vec![pyro_kaltag]);
+     let classified = classify("Stanton4a_RayariHydro_Kaltag", &catalog);
+     assert_ne!(classified.source, ClassificationSource::Fuzzy);
+ }
+
+ #[test]
+ fn fuzzy_does_not_fire_when_exact_key_matches() {
+     // An exact engine-tag/slug hit always outranks the fuzzy tier.
+     let catalog = catalog_with(vec![aberdeen_entry()]);
+     let classified = classify("OOC_Stanton_1b_Aberdeen", &catalog);
+     assert_eq!(classified.source, ClassificationSource::Catalog);
+ }
+
+ // ---- noise classification --------------------------------------
+
+ #[test]
+ fn noise_asteroid_mining_node() {
+     // An `ab_mine` identifier is classified synthetically as a
+     // mining node, with the system taken from the string itself.
+     let catalog = empty_catalog();
+     let classified = classify("ab_mine_stanton2_med_010", &catalog);
+     assert_eq!(classified.source, ClassificationSource::Synthetic);
+     assert_eq!(classified.subtype.as_deref(), Some("mining_node"));
+     assert_eq!(classified.system.as_deref(), Some("Stanton"));
+ }
+
+ #[test]
+ fn noise_gas_collection_node() {
+     // `ab_collector` identifiers get the gas-node subtype.
+     let catalog = empty_catalog();
+     let classified = classify("ab_collector_gas_Stanton1", &catalog);
+     assert_eq!(classified.subtype.as_deref(), Some("gas_node"));
+ }
+
+ #[test]
+ fn noise_asteroid_cluster_socpak_beats_fuzzy() {
+     // A `shubin_cluster_…​.socpak` identifier is a procedural asteroid
+     // field, NOT the Shubin facility. Noise classification is a
+     // synthetic matcher and runs ahead of the fuzzy tier, so the
+     // result must be a cluster even though the facility rows are
+     // present in the catalogue.
+     let engine_id = "shubin_cluster_001_frost_{13DA184B-8620-4DAE-9450-5CE6F2ADA1A5}.socpak";
+     let catalog = fuzzy_catalog();
+     let classified = classify(engine_id, &catalog);
+     assert_eq!(classified.source, ClassificationSource::Synthetic);
+     assert_eq!(classified.subtype.as_deref(), Some("asteroid_cluster"));
+ }
+
+ #[test]
+ fn noise_mission_marker() {
+     // Upper-case input: the marker is still recognised.
+     let catalog = empty_catalog();
+     let classified = classify("MISSION_QT_Quantum_Beacon_286174403838", &catalog);
+     assert_eq!(classified.subtype.as_deref(), Some("mission_marker"));
+ }
+
+ #[test]
+ fn noise_dynamic_nav_point() {
+     // Dynamic nav points get the `nav_marker` subtype.
+     let catalog = empty_catalog();
+     let classified = classify("NavPoint_Dynamic_285165357631", &catalog);
+     assert_eq!(classified.subtype.as_deref(), Some("nav_marker"));
+ }
+
+ #[test]
+ fn noise_race_track() {
+     // `racing_static` identifiers get the `race_track` subtype.
+     let catalog = empty_catalog();
+     let classified = classify("racing_static_st2c_ghexasteroid", &catalog);
+     assert_eq!(classified.subtype.as_deref(), Some("race_track"));
+ }
}