From 04c544c70b02b85e6e66ac0dfcd1570c62d7404d Mon Sep 17 00:00:00 2001 From: KimHyeongRae0 <42205606+KimHyeongRae0@users.noreply.github.com> Date: Wed, 13 May 2026 01:32:22 +0900 Subject: [PATCH 01/10] feat(citability): add evidence signal hint --- src/core/citability.test.ts | 14 ++++++++++++++ src/core/citability.ts | 12 ++++++++++++ 2 files changed, 26 insertions(+) diff --git a/src/core/citability.test.ts b/src/core/citability.test.ts index cafa6a9..261c989 100644 --- a/src/core/citability.test.ts +++ b/src/core/citability.test.ts @@ -90,6 +90,20 @@ Furthermore, we plan to add more features.`; const stats = result.dimensions.find(d => d.name === 'Statistical Density')!; expect(stats.score).toBeGreaterThanOrEqual(12); }); + + it('suggests attribution when statistical claims lack evidence signals', () => { + const statsContent = `Our platform serves 50,000 users across 120 countries. Revenue grew 250% in 2024. We process $2.5 million in transactions daily with 99.99% uptime.`; + const result = scorePageCitability(makePage(statsContent)); + const evidenceHint = result.hints.find(h => h.message.includes('source links or attribution')); + expect(evidenceHint).toBeDefined(); + }); + + it('does not suggest attribution when statistical claims include evidence signals', () => { + const statsContent = `According to the 2024 Benchmark Report, our platform serves 50,000 users across 120 countries. Revenue grew 250% in 2024. 
Source: https://example.com/report`; + const result = scorePageCitability(makePage(statsContent)); + const evidenceHint = result.hints.find(h => h.message.includes('source links or attribution')); + expect(evidenceHint).toBeUndefined(); + }); }); describe('scoreSiteCitability', () => { diff --git a/src/core/citability.ts b/src/core/citability.ts index dfdfd4b..d0e4711 100644 --- a/src/core/citability.ts +++ b/src/core/citability.ts @@ -197,11 +197,23 @@ function scoreStatisticalDensity(content: string, hints: ContentHint[]): Citabil if (totalMatches === 0) { hints.push({ type: 'suggestion', message: 'No statistics or factual claims found — AI favors content with concrete numbers, percentages, and dates' }); + } else if (!hasEvidenceSignals(content)) { + hints.push({ type: 'suggestion', message: 'Add source links or attribution for statistical claims so AI systems can verify them' }); } return { name: 'Statistical Density', score, maxScore: 25, details: `${totalMatches} statistical claims (${density.toFixed(1)} per 100 words)` }; } +function hasEvidenceSignals(content: string): boolean { + const evidencePatterns = [ + /https?:\/\/\S+/i, + /\[\^?\d+\]/, + /\b(according to|source:|sources:|reported by|published by|based on|study|survey|report|research|data from)\b/i, + ]; + + return evidencePatterns.some(pattern => pattern.test(content)); +} + /** * Dimension 4: Structure Quality (0-25) * Checks heading hierarchy, lists, short paragraphs, summary/TL;DR. 
From 7e5e42eea4a17f74ccc121bba7e436298752c181 Mon Sep 17 00:00:00 2001 From: ruben-cytonic Date: Thu, 14 May 2026 15:48:05 +0100 Subject: [PATCH 02/10] fix(citability): tighten evidence-signal regex against self-referential prose MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Greptile flagged that the standalone words `report`, `study`, `research`, and `based on` matched self-referential content like 'Based on our internal data' or 'See the attached sales report', suppressing the attribution hint even when no external source is cited. Replaced the single permissive alternation with multiple narrower patterns that each require an attribution preposition (by / from / in) or are inherently external-source phrases: - `\baccording to\b` — explicit attribution - `\bsources?:` — explicit source label - `\b(reported|published) (by|in)\b` — clearly external - `\b(study|survey|report|research|paper|analysis) (by|from)\b` — paired with attribution preposition - `\bdata (from|by)\b` — paired - `\b(citing|cited (by|in)|as cited)\b` — explicit citation Two new tests cover the regression: self-referential phrases still trigger the attribution suggestion, and external attribution phrases ("study by Stanford", "report from McKinsey") correctly silence it. 16 tests pass. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- src/core/citability.test.ts | 16 ++++++++++++++++ src/core/citability.ts | 11 ++++++++++- 2 files changed, 26 insertions(+), 1 deletion(-) diff --git a/src/core/citability.test.ts b/src/core/citability.test.ts index 261c989..7d367e5 100644 --- a/src/core/citability.test.ts +++ b/src/core/citability.test.ts @@ -104,6 +104,22 @@ Furthermore, we plan to add more features.`; const evidenceHint = result.hints.find(h => h.message.includes('source links or attribution')); expect(evidenceHint).toBeUndefined(); }); + + it('still suggests attribution for self-referential phrasing that mentions reports/studies', () => { + // "our internal report" / "based on our data" / "user behavior study" are + // self-referential — they should NOT silence the attribution hint. + const selfReferential = `Based on our internal data, we grew 300% in 2024. See our quarterly report for more numbers — our user behavior study found 80% retention.`; + const result = scorePageCitability(makePage(selfReferential)); + const evidenceHint = result.hints.find(h => h.message.includes('source links or attribution')); + expect(evidenceHint).toBeDefined(); + }); + + it('recognizes "study by" / "report from" attribution phrases as evidence', () => { + const externalAttribution = `A 2024 study by Stanford found 80% adoption. 
Revenue figures came from a report from McKinsey, showing 250% growth.`; + const result = scorePageCitability(makePage(externalAttribution)); + const evidenceHint = result.hints.find(h => h.message.includes('source links or attribution')); + expect(evidenceHint).toBeUndefined(); + }); }); describe('scoreSiteCitability', () => { diff --git a/src/core/citability.ts b/src/core/citability.ts index d0e4711..57af8aa 100644 --- a/src/core/citability.ts +++ b/src/core/citability.ts @@ -206,9 +206,18 @@ function scoreStatisticalDensity(content: string, hints: ContentHint[]): Citabil function hasEvidenceSignals(content: string): boolean { const evidencePatterns = [ + // External-source markers: URLs, footnote refs, or explicit attribution phrases. + // Patterns are scoped so self-referential prose ("our internal report", "based on our data") + // doesn't accidentally count as evidence — each keyword must pair with an attribution + // preposition (by/from/of) or be a phrase that only makes sense for external sources. /https?:\/\/\S+/i, /\[\^?\d+\]/, - /\b(according to|source:|sources:|reported by|published by|based on|study|survey|report|research|data from)\b/i, + /\baccording to\b/i, + /\bsources?:/i, + /\b(reported|published) (by|in)\b/i, + /\b(study|survey|report|research|paper|analysis) (by|from)\b/i, + /\bdata (from|by)\b/i, + /\b(citing|cited (by|in)|as cited)\b/i, ]; return evidencePatterns.some(pattern => pattern.test(content)); From f5236676501c75736ccac4277d3652b133d7ccda Mon Sep 17 00:00:00 2001 From: ruben-cytonic Date: Thu, 14 May 2026 15:55:43 +0100 Subject: [PATCH 03/10] fix(citability): drop bare 'citing' from evidence patterns MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bare 'citing' matches self-referential prose like 'Citing our own data...' and silently suppresses the attribution hint — the inverse of intent. 
'cited by/in' and 'as cited' remain, and external attribution like 'study by'/'report from' is already covered by other patterns. Co-Authored-By: Claude Opus 4.7 (1M context) --- src/core/citability.test.ts | 9 +++++++++ src/core/citability.ts | 2 +- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/src/core/citability.test.ts b/src/core/citability.test.ts index 7d367e5..1d5ed07 100644 --- a/src/core/citability.test.ts +++ b/src/core/citability.test.ts @@ -120,6 +120,15 @@ Furthermore, we plan to add more features.`; const evidenceHint = result.hints.find(h => h.message.includes('source links or attribution')); expect(evidenceHint).toBeUndefined(); }); + + it('still suggests attribution for "Citing our own data" — bare citing is self-referential', () => { + // The bare "citing" alternative used to swallow the hint here. Real attribution uses + // "cited by/in" or "as cited" — those are kept. "Citing our X" is self-referential. + const selfCiting = `Citing our own data, we grew 300% in 2024. 
Revenue grew 250% to $50 million daily.`; + const result = scorePageCitability(makePage(selfCiting)); + const evidenceHint = result.hints.find(h => h.message.includes('source links or attribution')); + expect(evidenceHint).toBeDefined(); + }); }); describe('scoreSiteCitability', () => { diff --git a/src/core/citability.ts b/src/core/citability.ts index 57af8aa..524f9b2 100644 --- a/src/core/citability.ts +++ b/src/core/citability.ts @@ -217,7 +217,7 @@ function hasEvidenceSignals(content: string): boolean { /\b(reported|published) (by|in)\b/i, /\b(study|survey|report|research|paper|analysis) (by|from)\b/i, /\bdata (from|by)\b/i, - /\b(citing|cited (by|in)|as cited)\b/i, + /\b(cited (by|in)|as cited)\b/i, ]; return evidencePatterns.some(pattern => pattern.test(content)); From 605a50784a44f4409a0802b301d32cece5ed5ec8 Mon Sep 17 00:00:00 2001 From: ruben-cytonic Date: Thu, 14 May 2026 16:01:41 +0100 Subject: [PATCH 04/10] fix(citability): scope 'according to' against self-referential phrasing 'According to our CEO' or 'According to our internal data' previously suppressed the attribution hint despite citing no external source. Add a negative lookahead matching the same scoping intent as the other patterns in hasEvidenceSignals (study by, report from, data from, etc.). 
Co-Authored-By: Claude Opus 4.7 (1M context) --- src/core/citability.test.ts | 16 ++++++++++++++++ src/core/citability.ts | 2 +- 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/src/core/citability.test.ts b/src/core/citability.test.ts index 1d5ed07..daa1a0d 100644 --- a/src/core/citability.test.ts +++ b/src/core/citability.test.ts @@ -129,6 +129,22 @@ Furthermore, we plan to add more features.`; const evidenceHint = result.hints.find(h => h.message.includes('source links or attribution')); expect(evidenceHint).toBeDefined(); }); + + it('still suggests attribution for "According to our internal data" — self-referential according', () => { + // "according to" used to be unscoped and silently passed for "according to our X". The + // negative lookahead excludes our/my/us and "the {company,team,organization,internal}". + const selfAccording = `According to our internal data, we grew 300% in 2024. Revenue grew 250% to $50 million daily.`; + const result = scorePageCitability(makePage(selfAccording)); + const evidenceHint = result.hints.find(h => h.message.includes('source links or attribution')); + expect(evidenceHint).toBeDefined(); + }); + + it('recognizes "according to" an external source as evidence', () => { + const externalAccording = `According to Gartner, the market grew 40% in 2024. Revenue figures reached $50 million daily.`; + const result = scorePageCitability(makePage(externalAccording)); + const evidenceHint = result.hints.find(h => h.message.includes('source links or attribution')); + expect(evidenceHint).toBeUndefined(); + }); }); describe('scoreSiteCitability', () => { diff --git a/src/core/citability.ts b/src/core/citability.ts index 524f9b2..049394c 100644 --- a/src/core/citability.ts +++ b/src/core/citability.ts @@ -212,7 +212,7 @@ function hasEvidenceSignals(content: string): boolean { // preposition (by/from/of) or be a phrase that only makes sense for external sources. 
/https?:\/\/\S+/i, /\[\^?\d+\]/, - /\baccording to\b/i, + /\baccording to\s+(?!our\b|my\b|us\b|the\s+(?:company|team|organization|internal)\b)/i, /\bsources?:/i, /\b(reported|published) (by|in)\b/i, /\b(study|survey|report|research|paper|analysis) (by|from)\b/i, From a41692da3e3c72b67a9e74f1e72873eab12ac4d4 Mon Sep 17 00:00:00 2001 From: ruben-cytonic Date: Thu, 14 May 2026 16:08:35 +0100 Subject: [PATCH 05/10] fix(citability): scope 'data from/by' against self-referential phrasing 'Data from our research team' previously suppressed the attribution hint because the pattern was unguarded. Mirror the same negative lookahead used by 'according to' so the only path that fires is real external attribution. Co-Authored-By: Claude Opus 4.7 (1M context) --- src/core/citability.test.ts | 16 ++++++++++++++++ src/core/citability.ts | 2 +- 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/src/core/citability.test.ts b/src/core/citability.test.ts index daa1a0d..885fdd8 100644 --- a/src/core/citability.test.ts +++ b/src/core/citability.test.ts @@ -145,6 +145,22 @@ Furthermore, we plan to add more features.`; const evidenceHint = result.hints.find(h => h.message.includes('source links or attribution')); expect(evidenceHint).toBeUndefined(); }); + + it('still suggests attribution for "data from our research team" — self-referential data-from', () => { + // "data from" used to be unguarded — "Data from our research team" would silently + // suppress the hint. Same negative-lookahead scoping as "according to". + const selfData = `Data from our research team shows we grew 300% in 2024. Revenue grew 250% to $50 million daily.`; + const result = scorePageCitability(makePage(selfData)); + const evidenceHint = result.hints.find(h => h.message.includes('source links or attribution')); + expect(evidenceHint).toBeDefined(); + }); + + it('recognizes "data from" an external source as evidence', () => { + const externalData = `Data from Bloomberg shows the market grew 40% in 2024. 
Revenue figures reached $50 million daily.`; + const result = scorePageCitability(makePage(externalData)); + const evidenceHint = result.hints.find(h => h.message.includes('source links or attribution')); + expect(evidenceHint).toBeUndefined(); + }); }); describe('scoreSiteCitability', () => { diff --git a/src/core/citability.ts b/src/core/citability.ts index 049394c..ab486be 100644 --- a/src/core/citability.ts +++ b/src/core/citability.ts @@ -216,7 +216,7 @@ function hasEvidenceSignals(content: string): boolean { /\bsources?:/i, /\b(reported|published) (by|in)\b/i, /\b(study|survey|report|research|paper|analysis) (by|from)\b/i, - /\bdata (from|by)\b/i, + /\bdata (from|by)\s+(?!our\b|my\b|us\b|the\s+(?:company|team|organization|internal)\b)/i, /\b(cited (by|in)|as cited)\b/i, ]; From 611b0e69c4ce9d95287042fa9d7d3dfa4bb75255 Mon Sep 17 00:00:00 2001 From: ruben-cytonic Date: Thu, 14 May 2026 16:15:46 +0100 Subject: [PATCH 06/10] fix(citability): apply self-referential guard to study/report and reported/published 'Study by our team' and 'Reported by our CEO' previously slipped through. Extract the negative-lookahead fragment to a shared NOT_SELF_REF constant and apply it consistently to every attribution pattern. Co-Authored-By: Claude Opus 4.7 (1M context) --- src/core/citability.test.ts | 8 ++++++++ src/core/citability.ts | 18 +++++++++++------- 2 files changed, 19 insertions(+), 7 deletions(-) diff --git a/src/core/citability.test.ts b/src/core/citability.test.ts index 885fdd8..93d60c9 100644 --- a/src/core/citability.test.ts +++ b/src/core/citability.test.ts @@ -161,6 +161,14 @@ Furthermore, we plan to add more features.`; const evidenceHint = result.hints.find(h => h.message.includes('source links or attribution')); expect(evidenceHint).toBeUndefined(); }); + + it('still suggests attribution for "study by our team" and "reported by our CEO"', () => { + // Same self-referential guard for study/report/research-by-from and reported/published-by-in. 
+ const selfStudy = `In a study by our team, we grew 300% in 2024. Reported by our CEO, revenue reached $50 million daily.`; + const result = scorePageCitability(makePage(selfStudy)); + const evidenceHint = result.hints.find(h => h.message.includes('source links or attribution')); + expect(evidenceHint).toBeDefined(); + }); }); describe('scoreSiteCitability', () => { diff --git a/src/core/citability.ts b/src/core/citability.ts index ab486be..9ec2750 100644 --- a/src/core/citability.ts +++ b/src/core/citability.ts @@ -204,19 +204,23 @@ function scoreStatisticalDensity(content: string, hints: ContentHint[]): Citabil return { name: 'Statistical Density', score, maxScore: 25, details: `${totalMatches} statistical claims (${density.toFixed(1)} per 100 words)` }; } +// Shared negative-lookahead fragment used by every attribution pattern. Excludes +// "our/my/us" and "the {company,team,organization,internal}" so phrases like +// "according to our CEO" or "study from our team" don't masquerade as evidence. +const NOT_SELF_REF = String.raw`(?!our\b|my\b|us\b|the\s+(?:company|team|organization|internal)\b)`; + function hasEvidenceSignals(content: string): boolean { const evidencePatterns = [ // External-source markers: URLs, footnote refs, or explicit attribution phrases. - // Patterns are scoped so self-referential prose ("our internal report", "based on our data") - // doesn't accidentally count as evidence — each keyword must pair with an attribution - // preposition (by/from/of) or be a phrase that only makes sense for external sources. + // Every keyword pair must be followed by something that isn't self-referential — + // "our internal report", "data from us", "study by our team" should NOT count. 
/https?:\/\/\S+/i, /\[\^?\d+\]/, - /\baccording to\s+(?!our\b|my\b|us\b|the\s+(?:company|team|organization|internal)\b)/i, + new RegExp(String.raw`\baccording to\s+${NOT_SELF_REF}`, 'i'), /\bsources?:/i, - /\b(reported|published) (by|in)\b/i, - /\b(study|survey|report|research|paper|analysis) (by|from)\b/i, - /\bdata (from|by)\s+(?!our\b|my\b|us\b|the\s+(?:company|team|organization|internal)\b)/i, + new RegExp(String.raw`\b(reported|published) (by|in)\s+${NOT_SELF_REF}`, 'i'), + new RegExp(String.raw`\b(study|survey|report|research|paper|analysis) (by|from)\s+${NOT_SELF_REF}`, 'i'), + new RegExp(String.raw`\bdata (from|by)\s+${NOT_SELF_REF}`, 'i'), /\b(cited (by|in)|as cited)\b/i, ]; From bbe6f90da8bb5a2ecebfe5f7d7f0b3a7b1d53f38 Mon Sep 17 00:00:00 2001 From: ruben-cytonic Date: Thu, 14 May 2026 16:26:13 +0100 Subject: [PATCH 07/10] fix(citability): make NOT_SELF_REF backtracking-safe and apply to sources?: MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The previous form had \s+ outside the lookahead, which the regex engine backtracks to 0 chars when the lookahead fails — letting 'Sources: our X' pass as evidence. Move whitespace inside the lookahead so it's anchored immediately after the keyword and cannot be skipped via backtracking. Apply to the sources?: pattern too for parity. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- src/core/citability.test.ts | 8 ++++++++ src/core/citability.ts | 18 ++++++++++-------- 2 files changed, 18 insertions(+), 8 deletions(-) diff --git a/src/core/citability.test.ts b/src/core/citability.test.ts index 93d60c9..7c41f6f 100644 --- a/src/core/citability.test.ts +++ b/src/core/citability.test.ts @@ -169,6 +169,14 @@ Furthermore, we plan to add more features.`; const evidenceHint = result.hints.find(h => h.message.includes('source links or attribution')); expect(evidenceHint).toBeDefined(); }); + + it('still suggests attribution for "Sources: our internal team"', () => { + // The sources?: label is a strong signal but only when followed by an external source. + const selfSources = `We grew 300% in 2024. Revenue reached $50 million daily across 120 countries. Sources: our internal team and our research department.`; + const result = scorePageCitability(makePage(selfSources)); + const evidenceHint = result.hints.find(h => h.message.includes('source links or attribution')); + expect(evidenceHint).toBeDefined(); + }); }); describe('scoreSiteCitability', () => { diff --git a/src/core/citability.ts b/src/core/citability.ts index 9ec2750..6bd854f 100644 --- a/src/core/citability.ts +++ b/src/core/citability.ts @@ -204,10 +204,12 @@ function scoreStatisticalDensity(content: string, hints: ContentHint[]): Citabil return { name: 'Statistical Density', score, maxScore: 25, details: `${totalMatches} statistical claims (${density.toFixed(1)} per 100 words)` }; } -// Shared negative-lookahead fragment used by every attribution pattern. Excludes -// "our/my/us" and "the {company,team,organization,internal}" so phrases like +// Shared negative-lookahead fragment used by every attribution pattern. The lookahead +// is anchored immediately after the keyword and consumes any whitespace internally so +// the engine can't backtrack \s* to 0 chars and slip a self-referential token through. 
+// Excludes "our/my/us" and "the {company,team,organization,internal}" so phrases like // "according to our CEO" or "study from our team" don't masquerade as evidence. -const NOT_SELF_REF = String.raw`(?!our\b|my\b|us\b|the\s+(?:company|team|organization|internal)\b)`; +const NOT_SELF_REF = String.raw`(?!\s*(?:our|my|us|the\s+(?:company|team|organization|internal))\b)`; function hasEvidenceSignals(content: string): boolean { const evidencePatterns = [ @@ -216,11 +218,11 @@ function hasEvidenceSignals(content: string): boolean { // "our internal report", "data from us", "study by our team" should NOT count. /https?:\/\/\S+/i, /\[\^?\d+\]/, - new RegExp(String.raw`\baccording to\s+${NOT_SELF_REF}`, 'i'), - /\bsources?:/i, - new RegExp(String.raw`\b(reported|published) (by|in)\s+${NOT_SELF_REF}`, 'i'), - new RegExp(String.raw`\b(study|survey|report|research|paper|analysis) (by|from)\s+${NOT_SELF_REF}`, 'i'), - new RegExp(String.raw`\bdata (from|by)\s+${NOT_SELF_REF}`, 'i'), + new RegExp(String.raw`\baccording to\b${NOT_SELF_REF}`, 'i'), + new RegExp(String.raw`\bsources?:${NOT_SELF_REF}`, 'i'), + new RegExp(String.raw`\b(reported|published) (by|in)\b${NOT_SELF_REF}`, 'i'), + new RegExp(String.raw`\b(study|survey|report|research|paper|analysis) (by|from)\b${NOT_SELF_REF}`, 'i'), + new RegExp(String.raw`\bdata (from|by)\b${NOT_SELF_REF}`, 'i'), /\b(cited (by|in)|as cited)\b/i, ]; From 67e9185fd5d9ab3babb060b998be66d33b64ed23 Mon Sep 17 00:00:00 2001 From: ruben-cytonic Date: Thu, 14 May 2026 16:30:02 +0100 Subject: [PATCH 08/10] fix(citability): apply NOT_SELF_REF guard to cited (by|in) 'Revenue data cited by our internal research team' previously passed as evidence. Split 'as cited' from 'cited by/in' since only the latter takes a follower; apply the shared guard to the variable form. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- src/core/citability.test.ts | 14 ++++++++++++++ src/core/citability.ts | 3 ++- 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/src/core/citability.test.ts b/src/core/citability.test.ts index 7c41f6f..e888b1a 100644 --- a/src/core/citability.test.ts +++ b/src/core/citability.test.ts @@ -170,6 +170,20 @@ Furthermore, we plan to add more features.`; expect(evidenceHint).toBeDefined(); }); + it('still suggests attribution for "cited by our internal research team"', () => { + const selfCited = `Revenue data cited by our internal research team shows we grew 300% in 2024 across 120 countries. Daily transactions reached $50 million.`; + const result = scorePageCitability(makePage(selfCited)); + const evidenceHint = result.hints.find(h => h.message.includes('source links or attribution')); + expect(evidenceHint).toBeDefined(); + }); + + it('recognizes "cited by" an external source as evidence', () => { + const externalCited = `Findings cited by Stanford show the market grew 40% in 2024. Revenue figures reached $50 million daily.`; + const result = scorePageCitability(makePage(externalCited)); + const evidenceHint = result.hints.find(h => h.message.includes('source links or attribution')); + expect(evidenceHint).toBeUndefined(); + }); + it('still suggests attribution for "Sources: our internal team"', () => { // The sources?: label is a strong signal but only when followed by an external source. const selfSources = `We grew 300% in 2024. Revenue reached $50 million daily across 120 countries. 
Sources: our internal team and our research department.`; diff --git a/src/core/citability.ts b/src/core/citability.ts index 6bd854f..0b00760 100644 --- a/src/core/citability.ts +++ b/src/core/citability.ts @@ -223,7 +223,8 @@ function hasEvidenceSignals(content: string): boolean { new RegExp(String.raw`\b(reported|published) (by|in)\b${NOT_SELF_REF}`, 'i'), new RegExp(String.raw`\b(study|survey|report|research|paper|analysis) (by|from)\b${NOT_SELF_REF}`, 'i'), new RegExp(String.raw`\bdata (from|by)\b${NOT_SELF_REF}`, 'i'), - /\b(cited (by|in)|as cited)\b/i, + new RegExp(String.raw`\bcited (by|in)\b${NOT_SELF_REF}`, 'i'), + /\bas cited\b/i, ]; return evidencePatterns.some(pattern => pattern.test(content)); From c0dae247159ef5eab87ae84559f43f00c910a7c9 Mon Sep 17 00:00:00 2001 From: ruben-cytonic Date: Thu, 14 May 2026 16:39:56 +0100 Subject: [PATCH 09/10] fix(citability): drop bare 'as cited' from evidence patterns MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 'As cited in our internal report' previously suppressed the hint because the bare /as cited/i pattern matched independently. 'as cited' alone is meaningless without 'in/by SOURCE' — the existing 'cited (by|in)' with the self-referential guard already covers the real case. Co-Authored-By: Claude Opus 4.7 (1M context) --- src/core/citability.test.ts | 9 +++++++++ src/core/citability.ts | 1 - 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/src/core/citability.test.ts b/src/core/citability.test.ts index e888b1a..992b950 100644 --- a/src/core/citability.test.ts +++ b/src/core/citability.test.ts @@ -170,6 +170,15 @@ Furthermore, we plan to add more features.`; expect(evidenceHint).toBeDefined(); }); + it('still suggests attribution for "As cited in our internal report"', () => { + // 'as cited' alone is meaningless without 'in/by SOURCE' — covered by the + // 'cited (by|in)' pattern with the self-referential guard. 
+ const selfAsCited = `As cited in our internal report, we grew 300% in 2024 across 120 countries. Daily transactions reached $50 million.`; + const result = scorePageCitability(makePage(selfAsCited)); + const evidenceHint = result.hints.find(h => h.message.includes('source links or attribution')); + expect(evidenceHint).toBeDefined(); + }); + it('still suggests attribution for "cited by our internal research team"', () => { const selfCited = `Revenue data cited by our internal research team shows we grew 300% in 2024 across 120 countries. Daily transactions reached $50 million.`; const result = scorePageCitability(makePage(selfCited)); diff --git a/src/core/citability.ts b/src/core/citability.ts index 0b00760..739823f 100644 --- a/src/core/citability.ts +++ b/src/core/citability.ts @@ -224,7 +224,6 @@ function hasEvidenceSignals(content: string): boolean { new RegExp(String.raw`\b(study|survey|report|research|paper|analysis) (by|from)\b${NOT_SELF_REF}`, 'i'), new RegExp(String.raw`\bdata (from|by)\b${NOT_SELF_REF}`, 'i'), new RegExp(String.raw`\bcited (by|in)\b${NOT_SELF_REF}`, 'i'), - /\bas cited\b/i, ]; return evidencePatterns.some(pattern => pattern.test(content)); From b80f3d861746678d937ce9408f0cb0366a1a7731 Mon Sep 17 00:00:00 2001 From: ruben-cytonic Date: Thu, 14 May 2026 16:46:10 +0100 Subject: [PATCH 10/10] =?UTF-8?q?fix(citability):=20drop=20'us'=20from=20N?= =?UTF-8?q?OT=5FSELF=5FREF=20=E2=80=94=20collides=20with=20'US'=20(country?= =?UTF-8?q?)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The /i flag made 'us' match 'US' (United States), mis-flagging legitimate external citations like 'According to US regulators' or 'Data from US studies' as self-referential. The bare 'us' pronoun ('data from us', 'according to us') is uncommon enough that the collision isn't worth it. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- src/core/citability.test.ts | 9 +++++++++ src/core/citability.ts | 8 ++++++-- 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/src/core/citability.test.ts b/src/core/citability.test.ts index 992b950..d95449f 100644 --- a/src/core/citability.test.ts +++ b/src/core/citability.test.ts @@ -170,6 +170,15 @@ Furthermore, we plan to add more features.`; expect(evidenceHint).toBeDefined(); }); + it('recognizes "According to US regulators" as external evidence (not self-referential)', () => { + // /i flag was making 'us' in NOT_SELF_REF match 'US' (United States), incorrectly + // flagging legitimate citations like "According to US regulators". 'us' was dropped. + const usCitation = `According to US regulators, the market grew 40% in 2024. Revenue figures reached $50 million daily across 120 countries.`; + const result = scorePageCitability(makePage(usCitation)); + const evidenceHint = result.hints.find(h => h.message.includes('source links or attribution')); + expect(evidenceHint).toBeUndefined(); + }); + it('still suggests attribution for "As cited in our internal report"', () => { // 'as cited' alone is meaningless without 'in/by SOURCE' — covered by the // 'cited (by|in)' pattern with the self-referential guard. diff --git a/src/core/citability.ts b/src/core/citability.ts index 739823f..267036b 100644 --- a/src/core/citability.ts +++ b/src/core/citability.ts @@ -207,9 +207,13 @@ function scoreStatisticalDensity(content: string, hints: ContentHint[]): Citabil // Shared negative-lookahead fragment used by every attribution pattern. The lookahead // is anchored immediately after the keyword and consumes any whitespace internally so // the engine can't backtrack \s* to 0 chars and slip a self-referential token through. 
-// Excludes "our/my/us" and "the {company,team,organization,internal}" so phrases like +// Excludes "our/my" and "the {company,team,organization,internal}" so phrases like // "according to our CEO" or "study from our team" don't masquerade as evidence. -const NOT_SELF_REF = String.raw`(?!\s*(?:our|my|us|the\s+(?:company|team|organization|internal))\b)`; +// 'us' is intentionally omitted — the /i flag makes it also match "US" (United States), +// which would mis-flag legitimate external sources like "According to US regulators". +// The bare "data from us" / "according to us" cases are uncommon and not worth the +// collision with US-government citations. +const NOT_SELF_REF = String.raw`(?!\s*(?:our|my|the\s+(?:company|team|organization|internal))\b)`; function hasEvidenceSignals(content: string): boolean { const evidencePatterns = [