From 7bfaf5aefeb530f7890ad279d7a14c9812fd191b Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 10 Apr 2026 09:09:35 +0000 Subject: [PATCH 1/5] Initial plan From 803ae340572e0a068a5d12f1191c00c9bd61ed63 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 10 Apr 2026 09:33:47 +0000 Subject: [PATCH 2/5] refactor: split shared.ts into focused helpers, consolidate types, remove deprecated code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Split scripts/data-transformers/content-generators/shared.ts (1125 lines → 527 lines) into focused sub-modules: - doc-type-helpers.ts: DOC_TYPE_DISPLAY, localizeDocType, TITLE_SUFFIX_TEMPLATES - event-helpers.ts: findRelatedDocuments, findRelatedQuestions, extractMinister - impact-helpers.ts: generateImpactAnalysis, generateConsequencesAnalysis - framework-renderers.ts: stub types/functions + PESTLE/stakeholder/risk/implementation HTML renderers - ai-marker-helpers.ts: detectBannedPatterns + BANNED_PATTERNS - shared.ts kept as barrel that re-exports all previously-exported symbols for full backward compatibility; no existing imports were broken - Extract URL/text utilities from generators.ts (2234 → 2109 lines) to url-utils.ts: extractDocIdFromUrl, isGovernmentUrl, isGitHubUrl, toGitHubRawUrl, hashPathSuffix, sanitizePlainText; re-exported for BC - Types review: scripts/data-transformers/types.ts and scripts/generate-news-enhanced/types.ts have no duplicates with scripts/types/ — no consolidation needed - All 4097 tests pass, ESLint clean (no new errors) Co-authored-by: pethers <1726836+pethers@users.noreply.github.com> --- .../content-generators/ai-marker-helpers.ts | 56 ++ .../content-generators/doc-type-helpers.ts | 156 +++ .../content-generators/event-helpers.ts | 62 ++ .../content-generators/framework-renderers.ts | 353 +++++++ 
.../content-generators/impact-helpers.ts | 110 +++ .../content-generators/shared.ts | 888 +++--------------- scripts/generate-news-enhanced/generators.ts | 149 +-- scripts/generate-news-enhanced/url-utils.ts | 145 +++ 8 files changed, 1039 insertions(+), 880 deletions(-) create mode 100644 scripts/data-transformers/content-generators/ai-marker-helpers.ts create mode 100644 scripts/data-transformers/content-generators/doc-type-helpers.ts create mode 100644 scripts/data-transformers/content-generators/event-helpers.ts create mode 100644 scripts/data-transformers/content-generators/framework-renderers.ts create mode 100644 scripts/data-transformers/content-generators/impact-helpers.ts create mode 100644 scripts/generate-news-enhanced/url-utils.ts diff --git a/scripts/data-transformers/content-generators/ai-marker-helpers.ts b/scripts/data-transformers/content-generators/ai-marker-helpers.ts new file mode 100644 index 000000000..d3d941282 --- /dev/null +++ b/scripts/data-transformers/content-generators/ai-marker-helpers.ts @@ -0,0 +1,56 @@ +/** + * @module data-transformers/content-generators/ai-marker-helpers + * @description Banned content pattern detection. + * Per SHARED_PROMPT_PATTERNS.md §BANNED Content Patterns v4.0, + * these patterns must never appear in production articles. + * AI agents MUST replace all AI_MUST_REPLACE markers with genuine analysis. + * + * @author Hack23 AB + * @license Apache-2.0 + */ + +/** + * Banned content patterns that indicate low-quality boilerplate text. + * Per SHARED_PROMPT_PATTERNS.md §BANNED Content Patterns v4.0, these + * must never appear in production articles. AI agents MUST replace them + * with genuine, document-specific analysis. + */ +const BANNED_PATTERNS: readonly { label: string; pattern: RegExp }[] = [ + { label: 'neutralText: "The political landscape remains fluid…"', pattern: /The political landscape remains fluid,? 
with both government and opposition positioning for advantage/i }, + { label: 'debateAnalysisMarker: "No chamber debate data is available…"', pattern: /No chamber debate data is available for these items,? limiting our ability/i }, + { label: 'policySignificanceTouches: "Touches on {domains}."', pattern: /Touches on [\p{L}\p{N}][\p{L}\p{N}\s,&/()-]*\./iu }, + { label: 'analysisOfNDocuments: "Analysis of N documents covering…"', pattern: /Analysis of \d+ documents covering/i }, + { label: 'policySignificanceGeneric: "Requires committee review and chamber debate…"', pattern: /Requires committee review and chamber debate/i }, + { label: 'topicInFocusSuffix: "…: {Topic} in Focus"', pattern: /:\s+\w[\w\s]*\bin Focus\b/i }, + { label: 'briefingOnFieldLabels: "Political intelligence briefing on {Field}: and {Field}:"', pattern: /Political intelligence briefing on \w+:\s+and\s+\w+:/i }, + // Deep Analysis generic template patterns — AI MUST replace these with specific analysis + { label: 'genericTimeline: "The pace of activity signals…"', pattern: /The pace of activity signals the political urgency/i }, + { label: 'genericTimeline: "define the current legislative landscape"', pattern: /define the current legislative landscape/i }, + { label: 'genericWhy: "broad legislative push that will shape"', pattern: /broad legislative push that will shape multiple aspects/i }, + { label: 'genericWhy: "critical period for understanding the government"', pattern: /critical period for understanding the government.s strategic direction/i }, + { label: 'genericImpact: "culmination of legislative review, with recommendations that guide"', pattern: /culmination of legislative review,? 
with recommendations that guide/i }, + { label: 'genericImpact: "interplay between governing ambition and opposition scrutiny"', pattern: /interplay between governing ambition and opposition scrutiny/i }, + { label: 'genericConsequences: "cascade through committee deliberations"', pattern: /cascade through committee deliberations,? chamber votes/i }, + { label: 'genericConsequences: "establish the policy alternatives that opposition parties will champion"', pattern: /establish the policy alternatives that opposition parties will champion/i }, + { label: 'genericCritical: "Standard parliamentary procedures are being followed"', pattern: /Standard parliamentary procedures are being followed/i }, + { label: 'genericCritical: "gap between legislative intent and implementation"', pattern: /gap between legislative intent and implementation often reveals/i }, + { label: 'genericPillarTransition: "While parliament deliberates these legislative matters"', pattern: /While parliament deliberates these legislative matters/i }, +]; + +/** + * Detect banned boilerplate patterns in HTML content. + * Returns an array of human-readable labels identifying each detected + * banned pattern, suitable for quality gate logs and error messages. 
+ * + * @param html - The HTML string to scan for banned patterns + * @returns Array of stable human-readable labels for each detected banned pattern + */ +export function detectBannedPatterns(html: string): string[] { + const found: string[] = []; + for (const { label, pattern } of BANNED_PATTERNS) { + if (pattern.test(html)) { + found.push(label); + } + } + return found; +} diff --git a/scripts/data-transformers/content-generators/doc-type-helpers.ts b/scripts/data-transformers/content-generators/doc-type-helpers.ts new file mode 100644 index 000000000..056f07fd2 --- /dev/null +++ b/scripts/data-transformers/content-generators/doc-type-helpers.ts @@ -0,0 +1,156 @@ +/** + * @module data-transformers/content-generators/doc-type-helpers + * @description Document type display names (multi-language) and title suffix templates. + * Provides DOC_TYPE_DISPLAY lookup table, localizeDocType utility, and + * TITLE_SUFFIX_TEMPLATES for inverted-pyramid lede construction. + * + * @author Hack23 AB + * @license Apache-2.0 + */ + +import type { Language } from '../../types/language.js'; + +/** Localized singular/plural display names for a Riksdag document type code. */ +export type DocTypeLocalization = { + singular: Partial>; + plural: Partial>; +}; + +/** Multi-language display names for known Riksdag document type codes. 
*/ +export const DOC_TYPE_DISPLAY: Readonly> = { + prop: { + singular: { + en: 'Proposition', sv: 'Proposition', da: 'Proposition', no: 'Proposisjon', + fi: 'Hallituksen esitys', de: 'Regierungsvorlage', fr: 'Projet de loi', es: 'Proposición', + nl: 'Wetsvoorstel', ar: 'مقترح قانون', he: 'הצעת חוק', ja: '法案', ko: '정부 제출 법안', zh: '政府法案', + }, + plural: { + en: 'Propositions', sv: 'Propositioner', da: 'Propositioner', no: 'Proposisjoner', + fi: 'Hallituksen esitykset', de: 'Regierungsvorlagen', fr: 'Projets de loi', es: 'Proposiciones', + nl: 'Wetsvoorstellen', ar: 'مقترحات قوانين', he: 'הצעות חוק', ja: '法案', ko: '정부 제출 법안', zh: '政府法案', + }, + }, + bet: { + singular: { + en: 'Committee Report', sv: 'Betänkande', da: 'Udvalgsbetænkning', no: 'Komitéinnstilling', + fi: 'Valiokunnan mietintö', de: 'Ausschussbericht', fr: 'Rapport de commission', es: 'Informe de comisión', + nl: 'Commissieverslag', ar: 'تقرير لجنة', he: 'דוח ועדה', ja: '委員会報告書', ko: '위원회 보고서', zh: '委员会报告', + }, + plural: { + en: 'Committee Reports', sv: 'Betänkanden', da: 'Udvalgsbetænkninger', no: 'Komitéinnstillinger', + fi: 'Valiokunnan mietinnöt', de: 'Ausschussberichte', fr: 'Rapports de commission', es: 'Informes de comisión', + nl: 'Commissieverslagen', ar: 'تقارير لجان', he: 'דוחות ועדה', ja: '委員会報告書', ko: '위원회 보고서', zh: '委员会报告', + }, + }, + mot: { + singular: { + en: 'Motion', sv: 'Motion', da: 'Forslag', no: 'Forslag', + fi: 'Aloite', de: 'Antrag', fr: 'Motion', es: 'Moción', + nl: 'Motie', ar: 'مقترح', he: 'הצעה', ja: '動議', ko: '동의안', zh: '动议', + }, + plural: { + en: 'Motions', sv: 'Motioner', da: 'Forslag', no: 'Forslag', + fi: 'Aloitteet', de: 'Anträge', fr: 'Motions', es: 'Mociones', + nl: 'Moties', ar: 'مقترحات', he: 'הצעות', ja: '動議', ko: '동의안', zh: '动议', + }, + }, + skr: { + singular: { + en: 'Government Communication', sv: 'Skrivelse', da: 'Regeringsskrivelse', no: 'Regjeringsskriv', + fi: 'Valtioneuvoston kirjelmä', de: 'Regierungsschreiben', fr: 'Communication du gouvernement', es: 
'Comunicación del gobierno', + nl: 'Regeringsmededeling', ar: 'مذكرة حكومية', he: 'מכתב ממשלתי', ja: '政府通信文書', ko: '정부 통신문', zh: '政府公文', + }, + plural: { + en: 'Government Communications', sv: 'Skrivelser', da: 'Regeringsskrivelser', no: 'Regjeringsskriv', + fi: 'Valtioneuvoston kirjelmät', de: 'Regierungsschreiben', fr: 'Communications du gouvernement', es: 'Comunicaciones del gobierno', + nl: 'Regeringsmededelingen', ar: 'مذكرات حكومية', he: 'מכתבים ממשלתיים', ja: '政府通信文書', ko: '정부 통신문', zh: '政府公文', + }, + }, + sfs: { + singular: { + en: 'Law/Statute', sv: 'Lag/förordning', da: 'Lov/forordning', no: 'Lov/forordning', + fi: 'Laki/asetus', de: 'Gesetz/Verordnung', fr: 'Loi/Règlement', es: 'Ley/Reglamento', + nl: 'Wet/Verordening', ar: 'قانون / لائحة', he: 'חוק/תקנה', ja: '法律/条例', ko: '법률/법규', zh: '法律/法规', + }, + plural: { + en: 'Laws/Statutes', sv: 'Lagar/förordningar', da: 'Love/forordninger', no: 'Lover/forordninger', + fi: 'Lait/asetukset', de: 'Gesetze/Verordnungen', fr: 'Lois/Règlements', es: 'Leyes/Reglamentos', + nl: 'Wetten/Verordeningen', ar: 'قوانين / لوائح', he: 'חוקים/תקנות', ja: '法律/条例', ko: '법률/법규', zh: '法律/法规', + }, + }, + fpm: { + singular: { + en: 'EU Position Paper', sv: 'Faktapromemoria', da: 'EU-faktanota', no: 'EU-faktanotat', + fi: 'EU-tietomuistio', de: 'EU-Positionspapier', fr: 'Note de position UE', es: 'Documento de posición de la UE', + nl: 'EU-positiepaper', ar: 'ورقة موقف للاتحاد الأوروبي', he: 'מסמך עמדה של האיחוד האירופי', ja: 'EUポジションペーパー', ko: 'EU 입장 문서', zh: '欧盟立场文件', + }, + plural: { + en: 'EU Position Papers', sv: 'Faktapromemorior', da: 'EU-faktanotaer', no: 'EU-faktanotater', + fi: 'EU-tietomuistiot', de: 'EU-Positionspapiere', fr: 'Notes de position UE', es: 'Documentos de posición de la UE', + nl: 'EU-positiepapers', ar: 'أوراق موقف للاتحاد الأوروبي', he: 'מסמכי עמדה של האיחוד האירופי', ja: 'EUポジションペーパー', ko: 'EU 입장 문서', zh: '欧盟立场文件', + }, + }, + pressm: { + singular: { + en: 'Press Release', sv: 'Pressmeddelande', da: 
'Pressemeddelelse', no: 'Pressemelding', + fi: 'Lehdistötiedote', de: 'Pressemitteilung', fr: 'Communiqué de presse', es: 'Comunicado de prensa', + nl: 'Persbericht', ar: 'بيان صحفي', he: 'הודעה לעיתונות', ja: 'プレスリリース', ko: '보도자료', zh: '新闻稿', + }, + plural: { + en: 'Press Releases', sv: 'Pressmeddelanden', da: 'Pressemeddelelser', no: 'Pressemeldinger', + fi: 'Lehdistötiedotteet', de: 'Pressemitteilungen', fr: 'Communiqués de presse', es: 'Comunicados de prensa', + nl: 'Persberichten', ar: 'بيانات صحفية', he: 'הודעות לעיתונות', ja: 'プレスリリース', ko: '보도자료', zh: '新闻稿', + }, + }, + ext: { + singular: { + en: 'External Reference', sv: 'Extern referens', da: 'Ekstern reference', no: 'Ekstern referanse', + fi: 'Ulkoinen viite', de: 'Externe Referenz', fr: 'Référence externe', es: 'Referencia externa', + nl: 'Externe referentie', ar: 'مرجع خارجي', he: 'הפניה חיצונית', ja: '外部参照', ko: '외부 참조', zh: '外部参考', + }, + plural: { + en: 'External References', sv: 'Externa referenser', da: 'Eksterne referencer', no: 'Eksterne referanser', + fi: 'Ulkoiset viitteet', de: 'Externe Referenzen', fr: 'Références externes', es: 'Referencias externas', + nl: 'Externe referenties', ar: 'مراجع خارجية', he: 'הפניות חיצוניות', ja: '外部参照', ko: '외부 참조', zh: '外部参考', + }, + }, + other: { + singular: { + en: 'Other Document', sv: 'Övrigt dokument', da: 'Andet dokument', no: 'Annet dokument', + fi: 'Muu asiakirja', de: 'Sonstiges Dokument', fr: 'Autre document', es: 'Otro documento', + nl: 'Overig document', ar: 'مستند آخر', he: 'מסמך אחר', ja: 'その他の文書', ko: '기타 문서', zh: '其他文件', + }, + plural: { + en: 'Other Documents', sv: 'Övriga dokument', da: 'Andre dokumenter', no: 'Andre dokumenter', + fi: 'Muut asiakirjat', de: 'Sonstige Dokumente', fr: 'Autres documents', es: 'Otros documentos', + nl: 'Overige documenten', ar: 'مستندات أخرى', he: 'מסמכים אחרים', ja: 'その他の文書', ko: '기타 문서', zh: '其他文件', + }, + }, +}; + +/** Localise raw Riksdag document type codes for display (singular/plural-aware, 
multi-language). */ +export function localizeDocType(code: string, lang: Language | string, count?: number): string { + const entry = DOC_TYPE_DISPLAY[code]; + if (!entry) return code; + const usePlural = count !== 1; + const primary = usePlural ? entry.plural : entry.singular; + const fallback = usePlural ? entry.singular : entry.plural; + return primary[lang as Language] ?? primary.en ?? fallback[lang as Language] ?? fallback.en ?? code; +} + +/** Per-language title-suffix templates for inverted-pyramid lede construction. */ +export const TITLE_SUFFIX_TEMPLATES: Readonly string>> = { + sv: t => ` — inklusive "${t}"`, + da: t => ` — herunder "${t}"`, + no: t => ` — inkludert "${t}"`, + fi: t => ` — mukaan lukien "${t}"`, + de: t => ` — darunter "${t}"`, + fr: t => ` — notamment "${t}"`, + es: t => ` — incluyendo "${t}"`, + nl: t => ` — inclusief "${t}"`, + ar: t => ` — بما فيها "${t}"`, + he: t => ` — כולל "${t}"`, + ja: t => `、「${t}」を含む`, + ko: t => `, "${t}" 포함`, + zh: t => `,包括"${t}"`, +}; diff --git a/scripts/data-transformers/content-generators/event-helpers.ts b/scripts/data-transformers/content-generators/event-helpers.ts new file mode 100644 index 000000000..caec666f9 --- /dev/null +++ b/scripts/data-transformers/content-generators/event-helpers.ts @@ -0,0 +1,62 @@ +/** + * @module data-transformers/content-generators/event-helpers + * @description Calendar event and document matching helpers. + * Provides keyword extraction, related document lookup, and + * minister extraction from interpellation summaries. 
+ * + * @author Hack23 AB + * @license Apache-2.0 + */ + +import type { RawDocument, RawCalendarEvent } from '../types.js'; + +/** Extract meaningful keywords from text for cross-reference matching + * (min 2 chars, captures EU, KU, etc.; splits on whitespace, hyphens, and commas) */ +function extractKeywords(text: string): string[] { + return text.toLowerCase().split(/[\s,–-]+/u).filter(w => w.length >= 2); +} + +/** Find documents related to a calendar event by organ match or keyword overlap (max 3) */ +export function findRelatedDocuments(event: RawCalendarEvent, documents: RawDocument[]): RawDocument[] { + const eventOrgan = event.organ ?? ''; + const keywords = extractKeywords(event.rubrik ?? event.titel ?? event.title ?? ''); + return documents.filter(doc => { + const docOrgan = doc.organ ?? doc.committee ?? ''; + if (eventOrgan && docOrgan && eventOrgan.toLowerCase() === docOrgan.toLowerCase()) return true; + const docText = (doc.titel ?? doc.title ?? '').toLowerCase(); + return keywords.some(kw => docText.includes(kw)); + }).slice(0, 3); +} + +/** Find written questions related to a calendar event by keyword overlap (max 3) */ +export function findRelatedQuestions(event: RawCalendarEvent, questions: RawDocument[]): RawDocument[] { + const keywords = extractKeywords(event.rubrik ?? event.titel ?? event.title ?? ''); + return questions.filter(q => { + const qText = (q.titel ?? q.title ?? '').toLowerCase(); + return keywords.some(kw => qText.includes(kw)); + }).slice(0, 3); +} + +/** Extract targeted minister name from interpellation summary "till MINISTER" header line. + * Strips trailing topic clauses ("om X", "angående Y", etc.) and punctuation. 
*/ +export function extractMinister(summary: string): string { + // Use non-newline whitespace ([^\S\n]+) so we don't cross into the next line + const m = summary.match(/\btill[^\S\n]+([^\n]+)/i); + if (!m) return ''; + const raw = m[1].trim(); + if (!raw) return ''; + + // Remove common trailing topic clauses and punctuation + const lowerRaw = raw.toLowerCase(); + const stopPhrases = [' om ', ' angående ', ' rörande ', ' beträffande ']; + let end = raw.length; + for (const phrase of stopPhrases) { + const idx = lowerRaw.indexOf(phrase); + if (idx !== -1 && idx < end) end = idx; + } + // Cut at terminating punctuation if it comes earlier + const punctIdx = raw.search(/[?:;.,]/); + if (punctIdx !== -1 && punctIdx < end) end = punctIdx; + + return raw.slice(0, end).trim(); +} diff --git a/scripts/data-transformers/content-generators/framework-renderers.ts b/scripts/data-transformers/content-generators/framework-renderers.ts new file mode 100644 index 000000000..747b17ed0 --- /dev/null +++ b/scripts/data-transformers/content-generators/framework-renderers.ts @@ -0,0 +1,353 @@ +/** + * @module data-transformers/content-generators/framework-renderers + * @description HTML renderers for the document analysis framework sections: + * PESTLE analysis, stakeholder impact, risk assessment, and implementation assessment. + * Also contains the stub types and stub functions that replace the formerly + * AI-generated analysis modules (per ai-driven-analysis-guide.md Rule 2). + * + * ⚠️ DEPRECATED FOR ANALYSIS GENERATION (v3.0, 2026-04-02): + * All render functions produce fallback stub HTML. + * AI agents in agentic workflow .md files MUST overwrite this output + * with genuine political intelligence. 
+ * + * @author Hack23 AB + * @license Apache-2.0 + */ + +import { escapeHtml } from '../../html-utils.js'; +import type { Language } from '../../types/language.js'; +import type { CIAContext } from '../types.js'; + +// --------------------------------------------------------------------------- +// Stub types — local to this module; replaced by AI-driven analysis +// --------------------------------------------------------------------------- + +export interface PESTLEDimensions { + political: string[]; + economic: string[]; + social: string[]; + technological: string[]; + legal: string[]; + environmental: string[]; +} + +export interface StakeholderDirectImpact { + direction: 'positive' | 'negative' | 'mixed' | 'neutral'; + magnitude: 'significant' | 'moderate' | 'minor'; + summary: string; +} + +export interface StakeholderImpact { + stakeholder: string; + displayName: string; + directImpact: StakeholderDirectImpact; + confidence: string; + implementationBurden: 'high' | 'medium' | 'low'; +} + +export interface ImplementationAssessment { + feasibility: 'high' | 'medium' | 'low'; + keyObstacles: string[]; + agenciesInvolved: string[]; + timeline: string; + estimatedTimeline: string; + summary: string; +} + +export interface DocumentAnalysis { + pestleDimensions: PESTLEDimensions; + stakeholderImpacts: StakeholderImpact[]; + implementationAssessment: ImplementationAssessment; + riskAssessment: RiskAssessment[]; + [key: string]: unknown; +} + +export type PESTLEAnalysis = PESTLEDimensions; + +export interface RiskAssessment { + type: 'political' | 'implementation' | 'public-acceptance' | 'legal' | 'financial'; + severity: 'high' | 'medium' | 'low'; + description: string; +} + +export interface BatchAnalysisResult { results: unknown[] } + +// --------------------------------------------------------------------------- +// Stub functions — return empty data; real analysis is AI-driven in workflows +// 
--------------------------------------------------------------------------- + +/** Stub: returns empty analysis. Real analysis is AI-driven in workflows. */ +export function analyzeDocumentsBatch(_docs: unknown[], _lang?: Language | string, _cia?: CIAContext): Map { + return new Map(); +} + +/** Stub: returns empty perspectives. Real analysis is AI-driven in workflows. */ +export function analyzeDocumentsPerspectives(_docs: unknown[], _cia?: CIAContext, _lang?: Language | string): BatchAnalysisResult { + return { results: [] }; +} + +// --------------------------------------------------------------------------- +// Display constants +// --------------------------------------------------------------------------- + +/** Max items per PESTLE dimension in aggregated display */ +const MAX_PESTLE_ITEMS = 4; +/** Max stakeholder impacts shown in the summary list */ +const MAX_STAKEHOLDER_IMPACTS = 7; +/** Max risk items shown in the risk assessment summary */ +const MAX_RISK_ITEMS = 5; +/** Max perspective insights shown from 6-lens analysis */ +export const MAX_PERSPECTIVE_INSIGHTS = 5; +/** Max implementation obstacles listed */ +const MAX_IMPLEMENTATION_OBSTACLES = 4; +/** Max agencies displayed in implementation assessment */ +const MAX_AGENCIES_DISPLAYED = 5; + +// --------------------------------------------------------------------------- +// Multi-language label tables +// --------------------------------------------------------------------------- + +const PESTLE_LABELS: Readonly>> = { + en: { political: 'Political', economic: 'Economic', social: 'Social', technological: 'Technological', legal: 'Legal', environmental: 'Environmental' }, + sv: { political: 'Politisk', economic: 'Ekonomisk', social: 'Social', technological: 'Teknologisk', legal: 'Juridisk', environmental: 'Miljö' }, + da: { political: 'Politisk', economic: 'Økonomisk', social: 'Social', technological: 'Teknologisk', legal: 'Juridisk', environmental: 'Miljø' }, + no: { political: 'Politisk', economic: 
'Økonomisk', social: 'Sosial', technological: 'Teknologisk', legal: 'Juridisk', environmental: 'Miljø' }, + fi: { political: 'Poliittinen', economic: 'Taloudellinen', social: 'Sosiaalinen', technological: 'Teknologinen', legal: 'Oikeudellinen', environmental: 'Ympäristö' }, + de: { political: 'Politisch', economic: 'Wirtschaftlich', social: 'Sozial', technological: 'Technologisch', legal: 'Rechtlich', environmental: 'Umwelt' }, + fr: { political: 'Politique', economic: 'Économique', social: 'Social', technological: 'Technologique', legal: 'Juridique', environmental: 'Environnemental' }, + es: { political: 'Político', economic: 'Económico', social: 'Social', technological: 'Tecnológico', legal: 'Jurídico', environmental: 'Ambiental' }, + nl: { political: 'Politiek', economic: 'Economisch', social: 'Sociaal', technological: 'Technologisch', legal: 'Juridisch', environmental: 'Milieu' }, + ar: { political: 'سياسي', economic: 'اقتصادي', social: 'اجتماعي', technological: 'تقني', legal: 'قانوني', environmental: 'بيئي' }, + he: { political: 'פוליטי', economic: 'כלכלי', social: 'חברתי', technological: 'טכנולוגי', legal: 'משפטי', environmental: 'סביבתי' }, + ja: { political: '政治', economic: '経済', social: '社会', technological: '技術', legal: '法的', environmental: '環境' }, + ko: { political: '정치', economic: '경제', social: '사회', technological: '기술', legal: '법률', environmental: '환경' }, + zh: { political: '政治', economic: '经济', social: '社会', technological: '技术', legal: '法律', environmental: '环境' }, +}; + +const RISK_TYPE_LABELS: Readonly>> = { + en: { political: 'Political', implementation: 'Implementation', 'public-acceptance': 'Public acceptance', legal: 'Legal', financial: 'Financial' }, + sv: { political: 'Politisk', implementation: 'Genomförande', 'public-acceptance': 'Offentlig acceptans', legal: 'Juridisk', financial: 'Finansiell' }, + da: { political: 'Politisk', implementation: 'Implementering', 'public-acceptance': 'Offentlig accept', legal: 'Juridisk', financial: 'Finansiel' 
}, + no: { political: 'Politisk', implementation: 'Implementering', 'public-acceptance': 'Offentlig aksept', legal: 'Juridisk', financial: 'Finansiell' }, + fi: { political: 'Poliittinen', implementation: 'Toteutus', 'public-acceptance': 'Julkinen hyväksyntä', legal: 'Oikeudellinen', financial: 'Taloudellinen' }, + de: { political: 'Politisch', implementation: 'Umsetzung', 'public-acceptance': 'Öffentliche Akzeptanz', legal: 'Rechtlich', financial: 'Finanziell' }, + fr: { political: 'Politique', implementation: 'Mise en œuvre', 'public-acceptance': 'Acceptation publique', legal: 'Juridique', financial: 'Financier' }, + es: { political: 'Político', implementation: 'Implementación', 'public-acceptance': 'Aceptación pública', legal: 'Jurídico', financial: 'Financiero' }, + nl: { political: 'Politiek', implementation: 'Implementatie', 'public-acceptance': 'Publieke acceptatie', legal: 'Juridisch', financial: 'Financieel' }, + ar: { political: 'سياسي', implementation: 'تنفيذي', 'public-acceptance': 'القبول العام', legal: 'قانوني', financial: 'مالي' }, + he: { political: 'פוליטי', implementation: 'יישום', 'public-acceptance': 'קבלה ציבורית', legal: 'משפטי', financial: 'פיננסי' }, + ja: { political: '政治', implementation: '実装', 'public-acceptance': '世論受容', legal: '法的', financial: '財政' }, + ko: { political: '정치', implementation: '이행', 'public-acceptance': '대중 수용성', legal: '법률', financial: '재정' }, + zh: { political: '政治', implementation: '实施', 'public-acceptance': '公众接受度', legal: '法律', financial: '财政' }, +}; + +const LEVEL_LABELS: Readonly>> = { + en: { high: 'High', medium: 'Medium', low: 'Low' }, + sv: { high: 'Hög', medium: 'Medel', low: 'Låg' }, + da: { high: 'Høj', medium: 'Mellem', low: 'Lav' }, + no: { high: 'Høy', medium: 'Middels', low: 'Lav' }, + fi: { high: 'Korkea', medium: 'Keskitaso', low: 'Matala' }, + de: { high: 'Hoch', medium: 'Mittel', low: 'Niedrig' }, + fr: { high: 'Élevé', medium: 'Moyen', low: 'Faible' }, + es: { high: 'Alto', medium: 'Medio', low: 
'Bajo' }, + nl: { high: 'Hoog', medium: 'Middel', low: 'Laag' }, + ar: { high: 'مرتفع', medium: 'متوسط', low: 'منخفض' }, + he: { high: 'גבוה', medium: 'בינוני', low: 'נמוך' }, + ja: { high: '高', medium: '中', low: '低' }, + ko: { high: '높음', medium: '보통', low: '낮음' }, + zh: { high: '高', medium: '中', low: '低' }, +}; + +const IMPLEMENTATION_LABELS: Readonly> = { + en: { feasibility: 'Feasibility', obstacles: 'Key obstacles', agencies: 'Agencies involved', noStakeholderData: 'No stakeholder impact data available.', noImplementationData: 'No implementation data available.', burden: 'Burden' }, + sv: { feasibility: 'Genomförbarhet', obstacles: 'Viktiga hinder', agencies: 'Berörda myndigheter', noStakeholderData: 'Ingen data om intressentpåverkan tillgänglig.', noImplementationData: 'Ingen implementeringsdata tillgänglig.', burden: 'Belastning' }, + da: { feasibility: 'Gennemførlighed', obstacles: 'Vigtige hindringer', agencies: 'Involverede myndigheder', noStakeholderData: 'Ingen data om interessentpåvirkning tilgængelig.', noImplementationData: 'Ingen implementeringsdata tilgængelig.', burden: 'Byrde' }, + no: { feasibility: 'Gjennomførbarhet', obstacles: 'Viktige hindringer', agencies: 'Involverte etater', noStakeholderData: 'Ingen data om interessentpåvirkning tilgjengelig.', noImplementationData: 'Ingen implementeringsdata tilgjengelig.', burden: 'Belastning' }, + fi: { feasibility: 'Toteutettavuus', obstacles: 'Keskeiset esteet', agencies: 'Mukana olevat viranomaiset', noStakeholderData: 'Sidosryhmävaikutustietoa ei saatavilla.', noImplementationData: 'Toteutustietoa ei saatavilla.', burden: 'Rasite' }, + de: { feasibility: 'Umsetzbarkeit', obstacles: 'Wesentliche Hindernisse', agencies: 'Beteiligte Behörden', noStakeholderData: 'Keine Daten zu Stakeholder-Auswirkungen verfügbar.', noImplementationData: 'Keine Umsetzungsdaten verfügbar.', burden: 'Belastung' }, + fr: { feasibility: 'Faisabilité', obstacles: 'Obstacles clés', agencies: 'Agences impliquées', 
noStakeholderData: 'Aucune donnée d\'impact des parties prenantes disponible.', noImplementationData: 'Aucune donnée de mise en œuvre disponible.', burden: 'Charge' }, + es: { feasibility: 'Viabilidad', obstacles: 'Obstáculos clave', agencies: 'Organismos implicados', noStakeholderData: 'No hay datos de impacto en partes interesadas.', noImplementationData: 'No hay datos de implementación disponibles.', burden: 'Carga' }, + nl: { feasibility: 'Haalbaarheid', obstacles: 'Belangrijkste obstakels', agencies: 'Betrokken instanties', noStakeholderData: 'Geen gegevens over impact op belanghebbenden beschikbaar.', noImplementationData: 'Geen implementatiegegevens beschikbaar.', burden: 'Last' }, + ar: { feasibility: 'قابلية التنفيذ', obstacles: 'العقبات الرئيسية', agencies: 'الجهات المعنية', noStakeholderData: 'لا تتوفر بيانات تأثير أصحاب المصلحة.', noImplementationData: 'لا تتوفر بيانات تنفيذ.', burden: 'العبء' }, + he: { feasibility: 'ישימות', obstacles: 'חסמים מרכזיים', agencies: 'גורמים מעורבים', noStakeholderData: 'אין נתוני השפעה על בעלי עניין.', noImplementationData: 'אין נתוני יישום.', burden: 'נטל' }, + ja: { feasibility: '実現可能性', obstacles: '主な障害', agencies: '関係機関', noStakeholderData: 'ステークホルダー影響データはありません。', noImplementationData: '実施データはありません。', burden: '負担' }, + ko: { feasibility: '실행 가능성', obstacles: '주요 장애 요인', agencies: '관여 기관', noStakeholderData: '이해관계자 영향 데이터가 없습니다.', noImplementationData: '이행 데이터가 없습니다.', burden: '부담' }, + zh: { feasibility: '可实施性', obstacles: '关键障碍', agencies: '涉及机构', noStakeholderData: '暂无利益相关方影响数据。', noImplementationData: '暂无实施数据。', burden: '负担' }, +}; + +// --------------------------------------------------------------------------- +// Localization helpers +// --------------------------------------------------------------------------- + +function localizeLevel(level: 'high' | 'medium' | 'low', lang: Language | string): string { + return LEVEL_LABELS[lang as string]?.[level] ?? 
LEVEL_LABELS.en[level]; +} + +function localizeRiskType(type: RiskAssessment['type'], lang: Language | string): string { + return RISK_TYPE_LABELS[lang as string]?.[type] ?? RISK_TYPE_LABELS.en[type]; +} + +function localizedImplementationLabels(lang: Language | string): { feasibility: string; obstacles: string; agencies: string; noStakeholderData: string; noImplementationData: string; burden: string } { + return IMPLEMENTATION_LABELS[lang as string] ?? IMPLEMENTATION_LABELS.en; +} + +// --------------------------------------------------------------------------- +// Ranking helpers +// --------------------------------------------------------------------------- + +function severityRank(s: string): number { + return s === 'high' ? 3 : s === 'medium' ? 2 : 1; +} + +function magnitudeRank(magnitude: 'significant' | 'moderate' | 'minor'): number { + return magnitude === 'significant' ? 3 : magnitude === 'moderate' ? 2 : 1; +} + +function feasibilityRank(f: string): number { + return f === 'high' ? 3 : f === 'medium' ? 2 : 1; +} + +// --------------------------------------------------------------------------- +// Framework analysis HTML renderers +// --------------------------------------------------------------------------- + +/** + * Aggregate PESTLE dimensions across multiple document analyses into a + * deduplicated list per dimension and render as an HTML description list. 
+ */ +export function renderAggregatedPestle(analyses: DocumentAnalysis[], lang: Language | string): string { + const merged: PESTLEAnalysis = { + political: [], economic: [], social: [], + technological: [], legal: [], environmental: [], + }; + + for (const a of analyses) { + const p = a.pestleDimensions; + merged.political.push(...p.political); + merged.economic.push(...p.economic); + merged.social.push(...p.social); + merged.technological.push(...p.technological); + merged.legal.push(...p.legal); + merged.environmental.push(...p.environmental); + } + + // Deduplicate per dimension + const dedup = (arr: string[]): string[] => [...new Set(arr)].slice(0, MAX_PESTLE_ITEMS); + + const labels = PESTLE_LABELS[lang as string] ?? PESTLE_LABELS.en; + const dims: Array<[string, string[]]> = [ + [labels.political, dedup(merged.political)], + [labels.economic, dedup(merged.economic)], + [labels.social, dedup(merged.social)], + [labels.technological, dedup(merged.technological)], + [labels.legal, dedup(merged.legal)], + [labels.environmental, dedup(merged.environmental)], + ]; + + const items = dims + .filter(([, items]) => items.length > 0) + .map(([label, items]) => + `
${escapeHtml(label)}
\n
${items.map(i => escapeHtml(i)).join(' ')}
`, + ) + .join('\n'); + + return `
\n${items}\n
`; +} + +/** + * Render a summary of stakeholder impacts across all analysed documents. + * Shows up to 7 stakeholder groups with impact direction, confidence, and burden. + */ +export function renderStakeholderImpactSummary(analyses: DocumentAnalysis[], lang: Language | string): string { + const labels = localizedImplementationLabels(lang); + // Collect all stakeholder impacts, deduplicated by stakeholder name + const impactMap = new Map(); + for (const a of analyses) { + for (const impact of a.stakeholderImpacts) { + // Keep the higher-magnitude impact per stakeholder + const existing = impactMap.get(impact.stakeholder); + if (!existing || magnitudeRank(impact.directImpact.magnitude) > magnitudeRank(existing.directImpact.magnitude)) { + impactMap.set(impact.stakeholder, impact); + } + } + } + + const impacts = [...impactMap.values()].slice(0, MAX_STAKEHOLDER_IMPACTS); + if (impacts.length === 0) return `

${escapeHtml(labels.noStakeholderData)}

`; + + const rows = impacts.map(i => { + const directionIcon = + i.directImpact.direction === 'positive' ? '↑' + : i.directImpact.direction === 'negative' ? '↓' + : i.directImpact.direction === 'mixed' ? '↕' + : '→'; + const burdenText = localizeLevel(i.implementationBurden, lang); + return `
  • ${escapeHtml(i.displayName)}: ${directionIcon} ${escapeHtml(i.directImpact.summary)} (${escapeHtml(i.confidence)}; ${escapeHtml(labels.burden)}: ${escapeHtml(burdenText)})
  • `; + }); + + return `
      \n${rows.join('\n')}\n
    `; +} + +/** + * Render a risk assessment summary. Groups risks by type and keeps the + * highest-severity risk per type. + */ +export function renderRiskAssessment(risks: RiskAssessment[], lang: Language | string): string { + // Deduplicate by type, preferring higher severity + const byType = new Map(); + for (const r of risks) { + const key = r.type; + const existing = byType.get(key); + if (!existing || severityRank(r.severity) > severityRank(existing.severity)) { + byType.set(key, r); + } + } + + const top = [...byType.values()].slice(0, MAX_RISK_ITEMS); + const rows = top.map(r => { + const icon = r.severity === 'high' ? '🔴' : r.severity === 'medium' ? '🟡' : '🟢'; + return `
  • ${icon} ${escapeHtml(localizeRiskType(r.type, lang))} (${escapeHtml(localizeLevel(r.severity, lang))}): ${escapeHtml(r.description)}
  • `; + }); + + return `
      \n${rows.join('\n')}\n
    `; +} + +/** + * Render implementation assessment summary from framework analyses. + */ +export function renderImplementationAssessment(analyses: DocumentAnalysis[], lang: Language | string): string { + const labels = localizedImplementationLabels(lang); + const assessments: ImplementationAssessment[] = analyses.map(a => a.implementationAssessment); + if (assessments.length === 0) return `

    ${escapeHtml(labels.noImplementationData)}

    `; + + // Aggregate obstacles and agencies across all documents + const allObstacles = new Set(); + const allAgencies = new Set(); + let highestFeasibility: ImplementationAssessment['feasibility'] = 'high'; + let selectedAssessment: ImplementationAssessment = assessments[0]; + + for (const ia of assessments) { + ia.keyObstacles.forEach(o => allObstacles.add(o)); + ia.agenciesInvolved.forEach(a => allAgencies.add(a)); + if (feasibilityRank(ia.feasibility) < feasibilityRank(highestFeasibility)) { + highestFeasibility = ia.feasibility; + selectedAssessment = ia; + } + } + + const parts: string[] = []; + const fIcon = highestFeasibility === 'high' ? '🟢' : highestFeasibility === 'medium' ? '🟡' : '🔴'; + const timeline = selectedAssessment.estimatedTimeline; + parts.push(`

    ${fIcon} ${escapeHtml(labels.feasibility)}: ${escapeHtml(localizeLevel(highestFeasibility, lang))}. ${escapeHtml(timeline)}

    `); + + if (allObstacles.size > 0) { + const obstacleList = [...allObstacles].slice(0, MAX_IMPLEMENTATION_OBSTACLES).map(o => `
  • ${escapeHtml(o)}
  • `).join(''); + parts.push(`

    ${escapeHtml(labels.obstacles)}:

    \n
      ${obstacleList}
    `); + } + + if (allAgencies.size > 0) { + parts.push(`

    ${escapeHtml(labels.agencies)}: ${[...allAgencies].slice(0, MAX_AGENCIES_DISPLAYED).map(a => escapeHtml(a)).join(', ')}

    `); + } + + return parts.join('\n'); +} diff --git a/scripts/data-transformers/content-generators/impact-helpers.ts b/scripts/data-transformers/content-generators/impact-helpers.ts new file mode 100644 index 000000000..4cc1c1f71 --- /dev/null +++ b/scripts/data-transformers/content-generators/impact-helpers.ts @@ -0,0 +1,110 @@ +/** + * @module data-transformers/content-generators/impact-helpers + * @description Political impact and legislative consequences analysis helpers. + * Generates AI_MUST_REPLACE stub paragraphs for political impact, + * implementation consequences, and related analysis sections. + * + * ⚠️ DEPRECATED FOR ANALYSIS GENERATION (v3.0, 2026-04-02): + * All functions in this module produce fallback stub text. + * AI agents in agentic workflow .md files MUST overwrite this + * template-generated text with genuine political intelligence. + * + * @author Hack23 AB + * @license Apache-2.0 + */ + +import type { RawDocument } from '../types.js'; +import type { CIAContext } from '../types.js'; +import type { Language } from '../../types/language.js'; + +// --------------------------------------------------------------------------- +// Political impact analysis (AI_MUST_REPLACE stubs) +// --------------------------------------------------------------------------- + +function propImpactText(_lang: Language | string, n: number): string { + return ``; +} + +function betImpactText(_lang: Language | string, n: number): string { + return ``; +} + +function motImpactText(_lang: Language | string, n: number): string { + return ``; +} + +function thinMajorityImpactText(_lang: Language | string, margin: number): string { + return ``; +} + +function genericImpactText(_lang: Language | string): string { + return ''; +} + +/** + * Generate a political impact analysis paragraph for the deep analysis section. + * Returns an AI_MUST_REPLACE stub that AI agents must overwrite. 
+ */ +export function generateImpactAnalysis(docs: RawDocument[], lang: Language | string, cia: CIAContext | undefined): string { + const parts: string[] = []; + + const propCount = docs.filter(d => d.doktyp === 'prop').length; + const motCount = docs.filter(d => d.doktyp === 'mot').length; + const betCount = docs.filter(d => d.doktyp === 'bet').length; + + if (propCount > 0) { + parts.push(propImpactText(lang, propCount)); + } + if (betCount > 0) { + parts.push(betImpactText(lang, betCount)); + } + if (motCount > 0) { + parts.push(motImpactText(lang, motCount)); + } + + if (cia) { + const margin = cia.coalitionStability?.majorityMargin ?? 0; + if (margin <= 5) { + parts.push(thinMajorityImpactText(lang, margin)); + } + } + + return parts.join(' ') || genericImpactText(lang); +} + +// --------------------------------------------------------------------------- +// Legislative consequences analysis (AI_MUST_REPLACE stubs) +// --------------------------------------------------------------------------- + +function propConsequencesText(_lang: Language | string, n: number): string { + return ``; +} + +function motConsequencesText(_lang: Language | string, n: number): string { + return ``; +} + +function genericConsequencesText(_lang: Language | string): string { + return ''; +} + +/** + * Generate a legislative consequences analysis paragraph for the deep analysis section. + * Returns AI_MUST_REPLACE stubs that AI agents must overwrite. 
+ */ +export function generateConsequencesAnalysis(docs: RawDocument[], lang: Language | string, _articleType: string): string { + const propCount = docs.filter(d => d.doktyp === 'prop').length; + const motCount = docs.filter(d => d.doktyp === 'mot').length; + const parts: string[] = []; + + if (propCount > 0) { + parts.push(propConsequencesText(lang, propCount)); + } + if (motCount > 0) { + parts.push(motConsequencesText(lang, motCount)); + } + if (parts.length === 0) { + parts.push(genericConsequencesText(lang)); + } + return parts.join(' '); +} diff --git a/scripts/data-transformers/content-generators/shared.ts b/scripts/data-transformers/content-generators/shared.ts index a6258bf0d..268fadc21 100644 --- a/scripts/data-transformers/content-generators/shared.ts +++ b/scripts/data-transformers/content-generators/shared.ts @@ -4,6 +4,13 @@ * Contains TITLE_SUFFIX_TEMPLATES, keyword extraction, event/document matching helpers, * and the deep analysis section generator (5W framework). * + * Implementation is split into focused sub-modules: + * - doc-type-helpers.ts — DOC_TYPE_DISPLAY, localizeDocType, TITLE_SUFFIX_TEMPLATES + * - event-helpers.ts — findRelatedDocuments, findRelatedQuestions, extractMinister + * - impact-helpers.ts — generateImpactAnalysis, generateConsequencesAnalysis + * - framework-renderers.ts — PESTLE/stakeholder/risk/implementation HTML renderers + * - ai-marker-helpers.ts — detectBannedPatterns + * * ⚠️ DEPRECATED FOR ANALYSIS GENERATION (v3.0, 2026-04-02): * Per analysis/methodologies/ai-driven-analysis-guide.md Rule 2, the following * functions are DEPRECATED for generating analysis content: @@ -26,265 +33,26 @@ import { escapeHtml } from '../../html-utils.js'; import type { Language } from '../../types/language.js'; -import type { RawDocument, RawCalendarEvent, CIAContext } from '../types.js'; +import type { RawDocument, CIAContext } from '../types.js'; import { L, normalizePartyKey } from '../helpers.js'; import { detectPolicyDomains } from 
'../policy-analysis.js'; -/* ── Stub types/functions for deleted analysis modules ── */ -/* Per ai-driven-analysis-guide.md Rule 2: scripts must NOT generate analysis */ - -interface PESTLEDimensions { - political: string[]; - economic: string[]; - social: string[]; - technological: string[]; - legal: string[]; - environmental: string[]; -} - -interface StakeholderDirectImpact { - direction: 'positive' | 'negative' | 'mixed' | 'neutral'; - magnitude: 'significant' | 'moderate' | 'minor'; - summary: string; -} - -interface StakeholderImpact { - stakeholder: string; - displayName: string; - directImpact: StakeholderDirectImpact; - confidence: string; - implementationBurden: 'high' | 'medium' | 'low'; -} - -interface ImplementationAssessment { - feasibility: 'high' | 'medium' | 'low'; - keyObstacles: string[]; - agenciesInvolved: string[]; - timeline: string; - estimatedTimeline: string; - summary: string; -} - -interface DocumentAnalysis { - pestleDimensions: PESTLEDimensions; - stakeholderImpacts: StakeholderImpact[]; - implementationAssessment: ImplementationAssessment; - riskAssessment: RiskAssessment[]; - [key: string]: unknown; -} - -type PESTLEAnalysis = PESTLEDimensions; - -interface RiskAssessment { - type: 'political' | 'implementation' | 'public-acceptance' | 'legal' | 'financial'; - severity: 'high' | 'medium' | 'low'; - description: string; -} - -interface BatchAnalysisResult { results: unknown[] } - -/** Stub: returns empty analysis. Real analysis is AI-driven in workflows. */ -function analyzeDocumentsBatch(_docs: unknown[], _lang?: Language | string, _cia?: CIAContext): Map { - return new Map(); -} - -/** Stub: returns empty perspectives. Real analysis is AI-driven in workflows. */ -function analyzeDocumentsPerspectives(_docs: unknown[], _cia?: CIAContext, _lang?: Language | string): BatchAnalysisResult { - return { results: [] }; -} - -/** Localise raw Riksdag document type codes for display (singular/plural-aware, multi-language). 
*/ -export type DocTypeLocalization = { - singular: Partial>; - plural: Partial>; -}; - -export const DOC_TYPE_DISPLAY: Readonly> = { - prop: { - singular: { - en: 'Proposition', sv: 'Proposition', da: 'Proposition', no: 'Proposisjon', - fi: 'Hallituksen esitys', de: 'Regierungsvorlage', fr: 'Projet de loi', es: 'Proposición', - nl: 'Wetsvoorstel', ar: 'مقترح قانون', he: 'הצעת חוק', ja: '法案', ko: '정부 제출 법안', zh: '政府法案', - }, - plural: { - en: 'Propositions', sv: 'Propositioner', da: 'Propositioner', no: 'Proposisjoner', - fi: 'Hallituksen esitykset', de: 'Regierungsvorlagen', fr: 'Projets de loi', es: 'Proposiciones', - nl: 'Wetsvoorstellen', ar: 'مقترحات قوانين', he: 'הצעות חוק', ja: '法案', ko: '정부 제출 법안', zh: '政府法案', - }, - }, - bet: { - singular: { - en: 'Committee Report', sv: 'Betänkande', da: 'Udvalgsbetænkning', no: 'Komitéinnstilling', - fi: 'Valiokunnan mietintö', de: 'Ausschussbericht', fr: 'Rapport de commission', es: 'Informe de comisión', - nl: 'Commissieverslag', ar: 'تقرير لجنة', he: 'דוח ועדה', ja: '委員会報告書', ko: '위원회 보고서', zh: '委员会报告', - }, - plural: { - en: 'Committee Reports', sv: 'Betänkanden', da: 'Udvalgsbetænkninger', no: 'Komitéinnstillinger', - fi: 'Valiokunnan mietinnöt', de: 'Ausschussberichte', fr: 'Rapports de commission', es: 'Informes de comisión', - nl: 'Commissieverslagen', ar: 'تقارير لجان', he: 'דוחות ועדה', ja: '委員会報告書', ko: '위원회 보고서', zh: '委员会报告', - }, - }, - mot: { - singular: { - en: 'Motion', sv: 'Motion', da: 'Forslag', no: 'Forslag', - fi: 'Aloite', de: 'Antrag', fr: 'Motion', es: 'Moción', - nl: 'Motie', ar: 'مقترح', he: 'הצעה', ja: '動議', ko: '동의안', zh: '动议', - }, - plural: { - en: 'Motions', sv: 'Motioner', da: 'Forslag', no: 'Forslag', - fi: 'Aloitteet', de: 'Anträge', fr: 'Motions', es: 'Mociones', - nl: 'Moties', ar: 'مقترحات', he: 'הצעות', ja: '動議', ko: '동의안', zh: '动议', - }, - }, - skr: { - singular: { - en: 'Government Communication', sv: 'Skrivelse', da: 'Regeringsskrivelse', no: 'Regjeringsskriv', - fi: 
'Valtioneuvoston kirjelmä', de: 'Regierungsschreiben', fr: 'Communication du gouvernement', es: 'Comunicación del gobierno', - nl: 'Regeringsmededeling', ar: 'مذكرة حكومية', he: 'מכתב ממשלתי', ja: '政府通信文書', ko: '정부 통신문', zh: '政府公文', - }, - plural: { - en: 'Government Communications', sv: 'Skrivelser', da: 'Regeringsskrivelser', no: 'Regjeringsskriv', - fi: 'Valtioneuvoston kirjelmät', de: 'Regierungsschreiben', fr: 'Communications du gouvernement', es: 'Comunicaciones del gobierno', - nl: 'Regeringsmededelingen', ar: 'مذكرات حكومية', he: 'מכתבים ממשלתיים', ja: '政府通信文書', ko: '정부 통신문', zh: '政府公文', - }, - }, - sfs: { - singular: { - en: 'Law/Statute', sv: 'Lag/förordning', da: 'Lov/forordning', no: 'Lov/forordning', - fi: 'Laki/asetus', de: 'Gesetz/Verordnung', fr: 'Loi/Règlement', es: 'Ley/Reglamento', - nl: 'Wet/Verordening', ar: 'قانون / لائحة', he: 'חוק/תקנה', ja: '法律/条例', ko: '법률/법규', zh: '法律/法规', - }, - plural: { - en: 'Laws/Statutes', sv: 'Lagar/förordningar', da: 'Love/forordninger', no: 'Lover/forordninger', - fi: 'Lait/asetukset', de: 'Gesetze/Verordnungen', fr: 'Lois/Règlements', es: 'Leyes/Reglamentos', - nl: 'Wetten/Verordeningen', ar: 'قوانين / لوائح', he: 'חוקים/תקנות', ja: '法律/条例', ko: '법률/법규', zh: '法律/法规', - }, - }, - fpm: { - singular: { - en: 'EU Position Paper', sv: 'Faktapromemoria', da: 'EU-faktanota', no: 'EU-faktanotat', - fi: 'EU-tietomuistio', de: 'EU-Positionspapier', fr: 'Note de position UE', es: 'Documento de posición de la UE', - nl: 'EU-positiepaper', ar: 'ورقة موقف للاتحاد الأوروبي', he: 'מסמך עמדה של האיחוד האירופי', ja: 'EUポジションペーパー', ko: 'EU 입장 문서', zh: '欧盟立场文件', - }, - plural: { - en: 'EU Position Papers', sv: 'Faktapromemorior', da: 'EU-faktanotaer', no: 'EU-faktanotater', - fi: 'EU-tietomuistiot', de: 'EU-Positionspapiere', fr: 'Notes de position UE', es: 'Documentos de posición de la UE', - nl: 'EU-positiepapers', ar: 'أوراق موقف للاتحاد الأوروبي', he: 'מסמכי עמדה של האיחוד האירופי', ja: 'EUポジションペーパー', ko: 'EU 입장 문서', zh: 
'欧盟立场文件', - }, - }, - pressm: { - singular: { - en: 'Press Release', sv: 'Pressmeddelande', da: 'Pressemeddelelse', no: 'Pressemelding', - fi: 'Lehdistötiedote', de: 'Pressemitteilung', fr: 'Communiqué de presse', es: 'Comunicado de prensa', - nl: 'Persbericht', ar: 'بيان صحفي', he: 'הודעה לעיתונות', ja: 'プレスリリース', ko: '보도자료', zh: '新闻稿', - }, - plural: { - en: 'Press Releases', sv: 'Pressmeddelanden', da: 'Pressemeddelelser', no: 'Pressemeldinger', - fi: 'Lehdistötiedotteet', de: 'Pressemitteilungen', fr: 'Communiqués de presse', es: 'Comunicados de prensa', - nl: 'Persberichten', ar: 'بيانات صحفية', he: 'הודעות לעיתונות', ja: 'プレスリリース', ko: '보도자료', zh: '新闻稿', - }, - }, - ext: { - singular: { - en: 'External Reference', sv: 'Extern referens', da: 'Ekstern reference', no: 'Ekstern referanse', - fi: 'Ulkoinen viite', de: 'Externe Referenz', fr: 'Référence externe', es: 'Referencia externa', - nl: 'Externe referentie', ar: 'مرجع خارجي', he: 'הפניה חיצונית', ja: '外部参照', ko: '외부 참조', zh: '外部参考', - }, - plural: { - en: 'External References', sv: 'Externa referenser', da: 'Eksterne referencer', no: 'Eksterne referanser', - fi: 'Ulkoiset viitteet', de: 'Externe Referenzen', fr: 'Références externes', es: 'Referencias externas', - nl: 'Externe referenties', ar: 'مراجع خارجية', he: 'הפניות חיצוניות', ja: '外部参照', ko: '외부 참조', zh: '外部参考', - }, - }, - other: { - singular: { - en: 'Other Document', sv: 'Övrigt dokument', da: 'Andet dokument', no: 'Annet dokument', - fi: 'Muu asiakirja', de: 'Sonstiges Dokument', fr: 'Autre document', es: 'Otro documento', - nl: 'Overig document', ar: 'مستند آخر', he: 'מסמך אחר', ja: 'その他の文書', ko: '기타 문서', zh: '其他文件', - }, - plural: { - en: 'Other Documents', sv: 'Övriga dokument', da: 'Andre dokumenter', no: 'Andre dokumenter', - fi: 'Muut asiakirjat', de: 'Sonstige Dokumente', fr: 'Autres documents', es: 'Otros documentos', - nl: 'Overige documenten', ar: 'مستندات أخرى', he: 'מסמכים אחרים', ja: 'その他の文書', ko: '기타 문서', zh: '其他文件', - }, - }, -}; - 
-export function localizeDocType(code: string, lang: Language | string, count?: number): string { - const entry = DOC_TYPE_DISPLAY[code]; - if (!entry) return code; - const usePlural = count !== 1; - const primary = usePlural ? entry.plural : entry.singular; - const fallback = usePlural ? entry.singular : entry.plural; - return primary[lang as Language] ?? primary.en ?? fallback[lang as Language] ?? fallback.en ?? code; -} - -/** Per-language title-suffix templates for inverted-pyramid lede construction. */ -export const TITLE_SUFFIX_TEMPLATES: Readonly string>> = { - sv: t => ` — inklusive "${t}"`, - da: t => ` — herunder "${t}"`, - no: t => ` — inkludert "${t}"`, - fi: t => ` — mukaan lukien "${t}"`, - de: t => ` — darunter "${t}"`, - fr: t => ` — notamment "${t}"`, - es: t => ` — incluyendo "${t}"`, - nl: t => ` — inclusief "${t}"`, - ar: t => ` — بما فيها "${t}"`, - he: t => ` — כולל "${t}"`, - ja: t => `、「${t}」を含む`, - ko: t => `, "${t}" 포함`, - zh: t => `,包括"${t}"`, -}; - -/** Extract meaningful keywords from text for cross-reference matching (min 2 chars, captures EU, KU, etc.; splits on whitespace, hyphens, and commas) */ -function extractKeywords(text: string): string[] { - return text.toLowerCase().split(/[\s,–-]+/u).filter(w => w.length >= 2); -} - -/** Find documents related to a calendar event by organ match or keyword overlap (max 3) */ -export function findRelatedDocuments(event: RawCalendarEvent, documents: RawDocument[]): RawDocument[] { - const eventOrgan = event.organ ?? ''; - const keywords = extractKeywords(event.rubrik ?? event.titel ?? event.title ?? ''); - return documents.filter(doc => { - const docOrgan = doc.organ ?? doc.committee ?? ''; - if (eventOrgan && docOrgan && eventOrgan.toLowerCase() === docOrgan.toLowerCase()) return true; - const docText = (doc.titel ?? doc.title ?? 
'').toLowerCase(); - return keywords.some(kw => docText.includes(kw)); - }).slice(0, 3); -} - -/** Find written questions related to a calendar event by keyword overlap (max 3) */ -export function findRelatedQuestions(event: RawCalendarEvent, questions: RawDocument[]): RawDocument[] { - const keywords = extractKeywords(event.rubrik ?? event.titel ?? event.title ?? ''); - return questions.filter(q => { - const qText = (q.titel ?? q.title ?? '').toLowerCase(); - return keywords.some(kw => qText.includes(kw)); - }).slice(0, 3); -} - -/** Extract targeted minister name from interpellation summary "till MINISTER" header line. - * Strips trailing topic clauses ("om X", "angående Y", etc.) and punctuation. */ -export function extractMinister(summary: string): string { - // Use non-newline whitespace ([^\S\n]+) so we don't cross into the next line - const m = summary.match(/\btill[^\S\n]+([^\n]+)/i); - if (!m) return ''; - const raw = m[1].trim(); - if (!raw) return ''; - - // Remove common trailing topic clauses and punctuation - const lowerRaw = raw.toLowerCase(); - const stopPhrases = [' om ', ' angående ', ' rörande ', ' beträffande ']; - let end = raw.length; - for (const phrase of stopPhrases) { - const idx = lowerRaw.indexOf(phrase); - if (idx !== -1 && idx < end) end = idx; - } - // Cut at terminating punctuation if it comes earlier - const punctIdx = raw.search(/[?:;.,]/); - if (punctIdx !== -1 && punctIdx < end) end = punctIdx; - - return raw.slice(0, end).trim(); -} +// ── Re-exports from sub-modules (backward-compatible) ────────────────────── +export type { DocTypeLocalization } from './doc-type-helpers.js'; +export { DOC_TYPE_DISPLAY, localizeDocType, TITLE_SUFFIX_TEMPLATES } from './doc-type-helpers.js'; +export { findRelatedDocuments, findRelatedQuestions, extractMinister } from './event-helpers.js'; +export { detectBannedPatterns } from './ai-marker-helpers.js'; + +// ── Sub-module imports used by the deep analysis section ─────────────────── +import { 
generateImpactAnalysis, generateConsequencesAnalysis } from './impact-helpers.js'; +import { + type DocumentAnalysis, type BatchAnalysisResult, type RiskAssessment, + analyzeDocumentsBatch, analyzeDocumentsPerspectives, + MAX_PERSPECTIVE_INSIGHTS, + renderAggregatedPestle, renderStakeholderImpactSummary, + renderRiskAssessment, renderImplementationAssessment, +} from './framework-renderers.js'; +import { localizeDocType } from './doc-type-helpers.js'; // --------------------------------------------------------------------------- // Deep Analysis Section (5W Framework) @@ -339,6 +107,10 @@ export function analyzeDocumentsForContent( return { frameworkAnalysis, perspectiveAnalysis }; } +// --------------------------------------------------------------------------- +// Deep Analysis section private helpers +// --------------------------------------------------------------------------- + /** * Extract unique party names from a set of documents for "Who" analysis. */ @@ -453,150 +225,6 @@ function coalitionRiskText(lang: Language | string, margin: number): string { return templates[lang as string] ?? templates.en; } -/** - * Generate a comprehensive Deep Analysis section following the 5W framework - * (Who, What, When, Why, Winners/Losers) plus impact, consequences, and critical - * assessment subsections. This section is designed for highly analytical readers - * who seek multi-perspective intelligence on parliamentary developments. - * - * @returns HTML string for the deep analysis section, or empty string if insufficient data - */ -export function generateDeepAnalysisSection(opts: DeepAnalysisOptions): string { - const { documents, lang, cia, articleType, whyContext, frameworkAnalysis, perspectiveAnalysis } = opts; - - // Deep analysis requires at least 2 documents for cross-document insights - // in standard article types. For deep-inspection articles, allow single- - // document analysis since the whole article is dedicated to in-depth review. 
- const minDocs = articleType === 'deep-inspection' ? 1 : 2; - if (!documents || documents.length < minDocs) return ''; - - const lbl = (key: string): string => { - const val = L(lang, key); - return typeof val === 'string' ? val : key; - }; - - const parts: string[] = []; - parts.push(`\n
    `); - parts.push(`

    ${escapeHtml(lbl('deepAnalysis'))}

    `); - - // ── WHO: Key Actors ──────────────────────────────────────────────────────── - const { parties, authors } = extractKeyActors(documents); - if (parties.size > 0 || authors.length > 0) { - parts.push(`

    ${escapeHtml(lbl('deepAnalysisWho'))}

    `); - if (parties.size > 0) { - const sortedParties = [...parties.entries()].sort((a, b) => b[1] - a[1]); - const partyList = sortedParties - .map(([p, count]) => `${escapeHtml(p)} (${count})`) - .join(', '); - parts.push(`

    ${partyList}

    `); - } - if (authors.length > 0) { - parts.push(`

    ${authors.map(a => escapeHtml(a)).join(', ')}

    `); - } - } - - // ── WHAT: What Happened ──────────────────────────────────────────────────── - const domains = aggregateDomains(documents, lang); - if (domains.size > 0) { - parts.push(`

    ${escapeHtml(lbl('deepAnalysisWhat'))}

    `); - const sortedDomains = [...domains.entries()].sort((a, b) => b[1] - a[1]); - const domainItems = sortedDomains.slice(0, 6) - .map(([d, c]) => `${escapeHtml(d)} (${c})`) - .join(', '); - const docTypes = new Map(); - for (const doc of documents) { - const t = doc.doktyp || doc.documentType || 'other'; - docTypes.set(t, (docTypes.get(t) ?? 0) + 1); - } - const typeList = [...docTypes.entries()] - .sort((a, b) => b[1] - a[1]) - .map(([t, c]) => `${escapeHtml(localizeDocType(t, lang, c))}: ${c}`) - .join(', '); - parts.push(`

    ${domainItems}

    `); - parts.push(`

    ${typeList}

    `); - } - - // ── WHEN: Timeline & Context ─────────────────────────────────────────────── - parts.push(`

    ${escapeHtml(lbl('deepAnalysisWhen'))}

    `); - const timelineContext = generateTimelineContext(documents, lang, articleType); - parts.push(`

    ${timelineContext}

    `); - - // ── WHY: Why This Matters ────────────────────────────────────────────────── - parts.push(`

    ${escapeHtml(lbl('deepAnalysisWhy'))}

    `); - const whyText = generateWhyAnalysis(documents, lang, cia, whyContext); - parts.push(`

    ${whyText}

    `); - - // ── WINNERS & LOSERS ─────────────────────────────────────────────────────── - parts.push(`

    ${escapeHtml(lbl('deepAnalysisWinners'))}

    `); - const winnersText = analyseWinnersLosers(documents, cia, lang); - parts.push(`

    ${winnersText}

    `); - - // ── POLITICAL IMPACT ─────────────────────────────────────────────────────── - parts.push(`

    ${escapeHtml(lbl('deepAnalysisImpact'))}

    `); - const impactText = generateImpactAnalysis(documents, lang, cia); - parts.push(`

    ${impactText}

    `); - - // ── ACTIONS & CONSEQUENCES ───────────────────────────────────────────────── - parts.push(`

    ${escapeHtml(lbl('deepAnalysisConsequences'))}

    `); - const consequencesText = generateConsequencesAnalysis(documents, lang, articleType); - parts.push(`

    ${consequencesText}

    `); - - // ── CRITICAL ASSESSMENT ──────────────────────────────────────────────────── - parts.push(`

    ${escapeHtml(lbl('deepAnalysisCritical'))}

    `); - const criticalText = generateCriticalAssessment(documents, lang, cia); - parts.push(`

    ${criticalText}

    `); - - // ── MULTIPLE PERSPECTIVES ────────────────────────────────────────────────── - if (parties.size >= 2) { - parts.push(`

    ${escapeHtml(lbl('deepAnalysisPerspectives'))}

    `); - const perspectivesText = generatePerspectivesAnalysis(documents, lang, parties); - parts.push(`

    ${perspectivesText}

    `); - } - - // ── FRAMEWORK ANALYSIS SECTIONS ──────────────────────────────────────────── - // When the document analysis framework has been run, inject its richer - // PESTLE, stakeholder impact, risk, and implementation assessment data. - if (frameworkAnalysis && frameworkAnalysis.size > 0) { - const analyses = [...frameworkAnalysis.values()]; - - // PESTLE Analysis — aggregate across all analysed documents - parts.push(`

    ${escapeHtml(lbl('deepAnalysisPestle'))}

    `); - parts.push(renderAggregatedPestle(analyses, lang)); - - // Stakeholder Impact — summarise stakeholder impacts from the framework - parts.push(`

    ${escapeHtml(lbl('deepAnalysisStakeholderImpact'))}

    `); - parts.push(renderStakeholderImpactSummary(analyses, lang)); - - // Risk Assessment — aggregate risk factors across documents - const allRisks = analyses.flatMap(a => a.riskAssessment); - if (allRisks.length > 0) { - parts.push(`

    ${escapeHtml(lbl('deepAnalysisRisk'))}

    `); - parts.push(renderRiskAssessment(allRisks, lang)); - } - - // Implementation Assessment — summarise implementation feasibility - parts.push(`

    ${escapeHtml(lbl('deepAnalysisImplementation'))}

    `); - parts.push(renderImplementationAssessment(analyses, lang)); - } - - // ── MULTI-PERSPECTIVE INSIGHTS (6 lenses) ──────────────────────────────── - // When the analysis-framework has been run, inject key insights from the - // government, opposition, citizen, economic, international, and media lenses. - if (perspectiveAnalysis && perspectiveAnalysis.results.length > 0) { - const allInsights = perspectiveAnalysis.results.flatMap((r: unknown) => ((r as { keyInsights?: string[] }).keyInsights ?? [])); - if (allInsights.length > 0) { - const uniqueInsights = [...new Set(allInsights)].slice(0, MAX_PERSPECTIVE_INSIGHTS); - parts.push(`
    `); - const insightItems = uniqueInsights.map(i => `
  • ${escapeHtml(i)}
  • `).join('\n'); - parts.push(`
      \n${insightItems}\n
    `); - parts.push(`
    `); - } - } - - parts.push('
    \n'); - return parts.join('\n'); -} - // --------------------------------------------------------------------------- // Deep Analysis subsection generators // --------------------------------------------------------------------------- @@ -671,82 +299,6 @@ function defaultWhyText(_lang: Language | string): string { return ''; } -function generateImpactAnalysis(docs: RawDocument[], lang: Language | string, cia: CIAContext | undefined): string { - const parts: string[] = []; - - const propCount = docs.filter(d => d.doktyp === 'prop').length; - const motCount = docs.filter(d => d.doktyp === 'mot').length; - const betCount = docs.filter(d => d.doktyp === 'bet').length; - - if (propCount > 0) { - parts.push(propImpactText(lang, propCount)); - } - if (betCount > 0) { - parts.push(betImpactText(lang, betCount)); - } - if (motCount > 0) { - parts.push(motImpactText(lang, motCount)); - } - - if (cia) { - const margin = cia.coalitionStability?.majorityMargin ?? 0; - if (margin <= 5) { - parts.push(thinMajorityImpactText(lang, margin)); - } - } - - return parts.join(' ') || genericImpactText(lang); -} - -function propImpactText(_lang: Language | string, n: number): string { - return ``; -} - -function betImpactText(_lang: Language | string, n: number): string { - return ``; -} - -function motImpactText(_lang: Language | string, n: number): string { - return ``; -} - -function thinMajorityImpactText(_lang: Language | string, margin: number): string { - return ``; -} - -function genericImpactText(_lang: Language | string): string { - return ''; -} - -function generateConsequencesAnalysis(docs: RawDocument[], lang: Language | string, _articleType: string): string { - const propCount = docs.filter(d => d.doktyp === 'prop').length; - const motCount = docs.filter(d => d.doktyp === 'mot').length; - const parts: string[] = []; - - if (propCount > 0) { - parts.push(propConsequencesText(lang, propCount)); - } - if (motCount > 0) { - parts.push(motConsequencesText(lang, motCount)); - 
} - if (parts.length === 0) { - parts.push(genericConsequencesText(lang)); - } - return parts.join(' '); -} - -function propConsequencesText(_lang: Language | string, n: number): string { - return ``; -} - -function motConsequencesText(_lang: Language | string, n: number): string { - return ``; -} - -function genericConsequencesText(_lang: Language | string): string { - return ''; -} - function generateCriticalAssessment(docs: RawDocument[], lang: Language | string, cia: CIAContext | undefined): string { const parts: string[] = []; @@ -827,299 +379,149 @@ function generatePerspectivesAnalysis(docs: RawDocument[], lang: Language | stri } // --------------------------------------------------------------------------- -// Framework analysis section renderers +// Main deep analysis section generator // --------------------------------------------------------------------------- -/** Max items per PESTLE dimension in aggregated display */ -const MAX_PESTLE_ITEMS = 4; -/** Max stakeholder impacts shown in the summary list */ -const MAX_STAKEHOLDER_IMPACTS = 7; -/** Max risk items shown in the risk assessment summary */ -const MAX_RISK_ITEMS = 5; -/** Max perspective insights shown from 6-lens analysis */ -const MAX_PERSPECTIVE_INSIGHTS = 5; -/** Max implementation obstacles listed */ -const MAX_IMPLEMENTATION_OBSTACLES = 4; -/** Max agencies displayed in implementation assessment */ -const MAX_AGENCIES_DISPLAYED = 5; - -const PESTLE_LABELS: Readonly>> = { - en: { political: 'Political', economic: 'Economic', social: 'Social', technological: 'Technological', legal: 'Legal', environmental: 'Environmental' }, - sv: { political: 'Politisk', economic: 'Ekonomisk', social: 'Social', technological: 'Teknologisk', legal: 'Juridisk', environmental: 'Miljö' }, - da: { political: 'Politisk', economic: 'Økonomisk', social: 'Social', technological: 'Teknologisk', legal: 'Juridisk', environmental: 'Miljø' }, - no: { political: 'Politisk', economic: 'Økonomisk', social: 'Sosial', 
technological: 'Teknologisk', legal: 'Juridisk', environmental: 'Miljø' }, - fi: { political: 'Poliittinen', economic: 'Taloudellinen', social: 'Sosiaalinen', technological: 'Teknologinen', legal: 'Oikeudellinen', environmental: 'Ympäristö' }, - de: { political: 'Politisch', economic: 'Wirtschaftlich', social: 'Sozial', technological: 'Technologisch', legal: 'Rechtlich', environmental: 'Umwelt' }, - fr: { political: 'Politique', economic: 'Économique', social: 'Social', technological: 'Technologique', legal: 'Juridique', environmental: 'Environnemental' }, - es: { political: 'Político', economic: 'Económico', social: 'Social', technological: 'Tecnológico', legal: 'Jurídico', environmental: 'Ambiental' }, - nl: { political: 'Politiek', economic: 'Economisch', social: 'Sociaal', technological: 'Technologisch', legal: 'Juridisch', environmental: 'Milieu' }, - ar: { political: 'سياسي', economic: 'اقتصادي', social: 'اجتماعي', technological: 'تقني', legal: 'قانوني', environmental: 'بيئي' }, - he: { political: 'פוליטי', economic: 'כלכלי', social: 'חברתי', technological: 'טכנולוגי', legal: 'משפטי', environmental: 'סביבתי' }, - ja: { political: '政治', economic: '経済', social: '社会', technological: '技術', legal: '法的', environmental: '環境' }, - ko: { political: '정치', economic: '경제', social: '사회', technological: '기술', legal: '법률', environmental: '환경' }, - zh: { political: '政治', economic: '经济', social: '社会', technological: '技术', legal: '法律', environmental: '环境' }, -}; - -const RISK_TYPE_LABELS: Readonly>> = { - en: { political: 'Political', implementation: 'Implementation', 'public-acceptance': 'Public acceptance', legal: 'Legal', financial: 'Financial' }, - sv: { political: 'Politisk', implementation: 'Genomförande', 'public-acceptance': 'Offentlig acceptans', legal: 'Juridisk', financial: 'Finansiell' }, - da: { political: 'Politisk', implementation: 'Implementering', 'public-acceptance': 'Offentlig accept', legal: 'Juridisk', financial: 'Finansiel' }, - no: { political: 
'Politisk', implementation: 'Implementering', 'public-acceptance': 'Offentlig aksept', legal: 'Juridisk', financial: 'Finansiell' }, - fi: { political: 'Poliittinen', implementation: 'Toteutus', 'public-acceptance': 'Julkinen hyväksyntä', legal: 'Oikeudellinen', financial: 'Taloudellinen' }, - de: { political: 'Politisch', implementation: 'Umsetzung', 'public-acceptance': 'Öffentliche Akzeptanz', legal: 'Rechtlich', financial: 'Finanziell' }, - fr: { political: 'Politique', implementation: 'Mise en œuvre', 'public-acceptance': 'Acceptation publique', legal: 'Juridique', financial: 'Financier' }, - es: { political: 'Político', implementation: 'Implementación', 'public-acceptance': 'Aceptación pública', legal: 'Jurídico', financial: 'Financiero' }, - nl: { political: 'Politiek', implementation: 'Implementatie', 'public-acceptance': 'Publieke acceptatie', legal: 'Juridisch', financial: 'Financieel' }, - ar: { political: 'سياسي', implementation: 'تنفيذي', 'public-acceptance': 'القبول العام', legal: 'قانوني', financial: 'مالي' }, - he: { political: 'פוליטי', implementation: 'יישום', 'public-acceptance': 'קבלה ציבורית', legal: 'משפטי', financial: 'פיננסי' }, - ja: { political: '政治', implementation: '実装', 'public-acceptance': '世論受容', legal: '法的', financial: '財政' }, - ko: { political: '정치', implementation: '이행', 'public-acceptance': '대중 수용성', legal: '법률', financial: '재정' }, - zh: { political: '政治', implementation: '实施', 'public-acceptance': '公众接受度', legal: '法律', financial: '财政' }, -}; - -const LEVEL_LABELS: Readonly>> = { - en: { high: 'High', medium: 'Medium', low: 'Low' }, - sv: { high: 'Hög', medium: 'Medel', low: 'Låg' }, - da: { high: 'Høj', medium: 'Mellem', low: 'Lav' }, - no: { high: 'Høy', medium: 'Middels', low: 'Lav' }, - fi: { high: 'Korkea', medium: 'Keskitaso', low: 'Matala' }, - de: { high: 'Hoch', medium: 'Mittel', low: 'Niedrig' }, - fr: { high: 'Élevé', medium: 'Moyen', low: 'Faible' }, - es: { high: 'Alto', medium: 'Medio', low: 'Bajo' }, - nl: { high: 
'Hoog', medium: 'Middel', low: 'Laag' }, - ar: { high: 'مرتفع', medium: 'متوسط', low: 'منخفض' }, - he: { high: 'גבוה', medium: 'בינוני', low: 'נמוך' }, - ja: { high: '高', medium: '中', low: '低' }, - ko: { high: '높음', medium: '보통', low: '낮음' }, - zh: { high: '高', medium: '中', low: '低' }, -}; - -const IMPLEMENTATION_LABELS: Readonly> = { - en: { feasibility: 'Feasibility', obstacles: 'Key obstacles', agencies: 'Agencies involved', noStakeholderData: 'No stakeholder impact data available.', noImplementationData: 'No implementation data available.', burden: 'Burden' }, - sv: { feasibility: 'Genomförbarhet', obstacles: 'Viktiga hinder', agencies: 'Berörda myndigheter', noStakeholderData: 'Ingen data om intressentpåverkan tillgänglig.', noImplementationData: 'Ingen implementeringsdata tillgänglig.', burden: 'Belastning' }, - da: { feasibility: 'Gennemførlighed', obstacles: 'Vigtige hindringer', agencies: 'Involverede myndigheder', noStakeholderData: 'Ingen data om interessentpåvirkning tilgængelig.', noImplementationData: 'Ingen implementeringsdata tilgængelig.', burden: 'Byrde' }, - no: { feasibility: 'Gjennomførbarhet', obstacles: 'Viktige hindringer', agencies: 'Involverte etater', noStakeholderData: 'Ingen data om interessentpåvirkning tilgjengelig.', noImplementationData: 'Ingen implementeringsdata tilgjengelig.', burden: 'Belastning' }, - fi: { feasibility: 'Toteutettavuus', obstacles: 'Keskeiset esteet', agencies: 'Mukana olevat viranomaiset', noStakeholderData: 'Sidosryhmävaikutustietoa ei saatavilla.', noImplementationData: 'Toteutustietoa ei saatavilla.', burden: 'Rasite' }, - de: { feasibility: 'Umsetzbarkeit', obstacles: 'Wesentliche Hindernisse', agencies: 'Beteiligte Behörden', noStakeholderData: 'Keine Daten zu Stakeholder-Auswirkungen verfügbar.', noImplementationData: 'Keine Umsetzungsdaten verfügbar.', burden: 'Belastung' }, - fr: { feasibility: 'Faisabilité', obstacles: 'Obstacles clés', agencies: 'Agences impliquées', noStakeholderData: 'Aucune donnée 
d’impact des parties prenantes disponible.', noImplementationData: 'Aucune donnée de mise en œuvre disponible.', burden: 'Charge' }, - es: { feasibility: 'Viabilidad', obstacles: 'Obstáculos clave', agencies: 'Organismos implicados', noStakeholderData: 'No hay datos de impacto en partes interesadas.', noImplementationData: 'No hay datos de implementación disponibles.', burden: 'Carga' }, - nl: { feasibility: 'Haalbaarheid', obstacles: 'Belangrijkste obstakels', agencies: 'Betrokken instanties', noStakeholderData: 'Geen gegevens over impact op belanghebbenden beschikbaar.', noImplementationData: 'Geen implementatiegegevens beschikbaar.', burden: 'Last' }, - ar: { feasibility: 'قابلية التنفيذ', obstacles: 'العقبات الرئيسية', agencies: 'الجهات المعنية', noStakeholderData: 'لا تتوفر بيانات تأثير أصحاب المصلحة.', noImplementationData: 'لا تتوفر بيانات تنفيذ.', burden: 'العبء' }, - he: { feasibility: 'ישימות', obstacles: 'חסמים מרכזיים', agencies: 'גורמים מעורבים', noStakeholderData: 'אין נתוני השפעה על בעלי עניין.', noImplementationData: 'אין נתוני יישום.', burden: 'נטל' }, - ja: { feasibility: '実現可能性', obstacles: '主な障害', agencies: '関係機関', noStakeholderData: 'ステークホルダー影響データはありません。', noImplementationData: '実施データはありません。', burden: '負担' }, - ko: { feasibility: '실행 가능성', obstacles: '주요 장애 요인', agencies: '관여 기관', noStakeholderData: '이해관계자 영향 데이터가 없습니다.', noImplementationData: '이행 데이터가 없습니다.', burden: '부담' }, - zh: { feasibility: '可实施性', obstacles: '关键障碍', agencies: '涉及机构', noStakeholderData: '暂无利益相关方影响数据。', noImplementationData: '暂无实施数据。', burden: '负担' }, -}; - -function localizeLevel(level: 'high' | 'medium' | 'low', lang: Language | string): string { - return LEVEL_LABELS[lang as string]?.[level] ?? LEVEL_LABELS.en[level]; -} - -function localizeRiskType(type: RiskAssessment['type'], lang: Language | string): string { - return RISK_TYPE_LABELS[lang as string]?.[type] ?? 
RISK_TYPE_LABELS.en[type]; -} - -function localizedImplementationLabels(lang: Language | string): { feasibility: string; obstacles: string; agencies: string; noStakeholderData: string; noImplementationData: string; burden: string } { - return IMPLEMENTATION_LABELS[lang as string] ?? IMPLEMENTATION_LABELS.en; -} - /** - * Aggregate PESTLE dimensions across multiple document analyses into a - * deduplicated list per dimension and render as an HTML description list. + * Generate a comprehensive Deep Analysis section following the 5W framework + * (Who, What, When, Why, Winners/Losers) plus impact, consequences, and critical + * assessment subsections. This section is designed for highly analytical readers + * who seek multi-perspective intelligence on parliamentary developments. + * + * @returns HTML string for the deep analysis section, or empty string if insufficient data */ -function renderAggregatedPestle(analyses: DocumentAnalysis[], lang: Language | string): string { - const merged: PESTLEAnalysis = { - political: [], economic: [], social: [], - technological: [], legal: [], environmental: [], - }; +export function generateDeepAnalysisSection(opts: DeepAnalysisOptions): string { + const { documents, lang, cia, articleType, whyContext, frameworkAnalysis, perspectiveAnalysis } = opts; - for (const a of analyses) { - const p = a.pestleDimensions; - merged.political.push(...p.political); - merged.economic.push(...p.economic); - merged.social.push(...p.social); - merged.technological.push(...p.technological); - merged.legal.push(...p.legal); - merged.environmental.push(...p.environmental); - } + // Deep analysis requires at least 2 documents for cross-document insights + // in standard article types. For deep-inspection articles, allow single- + // document analysis since the whole article is dedicated to in-depth review. + const minDocs = articleType === 'deep-inspection' ? 
1 : 2; + if (!documents || documents.length < minDocs) return ''; - // Deduplicate per dimension - const dedup = (arr: string[]): string[] => [...new Set(arr)].slice(0, MAX_PESTLE_ITEMS); - - const labels = PESTLE_LABELS[lang as string] ?? PESTLE_LABELS.en; - const dims: Array<[string, string[]]> = [ - [labels.political, dedup(merged.political)], - [labels.economic, dedup(merged.economic)], - [labels.social, dedup(merged.social)], - [labels.technological, dedup(merged.technological)], - [labels.legal, dedup(merged.legal)], - [labels.environmental, dedup(merged.environmental)], - ]; - - const items = dims - .filter(([, items]) => items.length > 0) - .map(([label, items]) => - `
    ${escapeHtml(label)}
    \n
    ${items.map(i => escapeHtml(i)).join(' ')}
    `, - ) - .join('\n'); - - return `
    \n${items}\n
    `; -} + const lbl = (key: string): string => { + const val = L(lang, key); + return typeof val === 'string' ? val : key; + }; -/** - * Render a summary of stakeholder impacts across all analysed documents. - * Shows up to 7 stakeholder groups with impact direction, confidence, and burden. - */ -function renderStakeholderImpactSummary(analyses: DocumentAnalysis[], lang: Language | string): string { - const labels = localizedImplementationLabels(lang); - // Collect all stakeholder impacts, deduplicated by stakeholder name - const impactMap = new Map(); - for (const a of analyses) { - for (const impact of a.stakeholderImpacts) { - // Keep the higher-magnitude impact per stakeholder - const existing = impactMap.get(impact.stakeholder); - if (!existing || magnitudeRank(impact.directImpact.magnitude) > magnitudeRank(existing.directImpact.magnitude)) { - impactMap.set(impact.stakeholder, impact); - } + const parts: string[] = []; + parts.push(`\n
    `); + parts.push(`

    ${escapeHtml(lbl('deepAnalysis'))}

    `); + + // ── WHO: Key Actors ──────────────────────────────────────────────────────── + const { parties, authors } = extractKeyActors(documents); + if (parties.size > 0 || authors.length > 0) { + parts.push(`

    ${escapeHtml(lbl('deepAnalysisWho'))}

    `); + if (parties.size > 0) { + const sortedParties = [...parties.entries()].sort((a, b) => b[1] - a[1]); + const partyList = sortedParties + .map(([p, count]) => `${escapeHtml(p)} (${count})`) + .join(', '); + parts.push(`

    ${partyList}

    `); + } + if (authors.length > 0) { + parts.push(`

    ${authors.map(a => escapeHtml(a)).join(', ')}

    `); } } - const impacts = [...impactMap.values()].slice(0, MAX_STAKEHOLDER_IMPACTS); - if (impacts.length === 0) return `

    ${escapeHtml(labels.noStakeholderData)}

    `; - - const rows = impacts.map(i => { - const directionIcon = - i.directImpact.direction === 'positive' ? '↑' - : i.directImpact.direction === 'negative' ? '↓' - : i.directImpact.direction === 'mixed' ? '↕' - : '→'; - const burdenText = localizeLevel(i.implementationBurden, lang); - return `
  • ${escapeHtml(i.displayName)}: ${directionIcon} ${escapeHtml(i.directImpact.summary)} (${escapeHtml(i.confidence)}; ${escapeHtml(labels.burden)}: ${escapeHtml(burdenText)})
  • `; - }); - - return `
      \n${rows.join('\n')}\n
    `; -} - -/** - * Render a risk assessment summary. Groups risks by type and keeps the - * highest-severity risk per type. - */ -function renderRiskAssessment(risks: RiskAssessment[], lang: Language | string): string { - // Deduplicate by type, preferring higher severity - const byType = new Map(); - for (const r of risks) { - const key = r.type; - const existing = byType.get(key); - if (!existing || severityRank(r.severity) > severityRank(existing.severity)) { - byType.set(key, r); + // ── WHAT: What Happened ──────────────────────────────────────────────────── + const domains = aggregateDomains(documents, lang); + if (domains.size > 0) { + parts.push(`

    ${escapeHtml(lbl('deepAnalysisWhat'))}

    `); + const sortedDomains = [...domains.entries()].sort((a, b) => b[1] - a[1]); + const domainItems = sortedDomains.slice(0, 6) + .map(([d, c]) => `${escapeHtml(d)} (${c})`) + .join(', '); + const docTypes = new Map(); + for (const doc of documents) { + const t = doc.doktyp || doc.documentType || 'other'; + docTypes.set(t, (docTypes.get(t) ?? 0) + 1); } + const typeList = [...docTypes.entries()] + .sort((a, b) => b[1] - a[1]) + .map(([t, c]) => `${escapeHtml(localizeDocType(t, lang, c))}: ${c}`) + .join(', '); + parts.push(`

    ${domainItems}

    `); + parts.push(`

    ${typeList}

    `); } - const top = [...byType.values()].slice(0, MAX_RISK_ITEMS); - const rows = top.map(r => { - const icon = r.severity === 'high' ? '🔴' : r.severity === 'medium' ? '🟡' : '🟢'; - return `
  • ${icon} ${escapeHtml(localizeRiskType(r.type, lang))} (${escapeHtml(localizeLevel(r.severity, lang))}): ${escapeHtml(r.description)}
  • `; - }); + // ── WHEN: Timeline & Context ─────────────────────────────────────────────── + parts.push(`

    ${escapeHtml(lbl('deepAnalysisWhen'))}

    `); + const timelineContext = generateTimelineContext(documents, lang, articleType); + parts.push(`

    ${timelineContext}

    `); - return `
      \n${rows.join('\n')}\n
    `; -} + // ── WHY: Why This Matters ────────────────────────────────────────────────── + parts.push(`

    ${escapeHtml(lbl('deepAnalysisWhy'))}

    `); + const whyText = generateWhyAnalysis(documents, lang, cia, whyContext); + parts.push(`

    ${whyText}

    `); -function severityRank(s: string): number { - return s === 'high' ? 3 : s === 'medium' ? 2 : 1; -} + // ── WINNERS & LOSERS ─────────────────────────────────────────────────────── + parts.push(`

    ${escapeHtml(lbl('deepAnalysisWinners'))}

    `); + const winnersText = analyseWinnersLosers(documents, cia, lang); + parts.push(`

    ${winnersText}

    `); -function magnitudeRank(magnitude: 'significant' | 'moderate' | 'minor'): number { - return magnitude === 'significant' ? 3 : magnitude === 'moderate' ? 2 : 1; -} + // ── POLITICAL IMPACT ─────────────────────────────────────────────────────── + parts.push(`

    ${escapeHtml(lbl('deepAnalysisImpact'))}

    `); + const impactText = generateImpactAnalysis(documents, lang, cia); + parts.push(`

    ${impactText}

    `); -/** - * Render implementation assessment summary from framework analyses. - */ -function renderImplementationAssessment(analyses: DocumentAnalysis[], lang: Language | string): string { - const labels = localizedImplementationLabels(lang); - const assessments: ImplementationAssessment[] = analyses.map(a => a.implementationAssessment); - if (assessments.length === 0) return `

    ${escapeHtml(labels.noImplementationData)}

    `; - - // Aggregate obstacles and agencies across all documents - const allObstacles = new Set(); - const allAgencies = new Set(); - let highestFeasibility: ImplementationAssessment['feasibility'] = 'high'; - let selectedAssessment: ImplementationAssessment = assessments[0]; - - for (const ia of assessments) { - ia.keyObstacles.forEach(o => allObstacles.add(o)); - ia.agenciesInvolved.forEach(a => allAgencies.add(a)); - if (feasibilityRank(ia.feasibility) < feasibilityRank(highestFeasibility)) { - highestFeasibility = ia.feasibility; - selectedAssessment = ia; - } - } + // ── ACTIONS & CONSEQUENCES ───────────────────────────────────────────────── + parts.push(`

    ${escapeHtml(lbl('deepAnalysisConsequences'))}

    `); + const consequencesText = generateConsequencesAnalysis(documents, lang, articleType); + parts.push(`

    ${consequencesText}

    `); - const parts: string[] = []; - const fIcon = highestFeasibility === 'high' ? '🟢' : highestFeasibility === 'medium' ? '🟡' : '🔴'; - const timeline = selectedAssessment.estimatedTimeline; - parts.push(`

    ${fIcon} ${escapeHtml(labels.feasibility)}: ${escapeHtml(localizeLevel(highestFeasibility, lang))}. ${escapeHtml(timeline)}

    `); + // ── CRITICAL ASSESSMENT ──────────────────────────────────────────────────── + parts.push(`

    ${escapeHtml(lbl('deepAnalysisCritical'))}

    `); + const criticalText = generateCriticalAssessment(documents, lang, cia); + parts.push(`

    ${criticalText}

    `); - if (allObstacles.size > 0) { - const obstacleList = [...allObstacles].slice(0, MAX_IMPLEMENTATION_OBSTACLES).map(o => `
  • ${escapeHtml(o)}
  • `).join(''); - parts.push(`

    ${escapeHtml(labels.obstacles)}:

    \n
      ${obstacleList}
    `); + // ── MULTIPLE PERSPECTIVES ────────────────────────────────────────────────── + if (parties.size >= 2) { + parts.push(`

    ${escapeHtml(lbl('deepAnalysisPerspectives'))}

    `); + const perspectivesText = generatePerspectivesAnalysis(documents, lang, parties); + parts.push(`

    ${perspectivesText}

    `); } - if (allAgencies.size > 0) { - parts.push(`

    ${escapeHtml(labels.agencies)}: ${[...allAgencies].slice(0, MAX_AGENCIES_DISPLAYED).map(a => escapeHtml(a)).join(', ')}

    `); - } + // ── FRAMEWORK ANALYSIS SECTIONS ──────────────────────────────────────────── + // When the document analysis framework has been run, inject its richer + // PESTLE, stakeholder impact, risk, and implementation assessment data. + if (frameworkAnalysis && frameworkAnalysis.size > 0) { + const analyses = [...frameworkAnalysis.values()]; - return parts.join('\n'); -} + // PESTLE Analysis — aggregate across all analysed documents + parts.push(`

    ${escapeHtml(lbl('deepAnalysisPestle'))}

    `); + parts.push(renderAggregatedPestle(analyses, lang)); -function feasibilityRank(f: string): number { - return f === 'high' ? 3 : f === 'medium' ? 2 : 1; -} + // Stakeholder Impact — summarise stakeholder impacts from the framework + parts.push(`

    ${escapeHtml(lbl('deepAnalysisStakeholderImpact'))}

    `); + parts.push(renderStakeholderImpactSummary(analyses, lang)); -/* ── Banned pattern detection ─────────────────────────────────────────────── */ + // Risk Assessment — aggregate risk factors across documents + const allRisks = analyses.flatMap(a => a.riskAssessment as RiskAssessment[]); + if (allRisks.length > 0) { + parts.push(`

    ${escapeHtml(lbl('deepAnalysisRisk'))}

    `); + parts.push(renderRiskAssessment(allRisks, lang)); + } -/** - * Banned content patterns that indicate low-quality boilerplate text. - * Per SHARED_PROMPT_PATTERNS.md §BANNED Content Patterns v4.0, these - * must never appear in production articles. AI agents MUST replace them - * with genuine, document-specific analysis. - */ -const BANNED_PATTERNS: readonly { label: string; pattern: RegExp }[] = [ - { label: 'neutralText: "The political landscape remains fluid…"', pattern: /The political landscape remains fluid,? with both government and opposition positioning for advantage/i }, - { label: 'debateAnalysisMarker: "No chamber debate data is available…"', pattern: /No chamber debate data is available for these items,? limiting our ability/i }, - { label: 'policySignificanceTouches: "Touches on {domains}."', pattern: /Touches on [\p{L}\p{N}][\p{L}\p{N}\s,&/()-]*\./iu }, - { label: 'analysisOfNDocuments: "Analysis of N documents covering…"', pattern: /Analysis of \d+ documents covering/i }, - { label: 'policySignificanceGeneric: "Requires committee review and chamber debate…"', pattern: /Requires committee review and chamber debate/i }, - { label: 'topicInFocusSuffix: "…: {Topic} in Focus"', pattern: /:\s+\w[\w\s]*\bin Focus\b/i }, - { label: 'briefingOnFieldLabels: "Political intelligence briefing on {Field}: and {Field}:"', pattern: /Political intelligence briefing on \w+:\s+and\s+\w+:/i }, - // Deep Analysis generic template patterns — AI MUST replace these with specific analysis - { label: 'genericTimeline: "The pace of activity signals…"', pattern: /The pace of activity signals the political urgency/i }, - { label: 'genericTimeline: "define the current legislative landscape"', pattern: /define the current legislative landscape/i }, - { label: 'genericWhy: "broad legislative push that will shape"', pattern: /broad legislative push that will shape multiple aspects/i }, - { label: 'genericWhy: "critical period for understanding the government"', pattern: 
/critical period for understanding the government.s strategic direction/i }, - { label: 'genericImpact: "culmination of legislative review, with recommendations that guide"', pattern: /culmination of legislative review,? with recommendations that guide/i }, - { label: 'genericImpact: "interplay between governing ambition and opposition scrutiny"', pattern: /interplay between governing ambition and opposition scrutiny/i }, - { label: 'genericConsequences: "cascade through committee deliberations"', pattern: /cascade through committee deliberations,? chamber votes/i }, - { label: 'genericConsequences: "establish the policy alternatives that opposition parties will champion"', pattern: /establish the policy alternatives that opposition parties will champion/i }, - { label: 'genericCritical: "Standard parliamentary procedures are being followed"', pattern: /Standard parliamentary procedures are being followed/i }, - { label: 'genericCritical: "gap between legislative intent and implementation"', pattern: /gap between legislative intent and implementation often reveals/i }, - { label: 'genericPillarTransition: "While parliament deliberates these legislative matters"', pattern: /While parliament deliberates these legislative matters/i }, -]; + // Implementation Assessment — summarise implementation feasibility + parts.push(`

    ${escapeHtml(lbl('deepAnalysisImplementation'))}

    `); + parts.push(renderImplementationAssessment(analyses, lang)); + } -/** - * Detect banned boilerplate patterns in HTML content. - * Returns an array of human-readable labels identifying each detected - * banned pattern, suitable for quality gate logs and error messages. - * - * @param html - The HTML string to scan for banned patterns - * @returns Array of stable human-readable labels for each detected banned pattern - */ -export function detectBannedPatterns(html: string): string[] { - const found: string[] = []; - for (const { label, pattern } of BANNED_PATTERNS) { - if (pattern.test(html)) { - found.push(label); + // ── MULTI-PERSPECTIVE INSIGHTS (6 lenses) ──────────────────────────────── + // When the analysis-framework has been run, inject key insights from the + // government, opposition, citizen, economic, international, and media lenses. + if (perspectiveAnalysis && perspectiveAnalysis.results.length > 0) { + const allInsights = perspectiveAnalysis.results.flatMap((r: unknown) => ((r as { keyInsights?: string[] }).keyInsights ?? [])); + if (allInsights.length > 0) { + const uniqueInsights = [...new Set(allInsights)].slice(0, MAX_PERSPECTIVE_INSIGHTS); + parts.push(`
    `); + const insightItems = uniqueInsights.map(i => `
  • ${escapeHtml(i)}
  • `).join('\n'); + parts.push(`
      \n${insightItems}\n
    `); + parts.push(`
    `); } } - return found; + + parts.push('
    \n'); + return parts.join('\n'); } diff --git a/scripts/generate-news-enhanced/generators.ts b/scripts/generate-news-enhanced/generators.ts index d29a81365..347e5e0fc 100644 --- a/scripts/generate-news-enhanced/generators.ts +++ b/scripts/generate-news-enhanced/generators.ts @@ -662,143 +662,18 @@ export async function generateInterpellations(): Promise { } } -/** - * Extract a Riksdag document ID (dok_id) from a known URL pattern. - * Supports: - * - https://riksdagen.se/sv/dokument-och-lagar/dokument/{type}/{dok_id}/ - * - https://data.riksdagen.se/dokument/{dok_id} - * - https://data.riksdagen.se/dokument/{dok_id}.json - * - * @returns The extracted dok_id, or null if the URL doesn't match a known pattern. - */ -export function extractDocIdFromUrl(url: string): string | null { - try { - const parsed = new URL(url); - const hostname = parsed.hostname.toLowerCase(); - const segments = parsed.pathname.split('/').filter(Boolean); - - // https://riksdagen.se/sv/dokument-och-lagar/dokument/{type}/{dok_id} - if (hostname === 'riksdagen.se' || hostname === 'www.riksdagen.se') { - // Path: /sv/dokument-och-lagar/dokument/{type}/{dok_id} - const dokIdx = segments.indexOf('dokument'); - if (dokIdx >= 0 && segments.length > dokIdx + 2) { - return segments[dokIdx + 2]; - } - } - - // https://data.riksdagen.se/dokument/{dok_id}[.json|.xml|.html] - if (hostname === 'data.riksdagen.se') { - const dokIdx = segments.indexOf('dokument'); - if (dokIdx >= 0 && segments.length > dokIdx + 1) { - return segments[dokIdx + 1].replace(/\.(json|xml|html|pdf)$/i, ''); // strip known file extensions - } - } - - return null; - } catch { - return null; - } -} - -/** - * Determine whether a URL points to a government (regeringen.se) resource - * that can be fetched via the get_g0v_document_content MCP tool. 
- */ -export function isGovernmentUrl(url: string): boolean { - try { - const parsed = new URL(url); - const hostname = parsed.hostname.toLowerCase(); - return hostname === 'regeringen.se' || hostname === 'www.regeringen.se'; - } catch { - return false; - } -} - -/** - * Determine whether a URL points to a GitHub repository resource - * (github.com or raw.githubusercontent.com) that can be fetched as raw content. - */ -export function isGitHubUrl(url: string): boolean { - try { - const parsed = new URL(url); - const hostname = parsed.hostname.toLowerCase(); - return hostname === 'github.com' - || hostname === 'www.github.com' - || hostname === 'raw.githubusercontent.com'; - } catch { - return false; - } -} - -/** - * Convert a GitHub blob/tree URL to a raw.githubusercontent.com URL. - * Handles patterns like: - * - https://github.com/{owner}/{repo}/blob/{branch}/{path} - * - https://github.com/{owner}/{repo}/raw/{branch}/{path} - * - https://raw.githubusercontent.com/{owner}/{repo}/{branch}/{path} (returned as-is) - * - * @returns The raw URL, or null if the URL cannot be converted. 
- */ -export function toGitHubRawUrl(url: string): string | null { - try { - const parsed = new URL(url); - const hostname = parsed.hostname.toLowerCase(); - - // Already a raw URL — return as-is - if (hostname === 'raw.githubusercontent.com') { - return url; - } - - if (hostname !== 'github.com' && hostname !== 'www.github.com') { - return null; - } - - // Path: /{owner}/{repo}/blob/{branch}/{...path} - // or: /{owner}/{repo}/raw/{branch}/{...path} - const segments = parsed.pathname.split('/').filter(Boolean); - if (segments.length < 4) return null; - - const [owner, repo, refType, ...rest] = segments; - if (refType !== 'blob' && refType !== 'raw') return null; - - // rest = [branch, ...pathParts] - return `https://raw.githubusercontent.com/${owner}/${repo}/${rest.join('/')}`; - } catch { - return null; - } -} - -/** - * Compute a short, deterministic hash suffix from a URL path string. - * Used to generate collision-resistant `dok_id` values for documents - * fetched from government or GitHub URLs. - * - * The hash is a simple DJB2-style left-shift-and-add over each character, - * rendered in base-36. A leading `-` (from negative ints) is replaced with `n`. - */ -export function hashPathSuffix(path: string): string { - return path - .split('') - .reduce((a, c) => ((a << 5) - a + c.charCodeAt(0)) | 0, 0) - .toString(36) - .replace(/^-/, 'n'); -} - -/** - * Strip HTML tags from a user-supplied string to prevent XSS. - * Uses a multi-pass loop to handle nested tag reconstruction attempts - * (e.g. `ipt>`). Returns **plain text** — callers must - * apply `escapeHtml()` at their render sites so escaping happens exactly once. 
- */ -export function sanitizePlainText(text: string): string { - let cleaned = text; - let prev: string; - do { - prev = cleaned; - cleaned = cleaned.replace(/<[^>]*>/g, ''); - } while (cleaned !== prev); - return cleaned; -} +// --------------------------------------------------------------------------- +// URL & text utilities — implementation extracted to url-utils.ts +// Re-exported here for backward compatibility. +// --------------------------------------------------------------------------- +export { + extractDocIdFromUrl, + isGovernmentUrl, + isGitHubUrl, + toGitHubRawUrl, + hashPathSuffix, + sanitizePlainText, +} from './url-utils.js'; // --------------------------------------------------------------------------- // Deep-Inspection content generator (topic-focused, comprehensive) diff --git a/scripts/generate-news-enhanced/url-utils.ts b/scripts/generate-news-enhanced/url-utils.ts new file mode 100644 index 000000000..f864fecf0 --- /dev/null +++ b/scripts/generate-news-enhanced/url-utils.ts @@ -0,0 +1,145 @@ +/** + * @module generate-news-enhanced/url-utils + * @description URL parsing and text sanitization utilities for the + * deep-inspection article generator. Provides Riksdag/government URL + * extraction, GitHub raw URL conversion, and XSS-safe text cleaning. + * + * @author Hack23 AB + * @license Apache-2.0 + */ + +/** + * Extract a `dok_id` from a Riksdag or data.riksdagen.se document URL. + * Returns `null` if the URL is not a recognised Riksdag document URL. 
+ * + * Supported patterns: + * - `https://riksdagen.se/sv/dokument-och-lagar/dokument/{type}/{dok_id}` + * - `https://data.riksdagen.se/dokument/{dok_id}[.json|.xml|.html]` + */ +export function extractDocIdFromUrl(url: string): string | null { + try { + const parsed = new URL(url); + const hostname = parsed.hostname.toLowerCase(); + const segments = parsed.pathname.split('/').filter(Boolean); + + // https://riksdagen.se/sv/dokument-och-lagar/dokument/{type}/{dok_id} + if (hostname === 'riksdagen.se' || hostname === 'www.riksdagen.se') { + const dokIdx = segments.indexOf('dokument'); + if (dokIdx >= 0 && segments.length > dokIdx + 2) { + return segments[dokIdx + 2]; + } + } + + // https://data.riksdagen.se/dokument/{dok_id}[.json|.xml|.html] + if (hostname === 'data.riksdagen.se') { + const dokIdx = segments.indexOf('dokument'); + if (dokIdx >= 0 && segments.length > dokIdx + 1) { + return segments[dokIdx + 1].replace(/\.(json|xml|html|pdf)$/i, ''); // strip known file extensions + } + } + + return null; + } catch { + return null; + } +} + +/** + * Determine whether a URL points to a government (regeringen.se) resource + * that can be fetched via the get_g0v_document_content MCP tool. + */ +export function isGovernmentUrl(url: string): boolean { + try { + const parsed = new URL(url); + const hostname = parsed.hostname.toLowerCase(); + return hostname === 'regeringen.se' || hostname === 'www.regeringen.se'; + } catch { + return false; + } +} + +/** + * Determine whether a URL points to a GitHub repository resource + * (github.com or raw.githubusercontent.com) that can be fetched as raw content. + */ +export function isGitHubUrl(url: string): boolean { + try { + const parsed = new URL(url); + const hostname = parsed.hostname.toLowerCase(); + return hostname === 'github.com' + || hostname === 'www.github.com' + || hostname === 'raw.githubusercontent.com'; + } catch { + return false; + } +} + +/** + * Convert a GitHub blob/tree URL to a raw.githubusercontent.com URL. 
+ * Handles patterns like: + * - https://github.com/{owner}/{repo}/blob/{branch}/{path} + * - https://github.com/{owner}/{repo}/raw/{branch}/{path} + * - https://raw.githubusercontent.com/{owner}/{repo}/{branch}/{path} (returned as-is) + * + * @returns The raw URL, or null if the URL cannot be converted. + */ +export function toGitHubRawUrl(url: string): string | null { + try { + const parsed = new URL(url); + const hostname = parsed.hostname.toLowerCase(); + + // Already a raw URL — return as-is + if (hostname === 'raw.githubusercontent.com') { + return url; + } + + if (hostname !== 'github.com' && hostname !== 'www.github.com') { + return null; + } + + // Path: /{owner}/{repo}/blob/{branch}/{...path} + // or: /{owner}/{repo}/raw/{branch}/{...path} + const segments = parsed.pathname.split('/').filter(Boolean); + if (segments.length < 4) return null; + + const [owner, repo, refType, ...rest] = segments; + if (refType !== 'blob' && refType !== 'raw') return null; + + // rest = [branch, ...pathParts] + return `https://raw.githubusercontent.com/${owner}/${repo}/${rest.join('/')}`; + } catch { + return null; + } +} + +/** + * Compute a short, deterministic hash suffix from a URL path string. + * Used to generate collision-resistant `dok_id` values for documents + * fetched from government or GitHub URLs. + * + * The hash is a simple DJB2-style left-shift-and-add over each character, + * rendered in base-36. A leading `-` (from negative ints) is replaced with `n`. + */ +export function hashPathSuffix(path: string): string { + return path + .split('') + .reduce((a, c) => ((a << 5) - a + c.charCodeAt(0)) | 0, 0) + .toString(36) + .replace(/^-/, 'n'); +} + +/** + * Strip HTML tags from a user-supplied string to prevent XSS. + * Uses a multi-pass loop to handle nested tag reconstruction attempts + * (e.g. `<scr<script>ipt>`). Returns **plain text** — callers must + * apply `escapeHtml()` at their render sites so escaping happens exactly once. 
+ */ +export function sanitizePlainText(text: string): string { + let cleaned = text; + let prev: string; + do { + prev = cleaned; + cleaned = cleaned.replace(/<[^>]*>/g, ''); + } while (cleaned !== prev); + return cleaned; +} From 694112801f662bf358cb78b442d2aa9a30423c4e Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 10 Apr 2026 09:37:53 +0000 Subject: [PATCH 3/5] fix: use double quotes for French string containing apostrophe in framework-renderers.ts Co-authored-by: pethers <1726836+pethers@users.noreply.github.com> --- .../data-transformers/content-generators/framework-renderers.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/data-transformers/content-generators/framework-renderers.ts b/scripts/data-transformers/content-generators/framework-renderers.ts index 747b17ed0..00e835ed6 100644 --- a/scripts/data-transformers/content-generators/framework-renderers.ts +++ b/scripts/data-transformers/content-generators/framework-renderers.ts @@ -165,7 +165,7 @@ const IMPLEMENTATION_LABELS: Readonly Date: Fri, 10 Apr 2026 11:34:13 +0000 Subject: [PATCH 4/5] fix: import URL utils locally in generators.ts and fix Korean typo in doc-type-helpers.ts Agent-Logs-Url: https://github.com/Hack23/riksdagsmonitor/sessions/dff320eb-d844-4f76-8ec0-f968ed2780c5 Co-authored-by: pethers <1726836+pethers@users.noreply.github.com> --- .../content-generators/doc-type-helpers.ts | 2 +- scripts/generate-news-enhanced/generators.ts | 13 +++++++++++-- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/scripts/data-transformers/content-generators/doc-type-helpers.ts b/scripts/data-transformers/content-generators/doc-type-helpers.ts index 056f07fd2..375f7a952 100644 --- a/scripts/data-transformers/content-generators/doc-type-helpers.ts +++ b/scripts/data-transformers/content-generators/doc-type-helpers.ts @@ -111,7 +111,7 @@ export const DOC_TYPE_DISPLAY: Readonly> = { plural: { en: 'External 
References', sv: 'Externa referenser', da: 'Eksterne referencer', no: 'Eksterne referanser', fi: 'Ulkoiset viitteet', de: 'Externe Referenzen', fr: 'Références externes', es: 'Referencias externas', - nl: 'Externe referenties', ar: 'مراجع خارجية', he: 'הפניות חיצוניות', ja: '外部参照', ko: '외부 참照', zh: '外部参考', + nl: 'Externe referenties', ar: 'مراجع خارجية', he: 'הפניות חיצוניות', ja: '外部参照', ko: '외부 참조', zh: '외부参考', }, }, other: { diff --git a/scripts/generate-news-enhanced/generators.ts b/scripts/generate-news-enhanced/generators.ts index 347e5e0fc..75061a04a 100644 --- a/scripts/generate-news-enhanced/generators.ts +++ b/scripts/generate-news-enhanced/generators.ts @@ -664,9 +664,9 @@ export async function generateInterpellations(): Promise { // --------------------------------------------------------------------------- // URL & text utilities — implementation extracted to url-utils.ts -// Re-exported here for backward compatibility. +// Imported for local use and re-exported for backward compatibility. 
// --------------------------------------------------------------------------- -export { +import { extractDocIdFromUrl, isGovernmentUrl, isGitHubUrl, @@ -675,6 +675,15 @@ export { sanitizePlainText, } from './url-utils.js'; +export { + extractDocIdFromUrl, + isGovernmentUrl, + isGitHubUrl, + toGitHubRawUrl, + hashPathSuffix, + sanitizePlainText, +}; + // --------------------------------------------------------------------------- // Deep-Inspection content generator (topic-focused, comprehensive) // --------------------------------------------------------------------------- From 1b86b312ae85d70a547fc3458b90c75036a95fc2 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 10 Apr 2026 13:15:25 +0000 Subject: [PATCH 5/5] fix: correct zh translation typo and extend toGitHubRawUrl to handle tree URLs Agent-Logs-Url: https://github.com/Hack23/riksdagsmonitor/sessions/4ac9d786-f068-4ebc-9b61-40abb31e209f Co-authored-by: pethers <1726836+pethers@users.noreply.github.com> --- .../content-generators/doc-type-helpers.ts | 2 +- scripts/generate-news-enhanced/url-utils.ts | 6 ++++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/scripts/data-transformers/content-generators/doc-type-helpers.ts b/scripts/data-transformers/content-generators/doc-type-helpers.ts index 375f7a952..dcc4f646c 100644 --- a/scripts/data-transformers/content-generators/doc-type-helpers.ts +++ b/scripts/data-transformers/content-generators/doc-type-helpers.ts @@ -111,7 +111,7 @@ export const DOC_TYPE_DISPLAY: Readonly> = { plural: { en: 'External References', sv: 'Externa referenser', da: 'Eksterne referencer', no: 'Eksterne referanser', fi: 'Ulkoiset viitteet', de: 'Externe Referenzen', fr: 'Références externes', es: 'Referencias externas', - nl: 'Externe referenties', ar: 'مراجع خارجية', he: 'הפניות חיצוניות', ja: '外部参照', ko: '외부 참조', zh: '외부参考', + nl: 'Externe referenties', ar: 'مراجع خارجية', he: 'הפניות חיצוניות', ja: '外部参照', ko: 
'외부 참조', zh: '外部参考', }, }, other: { diff --git a/scripts/generate-news-enhanced/url-utils.ts b/scripts/generate-news-enhanced/url-utils.ts index f864fecf0..7d9aed9c6 100644 --- a/scripts/generate-news-enhanced/url-utils.ts +++ b/scripts/generate-news-enhanced/url-utils.ts @@ -75,9 +75,10 @@ export function isGitHubUrl(url: string): boolean { } /** - * Convert a GitHub blob/tree URL to a raw.githubusercontent.com URL. + * Convert a GitHub blob/tree/raw URL to a raw.githubusercontent.com URL. * Handles patterns like: * - https://github.com/{owner}/{repo}/blob/{branch}/{path} + * - https://github.com/{owner}/{repo}/tree/{branch}/{path} * - https://github.com/{owner}/{repo}/raw/{branch}/{path} * - https://raw.githubusercontent.com/{owner}/{repo}/{branch}/{path} (returned as-is) * @@ -98,12 +99,13 @@ export function toGitHubRawUrl(url: string): string | null { } // Path: /{owner}/{repo}/blob/{branch}/{...path} + // or: /{owner}/{repo}/tree/{branch}/{...path} // or: /{owner}/{repo}/raw/{branch}/{...path} const segments = parsed.pathname.split('/').filter(Boolean); if (segments.length < 4) return null; const [owner, repo, refType, ...rest] = segments; - if (refType !== 'blob' && refType !== 'raw') return null; + if (refType !== 'blob' && refType !== 'raw' && refType !== 'tree') return null; // rest = [branch, ...pathParts] return `https://raw.githubusercontent.com/${owner}/${repo}/${rest.join('/')}`;