Hack23 · pethers · Apr 10, 2026 · Apr 10, 2026 · Apr 10, 2026 · Apr 10, 2026
diff --git a/scripts/data-transformers/content-generators/ai-marker-helpers.ts b/scripts/data-transformers/content-generators/ai-marker-helpers.ts
@@ -0,0 +1,56 @@
+/**
+ * @module data-transformers/content-generators/ai-marker-helpers
+ * @description Banned content pattern detection.
+ * Per SHARED_PROMPT_PATTERNS.md §BANNED Content Patterns v4.0,
+ * these patterns must never appear in production articles.
+ * AI agents MUST replace all AI_MUST_REPLACE markers with genuine analysis.
+ *
+ * @author Hack23 AB
+ * @license Apache-2.0
+ */
+
+/**
+ * Banned content patterns that indicate low-quality boilerplate text.
+ * Per SHARED_PROMPT_PATTERNS.md §BANNED Content Patterns v4.0, these
+ * must never appear in production articles. AI agents MUST replace them
+ * with genuine, document-specific analysis.
+ *
+ * Each entry pairs a stable, human-readable `label` (what gets reported)
+ * with the `pattern` that detects the boilerplate. All patterns are
+ * case-insensitive and none uses the `g` or `y` flag, so `RegExp.test()`
+ * keeps no state between calls.
+ *
+ * Placeholder-style patterns ("Touches on {domains}.", "{Topic} in Focus")
+ * use Unicode property escapes with the `u` flag so that topics containing
+ * non-ASCII letters (å, ä, ö, …) are detected as well; plain `\w` only
+ * covers ASCII letters, digits and underscore.
+ */
+const BANNED_PATTERNS: readonly { label: string; pattern: RegExp }[] = [
+  { label: 'neutralText: "The political landscape remains fluid…"', pattern: /The political landscape remains fluid,? with both government and opposition positioning for advantage/i },
+  { label: 'debateAnalysisMarker: "No chamber debate data is available…"', pattern: /No chamber debate data is available for these items,? limiting our ability/i },
+  { label: 'policySignificanceTouches: "Touches on {domains}."', pattern: /Touches on [\p{L}\p{N}][\p{L}\p{N}\s,&/()-]*\./iu },
+  { label: 'analysisOfNDocuments: "Analysis of N documents covering…"', pattern: /Analysis of \d+ documents covering/i },
+  { label: 'policySignificanceGeneric: "Requires committee review and chamber debate…"', pattern: /Requires committee review and chamber debate/i },
+  // Unicode-aware superset of the former ASCII-only /:\s+\w[\w\s]*\bin Focus\b/i
+  { label: 'topicInFocusSuffix: "…: {Topic} in Focus"', pattern: /:\s+[\p{L}\p{N}_][\p{L}\p{N}_\s]*\bin Focus\b/iu },
+  { label: 'briefingOnFieldLabels: "Political intelligence briefing on {Field}: and {Field}:"', pattern: /Political intelligence briefing on \w+:\s+and\s+\w+:/i },
+  // Deep Analysis generic template patterns — AI MUST replace these with specific analysis
+  { label: 'genericTimeline: "The pace of activity signals…"', pattern: /The pace of activity signals the political urgency/i },
+  { label: 'genericTimeline: "define the current legislative landscape"', pattern: /define the current legislative landscape/i },
+  { label: 'genericWhy: "broad legislative push that will shape"', pattern: /broad legislative push that will shape multiple aspects/i },
+  { label: 'genericWhy: "critical period for understanding the government"', pattern: /critical period for understanding the government.s strategic direction/i },
+  { label: 'genericImpact: "culmination of legislative review, with recommendations that guide"', pattern: /culmination of legislative review,? with recommendations that guide/i },
+  { label: 'genericImpact: "interplay between governing ambition and opposition scrutiny"', pattern: /interplay between governing ambition and opposition scrutiny/i },
+  { label: 'genericConsequences: "cascade through committee deliberations"', pattern: /cascade through committee deliberations,? chamber votes/i },
+  { label: 'genericConsequences: "establish the policy alternatives that opposition parties will champion"', pattern: /establish the policy alternatives that opposition parties will champion/i },
+  { label: 'genericCritical: "Standard parliamentary procedures are being followed"', pattern: /Standard parliamentary procedures are being followed/i },
+  { label: 'genericCritical: "gap between legislative intent and implementation"', pattern: /gap between legislative intent and implementation often reveals/i },
+  { label: 'genericPillarTransition: "While parliament deliberates these legislative matters"', pattern: /While parliament deliberates these legislative matters/i },
+];
+
+/**
+ * Scan HTML content for banned boilerplate phrases.
+ *
+ * Every entry of `BANNED_PATTERNS` is tested against the input, in table
+ * order; the label of each entry whose pattern matches is collected.
+ * An empty result means no banned pattern was found.
+ *
+ * @param html - The HTML string to scan for banned patterns
+ * @returns Labels of all matching banned patterns, in table order,
+ *          suitable for quality gate logs and error messages
+ */
+export function detectBannedPatterns(html: string): string[] {
+  return BANNED_PATTERNS
+    .filter(({ pattern }) => pattern.test(html))
+    .map(({ label }) => label);
+}
diff --git a/scripts/data-transformers/content-generators/doc-type-helpers.ts b/scripts/data-transformers/content-generators/doc-type-helpers.ts
@@ -0,0 +1,156 @@
+/**
+ * @module data-transformers/content-generators/doc-type-helpers
+ * @description Document type display names (multi-language) and title suffix templates.
+ * Provides DOC_TYPE_DISPLAY lookup table, localizeDocType utility, and
+ * TITLE_SUFFIX_TEMPLATES for inverted-pyramid lede construction.
+ *
+ * @author Hack23 AB
+ * @license Apache-2.0
+ */
+
+import type { Language } from '../../types/language.js';
+
+/**
+ * Display names for one Riksdag document type code, in singular and plural
+ * form. Each form maps a language to its label; a language may be absent
+ * from either map.
+ */
+export type DocTypeLocalization = Record<'singular' | 'plural', Partial<Record<Language, string>>>;
+
+/**
+ * Multi-language display names for known Riksdag document type codes.
+ *
+ * Keys are the raw document type codes (`prop`, `bet`, `mot`, …); each value
+ * holds a `singular` and a `plural` map from language code to display label.
+ * Several languages (e.g. ja, ko, zh) use the same string for both forms.
+ * `other` is an ordinary entry: it is only used when the code `other` itself
+ * is looked up, not as an automatic fallback for unknown codes.
+ */
+export const DOC_TYPE_DISPLAY: Readonly<Record<string, DocTypeLocalization>> = {
+  // prop — "Proposition"
+  prop: {
+    singular: {
+      en: 'Proposition', sv: 'Proposition', da: 'Proposition', no: 'Proposisjon',
+      fi: 'Hallituksen esitys', de: 'Regierungsvorlage', fr: 'Projet de loi', es: 'Proposición',
+      nl: 'Wetsvoorstel', ar: 'مقترح قانون', he: 'הצעת חוק', ja: '法案', ko: '정부 제출 법안', zh: '政府法案',
+    },
+    plural: {
+      en: 'Propositions', sv: 'Propositioner', da: 'Propositioner', no: 'Proposisjoner',
+      fi: 'Hallituksen esitykset', de: 'Regierungsvorlagen', fr: 'Projets de loi', es: 'Proposiciones',
+      nl: 'Wetsvoorstellen', ar: 'مقترحات قوانين', he: 'הצעות חוק', ja: '法案', ko: '정부 제출 법안', zh: '政府法案',
+    },
+  },
+  // bet — "Committee Report"
+  bet: {
+    singular: {
+      en: 'Committee Report', sv: 'Betänkande', da: 'Udvalgsbetænkning', no: 'Komitéinnstilling',
+      fi: 'Valiokunnan mietintö', de: 'Ausschussbericht', fr: 'Rapport de commission', es: 'Informe de comisión',
+      nl: 'Commissieverslag', ar: 'تقرير لجنة', he: 'דוח ועדה', ja: '委員会報告書', ko: '위원회 보고서', zh: '委员会报告',
+    },
+    plural: {
+      en: 'Committee Reports', sv: 'Betänkanden', da: 'Udvalgsbetænkninger', no: 'Komitéinnstillinger',
+      fi: 'Valiokunnan mietinnöt', de: 'Ausschussberichte', fr: 'Rapports de commission', es: 'Informes de comisión',
+      nl: 'Commissieverslagen', ar: 'تقارير لجان', he: 'דוחות ועדה', ja: '委員会報告書', ko: '위원회 보고서', zh: '委员会报告',
+    },
+  },
+  // mot — "Motion"
+  mot: {
+    singular: {
+      en: 'Motion', sv: 'Motion', da: 'Forslag', no: 'Forslag',
+      fi: 'Aloite', de: 'Antrag', fr: 'Motion', es: 'Moción',
+      nl: 'Motie', ar: 'مقترح', he: 'הצעה', ja: '動議', ko: '동의안', zh: '动议',
+    },
+    plural: {
+      en: 'Motions', sv: 'Motioner', da: 'Forslag', no: 'Forslag',
+      fi: 'Aloitteet', de: 'Anträge', fr: 'Motions', es: 'Mociones',
+      nl: 'Moties', ar: 'مقترحات', he: 'הצעות', ja: '動議', ko: '동의안', zh: '动议',
+    },
+  },
+  // skr — "Government Communication"
+  skr: {
+    singular: {
+      en: 'Government Communication', sv: 'Skrivelse', da: 'Regeringsskrivelse', no: 'Regjeringsskriv',
+      fi: 'Valtioneuvoston kirjelmä', de: 'Regierungsschreiben', fr: 'Communication du gouvernement', es: 'Comunicación del gobierno',
+      nl: 'Regeringsmededeling', ar: 'مذكرة حكومية', he: 'מכתב ממשלתי', ja: '政府通信文書', ko: '정부 통신문', zh: '政府公文',
+    },
+    plural: {
+      en: 'Government Communications', sv: 'Skrivelser', da: 'Regeringsskrivelser', no: 'Regjeringsskriv',
+      fi: 'Valtioneuvoston kirjelmät', de: 'Regierungsschreiben', fr: 'Communications du gouvernement', es: 'Comunicaciones del gobierno',
+      nl: 'Regeringsmededelingen', ar: 'مذكرات حكومية', he: 'מכתבים ממשלתיים', ja: '政府通信文書', ko: '정부 통신문', zh: '政府公文',
+    },
+  },
+  // sfs — "Law/Statute"
+  sfs: {
+    singular: {
+      en: 'Law/Statute', sv: 'Lag/förordning', da: 'Lov/forordning', no: 'Lov/forordning',
+      fi: 'Laki/asetus', de: 'Gesetz/Verordnung', fr: 'Loi/Règlement', es: 'Ley/Reglamento',
+      nl: 'Wet/Verordening', ar: 'قانون / لائحة', he: 'חוק/תקנה', ja: '法律／条例', ko: '법률/법규', zh: '法律/法规',
+    },
+    plural: {
+      en: 'Laws/Statutes', sv: 'Lagar/förordningar', da: 'Love/forordninger', no: 'Lover/forordninger',
+      fi: 'Lait/asetukset', de: 'Gesetze/Verordnungen', fr: 'Lois/Règlements', es: 'Leyes/Reglamentos',
+      nl: 'Wetten/Verordeningen', ar: 'قوانين / لوائح', he: 'חוקים/תקנות', ja: '法律／条例', ko: '법률/법규', zh: '法律/法规',
+    },
+  },
+  // fpm — "EU Position Paper"
+  fpm: {
+    singular: {
+      en: 'EU Position Paper', sv: 'Faktapromemoria', da: 'EU-faktanota', no: 'EU-faktanotat',
+      fi: 'EU-tietomuistio', de: 'EU-Positionspapier', fr: 'Note de position UE', es: 'Documento de posición de la UE',
+      nl: 'EU-positiepaper', ar: 'ورقة موقف للاتحاد الأوروبي', he: 'מסמך עמדה של האיחוד האירופי', ja: 'EUポジションペーパー', ko: 'EU 입장 문서', zh: '欧盟立场文件',
+    },
+    plural: {
+      en: 'EU Position Papers', sv: 'Faktapromemorior', da: 'EU-faktanotaer', no: 'EU-faktanotater',
+      fi: 'EU-tietomuistiot', de: 'EU-Positionspapiere', fr: 'Notes de position UE', es: 'Documentos de posición de la UE',
+      nl: 'EU-positiepapers', ar: 'أوراق موقف للاتحاد الأوروبي', he: 'מסמכי עמדה של האיחוד האירופי', ja: 'EUポジションペーパー', ko: 'EU 입장 문서', zh: '欧盟立场文件',
+    },
+  },
+  // pressm — "Press Release"
+  pressm: {
+    singular: {
+      en: 'Press Release', sv: 'Pressmeddelande', da: 'Pressemeddelelse', no: 'Pressemelding',
+      fi: 'Lehdistötiedote', de: 'Pressemitteilung', fr: 'Communiqué de presse', es: 'Comunicado de prensa',
+      nl: 'Persbericht', ar: 'بيان صحفي', he: 'הודעה לעיתונות', ja: 'プレスリリース', ko: '보도자료', zh: '新闻稿',
+    },
+    plural: {
+      en: 'Press Releases', sv: 'Pressmeddelanden', da: 'Pressemeddelelser', no: 'Pressemeldinger',
+      fi: 'Lehdistötiedotteet', de: 'Pressemitteilungen', fr: 'Communiqués de presse', es: 'Comunicados de prensa',
+      nl: 'Persberichten', ar: 'بيانات صحفية', he: 'הודעות לעיתונות', ja: 'プレスリリース', ko: '보도자료', zh: '新闻稿',
+    },
+  },
+  // ext — "External Reference"
+  ext: {
+    singular: {
+      en: 'External Reference', sv: 'Extern referens', da: 'Ekstern reference', no: 'Ekstern referanse',
+      fi: 'Ulkoinen viite', de: 'Externe Referenz', fr: 'Référence externe', es: 'Referencia externa',
+      nl: 'Externe referentie', ar: 'مرجع خارجي', he: 'הפניה חיצונית', ja: '外部参照', ko: '외부 참조', zh: '外部参考',
+    },
+    plural: {
+      en: 'External References', sv: 'Externa referenser', da: 'Eksterne referencer', no: 'Eksterne referanser',
+      fi: 'Ulkoiset viitteet', de: 'Externe Referenzen', fr: 'Références externes', es: 'Referencias externas',
+      nl: 'Externe referenties', ar: 'مراجع خارجية', he: 'הפניות חיצוניות', ja: '外部参照', ko: '외부 참조', zh: '外部参考',
+    },
+  },
+  // other — "Other Document"
+  other: {
+    singular: {
+      en: 'Other Document', sv: 'Övrigt dokument', da: 'Andet dokument', no: 'Annet dokument',
+      fi: 'Muu asiakirja', de: 'Sonstiges Dokument', fr: 'Autre document', es: 'Otro documento',
+      nl: 'Overig document', ar: 'مستند آخر', he: 'מסמך אחר', ja: 'その他の文書', ko: '기타 문서', zh: '其他文件',
+    },
+    plural: {
+      en: 'Other Documents', sv: 'Övriga dokument', da: 'Andre dokumenter', no: 'Andre dokumenter',
+      fi: 'Muut asiakirjat', de: 'Sonstige Dokumente', fr: 'Autres documents', es: 'Otros documentos',
+      nl: 'Overige documenten', ar: 'مستندات أخرى', he: 'מסמכים אחרים', ja: 'その他の文書', ko: '기타 문서', zh: '其他文件',
+    },
+  },
+};
+
+/**
+ * Localise a raw Riksdag document type code for display
+ * (singular/plural-aware, multi-language).
+ *
+ * Lookup order: requested form in `lang` → requested form in English →
+ * the other form in `lang` → the other form in English → the raw `code`.
+ * The plural form is requested unless `count` is exactly 1 (so an omitted
+ * count yields the plural).
+ *
+ * Only own properties of the lookup tables are consulted, so codes or
+ * languages that happen to name inherited object members (`constructor`,
+ * `toString`, `__proto__`, …) are treated as unknown instead of throwing
+ * or returning a non-string value.
+ *
+ * @param code - Raw document type code, e.g. `prop` or `bet`
+ * @param lang - Target language code; unknown languages fall back to English
+ * @param count - Number of documents; selects singular (1) or plural (anything else)
+ * @returns The localized display name, or `code` unchanged when no label exists
+ */
+export function localizeDocType(code: string, lang: Language | string, count?: number): string {
+  const hasOwn = (obj: object, key: string): boolean =>
+    Object.prototype.hasOwnProperty.call(obj, key);
+
+  // Guard against prototype-chain hits such as DOC_TYPE_DISPLAY['constructor'].
+  if (!hasOwn(DOC_TYPE_DISPLAY, code)) return code;
+  const entry = DOC_TYPE_DISPLAY[code];
+  if (!entry) return code;
+
+  const usePlural = count !== 1;
+  const primary = usePlural ? entry.plural : entry.singular;
+  const fallback = usePlural ? entry.singular : entry.plural;
+
+  // Same guard for the language key: names['toString'] must not resolve to a function.
+  const pick = (names: Partial<Record<Language, string>>): string | undefined =>
+    hasOwn(names, lang) ? names[lang as Language] : undefined;
+
+  return pick(primary) ?? primary.en ?? pick(fallback) ?? fallback.en ?? code;
+}
+
+/**
+ * Per-language title-suffix templates for inverted-pyramid lede construction.
+ * Each function takes a document title and returns the suffix fragment
+ * ("— including "<title>"" in the target language) to append to a lede.
+ * There is no entry for `en`.
+ */
+export const TITLE_SUFFIX_TEMPLATES: Readonly<Record<string, (t: string) => string>> = {
+  sv: (title) => ' — inklusive "'.concat(title, '"'),
+  da: (title) => ' — herunder "'.concat(title, '"'),
+  no: (title) => ' — inkludert "'.concat(title, '"'),
+  fi: (title) => ' — mukaan lukien "'.concat(title, '"'),
+  de: (title) => ' — darunter "'.concat(title, '"'),
+  fr: (title) => ' — notamment "'.concat(title, '"'),
+  es: (title) => ' — incluyendo "'.concat(title, '"'),
+  nl: (title) => ' — inclusief "'.concat(title, '"'),
+  ar: (title) => ' — بما فيها "'.concat(title, '"'),
+  he: (title) => ' — כולל "'.concat(title, '"'),
+  ja: (title) => '、「'.concat(title, '」を含む'),
+  ko: (title) => ', "'.concat(title, '" 포함'),
+  zh: (title) => '，包括"'.concat(title, '"'),
+};
diff --git a/scripts/data-transformers/content-generators/event-helpers.ts b/scripts/data-transformers/content-generators/event-helpers.ts
@@ -0,0 +1,62 @@
+/**
+ * @module data-transformers/content-generators/event-helpers
+ * @description Calendar event and document matching helpers.
+ * Provides keyword extraction, related document lookup, and
+ * minister extraction from interpellation summaries.
+ *
+ * @author Hack23 AB
+ * @license Apache-2.0
+ */
+
+import type { RawDocument, RawCalendarEvent } from '../types.js';
+
+/**
+ * Break text into lower-cased keywords for cross-reference matching.
+ * The text is split on runs of whitespace, commas, en dashes and hyphens;
+ * tokens shorter than 2 characters are dropped (2-character tokens such as
+ * "eu" or "ku" are kept).
+ */
+function extractKeywords(text: string): string[] {
+  const tokens = text.toLowerCase().split(/[\s,–-]+/u);
+  const keywords: string[] = [];
+  for (const token of tokens) {
+    if (token.length >= 2) keywords.push(token);
+  }
+  return keywords;
+}
+
+/**
+ * Find up to three documents related to a calendar event.
+ *
+ * A document is related when its organ (or, failing that, committee) equals
+ * the event's organ ignoring case, or when its title contains any keyword
+ * extracted from the event's headline (`rubrik`, else `titel`, else `title`).
+ * Keyword matching is a plain substring test on the lower-cased title.
+ *
+ * @param event - Calendar event to match against
+ * @param documents - Candidate documents, scanned in order
+ * @returns The first three matching documents, in input order
+ */
+export function findRelatedDocuments(event: RawCalendarEvent, documents: RawDocument[]): RawDocument[] {
+  const eventOrgan = event.organ ?? '';
+  const headline = event.rubrik ?? event.titel ?? event.title ?? '';
+  const keywords = extractKeywords(headline);
+
+  const isRelated = (doc: RawDocument): boolean => {
+    const docOrgan = doc.organ ?? doc.committee ?? '';
+    if (eventOrgan && docOrgan) {
+      // Organ match wins outright; no keyword check needed.
+      if (eventOrgan.toLowerCase() === docOrgan.toLowerCase()) return true;
+    }
+    const heading = (doc.titel ?? doc.title ?? '').toLowerCase();
+    return keywords.some(kw => heading.includes(kw));
+  };
+
+  return documents.filter(isRelated).slice(0, 3);
+}
+
+/**
+ * Find up to three written questions related to a calendar event.
+ *
+ * A question is related when its title (`titel`, else `title`) contains,
+ * as a lower-cased substring, any keyword extracted from the event's
+ * headline (`rubrik`, else `titel`, else `title`).
+ *
+ * @param event - Calendar event to match against
+ * @param questions - Candidate questions, scanned in order
+ * @returns The first three matching questions, in input order
+ */
+export function findRelatedQuestions(event: RawCalendarEvent, questions: RawDocument[]): RawDocument[] {
+  const headline = event.rubrik ?? event.titel ?? event.title ?? '';
+  const keywords = extractKeywords(headline);
+
+  const mentionsKeyword = (question: RawDocument): boolean => {
+    const heading = (question.titel ?? question.title ?? '').toLowerCase();
+    return keywords.some(kw => heading.includes(kw));
+  };
+
+  return questions.filter(mentionsKeyword).slice(0, 3);
+}
+
+/**
+ * Extract the targeted minister from an interpellation summary.
+ *
+ * Takes the remainder of the line after the first whole-word "till"
+ * (case-insensitive), then cuts it at whichever comes first: a trailing
+ * topic clause (" om ", " angående ", " rörande ", " beträffande ",
+ * case-insensitive) or one of the punctuation characters `? : ; . ,`.
+ *
+ * Cut positions are located directly in the captured text rather than in a
+ * lower-cased copy: lower-casing can change a string's length (e.g. U+0130),
+ * which would make indexes from the copy point at the wrong place.
+ *
+ * @param summary - Interpellation summary text, possibly multi-line
+ * @returns The trimmed minister name/title, or `''` when no "till" line is found
+ */
+export function extractMinister(summary: string): string {
+  // [^\S\n]+ is "whitespace except newline", so the match never crosses into the next line
+  const m = /\btill[^\S\n]+([^\n]+)/i.exec(summary);
+  const raw = (m?.[1] ?? '').trim();
+  if (!raw) return '';
+
+  // Candidate cut points: earliest stop phrase and earliest terminating punctuation
+  const cutPoints = [
+    raw.search(/ (?:om|angående|rörande|beträffande) /i),
+    raw.search(/[?:;.,]/),
+  ].filter(idx => idx !== -1);
+  const end = cutPoints.length > 0 ? Math.min(...cutPoints) : raw.length;
+
+  return raw.slice(0, end).trim();
+}