Platform-OS · fklosowski · May 6, 2026 · Apr 16, 2026 · Apr 16, 2026 · Apr 16, 2026
diff --git a/.gitignore b/.gitignore
@@ -3,9 +3,25 @@
 /modules
 /prompts
 /.claude
+.serena
 IDEAS.md
 node_modules/
 *.log
 *.jsonl
 .mcp.json
 CLAUDE.md
+/.pos-supervisor
+
+# Test artifacts — integration tests write to fixture .pos-supervisor dirs
+**/.pos-supervisor/sessions/
+**/.pos-supervisor/blobs/
+**/.pos-supervisor/analytics.db
+**/.pos-supervisor/analytics.db-wal
+**/.pos-supervisor/analytics.db-shm
+
+# Stray test/scratch files
+/t
+/2026-*.txt
+.serena/project.yml
+.gitignore~
+.serena/project.yml
diff --git a/CHANGELOG.md b/CHANGELOG.md
diff --git a/SYSTEM_ARCHITECTURE.md b/SYSTEM_ARCHITECTURE.md
diff --git a/package-lock.json b/package-lock.json
diff --git a/package.json b/package.json
@@ -1,6 +1,6 @@
 {
   "name": "@platformos/pos-supervisor",
-  "version": "0.5.2",
+  "version": "0.7.3",
   "description": "platformOS domain-specific MCP server for LLM agents",
   "type": "module",
   "bin": {
@@ -13,7 +13,7 @@
   },
   "dependencies": {
     "@modelcontextprotocol/sdk": "^1.29.0",
-    "@platformos/liquid-html-parser": "^0.0.11",
+    "@platformos/liquid-html-parser": "^0.0.17",
     "js-yaml": "^4.1.1",
     "zod": "^4.3.6"
   },

diff --git a/scripts/cleanup-live-console-rows.js b/scripts/cleanup-live-console-rows.js
@@ -0,0 +1,94 @@
+#!/usr/bin/env bun
+/**
+ * One-off cleanup — remove analytics rows that originated from the dashboard
+ * Live Diagnostic Console before A3 introduced the `untracked` gate.
+ *
+ * Symptom those rows caused: `__pos_live_console__` files appearing in the
+ * `OrphanedPartial` and `pos-supervisor:MissingDocBlock` file distributions
+ * of the supervisor report. Every live-console validation wrote a
+ * `validator_emit` that the store replayed into diagnostics/outcomes.
+ *
+ * Usage:
+ *   bun scripts/cleanup-live-console-rows.js [/path/to/project]
+ *
+ * Project path defaults to POS_SUPERVISOR_PROJECT_DIR or the current working
+ * directory. Runs against `.pos-supervisor/analytics.db` under that project.
+ *
+ * Safe to re-run — purely a DELETE of rows whose `file` (or, for `events`,
+ * `payload`) contains the live-console sentinel. No schema changes.
+ */
+
+import { join, resolve } from 'node:path';
+import { existsSync } from 'node:fs';
+import { openAnalyticsStore } from '../src/core/analytics-store.js';
+
+const LIVE_CONSOLE_NEEDLE = '__pos_live_console__';
+
+function parseArgs() {
+  const [, , cliProjectPath] = process.argv; // first CLI argument, if any
+  const chosen = cliProjectPath ?? process.env.POS_SUPERVISOR_PROJECT_DIR ?? process.cwd();
+  return { projectDir: resolve(chosen) };
+}
+
+/**
+ * Delete every analytics row tied to the live-console sentinel and print the
+ * per-table counts captured before deletion. Exits 0 when no DB file exists.
+ * A failed delete is rolled back and rethrown; the store is always closed.
+ */
+function main() {
+  const { projectDir } = parseArgs();
+  const dbPath = join(projectDir, '.pos-supervisor', 'analytics.db');
+
+  if (!existsSync(dbPath)) {
+    console.error(`No analytics DB at ${dbPath}. Nothing to clean.`);
+    process.exit(0);
+  }
+
+  // `_` and `%` are LIKE wildcards: unescaped, the sentinel's underscores match
+  // any character (e.g. `repos-live-console`). Escape them to match literally.
+  const pattern = `%${LIVE_CONSOLE_NEEDLE.replace(/[\\%_]/g, '\\$&')}%`;
+  const like = (column) => `${column} LIKE ? ESCAPE '\\'`;
+  // Insertion order here is the order the report prints in.
+  const filters = {
+    events: like('payload'),
+    diagnostics: like('file'),
+    outcomes: like('file'),
+    windows: like('file'),
+    proposed_fixes: `fp IN (SELECT fp FROM diagnostics WHERE ${like('file')})`,
+  };
+  // proposed_fixes is located through diagnostics rows, so it goes first and
+  // diagnostics is only removed afterwards.
+  const deleteOrder = ['proposed_fixes', 'outcomes', 'windows', 'diagnostics', 'events'];
+
+  const store = openAnalyticsStore(dbPath);
+  try {
+    const db = store.db;
+    const beforeCounts = {};
+    for (const [table, where] of Object.entries(filters)) {
+      const sql = `SELECT COUNT(*) AS n FROM ${table} WHERE ${where}`;
+      beforeCounts[table] = db.prepare(sql).get(pattern).n;
+    }
+
+    db.exec('BEGIN');
+    try {
+      for (const table of deleteOrder) {
+        db.prepare(`DELETE FROM ${table} WHERE ${filters[table]}`).run(pattern);
+      }
+      db.exec('COMMIT');
+    } catch (e) {
+      db.exec('ROLLBACK');
+      console.error('Cleanup failed; rolled back.');
+      throw e;
+    }
+
+    console.log(`Removed live-console rows from ${dbPath}:`);
+    for (const [table, count] of Object.entries(beforeCounts)) {
+      console.log(`  ${table.padEnd(16)} ${count}`);
+    }
+  } finally {
+    // Release the DB handle on the failure path too, not just on success.
+    store.close();
+  }
+}
+
+main();
diff --git a/scripts/rebuild-analytics.js b/scripts/rebuild-analytics.js
@@ -0,0 +1,57 @@
+#!/usr/bin/env node
+/**
+ * Rebuild the analytics DB from session event logs.
+ *
+ * Usage:
+ *   node scripts/rebuild-analytics.js /path/to/project
+ *
+ * The project must have a .pos-supervisor/ directory with sessions/ and analytics.db.
+ * The server must NOT be running when this script executes (WAL mode allows reads
+ * but schema migrations can conflict with a live server).
+ */
+
+import { join } from 'node:path';
+import { existsSync } from 'node:fs';
+import { openAnalyticsStore } from '../src/core/analytics-store.js';
+import { openBlobStore } from '../src/core/blob-store.js';
+
+const [, , projectDir] = process.argv;
+if (!projectDir) {
+  console.error('Usage: node scripts/rebuild-analytics.js /path/to/project');
+  process.exit(1);
+}
+
+// Everything the rebuild touches lives under <project>/.pos-supervisor.
+const supervisorDir = join(projectDir, '.pos-supervisor');
+const [dbPath, sessionsDir, blobsDir] = ['analytics.db', 'sessions', 'blobs']
+  .map((entry) => join(supervisorDir, entry));
+
+// Bail out before opening anything when a required directory is missing.
+const requiredDirs = [['.pos-supervisor', supervisorDir], ['sessions', sessionsDir]];
+for (const [label, dir] of requiredDirs) {
+  if (!existsSync(dir)) {
+    console.error(`No ${label} directory found at: ${dir}`);
+    process.exit(1);
+  }
+}
+
+console.log(`DB:       ${dbPath}`);
+console.log(`Sessions: ${sessionsDir}`);
+console.log(`Blobs:    ${blobsDir}`);
+console.log('Rebuilding...');
+
+// Blob store feeds fix-adoption classification (file snapshots, proposed-fix
+// texts). If it cannot be opened, outcome rows land with fix_applied = null.
+let blobStore = null;
+try {
+  blobStore = openBlobStore(blobsDir);
+} catch (e) {
+  console.warn(`Blob store unavailable (${e.message}); fix adoption will not be classified.`);
+}
+const store = openAnalyticsStore(dbPath, { blobStore });
+try {
+  const { sessions, events } = store.rebuild(sessionsDir);
+  console.log(`Done. Replayed ${events} events across ${sessions} sessions.`);
+} finally {
+  store.close(); // release the analytics DB handle even when the replay throws
+}
diff --git a/src/core/analytics-labels.js b/src/core/analytics-labels.js
@@ -0,0 +1,126 @@
+/**
+ * Analytics labels — single source of truth for the GOOD / OK / LOW / HARMFUL,
+ * AT RISK / UNMATCHED, and INSUFFICIENT_DATA presentation-layer labels.
+ *
+ * Pure functions, intentionally side-effect-free. The HTTP layer attaches
+ * `.label` to each scorecard / rule-performance row before serialising; the
+ * dashboard browser code and Markdown report consume that field directly so
+ * label logic isn't duplicated (or drifted) between server and client.
+ *
+ * INSUFFICIENT_DATA gate (`LABEL_MIN_OUTCOMES`) is the load-bearing change.
+ * Labels computed from a sample of one — `AT RISK -100%` on a single
+ * regression — are statistically meaningless and previously caused operators
+ * to chase ghosts of already-fixed rules. Below the threshold we return a
+ * neutral label that says "we don't know yet" instead of a confident wrong
+ * answer.
+ *
+ * The threshold is conservative on purpose: 5 outcomes lets a Beta(2,2)
+ * posterior collapse from "wide ribbon" to a meaningful interval. Engine-side
+ * decisions (auto-disable in case-base.ruleScores) use a stricter gate of 10
+ * because promotion/demotion is more consequential than display.
+ */
+
+export const LABEL_MIN_OUTCOMES = 5;
+
+/**
+ * Collapse a rate to a plain number: an object contributes its numeric
+ * `.mean`, a bare number is returned untouched, and anything else becomes 0.
+ * Intended to mirror the dashboard `rateVal()` helper.
+ */
+function asRate(r) {
+  const posteriorMean = r !== null && typeof r === 'object' ? r.mean : undefined;
+  if (typeof posteriorMean === 'number') return posteriorMean;
+  return typeof r === 'number' ? r : 0;
+}
+
+/**
+ * Per-check scorecard label.
+ *
+ * Accepts a row from `checkScorecards()` carrying `.resolution_rate`,
+ * `.mislead_rate`, and either `.sample_size` (preferred) or `.total_outcomes`.
+ * Each rate may be a Beta posterior `{ mean, lower95, upper95 }` or a number.
+ *
+ * Returns INSUFFICIENT_DATA when `card` is not an object, when fewer than
+ * LABEL_MIN_OUTCOMES outcomes were seen, or when effectiveness
+ * (resolution - mislead) is not finite. Otherwise: GOOD (> 0.5),
+ * OK (> 0.15), LOW (>= 0), HARMFUL (< 0).
+ */
+export function checkLabel(card) {
+  if (!card || typeof card !== 'object') return 'INSUFFICIENT_DATA';
+  const sampleSize = Number(card.sample_size ?? card.total_outcomes ?? 0);
+  if (!Number.isFinite(sampleSize) || sampleSize < LABEL_MIN_OUTCOMES) {
+    return 'INSUFFICIENT_DATA';
+  }
+  const effectiveness = asRate(card.resolution_rate) - asRate(card.mislead_rate);
+  // NaN fails every comparison below and would fall through to HARMFUL.
+  if (!Number.isFinite(effectiveness)) return 'INSUFFICIENT_DATA';
+  if (effectiveness > 0.5)   return 'GOOD';
+  if (effectiveness > 0.15)  return 'OK';
+  if (effectiveness >= 0)    return 'LOW';
+  return 'HARMFUL';
+}
+
+/**
+ * Per-rule_id performance label.
+ *
+ * Takes a row from `rulePerformance()` / `ruleScores()` and reads its
+ * `.unmatched`, `.total_outcomes`, and `.effectiveness` fields.
+ *
+ * Checks run in this order; the first one that applies decides the label:
+ *   1. INSUFFICIENT_DATA — `rule` is not an object.
+ *   2. UNMATCHED         — `.unmatched` is truthy. Tested ahead of the
+ *                          sample-size gate, so a coverage gap is reported
+ *                          however few outcomes exist.
+ *   3. INSUFFICIENT_DATA — `.total_outcomes` (default 0) is non-finite or
+ *                          below LABEL_MIN_OUTCOMES.
+ *   4. INSUFFICIENT_DATA — `.effectiveness` (default 0) is non-finite.
+ *   5. AT RISK           — effectiveness < 0.15, negative values included.
+ *   6. OK                — everything else.
+ *
+ * Per the producer's convention, `effectiveness` is `resolution_rate -
+ * regression_rate` rather than a 0..1 percentage, so it can be negative.
+ */
+export function ruleLabel(rule) {
+  const isRow = Boolean(rule) && typeof rule === 'object';
+  if (!isRow) return 'INSUFFICIENT_DATA';
+  if (rule.unmatched) return 'UNMATCHED';
+
+  const outcomeCount = Number(rule.total_outcomes ?? 0);
+  const enoughData = Number.isFinite(outcomeCount) && outcomeCount >= LABEL_MIN_OUTCOMES;
+  if (!enoughData) return 'INSUFFICIENT_DATA';
+  const score = Number(rule.effectiveness ?? 0);
+  if (!Number.isFinite(score)) return 'INSUFFICIENT_DATA';
+  return score < 0.15 ? 'AT RISK' : 'OK';
+}
+
+/**
+ * Select the scorecards whose `checkLabel` is HARMFUL — the rows surfaced as
+ * a HARMFUL headline in the Markdown report's executive summary. Because the
+ * label comes from `checkLabel`, rows below the sample-size gate are never
+ * included. Non-array input yields an empty array.
+ */
+export function harmfulSummary(scorecards) {
+  const rows = Array.isArray(scorecards) ? scorecards : [];
+  return rows.filter((card) => checkLabel(card) === 'HARMFUL');
+}
+
+/**
+ * Produce a labelled copy of a scorecard array: each row is shallow-copied
+ * and gains a `.label` computed by `checkLabel`, so the input array and its
+ * rows are left unmodified. Non-array input yields an empty array. Meant for
+ * HTTP handlers that send pre-labelled rows to the dashboard.
+ */
+export function withCheckLabels(scorecards) {
+  const rows = Array.isArray(scorecards) ? scorecards : [];
+  const labelled = (card) => ({ ...card, label: checkLabel(card) });
+  return rows.map((card) => labelled(card));
+}
+
+/**
+ * Rule-row counterpart of `withCheckLabels`: shallow-copies each row and adds
+ * a `.label` from `ruleLabel`. Non-array input yields an empty array.
+ */
+export function withRuleLabels(rules) {
+  const rows = Array.isArray(rules) ? rules : [];
+  return rows.map((row) => ({ ...row, label: ruleLabel(row) }));
+}