feat(provenant): add shadow mode calibration and daily ingest workflow

StackMemory Bot (CLI) · StackMemory Bot (CLI) · commit c69842a05ccf · 2026-03-19T17:25:25.000-04:00
- Add `provenant calibrate` command: re-scores existing nodes against
  current signal model, reports FP rate vs 10% target
- Add `--sweep` flag for threshold combination matrix
- Add GitHub Actions workflow for daily batch ingest (linear + slack)
  with cache-based graph.db persistence between runs, a per-run graph.db
  artifact snapshot (90-day retention), and a calibration check
diff --git a/.github/workflows/provenant-ingest.yml b/.github/workflows/provenant-ingest.yml
@@ -0,0 +1,87 @@
+# Daily batch ingest for Provenant: restores graph.db from the Actions cache,
+# ingests each requested adapter, runs a calibration check, prints status,
+# and uploads the resulting graph.db as a build artifact.
+name: Provenant Daily Ingest
+
+on:
+  schedule:
+    # Run daily at 06:00 UTC (22:00 PST / 23:00 PDT the previous day)
+    - cron: '0 6 * * *'
+  workflow_dispatch:
+    inputs:
+      sources:
+        description: 'Comma-separated adapters to ingest (e.g. linear,slack)'
+        required: false
+        default: 'linear,slack'
+        type: string
+      dry_run:
+        description: 'Dry run (score without writing)'
+        required: false
+        default: false
+        type: boolean
+
+# All runs share one group so only one ingest executes at a time; a run that
+# is already in progress is never cancelled by a newer one.
+concurrency:
+  group: provenant-ingest
+  cancel-in-progress: false
+
+jobs:
+  ingest:
+    runs-on: ubuntu-latest
+    timeout-minutes: 15
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Setup Node.js
+        uses: actions/setup-node@v4
+        with:
+          node-version: '20'
+          cache: 'npm'
+
+      - name: Install dependencies
+        run: npm ci
+
+      - name: Build provenant
+        working-directory: packages/provenant
+        run: npx tsc
+
+      # The key is unique per run, so the exact key never hits: the newest
+      # cache matching the `provenant-db-` prefix is restored, and a fresh
+      # entry is saved under this run's key when the job finishes.
+      - name: Restore graph database cache
+        uses: actions/cache@v4
+        with:
+          path: .provenant/graph.db
+          key: provenant-db-${{ github.run_number }}
+          restore-keys: provenant-db-
+
+      - name: Run ingest
+        env:
+          LINEAR_API_KEY: ${{ secrets.LINEAR_API_KEY }}
+          SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
+          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
+          VOYAGE_API_KEY: ${{ secrets.VOYAGE_API_KEY }}
+          # Workflow inputs are passed through the environment instead of
+          # being interpolated into the script text, so their contents can
+          # never be executed as shell code.
+          INGEST_SOURCES: ${{ inputs.sources || 'linear,slack' }}
+          INGEST_DRY_RUN: ${{ inputs.dry_run || 'false' }}
+        run: |
+          SOURCES="$INGEST_SOURCES"
+          DRY_RUN="$INGEST_DRY_RUN"
+          DRY_FLAG=""
+          if [ "$DRY_RUN" = "true" ]; then
+            DRY_FLAG="--dry-run"
+          fi
+
+          IFS=',' read -ra ADAPTER_LIST <<< "$SOURCES"
+          for adapter in "${ADAPTER_LIST[@]}"; do
+            adapter=$(echo "$adapter" | xargs)  # trim whitespace
+            # Skip empty entries such as the one produced by a trailing comma.
+            if [ -z "$adapter" ]; then
+              continue
+            fi
+            echo "═══ Ingesting: $adapter ═══"
+            # DRY_FLAG is intentionally unquoted so an empty value adds no argument.
+            # A failing adapter is reported but does not stop the remaining ones.
+            npx tsx packages/provenant/src/cli/index.ts ingest -s "$adapter" $DRY_FLAG || {
+              echo "⚠ Adapter $adapter failed (may not be configured), continuing..."
+            }
+            echo ""
+          done
+
+      # Best effort: a failing calibration never fails the job (`|| true`).
+      # The second `date` form is the BSD/macOS fallback for the GNU `-d` syntax.
+      - name: Run calibration check
+        run: |
+          npx tsx packages/provenant/src/cli/index.ts calibrate --since "$(date -d '30 days ago' +%Y-%m-%d 2>/dev/null || date -v-30d +%Y-%m-%d)" || true
+
+      - name: Show status
+        run: npx tsx packages/provenant/src/cli/index.ts status
+
+      - name: Upload graph database
+        uses: actions/upload-artifact@v4
+        with:
+          name: provenant-graph-${{ github.run_number }}
+          path: .provenant/graph.db
+          # `.provenant/` is a hidden directory; upload-artifact v4 excludes
+          # hidden files by default, which would leave the artifact empty.
+          include-hidden-files: true
+          retention-days: 90
diff --git a/packages/provenant/src/cli/commands/calibrate.ts b/packages/provenant/src/cli/commands/calibrate.ts
@@ -0,0 +1,184 @@
+import { mkdirSync } from 'node:fs';
+import { dirname } from 'node:path';
+import { Database } from '../../schema/database.js';
+import { scoreRecord } from '../../scoring/confidence.js';
+import type { RawRecord } from '../../adapters/adapter.js';
+
+/** Options accepted by the `calibrate` command handler. */
+interface CalibrateOpts {
+  /** Path of the graph database file; its parent directory is created if missing. */
+  db: string;
+  /** Optional date string, parsed with `new Date(...)`, used to restrict which nodes are loaded. */
+  since?: string;
+  /** Auto-accept threshold as a string, parsed with `parseFloat`; 0.7 is used when absent. */
+  autoAccept?: string;
+  /** Review threshold as a string, parsed with `parseFloat`; 0.4 is used when absent. */
+  review?: string;
+  /** When truthy, a threshold sweep is run instead of a single-threshold calibration. */
+  sweep?: boolean;
+}
+
+/** Aggregated results for one scoring action (`auto_accept`, `review`, `discard`). */
+interface BucketStats {
+  /** Number of nodes whose re-scored action fell into this bucket. */
+  total: number;
+  /** NOTE(review): initialised to `{}` and not written anywhere in the visible code — confirm intent. */
+  byAction: Record<string, number>;
+  /** NOTE(review): presumably the mean re-scored score of the bucket — confirm that the code filling the bucket actually sets it. */
+  avgConfidence: number;
+  /** Sample nodes kept for display (content excerpt, re-scored score, resulting action). */
+  examples: Array<{ content: string; score: number; action: string }>;
+}
+
+/**
+ * Entry point for `provenant calibrate`: loads stored nodes, re-scores them
+ * and prints either a single-threshold report or a threshold sweep.
+ *
+ * Invalid `since`, `autoAccept` or `review` values are reported on stderr and
+ * set a non-zero exit code instead of being silently ignored. Failure paths
+ * use `process.exitCode` rather than `process.exit()` so that the database is
+ * always closed by the `finally` block.
+ *
+ * @param opts - Parsed CLI options; see `CalibrateOpts`.
+ */
+export function calibrate(opts: CalibrateOpts): void {
+  // Validate user input before touching the filesystem or the database.
+  let sinceMs: number | undefined;
+  if (opts.since) {
+    const parsedSince = new Date(opts.since).getTime();
+    if (Number.isNaN(parsedSince)) {
+      // Previously NaN fell through `sinceMs || undefined` and the filter was
+      // dropped without any warning.
+      console.error(`Invalid --since date: ${opts.since}`);
+      process.exitCode = 1;
+      return;
+    }
+    // A timestamp of 0 keeps its original meaning of "no date filter".
+    sinceMs = parsedSince || undefined;
+  }
+
+  const autoAccept = opts.autoAccept ? parseFloat(opts.autoAccept) : 0.7;
+  const reviewThreshold = opts.review ? parseFloat(opts.review) : 0.4;
+  // The sweep uses its own threshold grid, so only validate for a single run.
+  if (
+    !opts.sweep &&
+    !(Number.isFinite(autoAccept) && Number.isFinite(reviewThreshold))
+  ) {
+    console.error(
+      `Invalid threshold: --auto-accept=${opts.autoAccept}, --review=${opts.review}`
+    );
+    process.exitCode = 1;
+    return;
+  }
+
+  mkdirSync(dirname(opts.db), { recursive: true });
+  const db = new Database(opts.db);
+
+  try {
+    const status = db.getStatus();
+    if (status.nodeCount === 0) {
+      console.error('No nodes in graph. Ingest data first before calibrating.');
+      // process.exit() here would skip `finally` and leave the database open.
+      process.exitCode = 1;
+      return;
+    }
+
+    // Load nodes as records for re-scoring.
+    // NOTE(review): 10000 looks like a result limit — confirm against
+    // `searchNodesByKeywords`; larger graphs may be sampled only partially.
+    const nodes = db.searchNodesByKeywords([], 10000, undefined, sinceMs);
+
+    console.log(`Calibrating against ${nodes.length} nodes`);
+    console.log('═'.repeat(50));
+
+    if (opts.sweep) {
+      runThresholdSweep(nodes);
+    } else {
+      runCalibration(nodes, autoAccept, reviewThreshold);
+    }
+  } finally {
+    db.close();
+  }
+}
+
+/**
+ * Wraps a stored node in the `RawRecord` shape so it can be scored again.
+ *
+ * @param node - Stored node; only `content` and `actor` are read.
+ * @returns A record with the fixed external id `'calibration'`, the node's
+ *   content (also serialised as the raw payload) and its actor, with a `null`
+ *   actor mapped to `undefined`.
+ */
+function nodeToRecord(node: {
+  content: string;
+  actor: string | null;
+}): RawRecord {
+  const { content, actor } = node;
+  const payload = JSON.stringify({ content });
+  const record: RawRecord = {
+    external_id: 'calibration',
+    content,
+    raw_payload: payload,
+    actor: actor ?? undefined,
+  };
+  return record;
+}
+
+/**
+ * Re-scores every node with one threshold pair and prints a per-bucket
+ * breakdown followed by the false-positive (FP) rate against the 10% target.
+ *
+ * All supplied nodes are treated as previously accepted, so each node whose
+ * re-scored action is anything other than `auto_accept` counts as a
+ * potential false positive.
+ *
+ * @param nodes - Stored nodes to re-score; only `content` and `actor` are read.
+ * @param autoAccept - Auto-accept threshold passed to `scoreRecord`.
+ * @param reviewThreshold - Review threshold passed to `scoreRecord`.
+ * @throws Error if `scoreRecord` returns an action that has no bucket.
+ */
+function runCalibration(
+  nodes: Array<{ content: string; actor: string | null; confidence: number }>,
+  autoAccept: number,
+  reviewThreshold: number
+): void {
+  const newBucket = (): BucketStats => ({
+    total: 0,
+    byAction: {},
+    avgConfidence: 0,
+    examples: [],
+  });
+  const buckets: Record<string, BucketStats> = {
+    auto_accept: newBucket(),
+    review: newBucket(),
+    discard: newBucket(),
+  };
+
+  // Running score total per bucket; turned into `avgConfidence` when printing.
+  const scoreSums = new Map<string, number>();
+  let mismatchCount = 0;
+
+  for (const node of nodes) {
+    const result = scoreRecord(nodeToRecord(node), undefined, {
+      autoAccept,
+      review: reviewThreshold,
+    });
+
+    const bucket = buckets[result.action];
+    if (bucket === undefined) {
+      // Fail with a clear message instead of a TypeError on `undefined.total`.
+      throw new Error(`Unexpected scoring action: ${String(result.action)}`);
+    }
+    bucket.total++;
+    scoreSums.set(
+      result.action,
+      (scoreSums.get(result.action) ?? 0) + result.score
+    );
+
+    // Anything that no longer scores as 'auto_accept' is a potential FP.
+    if (result.action !== 'auto_accept') {
+      mismatchCount++;
+    }
+
+    // Keep only the first few nodes per bucket as display samples.
+    if (bucket.examples.length < 3) {
+      bucket.examples.push({
+        content: node.content.slice(0, 80),
+        score: result.score,
+        action: result.action,
+      });
+    }
+  }
+
+  const fpRate = nodes.length > 0 ? (mismatchCount / nodes.length) * 100 : 0;
+
+  console.log(
+    `\nThresholds: autoAccept=${autoAccept}, review=${reviewThreshold}`
+  );
+  console.log('─'.repeat(50));
+
+  for (const [action, stats] of Object.entries(buckets)) {
+    if (stats.total === 0) continue;
+    stats.avgConfidence = (scoreSums.get(action) ?? 0) / stats.total;
+    const pct = ((stats.total / nodes.length) * 100).toFixed(1);
+    console.log(
+      `\n${action.toUpperCase()} — ${stats.total} nodes (${pct}%, avg score ${stats.avgConfidence.toFixed(2)})`
+    );
+    for (const ex of stats.examples) {
+      console.log(`  ${ex.score.toFixed(2)} │ ${ex.content}`);
+    }
+  }
+
+  console.log('\n' + '═'.repeat(50));
+  console.log(
+    `FP rate (accepted nodes that would now be filtered): ${fpRate.toFixed(1)}%`
+  );
+  if (fpRate > 10) {
+    console.log(
+      `⚠ FP rate exceeds 10% target — consider lowering autoAccept threshold`
+    );
+  } else {
+    console.log(`✓ FP rate within 10% target`);
+  }
+}
+
+/**
+ * Prints a table of accept/review/discard percentages and the FP rate for
+ * every valid (autoAccept, review) pair in a fixed threshold grid.
+ *
+ * Pairs where `review >= autoAccept` are skipped. A row is marked with ✓ when
+ * its unrounded FP rate is at most 10%, the same criterion `runCalibration`
+ * applies, so a value such as 10.04% (displayed as "10.0") is not marked.
+ *
+ * @param nodes - Stored nodes to re-score; only `content` and `actor` are read.
+ */
+function runThresholdSweep(
+  nodes: Array<{ content: string; actor: string | null; confidence: number }>
+): void {
+  console.log('\nThreshold sweep (autoAccept / review → FP%)');
+  console.log('─'.repeat(50));
+  console.log('autoAccept │ review │ accept% │ review% │ discard% │ FP%');
+  console.log('───────────┼────────┼─────────┼─────────┼──────────┼─────');
+
+  const thresholds = [0.3, 0.4, 0.5, 0.6, 0.7, 0.8];
+  const reviewThresholds = [0.2, 0.3, 0.4];
+
+  // Records do not depend on the thresholds, so build them once.
+  // NOTE(review): assumes `scoreRecord` does not mutate its input — confirm.
+  const records = nodes.map((node) => nodeToRecord(node));
+  const total = records.length;
+
+  // Formats a count as a one-decimal percentage of all nodes.
+  const toPct = (count: number): string =>
+    total > 0 ? ((count / total) * 100).toFixed(1) : '0.0';
+
+  for (const autoAccept of thresholds) {
+    for (const review of reviewThresholds) {
+      if (review >= autoAccept) continue;
+
+      let accepted = 0;
+      let reviewed = 0;
+      let discarded = 0;
+
+      for (const record of records) {
+        const result = scoreRecord(record, undefined, { autoAccept, review });
+        if (result.action === 'auto_accept') accepted++;
+        else if (result.action === 'review') reviewed++;
+        else discarded++;
+      }
+
+      const fpCount = reviewed + discarded;
+      // Decide the marker on the unrounded rate, not on the display string.
+      const fpRateValue = total > 0 ? (fpCount / total) * 100 : 0;
+      const fpRate = toPct(fpCount);
+      const acceptPct = toPct(accepted);
+      const reviewPct = toPct(reviewed);
+      const discardPct = toPct(discarded);
+
+      const marker = fpRateValue <= 10 ? ' ✓' : '';
+      console.log(
+        `    ${autoAccept.toFixed(1)}    │  ${review.toFixed(1)}   │  ${acceptPct.padStart(5)}  │  ${reviewPct.padStart(5)}  │   ${discardPct.padStart(5)}  │ ${fpRate.padStart(5)}${marker}`
+      );
+    }
+  }
+
+  console.log('\n✓ = FP rate ≤ 10% target');
+}
diff --git a/packages/provenant/src/cli/index.ts b/packages/provenant/src/cli/index.ts
@@ -16,6 +16,7 @@ import {
   logOverrideResolve,
 } from './commands/log-override.js';
 import { serve } from './commands/serve.js';
+import { calibrate } from './commands/calibrate.js';
 
 const program = new Command();
 
@@ -132,4 +133,21 @@ program
   .option('--db <path>', 'Database path', '.provenant/graph.db')
   .action(serve);
 
+// Shadow mode calibration: registers the `calibrate` subcommand. Threshold
+// defaults are given as strings and handed to the `calibrate` handler as-is.
+const calibrateCommand = program.command('calibrate');
+calibrateCommand.description(
+  'Re-score existing nodes to calibrate confidence thresholds (shadow mode)'
+);
+calibrateCommand.option('--db <path>', 'Database path', '.provenant/graph.db');
+calibrateCommand.option('--since <date>', 'Only calibrate nodes after this date');
+calibrateCommand.option(
+  '--auto-accept <threshold>',
+  'Auto-accept threshold to test',
+  '0.7'
+);
+calibrateCommand.option('--review <threshold>', 'Review threshold to test', '0.4');
+calibrateCommand.option(
+  '--sweep',
+  'Sweep all threshold combinations and show FP rates',
+  false
+);
+calibrateCommand.action(calibrate);
+
 program.parse();