Skip to content

Commit c69842a

Browse files
author
StackMemory Bot (CLI)
committed
feat(provenant): add shadow mode calibration and daily ingest workflow
- Add `provenant calibrate` command: re-scores existing nodes against current signal model, reports FP rate vs 10% target - Add `--sweep` flag for threshold combination matrix - Add GitHub Actions workflow for daily batch ingest (linear + slack) with artifact-based graph.db persistence and calibration check
1 parent 1f94366 commit c69842a

3 files changed

Lines changed: 289 additions & 0 deletions

File tree

Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
# Daily batch ingest for Provenant.
#
# Restores the graph database from the Actions cache, ingests each configured
# adapter, runs a calibration check over the last 30 days, prints the graph
# status and uploads the resulting database as a build artifact.
name: Provenant Daily Ingest

on:
  schedule:
    # Run daily at 06:00 UTC (10pm PST)
    - cron: '0 6 * * *'
  workflow_dispatch:
    inputs:
      sources:
        description: 'Comma-separated adapters to ingest (e.g. linear,slack)'
        required: false
        default: 'linear,slack'
        type: string
      dry_run:
        description: 'Dry run (score without writing)'
        required: false
        default: false
        type: boolean

# The job only needs to read the repository; keep the token least-privilege.
permissions:
  contents: read

# Never run two ingests against the same database at once; queue instead of
# cancelling so an in-flight ingest is not interrupted mid-write.
concurrency:
  group: provenant-ingest
  cancel-in-progress: false

jobs:
  ingest:
    runs-on: ubuntu-latest
    timeout-minutes: 15
    steps:
      - uses: actions/checkout@v4

      - name: Setup Node.js
        uses: actions/setup-node@v4
        with:
          node-version: '20'
          cache: 'npm'

      - name: Install dependencies
        run: npm ci

      - name: Build provenant
        working-directory: packages/provenant
        run: npx tsc

      # The cache action restores the newest `provenant-db-*` entry here and
      # saves a new entry under `key` when the job succeeds. run_attempt is
      # part of the key so that a re-run of the same workflow run does not
      # hit the exact key (an exact hit would skip the save step).
      - name: Download graph database
        uses: actions/cache@v4
        with:
          path: .provenant/graph.db
          key: provenant-db-${{ github.run_number }}-${{ github.run_attempt }}
          restore-keys: provenant-db-

      - name: Run ingest
        env:
          LINEAR_API_KEY: ${{ secrets.LINEAR_API_KEY }}
          SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
          VOYAGE_API_KEY: ${{ secrets.VOYAGE_API_KEY }}
          # Inputs are passed through the environment instead of being
          # interpolated into the script text, so their content can never be
          # parsed as shell code. Scheduled runs have no inputs, hence the
          # fallbacks.
          SOURCES: ${{ inputs.sources || 'linear,slack' }}
          DRY_RUN: ${{ inputs.dry_run || 'false' }}
        run: |
          DRY_ARGS=()
          if [ "$DRY_RUN" = "true" ]; then
            DRY_ARGS+=(--dry-run)
          fi

          IFS=',' read -ra ADAPTER_LIST <<< "$SOURCES"
          for adapter in "${ADAPTER_LIST[@]}"; do
            adapter=$(echo "$adapter" | xargs) # trim whitespace
            # Skip empty entries produced by stray commas ("linear,,slack").
            if [ -z "$adapter" ]; then
              continue
            fi
            echo "═══ Ingesting: $adapter ═══"
            # Best effort: one failing adapter must not block the others, but
            # surface it as a warning annotation on the run.
            npx tsx packages/provenant/src/cli/index.ts ingest -s "$adapter" "${DRY_ARGS[@]}" || {
              echo "::warning::⚠ Adapter $adapter failed (may not be configured), continuing..."
            }
            echo ""
          done

      - name: Run calibration check
        # Informational only: a calibration failure never fails the job.
        # GNU date is tried first, with the BSD syntax as a fallback.
        run: |
          npx tsx packages/provenant/src/cli/index.ts calibrate --since "$(date -d '30 days ago' +%Y-%m-%d 2>/dev/null || date -v-30d +%Y-%m-%d)" || true

      - name: Show status
        run: npx tsx packages/provenant/src/cli/index.ts status

      - name: Upload graph database
        uses: actions/upload-artifact@v4
        with:
          name: provenant-graph-${{ github.run_number }}
          path: .provenant/graph.db
          # The database lives in a dot-directory, which upload-artifact v4
          # treats as hidden and skips unless this is enabled.
          include-hidden-files: true
          retention-days: 90
Lines changed: 184 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,184 @@
1+
import { mkdirSync } from 'node:fs';
2+
import { dirname } from 'node:path';
3+
import { Database } from '../../schema/database.js';
4+
import { scoreRecord } from '../../scoring/confidence.js';
5+
import type { RawRecord } from '../../adapters/adapter.js';
6+
7+
/**
 * Options accepted by the `calibrate` command.
 *
 * Threshold values arrive as strings and are converted with `parseFloat`
 * by `calibrate` itself.
 */
interface CalibrateOpts {
  /** Path of the graph database file; its parent directory is created if missing. */
  db: string;
  /** Optional date string (parsed with `new Date`) limiting which nodes are loaded. */
  since?: string;
  /** Auto-accept threshold as text; 0.7 is used when absent. */
  autoAccept?: string;
  /** Review threshold as text; 0.4 is used when absent. */
  review?: string;
  /** When true, run the threshold sweep instead of a single calibration. */
  sweep?: boolean;
}
14+
15+
/** One sample row kept per bucket for display in the calibration report. */
interface BucketExample {
  /** Node content, truncated by the caller before being stored. */
  content: string;
  /** Score produced by re-scoring the node. */
  score: number;
  /** Action the re-scoring assigned to the node. */
  action: string;
}

/** Running statistics for all nodes that were re-scored into one action. */
interface BucketStats {
  /** Number of nodes counted in this bucket. */
  total: number;
  /** Per-action counters; starts out empty. */
  byAction: Record<string, number>;
  /** Average score of the bucket; starts out at 0. */
  avgConfidence: number;
  /** A few sample nodes shown in the report. */
  examples: BucketExample[];
}
21+
22+
export function calibrate(opts: CalibrateOpts): void {
23+
mkdirSync(dirname(opts.db), { recursive: true });
24+
const db = new Database(opts.db);
25+
26+
try {
27+
const status = db.getStatus();
28+
if (status.nodeCount === 0) {
29+
console.error('No nodes in graph. Ingest data first before calibrating.');
30+
process.exit(1);
31+
}
32+
33+
// Load all nodes as records for re-scoring
34+
const sinceMs = opts.since ? new Date(opts.since).getTime() : 0;
35+
const nodes = db.searchNodesByKeywords(
36+
[],
37+
10000,
38+
undefined,
39+
sinceMs || undefined
40+
);
41+
42+
console.log(`Calibrating against ${nodes.length} nodes`);
43+
console.log('═'.repeat(50));
44+
45+
if (opts.sweep) {
46+
runThresholdSweep(nodes);
47+
} else {
48+
const autoAccept = opts.autoAccept ? parseFloat(opts.autoAccept) : 0.7;
49+
const reviewThreshold = opts.review ? parseFloat(opts.review) : 0.4;
50+
runCalibration(nodes, autoAccept, reviewThreshold);
51+
}
52+
} finally {
53+
db.close();
54+
}
55+
}
56+
57+
/**
 * Wraps a stored graph node in the record shape the scorer consumes.
 *
 * Every record gets the fixed external id 'calibration'; the raw payload is
 * the JSON encoding of an object holding only the node content. A null
 * actor is passed on as undefined.
 */
function nodeToRecord(node: { content: string; actor: string | null }): RawRecord {
  const { content, actor } = node;
  const record: RawRecord = {
    external_id: 'calibration',
    content,
    raw_payload: JSON.stringify({ content }),
    actor: actor ?? undefined,
  };
  return record;
}
68+
69+
/**
 * Re-scores every node with one threshold pair and prints a report: nodes
 * per resulting action (with average score and a few examples) followed by
 * the false-positive rate.
 *
 * A node counts as a potential false positive when its re-scored action is
 * anything other than 'auto_accept'; the rate is compared with a 10% target.
 *
 * @param nodes - Nodes loaded from the graph.
 * @param autoAccept - Auto-accept threshold handed to the scorer.
 * @param reviewThreshold - Review threshold handed to the scorer.
 */
function runCalibration(
  nodes: Array<{ content: string; actor: string | null; confidence: number }>,
  autoAccept: number,
  reviewThreshold: number
): void {
  const FP_TARGET_PCT = 10;
  const MAX_EXAMPLES = 3;
  const EXAMPLE_WIDTH = 80;

  if (nodes.length === 0) {
    // Without nodes a "0% FP rate" would be a misleading success message.
    console.log('No nodes to calibrate.');
    return;
  }

  const emptyBucket = (): BucketStats => ({
    total: 0,
    byAction: {},
    avgConfidence: 0,
    examples: [],
  });

  // Pre-seeded so the report keeps a stable order for the known actions.
  const buckets: Record<string, BucketStats> = {
    auto_accept: emptyBucket(),
    review: emptyBucket(),
    discard: emptyBucket(),
  };
  const scoreSums: Record<string, number> = {};
  let mismatchCount = 0;

  for (const node of nodes) {
    const result = scoreRecord(nodeToRecord(node), undefined, {
      autoAccept,
      review: reviewThreshold,
    });

    // Tolerate an action outside the pre-seeded set instead of crashing.
    let bucket = buckets[result.action];
    if (bucket === undefined) {
      bucket = emptyBucket();
      buckets[result.action] = bucket;
    }
    bucket.total++;
    scoreSums[result.action] = (scoreSums[result.action] ?? 0) + result.score;

    // Nodes in the graph were auto-accepted, so any that now score as
    // 'review' or 'discard' are potential false positives
    if (result.action !== 'auto_accept') {
      mismatchCount++;
    }

    if (bucket.examples.length < MAX_EXAMPLES) {
      bucket.examples.push({
        content: node.content.slice(0, EXAMPLE_WIDTH),
        score: result.score,
        action: result.action,
      });
    }
  }

  for (const [action, stats] of Object.entries(buckets)) {
    if (stats.total > 0) {
      stats.avgConfidence = (scoreSums[action] ?? 0) / stats.total;
    }
  }

  const fpRate = (mismatchCount / nodes.length) * 100;

  console.log(
    `\nThresholds: autoAccept=${autoAccept}, review=${reviewThreshold}`
  );
  console.log('─'.repeat(50));

  for (const [action, stats] of Object.entries(buckets)) {
    if (stats.total === 0) continue;
    const pct = ((stats.total / nodes.length) * 100).toFixed(1);
    console.log(
      `\n${action.toUpperCase()}: ${stats.total} nodes (${pct}%, avg score ${stats.avgConfidence.toFixed(2)})`
    );
    for (const ex of stats.examples) {
      console.log(`  ${ex.score.toFixed(2)}  ${ex.content}`);
    }
  }

  console.log('\n' + '═'.repeat(50));
  console.log(
    `FP rate (accepted nodes that would now be filtered): ${fpRate.toFixed(1)}%`
  );
  // Compared on integer counts so the verdict does not depend on
  // floating-point rounding of the percentage.
  if (mismatchCount * 100 > FP_TARGET_PCT * nodes.length) {
    console.log(
      `⚠ FP rate exceeds 10% target — consider lowering autoAccept threshold`
    );
  } else {
    console.log(`✓ FP rate within 10% target`);
  }
}
138+
139+
/**
 * Re-scores all nodes for a grid of (autoAccept, review) threshold pairs and
 * prints one table row per pair: the share of nodes per action and the
 * false-positive rate (share of nodes not re-scored as 'auto_accept').
 *
 * Pairs whose review threshold is not below the autoAccept threshold are
 * skipped. Rows meeting the 10% target are marked with ✓; the decision is
 * made on the exact counts, not on the rounded percentage that is displayed.
 *
 * @param nodes - Nodes loaded from the graph.
 */
function runThresholdSweep(
  nodes: Array<{ content: string; actor: string | null; confidence: number }>
): void {
  const FP_TARGET_PCT = 10;
  const COLUMN_SEPARATOR = ' │ ';

  console.log('\nThreshold sweep (autoAccept / review → FP%)');
  console.log('─'.repeat(50));
  console.log('autoAccept │ review │ accept% │ review% │ discard% │ FP%');
  console.log('───────────┼────────┼─────────┼─────────┼──────────┼─────');

  if (nodes.length === 0) {
    // Every row would show 0.0% and a ✓, which says nothing about the model.
    console.log('No nodes to sweep.');
    return;
  }

  // The record conversion does not depend on the thresholds: do it once.
  const records = nodes.map((node) => nodeToRecord(node));
  const total = records.length;
  const pct = (count: number): string => ((count / total) * 100).toFixed(1);

  const thresholds = [0.3, 0.4, 0.5, 0.6, 0.7, 0.8];
  const reviewThresholds = [0.2, 0.3, 0.4];

  for (const autoAccept of thresholds) {
    for (const review of reviewThresholds) {
      if (review >= autoAccept) continue;

      let accepted = 0;
      let reviewed = 0;
      let discarded = 0;

      for (const record of records) {
        const result = scoreRecord(record, undefined, { autoAccept, review });
        if (result.action === 'auto_accept') accepted++;
        else if (result.action === 'review') reviewed++;
        else discarded++;
      }

      const falsePositives = reviewed + discarded;
      // Integer comparison: a rate of e.g. 10.04% is displayed as "10.0"
      // but must not be marked as meeting the target.
      const withinTarget = falsePositives * 100 <= FP_TARGET_PCT * total;

      // Cell widths mirror the header labels so the columns line up.
      const row = [
        autoAccept.toFixed(1).padStart(10),
        review.toFixed(1).padStart(6),
        pct(accepted).padStart(7),
        pct(reviewed).padStart(7),
        pct(discarded).padStart(8),
        pct(falsePositives).padStart(5),
      ].join(COLUMN_SEPARATOR);

      console.log(withinTarget ? `${row} ✓` : row);
    }
  }

  console.log('\n✓ = FP rate ≤ 10% target');
}

packages/provenant/src/cli/index.ts

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ import {
1616
logOverrideResolve,
1717
} from './commands/log-override.js';
1818
import { serve } from './commands/serve.js';
19+
import { calibrate } from './commands/calibrate.js';
1920

2021
const program = new Command();
2122

@@ -132,4 +133,21 @@ program
132133
.option('--db <path>', 'Database path', '.provenant/graph.db')
133134
.action(serve);
134135

136+
// Shadow mode calibration: registers the `calibrate` sub-command, which
// re-scores nodes already stored in the graph without modifying them.
const calibrateCommand = program
  .command('calibrate')
  .description(
    'Re-score existing nodes to calibrate confidence thresholds (shadow mode)'
  );

calibrateCommand
  .option('--db <path>', 'Database path', '.provenant/graph.db')
  .option('--since <date>', 'Only calibrate nodes after this date')
  .option('--auto-accept <threshold>', 'Auto-accept threshold to test', '0.7')
  .option('--review <threshold>', 'Review threshold to test', '0.4')
  .option('--sweep', 'Sweep all threshold combinations and show FP rates', false)
  .action(calibrate);
152+
135153
program.parse();

0 commit comments

Comments
 (0)