Skip to content

Commit 6701978

Browse files
author
StackMemory Bot (CLI)
committed
feat(harness): audit CLI, edit telemetry, sm_edit fuzzy MCP tool
Three immediate actions from harness review: 1. `stackmemory audit` — measures context overhead tokens from CLAUDE.md files, auto-memory, handoff, MCP schemas, and the hot stack. Supports --json for machine-readable output. 2. Edit telemetry — PostToolUse hook logs Edit/Write success/failure to the edit_telemetry table via the sqlite3 CLI. `stackmemory stats edits` shows success rate, top failure files, error types, and daily trend. 3. `sm_edit` MCP tool — four-tier fuzzy matching (exact, whitespace-normalized, indentation-insensitive, line-level Levenshtein) as a fallback when CC's Edit fails on whitespace mismatches.
1 parent b7985a2 commit 6701978

11 files changed

Lines changed: 1227 additions & 1 deletion

File tree

.claude/claude.json

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,16 @@
6969
"timeout": 30
7070
}
7171
]
72+
},
73+
{
74+
"matcher": "Edit|MultiEdit|Write",
75+
"hooks": [
76+
{
77+
"type": "command",
78+
"command": "# Log edit telemetry to .stackmemory/context.db\ndb_path=\"$CLAUDE_PROJECT_DIR/.stackmemory/context.db\"\nif [ ! -f \"$db_path\" ]; then exit 0; fi\ntool_name=\"${CLAUDE_TOOL_NAME:-unknown}\"\nfile_path=\"${CLAUDE_TOOL_INPUT_FILE_PATH:-}\"\noutput=\"${CLAUDE_TOOL_OUTPUT:-}\"\nsuccess=1\nerror_type=\"\"\nerror_msg=\"\"\nif echo \"$output\" | grep -qi 'not found in file\\|string to replace not found\\|could not find\\|multiple occurrences'; then\n success=0\n if echo \"$output\" | grep -qi 'not found in file\\|string to replace not found\\|could not find'; then\n error_type='string_not_found'\n elif echo \"$output\" | grep -qi 'multiple occurrences'; then\n error_type='multiple_matches'\n fi\n error_msg=$(echo \"$output\" | head -1 | cut -c1-200)\nfi\nsqlite3 \"$db_path\" \"INSERT INTO edit_telemetry (tool_name, file_path, success, error_type, error_message) VALUES ('$(echo \"$tool_name\" | sed \"s/'/''/g\")', '$(echo \"$file_path\" | sed \"s/'/''/g\")', $success, '$(echo \"$error_type\" | sed \"s/'/''/g\")', '$(echo \"$error_msg\" | sed \"s/'/''/g\")')\" 2>/dev/null\nexit 0",
79+
"timeout": 5
80+
}
81+
]
7282
}
7383
]
7484
}

docs/specs/HARNESS_REVIEW.md

Lines changed: 320 additions & 0 deletions
Large diffs are not rendered by default.
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
import { describe, it, expect, beforeEach, afterEach, vi } from 'vitest';
2+
import { Command } from 'commander';
3+
import { createAuditCommand } from '../audit.js';
4+
5+
describe('audit command', () => {
  let logSpy: ReturnType<typeof vi.spyOn>;

  /** Runs `stackmemory audit` with any extra CLI arguments on a fresh program. */
  async function runAudit(...extraArgs: string[]): Promise<void> {
    const program = new Command();
    program.addCommand(createAuditCommand());
    await program.parseAsync(['node', 'stackmemory', 'audit', ...extraArgs]);
  }

  beforeEach(() => {
    // Silence console.log and capture its calls for the assertions below.
    logSpy = vi.spyOn(console, 'log').mockImplementation(() => {});
  });

  afterEach(() => {
    // Put the real console.log back so the mock does not outlive this test.
    logSpy.mockRestore();
  });

  it('outputs table and JSON with correct shape', async () => {
    // Table output
    await runAudit();
    expect(logSpy).toHaveBeenCalledWith(expect.stringContaining('TOTAL'));

    // JSON output
    logSpy.mockClear();
    await runAudit('--json');

    // Keep only the logged values that parse as JSON objects; anything else
    // (plain text, primitives) is ignored rather than failing the lookup.
    const parsedObjects = logSpy.mock.calls.flatMap((call) => {
      try {
        const value: unknown = JSON.parse(String(call[0]));
        return typeof value === 'object' && value !== null ? [value] : [];
      } catch {
        return [];
      }
    });
    const report = parsedObjects.find((value) => 'entries' in value) as
      | { entries?: unknown; totalTokens?: unknown }
      | undefined;

    expect(report).toBeDefined();
    expect(Array.isArray(report?.entries)).toBe(true);
    expect(typeof report?.totalTokens).toBe('number');
  });
});
Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
import { describe, it, expect, beforeEach, afterEach, vi } from 'vitest';
2+
import { Command } from 'commander';
3+
import { createStatsCommand } from '../stats.js';
4+
import Database from 'better-sqlite3';
5+
import { mkdtempSync, rmSync, mkdirSync } from 'fs';
6+
import { join } from 'path';
7+
import { tmpdir } from 'os';
8+
9+
describe('stats command', () => {
  let logSpy: ReturnType<typeof vi.spyOn>;
  let tempDir: string;

  /** Location of the SQLite database this test creates and seeds. */
  const dbFile = (): string => join(tempDir, '.stackmemory', 'context.db');

  /** Runs `stackmemory stats edits` on a fresh program instance. */
  async function runStatsEdits(): Promise<void> {
    const program = new Command();
    program.addCommand(createStatsCommand());
    await program.parseAsync(['node', 'stackmemory', 'stats', 'edits']);
  }

  beforeEach(() => {
    // Silence console.log and capture its calls for the assertions below.
    logSpy = vi.spyOn(console, 'log').mockImplementation(() => {});
    tempDir = mkdtempSync(join(tmpdir(), 'sm-stats-'));
    mkdirSync(join(tempDir, '.stackmemory'), { recursive: true });
    // Make process.cwd() report the temporary project directory.
    vi.spyOn(process, 'cwd').mockReturnValue(tempDir);
  });

  afterEach(() => {
    // Restores both console.log and process.cwd before the directory is removed.
    vi.restoreAllMocks();
    rmSync(tempDir, { recursive: true, force: true });
  });

  it('shows no-data message and stats with seeded data', async () => {
    // No table → no data message
    new Database(dbFile()).close();

    await runStatsEdits();
    expect(logSpy).toHaveBeenCalledWith(
      expect.stringContaining('No edit telemetry data')
    );

    // Seed data → shows stats
    logSpy.mockClear();
    const seeded = new Database(dbFile());
    try {
      seeded.exec(`
        CREATE TABLE IF NOT EXISTS edit_telemetry (
          id INTEGER PRIMARY KEY AUTOINCREMENT,
          timestamp INTEGER NOT NULL DEFAULT (unixepoch()),
          session_id TEXT, tool_name TEXT NOT NULL, file_path TEXT,
          success INTEGER NOT NULL DEFAULT 1, error_type TEXT, error_message TEXT
        );
      `);
      const now = Math.floor(Date.now() / 1000);
      const insert = seeded.prepare(
        'INSERT INTO edit_telemetry (timestamp, tool_name, file_path, success, error_type, error_message) VALUES (?, ?, ?, ?, ?, ?)'
      );
      // One successful and one failed edit of the same file.
      insert.run(now, 'Edit', '/src/foo.ts', 1, null, null);
      insert.run(now, 'Edit', '/src/foo.ts', 0, 'string_not_found', 'not found');
    } finally {
      // Always release the handle, even if seeding fails.
      seeded.close();
    }

    await runStatsEdits();
    expect(logSpy).toHaveBeenCalledWith(
      expect.stringContaining('Success Rate')
    );
  });
});

src/cli/commands/audit.ts

Lines changed: 173 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,173 @@
1+
/**
2+
* Audit Command for StackMemory CLI
3+
* Measures total tokens injected into context before user's first message.
4+
*/
5+
6+
import { Command } from 'commander';
7+
import { existsSync, readFileSync } from 'fs';
8+
import { join } from 'path';
9+
import { homedir } from 'os';
10+
11+
// Token counting - use Anthropic's tokenizer with fallback
12+
/**
 * Heuristic token estimate used when the tokenizer package is unusable:
 * one token per 3.5 characters, rounded up.
 */
const estimateTokens = (text: string): number => Math.ceil(text.length / 3.5);

/**
 * Counts the tokens in `text`. Starts as the character-based estimate and is
 * swapped for `countTokens` from `@anthropic-ai/tokenizer` when that module
 * can be imported and actually exposes a callable `countTokens`.
 */
let countTokens: (text: string) => number = estimateTokens;
try {
  const tokenizer = await import('@anthropic-ai/tokenizer');
  // Guard against the module loading without a usable named export, which
  // would otherwise leave `countTokens` undefined and fail on first call.
  if (typeof tokenizer.countTokens === 'function') {
    countTokens = tokenizer.countTokens;
  }
} catch {
  // Tokenizer package not available; keep the heuristic estimate.
}
19+
20+
/** One measured contributor to the context overhead report. */
interface AuditEntry {
  /** Human-readable label of the measured source (e.g. a file path). */
  source: string;
  /** Token count measured for this source. */
  tokens: number;
  /** Share of the report's total tokens, as a percentage. */
  percent: number;
}
25+
26+
/**
 * Reads a file as UTF-8 text without ever throwing.
 *
 * @param filePath - Path of the file to read.
 * @returns The file contents, or `null` when the file does not exist or
 *   cannot be read (for example, it is a directory or access is denied).
 */
function readFileSafe(filePath: string): string | null {
  try {
    // A single read also covers the "missing file" case (the error is caught
    // below), so no separate existence check is needed and there is no window
    // between checking and reading in which the file could disappear.
    return readFileSync(filePath, 'utf-8');
  } catch {
    // Missing or unreadable file.
    return null;
  }
}
36+
37+
/**
 * Builds the `audit` CLI command.
 *
 * When run, the command measures the token count of each context source it
 * can find — the global and project `CLAUDE.md`, the auto-memory `MEMORY.md`,
 * `.stackmemory/handoff.md`, the MCP tool schemas, and the hot-stack context
 * from `.stackmemory/context.db` — and prints either a table or, with
 * `--json`, an object of the form `{ entries, totalTokens }`.
 *
 * Sources that are missing, empty, or fail to load are left out of the report
 * rather than causing an error.
 *
 * @returns A commander `Command` named `audit`.
 */
export function createAuditCommand(): Command {
  const audit = new Command('audit')
    .description(
      'Measure context overhead (tokens injected before first message)'
    )
    .option('--json', 'Output as JSON', false)
    .action(async (options: { json?: boolean }) => {
      const projectRoot = process.cwd();
      const home = homedir();
      const entries: AuditEntry[] = [];

      // Records a source only when it has content. `percent` is a placeholder
      // here and is filled in once the total is known.
      const addEntry = (source: string, text: string | null | undefined): void => {
        if (text) {
          entries.push({ source, tokens: countTokens(text), percent: 0 });
        }
      };

      // 1. Global CLAUDE.md
      addEntry(
        '~/.claude/CLAUDE.md',
        readFileSafe(join(home, '.claude', 'CLAUDE.md'))
      );

      // 2. Project CLAUDE.md
      addEntry('./CLAUDE.md', readFileSafe(join(projectRoot, 'CLAUDE.md')));

      // 3. Auto memory (MEMORY.md), stored under a directory derived from the
      // project root by replacing every '/' with '-'.
      // NOTE(review): only '/' is replaced — confirm this matches the real
      // directory naming for roots containing other separators (e.g. '\').
      const projectSlug = projectRoot.replace(/\//g, '-');
      addEntry(
        'auto-memory/MEMORY.md',
        readFileSafe(
          join(home, '.claude', 'projects', projectSlug, 'memory', 'MEMORY.md')
        )
      );

      // 4. Handoff file
      addEntry(
        '.stackmemory/handoff.md',
        readFileSafe(join(projectRoot, '.stackmemory', 'handoff.md'))
      );

      // 5. MCP tool schemas, measured as their serialized JSON.
      try {
        const { MCPToolDefinitions } =
          await import('../../integrations/mcp/tool-definitions.js');
        const defs = new MCPToolDefinitions();
        addEntry(
          'MCP tool schemas',
          JSON.stringify(defs.getAllToolDefinitions())
        );
      } catch {
        // MCP not available — best effort, omit this source.
      }

      // 6. Active context frames (hot stack)
      try {
        const dbPath = join(projectRoot, '.stackmemory', 'context.db');
        if (existsSync(dbPath)) {
          const { default: Database } = await import('better-sqlite3');
          const { FrameManager } = await import('../../core/context/index.js');
          const db = new Database(dbPath);
          try {
            const fm = new FrameManager(db, 'cli-project');
            const hotStack = fm.getHotStackContext();
            if (hotStack) {
              addEntry('Active frames (hot stack)', hotStack);
            }
          } finally {
            // Release the handle even if reading the hot stack throws.
            db.close();
          }
        }
      } catch {
        // DB not available — best effort, omit this source.
      }

      // Calculate totals and percentages (rounded to one decimal place).
      const totalTokens = entries.reduce((sum, e) => sum + e.tokens, 0);
      for (const entry of entries) {
        entry.percent =
          totalTokens > 0
            ? Math.round((entry.tokens / totalTokens) * 1000) / 10
            : 0;
      }

      if (options.json) {
        console.log(JSON.stringify({ entries, totalTokens }, null, 2));
        return;
      }

      // Table output
      const rule = '─'.repeat(60);
      const formatRow = (label: string, tokens: string, percent: string): string =>
        `${label.padEnd(32)} ${tokens.padStart(8)} ${percent.padStart(7)}`;

      console.log('\nContext Overhead Audit');
      console.log(rule);
      console.log(formatRow('Source', 'Tokens', '%'));
      console.log(rule);

      for (const entry of entries) {
        console.log(
          formatRow(entry.source, String(entry.tokens), `${entry.percent}%`)
        );
      }

      console.log(rule);
      // With nothing measured the total share is 0%, not 100%.
      console.log(
        formatRow('TOTAL', String(totalTokens), totalTokens > 0 ? '100%' : '0%')
      );
      console.log('');
    });

  return audit;
}

0 commit comments

Comments
 (0)