Skip to content

Commit aa11d6c

Browse files
author
StackMemory Bot (CLI)
committed
feat(mcp): add cord/team handlers, auto-save service, and evals
- Add cord-handlers with cord tracing MCP tools - Add team-handlers for team management tools - Register new handlers in MCP server and tool definitions - Add auto-save daemon service - Add cord-trace Claude hook template - Add cord vs flat eval scripts and initial results - Add comprehensive tests for cord and team handlers
1 parent ea9b6a2 commit aa11d6c

14 files changed

Lines changed: 4049 additions & 1 deletion

scripts/evals/cord-vs-flat-eval.ts

Lines changed: 417 additions & 0 deletions
Large diffs are not rendered by default.

scripts/evals/results/cord-vs-flat-2026-02-23T22-29-44.json

Lines changed: 519 additions & 0 deletions
Large diffs are not rendered by default.

scripts/evals/results/cord-vs-flat-2026-02-23T22-29-44.md

Lines changed: 173 additions & 0 deletions
Large diffs are not rendered by default.
Lines changed: 147 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,147 @@
1+
[
2+
{
3+
"id": "feature-profile-editor",
4+
"name": "Feature: User Profile Editor",
5+
"description": "4-step pipeline: validate schema -> build API endpoints -> create UI components -> write tests",
6+
"type": "feature_dev",
7+
"complexity": "medium",
8+
"steps": [
9+
{ "id": "validate", "action": "Validate user profile schema and permissions model" },
10+
{ "id": "api", "action": "Build CRUD API endpoints for user profiles", "depends_on": ["validate"] },
11+
{ "id": "ui", "action": "Create React profile editor components", "depends_on": ["api"] },
12+
{ "id": "tests", "action": "Write integration tests for profile CRUD flow", "depends_on": ["api", "ui"] }
13+
],
14+
"cord_transcript": [
15+
{ "tool": "cord_spawn", "args": { "goal": "Validate user profile schema", "prompt": "Review existing User model, define profile fields (name, bio, avatar, settings), check RBAC permissions" } },
16+
{ "tool": "cord_complete", "args": { "task_id": "T1", "result": "Schema: {name: string, bio: string(500), avatar: url, settings: {theme, notifications}}. RBAC: owner can edit all, admin can edit any, viewer read-only." } },
17+
{ "tool": "cord_spawn", "args": { "goal": "Build profile API endpoints", "prompt": "REST endpoints: GET/PUT /api/profile/:id, PATCH /api/profile/:id/settings", "blocked_by": ["T1"] } },
18+
{ "tool": "cord_complete", "args": { "task_id": "T2", "result": "Endpoints implemented: GET /api/profile/:id (200/404), PUT /api/profile/:id (200/400/403), PATCH /api/profile/:id/settings (200/400). Validation middleware added." } },
19+
{ "tool": "cord_spawn", "args": { "goal": "Create profile UI components", "prompt": "React components: ProfileEditor, AvatarUpload, SettingsPanel", "blocked_by": ["T2"] } },
20+
{ "tool": "cord_complete", "args": { "task_id": "T3", "result": "Components: ProfileEditor (form with validation), AvatarUpload (drag-drop + crop), SettingsPanel (toggle switches). All use profile API hooks." } },
21+
{ "tool": "cord_spawn", "args": { "goal": "Write integration tests", "prompt": "Test profile CRUD flow end-to-end", "blocked_by": ["T2", "T3"] } },
22+
{ "tool": "cord_tree", "args": { "include_results": true } }
23+
],
24+
"flat_transcript": [
25+
{ "tool": "create_task", "args": { "title": "Validate user profile schema", "description": "Review existing User model, define profile fields" } },
26+
{ "tool": "update_task_status", "args": { "task_id": "T1", "status": "completed", "notes": "Schema: {name, bio, avatar, settings}. RBAC: owner/admin/viewer." } },
27+
{ "tool": "create_task", "args": { "title": "Build profile API endpoints", "description": "REST endpoints. Schema from T1: {name: string, bio: string(500), avatar: url, settings: {theme, notifications}}. RBAC: owner can edit all, admin can edit any." } },
28+
{ "tool": "update_task_status", "args": { "task_id": "T2", "status": "completed", "notes": "GET/PUT/PATCH endpoints done" } },
29+
{ "tool": "create_task", "args": { "title": "Create profile UI components", "description": "ProfileEditor, AvatarUpload, SettingsPanel. API endpoints from T2: GET/PUT /profile/:id, PATCH /profile/:id/settings" } },
30+
{ "tool": "update_task_status", "args": { "task_id": "T3", "status": "completed", "notes": "Components done" } },
31+
{ "tool": "create_task", "args": { "title": "Write integration tests", "description": "Test CRUD flow. Must manually reference T2 and T3 results." } },
32+
{ "tool": "list_tasks", "args": {} }
33+
]
34+
},
35+
{
36+
"id": "debug-memory-leak",
37+
"name": "Debug: Memory Leak Investigation",
38+
"description": "Parallel investigation (profiler + log analysis) then fix + verify",
39+
"type": "complex_debug",
40+
"complexity": "high",
41+
"steps": [
42+
{ "id": "profiler", "action": "Run heap profiler to identify leak source" },
43+
{ "id": "logs", "action": "Analyze server logs for memory growth pattern" },
44+
{ "id": "fix", "action": "Implement fix based on profiler + log findings", "depends_on": ["profiler", "logs"] },
45+
{ "id": "verify", "action": "Verify fix with load test", "depends_on": ["fix"] }
46+
],
47+
"cord_transcript": [
48+
{ "tool": "cord_spawn", "args": { "goal": "Run heap profiler", "prompt": "Take heap snapshots at 1min intervals, identify objects with growing retention" } },
49+
{ "tool": "cord_spawn", "args": { "goal": "Analyze server logs", "prompt": "Search for RSS growth, GC pressure, unclosed connections in last 24h logs" } },
50+
{ "tool": "cord_complete", "args": { "task_id": "T1", "result": "Heap profiler: EventEmitter listeners on WebSocket connections never removed. 150 leaked listeners after 1h. Source: src/ws/handler.ts:47 missing removeListener in disconnect." } },
51+
{ "tool": "cord_complete", "args": { "task_id": "T2", "result": "Logs: RSS grows 2MB/min under 100 concurrent connections. GC runs every 30s but old-gen keeps growing. Correlates with WS reconnection spikes at :00/:30 marks." } },
52+
{ "tool": "cord_spawn", "args": { "goal": "Fix memory leak", "prompt": "Fix the WebSocket listener leak", "blocked_by": ["T1", "T2"] } },
53+
{ "tool": "cord_complete", "args": { "task_id": "T3", "result": "Fixed: Added removeAllListeners() in ws disconnect handler. Also added maxListeners warning at 10. RSS now stable at 180MB under load." } },
54+
{ "tool": "cord_spawn", "args": { "goal": "Verify fix with load test", "prompt": "Run 1h soak test with 200 connections", "blocked_by": ["T3"] } },
55+
{ "tool": "cord_tree", "args": { "include_results": true } }
56+
],
57+
"flat_transcript": [
58+
{ "tool": "create_task", "args": { "title": "Run heap profiler", "description": "Take heap snapshots" } },
59+
{ "tool": "create_task", "args": { "title": "Analyze server logs", "description": "Search for memory growth patterns" } },
60+
{ "tool": "update_task_status", "args": { "task_id": "T1", "status": "completed", "notes": "Leaked EventEmitter listeners on WS" } },
61+
{ "tool": "update_task_status", "args": { "task_id": "T2", "status": "completed", "notes": "RSS grows 2MB/min" } },
62+
{ "tool": "create_task", "args": { "title": "Fix memory leak", "description": "Based on T1 (listeners leak) and T2 (RSS growth). Must manually copy results here." } },
63+
{ "tool": "list_tasks", "args": {} },
64+
{ "tool": "update_task_status", "args": { "task_id": "T3", "status": "completed", "notes": "Fixed removeAllListeners" } },
65+
{ "tool": "create_task", "args": { "title": "Verify fix", "description": "Run soak test. Depends on T3 fix." } },
66+
{ "tool": "list_tasks", "args": {} }
67+
]
68+
},
69+
{
70+
"id": "refactor-auth-migration",
71+
"name": "Refactor: Auth System Migration",
72+
"description": "Nested decomposition: audit -> design -> parallel impl (JWT + session) -> migrate -> test",
73+
"type": "refactor",
74+
"complexity": "very_high",
75+
"steps": [
76+
{ "id": "audit", "action": "Audit current auth system usage across codebase" },
77+
{ "id": "design", "action": "Design new auth architecture", "depends_on": ["audit"] },
78+
{ "id": "impl-jwt", "action": "Implement JWT token service", "depends_on": ["design"] },
79+
{ "id": "impl-session", "action": "Implement session migration layer", "depends_on": ["design"] },
80+
{ "id": "migrate", "action": "Run data migration", "depends_on": ["impl-jwt", "impl-session"] },
81+
{ "id": "test", "action": "Run full auth regression suite", "depends_on": ["migrate"] }
82+
],
83+
"cord_transcript": [
84+
{ "tool": "cord_spawn", "args": { "goal": "Audit current auth system", "prompt": "Find all session-based auth usage: middleware, login flows, token refresh, logout" } },
85+
{ "tool": "cord_complete", "args": { "task_id": "T1", "result": "47 files use session auth. 12 middleware functions. 3 login flows (web, mobile, API). Session stored in Redis with 24h TTL." } },
86+
{ "tool": "cord_spawn", "args": { "goal": "Design new auth architecture", "prompt": "Design JWT + refresh token system", "blocked_by": ["T1"] } },
87+
{ "tool": "cord_complete", "args": { "task_id": "T2", "result": "Architecture: JWT access (15min) + refresh (7d) in httpOnly cookie. Middleware: verifyJWT -> extractUser. Migration: dual-mode period (2 weeks) accepting both session and JWT." } },
88+
{ "tool": "cord_fork", "args": { "goal": "Implement JWT token service", "prompt": "Build JWT sign/verify/refresh", "blocked_by": ["T2"], "parent_id": "T2" } },
89+
{ "tool": "cord_fork", "args": { "goal": "Implement session migration layer", "prompt": "Build session-to-JWT converter", "blocked_by": ["T2"], "parent_id": "T2" } },
90+
{ "tool": "cord_complete", "args": { "task_id": "T3", "result": "JWT service: sign(payload, 15min), verify(token), refresh(refreshToken, 7d). RS256 key rotation support." } },
91+
{ "tool": "cord_complete", "args": { "task_id": "T4", "result": "Migration layer: detectAuthType(req) -> session|jwt. convertSession(sessionId) -> {accessToken, refreshToken}. Dual-mode middleware." } },
92+
{ "tool": "cord_spawn", "args": { "goal": "Run data migration", "prompt": "Migrate active sessions to JWT", "blocked_by": ["T3", "T4"] } },
93+
{ "tool": "cord_complete", "args": { "task_id": "T5", "result": "Migration complete: 15,000 active sessions converted. 99.7% success rate. 45 sessions expired during migration (expected)." } },
94+
{ "tool": "cord_spawn", "args": { "goal": "Run auth regression suite", "prompt": "Full test coverage", "blocked_by": ["T5"] } },
95+
{ "tool": "cord_tree", "args": { "include_results": true } }
96+
],
97+
"flat_transcript": [
98+
{ "tool": "create_task", "args": { "title": "Audit current auth", "description": "Find session auth usage" } },
99+
{ "tool": "update_task_status", "args": { "task_id": "T1", "status": "completed" } },
100+
{ "tool": "create_task", "args": { "title": "Design new auth", "description": "JWT system. Audit found 47 files, 12 middleware, 3 login flows." } },
101+
{ "tool": "update_task_status", "args": { "task_id": "T2", "status": "completed" } },
102+
{ "tool": "create_task", "args": { "title": "Implement JWT service", "description": "Build JWT. Design: access 15min, refresh 7d, RS256." } },
103+
{ "tool": "create_task", "args": { "title": "Implement session migration", "description": "Build converter. Design: dual-mode 2 weeks." } },
104+
{ "tool": "update_task_status", "args": { "task_id": "T3", "status": "completed" } },
105+
{ "tool": "update_task_status", "args": { "task_id": "T4", "status": "completed" } },
106+
{ "tool": "create_task", "args": { "title": "Run migration", "description": "Depends on T3 and T4. Must check both are done." } },
107+
{ "tool": "list_tasks", "args": {} },
108+
{ "tool": "update_task_status", "args": { "task_id": "T5", "status": "completed" } },
109+
{ "tool": "create_task", "args": { "title": "Run regression tests", "description": "Depends on T5." } },
110+
{ "tool": "list_tasks", "args": {} }
111+
]
112+
},
113+
{
114+
"id": "hotfix-production-error",
115+
"name": "Hotfix: Production Error",
116+
"description": "Decision point (rollback vs fix-forward?) then execute chosen approach",
117+
"type": "bug_fix",
118+
"complexity": "high",
119+
"steps": [
120+
{ "id": "diagnose", "action": "Diagnose production error from logs and monitoring" },
121+
{ "id": "decide", "action": "Decision: rollback or fix-forward?", "depends_on": ["diagnose"] },
122+
{ "id": "execute", "action": "Execute chosen approach", "depends_on": ["decide"] },
123+
{ "id": "verify", "action": "Verify production is healthy", "depends_on": ["execute"] }
124+
],
125+
"cord_transcript": [
126+
{ "tool": "cord_spawn", "args": { "goal": "Diagnose production error", "prompt": "Check error logs, Sentry alerts, and metrics dashboards" } },
127+
{ "tool": "cord_complete", "args": { "task_id": "T1", "result": "Error: NullPointerException in PaymentService.processRefund() since deploy v2.14.0 (2h ago). Affects 12% of refund requests. Root cause: missing null check on optional coupon field added in v2.14.0." } },
128+
{ "tool": "cord_ask", "args": { "question": "Rollback or fix-forward for PaymentService NPE?", "options": ["rollback to v2.13.9 (safe, 5min downtime)", "fix-forward with null check patch (risky, 0 downtime)"], "blocked_by": ["T1"] } },
129+
{ "tool": "cord_complete", "args": { "task_id": "T2", "result": "fix-forward: the coupon field is needed for new feature launching tomorrow, rollback would block it" } },
130+
{ "tool": "cord_spawn", "args": { "goal": "Patch PaymentService with null check", "prompt": "Add null check for coupon field in processRefund()", "blocked_by": ["T2"] } },
131+
{ "tool": "cord_complete", "args": { "task_id": "T3", "result": "Patched: Added optional chaining on coupon?.discountAmount in processRefund(). Deploy v2.14.1 pushed. Error rate dropped to 0%." } },
132+
{ "tool": "cord_spawn", "args": { "goal": "Verify production health", "prompt": "Check error rate, refund success rate, monitoring dashboards", "blocked_by": ["T3"] } },
133+
{ "tool": "cord_tree", "args": { "include_results": true } }
134+
],
135+
"flat_transcript": [
136+
{ "tool": "create_task", "args": { "title": "Diagnose production error", "description": "Check logs and monitoring" } },
137+
{ "tool": "update_task_status", "args": { "task_id": "T1", "status": "completed", "notes": "NPE in PaymentService since v2.14.0" } },
138+
{ "tool": "create_task", "args": { "title": "Decision: rollback or fix-forward?", "description": "Options: 1) rollback v2.13.9 (5min downtime) 2) fix-forward null check (0 downtime). Diagnosis: missing null check on coupon field." } },
139+
{ "tool": "list_tasks", "args": {} },
140+
{ "tool": "update_task_status", "args": { "task_id": "T2", "status": "completed", "notes": "fix-forward chosen" } },
141+
{ "tool": "create_task", "args": { "title": "Patch PaymentService", "description": "Add null check. Decision was fix-forward. Must manually copy diagnosis context." } },
142+
{ "tool": "update_task_status", "args": { "task_id": "T3", "status": "completed" } },
143+
{ "tool": "create_task", "args": { "title": "Verify production", "description": "Check health after patch" } },
144+
{ "tool": "list_tasks", "args": {} }
145+
]
146+
}
147+
]
Lines changed: 165 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,165 @@
1+
/**
2+
* Context Auto-Save Service
3+
* Periodically saves context checkpoints
4+
*/
5+
6+
import { existsSync } from 'fs';
import { join } from 'path';
import { execFileSync, execSync } from 'child_process';
import { homedir } from 'os';
import type { ContextServiceConfig } from '../daemon-config.js';
11+
12+
/**
 * Runtime bookkeeping exposed by `DaemonContextService.getState()`.
 */
export interface ContextServiceState {
  /** `Date.now()` timestamp of the most recent successful save; `0` until one succeeds. */
  lastSaveTime: number;
  /** Number of checkpoints saved successfully since the service was constructed. */
  saveCount: number;
  /** Messages of recent non-transient save failures (the service keeps at most the last 10). */
  errors: string[];
}
17+
18+
/**
 * Periodically records a context checkpoint by invoking the StackMemory CLI
 * (`stackmemory context add observation <message>`).
 *
 * The service is inert until `start()` is called and `config.enabled` is true.
 * All CLI invocations are synchronous and bounded by a 30 second timeout.
 */
export class DaemonContextService {
  /**
   * Largest delay accepted by Node.js timers. A delay below 1, above this
   * value, or NaN is coerced to 1 ms, which would turn the periodic save
   * into a tight loop of CLI invocations.
   */
  private static readonly MAX_TIMER_DELAY_MS = 2147483647;

  private config: ContextServiceConfig;
  private state: ContextServiceState;
  private intervalId?: NodeJS.Timeout;
  private isRunning = false;
  private onLog: (level: string, message: string, data?: unknown) => void;

  /**
   * @param config Service settings; `enabled`, `interval` (minutes) and the
   *   optional `checkpointMessage` are the fields read by this class.
   * @param onLog Sink for log events (`level` is `'INFO'` or `'WARN'`).
   */
  constructor(
    config: ContextServiceConfig,
    onLog: (level: string, message: string, data?: unknown) => void
  ) {
    this.config = config;
    this.onLog = onLog;
    this.state = {
      lastSaveTime: 0,
      saveCount: 0,
      errors: [],
    };
  }

  /**
   * Starts the service: saves once immediately, then every
   * `config.interval` minutes. Does nothing when already running or when
   * the service is disabled. Refuses to start (with a WARN log) when the
   * interval does not translate to a delay that timers can honour.
   */
  start(): void {
    if (this.isRunning || !this.config.enabled) {
      return;
    }

    const intervalMs = this.config.interval * 60 * 1000;
    if (
      !Number.isFinite(intervalMs) ||
      intervalMs < 1 ||
      intervalMs > DaemonContextService.MAX_TIMER_DELAY_MS
    ) {
      // Passing such a delay to setInterval would silently run every 1 ms.
      this.onLog('WARN', 'Context service not started: invalid interval', {
        interval: this.config.interval,
      });
      return;
    }

    this.isRunning = true;

    this.onLog('INFO', 'Context service started', {
      interval: this.config.interval,
    });

    // Initial save
    this.saveContext();

    // Schedule periodic saves
    this.intervalId = setInterval(() => {
      this.saveContext();
    }, intervalMs);
  }

  /** Cancels the periodic timer (if any), marks the service stopped and logs it. */
  stop(): void {
    if (this.intervalId) {
      clearInterval(this.intervalId);
      this.intervalId = undefined;
    }
    this.isRunning = false;
    this.onLog('INFO', 'Context service stopped');
  }

  /**
   * @returns A snapshot of the service state. The `errors` array is copied
   *   so callers cannot mutate the service's internal bookkeeping.
   */
  getState(): ContextServiceState {
    return { ...this.state, errors: [...this.state.errors] };
  }

  /**
   * Merges `config` into the current settings. A running service is stopped
   * first and restarted afterwards if it is still enabled; a service that
   * was not running stays stopped.
   */
  updateConfig(config: Partial<ContextServiceConfig>): void {
    const wasRunning = this.isRunning;
    if (wasRunning) {
      this.stop();
    }

    this.config = { ...this.config, ...config };

    if (wasRunning && this.config.enabled) {
      this.start();
    }
  }

  /**
   * Triggers a save outside the periodic schedule.
   *
   * NOTE: the save path returns early when the service is not running, so
   * this is a no-op before `start()` or after `stop()`.
   */
  forceSave(): void {
    this.saveContext();
  }

  /**
   * Writes one checkpoint via the StackMemory CLI and updates the counters.
   * Failures never propagate: non-transient ones are logged and remembered
   * in `state.errors` (last 10 only); EBUSY/EAGAIN failures are dropped.
   */
  private saveContext(): void {
    if (!this.isRunning) return;

    try {
      const stackmemoryBin = this.getStackMemoryBin();

      if (!stackmemoryBin) {
        this.onLog('WARN', 'StackMemory binary not found');
        return;
      }

      const message =
        this.config.checkpointMessage ||
        `Auto-checkpoint #${this.state.saveCount + 1}`;
      const fullMessage = `${message} at ${new Date().toISOString()}`;

      // Pass arguments as an argv array (no shell) so quotes, `$(...)` or
      // backticks in a configured checkpoint message are treated as text
      // instead of being interpreted by a shell.
      execFileSync(
        stackmemoryBin,
        ['context', 'add', 'observation', fullMessage],
        {
          timeout: 30000,
          encoding: 'utf8',
          stdio: 'pipe',
        }
      );

      this.state.saveCount++;
      this.state.lastSaveTime = Date.now();

      this.onLog('INFO', 'Context saved', {
        saveCount: this.state.saveCount,
      });
    } catch (err) {
      const errorMsg = err instanceof Error ? err.message : String(err);

      // Only log if not a transient error
      if (!errorMsg.includes('EBUSY') && !errorMsg.includes('EAGAIN')) {
        this.state.errors.push(errorMsg);
        this.onLog('WARN', 'Failed to save context', { error: errorMsg });

        // Keep only last 10 errors
        if (this.state.errors.length > 10) {
          this.state.errors = this.state.errors.slice(-10);
        }
      }
    }
  }

  /**
   * Locates the StackMemory executable.
   *
   * @returns The first existing path among the well-known install
   *   locations, else whatever `which stackmemory` resolves to, else `null`.
   */
  private getStackMemoryBin(): string | null {
    const homeDir = homedir();

    // Check common locations
    const locations = [
      join(homeDir, '.stackmemory', 'bin', 'stackmemory'),
      join(homeDir, '.local', 'bin', 'stackmemory'),
      '/usr/local/bin/stackmemory',
      '/opt/homebrew/bin/stackmemory',
    ];

    for (const loc of locations) {
      if (existsSync(loc)) {
        return loc;
      }
    }

    // Try to find in PATH (constant command string, so a shell is safe here)
    try {
      const result = execSync('which stackmemory', {
        encoding: 'utf8',
        stdio: 'pipe',
      }).trim();
      if (result && existsSync(result)) {
        return result;
      }
    } catch {
      // Not in PATH
    }

    return null;
  }
}

0 commit comments

Comments
 (0)