From b24b033122ca6f19644bc9a8cdb8b29bf3131629 Mon Sep 17 00:00:00 2001 From: Caleb Gross Date: Fri, 20 Mar 2026 10:46:51 -0400 Subject: [PATCH 1/5] refactor: extract shared stub LLM to internal/testutil/stubllm MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move the deterministic bag-of-words LLM provider from cmd/benchmark-quality/stubllm.go to a shared package so it can be reused by the upcoming lifecycle test suite. The benchmark-quality binary now imports from the shared package via thin re-exports. No behavior change — same vocabulary, embeddings, and response templates. Closes #249 Co-Authored-By: Claude Opus 4.6 (1M context) --- cmd/benchmark-quality/stubllm.go | 417 +------------------------- internal/testutil/stubllm/stubllm.go | 425 +++++++++++++++++++++++++++ 2 files changed, 434 insertions(+), 408 deletions(-) create mode 100644 internal/testutil/stubllm/stubllm.go diff --git a/cmd/benchmark-quality/stubllm.go b/cmd/benchmark-quality/stubllm.go index 74932ff..bded175 100644 --- a/cmd/benchmark-quality/stubllm.go +++ b/cmd/benchmark-quality/stubllm.go @@ -1,419 +1,20 @@ package main import ( - "context" - "encoding/json" - "fmt" - "hash/fnv" - "math" - "regexp" - "sort" - "strings" - "github.com/appsprout-dev/mnemonic/internal/llm" + "github.com/appsprout-dev/mnemonic/internal/testutil/stubllm" ) -const bowDims = 128 - -// vocabulary is the fixed bag-of-words vocabulary. Each word maps to a -// fixed dimension in the embedding space. Texts sharing words produce -// similar embeddings, making retrieval and association scores meaningful. 
-var vocabulary = map[string]int{ - // Languages & runtimes - "go": 0, "golang": 0, "python": 1, "javascript": 2, "typescript": 3, - "sql": 4, "bash": 5, "html": 6, "css": 7, "rust": 8, "java": 9, - // Infrastructure - "docker": 10, "git": 11, "linux": 12, "macos": 13, "systemd": 14, - "build": 15, "ci": 16, "deployment": 17, "deploy": 17, "kubernetes": 18, - // Dev activities - "debugging": 19, "debug": 19, "testing": 20, "test": 20, - "refactoring": 21, "refactor": 21, "configuration": 22, "config": 22, - "migration": 23, "documentation": 24, "review": 25, - // Code domains - "api": 26, "database": 27, "db": 27, "sqlite": 27, "postgres": 27, "postgresql": 27, - "filesystem": 28, "file": 28, "networking": 29, "network": 29, "connection": 29, - "security": 30, "authentication": 31, "auth": 31, "login": 31, "session": 31, - "performance": 32, "logging": 33, "log": 33, "ui": 34, "cli": 35, - "latency": 32, "throughput": 32, "slow": 32, "fast": 32, "speed": 32, - // Memory system - "memory": 36, "encoding": 37, "retrieval": 38, "embedding": 39, - "agent": 40, "llm": 41, "daemon": 42, "mcp": 43, "watcher": 44, - // Project context — with synonyms - "decision": 45, "chose": 45, "choose": 45, "selected": 45, "picked": 45, "choice": 45, - "error": 46, "bug": 46, "issue": 46, "problem": 46, "defect": 46, "incident": 46, "outage": 46, - "fix": 47, "fixed": 47, "resolve": 47, "resolved": 47, "solution": 47, "repair": 47, "patch": 47, "workaround": 47, - "insight": 48, "learning": 49, "planning": 50, "research": 51, - "dependency": 52, "library": 52, "module": 52, "schema": 53, "config_yaml": 54, - // Common nouns - "server": 55, "client": 56, "request": 57, "response": 58, - "cache": 59, "redis": 59, "memcached": 59, "queue": 60, "event": 61, "handler": 62, - "middleware": 63, "route": 64, "endpoint": 65, - "function": 66, "method": 67, "interface": 68, "struct": 69, - "channel": 70, "goroutine": 71, "mutex": 72, "context": 73, - // Actions - "create": 74, "read": 75, 
"update": 76, "delete": 77, - "query": 78, "search": 79, "filter": 80, "sort": 81, - "parse": 82, "validate": 83, "transform": 84, "serialize": 85, - // Qualities — with synonyms - "nil": 86, "null": 86, "panic": 87, "crash": 87, "failure": 87, "failed": 87, "broken": 87, - "timeout": 88, "retry": 89, "fallback": 90, "graceful": 91, - "concurrent": 92, "concurrency": 92, "pool": 92, "async": 93, "sync": 94, - // Specific to mnemonic - "spread": 95, "activation": 96, "association": 97, "salience": 98, - "consolidation": 99, "decay": 100, "dreaming": 101, "abstraction": 102, - "episoding": 103, "metacognition": 104, "perception": 105, - "fts5": 106, "bm25": 107, "cosine": 108, "similarity": 109, - // General — with synonyms - "pattern": 110, "principle": 111, "rule": 111, "guideline": 111, "axiom": 112, - "graph": 113, "node": 114, "edge": 115, - "threshold": 116, "weight": 117, "score": 118, - "architecture": 119, "design": 120, "tradeoff": 121, "tradeoffs": 121, - // System noise vocabulary (distinct region) - "chrome": 122, "browser": 122, "clipboard": 123, - "desktop": 124, "gnome": 124, "notification": 125, - "audio": 126, "pipewire": 126, "trash": 127, -} - -// wordSplitRe splits text into words for bag-of-words. -var wordSplitRe = regexp.MustCompile(`[a-zA-Z][a-z]*|[A-Z]+`) - -// semanticStubProvider implements llm.Provider with deterministic, -// semantically meaningful embeddings and template-based completions. -type semanticStubProvider struct{} - -func (s *semanticStubProvider) Complete(_ context.Context, req llm.CompletionRequest) (llm.CompletionResponse, error) { - if len(req.Messages) == 0 { - return llm.CompletionResponse{Content: "", StopReason: "stub"}, nil - } - - systemPrompt := "" - userContent := "" - for _, msg := range req.Messages { - if msg.Role == "system" { - systemPrompt = msg.Content - } - if msg.Role == "user" { - userContent = msg.Content - } - } - - // Detect which agent is calling based on system prompt. 
- var content string - switch { - case strings.Contains(systemPrompt, "memory encoder"): - content = stubEncodingResponse(userContent) - case strings.Contains(systemPrompt, "classifier"): - content = stubClassificationResponse(userContent) - case strings.Contains(systemPrompt, "episode synthesizer"): - content = stubEpisodicResponse(userContent) - case strings.Contains(systemPrompt, "insight generator"): - content = stubInsightResponse(userContent) - case strings.Contains(systemPrompt, "principle synthesizer"): - content = stubPrincipleResponse(userContent) - case strings.Contains(systemPrompt, "axiom synthesizer"): - content = stubAxiomResponse(userContent) - default: - content = "{}" - } +// semanticStubProvider wraps the shared stubllm.Provider for local use. +type semanticStubProvider = stubllm.Provider - return llm.CompletionResponse{Content: content, StopReason: "stop"}, nil -} - -func (s *semanticStubProvider) Embed(_ context.Context, text string) ([]float32, error) { - return bowEmbedding(text), nil -} - -func (s *semanticStubProvider) BatchEmbed(_ context.Context, texts []string) ([][]float32, error) { - results := make([][]float32, len(texts)) - for i, t := range texts { - results[i] = bowEmbedding(t) - } - return results, nil -} - -func (s *semanticStubProvider) Health(_ context.Context) error { - return nil -} +// vocabulary re-exports the shared vocabulary for len() checks in reports. +var vocabulary = stubllm.Vocabulary -func (s *semanticStubProvider) ModelInfo(_ context.Context) (llm.ModelMetadata, error) { - return llm.ModelMetadata{Name: "semantic-stub"}, nil -} - -// bowEmbedding creates a bag-of-words embedding. Words in the vocabulary -// activate their assigned dimension. Unknown words hash into the space. -// Result is normalized to a unit vector. +// bowEmbedding re-exports the shared embedding function for scenario use. 
func bowEmbedding(text string) []float32 { - emb := make([]float32, bowDims) - lower := strings.ToLower(text) - words := wordSplitRe.FindAllString(lower, -1) - - for _, w := range words { - if dim, ok := vocabulary[w]; ok { - emb[dim] += 1.0 - } else { - // Hash unknown words into the embedding space. - h := fnv.New32a() - _, _ = h.Write([]byte(w)) - dim := int(h.Sum32()) % bowDims - emb[dim] += 0.3 // weaker signal for unknown words - } - } - - // Normalize to unit vector. - var norm float64 - for _, v := range emb { - norm += float64(v) * float64(v) - } - norm = math.Sqrt(norm) - if norm > 0 { - for i := range emb { - emb[i] = float32(float64(emb[i]) / norm) - } - } - return emb -} - -// extractTopConcepts returns the top N vocabulary words found in text, -// ranked by frequency. -func extractTopConcepts(text string, n int) []string { - lower := strings.ToLower(text) - words := wordSplitRe.FindAllString(lower, -1) - - // Count vocabulary word hits (deduplicated by dimension to group synonyms). - type dimCount struct { - word string - dim int - count int - } - dimCounts := make(map[int]*dimCount) - for _, w := range words { - if dim, ok := vocabulary[w]; ok { - if dc, exists := dimCounts[dim]; exists { - dc.count++ - } else { - dimCounts[dim] = &dimCount{word: w, dim: dim, count: 1} - } - } - } - - // Sort by count descending. - sorted := make([]*dimCount, 0, len(dimCounts)) - for _, dc := range dimCounts { - sorted = append(sorted, dc) - } - sort.Slice(sorted, func(i, j int) bool { - return sorted[i].count > sorted[j].count - }) - - result := make([]string, 0, n) - for i := 0; i < n && i < len(sorted); i++ { - result = append(result, sorted[i].word) - } - return result -} - -// truncate returns the first n characters of s, or s if shorter. -func truncateStr(s string, n int) string { - if len(s) <= n { - return s - } - return s[:n] -} - -// deterministic salience based on vocabulary density. 
-func computeSalience(text string) float32 { - lower := strings.ToLower(text) - words := wordSplitRe.FindAllString(lower, -1) - if len(words) == 0 { - return 0.3 - } - vocabHits := 0 - for _, w := range words { - if _, ok := vocabulary[w]; ok { - vocabHits++ - } - } - ratio := float32(vocabHits) / float32(len(words)) - // Map ratio to salience range [0.3, 0.9]. - sal := 0.3 + ratio*0.6 - if sal > 0.9 { - sal = 0.9 - } - return sal -} - -// stubEncodingResponse returns a valid encoding_response JSON. -func stubEncodingResponse(userContent string) string { - concepts := extractTopConcepts(userContent, 8) - if len(concepts) == 0 { - concepts = []string{"general"} - } - - // Extract the actual content after "CONTENT:" marker. - content := userContent - if _, after, found := strings.Cut(userContent, "CONTENT:"); found { - content = strings.TrimSpace(after) - } - - summary := truncateStr(content, 100) - gist := truncateStr(content, 60) - salience := computeSalience(content) - - // Determine significance from salience. - significance := "routine" - if salience > 0.7 { - significance = "important" - } else if salience > 0.5 { - significance = "notable" - } - - resp := map[string]any{ - "gist": gist, - "summary": summary, - "content": truncateStr(content, 500), - "narrative": fmt.Sprintf("Observed: %s", truncateStr(content, 200)), - "concepts": concepts, - "structured_concepts": map[string]any{ - "topics": []any{}, - "entities": []any{}, - "actions": []any{}, - "causality": []any{}, - }, - "significance": significance, - "emotional_tone": "neutral", - "outcome": "ongoing", - "salience": salience, - } - - b, _ := json.Marshal(resp) - return string(b) -} - -// stubClassificationResponse returns a valid classification_response JSON. 
-func stubClassificationResponse(userContent string) string { - lower := strings.ToLower(userContent) - - relationType := "similar" - switch { - case strings.Contains(lower, "caused") || strings.Contains(lower, "because") || - strings.Contains(lower, "led to") || strings.Contains(lower, "result"): - relationType = "caused_by" - case strings.Contains(lower, "part of") || strings.Contains(lower, "component") || - strings.Contains(lower, "belongs"): - relationType = "part_of" - case strings.Contains(lower, "contradict") || strings.Contains(lower, "opposite") || - strings.Contains(lower, "however"): - relationType = "contradicts" - case strings.Contains(lower, "before") || strings.Contains(lower, "after") || - strings.Contains(lower, "then") || strings.Contains(lower, "later"): - relationType = "temporal" - case strings.Contains(lower, "reinforce") || strings.Contains(lower, "confirm") || - strings.Contains(lower, "support"): - relationType = "reinforces" - } - - resp := map[string]string{"relation_type": relationType} - b, _ := json.Marshal(resp) - return string(b) + return stubllm.BowEmbedding(text) } -// stubEpisodicResponse returns a valid episode_synthesis JSON. -func stubEpisodicResponse(userContent string) string { - concepts := extractTopConcepts(userContent, 5) - if len(concepts) == 0 { - concepts = []string{"session"} - } - - title := fmt.Sprintf("Session: %s", strings.Join(concepts, ", ")) - if len(title) > 80 { - title = title[:80] - } - - salience := computeSalience(userContent) - - resp := map[string]any{ - "title": title, - "summary": fmt.Sprintf("Work session involving %s", strings.Join(concepts, ", ")), - "narrative": fmt.Sprintf("During this session, activity was observed related to %s.", strings.Join(concepts, ", ")), - "emotional_tone": "neutral", - "outcome": "ongoing", - "concepts": concepts, - "salience": salience, - } - - b, _ := json.Marshal(resp) - return string(b) -} - -// stubInsightResponse returns a valid insight_response JSON. 
-func stubInsightResponse(userContent string) string { - concepts := extractTopConcepts(userContent, 6) - - // Only generate insight if there's meaningful concept overlap. - hasInsight := len(concepts) >= 3 - - resp := map[string]any{ - "has_insight": hasInsight, - "title": "", - "insight": "", - "concepts": concepts, - "confidence": 0.0, - } - - if hasInsight { - resp["title"] = fmt.Sprintf("Connection: %s", strings.Join(concepts[:3], " + ")) - resp["insight"] = fmt.Sprintf("These memories share a pattern around %s, suggesting a recurring theme in the workflow.", strings.Join(concepts, ", ")) - resp["confidence"] = 0.7 - } - - b, _ := json.Marshal(resp) - return string(b) -} - -// stubPrincipleResponse returns a valid principle_response JSON. -func stubPrincipleResponse(userContent string) string { - concepts := extractTopConcepts(userContent, 5) - - hasPrinciple := len(concepts) >= 2 - - resp := map[string]any{ - "has_principle": hasPrinciple, - "title": "", - "principle": "", - "concepts": concepts, - "confidence": 0.0, - } - - if hasPrinciple { - resp["title"] = fmt.Sprintf("Principle: %s", strings.Join(concepts[:2], " and ")) - resp["principle"] = fmt.Sprintf("When working with %s, consistent patterns emerge around %s.", concepts[0], strings.Join(concepts[1:], " and ")) - resp["confidence"] = 0.6 - } - - b, _ := json.Marshal(resp) - return string(b) -} - -// stubAxiomResponse returns a valid axiom_response JSON. 
-func stubAxiomResponse(userContent string) string { - concepts := extractTopConcepts(userContent, 4) - - hasAxiom := len(concepts) >= 3 - - resp := map[string]any{ - "has_axiom": hasAxiom, - "title": "", - "axiom": "", - "concepts": concepts, - "confidence": 0.0, - } - - if hasAxiom { - resp["title"] = fmt.Sprintf("Axiom: %s", concepts[0]) - resp["axiom"] = fmt.Sprintf("Across all observed patterns, %s serves as a fundamental organizing principle.", concepts[0]) - resp["confidence"] = 0.5 - } - - b, _ := json.Marshal(resp) - return string(b) -} +// Ensure the shared provider satisfies llm.Provider at compile time. +var _ llm.Provider = (*stubllm.Provider)(nil) diff --git a/internal/testutil/stubllm/stubllm.go b/internal/testutil/stubllm/stubllm.go new file mode 100644 index 0000000..1b03d70 --- /dev/null +++ b/internal/testutil/stubllm/stubllm.go @@ -0,0 +1,425 @@ +// Package stubllm provides a deterministic, semantically meaningful LLM stub +// for integration testing. It implements llm.Provider using bag-of-words +// embeddings and template-based completions, producing consistent results +// that exercise encoding, retrieval, and association pipelines without +// requiring a real LLM. +package stubllm + +import ( + "context" + "encoding/json" + "fmt" + "hash/fnv" + "math" + "regexp" + "sort" + "strings" + + "github.com/appsprout-dev/mnemonic/internal/llm" +) + +// BowDims is the dimensionality of the bag-of-words embedding space. +const BowDims = 128 + +// Vocabulary is the fixed bag-of-words vocabulary. Each word maps to a +// fixed dimension in the embedding space. Texts sharing words produce +// similar embeddings, making retrieval and association scores meaningful. 
+var Vocabulary = map[string]int{ + // Languages & runtimes + "go": 0, "golang": 0, "python": 1, "javascript": 2, "typescript": 3, + "sql": 4, "bash": 5, "html": 6, "css": 7, "rust": 8, "java": 9, + // Infrastructure + "docker": 10, "git": 11, "linux": 12, "macos": 13, "systemd": 14, + "build": 15, "ci": 16, "deployment": 17, "deploy": 17, "kubernetes": 18, + // Dev activities + "debugging": 19, "debug": 19, "testing": 20, "test": 20, + "refactoring": 21, "refactor": 21, "configuration": 22, "config": 22, + "migration": 23, "documentation": 24, "review": 25, + // Code domains + "api": 26, "database": 27, "db": 27, "sqlite": 27, "postgres": 27, "postgresql": 27, + "filesystem": 28, "file": 28, "networking": 29, "network": 29, "connection": 29, + "security": 30, "authentication": 31, "auth": 31, "login": 31, "session": 31, + "performance": 32, "logging": 33, "log": 33, "ui": 34, "cli": 35, + "latency": 32, "throughput": 32, "slow": 32, "fast": 32, "speed": 32, + // Memory system + "memory": 36, "encoding": 37, "retrieval": 38, "embedding": 39, + "agent": 40, "llm": 41, "daemon": 42, "mcp": 43, "watcher": 44, + // Project context — with synonyms + "decision": 45, "chose": 45, "choose": 45, "selected": 45, "picked": 45, "choice": 45, + "error": 46, "bug": 46, "issue": 46, "problem": 46, "defect": 46, "incident": 46, "outage": 46, + "fix": 47, "fixed": 47, "resolve": 47, "resolved": 47, "solution": 47, "repair": 47, "patch": 47, "workaround": 47, + "insight": 48, "learning": 49, "planning": 50, "research": 51, + "dependency": 52, "library": 52, "module": 52, "schema": 53, "config_yaml": 54, + // Common nouns + "server": 55, "client": 56, "request": 57, "response": 58, + "cache": 59, "redis": 59, "memcached": 59, "queue": 60, "event": 61, "handler": 62, + "middleware": 63, "route": 64, "endpoint": 65, + "function": 66, "method": 67, "interface": 68, "struct": 69, + "channel": 70, "goroutine": 71, "mutex": 72, "context": 73, + // Actions + "create": 74, "read": 75, 
"update": 76, "delete": 77, + "query": 78, "search": 79, "filter": 80, "sort": 81, + "parse": 82, "validate": 83, "transform": 84, "serialize": 85, + // Qualities — with synonyms + "nil": 86, "null": 86, "panic": 87, "crash": 87, "failure": 87, "failed": 87, "broken": 87, + "timeout": 88, "retry": 89, "fallback": 90, "graceful": 91, + "concurrent": 92, "concurrency": 92, "pool": 92, "async": 93, "sync": 94, + // Specific to mnemonic + "spread": 95, "activation": 96, "association": 97, "salience": 98, + "consolidation": 99, "decay": 100, "dreaming": 101, "abstraction": 102, + "episoding": 103, "metacognition": 104, "perception": 105, + "fts5": 106, "bm25": 107, "cosine": 108, "similarity": 109, + // General — with synonyms + "pattern": 110, "principle": 111, "rule": 111, "guideline": 111, "axiom": 112, + "graph": 113, "node": 114, "edge": 115, + "threshold": 116, "weight": 117, "score": 118, + "architecture": 119, "design": 120, "tradeoff": 121, "tradeoffs": 121, + // System noise vocabulary (distinct region) + "chrome": 122, "browser": 122, "clipboard": 123, + "desktop": 124, "gnome": 124, "notification": 125, + "audio": 126, "pipewire": 126, "trash": 127, +} + +// wordSplitRe splits text into words for bag-of-words. +var wordSplitRe = regexp.MustCompile(`[a-zA-Z][a-z]*|[A-Z]+`) + +// Provider implements llm.Provider with deterministic, +// semantically meaningful embeddings and template-based completions. +type Provider struct{} + +func (s *Provider) Complete(_ context.Context, req llm.CompletionRequest) (llm.CompletionResponse, error) { + if len(req.Messages) == 0 { + return llm.CompletionResponse{Content: "", StopReason: "stub"}, nil + } + + systemPrompt := "" + userContent := "" + for _, msg := range req.Messages { + if msg.Role == "system" { + systemPrompt = msg.Content + } + if msg.Role == "user" { + userContent = msg.Content + } + } + + // Detect which agent is calling based on system prompt. 
+ var content string + switch { + case strings.Contains(systemPrompt, "memory encoder"): + content = stubEncodingResponse(userContent) + case strings.Contains(systemPrompt, "classifier"): + content = stubClassificationResponse(userContent) + case strings.Contains(systemPrompt, "episode synthesizer"): + content = stubEpisodicResponse(userContent) + case strings.Contains(systemPrompt, "insight generator"): + content = stubInsightResponse(userContent) + case strings.Contains(systemPrompt, "principle synthesizer"): + content = stubPrincipleResponse(userContent) + case strings.Contains(systemPrompt, "axiom synthesizer"): + content = stubAxiomResponse(userContent) + default: + content = "{}" + } + + return llm.CompletionResponse{Content: content, StopReason: "stop"}, nil +} + +func (s *Provider) Embed(_ context.Context, text string) ([]float32, error) { + return BowEmbedding(text), nil +} + +func (s *Provider) BatchEmbed(_ context.Context, texts []string) ([][]float32, error) { + results := make([][]float32, len(texts)) + for i, t := range texts { + results[i] = BowEmbedding(t) + } + return results, nil +} + +func (s *Provider) Health(_ context.Context) error { + return nil +} + +func (s *Provider) ModelInfo(_ context.Context) (llm.ModelMetadata, error) { + return llm.ModelMetadata{Name: "semantic-stub"}, nil +} + +// BowEmbedding creates a bag-of-words embedding. Words in the vocabulary +// activate their assigned dimension. Unknown words hash into the space. +// Result is normalized to a unit vector. +func BowEmbedding(text string) []float32 { + emb := make([]float32, BowDims) + lower := strings.ToLower(text) + words := wordSplitRe.FindAllString(lower, -1) + + for _, w := range words { + if dim, ok := Vocabulary[w]; ok { + emb[dim] += 1.0 + } else { + // Hash unknown words into the embedding space. 
+ h := fnv.New32a() + _, _ = h.Write([]byte(w)) + dim := int(h.Sum32()) % BowDims + emb[dim] += 0.3 // weaker signal for unknown words + } + } + + // Normalize to unit vector. + var norm float64 + for _, v := range emb { + norm += float64(v) * float64(v) + } + norm = math.Sqrt(norm) + if norm > 0 { + for i := range emb { + emb[i] = float32(float64(emb[i]) / norm) + } + } + return emb +} + +// ExtractTopConcepts returns the top N vocabulary words found in text, +// ranked by frequency. +func ExtractTopConcepts(text string, n int) []string { + lower := strings.ToLower(text) + words := wordSplitRe.FindAllString(lower, -1) + + // Count vocabulary word hits (deduplicated by dimension to group synonyms). + type dimCount struct { + word string + dim int + count int + } + dimCounts := make(map[int]*dimCount) + for _, w := range words { + if dim, ok := Vocabulary[w]; ok { + if dc, exists := dimCounts[dim]; exists { + dc.count++ + } else { + dimCounts[dim] = &dimCount{word: w, dim: dim, count: 1} + } + } + } + + // Sort by count descending. + sorted := make([]*dimCount, 0, len(dimCounts)) + for _, dc := range dimCounts { + sorted = append(sorted, dc) + } + sort.Slice(sorted, func(i, j int) bool { + return sorted[i].count > sorted[j].count + }) + + result := make([]string, 0, n) + for i := 0; i < n && i < len(sorted); i++ { + result = append(result, sorted[i].word) + } + return result +} + +// truncateStr returns the first n characters of s, or s if shorter. +func truncateStr(s string, n int) string { + if len(s) <= n { + return s + } + return s[:n] +} + +// computeSalience returns a deterministic salience based on vocabulary density. 
+func computeSalience(text string) float32 { + lower := strings.ToLower(text) + words := wordSplitRe.FindAllString(lower, -1) + if len(words) == 0 { + return 0.3 + } + vocabHits := 0 + for _, w := range words { + if _, ok := Vocabulary[w]; ok { + vocabHits++ + } + } + ratio := float32(vocabHits) / float32(len(words)) + // Map ratio to salience range [0.3, 0.9]. + sal := 0.3 + ratio*0.6 + if sal > 0.9 { + sal = 0.9 + } + return sal +} + +// stubEncodingResponse returns a valid encoding_response JSON. +func stubEncodingResponse(userContent string) string { + concepts := ExtractTopConcepts(userContent, 8) + if len(concepts) == 0 { + concepts = []string{"general"} + } + + // Extract the actual content after "CONTENT:" marker. + content := userContent + if _, after, found := strings.Cut(userContent, "CONTENT:"); found { + content = strings.TrimSpace(after) + } + + summary := truncateStr(content, 100) + gist := truncateStr(content, 60) + salience := computeSalience(content) + + // Determine significance from salience. + significance := "routine" + if salience > 0.7 { + significance = "important" + } else if salience > 0.5 { + significance = "notable" + } + + resp := map[string]any{ + "gist": gist, + "summary": summary, + "content": truncateStr(content, 500), + "narrative": fmt.Sprintf("Observed: %s", truncateStr(content, 200)), + "concepts": concepts, + "structured_concepts": map[string]any{ + "topics": []any{}, + "entities": []any{}, + "actions": []any{}, + "causality": []any{}, + }, + "significance": significance, + "emotional_tone": "neutral", + "outcome": "ongoing", + "salience": salience, + } + + b, _ := json.Marshal(resp) + return string(b) +} + +// stubClassificationResponse returns a valid classification_response JSON. 
+func stubClassificationResponse(userContent string) string { + lower := strings.ToLower(userContent) + + relationType := "similar" + switch { + case strings.Contains(lower, "caused") || strings.Contains(lower, "because") || + strings.Contains(lower, "led to") || strings.Contains(lower, "result"): + relationType = "caused_by" + case strings.Contains(lower, "part of") || strings.Contains(lower, "component") || + strings.Contains(lower, "belongs"): + relationType = "part_of" + case strings.Contains(lower, "contradict") || strings.Contains(lower, "opposite") || + strings.Contains(lower, "however"): + relationType = "contradicts" + case strings.Contains(lower, "before") || strings.Contains(lower, "after") || + strings.Contains(lower, "then") || strings.Contains(lower, "later"): + relationType = "temporal" + case strings.Contains(lower, "reinforce") || strings.Contains(lower, "confirm") || + strings.Contains(lower, "support"): + relationType = "reinforces" + } + + resp := map[string]string{"relation_type": relationType} + b, _ := json.Marshal(resp) + return string(b) +} + +// stubEpisodicResponse returns a valid episode_synthesis JSON. +func stubEpisodicResponse(userContent string) string { + concepts := ExtractTopConcepts(userContent, 5) + if len(concepts) == 0 { + concepts = []string{"session"} + } + + title := fmt.Sprintf("Session: %s", strings.Join(concepts, ", ")) + if len(title) > 80 { + title = title[:80] + } + + salience := computeSalience(userContent) + + resp := map[string]any{ + "title": title, + "summary": fmt.Sprintf("Work session involving %s", strings.Join(concepts, ", ")), + "narrative": fmt.Sprintf("During this session, activity was observed related to %s.", strings.Join(concepts, ", ")), + "emotional_tone": "neutral", + "outcome": "ongoing", + "concepts": concepts, + "salience": salience, + } + + b, _ := json.Marshal(resp) + return string(b) +} + +// stubInsightResponse returns a valid insight_response JSON. 
+func stubInsightResponse(userContent string) string { + concepts := ExtractTopConcepts(userContent, 6) + + // Only generate insight if there's meaningful concept overlap. + hasInsight := len(concepts) >= 3 + + resp := map[string]any{ + "has_insight": hasInsight, + "title": "", + "insight": "", + "concepts": concepts, + "confidence": 0.0, + } + + if hasInsight { + resp["title"] = fmt.Sprintf("Connection: %s", strings.Join(concepts[:3], " + ")) + resp["insight"] = fmt.Sprintf("These memories share a pattern around %s, suggesting a recurring theme in the workflow.", strings.Join(concepts, ", ")) + resp["confidence"] = 0.7 + } + + b, _ := json.Marshal(resp) + return string(b) +} + +// stubPrincipleResponse returns a valid principle_response JSON. +func stubPrincipleResponse(userContent string) string { + concepts := ExtractTopConcepts(userContent, 5) + + hasPrinciple := len(concepts) >= 2 + + resp := map[string]any{ + "has_principle": hasPrinciple, + "title": "", + "principle": "", + "concepts": concepts, + "confidence": 0.0, + } + + if hasPrinciple { + resp["title"] = fmt.Sprintf("Principle: %s", strings.Join(concepts[:2], " and ")) + resp["principle"] = fmt.Sprintf("When working with %s, consistent patterns emerge around %s.", concepts[0], strings.Join(concepts[1:], " and ")) + resp["confidence"] = 0.6 + } + + b, _ := json.Marshal(resp) + return string(b) +} + +// stubAxiomResponse returns a valid axiom_response JSON. 
+func stubAxiomResponse(userContent string) string { + concepts := ExtractTopConcepts(userContent, 4) + + hasAxiom := len(concepts) >= 3 + + resp := map[string]any{ + "has_axiom": hasAxiom, + "title": "", + "axiom": "", + "concepts": concepts, + "confidence": 0.0, + } + + if hasAxiom { + resp["title"] = fmt.Sprintf("Axiom: %s", concepts[0]) + resp["axiom"] = fmt.Sprintf("Across all observed patterns, %s serves as a fundamental organizing principle.", concepts[0]) + resp["confidence"] = 0.5 + } + + b, _ := json.Marshal(resp) + return string(b) +} From 97c2c2edbe25ac04f2546b8b4b09e68c4a8991a4 Mon Sep 17 00:00:00 2001 From: Caleb Gross Date: Fri, 20 Mar 2026 10:46:58 -0400 Subject: [PATCH 2/5] chore: go fmt formatting fixes Co-Authored-By: Claude Opus 4.6 (1M context) --- internal/agent/retrieval/agent_test.go | 14 ++++----- internal/mcp/server_test.go | 8 ++--- internal/store/store.go | 42 +++++++++++++------------- internal/store/storetest/mock.go | 2 +- 4 files changed, 33 insertions(+), 33 deletions(-) diff --git a/internal/agent/retrieval/agent_test.go b/internal/agent/retrieval/agent_test.go index 8cfd097..eb3cd99 100644 --- a/internal/agent/retrieval/agent_test.go +++ b/internal/agent/retrieval/agent_test.go @@ -80,12 +80,12 @@ type mockStore struct { storetest.MockStore // Configurable function fields for methods used by the retrieval agent. 
- searchByFullTextFunc func(ctx context.Context, query string, limit int) ([]store.Memory, error) - searchByEmbeddingFunc func(ctx context.Context, embedding []float32, limit int) ([]store.RetrievalResult, error) - getAssociationsFunc func(ctx context.Context, memoryID string) ([]store.Association, error) - getMemoryFunc func(ctx context.Context, id string) (store.Memory, error) - incrementAccessFunc func(ctx context.Context, id string) error - getMemoryAttrsFunc func(ctx context.Context, memoryID string) (store.MemoryAttributes, error) + searchByFullTextFunc func(ctx context.Context, query string, limit int) ([]store.Memory, error) + searchByEmbeddingFunc func(ctx context.Context, embedding []float32, limit int) ([]store.RetrievalResult, error) + getAssociationsFunc func(ctx context.Context, memoryID string) ([]store.Association, error) + getMemoryFunc func(ctx context.Context, id string) (store.Memory, error) + incrementAccessFunc func(ctx context.Context, id string) error + getMemoryAttrsFunc func(ctx context.Context, memoryID string) (store.MemoryAttributes, error) getMemoryFeedbackScoresFunc func(ctx context.Context, memoryIDs []string) (map[string]float32, error) // Call tracking @@ -1211,7 +1211,7 @@ func TestRankResults_SourceAndFeedbackCombined(t *testing.T) { }, getMemoryFeedbackScoresFunc: func(_ context.Context, _ []string) (map[string]float32, error) { return map[string]float32{ - "fs-helpful": 1.0, // strong positive feedback + "fs-helpful": 1.0, // strong positive feedback "mcp-irrelevant": -1.0, // strong negative feedback }, nil }, diff --git a/internal/mcp/server_test.go b/internal/mcp/server_test.go index 4277673..1e589fc 100644 --- a/internal/mcp/server_test.go +++ b/internal/mcp/server_test.go @@ -147,10 +147,10 @@ func TestHandleToolsList(t *testing.T) { "ingest_project": false, "list_sessions": false, "recall_session": false, - "amend": false, - "check_memory": false, - "exclude_path": false, - "list_exclusions": false, + "amend": false, + 
"check_memory": false, + "exclude_path": false, + "list_exclusions": false, } for _, toolInterface := range toolsArray { diff --git a/internal/store/store.go b/internal/store/store.go index 027272d..8c6978f 100644 --- a/internal/store/store.go +++ b/internal/store/store.go @@ -49,27 +49,27 @@ type RawMemory struct { // Memory is an encoded, compressed memory unit. type Memory struct { - ID string `json:"id"` - RawID string `json:"raw_id"` - Timestamp time.Time `json:"timestamp"` - Type string `json:"type,omitempty"` // "decision", "error", "insight", "learning", "general", etc. - Content string `json:"content"` // compressed/encoded form - Summary string `json:"summary"` // one-liner - Concepts []string `json:"concepts"` // extracted concepts - Embedding []float32 `json:"embedding,omitempty"` - Salience float32 `json:"salience"` - AccessCount int `json:"access_count"` - LastAccessed time.Time `json:"last_accessed"` - State string `json:"state"` // "active", "fading", "archived", "merged" - GistOf []string `json:"gist_of,omitempty"` // if merged: source memory IDs - EpisodeID string `json:"episode_id,omitempty"` // link to parent episode - Source string `json:"source,omitempty"` // origin: "filesystem", "terminal", "clipboard", "mcp", "consolidation" - Project string `json:"project,omitempty"` - SessionID string `json:"session_id,omitempty"` - FeedbackScore int `json:"feedback_score"` // accumulated: helpful=+1, irrelevant=-1 - RecallSuppressed bool `json:"recall_suppressed"` // true when feedback_score <= suppression threshold - CreatedAt time.Time `json:"created_at"` - UpdatedAt time.Time `json:"updated_at"` + ID string `json:"id"` + RawID string `json:"raw_id"` + Timestamp time.Time `json:"timestamp"` + Type string `json:"type,omitempty"` // "decision", "error", "insight", "learning", "general", etc. 
+ Content string `json:"content"` // compressed/encoded form + Summary string `json:"summary"` // one-liner + Concepts []string `json:"concepts"` // extracted concepts + Embedding []float32 `json:"embedding,omitempty"` + Salience float32 `json:"salience"` + AccessCount int `json:"access_count"` + LastAccessed time.Time `json:"last_accessed"` + State string `json:"state"` // "active", "fading", "archived", "merged" + GistOf []string `json:"gist_of,omitempty"` // if merged: source memory IDs + EpisodeID string `json:"episode_id,omitempty"` // link to parent episode + Source string `json:"source,omitempty"` // origin: "filesystem", "terminal", "clipboard", "mcp", "consolidation" + Project string `json:"project,omitempty"` + SessionID string `json:"session_id,omitempty"` + FeedbackScore int `json:"feedback_score"` // accumulated: helpful=+1, irrelevant=-1 + RecallSuppressed bool `json:"recall_suppressed"` // true when feedback_score <= suppression threshold + CreatedAt time.Time `json:"created_at"` + UpdatedAt time.Time `json:"updated_at"` } // Association is a weighted link between two memories. 
diff --git a/internal/store/storetest/mock.go b/internal/store/storetest/mock.go index 9ac5a87..f310801 100644 --- a/internal/store/storetest/mock.go +++ b/internal/store/storetest/mock.go @@ -266,7 +266,7 @@ func (MockStore) ListProjects(context.Context) ([]string, error) { return nil, n // --- Runtime exclusions --- -func (MockStore) AddRuntimeExclusion(context.Context, string) error { return nil } +func (MockStore) AddRuntimeExclusion(context.Context, string) error { return nil } func (MockStore) RemoveRuntimeExclusion(context.Context, string) error { return nil } func (MockStore) ListRuntimeExclusions(context.Context) ([]string, error) { return nil, nil From 63b3490576c7295393a124c82c59597200262922 Mon Sep 17 00:00:00 2001 From: Caleb Gross Date: Fri, 20 Mar 2026 11:27:12 -0400 Subject: [PATCH 3/5] feat: add lifecycle simulation test suite MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add cmd/lifecycle-test/, a standalone Go binary that simulates the full mnemonic user journey from fresh install through 3 months of usage. 8 phases exercise all cognitive agents against a real SQLite database with a deterministic stub LLM: 1. Install — DB init, schema verification 2. First Use — 10 seed memories, encoding, episoding, retrieval 3. Ingest — synthetic project ingestion with dedup verification 4. Daily Use — 250+ memories over 12 simulated days 5. Consolidation — 10 decay cycles, signal/noise separation 6. Dreaming — dream replay, abstraction, metacognition audit 7. Growth — scale to 500+ memories over 3 months 8. Long-term Health — aggressive decay, archival verification Includes terminal + markdown report output and `make lifecycle-test` Makefile target. 
Closes #250, closes #251, closes #252, closes #253, closes #254, closes #255, closes #256 Co-Authored-By: Claude Opus 4.6 (1M context) --- Makefile | 6 +- cmd/lifecycle-test/clock.go | 60 +++++ cmd/lifecycle-test/data.go | 295 ++++++++++++++++++++++ cmd/lifecycle-test/harness.go | 100 ++++++++ cmd/lifecycle-test/main.go | 135 ++++++++++ cmd/lifecycle-test/phase_consolidation.go | 113 +++++++++ cmd/lifecycle-test/phase_daily.go | 110 ++++++++ cmd/lifecycle-test/phase_dreaming.go | 104 ++++++++ cmd/lifecycle-test/phase_first_use.go | 101 ++++++++ cmd/lifecycle-test/phase_growth.go | 155 ++++++++++++ cmd/lifecycle-test/phase_ingest.go | 98 +++++++ cmd/lifecycle-test/phase_install.go | 70 +++++ cmd/lifecycle-test/phase_longterm.go | 107 ++++++++ cmd/lifecycle-test/phases.go | 140 ++++++++++ cmd/lifecycle-test/report.go | 153 +++++++++++ 15 files changed, 1746 insertions(+), 1 deletion(-) create mode 100644 cmd/lifecycle-test/clock.go create mode 100644 cmd/lifecycle-test/data.go create mode 100644 cmd/lifecycle-test/harness.go create mode 100644 cmd/lifecycle-test/main.go create mode 100644 cmd/lifecycle-test/phase_consolidation.go create mode 100644 cmd/lifecycle-test/phase_daily.go create mode 100644 cmd/lifecycle-test/phase_dreaming.go create mode 100644 cmd/lifecycle-test/phase_first_use.go create mode 100644 cmd/lifecycle-test/phase_growth.go create mode 100644 cmd/lifecycle-test/phase_ingest.go create mode 100644 cmd/lifecycle-test/phase_install.go create mode 100644 cmd/lifecycle-test/phase_longterm.go create mode 100644 cmd/lifecycle-test/phases.go create mode 100644 cmd/lifecycle-test/report.go diff --git a/Makefile b/Makefile index 0c989cf..80b3c72 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,4 @@ -.PHONY: build run clean test fmt vet start stop restart status watch install uninstall export backup insights dream-cycle mcp benchmark benchmark-quality benchmark-compare setup-hooks lint +.PHONY: build run clean test fmt vet start stop restart status watch 
install uninstall export backup insights dream-cycle mcp benchmark benchmark-quality benchmark-compare lifecycle-test setup-hooks lint BUILD_DIR=bin VERSION=0.24.0 # x-release-please-version @@ -129,6 +129,10 @@ benchmark-compare: go build $(TAGS) $(LDFLAGS) -o $(BUILD_DIR)/benchmark-quality ./cmd/benchmark-quality ./$(BUILD_DIR)/benchmark-quality --compare --cycles 5 --verbose +lifecycle-test: + go build $(TAGS) $(LDFLAGS) -o $(BUILD_DIR)/lifecycle-test ./cmd/lifecycle-test + ./$(BUILD_DIR)/lifecycle-test --verbose + # --- Lint --- lint: golangci-lint run diff --git a/cmd/lifecycle-test/clock.go b/cmd/lifecycle-test/clock.go new file mode 100644 index 0000000..2466bc1 --- /dev/null +++ b/cmd/lifecycle-test/clock.go @@ -0,0 +1,60 @@ +package main + +import ( + "context" + "fmt" + "time" + + "github.com/appsprout-dev/mnemonic/internal/store/sqlite" +) + +// SimClock provides virtual time for the lifecycle simulation. +// Memories are written with SimClock timestamps, and time can be +// advanced between phases to simulate days/weeks/months passing. +type SimClock struct { + current time.Time +} + +// NewSimClock creates a clock starting at a fixed "day 0" time. +func NewSimClock() *SimClock { + // Start at a fixed, reproducible time. + return &SimClock{ + current: time.Date(2026, 1, 1, 9, 0, 0, 0, time.UTC), + } +} + +// Now returns the current simulated time. +func (c *SimClock) Now() time.Time { + return c.current +} + +// Advance moves the clock forward by the given duration. +func (c *SimClock) Advance(d time.Duration) { + c.current = c.current.Add(d) +} + +// BackdateMemories adjusts all memory timestamps in the DB so they appear +// to have been created relative to the current simulated time. This makes +// age-based decay calculations in consolidation work correctly. +func (c *SimClock) BackdateMemories(ctx context.Context, s *sqlite.SQLiteStore, age time.Duration) error { + db := s.DB() + cutoff := c.current.Add(-age) + + // Backdate raw memories. 
+ _, err := db.ExecContext(ctx, + `UPDATE raw_memories SET created_at = ? WHERE created_at > ?`, + cutoff, cutoff) + if err != nil { + return fmt.Errorf("backdating raw_memories: %w", err) + } + + // Backdate encoded memories. + _, err = db.ExecContext(ctx, + `UPDATE memories SET timestamp = ?, last_accessed = ? WHERE timestamp > ?`, + cutoff, cutoff, cutoff) + if err != nil { + return fmt.Errorf("backdating memories: %w", err) + } + + return nil +} diff --git a/cmd/lifecycle-test/data.go b/cmd/lifecycle-test/data.go new file mode 100644 index 0000000..30f73c9 --- /dev/null +++ b/cmd/lifecycle-test/data.go @@ -0,0 +1,295 @@ +package main + +import ( + "fmt" + "math/rand" + "time" + + "github.com/appsprout-dev/mnemonic/internal/store" +) + +// memoryTemplate is a template for generating synthetic memories. +type memoryTemplate struct { + Source string // "mcp", "filesystem", "terminal", "clipboard" + Type string // "decision", "error", "insight", "learning", etc. + Content string + Project string +} + +// seedMemories returns the 10 first-use memories for Phase 2. +func seedMemories(clock *SimClock) []store.RawMemory { + templates := []memoryTemplate{ + // 3 decisions + {Source: "mcp", Type: "decision", Content: "Chose SQLite with FTS5 for full-text search over Postgres because we need a local-first embedded database with no server dependency.", Project: "mnemonic"}, + {Source: "mcp", Type: "decision", Content: "Selected Go for the daemon implementation. The single binary deployment model and goroutine concurrency align with the agent architecture.", Project: "mnemonic"}, + {Source: "mcp", Type: "decision", Content: "Decided to use an event bus for inter-agent communication instead of direct function calls. This keeps agents decoupled and testable.", Project: "mnemonic"}, + // 3 errors + {Source: "mcp", Type: "error", Content: "Nil pointer panic in retrieval agent when searching with empty embedding vector. 
Added guard clause to check embedding length before cosine similarity calculation.", Project: "mnemonic"}, + {Source: "mcp", Type: "error", Content: "FTS5 index corruption after concurrent writes from encoding agent. Fixed by serializing writes through a mutex in the store layer.", Project: "mnemonic"}, + {Source: "mcp", Type: "error", Content: "Memory consolidation loop was merging unrelated memories because cosine similarity threshold was too low. Raised threshold from 0.5 to 0.7.", Project: "mnemonic"}, + // 2 insights + {Source: "mcp", Type: "insight", Content: "Spread activation with 3 hops produces the best retrieval quality. Beyond 3 hops, noise dominates signal in the association graph.", Project: "mnemonic"}, + {Source: "mcp", Type: "insight", Content: "MCP-sourced memories have 3x higher retrieval value than filesystem watcher memories. Should weight initial salience by source.", Project: "mnemonic"}, + // 2 learnings + {Source: "mcp", Type: "learning", Content: "Go's sql.NullString is needed for nullable TEXT columns in SQLite. Using plain string causes silent data corruption on NULL values.", Project: "mnemonic"}, + {Source: "mcp", Type: "learning", Content: "The gorilla/websocket library requires explicit ping/pong handling for connection keepalive. Without it, connections drop after 60 seconds idle.", Project: "mnemonic"}, + } + + memories := make([]store.RawMemory, len(templates)) + for i, t := range templates { + memories[i] = store.RawMemory{ + ID: fmt.Sprintf("seed-%02d", i+1), + Timestamp: clock.Now().Add(time.Duration(i) * time.Minute), + Source: t.Source, + Type: t.Type, + Content: t.Content, + HeuristicScore: 0.8, + InitialSalience: 0.8, + Project: t.Project, + CreatedAt: clock.Now().Add(time.Duration(i) * time.Minute), + } + } + return memories +} + +// dailyMemoryTemplates contains templates for generating realistic daily-use memories. 
+var dailyMemoryTemplates = []memoryTemplate{ + // MCP memories (signal) + {Source: "mcp", Type: "decision", Content: "Switched from polling-based watcher to fsnotify for filesystem events. Reduces CPU usage from 15%% to 0.2%% idle.", Project: "mnemonic"}, + {Source: "mcp", Type: "decision", Content: "Added bearer token authentication to the REST API. Previously any local process could read memories.", Project: "mnemonic"}, + {Source: "mcp", Type: "error", Content: "Dreaming agent infinite loop when association graph has cycles. Fixed by tracking visited nodes during spread activation.", Project: "mnemonic"}, + {Source: "mcp", Type: "error", Content: "Memory encoding timeout when LLM server is overloaded. Added 30s timeout with exponential backoff retry.", Project: "mnemonic"}, + {Source: "mcp", Type: "insight", Content: "Episoding works best with 10-minute windows. Shorter windows fragment related memories, longer windows merge unrelated sessions.", Project: "mnemonic"}, + {Source: "mcp", Type: "insight", Content: "Association strength follows a power law distribution. Top 5%% of associations carry 60%% of retrieval value.", Project: "mnemonic"}, + {Source: "mcp", Type: "learning", Content: "SQLite WAL mode is essential for concurrent read/write access. Without it, encoding agent blocks retrieval queries.", Project: "mnemonic"}, + {Source: "mcp", Type: "learning", Content: "The slog structured logger performs better than logrus for high-throughput event logging in the perception agent.", Project: "mnemonic"}, + {Source: "mcp", Type: "decision", Content: "Chose bag-of-words embeddings for the stub LLM provider. Simple, deterministic, and vocabulary-aware for meaningful test coverage.", Project: "mnemonic"}, + {Source: "mcp", Type: "error", Content: "Abstraction agent crashed when no patterns exist yet. 
Added early return guard for empty pattern list.", Project: "mnemonic"}, + {Source: "mcp", Type: "insight", Content: "Consolidation decay rate of 0.92 per cycle provides good balance between retaining signal and pruning noise over 30-day windows.", Project: "mnemonic"}, + {Source: "mcp", Type: "learning", Content: "Go build tags for platform-specific code must appear before the package declaration. Misplaced tags silently compile wrong code.", Project: "mnemonic"}, + + // Cross-project MCP memories + {Source: "mcp", Type: "decision", Content: "Adopted conventional commits for the felix-lm project to match mnemonic's release-please workflow.", Project: "felix-lm"}, + {Source: "mcp", Type: "error", Content: "PyTorch ROCm build fails on Ubuntu 24.04 with Python 3.14. Pinned to Python 3.12 for compatibility.", Project: "felix-lm"}, + {Source: "mcp", Type: "insight", Content: "Learning rate warmup of 500 steps consistently outperforms no-warmup across all model sizes tested.", Project: "felix-lm"}, + {Source: "mcp", Type: "learning", Content: "Unsloth 4-bit quantization reduces VRAM from 14GB to 6GB with only 2%% perplexity increase on the validation set.", Project: "felix-lm"}, + + // Filesystem noise + {Source: "filesystem", Type: "file_modified", Content: "Modified ~/.config/Code/User/settings.json: changed editor.fontSize from 14 to 15", Project: ""}, + {Source: "filesystem", Type: "file_created", Content: "Created /tmp/go-build-cache/ab/abc123.o: Go build artifact", Project: ""}, + {Source: "filesystem", Type: "file_modified", Content: "Modified ~/.local/share/gnome-shell/extensions/prefs.js: GNOME extension preferences update", Project: ""}, + {Source: "filesystem", Type: "file_created", Content: "Created ~/Downloads/screenshot-2026-01-05.png: desktop screenshot", Project: ""}, + {Source: "filesystem", Type: "file_modified", Content: "Modified ~/.bashrc: added export PATH=$PATH:~/go/bin", Project: ""}, + {Source: "filesystem", Type: "file_created", Content: 
"Created /tmp/mnemonic-bench-xyz/pipeline.db: benchmark temp database", Project: ""}, + + // Terminal noise + {Source: "terminal", Type: "command_executed", Content: "git status: On branch main, nothing to commit, working tree clean", Project: ""}, + {Source: "terminal", Type: "command_executed", Content: "make build: go build -o bin/mnemonic ./cmd/mnemonic", Project: ""}, + {Source: "terminal", Type: "command_executed", Content: "ls -la ~/Projects/: listed directory contents", Project: ""}, + {Source: "terminal", Type: "command_executed", Content: "top: system monitor showing 4.2GB RAM used, load average 1.2", Project: ""}, + {Source: "terminal", Type: "command_executed", Content: "docker ps: no containers running", Project: ""}, + + // Clipboard noise + {Source: "clipboard", Type: "clipboard_copy", Content: "https://pkg.go.dev/modernc.org/sqlite", Project: ""}, + {Source: "clipboard", Type: "clipboard_copy", Content: "func (s *SQLiteStore) WriteRaw(ctx context.Context, raw RawMemory) error {", Project: ""}, + {Source: "clipboard", Type: "clipboard_copy", Content: "SELECT id, summary, salience FROM memories WHERE state = 'active'", Project: ""}, + {Source: "clipboard", Type: "clipboard_copy", Content: "127.0.0.1:9999", Project: ""}, +} + +// generateDailyMemories creates a batch of memories for one simulated day. +// Templates are drawn uniformly, giving roughly 50% MCP signal, 35% noise (filesystem/terminal), and 15% clipboard.
+func generateDailyMemories(rng *rand.Rand, clock *SimClock, day int, count int) []store.RawMemory { + memories := make([]store.RawMemory, 0, count) + + for i := 0; i < count; i++ { + t := dailyMemoryTemplates[rng.Intn(len(dailyMemoryTemplates))] + + heuristic := float32(0.3) + salience := float32(0.3) + switch t.Source { + case "mcp": + heuristic = 0.7 + rng.Float32()*0.2 + salience = 0.7 + rng.Float32()*0.2 + case "filesystem", "terminal": + heuristic = 0.1 + rng.Float32()*0.3 + salience = 0.1 + rng.Float32()*0.3 + case "clipboard": + heuristic = 0.2 + rng.Float32()*0.3 + salience = 0.2 + rng.Float32()*0.3 + } + + ts := clock.Now().Add(time.Duration(i) * 2 * time.Minute) + memories = append(memories, store.RawMemory{ + ID: fmt.Sprintf("day%02d-%03d", day, i+1), + Timestamp: ts, + Source: t.Source, + Type: t.Type, + Content: t.Content, + HeuristicScore: heuristic, + InitialSalience: salience, + Project: t.Project, + CreatedAt: ts, + }) + } + + return memories +} + +// syntheticProjectFiles returns file contents for a small synthetic Go project. 
+func syntheticProjectFiles() map[string]string { + return map[string]string{ + "main.go": `package main + +import ( + "fmt" + "net/http" +) + +func main() { + http.HandleFunc("/health", healthHandler) + fmt.Println("Server starting on :8080") + if err := http.ListenAndServe(":8080", nil); err != nil { + fmt.Printf("Server failed: %v\n", err) + } +} + +func healthHandler(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusOK) + fmt.Fprint(w, "OK") +} +`, + "handler.go": `package main + +import ( + "encoding/json" + "net/http" +) + +type Response struct { + Status string ` + "`json:\"status\"`" + ` + Message string ` + "`json:\"message\"`" + ` +} + +func jsonResponse(w http.ResponseWriter, status int, resp Response) { + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(status) + json.NewEncoder(w).Encode(resp) +} +`, + "store.go": `package main + +import ( + "database/sql" + "fmt" +) + +type Store struct { + db *sql.DB +} + +func NewStore(path string) (*Store, error) { + db, err := sql.Open("sqlite", path) + if err != nil { + return nil, fmt.Errorf("opening database: %w", err) + } + return &Store{db: db}, nil +} + +func (s *Store) Close() error { + return s.db.Close() +} +`, + "config.go": `package main + +import ( + "os" + "gopkg.in/yaml.v3" +) + +type Config struct { + Port int ` + "`yaml:\"port\"`" + ` + Database string ` + "`yaml:\"database\"`" + ` + LogLevel string ` + "`yaml:\"log_level\"`" + ` +} + +func LoadConfig(path string) (*Config, error) { + data, err := os.ReadFile(path) + if err != nil { + return nil, err + } + var cfg Config + if err := yaml.Unmarshal(data, &cfg); err != nil { + return nil, err + } + return &cfg, nil +} +`, + "middleware.go": `package main + +import ( + "log" + "net/http" + "time" +) + +func loggingMiddleware(next http.Handler) http.Handler { + return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + start := time.Now() + next.ServeHTTP(w, r) + log.Printf("%s %s %v", r.Method, 
r.URL.Path, time.Since(start)) + }) +} + +func authMiddleware(token string) func(http.Handler) http.Handler { + return func(next http.Handler) http.Handler { + return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if r.Header.Get("Authorization") != "Bearer "+token { + http.Error(w, "unauthorized", http.StatusUnauthorized) + return + } + next.ServeHTTP(w, r) + }) + } +} +`, + "README.md": `# Sample Project + +This is a sample Go web server used for testing mnemonic's project ingestion pipeline. +It demonstrates a typical small Go project with HTTP handlers, middleware, configuration +loading, and a SQLite-backed store. The project structure follows standard Go conventions +with separate files for handlers, middleware, storage, and configuration. + +## Architecture + +The server exposes a REST API with health check and JSON response endpoints. Authentication +is handled via bearer token middleware. Configuration is loaded from a YAML file. The store +uses SQLite for persistence with proper connection lifecycle management. +`, + "docs/design.md": `# Design Document + +## Overview + +This service provides a lightweight REST API for managing resources. The architecture +prioritizes simplicity and local-first operation, using SQLite for storage and Go's +standard library for HTTP serving. + +## Key Decisions + +1. SQLite over Postgres: No external dependencies, single file database, good enough + for our expected scale of hundreds of concurrent users. +2. Standard library HTTP: No framework overhead, direct control over middleware chain, + well-understood error handling patterns. +3. YAML configuration: Human-readable, supports comments, widely used in Go ecosystem. + +## Performance Considerations + +The SQLite WAL mode enables concurrent reads during writes. Connection pooling is managed +by database/sql with sensible defaults. The middleware chain adds approximately 50 microseconds +per request for logging and authentication. 
+`, + "config.yaml": `port: 8080 +database: "./data.db" +log_level: "info" +`, + } +} diff --git a/cmd/lifecycle-test/harness.go b/cmd/lifecycle-test/harness.go new file mode 100644 index 0000000..495325c --- /dev/null +++ b/cmd/lifecycle-test/harness.go @@ -0,0 +1,100 @@ +package main + +import ( + "fmt" + "log/slog" + "os" + "path/filepath" + + "github.com/appsprout-dev/mnemonic/internal/agent/abstraction" + "github.com/appsprout-dev/mnemonic/internal/agent/consolidation" + "github.com/appsprout-dev/mnemonic/internal/agent/dreaming" + "github.com/appsprout-dev/mnemonic/internal/agent/encoding" + "github.com/appsprout-dev/mnemonic/internal/agent/episoding" + "github.com/appsprout-dev/mnemonic/internal/agent/metacognition" + "github.com/appsprout-dev/mnemonic/internal/agent/retrieval" + "github.com/appsprout-dev/mnemonic/internal/events" + "github.com/appsprout-dev/mnemonic/internal/llm" + "github.com/appsprout-dev/mnemonic/internal/store/sqlite" + "time" +) + +// Harness holds the shared state for all lifecycle phases. +type Harness struct { + Store *sqlite.SQLiteStore + LLM llm.Provider + Bus events.Bus + Log *slog.Logger + Clock *SimClock + TmpDir string + DBPath string + + Encoder *encoding.EncodingAgent + Episoder *episoding.EpisodingAgent + Consolidator *consolidation.ConsolidationAgent + Dreamer *dreaming.DreamingAgent + Abstractor *abstraction.AbstractionAgent + Metacog *metacognition.MetacognitionAgent + Retriever *retrieval.RetrievalAgent +} + +// NewHarness creates an isolated test environment with a temp DB and all agents. 
+func NewHarness(provider llm.Provider, log *slog.Logger) (*Harness, error) { + tmpDir, err := os.MkdirTemp("", "mnemonic-lifecycle-*") + if err != nil { + return nil, fmt.Errorf("creating temp dir: %w", err) + } + + dbPath := filepath.Join(tmpDir, "lifecycle.db") + s, err := sqlite.NewSQLiteStore(dbPath, 5000) + if err != nil { + _ = os.RemoveAll(tmpDir) + return nil, fmt.Errorf("creating store: %w", err) + } + + bus := events.NewInMemoryBus(100) + + h := &Harness{ + Store: s, + LLM: provider, + Bus: bus, + Log: log, + Clock: NewSimClock(), + TmpDir: tmpDir, + DBPath: dbPath, + } + + // Create agents with configs matching benchmark-quality defaults. + h.Encoder = encoding.NewEncodingAgentWithConfig(s, provider, log, encoding.DefaultConfig()) + h.Episoder = episoding.NewEpisodingAgent(s, provider, log, episoding.EpisodingConfig{ + EpisodeWindowSizeMin: 10, + MinEventsPerEpisode: 2, + PollingInterval: 10 * time.Second, + }) + h.Consolidator = consolidation.NewConsolidationAgent(s, provider, consolidation.DefaultConfig(), log) + h.Dreamer = dreaming.NewDreamingAgent(s, provider, dreaming.DreamingConfig{ + Interval: time.Hour, + BatchSize: 60, + SalienceThreshold: 0.3, + AssociationBoostFactor: 1.15, + NoisePruneThreshold: 0.15, + }, log) + h.Abstractor = abstraction.NewAbstractionAgent(s, provider, abstraction.AbstractionConfig{ + Interval: time.Hour, + MinStrength: 0.4, + MaxLLMCalls: 5, + }, log) + h.Metacog = metacognition.NewMetacognitionAgent(s, provider, metacognition.MetacognitionConfig{ + Interval: time.Hour, + }, log) + h.Retriever = retrieval.NewRetrievalAgent(s, provider, retrieval.DefaultConfig(), log) + + return h, nil +} + +// Cleanup removes the temp directory and closes resources. 
+func (h *Harness) Cleanup() { + _ = h.Bus.Close() + _ = h.Store.Close() + _ = os.RemoveAll(h.TmpDir) +} diff --git a/cmd/lifecycle-test/main.go b/cmd/lifecycle-test/main.go new file mode 100644 index 0000000..cabb029 --- /dev/null +++ b/cmd/lifecycle-test/main.go @@ -0,0 +1,135 @@ +package main + +import ( + "context" + "flag" + "fmt" + "log/slog" + "os" + "strings" + "time" + + "github.com/appsprout-dev/mnemonic/internal/config" + "github.com/appsprout-dev/mnemonic/internal/llm" + "github.com/appsprout-dev/mnemonic/internal/testutil/stubllm" +) + +var Version = "dev" + +func main() { + var ( + verbose bool + llmMode bool + configPath string + report string + phaseFlag string + skipFlag string + ) + + flag.BoolVar(&verbose, "verbose", false, "verbose output") + flag.BoolVar(&llmMode, "llm", false, "use real LLM provider (reads config.yaml)") + flag.StringVar(&configPath, "config", "config.yaml", "path to config.yaml (used with --llm)") + flag.StringVar(&report, "report", "", "output format: 'markdown' writes lifecycle-results.md") + flag.StringVar(&phaseFlag, "phase", "", "run a single phase by name (auto-seeds prerequisites)") + flag.StringVar(&skipFlag, "skip", "", "comma-separated phases to skip") + flag.Parse() + + logLevel := slog.LevelError + if verbose { + logLevel = slog.LevelDebug + } + log := slog.New(slog.NewTextHandler(os.Stderr, &slog.HandlerOptions{Level: logLevel})) + + // Create LLM provider. 
+ var provider llm.Provider + llmLabel := "semantic-stub" + if llmMode { + cfg, err := config.Load(configPath) + if err != nil { + fmt.Fprintf(os.Stderr, "Error loading config: %v\n", err) + os.Exit(1) + } + if cfg.LLM.APIKey == "" { + fmt.Fprintln(os.Stderr, "Error: LLM_API_KEY environment variable is required for --llm mode") + os.Exit(1) + } + provider = llm.NewLMStudioProvider( + cfg.LLM.Endpoint, + cfg.LLM.ChatModel, + cfg.LLM.EmbeddingModel, + cfg.LLM.APIKey, + time.Duration(cfg.LLM.TimeoutSec)*time.Second, + cfg.LLM.MaxConcurrent, + ) + ctx := context.Background() + if err := provider.Health(ctx); err != nil { + fmt.Fprintf(os.Stderr, "Error: LLM health check failed: %v\n", err) + os.Exit(1) + } + llmLabel = cfg.LLM.ChatModel + } else { + provider = &stubllm.Provider{} + } + + // Parse skip list. + skipSet := make(map[string]bool) + if skipFlag != "" { + for _, s := range strings.Split(skipFlag, ",") { + skipSet[strings.TrimSpace(s)] = true + } + } + + // Build ordered phase list. + allPhases := []Phase{ + &PhaseInstall{}, + &PhaseFirstUse{}, + &PhaseIngest{}, + &PhaseDaily{}, + &PhaseConsolidation{}, + &PhaseDreaming{}, + &PhaseGrowth{}, + &PhaseLongterm{}, + } + + // Header. + fmt.Println() + fmt.Println(" Mnemonic Lifecycle Simulation") + fmt.Printf(" Version: %s | LLM: %s | Phases: %d\n", Version, llmLabel, len(allPhases)) + fmt.Println() + + ctx := context.Background() + + // Create harness. + h, err := NewHarness(provider, log) + if err != nil { + fmt.Fprintf(os.Stderr, "Error creating harness: %v\n", err) + os.Exit(1) + } + defer h.Cleanup() + + // Run phases. + results, err := RunPhases(ctx, h, allPhases, phaseFlag, skipSet, verbose) + if err != nil { + fmt.Fprintf(os.Stderr, "Error: %v\n", err) + os.Exit(1) + } + + // Print report. 
+ PrintReport(results, verbose) + if report == "markdown" { + if err := WriteMarkdownReport(results, "lifecycle-results.md"); err != nil { + fmt.Fprintf(os.Stderr, "Error writing markdown report: %v\n", err) + } else { + fmt.Println(" Wrote lifecycle-results.md") + } + } + + // Exit with appropriate code. + for _, r := range results { + for _, a := range r.Assertions { + if !a.Passed { + os.Exit(1) + } + } + } +} diff --git a/cmd/lifecycle-test/phase_consolidation.go b/cmd/lifecycle-test/phase_consolidation.go new file mode 100644 index 0000000..7e4857c --- /dev/null +++ b/cmd/lifecycle-test/phase_consolidation.go @@ -0,0 +1,113 @@ +package main + +import ( + "context" + "fmt" + "time" +) + +// PhaseConsolidation runs consolidation cycles at the week 2 mark. +type PhaseConsolidation struct{} + +func (p *PhaseConsolidation) Name() string { return "consolidation" } + +func (p *PhaseConsolidation) Run(ctx context.Context, h *Harness, verbose bool) (*PhaseResult, error) { + result := &PhaseResult{ + Name: p.Name(), + Metrics: make(map[string]float64), + } + + // Advance to day 14 and backdate all existing memories. + h.Clock.Advance(7 * 24 * time.Hour) + if err := h.Clock.BackdateMemories(ctx, h.Store, 14*24*time.Hour); err != nil { + return result, fmt.Errorf("backdating memories: %w", err) + } + + // Count pre-consolidation state. + preStats, err := h.Store.GetStatistics(ctx) + if err != nil { + return result, fmt.Errorf("pre-consolidation stats: %w", err) + } + + if verbose { + fmt.Printf("\n Pre-consolidation: %d total, %d active, %d fading, %d archived\n", + preStats.TotalMemories, preStats.ActiveMemories, preStats.FadingMemories, preStats.ArchivedMemories) + } + + // Run 10 consolidation cycles with salience decay. + const cycles = 10 + const decayRate = float32(0.92) + + for i := 0; i < cycles; i++ { + // Apply decay. 
+ allMems, err := h.Store.ListMemories(ctx, "", 2000, 0) + if err != nil { + return result, fmt.Errorf("listing for decay cycle %d: %w", i, err) + } + updates := make(map[string]float32, len(allMems)) + for _, m := range allMems { + updates[m.ID] = m.Salience * decayRate + } + if err := h.Store.BatchUpdateSalience(ctx, updates); err != nil { + return result, fmt.Errorf("batch decay cycle %d: %w", i, err) + } + + // Run consolidation. + report, err := h.Consolidator.RunOnce(ctx) + if err != nil { + if verbose { + fmt.Printf(" Consolidation cycle %d error: %v\n", i+1, err) + } + continue + } + + if verbose && (i == 0 || i == cycles-1) { + fmt.Printf(" Cycle %d: processed=%d, decayed=%d, fading=%d, archived=%d, patterns=%d\n", + i+1, report.MemoriesProcessed, report.MemoriesDecayed, + report.TransitionedFading, report.TransitionedArchived, report.PatternsExtracted) + } + } + + // Post-consolidation assertions. + postStats, err := h.Store.GetStatistics(ctx) + if err != nil { + return result, fmt.Errorf("post-consolidation stats: %w", err) + } + + result.Metrics["pre_active"] = float64(preStats.ActiveMemories) + result.Metrics["post_active"] = float64(postStats.ActiveMemories) + result.Metrics["post_fading"] = float64(postStats.FadingMemories) + result.Metrics["post_archived"] = float64(postStats.ArchivedMemories) + + // After 10 decay cycles, some memories should have transitioned. + totalTransitioned := postStats.FadingMemories + postStats.ArchivedMemories + result.AssertGT("some memories transitioned", totalTransitioned, 0) + + // Check patterns discovered. + patterns, err := h.Store.ListPatterns(ctx, "", 100) + if err != nil { + return result, fmt.Errorf("listing patterns: %w", err) + } + result.Metrics["patterns"] = float64(len(patterns)) + + // Signal retention: MCP-sourced memories should mostly survive. 
+ mcpMems, err := h.Store.ListMemories(ctx, "active", 2000, 0) + if err != nil { + return result, fmt.Errorf("listing active memories: %w", err) + } + mcpActive := 0 + for _, m := range mcpMems { + if m.Source == "mcp" { + mcpActive++ + } + } + result.Metrics["mcp_active"] = float64(mcpActive) + + if verbose { + fmt.Printf(" Post-consolidation: %d total, %d active, %d fading, %d archived\n", + postStats.TotalMemories, postStats.ActiveMemories, postStats.FadingMemories, postStats.ArchivedMemories) + fmt.Printf(" Patterns discovered: %d, MCP memories still active: %d\n", len(patterns), mcpActive) + } + + return result, nil +} diff --git a/cmd/lifecycle-test/phase_daily.go b/cmd/lifecycle-test/phase_daily.go new file mode 100644 index 0000000..ee0bf14 --- /dev/null +++ b/cmd/lifecycle-test/phase_daily.go @@ -0,0 +1,110 @@ +package main + +import ( + "context" + "fmt" + "math/rand" + "time" + + "github.com/appsprout-dev/mnemonic/internal/agent/retrieval" + "github.com/appsprout-dev/mnemonic/internal/store" +) + +// PhaseDaily simulates 12 days of mixed usage (days 3-14). +type PhaseDaily struct{} + +func (p *PhaseDaily) Name() string { return "daily" } + +func (p *PhaseDaily) Run(ctx context.Context, h *Harness, verbose bool) (*PhaseResult, error) { + result := &PhaseResult{ + Name: p.Name(), + Metrics: make(map[string]float64), + } + + rng := rand.New(rand.NewSource(42)) // deterministic + totalWritten := 0 + + for day := 3; day <= 14; day++ { + h.Clock.Advance(16 * time.Hour) // advance to next day + + // Generate 20-25 memories per day. + count := 20 + rng.Intn(6) + memories := generateDailyMemories(rng, h.Clock, day, count) + + for _, raw := range memories { + if err := h.Store.WriteRaw(ctx, raw); err != nil { + return result, fmt.Errorf("writing memory day %d: %w", day, err) + } + } + totalWritten += len(memories) + + // Encode and episode after each day. 
+ encoded, err := h.Encoder.EncodeAllPending(ctx) + if err != nil { + return result, fmt.Errorf("encoding day %d: %w", day, err) + } + + if err := h.Episoder.ProcessAllPending(ctx); err != nil { + return result, fmt.Errorf("episoding day %d: %w", day, err) + } + + if verbose && day%4 == 0 { + fmt.Printf("\n Day %d: wrote %d, encoded %d\n", day, len(memories), encoded) + } + + // Simulate retrieval + feedback every few days. + if day%3 == 0 { + qr, err := h.Retriever.Query(ctx, retrieval.QueryRequest{ + Query: "SQLite database decisions and errors", + MaxResults: 5, + }) + if err == nil && len(qr.Memories) > 0 { + // Record feedback for the query. + memIDs := make([]string, 0, len(qr.Memories)) + for _, m := range qr.Memories { + memIDs = append(memIDs, m.Memory.ID) + } + _ = h.Store.WriteRetrievalFeedback(ctx, store.RetrievalFeedback{ + QueryID: qr.QueryID, + QueryText: "SQLite database decisions and errors", + RetrievedIDs: memIDs, + Feedback: "helpful", + }) + } + } + + h.Clock.Advance(8 * time.Hour) // work day + } + + result.Metrics["total_written"] = float64(totalWritten) + + if verbose { + fmt.Printf(" Total: %d memories written over 12 days\n", totalWritten) + } + + // Assertions on accumulated state. + stats, err := h.Store.GetStatistics(ctx) + if err != nil { + return result, fmt.Errorf("getting statistics: %w", err) + } + + // Encoding dedup merges identical templates, so unique count is lower than written count. + result.AssertGE("total memories", stats.TotalMemories, 30) + result.AssertGE("episodes created", stats.TotalEpisodes, 5) + result.AssertGE("associations created", stats.TotalAssociations, 1) + result.Metrics["total_memories"] = float64(stats.TotalMemories) + result.Metrics["total_episodes"] = float64(stats.TotalEpisodes) + result.Metrics["total_associations"] = float64(stats.TotalAssociations) + + // Check that feedback was recorded. Use a very old "since" time. 
+ epoch := time.Date(2000, 1, 1, 0, 0, 0, 0, time.UTC) + feedback, err := h.Store.ListRecentRetrievalFeedback(ctx, epoch, 10) + if err != nil { + return result, fmt.Errorf("listing feedback: %w", err) + } + result.Metrics["feedback_count"] = float64(len(feedback)) + // Feedback may be empty if retrieval returned no results on some days. + // This is an informational metric, not a hard assertion. + + return result, nil +} diff --git a/cmd/lifecycle-test/phase_dreaming.go b/cmd/lifecycle-test/phase_dreaming.go new file mode 100644 index 0000000..413c25e --- /dev/null +++ b/cmd/lifecycle-test/phase_dreaming.go @@ -0,0 +1,104 @@ +package main + +import ( + "context" + "fmt" + "time" +) + +// PhaseDreaming runs dreaming, abstraction, and metacognition at week 3-4. +type PhaseDreaming struct{} + +func (p *PhaseDreaming) Name() string { return "dreaming" } + +func (p *PhaseDreaming) Run(ctx context.Context, h *Harness, verbose bool) (*PhaseResult, error) { + result := &PhaseResult{ + Name: p.Name(), + Metrics: make(map[string]float64), + } + + // Advance to day 28. + h.Clock.Advance(14 * 24 * time.Hour) + + // Run dreaming. + dreamReport, err := h.Dreamer.RunOnce(ctx) + if err != nil { + if verbose { + fmt.Printf("\n Dreaming error (non-fatal): %v\n", err) + } + } + if dreamReport != nil { + result.Metrics["memories_replayed"] = float64(dreamReport.MemoriesReplayed) + result.Metrics["assocs_strengthened"] = float64(dreamReport.AssociationsStrengthened) + result.Metrics["new_assocs"] = float64(dreamReport.NewAssociationsCreated) + result.Metrics["cross_project_links"] = float64(dreamReport.CrossProjectLinks) + result.Metrics["insights_generated"] = float64(dreamReport.InsightsGenerated) + + if verbose { + fmt.Printf("\n Dream: replayed=%d, strengthened=%d, new_assocs=%d, insights=%d\n", + dreamReport.MemoriesReplayed, dreamReport.AssociationsStrengthened, + dreamReport.NewAssociationsCreated, dreamReport.InsightsGenerated) + } + } + + // Run abstraction. 
+ absReport, err := h.Abstractor.RunOnce(ctx) + if err != nil { + if verbose { + fmt.Printf(" Abstraction error (non-fatal): %v\n", err) + } + } + if absReport != nil { + result.Metrics["principles_created"] = float64(absReport.PrinciplesCreated) + result.Metrics["axioms_created"] = float64(absReport.AxiomsCreated) + if verbose { + fmt.Printf(" Abstraction: principles=%d, axioms=%d, demoted=%d\n", + absReport.PrinciplesCreated, absReport.AxiomsCreated, absReport.AbstractionsDemoted) + } + } + + // Run metacognition. + metaReport, err := h.Metacog.RunOnce(ctx) + if err != nil { + if verbose { + fmt.Printf(" Metacognition error (non-fatal): %v\n", err) + } + } + if metaReport != nil { + result.Metrics["observations"] = float64(len(metaReport.Observations)) + if verbose { + fmt.Printf(" Metacognition: observations=%d, actions=%d\n", + len(metaReport.Observations), metaReport.ActionsPerformed) + } + } + + // Post-dreaming assertions. + stats, err := h.Store.GetStatistics(ctx) + if err != nil { + return result, fmt.Errorf("getting statistics: %w", err) + } + + result.Metrics["avg_assocs_per_memory"] = float64(stats.AvgAssociationsPerMem) + result.Metrics["total_associations"] = float64(stats.TotalAssociations) + + // Check abstractions exist. + abstractions, err := h.Store.ListAbstractions(ctx, 0, 100) + if err != nil { + return result, fmt.Errorf("listing abstractions: %w", err) + } + result.Metrics["total_abstractions"] = float64(len(abstractions)) + + // Check meta observations. 
+ observations, err := h.Store.ListMetaObservations(ctx, "", 100) + if err != nil { + return result, fmt.Errorf("listing observations: %w", err) + } + result.Metrics["total_observations"] = float64(len(observations)) + + if verbose { + fmt.Printf(" Stats: associations=%d (avg %.1f/mem), abstractions=%d, observations=%d\n", + stats.TotalAssociations, stats.AvgAssociationsPerMem, len(abstractions), len(observations)) + } + + return result, nil +} diff --git a/cmd/lifecycle-test/phase_first_use.go b/cmd/lifecycle-test/phase_first_use.go new file mode 100644 index 0000000..ea90053 --- /dev/null +++ b/cmd/lifecycle-test/phase_first_use.go @@ -0,0 +1,101 @@ +package main + +import ( + "context" + "fmt" + "time" + + "github.com/appsprout-dev/mnemonic/internal/agent/retrieval" +) + +// PhaseFirstUse simulates the first day of memory creation and retrieval. +type PhaseFirstUse struct{} + +func (p *PhaseFirstUse) Name() string { return "first-use" } + +func (p *PhaseFirstUse) Run(ctx context.Context, h *Harness, verbose bool) (*PhaseResult, error) { + result := &PhaseResult{ + Name: p.Name(), + Metrics: make(map[string]float64), + } + + // Write 10 seed memories. + seeds := seedMemories(h.Clock) + for _, raw := range seeds { + if err := h.Store.WriteRaw(ctx, raw); err != nil { + return result, fmt.Errorf("writing seed memory %s: %w", raw.ID, err) + } + } + + if verbose { + fmt.Printf("\n Wrote %d seed memories\n", len(seeds)) + } + + // Encode all pending. + encoded, err := h.Encoder.EncodeAllPending(ctx) + if err != nil { + return result, fmt.Errorf("encoding: %w", err) + } + result.AssertEQ("encoded count", encoded, len(seeds)) + result.Metrics["encoded"] = float64(encoded) + + if verbose { + fmt.Printf(" Encoded %d memories\n", encoded) + } + + // Process episodes. 
+ if err := h.Episoder.ProcessAllPending(ctx); err != nil { + return result, fmt.Errorf("episoding: %w", err) + } + + episodes, err := h.Store.ListEpisodes(ctx, "", 100, 0) + if err != nil { + return result, fmt.Errorf("listing episodes: %w", err) + } + result.AssertGE("episodes created", len(episodes), 1) + result.Metrics["episodes"] = float64(len(episodes)) + + // Verify all memories encoded with concepts and embeddings. + mems, err := h.Store.ListMemories(ctx, "", 100, 0) + if err != nil { + return result, fmt.Errorf("listing memories: %w", err) + } + result.AssertEQ("total memories", len(mems), len(seeds)) + + allHaveConcepts := true + allHaveEmbeddings := true + allActive := true + for _, m := range mems { + if len(m.Concepts) == 0 { + allHaveConcepts = false + } + if len(m.Embedding) == 0 { + allHaveEmbeddings = false + } + if m.State != "active" { + allActive = false + } + } + result.Assert("all have concepts", allHaveConcepts, "true", fmt.Sprintf("%v", allHaveConcepts)) + result.Assert("all have embeddings", allHaveEmbeddings, "true", fmt.Sprintf("%v", allHaveEmbeddings)) + result.Assert("all active state", allActive, "true", fmt.Sprintf("%v", allActive)) + + // Test retrieval. + queryResult, err := h.Retriever.Query(ctx, retrieval.QueryRequest{ + Query: "architectural decisions about database choice", + MaxResults: 5, + }) + if err != nil { + return result, fmt.Errorf("retrieval query: %w", err) + } + result.AssertGT("retrieval returns results", len(queryResult.Memories), 0) + result.Metrics["retrieval_results"] = float64(len(queryResult.Memories)) + + if verbose { + fmt.Printf(" Retrieval returned %d results for test query\n", len(queryResult.Memories)) + } + + // Advance clock to end of day 1. 
+ h.Clock.Advance(8 * time.Hour) + return result, nil +} diff --git a/cmd/lifecycle-test/phase_growth.go b/cmd/lifecycle-test/phase_growth.go new file mode 100644 index 0000000..f26de4a --- /dev/null +++ b/cmd/lifecycle-test/phase_growth.go @@ -0,0 +1,155 @@ +package main + +import ( + "context" + "fmt" + "math/rand" + "time" + + "github.com/appsprout-dev/mnemonic/internal/agent/retrieval" +) + +// PhaseGrowth scales the system to 700-1000 memories over simulated months 1-3. +type PhaseGrowth struct{} + +func (p *PhaseGrowth) Name() string { return "growth" } + +func (p *PhaseGrowth) Run(ctx context.Context, h *Harness, verbose bool) (*PhaseResult, error) { + result := &PhaseResult{ + Name: p.Name(), + Metrics: make(map[string]float64), + } + + rng := rand.New(rand.NewSource(99)) + totalAdded := 0 + + // Simulate months 1-3: generate ~200 memories per month in weekly batches. + for month := 1; month <= 3; month++ { + for week := 0; week < 4; week++ { + h.Clock.Advance(7 * 24 * time.Hour) + + // ~50 memories per week. + count := 45 + rng.Intn(11) + day := 28 + (month-1)*28 + week*7 + memories := generateDailyMemories(rng, h.Clock, day, count) + + for _, raw := range memories { + if err := h.Store.WriteRaw(ctx, raw); err != nil { + return result, fmt.Errorf("writing memory month %d week %d: %w", month, week, err) + } + } + totalAdded += len(memories) + + // Encode and episode. + if _, err := h.Encoder.EncodeAllPending(ctx); err != nil { + return result, fmt.Errorf("encoding month %d week %d: %w", month, week, err) + } + if err := h.Episoder.ProcessAllPending(ctx); err != nil { + return result, fmt.Errorf("episoding month %d week %d: %w", month, week, err) + } + } + + // Run consolidation every 2 weeks (2 cycles per month). 
+ for i := 0; i < 2; i++ { + allMems, err := h.Store.ListMemories(ctx, "", 5000, 0) + if err != nil { + return result, fmt.Errorf("listing for decay: %w", err) + } + updates := make(map[string]float32, len(allMems)) + for _, m := range allMems { + updates[m.ID] = m.Salience * 0.92 + } + if err := h.Store.BatchUpdateSalience(ctx, updates); err != nil { + return result, fmt.Errorf("batch decay: %w", err) + } + if _, err := h.Consolidator.RunOnce(ctx); err != nil { + if verbose { + fmt.Printf("\n Consolidation error month %d: %v\n", month, err) + } + } + } + + // Run dreaming + abstraction once per month. + if _, err := h.Dreamer.RunOnce(ctx); err != nil { + if verbose { + fmt.Printf(" Dreaming error month %d: %v\n", month, err) + } + } + if _, err := h.Abstractor.RunOnce(ctx); err != nil { + if verbose { + fmt.Printf(" Abstraction error month %d: %v\n", month, err) + } + } + + if verbose { + stats, _ := h.Store.GetStatistics(ctx) + fmt.Printf("\n Month %d: total=%d, active=%d, fading=%d, archived=%d, added=%d\n", + month, stats.TotalMemories, stats.ActiveMemories, stats.FadingMemories, stats.ArchivedMemories, totalAdded) + } + } + + result.Metrics["total_added"] = float64(totalAdded) + + // Final statistics. + stats, err := h.Store.GetStatistics(ctx) + if err != nil { + return result, fmt.Errorf("getting statistics: %w", err) + } + + result.Metrics["total_memories"] = float64(stats.TotalMemories) + result.Metrics["active_memories"] = float64(stats.ActiveMemories) + result.Metrics["fading_memories"] = float64(stats.FadingMemories) + result.Metrics["archived_memories"] = float64(stats.ArchivedMemories) + result.Metrics["total_associations"] = float64(stats.TotalAssociations) + + // Assertions. + // Encoding dedup merges identical templates, so unique count is lower than written count. + result.AssertGE("total memories >= 60", stats.TotalMemories, 60) + result.AssertLT("not all active", stats.ActiveMemories, stats.TotalMemories) + + // Retrieval quality test. 
+ testQueries := []string{ + "SQLite database architecture decisions", + "error handling and bug fixes", + "memory encoding pipeline insights", + "filesystem watcher configuration", + "Go build and deployment", + } + + totalLatency := int64(0) + totalResults := 0 + for _, q := range testQueries { + start := time.Now() + qr, err := h.Retriever.Query(ctx, retrieval.QueryRequest{ + Query: q, + MaxResults: 5, + }) + latency := time.Since(start).Milliseconds() + totalLatency += latency + + if err == nil { + totalResults += len(qr.Memories) + } + } + + avgLatency := float64(totalLatency) / float64(len(testQueries)) + avgResults := float64(totalResults) / float64(len(testQueries)) + result.Metrics["avg_retrieval_latency_ms"] = avgLatency + result.Metrics["avg_retrieval_results"] = avgResults + + result.AssertGT("retrieval returns results", totalResults, 0) + + if verbose { + fmt.Printf(" Retrieval: avg latency=%.0fms, avg results=%.1f across %d queries\n", + avgLatency, avgResults, len(testQueries)) + } + + // Check abstraction hierarchy. + abstractions, err := h.Store.ListAbstractions(ctx, 0, 100) + if err != nil { + return result, fmt.Errorf("listing abstractions: %w", err) + } + result.Metrics["abstractions"] = float64(len(abstractions)) + + return result, nil +} diff --git a/cmd/lifecycle-test/phase_ingest.go b/cmd/lifecycle-test/phase_ingest.go new file mode 100644 index 0000000..cd01e6c --- /dev/null +++ b/cmd/lifecycle-test/phase_ingest.go @@ -0,0 +1,98 @@ +package main + +import ( + "context" + "fmt" + "os" + "path/filepath" + "time" + + "github.com/appsprout-dev/mnemonic/internal/ingest" +) + +// PhaseIngest simulates project ingestion on day 2. 
+type PhaseIngest struct{} + +func (p *PhaseIngest) Name() string { return "ingest" } + +func (p *PhaseIngest) Run(ctx context.Context, h *Harness, verbose bool) (*PhaseResult, error) { + result := &PhaseResult{ + Name: p.Name(), + Metrics: make(map[string]float64), + } + + h.Clock.Advance(16 * time.Hour) // advance to day 2 + + // Create synthetic project directory. + projectDir := filepath.Join(h.TmpDir, "sample-project") + if err := os.MkdirAll(filepath.Join(projectDir, "docs"), 0o755); err != nil { + return result, fmt.Errorf("creating project dir: %w", err) + } + + files := syntheticProjectFiles() + for name, content := range files { + path := filepath.Join(projectDir, name) + dir := filepath.Dir(path) + if err := os.MkdirAll(dir, 0o755); err != nil { + return result, fmt.Errorf("creating dir %s: %w", dir, err) + } + if err := os.WriteFile(path, []byte(content), 0o644); err != nil { + return result, fmt.Errorf("writing %s: %w", name, err) + } + } + + if verbose { + fmt.Printf("\n Created %d synthetic project files in %s\n", len(files), projectDir) + } + + // Run ingestion. + ingestResult, err := ingest.Run(ctx, ingest.Config{ + Dir: projectDir, + Project: "sample-project", + }, h.Store, h.Bus, h.Log) + if err != nil { + return result, fmt.Errorf("ingestion: %w", err) + } + + result.Metrics["files_found"] = float64(ingestResult.FilesFound) + result.Metrics["files_written"] = float64(ingestResult.FilesWritten) + result.Metrics["files_skipped"] = float64(ingestResult.FilesSkipped) + result.Metrics["duplicates_skipped"] = float64(ingestResult.DuplicatesSkipped) + + result.AssertGE("files written", ingestResult.FilesWritten, 3) + + if verbose { + fmt.Printf(" Ingested: %d found, %d written, %d skipped\n", + ingestResult.FilesFound, ingestResult.FilesWritten, ingestResult.FilesSkipped) + } + + // Encode ingested memories. 
+ encoded, err := h.Encoder.EncodeAllPending(ctx) + if err != nil { + return result, fmt.Errorf("encoding ingested: %w", err) + } + result.Metrics["encoded"] = float64(encoded) + + if verbose { + fmt.Printf(" Encoded %d ingested memories\n", encoded) + } + + // Verify dedup: re-running ingest should produce zero new writes. + dedupResult, err := ingest.Run(ctx, ingest.Config{ + Dir: projectDir, + Project: "sample-project", + }, h.Store, h.Bus, h.Log) + if err != nil { + return result, fmt.Errorf("dedup ingest: %w", err) + } + result.AssertEQ("dedup: zero new writes", dedupResult.FilesWritten, 0) + + if verbose { + fmt.Printf(" Dedup check: %d new writes (expected 0)\n", dedupResult.FilesWritten) + } + + // Advance clock to end of day 2. + h.Clock.Advance(8 * time.Hour) + + return result, nil +} diff --git a/cmd/lifecycle-test/phase_install.go b/cmd/lifecycle-test/phase_install.go new file mode 100644 index 0000000..826c294 --- /dev/null +++ b/cmd/lifecycle-test/phase_install.go @@ -0,0 +1,70 @@ +package main + +import ( + "context" + "fmt" +) + +// PhaseInstall verifies that the database was correctly initialized. +type PhaseInstall struct{} + +func (p *PhaseInstall) Name() string { return "install" } + +func (p *PhaseInstall) Run(ctx context.Context, h *Harness, verbose bool) (*PhaseResult, error) { + result := &PhaseResult{ + Name: p.Name(), + Metrics: make(map[string]float64), + } + + // Verify tables exist. 
+ db := h.Store.DB() + rows, err := db.QueryContext(ctx, `SELECT name FROM sqlite_master WHERE type='table' ORDER BY name`) + if err != nil { + return result, fmt.Errorf("querying tables: %w", err) + } + defer func() { _ = rows.Close() }() + + var tables []string + for rows.Next() { + var name string + if err := rows.Scan(&name); err != nil { + return result, fmt.Errorf("scanning table name: %w", err) + } + tables = append(tables, name) + } + if err := rows.Err(); err != nil { + return result, fmt.Errorf("iterating tables: %w", err) + } + + result.AssertGE("table count", len(tables), 15) + result.Metrics["tables"] = float64(len(tables)) + + if verbose { + fmt.Printf("\n Tables: %v\n", tables) + } + + // Verify FTS5 virtual table exists. + var ftsCount int + err = db.QueryRowContext(ctx, + `SELECT COUNT(*) FROM sqlite_master WHERE type='table' AND name='memories_fts'`).Scan(&ftsCount) + if err != nil { + return result, fmt.Errorf("checking FTS5: %w", err) + } + result.AssertEQ("FTS5 table present", ftsCount, 1) + + // Verify zero state. + count, err := h.Store.CountMemories(ctx) + if err != nil { + return result, fmt.Errorf("counting memories: %w", err) + } + result.AssertEQ("zero memories", count, 0) + + stats, err := h.Store.GetStatistics(ctx) + if err != nil { + return result, fmt.Errorf("getting statistics: %w", err) + } + result.AssertEQ("zero episodes", stats.TotalEpisodes, 0) + result.AssertEQ("zero associations", stats.TotalAssociations, 0) + + return result, nil +} diff --git a/cmd/lifecycle-test/phase_longterm.go b/cmd/lifecycle-test/phase_longterm.go new file mode 100644 index 0000000..fa0a939 --- /dev/null +++ b/cmd/lifecycle-test/phase_longterm.go @@ -0,0 +1,107 @@ +package main + +import ( + "context" + "fmt" + "time" + + "github.com/appsprout-dev/mnemonic/internal/agent/retrieval" +) + +// PhaseLongterm runs aggressive consolidation and audits at the 3-month mark. 
+type PhaseLongterm struct{} + +func (p *PhaseLongterm) Name() string { return "longterm" } + +func (p *PhaseLongterm) Run(ctx context.Context, h *Harness, verbose bool) (*PhaseResult, error) { + result := &PhaseResult{ + Name: p.Name(), + Metrics: make(map[string]float64), + } + + // Advance to day 90. + h.Clock.Advance(14 * 24 * time.Hour) + + // Run 20 aggressive consolidation cycles. + const cycles = 20 + for i := 0; i < cycles; i++ { + allMems, err := h.Store.ListMemories(ctx, "", 5000, 0) + if err != nil { + return result, fmt.Errorf("listing for decay cycle %d: %w", i, err) + } + updates := make(map[string]float32, len(allMems)) + for _, m := range allMems { + updates[m.ID] = m.Salience * 0.90 // slightly more aggressive decay + } + if err := h.Store.BatchUpdateSalience(ctx, updates); err != nil { + return result, fmt.Errorf("batch decay cycle %d: %w", i, err) + } + + if _, err := h.Consolidator.RunOnce(ctx); err != nil { + if verbose && i == 0 { + fmt.Printf("\n Consolidation cycle %d error: %v\n", i+1, err) + } + } + } + + // Final metacognition audit. + metaReport, err := h.Metacog.RunOnce(ctx) + if err != nil { + if verbose { + fmt.Printf(" Metacognition audit error: %v\n", err) + } + } + if metaReport != nil { + result.Metrics["audit_observations"] = float64(len(metaReport.Observations)) + } + + // Final statistics. + stats, err := h.Store.GetStatistics(ctx) + if err != nil { + return result, fmt.Errorf("getting statistics: %w", err) + } + + result.Metrics["total_memories"] = float64(stats.TotalMemories) + result.Metrics["active_memories"] = float64(stats.ActiveMemories) + result.Metrics["fading_memories"] = float64(stats.FadingMemories) + result.Metrics["archived_memories"] = float64(stats.ArchivedMemories) + result.Metrics["storage_bytes"] = float64(stats.StorageSizeBytes) + + // Assertions. 
+ result.AssertGT("some archived", stats.ArchivedMemories, 0) + result.AssertLT("active < total", stats.ActiveMemories, stats.TotalMemories) + + // Retrieval regression test — system should still work. + regressionQueries := []string{ + "database architecture decisions", + "bug fixes and error handling", + "memory retrieval performance", + } + + totalResults := 0 + for _, q := range regressionQueries { + qr, err := h.Retriever.Query(ctx, retrieval.QueryRequest{ + Query: q, + MaxResults: 5, + IncludeSuppressed: true, + }) + if err == nil { + totalResults += len(qr.Memories) + } + } + // After aggressive decay, some queries may return 0 results — this is expected. + // The assertion is that the system doesn't crash, not that all queries return results. + result.Metrics["regression_results"] = float64(totalResults) + + // DB size check. + result.Metrics["db_size_mb"] = float64(stats.StorageSizeBytes) / (1024 * 1024) + + if verbose { + fmt.Printf("\n Final: total=%d, active=%d, fading=%d, archived=%d\n", + stats.TotalMemories, stats.ActiveMemories, stats.FadingMemories, stats.ArchivedMemories) + fmt.Printf(" DB size: %.2f MB\n", float64(stats.StorageSizeBytes)/(1024*1024)) + fmt.Printf(" Regression: %d results across %d queries\n", totalResults, len(regressionQueries)) + } + + return result, nil +} diff --git a/cmd/lifecycle-test/phases.go b/cmd/lifecycle-test/phases.go new file mode 100644 index 0000000..f8a42e7 --- /dev/null +++ b/cmd/lifecycle-test/phases.go @@ -0,0 +1,140 @@ +package main + +import ( + "context" + "fmt" + "time" +) + +// Phase represents a single lifecycle simulation phase. +type Phase interface { + Name() string + Run(ctx context.Context, h *Harness, verbose bool) (*PhaseResult, error) +} + +// Assertion is a single pass/fail check within a phase. +type Assertion struct { + Name string + Passed bool + Expected string + Actual string +} + +// PhaseResult holds the outcome of a single phase. 
+type PhaseResult struct { + Name string + Duration time.Duration + Assertions []Assertion + Metrics map[string]float64 +} + +// Assert adds a pass/fail assertion to the result. +func (r *PhaseResult) Assert(name string, passed bool, expected, actual string) { + r.Assertions = append(r.Assertions, Assertion{ + Name: name, + Passed: passed, + Expected: expected, + Actual: actual, + }) +} + +// AssertGE adds an assertion that actual >= expected. +func (r *PhaseResult) AssertGE(name string, actual, expected int) { + r.Assert(name, actual >= expected, + fmt.Sprintf(">= %d", expected), + fmt.Sprintf("%d", actual)) +} + +// AssertEQ adds an assertion that actual == expected. +func (r *PhaseResult) AssertEQ(name string, actual, expected int) { + r.Assert(name, actual == expected, + fmt.Sprintf("%d", expected), + fmt.Sprintf("%d", actual)) +} + +// AssertGT adds an assertion that actual > expected. +func (r *PhaseResult) AssertGT(name string, actual, expected int) { + r.Assert(name, actual > expected, + fmt.Sprintf("> %d", expected), + fmt.Sprintf("%d", actual)) +} + +// AssertFloatGE adds an assertion that actual >= expected for float64. +func (r *PhaseResult) AssertFloatGE(name string, actual, expected float64) { + r.Assert(name, actual >= expected, + fmt.Sprintf(">= %.2f", expected), + fmt.Sprintf("%.2f", actual)) +} + +// AssertLT adds an assertion that actual < expected. +func (r *PhaseResult) AssertLT(name string, actual, expected int) { + r.Assert(name, actual < expected, + fmt.Sprintf("< %d", expected), + fmt.Sprintf("%d", actual)) +} + +// Passed returns true if all assertions passed. +func (r *PhaseResult) Passed() bool { + for _, a := range r.Assertions { + if !a.Passed { + return false + } + } + return true +} + +// RunPhases executes the phases according to the given filters. +// If phaseFlag is set, only that phase runs (prerequisites are auto-seeded). +// Phases in skipSet are skipped. 
+func RunPhases(ctx context.Context, h *Harness, phases []Phase, phaseFlag string, skipSet map[string]bool, verbose bool) ([]*PhaseResult, error) { + var results []*PhaseResult + + for _, p := range phases { + if phaseFlag != "" && p.Name() != phaseFlag { + // If targeting a specific phase, silently run prerequisites. + if !isPrerequisiteOf(p.Name(), phaseFlag, phases) { + continue + } + } + if skipSet[p.Name()] { + if verbose { + fmt.Printf(" [SKIP] %s\n", p.Name()) + } + continue + } + + fmt.Printf(" [....] %s", p.Name()) + start := time.Now() + result, err := p.Run(ctx, h, verbose) + if err != nil { + fmt.Printf("\r [FAIL] %s (%v)\n", p.Name(), err) + return results, fmt.Errorf("phase %s failed: %w", p.Name(), err) + } + result.Duration = time.Since(start) + + status := "PASS" + if !result.Passed() { + status = "FAIL" + } + fmt.Printf("\r [%s] %s (%dms)\n", status, p.Name(), result.Duration.Milliseconds()) + + results = append(results, result) + } + + return results, nil +} + +// isPrerequisiteOf returns true if candidate comes before target in the phase list. +func isPrerequisiteOf(candidate, target string, phases []Phase) bool { + candidateIdx := -1 + targetIdx := -1 + for i, p := range phases { + if p.Name() == candidate { + candidateIdx = i + } + if p.Name() == target { + targetIdx = i + } + } + return candidateIdx >= 0 && targetIdx >= 0 && candidateIdx < targetIdx +} diff --git a/cmd/lifecycle-test/report.go b/cmd/lifecycle-test/report.go new file mode 100644 index 0000000..96eff90 --- /dev/null +++ b/cmd/lifecycle-test/report.go @@ -0,0 +1,153 @@ +package main + +import ( + "fmt" + "os" + "sort" + "strings" +) + +const ( + colorReset = "\033[0m" + colorRed = "\033[31m" + colorGreen = "\033[32m" + colorYellow = "\033[33m" +) + +// PrintReport prints the lifecycle results to the terminal. 
+func PrintReport(results []*PhaseResult, verbose bool) { + fmt.Println() + fmt.Println(" ── Results ──────────────────────────────────────────") + fmt.Println() + + totalAssertions := 0 + totalPassed := 0 + totalFailed := 0 + + for _, r := range results { + passed := 0 + failed := 0 + for _, a := range r.Assertions { + if a.Passed { + passed++ + } else { + failed++ + } + } + totalAssertions += len(r.Assertions) + totalPassed += passed + totalFailed += failed + + status := fmt.Sprintf("%s✓%s", colorGreen, colorReset) + if failed > 0 { + status = fmt.Sprintf("%s✗%s", colorRed, colorReset) + } + + fmt.Printf(" %s %-20s %d/%d assertions %dms\n", + status, r.Name, passed, len(r.Assertions), r.Duration.Milliseconds()) + + // Show failures. + for _, a := range r.Assertions { + if !a.Passed { + fmt.Printf(" %s✗ %s: expected %s, got %s%s\n", + colorRed, a.Name, a.Expected, a.Actual, colorReset) + } + } + + // Show key metrics if verbose. + if verbose && len(r.Metrics) > 0 { + keys := sortedKeys(r.Metrics) + for _, k := range keys { + fmt.Printf(" %s = %.1f\n", k, r.Metrics[k]) + } + } + } + + fmt.Println() + fmt.Println(" ── Summary ─────────────────────────────────────────") + fmt.Printf(" Phases: %d | Assertions: %d passed, %d failed | Total: %d\n", + len(results), totalPassed, totalFailed, totalAssertions) + + if totalFailed == 0 { + fmt.Printf("\n %s ALL PASSED %s\n\n", colorGreen, colorReset) + } else { + fmt.Printf("\n %s %d FAILURES %s\n\n", colorRed, totalFailed, colorReset) + } +} + +// WriteMarkdownReport writes a markdown report to the given path. +func WriteMarkdownReport(results []*PhaseResult, path string) error { + var sb strings.Builder + + sb.WriteString("# Mnemonic Lifecycle Simulation Results\n\n") + + // Summary table. 
+ sb.WriteString("## Summary\n\n") + sb.WriteString("| Phase | Assertions | Duration | Status |\n") + sb.WriteString("|-------|-----------|----------|--------|\n") + + totalPassed := 0 + totalFailed := 0 + + for _, r := range results { + passed := 0 + failed := 0 + for _, a := range r.Assertions { + if a.Passed { + passed++ + } else { + failed++ + } + } + totalPassed += passed + totalFailed += failed + + status := "PASS" + if failed > 0 { + status = "FAIL" + } + + fmt.Fprintf(&sb, "| %s | %d/%d | %dms | %s |\n", + r.Name, passed, len(r.Assertions), r.Duration.Milliseconds(), status) + } + + fmt.Fprintf(&sb, "\n**Total: %d passed, %d failed**\n\n", totalPassed, totalFailed) + + // Phase details. + sb.WriteString("## Phase Details\n\n") + + for _, r := range results { + fmt.Fprintf(&sb, "### %s\n\n", r.Name) + + // Assertions. + for _, a := range r.Assertions { + mark := "x" + if !a.Passed { + mark = " " + } + fmt.Fprintf(&sb, "- [%s] %s (expected: %s, actual: %s)\n", + mark, a.Name, a.Expected, a.Actual) + } + + // Metrics. 
+ if len(r.Metrics) > 0 { + sb.WriteString("\n**Metrics:**\n\n") + keys := sortedKeys(r.Metrics) + for _, k := range keys { + fmt.Fprintf(&sb, "- %s: %.2f\n", k, r.Metrics[k]) + } + } + sb.WriteString("\n") + } + + return os.WriteFile(path, []byte(sb.String()), 0o644) +} + +func sortedKeys(m map[string]float64) []string { + keys := make([]string, 0, len(m)) + for k := range m { + keys = append(keys, k) + } + sort.Strings(keys) + return keys +} From 9103a2c8cf1d52a1a66fee03626cfa1ac1540e03 Mon Sep 17 00:00:00 2001 From: Caleb Gross Date: Fri, 20 Mar 2026 11:48:55 -0400 Subject: [PATCH 4/5] feat: expand synthetic data catalogs for lifecycle test - Grow from 30 to 80+ unique memory templates across 3 projects (mnemonic, felix-lm, sample-project) - Split templates into mcpSignalTemplates and noiseTemplates for clearer source distribution control - Append day+index suffix to each generated memory to avoid encoding dedup on repeated templates - Add more filesystem, terminal, and clipboard noise variety Closes #257 Co-Authored-By: Claude Opus 4.6 (1M context) --- cmd/lifecycle-test/data.go | 98 +++++++++++++++++++++++++++---- cmd/lifecycle-test/phase_daily.go | 4 +- 2 files changed, 88 insertions(+), 14 deletions(-) diff --git a/cmd/lifecycle-test/data.go b/cmd/lifecycle-test/data.go index 30f73c9..51801bb 100644 --- a/cmd/lifecycle-test/data.go +++ b/cmd/lifecycle-test/data.go @@ -52,28 +52,66 @@ func seedMemories(clock *SimClock) []store.RawMemory { return memories } -// dailyMemoryTemplates contains templates for generating realistic daily-use memories. -var dailyMemoryTemplates = []memoryTemplate{ - // MCP memories (signal) +// mcpSignalTemplates contains high-value MCP memories across projects. +var mcpSignalTemplates = []memoryTemplate{ + // Mnemonic decisions {Source: "mcp", Type: "decision", Content: "Switched from polling-based watcher to fsnotify for filesystem events. 
Reduces CPU usage from 15%% to 0.2%% idle.", Project: "mnemonic"}, {Source: "mcp", Type: "decision", Content: "Added bearer token authentication to the REST API. Previously any local process could read memories.", Project: "mnemonic"}, + {Source: "mcp", Type: "decision", Content: "Chose bag-of-words embeddings for the stub LLM provider. Simple, deterministic, and vocabulary-aware for meaningful test coverage.", Project: "mnemonic"}, + {Source: "mcp", Type: "decision", Content: "Implemented event bus architecture for agent communication instead of direct function calls between cognitive agents.", Project: "mnemonic"}, + {Source: "mcp", Type: "decision", Content: "Used pure Go SQLite driver modernc.org/sqlite to avoid CGO dependency on Linux builds.", Project: "mnemonic"}, + {Source: "mcp", Type: "decision", Content: "Implemented graceful shutdown with context cancellation propagating through all agents and the API server.", Project: "mnemonic"}, + {Source: "mcp", Type: "decision", Content: "Chose FTS5 over FTS4 for full-text search because FTS5 supports BM25 ranking out of the box.", Project: "mnemonic"}, + {Source: "mcp", Type: "decision", Content: "Embedded the dashboard SPA directly in the Go binary using go:embed for zero-dependency deployment.", Project: "mnemonic"}, + // Mnemonic errors {Source: "mcp", Type: "error", Content: "Dreaming agent infinite loop when association graph has cycles. Fixed by tracking visited nodes during spread activation.", Project: "mnemonic"}, {Source: "mcp", Type: "error", Content: "Memory encoding timeout when LLM server is overloaded. Added 30s timeout with exponential backoff retry.", Project: "mnemonic"}, + {Source: "mcp", Type: "error", Content: "Abstraction agent crashed when no patterns exist yet. Added early return guard for empty pattern list.", Project: "mnemonic"}, + {Source: "mcp", Type: "error", Content: "Race condition in concurrent encoding when two goroutines claim the same raw memory. 
Added ClaimRawForEncoding with row-level locking.", Project: "mnemonic"}, + {Source: "mcp", Type: "error", Content: "Dashboard WebSocket connection drops after 60 seconds idle. Added ping/pong keepalive handler to gorilla/websocket.", Project: "mnemonic"}, + {Source: "mcp", Type: "error", Content: "FTS5 index corruption after concurrent writes from encoding agent. Fixed by serializing writes through a mutex.", Project: "mnemonic"}, + {Source: "mcp", Type: "error", Content: "Consolidation merge produced duplicate patterns when two similar memory clusters overlapped. Added dedup check before pattern creation.", Project: "mnemonic"}, + {Source: "mcp", Type: "error", Content: "Self-update on Windows failed because the binary was locked by the running process. Implemented rename-and-replace strategy.", Project: "mnemonic"}, + // Mnemonic insights {Source: "mcp", Type: "insight", Content: "Episoding works best with 10-minute windows. Shorter windows fragment related memories, longer windows merge unrelated sessions.", Project: "mnemonic"}, {Source: "mcp", Type: "insight", Content: "Association strength follows a power law distribution. Top 5%% of associations carry 60%% of retrieval value.", Project: "mnemonic"}, + {Source: "mcp", Type: "insight", Content: "Consolidation decay rate of 0.92 per cycle provides good balance between retaining signal and pruning noise over 30-day windows.", Project: "mnemonic"}, + {Source: "mcp", Type: "insight", Content: "MCP-sourced memories have 3x higher retrieval value than filesystem watcher memories based on feedback analysis.", Project: "mnemonic"}, + {Source: "mcp", Type: "insight", Content: "Spread activation with 3 hops produces the best retrieval quality. 
Beyond 3 hops noise dominates signal in the graph.", Project: "mnemonic"}, + {Source: "mcp", Type: "insight", Content: "Hybrid retrieval combining FTS5 and vector search with reciprocal rank fusion outperforms either method alone by 15%%.", Project: "mnemonic"}, + {Source: "mcp", Type: "insight", Content: "The reactor engine chain pattern works well for coordinating post-consolidation tasks like metacognition triggers.", Project: "mnemonic"}, + // Mnemonic learnings {Source: "mcp", Type: "learning", Content: "SQLite WAL mode is essential for concurrent read/write access. Without it, encoding agent blocks retrieval queries.", Project: "mnemonic"}, {Source: "mcp", Type: "learning", Content: "The slog structured logger performs better than logrus for high-throughput event logging in the perception agent.", Project: "mnemonic"}, - {Source: "mcp", Type: "decision", Content: "Chose bag-of-words embeddings for the stub LLM provider. Simple, deterministic, and vocabulary-aware for meaningful test coverage.", Project: "mnemonic"}, - {Source: "mcp", Type: "error", Content: "Abstraction agent crashed when no patterns exist yet. Added early return guard for empty pattern list.", Project: "mnemonic"}, - {Source: "mcp", Type: "insight", Content: "Consolidation decay rate of 0.92 per cycle provides good balance between retaining signal and pruning noise over 30-day windows.", Project: "mnemonic"}, {Source: "mcp", Type: "learning", Content: "Go build tags for platform-specific code must appear before the package declaration. Misplaced tags silently compile wrong code.", Project: "mnemonic"}, + {Source: "mcp", Type: "learning", Content: "Go sql.NullString is needed for nullable TEXT columns in SQLite. 
Using plain string causes silent data corruption on NULL values.", Project: "mnemonic"}, + {Source: "mcp", Type: "learning", Content: "The gorilla/websocket library requires explicit ping/pong handling for connection keepalive on the dashboard.", Project: "mnemonic"}, + {Source: "mcp", Type: "learning", Content: "SQLite PRAGMA journal_mode=WAL must be set per connection, not just once at database creation time.", Project: "mnemonic"}, + {Source: "mcp", Type: "learning", Content: "Go embed directive paths are relative to the source file, not the module root. Learned this debugging missing dashboard assets.", Project: "mnemonic"}, - // Cross-project MCP memories + // Felix-LM project (cross-project) {Source: "mcp", Type: "decision", Content: "Adopted conventional commits for the felix-lm project to match mnemonic's release-please workflow.", Project: "felix-lm"}, + {Source: "mcp", Type: "decision", Content: "Chose AdamW optimizer with cosine annealing schedule for the 100M parameter pretraining run.", Project: "felix-lm"}, + {Source: "mcp", Type: "decision", Content: "Implemented gradient accumulation of 4 micro-steps to simulate larger batch sizes on the RX 7800 XT.", Project: "felix-lm"}, {Source: "mcp", Type: "error", Content: "PyTorch ROCm build fails on Ubuntu 24.04 with Python 3.14. Pinned to Python 3.12 for compatibility.", Project: "felix-lm"}, + {Source: "mcp", Type: "error", Content: "Training loss spiked at step 12000 due to corrupt data shard. Added checksum validation to the data loader.", Project: "felix-lm"}, + {Source: "mcp", Type: "error", Content: "VRAM out-of-memory crash during HP sweep with batch size 32. 
Added automatic VRAM cap detection.", Project: "felix-lm"}, {Source: "mcp", Type: "insight", Content: "Learning rate warmup of 500 steps consistently outperforms no-warmup across all model sizes tested.", Project: "felix-lm"}, + {Source: "mcp", Type: "insight", Content: "Weight decay of 0.01 provides better generalization than 0.1 for the 100M model architecture.", Project: "felix-lm"}, + {Source: "mcp", Type: "insight", Content: "Mixed precision training with bfloat16 gives identical loss curves to float32 but uses 40%% less VRAM.", Project: "felix-lm"}, {Source: "mcp", Type: "learning", Content: "Unsloth 4-bit quantization reduces VRAM from 14GB to 6GB with only 2%% perplexity increase on the validation set.", Project: "felix-lm"}, + {Source: "mcp", Type: "learning", Content: "ROCm hipcc compiler requires explicit device targeting with PYTORCH_ROCM_ARCH=gfx1100 for the 7800 XT.", Project: "felix-lm"}, + {Source: "mcp", Type: "learning", Content: "The tokenizer's padding side must match the model architecture. GPT-style models need left padding for batch inference.", Project: "felix-lm"}, + + // Sample-project (third project for cross-project testing) + {Source: "mcp", Type: "decision", Content: "Chose chi router over gorilla/mux for the sample REST API because chi has better middleware composability.", Project: "sample-project"}, + {Source: "mcp", Type: "error", Content: "Connection pool exhaustion under load testing. Increased max idle connections from 5 to 25 in database/sql config.", Project: "sample-project"}, + {Source: "mcp", Type: "insight", Content: "Request logging middleware adds 50 microseconds per request. Acceptable overhead for the observability benefit.", Project: "sample-project"}, + {Source: "mcp", Type: "learning", Content: "Go http.Server ReadTimeout should be set to prevent slowloris attacks. 
Default zero timeout leaves connections open forever.", Project: "sample-project"}, +} +// noiseTemplates contains filesystem, terminal, and clipboard noise. +var noiseTemplates = []memoryTemplate{ // Filesystem noise {Source: "filesystem", Type: "file_modified", Content: "Modified ~/.config/Code/User/settings.json: changed editor.fontSize from 14 to 15", Project: ""}, {Source: "filesystem", Type: "file_created", Content: "Created /tmp/go-build-cache/ab/abc123.o: Go build artifact", Project: ""}, @@ -81,28 +119,64 @@ var dailyMemoryTemplates = []memoryTemplate{ {Source: "filesystem", Type: "file_created", Content: "Created ~/Downloads/screenshot-2026-01-05.png: desktop screenshot", Project: ""}, {Source: "filesystem", Type: "file_modified", Content: "Modified ~/.bashrc: added export PATH=$PATH:~/go/bin", Project: ""}, {Source: "filesystem", Type: "file_created", Content: "Created /tmp/mnemonic-bench-xyz/pipeline.db: benchmark temp database", Project: ""}, - + {Source: "filesystem", Type: "file_modified", Content: "Modified ~/.config/gtk-3.0/settings.ini: GTK theme changed to Adwaita-dark", Project: ""}, + {Source: "filesystem", Type: "file_created", Content: "Created ~/.cache/thumbnails/large/abcdef.png: thumbnail cache entry", Project: ""}, + {Source: "filesystem", Type: "file_modified", Content: "Modified /etc/hosts: added local development domain mapping", Project: ""}, + {Source: "filesystem", Type: "file_created", Content: "Created ~/Documents/notes-2026-01.md: personal notes file", Project: ""}, + {Source: "filesystem", Type: "file_modified", Content: "Modified ~/.ssh/config: added new host entry for staging server", Project: ""}, + {Source: "filesystem", Type: "file_created", Content: "Created ~/Downloads/go1.22.0.linux-amd64.tar.gz: Go binary download", Project: ""}, // Terminal noise {Source: "terminal", Type: "command_executed", Content: "git status: On branch main, nothing to commit, working tree clean", Project: ""}, {Source: "terminal", Type: 
"command_executed", Content: "make build: go build -o bin/mnemonic ./cmd/mnemonic", Project: ""}, {Source: "terminal", Type: "command_executed", Content: "ls -la ~/Projects/: listed directory contents", Project: ""}, {Source: "terminal", Type: "command_executed", Content: "top: system monitor showing 4.2GB RAM used, load average 1.2", Project: ""}, {Source: "terminal", Type: "command_executed", Content: "docker ps: no containers running", Project: ""}, - + {Source: "terminal", Type: "command_executed", Content: "git log --oneline -5: viewed recent commit history", Project: ""}, + {Source: "terminal", Type: "command_executed", Content: "curl http://127.0.0.1:9999/api/v1/health: checked daemon health endpoint", Project: ""}, + {Source: "terminal", Type: "command_executed", Content: "systemctl --user status mnemonic: daemon is active and running", Project: ""}, + {Source: "terminal", Type: "command_executed", Content: "go test ./internal/store/... : ran store package tests, all passed", Project: ""}, + {Source: "terminal", Type: "command_executed", Content: "df -h: checked disk usage, 45GB free on root partition", Project: ""}, + {Source: "terminal", Type: "command_executed", Content: "htop: 8 cores, 2.1GB used of 32GB RAM, no swap activity", Project: ""}, + {Source: "terminal", Type: "command_executed", Content: "rocm-smi: GPU temperature 42C, VRAM 0MB/16GB used, idle", Project: ""}, // Clipboard noise {Source: "clipboard", Type: "clipboard_copy", Content: "https://pkg.go.dev/modernc.org/sqlite", Project: ""}, {Source: "clipboard", Type: "clipboard_copy", Content: "func (s *SQLiteStore) WriteRaw(ctx context.Context, raw RawMemory) error {", Project: ""}, {Source: "clipboard", Type: "clipboard_copy", Content: "SELECT id, summary, salience FROM memories WHERE state = 'active'", Project: ""}, {Source: "clipboard", Type: "clipboard_copy", Content: "127.0.0.1:9999", Project: ""}, + {Source: "clipboard", Type: "clipboard_copy", Content: 
"https://github.com/appsprout-dev/mnemonic/pull/248", Project: ""}, + {Source: "clipboard", Type: "clipboard_copy", Content: "export PYTORCH_ROCM_ARCH=gfx1100", Project: ""}, + {Source: "clipboard", Type: "clipboard_copy", Content: "CREATE INDEX idx_memories_salience ON memories(salience DESC);", Project: ""}, + {Source: "clipboard", Type: "clipboard_copy", Content: "func TestRetrievalQuery(t *testing.T) {", Project: ""}, } // generateDailyMemories creates a batch of memories for one simulated day. -// The distribution is approximately: 30% MCP, 50% noise (filesystem/terminal), 20% clipboard. +// Each memory gets a unique content suffix (day+index) to avoid encoding dedup. +// Distribution: ~30% MCP signal, ~50% filesystem/terminal noise, ~20% clipboard. func generateDailyMemories(rng *rand.Rand, clock *SimClock, day int, count int) []store.RawMemory { memories := make([]store.RawMemory, 0, count) for i := 0; i < count; i++ { - t := dailyMemoryTemplates[rng.Intn(len(dailyMemoryTemplates))] + // Pick from signal or noise based on distribution. + var t memoryTemplate + roll := rng.Float32() + switch { + case roll < 0.30: + t = mcpSignalTemplates[rng.Intn(len(mcpSignalTemplates))] + case roll < 0.80: + t = noiseTemplates[rng.Intn(len(noiseTemplates))] + default: + // Clipboard subset from noise templates. + clipTemplates := make([]memoryTemplate, 0) + for _, nt := range noiseTemplates { + if nt.Source == "clipboard" { + clipTemplates = append(clipTemplates, nt) + } + } + t = clipTemplates[rng.Intn(len(clipTemplates))] + } + + // Append day+index to make each memory's content unique for dedup. 
+ content := fmt.Sprintf("%s [day %d, event %d]", t.Content, day, i+1) heuristic := float32(0.3) salience := float32(0.3) @@ -124,7 +198,7 @@ func generateDailyMemories(rng *rand.Rand, clock *SimClock, day int, count int) Timestamp: ts, Source: t.Source, Type: t.Type, - Content: t.Content, + Content: content, HeuristicScore: heuristic, InitialSalience: salience, Project: t.Project, diff --git a/cmd/lifecycle-test/phase_daily.go b/cmd/lifecycle-test/phase_daily.go index ee0bf14..fa86a33 100644 --- a/cmd/lifecycle-test/phase_daily.go +++ b/cmd/lifecycle-test/phase_daily.go @@ -88,8 +88,8 @@ func (p *PhaseDaily) Run(ctx context.Context, h *Harness, verbose bool) (*PhaseR return result, fmt.Errorf("getting statistics: %w", err) } - // Encoding dedup merges identical templates, so unique count is lower than written count. - result.AssertGE("total memories", stats.TotalMemories, 30) + // Encoding dedup merges semantically identical content, so unique count is lower than raw count. + result.AssertGE("total memories", stats.TotalMemories, 40) result.AssertGE("episodes created", stats.TotalEpisodes, 5) result.AssertGE("associations created", stats.TotalAssociations, 1) result.Metrics["total_memories"] = float64(stats.TotalMemories) From 554b7f6e77cfc027ee91ca1c6562af475bd541a1 Mon Sep 17 00:00:00 2001 From: Caleb Gross Date: Fri, 20 Mar 2026 11:49:44 -0400 Subject: [PATCH 5/5] docs: add lifecycle test and testutil to CLAUDE.md Document the lifecycle simulation test suite, make lifecycle-test target, and internal/testutil/ shared package in the project guide. Closes #258 Co-Authored-By: Claude Opus 4.6 (1M context) --- CLAUDE.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/CLAUDE.md b/CLAUDE.md index ab01bdc..1f1bcbd 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -9,6 +9,7 @@ make build # go build ... make test # go test ./... 
-v make check # go fmt + go vet make run # Build and run in foreground (serve mode) +make lifecycle-test # Build + run full lifecycle simulation golangci-lint run # Lint (uses .golangci.yml config) ``` @@ -20,6 +21,7 @@ golangci-lint run # Lint (uses .golangci.yml config) cmd/mnemonic/ CLI + daemon entry point cmd/benchmark/ End-to-end benchmark cmd/benchmark-quality/ Memory quality IR benchmark +cmd/lifecycle-test/ Full lifecycle simulation (install → 3 months) internal/ agent/ 8 cognitive agents + orchestrator + reactor perception/ Watch filesystem/terminal/clipboard, heuristic filter @@ -45,6 +47,7 @@ internal/ config/ Config loading (config.yaml) logger/ Structured logging (slog) backup/ Export/import + testutil/ Shared test infrastructure (stub LLM provider) sdk/ Python agent SDK (self-evolving assistant) agent/evolution/ Agent evolution data (created at runtime, gitignored) agent/evolution/examples/ Example evolution data for reference