Merge pull request #361 from AppSprout-dev/feat/agent-recall-quality

CalebisGross · web-flow · commit 7a8bfa6758dc · 2026-03-29T00:02:19.000-04:00
feat: improve recall quality for LLM agents, fix Windows self-update
diff --git a/cmd/mnemonic/main.go b/cmd/mnemonic/main.go
@@ -1226,6 +1226,11 @@ func serveCommand(configPath string) {
 	}
 	slog.SetDefault(log)
 
+	// Clean up leftover .old binary from a previous Windows update
+	if err := updater.CleanupOldBinary(); err != nil {
+		log.Warn("failed to clean up old binary after update", "error", err)
+	}
+
 	// Create data directory if it doesn't exist
 	if err := cfg.EnsureDataDir(); err != nil {
 		die(exitPermission, fmt.Sprintf("creating data directory: %v", err), "check permissions on ~/.mnemonic/")
@@ -1874,12 +1879,26 @@ func buildRetrievalConfig(cfg *config.Config) retrieval.RetrievalConfig {
 
 		FeedbackWeight: float32(cfg.Retrieval.FeedbackWeight),
 		SourceWeights:  convertSourceWeights(cfg.Retrieval.SourceWeights),
+		TypeWeights:    convertSourceWeights(cfg.Retrieval.TypeWeights),
 
 		ContextBoostWindowMin: cfg.Perception.RecallBoostWindowMin,
 		ContextBoostMax:       float32(cfg.Perception.RecallBoostMax),
+		ContextBoostSources:   convertContextBoostSources(cfg.Retrieval.ContextBoostSources),
 	}
 }
 
+// convertContextBoostSources converts the configured []string of source names into a map[string]bool set.
+func convertContextBoostSources(src []string) map[string]bool {
+	if src == nil {
+		return nil // keep nil distinct from empty: RetrievalConfig documents nil as "all sources"
+	}
+	out := make(map[string]bool, len(src)) // a non-nil empty list yields an empty, non-nil map
+	for _, s := range src {
+		out[s] = true
+	}
+	return out
+}
+
 // convertSourceWeights converts map[string]float64 to map[string]float32.
 func convertSourceWeights(src map[string]float64) map[string]float32 {
 	if src == nil {
diff --git a/internal/agent/consolidation/agent.go b/internal/agent/consolidation/agent.go
@@ -28,6 +28,7 @@ type ConsolidationConfig struct {
 	MaxMemoriesPerCycle int
 	MaxMergesPerCycle   int
 	MinClusterSize      int
+	MinEvidenceSalience float32 // minimum salience for memories to count as pattern evidence (default: 0.5)
 	AssocPruneThreshold float32 // prune associations below this strength
 
 	// Salience decay tunables
@@ -76,6 +77,7 @@ func DefaultConfig() ConsolidationConfig {
 		MaxMemoriesPerCycle:       100,
 		MaxMergesPerCycle:         5,
 		MinClusterSize:            3,
+		MinEvidenceSalience:       0.5,
 		AssocPruneThreshold:       0.05,
 		RecencyProtection24h:      0.8,
 		RecencyProtection168h:     0.9,
@@ -874,21 +876,30 @@ func (ca *ConsolidationAgent) extractPatterns(ctx context.Context) (int, error)
 // processPatternClusters handles the common logic for evaluating a set of memory clusters
 // as potential patterns: strengthening existing matches or identifying new ones via LLM.
 func (ca *ConsolidationAgent) processPatternClusters(ctx context.Context, clusters [][]store.Memory, project string, budget int) int {
+	minSalience := cfgFloat32(ca.config.MinEvidenceSalience, 0.5)
 	extracted := 0
 	for _, cluster := range clusters {
 		if extracted >= budget {
 			break
 		}
-		if len(cluster) < 3 {
+
+		// Filter cluster to salience-qualified memories
+		var qualified []store.Memory
+		for _, mem := range cluster {
+			if mem.Salience >= minSalience {
+				qualified = append(qualified, mem)
+			}
+		}
+		if len(qualified) < 3 {
 			continue
 		}
 
 		// Check if this cluster matches an existing pattern (by embedding similarity)
-		existing, err := ca.findMatchingPattern(ctx, cluster)
+		existing, err := ca.findMatchingPattern(ctx, qualified)
 		if err == nil && existing != nil {
 			// Count genuinely new evidence
 			newEvidence := 0
-			for _, mem := range cluster {
+			for _, mem := range qualified {
 				if !containsString(existing.EvidenceIDs, mem.ID) {
 					existing.EvidenceIDs = append(existing.EvidenceIDs, mem.ID)
 					newEvidence++
@@ -922,13 +933,13 @@ func (ca *ConsolidationAgent) processPatternClusters(ctx context.Context, cluste
 		}
 
 		// Ask LLM if there's a recurring pattern
-		pattern, err := ca.identifyPattern(ctx, cluster, project)
+		pattern, err := ca.identifyPattern(ctx, qualified, project)
 		if err != nil {
-			ca.log.Warn("pattern identification failed", "project", project, "cluster_size", len(cluster), "error", err)
+			ca.log.Warn("pattern identification failed", "project", project, "cluster_size", len(qualified), "error", err)
 			continue
 		}
 		if pattern == nil {
-			ca.log.Info("pattern extraction: LLM rejected cluster (not a pattern)", "project", project, "cluster_size", len(cluster))
+			ca.log.Info("pattern extraction: LLM rejected cluster (not a pattern)", "project", project, "cluster_size", len(qualified))
 			continue
 		}
 
@@ -947,7 +958,7 @@ func (ca *ConsolidationAgent) processPatternClusters(ctx context.Context, cluste
 					embSim := agentutil.CosineSimilarity(pattern.Embedding, ep.Embedding)
 					titleSim := normalizedTitleSimilarity(pattern.Title, ep.Title)
 					if isDuplicate(pattern.Title, ep.Title, pattern.Embedding, ep.Embedding, 0.5, 0.75) {
-						for _, mem := range cluster {
+						for _, mem := range qualified {
 							if !containsString(ep.EvidenceIDs, mem.ID) {
 								ep.EvidenceIDs = append(ep.EvidenceIDs, mem.ID)
 							}
diff --git a/internal/agent/retrieval/agent.go b/internal/agent/retrieval/agent.go
@@ -62,11 +62,15 @@ type RetrievalConfig struct {
 	FeedbackWeight float32 // weight of user feedback score in ranking (default: 0.15)
 
 	// Source-weighted scoring
-	SourceWeights map[string]float32 // per-source multipliers (default: mcp=1.0, terminal=0.8, clipboard=0.6, filesystem=0.5)
+	SourceWeights map[string]float32 // per-source multipliers (default: mcp=1.5, terminal=0.8, clipboard=0.6, filesystem=0.5)
+
+	// Memory type scoring — actionable types (decision, error) rank higher than observations
+	TypeWeights map[string]float32 // per-type multipliers (default: decision=1.3, error=1.25, insight=1.2, learning=1.15)
 
 	// Context boost from watcher activity
-	ContextBoostWindowMin int     // minutes context boost decays over (default: 30)
-	ContextBoostMax       float32 // max additive boost from watcher context (default: 0.2)
+	ContextBoostWindowMin int             // minutes context boost decays over (default: 30)
+	ContextBoostMax       float32         // max additive boost from watcher context (default: 0.2)
+	ContextBoostSources   map[string]bool // sources eligible for context boost (nil = all sources)
 }
 
 // DefaultConfig returns sensible defaults for retrieval configuration.
@@ -106,13 +110,23 @@ func DefaultConfig() RetrievalConfig {
 
 		FeedbackWeight: 0.15,
 		SourceWeights: map[string]float32{
-			"mcp":        1.0,
+			"mcp":        1.5,
 			"terminal":   0.8,
 			"clipboard":  0.6,
 			"filesystem": 0.5,
 		},
+		TypeWeights: map[string]float32{
+			"decision": 1.3,
+			"error":    1.25,
+			"insight":  1.2,
+			"learning": 1.15,
+		},
 		ContextBoostWindowMin: 30,
 		ContextBoostMax:       0.2,
+		ContextBoostSources: map[string]bool{
+			"mcp":      true,
+			"terminal": true,
+		},
 	}
 }
 
@@ -391,12 +405,12 @@ func (ra *RetrievalAgent) Query(ctx context.Context, req QueryRequest) (QueryRes
 		evidenceBoost := make(map[string]float32)
 		for _, p := range matchedPatterns {
 			for _, eid := range p.EvidenceIDs {
-				evidenceBoost[eid] += 0.1
+				evidenceBoost[eid] += 0.1 * p.Strength
 			}
 		}
 		for _, a := range matchedAbstractions {
 			for _, mid := range a.SourceMemoryIDs {
-				evidenceBoost[mid] += 0.05
+				evidenceBoost[mid] += 0.05 * a.Confidence
 			}
 		}
 		for i, r := range ranked {
@@ -623,6 +637,7 @@ func (ra *RetrievalAgent) rankResults(ctx context.Context, activated map[string]
 		recencyBonus   float32
 		activityBonus  float32
 		contextBoost   float32
+		typeWeight     float32
 		sourceWeight   float32
 		feedbackAdjust float32
 	}
@@ -666,10 +681,13 @@ func (ra *RetrievalAgent) rankResults(ctx context.Context, activated map[string]
 		actScale := float64(f32Or(ra.config.ActivityBonusScale, 0.02))
 		activityBonus := float32(math.Min(actMax, actScale*math.Log1p(float64(state.activationCount))))
 
-		// Context boost from recent watcher activity
+		// Context boost from recent watcher activity (only for eligible sources)
 		var contextBoost float32
 		if ra.activity != nil {
-			contextBoost = ra.activity.boostForMemory(mem.Concepts)
+			eligible := ra.config.ContextBoostSources == nil || ra.config.ContextBoostSources[mem.Source]
+			if eligible {
+				contextBoost = ra.activity.boostForMemory(mem.Concepts)
+			}
 		}
 
 		// Combined score
@@ -686,6 +704,15 @@ func (ra *RetrievalAgent) rankResults(ctx context.Context, activated map[string]
 			}
 		}
 
+		// Memory type weight — actionable types (decision, error) rank higher than observations
+		typeWeight := float32(1.0)
+		if ra.config.TypeWeights != nil {
+			if tw, ok := ra.config.TypeWeights[mem.Type]; ok && tw > 0 {
+				typeWeight = tw
+			}
+		}
+		baseScore *= typeWeight
+
 		// Apply source weight as a multiplier (before feedback adjustment)
 		sourceWeight := float32(1.0)
 		if ra.config.SourceWeights != nil {
@@ -709,6 +736,7 @@ func (ra *RetrievalAgent) rankResults(ctx context.Context, activated map[string]
 			recencyBonus:   recencyBonus,
 			activityBonus:  activityBonus,
 			contextBoost:   contextBoost,
+			typeWeight:     typeWeight,
 			sourceWeight:   sourceWeight,
 			feedbackAdjust: feedbackAdjust,
 		})
@@ -725,8 +753,8 @@ func (ra *RetrievalAgent) rankResults(ctx context.Context, activated map[string]
 		explanation := ""
 		if includeReasoning {
 			explanation = fmt.Sprintf(
-				"activation: %.3f, recency_bonus: %.3f, activity_bonus: %.3f, context_boost: %.3f, source_weight: %.2f, feedback_adjust: %.3f, combined_score: %.3f",
-				sm.activation, sm.recencyBonus, sm.activityBonus, sm.contextBoost, sm.sourceWeight, sm.feedbackAdjust, sm.finalScore,
+				"activation: %.3f, recency_bonus: %.3f, activity_bonus: %.3f, context_boost: %.3f, type_weight: %.2f, source_weight: %.2f, feedback_adjust: %.3f, combined_score: %.3f",
+				sm.activation, sm.recencyBonus, sm.activityBonus, sm.contextBoost, sm.typeWeight, sm.sourceWeight, sm.feedbackAdjust, sm.finalScore,
 			)
 		}
 
diff --git a/internal/config/config.go b/internal/config/config.go
@@ -287,7 +287,13 @@ type RetrievalConfig struct {
 	FeedbackWeight float64 `yaml:"feedback_weight"` // weight of user feedback score in ranking (default 0.15)
 
 	// Source-weighted scoring
-	SourceWeights map[string]float64 `yaml:"source_weights"` // per-source multipliers (default: mcp=1.0, terminal=0.8, clipboard=0.6, filesystem=0.5)
+	SourceWeights map[string]float64 `yaml:"source_weights"` // per-source multipliers (default: mcp=1.5, terminal=0.8, clipboard=0.6, filesystem=0.5)
+
+	// Memory type scoring
+	TypeWeights map[string]float64 `yaml:"type_weights"` // per-type multipliers (default: decision=1.3, error=1.25, insight=1.2, learning=1.15)
+
+	// Context boost source eligibility
+	ContextBoostSources []string `yaml:"context_boost_sources"` // sources eligible for context boost (default: [mcp, terminal])
 }
 
 // MetacognitionConfig holds metacognition settings.
@@ -700,11 +706,18 @@ func Default() *Config {
 
 			FeedbackWeight: 0.15,
 			SourceWeights: map[string]float64{
-				"mcp":        1.0,
+				"mcp":        1.5,
 				"terminal":   0.8,
 				"clipboard":  0.6,
 				"filesystem": 0.5,
 			},
+			TypeWeights: map[string]float64{
+				"decision": 1.3,
+				"error":    1.25,
+				"insight":  1.2,
+				"learning": 1.15,
+			},
+			ContextBoostSources: []string{"mcp", "terminal"},
 		},
 		Metacognition: MetacognitionConfig{
 			Enabled:               true,
@@ -743,7 +756,7 @@ func Default() *Config {
 			Enabled:                    true,
 			IntervalRaw:                "6h",
 			Interval:                   6 * time.Hour,
-			MinStrength:                0.4,
+			MinStrength:                0.7,
 			MaxLLMCalls:                5,
 			StartupDelaySec:            300,
 			DefaultConfidence:          0.6,
diff --git a/internal/mcp/server.go b/internal/mcp/server.go
@@ -1583,6 +1583,16 @@ func (srv *MCPServer) handleRecallProject(ctx context.Context, args map[string]i
 		}
 	}
 
+	// Filter patterns to quality threshold
+	if len(patterns) > 0 {
+		filtered := patterns[:0]
+		for _, p := range patterns {
+			if p.Strength >= 0.3 {
+				filtered = append(filtered, p)
+			}
+		}
+		patterns = filtered
+	}
 	if len(patterns) > 0 {
 		text += fmt.Sprintf("\nPatterns (%d):\n", len(patterns))
 		for _, p := range patterns {
@@ -1810,12 +1820,28 @@ func (srv *MCPServer) handleGetPatterns(ctx context.Context, args map[string]int
 		limit = int(l)
 	}
 
+	minStrength := float32(0.3)
+	if ms, ok := args["min_strength"].(float64); ok {
+		minStrength = float32(ms)
+	}
+
 	patterns, err := srv.store.ListPatterns(ctx, project, limit)
 	if err != nil {
 		srv.log.Error("failed to list patterns", "error", err)
 		return nil, fmt.Errorf("failed to list patterns: %w", err)
 	}
 
+	// Filter by minimum strength
+	if minStrength > 0 {
+		filtered := patterns[:0]
+		for _, p := range patterns {
+			if p.Strength >= minStrength {
+				filtered = append(filtered, p)
+			}
+		}
+		patterns = filtered
+	}
+
 	if len(patterns) == 0 {
 		return toolResult("No patterns discovered yet. Patterns emerge as the system processes more memories and runs consolidation cycles."), nil
 	}
diff --git a/internal/mcp/tools.go b/internal/mcp/tools.go
@@ -359,6 +359,10 @@ func getPatternsToolDef() ToolDefinition {
 					"type":        "integer",
 					"description": "Maximum number of patterns to return (default: 10)",
 				},
+				"min_strength": map[string]interface{}{
+					"type":        "number",
+					"description": "Minimum pattern strength to return (default: 0.3). Set to 0 for all patterns.",
+				},
 			},
 			"required": []string{},
 		},
diff --git a/internal/updater/replace_unix.go b/internal/updater/replace_unix.go
@@ -0,0 +1,12 @@
+//go:build !windows
+
+package updater
+
+import "os"
+
+// replaceBinary swaps newBinaryPath into execPath with one os.Rename and returns its error as-is.
+// On Unix systems, os.Rename over a running binary works because the old process keeps the
+// deleted inode open until it exits. NOTE(review): assumes both paths share a filesystem — confirm.
+func replaceBinary(newBinaryPath, execPath string) error {
+	return os.Rename(newBinaryPath, execPath)
+}
diff --git a/internal/updater/replace_windows.go b/internal/updater/replace_windows.go
@@ -0,0 +1,41 @@
+//go:build windows
+
+package updater
+
+import (
+	"fmt"
+	"os"
+	"path/filepath"
+)
+
+// oldBinarySuffix is appended to the full executable path to name the file that the
+// locked, running binary is renamed to during a Windows update.
+const oldBinarySuffix = ".old"
+
+// replaceBinary swaps newBinaryPath into execPath on Windows using a rename-dance.
+// Windows locks running executables, preventing direct overwrite. However, a
+// locked file CAN be renamed. So we:
+//  1. Rename the running binary to execPath+".old" (move it out of the way)
+//  2. Rename the new binary into the original path, rolling back step 1 on failure
+//
+// It returns a wrapped error naming the failed step. The .old file is cleaned up on next startup via CleanupOldBinary.
+func replaceBinary(newBinaryPath, execPath string) error {
+	oldPath := execPath + oldBinarySuffix
+
+	// Best-effort removal of a leftover .old file from a previous update; the error is deliberately discarded
+	_ = os.Remove(oldPath)
+
+	// Step 1: Rename the running (locked) binary out of the way
+	if err := os.Rename(execPath, oldPath); err != nil {
+		return fmt.Errorf("moving running binary to %s: %w", filepath.Base(oldPath), err)
+	}
+
+	// Step 2: Rename the new binary into place
+	if err := os.Rename(newBinaryPath, execPath); err != nil {
+		// Roll back by renaming .old to execPath. NOTE(review): a failed rollback is ignored and would leave nothing at execPath
+		_ = os.Rename(oldPath, execPath)
+		return fmt.Errorf("moving new binary into place: %w", err)
+	}
+
+	return nil
+}
diff --git a/internal/updater/updater.go b/internal/updater/updater.go
diff --git a/internal/updater/updater_test.go b/internal/updater/updater_test.go