AppSprout-dev
diff --git a/‎.gitignore‎
Lines changed: 6 additions & 0 deletions b/‎.gitignore‎
Lines changed: 6 additions & 0 deletions
diff --git a/‎Makefile‎
Lines changed: 54 additions & 0 deletions b/‎Makefile‎
Lines changed: 54 additions & 0 deletions
diff --git a/‎cmd/mnemonic/main.go‎
Lines changed: 16 additions & 4 deletions b/‎cmd/mnemonic/main.go‎
Lines changed: 16 additions & 4 deletions
diff --git a/‎internal/agent/encoding/agent.go‎
Lines changed: 11 additions & 0 deletions b/‎internal/agent/encoding/agent.go‎
Lines changed: 11 additions & 0 deletions
diff --git a/‎internal/agent/perception/heuristic.go‎
Lines changed: 39 additions & 15 deletions b/‎internal/agent/perception/heuristic.go‎
Lines changed: 39 additions & 15 deletions
@@ -36,6 +36,7 @@ config.yaml
 # IDE / local config
 .vscode/
 .claude/settings.local.json
+.vscode/
 .claude/plans/
 .claude/projects/
 sdk/agent/evolution/.decay_stamp
@@ -57,3 +58,8 @@ benchmark-results.md
 sweep-results.md
 lifecycle-results.md
 training/probe_results.tsv
+models/
+
+# llama.cpp build artifacts
+third_party/llama.cpp/build/
+*.o
@@ -44,6 +44,60 @@ build:
 	@mkdir -p $(BUILD_DIR)
 	go build $(TAGS) $(LDFLAGS) -o $(BUILD_DIR)/$(BINARY) ./cmd/mnemonic
 
+# --- Embedded LLM (llama.cpp + Felix architecture) ---
+LLAMACPP_DIR=third_party/llama.cpp
+LLAMACPP_BUILD=$(LLAMACPP_DIR)/build
+BRIDGE_DIR=internal/llm/llamacpp/csrc
+
+build-llamacpp:
+	@if [ ! -f $(LLAMACPP_BUILD)/src/libllama.a ]; then \
+		cmake -B $(LLAMACPP_BUILD) -S $(LLAMACPP_DIR) \
+			-DCMAKE_BUILD_TYPE=Release \
+			-DBUILD_SHARED_LIBS=OFF \
+			-DGGML_NATIVE=ON; \
+		cmake --build $(LLAMACPP_BUILD) --target llama -j$$(nproc); \
+	fi
+
+build-llamacpp-rocm:
+	@if [ ! -f $(LLAMACPP_BUILD)/src/libllama.a ]; then \
+		cmake -B $(LLAMACPP_BUILD) -S $(LLAMACPP_DIR) \
+			-DCMAKE_BUILD_TYPE=Release \
+			-DBUILD_SHARED_LIBS=OFF \
+			-DGGML_NATIVE=ON \
+			-DGGML_HIP=ON; \
+		cmake --build $(LLAMACPP_BUILD) --target llama -j$$(nproc); \
+	fi
+
+build-bridge:
+	@if [ ! -f $(BRIDGE_DIR)/bridge.o ] || [ $(BRIDGE_DIR)/bridge.cpp -nt $(BRIDGE_DIR)/bridge.o ]; then \
+		g++ -std=c++17 -O2 -c $(BRIDGE_DIR)/bridge.cpp -o $(BRIDGE_DIR)/bridge.o \
+			-I$(BRIDGE_DIR) \
+			-I$(LLAMACPP_DIR)/include \
+			-I$(LLAMACPP_DIR)/ggml/include; \
+	fi
+
+ifdef ROCM
+build-embedded: build-llamacpp-rocm build-bridge
+	@mkdir -p $(BUILD_DIR)
+	CGO_ENABLED=1 go build -tags "llamacpp rocm" $(LDFLAGS) -o $(BUILD_DIR)/$(BINARY) ./cmd/mnemonic
+else
+build-embedded: build-llamacpp build-bridge
+	@mkdir -p $(BUILD_DIR)
+	CGO_ENABLED=1 go build -tags llamacpp $(LDFLAGS) -o $(BUILD_DIR)/$(BINARY) ./cmd/mnemonic
+endif
+
+build-quantize: build-llamacpp
+	cmake --build $(LLAMACPP_BUILD) --target llama-quantize -j$$(nproc)
+
+# Quantize each models/*-v1.gguf to q8_0 unless the output already exists; the -f guard skips the literal pattern when the glob matches nothing.
+quantize: build-quantize
+	@for f in models/*-v1.gguf; do \
+		q8="$${f%.gguf}-q8_0.gguf"; \
+		if [ -f "$$f" ] && [ ! -f "$$q8" ]; then \
+			echo "Quantizing $$f -> $$q8"; \
+			$(LLAMACPP_BUILD)/bin/llama-quantize "$$f" "$$q8" q8_0; \
+		fi; \
+	done
+
 run: build
 	./$(BUILD_DIR)/$(BINARY) --config config.yaml serve
 
 
@@ -24,6 +24,7 @@ import (
 	"github.com/appsprout-dev/mnemonic/internal/daemon"
 	"github.com/appsprout-dev/mnemonic/internal/events"
 	"github.com/appsprout-dev/mnemonic/internal/llm"
+	"github.com/appsprout-dev/mnemonic/internal/llm/llamacpp"
 	"github.com/appsprout-dev/mnemonic/internal/logger"
 	"github.com/appsprout-dev/mnemonic/internal/store/sqlite"
 	"github.com/appsprout-dev/mnemonic/internal/watcher"
@@ -1383,8 +1384,12 @@ func serveCommand(configPath string) {
 
 	// Instrumented provider wrapper — gives each agent its own usage tracking.
 	// If training data capture is enabled, wrap with TrainingCaptureProvider too.
+	modelLabel := cfg.LLM.ChatModel
+	if cfg.LLM.Provider == "embedded" && cfg.LLM.Embedded.ChatModelFile != "" {
+		modelLabel = cfg.LLM.Embedded.ChatModelFile
+	}
 	wrap := func(caller string) llm.Provider {
-		var p llm.Provider = llm.NewInstrumentedProvider(llmProvider, memStore, caller, cfg.LLM.ChatModel)
+		var p llm.Provider = llm.NewInstrumentedProvider(llmProvider, memStore, caller, modelLabel)
 		if cfg.Training.CaptureEnabled && cfg.Training.CaptureDir != "" {
 			p = llm.NewTrainingCaptureProvider(p, caller, cfg.Training.CaptureDir)
 		}
@@ -2951,9 +2956,16 @@ func newLLMProvider(cfg *config.Config) llm.Provider {
 			Temperature:    float32(cfg.LLM.Temperature),
 			MaxConcurrent:  cfg.LLM.MaxConcurrent,
 		})
-		// Note: LoadModels must be called with a backend factory before use.
-		// Until llama.cpp bindings are integrated, the provider will return
-		// ErrProviderUnavailable on all inference calls.
+		backend := llamacpp.NewBackend()
+		if backend != nil {
+			if err := ep.LoadModels(func() llm.Backend {
+				return llamacpp.NewBackend()
+			}); err != nil {
+				slog.Error("failed to load embedded models", "error", err)
+			}
+		} else {
+			slog.Warn("embedded provider selected but llama.cpp not compiled in (build with: make build-embedded)")
+		}
 		return ep
 	default: // "api" or ""
 		timeout := time.Duration(cfg.LLM.TimeoutSec) * time.Second
 
@@ -1361,6 +1361,17 @@ func (ea *EncodingAgent) compressAndExtractConcepts(ctx context.Context, raw sto
 		return nil, fmt.Errorf("LLM completion failed: %w", err)
 	}
 
+	// Logit validation: reject low-confidence completions from embedded models
+	if resp.MeanProb > 0 && resp.MeanProb < 0.10 {
+		slog.Warn("LLM completion has very low confidence, falling back to heuristic",
+			"mean_prob", resp.MeanProb, "min_prob", resp.MinProb,
+			"tokens", resp.CompletionTokens)
+		return nil, fmt.Errorf("LLM completion confidence too low (mean_prob=%.3f)", resp.MeanProb)
+	}
+	if resp.MeanProb > 0 {
+		slog.Debug("LLM completion confidence", "mean_prob", resp.MeanProb, "min_prob", resp.MinProb)
+	}
+
 	// Extract and parse JSON from LLM response
 	jsonStr := agentutil.ExtractJSON(resp.Content)
 	var result compressionResponse
 
@@ -42,18 +42,42 @@ type HeuristicConfig struct {
 // withDefaults returns a copy of the scoring config with defaults applied to any non-positive values.
 func (s ScoringConfig) withDefaults() ScoringConfig {
 	d := s
-	if d.BaseFilesystem <= 0 { d.BaseFilesystem = 0.3 }
-	if d.BaseTerminal <= 0 { d.BaseTerminal = 0.3 }
-	if d.BaseClipboard <= 0 { d.BaseClipboard = 0.3 }
-	if d.BaseMCP <= 0 { d.BaseMCP = 0.6 }
-	if d.BoostErrorLog <= 0 { d.BoostErrorLog = 0.2 }
-	if d.BoostConfig <= 0 { d.BoostConfig = 0.15 }
-	if d.BoostSourceCode <= 0 { d.BoostSourceCode = 0.1 }
-	if d.BoostCommand <= 0 { d.BoostCommand = 0.25 }
-	if d.BoostCodeSnippet <= 0 { d.BoostCodeSnippet = 0.2 }
-	if d.KeywordHigh <= 0 { d.KeywordHigh = 0.15 }
-	if d.KeywordMedium <= 0 { d.KeywordMedium = 0.10 }
-	if d.KeywordLow <= 0 { d.KeywordLow = 0.05 }
+	if d.BaseFilesystem <= 0 {
+		d.BaseFilesystem = 0.3
+	}
+	if d.BaseTerminal <= 0 {
+		d.BaseTerminal = 0.3
+	}
+	if d.BaseClipboard <= 0 {
+		d.BaseClipboard = 0.3
+	}
+	if d.BaseMCP <= 0 {
+		d.BaseMCP = 0.6
+	}
+	if d.BoostErrorLog <= 0 {
+		d.BoostErrorLog = 0.2
+	}
+	if d.BoostConfig <= 0 {
+		d.BoostConfig = 0.15
+	}
+	if d.BoostSourceCode <= 0 {
+		d.BoostSourceCode = 0.1
+	}
+	if d.BoostCommand <= 0 {
+		d.BoostCommand = 0.25
+	}
+	if d.BoostCodeSnippet <= 0 {
+		d.BoostCodeSnippet = 0.2
+	}
+	if d.KeywordHigh <= 0 {
+		d.KeywordHigh = 0.15
+	}
+	if d.KeywordMedium <= 0 {
+		d.KeywordMedium = 0.10
+	}
+	if d.KeywordLow <= 0 {
+		d.KeywordLow = 0.05
+	}
 	return d
 }
 
@@ -72,9 +96,9 @@ type frequencyEntry struct {
 
 // HeuristicFilter implements the pre-filter logic for watcher events.
 type HeuristicFilter struct {
-	cfg     HeuristicConfig
-	scoring ScoringConfig // resolved scoring with defaults applied
-	log     *slog.Logger
+	cfg       HeuristicConfig
+	scoring   ScoringConfig // resolved scoring with defaults applied
+	log       *slog.Logger
 	mu        sync.RWMutex
 	frequency map[string][]frequencyEntry // hash -> list of timestamps