Skip to content

Commit 49ad590

Browse files
authored
Merge pull request #360 from AppSprout-dev/autoresearch/ft-mar25
feat: Phase 3-4 autoresearch — fine-tuning pipeline, CGo backend, experiments
2 parents 21facd3 + 679b22d commit 49ad590

30 files changed

Lines changed: 5355 additions & 122 deletions

.gitignore

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -36,6 +36,7 @@ config.yaml
3636
# IDE / local config
3737
.vscode/
3838
.claude/settings.local.json
39+
.vscode/
3940
.claude/plans/
4041
.claude/projects/
4142
sdk/agent/evolution/.decay_stamp
@@ -57,3 +58,8 @@ benchmark-results.md
5758
sweep-results.md
5859
lifecycle-results.md
5960
training/probe_results.tsv
61+
models/
62+
63+
# llama.cpp build artifacts
64+
third_party/llama.cpp/build/
65+
*.o

Makefile

Lines changed: 54 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -44,6 +44,60 @@ build:
4444
@mkdir -p $(BUILD_DIR)
4545
go build $(TAGS) $(LDFLAGS) -o $(BUILD_DIR)/$(BINARY) ./cmd/mnemonic
4646

47+
# --- Embedded LLM (llama.cpp + Felix architecture) ---
48+
LLAMACPP_DIR=third_party/llama.cpp
49+
LLAMACPP_BUILD=$(LLAMACPP_DIR)/build
50+
BRIDGE_DIR=internal/llm/llamacpp/csrc
51+
52+
build-llamacpp:
53+
@if [ ! -f $(LLAMACPP_BUILD)/src/libllama.a ]; then \
54+
cmake -B $(LLAMACPP_BUILD) -S $(LLAMACPP_DIR) \
55+
-DCMAKE_BUILD_TYPE=Release \
56+
-DBUILD_SHARED_LIBS=OFF \
57+
-DGGML_NATIVE=ON; \
58+
cmake --build $(LLAMACPP_BUILD) --target llama -j$$(nproc); \
59+
fi
60+
61+
build-llamacpp-rocm:
62+
@if [ ! -f $(LLAMACPP_BUILD)/src/libllama.a ]; then \
63+
cmake -B $(LLAMACPP_BUILD) -S $(LLAMACPP_DIR) \
64+
-DCMAKE_BUILD_TYPE=Release \
65+
-DBUILD_SHARED_LIBS=OFF \
66+
-DGGML_NATIVE=ON \
67+
-DGGML_HIP=ON; \
68+
cmake --build $(LLAMACPP_BUILD) --target llama -j$$(nproc); \
69+
fi
70+
71+
build-bridge:
72+
@if [ ! -f $(BRIDGE_DIR)/bridge.o ] || [ $(BRIDGE_DIR)/bridge.cpp -nt $(BRIDGE_DIR)/bridge.o ]; then \
73+
g++ -std=c++17 -O2 -c $(BRIDGE_DIR)/bridge.cpp -o $(BRIDGE_DIR)/bridge.o \
74+
-I$(BRIDGE_DIR) \
75+
-I$(LLAMACPP_DIR)/include \
76+
-I$(LLAMACPP_DIR)/ggml/include; \
77+
fi
78+
79+
ifdef ROCM
80+
build-embedded: build-llamacpp-rocm build-bridge
81+
@mkdir -p $(BUILD_DIR)
82+
CGO_ENABLED=1 go build -tags "llamacpp rocm" $(LDFLAGS) -o $(BUILD_DIR)/$(BINARY) ./cmd/mnemonic
83+
else
84+
build-embedded: build-llamacpp build-bridge
85+
@mkdir -p $(BUILD_DIR)
86+
CGO_ENABLED=1 go build -tags llamacpp $(LDFLAGS) -o $(BUILD_DIR)/$(BINARY) ./cmd/mnemonic
87+
endif
88+
89+
build-quantize: build-llamacpp
90+
cmake --build $(LLAMACPP_BUILD) --target llama-quantize -j$$(nproc)
91+
92+
quantize: build-quantize
93+
@for f in models/*-v1.gguf; do \
94+
q8="$${f%.gguf}-q8_0.gguf"; \
95+
if [ ! -f "$$q8" ]; then \
96+
echo "Quantizing $$f -> $$q8"; \
97+
$(LLAMACPP_BUILD)/bin/llama-quantize "$$f" "$$q8" q8_0; \
98+
fi; \
99+
done
100+
47101
run: build
48102
./$(BUILD_DIR)/$(BINARY) --config config.yaml serve
49103

cmd/mnemonic/main.go

Lines changed: 16 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -24,6 +24,7 @@ import (
2424
"github.com/appsprout-dev/mnemonic/internal/daemon"
2525
"github.com/appsprout-dev/mnemonic/internal/events"
2626
"github.com/appsprout-dev/mnemonic/internal/llm"
27+
"github.com/appsprout-dev/mnemonic/internal/llm/llamacpp"
2728
"github.com/appsprout-dev/mnemonic/internal/logger"
2829
"github.com/appsprout-dev/mnemonic/internal/store/sqlite"
2930
"github.com/appsprout-dev/mnemonic/internal/watcher"
@@ -1383,8 +1384,12 @@ func serveCommand(configPath string) {
13831384

13841385
// Instrumented provider wrapper — gives each agent its own usage tracking.
13851386
// If training data capture is enabled, wrap with TrainingCaptureProvider too.
1387+
modelLabel := cfg.LLM.ChatModel
1388+
if cfg.LLM.Provider == "embedded" && cfg.LLM.Embedded.ChatModelFile != "" {
1389+
modelLabel = cfg.LLM.Embedded.ChatModelFile
1390+
}
13861391
wrap := func(caller string) llm.Provider {
1387-
var p llm.Provider = llm.NewInstrumentedProvider(llmProvider, memStore, caller, cfg.LLM.ChatModel)
1392+
var p llm.Provider = llm.NewInstrumentedProvider(llmProvider, memStore, caller, modelLabel)
13881393
if cfg.Training.CaptureEnabled && cfg.Training.CaptureDir != "" {
13891394
p = llm.NewTrainingCaptureProvider(p, caller, cfg.Training.CaptureDir)
13901395
}
@@ -2951,9 +2956,16 @@ func newLLMProvider(cfg *config.Config) llm.Provider {
29512956
Temperature: float32(cfg.LLM.Temperature),
29522957
MaxConcurrent: cfg.LLM.MaxConcurrent,
29532958
})
2954-
// Note: LoadModels must be called with a backend factory before use.
2955-
// Until llama.cpp bindings are integrated, the provider will return
2956-
// ErrProviderUnavailable on all inference calls.
2959+
backend := llamacpp.NewBackend()
2960+
if backend != nil {
2961+
if err := ep.LoadModels(func() llm.Backend {
2962+
return llamacpp.NewBackend()
2963+
}); err != nil {
2964+
slog.Error("failed to load embedded models", "error", err)
2965+
}
2966+
} else {
2967+
slog.Warn("embedded provider selected but llama.cpp not compiled in (build with: make build-embedded)")
2968+
}
29572969
return ep
29582970
default: // "api" or ""
29592971
timeout := time.Duration(cfg.LLM.TimeoutSec) * time.Second

internal/agent/encoding/agent.go

Lines changed: 11 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1361,6 +1361,17 @@ func (ea *EncodingAgent) compressAndExtractConcepts(ctx context.Context, raw sto
13611361
return nil, fmt.Errorf("LLM completion failed: %w", err)
13621362
}
13631363

1364+
// Logit validation: reject low-confidence completions from embedded models
1365+
if resp.MeanProb > 0 && resp.MeanProb < 0.10 {
1366+
slog.Warn("LLM completion has very low confidence, falling back to heuristic",
1367+
"mean_prob", resp.MeanProb, "min_prob", resp.MinProb,
1368+
"tokens", resp.CompletionTokens)
1369+
return nil, fmt.Errorf("LLM completion confidence too low (mean_prob=%.3f)", resp.MeanProb)
1370+
}
1371+
if resp.MeanProb > 0 {
1372+
slog.Debug("LLM completion confidence", "mean_prob", resp.MeanProb, "min_prob", resp.MinProb)
1373+
}
1374+
13641375
// Extract and parse JSON from LLM response
13651376
jsonStr := agentutil.ExtractJSON(resp.Content)
13661377
var result compressionResponse

internal/agent/perception/heuristic.go

Lines changed: 39 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -42,18 +42,42 @@ type HeuristicConfig struct {
4242
// withDefaults returns a copy of the scoring config with defaults substituted for any zero or negative values.
4343
func (s ScoringConfig) withDefaults() ScoringConfig {
4444
d := s
45-
if d.BaseFilesystem <= 0 { d.BaseFilesystem = 0.3 }
46-
if d.BaseTerminal <= 0 { d.BaseTerminal = 0.3 }
47-
if d.BaseClipboard <= 0 { d.BaseClipboard = 0.3 }
48-
if d.BaseMCP <= 0 { d.BaseMCP = 0.6 }
49-
if d.BoostErrorLog <= 0 { d.BoostErrorLog = 0.2 }
50-
if d.BoostConfig <= 0 { d.BoostConfig = 0.15 }
51-
if d.BoostSourceCode <= 0 { d.BoostSourceCode = 0.1 }
52-
if d.BoostCommand <= 0 { d.BoostCommand = 0.25 }
53-
if d.BoostCodeSnippet <= 0 { d.BoostCodeSnippet = 0.2 }
54-
if d.KeywordHigh <= 0 { d.KeywordHigh = 0.15 }
55-
if d.KeywordMedium <= 0 { d.KeywordMedium = 0.10 }
56-
if d.KeywordLow <= 0 { d.KeywordLow = 0.05 }
45+
if d.BaseFilesystem <= 0 {
46+
d.BaseFilesystem = 0.3
47+
}
48+
if d.BaseTerminal <= 0 {
49+
d.BaseTerminal = 0.3
50+
}
51+
if d.BaseClipboard <= 0 {
52+
d.BaseClipboard = 0.3
53+
}
54+
if d.BaseMCP <= 0 {
55+
d.BaseMCP = 0.6
56+
}
57+
if d.BoostErrorLog <= 0 {
58+
d.BoostErrorLog = 0.2
59+
}
60+
if d.BoostConfig <= 0 {
61+
d.BoostConfig = 0.15
62+
}
63+
if d.BoostSourceCode <= 0 {
64+
d.BoostSourceCode = 0.1
65+
}
66+
if d.BoostCommand <= 0 {
67+
d.BoostCommand = 0.25
68+
}
69+
if d.BoostCodeSnippet <= 0 {
70+
d.BoostCodeSnippet = 0.2
71+
}
72+
if d.KeywordHigh <= 0 {
73+
d.KeywordHigh = 0.15
74+
}
75+
if d.KeywordMedium <= 0 {
76+
d.KeywordMedium = 0.10
77+
}
78+
if d.KeywordLow <= 0 {
79+
d.KeywordLow = 0.05
80+
}
5781
return d
5882
}
5983

@@ -72,9 +96,9 @@ type frequencyEntry struct {
7296

7397
// HeuristicFilter implements the pre-filter logic for watcher events.
7498
type HeuristicFilter struct {
75-
cfg HeuristicConfig
76-
scoring ScoringConfig // resolved scoring with defaults applied
77-
log *slog.Logger
99+
cfg HeuristicConfig
100+
scoring ScoringConfig // resolved scoring with defaults applied
101+
log *slog.Logger
78102
mu sync.RWMutex
79103
frequency map[string][]frequencyEntry // hash -> list of timestamps
80104

0 commit comments

Comments
 (0)