Skip to content
53 changes: 49 additions & 4 deletions cmd/llmem/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,10 @@ import (

"github.com/MichielDean/LLMem/internal/config"
"github.com/MichielDean/LLMem/internal/dream"
"github.com/MichielDean/LLMem/internal/embed"
"github.com/MichielDean/LLMem/internal/extract"
"github.com/MichielDean/LLMem/internal/introspect"
"github.com/MichielDean/LLMem/internal/ollama"
"github.com/MichielDean/LLMem/internal/paths"
"github.com/MichielDean/LLMem/internal/session"
"github.com/MichielDean/LLMem/internal/store"
Expand Down Expand Up @@ -121,6 +124,42 @@ func openAdapter() (session.SessionAdapter, error) {
return adapter, nil
}

// openExtractionEngine builds the ExtractionEngine that is handed to the
// session hook coordinator, using a zero-value ExtractionConfig.
//
// A construction error is not fatal: it is logged at debug level and nil is
// returned. The coordinator treats a nil engine as "skip extraction"
// (graceful degradation).
func openExtractionEngine() *extract.ExtractionEngine {
	cfg := extract.ExtractionConfig{}
	extractor, buildErr := extract.NewExtractionEngine(cfg)
	if buildErr != nil {
		slog.Debug("llmem: failed to create extraction engine, skipping", "error", buildErr)
		return nil
	}
	return extractor
}

// openEmbeddingEngine builds the EmbeddingEngine that is handed to the
// session hook coordinator, using a zero-value EmbeddingConfig.
//
// A construction error is not fatal: it is logged at debug level and nil is
// returned. The coordinator treats a nil engine as "store memories without
// embeddings".
func openEmbeddingEngine() *embed.EmbeddingEngine {
	cfg := embed.EmbeddingConfig{}
	embedder, buildErr := embed.NewEmbeddingEngine(cfg)
	if buildErr != nil {
		slog.Debug("llmem: failed to create embedding engine, skipping", "error", buildErr)
		return nil
	}
	return embedder
}

// openOllamaClient builds the OllamaClient used for session hook
// introspection, using a zero-value OllamaClientConfig.
//
// A construction error is not fatal: it is logged at debug level and nil is
// returned. The coordinator treats a nil client as a request for degraded
// introspection in OnEnding (plain-text summary, no LLM).
func openOllamaClient() *ollama.OllamaClient {
	cfg := ollama.OllamaClientConfig{}
	llm, buildErr := ollama.NewOllamaClient(cfg)
	if buildErr != nil {
		slog.Debug("llmem: failed to create Ollama client, falling back to degraded introspection", "error", buildErr)
		return nil
	}
	return llm
}

func addCmd() *cobra.Command {
var (
typeVal string
Expand Down Expand Up @@ -1080,8 +1119,11 @@ func contextCmd() *cobra.Command {
}

coord, err := session.NewSessionHookCoordinator(session.SessionHookConfig{
Store: ms,
Adapter: adapter,
Store: ms,
Adapter: adapter,
ExtractionEngine: openExtractionEngine(),
Embedding: openEmbeddingEngine(),
OllamaClient: openOllamaClient(),
})
if err != nil {
return err
Expand Down Expand Up @@ -1146,8 +1188,11 @@ func hookCmd() *cobra.Command {
}

coord, err := session.NewSessionHookCoordinator(session.SessionHookConfig{
Store: ms,
Adapter: adapter,
Store: ms,
Adapter: adapter,
ExtractionEngine: openExtractionEngine(),
Embedding: openEmbeddingEngine(),
OllamaClient: openOllamaClient(),
})
if err != nil {
return err
Expand Down
60 changes: 46 additions & 14 deletions docs/API.md
Original file line number Diff line number Diff line change
Expand Up @@ -906,7 +906,7 @@ _, err := ms.Add(ctx, store.AddParams{Type: "unknown_type", Content: "test"})

Embeddings are stored and accepted as packed `[]byte` in little-endian `float32` format. For a 768-dimensional embedding, this is `768 × 4 = 3072` bytes.

Use the exported `vecToBytes` and `bytesToVec` helpers if you need conversion:
Use the exported `store.VecToBytes` and `store.BytesToVec` helpers if you need conversion:

```go
// Convert float32 slice to []byte for storage
Expand Down Expand Up @@ -1251,7 +1251,7 @@ available := engine.CheckAvailable(ctx)

### Introspection (internal/introspect)

The `internal/introspect` package provides failure analysis and lesson learning (see [Dream Cycle & Extraction](DREAM.md#go) for usage).
The `internal/introspect` package provides failure analysis, lesson learning, and session transcript introspection (see [Dream Cycle & Extraction](DREAM.md#go) for usage).

```go
import "github.com/MichielDean/LLMem/internal/introspect"
Expand All @@ -1269,9 +1269,17 @@ id, err := introspect.LearnLesson(ctx, ms, introspect.LearnLessonParams{
WhatIsCorrect: "inject dependency via constructor",
Context: "service.go:15",
})

// IntrospectTranscript — analyze a session transcript at session end
id, err := introspect.IntrospectTranscript(ctx, ms, transcript, "session-id", ollamaClient, "glm-5.1:cloud")
// When ollamaClient is nil, falls back to degraded storage (plain-text summary, no LLM call)
```

Both functions use LLM expansion via Ollama when available. When Ollama is unavailable, they gracefully degrade to storage-only mode (storing the raw parameters without LLM expansion).
All three functions use LLM expansion via Ollama when available. When Ollama is unavailable, they gracefully degrade to storage-only mode (storing the raw parameters without LLM expansion).

**IntrospectTranscript** differs from `IntrospectFailure` and `LearnLesson` in two ways:
1. It accepts a pre-configured `*ollama.OllamaClient` instead of a model/baseURL pair, reusing the session's configured Ollama connection.
2. It uses `context.Background()` for the final store operation (not the caller's `ctx`), ensuring the session-end self-assessment is persisted even if the calling context has expired during the LLM call. This is intentional — `IntrospectFailure` and `LearnLesson` pass through `ctx` because they run mid-session when the context is still alive.

#### IntrospectAuto

Expand Down Expand Up @@ -1433,32 +1441,48 @@ if adapter != nil {
}

coord, err := session.NewSessionHookCoordinator(session.SessionHookConfig{
Store: ms,
Adapter: adapter, // nil → no_transcript on idle/ending
Store: ms,
Adapter: adapter, // nil → no_transcript on idle/ending
ExtractionEngine: extractionEngine, // nil → skip extraction
Embedding: embeddingEngine, // nil → store without embeddings
OllamaClient: ollamaClient, // nil → degraded introspection in OnEnding
})
```

When `config.yaml` has `opencode.db_path` set and the database exists, the adapter is wired into the coordinator. When the path is empty or the DB is unreachable, a nil adapter is used — `OnIdle` and `OnEnding` return `"no_transcript"` gracefully.

The CLI also provides `openExtractionEngine()`, `openEmbeddingEngine()`, and `openOllamaClient()` helper functions that return nil on failure. The coordinator gracefully degrades when any of these are nil:
- `ExtractionEngine` nil → extraction skipped, memories not extracted from transcript
- `Embedding` nil → memories stored without embedding vectors
- `OllamaClient` nil → `IntrospectTranscript` produces degraded self-assessment (plain-text summary, no LLM call)

#### SessionHookConfig

```go
type SessionHookConfig struct {
Store *store.MemoryStore // Required for all hook operations
Adapter SessionAdapter // Provides session content. nil → no_transcript
DebounceSeconds int // Min interval between idle events. Default: 30
ContextDir string // Directory for context files. Default: paths.GetContextDir()
Model string // LLM model for introspection. Default: "glm-5.1:cloud"
BaseURL string // Ollama base URL for introspection. Default: "http://localhost:11434"
Store *store.MemoryStore // Required for all hook operations
Adapter SessionAdapter // Provides session content. nil → no_transcript
DebounceSeconds int // Min interval between idle events. Default: 30
ContextDir string // Directory for context files. Default: paths.GetContextDir()
Model string // LLM model for introspection. Default: "glm-5.1:cloud"
BaseURL string // Ollama base URL for introspection. Default: "http://localhost:11434"
ExtractionEngine *extract.ExtractionEngine // Extracts memories from transcript. nil → skip extraction
Embedding *embed.EmbeddingEngine // Generates embedding vectors. nil → store without embeddings
OllamaClient *ollama.OllamaClient // Used for introspection in OnEnding. nil → degraded fallback
IntrospectModel string // LLM model name for IntrospectTranscript. Default: "glm-5.1:cloud"
}
```

#### SessionHookCoordinator

```go
coord, err := session.NewSessionHookCoordinator(session.SessionHookConfig{
Store: ms,
Adapter: adapter,
Store: ms,
Adapter: adapter,
ExtractionEngine: extractionEngine, // nil → skip extraction
Embedding: embeddingEngine, // nil → store without embeddings
OllamaClient: ollamaClient, // nil → degraded introspection in OnEnding
IntrospectModel: "glm-5.1:cloud", // optional, defaults to "glm-5.1:cloud"
})

result, err := coord.OnCreated(ctx, "session-id") // "success" | "already_processed"
Expand All @@ -1474,7 +1498,15 @@ resultType, memoryID, err := coord.OnEndingWithIntrospect(ctx, "session-id")
// ("error", "", err) on validation error
```

All methods validate session IDs via `paths.ValidateSessionID` to prevent path traversal. OnIdle includes a 30-second debounce mechanism.
All methods validate session IDs via `paths.ValidateSessionID` to prevent path traversal.

**OnIdle** includes a 30-second debounce mechanism. When a transcript is available and `ExtractionEngine` is non-nil, OnIdle:
1. Calls `SupersedeBySource` to invalidate prior memories from the same session (re-extraction as conversation grows)
2. Extracts memories via the extraction engine
3. Generates embedding vectors for each memory (if `Embedding` is non-nil)
4. Stores memories and logs the extraction

**OnEnding** extracts memories the same way as OnIdle, then runs `IntrospectTranscript` to produce a session-end self-assessment. When `OllamaClient` is nil, `IntrospectTranscript` still runs and falls back to a degraded plain-text summary (no LLM call attempted). Do NOT wrap the `IntrospectTranscript` call in an `OllamaClient != nil` check: skipping the call bypasses the degradation path.

### Systemd Unit Generation (internal/systemd)

Expand Down
4 changes: 3 additions & 1 deletion docs/CLI.md
Original file line number Diff line number Diff line change
Expand Up @@ -229,7 +229,9 @@ Handle session lifecycle hook events. Supports four hook types:
- `--model`: LLM model for introspection (default: `glm-5.1:cloud`). Used by the `ending` hook for automatic introspection.
- `--base-url`: Ollama base URL for introspection (default: `http://localhost:11434`). Used by the `ending` hook for automatic introspection.

The `idle` hook processes the session's transcript, extracts memories, and runs introspection automatically. It uses a debounce mechanism (via `extraction_log` table) to prevent re-extraction.
The `idle` hook processes the session's transcript, extracts memories via the extraction pipeline (chunk → dedup → LLM extract → embed → store), and generates embedding vectors for each extracted memory. It uses a debounce mechanism (via `extraction_log` table) to prevent re-extraction. When `ExtractionEngine` is not configured, extraction is skipped gracefully.

The `ending` hook extracts memories from the transcript (same pipeline as `idle`), then runs `IntrospectTranscript` to produce a session-end `self_assessment` memory. When the LLM is unavailable, `IntrospectTranscript` falls back to a degraded plain-text summary of the session (no LLM call attempted).

The `ending` hook performs automatic introspection on the session transcript. It reads the transcript via the configured adapter, generates a `self_assessment` memory using `IntrospectAuto`, and outputs the result type and memory ID. If no adapter is configured or the transcript is empty, it returns `no_transcript`. If introspection fails but the transcript was read, it logs a warning and returns success without crashing the ending event.

Expand Down
14 changes: 12 additions & 2 deletions docs/DREAM.md
Original file line number Diff line number Diff line change
Expand Up @@ -189,7 +189,7 @@ result, err := coord.OnEnding(ctx, "session-id")
resultType, memoryID, err := coord.OnEndingWithIntrospect(ctx, "session-id")
```

The `internal/introspect` package provides failure analysis and lesson learning:
The `internal/introspect` package provides failure analysis, lesson learning, and session transcript introspection:

```go
import "github.com/MichielDean/LLMem/internal/introspect"
Expand All @@ -216,4 +216,14 @@ id, err := introspect.LearnLesson(ctx, ms, introspect.LearnLessonParams{
id, err := introspect.IntrospectAuto(ctx, ms, "Session transcript text...", "glm-5.1:cloud", "http://localhost:11434")
```

All three functions use LLM expansion via Ollama when available, with graceful degradation to storage-only mode when Ollama is unavailable. `IntrospectAuto` never returns `("", nil)` — either creates a memory or returns an error.
All three functions use LLM expansion via Ollama when available, with graceful degradation to storage-only mode when Ollama is unavailable. `IntrospectAuto` never returns `("", nil)` — either creates a memory or returns an error.

```go
// Introspect a session transcript (called by OnEnding)
id, err := introspect.IntrospectTranscript(ctx, ms, transcript, "session-id", ollamaClient, "glm-5.1:cloud")
// When ollamaClient is nil, falls back to degraded storage (plain-text summary, no LLM call)
```

Both `IntrospectFailure` and `LearnLesson` use LLM expansion via Ollama when available, with graceful degradation to storage-only mode when Ollama is unavailable.

`IntrospectTranscript` analyzes a session transcript and stores a `self_assessment` memory. It accepts a pre-configured `*ollama.OllamaClient` (reusing the session's connection). When `ollamaClient` is nil, it produces a degraded memory with a plain-text summary. When the LLM is available, the model generates a structured self-assessment from the transcript content. Note: `IntrospectTranscript` uses `context.Background()` for the final store operation (not the caller's `ctx`) to ensure persistence even if the calling context has expired during the LLM call.
26 changes: 0 additions & 26 deletions internal/embed/embed.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,9 @@ package embed
import (
"bytes"
"context"
"encoding/binary"
"encoding/json"
"fmt"
"log/slog"
"math"
"net/http"
"strings"
"sync"
Expand Down Expand Up @@ -261,28 +259,4 @@ func (e *EmbeddingEngine) CheckAvailable(ctx context.Context) bool {
func (e *EmbeddingEngine) Close() error {
// Release idle connections held by the engine's HTTP client
// (presumably an *http.Client; its declaration is outside this view).
e.httpClient.CloseIdleConnections()
// No step above reports a failure, so the returned error is always nil.
return nil
}

// vecToBytes packs vec into a byte slice, four bytes per element, writing
// each value's IEEE-754 bit pattern in little-endian order. The layout is
// intended to match Python's struct.pack(f"{dim}f", *vec).
//
// The result always has length len(vec)*4; an empty or nil vec yields an
// empty, non-nil slice.
func vecToBytes(vec []float32) []byte {
	const width = 4 // bytes per float32
	packed := make([]byte, len(vec)*width)
	for idx := range vec {
		offset := idx * width
		bits := math.Float32bits(vec[idx])
		binary.LittleEndian.PutUint32(packed[offset:], bits)
	}
	return packed
}

// bytesToVec unpacks data into float32 values, reading each consecutive
// four-byte group as a little-endian IEEE-754 bit pattern. The layout is
// intended to match Python's struct.unpack(f"{dim}f", data).
//
// Empty or nil input yields nil. Trailing bytes that do not fill a complete
// four-byte group are ignored, so input shorter than four bytes yields an
// empty, non-nil slice.
func bytesToVec(data []byte) []float32 {
	if len(data) == 0 {
		return nil
	}
	const width = 4 // bytes per float32
	vec := make([]float32, len(data)/width)
	for idx := range vec {
		bits := binary.LittleEndian.Uint32(data[idx*width:])
		vec[idx] = math.Float32frombits(bits)
	}
	return vec
}
6 changes: 4 additions & 2 deletions internal/embed/embed_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ import (
"strings"
"testing"
"time"

"github.com/MichielDean/LLMem/internal/store"
)

func TestEmbeddingEngine_Embed_Success(t *testing.T) {
Expand Down Expand Up @@ -316,8 +318,8 @@ func TestEmbeddingEngine_CheckAvailable_Unreachable(t *testing.T) {

func TestEmbeddingEngine_VecToBytes_RoundTrip(t *testing.T) {
original := []float32{1.0, -2.5, 3.14, 0.0, -0.001}
encoded := vecToBytes(original)
decoded := bytesToVec(encoded)
encoded := store.VecToBytes(original)
decoded := store.BytesToVec(encoded)

if len(decoded) != len(original) {
t.Fatalf("expected %d elements, got %d", len(original), len(decoded))
Expand Down
Loading
Loading