MichielDean · MichielDean · May 11, 2026 · May 11, 2026 · May 11, 2026 · May 11, 2026
diff --git a/cmd/llmem/main.go b/cmd/llmem/main.go
@@ -13,7 +13,10 @@ import (
 
 	"github.com/MichielDean/LLMem/internal/config"
 	"github.com/MichielDean/LLMem/internal/dream"
+	"github.com/MichielDean/LLMem/internal/embed"
+	"github.com/MichielDean/LLMem/internal/extract"
 	"github.com/MichielDean/LLMem/internal/introspect"
+	"github.com/MichielDean/LLMem/internal/ollama"
 	"github.com/MichielDean/LLMem/internal/paths"
 	"github.com/MichielDean/LLMem/internal/session"
 	"github.com/MichielDean/LLMem/internal/store"
@@ -121,6 +124,42 @@ func openAdapter() (session.SessionAdapter, error) {
 	return adapter, nil
 }
 
+// openExtractionEngine builds the ExtractionEngine handed to session hooks.
+// A construction error is logged at debug level and yields a nil engine, which
+// the coordinator handles by skipping extraction (graceful degradation).
+func openExtractionEngine() *extract.ExtractionEngine {
+	eng, err := extract.NewExtractionEngine(extract.ExtractionConfig{})
+	if err != nil {
+		slog.Debug("llmem: failed to create extraction engine, skipping", "error", err)
+		eng = nil
+	}
+	return eng
+}
+
+// openEmbeddingEngine builds the EmbeddingEngine handed to session hooks.
+// A construction error is logged at debug level and yields a nil engine, which
+// the coordinator handles by storing memories without embeddings.
+func openEmbeddingEngine() *embed.EmbeddingEngine {
+	eng, err := embed.NewEmbeddingEngine(embed.EmbeddingConfig{})
+	if err != nil {
+		slog.Debug("llmem: failed to create embedding engine, skipping", "error", err)
+		eng = nil
+	}
+	return eng
+}
+
+// openOllamaClient builds the OllamaClient used for session hook introspection.
+// A construction error is logged at debug level and yields a nil client, which the
+// coordinator handles via degraded introspection in OnEnding (plain-text summary, no LLM).
+func openOllamaClient() *ollama.OllamaClient {
+	cli, err := ollama.NewOllamaClient(ollama.OllamaClientConfig{})
+	if err != nil {
+		slog.Debug("llmem: failed to create Ollama client, falling back to degraded introspection", "error", err)
+		cli = nil
+	}
+	return cli
+}
+
 func addCmd() *cobra.Command {
 	var (
 		typeVal      string
@@ -1080,8 +1119,11 @@ func contextCmd() *cobra.Command {
 			}
 
 			coord, err := session.NewSessionHookCoordinator(session.SessionHookConfig{
-				Store:   ms,
-				Adapter: adapter,
+				Store:            ms,
+				Adapter:          adapter,
+				ExtractionEngine: openExtractionEngine(),
+				Embedding:        openEmbeddingEngine(),
+				OllamaClient:     openOllamaClient(),
 			})
 			if err != nil {
 				return err
@@ -1146,8 +1188,11 @@ func hookCmd() *cobra.Command {
 			}
 
 			coord, err := session.NewSessionHookCoordinator(session.SessionHookConfig{
-				Store:   ms,
-				Adapter: adapter,
+				Store:            ms,
+				Adapter:          adapter,
+				ExtractionEngine: openExtractionEngine(),
+				Embedding:        openEmbeddingEngine(),
+				OllamaClient:     openOllamaClient(),
 			})
 			if err != nil {
 				return err

diff --git a/docs/API.md b/docs/API.md
@@ -906,7 +906,7 @@ _, err := ms.Add(ctx, store.AddParams{Type: "unknown_type", Content: "test"})
 
 Embeddings are stored and accepted as packed `[]byte` in little-endian `float32` format. For a 768-dimensional embedding, this is `768 × 4 = 3072` bytes.
 
-Use the exported `vecToBytes` and `bytesToVec` helpers if you need conversion:
+Use the exported `VecToBytes` and `BytesToVec` helpers if you need conversion:
 
 ```go
 // Convert float32 slice to []byte for storage
@@ -1251,7 +1251,7 @@ available := engine.CheckAvailable(ctx)
 
 ### Introspection (internal/introspect)
 
-The `internal/introspect` package provides failure analysis and lesson learning (see [Dream Cycle & Extraction](DREAM.md#go) for usage).
+The `internal/introspect` package provides failure analysis, lesson learning, and session transcript introspection (see [Dream Cycle & Extraction](DREAM.md#go) for usage).
 
 ```go
 import "github.com/MichielDean/LLMem/internal/introspect"
@@ -1269,9 +1269,17 @@ id, err := introspect.LearnLesson(ctx, ms, introspect.LearnLessonParams{
     WhatIsCorrect: "inject dependency via constructor",
     Context:       "service.go:15",
 })
+
+// IntrospectTranscript — analyze a session transcript at session end
+id, err := introspect.IntrospectTranscript(ctx, ms, transcript, "session-id", ollamaClient, "glm-5.1:cloud")
+// When ollamaClient is nil, falls back to degraded storage (plain-text summary, no LLM call)
 ```
 
-Both functions use LLM expansion via Ollama when available. When Ollama is unavailable, they gracefully degrade to storage-only mode (storing the raw parameters without LLM expansion).
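+All three functions use LLM expansion via Ollama when available. When Ollama is unavailable, they gracefully degrade to storage-only mode: `IntrospectFailure` and `LearnLesson` store the raw parameters without LLM expansion, and `IntrospectTranscript` stores a plain-text summary of the transcript.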
+
+**IntrospectTranscript** differs from `IntrospectFailure` and `LearnLesson` in two ways:
+1. It accepts a pre-configured `*ollama.OllamaClient` instead of a model/baseURL pair, reusing the session's configured Ollama connection.
+2. It uses `context.Background()` for the final store operation (not the caller's `ctx`), ensuring the session-end self-assessment is persisted even if the calling context has expired during the LLM call. This is intentional — `IntrospectFailure` and `LearnLesson` pass through `ctx` because they run mid-session when the context is still alive.
 
 #### IntrospectAuto
 
@@ -1433,32 +1441,48 @@ if adapter != nil {
 }
 
 coord, err := session.NewSessionHookCoordinator(session.SessionHookConfig{
-    Store:   ms,
-    Adapter: adapter,  // nil → no_transcript on idle/ending
+    Store:            ms,
+    Adapter:          adapter,           // nil → no_transcript on idle/ending
+    ExtractionEngine: extractionEngine,  // nil → skip extraction
+    Embedding:        embeddingEngine,   // nil → store without embeddings
+    OllamaClient:     ollamaClient,      // nil → degraded introspection in OnEnding
 })
 ```
 
 When `config.yaml` has `opencode.db_path` set and the database exists, the adapter is wired into the coordinator. When the path is empty or the DB is unreachable, a nil adapter is used — `OnIdle` and `OnEnding` return `"no_transcript"` gracefully.
 
+The CLI also provides `openExtractionEngine()`, `openEmbeddingEngine()`, and `openOllamaClient()` helper functions that return nil on failure. The coordinator gracefully degrades when any of these are nil:
+- `ExtractionEngine` nil → extraction skipped, memories not extracted from transcript
+- `Embedding` nil → memories stored without embedding vectors
+- `OllamaClient` nil → `IntrospectTranscript` produces degraded self-assessment (plain-text summary, no LLM call)
+
 #### SessionHookConfig
 
 ```go
 type SessionHookConfig struct {
-    Store           *store.MemoryStore  // Required for all hook operations
-    Adapter         SessionAdapter      // Provides session content. nil → no_transcript
-    DebounceSeconds int                 // Min interval between idle events. Default: 30
-    ContextDir      string              // Directory for context files. Default: paths.GetContextDir()
-    Model           string              // LLM model for introspection. Default: "glm-5.1:cloud"
-    BaseURL         string              // Ollama base URL for introspection. Default: "http://localhost:11434"
+    Store            *store.MemoryStore          // Required for all hook operations
+    Adapter          SessionAdapter              // Provides session content. nil → no_transcript
+    DebounceSeconds  int                        // Min interval between idle events. Default: 30
+    ContextDir       string                     // Directory for context files. Default: paths.GetContextDir()
+    Model            string                     // LLM model for introspection. Default: "glm-5.1:cloud"
+    BaseURL          string                     // Ollama base URL for introspection. Default: "http://localhost:11434"
+    ExtractionEngine *extract.ExtractionEngine  // Extracts memories from transcript. nil → skip extraction
+    Embedding        *embed.EmbeddingEngine     // Generates embedding vectors. nil → store without embeddings
+    OllamaClient     *ollama.OllamaClient       // Used for introspection in OnEnding. nil → degraded fallback
+    IntrospectModel  string                     // LLM model name for IntrospectTranscript. Default: "glm-5.1:cloud"
 }
 ```
 
 #### SessionHookCoordinator
 
 ```go
 coord, err := session.NewSessionHookCoordinator(session.SessionHookConfig{
-    Store:   ms,
-    Adapter: adapter,
+    Store:            ms,
+    Adapter:          adapter,
+    ExtractionEngine: extractionEngine,  // nil → skip extraction
+    Embedding:        embeddingEngine,    // nil → store without embeddings
+    OllamaClient:     ollamaClient,       // nil → degraded introspection in OnEnding
+    IntrospectModel:  "glm-5.1:cloud",    // optional, defaults to "glm-5.1:cloud"
 })
 
 result, err := coord.OnCreated(ctx, "session-id")       // "success" | "already_processed"
@@ -1474,7 +1498,15 @@ resultType, memoryID, err := coord.OnEndingWithIntrospect(ctx, "session-id")
 //         ("error", "", err) on validation error
 ```
 
-All methods validate session IDs via `paths.ValidateSessionID` to prevent path traversal. OnIdle includes a 30-second debounce mechanism.
+All methods validate session IDs via `paths.ValidateSessionID` to prevent path traversal.
+
+**OnIdle** includes a 30-second debounce mechanism. When a transcript is available and `ExtractionEngine` is non-nil, OnIdle:
+1. Calls `SupersedeBySource` to invalidate prior memories from the same session (re-extraction as conversation grows)
+2. Extracts memories via the extraction engine
+3. Generates embedding vectors for each memory (if `Embedding` is non-nil)
+4. Stores memories and logs the extraction
+
+**OnEnding** extracts memories the same way as OnIdle, then runs `IntrospectTranscript` to produce a session-end self-assessment. When `OllamaClient` is nil, `IntrospectTranscript` falls back to a degraded plain-text summary (no LLM call attempted). Callers must NOT wrap the `IntrospectTranscript` call in an `OllamaClient != nil` guard, or the degradation path is bypassed.
 
 ### Systemd Unit Generation (internal/systemd)
 

diff --git a/docs/CLI.md b/docs/CLI.md
@@ -229,7 +229,9 @@ Handle session lifecycle hook events. Supports four hook types:
 - `--model`: LLM model for introspection (default: `glm-5.1:cloud`). Used by the `ending` hook for automatic introspection.
 - `--base-url`: Ollama base URL for introspection (default: `http://localhost:11434`). Used by the `ending` hook for automatic introspection.
 
-The `idle` hook processes the session's transcript, extracts memories, and runs introspection automatically. It uses a debounce mechanism (via `extraction_log` table) to prevent re-extraction.
+The `idle` hook processes the session's transcript and extracts memories via the extraction pipeline (chunk → dedup → LLM extract → embed → store), which includes generating an embedding vector for each extracted memory. It uses a debounce mechanism (via the `extraction_log` table) to prevent re-extraction. When `ExtractionEngine` is not configured, extraction is skipped gracefully.
+
+The `ending` hook extracts memories from the transcript (same pipeline as `idle`), then runs `IntrospectTranscript` to produce a session-end `self_assessment` memory. When the LLM is unavailable, `IntrospectTranscript` falls back to a degraded plain-text summary of the session (no LLM call attempted).
 
 The `ending` hook performs automatic introspection on the session transcript. It reads the transcript via the configured adapter, generates a `self_assessment` memory using `IntrospectAuto`, and outputs the result type and memory ID. If no adapter is configured or the transcript is empty, it returns `no_transcript`. If introspection fails but the transcript was read, it logs a warning and returns success without crashing the ending event.
 

diff --git a/docs/DREAM.md b/docs/DREAM.md
@@ -189,7 +189,7 @@ result, err := coord.OnEnding(ctx, "session-id")
 resultType, memoryID, err := coord.OnEndingWithIntrospect(ctx, "session-id")
 ```
 
-The `internal/introspect` package provides failure analysis and lesson learning:
+The `internal/introspect` package provides failure analysis, lesson learning, and session transcript introspection:
 
 ```go
 import "github.com/MichielDean/LLMem/internal/introspect"
@@ -216,4 +216,14 @@ id, err := introspect.LearnLesson(ctx, ms, introspect.LearnLessonParams{
 id, err := introspect.IntrospectAuto(ctx, ms, "Session transcript text...", "glm-5.1:cloud", "http://localhost:11434")
 ```
 
-All three functions use LLM expansion via Ollama when available, with graceful degradation to storage-only mode when Ollama is unavailable. `IntrospectAuto` never returns `("", nil)` — either creates a memory or returns an error.
+All three functions use LLM expansion via Ollama when available, with graceful degradation to storage-only mode when Ollama is unavailable. `IntrospectAuto` never returns `("", nil)` — either creates a memory or returns an error.
+
+```go
+// Introspect a session transcript (called by OnEnding)
+id, err := introspect.IntrospectTranscript(ctx, ms, transcript, "session-id", ollamaClient, "glm-5.1:cloud")
+// When ollamaClient is nil, falls back to degraded storage (plain-text summary, no LLM call)
+```
+
+Both `IntrospectFailure` and `LearnLesson` use LLM expansion via Ollama when available, with graceful degradation to storage-only mode when Ollama is unavailable.
+
+`IntrospectTranscript` analyzes a session transcript and stores a `self_assessment` memory. It accepts a pre-configured `*ollama.OllamaClient` (reusing the session's connection). When `ollamaClient` is nil, it produces a degraded memory with a plain-text summary. When the LLM is available, the model generates a structured self-assessment from the transcript content. Note: `IntrospectTranscript` uses `context.Background()` for the final store operation (not the caller's `ctx`) to ensure persistence even if the calling context has expired during the LLM call.
diff --git a/internal/embed/embed.go b/internal/embed/embed.go
@@ -6,11 +6,9 @@ package embed
 import (
 	"bytes"
 	"context"
-	"encoding/binary"
 	"encoding/json"
 	"fmt"
 	"log/slog"
-	"math"
 	"net/http"
 	"strings"
 	"sync"
@@ -261,28 +259,4 @@ func (e *EmbeddingEngine) CheckAvailable(ctx context.Context) bool {
 func (e *EmbeddingEngine) Close() error {
 	e.httpClient.CloseIdleConnections()
 	return nil
-}
-
-// vecToBytes encodes a []float32 into packed little-endian bytes.
-// Matches Python's struct.pack(f"{dim}f", *vec).
-func vecToBytes(vec []float32) []byte {
-	buf := make([]byte, len(vec)*4)
-	for i, v := range vec {
-		binary.LittleEndian.PutUint32(buf[i*4:], math.Float32bits(v))
-	}
-	return buf
-}
-
-// bytesToVec decodes a packed float32 byte slice into a []float32.
-// Matches Python's struct.unpack(f"{dim}f", data).
-func bytesToVec(data []byte) []float32 {
-	if len(data) == 0 {
-		return nil
-	}
-	dim := len(data) / 4
-	result := make([]float32, dim)
-	for i := 0; i < dim; i++ {
-		result[i] = math.Float32frombits(binary.LittleEndian.Uint32(data[i*4:]))
-	}
-	return result
 }
diff --git a/internal/embed/embed_test.go b/internal/embed/embed_test.go
@@ -9,6 +9,8 @@ import (
 	"strings"
 	"testing"
 	"time"
+
+	"github.com/MichielDean/LLMem/internal/store"
 )
 
 func TestEmbeddingEngine_Embed_Success(t *testing.T) {
@@ -316,8 +318,8 @@ func TestEmbeddingEngine_CheckAvailable_Unreachable(t *testing.T) {
 
 func TestEmbeddingEngine_VecToBytes_RoundTrip(t *testing.T) {
 	original := []float32{1.0, -2.5, 3.14, 0.0, -0.001}
-	encoded := vecToBytes(original)
-	decoded := bytesToVec(encoded)
+	encoded := store.VecToBytes(original)
+	decoded := store.BytesToVec(encoded)
 
 	if len(decoded) != len(original) {
 		t.Fatalf("expected %d elements, got %d", len(original), len(decoded))