Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 11 additions & 1 deletion cmd/gortex/daemon.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,13 @@ var (
// (the function has no *cobra.Command of its own) to decide whether
// the flag overrides the `embedding:` config block. Set once in
// runDaemonStart before buildDaemonState runs.
daemonEmbeddingsChanged bool
daemonEmbeddingsChanged bool
// daemonEmbeddingsURL / daemonEmbeddingsModel mirror `gortex mcp`'s
// --embeddings-url / --embeddings-model so the daemon can be started with an
// explicit OpenAI-compatible (or Ollama) embedding API. A non-empty URL forces
// the api provider in ResolveEmbedder, overriding the embedding: config block.
daemonEmbeddingsURL string
daemonEmbeddingsModel string
daemonStatusWatch bool
daemonStatusInterval time.Duration
daemonHTTPAddr string
Expand Down Expand Up @@ -103,6 +109,10 @@ func init() {
"fork to background after starting (logs to the daemon log file — see `gortex daemon logs`)")
daemonStartCmd.Flags().BoolVar(&daemonEmbeddings, "embeddings", false,
"load a semantic embedding provider (opt-in — adds ~87 MB model download on first use and ~60 ms/symbol warmup)")
daemonStartCmd.Flags().StringVar(&daemonEmbeddingsURL, "embeddings-url", "",
"OpenAI-compatible (or Ollama) embedding API base URL (e.g. https://api.openai.com/v1). A non-empty URL forces the api provider, overriding the embedding: config. Key via $GORTEX_EMBEDDINGS_API_KEY or $OPENAI_API_KEY (openai.com only).")
daemonStartCmd.Flags().StringVar(&daemonEmbeddingsModel, "embeddings-model", "",
"embedding model for --embeddings-url (default: auto-detect — text-embedding-3-small for OpenAI, nomic-embed-text for Ollama)")
daemonStartCmd.Flags().StringVar(&daemonHTTPAddr, "http-addr", "",
"also expose the MCP 2026 Streamable HTTP transport on this TCP address (e.g. 127.0.0.1:7411); empty disables")
daemonStartCmd.Flags().StringVar(&daemonHTTPAuthToken, "http-auth-token", "",
Expand Down
43 changes: 41 additions & 2 deletions cmd/gortex/daemon_state.go
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,8 @@ func buildDaemonState(logger *zap.Logger) (*daemonState, error) {
Embedder: serverstack.EmbedderRequest{
FlagChanged: daemonEmbeddingsChanged,
FlagEnabled: daemonEmbeddings,
FlagURL: daemonEmbeddingsURL,
FlagModel: daemonEmbeddingsModel,
},
// Workspace-global side-store layout: notes/memories partition
// under the "daemon" key in the shared DataDir sidecar; the
Expand Down Expand Up @@ -641,8 +643,15 @@ func priorMtimesFromStore(g graph.Store, cm *config.ConfigManager, entry config.
// a worktree instance persists its mtimes under `<base>@<workspace>`,
// not the bare basename, so a plain ResolvePrefix would load the
// canonical checkout's mtimes and force a full re-index every restart.
prefix := strings.TrimPrefix(indexer.EffectiveRepoPrefix(cm, entry), "/")
if prefix == "" {
effective := strings.TrimPrefix(indexer.EffectiveRepoPrefix(cm, entry), "/")
repoCount := 1
if cm != nil {
if g := cm.Global(); g != nil {
repoCount = len(g.Repos)
}
}
prefix, ok := warmMtimePrefix(effective, repoCount)
if !ok {
if logger != nil {
logger.Info("daemon: priorMtimesFromStore: empty prefix",
zap.String("entry_path", entry.Path),
Expand All @@ -654,11 +663,41 @@ func priorMtimesFromStore(g graph.Store, cm *config.ConfigManager, entry config.
if logger != nil {
logger.Info("daemon: priorMtimesFromStore loaded",
zap.String("prefix", prefix),
zap.Bool("single_repo", repoCount < 2),
zap.Int("count", len(mtimes)))
}
return mtimes
}

// warmMtimePrefix picks the repo_prefix to look up persisted file mtimes
// (and, by extension, to decide whether the warm-restart reconcile can run)
// for a repo whose EffectiveRepoPrefix is `effective` in a daemon tracking
// `repoCount` repos total.
//
// PURPOSE: single-repo daemons index WITHOUT a prefix — MultiIndexer.
// indexSingleRepo / ReconcileRepoCtx only switch on a repo prefix once a
// SECOND repo joins (the willBeMultiRepo gate). So a lone repo's nodes and
// file_mtimes rows are persisted under "", while EffectiveRepoPrefix returns
// the path basename (e.g. "drools"). Looking mtimes up under the basename
// finds zero rows and forces a full cold re-index — and, with an API
// embedder, a full (paid) re-embed — on every restart.
//
// RATIONALE: mirror the indexer's own single-vs-multi decision here so the
// warm path keys mtimes exactly where they were written. In multi-repo mode
// an empty effective prefix is untrustworthy (it would collide across repos),
// so report ok=false and let the caller fall back to a cold index.
//
// KEYWORDS: warm-restart, repo-prefix, single-repo, file_mtimes, re-embed
func warmMtimePrefix(effective string, repoCount int) (prefix string, ok bool) {
if repoCount < 2 {
return "", true
}
if effective == "" {
return "", false
}
return effective, true
}

// storeNeedsRebuild reports whether the backend signalled, via the optional
// NeedsRebuild capability, that a schema migration crossed a rung an ALTER
// could not satisfy — so its persisted rows are in an old shape and the
Expand Down
30 changes: 30 additions & 0 deletions cmd/gortex/daemon_state_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -73,3 +73,33 @@ func TestLSPDisabledSet_Empty(t *testing.T) {
t.Fatalf("expected empty map, got %v", got)
}
}

// TestWarmMtimePrefix covers the single- vs multi-repo prefix decision the
// warm-restart mtime lookup hangs on. The bug it guards: a lone repo indexes
// unprefixed (rows under ""), but EffectiveRepoPrefix returns the basename, so
// looking up the basename finds nothing and forces a paid cold re-index every
// restart.
func TestWarmMtimePrefix(t *testing.T) {
cases := []struct {
name string
effective string
repoCount int
wantPrefix string
wantOK bool
}{
{"single repo uses empty prefix (the bug)", "drools", 1, "", true},
{"single repo, zero configured still unprefixed", "drools", 0, "", true},
{"multi-repo keeps its derived prefix", "drools", 2, "drools", true},
{"multi-repo with no prefix is untrustworthy", "", 3, "", false},
{"single repo already empty prefix", "", 1, "", true},
}
for _, tc := range cases {
t.Run(tc.name, func(t *testing.T) {
gotPrefix, gotOK := warmMtimePrefix(tc.effective, tc.repoCount)
if gotPrefix != tc.wantPrefix || gotOK != tc.wantOK {
t.Fatalf("warmMtimePrefix(%q, %d) = (%q, %v), want (%q, %v)",
tc.effective, tc.repoCount, gotPrefix, gotOK, tc.wantPrefix, tc.wantOK)
}
})
}
}
133 changes: 129 additions & 4 deletions internal/embedding/api.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,10 @@ import (
"fmt"
"io"
"net/http"
"os"
"strconv"
"strings"
"sync/atomic"
"time"
)

Expand All @@ -18,15 +20,65 @@ import (
// fails and the caller aborts to text-only search.
const maxRetryAfterWait = 60 * time.Second

// maxEmbedInputBytes caps each embedding input. OpenAI's embedding models
// reject inputs over 8192 tokens with a 400 that aborts the WHOLE batch
// (and the vector index). A BPE tokenizer never emits more tokens than
// input characters, and for single-byte (ASCII) source — the overwhelming
// majority of code — characters equal bytes, so capping the head at 8000
// bytes guarantees ≤8000 tokens, safely under the 8192 limit, regardless
// of how token-dense the snippet is. The truncated head still carries the
// symbol's signature and leading body — enough signal for nearest-neighbour
// search. Head-truncation beats dropping the whole index over a few giant
// generated symbols.
const maxEmbedInputBytes = 8000

// truncateEmbedInputs head-truncates any input over the byte cap, on a
// UTF-8 rune boundary so the JSON payload stays valid. Returns the same
// slice when nothing needed trimming (the common case).
func truncateEmbedInputs(texts []string) []string {
var out []string
for i, t := range texts {
if len(t) <= maxEmbedInputBytes {
continue
}
if out == nil {
out = make([]string, len(texts))
copy(out, texts)
}
b := []byte(t[:maxEmbedInputBytes])
for len(b) > 0 && b[len(b)-1]&0xC0 == 0x80 { // back off mid-rune
b = b[:len(b)-1]
}
out[i] = string(b)
}
if out == nil {
return texts
}
return out
}

// APIProvider calls an external embedding API (Ollama or OpenAI-compatible).
type APIProvider struct {
url string
model string
apiKey string
client *http.Client
dims int
format apiFormat

// tokensUsed accumulates the `usage.total_tokens` reported by the
// embedding backend across every request, so the indexer can log the
// actual token spend of a paid embedding pass (otherwise invisible).
// Touched from several goroutines under the concurrent embedding pool,
// hence atomic.
tokensUsed int64
}

// TokensUsed reports the total embedding tokens this provider has been
// billed for so far, summed from each response's usage.total_tokens.
// Returns 0 for backends that don't report usage (e.g. Ollama).
func (p *APIProvider) TokensUsed() int64 { return atomic.LoadInt64(&p.tokensUsed) }

type apiFormat int

const (
Expand All @@ -50,9 +102,21 @@ func NewAPIProvider(url, model string) *APIProvider {
}
}

// API key for authenticated embedding backends (OpenAI, Azure, and
// OpenAI-compatible gateways). Ollama on localhost is keyless, so the
// key stays optional and an unset value just omits the header. Prefer
// an explicit GORTEX_EMBEDDINGS_API_KEY; fall back to OPENAI_API_KEY
// only when the endpoint is api.openai.com, so a stray OPENAI_API_KEY
// can never leak to an arbitrary third-party URL.
apiKey := os.Getenv("GORTEX_EMBEDDINGS_API_KEY")
if apiKey == "" && strings.Contains(url, "openai.com") {
apiKey = os.Getenv("OPENAI_API_KEY")
}

return &APIProvider{
url: strings.TrimRight(url, "/"),
model: model,
apiKey: apiKey,
client: &http.Client{Timeout: 30 * time.Second},
format: format,
}
Expand All @@ -79,6 +143,40 @@ func (p *APIProvider) EmbedBatch(ctx context.Context, texts []string) ([][]float
func (p *APIProvider) Dimensions() int { return p.dims }
func (p *APIProvider) Close() error { return nil }

// ProbeDimensions makes one tiny embedding call to discover and cache the
// provider's vector width, so Dimensions() reports the true value *before*
// the first indexing pass. An APIProvider learns its width only from the
// first real embed (embedOpenAI / embedOllama set p.dims from the returned
// vector); until then Dimensions() returns 0, which has two concrete
// consequences at daemon startup: the "embeddings enabled" log mislabels
// the width as dim:0, and the snapshot-vector reload gate
// (daemon_state.go: vec.Dims == EmbedderDims) rejects a correctly-sized
// persisted index, forcing a needless full re-embed on every restart.
//
// Idempotent: a no-op once the width is known. Best-effort: on any
// transport/auth error it returns the error and leaves dims at 0 — the
// caller logs a warning, the lazy path still sets the width from the first
// real vector, and indexing degrades to BM25 if embeddings are truly
// unreachable. The probe also doubles as an early connectivity/credential
// check, surfacing a bad key or URL at startup instead of mid-index.
func (p *APIProvider) ProbeDimensions(ctx context.Context) (int, error) {
if d := p.Dimensions(); d > 0 {
return d, nil
}
pctx, cancel := context.WithTimeout(ctx, 20*time.Second)
defer cancel()
vec, err := p.Embed(pctx, "gortex embedding dimension probe")
if err != nil {
return 0, err
}
// Embed -> EmbedBatch -> embed{OpenAI,Ollama} already cached p.dims from
// the response; fall back to the returned vector's length defensively.
if p.dims == 0 && len(vec) > 0 {
p.dims = len(vec)
}
return p.dims, nil
}

// Concurrent reports that this provider is safe — and worth — calling
// from several goroutines at once. An external HTTP embedding endpoint
// gains from overlapped round-trips; the indexer's embedding pool uses
Expand Down Expand Up @@ -155,7 +253,7 @@ type ollamaResponse struct {
func (p *APIProvider) embedOllama(ctx context.Context, texts []string) ([][]float32, error) {
reqBody := ollamaRequest{
Model: p.model,
Input: texts,
Input: truncateEmbedInputs(texts),
}

body, err := json.Marshal(reqBody)
Expand All @@ -169,6 +267,9 @@ func (p *APIProvider) embedOllama(ctx context.Context, texts []string) ([][]floa
return nil, fmt.Errorf("create request: %w", err)
}
req.Header.Set("Content-Type", "application/json")
if p.apiKey != "" {
req.Header.Set("Authorization", "Bearer "+p.apiKey)
}

resp, err := p.doRequest(ctx, req, body)
if err != nil {
Expand Down Expand Up @@ -201,7 +302,15 @@ type openAIRequest struct {
}

type openAIResponse struct {
Data []openAIEmbedding `json:"data"`
Data []openAIEmbedding `json:"data"`
Usage openAIUsage `json:"usage"`
}

// openAIUsage carries the token accounting OpenAI returns alongside every
// embeddings response. total_tokens is what the request is billed on.
type openAIUsage struct {
PromptTokens int `json:"prompt_tokens"`
TotalTokens int `json:"total_tokens"`
}

type openAIEmbedding struct {
Expand All @@ -212,20 +321,32 @@ type openAIEmbedding struct {
func (p *APIProvider) embedOpenAI(ctx context.Context, texts []string) ([][]float32, error) {
reqBody := openAIRequest{
Model: p.model,
Input: texts,
Input: truncateEmbedInputs(texts),
}

body, err := json.Marshal(reqBody)
if err != nil {
return nil, fmt.Errorf("marshal request: %w", err)
}

url := p.url + "/v1/embeddings"
// OpenAI-compatible bases are conventionally given WITH the version
// segment (OpenAI "https://api.openai.com/v1", OpenRouter
// "https://openrouter.ai/api/v1"). Append "/v1" only when it is absent,
// so a "…/v1" base does not become "…/v1/v1/embeddings" (a 404 that
// silently degrades the whole vector index to BM25).
endpoint := "/v1/embeddings"
if strings.HasSuffix(p.url, "/v1") {
endpoint = "/embeddings"
}
url := p.url + endpoint
req, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(body))
if err != nil {
return nil, fmt.Errorf("create request: %w", err)
}
req.Header.Set("Content-Type", "application/json")
if p.apiKey != "" {
req.Header.Set("Authorization", "Bearer "+p.apiKey)
}

resp, err := p.doRequest(ctx, req, body)
if err != nil {
Expand All @@ -243,6 +364,10 @@ func (p *APIProvider) embedOpenAI(ctx context.Context, texts []string) ([][]floa
return nil, fmt.Errorf("decode response: %w", err)
}

if result.Usage.TotalTokens > 0 {
atomic.AddInt64(&p.tokensUsed, int64(result.Usage.TotalTokens))
}

vecs := make([][]float32, len(result.Data))
for _, d := range result.Data {
vecs[d.Index] = d.Embedding
Expand Down
Loading
Loading