From 6420ffe347c094eeedcf44fa740f6a21699d2395 Mon Sep 17 00:00:00 2001 From: Rohit Ghumare Date: Thu, 14 May 2026 12:49:37 +0100 Subject: [PATCH 1/2] chore(providers): bump Gemini defaults to current GA models MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bundles two upstream PRs into one chore — both are blocking real users today and both are simple default-string bumps with no API contract change. LLM default (was PR #368, @yut304) - `gemini-2.0-flash` is deprecated in Google's Gemini API and returns 429 rate-limit errors under load. Replace the default with `gemini-flash-latest`. Users on a pinned `GEMINI_MODEL` in `~/.agentmemory/.env` are unaffected. Embedding default (was PR #246, @AmmarSaleh50) - `text-embedding-004` is deprecated (shutdown Jan 14 2026). Replace with `gemini-embedding-001` (GA): 100+ languages, MRL dims (768 / 1536 / 3072), 2048-token input. - URL path changes from `:batchEmbedContent` to `:batchEmbedContents` (plural — matching the `models.batchEmbedContents` method name in the published REST reference; the singular spelling in the old URL does not appear there). - Each request now sends `outputDimensionality: 768` so the returned vectors match the existing index dim guard from #248 — no reindex needed. - L2-normalize each returned vector before pushing to the result array. `gemini-embedding-001` does not normalize by default, unlike `text-embedding-004`. Without this the cosine-similarity math elsewhere in the search pipeline (which assumes unit-length vectors) collapses. Verified - `npm test` clean: 903 / 903. - `npm run build` clean. Closes #368, closes #246. 
--- README.md | 2 +- src/config.ts | 2 +- src/providers/embedding/gemini.ts | 17 ++++++++++++++--- 3 files changed, 16 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 678351a8..7c4dea82 100644 --- a/README.md +++ b/README.md @@ -719,7 +719,7 @@ npm install @xenova/transformers | Provider | Model | Cost | Notes | |---|---|---|---| | **Local (recommended)** | `all-MiniLM-L6-v2` | Free | Offline, +8pp recall over BM25-only | -| Gemini | `text-embedding-004` | Free tier | 1500 RPM | +| Gemini | `gemini-embedding-001` | Free tier | 100+ languages, 768/1536/3072 dims (MRL), 2048-token input. Replaces `text-embedding-004` ([deprecated, shutdown Jan 14, 2026](https://ai.google.dev/gemini-api/docs/deprecations)) | | OpenAI | `text-embedding-3-small` | $0.02/1M | Highest quality | | Voyage AI | `voyage-code-3` | Paid | Optimized for code | | Cohere | `embed-english-v3.0` | Free trial | General purpose | diff --git a/src/config.ts b/src/config.ts index 2898552d..c37aea4e 100644 --- a/src/config.ts +++ b/src/config.ts @@ -76,7 +76,7 @@ function detectProvider(env: Record): ProviderConfig { } return { provider: "gemini", - model: env["GEMINI_MODEL"] || "gemini-2.0-flash", + model: env["GEMINI_MODEL"] || "gemini-flash-latest", maxTokens, }; } diff --git a/src/providers/embedding/gemini.ts b/src/providers/embedding/gemini.ts index 74dca6f5..d2e99693 100644 --- a/src/providers/embedding/gemini.ts +++ b/src/providers/embedding/gemini.ts @@ -2,7 +2,8 @@ import type { EmbeddingProvider } from "../../types.js"; import { getEnvVar } from "../../config.js"; const BATCH_LIMIT = 100; -const API_BASE = "https://generativelanguage.googleapis.com/v1beta/models/text-embedding-004:batchEmbedContent"; +const MODEL = "models/gemini-embedding-001"; +const API_BASE = `https://generativelanguage.googleapis.com/v1beta/${MODEL}:batchEmbedContents`; export class GeminiEmbeddingProvider implements EmbeddingProvider { readonly name = "gemini"; @@ -29,8 +30,9 @@ export class 
GeminiEmbeddingProvider implements EmbeddingProvider { headers: { "Content-Type": "application/json" }, body: JSON.stringify({ requests: chunk.map((t) => ({ - model: "models/text-embedding-004", + model: MODEL, content: { parts: [{ text: t }] }, + outputDimensionality: this.dimensions, })), }), }); @@ -45,10 +47,19 @@ export class GeminiEmbeddingProvider implements EmbeddingProvider { }; for (const emb of data.embeddings) { - results.push(new Float32Array(emb.values)); + results.push(l2Normalize(new Float32Array(emb.values))); } } return results; } } + +function l2Normalize(vec: Float32Array): Float32Array { + let sum = 0; + for (let i = 0; i < vec.length; i++) sum += vec[i]! * vec[i]!; + const norm = Math.sqrt(sum); + if (norm === 0) return vec; + for (let i = 0; i < vec.length; i++) vec[i] = vec[i]! / norm; + return vec; +} From 255105bc78cf5b364a71242ccefb79d3129e547b Mon Sep 17 00:00:00 2001 From: Rohit Ghumare Date: Thu, 14 May 2026 13:31:16 +0100 Subject: [PATCH 2/2] fix(gemini): pin LLM default to gemini-2.5-flash + warn-once on zero-norm MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Addresses CodeRabbit findings on PR #370. 1. Pin Gemini LLM default to gemini-2.5-flash. `gemini-flash-latest` is a moving alias that points to whatever Google promotes next. Production behaviour should be deterministic from a release perspective — users who upgrade agentmemory should not also get a Gemini model rotation in the same step. Switch the default to the current stable GA model `gemini-2.5-flash`. Users who want the moving alias keep getting it via `GEMINI_MODEL=gemini-flash-latest` in `~/.agentmemory/.env`. 2. Warn-once on zero-norm embedding in l2Normalize. `gemini-embedding-001` can return a zero-norm vector for degenerate input. The previous code silently returned the zero vector — downstream cosine-similarity math then divides by zero and the call site sees `NaN` scores with no signal as to why. 
Emit a one-time stderr warning naming the model + vector length so operators can correlate index quality dips with upstream embedding regressions. Behaviour otherwise unchanged: return the zero vector and let BM25 carry the search signal. Throwing was the other option — rejected because a single bad embedding in a 100-item batch would abort the whole batch and surface as an indexing pipeline halt. Soft-fail + warn matches the rest of the embedding provider error handling. Skipped finding: - `outputDimensionality` → `output_dimensionality` snake_case rename. CodeRabbit asserts the REST API expects snake_case. The Gemini REST API actually uses camelCase on the wire — confirmed against ai.google.dev/api/embeddings (field labelled `outputDimensionality` in the REST schema; the Python SDK alone uses snake_case and translates internally). Current code is correct as-shipped; the snake_case rename would silently break the dim override. Verified: 903 / 903 tests pass; build clean. --- src/config.ts | 2 +- src/providers/embedding/gemini.ts | 14 +++++++++++++- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/src/config.ts b/src/config.ts index c37aea4e..a4b676cf 100644 --- a/src/config.ts +++ b/src/config.ts @@ -76,7 +76,7 @@ function detectProvider(env: Record): ProviderConfig { } return { provider: "gemini", - model: env["GEMINI_MODEL"] || "gemini-flash-latest", + model: env["GEMINI_MODEL"] || "gemini-2.5-flash", maxTokens, }; } diff --git a/src/providers/embedding/gemini.ts b/src/providers/embedding/gemini.ts index d2e99693..6cfb2764 100644 --- a/src/providers/embedding/gemini.ts +++ b/src/providers/embedding/gemini.ts @@ -55,11 +55,23 @@ export class GeminiEmbeddingProvider implements EmbeddingProvider { } } +let zeroNormWarned = false; + function l2Normalize(vec: Float32Array): Float32Array { let sum = 0; for (let i = 0; i < vec.length; i++) sum += vec[i]! 
* vec[i]!; const norm = Math.sqrt(sum); - if (norm === 0) return vec; + if (norm === 0) { + if (!zeroNormWarned) { + zeroNormWarned = true; + process.stderr.write( + `[agentmemory] warn: gemini-embedding-001 returned a zero-norm ` + + `embedding (length=${vec.length}); leaving it un-normalized. ` + + `Subsequent zero-norm vectors will not be reported.\n`, + ); + } + return vec; + } for (let i = 0; i < vec.length; i++) vec[i] = vec[i]! / norm; return vec; }