From 6420ffe347c094eeedcf44fa740f6a21699d2395 Mon Sep 17 00:00:00 2001 From: Rohit Ghumare Date: Thu, 14 May 2026 12:49:37 +0100 Subject: [PATCH 1/2] chore(providers): bump Gemini defaults to current GA models MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bundles two upstream PRs into one chore — both are blocking real users today and both are simple default-string bumps with no API contract change. LLM default (was PR #368, @yut304) - `gemini-2.0-flash` is deprecated in Google's Gemini API and returns 429 rate-limit errors under load. Replace the default with `gemini-flash-latest`. Users on a pinned `GEMINI_MODEL` in `~/.agentmemory/.env` are unaffected. Embedding default (was PR #246, @AmmarSaleh50) - `text-embedding-004` is deprecated (shutdown Jan 14 2026). Replace with `gemini-embedding-001` (GA): 100+ languages, MRL dims (768 / 1536 / 3072), 2048-token input. - URL path changes from `:batchEmbedContent` to `:batchEmbedContents` (plural — matching the `models.batchEmbedContents` method name in the published REST reference; the singular spelling in the old URL does not appear there). - Each request now sends `outputDimensionality: 768` so the returned vectors match the existing index dim guard from #248 — no reindex needed. - L2-normalize each returned vector before pushing to the result array. `gemini-embedding-001` does not normalize by default, unlike `text-embedding-004`. Without this the cosine-similarity math elsewhere in the search pipeline (which assumes unit-length vectors) collapses. Verified - `npm test` clean: 903 / 903. - `npm run build` clean. Closes #368, closes #246. 
--- README.md | 2 +- src/config.ts | 2 +- src/providers/embedding/gemini.ts | 17 ++++++++++++++--- 3 files changed, 16 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 678351a8..7c4dea82 100644 --- a/README.md +++ b/README.md @@ -719,7 +719,7 @@ npm install @xenova/transformers | Provider | Model | Cost | Notes | |---|---|---|---| | **Local (recommended)** | `all-MiniLM-L6-v2` | Free | Offline, +8pp recall over BM25-only | -| Gemini | `text-embedding-004` | Free tier | 1500 RPM | +| Gemini | `gemini-embedding-001` | Free tier | 100+ languages, 768/1536/3072 dims (MRL), 2048-token input. Replaces `text-embedding-004` ([deprecated, shutdown Jan 14, 2026](https://ai.google.dev/gemini-api/docs/deprecations)) | | OpenAI | `text-embedding-3-small` | $0.02/1M | Highest quality | | Voyage AI | `voyage-code-3` | Paid | Optimized for code | | Cohere | `embed-english-v3.0` | Free trial | General purpose | diff --git a/src/config.ts b/src/config.ts index 2898552d..c37aea4e 100644 --- a/src/config.ts +++ b/src/config.ts @@ -76,7 +76,7 @@ function detectProvider(env: Record): ProviderConfig { } return { provider: "gemini", - model: env["GEMINI_MODEL"] || "gemini-2.0-flash", + model: env["GEMINI_MODEL"] || "gemini-flash-latest", maxTokens, }; } diff --git a/src/providers/embedding/gemini.ts b/src/providers/embedding/gemini.ts index 74dca6f5..d2e99693 100644 --- a/src/providers/embedding/gemini.ts +++ b/src/providers/embedding/gemini.ts @@ -2,7 +2,8 @@ import type { EmbeddingProvider } from "../../types.js"; import { getEnvVar } from "../../config.js"; const BATCH_LIMIT = 100; -const API_BASE = "https://generativelanguage.googleapis.com/v1beta/models/text-embedding-004:batchEmbedContent"; +const MODEL = "models/gemini-embedding-001"; +const API_BASE = `https://generativelanguage.googleapis.com/v1beta/${MODEL}:batchEmbedContents`; export class GeminiEmbeddingProvider implements EmbeddingProvider { readonly name = "gemini"; @@ -29,8 +30,9 @@ export class 
GeminiEmbeddingProvider implements EmbeddingProvider { headers: { "Content-Type": "application/json" }, body: JSON.stringify({ requests: chunk.map((t) => ({ - model: "models/text-embedding-004", + model: MODEL, content: { parts: [{ text: t }] }, + outputDimensionality: this.dimensions, })), }), }); @@ -45,10 +47,19 @@ export class GeminiEmbeddingProvider implements EmbeddingProvider { }; for (const emb of data.embeddings) { - results.push(new Float32Array(emb.values)); + results.push(l2Normalize(new Float32Array(emb.values))); } } return results; } } + +function l2Normalize(vec: Float32Array): Float32Array { + let sum = 0; + for (let i = 0; i < vec.length; i++) sum += vec[i]! * vec[i]!; + const norm = Math.sqrt(sum); + if (norm === 0) return vec; + for (let i = 0; i < vec.length; i++) vec[i] = vec[i]! / norm; + return vec; +} From 255105bc78cf5b364a71242ccefb79d3129e547b Mon Sep 17 00:00:00 2001 From: Rohit Ghumare Date: Thu, 14 May 2026 13:31:16 +0100 Subject: [PATCH 2/2] fix(gemini): pin LLM default to gemini-2.5-flash + warn-once on zero-norm MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Addresses CodeRabbit findings on PR #370. 1. Pin Gemini LLM default to gemini-2.5-flash. `gemini-flash-latest` is a moving alias that points to whatever Google promotes next. Production behaviour should be deterministic from a release perspective — users who upgrade agentmemory should not also get a Gemini model rotation in the same step. Switch the default to the current stable GA model `gemini-2.5-flash`. Users who want the moving alias keep getting it via `GEMINI_MODEL=gemini-flash-latest` in `~/.agentmemory/.env`. 2. Warn-once on zero-norm embedding in l2Normalize. `gemini-embedding-001` can return a zero-norm vector for degenerate input. The previous code silently returned the zero vector — downstream cosine-similarity math then divides by zero and the call site sees `NaN` scores with no signal as to why. 
Emit a one-time stderr warning naming the model + vector length so operators can correlate index quality dips with upstream embedding regressions. Behaviour otherwise unchanged: return the zero vector and let BM25 carry the search signal. Throwing was the other option — rejected because a single bad embedding in a 100-item batch would abort the whole batch and surface as an indexing pipeline halt. Soft-fail + warn matches the rest of the embedding provider error handling. Skipped finding: - `outputDimensionality` → `output_dimensionality` snake_case rename. CodeRabbit asserts the REST API expects snake_case. The Gemini REST API actually uses camelCase on the wire — confirmed against ai.google.dev/api/embeddings (field labelled `outputDimensionality` in the REST schema; the Python SDK alone uses snake_case and translates internally). Current code is correct as-shipped; the snake_case rename would silently break the dim override. Verified: 903 / 903 tests pass; build clean. --- src/config.ts | 2 +- src/providers/embedding/gemini.ts | 14 +++++++++++++- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/src/config.ts b/src/config.ts index c37aea4e..a4b676cf 100644 --- a/src/config.ts +++ b/src/config.ts @@ -76,7 +76,7 @@ function detectProvider(env: Record): ProviderConfig { } return { provider: "gemini", - model: env["GEMINI_MODEL"] || "gemini-flash-latest", + model: env["GEMINI_MODEL"] || "gemini-2.5-flash", maxTokens, }; } diff --git a/src/providers/embedding/gemini.ts b/src/providers/embedding/gemini.ts index d2e99693..6cfb2764 100644 --- a/src/providers/embedding/gemini.ts +++ b/src/providers/embedding/gemini.ts @@ -55,11 +55,23 @@ export class GeminiEmbeddingProvider implements EmbeddingProvider { } } +let zeroNormWarned = false; + function l2Normalize(vec: Float32Array): Float32Array { let sum = 0; for (let i = 0; i < vec.length; i++) sum += vec[i]! 
* vec[i]!; const norm = Math.sqrt(sum); - if (norm === 0) return vec; + if (norm === 0) { + if (!zeroNormWarned) { + zeroNormWarned = true; + process.stderr.write( + `[agentmemory] warn: gemini-embedding-001 returned a zero-norm ` + + `embedding (length=${vec.length}); leaving it un-normalized. ` + + `Subsequent zero-norm vectors will not be reported.\n`, + ); + } + return vec; + } for (let i = 0; i < vec.length; i++) vec[i] = vec[i]! / norm; return vec; }