From 007e3cb72dcfa261e8ab2fd304b9b11b4489bd8c Mon Sep 17 00:00:00 2001 From: Vincenzo Palazzo Date: Sat, 9 May 2026 18:43:20 +0000 Subject: [PATCH] fix: replace deprecated text-embedding-004 / embedding-001 with gemini-embedding-001 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Gemini text-embedding-004 model is no longer served by Google's Generative Language API — calls to /v1beta/models/text-embedding-004:* return 404 for newly issued API keys (verified 2026-05-09). Likewise, the older models/embedding-001 referenced in get_defaults() is deprecated. Replace both with the current GA model gemini-embedding-001: - packages/openmemory-js/src/memory/embed.ts: hardcoded URL + request body model field, plus the model name reported by getEmbeddingInfo() (the gemini path does not yet read from models.yml via get_model() — that's left as a follow-up). - models.yml: gemini entry for all 5 sectors + the documentation comment block at the bottom. - packages/openmemory-js/src/core/models.ts: get_defaults() fallback for all 5 sectors. The new model exposes the same batchEmbedContents endpoint with the same request body shape (model, content, taskType) and returns vectors that are resized to env.vec_dim via the existing resize_vec helper, so no caller-side changes are needed. Verified on a smart-tier deployment (159 memories): queries that were falling through to the synthetic fallback ("[EMBED] gemini failed: Gemini failed after 3 attempts: Gemini: 404") now return semantically ranked matches in under 1s.
--- models.yml | 13 +++++++------ packages/openmemory-js/src/core/models.ts | 10 +++++----- packages/openmemory-js/src/memory/embed.ts | 6 +++--- 3 files changed, 15 insertions(+), 14 deletions(-) diff --git a/models.yml b/models.yml index ed466804..5e101827 100644 --- a/models.yml +++ b/models.yml @@ -8,35 +8,35 @@ episodic: ollama: nomic-embed-text openai: text-embedding-3-small - gemini: models/text-embedding-004 + gemini: models/gemini-embedding-001 aws: amazon.titan-embed-text-v2:0 local: all-MiniLM-L6-v2 semantic: ollama: nomic-embed-text openai: text-embedding-3-small - gemini: models/text-embedding-004 + gemini: models/gemini-embedding-001 aws: amazon.titan-embed-text-v2:0 local: all-MiniLM-L6-v2 procedural: ollama: nomic-embed-text openai: text-embedding-3-small - gemini: models/text-embedding-004 + gemini: models/gemini-embedding-001 aws: amazon.titan-embed-text-v2:0 local: all-MiniLM-L6-v2 emotional: ollama: nomic-embed-text openai: text-embedding-3-small - gemini: models/text-embedding-004 + gemini: models/gemini-embedding-001 aws: amazon.titan-embed-text-v2:0 local: all-MiniLM-L6-v2 reflective: ollama: nomic-embed-text openai: text-embedding-3-large - gemini: models/text-embedding-004 + gemini: models/gemini-embedding-001 aws: amazon.titan-embed-text-v2:0 local: all-mpnet-base-v2 # Available Ollama models (pull with: ollama pull ) @@ -50,7 +50,8 @@ reflective: # - text-embedding-3-large (3072d) # Gemini models: -# - models/text-embedding-004 (768d) - latest +# - models/gemini-embedding-001 (3072d native, configurable via outputDimensionality) - current GA +# - models/text-embedding-004 (768d) - deprecated, returns 404 # - models/embedding-001 (768d) - deprecated #AWS models: diff --git a/packages/openmemory-js/src/core/models.ts b/packages/openmemory-js/src/core/models.ts index 0e2cf6b7..69b77f9c 100644 --- a/packages/openmemory-js/src/core/models.ts +++ b/packages/openmemory-js/src/core/models.ts @@ -51,7 +51,7 @@ const get_defaults = (): model_cfg => 
({ episodic: { ollama: "nomic-embed-text", openai: "text-embedding-3-small", - gemini: "models/embedding-001", + gemini: "models/gemini-embedding-001", aws: "amazon.titan-embed-text-v2:0", siray: "text-embedding-3-small", local: "all-MiniLM-L6-v2", @@ -59,7 +59,7 @@ const get_defaults = (): model_cfg => ({ semantic: { ollama: "nomic-embed-text", openai: "text-embedding-3-small", - gemini: "models/embedding-001", + gemini: "models/gemini-embedding-001", aws: "amazon.titan-embed-text-v2:0", siray: "text-embedding-3-small", local: "all-MiniLM-L6-v2", @@ -67,21 +67,21 @@ const get_defaults = (): model_cfg => ({ procedural: { ollama: "nomic-embed-text", openai: "text-embedding-3-small", - gemini: "models/embedding-001", + gemini: "models/gemini-embedding-001", aws: "amazon.titan-embed-text-v2:0", local: "all-MiniLM-L6-v2", }, emotional: { ollama: "nomic-embed-text", openai: "text-embedding-3-small", - gemini: "models/embedding-001", + gemini: "models/gemini-embedding-001", aws: "amazon.titan-embed-text-v2:0", local: "all-MiniLM-L6-v2", }, reflective: { ollama: "nomic-embed-text", openai: "text-embedding-3-large", - gemini: "models/embedding-001", + gemini: "models/gemini-embedding-001", aws: "amazon.titan-embed-text-v2:0", local: "all-mpnet-base-v2", }, diff --git a/packages/openmemory-js/src/memory/embed.ts b/packages/openmemory-js/src/memory/embed.ts index 5c63b8de..2c9ec67d 100644 --- a/packages/openmemory-js/src/memory/embed.ts +++ b/packages/openmemory-js/src/memory/embed.ts @@ -307,11 +307,11 @@ async function emb_gemini( ): Promise> { if (!env.gemini_key) throw new Error("Gemini key missing"); const prom = gem_q.then(async () => { - const url = `https://generativelanguage.googleapis.com/v1beta/models/text-embedding-004:batchEmbedContents?key=${env.gemini_key}`; + const url = `https://generativelanguage.googleapis.com/v1beta/models/gemini-embedding-001:batchEmbedContents?key=${env.gemini_key}`; for (let a = 0; a < 3; a++) { try { const reqs = 
Object.entries(txts).map(([s, t]) => ({ - model: "models/text-embedding-004", + model: "models/gemini-embedding-001", content: { parts: [{ text: t }] }, taskType: task_map[s] || task_map.semantic, })); @@ -705,7 +705,7 @@ export const getEmbeddingInfo = () => { } else if (env.emb_kind === "gemini") { i.configured = !!env.gemini_key; i.batch_api = env.embed_mode === "simple"; - i.model = "embedding-001"; + i.model = "gemini-embedding-001"; } else if (env.emb_kind === "aws") { i.configured = !!env.AWS_REGION &&