From 6701619f495c42a449133f16b41f2eb1bf32d807 Mon Sep 17 00:00:00 2001 From: Pavan Kumar Date: Wed, 25 Feb 2026 17:49:48 +0530 Subject: [PATCH 1/4] Enhance embedding functionality with LiteLLM support - Updated environment variables in .env.example to include LiteLLM proxy settings. - Modified embed-vcons.ts to support LiteLLM as an embedding provider, updating the provider detection logic and embedding functions. - Adjusted test-semantic-search.ts to allow embedding generation via LiteLLM or OpenAI. - Updated Supabase functions to prioritize LiteLLM for embeddings, ensuring proper error handling and API integration. - Enhanced sync-to-s3 function to include LiteLLM in the provider priority logic. --- .env.example | 4 ++ scripts/embed-vcons.ts | 68 ++++++++++++++++++++++--- scripts/test-semantic-search.ts | 21 +++++--- supabase/functions/embed-vcons/index.ts | 57 ++++++++++++++------- supabase/functions/sync-to-s3/index.ts | 20 +++++--- 5 files changed, 131 insertions(+), 39 deletions(-) diff --git a/.env.example b/.env.example index acf95d7..508c712 100644 --- a/.env.example +++ b/.env.example @@ -8,6 +8,10 @@ SUPABASE_ANON_KEY=your-anon-key-here # Optional: For service role operations (use carefully!) 
# SUPABASE_SERVICE_ROLE_KEY=your-service-role-key +# LiteLLM proxy (optional; used for embeddings when set) +# LITELLM_PROXY_URL=http://localhost:4000 +# LITELLM_MASTER_KEY=sk-your-master-key + # ============================================================================ # Plugin Configuration # ============================================================================ diff --git a/scripts/embed-vcons.ts b/scripts/embed-vcons.ts index 624a483..f9cef90 100755 --- a/scripts/embed-vcons.ts +++ b/scripts/embed-vcons.ts @@ -22,7 +22,7 @@ * --mode=MODE Mode: 'backfill' (default) or 'embed' * --vcon-id=UUID Specific vCon UUID to embed (required for embed mode) * --limit=N Max text units to process per batch (default: 100, max: 500) - * --provider=PROVIDER Embedding provider: 'openai', 'azure', or 'hf' (auto-detected from env) + * --provider=PROVIDER Embedding provider: 'litellm', 'openai', 'azure', or 'hf' (auto-detected from env) * --continuous, -c Run continuously until all embeddings complete * --delay=N Delay in seconds between batches in continuous mode (default: 2) * --oldest-first Process oldest vCons first (for backfilling old data) @@ -30,6 +30,8 @@ * Environment Variables: * SUPABASE_URL Supabase project URL * SUPABASE_SERVICE_ROLE_KEY Service role key for admin operations + * LITELLM_PROXY_URL LiteLLM proxy base URL (when using LiteLLM; takes priority) + * LITELLM_MASTER_KEY / LITELLM_API_KEY LiteLLM proxy API key * OPENAI_API_KEY OpenAI API key (for text-embedding-3-small) * AZURE_OPENAI_EMBEDDING_ENDPOINT Azure OpenAI base endpoint (e.g., https://your-resource.openai.azure.com) * AZURE_OPENAI_EMBEDDING_API_KEY Azure OpenAI API key @@ -67,7 +69,7 @@ import pLimit from 'p-limit'; // Load environment variables dotenv.config(); -type EmbeddingProvider = 'openai' | 'azure' | 'hf'; +type EmbeddingProvider = 'litellm' | 'openai' | 'azure' | 'hf'; interface TextUnit { vcon_id: string; @@ -119,7 +121,7 @@ function parseArgs(): { limit = Math.max(1, Math.min(500, 
parseInt(arg.split('=')[1], 10))); } else if (arg.startsWith('--provider=')) { const value = arg.split('=')[1] as EmbeddingProvider; - if (value === 'openai' || value === 'azure' || value === 'hf') { + if (value === 'litellm' || value === 'openai' || value === 'azure' || value === 'hf') { provider = value; } } else if (arg === '--continuous' || arg === '-c') { @@ -136,13 +138,17 @@ function parseArgs(): { /** * Detect embedding provider from environment variables - * Priority: Azure OpenAI > OpenAI > Hugging Face + * Priority: LiteLLM > Azure OpenAI > OpenAI > Hugging Face */ function detectProvider(preferredProvider?: EmbeddingProvider): EmbeddingProvider { if (preferredProvider) { return preferredProvider; } - // Azure OpenAI takes priority if endpoint and API key are set + const litellmUrl = (process.env.LITELLM_PROXY_URL ?? '').trim().replace(/\/$/, ''); + const litellmKey = (process.env.LITELLM_MASTER_KEY ?? process.env.LITELLM_API_KEY ?? '').trim(); + if (litellmUrl && litellmKey) { + return 'litellm'; + } if (process.env.AZURE_OPENAI_EMBEDDING_ENDPOINT && process.env.AZURE_OPENAI_EMBEDDING_API_KEY) { return 'azure'; @@ -305,6 +311,41 @@ function truncateToTokens(text: string, maxTokens: number): string { return text.substring(0, maxChars); } +/** + * Generate embeddings via LiteLLM proxy (OpenAI-compatible /v1/embeddings) + */ +async function embedLiteLLM(texts: string[]): Promise { + const baseUrl = (process.env.LITELLM_PROXY_URL ?? '').trim().replace(/\/$/, ''); + const apiKey = (process.env.LITELLM_MASTER_KEY ?? process.env.LITELLM_API_KEY ?? '').trim(); + if (!baseUrl || !apiKey) { + throw new Error('LITELLM_PROXY_URL and LITELLM_MASTER_KEY (or LITELLM_API_KEY) are required'); + } + const url = baseUrl.startsWith('http') ? 
`${baseUrl}/v1/embeddings` : `https://${baseUrl}/v1/embeddings`; + try { + const response = await fetch(url, { + method: 'POST', + headers: { + 'Content-Type': 'application/json', + 'Authorization': `Bearer ${apiKey}` + }, + body: JSON.stringify({ + model: 'text-embedding-3-small', + input: texts, + dimensions: 384 + }) + }); + if (!response.ok) { + const errorText = await response.text(); + throw new Error(`LiteLLM embeddings failed: ${response.status} ${errorText}`); + } + const json = await response.json(); + return json.data.map((d: any) => d.embedding as number[]); + } catch (error) { + if (error instanceof Error) throw error; + throw new Error(`LiteLLM embeddings failed: ${JSON.stringify(error)}`); + } +} + /** * Generate embeddings using OpenAI API */ @@ -453,9 +494,9 @@ async function upsertEmbeddings( ): Promise { const getModelName = (provider: EmbeddingProvider): string => { switch (provider) { + case 'litellm': case 'openai': case 'azure': - // Both use the same underlying model return 'text-embedding-3-small'; case 'hf': return 'sentence-transformers/all-MiniLM-L6-v2'; @@ -511,7 +552,7 @@ async function processEmbeddings( const MAX_TOKENS_PER_ITEM = 8000; const CONCURRENCY_LIMIT = 15; // Process 15 batches concurrently - if (provider === 'openai' || provider === 'azure') { + if (provider === 'litellm' || provider === 'openai' || provider === 'azure') { // Group units into token-aware batches const batches: TextUnit[][] = []; let currentBatch: TextUnit[] = []; @@ -543,7 +584,7 @@ async function processEmbeddings( const startTime = Date.now(); // Choose the appropriate embedding function - const embedFn = provider === 'azure' ? embedAzureOpenAI : embedOpenAI; + const embedFn = provider === 'litellm' ? embedLiteLLM : provider === 'azure' ? 
embedAzureOpenAI : embedOpenAI; const processBatch = async (batch: TextUnit[], batchIndex: number) => { try { @@ -662,6 +703,15 @@ async function main() { process.exit(1); } + if (provider === 'litellm') { + const url = (process.env.LITELLM_PROXY_URL ?? '').trim(); + const key = (process.env.LITELLM_MASTER_KEY ?? process.env.LITELLM_API_KEY ?? '').trim(); + if (!url || !key) { + console.error('❌ LITELLM_PROXY_URL and LITELLM_MASTER_KEY (or LITELLM_API_KEY) required for LiteLLM provider'); + process.exit(1); + } + } + if (provider === 'openai' && !process.env.OPENAI_API_KEY) { console.error('❌ OPENAI_API_KEY not set (required for OpenAI provider)'); console.error(' Set OPENAI_API_KEY or use --provider=azure for Azure OpenAI'); @@ -695,6 +745,8 @@ async function main() { // Display configuration const getProviderDisplayName = (p: EmbeddingProvider): string => { switch (p) { + case 'litellm': + return 'LiteLLM proxy (text-embedding-3-small)'; case 'openai': return 'OpenAI (text-embedding-3-small)'; case 'azure': diff --git a/scripts/test-semantic-search.ts b/scripts/test-semantic-search.ts index dea3e71..6a3e7fa 100644 --- a/scripts/test-semantic-search.ts +++ b/scripts/test-semantic-search.ts @@ -17,10 +17,14 @@ dotenv.config(); const supabaseUrl = process.env.SUPABASE_URL || 'http://127.0.0.1:54321'; const supabaseKey = process.env.SUPABASE_SERVICE_ROLE_KEY || 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiJzdXBhYmFzZS1kZW1vIiwicm9sZSI6InNlcnZpY2Vfcm9sZSIsImV4cCI6MTk4MzgxMjk5Nn0.EGIM96RAZx35lJzdJsyH-qQwv8Hdp7fsn3W0YpN81IU'; + +const litellmUrl = (process.env.LITELLM_PROXY_URL ?? '').trim().replace(/\/$/, ''); +const litellmKey = (process.env.LITELLM_MASTER_KEY ?? process.env.LITELLM_API_KEY ?? 
'').trim(); +const useLiteLLM = Boolean(litellmUrl && litellmKey); const openaiKey = process.env.OPENAI_API_KEY; -if (!openaiKey) { - console.error('❌ OPENAI_API_KEY is required'); +if (!useLiteLLM && !openaiKey) { + console.error('❌ Set LITELLM_PROXY_URL + LITELLM_MASTER_KEY, or OPENAI_API_KEY'); process.exit(1); } @@ -28,14 +32,19 @@ const supabase = createClient(supabaseUrl, supabaseKey); const queries = new VConQueries(supabase); /** - * Generate embedding for a text query using OpenAI + * Generate embedding for a text query using LiteLLM proxy or OpenAI */ async function generateQueryEmbedding(text: string): Promise { - const resp = await fetch('https://api.openai.com/v1/embeddings', { + const url = useLiteLLM + ? (litellmUrl.startsWith('http') ? `${litellmUrl}/v1/embeddings` : `https://${litellmUrl}/v1/embeddings`) + : 'https://api.openai.com/v1/embeddings'; + const apiKey = useLiteLLM ? litellmKey : openaiKey; + + const resp = await fetch(url, { method: 'POST', headers: { 'Content-Type': 'application/json', - 'Authorization': `Bearer ${openaiKey}`, + 'Authorization': `Bearer ${apiKey}`, }, body: JSON.stringify({ model: 'text-embedding-3-small', @@ -45,7 +54,7 @@ async function generateQueryEmbedding(text: string): Promise { }); if (!resp.ok) { - throw new Error(`OpenAI API error: ${resp.status} ${await resp.text()}`); + throw new Error(`${useLiteLLM ? 
'LiteLLM' : 'OpenAI'} API error: ${resp.status} ${await resp.text()}`); } const json = await resp.json(); diff --git a/supabase/functions/embed-vcons/index.ts b/supabase/functions/embed-vcons/index.ts index 27213c6..4ab58bc 100644 --- a/supabase/functions/embed-vcons/index.ts +++ b/supabase/functions/embed-vcons/index.ts @@ -2,24 +2,28 @@ import { serve } from "https://deno.land/std@0.224.0/http/server.ts"; import { createClient } from "https://esm.sh/@supabase/supabase-js@2"; -type EmbeddingProvider = "openai" | "azure" | "hf"; +type EmbeddingProvider = "litellm" | "openai" | "azure" | "hf"; const SUPABASE_URL = Deno.env.get("SUPABASE_URL") ?? ""; const SUPABASE_SERVICE_ROLE_KEY = Deno.env.get("SUPABASE_SERVICE_ROLE_KEY") ?? ""; +const LITELLM_PROXY_URL = (Deno.env.get("LITELLM_PROXY_URL") ?? "").replace(/\/$/, ""); +const LITELLM_MASTER_KEY = Deno.env.get("LITELLM_MASTER_KEY") ?? Deno.env.get("LITELLM_API_KEY"); const OPENAI_API_KEY = Deno.env.get("OPENAI_API_KEY"); const AZURE_OPENAI_EMBEDDING_ENDPOINT = Deno.env.get("AZURE_OPENAI_EMBEDDING_ENDPOINT"); const AZURE_OPENAI_EMBEDDING_API_KEY = Deno.env.get("AZURE_OPENAI_EMBEDDING_API_KEY"); const AZURE_OPENAI_EMBEDDING_API_VERSION = Deno.env.get("AZURE_OPENAI_EMBEDDING_API_VERSION") || "2024-02-01"; const HF_API_TOKEN = Deno.env.get("HF_API_TOKEN"); -// Provider priority: Azure OpenAI > OpenAI > Hugging Face -const PROVIDER: EmbeddingProvider = (AZURE_OPENAI_EMBEDDING_ENDPOINT && AZURE_OPENAI_EMBEDDING_API_KEY) - ? "azure" - : OPENAI_API_KEY - ? "openai" - : HF_API_TOKEN - ? "hf" - : "openai"; +// Provider priority: LiteLLM > Azure OpenAI > OpenAI > Hugging Face +const PROVIDER: EmbeddingProvider = (LITELLM_PROXY_URL && LITELLM_MASTER_KEY) + ? "litellm" + : (AZURE_OPENAI_EMBEDDING_ENDPOINT && AZURE_OPENAI_EMBEDDING_API_KEY) + ? "azure" + : OPENAI_API_KEY + ? "openai" + : HF_API_TOKEN + ? 
"hf" + : "openai"; const supabase = createClient(SUPABASE_URL, SUPABASE_SERVICE_ROLE_KEY, { auth: { persistSession: false } @@ -118,6 +122,22 @@ function truncateToTokens(text: string, maxTokens: number): string { return text.substring(0, maxChars) + "..."; } +async function embedLiteLLM(texts: string[]): Promise { + const baseUrl = LITELLM_PROXY_URL.startsWith("http") ? LITELLM_PROXY_URL : `https://${LITELLM_PROXY_URL}`; + const url = `${baseUrl}/v1/embeddings`; + const resp = await fetch(url, { + method: "POST", + headers: { + "Content-Type": "application/json", + Authorization: `Bearer ${LITELLM_MASTER_KEY}` + }, + body: JSON.stringify({ model: "text-embedding-3-small", input: texts, dimensions: 384 }) + }); + if (!resp.ok) throw new Error(`LiteLLM embeddings failed: ${resp.status} ${await resp.text()}`); + const json = await resp.json(); + return json.data.map((d: any) => d.embedding as number[]); +} + async function embedOpenAI(texts: string[]): Promise { const resp = await fetch("https://api.openai.com/v1/embeddings", { method: "POST", @@ -181,9 +201,9 @@ async function embedHF(texts: string[]): Promise { function getModelName(provider: EmbeddingProvider): string { switch (provider) { + case "litellm": case "openai": case "azure": - // Both use the same underlying model return "text-embedding-3-small"; case "hf": return "sentence-transformers/all-MiniLM-L6-v2"; @@ -214,6 +234,9 @@ serve(async (req) => { const vconId = url.searchParams.get("vcon_id") ?? undefined; const limit = Math.max(1, Math.min(500, Number(url.searchParams.get("limit") ?? 
"100"))); + if (PROVIDER === "litellm" && (!LITELLM_PROXY_URL || !LITELLM_MASTER_KEY)) { + return new Response(JSON.stringify({ error: "LITELLM_PROXY_URL and LITELLM_MASTER_KEY (or LITELLM_API_KEY) missing" }), { status: 400 }); + } if (PROVIDER === "openai" && !OPENAI_API_KEY) { return new Response(JSON.stringify({ error: "OPENAI_API_KEY missing" }), { status: 400 }); } @@ -238,35 +261,35 @@ serve(async (req) => { let totalEmbedded = 0; let totalErrors = 0; - if (PROVIDER === "openai" || PROVIDER === "azure") { + if (PROVIDER === "litellm" || PROVIDER === "openai" || PROVIDER === "azure") { // Group units into token-aware batches const batches: TextUnit[][] = []; let currentBatch: TextUnit[] = []; let currentTokens = 0; - + for (const unit of units) { // Truncate extremely long texts const truncated = truncateToTokens(unit.content_text, MAX_TOKENS_PER_ITEM); const tokens = estimateTokens(truncated); - + // If adding this unit would exceed batch limit, start a new batch if (currentBatch.length > 0 && currentTokens + tokens > MAX_TOKENS_PER_BATCH) { batches.push(currentBatch); currentBatch = []; currentTokens = 0; } - + currentBatch.push({ ...unit, content_text: truncated }); currentTokens += tokens; } - + // Add remaining batch if (currentBatch.length > 0) { batches.push(currentBatch); } - + // Choose the appropriate embedding function - const embedFn = PROVIDER === "azure" ? embedAzureOpenAI : embedOpenAI; + const embedFn = PROVIDER === "litellm" ? embedLiteLLM : PROVIDER === "azure" ? embedAzureOpenAI : embedOpenAI; // Process each batch for (const batch of batches) { diff --git a/supabase/functions/sync-to-s3/index.ts b/supabase/functions/sync-to-s3/index.ts index 9d1940d..db5bca3 100644 --- a/supabase/functions/sync-to-s3/index.ts +++ b/supabase/functions/sync-to-s3/index.ts @@ -11,6 +11,8 @@ const VCON_S3_PREFIX = Deno.env.get("VCON_S3_PREFIX") ?? ""; const AWS_REGION = Deno.env.get("AWS_REGION") ?? 
"us-east-1"; const AWS_ACCESS_KEY_ID = Deno.env.get("AWS_ACCESS_KEY_ID") ?? ""; const AWS_SECRET_ACCESS_KEY = Deno.env.get("AWS_SECRET_ACCESS_KEY") ?? ""; +const LITELLM_PROXY_URL = (Deno.env.get("LITELLM_PROXY_URL") ?? "").replace(/\/$/, ""); +const LITELLM_MASTER_KEY = Deno.env.get("LITELLM_MASTER_KEY") ?? Deno.env.get("LITELLM_API_KEY"); const OPENAI_API_KEY = Deno.env.get("OPENAI_API_KEY"); const AZURE_OPENAI_EMBEDDING_ENDPOINT = Deno.env.get("AZURE_OPENAI_EMBEDDING_ENDPOINT"); const AZURE_OPENAI_EMBEDDING_API_KEY = Deno.env.get("AZURE_OPENAI_EMBEDDING_API_KEY"); @@ -99,14 +101,16 @@ async function uploadToS3(key, jsonStr) { // --------------------------------------------------------------------------- // Embedding helpers (unchanged except for safety fixes) // --------------------------------------------------------------------------- -// Provider priority: Azure OpenAI > OpenAI > Hugging Face -const PROVIDER = (AZURE_OPENAI_EMBEDDING_ENDPOINT && AZURE_OPENAI_EMBEDDING_API_KEY) - ? "azure" - : OPENAI_API_KEY - ? "openai" - : HF_API_TOKEN - ? "hf" - : "openai"; +// Provider priority: LiteLLM > Azure OpenAI > OpenAI > Hugging Face +const PROVIDER = (LITELLM_PROXY_URL && LITELLM_MASTER_KEY) + ? "litellm" + : (AZURE_OPENAI_EMBEDDING_ENDPOINT && AZURE_OPENAI_EMBEDDING_API_KEY) + ? "azure" + : OPENAI_API_KEY + ? "openai" + : HF_API_TOKEN + ? "hf" + : "openai"; function estimateTokens(text) { return Math.ceil(text.length / 3.5); } From 9fc9b52dc142493d15f3369ab160ff40d899abb2 Mon Sep 17 00:00:00 2001 From: Pavan Kumar Date: Thu, 26 Feb 2026 11:11:49 +0530 Subject: [PATCH 2/4] Refactor embedding functions to use shared utilities - Introduced a new shared module for embedding functions, consolidating logic for LiteLLM, OpenAI, Azure OpenAI, and Hugging Face. - Updated embed-vcons.ts to utilize the shared embedding functions, improving code maintainability and reducing duplication. 
- Modified Supabase functions to call shared embedding utilities, ensuring consistent handling of API keys and options. - Removed redundant embedding logic from Supabase functions, streamlining the codebase. --- scripts/embed-vcons.ts | 187 ++--------------------- supabase/functions/_shared/embeddings.ts | 147 ++++++++++++++++++ supabase/functions/embed-vcons/index.ts | 123 ++++----------- 3 files changed, 189 insertions(+), 268 deletions(-) create mode 100644 supabase/functions/_shared/embeddings.ts diff --git a/scripts/embed-vcons.ts b/scripts/embed-vcons.ts index f9cef90..f61251e 100755 --- a/scripts/embed-vcons.ts +++ b/scripts/embed-vcons.ts @@ -65,11 +65,12 @@ import * as dotenv from 'dotenv'; import { getSupabaseClient } from '../dist/db/client.js'; import pLimit from 'p-limit'; +import * as sharedEmbed from '../supabase/functions/_shared/embeddings'; // Load environment variables dotenv.config(); -type EmbeddingProvider = 'litellm' | 'openai' | 'azure' | 'hf'; +type EmbeddingProvider = sharedEmbed.EmbeddingProvider; interface TextUnit { vcon_id: string; @@ -311,176 +312,31 @@ function truncateToTokens(text: string, maxTokens: number): string { return text.substring(0, maxChars); } -/** - * Generate embeddings via LiteLLM proxy (OpenAI-compatible /v1/embeddings) - */ -async function embedLiteLLM(texts: string[]): Promise { +/** Wrapper: reads env and calls shared embedLiteLLM */ +export async function embedLiteLLM(texts: string[]): Promise { const baseUrl = (process.env.LITELLM_PROXY_URL ?? '').trim().replace(/\/$/, ''); const apiKey = (process.env.LITELLM_MASTER_KEY ?? process.env.LITELLM_API_KEY ?? '').trim(); - if (!baseUrl || !apiKey) { - throw new Error('LITELLM_PROXY_URL and LITELLM_MASTER_KEY (or LITELLM_API_KEY) are required'); - } - const url = baseUrl.startsWith('http') ? 
`${baseUrl}/v1/embeddings` : `https://${baseUrl}/v1/embeddings`; - try { - const response = await fetch(url, { - method: 'POST', - headers: { - 'Content-Type': 'application/json', - 'Authorization': `Bearer ${apiKey}` - }, - body: JSON.stringify({ - model: 'text-embedding-3-small', - input: texts, - dimensions: 384 - }) - }); - if (!response.ok) { - const errorText = await response.text(); - throw new Error(`LiteLLM embeddings failed: ${response.status} ${errorText}`); - } - const json = await response.json(); - return json.data.map((d: any) => d.embedding as number[]); - } catch (error) { - if (error instanceof Error) throw error; - throw new Error(`LiteLLM embeddings failed: ${JSON.stringify(error)}`); - } + return sharedEmbed.embedLiteLLM(texts, { baseUrl, apiKey }); } -/** - * Generate embeddings using OpenAI API - */ +/** Wrapper: reads env and calls shared embedOpenAI */ async function embedOpenAI(texts: string[]): Promise { - const apiKey = process.env.OPENAI_API_KEY; - if (!apiKey) { - throw new Error('OPENAI_API_KEY not set'); - } - - try { - const response = await fetch('https://api.openai.com/v1/embeddings', { - method: 'POST', - headers: { - 'Content-Type': 'application/json', - 'Authorization': `Bearer ${apiKey}` - }, - body: JSON.stringify({ - model: 'text-embedding-3-small', - input: texts, - dimensions: 384 - }) - }); - - if (!response.ok) { - const errorText = await response.text(); - let errorDetails = ''; - try { - const errorJson = JSON.parse(errorText); - errorDetails = JSON.stringify(errorJson, null, 2); - } catch { - errorDetails = errorText; - } - throw new Error(`OpenAI API error ${response.status}: ${errorDetails}`); - } - - const json = await response.json(); - return json.data.map((d: any) => d.embedding as number[]); - } catch (error) { - if (error instanceof Error) { - throw error; - } - throw new Error(`OpenAI embeddings failed: ${JSON.stringify(error)}`); - } + const apiKey = process.env.OPENAI_API_KEY ?? 
''; + return sharedEmbed.embedOpenAI(texts, { apiKey }); } -/** - * Generate embeddings using Azure OpenAI API - */ +/** Wrapper: reads env and calls shared embedAzureOpenAI */ async function embedAzureOpenAI(texts: string[]): Promise { - const baseEndpoint = process.env.AZURE_OPENAI_EMBEDDING_ENDPOINT; - const apiKey = process.env.AZURE_OPENAI_EMBEDDING_API_KEY; - const deployment = 'text-embedding-3-small'; + const endpoint = process.env.AZURE_OPENAI_EMBEDDING_ENDPOINT ?? ''; + const apiKey = process.env.AZURE_OPENAI_EMBEDDING_API_KEY ?? ''; const apiVersion = process.env.AZURE_OPENAI_EMBEDDING_API_VERSION || '2024-02-01'; - - if (!baseEndpoint || !apiKey) { - throw new Error('AZURE_OPENAI_EMBEDDING_ENDPOINT and AZURE_OPENAI_EMBEDDING_API_KEY are required'); - } - - // Construct the full URL: {endpoint}/openai/deployments/{deployment}/embeddings?api-version={version} - const normalizedEndpoint = baseEndpoint.replace(/\/$/, ''); // Remove trailing slash if present - const url = `${normalizedEndpoint}/openai/deployments/${deployment}/embeddings?api-version=${apiVersion}`; - - try { - const response = await fetch(url, { - method: 'POST', - headers: { - 'Content-Type': 'application/json', - 'api-key': apiKey - }, - body: JSON.stringify({ - input: texts, - dimensions: 384 - }) - }); - - if (!response.ok) { - const errorText = await response.text(); - let errorDetails = ''; - try { - const errorJson = JSON.parse(errorText); - errorDetails = JSON.stringify(errorJson, null, 2); - } catch { - errorDetails = errorText; - } - throw new Error(`Azure OpenAI API error ${response.status}: ${errorDetails}`); - } - - const json = await response.json(); - return json.data.map((d: any) => d.embedding as number[]); - } catch (error) { - if (error instanceof Error) { - throw error; - } - throw new Error(`Azure OpenAI embeddings failed: ${JSON.stringify(error)}`); - } + return sharedEmbed.embedAzureOpenAI(texts, { endpoint, apiKey, apiVersion }); } -/** - * Generate embeddings using 
Hugging Face API - */ +/** Wrapper: reads env and calls shared embedHF */ async function embedHF(texts: string[]): Promise { - const apiToken = process.env.HF_API_TOKEN; - if (!apiToken) { - throw new Error('HF_API_TOKEN not set'); - } - - const result: number[][] = []; - - for (const text of texts) { - const response = await fetch( - 'https://api-inference.huggingface.co/pipeline/feature-extraction/sentence-transformers/all-MiniLM-L6-v2', - { - method: 'POST', - headers: { - 'Authorization': `Bearer ${apiToken}`, - 'Content-Type': 'application/json' - }, - body: JSON.stringify({ - inputs: text, - options: { wait_for_model: true } - }) - } - ); - - if (!response.ok) { - const errorText = await response.text(); - throw new Error(`HF embeddings failed: ${response.status} ${errorText}`); - } - - const json = await response.json(); - const vec = Array.isArray(json[0]) ? json[0] : json; - result.push(vec as number[]); - } - - return result; + const apiToken = process.env.HF_API_TOKEN ?? ''; + return sharedEmbed.embedHF(texts, { apiToken }); } /** @@ -492,24 +348,13 @@ async function upsertEmbeddings( vectors: number[][], provider: EmbeddingProvider ): Promise { - const getModelName = (provider: EmbeddingProvider): string => { - switch (provider) { - case 'litellm': - case 'openai': - case 'azure': - return 'text-embedding-3-small'; - case 'hf': - return 'sentence-transformers/all-MiniLM-L6-v2'; - } - }; - const rows = units.map((u, i) => ({ vcon_id: u.vcon_id, content_type: u.content_type, content_reference: u.content_reference, content_text: u.content_text, embedding: vectors[i], - embedding_model: getModelName(provider), + embedding_model: sharedEmbed.getModelName(provider), embedding_dimension: 384 })); diff --git a/supabase/functions/_shared/embeddings.ts b/supabase/functions/_shared/embeddings.ts new file mode 100644 index 0000000..45f66ea --- /dev/null +++ b/supabase/functions/_shared/embeddings.ts @@ -0,0 +1,147 @@ +/** + * Shared embedding functions for LiteLLM, 
OpenAI, Azure OpenAI, and Hugging Face. + * Used by scripts/embed-vcons.ts (Node) and supabase/functions/embed-vcons (Deno). + * All functions take explicit options (no process.env / Deno.env) so callers supply credentials. + */ + +export type EmbeddingProvider = "litellm" | "openai" | "azure" | "hf"; + +const EMBEDDING_MODEL = "text-embedding-3-small"; +const DIMENSIONS = 384; + +export interface LiteLLMOptions { + baseUrl: string; + apiKey: string; +} + +export interface OpenAIOptions { + apiKey: string; +} + +export interface AzureOpenAIOptions { + endpoint: string; + apiKey: string; + apiVersion?: string; +} + +export interface HFOptions { + apiToken: string; +} + +/** + * Generate embeddings via LiteLLM proxy (OpenAI-compatible /v1/embeddings) + */ +export async function embedLiteLLM(texts: string[], options: LiteLLMOptions): Promise { + const { baseUrl, apiKey } = options; + const normalized = baseUrl.trim().replace(/\/$/, ""); + if (!normalized || !apiKey) { + throw new Error("LITELLM_PROXY_URL and LITELLM_MASTER_KEY (or LITELLM_API_KEY) are required"); + } + const url = normalized.startsWith("http") ? 
`${normalized}/v1/embeddings` : `https://${normalized}/v1/embeddings`; + const resp = await fetch(url, { + method: "POST", + headers: { + "Content-Type": "application/json", + Authorization: `Bearer ${apiKey}`, + }, + body: JSON.stringify({ model: EMBEDDING_MODEL, input: texts, dimensions: DIMENSIONS }), + }); + if (!resp.ok) { + const text = await resp.text(); + throw new Error(`LiteLLM embeddings failed: ${resp.status} ${text}`); + } + const json = await resp.json(); + return json.data.map((d: { embedding: number[] }) => d.embedding); +} + +/** + * Generate embeddings using OpenAI API + */ +export async function embedOpenAI(texts: string[], options: OpenAIOptions): Promise { + const { apiKey } = options; + if (!apiKey) throw new Error("OPENAI_API_KEY not set"); + const resp = await fetch("https://api.openai.com/v1/embeddings", { + method: "POST", + headers: { + "Content-Type": "application/json", + Authorization: `Bearer ${apiKey}`, + }, + body: JSON.stringify({ model: EMBEDDING_MODEL, input: texts, dimensions: DIMENSIONS }), + }); + if (!resp.ok) { + const text = await resp.text(); + throw new Error(`OpenAI embeddings failed: ${resp.status} ${text}`); + } + const json = await resp.json(); + return json.data.map((d: { embedding: number[] }) => d.embedding); +} + +/** + * Generate embeddings using Azure OpenAI API + */ +export async function embedAzureOpenAI(texts: string[], options: AzureOpenAIOptions): Promise { + const { endpoint, apiKey, apiVersion = "2024-02-01" } = options; + if (!endpoint || !apiKey) { + throw new Error("AZURE_OPENAI_EMBEDDING_ENDPOINT and AZURE_OPENAI_EMBEDDING_API_KEY are required"); + } + const normalized = endpoint.replace(/\/$/, ""); + const url = `${normalized}/openai/deployments/${EMBEDDING_MODEL}/embeddings?api-version=${apiVersion}`; + const resp = await fetch(url, { + method: "POST", + headers: { + "Content-Type": "application/json", + "api-key": apiKey, + }, + body: JSON.stringify({ input: texts, dimensions: DIMENSIONS }), + }); 
+ if (!resp.ok) { + const text = await resp.text(); + throw new Error(`Azure OpenAI embeddings failed: ${resp.status} ${text}`); + } + const json = await resp.json(); + return json.data.map((d: { embedding: number[] }) => d.embedding); +} + +const HF_MODEL = "sentence-transformers/all-MiniLM-L6-v2"; +const HF_URL = "https://api-inference.huggingface.co/pipeline/feature-extraction/" + HF_MODEL; + +/** + * Generate embeddings using Hugging Face Inference API (one request per text) + */ +export async function embedHF(texts: string[], options: HFOptions): Promise { + const { apiToken } = options; + if (!apiToken) throw new Error("HF_API_TOKEN not set"); + const result: number[][] = []; + for (const text of texts) { + const resp = await fetch(HF_URL, { + method: "POST", + headers: { + Authorization: `Bearer ${apiToken}`, + "Content-Type": "application/json", + }, + body: JSON.stringify({ inputs: text, options: { wait_for_model: true } }), + }); + if (!resp.ok) { + const textErr = await resp.text(); + throw new Error(`HF embeddings failed: ${resp.status} ${textErr}`); + } + const json = await resp.json(); + const vec = Array.isArray(json[0]) ? 
json[0] : json; + result.push(vec as number[]); + } + return result; +} + +/** + * Model name for storage (embedding_model column) + */ +export function getModelName(provider: EmbeddingProvider): string { + switch (provider) { + case "litellm": + case "openai": + case "azure": + return EMBEDDING_MODEL; + case "hf": + return HF_MODEL; + } +} diff --git a/supabase/functions/embed-vcons/index.ts b/supabase/functions/embed-vcons/index.ts index 4ab58bc..4a14c1d 100644 --- a/supabase/functions/embed-vcons/index.ts +++ b/supabase/functions/embed-vcons/index.ts @@ -1,8 +1,14 @@ // deno-lint-ignore-file no-explicit-any import { serve } from "https://deno.land/std@0.224.0/http/server.ts"; import { createClient } from "https://esm.sh/@supabase/supabase-js@2"; - -type EmbeddingProvider = "litellm" | "openai" | "azure" | "hf"; +import { + type EmbeddingProvider, + embedLiteLLM, + embedOpenAI, + embedAzureOpenAI, + embedHF, + getModelName, +} from "../_shared/embeddings.ts"; const SUPABASE_URL = Deno.env.get("SUPABASE_URL") ?? ""; const SUPABASE_SERVICE_ROLE_KEY = Deno.env.get("SUPABASE_SERVICE_ROLE_KEY") ?? ""; @@ -122,94 +128,6 @@ function truncateToTokens(text: string, maxTokens: number): string { return text.substring(0, maxChars) + "..."; } -async function embedLiteLLM(texts: string[]): Promise { - const baseUrl = LITELLM_PROXY_URL.startsWith("http") ? 
LITELLM_PROXY_URL : `https://${LITELLM_PROXY_URL}`; - const url = `${baseUrl}/v1/embeddings`; - const resp = await fetch(url, { - method: "POST", - headers: { - "Content-Type": "application/json", - Authorization: `Bearer ${LITELLM_MASTER_KEY}` - }, - body: JSON.stringify({ model: "text-embedding-3-small", input: texts, dimensions: 384 }) - }); - if (!resp.ok) throw new Error(`LiteLLM embeddings failed: ${resp.status} ${await resp.text()}`); - const json = await resp.json(); - return json.data.map((d: any) => d.embedding as number[]); -} - -async function embedOpenAI(texts: string[]): Promise { - const resp = await fetch("https://api.openai.com/v1/embeddings", { - method: "POST", - headers: { - "Content-Type": "application/json", - Authorization: `Bearer ${OPENAI_API_KEY}` - }, - body: JSON.stringify({ model: "text-embedding-3-small", input: texts, dimensions: 384 }) - }); - if (!resp.ok) throw new Error(`OpenAI embeddings failed: ${resp.status} ${await resp.text()}`); - const json = await resp.json(); - return json.data.map((d: any) => d.embedding as number[]); -} - -async function embedAzureOpenAI(texts: string[]): Promise { - if (!AZURE_OPENAI_EMBEDDING_ENDPOINT || !AZURE_OPENAI_EMBEDDING_API_KEY) { - throw new Error("AZURE_OPENAI_EMBEDDING_ENDPOINT and AZURE_OPENAI_EMBEDDING_API_KEY are required"); - } - - // Construct the full URL: {endpoint}/openai/deployments/{deployment}/embeddings?api-version={version} - const normalizedEndpoint = AZURE_OPENAI_EMBEDDING_ENDPOINT.replace(/\/$/, ""); - const deployment = "text-embedding-3-small"; - const url = `${normalizedEndpoint}/openai/deployments/${deployment}/embeddings?api-version=${AZURE_OPENAI_EMBEDDING_API_VERSION}`; - - const resp = await fetch(url, { - method: "POST", - headers: { - "Content-Type": "application/json", - "api-key": AZURE_OPENAI_EMBEDDING_API_KEY - }, - body: JSON.stringify({ input: texts, dimensions: 384 }) - }); - if (!resp.ok) throw new Error(`Azure OpenAI embeddings failed: ${resp.status} 
${await resp.text()}`); - const json = await resp.json(); - return json.data.map((d: any) => d.embedding as number[]); -} - -async function embedHF(texts: string[]): Promise { - // Hugging Face Inference API batched: one by one fallback for simplicity - const result: number[][] = []; - for (const t of texts) { - const resp = await fetch( - "https://api-inference.huggingface.co/pipeline/feature-extraction/sentence-transformers/all-MiniLM-L6-v2", - { - method: "POST", - headers: { - Authorization: `Bearer ${HF_API_TOKEN}`, - "Content-Type": "application/json" - }, - body: JSON.stringify({ inputs: t, options: { wait_for_model: true } }) - } - ); - if (!resp.ok) throw new Error(`HF embeddings failed: ${resp.status} ${await resp.text()}`); - const json = await resp.json(); - // Response is nested array [1 x 384] → flatten - const vec = Array.isArray(json[0]) ? json[0] : json; - result.push(vec as number[]); - } - return result; -} - -function getModelName(provider: EmbeddingProvider): string { - switch (provider) { - case "litellm": - case "openai": - case "azure": - return "text-embedding-3-small"; - case "hf": - return "sentence-transformers/all-MiniLM-L6-v2"; - } -} - async function upsertEmbeddings(units: TextUnit[], vectors: number[][]) { const rows = units.map((u, i) => ({ vcon_id: u.vcon_id, @@ -288,14 +206,25 @@ serve(async (req) => { batches.push(currentBatch); } - // Choose the appropriate embedding function - const embedFn = PROVIDER === "litellm" ? embedLiteLLM : PROVIDER === "azure" ? embedAzureOpenAI : embedOpenAI; - - // Process each batch + // Process each batch with shared embedders (pass env as options) for (const batch of batches) { try { const texts = batch.map((u) => u.content_text); - const vectors = await embedFn(texts); + let vectors: number[][]; + switch (PROVIDER) { + case "litellm": + vectors = await embedLiteLLM(texts, { baseUrl: LITELLM_PROXY_URL, apiKey: LITELLM_MASTER_KEY ?? 
"" }); + break; + case "azure": + vectors = await embedAzureOpenAI(texts, { + endpoint: AZURE_OPENAI_EMBEDDING_ENDPOINT ?? "", + apiKey: AZURE_OPENAI_EMBEDDING_API_KEY ?? "", + apiVersion: AZURE_OPENAI_EMBEDDING_API_VERSION, + }); + break; + default: + vectors = await embedOpenAI(texts, { apiKey: OPENAI_API_KEY ?? "" }); + } await upsertEmbeddings(batch, vectors); totalEmbedded += batch.length; } catch (e) { @@ -304,9 +233,9 @@ serve(async (req) => { } } } else { - // HF processes one at a time anyway + // HF const texts = units.map((u) => u.content_text); - const vectors = await embedHF(texts); + const vectors = await embedHF(texts, { apiToken: HF_API_TOKEN ?? "" }); await upsertEmbeddings(units, vectors); totalEmbedded = units.length; } From 7e7b9de236198665274835dd425ee3d3103cbddf Mon Sep 17 00:00:00 2001 From: Thomas Howe Date: Tue, 14 Apr 2026 13:48:06 -0400 Subject: [PATCH 3/4] fix: resolve OTEL stdout corruption, round() type error, and semantic search embedding generation - Replace ConsoleSpanExporter/ConsoleMetricExporter with NullSpanExporter/ NullMetricExporter to prevent OTEL trace data from polluting MCP stdio - Gate DiagConsoleLogger behind OTEL_EXPORTER_TYPE=otlp check - Fix pino-pretty destination: move to transport options so worker thread writes to stderr (fd 2) instead of stdout - Fix ROUND(double precision, N) -> ROUND(expr::numeric, N) in getContentVolumeTrends to resolve pg error 42883 - Add generateEmbedding() to search handlers so search_vcons_semantic and search_vcons_hybrid auto-generate embeddings from query text via OpenAI Co-Authored-By: Claude Sonnet 4.6 --- src/db/database-analytics.ts | 2 +- src/observability/config.ts | 48 ++++++++++++++++++++++++------ src/observability/logger.ts | 26 ++++++++-------- src/tools/handlers/search.ts | 57 +++++++++++++++++++++++++++++------- 4 files changed, 98 insertions(+), 35 deletions(-) diff --git a/src/db/database-analytics.ts b/src/db/database-analytics.ts index 1e032ab..1dc8ec4 100644 --- 
a/src/db/database-analytics.ts +++ b/src/db/database-analytics.ts @@ -581,7 +581,7 @@ export class DatabaseAnalytics { analysis_count, attachment_count, total_duration, - ROUND(total_duration / NULLIF(dialog_count, 0), 2) as avg_duration_per_dialog + ROUND((total_duration / NULLIF(dialog_count, 0))::numeric, 2) as avg_duration_per_dialog FROM content_trends ORDER BY period `; diff --git a/src/observability/config.ts b/src/observability/config.ts index 01111e4..47aaebf 100644 --- a/src/observability/config.ts +++ b/src/observability/config.ts @@ -18,12 +18,39 @@ import { getNodeAutoInstrumentations } from '@opentelemetry/auto-instrumentation import { OTLPMetricExporter } from '@opentelemetry/exporter-metrics-otlp-http'; import { OTLPTraceExporter } from '@opentelemetry/exporter-trace-otlp-http'; import { Resource } from '@opentelemetry/resources'; -import { ConsoleMetricExporter, PeriodicExportingMetricReader } from '@opentelemetry/sdk-metrics'; +import { AggregationTemporality, MetricReader, PushMetricExporter } from '@opentelemetry/sdk-metrics'; +import { PeriodicExportingMetricReader } from '@opentelemetry/sdk-metrics'; import { NodeSDK } from '@opentelemetry/sdk-node'; -import { ConsoleSpanExporter } from '@opentelemetry/sdk-trace-node'; +import { ReadableSpan, SpanExporter } from '@opentelemetry/sdk-trace-base'; +import { ExportResult, ExportResultCode } from '@opentelemetry/core'; import { SEMRESATTRS_SERVICE_NAME, SEMRESATTRS_SERVICE_VERSION } from '@opentelemetry/semantic-conventions'; import { logger as pinoLogger } from './logger.js'; +/** + * Null span exporter — discards all spans without writing to stdout/stderr. + * Used when OTEL_EXPORTER_TYPE is not 'otlp' to avoid corrupting MCP stdio. 
+ */ +class NullSpanExporter implements SpanExporter { + export(_spans: ReadableSpan[], resultCallback: (result: ExportResult) => void): void { + resultCallback({ code: ExportResultCode.SUCCESS }); + } + async shutdown(): Promise {} +} + +/** + * Null metric exporter — discards all metrics without writing to stdout/stderr. + */ +class NullMetricExporter implements PushMetricExporter { + async export(_metrics: any, resultCallback: (result: ExportResult) => void): Promise { + resultCallback({ code: ExportResultCode.SUCCESS }); + } + async shutdown(): Promise {} + async forceFlush(): Promise {} + selectAggregationTemporality(): AggregationTemporality { + return AggregationTemporality.CUMULATIVE; + } +} + // Use a child logger for observability config const logger = pinoLogger.child({ component: 'observability-config' }); @@ -93,8 +120,11 @@ export async function initializeObservability(): Promise { } try { - // Set up diagnostic logging - diag.setLogger(new DiagConsoleLogger(), config.logLevel); + // Set up diagnostic logging — only when using OTLP; DiagConsoleLogger writes to stdout + // which corrupts MCP stdio in non-OTLP mode. 
+ if (config.exporterType === 'otlp') { + diag.setLogger(new DiagConsoleLogger(), config.logLevel); + } // Create resource with service information const resource = new Resource({ @@ -113,8 +143,8 @@ export async function initializeObservability(): Promise { endpoint: config.endpoint }, 'OpenTelemetry traces configured'); } else { - traceExporter = new ConsoleSpanExporter(); - logger.info({ exporter: 'console' }, 'OpenTelemetry traces configured'); + traceExporter = new NullSpanExporter(); + logger.info({ exporter: 'null' }, 'OpenTelemetry traces configured (null exporter — set OTEL_EXPORTER_TYPE=otlp to export)'); } // Configure metric exporter @@ -133,15 +163,15 @@ export async function initializeObservability(): Promise { interval_ms: 60000 }, 'OpenTelemetry metrics configured'); } else { - const metricExporter = new ConsoleMetricExporter(); + const metricExporter = new NullMetricExporter(); metricReader = new PeriodicExportingMetricReader({ exporter: metricExporter, exportIntervalMillis: 60000, }); logger.info({ - exporter: 'console', + exporter: 'null', interval_ms: 60000 - }, 'OpenTelemetry metrics configured'); + }, 'OpenTelemetry metrics configured (null exporter — set OTEL_EXPORTER_TYPE=otlp to export)'); } // Initialize SDK diff --git a/src/observability/logger.ts b/src/observability/logger.ts index 2b93baa..d3e9abd 100644 --- a/src/observability/logger.ts +++ b/src/observability/logger.ts @@ -52,22 +52,20 @@ function createRootLogger() { // Development: Pretty printing if (isDevelopment) { - return pino( - { - ...baseConfig, - transport: { - target: 'pino-pretty', - options: { - colorize: true, - translateTime: 'HH:MM:ss.l', - ignore: 'pid,hostname', - messageFormat: '{component} - {msg}', - singleLine: false, - }, + return pino({ + ...baseConfig, + transport: { + target: 'pino-pretty', + options: { + colorize: true, + translateTime: 'HH:MM:ss.l', + ignore: 'pid,hostname', + messageFormat: '{component} - {msg}', + singleLine: false, + destination: 2, // 
stderr — transport runs in worker thread, destination must be set here }, }, - pino.destination(2) // stderr for MCP - ); + }); } // Production: JSON to stderr diff --git a/src/tools/handlers/search.ts b/src/tools/handlers/search.ts index 7e39ee2..b256a3d 100644 --- a/src/tools/handlers/search.ts +++ b/src/tools/handlers/search.ts @@ -9,6 +9,41 @@ import { VCon } from '../../types/vcon.js'; import { BaseToolHandler, ToolHandlerContext, ToolResponse } from './base.js'; import { normalizeDateString, requireNonEmptyString } from './validation.js'; +/** + * Generate a 384-dim embedding for a text query via OpenAI text-embedding-3-small. + * Reads OPENAI_API_KEY from the environment at call time. + */ +async function generateEmbedding(query: string): Promise { + const apiKey = process.env.OPENAI_API_KEY; + if (!apiKey) { + throw new McpError( + ErrorCode.InvalidParams, + 'OPENAI_API_KEY is not set — cannot generate query embedding for semantic search.' + ); + } + const resp = await fetch('https://api.openai.com/v1/embeddings', { + method: 'POST', + headers: { + 'Content-Type': 'application/json', + Authorization: `Bearer ${apiKey}`, + }, + body: JSON.stringify({ + model: 'text-embedding-3-small', + input: query, + dimensions: 384, + }), + }); + if (!resp.ok) { + const text = await resp.text(); + throw new McpError( + ErrorCode.InternalError, + `OpenAI embedding API error: ${resp.status} ${text}` + ); + } + const json = await resp.json() as { data: { embedding: number[] }[] }; + return json.data[0].embedding; +} + /** * Handler for search_vcons tool */ @@ -208,13 +243,9 @@ export class SearchVConsSemanticHandler extends BaseToolHandler { let embedding = args?.embedding as number[] | undefined; const query = args?.query as string | undefined; - // If no embedding provided but query is, generate embedding + // If no embedding provided but query is, generate one on-the-fly if (!embedding && query) { - // For now, require pre-computed embeddings - throw new McpError( - 
ErrorCode.InvalidParams, - 'Embedding generation not yet implemented. Please provide a pre-computed embedding vector (384 dimensions) or use search_vcons_content for keyword search.' - ); + embedding = await generateEmbedding(query); } if (!embedding) { @@ -268,12 +299,16 @@ export class SearchVConsHybridHandler extends BaseToolHandler { throw new McpError(ErrorCode.InvalidParams, 'Embedding must be 384 dimensions'); } - // If no embedding provided, use keyword-only search + // If no embedding provided, generate one on-the-fly; fall back to keyword-only if that fails if (!embedding) { - logWithContext('warn', 'No embedding provided for hybrid search, falling back to keyword-only', { - tool_name: this.toolName, - query - }); + try { + embedding = await generateEmbedding(query); + } catch (e) { + logWithContext('warn', 'Could not generate embedding for hybrid search, falling back to keyword-only', { + tool_name: this.toolName, + error: e instanceof Error ? e.message : String(e), + }); + } } const results = await context.queries.hybridSearch({ From 353932bad09297daf25a465618ed515763e019a2 Mon Sep 17 00:00:00 2001 From: Thomas Howe Date: Tue, 14 Apr 2026 13:48:16 -0400 Subject: [PATCH 4/4] docs: align documentation with v0.4.0 spec and LiteLLM embedding support - Remove stale "not yet implemented" note from search.md (semantic search now generates embeddings automatically) - Fix broken cross-references in search.md pointing to wrong directory - Add historical warning banner to embeddings.md; update all SQL from ada-002/1536-dim to text-embedding-3-small/384-dim - Fix CORRECTED_SCHEMA.md: version 0.3.0->0.4.0, mimetype->mediatype, appended->amended, must_support->critical throughout - Update tools.md examples: vcon version 0.3.0->0.4.0, must_support->critical - Add LiteLLM as priority-1 provider to INGEST_AND_EMBEDDINGS.md with env vars and --provider=litellm example; fix spec version reference - Add LiteLLM env vars and provider priority order to installation.md 
Co-Authored-By: Claude Sonnet 4.6 --- docs/api/tools.md | 10 ++++---- docs/development/INGEST_AND_EMBEDDINGS.md | 18 ++++++++------ docs/development/embeddings.md | 30 ++++++++++++----------- docs/guide/installation.md | 7 +++++- docs/guide/search.md | 8 +++--- docs/reference/CORRECTED_SCHEMA.md | 22 ++++++++--------- 6 files changed, 51 insertions(+), 44 deletions(-) diff --git a/docs/api/tools.md b/docs/api/tools.md index fe94be6..7b9cab0 100644 --- a/docs/api/tools.md +++ b/docs/api/tools.md @@ -72,7 +72,7 @@ Create a new vCon (Virtual Conversation) record. ```typescript { vcon_data: { - vcon: "0.3.0", // vCon version + vcon: "0.4.0", // vCon version uuid?: string, // Auto-generated if not provided subject?: string, // Conversation subject parties: Party[], // At least one party required @@ -80,7 +80,7 @@ Create a new vCon (Virtual Conversation) record. analysis?: Analysis[], // Optional AI analysis attachments?: Attachment[], // Optional files extensions?: string[], // Optional extensions - must_support?: string[] // Optional requirements + critical?: string[] // Optional: extensions that must be supported (v0.4.0) }, metadata?: { basename?: string, @@ -106,7 +106,7 @@ Create a new vCon (Virtual Conversation) record. ```typescript { "vcon_data": { - "vcon": "0.3.0", + "vcon": "0.4.0", "subject": "Customer Support Call", "parties": [ { @@ -146,7 +146,7 @@ Retrieve a vCon by UUID. { success: boolean, vcon: { - vcon: "0.3.0", + vcon: "0.4.0", uuid: string, created_at: string, updated_at?: string, @@ -181,7 +181,7 @@ Update vCon metadata and top-level fields. 
updates: { subject?: string, extensions?: string[], - must_support?: string[], + critical?: string[], [key: string]: any }, merge_strategy?: "replace" | "merge" | "append", // Default: "merge" diff --git a/docs/development/INGEST_AND_EMBEDDINGS.md b/docs/development/INGEST_AND_EMBEDDINGS.md index 90a760e..c1c17ce 100644 --- a/docs/development/INGEST_AND_EMBEDDINGS.md +++ b/docs/development/INGEST_AND_EMBEDDINGS.md @@ -19,7 +19,7 @@ npm install - `SUPABASE_URL` - `SUPABASE_SERVICE_ROLE_KEY` - `VCON_S3_BUCKET` (optional, for S3 loading) - - `OPENAI_API_KEY` or `HF_API_TOKEN` (for embeddings) + - One embedding provider (see priority order below): `LITELLM_PROXY_URL`+`LITELLM_MASTER_KEY`, Azure OpenAI vars, `OPENAI_API_KEY`, or `HF_API_TOKEN` --- @@ -56,7 +56,7 @@ npm run sync:vcons -- /absolute/path/to/vcons Notes: - Use absolute directory paths. Files must end with `.vcon` extension. - The script is idempotent and skips vCons already in the database. -- Handles both legacy (0.0.1-0.2.0) and current (0.3.0) vCon specs. +- Handles both legacy (0.0.1-0.3.0) and current (0.4.0) vCon specs. --- @@ -76,12 +76,11 @@ Analysis elements with `encoding='none'` are prioritized because they contain pl Environment variables (set in `.env` file or exported): - `SUPABASE_URL` - `SUPABASE_SERVICE_ROLE_KEY` -- One provider (choose one): - - `OPENAI_API_KEY` (uses `text-embedding-3-small` with `dimensions=384`) - - or Azure OpenAI: - - `AZURE_OPENAI_EMBEDDING_ENDPOINT` (e.g., `https://your-resource.openai.azure.com`) - - `AZURE_OPENAI_EMBEDDING_API_KEY` - - or `HF_API_TOKEN` (Hugging Face Inference API with `sentence-transformers/all-MiniLM-L6-v2`) +- One provider (auto-detected in priority order — first match wins): + 1. **LiteLLM** (highest priority): `LITELLM_PROXY_URL` + `LITELLM_MASTER_KEY` (or `LITELLM_API_KEY`) + 2. **Azure OpenAI**: `AZURE_OPENAI_EMBEDDING_ENDPOINT` + `AZURE_OPENAI_EMBEDDING_API_KEY` + 3. 
**OpenAI**: `OPENAI_API_KEY` (uses `text-embedding-3-small` with `dimensions=384`) + 4. **Hugging Face**: `HF_API_TOKEN` (uses `sentence-transformers/all-MiniLM-L6-v2`) #### Generate Embeddings @@ -101,6 +100,9 @@ For more control, use the script directly: # Process 100 units (default) npx tsx scripts/embed-vcons.ts +# Process with LiteLLM proxy (set LITELLM_PROXY_URL + LITELLM_MASTER_KEY in .env) +npx tsx scripts/embed-vcons.ts --limit=500 --provider=litellm + # Process 500 units with OpenAI npx tsx scripts/embed-vcons.ts --limit=500 --provider=openai diff --git a/docs/development/embeddings.md b/docs/development/embeddings.md index 3b2fa3d..efe1540 100644 --- a/docs/development/embeddings.md +++ b/docs/development/embeddings.md @@ -1,5 +1,7 @@ # Supabase Semantic Search Implementation Guide +> **⚠️ Historical reference document.** This guide predates the production embedding pipeline and originally described a 1536-dim / `text-embedding-ada-002` design. Its examples have since been updated to the current **`text-embedding-3-small` at 384 dimensions**, but they remain illustrative rather than the deployed implementation. For the authoritative setup, see [`docs/development/INGEST_AND_EMBEDDINGS.md`](./INGEST_AND_EMBEDDINGS.md). + ## Overview This guide explains how to implement semantic search for vCon conversation content in Supabase PostgreSQL using the **pgvector** extension for vector similarity search. 
@@ -48,18 +50,18 @@ SELECT * FROM pg_extension WHERE extname = 'vector'; ```sql -- Add vector columns to existing tables ALTER TABLE vcons -ADD COLUMN subject_embedding vector(1536), -- OpenAI ada-002 dimension -ADD COLUMN subject_embedding_model TEXT DEFAULT 'text-embedding-ada-002', +ADD COLUMN subject_embedding vector(384), -- text-embedding-3-small dimension +ADD COLUMN subject_embedding_model TEXT DEFAULT 'text-embedding-3-small', ADD COLUMN subject_embedding_updated_at TIMESTAMPTZ; ALTER TABLE dialog -ADD COLUMN content_embedding vector(1536), -ADD COLUMN content_embedding_model TEXT DEFAULT 'text-embedding-ada-002', +ADD COLUMN content_embedding vector(384), +ADD COLUMN content_embedding_model TEXT DEFAULT 'text-embedding-3-small', ADD COLUMN content_embedding_updated_at TIMESTAMPTZ; ALTER TABLE analysis -ADD COLUMN summary_embedding vector(1536), -ADD COLUMN summary_embedding_model TEXT DEFAULT 'text-embedding-ada-002', +ADD COLUMN summary_embedding vector(384), +ADD COLUMN summary_embedding_model TEXT DEFAULT 'text-embedding-3-small', ADD COLUMN summary_embedding_updated_at TIMESTAMPTZ; ``` @@ -79,9 +81,9 @@ CREATE TABLE vcon_embeddings ( content_text TEXT NOT NULL, -- Original text that was embedded -- The embedding - embedding vector(1536) NOT NULL, - embedding_model TEXT NOT NULL DEFAULT 'text-embedding-ada-002', - embedding_dimension INTEGER NOT NULL DEFAULT 1536, + embedding vector(384) NOT NULL, + embedding_model TEXT NOT NULL DEFAULT 'text-embedding-3-small', + embedding_dimension INTEGER NOT NULL DEFAULT 384, -- Metadata created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), @@ -157,7 +159,7 @@ from supabase import create_client openai.api_key = "your-openai-key" supabase = create_client("your-supabase-url", "your-supabase-key") -def generate_embedding(text: str, model: str = "text-embedding-ada-002") -> list[float]: +def generate_embedding(text: str, model: str = "text-embedding-3-small") -> list[float]: """Generate embedding using OpenAI API.""" response 
= openai.embeddings.create( input=text, @@ -177,8 +179,8 @@ def embed_vcon_content(vcon_id: str, subject: str, dialog_texts: list[str]): 'content_reference': None, 'content_text': subject, 'embedding': subject_embedding, - 'embedding_model': 'text-embedding-ada-002', - 'embedding_dimension': 1536 + 'embedding_model': 'text-embedding-3-small', + 'embedding_dimension': 384 }).execute() # Embed dialogs @@ -264,7 +266,7 @@ serve(async (req) => { }) ``` -See `docs/INGEST_AND_EMBEDDINGS.md` for the production-ready function (`supabase/functions/embed-vcons/index.ts`), environment variables, and Cron scheduling. This repository standardizes on 384‑dim embeddings to match the migrations and HNSW index. +See [`docs/development/INGEST_AND_EMBEDDINGS.md`](./INGEST_AND_EMBEDDINGS.md) for the production-ready function (`supabase/functions/embed-vcons/index.ts`), environment variables, and Cron scheduling. This repository standardizes on 384‑dim embeddings to match the migrations and HNSW index. --- @@ -275,7 +277,7 @@ See `docs/INGEST_AND_EMBEDDINGS.md` for the production-ready function (`supabase ```sql -- Function to search by semantic similarity CREATE OR REPLACE FUNCTION search_vcons_semantic( - query_embedding vector(1536), + query_embedding vector(384), match_threshold float DEFAULT 0.7, match_count int DEFAULT 20 ) diff --git a/docs/guide/installation.md b/docs/guide/installation.md index 4973537..04d49a7 100644 --- a/docs/guide/installation.md +++ b/docs/guide/installation.md @@ -532,14 +532,19 @@ curl "https://your-project.supabase.co/rest/v1/" \ Complete list of supported environment variables: +Embedding provider priority: **LiteLLM → Azure OpenAI → OpenAI → Hugging Face** (first configured wins). 
+ | Variable | Required | Description | Default | |----------|----------|-------------|---------| | `SUPABASE_URL` | ✅ Yes | Your Supabase project URL | - | | `SUPABASE_ANON_KEY` | ✅ Yes | Supabase anon public key | - | | `SUPABASE_SERVICE_ROLE_KEY` | ❌ No | Service role key (admin operations) | - | -| `OPENAI_API_KEY` | ❌ No | OpenAI API key for embeddings | - | +| `LITELLM_PROXY_URL` | ❌ No | LiteLLM proxy base URL — takes priority for embeddings | - | +| `LITELLM_MASTER_KEY` | ❌ No | LiteLLM proxy API key (also accepted as `LITELLM_API_KEY`) | - | +| `OPENAI_API_KEY` | ❌ No | OpenAI API key for embeddings (used when neither LiteLLM nor Azure OpenAI is configured; also read by the MCP server to embed query text for semantic/hybrid search) | - | | `AZURE_OPENAI_EMBEDDING_ENDPOINT` | ❌ No | Azure OpenAI base endpoint (e.g., https://your-resource.openai.azure.com) | - | | `AZURE_OPENAI_EMBEDDING_API_KEY` | ❌ No | Azure OpenAI API key | - | +| `HF_API_TOKEN` | ❌ No | Hugging Face API token for embeddings (lowest priority fallback) | - | | `VCON_PLUGINS_PATH` | ❌ No | Comma-separated plugin paths | - | | `VCON_LICENSE_KEY` | ❌ No | Enterprise license key | - | | `MCP_SERVER_NAME` | ❌ No | Server name for MCP | `vcon-mcp-server` | diff --git a/docs/guide/search.md b/docs/guide/search.md index 8ab2732..bb56a9d 100644 --- a/docs/guide/search.md +++ b/docs/guide/search.md @@ -114,8 +114,6 @@ The vCon MCP server provides four search tools with different capabilities, from } ``` -**Note:** Automatic embedding generation from query text is not yet implemented. Use `search_vcons_content` for keyword-based search without embeddings. - **Returns:** Similar conversations ranked by semantic similarity --- @@ -287,8 +285,8 @@ Analysis with `encoding='json'` or `encoding='base64url'` typically contains: For semantic and hybrid search to work effectively, you need to generate embeddings for your vCons. 
See the following guides: -- [INGEST_AND_EMBEDDINGS.md](./INGEST_AND_EMBEDDINGS.md) - Complete guide to embedding generation -- [EMBEDDING_STRATEGY_UPGRADE.md](./EMBEDDING_STRATEGY_UPGRADE.md) - Details on which content is embedded +- [INGEST_AND_EMBEDDINGS.md](../development/INGEST_AND_EMBEDDINGS.md) - Complete guide to embedding generation +- [EMBEDDING_STRATEGY_UPGRADE.md](../development/EMBEDDING_STRATEGY_UPGRADE.md) - Details on which content is embedded **Quick start:** ```bash @@ -387,6 +385,6 @@ npm run embeddings:check ## Related Documentation - [QUICK_START.md](../QUICK_START.md) - Getting started with vCon MCP -- [INGEST_AND_EMBEDDINGS.md](./INGEST_AND_EMBEDDINGS.md) - Embedding generation +- [INGEST_AND_EMBEDDINGS.md](../development/INGEST_AND_EMBEDDINGS.md) - Embedding generation - [SUPABASE_SEMANTIC_SEARCH_GUIDE.md](../SUPABASE_SEMANTIC_SEARCH_GUIDE.md) - Database search implementation diff --git a/docs/reference/CORRECTED_SCHEMA.md b/docs/reference/CORRECTED_SCHEMA.md index f197f2f..1c21123 100644 --- a/docs/reference/CORRECTED_SCHEMA.md +++ b/docs/reference/CORRECTED_SCHEMA.md @@ -1,9 +1,9 @@ # Corrected Supabase Schema for IETF vCon -## Compliant with draft-ietf-vcon-vcon-core-00 +## Compliant with draft-ietf-vcon-vcon-core-02 (v0.4.0) ```sql -- Supabase Schema for IETF vCon - CORRECTED VERSION --- This schema is fully compliant with draft-ietf-vcon-vcon-core-00 +-- This schema is fully compliant with draft-ietf-vcon-vcon-core-02 (spec v0.4.0) -- Changes from original marked with -- CORRECTED comments -- Enable necessary extensions @@ -14,7 +14,7 @@ CREATE EXTENSION IF NOT EXISTS "pg_trgm"; CREATE TABLE vcons ( id UUID PRIMARY KEY DEFAULT uuid_generate_v4(), uuid UUID UNIQUE NOT NULL, -- The vCon UUID from the original document - vcon_version VARCHAR(10) NOT NULL DEFAULT '0.3.0', -- CORRECTED: Updated to latest spec version + vcon_version VARCHAR(10) NOT NULL DEFAULT '0.4.0', -- CORRECTED: Updated to spec v0.4.0 subject TEXT, created_at TIMESTAMPTZ 
NOT NULL DEFAULT NOW(), updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), @@ -32,10 +32,10 @@ CREATE TABLE vcons ( -- JSON fields for complex nested data redacted JSONB DEFAULT '{}', - appended JSONB DEFAULT '{}', -- CORRECTED: Added appended support per spec + amended JSONB DEFAULT '{}', -- CORRECTED: v0.4.0 renamed appended→amended (Section 4.1.5) group_data JSONB DEFAULT '[]', extensions TEXT[], -- CORRECTED: Added extensions array per spec Section 4.1.3 - must_support TEXT[], -- CORRECTED: Added must_support array per spec Section 4.1.4 + critical TEXT[], -- CORRECTED: v0.4.0 renamed must_support→critical (Section 4.1.4) CONSTRAINT valid_uuid CHECK (uuid IS NOT NULL) ); @@ -126,7 +126,7 @@ CREATE TABLE attachments ( dialog INTEGER, -- CORRECTED: Added dialog reference per spec Section 4.4.4 -- Content fields - mimetype TEXT, + mediatype TEXT, -- CORRECTED: v0.0.2+ renamed mimetype→mediatype filename TEXT, body TEXT, encoding TEXT CHECK (encoding IS NULL OR encoding IN ('base64url', 'json', 'none')), -- CORRECTED: Removed default, added constraint @@ -263,9 +263,9 @@ CREATE INDEX idx_vcons_subject_trgm ON vcons USING gin (subject gin_trgm_ops); CREATE INDEX idx_parties_name_trgm ON parties USING gin (name gin_trgm_ops); -- Comments for documentation -COMMENT ON TABLE vcons IS 'Main vCon container table - compliant with draft-ietf-vcon-vcon-core-00'; +COMMENT ON TABLE vcons IS 'Main vCon container table - compliant with draft-ietf-vcon-vcon-core-02 (v0.4.0)'; COMMENT ON COLUMN vcons.extensions IS 'List of vCon extensions used (Section 4.1.3)'; -COMMENT ON COLUMN vcons.must_support IS 'List of incompatible extensions that must be supported (Section 4.1.4)'; +COMMENT ON COLUMN vcons.critical IS 'List of incompatible extensions that must be supported (Section 4.1.4); renamed from must_support in v0.4.0'; COMMENT ON TABLE parties IS 'Party objects from vCon parties array (Section 4.2)'; COMMENT ON COLUMN parties.uuid IS 'Unique identifier for participant across vCons 
(Section 4.2.12)'; @@ -320,9 +320,9 @@ BEGIN; -- 1. Add new required columns ALTER TABLE vcons ADD COLUMN IF NOT EXISTS extensions TEXT[]; -ALTER TABLE vcons ADD COLUMN IF NOT EXISTS must_support TEXT[]; -ALTER TABLE vcons ADD COLUMN IF NOT EXISTS appended JSONB DEFAULT '{}'; -ALTER TABLE vcons ALTER COLUMN vcon_version SET DEFAULT '0.3.0'; +ALTER TABLE vcons ADD COLUMN IF NOT EXISTS critical TEXT[]; +ALTER TABLE vcons ADD COLUMN IF NOT EXISTS amended JSONB DEFAULT '{}'; +ALTER TABLE vcons ALTER COLUMN vcon_version SET DEFAULT '0.4.0'; ALTER TABLE parties ADD COLUMN IF NOT EXISTS did TEXT; ALTER TABLE parties ADD COLUMN IF NOT EXISTS uuid UUID;