diff --git a/.env.example b/.env.example index acf95d7..508c712 100644 --- a/.env.example +++ b/.env.example @@ -8,6 +8,10 @@ SUPABASE_ANON_KEY=your-anon-key-here # Optional: For service role operations (use carefully!) # SUPABASE_SERVICE_ROLE_KEY=your-service-role-key +# LiteLLM proxy (optional; used for embeddings when set) +# LITELLM_PROXY_URL=http://localhost:4000 +# LITELLM_MASTER_KEY=sk-your-master-key + # ============================================================================ # Plugin Configuration # ============================================================================ diff --git a/docs/api/tools.md b/docs/api/tools.md index fe94be6..7b9cab0 100644 --- a/docs/api/tools.md +++ b/docs/api/tools.md @@ -72,7 +72,7 @@ Create a new vCon (Virtual Conversation) record. ```typescript { vcon_data: { - vcon: "0.3.0", // vCon version + vcon: "0.4.0", // vCon version uuid?: string, // Auto-generated if not provided subject?: string, // Conversation subject parties: Party[], // At least one party required @@ -80,7 +80,7 @@ Create a new vCon (Virtual Conversation) record. analysis?: Analysis[], // Optional AI analysis attachments?: Attachment[], // Optional files extensions?: string[], // Optional extensions - must_support?: string[] // Optional requirements + critical?: string[] // Optional: extensions that must be supported (v0.4.0) }, metadata?: { basename?: string, @@ -106,7 +106,7 @@ Create a new vCon (Virtual Conversation) record. ```typescript { "vcon_data": { - "vcon": "0.3.0", + "vcon": "0.4.0", "subject": "Customer Support Call", "parties": [ { @@ -146,7 +146,7 @@ Retrieve a vCon by UUID. { success: boolean, vcon: { - vcon: "0.3.0", + vcon: "0.4.0", uuid: string, created_at: string, updated_at?: string, @@ -181,7 +181,7 @@ Update vCon metadata and top-level fields. 
updates: { subject?: string, extensions?: string[], - must_support?: string[], + critical?: string[], [key: string]: any }, merge_strategy?: "replace" | "merge" | "append", // Default: "merge" diff --git a/docs/development/INGEST_AND_EMBEDDINGS.md b/docs/development/INGEST_AND_EMBEDDINGS.md index 90a760e..c1c17ce 100644 --- a/docs/development/INGEST_AND_EMBEDDINGS.md +++ b/docs/development/INGEST_AND_EMBEDDINGS.md @@ -19,7 +19,7 @@ npm install - `SUPABASE_URL` - `SUPABASE_SERVICE_ROLE_KEY` - `VCON_S3_BUCKET` (optional, for S3 loading) - - `OPENAI_API_KEY` or `HF_API_TOKEN` (for embeddings) + - One embedding provider (see priority order below): `LITELLM_PROXY_URL`+`LITELLM_MASTER_KEY`, `OPENAI_API_KEY`, Azure vars, or `HF_API_TOKEN` --- @@ -56,7 +56,7 @@ npm run sync:vcons -- /absolute/path/to/vcons Notes: - Use absolute directory paths. Files must end with `.vcon` extension. - The script is idempotent and skips vCons already in the database. -- Handles both legacy (0.0.1-0.2.0) and current (0.3.0) vCon specs. +- Handles both legacy (0.0.1-0.2.0) and current (0.4.0) vCon specs. --- @@ -76,12 +76,11 @@ Analysis elements with `encoding='none'` are prioritized because they contain pl Environment variables (set in `.env` file or exported): - `SUPABASE_URL` - `SUPABASE_SERVICE_ROLE_KEY` -- One provider (choose one): - - `OPENAI_API_KEY` (uses `text-embedding-3-small` with `dimensions=384`) - - or Azure OpenAI: - - `AZURE_OPENAI_EMBEDDING_ENDPOINT` (e.g., `https://your-resource.openai.azure.com`) - - `AZURE_OPENAI_EMBEDDING_API_KEY` - - or `HF_API_TOKEN` (Hugging Face Inference API with `sentence-transformers/all-MiniLM-L6-v2`) +- One provider (auto-detected in priority order — first match wins): + 1. **LiteLLM** (highest priority): `LITELLM_PROXY_URL` + `LITELLM_MASTER_KEY` (or `LITELLM_API_KEY`) + 2. **Azure OpenAI**: `AZURE_OPENAI_EMBEDDING_ENDPOINT` + `AZURE_OPENAI_EMBEDDING_API_KEY` + 3. 
**OpenAI**: `OPENAI_API_KEY` (uses `text-embedding-3-small` with `dimensions=384`) + 4. **Hugging Face**: `HF_API_TOKEN` (uses `sentence-transformers/all-MiniLM-L6-v2`) #### Generate Embeddings @@ -101,6 +100,9 @@ For more control, use the script directly: # Process 100 units (default) npx tsx scripts/embed-vcons.ts +# Process with LiteLLM proxy (set LITELLM_PROXY_URL + LITELLM_MASTER_KEY in .env) +npx tsx scripts/embed-vcons.ts --limit=500 --provider=litellm + # Process 500 units with OpenAI npx tsx scripts/embed-vcons.ts --limit=500 --provider=openai diff --git a/docs/development/embeddings.md b/docs/development/embeddings.md index 3b2fa3d..efe1540 100644 --- a/docs/development/embeddings.md +++ b/docs/development/embeddings.md @@ -1,5 +1,7 @@ # Supabase Semantic Search Implementation Guide +> **⚠️ Historical reference document.** The SQL examples in Steps 1–2 below reflect an early 1536-dim / `text-embedding-ada-002` design. The current implementation uses **`text-embedding-3-small` at 384 dimensions**. For the authoritative setup, see [`docs/development/INGEST_AND_EMBEDDINGS.md`](./INGEST_AND_EMBEDDINGS.md). + ## Overview This guide explains how to implement semantic search for vCon conversation content in Supabase PostgreSQL using the **pgvector** extension for vector similarity search. 
@@ -48,18 +50,18 @@ SELECT * FROM pg_extension WHERE extname = 'vector'; ```sql -- Add vector columns to existing tables ALTER TABLE vcons -ADD COLUMN subject_embedding vector(1536), -- OpenAI ada-002 dimension -ADD COLUMN subject_embedding_model TEXT DEFAULT 'text-embedding-ada-002', +ADD COLUMN subject_embedding vector(384), -- text-embedding-3-small dimension +ADD COLUMN subject_embedding_model TEXT DEFAULT 'text-embedding-3-small', ADD COLUMN subject_embedding_updated_at TIMESTAMPTZ; ALTER TABLE dialog -ADD COLUMN content_embedding vector(1536), -ADD COLUMN content_embedding_model TEXT DEFAULT 'text-embedding-ada-002', +ADD COLUMN content_embedding vector(384), +ADD COLUMN content_embedding_model TEXT DEFAULT 'text-embedding-3-small', ADD COLUMN content_embedding_updated_at TIMESTAMPTZ; ALTER TABLE analysis -ADD COLUMN summary_embedding vector(1536), -ADD COLUMN summary_embedding_model TEXT DEFAULT 'text-embedding-ada-002', +ADD COLUMN summary_embedding vector(384), +ADD COLUMN summary_embedding_model TEXT DEFAULT 'text-embedding-3-small', ADD COLUMN summary_embedding_updated_at TIMESTAMPTZ; ``` @@ -79,9 +81,9 @@ CREATE TABLE vcon_embeddings ( content_text TEXT NOT NULL, -- Original text that was embedded -- The embedding - embedding vector(1536) NOT NULL, - embedding_model TEXT NOT NULL DEFAULT 'text-embedding-ada-002', - embedding_dimension INTEGER NOT NULL DEFAULT 1536, + embedding vector(384) NOT NULL, + embedding_model TEXT NOT NULL DEFAULT 'text-embedding-3-small', + embedding_dimension INTEGER NOT NULL DEFAULT 384, -- Metadata created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), @@ -157,7 +159,7 @@ from supabase import create_client openai.api_key = "your-openai-key" supabase = create_client("your-supabase-url", "your-supabase-key") -def generate_embedding(text: str, model: str = "text-embedding-ada-002") -> list[float]: +def generate_embedding(text: str, model: str = "text-embedding-3-small") -> list[float]: """Generate embedding using OpenAI API.""" response 
= openai.embeddings.create( input=text, @@ -177,8 +179,8 @@ def embed_vcon_content(vcon_id: str, subject: str, dialog_texts: list[str]): 'content_reference': None, 'content_text': subject, 'embedding': subject_embedding, - 'embedding_model': 'text-embedding-ada-002', - 'embedding_dimension': 1536 + 'embedding_model': 'text-embedding-3-small', + 'embedding_dimension': 384 }).execute() # Embed dialogs @@ -264,7 +266,7 @@ serve(async (req) => { }) ``` -See `docs/INGEST_AND_EMBEDDINGS.md` for the production-ready function (`supabase/functions/embed-vcons/index.ts`), environment variables, and Cron scheduling. This repository standardizes on 384‑dim embeddings to match the migrations and HNSW index. +See [`docs/development/INGEST_AND_EMBEDDINGS.md`](./INGEST_AND_EMBEDDINGS.md) for the production-ready function (`supabase/functions/embed-vcons/index.ts`), environment variables, and Cron scheduling. This repository standardizes on 384‑dim embeddings to match the migrations and HNSW index. --- @@ -275,7 +277,7 @@ See `docs/INGEST_AND_EMBEDDINGS.md` for the production-ready function (`supabase ```sql -- Function to search by semantic similarity CREATE OR REPLACE FUNCTION search_vcons_semantic( - query_embedding vector(1536), + query_embedding vector(384), match_threshold float DEFAULT 0.7, match_count int DEFAULT 20 ) diff --git a/docs/guide/installation.md b/docs/guide/installation.md index 4973537..04d49a7 100644 --- a/docs/guide/installation.md +++ b/docs/guide/installation.md @@ -532,14 +532,19 @@ curl "https://your-project.supabase.co/rest/v1/" \ Complete list of supported environment variables: +Embedding provider priority: **LiteLLM → Azure OpenAI → OpenAI → Hugging Face** (first configured wins). 
+ | Variable | Required | Description | Default | |----------|----------|-------------|---------| | `SUPABASE_URL` | ✅ Yes | Your Supabase project URL | - | | `SUPABASE_ANON_KEY` | ✅ Yes | Supabase anon public key | - | | `SUPABASE_SERVICE_ROLE_KEY` | ❌ No | Service role key (admin operations) | - | -| `OPENAI_API_KEY` | ❌ No | OpenAI API key for embeddings | - | +| `LITELLM_PROXY_URL` | ❌ No | LiteLLM proxy base URL — takes priority for embeddings | - | +| `LITELLM_MASTER_KEY` | ❌ No | LiteLLM proxy API key (also accepted as `LITELLM_API_KEY`) | - | +| `OPENAI_API_KEY` | ❌ No | OpenAI API key for embeddings (if LiteLLM not set) | - | | `AZURE_OPENAI_EMBEDDING_ENDPOINT` | ❌ No | Azure OpenAI base endpoint (e.g., https://your-resource.openai.azure.com) | - | | `AZURE_OPENAI_EMBEDDING_API_KEY` | ❌ No | Azure OpenAI API key | - | +| `HF_API_TOKEN` | ❌ No | Hugging Face API token for embeddings (lowest priority fallback) | - | | `VCON_PLUGINS_PATH` | ❌ No | Comma-separated plugin paths | - | | `VCON_LICENSE_KEY` | ❌ No | Enterprise license key | - | | `MCP_SERVER_NAME` | ❌ No | Server name for MCP | `vcon-mcp-server` | diff --git a/docs/guide/search.md b/docs/guide/search.md index 8ab2732..bb56a9d 100644 --- a/docs/guide/search.md +++ b/docs/guide/search.md @@ -114,8 +114,6 @@ The vCon MCP server provides four search tools with different capabilities, from } ``` -**Note:** Automatic embedding generation from query text is not yet implemented. Use `search_vcons_content` for keyword-based search without embeddings. - **Returns:** Similar conversations ranked by semantic similarity --- @@ -287,8 +285,8 @@ Analysis with `encoding='json'` or `encoding='base64url'` typically contains: For semantic and hybrid search to work effectively, you need to generate embeddings for your vCons. 
See the following guides: -- [INGEST_AND_EMBEDDINGS.md](./INGEST_AND_EMBEDDINGS.md) - Complete guide to embedding generation -- [EMBEDDING_STRATEGY_UPGRADE.md](./EMBEDDING_STRATEGY_UPGRADE.md) - Details on which content is embedded +- [INGEST_AND_EMBEDDINGS.md](../development/INGEST_AND_EMBEDDINGS.md) - Complete guide to embedding generation +- [EMBEDDING_STRATEGY_UPGRADE.md](../development/EMBEDDING_STRATEGY_UPGRADE.md) - Details on which content is embedded **Quick start:** ```bash @@ -387,6 +385,6 @@ npm run embeddings:check ## Related Documentation - [QUICK_START.md](../QUICK_START.md) - Getting started with vCon MCP -- [INGEST_AND_EMBEDDINGS.md](./INGEST_AND_EMBEDDINGS.md) - Embedding generation +- [INGEST_AND_EMBEDDINGS.md](../development/INGEST_AND_EMBEDDINGS.md) - Embedding generation - [SUPABASE_SEMANTIC_SEARCH_GUIDE.md](../SUPABASE_SEMANTIC_SEARCH_GUIDE.md) - Database search implementation diff --git a/docs/reference/CORRECTED_SCHEMA.md b/docs/reference/CORRECTED_SCHEMA.md index 6c62089..0a260bd 100644 --- a/docs/reference/CORRECTED_SCHEMA.md +++ b/docs/reference/CORRECTED_SCHEMA.md @@ -128,7 +128,7 @@ CREATE TABLE attachments ( dialog INTEGER, -- CORRECTED: Added dialog reference per spec Section 4.4.4 -- Content fields - mimetype TEXT, + mediatype TEXT, -- CORRECTED: v0.0.2+ renamed mimetype→mediatype filename TEXT, body TEXT, encoding TEXT CHECK (encoding IS NULL OR encoding IN ('base64url', 'json', 'none')), -- CORRECTED: Removed default, added constraint @@ -265,9 +265,9 @@ CREATE INDEX idx_vcons_subject_trgm ON vcons USING gin (subject gin_trgm_ops); CREATE INDEX idx_parties_name_trgm ON parties USING gin (name gin_trgm_ops); -- Comments for documentation -COMMENT ON TABLE vcons IS 'Main vCon container table - compliant with draft-ietf-vcon-vcon-core-00'; +COMMENT ON TABLE vcons IS 'Main vCon container table - compliant with draft-ietf-vcon-vcon-core-02 (v0.4.0)'; COMMENT ON COLUMN vcons.extensions IS 'List of vCon extensions used (Section 4.1.3)'; 
-COMMENT ON COLUMN vcons.must_support IS 'List of incompatible extensions that must be supported (Section 4.1.4)'; +COMMENT ON COLUMN vcons.critical IS 'List of incompatible extensions that must be supported (Section 4.1.4); renamed from must_support in v0.4.0'; COMMENT ON TABLE parties IS 'Party objects from vCon parties array (Section 4.2)'; COMMENT ON COLUMN parties.uuid IS 'Unique identifier for participant across vCons (Section 4.2.12)'; @@ -322,9 +322,9 @@ BEGIN; -- 1. Add new required columns ALTER TABLE vcons ADD COLUMN IF NOT EXISTS extensions TEXT[]; -ALTER TABLE vcons ADD COLUMN IF NOT EXISTS must_support TEXT[]; -ALTER TABLE vcons ADD COLUMN IF NOT EXISTS appended JSONB DEFAULT '{}'; -ALTER TABLE vcons ALTER COLUMN vcon_version SET DEFAULT '0.3.0'; +ALTER TABLE vcons ADD COLUMN IF NOT EXISTS critical TEXT[]; +ALTER TABLE vcons ADD COLUMN IF NOT EXISTS amended JSONB DEFAULT '{}'; +ALTER TABLE vcons ALTER COLUMN vcon_version SET DEFAULT '0.4.0'; ALTER TABLE parties ADD COLUMN IF NOT EXISTS did TEXT; ALTER TABLE parties ADD COLUMN IF NOT EXISTS uuid UUID; diff --git a/scripts/embed-vcons.ts b/scripts/embed-vcons.ts index 624a483..f61251e 100755 --- a/scripts/embed-vcons.ts +++ b/scripts/embed-vcons.ts @@ -22,7 +22,7 @@ * --mode=MODE Mode: 'backfill' (default) or 'embed' * --vcon-id=UUID Specific vCon UUID to embed (required for embed mode) * --limit=N Max text units to process per batch (default: 100, max: 500) - * --provider=PROVIDER Embedding provider: 'openai', 'azure', or 'hf' (auto-detected from env) + * --provider=PROVIDER Embedding provider: 'litellm', 'openai', 'azure', or 'hf' (auto-detected from env) * --continuous, -c Run continuously until all embeddings complete * --delay=N Delay in seconds between batches in continuous mode (default: 2) * --oldest-first Process oldest vCons first (for backfilling old data) @@ -30,6 +30,8 @@ * Environment Variables: * SUPABASE_URL Supabase project URL * SUPABASE_SERVICE_ROLE_KEY Service role key for admin 
operations + * LITELLM_PROXY_URL LiteLLM proxy base URL (when using LiteLLM; takes priority) + * LITELLM_MASTER_KEY / LITELLM_API_KEY LiteLLM proxy API key * OPENAI_API_KEY OpenAI API key (for text-embedding-3-small) * AZURE_OPENAI_EMBEDDING_ENDPOINT Azure OpenAI base endpoint (e.g., https://your-resource.openai.azure.com) * AZURE_OPENAI_EMBEDDING_API_KEY Azure OpenAI API key @@ -63,11 +65,12 @@ import * as dotenv from 'dotenv'; import { getSupabaseClient } from '../dist/db/client.js'; import pLimit from 'p-limit'; +import * as sharedEmbed from '../supabase/functions/_shared/embeddings'; // Load environment variables dotenv.config(); -type EmbeddingProvider = 'openai' | 'azure' | 'hf'; +type EmbeddingProvider = sharedEmbed.EmbeddingProvider; interface TextUnit { vcon_id: string; @@ -119,7 +122,7 @@ function parseArgs(): { limit = Math.max(1, Math.min(500, parseInt(arg.split('=')[1], 10))); } else if (arg.startsWith('--provider=')) { const value = arg.split('=')[1] as EmbeddingProvider; - if (value === 'openai' || value === 'azure' || value === 'hf') { + if (value === 'litellm' || value === 'openai' || value === 'azure' || value === 'hf') { provider = value; } } else if (arg === '--continuous' || arg === '-c') { @@ -136,13 +139,17 @@ function parseArgs(): { /** * Detect embedding provider from environment variables - * Priority: Azure OpenAI > OpenAI > Hugging Face + * Priority: LiteLLM > Azure OpenAI > OpenAI > Hugging Face */ function detectProvider(preferredProvider?: EmbeddingProvider): EmbeddingProvider { if (preferredProvider) { return preferredProvider; } - // Azure OpenAI takes priority if endpoint and API key are set + const litellmUrl = (process.env.LITELLM_PROXY_URL ?? '').trim().replace(/\/$/, ''); + const litellmKey = (process.env.LITELLM_MASTER_KEY ?? process.env.LITELLM_API_KEY ?? 
'').trim(); + if (litellmUrl && litellmKey) { + return 'litellm'; + } if (process.env.AZURE_OPENAI_EMBEDDING_ENDPOINT && process.env.AZURE_OPENAI_EMBEDDING_API_KEY) { return 'azure'; @@ -305,141 +312,31 @@ function truncateToTokens(text: string, maxTokens: number): string { return text.substring(0, maxChars); } -/** - * Generate embeddings using OpenAI API - */ -async function embedOpenAI(texts: string[]): Promise { - const apiKey = process.env.OPENAI_API_KEY; - if (!apiKey) { - throw new Error('OPENAI_API_KEY not set'); - } - - try { - const response = await fetch('https://api.openai.com/v1/embeddings', { - method: 'POST', - headers: { - 'Content-Type': 'application/json', - 'Authorization': `Bearer ${apiKey}` - }, - body: JSON.stringify({ - model: 'text-embedding-3-small', - input: texts, - dimensions: 384 - }) - }); - - if (!response.ok) { - const errorText = await response.text(); - let errorDetails = ''; - try { - const errorJson = JSON.parse(errorText); - errorDetails = JSON.stringify(errorJson, null, 2); - } catch { - errorDetails = errorText; - } - throw new Error(`OpenAI API error ${response.status}: ${errorDetails}`); - } +/** Wrapper: reads env and calls shared embedLiteLLM */ +export async function embedLiteLLM(texts: string[]): Promise { + const baseUrl = (process.env.LITELLM_PROXY_URL ?? '').trim().replace(/\/$/, ''); + const apiKey = (process.env.LITELLM_MASTER_KEY ?? process.env.LITELLM_API_KEY ?? '').trim(); + return sharedEmbed.embedLiteLLM(texts, { baseUrl, apiKey }); +} - const json = await response.json(); - return json.data.map((d: any) => d.embedding as number[]); - } catch (error) { - if (error instanceof Error) { - throw error; - } - throw new Error(`OpenAI embeddings failed: ${JSON.stringify(error)}`); - } +/** Wrapper: reads env and calls shared embedOpenAI */ +async function embedOpenAI(texts: string[]): Promise { + const apiKey = process.env.OPENAI_API_KEY ?? 
''; + return sharedEmbed.embedOpenAI(texts, { apiKey }); } -/** - * Generate embeddings using Azure OpenAI API - */ +/** Wrapper: reads env and calls shared embedAzureOpenAI */ async function embedAzureOpenAI(texts: string[]): Promise { - const baseEndpoint = process.env.AZURE_OPENAI_EMBEDDING_ENDPOINT; - const apiKey = process.env.AZURE_OPENAI_EMBEDDING_API_KEY; - const deployment = 'text-embedding-3-small'; + const endpoint = process.env.AZURE_OPENAI_EMBEDDING_ENDPOINT ?? ''; + const apiKey = process.env.AZURE_OPENAI_EMBEDDING_API_KEY ?? ''; const apiVersion = process.env.AZURE_OPENAI_EMBEDDING_API_VERSION || '2024-02-01'; - - if (!baseEndpoint || !apiKey) { - throw new Error('AZURE_OPENAI_EMBEDDING_ENDPOINT and AZURE_OPENAI_EMBEDDING_API_KEY are required'); - } - - // Construct the full URL: {endpoint}/openai/deployments/{deployment}/embeddings?api-version={version} - const normalizedEndpoint = baseEndpoint.replace(/\/$/, ''); // Remove trailing slash if present - const url = `${normalizedEndpoint}/openai/deployments/${deployment}/embeddings?api-version=${apiVersion}`; - - try { - const response = await fetch(url, { - method: 'POST', - headers: { - 'Content-Type': 'application/json', - 'api-key': apiKey - }, - body: JSON.stringify({ - input: texts, - dimensions: 384 - }) - }); - - if (!response.ok) { - const errorText = await response.text(); - let errorDetails = ''; - try { - const errorJson = JSON.parse(errorText); - errorDetails = JSON.stringify(errorJson, null, 2); - } catch { - errorDetails = errorText; - } - throw new Error(`Azure OpenAI API error ${response.status}: ${errorDetails}`); - } - - const json = await response.json(); - return json.data.map((d: any) => d.embedding as number[]); - } catch (error) { - if (error instanceof Error) { - throw error; - } - throw new Error(`Azure OpenAI embeddings failed: ${JSON.stringify(error)}`); - } + return sharedEmbed.embedAzureOpenAI(texts, { endpoint, apiKey, apiVersion }); } -/** - * Generate embeddings using 
Hugging Face API - */ +/** Wrapper: reads env and calls shared embedHF */ async function embedHF(texts: string[]): Promise { - const apiToken = process.env.HF_API_TOKEN; - if (!apiToken) { - throw new Error('HF_API_TOKEN not set'); - } - - const result: number[][] = []; - - for (const text of texts) { - const response = await fetch( - 'https://api-inference.huggingface.co/pipeline/feature-extraction/sentence-transformers/all-MiniLM-L6-v2', - { - method: 'POST', - headers: { - 'Authorization': `Bearer ${apiToken}`, - 'Content-Type': 'application/json' - }, - body: JSON.stringify({ - inputs: text, - options: { wait_for_model: true } - }) - } - ); - - if (!response.ok) { - const errorText = await response.text(); - throw new Error(`HF embeddings failed: ${response.status} ${errorText}`); - } - - const json = await response.json(); - const vec = Array.isArray(json[0]) ? json[0] : json; - result.push(vec as number[]); - } - - return result; + const apiToken = process.env.HF_API_TOKEN ?? ''; + return sharedEmbed.embedHF(texts, { apiToken }); } /** @@ -451,24 +348,13 @@ async function upsertEmbeddings( vectors: number[][], provider: EmbeddingProvider ): Promise { - const getModelName = (provider: EmbeddingProvider): string => { - switch (provider) { - case 'openai': - case 'azure': - // Both use the same underlying model - return 'text-embedding-3-small'; - case 'hf': - return 'sentence-transformers/all-MiniLM-L6-v2'; - } - }; - const rows = units.map((u, i) => ({ vcon_id: u.vcon_id, content_type: u.content_type, content_reference: u.content_reference, content_text: u.content_text, embedding: vectors[i], - embedding_model: getModelName(provider), + embedding_model: sharedEmbed.getModelName(provider), embedding_dimension: 384 })); @@ -511,7 +397,7 @@ async function processEmbeddings( const MAX_TOKENS_PER_ITEM = 8000; const CONCURRENCY_LIMIT = 15; // Process 15 batches concurrently - if (provider === 'openai' || provider === 'azure') { + if (provider === 'litellm' || 
provider === 'openai' || provider === 'azure') { // Group units into token-aware batches const batches: TextUnit[][] = []; let currentBatch: TextUnit[] = []; @@ -543,7 +429,7 @@ async function processEmbeddings( const startTime = Date.now(); // Choose the appropriate embedding function - const embedFn = provider === 'azure' ? embedAzureOpenAI : embedOpenAI; + const embedFn = provider === 'litellm' ? embedLiteLLM : provider === 'azure' ? embedAzureOpenAI : embedOpenAI; const processBatch = async (batch: TextUnit[], batchIndex: number) => { try { @@ -662,6 +548,15 @@ async function main() { process.exit(1); } + if (provider === 'litellm') { + const url = (process.env.LITELLM_PROXY_URL ?? '').trim(); + const key = (process.env.LITELLM_MASTER_KEY ?? process.env.LITELLM_API_KEY ?? '').trim(); + if (!url || !key) { + console.error('❌ LITELLM_PROXY_URL and LITELLM_MASTER_KEY (or LITELLM_API_KEY) required for LiteLLM provider'); + process.exit(1); + } + } + if (provider === 'openai' && !process.env.OPENAI_API_KEY) { console.error('❌ OPENAI_API_KEY not set (required for OpenAI provider)'); console.error(' Set OPENAI_API_KEY or use --provider=azure for Azure OpenAI'); @@ -695,6 +590,8 @@ async function main() { // Display configuration const getProviderDisplayName = (p: EmbeddingProvider): string => { switch (p) { + case 'litellm': + return 'LiteLLM proxy (text-embedding-3-small)'; case 'openai': return 'OpenAI (text-embedding-3-small)'; case 'azure': diff --git a/scripts/test-semantic-search.ts b/scripts/test-semantic-search.ts index dea3e71..6a3e7fa 100644 --- a/scripts/test-semantic-search.ts +++ b/scripts/test-semantic-search.ts @@ -17,10 +17,14 @@ dotenv.config(); const supabaseUrl = process.env.SUPABASE_URL || 'http://127.0.0.1:54321'; const supabaseKey = process.env.SUPABASE_SERVICE_ROLE_KEY || 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiJzdXBhYmFzZS1kZW1vIiwicm9sZSI6InNlcnZpY2Vfcm9sZSIsImV4cCI6MTk4MzgxMjk5Nn0.EGIM96RAZx35lJzdJsyH-qQwv8Hdp7fsn3W0YpN81IU'; + +const 
litellmUrl = (process.env.LITELLM_PROXY_URL ?? '').trim().replace(/\/$/, ''); +const litellmKey = (process.env.LITELLM_MASTER_KEY ?? process.env.LITELLM_API_KEY ?? '').trim(); +const useLiteLLM = Boolean(litellmUrl && litellmKey); const openaiKey = process.env.OPENAI_API_KEY; -if (!openaiKey) { - console.error('❌ OPENAI_API_KEY is required'); +if (!useLiteLLM && !openaiKey) { + console.error('❌ Set LITELLM_PROXY_URL + LITELLM_MASTER_KEY, or OPENAI_API_KEY'); process.exit(1); } @@ -28,14 +32,19 @@ const supabase = createClient(supabaseUrl, supabaseKey); const queries = new VConQueries(supabase); /** - * Generate embedding for a text query using OpenAI + * Generate embedding for a text query using LiteLLM proxy or OpenAI */ async function generateQueryEmbedding(text: string): Promise<number[]> { - const resp = await fetch('https://api.openai.com/v1/embeddings', { + const url = useLiteLLM + ? (litellmUrl.startsWith('http') ? `${litellmUrl}/v1/embeddings` : `https://${litellmUrl}/v1/embeddings`) + : 'https://api.openai.com/v1/embeddings'; + const apiKey = useLiteLLM ? litellmKey : openaiKey; + + const resp = await fetch(url, { method: 'POST', headers: { 'Content-Type': 'application/json', - 'Authorization': `Bearer ${openaiKey}`, + 'Authorization': `Bearer ${apiKey}`, }, body: JSON.stringify({ model: 'text-embedding-3-small', @@ -45,7 +54,7 @@ async function generateQueryEmbedding(text: string): Promise<number[]> { }); if (!resp.ok) { - throw new Error(`OpenAI API error: ${resp.status} ${await resp.text()}`); + throw new Error(`${useLiteLLM ? 
'LiteLLM' : 'OpenAI'} API error: ${resp.status} ${await resp.text()}`); } const json = await resp.json(); diff --git a/src/db/database-analytics.ts b/src/db/database-analytics.ts index cf39eb6..5729dba 100644 --- a/src/db/database-analytics.ts +++ b/src/db/database-analytics.ts @@ -547,7 +547,7 @@ export class SupabaseDatabaseAnalytics implements IDatabaseAnalytics { analysis_count, attachment_count, total_duration, - ROUND(total_duration / NULLIF(dialog_count, 0), 2) as avg_duration_per_dialog + ROUND((total_duration / NULLIF(dialog_count, 0))::numeric, 2) as avg_duration_per_dialog FROM content_trends ORDER BY period `; diff --git a/src/observability/config.ts b/src/observability/config.ts index 2948562..f6ee735 100644 --- a/src/observability/config.ts +++ b/src/observability/config.ts @@ -111,8 +111,11 @@ export async function initializeObservability(): Promise { } try { - // Set up diagnostic logging - diag.setLogger(new DiagConsoleLogger(), config.logLevel); + // Set up diagnostic logging — only when using OTLP; DiagConsoleLogger writes to stdout + // which corrupts MCP stdio in non-OTLP mode. 
+ if (config.exporterType === 'otlp') { + diag.setLogger(new DiagConsoleLogger(), config.logLevel); + } // Create resource with service information const resource = new Resource({ diff --git a/src/observability/logger.ts b/src/observability/logger.ts index 2b93baa..d3e9abd 100644 --- a/src/observability/logger.ts +++ b/src/observability/logger.ts @@ -52,22 +52,20 @@ function createRootLogger() { // Development: Pretty printing if (isDevelopment) { - return pino( - { - ...baseConfig, - transport: { - target: 'pino-pretty', - options: { - colorize: true, - translateTime: 'HH:MM:ss.l', - ignore: 'pid,hostname', - messageFormat: '{component} - {msg}', - singleLine: false, - }, + return pino({ + ...baseConfig, + transport: { + target: 'pino-pretty', + options: { + colorize: true, + translateTime: 'HH:MM:ss.l', + ignore: 'pid,hostname', + messageFormat: '{component} - {msg}', + singleLine: false, + destination: 2, // stderr — transport runs in worker thread, destination must be set here }, }, - pino.destination(2) // stderr for MCP - ); + }); } // Production: JSON to stderr diff --git a/supabase/functions/_shared/embeddings.ts b/supabase/functions/_shared/embeddings.ts new file mode 100644 index 0000000..45f66ea --- /dev/null +++ b/supabase/functions/_shared/embeddings.ts @@ -0,0 +1,147 @@ +/** + * Shared embedding functions for LiteLLM, OpenAI, Azure OpenAI, and Hugging Face. + * Used by scripts/embed-vcons.ts (Node) and supabase/functions/embed-vcons (Deno). + * All functions take explicit options (no process.env / Deno.env) so callers supply credentials. 
+ */ + +export type EmbeddingProvider = "litellm" | "openai" | "azure" | "hf"; + +const EMBEDDING_MODEL = "text-embedding-3-small"; +const DIMENSIONS = 384; + +export interface LiteLLMOptions { + baseUrl: string; + apiKey: string; +} + +export interface OpenAIOptions { + apiKey: string; +} + +export interface AzureOpenAIOptions { + endpoint: string; + apiKey: string; + apiVersion?: string; +} + +export interface HFOptions { + apiToken: string; +} + +/** + * Generate embeddings via LiteLLM proxy (OpenAI-compatible /v1/embeddings) + */ +export async function embedLiteLLM(texts: string[], options: LiteLLMOptions): Promise<number[][]> { + const { baseUrl, apiKey } = options; + const normalized = baseUrl.trim().replace(/\/$/, ""); + if (!normalized || !apiKey) { + throw new Error("LITELLM_PROXY_URL and LITELLM_MASTER_KEY (or LITELLM_API_KEY) are required"); + } + const url = normalized.startsWith("http") ? `${normalized}/v1/embeddings` : `https://${normalized}/v1/embeddings`; + const resp = await fetch(url, { + method: "POST", + headers: { + "Content-Type": "application/json", + Authorization: `Bearer ${apiKey}`, + }, + body: JSON.stringify({ model: EMBEDDING_MODEL, input: texts, dimensions: DIMENSIONS }), + }); + if (!resp.ok) { + const text = await resp.text(); + throw new Error(`LiteLLM embeddings failed: ${resp.status} ${text}`); + } + const json = await resp.json(); + return json.data.map((d: { embedding: number[] }) => d.embedding); +} + +/** + * Generate embeddings using OpenAI API + */ +export async function embedOpenAI(texts: string[], options: OpenAIOptions): Promise<number[][]> { + const { apiKey } = options; + if (!apiKey) throw new Error("OPENAI_API_KEY not set"); + const resp = await fetch("https://api.openai.com/v1/embeddings", { + method: "POST", + headers: { + "Content-Type": "application/json", + Authorization: `Bearer ${apiKey}`, + }, + body: JSON.stringify({ model: EMBEDDING_MODEL, input: texts, dimensions: DIMENSIONS }), + }); + if (!resp.ok) { + const text = await 
resp.text(); + throw new Error(`OpenAI embeddings failed: ${resp.status} ${text}`); + } + const json = await resp.json(); + return json.data.map((d: { embedding: number[] }) => d.embedding); +} + +/** + * Generate embeddings using Azure OpenAI API + */ +export async function embedAzureOpenAI(texts: string[], options: AzureOpenAIOptions): Promise<number[][]> { + const { endpoint, apiKey, apiVersion = "2024-02-01" } = options; + if (!endpoint || !apiKey) { + throw new Error("AZURE_OPENAI_EMBEDDING_ENDPOINT and AZURE_OPENAI_EMBEDDING_API_KEY are required"); + } + const normalized = endpoint.replace(/\/$/, ""); + const url = `${normalized}/openai/deployments/${EMBEDDING_MODEL}/embeddings?api-version=${apiVersion}`; + const resp = await fetch(url, { + method: "POST", + headers: { + "Content-Type": "application/json", + "api-key": apiKey, + }, + body: JSON.stringify({ input: texts, dimensions: DIMENSIONS }), + }); + if (!resp.ok) { + const text = await resp.text(); + throw new Error(`Azure OpenAI embeddings failed: ${resp.status} ${text}`); + } + const json = await resp.json(); + return json.data.map((d: { embedding: number[] }) => d.embedding); +} + +const HF_MODEL = "sentence-transformers/all-MiniLM-L6-v2"; +const HF_URL = "https://api-inference.huggingface.co/pipeline/feature-extraction/" + HF_MODEL; + +/** + * Generate embeddings using Hugging Face Inference API (one request per text) + */ +export async function embedHF(texts: string[], options: HFOptions): Promise<number[][]> { + const { apiToken } = options; + if (!apiToken) throw new Error("HF_API_TOKEN not set"); + const result: number[][] = []; + for (const text of texts) { + const resp = await fetch(HF_URL, { + method: "POST", + headers: { + Authorization: `Bearer ${apiToken}`, + "Content-Type": "application/json", + }, + body: JSON.stringify({ inputs: text, options: { wait_for_model: true } }), + }); + if (!resp.ok) { + const textErr = await resp.text(); + throw new Error(`HF embeddings failed: ${resp.status} ${textErr}`); + } + 
const json = await resp.json(); + const vec = Array.isArray(json[0]) ? json[0] : json; + result.push(vec as number[]); + } + return result; +} + +/** + * Model name for storage (embedding_model column) + */ +export function getModelName(provider: EmbeddingProvider): string { + switch (provider) { + case "litellm": + case "openai": + case "azure": + return EMBEDDING_MODEL; + case "hf": + return HF_MODEL; + } +} diff --git a/supabase/functions/embed-vcons/index.ts b/supabase/functions/embed-vcons/index.ts index 27213c6..4a14c1d 100644 --- a/supabase/functions/embed-vcons/index.ts +++ b/supabase/functions/embed-vcons/index.ts @@ -1,25 +1,35 @@ // deno-lint-ignore-file no-explicit-any import { serve } from "https://deno.land/std@0.224.0/http/server.ts"; import { createClient } from "https://esm.sh/@supabase/supabase-js@2"; - -type EmbeddingProvider = "openai" | "azure" | "hf"; +import { + type EmbeddingProvider, + embedLiteLLM, + embedOpenAI, + embedAzureOpenAI, + embedHF, + getModelName, +} from "../_shared/embeddings.ts"; const SUPABASE_URL = Deno.env.get("SUPABASE_URL") ?? ""; const SUPABASE_SERVICE_ROLE_KEY = Deno.env.get("SUPABASE_SERVICE_ROLE_KEY") ?? ""; +const LITELLM_PROXY_URL = (Deno.env.get("LITELLM_PROXY_URL") ?? "").replace(/\/$/, ""); +const LITELLM_MASTER_KEY = Deno.env.get("LITELLM_MASTER_KEY") ?? Deno.env.get("LITELLM_API_KEY"); const OPENAI_API_KEY = Deno.env.get("OPENAI_API_KEY"); const AZURE_OPENAI_EMBEDDING_ENDPOINT = Deno.env.get("AZURE_OPENAI_EMBEDDING_ENDPOINT"); const AZURE_OPENAI_EMBEDDING_API_KEY = Deno.env.get("AZURE_OPENAI_EMBEDDING_API_KEY"); const AZURE_OPENAI_EMBEDDING_API_VERSION = Deno.env.get("AZURE_OPENAI_EMBEDDING_API_VERSION") || "2024-02-01"; const HF_API_TOKEN = Deno.env.get("HF_API_TOKEN"); -// Provider priority: Azure OpenAI > OpenAI > Hugging Face -const PROVIDER: EmbeddingProvider = (AZURE_OPENAI_EMBEDDING_ENDPOINT && AZURE_OPENAI_EMBEDDING_API_KEY) - ? "azure" - : OPENAI_API_KEY - ? "openai" - : HF_API_TOKEN - ? 
"hf" - : "openai"; +// Provider priority: LiteLLM > Azure OpenAI > OpenAI > Hugging Face +const PROVIDER: EmbeddingProvider = (LITELLM_PROXY_URL && LITELLM_MASTER_KEY) + ? "litellm" + : (AZURE_OPENAI_EMBEDDING_ENDPOINT && AZURE_OPENAI_EMBEDDING_API_KEY) + ? "azure" + : OPENAI_API_KEY + ? "openai" + : HF_API_TOKEN + ? "hf" + : "openai"; const supabase = createClient(SUPABASE_URL, SUPABASE_SERVICE_ROLE_KEY, { auth: { persistSession: false } @@ -118,78 +128,6 @@ function truncateToTokens(text: string, maxTokens: number): string { return text.substring(0, maxChars) + "..."; } -async function embedOpenAI(texts: string[]): Promise { - const resp = await fetch("https://api.openai.com/v1/embeddings", { - method: "POST", - headers: { - "Content-Type": "application/json", - Authorization: `Bearer ${OPENAI_API_KEY}` - }, - body: JSON.stringify({ model: "text-embedding-3-small", input: texts, dimensions: 384 }) - }); - if (!resp.ok) throw new Error(`OpenAI embeddings failed: ${resp.status} ${await resp.text()}`); - const json = await resp.json(); - return json.data.map((d: any) => d.embedding as number[]); -} - -async function embedAzureOpenAI(texts: string[]): Promise { - if (!AZURE_OPENAI_EMBEDDING_ENDPOINT || !AZURE_OPENAI_EMBEDDING_API_KEY) { - throw new Error("AZURE_OPENAI_EMBEDDING_ENDPOINT and AZURE_OPENAI_EMBEDDING_API_KEY are required"); - } - - // Construct the full URL: {endpoint}/openai/deployments/{deployment}/embeddings?api-version={version} - const normalizedEndpoint = AZURE_OPENAI_EMBEDDING_ENDPOINT.replace(/\/$/, ""); - const deployment = "text-embedding-3-small"; - const url = `${normalizedEndpoint}/openai/deployments/${deployment}/embeddings?api-version=${AZURE_OPENAI_EMBEDDING_API_VERSION}`; - - const resp = await fetch(url, { - method: "POST", - headers: { - "Content-Type": "application/json", - "api-key": AZURE_OPENAI_EMBEDDING_API_KEY - }, - body: JSON.stringify({ input: texts, dimensions: 384 }) - }); - if (!resp.ok) throw new Error(`Azure OpenAI 
embeddings failed: ${resp.status} ${await resp.text()}`); - const json = await resp.json(); - return json.data.map((d: any) => d.embedding as number[]); -} - -async function embedHF(texts: string[]): Promise { - // Hugging Face Inference API batched: one by one fallback for simplicity - const result: number[][] = []; - for (const t of texts) { - const resp = await fetch( - "https://api-inference.huggingface.co/pipeline/feature-extraction/sentence-transformers/all-MiniLM-L6-v2", - { - method: "POST", - headers: { - Authorization: `Bearer ${HF_API_TOKEN}`, - "Content-Type": "application/json" - }, - body: JSON.stringify({ inputs: t, options: { wait_for_model: true } }) - } - ); - if (!resp.ok) throw new Error(`HF embeddings failed: ${resp.status} ${await resp.text()}`); - const json = await resp.json(); - // Response is nested array [1 x 384] → flatten - const vec = Array.isArray(json[0]) ? json[0] : json; - result.push(vec as number[]); - } - return result; -} - -function getModelName(provider: EmbeddingProvider): string { - switch (provider) { - case "openai": - case "azure": - // Both use the same underlying model - return "text-embedding-3-small"; - case "hf": - return "sentence-transformers/all-MiniLM-L6-v2"; - } -} - async function upsertEmbeddings(units: TextUnit[], vectors: number[][]) { const rows = units.map((u, i) => ({ vcon_id: u.vcon_id, @@ -214,6 +152,9 @@ serve(async (req) => { const vconId = url.searchParams.get("vcon_id") ?? undefined; const limit = Math.max(1, Math.min(500, Number(url.searchParams.get("limit") ?? 
"100"))); + if (PROVIDER === "litellm" && (!LITELLM_PROXY_URL || !LITELLM_MASTER_KEY)) { + return new Response(JSON.stringify({ error: "LITELLM_PROXY_URL and LITELLM_MASTER_KEY (or LITELLM_API_KEY) missing" }), { status: 400 }); + } if (PROVIDER === "openai" && !OPENAI_API_KEY) { return new Response(JSON.stringify({ error: "OPENAI_API_KEY missing" }), { status: 400 }); } @@ -238,41 +179,52 @@ serve(async (req) => { let totalEmbedded = 0; let totalErrors = 0; - if (PROVIDER === "openai" || PROVIDER === "azure") { + if (PROVIDER === "litellm" || PROVIDER === "openai" || PROVIDER === "azure") { // Group units into token-aware batches const batches: TextUnit[][] = []; let currentBatch: TextUnit[] = []; let currentTokens = 0; - + for (const unit of units) { // Truncate extremely long texts const truncated = truncateToTokens(unit.content_text, MAX_TOKENS_PER_ITEM); const tokens = estimateTokens(truncated); - + // If adding this unit would exceed batch limit, start a new batch if (currentBatch.length > 0 && currentTokens + tokens > MAX_TOKENS_PER_BATCH) { batches.push(currentBatch); currentBatch = []; currentTokens = 0; } - + currentBatch.push({ ...unit, content_text: truncated }); currentTokens += tokens; } - + // Add remaining batch if (currentBatch.length > 0) { batches.push(currentBatch); } - - // Choose the appropriate embedding function - const embedFn = PROVIDER === "azure" ? embedAzureOpenAI : embedOpenAI; - - // Process each batch + + // Process each batch with shared embedders (pass env as options) for (const batch of batches) { try { const texts = batch.map((u) => u.content_text); - const vectors = await embedFn(texts); + let vectors: number[][]; + switch (PROVIDER) { + case "litellm": + vectors = await embedLiteLLM(texts, { baseUrl: LITELLM_PROXY_URL, apiKey: LITELLM_MASTER_KEY ?? "" }); + break; + case "azure": + vectors = await embedAzureOpenAI(texts, { + endpoint: AZURE_OPENAI_EMBEDDING_ENDPOINT ?? "", + apiKey: AZURE_OPENAI_EMBEDDING_API_KEY ?? 
"", + apiVersion: AZURE_OPENAI_EMBEDDING_API_VERSION, + }); + break; + default: + vectors = await embedOpenAI(texts, { apiKey: OPENAI_API_KEY ?? "" }); + } await upsertEmbeddings(batch, vectors); totalEmbedded += batch.length; } catch (e) { @@ -281,9 +233,9 @@ serve(async (req) => { } } } else { - // HF processes one at a time anyway + // HF const texts = units.map((u) => u.content_text); - const vectors = await embedHF(texts); + const vectors = await embedHF(texts, { apiToken: HF_API_TOKEN ?? "" }); await upsertEmbeddings(units, vectors); totalEmbedded = units.length; } diff --git a/supabase/functions/sync-to-s3/index.ts b/supabase/functions/sync-to-s3/index.ts index 9d1940d..db5bca3 100644 --- a/supabase/functions/sync-to-s3/index.ts +++ b/supabase/functions/sync-to-s3/index.ts @@ -11,6 +11,8 @@ const VCON_S3_PREFIX = Deno.env.get("VCON_S3_PREFIX") ?? ""; const AWS_REGION = Deno.env.get("AWS_REGION") ?? "us-east-1"; const AWS_ACCESS_KEY_ID = Deno.env.get("AWS_ACCESS_KEY_ID") ?? ""; const AWS_SECRET_ACCESS_KEY = Deno.env.get("AWS_SECRET_ACCESS_KEY") ?? ""; +const LITELLM_PROXY_URL = (Deno.env.get("LITELLM_PROXY_URL") ?? "").replace(/\/$/, ""); +const LITELLM_MASTER_KEY = Deno.env.get("LITELLM_MASTER_KEY") ?? Deno.env.get("LITELLM_API_KEY"); const OPENAI_API_KEY = Deno.env.get("OPENAI_API_KEY"); const AZURE_OPENAI_EMBEDDING_ENDPOINT = Deno.env.get("AZURE_OPENAI_EMBEDDING_ENDPOINT"); const AZURE_OPENAI_EMBEDDING_API_KEY = Deno.env.get("AZURE_OPENAI_EMBEDDING_API_KEY"); @@ -99,14 +101,16 @@ async function uploadToS3(key, jsonStr) { // --------------------------------------------------------------------------- // Embedding helpers (unchanged except for safety fixes) // --------------------------------------------------------------------------- -// Provider priority: Azure OpenAI > OpenAI > Hugging Face -const PROVIDER = (AZURE_OPENAI_EMBEDDING_ENDPOINT && AZURE_OPENAI_EMBEDDING_API_KEY) - ? "azure" - : OPENAI_API_KEY - ? "openai" - : HF_API_TOKEN - ? 
"hf" - : "openai"; +// Provider priority: LiteLLM > Azure OpenAI > OpenAI > Hugging Face +const PROVIDER = (LITELLM_PROXY_URL && LITELLM_MASTER_KEY) + ? "litellm" + : (AZURE_OPENAI_EMBEDDING_ENDPOINT && AZURE_OPENAI_EMBEDDING_API_KEY) + ? "azure" + : OPENAI_API_KEY + ? "openai" + : HF_API_TOKEN + ? "hf" + : "openai"; function estimateTokens(text) { return Math.ceil(text.length / 3.5); }