Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 21 additions & 3 deletions app/services/embedding.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,16 @@
"""Embedding service — wraps LiteLLM for provider-agnostic vector generation."""
# NOTE: a future statement must precede every other statement in the module
# (only the docstring, comments and blank lines may come before it); placing
# ordinary imports above it makes the whole module fail with a SyntaxError.
from __future__ import annotations

import asyncio
import logging

from litellm import aembedding

# Default embedding model — small, fast, cheap
DEFAULT_EMBEDDING_MODEL = "text-embedding-3-small"
DEFAULT_EMBEDDING_DIMENSIONS = 1536
# Upper bound on the number of texts sent to the embedding API per request.
MAX_BATCH_SIZE = 128

# Retry configuration
# Total number of attempts made per batch before the error is surfaced.
MAX_RETRIES = 3
# Base delay in seconds; the wait is multiplied by 2 ** attempt between
# retries (exponential backoff).
RETRY_DELAY = 1.0  # seconds

Expand Down Expand Up @@ -36,9 +41,22 @@ async def embed_texts(
batch = texts[i : i + MAX_BATCH_SIZE]
kwargs: dict = {"model": model, "input": batch}
if api_key:
kwargs["api_key"] = api_key

response = await aembedding(**kwargs)
# Retry logic for API failures
for attempt in range(MAX_RETRIES):
try:
response = await aembedding(**kwargs)
batch_embeddings = [item["embedding"] for item in response.data]
all_embeddings.extend(batch_embeddings)
break # Success, exit retry loop
except Exception as e:
if attempt == MAX_RETRIES - 1:
# Final attempt failed, re-raise the exception
logging.error(f"Embedding API failed after {MAX_RETRIES} attempts: {e}")
raise RuntimeError(f"Failed to generate embeddings after {MAX_RETRIES} attempts: {e}") from e
else:
# Transient failure, wait and retry
logging.warning(f"Embedding API attempt {attempt + 1} failed: {e}. Retrying...")
await asyncio.sleep(RETRY_DELAY * (2 ** attempt)) # Exponential backoff
batch_embeddings = [item["embedding"] for item in response.data]
all_embeddings.extend(batch_embeddings)

Expand Down