diff --git a/.env.example b/.env.example index 7da35381..a9f2d3a7 100644 --- a/.env.example +++ b/.env.example @@ -182,6 +182,44 @@ ANTHROPIC_API_KEY=your_anthropic_api_key_here # Get OpenAI API key from https://platform.openai.com/api-keys MCP_SCANNER_LLM_API_KEY=your_openai_api_key_here +# ============================================================================= +# EMBEDDINGS CONFIGURATION +# ============================================================================= + +# Embeddings provider: 'sentence-transformers' (local) or 'litellm' (cloud-based) +# Default: sentence-transformers (no API key required) +EMBEDDINGS_PROVIDER=litellm + +# Model name for embeddings generation +# For sentence-transformers: model name from Hugging Face (e.g., all-MiniLM-L6-v2) +# For litellm: provider-prefixed model (e.g., bedrock/amazon.titan-embed-text-v1, +# openai/text-embedding-3-small, cohere/embed-english-v3.0) +EMBEDDINGS_MODEL_NAME=bedrock/amazon.titan-embed-text-v2:0 + +# Embedding dimension (must match the model's output dimension) +# all-MiniLM-L6-v2: 384 +# text-embedding-3-small: 1536 +# amazon.titan-embed-text-v1: 1536 +# cohere/embed-english-v3.0: 1024 +EMBEDDINGS_MODEL_DIMENSIONS=1024 + +# LiteLLM-specific settings (only used when EMBEDDINGS_PROVIDER=litellm) +# API key for cloud embeddings provider (provider-specific) +# For OpenAI: Get from https://platform.openai.com/api-keys +# For Cohere: Get from https://dashboard.cohere.com/api-keys +# For Bedrock: Not used - configure AWS credentials via standard methods (see below) +# EMBEDDINGS_API_KEY=your_api_key_here + +# Optional: Custom API base URL for embeddings provider +# EMBEDDINGS_API_BASE=https://api.custom-endpoint.com + +# AWS region for Amazon Bedrock embeddings (only needed for Bedrock) +# Note: For Bedrock authentication, use standard AWS credential chain: +# - IAM roles (recommended for EC2/EKS) +# - Environment variables (AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY) +# - AWS credentials file 
(~/.aws/credentials) +# EMBEDDINGS_AWS_REGION=us-east-1 + # ============================================================================= # CONTAINER REGISTRY CREDENTIALS (for CI/CD and local builds) # ============================================================================= diff --git a/docker-compose.yml b/docker-compose.yml index 0bbc3788..031594f2 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -40,6 +40,13 @@ services: - EXTERNAL_REGISTRY_TAGS=${EXTERNAL_REGISTRY_TAGS:-anthropic-registry,workday-asor} - ASOR_ACCESS_TOKEN=${ASOR_ACCESS_TOKEN} - ASOR_CLIENT_CREDENTIALS=${ASOR_CLIENT_CREDENTIALS} + # Embeddings Configuration + - EMBEDDINGS_PROVIDER=${EMBEDDINGS_PROVIDER:-sentence-transformers} + - EMBEDDINGS_MODEL_NAME=${EMBEDDINGS_MODEL_NAME:-all-MiniLM-L6-v2} + - EMBEDDINGS_MODEL_DIMENSIONS=${EMBEDDINGS_MODEL_DIMENSIONS:-384} + - EMBEDDINGS_API_KEY=${EMBEDDINGS_API_KEY} + - EMBEDDINGS_API_BASE=${EMBEDDINGS_API_BASE} + - EMBEDDINGS_AWS_REGION=${EMBEDDINGS_AWS_REGION:-us-east-1} ports: - "80:80" - "443:443" diff --git a/docker/registry-entrypoint.sh b/docker/registry-entrypoint.sh index a49df5c1..ab65b559 100644 --- a/docker/registry-entrypoint.sh +++ b/docker/registry-entrypoint.sh @@ -103,26 +103,48 @@ else echo "HTTP + HTTPS Nginx configuration installed." fi -# --- Model Check --- -EMBEDDINGS_MODEL_NAME="all-MiniLM-L6-v2" -EMBEDDINGS_MODEL_DIR="/app/registry/models/$EMBEDDINGS_MODEL_NAME" - -echo "Checking for sentence-transformers model..." -if [ ! -d "$EMBEDDINGS_MODEL_DIR" ] || [ -z "$(ls -A "$EMBEDDINGS_MODEL_DIR")" ]; then - echo "==========================================" - echo "WARNING: Embeddings model not found!" - echo "==========================================" - echo "" - echo "The registry requires the sentence-transformers model to function properly." 
- echo "Please download the model to: $EMBEDDINGS_MODEL_DIR" - echo "" - echo "Run this command to download the model:" - echo " docker run --rm -v \$(pwd)/models:/models huggingface/transformers-pytorch-cpu python -c \"from sentence_transformers import SentenceTransformer; SentenceTransformer('sentence-transformers/$EMBEDDINGS_MODEL_NAME').save('/models/$EMBEDDINGS_MODEL_NAME')\"" - echo "" - echo "Or see the README for alternative download methods." - echo "==========================================" -else - echo "Embeddings model found at $EMBEDDINGS_MODEL_DIR" +# --- Embeddings Configuration --- +# Get embeddings configuration from environment or use defaults +EMBEDDINGS_PROVIDER="${EMBEDDINGS_PROVIDER:-sentence-transformers}" +EMBEDDINGS_MODEL_NAME="${EMBEDDINGS_MODEL_NAME:-all-MiniLM-L6-v2}" +EMBEDDINGS_MODEL_DIMENSIONS="${EMBEDDINGS_MODEL_DIMENSIONS:-384}" + +echo "Embeddings Configuration:" +echo " Provider: $EMBEDDINGS_PROVIDER" +echo " Model: $EMBEDDINGS_MODEL_NAME" +echo " Dimensions: $EMBEDDINGS_MODEL_DIMENSIONS" + +# Only check for local model if using sentence-transformers +if [ "$EMBEDDINGS_PROVIDER" = "sentence-transformers" ]; then + EMBEDDINGS_MODEL_DIR="/app/registry/models/$EMBEDDINGS_MODEL_NAME" + + echo "Checking for sentence-transformers model..." + if [ ! -d "$EMBEDDINGS_MODEL_DIR" ] || [ -z "$(ls -A "$EMBEDDINGS_MODEL_DIR")" ]; then + echo "==========================================" + echo "WARNING: Embeddings model not found!" + echo "==========================================" + echo "" + echo "The registry requires the sentence-transformers model to function properly." 
+ echo "Please download the model to: $EMBEDDINGS_MODEL_DIR" + echo "" + echo "Run this command to download the model:" + echo " docker run --rm -v \$(pwd)/models:/models huggingface/transformers-pytorch-cpu python -c \"from sentence_transformers import SentenceTransformer; SentenceTransformer('sentence-transformers/$EMBEDDINGS_MODEL_NAME').save('/models/$EMBEDDINGS_MODEL_NAME')\"" + echo "" + echo "Or see the README for alternative download methods." + echo "==========================================" + else + echo "Embeddings model found at $EMBEDDINGS_MODEL_DIR" + fi +elif [ "$EMBEDDINGS_PROVIDER" = "litellm" ]; then + echo "Using LiteLLM provider - no local model download required" + echo "Model: $EMBEDDINGS_MODEL_NAME" + if [[ "$EMBEDDINGS_MODEL_NAME" == bedrock/* ]]; then + echo "Bedrock model will use AWS credential chain for authentication" + elif [ ! -z "$EMBEDDINGS_API_KEY" ]; then + echo "API key configured for cloud embeddings" + else + echo "WARNING: No EMBEDDINGS_API_KEY set for cloud provider" + fi fi # --- Environment Variable Substitution for MCP Server Auth Tokens --- @@ -140,8 +162,10 @@ done echo "MCP Server configuration processing completed." # --- Start Background Services --- +# Export embeddings configuration for the registry service +export EMBEDDINGS_PROVIDER=$EMBEDDINGS_PROVIDER export EMBEDDINGS_MODEL_NAME=$EMBEDDINGS_MODEL_NAME -export EMBEDDINGS_MODEL_DIMENSIONS=384 +export EMBEDDINGS_MODEL_DIMENSIONS=$EMBEDDINGS_MODEL_DIMENSIONS echo "Starting MCP Registry in the background..." cd /app diff --git a/docs/complete-setup-guide.md b/docs/complete-setup-guide.md index 88aa1433..82d1faeb 100644 --- a/docs/complete-setup-guide.md +++ b/docs/complete-setup-guide.md @@ -217,19 +217,19 @@ For now, make these additional essential changes in the `.env` file: ```bash # Set authentication provider to Keycloak -AUTH_PROVIDER=keycloak +AUTH_PROVIDER=keycloak #Do not change # Set a secure admin password (change this!) 
# This is used for Keycloak API authentication during setup -KEYCLOAK_ADMIN_PASSWORD=YourSecureAdminPassword123! +KEYCLOAK_ADMIN_PASSWORD=YourSecureAdminPassword123! # change me # CRITICAL: Set INITIAL_ADMIN_PASSWORD to the SAME VALUE as KEYCLOAK_ADMIN_PASSWORD # This is used to set the password for the initial admin user in the realm # THESE MUST MATCH - see Step 5 for details -INITIAL_ADMIN_PASSWORD=YourSecureAdminPassword123! +INITIAL_ADMIN_PASSWORD=YourSecureAdminPassword123! # change me # Set Keycloak database password (change this!) -KEYCLOAK_DB_PASSWORD=SecureKeycloakDB123! +KEYCLOAK_DB_PASSWORD=SecureKeycloakDB123! # change me # Leave other Keycloak settings as default for now KEYCLOAK_URL=http://localhost:8080 @@ -307,13 +307,13 @@ If these passwords don't match: ```bash # Start only the database and Keycloak services first -docker-compose up -d keycloak-db keycloak +docker compose up -d keycloak-db keycloak # Check if services are starting -docker-compose ps +docker compose ps # Monitor logs to see when Keycloak is ready -docker-compose logs -f keycloak +docker compose logs -f keycloak # Wait for message: "Keycloak 25.x.x started in xxxms" # Press Ctrl+C to exit logs when you see this message ``` diff --git a/pyproject.toml b/pyproject.toml index 14d172b7..227df9fe 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -41,6 +41,7 @@ dependencies = [ "cisco-ai-mcp-scanner>=3.0.1", "awscli>=1.36.0", "boto3>=1.35.0", + "litellm>=1.50.0", ] [project.optional-dependencies] diff --git a/registry/core/config.py b/registry/core/config.py index 3ebbf6a9..13e5f55b 100644 --- a/registry/core/config.py +++ b/registry/core/config.py @@ -1,6 +1,8 @@ import os import secrets from pathlib import Path +from typing import Optional + from pydantic import ConfigDict from pydantic_settings import BaseSettings @@ -23,9 +25,19 @@ class Settings(BaseSettings): auth_server_url: str = "http://localhost:8888" auth_server_external_url: str = "http://localhost:8888" # External URL for 
OAuth redirects - # Embeddings settings + # Embeddings settings [Default] + embeddings_provider: str = "sentence-transformers" # 'sentence-transformers' or 'litellm' embeddings_model_name: str = "all-MiniLM-L6-v2" - embeddings_model_dimensions: int = 384 + embeddings_model_dimensions: int = 384 # 384 for default and 1024 for bedrock titan v2 + + + # LiteLLM-specific settings (only used when embeddings_provider='litellm') + # For Bedrock: Set to None and configure AWS credentials via standard methods + # (IAM roles, AWS_ACCESS_KEY_ID/AWS_SECRET_ACCESS_KEY env vars, or ~/.aws/credentials) + embeddings_api_key: Optional[str] = None + embeddings_secret_key: Optional[str] = None + embeddings_api_base: Optional[str] = None + embeddings_aws_region: Optional[str] = "us-east-1" # Health check settings health_check_interval_seconds: int = 300 # 5 minutes for automatic background checks (configurable via env var) diff --git a/registry/embeddings/README.md b/registry/embeddings/README.md new file mode 100644 index 00000000..52172b6b --- /dev/null +++ b/registry/embeddings/README.md @@ -0,0 +1,385 @@ +# Embeddings Module + +Vendor-agnostic embeddings generation for MCP Gateway Registry's semantic search functionality. + +## Overview + +This module provides a unified interface for generating text embeddings from multiple providers, supporting both local models (sentence-transformers) and cloud-based APIs (via LiteLLM). + +## Features + +- **Vendor-agnostic**: Switch between embeddings providers with configuration changes +- **Local & Cloud Support**: Use local models or cloud APIs (OpenAI, Cohere, Amazon Bedrock, etc.) 
+- **Backward Compatible**: Works seamlessly with existing FAISS indices +- **Easy Configuration**: Simple environment variable setup +- **Extensible**: Easy to add new providers + +## Architecture + +``` +EmbeddingsClient (Abstract Base Class) +├── SentenceTransformersClient (Local models) +└── LiteLLMClient (Cloud APIs via LiteLLM) +``` + +## Quick Start + +### Using Sentence Transformers (Default) + +```bash +# In .env +EMBEDDINGS_PROVIDER=sentence-transformers +EMBEDDINGS_MODEL_NAME=all-MiniLM-L6-v2 +EMBEDDINGS_MODEL_DIMENSIONS=384 +``` + +```python +from registry.embeddings import create_embeddings_client + +client = create_embeddings_client( + provider="sentence-transformers", + model_name="all-MiniLM-L6-v2", + embedding_dimension=384, +) + +embeddings = client.encode(["Hello world", "This is a test"]) +print(embeddings.shape) # (2, 384) +``` + +### Using LiteLLM with OpenAI + +```bash +# In .env +EMBEDDINGS_PROVIDER=litellm +EMBEDDINGS_MODEL_NAME=openai/text-embedding-3-small +EMBEDDINGS_MODEL_DIMENSIONS=1536 +EMBEDDINGS_API_KEY=your_openai_api_key +``` + +```python +from registry.embeddings import create_embeddings_client + +client = create_embeddings_client( + provider="litellm", + model_name="openai/text-embedding-3-small", + api_key="your_openai_api_key", + embedding_dimension=1536, +) + +embeddings = client.encode(["Hello world", "This is a test"]) +print(embeddings.shape) # (2, 1536) +``` + +### Using LiteLLM with Amazon Bedrock + +Amazon Bedrock uses the standard AWS credential chain for authentication. 
+ +```bash +# In .env +EMBEDDINGS_PROVIDER=litellm +EMBEDDINGS_MODEL_NAME=bedrock/amazon.titan-embed-text-v1 +EMBEDDINGS_MODEL_DIMENSIONS=1536 +EMBEDDINGS_AWS_REGION=us-east-1 +``` + +**Configure AWS credentials via standard methods:** + +**Option 1: IAM Roles (Recommended for EC2/EKS)** +```bash +# No additional configuration needed +# EC2 instance or EKS pod automatically uses attached IAM role +``` + +**Option 2: Environment Variables** +```bash +export AWS_ACCESS_KEY_ID=AKIAIOSFODNN7EXAMPLE +export AWS_SECRET_ACCESS_KEY=wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY +export AWS_REGION=us-east-1 +``` + +**Option 3: AWS Credentials File** +```bash +# ~/.aws/credentials +[default] +aws_access_key_id = AKIAIOSFODNN7EXAMPLE +aws_secret_access_key = wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY + +# ~/.aws/config +[default] +region = us-east-1 +``` + +**Python Usage:** +```python +from registry.embeddings import create_embeddings_client + +# Uses standard AWS credential chain +client = create_embeddings_client( + provider="litellm", + model_name="bedrock/amazon.titan-embed-text-v1", + aws_region="us-east-1", + embedding_dimension=1536, +) + +embeddings = client.encode(["Hello world", "This is a test"]) +print(embeddings.shape) # (2, 1536) +``` + +## Configuration + +### Environment Variables + +| Variable | Description | Default | Required | +|----------|-------------|---------|----------| +| `EMBEDDINGS_PROVIDER` | Provider type: `sentence-transformers` or `litellm` | `sentence-transformers` | No | +| `EMBEDDINGS_MODEL_NAME` | Model identifier | `all-MiniLM-L6-v2` | Yes | +| `EMBEDDINGS_MODEL_DIMENSIONS` | Embedding dimension | `384` | Yes | +| `EMBEDDINGS_API_KEY` | API key for cloud provider (OpenAI, Cohere, etc.) 
| - | For cloud* | +| `EMBEDDINGS_API_BASE` | Custom API endpoint (LiteLLM only) | - | No | +| `EMBEDDINGS_AWS_REGION` | AWS region for Bedrock (LiteLLM only) | - | For Bedrock | + +*Not required for AWS Bedrock - use standard AWS credential chain (IAM roles, environment variables, ~/.aws/credentials) + +### Supported Models + +#### Sentence Transformers (Local) + +- `all-MiniLM-L6-v2` (384 dimensions) - Fast, lightweight +- `all-mpnet-base-v2` (768 dimensions) - High quality +- `paraphrase-multilingual-MiniLM-L12-v2` (384 dimensions) - Multilingual +- Any model from [Hugging Face sentence-transformers](https://huggingface.co/models?library=sentence-transformers) + +#### LiteLLM (Cloud-based) + +**OpenAI:** +- `openai/text-embedding-3-small` (1536 dimensions) +- `openai/text-embedding-3-large` (3072 dimensions) +- `openai/text-embedding-ada-002` (1536 dimensions) + +**Cohere:** +- `cohere/embed-english-v3.0` (1024 dimensions) +- `cohere/embed-multilingual-v3.0` (1024 dimensions) + +**Amazon Bedrock:** +- `bedrock/amazon.titan-embed-text-v1` (1536 dimensions) +- `bedrock/cohere.embed-english-v3` (1024 dimensions) +- `bedrock/cohere.embed-multilingual-v3` (1024 dimensions) + +## API Reference + +### EmbeddingsClient (Abstract) + +Base class for all embeddings clients. + +**Methods:** +- `encode(texts: List[str]) -> np.ndarray`: Generate embeddings for texts +- `get_embedding_dimension() -> int`: Get embedding dimension + +### SentenceTransformersClient + +Local embeddings using sentence-transformers library. + +**Constructor:** +```python +SentenceTransformersClient( + model_name: str, + model_dir: Optional[Path] = None, + cache_dir: Optional[Path] = None, +) +``` + +**Parameters:** +- `model_name`: Hugging Face model identifier +- `model_dir`: Local directory with pre-downloaded model (optional) +- `cache_dir`: Cache directory for models (optional) + +### LiteLLMClient + +Cloud-based embeddings via LiteLLM. 
+ +**Constructor:** +```python +LiteLLMClient( + model_name: str, + api_key: Optional[str] = None, + secret_key: Optional[str] = None, + api_base: Optional[str] = None, + aws_region: Optional[str] = None, + embedding_dimension: Optional[int] = None, +) +``` + +**Parameters:** +- `model_name`: Provider-prefixed model (e.g., `openai/text-embedding-3-small`, `bedrock/amazon.titan-embed-text-v1`) +- `api_key`: API key for the provider (OpenAI, Cohere, etc.; not used for Bedrock) +- `api_base`: Custom API endpoint URL (optional) +- `aws_region`: AWS region for Bedrock (required for Bedrock) +- `embedding_dimension`: Expected dimension for validation (optional) + +**AWS Bedrock Notes:** +- Uses standard AWS credential chain for authentication (IAM roles, environment variables, ~/.aws/credentials) +- The `api_key` parameter is not used for Bedrock authentication +- The `aws_region` parameter is required for Bedrock + +### Factory Function + +```python +create_embeddings_client( + provider: str, + model_name: str, + model_dir: Optional[Path] = None, + cache_dir: Optional[Path] = None, + api_key: Optional[str] = None, + secret_key: Optional[str] = None, + api_base: Optional[str] = None, + aws_region: Optional[str] = None, + embedding_dimension: Optional[int] = None, +) -> EmbeddingsClient +``` + +Creates an embeddings client based on the provider type. 
+ +**Parameters:** +- `provider`: "sentence-transformers" or "litellm" +- `model_name`: Model identifier +- `model_dir`: Local model directory (sentence-transformers only) +- `cache_dir`: Cache directory (sentence-transformers only) +- `api_key`: API key (litellm only; not used for Bedrock) +- `api_base`: Custom API endpoint (litellm only) +- `aws_region`: AWS region (litellm with Bedrock only) +- `embedding_dimension`: Expected dimension + +## Integration with FAISS Service + +The embeddings module integrates seamlessly with the existing FAISS search service: + +```python +# In registry/search/service.py +from registry.embeddings import create_embeddings_client + +class FaissService: + async def _load_embedding_model(self): + self.embedding_model = create_embeddings_client( + provider=settings.embeddings_provider, + model_name=settings.embeddings_model_name, + # ... other parameters from settings + ) +``` + +## Migration Guide + +### From Direct SentenceTransformer Usage + +**Before:** +```python +from sentence_transformers import SentenceTransformer + +model = SentenceTransformer("all-MiniLM-L6-v2") +embeddings = model.encode(texts) +``` + +**After:** +```python +from registry.embeddings import create_embeddings_client + +client = create_embeddings_client( + provider="sentence-transformers", + model_name="all-MiniLM-L6-v2", +) +embeddings = client.encode(texts) +``` + +### Switching to Cloud Provider + +Just update your `.env` file: + +```bash +# From +EMBEDDINGS_PROVIDER=sentence-transformers +EMBEDDINGS_MODEL_NAME=all-MiniLM-L6-v2 +EMBEDDINGS_MODEL_DIMENSIONS=384 + +# To +EMBEDDINGS_PROVIDER=litellm +EMBEDDINGS_MODEL_NAME=openai/text-embedding-3-small +EMBEDDINGS_MODEL_DIMENSIONS=1536 +EMBEDDINGS_API_KEY=your_openai_api_key +``` + +No code changes required! 
+ +## Performance Considerations + +### Local Models (Sentence Transformers) +- **Pros**: No API costs, privacy, no network latency +- **Cons**: CPU/GPU requirements, model download size +- **Best for**: High-volume usage, sensitive data, offline operation + +### Cloud APIs (LiteLLM) +- **Pros**: No local resources, higher quality models, instant availability +- **Cons**: API costs, network dependency, data leaves premises +- **Best for**: Low-volume usage, rapid prototyping, maximum quality + +## Troubleshooting + +### LiteLLM Not Installed + +``` +RuntimeError: LiteLLM is not installed. Install it with: uv add litellm +``` + +**Solution:** +```bash +uv add litellm +``` + +### Dimension Mismatch + +``` +WARNING: Embedding dimension mismatch: expected 384, got 1536 +``` + +**Solution:** Update `EMBEDDINGS_MODEL_DIMENSIONS` to match your model's actual output. + +### API Authentication Errors + +For cloud providers, ensure your API key is correctly set: +- OpenAI: Set `EMBEDDINGS_API_KEY` +- Cohere: Set `EMBEDDINGS_API_KEY` +- Bedrock: Configure AWS credentials via standard AWS methods + +## Testing + +Run the test suite to verify the integration: + +```bash +# Create a test file +cat > test_embeddings.py << 'EOF' +from registry.embeddings import create_embeddings_client + +# Test sentence-transformers +client = create_embeddings_client( + provider="sentence-transformers", + model_name="all-MiniLM-L6-v2", +) +embeddings = client.encode(["test"]) +print(f"✓ Embeddings shape: {embeddings.shape}") +EOF + +# Run test +uv run python test_embeddings.py +``` + +## Contributing + +To add a new embeddings provider: + +1. Create a new client class inheriting from `EmbeddingsClient` +2. Implement `encode()` and `get_embedding_dimension()` methods +3. Update `create_embeddings_client()` factory function +4. Add configuration options to `registry/core/config.py` +5. 
Document in this README + +## License + +Apache 2.0 - See LICENSE file for details diff --git a/registry/embeddings/__init__.py b/registry/embeddings/__init__.py new file mode 100644 index 00000000..2078f2ed --- /dev/null +++ b/registry/embeddings/__init__.py @@ -0,0 +1,15 @@ +"""Embeddings module for vendor-agnostic embeddings generation.""" + +from .client import ( + EmbeddingsClient, + SentenceTransformersClient, + LiteLLMClient, + create_embeddings_client, +) + +__all__ = [ + "EmbeddingsClient", + "SentenceTransformersClient", + "LiteLLMClient", + "create_embeddings_client", +] diff --git a/registry/embeddings/client.py b/registry/embeddings/client.py new file mode 100644 index 00000000..53feb613 --- /dev/null +++ b/registry/embeddings/client.py @@ -0,0 +1,404 @@ +""" +Embeddings client abstraction for vendor-agnostic embeddings generation. + +This module provides a unified interface for generating embeddings from multiple +providers including local sentence-transformers models and cloud-based APIs via LiteLLM. +""" + +import logging +import os +from abc import ( + ABC, + abstractmethod, +) +from pathlib import Path +from typing import ( + List, + Optional, +) + +import numpy as np + + +logger = logging.getLogger(__name__) + + +class EmbeddingsClient(ABC): + """Abstract base class for embeddings generation clients.""" + + @abstractmethod + def encode( + self, + texts: List[str], + ) -> np.ndarray: + """ + Generate embeddings for a list of texts. + + Args: + texts: List of text strings to encode + + Returns: + NumPy array of embeddings with shape (len(texts), embedding_dimension) + + Raises: + RuntimeError: If encoding fails + """ + pass + + @abstractmethod + def get_embedding_dimension(self) -> int: + """ + Get the dimension of embeddings produced by this client. 
+ + Returns: + Integer dimension of embedding vectors + """ + pass + + +class SentenceTransformersClient(EmbeddingsClient): + """Client for local sentence-transformers models.""" + + def __init__( + self, + model_name: str, + model_dir: Optional[Path] = None, + cache_dir: Optional[Path] = None, + ): + """ + Initialize the SentenceTransformers client. + + Args: + model_name: Name of the sentence-transformers model + model_dir: Optional local directory containing the model + cache_dir: Optional cache directory for downloaded models + """ + self.model_name = model_name + self.model_dir = model_dir + self.cache_dir = cache_dir + self._model: Optional["SentenceTransformer"] = None + self._dimension: Optional[int] = None + + def _load_model(self) -> None: + """Load the sentence-transformers model.""" + if self._model is not None: + return + + try: + from sentence_transformers import SentenceTransformer + + # Set cache directory if provided + original_st_home = os.environ.get("SENTENCE_TRANSFORMERS_HOME") + if self.cache_dir: + self.cache_dir.mkdir(parents=True, exist_ok=True) + os.environ["SENTENCE_TRANSFORMERS_HOME"] = str(self.cache_dir) + + # Check if local model exists + model_exists = ( + self.model_dir.exists() and any(self.model_dir.iterdir()) + if self.model_dir and self.model_dir.exists() + else False + ) + + if model_exists: + logger.info( + f"Loading SentenceTransformer model from local path: {self.model_dir}" + ) + self._model = SentenceTransformer(str(self.model_dir)) + else: + logger.info( + f"Local model not found, downloading from Hugging Face: {self.model_name}" + ) + self._model = SentenceTransformer(self.model_name) + + # Restore original environment variable + if original_st_home: + os.environ["SENTENCE_TRANSFORMERS_HOME"] = original_st_home + elif "SENTENCE_TRANSFORMERS_HOME" in os.environ: + del os.environ["SENTENCE_TRANSFORMERS_HOME"] + + # Get embedding dimension + self._dimension = self._model.get_sentence_embedding_dimension() + + logger.info( + 
f"SentenceTransformer model loaded successfully. Dimension: {self._dimension}" + ) + + except Exception as e: + logger.error( + f"Failed to load SentenceTransformer model: {e}", exc_info=True + ) + raise RuntimeError(f"Failed to load SentenceTransformer model: {e}") from e + + def encode( + self, + texts: List[str], + ) -> np.ndarray: + """ + Generate embeddings using sentence-transformers. + + Args: + texts: List of text strings to encode + + Returns: + NumPy array of embeddings + + Raises: + RuntimeError: If encoding fails + """ + if self._model is None: + self._load_model() + + try: + embeddings = self._model.encode(texts) + return np.array(embeddings, dtype=np.float32) + except Exception as e: + logger.error(f"Failed to encode texts: {e}", exc_info=True) + raise RuntimeError(f"Failed to encode texts: {e}") from e + + def get_embedding_dimension(self) -> int: + """ + Get the embedding dimension. + + Returns: + Integer dimension of embedding vectors + + Raises: + RuntimeError: If model is not loaded + """ + if self._dimension is None: + self._load_model() + return self._dimension + + +class LiteLLMClient(EmbeddingsClient): + """Client for cloud-based embeddings via LiteLLM.""" + + def __init__( + self, + model_name: str, + api_key: Optional[str] = None, + api_base: Optional[str] = None, + aws_region: Optional[str] = None, + embedding_dimension: Optional[int] = None, + ): + """ + Initialize the LiteLLM client. + + Args: + model_name: LiteLLM model identifier (e.g., 'bedrock/amazon.titan-embed-text-v1', + 'openai/text-embedding-3-small', 'cohere/embed-english-v3.0') + api_key: Optional API key for the provider + api_base: Optional API base URL for the provider + aws_region: Optional AWS region for Bedrock + embedding_dimension: Expected embedding dimension (will be validated) + + Note: + For AWS Bedrock, this client uses the standard AWS credential chain + (IAM roles, ~/.aws/credentials, environment variables). 
The api_key + parameter is not used for Bedrock authentication. + """ + self.model_name = model_name + self.api_key = api_key + self.api_base = api_base + self.aws_region = aws_region + self._embedding_dimension = embedding_dimension + self._validated_dimension: Optional[int] = None + + # Set environment variables for LiteLLM + if self.api_key: + self._set_api_key_env() + if self.aws_region: + os.environ["AWS_REGION_NAME"] = self.aws_region + + def _set_api_key_env(self) -> None: + """Set the appropriate API key environment variable based on provider.""" + provider = self.model_name.split("/")[0].lower() + + # AWS Bedrock uses standard AWS credential chain (IAM roles, env vars, ~/.aws/credentials) + # No need to set API key environment variable for Bedrock + if provider == "bedrock": + logger.info( + "Using standard AWS credential chain for Bedrock authentication" + ) + return + + # Handle other providers with API keys + env_var_mapping = { + "openai": "OPENAI_API_KEY", + "cohere": "COHERE_API_KEY", + "azure": "AZURE_API_KEY", + "anthropic": "ANTHROPIC_API_KEY", + } + + env_var = env_var_mapping.get(provider) + if env_var and self.api_key: + os.environ[env_var] = self.api_key + logger.debug(f"Set {env_var} environment variable for {provider}") + + def encode( + self, + texts: List[str], + ) -> np.ndarray: + """ + Generate embeddings using LiteLLM. + + Args: + texts: List of text strings to encode + + Returns: + NumPy array of embeddings + + Raises: + RuntimeError: If encoding fails or LiteLLM is not installed + """ + try: + from litellm import embedding + except ImportError as e: + logger.error("LiteLLM is not installed. Install it with: uv add litellm") + raise RuntimeError( + "LiteLLM is not installed. 
Install it with: uv add litellm" + ) from e + + try: + # LiteLLM expects 'input' parameter + kwargs = {"model": self.model_name, "input": texts} + + if self.api_base: + kwargs["api_base"] = self.api_base + + logger.debug( + f"Calling LiteLLM embedding API with model: {self.model_name}" + ) + response = embedding(**kwargs) + + # Extract embeddings from response + embeddings_list = [item["embedding"] for item in response["data"]] + embeddings_array = np.array(embeddings_list, dtype=np.float32) + + # Validate dimension on first call + if self._validated_dimension is None: + self._validated_dimension = embeddings_array.shape[1] + if ( + self._embedding_dimension + and self._validated_dimension != self._embedding_dimension + ): + logger.warning( + f"Embedding dimension mismatch: expected {self._embedding_dimension}, " + f"got {self._validated_dimension}" + ) + + logger.debug( + f"Generated {len(embeddings_list)} embeddings with dimension {self._validated_dimension}" + ) + return embeddings_array + + except Exception as e: + logger.error(f"Failed to generate embeddings via LiteLLM: {e}", exc_info=True) + raise RuntimeError(f"Failed to generate embeddings via LiteLLM: {e}") from e + + def get_embedding_dimension(self) -> int: + """ + Get the embedding dimension. 
+ + Returns: + Integer dimension of embedding vectors + + Raises: + RuntimeError: If dimension cannot be determined + """ + # If we have a validated dimension from actual API calls, use that + if self._validated_dimension is not None: + return self._validated_dimension + + # Otherwise, use the configured dimension if provided + if self._embedding_dimension is not None: + return self._embedding_dimension + + # As a last resort, make a test call with a simple string + logger.info( + "Embedding dimension not known, making test call to determine dimension" + ) + try: + test_embedding = self.encode(["test"]) + return test_embedding.shape[1] + except Exception as e: + logger.error( + f"Failed to determine embedding dimension: {e}", exc_info=True + ) + raise RuntimeError( + f"Failed to determine embedding dimension: {e}. " + "Consider setting EMBEDDINGS_MODEL_DIMENSIONS in configuration." + ) from e + + +def create_embeddings_client( + provider: str, + model_name: str, + model_dir: Optional[Path] = None, + cache_dir: Optional[Path] = None, + api_key: Optional[str] = None, + api_base: Optional[str] = None, + aws_region: Optional[str] = None, + embedding_dimension: Optional[int] = None, +) -> EmbeddingsClient: + """ + Factory function to create an embeddings client based on provider. + + Args: + provider: Provider type ('sentence-transformers' or 'litellm') + model_name: Model identifier + model_dir: Optional local model directory (sentence-transformers only) + cache_dir: Optional cache directory (sentence-transformers only) + api_key: Optional API key (litellm only) + api_base: Optional API base URL (litellm only) + aws_region: Optional AWS region (litellm with Bedrock only) + embedding_dimension: Optional embedding dimension + + Returns: + EmbeddingsClient instance + + Raises: + ValueError: If provider is not supported + + Note: + For AWS Bedrock, AWS credentials should be configured via standard AWS + credential chain (IAM roles, environment variables, ~/.aws/credentials). 
+ """ + provider_lower = provider.lower() + + if provider_lower == "sentence-transformers": + logger.info( + f"Creating SentenceTransformersClient with model: {model_name}" + ) + return SentenceTransformersClient( + model_name=model_name, + model_dir=model_dir, + cache_dir=cache_dir, + ) + + elif provider_lower == "litellm": + # Validate that model name has provider prefix + if "/" not in model_name: + raise ValueError( + f"Invalid model name for LiteLLM provider: '{model_name}'. " + f"LiteLLM requires provider-prefixed model names. " + f"Examples: 'openai/text-embedding-3-small', 'bedrock/amazon.titan-embed-text-v1', " + f"'cohere/embed-english-v3.0'. " + f"If you want to use '{model_name}', set EMBEDDINGS_PROVIDER=sentence-transformers" + ) + + logger.info(f"Creating LiteLLMClient with model: {model_name}") + return LiteLLMClient( + model_name=model_name, + api_key=api_key, + api_base=api_base, + aws_region=aws_region, + embedding_dimension=embedding_dimension, + ) + + else: + raise ValueError( + f"Unsupported embeddings provider: {provider}. 
" + "Supported providers: 'sentence-transformers', 'litellm'" + ) diff --git a/registry/search/service.py b/registry/search/service.py index 20354a7b..8d78969a 100644 --- a/registry/search/service.py +++ b/registry/search/service.py @@ -14,12 +14,15 @@ import faiss import numpy as np -from sentence_transformers import SentenceTransformer from pydantic import HttpUrl from ..core.config import settings from ..core.schemas import ServerInfo from ..schemas.agent_models import AgentCard +from ..embeddings import ( + EmbeddingsClient, + create_embeddings_client, +) logger = logging.getLogger(__name__) @@ -41,9 +44,9 @@ def default( class FaissService: """Service for managing FAISS vector database operations.""" - + def __init__(self): - self.embedding_model: Optional[SentenceTransformer] = None + self.embedding_model: Optional[EmbeddingsClient] = None self.faiss_index: Optional[faiss.IndexIDMap] = None self.metadata_store: Dict[str, Dict[str, Any]] = {} self.next_id_counter: int = 0 @@ -54,41 +57,58 @@ async def initialize(self): await self._load_faiss_data() async def _load_embedding_model(self): - """Load the sentence transformer model.""" - logger.info("Loading FAISS data and embedding model...") - + """Load the embeddings model using the configured provider.""" + logger.info( + f"Loading embedding model with provider: {settings.embeddings_provider}" + ) + # Ensure servers directory exists settings.servers_dir.mkdir(parents=True, exist_ok=True) - + try: + # Prepare cache directory for sentence-transformers model_cache_path = settings.container_registry_dir / ".cache" model_cache_path.mkdir(parents=True, exist_ok=True) - - # Set cache path for sentence transformers - import os - original_st_home = os.environ.get('SENTENCE_TRANSFORMERS_HOME') - os.environ['SENTENCE_TRANSFORMERS_HOME'] = str(model_cache_path) - - # Check if local model exists - model_path = settings.embeddings_model_dir - model_exists = model_path.exists() and any(model_path.iterdir()) if 
model_path.exists() else False - - if model_exists: - logger.info(f"Loading SentenceTransformer model from local path: {settings.embeddings_model_dir}") - self.embedding_model = SentenceTransformer(str(settings.embeddings_model_dir)) - else: - logger.info(f"Local model not found at {settings.embeddings_model_dir}, downloading from Hugging Face") - self.embedding_model = SentenceTransformer(str(settings.embeddings_model_name)) - - # Restore original environment variable - if original_st_home: - os.environ['SENTENCE_TRANSFORMERS_HOME'] = original_st_home - else: - del os.environ['SENTENCE_TRANSFORMERS_HOME'] - - logger.info("SentenceTransformer model loaded successfully.") + + # Create embeddings client using factory + self.embedding_model = create_embeddings_client( + provider=settings.embeddings_provider, + model_name=settings.embeddings_model_name, + model_dir=settings.embeddings_model_dir + if settings.embeddings_provider == "sentence-transformers" + else None, + cache_dir=model_cache_path + if settings.embeddings_provider == "sentence-transformers" + else None, + api_key=settings.embeddings_api_key + if settings.embeddings_provider == "litellm" + else None, + api_base=settings.embeddings_api_base + if settings.embeddings_provider == "litellm" + else None, + aws_region=settings.embeddings_aws_region + if settings.embeddings_provider == "litellm" + else None, + embedding_dimension=settings.embeddings_model_dimensions, + ) + + # Get and log the embedding dimension + embedding_dim = self.embedding_model.get_embedding_dimension() + logger.info( + f"Embedding model loaded successfully. Provider: {settings.embeddings_provider}, " + f"Model: {settings.embeddings_model_name}, Dimension: {embedding_dim}" + ) + + # Warn if dimension doesn't match configuration + if embedding_dim != settings.embeddings_model_dimensions: + logger.warning( + f"Embedding dimension mismatch: configured={settings.embeddings_model_dimensions}, " + f"actual={embedding_dim}. 
Using actual dimension." + ) + settings.embeddings_model_dimensions = embedding_dim + except Exception as e: - logger.error(f"Failed to load SentenceTransformer model: {e}", exc_info=True) + logger.error(f"Failed to load embedding model: {e}", exc_info=True) self.embedding_model = None async def _load_faiss_data(self): diff --git a/servers/mcpgw/server.py b/servers/mcpgw/server.py index 8c9b1b7d..01a38808 100644 --- a/servers/mcpgw/server.py +++ b/servers/mcpgw/server.py @@ -16,12 +16,17 @@ from typing import Dict, Any, Optional, ClassVar, List from dotenv import load_dotenv import os -from sentence_transformers import SentenceTransformer # Added import numpy as np # Added from sklearn.metrics.pairwise import cosine_similarity # Added import faiss # Added import yaml # Added for scopes.yml parsing +# Import embeddings client from registry +import sys +from pathlib import Path +sys.path.insert(0, str(Path(__file__).parent.parent / "registry")) +from embeddings import create_embeddings_client, EmbeddingsClient + # Configure logging logging.basicConfig( level=logging.INFO, @@ -500,9 +505,9 @@ async def check_user_permission_for_tool(auth_context: Dict[str, Any], tool_name return False -# --- FAISS and Sentence Transformer Integration for mcpgw --- START +# --- FAISS and Embeddings Integration for mcpgw --- START _faiss_data_lock = asyncio.Lock() -_embedding_model_mcpgw: Optional[SentenceTransformer] = None +_embedding_model_mcpgw: Optional[EmbeddingsClient] = None _faiss_index_mcpgw: Optional[faiss.Index] = None _faiss_metadata_mcpgw: Optional[Dict[str, Any]] = None # This will store the content of service_index_metadata.json _last_faiss_index_mtime: Optional[float] = None @@ -517,10 +522,6 @@ async def check_user_permission_for_tool(auth_context: Dict[str, Any], tool_name FAISS_METADATA_PATH_MCPGW = _registry_server_data_path / "service_index_metadata.json" EMBEDDING_DIMENSION_MCPGW = 384 # Should match the one used in main registry -# Get configuration from 
environment variables -EMBEDDINGS_MODEL_NAME = os.environ.get('EMBEDDINGS_MODEL_NAME', 'all-MiniLM-L6-v2') -EMBEDDINGS_MODEL_DIR = _registry_server_data_path.parent / "models" / EMBEDDINGS_MODEL_NAME - async def load_faiss_data_for_mcpgw(): """Loads the FAISS index, metadata, and embedding model for the mcpgw server. Reloads data if underlying files have changed since last load. @@ -532,33 +533,35 @@ async def load_faiss_data_for_mcpgw(): # Load embedding model if not already loaded (model doesn't change on disk typically) if _embedding_model_mcpgw is None: try: - model_cache_path = _registry_server_data_path.parent / ".cache" - model_cache_path.mkdir(parents=True, exist_ok=True) - - # Set SENTENCE_TRANSFORMERS_HOME to use the defined cache path - original_st_home = os.environ.get('SENTENCE_TRANSFORMERS_HOME') - os.environ['SENTENCE_TRANSFORMERS_HOME'] = str(model_cache_path) - - # Check if the model path exists and is not empty - model_path = Path(EMBEDDINGS_MODEL_DIR) - model_exists = model_path.exists() and any(model_path.iterdir()) if model_path.exists() else False - - if model_exists: - logger.info(f"MCPGW: Loading SentenceTransformer model from local path: {EMBEDDINGS_MODEL_DIR}") - _embedding_model_mcpgw = await asyncio.to_thread(SentenceTransformer, str(EMBEDDINGS_MODEL_DIR)) - else: - logger.info(f"MCPGW: Local model not found at {EMBEDDINGS_MODEL_DIR}, downloading from Hugging Face") - _embedding_model_mcpgw = await asyncio.to_thread(SentenceTransformer, str(EMBEDDINGS_MODEL_NAME)) - - # Restore original environment variable if it was set - if original_st_home: - os.environ['SENTENCE_TRANSFORMERS_HOME'] = original_st_home - else: - del os.environ['SENTENCE_TRANSFORMERS_HOME'] # Remove if not originally set - - logger.info("MCPGW: SentenceTransformer model loaded successfully.") + # Get embeddings configuration from environment + embeddings_provider = os.environ.get('EMBEDDINGS_PROVIDER', 'sentence-transformers') + embeddings_model_name = 
os.environ.get('EMBEDDINGS_MODEL_NAME', 'all-MiniLM-L6-v2') + embeddings_api_key = os.environ.get('EMBEDDINGS_API_KEY') + embeddings_api_base = os.environ.get('EMBEDDINGS_API_BASE') + embeddings_aws_region = os.environ.get('EMBEDDINGS_AWS_REGION', 'us-east-1') + embeddings_model_dimensions = int(os.environ.get('EMBEDDINGS_MODEL_DIMENSIONS', '384')) + + logger.info(f"MCPGW: Loading embeddings model with provider: {embeddings_provider}, model: {embeddings_model_name}") + + # Compute model directory for sentence-transformers + embeddings_model_dir = _registry_server_data_path.parent / "models" / embeddings_model_name if embeddings_provider == 'sentence-transformers' else None + + # Create embeddings client using the factory function + _embedding_model_mcpgw = await asyncio.to_thread( + create_embeddings_client, + provider=embeddings_provider, + model_name=embeddings_model_name, + model_dir=embeddings_model_dir, + cache_dir=_registry_server_data_path.parent / ".cache" if embeddings_provider == 'sentence-transformers' else None, + api_key=embeddings_api_key if embeddings_provider == 'litellm' else None, + api_base=embeddings_api_base if embeddings_provider == 'litellm' else None, + aws_region=embeddings_aws_region if embeddings_provider == 'litellm' else None, + embedding_dimension=embeddings_model_dimensions, + ) + + logger.info(f"MCPGW: Embeddings client loaded successfully. 
Provider: {embeddings_provider}, Model: {embeddings_model_name}") except Exception as e: - logger.error(f"MCPGW: Failed to load SentenceTransformer model: {e}", exc_info=True) + logger.error(f"MCPGW: Failed to load embeddings client: {e}", exc_info=True) return # Cannot proceed without the model for subsequent logic # Check FAISS index file diff --git a/uv.lock b/uv.lock index baf208b0..db21698b 100644 --- a/uv.lock +++ b/uv.lock @@ -1216,6 +1216,7 @@ dependencies = [ { name = "langchain-aws" }, { name = "langchain-mcp-adapters" }, { name = "langgraph" }, + { name = "litellm" }, { name = "matplotlib" }, { name = "mcp" }, { name = "psutil" }, @@ -1284,6 +1285,7 @@ requires-dist = [ { name = "langchain-aws", specifier = ">=0.2.23" }, { name = "langchain-mcp-adapters", specifier = ">=0.0.11" }, { name = "langgraph", specifier = ">=0.4.3" }, + { name = "litellm", specifier = ">=1.50.0" }, { name = "matplotlib", specifier = ">=3.10.5" }, { name = "mcp", specifier = ">=1.9.3" }, { name = "mkdocs", marker = "extra == 'docs'", specifier = ">=1.5.0" },