Commit 3be3dcc: use litellm sdk

1 parent 1d07878

File tree

8 files changed: +89 −151 lines

eval_protocol/proxy/Dockerfile.gateway

Lines changed: 2 additions & 2 deletions

```diff
@@ -1,4 +1,4 @@
-# Metadata Extraction Gateway - Sits in front of LiteLLM
+# Metadata Extraction Gateway - Uses LiteLLM SDK directly with Langfuse OTEL
 FROM python:3.11-slim
 
 WORKDIR /app
@@ -19,5 +19,5 @@ COPY ./proxy_core /app/proxy_core
 EXPOSE 4000
 
 # Run the gateway as a module
-# LITELLM_URL will be set by environment (docker-compose or Cloud Run)
+# LANGFUSE_HOST and REDIS_HOST will be set by environment (docker-compose or Cloud Run)
 CMD ["python", "-m", "proxy_core.main"]
```
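As the updated comment notes, the gateway is now configured entirely through environment variables. A minimal sketch of how those variables might be read (the helper name is hypothetical; defaults follow the README table in this commit):

```python
import os

def read_gateway_env(env=os.environ):
    """Sketch: read gateway config from the environment.

    LITELLM_URL is no longer consulted; REDIS_HOST is required and
    LANGFUSE_HOST falls back to a hosted default.
    """
    if "REDIS_HOST" not in env:
        raise ValueError("REDIS_HOST environment variable must be set")
    return {
        "redis_host": env["REDIS_HOST"],
        "redis_port": int(env.get("REDIS_PORT", "6379")),
        "langfuse_host": env.get("LANGFUSE_HOST", "https://us.cloud.langfuse.com"),
        "request_timeout": float(env.get("REQUEST_TIMEOUT", "300.0")),
        "port": int(env.get("PORT", "4000")),
    }

cfg = read_gateway_env({"REDIS_HOST": "redis"})
print(cfg["redis_port"], cfg["langfuse_host"])
```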

eval_protocol/proxy/README.md

Lines changed: 9 additions & 9 deletions

````diff
@@ -59,9 +59,9 @@ This enables distributed evaluation systems to track which LLM completions belon
 - Stores insertion IDs per rollout for completeness checking
 - Uses Redis Sets: `rollout_id -> {insertion_id_1, insertion_id_2, ...}`
 
-#### 3. **LiteLLM Backend**
-- Standard LiteLLM proxy for routing to LLM providers
-- Configured with Langfuse callbacks for automatic tracing
+#### 3. **LiteLLM SDK (Direct)**
+- Uses LiteLLM SDK directly for LLM calls (no separate proxy server needed)
+- Integrated with Langfuse via `langfuse_otel` OpenTelemetry callback
 
 ## Key Features
 
@@ -244,12 +244,11 @@ Forwards any other request to LiteLLM backend with API key injection.
 
 | Variable | Required | Default | Description |
 |----------|----------|---------|-------------|
-| `LITELLM_URL` | Yes | - | URL of LiteLLM backend |
 | `REDIS_HOST` | Yes | - | Redis hostname |
 | `REDIS_PORT` | No | 6379 | Redis port |
 | `REDIS_PASSWORD` | No | - | Redis password |
 | `SECRETS_PATH` | No | `proxy_core/secrets.yaml` | Path to secrets file (YAML) |
-| `LANGFUSE_HOST` | No | `https://cloud.langfuse.com` | Langfuse base URL |
+| `LANGFUSE_HOST` | No | `https://us.cloud.langfuse.com` | Langfuse OTEL host for tracing |
 | `REQUEST_TIMEOUT` | No | 300.0 | Request timeout (LLM calls) in seconds |
 | `LOG_LEVEL` | No | INFO | Logging level |
 | `PORT` | No | 4000 | Gateway port |
@@ -272,25 +271,26 @@ default_project_id: project-1
 
 ### LiteLLM Configuration
 
-The `config_no_cache.yaml` configures LiteLLM:
+The `config_no_cache.yaml` configures LiteLLM (only needed if running a standalone LiteLLM proxy):
 ```yaml
 model_list:
   - model_name: "*"
     litellm_params:
      model: "*"
 litellm_settings:
-  success_callback: ["langfuse"]
-  failure_callback: ["langfuse"]
+  callbacks: ["langfuse_otel"]
   drop_params: True
 general_settings:
   allow_client_side_credentials: true
 ```
 
 Key settings:
 - **Wildcard model support**: Route any model to any provider
-- **Langfuse callbacks**: Automatic tracing on success/failure
+- **Langfuse OTEL**: OpenTelemetry-based tracing via `langfuse_otel` callback
 - **Client-side credentials**: Accept API keys from request body
 
+**Note:** The proxy now uses the LiteLLM SDK directly with `langfuse_otel` integration, so a separate LiteLLM proxy server is no longer required.
+
 ## Security Considerations
 
 ### Authentication
````
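The README describes rollout bookkeeping via Redis Sets (`rollout_id -> {insertion_id_1, ...}`). That completeness check can be sketched with an in-memory stand-in; the helper names are hypothetical, and real code would use Redis `SADD`/`SMEMBERS`:

```python
# Hypothetical sketch: each rollout_id maps to a set of insertion IDs,
# mirroring the Redis Sets described in the README.
from collections import defaultdict

store: dict = defaultdict(set)  # stand-in for Redis

def record_insertion(rollout_id, insertion_id):
    store[rollout_id].add(insertion_id)  # SADD rollout_id insertion_id

def is_complete(rollout_id, expected):
    # SMEMBERS rollout_id, then check all expected IDs were seen
    return store[rollout_id] >= set(expected)

record_insertion("rollout-1", "ins-a")
record_insertion("rollout-1", "ins-b")
print(is_complete("rollout-1", {"ins-a", "ins-b"}))
```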

eval_protocol/proxy/config_no_cache.yaml

Lines changed: 1 addition & 2 deletions

```diff
@@ -3,8 +3,7 @@ model_list:
     litellm_params:
       model: "*"
 litellm_settings:
-  success_callback: ["langfuse"]
-  failure_callback: ["langfuse"]
+  callbacks: ["langfuse_otel"]
   drop_params: True
 general_settings:
   allow_client_side_credentials: true
```

eval_protocol/proxy/docker-compose.yml

Lines changed: 4 additions & 27 deletions

```diff
@@ -7,41 +7,19 @@ services:
     ports:
       - "6379:6379" # Expose for debugging if needed
     networks:
-      - litellm-network
+      - proxy-network
     restart: unless-stopped
     command: redis-server --appendonly yes
     volumes:
       - redis-data:/data
 
-  # LiteLLM Backend - Handles actual LLM proxying
-  litellm-backend:
-    image: litellm/litellm:v1.77.3-stable
-    platform: linux/amd64
-    container_name: litellm-backend
-    command: ["--config", "/app/config.yaml", "--port", "4000", "--host", "0.0.0.0"]
-    # If you want to be able to use other model providers like OpenAI, Anthropic, etc., you need to set keys in .env file.
-    env_file:
-      - .env # Load API keys from .env file
-    environment:
-      - LANGFUSE_PUBLIC_KEY=dummy # Set dummy public and private key so Langfuse instance initializes in LiteLLM, then real keys get sent in proxy
-      - LANGFUSE_SECRET_KEY=dummy
-    volumes:
-      - ./config_no_cache.yaml:/app/config.yaml:ro
-    ports:
-      - "4001:4000" # Expose on 4001 for direct access if needed
-    networks:
-      - litellm-network
-    restart: unless-stopped
-
-  # Metadata Gateway - Public-facing service that extracts metadata from URLs
+  # Metadata Gateway - Handles LLM calls directly via LiteLLM SDK with Langfuse OTEL
   metadata-gateway:
     build:
       context: .
       dockerfile: Dockerfile.gateway
     container_name: metadata-gateway
     environment:
-      # Point to the LiteLLM backend service
-      - LITELLM_URL=http://litellm-backend:4000
       - PORT=4000
       # Redis configuration for assistant message counting
       - REDIS_HOST=redis
@@ -56,14 +34,13 @@ services:
     ports:
       - "4000:4000" # Main public-facing port
     networks:
-      - litellm-network
+      - proxy-network
     depends_on:
-      - litellm-backend
       - redis
     restart: unless-stopped
 
 networks:
-  litellm-network:
+  proxy-network:
     driver: bridge
 
 volumes:
```
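With the `litellm-backend` service removed, the compose topology reduces to two services: the gateway talks to Redis only and makes LLM calls in-process. A quick sanity sketch of the resulting dependency graph (service names taken from the compose file; the check itself is illustrative):

```python
# Hypothetical sketch: the service graph after this commit. Every declared
# dependency must resolve to a defined service sharing a network.
services = {
    "redis": {"depends_on": [], "networks": ["proxy-network"]},
    "metadata-gateway": {"depends_on": ["redis"], "networks": ["proxy-network"]},
}

for name, svc in services.items():
    for dep in svc["depends_on"]:
        assert dep in services, f"{name} depends on undefined service {dep}"
        assert set(svc["networks"]) & set(services[dep]["networks"])

print(sorted(services))
```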

eval_protocol/proxy/proxy_core/app.py

Lines changed: 5 additions & 15 deletions

```diff
@@ -15,7 +15,7 @@
 
 from .models import ProxyConfig, LangfuseTracesResponse, TracesParams, ChatParams, ChatRequestHook, TracesRequestHook
 from .auth import AuthProvider, NoAuthProvider
-from .litellm import handle_chat_completion, proxy_to_litellm
+from .litellm import handle_chat_completion
 from .langfuse import fetch_langfuse_traces, pointwise_fetch_langfuse_trace
 
 # Configure logging before any other imports (so all modules inherit this config)
@@ -35,10 +35,6 @@ def build_proxy_config(
     preprocess_traces_request: Optional[TracesRequestHook] = None,
 ) -> ProxyConfig:
     """Load environment and secrets, and build ProxyConfig"""
-    # Env
-    litellm_url = os.getenv("LITELLM_URL")
-    if not litellm_url:
-        raise ValueError("LITELLM_URL environment variable must be set")
     request_timeout = float(os.getenv("REQUEST_TIMEOUT", "300.0"))
     langfuse_host = os.getenv("LANGFUSE_HOST", "https://cloud.langfuse.com")
 
@@ -66,7 +62,6 @@ def build_proxy_config(
         raise ValueError(f"Invalid format in secrets file {secrets_path.name}: {e}")
 
     return ProxyConfig(
-        litellm_url=litellm_url,
         request_timeout=request_timeout,
         langfuse_host=langfuse_host,
         langfuse_keys=langfuse_keys,
@@ -113,6 +108,10 @@ async def lifespan(app: FastAPI):
     app.state.config = build_proxy_config(preprocess_chat_request, preprocess_traces_request)
     app.state.redis = init_redis()
 
+    import litellm
+
+    litellm.callbacks = ["langfuse_otel"]
+
     try:
         yield
     finally:
@@ -297,13 +296,4 @@ async def pointwise_get_langfuse_trace(
     async def health():
         return {"status": "healthy", "service": "metadata-proxy"}
 
-    # Catch-all
-    @app.api_route("/{path:path}", methods=["GET", "POST", "PUT", "DELETE", "PATCH"])
-    async def catch_all_proxy(
-        path: str,
-        request: Request,
-        config: ProxyConfig = Depends(get_config),
-    ):
-        return await proxy_to_litellm(config, path, request)
-
     return app
```
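The startup wiring added to `lifespan` registers the `langfuse_otel` callback once, app-wide. The pattern can be sketched with a stand-in object in place of the real `litellm` module (everything below is a hypothetical illustration, not the production code):

```python
# Sketch of the lifespan pattern: register the Langfuse OTEL callback at
# startup, so every in-process LiteLLM SDK call is traced automatically.
import asyncio
from contextlib import asynccontextmanager
from types import SimpleNamespace

litellm = SimpleNamespace(callbacks=[])  # stand-in for `import litellm`

@asynccontextmanager
async def lifespan(app):
    litellm.callbacks = ["langfuse_otel"]  # same assignment the diff adds
    try:
        yield
    finally:
        litellm.callbacks = []  # teardown for the sketch; the real app just exits

async def main():
    async with lifespan(app=None):
        return list(litellm.callbacks)

print(asyncio.run(main()))
```

Setting the callback once in `lifespan` (rather than per request) matches how LiteLLM's global callback list is meant to be used.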
