Skip to content

Commit 0f3c471

Browse files
authored
revert (#434)
* Revert "use litellm sdk (#424)" This reverts commit acba670. * Revert "add finish reason (#421)" This reverts commit 1d07878.
1 parent 5ac0bb4 commit 0f3c471

File tree

15 files changed

+210
-311
lines changed

15 files changed

+210
-311
lines changed

eval_protocol/adapters/fireworks_tracing.py

Lines changed: 3 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -8,10 +8,8 @@
88
import logging
99
import requests
1010
from datetime import datetime
11-
import ast
12-
import json
13-
import os
1411
from typing import Any, Dict, List, Optional, Protocol
12+
import os
1513

1614
from eval_protocol.models import EvaluationRow, InputMetadata, ExecutionMetadata, Message
1715
from .base import BaseAdapter
@@ -46,43 +44,6 @@ def __call__(
4644
...
4745

4846

49-
def extract_otel_attributes(observations: List[Dict[str, Any]]) -> Optional[Dict[str, Any]]:
50-
"""Attempt to extract and parse attributes from raw_gen_ai_request observation. This only works when stored in OTEL format.
51-
52-
Args:
53-
observations: List of observation dictionaries from the trace
54-
55-
Returns:
56-
Dict with all attributes parsed. Or None if not found.
57-
"""
58-
for obs in observations:
59-
if obs.get("name") == "raw_gen_ai_request" and obs.get("type") == "SPAN":
60-
metadata = obs.get("metadata") or {}
61-
attributes = metadata.get("attributes") or {}
62-
63-
result: Dict[str, Any] = {}
64-
65-
for key, value in attributes.items():
66-
# Try to parse stringified objects (could be Python repr or JSON)
67-
if isinstance(value, str) and value.startswith(("[", "{")):
68-
try:
69-
result[key] = ast.literal_eval(value)
70-
except Exception as e:
71-
logger.debug("Failed to parse %s with ast.literal_eval: %s", key, e)
72-
try:
73-
result[key] = json.loads(value)
74-
except Exception as e:
75-
logger.debug("Failed to parse %s with json.loads: %s", key, e)
76-
result[key] = value
77-
else:
78-
result[key] = value
79-
80-
if result:
81-
return result
82-
83-
return None
84-
85-
8647
def convert_trace_dict_to_evaluation_row(
8748
trace: Dict[str, Any], include_tool_calls: bool = True, span_name: Optional[str] = None
8849
) -> Optional[EvaluationRow]:
@@ -135,19 +96,6 @@ def convert_trace_dict_to_evaluation_row(
13596
):
13697
break # Break early if we've found all the metadata we need
13798

138-
observations = trace.get("observations") or []
139-
# We can only extract when stored in OTEL format.
140-
otel_attributes = extract_otel_attributes(observations)
141-
if otel_attributes:
142-
# Find choices from any provider (llm.*.choices pattern)
143-
choices = None
144-
for key, value in otel_attributes.items():
145-
if key.endswith(".choices") and isinstance(value, list):
146-
choices = value
147-
break
148-
if choices and len(choices) > 0:
149-
execution_metadata.finish_reason = choices[0].get("finish_reason")
150-
15199
return EvaluationRow(
152100
messages=messages,
153101
tools=tools,
@@ -212,7 +160,7 @@ def extract_messages_from_trace_dict(
212160
# Fallback: use the last GENERATION observation which typically contains full chat history
213161
if not messages:
214162
try:
215-
all_observations = trace.get("observations") or []
163+
all_observations = trace.get("observations", [])
216164
gens = [obs for obs in all_observations if obs.get("type") == "GENERATION"]
217165
if gens:
218166
gens.sort(key=lambda x: x.get("start_time", ""))
@@ -238,7 +186,7 @@ def get_final_generation_in_span_dict(trace: Dict[str, Any], span_name: str) ->
238186
The final generation dictionary, or None if not found
239187
"""
240188
# Get all observations from the trace
241-
all_observations = trace.get("observations") or []
189+
all_observations = trace.get("observations", [])
242190

243191
# Find a span with the given name that has generation children
244192
parent_span = None
Lines changed: 8 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,25 +1,23 @@
1-
# Metadata Extraction Gateway - Uses LiteLLM SDK directly with Langfuse OTEL
1+
# Metadata Extraction Gateway - Sits in front of LiteLLM
22
FROM python:3.11-slim
33

44
WORKDIR /app
55

66
# Prevent Python from buffering stdout/stderr
77
ENV PYTHONUNBUFFERED=1
88

9-
# Copy the entire package for local install (context is repo root)
10-
COPY pyproject.toml /app/pyproject.toml
11-
COPY eval_protocol /app/eval_protocol
12-
COPY README.md /app/README.md
9+
# Copy requirements file
10+
COPY ./requirements.txt /app/requirements.txt
1311

14-
# Install from local source with proxy extras
15-
RUN pip install --no-cache-dir ".[proxy]"
12+
# Install dependencies
13+
RUN pip install --no-cache-dir -r requirements.txt
1614

17-
# Copy the proxy package (local overrides for main.py, auth.py, etc.)
18-
COPY eval_protocol/proxy/proxy_core /app/proxy_core
15+
# Copy the proxy package
16+
COPY ./proxy_core /app/proxy_core
1917

2018
# Expose port
2119
EXPOSE 4000
2220

2321
# Run the gateway as a module
24-
# LANGFUSE_HOST and REDIS_HOST will be set by environment (docker-compose or Cloud Run)
22+
# LITELLM_URL will be set by environment (docker-compose or Cloud Run)
2523
CMD ["python", "-m", "proxy_core.main"]

eval_protocol/proxy/README.md

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -59,9 +59,9 @@ This enables distributed evaluation systems to track which LLM completions belon
5959
- Stores insertion IDs per rollout for completeness checking
6060
- Uses Redis Sets: `rollout_id -> {insertion_id_1, insertion_id_2, ...}`
6161

62-
#### 3. **LiteLLM SDK (Direct)**
63-
- Uses LiteLLM SDK directly for LLM calls (no separate proxy server needed)
64-
- Integrated with Langfuse via `langfuse_otel` OpenTelemetry callback
62+
#### 3. **LiteLLM Backend**
63+
- Standard LiteLLM proxy for routing to LLM providers
64+
- Configured with Langfuse callbacks for automatic tracing
6565

6666
## Key Features
6767

@@ -244,11 +244,12 @@ Forwards any other request to LiteLLM backend with API key injection.
244244

245245
| Variable | Required | Default | Description |
246246
|----------|----------|---------|-------------|
247+
| `LITELLM_URL` | Yes | - | URL of LiteLLM backend |
247248
| `REDIS_HOST` | Yes | - | Redis hostname |
248249
| `REDIS_PORT` | No | 6379 | Redis port |
249250
| `REDIS_PASSWORD` | No | - | Redis password |
250251
| `SECRETS_PATH` | No | `proxy_core/secrets.yaml` | Path to secrets file (YAML) |
251-
| `LANGFUSE_HOST` | No | `https://us.cloud.langfuse.com` | Langfuse OTEL host for tracing |
252+
| `LANGFUSE_HOST` | No | `https://cloud.langfuse.com` | Langfuse base URL |
252253
| `REQUEST_TIMEOUT` | No | 300.0 | Request timeout (LLM calls) in seconds |
253254
| `LOG_LEVEL` | No | INFO | Logging level |
254255
| `PORT` | No | 4000 | Gateway port |
@@ -271,26 +272,25 @@ default_project_id: project-1
271272

272273
### LiteLLM Configuration
273274

274-
The `config_no_cache.yaml` configures LiteLLM (only needed if running a standalone LiteLLM proxy):
275+
The `config_no_cache.yaml` configures LiteLLM:
275276
```yaml
276277
model_list:
277278
- model_name: "*"
278279
litellm_params:
279280
model: "*"
280281
litellm_settings:
281-
callbacks: ["langfuse_otel"]
282+
success_callback: ["langfuse"]
283+
failure_callback: ["langfuse"]
282284
drop_params: True
283285
general_settings:
284286
allow_client_side_credentials: true
285287
```
286288

287289
Key settings:
288290
- **Wildcard model support**: Route any model to any provider
289-
- **Langfuse OTEL**: OpenTelemetry-based tracing via `langfuse_otel` callback
291+
- **Langfuse callbacks**: Automatic tracing on success/failure
290292
- **Client-side credentials**: Accept API keys from request body
291293

292-
**Note:** The proxy now uses the LiteLLM SDK directly with `langfuse_otel` integration, so a separate LiteLLM proxy server is no longer required.
293-
294294
## Security Considerations
295295

296296
### Authentication

eval_protocol/proxy/config_no_cache.yaml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,8 @@ model_list:
33
litellm_params:
44
model: "*"
55
litellm_settings:
6-
callbacks: ["langfuse_otel"]
6+
success_callback: ["langfuse"]
7+
failure_callback: ["langfuse"]
78
drop_params: True
89
general_settings:
910
allow_client_side_credentials: true

eval_protocol/proxy/docker-compose.yml

Lines changed: 29 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -7,19 +7,41 @@ services:
77
ports:
88
- "6379:6379" # Expose for debugging if needed
99
networks:
10-
- proxy-network
10+
- litellm-network
1111
restart: unless-stopped
1212
command: redis-server --appendonly yes
1313
volumes:
1414
- redis-data:/data
1515

16-
# Metadata Gateway - Handles LLM calls directly via LiteLLM SDK with Langfuse OTEL
16+
# LiteLLM Backend - Handles actual LLM proxying
17+
litellm-backend:
18+
image: litellm/litellm:v1.77.3-stable
19+
platform: linux/amd64
20+
container_name: litellm-backend
21+
command: ["--config", "/app/config.yaml", "--port", "4000", "--host", "0.0.0.0"]
22+
# To use other model providers (OpenAI, Anthropic, etc.), set their API keys in the .env file.
23+
env_file:
24+
- .env # Load API keys from .env file
25+
environment:
26+
- LANGFUSE_PUBLIC_KEY=dummy # Dummy public and secret keys so the Langfuse instance initializes in LiteLLM; the real keys are then sent by the proxy
27+
- LANGFUSE_SECRET_KEY=dummy
28+
volumes:
29+
- ./config_no_cache.yaml:/app/config.yaml:ro
30+
ports:
31+
- "4001:4000" # Expose on 4001 for direct access if needed
32+
networks:
33+
- litellm-network
34+
restart: unless-stopped
35+
36+
# Metadata Gateway - Public-facing service that extracts metadata from URLs
1737
metadata-gateway:
1838
build:
19-
context: ../..
20-
dockerfile: eval_protocol/proxy/Dockerfile.gateway
39+
context: .
40+
dockerfile: Dockerfile.gateway
2141
container_name: metadata-gateway
2242
environment:
43+
# Point to the LiteLLM backend service
44+
- LITELLM_URL=http://litellm-backend:4000
2345
- PORT=4000
2446
# Redis configuration for assistant message counting
2547
- REDIS_HOST=redis
@@ -34,13 +56,14 @@ services:
3456
ports:
3557
- "4000:4000" # Main public-facing port
3658
networks:
37-
- proxy-network
59+
- litellm-network
3860
depends_on:
61+
- litellm-backend
3962
- redis
4063
restart: unless-stopped
4164

4265
networks:
43-
proxy-network:
66+
litellm-network:
4467
driver: bridge
4568

4669
volumes:

eval_protocol/proxy/proxy_core/app.py

Lines changed: 15 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515

1616
from .models import ProxyConfig, LangfuseTracesResponse, TracesParams, ChatParams, ChatRequestHook, TracesRequestHook
1717
from .auth import AuthProvider, NoAuthProvider
18-
from .litellm import handle_chat_completion
18+
from .litellm import handle_chat_completion, proxy_to_litellm
1919
from .langfuse import fetch_langfuse_traces, pointwise_fetch_langfuse_trace
2020

2121
# Configure logging before any other imports (so all modules inherit this config)
@@ -35,6 +35,10 @@ def build_proxy_config(
3535
preprocess_traces_request: Optional[TracesRequestHook] = None,
3636
) -> ProxyConfig:
3737
"""Load environment and secrets, and build ProxyConfig"""
38+
# Env
39+
litellm_url = os.getenv("LITELLM_URL")
40+
if not litellm_url:
41+
raise ValueError("LITELLM_URL environment variable must be set")
3842
request_timeout = float(os.getenv("REQUEST_TIMEOUT", "300.0"))
3943
langfuse_host = os.getenv("LANGFUSE_HOST", "https://cloud.langfuse.com")
4044

@@ -62,6 +66,7 @@ def build_proxy_config(
6266
raise ValueError(f"Invalid format in secrets file {secrets_path.name}: {e}")
6367

6468
return ProxyConfig(
69+
litellm_url=litellm_url,
6570
request_timeout=request_timeout,
6671
langfuse_host=langfuse_host,
6772
langfuse_keys=langfuse_keys,
@@ -108,16 +113,6 @@ async def lifespan(app: FastAPI):
108113
app.state.config = build_proxy_config(preprocess_chat_request, preprocess_traces_request)
109114
app.state.redis = init_redis()
110115

111-
config = app.state.config
112-
default_keys = config.langfuse_keys[config.default_project_id]
113-
os.environ["LANGFUSE_PUBLIC_KEY"] = default_keys["public_key"]
114-
os.environ["LANGFUSE_SECRET_KEY"] = default_keys["secret_key"]
115-
os.environ.setdefault("LANGFUSE_HOST", config.langfuse_host)
116-
117-
import litellm
118-
119-
litellm.callbacks = ["langfuse_otel"]
120-
121116
try:
122117
yield
123118
finally:
@@ -302,4 +297,13 @@ async def pointwise_get_langfuse_trace(
302297
async def health():
303298
return {"status": "healthy", "service": "metadata-proxy"}
304299

300+
# Catch-all
301+
@app.api_route("/{path:path}", methods=["GET", "POST", "PUT", "DELETE", "PATCH"])
302+
async def catch_all_proxy(
303+
path: str,
304+
request: Request,
305+
config: ProxyConfig = Depends(get_config),
306+
):
307+
return await proxy_to_litellm(config, path, request)
308+
305309
return app

eval_protocol/proxy/proxy_core/langfuse.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,6 @@ def _serialize_trace_to_dict(trace_full: Any) -> Dict[str, Any]:
5050
"input": getattr(obs, "input", None),
5151
"output": getattr(obs, "output", None),
5252
"parent_observation_id": getattr(obs, "parent_observation_id", None),
53-
"metadata": getattr(obs, "metadata", None),
5453
}
5554
for obs in getattr(trace_full, "observations", [])
5655
]

0 commit comments

Comments
 (0)