Skip to content

Commit 4de4d9e

Browse files
author
Dylan Huang
authored
add typescript simple example (#218)
* add typescript simple example * publish npm package for eval protocol (#219) * publish typescript SDK * add createLangfuseConfigTags function and update version to 0.1.1 * use eval-protocol npm dependency * refactor statusInfoSchema to use a record type and update version to 0.1.2 * add eval_metadata to langfuse_row in RemoteRolloutProcessor * Refactor data generator function name and update eval-protocol version to 0.1.2 * done
1 parent aa8ee84 commit 4de4d9e

File tree

17 files changed

+2719
-0
lines changed

17 files changed

+2719
-0
lines changed

eval_protocol/pytest/remote_rollout_processor.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -167,6 +167,7 @@ def _load_data():
167167
elif len(output_rows) == 1: # Return the Langfuse row
168168
langfuse_row = output_rows[0]
169169
langfuse_row.input_metadata.completion_params = row.input_metadata.completion_params
170+
langfuse_row.eval_metadata = row.eval_metadata
170171
return langfuse_row
171172
else:
172173
raise ValueError("RemoteRolloutProcessor's output_data_loader should return exactly one row.")
Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
import os
2+
from typing import List
3+
import atexit
4+
5+
import pytest
6+
7+
from eval_protocol.data_loader.dynamic_data_loader import DynamicDataLoader
8+
from eval_protocol.models import EvaluationRow, Message
9+
from eval_protocol.pytest import evaluation_test
10+
from eval_protocol.pytest.remote_rollout_processor import RemoteRolloutProcessor
11+
from eval_protocol.adapters.langfuse import create_langfuse_adapter
12+
from eval_protocol.quickstart.utils import filter_longest_conversation
13+
14+
# Module-level registry of rollout_ids observed during this test session.
# fetch_langfuse_traces() adds entries; the autouse fixture below clears the
# set before each test and verifies coverage afterwards.
ROLLOUT_IDS = set()


@pytest.fixture(autouse=True)
def check_rollout_coverage():
    """Ensure we processed all expected rollout_ids.

    Clears the registry before the test runs, yields control to the test,
    then asserts that exactly the expected number of rollout_ids was seen.

    Note: no ``global`` statement is needed — ROLLOUT_IDS is only mutated in
    place (``.clear()`` / ``.add()``), never rebound.
    """
    ROLLOUT_IDS.clear()
    yield

    # Verify we've seen the expected number of rollout_ids after test is done.
    # Must match the number of rows produced by rows() below.
    expected_rollout_count = 3
    assert len(ROLLOUT_IDS) == expected_rollout_count, (
        f"Expected to see {expected_rollout_count} rollout_ids, but only saw {len(ROLLOUT_IDS)}: {ROLLOUT_IDS}"
    )
29+
30+
31+
def fetch_langfuse_traces(rollout_id: str) -> List[EvaluationRow]:
    """Fetch evaluation rows from Langfuse tagged with *rollout_id*.

    Side effect: records the rollout_id in the module-level ROLLOUT_IDS set
    so the autouse coverage fixture can verify every rollout was processed.

    Args:
        rollout_id: Identifier of the remote rollout whose traces to fetch.

    Returns:
        All evaluation rows whose Langfuse tag matches ``rollout_id:<id>``.
    """
    # No `global` statement needed: the set is mutated in place, not rebound.
    ROLLOUT_IDS.add(rollout_id)

    adapter = create_langfuse_adapter()
    return adapter.get_evaluation_rows(tags=[f"rollout_id:{rollout_id}"])
37+
38+
39+
def langfuse_output_data_loader(rollout_id: str) -> DynamicDataLoader:
    """Build a DynamicDataLoader that pulls Langfuse traces for one rollout.

    The loader's single generator fetches all traces tagged with the given
    rollout_id; filter_longest_conversation keeps only the longest one.
    """

    def _fetch() -> List[EvaluationRow]:
        # Bound to this rollout_id; invoked lazily by the data loader.
        return fetch_langfuse_traces(rollout_id)

    return DynamicDataLoader(
        generators=[_fetch],
        preprocess_fn=filter_longest_conversation,
    )
43+
44+
45+
def rows() -> List[EvaluationRow]:
    """Produce three independent single-user-turn rows to trigger responses.

    Each row is constructed separately. The previous implementation returned
    the *same* EvaluationRow object three times, so any in-place mutation of
    one rollout's row (metadata, messages) would silently leak into the other
    two. Three distinct instances keep the rollouts isolated.
    """
    # Minimal single-user-turn message to trigger a response.
    return [
        EvaluationRow(messages=[Message(role="user", content="What is the capital of France?")])
        for _ in range(3)
    ]
49+
50+
51+
# Skipped in CI: requires a locally running remote rollout server (the
# TypeScript example) and Langfuse credentials in the environment.
@pytest.mark.skipif(os.environ.get("CI") == "true", reason="Only run this test locally (skipped in CI)")
@pytest.mark.parametrize("completion_params", [{"model": "gpt-5"}])
@evaluation_test(
    # Three rows are generated (see rows()), so three rollouts are triggered;
    # the autouse fixture asserts all three rollout_ids were observed.
    data_loaders=DynamicDataLoader(
        generators=[rows],
    ),
    rollout_processor=RemoteRolloutProcessor(
        # Assumes the example TypeScript server is already listening here.
        remote_base_url="http://127.0.0.1:3000",
        timeout_seconds=30,
        # After the remote rollout terminates, the processed rows are
        # re-fetched from Langfuse (tagged by rollout_id) via this loader.
        output_data_loader=langfuse_output_data_loader,
    ),
)
async def test_remote_rollout_and_fetch_langfuse_typescript(row: EvaluationRow) -> EvaluationRow:
    """
    End-to-end test:
    - remote server started at import time
    - trigger remote rollout via RemoteRolloutProcessor (calls init/status)
    - fetch traces from Langfuse filtered by metadata via output_data_loader; FAIL if none found
    """
    # The row delivered here is the Langfuse-fetched conversation; its first
    # message should still be the original user prompt.
    assert row.messages[0].content == "What is the capital of France?", "Row should have correct message content"
    # fetch_langfuse_traces() recorded this rollout_id when the output data
    # loader ran, so it must be present in the module-level registry.
    assert row.execution_metadata.rollout_id in ROLLOUT_IDS, (
        f"Row rollout_id {row.execution_metadata.rollout_id} should be in tracked rollout_ids: {ROLLOUT_IDS}"
    )

    return row
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
!package.json
Lines changed: 151 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,151 @@
1+
# TypeScript Express Server for Remote Rollout Processor
2+
3+
This TypeScript Express server implements the Remote Rollout Processor API contract as specified in the Eval Protocol documentation.
4+
5+
## Features
6+
7+
- **POST /init** - Initialize a rollout with validation using Zod schemas
8+
- **GET /status** - Check the status of a rollout
9+
- **GET /health** - Health check endpoint
10+
- Full TypeScript support with strict type checking
11+
- Request validation using Zod
12+
- Error handling and logging
13+
- CORS and security middleware
14+
15+
## Installation
16+
17+
```bash
18+
pnpm install
19+
```
20+
21+
## Development
22+
23+
```bash
24+
# Run in development mode with hot reload
25+
pnpm run dev
26+
27+
# Build for production
28+
pnpm run build
29+
30+
# Run production build
31+
pnpm run start
32+
```
33+
34+
## API Endpoints
35+
36+
### POST /init
37+
38+
Initialize a new rollout.
39+
40+
**Request Body:**
41+
```json
42+
{
43+
"rollout_id": "rll_ijkl",
44+
"model": "openai/gpt-4o",
45+
"messages": [
46+
{ "role": "user", "content": "Hello" }
47+
],
48+
"tools": null,
49+
"metadata": {
50+
"invocation_id": "ivk_abcd",
51+
"experiment_id": "exp_efgh",
52+
"rollout_id": "rll_ijkl",
53+
"run_id": "run_123",
54+
"row_id": "row_123"
55+
},
56+
"num_turns": 2
57+
}
58+
```
59+
60+
**Response:**
61+
```json
62+
{
63+
"status": "accepted",
64+
"rollout_id": "rll_ijkl",
65+
"message": "Rollout initialized successfully"
66+
}
67+
```
68+
69+
### GET /status
70+
71+
Check the status of a rollout.
72+
73+
**Query Parameters:**
74+
- `rollout_id` (required): The ID of the rollout to check
75+
76+
**Response (Running):**
77+
```json
78+
{
79+
"terminated": false
80+
}
81+
```
82+
83+
**Response (Completed):**
84+
```json
85+
{
86+
"terminated": true,
87+
"info": {
88+
"reason": "completed",
89+
"ended_at": "2025-01-24T12:34:56Z",
90+
"num_turns": 2
91+
}
92+
}
93+
```
94+
95+
### GET /health
96+
97+
Health check endpoint.
98+
99+
**Response:**
100+
```json
101+
{
102+
"status": "healthy",
103+
"timestamp": "2025-01-24T12:34:56Z"
104+
}
105+
```
106+
107+
## Usage with Eval Protocol
108+
109+
This server can be used with the Eval Protocol's `RemoteRolloutProcessor`:
110+
111+
```python
112+
from eval_protocol import (
113+
evaluation_test,
114+
DynamicDataLoader,
115+
RemoteRolloutProcessor,
116+
)
117+
118+
@pytest.mark.parametrize("completion_params", [{"model": "openai/gpt-4o"}])
119+
@evaluation_test(
120+
    data_loaders=DynamicDataLoader(generators=[lambda: [EvaluationRow(messages=[Message(role="user", content="Hello")])]]),
121+
rollout_processor=RemoteRolloutProcessor(
122+
remote_base_url="http://localhost:3000",
123+
output_data_loader=create_output_data_loader,
124+
)
125+
)
126+
def test_remote_http(row: EvaluationRow) -> EvaluationRow:
127+
return row
128+
```
129+
130+
## Configuration
131+
132+
The server runs on port 3000 by default. You can change this by setting the `PORT` environment variable:
133+
134+
```bash
135+
PORT=8080 pnpm run dev
136+
```
137+
138+
## Error Handling
139+
140+
The server includes comprehensive error handling:
141+
- Request validation errors return 400 with detailed error messages
142+
- Missing rollout IDs return 404
143+
- Server errors return 500 with error details
144+
- All errors are logged to the console
145+
146+
## Development Notes
147+
148+
- The server simulates async rollout execution with a 1-second delay per turn
149+
- Rollout states are stored in memory (not persistent across restarts)
150+
- All requests are validated using Zod schemas
151+
- TypeScript strict mode is enabled for better type safety
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
import * as dotenv from "dotenv";
2+
3+
// Helper to resolve the root of the repo (for .env loading, etc.)
4+
import path from "path";
5+
import { fileURLToPath } from "url";
6+
import fs from "fs";
7+
8+
// Returns the absolute path to the root of the repo (where .git or .env is found)
9+
function getRepoRoot(): string {
10+
// __dirname is not available in ES modules, so use fileURLToPath
11+
const currentDir = path.dirname(fileURLToPath(import.meta.url));
12+
let dir = currentDir;
13+
while (true) {
14+
if (
15+
fs.existsSync(path.join(dir, ".git")) ||
16+
fs.existsSync(path.join(dir, ".env"))
17+
) {
18+
return dir;
19+
}
20+
const parent = path.dirname(dir);
21+
if (parent === dir) break;
22+
dir = parent;
23+
}
24+
// Fallback to current directory if not found
25+
return currentDir;
26+
}
27+
28+
export const REPO_ROOT = getRepoRoot();
29+
30+
// Load environment variables from .env at the root of the repo
31+
dotenv.config({ path: path.join(REPO_ROOT, ".env") });
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
import { NodeSDK } from "@opentelemetry/sdk-node";
2+
import { LangfuseSpanProcessor } from "@langfuse/otel";
3+
import "./env";
4+
5+
// Configure an OpenTelemetry NodeSDK whose spans are exported to Langfuse.
// Importing this module has the side effect of starting tracing.
const sdk = new NodeSDK({
  spanProcessors: [
    new LangfuseSpanProcessor({
      // Non-null assertions: these env vars are assumed to be populated by
      // "./env" (loaded above) — NOTE(review): the process will pass
      // undefined to the processor if any is missing; confirm .env contents.
      publicKey: process.env["LANGFUSE_PUBLIC_KEY"]!,
      secretKey: process.env["LANGFUSE_SECRET_KEY"]!,
      baseUrl: process.env["LANGFUSE_HOST"]!,
    }),
  ],
});

sdk.start();
Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
{
2+
"name": "typescript-server",
3+
"version": "1.0.0",
4+
"description": "TypeScript Express server for Remote Rollout Processor",
5+
"main": "dist/server.js",
6+
"type": "module",
7+
"scripts": {
8+
"build": "tsc",
9+
"start": "node dist/server.js",
10+
"dev": "tsx server.ts",
11+
"test": "node test-server.js",
12+
"test:server": "node test-server.js"
13+
},
14+
"keywords": [],
15+
"author": "",
16+
"license": "ISC",
17+
"packageManager": "pnpm@10.6.2",
18+
"dependencies": {
19+
"@langfuse/openai": "^4.2.0",
20+
"@langfuse/otel": "^4.2.0",
21+
"@langfuse/tracing": "^4.2.0",
22+
"@opentelemetry/sdk-node": "^0.205.0",
23+
"cors": "^2.8.5",
24+
"dotenv": "^17.2.2",
25+
"eval-protocol": "^0.1.2",
26+
"express": "^5.1.0",
27+
"helmet": "^7.1.0",
28+
"openai": "^5.23.0"
29+
},
30+
"devDependencies": {
31+
"@types/cors": "^2.8.17",
32+
"@types/express": "^5.0.0",
33+
"@types/node": "^20.10.0",
34+
"tsx": "^4.6.0",
35+
"typescript": "^5.9.2",
36+
"zod": "^3.22.4"
37+
}
38+
}

0 commit comments

Comments
 (0)