From 886fa382380c92515fdacdfe762acf2bbdb3a2d1 Mon Sep 17 00:00:00 2001 From: Derek Xu Date: Wed, 24 Sep 2025 18:13:25 -0700 Subject: [PATCH 01/12] Add in data loader and pulling from tags --- eval_protocol/adapters/langfuse.py | 46 ++++++-- eval_protocol/pytest/evaluation_test.py | 47 ++++++-- .../pytest/remote_rollout_processor.py | 22 +++- eval_protocol/pytest/rollout_processor.py | 8 ++ eval_protocol/pytest/utils.py | 28 +++-- tests/chinook/langfuse/remote_server.py | 7 ++ .../langfuse/test_remote_langfuse_chinook.py | 103 ++++-------------- 7 files changed, 151 insertions(+), 110 deletions(-) diff --git a/eval_protocol/adapters/langfuse.py b/eval_protocol/adapters/langfuse.py index 9b630178..675ce1ea 100644 --- a/eval_protocol/adapters/langfuse.py +++ b/eval_protocol/adapters/langfuse.py @@ -12,7 +12,7 @@ from typing import Any, Dict, List, Optional, Protocol, TYPE_CHECKING, cast from langfuse.api.resources.commons.types.observations_view import ObservationsView -from eval_protocol.models import EvaluationRow, InputMetadata, Message +from eval_protocol.models import EvaluationRow, InputMetadata, ExecutionMetadata, Message from .base import BaseAdapter from .utils import extract_messages_from_data @@ -82,14 +82,44 @@ def convert_trace_to_evaluation_row( if not messages: return None + execution_metadata = ExecutionMetadata() + row_id = None + + if trace.observations: + for obs in trace.observations: + if obs.metadata and "requester_metadata" in obs.metadata: + req_meta = obs.metadata["requester_metadata"] + if isinstance(req_meta, dict): + execution_metadata.invocation_id = req_meta.get("invocation_id") + execution_metadata.experiment_id = req_meta.get("experiment_id") + execution_metadata.rollout_id = req_meta.get("rollout_id") + execution_metadata.run_id = req_meta.get("run_id") + row_id = req_meta.get("row_id") + break # Only need to get first observation + + if trace.tags: + for tag in trace.tags: + if tag.startswith("invocation_id:") and not 
execution_metadata.invocation_id: + execution_metadata.invocation_id = tag.split(":", 1)[1] + elif tag.startswith("experiment_id:") and not execution_metadata.experiment_id: + execution_metadata.experiment_id = tag.split(":", 1)[1] + elif tag.startswith("rollout_id:") and not execution_metadata.rollout_id: + execution_metadata.rollout_id = tag.split(":", 1)[1] + elif tag.startswith("run_id:") and not execution_metadata.run_id: + execution_metadata.run_id = tag.split(":", 1)[1] + elif tag.startswith("row_id:") and not row_id: + row_id = tag.split(":", 1)[1] + return EvaluationRow( messages=messages, tools=tools, input_metadata=InputMetadata( + row_id=row_id, session_data={ "langfuse_trace_id": trace.id, # Store the trace ID here - } + }, ), + execution_metadata=execution_metadata, ) except (AttributeError, ValueError, KeyError) as e: @@ -332,16 +362,18 @@ def get_evaluation_rows( to_timestamp=to_timestamp, order_by="timestamp.desc", ) + + # If no results, possible due to indexing delay--remote rollout processor just finished pushing rows to Langfuse + if traces and hasattr(traces, "meta") and traces.meta.total_items == 0 and page == 1: + raise Exception("Empty results - indexing delay") + break except Exception as e: list_retries += 1 - if "429" in str(e) and list_retries < max_retries: + if list_retries < max_retries and ("429" in str(e) or "Empty results" in str(e)): sleep_time = 2**list_retries # Exponential backoff logger.warning( - "Rate limit hit on trace.list(), retrying in %ds (attempt %d/%d)", - sleep_time, - list_retries, - max_retries, + "Retrying in %ds (attempt %d/%d): %s", sleep_time, list_retries, max_retries, str(e) ) time.sleep(sleep_time) else: diff --git a/eval_protocol/pytest/evaluation_test.py b/eval_protocol/pytest/evaluation_test.py index d9dfe811..d0de8ea0 100644 --- a/eval_protocol/pytest/evaluation_test.py +++ b/eval_protocol/pytest/evaluation_test.py @@ -402,15 +402,33 @@ async def _execute_groupwise_eval_with_semaphore( return results if 
mode == "pointwise": - # Pointwise mode, rollouts will return as they complete so we can pipeline evaluation_test execution pointwise_tasks: list[asyncio.Task[EvaluationRow]] = [] - # Use wrapper that handles retry logic internally - async for row in rollout_processor_with_retry( - rollout_processor, fresh_dataset, config, run_idx - ): - pointwise_tasks.append( - asyncio.create_task(_execute_pointwise_eval_with_semaphore(row=row)) - ) + + if rollout_processor.supports_pipelining: + # Pointwise mode, rollouts will return as they complete so we can pipeline evaluation_test execution + # Use wrapper that handles retry logic internally + async for row in rollout_processor_with_retry( + rollout_processor, fresh_dataset, config, run_idx + ): + pointwise_tasks.append( + asyncio.create_task(_execute_pointwise_eval_with_semaphore(row=row)) + ) + else: + # Non-pipelined mode: collect all rollout results first, then postprocess, then evaluate + collected_rollout_rows: list[EvaluationRow] = [] + async for row in rollout_processor_with_retry( + rollout_processor, fresh_dataset, config, run_idx + ): + collected_rollout_rows.append(row) + + # Post-process rollout results to get evaluation inputs + eval_input_rows = rollout_processor.postprocess(collected_rollout_rows) + + # Now evaluate all the post-processed rows + for row in eval_input_rows: + pointwise_tasks.append( + asyncio.create_task(_execute_pointwise_eval_with_semaphore(row=row)) + ) # Run evaluation tasks with progress bar results = await run_tasks_with_eval_progress(pointwise_tasks, run_idx) @@ -453,9 +471,13 @@ async def _collect_result(config, lst): # pyright: ignore[reportUnknownParamete lst.append(copied_row) # pyright: ignore[reportUnknownMemberType] tasks.append(asyncio.create_task(_collect_result(config, lst))) # pyright: ignore[reportUnknownArgumentType] rollout_results = await asyncio.gather(*tasks) - for result in rollout_results: - for row in result: - row_groups[row.input_metadata.row_id].append(row) # 
pyright: ignore[reportUnknownMemberType] + + # Flatten and postprocess all rollout results + all_rollout_rows = [row for result in rollout_results for row in result] + processed_rows = rollout_processor.postprocess(all_rollout_rows) + + for row in processed_rows: + row_groups[row.input_metadata.row_id].append(row) tasks = [] for _, rows in row_groups.items(): # pyright: ignore[reportUnknownVariableType] tasks.append(asyncio.create_task(_execute_groupwise_eval_with_semaphore(rows=rows))) # pyright: ignore[reportUnknownArgumentType] @@ -471,6 +493,9 @@ async def _collect_result(config, lst): # pyright: ignore[reportUnknownParamete rollout_processor, fresh_dataset, config, run_idx ): input_dataset.append(row) # pyright: ignore[reportUnknownMemberType] + + input_dataset = rollout_processor.postprocess(input_dataset) + # NOTE: we will still evaluate errored rows (give users control over this) # i.e., they can choose to give EvaluateResult.score = 0 for errored rows in their test_func results = await execute_pytest( diff --git a/eval_protocol/pytest/remote_rollout_processor.py b/eval_protocol/pytest/remote_rollout_processor.py index 04963f0d..4f0e102c 100644 --- a/eval_protocol/pytest/remote_rollout_processor.py +++ b/eval_protocol/pytest/remote_rollout_processor.py @@ -1,10 +1,11 @@ import asyncio import time -from typing import Any, Dict, List, Optional +from typing import Any, Dict, List, Optional, Callable import requests from eval_protocol.models import EvaluationRow +from eval_protocol.data_loader.dynamic_data_loader import DynamicDataLoader from .rollout_processor import RolloutProcessor from .types import RolloutProcessorConfig @@ -35,6 +36,8 @@ class RemoteRolloutProcessor(RolloutProcessor): Returns: {"terminated": bool, "info": {...}?} """ + supports_pipelining: bool = False # Remote rollout processor cannot pipeline - must wait for all rollouts to complete before fetching results. 
+ def __init__( self, *, @@ -42,6 +45,7 @@ def __init__( num_turns: int = 2, poll_interval: float = 1.0, timeout_seconds: float = 120.0, + output_data_loader: Callable[[str], DynamicDataLoader], ): # Prefer constructor-provided configuration. These can be overridden via # config.kwargs at call time for backward compatibility. @@ -49,6 +53,7 @@ def __init__( self._num_turns = num_turns self._poll_interval = poll_interval self._timeout_seconds = timeout_seconds + self._output_data_loader = output_data_loader def __call__(self, rows: List[EvaluationRow], config: RolloutProcessorConfig) -> List[asyncio.Task[EvaluationRow]]: tasks: List[asyncio.Task[EvaluationRow]] = [] @@ -158,5 +163,20 @@ def _get_status() -> Dict[str, Any]: return tasks + def postprocess(self, finished_rollout_rows: List[EvaluationRow]) -> List[EvaluationRow]: + """Fetch actual evaluation rows from Langfuse using the output_data_loader.""" + invocation_id = finished_rollout_rows[0].execution_metadata.invocation_id + if not invocation_id: + raise ValueError("Invocation ID is required in RemoteRolloutProcessor") + + data_loader = self._output_data_loader(invocation_id) + + results = data_loader.load() + output_rows: List[EvaluationRow] = [] + for result in results: + output_rows.extend(result.rows) + + return output_rows + def cleanup(self) -> None: return None diff --git a/eval_protocol/pytest/rollout_processor.py b/eval_protocol/pytest/rollout_processor.py index 313f1768..5389b1c6 100644 --- a/eval_protocol/pytest/rollout_processor.py +++ b/eval_protocol/pytest/rollout_processor.py @@ -10,11 +10,19 @@ class RolloutProcessor(ABC): Abstract base class for all rollout processor strategies. 
""" + supports_pipelining: bool = ( + True # Whether this processor supports pipelined evaluation (evaluate rows as rollouts complete) + ) + @abstractmethod def __call__(self, rows: list[EvaluationRow], config: RolloutProcessorConfig) -> list[asyncio.Task[EvaluationRow]]: """Process evaluation rows and return async tasks. Must be implemented by subclasses.""" pass + def postprocess(self, finished_rollout_rows: list[EvaluationRow]) -> list[EvaluationRow]: + """Post-process rollout results to produce evaluation inputs. Only available for processors that return False from supports_pipelining.""" + return finished_rollout_rows + def cleanup(self) -> None: """Cleanup resources. Override in subclasses if cleanup is needed.""" pass diff --git a/eval_protocol/pytest/utils.py b/eval_protocol/pytest/utils.py index 4e9be951..4ac472ef 100644 --- a/eval_protocol/pytest/utils.py +++ b/eval_protocol/pytest/utils.py @@ -4,7 +4,7 @@ import re import sys from dataclasses import replace -from typing import Any, Literal +from typing import Any, Literal, Callable, AsyncGenerator from litellm.cost_calculator import cost_per_token from tqdm import tqdm @@ -33,7 +33,9 @@ AggregationMethod = Literal["mean", "max", "min", "bootstrap"] -async def run_tasks_with_eval_progress(pointwise_tasks: list, run_idx: int): +async def run_tasks_with_eval_progress( + pointwise_tasks: list[asyncio.Task[EvaluationRow]], run_idx: int +) -> list[EvaluationRow]: """ Run evaluation tasks with a progress bar and proper cancellation handling. 
@@ -58,7 +60,7 @@ async def run_tasks_with_eval_progress(pointwise_tasks: list, run_idx: int): bar_format="{desc}: {percentage:3.0f}%|{bar}| {n_fmt}/{total_fmt} [{elapsed}<{remaining}, {rate_fmt}]", ) as eval_pbar: - async def task_with_progress(task): + async def task_with_progress(task: asyncio.Task[EvaluationRow]) -> EvaluationRow: try: result = await task return result @@ -77,7 +79,9 @@ async def task_with_progress(task): raise -async def run_tasks_with_run_progress(execute_run_func, num_runs, config): +async def run_tasks_with_run_progress( + execute_run_func: Callable[[int, RolloutProcessorConfig], Any], num_runs: int, config: RolloutProcessorConfig +) -> None: """ Run tasks with a parallel runs progress bar, preserving original logic. @@ -98,12 +102,12 @@ async def run_tasks_with_run_progress(execute_run_func, num_runs, config): bar_format="{desc}: {percentage:3.0f}%|{bar}| {n_fmt}/{total_fmt} [{elapsed}<{remaining}, {rate_fmt}]", ) as run_pbar: - async def execute_run_with_progress(run_idx: int, config): + async def execute_run_with_progress(run_idx: int, config: RolloutProcessorConfig) -> Any: result = await execute_run_func(run_idx, config) run_pbar.update(1) return result - tasks = [] + tasks: list[asyncio.Task[Any]] = [] for run_idx in range(num_runs): tasks.append(asyncio.create_task(execute_run_with_progress(run_idx, config))) try: @@ -274,7 +278,7 @@ async def rollout_processor_with_retry( fresh_dataset: list[EvaluationRow], config: RolloutProcessorConfig, run_idx: int = 0, -): +) -> AsyncGenerator[EvaluationRow, None]: """ Wrapper around rollout_processor that handles retry logic using the Python backoff library. 
@@ -304,13 +308,13 @@ async def rollout_processor_with_retry( # Create a single backoff-decorated retry function that can be reused @exception_config.get_backoff_decorator() # pyright: ignore[reportUntypedFunctionDecorator] - async def execute_row_with_backoff_retry(row: EvaluationRow): + async def execute_row_with_backoff_retry(row: EvaluationRow) -> EvaluationRow: """Execute rollout for a single row with backoff retry.""" retry_config = replace(config, kwargs={**(config.kwargs or {}), "start_server": False}) retry_tasks = rollout_processor([row], retry_config) return await retry_tasks[0] - async def execute_row_with_backoff(task: asyncio.Task, row: EvaluationRow) -> EvaluationRow: # pyright: ignore[reportMissingTypeArgument, reportUnknownParameterType] + async def execute_row_with_backoff(task: asyncio.Task[EvaluationRow], row: EvaluationRow) -> EvaluationRow: """Execute a single row task with backoff retry.""" try: @@ -344,7 +348,9 @@ async def execute_row_with_backoff(task: asyncio.Task, row: EvaluationRow) -> Ev row.rollout_status = Status.rollout_error(repr(e)) return row - async def execute_row_with_backoff_and_log(task: asyncio.Task, row: EvaluationRow) -> EvaluationRow: # pyright: ignore[reportMissingTypeArgument, reportUnknownParameterType] + async def execute_row_with_backoff_and_log( + task: asyncio.Task[EvaluationRow], row: EvaluationRow + ) -> EvaluationRow: """Execute a single row task with backoff retry and logging.""" result = await execute_row_with_backoff(task, row) # Log the row after execution completes (success or failure) @@ -386,7 +392,7 @@ def sanitize_filename(text: str) -> str: return safe[:120] -def extract_effort_tag(params: dict) -> str | None: # pyright: ignore[reportMissingTypeArgument, reportUnknownParameterType] +def extract_effort_tag(params: dict[str, Any]) -> str | None: """ Extract effort tag from completion parameters for use in file naming. 
diff --git a/tests/chinook/langfuse/remote_server.py b/tests/chinook/langfuse/remote_server.py index 71971378..0cdc0afd 100644 --- a/tests/chinook/langfuse/remote_server.py +++ b/tests/chinook/langfuse/remote_server.py @@ -57,6 +57,13 @@ def _worker(): # Prepare metadata payload to attach for Langfuse filtering metadata = { + "tags": [ + f"invocation_id:{req.metadata.get('invocation_id')}", + f"experiment_id:{req.metadata.get('experiment_id')}", + f"rollout_id:{req.metadata.get('rollout_id')}", + f"run_id:{req.metadata.get('run_id')}", + f"row_id:{req.metadata.get('row_id')}", + ], "invocation_id": req.metadata.get("invocation_id"), "experiment_id": req.metadata.get("experiment_id"), "rollout_id": req.metadata.get("rollout_id"), diff --git a/tests/chinook/langfuse/test_remote_langfuse_chinook.py b/tests/chinook/langfuse/test_remote_langfuse_chinook.py index c121a6e4..59bf01d3 100644 --- a/tests/chinook/langfuse/test_remote_langfuse_chinook.py +++ b/tests/chinook/langfuse/test_remote_langfuse_chinook.py @@ -12,6 +12,21 @@ from eval_protocol.models import EvaluationRow, Message from eval_protocol.pytest import evaluation_test from eval_protocol.pytest.remote_rollout_processor import RemoteRolloutProcessor +from eval_protocol.adapters.langfuse import create_langfuse_adapter + +INVOCATION_ID = "" + + +def fetch_trajectories(invocation_id: str) -> List[EvaluationRow]: + global INVOCATION_ID # This is just to verify the invocation_id is set correctly in the test + INVOCATION_ID = invocation_id + + adapter = create_langfuse_adapter() + return adapter.get_evaluation_rows(tags=[f"invocation_id:{invocation_id}"]) + + +def create_output_data_loader(invocation_id: str) -> DynamicDataLoader: + return DynamicDataLoader(generators=[lambda: fetch_trajectories(invocation_id)]) def _start_remote_server(): @@ -63,101 +78,29 @@ def remote_langfuse_data_generator() -> List[EvaluationRow]: @pytest.mark.skipif(os.environ.get("CI") == "true", reason="Only run this test locally (skipped in 
CI)") -@pytest.mark.asyncio +@pytest.mark.parametrize("completion_params", [{"model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct"}]) @evaluation_test( data_loaders=DynamicDataLoader( generators=[remote_langfuse_data_generator], ), - completion_params=[{"model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct"}], rollout_processor=RemoteRolloutProcessor( remote_base_url="http://127.0.0.1:7077", num_turns=2, timeout_seconds=30, + output_data_loader=create_output_data_loader, ), - mode="pointwise", ) async def test_remote_rollout_and_fetch_langfuse(row: EvaluationRow) -> EvaluationRow: """ End-to-end test: - remote server started at import time - trigger remote rollout via RemoteRolloutProcessor (calls init/status) - - fetch traces from Langfuse filtered by metadata; FAIL if none found + - fetch traces from Langfuse filtered by metadata via output_data_loader; FAIL if none found """ - # Debug print IDs used for filtering - print( - "[Remote-E2E] IDs:", - { - "invocation_id": row.execution_metadata.invocation_id, - "experiment_id": row.execution_metadata.experiment_id, - "rollout_id": row.execution_metadata.rollout_id, - "run_id": row.execution_metadata.run_id, - }, - ) - - # Attempt retrieval via adapter - try: - from eval_protocol.adapters.langfuse import create_langfuse_adapter - - adapter = create_langfuse_adapter() - - # Preferred: observations-level requester_metadata contains invocation_id (proxy annotates per-request) - contains_val = row.execution_metadata.invocation_id or "" - rows = [] - if contains_val: - # Retry loop to allow ingestion/flush - deadline = time.time() + 90 - while time.time() < deadline and not rows: - rows = adapter.get_evaluation_rows( - limit=10, - from_timestamp=datetime.now() - timedelta(hours=2), - to_timestamp=datetime.now(), - include_tool_calls=False, - requester_metadata_contains=contains_val, - ) - if rows: - break - time.sleep(3) - else: - print("[Remote-E2E] Missing invocation_id; skipping observations 
filter") - - # If still empty, dump recent trace metadata for debugging - if not rows: - try: - from langfuse import get_client # pyright: ignore[reportPrivateImportUsage] - - lf = get_client() - recent = lf.api.trace.list(limit=5, order_by="timestamp.desc") - print("[Remote-E2E] Recent trace metadata dump (id, metadata, requester_metadata, tags):") - if recent and getattr(recent, "data", None): - for t in recent.data: - try: - full = lf.api.trace.get(t.id) - print( - { - "id": full.id, - "metadata": getattr(full, "metadata", None), - "requester_metadata": getattr(full, "requester_metadata", None), - "tags": getattr(full, "tags", None), - } - ) - except Exception as e: - print("[Remote-E2E] Failed to get trace details:", e) - else: - print("[Remote-E2E] No recent traces found via list().") - except Exception as e: - print("[Remote-E2E] Langfuse debug fetch failed:", e) - - assert rows and len(rows) > 0, ( - "No Langfuse traces matched the metadata. Ensure the LiteLLM proxy is configured to forward " - "Langfuse telemetry and that LANGFUSE_* env vars are set." - ) - - # Minimal sanity: rows contain session_data.langfuse_trace_id - assert any((r.input_metadata.session_data or {}).get("langfuse_trace_id") for r in rows), ( - "Expected langfuse_trace_id in session_data for at least one row" - ) - - except ImportError: - pytest.fail("Langfuse SDK not installed; cannot verify traces.") + # Sanity check: row should have an invocation_id since it came from Langfuse via output_data_loader + assert row.messages[0].content == "Hello there! 
Please say hi back.", "Row should have correct message content" + assert row.execution_metadata.invocation_id == INVOCATION_ID, "Row should have correct invocation_id set" + + print(f"✅ Successfully received row from Langfuse with invocation_id: {row.execution_metadata.invocation_id}") return row From 7fdb8725a7e5839032d958060c87243de98213fe Mon Sep 17 00:00:00 2001 From: Derek Xu Date: Wed, 24 Sep 2025 18:31:54 -0700 Subject: [PATCH 02/12] break early --- eval_protocol/adapters/langfuse.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/eval_protocol/adapters/langfuse.py b/eval_protocol/adapters/langfuse.py index 675ce1ea..195e9adc 100644 --- a/eval_protocol/adapters/langfuse.py +++ b/eval_protocol/adapters/langfuse.py @@ -110,6 +110,15 @@ def convert_trace_to_evaluation_row( elif tag.startswith("row_id:") and not row_id: row_id = tag.split(":", 1)[1] + if ( + execution_metadata.invocation_id + and execution_metadata.experiment_id + and execution_metadata.rollout_id + and execution_metadata.run_id + and row_id + ): + break # Break early if we've found all the metadata we need + return EvaluationRow( messages=messages, tools=tools, From 2d4d22bdd0c0bfee2342143002f56946ab8b3977 Mon Sep 17 00:00:00 2001 From: Derek Xu Date: Wed, 24 Sep 2025 18:48:58 -0700 Subject: [PATCH 03/12] fix tests --- eval_protocol/adapters/langfuse.py | 2 +- tests/adapters/test_langfuse_adapter.py | 23 +++++++++++++++++------ 2 files changed, 18 insertions(+), 7 deletions(-) diff --git a/eval_protocol/adapters/langfuse.py b/eval_protocol/adapters/langfuse.py index 195e9adc..2f0d487a 100644 --- a/eval_protocol/adapters/langfuse.py +++ b/eval_protocol/adapters/langfuse.py @@ -373,7 +373,7 @@ def get_evaluation_rows( ) # If no results, possible due to indexing delay--remote rollout processor just finished pushing rows to Langfuse - if traces and hasattr(traces, "meta") and traces.meta.total_items == 0 and page == 1: + if traces and traces.meta and traces.meta.total_items == 0 and page 
== 1: raise Exception("Empty results - indexing delay") break diff --git a/tests/adapters/test_langfuse_adapter.py b/tests/adapters/test_langfuse_adapter.py index 3a4c2699..06f2d5ba 100644 --- a/tests/adapters/test_langfuse_adapter.py +++ b/tests/adapters/test_langfuse_adapter.py @@ -63,7 +63,9 @@ def _create_mock_trace( trace_id: str, input_data: Any = None, output_data: Any = None, observations: Optional[List] = None ): """Helper to create mock trace objects""" - return SimpleNamespace(id=trace_id, input=input_data, output=output_data, observations=observations or []) + return SimpleNamespace( + id=trace_id, input=input_data, output=output_data, observations=observations or [], tags=[], metadata={} + ) def _create_mock_traces_response(traces: List[Dict[str, Any]]): @@ -72,7 +74,15 @@ def _create_mock_traces_response(traces: List[Dict[str, Any]]): for trace_data in traces: trace_objects.append(SimpleNamespace(**trace_data)) - return SimpleNamespace(data=trace_objects, meta=SimpleNamespace(page=1, total_pages=1)) + return SimpleNamespace( + data=trace_objects, + meta=SimpleNamespace( + page=1, + total_pages=1, + total_items=len(trace_objects), # Add total_items to match real API + limit=100, + ), + ) @pytest.fixture @@ -161,7 +171,7 @@ def test_trace_conversion_with_span_name(): """Test trace conversion with specific span name""" # Mock observations with spans and generations observations = [ - SimpleNamespace(id="span1", name="judge", type="SPAN"), + SimpleNamespace(id="span1", name="judge", type="SPAN", metadata={}), SimpleNamespace( id="gen1", name="generation", @@ -170,6 +180,7 @@ def test_trace_conversion_with_span_name(): input={"messages": [{"role": "user", "content": "Judge this"}]}, output={"messages": [{"role": "assistant", "content": "Good response"}]}, start_time=datetime.now(), + metadata={}, ), ] @@ -304,7 +315,7 @@ def test_extract_messages_from_various_formats(): input_data={"messages": [{"role": "user", "content": "Hello"}]}, 
output_data={"messages": [{"role": "assistant", "content": "Hi"}]}, ) - messages1 = extract_messages_from_trace(trace1) + messages1 = extract_messages_from_trace(trace1) # pyright: ignore[reportArgumentType] assert len(messages1) == 2 assert messages1[0].role == "user" assert messages1[1].role == "assistant" @@ -313,7 +324,7 @@ def test_extract_messages_from_various_formats(): trace2 = _create_mock_trace( "trace2", input_data={"prompt": "What is AI?"}, output_data={"content": "AI is artificial intelligence"} ) - messages2 = extract_messages_from_trace(trace2) + messages2 = extract_messages_from_trace(trace2) # pyright: ignore[reportArgumentType] assert len(messages2) == 2 assert messages2[0].role == "user" assert messages2[0].content == "What is AI?" @@ -326,7 +337,7 @@ def test_extract_messages_from_various_formats(): input_data=[{"role": "user", "content": "List format"}], output_data=[{"role": "assistant", "content": "Response"}], ) - messages3 = extract_messages_from_trace(trace3) + messages3 = extract_messages_from_trace(trace3) # pyright: ignore[reportArgumentType] assert len(messages3) == 2 assert messages3[0].content == "List format" assert messages3[1].content == "Response" From 2dccc9c904c422c5dec9811ec6bbd7fa6e315a32 Mon Sep 17 00:00:00 2001 From: Derek Xu Date: Wed, 24 Sep 2025 23:00:04 -0700 Subject: [PATCH 04/12] use mocks instead in the test --- tests/adapters/test_langfuse_adapter.py | 80 +++++++++++++------------ 1 file changed, 43 insertions(+), 37 deletions(-) diff --git a/tests/adapters/test_langfuse_adapter.py b/tests/adapters/test_langfuse_adapter.py index 06f2d5ba..bfd336b6 100644 --- a/tests/adapters/test_langfuse_adapter.py +++ b/tests/adapters/test_langfuse_adapter.py @@ -1,7 +1,6 @@ """Tests for Langfuse adapter.""" from datetime import datetime, timedelta -from types import SimpleNamespace from typing import Any, Dict, List, Optional from unittest.mock import Mock @@ -63,8 +62,19 @@ def _create_mock_trace( trace_id: str, input_data: Any 
= None, output_data: Any = None, observations: Optional[List] = None ): """Helper to create mock trace objects""" - return SimpleNamespace( - id=trace_id, input=input_data, output=output_data, observations=observations or [], tags=[], metadata={} + # Ensure observations have metadata attribute + obs_with_metadata = [] + for obs in observations or []: + if hasattr(obs, "metadata"): + obs_with_metadata.append(obs) + else: + # Add metadata to existing observation + obs_dict = obs.__dict__ if hasattr(obs, "__dict__") else {} + obs_dict["metadata"] = getattr(obs, "metadata", {}) + obs_with_metadata.append(Mock(**obs_dict)) + + return Mock( + id=trace_id, input=input_data, output=output_data, observations=obs_with_metadata, tags=[], metadata={} ) @@ -72,17 +82,9 @@ def _create_mock_traces_response(traces: List[Dict[str, Any]]): """Helper to create mock traces list response""" trace_objects = [] for trace_data in traces: - trace_objects.append(SimpleNamespace(**trace_data)) - - return SimpleNamespace( - data=trace_objects, - meta=SimpleNamespace( - page=1, - total_pages=1, - total_items=len(trace_objects), # Add total_items to match real API - limit=100, - ), - ) + trace_objects.append(Mock(**trace_data)) + + return Mock(data=trace_objects, meta=Mock(page=1, total_pages=1, total_items=len(trace_objects), limit=100)) @pytest.fixture @@ -111,8 +113,7 @@ def test_basic_trace_conversion(): input_data={"messages": [{"role": "user", "content": "What's the weather?"}]}, output_data={"messages": [{"role": "assistant", "content": "It's sunny!"}]}, ) - - result = convert_trace_to_evaluation_row(trace) # pyright: ignore[reportArgumentType] + result = convert_trace_to_evaluation_row(trace) assert result is not None assert len(result.messages) == 2 @@ -150,7 +151,7 @@ def test_trace_with_tool_calls(): }, ) - result = convert_trace_to_evaluation_row(trace, include_tool_calls=True) # pyright: ignore[reportArgumentType] + result = convert_trace_to_evaluation_row(trace, 
include_tool_calls=True) assert result is not None assert result.tools is not None @@ -170,22 +171,27 @@ def test_trace_with_tool_calls(): def test_trace_conversion_with_span_name(): """Test trace conversion with specific span name""" # Mock observations with spans and generations - observations = [ - SimpleNamespace(id="span1", name="judge", type="SPAN", metadata={}), - SimpleNamespace( - id="gen1", - name="generation", - type="GENERATION", - parent_observation_id="span1", - input={"messages": [{"role": "user", "content": "Judge this"}]}, - output={"messages": [{"role": "assistant", "content": "Good response"}]}, - start_time=datetime.now(), - metadata={}, - ), - ] + span_mock = Mock() + span_mock.id = "span1" + span_mock.name = "judge" + span_mock.type = "SPAN" + span_mock.metadata = {} + span_mock.parent_observation_id = None + + gen_mock = Mock() + gen_mock.id = "gen1" + gen_mock.name = "generation" + gen_mock.type = "GENERATION" + gen_mock.parent_observation_id = "span1" + gen_mock.input = {"messages": [{"role": "user", "content": "Judge this"}]} + gen_mock.output = {"messages": [{"role": "assistant", "content": "Good response"}]} + gen_mock.start_time = datetime.now() + gen_mock.metadata = {} + + observations = [span_mock, gen_mock] trace = _create_mock_trace("trace_span", observations=observations) - result = convert_trace_to_evaluation_row(trace, span_name="judge") # pyright: ignore[reportArgumentType] + result = convert_trace_to_evaluation_row(trace, span_name="judge") assert result is not None assert len(result.messages) == 2 @@ -197,7 +203,7 @@ def test_empty_trace_returns_none(): """Test that empty traces return None""" trace = _create_mock_trace("empty_trace", input_data=None, output_data=None) - result = convert_trace_to_evaluation_row(trace) # pyright: ignore[reportArgumentType] + result = convert_trace_to_evaluation_row(trace) assert result is None @@ -205,9 +211,9 @@ def test_empty_trace_returns_none(): def test_malformed_trace_returns_none(): 
"""Test that malformed traces are handled gracefully""" # Trace with missing required attributes - trace = SimpleNamespace(id="malformed") # Missing input/output + trace = Mock(id="malformed") # Missing input/output - result = convert_trace_to_evaluation_row(trace) # pyright: ignore[reportArgumentType] + result = convert_trace_to_evaluation_row(trace) assert result is None @@ -315,7 +321,7 @@ def test_extract_messages_from_various_formats(): input_data={"messages": [{"role": "user", "content": "Hello"}]}, output_data={"messages": [{"role": "assistant", "content": "Hi"}]}, ) - messages1 = extract_messages_from_trace(trace1) # pyright: ignore[reportArgumentType] + messages1 = extract_messages_from_trace(trace1) assert len(messages1) == 2 assert messages1[0].role == "user" assert messages1[1].role == "assistant" @@ -324,7 +330,7 @@ def test_extract_messages_from_various_formats(): trace2 = _create_mock_trace( "trace2", input_data={"prompt": "What is AI?"}, output_data={"content": "AI is artificial intelligence"} ) - messages2 = extract_messages_from_trace(trace2) # pyright: ignore[reportArgumentType] + messages2 = extract_messages_from_trace(trace2) assert len(messages2) == 2 assert messages2[0].role == "user" assert messages2[0].content == "What is AI?" 
@@ -337,7 +343,7 @@ def test_extract_messages_from_various_formats(): input_data=[{"role": "user", "content": "List format"}], output_data=[{"role": "assistant", "content": "Response"}], ) - messages3 = extract_messages_from_trace(trace3) # pyright: ignore[reportArgumentType] + messages3 = extract_messages_from_trace(trace3) assert len(messages3) == 2 assert messages3[0].content == "List format" assert messages3[1].content == "Response" From 0a9a9a46d5bda2b66302e0adb2fbe19f310e0194 Mon Sep 17 00:00:00 2001 From: Derek Xu Date: Thu, 25 Sep 2025 11:35:09 -0700 Subject: [PATCH 05/12] remove the requester_metadata stuff --- eval_protocol/adapters/langfuse.py | 130 +----------------- .../langfuse/test_remote_langfuse_chinook.py | 16 +++ 2 files changed, 21 insertions(+), 125 deletions(-) diff --git a/eval_protocol/adapters/langfuse.py b/eval_protocol/adapters/langfuse.py index 2f0d487a..42eeee6f 100644 --- a/eval_protocol/adapters/langfuse.py +++ b/eval_protocol/adapters/langfuse.py @@ -85,29 +85,17 @@ def convert_trace_to_evaluation_row( execution_metadata = ExecutionMetadata() row_id = None - if trace.observations: - for obs in trace.observations: - if obs.metadata and "requester_metadata" in obs.metadata: - req_meta = obs.metadata["requester_metadata"] - if isinstance(req_meta, dict): - execution_metadata.invocation_id = req_meta.get("invocation_id") - execution_metadata.experiment_id = req_meta.get("experiment_id") - execution_metadata.rollout_id = req_meta.get("rollout_id") - execution_metadata.run_id = req_meta.get("run_id") - row_id = req_meta.get("row_id") - break # Only need to get first observation - if trace.tags: for tag in trace.tags: - if tag.startswith("invocation_id:") and not execution_metadata.invocation_id: + if tag.startswith("invocation_id:"): execution_metadata.invocation_id = tag.split(":", 1)[1] - elif tag.startswith("experiment_id:") and not execution_metadata.experiment_id: + elif tag.startswith("experiment_id:"): 
execution_metadata.experiment_id = tag.split(":", 1)[1] - elif tag.startswith("rollout_id:") and not execution_metadata.rollout_id: + elif tag.startswith("rollout_id:"): execution_metadata.rollout_id = tag.split(":", 1)[1] - elif tag.startswith("run_id:") and not execution_metadata.run_id: + elif tag.startswith("run_id:"): execution_metadata.run_id = tag.split(":", 1)[1] - elif tag.startswith("row_id:") and not row_id: + elif tag.startswith("row_id:"): row_id = tag.split(":", 1)[1] if ( @@ -298,9 +286,6 @@ def get_evaluation_rows( max_retries: int = 3, span_name: Optional[str] = None, converter: Optional[TraceConverter] = None, - metadata: Optional[Dict[str, Any]] = None, - requester_metadata: Optional[Dict[str, Any]] = None, - requester_metadata_contains: Optional[str] = None, ) -> List[EvaluationRow]: """Pull traces from Langfuse and convert to EvaluationRow format. @@ -335,10 +320,6 @@ def get_evaluation_rows( to_timestamp = datetime.now() from_timestamp = to_timestamp - timedelta(hours=hours_back) - # If filtering by metadata/requester_metadata, prefer fetching metadata fields - if (metadata is not None or requester_metadata is not None or requester_metadata_contains) and not fields: - fields = "core,metadata,observations" - # Collect trace summaries via pagination (up to limit) all_traces = [] page = 1 @@ -420,74 +401,6 @@ def get_evaluation_rows( selected_traces = all_traces logger.debug("Processing all %d collected traces (no sampling)", len(all_traces)) - # Helper to check if a trace matches provided metadata filters. We look in multiple places - # to account for Langfuse moving fields (e.g., metadata vs requester_metadata) and SDK shape. 
- def _trace_matches_metadata_filters(trace_obj: Any) -> bool: - if metadata is None and requester_metadata is None: - return True - - def _as_dict(val: Any) -> Dict[str, Any]: - if val is None: - return {} - if isinstance(val, dict): - return val - # Some SDK objects expose .model_dump() or behave like pydantic models - dump = getattr(val, "model_dump", None) - if callable(dump): - try: - return dump() # type: ignore[no-any-return] - except Exception: - return {} - return {} - - # Try common locations for metadata on full trace - trace_meta = _as_dict(getattr(trace_obj, "metadata", None)) - trace_req_meta = _as_dict(getattr(trace_obj, "requester_metadata", None)) - # Some Langfuse deployments nest requester_metadata inside metadata - nested_req_meta = {} - try: - if isinstance(trace_meta, dict) and isinstance(trace_meta.get("requester_metadata"), dict): - nested_req_meta = _as_dict(trace_meta.get("requester_metadata")) - except Exception: - nested_req_meta = {} - - # Fallbacks: sometimes metadata is embedded in input - input_meta = {} - try: - inp = getattr(trace_obj, "input", None) - if isinstance(inp, dict): - input_meta = _as_dict(inp.get("metadata")) - except Exception: - input_meta = {} - - # Combine for matching convenience (later keys override earlier for equality check only) - combined_meta = {**trace_meta, **input_meta} - combined_req_meta = {**trace_req_meta} - - # Also merge nested requester metadata when present - if nested_req_meta: - combined_req_meta = {**combined_req_meta, **nested_req_meta} - - def _is_subset(needle: Dict[str, Any], haystack: Dict[str, Any]) -> bool: - for k, v in needle.items(): - if haystack.get(k) != v: - return False - return True - - ok_meta = True - ok_req_meta = True - - if metadata is not None: - # Accept match if found either in metadata or requester_metadata buckets - ok_meta = _is_subset(metadata, combined_meta) or _is_subset(metadata, combined_req_meta) - - if requester_metadata is not None: - ok_req_meta = 
_is_subset(requester_metadata, combined_req_meta) or _is_subset( - requester_metadata, combined_meta - ) - - return ok_meta and ok_req_meta - # Process each selected trace with sleep and retry logic for trace_info in selected_traces: # Sleep between gets to avoid rate limits @@ -524,39 +437,6 @@ def _is_subset(needle: Dict[str, Any], haystack: Dict[str, Any]) -> bool: break # Skip this trace if trace_full: - # If metadata filters are provided, skip non-matching traces early - try: - if not _trace_matches_metadata_filters(trace_full): - continue - except Exception: - # Be permissive on filter errors; treat as non-match - continue - - # If observations carry requester_metadata, allow substring filtering - if requester_metadata_contains: - contains_val = requester_metadata_contains - found_match = False - try: - for obs in getattr(trace_full, "observations", []) or []: - obs_rmd = getattr(obs, "requester_metadata", None) - if isinstance(obs_rmd, dict) and any( - (isinstance(v, str) and contains_val in v) for v in obs_rmd.values() - ): - found_match = True - break - obs_md = getattr(obs, "metadata", None) - if isinstance(obs_md, dict): - nested = obs_md.get("requester_metadata") - if isinstance(nested, dict) and any( - (isinstance(v, str) and contains_val in v) for v in nested.values() - ): - found_match = True - break - except Exception: - found_match = False - if not found_match: - continue - try: if converter: eval_row = converter(trace_full, include_tool_calls, span_name) diff --git a/tests/chinook/langfuse/test_remote_langfuse_chinook.py b/tests/chinook/langfuse/test_remote_langfuse_chinook.py index 59bf01d3..0c8ffbb1 100644 --- a/tests/chinook/langfuse/test_remote_langfuse_chinook.py +++ b/tests/chinook/langfuse/test_remote_langfuse_chinook.py @@ -15,6 +15,19 @@ from eval_protocol.adapters.langfuse import create_langfuse_adapter INVOCATION_ID = "" +ASSERTION_EXECUTED = False + + +@pytest.fixture(autouse=True) +def check_assertion_executed(): + """Ensure the test 
actually executed the Langfuse validation""" + global ASSERTION_EXECUTED + ASSERTION_EXECUTED = False # Reset before test + yield + # After test completes, verify the assertion was executed + assert ASSERTION_EXECUTED, ( + "Test passed but never validated Langfuse data - check if output_data_loader returned empty results" + ) def fetch_trajectories(invocation_id: str) -> List[EvaluationRow]: @@ -97,10 +110,13 @@ async def test_remote_rollout_and_fetch_langfuse(row: EvaluationRow) -> Evaluati - trigger remote rollout via RemoteRolloutProcessor (calls init/status) - fetch traces from Langfuse filtered by metadata via output_data_loader; FAIL if none found """ + global ASSERTION_EXECUTED + # Sanity check: row should have an invocation_id since it came from Langfuse via output_data_loader assert row.messages[0].content == "Hello there! Please say hi back.", "Row should have correct message content" assert row.execution_metadata.invocation_id == INVOCATION_ID, "Row should have correct invocation_id set" + ASSERTION_EXECUTED = True print(f"✅ Successfully received row from Langfuse with invocation_id: {row.execution_metadata.invocation_id}") return row From 903584b9ce38ec809852abb01a564ae9026e2540 Mon Sep 17 00:00:00 2001 From: Derek Xu Date: Thu, 25 Sep 2025 13:33:47 -0700 Subject: [PATCH 06/12] pipelined --- eval_protocol/pytest/evaluation_test.py | 46 +++++------------- .../pytest/remote_rollout_processor.py | 35 +++++++------- eval_protocol/pytest/rollout_processor.py | 8 ---- eval_protocol/quickstart/utils.py | 22 +++++++++ .../langfuse/test_remote_langfuse_chinook.py | 48 +++++++++---------- 5 files changed, 75 insertions(+), 84 deletions(-) diff --git a/eval_protocol/pytest/evaluation_test.py b/eval_protocol/pytest/evaluation_test.py index d0de8ea0..e34f7e4b 100644 --- a/eval_protocol/pytest/evaluation_test.py +++ b/eval_protocol/pytest/evaluation_test.py @@ -402,33 +402,15 @@ async def _execute_groupwise_eval_with_semaphore( return results if mode == "pointwise": 
+ # Pointwise mode, rollouts will return as they complete so we can pipeline evaluation_test execution pointwise_tasks: list[asyncio.Task[EvaluationRow]] = [] - - if rollout_processor.supports_pipelining: - # Pointwise mode, rollouts will return as they complete so we can pipeline evaluation_test execution - # Use wrapper that handles retry logic internally - async for row in rollout_processor_with_retry( - rollout_processor, fresh_dataset, config, run_idx - ): - pointwise_tasks.append( - asyncio.create_task(_execute_pointwise_eval_with_semaphore(row=row)) - ) - else: - # Non-pipelined mode: collect all rollout results first, then postprocess, then evaluate - collected_rollout_rows: list[EvaluationRow] = [] - async for row in rollout_processor_with_retry( - rollout_processor, fresh_dataset, config, run_idx - ): - collected_rollout_rows.append(row) - - # Post-process rollout results to get evaluation inputs - eval_input_rows = rollout_processor.postprocess(collected_rollout_rows) - - # Now evaluate all the post-processed rows - for row in eval_input_rows: - pointwise_tasks.append( - asyncio.create_task(_execute_pointwise_eval_with_semaphore(row=row)) - ) + # Use wrapper that handles retry logic internally + async for row in rollout_processor_with_retry( + rollout_processor, fresh_dataset, config, run_idx + ): + pointwise_tasks.append( + asyncio.create_task(_execute_pointwise_eval_with_semaphore(row=row)) + ) # Run evaluation tasks with progress bar results = await run_tasks_with_eval_progress(pointwise_tasks, run_idx) @@ -471,13 +453,9 @@ async def _collect_result(config, lst): # pyright: ignore[reportUnknownParamete lst.append(copied_row) # pyright: ignore[reportUnknownMemberType] tasks.append(asyncio.create_task(_collect_result(config, lst))) # pyright: ignore[reportUnknownArgumentType] rollout_results = await asyncio.gather(*tasks) - - # Flatten and postprocess all rollout results - all_rollout_rows = [row for result in rollout_results for row in result] - 
processed_rows = rollout_processor.postprocess(all_rollout_rows) - - for row in processed_rows: - row_groups[row.input_metadata.row_id].append(row) + for result in rollout_results: + for row in result: + row_groups[row.input_metadata.row_id].append(row) # pyright: ignore[reportUnknownMemberType] tasks = [] for _, rows in row_groups.items(): # pyright: ignore[reportUnknownVariableType] tasks.append(asyncio.create_task(_execute_groupwise_eval_with_semaphore(rows=rows))) # pyright: ignore[reportUnknownArgumentType] @@ -494,8 +472,6 @@ async def _collect_result(config, lst): # pyright: ignore[reportUnknownParamete ): input_dataset.append(row) # pyright: ignore[reportUnknownMemberType] - input_dataset = rollout_processor.postprocess(input_dataset) - # NOTE: we will still evaluate errored rows (give users control over this) # i.e., they can choose to give EvaluateResult.score = 0 for errored rows in their test_func results = await execute_pytest( diff --git a/eval_protocol/pytest/remote_rollout_processor.py b/eval_protocol/pytest/remote_rollout_processor.py index 4f0e102c..1e6b102e 100644 --- a/eval_protocol/pytest/remote_rollout_processor.py +++ b/eval_protocol/pytest/remote_rollout_processor.py @@ -36,8 +36,6 @@ class RemoteRolloutProcessor(RolloutProcessor): Returns: {"terminated": bool, "info": {...}?} """ - supports_pipelining: bool = False # Remote rollout processor cannot pipeline - must wait for all rollouts to complete before fetching results. 
- def __init__( self, *, @@ -156,27 +154,30 @@ def _get_status() -> Dict[str, Any]: # Update duration, regardless of termination row.execution_metadata.duration_seconds = time.perf_counter() - start_time - return row - for r in rows: - tasks.append(asyncio.create_task(_process_row(r))) + if row.execution_metadata.rollout_id is None: + raise ValueError("Rollout ID is required in RemoteRolloutProcessor") - return tasks + data_loader = self._output_data_loader(row.execution_metadata.rollout_id) + + def _load_data(): + return data_loader.load() + + results = await asyncio.to_thread(_load_data) - def postprocess(self, finished_rollout_rows: List[EvaluationRow]) -> List[EvaluationRow]: - """Fetch actual evaluation rows from Langfuse using the output_data_loader.""" - invocation_id = finished_rollout_rows[0].execution_metadata.invocation_id - if not invocation_id: - raise ValueError("Invocation ID is required in RemoteRolloutProcessor") + output_rows: List[EvaluationRow] = [row for result in results for row in result.rows] - data_loader = self._output_data_loader(invocation_id) + assert len(output_rows) == 1, "Dataloader used for RemoteRolloutProcessor should have exactly one row" - results = data_loader.load() - output_rows: List[EvaluationRow] = [] - for result in results: - output_rows.extend(result.rows) + langfuse_row = output_rows[0] + langfuse_row.input_metadata.completion_params = row.input_metadata.completion_params - return output_rows + return langfuse_row + + for r in rows: + tasks.append(asyncio.create_task(_process_row(r))) + + return tasks def cleanup(self) -> None: return None diff --git a/eval_protocol/pytest/rollout_processor.py b/eval_protocol/pytest/rollout_processor.py index 5389b1c6..313f1768 100644 --- a/eval_protocol/pytest/rollout_processor.py +++ b/eval_protocol/pytest/rollout_processor.py @@ -10,19 +10,11 @@ class RolloutProcessor(ABC): Abstract base class for all rollout processor strategies. 
""" - supports_pipelining: bool = ( - True # Whether this processor supports pipelined evaluation (evaluate rows as rollouts complete) - ) - @abstractmethod def __call__(self, rows: list[EvaluationRow], config: RolloutProcessorConfig) -> list[asyncio.Task[EvaluationRow]]: """Process evaluation rows and return async tasks. Must be implemented by subclasses.""" pass - def postprocess(self, finished_rollout_rows: list[EvaluationRow]) -> list[EvaluationRow]: - """Post-process rollout results to produce evaluation inputs. Only available for processors that return False from supports_pipelining.""" - return finished_rollout_rows - def cleanup(self) -> None: """Cleanup resources. Override in subclasses if cleanup is needed.""" pass diff --git a/eval_protocol/quickstart/utils.py b/eval_protocol/quickstart/utils.py index 96ecb808..a5ab49d5 100644 --- a/eval_protocol/quickstart/utils.py +++ b/eval_protocol/quickstart/utils.py @@ -186,6 +186,28 @@ def assistant_to_ground_truth(data: list[EvaluationRow]) -> list[EvaluationRow]: return processed_rows +def filter_longest_conversation(data: list[EvaluationRow]) -> list[EvaluationRow]: + """ + Filter out the longest conversation from a list of evaluation rows that share the same rollout_id. 
+ + Args: + data: List of EvaluationRow objects that share the same rollout_id + + Returns: + List containing only the EvaluationRow with the most messages (longest conversation) + """ + if not data: + return data + + if len(data) == 1: + return data + + # Find the row with the most messages (longest conversation) + longest_row = max(data, key=lambda row: len(row.messages)) + + return [longest_row] + + async def run_single_judgment( question_text: str, answer_a: str, answer_b: str, tools, judge_config, client ) -> Optional[Dict[str, Any]]: diff --git a/tests/chinook/langfuse/test_remote_langfuse_chinook.py b/tests/chinook/langfuse/test_remote_langfuse_chinook.py index 0c8ffbb1..28c9b9bb 100644 --- a/tests/chinook/langfuse/test_remote_langfuse_chinook.py +++ b/tests/chinook/langfuse/test_remote_langfuse_chinook.py @@ -13,33 +13,37 @@ from eval_protocol.pytest import evaluation_test from eval_protocol.pytest.remote_rollout_processor import RemoteRolloutProcessor from eval_protocol.adapters.langfuse import create_langfuse_adapter +from eval_protocol.quickstart.utils import filter_longest_conversation -INVOCATION_ID = "" -ASSERTION_EXECUTED = False +ROLLOUT_IDS = set() @pytest.fixture(autouse=True) -def check_assertion_executed(): - """Ensure the test actually executed the Langfuse validation""" - global ASSERTION_EXECUTED - ASSERTION_EXECUTED = False # Reset before test +def check_rollout_coverage(): + """Ensure we processed all expected rollout_ids""" + global ROLLOUT_IDS + ROLLOUT_IDS.clear() yield - # After test completes, verify the assertion was executed - assert ASSERTION_EXECUTED, ( - "Test passed but never validated Langfuse data - check if output_data_loader returned empty results" + + # Verify we've seen the expected number of rollout_ids after test is done + expected_rollout_count = 3 + assert len(ROLLOUT_IDS) == expected_rollout_count, ( + f"Expected to see {expected_rollout_count} rollout_ids, but only saw {len(ROLLOUT_IDS)}: {ROLLOUT_IDS}" ) -def 
fetch_trajectories(invocation_id: str) -> List[EvaluationRow]: - global INVOCATION_ID # This is just to verify the invocation_id is set correctly in the test - INVOCATION_ID = invocation_id +def fetch_langfuse_traces(rollout_id: str) -> List[EvaluationRow]: + global ROLLOUT_IDS # Track all rollout_ids we've seen + ROLLOUT_IDS.add(rollout_id) adapter = create_langfuse_adapter() - return adapter.get_evaluation_rows(tags=[f"invocation_id:{invocation_id}"]) + return adapter.get_evaluation_rows(tags=[f"rollout_id:{rollout_id}"]) -def create_output_data_loader(invocation_id: str) -> DynamicDataLoader: - return DynamicDataLoader(generators=[lambda: fetch_trajectories(invocation_id)]) +def langfuse_output_data_loader(rollout_id: str) -> DynamicDataLoader: + return DynamicDataLoader( + generators=[lambda: fetch_langfuse_traces(rollout_id)], preprocess_fn=filter_longest_conversation + ) def _start_remote_server(): @@ -87,7 +91,7 @@ def remote_langfuse_data_generator() -> List[EvaluationRow]: # Minimal single-user-turn message to trigger a response row = EvaluationRow(messages=[Message(role="user", content="Hello there! 
Please say hi back.")]) - return [row] + return [row, row, row] @pytest.mark.skipif(os.environ.get("CI") == "true", reason="Only run this test locally (skipped in CI)") @@ -100,7 +104,7 @@ def remote_langfuse_data_generator() -> List[EvaluationRow]: remote_base_url="http://127.0.0.1:7077", num_turns=2, timeout_seconds=30, - output_data_loader=create_output_data_loader, + output_data_loader=langfuse_output_data_loader, ), ) async def test_remote_rollout_and_fetch_langfuse(row: EvaluationRow) -> EvaluationRow: @@ -110,13 +114,9 @@ async def test_remote_rollout_and_fetch_langfuse(row: EvaluationRow) -> Evaluati - trigger remote rollout via RemoteRolloutProcessor (calls init/status) - fetch traces from Langfuse filtered by metadata via output_data_loader; FAIL if none found """ - global ASSERTION_EXECUTED - - # Sanity check: row should have an invocation_id since it came from Langfuse via output_data_loader assert row.messages[0].content == "Hello there! Please say hi back.", "Row should have correct message content" - assert row.execution_metadata.invocation_id == INVOCATION_ID, "Row should have correct invocation_id set" - - ASSERTION_EXECUTED = True - print(f"✅ Successfully received row from Langfuse with invocation_id: {row.execution_metadata.invocation_id}") + assert row.execution_metadata.rollout_id in ROLLOUT_IDS, ( + f"Row rollout_id {row.execution_metadata.rollout_id} should be in tracked rollout_ids: {ROLLOUT_IDS}" + ) return row From 8cb080cea9ad702fa6ab0bd5bae0a680d83fd359 Mon Sep 17 00:00:00 2001 From: Derek Xu Date: Thu, 25 Sep 2025 13:38:01 -0700 Subject: [PATCH 07/12] removing pyright --- eval_protocol/pytest/evaluation_test.py | 39 ++++++++++++------------- 1 file changed, 18 insertions(+), 21 deletions(-) diff --git a/eval_protocol/pytest/evaluation_test.py b/eval_protocol/pytest/evaluation_test.py index e34f7e4b..4aabd296 100644 --- a/eval_protocol/pytest/evaluation_test.py +++ b/eval_protocol/pytest/evaluation_test.py @@ -72,7 +72,7 @@ def 
evaluation_test( input_dataset: Sequence[DatasetPathParam] | None = None, input_rows: Sequence[list[EvaluationRow]] | None = None, data_loaders: Sequence[EvaluationDataLoader] | EvaluationDataLoader | None = None, - dataset_adapter: Callable[[list[dict[str, Any]]], Dataset] = default_dataset_adapter, # pyright: ignore[reportExplicitAny] + dataset_adapter: Callable[[list[dict[str, Any]]], Dataset] = default_dataset_adapter, rollout_processor: RolloutProcessor | None = None, evaluation_test_kwargs: Sequence[EvaluationInputParam | None] | None = None, rollout_processor_kwargs: RolloutProcessorInputParam | None = None, @@ -418,9 +418,7 @@ async def _execute_groupwise_eval_with_semaphore( all_results[run_idx] = results elif mode == "groupwise": # rollout all the completion_params for the same row at once, and then send the output to the test_func - row_groups = defaultdict( # pyright: ignore[reportUnknownVariableType] - list - ) # key: row_id, value: list of rollout_result + row_groups = defaultdict(list) # key: row_id, value: list of rollout_result tasks: list[asyncio.Task[list[EvaluationRow]]] = [] # completion_groups = [] for idx, cp in enumerate(original_completion_params): @@ -435,13 +433,13 @@ async def _execute_groupwise_eval_with_semaphore( ) lst = [] - async def _collect_result(config, lst): # pyright: ignore[reportUnknownParameterType, reportMissingParameterType] + async def _collect_result(config, lst): result = [] async for row in rollout_processor_with_retry( rollout_processor, lst, config, run_idx ): # pyright: ignore[reportUnknownArgumentType] - result.append(row) # pyright: ignore[reportUnknownMemberType] - return result # pyright: ignore[reportUnknownVariableType] + result.append(row) + return result for ori_row in fresh_dataset: copied_row = ori_row.model_copy(deep=True) @@ -450,19 +448,19 @@ async def _collect_result(config, lst): # pyright: ignore[reportUnknownParamete str(ori_row.execution_metadata.rollout_id) + "_" + str(idx) ) 
copied_row.input_metadata.completion_params = cp if cp is not None else {} - lst.append(copied_row) # pyright: ignore[reportUnknownMemberType] - tasks.append(asyncio.create_task(_collect_result(config, lst))) # pyright: ignore[reportUnknownArgumentType] + lst.append(copied_row) + tasks.append(asyncio.create_task(_collect_result(config, lst))) rollout_results = await asyncio.gather(*tasks) for result in rollout_results: for row in result: - row_groups[row.input_metadata.row_id].append(row) # pyright: ignore[reportUnknownMemberType] + row_groups[row.input_metadata.row_id].append(row) tasks = [] - for _, rows in row_groups.items(): # pyright: ignore[reportUnknownVariableType] - tasks.append(asyncio.create_task(_execute_groupwise_eval_with_semaphore(rows=rows))) # pyright: ignore[reportUnknownArgumentType] + for _, rows in row_groups.items(): + tasks.append(asyncio.create_task(_execute_groupwise_eval_with_semaphore(rows=rows))) results = [] for task in tasks: res = await task - results.extend(res) # pyright: ignore[reportUnknownMemberType] + results.extend(res) all_results[run_idx] = results else: # Batch mode: collect all results first, then evaluate (no pipelining) @@ -470,13 +468,12 @@ async def _collect_result(config, lst): # pyright: ignore[reportUnknownParamete async for row in rollout_processor_with_retry( rollout_processor, fresh_dataset, config, run_idx ): - input_dataset.append(row) # pyright: ignore[reportUnknownMemberType] - + input_dataset.append(row) # NOTE: we will still evaluate errored rows (give users control over this) # i.e., they can choose to give EvaluateResult.score = 0 for errored rows in their test_func results = await execute_pytest( test_func, - processed_dataset=input_dataset, # pyright: ignore[reportUnknownArgumentType] + processed_dataset=input_dataset, evaluation_test_kwargs=kwargs.get("evaluation_test_kwargs") or {}, ) if ( @@ -539,16 +536,16 @@ async def _collect_result(config, lst): # pyright: ignore[reportUnknownParamete # for 
groupwise mode, the result contains eval otuput from multiple completion_params, we need to differentiate them # rollout_id is used to differentiate the result from different completion_params if mode == "groupwise": - results_by_group = [ # pyright: ignore[reportUnknownVariableType] + results_by_group = [ [[] for _ in range(num_runs)] for _ in range(len(original_completion_params)) ] for i_run, result in enumerate(all_results): for r in result: completion_param_idx = int(r.execution_metadata.rollout_id.split("_")[1]) # pyright: ignore[reportOptionalMemberAccess] - results_by_group[completion_param_idx][i_run].append(r) # pyright: ignore[reportUnknownMemberType] - for rollout_id, result in enumerate(results_by_group): # pyright: ignore[reportUnknownVariableType, reportUnknownArgumentType] + results_by_group[completion_param_idx][i_run].append(r) + for rollout_id, result in enumerate(results_by_group): postprocess( - result, # pyright: ignore[reportUnknownArgumentType] + result, aggregation_method, passed_threshold, active_logger, @@ -600,7 +597,7 @@ async def _collect_result(config, lst): # pyright: ignore[reportUnknownParamete pytest_wrapper = pytest.mark.asyncio(pytest_wrapper) # Create the dual mode wrapper - dual_mode_wrapper = create_dual_mode_wrapper( # pyright: ignore[reportUnknownVariableType] + dual_mode_wrapper = create_dual_mode_wrapper( test_func, mode, max_concurrent_rollouts, max_concurrent_evaluations, pytest_wrapper ) From 5125e5a6a89e8810d746c401aa11f4ad7f74ad7d Mon Sep 17 00:00:00 2001 From: Derek Xu Date: Thu, 25 Sep 2025 14:37:52 -0700 Subject: [PATCH 08/12] take out litellm --- .../pytest/remote_rollout_processor.py | 17 +++--- tests/chinook/langfuse/remote_server.py | 52 +++++++++---------- .../langfuse/test_remote_langfuse_chinook.py | 10 ++-- 3 files changed, 39 insertions(+), 40 deletions(-) diff --git a/eval_protocol/pytest/remote_rollout_processor.py b/eval_protocol/pytest/remote_rollout_processor.py index 1e6b102e..9188a078 100644 --- 
a/eval_protocol/pytest/remote_rollout_processor.py +++ b/eval_protocol/pytest/remote_rollout_processor.py @@ -4,7 +4,7 @@ import requests -from eval_protocol.models import EvaluationRow +from eval_protocol.models import EvaluationRow, Status from eval_protocol.data_loader.dynamic_data_loader import DynamicDataLoader from .rollout_processor import RolloutProcessor from .types import RolloutProcessorConfig @@ -167,12 +167,15 @@ def _load_data(): output_rows: List[EvaluationRow] = [row for result in results for row in result.rows] - assert len(output_rows) == 1, "Dataloader used for RemoteRolloutProcessor should have exactly one row" - - langfuse_row = output_rows[0] - langfuse_row.input_metadata.completion_params = row.input_metadata.completion_params - - return langfuse_row + if len(output_rows) == 0: # Fallback to original row if no Langfuse data found + row.rollout_status = Status(code=Status.Code.NOT_FOUND, message="No Langfuse data found for rollout") + return row + elif len(output_rows) == 1: # Return the Langfuse row + langfuse_row = output_rows[0] + langfuse_row.input_metadata.completion_params = row.input_metadata.completion_params + return langfuse_row + else: + raise ValueError("RemoteRolloutProcessor's output_data_loader should return exactly one row.") for r in rows: tasks.append(asyncio.create_task(_process_row(r))) diff --git a/tests/chinook/langfuse/remote_server.py b/tests/chinook/langfuse/remote_server.py index 0cdc0afd..0a83c9e0 100644 --- a/tests/chinook/langfuse/remote_server.py +++ b/tests/chinook/langfuse/remote_server.py @@ -5,7 +5,7 @@ import uvicorn from fastapi import FastAPI, HTTPException from pydantic import BaseModel -import requests +from langfuse.openai import openai # pyright: ignore[reportPrivateImportUsage] app = FastAPI() @@ -42,52 +42,50 @@ def init(req: InitRequest): # Persist state _STATE[req.rollout_id] = {"terminated": False} - # Kick off worker thread that runs multi-turn chat via LiteLLM proxy + # Kick off worker thread 
that runs multi-turn chat via Langfuse OpenAI integration def _worker(): try: - base_url = os.getenv( - "LITELLM_BASE_URL", - "https://litellm-cloud-proxy-prod-644257448872.us-central1.run.app", - ) - url = f"{base_url}/v1/chat/completions" - headers = { - "Authorization": f"Bearer {os.getenv('FIREWORKS_API_KEY', '')}", - "Content-Type": "application/json", - } - - # Prepare metadata payload to attach for Langfuse filtering + # Prepare tags for Langfuse filtering metadata = { - "tags": [ + "langfuse_tags": [ f"invocation_id:{req.metadata.get('invocation_id')}", f"experiment_id:{req.metadata.get('experiment_id')}", f"rollout_id:{req.metadata.get('rollout_id')}", f"run_id:{req.metadata.get('run_id')}", f"row_id:{req.metadata.get('row_id')}", - ], - "invocation_id": req.metadata.get("invocation_id"), - "experiment_id": req.metadata.get("experiment_id"), - "rollout_id": req.metadata.get("rollout_id"), - "run_id": req.metadata.get("run_id"), - "row_id": req.metadata.get("row_id"), + ] } messages = req.messages # Simulate N-1 assistant turns (single-shot or simple echo) for _ in range(max(1, req.num_turns)): - payload = { + completion_kwargs = { "model": req.model, "messages": _clean_messages_for_api(messages), "metadata": metadata, } + if req.tools: - payload["tools"] = req.tools - r = requests.post(url, json=payload, headers=headers, timeout=60) - r.raise_for_status() - data = r.json() - assistant = data.get("choices", [{}])[0].get("message", {}) + completion_kwargs["tools"] = req.tools + + completion = openai.chat.completions.create(**completion_kwargs) + assistant_message = completion.choices[0].message + + # Convert to dict format for next turn + assistant_dict = {"role": "assistant", "content": assistant_message.content} + if assistant_message.tool_calls: + assistant_dict["tool_calls"] = [ + { + "id": tc.id, + "type": tc.type, + "function": {"name": tc.function.name, "arguments": tc.function.arguments}, + } + for tc in assistant_message.tool_calls + ] + # Append 
assistant for next turn - messages = messages + [assistant] + messages = messages + [assistant_dict] except Exception: # Best-effort; mark as done even on error to unblock polling diff --git a/tests/chinook/langfuse/test_remote_langfuse_chinook.py b/tests/chinook/langfuse/test_remote_langfuse_chinook.py index 28c9b9bb..e5ea0eed 100644 --- a/tests/chinook/langfuse/test_remote_langfuse_chinook.py +++ b/tests/chinook/langfuse/test_remote_langfuse_chinook.py @@ -25,11 +25,7 @@ def check_rollout_coverage(): ROLLOUT_IDS.clear() yield - # Verify we've seen the expected number of rollout_ids after test is done - expected_rollout_count = 3 - assert len(ROLLOUT_IDS) == expected_rollout_count, ( - f"Expected to see {expected_rollout_count} rollout_ids, but only saw {len(ROLLOUT_IDS)}: {ROLLOUT_IDS}" - ) + assert len(ROLLOUT_IDS) == 3, f"Expected to see {ROLLOUT_IDS} rollout_ids, but only saw {ROLLOUT_IDS}" def fetch_langfuse_traces(rollout_id: str) -> List[EvaluationRow]: @@ -95,7 +91,7 @@ def remote_langfuse_data_generator() -> List[EvaluationRow]: @pytest.mark.skipif(os.environ.get("CI") == "true", reason="Only run this test locally (skipped in CI)") -@pytest.mark.parametrize("completion_params", [{"model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct"}]) +@pytest.mark.parametrize("completion_params", [{"model": "gpt-4o"}]) @evaluation_test( data_loaders=DynamicDataLoader( generators=[remote_langfuse_data_generator], @@ -115,6 +111,8 @@ async def test_remote_rollout_and_fetch_langfuse(row: EvaluationRow) -> Evaluati - fetch traces from Langfuse filtered by metadata via output_data_loader; FAIL if none found """ assert row.messages[0].content == "Hello there! Please say hi back.", "Row should have correct message content" + assert len(row.messages) > 1, "Row should have a response. If this fails, we fellback to the original row." 
+ assert row.execution_metadata.rollout_id in ROLLOUT_IDS, ( f"Row rollout_id {row.execution_metadata.rollout_id} should be in tracked rollout_ids: {ROLLOUT_IDS}" ) From 21db77ce16e81e3bddf0dad2fe0a34f8201172b6 Mon Sep 17 00:00:00 2001 From: Derek Xu Date: Thu, 25 Sep 2025 15:33:32 -0700 Subject: [PATCH 09/12] add types --- eval_protocol/__init__.py | 12 +++ .../pytest/remote_rollout_processor.py | 6 -- .../types/remote_rollout_processor.py | 45 ++++++++++ tests/chinook/langfuse/remote_server.py | 89 ++++++------------- .../langfuse/test_remote_langfuse_chinook.py | 64 +++---------- 5 files changed, 96 insertions(+), 120 deletions(-) create mode 100644 eval_protocol/types/remote_rollout_processor.py diff --git a/eval_protocol/__init__.py b/eval_protocol/__init__.py index e554699a..46d8a265 100644 --- a/eval_protocol/__init__.py +++ b/eval_protocol/__init__.py @@ -62,6 +62,13 @@ except ImportError: LangSmithAdapter = None +# Remote server types +from .types.remote_rollout_processor import ( + InitRequest, + RolloutMetadata, + StatusResponse, + create_langfuse_config_tags, +) warnings.filterwarnings("default", category=DeprecationWarning, module="eval_protocol") @@ -110,6 +117,11 @@ # Submodules "rewards", "mcp", + # Remote server types + "InitRequest", + "RolloutMetadata", + "StatusResponse", + "create_langfuse_config_tags", ] from . 
import _version diff --git a/eval_protocol/pytest/remote_rollout_processor.py b/eval_protocol/pytest/remote_rollout_processor.py index 9188a078..020ea9c5 100644 --- a/eval_protocol/pytest/remote_rollout_processor.py +++ b/eval_protocol/pytest/remote_rollout_processor.py @@ -28,7 +28,6 @@ class RemoteRolloutProcessor(RolloutProcessor): "run_id": str | null, "row_id": str | null }, - "num_turns": int } Returns: {"ok": true} @@ -40,7 +39,6 @@ def __init__( self, *, remote_base_url: Optional[str] = None, - num_turns: int = 2, poll_interval: float = 1.0, timeout_seconds: float = 120.0, output_data_loader: Callable[[str], DynamicDataLoader], @@ -48,7 +46,6 @@ def __init__( # Prefer constructor-provided configuration. These can be overridden via # config.kwargs at call time for backward compatibility. self._remote_base_url = remote_base_url - self._num_turns = num_turns self._poll_interval = poll_interval self._timeout_seconds = timeout_seconds self._output_data_loader = output_data_loader @@ -58,7 +55,6 @@ def __call__(self, rows: List[EvaluationRow], config: RolloutProcessorConfig) -> # Start with constructor values remote_base_url: Optional[str] = self._remote_base_url - num_turns: int = self._num_turns poll_interval: float = self._poll_interval timeout_seconds: float = self._timeout_seconds @@ -66,7 +62,6 @@ def __call__(self, rows: List[EvaluationRow], config: RolloutProcessorConfig) -> if config.kwargs: if remote_base_url is None: remote_base_url = config.kwargs.get("remote_base_url", remote_base_url) - num_turns = int(config.kwargs.get("num_turns", num_turns)) poll_interval = float(config.kwargs.get("poll_interval", poll_interval)) timeout_seconds = float(config.kwargs.get("timeout_seconds", timeout_seconds)) @@ -121,7 +116,6 @@ async def _process_row(row: EvaluationRow) -> EvaluationRow: "messages": clean_messages, "tools": row.tools, "metadata": meta, - "num_turns": num_turns, } # Fire-and-poll diff --git a/eval_protocol/types/remote_rollout_processor.py 
b/eval_protocol/types/remote_rollout_processor.py new file mode 100644 index 00000000..405692d1 --- /dev/null +++ b/eval_protocol/types/remote_rollout_processor.py @@ -0,0 +1,45 @@ +""" +Request and response models for remote rollout processor servers. +""" + +from typing import Any, Dict, List, Optional +from pydantic import BaseModel, Field +from eval_protocol.models import Message + + +class RolloutMetadata(BaseModel): + """Metadata for rollout execution.""" + + invocation_id: str + experiment_id: str + rollout_id: str + run_id: str + row_id: str + + +class InitRequest(BaseModel): + """Request model for POST /init endpoint.""" + + rollout_id: str + model: str + messages: List[Message] = Field(min_length=1) + tools: Optional[List[Dict[str, Any]]] = None + metadata: RolloutMetadata + + +class StatusResponse(BaseModel): + """Response model for GET /status endpoint.""" + + terminated: bool + + +def create_langfuse_config_tags(init_request: InitRequest) -> List[str]: + """Create Langfuse tags from InitRequest metadata.""" + metadata = init_request.metadata + return [ + f"invocation_id:{metadata.invocation_id}", + f"experiment_id:{metadata.experiment_id}", + f"rollout_id:{metadata.rollout_id}", + f"run_id:{metadata.run_id}", + f"row_id:{metadata.row_id}", + ] diff --git a/tests/chinook/langfuse/remote_server.py b/tests/chinook/langfuse/remote_server.py index 0a83c9e0..4c951008 100644 --- a/tests/chinook/langfuse/remote_server.py +++ b/tests/chinook/langfuse/remote_server.py @@ -1,37 +1,32 @@ import os import threading -from typing import Any, Dict +from typing import Any, Dict, List import uvicorn from fastapi import FastAPI, HTTPException -from pydantic import BaseModel from langfuse.openai import openai # pyright: ignore[reportPrivateImportUsage] - -app = FastAPI() +from eval_protocol.types.remote_rollout_processor import ( + InitRequest, + StatusResponse, + create_langfuse_config_tags, +) +from eval_protocol.models import Message -class InitRequest(BaseModel): - 
rollout_id: str - model: str - messages: list[dict] - tools: list[dict] | None = None - metadata: dict - num_turns: int = 2 +app = FastAPI() _STATE: Dict[str, Dict[str, Any]] = {} - ALLOWED_MESSAGE_FIELDS = {"role", "content", "tool_calls", "tool_call_id", "name"} -def _clean_messages_for_api(messages: list[dict]) -> list[dict]: +def _clean_messages_for_api(messages: List[Message]) -> list[dict]: cleaned: list[dict] = [] for msg in messages: - if not isinstance(msg, dict): - continue - cm = {k: v for k, v in msg.items() if k in ALLOWED_MESSAGE_FIELDS and v is not None} + msg_dict = msg.model_dump() + cm = {k: v for k, v in msg_dict.items() if k in ALLOWED_MESSAGE_FIELDS and v is not None} # Some providers dislike empty content on assistant messages; keep if present cleaned.append(cm) return cleaned @@ -42,53 +37,25 @@ def init(req: InitRequest): # Persist state _STATE[req.rollout_id] = {"terminated": False} - # Kick off worker thread that runs multi-turn chat via Langfuse OpenAI integration + # Kick off worker thread that does a single-turn chat via Langfuse OpenAI integration def _worker(): try: - # Prepare tags for Langfuse filtering - metadata = { - "langfuse_tags": [ - f"invocation_id:{req.metadata.get('invocation_id')}", - f"experiment_id:{req.metadata.get('experiment_id')}", - f"rollout_id:{req.metadata.get('rollout_id')}", - f"run_id:{req.metadata.get('run_id')}", - f"row_id:{req.metadata.get('row_id')}", - ] + metadata = {"langfuse_tags": create_langfuse_config_tags(req)} + + completion_kwargs = { + "model": req.model, + "messages": _clean_messages_for_api(req.messages), + "metadata": metadata, } - messages = req.messages - - # Simulate N-1 assistant turns (single-shot or simple echo) - for _ in range(max(1, req.num_turns)): - completion_kwargs = { - "model": req.model, - "messages": _clean_messages_for_api(messages), - "metadata": metadata, - } - - if req.tools: - completion_kwargs["tools"] = req.tools - - completion = 
openai.chat.completions.create(**completion_kwargs) - assistant_message = completion.choices[0].message - - # Convert to dict format for next turn - assistant_dict = {"role": "assistant", "content": assistant_message.content} - if assistant_message.tool_calls: - assistant_dict["tool_calls"] = [ - { - "id": tc.id, - "type": tc.type, - "function": {"name": tc.function.name, "arguments": tc.function.arguments}, - } - for tc in assistant_message.tool_calls - ] - - # Append assistant for next turn - messages = messages + [assistant_dict] - - except Exception: + if req.tools: + completion_kwargs["tools"] = req.tools + + completion = openai.chat.completions.create(**completion_kwargs) + + except Exception as e: # Best-effort; mark as done even on error to unblock polling + print(f"❌ Error in rollout {req.rollout_id}: {e}") pass finally: _STATE[req.rollout_id]["terminated"] = True @@ -98,12 +65,12 @@ def _worker(): return {"ok": True} -@app.get("/status") -def status(rollout_id: str): +@app.get("/status", response_model=StatusResponse) +def status(rollout_id: str) -> StatusResponse: st = _STATE.get(rollout_id) if not st: raise HTTPException(status_code=404, detail="unknown rollout_id") - return {"terminated": bool(st.get("terminated", False))} + return StatusResponse(terminated=bool(st.get("terminated", False))) def main(): diff --git a/tests/chinook/langfuse/test_remote_langfuse_chinook.py b/tests/chinook/langfuse/test_remote_langfuse_chinook.py index e5ea0eed..b82722e0 100644 --- a/tests/chinook/langfuse/test_remote_langfuse_chinook.py +++ b/tests/chinook/langfuse/test_remote_langfuse_chinook.py @@ -1,12 +1,14 @@ +# MANUAL SERVER STARTUP REQUIRED: +# Before running this test, start the remote server manually: +# cd /Users/derekxu/Documents/code/python-sdk +# python -m tests.chinook.langfuse.remote_server +# +# The server should be running on http://127.0.0.1:7077 + import os -import multiprocessing -import time -from datetime import datetime, timedelta from typing import 
List -import atexit import pytest -import requests from eval_protocol.data_loader.dynamic_data_loader import DynamicDataLoader from eval_protocol.models import EvaluationRow, Message @@ -33,7 +35,7 @@ def fetch_langfuse_traces(rollout_id: str) -> List[EvaluationRow]: ROLLOUT_IDS.add(rollout_id) adapter = create_langfuse_adapter() - return adapter.get_evaluation_rows(tags=[f"rollout_id:{rollout_id}"]) + return adapter.get_evaluation_rows(tags=[f"rollout_id:{rollout_id}"], max_retries=5) def langfuse_output_data_loader(rollout_id: str) -> DynamicDataLoader: @@ -42,51 +44,8 @@ def langfuse_output_data_loader(rollout_id: str) -> DynamicDataLoader: ) -def _start_remote_server(): - # Starts FastAPI server defined in remote_server.py using absolute import - import importlib - - os.environ.setdefault("REMOTE_SERVER_HOST", "127.0.0.1") - os.environ.setdefault("REMOTE_SERVER_PORT", "7077") - mod = importlib.import_module("tests.chinook.langfuse.remote_server") - mod.main() - - -def _ensure_server_running(): - host = os.getenv("REMOTE_SERVER_HOST", "127.0.0.1") - port = int(os.getenv("REMOTE_SERVER_PORT", "7077")) - base_url = f"http://{host}:{port}" - - def _is_up() -> bool: - try: - r = requests.get(f"{base_url}/status", params={"rollout_id": "ping"}, timeout=1.0) - return r.status_code in (200, 404) - except Exception: - return False - - if _is_up(): - return None - - # Launch in a background process - proc = multiprocessing.Process(target=_start_remote_server, daemon=True) - proc.start() - - # Poll for readiness up to 10s - deadline = time.time() + 10 - while time.time() < deadline: - if _is_up(): - break - time.sleep(0.5) - return proc - - def remote_langfuse_data_generator() -> List[EvaluationRow]: - # Ensure server is running BEFORE rollouts start (evaluation_test triggers rollouts before test body) - _SERVER_PROC = _ensure_server_running() - atexit.register(lambda: (_SERVER_PROC and _SERVER_PROC.is_alive() and _SERVER_PROC.terminate())) - - # Minimal single-user-turn 
message to trigger a response - row = EvaluationRow(messages=[Message(role="user", content="Hello there! Please say hi back.")]) + row = EvaluationRow(messages=[Message(role="user", content="What is the capital of France?")]) return [row, row, row] @@ -98,7 +57,6 @@ def remote_langfuse_data_generator() -> List[EvaluationRow]: ), rollout_processor=RemoteRolloutProcessor( remote_base_url="http://127.0.0.1:7077", - num_turns=2, timeout_seconds=30, output_data_loader=langfuse_output_data_loader, ), @@ -106,11 +64,11 @@ def remote_langfuse_data_generator() -> List[EvaluationRow]: async def test_remote_rollout_and_fetch_langfuse(row: EvaluationRow) -> EvaluationRow: """ End-to-end test: - - remote server started at import time + - REQUIRES MANUAL SERVER STARTUP: python -m tests.chinook.langfuse.remote_server - trigger remote rollout via RemoteRolloutProcessor (calls init/status) - fetch traces from Langfuse filtered by metadata via output_data_loader; FAIL if none found """ - assert row.messages[0].content == "Hello there! Please say hi back.", "Row should have correct message content" + assert row.messages[0].content == "What is the capital of France?", "Row should have correct message content" assert len(row.messages) > 1, "Row should have a response. If this fails, we fellback to the original row." 
assert row.execution_metadata.rollout_id in ROLLOUT_IDS, ( From aa8ee84d3d28e35d86bd9a463527ea0acc1996a8 Mon Sep 17 00:00:00 2001 From: Derek Xu Date: Thu, 25 Sep 2025 15:37:52 -0700 Subject: [PATCH 10/12] clean up --- tests/chinook/langfuse/remote_server.py | 1 - tests/chinook/langfuse/test_remote_langfuse_chinook.py | 4 ++-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/tests/chinook/langfuse/remote_server.py b/tests/chinook/langfuse/remote_server.py index 4c951008..2add76c1 100644 --- a/tests/chinook/langfuse/remote_server.py +++ b/tests/chinook/langfuse/remote_server.py @@ -62,7 +62,6 @@ def _worker(): t = threading.Thread(target=_worker, daemon=True) t.start() - return {"ok": True} @app.get("/status", response_model=StatusResponse) diff --git a/tests/chinook/langfuse/test_remote_langfuse_chinook.py b/tests/chinook/langfuse/test_remote_langfuse_chinook.py index b82722e0..cdcd68fb 100644 --- a/tests/chinook/langfuse/test_remote_langfuse_chinook.py +++ b/tests/chinook/langfuse/test_remote_langfuse_chinook.py @@ -44,7 +44,7 @@ def langfuse_output_data_loader(rollout_id: str) -> DynamicDataLoader: ) -def remote_langfuse_data_generator() -> List[EvaluationRow]: +def rows() -> List[EvaluationRow]: row = EvaluationRow(messages=[Message(role="user", content="What is the capital of France?")]) return [row, row, row] @@ -53,7 +53,7 @@ def remote_langfuse_data_generator() -> List[EvaluationRow]: @pytest.mark.parametrize("completion_params", [{"model": "gpt-4o"}]) @evaluation_test( data_loaders=DynamicDataLoader( - generators=[remote_langfuse_data_generator], + generators=[rows], ), rollout_processor=RemoteRolloutProcessor( remote_base_url="http://127.0.0.1:7077", From 4de4d9e5665654743dadfad94ad956db81152aa4 Mon Sep 17 00:00:00 2001 From: Dylan Huang Date: Thu, 25 Sep 2025 15:39:27 -0700 Subject: [PATCH 11/12] add typescript simple example (#218) * add typescript simple example * publish npm package for eval protocol (#219) * publish typescript SDK * add 
createLangfuseConfigTags function and update version to 0.1.1 * use eval-protocol npm dependency * refactor statusInfoSchema to use a record type and update version to 0.1.2 * add eval_metadata to langfuse_row in RemoteRolloutProcessor * Refactor data generator function name and update eval-protocol version to 0.1.2 * done --- .../pytest/remote_rollout_processor.py | 1 + ...test_remote_langfuse_chinook_typescript.py | 75 + .../langfuse/typescript-server/.gitignore | 1 + .../langfuse/typescript-server/README.md | 151 ++ .../chinook/langfuse/typescript-server/env.ts | 31 + .../typescript-server/instrumentation.ts | 15 + .../langfuse/typescript-server/package.json | 38 + .../langfuse/typescript-server/pnpm-lock.yaml | 1828 +++++++++++++++++ .../langfuse/typescript-server/server.ts | 203 ++ .../langfuse/typescript-server/tsconfig.json | 30 + .../use-bun-instead-of-node-vite-npm-pnpm.mdc | 111 + typescript/.gitignore | 36 + typescript/README.md | 13 + typescript/bun.lock | 37 + typescript/index.ts | 104 + typescript/package.json | 16 + typescript/tsconfig.json | 29 + 17 files changed, 2719 insertions(+) create mode 100644 tests/chinook/langfuse/test_remote_langfuse_chinook_typescript.py create mode 100644 tests/chinook/langfuse/typescript-server/.gitignore create mode 100644 tests/chinook/langfuse/typescript-server/README.md create mode 100644 tests/chinook/langfuse/typescript-server/env.ts create mode 100644 tests/chinook/langfuse/typescript-server/instrumentation.ts create mode 100644 tests/chinook/langfuse/typescript-server/package.json create mode 100644 tests/chinook/langfuse/typescript-server/pnpm-lock.yaml create mode 100644 tests/chinook/langfuse/typescript-server/server.ts create mode 100644 tests/chinook/langfuse/typescript-server/tsconfig.json create mode 100644 typescript/.cursor/rules/use-bun-instead-of-node-vite-npm-pnpm.mdc create mode 100644 typescript/.gitignore create mode 100644 typescript/README.md create mode 100644 typescript/bun.lock create mode 
100644 typescript/index.ts create mode 100644 typescript/package.json create mode 100644 typescript/tsconfig.json diff --git a/eval_protocol/pytest/remote_rollout_processor.py b/eval_protocol/pytest/remote_rollout_processor.py index 020ea9c5..5efa793e 100644 --- a/eval_protocol/pytest/remote_rollout_processor.py +++ b/eval_protocol/pytest/remote_rollout_processor.py @@ -167,6 +167,7 @@ def _load_data(): elif len(output_rows) == 1: # Return the Langfuse row langfuse_row = output_rows[0] langfuse_row.input_metadata.completion_params = row.input_metadata.completion_params + langfuse_row.eval_metadata = row.eval_metadata return langfuse_row else: raise ValueError("RemoteRolloutProcessor's output_data_loader should return exactly one row.") diff --git a/tests/chinook/langfuse/test_remote_langfuse_chinook_typescript.py b/tests/chinook/langfuse/test_remote_langfuse_chinook_typescript.py new file mode 100644 index 00000000..bb3b7bf5 --- /dev/null +++ b/tests/chinook/langfuse/test_remote_langfuse_chinook_typescript.py @@ -0,0 +1,75 @@ +import os +from typing import List +import atexit + +import pytest + +from eval_protocol.data_loader.dynamic_data_loader import DynamicDataLoader +from eval_protocol.models import EvaluationRow, Message +from eval_protocol.pytest import evaluation_test +from eval_protocol.pytest.remote_rollout_processor import RemoteRolloutProcessor +from eval_protocol.adapters.langfuse import create_langfuse_adapter +from eval_protocol.quickstart.utils import filter_longest_conversation + +ROLLOUT_IDS = set() + + +@pytest.fixture(autouse=True) +def check_rollout_coverage(): + """Ensure we processed all expected rollout_ids""" + global ROLLOUT_IDS + ROLLOUT_IDS.clear() + yield + + # Verify we've seen the expected number of rollout_ids after test is done + expected_rollout_count = 3 + assert len(ROLLOUT_IDS) == expected_rollout_count, ( + f"Expected to see {expected_rollout_count} rollout_ids, but only saw {len(ROLLOUT_IDS)}: {ROLLOUT_IDS}" + ) + + +def 
fetch_langfuse_traces(rollout_id: str) -> List[EvaluationRow]: + global ROLLOUT_IDS # Track all rollout_ids we've seen + ROLLOUT_IDS.add(rollout_id) + + adapter = create_langfuse_adapter() + return adapter.get_evaluation_rows(tags=[f"rollout_id:{rollout_id}"]) + + +def langfuse_output_data_loader(rollout_id: str) -> DynamicDataLoader: + return DynamicDataLoader( + generators=[lambda: fetch_langfuse_traces(rollout_id)], preprocess_fn=filter_longest_conversation + ) + + +def rows() -> List[EvaluationRow]: + # Minimal single-user-turn message to trigger a response + row = EvaluationRow(messages=[Message(role="user", content="What is the capital of France?")]) + return [row, row, row] + + +@pytest.mark.skipif(os.environ.get("CI") == "true", reason="Only run this test locally (skipped in CI)") +@pytest.mark.parametrize("completion_params", [{"model": "gpt-5"}]) +@evaluation_test( + data_loaders=DynamicDataLoader( + generators=[rows], + ), + rollout_processor=RemoteRolloutProcessor( + remote_base_url="http://127.0.0.1:3000", + timeout_seconds=30, + output_data_loader=langfuse_output_data_loader, + ), +) +async def test_remote_rollout_and_fetch_langfuse_typescript(row: EvaluationRow) -> EvaluationRow: + """ + End-to-end test: + - REQUIRES MANUAL SERVER STARTUP: run the TypeScript server in tests/chinook/langfuse/typescript-server (http://127.0.0.1:3000) + - trigger remote rollout via RemoteRolloutProcessor (calls init/status) + - fetch traces from Langfuse filtered by metadata via output_data_loader; FAIL if none found + """ + assert row.messages[0].content == "What is the capital of France?", "Row should have correct message content" + assert row.execution_metadata.rollout_id in ROLLOUT_IDS, ( + f"Row rollout_id {row.execution_metadata.rollout_id} should be in tracked rollout_ids: {ROLLOUT_IDS}" + ) + + return row diff --git a/tests/chinook/langfuse/typescript-server/.gitignore b/tests/chinook/langfuse/typescript-server/.gitignore new file mode 100644 index 00000000..803352ef --- /dev/null +++ b/tests/chinook/langfuse/typescript-server/.gitignore @@ -0,0 +1 
@@ +!package.json diff --git a/tests/chinook/langfuse/typescript-server/README.md b/tests/chinook/langfuse/typescript-server/README.md new file mode 100644 index 00000000..434b82a8 --- /dev/null +++ b/tests/chinook/langfuse/typescript-server/README.md @@ -0,0 +1,151 @@ +# TypeScript Express Server for Remote Rollout Processor + +This TypeScript Express server implements the Remote Rollout Processor API contract as specified in the Eval Protocol documentation. + +## Features + +- **POST /init** - Initialize a rollout with validation using Zod schemas +- **GET /status** - Check the status of a rollout +- **GET /health** - Health check endpoint +- Full TypeScript support with strict type checking +- Request validation using Zod +- Error handling and logging +- CORS and security middleware + +## Installation + +```bash +pnpm install +``` + +## Development + +```bash +# Run in development mode with hot reload +pnpm run dev + +# Build for production +pnpm run build + +# Run production build +pnpm run start +``` + +## API Endpoints + +### POST /init + +Initialize a new rollout. + +**Request Body:** +```json +{ + "rollout_id": "rll_ijkl", + "model": "openai/gpt-4o", + "messages": [ + { "role": "user", "content": "Hello" } + ], + "tools": null, + "metadata": { + "invocation_id": "ivk_abcd", + "experiment_id": "exp_efgh", + "rollout_id": "rll_ijkl", + "run_id": "run_123", + "row_id": "row_123" + }, + "num_turns": 2 +} +``` + +**Response:** +```json +{ + "status": "accepted", + "rollout_id": "rll_ijkl", + "message": "Rollout initialized successfully" +} +``` + +### GET /status + +Check the status of a rollout. + +**Query Parameters:** +- `rollout_id` (required): The ID of the rollout to check + +**Response (Running):** +```json +{ + "terminated": false +} +``` + +**Response (Completed):** +```json +{ + "terminated": true, + "info": { + "reason": "completed", + "ended_at": "2025-01-24T12:34:56Z", + "num_turns": 2 + } +} +``` + +### GET /health + +Health check endpoint. 
+ +**Response:** +```json +{ + "status": "healthy", + "timestamp": "2025-01-24T12:34:56Z" +} +``` + +## Usage with Eval Protocol + +This server can be used with the Eval Protocol's `RemoteRolloutProcessor`: + +```python +from eval_protocol import ( + evaluation_test, + DynamicDataLoader, + RemoteRolloutProcessor, +) + +@pytest.mark.parametrize("completion_params", [{"model": "openai/gpt-4o"}]) +@evaluation_test( + data_loaders=[InlineDataLoader(messages=[[Message(role="user", content="Hello")]])], + rollout_processor=RemoteRolloutProcessor( + remote_base_url="http://localhost:3000", + output_data_loader=create_output_data_loader, + ) +) +def test_remote_http(row: EvaluationRow) -> EvaluationRow: + return row +``` + +## Configuration + +The server runs on port 3000 by default. You can change this by setting the `PORT` environment variable: + +```bash +PORT=8080 pnpm run dev +``` + +## Error Handling + +The server includes comprehensive error handling: +- Request validation errors return 400 with detailed error messages +- Missing rollout IDs return 404 +- Server errors return 500 with error details +- All errors are logged to the console + +## Development Notes + +- The server simulates async rollout execution with a 1-second delay per turn +- Rollout states are stored in memory (not persistent across restarts) +- All requests are validated using Zod schemas +- TypeScript strict mode is enabled for better type safety diff --git a/tests/chinook/langfuse/typescript-server/env.ts b/tests/chinook/langfuse/typescript-server/env.ts new file mode 100644 index 00000000..202f673d --- /dev/null +++ b/tests/chinook/langfuse/typescript-server/env.ts @@ -0,0 +1,31 @@ +import * as dotenv from "dotenv"; + +// Helper to resolve the root of the repo (for .env loading, etc.) 
+import path from "path"; +import { fileURLToPath } from "url"; +import fs from "fs"; + +// Returns the absolute path to the root of the repo (where .git or .env is found) +function getRepoRoot(): string { + // __dirname is not available in ES modules, so use fileURLToPath + const currentDir = path.dirname(fileURLToPath(import.meta.url)); + let dir = currentDir; + while (true) { + if ( + fs.existsSync(path.join(dir, ".git")) || + fs.existsSync(path.join(dir, ".env")) + ) { + return dir; + } + const parent = path.dirname(dir); + if (parent === dir) break; + dir = parent; + } + // Fallback to current directory if not found + return currentDir; +} + +export const REPO_ROOT = getRepoRoot(); + +// Load environment variables from .env at the root of the repo +dotenv.config({ path: path.join(REPO_ROOT, ".env") }); diff --git a/tests/chinook/langfuse/typescript-server/instrumentation.ts b/tests/chinook/langfuse/typescript-server/instrumentation.ts new file mode 100644 index 00000000..5918ac32 --- /dev/null +++ b/tests/chinook/langfuse/typescript-server/instrumentation.ts @@ -0,0 +1,15 @@ +import { NodeSDK } from "@opentelemetry/sdk-node"; +import { LangfuseSpanProcessor } from "@langfuse/otel"; +import "./env"; + +const sdk = new NodeSDK({ + spanProcessors: [ + new LangfuseSpanProcessor({ + publicKey: process.env["LANGFUSE_PUBLIC_KEY"]!, + secretKey: process.env["LANGFUSE_SECRET_KEY"]!, + baseUrl: process.env["LANGFUSE_HOST"]!, + }), + ], +}); + +sdk.start(); diff --git a/tests/chinook/langfuse/typescript-server/package.json b/tests/chinook/langfuse/typescript-server/package.json new file mode 100644 index 00000000..7e64fee5 --- /dev/null +++ b/tests/chinook/langfuse/typescript-server/package.json @@ -0,0 +1,38 @@ +{ + "name": "typescript-server", + "version": "1.0.0", + "description": "TypeScript Express server for Remote Rollout Processor", + "main": "dist/server.js", + "type": "module", + "scripts": { + "build": "tsc", + "start": "node dist/server.js", + "dev": "tsx 
server.ts", + "test": "node test-server.js", + "test:server": "node test-server.js" + }, + "keywords": [], + "author": "", + "license": "ISC", + "packageManager": "pnpm@10.6.2", + "dependencies": { + "@langfuse/openai": "^4.2.0", + "@langfuse/otel": "^4.2.0", + "@langfuse/tracing": "^4.2.0", + "@opentelemetry/sdk-node": "^0.205.0", + "cors": "^2.8.5", + "dotenv": "^17.2.2", + "eval-protocol": "^0.1.2", + "express": "^5.1.0", + "helmet": "^7.1.0", + "openai": "^5.23.0" + }, + "devDependencies": { + "@types/cors": "^2.8.17", + "@types/express": "^4.17.23", + "@types/node": "^20.10.0", + "tsx": "^4.6.0", + "typescript": "^5.9.2", + "zod": "^3.22.4" + } +} diff --git a/tests/chinook/langfuse/typescript-server/pnpm-lock.yaml b/tests/chinook/langfuse/typescript-server/pnpm-lock.yaml new file mode 100644 index 00000000..a105871d --- /dev/null +++ b/tests/chinook/langfuse/typescript-server/pnpm-lock.yaml @@ -0,0 +1,1828 @@ +lockfileVersion: '9.0' + +settings: + autoInstallPeers: true + excludeLinksFromLockfile: false + +importers: + + .: + dependencies: + '@langfuse/openai': + specifier: ^4.2.0 + version: 4.2.0(@opentelemetry/api@1.9.0) + '@langfuse/otel': + specifier: ^4.2.0 + version: 4.2.0(@opentelemetry/api@1.9.0)(@opentelemetry/core@2.1.0(@opentelemetry/api@1.9.0))(@opentelemetry/exporter-trace-otlp-http@0.205.0(@opentelemetry/api@1.9.0))(@opentelemetry/sdk-trace-base@2.1.0(@opentelemetry/api@1.9.0)) + '@langfuse/tracing': + specifier: ^4.2.0 + version: 4.2.0(@opentelemetry/api@1.9.0) + '@opentelemetry/sdk-node': + specifier: ^0.205.0 + version: 0.205.0(@opentelemetry/api@1.9.0) + cors: + specifier: ^2.8.5 + version: 2.8.5 + dotenv: + specifier: ^17.2.2 + version: 17.2.2 + eval-protocol: + specifier: ^0.1.2 + version: 0.1.2(typescript@5.9.2) + express: + specifier: ^5.1.0 + version: 5.1.0 + helmet: + specifier: ^7.1.0 + version: 7.2.0 + openai: + specifier: ^5.23.0 + version: 5.23.0(zod@3.25.76) + devDependencies: + '@types/cors': + specifier: ^2.8.17 + version: 
2.8.19 + '@types/express': + specifier: ^4.17.23 + version: 4.17.23 + '@types/node': + specifier: ^20.10.0 + version: 20.19.17 + tsx: + specifier: ^4.6.0 + version: 4.20.5 + typescript: + specifier: ^5.9.2 + version: 5.9.2 + zod: + specifier: ^3.22.4 + version: 3.25.76 + +packages: + + '@esbuild/aix-ppc64@0.25.10': + resolution: {integrity: sha512-0NFWnA+7l41irNuaSVlLfgNT12caWJVLzp5eAVhZ0z1qpxbockccEt3s+149rE64VUI3Ml2zt8Nv5JVc4QXTsw==} + engines: {node: '>=18'} + cpu: [ppc64] + os: [aix] + + '@esbuild/android-arm64@0.25.10': + resolution: {integrity: sha512-LSQa7eDahypv/VO6WKohZGPSJDq5OVOo3UoFR1E4t4Gj1W7zEQMUhI+lo81H+DtB+kP+tDgBp+M4oNCwp6kffg==} + engines: {node: '>=18'} + cpu: [arm64] + os: [android] + + '@esbuild/android-arm@0.25.10': + resolution: {integrity: sha512-dQAxF1dW1C3zpeCDc5KqIYuZ1tgAdRXNoZP7vkBIRtKZPYe2xVr/d3SkirklCHudW1B45tGiUlz2pUWDfbDD4w==} + engines: {node: '>=18'} + cpu: [arm] + os: [android] + + '@esbuild/android-x64@0.25.10': + resolution: {integrity: sha512-MiC9CWdPrfhibcXwr39p9ha1x0lZJ9KaVfvzA0Wxwz9ETX4v5CHfF09bx935nHlhi+MxhA63dKRRQLiVgSUtEg==} + engines: {node: '>=18'} + cpu: [x64] + os: [android] + + '@esbuild/darwin-arm64@0.25.10': + resolution: {integrity: sha512-JC74bdXcQEpW9KkV326WpZZjLguSZ3DfS8wrrvPMHgQOIEIG/sPXEN/V8IssoJhbefLRcRqw6RQH2NnpdprtMA==} + engines: {node: '>=18'} + cpu: [arm64] + os: [darwin] + + '@esbuild/darwin-x64@0.25.10': + resolution: {integrity: sha512-tguWg1olF6DGqzws97pKZ8G2L7Ig1vjDmGTwcTuYHbuU6TTjJe5FXbgs5C1BBzHbJ2bo1m3WkQDbWO2PvamRcg==} + engines: {node: '>=18'} + cpu: [x64] + os: [darwin] + + '@esbuild/freebsd-arm64@0.25.10': + resolution: {integrity: sha512-3ZioSQSg1HT2N05YxeJWYR+Libe3bREVSdWhEEgExWaDtyFbbXWb49QgPvFH8u03vUPX10JhJPcz7s9t9+boWg==} + engines: {node: '>=18'} + cpu: [arm64] + os: [freebsd] + + '@esbuild/freebsd-x64@0.25.10': + resolution: {integrity: sha512-LLgJfHJk014Aa4anGDbh8bmI5Lk+QidDmGzuC2D+vP7mv/GeSN+H39zOf7pN5N8p059FcOfs2bVlrRr4SK9WxA==} + engines: {node: '>=18'} + cpu: [x64] + os: [freebsd] 
+ + '@esbuild/linux-arm64@0.25.10': + resolution: {integrity: sha512-5luJWN6YKBsawd5f9i4+c+geYiVEw20FVW5x0v1kEMWNq8UctFjDiMATBxLvmmHA4bf7F6hTRaJgtghFr9iziQ==} + engines: {node: '>=18'} + cpu: [arm64] + os: [linux] + + '@esbuild/linux-arm@0.25.10': + resolution: {integrity: sha512-oR31GtBTFYCqEBALI9r6WxoU/ZofZl962pouZRTEYECvNF/dtXKku8YXcJkhgK/beU+zedXfIzHijSRapJY3vg==} + engines: {node: '>=18'} + cpu: [arm] + os: [linux] + + '@esbuild/linux-ia32@0.25.10': + resolution: {integrity: sha512-NrSCx2Kim3EnnWgS4Txn0QGt0Xipoumb6z6sUtl5bOEZIVKhzfyp/Lyw4C1DIYvzeW/5mWYPBFJU3a/8Yr75DQ==} + engines: {node: '>=18'} + cpu: [ia32] + os: [linux] + + '@esbuild/linux-loong64@0.25.10': + resolution: {integrity: sha512-xoSphrd4AZda8+rUDDfD9J6FUMjrkTz8itpTITM4/xgerAZZcFW7Dv+sun7333IfKxGG8gAq+3NbfEMJfiY+Eg==} + engines: {node: '>=18'} + cpu: [loong64] + os: [linux] + + '@esbuild/linux-mips64el@0.25.10': + resolution: {integrity: sha512-ab6eiuCwoMmYDyTnyptoKkVS3k8fy/1Uvq7Dj5czXI6DF2GqD2ToInBI0SHOp5/X1BdZ26RKc5+qjQNGRBelRA==} + engines: {node: '>=18'} + cpu: [mips64el] + os: [linux] + + '@esbuild/linux-ppc64@0.25.10': + resolution: {integrity: sha512-NLinzzOgZQsGpsTkEbdJTCanwA5/wozN9dSgEl12haXJBzMTpssebuXR42bthOF3z7zXFWH1AmvWunUCkBE4EA==} + engines: {node: '>=18'} + cpu: [ppc64] + os: [linux] + + '@esbuild/linux-riscv64@0.25.10': + resolution: {integrity: sha512-FE557XdZDrtX8NMIeA8LBJX3dC2M8VGXwfrQWU7LB5SLOajfJIxmSdyL/gU1m64Zs9CBKvm4UAuBp5aJ8OgnrA==} + engines: {node: '>=18'} + cpu: [riscv64] + os: [linux] + + '@esbuild/linux-s390x@0.25.10': + resolution: {integrity: sha512-3BBSbgzuB9ajLoVZk0mGu+EHlBwkusRmeNYdqmznmMc9zGASFjSsxgkNsqmXugpPk00gJ0JNKh/97nxmjctdew==} + engines: {node: '>=18'} + cpu: [s390x] + os: [linux] + + '@esbuild/linux-x64@0.25.10': + resolution: {integrity: sha512-QSX81KhFoZGwenVyPoberggdW1nrQZSvfVDAIUXr3WqLRZGZqWk/P4T8p2SP+de2Sr5HPcvjhcJzEiulKgnxtA==} + engines: {node: '>=18'} + cpu: [x64] + os: [linux] + + '@esbuild/netbsd-arm64@0.25.10': + resolution: {integrity: 
sha512-AKQM3gfYfSW8XRk8DdMCzaLUFB15dTrZfnX8WXQoOUpUBQ+NaAFCP1kPS/ykbbGYz7rxn0WS48/81l9hFl3u4A==} + engines: {node: '>=18'} + cpu: [arm64] + os: [netbsd] + + '@esbuild/netbsd-x64@0.25.10': + resolution: {integrity: sha512-7RTytDPGU6fek/hWuN9qQpeGPBZFfB4zZgcz2VK2Z5VpdUxEI8JKYsg3JfO0n/Z1E/6l05n0unDCNc4HnhQGig==} + engines: {node: '>=18'} + cpu: [x64] + os: [netbsd] + + '@esbuild/openbsd-arm64@0.25.10': + resolution: {integrity: sha512-5Se0VM9Wtq797YFn+dLimf2Zx6McttsH2olUBsDml+lm0GOCRVebRWUvDtkY4BWYv/3NgzS8b/UM3jQNh5hYyw==} + engines: {node: '>=18'} + cpu: [arm64] + os: [openbsd] + + '@esbuild/openbsd-x64@0.25.10': + resolution: {integrity: sha512-XkA4frq1TLj4bEMB+2HnI0+4RnjbuGZfet2gs/LNs5Hc7D89ZQBHQ0gL2ND6Lzu1+QVkjp3x1gIcPKzRNP8bXw==} + engines: {node: '>=18'} + cpu: [x64] + os: [openbsd] + + '@esbuild/openharmony-arm64@0.25.10': + resolution: {integrity: sha512-AVTSBhTX8Y/Fz6OmIVBip9tJzZEUcY8WLh7I59+upa5/GPhh2/aM6bvOMQySspnCCHvFi79kMtdJS1w0DXAeag==} + engines: {node: '>=18'} + cpu: [arm64] + os: [openharmony] + + '@esbuild/sunos-x64@0.25.10': + resolution: {integrity: sha512-fswk3XT0Uf2pGJmOpDB7yknqhVkJQkAQOcW/ccVOtfx05LkbWOaRAtn5SaqXypeKQra1QaEa841PgrSL9ubSPQ==} + engines: {node: '>=18'} + cpu: [x64] + os: [sunos] + + '@esbuild/win32-arm64@0.25.10': + resolution: {integrity: sha512-ah+9b59KDTSfpaCg6VdJoOQvKjI33nTaQr4UluQwW7aEwZQsbMCfTmfEO4VyewOxx4RaDT/xCy9ra2GPWmO7Kw==} + engines: {node: '>=18'} + cpu: [arm64] + os: [win32] + + '@esbuild/win32-ia32@0.25.10': + resolution: {integrity: sha512-QHPDbKkrGO8/cz9LKVnJU22HOi4pxZnZhhA2HYHez5Pz4JeffhDjf85E57Oyco163GnzNCVkZK0b/n4Y0UHcSw==} + engines: {node: '>=18'} + cpu: [ia32] + os: [win32] + + '@esbuild/win32-x64@0.25.10': + resolution: {integrity: sha512-9KpxSVFCu0iK1owoez6aC/s/EdUQLDN3adTxGCqxMVhrPDj6bt5dbrHDXUuq+Bs2vATFBBrQS5vdQ/Ed2P+nbw==} + engines: {node: '>=18'} + cpu: [x64] + os: [win32] + + '@grpc/grpc-js@1.14.0': + resolution: {integrity: 
sha512-N8Jx6PaYzcTRNzirReJCtADVoq4z7+1KQ4E70jTg/koQiMoUSN1kbNjPOqpPbhMFhfU1/l7ixspPl8dNY+FoUg==} + engines: {node: '>=12.10.0'} + + '@grpc/proto-loader@0.8.0': + resolution: {integrity: sha512-rc1hOQtjIWGxcxpb9aHAfLpIctjEnsDehj0DAiVfBlmT84uvR0uUtN2hEi/ecvWVjXUGf5qPF4qEgiLOx1YIMQ==} + engines: {node: '>=6'} + hasBin: true + + '@js-sdsl/ordered-map@4.4.2': + resolution: {integrity: sha512-iUKgm52T8HOE/makSxjqoWhe95ZJA1/G1sYsGev2JDKUSS14KAgg1LHb+Ba+IPow0xflbnSkOsZcO08C7w1gYw==} + + '@langfuse/core@4.2.0': + resolution: {integrity: sha512-QARHC8Xz3ZyFrIQtbc9L34XQo5yWwpTBvzqiXeEw/LScigG9gyUf3XaykVbJ8Er0O7cggHm9wBXw3POMxDih/w==} + + '@langfuse/openai@4.2.0': + resolution: {integrity: sha512-JeC0bktTrBqCjFCrIXhXMLjI5eYYO6v7FFcYxpy1KUdqTT+7e6B0jqk739FIA3wqaSLW0IJBa7nGQwFDuIaHUw==} + + '@langfuse/otel@4.2.0': + resolution: {integrity: sha512-SRso7L/GdKJ1MjOdiCwctJVm4skEl4XLYipa4kQsk5mhfxU/1WPO4FwFocc/IRE7PxkMYVmkKOAB2za7ROVgTw==} + engines: {node: '>=20'} + peerDependencies: + '@opentelemetry/api': ^1.9.0 + '@opentelemetry/core': ^2.0.1 + '@opentelemetry/exporter-trace-otlp-http': '>=0.202.0 <1.0.0' + '@opentelemetry/sdk-trace-base': ^2.0.1 + + '@langfuse/tracing@4.2.0': + resolution: {integrity: sha512-0WiqupzU2OoItnlPBFNZa6ODOKxPtD3f46c+G9RaajOELX+lfToDMvVWjmajuZdHbm5BE6BYidbGJtFfLNiyTA==} + engines: {node: '>=20'} + peerDependencies: + '@opentelemetry/api': ^1.9.0 + + '@opentelemetry/api-logs@0.205.0': + resolution: {integrity: sha512-wBlPk1nFB37Hsm+3Qy73yQSobVn28F4isnWIBvKpd5IUH/eat8bwcL02H9yzmHyyPmukeccSl2mbN5sDQZYnPg==} + engines: {node: '>=8.0.0'} + + '@opentelemetry/api@1.9.0': + resolution: {integrity: sha512-3giAOQvZiH5F9bMlMiv8+GSPMeqg0dbaeo58/0SlA9sxSqZhnUtxzX9/2FzyhS9sWQf5S0GJE0AKBrFqjpeYcg==} + engines: {node: '>=8.0.0'} + + '@opentelemetry/context-async-hooks@2.1.0': + resolution: {integrity: sha512-zOyetmZppnwTyPrt4S7jMfXiSX9yyfF0hxlA8B5oo2TtKl+/RGCy7fi4DrBfIf3lCPrkKsRBWZZD7RFojK7FDg==} + engines: {node: ^18.19.0 || >=20.6.0} + peerDependencies: + 
'@opentelemetry/api': '>=1.0.0 <1.10.0' + + '@opentelemetry/core@2.1.0': + resolution: {integrity: sha512-RMEtHsxJs/GiHHxYT58IY57UXAQTuUnZVco6ymDEqTNlJKTimM4qPUPVe8InNFyBjhHBEAx4k3Q8LtNayBsbUQ==} + engines: {node: ^18.19.0 || >=20.6.0} + peerDependencies: + '@opentelemetry/api': '>=1.0.0 <1.10.0' + + '@opentelemetry/exporter-logs-otlp-grpc@0.205.0': + resolution: {integrity: sha512-jQlw7OHbqZ8zPt+pOrW2KGN7T55P50e3NXBMr4ckPOF+DWDwSy4W7mkG09GpYWlQAQ5C9BXg5gfUlv5ldTgWsw==} + engines: {node: ^18.19.0 || >=20.6.0} + peerDependencies: + '@opentelemetry/api': ^1.3.0 + + '@opentelemetry/exporter-logs-otlp-http@0.205.0': + resolution: {integrity: sha512-5JteMyVWiro4ghF0tHQjfE6OJcF7UBUcoEqX3UIQ5jutKP1H+fxFdyhqjjpmeHMFxzOHaYuLlNR1Bn7FOjGyJg==} + engines: {node: ^18.19.0 || >=20.6.0} + peerDependencies: + '@opentelemetry/api': ^1.3.0 + + '@opentelemetry/exporter-logs-otlp-proto@0.205.0': + resolution: {integrity: sha512-q3VS9wS+lpZ01txKxiDGBtBpTNge3YhbVEFDgem9ZQR9eI3EZ68+9tVZH9zJcSxI37nZPJ6lEEZO58yEjYZsVA==} + engines: {node: ^18.19.0 || >=20.6.0} + peerDependencies: + '@opentelemetry/api': ^1.3.0 + + '@opentelemetry/exporter-metrics-otlp-grpc@0.205.0': + resolution: {integrity: sha512-1Vxlo4lUwqSKYX+phFkXHKYR3DolFHxCku6lVMP1H8sVE3oj4wwmwxMzDsJ7zF+sXd8M0FCr+ckK4SnNNKkV+w==} + engines: {node: ^18.19.0 || >=20.6.0} + peerDependencies: + '@opentelemetry/api': ^1.3.0 + + '@opentelemetry/exporter-metrics-otlp-http@0.205.0': + resolution: {integrity: sha512-fFxNQ/HbbpLmh1pgU6HUVbFD1kNIjrkoluoKJkh88+gnmpFD92kMQ8WFNjPnSbjg2mNVnEkeKXgCYEowNW+p1w==} + engines: {node: ^18.19.0 || >=20.6.0} + peerDependencies: + '@opentelemetry/api': ^1.3.0 + + '@opentelemetry/exporter-metrics-otlp-proto@0.205.0': + resolution: {integrity: sha512-qIbNnedw9QfFjwpx4NQvdgjK3j3R2kWH/2T+7WXAm1IfMFe9fwatYxE61i7li4CIJKf8HgUC3GS8Du0C3D+AuQ==} + engines: {node: ^18.19.0 || >=20.6.0} + peerDependencies: + '@opentelemetry/api': ^1.3.0 + + '@opentelemetry/exporter-prometheus@0.205.0': + resolution: {integrity: 
sha512-xsot/Qm9VLDTag4GEwAunD1XR1U8eBHTLAgO7IZNo2JuD/c/vL7xmDP7mQIUr6Lk3gtj/yGGIR2h3vhTeVzv4w==} + engines: {node: ^18.19.0 || >=20.6.0} + peerDependencies: + '@opentelemetry/api': ^1.3.0 + + '@opentelemetry/exporter-trace-otlp-grpc@0.205.0': + resolution: {integrity: sha512-ZBksUk84CcQOuDJB65yu5A4PORkC4qEsskNwCrPZxDLeWjPOFZNSWt0E0jQxKCY8PskLhjNXJYo12YaqsYvGFA==} + engines: {node: ^18.19.0 || >=20.6.0} + peerDependencies: + '@opentelemetry/api': ^1.3.0 + + '@opentelemetry/exporter-trace-otlp-http@0.205.0': + resolution: {integrity: sha512-vr2bwwPCSc9u7rbKc74jR+DXFvyMFQo9o5zs+H/fgbK672Whw/1izUKVf+xfWOdJOvuwTnfWxy+VAY+4TSo74Q==} + engines: {node: ^18.19.0 || >=20.6.0} + peerDependencies: + '@opentelemetry/api': ^1.3.0 + + '@opentelemetry/exporter-trace-otlp-proto@0.205.0': + resolution: {integrity: sha512-bGtFzqiENO2GpJk988mOBMe0MfeNpTQjbLm/LBijas6VRyEDQarUzdBHpFlu89A25k1+BCntdWGsWTa9Ai4FyA==} + engines: {node: ^18.19.0 || >=20.6.0} + peerDependencies: + '@opentelemetry/api': ^1.3.0 + + '@opentelemetry/exporter-zipkin@2.1.0': + resolution: {integrity: sha512-0mEI0VDZrrX9t5RE1FhAyGz+jAGt96HSuXu73leswtY3L5YZD11gtcpARY2KAx/s6Z2+rj5Mhj566JsI2C7mfA==} + engines: {node: ^18.19.0 || >=20.6.0} + peerDependencies: + '@opentelemetry/api': ^1.0.0 + + '@opentelemetry/instrumentation@0.205.0': + resolution: {integrity: sha512-cgvm7tvQdu9Qo7VurJP84wJ7ZV9F6WqDDGZpUc6rUEXwjV7/bXWs0kaYp9v+1Vh1+3TZCD3i6j/lUBcPhu8NhA==} + engines: {node: ^18.19.0 || >=20.6.0} + peerDependencies: + '@opentelemetry/api': ^1.3.0 + + '@opentelemetry/otlp-exporter-base@0.205.0': + resolution: {integrity: sha512-2MN0C1IiKyo34M6NZzD6P9Nv9Dfuz3OJ3rkZwzFmF6xzjDfqqCTatc9v1EpNfaP55iDOCLHFyYNCgs61FFgtUQ==} + engines: {node: ^18.19.0 || >=20.6.0} + peerDependencies: + '@opentelemetry/api': ^1.3.0 + + '@opentelemetry/otlp-grpc-exporter-base@0.205.0': + resolution: {integrity: sha512-AeuLfrciGYffqsp4EUTdYYc6Ee2BQS+hr08mHZk1C524SFWx0WnfcTnV0NFXbVURUNU6DZu1DhS89zRRrcx/hg==} + engines: {node: ^18.19.0 || >=20.6.0} + 
peerDependencies: + '@opentelemetry/api': ^1.3.0 + + '@opentelemetry/otlp-transformer@0.205.0': + resolution: {integrity: sha512-KmObgqPtk9k/XTlWPJHdMbGCylRAmMJNXIRh6VYJmvlRDMfe+DonH41G7eenG8t4FXn3fxOGh14o/WiMRR6vPg==} + engines: {node: ^18.19.0 || >=20.6.0} + peerDependencies: + '@opentelemetry/api': ^1.3.0 + + '@opentelemetry/propagator-b3@2.1.0': + resolution: {integrity: sha512-yOdHmFseIChYanddMMz0mJIFQHyjwbNhoxc65fEAA8yanxcBPwoFDoh1+WBUWAO/Z0NRgk+k87d+aFIzAZhcBw==} + engines: {node: ^18.19.0 || >=20.6.0} + peerDependencies: + '@opentelemetry/api': '>=1.0.0 <1.10.0' + + '@opentelemetry/propagator-jaeger@2.1.0': + resolution: {integrity: sha512-QYo7vLyMjrBCUTpwQBF/e+rvP7oGskrSELGxhSvLj5gpM0az9oJnu/0O4l2Nm7LEhAff80ntRYKkAcSwVgvSVQ==} + engines: {node: ^18.19.0 || >=20.6.0} + peerDependencies: + '@opentelemetry/api': '>=1.0.0 <1.10.0' + + '@opentelemetry/resources@2.1.0': + resolution: {integrity: sha512-1CJjf3LCvoefUOgegxi8h6r4B/wLSzInyhGP2UmIBYNlo4Qk5CZ73e1eEyWmfXvFtm1ybkmfb2DqWvspsYLrWw==} + engines: {node: ^18.19.0 || >=20.6.0} + peerDependencies: + '@opentelemetry/api': '>=1.3.0 <1.10.0' + + '@opentelemetry/sdk-logs@0.205.0': + resolution: {integrity: sha512-nyqhNQ6eEzPWQU60Nc7+A5LIq8fz3UeIzdEVBQYefB4+msJZ2vuVtRuk9KxPMw1uHoHDtYEwkr2Ct0iG29jU8w==} + engines: {node: ^18.19.0 || >=20.6.0} + peerDependencies: + '@opentelemetry/api': '>=1.4.0 <1.10.0' + + '@opentelemetry/sdk-metrics@2.1.0': + resolution: {integrity: sha512-J9QX459mzqHLL9Y6FZ4wQPRZG4TOpMCyPOh6mkr/humxE1W2S3Bvf4i75yiMW9uyed2Kf5rxmLhTm/UK8vNkAw==} + engines: {node: ^18.19.0 || >=20.6.0} + peerDependencies: + '@opentelemetry/api': '>=1.9.0 <1.10.0' + + '@opentelemetry/sdk-node@0.205.0': + resolution: {integrity: sha512-Y4Wcs8scj/Wy1u61pX1ggqPXPtCsGaqx/UnFu7BtRQE1zCQR+b0h56K7I0jz7U2bRlPUZIFdnNLtoaJSMNzz2g==} + engines: {node: ^18.19.0 || >=20.6.0} + peerDependencies: + '@opentelemetry/api': '>=1.3.0 <1.10.0' + + '@opentelemetry/sdk-trace-base@2.1.0': + resolution: {integrity: 
sha512-uTX9FBlVQm4S2gVQO1sb5qyBLq/FPjbp+tmGoxu4tIgtYGmBYB44+KX/725RFDe30yBSaA9Ml9fqphe1hbUyLQ==} + engines: {node: ^18.19.0 || >=20.6.0} + peerDependencies: + '@opentelemetry/api': '>=1.3.0 <1.10.0' + + '@opentelemetry/sdk-trace-node@2.1.0': + resolution: {integrity: sha512-SvVlBFc/jI96u/mmlKm86n9BbTCbQ35nsPoOohqJX6DXH92K0kTe73zGY5r8xoI1QkjR9PizszVJLzMC966y9Q==} + engines: {node: ^18.19.0 || >=20.6.0} + peerDependencies: + '@opentelemetry/api': '>=1.0.0 <1.10.0' + + '@opentelemetry/semantic-conventions@1.37.0': + resolution: {integrity: sha512-JD6DerIKdJGmRp4jQyX5FlrQjA4tjOw1cvfsPAZXfOOEErMUHjPcPSICS+6WnM0nB0efSFARh0KAZss+bvExOA==} + engines: {node: '>=14'} + + '@protobufjs/aspromise@1.1.2': + resolution: {integrity: sha512-j+gKExEuLmKwvz3OgROXtrJ2UG2x8Ch2YZUxahh+s1F2HZ+wAceUNLkvy6zKCPVRkU++ZWQrdxsUeQXmcg4uoQ==} + + '@protobufjs/base64@1.1.2': + resolution: {integrity: sha512-AZkcAA5vnN/v4PDqKyMR5lx7hZttPDgClv83E//FMNhR2TMcLUhfRUBHCmSl0oi9zMgDDqRUJkSxO3wm85+XLg==} + + '@protobufjs/codegen@2.0.4': + resolution: {integrity: sha512-YyFaikqM5sH0ziFZCN3xDC7zeGaB/d0IUb9CATugHWbd1FRFwWwt4ld4OYMPWu5a3Xe01mGAULCdqhMlPl29Jg==} + + '@protobufjs/eventemitter@1.1.0': + resolution: {integrity: sha512-j9ednRT81vYJ9OfVuXG6ERSTdEL1xVsNgqpkxMsbIabzSo3goCjDIveeGv5d03om39ML71RdmrGNjG5SReBP/Q==} + + '@protobufjs/fetch@1.1.0': + resolution: {integrity: sha512-lljVXpqXebpsijW71PZaCYeIcE5on1w5DlQy5WH6GLbFryLUrBD4932W/E2BSpfRJWseIL4v/KPgBFxDOIdKpQ==} + + '@protobufjs/float@1.0.2': + resolution: {integrity: sha512-Ddb+kVXlXst9d+R9PfTIxh1EdNkgoRe5tOX6t01f1lYWOvJnSPDBlG241QLzcyPdoNTsblLUdujGSE4RzrTZGQ==} + + '@protobufjs/inquire@1.1.0': + resolution: {integrity: sha512-kdSefcPdruJiFMVSbn801t4vFK7KB/5gd2fYvrxhuJYg8ILrmn9SKSX2tZdV6V+ksulWqS7aXjBcRXl3wHoD9Q==} + + '@protobufjs/path@1.1.2': + resolution: {integrity: sha512-6JOcJ5Tm08dOHAbdR3GrvP+yUUfkjG5ePsHYczMFLq3ZmMkAD98cDgcT2iA1lJ9NVwFd4tH/iSSoe44YWkltEA==} + + '@protobufjs/pool@1.1.0': + resolution: {integrity: 
sha512-0kELaGSIDBKvcgS4zkjz1PeddatrjYcmMWOlAuAPwAeccUrPHdUqo/J6LiymHHEiJT5NrF1UVwxY14f+fy4WQw==} + + '@protobufjs/utf8@1.1.0': + resolution: {integrity: sha512-Vvn3zZrhQZkkBE8LSuW3em98c0FwgO4nxzv6OdSxPKJIEKY2bGbHn+mhGIPerzI4twdxaP8/0+06HBpwf345Lw==} + + '@types/body-parser@1.19.6': + resolution: {integrity: sha512-HLFeCYgz89uk22N5Qg3dvGvsv46B8GLvKKo1zKG4NybA8U2DiEO3w9lqGg29t/tfLRJpJ6iQxnVw4OnB7MoM9g==} + + '@types/connect@3.4.38': + resolution: {integrity: sha512-K6uROf1LD88uDQqJCktA4yzL1YYAK6NgfsI0v/mTgyPKWsX1CnJ0XPSDhViejru1GcRkLWb8RlzFYJRqGUbaug==} + + '@types/cors@2.8.19': + resolution: {integrity: sha512-mFNylyeyqN93lfe/9CSxOGREz8cpzAhH+E93xJ4xWQf62V8sQ/24reV2nyzUWM6H6Xji+GGHpkbLe7pVoUEskg==} + + '@types/express-serve-static-core@4.19.6': + resolution: {integrity: sha512-N4LZ2xG7DatVqhCZzOGb1Yi5lMbXSZcmdLDe9EzSndPV2HpWYWzRbaerl2n27irrm94EPpprqa8KpskPT085+A==} + + '@types/express@4.17.23': + resolution: {integrity: sha512-Crp6WY9aTYP3qPi2wGDo9iUe/rceX01UMhnF1jmwDcKCFM6cx7YhGP/Mpr3y9AASpfHixIG0E6azCcL5OcDHsQ==} + + '@types/http-errors@2.0.5': + resolution: {integrity: sha512-r8Tayk8HJnX0FztbZN7oVqGccWgw98T/0neJphO91KkmOzug1KkofZURD4UaD5uH8AqcFLfdPErnBod0u71/qg==} + + '@types/mime@1.3.5': + resolution: {integrity: sha512-/pyBZWSLD2n0dcHE3hq8s8ZvcETHtEuF+3E7XVt0Ig2nvsVQXdghHVcEkIWjy9A0wKfTn97a/PSDYohKIlnP/w==} + + '@types/node@20.19.17': + resolution: {integrity: sha512-gfehUI8N1z92kygssiuWvLiwcbOB3IRktR6hTDgJlXMYh5OvkPSRmgfoBUmfZt+vhwJtX7v1Yw4KvvAf7c5QKQ==} + + '@types/qs@6.14.0': + resolution: {integrity: sha512-eOunJqu0K1923aExK6y8p6fsihYEn/BYuQ4g0CxAAgFc4b/ZLN4CrsRZ55srTdqoiLzU2B2evC+apEIxprEzkQ==} + + '@types/range-parser@1.2.7': + resolution: {integrity: sha512-hKormJbkJqzQGhziax5PItDUTMAM9uE2XXQmM37dyd4hVM+5aVl7oVxMVUiVQn2oCQFN/LKCZdvSM0pFRqbSmQ==} + + '@types/send@0.17.5': + resolution: {integrity: sha512-z6F2D3cOStZvuk2SaP6YrwkNO65iTZcwA2ZkSABegdkAh/lf+Aa/YQndZVfmEXT5vgAp6zv06VQ3ejSVjAny4w==} + + '@types/serve-static@1.15.8': + resolution: {integrity: 
sha512-roei0UY3LhpOJvjbIP6ZZFngyLKl5dskOtDhxY5THRSpO+ZI+nzJ+m5yUMzGrp89YRa7lvknKkMYjqQFGwA7Sg==} + + accepts@2.0.0: + resolution: {integrity: sha512-5cvg6CtKwfgdmVqY1WIiXKc3Q1bkRqGLi+2W/6ao+6Y7gu/RCwRuAhGEzh5B4KlszSuTLgZYuqFqo5bImjNKng==} + engines: {node: '>= 0.6'} + + acorn-import-attributes@1.9.5: + resolution: {integrity: sha512-n02Vykv5uA3eHGM/Z2dQrcD56kL8TyDb2p1+0P83PClMnC/nc+anbQRhIOWnSq4Ke/KvDPrY3C9hDtC/A3eHnQ==} + peerDependencies: + acorn: ^8 + + acorn@8.15.0: + resolution: {integrity: sha512-NZyJarBfL7nWwIq+FDL6Zp/yHEhePMNnnJ0y3qfieCrmNvYct8uvtiV41UvlSe6apAfk0fY1FbWx+NwfmpvtTg==} + engines: {node: '>=0.4.0'} + hasBin: true + + ansi-regex@5.0.1: + resolution: {integrity: sha512-quJQXlTSUGL2LH9SUXo8VwsY4soanhgo6LNSm84E1LBcE8s3O0wpdiRzyR9z/ZZJMlMWv37qOOb9pdJlMUEKFQ==} + engines: {node: '>=8'} + + ansi-styles@4.3.0: + resolution: {integrity: sha512-zbB9rCJAT1rbjiVDb2hqKFHNYLxgtk8NURxZ3IZwD3F6NtxbXZQCnnSi1Lkx+IDohdPlFp222wVALIheZJQSEg==} + engines: {node: '>=8'} + + body-parser@2.2.0: + resolution: {integrity: sha512-02qvAaxv8tp7fBa/mw1ga98OGm+eCbqzJOKoRt70sLmfEEi+jyBYVTDGfCL/k06/4EMk/z01gCe7HoCH/f2LTg==} + engines: {node: '>=18'} + + bytes@3.1.2: + resolution: {integrity: sha512-/Nf7TyzTx6S3yRJObOAV7956r8cr2+Oj8AC5dt8wSP3BQAoeX58NoHyCU8P8zGkNXStjTSi6fzO6F0pBdcYbEg==} + engines: {node: '>= 0.8'} + + call-bind-apply-helpers@1.0.2: + resolution: {integrity: sha512-Sp1ablJ0ivDkSzjcaJdxEunN5/XvksFJ2sMBFfq6x0ryhQV/2b/KwFe21cMpmHtPOSij8K99/wSfoEuTObmuMQ==} + engines: {node: '>= 0.4'} + + call-bound@1.0.4: + resolution: {integrity: sha512-+ys997U96po4Kx/ABpBCqhA9EuxJaQWDQg7295H4hBphv3IZg0boBKuwYpt4YXp6MZ5AmZQnU/tyMTlRpaSejg==} + engines: {node: '>= 0.4'} + + cjs-module-lexer@1.4.3: + resolution: {integrity: sha512-9z8TZaGM1pfswYeXrUpzPrkx8UnWYdhJclsiYMm6x/w5+nN+8Tf/LnAgfLGQCm59qAOxU8WwHEq2vNwF6i4j+Q==} + + cliui@8.0.1: + resolution: {integrity: sha512-BSeNnyus75C4//NQ9gQt1/csTXyo/8Sb+afLAkzAptFuMsod9HFokGNudZpi/oQV73hnVK+sR+5PVRMd+Dr7YQ==} + engines: {node: '>=12'} 
+ + color-convert@2.0.1: + resolution: {integrity: sha512-RRECPsj7iu/xb5oKYcsFHSppFNnsj/52OVTRKb4zP5onXwVF3zVmmToNcOfGC+CRDpfK/U584fMg38ZHCaElKQ==} + engines: {node: '>=7.0.0'} + + color-name@1.1.4: + resolution: {integrity: sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA==} + + content-disposition@1.0.0: + resolution: {integrity: sha512-Au9nRL8VNUut/XSzbQA38+M78dzP4D+eqg3gfJHMIHHYa3bg067xj1KxMUWj+VULbiZMowKngFFbKczUrNJ1mg==} + engines: {node: '>= 0.6'} + + content-type@1.0.5: + resolution: {integrity: sha512-nTjqfcBFEipKdXCv4YDQWCfmcLZKm81ldF0pAopTvyrFGVbcR6P/VAAd5G7N+0tTr8QqiU0tFadD6FK4NtJwOA==} + engines: {node: '>= 0.6'} + + cookie-signature@1.2.2: + resolution: {integrity: sha512-D76uU73ulSXrD1UXF4KE2TMxVVwhsnCgfAyTg9k8P6KGZjlXKrOLe4dJQKI3Bxi5wjesZoFXJWElNWBjPZMbhg==} + engines: {node: '>=6.6.0'} + + cookie@0.7.2: + resolution: {integrity: sha512-yki5XnKuf750l50uGTllt6kKILY4nQ1eNIQatoXEByZ5dWgnKqbnqmTrBE5B4N7lrMJKQ2ytWMiTO2o0v6Ew/w==} + engines: {node: '>= 0.6'} + + cors@2.8.5: + resolution: {integrity: sha512-KIHbLJqu73RGr/hnbrO9uBeixNGuvSQjul/jdFvS/KFSIH1hWVd1ng7zOHx+YrEfInLG7q4n6GHQ9cDtxv/P6g==} + engines: {node: '>= 0.10'} + + debug@4.4.3: + resolution: {integrity: sha512-RGwwWnwQvkVfavKVt22FGLw+xYSdzARwm0ru6DhTVA3umU5hZc28V3kO4stgYryrTlLpuvgI9GiijltAjNbcqA==} + engines: {node: '>=6.0'} + peerDependencies: + supports-color: '*' + peerDependenciesMeta: + supports-color: + optional: true + + depd@2.0.0: + resolution: {integrity: sha512-g7nH6P6dyDioJogAAGprGpCtVImJhpPk/roCzdb3fIh61/s/nPsfR6onyMwkCAR/OlC3yBC0lESvUoQEAssIrw==} + engines: {node: '>= 0.8'} + + dotenv@17.2.2: + resolution: {integrity: sha512-Sf2LSQP+bOlhKWWyhFsn0UsfdK/kCWRv1iuA2gXAwt3dyNabr6QSj00I2V10pidqz69soatm9ZwZvpQMTIOd5Q==} + engines: {node: '>=12'} + + dunder-proto@1.0.1: + resolution: {integrity: sha512-KIN/nDJBQRcXw0MLVhZE9iQHmG68qAVIBg9CqmUYjmQIhgij9U5MFvrqkUL5FbtyyzZuOeOt0zdeRe4UY7ct+A==} + engines: {node: '>= 0.4'} + + ee-first@1.1.1: + 
resolution: {integrity: sha512-WMwm9LhRUo+WUaRN+vRuETqG89IgZphVSNkdFgeb6sS/E4OrDIN7t48CAewSHXc6C8lefD8KKfr5vY61brQlow==} + + emoji-regex@8.0.0: + resolution: {integrity: sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A==} + + encodeurl@2.0.0: + resolution: {integrity: sha512-Q0n9HRi4m6JuGIV1eFlmvJB7ZEVxu93IrMyiMsGC0lrMJMWzRgx6WGquyfQgZVb31vhGgXnfmPNNXmxnOkRBrg==} + engines: {node: '>= 0.8'} + + es-define-property@1.0.1: + resolution: {integrity: sha512-e3nRfgfUZ4rNGL232gUgX06QNyyez04KdjFrF+LTRoOXmrOgFKDg4BCdsjW8EnT69eqdYGmRpJwiPVYNrCaW3g==} + engines: {node: '>= 0.4'} + + es-errors@1.3.0: + resolution: {integrity: sha512-Zf5H2Kxt2xjTvbJvP2ZWLEICxA6j+hAmMzIlypy4xcBg1vKVnx89Wy0GbS+kf5cwCVFFzdCFh2XSCFNULS6csw==} + engines: {node: '>= 0.4'} + + es-object-atoms@1.1.1: + resolution: {integrity: sha512-FGgH2h8zKNim9ljj7dankFPcICIK9Cp5bm+c2gQSYePhpaG5+esrLODihIorn+Pe6FGJzWhXQotPv73jTaldXA==} + engines: {node: '>= 0.4'} + + esbuild@0.25.10: + resolution: {integrity: sha512-9RiGKvCwaqxO2owP61uQ4BgNborAQskMR6QusfWzQqv7AZOg5oGehdY2pRJMTKuwxd1IDBP4rSbI5lHzU7SMsQ==} + engines: {node: '>=18'} + hasBin: true + + escalade@3.2.0: + resolution: {integrity: sha512-WUj2qlxaQtO4g6Pq5c29GTcWGDyd8itL8zTlipgECz3JesAiiOKotd8JU6otB3PACgG6xkJUyVhboMS+bje/jA==} + engines: {node: '>=6'} + + escape-html@1.0.3: + resolution: {integrity: sha512-NiSupZ4OeuGwr68lGIeym/ksIZMJodUGOSCZ/FSnTxcrekbvqrgdUxlJOMpijaKZVjAJrWrGs/6Jy8OMuyj9ow==} + + etag@1.8.1: + resolution: {integrity: sha512-aIL5Fx7mawVa300al2BnEE4iNvo1qETxLrPI/o05L7z6go7fCw1J6EQmbK4FmJ2AS7kgVF/KEZWufBfdClMcPg==} + engines: {node: '>= 0.6'} + + eval-protocol@0.1.2: + resolution: {integrity: sha512-YmEjRUy/MnYPudZpsCRzbQrBD3ZAKlK+jb+E5RklkKz7eDTLvhGY63Ynn5OoKcNW0+o9j9eV7SSHRVye6Sjbaw==} + peerDependencies: + typescript: ^5 + + express@5.1.0: + resolution: {integrity: sha512-DT9ck5YIRU+8GYzzU5kT3eHGA5iL+1Zd0EutOmTE9Dtk+Tvuzd23VBU+ec7HPNSTxXYO55gPV/hq4pSBJDjFpA==} + engines: {node: '>= 18'} + + 
finalhandler@2.1.0: + resolution: {integrity: sha512-/t88Ty3d5JWQbWYgaOGCCYfXRwV1+be02WqYYlL6h0lEiUAMPM8o8qKGO01YIkOHzka2up08wvgYD0mDiI+q3Q==} + engines: {node: '>= 0.8'} + + forwarded@0.2.0: + resolution: {integrity: sha512-buRG0fpBtRHSTCOASe6hD258tEubFoRLb4ZNA6NxMVHNw2gOcwHo9wyablzMzOA5z9xA9L1KNjk/Nt6MT9aYow==} + engines: {node: '>= 0.6'} + + fresh@2.0.0: + resolution: {integrity: sha512-Rx/WycZ60HOaqLKAi6cHRKKI7zxWbJ31MhntmtwMoaTeF7XFH9hhBp8vITaMidfljRQ6eYWCKkaTK+ykVJHP2A==} + engines: {node: '>= 0.8'} + + fsevents@2.3.3: + resolution: {integrity: sha512-5xoDfX+fL7faATnagmWPpbFtwh/R77WmMMqqHGS65C3vvB0YHrgF+B1YmZ3441tMj5n63k0212XNoJwzlhffQw==} + engines: {node: ^8.16.0 || ^10.6.0 || >=11.0.0} + os: [darwin] + + function-bind@1.1.2: + resolution: {integrity: sha512-7XHNxH7qX9xG5mIwxkhumTox/MIRNcOgDrxWsMt2pAr23WHp6MrRlN7FBSFpCpr+oVO0F744iUgR82nJMfG2SA==} + + get-caller-file@2.0.5: + resolution: {integrity: sha512-DyFP3BM/3YHTQOCUL/w0OZHR0lpKeGrxotcHWcqNEdnltqFwXVfhEBQ94eIo34AfQpo0rGki4cyIiftY06h2Fg==} + engines: {node: 6.* || 8.* || >= 10.*} + + get-intrinsic@1.3.0: + resolution: {integrity: sha512-9fSjSaos/fRIVIp+xSJlE6lfwhES7LNtKaCBIamHsjr2na1BiABJPo0mOjjz8GJDURarmCPGqaiVg5mfjb98CQ==} + engines: {node: '>= 0.4'} + + get-proto@1.0.1: + resolution: {integrity: sha512-sTSfBjoXBp89JvIKIefqw7U2CCebsc74kiY6awiGogKtoSGbgjYE/G/+l9sF3MWFPNc9IcoOC4ODfKHfxFmp0g==} + engines: {node: '>= 0.4'} + + get-tsconfig@4.10.1: + resolution: {integrity: sha512-auHyJ4AgMz7vgS8Hp3N6HXSmlMdUyhSUrfBF16w153rxtLIEOE+HGqaBppczZvnHLqQJfiHotCYpNhl0lUROFQ==} + + gopd@1.2.0: + resolution: {integrity: sha512-ZUKRh6/kUFoAiTAtTYPZJ3hw9wNxx+BIBOijnlG9PnrJsCcSjs1wyyD6vJpaYtgnzDrKYRSqf3OO6Rfa93xsRg==} + engines: {node: '>= 0.4'} + + has-symbols@1.1.0: + resolution: {integrity: sha512-1cDNdwJ2Jaohmb3sg4OmKaMBwuC48sYni5HUw2DvsC8LjGTLK9h+eb1X6RyuOHe4hT0ULCW68iomhjUoKUqlPQ==} + engines: {node: '>= 0.4'} + + hasown@2.0.2: + resolution: {integrity: 
sha512-0hJU9SCPvmMzIBdZFqNPXWa6dqh7WdH0cII9y+CyS8rG3nL48Bclra9HmKhVVUHyPWNH5Y7xDwAB7bfgSjkUMQ==} + engines: {node: '>= 0.4'} + + helmet@7.2.0: + resolution: {integrity: sha512-ZRiwvN089JfMXokizgqEPXsl2Guk094yExfoDXR0cBYWxtBbaSww/w+vT4WEJsBW2iTUi1GgZ6swmoug3Oy4Xw==} + engines: {node: '>=16.0.0'} + + http-errors@2.0.0: + resolution: {integrity: sha512-FtwrG/euBzaEjYeRqOgly7G0qviiXoJWnvEH2Z1plBdXgbyjv34pHTSb9zoeHMyDy33+DWy5Wt9Wo+TURtOYSQ==} + engines: {node: '>= 0.8'} + + iconv-lite@0.6.3: + resolution: {integrity: sha512-4fCk79wshMdzMp2rH06qWrJE4iolqLhCUH+OiuIgU++RB0+94NlDL81atO7GX55uUKueo0txHNtvEyI6D7WdMw==} + engines: {node: '>=0.10.0'} + + iconv-lite@0.7.0: + resolution: {integrity: sha512-cf6L2Ds3h57VVmkZe+Pn+5APsT7FpqJtEhhieDCvrE2MK5Qk9MyffgQyuxQTm6BChfeZNtcOLHp9IcWRVcIcBQ==} + engines: {node: '>=0.10.0'} + + import-in-the-middle@1.14.4: + resolution: {integrity: sha512-eWjxh735SJLFJJDs5X82JQ2405OdJeAHDBnaoFCfdr5GVc7AWc9xU7KbrF+3Xd5F2ccP1aQFKtY+65X6EfKZ7A==} + + inherits@2.0.4: + resolution: {integrity: sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ==} + + ipaddr.js@1.9.1: + resolution: {integrity: sha512-0KI/607xoxSToH7GjN1FfSbLoU0+btTicjsQSWQlh/hZykN8KpmMf7uYwPW3R+akZ6R/w18ZlXSHBYXiYUPO3g==} + engines: {node: '>= 0.10'} + + is-core-module@2.16.1: + resolution: {integrity: sha512-UfoeMA6fIJ8wTYFEUjelnaGI67v6+N7qXJEvQuIGa99l4xsCruSYOVSQ0uPANn4dAzm8lkYPaKLrrijLq7x23w==} + engines: {node: '>= 0.4'} + + is-fullwidth-code-point@3.0.0: + resolution: {integrity: sha512-zymm5+u+sCsSWyD9qNaejV3DFvhCKclKdizYaJUuHA83RLjb7nSuGnddCHGv0hk+KY7BMAlsWeK4Ueg6EV6XQg==} + engines: {node: '>=8'} + + is-promise@4.0.0: + resolution: {integrity: sha512-hvpoI6korhJMnej285dSg6nu1+e6uxs7zG3BYAm5byqDsgJNWwxzM6z6iZiAgQR4TJ30JmBTOwqZUw3WlyH3AQ==} + + lodash.camelcase@4.3.0: + resolution: {integrity: sha512-TwuEnCnxbc3rAvhf/LbG7tJUDzhqXyFnv3dtzLOPgCG/hODL7WFnsbwktkD7yUV0RrreP/l1PALq/YSg6VvjlA==} + + long@5.3.2: + resolution: {integrity: 
sha512-mNAgZ1GmyNhD7AuqnTG3/VQ26o760+ZYBPKjPvugO8+nLbYfX6TVpJPseBvopbdY+qpZ/lKUnmEc1LeZYS3QAA==} + + math-intrinsics@1.1.0: + resolution: {integrity: sha512-/IXtbwEk5HTPyEwyKX6hGkYXxM9nbj64B+ilVJnC/R6B0pH5G4V3b0pVbL7DBj4tkhBAppbQUlf6F6Xl9LHu1g==} + engines: {node: '>= 0.4'} + + media-typer@1.1.0: + resolution: {integrity: sha512-aisnrDP4GNe06UcKFnV5bfMNPBUw4jsLGaWwWfnH3v02GnBuXX2MCVn5RbrWo0j3pczUilYblq7fQ7Nw2t5XKw==} + engines: {node: '>= 0.8'} + + merge-descriptors@2.0.0: + resolution: {integrity: sha512-Snk314V5ayFLhp3fkUREub6WtjBfPdCPY1Ln8/8munuLuiYhsABgBVWsozAG+MWMbVEvcdcpbi9R7ww22l9Q3g==} + engines: {node: '>=18'} + + mime-db@1.54.0: + resolution: {integrity: sha512-aU5EJuIN2WDemCcAp2vFBfp/m4EAhWJnUNSSw0ixs7/kXbd6Pg64EmwJkNdFhB8aWt1sH2CTXrLxo/iAGV3oPQ==} + engines: {node: '>= 0.6'} + + mime-types@3.0.1: + resolution: {integrity: sha512-xRc4oEhT6eaBpU1XF7AjpOFD+xQmXNB5OVKwp4tqCuBpHLS/ZbBDrc07mYTDqVMg6PfxUjjNp85O6Cd2Z/5HWA==} + engines: {node: '>= 0.6'} + + module-details-from-path@1.0.4: + resolution: {integrity: sha512-EGWKgxALGMgzvxYF1UyGTy0HXX/2vHLkw6+NvDKW2jypWbHpjQuj4UMcqQWXHERJhVGKikolT06G3bcKe4fi7w==} + + ms@2.1.3: + resolution: {integrity: sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==} + + negotiator@1.0.0: + resolution: {integrity: sha512-8Ofs/AUQh8MaEcrlq5xOX0CQ9ypTF5dl78mjlMNfOK08fzpgTHQRQPBxcPlEtIw0yRpws+Zo/3r+5WRby7u3Gg==} + engines: {node: '>= 0.6'} + + object-assign@4.1.1: + resolution: {integrity: sha512-rJgTQnkUnH1sFw8yT6VSU3zD3sWmu6sZhIseY8VX+GRu3P6F7Fu+JNDoXfklElbLJSnc3FUQHVe4cU5hj+BcUg==} + engines: {node: '>=0.10.0'} + + object-inspect@1.13.4: + resolution: {integrity: sha512-W67iLl4J2EXEGTbfeHCffrjDfitvLANg0UlX3wFUUSTx92KXRFegMHUVgSqE+wvhAbi4WqjGg9czysTV2Epbew==} + engines: {node: '>= 0.4'} + + on-finished@2.4.1: + resolution: {integrity: sha512-oVlzkg3ENAhCk2zdv7IJwd/QUD4z2RxRwpkcGY8psCVcCYZNq4wYnVWALHM+brtuJjePWiYF/ClmuDr8Ch5+kg==} + engines: {node: '>= 0.8'} + + once@1.4.0: + resolution: 
{integrity: sha512-lNaJgI+2Q5URQBkccEKHTQOPaXdUxnZZElQTZY0MFUAuaEqe1E+Nyvgdz/aIyNi6Z9MzO5dv1H8n58/GELp3+w==} + + openai@5.23.0: + resolution: {integrity: sha512-Cfq155NHzI7VWR67LUNJMIgPZy2oSh7Fld/OKhxq648BiUjELAvcge7g30xJ6vAfwwXf6TVK0KKuN+3nmIJG/A==} + hasBin: true + peerDependencies: + ws: ^8.18.0 + zod: ^3.23.8 + peerDependenciesMeta: + ws: + optional: true + zod: + optional: true + + parseurl@1.3.3: + resolution: {integrity: sha512-CiyeOxFT/JZyN5m0z9PfXw4SCBJ6Sygz1Dpl0wqjlhDEGGBP1GnsUVEL0p63hoG1fcj3fHynXi9NYO4nWOL+qQ==} + engines: {node: '>= 0.8'} + + path-parse@1.0.7: + resolution: {integrity: sha512-LDJzPVEEEPR+y48z93A0Ed0yXb8pAByGWo/k5YYdYgpY2/2EsOsksJrq7lOHxryrVOn1ejG6oAp8ahvOIQD8sw==} + + path-to-regexp@8.3.0: + resolution: {integrity: sha512-7jdwVIRtsP8MYpdXSwOS0YdD0Du+qOoF/AEPIt88PcCFrZCzx41oxku1jD88hZBwbNUIEfpqvuhjFaMAqMTWnA==} + + protobufjs@7.5.4: + resolution: {integrity: sha512-CvexbZtbov6jW2eXAvLukXjXUW1TzFaivC46BpWc/3BpcCysb5Vffu+B3XHMm8lVEuy2Mm4XGex8hBSg1yapPg==} + engines: {node: '>=12.0.0'} + + proxy-addr@2.0.7: + resolution: {integrity: sha512-llQsMLSUDUPT44jdrU/O37qlnifitDP+ZwrmmZcoSKyLKvtZxpyV0n2/bD/N4tBAAZ/gJEdZU7KMraoK1+XYAg==} + engines: {node: '>= 0.10'} + + qs@6.14.0: + resolution: {integrity: sha512-YWWTjgABSKcvs/nWBi9PycY/JiPJqOD4JA6o9Sej2AtvSGarXxKC3OQSk4pAarbdQlKAh5D4FCQkJNkW+GAn3w==} + engines: {node: '>=0.6'} + + range-parser@1.2.1: + resolution: {integrity: sha512-Hrgsx+orqoygnmhFbKaHE6c296J+HTAQXoxEF6gNupROmmGJRoyzfG3ccAveqCBrwr/2yxQ5BVd/GTl5agOwSg==} + engines: {node: '>= 0.6'} + + raw-body@3.0.1: + resolution: {integrity: sha512-9G8cA+tuMS75+6G/TzW8OtLzmBDMo8p1JRxN5AZ+LAp8uxGA8V8GZm4GQ4/N5QNQEnLmg6SS7wyuSmbKepiKqA==} + engines: {node: '>= 0.10'} + + require-directory@2.1.1: + resolution: {integrity: sha512-fGxEI7+wsG9xrvdjsrlmL22OMTTiHRwAMroiEeMgq8gzoLC/PQr7RsRDSTLUg/bZAZtF+TVIkHc6/4RIKrui+Q==} + engines: {node: '>=0.10.0'} + + require-in-the-middle@7.5.2: + resolution: {integrity: 
sha512-gAZ+kLqBdHarXB64XpAe2VCjB7rIRv+mU8tfRWziHRJ5umKsIHN2tLLv6EtMw7WCdP19S0ERVMldNvxYCHnhSQ==} + engines: {node: '>=8.6.0'} + + resolve-pkg-maps@1.0.0: + resolution: {integrity: sha512-seS2Tj26TBVOC2NIc2rOe2y2ZO7efxITtLZcGSOnHHNOQ7CkiUBfw0Iw2ck6xkIhPwLhKNLS8BO+hEpngQlqzw==} + + resolve@1.22.10: + resolution: {integrity: sha512-NPRy+/ncIMeDlTAsuqwKIiferiawhefFJtkNSW0qZJEqMEb+qBt/77B/jGeeek+F0uOeN05CDa6HXbbIgtVX4w==} + engines: {node: '>= 0.4'} + hasBin: true + + router@2.2.0: + resolution: {integrity: sha512-nLTrUKm2UyiL7rlhapu/Zl45FwNgkZGaCpZbIHajDYgwlJCOzLSk+cIPAnsEqV955GjILJnKbdQC1nVPz+gAYQ==} + engines: {node: '>= 18'} + + safe-buffer@5.2.1: + resolution: {integrity: sha512-rp3So07KcdmmKbGvgaNxQSJr7bGVSVk5S9Eq1F+ppbRo70+YeaDxkw5Dd8NPN+GD6bjnYm2VuPuCXmpuYvmCXQ==} + + safer-buffer@2.1.2: + resolution: {integrity: sha512-YZo3K82SD7Riyi0E1EQPojLz7kpepnSQI9IyPbHHg1XXXevb5dJI7tpyN2ADxGcQbHG7vcyRHk0cbwqcQriUtg==} + + send@1.2.0: + resolution: {integrity: sha512-uaW0WwXKpL9blXE2o0bRhoL2EGXIrZxQ2ZQ4mgcfoBxdFmQold+qWsD2jLrfZ0trjKL6vOw0j//eAwcALFjKSw==} + engines: {node: '>= 18'} + + serve-static@2.2.0: + resolution: {integrity: sha512-61g9pCh0Vnh7IutZjtLGGpTA355+OPn2TyDv/6ivP2h/AdAVX9azsoxmg2/M6nZeQZNYBEwIcsne1mJd9oQItQ==} + engines: {node: '>= 18'} + + setprototypeof@1.2.0: + resolution: {integrity: sha512-E5LDX7Wrp85Kil5bhZv46j8jOeboKq5JMmYM3gVGdGH8xFpPWXUMsNrlODCrkoxMEeNi/XZIwuRvY4XNwYMJpw==} + + side-channel-list@1.0.0: + resolution: {integrity: sha512-FCLHtRD/gnpCiCHEiJLOwdmFP+wzCmDEkc9y7NsYxeF4u7Btsn1ZuwgwJGxImImHicJArLP4R0yX4c2KCrMrTA==} + engines: {node: '>= 0.4'} + + side-channel-map@1.0.1: + resolution: {integrity: sha512-VCjCNfgMsby3tTdo02nbjtM/ewra6jPHmpThenkTYh8pG9ucZ/1P8So4u4FGBek/BjpOVsDCMoLA/iuBKIFXRA==} + engines: {node: '>= 0.4'} + + side-channel-weakmap@1.0.2: + resolution: {integrity: sha512-WPS/HvHQTYnHisLo9McqBHOJk2FkHO/tlpvldyrnem4aeQp4hai3gythswg6p01oSoTl58rcpiFAjF2br2Ak2A==} + engines: {node: '>= 0.4'} + + side-channel@1.1.0: + resolution: 
{integrity: sha512-ZX99e6tRweoUXqR+VBrslhda51Nh5MTQwou5tnUDgbtyM0dBgmhEDtWGP/xbKn6hqfPRHujUNwz5fy/wbbhnpw==} + engines: {node: '>= 0.4'} + + statuses@2.0.1: + resolution: {integrity: sha512-RwNA9Z/7PrK06rYLIzFMlaF+l73iwpzsqRIFgbMLbTcLD6cOao82TaWefPXQvB2fOC4AjuYSEndS7N/mTCbkdQ==} + engines: {node: '>= 0.8'} + + statuses@2.0.2: + resolution: {integrity: sha512-DvEy55V3DB7uknRo+4iOGT5fP1slR8wQohVdknigZPMpMstaKJQWhwiYBACJE3Ul2pTnATihhBYnRhZQHGBiRw==} + engines: {node: '>= 0.8'} + + string-width@4.2.3: + resolution: {integrity: sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g==} + engines: {node: '>=8'} + + strip-ansi@6.0.1: + resolution: {integrity: sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A==} + engines: {node: '>=8'} + + supports-preserve-symlinks-flag@1.0.0: + resolution: {integrity: sha512-ot0WnXS9fgdkgIcePe6RHNk1WA8+muPa6cSjeR3V8K27q9BB1rTE3R1p7Hv0z1ZyAc8s6Vvv8DIyWf681MAt0w==} + engines: {node: '>= 0.4'} + + toidentifier@1.0.1: + resolution: {integrity: sha512-o5sSPKEkg/DIQNmH43V0/uerLrpzVedkUh8tGNvaeXpfpuwjKenlSox/2O/BTlZUtEe+JG7s5YhEz608PlAHRA==} + engines: {node: '>=0.6'} + + tsx@4.20.5: + resolution: {integrity: sha512-+wKjMNU9w/EaQayHXb7WA7ZaHY6hN8WgfvHNQ3t1PnU91/7O8TcTnIhCDYTZwnt8JsO9IBqZ30Ln1r7pPF52Aw==} + engines: {node: '>=18.0.0'} + hasBin: true + + type-is@2.0.1: + resolution: {integrity: sha512-OZs6gsjF4vMp32qrCbiVSkrFmXtG/AZhY3t0iAMrMBiAZyV9oALtXO8hsrHbMXF9x6L3grlFuwW2oAz7cav+Gw==} + engines: {node: '>= 0.6'} + + typescript@5.9.2: + resolution: {integrity: sha512-CWBzXQrc/qOkhidw1OzBTQuYRbfyxDXJMVJ1XNwUHGROVmuaeiEm3OslpZ1RV96d7SKKjZKrSJu3+t/xlw3R9A==} + engines: {node: '>=14.17'} + hasBin: true + + undici-types@6.21.0: + resolution: {integrity: sha512-iwDZqg0QAGrg9Rav5H4n0M64c3mkR59cJ6wQp+7C4nI0gsmExaedaYLNO44eT4AtBBwjbTiGPMlt2Md0T9H9JQ==} + + unpipe@1.0.0: + resolution: {integrity: 
sha512-pjy2bYhSsufwWlKwPc+l3cN7+wuJlK6uz0YdJEOlQDbl6jo/YlPi4mb8agUkVC8BF7V8NuzeyPNqRksA3hztKQ==} + engines: {node: '>= 0.8'} + + vary@1.1.2: + resolution: {integrity: sha512-BNGbWLfd0eUPabhkXUVm0j8uuvREyTh5ovRa/dyow/BqAbZJyC+5fU+IzQOzmAKzYqYRAISoRhdQr3eIZ/PXqg==} + engines: {node: '>= 0.8'} + + wrap-ansi@7.0.0: + resolution: {integrity: sha512-YVGIj2kamLSTxw6NsZjoBxfSwsn0ycdesmc4p+Q21c5zPuZ1pl+NfxVdxPtdHvmNVOQ6XSYG4AUtyt/Fi7D16Q==} + engines: {node: '>=10'} + + wrappy@1.0.2: + resolution: {integrity: sha512-l4Sp/DRseor9wL6EvV2+TuQn63dMkPjZ/sp9XkghTEbV9KlPS1xUsZ3u7/IQO4wxtcFB4bgpQPRcR3QCvezPcQ==} + + y18n@5.0.8: + resolution: {integrity: sha512-0pfFzegeDWJHJIAmTLRP2DwHjdF5s7jo9tuztdQxAhINCdvS+3nGINqPd00AphqJR/0LhANUS6/+7SCb98YOfA==} + engines: {node: '>=10'} + + yargs-parser@21.1.1: + resolution: {integrity: sha512-tVpsJW7DdjecAiFpbIB1e3qxIQsE6NoPc5/eTdrbbIC4h0LVsWhnoa3g+m2HclBIujHzsxZ4VJVA+GUuc2/LBw==} + engines: {node: '>=12'} + + yargs@17.7.2: + resolution: {integrity: sha512-7dSzzRQ++CKnNI/krKnYRV7JKKPUXMEh61soaHKg9mrWEhzFWhFnxPxGl+69cD1Ou63C13NUPCnmIcrvqCuM6w==} + engines: {node: '>=12'} + + zod@3.25.76: + resolution: {integrity: sha512-gzUt/qt81nXsFGKIFcC3YnfEAx5NkunCfnDlvuBSSFS02bcXu4Lmea0AFIUwbLWxWPx3d9p8S5QoaujKcNQxcQ==} + + zod@4.1.11: + resolution: {integrity: sha512-WPsqwxITS2tzx1bzhIKsEs19ABD5vmCVa4xBo2tq/SrV4RNZtfws1EnCWQXM6yh8bD08a1idvkB5MZSBiZsjwg==} + +snapshots: + + '@esbuild/aix-ppc64@0.25.10': + optional: true + + '@esbuild/android-arm64@0.25.10': + optional: true + + '@esbuild/android-arm@0.25.10': + optional: true + + '@esbuild/android-x64@0.25.10': + optional: true + + '@esbuild/darwin-arm64@0.25.10': + optional: true + + '@esbuild/darwin-x64@0.25.10': + optional: true + + '@esbuild/freebsd-arm64@0.25.10': + optional: true + + '@esbuild/freebsd-x64@0.25.10': + optional: true + + '@esbuild/linux-arm64@0.25.10': + optional: true + + '@esbuild/linux-arm@0.25.10': + optional: true + + '@esbuild/linux-ia32@0.25.10': + optional: true + + 
'@esbuild/linux-loong64@0.25.10': + optional: true + + '@esbuild/linux-mips64el@0.25.10': + optional: true + + '@esbuild/linux-ppc64@0.25.10': + optional: true + + '@esbuild/linux-riscv64@0.25.10': + optional: true + + '@esbuild/linux-s390x@0.25.10': + optional: true + + '@esbuild/linux-x64@0.25.10': + optional: true + + '@esbuild/netbsd-arm64@0.25.10': + optional: true + + '@esbuild/netbsd-x64@0.25.10': + optional: true + + '@esbuild/openbsd-arm64@0.25.10': + optional: true + + '@esbuild/openbsd-x64@0.25.10': + optional: true + + '@esbuild/openharmony-arm64@0.25.10': + optional: true + + '@esbuild/sunos-x64@0.25.10': + optional: true + + '@esbuild/win32-arm64@0.25.10': + optional: true + + '@esbuild/win32-ia32@0.25.10': + optional: true + + '@esbuild/win32-x64@0.25.10': + optional: true + + '@grpc/grpc-js@1.14.0': + dependencies: + '@grpc/proto-loader': 0.8.0 + '@js-sdsl/ordered-map': 4.4.2 + + '@grpc/proto-loader@0.8.0': + dependencies: + lodash.camelcase: 4.3.0 + long: 5.3.2 + protobufjs: 7.5.4 + yargs: 17.7.2 + + '@js-sdsl/ordered-map@4.4.2': {} + + '@langfuse/core@4.2.0': {} + + '@langfuse/openai@4.2.0(@opentelemetry/api@1.9.0)': + dependencies: + '@langfuse/core': 4.2.0 + '@langfuse/tracing': 4.2.0(@opentelemetry/api@1.9.0) + transitivePeerDependencies: + - '@opentelemetry/api' + + '@langfuse/otel@4.2.0(@opentelemetry/api@1.9.0)(@opentelemetry/core@2.1.0(@opentelemetry/api@1.9.0))(@opentelemetry/exporter-trace-otlp-http@0.205.0(@opentelemetry/api@1.9.0))(@opentelemetry/sdk-trace-base@2.1.0(@opentelemetry/api@1.9.0))': + dependencies: + '@langfuse/core': 4.2.0 + '@opentelemetry/api': 1.9.0 + '@opentelemetry/core': 2.1.0(@opentelemetry/api@1.9.0) + '@opentelemetry/exporter-trace-otlp-http': 0.205.0(@opentelemetry/api@1.9.0) + '@opentelemetry/sdk-trace-base': 2.1.0(@opentelemetry/api@1.9.0) + + '@langfuse/tracing@4.2.0(@opentelemetry/api@1.9.0)': + dependencies: + '@langfuse/core': 4.2.0 + '@opentelemetry/api': 1.9.0 + + '@opentelemetry/api-logs@0.205.0': + 
dependencies: + '@opentelemetry/api': 1.9.0 + + '@opentelemetry/api@1.9.0': {} + + '@opentelemetry/context-async-hooks@2.1.0(@opentelemetry/api@1.9.0)': + dependencies: + '@opentelemetry/api': 1.9.0 + + '@opentelemetry/core@2.1.0(@opentelemetry/api@1.9.0)': + dependencies: + '@opentelemetry/api': 1.9.0 + '@opentelemetry/semantic-conventions': 1.37.0 + + '@opentelemetry/exporter-logs-otlp-grpc@0.205.0(@opentelemetry/api@1.9.0)': + dependencies: + '@grpc/grpc-js': 1.14.0 + '@opentelemetry/api': 1.9.0 + '@opentelemetry/core': 2.1.0(@opentelemetry/api@1.9.0) + '@opentelemetry/otlp-exporter-base': 0.205.0(@opentelemetry/api@1.9.0) + '@opentelemetry/otlp-grpc-exporter-base': 0.205.0(@opentelemetry/api@1.9.0) + '@opentelemetry/otlp-transformer': 0.205.0(@opentelemetry/api@1.9.0) + '@opentelemetry/sdk-logs': 0.205.0(@opentelemetry/api@1.9.0) + + '@opentelemetry/exporter-logs-otlp-http@0.205.0(@opentelemetry/api@1.9.0)': + dependencies: + '@opentelemetry/api': 1.9.0 + '@opentelemetry/api-logs': 0.205.0 + '@opentelemetry/core': 2.1.0(@opentelemetry/api@1.9.0) + '@opentelemetry/otlp-exporter-base': 0.205.0(@opentelemetry/api@1.9.0) + '@opentelemetry/otlp-transformer': 0.205.0(@opentelemetry/api@1.9.0) + '@opentelemetry/sdk-logs': 0.205.0(@opentelemetry/api@1.9.0) + + '@opentelemetry/exporter-logs-otlp-proto@0.205.0(@opentelemetry/api@1.9.0)': + dependencies: + '@opentelemetry/api': 1.9.0 + '@opentelemetry/api-logs': 0.205.0 + '@opentelemetry/core': 2.1.0(@opentelemetry/api@1.9.0) + '@opentelemetry/otlp-exporter-base': 0.205.0(@opentelemetry/api@1.9.0) + '@opentelemetry/otlp-transformer': 0.205.0(@opentelemetry/api@1.9.0) + '@opentelemetry/resources': 2.1.0(@opentelemetry/api@1.9.0) + '@opentelemetry/sdk-logs': 0.205.0(@opentelemetry/api@1.9.0) + '@opentelemetry/sdk-trace-base': 2.1.0(@opentelemetry/api@1.9.0) + + '@opentelemetry/exporter-metrics-otlp-grpc@0.205.0(@opentelemetry/api@1.9.0)': + dependencies: + '@grpc/grpc-js': 1.14.0 + '@opentelemetry/api': 1.9.0 + 
'@opentelemetry/core': 2.1.0(@opentelemetry/api@1.9.0) + '@opentelemetry/exporter-metrics-otlp-http': 0.205.0(@opentelemetry/api@1.9.0) + '@opentelemetry/otlp-exporter-base': 0.205.0(@opentelemetry/api@1.9.0) + '@opentelemetry/otlp-grpc-exporter-base': 0.205.0(@opentelemetry/api@1.9.0) + '@opentelemetry/otlp-transformer': 0.205.0(@opentelemetry/api@1.9.0) + '@opentelemetry/resources': 2.1.0(@opentelemetry/api@1.9.0) + '@opentelemetry/sdk-metrics': 2.1.0(@opentelemetry/api@1.9.0) + + '@opentelemetry/exporter-metrics-otlp-http@0.205.0(@opentelemetry/api@1.9.0)': + dependencies: + '@opentelemetry/api': 1.9.0 + '@opentelemetry/core': 2.1.0(@opentelemetry/api@1.9.0) + '@opentelemetry/otlp-exporter-base': 0.205.0(@opentelemetry/api@1.9.0) + '@opentelemetry/otlp-transformer': 0.205.0(@opentelemetry/api@1.9.0) + '@opentelemetry/resources': 2.1.0(@opentelemetry/api@1.9.0) + '@opentelemetry/sdk-metrics': 2.1.0(@opentelemetry/api@1.9.0) + + '@opentelemetry/exporter-metrics-otlp-proto@0.205.0(@opentelemetry/api@1.9.0)': + dependencies: + '@opentelemetry/api': 1.9.0 + '@opentelemetry/core': 2.1.0(@opentelemetry/api@1.9.0) + '@opentelemetry/exporter-metrics-otlp-http': 0.205.0(@opentelemetry/api@1.9.0) + '@opentelemetry/otlp-exporter-base': 0.205.0(@opentelemetry/api@1.9.0) + '@opentelemetry/otlp-transformer': 0.205.0(@opentelemetry/api@1.9.0) + '@opentelemetry/resources': 2.1.0(@opentelemetry/api@1.9.0) + '@opentelemetry/sdk-metrics': 2.1.0(@opentelemetry/api@1.9.0) + + '@opentelemetry/exporter-prometheus@0.205.0(@opentelemetry/api@1.9.0)': + dependencies: + '@opentelemetry/api': 1.9.0 + '@opentelemetry/core': 2.1.0(@opentelemetry/api@1.9.0) + '@opentelemetry/resources': 2.1.0(@opentelemetry/api@1.9.0) + '@opentelemetry/sdk-metrics': 2.1.0(@opentelemetry/api@1.9.0) + + '@opentelemetry/exporter-trace-otlp-grpc@0.205.0(@opentelemetry/api@1.9.0)': + dependencies: + '@grpc/grpc-js': 1.14.0 + '@opentelemetry/api': 1.9.0 + '@opentelemetry/core': 2.1.0(@opentelemetry/api@1.9.0) + 
'@opentelemetry/otlp-exporter-base': 0.205.0(@opentelemetry/api@1.9.0) + '@opentelemetry/otlp-grpc-exporter-base': 0.205.0(@opentelemetry/api@1.9.0) + '@opentelemetry/otlp-transformer': 0.205.0(@opentelemetry/api@1.9.0) + '@opentelemetry/resources': 2.1.0(@opentelemetry/api@1.9.0) + '@opentelemetry/sdk-trace-base': 2.1.0(@opentelemetry/api@1.9.0) + + '@opentelemetry/exporter-trace-otlp-http@0.205.0(@opentelemetry/api@1.9.0)': + dependencies: + '@opentelemetry/api': 1.9.0 + '@opentelemetry/core': 2.1.0(@opentelemetry/api@1.9.0) + '@opentelemetry/otlp-exporter-base': 0.205.0(@opentelemetry/api@1.9.0) + '@opentelemetry/otlp-transformer': 0.205.0(@opentelemetry/api@1.9.0) + '@opentelemetry/resources': 2.1.0(@opentelemetry/api@1.9.0) + '@opentelemetry/sdk-trace-base': 2.1.0(@opentelemetry/api@1.9.0) + + '@opentelemetry/exporter-trace-otlp-proto@0.205.0(@opentelemetry/api@1.9.0)': + dependencies: + '@opentelemetry/api': 1.9.0 + '@opentelemetry/core': 2.1.0(@opentelemetry/api@1.9.0) + '@opentelemetry/otlp-exporter-base': 0.205.0(@opentelemetry/api@1.9.0) + '@opentelemetry/otlp-transformer': 0.205.0(@opentelemetry/api@1.9.0) + '@opentelemetry/resources': 2.1.0(@opentelemetry/api@1.9.0) + '@opentelemetry/sdk-trace-base': 2.1.0(@opentelemetry/api@1.9.0) + + '@opentelemetry/exporter-zipkin@2.1.0(@opentelemetry/api@1.9.0)': + dependencies: + '@opentelemetry/api': 1.9.0 + '@opentelemetry/core': 2.1.0(@opentelemetry/api@1.9.0) + '@opentelemetry/resources': 2.1.0(@opentelemetry/api@1.9.0) + '@opentelemetry/sdk-trace-base': 2.1.0(@opentelemetry/api@1.9.0) + '@opentelemetry/semantic-conventions': 1.37.0 + + '@opentelemetry/instrumentation@0.205.0(@opentelemetry/api@1.9.0)': + dependencies: + '@opentelemetry/api': 1.9.0 + '@opentelemetry/api-logs': 0.205.0 + import-in-the-middle: 1.14.4 + require-in-the-middle: 7.5.2 + transitivePeerDependencies: + - supports-color + + '@opentelemetry/otlp-exporter-base@0.205.0(@opentelemetry/api@1.9.0)': + dependencies: + '@opentelemetry/api': 
1.9.0 + '@opentelemetry/core': 2.1.0(@opentelemetry/api@1.9.0) + '@opentelemetry/otlp-transformer': 0.205.0(@opentelemetry/api@1.9.0) + + '@opentelemetry/otlp-grpc-exporter-base@0.205.0(@opentelemetry/api@1.9.0)': + dependencies: + '@grpc/grpc-js': 1.14.0 + '@opentelemetry/api': 1.9.0 + '@opentelemetry/core': 2.1.0(@opentelemetry/api@1.9.0) + '@opentelemetry/otlp-exporter-base': 0.205.0(@opentelemetry/api@1.9.0) + '@opentelemetry/otlp-transformer': 0.205.0(@opentelemetry/api@1.9.0) + + '@opentelemetry/otlp-transformer@0.205.0(@opentelemetry/api@1.9.0)': + dependencies: + '@opentelemetry/api': 1.9.0 + '@opentelemetry/api-logs': 0.205.0 + '@opentelemetry/core': 2.1.0(@opentelemetry/api@1.9.0) + '@opentelemetry/resources': 2.1.0(@opentelemetry/api@1.9.0) + '@opentelemetry/sdk-logs': 0.205.0(@opentelemetry/api@1.9.0) + '@opentelemetry/sdk-metrics': 2.1.0(@opentelemetry/api@1.9.0) + '@opentelemetry/sdk-trace-base': 2.1.0(@opentelemetry/api@1.9.0) + protobufjs: 7.5.4 + + '@opentelemetry/propagator-b3@2.1.0(@opentelemetry/api@1.9.0)': + dependencies: + '@opentelemetry/api': 1.9.0 + '@opentelemetry/core': 2.1.0(@opentelemetry/api@1.9.0) + + '@opentelemetry/propagator-jaeger@2.1.0(@opentelemetry/api@1.9.0)': + dependencies: + '@opentelemetry/api': 1.9.0 + '@opentelemetry/core': 2.1.0(@opentelemetry/api@1.9.0) + + '@opentelemetry/resources@2.1.0(@opentelemetry/api@1.9.0)': + dependencies: + '@opentelemetry/api': 1.9.0 + '@opentelemetry/core': 2.1.0(@opentelemetry/api@1.9.0) + '@opentelemetry/semantic-conventions': 1.37.0 + + '@opentelemetry/sdk-logs@0.205.0(@opentelemetry/api@1.9.0)': + dependencies: + '@opentelemetry/api': 1.9.0 + '@opentelemetry/api-logs': 0.205.0 + '@opentelemetry/core': 2.1.0(@opentelemetry/api@1.9.0) + '@opentelemetry/resources': 2.1.0(@opentelemetry/api@1.9.0) + + '@opentelemetry/sdk-metrics@2.1.0(@opentelemetry/api@1.9.0)': + dependencies: + '@opentelemetry/api': 1.9.0 + '@opentelemetry/core': 2.1.0(@opentelemetry/api@1.9.0) + 
'@opentelemetry/resources': 2.1.0(@opentelemetry/api@1.9.0) + + '@opentelemetry/sdk-node@0.205.0(@opentelemetry/api@1.9.0)': + dependencies: + '@opentelemetry/api': 1.9.0 + '@opentelemetry/api-logs': 0.205.0 + '@opentelemetry/core': 2.1.0(@opentelemetry/api@1.9.0) + '@opentelemetry/exporter-logs-otlp-grpc': 0.205.0(@opentelemetry/api@1.9.0) + '@opentelemetry/exporter-logs-otlp-http': 0.205.0(@opentelemetry/api@1.9.0) + '@opentelemetry/exporter-logs-otlp-proto': 0.205.0(@opentelemetry/api@1.9.0) + '@opentelemetry/exporter-metrics-otlp-grpc': 0.205.0(@opentelemetry/api@1.9.0) + '@opentelemetry/exporter-metrics-otlp-http': 0.205.0(@opentelemetry/api@1.9.0) + '@opentelemetry/exporter-metrics-otlp-proto': 0.205.0(@opentelemetry/api@1.9.0) + '@opentelemetry/exporter-prometheus': 0.205.0(@opentelemetry/api@1.9.0) + '@opentelemetry/exporter-trace-otlp-grpc': 0.205.0(@opentelemetry/api@1.9.0) + '@opentelemetry/exporter-trace-otlp-http': 0.205.0(@opentelemetry/api@1.9.0) + '@opentelemetry/exporter-trace-otlp-proto': 0.205.0(@opentelemetry/api@1.9.0) + '@opentelemetry/exporter-zipkin': 2.1.0(@opentelemetry/api@1.9.0) + '@opentelemetry/instrumentation': 0.205.0(@opentelemetry/api@1.9.0) + '@opentelemetry/propagator-b3': 2.1.0(@opentelemetry/api@1.9.0) + '@opentelemetry/propagator-jaeger': 2.1.0(@opentelemetry/api@1.9.0) + '@opentelemetry/resources': 2.1.0(@opentelemetry/api@1.9.0) + '@opentelemetry/sdk-logs': 0.205.0(@opentelemetry/api@1.9.0) + '@opentelemetry/sdk-metrics': 2.1.0(@opentelemetry/api@1.9.0) + '@opentelemetry/sdk-trace-base': 2.1.0(@opentelemetry/api@1.9.0) + '@opentelemetry/sdk-trace-node': 2.1.0(@opentelemetry/api@1.9.0) + '@opentelemetry/semantic-conventions': 1.37.0 + transitivePeerDependencies: + - supports-color + + '@opentelemetry/sdk-trace-base@2.1.0(@opentelemetry/api@1.9.0)': + dependencies: + '@opentelemetry/api': 1.9.0 + '@opentelemetry/core': 2.1.0(@opentelemetry/api@1.9.0) + '@opentelemetry/resources': 2.1.0(@opentelemetry/api@1.9.0) + 
'@opentelemetry/semantic-conventions': 1.37.0 + + '@opentelemetry/sdk-trace-node@2.1.0(@opentelemetry/api@1.9.0)': + dependencies: + '@opentelemetry/api': 1.9.0 + '@opentelemetry/context-async-hooks': 2.1.0(@opentelemetry/api@1.9.0) + '@opentelemetry/core': 2.1.0(@opentelemetry/api@1.9.0) + '@opentelemetry/sdk-trace-base': 2.1.0(@opentelemetry/api@1.9.0) + + '@opentelemetry/semantic-conventions@1.37.0': {} + + '@protobufjs/aspromise@1.1.2': {} + + '@protobufjs/base64@1.1.2': {} + + '@protobufjs/codegen@2.0.4': {} + + '@protobufjs/eventemitter@1.1.0': {} + + '@protobufjs/fetch@1.1.0': + dependencies: + '@protobufjs/aspromise': 1.1.2 + '@protobufjs/inquire': 1.1.0 + + '@protobufjs/float@1.0.2': {} + + '@protobufjs/inquire@1.1.0': {} + + '@protobufjs/path@1.1.2': {} + + '@protobufjs/pool@1.1.0': {} + + '@protobufjs/utf8@1.1.0': {} + + '@types/body-parser@1.19.6': + dependencies: + '@types/connect': 3.4.38 + '@types/node': 20.19.17 + + '@types/connect@3.4.38': + dependencies: + '@types/node': 20.19.17 + + '@types/cors@2.8.19': + dependencies: + '@types/node': 20.19.17 + + '@types/express-serve-static-core@4.19.6': + dependencies: + '@types/node': 20.19.17 + '@types/qs': 6.14.0 + '@types/range-parser': 1.2.7 + '@types/send': 0.17.5 + + '@types/express@4.17.23': + dependencies: + '@types/body-parser': 1.19.6 + '@types/express-serve-static-core': 4.19.6 + '@types/qs': 6.14.0 + '@types/serve-static': 1.15.8 + + '@types/http-errors@2.0.5': {} + + '@types/mime@1.3.5': {} + + '@types/node@20.19.17': + dependencies: + undici-types: 6.21.0 + + '@types/qs@6.14.0': {} + + '@types/range-parser@1.2.7': {} + + '@types/send@0.17.5': + dependencies: + '@types/mime': 1.3.5 + '@types/node': 20.19.17 + + '@types/serve-static@1.15.8': + dependencies: + '@types/http-errors': 2.0.5 + '@types/node': 20.19.17 + '@types/send': 0.17.5 + + accepts@2.0.0: + dependencies: + mime-types: 3.0.1 + negotiator: 1.0.0 + + acorn-import-attributes@1.9.5(acorn@8.15.0): + dependencies: + acorn: 8.15.0 + + 
acorn@8.15.0: {} + + ansi-regex@5.0.1: {} + + ansi-styles@4.3.0: + dependencies: + color-convert: 2.0.1 + + body-parser@2.2.0: + dependencies: + bytes: 3.1.2 + content-type: 1.0.5 + debug: 4.4.3 + http-errors: 2.0.0 + iconv-lite: 0.6.3 + on-finished: 2.4.1 + qs: 6.14.0 + raw-body: 3.0.1 + type-is: 2.0.1 + transitivePeerDependencies: + - supports-color + + bytes@3.1.2: {} + + call-bind-apply-helpers@1.0.2: + dependencies: + es-errors: 1.3.0 + function-bind: 1.1.2 + + call-bound@1.0.4: + dependencies: + call-bind-apply-helpers: 1.0.2 + get-intrinsic: 1.3.0 + + cjs-module-lexer@1.4.3: {} + + cliui@8.0.1: + dependencies: + string-width: 4.2.3 + strip-ansi: 6.0.1 + wrap-ansi: 7.0.0 + + color-convert@2.0.1: + dependencies: + color-name: 1.1.4 + + color-name@1.1.4: {} + + content-disposition@1.0.0: + dependencies: + safe-buffer: 5.2.1 + + content-type@1.0.5: {} + + cookie-signature@1.2.2: {} + + cookie@0.7.2: {} + + cors@2.8.5: + dependencies: + object-assign: 4.1.1 + vary: 1.1.2 + + debug@4.4.3: + dependencies: + ms: 2.1.3 + + depd@2.0.0: {} + + dotenv@17.2.2: {} + + dunder-proto@1.0.1: + dependencies: + call-bind-apply-helpers: 1.0.2 + es-errors: 1.3.0 + gopd: 1.2.0 + + ee-first@1.1.1: {} + + emoji-regex@8.0.0: {} + + encodeurl@2.0.0: {} + + es-define-property@1.0.1: {} + + es-errors@1.3.0: {} + + es-object-atoms@1.1.1: + dependencies: + es-errors: 1.3.0 + + esbuild@0.25.10: + optionalDependencies: + '@esbuild/aix-ppc64': 0.25.10 + '@esbuild/android-arm': 0.25.10 + '@esbuild/android-arm64': 0.25.10 + '@esbuild/android-x64': 0.25.10 + '@esbuild/darwin-arm64': 0.25.10 + '@esbuild/darwin-x64': 0.25.10 + '@esbuild/freebsd-arm64': 0.25.10 + '@esbuild/freebsd-x64': 0.25.10 + '@esbuild/linux-arm': 0.25.10 + '@esbuild/linux-arm64': 0.25.10 + '@esbuild/linux-ia32': 0.25.10 + '@esbuild/linux-loong64': 0.25.10 + '@esbuild/linux-mips64el': 0.25.10 + '@esbuild/linux-ppc64': 0.25.10 + '@esbuild/linux-riscv64': 0.25.10 + '@esbuild/linux-s390x': 0.25.10 + '@esbuild/linux-x64': 0.25.10 
+ '@esbuild/netbsd-arm64': 0.25.10 + '@esbuild/netbsd-x64': 0.25.10 + '@esbuild/openbsd-arm64': 0.25.10 + '@esbuild/openbsd-x64': 0.25.10 + '@esbuild/openharmony-arm64': 0.25.10 + '@esbuild/sunos-x64': 0.25.10 + '@esbuild/win32-arm64': 0.25.10 + '@esbuild/win32-ia32': 0.25.10 + '@esbuild/win32-x64': 0.25.10 + + escalade@3.2.0: {} + + escape-html@1.0.3: {} + + etag@1.8.1: {} + + eval-protocol@0.1.2(typescript@5.9.2): + dependencies: + typescript: 5.9.2 + zod: 4.1.11 + + express@5.1.0: + dependencies: + accepts: 2.0.0 + body-parser: 2.2.0 + content-disposition: 1.0.0 + content-type: 1.0.5 + cookie: 0.7.2 + cookie-signature: 1.2.2 + debug: 4.4.3 + encodeurl: 2.0.0 + escape-html: 1.0.3 + etag: 1.8.1 + finalhandler: 2.1.0 + fresh: 2.0.0 + http-errors: 2.0.0 + merge-descriptors: 2.0.0 + mime-types: 3.0.1 + on-finished: 2.4.1 + once: 1.4.0 + parseurl: 1.3.3 + proxy-addr: 2.0.7 + qs: 6.14.0 + range-parser: 1.2.1 + router: 2.2.0 + send: 1.2.0 + serve-static: 2.2.0 + statuses: 2.0.2 + type-is: 2.0.1 + vary: 1.1.2 + transitivePeerDependencies: + - supports-color + + finalhandler@2.1.0: + dependencies: + debug: 4.4.3 + encodeurl: 2.0.0 + escape-html: 1.0.3 + on-finished: 2.4.1 + parseurl: 1.3.3 + statuses: 2.0.2 + transitivePeerDependencies: + - supports-color + + forwarded@0.2.0: {} + + fresh@2.0.0: {} + + fsevents@2.3.3: + optional: true + + function-bind@1.1.2: {} + + get-caller-file@2.0.5: {} + + get-intrinsic@1.3.0: + dependencies: + call-bind-apply-helpers: 1.0.2 + es-define-property: 1.0.1 + es-errors: 1.3.0 + es-object-atoms: 1.1.1 + function-bind: 1.1.2 + get-proto: 1.0.1 + gopd: 1.2.0 + has-symbols: 1.1.0 + hasown: 2.0.2 + math-intrinsics: 1.1.0 + + get-proto@1.0.1: + dependencies: + dunder-proto: 1.0.1 + es-object-atoms: 1.1.1 + + get-tsconfig@4.10.1: + dependencies: + resolve-pkg-maps: 1.0.0 + + gopd@1.2.0: {} + + has-symbols@1.1.0: {} + + hasown@2.0.2: + dependencies: + function-bind: 1.1.2 + + helmet@7.2.0: {} + + http-errors@2.0.0: + dependencies: + depd: 2.0.0 
+ inherits: 2.0.4 + setprototypeof: 1.2.0 + statuses: 2.0.1 + toidentifier: 1.0.1 + + iconv-lite@0.6.3: + dependencies: + safer-buffer: 2.1.2 + + iconv-lite@0.7.0: + dependencies: + safer-buffer: 2.1.2 + + import-in-the-middle@1.14.4: + dependencies: + acorn: 8.15.0 + acorn-import-attributes: 1.9.5(acorn@8.15.0) + cjs-module-lexer: 1.4.3 + module-details-from-path: 1.0.4 + + inherits@2.0.4: {} + + ipaddr.js@1.9.1: {} + + is-core-module@2.16.1: + dependencies: + hasown: 2.0.2 + + is-fullwidth-code-point@3.0.0: {} + + is-promise@4.0.0: {} + + lodash.camelcase@4.3.0: {} + + long@5.3.2: {} + + math-intrinsics@1.1.0: {} + + media-typer@1.1.0: {} + + merge-descriptors@2.0.0: {} + + mime-db@1.54.0: {} + + mime-types@3.0.1: + dependencies: + mime-db: 1.54.0 + + module-details-from-path@1.0.4: {} + + ms@2.1.3: {} + + negotiator@1.0.0: {} + + object-assign@4.1.1: {} + + object-inspect@1.13.4: {} + + on-finished@2.4.1: + dependencies: + ee-first: 1.1.1 + + once@1.4.0: + dependencies: + wrappy: 1.0.2 + + openai@5.23.0(zod@3.25.76): + optionalDependencies: + zod: 3.25.76 + + parseurl@1.3.3: {} + + path-parse@1.0.7: {} + + path-to-regexp@8.3.0: {} + + protobufjs@7.5.4: + dependencies: + '@protobufjs/aspromise': 1.1.2 + '@protobufjs/base64': 1.1.2 + '@protobufjs/codegen': 2.0.4 + '@protobufjs/eventemitter': 1.1.0 + '@protobufjs/fetch': 1.1.0 + '@protobufjs/float': 1.0.2 + '@protobufjs/inquire': 1.1.0 + '@protobufjs/path': 1.1.2 + '@protobufjs/pool': 1.1.0 + '@protobufjs/utf8': 1.1.0 + '@types/node': 20.19.17 + long: 5.3.2 + + proxy-addr@2.0.7: + dependencies: + forwarded: 0.2.0 + ipaddr.js: 1.9.1 + + qs@6.14.0: + dependencies: + side-channel: 1.1.0 + + range-parser@1.2.1: {} + + raw-body@3.0.1: + dependencies: + bytes: 3.1.2 + http-errors: 2.0.0 + iconv-lite: 0.7.0 + unpipe: 1.0.0 + + require-directory@2.1.1: {} + + require-in-the-middle@7.5.2: + dependencies: + debug: 4.4.3 + module-details-from-path: 1.0.4 + resolve: 1.22.10 + transitivePeerDependencies: + - supports-color + + 
resolve-pkg-maps@1.0.0: {} + + resolve@1.22.10: + dependencies: + is-core-module: 2.16.1 + path-parse: 1.0.7 + supports-preserve-symlinks-flag: 1.0.0 + + router@2.2.0: + dependencies: + debug: 4.4.3 + depd: 2.0.0 + is-promise: 4.0.0 + parseurl: 1.3.3 + path-to-regexp: 8.3.0 + transitivePeerDependencies: + - supports-color + + safe-buffer@5.2.1: {} + + safer-buffer@2.1.2: {} + + send@1.2.0: + dependencies: + debug: 4.4.3 + encodeurl: 2.0.0 + escape-html: 1.0.3 + etag: 1.8.1 + fresh: 2.0.0 + http-errors: 2.0.0 + mime-types: 3.0.1 + ms: 2.1.3 + on-finished: 2.4.1 + range-parser: 1.2.1 + statuses: 2.0.2 + transitivePeerDependencies: + - supports-color + + serve-static@2.2.0: + dependencies: + encodeurl: 2.0.0 + escape-html: 1.0.3 + parseurl: 1.3.3 + send: 1.2.0 + transitivePeerDependencies: + - supports-color + + setprototypeof@1.2.0: {} + + side-channel-list@1.0.0: + dependencies: + es-errors: 1.3.0 + object-inspect: 1.13.4 + + side-channel-map@1.0.1: + dependencies: + call-bound: 1.0.4 + es-errors: 1.3.0 + get-intrinsic: 1.3.0 + object-inspect: 1.13.4 + + side-channel-weakmap@1.0.2: + dependencies: + call-bound: 1.0.4 + es-errors: 1.3.0 + get-intrinsic: 1.3.0 + object-inspect: 1.13.4 + side-channel-map: 1.0.1 + + side-channel@1.1.0: + dependencies: + es-errors: 1.3.0 + object-inspect: 1.13.4 + side-channel-list: 1.0.0 + side-channel-map: 1.0.1 + side-channel-weakmap: 1.0.2 + + statuses@2.0.1: {} + + statuses@2.0.2: {} + + string-width@4.2.3: + dependencies: + emoji-regex: 8.0.0 + is-fullwidth-code-point: 3.0.0 + strip-ansi: 6.0.1 + + strip-ansi@6.0.1: + dependencies: + ansi-regex: 5.0.1 + + supports-preserve-symlinks-flag@1.0.0: {} + + toidentifier@1.0.1: {} + + tsx@4.20.5: + dependencies: + esbuild: 0.25.10 + get-tsconfig: 4.10.1 + optionalDependencies: + fsevents: 2.3.3 + + type-is@2.0.1: + dependencies: + content-type: 1.0.5 + media-typer: 1.1.0 + mime-types: 3.0.1 + + typescript@5.9.2: {} + + undici-types@6.21.0: {} + + unpipe@1.0.0: {} + + vary@1.1.2: {} + + 
wrap-ansi@7.0.0: + dependencies: + ansi-styles: 4.3.0 + string-width: 4.2.3 + strip-ansi: 6.0.1 + + wrappy@1.0.2: {} + + y18n@5.0.8: {} + + yargs-parser@21.1.1: {} + + yargs@17.7.2: + dependencies: + cliui: 8.0.1 + escalade: 3.2.0 + get-caller-file: 2.0.5 + require-directory: 2.1.1 + string-width: 4.2.3 + y18n: 5.0.8 + yargs-parser: 21.1.1 + + zod@3.25.76: {} + + zod@4.1.11: {} diff --git a/tests/chinook/langfuse/typescript-server/server.ts b/tests/chinook/langfuse/typescript-server/server.ts new file mode 100644 index 00000000..0f14f58c --- /dev/null +++ b/tests/chinook/langfuse/typescript-server/server.ts @@ -0,0 +1,203 @@ +import express, { Request, Response } from "express"; +import cors from "cors"; +import helmet from "helmet"; +import { z } from "zod"; +import { OpenAI } from "openai"; +import { observeOpenAI } from "@langfuse/openai"; +import "./instrumentation"; +import "./env"; +import { + initRequestSchema, + statusResponseSchema, + StatusResponse, + initRequestToCompletionParams, + InitRequest, + createLangfuseConfigTags, +} from "eval-protocol"; + +// In-memory storage for rollout states +interface RolloutState { + rollout_id: string; + status: "running" | "completed" | "failed" | "timeout" | "cancelled"; + started_at: string; + ended_at?: string; + completed_turns: number; + error?: string; +} + +const rolloutStates = new Map(); + +// Express app setup +const app: express.Application = express(); +const PORT = process.env["PORT"] || 3000; + +// Middleware +app.use(helmet()); +app.use(cors()); +app.use(express.json()); + +// Health check endpoint +app.get("/health", (_req: Request, res: Response) => { + res.json({ status: "healthy", timestamp: new Date().toISOString() }); +}); + +// POST /init endpoint +app.post("/init", async (req: Request, res: Response) => { + try { + // Validate request body + const validatedData = initRequestSchema.parse(req.body); + const { rollout_id, model } = validatedData; + + console.log(`Initializing rollout ${rollout_id} 
with model ${model}`); + + // Create rollout state + const rolloutState: RolloutState = { + rollout_id, + status: "running", + started_at: new Date().toISOString(), + completed_turns: 0, + }; + + rolloutStates.set(rollout_id, rolloutState); + + // Simulate async processing + setTimeout(async () => { + await simulateRolloutExecution(validatedData); + }, 100); + + res.status(200).json({ + status: "accepted", + rollout_id, + message: "Rollout initialized successfully", + }); + } catch (error) { + console.error("Error in /init endpoint:", error); + + if (error instanceof z.ZodError) { + res.status(400).json({ + error: "Validation error", + details: error.errors, + }); + } else { + res.status(500).json({ + error: "Internal server error", + message: error instanceof Error ? error.message : "Unknown error", + }); + } + } +}); + +// GET /status endpoint +app.get("/status", (req: Request, res: Response) => { + try { + const { rollout_id } = req.query; + + if (!rollout_id || typeof rollout_id !== "string") { + res.status(400).json({ + error: "Missing or invalid rollout_id parameter", + }); + return; + } + + const rolloutState = rolloutStates.get(rollout_id); + + if (!rolloutState) { + res.status(404).json({ + error: "Rollout not found", + rollout_id, + }); + return; + } + + const response: StatusResponse = { + terminated: rolloutState.status !== "running", + }; + + if (rolloutState.status !== "running") { + response.info = { + reason: rolloutState.status, + ended_at: rolloutState.ended_at || new Date().toISOString(), + ...(rolloutState.error && { error: rolloutState.error }), + }; + } + + const validatedResponse = statusResponseSchema.parse(response); + + res.json(validatedResponse); + } catch (error) { + console.error("Error in /status endpoint:", error); + res.status(500).json({ + error: "Internal server error", + message: error instanceof Error ? 
error.message : "Unknown error", + }); + } +}); + +// Simulate rollout execution +async function simulateRolloutExecution( + initRequest: InitRequest +): Promise { + const rolloutState = rolloutStates.get(initRequest.rollout_id); + if (!rolloutState) return; + + try { + console.log(`Starting rollout execution for ${initRequest.rollout_id}`); + + const openai = new OpenAI({ + apiKey: process.env["OPENAI_API_KEY"], + }); + + const tracedOpenAI = observeOpenAI(openai, { + tags: createLangfuseConfigTags(initRequest), + }); + + const completionParams = initRequestToCompletionParams(initRequest); + + await tracedOpenAI.chat.completions.create(completionParams); + + // Mark as completed + rolloutState.status = "completed"; + rolloutState.ended_at = new Date().toISOString(); + rolloutState.completed_turns = 1; + + console.log(`Rollout ${initRequest.rollout_id} completed successfully`); + } catch (error) { + console.error( + `Error in rollout execution for ${initRequest.rollout_id}:`, + error + ); + + rolloutState.status = "failed"; + rolloutState.ended_at = new Date().toISOString(); + rolloutState.error = + error instanceof Error ? 
error.message : "Unknown error"; + } +} + +// Error handling middleware +app.use((error: Error, _req: Request, res: Response, _next: any) => { + console.error("Unhandled error:", error); + res.status(500).json({ + error: "Internal server error", + message: error.message, + }); +}); + +// 404 handler +app.use((_req: Request, res: Response) => { + res.status(404).json({ + error: "Not found", + path: _req.originalUrl, + }); +}); + +// Start server +app.listen(PORT, () => { + console.log(`🚀 TypeScript Express server running on port ${PORT}`); + console.log(`📋 Available endpoints:`); + console.log(` POST /init - Initialize a rollout`); + console.log(` GET /status?rollout_id={id} - Check rollout status`); + console.log(` GET http://localhost:${PORT}/health - Health check`); +}); + +export default app; diff --git a/tests/chinook/langfuse/typescript-server/tsconfig.json b/tests/chinook/langfuse/typescript-server/tsconfig.json new file mode 100644 index 00000000..82ca67bc --- /dev/null +++ b/tests/chinook/langfuse/typescript-server/tsconfig.json @@ -0,0 +1,30 @@ +{ + "compilerOptions": { + "target": "ES2020", + "module": "NodeNext", + "moduleResolution": "NodeNext", + "lib": ["ES2020"], + "outDir": "./dist", + "rootDir": "./", + "strict": true, + "esModuleInterop": true, + "allowSyntheticDefaultImports": true, + "skipLibCheck": true, + "forceConsistentCasingInFileNames": true, + "declaration": true, + "declarationMap": true, + "sourceMap": true, + "removeComments": false, + "noImplicitAny": true, + "noImplicitReturns": true, + "noImplicitThis": true, + "noUnusedLocals": true, + "noUnusedParameters": true, + "exactOptionalPropertyTypes": true, + "noImplicitOverride": true, + "noPropertyAccessFromIndexSignature": true, + "noUncheckedIndexedAccess": true + }, + "include": ["*.ts"], + "exclude": ["node_modules", "dist"] +} diff --git a/typescript/.cursor/rules/use-bun-instead-of-node-vite-npm-pnpm.mdc b/typescript/.cursor/rules/use-bun-instead-of-node-vite-npm-pnpm.mdc new file 
mode 100644 index 00000000..b8100b77 --- /dev/null +++ b/typescript/.cursor/rules/use-bun-instead-of-node-vite-npm-pnpm.mdc @@ -0,0 +1,111 @@ +--- +description: Use Bun instead of Node.js, npm, pnpm, or vite. +globs: "*.ts, *.tsx, *.html, *.css, *.js, *.jsx, package.json" +alwaysApply: false +--- + +Default to using Bun instead of Node.js. + +- Use `bun ` instead of `node ` or `ts-node ` +- Use `bun test` instead of `jest` or `vitest` +- Use `bun build ` instead of `webpack` or `esbuild` +- Use `bun install` instead of `npm install` or `yarn install` or `pnpm install` +- Use `bun run + + +``` + +With the following `frontend.tsx`: + +```tsx#frontend.tsx +import React from "react"; + +// import .css files directly and it works +import './index.css'; + +import { createRoot } from "react-dom/client"; + +const root = createRoot(document.body); + +export default function Frontend() { + return

Hello, world!

; +} + +root.render(); +``` + +Then, run index.ts + +```sh +bun --hot ./index.ts +``` + +For more information, read the Bun API docs in `node_modules/bun-types/docs/**.md`. diff --git a/typescript/.gitignore b/typescript/.gitignore new file mode 100644 index 00000000..a0dd6762 --- /dev/null +++ b/typescript/.gitignore @@ -0,0 +1,36 @@ +# dependencies (bun install) +node_modules + +# output +out +dist +*.tgz + +# code coverage +coverage +*.lcov + +# logs +logs +_.log +report.[0-9]_.[0-9]_.[0-9]_.[0-9]_.json + +# dotenv environment variable files +.env +.env.development.local +.env.test.local +.env.production.local +.env.local + +# caches +.eslintcache +.cache +*.tsbuildinfo + +# IntelliJ based IDEs +.idea + +# Finder (MacOS) folder config +.DS_Store + +!package.json diff --git a/typescript/README.md b/typescript/README.md new file mode 100644 index 00000000..03727695 --- /dev/null +++ b/typescript/README.md @@ -0,0 +1,13 @@ +# eval-protocol + +This is the TypeScript SDK for [eval-protocol](https://evalprotocol.io). + +It includes helpful Zod schemas. TypeScript types, and helper functions for +creating a remote server for +[RemoteRolloutProcessor](https://evalprotocol.io/tutorial/remote-rollout-processor). 
+ +Installation + +```bash +npm install eval-protocol +``` diff --git a/typescript/bun.lock b/typescript/bun.lock new file mode 100644 index 00000000..5401210e --- /dev/null +++ b/typescript/bun.lock @@ -0,0 +1,37 @@ +{ + "lockfileVersion": 1, + "workspaces": { + "": { + "name": "eval-protocol", + "dependencies": { + "zod": "^4.1.11", + }, + "devDependencies": { + "@types/bun": "latest", + "openai": "^5.23.0", + }, + "peerDependencies": { + "typescript": "^5", + }, + }, + }, + "packages": { + "@types/bun": ["@types/bun@1.2.22", "", { "dependencies": { "bun-types": "1.2.22" } }, "sha512-5A/KrKos2ZcN0c6ljRSOa1fYIyCKhZfIVYeuyb4snnvomnpFqC0tTsEkdqNxbAgExV384OETQ//WAjl3XbYqQA=="], + + "@types/node": ["@types/node@24.5.2", "", { "dependencies": { "undici-types": "~7.12.0" } }, "sha512-FYxk1I7wPv3K2XBaoyH2cTnocQEu8AOZ60hPbsyukMPLv5/5qr7V1i8PLHdl6Zf87I+xZXFvPCXYjiTFq+YSDQ=="], + + "@types/react": ["@types/react@19.1.13", "", { "dependencies": { "csstype": "^3.0.2" } }, "sha512-hHkbU/eoO3EG5/MZkuFSKmYqPbSVk5byPFa3e7y/8TybHiLMACgI8seVYlicwk7H5K/rI2px9xrQp/C+AUDTiQ=="], + + "bun-types": ["bun-types@1.2.22", "", { "dependencies": { "@types/node": "*" }, "peerDependencies": { "@types/react": "^19" } }, "sha512-hwaAu8tct/Zn6Zft4U9BsZcXkYomzpHJX28ofvx7k0Zz2HNz54n1n+tDgxoWFGB4PcFvJXJQloPhaV2eP3Q6EA=="], + + "csstype": ["csstype@3.1.3", "", {}, "sha512-M1uQkMl8rQK/szD0LNhtqxIPLpimGm8sOBwU7lLnCpSbTyY3yeU1Vc7l4KT5zT4s/yOxHH5O7tIuuLOCnLADRw=="], + + "openai": ["openai@5.23.0", "", { "peerDependencies": { "ws": "^8.18.0", "zod": "^3.23.8" }, "optionalPeers": ["ws", "zod"], "bin": { "openai": "bin/cli" } }, "sha512-Cfq155NHzI7VWR67LUNJMIgPZy2oSh7Fld/OKhxq648BiUjELAvcge7g30xJ6vAfwwXf6TVK0KKuN+3nmIJG/A=="], + + "typescript": ["typescript@5.9.2", "", { "bin": { "tsc": "bin/tsc", "tsserver": "bin/tsserver" } }, "sha512-CWBzXQrc/qOkhidw1OzBTQuYRbfyxDXJMVJ1XNwUHGROVmuaeiEm3OslpZ1RV96d7SKKjZKrSJu3+t/xlw3R9A=="], + + "undici-types": ["undici-types@7.12.0", "", {}, 
"sha512-goOacqME2GYyOZZfb5Lgtu+1IDmAlAEu5xnD3+xTzS10hT0vzpf0SPjkXwAw9Jm+4n/mQGDP3LO8CPbYROeBfQ=="], + + "zod": ["zod@4.1.11", "", {}, "sha512-WPsqwxITS2tzx1bzhIKsEs19ABD5vmCVa4xBo2tq/SrV4RNZtfws1EnCWQXM6yh8bD08a1idvkB5MZSBiZsjwg=="], + } +} diff --git a/typescript/index.ts b/typescript/index.ts new file mode 100644 index 00000000..b1b5572c --- /dev/null +++ b/typescript/index.ts @@ -0,0 +1,104 @@ +import z from "zod"; +import type { ChatCompletionCreateParamsNonStreaming } from "openai/resources/chat/completions/completions"; + +// Zod schemas for validation +const roleSchema = z.enum(["system", "user", "assistant"]); +const messageSchema = z.union([ + z.object({ + role: roleSchema, + content: z.string(), + }), + z.object({ + role: z.literal("tool"), + content: z.string(), + tool_call_id: z.string(), + }), +]); + +const functionDefinitionSchema = z + .object({ + name: z.string().regex(/^[a-zA-Z0-9_-]{1,64}$/), + description: z.string().optional(), + // JSON Schema object; allow arbitrary keys + parameters: z.object({}).loose().optional(), + }) + .loose(); + +const toolSchema = z.object({ + type: z.literal("function"), + function: functionDefinitionSchema, +}); + +const metadataSchema = z + .object({ + invocation_id: z.string(), + experiment_id: z.string(), + rollout_id: z.string(), + run_id: z.string(), + row_id: z.string(), + }) + .loose(); + +export const initRequestSchema = z.object({ + rollout_id: z.string(), + model: z.string(), + messages: z.array(messageSchema).min(1), + tools: z.array(toolSchema).optional().nullable(), + metadata: metadataSchema, +}); + +export const statusInfoSchema = z.record(z.string(), z.any()); + +export const statusResponseSchema = z.object({ + terminated: z.boolean(), + info: statusInfoSchema.optional(), +}); + +// Infer types from schemas +export type Message = z.infer; +export type FunctionDefinition = z.infer; +export type Tool = z.infer; +export type Metadata = z.infer; +export type InitRequest = z.infer; +export type StatusInfo 
= z.infer; +export type StatusResponse = z.infer; + +export function initRequestToCompletionParams( + initRequest: InitRequest +): ChatCompletionCreateParamsNonStreaming { + const toolsToOpenAI = initRequest.tools?.map((tool) => ({ + type: "function" as const, + function: tool.function.description + ? { + name: tool.function.name, + description: tool.function.description, + parameters: tool.function.parameters || {}, + } + : { + name: tool.function.name, + parameters: tool.function.parameters || {}, + }, + })); + + const completionParams = toolsToOpenAI + ? { + model: initRequest.model, + messages: initRequest.messages, + tools: toolsToOpenAI, + } + : { + model: initRequest.model, + messages: initRequest.messages, + }; + return completionParams; +} + +export function createLangfuseConfigTags(initRequest: InitRequest): string[] { + return [ + `invocation_id:${initRequest.metadata.invocation_id}`, + `experiment_id:${initRequest.metadata.experiment_id}`, + `rollout_id:${initRequest.metadata.rollout_id}`, + `run_id:${initRequest.metadata.run_id}`, + `row_id:${initRequest.metadata.row_id}`, + ]; +} diff --git a/typescript/package.json b/typescript/package.json new file mode 100644 index 00000000..986409e2 --- /dev/null +++ b/typescript/package.json @@ -0,0 +1,16 @@ +{ + "name": "eval-protocol", + "module": "index.ts", + "type": "module", + "version": "0.1.2", + "devDependencies": { + "@types/bun": "latest", + "openai": "^5.23.0" + }, + "peerDependencies": { + "typescript": "^5" + }, + "dependencies": { + "zod": "^4.1.11" + } +} diff --git a/typescript/tsconfig.json b/typescript/tsconfig.json new file mode 100644 index 00000000..bfa0fead --- /dev/null +++ b/typescript/tsconfig.json @@ -0,0 +1,29 @@ +{ + "compilerOptions": { + // Environment setup & latest features + "lib": ["ESNext"], + "target": "ESNext", + "module": "Preserve", + "moduleDetection": "force", + "jsx": "react-jsx", + "allowJs": true, + + // Bundler mode + "moduleResolution": "bundler", + 
"allowImportingTsExtensions": true, + "verbatimModuleSyntax": true, + "noEmit": true, + + // Best practices + "strict": true, + "skipLibCheck": true, + "noFallthroughCasesInSwitch": true, + "noUncheckedIndexedAccess": true, + "noImplicitOverride": true, + + // Some stricter flags (disabled by default) + "noUnusedLocals": false, + "noUnusedParameters": false, + "noPropertyAccessFromIndexSignature": false + } +} From 941b21eb9e64c4ec848d70954efd01897d9d79b8 Mon Sep 17 00:00:00 2001 From: Derek Xu Date: Thu, 25 Sep 2025 15:49:17 -0700 Subject: [PATCH 12/12] move folders --- eval_protocol/types/remote_rollout_processor.py | 1 + .../langfuse => remote_server}/remote_server.py | 14 +------------- .../test_remote_langfuse.py} | 4 ++-- .../test_remote_langfuse_typescript.py} | 1 - .../typescript-server/.gitignore | 0 .../typescript-server/README.md | 0 .../typescript-server/env.ts | 0 .../typescript-server/instrumentation.ts | 2 +- .../typescript-server/package.json | 0 .../typescript-server/pnpm-lock.yaml | 0 .../typescript-server/server.ts | 4 ++-- .../typescript-server/tsconfig.json | 0 12 files changed, 7 insertions(+), 19 deletions(-) rename tests/{chinook/langfuse => remote_server}/remote_server.py (77%) rename tests/{chinook/langfuse/test_remote_langfuse_chinook.py => remote_server/test_remote_langfuse.py} (95%) rename tests/{chinook/langfuse/test_remote_langfuse_chinook_typescript.py => remote_server/test_remote_langfuse_typescript.py} (99%) rename tests/{chinook/langfuse => remote_server}/typescript-server/.gitignore (100%) rename tests/{chinook/langfuse => remote_server}/typescript-server/README.md (100%) rename tests/{chinook/langfuse => remote_server}/typescript-server/env.ts (100%) rename tests/{chinook/langfuse => remote_server}/typescript-server/instrumentation.ts (95%) rename tests/{chinook/langfuse => remote_server}/typescript-server/package.json (100%) rename tests/{chinook/langfuse => remote_server}/typescript-server/pnpm-lock.yaml (100%) rename 
tests/{chinook/langfuse => remote_server}/typescript-server/server.ts (99%) rename tests/{chinook/langfuse => remote_server}/typescript-server/tsconfig.json (100%) diff --git a/eval_protocol/types/remote_rollout_processor.py b/eval_protocol/types/remote_rollout_processor.py index 405692d1..bdc1f9f2 100644 --- a/eval_protocol/types/remote_rollout_processor.py +++ b/eval_protocol/types/remote_rollout_processor.py @@ -31,6 +31,7 @@ class StatusResponse(BaseModel): """Response model for GET /status endpoint.""" terminated: bool + info: Optional[Dict[str, Any]] = None def create_langfuse_config_tags(init_request: InitRequest) -> List[str]: diff --git a/tests/chinook/langfuse/remote_server.py b/tests/remote_server/remote_server.py similarity index 77% rename from tests/chinook/langfuse/remote_server.py rename to tests/remote_server/remote_server.py index 2add76c1..edd7f198 100644 --- a/tests/chinook/langfuse/remote_server.py +++ b/tests/remote_server/remote_server.py @@ -19,18 +19,6 @@ _STATE: Dict[str, Dict[str, Any]] = {} -ALLOWED_MESSAGE_FIELDS = {"role", "content", "tool_calls", "tool_call_id", "name"} - - -def _clean_messages_for_api(messages: List[Message]) -> list[dict]: - cleaned: list[dict] = [] - for msg in messages: - msg_dict = msg.model_dump() - cm = {k: v for k, v in msg_dict.items() if k in ALLOWED_MESSAGE_FIELDS and v is not None} - # Some providers dislike empty content on assistant messages; keep if present - cleaned.append(cm) - return cleaned - @app.post("/init") def init(req: InitRequest): @@ -44,7 +32,7 @@ def _worker(): completion_kwargs = { "model": req.model, - "messages": _clean_messages_for_api(req.messages), + "messages": req.messages, "metadata": metadata, } diff --git a/tests/chinook/langfuse/test_remote_langfuse_chinook.py b/tests/remote_server/test_remote_langfuse.py similarity index 95% rename from tests/chinook/langfuse/test_remote_langfuse_chinook.py rename to tests/remote_server/test_remote_langfuse.py index cdcd68fb..313908d1 100644 
--- a/tests/chinook/langfuse/test_remote_langfuse_chinook.py +++ b/tests/remote_server/test_remote_langfuse.py @@ -1,7 +1,7 @@ # MANUAL SERVER STARTUP REQUIRED: # Before running this test, start the remote server manually: # cd /Users/derekxu/Documents/code/python-sdk -# python -m tests.chinook.langfuse.remote_server +# python -m tests.remote_server.remote_server # # The server should be running on http://127.0.0.1:7077 @@ -64,7 +64,7 @@ def rows() -> List[EvaluationRow]: async def test_remote_rollout_and_fetch_langfuse(row: EvaluationRow) -> EvaluationRow: """ End-to-end test: - - REQUIRES MANUAL SERVER STARTUP: python -m tests.chinook.langfuse.remote_server + - REQUIRES MANUAL SERVER STARTUP: python -m tests.remote_server.remote_server - trigger remote rollout via RemoteRolloutProcessor (calls init/status) - fetch traces from Langfuse filtered by metadata via output_data_loader; FAIL if none found """ diff --git a/tests/chinook/langfuse/test_remote_langfuse_chinook_typescript.py b/tests/remote_server/test_remote_langfuse_typescript.py similarity index 99% rename from tests/chinook/langfuse/test_remote_langfuse_chinook_typescript.py rename to tests/remote_server/test_remote_langfuse_typescript.py index bb3b7bf5..942a12ad 100644 --- a/tests/chinook/langfuse/test_remote_langfuse_chinook_typescript.py +++ b/tests/remote_server/test_remote_langfuse_typescript.py @@ -1,6 +1,5 @@ import os from typing import List -import atexit import pytest diff --git a/tests/chinook/langfuse/typescript-server/.gitignore b/tests/remote_server/typescript-server/.gitignore similarity index 100% rename from tests/chinook/langfuse/typescript-server/.gitignore rename to tests/remote_server/typescript-server/.gitignore diff --git a/tests/chinook/langfuse/typescript-server/README.md b/tests/remote_server/typescript-server/README.md similarity index 100% rename from tests/chinook/langfuse/typescript-server/README.md rename to tests/remote_server/typescript-server/README.md diff --git 
a/tests/chinook/langfuse/typescript-server/env.ts b/tests/remote_server/typescript-server/env.ts similarity index 100% rename from tests/chinook/langfuse/typescript-server/env.ts rename to tests/remote_server/typescript-server/env.ts diff --git a/tests/chinook/langfuse/typescript-server/instrumentation.ts b/tests/remote_server/typescript-server/instrumentation.ts similarity index 95% rename from tests/chinook/langfuse/typescript-server/instrumentation.ts rename to tests/remote_server/typescript-server/instrumentation.ts index 5918ac32..1992f78a 100644 --- a/tests/chinook/langfuse/typescript-server/instrumentation.ts +++ b/tests/remote_server/typescript-server/instrumentation.ts @@ -1,6 +1,6 @@ import { NodeSDK } from "@opentelemetry/sdk-node"; import { LangfuseSpanProcessor } from "@langfuse/otel"; -import "./env"; +import "./env.js"; const sdk = new NodeSDK({ spanProcessors: [ diff --git a/tests/chinook/langfuse/typescript-server/package.json b/tests/remote_server/typescript-server/package.json similarity index 100% rename from tests/chinook/langfuse/typescript-server/package.json rename to tests/remote_server/typescript-server/package.json diff --git a/tests/chinook/langfuse/typescript-server/pnpm-lock.yaml b/tests/remote_server/typescript-server/pnpm-lock.yaml similarity index 100% rename from tests/chinook/langfuse/typescript-server/pnpm-lock.yaml rename to tests/remote_server/typescript-server/pnpm-lock.yaml diff --git a/tests/chinook/langfuse/typescript-server/server.ts b/tests/remote_server/typescript-server/server.ts similarity index 99% rename from tests/chinook/langfuse/typescript-server/server.ts rename to tests/remote_server/typescript-server/server.ts index 0f14f58c..0551e639 100644 --- a/tests/chinook/langfuse/typescript-server/server.ts +++ b/tests/remote_server/typescript-server/server.ts @@ -4,8 +4,8 @@ import helmet from "helmet"; import { z } from "zod"; import { OpenAI } from "openai"; import { observeOpenAI } from "@langfuse/openai"; -import 
"./instrumentation"; -import "./env"; +import "./instrumentation.js"; +import "./env.js"; import { initRequestSchema, statusResponseSchema, diff --git a/tests/chinook/langfuse/typescript-server/tsconfig.json b/tests/remote_server/typescript-server/tsconfig.json similarity index 100% rename from tests/chinook/langfuse/typescript-server/tsconfig.json rename to tests/remote_server/typescript-server/tsconfig.json