From dc2a6f5b240b3df5031018fcb9b5ff3b5368c694 Mon Sep 17 00:00:00 2001 From: Derek Xu Date: Sun, 26 Oct 2025 03:03:00 -0700 Subject: [PATCH 1/9] fail proxy loudly --- eval_protocol/__init__.py | 13 ++++++++++--- pyproject.toml | 6 ++++++ 2 files changed, 16 insertions(+), 3 deletions(-) diff --git a/eval_protocol/__init__.py b/eval_protocol/__init__.py index e0ca05cb..3a0f959c 100644 --- a/eval_protocol/__init__.py +++ b/eval_protocol/__init__.py @@ -81,9 +81,16 @@ try: from .proxy import create_app, AuthProvider, AccountInfo except ImportError: - create_app = None - AuthProvider = None - AccountInfo = None + + def _proxy_import_error(*args, **kwargs): + raise ImportError( + "Proxy functionality requires additional dependencies. " + "Please install with: pip install eval-protocol[proxy]" + ) + + create_app = _proxy_import_error + AuthProvider = _proxy_import_error + AccountInfo = _proxy_import_error warnings.filterwarnings("default", category=DeprecationWarning, module="eval_protocol") diff --git a/pyproject.toml b/pyproject.toml index fd7e6961..e13e813e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -142,6 +142,12 @@ langgraph_tools = [ "langchain-fireworks>=0.3.0", ] +proxy = [ + "redis>=5.0.0", + "langfuse>=2.0.0", + "uuid6>=2025.0.0", +] + [project.scripts] fireworks-reward = "eval_protocol.cli:main" eval-protocol = "eval_protocol.cli:main" From ea38ccdd277f60f38cde50f584f4d8ce536267f1 Mon Sep 17 00:00:00 2001 From: Derek Xu Date: Sun, 26 Oct 2025 03:07:05 -0700 Subject: [PATCH 2/9] lock --- uv.lock | 31 ++++++++++++++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) diff --git a/uv.lock b/uv.lock index e75a7c55..e5f6e6dc 100644 --- a/uv.lock +++ b/uv.lock @@ -1313,6 +1313,11 @@ langsmith = [ openevals = [ { name = "openevals" }, ] +proxy = [ + { name = "langfuse" }, + { name = "redis" }, + { name = "uuid6" }, +] pydantic = [ { name = "pydantic-ai" }, ] @@ -1371,6 +1376,7 @@ requires-dist = [ { name = "langchain-core", marker = "extra == 'langgraph'", specifier = ">=0.3.75" }, { name = "langchain-fireworks", marker = "extra == 'langgraph-tools'", specifier = ">=0.3.0" }, { name = "langfuse", marker = "extra == 'langfuse'", specifier = ">=2.0.0" }, + { name = "langfuse", marker = "extra == 'proxy'", specifier = ">=2.0.0" }, { name = "langgraph", marker = "extra == 'langgraph'", specifier = ">=0.6.7" }, { name = "langgraph", marker = "extra == 'langgraph-tools'", specifier = ">=0.6.7" }, { name = "langsmith", marker = "extra == 'langsmith'", specifier = ">=0.1.86" }, @@ -1399,6 +1405,7 @@ requires-dist = [ { name = "python-dotenv", specifier = ">=0.19.0" }, { name = "pyyaml", specifier = ">=5.0" }, { name = "questionary", specifier = ">=2.0.0" }, + { name = "redis", marker = "extra == 'proxy'", specifier = ">=5.0.0" }, { name = "requests", specifier = ">=2.25.0" }, { name = "rich", specifier = ">=12.0.0" }, { name = "ruff", marker = "extra == 'dev'", specifier = ">=0.5.0" }, @@ -1417,12 +1424,13 @@ requires-dist = [ { name = "types-pyyaml", marker = "extra == 'dev'" }, { name = "types-requests", marker = "extra == 'dev'" }, { name = "types-setuptools", marker = "extra == 'dev'" }, + { name = "uuid6", marker = "extra == 'proxy'", specifier = ">=2025.0.0" }, { name = "uvicorn", specifier = ">=0.15.0" }, { name = "versioneer", marker = "extra == 'dev'", specifier = ">=0.20" }, { name = "websockets", specifier = ">=15.0.1" }, { name = "werkzeug", marker = "extra == 'dev'", specifier = ">=2.0.0" }, ] -provides-extras = ["dev", "trl", "openevals", "fireworks", "box2d", "langfuse", "huggingface", "langsmith", "bigquery", "svgbench", "pydantic", "supabase", "chinook", "langchain", "braintrust", "langgraph", "langgraph-tools"] +provides-extras = ["dev", "trl", "openevals", "fireworks", "box2d", "langfuse", "huggingface", "langsmith", "bigquery", "svgbench", "pydantic", "supabase", "chinook", "langchain", "braintrust", "langgraph", "langgraph-tools", "proxy"] [package.metadata.requires-dev] dev = [ @@ -5682,6 +5690,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/d2/07/a5c7aef12f9a3497f5ad77157a37915645861e8b23b89b2ad4b0f11b48ad/realtime-2.7.0-py3-none-any.whl", hash = "sha256:d55a278803529a69d61c7174f16563a9cfa5bacc1664f656959694481903d99c", size = 22409, upload-time = "2025-07-28T18:54:21.383Z" }, ] +[[package]] +name = "redis" +version = "7.0.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "async-timeout", marker = "python_full_version < '3.11.3'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/d2/0e/80de0c7d9b04360331906b6b713a967e6523d155a92090983eba2e99302e/redis-7.0.0.tar.gz", hash = "sha256:6546ada54354248a53a47342d36abe6172bb156f23d24f018fda2e3c06b9c97a", size = 4754895, upload-time = "2025-10-22T15:38:36.128Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/aa/de/68c1add9d9a49588e6f75a149e079e44bab973e748a35e0582ccada09002/redis-7.0.0-py3-none-any.whl", hash = "sha256:1e66c8355b3443af78367c4937484cd875fdf9f5f14e1fed14aa95869e64f6d1", size = 339526, upload-time = "2025-10-22T15:38:34.901Z" }, +] + [[package]] name = "referencing" version = "0.36.2" @@ -6998,6 +7018,15 @@ socks = [ { name = "pysocks" }, ] +[[package]] +name = "uuid6" +version = "2025.0.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/ca/b7/4c0f736ca824b3a25b15e8213d1bcfc15f8ac2ae48d1b445b310892dc4da/uuid6-2025.0.1.tar.gz", hash = "sha256:cd0af94fa428675a44e32c5319ec5a3485225ba2179eefcf4c3f205ae30a81bd", size = 13932, upload-time = "2025-07-04T18:30:35.186Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/3d/b2/93faaab7962e2aa8d6e174afb6f76be2ca0ce89fde14d3af835acebcaa59/uuid6-2025.0.1-py3-none-any.whl", hash = "sha256:80530ce4d02a93cdf82e7122ca0da3ebbbc269790ec1cb902481fa3e9cc9ff99", size = 6979, upload-time = "2025-07-04T18:30:34.001Z" }, +] + [[package]] name = "uvicorn" version = "0.35.0" From 015e856eadd30983c5d57baa8864c382454d97ca Mon Sep 17 00:00:00 2001 From: Derek Xu Date: Mon, 27 Oct 2025 00:29:26 -0700 Subject: [PATCH 3/9] fix --- eval_protocol/__init__.py | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/eval_protocol/__init__.py b/eval_protocol/__init__.py index 3a0f959c..9c26bdb1 100644 --- a/eval_protocol/__init__.py +++ b/eval_protocol/__init__.py @@ -79,18 +79,28 @@ WeaveAdapter = None try: - from .proxy import create_app, AuthProvider, AccountInfo + from .proxy import create_app, AuthProvider, AccountInfo # pyright: ignore[reportAssignmentType] except ImportError: - def _proxy_import_error(*args, **kwargs): + def create_app(*args, **kwargs): raise ImportError( "Proxy functionality requires additional dependencies. " "Please install with: pip install eval-protocol[proxy]" ) - create_app = _proxy_import_error - AuthProvider = _proxy_import_error - AccountInfo = _proxy_import_error + class AuthProvider: + def __init__(self, *args, **kwargs): + raise ImportError( + "Proxy functionality requires additional dependencies. " + "Please install with: pip install eval-protocol[proxy]" + ) + + class AccountInfo: + def __init__(self, *args, **kwargs): + raise ImportError( + "Proxy functionality requires additional dependencies. " + "Please install with: pip install eval-protocol[proxy]" + ) warnings.filterwarnings("default", category=DeprecationWarning, module="eval_protocol") From 8ec64cedbfcc4b4c33ea249fad55c7f1e846acd2 Mon Sep 17 00:00:00 2001 From: Derek Xu Date: Mon, 27 Oct 2025 00:50:06 -0700 Subject: [PATCH 4/9] fix test --- tests/remote_server/test_remote_fireworks.py | 1 - .../test_remote_fireworks_propagate_status.py | 16 ++++++++++++++++ 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/tests/remote_server/test_remote_fireworks.py b/tests/remote_server/test_remote_fireworks.py index b385dd03..48a56c08 100644 --- a/tests/remote_server/test_remote_fireworks.py +++ b/tests/remote_server/test_remote_fireworks.py @@ -98,7 +98,6 @@ def rows() -> List[EvaluationRow]: return [row, row, row] -@pytest.mark.skipif(os.environ.get("CI") == "true", reason="Only run this test locally (skipped in CI)") @pytest.mark.parametrize( "completion_params", [{"model": "fireworks_ai/accounts/fireworks/models/gpt-oss-120b", "temperature": 0.5}], diff --git a/tests/remote_server/test_remote_fireworks_propagate_status.py b/tests/remote_server/test_remote_fireworks_propagate_status.py index d924832d..e415ed61 100644 --- a/tests/remote_server/test_remote_fireworks_propagate_status.py +++ b/tests/remote_server/test_remote_fireworks_propagate_status.py @@ -23,6 +23,9 @@ from eval_protocol.models import EvaluationRow, Message, Status from eval_protocol.pytest import evaluation_test from eval_protocol.pytest.remote_rollout_processor import RemoteRolloutProcessor +from eval_protocol.adapters.fireworks_tracing import FireworksTracingAdapter +from eval_protocol.utils.evaluation_row_utils import filter_longest_conversation +from eval_protocol.types.remote_rollout_processor import DataLoaderConfig def find_available_port() -> int: @@ -75,6 +78,18 @@ def setup_remote_server(): process.wait() +def fetch_fireworks_traces(config: DataLoaderConfig) -> List[EvaluationRow]: + base_url = config.model_base_url or "https://tracing.fireworks.ai" + adapter = FireworksTracingAdapter(base_url=base_url) + return adapter.get_evaluation_rows(tags=[f"rollout_id:{config.rollout_id}"], max_retries=7) + + +def fireworks_output_data_loader(config: DataLoaderConfig) -> DynamicDataLoader: + return DynamicDataLoader( + generators=[lambda: fetch_fireworks_traces(config)], preprocess_fn=filter_longest_conversation + ) + + def rows() -> List[EvaluationRow]: row = EvaluationRow(messages=[Message(role="user", content="What is the capital of France?")]) return [row] @@ -88,6 +103,7 @@ def rows() -> List[EvaluationRow]: rollout_processor=RemoteRolloutProcessor( remote_base_url=f"http://127.0.0.1:{SERVER_PORT}", timeout_seconds=120, + output_data_loader=fireworks_output_data_loader, ), ) async def test_remote_rollout_and_fetch_fireworks_propagate_status(row: EvaluationRow) -> EvaluationRow: From 5a85830b96a2f74984acdc170632554e3e8badf5 Mon Sep 17 00:00:00 2001 From: Derek Xu Date: Mon, 27 Oct 2025 00:58:59 -0700 Subject: [PATCH 5/9] one fix at a time --- tests/remote_server/test_remote_fireworks.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/remote_server/test_remote_fireworks.py b/tests/remote_server/test_remote_fireworks.py index 48a56c08..b385dd03 100644 --- a/tests/remote_server/test_remote_fireworks.py +++ b/tests/remote_server/test_remote_fireworks.py @@ -98,6 +98,7 @@ def rows() -> List[EvaluationRow]: return [row, row, row] +@pytest.mark.skipif(os.environ.get("CI") == "true", reason="Only run this test locally (skipped in CI)") @pytest.mark.parametrize( "completion_params", [{"model": "fireworks_ai/accounts/fireworks/models/gpt-oss-120b", "temperature": 0.5}], From ae6d2cd2d491ed79b6978cb5cebd290215bc23d2 Mon Sep 17 00:00:00 2001 From: Derek Xu Date: Mon, 27 Oct 2025 01:06:37 -0700 Subject: [PATCH 6/9] test --- .github/workflows/ci.yml | 1 + tests/remote_server/test_remote_fireworks.py | 6 +----- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 9b90b08b..6fbd0858 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -109,6 +109,7 @@ jobs: --ignore=tests/test_tau_bench_airline_smoke.py \ --ignore=tests/pytest/test_svgbench.py \ --ignore=tests/pytest/test_livesvgbench.py \ + --ignore=tests/remote_server/test_remote_fireworks.py \ --ignore=tests/remote_server/test_remote_fireworks_propagate_status.py \ --ignore=tests/logging/test_elasticsearch_direct_http_handler.py \ --ignore=eval_protocol/benchmarks/ \ diff --git a/tests/remote_server/test_remote_fireworks.py b/tests/remote_server/test_remote_fireworks.py index b385dd03..7e0ceee7 100644 --- a/tests/remote_server/test_remote_fireworks.py +++ b/tests/remote_server/test_remote_fireworks.py @@ -98,11 +98,7 @@ def rows() -> List[EvaluationRow]: return [row, row, row] -@pytest.mark.skipif(os.environ.get("CI") == "true", reason="Only run this test locally (skipped in CI)") -@pytest.mark.parametrize( - "completion_params", - [{"model": "fireworks_ai/accounts/fireworks/models/gpt-oss-120b", "temperature": 0.5}], -) +@pytest.mark.parametrize("completion_params", [{"model": "fireworks_ai/accounts/fireworks/models/gpt-oss-120b"}]) @evaluation_test( data_loaders=DynamicDataLoader( generators=[rows], From 2827c52b7a4d7d9e652934e8308e424645bcc52e Mon Sep 17 00:00:00 2001 From: Derek Xu Date: Mon, 27 Oct 2025 01:07:36 -0700 Subject: [PATCH 7/9] revert --- tests/remote_server/test_remote_fireworks.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tests/remote_server/test_remote_fireworks.py b/tests/remote_server/test_remote_fireworks.py index 7e0ceee7..48a56c08 100644 --- a/tests/remote_server/test_remote_fireworks.py +++ b/tests/remote_server/test_remote_fireworks.py @@ -98,7 +98,10 @@ def rows() -> List[EvaluationRow]: return [row, row, row] -@pytest.mark.parametrize("completion_params", [{"model": "fireworks_ai/accounts/fireworks/models/gpt-oss-120b"}]) +@pytest.mark.parametrize( + "completion_params", + [{"model": "fireworks_ai/accounts/fireworks/models/gpt-oss-120b", "temperature": 0.5}], +) @evaluation_test( data_loaders=DynamicDataLoader( generators=[rows], From fb1b9a21a32a921cb3af8948f769fdd45148e41a Mon Sep 17 00:00:00 2001 From: Derek Xu Date: Mon, 27 Oct 2025 10:41:48 -0700 Subject: [PATCH 8/9] test --- eval_protocol/proxy/proxy_core/litellm.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/eval_protocol/proxy/proxy_core/litellm.py b/eval_protocol/proxy/proxy_core/litellm.py index cdd2383b..7c24da54 100644 --- a/eval_protocol/proxy/proxy_core/litellm.py +++ b/eval_protocol/proxy/proxy_core/litellm.py @@ -108,6 +108,10 @@ async def handle_chat_completion( # Forward to LiteLLM litellm_url = f"{config.litellm_url}/chat/completions" + print("litellm_url: ", litellm_url) + print("data: ", data) + print("headers: ", headers) + response = await client.post( litellm_url, json=data, # httpx will serialize and set correct Content-Length From d42e46497e4541c50bc3e7fb53b8a7fde2c06381 Mon Sep 17 00:00:00 2001 From: Derek Xu Date: Mon, 27 Oct 2025 11:14:47 -0700 Subject: [PATCH 9/9] remove --- eval_protocol/proxy/proxy_core/litellm.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/eval_protocol/proxy/proxy_core/litellm.py b/eval_protocol/proxy/proxy_core/litellm.py index 7c24da54..cdd2383b 100644 --- a/eval_protocol/proxy/proxy_core/litellm.py +++ b/eval_protocol/proxy/proxy_core/litellm.py @@ -108,10 +108,6 @@ async def handle_chat_completion( # Forward to LiteLLM litellm_url = f"{config.litellm_url}/chat/completions" - print("litellm_url: ", litellm_url) - print("data: ", data) - print("headers: ", headers) - response = await client.post( litellm_url, json=data, # httpx will serialize and set correct Content-Length