From c6db5898e7a90d2e5e8c070fbe82ec2bfa00d5f1 Mon Sep 17 00:00:00 2001
From: Yinghan Ma <yinghan.ma@fireworks.ai>
Date: Thu, 25 Sep 2025 23:18:25 -0700
Subject: [PATCH 1/5] support custom invocation id via EP_INVOCATION_ID env var

---
 eval_protocol/pytest/evaluation_test.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/eval_protocol/pytest/evaluation_test.py b/eval_protocol/pytest/evaluation_test.py
index 4aabd296..535022b1 100644
--- a/eval_protocol/pytest/evaluation_test.py
+++ b/eval_protocol/pytest/evaluation_test.py
@@ -189,7 +189,7 @@ def evaluation_test(
     completion_params = parse_ep_completion_params(completion_params)
     original_completion_params = completion_params
     passed_threshold = parse_ep_passed_threshold(passed_threshold)
-
+    custom_invocation_id = os.environ.get("EP_INVOCATION_ID", None)
     def decorator(
         test_func: TestFunction,
     ) -> TestFunction:
@@ -228,7 +228,10 @@ def decorator(
         # Create wrapper function with exact signature that pytest expects
         def create_wrapper_with_signature() -> Callable[[], None]:
             # Create the function body that will be used
-            invocation_id = generate_id()
+            if custom_invocation_id:
+                invocation_id = custom_invocation_id
+            else:
+                invocation_id = generate_id()
 
             async def wrapper_body(**kwargs: Unpack[ParameterizedTestKwargs]) -> None:
                 # Store URL for viewing results (after all postprocessing is complete)

From 914205b7d451285af722ddae3a323b3f5dce6f2a Mon Sep 17 00:00:00 2001
From: Yinghan Ma <yinghan.ma@fireworks.ai>
Date: Thu, 25 Sep 2025 23:20:28 -0700
Subject: [PATCH 2/5] format

---
 eval_protocol/pytest/evaluation_test.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/eval_protocol/pytest/evaluation_test.py b/eval_protocol/pytest/evaluation_test.py
index 535022b1..4625114a 100644
--- a/eval_protocol/pytest/evaluation_test.py
+++ b/eval_protocol/pytest/evaluation_test.py
@@ -190,6 +190,7 @@ def evaluation_test(
     original_completion_params = completion_params
     passed_threshold = parse_ep_passed_threshold(passed_threshold)
     custom_invocation_id = os.environ.get("EP_INVOCATION_ID", None)
+
     def decorator(
         test_func: TestFunction,
     ) -> TestFunction:

From c1c97f88833411fb164b4e2943b32eabc3912cee Mon Sep 17 00:00:00 2001
From: Yinghan Ma <yinghan.ma@fireworks.ai>
Date: Fri, 26 Sep 2025 11:12:16 -0700
Subject: [PATCH 3/5] add test

---
 tests/pytest/test_pytest_env_overwrite.py | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)
 create mode 100644 tests/pytest/test_pytest_env_overwrite.py

diff --git a/tests/pytest/test_pytest_env_overwrite.py b/tests/pytest/test_pytest_env_overwrite.py
new file mode 100644
index 00000000..dcf1409d
--- /dev/null
+++ b/tests/pytest/test_pytest_env_overwrite.py
@@ -0,0 +1,18 @@
+from eval_protocol.models import EvaluationRow, Message
+from eval_protocol.pytest import evaluation_test
+from eval_protocol.pytest.default_no_op_rollout_processor import NoOpRolloutProcessor
+import os
+
+os.environ["EP_INVOCATION_ID"] = "test-invocation-123"
+
+@evaluation_test(
+    input_rows=[[EvaluationRow(messages=[Message(role="user", content="What is the capital of France?")])]],
+    completion_params=[{"model": "no-op"}],
+    rollout_processor=NoOpRolloutProcessor(),
+    mode="pointwise",
+)
+def test_input_messages_in_decorator(row: EvaluationRow) -> EvaluationRow:
+    """Run math evaluation on sample dataset using pytest interface."""
+    assert row.messages[0].content == "What is the capital of France?"
+    assert row.execution_metadata.invocation_id == "test-invocation-123"
+    return row

From 08c9e485c7de62caaca8ec54a1866efa7bde5cfb Mon Sep 17 00:00:00 2001
From: Yinghan Ma <yinghan.ma@fireworks.ai>
Date: Fri, 26 Sep 2025 11:16:41 -0700
Subject: [PATCH 4/5] format

---
 tests/pytest/test_pytest_env_overwrite.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/pytest/test_pytest_env_overwrite.py b/tests/pytest/test_pytest_env_overwrite.py
index dcf1409d..8b3e28aa 100644
--- a/tests/pytest/test_pytest_env_overwrite.py
+++ b/tests/pytest/test_pytest_env_overwrite.py
@@ -5,6 +5,7 @@
 
 os.environ["EP_INVOCATION_ID"] = "test-invocation-123"
 
+
 @evaluation_test(
     input_rows=[[EvaluationRow(messages=[Message(role="user", content="What is the capital of France?")])]],
     completion_params=[{"model": "no-op"}],

From d435b6ec1d7ccc9bde3260b60219d5c1f98839ce Mon Sep 17 00:00:00 2001
From: Yinghan Ma <yinghan.ma@fireworks.ai>
Date: Fri, 26 Sep 2025 11:43:20 -0700
Subject: [PATCH 5/5] scope EP_INVOCATION_ID override in test with mock.patch.dict

---
 tests/pytest/test_pytest_env_overwrite.py | 25 ++++++++++++-----------
 1 file changed, 13 insertions(+), 12 deletions(-)

diff --git a/tests/pytest/test_pytest_env_overwrite.py b/tests/pytest/test_pytest_env_overwrite.py
index 8b3e28aa..c88dd2b8 100644
--- a/tests/pytest/test_pytest_env_overwrite.py
+++ b/tests/pytest/test_pytest_env_overwrite.py
@@ -2,18 +2,19 @@
 from eval_protocol.pytest import evaluation_test
 from eval_protocol.pytest.default_no_op_rollout_processor import NoOpRolloutProcessor
 import os
+from unittest import mock
 
-os.environ["EP_INVOCATION_ID"] = "test-invocation-123"
 
+with mock.patch.dict(os.environ, {"EP_INVOCATION_ID": "test-invocation-123"}):
 
-@evaluation_test(
-    input_rows=[[EvaluationRow(messages=[Message(role="user", content="What is the capital of France?")])]],
-    completion_params=[{"model": "no-op"}],
-    rollout_processor=NoOpRolloutProcessor(),
-    mode="pointwise",
-)
-def test_input_messages_in_decorator(row: EvaluationRow) -> EvaluationRow:
-    """Run math evaluation on sample dataset using pytest interface."""
-    assert row.messages[0].content == "What is the capital of France?"
-    assert row.execution_metadata.invocation_id == "test-invocation-123"
-    return row
+    @evaluation_test(
+        input_rows=[[EvaluationRow(messages=[Message(role="user", content="What is the capital of France?")])]],
+        completion_params=[{"model": "no-op"}],
+        rollout_processor=NoOpRolloutProcessor(),
+        mode="pointwise",
+    )
+    def test_input_messages_in_decorator(row: EvaluationRow) -> EvaluationRow:
+        """Check that EP_INVOCATION_ID overrides the generated invocation id."""
+        assert row.messages[0].content == "What is the capital of France?"
+        assert row.execution_metadata.invocation_id == "test-invocation-123"
+        return row