Skip to content

Commit 54b5cf7

Browse files
committed
Include all .md files in vendor.tau2 data to fix simulation_guidelines errors
1 parent 820440d commit 54b5cf7

File tree

3 files changed: 4 additions and 4 deletions.

eval_protocol/benchmarks/test_tau_bench_airline.py

Lines changed: 1 addition & 1 deletion
@@ -119,7 +119,7 @@ def tau_bench_airline_to_evaluation_row(data: List[Dict[str, Any]]) -> List[Eval
 rollout_processor=MCPGymRolloutProcessor(),
 rollout_processor_kwargs={"domain": "airline"},
 passed_threshold={"success": 0.4, "standard_error": 0.02},
-num_runs=8,
+num_runs=2,
 mode="pointwise",
 max_concurrent_rollouts=50,
 server_script_path=_get_server_script_path(),

eval_protocol/pytest/utils.py

Lines changed: 2 additions & 2 deletions
@@ -300,12 +300,12 @@ async def execute_row_with_backoff(task: asyncio.Task, row: EvaluationRow) -> Ev
 except Exception as retry_error:
 # Backoff gave up
 row.rollout_status.status = RolloutStatus.Status.ERROR
-row.rollout_status.termination_reason = str(retry_error)
+# row.rollout_status.termination_reason = str(retry_error)
 return row
 else:
 # Non-retryable exception - fail immediately
 row.rollout_status.status = RolloutStatus.Status.ERROR
-row.rollout_status.termination_reason = str(e)
+# row.rollout_status.termination_reason = str(e)
 return row
 
 # Process all tasks concurrently with backoff retry

pyproject.toml

Lines changed: 1 addition & 1 deletion
@@ -147,7 +147,7 @@ include = ["eval_protocol*", "development*", "vendor*"]
 "eval_protocol" = ["../vite-app/dist/**/*"]
 "eval_protocol.mcp_servers.tau2" = ["*.md", "tests/system_prompts/*.md"]
 "eval_protocol.benchmarks" = ["data/*.jsonl"]
-"vendor.tau2" = ["data/domains/*/policy.md"]
+"vendor.tau2" = ["data/**/*.md"]
 
 
 [tool.versioneer]

0 commit comments

Comments
 (0)