Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 9 additions & 2 deletions eval_protocol/pytest/handle_persist_flow.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,10 @@
def handle_persist_flow(all_results: list[list[EvaluationRow]], test_func_name: str):
try:
# Default is to save and upload experiment JSONL files, unless explicitly disabled
should_save_and_upload = os.getenv("EP_NO_UPLOAD") != "1"
custom_output_dir = os.getenv("EP_OUTPUT_DIR")
should_save = os.getenv("EP_NO_UPLOAD") != "1" or custom_output_dir is not None

if should_save_and_upload:
if should_save:
current_run_rows = [item for sublist in all_results for item in sublist]
if current_run_rows:
experiments: dict[str, list[EvaluationRow]] = defaultdict(list)
Expand All @@ -27,6 +28,8 @@ def handle_persist_flow(all_results: list[list[EvaluationRow]], test_func_name:
experiments[row.execution_metadata.experiment_id].append(row)

eval_protocol_dir = find_eval_protocol_dir()
if custom_output_dir:
eval_protocol_dir = custom_output_dir
exp_dir = pathlib.Path(eval_protocol_dir) / "experiment_results"
exp_dir.mkdir(parents=True, exist_ok=True)

Expand Down Expand Up @@ -81,6 +84,10 @@ def handle_persist_flow(all_results: list[list[EvaluationRow]], test_func_name:
json.dump(row_data, f, ensure_ascii=False)
f.write("\n")

should_upload = os.getenv("EP_NO_UPLOAD") != "1"
if not should_upload:
continue

def get_auth_value(key: str) -> str | None:
"""Get auth value from config file or environment."""
try:
Expand Down
9 changes: 9 additions & 0 deletions eval_protocol/pytest/plugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,11 @@ def pytest_addoption(parser) -> None:
default=None,
help=("If set, use this base URL for remote rollout processing. Example: http://localhost:8000"),
)
group.addoption(
"--ep-output-dir",
default=None,
help=("If set, save evaluation results to this directory in jsonl format."),
)


def _normalize_max_rows(val: Optional[str]) -> Optional[str]:
Expand Down Expand Up @@ -258,6 +263,10 @@ def pytest_configure(config) -> None:
if threshold_env is not None:
os.environ["EP_PASSED_THRESHOLD"] = threshold_env

if config.getoption("--ep-output-dir"):
# set this to save eval results to the target dir in jsonl format
os.environ["EP_OUTPUT_DIR"] = config.getoption("--ep-output-dir")

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you ask ChatGPT to come up with a better name? `--ep-no-persist-results-jsonl` just sounds so weird.

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sure, sure, lol.

if config.getoption("--ep-no-upload"):
os.environ["EP_NO_UPLOAD"] = "1"

Expand Down
Loading