Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 16 additions & 34 deletions isaaclab_arena/tests/test_eval_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,11 @@

import json
import os
import re
import subprocess

import pytest

from isaaclab_arena.tests.utils.constants import TestConstants
from isaaclab_arena.tests.utils.subprocess import run_subprocess

HEADLESS = True
NUM_STEPS = 2
Expand All @@ -23,41 +22,24 @@ def write_jobs_config_to_file(jobs: list[dict], tmp_file_path: str):
json.dump(jobs_config, f, indent=4)


def run_eval_runner(jobs_config_path: str, headless: bool = HEADLESS) -> None:
    """Run the eval_runner as a subprocess with a timeout.

    --continue_on_error is NOT passed, so the eval_runner re-raises on the
    first job failure, exiting non-zero. run_subprocess() detects that and
    raises CalledProcessError, which surfaces as a test failure.

    Args:
        jobs_config_path: Path to the jobs config JSON file.
        headless: Whether to run in headless mode.

    Raises:
        subprocess.CalledProcessError: If the eval_runner exits non-zero
            (i.e. any job failed).
    """
    # Launch eval_runner.py with the test interpreter so the subprocess
    # shares the environment the tests were invoked with.
    args = [TestConstants.python_path, f"{TestConstants.evaluation_dir}/eval_runner.py"]
    args.append("--eval_jobs_config")
    args.append(jobs_config_path)
    if headless:
        args.append("--headless")

    run_subprocess(args)
Comment thread
xyao-nv marked this conversation as resolved.


@pytest.mark.with_subprocess
Expand Down Expand Up @@ -90,7 +72,7 @@ def test_eval_runner_two_jobs_zero_action(tmp_path):

temp_config_path = str(tmp_path / "test_eval_runner_two_jobs_zero_action.json")
write_jobs_config_to_file(jobs, temp_config_path)
run_eval_runner_and_check_no_failures(temp_config_path)
run_eval_runner(temp_config_path)


@pytest.mark.with_subprocess
Expand Down Expand Up @@ -123,7 +105,7 @@ def test_eval_runner_multiple_environments(tmp_path):

temp_config_path = str(tmp_path / "test_eval_runner_multiple_environments.json")
write_jobs_config_to_file(jobs, temp_config_path)
run_eval_runner_and_check_no_failures(temp_config_path)
run_eval_runner(temp_config_path)


@pytest.mark.with_subprocess
Expand Down Expand Up @@ -156,18 +138,18 @@ def test_eval_runner_different_embodiments(tmp_path):

temp_config_path = str(tmp_path / "test_eval_runner_different_embodiments.json")
write_jobs_config_to_file(jobs, temp_config_path)
run_eval_runner_and_check_no_failures(temp_config_path)
run_eval_runner(temp_config_path)


@pytest.mark.with_subprocess
def test_eval_runner_from_existing_config() -> None:
    """Test eval_runner using the zero_action_jobs_config.json and verify no jobs failed.

    Unlike the other tests in this file, this one runs against the config
    checked into the repo rather than a temp-generated one, so a missing
    file indicates a moved/renamed asset rather than a test bug.
    """
    config_path = f"{TestConstants.arena_environments_dir}/eval_jobs_configs/zero_action_jobs_config.json"
    # Fail fast with a clear message if the checked-in config was moved.
    assert os.path.exists(config_path), f"Config file not found: {config_path}"
    run_eval_runner(config_path)
Comment thread
xyao-nv marked this conversation as resolved.


@pytest.mark.with_subprocess
@pytest.mark.skip(reason="CI takes 1000s to cold-start camera rendering.")
def test_eval_runner_enable_cameras(tmp_path):
Comment thread
xyao-nv marked this conversation as resolved.
"""Test eval_runner with enable_cameras set to true."""
jobs = [
Expand Down Expand Up @@ -198,4 +180,4 @@ def test_eval_runner_enable_cameras(tmp_path):

temp_config_path = str(tmp_path / "test_eval_runner_enable_cameras.json")
write_jobs_config_to_file(jobs, temp_config_path)
run_eval_runner_and_check_no_failures(temp_config_path)
run_eval_runner(temp_config_path)
5 changes: 3 additions & 2 deletions isaaclab_arena_gr00t/tests/test_gr00t_closedloop_policy.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

import pytest

from isaaclab_arena.tests.test_eval_runner import run_eval_runner_and_check_no_failures, write_jobs_config_to_file
from isaaclab_arena.tests.test_eval_runner import run_eval_runner, write_jobs_config_to_file
from isaaclab_arena.tests.utils.constants import TestConstants
from isaaclab_arena.tests.utils.subprocess import run_simulation_app_function, run_subprocess
from isaaclab_arena_gr00t.tests.utils.constants import TestConstants as Gr00tTestConstants
Expand Down Expand Up @@ -221,6 +221,7 @@ def test_g1_locomanip_gr00t_closedloop_policy_runner_multi_envs(gr00t_finetuned_


@pytest.mark.with_subprocess
@pytest.mark.skip(reason="CI takes 1000+secs to cold-start camera rendering.")
def test_g1_locomanip_gr00t_closedloop_policy_runner_eval_runner(gr00t_finetuned_model_path, tmp_path):
"""Test eval_runner including a G00T closedloop policy and a zero action policy."""

Expand Down Expand Up @@ -260,7 +261,7 @@ def test_g1_locomanip_gr00t_closedloop_policy_runner_eval_runner(gr00t_finetuned
]
temp_config_path = str(tmp_path / "test_g1_locomanip_gr00t_closedloop_policy_runner_eval_runner.json")
write_jobs_config_to_file(jobs, temp_config_path)
run_eval_runner_and_check_no_failures(temp_config_path, headless=HEADLESS)
run_eval_runner(temp_config_path, headless=HEADLESS)


if __name__ == "__main__":
Expand Down
Loading