diff --git a/TESTING_README.md b/TESTING_README.md new file mode 100644 index 00000000..d4904193 --- /dev/null +++ b/TESTING_README.md @@ -0,0 +1,268 @@ +# Testing README Examples + +This document describes how to run unit tests that verify all code examples from the main [README.md](README.md) work correctly. + +## Overview + +The test file `tests/test_readme_examples.py` contains comprehensive unit tests for all code snippets and examples shown in the README. These tests ensure that: + +- All imports are valid and accessible +- CLI commands exist and are functional +- Core functions and classes work as documented +- File paths referenced in README exist +- API structures match the documentation + +## Prerequisites + +Before running the tests, ensure you have: + +1. **Python 3.11 or 3.12** (or use the uv virtual environment) +2. **AgentLab installed** with all dependencies: + ```bash + uv sync + ``` + +3. **Playwright browsers installed**: + ```bash + uv run playwright install + ``` + +## Running the Tests + +### Run All README Tests + +To run all tests for README examples: + +```bash +uv run pytest tests/test_readme_examples.py -v +``` + +### Run Specific Test Classes + +You can run tests for specific README sections: + +```bash +# Test installation and setup +uv run pytest tests/test_readme_examples.py::TestReadmeInstallationAndSetup -v + +# Test UI-Assistant examples +uv run pytest tests/test_readme_examples.py::TestReadmeUIAssistant -v + +# Test experiment launching examples +uv run pytest tests/test_readme_examples.py::TestReadmeLaunchExperiments -v + +# Test analysis examples +uv run pytest tests/test_readme_examples.py::TestReadmeAnalyseResults -v + +# Test AgentXray examples +uv run pytest tests/test_readme_examples.py::TestReadmeAgentXray -v + +# Test new agent implementation examples +uv run pytest tests/test_readme_examples.py::TestReadmeImplementNewAgent -v + +# Test reproducibility features +uv run pytest 
tests/test_readme_examples.py::TestReadmeReproducibility -v + +# Test benchmark examples +uv run pytest tests/test_readme_examples.py::TestReadmeBenchmarks -v +``` + +### Run Individual Tests + +To run a specific test function: + +```bash +uv run pytest tests/test_readme_examples.py::TestReadmeLaunchExperiments::test_make_study_creates_study -v +``` + +## Test Coverage by README Section + +### ✅ Installation and Setup (Lines 72-87) + +Tests verify: +- The `agentlab` package is installed (the tests import it; they do not run `pip install agentlab`) +- The `playwright` CLI is available (`playwright --version` succeeds; `playwright install` itself is not run) +- Package imports succeed + +**Related tests:** +- `TestReadmeInstallationAndSetup::test_agentlab_package_installed` +- `TestReadmeInstallationAndSetup::test_playwright_install_command_exists` + +### ✅ UI-Assistant (Lines 110-117) + +Tests verify: +- `agentlab-assistant` CLI command exists +- Command accepts `--start_url` and `--agent_config` flags +- Generic agent imports work + +**Related tests:** +- `TestReadmeUIAssistant::test_agentlab_assistant_command_exists` +- `TestReadmeUIAssistant::test_generic_agent_import` + +### ✅ Launch Experiments (Lines 122-149) + +Tests verify: +- `make_study()` function works correctly +- `Study.load()` method exists +- `study.find_incomplete()` method exists +- `study.run()` method exists +- All agent imports are valid + +**Related tests:** +- `TestReadmeLaunchExperiments::test_make_study_import` +- `TestReadmeLaunchExperiments::test_make_study_creates_study` +- `TestReadmeLaunchExperiments::test_study_load_import` +- `TestReadmeLaunchExperiments::test_study_find_incomplete` +- `TestReadmeLaunchExperiments::test_study_run` +- `TestReadmeLaunchExperiments::test_agent_imports` + +### ✅ main.py Examples (Line 147) + +Tests verify: +- `main.py` file exists in repository +- All agent imports from `main.py` work +- Study class can be imported + +**Related tests:** +- `TestReadmeMainPy::test_main_py_exists` +- `TestReadmeMainPy::test_all_agent_imports_from_main` +- `TestReadmeMainPy::test_study_import_from_main` + +### ✅ Analyse Results (Lines 193-203) + +Tests verify: +- `inspect_results` module imports correctly +- 
`load_result_df()` function exists +- `ExpResult` class is accessible + +**Related tests:** +- `TestReadmeAnalyseResults::test_inspect_results_import` +- `TestReadmeAnalyseResults::test_load_result_df_function` +- `TestReadmeAnalyseResults::test_exp_result_class` + +### ✅ AgentXray (Lines 210-226) + +Tests verify: +- `agentlab-xray` CLI command is found on the `PATH`, or its module can be imported (the command itself is not executed) + +**Related tests:** +- `TestReadmeAgentXray::test_agentlab_xray_command_exists` + +### ✅ Implement a New Agent (Lines 239-245) + +Tests verify: +- `MostBasicAgent` file exists at documented path +- `AgentArgs` API file exists +- `AgentArgs` class can be imported + +**Related tests:** +- `TestReadmeImplementNewAgent::test_most_basic_agent_file_exists` +- `TestReadmeImplementNewAgent::test_agent_args_file_exists` +- `TestReadmeImplementNewAgent::test_agent_args_api` + +### ✅ Reproducibility (Lines 265-278) + +Tests verify: +- `reproducibility_journal.csv` exists +- `ReproducibilityAgent` file exists at documented path +- `Study` class can be imported (its reproducibility attributes are not inspected yet) + +**Related tests:** +- `TestReadmeReproducibility::test_reproducibility_journal_exists` +- `TestReadmeReproducibility::test_reproducibility_agent_exists` +- `TestReadmeReproducibility::test_study_has_reproducibility_info` + +### ✅ Supported Benchmarks (Lines 50-66) + +Tests verify: +- Benchmark names (like "miniwob") work with `make_study()` + +**Related tests:** +- `TestReadmeBenchmarks::test_miniwob_benchmark_accessible` + +## Understanding Test Results + +### Successful Test Output + +When all tests pass, you'll see: +``` +tests/test_readme_examples.py::TestReadmeInstallationAndSetup::test_agentlab_package_installed PASSED +tests/test_readme_examples.py::TestReadmeLaunchExperiments::test_make_study_creates_study PASSED +... 
+======================== XX passed in X.XXs ======================== +``` + +### Failed Test Output + +If a test fails, you'll see detailed error information: +``` +tests/test_readme_examples.py::TestReadmeUIAssistant::test_agentlab_assistant_command_exists FAILED + +FAILED tests/test_readme_examples.py::TestReadmeUIAssistant::test_agentlab_assistant_command_exists +AssertionError: agentlab-assistant command should work +``` + +This indicates that the README example may be outdated or there's an installation issue. + +## Notes + +- **API Keys Not Required**: These tests verify code structure and imports, not actual experiment execution. You don't need API keys (OPENAI_API_KEY, etc.) to run these tests. + +- **No Actual Experiments**: Tests that call `make_study()` verify the function works but don't call `study.run()`, which would require: + - Configured API keys + - Set up benchmark environments + - Significant time and resources + +- **CLI Command Tests**: The `agentlab-assistant` test verifies that the command exists and responds to `--help`. The `agentlab-xray` test only checks that the command is on the `PATH` (or, failing that, that its module can be imported); it does not pass `--help`. Neither test launches a UI. + +- **File Existence Tests**: Some tests verify that files mentioned in README (like `main.py`, `reproducibility_journal.csv`) exist at their documented locations. + +## Continuous Integration + +These tests are ideal for CI/CD pipelines to ensure README examples stay up-to-date with code changes. 
+ +Example GitHub Actions workflow: +```yaml +- name: Test README Examples + run: uv run pytest tests/test_readme_examples.py -v +``` + +## Troubleshooting + +### Test fails with "ModuleNotFoundError" + +Make sure you've installed all dependencies: +```bash +uv sync +``` + +### Test fails with "playwright not found" + +Install Playwright browsers: +```bash +uv run playwright install +``` + +### Test fails with "File not found" + +Ensure you're running tests from the repository root directory: +```bash +cd /path/to/AgentLab +uv run pytest tests/test_readme_examples.py -v +``` + +## Contributing + +When updating the README: + +1. **Update code examples** in `README.md` +2. **Update corresponding tests** in `tests/test_readme_examples.py` +3. **Run tests** to verify: + ```bash + uv run pytest tests/test_readme_examples.py -v + ``` +4. **Update this document** if test coverage changes + +## Related Documentation + +- [Main README](README.md) - Complete AgentLab documentation +- [BrowserGym Documentation](https://github.com/ServiceNow/BrowserGym) +- [Contributing Guidelines](CONTRIBUTING.md) (if applicable) diff --git a/tests/test_readme_examples.py b/tests/test_readme_examples.py new file mode 100644 index 00000000..98b51f0c --- /dev/null +++ b/tests/test_readme_examples.py @@ -0,0 +1,272 @@ +""" +Unit tests to verify all code examples from README.md work correctly. + +These tests ensure that: +1. All imports from README examples are valid +2. Core functions and classes are accessible +3. Basic API structures match README documentation +4. 
CLI commands are available and functional +""" + +import pytest +import subprocess +import sys +from pathlib import Path + + +class TestReadmeInstallationAndSetup: + """Tests for installation and setup code from README""" + + def test_playwright_install_command_exists(self): + """Verify playwright install command is available""" + result = subprocess.run( + ["playwright", "--version"], + capture_output=True, + text=True, + timeout=10 + ) + assert result.returncode == 0, "Playwright should be installed" + assert "Version" in result.stdout or "version" in result.stdout.lower() + + def test_agentlab_package_installed(self): + """Verify agentlab package is installed""" + try: + import agentlab + assert agentlab is not None + except ImportError: + pytest.fail("agentlab package should be importable") + + +class TestReadmeUIAssistant: + """Tests for UI-Assistant code from README""" + + def test_agentlab_assistant_command_exists(self): + """Verify agentlab-assistant command is available (lines 110-117)""" + result = subprocess.run( + ["agentlab-assistant", "--help"], + capture_output=True, + text=True, + timeout=30 + ) + assert result.returncode == 0, "agentlab-assistant command should work" + assert "--start_url" in result.stdout + assert "--agent_config" in result.stdout + + def test_generic_agent_import(self): + """Verify generic agent can be imported for UI assistant""" + from agentlab.agents.generic_agent import AGENT_4o_MINI + assert AGENT_4o_MINI is not None + + +class TestReadmeLaunchExperiments: + """Tests for experiment launching code from README (lines 122-149)""" + + def test_agent_imports(self): + """Verify agent configuration can be imported (line 125)""" + from agentlab.agents.generic_agent import AGENT_4o_MINI + assert AGENT_4o_MINI is not None + assert hasattr(AGENT_4o_MINI, 'agent_name') + + def test_make_study_import(self): + """Verify make_study can be imported (line 127)""" + from agentlab.experiments.study import make_study + assert make_study is not 
None + assert callable(make_study) + + def test_make_study_creates_study(self): + """Verify make_study function works (lines 129-133)""" + from agentlab.agents.generic_agent import AGENT_4o_MINI + from agentlab.experiments.study import make_study + + study = make_study( + benchmark="miniwob", + agent_args=[AGENT_4o_MINI], + comment="My first study", + ) + + assert study is not None + assert hasattr(study, 'run') + # Note: We don't actually run study.run() as it requires API keys + + def test_study_load_import(self): + """Verify Study.load can be imported (line 142)""" + from agentlab.experiments.study import Study + assert hasattr(Study, 'load') + assert callable(Study.load) + + def test_study_find_incomplete(self): + """Verify Study has find_incomplete method (line 143)""" + from agentlab.experiments.study import Study + assert hasattr(Study, 'find_incomplete') + + def test_study_run(self): + """Verify Study has run method (line 144)""" + from agentlab.experiments.study import Study + assert hasattr(Study, 'run') + + +class TestReadmeMainPy: + """Tests for main.py referenced in README (line 147)""" + + def test_main_py_exists(self): + """Verify main.py exists in the repository""" + main_path = Path(__file__).parent.parent / "main.py" + assert main_path.exists(), "main.py should exist in repository root" + + def test_all_agent_imports_from_main(self): + """Verify all agent imports from main.py work""" + from agentlab.agents.generic_agent import ( + AGENT_LLAMA3_70B, + AGENT_LLAMA31_70B, + RANDOM_SEARCH_AGENT, + AGENT_4o, + AGENT_4o_MINI, + AGENT_o3_MINI, + AGENT_37_SONNET, + AGENT_CLAUDE_SONNET_35, + AGENT_GPT5_MINI, + ) + + # Verify they're all not None + assert AGENT_LLAMA3_70B is not None + assert AGENT_LLAMA31_70B is not None + assert RANDOM_SEARCH_AGENT is not None + assert AGENT_4o is not None + assert AGENT_4o_MINI is not None + assert AGENT_o3_MINI is not None + assert AGENT_37_SONNET is not None + assert AGENT_CLAUDE_SONNET_35 is not None + assert 
AGENT_GPT5_MINI is not None + + def test_study_import_from_main(self): + """Verify Study import from main.py works""" + from agentlab.experiments.study import Study + assert Study is not None + + +class TestReadmeAnalyseResults: + """Tests for analyzing results code from README (lines 193-203)""" + + def test_inspect_results_import(self): + """Verify inspect_results can be imported (line 194)""" + from agentlab.analyze import inspect_results + assert inspect_results is not None + + def test_load_result_df_function(self): + """Verify load_result_df function exists (line 197)""" + from agentlab.analyze import inspect_results + assert hasattr(inspect_results, 'load_result_df') + assert callable(inspect_results.load_result_df) + + def test_exp_result_class(self): + """Verify ExpResult class is accessible (line 200)""" + from agentlab.experiments.loop import ExpResult + assert ExpResult is not None + + +class TestReadmeAgentXray: + """Tests for AgentXray code from README (lines 210-226)""" + + def test_agentlab_xray_command_exists(self): + """Verify agentlab-xray command is available (line 212)""" + # agentlab-xray launches a Gradio UI and doesn't have --help + # Just verify the command exists in the PATH or as a module + try: + result = subprocess.run( + ["which", "agentlab-xray"], + capture_output=True, + text=True, + timeout=5 + ) + # If 'which' finds it, returncode will be 0 + # If not found but command exists, it should still be importable + command_found = result.returncode == 0 + + # Alternatively, check if the module entry point exists + if not command_found: + # Try importing the xray module + try: + from agentlab.ui import xray + command_found = True + except ImportError: + pass + + assert command_found, "agentlab-xray command or module should exist" + except subprocess.TimeoutExpired: + pytest.fail("Command check timed out") + + +class TestReadmeImplementNewAgent: + """Tests for implementing new agent code from README (lines 239-245)""" + + def 
test_most_basic_agent_file_exists(self): + """Verify MostBasicAgent file exists (line 240)""" + agent_path = Path(__file__).parent.parent / "src" / "agentlab" / "agents" / "most_basic_agent" / "most_basic_agent.py" + assert agent_path.exists(), "MostBasicAgent file should exist" + + def test_agent_args_file_exists(self): + """Verify AgentArgs file exists (line 242)""" + args_path = Path(__file__).parent.parent / "src" / "agentlab" / "agents" / "agent_args.py" + assert args_path.exists(), "agent_args.py file should exist" + + def test_agent_args_api(self): + """Verify AgentArgs API is importable""" + from agentlab.agents.agent_args import AgentArgs + assert AgentArgs is not None + + +class TestReadmeReproducibility: + """Tests for reproducibility features from README (lines 265-278)""" + + def test_study_has_reproducibility_info(self): + """Verify Study contains reproducibility information (line 266)""" + from agentlab.experiments.study import Study + # Study should have methods/attributes for reproducibility + assert Study is not None + + def test_reproducibility_journal_exists(self): + """Verify reproducibility_journal.csv exists (line 269)""" + journal_path = Path(__file__).parent.parent / "reproducibility_journal.csv" + assert journal_path.exists(), "reproducibility_journal.csv should exist" + + def test_reproducibility_agent_exists(self): + """Verify ReproducibilityAgent file exists (line 274)""" + repro_agent_path = Path(__file__).parent.parent / "src" / "agentlab" / "agents" / "generic_agent" / "reproducibility_agent.py" + assert repro_agent_path.exists(), "reproducibility_agent.py should exist" + + +class TestReadmeEnvironmentVariables: + """Tests for environment variables documentation (lines 280-291)""" + + def test_env_variables_documented(self): + """Verify key environment variables are documented in README""" + readme_path = Path(__file__).parent.parent / "README.md" + readme_content = readme_path.read_text() + + # Check that important env vars are 
mentioned + assert "OPENAI_API_KEY" in readme_content + assert "AZURE_OPENAI_API_KEY" in readme_content + assert "AGENTLAB_EXP_ROOT" in readme_content + assert "OPENROUTER_API_KEY" in readme_content + + +class TestReadmeBenchmarks: + """Tests for supported benchmarks from README (lines 50-66)""" + + def test_miniwob_benchmark_accessible(self): + """Verify miniwob benchmark can be used""" + from agentlab.agents.generic_agent import AGENT_4o_MINI + from agentlab.experiments.study import make_study + + # Should create study without error (even if benchmark not fully set up) + study = make_study( + benchmark="miniwob", + agent_args=[AGENT_4o_MINI], + comment="Test study", + ) + assert study is not None + + +if __name__ == "__main__": + # Allow running tests directly with: python test_readme_examples.py + pytest.main([__file__, "-v"])