diff --git a/orchestrator/campaign.py b/orchestrator/campaign.py index 2ba6a84..c2dcfe7 100644 --- a/orchestrator/campaign.py +++ b/orchestrator/campaign.py @@ -393,7 +393,10 @@ def main() -> None: run_id = args.run_id or campaign.get("run_id") or campaign_path.parent.name + "-run" repo_path = campaign.get("target_system", {}).get("repo_path") - work_dir = setup_work_dir(run_id, repo_path=repo_path) + work_dir = setup_work_dir( + run_id, repo_path=repo_path, + campaign_path=campaign_path, campaign=campaign, + ) print(f"Working directory: {work_dir.resolve()}") print(f"Max iterations: {max_iter}") diff --git a/orchestrator/cli.py b/orchestrator/cli.py index 6e4dbe9..e45476e 100644 --- a/orchestrator/cli.py +++ b/orchestrator/cli.py @@ -96,7 +96,10 @@ def _cmd_run(args): ) sys.exit(1) - work_dir = setup_work_dir(run_id, repo_path=repo_path) + work_dir = setup_work_dir( + run_id, repo_path=repo_path, + campaign_path=campaign_path, campaign=campaign, + ) max_iterations = args.max_iterations if args.max_iterations is not None else campaign.get("max_iterations", 10) run_campaign( diff --git a/orchestrator/iteration.py b/orchestrator/iteration.py index 29e9712..6713484 100644 --- a/orchestrator/iteration.py +++ b/orchestrator/iteration.py @@ -17,10 +17,12 @@ --agent inline: Prompts emitted to stdout for the calling agent. """ import argparse +import importlib.metadata as importlib_metadata import json import logging import re import shutil +import subprocess import sys from datetime import datetime, timezone from enum import Enum @@ -187,12 +189,79 @@ def _merge_principles(work_dir: Path, iter_dir: Path) -> None: atomic_write(principles_path, json.dumps(store, indent=2) + "\n") -def setup_work_dir(run_id: str, repo_path: str | None = None) -> Path: +def _capture_runtime_meta(repo_path: str | None) -> dict: + """Capture runtime metadata at campaign init time. + + Returns a dict with target_repo, target_commit, nous_version, started_at. 
+ Each git/importlib call is wrapped individually — failures log a warning + and yield null for that field. + """ + meta: dict = { + "target_repo": None, + "target_commit": None, + "nous_version": None, + "started_at": datetime.now(timezone.utc).isoformat(), + } + + # Target repo commit + if repo_path: + try: + meta["target_commit"] = subprocess.check_output( + ["git", "-C", repo_path, "rev-parse", "HEAD"], + text=True, stderr=subprocess.DEVNULL, + ).strip() or None + except (subprocess.CalledProcessError, FileNotFoundError, OSError): + logger.warning("Could not capture target_commit from %s", repo_path) + + # Target repo remote (org/repo identifier) + try: + remote = subprocess.check_output( + ["git", "-C", repo_path, "remote", "get-url", "origin"], + text=True, stderr=subprocess.DEVNULL, + ).strip() + if remote.startswith("git@github.com:"): + # SSH: git@github.com:org/repo.git + meta["target_repo"] = remote.split(":")[-1].removesuffix(".git") + elif "github.com/" in remote: + # HTTPS: https://github.com/org/repo.git + meta["target_repo"] = remote.split("github.com/")[-1].removesuffix(".git") + else: + meta["target_repo"] = remote or None + except (subprocess.CalledProcessError, FileNotFoundError, OSError): + logger.warning("Could not capture target_repo from %s", repo_path) + + # Nous version: prefer package metadata, fall back to git SHA + try: + meta["nous_version"] = importlib_metadata.version("nous") + except importlib_metadata.PackageNotFoundError: + nous_dir = Path(__file__).resolve().parent + try: + meta["nous_version"] = subprocess.check_output( + ["git", "-C", str(nous_dir), "rev-parse", "HEAD"], + text=True, stderr=subprocess.DEVNULL, + ).strip() or None + except (subprocess.CalledProcessError, FileNotFoundError, OSError): + logger.warning("Could not determine nous_version") + + return meta + + +def setup_work_dir( + run_id: str, + repo_path: str | None = None, + campaign_path: Path | None = None, + campaign: dict | None = None, +) -> Path: """Create 
and initialize a working directory from templates. If repo_path is provided, the campaign directory is created inside the target repo at .nous/<run_id>/. Otherwise falls back to creating <run_id>/ in the current directory. + + If campaign_path is provided, writes an enriched copy of campaign.yaml + into the work directory with a runtime: block (target_repo, target_commit, + nous_version, started_at). Only written on fresh init to avoid clobbering + on resume. """ if repo_path: work_dir = Path(repo_path) / ".nous" / run_id @@ -206,6 +275,21 @@ def setup_work_dir(run_id: str, repo_path: str | None = None) -> Path: state = json.loads((work_dir / "state.json").read_text()) state["run_id"] = run_id atomic_write(work_dir / "state.json", json.dumps(state, indent=2) + "\n") + + # Write enriched campaign.yaml copy on fresh init only + enriched_path = work_dir / "campaign.yaml" + if campaign_path and campaign and not enriched_path.exists(): + try: + runtime_meta = _capture_runtime_meta(repo_path) + enriched = dict(campaign) + enriched["runtime"] = runtime_meta + atomic_write( + enriched_path, + yaml.safe_dump(enriched, default_flow_style=False, sort_keys=False), + ) + except (OSError, yaml.YAMLError) as exc: + logger.warning("Could not write enriched campaign.yaml: %s", exc) + return work_dir @@ -526,7 +610,10 @@ def main() -> None: run_id = args.run_id or campaign.get("run_id") or campaign_path.parent.name + "-run" repo_path = campaign.get("target_system", {}).get("repo_path") - work_dir = setup_work_dir(run_id, repo_path=repo_path) + work_dir = setup_work_dir( + run_id, repo_path=repo_path, + campaign_path=campaign_path, campaign=campaign, + ) print(f"Working directory: {work_dir.resolve()}") run_iteration( diff --git a/orchestrator/schemas/campaign.schema.yaml b/orchestrator/schemas/campaign.schema.yaml index 4ca0be1..4590c25 100644 --- a/orchestrator/schemas/campaign.schema.yaml +++ b/orchestrator/schemas/campaign.schema.yaml @@ -54,6 +54,11 @@ properties: minLength: 1 description:
"Path to target system git repo. Used by CLIDispatcher for code-access agents. If set, experiments run in isolated worktrees." + metadata: + type: object + additionalProperties: true + description: "User-defined metadata (tags, goal, etc.). Copied to work dir at init." + models: type: object additionalProperties: false diff --git a/tests/test_campaign.py b/tests/test_campaign.py index a4b2ecc..1776d83 100644 --- a/tests/test_campaign.py +++ b/tests/test_campaign.py @@ -1,9 +1,11 @@ """Tests for multi-iteration campaign loop.""" +import importlib.metadata as importlib_metadata import json import shutil +import subprocess import warnings from pathlib import Path -from unittest.mock import MagicMock +from unittest.mock import MagicMock, patch import jsonschema import pytest @@ -12,7 +14,12 @@ from orchestrator.dispatch import StubDispatcher from orchestrator.engine import Engine from orchestrator.campaign import run_campaign -from orchestrator.iteration import IterationOutcome, _save_human_feedback +from orchestrator.iteration import ( + IterationOutcome, + _capture_runtime_meta, + _save_human_feedback, + setup_work_dir, +) SCHEMAS_DIR = Path(__file__).resolve().parent.parent / "orchestrator" / "schemas" TEMPLATES_DIR = Path(__file__).resolve().parent.parent / "orchestrator" / "templates" @@ -449,3 +456,170 @@ def test_multiple_phases_independent(self, tmp_path): fb = json.loads((tmp_path / "human_feedback.json").read_text()) assert len(fb["design"]) == 1 assert len(fb["findings"]) == 1 + + +class TestMetadataEnrichment: + """Tests for campaign metadata enrichment (runtime block in campaign.yaml copy).""" + + CAMPAIGN_WITH_META = { + **SAMPLE_CAMPAIGN, + "metadata": { + "tags": ["prefix-caching", "ttft"], + "goal": "Determine prefix ratio effect on TTFT", + }, + } + + def test_setup_work_dir_writes_enriched_campaign_yaml(self, tmp_path): + """setup_work_dir writes an enriched campaign.yaml with runtime block.""" + campaign_path = tmp_path / "campaign.yaml" + 
campaign_path.write_text(yaml.safe_dump(self.CAMPAIGN_WITH_META)) + + work_dir = setup_work_dir( + "test-run", repo_path=None, + campaign_path=campaign_path, campaign=self.CAMPAIGN_WITH_META, + ) + + enriched_path = work_dir / "campaign.yaml" + assert enriched_path.exists() + + enriched = yaml.safe_load(enriched_path.read_text()) + assert "runtime" in enriched + assert "started_at" in enriched["runtime"] + assert "nous_version" in enriched["runtime"] + assert "target_repo" in enriched["runtime"] + assert "target_commit" in enriched["runtime"] + + def test_user_metadata_passes_through(self, tmp_path): + """User-defined metadata from campaign.yaml appears in the enriched copy.""" + campaign_path = tmp_path / "campaign.yaml" + campaign_path.write_text(yaml.safe_dump(self.CAMPAIGN_WITH_META)) + + work_dir = setup_work_dir( + "test-run", repo_path=None, + campaign_path=campaign_path, campaign=self.CAMPAIGN_WITH_META, + ) + + enriched = yaml.safe_load((work_dir / "campaign.yaml").read_text()) + assert enriched["metadata"]["tags"] == ["prefix-caching", "ttft"] + assert enriched["metadata"]["goal"] == "Determine prefix ratio effect on TTFT" + + def test_enriched_copy_not_overwritten_on_resume(self, tmp_path): + """Re-calling setup_work_dir does not clobber the enriched campaign.yaml.""" + campaign_path = tmp_path / "campaign.yaml" + campaign_path.write_text(yaml.safe_dump(self.CAMPAIGN_WITH_META)) + + work_dir = setup_work_dir( + "test-run", repo_path=None, + campaign_path=campaign_path, campaign=self.CAMPAIGN_WITH_META, + ) + + # Modify the enriched file to prove it's not overwritten + enriched_path = work_dir / "campaign.yaml" + enriched = yaml.safe_load(enriched_path.read_text()) + enriched["runtime"]["marker"] = "original" + enriched_path.write_text(yaml.safe_dump(enriched)) + + # Call setup_work_dir again (simulating resume) + setup_work_dir( + "test-run", repo_path=None, + campaign_path=campaign_path, campaign=self.CAMPAIGN_WITH_META, + ) + + reloaded = 
yaml.safe_load(enriched_path.read_text()) + assert reloaded["runtime"]["marker"] == "original" + + def test_runtime_meta_tolerates_no_git(self, tmp_path): + """_capture_runtime_meta returns nulls gracefully when git is unavailable.""" + with patch("orchestrator.iteration.subprocess.check_output", side_effect=FileNotFoundError): + meta = _capture_runtime_meta(str(tmp_path)) + + assert meta["target_repo"] is None + assert meta["target_commit"] is None + # nous_version may still be set via importlib.metadata + assert "started_at" in meta + + def test_runtime_meta_captures_target_commit_from_git_repo(self, tmp_path): + """_capture_runtime_meta captures target_commit from a real git repo.""" + import subprocess + repo = tmp_path / "target" + repo.mkdir() + subprocess.run(["git", "init"], cwd=repo, capture_output=True, check=True) + subprocess.run(["git", "config", "user.email", "test@test.com"], cwd=repo, capture_output=True) + subprocess.run(["git", "config", "user.name", "Test"], cwd=repo, capture_output=True) + (repo / "f.txt").write_text("x") + subprocess.run(["git", "add", "."], cwd=repo, capture_output=True, check=True) + subprocess.run(["git", "commit", "-m", "init"], cwd=repo, capture_output=True, check=True) + + meta = _capture_runtime_meta(str(repo)) + + assert meta["target_commit"] is not None + assert len(meta["target_commit"]) == 40 # full SHA + # No remote configured, so target_repo should be None + assert meta["target_repo"] is None + + def test_no_enriched_copy_without_campaign_path(self, tmp_path, monkeypatch): + """If campaign_path is not provided, no enriched copy is written.""" + monkeypatch.chdir(tmp_path) + work_dir = setup_work_dir("test-run", repo_path=None) + assert not (work_dir / "campaign.yaml").exists() + + @pytest.mark.parametrize("remote,expected", [ + ("git@github.com:org/repo.git", "org/repo"), + ("git@github.com:org/repo", "org/repo"), + ("https://github.com/org/repo.git", "org/repo"), + ("https://github.com/org/repo", "org/repo"), + 
("ssh://git@github.com/org/repo.git", "org/repo"), + ("https://gitlab.com/org/repo.git", "https://gitlab.com/org/repo.git"), + ("git@gitlab.com:org/repo.git", "git@gitlab.com:org/repo.git"), + ]) + def test_remote_url_parsing(self, remote, expected, monkeypatch): + """_capture_runtime_meta correctly parses various remote URL formats.""" + def fake_check_output(cmd, **kwargs): + if "rev-parse" in cmd: + return "a" * 40 + "\n" + if "get-url" in cmd: + return remote + "\n" + raise subprocess.CalledProcessError(1, cmd) + + import subprocess as real_subprocess + monkeypatch.setattr("orchestrator.iteration.subprocess.check_output", fake_check_output) + meta = _capture_runtime_meta("/fake/repo") + assert meta["target_repo"] == expected + + def test_nous_version_git_sha_fallback(self, monkeypatch): + """When importlib.metadata fails, nous_version falls back to git SHA.""" + fake_sha = "b" * 40 + + monkeypatch.setattr( + "orchestrator.iteration.importlib_metadata.version", + lambda _: (_ for _ in ()).throw(importlib_metadata.PackageNotFoundError()), + ) + + def fake_check_output(cmd, **kwargs): + if "rev-parse" in cmd: + return fake_sha + "\n" + raise subprocess.CalledProcessError(1, cmd) + + monkeypatch.setattr("orchestrator.iteration.subprocess.check_output", fake_check_output) + meta = _capture_runtime_meta(None) + assert meta["nous_version"] == fake_sha + + def test_enrichment_with_repo_path(self, tmp_path): + """Enriched campaign.yaml is written inside .nous/<run_id>/ when repo_path is set.""" + campaign_path = tmp_path / "campaign.yaml" + campaign_path.write_text(yaml.safe_dump(self.CAMPAIGN_WITH_META)) + + repo = tmp_path / "target_repo" + repo.mkdir() + + work_dir = setup_work_dir( + "test-run", repo_path=str(repo), + campaign_path=campaign_path, campaign=self.CAMPAIGN_WITH_META, + ) + + assert work_dir == repo / ".nous" / "test-run" + enriched_path = work_dir / "campaign.yaml" + assert enriched_path.exists() + enriched = yaml.safe_load(enriched_path.read_text()) + assert
"runtime" in enriched + assert enriched["metadata"]["tags"] == ["prefix-caching", "ttft"]