From 13fc4686d5e6dce33aa5f0342fd7f241055e7b97 Mon Sep 17 00:00:00 2001 From: rdwj Date: Wed, 6 May 2026 16:24:23 -0500 Subject: [PATCH] patch: Add evals category for agent / workflow projects Closes #44. The agent and workflow templates ship a full eval harness under `evals/` (assertions, discovery, mock_factory, runner, package init, README). None of those files were covered by any patch category, so updates were invisible to `fips-agents patch check`. This adds an `evals` category to AGENT_FILE_CATEGORIES covering just the harness machinery and registers a `patch evals` subcommand. Set ask_before_patch=True since users may have customized the harness. User-authored eval inputs (`evals/evals.yaml` and `evals/fixtures/`) go to AGENT_NEVER_PATCH so the test plan and data fixtures stay under the user's control. Stacks on top of #43. Assisted-by: Claude Code (Opus 4.7) --- CLAUDE.md | 2 +- src/fips_agents_cli/commands/patch.py | 17 ++++++++++++++ src/fips_agents_cli/tools/patching.py | 14 ++++++++++++ tests/test_patch.py | 32 +++++++++++++++++++++++++++ 4 files changed, 64 insertions(+), 1 deletion(-) diff --git a/CLAUDE.md b/CLAUDE.md index 6bc44f2..b3462e2 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -10,7 +10,7 @@ This file provides guidance to Claude Code (claude.ai/code) when working with co The `patch` command is type-aware via `template.type` in `.template-info`: - **MCP server** projects expose `patch generators | core | docs | build | claude`. -- **Agent / workflow** projects expose `patch chart | docs | build | claude`. +- **Agent / workflow** projects expose `patch chart | docs | build | claude | evals`. Running an MCP-only subcommand inside an agent project (or vice versa) exits with a clear "available categories" error. `patch check` and `patch all` work for any supported type. ## Development Commands diff --git a/src/fips_agents_cli/commands/patch.py b/src/fips_agents_cli/commands/patch.py index 86bad7c..400e9ff 100644 --- a/src/fips_agents_cli/commands/patch.py +++ b/src/fips_agents_cli/commands/patch.py @@ -157,6 +157,23 @@ def claude(dry_run: bool): _patch_category("claude", dry_run) +@patch.command("evals") +@click.option( + "--dry-run", + is_flag=True, + help="Show what would be updated without making changes", +) +def evals(dry_run: bool): + """ + Update the eval harness (agent / workflow projects only). + + Patches discovery / assertions / runner / mock_factory under evals/. + evals/evals.yaml and evals/fixtures/ are user-authored and never + patched. + """ + _patch_category("evals", dry_run) + + @patch.command("all") @click.option( "--dry-run", diff --git a/src/fips_agents_cli/tools/patching.py b/src/fips_agents_cli/tools/patching.py index f7fd552..7f69861 100644 --- a/src/fips_agents_cli/tools/patching.py +++ b/src/fips_agents_cli/tools/patching.py @@ -127,6 +127,18 @@ ], "ask_before_patch": False, # Safe to overwrite }, + "evals": { + "description": "Evaluation harness (discovery, assertions, runner)", + "patterns": [ + "evals/__init__.py", + "evals/assertions.py", + "evals/discovery.py", + "evals/mock_factory.py", + "evals/run_evals.py", + "evals/README.md", + ], + "ask_before_patch": True, # Users may have customized + }, } # Files to NEVER patch in agent / workflow projects (user code) @@ -140,6 +152,8 @@ "prompts/**", # User-customized agent prompts "rules/**", # User-customized agent rules "skills/**", # User-customized agent skills + "evals/evals.yaml", # User-authored eval test plan + "evals/fixtures/**", # User-authored eval fixtures "tests/**/*.py", ".env*", ".memoryhub.yaml", # User-customized memory hub config diff --git a/tests/test_patch.py b/tests/test_patch.py index d268fc1..e8203c5 100644 --- a/tests/test_patch.py +++ b/tests/test_patch.py @@ -112,6 +112,38 @@ def test_claude_category_includes_rules(self): assert ".claude/rules/**/*" in patterns +class TestEvalsCategory: + """Issue #44: agent / workflow templates ship a full eval harness + that needs its own patch category, separated from user-authored + test plans and fixtures. + """ + + def test_evals_category_only_in_agent_categories(self): + assert "evals" in AGENT_FILE_CATEGORIES + assert "evals" not in MCP_FILE_CATEGORIES + + def test_evals_patterns_cover_harness_files(self): + patterns = AGENT_FILE_CATEGORIES["evals"]["patterns"] + for expected in [ + "evals/__init__.py", + "evals/assertions.py", + "evals/discovery.py", + "evals/mock_factory.py", + "evals/run_evals.py", + "evals/README.md", + ]: + assert expected in patterns, f"{expected} missing from evals patterns" + + def test_evals_asks_before_patch(self): + # Users may have customized the harness — show diffs and confirm + assert AGENT_FILE_CATEGORIES["evals"]["ask_before_patch"] is True + + def test_user_authored_eval_inputs_are_never_patched(self): + # The user owns evals.yaml (the test plan) and evals/fixtures/ (data) + assert "evals/evals.yaml" in AGENT_NEVER_PATCH + assert "evals/fixtures/**" in AGENT_NEVER_PATCH + + class TestAgentNeverPatchExtensions: """`add` writes user-customized files into well-known directories. Those paths must be in NEVER_PATCH so a future pattern broadening