From 13fc4686d5e6dce33aa5f0342fd7f241055e7b97 Mon Sep 17 00:00:00 2001
From: rdwj <wjackson@redhat.com>
Date: Wed, 6 May 2026 16:24:23 -0500
Subject: [PATCH] patch: Add evals category for agent / workflow projects

Closes #44.

The agent and workflow templates ship a full eval harness under
`evals/` (assertions, discovery, mock_factory, runner, package
init, README). None of those files were covered by any patch
category, so updates were invisible to `fips-agents patch check`.

This adds an `evals` category to AGENT_FILE_CATEGORIES covering
just the harness machinery and registers a `patch evals`
subcommand. The category sets ask_before_patch=True because users
may have customized the harness.

User-authored eval inputs (`evals/evals.yaml` and `evals/fixtures/`)
go to AGENT_NEVER_PATCH so the test plan and data fixtures stay
under the user's control.

Stacks on top of #43.

Assisted-by: Claude Code (Opus 4.7)
---
 CLAUDE.md                             |  2 +-
 src/fips_agents_cli/commands/patch.py | 17 ++++++++++++++
 src/fips_agents_cli/tools/patching.py | 14 ++++++++++++
 tests/test_patch.py                   | 32 +++++++++++++++++++++++++++
 4 files changed, 64 insertions(+), 1 deletion(-)

diff --git a/CLAUDE.md b/CLAUDE.md
index 6bc44f2..b3462e2 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -10,7 +10,7 @@ This file provides guidance to Claude Code (claude.ai/code) when working with co
 
 The `patch` command is type-aware via `template.type` in `.template-info`:
 - **MCP server** projects expose `patch generators | core | docs | build | claude`.
-- **Agent / workflow** projects expose `patch chart | docs | build | claude`.
+- **Agent / workflow** projects expose `patch chart | docs | build | claude | evals`.
 Running an MCP-only subcommand inside an agent project (or vice versa) exits with a clear "available categories" error. `patch check` and `patch all` work for any supported type.
 
 ## Development Commands
diff --git a/src/fips_agents_cli/commands/patch.py b/src/fips_agents_cli/commands/patch.py
index 86bad7c..400e9ff 100644
--- a/src/fips_agents_cli/commands/patch.py
+++ b/src/fips_agents_cli/commands/patch.py
@@ -157,6 +157,23 @@ def claude(dry_run: bool):
     _patch_category("claude", dry_run)
 
 
+@patch.command("evals")
+@click.option(
+    "--dry-run",
+    is_flag=True,
+    help="Show what would be updated without making changes",
+)
+def evals(dry_run: bool):
+    """
+    Update the eval harness (agent / workflow projects only).
+
+    Patches the harness files under evals/ (package init, assertions,
+    discovery, mock_factory, run_evals, README). evals/evals.yaml and
+    evals/fixtures/ are user-authored and never patched.
+    """
+    _patch_category("evals", dry_run)
+
+
 @patch.command("all")
 @click.option(
     "--dry-run",
diff --git a/src/fips_agents_cli/tools/patching.py b/src/fips_agents_cli/tools/patching.py
index f7fd552..7f69861 100644
--- a/src/fips_agents_cli/tools/patching.py
+++ b/src/fips_agents_cli/tools/patching.py
@@ -127,6 +127,18 @@
         ],
         "ask_before_patch": False,  # Safe to overwrite
     },
+    "evals": {
+        "description": "Evaluation harness (discovery, assertions, runner)",
+        "patterns": [
+            "evals/__init__.py",
+            "evals/assertions.py",
+            "evals/discovery.py",
+            "evals/mock_factory.py",
+            "evals/run_evals.py",
+            "evals/README.md",
+        ],
+        "ask_before_patch": True,  # Users may have customized
+    },
 }
 
 # Files to NEVER patch in agent / workflow projects (user code)
@@ -140,6 +152,8 @@
     "prompts/**",  # User-customized agent prompts
     "rules/**",  # User-customized agent rules
     "skills/**",  # User-customized agent skills
+    "evals/evals.yaml",  # User-authored eval test plan
+    "evals/fixtures/**",  # User-authored eval fixtures
     "tests/**/*.py",
     ".env*",
     ".memoryhub.yaml",  # User-customized memory hub config
diff --git a/tests/test_patch.py b/tests/test_patch.py
index d268fc1..e8203c5 100644
--- a/tests/test_patch.py
+++ b/tests/test_patch.py
@@ -112,6 +112,38 @@ def test_claude_category_includes_rules(self):
         assert ".claude/rules/**/*" in patterns
 
 
+class TestEvalsCategory:
+    """Issue #44: agent / workflow templates ship a full eval harness
+    that needs its own patch category, separated from user-authored
+    test plans and fixtures.
+    """
+
+    def test_evals_category_only_in_agent_categories(self):
+        assert "evals" in AGENT_FILE_CATEGORIES
+        assert "evals" not in MCP_FILE_CATEGORIES
+
+    def test_evals_patterns_cover_harness_files(self):
+        patterns = AGENT_FILE_CATEGORIES["evals"]["patterns"]
+        for expected in [
+            "evals/__init__.py",
+            "evals/assertions.py",
+            "evals/discovery.py",
+            "evals/mock_factory.py",
+            "evals/run_evals.py",
+            "evals/README.md",
+        ]:
+            assert expected in patterns, f"{expected} missing from evals patterns"
+
+    def test_evals_asks_before_patch(self):
+        # Users may have customized the harness — show diffs and confirm
+        assert AGENT_FILE_CATEGORIES["evals"]["ask_before_patch"] is True
+
+    def test_user_authored_eval_inputs_are_never_patched(self):
+        # The user owns evals.yaml (the test plan) and evals/fixtures/ (data)
+        assert "evals/evals.yaml" in AGENT_NEVER_PATCH
+        assert "evals/fixtures/**" in AGENT_NEVER_PATCH
+
+
 class TestAgentNeverPatchExtensions:
     """`add` writes user-customized files into well-known directories.
     Those paths must be in NEVER_PATCH so a future pattern broadening