Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion CLAUDE.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ This file provides guidance to Claude Code (claude.ai/code) when working with co

The `patch` command is type-aware via `template.type` in `.template-info`:
- **MCP server** projects expose `patch generators | core | docs | build | claude`.
- **Agent / workflow** projects expose `patch chart | docs | build | claude`.
- **Agent / workflow** projects expose `patch chart | docs | build | claude | evals`.
Running an MCP-only subcommand inside an agent project (or vice versa) exits with a clear "available categories" error. `patch check` and `patch all` work for any supported type.

## Development Commands
Expand Down
17 changes: 17 additions & 0 deletions src/fips_agents_cli/commands/patch.py
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,23 @@ def claude(dry_run: bool):
_patch_category("claude", dry_run)


@patch.command("evals")
@click.option(
    "--dry-run",
    help="Show what would be updated without making changes",
    is_flag=True,
)
def evals(dry_run: bool):
    """
    Update the eval harness (agent / workflow projects only).

    Patches discovery / assertions / runner / mock_factory under evals/.
    evals/evals.yaml and evals/fixtures/ are user-authored and never
    patched.
    """
    # NOTE: click presents a command's docstring as its help text, so the
    # wording above is user-facing and is intentionally left untouched.
    category = "evals"
    _patch_category(category, dry_run)


@patch.command("all")
@click.option(
"--dry-run",
Expand Down
14 changes: 14 additions & 0 deletions src/fips_agents_cli/tools/patching.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,18 @@
],
"ask_before_patch": False, # Safe to overwrite
},
"evals": {
"description": "Evaluation harness (discovery, assertions, runner)",
"patterns": [
"evals/__init__.py",
"evals/assertions.py",
"evals/discovery.py",
"evals/mock_factory.py",
"evals/run_evals.py",
"evals/README.md",
],
"ask_before_patch": True, # Users may have customized
},
}

# Files to NEVER patch in agent / workflow projects (user code)
Expand All @@ -140,6 +152,8 @@
"prompts/**", # User-customized agent prompts
"rules/**", # User-customized agent rules
"skills/**", # User-customized agent skills
"evals/evals.yaml", # User-authored eval test plan
"evals/fixtures/**", # User-authored eval fixtures
"tests/**/*.py",
".env*",
".memoryhub.yaml", # User-customized memory hub config
Expand Down
32 changes: 32 additions & 0 deletions tests/test_patch.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,38 @@ def test_claude_category_includes_rules(self):
assert ".claude/rules/**/*" in patterns


class TestEvalsCategory:
    """Checks for the ``evals`` patch category (issue #44).

    Agent / workflow templates ship an eval harness that gets its own
    patch category, while the user-authored test plan and fixtures stay
    on the never-patch list.
    """

    def test_evals_category_only_in_agent_categories(self):
        """``evals`` is registered for agent projects and absent for MCP."""
        category = "evals"
        assert category in AGENT_FILE_CATEGORIES
        assert category not in MCP_FILE_CATEGORIES

    def test_evals_patterns_cover_harness_files(self):
        """Every harness file is listed in the category's patterns."""
        harness_files = (
            "evals/__init__.py",
            "evals/assertions.py",
            "evals/discovery.py",
            "evals/mock_factory.py",
            "evals/run_evals.py",
            "evals/README.md",
        )
        patterns = AGENT_FILE_CATEGORIES["evals"]["patterns"]
        for expected in harness_files:
            assert expected in patterns, f"{expected} missing from evals patterns"

    def test_evals_asks_before_patch(self):
        """The category is flagged ``ask_before_patch``.

        The harness may have been customized by the user, so the flag
        must be exactly ``True`` rather than merely truthy.
        """
        evals_config = AGENT_FILE_CATEGORIES["evals"]
        assert evals_config["ask_before_patch"] is True

    def test_user_authored_eval_inputs_are_never_patched(self):
        """The test plan and the fixtures glob are on the never-patch list."""
        # evals.yaml is the user's test plan; evals/fixtures/ holds their data.
        for user_owned in ("evals/evals.yaml", "evals/fixtures/**"):
            assert user_owned in AGENT_NEVER_PATCH


class TestAgentNeverPatchExtensions:
"""`add` writes user-customized files into well-known directories.
Those paths must be in NEVER_PATCH so a future pattern broadening
Expand Down
Loading