From 4d5387b2512118b3ec6685b54c9c2b375eb4f49d Mon Sep 17 00:00:00 2001 From: Steve Kelly Date: Mon, 4 May 2026 20:05:11 -0400 Subject: [PATCH 1/2] fix: extract evolved skill body from optimized predictor --- evolution/skills/evolve_skill.py | 55 ++++++++++++++++++-- evolution/skills/skill_module.py | 39 +++++++++----- tests/skills/test_evolve_skill_extraction.py | 42 +++++++++++++++ tests/skills/test_skill_module.py | 29 ++++++++++- 4 files changed, 147 insertions(+), 18 deletions(-) create mode 100644 tests/skills/test_evolve_skill_extraction.py diff --git a/evolution/skills/evolve_skill.py b/evolution/skills/evolve_skill.py index 8ad4d89c..dfc73f65 100644 --- a/evolution/skills/evolve_skill.py +++ b/evolution/skills/evolve_skill.py @@ -28,11 +28,56 @@ load_skill, find_skill, reassemble_skill, + _SKILL_BODY_SENTINEL_, + _SKILL_INSTRUCTION_HEADER, ) console = Console() +def _candidate_instruction_texts(module) -> list[str]: + """Return instruction/docstring slots that DSPy optimizers may mutate.""" + candidates = [] + predictor = getattr(module, "predictor", None) + nested_predict = getattr(predictor, "predict", None) + + for obj in (nested_predict, predictor): + if obj is None: + continue + for attr in ("__doc__", "doc"): + value = getattr(obj, attr, None) + if value: + candidates.append(value) + signature = getattr(obj, "signature", None) + instructions = getattr(signature, "instructions", None) + if instructions: + candidates.append(instructions) + + return candidates + + +def _extract_evolved_skill_body(module, original_skill_body: str) -> str: + """Extract the evolved skill body from a GEPA-optimized SkillModule. + + GEPA mutates predictor instruction text, not SkillModule.skill_body. + Reading skill_body after optimization can therefore report an improved + score while writing the original body back to disk. Extract the body from + the predictor's mutated instructions/docstring, bounded by the sentinel + inserted by SkillModule. + """ + for instruction_text in _candidate_instruction_texts(module): + if not instruction_text.startswith(_SKILL_INSTRUCTION_HEADER): + continue + rest = instruction_text[len(_SKILL_INSTRUCTION_HEADER):] + if _SKILL_BODY_SENTINEL_ not in rest: + continue + evolved_body = rest.split(_SKILL_BODY_SENTINEL_, 1)[0].strip() + if evolved_body: + return evolved_body + + return original_skill_body + + def evolve( skill_name: str, iterations: int = 10, @@ -179,9 +224,13 @@ def evolve( elapsed = time.time() - start_time console.print(f"\n Optimization completed in {elapsed:.1f}s") - # ── 6. Extract evolved skill text ─────────────────────────────────── - # The optimized module's instructions contain the evolved skill text - evolved_body = optimized_module.skill_text + # ── 6. Extract evolved skill body ─────────────────────────────────── + evolved_body = _extract_evolved_skill_body(optimized_module, skill["body"]) + if not evolved_body.strip(): + console.print("[yellow] ⚠ Could not extract evolved body — using baseline[/yellow]") + evolved_body = skill["body"] + elif evolved_body == skill["body"]: + console.print("[dim] (baseline body retained — optimizer found no improved variant)[/dim]") evolved_full = reassemble_skill(skill["frontmatter"], evolved_body) # ── 7. Validate evolved skill ─────────────────────────────────────── diff --git a/evolution/skills/skill_module.py b/evolution/skills/skill_module.py index 6d4d22ed..20b1a72a 100644 --- a/evolution/skills/skill_module.py +++ b/evolution/skills/skill_module.py @@ -1,8 +1,9 @@ """Wraps a SKILL.md file as a DSPy module for optimization. The key abstraction: a skill file becomes a parameterized DSPy module -where the skill text is the optimizable parameter. GEPA can then -mutate the skill text and evaluate the results. +where the skill text is embedded in signature instructions. DSPy +optimizers such as GEPA mutate predictor instructions, so the skill body +must live in the predictor signature rather than a normal input field. """ import re @@ -12,6 +13,13 @@ import dspy +# Delimits the optimizable skill body from the fixed task wrapper in the +# predictor instructions. HTML comments are legal in Markdown but extremely +# unlikely to appear accidentally in skill text. +_SKILL_BODY_SENTINEL_ = "\n\n\n\n" +_SKILL_INSTRUCTION_HEADER = "Follow these skill instructions to complete the task:\n\n" + + def load_skill(skill_path: Path) -> dict: """Load a skill file and parse its frontmatter + body. @@ -84,11 +92,10 @@ def find_skill(skill_name: str, hermes_agent_path: Path) -> Optional[Path]: class SkillModule(dspy.Module): """A DSPy module that wraps a skill file for optimization. - The skill text (body) is the parameter that GEPA optimizes. - On each forward pass, the module: - 1. Uses the skill text as instructions - 2. Processes the task input - 3. Returns the agent's response + The skill body is embedded in the predictor signature instructions so + GEPA/MIPRO can mutate it. Passing the body as an InputField leaves it + invisible to prompt optimizers and causes "ghost improvements": scores + can improve while saved output still contains the original text. """ class TaskWithSkill(dspy.Signature): @@ -97,20 +104,24 @@ class TaskWithSkill(dspy.Signature): You are an AI agent following specific skill instructions to complete a task. Read the skill instructions carefully and follow the procedure described. """ - skill_instructions: str = dspy.InputField(desc="The skill instructions to follow") task_input: str = dspy.InputField(desc="The task to complete") output: str = dspy.OutputField(desc="Your response following the skill instructions") def __init__(self, skill_text: str): super().__init__() - self.skill_text = skill_text - self.predictor = dspy.ChainOfThought(self.TaskWithSkill) + self.skill_body = skill_text - def forward(self, task_input: str) -> dspy.Prediction: - result = self.predictor( - skill_instructions=self.skill_text, - task_input=task_input, + base_instructions = self.TaskWithSkill.__doc__ or "" + enriched_instructions = ( + f"{_SKILL_INSTRUCTION_HEADER}{skill_text}" + f"{_SKILL_BODY_SENTINEL_}{base_instructions}" ) + self.predictor = dspy.ChainOfThought( + self.TaskWithSkill.with_instructions(enriched_instructions) + ) + + def forward(self, task_input: str) -> dspy.Prediction: + result = self.predictor(task_input=task_input) return dspy.Prediction(output=result.output) diff --git a/tests/skills/test_evolve_skill_extraction.py b/tests/skills/test_evolve_skill_extraction.py new file mode 100644 index 00000000..ae060b4c --- /dev/null +++ b/tests/skills/test_evolve_skill_extraction.py @@ -0,0 +1,42 @@ +"""Regression tests for extracting actually evolved skill text.""" + +from types import SimpleNamespace + +from evolution.skills.evolve_skill import _extract_evolved_skill_body +from evolution.skills.skill_module import ( + _SKILL_BODY_SENTINEL_, + _SKILL_INSTRUCTION_HEADER, + SkillModule, +) + + +def _wrapped(body: str) -> str: + return f"{_SKILL_INSTRUCTION_HEADER}{body}{_SKILL_BODY_SENTINEL_}fixed wrapper" + + +def test_extracts_from_nested_predict_signature_instructions(): + module = SkillModule("# Original\nold procedure") + module.predictor.predict.signature.instructions = _wrapped("# Evolved\nnew procedure") + + evolved = _extract_evolved_skill_body(module, "# Original\nold procedure") + + assert evolved == "# Evolved\nnew procedure" + assert evolved != module.skill_body + + +def test_extracts_from_nested_predict_docstring_when_signature_is_stale(): + module = SkillModule("# Original\nold procedure") + module.predictor.predict.signature.instructions = _wrapped("# Original\nold procedure") + module.predictor.predict.__doc__ = _wrapped("# Evolved Via Doc\nnew docstring procedure") + + evolved = _extract_evolved_skill_body(module, "# Original\nold procedure") + + assert evolved == "# Evolved Via Doc\nnew docstring procedure" + assert evolved != module.skill_body + + +def test_falls_back_to_original_when_optimizer_did_not_mutate_text(): + original = "# Original\nold procedure" + module = SimpleNamespace(predictor=SimpleNamespace()) + + assert _extract_evolved_skill_body(module, original) == original diff --git a/tests/skills/test_skill_module.py b/tests/skills/test_skill_module.py index f4ad3c2c..7c27e40b 100644 --- a/tests/skills/test_skill_module.py +++ b/tests/skills/test_skill_module.py @@ -2,7 +2,13 @@ import pytest from pathlib import Path -from evolution.skills.skill_module import load_skill, reassemble_skill +from evolution.skills.skill_module import ( + _SKILL_BODY_SENTINEL_, + _SKILL_INSTRUCTION_HEADER, + SkillModule, + load_skill, + reassemble_skill, +) SAMPLE_SKILL = """--- @@ -90,3 +96,24 @@ def test_evolved_body_replaces_original(self): assert "EVOLVED" in result assert "New and improved" in result + + +class TestSkillModuleOptimizationSurface: + def test_skill_body_is_embedded_in_predictor_instructions(self): + body = "# My Skill\nUse the improved procedure." + module = SkillModule(body) + + instructions = module.predictor.predict.signature.instructions + + assert instructions.startswith(_SKILL_INSTRUCTION_HEADER) + assert body in instructions + assert _SKILL_BODY_SENTINEL_ in instructions + assert module.skill_body == body + + def test_task_signature_no_longer_treats_skill_as_input_field(self): + module = SkillModule("# My Skill\nDo the thing.") + + input_fields = module.predictor.predict.signature.input_fields + + assert "task_input" in input_fields + assert "skill_instructions" not in input_fields From 0fe6673421b201e63b0247ef11658b4c6e933313 Mon Sep 17 00:00:00 2001 From: Steve Kelly Date: Tue, 5 May 2026 21:58:22 -0400 Subject: [PATCH 2/2] ci: add pytest workflow --- .github/workflows/tests.yml | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) create mode 100644 .github/workflows/tests.yml diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml new file mode 100644 index 00000000..4e55ac42 --- /dev/null +++ b/.github/workflows/tests.yml @@ -0,0 +1,19 @@ +name: Tests + +on: + pull_request: + push: + branches: [main] + +jobs: + test: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: "3.11" + - name: Install package + run: python -m pip install -e .[dev] + - name: Run tests + run: python -m pytest -q