NousResearch · steezkelly · May 5, 2026 · May 6, 2026
diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
@@ -0,0 +1,19 @@
+name: Tests
+
+on:  # run for every pull request and for pushes to main
+  pull_request:
+  push:
+    branches: [main]
+
+jobs:
+  test:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/setup-python@v5
+        with:
+          python-version: "3.11"
+      - name: Install package
+        run: python -m pip install -e ".[dev]"  # quoted so the shell cannot glob-expand [dev]
+      - name: Run tests
+        run: python -m pytest -q
diff --git a/evolution/skills/evolve_skill.py b/evolution/skills/evolve_skill.py
@@ -28,11 +28,56 @@
     load_skill,
     find_skill,
     reassemble_skill,
+    _SKILL_BODY_SENTINEL_,
+    _SKILL_INSTRUCTION_HEADER,
 )
 
 console = Console()
 
 
+def _candidate_instruction_texts(module) -> list[str]:
+    """Return instruction strings that a DSPy optimizer may have mutated.
+
+    Checks ``module.predictor.predict`` then ``module.predictor``, collecting
+    ``__doc__``, ``doc`` and ``signature.instructions`` in that order. Missing,
+    empty and non-string values are skipped so results are always ``str``.
+    """
+    predictor = getattr(module, "predictor", None)
+    candidates: list[str] = []
+    for holder in (getattr(predictor, "predict", None), predictor):
+        if holder is None:
+            continue
+        signature = getattr(holder, "signature", None)
+        slots = ((holder, "__doc__"), (holder, "doc"), (signature, "instructions"))
+        for owner, attr in slots:
+            value = getattr(owner, attr, None)
+            if isinstance(value, str) and value:  # a non-text ``doc`` is not a slot
+                candidates.append(value)
+    return candidates
+
+
+def _extract_evolved_skill_body(module, original_skill_body: str) -> str:
+    """Extract the evolved skill body from a GEPA-optimized SkillModule.
+
+    GEPA mutates predictor instruction text, not SkillModule.skill_body, so
+    the body is read back from the instruction slots: the text between
+    _SKILL_INSTRUCTION_HEADER and _SKILL_BODY_SENTINEL_. A slot that still
+    holds the original body is skipped so a stale slot cannot mask an evolved
+    one. Returns original_skill_body when no slot yields a changed body.
+    """
+    baseline = original_skill_body.strip()
+    for text in _candidate_instruction_texts(module):
+        # Non-string or unwrapped slots (e.g. a class docstring) are not ours.
+        if not isinstance(text, str) or not text.startswith(_SKILL_INSTRUCTION_HEADER):
+            continue
+        rest = text[len(_SKILL_INSTRUCTION_HEADER):]
+        body, sentinel, _ = rest.partition(_SKILL_BODY_SENTINEL_)
+        evolved_body = body.strip()
+        if sentinel and evolved_body and evolved_body != baseline:
+            return evolved_body
+    return original_skill_body
+
+
 def evolve(
     skill_name: str,
     iterations: int = 10,
@@ -179,9 +224,13 @@ def evolve(
     elapsed = time.time() - start_time
     console.print(f"\n  Optimization completed in {elapsed:.1f}s")
 
-    # ── 6. Extract evolved skill text ───────────────────────────────────
-    # The optimized module's instructions contain the evolved skill text
-    evolved_body = optimized_module.skill_text
+    # ── 6. Extract evolved skill body ───────────────────────────────────
+    evolved_body = _extract_evolved_skill_body(optimized_module, skill["body"])
+    if not evolved_body.strip():
+        console.print("[yellow]  ⚠ Could not extract evolved body — using baseline[/yellow]")
+        evolved_body = skill["body"]
+    elif evolved_body == skill["body"]:
+        console.print("[dim]  (baseline body retained — optimizer found no improved variant)[/dim]")
     evolved_full = reassemble_skill(skill["frontmatter"], evolved_body)
 
     # ── 7. Validate evolved skill ───────────────────────────────────────

diff --git a/evolution/skills/skill_module.py b/evolution/skills/skill_module.py
@@ -1,8 +1,9 @@
 """Wraps a SKILL.md file as a DSPy module for optimization.
 
 The key abstraction: a skill file becomes a parameterized DSPy module
-where the skill text is the optimizable parameter. GEPA can then
-mutate the skill text and evaluate the results.
+where the skill text is embedded in signature instructions. DSPy
+optimizers such as GEPA mutate predictor instructions, so the skill body
+must live in the predictor signature rather than a normal input field.
 """
 
 import re
@@ -12,6 +13,13 @@
 import dspy
 
 
+# Sentinel that separates the optimizable skill body from the fixed task
+# wrapper inside the predictor instructions. An HTML comment is legal Markdown
+# yet extremely unlikely to appear accidentally in skill text.
+_SKILL_BODY_SENTINEL_ = "\n\n<!-- ___SKILL_EVOLUTION_SENTINEL___ -->\n\n"
+_SKILL_INSTRUCTION_HEADER = "Follow these skill instructions to complete the task:\n\n"  # fixed prefix placed before the skill body
+
+
 def load_skill(skill_path: Path) -> dict:
     """Load a skill file and parse its frontmatter + body.
 
@@ -84,11 +92,10 @@ def find_skill(skill_name: str, hermes_agent_path: Path) -> Optional[Path]:
 class SkillModule(dspy.Module):
     """A DSPy module that wraps a skill file for optimization.
 
-    The skill text (body) is the parameter that GEPA optimizes.
-    On each forward pass, the module:
-    1. Uses the skill text as instructions
-    2. Processes the task input
-    3. Returns the agent's response
+    The skill body is embedded in the predictor signature instructions so
+    GEPA/MIPRO can mutate it. Passing the body as an InputField leaves it
+    invisible to prompt optimizers and causes "ghost improvements": scores
+    can improve while saved output still contains the original text.
     """
 
     class TaskWithSkill(dspy.Signature):
@@ -97,20 +104,24 @@ class TaskWithSkill(dspy.Signature):
         You are an AI agent following specific skill instructions to complete a task.
         Read the skill instructions carefully and follow the procedure described.
         """
-        skill_instructions: str = dspy.InputField(desc="The skill instructions to follow")
         task_input: str = dspy.InputField(desc="The task to complete")
         output: str = dspy.OutputField(desc="Your response following the skill instructions")
 
     def __init__(self, skill_text: str):
         super().__init__()
-        self.skill_text = skill_text
-        self.predictor = dspy.ChainOfThought(self.TaskWithSkill)
+        self.skill_body = skill_text
 
-    def forward(self, task_input: str) -> dspy.Prediction:
-        result = self.predictor(
-            skill_instructions=self.skill_text,
-            task_input=task_input,
+        base_instructions = self.TaskWithSkill.__doc__ or ""
+        enriched_instructions = (
+            f"{_SKILL_INSTRUCTION_HEADER}{skill_text}"
+            f"{_SKILL_BODY_SENTINEL_}{base_instructions}"
         )
+        self.predictor = dspy.ChainOfThought(
+            self.TaskWithSkill.with_instructions(enriched_instructions)
+        )
+
+    def forward(self, task_input: str) -> dspy.Prediction:
+        result = self.predictor(task_input=task_input)
         return dspy.Prediction(output=result.output)
 
 

diff --git a/tests/skills/test_evolve_skill_extraction.py b/tests/skills/test_evolve_skill_extraction.py
@@ -0,0 +1,42 @@
+"""Regression tests for extracting actually evolved skill text."""
+
+from types import SimpleNamespace
+
+from evolution.skills.evolve_skill import _extract_evolved_skill_body
+from evolution.skills.skill_module import (
+    _SKILL_BODY_SENTINEL_,
+    _SKILL_INSTRUCTION_HEADER,
+    SkillModule,
+)
+
+
+def _wrapped(body: str) -> str:  # header + body + sentinel + stand-in wrapper text
+    return "".join((_SKILL_INSTRUCTION_HEADER, body, _SKILL_BODY_SENTINEL_, "fixed wrapper"))
+
+
+def test_extracts_from_nested_predict_signature_instructions():
+    """A body written into predict.signature.instructions is read back."""
+    original_body = "# Original\nold procedure"
+    evolved_body = "# Evolved\nnew procedure"
+    module = SkillModule(original_body)
+    module.predictor.predict.signature.instructions = _wrapped(evolved_body)
+    assert _extract_evolved_skill_body(module, original_body) == evolved_body
+    assert module.skill_body != evolved_body
+
+
+def test_extracts_from_nested_predict_docstring_when_signature_is_stale():
+    original_body = "# Original\nold procedure"
+    doc_body = "# Evolved Via Doc\nnew docstring procedure"
+    module = SkillModule(original_body)
+    nested_predict = module.predictor.predict
+    nested_predict.signature.instructions = _wrapped(original_body)  # stale slot
+    nested_predict.__doc__ = _wrapped(doc_body)  # slot carrying the evolved text
+    assert _extract_evolved_skill_body(module, original_body) == doc_body
+    assert module.skill_body != doc_body
+
+
+def test_falls_back_to_original_when_optimizer_did_not_mutate_text():
+    """A predictor without wrapped instruction text yields the original body."""
+    baseline = "# Original\nold procedure"
+    bare_module = SimpleNamespace(predictor=SimpleNamespace())
+    assert _extract_evolved_skill_body(bare_module, baseline) == baseline
diff --git a/tests/skills/test_skill_module.py b/tests/skills/test_skill_module.py
@@ -2,7 +2,13 @@
 
 import pytest
 from pathlib import Path
-from evolution.skills.skill_module import load_skill, reassemble_skill
+from evolution.skills.skill_module import (
+    _SKILL_BODY_SENTINEL_,
+    _SKILL_INSTRUCTION_HEADER,
+    SkillModule,
+    load_skill,
+    reassemble_skill,
+)
 
 
 SAMPLE_SKILL = """---
@@ -90,3 +96,24 @@ def test_evolved_body_replaces_original(self):
 
         assert "EVOLVED" in result
         assert "New and improved" in result
+
+
+class TestSkillModuleOptimizationSurface:
+    """Checks that the skill body lives in the predictor's signature instructions."""
+
+    def test_skill_body_is_embedded_in_predictor_instructions(self):
+        skill_body = "# My Skill\nUse the improved procedure."
+        skill_module = SkillModule(skill_body)
+        signature = skill_module.predictor.predict.signature
+
+        assert skill_module.skill_body == skill_body
+        assert signature.instructions.startswith(_SKILL_INSTRUCTION_HEADER)
+        assert skill_body in signature.instructions
+        assert _SKILL_BODY_SENTINEL_ in signature.instructions
+
+    def test_task_signature_no_longer_treats_skill_as_input_field(self):
+        signature = SkillModule("# My Skill\nDo the thing.").predictor.predict.signature
+        field_names = set(signature.input_fields)
+
+        assert "task_input" in field_names
+        assert "skill_instructions" not in field_names