Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 19 additions & 0 deletions .github/workflows/tests.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# Continuous-integration workflow: installs the package with its dev extras
# and runs the pytest suite.
name: Tests

# Run for every pull request and for pushes to main.
on:
  pull_request:
  push:
    branches: [main]

jobs:
  test:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-python@v5
        with:
          python-version: "3.11"
      - name: Install package
        # Quote the extras spec so the shell never treats [dev] as a glob
        # character class.
        run: python -m pip install -e ".[dev]"
      - name: Run tests
        run: python -m pytest -q
55 changes: 52 additions & 3 deletions evolution/skills/evolve_skill.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,11 +28,56 @@
load_skill,
find_skill,
reassemble_skill,
_SKILL_BODY_SENTINEL_,
_SKILL_INSTRUCTION_HEADER,
)

console = Console()


def _candidate_instruction_texts(module) -> list[str]:
"""Return instruction/docstring slots that DSPy optimizers may mutate."""
candidates = []
predictor = getattr(module, "predictor", None)
nested_predict = getattr(predictor, "predict", None)

for obj in (nested_predict, predictor):
if obj is None:
continue
for attr in ("__doc__", "doc"):
value = getattr(obj, attr, None)
if value:
candidates.append(value)
signature = getattr(obj, "signature", None)
instructions = getattr(signature, "instructions", None)
if instructions:
candidates.append(instructions)

return candidates


def _extract_evolved_skill_body(module, original_skill_body: str) -> str:
    """Extract the evolved skill body from a GEPA-optimized SkillModule.

    GEPA mutates predictor instruction text, not SkillModule.skill_body.
    Reading skill_body after optimization can therefore report an improved
    score while writing the original body back to disk. Extract the body from
    the predictor's mutated instructions/docstring, bounded by the sentinel
    inserted by SkillModule.

    Candidate slots are scanned in the order produced by
    ``_candidate_instruction_texts``. A slot is used only if it starts with
    the instruction header, contains the sentinel, and yields a non-empty
    body that differs from ``original_skill_body``. Slots that still carry
    the baseline text are skipped so a stale slot cannot hide a mutated one
    that appears later in the scan.

    Args:
        module: The optimized module whose predictor slots are inspected.
        original_skill_body: The baseline body, returned when nothing
            evolved can be extracted.

    Returns:
        The stripped evolved body, or ``original_skill_body`` unchanged when
        no slot holds a body different from the baseline.
    """
    # Match on the stripped markers: the result is stripped anyway, so this
    # yields the same body for exact matches while tolerating whitespace
    # drift around the header or sentinel in rewritten instructions.
    header = _SKILL_INSTRUCTION_HEADER.strip()
    sentinel = _SKILL_BODY_SENTINEL_.strip()
    baseline = (original_skill_body or "").strip()

    for instruction_text in _candidate_instruction_texts(module):
        if not isinstance(instruction_text, str):
            continue
        text = instruction_text.lstrip()
        if not text.startswith(header):
            continue
        body, found, _ = text[len(header):].partition(sentinel)
        if not found:
            continue
        evolved_body = body.strip()
        # Keep scanning past empty or unchanged bodies; only a real mutation
        # should win over the baseline.
        if evolved_body and evolved_body != baseline:
            return evolved_body

    return original_skill_body


def evolve(
skill_name: str,
iterations: int = 10,
Expand Down Expand Up @@ -179,9 +224,13 @@ def evolve(
elapsed = time.time() - start_time
console.print(f"\n Optimization completed in {elapsed:.1f}s")

# ── 6. Extract evolved skill text ───────────────────────────────────
# The optimized module's instructions contain the evolved skill text
evolved_body = optimized_module.skill_text
# ── 6. Extract evolved skill body ───────────────────────────────────
evolved_body = _extract_evolved_skill_body(optimized_module, skill["body"])
if not evolved_body.strip():
console.print("[yellow] ⚠ Could not extract evolved body — using baseline[/yellow]")
evolved_body = skill["body"]
elif evolved_body == skill["body"]:
console.print("[dim] (baseline body retained — optimizer found no improved variant)[/dim]")
evolved_full = reassemble_skill(skill["frontmatter"], evolved_body)

# ── 7. Validate evolved skill ───────────────────────────────────────
Expand Down
39 changes: 25 additions & 14 deletions evolution/skills/skill_module.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
"""Wraps a SKILL.md file as a DSPy module for optimization.

The key abstraction: a skill file becomes a parameterized DSPy module
where the skill text is the optimizable parameter. GEPA can then
mutate the skill text and evaluate the results.
where the skill text is embedded in signature instructions. DSPy
optimizers such as GEPA mutate predictor instructions, so the skill body
must live in the predictor signature rather than a normal input field.
"""

import re
Expand All @@ -12,6 +13,13 @@
import dspy


# Delimits the optimizable skill body from the fixed task wrapper in the
# predictor instructions. HTML comments are legal in Markdown but extremely
# unlikely to appear accidentally in skill text.
# The leading and trailing blank lines are part of the sentinel value itself.
_SKILL_BODY_SENTINEL_ = "\n\n<!-- ___SKILL_EVOLUTION_SENTINEL___ -->\n\n"
# Fixed prefix placed immediately before the skill body when the predictor
# instructions are assembled; it ends with a blank line.
_SKILL_INSTRUCTION_HEADER = "Follow these skill instructions to complete the task:\n\n"


def load_skill(skill_path: Path) -> dict:
"""Load a skill file and parse its frontmatter + body.

Expand Down Expand Up @@ -84,11 +92,10 @@ def find_skill(skill_name: str, hermes_agent_path: Path) -> Optional[Path]:
class SkillModule(dspy.Module):
"""A DSPy module that wraps a skill file for optimization.

The skill text (body) is the parameter that GEPA optimizes.
On each forward pass, the module:
1. Uses the skill text as instructions
2. Processes the task input
3. Returns the agent's response
The skill body is embedded in the predictor signature instructions so
GEPA/MIPRO can mutate it. Passing the body as an InputField leaves it
invisible to prompt optimizers and causes "ghost improvements": scores
can improve while saved output still contains the original text.
"""

class TaskWithSkill(dspy.Signature):
Expand All @@ -97,20 +104,24 @@ class TaskWithSkill(dspy.Signature):
You are an AI agent following specific skill instructions to complete a task.
Read the skill instructions carefully and follow the procedure described.
"""
skill_instructions: str = dspy.InputField(desc="The skill instructions to follow")
task_input: str = dspy.InputField(desc="The task to complete")
output: str = dspy.OutputField(desc="Your response following the skill instructions")

def __init__(self, skill_text: str):
super().__init__()
self.skill_text = skill_text
self.predictor = dspy.ChainOfThought(self.TaskWithSkill)
self.skill_body = skill_text

def forward(self, task_input: str) -> dspy.Prediction:
result = self.predictor(
skill_instructions=self.skill_text,
task_input=task_input,
base_instructions = self.TaskWithSkill.__doc__ or ""
enriched_instructions = (
f"{_SKILL_INSTRUCTION_HEADER}{skill_text}"
f"{_SKILL_BODY_SENTINEL_}{base_instructions}"
)
self.predictor = dspy.ChainOfThought(
self.TaskWithSkill.with_instructions(enriched_instructions)
)

def forward(self, task_input: str) -> dspy.Prediction:
result = self.predictor(task_input=task_input)
return dspy.Prediction(output=result.output)


Expand Down
42 changes: 42 additions & 0 deletions tests/skills/test_evolve_skill_extraction.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
"""Regression tests for extracting actually evolved skill text."""

from types import SimpleNamespace

from evolution.skills.evolve_skill import _extract_evolved_skill_body
from evolution.skills.skill_module import (
_SKILL_BODY_SENTINEL_,
_SKILL_INSTRUCTION_HEADER,
SkillModule,
)


def _wrapped(body: str) -> str:
    """Build instruction text: header, then body, then sentinel, then a fixed tail."""
    template = "{}{}{}fixed wrapper"
    return template.format(
        _SKILL_INSTRUCTION_HEADER,
        body,
        _SKILL_BODY_SENTINEL_,
    )


def test_extracts_from_nested_predict_signature_instructions():
    """A body written into the nested predictor's signature instructions is returned."""
    baseline = "# Original\nold procedure"
    mutated = "# Evolved\nnew procedure"

    skill_module = SkillModule(baseline)
    skill_module.predictor.predict.signature.instructions = _wrapped(mutated)

    result = _extract_evolved_skill_body(skill_module, baseline)

    assert result == mutated
    # The module attribute still holds the baseline, so the two must differ.
    assert result != skill_module.skill_body


def test_extracts_from_nested_predict_docstring_when_signature_is_stale():
    """A mutated ``__doc__`` slot is used when the signature still wraps the baseline."""
    baseline = "# Original\nold procedure"
    mutated = "# Evolved Via Doc\nnew docstring procedure"

    skill_module = SkillModule(baseline)
    nested = skill_module.predictor.predict
    nested.signature.instructions = _wrapped(baseline)
    nested.__doc__ = _wrapped(mutated)

    result = _extract_evolved_skill_body(skill_module, baseline)

    assert result == mutated
    assert result != skill_module.skill_body


def test_falls_back_to_original_when_optimizer_did_not_mutate_text():
    """With no usable instruction slot, the baseline body comes back unchanged."""
    baseline = "# Original\nold procedure"
    # A predictor with no nested predict, signature, or wrapped docstring.
    bare_module = SimpleNamespace(predictor=SimpleNamespace())

    result = _extract_evolved_skill_body(bare_module, baseline)

    assert result == baseline
29 changes: 28 additions & 1 deletion tests/skills/test_skill_module.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,13 @@

import pytest
from pathlib import Path
from evolution.skills.skill_module import load_skill, reassemble_skill
from evolution.skills.skill_module import (
_SKILL_BODY_SENTINEL_,
_SKILL_INSTRUCTION_HEADER,
SkillModule,
load_skill,
reassemble_skill,
)


SAMPLE_SKILL = """---
Expand Down Expand Up @@ -90,3 +96,24 @@ def test_evolved_body_replaces_original(self):

assert "EVOLVED" in result
assert "New and improved" in result


class TestSkillModuleOptimizationSurface:
    """Checks where SkillModule places the skill body on its predictor."""

    def test_skill_body_is_embedded_in_predictor_instructions(self):
        """The nested predictor's instructions carry header, body and sentinel."""
        skill_body = "# My Skill\nUse the improved procedure."
        skill_module = SkillModule(skill_body)

        prompt = skill_module.predictor.predict.signature.instructions

        assert skill_module.skill_body == skill_body
        assert prompt.startswith(_SKILL_INSTRUCTION_HEADER)
        assert _SKILL_BODY_SENTINEL_ in prompt
        assert skill_body in prompt

    def test_task_signature_no_longer_treats_skill_as_input_field(self):
        """Only the task itself remains an input field of the signature."""
        skill_module = SkillModule("# My Skill\nDo the thing.")
        signature = skill_module.predictor.predict.signature

        declared_inputs = signature.input_fields

        assert "skill_instructions" not in declared_inputs
        assert "task_input" in declared_inputs