Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
55 changes: 41 additions & 14 deletions skill_eval/trigger.py
Original file line number Diff line number Diff line change
Expand Up @@ -176,7 +176,15 @@ def _run_trigger_query(
)
if rc == 0 and stdout.strip():
parsed = runner.parse_output(stdout)
triggered = _detect_skill_trigger_from_parsed(parsed, skill_path)
signal = _classify_trigger_signal(parsed, skill_path)
# For should_trigger=false queries, only count strong (tool-based)
# signals as triggers. Text-only mentions are too noisy for
# negative queries — an agent may casually mention a skill name
# (e.g. "text-summary", "compliance") without intending to use it.
if query.should_trigger:
triggered = signal != "none"
else:
triggered = signal == "tool"
if triggered:
trigger_count += 1
tc = parsed["token_counts"]
Expand Down Expand Up @@ -213,11 +221,30 @@ def _run_trigger_query(
def _detect_skill_trigger_from_parsed(parsed: dict, skill_path: Path) -> bool:
"""Detect whether the skill was activated from pre-parsed stream data.

Returns True if a tool-based signal (strong) or a text-based signal
(weak) is found. Use ``_classify_trigger_signal`` when you need to
distinguish the two.

Looks for:
1. Read tool_use referencing SKILL.md
2. Skill tool_use matching skill name
3. Bash/shell commands that *execute* skill scripts (not just reference paths)
4. Any tool explicitly invoking the skill by name
5. (Weak) Skill name mentioned in text output
"""
signal = _classify_trigger_signal(parsed, skill_path)
return signal != "none"


def _classify_trigger_signal(parsed: dict, skill_path: Path) -> str:
"""Classify the strength of a trigger signal.

Returns:
"tool" — strong signal: agent used a tool to activate the skill
(Read SKILL.md, Bash script execution, Skill tool, etc.)
"text" — weak signal: agent mentioned the skill name in its text
output without an explicit tool invocation
"none" — no trigger detected
"""
skill_name = skill_path.name
skill_md = "SKILL.md"
Expand Down Expand Up @@ -246,13 +273,13 @@ def _detect_skill_trigger_from_parsed(parsed: dict, skill_path: Path) -> bool:

# Check for Skill tool invocation
if name.lower() == "skill":
return True
return "tool"

# Check for Read of SKILL.md
if name.lower() == "read":
file_path = str(input_data.get("file_path", ""))
if skill_md in file_path:
return True
return "tool"

# Check for Bash/shell commands executing skill scripts
if name.lower() in ("bash", "shell", "terminal"):
Expand All @@ -267,10 +294,10 @@ def _detect_skill_trigger_from_parsed(parsed: dict, skill_path: Path) -> bool:
# Verify it looks like execution, not just ls/cat of a directory
for prefix in _EXEC_PREFIXES:
if prefix in command_lower:
return True
return "tool"
# Also match direct script execution: "./scripts/weather.py"
if f"scripts/{script_file.lower()}" in command_lower:
return True
return "tool"

# Check 2: Command uses the skill name as a CLI command
# e.g. "skill-eval audit ." — skill_name is the first token or after pipe
Expand All @@ -280,7 +307,7 @@ def _detect_skill_trigger_from_parsed(parsed: dict, skill_path: Path) -> bool:
_re.IGNORECASE,
)
if skill_cmd_pattern.search(command):
return True
return "tool"

# Check 3: Command uses underscore variant as a module
# e.g. "python3 -m data_analysis --file test.csv"
Expand All @@ -290,20 +317,20 @@ def _detect_skill_trigger_from_parsed(parsed: dict, skill_path: Path) -> bool:
_re.IGNORECASE,
)
if module_pattern.search(command):
return True
return "tool"

# Check 4: Full skill path explicitly in command
if str(skill_path) in command:
return True
return "tool"

# Also check text output for skill activation markers
# --- Text-based detection (weak signal) ---
text = parsed.get("text", "")
text_lower = text.lower()
skill_name_lower = skill_name.lower()

# Direct skill references
# Direct skill references — intentional activation phrases
if f"skill:{skill_name_lower}" in text_lower or f"using {skill_name_lower}" in text_lower:
return True
return "text"

# When skill is injected via --append-system-prompt, the agent may
# reference the skill by name or mention its scripts/rules without
Expand All @@ -317,16 +344,16 @@ def _detect_skill_trigger_from_parsed(parsed: dict, skill_path: Path) -> bool:
_re.IGNORECASE,
)
if skill_word_pattern.search(text):
return True
return "text"

# Also detect references to skill scripts in the text, using path-level
# matching (scripts/{filename}) to avoid false positives on common names
# like "check.py" or "main.py".
for script_file in script_files:
if f"scripts/{script_file.lower()}" in text_lower:
return True
return "text"

return False
return "none"


def _detect_skill_trigger(stream_output: str, skill_path: Path, runner: Optional[AgentRunner] = None) -> bool:
Expand Down
115 changes: 115 additions & 0 deletions tests/test_trigger.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
_read_skill_name,
_detect_skill_trigger,
_detect_skill_trigger_from_parsed,
_classify_trigger_signal,
_run_trigger_query,
_build_trigger_report,
_print_trigger_report,
Expand Down Expand Up @@ -276,6 +277,120 @@ def test_read_missing(self, tmp_path):
assert name is None


class TestClassifyTriggerSignal:
"""Test _classify_trigger_signal returns correct signal strength."""

def test_tool_signal_from_skill_tool(self):
parsed = {"tool_calls": [{"name": "Skill", "input": {}}], "text": ""}
assert _classify_trigger_signal(parsed, Path("/tmp/eval-skill")) == "tool"

def test_tool_signal_from_read_skill_md(self):
parsed = {
"tool_calls": [{"name": "Read", "input": {"file_path": "/tmp/eval-skill/SKILL.md"}}],
"text": "",
}
assert _classify_trigger_signal(parsed, Path("/tmp/eval-skill")) == "tool"

def test_tool_signal_from_bash_execution(self):
parsed = {
"tool_calls": [{"name": "Bash", "input": {"command": "eval-skill --audit ."}}],
"text": "",
}
assert _classify_trigger_signal(parsed, Path("/tmp/eval-skill")) == "tool"

def test_text_signal_from_name_mention(self):
parsed = {
"tool_calls": [],
"text": "I'll use the acme-compliance checker to validate this document.",
}
assert _classify_trigger_signal(parsed, Path("/tmp/acme-compliance")) == "text"

def test_text_signal_from_using_prefix(self):
parsed = {
"tool_calls": [],
"text": "Using text-summary to condense the article.",
}
assert _classify_trigger_signal(parsed, Path("/tmp/text-summary")) == "text"

def test_none_signal_when_unrelated(self):
parsed = {
"tool_calls": [{"name": "Bash", "input": {"command": "echo hello"}}],
"text": "The weather is nice today.",
}
assert _classify_trigger_signal(parsed, Path("/tmp/text-summary")) == "none"

def test_tool_signal_takes_priority_over_text(self):
"""When both tool and text signals exist, tool should be returned."""
parsed = {
"tool_calls": [{"name": "Read", "input": {"file_path": "/tmp/text-summary/SKILL.md"}}],
"text": "Using text-summary for the task.",
}
assert _classify_trigger_signal(parsed, Path("/tmp/text-summary")) == "tool"

def test_text_signal_from_script_reference(self, tmp_path):
"""Script path references in text are weak signals."""
skill_dir = tmp_path / "my-skill"
scripts_dir = skill_dir / "scripts"
scripts_dir.mkdir(parents=True)
(scripts_dir / "check.py").write_text("pass")

parsed = {
"tool_calls": [],
"text": "You should run scripts/check.py to validate.",
}
assert _classify_trigger_signal(parsed, skill_dir) == "text"


class TestAsymmetricTriggerDetection:
"""Test that should_trigger=false queries only count tool signals."""

def _make_mock_runner(self):
runner = MagicMock(spec=ClaudeRunner)
runner.parse_output.side_effect = ClaudeRunner().parse_output
return runner

def test_text_only_signal_ignored_for_negative_query(self):
"""should_trigger=false + text-only mention → NOT triggered (no false positive)."""
# Agent mentions "text-summary" casually but doesn't use any tool
stream = _make_trigger_stream(tool_name=None, text="I could use text-summary here but it's not needed.",
input_tokens=100, output_tokens=50)
runner = self._make_mock_runner()
runner.run_prompt.return_value = (stream, "", 0, 1.0)

query = TriggerQuery(query="What is the capital of France?", should_trigger=False)
result = _run_trigger_query(query, Path("/tmp/text-summary"), runs=1, timeout=30, runner=runner)
assert result.trigger_count == 0
assert result.passed is True # Correctly not triggered

def test_tool_signal_still_counts_for_negative_query(self):
"""should_trigger=false + tool signal → triggered (correct detection)."""
runner = MagicMock(spec=ClaudeRunner)
# Return a parsed dict with a tool call that reads SKILL.md
runner.parse_output.return_value = {
"tool_calls": [{"name": "Read", "input": {"file_path": "/tmp/text-summary/SKILL.md"}}],
"text": "",
"token_counts": {"input_tokens": 100, "output_tokens": 50},
}
runner.run_prompt.return_value = ("fake-stream", "", 0, 1.0)

query = TriggerQuery(query="What is the capital of France?", should_trigger=False)
result = _run_trigger_query(query, Path("/tmp/text-summary"), runs=1, timeout=30, runner=runner)
assert result.trigger_count == 1
assert result.passed is False # Correctly flagged: skill triggered when it shouldn't

def test_text_signal_counts_for_positive_query(self):
"""should_trigger=true + text mention → triggered (text signals count for positive)."""
stream = _make_trigger_stream(tool_name=None, text="Using text-summary to condense the article.",
input_tokens=100, output_tokens=50)
runner = self._make_mock_runner()
runner.run_prompt.return_value = (stream, "", 0, 1.0)

query = TriggerQuery(query="Summarize this article for me", should_trigger=True)
result = _run_trigger_query(query, Path("/tmp/text-summary"), runs=1, timeout=30, runner=runner)
assert result.trigger_count == 1
assert result.passed is True


class TestDetectSkillTriggerFromParsed:
"""Test _detect_skill_trigger_from_parsed with pre-parsed data."""

Expand Down
Loading