diff --git a/skill_eval/trigger.py b/skill_eval/trigger.py index 58f40d8..3697347 100644 --- a/skill_eval/trigger.py +++ b/skill_eval/trigger.py @@ -176,7 +176,15 @@ def _run_trigger_query( ) if rc == 0 and stdout.strip(): parsed = runner.parse_output(stdout) - triggered = _detect_skill_trigger_from_parsed(parsed, skill_path) + signal = _classify_trigger_signal(parsed, skill_path) + # For should_trigger=false queries, only count strong (tool-based) + # signals as triggers. Text-only mentions are too noisy for + # negative queries — an agent may casually mention a skill name + # (e.g. "text-summary", "compliance") without intending to use it. + if query.should_trigger: + triggered = signal != "none" + else: + triggered = signal == "tool" if triggered: trigger_count += 1 tc = parsed["token_counts"] @@ -213,11 +221,30 @@ def _run_trigger_query( def _detect_skill_trigger_from_parsed(parsed: dict, skill_path: Path) -> bool: """Detect whether the skill was activated from pre-parsed stream data. + Returns True if a tool-based signal (strong) or a text-based signal + (weak) is found. Use ``_classify_trigger_signal`` when you need to + distinguish the two. + Looks for: 1. Read tool_use referencing SKILL.md 2. Skill tool_use matching skill name 3. Bash/shell commands that *execute* skill scripts (not just reference paths) 4. Any tool explicitly invoking the skill by name + 5. (Weak) Skill name mentioned in text output + """ + signal = _classify_trigger_signal(parsed, skill_path) + return signal != "none" + + +def _classify_trigger_signal(parsed: dict, skill_path: Path) -> str: + """Classify the strength of a trigger signal. + + Returns: + "tool" — strong signal: agent used a tool to activate the skill + (Read SKILL.md, Bash script execution, Skill tool, etc.) + "text" — weak signal: agent mentioned the skill name in its text + output without an explicit tool invocation + "none" — no trigger detected """ skill_name = skill_path.name skill_md = "SKILL.md" @@ -246,13 +273,13 @@ def _detect_skill_trigger_from_parsed(parsed: dict, skill_path: Path) -> bool: # Check for Skill tool invocation if name.lower() == "skill": - return True + return "tool" # Check for Read of SKILL.md if name.lower() == "read": file_path = str(input_data.get("file_path", "")) if skill_md in file_path: - return True + return "tool" # Check for Bash/shell commands executing skill scripts if name.lower() in ("bash", "shell", "terminal"): @@ -267,10 +294,10 @@ def _detect_skill_trigger_from_parsed(parsed: dict, skill_path: Path) -> bool: # Verify it looks like execution, not just ls/cat of a directory for prefix in _EXEC_PREFIXES: if prefix in command_lower: - return True + return "tool" # Also match direct script execution: "./scripts/weather.py" if f"scripts/{script_file.lower()}" in command_lower: - return True + return "tool" # Check 2: Command uses the skill name as a CLI command # e.g. "skill-eval audit ." — skill_name is the first token or after pipe @@ -280,7 +307,7 @@ def _detect_skill_trigger_from_parsed(parsed: dict, skill_path: Path) -> bool: _re.IGNORECASE, ) if skill_cmd_pattern.search(command): - return True + return "tool" # Check 3: Command uses underscore variant as a module # e.g. "python3 -m data_analysis --file test.csv" @@ -290,20 +317,20 @@ def _detect_skill_trigger_from_parsed(parsed: dict, skill_path: Path) -> bool: _re.IGNORECASE, ) if module_pattern.search(command): - return True + return "tool" # Check 4: Full skill path explicitly in command if str(skill_path) in command: - return True + return "tool" - # Also check text output for skill activation markers + # --- Text-based detection (weak signal) --- text = parsed.get("text", "") text_lower = text.lower() skill_name_lower = skill_name.lower() - # Direct skill references + # Direct skill references — intentional activation phrases if f"skill:{skill_name_lower}" in text_lower or f"using {skill_name_lower}" in text_lower: - return True + return "text" # When skill is injected via --append-system-prompt, the agent may # reference the skill by name or mention its scripts/rules without @@ -317,16 +344,16 @@ def _detect_skill_trigger_from_parsed(parsed: dict, skill_path: Path) -> bool: _re.IGNORECASE, ) if skill_word_pattern.search(text): - return True + return "text" # Also detect references to skill scripts in the text, using path-level # matching (scripts/{filename}) to avoid false positives on common names # like "check.py" or "main.py". for script_file in script_files: if f"scripts/{script_file.lower()}" in text_lower: - return True + return "text" - return False + return "none" def _detect_skill_trigger(stream_output: str, skill_path: Path, runner: Optional[AgentRunner] = None) -> bool: diff --git a/tests/test_trigger.py b/tests/test_trigger.py index edaa563..9bb8679 100644 --- a/tests/test_trigger.py +++ b/tests/test_trigger.py @@ -12,6 +12,7 @@ _read_skill_name, _detect_skill_trigger, _detect_skill_trigger_from_parsed, + _classify_trigger_signal, _run_trigger_query, _build_trigger_report, _print_trigger_report, @@ -276,6 +277,120 @@ def test_read_missing(self, tmp_path): assert name is None +class TestClassifyTriggerSignal: + """Test _classify_trigger_signal returns correct signal strength.""" + + def test_tool_signal_from_skill_tool(self): + parsed = {"tool_calls": [{"name": "Skill", "input": {}}], "text": ""} + assert _classify_trigger_signal(parsed, Path("/tmp/eval-skill")) == "tool" + + def test_tool_signal_from_read_skill_md(self): + parsed = { + "tool_calls": [{"name": "Read", "input": {"file_path": "/tmp/eval-skill/SKILL.md"}}], + "text": "", + } + assert _classify_trigger_signal(parsed, Path("/tmp/eval-skill")) == "tool" + + def test_tool_signal_from_bash_execution(self): + parsed = { + "tool_calls": [{"name": "Bash", "input": {"command": "eval-skill --audit ."}}], + "text": "", + } + assert _classify_trigger_signal(parsed, Path("/tmp/eval-skill")) == "tool" + + def test_text_signal_from_name_mention(self): + parsed = { + "tool_calls": [], + "text": "I'll use the acme-compliance checker to validate this document.", + } + assert _classify_trigger_signal(parsed, Path("/tmp/acme-compliance")) == "text" + + def test_text_signal_from_using_prefix(self): + parsed = { + "tool_calls": [], + "text": "Using text-summary to condense the article.", + } + assert _classify_trigger_signal(parsed, Path("/tmp/text-summary")) == "text" + + def test_none_signal_when_unrelated(self): + parsed = { + "tool_calls": [{"name": "Bash", "input": {"command": "echo hello"}}], + "text": "The weather is nice today.", + } + assert _classify_trigger_signal(parsed, Path("/tmp/text-summary")) == "none" + + def test_tool_signal_takes_priority_over_text(self): + """When both tool and text signals exist, tool should be returned.""" + parsed = { + "tool_calls": [{"name": "Read", "input": {"file_path": "/tmp/text-summary/SKILL.md"}}], + "text": "Using text-summary for the task.", + } + assert _classify_trigger_signal(parsed, Path("/tmp/text-summary")) == "tool" + + def test_text_signal_from_script_reference(self, tmp_path): + """Script path references in text are weak signals.""" + skill_dir = tmp_path / "my-skill" + scripts_dir = skill_dir / "scripts" + scripts_dir.mkdir(parents=True) + (scripts_dir / "check.py").write_text("pass") + + parsed = { + "tool_calls": [], + "text": "You should run scripts/check.py to validate.", + } + assert _classify_trigger_signal(parsed, skill_dir) == "text" + + +class TestAsymmetricTriggerDetection: + """Test that should_trigger=false queries only count tool signals.""" + + def _make_mock_runner(self): + runner = MagicMock(spec=ClaudeRunner) + runner.parse_output.side_effect = ClaudeRunner().parse_output + return runner + + def test_text_only_signal_ignored_for_negative_query(self): + """should_trigger=false + text-only mention → NOT triggered (no false positive).""" + # Agent mentions "text-summary" casually but doesn't use any tool + stream = _make_trigger_stream(tool_name=None, text="I could use text-summary here but it's not needed.", + input_tokens=100, output_tokens=50) + runner = self._make_mock_runner() + runner.run_prompt.return_value = (stream, "", 0, 1.0) + + query = TriggerQuery(query="What is the capital of France?", should_trigger=False) + result = _run_trigger_query(query, Path("/tmp/text-summary"), runs=1, timeout=30, runner=runner) + assert result.trigger_count == 0 + assert result.passed is True # Correctly not triggered + + def test_tool_signal_still_counts_for_negative_query(self): + """should_trigger=false + tool signal → triggered (correct detection).""" + runner = MagicMock(spec=ClaudeRunner) + # Return a parsed dict with a tool call that reads SKILL.md + runner.parse_output.return_value = { + "tool_calls": [{"name": "Read", "input": {"file_path": "/tmp/text-summary/SKILL.md"}}], + "text": "", + "token_counts": {"input_tokens": 100, "output_tokens": 50}, + } + runner.run_prompt.return_value = ("fake-stream", "", 0, 1.0) + + query = TriggerQuery(query="What is the capital of France?", should_trigger=False) + result = _run_trigger_query(query, Path("/tmp/text-summary"), runs=1, timeout=30, runner=runner) + assert result.trigger_count == 1 + assert result.passed is False # Correctly flagged: skill triggered when it shouldn't + + def test_text_signal_counts_for_positive_query(self): + """should_trigger=true + text mention → triggered (text signals count for positive).""" + stream = _make_trigger_stream(tool_name=None, text="Using text-summary to condense the article.", + input_tokens=100, output_tokens=50) + runner = self._make_mock_runner() + runner.run_prompt.return_value = (stream, "", 0, 1.0) + + query = TriggerQuery(query="Summarize this article for me", should_trigger=True) + result = _run_trigger_query(query, Path("/tmp/text-summary"), runs=1, timeout=30, runner=runner) + assert result.trigger_count == 1 + assert result.passed is True + + class TestDetectSkillTriggerFromParsed: """Test _detect_skill_trigger_from_parsed with pre-parsed data."""