From 6e63d3566eb02d327367824fd784b16f44d666fa Mon Sep 17 00:00:00 2001 From: peiyao li Date: Tue, 17 Mar 2026 23:29:16 +1100 Subject: [PATCH] feat: add Anthropic best practice checks (STR-018, STR-019, STR-020) Add three new structure checks based on agentskills.io spec and Anthropic skill authoring best practices: - STR-018 (WARNING): Name contains reserved words ('anthropic', 'claude') Per agentskills.io spec, names must not contain these reserved words. - STR-019 (WARNING): Description contains XML tags Per agentskills.io spec, description must not contain XML/HTML tags as they can interfere with system prompt injection. - STR-020 (INFO): Description should use third person Per Anthropic best practices, descriptions should use third person ('Processes files') instead of first person ('I can help') or second person ('You can use'). Inconsistent point-of-view causes discovery problems. All three are pure static checks (no LLM needed). Includes 10 new test cases covering positive and negative scenarios. --- skill_eval/audit/structure_check.py | 58 +++++++++++++++ tests/test_structure_check.py | 106 ++++++++++++++++++++++++++++ 2 files changed, 164 insertions(+) diff --git a/skill_eval/audit/structure_check.py b/skill_eval/audit/structure_check.py index 50f99e9..b57adea 100644 --- a/skill_eval/audit/structure_check.py +++ b/skill_eval/audit/structure_check.py @@ -3,7 +3,10 @@ Checks: - SKILL.md exists and has valid YAML frontmatter - name field: required, 1-64 chars, lowercase+hyphens, matches directory name +- name field: must not contain reserved words ('anthropic', 'claude') - description field: required, 1-1024 chars, non-empty +- description field: must not contain XML tags +- description field: should use third person (not first/second person) - Optional fields: license, compatibility (max 500 chars), metadata, allowed-tools - Directory structure conventions - Progressive disclosure (SKILL.md size, reference file sizes) @@ -270,6 +273,22 @@ def check_structure(skill_path: str | Path) -> tuple[list[Finding], Optional[dic fix=f"Either rename the directory to '{name}/' or change the name field to '{dir_name}'.", )) + # Check name does not contain reserved words (agentskills.io spec) + _RESERVED_WORDS = ["anthropic", "claude"] + for reserved in _RESERVED_WORDS: + if reserved in name: + findings.append(Finding( + code="STR-018", + severity=Severity.WARNING, + category=Category.STRUCTURE, + title=f"Name contains reserved word '{reserved}'", + detail=f"The name '{name}' contains '{reserved}', which is reserved per the agentskills.io specification. " + f"Names must not contain 'anthropic' or 'claude'.", + file_path=str(skill_md), + fix=f"Remove '{reserved}' from the skill name. Use a descriptive name for what the skill does instead.", + )) + break # One finding is enough + # --- Check 4: description field --- desc = frontmatter.get("description") if not desc: @@ -306,6 +325,45 @@ def check_structure(skill_path: str | Path) -> tuple[list[Finding], Optional[dic fix="Include both what the skill does and specific trigger contexts/phrases.", )) + # Check description does not contain XML tags (agentskills.io spec) + if re.search(r"<[a-zA-Z][^>]*>", desc): + findings.append(Finding( + code="STR-019", + severity=Severity.WARNING, + category=Category.STRUCTURE, + title="Description contains XML tags", + detail="The description field must not contain XML tags per the agentskills.io specification. " + "XML tags in the description can interfere with system prompt injection.", + file_path=str(skill_md), + fix="Remove all XML/HTML tags from the description. Use plain text only.", + )) + + # Check description uses third person (Anthropic best practice) + # Flag first-person ("I can", "I will", "I help") and second-person ("You can", "You will") + _FIRST_PERSON_RE = re.compile( + r"\b(I can|I will|I help|I am|I\'m|I process|I analyze|I generate|I create|I extract|I manage)\b", + re.IGNORECASE, + ) + _SECOND_PERSON_RE = re.compile( + r"\b(You can|You will|You should|You may)\b", + re.IGNORECASE, + ) + fp_match = _FIRST_PERSON_RE.search(desc) + sp_match = _SECOND_PERSON_RE.search(desc) + if fp_match or sp_match: + matched = fp_match.group(0) if fp_match else sp_match.group(0) + findings.append(Finding( + code="STR-020", + severity=Severity.INFO, + category=Category.STRUCTURE, + title="Description should use third person", + detail=f"Found '{matched}' in description. Per Anthropic best practices, descriptions should use " + f"third person (e.g., 'Processes Excel files') instead of first person ('I can help') " + f"or second person ('You can use this'). Inconsistent point-of-view can cause discovery problems.", + file_path=str(skill_md), + fix="Rewrite the description in third person: 'Analyzes data...' instead of 'I analyze data...'.", + )) + # --- Check 5: compatibility field --- compat = frontmatter.get("compatibility") if compat and isinstance(compat, str) and len(compat) > _MAX_COMPAT_LEN: diff --git a/tests/test_structure_check.py b/tests/test_structure_check.py index f23ea14..262d12d 100644 --- a/tests/test_structure_check.py +++ b/tests/test_structure_check.py @@ -140,3 +140,109 @@ def test_real_skill_weather(self): assert fm["name"] == "weather" critical = [f for f in findings if f.severity == Severity.CRITICAL] assert len(critical) == 0 + + +class TestBestPracticeChecks: + """Test Anthropic best practice checks (STR-018, STR-019, STR-020).""" + + def _make_skill(self, tmp_path, name="test-skill", description="A test skill that does useful things for testing purposes."): + """Helper: create a minimal valid skill directory with given name/description.""" + skill_dir = tmp_path / name + skill_dir.mkdir() + skill_md = skill_dir / "SKILL.md" + skill_md.write_text(f"---\nname: {name}\ndescription: \"{description}\"\n---\n\n# Test\n\nInstructions here.\n") + return skill_dir + + # --- STR-018: Reserved words in name --- + + def test_name_with_claude_reserved_word(self, tmp_path): + skill_dir = self._make_skill(tmp_path, name="claude-helper", description="Helps with code review and analysis tasks.") + findings, fm, _ = check_structure(skill_dir) + codes = [f.code for f in findings] + assert "STR-018" in codes, "Should flag 'claude' in skill name" + + def test_name_with_anthropic_reserved_word(self, tmp_path): + skill_dir = self._make_skill(tmp_path, name="anthropic-tools", description="Provides various analysis tools for development.") + findings, fm, _ = check_structure(skill_dir) + codes = [f.code for f in findings] + assert "STR-018" in codes, "Should flag 'anthropic' in skill name" + + def test_name_without_reserved_words(self, tmp_path): + skill_dir = self._make_skill(tmp_path, name="code-review", description="Reviews code for bugs, security issues, and best practices.") + findings, fm, _ = check_structure(skill_dir) + codes = [f.code for f in findings] + assert "STR-018" not in codes, "Should not flag normal skill name" + + # --- STR-019: XML tags in description --- + + def test_description_with_xml_tags(self, tmp_path): + skill_dir = self._make_skill( + tmp_path, + name="xml-test", + description="Processes data and generates reports.", + ) + findings, fm, _ = check_structure(skill_dir) + codes = [f.code for f in findings] + assert "STR-019" in codes, "Should flag XML tags in description" + + def test_description_with_html_tags(self, tmp_path): + skill_dir = self._make_skill( + tmp_path, + name="html-test", + description="Analyzes code for security vulnerabilities and best practices.", + ) + findings, fm, _ = check_structure(skill_dir) + codes = [f.code for f in findings] + assert "STR-019" in codes, "Should flag HTML tags in description" + + def test_description_without_xml_tags(self, tmp_path): + skill_dir = self._make_skill( + tmp_path, + name="no-xml-test", + description="Analyzes code for security vulnerabilities and best practices.", + ) + findings, fm, _ = check_structure(skill_dir) + codes = [f.code for f in findings] + assert "STR-019" not in codes, "Should not flag clean description" + + # --- STR-020: Third person description --- + + def test_description_first_person(self, tmp_path): + skill_dir = self._make_skill( + tmp_path, + name="first-person-test", + description="I can help you process Excel files and generate reports.", + ) + findings, fm, _ = check_structure(skill_dir) + codes = [f.code for f in findings] + assert "STR-020" in codes, "Should flag first-person description" + + def test_description_second_person(self, tmp_path): + skill_dir = self._make_skill( + tmp_path, + name="second-person-test", + description="You can use this to process Excel files and generate reports.", + ) + findings, fm, _ = check_structure(skill_dir) + codes = [f.code for f in findings] + assert "STR-020" in codes, "Should flag second-person description" + + def test_description_third_person(self, tmp_path): + skill_dir = self._make_skill( + tmp_path, + name="third-person-test", + description="Processes Excel files and generates reports. Use when working with spreadsheet data.", + ) + findings, fm, _ = check_structure(skill_dir) + codes = [f.code for f in findings] + assert "STR-020" not in codes, "Should not flag third-person description" + + def test_description_im_contraction(self, tmp_path): + skill_dir = self._make_skill( + tmp_path, + name="contraction-test", + description="I'm a helpful skill that processes data files and outputs summaries.", + ) + findings, fm, _ = check_structure(skill_dir) + codes = [f.code for f in findings] + assert "STR-020" in codes, "Should flag I'm contraction"