diff --git a/skill_eval/audit/structure_check.py b/skill_eval/audit/structure_check.py
index 50f99e9..b57adea 100644
--- a/skill_eval/audit/structure_check.py
+++ b/skill_eval/audit/structure_check.py
@@ -3,7 +3,10 @@
Checks:
- SKILL.md exists and has valid YAML frontmatter
- name field: required, 1-64 chars, lowercase+hyphens, matches directory name
+- name field: must not contain reserved words ('anthropic', 'claude')
- description field: required, 1-1024 chars, non-empty
+- description field: must not contain XML tags
+- description field: should use third person (not first/second person)
- Optional fields: license, compatibility (max 500 chars), metadata, allowed-tools
- Directory structure conventions
- Progressive disclosure (SKILL.md size, reference file sizes)
@@ -270,6 +273,22 @@ def check_structure(skill_path: str | Path) -> tuple[list[Finding], Optional[dic
fix=f"Either rename the directory to '{name}/' or change the name field to '{dir_name}'.",
))
+ # Check name does not contain reserved words (agentskills.io spec)
+ _RESERVED_WORDS = ["anthropic", "claude"]
+ for reserved in _RESERVED_WORDS:
+ if reserved in name:
+ findings.append(Finding(
+ code="STR-018",
+ severity=Severity.WARNING,
+ category=Category.STRUCTURE,
+ title=f"Name contains reserved word '{reserved}'",
+ detail=f"The name '{name}' contains '{reserved}', which is reserved per the agentskills.io specification. "
+ f"Names must not contain 'anthropic' or 'claude'.",
+ file_path=str(skill_md),
+ fix=f"Remove '{reserved}' from the skill name. Use a descriptive name for what the skill does instead.",
+ ))
+ break # One finding is enough
+
# --- Check 4: description field ---
desc = frontmatter.get("description")
if not desc:
@@ -306,6 +325,45 @@ def check_structure(skill_path: str | Path) -> tuple[list[Finding], Optional[dic
fix="Include both what the skill does and specific trigger contexts/phrases.",
))
+ # Check description does not contain XML tags (agentskills.io spec)
+ if re.search(r"<[a-zA-Z][^>]*>", desc):
+ findings.append(Finding(
+ code="STR-019",
+ severity=Severity.WARNING,
+ category=Category.STRUCTURE,
+ title="Description contains XML tags",
+ detail="The description field must not contain XML tags per the agentskills.io specification. "
+ "XML tags in the description can interfere with system prompt injection.",
+ file_path=str(skill_md),
+ fix="Remove all XML/HTML tags from the description. Use plain text only.",
+ ))
+
+ # Check description uses third person (Anthropic best practice)
+ # Flag first-person ("I can", "I will", "I help") and second-person ("You can", "You will")
+ _FIRST_PERSON_RE = re.compile(
+ r"\b(I can|I will|I help|I am|I\'m|I process|I analyze|I generate|I create|I extract|I manage)\b",
+ re.IGNORECASE,
+ )
+ _SECOND_PERSON_RE = re.compile(
+ r"\b(You can|You will|You should|You may)\b",
+ re.IGNORECASE,
+ )
+ fp_match = _FIRST_PERSON_RE.search(desc)
+ sp_match = _SECOND_PERSON_RE.search(desc)
+ if fp_match or sp_match:
+ matched = fp_match.group(0) if fp_match else sp_match.group(0)
+ findings.append(Finding(
+ code="STR-020",
+ severity=Severity.INFO,
+ category=Category.STRUCTURE,
+ title="Description should use third person",
+ detail=f"Found '{matched}' in description. Per Anthropic best practices, descriptions should use "
+ f"third person (e.g., 'Processes Excel files') instead of first person ('I can help') "
+ f"or second person ('You can use this'). Inconsistent point-of-view can cause discovery problems.",
+ file_path=str(skill_md),
+ fix="Rewrite the description in third person: 'Analyzes data...' instead of 'I analyze data...'.",
+ ))
+
# --- Check 5: compatibility field ---
compat = frontmatter.get("compatibility")
if compat and isinstance(compat, str) and len(compat) > _MAX_COMPAT_LEN:
diff --git a/tests/test_structure_check.py b/tests/test_structure_check.py
index f23ea14..262d12d 100644
--- a/tests/test_structure_check.py
+++ b/tests/test_structure_check.py
@@ -140,3 +140,109 @@ def test_real_skill_weather(self):
assert fm["name"] == "weather"
critical = [f for f in findings if f.severity == Severity.CRITICAL]
assert len(critical) == 0
+
+
+class TestBestPracticeChecks:
+ """Test Anthropic best practice checks (STR-018, STR-019, STR-020)."""
+
+ def _make_skill(self, tmp_path, name="test-skill", description="A test skill that does useful things for testing purposes."):
+ """Helper: create a minimal valid skill directory with given name/description."""
+ skill_dir = tmp_path / name
+ skill_dir.mkdir()
+ skill_md = skill_dir / "SKILL.md"
+ skill_md.write_text(f"---\nname: {name}\ndescription: \"{description}\"\n---\n\n# Test\n\nInstructions here.\n")
+ return skill_dir
+
+ # --- STR-018: Reserved words in name ---
+
+ def test_name_with_claude_reserved_word(self, tmp_path):
+ skill_dir = self._make_skill(tmp_path, name="claude-helper", description="Helps with code review and analysis tasks.")
+ findings, fm, _ = check_structure(skill_dir)
+ codes = [f.code for f in findings]
+ assert "STR-018" in codes, "Should flag 'claude' in skill name"
+
+ def test_name_with_anthropic_reserved_word(self, tmp_path):
+ skill_dir = self._make_skill(tmp_path, name="anthropic-tools", description="Provides various analysis tools for development.")
+ findings, fm, _ = check_structure(skill_dir)
+ codes = [f.code for f in findings]
+ assert "STR-018" in codes, "Should flag 'anthropic' in skill name"
+
+ def test_name_without_reserved_words(self, tmp_path):
+ skill_dir = self._make_skill(tmp_path, name="code-review", description="Reviews code for bugs, security issues, and best practices.")
+ findings, fm, _ = check_structure(skill_dir)
+ codes = [f.code for f in findings]
+ assert "STR-018" not in codes, "Should not flag normal skill name"
+
+ # --- STR-019: XML tags in description ---
+
+ def test_description_with_xml_tags(self, tmp_path):
+ skill_dir = self._make_skill(
+ tmp_path,
+ name="xml-test",
+ description="Processes data and generates reports.",
+ )
+ findings, fm, _ = check_structure(skill_dir)
+ codes = [f.code for f in findings]
+ assert "STR-019" in codes, "Should flag XML tags in description"
+
+ def test_description_with_html_tags(self, tmp_path):
+ skill_dir = self._make_skill(
+ tmp_path,
+ name="html-test",
+ description="Analyzes code for security vulnerabilities and best practices.",
+ )
+ findings, fm, _ = check_structure(skill_dir)
+ codes = [f.code for f in findings]
+ assert "STR-019" in codes, "Should flag HTML tags in description"
+
+ def test_description_without_xml_tags(self, tmp_path):
+ skill_dir = self._make_skill(
+ tmp_path,
+ name="no-xml-test",
+ description="Analyzes code for security vulnerabilities and best practices.",
+ )
+ findings, fm, _ = check_structure(skill_dir)
+ codes = [f.code for f in findings]
+ assert "STR-019" not in codes, "Should not flag clean description"
+
+ # --- STR-020: Third person description ---
+
+ def test_description_first_person(self, tmp_path):
+ skill_dir = self._make_skill(
+ tmp_path,
+ name="first-person-test",
+ description="I can help you process Excel files and generate reports.",
+ )
+ findings, fm, _ = check_structure(skill_dir)
+ codes = [f.code for f in findings]
+ assert "STR-020" in codes, "Should flag first-person description"
+
+ def test_description_second_person(self, tmp_path):
+ skill_dir = self._make_skill(
+ tmp_path,
+ name="second-person-test",
+ description="You can use this to process Excel files and generate reports.",
+ )
+ findings, fm, _ = check_structure(skill_dir)
+ codes = [f.code for f in findings]
+ assert "STR-020" in codes, "Should flag second-person description"
+
+ def test_description_third_person(self, tmp_path):
+ skill_dir = self._make_skill(
+ tmp_path,
+ name="third-person-test",
+ description="Processes Excel files and generates reports. Use when working with spreadsheet data.",
+ )
+ findings, fm, _ = check_structure(skill_dir)
+ codes = [f.code for f in findings]
+ assert "STR-020" not in codes, "Should not flag third-person description"
+
+ def test_description_im_contraction(self, tmp_path):
+ skill_dir = self._make_skill(
+ tmp_path,
+ name="contraction-test",
+ description="I'm a helpful skill that processes data files and outputs summaries.",
+ )
+ findings, fm, _ = check_structure(skill_dir)
+ codes = [f.code for f in findings]
+ assert "STR-020" in codes, "Should flag I'm contraction"