From bd242ab192ebbeeb8b81c51e218be3afc722407a Mon Sep 17 00:00:00 2001 From: Cody <251773092+fresh3nough@users.noreply.github.com> Date: Sun, 1 Mar 2026 17:30:50 -0500 Subject: [PATCH 1/2] feat: add mfa-bypass and edge-cases skills with test suite - Add mfa_bypass.md: MFA bypass testing covering session fixation, code reuse, fallback abuse, enrollment flow weaknesses (CWE-308) - Add edge_cases.md: edge case testing covering caching races, partial failures, boundary conditions, eventual consistency exploitation (CWE-362) - Add tests/skills/test_skills.py: 50 test cases covering skill loading, validation, frontmatter parsing, content quality, and structural checks across all skill files --- strix/skills/vulnerabilities/edge_cases.md | 184 ++++++++++ strix/skills/vulnerabilities/mfa_bypass.md | 186 ++++++++++ tests/skills/__init__.py | 1 + tests/skills/test_skills.py | 373 +++++++++++++++++++++ 4 files changed, 744 insertions(+) create mode 100644 strix/skills/vulnerabilities/edge_cases.md create mode 100644 strix/skills/vulnerabilities/mfa_bypass.md create mode 100644 tests/skills/__init__.py create mode 100644 tests/skills/test_skills.py diff --git a/strix/skills/vulnerabilities/edge_cases.md b/strix/skills/vulnerabilities/edge_cases.md new file mode 100644 index 000000000..ad14aeb6e --- /dev/null +++ b/strix/skills/vulnerabilities/edge_cases.md @@ -0,0 +1,184 @@ +--- +name: edge-cases +description: Edge case testing for caching races, partial failures, boundary conditions, and eventual consistency exploitation +category: vulnerabilities +tags: [caching, race-condition, partial-failure, edge-case] +cwe: 362 +--- + +# Edge Cases + +Edge case vulnerabilities arise at system boundaries: cache coherence gaps, partial failure states, retry storms, and consistency windows between distributed components. These bugs rarely appear in unit tests and require adversarial timing, ordering, and failure injection to surface. + +## Attack Surface + +**Caching Layers** +- CDN / reverse proxy (Cloudflare, Fastly, Varnish, nginx) +- Application cache (Redis, Memcached, in-process) +- Database query cache +- DNS cache and TTL manipulation +- Browser and service worker caches + +**Distributed State** +- Eventual consistency windows between replicas +- Cross-region replication lag +- Message queue delivery guarantees (at-least-once, at-most-once) +- Saga/compensation patterns in microservices + +**Failure Boundaries** +- Partial success in multi-step operations +- Timeout and retry behavior +- Circuit breaker states (closed, open, half-open) +- Graceful degradation and fallback paths + +**Boundary Conditions** +- Integer overflow/underflow at limits +- Pagination cursors at collection boundaries +- Time zone transitions, DST, leap seconds +- Unicode normalization and encoding edge cases + +## High-Value Targets + +- Authenticated CDN content served from shared cache without identity keys +- Payment flows with partial capture/refund states +- Inventory systems with reservation and release logic +- Session stores with replication lag between regions +- Rate limiters using distributed counters +- Background job queues with retry and dead-letter handling +- Search indexes with delayed consistency from primary stores + +## Reconnaissance + +### Cache Behavior Mapping + +- Identify caching headers: Cache-Control, Vary, ETag, Age, X-Cache, X-Cache-Hit, CF-Cache-Status +- Determine cache key composition: what headers, cookies, and query parameters are included +- Test Vary header completeness: does it include Authorization, Cookie, Accept-Language? +- Check for cache partitioning: do authenticated and unauthenticated requests share cache entries? +- Map TTL values and revalidation behavior (stale-while-revalidate, stale-if-error) + +### Consistency Model Discovery + +- Identify which data stores use eventual consistency vs strong consistency +- Map replication topology: primary/replica, multi-region, active-active +- Determine read-after-write guarantees per endpoint +- Check if reads are pinned to the write region or load-balanced across replicas +- Look for consistency-related headers or query parameters (consistency=strong, read_preference) + +### Failure Mode Enumeration + +- Identify multi-step operations and their atomicity guarantees +- Map retry policies: fixed, exponential backoff, jitter, max attempts +- Check for idempotency key support and scope +- Identify circuit breaker implementations and their state thresholds +- Look for graceful degradation paths that weaken security controls + +## Key Vulnerabilities + +### Cache Poisoning Races + +- **TOCTOU on CDN**: Inject a poisoned response (e.g., admin=true, elevated role) into the cache during the window between authentication check and response caching; subsequent users receive the poisoned cached response +- **Cache key confusion**: Exploit differences in how the cache and origin parse URLs, headers, or query parameters to serve one user's cached response to another +- **Vary header omission**: Origin returns user-specific content but Vary header does not include Authorization or Cookie; CDN caches and serves across identities +- **Web cache deception**: Trick caching layer into storing authenticated response at a public path (e.g., /account/profile.css) by appending cacheable extensions +- **Cache parameter cloaking**: Use unkeyed query parameters, headers, or cookies to influence response content while the cache key remains identical +- **Host header poisoning**: Inject alternate Host header values to generate cached responses with attacker-controlled links or redirects +- **Response splitting**: Inject headers that cause the cache to store a crafted response for a different URL + +### Partial Failure Exploitation + +- **Half-committed transactions**: In multi-service workflows (payment + inventory + notification), one service commits while another fails; exploit the inconsistent state before compensation runs +- **Orphaned resources**: Failed creation leaves allocated resources (IDs, reservations, storage objects) that can be claimed or referenced +- **Retry amplification**: Trigger timeouts to force retries that cause duplicate side effects (double charges, double credits, duplicate emails) +- **Compensation race**: Execute the compensation/rollback path before the original operation completes, leaving the system in a state that allows both the original action and its reversal to succeed +- **Dead letter exploitation**: Messages in dead-letter queues may be reprocessed with stale context, outdated authorization, or bypassed validation +- **Partial batch results**: Batch operations returning mixed success/failure per item; exploit items that succeeded before the batch was rolled back + +### Eventual Consistency Windows + +- **Read-your-writes violation**: Write to primary (e.g., revoke permission), immediately read from replica that has not replicated yet; stale read allows continued access +- **Cross-region stale reads**: In multi-region deployments, act in a lagging region before a security-critical write propagates (role revocation, account disable, password change) +- **Search index lag**: Item deleted or access revoked but still discoverable and accessible via search or listing endpoints backed by a delayed index +- **Counter drift**: Distributed rate limit counters or quota trackers that diverge across nodes; burst requests across multiple nodes before counters converge + +### Boundary Condition Abuse + +- **Integer boundaries**: Quantity, price, or balance fields at INT_MAX/INT_MIN; overflow to negative or zero +- **Pagination edge cases**: Cursor-based pagination allowing access to items beyond authorization scope when cursor encodes raw IDs; off-by-one at page boundaries exposing extra records +- **Time boundary exploitation**: Exploit midnight UTC rollovers, DST transitions, or month-end boundaries where time-based access controls, quotas, or rate limits reset +- **Encoding differentials**: Unicode normalization (NFC vs NFD), case folding, and homoglyph abuse causing different systems to interpret the same identifier differently (e.g., user lookup vs permission check) +- **Floating point boundaries**: Currency calculations at precision limits producing rounding errors that accumulate across transactions +- **Empty and null states**: Empty arrays, null values, missing fields, and zero-length strings bypassing validation that only checks for presence + +### Graceful Degradation Weaknesses + +- **Fallback path bypass**: When a dependency (auth service, rate limiter, WAF) is unavailable, the fallback allows requests through without full validation +- **Circuit breaker open state**: While the circuit breaker is open, requests may be routed to a degraded path that skips authorization or logging +- **Feature flag defaults**: Feature flags defaulting to enabled when the flag service is unreachable, exposing gated functionality +- **Cache stampede**: Force cache expiry on a hot key; the thundering herd of requests to the origin may overwhelm the backend and trigger degraded responses + +### Stale State and Revocation Gaps + +- **Token revocation lag**: Access tokens remain valid until expiry even after revocation event; long-lived tokens with no revocation check +- **Permission cache staleness**: Role or permission changes not reflected until cache TTL expires; act within the stale window +- **DNS rebinding**: Manipulate DNS TTL to point a validated hostname to an internal IP after the initial security check + +## Bypass Techniques + +- Timing manipulation: slow down requests (large payloads, keep-alive abuse) to widen race windows +- Regional routing: target specific regions or replicas known to lag behind the primary +- Header injection to influence cache behavior (X-Forwarded-Host, X-Original-URL) +- Trigger dependency failures (connection exhaustion, timeout injection) to force degraded paths +- Replay stale pagination cursors or continuation tokens after access revocation + +## Testing Methodology + +1. **Map caching layers** - Identify all caches (CDN, app, DB), their key composition, TTLs, and Vary headers +2. **Test cache isolation** - Verify authenticated content is not served cross-user; strip cookies, swap tokens, check ETags +3. **Probe consistency** - Write then immediately read from different paths/regions; measure replication lag +4. **Inject failures** - Simulate partial failures in multi-step operations; check for orphaned or inconsistent state +5. **Test boundaries** - Exercise integer limits, pagination edges, time boundaries, and encoding variants +6. **Force degradation** - Exhaust dependencies to trigger fallback paths; verify security controls remain enforced +7. **Measure revocation** - Change permissions/roles and measure how long stale access persists across all layers + +## Validation + +1. Show cross-user cache serving: two different authenticated users receiving each other's cached responses +2. Demonstrate partial failure leaving exploitable state (e.g., payment captured but order not created, allowing re-order) +3. Prove stale read after security-critical write (permission revocation still allowing access via replica) +4. Show boundary condition causing invariant violation (integer overflow, pagination leak, time-boundary quota reset) +5. Demonstrate degraded path bypassing security control that is enforced in the normal path +6. All findings must show durable state change or information disclosure, not just transient anomalies + +## False Positives + +- Intentional stale-while-revalidate behavior documented in architecture with acceptable staleness window +- Eventual consistency windows within documented SLA that do not affect security-critical state +- Cache serving public content (truly non-personalized) to multiple users as designed +- Graceful degradation with explicit fail-closed behavior on security-critical paths +- Pagination showing slightly stale counts due to known replica lag without access control implications + +## Impact + +- Cross-user data exposure via cache poisoning or confusion +- Financial loss from partial failure exploitation (double-spend, orphaned charges) +- Unauthorized access during consistency windows after revocation events +- Denial of service via cache stampede or retry storm amplification +- Policy bypass when security controls degrade under failure conditions + +## Pro Tips + +1. Cache bugs are most impactful on CDNs; start by mapping cache key composition and Vary headers +2. For consistency bugs, identify the replication topology first; then target the lagging component +3. Partial failures are easiest to trigger in payment and inventory flows; these have the highest business impact +4. Test revocation effectiveness by measuring the actual window between revocation and enforcement +5. Degrade one dependency at a time and check if security controls still hold +6. Integer boundary bugs are often in quantity, price, and balance fields; try MAX_INT, 0, -1, and overflow values +7. Time-boundary bugs cluster around midnight UTC, month-end, and DST transitions +8. Cache deception works best when the origin and CDN disagree on what constitutes a static resource +9. Use correlation IDs and timestamps in all test requests to prove ordering and causality +10. Document the exact timing window required to reproduce; edge cases must be repeatable to be actionable + +## Summary + +Edge cases exploit the gaps between components that each work correctly in isolation but fail under adversarial timing, ordering, or partial failure. Security must hold at every cache boundary, consistency window, failure mode, and numeric limit in the system. diff --git a/strix/skills/vulnerabilities/mfa_bypass.md b/strix/skills/vulnerabilities/mfa_bypass.md new file mode 100644 index 000000000..c94b10554 --- /dev/null +++ b/strix/skills/vulnerabilities/mfa_bypass.md @@ -0,0 +1,186 @@ +--- +name: mfa-bypass +description: MFA bypass testing for session fixation, code reuse, fallback abuse, and enrollment flow weaknesses +category: vulnerabilities +tags: [mfa, authentication, session-fixation, otp] +cwe: 308 +--- + +# MFA Bypass + +Multi-factor authentication failures allow attackers to skip or circumvent the second factor entirely. Focus on session state between authentication steps, code validation logic, fallback mechanisms, and enrollment flows. + +## Attack Surface + +**MFA Delivery Channels** +- TOTP (authenticator apps) +- SMS/voice OTP +- Email OTP/magic links +- Push notifications +- Hardware tokens (FIDO2/WebAuthn, U2F) +- Backup/recovery codes + +**Authentication Flow States** +- Pre-MFA (password verified, MFA pending) +- MFA challenge issued +- MFA verified +- MFA enrollment/setup +- MFA recovery/reset + +**Administrative Controls** +- MFA enforcement policies (org-wide, per-role) +- MFA disable/reset by admin or self-service +- Trusted device/remember-me logic + +## High-Value Targets + +- Pre-MFA session tokens and cookies that grant partial access +- MFA challenge endpoints and verification handlers +- Backup code generation and redemption +- MFA enrollment and unenrollment flows +- Trusted device registration and revocation +- Account recovery bypassing MFA (password reset, support flows) +- Admin endpoints for MFA reset on behalf of users + +## Reconnaissance + +### Session State Mapping + +- Capture cookies and tokens at each authentication step (pre-password, post-password/pre-MFA, post-MFA) +- Identify which endpoints are accessible at each state; look for partial session access +- Map session identifiers: do they change after MFA completion or persist from pre-MFA? +- Check for separate MFA challenge tokens (mfa_token, challenge_id, mfa_session) + +### MFA Configuration Discovery + +- Enumerate supported MFA methods per account and org +- Check if MFA is enforced server-side or only prompted client-side +- Identify fallback order: TOTP -> SMS -> backup codes -> support +- Look for MFA status in user profile APIs or JWT claims (mfa_verified, amr, acr) + +## Key Vulnerabilities + +### Session Fixation / Pre-MFA Access + +- **Pre-MFA session reuse**: After password verification, the session cookie or token may already grant access to some or all API endpoints without completing MFA +- **Session ID persistence**: If the session ID does not rotate after MFA completion, an attacker who captures the pre-MFA session can wait for the victim to complete MFA and inherit the authenticated session +- **Parallel session abuse**: Start login in session A, complete MFA in session B; check if session A is now fully authenticated + +### Code Validation Flaws + +- **Code reuse**: Submit a valid OTP code multiple times; server should invalidate after first use +- **Brute force**: No rate limiting or lockout on MFA code submission; 6-digit TOTP has only 1M possibilities +- **Time window abuse**: TOTP codes valid for extended windows (multiple 30s periods); try codes from adjacent time steps +- **Race conditions**: Submit multiple valid codes simultaneously to bypass single-use enforcement +- **Code leakage**: OTP codes reflected in responses, error messages, or logs +- **Predictable codes**: SMS/email codes using weak random number generators + +### Fallback and Recovery Abuse + +- **Backup code weaknesses**: Backup codes not rate-limited, not invalidated after use, or regeneratable without MFA +- **Method downgrade**: Force fallback from TOTP to SMS by claiming device unavailable; SMS interception is easier +- **Recovery flow bypass**: Account recovery (password reset, support ticket) does not require MFA, effectively bypassing it +- **Remember-me token theft**: Trusted device tokens stored insecurely, valid indefinitely, or not bound to device fingerprint + +### Enrollment Flow Weaknesses + +- **Enrollment without current MFA**: Add a new MFA method without verifying the existing one +- **TOTP secret exposure**: Secret key visible in API responses after initial enrollment; re-enrollment leaks new secret without invalidating old +- **Unenrollment without MFA**: Remove MFA method via API without proving possession of the factor +- **Race during enrollment**: Complete enrollment from two sessions simultaneously to register attacker-controlled device + +### State Machine Abuse + +- **Step skipping**: Call the post-MFA endpoint directly without completing the MFA challenge +- **Step repetition**: Replay the MFA success response to re-authenticate without a new code +- **Cross-flow confusion**: Use a password reset token or email verification flow to satisfy MFA requirements +- **Downgrade to single factor**: Modify client request to indicate MFA is not enabled for the account + +### Token and Claim Manipulation + +- **JWT amr/acr claims**: If MFA status is stored in JWT claims, modify them when signature verification is weak +- **MFA status in cookies**: Flip mfa_verified=true in a cookie or local storage value if server trusts client state +- **OAuth scope abuse**: Request tokens with scopes that bypass MFA checks on certain endpoints + +## Bypass Techniques + +- Content-type switching to hit alternate validation code paths +- Manipulate request flow: intercept redirect after password, modify to skip MFA challenge page +- Use API endpoints directly instead of UI flow to avoid client-side MFA enforcement +- Exploit inconsistent MFA enforcement between web, mobile, and API channels +- Abuse password reset or magic link flows that authenticate without MFA + +## Special Contexts + +### OAuth/OIDC Integration + +- MFA enforced at IdP but relying party accepts tokens without checking amr/acr claims +- Step-up authentication not triggered for sensitive operations +- Federated logins bypassing org MFA policy (social login without MFA) + +### Mobile Applications + +- Biometric prompt bypassed by hooking native APIs +- MFA state cached locally; modify app storage to skip challenge +- Push notification MFA: fatigue attacks (repeated prompts until user approves) + +### API Keys and Service Accounts + +- API keys bypass MFA entirely since they are single-factor by design +- Service account tokens not subject to MFA policy enforcement + +## Chaining Attacks + +- Session fixation + MFA bypass: fix pre-MFA session, wait for victim to complete MFA +- XSS + MFA bypass: steal pre-MFA cookies and access endpoints that do not enforce MFA completion +- CSRF + MFA unenrollment: force victim to disable MFA via cross-site request +- Account recovery + MFA bypass: reset password without MFA, then login without second factor + +## Testing Methodology + +1. **Map authentication states** - Document session tokens/cookies at each step; identify what changes after MFA +2. **Test pre-MFA access** - Try accessing protected resources with pre-MFA session tokens +3. **Validate code handling** - Test reuse, brute force, timing windows, and race conditions on OTP submission +4. **Probe fallbacks** - Attempt method downgrade, backup code abuse, recovery flow bypass +5. **Test enrollment** - Add/remove MFA methods without proper verification +6. **Cross-channel** - Verify MFA enforcement is consistent across web, mobile, and API +7. **Token inspection** - Check if MFA status is in JWT/cookie and if it can be manipulated + +## Validation + +1. Show access to protected resources using only a pre-MFA session (no second factor provided) +2. Demonstrate OTP code reuse or brute force resulting in successful MFA completion +3. Prove MFA can be disabled or a new method enrolled without verifying the existing factor +4. Show cross-channel inconsistency where one channel enforces MFA and another does not +5. Provide side-by-side evidence of normal MFA flow vs bypassed flow with the same account + +## False Positives + +- Remember-me functionality working as designed with proper device binding and expiry +- Step-up auth correctly gating sensitive operations while allowing read-only access pre-MFA +- Backup codes functioning as intended with single-use enforcement and rate limiting +- Account recovery requiring identity verification equivalent to MFA (e.g., identity document) + +## Impact + +- Full account takeover without possession of the second factor +- Bypass of compliance requirements (PCI DSS, SOC2, HIPAA) mandating MFA +- Persistent access after credential theft since MFA was the last line of defense +- Lateral movement in organizations where MFA is the primary access control + +## Pro Tips + +1. Always map the full session lifecycle; the gap between password verification and MFA completion is the primary attack surface +2. Test OTP validation with codes from adjacent time windows (T-1, T+1, T+2 for TOTP) +3. Check if pre-MFA sessions have any API access; even read-only access is a finding +4. Probe MFA enforcement at the service layer, not just the gateway/middleware +5. Try completing MFA on one session while accessing resources on a parallel pre-MFA session +6. Look for MFA status in JWTs and cookies; client-side MFA gates are common +7. Test account recovery and password reset flows separately; they often skip MFA +8. Push notification MFA is vulnerable to fatigue attacks; document the prompt rate and lockout behavior +9. Check if API keys or service tokens bypass MFA policy for the same account +10. Verify that MFA unenrollment requires the current factor, not just a password + +## Summary + +MFA security depends on the integrity of the entire authentication state machine, not just the code verification step. If any transition allows skipping the second factor, or if pre-MFA sessions grant meaningful access, the MFA implementation is broken. diff --git a/tests/skills/__init__.py b/tests/skills/__init__.py new file mode 100644 index 000000000..8024c8a47 --- /dev/null +++ b/tests/skills/__init__.py @@ -0,0 +1 @@ +# Skills test package diff --git a/tests/skills/test_skills.py b/tests/skills/test_skills.py new file mode 100644 index 000000000..0702403c0 --- /dev/null +++ b/tests/skills/test_skills.py @@ -0,0 +1,373 @@ +"""Tests for the Strix skills module: loading, validation, frontmatter, and new skill files.""" + +import re +from pathlib import Path + +import pytest + +from strix.skills import ( + _get_all_categories, + generate_skills_description, + get_all_skill_names, + get_available_skills, + load_skills, + validate_skill_names, +) +from strix.utils.resource_paths import get_strix_resource_path + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + +SKILLS_DIR = get_strix_resource_path("skills") +FRONTMATTER_RE = re.compile(r"^---\s*\n(.*?)\n---\s*\n", re.DOTALL) +REQUIRED_SECTIONS = {"Attack Surface", "Key Vulnerabilities", "Testing Methodology", "Validation"} + +# New skills added in this branch +NEW_SKILLS = {"mfa_bypass", "edge_cases"} + + +def _read_skill_file(category: str, name: str) -> str: + """Return the raw text of a skill markdown file.""" + return (SKILLS_DIR / category / f"{name}.md").read_text(encoding="utf-8") + + +def _parse_frontmatter(raw: str) -> dict[str, str]: + """Extract frontmatter key-value pairs from a skill file.""" + match = FRONTMATTER_RE.match(raw) + if not match: + return {} + pairs: dict[str, str] = {} + for line in match.group(1).splitlines(): + if ":" in line: + key, _, value = line.partition(":") + pairs[key.strip()] = value.strip() + return pairs + + +# --------------------------------------------------------------------------- +# get_available_skills +# --------------------------------------------------------------------------- + + +class TestGetAvailableSkills: + """Tests for the get_available_skills function.""" + + def test_returns_dict(self) -> None: + result = get_available_skills() + assert isinstance(result, dict) + + def test_excludes_internal_categories(self) -> None: + result = get_available_skills() + assert "scan_modes" not in result + assert "coordination" not in result + + def test_vulnerabilities_category_present(self) -> None: + result = get_available_skills() + assert "vulnerabilities" in result + + def test_frameworks_category_present(self) -> None: + result = get_available_skills() + assert "frameworks" in result + + def test_skills_are_sorted(self) -> None: + for category, skills in get_available_skills().items(): + assert skills == sorted(skills), f"Skills in {category} are not sorted" + + def test_new_skills_appear_in_vulnerabilities(self) -> None: + """Verify mfa_bypass and edge_cases are discovered by the loader.""" + vuln_skills = get_available_skills().get("vulnerabilities", []) + for skill in NEW_SKILLS: + assert skill in vuln_skills, f"{skill} not found in vulnerabilities category" + + +# --------------------------------------------------------------------------- +# get_all_skill_names +# --------------------------------------------------------------------------- + + +class TestGetAllSkillNames: + """Tests for the get_all_skill_names function.""" + + def test_returns_set(self) -> None: + assert isinstance(get_all_skill_names(), set) + + def test_contains_new_skills(self) -> None: + all_names = get_all_skill_names() + for skill in NEW_SKILLS: + assert skill in all_names, f"{skill} missing from all skill names" + + def test_contains_existing_core_skills(self) -> None: + all_names = get_all_skill_names() + core = {"race_conditions", "business_logic", "authentication_jwt", "xss", "sql_injection"} + for skill in core: + assert skill in all_names, f"Core skill {skill} missing" + + +# --------------------------------------------------------------------------- +# validate_skill_names +# --------------------------------------------------------------------------- + + +class TestValidateSkillNames: + """Tests for the validate_skill_names function.""" + + def test_valid_skills_recognized(self) -> None: + result = validate_skill_names(["mfa_bypass", "edge_cases"]) + assert result["valid"] == ["mfa_bypass", "edge_cases"] + assert result["invalid"] == [] + + def test_invalid_skills_reported(self) -> None: + result = validate_skill_names(["nonexistent_skill_xyz"]) + assert result["valid"] == [] + assert "nonexistent_skill_xyz" in result["invalid"] + + def test_mixed_valid_and_invalid(self) -> None: + result = validate_skill_names(["mfa_bypass", "bogus", "edge_cases"]) + assert "mfa_bypass" in result["valid"] + assert "edge_cases" in result["valid"] + assert "bogus" in result["invalid"] + + def test_empty_input(self) -> None: + result = validate_skill_names([]) + assert result == {"valid": [], "invalid": []} + + +# --------------------------------------------------------------------------- +# load_skills +# --------------------------------------------------------------------------- + + +class TestLoadSkills: + """Tests for the load_skills function.""" + + def test_load_mfa_bypass(self) -> None: + loaded = load_skills(["mfa_bypass"]) + assert "mfa_bypass" in loaded + assert "# MFA Bypass" in loaded["mfa_bypass"] + + def test_load_edge_cases(self) -> None: + loaded = load_skills(["edge_cases"]) + assert "edge_cases" in loaded + assert "# Edge Cases" in loaded["edge_cases"] + + def test_frontmatter_stripped_on_load(self) -> None: + """Loaded content should have the YAML frontmatter removed.""" + for skill_name in NEW_SKILLS: + loaded = load_skills([skill_name]) + content = loaded[skill_name] + assert not content.startswith("---"), ( + f"Frontmatter not stripped from {skill_name}" + ) + + def test_load_nonexistent_returns_empty(self) -> None: + loaded = load_skills(["does_not_exist_abc"]) + assert "does_not_exist_abc" not in loaded + + def test_load_multiple_skills(self) -> None: + loaded = load_skills(["mfa_bypass", "edge_cases", "race_conditions"]) + assert len(loaded) == 3 + + def test_loaded_content_is_nonempty(self) -> None: + for skill_name in NEW_SKILLS: + loaded = load_skills([skill_name]) + assert len(loaded[skill_name].strip()) > 100, ( + f"{skill_name} content is suspiciously short" + ) + + +# --------------------------------------------------------------------------- +# generate_skills_description +# --------------------------------------------------------------------------- + + +class TestGenerateSkillsDescription: + """Tests for the generate_skills_description function.""" + + def test_returns_string(self) -> None: + desc = generate_skills_description() + assert isinstance(desc, str) + + def test_contains_available_skills_label(self) -> None: + desc = generate_skills_description() + assert "Available skills:" in desc + + def test_mentions_new_skills(self) -> None: + desc = generate_skills_description() + all_names = get_all_skill_names() + # The description includes a comma-separated list of all skill names. + for skill in NEW_SKILLS: + if skill in all_names: + assert skill.replace("_", "_") in desc + + +# --------------------------------------------------------------------------- +# _get_all_categories (internal, includes scan_modes/coordination) +# --------------------------------------------------------------------------- + + +class TestGetAllCategories: + """Tests for the internal _get_all_categories function.""" + + def test_includes_internal_categories(self) -> None: + cats = _get_all_categories() + # Should include categories that get_available_skills excludes + assert "vulnerabilities" in cats + + def test_returns_sorted_skills(self) -> None: + for category, skills in _get_all_categories().items(): + assert skills == sorted(skills), f"Skills in {category} not sorted" + + +# --------------------------------------------------------------------------- +# Frontmatter and content quality for new skills +# --------------------------------------------------------------------------- + + +class TestMfaBypassSkillContent: + """Content validation for the mfa_bypass skill file.""" + + @pytest.fixture + def raw(self) -> str: + return _read_skill_file("vulnerabilities", "mfa_bypass") + + @pytest.fixture + def frontmatter(self, raw: str) -> dict[str, str]: + return _parse_frontmatter(raw) + + def test_has_frontmatter(self, raw: str) -> None: + assert raw.startswith("---"), "Missing frontmatter delimiters" + + def test_frontmatter_name(self, frontmatter: dict[str, str]) -> None: + assert frontmatter.get("name") == "mfa-bypass" + + def test_frontmatter_description(self, frontmatter: dict[str, str]) -> None: + desc = frontmatter.get("description", "") + assert len(desc) > 10, "Description is too short" + + def test_frontmatter_has_cwe(self, frontmatter: dict[str, str]) -> None: + assert "cwe" in frontmatter + + def test_required_sections_present(self, raw: str) -> None: + for section in REQUIRED_SECTIONS: + assert f"## {section}" in raw or f"# {section}" in raw, ( + f"Missing section: {section}" + ) + + def test_covers_session_fixation(self, raw: str) -> None: + lower = raw.lower() + assert "session" in lower + assert "fixation" in lower + + def test_covers_otp_reuse(self, raw: str) -> None: + assert "code reuse" in raw.lower() or "otp" in raw.lower() + + def test_covers_fallback_abuse(self, raw: str) -> None: + assert "fallback" in raw.lower() or "recovery" in raw.lower() + + def test_covers_enrollment(self, raw: str) -> None: + assert "enrollment" in raw.lower() or "enroll" in raw.lower() + + def test_has_pro_tips(self, raw: str) -> None: + assert "## Pro Tips" in raw + + def test_has_summary(self, raw: str) -> None: + assert "## Summary" in raw + + +class TestEdgeCasesSkillContent: + """Content validation for the edge_cases skill file.""" + + @pytest.fixture + def raw(self) -> str: + return _read_skill_file("vulnerabilities", "edge_cases") + + @pytest.fixture + def frontmatter(self, raw: str) -> dict[str, str]: + return _parse_frontmatter(raw) + + def test_has_frontmatter(self, raw: str) -> None: + assert raw.startswith("---"), "Missing frontmatter delimiters" + + def test_frontmatter_name(self, frontmatter: dict[str, str]) -> None: + assert frontmatter.get("name") == "edge-cases" + + def test_frontmatter_description(self, frontmatter: dict[str, str]) -> None: + desc = frontmatter.get("description", "") + assert len(desc) > 10, "Description is too short" + + def test_required_sections_present(self, raw: str) -> None: + for section in REQUIRED_SECTIONS: + assert f"## {section}" in raw or f"# {section}" in raw, ( + f"Missing section: {section}" + ) + + def test_covers_cache_poisoning(self, raw: str) -> None: + assert "cache poisoning" in raw.lower() or "cache key" in raw.lower() + + def test_covers_partial_failures(self, raw: str) -> None: + assert "partial failure" in raw.lower() or "half-committed" in raw.lower() + + def test_covers_eventual_consistency(self, raw: str) -> None: + assert "eventual consistency" in raw.lower() + + def test_covers_boundary_conditions(self, raw: str) -> None: + assert "boundary" in raw.lower() or "integer" in raw.lower() + + def test_covers_graceful_degradation(self, raw: str) -> None: + assert "degradation" in raw.lower() or "fallback" in raw.lower() + + def test_has_pro_tips(self, raw: str) -> None: + assert "## Pro Tips" in raw + + def test_has_summary(self, raw: str) -> None: + assert "## Summary" in raw + + +# --------------------------------------------------------------------------- +# Structural validation across ALL skill files +# --------------------------------------------------------------------------- + + +class TestAllSkillFilesStructure: + """Verify every skill .md file in the repo has valid frontmatter and key sections.""" + + @pytest.fixture + def all_skill_files(self) -> list[Path]: + """Collect every .md skill file across all categories.""" + files = [] + for category_dir in SKILLS_DIR.iterdir(): + if category_dir.is_dir() and not category_dir.name.startswith("__"): + files.extend(category_dir.glob("*.md")) + return files + + def test_all_files_have_frontmatter(self, all_skill_files: list[Path]) -> None: + for path in all_skill_files: + content = path.read_text(encoding="utf-8") + assert content.startswith("---"), ( + f"{path.relative_to(SKILLS_DIR)} missing frontmatter" + ) + + def test_all_files_have_name_field(self, all_skill_files: list[Path]) -> None: + for path in all_skill_files: + fm = _parse_frontmatter(path.read_text(encoding="utf-8")) + assert "name" in fm, ( + f"{path.relative_to(SKILLS_DIR)} missing 'name' in frontmatter" + ) + + def test_all_files_have_description_field(self, all_skill_files: list[Path]) -> None: + for path in all_skill_files: + fm = _parse_frontmatter(path.read_text(encoding="utf-8")) + assert "description" in fm, ( + f"{path.relative_to(SKILLS_DIR)} missing 'description' in frontmatter" + ) + + def test_no_empty_skill_files(self, all_skill_files: list[Path]) -> None: + for path in all_skill_files: + content = path.read_text(encoding="utf-8") + # Strip frontmatter and check remaining content + body = FRONTMATTER_RE.sub("", content).strip() + assert len(body) > 50, ( + f"{path.relative_to(SKILLS_DIR)} has insufficient content ({len(body)} chars)" + ) From 214813561b77f22f69aeefe505b1757a600b6cad Mon Sep 17 00:00:00 2001 From: fresh3nough <251773092+fresh3nough@users.noreply.github.com> Date: Fri, 6 Mar 2026 17:26:28 +0000 Subject: [PATCH 2/2] fix: address review feedback in test_skills.py - Remove no-op replace('_', '_') in test_mentions_new_skills - Add scan_modes and coordination assertions in test_includes_internal_categories - Add test_frontmatter_has_cwe to TestEdgeCasesSkillContent for parity Signed-off-by: fresh3nough <251773092+fresh3nough@users.noreply.github.com> --- tests/skills/test_skills.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/tests/skills/test_skills.py b/tests/skills/test_skills.py index 0702403c0..7d7853394 100644 --- a/tests/skills/test_skills.py +++ b/tests/skills/test_skills.py @@ -199,7 +199,7 @@ def test_mentions_new_skills(self) -> None: # The description includes a comma-separated list of all skill names. for skill in NEW_SKILLS: if skill in all_names: - assert skill.replace("_", "_") in desc + assert skill in desc # --------------------------------------------------------------------------- @@ -214,6 +214,8 @@ def test_includes_internal_categories(self) -> None: cats = _get_all_categories() # Should include categories that get_available_skills excludes assert "vulnerabilities" in cats + assert "scan_modes" in cats, "scan_modes should be returned by _get_all_categories" + assert "coordination" in cats, "coordination should be returned by _get_all_categories" def test_returns_sorted_skills(self) -> None: for category, skills in _get_all_categories().items(): @@ -297,6 +299,9 @@ def test_frontmatter_description(self, frontmatter: dict[str, str]) -> None: desc = frontmatter.get("description", "") assert len(desc) > 10, "Description is too short" + def test_frontmatter_has_cwe(self, frontmatter: dict[str, str]) -> None: + assert "cwe" in frontmatter + def test_required_sections_present(self, raw: str) -> None: for section in REQUIRED_SECTIONS: assert f"## {section}" in raw or f"# {section}" in raw, (