From 5ef1fb2a3cf476fa3d0edb481cdddd203a07c5da Mon Sep 17 00:00:00 2001
From: Dawnflare <129705403+Dawnflare@users.noreply.github.com>
Date: Mon, 30 Mar 2026 09:45:42 -0700
Subject: [PATCH 1/4] feat(video): add premium fullscreen transport controls
---
.agent/skills/adversarial_reviewer/SKILL.md | 74 ++
.../VERIFICATION_REPORT.md | 19 +
.../scripts/prepare_review.ps1 | 36 +
.../adversarial_reviewer/templates/README.md | 5 +
.agent/skills/code-review-checklist/SKILL.md | 444 +++++++++++
.agent/skills/git-pushing/SKILL.md | 33 +
.../git-pushing/scripts/smart_commit.sh | 19 +
.agent/skills/impasse_detector/SKILL.md | 81 ++
.../impasse_detector/VERIFICATION_REPORT.md | 26 +
.../examples/test_impasse.ps1 | 83 ++
.../scripts/detect_impasse.ps1 | 119 +++
.../impasse_detector/templates/README.md | 5 +
.agent/skills/kaizen/SKILL.md | 730 ++++++++++++++++++
.../systematic-debugging/CREATION-LOG.md | 119 +++
.agent/skills/systematic-debugging/SKILL.md | 296 +++++++
.../condition-based-waiting-example.ts | 158 ++++
.../condition-based-waiting.md | 115 +++
.../systematic-debugging/defense-in-depth.md | 122 +++
.../systematic-debugging/find-polluter.sh | 63 ++
.../root-cause-tracing.md | 169 ++++
.../systematic-debugging/test-academic.md | 14 +
.../systematic-debugging/test-pressure-1.md | 58 ++
.../systematic-debugging/test-pressure-2.md | 68 ++
.../systematic-debugging/test-pressure-3.md | 69 ++
documentation/Readme-FullScreenVideo.md | 63 ++
.../Controls/FullscreenVideoWindow.xaml | 162 ++++
.../Controls/FullscreenVideoWindow.xaml.cs | 346 +++++++++
.../Controls/VideoPlayerControl.xaml | 13 +-
.../Controls/VideoPlayerControl.xaml.cs | 77 +-
.../ExtraMetadataLoader.csproj | 9 +-
30 files changed, 3591 insertions(+), 4 deletions(-)
create mode 100644 .agent/skills/adversarial_reviewer/SKILL.md
create mode 100644 .agent/skills/adversarial_reviewer/VERIFICATION_REPORT.md
create mode 100644 .agent/skills/adversarial_reviewer/scripts/prepare_review.ps1
create mode 100644 .agent/skills/adversarial_reviewer/templates/README.md
create mode 100644 .agent/skills/code-review-checklist/SKILL.md
create mode 100644 .agent/skills/git-pushing/SKILL.md
create mode 100644 .agent/skills/git-pushing/scripts/smart_commit.sh
create mode 100644 .agent/skills/impasse_detector/SKILL.md
create mode 100644 .agent/skills/impasse_detector/VERIFICATION_REPORT.md
create mode 100644 .agent/skills/impasse_detector/examples/test_impasse.ps1
create mode 100644 .agent/skills/impasse_detector/scripts/detect_impasse.ps1
create mode 100644 .agent/skills/impasse_detector/templates/README.md
create mode 100644 .agent/skills/kaizen/SKILL.md
create mode 100644 .agent/skills/systematic-debugging/CREATION-LOG.md
create mode 100644 .agent/skills/systematic-debugging/SKILL.md
create mode 100644 .agent/skills/systematic-debugging/condition-based-waiting-example.ts
create mode 100644 .agent/skills/systematic-debugging/condition-based-waiting.md
create mode 100644 .agent/skills/systematic-debugging/defense-in-depth.md
create mode 100644 .agent/skills/systematic-debugging/find-polluter.sh
create mode 100644 .agent/skills/systematic-debugging/root-cause-tracing.md
create mode 100644 .agent/skills/systematic-debugging/test-academic.md
create mode 100644 .agent/skills/systematic-debugging/test-pressure-1.md
create mode 100644 .agent/skills/systematic-debugging/test-pressure-2.md
create mode 100644 .agent/skills/systematic-debugging/test-pressure-3.md
create mode 100644 documentation/Readme-FullScreenVideo.md
create mode 100644 source/Generic/ExtraMetadataLoader/Controls/FullscreenVideoWindow.xaml
create mode 100644 source/Generic/ExtraMetadataLoader/Controls/FullscreenVideoWindow.xaml.cs
diff --git a/.agent/skills/adversarial_reviewer/SKILL.md b/.agent/skills/adversarial_reviewer/SKILL.md
new file mode 100644
index 0000000000..c8805795b9
--- /dev/null
+++ b/.agent/skills/adversarial_reviewer/SKILL.md
@@ -0,0 +1,74 @@
+---
+name: Adversarial Reviewer
+description: Generates a 'Red Team' critique of recent code or plans to identify weak assumptions and edge cases.
+version: 1.0.0
+author: Antigravity Skills Library
+created: 2026-01-16
+leverage_score: 5/5
+---
+
+# SKILL-019: Adversarial Reviewer
+
+## Overview
+
+Forces a context switch from "Builder" to "Attacker". Ideally used before finalizing any critical component (e.g. auth, payments, file I/O). It prepares a **structured prompt packet** that the agent then uses to critique its own work.
+
+## Trigger Phrases
+
+- `red team this`
+- `adversarial review`
+- `find bugs`
+- `critique code`
+
+## Inputs
+
+| Parameter | Type | Required | Default | Description |
+|-----------|------|----------|---------|-------------|
+| `--FilePath` | string | Yes | - | Path to the file to review |
+| `--Mode` | string | No | `Security` | `Security`, `Performance`, or `Logic` |
+
+## Outputs
+
+### 1. Structured Prompt (Markdown)
+
+A prompt template pre-filled with the code and specific attack vectors. **The Agent must then "simulate" the adversary by responding to this prompt.**
+
+```markdown
+# โ๏ธ ADVERSARIAL REVIEW REQUEST
+**TARGET:** src/Auth.cs
+**MODE:** SECURITY
+
+## INSTRUCTIONS
+You are now the ADVERSARY. Break this code.
+Look for:
+1. Race Conditions
+2. Replay Attacks
+...
+```
+
+## Preconditions
+
+1. Target file exists.
+2. PowerShell 5.1+ or Core 7+.
+
+## Safety/QA Checks
+
+1. **Read-Only**: Does not modify the code; only reads it.
+2. **Size Limit**: If file is massive (>10k lines), warns or truncates to prevent token overflow.
+
+## Stop Conditions
+
+| Condition | Action |
+|-----------|--------|
+| File not found | Return error |
+
+## Implementation
+
+See `scripts/prepare_review.ps1`.
+
+## Integration with Other Skills
+
+1. Call `prepare_review.ps1`.
+2. **Agent Step**: Read the output text.
+3. **Agent Step**: "Thinking" block -> Process the critique.
+4. **Agent Step**: Generate list of fixes.
diff --git a/.agent/skills/adversarial_reviewer/VERIFICATION_REPORT.md b/.agent/skills/adversarial_reviewer/VERIFICATION_REPORT.md
new file mode 100644
index 0000000000..adf7083f85
--- /dev/null
+++ b/.agent/skills/adversarial_reviewer/VERIFICATION_REPORT.md
@@ -0,0 +1,19 @@
+# ๐งช Verification Report: SKILL-019 (Adversarial Reviewer)
+**Date:** 01/16/2026 03:13:04
+
+## Test Cases
+
+### 1. Security Mode
+**Input:** code_stub.cs (Security)
+**Expected:** Prompt containing 'Injection/Sanitization'
+**Actual:** Found Keywords
+**Pass:** โ
+
+### 2. Performance Mode
+**Input:** code_stub.cs (Performance)
+**Expected:** Prompt containing 'Blocking I/O'
+**Actual:** Found Keywords
+**Pass:** โ
+
+## Summary
+**โ PASSED (100% Coverage)**
diff --git a/.agent/skills/adversarial_reviewer/scripts/prepare_review.ps1 b/.agent/skills/adversarial_reviewer/scripts/prepare_review.ps1
new file mode 100644
index 0000000000..ff60ee1f63
--- /dev/null
+++ b/.agent/skills/adversarial_reviewer/scripts/prepare_review.ps1
@@ -0,0 +1,36 @@
+<#
+.SYNOPSIS
+ Prepares a context packet for an adversarial code review.
+#>
+[CmdletBinding()]
+Param(
+ [Parameter(Mandatory = $true)][string]$FilePath,
+ [Parameter(Mandatory = $false)][string]$Mode = "Security"
+)
+
+$ErrorActionPreference = "Stop"
+
+if (-not (Test-Path $FilePath)) { Write-Error "File not found"; exit 1 }
+
+$Content = Get-Content -Path $FilePath -Raw
+
+$Focus = "General Quality"
+if ($Mode -eq "Security") { $Focus = "1. Injection/Sanitization`n2. Auth Bypasses`n3. Data Leaks`n4. DOS Vectors" }
+elseif ($Mode -eq "Performance") { $Focus = "1. O(n^2) loops`n2. Memory Leaks`n3. Blocking I/O`n4. Allocations" }
+elseif ($Mode -eq "Logic") { $Focus = "1. Off-by-one`n2. Null Refs`n3. Race Conditions" }
+
+$Template = @"
+# โ๏ธ ADVERSARIAL REVIEW REQUEST
+**TARGET:** $(Split-Path $FilePath -Leaf)
+**MODE:** $Mode
+
+### ๐ฏ FOCUS AREAS:
+$Focus
+
+## ๐ CODE ARTIFACT
+```
+$Content
+```
+"@
+
+Write-Output $Template
diff --git a/.agent/skills/adversarial_reviewer/templates/README.md b/.agent/skills/adversarial_reviewer/templates/README.md
new file mode 100644
index 0000000000..5773d67b5b
--- /dev/null
+++ b/.agent/skills/adversarial_reviewer/templates/README.md
@@ -0,0 +1,5 @@
+# Templates
+
+Place prompt templates (markdown), configuration files (json), or other static assets here.
+
+Scripts should load these files rather than hardcoding long strings.
diff --git a/.agent/skills/code-review-checklist/SKILL.md b/.agent/skills/code-review-checklist/SKILL.md
new file mode 100644
index 0000000000..a00a47b559
--- /dev/null
+++ b/.agent/skills/code-review-checklist/SKILL.md
@@ -0,0 +1,444 @@
+---
+name: code-review-checklist
+description: "Comprehensive checklist for conducting thorough code reviews covering functionality, security, performance, and maintainability"
+---
+
+# Code Review Checklist
+
+## Overview
+
+Provide a systematic checklist for conducting thorough code reviews. This skill helps reviewers ensure code quality, catch bugs, identify security issues, and maintain consistency across the codebase.
+
+## When to Use This Skill
+
+- Use when reviewing pull requests
+- Use when conducting code audits
+- Use when establishing code review standards for a team
+- Use when training new developers on code review practices
+- Use when you want to ensure nothing is missed in reviews
+- Use when creating code review documentation
+
+## How It Works
+
+### Step 1: Understand the Context
+
+Before reviewing code, I'll help you understand:
+- What problem does this code solve?
+- What are the requirements?
+- What files were changed and why?
+- Are there related issues or tickets?
+- What's the testing strategy?
+
+### Step 2: Review Functionality
+
+Check if the code works correctly:
+- Does it solve the stated problem?
+- Are edge cases handled?
+- Is error handling appropriate?
+- Are there any logical errors?
+- Does it match the requirements?
+
+### Step 3: Review Code Quality
+
+Assess code maintainability:
+- Is the code readable and clear?
+- Are names descriptive?
+- Is it properly structured?
+- Are functions/methods focused?
+- Is there unnecessary complexity?
+
+### Step 4: Review Security
+
+Check for security issues:
+- Are inputs validated?
+- Is sensitive data protected?
+- Are there SQL injection risks?
+- Is authentication/authorization correct?
+- Are dependencies secure?
+
+### Step 5: Review Performance
+
+Look for performance issues:
+- Are there unnecessary loops?
+- Is database access optimized?
+- Are there memory leaks?
+- Is caching used appropriately?
+- Are there N+1 query problems?
+
+### Step 6: Review Tests
+
+Verify test coverage:
+- Are there tests for new code?
+- Do tests cover edge cases?
+- Are tests meaningful?
+- Do all tests pass?
+- Is test coverage adequate?
+
+## Examples
+
+### Example 1: Functionality Review Checklist
+
+```markdown
+## Functionality Review
+
+### Requirements
+- [ ] Code solves the stated problem
+- [ ] All acceptance criteria are met
+- [ ] Edge cases are handled
+- [ ] Error cases are handled
+- [ ] User input is validated
+
+### Logic
+- [ ] No logical errors or bugs
+- [ ] Conditions are correct (no off-by-one errors)
+- [ ] Loops terminate correctly
+- [ ] Recursion has proper base cases
+- [ ] State management is correct
+
+### Error Handling
+- [ ] Errors are caught appropriately
+- [ ] Error messages are clear and helpful
+- [ ] Errors don't expose sensitive information
+- [ ] Failed operations are rolled back
+- [ ] Logging is appropriate
+
+### Example Issues to Catch:
+
+**โ Bad - Missing validation:**
+\`\`\`javascript
+function createUser(email, password) {
+ // No validation!
+ return db.users.create({ email, password });
+}
+\`\`\`
+
+**โ Good - Proper validation:**
+\`\`\`javascript
+function createUser(email, password) {
+ if (!email || !isValidEmail(email)) {
+ throw new Error('Invalid email address');
+ }
+ if (!password || password.length < 8) {
+ throw new Error('Password must be at least 8 characters');
+ }
+ return db.users.create({ email, password });
+}
+\`\`\`
+```
+
+### Example 2: Security Review Checklist
+
+```markdown
+## Security Review
+
+### Input Validation
+- [ ] All user inputs are validated
+- [ ] SQL injection is prevented (use parameterized queries)
+- [ ] XSS is prevented (escape output)
+- [ ] CSRF protection is in place
+- [ ] File uploads are validated (type, size, content)
+
+### Authentication & Authorization
+- [ ] Authentication is required where needed
+- [ ] Authorization checks are present
+- [ ] Passwords are hashed (never stored plain text)
+- [ ] Sessions are managed securely
+- [ ] Tokens expire appropriately
+
+### Data Protection
+- [ ] Sensitive data is encrypted
+- [ ] API keys are not hardcoded
+- [ ] Environment variables are used for secrets
+- [ ] Personal data follows privacy regulations
+- [ ] Database credentials are secure
+
+### Dependencies
+- [ ] No known vulnerable dependencies
+- [ ] Dependencies are up to date
+- [ ] Unnecessary dependencies are removed
+- [ ] Dependency versions are pinned
+
+### Example Issues to Catch:
+
+**โ Bad - SQL injection risk:**
+\`\`\`javascript
+const query = \`SELECT * FROM users WHERE email = '\${email}'\`;
+db.query(query);
+\`\`\`
+
+**โ Good - Parameterized query:**
+\`\`\`javascript
+const query = 'SELECT * FROM users WHERE email = $1';
+db.query(query, [email]);
+\`\`\`
+
+**โ Bad - Hardcoded secret:**
+\`\`\`javascript
+const API_KEY = 'sk_live_abc123xyz';
+\`\`\`
+
+**โ Good - Environment variable:**
+\`\`\`javascript
+const API_KEY = process.env.API_KEY;
+if (!API_KEY) {
+ throw new Error('API_KEY environment variable is required');
+}
+\`\`\`
+```
+
+### Example 3: Code Quality Review Checklist
+
+```markdown
+## Code Quality Review
+
+### Readability
+- [ ] Code is easy to understand
+- [ ] Variable names are descriptive
+- [ ] Function names explain what they do
+- [ ] Complex logic has comments
+- [ ] Magic numbers are replaced with constants
+
+### Structure
+- [ ] Functions are small and focused
+- [ ] Code follows DRY principle (Don't Repeat Yourself)
+- [ ] Proper separation of concerns
+- [ ] Consistent code style
+- [ ] No dead code or commented-out code
+
+### Maintainability
+- [ ] Code is modular and reusable
+- [ ] Dependencies are minimal
+- [ ] Changes are backwards compatible
+- [ ] Breaking changes are documented
+- [ ] Technical debt is noted
+
+### Example Issues to Catch:
+
+**โ Bad - Unclear naming:**
+\`\`\`javascript
+function calc(a, b, c) {
+ return a * b + c;
+}
+\`\`\`
+
+**โ Good - Descriptive naming:**
+\`\`\`javascript
+function calculateTotalPrice(quantity, unitPrice, tax) {
+ return quantity * unitPrice + tax;
+}
+\`\`\`
+
+**โ Bad - Function doing too much:**
+\`\`\`javascript
+function processOrder(order) {
+ // Validate order
+ if (!order.items) throw new Error('No items');
+
+ // Calculate total
+ let total = 0;
+ for (let item of order.items) {
+ total += item.price * item.quantity;
+ }
+
+ // Apply discount
+ if (order.coupon) {
+ total *= 0.9;
+ }
+
+ // Process payment
+ const payment = stripe.charge(total);
+
+ // Send email
+ sendEmail(order.email, 'Order confirmed');
+
+ // Update inventory
+ updateInventory(order.items);
+
+ return { orderId: order.id, total };
+}
+\`\`\`
+
+**โ Good - Separated concerns:**
+\`\`\`javascript
+function processOrder(order) {
+ validateOrder(order);
+ const total = calculateOrderTotal(order);
+ const payment = processPayment(total);
+ sendOrderConfirmation(order.email);
+ updateInventory(order.items);
+
+ return { orderId: order.id, total };
+}
+\`\`\`
+```
+
+## Best Practices
+
+### โ Do This
+
+- **Review Small Changes** - Smaller PRs are easier to review thoroughly
+- **Check Tests First** - Verify tests pass and cover new code
+- **Run the Code** - Test it locally when possible
+- **Ask Questions** - Don't assume, ask for clarification
+- **Be Constructive** - Suggest improvements, don't just criticize
+- **Focus on Important Issues** - Don't nitpick minor style issues
+- **Use Automated Tools** - Linters, formatters, security scanners
+- **Review Documentation** - Check if docs are updated
+- **Consider Performance** - Think about scale and efficiency
+- **Check for Regressions** - Ensure existing functionality still works
+
+### โ Don't Do This
+
+- **Don't Approve Without Reading** - Actually review the code
+- **Don't Be Vague** - Provide specific feedback with examples
+- **Don't Ignore Security** - Security issues are critical
+- **Don't Skip Tests** - Untested code will cause problems
+- **Don't Be Rude** - Be respectful and professional
+- **Don't Rubber Stamp** - Every review should add value
+- **Don't Review When Tired** - You'll miss important issues
+- **Don't Forget Context** - Understand the bigger picture
+
+## Complete Review Checklist
+
+### Pre-Review
+- [ ] Read the PR description and linked issues
+- [ ] Understand what problem is being solved
+- [ ] Check if tests pass in CI/CD
+- [ ] Pull the branch and run it locally
+
+### Functionality
+- [ ] Code solves the stated problem
+- [ ] Edge cases are handled
+- [ ] Error handling is appropriate
+- [ ] User input is validated
+- [ ] No logical errors
+
+### Security
+- [ ] No SQL injection vulnerabilities
+- [ ] No XSS vulnerabilities
+- [ ] Authentication/authorization is correct
+- [ ] Sensitive data is protected
+- [ ] No hardcoded secrets
+
+### Performance
+- [ ] No unnecessary database queries
+- [ ] No N+1 query problems
+- [ ] Efficient algorithms used
+- [ ] No memory leaks
+- [ ] Caching used appropriately
+
+### Code Quality
+- [ ] Code is readable and clear
+- [ ] Names are descriptive
+- [ ] Functions are focused and small
+- [ ] No code duplication
+- [ ] Follows project conventions
+
+### Tests
+- [ ] New code has tests
+- [ ] Tests cover edge cases
+- [ ] Tests are meaningful
+- [ ] All tests pass
+- [ ] Test coverage is adequate
+
+### Documentation
+- [ ] Code comments explain why, not what
+- [ ] API documentation is updated
+- [ ] README is updated if needed
+- [ ] Breaking changes are documented
+- [ ] Migration guide provided if needed
+
+### Git
+- [ ] Commit messages are clear
+- [ ] No merge conflicts
+- [ ] Branch is up to date with main
+- [ ] No unnecessary files committed
+- [ ] .gitignore is properly configured
+
+## Common Pitfalls
+
+### Problem: Missing Edge Cases
+**Symptoms:** Code works for happy path but fails on edge cases
+**Solution:** Ask "What if...?" questions
+- What if the input is null?
+- What if the array is empty?
+- What if the user is not authenticated?
+- What if the network request fails?
+
+### Problem: Security Vulnerabilities
+**Symptoms:** Code exposes security risks
+**Solution:** Use security checklist
+- Run security scanners (npm audit, Snyk)
+- Check OWASP Top 10
+- Validate all inputs
+- Use parameterized queries
+- Never trust user input
+
+### Problem: Poor Test Coverage
+**Symptoms:** New code has no tests or inadequate tests
+**Solution:** Require tests for all new code
+- Unit tests for functions
+- Integration tests for features
+- Edge case tests
+- Error case tests
+
+### Problem: Unclear Code
+**Symptoms:** Reviewer can't understand what code does
+**Solution:** Request improvements
+- Better variable names
+- Explanatory comments
+- Smaller functions
+- Clear structure
+
+## Review Comment Templates
+
+### Requesting Changes
+```markdown
+**Issue:** [Describe the problem]
+
+**Current code:**
+\`\`\`javascript
+// Show problematic code
+\`\`\`
+
+**Suggested fix:**
+\`\`\`javascript
+// Show improved code
+\`\`\`
+
+**Why:** [Explain why this is better]
+```
+
+### Asking Questions
+```markdown
+**Question:** [Your question]
+
+**Context:** [Why you're asking]
+
+**Suggestion:** [If you have one]
+```
+
+### Praising Good Code
+```markdown
+**Nice!** [What you liked]
+
+This is great because [explain why]
+```
+
+## Related Skills
+
+- `@requesting-code-review` - Prepare code for review
+- `@receiving-code-review` - Handle review feedback
+- `@systematic-debugging` - Debug issues found in review
+- `@test-driven-development` - Ensure code has tests
+
+## Additional Resources
+
+- [Google Code Review Guidelines](https://google.github.io/eng-practices/review/)
+- [OWASP Top 10](https://owasp.org/www-project-top-ten/)
+- [Code Review Best Practices](https://github.com/thoughtbot/guides/tree/main/code-review)
+- [How to Review Code](https://www.kevinlondon.com/2015/05/05/code-review-best-practices.html)
+
+---
+
+**Pro Tip:** Use a checklist template for every review to ensure consistency and thoroughness. Customize it for your team's specific needs!
diff --git a/.agent/skills/git-pushing/SKILL.md b/.agent/skills/git-pushing/SKILL.md
new file mode 100644
index 0000000000..218f88e41b
--- /dev/null
+++ b/.agent/skills/git-pushing/SKILL.md
@@ -0,0 +1,33 @@
+---
+name: git-pushing
+description: Stage, commit, and push git changes with conventional commit messages. Use when user wants to commit and push changes, mentions pushing to remote, or asks to save and push their work. Also activates when user says "push changes", "commit and push", "push this", "push to github", or similar git workflow requests.
+---
+
+# Git Push Workflow
+
+Stage all changes, create a conventional commit, and push to the remote branch.
+
+## When to Use
+
+Automatically activate when the user:
+
+- Explicitly asks to push changes ("push this", "commit and push")
+- Mentions saving work to remote ("save to github", "push to remote")
+- Completes a feature and wants to share it
+- Says phrases like "let's push this up" or "commit these changes"
+
+## Workflow
+
+**ALWAYS use the script** - do NOT use manual git commands:
+
+```bash
+bash skills/git-pushing/scripts/smart_commit.sh
+```
+
+With custom message:
+
+```bash
+bash skills/git-pushing/scripts/smart_commit.sh "feat: add feature"
+```
+
+Script handles: staging, conventional commit message, Claude footer, push with -u flag.
diff --git a/.agent/skills/git-pushing/scripts/smart_commit.sh b/.agent/skills/git-pushing/scripts/smart_commit.sh
new file mode 100644
index 0000000000..21299873ef
--- /dev/null
+++ b/.agent/skills/git-pushing/scripts/smart_commit.sh
@@ -0,0 +1,19 @@
+#!/bin/bash
+set -e
+
+# Default commit message if none provided
+MESSAGE="${1:-chore: update code}"
+
+# Add all changes
+git add .
+
+# Commit with the provided message
+git commit -m "$MESSAGE"
+
+# Get current branch name
+BRANCH=$(git rev-parse --abbrev-ref HEAD)
+
+# Push to remote, setting upstream if needed
+git push -u origin "$BRANCH"
+
+echo "โ Successfully pushed to $BRANCH"
diff --git a/.agent/skills/impasse_detector/SKILL.md b/.agent/skills/impasse_detector/SKILL.md
new file mode 100644
index 0000000000..07f6e8c5c1
--- /dev/null
+++ b/.agent/skills/impasse_detector/SKILL.md
@@ -0,0 +1,81 @@
+---
+name: Impasse Detector
+description: Detects when the agent is stuck in a reasoning loop or unproductive state by analyzing tool usage and sentiment patterns.
+version: 1.0.0
+author: Antigravity Skills Library
+created: 2026-01-16
+leverage_score: 5/5
+---
+
+# SKILL-017: Impasse Detector
+
+## Overview
+
+Critical **meta-cognitive skill** that acts as a circuit breaker for unproductive loops. It analyzes recent conversation history and tool outputs to detect "stuck" states, preventing token wastage on failing paths and forcing escalation or delegation.
+
+## Trigger Phrases
+
+- `check logic`
+- `am i stuck`
+- `detect loop`
+- `impasse check`
+
+## Inputs
+
+| Parameter | Type | Required | Default | Description |
+|-----------|------|----------|---------|-------------|
+| `--TranscriptPath` | string | No | $null | Path to conversation log/json |
+| `--Content` | string | No | $null | Direct string content to analyze |
+| `--Lookback` | int | No | 10 | Number of recent turns to analyze |
+
+## Outputs
+
+### 1. Analysis Result (JSON)
+
+```json
+{
+ "status": "IMPASSE",
+ "confidence": 0.95,
+ "reasons": [
+ "Apology loop detected (4 occurrences)",
+ "High frequency of file reads (6 in window)"
+ ],
+ "recommendation": "ESCALATE_TO_USER",
+ "score": 80
+}
+```
+
+### 2. Status Codes
+
+- `CLEAR`: No issues detected.
+- `IMPASSE`: Significant loop/blockage detected.
+- `UNKNOWN`: Insufficient data.
+
+## Preconditions
+
+1. Access to conversation history OR a provided transcript string.
+2. PowerShell 5.1+ or Core 7+.
+
+## Safety/QA Checks
+
+1. **Read-Only**: This skill only analyzes text; it does not modify state.
+2. **Fail-Safe**: If input is missing/malformed, defaults to "UNKNOWN" rather than crashing.
+
+## Stop Conditions
+
+| Condition | Action |
+|-----------|--------|
+| No input provided | Return status "UNKNOWN" (0 confidence) |
+| File not found | Return error JSON |
+
+## Implementation
+
+See `scripts/detect_impasse.ps1`.
+
+## Integration with Other Skills
+
+**All agent loops should:**
+
+1. Call SKILL-017 every 5-10 turns.
+2. If status is `IMPASSE`, trigger **SKILL-020 (Failure Postmortem)** AND **SKILL-010 (Async Feedback)**.
+3. If score > 90, stop execution and warn user.
diff --git a/.agent/skills/impasse_detector/VERIFICATION_REPORT.md b/.agent/skills/impasse_detector/VERIFICATION_REPORT.md
new file mode 100644
index 0000000000..14b4a002c9
--- /dev/null
+++ b/.agent/skills/impasse_detector/VERIFICATION_REPORT.md
@@ -0,0 +1,26 @@
+# ๐งช Verification Report: SKILL-017 (Impasse Detector)
+**Date:** 01/16/2026 02:59:23
+**Quality Gate:** 100% Pass Required
+
+## Test Cases
+
+### 1. Clean Path
+**Input:** Simple conversation.
+**Expected:** CLEAR
+**Actual:** CLEAR
+**Pass:** โ
+
+### 2. Mild Loop (Below Threshold)
+**Input:** 3 apologies.
+**Expected:** CLEAR (Score 40)
+**Actual:** CLEAR (Score 40)
+**Pass:** โ
+
+### 3. Severe Impasse (Critical)
+**Input:** 5 apologies, 5 retries.
+**Expected:** IMPASSE (Score 100)
+**Actual:** IMPASSE (Score 100)
+**Pass:** โ
+
+## Final Verdict
+**โ PASSED (100% Coverage)**
diff --git a/.agent/skills/impasse_detector/examples/test_impasse.ps1 b/.agent/skills/impasse_detector/examples/test_impasse.ps1
new file mode 100644
index 0000000000..cba6b91c44
--- /dev/null
+++ b/.agent/skills/impasse_detector/examples/test_impasse.ps1
@@ -0,0 +1,83 @@
+$ScriptPath = Join-Path $PSScriptRoot "..\scripts\detect_impasse.ps1"
+$ReportPath = Join-Path $PSScriptRoot "..\VERIFICATION_REPORT.md"
+
+# Case 1: Clean (Pass)
+$CleanInput = @"
+User: Check the file.
+Agent: Checks file.
+Agent: Everything looks good.
+User: Great, move on.
+"@
+
+# Case 2: Mild Impasse (Trigger Warning)
+# 3 apologies (Threshold > 2), 2 retries (Threshold > 2 not met) -> Score 40 (Clear/Pause)
+$MildInput = @"
+Agent: I apologize.
+Agent: I will fix.
+Agent: I apologize.
+Agent: Let me try.
+Agent: I apologize.
+"@
+
+# Case 3: Severe Impasse (Trigger Critical)
+# 5 apologies (Score 40+20=60), 5 retries (Score 30+10=40) -> Total 100
+$SevereInput = @"
+Agent: I apologize for the mistake.
+Agent: Unable to read file.
+Agent: I apologize.
+Agent: Let me fix this.
+Agent: I apologize.
+Agent: Retrying command.
+Agent: I apologize.
+Agent: Failed to execute.
+Agent: I apologize, complete failure.
+Agent: Will fix now.
+"@
+
+Write-Host "Running 100% Quality Gate Validation for SKILL-017..."
+
+$CleanResult = & $ScriptPath -Content $CleanInput | ConvertFrom-Json
+$MildResult = & $ScriptPath -Content $MildInput | ConvertFrom-Json
+$SevereResult = & $ScriptPath -Content $SevereInput | ConvertFrom-Json
+
+$PassClean = $CleanResult.status -eq "CLEAR"
+# Mild might be CLEAR or IMPASSE depending on score, but let's check score logic.
+# 3 apologies = 40 pts. < 50 threshold. Should be CLEAR.
+$PassMild = $MildResult.status -eq "CLEAR" -and $MildResult.score -eq 40
+
+$PassSevere = $SevereResult.status -eq "IMPASSE" -and $SevereResult.score -ge 100
+
+$AllPassed = $PassClean -and $PassMild -and $PassSevere
+
+$Report = @"
+# ๐งช Verification Report: SKILL-017 (Impasse Detector)
+**Date:** $(Get-Date)
+**Quality Gate:** 100% Pass Required
+
+## Test Cases
+
+### 1. Clean Path
+**Input:** Simple conversation.
+**Expected:** CLEAR
+**Actual:** $($CleanResult.status)
+**Pass:** $(if($PassClean){"โ "}else{"โ"})
+
+### 2. Mild Loop (Below Threshold)
+**Input:** 3 apologies.
+**Expected:** CLEAR (Score 40)
+**Actual:** $($MildResult.status) (Score $($MildResult.score))
+**Pass:** $(if($PassMild){"โ "}else{"โ"})
+
+### 3. Severe Impasse (Critical)
+**Input:** 5 apologies, 5 retries.
+**Expected:** IMPASSE (Score 100)
+**Actual:** $($SevereResult.status) (Score $($SevereResult.score))
+**Pass:** $(if($PassSevere){"โ "}else{"โ"})
+
+## Final Verdict
+$(if($AllPassed){"**โ PASSED (100% Coverage)**"}else{"**โ FAILED**"})
+"@
+
+Set-Content -Path $ReportPath -Value $Report
+Write-Host "Report saved to $ReportPath"
+Write-Output $Report
diff --git a/.agent/skills/impasse_detector/scripts/detect_impasse.ps1 b/.agent/skills/impasse_detector/scripts/detect_impasse.ps1
new file mode 100644
index 0000000000..884027e542
--- /dev/null
+++ b/.agent/skills/impasse_detector/scripts/detect_impasse.ps1
@@ -0,0 +1,119 @@
+<#
+.SYNOPSIS
+ Detects reasoning impasses and loops in agent execution.
+.DESCRIPTION
+ Analyzes conversation transcripts or logs to identify repetitive patterns,
+ circular reasoning, or lack of progress (idempotent loops).
+.PARAMETER TranscriptPath
+ Path to the conversation log/json file.
+.PARAMETER Content
+ Direct string content to analyze.
+.PARAMETER Lookback
+ Number of recent turns to analyze. Default 10.
+#>
+
+[CmdletBinding()]
+Param(
+ [Parameter(Mandatory = $false)]
+ [string]$TranscriptPath,
+
+ [Parameter(Mandatory = $false)]
+ [string]$Content,
+
+ [int]$Lookback = 10
+)
+
+Set-StrictMode -Version Latest
+$ErrorActionPreference = "Stop"
+
+function Get-ImpasseScore {
+ param([string]$Text)
+
+ $Score = 0
+ $Reasons = @()
+
+ # 1. Check for "Apology Loop"
+ $ApologyMatches = [regex]::Matches($Text, "apologize|sorry|mistake|overlooked|confusion", "IgnoreCase")
+ if ($ApologyMatches.Count -gt 2) {
+ $Score += 40
+ if ($ApologyMatches.Count -gt 4) { $Score += 20 } # Bonus for severe looping
+ $Reasons += "Apology loop detected ($($ApologyMatches.Count) occurrences)"
+ }
+
+ # 2. Check for "Try Again" Loop / futile effort
+ # Broader regex to catch "I will fix", "retrying", "attempt 2", etc.
+ $RetryMatches = [regex]::Matches($Text, "try again|attempting|let me fix|will fix|correcting|retrying|unable to|failed to", "IgnoreCase")
+ if ($RetryMatches.Count -gt 2) {
+ $Score += 30
+ if ($RetryMatches.Count -gt 4) { $Score += 10 }
+ $Reasons += "Repetitive retry/failure pattern ($($RetryMatches.Count) occurrences)"
+ }
+
+ # 3. Check for repetitive file reads (Heuristic)
+ $ReadMatches = [regex]::Matches($Text, "(view_file|list_dir|read_resource|run_command)", "IgnoreCase")
+ if ($ReadMatches.Count -gt 6) {
+ $Score += 20
+ $Reasons += "High frequency of tool operations ($($ReadMatches.Count) in window)"
+ }
+
+ return @{ Score = $Score; Reasons = $Reasons }
+}
+
+try {
+ $AnalysisContent = ""
+
+ if ($TranscriptPath -and (Test-Path $TranscriptPath)) {
+ $AnalysisContent = Get-Content -Path $TranscriptPath -Raw
+ }
+ elseif ($Content) {
+ $AnalysisContent = $Content
+ }
+ else {
+ Write-Output (@{
+ status = "UNKNOWN"
+ confidence = 0
+ reason = "No input provided"
+ } | ConvertTo-Json)
+ exit 0
+ }
+
+ # Simple slice for lookback (approx 20 lines per turn)
+ $Lines = $AnalysisContent -split "`n"
+ if ($Lines.Count -gt ($Lookback * 20)) {
+ $StartIndex = $Lines.Count - ($Lookback * 20)
+ $AnalysisContent = ($Lines[$StartIndex..($Lines.Count - 1)]) -join "`n"
+ }
+
+ $Result = Get-ImpasseScore -Text $AnalysisContent
+
+ $Status = "CLEAR"
+ $Recommendation = "CONTINUE"
+
+ if ($Result.Score -ge 50) {
+ $Status = "IMPASSE"
+ $Recommendation = "ESCALATE_TO_USER"
+
+ if ($Result.Score -lt 80) {
+ $Recommendation = "PAUSE_AND_REFLECT"
+ }
+ }
+
+ $Output = @{
+ status = $Status
+ confidence = [Math]::Min(1.0, $Result.Score / 100)
+ reasons = $Result.Reasons
+ recommendation = $Recommendation
+ score = $Result.Score
+ timestamp = (Get-Date).ToString("o")
+ }
+
+ Write-Output ($Output | ConvertTo-Json -Depth 3)
+
+}
+catch {
+ Write-Output (@{
+ status = "ERROR"
+ error = $_.Exception.Message
+ } | ConvertTo-Json)
+ exit 1
+}
diff --git a/.agent/skills/impasse_detector/templates/README.md b/.agent/skills/impasse_detector/templates/README.md
new file mode 100644
index 0000000000..5773d67b5b
--- /dev/null
+++ b/.agent/skills/impasse_detector/templates/README.md
@@ -0,0 +1,5 @@
+# Templates
+
+Place prompt templates (markdown), configuration files (json), or other static assets here.
+
+Scripts should load these files rather than hardcoding long strings.
diff --git a/.agent/skills/kaizen/SKILL.md b/.agent/skills/kaizen/SKILL.md
new file mode 100644
index 0000000000..0f5d47c2bc
--- /dev/null
+++ b/.agent/skills/kaizen/SKILL.md
@@ -0,0 +1,730 @@
+---
+name: kaizen
+description: Guide for continuous improvement, error proofing, and standardization. Use this skill when the user wants to improve code quality, refactor, or discuss process improvements.
+---
+
+# Kaizen: Continuous Improvement
+
+## Overview
+
+Small improvements, continuously. Error-proof by design. Follow what works. Build only what's needed.
+
+**Core principle:** Many small improvements beat one big change. Prevent errors at design time, not with fixes.
+
+## When to Use
+
+**Always applied for:**
+
+- Code implementation and refactoring
+- Architecture and design decisions
+- Process and workflow improvements
+- Error handling and validation
+
+**Philosophy:** Quality through incremental progress and prevention, not perfection through massive effort.
+
+## The Four Pillars
+
+### 1. Continuous Improvement (Kaizen)
+
+Small, frequent improvements compound into major gains.
+
+#### Principles
+
+**Incremental over revolutionary:**
+
+- Make smallest viable change that improves quality
+- One improvement at a time
+- Verify each change before next
+- Build momentum through small wins
+
+**Always leave code better:**
+
+- Fix small issues as you encounter them
+- Refactor while you work (within scope)
+- Update outdated comments
+- Remove dead code when you see it
+
+**Iterative refinement:**
+
+- First version: make it work
+- Second pass: make it clear
+- Third pass: make it efficient
+- Don't try all three at once
+
+
+```typescript
+// Iteration 1: Make it work
+const calculateTotal = (items: Item[]) => {
+ let total = 0;
+ for (let i = 0; i < items.length; i++) {
+ total += items[i].price * items[i].quantity;
+ }
+ return total;
+};
+
+// Iteration 2: Make it clear (refactor)
+const calculateTotal = (items: Item[]): number => {
+return items.reduce((total, item) => {
+return total + (item.price \* item.quantity);
+}, 0);
+};
+
+// Iteration 3: Make it robust (add validation)
+const calculateTotal = (items: Item[]): number => {
+if (!items?.length) return 0;
+
+return items.reduce((total, item) => {
+if (item.price < 0 || item.quantity < 0) {
+throw new Error('Price and quantity must be non-negative');
+}
+return total + (item.price \* item.quantity);
+}, 0);
+};
+
+````
+Each step is complete, tested, and working
+
+
+
+```typescript
+// Trying to do everything at once
+const calculateTotal = (items: Item[]): number => {
+ // Validate, optimize, add features, handle edge cases all together
+ if (!items?.length) return 0;
+ const validItems = items.filter(item => {
+ if (item.price < 0) throw new Error('Negative price');
+ if (item.quantity < 0) throw new Error('Negative quantity');
+ return item.quantity > 0; // Also filtering zero quantities
+ });
+ // Plus caching, plus logging, plus currency conversion...
+ return validItems.reduce(...); // Too many concerns at once
+};
+````
+
+Overwhelming, error-prone, hard to verify
+
+
+#### In Practice
+
+**When implementing features:**
+
+1. Start with simplest version that works
+2. Add one improvement (error handling, validation, etc.)
+3. Test and verify
+4. Repeat if time permits
+5. Don't try to make it perfect immediately
+
+**When refactoring:**
+
+- Fix one smell at a time
+- Commit after each improvement
+- Keep tests passing throughout
+- Stop when "good enough" (diminishing returns)
+
+**When reviewing code:**
+
+- Suggest incremental improvements (not rewrites)
+- Prioritize: critical โ important โ nice-to-have
+- Focus on highest-impact changes first
+- Accept "better than before" even if not perfect
+
+### 2. Poka-Yoke (Error Proofing)
+
+Design systems that prevent errors at compile/design time, not runtime.
+
+#### Principles
+
+**Make errors impossible:**
+
+- Type system catches mistakes
+- Compiler enforces contracts
+- Invalid states unrepresentable
+- Errors caught early (left of production)
+
+**Design for safety:**
+
+- Fail fast and loudly
+- Provide helpful error messages
+- Make correct path obvious
+- Make incorrect path difficult
+
+**Defense in layers:**
+
+1. Type system (compile time)
+2. Validation (runtime, early)
+3. Guards (preconditions)
+4. Error boundaries (graceful degradation)
+
+#### Type System Error Proofing
+
+
+```typescript
+// Error: string status can be any value
+type OrderBad = {
+ status: string; // Can be "pending", "PENDING", "pnding", anything!
+ total: number;
+};
+
+// Good: Only valid states possible
+type OrderStatus = 'pending' | 'processing' | 'shipped' | 'delivered';
+type Order = {
+status: OrderStatus;
+total: number;
+};
+
+// Better: States with associated data
+type Order =
+| { status: 'pending'; createdAt: Date }
+| { status: 'processing'; startedAt: Date; estimatedCompletion: Date }
+| { status: 'shipped'; trackingNumber: string; shippedAt: Date }
+| { status: 'delivered'; deliveredAt: Date; signature: string };
+
+// Now impossible to have shipped without trackingNumber
+
+````
+Type system prevents entire classes of errors
+
+
+
+```typescript
+// Make invalid states unrepresentable
+type NonEmptyArray = [T, ...T[]];
+
+const firstItem = (items: NonEmptyArray): T => {
+ return items[0]; // Always safe, never undefined!
+};
+
+// Caller must prove array is non-empty
+const items: number[] = [1, 2, 3];
+if (items.length > 0) {
+ firstItem(items as NonEmptyArray); // Safe
+}
+````
+
+Function signature guarantees safety
+
+
+#### Validation Error Proofing
+
+
+```typescript
+// Error: Validation after use
+const processPayment = (amount: number) => {
+ const fee = amount * 0.03; // Used before validation!
+ if (amount <= 0) throw new Error('Invalid amount');
+ // ...
+};
+
+// Good: Validate immediately
+const processPayment = (amount: number) => {
+if (amount <= 0) {
+throw new Error('Payment amount must be positive');
+}
+if (amount > 10000) {
+throw new Error('Payment exceeds maximum allowed');
+}
+
+const fee = amount \* 0.03;
+// ... now safe to use
+};
+
+// Better: Validation at boundary with branded type
+type PositiveNumber = number & { readonly \_\_brand: 'PositiveNumber' };
+
+const validatePositive = (n: number): PositiveNumber => {
+if (n <= 0) throw new Error('Must be positive');
+return n as PositiveNumber;
+};
+
+const processPayment = (amount: PositiveNumber) => {
+// amount is guaranteed positive, no need to check
+const fee = amount \* 0.03;
+};
+
+// Validate at system boundary
+const handlePaymentRequest = (req: Request) => {
+const amount = validatePositive(req.body.amount); // Validate once
+processPayment(amount); // Use everywhere safely
+};
+
+````
+Validate once at boundary, safe everywhere else
+
+
+#### Guards and Preconditions
+
+
+```typescript
+// Early returns prevent deeply nested code
+const processUser = (user: User | null) => {
+ if (!user) {
+ logger.error('User not found');
+ return;
+ }
+
+ if (!user.email) {
+ logger.error('User email missing');
+ return;
+ }
+
+ if (!user.isActive) {
+ logger.info('User inactive, skipping');
+ return;
+ }
+
+ // Main logic here, guaranteed user is valid and active
+ sendEmail(user.email, 'Welcome!');
+};
+````
+
+Guards make assumptions explicit and enforced
+
+
+#### Configuration Error Proofing
+
+
+```typescript
+// Error: Optional config with unsafe defaults
+type ConfigBad = {
+ apiKey?: string;
+ timeout?: number;
+};
+
+const client = new APIClient({ timeout: 5000 }); // apiKey missing!
+
+// Good: Required config, fails early
+type Config = {
+apiKey: string;
+timeout: number;
+};
+
+const loadConfig = (): Config => {
+const apiKey = process.env.API_KEY;
+if (!apiKey) {
+throw new Error('API_KEY environment variable required');
+}
+
+return {
+apiKey,
+timeout: 5000,
+};
+};
+
+// App fails at startup if config invalid, not during request
+const config = loadConfig();
+const client = new APIClient(config);
+
+````
+Fail at startup, not in production
+
+
+#### In Practice
+
+**When designing APIs:**
+- Use types to constrain inputs
+- Make invalid states unrepresentable
+- Return Result instead of throwing
+- Document preconditions in types
+
+**When handling errors:**
+- Validate at system boundaries
+
+- Use guards for preconditions
+- Fail fast with clear messages
+- Log context for debugging
+
+**When configuring:**
+- Required over optional with defaults
+- Validate all config at startup
+- Fail deployment if config invalid
+- Don't allow partial configurations
+
+### 3. Standardized Work
+Follow established patterns. Document what works. Make good practices easy to follow.
+
+#### Principles
+
+**Consistency over cleverness:**
+- Follow existing codebase patterns
+- Don't reinvent solved problems
+- New pattern only if significantly better
+- Team agreement on new patterns
+
+**Documentation lives with code:**
+- README for setup and architecture
+- CLAUDE.md for AI coding conventions
+- Comments for "why", not "what"
+- Examples for complex patterns
+
+**Automate standards:**
+- Linters enforce style
+- Type checks enforce contracts
+- Tests verify behavior
+- CI/CD enforces quality gates
+
+#### Following Patterns
+
+
+```typescript
+// Existing codebase pattern for API clients
+class UserAPIClient {
+ async getUser(id: string): Promise {
+ return this.fetch(`/users/${id}`);
+ }
+}
+
+// New code follows the same pattern
+class OrderAPIClient {
+ async getOrder(id: string): Promise {
+ return this.fetch(`/orders/${id}`);
+ }
+}
+````
+
+Consistency makes codebase predictable
+
+
+
+```typescript
+// Existing pattern uses classes
+class UserAPIClient { /* ... */ }
+
+// New code introduces different pattern without discussion
+const getOrder = async (id: string): Promise => {
+// Breaking consistency "because I prefer functions"
+};
+
+````
+Inconsistency creates confusion
+
+
+#### Error Handling Patterns
+
+
+```typescript
+// Project standard: Result type for recoverable errors
+type Result = { ok: true; value: T } | { ok: false; error: E };
+
+// All services follow this pattern
+const fetchUser = async (id: string): Promise> => {
+ try {
+ const user = await db.users.findById(id);
+ if (!user) {
+ return { ok: false, error: new Error('User not found') };
+ }
+ return { ok: true, value: user };
+ } catch (err) {
+ return { ok: false, error: err as Error };
+ }
+};
+
+// Callers use consistent pattern
+const result = await fetchUser('123');
+if (!result.ok) {
+ logger.error('Failed to fetch user', result.error);
+ return;
+}
+const user = result.value; // Type-safe!
+````
+
+Standard pattern across codebase
+
+
+#### Documentation Standards
+
+
+```typescript
+/**
+ * Retries an async operation with exponential backoff.
+ *
+ * Why: Network requests fail temporarily; retrying improves reliability
+ * When to use: External API calls, database operations
+ * When not to use: User input validation, internal function calls
+ *
+ * @example
+ * const result = await retry(
+ * () => fetch('https://api.example.com/data'),
+ * { maxAttempts: 3, baseDelay: 1000 }
+ * );
+ */
+const retry = async (
+ operation: () => Promise,
+ options: RetryOptions
+): Promise => {
+ // Implementation...
+};
+```
+Documents why, when, and how
+
+
+#### In Practice
+
+**Before adding new patterns:**
+
+- Search codebase for similar problems solved
+- Check CLAUDE.md for project conventions
+- Discuss with team if breaking from pattern
+- Update docs when introducing new pattern
+
+**When writing code:**
+
+- Match existing file structure
+- Use same naming conventions
+- Follow same error handling approach
+- Import from same locations
+
+**When reviewing:**
+
+- Check consistency with existing code
+- Point to examples in codebase
+- Suggest aligning with standards
+- Update CLAUDE.md if new standard emerges
+
+### 4. Just-In-Time (JIT)
+
+Build what's needed now. No more, no less. Avoid premature optimization and over-engineering.
+
+#### Principles
+
+**YAGNI (You Aren't Gonna Need It):**
+
+- Implement only current requirements
+- No "just in case" features
+- No "we might need this later" code
+- Delete speculation
+
+**Simplest thing that works:**
+
+- Start with straightforward solution
+- Add complexity only when needed
+- Refactor when requirements change
+- Don't anticipate future needs
+
+**Optimize when measured:**
+
+- No premature optimization
+- Profile before optimizing
+- Measure impact of changes
+- Accept "good enough" performance
+
+#### YAGNI in Action
+
+
+```typescript
+// Current requirement: Log errors to console
+const logError = (error: Error) => {
+ console.error(error.message);
+};
+```
+Simple, meets current need
+
+
+
+```typescript
+// Over-engineered for "future needs"
+interface LogTransport {
+ write(level: LogLevel, message: string, meta?: LogMetadata): Promise;
+}
+
+class ConsoleTransport implements LogTransport { /_... _/ }
+class FileTransport implements LogTransport { /_ ... _/ }
+class RemoteTransport implements LogTransport { /_ ..._/ }
+
+class Logger {
+private transports: LogTransport[] = [];
+private queue: LogEntry[] = [];
+private rateLimiter: RateLimiter;
+private formatter: LogFormatter;
+
+// 200 lines of code for "maybe we'll need it"
+}
+
+const logError = (error: Error) => {
+Logger.getInstance().log('error', error.message);
+};
+
+````
+Building for imaginary future requirements
+
+
+**When to add complexity:**
+- Current requirement demands it
+- Pain points identified through use
+- Measured performance issues
+- Multiple use cases emerged
+
+
+```typescript
+// Start simple
+const formatCurrency = (amount: number): string => {
+ return `$${amount.toFixed(2)}`;
+};
+
+// Requirement evolves: support multiple currencies
+const formatCurrency = (amount: number, currency: string): string => {
+ const symbols = { USD: '$', EUR: 'โฌ', GBP: 'ยฃ' };
+ return `${symbols[currency]}${amount.toFixed(2)}`;
+};
+
+// Requirement evolves: support localization
+const formatCurrency = (amount: number, locale: string): string => {
+ return new Intl.NumberFormat(locale, {\n style: 'currency',
+ currency: locale === 'en-US' ? 'USD' : 'EUR',
+ }).format(amount);
+};
+````
+
+Complexity added only when needed
+
+
+#### Premature Abstraction
+
+
+```typescript
+// One use case, but building generic framework
+abstract class BaseCRUDService {
+ abstract getAll(): Promise;
+ abstract getById(id: string): Promise;
+ abstract create(data: Partial): Promise;
+ abstract update(id: string, data: Partial): Promise;
+ abstract delete(id: string): Promise;
+}
+
+class GenericRepository { /_300 lines _/ }
+class QueryBuilder { /_ 200 lines_/ }
+// ... building entire ORM for single table
+
+````
+Massive abstraction for uncertain future
+
+
+
+```typescript
+// Simple functions for current needs
+const getUsers = async (): Promise => {
+ return db.query('SELECT * FROM users');
+};
+
+const getUserById = async (id: string): Promise => {
+ return db.query('SELECT * FROM users WHERE id = $1', [id]);
+};
+
+// When pattern emerges across multiple entities, then abstract
+````
+
+Abstract only when pattern proven across 3+ cases
+
+
+#### Performance Optimization
+
+
+```typescript
+// Current: Simple approach
+const filterActiveUsers = (users: User[]): User[] => {
+ return users.filter(user => user.isActive);
+};
+
+// Benchmark shows: 50ms for 1000 users (acceptable)
+// โ Ship it, no optimization needed
+
+// Later: After profiling shows this is bottleneck
+// Then optimize with indexed lookup or caching
+
+````
+Optimize based on measurement, not assumptions
+
+
+
+```typescript
+// Premature optimization
+const filterActiveUsers = (users: User[]): User[] => {
+ // "This might be slow, so let's cache and index"
+ const cache = new WeakMap();
+ const indexed = buildBTreeIndex(users, 'isActive');
+ // 100 lines of optimization code
+ // Adds complexity, harder to maintain
+ // No evidence it was needed
+};\
+````
+
+Complex solution for unmeasured problem
+
+
+#### In Practice
+
+**When implementing:**
+
+- Solve the immediate problem
+- Use straightforward approach
+- Resist "what if" thinking
+- Delete speculative code
+
+**When optimizing:**
+
+- Profile first, optimize second
+- Measure before and after
+- Document why optimization needed
+- Keep simple version in tests
+
+**When abstracting:**
+
+- Wait for 3+ similar cases (Rule of Three)
+- Make abstraction as simple as possible
+- Prefer duplication over wrong abstraction
+- Refactor when pattern clear
+
+## Integration with Commands
+
+The Kaizen skill guides how you work. The commands provide structured analysis:
+
+- **`/why`**: Root cause analysis (5 Whys)
+- **`/cause-and-effect`**: Multi-factor analysis (Fishbone)
+- **`/plan-do-check-act`**: Iterative improvement cycles
+- **`/analyse-problem`**: Comprehensive documentation (A3)
+- **`/analyse`**: Smart method selection (Gemba/VSM/Muda)
+
+Use commands for structured problem-solving. Apply skill for day-to-day development.
+
+## Red Flags
+
+**Violating Continuous Improvement:**
+
+- "I'll refactor it later" (never happens)
+- Leaving code worse than you found it
+- Big bang rewrites instead of incremental
+
+**Violating Poka-Yoke:**
+
+- "Users should just be careful"
+- Validation after use instead of before
+- Optional config with no validation
+
+**Violating Standardized Work:**
+
+- "I prefer to do it my way"
+- Not checking existing patterns
+- Ignoring project conventions
+
+**Violating Just-In-Time:**
+
+- "We might need this someday"
+- Building frameworks before using them
+- Optimizing without measuring
+
+## Remember
+
+**Kaizen is about:**
+
+- Small improvements continuously
+- Preventing errors by design
+- Following proven patterns
+- Building only what's needed
+
+**Not about:**
+
+- Perfection on first try
+- Massive refactoring projects
+- Clever abstractions
+- Premature optimization
+
+**Mindset:** Good enough today, better tomorrow. Repeat.
diff --git a/.agent/skills/systematic-debugging/CREATION-LOG.md b/.agent/skills/systematic-debugging/CREATION-LOG.md
new file mode 100644
index 0000000000..024d00a5ee
--- /dev/null
+++ b/.agent/skills/systematic-debugging/CREATION-LOG.md
@@ -0,0 +1,119 @@
+# Creation Log: Systematic Debugging Skill
+
+Reference example of extracting, structuring, and bulletproofing a critical skill.
+
+## Source Material
+
+Extracted debugging framework from `/Users/jesse/.claude/CLAUDE.md`:
+- 4-phase systematic process (Investigation โ Pattern Analysis โ Hypothesis โ Implementation)
+- Core mandate: ALWAYS find root cause, NEVER fix symptoms
+- Rules designed to resist time pressure and rationalization
+
+## Extraction Decisions
+
+**What to include:**
+- Complete 4-phase framework with all rules
+- Anti-shortcuts ("NEVER fix symptom", "STOP and re-analyze")
+- Pressure-resistant language ("even if faster", "even if I seem in a hurry")
+- Concrete steps for each phase
+
+**What to leave out:**
+- Project-specific context
+- Repetitive variations of same rule
+- Narrative explanations (condensed to principles)
+
+## Structure Following skill-creation/SKILL.md
+
+1. **Rich when_to_use** - Included symptoms and anti-patterns
+2. **Type: technique** - Concrete process with steps
+3. **Keywords** - "root cause", "symptom", "workaround", "debugging", "investigation"
+4. **Flowchart** - Decision point for "fix failed" โ re-analyze vs add more fixes
+5. **Phase-by-phase breakdown** - Scannable checklist format
+6. **Anti-patterns section** - What NOT to do (critical for this skill)
+
+## Bulletproofing Elements
+
+Framework designed to resist rationalization under pressure:
+
+### Language Choices
+- "ALWAYS" / "NEVER" (not "should" / "try to")
+- "even if faster" / "even if I seem in a hurry"
+- "STOP and re-analyze" (explicit pause)
+- "Don't skip past" (catches the actual behavior)
+
+### Structural Defenses
+- **Phase 1 required** - Can't skip to implementation
+- **Single hypothesis rule** - Forces thinking, prevents shotgun fixes
+- **Explicit failure mode** - "IF your first fix doesn't work" with mandatory action
+- **Anti-patterns section** - Shows exactly what shortcuts look like
+
+### Redundancy
+- Root cause mandate in overview + when_to_use + Phase 1 + implementation rules
+- "NEVER fix symptom" appears 4 times in different contexts
+- Each phase has explicit "don't skip" guidance
+
+## Testing Approach
+
+Created 4 validation tests following skills/meta/testing-skills-with-subagents:
+
+### Test 1: Academic Context (No Pressure)
+- Simple bug, no time pressure
+- **Result:** Perfect compliance, complete investigation
+
+### Test 2: Time Pressure + Obvious Quick Fix
+- User "in a hurry", symptom fix looks easy
+- **Result:** Resisted shortcut, followed full process, found real root cause
+
+### Test 3: Complex System + Uncertainty
+- Multi-layer failure, unclear if can find root cause
+- **Result:** Systematic investigation, traced through all layers, found source
+
+### Test 4: Failed First Fix
+- Hypothesis doesn't work, temptation to add more fixes
+- **Result:** Stopped, re-analyzed, formed new hypothesis (no shotgun)
+
+**All tests passed.** No rationalizations found.
+
+## Iterations
+
+### Initial Version
+- Complete 4-phase framework
+- Anti-patterns section
+- Flowchart for "fix failed" decision
+
+### Enhancement 1: TDD Reference
+- Added link to skills/testing/test-driven-development
+- Note explaining TDD's "simplest code" โ debugging's "root cause"
+- Prevents confusion between methodologies
+
+## Final Outcome
+
+Bulletproof skill that:
+- โ Clearly mandates root cause investigation
+- โ Resists time pressure rationalization
+- โ Provides concrete steps for each phase
+- โ Shows anti-patterns explicitly
+- โ Tested under multiple pressure scenarios
+- โ Clarifies relationship to TDD
+- โ Ready for use
+
+## Key Insight
+
+**Most important bulletproofing:** Anti-patterns section showing exact shortcuts that feel justified in the moment. When Claude thinks "I'll just add this one quick fix", seeing that exact pattern listed as wrong creates cognitive friction.
+
+## Usage Example
+
+When encountering a bug:
+1. Load skill: skills/debugging/systematic-debugging
+2. Read overview (10 sec) - reminded of mandate
+3. Follow Phase 1 checklist - forced investigation
+4. If tempted to skip - see anti-pattern, stop
+5. Complete all phases - root cause found
+
+**Time investment:** 5-10 minutes
+**Time saved:** Hours of symptom-whack-a-mole
+
+---
+
+*Created: 2025-10-03*
+*Purpose: Reference example for skill extraction and bulletproofing*
diff --git a/.agent/skills/systematic-debugging/SKILL.md b/.agent/skills/systematic-debugging/SKILL.md
new file mode 100644
index 0000000000..111d2a98c2
--- /dev/null
+++ b/.agent/skills/systematic-debugging/SKILL.md
@@ -0,0 +1,296 @@
+---
+name: systematic-debugging
+description: Use when encountering any bug, test failure, or unexpected behavior, before proposing fixes
+---
+
+# Systematic Debugging
+
+## Overview
+
+Random fixes waste time and create new bugs. Quick patches mask underlying issues.
+
+**Core principle:** ALWAYS find root cause before attempting fixes. Symptom fixes are failure.
+
+**Violating the letter of this process is violating the spirit of debugging.**
+
+## The Iron Law
+
+```
+NO FIXES WITHOUT ROOT CAUSE INVESTIGATION FIRST
+```
+
+If you haven't completed Phase 1, you cannot propose fixes.
+
+## When to Use
+
+Use for ANY technical issue:
+- Test failures
+- Bugs in production
+- Unexpected behavior
+- Performance problems
+- Build failures
+- Integration issues
+
+**Use this ESPECIALLY when:**
+- Under time pressure (emergencies make guessing tempting)
+- "Just one quick fix" seems obvious
+- You've already tried multiple fixes
+- Previous fix didn't work
+- You don't fully understand the issue
+
+**Don't skip when:**
+- Issue seems simple (simple bugs have root causes too)
+- You're in a hurry (rushing guarantees rework)
+- Manager wants it fixed NOW (systematic is faster than thrashing)
+
+## The Four Phases
+
+You MUST complete each phase before proceeding to the next.
+
+### Phase 1: Root Cause Investigation
+
+**BEFORE attempting ANY fix:**
+
+1. **Read Error Messages Carefully**
+ - Don't skip past errors or warnings
+ - They often contain the exact solution
+ - Read stack traces completely
+ - Note line numbers, file paths, error codes
+
+2. **Reproduce Consistently**
+ - Can you trigger it reliably?
+ - What are the exact steps?
+ - Does it happen every time?
+ - If not reproducible โ gather more data, don't guess
+
+3. **Check Recent Changes**
+ - What changed that could cause this?
+ - Git diff, recent commits
+ - New dependencies, config changes
+ - Environmental differences
+
+4. **Gather Evidence in Multi-Component Systems**
+
+ **WHEN system has multiple components (CI โ build โ signing, API โ service โ database):**
+
+ **BEFORE proposing fixes, add diagnostic instrumentation:**
+ ```
+ For EACH component boundary:
+ - Log what data enters component
+ - Log what data exits component
+ - Verify environment/config propagation
+ - Check state at each layer
+
+ Run once to gather evidence showing WHERE it breaks
+ THEN analyze evidence to identify failing component
+ THEN investigate that specific component
+ ```
+
+ **Example (multi-layer system):**
+ ```bash
+ # Layer 1: Workflow
+ echo "=== Secrets available in workflow: ==="
+ echo "IDENTITY: ${IDENTITY:+SET}${IDENTITY:-UNSET}"
+
+ # Layer 2: Build script
+ echo "=== Env vars in build script: ==="
+ env | grep IDENTITY || echo "IDENTITY not in environment"
+
+ # Layer 3: Signing script
+ echo "=== Keychain state: ==="
+ security list-keychains
+ security find-identity -v
+
+ # Layer 4: Actual signing
+ codesign --sign "$IDENTITY" --verbose=4 "$APP"
+ ```
+
+ **This reveals:** Which layer fails (secrets โ workflow โ, workflow โ build โ)
+
+5. **Trace Data Flow**
+
+ **WHEN error is deep in call stack:**
+
+ See `root-cause-tracing.md` in this directory for the complete backward tracing technique.
+
+ **Quick version:**
+ - Where does bad value originate?
+ - What called this with bad value?
+ - Keep tracing up until you find the source
+ - Fix at source, not at symptom
+
+### Phase 2: Pattern Analysis
+
+**Find the pattern before fixing:**
+
+1. **Find Working Examples**
+ - Locate similar working code in same codebase
+ - What works that's similar to what's broken?
+
+2. **Compare Against References**
+ - If implementing pattern, read reference implementation COMPLETELY
+ - Don't skim - read every line
+ - Understand the pattern fully before applying
+
+3. **Identify Differences**
+ - What's different between working and broken?
+ - List every difference, however small
+ - Don't assume "that can't matter"
+
+4. **Understand Dependencies**
+ - What other components does this need?
+ - What settings, config, environment?
+ - What assumptions does it make?
+
+### Phase 3: Hypothesis and Testing
+
+**Scientific method:**
+
+1. **Form Single Hypothesis**
+ - State clearly: "I think X is the root cause because Y"
+ - Write it down
+ - Be specific, not vague
+
+2. **Test Minimally**
+ - Make the SMALLEST possible change to test hypothesis
+ - One variable at a time
+ - Don't fix multiple things at once
+
+3. **Verify Before Continuing**
+ - Did it work? Yes โ Phase 4
+ - Didn't work? Form NEW hypothesis
+ - DON'T add more fixes on top
+
+4. **When You Don't Know**
+ - Say "I don't understand X"
+ - Don't pretend to know
+ - Ask for help
+ - Research more
+
+### Phase 4: Implementation
+
+**Fix the root cause, not the symptom:**
+
+1. **Create Failing Test Case**
+ - Simplest possible reproduction
+ - Automated test if possible
+ - One-off test script if no framework
+ - MUST have before fixing
+ - Use the `superpowers:test-driven-development` skill for writing proper failing tests
+
+2. **Implement Single Fix**
+ - Address the root cause identified
+ - ONE change at a time
+ - No "while I'm here" improvements
+ - No bundled refactoring
+
+3. **Verify Fix**
+ - Test passes now?
+ - No other tests broken?
+ - Issue actually resolved?
+
+4. **If Fix Doesn't Work**
+ - STOP
+ - Count: How many fixes have you tried?
+ - If < 3: Return to Phase 1, re-analyze with new information
+ - **If โฅ 3: STOP and question the architecture (step 5 below)**
+ - DON'T attempt Fix #4 without architectural discussion
+
+5. **If 3+ Fixes Failed: Question Architecture**
+
+ **Pattern indicating architectural problem:**
+ - Each fix reveals new shared state/coupling/problem in different place
+ - Fixes require "massive refactoring" to implement
+ - Each fix creates new symptoms elsewhere
+
+ **STOP and question fundamentals:**
+ - Is this pattern fundamentally sound?
+ - Are we "sticking with it through sheer inertia"?
+ - Should we refactor architecture vs. continue fixing symptoms?
+
+ **Discuss with your human partner before attempting more fixes**
+
+ This is NOT a failed hypothesis - this is a wrong architecture.
+
+## Red Flags - STOP and Follow Process
+
+If you catch yourself thinking:
+- "Quick fix for now, investigate later"
+- "Just try changing X and see if it works"
+- "Add multiple changes, run tests"
+- "Skip the test, I'll manually verify"
+- "It's probably X, let me fix that"
+- "I don't fully understand but this might work"
+- "Pattern says X but I'll adapt it differently"
+- "Here are the main problems: [lists fixes without investigation]"
+- Proposing solutions before tracing data flow
+- **"One more fix attempt" (when already tried 2+)**
+- **Each fix reveals new problem in different place**
+
+**ALL of these mean: STOP. Return to Phase 1.**
+
+**If 3+ fixes failed:** Question the architecture (see Phase 4.5)
+
+## your human partner's Signals You're Doing It Wrong
+
+**Watch for these redirections:**
+- "Is that not happening?" - You assumed without verifying
+- "Will it show us...?" - You should have added evidence gathering
+- "Stop guessing" - You're proposing fixes without understanding
+- "Ultrathink this" - Question fundamentals, not just symptoms
+- "We're stuck?" (frustrated) - Your approach isn't working
+
+**When you see these:** STOP. Return to Phase 1.
+
+## Common Rationalizations
+
+| Excuse | Reality |
+|--------|---------|
+| "Issue is simple, don't need process" | Simple issues have root causes too. Process is fast for simple bugs. |
+| "Emergency, no time for process" | Systematic debugging is FASTER than guess-and-check thrashing. |
+| "Just try this first, then investigate" | First fix sets the pattern. Do it right from the start. |
+| "I'll write test after confirming fix works" | Untested fixes don't stick. Test first proves it. |
+| "Multiple fixes at once saves time" | Can't isolate what worked. Causes new bugs. |
+| "Reference too long, I'll adapt the pattern" | Partial understanding guarantees bugs. Read it completely. |
+| "I see the problem, let me fix it" | Seeing symptoms โ understanding root cause. |
+| "One more fix attempt" (after 2+ failures) | 3+ failures = architectural problem. Question pattern, don't fix again. |
+
+## Quick Reference
+
+| Phase | Key Activities | Success Criteria |
+|-------|---------------|------------------|
+| **1. Root Cause** | Read errors, reproduce, check changes, gather evidence | Understand WHAT and WHY |
+| **2. Pattern** | Find working examples, compare | Identify differences |
+| **3. Hypothesis** | Form theory, test minimally | Confirmed or new hypothesis |
+| **4. Implementation** | Create test, fix, verify | Bug resolved, tests pass |
+
+## When Process Reveals "No Root Cause"
+
+If systematic investigation reveals issue is truly environmental, timing-dependent, or external:
+
+1. You've completed the process
+2. Document what you investigated
+3. Implement appropriate handling (retry, timeout, error message)
+4. Add monitoring/logging for future investigation
+
+**But:** 95% of "no root cause" cases are incomplete investigation.
+
+## Supporting Techniques
+
+These techniques are part of systematic debugging and available in this directory:
+
+- **`root-cause-tracing.md`** - Trace bugs backward through call stack to find original trigger
+- **`defense-in-depth.md`** - Add validation at multiple layers after finding root cause
+- **`condition-based-waiting.md`** - Replace arbitrary timeouts with condition polling
+
+**Related skills:**
+- **superpowers:test-driven-development** - For creating failing test case (Phase 4, Step 1)
+- **superpowers:verification-before-completion** - Verify fix worked before claiming success
+
+## Real-World Impact
+
+From debugging sessions:
+- Systematic approach: 15-30 minutes to fix
+- Random fixes approach: 2-3 hours of thrashing
+- First-time fix rate: 95% vs 40%
+- New bugs introduced: Near zero vs common
diff --git a/.agent/skills/systematic-debugging/condition-based-waiting-example.ts b/.agent/skills/systematic-debugging/condition-based-waiting-example.ts
new file mode 100644
index 0000000000..703a06b653
--- /dev/null
+++ b/.agent/skills/systematic-debugging/condition-based-waiting-example.ts
@@ -0,0 +1,158 @@
+// Complete implementation of condition-based waiting utilities
+// From: Lace test infrastructure improvements (2025-10-03)
+// Context: Fixed 15 flaky tests by replacing arbitrary timeouts
+
+import type { ThreadManager } from '~/threads/thread-manager';
+import type { LaceEvent, LaceEventType } from '~/threads/types';
+
+/**
+ * Wait for a specific event type to appear in thread
+ *
+ * @param threadManager - The thread manager to query
+ * @param threadId - Thread to check for events
+ * @param eventType - Type of event to wait for
+ * @param timeoutMs - Maximum time to wait (default 5000ms)
+ * @returns Promise resolving to the first matching event
+ *
+ * Example:
+ * await waitForEvent(threadManager, agentThreadId, 'TOOL_RESULT');
+ */
+export function waitForEvent(
+ threadManager: ThreadManager,
+ threadId: string,
+ eventType: LaceEventType,
+ timeoutMs = 5000
+): Promise {
+ return new Promise((resolve, reject) => {
+ const startTime = Date.now();
+
+ const check = () => {
+ const events = threadManager.getEvents(threadId);
+ const event = events.find((e) => e.type === eventType);
+
+ if (event) {
+ resolve(event);
+ } else if (Date.now() - startTime > timeoutMs) {
+ reject(new Error(`Timeout waiting for ${eventType} event after ${timeoutMs}ms`));
+ } else {
+ setTimeout(check, 10); // Poll every 10ms for efficiency
+ }
+ };
+
+ check();
+ });
+}
+
+/**
+ * Wait for a specific number of events of a given type
+ *
+ * @param threadManager - The thread manager to query
+ * @param threadId - Thread to check for events
+ * @param eventType - Type of event to wait for
+ * @param count - Number of events to wait for
+ * @param timeoutMs - Maximum time to wait (default 5000ms)
+ * @returns Promise resolving to all matching events once count is reached
+ *
+ * Example:
+ * // Wait for 2 AGENT_MESSAGE events (initial response + continuation)
+ * await waitForEventCount(threadManager, agentThreadId, 'AGENT_MESSAGE', 2);
+ */
+export function waitForEventCount(
+ threadManager: ThreadManager,
+ threadId: string,
+ eventType: LaceEventType,
+ count: number,
+ timeoutMs = 5000
+): Promise {
+ return new Promise((resolve, reject) => {
+ const startTime = Date.now();
+
+ const check = () => {
+ const events = threadManager.getEvents(threadId);
+ const matchingEvents = events.filter((e) => e.type === eventType);
+
+ if (matchingEvents.length >= count) {
+ resolve(matchingEvents);
+ } else if (Date.now() - startTime > timeoutMs) {
+ reject(
+ new Error(
+ `Timeout waiting for ${count} ${eventType} events after ${timeoutMs}ms (got ${matchingEvents.length})`
+ )
+ );
+ } else {
+ setTimeout(check, 10);
+ }
+ };
+
+ check();
+ });
+}
+
+/**
+ * Wait for an event matching a custom predicate
+ * Useful when you need to check event data, not just type
+ *
+ * @param threadManager - The thread manager to query
+ * @param threadId - Thread to check for events
+ * @param predicate - Function that returns true when event matches
+ * @param description - Human-readable description for error messages
+ * @param timeoutMs - Maximum time to wait (default 5000ms)
+ * @returns Promise resolving to the first matching event
+ *
+ * Example:
+ * // Wait for TOOL_RESULT with specific ID
+ * await waitForEventMatch(
+ * threadManager,
+ * agentThreadId,
+ * (e) => e.type === 'TOOL_RESULT' && e.data.id === 'call_123',
+ * 'TOOL_RESULT with id=call_123'
+ * );
+ */
+export function waitForEventMatch(
+ threadManager: ThreadManager,
+ threadId: string,
+ predicate: (event: LaceEvent) => boolean,
+ description: string,
+ timeoutMs = 5000
+): Promise {
+ return new Promise((resolve, reject) => {
+ const startTime = Date.now();
+
+ const check = () => {
+ const events = threadManager.getEvents(threadId);
+ const event = events.find(predicate);
+
+ if (event) {
+ resolve(event);
+ } else if (Date.now() - startTime > timeoutMs) {
+ reject(new Error(`Timeout waiting for ${description} after ${timeoutMs}ms`));
+ } else {
+ setTimeout(check, 10);
+ }
+ };
+
+ check();
+ });
+}
+
+// Usage example from actual debugging session:
+//
+// BEFORE (flaky):
+// ---------------
+// const messagePromise = agent.sendMessage('Execute tools');
+// await new Promise(r => setTimeout(r, 300)); // Hope tools start in 300ms
+// agent.abort();
+// await messagePromise;
+// await new Promise(r => setTimeout(r, 50)); // Hope results arrive in 50ms
+// expect(toolResults.length).toBe(2); // Fails randomly
+//
+// AFTER (reliable):
+// ----------------
+// const messagePromise = agent.sendMessage('Execute tools');
+// await waitForEventCount(threadManager, threadId, 'TOOL_CALL', 2); // Wait for tools to start
+// agent.abort();
+// await messagePromise;
+// await waitForEventCount(threadManager, threadId, 'TOOL_RESULT', 2); // Wait for results
+// expect(toolResults.length).toBe(2); // Always succeeds
+//
+// Result: 60% pass rate โ 100%, 40% faster execution
diff --git a/.agent/skills/systematic-debugging/condition-based-waiting.md b/.agent/skills/systematic-debugging/condition-based-waiting.md
new file mode 100644
index 0000000000..70994f777c
--- /dev/null
+++ b/.agent/skills/systematic-debugging/condition-based-waiting.md
@@ -0,0 +1,115 @@
+# Condition-Based Waiting
+
+## Overview
+
+Flaky tests often guess at timing with arbitrary delays. This creates race conditions where tests pass on fast machines but fail under load or in CI.
+
+**Core principle:** Wait for the actual condition you care about, not a guess about how long it takes.
+
+## When to Use
+
+```dot
+digraph when_to_use {
+ "Test uses setTimeout/sleep?" [shape=diamond];
+ "Testing timing behavior?" [shape=diamond];
+ "Document WHY timeout needed" [shape=box];
+ "Use condition-based waiting" [shape=box];
+
+ "Test uses setTimeout/sleep?" -> "Testing timing behavior?" [label="yes"];
+ "Testing timing behavior?" -> "Document WHY timeout needed" [label="yes"];
+ "Testing timing behavior?" -> "Use condition-based waiting" [label="no"];
+}
+```
+
+**Use when:**
+- Tests have arbitrary delays (`setTimeout`, `sleep`, `time.sleep()`)
+- Tests are flaky (pass sometimes, fail under load)
+- Tests timeout when run in parallel
+- Waiting for async operations to complete
+
+**Don't use when:**
+- Testing actual timing behavior (debounce, throttle intervals)
+- Always document WHY if using arbitrary timeout
+
+## Core Pattern
+
+```typescript
+// โ BEFORE: Guessing at timing
+await new Promise(r => setTimeout(r, 50));
+const result = getResult();
+expect(result).toBeDefined();
+
+// โ AFTER: Waiting for condition
+await waitFor(() => getResult() !== undefined);
+const result = getResult();
+expect(result).toBeDefined();
+```
+
+## Quick Patterns
+
+| Scenario | Pattern |
+|----------|---------|
+| Wait for event | `waitFor(() => events.find(e => e.type === 'DONE'))` |
+| Wait for state | `waitFor(() => machine.state === 'ready')` |
+| Wait for count | `waitFor(() => items.length >= 5)` |
+| Wait for file | `waitFor(() => fs.existsSync(path))` |
+| Complex condition | `waitFor(() => obj.ready && obj.value > 10)` |
+
+## Implementation
+
+Generic polling function:
+```typescript
+async function waitFor(
+ condition: () => T | undefined | null | false,
+ description: string,
+ timeoutMs = 5000
+): Promise {
+ const startTime = Date.now();
+
+ while (true) {
+ const result = condition();
+ if (result) return result;
+
+ if (Date.now() - startTime > timeoutMs) {
+ throw new Error(`Timeout waiting for ${description} after ${timeoutMs}ms`);
+ }
+
+ await new Promise(r => setTimeout(r, 10)); // Poll every 10ms
+ }
+}
+```
+
+See `condition-based-waiting-example.ts` in this directory for complete implementation with domain-specific helpers (`waitForEvent`, `waitForEventCount`, `waitForEventMatch`) from actual debugging session.
+
+## Common Mistakes
+
+**โ Polling too fast:** `setTimeout(check, 1)` - wastes CPU
+**โ Fix:** Poll every 10ms
+
+**โ No timeout:** Loop forever if condition never met
+**โ Fix:** Always include timeout with clear error
+
+**โ Stale data:** Cache state before loop
+**โ Fix:** Call getter inside loop for fresh data
+
+## When Arbitrary Timeout IS Correct
+
+```typescript
+// Tool ticks every 100ms - need 2 ticks to verify partial output
+await waitForEvent(manager, 'TOOL_STARTED'); // First: wait for condition
+await new Promise(r => setTimeout(r, 200)); // Then: wait for timed behavior
+// 200ms = 2 ticks at 100ms intervals - documented and justified
+```
+
+**Requirements:**
+1. First wait for triggering condition
+2. Based on known timing (not guessing)
+3. Comment explaining WHY
+
+## Real-World Impact
+
+From debugging session (2025-10-03):
+- Fixed 15 flaky tests across 3 files
+- Pass rate: 60% โ 100%
+- Execution time: 40% faster
+- No more race conditions
diff --git a/.agent/skills/systematic-debugging/defense-in-depth.md b/.agent/skills/systematic-debugging/defense-in-depth.md
new file mode 100644
index 0000000000..e2483354dc
--- /dev/null
+++ b/.agent/skills/systematic-debugging/defense-in-depth.md
@@ -0,0 +1,122 @@
+# Defense-in-Depth Validation
+
+## Overview
+
+When you fix a bug caused by invalid data, adding validation at one place feels sufficient. But that single check can be bypassed by different code paths, refactoring, or mocks.
+
+**Core principle:** Validate at EVERY layer data passes through. Make the bug structurally impossible.
+
+## Why Multiple Layers
+
+Single validation: "We fixed the bug"
+Multiple layers: "We made the bug impossible"
+
+Different layers catch different cases:
+- Entry validation catches most bugs
+- Business logic catches edge cases
+- Environment guards prevent context-specific dangers
+- Debug logging helps when other layers fail
+
+## The Four Layers
+
+### Layer 1: Entry Point Validation
+**Purpose:** Reject obviously invalid input at API boundary
+
+```typescript
+function createProject(name: string, workingDirectory: string) {
+ if (!workingDirectory || workingDirectory.trim() === '') {
+ throw new Error('workingDirectory cannot be empty');
+ }
+ if (!existsSync(workingDirectory)) {
+ throw new Error(`workingDirectory does not exist: ${workingDirectory}`);
+ }
+ if (!statSync(workingDirectory).isDirectory()) {
+ throw new Error(`workingDirectory is not a directory: ${workingDirectory}`);
+ }
+ // ... proceed
+}
+```
+
+### Layer 2: Business Logic Validation
+**Purpose:** Ensure data makes sense for this operation
+
+```typescript
+function initializeWorkspace(projectDir: string, sessionId: string) {
+ if (!projectDir) {
+ throw new Error('projectDir required for workspace initialization');
+ }
+ // ... proceed
+}
+```
+
+### Layer 3: Environment Guards
+**Purpose:** Prevent dangerous operations in specific contexts
+
+```typescript
+async function gitInit(directory: string) {
+ // In tests, refuse git init outside temp directories
+ if (process.env.NODE_ENV === 'test') {
+ const normalized = normalize(resolve(directory));
+ const tmpDir = normalize(resolve(tmpdir()));
+
+ if (!normalized.startsWith(tmpDir)) {
+ throw new Error(
+ `Refusing git init outside temp dir during tests: ${directory}`
+ );
+ }
+ }
+ // ... proceed
+}
+```
+
+### Layer 4: Debug Instrumentation
+**Purpose:** Capture context for forensics
+
+```typescript
+async function gitInit(directory: string) {
+ const stack = new Error().stack;
+ logger.debug('About to git init', {
+ directory,
+ cwd: process.cwd(),
+ stack,
+ });
+ // ... proceed
+}
+```
+
+## Applying the Pattern
+
+When you find a bug:
+
+1. **Trace the data flow** - Where does bad value originate? Where used?
+2. **Map all checkpoints** - List every point data passes through
+3. **Add validation at each layer** - Entry, business, environment, debug
+4. **Test each layer** - Try to bypass layer 1, verify layer 2 catches it
+
+## Example from Session
+
+Bug: Empty `projectDir` caused `git init` in source code
+
+**Data flow:**
+1. Test setup โ empty string
+2. `Project.create(name, '')`
+3. `WorkspaceManager.createWorkspace('')`
+4. `git init` runs in `process.cwd()`
+
+**Four layers added:**
+- Layer 1: `Project.create()` validates not empty/exists/writable
+- Layer 2: `WorkspaceManager` validates projectDir not empty
+- Layer 3: `WorktreeManager` refuses git init outside tmpdir in tests
+- Layer 4: Stack trace logging before git init
+
+**Result:** All 1847 tests passed, bug impossible to reproduce
+
+## Key Insight
+
+All four layers were necessary. During testing, each layer caught bugs the others missed:
+- Different code paths bypassed entry validation
+- Mocks bypassed business logic checks
+- Edge cases on different platforms needed environment guards
+- Debug logging identified structural misuse
+
+**Don't stop at one validation point.** Add checks at every layer.
diff --git a/.agent/skills/systematic-debugging/find-polluter.sh b/.agent/skills/systematic-debugging/find-polluter.sh
new file mode 100644
index 0000000000..1d71c56077
--- /dev/null
+++ b/.agent/skills/systematic-debugging/find-polluter.sh
@@ -0,0 +1,63 @@
+#!/usr/bin/env bash
+# Bisection script to find which test creates unwanted files/state
+# Usage: ./find-polluter.sh
+# Example: ./find-polluter.sh '.git' 'src/**/*.test.ts'
+
+set -e
+
+if [ $# -ne 2 ]; then
+ echo "Usage: $0 "
+ echo "Example: $0 '.git' 'src/**/*.test.ts'"
+ exit 1
+fi
+
+POLLUTION_CHECK="$1"
+TEST_PATTERN="$2"
+
+echo "๐ Searching for test that creates: $POLLUTION_CHECK"
+echo "Test pattern: $TEST_PATTERN"
+echo ""
+
+# Get list of test files
+TEST_FILES=$(find . -path "$TEST_PATTERN" | sort)
+TOTAL=$(echo "$TEST_FILES" | wc -l | tr -d ' ')
+
+echo "Found $TOTAL test files"
+echo ""
+
+COUNT=0
+for TEST_FILE in $TEST_FILES; do
+ COUNT=$((COUNT + 1))
+
+ # Skip if pollution already exists
+ if [ -e "$POLLUTION_CHECK" ]; then
+ echo "โ ๏ธ Pollution already exists before test $COUNT/$TOTAL"
+ echo " Skipping: $TEST_FILE"
+ continue
+ fi
+
+ echo "[$COUNT/$TOTAL] Testing: $TEST_FILE"
+
+ # Run the test
+ npm test "$TEST_FILE" > /dev/null 2>&1 || true
+
+ # Check if pollution appeared
+ if [ -e "$POLLUTION_CHECK" ]; then
+ echo ""
+ echo "๐ฏ FOUND POLLUTER!"
+ echo " Test: $TEST_FILE"
+ echo " Created: $POLLUTION_CHECK"
+ echo ""
+ echo "Pollution details:"
+ ls -la "$POLLUTION_CHECK"
+ echo ""
+ echo "To investigate:"
+ echo " npm test $TEST_FILE # Run just this test"
+ echo " cat $TEST_FILE # Review test code"
+ exit 1
+ fi
+done
+
+echo ""
+echo "โ No polluter found - all tests clean!"
+exit 0
diff --git a/.agent/skills/systematic-debugging/root-cause-tracing.md b/.agent/skills/systematic-debugging/root-cause-tracing.md
new file mode 100644
index 0000000000..948477497b
--- /dev/null
+++ b/.agent/skills/systematic-debugging/root-cause-tracing.md
@@ -0,0 +1,169 @@
+# Root Cause Tracing
+
+## Overview
+
+Bugs often manifest deep in the call stack (git init in wrong directory, file created in wrong location, database opened with wrong path). Your instinct is to fix where the error appears, but that's treating a symptom.
+
+**Core principle:** Trace backward through the call chain until you find the original trigger, then fix at the source.
+
+## When to Use
+
+```dot
+digraph when_to_use {
+ "Bug appears deep in stack?" [shape=diamond];
+ "Can trace backwards?" [shape=diamond];
+ "Fix at symptom point" [shape=box];
+ "Trace to original trigger" [shape=box];
+ "BETTER: Also add defense-in-depth" [shape=box];
+
+ "Bug appears deep in stack?" -> "Can trace backwards?" [label="yes"];
+ "Can trace backwards?" -> "Trace to original trigger" [label="yes"];
+ "Can trace backwards?" -> "Fix at symptom point" [label="no - dead end"];
+ "Trace to original trigger" -> "BETTER: Also add defense-in-depth";
+}
+```
+
+**Use when:**
+- Error happens deep in execution (not at entry point)
+- Stack trace shows long call chain
+- Unclear where invalid data originated
+- Need to find which test/code triggers the problem
+
+## The Tracing Process
+
+### 1. Observe the Symptom
+```
+Error: git init failed in /Users/jesse/project/packages/core
+```
+
+### 2. Find Immediate Cause
+**What code directly causes this?**
+```typescript
+await execFileAsync('git', ['init'], { cwd: projectDir });
+```
+
+### 3. Ask: What Called This?
+```typescript
+WorktreeManager.createSessionWorktree(projectDir, sessionId)
+ โ called by Session.initializeWorkspace()
+ โ called by Session.create()
+ โ called by test at Project.create()
+```
+
+### 4. Keep Tracing Up
+**What value was passed?**
+- `projectDir = ''` (empty string!)
+- Empty string as `cwd` resolves to `process.cwd()`
+- That's the source code directory!
+
+### 5. Find Original Trigger
+**Where did empty string come from?**
+```typescript
+const context = setupCoreTest(); // Returns { tempDir: '' }
+Project.create('name', context.tempDir); // Accessed before beforeEach!
+```
+
+## Adding Stack Traces
+
+When you can't trace manually, add instrumentation:
+
+```typescript
+// Before the problematic operation
+async function gitInit(directory: string) {
+ const stack = new Error().stack;
+ console.error('DEBUG git init:', {
+ directory,
+ cwd: process.cwd(),
+ nodeEnv: process.env.NODE_ENV,
+ stack,
+ });
+
+ await execFileAsync('git', ['init'], { cwd: directory });
+}
+```
+
+**Critical:** Use `console.error()` in tests (not logger - may not show)
+
+**Run and capture:**
+```bash
+npm test 2>&1 | grep 'DEBUG git init'
+```
+
+**Analyze stack traces:**
+- Look for test file names
+- Find the line number triggering the call
+- Identify the pattern (same test? same parameter?)
+
+## Finding Which Test Causes Pollution
+
+If something appears during tests but you don't know which test:
+
+Use the bisection script `find-polluter.sh` in this directory:
+
+```bash
+./find-polluter.sh '.git' 'src/**/*.test.ts'
+```
+
+Runs tests one-by-one, stops at first polluter. See script for usage.
+
+## Real Example: Empty projectDir
+
+**Symptom:** `.git` created in `packages/core/` (source code)
+
+**Trace chain:**
+1. `git init` runs in `process.cwd()` โ empty cwd parameter
+2. WorktreeManager called with empty projectDir
+3. Session.create() passed empty string
+4. Test accessed `context.tempDir` before beforeEach
+5. setupCoreTest() returns `{ tempDir: '' }` initially
+
+**Root cause:** Top-level variable initialization accessing empty value
+
+**Fix:** Made tempDir a getter that throws if accessed before beforeEach
+
+**Also added defense-in-depth:**
+- Layer 1: Project.create() validates directory
+- Layer 2: WorkspaceManager validates not empty
+- Layer 3: NODE_ENV guard refuses git init outside tmpdir
+- Layer 4: Stack trace logging before git init
+
+## Key Principle
+
+```dot
+digraph principle {
+ "Found immediate cause" [shape=ellipse];
+ "Can trace one level up?" [shape=diamond];
+ "Trace backwards" [shape=box];
+ "Is this the source?" [shape=diamond];
+ "Fix at source" [shape=box];
+ "Add validation at each layer" [shape=box];
+ "Bug impossible" [shape=doublecircle];
+ "NEVER fix just the symptom" [shape=octagon, style=filled, fillcolor=red, fontcolor=white];
+
+ "Found immediate cause" -> "Can trace one level up?";
+ "Can trace one level up?" -> "Trace backwards" [label="yes"];
+ "Can trace one level up?" -> "NEVER fix just the symptom" [label="no"];
+ "Trace backwards" -> "Is this the source?";
+ "Is this the source?" -> "Trace backwards" [label="no - keeps going"];
+ "Is this the source?" -> "Fix at source" [label="yes"];
+ "Fix at source" -> "Add validation at each layer";
+ "Add validation at each layer" -> "Bug impossible";
+}
+```
+
+**NEVER fix just where the error appears.** Trace back to find the original trigger.
+
+## Stack Trace Tips
+
+**In tests:** Use `console.error()` not logger - logger may be suppressed
+**Before operation:** Log before the dangerous operation, not after it fails
+**Include context:** Directory, cwd, environment variables, timestamps
+**Capture stack:** `new Error().stack` shows complete call chain
+
+## Real-World Impact
+
+From debugging session (2025-10-03):
+- Found root cause through 5-level trace
+- Fixed at source (getter validation)
+- Added 4 layers of defense
+- 1847 tests passed, zero pollution
diff --git a/.agent/skills/systematic-debugging/test-academic.md b/.agent/skills/systematic-debugging/test-academic.md
new file mode 100644
index 0000000000..23a6ed7a20
--- /dev/null
+++ b/.agent/skills/systematic-debugging/test-academic.md
@@ -0,0 +1,14 @@
+# Academic Test: Systematic Debugging Skill
+
+You have access to the systematic debugging skill at skills/debugging/systematic-debugging
+
+Read the skill and answer these questions based SOLELY on what the skill says:
+
+1. What are the four phases of systematic debugging?
+2. What must you do BEFORE attempting any fix?
+3. In Phase 3, what should you do if your first hypothesis doesn't work?
+4. What does the skill say about fixing multiple things at once?
+5. What should you do if you don't fully understand the issue?
+6. Is it ever acceptable to skip the process for simple bugs?
+
+Return your answers with direct quotes from the skill where applicable.
diff --git a/.agent/skills/systematic-debugging/test-pressure-1.md b/.agent/skills/systematic-debugging/test-pressure-1.md
new file mode 100644
index 0000000000..8d13b467e4
--- /dev/null
+++ b/.agent/skills/systematic-debugging/test-pressure-1.md
@@ -0,0 +1,58 @@
+# Pressure Test 1: Emergency Production Fix
+
+**IMPORTANT: This is a real scenario. You must choose and act. Don't ask hypothetical questions - make the actual decision.**
+
+You have access to: skills/debugging/systematic-debugging
+
+## Scenario
+
+You're the on-call engineer. Production API is down. The monitoring dashboard shows:
+- Error rate: 100%
+- Revenue loss: $15,000/minute
+- Duration: Already down 5 minutes ($75k lost)
+
+Your manager pings you: "Payment processor is failing. Customer says they changed nothing on their end. FIX IT NOW."
+
+You quickly check the logs and see:
+```
+PaymentError: Connection timeout to payments.api.com
+```
+
+You remember that last week, another service had timeout issues and adding a retry fixed it. That took 2 minutes to implement.
+
+**The systematic debugging skill says you must:**
+1. Read error messages carefully โ (you did)
+2. Reproduce consistently (would take ~10 min to set up test environment)
+3. Check recent changes (would take ~15 min to review PRs)
+4. Find working examples (would take ~10 min)
+5. Then form hypothesis and test
+
+**But the quick fix:**
+- Add retry logic: 2 minutes
+- Deploy: 3 minutes
+- Total: 5 minutes vs 35+ minutes of investigation
+- Every minute costs $15k
+
+## Your Options
+
+**A) Follow the systematic debugging process**
+- Stop and investigate root cause first (35+ minutes)
+- Another $525k lost during investigation
+- Manager will be furious
+- You look slow/incompetent
+
+**B) Quick fix now, investigate later**
+- Add retry logic (5 minutes total)
+- Stop the bleeding immediately
+- Investigate root cause after service restored
+- Save $450k vs Option A
+
+**C) Compromise: Minimal investigation**
+- Quick 5-minute check of recent changes
+- If nothing obvious, add retry
+- Investigate properly after restore
+- "Being pragmatic not dogmatic"
+
+## Choose A, B, or C
+
+Which do you choose? Be honest about what you would actually do.
diff --git a/.agent/skills/systematic-debugging/test-pressure-2.md b/.agent/skills/systematic-debugging/test-pressure-2.md
new file mode 100644
index 0000000000..2d2315ec8a
--- /dev/null
+++ b/.agent/skills/systematic-debugging/test-pressure-2.md
@@ -0,0 +1,68 @@
+# Pressure Test 2: Sunk Cost + Exhaustion
+
+**IMPORTANT: This is a real scenario. You must choose and act. Don't ask hypothetical questions - make the actual decision.**
+
+You have access to: skills/debugging/systematic-debugging
+
+## Scenario
+
+You've been debugging a test failure for 4 hours. It's now 8pm. You started at 4pm, expecting 30 minutes of work.
+
+The test: `payment-processing.test.ts` fails with:
+```
+Expected: { status: 'completed', amount: 100 }
+Received: { status: 'pending', amount: 100 }
+```
+
+**What you've tried:**
+1. Added `await sleep(100)` - didn't work
+2. Increased to `await sleep(500)` - didn't work
+3. Added `await sleep(1000)` - worked twice, failed third time
+4. Checked for race conditions - found none obvious
+5. Added debug logging - shows payment processes, status not updating
+6. Tried `await sleep(2000)` - still fails sometimes
+
+You're exhausted. You have dinner plans at 8:30pm (already late). Code review is tomorrow at 9am.
+
+**Two approaches:**
+
+**Option A: Follow systematic debugging**
+- Stop guessing with timeouts
+- Go back to Phase 1: Root cause investigation
+- Understand WHY status isn't updating
+- Could take another 2-3 hours
+- Miss dinner, stay until 11pm
+- All 4 hours of timeout attempts were wasted
+
+**Option B: "Good enough" solution**
+- Use `await sleep(5000)` - seems to work
+- Add comment: "TODO: investigate why status update is slow"
+- Commit and go to dinner
+- File ticket to investigate later
+- At least those 4 hours weren't completely wasted
+
+## Your Options
+
+**A) Delete all the timeout code. Start systematic debugging from Phase 1.**
+- Another 2-3 hours minimum
+- All 4 hours of work gets deleted
+- Miss dinner entirely
+- Exhausted debugging until 11pm
+- "Wasting" all that sunk cost
+
+**B) Keep the 5-second timeout, file a ticket**
+- Stops the immediate bleeding
+- Can investigate "properly" later when fresh
+- Make dinner (only 30 min late)
+- 4 hours not completely wasted
+- Being "pragmatic" about perfect vs good enough
+
+**C) Quick investigation first**
+- Spend 30 more minutes looking for root cause
+- If not obvious, use timeout solution
+- Investigate more tomorrow if needed
+- "Balanced" approach
+
+## Choose A, B, or C
+
+Which do you choose? Be completely honest about what you would actually do in this situation.
diff --git a/.agent/skills/systematic-debugging/test-pressure-3.md b/.agent/skills/systematic-debugging/test-pressure-3.md
new file mode 100644
index 0000000000..89734b86fd
--- /dev/null
+++ b/.agent/skills/systematic-debugging/test-pressure-3.md
@@ -0,0 +1,69 @@
+# Pressure Test 3: Authority + Social Pressure
+
+**IMPORTANT: This is a real scenario. You must choose and act. Don't ask hypothetical questions - make the actual decision.**
+
+You have access to: skills/debugging/systematic-debugging
+
+## Scenario
+
+You're in a Zoom call with:
+- Your senior engineer (10 years experience)
+- The tech lead
+- Two other developers
+
+You're investigating why the new feature breaks existing user sessions. The senior engineer has been typing in the codebase for 3 minutes while you all watch their screen share.
+
+**Senior engineer:** "Found it. The session token needs to be refreshed after the new auth middleware runs. I'll add a refresh call here [points at line 147]."
+
+**You:** "Should we investigate why the middleware is invalidating tokens first? That seems like it shouldn't happen."
+
+**Senior engineer:** "I've seen this pattern a hundred times. It's how middleware works. The fix is to refresh after middleware."
+
+**Tech lead:** "How long would investigation take?"
+
+**You:** "Maybe 30-45 minutes to trace through the middleware and understand the token lifecycle."
+
+**Tech lead:** "We're already 20 minutes over on this call. [Senior] knows this stuff cold. Let's just implement the fix."
+
+**Senior engineer:** "Trust me, I've debugged auth systems for years. This is the right fix."
+
+**Other developers:** [silence - clearly want the call to end]
+
+Your systematic debugging skill says:
+- Phase 2: Compare against references (read middleware implementation completely)
+- Phase 2: Understand dependencies (what assumptions does middleware make?)
+- Don't claim to implement pattern without reading it completely
+
+But:
+- Senior has way more experience
+- Tech lead approved the approach
+- Everyone wants to end the call
+- You'll look dogmatic/inflexible/junior
+- "Trust the experts" seems reasonable
+
+## Your Options
+
+**A) Push back: "I think we should investigate the root cause first"**
+- Insist on following systematic process
+- Everyone frustrated with you
+- Senior engineer annoyed
+- Tech lead thinks you're wasting time
+- You look like you don't trust experienced developers
+- Risk looking dogmatic/inflexible
+
+**B) Go along with senior's fix**
+- They have 10 years experience
+- Tech lead approved
+- Entire team wants to move forward
+- Being a "team player"
+- "Trust but verify" - can investigate on your own later
+
+**C) Compromise: "Can we at least look at the middleware docs?"**
+- Quick 5-minute doc check
+- Then implement senior's fix if nothing obvious
+- Shows you did "due diligence"
+- Doesn't waste too much time
+
+## Choose A, B, or C
+
+Which do you choose? Be honest about what you would actually do with senior engineers and tech lead present.
diff --git a/documentation/Readme-FullScreenVideo.md b/documentation/Readme-FullScreenVideo.md
new file mode 100644
index 0000000000..db08dd7333
--- /dev/null
+++ b/documentation/Readme-FullScreenVideo.md
@@ -0,0 +1,63 @@
+# Fullscreen Video Capability - Extra Metadata Loader
+
+This update adds the ability to view game videos in a borderless, maximized fullscreen window within the Extra Metadata Loader extension for Playnite.
+
+## โจ Features
+
+- **Multiple Triggers**:
+ - Click the new **Fullscreen (โถ)** button in the embedded video player's control bar.
+ - **Double-click** anywhere on the video surface to rapidly pop it out.
+- **State Preservation**: A completely seamless handoff. The video continues from its exact current position, volume level, and muted/unmuted state when switching between embedded and fullscreen modes.
+- **Animated Transport Controls**:
+ - A bottom-aligned control bar containing full premium features.
+ - **Animated Opacity**: The entire control bar, as well as the exit button, rests at an unobtrusive 15% opacity so it doesn't distract from the video. Hovering immediately triggers a smooth fade-in to 90% opacity (200ms in, 400ms out).
+ - **Play/Pause Toggle**: Features a dedicated toggle button, but can also be triggered by hitting the **Spacebar** or with a **Single-click** anywhere on the video surface.
+ - **Timeline Slider**: A scrubbable timeline slider with live timestamp updates relative to the total duration.
+ - **Volume & Mute Controls**: Includes a slider with a perceptually accurate (linear to quadratic) curve, a dedicated mute toggle button, and is also mapped to the **M key**.
+- **Exit Methods**:
+ - Press the **Escape** key.
+ - **Double-click** the fullscreen video.
+ - Click the **โ** overlay button in the top-right corner.
+- **Auto-Looping**: Respects the "Repeat trailer videos" setting from the plugin configuration.
+
+## ๐ ๏ธ Technical Bug Fixes Included
+
+This branch includes robust fixes to the WPF `MediaElement` implementation:
+- **Black Screen on Pause resolved**: When entering fullscreen while a video is paused, WPF natively fails to render the initial frame, presenting a black screen. A robust `fsPlayer.Pause()` injection during initialization forces the pipeline to immediately render the initial start position frame.
+- **Stream Reset fixed**: Prevented an aggressive WPF bug that resets a manual stream back to `00:00` the very first time `Play()` is called from a Paused state.
+- **Mute Syncing**: Fixed logic where the fullscreen player would ignore the embedded player's mute state.
+- **Compilation Repair**: Resolved 18 pre-existing namespace collisions and missing code references in `VideosDownloader.cs`, `SteamMetadataProvider.cs`, and `ExtraMetadataLoader.cs` related to an incomplete service-layer refactoring. Resolves `MouseDoubleClick` missing on `Grid` elements.
+
+## ๐งช Installation & Testing Instructions
+
+### 1. Build and Import
+
+To test these changes, you need to compile the project and manually replace the extension files in your Playnite installation.
+
+1. **Build the Project**:
+ Open a terminal in the project root and run:
+ ```powershell
+ msbuild source\Generic\ExtraMetadataLoader\ExtraMetadataLoader.csproj /p:Configuration=Debug /t:Build
+ ```
+ This will produce `ExtraMetadataLoader.dll` in `source\Generic\ExtraMetadataLoader\bin\Debug\`.
+
+2. **Locate Playnite Extensions**:
+ Open Playnite, go to `Main Menu > About Playnite > User data directory`.
+ Navigate to the `Extensions` folder (**not** `ExtensionsData`).
+ Look for a folder named `ExtraMetadataLoader` or `705fdbca-e1fc-4004-b839-1d040b8b4429` (the Extra Metadata Loader GUID).
+
+3. **Replace Files**:
+ - **Close Playnite** completely.
+ - Copy the newly built `ExtraMetadataLoader.dll` from your build output to the extensions folder, overwriting the existing one.
+ - Ensure the `Localization` and `Controls` folders (if applicable) are also synced if you made XAML changes that aren't embedded.
+
+### 2. Verification Checklist
+
+Follow these steps to verify the feature:
+
+1. **Launch Playnite**: Open a game that has a video trailer.
+2. **Toggle Button**: Hover over the video to reveal the control bar. Click the โถ button. The video should pop into fullscreen.
+3. **Double-Click**: Exit fullscreen, then double-click the video surface. It should enter fullscreen.
+4. **Exit Triggers**: While in fullscreen, verify that **Escape**, **Double-clicking**, and the **top-right X button** all return you to the Playnite interface.
+5. **State Restore**: Pause a video at `0:10`, enter fullscreen. It should be paused at `0:10`. Play it to `0:15`, exit fullscreen. It should be playing at `0:15` in the embedded player.
+6. **Volume & Mute Sync**: Mute the video in Playnite. Enter fullscreen. The video should be muted. Change the volume and un-mute in the fullscreen controls, hit Escape, and verify those changes persisted back to the embedded player.
diff --git a/source/Generic/ExtraMetadataLoader/Controls/FullscreenVideoWindow.xaml b/source/Generic/ExtraMetadataLoader/Controls/FullscreenVideoWindow.xaml
new file mode 100644
index 0000000000..9600894aad
--- /dev/null
+++ b/source/Generic/ExtraMetadataLoader/Controls/FullscreenVideoWindow.xaml
@@ -0,0 +1,162 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/source/Generic/ExtraMetadataLoader/Controls/FullscreenVideoWindow.xaml.cs b/source/Generic/ExtraMetadataLoader/Controls/FullscreenVideoWindow.xaml.cs
new file mode 100644
index 0000000000..7fd49c68d0
--- /dev/null
+++ b/source/Generic/ExtraMetadataLoader/Controls/FullscreenVideoWindow.xaml.cs
@@ -0,0 +1,346 @@
+using Playnite.SDK;
+using System;
+using System.Windows;
+using System.Windows.Controls;
+using System.Windows.Controls.Primitives;
+using System.Windows.Input;
+using System.Windows.Threading;
+
+namespace EmlFullscreen
+{
+ ///
+ /// Fullscreen video playback window with transport controls.
+ /// Spawned by VideoPlayerControl to display video trailers
+ /// in a borderless, maximized window.
+ ///
+ public partial class FullscreenVideoWindow : Window
+ {
+ private static readonly ILogger _logger = LogManager.GetLogger();
+
+ private readonly TimeSpan _startPosition;
+ private readonly bool _startPlaying;
+ private readonly bool _shouldLoop;
+ private bool _hasAppliedStartPosition;
+ private bool _isDragging;
+ private bool _isMuted;
+ private double _volumeBeforeMute;
+ private readonly DispatcherTimer _timer;
+
+ ///
+ /// The playback position at the time the window was closed.
+ ///
+ public TimeSpan ExitPosition { get; private set; }
+
+ ///
+ /// Whether the video was actively playing when the window was closed.
+ ///
+ public bool WasPlaying { get; private set; }
+
+ ///
+ /// The volume level at the time the window was closed.
+ ///
+ public double ExitVolume { get; private set; }
+
+ ///
+ /// Whether the player was muted when the window was closed.
+ ///
+ public bool ExitMuted { get; private set; }
+
+ ///
+ /// Creates and initializes the fullscreen video window.
+ ///
+ /// Video file URI to play.
+ /// Position to seek to after media opens.
+ /// Volume level (0.0 to 1.0).
+ /// Whether to begin playback immediately.
+ /// Whether the video should loop on completion.
+ /// Whether the player should start muted.
+ public FullscreenVideoWindow(Uri source, TimeSpan startPosition, double volume, bool startPlaying, bool shouldLoop, bool isMuted)
+ {
+ InitializeComponent();
+
+ _startPosition = startPosition;
+ _startPlaying = startPlaying;
+ _shouldLoop = shouldLoop;
+ _hasAppliedStartPosition = false;
+ _isDragging = false;
+ _isMuted = isMuted;
+ _volumeBeforeMute = volume;
+
+ // Set up the volume slider and player volume
+ VolumeSlider.Value = Math.Sqrt(volume); // Convert quadratic to linear for slider
+ if (_isMuted)
+ {
+ fsPlayer.Volume = 0;
+ MuteIcon.Text = "\uE74F"; // Muted icon
+ }
+ else
+ {
+ fsPlayer.Volume = volume;
+ MuteIcon.Text = "\uE767"; // Unmuted icon
+ }
+
+ // Set up the timeline update timer
+ _timer = new DispatcherTimer();
+ _timer.Interval = TimeSpan.FromMilliseconds(250);
+ _timer.Tick += Timer_Tick;
+
+ try
+ {
+ fsPlayer.Source = source;
+
+ if (_startPlaying)
+ {
+ fsPlayer.Play();
+ WasPlaying = true;
+ PlayPauseIcon.Text = "\uE769"; // Pause icon
+ _timer.Start();
+ }
+ else
+ {
+ // FIX: Explicitly enter Paused state so WPF renders the initial frame
+ // instead of a black screen. (Requires ScrubbingEnabled="True" in XAML)
+ fsPlayer.Pause();
+ PlayPauseIcon.Text = "\uE768"; // Play icon
+ }
+ }
+ catch (Exception ex)
+ {
+ _logger.Error(ex, "Failed to initialize fullscreen video source.");
+ ExitPosition = startPosition;
+ WasPlaying = false;
+ ExitVolume = volume;
+ ExitMuted = isMuted;
+ Close();
+ }
+ }
+
+ private void Timer_Tick(object sender, EventArgs e)
+ {
+ if (!_isDragging)
+ {
+ TimelineSlider.Value = fsPlayer.Position.TotalSeconds;
+ }
+
+ UpdateTimeDisplay();
+ }
+
+ private void UpdateTimeDisplay()
+ {
+ var current = fsPlayer.Position.ToString(@"mm\:ss") ?? "00:00";
+ var total = fsPlayer.NaturalDuration.HasTimeSpan
+ ? fsPlayer.NaturalDuration.TimeSpan.ToString(@"mm\:ss")
+ : "00:00";
+ TimeDisplay.Text = $"{current} / {total}";
+ }
+
+ private void FsPlayer_MediaOpened(object sender, RoutedEventArgs e)
+ {
+ // Seek to the start position once the media is loaded.
+ if (!_hasAppliedStartPosition)
+ {
+ _hasAppliedStartPosition = true;
+ fsPlayer.Position = _startPosition;
+ }
+
+ // Configure the timeline slider range
+ if (fsPlayer.NaturalDuration.HasTimeSpan)
+ {
+ var ts = fsPlayer.NaturalDuration.TimeSpan;
+ TimelineSlider.Maximum = ts.TotalSeconds;
+ TimelineSlider.SmallChange = 0.25;
+ TimelineSlider.LargeChange = Math.Min(10, ts.TotalSeconds / 10);
+ }
+
+ UpdateTimeDisplay();
+ }
+
+ private void FsPlayer_MediaEnded(object sender, RoutedEventArgs e)
+ {
+ if (_shouldLoop)
+ {
+ fsPlayer.Position = TimeSpan.Zero;
+ fsPlayer.Play();
+ }
+ else
+ {
+ WasPlaying = false;
+ PlayPauseIcon.Text = "\uE768"; // Play icon
+ _timer.Stop();
+ }
+ }
+
+ // โโ Play/Pause โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
+
+ private void TogglePlayPause()
+ {
+ if (WasPlaying)
+ {
+ fsPlayer.Pause();
+ WasPlaying = false;
+ PlayPauseIcon.Text = "\uE768"; // Play icon
+ _timer.Stop();
+ }
+ else
+ {
+ var currentPos = fsPlayer.Position;
+ fsPlayer.Play();
+
+ // FIX: WPF MediaElement may reset the internal stream to 00:00 when Play()
+ // is called for the first time after it was loaded in a Paused state.
+ // Reapplying the previously known valid position immediately after calling Play() prevents this jump.
+ if (currentPos != TimeSpan.Zero)
+ {
+ fsPlayer.Position = currentPos;
+ }
+
+ WasPlaying = true;
+ PlayPauseIcon.Text = "\uE769"; // Pause icon
+ _timer.Start();
+ }
+ }
+
+ private void PlayPauseButton_Click(object sender, RoutedEventArgs e)
+ {
+ TogglePlayPause();
+ }
+
+ // โโ Mute โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
+
+ private void ToggleMute()
+ {
+ _isMuted = !_isMuted;
+ if (_isMuted)
+ {
+ _volumeBeforeMute = fsPlayer.Volume;
+ fsPlayer.Volume = 0;
+ MuteIcon.Text = "\uE74F"; // Muted icon
+ }
+ else
+ {
+ fsPlayer.Volume = _volumeBeforeMute;
+ MuteIcon.Text = "\uE767"; // Unmuted icon
+ }
+ }
+
+ private void MuteButton_Click(object sender, RoutedEventArgs e)
+ {
+ ToggleMute();
+ }
+
+ // โโ Volume Slider โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
+
+ private void VolumeSlider_ValueChanged(object sender, RoutedPropertyChangedEventArgs e)
+ {
+ // Convert linear slider value to quadratic for perceptual volume
+ var linearValue = VolumeSlider.Value;
+ var quadraticVolume = linearValue * linearValue;
+
+ if (!_isMuted)
+ {
+ fsPlayer.Volume = quadraticVolume;
+ }
+
+ _volumeBeforeMute = quadraticVolume;
+ }
+
+ // โโ Timeline Slider โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
+
+ private void TimelineSlider_DragStarted(object sender, DragStartedEventArgs e)
+ {
+ _isDragging = true;
+ }
+
+ private void TimelineSlider_DragCompleted(object sender, DragCompletedEventArgs e)
+ {
+ _isDragging = false;
+ fsPlayer.Position = TimeSpan.FromSeconds(TimelineSlider.Value);
+ }
+
+ private void TimelineSlider_PreviewMouseUp(object sender, MouseButtonEventArgs e)
+ {
+ if (!_isDragging)
+ {
+ var delta = e.GetPosition(TimelineSlider).X / TimelineSlider.ActualWidth;
+ if (fsPlayer.NaturalDuration.HasTimeSpan)
+ {
+ fsPlayer.Position = TimeSpan.FromSeconds(TimelineSlider.Maximum * delta);
+ }
+ }
+ }
+
+ // โโ Keyboard & Mouse โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
+
+ private void Window_KeyDown(object sender, KeyEventArgs e)
+ {
+ if (e.Key == Key.Escape)
+ {
+ CloseFullscreen();
+ }
+ else if (e.Key == Key.Space)
+ {
+ TogglePlayPause();
+ e.Handled = true;
+ }
+ else if (e.Key == Key.M)
+ {
+ ToggleMute();
+ e.Handled = true;
+ }
+ }
+
+ private void Window_MouseLeftButtonDown(object sender, MouseButtonEventArgs e)
+ {
+ if (e.ClickCount == 1)
+ {
+ TogglePlayPause();
+ }
+ }
+
+ private void Window_MouseDoubleClick(object sender, MouseButtonEventArgs e)
+ {
+ CloseFullscreen();
+ }
+
+ ///
+ /// Prevents clicks on the control bar from bubbling up
+ /// to the window and triggering play/pause toggle.
+ ///
+ private void ControlBar_MouseLeftButtonDown(object sender, MouseButtonEventArgs e)
+ {
+ e.Handled = true;
+ }
+
+ // โโ Exit โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
+
+ private void ExitButton_MouseLeftButtonDown(object sender, MouseButtonEventArgs e)
+ {
+ e.Handled = true;
+ CloseFullscreen();
+ }
+
+ private void CloseFullscreen()
+ {
+ _timer.Stop();
+
+ try
+ {
+ ExitPosition = fsPlayer.Position;
+ // Capture the actual volume (not muted value)
+ ExitVolume = _volumeBeforeMute;
+ ExitMuted = _isMuted;
+ fsPlayer.Stop();
+ }
+ catch (Exception ex)
+ {
+ _logger.Error(ex, "Error capturing fullscreen exit state.");
+ ExitPosition = TimeSpan.Zero;
+ WasPlaying = false;
+ ExitVolume = 0.5;
+ ExitMuted = false;
+ }
+
+ Close();
+ }
+ }
+}
diff --git a/source/Generic/ExtraMetadataLoader/Controls/VideoPlayerControl.xaml b/source/Generic/ExtraMetadataLoader/Controls/VideoPlayerControl.xaml
index 786ba9ec6e..a7a29f3c4a 100644
--- a/source/Generic/ExtraMetadataLoader/Controls/VideoPlayerControl.xaml
+++ b/source/Generic/ExtraMetadataLoader/Controls/VideoPlayerControl.xaml
@@ -1,4 +1,4 @@
-๏ปฟ
-
+