diff --git a/.claude-plugin/PLUGIN.md b/.claude-plugin/PLUGIN.md index 3a85f8b9..c81beba1 100644 --- a/.claude-plugin/PLUGIN.md +++ b/.claude-plugin/PLUGIN.md @@ -39,7 +39,6 @@ MAP (Modular Agentic Planner) is a cognitive architecture that orchestrates 11 s - `/map-efficient` — implement features, refactor code, complex tasks with full MAP workflow - `/map-debug` — debug issues using MAP analysis - `/map-fast` — small, low-risk changes with minimal overhead -- `/map-debate` — multi-variant synthesis with Opus arbiter - `/map-review` — comprehensive review of changes - `/map-check` — quality gates and verification - `/map-plan` — architecture decomposition diff --git a/.claude-plugin/marketplace.json b/.claude-plugin/marketplace.json index 788e1201..77fcad98 100644 --- a/.claude-plugin/marketplace.json +++ b/.claude-plugin/marketplace.json @@ -33,7 +33,7 @@ "features": [ "11 specialized agents (TaskDecomposer, Actor, Monitor, Predictor, Evaluator, Reflector, DocumentationReviewer, Debate-Arbiter, Synthesizer, Research-Agent, Final-Verifier)", "5 Claude Code hooks for automation", - "12 slash commands (/map-efficient, /map-debug, /map-fast, /map-debate, /map-review, /map-check, /map-plan, /map-task, /map-tdd, /map-release, /map-resume, /map-learn)", + "11 slash commands (/map-efficient, /map-debug, /map-fast, /map-review, /map-check, /map-plan, /map-task, /map-tdd, /map-release, /map-resume, /map-learn)", "Professional code review integration", "Cost optimization (40-60% savings)" ], diff --git a/.claude-plugin/plugin.json b/.claude-plugin/plugin.json index eb1a71a6..3cf1fa9d 100644 --- a/.claude-plugin/plugin.json +++ b/.claude-plugin/plugin.json @@ -31,7 +31,7 @@ "features": [ "11 specialized MAP agents (TaskDecomposer, Actor, Monitor, Predictor, Evaluator, Reflector, DocumentationReviewer, Debate-Arbiter, Synthesizer, Research-Agent, Final-Verifier)", "5 Claude Code hooks for automation (validate-agent-templates, auto-store-knowledge, enrich-context, session-init, 
track-metrics)", - "12 slash commands (/map-efficient, /map-debug, /map-fast, /map-debate, /map-review, /map-check, /map-plan, /map-task, /map-tdd, /map-release, /map-resume, /map-learn)", + "11 slash commands (/map-efficient, /map-debug, /map-fast, /map-review, /map-check, /map-plan, /map-task, /map-tdd, /map-release, /map-resume, /map-learn)", "Chain-of-thought reasoning with sequential-thinking MCP", "Semantic pattern search with embeddings cache", "Cost optimization with per-agent model selection (haiku/sonnet/opus)" diff --git a/.claude/agents/actor.md b/.claude/agents/actor.md index d0070880..1c5a62ae 100644 --- a/.claude/agents/actor.md +++ b/.claude/agents/actor.md @@ -276,6 +276,9 @@ Rules: 5. Include edge cases from the spec's `## Edge Cases` section if available in the packet. 6. Use standard test patterns for the project's language and framework. 7. Tests SHOULD fail when run (implementation doesn't exist yet). This is expected. +8. Do NOT add temporal comments about test failure status (e.g., "currently FAILS", + "expected to FAIL", "will PASS once fix is applied"). Write tests as permanent, + clean code — the Red/Green state is transient and must not leak into comments. Output: - Test files created via Write tool diff --git a/.claude/commands/map-debate.md b/.claude/commands/map-debate.md deleted file mode 100644 index ae420be1..00000000 --- a/.claude/commands/map-debate.md +++ /dev/null @@ -1,408 +0,0 @@ ---- -description: Debate-based MAP workflow with Opus arbiter for multi-variant synthesis ---- - -# MAP Debate Workflow - -## Execution Rules - -1. Execute steps in order without pausing; only ask user if (a) `task-decomposer` returns blocking `analysis.open_questions` with no subtasks OR (b) Monitor sets `escalation_required === true` -2. Use exact `subagent_type` specified — never substitute `general-purpose` -3. Call each agent individually — no combining or skipping steps -4. 
Max 5 Actor→Monitor retry iterations per subtask (separate from debate-arbiter retries in 2.7 Retry Loop) -5. **ALWAYS generate 3 variants** — no conditional check (unlike map-efficient Self-MoA) -6. Use **debate-arbiter with model=opus** for synthesis - -**Task:** $ARGUMENTS - -## Workflow Overview - -``` -1. DECOMPOSE → task-decomposer -2. FOR each subtask: - a. RESEARCH → if existing code understanding needed - b. 3 Actors (parallel) → security/performance/simplicity focuses - c. 3 Monitors (parallel) → validate + extract decisions - d. debate-arbiter (opus) → cross-evaluate + synthesize - e. Final Monitor → validate synthesis - f. If invalid: retry with feedback (max 5) - g. If risk_level ∈ {high, medium}: → Predictor - h. Apply changes -3. SUMMARY → optionally suggest /map-learn -``` - -## Step 1: Task Decomposition - -``` -Task( - subagent_type="task-decomposer", - description="Decompose task into subtasks", - prompt="Break down into ≤8 atomic subtasks and RETURN ONLY JSON matching task-decomposer schema v2.0 (schema_version, analysis, blueprint{subtasks[]}). - -Task: $ARGUMENTS - -Hard requirements: -- Use `blueprint.subtasks[].validation_criteria` (2-4 testable, verifiable outcomes) - - Prefix each criterion with `VC1:`, `VC2:`, ... 
(stable references for Actor/Monitor) - - Include a concrete anchor per VC (endpoint/function + file path) -- Use `blueprint.subtasks[].dependencies` (array of subtask IDs) and order subtasks by dependency -- Include `blueprint.subtasks[].complexity_score` (1-10) and `risk_level` (low|medium|high) -- Include `blueprint.subtasks[].security_critical` (true for auth/crypto/validation/data access) -- Include `blueprint.subtasks[].test_strategy` with unit/integration/e2e keys - - Map every `VCn:` to ≥1 planned test case (prefer test name contains `vc`) - - Recommended format: `path/to/test_file.ext::test_name_or_symbol`" -) -``` - -## Step 2: Subtask Loop - -### 2.0 Build AI-Friendly Subtask Packet (XML Anchors) - -Before calling any agents for the subtask, build a single **AI Packet** with unique XML-like tags (NO attributes). - -**Rule:** Use the subtask ID as the anchor name. Convert `-` to `_` for XML tag safety: -- `ST-001` → `ST_001` - -**AI Packet template:** - -```xml - - ST-001 - ... - ... - low|medium|high - true|false - 1-10 - - path1;path2;... - ... - ... - ... - - ... - ... - -``` - -Pass this packet verbatim to Actor/Monitor/debate-arbiter/Predictor. Do NOT rename tags mid-flow. - -### 2.1 Research (Conditional) - -**Call if:** refactoring, bug fixes, extending existing code, touching 3+ files -**Skip for:** new standalone features, docs, config - -``` -Task( - subagent_type="research-agent", - description="Research for subtask [ID]", - prompt="Query: [subtask description] -File patterns: [relevant globs] -Symbols: [optional keywords] -Intent: locate -Max tokens: 1500" -) -``` - -Pass `executive_summary` to Actor if `confidence >= 0.7`. - -### 2.2 Quality-Stakes Assessment - -**Purpose:** Determine deployment context and set minimum quality thresholds before launching Actor variants. 
- -**Assessment Logic:** -``` -# Determine deployment risk level based on goal content -deployment_risk_level = assess_deployment_context(goal): - IF goal contains "hospital" OR "healthcare" OR "patient" OR "medical": - → risk_level = "critical", min_security = 8, min_functionality = 8 - ELIF goal contains "government" OR "financial" OR "banking" OR "critical infrastructure": - → risk_level = "high", min_security = 8, min_functionality = 7 - ELIF goal contains "production" OR "enterprise" OR "customer-facing": - → risk_level = "medium", min_security = 7, min_functionality = 7 - ELSE: - → risk_level = "medium", min_security = 7, min_functionality = 7 # safe default - -# Build quality context for Actor variants -quality_context = { - "deployment_risk_level": risk_level, - "min_security_score": min_security, - "min_functionality_score": min_functionality, - "quality_enforcement": "All Actor variants MUST meet minimum thresholds regardless of focus area" -} -``` - -**Pass to Actors:** Include `quality_context` in each Actor variant prompt. - -**Rationale:** Prevents quality erosion in debate by establishing non-negotiable baselines before variants propose solutions. - -### 2.3 Parallel Actors (3 Variants) - -**ALWAYS call 3 Actors in parallel with different focuses:** - -``` -# Variant 1: Security Focus -Task( - subagent_type="actor", - description="Implement subtask [ID] - Security (v1)", - prompt="Implement with SECURITY focus: -**AI Packet (XML):** [paste ...] -**Quality Context:** deployment_risk_level={risk_level}, min_security={min_security}, min_functionality={min_functionality} -⚠️ Your variant MUST meet minimum quality thresholds. Quality is non-negotiable regardless of security focus. -approach_focus: security, variant_id: v1, self_moa_mode: true -Follow the Actor agent protocol output format. Ensure `decisions_made` is included for debate-arbiter." 
-) - -# Variant 2: Performance Focus -Task( - subagent_type="actor", - description="Implement subtask [ID] - Performance (v2)", - prompt="Implement with PERFORMANCE focus: -**AI Packet (XML):** [paste ...] -**Quality Context:** deployment_risk_level={risk_level}, min_security={min_security}, min_functionality={min_functionality} -⚠️ Your variant MUST meet minimum quality thresholds. Quality is non-negotiable regardless of performance focus. -approach_focus: performance, variant_id: v2, self_moa_mode: true -Follow the Actor agent protocol output format. Ensure `decisions_made` is included for debate-arbiter." -) - -# Variant 3: Simplicity Focus -Task( - subagent_type="actor", - description="Implement subtask [ID] - Simplicity (v3)", - prompt="Implement with SIMPLICITY focus: -**AI Packet (XML):** [paste ...] -**Quality Context:** deployment_risk_level={risk_level}, min_security={min_security}, min_functionality={min_functionality} -⚠️ Your variant MUST meet minimum quality thresholds. Quality is non-negotiable regardless of simplicity focus. -approach_focus: simplicity, variant_id: v3, self_moa_mode: true -Follow the Actor agent protocol output format. Ensure `decisions_made` is included for debate-arbiter." -) -``` - -### 2.4 Parallel Monitors (3 Validations) - -Validate each variant in parallel: - -``` -Task( - subagent_type="monitor", - description="Validate v1", - prompt="Review variant v1 against requirements: -**AI Packet (XML):** [paste ...] -**Proposed Solution:** [paste v1 Actor output] -**Specification Contract (optional):** [SpecificationContract JSON or null] -variant_id: v1, self_moa_mode: true - -Return ONLY valid JSON following MonitorReviewOutput schema. -When in Self-MoA mode, include extension fields: variant_id, self_moa_mode, decisions_identified, compatibility_features, strengths, weaknesses, recommended_as_base. -If `validation_criteria` present: include `contract_compliance` + `contract_compliant`. 
-If a SpecificationContract is provided: include `spec_contract_compliant` + `spec_contract_violations`." -) -``` - -Repeat for v2 and v3 in parallel. - -### 2.5 debate-arbiter (Opus) - -``` -Task( - subagent_type="debate-arbiter", - model="opus", - description="Cross-evaluate and synthesize best implementation", - prompt="Cross-evaluate 3 variants and synthesize optimal solution: - -**AI Packet (XML):** [paste ...] -**Variants (raw Actor outputs):** - -[paste v1 Actor output] - - -[paste v2 Actor output] - - -[paste v3 Actor output] - -**Monitor Results (MonitorReviewOutput JSON):** - -[paste v1 Monitor output JSON] - - -[paste v2 Monitor output JSON] - - -[paste v3 Monitor output JSON] - -**Specification Contract (optional):** [SpecificationContract JSON or null] -**Priority Policy:** [\"correctness\", \"security\", \"maintainability\", \"performance\"] -**Evaluation Dimensions:** [\"security\", \"performance\", \"readability\", \"maintainability\"] - -Return ONLY valid JSON following ArbiterOutput schema. -Include: comparison_matrix, decision_rationales, synthesis_reasoning (8 steps)." -) -``` - -### 2.6 Final Monitor - -Validate synthesized code: - -``` -Task( - subagent_type="monitor", - description="Validate synthesized implementation", - prompt="Review synthesized code from debate-arbiter: -**AI Packet (XML):** [paste ...] -**Proposed Solution:** [paste debate-arbiter code output] -**Arbiter Confidence:** [confidence from debate-arbiter] - -Check: correctness, security, standards, decision implementation. -Return ONLY valid JSON following MonitorReviewOutput schema." -) -``` - -### 2.7 Retry Loop - -If Final Monitor returns `valid === false`: -1. Provide feedback including arbiter's synthesis_reasoning -2. Retry debate-arbiter with retry_context -3. 
Max 2 debate-arbiter retries per subtask - -```python -retry_context = { - "attempt": retry_count + 1, - "previous_errors": monitor_issues, - "failed_decisions": [decisions_causing_issues], - "strategy_adjustments": ["avoid decision X", "prefer fresh_generation"] -} -``` - -### 2.8 Escalation Gate (AskUserQuestion) - -If Monitor returns `escalation_required === true`, ask user: - -``` -AskUserQuestion(questions=[ - { - "header": "Escalation", - "question": "Human review requested by Monitor.\n\nSubtask: [ST-XXX]\nReason: [escalation_reason]\nArbiter Confidence: [confidence]\n\nProceed anyway?", - "multiSelect": false, - "options": [ - {"label": "YES - Proceed", "description": "Continue (run Predictor if required, then apply changes)."}, - {"label": "REVIEW - Details", "description": "Show synthesis_reasoning + comparison_matrix, then ask again."}, - {"label": "NO - Abort", "description": "Do not apply changes; wait for human review."} - ] - } -]) -``` - -### 2.9 Conditional Predictor - -```python -# Enhanced predictor decision: -# 1. ALWAYS call for: high risk, security_critical, or escalation_required -# 2. SKIP if: risk_level == "low" -# 3. SKIP if: risk_level == "medium" AND all affected_files are new (don't exist yet) -# AND complexity_score <= 4 AND NOT security_critical -# → Write minimal evidence directly via Write tool -# 4. 
OTHERWISE: Call predictor with tier_hint - -skip_predictor = ( - not subtask.escalation_required - and not subtask.security_critical - and ( - subtask.risk_level == "low" - or ( - subtask.risk_level == "medium" - and subtask.affected_files # guard against vacuous all() - and all(not file_exists(f) for f in subtask.affected_files) - and subtask.complexity_score <= 4 - ) - ) -) - -if skip_predictor: - # No action needed — Predictor skipped for low-risk subtasks - pass -else: - # Determine tier_hint from subtask metadata: - # - risk "medium" + complexity_score <= 3 → tier_hint: 1 - # - risk "medium" + complexity_score 4-7 → tier_hint: 2 - # - risk "high" OR security_critical → tier_hint: 3 - if subtask.risk_level == "high" or subtask.security_critical: - tier_hint = 3 - elif subtask.complexity_score <= 3: - tier_hint = 1 - else: - tier_hint = 2 - - Task( - subagent_type="predictor", - description="Analyze impact", - prompt="Analyze impact using Predictor input schema. - - tier_hint: {tier_hint} - - **AI Packet (XML):** [paste ...] - - Required inputs: - - change_description: [summary from debate-arbiter synthesis_reasoning] - - files_changed: [list of paths from synthesized code] - - diff_content: [unified diff] - - Optional inputs: - - analyzer_output: [debate-arbiter output] - - user_context: [subtask requirements + arbiter confidence] - - Return ONLY valid JSON following Predictor schema." - ) -``` - -### 2.10 Apply Changes - -Apply synthesized code via Write/Edit tools. Proceed to next subtask. - -### 2.11 Gate 2: Tests Available / Run - -After applying changes, run tests if available. - -**Prefer** the commands implied by ``. Otherwise: -- If `pytest` project: run `pytest` -- If `package.json` present: run `npm test` / `pnpm test` / `yarn test` -- If `go.mod` present: run `go test ./...` -- If `Cargo.toml` present: run `cargo test` - -If no tests found: mark gate as skipped and proceed. 
- -### 2.12 Gate 3: Formatter / Linter - -After tests gate, run formatter/linter checks if available. - -Prefer repo-standard commands (e.g., `make lint`, `make fmt`). Otherwise: -- Python: `ruff check`, `black --check`, `mypy` -- JS/TS: `eslint`, `prettier -c` -- Go: `gofmt` check + `golangci-lint run` -- Rust: `cargo fmt --check`, `cargo clippy` - -If none found: mark gate as skipped and proceed. - ---- - -## Step 3: Summary - -- Run tests if applicable -- Create commit (if requested) -- Report: features implemented, files changed -- Include key synthesis reasoning highlights from debate-arbiter - -**Optional:** Run `/map-learn [summary]` to preserve valuable patterns for future workflows. - ---- - -## Key Differences from map-efficient - -| Aspect | map-efficient | map-debate | -|--------|---------------|------------| -| Variant generation | Single variant (one Actor) | Always 3 variants | -| Synthesis agent | N/A (single Actor) | debate-arbiter (opus) | -| Output | Direct implementation | comparison_matrix + decision_rationales + synthesis_reasoning | -| Cost | Lower | ~3-5x higher (opus model) | -| Use case | Efficiency | Reasoning transparency | - -Begin now with debate workflow. diff --git a/.claude/commands/map-efficient.md b/.claude/commands/map-efficient.md index f69689d0..6b5d48b3 100644 --- a/.claude/commands/map-efficient.md +++ b/.claude/commands/map-efficient.md @@ -299,8 +299,9 @@ loop: # Phase D: Retry handling # For each monitor that returned valid=false: - # Re-run actor + monitor for that subtask (serially) - # Track retries per subtask: validate_wave_step SUBTASK_ID STEP_ID + # RETRY=$(python3 .map/scripts/map_orchestrator.py wave_monitor_failed $subtask_id --feedback "feedback") + # If RETRY.status == "max_retries": escalate to user + # Otherwise: re-run actor + monitor for that subtask (serially) # Phase E: Per-wave gates # Run tests + linter ONCE for the entire wave @@ -358,6 +359,8 @@ STRICT RULES: 5. 
Tests SHOULD fail when run (implementation doesn't exist yet). 6. Test files MUST be lint-clean. Use proper imports at the top of the file (not inside type annotations). Run the project linter on test files before finishing. +7. Do NOT add temporal comments about test failure status (e.g., "currently FAILS", + "expected to FAIL"). Tests are permanent, clean code — the Red/Green state is transient. """ ) @@ -453,55 +456,59 @@ fi # After Monitor returns: if monitor_output["valid"] == false: - # Increment retry counter (also triggered when test gate fails above) - if retry_count < 5: - # Go back to Phase: ACTOR with Monitor feedback - # Actor will fix issues and re-apply code - - # === STUCK RECOVERY (at retry 3) === - # At retry 3, intercept with intermediate recovery before retries 4-5. - # This gives Actor better context to break out of a stuck loop. - if retry_count == 3: - # Step 1: Check if research-agent already ran for this subtask - findings_file = f".map/{branch}/findings_{branch}.md" - if findings_file exists and has content for this subtask: - # Reuse existing findings (Edge Case 12: skip re-invocation) - recovery_context = read(findings_file) - else: - # Invoke research-agent for alternative approaches - Task( - subagent_type="research-agent", - description="Stuck recovery: find alternative approach", - prompt=f"""Subtask {subtask_id} failed 3 monitor retries. + # Use orchestrator to handle retry: requeues ACTOR+MONITOR, increments retry_count, + # switches phase so workflow-gate allows edits, persists feedback for Actor. 
+ RETRY_RESULT=$(python3 .map/scripts/map_orchestrator.py monitor_failed --feedback "MONITOR_FEEDBACK_TEXT") + # RETRY_RESULT.status is "retrying" or "max_retries" + # RETRY_RESULT.retry_count shows current attempt number + # RETRY_RESULT.feedback_file points to .map/{branch}/monitor_feedback_retry{N}.md + + RETRY_STATUS=$(echo "$RETRY_RESULT" | python3 -c "import sys,json; print(json.load(sys.stdin).get('status',''))") + RETRY_COUNT=$(echo "$RETRY_RESULT" | python3 -c "import sys,json; print(json.load(sys.stdin).get('retry_count',0))") + + if RETRY_STATUS == "max_retries": + # Escalate to user (retry limit reached after 5 attempts) + AskUserQuestion(questions=[{"question": "Monitor retry limit reached (5 attempts). How to proceed?", "header": "Retry limit", "options": [{"label": "Continue", "description": "Continue with more retries (manually edit step_state.json retry_count)"}, {"label": "Skip", "description": "Skip this subtask and move to next"}, {"label": "Abort", "description": "Stop workflow"}], "multiSelect": false}]) + + # === STUCK RECOVERY (at retry 3) === + # At retry 3, intercept with intermediate recovery before retries 4-5. + if RETRY_COUNT == 3: + # Step 1: Check if research-agent already ran for this subtask + findings_file = f".map/{branch}/findings_{branch}.md" + if findings_file exists and has content for this subtask: + recovery_context = read(findings_file) + else: + Task( + subagent_type="research-agent", + description="Stuck recovery: find alternative approach", + prompt=f"""Subtask {subtask_id} failed 3 monitor retries. Monitor feedback: {latest_monitor_feedback} Find an ALTERNATIVE approach. Current approach is not working. 
Focus on: different patterns, simpler implementations, existing utilities.""" - ) - recovery_context = research_agent_output - - # Step 2: Invoke predictor (skip for low-risk subtasks — Edge Case 7) - if subtask.risk_level != "low": - Task( - subagent_type="predictor", - description="Stuck recovery: analyze why approach fails", - prompt=f"""Subtask {subtask_id} failed 3 retries. + ) + recovery_context = research_agent_output + + # Step 2: Invoke predictor (skip for low-risk subtasks) + if subtask.risk_level != "low": + Task( + subagent_type="predictor", + description="Stuck recovery: analyze why approach fails", + prompt=f"""Subtask {subtask_id} failed 3 retries. Research findings: {recovery_context} Analyze: why is the current approach failing? What dependencies are missed?""" - ) - recovery_context += predictor_output + ) + recovery_context += predictor_output - # Step 3: Pass recovery context to Actor for retries 4-5 - # Actor receives: original task + monitor feedback + recovery context - # This gives Actor a fresh perspective from research-agent/predictor + if recovery_context is empty or unhelpful: + AskUserQuestion(questions=[{"question": "Stuck recovery failed. How to proceed?", "header": "Stuck", "options": [{"label": "Continue", "description": "Try 2 more retries"}, {"label": "Skip", "description": "Skip subtask"}, {"label": "Abort", "description": "Stop workflow"}], "multiSelect": false}]) + # === END STUCK RECOVERY === - # If both research-agent and predictor found nothing useful: - if recovery_context is empty or unhelpful: - AskUserQuestion(questions=[{"question": "Stuck recovery: research-agent and predictor found no alternative. 
How to proceed?", "header": "Stuck", "options": [{"label": "Continue", "description": "Try 2 more retries with current approach"}, {"label": "Skip", "description": "Skip subtask, move to next"}, {"label": "Abort", "description": "Stop workflow"}], "multiSelect": false}]) - # === END STUCK RECOVERY === + # Phase is now ACTOR (set by orchestrator). Proceed to get_next_step + # which will return ACTOR instruction. Pass RETRY_RESULT.feedback_file path + # to Actor so it can read the monitor feedback explicitly. - else: - # Escalate to user (retry limit reached after 5 attempts) - AskUserQuestion(questions=[{"question": "Monitor retry limit reached (5 attempts). How to proceed?", "header": "Retry limit", "options": [{"label": "Continue", "description": "Reset retry counter and try again"}, {"label": "Skip", "description": "Skip this subtask and move to next"}, {"label": "Abort", "description": "Stop workflow"}], "multiSelect": false}]) +# For wave-based execution, use wave_monitor_failed instead: +# python3 .map/scripts/map_orchestrator.py wave_monitor_failed ST-001 --feedback "feedback text" ``` ### Monitor Artifact Rule diff --git a/.claude/commands/map-review.md b/.claude/commands/map-review.md index 999ef2fd..a1634fe8 100644 --- a/.claude/commands/map-review.md +++ b/.claude/commands/map-review.md @@ -299,6 +299,20 @@ Present the verdict with a summary table: - Key issues resolved during interactive review - Remaining action items +## Workflow Gate Unlock (REVISE/BLOCK only) + +If the verdict is **REVISE** or **BLOCK** and the user asks to fix the issues, +the workflow gate may block edits because the workflow is in COMPLETE phase. + +**Before applying any fixes**, run: + +```bash +python3 .map/scripts/map_orchestrator.py reopen_for_fixes --feedback "Review findings: [summary of issues to fix]" +``` + +This transitions the workflow from COMPLETE → ACTOR so the edit gate unlocks. 
+Skip this step if the workflow is not in COMPLETE phase (e.g., review was run mid-workflow). + ## Handoff Artifact Update After the final verdict, update branch-scoped handoff artifacts so review output survives beyond the chat: diff --git a/.claude/commands/map-tdd.md b/.claude/commands/map-tdd.md index bf7aac34..716c1817 100644 --- a/.claude/commands/map-tdd.md +++ b/.claude/commands/map-tdd.md @@ -134,6 +134,24 @@ STRICT RULES: 9. Test files MUST be lint-clean. Use proper imports at the top of the file (not inside type annotations). Run the project linter (ruff/eslint/golangci-lint) on test files before finishing. Fix any lint errors in your test files. +10. Do NOT add temporal or state-marking comments about test failure status + (e.g., "currently FAILS", "expected to FAIL until fix is applied", + "will PASS once fix is implemented", "Red phase"). Write tests as permanent, + clean code. The Red/Green state is transient — it must NOT leak into comments. + +TEST QUALITY REQUIREMENTS — avoid "2+2=4" tests: +- Every test must verify SEMANTIC BEHAVIOR, not just that a single branch executes. + Bad: "returns error when input is nil" (trivial nil-check). + Good: "returns NotFound error and does NOT call downstream API when input is nil". +- Tests must assert MULTIPLE CONSEQUENCES of an action (side effects, return values, + state changes, calls to dependencies). A test that asserts only one thing from + a single if-branch is trivial — combine it with assertions about what else + should or should NOT happen. +- Prefer scenario-based tests that exercise a CHAIN of behavior (setup → action → + verify multiple outcomes) over unit-level tests that check one field. +- For each test ask: "Would this test catch a real bug, or does it just confirm + the obvious?" If the answer is "obvious", merge it into a richer scenario or drop it. +- Aim for at least 60% of tests being full semantic scenarios (multi-step, multi-assert). 
Output: - Test files written via Edit/Write tools @@ -188,10 +206,21 @@ fi **Then evaluate test results:** -- **Tests FAIL with assertion/import errors** → GOOD. This is the expected TDD state ("Red" phase). Proceed to ACTOR. +- **Tests FAIL with assertion/import errors** → GOOD. This is the expected TDD state ("Red" phase). But also run the quality check below before proceeding. - **Tests PASS** → PROBLEM. Tests are trivial or not testing real behavior. Go back to TEST_WRITER with feedback: "Tests pass without implementation. Tests must assert behavior that requires code to be written." - **Tests have syntax errors** → Go back to TEST_WRITER with feedback to fix syntax. +**Quality gate (run even if tests correctly fail):** + +Review the test files and classify each test as: +- **Semantic** — tests real behavior with multi-step scenario or multi-assert verification +- **Trivial ("2+2=4")** — tests a single if-branch or obvious nil-check with one assert + +If more than 40% of tests are trivial, go back to TEST_WRITER with feedback: +"Too many trivial tests. [N] of [M] tests are single-branch checks. Merge trivial +tests into richer scenarios that verify multiple consequences. Each test should catch +a real bug, not just confirm one obvious branch." + ```bash python3 .map/scripts/map_orchestrator.py validate_step "2.26" ``` @@ -234,7 +263,20 @@ Output: standard Actor output (approach + code + trade-offs) ) ``` -**CRITICAL: After ACTOR returns, you MUST call Monitor (2.4). Do NOT skip Monitor. Do NOT mark the subtask complete without Monitor validation.** This is not optional — Monitor is a mandatory phase in every workflow, including TDD. +**CRITICAL: After ACTOR returns, run the TDD Refactor step below, then call Monitor (2.4). Do NOT skip Monitor. Do NOT mark the subtask complete without Monitor validation.** This is not optional — Monitor is a mandatory phase in every workflow, including TDD. 
+ +### TDD Refactor: Clean Stale Red-Phase Comments + +After ACTOR completes and tests pass (Green), scan the test files created by TEST_WRITER for stale Red-phase markers. This is the **Refactor** step of Red-Green-Refactor. + +Look for and clean up: +- Comments containing "currently FAILS", "expected to FAIL", "will PASS once", "Red phase", "TDD Red" +- File-level docstrings saying tests "are expected to fail against current implementation" +- Any temporal language that references the transient Red/Green state + +Rewrite matched comments as permanent, implementation-neutral descriptions. If a comment is only a state marker with no semantic value, remove it entirely. + +**This cleanup is done by the orchestrating agent (you), NOT by Actor.** Actor in code_only mode cannot modify test files, but you can. ```bash # Validate Actor step, then get_next_step will return MONITOR (2.4) diff --git a/.claude/skills/map-cli-reference/SKILL.md b/.claude/skills/map-cli-reference/SKILL.md deleted file mode 100644 index 2a993c46..00000000 --- a/.claude/skills/map-cli-reference/SKILL.md +++ /dev/null @@ -1,124 +0,0 @@ ---- -name: map-cli-reference -description: >- - Quick reference for mapify CLI usage errors. Use when - encountering "no such command", "no such option", "parameter not found", - or when user asks "how to use mapify", "validate graph". - Do NOT use for workflow selection (use map-workflows-guide) or planning - methodology (use map-planning). -metadata: - author: azalio - version: 3.1.0 ---- - -# MAP CLI Quick Reference - -Fast lookup for commands, parameters, and common error corrections. 
- -**For comprehensive documentation**, see: -- [CLI_REFERENCE.json](../../../docs/CLI_REFERENCE.json) -- [CLI_COMMAND_REFERENCE.md](../../../docs/CLI_COMMAND_REFERENCE.md) - ---- - -## Quick Command Index - -### Validate Commands - -```bash -# Validate dependency graph -mapify validate graph task_plan.json -echo '{"subtasks":[...]}' | mapify validate graph - -# Visualize dependencies -mapify validate graph task_plan.json --visualize - -# Strict mode (fail on warnings) -mapify validate graph task_plan.json --strict -``` - -### Root Commands - -```bash -# Initialize project -mapify init my-project -mapify init . --mcp essential --force - -# System checks -mapify check -mapify check --debug - -# Upgrade agents -mapify upgrade -``` - ---- - -## Common Errors & Corrections - -### Error 1: Using Removed Commands - -**Issue**: `Error: No such command 'playbook'` or docs/examples mention `mapify playbook ...` - -**Solution**: -- The `playbook` command was removed in v4.0+ - ---- - -## Exit Codes (validate graph) - -- **0**: Valid graph (no critical errors) -- **1**: Invalid graph (critical errors or warnings with `--strict`) -- **2**: Malformed input (invalid JSON) - ---- - -## See Also - -**Related Skills**: -- [map-workflows-guide](../map-workflows-guide/SKILL.md) - -**Source Code**: -- `src/mapify_cli/__init__.py` - ---- - -## Examples - -### Example 1: Fixing a deprecated command error - -**User says:** "I'm getting `Error: No such command 'playbook'` when running mapify" - -**Actions:** -1. Identify error type — removed command usage -2. Explain: `playbook` command was removed in v4.0+ - -**Result:** User acknowledges the removed command. - -### Example 2: Validating a dependency graph - -**User says:** "How do I check if my task plan has circular dependencies?" - -**Actions:** -1. Show command: `mapify validate graph task_plan.json` -2. Explain exit codes: 0 = valid, 1 = invalid, 2 = malformed JSON -3. 
Suggest `--strict` flag for CI pipelines and `--visualize` for debugging - -**Result:** User validates their task plan and fixes dependency issues before running workflow. - ---- - -## Troubleshooting - -| Issue | Cause | Solution | -|-------|-------|----------| -| `No such command 'playbook'` | Removed in v4.0+ | Command no longer available | -| `No such option '--output'` | Wrong subcommand syntax | Check `mapify --help` for valid options | -| `validate graph` exit code 2 | Malformed JSON input | Validate JSON with `python -m json.tool < file.json` | -| `mapify init` overwrites files | Using `--force` flag | Omit `--force` to preserve existing configuration | - ---- - -**Version**: 1.1 -**Last Updated**: 2026-01-15 -**Lines**: ~200 (follows 500-line skill rule) diff --git a/.claude/skills/map-cli-reference/scripts/check-command.sh b/.claude/skills/map-cli-reference/scripts/check-command.sh deleted file mode 100755 index f7efaa49..00000000 --- a/.claude/skills/map-cli-reference/scripts/check-command.sh +++ /dev/null @@ -1,117 +0,0 @@ -#!/usr/bin/env bash -# Check if a mapify subcommand exists and show usage help. -# -# Usage: -# ./check-command.sh [option] -# -# Examples: -# ./check-command.sh validate graph -# ./check-command.sh init -# ./check-command.sh playbook # removed command -# -# Exit codes: -# 0 - Command exists -# 1 - Command not found -# 2 - Command removed - -set -euo pipefail - -SUBCOMMAND="${1:-}" -OPTION="${2:-}" - -if [ -z "$SUBCOMMAND" ]; then - echo "Usage: $0 [option]" - echo "" - echo "Checks if a mapify subcommand exists." 
- echo "" - echo "Available subcommands:" - echo " init - Initialize project with MAP framework" - echo " check - Run system checks" - echo " upgrade - Upgrade agent templates" - echo " validate - Validate dependency graphs" - echo "" - echo "Removed subcommands:" - echo " playbook - Removed in v4.0+" - exit 1 -fi - -# Removed subcommands -REMOVED_COMMANDS="playbook" - -# Known valid commands -VALID_COMMANDS="init check upgrade validate" - -# Check removed commands first -for dep in $REMOVED_COMMANDS; do - if [ "$SUBCOMMAND" = "$dep" ]; then - echo "ERROR: '$SUBCOMMAND' was removed in v4.0+" - exit 2 - fi -done - -# Check valid commands -FOUND=0 -for cmd in $VALID_COMMANDS; do - if [ "$SUBCOMMAND" = "$cmd" ]; then - FOUND=1 - break - fi -done - -if [ "$FOUND" -eq 0 ]; then - echo "ERROR: No such command '$SUBCOMMAND'" - echo "" - echo "Available commands: $VALID_COMMANDS" - echo "" - echo "Did you mean one of these?" - # Simple fuzzy match - for cmd in $VALID_COMMANDS; do - echo " mapify $cmd" - done - exit 1 -fi - -# Command exists, show help -echo "OK: 'mapify $SUBCOMMAND' is a valid command" - -# Show subcommand-specific help -case "$SUBCOMMAND" in - validate) - echo "" - echo "Usage: mapify validate graph [--strict] [--visualize]" - echo "" - echo "Options:" - echo " --strict Fail on warnings (exit code 1)" - echo " --visualize Show dependency graph" - echo "" - echo "Exit codes: 0=valid, 1=invalid, 2=malformed input" - if [ -n "$OPTION" ] && [ "$OPTION" != "graph" ]; then - echo "" - echo "WARNING: Unknown validate subcommand '$OPTION'. Did you mean 'graph'?" 
- fi - ;; - init) - echo "" - echo "Usage: mapify init [project-name] [--mcp essential|full] [--force]" - echo "" - echo "Options:" - echo " --mcp essential Install essential MCP tools only" - echo " --mcp full Install all MCP tools" - echo " --force Overwrite existing configuration" - ;; - check) - echo "" - echo "Usage: mapify check [--debug]" - echo "" - echo "Options:" - echo " --debug Show detailed diagnostic information" - ;; - upgrade) - echo "" - echo "Usage: mapify upgrade" - echo "" - echo "Upgrades agent templates to latest version." - ;; -esac - -exit 0 diff --git a/.claude/skills/map-workflows-guide/SKILL.md b/.claude/skills/map-workflows-guide/SKILL.md deleted file mode 100644 index 2512c00c..00000000 --- a/.claude/skills/map-workflows-guide/SKILL.md +++ /dev/null @@ -1,529 +0,0 @@ ---- -name: map-workflows-guide -description: >- - Guide for choosing the right MAP workflow based on task type, risk level, - and token budget. Use when user asks "which workflow should I use", - "difference between map-fast and map-efficient", "when to use map-debug", - or compares MAP workflows. Do NOT use for actual workflow execution — - use /map-efficient, /map-fast, etc. instead. Do NOT use for CLI errors - (use map-cli-reference). -version: 1.0 -metadata: - author: azalio - version: 3.1.0 ---- - -# MAP Workflows Guide - -This skill helps you choose the optimal MAP workflow for your development tasks. MAP Framework provides **12 workflow commands**: 4 primary workflows (`/map-fast`, `/map-efficient`, `/map-debug`, `/map-debate`) and 8 supporting commands (`/map-review`, `/map-check`, `/map-plan`, `/map-task`, `/map-tdd`, `/map-release`, `/map-resume`, `/map-learn`). Each is optimized for different scenarios with varying token costs, learning capabilities, and quality gates. Two additional workflows (`/map-feature`, `/map-refactor`) are planned but not yet implemented. - -## Quick Decision Tree - -Answer these 5 questions to find your workflow: - -``` -1. 
Is this a small, low-risk change with clear acceptance criteria? - YES → Use /map-fast (40-50% tokens, no learning) - NO → Continue to question 2 - -2. Are you debugging/fixing a specific bug or test failure? - YES → Use /map-debug (70-80% tokens, focused analysis) - NO → Continue to question 3 - -3. Do stakeholders need documented reasoning and trade-off analysis? - YES → Use /map-debate (3x cost, Opus arbiter, explicit reasoning) - NO → Continue to question 4 - -4. Is this critical infrastructure or security-sensitive code? - YES → Use /map-efficient (60-70% tokens, recommended default) - NO → Continue to question 5 - -5. Is this a change you'll maintain long-term or that has non-trivial impact? - YES → Use /map-efficient (60-70% tokens, batched learning) ← RECOMMENDED - NO → If still low-risk and localized, /map-fast may be acceptable -``` - ---- - -## Workflow Comparison Matrix - -| Aspect | `/map-fast` | `/map-efficient` | `/map-debug` | `/map-debate` | -|--------|-----------|-----------------|-------------|--------------| -| **Token Cost** | 40-50% | **60-70%** | 70-80% | ~3x baseline | -| **Learning** | ❌ None | ✅ Via /map-learn | ✅ Per-subtask | ✅ Via /map-learn | -| **Quality Gates** | Basic | Essential | Focused | Multi-variant | -| **Impact Analysis** | ❌ Skipped | ⚠️ Conditional | ✅ Yes | ⚠️ Conditional | -| **Multi-Variant** | ❌ Never | ⚠️ Optional (--self-moa) | ❌ Never | ✅ Always (3 variants) | -| **Synthesis Model** | N/A | Sonnet | N/A | **Opus** | -| **Best For** | Low-risk | **Production** | Bugs | Reasoning transparency | -| **Recommendation** | Use sparingly | **DEFAULT** | Issues | Complex decisions | - -> **Note:** `/map-feature` and `/map-refactor` are **planned but not yet implemented**. -> Use `/map-efficient` for critical features and refactoring tasks. -> See [Planned Workflows](#planned-workflows) below for details. - ---- - -## Detailed Workflow Descriptions - -### 1. 
/map-fast — Low-Risk Changes ⚡ - -**Use this when:** -- Small, localized changes with minimal blast radius -- Minor fixes and tweaks where speed matters -- Low-risk maintenance work - -**What you get:** -- ✅ Full implementation (Actor generates code) -- ✅ Basic validation (Monitor checks correctness) -- ❌ NO quality scoring (Evaluator skipped) -- ❌ NO impact analysis (Predictor skipped entirely) -- ❌ NO learning (Reflector skipped) - -**Trade-offs:** -- Saves 50-60% tokens vs full pipeline (every agent per subtask) -- Knowledge never accumulates -- Minimal quality gates (only basic checks) - -**Example tasks:** -- "Fix a small validation edge case" -- "Update error message wording" -- "Add a small CLI option with tests" - -**Command syntax:** -```bash -/map-fast [task description] -``` - -**When to AVOID:** -- ❌ Security-critical logic -- ❌ Wide refactors or multi-module changes -- ❌ High uncertainty / unclear requirements - -**See also:** [resources/map-fast-deep-dive.md](resources/map-fast-deep-dive.md) - ---- - -### 2. 
/map-efficient — Production Features (RECOMMENDED) 🎯 - -**Use this when:** -- Building production features (moderate complexity) -- Most of your development work -- You want full learning but need token efficiency -- Standard feature implementation with familiar patterns - -**What you get:** -- ✅ Full implementation (Actor) -- ✅ Comprehensive validation (Monitor with feedback loops) -- ✅ Impact analysis (Predictor runs conditionally) -- ✅ Tests gate + Linter gate per subtask -- ✅ Final-Verifier (adversarial verification at end) -- ✅ **Learning via /map-learn** (Reflector, optional after workflow) - -**Optimization strategy:** -- **Conditional Predictor:** Runs only if risk detected (security, breaking changes) -- **Batched Learning:** Reflector runs ONCE after all subtasks complete -- **Result:** 35-40% token savings vs full pipeline while preserving learning -- **Same quality gates:** Monitor still validates each subtask - -**When Predictor runs:** -- Modifies authentication/security code -- Introduces breaking changes -- High complexity detected -- Multiple files affected - -**Example tasks:** -- "Implement user registration with email validation" -- "Add pagination to blog posts API" -- "Create dashboard analytics component" -- "Build shopping cart feature" - -**Command syntax:** -```bash -/map-efficient [task description] -``` - -**Quality guarantee:** -Despite token optimization, preserves: -- Per-subtask validation (Monitor always checks) -- Complete implementation feedback loops -- Full learning (batched, not skipped) - -**See also:** [resources/map-efficient-deep-dive.md](resources/map-efficient-deep-dive.md) - ---- - -### 3. 
/map-debug — Bug Fixes 🐛 - -**Use this when:** -- Fixing specific bugs or defects -- Resolving test failures -- Investigating runtime errors -- Performing root cause analysis -- Diagnosing unexpected behavior - -**What you get:** -- ✅ Focused implementation (Actor targets root cause) -- ✅ Validation (Monitor verifies fix) -- ✅ Root cause analysis -- ✅ Impact assessment (Predictor) -- ✅ Learning (Reflector) - -**Specialized features:** -- Error log analysis -- Stack trace interpretation -- Test failure diagnosis -- Regression prevention - -**Example tasks:** -- "Fix failing tests in auth.test.ts" -- "Debug TypeError in user service" -- "Resolve race condition in async code" -- "Fix memory leak in notification handler" - -**Command syntax:** -```bash -/map-debug [issue description or error message] -``` - -**Include in request:** -- Error message/stack trace -- When it occurs (specific scenario) -- What the expected behavior is -- Relevant log files if available - -**See also:** [resources/map-debug-deep-dive.md](resources/map-debug-deep-dive.md) - ---- - -### Planned Workflows - -The following workflows are **planned but not yet implemented**. Use `/map-efficient` as a substitute for both. - -#### /map-feature — Critical Features (PLANNED) - -Intended for security-critical and high-risk features requiring maximum validation (100% token cost, per-subtask learning, Predictor always runs). **Not yet implemented.** Use `/map-efficient` instead — it provides the same agent pipeline with conditional Predictor and batched learning. - -**Design reference:** [resources/map-feature-deep-dive.md](resources/map-feature-deep-dive.md) - -#### /map-refactor — Code Restructuring (PLANNED) - -Intended for refactoring with dependency-focused impact analysis and breaking change detection. **Not yet implemented.** Use `/map-efficient` instead — describe the refactoring intent in the task description for appropriate Predictor analysis. 
- -**Design reference:** [resources/map-refactor-deep-dive.md](resources/map-refactor-deep-dive.md) - ---- - -## Understanding MAP Agents - -MAP workflows orchestrate **11 specialized agents**, each with specific responsibilities: - -### Execution & Validation Agents - -**TaskDecomposer** — Breaks goal into subtasks -- Analyzes requirements -- Creates atomic, implementable subtasks -- Defines acceptance criteria for each -- Estimates complexity - -**Actor** — Writes code and implements -- Generates implementation -- Makes file changes -- Uses existing patterns from previous workflows - -**Monitor** — Validates correctness -- Checks implementation against criteria -- Runs tests to verify -- Identifies issues -- Feedback loop: Returns to Actor if invalid - -**Evaluator** — Quality gates -- Scores implementation quality (0-10) -- Checks completeness -- Approves/rejects solution -- Feedback loop: Returns to Actor if score < threshold -- **Only in /map-debug, /map-review** (skipped in /map-efficient, /map-fast, /map-debate) - -### Analysis Agents - -**Predictor** — Impact analysis -- Analyzes dependencies -- Predicts side effects -- Identifies risks and breaking changes -- **Conditional in /map-efficient** (runs if risk detected) -- **Always in /map-debug** (focused analysis) - -### Learning Agents - -**Reflector** — Pattern extraction -- Analyzes what worked and failed -- Extracts reusable patterns -- Prevents duplicate pattern extraction -- **Batched in /map-efficient** (runs once at end, via /map-learn) -- **Skipped in /map-fast** (no learning) - -### Optional Agent - -**Documentation-Reviewer** — Documentation validation -- Reviews completeness -- Checks consistency -- Validates examples -- Verifies external dependency docs current - -### Synthesis Agents - -**Debate-Arbiter** — Multi-variant cross-evaluation (MAP Debate) -- Cross-evaluates Actor variants with explicit reasoning -- Synthesizes optimal solution from multiple approaches -- Uses Opus model for 
reasoning transparency -- **Only in /map-debate workflow** - -**Synthesizer** — Solution synthesis -- Extracts decisions from multiple variants -- Generates unified code from best elements (Self-MoA) -- Merges insights across Actor outputs -- **Used in /map-efficient with --self-moa flag** - -### Discovery & Verification Agents - -**Research-Agent** — Codebase discovery -- Heavy codebase reading with compressed output -- Gathers context proactively before Actor implementation -- Prevents context pollution in implementation agents -- **Used in /map-plan, /map-efficient, /map-debug** - -**Final-Verifier** — Adversarial verification (Ralph Loop) -- Root cause analysis via adversarial testing -- Terminal verification after all other agents -- Ensures no regressions or overlooked issues -- **Used in /map-check, /map-efficient** - ---- - -## Decision Flowchart - -``` -START: What type of development task? -│ -├─────────────────────────────────────┐ -│ Small, low-risk change? │ -│ (Localized, clear acceptance) │ -├─────────────────────────────────────┘ -│ YES → /map-fast (40-50% tokens, no learning) -│ -│ NO ↓ -│ -├─────────────────────────────────────┐ -│ Debugging/fixing a specific issue? │ -│ (Bug, test failure, error) │ -├─────────────────────────────────────┘ -│ YES → /map-debug (70-80% tokens, focused analysis) -│ -│ NO ↓ -│ -└─────────────────────────────────────┐ - Everything else (features, │ - refactoring, critical code) ←──────┘ - → /map-efficient (60-70% tokens, RECOMMENDED) -``` - ---- - -## Common Questions - -**Q: Which workflow should I use by default?** - -A: **`/map-efficient`** for 80% of tasks. 
-- Best balance of quality and token efficiency -- Full learning preserved (just batched) -- Suitable for all production code -- Default recommendation for feature development - -**Q: When is /map-fast actually acceptable?** - -A: When the change is small and low-risk: -- Localized fixes with minimal blast radius -- Small UI/text tweaks -- Minor maintenance changes - -Avoid /map-fast for: -- Security or critical infrastructure -- Broad refactors or multi-module changes -- High uncertainty requirements - -**Q: What about /map-feature and /map-refactor?** - -A: These are **planned but not yet implemented**. Use `/map-efficient` for all feature development and refactoring tasks. `/map-efficient` provides the full agent pipeline (Actor, Monitor, conditional Predictor, Tests/Linter gates, Final-Verifier) with optional learning via `/map-learn`. Describe the risk level and refactoring intent in your task description for appropriate Predictor analysis. - -**Q: Can I switch workflows mid-task?** - -A: No, each workflow is a complete pipeline. If you started with wrong workflow: -1. Complete current workflow -2. Start new workflow with correct one -3. 
Re-implement if needed - -**Q: How do I know if Predictor actually ran in /map-efficient?** - -A: Check agent output for indicators: -``` -✅ Predictor: [Risk detected - Full analysis] -⏭️ Predictor: [Skipped - Low risk item] -``` - -Predictor runs if: -- Subtask touches authentication/security code -- Breaking changes detected -- High complexity estimated -- Multiple files affected - ---- - -## Resources & Deep Dives - -For detailed information on each workflow: - -- **[map-fast Deep Dive](resources/map-fast-deep-dive.md)** — Token breakdown, skip conditions, risks -- **[map-efficient Deep Dive](resources/map-efficient-deep-dive.md)** — Optimization strategy, Predictor conditions, batching -- **[map-debug Deep Dive](resources/map-debug-deep-dive.md)** — Debugging strategies, error analysis, best practices -- **[map-feature Deep Dive](resources/map-feature-deep-dive.md)** — Design reference (PLANNED, not yet implemented) -- **[map-refactor Deep Dive](resources/map-refactor-deep-dive.md)** — Design reference (PLANNED, not yet implemented) - -Agent & system details: - -- **[Agent Architecture](resources/agent-architecture.md)** — How agents orchestrate and coordinate - ---- - -## Real-World Examples - -### Example 1: Choosing /map-efficient for a critical feature - -**Task:** "Add OAuth2 authentication" - -**Analysis:** -- Affects security (high-risk indicator) -- Affects multiple modules (breaking changes possible) -- First implementation of OAuth2 (high complexity) - -**Decision:** `/map-efficient` — describe the security-sensitive nature in the task description. Predictor will trigger conditionally on security-related subtasks. 
- -### Example 2: Choosing /map-debug - -**Task:** "Tests failing in checkout flow" - -**Analysis:** -- Specific issue (test failures) ✓ -- Not new feature (debugging) -- Needs root cause analysis ✓ - -**Decision:** `/map-debug` (focused on diagnosing failures) - -### Example 3: Choosing /map-efficient - -**Task:** "Add user profile page" - -**Analysis:** -- Standard production feature ✓ -- Moderate complexity (not first-time) ✓ -- No security implications -- No breaking changes - -**Decision:** `/map-efficient` (recommended default) - ---- - -## Integration with Auto-Activation - -This skill integrates with MAP's auto-activation system to suggest workflows: - -**Natural language request:** -``` -User: "Implement user registration" -MAP: 🎯 Suggests /map-efficient -``` - -**Questions from MAP:** -``` -MAP: "Is this for production?" -User: "Yes, but critical feature" -MAP: 🎯 Suggests /map-efficient with --self-moa instead -``` - -**Direct command:** -``` -User: "/map-efficient add pagination to blog API" -MAP: 📚 Loads this skill for context -``` - ---- - -## Tips for Effective Workflow Selection - -1. **Default to /map-efficient** — It's the recommended choice for 80% of tasks -2. **Use /map-fast sparingly** — Only for small, low-risk changes with clear scope -3. **Use /map-efficient for critical paths** — Describe risk context in the task description for appropriate Predictor triggers -4. **Trust the optimization** — /map-efficient preserves quality while cutting token usage -5. **Review deep dives** — When in doubt, check the appropriate deep-dive resource - ---- - -## Next Steps - -1. **First time using MAP?** Start with `/map-efficient` -2. **Have a critical feature?** Use `/map-efficient` with risk context in the task description -3. **Debugging an issue?** See [map-debug-deep-dive.md](resources/map-debug-deep-dive.md) -4. 
**Understanding agents?** See [Agent Architecture](resources/agent-architecture.md) ---- - -## Examples - -### Example 1: Choosing a workflow for a new feature - -**User says:** "I need to add JWT authentication to the API" - -**Actions:** -1. Assess risk level — security-sensitive (high-risk indicator) -2. Check if first implementation — yes, OAuth/JWT is new -3. Multiple modules affected — auth middleware, user service, token storage - -**Result:** Recommend `/map-efficient` — describe the security context in the task. Predictor will trigger on security-sensitive subtasks. Batched learning captures patterns at the end. - -### Example 2: Quick fix with clear scope - -**User says:** "Update the error message in the login form" - -**Actions:** -1. Assess risk — low, localized text change -2. Check blast radius — single file, no dependencies -3. No security implications - -**Result:** Recommend `/map-fast` — small, low-risk change with clear acceptance criteria. No learning needed. - -### Example 3: Debugging a test failure - -**User says:** "Tests in auth.test.ts are failing after the last merge" - -**Actions:** -1. Identify task type — debugging/fixing specific issue -2. Need root cause analysis — yes, regression after merge -3. Not a new feature or refactor - -**Result:** Recommend `/map-debug` — focused on diagnosing failures with root cause analysis and regression prevention. - ---- - -## Troubleshooting - -| Issue | Cause | Solution | -|-------|-------|----------| -| Wrong workflow chosen mid-task | Cannot switch workflows during execution | Complete current workflow, then restart with correct one | -| Predictor never runs in /map-efficient | Subtasks assessed as low-risk | Expected behavior; Predictor is conditional. 
Use /map-debug for guaranteed analysis | -| No patterns stored after /map-fast | /map-fast skips learning agents | By design — use /map-efficient + /map-learn for pattern accumulation | -| Skill suggests wrong workflow | Description trigger mismatch | Check skill-rules.json triggers; refine query wording | - ---- - -**Skill Version:** 1.0 -**Last Updated:** 2025-11-03 -**Recommended Reading Time:** 5-10 minutes -**Deep Dive Reading Time:** 15-20 minutes per resource diff --git a/.claude/skills/map-workflows-guide/resources/agent-architecture.md b/.claude/skills/map-workflows-guide/resources/agent-architecture.md deleted file mode 100644 index 1b8b212f..00000000 --- a/.claude/skills/map-workflows-guide/resources/agent-architecture.md +++ /dev/null @@ -1,266 +0,0 @@ -# Agent Architecture - -MAP Framework orchestrates 11 specialized agents in a coordinated workflow. - -## Agent Categories - -### Execution & Validation (Core Pipeline) - -**1. TaskDecomposer** -- **Role:** Breaks complex goals into atomic subtasks -- **Input:** User's high-level request -- **Output:** JSON with subtasks, dependencies, acceptance criteria -- **When it runs:** First step in every workflow - -**2. Actor** -- **Role:** Implements code changes -- **Input:** Subtask description, acceptance criteria -- **Output:** Code changes, rationale, test strategy -- **When it runs:** For each subtask (multiple times if revisions needed) - -**3. Monitor** -- **Role:** Validates correctness and standards compliance -- **Input:** Actor's implementation -- **Output:** Pass/fail verdict with specific issues -- **When it runs:** After every Actor output -- **Feedback loop:** Returns to Actor if validation fails (max 3-5 iterations) - -**4. 
Evaluator** -- **Role:** Quality scoring and final approval -- **Input:** Actor + Monitor results -- **Output:** Quality score (0-10), approve/reject decision -- **When it runs:** /map-debug, /map-review -- **Skipped in:** /map-efficient, /map-fast (Monitor provides sufficient validation) - -### Analysis - -**5. Predictor** -- **Role:** Impact analysis and dependency tracking -- **Input:** Planned changes -- **Output:** Affected files, breaking changes, risk assessment -- **When it runs:** - - /map-efficient: Conditional (only if Monitor flags high risk) - - /map-debug: Always (focused analysis) - - /map-debate: Conditional (same as /map-efficient) - - /map-fast: Never (skipped) - -### Learning - -**6. Reflector** -- **Role:** Extracts patterns and lessons learned -- **Input:** All agent outputs for subtask(s) -- **Output:** Insights, patterns discovered, pattern updates -- **When it runs:** - - /map-efficient, /map-debug, /map-debate: Batched (once at end, via /map-learn) - - /map-fast: Never (skipped) - -### Optional - -**8. Documentation-Reviewer** -- **Role:** Validates documentation completeness -- **Input:** Documentation files -- **Output:** Completeness assessment, dependency analysis -- **When it runs:** On-demand (not part of standard workflows) - -### Synthesis - -**9. Debate-Arbiter** -- **Role:** Cross-evaluates Actor variants with explicit reasoning -- **Input:** Multiple Actor outputs (variants) -- **Output:** Synthesized optimal solution with reasoning trace -- **When it runs:** /map-debate (per subtask, uses Opus model) - -**10. Synthesizer** -- **Role:** Extracts decisions from variants and generates unified code (Self-MoA) -- **Input:** Multiple Actor outputs -- **Output:** Merged implementation combining best elements -- **When it runs:** /map-efficient with --self-moa flag - -### Discovery & Verification - -**11. 
Research-Agent** -- **Role:** Heavy codebase reading with compressed output -- **Input:** Research question or exploration goal -- **Output:** Compressed context for implementation agents -- **When it runs:** /map-plan, /map-efficient, /map-debug (before Actor) - -**12. Final-Verifier** -- **Role:** Adversarial verification with Root Cause Analysis (Ralph Loop) -- **Input:** Complete implementation after all other agents -- **Output:** Verification verdict, regression analysis -- **When it runs:** /map-check, /map-efficient (terminal verification) - ---- - -## Orchestration Patterns - -### Linear Pipeline (map-fast) - -``` -TaskDecomposer → Actor → Monitor → Apply → Done -(No Evaluator, no Predictor, no learning) -``` - -### Conditional Pipeline (map-efficient) - -``` -TaskDecomposer - ↓ - For each subtask: - Actor → Monitor → [Predictor if high risk] → Tests → Linter → Apply - ↓ - Final-Verifier (adversarial verification of entire goal) - ↓ - Done! Optional: /map-learn → Reflector -``` - -### Multi-Variant Pipeline (map-debate) - -``` -TaskDecomposer - ↓ - For each subtask: - Actor×3 → Monitor×3 → debate-arbiter (Opus) - ↓ synthesized - Monitor → [Predictor if high risk] → Apply changes - ↓ - Batch learning (via /map-learn): - Reflector (all subtasks) → Done -``` - ---- - -## Feedback Loops - -### Actor ← Monitor Loop - -``` -Actor creates code - ↓ -Monitor validates - ↓ -Issues found? → YES → Feedback to Actor (iterate, max 3-5 times) - ↓ NO -Continue pipeline -``` - -### Actor ← Evaluator Loop - -``` -Monitor approved - ↓ -Evaluator scores quality - ↓ -Score < threshold? 
→ YES → Feedback to Actor (revise) - ↓ NO -Proceed to next stage -``` - ---- - -## Conditional Execution Logic - -### Predictor Conditions (map-efficient) - -Predictor runs if ANY of: -- Subtask modifies critical files (`auth/**`, `database/**`, `api/**`) -- Breaking API changes detected by Monitor -- High complexity score (≥8) from TaskDecomposer -- Multiple file modifications (>3 files) - -Otherwise: Skipped (token savings) - ---- - -## State Management - -### Per-Subtask State -- Actor output -- Monitor verdict -- Predictor analysis (if ran) -- Evaluator score (if ran) - -### Workflow State -- All subtask results -- Aggregated patterns (Reflector) - ---- - -## Communication Protocol - -Agents communicate via structured JSON: - -```json -{ - "agent": "Actor", - "subtask_id": "ST-001", - "output": { - "approach": "...", - "code_changes": [...], - "trade_offs": [...], - "used_bullets": [...] - } -} -``` - ---- - -## Error Handling - -### Actor Failures -- Monitor provides specific feedback -- Actor iterates (max 3-5 attempts) -- If still failing: Mark subtask as failed, continue with others - -### Learning Failures -- Reflector gracefully degrades -- Learning skipped but implementation continues -- Logged to stderr for debugging - ---- - -## Performance Optimization - -### Token Usage by Agent - -| Agent | Avg Tokens | Frequency | Workflow Impact | -|-------|------------|-----------|-----------------| -| TaskDecomposer | ~1.5K | Once | All workflows | -| Actor | ~2-3K | Per subtask | All workflows | -| Monitor | ~1K | Per Actor output | All workflows | -| Evaluator | ~0.8K | Per subtask | map-debug, map-review | -| Predictor | ~1.5K | Per subtask or conditional | Varies | -| Reflector | ~2K | Per subtask or batched | Varies | -| Debate-Arbiter | ~3-4K | Per subtask | map-debate only | -| Synthesizer | ~2K | Per subtask | map-efficient (--self-moa) | -| Research-Agent | ~2-3K | Once (before Actor) | map-plan, map-efficient, map-debug | -| Final-Verifier | ~2K | Once 
(terminal) | map-check, map-efficient | - -**map-efficient savings:** -- Skip Evaluator: ~0.8K per subtask -- Conditional Predictor: ~1.5K per low-risk subtask -- Batch Reflector: ~(N-1) × 2K for N subtasks - ---- - -## Extension Points - -### Adding New Agents - -To add a custom agent: -1. Create `.claude/agents/my-agent.md` with prompt template -2. Add to workflow command (e.g., `.claude/commands/map-efficient.md`) -3. Define when it runs (before/after which agents) -4. Specify input/output format - -### Custom Workflows - -Create `.claude/commands/map-custom.md`: -- Define agent sequence -- Specify conditional logic -- Document token cost and use cases - ---- - -**See also:** -- [map-efficient Deep Dive](map-efficient-deep-dive.md) - Conditional execution example diff --git a/.claude/skills/map-workflows-guide/resources/map-debug-deep-dive.md b/.claude/skills/map-workflows-guide/resources/map-debug-deep-dive.md deleted file mode 100644 index 959e1881..00000000 --- a/.claude/skills/map-workflows-guide/resources/map-debug-deep-dive.md +++ /dev/null @@ -1,258 +0,0 @@ -# /map-debug Deep Dive - -## When to Use - -**Bug fixes and error investigation:** -- Fixing failing tests -- Resolving runtime errors -- Investigating unexpected behavior -- Root cause analysis -- Performance debugging - -**Why /map-debug?** -- Focused on error analysis -- Root cause identification -- Pattern recognition for similar bugs - ---- - -## Debugging Workflow - -### Standard Pipeline - -``` -1. TaskDecomposer: Break down debugging into steps - - Reproduce the issue - - Identify root cause - - Implement fix - - Verify resolution - - Add regression tests - -2. For each subtask: - - Actor implements (with error context) - - Monitor validates (tests must pass) - - Predictor analyzes (impact of fix) - - Evaluator scores (completeness) - -3. Reflector extracts lessons: - - What caused the bug? - - How was it fixed? - - How to prevent similar bugs? 
- - Reflector also documents: - - Debugging techniques used - - Common pitfalls - - Prevention strategies -``` - ---- - -## Error Analysis Strategies - -### 1. Stack Trace Interpretation - -**Actor receives:** -``` -Error: TypeError: Cannot read property 'name' of undefined - at UserService.getDisplayName (user.service.ts:42) - at ProfileController.show (profile.controller.ts:18) -``` - -**Analysis:** -- Line 42: `user.name` but `user` is undefined -- Line 18: Called without null check -- Root cause: Missing user validation - -**Fix:** -```typescript -// Before -getDisplayName(user) { - return user.name; -} - -// After -getDisplayName(user) { - if (!user) { - throw new Error("User not found"); - } - return user.name; -} -``` - -### 2. Test Failure Diagnosis - -**Failed test:** -``` -Expected: 200 OK -Received: 404 Not Found -``` - -**Actor investigates:** -1. Check route configuration -2. Verify request format -3. Debug middleware chain -4. Check database state - -**Findings:** -- Route expects `/users/:id` (number) -- Test sends `/users/abc` (string) -- No type validation middleware - -**Fix:** Add parameter validation - ---- - -## Example: Debugging Race Condition - -**Task:** "Fix intermittent test failures in async code" - -**Decomposition:** -``` -ST-1: Reproduce the race condition reliably -ST-2: Identify critical section -ST-3: Implement synchronization -ST-4: Verify fix under load -ST-5: Add regression tests -``` - -**Execution:** - -``` -ST-1: Reproduce reliably -├─ Actor: Add test that fails consistently -│ └─ Strategy: Increase concurrency, reduce delays -├─ Monitor: ✅ Test fails reliably (good!) 
-└─ Predictor: Low risk (test code) - -ST-2: Identify critical section -├─ Actor: Add logging, trace execution order -│ └─ Finding: Two async operations modify same state -├─ Monitor: ✅ Issue identified -└─ Predictor: Medium risk (affects core logic) - -ST-3: Implement synchronization -├─ Actor: Add mutex/lock to critical section -├─ Monitor: ✅ Valid (tests pass) -├─ Predictor: ✅ RAN (affects async behavior) -│ └─ Impact: May reduce throughput -└─ Evaluator: ✅ Approved (score: 8/10) - -ST-4: Verify under load -├─ Actor: Run stress test (1000x concurrency) -├─ Monitor: ✅ All tests pass -└─ Evaluator: ✅ Approved - -ST-5: Regression tests -├─ Actor: Add concurrent test to test suite -├─ Monitor: ✅ Tests pass -└─ Evaluator: ✅ Approved - -Reflector: -├─ Pattern: "Race conditions in async state updates" -├─ Solution: "Use mutex for critical sections" -└─ Prevention: "Design for immutability" - -``` - ---- - -## Root Cause Analysis - -### 5 Whys Technique - -**Problem:** "Users can't log in" - -``` -Why 1: Login fails with 500 error - → Database query failing - -Why 2: Database query failing - → Connection pool exhausted - -Why 3: Connection pool exhausted - → Connections not being released - -Why 4: Connections not being released - → Missing finally block in async function - -Why 5: Missing finally block - → Copy-pasted code from old example - -Root cause: Improper async error handling -``` - -**Fix:** Add proper resource cleanup - ---- - -## Debugging Patterns Learned - -### Common Bug Categories - -**1. Null/Undefined Errors** -- Pattern: Missing validation -- Fix: Add null checks, use optional chaining -- Prevention: TypeScript strict mode - -**2. Async/Await Issues** -- Pattern: Unhandled promise rejections -- Fix: Add try/catch, .catch() handlers -- Prevention: ESLint rules for promises - -**3. State Management Bugs** -- Pattern: Race conditions, stale closures -- Fix: Immutable updates, proper locking -- Prevention: Use state management libraries - -**4. 
Off-by-One Errors** -- Pattern: Array indexing, loop bounds -- Fix: Use array methods (map, filter) -- Prevention: Code review, unit tests - ---- - -## Prevention Strategies - -**After debugging, Reflector asks:** -1. How could this bug have been prevented? -2. What test was missing? -3. What pattern should we follow? - -**Reflector documents:** -```json -{ - "type": "TESTING_STRATEGY", - "content": "Add integration test for async operations", - "code_example": "test('handles concurrent requests', async () => { ... })", - "tags": ["async", "testing", "race-conditions"] -} -``` - ---- - -## Troubleshooting the Debugger - -**Issue:** Can't reproduce bug consistently -**Solution:** -- Add extensive logging -- Use debugger breakpoints -- Increase test iterations -- Test in production-like environment - -**Issue:** Root cause unclear -**Solution:** -- Simplify reproduction case -- Remove variables one by one -- Use git bisect to find regression commit - -**Issue:** Fix introduces new bugs -**Solution:** -- Predictor should catch this -- Run full test suite -- Check Predictor impact analysis - ---- - -**See also:** -- [map-efficient-deep-dive.md](map-efficient-deep-dive.md) - For implementing fixes -- [agent-architecture.md](agent-architecture.md) - Predictor's impact analysis diff --git a/.claude/skills/map-workflows-guide/resources/map-efficient-deep-dive.md b/.claude/skills/map-workflows-guide/resources/map-efficient-deep-dive.md deleted file mode 100644 index 6a2afb01..00000000 --- a/.claude/skills/map-workflows-guide/resources/map-efficient-deep-dive.md +++ /dev/null @@ -1,202 +0,0 @@ -# /map-efficient Deep Dive - -## Optimization Strategy - -### Predictor: Conditional Execution - -**Logic:** -```python -def should_run_predictor(subtask): - # Run if ANY condition true: - return ( - subtask.complexity == "high" or - subtask.modifies_critical_files() or - subtask.has_breaking_changes() or - subtask.affects_dependencies() - ) -``` - -**Critical files patterns:** -- 
`**/auth/**` - Authentication -- `**/database/**` - Schema changes -- `**/api/**` - Public API -- `**/*.proto` - Service contracts - -**Example:** -``` -Subtask 1: Add validation helper (utils/validation.ts) -→ Predictor: ⏭️ SKIPPED (low risk, no dependencies) - -Subtask 2: Update auth middleware (auth/middleware.ts) -→ Predictor: ✅ RAN (critical file detected) - -Subtask 3: Add unit tests (tests/auth.test.ts) -→ Predictor: ⏭️ SKIPPED (test file, no side effects) -``` - -### Reflector: Batched Learning - -**Full pipeline (theoretical baseline):** -``` -Subtask 1 → Actor → Monitor → Predictor → Evaluator → Reflector -Subtask 2 → Actor → Monitor → Predictor → Evaluator → Reflector -Subtask 3 → Actor → Monitor → Predictor → Evaluator → Reflector -``` -Result: 3 × (Predictor + Evaluator + Reflector) cycles - -**Optimized workflow (/map-efficient):** -``` -Subtask 1 → Actor → Monitor → [Predictor if high risk] → Apply -Subtask 2 → Actor → Monitor → [Predictor if high risk] → Apply -Subtask 3 → Actor → Monitor → [Predictor if high risk] → Apply - ↓ - Final-Verifier (adversarial verification) - ↓ - Done! Optionally run /map-learn: - Reflector (analyzes ALL subtasks, consolidates patterns) -``` -Result: No Evaluator, no per-subtask Reflector. Learning decoupled to /map-learn. 
- -**Token savings:** 35-40% vs full pipeline - ---- - -## When to Use /map-efficient - -✅ **Use for:** -- Production features (moderate complexity) -- API endpoints -- UI components -- Database queries -- Business logic -- Most development work (80% of tasks) - -❌ **Don't use for:** -- Critical infrastructure (use /map-efficient with --self-moa or /map-debate) -- Small, low-risk changes (use /map-fast) -- Simple bug fixes (use /map-debug) - ---- - -## Quality Preservation - -**Myth:** "Optimized workflows sacrifice quality" - -**Reality:** /map-efficient preserves essential quality gates: -- ✅ Monitor validates every subtask (correctness gate) -- ✅ Predictor runs when needed (conditional impact analysis) -- ✅ Tests gate and linter gate run per subtask -- ✅ Final-Verifier checks entire goal at end (adversarial verification) -- ✅ Learning available via /map-learn after workflow completes - -**What's optimized (intentionally omitted per-subtask):** -- Evaluator — Monitor validates correctness directly -- Reflector — decoupled to /map-learn (optional, run after workflow) - ---- - -## Example Walkthrough - -**Task:** "Implement blog post pagination API" - -**Decomposition:** -- ST-1: Add pagination params to GET /posts endpoint -- ST-2: Update PostService to support offset/limit -- ST-3: Add integration tests - -**Execution trace:** - -``` -TaskDecomposer: -├─ ST-1: Add pagination params (complexity: low) -├─ ST-2: Update service (complexity: medium, affects API) -└─ ST-3: Add tests (complexity: low) - -ST-1: Pagination params -├─ Actor: Modify routes/posts.ts -├─ Monitor: ✅ Valid -├─ Predictor: ⏭️ SKIPPED (low risk) -├─ Tests gate: ✅ Passed -└─ Linter gate: ✅ Passed - -ST-2: Service update -├─ Actor: Modify services/PostService.ts -├─ Monitor: ✅ Valid -├─ Predictor: ✅ RAN (affects API contract) -│ └─ Impact: Breaking change if clients expect all posts -├─ Tests gate: ✅ Passed -└─ Note: "Add API versioning or deprecation notice" - -ST-3: Integration tests -├─ Actor: 
Add tests/posts.integration.test.ts -├─ Monitor: ✅ Valid (tests pass) -├─ Predictor: ⏭️ SKIPPED (test file) -├─ Tests gate: ✅ Passed -└─ Linter gate: ✅ Passed - -Final-Verifier: ✅ All subtasks verified, goal achieved - -Optional /map-learn: - Reflector (batched): - ├─ Analyzed: 3 subtasks - ├─ Found similar pagination patterns - ├─ Extracted: pagination pattern, API versioning, test structure - └─ Consolidated: 1 new pattern (API pagination), 1 updated (test coverage++) -``` - -**Token usage:** -- Full pipeline: ~12k tokens -- /map-efficient: ~7.5k tokens -- **Savings: 37.5%** - -**Quality: Identical** -- All validations passed -- Breaking change detected -- Tests written -- Patterns learned - ---- - -## Configuration - -Edit `.claude/commands/map-efficient.md` to customize: - -**Predictor conditions:** -```python -# Add custom critical paths -CRITICAL_PATHS = [ - "auth/**", - "database/**", - "api/**", - "config/**", # Your addition -] -``` - -**Batch size:** -```python -# Default: Batch all subtasks -# Override: Batch every N subtasks -BATCH_SIZE = None # or 5 for large tasks -``` - ---- - -## Troubleshooting - -**Issue:** Predictor always skips -**Cause:** No critical file patterns matched -**Fix:** Review `subtask.modifies_critical_files()` logic - -**Issue:** Learning not happening -**Cause:** Reflector not running -**Fix:** Check workflow completion (must finish all subtasks) - -**Issue:** Token usage higher than expected -**Cause:** Predictor running too often -**Fix:** Review risk detection conditions - ---- - -**See also:** -- [map-feature-deep-dive.md](map-feature-deep-dive.md) - Full validation approach -- [agent-architecture.md](agent-architecture.md) - How agents orchestrate diff --git a/.claude/skills/map-workflows-guide/resources/map-fast-deep-dive.md b/.claude/skills/map-workflows-guide/resources/map-fast-deep-dive.md deleted file mode 100644 index dce9fc9e..00000000 --- a/.claude/skills/map-workflows-guide/resources/map-fast-deep-dive.md +++ 
/dev/null @@ -1,234 +0,0 @@ -# /map-fast Deep Dive - -## When to Use (and When NOT to Use) - -### ✅ Acceptable Use Cases - -**ONLY for small, low-risk changes:** -- Localized bug fixes with clear acceptance criteria -- Small UI/text tweaks -- Narrow refactors confined to a single module/file -- Maintenance changes where impact is easy to validate - -### ❌ NEVER Use For - -**High-risk code paths:** -- Features that will be maintained -- Critical infrastructure -- Security-sensitive functionality -- Code that others will build on - -**Why?** No learning means: -- Patterns not captured → team doesn't learn -- Knowledge base not updated → knowledge lost -- Patterns not synced → other projects don't benefit -- Technical debt accumulates - ---- - -## What Gets Skipped - -### Agents NOT Called - -**Evaluator (Quality Scoring)** -- No quality scoring (0-10 scale) -- No approval/rejection gate -- Monitor handles basic correctness only - -**Predictor (Impact Analysis)** -- No dependency analysis -- Breaking changes undetected -- Side effects not predicted - -**Reflector (Pattern Extraction)** -- Successful patterns not captured -- Failures not documented -- Knowledge not extracted - -### What Remains - -**Actor + Monitor:** -- Basic implementation ✅ -- Correctness validation ✅ - -**Result:** Functional code, but zero learning and no quality scoring - ---- - -## Token Savings Breakdown - -| Agent | Tokens | Status | -|-------|--------|--------| -| TaskDecomposer | ~1.5K | ✅ Runs | -| Actor | ~2-3K | ✅ Runs | -| Monitor | ~1K | ✅ Runs | -| Evaluator | ~0.8K | ❌ Skipped | -| Predictor | ~1.5K | ❌ Skipped | -| Reflector | ~2K | ❌ Skipped | - -**Total saved:** ~4.3K per subtask -**Percentage:** 40-50% vs full pipeline - ---- - -## Example: When map-fast Makes Sense - -**Scenario:** "Fix a nil/None check in a request handler" - -**Why map-fast is acceptable:** -``` -Goal: Small, localized fix -Timeline: Short -Outcome: Production-quality fix with tests -Next step: If scope grows, 
switch to /map-efficient -``` - -**Execution:** -``` -TaskDecomposer: 2 subtasks -ST-1: Add the missing nil/None guard in the request handler - Actor → Monitor → Apply -ST-2: Add a regression test for the missing-value case - Actor → Monitor → Apply -Done. No Evaluator, no Predictor, no Reflector, no patterns learned. -``` - -**Appropriate because:** -- Low blast radius -- Easy to verify with targeted tests -- Requirements are clear - ---- - -## Example: When map-fast is WRONG - -**Scenario:** "Implement user authentication" - -**Why map-fast is wrong:** -``` -Goal: Production authentication (critical!) -Timeline: Doesn't matter -Outcome: Must be secure, maintainable -Risk: High (security, breaking changes) -``` - -**Problems with using map-fast:** -1. No Predictor → Breaking changes undetected -2. No Reflector → Security patterns not learned -3. No learning → Team doesn't learn from mistakes -4. Encourages an under-validation mindset on high-risk code - -**Correct choice:** `/map-efficient` (critical infrastructure) - ---- - -## Common Pitfalls - -### Pitfall 1: "I'll make it quick, then refactor" - -**Problem:** Refactoring rarely happens -**Reality:** Technical debt accumulates -**Solution:** Use /map-efficient from the start - -### Pitfall 2: "This is just a quick change" - -**Problem:** Under-validated changes become long-lived -**Reality:** "Quick" changes often stick around -**Solution:** Default to production-quality standards - -### Pitfall 3: "I don't need learning for simple tasks" - -**Problem:** Simple patterns are most valuable -**Reality:** Basic patterns are repeated most often -**Solution:** Use /map-efficient (batched learning, minimal overhead) - ---- - -## Decision Flowchart - -``` -Is the change small and low-risk? -│ -├─ YES → /map-fast acceptable -│ Examples: -│ - Localized bug fix with existing tests -│ - Small UI tweak -│ - Narrow refactor within a single file -│ -└─ NO, or uncertain → Use /map-efficient instead - Why? 
- - Nearly the same speed (only ~10-15% slower) - - Full learning preserved - - Better safe than sorry -``` - ---- - -## When Scope Grows - -If a task starts small but grows in scope or risk, switch to `/map-efficient` for the remainder. - -Why? -- Impact analysis (conditional Predictor) -- Learning preserved -- Stronger guardrails for multi-file work - ---- - -## Alternatives to Consider - -### Instead of /map-fast, consider: - -**1. /map-efficient (recommended)** -- Only ~10-15% slower than /map-fast -- Full learning preserved -- Suitable for production - -**2. Manual implementation** -- No agents at all -- Faster for tiny tasks (<50 lines) -- Use when MAP overhead doesn't make sense - -**3. /map-efficient or /map-debate** -- For high-risk changes -- Security or infrastructure work - ---- - -## Best Practices - -### When using /map-fast: - -1. **Document reduced analysis** - Note that /map-fast was used and why -2. **Run tests** - Ensure relevant unit/integration tests pass -3. **Keep changes small** - Avoid scope creep; switch workflows if needed -4. **Review critical paths** - Error handling, input validation, and security - -### General guidance: - -**Ask yourself:** -- Will anyone build on this code? → Don't use /map-fast -- Is this security-related? → Don't use /map-fast -- Will this integrate with production? → Don't use /map-fast -- Am I uncertain about rewrites? 
→ Don't use /map-fast - -**If all answers are "No" → /map-fast is acceptable** - ---- - -## Troubleshooting - -**Issue:** Team keeps using /map-fast for production -**Solution:** Code review policy: Reject PRs with /map-fast code - -**Issue:** Low-analysis workflow used for risky changes -**Solution:** Team policy: use /map-efficient for anything beyond low-risk/localized - -**Issue:** No learning happening on the project -**Solution:** Audit workflow usage, reduce /map-fast usage to <5% - ---- - -**See also:** -- [map-efficient-deep-dive.md](map-efficient-deep-dive.md) - Better alternative for most tasks -- [map-feature-deep-dive.md](map-feature-deep-dive.md) - For critical features diff --git a/.claude/skills/map-workflows-guide/resources/map-feature-deep-dive.md b/.claude/skills/map-workflows-guide/resources/map-feature-deep-dive.md deleted file mode 100644 index 9b84fdf0..00000000 --- a/.claude/skills/map-workflows-guide/resources/map-feature-deep-dive.md +++ /dev/null @@ -1,235 +0,0 @@ -# /map-feature Deep Dive - -> **STATUS: PLANNED — NOT YET IMPLEMENTED.** -> This workflow is designed but not yet available as a command. -> Use `/map-efficient` for all feature development, including critical/high-risk features. - -## When to Use - -**Critical features requiring maximum confidence:** -- Authentication and authorization systems -- Payment processing -- Database schema migrations -- Security-sensitive functionality -- First-time complex features -- High-risk refactoring - -**Why /map-feature?** -- Maximum validation (all agents, all subtasks) -- Per-subtask learning (immediate feedback) -- Full impact analysis (Predictor always runs) -- Highest quality assurance - ---- - -## Full Pipeline - -### Per-Subtask Cycle - -``` -For each subtask: - 1. Actor implements - 2. Monitor validates - 3. Predictor analyzes impact (ALWAYS) - 4. Evaluator scores quality - 5. If approved: - 5a. Reflector extracts patterns - 5b. Apply changes - 6. 
If not approved: Return to Actor -``` - -**Key difference from /map-efficient:** -- Predictor runs EVERY subtask (not conditional) -- Reflector runs AFTER EVERY subtask (not batched) - ---- - -## Per-Subtask Learning Rationale - -### Why Learn Per-Subtask? - -**Immediate feedback loop:** -``` -Subtask 1: Implement JWT generation - ↓ completed -Reflector: "JWT secret storage pattern" - ↓ pattern extracted -Subtask 2: Implement JWT validation - ↓ starts -Actor uses learned pattern - ↓ applies pattern -Uses env vars (learned from Subtask 1) -``` - -**Benefit:** Each subtask benefits from previous subtask learnings - -### Trade-off vs Batched Learning - -**Per-subtask (/map-feature):** -- ✅ Immediate pattern application -- ✅ Error correction within workflow -- ❌ Higher token cost (N × Reflector) - -**Batched (/map-efficient):** -- ✅ Lower token cost (1 × Reflector) -- ⚠️ Patterns applied in next workflow -- ✅ Holistic insights (sees all subtasks together) - -**When per-subtask matters:** -- Complex multi-step features -- Interdependent subtasks -- Learning applies immediately - ---- - -## Example: Critical Authentication System - -**Task:** "Implement OAuth2 authentication" - -**Why /map-feature:** -- Security-critical (high risk) -- Complex (multiple components) -- First-time implementation - -**Execution:** - -``` -TaskDecomposer: -├─ ST-1: Setup OAuth2 provider config -├─ ST-2: Implement authorization code flow -├─ ST-3: Secure token storage -├─ ST-4: Add refresh token rotation -└─ ST-5: Implement logout - -ST-1: OAuth2 provider config -├─ Actor: Create config/oauth.ts -├─ Monitor: ✅ Valid -├─ Predictor: ✅ RAN (security-sensitive) -│ └─ Impact: Config must not be committed -├─ Evaluator: ✅ Approved (score: 9/10) -└─ Reflector: Pattern "Store OAuth secrets in env" - -ST-2: Authorization code flow -├─ Actor: Implement auth/oauth.ts -│ └─ Uses .env for secrets (learned from ST-1!) 
-├─ Monitor: ✅ Valid -├─ Predictor: ✅ RAN (affects auth flow) -│ └─ Impact: All protected routes need update -├─ Evaluator: ✅ Approved (score: 9/10) -└─ Reflector: Pattern "PKCE for public clients" - -[ST-3, ST-4, ST-5 continue with same pattern] -``` - -**Token usage:** ~18K tokens (full pipeline, 5 subtasks) - -**Quality achieved:** -- Zero security vulnerabilities -- All patterns documented -- Team learned OAuth2 best practices - ---- - -## Predictor: Always-On Analysis - -### What Predictor Catches - -**Breaking changes:** -- API signature modifications -- Database schema changes -- Configuration format updates - -**Dependencies:** -- Affected services -- Required migrations -- Client updates needed - -**Risks:** -- Backward compatibility issues -- Performance impacts -- Security implications - -### Example Output - -```json -{ - "affected_files": [ - {"path": "api/auth.ts", "impact": "high"}, - {"path": "database/users.sql", "impact": "medium"} - ], - "breaking_changes": [ - { - "type": "API", - "description": "User model no longer returns password field", - "mitigation": "Update all API clients to not expect password" - } - ], - "required_updates": [ - "Update client SDK to v2.0", - "Run migration: add_oauth_tokens_table" - ], - "risk_level": "high" -} -``` - ---- - -## When map-feature is Overkill - -**Don't use for:** -- Simple CRUD operations → Use /map-efficient -- Bug fixes → Use /map-debug -- Non-critical features → Use /map-efficient -- Code you understand well → Use /map-efficient - -**Cost vs benefit:** -- /map-feature: 100% token cost -- /map-efficient: 60-70% token cost -- **Savings: 30-40% by using /map-efficient** - -**Rule of thumb:** -- Critical/security = /map-feature -- Production/moderate = /map-efficient -- Everything else = /map-efficient - ---- - -## Quality Metrics - -### Success Indicators - -**All features implemented:** -- ✅ All acceptance criteria met -- ✅ All tests passing -- ✅ No security vulnerabilities - -**Knowledge captured:** 
-- ✅ Patterns extracted (N subtasks → N+ patterns) -- ✅ Team can apply patterns immediately - -**Impact understood:** -- ✅ All breaking changes documented -- ✅ Migration path clear -- ✅ Dependencies updated - ---- - -## Troubleshooting - -**Issue:** Workflow taking too long -**Cause:** Per-subtask learning overhead -**Solution:** Consider /map-efficient for next similar task - -**Issue:** Too many patterns created -**Cause:** Reflector suggesting redundant patterns -**Solution:** Review and deduplicate patterns more aggressively - -**Issue:** Predictor always says "high risk" -**Cause:** Overly conservative risk assessment -**Solution:** Tune Predictor thresholds in `.claude/agents/predictor.md` - ---- - -**See also:** -- [map-efficient-deep-dive.md](map-efficient-deep-dive.md) - Optimized alternative -- [agent-architecture.md](agent-architecture.md) - Understanding all agents diff --git a/.claude/skills/map-workflows-guide/resources/map-refactor-deep-dive.md b/.claude/skills/map-workflows-guide/resources/map-refactor-deep-dive.md deleted file mode 100644 index 8865244a..00000000 --- a/.claude/skills/map-workflows-guide/resources/map-refactor-deep-dive.md +++ /dev/null @@ -1,332 +0,0 @@ -# /map-refactor Deep Dive - -> **STATUS: PLANNED — NOT YET IMPLEMENTED.** -> This workflow is designed but not yet available as a command. -> Use `/map-efficient` for refactoring tasks. 
- -## When to Use - -**Code restructuring without behavior changes:** -- Improving code organization -- Renaming for clarity -- Extracting common logic -- Cleaning up technical debt -- Simplifying complex functions -- Reorganizing file structure - -**Why /map-refactor?** -- Focus on dependency analysis -- Breaking change detection -- Migration planning -- Preserving functionality - ---- - -## Refactoring Workflow - -### Key Principle: Behavior Preservation - -``` -Refactoring = Changing structure WITHOUT changing behavior -``` - -**Verification:** -- All existing tests must pass -- No new features added -- API contracts preserved (or versioned) - -### Standard Pipeline - -``` -1. TaskDecomposer: Break refactoring into safe steps - - Identify dependencies - - Plan incremental changes - - Define rollback points - -2. For each subtask: - - Actor refactors code - - Monitor validates (tests MUST pass) - - Predictor analyzes impact (CRITICAL) - - Evaluator checks completeness - -3. Reflector extracts: - - What patterns emerged? - - What dependencies were discovered? - - What risks were mitigated? - - Refactoring techniques - - Dependency patterns - - Migration strategies -``` - ---- - -## Dependency Impact Analysis - -### Predictor's Role in Refactoring - -**Always runs for refactoring** (high priority): - -**What Predictor tracks:** -1. **Direct dependencies:** - - Files that import refactored module - - Functions that call refactored functions - - Types that extend refactored types - -2. **Indirect dependencies:** - - Services that depend on direct dependencies - - Tests that rely on behavior - - Configuration that references paths - -3. 
**Breaking changes:** - - Renamed exports - - Changed function signatures - - Moved files - -**Example output:** -```json -{ - "affected_files": [ - {"path": "services/user.service.ts", "impact": "high", "reason": "imports renamed function"}, - {"path": "tests/user.test.ts", "impact": "medium", "reason": "tests old API"}, - {"path": "api/routes.ts", "impact": "low", "reason": "indirect dependency"} - ], - "breaking_changes": [ - { - "type": "rename", - "from": "getUserData", - "to": "fetchUserProfile", - "affected": "12 files" - } - ], - "migration_steps": [ - "1. Update imports in user.service.ts", - "2. Update function calls (12 locations)", - "3. Update tests to use new name", - "4. Run full test suite" - ] -} -``` - ---- - -## Example: Extract Service Pattern - -**Task:** "Refactor auth logic into separate service" - -**Current state:** -```typescript -// controllers/auth.controller.ts (300 lines, mixed concerns) -class AuthController { - login(req, res) { - // JWT generation logic - // Database queries - // Response formatting - // All mixed together - } -} -``` - -**Goal:** -```typescript -// services/auth.service.ts (clean separation) -class AuthService { - generateToken(user) { ... } - validateCredentials(email, password) { ... 
} -} - -// controllers/auth.controller.ts (thin controller) -class AuthController { - constructor(private authService: AuthService) {} - - async login(req, res) { - const user = await this.authService.validateCredentials(...); - const token = this.authService.generateToken(user); - res.json({ token }); - } -} -``` - -**Decomposition:** -``` -ST-1: Create AuthService class skeleton -ST-2: Extract token generation logic -ST-3: Extract credential validation logic -ST-4: Update AuthController to use AuthService -ST-5: Update dependency injection -ST-6: Update all tests -``` - -**Execution:** - -``` -ST-1: Create skeleton -├─ Actor: Create services/auth.service.ts -├─ Monitor: ✅ Valid (compiles, tests pass) -├─ Predictor: ⏭️ Low risk (new file, no impact) -└─ Apply - -ST-2: Extract token generation -├─ Actor: Move generateToken() to AuthService -├─ Monitor: ✅ Valid -├─ Predictor: ✅ RAN (affects auth flow) -│ └─ Impact: AuthController must be updated -└─ Migration: Update imports - -ST-3: Extract validation -├─ Actor: Move validateCredentials() to AuthService -├─ Monitor: ✅ Valid -├─ Predictor: ✅ RAN -│ └─ Impact: 3 files import this function -└─ Migration: Update all imports - -ST-4: Update AuthController -├─ Actor: Inject AuthService, call methods -├─ Monitor: ✅ Valid (all tests pass) -├─ Predictor: ✅ RAN -│ └─ Impact: DI container must provide AuthService -└─ Migration: Update DI config - -ST-5: Update DI -├─ Actor: Register AuthService in container -├─ Monitor: ✅ Valid -└─ Apply - -ST-6: Update tests -├─ Actor: Mock AuthService in controller tests -├─ Monitor: ✅ All tests pass -└─ Done - -Reflector: -├─ Pattern: "Separate business logic from controllers" -├─ Pattern: "Use dependency injection for services" -└─ Technique: "Incremental refactoring (6 safe steps)" - -``` - -**Token usage:** ~9K tokens (6 subtasks, Predictor always runs) -**Risk:** Low (tests pass at each step) -**Result:** Clean separation, no behavior changes - ---- - -## Breaking Change Detection - -### 
What Counts as Breaking - -**API changes:** -- Function renamed -- Parameters added/removed/reordered -- Return type changed - -**Module changes:** -- File moved -- Export renamed -- Public interface modified - -**Behavior changes:** -- Performance characteristics -- Error handling -- Side effects - -### Migration Planning - -**Predictor generates:** -1. **List of affected files** (with impact level) -2. **Migration checklist** (step-by-step) -3. **Rollback strategy** (if migration fails) -4. **Testing plan** (what to verify) - ---- - -## Refactoring Patterns - -### 1. Extract Function - -**Before:** -```typescript -function processOrder(order) { - // 50 lines of complex logic - const total = order.items.reduce((sum, item) => sum + item.price * item.qty, 0); - const tax = total * 0.08; - const shipping = total > 50 ? 0 : 5; - return total + tax + shipping; -} -``` - -**After:** -```typescript -function calculateTotal(items) { - return items.reduce((sum, item) => sum + item.price * item.qty, 0); -} - -function calculateTax(total) { - return total * 0.08; -} - -function calculateShipping(total) { - return total > 50 ? 0 : 5; -} - -function processOrder(order) { - const total = calculateTotal(order.items); - const tax = calculateTax(total); - const shipping = calculateShipping(total); - return total + tax + shipping; -} -``` - -**Predictor impact:** Low (internal refactoring, API unchanged) - -### 2. Rename for Clarity - -**Before:** -```typescript -function getData(id) { ... } // Vague -function updateInfo(data) { ... } // Unclear -``` - -**After:** -```typescript -function fetchUserProfile(userId) { ... } // Clear -function updateUserEmail(email) { ... } // Specific -``` - -**Predictor impact:** High (breaking change, all callers must update) - -### 3. 
Move to Shared Module - -**Before:** -``` -utils/helpers.ts (500 lines, mixed utilities) -``` - -**After:** -``` -utils/string-helpers.ts (string functions) -utils/date-helpers.ts (date functions) -utils/array-helpers.ts (array functions) -``` - -**Predictor impact:** Medium (import paths change, but behavior same) - ---- - -## Troubleshooting - -**Issue:** Tests fail after refactoring -**Cause:** Behavior inadvertently changed -**Solution:** Revert, refactor in smaller steps - -**Issue:** Too many breaking changes -**Cause:** Refactoring too aggressive -**Solution:** Use adapter pattern for backward compatibility - -**Issue:** Predictor didn't catch dependency -**Cause:** Indirect/runtime dependency -**Solution:** Improve static analysis, add integration tests - ---- - -**See also:** -- [agent-architecture.md](agent-architecture.md) - Predictor's dependency analysis -- [map-feature-deep-dive.md](map-feature-deep-dive.md) - When refactoring is risky diff --git a/.claude/skills/map-workflows-guide/scripts/validate-workflow-choice.py b/.claude/skills/map-workflows-guide/scripts/validate-workflow-choice.py deleted file mode 100755 index 6a576a06..00000000 --- a/.claude/skills/map-workflows-guide/scripts/validate-workflow-choice.py +++ /dev/null @@ -1,159 +0,0 @@ -#!/usr/bin/env python3 -"""Validate that a workflow choice matches task characteristics. 
- -Usage: - python validate-workflow-choice.py --workflow --risk --size --type - -Example: - python validate-workflow-choice.py --workflow map-efficient --risk medium --size medium --type feature - python validate-workflow-choice.py --workflow map-fast --risk high --size large --type security - -Exit codes: - 0 - Workflow choice is appropriate - 1 - Workflow choice is suboptimal (warning) - 2 - Workflow choice is inappropriate (error) -""" - -import argparse -import json -import sys - -# Workflow appropriateness rules -WORKFLOW_RULES = { - "map-fast": { - "allowed_risk": ["low"], - "allowed_size": ["small"], - "allowed_types": ["fix", "tweak", "maintenance", "docs"], - "forbidden_types": ["security", "auth", "payment", "database-schema"], - }, - "map-efficient": { - "allowed_risk": ["low", "medium", "high"], - "allowed_size": ["small", "medium", "large"], - "allowed_types": [ - "feature", - "enhancement", - "fix", - "tweak", - "maintenance", - "docs", - "security", - "auth", - "payment", - "database-schema", - "infrastructure", - "refactor", - "restructure", - "rename", - "extract", - "cleanup", - ], - "forbidden_types": [], - }, - "map-debug": { - "allowed_risk": ["low", "medium", "high"], - "allowed_size": ["small", "medium", "large"], - "allowed_types": ["bug", "fix", "test-failure", "error", "regression"], - "forbidden_types": ["feature", "refactor"], - }, -} - -# Recommendations for risky combinations -RISK_OVERRIDES = { - ("map-fast", "high"): "map-efficient", - ("map-fast", "medium"): "map-efficient", -} - - -def validate(workflow: str, risk: str, size: str, task_type: str) -> dict: - """Validate workflow choice against task characteristics. 
- - Returns dict with: - valid: bool - level: "ok" | "warning" | "error" - message: str - recommendation: str | None - """ - if workflow not in WORKFLOW_RULES: - return { - "valid": False, - "level": "error", - "message": f"Unknown workflow: {workflow}", - "recommendation": "map-efficient", - } - - rules = WORKFLOW_RULES[workflow] - issues = [] - - # Check risk level - if risk not in rules["allowed_risk"]: - issues.append(f"Risk level '{risk}' is too high for {workflow}") - - # Check size - if size not in rules["allowed_size"]: - issues.append(f"Size '{size}' is not suitable for {workflow}") - - # Check forbidden types - if task_type in rules["forbidden_types"]: - issues.append(f"Task type '{task_type}' is forbidden for {workflow}") - - # Check risk overrides - override_key = (workflow, risk) - recommendation = RISK_OVERRIDES.get(override_key) - - if issues: - level = "error" if any("forbidden" in i for i in issues) else "warning" - return { - "valid": False, - "level": level, - "message": "; ".join(issues), - "recommendation": recommendation or "map-efficient", - } - - return { - "valid": True, - "level": "ok", - "message": f"Workflow '{workflow}' is appropriate for {risk}-risk {size} {task_type} task", - "recommendation": None, - } - - -def main(): - parser = argparse.ArgumentParser(description="Validate MAP workflow choice") - parser.add_argument( - "--workflow", - required=True, - choices=list(WORKFLOW_RULES.keys()), - help="Chosen workflow", - ) - parser.add_argument( - "--risk", - required=True, - choices=["low", "medium", "high"], - help="Task risk level", - ) - parser.add_argument( - "--size", - required=True, - choices=["small", "medium", "large"], - help="Task size", - ) - parser.add_argument("--type", required=True, dest="task_type", help="Task type") - parser.add_argument("--json", action="store_true", help="Output as JSON") - - args = parser.parse_args() - result = validate(args.workflow, args.risk, args.size, args.task_type) - - if args.json: - 
print(json.dumps(result, indent=2)) - else: - status = {"ok": "OK", "warning": "WARNING", "error": "ERROR"}[result["level"]] - print(f"[{status}] {result['message']}") - if result["recommendation"]: - print(f" Recommendation: Use {result['recommendation']} instead") - - exit_codes = {"ok": 0, "warning": 1, "error": 2} - sys.exit(exit_codes[result["level"]]) - - -if __name__ == "__main__": - main() diff --git a/.claude/skills/skill-rules.json b/.claude/skills/skill-rules.json index eedaa7c8..c625c850 100644 --- a/.claude/skills/skill-rules.json +++ b/.claude/skills/skill-rules.json @@ -2,29 +2,6 @@ "version": "1.0", "description": "Skill activation triggers for MAP Framework", "skills": { - "map-workflows-guide": { - "type": "domain", - "enforcement": "suggest", - "priority": "high", - "description": "Guide for choosing the right MAP workflow", - "promptTriggers": { - "keywords": [ - "which workflow", - "map-fast or map-efficient", - "difference between workflows", - "when to use", - "workflow comparison", - "map workflow", - "choose workflow" - ], - "intentPatterns": [ - "(which|what).*?(workflow|mode).*?(use|choose)", - "(difference|compare).*?(map-fast|map-efficient|map-debug)", - "(when|how).*(choose|use|pick).*(workflow|map-\\w+)", - "explain.*?(workflow|map-fast|map-efficient)" - ] - } - }, "map-planning": { "type": "domain", "enforcement": "suggest", @@ -69,29 +46,6 @@ "map-learn" ] } - }, - "map-cli-reference": { - "type": "domain", - "enforcement": "suggest", - "priority": "high", - "description": "CLI error corrections", - "promptTriggers": { - "keywords": [ - "mapify command", - "mapify error", - "no such command", - "no such option", - "validate graph", - "mapify init", - "mapify check" - ], - "intentPatterns": [ - "mapify.*(error|command|help|usage)", - "(no such).*(command|option)", - "validate.*(graph|dependency)", - "(how to|how do).*mapify" - ] - } } } } diff --git a/.claude/workflow-rules.json b/.claude/workflow-rules.json index 01df164c..c99bed74 
100644 --- a/.claude/workflow-rules.json +++ b/.claude/workflow-rules.json @@ -66,25 +66,6 @@ ] } }, - "map-debate": { - "priority": "medium", - "description": "Multi-variant synthesis with Opus arbiter for reasoning transparency", - "promptTriggers": { - "keywords": [ - "debate", - "compare approaches", - "trade-offs", - "reasoning", - "multiple variants", - "architectural decision" - ], - "intentPatterns": [ - "(compare|debate|weigh).*?(approaches|options|trade-offs)", - "(need|want).*?(reasoning|transparency|justification)", - "(architectural|design).*?(decision|choice)" - ] - } - }, "map-fast": { "priority": "low", "description": "Small, low-risk changes with reduced analysis (NO learning)", diff --git a/.gitignore b/.gitignore index 7e4a88ba..ca332cbd 100644 --- a/.gitignore +++ b/.gitignore @@ -47,6 +47,7 @@ coverage.json .map/* !.map/static-analysis/ !.map/scripts/ +.map/scripts/.map/ # Temporary verification files mapify_cli_verification_*.json diff --git a/.map/scripts/map_orchestrator.py b/.map/scripts/map_orchestrator.py index d03021ec..39960e50 100755 --- a/.map/scripts/map_orchestrator.py +++ b/.map/scripts/map_orchestrator.py @@ -274,6 +274,11 @@ class StepState: current_step_phase: str = "DECOMPOSE" completed_steps: list[str] = field(default_factory=list) pending_steps: list[str] = field(default_factory=lambda: STEP_ORDER.copy()) + # retry_count is for SERIAL mode only (single-subtask execution). + # subtask_retry_counts is for WAVE mode only (parallel wave execution). + # These counters are independent: advance_wave resets subtask_retry_counts + # but NOT retry_count, and get_next_step resets retry_count but NOT + # subtask_retry_counts. Never mix serial and wave retry tracking. 
retry_count: int = 0 max_retries: int = 5 plan_approved: bool = False @@ -969,6 +974,245 @@ def advance_wave(branch: str) -> dict: } +def _write_feedback_file( + branch: str, filename: str, header: str, feedback: str +) -> Optional[str]: + """Write monitor feedback to a file if feedback is non-empty. + + Returns the file path string, or None if nothing was written. + """ + if not feedback.strip(): + return None + fb_path = Path(f".map/{branch}/{filename}") + fb_path.parent.mkdir(parents=True, exist_ok=True) + fb_path.write_text(f"# {header}\n\n{feedback}\n", encoding="utf-8") + return str(fb_path) + + +def _check_retry_limit( + current_retries: int, max_retries: int, context: dict +) -> Optional[dict]: + """Return escalation dict if retry limit exceeded, else None. + + Shared by monitor_failed() and wave_monitor_failed() to avoid + duplicating the limit-check + escalation-dict construction. + + Args: + current_retries: Current retry count (already incremented). + max_retries: Maximum allowed retries. + context: Extra fields to include in the escalation dict + (e.g., subtask_id for wave mode). + + Returns: + Escalation dict with status="max_retries" if limit exceeded, + or None if still within limit. + """ + if current_retries > max_retries: + return { + "status": "max_retries", + "retry_count": current_retries, + "max_retries": max_retries, + **context, + } + return None + + +def monitor_failed(branch: str, feedback: str = "") -> dict: + """Handle Monitor valid=false: requeue ACTOR+MONITOR, increment retry_count. + + Precondition: current_step_phase must be MONITOR. Called by map-efficient.md + when Monitor returns valid=false. Switches phase back to ACTOR so + workflow-gate allows edits. Persists monitor feedback to a file that Actor + can read on next invocation. 
+ + Args: + branch: Git branch name (sanitized) + feedback: Monitor's feedback_for_actor text (optional) + + Returns: + Dict with status (retrying|max_retries), retry_count, feedback_file + """ + state_file = Path(f".map/{branch}/step_state.json") + state = StepState.load(state_file) + + if state.current_step_phase != "MONITOR": + return { + "status": "error", + "message": ( + f"monitor_failed() called from phase '{state.current_step_phase}', " + "expected 'MONITOR'. Aborting to prevent state corruption." + ), + } + + state.retry_count += 1 + + escalation = _check_retry_limit( + state.retry_count, + state.max_retries, + { + "message": ( + f"Monitor retry limit reached ({state.max_retries} attempts). " + "Escalate to user." + ), + }, + ) + if escalation is not None: + state.save(state_file) + return escalation + + # Requeue only ACTOR (2.3) and MONITOR (2.4) on retry. + # TDD pre-steps (2.25/2.26) are NOT re-run — tests were already written + # and validated before the first Actor attempt. + state.pending_steps = ["2.3", "2.4"] + state.current_step_id = "2.3" + state.current_step_phase = "ACTOR" + + # Persist feedback so Actor can read it (numbered to preserve history) + feedback_file = _write_feedback_file( + branch, + f"monitor_feedback_retry{state.retry_count}.md", + f"Monitor Feedback (retry {state.retry_count})", + feedback, + ) + + state.save(state_file) + + return { + "status": "retrying", + "retry_count": state.retry_count, + "max_retries": state.max_retries, + "current_phase": "ACTOR", + "feedback_file": feedback_file, + "message": ( + f"Monitor failed. Retry {state.retry_count}/{state.max_retries}. " + f"Phase reset to ACTOR for subtask {state.current_subtask_id}." + ), + } + + +def wave_monitor_failed( + subtask_id: str, branch: str, feedback: str = "" +) -> dict: + """Handle Monitor valid=false for a subtask within a wave. + + Resets the subtask's phase back to ACTOR and increments its retry count. 
+ + Args: + subtask_id: Subtask ID (e.g., "ST-002") + branch: Git branch name (sanitized) + feedback: Monitor's feedback_for_actor text (optional) + + Returns: + Dict with status, retry_count for the subtask + """ + state_file = Path(f".map/{branch}/step_state.json") + state = StepState.load(state_file) + + # Increment per-subtask retry count + current_retries = state.subtask_retry_counts.get(subtask_id, 0) + 1 + state.subtask_retry_counts[subtask_id] = current_retries + + escalation = _check_retry_limit( + current_retries, + state.max_retries, + { + "subtask_id": subtask_id, + "message": ( + f"Monitor retry limit reached for {subtask_id} " + f"({state.max_retries} attempts). Escalate to user." + ), + }, + ) + if escalation is not None: + state.save(state_file) + return escalation + + # Reset subtask phase back to ACTOR + state.subtask_phases[subtask_id] = "2.3" + + # Persist feedback (numbered to preserve history) + feedback_file = _write_feedback_file( + branch, + f"monitor_feedback_{subtask_id}_retry{current_retries}.md", + f"Monitor Feedback for {subtask_id} (retry {current_retries})", + feedback, + ) + + state.save(state_file) + + return { + "status": "retrying", + "subtask_id": subtask_id, + "retry_count": current_retries, + "max_retries": state.max_retries, + "current_phase": "ACTOR", + "feedback_file": feedback_file, + "message": ( + f"Monitor failed for {subtask_id}. " + f"Retry {current_retries}/{state.max_retries}. " + f"Phase reset to ACTOR." + ), + } + + +def reopen_for_fixes(branch: str, feedback: str = "") -> dict: + """Transition from COMPLETE back to ACTOR for post-review fixes. + + Called after /map-review finds issues in a completed workflow. + The workflow gate blocks edits during COMPLETE phase; this function + reopens the workflow so fixes can be applied. 
+ + Args: + branch: Git branch name (sanitized) + feedback: Review feedback text describing what needs fixing + + Returns: + Dict with status and new phase info + """ + state_file = Path(f".map/{branch}/step_state.json") + if not state_file.exists(): + return { + "status": "error", + "message": "No step_state.json found. Nothing to reopen.", + } + + state = StepState.load(state_file) + + if state.current_step_phase != "COMPLETE": + return { + "status": "error", + "message": ( + f"Workflow is in phase '{state.current_step_phase}', not COMPLETE. " + "Use monitor_failed for non-COMPLETE retry." + ), + } + + # Reset to ACTOR+MONITOR cycle + state.current_step_id = "2.3" + state.current_step_phase = "ACTOR" + state.pending_steps = ["2.3", "2.4"] + state.retry_count = 0 + + feedback_file = _write_feedback_file( + branch, + "review_feedback.md", + "Review Feedback (post-COMPLETE reopen)", + feedback, + ) + + state.save(state_file) + + return { + "status": "reopened", + "current_phase": "ACTOR", + "feedback_file": feedback_file, + "message": ( + "Workflow reopened from COMPLETE to ACTOR. " + "Edit gate is now unlocked for review fixes." 
+ ), + } + + SKIPPABLE_STEPS = {"2.2", "2.25", "2.26"} @@ -1142,9 +1386,11 @@ def resume_from_plan(branch: str) -> dict: subtask_index=0, subtask_sequence=subtask_ids, current_step_id=execution_start[0] if execution_start else "1.6", - current_step_phase=STEP_PHASES.get(execution_start[0], "INIT_STATE") - if execution_start - else "INIT_STATE", + current_step_phase=( + STEP_PHASES.get(execution_start[0], "INIT_STATE") + if execution_start + else "INIT_STATE" + ), completed_steps=skipped_phases, pending_steps=execution_start, plan_approved=True, @@ -1328,6 +1574,9 @@ def main(): "advance_wave", "resume_single_subtask", "get_plan_progress", + "monitor_failed", + "wave_monitor_failed", + "reopen_for_fixes", ], help="Command to execute", ) @@ -1344,6 +1593,10 @@ def main(): parser.add_argument( "--tdd", action="store_true", help="Enable TDD mode (for resume_single_subtask)" ) + parser.add_argument( + "--feedback", + help="Monitor feedback text (for monitor_failed / wave_monitor_failed)", + ) args = parser.parse_args() @@ -1487,6 +1740,29 @@ def main(): result = get_plan_progress(branch) print(json.dumps(result, indent=2)) + elif args.command == "monitor_failed": + feedback = args.feedback or "" + result = monitor_failed(branch, feedback) + print(json.dumps(result, indent=2)) + + elif args.command == "wave_monitor_failed": + if not args.task_or_step: + print( + json.dumps( + {"error": "subtask_id required. 
Usage: wave_monitor_failed ST-001 --feedback 'text'"} + ), + file=sys.stderr, + ) + sys.exit(1) + feedback = args.feedback or "" + result = wave_monitor_failed(args.task_or_step, branch, feedback) + print(json.dumps(result, indent=2)) + + elif args.command == "reopen_for_fixes": + feedback = args.feedback or "" + result = reopen_for_fixes(branch, feedback) + print(json.dumps(result, indent=2)) + except Exception as e: print(json.dumps({"error": str(e)}), file=sys.stderr) sys.exit(1) diff --git a/README.md b/README.md index 1eabc973..40e81b2a 100644 --- a/README.md +++ b/README.md @@ -48,7 +48,6 @@ claude | `/map-efficient` | Production features, refactoring, complex tasks (recommended) | | `/map-debug` | Bug fixes and debugging | | `/map-fast` | Small, low-risk changes | -| `/map-debate` | Complex decisions with multi-variant synthesis | | `/map-review` | Pre-commit code review | | `/map-check` | Quality gates and verification | | `/map-plan` | Task decomposition without implementation | @@ -95,6 +94,10 @@ The orchestration lives in `.claude/commands/map-*.md` prompts created by `mapif | [Architecture](docs/ARCHITECTURE.md) | Agents, MCP integration, customization | | [Platform Spec](docs/MAP_PLATFORM_SPEC.md) | Platform refactor roadmap, codebase analysis | +## Case Study + +- [DevOpsConf 2026 case study](https://github.com/azalio/devopsconf-ai-develop) — real production case: SPEC -> PLAN -> TEST -> CODE -> REVIEW -> LEARN + ## Trouble? 
- **Command not found** → Run `mapify init` in your project first diff --git a/docs/ARCHITECTURE.md b/docs/ARCHITECTURE.md index 421019b9..19360b38 100644 --- a/docs/ARCHITECTURE.md +++ b/docs/ARCHITECTURE.md @@ -74,15 +74,6 @@ MAP Framework implements cognitive architecture inspired by prefrontal cortex fu │ │ Includes both investigation AND implementation phases │ │ │ └──────────────────────────────────────────────────────────┘ │ │ │ -│ /map-debate (multi-variant with Opus arbiter): │ -│ ┌──────────────────────────────────────────────────────────┐ │ -│ │ TaskDecomposer → For each subtask: │ │ -│ │ 3×Actor (parallel: security/perf/simplicity) │ │ -│ │ → 3×Monitor (parallel validation) │ │ -│ │ → DebateArbiter (Opus) → Monitor → [Predictor if risky]│ │ -│ │ Uses Claude Opus for cross-evaluation and synthesis │ │ -│ └──────────────────────────────────────────────────────────┘ │ -│ │ │ /map-review (interactive 4-section): │ │ ┌──────────────────────────────────────────────────────────┐ │ │ │ git diff analysis │ │ @@ -410,7 +401,7 @@ print("Consider running /map-learn to save patterns") - Production code where token costs matter (RECOMMENDED) - Well-understood features (standard CRUD, APIs, UI) - Iterative development with frequent workflows -- Any task where /map-fast feels too risky but /map-debate too expensive +- Any task where /map-fast feels too risky #### 2. `/map-fast` - Minimal Pipeline (3 Agents) ⚠️ @@ -440,52 +431,6 @@ print("Consider running /map-learn to save patterns") - Broad refactors or multi-module changes - High uncertainty requirements -#### 3. `/map-debate` - Debate-Based Multi-Variant (5-7 Agents) - -**Agent Sequence:** TaskDecomposer → [conditional ResearchAgent] → (3×Actor parallel → 3×Monitor parallel → DebateArbiter (Opus) → Monitor → [Predictor if risky]) per subtask - -**Multi-Variant Architecture:** - -1. 
**Parallel Actor Variants** (3 simultaneous implementations) - - Variant 1: Security-focused approach - - Variant 2: Performance-focused approach - - Variant 3: Simplicity-focused approach - - Each variant gets `approach_focus` parameter - - All variants solve same subtask with different optimization priorities - -2. **Parallel Monitor Validation** (3 validations) - - Each Actor variant validated independently - - Failures fed back to respective Actor for iteration - - Continue until all 3 variants pass validation - -3. **Debate-Arbiter Cross-Evaluation + Synthesis** (Opus model) - - Receives all 3 validated variants AND their Monitor outputs - - Cross-evaluates trade-offs with explicit reasoning matrix - - **Synthesizes unified solution directly** (no separate Synthesizer agent) - - Uses Claude Opus 4.5 for high-quality analysis - - Outputs: comparison_matrix, decision_rationales, synthesis_reasoning, synthesized code - -4. **Final Validation** - - Final Monitor validates the synthesized code - - Conditional Predictor for medium/high risk subtasks - - Max 2 DebateArbiter retries if Monitor fails - -**Token Usage:** 80-100% of baseline -**Learning:** Optional via `/map-learn` (same as other workflows) -**Quality Gates:** All agents (maximum variant exploration) - -**Key Features:** -- **Opus-powered arbiter**: Higher reasoning quality for complex trade-off analysis -- **Explicit decision tracking**: Each variant documents decisions made -- **Multi-perspective synthesis**: Best-of-all-worlds solution -- **Parallel execution**: 3 Actor/Monitor pairs run simultaneously - -**Use for:** -- Architecture decisions with significant trade-offs -- Complex features where optimal approach is unclear -- Security-critical code requiring multiple review perspectives -- Performance-sensitive implementations -- Situations where you want to explore solution space thoroughly **Technical Details:** @@ -639,8 +584,7 @@ Typical token consumption per subtask (estimated): | Predictor | 
1.5K | 1K | 2.5K | Conditional in /map-efficient, always in /map-debug | | Evaluator | 2K | 1K | 3K | Only in /map-debug, /map-review | | Reflector | 2K | 1K | 3K | Only via /map-learn | -| DebateArbiter | 3K | 2K | 5K | Opus model, /map-debate only (includes synthesis) | -| Synthesizer | 2K | 3K | 5K | /map-efficient Self-MoA only (DebateArbiter handles this in /map-debate) | +| Synthesizer | 2K | 3K | 5K | /map-efficient Self-MoA only | | ResearchAgent | 2K | 4K | 6K | Heavy codebase reading, on-demand in any workflow | **Per-subtask totals:** @@ -649,12 +593,10 @@ Typical token consumption per subtask (estimated): - /map-fast: ~8-10K tokens (minimal, no learning) - /map-debug: ~15-20K tokens (full pipeline with Evaluator) - /map-review: ~15-25K tokens (parallel agents + interactive 4-section presentation; --ci mode ~12-15K) -- /map-debate: ~30-40K tokens (3× Actor + Opus DebateArbiter) **For 5-subtask workflow:** - /map-efficient: ~45-60K tokens (learning optional via /map-learn: +5-8K) - /map-fast: ~40-50K tokens (no learning support) -- /map-debate: ~150-200K tokens (3× variants + Opus analysis) #### Workflow Variant Selection @@ -693,7 +635,9 @@ See [USAGE.md - Workflow Variants](./USAGE.md#workflow-variants) for detailed de │ • 8 step phases (DECOMPOSE → SUBTASK_APPROVAL + 2 TDD) │ │ • State file: .map//step_state.json │ │ • Enforces: Sequential execution, no step skipping │ -│ • CLI: get_next_step, validate_step, initialize │ +│ • CLI: get_next_step, validate_step, initialize, │ +│ monitor_failed, wave_monitor_failed, skip_step, │ +│ set_waves, get_wave_step, advance_wave, + more │ └─────────────────────────────────────────────────────────────┘ ``` @@ -1039,72 +983,9 @@ If you modified `.claude/commands/map-efficient.md`, you must manually integrate **Model Used:** Sonnet (requires strong reasoning for synthesis) -**Usage Context:** Only invoked in `/map-debate` workflow after DebateArbiter completes cross-evaluation - -### 9. 
DebateArbiter - -**Responsibility:** Cross-evaluate multiple Actor variants with explicit reasoning, identify best approaches for each decision point. - -**Input:** 3 Actor variants (security/performance/simplicity-focused) + Monitor validations - -**Output:** -```json -{ - "cross_evaluation": { - "decision_points": [ - { - "category": "algorithm", - "description": "Data structure choice for caching", - "variants_analysis": { - "v1_security": { - "approach": "HashMap with TTL tracking", - "pros": ["O(1) lookup", "Automatic expiration"], - "cons": ["Memory overhead for TTL metadata"], - "security_score": 9, - "performance_score": 7 - }, - "v2_performance": { - "approach": "LRU cache with size limit", - "pros": ["Bounded memory", "Fast eviction"], - "cons": ["No time-based expiration"], - "security_score": 6, - "performance_score": 10 - }, - "v3_simplicity": { - "approach": "Simple dictionary", - "pros": ["Minimal code", "Easy to understand"], - "cons": ["No eviction", "Unbounded growth"], - "security_score": 4, - "performance_score": 5 - } - }, - "recommendation": { - "best_variant": "v2_performance", - "reasoning": "LRU cache provides bounded memory (critical for production) with excellent performance. 
Add time-based expiration as enhancement.", - "synthesis_guidance": "Use v2's LRU implementation, add v1's TTL concept as optional feature" - } - } - ], - "synthesis_strategy": "Performance foundation with security enhancements" - } -} -``` - -**Key Behaviors:** -- Extracts decision points from variant outputs -- Compares approaches across multiple dimensions -- Uses Opus model for high-quality reasoning -- Provides explicit synthesis guidance -- Documents trade-off analysis for knowledge base - -**Model Used:** Opus 4.5 (highest reasoning quality for complex analysis) - -**Usage Context:** Only invoked in `/map-debate` workflow after all variants validated - -**MCP Tool Usage:** -- `mcp__sequential-thinking__sequentialthinking`: Multi-step reasoning for complex trade-off analysis +**Usage Context:** Invoked in `/map-efficient --self-moa` workflow for multi-variant synthesis -### 10. ResearchAgent +### 9. ResearchAgent **Responsibility:** Heavy codebase reading with context isolation and compressed output for Actor/Monitor consumption. 
diff --git a/docs/INSTALL.md b/docs/INSTALL.md index 970c17e6..85da2be0 100644 --- a/docs/INSTALL.md +++ b/docs/INSTALL.md @@ -148,7 +148,7 @@ This will: - ✅ Create project directory - ✅ Install MAP agents (including Synthesizer, DebateArbiter, ResearchAgent, FinalVerifier) -- ✅ Add 12 slash commands (/map-efficient, /map-debug, /map-fast, /map-debate, /map-learn, /map-review, /map-check, /map-plan, /map-task, /map-tdd, /map-release, /map-resume) +- ✅ Add 11 slash commands (/map-efficient, /map-debug, /map-fast, /map-learn, /map-review, /map-check, /map-plan, /map-task, /map-tdd, /map-release, /map-resume) - ✅ Configure essential MCP servers - ✅ Initialize git repository - ✅ Install branch-scoped `.map//` workflow runtime used by `/map-plan` and `/map-efficient` @@ -227,9 +227,8 @@ If you prefer manual setup: │ │ ├── research-agent.md # Isolated codebase research │ │ ├── final-verifier.md # Adversarial verification (Ralph Loop) │ │ └── documentation-reviewer.md # Reviews technical docs - │ ├── commands/ # 12 slash commands + │ ├── commands/ # 11 slash commands │ │ ├── map-efficient.md # Optimized workflow (recommended) - │ │ ├── map-debate.md # Multi-variant with Opus reasoning │ │ ├── map-debug.md # Debug workflow │ │ ├── map-fast.md # Minimal workflow (low-risk only) │ │ ├── map-learn.md # Extract and save lessons @@ -269,9 +268,6 @@ After installation, you can use MAP commands in Claude Code: # Standard production workflow (RECOMMENDED) /map-efficient Add user authentication with JWT tokens -# Multi-variant with explicit reasoning (complex decisions) -/map-debate Design caching strategy for user sessions - # Debug an issue /map-debug Fix API timeout on large file uploads @@ -290,7 +286,6 @@ After installation, you can use MAP commands in Claude Code: MAP Framework uses **slash commands** as entry points that coordinate specialized agents in the main Claude Code context: - **`/map-efficient`** ⭐ - Optimized workflow (5-6 agents): task-decomposer → actor → monitor 
→ predictor (conditional) -- **`/map-debate`** - Multi-variant with Opus arbiter (7 agents): 3 Actor variants → debate-arbiter synthesis - **`/map-debug`** - Diagnostic and fix workflows with agent coordination - **`/map-fast`** - Minimal workflow (3 agents) — small, low-risk changes (reduced analysis) - **`/map-review`** - Comprehensive review with Monitor, Predictor, and Evaluator agents diff --git a/docs/MAP_PLATFORM_SPEC.md b/docs/MAP_PLATFORM_SPEC.md index b59710ea..49241f76 100644 --- a/docs/MAP_PLATFORM_SPEC.md +++ b/docs/MAP_PLATFORM_SPEC.md @@ -392,7 +392,7 @@ language: ru # optional: agent response language Add built-in profiles to reduce default complexity: - `core`: `/map-plan`, `/map-efficient`, `/map-check`, `/map-review` -- `full`: all advanced workflows (`map-fast`, `map-debug`, `map-tdd`, `map-debate`, `map-release`, `map-learn`) +- `full`: all advanced workflows (`map-fast`, `map-debug`, `map-tdd`, `map-release`, `map-learn`) - `custom`: explicit workflow selection Profiles should affect both generated delivery files and runtime defaults. @@ -416,7 +416,7 @@ workflows: - map-efficient - map-check - map-tdd # user opted into TDD - # map-debug, map-debate etc. — omitted, not installed + # map-debug etc. — omitted, not installed ``` This reduces generated file count and cognitive load for users who don't need every workflow. 
diff --git a/docs/USAGE.md b/docs/USAGE.md index b98a6073..3bc61bfa 100644 --- a/docs/USAGE.md +++ b/docs/USAGE.md @@ -843,18 +843,18 @@ MAP Framework offers three primary implementation workflows with different trade ### Comparison Table -| Feature | /map-efficient ⭐ | /map-debate | /map-fast ⚠️ | -|---------|-------------------|-------------|--------------| -| **Agents Used** | 3-4 (task-decomposer, actor, monitor, final-verifier)) | 7 (multi-variant) | 3 (minimal) | -| **Token Cost** | **Baseline** | 3x (Opus model) | 40-50% less | -| **Learning** | Via `/map-learn` | Via `/map-learn` | ❌ None | -| **Quality Gates** | Essential agents + Final-Verifier | Opus arbiter | Basic only | -| **Impact Analysis** | ✅ Conditional (Predictor) | ✅ Conditional | ❌ Never | -| **Multi-Variant** | ⚠️ Conditional (Self-MoA) | ✅ **Always 3 variants** | ❌ Never | -| **Synthesis Model** | Synthesizer (sonnet) | **debate-arbiter (opus)** | N/A | -| **Knowledge Updates** | Via `/map-learn` | Via `/map-learn` | ❌ None | -| **Best For** | **Most tasks** | **Reasoning transparency** | Throwaway only | -| **Production Ready** | ✅ Yes | ✅ Yes (expensive) | ❌ NO | +| Feature | /map-efficient ⭐ | /map-fast ⚠️ | +|---------|-------------------|--------------| +| **Agents Used** | 3-4 (task-decomposer, actor, monitor, final-verifier)) | 3 (minimal) | +| **Token Cost** | **Baseline** | 40-50% less | +| **Learning** | Via `/map-learn` | ❌ None | +| **Quality Gates** | Essential agents + Final-Verifier | Basic only | +| **Impact Analysis** | ✅ Conditional (Predictor) | ❌ Never | +| **Multi-Variant** | ⚠️ Conditional (Self-MoA) | ❌ Never | +| **Synthesis Model** | Synthesizer (sonnet) | N/A | +| **Knowledge Updates** | Via `/map-learn` | ❌ None | +| **Best For** | **Most tasks** | Throwaway only | +| **Production Ready** | ✅ Yes | ❌ NO | ### Decision Guide: Which Workflow Should I Use? 
@@ -907,60 +907,6 @@ MAP Framework offers three primary implementation workflows with different trade /map-efficient --self-moa build real-time chat system with WebSocket support ``` -#### Use `/map-debate` (Multi-Variant with Reasoning) - -**When:** -- 🧠 Decisions require explicit trade-off analysis -- 🧠 You need to understand WHY a solution was chosen -- 🧠 Stakeholders need documented reasoning for code review -- 🧠 Complex architectural decisions with multiple valid approaches -- 🧠 High-value features where reasoning transparency justifies cost - -**What makes it different:** -- **ALWAYS generates 3 variants** (security/performance/simplicity focus) -- **Uses Opus model** for debate-arbiter (deeper reasoning than Sonnet) -- **Outputs explicit trade-offs** — what you gain AND what you lose -- **Produces comparison matrix** — scores each variant on 4 dimensions -- **Reasoning trace** — 8-step visible thinking process - -**Key outputs:** -- `comparison_matrix` — variant × dimension scores (1-10) -- `decision_rationales` — for each decision: alternatives, winner, trade-off accepted -- `synthesis_reasoning` — step-by-step explanation of synthesis - -**Cost consideration:** -- ~3-5x more expensive than `/map-efficient` -- Uses Opus model (higher reasoning capability, higher cost) -- Worth it when reasoning transparency is critical - -**Example use cases:** -```bash -# Architectural decision with stakeholder review -/map-debate implement caching strategy for user sessions - -# Complex algorithm with multiple valid approaches -/map-debate design rate limiting system for API endpoints - -# Decision requiring documented justification -/map-debate implement authentication - JWT vs sessions vs OAuth -``` - -**Output example (decision_rationale):** -```json -{ - "decision_id": "dec-v1-001", - "decision_statement": "Use Result type for explicit error handling", - "alternatives_evaluated": [ - {"source_variant": "v2", "statement": "Raise exceptions", "why_rejected": "Less 
explicit"}, - {"source_variant": "v3", "statement": "Return tuple", "why_rejected": "Less type-safe"} - ], - "selection_reasoning": "Result type provides explicit error handling that caller cannot ignore...", - "tradeoff_accepted": "Increased code verbosity" -} -``` - ---- - #### Use `/map-fast` (Minimal) ⚠️ **ONLY when:** @@ -1051,33 +997,29 @@ DECOMPOSE → TEST_WRITER (tests from spec) → TEST_FAIL_GATE (verify Red) → **Small Task (1-2 subtasks):** - `/map-efficient`: ~12-20K tokens (baseline) - `/map-efficient --self-moa`: ~25-35K tokens (3 variants) -- `/map-debate`: ~40-60K tokens (Opus arbiter) - `/map-fast`: ~8-12K tokens (minimal) **Medium Task (3-5 subtasks):** - `/map-efficient`: ~45-60K tokens (baseline) - `/map-efficient --self-moa`: ~100-130K tokens (3 variants) -- `/map-debate`: ~150-200K tokens (Opus arbiter) - `/map-fast`: ~25-35K tokens (minimal) **Large Task (6-8 subtasks):** - `/map-efficient`: ~90-120K tokens (baseline) - `/map-efficient --self-moa`: ~200-260K tokens (3 variants) -- `/map-debate`: ~300-400K tokens (Opus arbiter) - `/map-fast`: ~50-70K tokens (minimal) -**Cost at $3/M input, $15/M output (Claude Sonnet) + Opus for debate:** +**Cost at $3/M input, $15/M output (Claude Sonnet):** -| Task Size | /map-efficient | /map-debate | /map-fast | -|-----------|----------------|-------------|-----------| -| Small | $0.18-0.30 | $0.60-0.90 | $0.12-0.18 | -| Medium | $0.68-0.90 | $2.25-3.00 | $0.38-0.53 | -| Large | $1.35-1.80 | $4.50-6.00 | $0.75-1.05 | +| Task Size | /map-efficient | /map-fast | +|-----------|----------------|-----------| +| Small | $0.18-0.30 | $0.12-0.18 | +| Medium | $0.68-0.90 | $0.38-0.53 | +| Large | $1.35-1.80 | $0.75-1.05 | **For teams running 10 workflows/day with /map-efficient:** - Daily cost: ~$13.50 - /map-fast would save ~40% but loses learning -- /map-debate costs ~3x more but provides reasoning transparency ### How /map-efficient Works @@ -1119,10 +1061,6 @@ START: I need to implement a feature | └─ YES → 
/map-efficient (maximum QA) | └─ NO → Continue | - ├─ Do stakeholders need documented reasoning for decisions? - | └─ YES → /map-debate (explicit trade-offs, Opus reasoning) - | └─ NO → Continue - | ├─ Do I care about token costs? | └─ NO → /map-efficient (best quality) | └─ YES → /map-efficient ⭐ (RECOMMENDED) @@ -1293,9 +1231,6 @@ Agents automatically use their configured model when invoked via slash commands: # Standard workflow - conditional predictor, optional learning via /map-learn /map-efficient implement authentication # Recommended for most tasks -# Multi-variant with explicit reasoning -/map-debate design caching strategy # Complex decisions - # Fast workflow - minimal agents, no learning /map-fast Update error message wording ``` @@ -1308,7 +1243,6 @@ Agents automatically use their configured model when invoked via slash commands: |----------|----------------|-------|---------|-----------|-------------|-------------| | `/map-efficient` | sonnet | sonnet (4x) | sonnet (4x) | sonnet (0-2x) | skip | ~$0.22 | | `/map-efficient --self-moa` | sonnet | sonnet (12x) | sonnet (12x) | sonnet (0-2x) | sonnet (4x) | ~$0.45 | -| `/map-debate` | sonnet | sonnet (12x) | sonnet (12x) | sonnet (0-2x) | opus (4x) | ~$0.75 | | `/map-fast` | sonnet | sonnet (4x) | sonnet (4x) | skip | skip | ~$0.12 | *Approximate costs based on typical token usage. Learning via `/map-learn` adds ~$0.05-0.10. 
@@ -1316,7 +1250,6 @@ Agents automatically use their configured model when invoked via slash commands: **Key differences:** - `/map-efficient`: Standard workflow, conditional Self-MoA - `/map-efficient --self-moa`: Forces 3-variant generation + synthesis -- `/map-debate`: 3 variants + Opus arbiter with explicit reasoning - `/map-fast`: Minimal, NO learning support --- @@ -1448,7 +1381,6 @@ Skills follow the 500-line rule: **Workflow deep-dives:** - `map-fast-deep-dive.md` - Skip conditions, when to avoid - `map-efficient-deep-dive.md` - Optimization strategy, recommended default -- `map-debate-deep-dive.md` - Multi-variant synthesis, Opus reasoning - `map-debug-deep-dive.md` - Debugging strategies, error analysis - `map-learn-deep-dive.md` - Lesson extraction, knowledge base updates - `map-release-deep-dive.md` - Release workflow, validation gates diff --git a/src/mapify_cli/delivery/file_copier.py b/src/mapify_cli/delivery/file_copier.py index 6cb42bb8..8eff2bc7 100644 --- a/src/mapify_cli/delivery/file_copier.py +++ b/src/mapify_cli/delivery/file_copier.py @@ -312,7 +312,6 @@ def create_commands_dir(project_path: Path) -> None: - `/map-task` - Execute a single subtask from an existing plan - `/map-tdd` - Run a test-first workflow for one task or plan - `/map-debug` - Debug issues using MAP analysis -- `/map-debate` - Generate variants and synthesize the best result - `/map-review` - Run a structured review workflow - `/map-check` - Run workflow quality gates and verification - `/map-fast` - Quick implementation with minimal validation diff --git a/src/mapify_cli/templates/agents/actor.md b/src/mapify_cli/templates/agents/actor.md index d0070880..1c5a62ae 100644 --- a/src/mapify_cli/templates/agents/actor.md +++ b/src/mapify_cli/templates/agents/actor.md @@ -276,6 +276,9 @@ Rules: 5. Include edge cases from the spec's `## Edge Cases` section if available in the packet. 6. Use standard test patterns for the project's language and framework. 7. 
Tests SHOULD fail when run (implementation doesn't exist yet). This is expected. +8. Do NOT add temporal comments about test failure status (e.g., "currently FAILS", + "expected to FAIL", "will PASS once fix is applied"). Write tests as permanent, + clean code — the Red/Green state is transient and must not leak into comments. Output: - Test files created via Write tool diff --git a/src/mapify_cli/templates/commands/map-debate.md b/src/mapify_cli/templates/commands/map-debate.md deleted file mode 100644 index ae420be1..00000000 --- a/src/mapify_cli/templates/commands/map-debate.md +++ /dev/null @@ -1,408 +0,0 @@ ---- -description: Debate-based MAP workflow with Opus arbiter for multi-variant synthesis ---- - -# MAP Debate Workflow - -## Execution Rules - -1. Execute steps in order without pausing; only ask user if (a) `task-decomposer` returns blocking `analysis.open_questions` with no subtasks OR (b) Monitor sets `escalation_required === true` -2. Use exact `subagent_type` specified — never substitute `general-purpose` -3. Call each agent individually — no combining or skipping steps -4. Max 5 Actor→Monitor retry iterations per subtask (separate from debate-arbiter retries in 2.7 Retry Loop) -5. **ALWAYS generate 3 variants** — no conditional check (unlike map-efficient Self-MoA) -6. Use **debate-arbiter with model=opus** for synthesis - -**Task:** $ARGUMENTS - -## Workflow Overview - -``` -1. DECOMPOSE → task-decomposer -2. FOR each subtask: - a. RESEARCH → if existing code understanding needed - b. 3 Actors (parallel) → security/performance/simplicity focuses - c. 3 Monitors (parallel) → validate + extract decisions - d. debate-arbiter (opus) → cross-evaluate + synthesize - e. Final Monitor → validate synthesis - f. If invalid: retry with feedback (max 5) - g. If risk_level ∈ {high, medium}: → Predictor - h. Apply changes -3. 
SUMMARY → optionally suggest /map-learn -``` - -## Step 1: Task Decomposition - -``` -Task( - subagent_type="task-decomposer", - description="Decompose task into subtasks", - prompt="Break down into ≤8 atomic subtasks and RETURN ONLY JSON matching task-decomposer schema v2.0 (schema_version, analysis, blueprint{subtasks[]}). - -Task: $ARGUMENTS - -Hard requirements: -- Use `blueprint.subtasks[].validation_criteria` (2-4 testable, verifiable outcomes) - - Prefix each criterion with `VC1:`, `VC2:`, ... (stable references for Actor/Monitor) - - Include a concrete anchor per VC (endpoint/function + file path) -- Use `blueprint.subtasks[].dependencies` (array of subtask IDs) and order subtasks by dependency -- Include `blueprint.subtasks[].complexity_score` (1-10) and `risk_level` (low|medium|high) -- Include `blueprint.subtasks[].security_critical` (true for auth/crypto/validation/data access) -- Include `blueprint.subtasks[].test_strategy` with unit/integration/e2e keys - - Map every `VCn:` to ≥1 planned test case (prefer test name contains `vc`) - - Recommended format: `path/to/test_file.ext::test_name_or_symbol`" -) -``` - -## Step 2: Subtask Loop - -### 2.0 Build AI-Friendly Subtask Packet (XML Anchors) - -Before calling any agents for the subtask, build a single **AI Packet** with unique XML-like tags (NO attributes). - -**Rule:** Use the subtask ID as the anchor name. Convert `-` to `_` for XML tag safety: -- `ST-001` → `ST_001` - -**AI Packet template:** - -```xml - - ST-001 - ... - ... - low|medium|high - true|false - 1-10 - - path1;path2;... - ... - ... - ... - - ... - ... - -``` - -Pass this packet verbatim to Actor/Monitor/debate-arbiter/Predictor. Do NOT rename tags mid-flow. 
- -### 2.1 Research (Conditional) - -**Call if:** refactoring, bug fixes, extending existing code, touching 3+ files -**Skip for:** new standalone features, docs, config - -``` -Task( - subagent_type="research-agent", - description="Research for subtask [ID]", - prompt="Query: [subtask description] -File patterns: [relevant globs] -Symbols: [optional keywords] -Intent: locate -Max tokens: 1500" -) -``` - -Pass `executive_summary` to Actor if `confidence >= 0.7`. - -### 2.2 Quality-Stakes Assessment - -**Purpose:** Determine deployment context and set minimum quality thresholds before launching Actor variants. - -**Assessment Logic:** -``` -# Determine deployment risk level based on goal content -deployment_risk_level = assess_deployment_context(goal): - IF goal contains "hospital" OR "healthcare" OR "patient" OR "medical": - → risk_level = "critical", min_security = 8, min_functionality = 8 - ELIF goal contains "government" OR "financial" OR "banking" OR "critical infrastructure": - → risk_level = "high", min_security = 8, min_functionality = 7 - ELIF goal contains "production" OR "enterprise" OR "customer-facing": - → risk_level = "medium", min_security = 7, min_functionality = 7 - ELSE: - → risk_level = "medium", min_security = 7, min_functionality = 7 # safe default - -# Build quality context for Actor variants -quality_context = { - "deployment_risk_level": risk_level, - "min_security_score": min_security, - "min_functionality_score": min_functionality, - "quality_enforcement": "All Actor variants MUST meet minimum thresholds regardless of focus area" -} -``` - -**Pass to Actors:** Include `quality_context` in each Actor variant prompt. - -**Rationale:** Prevents quality erosion in debate by establishing non-negotiable baselines before variants propose solutions. 
- -### 2.3 Parallel Actors (3 Variants) - -**ALWAYS call 3 Actors in parallel with different focuses:** - -``` -# Variant 1: Security Focus -Task( - subagent_type="actor", - description="Implement subtask [ID] - Security (v1)", - prompt="Implement with SECURITY focus: -**AI Packet (XML):** [paste ...] -**Quality Context:** deployment_risk_level={risk_level}, min_security={min_security}, min_functionality={min_functionality} -⚠️ Your variant MUST meet minimum quality thresholds. Quality is non-negotiable regardless of security focus. -approach_focus: security, variant_id: v1, self_moa_mode: true -Follow the Actor agent protocol output format. Ensure `decisions_made` is included for debate-arbiter." -) - -# Variant 2: Performance Focus -Task( - subagent_type="actor", - description="Implement subtask [ID] - Performance (v2)", - prompt="Implement with PERFORMANCE focus: -**AI Packet (XML):** [paste ...] -**Quality Context:** deployment_risk_level={risk_level}, min_security={min_security}, min_functionality={min_functionality} -⚠️ Your variant MUST meet minimum quality thresholds. Quality is non-negotiable regardless of performance focus. -approach_focus: performance, variant_id: v2, self_moa_mode: true -Follow the Actor agent protocol output format. Ensure `decisions_made` is included for debate-arbiter." -) - -# Variant 3: Simplicity Focus -Task( - subagent_type="actor", - description="Implement subtask [ID] - Simplicity (v3)", - prompt="Implement with SIMPLICITY focus: -**AI Packet (XML):** [paste ...] -**Quality Context:** deployment_risk_level={risk_level}, min_security={min_security}, min_functionality={min_functionality} -⚠️ Your variant MUST meet minimum quality thresholds. Quality is non-negotiable regardless of simplicity focus. -approach_focus: simplicity, variant_id: v3, self_moa_mode: true -Follow the Actor agent protocol output format. Ensure `decisions_made` is included for debate-arbiter." 
-) -``` - -### 2.4 Parallel Monitors (3 Validations) - -Validate each variant in parallel: - -``` -Task( - subagent_type="monitor", - description="Validate v1", - prompt="Review variant v1 against requirements: -**AI Packet (XML):** [paste ...] -**Proposed Solution:** [paste v1 Actor output] -**Specification Contract (optional):** [SpecificationContract JSON or null] -variant_id: v1, self_moa_mode: true - -Return ONLY valid JSON following MonitorReviewOutput schema. -When in Self-MoA mode, include extension fields: variant_id, self_moa_mode, decisions_identified, compatibility_features, strengths, weaknesses, recommended_as_base. -If `validation_criteria` present: include `contract_compliance` + `contract_compliant`. -If a SpecificationContract is provided: include `spec_contract_compliant` + `spec_contract_violations`." -) -``` - -Repeat for v2 and v3 in parallel. - -### 2.5 debate-arbiter (Opus) - -``` -Task( - subagent_type="debate-arbiter", - model="opus", - description="Cross-evaluate and synthesize best implementation", - prompt="Cross-evaluate 3 variants and synthesize optimal solution: - -**AI Packet (XML):** [paste ...] -**Variants (raw Actor outputs):** - -[paste v1 Actor output] - - -[paste v2 Actor output] - - -[paste v3 Actor output] - -**Monitor Results (MonitorReviewOutput JSON):** - -[paste v1 Monitor output JSON] - - -[paste v2 Monitor output JSON] - - -[paste v3 Monitor output JSON] - -**Specification Contract (optional):** [SpecificationContract JSON or null] -**Priority Policy:** [\"correctness\", \"security\", \"maintainability\", \"performance\"] -**Evaluation Dimensions:** [\"security\", \"performance\", \"readability\", \"maintainability\"] - -Return ONLY valid JSON following ArbiterOutput schema. -Include: comparison_matrix, decision_rationales, synthesis_reasoning (8 steps)." 
-) -``` - -### 2.6 Final Monitor - -Validate synthesized code: - -``` -Task( - subagent_type="monitor", - description="Validate synthesized implementation", - prompt="Review synthesized code from debate-arbiter: -**AI Packet (XML):** [paste ...] -**Proposed Solution:** [paste debate-arbiter code output] -**Arbiter Confidence:** [confidence from debate-arbiter] - -Check: correctness, security, standards, decision implementation. -Return ONLY valid JSON following MonitorReviewOutput schema." -) -``` - -### 2.7 Retry Loop - -If Final Monitor returns `valid === false`: -1. Provide feedback including arbiter's synthesis_reasoning -2. Retry debate-arbiter with retry_context -3. Max 2 debate-arbiter retries per subtask - -```python -retry_context = { - "attempt": retry_count + 1, - "previous_errors": monitor_issues, - "failed_decisions": [decisions_causing_issues], - "strategy_adjustments": ["avoid decision X", "prefer fresh_generation"] -} -``` - -### 2.8 Escalation Gate (AskUserQuestion) - -If Monitor returns `escalation_required === true`, ask user: - -``` -AskUserQuestion(questions=[ - { - "header": "Escalation", - "question": "Human review requested by Monitor.\n\nSubtask: [ST-XXX]\nReason: [escalation_reason]\nArbiter Confidence: [confidence]\n\nProceed anyway?", - "multiSelect": false, - "options": [ - {"label": "YES - Proceed", "description": "Continue (run Predictor if required, then apply changes)."}, - {"label": "REVIEW - Details", "description": "Show synthesis_reasoning + comparison_matrix, then ask again."}, - {"label": "NO - Abort", "description": "Do not apply changes; wait for human review."} - ] - } -]) -``` - -### 2.9 Conditional Predictor - -```python -# Enhanced predictor decision: -# 1. ALWAYS call for: high risk, security_critical, or escalation_required -# 2. SKIP if: risk_level == "low" -# 3. 
SKIP if: risk_level == "medium" AND all affected_files are new (don't exist yet) -# AND complexity_score <= 4 AND NOT security_critical -# → Write minimal evidence directly via Write tool -# 4. OTHERWISE: Call predictor with tier_hint - -skip_predictor = ( - not subtask.escalation_required - and not subtask.security_critical - and ( - subtask.risk_level == "low" - or ( - subtask.risk_level == "medium" - and subtask.affected_files # guard against vacuous all() - and all(not file_exists(f) for f in subtask.affected_files) - and subtask.complexity_score <= 4 - ) - ) -) - -if skip_predictor: - # No action needed — Predictor skipped for low-risk subtasks - pass -else: - # Determine tier_hint from subtask metadata: - # - risk "medium" + complexity_score <= 3 → tier_hint: 1 - # - risk "medium" + complexity_score 4-7 → tier_hint: 2 - # - risk "high" OR security_critical → tier_hint: 3 - if subtask.risk_level == "high" or subtask.security_critical: - tier_hint = 3 - elif subtask.complexity_score <= 3: - tier_hint = 1 - else: - tier_hint = 2 - - Task( - subagent_type="predictor", - description="Analyze impact", - prompt="Analyze impact using Predictor input schema. - - tier_hint: {tier_hint} - - **AI Packet (XML):** [paste ...] - - Required inputs: - - change_description: [summary from debate-arbiter synthesis_reasoning] - - files_changed: [list of paths from synthesized code] - - diff_content: [unified diff] - - Optional inputs: - - analyzer_output: [debate-arbiter output] - - user_context: [subtask requirements + arbiter confidence] - - Return ONLY valid JSON following Predictor schema." - ) -``` - -### 2.10 Apply Changes - -Apply synthesized code via Write/Edit tools. Proceed to next subtask. - -### 2.11 Gate 2: Tests Available / Run - -After applying changes, run tests if available. - -**Prefer** the commands implied by ``. 
Otherwise: -- If `pytest` project: run `pytest` -- If `package.json` present: run `npm test` / `pnpm test` / `yarn test` -- If `go.mod` present: run `go test ./...` -- If `Cargo.toml` present: run `cargo test` - -If no tests found: mark gate as skipped and proceed. - -### 2.12 Gate 3: Formatter / Linter - -After tests gate, run formatter/linter checks if available. - -Prefer repo-standard commands (e.g., `make lint`, `make fmt`). Otherwise: -- Python: `ruff check`, `black --check`, `mypy` -- JS/TS: `eslint`, `prettier -c` -- Go: `gofmt` check + `golangci-lint run` -- Rust: `cargo fmt --check`, `cargo clippy` - -If none found: mark gate as skipped and proceed. - ---- - -## Step 3: Summary - -- Run tests if applicable -- Create commit (if requested) -- Report: features implemented, files changed -- Include key synthesis reasoning highlights from debate-arbiter - -**Optional:** Run `/map-learn [summary]` to preserve valuable patterns for future workflows. - ---- - -## Key Differences from map-efficient - -| Aspect | map-efficient | map-debate | -|--------|---------------|------------| -| Variant generation | Single variant (one Actor) | Always 3 variants | -| Synthesis agent | N/A (single Actor) | debate-arbiter (opus) | -| Output | Direct implementation | comparison_matrix + decision_rationales + synthesis_reasoning | -| Cost | Lower | ~3-5x higher (opus model) | -| Use case | Efficiency | Reasoning transparency | - -Begin now with debate workflow. 
diff --git a/src/mapify_cli/templates/commands/map-efficient.md b/src/mapify_cli/templates/commands/map-efficient.md index f69689d0..6b5d48b3 100644 --- a/src/mapify_cli/templates/commands/map-efficient.md +++ b/src/mapify_cli/templates/commands/map-efficient.md @@ -299,8 +299,9 @@ loop: # Phase D: Retry handling # For each monitor that returned valid=false: - # Re-run actor + monitor for that subtask (serially) - # Track retries per subtask: validate_wave_step SUBTASK_ID STEP_ID + # RETRY=$(python3 .map/scripts/map_orchestrator.py wave_monitor_failed $subtask_id --feedback "feedback") + # If RETRY.status == "max_retries": escalate to user + # Otherwise: re-run actor + monitor for that subtask (serially) # Phase E: Per-wave gates # Run tests + linter ONCE for the entire wave @@ -358,6 +359,8 @@ STRICT RULES: 5. Tests SHOULD fail when run (implementation doesn't exist yet). 6. Test files MUST be lint-clean. Use proper imports at the top of the file (not inside type annotations). Run the project linter on test files before finishing. +7. Do NOT add temporal comments about test failure status (e.g., "currently FAILS", + "expected to FAIL"). Tests are permanent, clean code — the Red/Green state is transient. """ ) @@ -453,55 +456,59 @@ fi # After Monitor returns: if monitor_output["valid"] == false: - # Increment retry counter (also triggered when test gate fails above) - if retry_count < 5: - # Go back to Phase: ACTOR with Monitor feedback - # Actor will fix issues and re-apply code - - # === STUCK RECOVERY (at retry 3) === - # At retry 3, intercept with intermediate recovery before retries 4-5. - # This gives Actor better context to break out of a stuck loop. 
- if retry_count == 3: - # Step 1: Check if research-agent already ran for this subtask - findings_file = f".map/{branch}/findings_{branch}.md" - if findings_file exists and has content for this subtask: - # Reuse existing findings (Edge Case 12: skip re-invocation) - recovery_context = read(findings_file) - else: - # Invoke research-agent for alternative approaches - Task( - subagent_type="research-agent", - description="Stuck recovery: find alternative approach", - prompt=f"""Subtask {subtask_id} failed 3 monitor retries. + # Use orchestrator to handle retry: requeues ACTOR+MONITOR, increments retry_count, + # switches phase so workflow-gate allows edits, persists feedback for Actor. + RETRY_RESULT=$(python3 .map/scripts/map_orchestrator.py monitor_failed --feedback "MONITOR_FEEDBACK_TEXT") + # RETRY_RESULT.status is "retrying", "max_retries", or "error" (called outside MONITOR phase) + # RETRY_RESULT.retry_count shows current attempt number + # RETRY_RESULT.feedback_file points to .map/{branch}/monitor_feedback_retry{N}.md + + RETRY_STATUS=$(echo "$RETRY_RESULT" | python3 -c "import sys,json; print(json.load(sys.stdin).get('status',''))") + RETRY_COUNT=$(echo "$RETRY_RESULT" | python3 -c "import sys,json; print(json.load(sys.stdin).get('retry_count',0))") + + if RETRY_STATUS == "max_retries": + # Escalate to user (retry limit reached after 5 attempts) + AskUserQuestion(questions=[{"question": "Monitor retry limit reached (5 attempts). How to proceed?", "header": "Retry limit", "options": [{"label": "Continue", "description": "Continue with more retries (manually edit step_state.json retry_count)"}, {"label": "Skip", "description": "Skip this subtask and move to next"}, {"label": "Abort", "description": "Stop workflow"}], "multiSelect": false}]) + + # === STUCK RECOVERY (at retry 3) === + # At retry 3, intercept with intermediate recovery before retries 4-5.
+ if RETRY_COUNT == 3: + # Step 1: Check if research-agent already ran for this subtask + findings_file = f".map/{branch}/findings_{branch}.md" + if findings_file exists and has content for this subtask: + recovery_context = read(findings_file) + else: + Task( + subagent_type="research-agent", + description="Stuck recovery: find alternative approach", + prompt=f"""Subtask {subtask_id} failed 3 monitor retries. Monitor feedback: {latest_monitor_feedback} Find an ALTERNATIVE approach. Current approach is not working. Focus on: different patterns, simpler implementations, existing utilities.""" - ) - recovery_context = research_agent_output - - # Step 2: Invoke predictor (skip for low-risk subtasks — Edge Case 7) - if subtask.risk_level != "low": - Task( - subagent_type="predictor", - description="Stuck recovery: analyze why approach fails", - prompt=f"""Subtask {subtask_id} failed 3 retries. + ) + recovery_context = research_agent_output + + # Step 2: Invoke predictor (skip for low-risk subtasks) + if subtask.risk_level != "low": + Task( + subagent_type="predictor", + description="Stuck recovery: analyze why approach fails", + prompt=f"""Subtask {subtask_id} failed 3 retries. Research findings: {recovery_context} Analyze: why is the current approach failing? What dependencies are missed?""" - ) - recovery_context += predictor_output + ) + recovery_context += predictor_output - # Step 3: Pass recovery context to Actor for retries 4-5 - # Actor receives: original task + monitor feedback + recovery context - # This gives Actor a fresh perspective from research-agent/predictor + if recovery_context is empty or unhelpful: + AskUserQuestion(questions=[{"question": "Stuck recovery failed. 
How to proceed?", "header": "Stuck", "options": [{"label": "Continue", "description": "Try 2 more retries"}, {"label": "Skip", "description": "Skip subtask"}, {"label": "Abort", "description": "Stop workflow"}], "multiSelect": false}]) + # === END STUCK RECOVERY === - # If both research-agent and predictor found nothing useful: - if recovery_context is empty or unhelpful: - AskUserQuestion(questions=[{"question": "Stuck recovery: research-agent and predictor found no alternative. How to proceed?", "header": "Stuck", "options": [{"label": "Continue", "description": "Try 2 more retries with current approach"}, {"label": "Skip", "description": "Skip subtask, move to next"}, {"label": "Abort", "description": "Stop workflow"}], "multiSelect": false}]) - # === END STUCK RECOVERY === + # Phase is now ACTOR (set by orchestrator). Proceed to get_next_step + # which will return ACTOR instruction. Pass RETRY_RESULT.feedback_file path + # to Actor so it can read the monitor feedback explicitly. - else: - # Escalate to user (retry limit reached after 5 attempts) - AskUserQuestion(questions=[{"question": "Monitor retry limit reached (5 attempts). 
How to proceed?", "header": "Retry limit", "options": [{"label": "Continue", "description": "Reset retry counter and try again"}, {"label": "Skip", "description": "Skip this subtask and move to next"}, {"label": "Abort", "description": "Stop workflow"}], "multiSelect": false}]) +# For wave-based execution, use wave_monitor_failed instead: +# python3 .map/scripts/map_orchestrator.py wave_monitor_failed ST-001 --feedback "feedback text" ``` ### Monitor Artifact Rule diff --git a/src/mapify_cli/templates/commands/map-review.md b/src/mapify_cli/templates/commands/map-review.md index 999ef2fd..a1634fe8 100644 --- a/src/mapify_cli/templates/commands/map-review.md +++ b/src/mapify_cli/templates/commands/map-review.md @@ -299,6 +299,20 @@ Present the verdict with a summary table: - Key issues resolved during interactive review - Remaining action items +## Workflow Gate Unlock (REVISE/BLOCK only) + +If the verdict is **REVISE** or **BLOCK** and the user asks to fix the issues, +the workflow gate may block edits because the workflow is in COMPLETE phase. + +**Before applying any fixes**, run: + +```bash +python3 .map/scripts/map_orchestrator.py reopen_for_fixes --feedback "Review findings: [summary of issues to fix]" +``` + +This transitions the workflow from COMPLETE → ACTOR so the edit gate unlocks. +Skip this step if the workflow is not in COMPLETE phase (e.g., review was run mid-workflow). + ## Handoff Artifact Update After the final verdict, update branch-scoped handoff artifacts so review output survives beyond the chat: diff --git a/src/mapify_cli/templates/commands/map-tdd.md b/src/mapify_cli/templates/commands/map-tdd.md index bf7aac34..716c1817 100644 --- a/src/mapify_cli/templates/commands/map-tdd.md +++ b/src/mapify_cli/templates/commands/map-tdd.md @@ -134,6 +134,24 @@ STRICT RULES: 9. Test files MUST be lint-clean. Use proper imports at the top of the file (not inside type annotations). 
Run the project linter (ruff/eslint/golangci-lint) on test files before finishing. Fix any lint errors in your test files. +10. Do NOT add temporal or state-marking comments about test failure status + (e.g., "currently FAILS", "expected to FAIL until fix is applied", + "will PASS once fix is implemented", "Red phase"). Write tests as permanent, + clean code. The Red/Green state is transient — it must NOT leak into comments. + +TEST QUALITY REQUIREMENTS — avoid "2+2=4" tests: +- Every test must verify SEMANTIC BEHAVIOR, not just that a single branch executes. + Bad: "returns error when input is nil" (trivial nil-check). + Good: "returns NotFound error and does NOT call downstream API when input is nil". +- Tests must assert MULTIPLE CONSEQUENCES of an action (side effects, return values, + state changes, calls to dependencies). A test that asserts only one thing from + a single if-branch is trivial — combine it with assertions about what else + should or should NOT happen. +- Prefer scenario-based tests that exercise a CHAIN of behavior (setup → action → + verify multiple outcomes) over unit-level tests that check one field. +- For each test ask: "Would this test catch a real bug, or does it just confirm + the obvious?" If the answer is "obvious", merge it into a richer scenario or drop it. +- Aim for at least 60% of tests being full semantic scenarios (multi-step, multi-assert). Output: - Test files written via Edit/Write tools @@ -188,10 +206,21 @@ fi **Then evaluate test results:** -- **Tests FAIL with assertion/import errors** → GOOD. This is the expected TDD state ("Red" phase). Proceed to ACTOR. +- **Tests FAIL with assertion/import errors** → GOOD. This is the expected TDD state ("Red" phase). But also run the quality check below before proceeding. - **Tests PASS** → PROBLEM. Tests are trivial or not testing real behavior. Go back to TEST_WRITER with feedback: "Tests pass without implementation. Tests must assert behavior that requires code to be written." 
- **Tests have syntax errors** → Go back to TEST_WRITER with feedback to fix syntax. +**Quality gate (run even if tests correctly fail):** + +Review the test files and classify each test as: +- **Semantic** — tests real behavior with multi-step scenario or multi-assert verification +- **Trivial ("2+2=4")** — tests a single if-branch or obvious nil-check with one assert + +If more than 40% of tests are trivial, go back to TEST_WRITER with feedback: +"Too many trivial tests. [N] of [M] tests are single-branch checks. Merge trivial +tests into richer scenarios that verify multiple consequences. Each test should catch +a real bug, not just confirm one obvious branch." + ```bash python3 .map/scripts/map_orchestrator.py validate_step "2.26" ``` @@ -234,7 +263,20 @@ Output: standard Actor output (approach + code + trade-offs) ) ``` -**CRITICAL: After ACTOR returns, you MUST call Monitor (2.4). Do NOT skip Monitor. Do NOT mark the subtask complete without Monitor validation.** This is not optional — Monitor is a mandatory phase in every workflow, including TDD. +**CRITICAL: After ACTOR returns, run the TDD Refactor step below, then call Monitor (2.4). Do NOT skip Monitor. Do NOT mark the subtask complete without Monitor validation.** This is not optional — Monitor is a mandatory phase in every workflow, including TDD. + +### TDD Refactor: Clean Stale Red-Phase Comments + +After ACTOR completes and tests pass (Green), scan the test files created by TEST_WRITER for stale Red-phase markers. This is the **Refactor** step of Red-Green-Refactor. + +Look for and clean up: +- Comments containing "currently FAILS", "expected to FAIL", "will PASS once", "Red phase", "TDD Red" +- File-level docstrings saying tests "are expected to fail against current implementation" +- Any temporal language that references the transient Red/Green state + +Rewrite matched comments as permanent, implementation-neutral descriptions. 
If a comment is only a state marker with no semantic value, remove it entirely. + +**This cleanup is done by the orchestrating agent (you), NOT by Actor.** Actor in code_only mode cannot modify test files, but you can. ```bash # Validate Actor step, then get_next_step will return MONITOR (2.4) diff --git a/src/mapify_cli/templates/hooks/ralph-iteration-logger.py b/src/mapify_cli/templates/hooks/ralph-iteration-logger.py index 76595270..c32e5868 100755 --- a/src/mapify_cli/templates/hooks/ralph-iteration-logger.py +++ b/src/mapify_cli/templates/hooks/ralph-iteration-logger.py @@ -364,18 +364,14 @@ def derive_summary(log_file: Path) -> None: is_thrashing = file_thrashing[f] >= THRASHING_WINDOW if is_thrashing: thrashing_alert_count += 1 - file_stats.append( - { - "file": f, - "iterations": len(effs), - "avg_effectiveness": round(sum(effs) / len(effs), 3) if effs else 0.0, - "is_thrashing": is_thrashing, - } - ) - - all_effs = [ - e.get("effectiveness", 0.0) for e in entries if (e.get("file") or "").strip() - ] + file_stats.append({ + "file": f, + "iterations": len(effs), + "avg_effectiveness": round(sum(effs) / len(effs), 3) if effs else 0.0, + "is_thrashing": is_thrashing, + }) + + all_effs = [e.get("effectiveness", 0.0) for e in entries if (e.get("file") or "").strip()] summary: dict[str, object] = { "generated_at": datetime.now().isoformat(), "entry_count": len(entries), @@ -384,9 +380,7 @@ def derive_summary(log_file: Path) -> None: "file_stats": file_stats, "aggregate": { "total_iterations": total_lines, - "avg_effectiveness": ( - round(sum(all_effs) / len(all_effs), 3) if all_effs else 0.0 - ), + "avg_effectiveness": round(sum(all_effs) / len(all_effs), 3) if all_effs else 0.0, "total_thrashing_alerts": thrashing_alert_count, }, } diff --git a/src/mapify_cli/templates/map/scripts/diagnostics.py b/src/mapify_cli/templates/map/scripts/diagnostics.py index 8d24f75f..c2d9abf1 100644 --- a/src/mapify_cli/templates/map/scripts/diagnostics.py +++ 
b/src/mapify_cli/templates/map/scripts/diagnostics.py @@ -272,9 +272,9 @@ def cmd_summarize(args: argparse.Namespace) -> int: "accepted_issue_count": accepted_issue_count, "summary": args.summary or ("No blocking issues" if status == "passed" else "Blocking issues detected"), - "diagnostics_path": ( - str(diagnostics_path) if diagnostics_path.exists() else None - ), + "diagnostics_path": str(diagnostics_path) + if diagnostics_path.exists() + else None, } dossier = write_run_dossier( diff --git a/src/mapify_cli/templates/map/scripts/map_orchestrator.py b/src/mapify_cli/templates/map/scripts/map_orchestrator.py index a58bff2c..39960e50 100755 --- a/src/mapify_cli/templates/map/scripts/map_orchestrator.py +++ b/src/mapify_cli/templates/map/scripts/map_orchestrator.py @@ -274,6 +274,11 @@ class StepState: current_step_phase: str = "DECOMPOSE" completed_steps: list[str] = field(default_factory=list) pending_steps: list[str] = field(default_factory=lambda: STEP_ORDER.copy()) + # retry_count is for SERIAL mode only (single-subtask execution). + # subtask_retry_counts is for WAVE mode only (parallel wave execution). + # These counters are independent: advance_wave resets subtask_retry_counts + # but NOT retry_count, and get_next_step resets retry_count but NOT + # subtask_retry_counts. Never mix serial and wave retry tracking. retry_count: int = 0 max_retries: int = 5 plan_approved: bool = False @@ -969,6 +974,245 @@ def advance_wave(branch: str) -> dict: } +def _write_feedback_file( + branch: str, filename: str, header: str, feedback: str +) -> Optional[str]: + """Write monitor feedback to a file if feedback is non-empty. + + Returns the file path string, or None if nothing was written. 
+ """ + if not feedback.strip(): + return None + fb_path = Path(f".map/{branch}/{filename}") + fb_path.parent.mkdir(parents=True, exist_ok=True) + fb_path.write_text(f"# {header}\n\n{feedback}\n", encoding="utf-8") + return str(fb_path) + + +def _check_retry_limit( + current_retries: int, max_retries: int, context: dict +) -> Optional[dict]: + """Return escalation dict if retry limit exceeded, else None. + + Shared by monitor_failed() and wave_monitor_failed() to avoid + duplicating the limit-check + escalation-dict construction. + + Args: + current_retries: Current retry count (already incremented). + max_retries: Maximum allowed retries. + context: Extra fields to include in the escalation dict + (e.g., subtask_id for wave mode). + + Returns: + Escalation dict with status="max_retries" if limit exceeded, + or None if still within limit. + """ + if current_retries > max_retries: + return { + "status": "max_retries", + "retry_count": current_retries, + "max_retries": max_retries, + **context, + } + return None + + +def monitor_failed(branch: str, feedback: str = "") -> dict: + """Handle Monitor valid=false: requeue ACTOR+MONITOR, increment retry_count. + + Precondition: current_step_phase must be MONITOR. Called by map-efficient.md + when Monitor returns valid=false. Switches phase back to ACTOR so + workflow-gate allows edits. Persists monitor feedback to a file that Actor + can read on next invocation. + + Args: + branch: Git branch name (sanitized) + feedback: Monitor's feedback_for_actor text (optional) + + Returns: + Dict with status (retrying|max_retries), retry_count, feedback_file + """ + state_file = Path(f".map/{branch}/step_state.json") + state = StepState.load(state_file) + + if state.current_step_phase != "MONITOR": + return { + "status": "error", + "message": ( + f"monitor_failed() called from phase '{state.current_step_phase}', " + "expected 'MONITOR'. Aborting to prevent state corruption." 
+ ), + } + + state.retry_count += 1 + + escalation = _check_retry_limit( + state.retry_count, + state.max_retries, + { + "message": ( + f"Monitor retry limit reached ({state.max_retries} attempts). " + "Escalate to user." + ), + }, + ) + if escalation is not None: + state.save(state_file) + return escalation + + # Requeue only ACTOR (2.3) and MONITOR (2.4) on retry. + # TDD pre-steps (2.25/2.26) are NOT re-run — tests were already written + # and validated before the first Actor attempt. + state.pending_steps = ["2.3", "2.4"] + state.current_step_id = "2.3" + state.current_step_phase = "ACTOR" + + # Persist feedback so Actor can read it (numbered to preserve history) + feedback_file = _write_feedback_file( + branch, + f"monitor_feedback_retry{state.retry_count}.md", + f"Monitor Feedback (retry {state.retry_count})", + feedback, + ) + + state.save(state_file) + + return { + "status": "retrying", + "retry_count": state.retry_count, + "max_retries": state.max_retries, + "current_phase": "ACTOR", + "feedback_file": feedback_file, + "message": ( + f"Monitor failed. Retry {state.retry_count}/{state.max_retries}. " + f"Phase reset to ACTOR for subtask {state.current_subtask_id}." + ), + } + + +def wave_monitor_failed( + subtask_id: str, branch: str, feedback: str = "" +) -> dict: + """Handle Monitor valid=false for a subtask within a wave. + + Resets the subtask's phase back to ACTOR and increments its retry count. 
+ + Args: + subtask_id: Subtask ID (e.g., "ST-002") + branch: Git branch name (sanitized) + feedback: Monitor's feedback_for_actor text (optional) + + Returns: + Dict with status, retry_count for the subtask + """ + state_file = Path(f".map/{branch}/step_state.json") + state = StepState.load(state_file) + + # Increment per-subtask retry count + current_retries = state.subtask_retry_counts.get(subtask_id, 0) + 1 + state.subtask_retry_counts[subtask_id] = current_retries + + escalation = _check_retry_limit( + current_retries, + state.max_retries, + { + "subtask_id": subtask_id, + "message": ( + f"Monitor retry limit reached for {subtask_id} " + f"({state.max_retries} attempts). Escalate to user." + ), + }, + ) + if escalation is not None: + state.save(state_file) + return escalation + + # Reset subtask phase back to ACTOR + state.subtask_phases[subtask_id] = "2.3" + + # Persist feedback (numbered to preserve history) + feedback_file = _write_feedback_file( + branch, + f"monitor_feedback_{subtask_id}_retry{current_retries}.md", + f"Monitor Feedback for {subtask_id} (retry {current_retries})", + feedback, + ) + + state.save(state_file) + + return { + "status": "retrying", + "subtask_id": subtask_id, + "retry_count": current_retries, + "max_retries": state.max_retries, + "current_phase": "ACTOR", + "feedback_file": feedback_file, + "message": ( + f"Monitor failed for {subtask_id}. " + f"Retry {current_retries}/{state.max_retries}. " + f"Phase reset to ACTOR." + ), + } + + +def reopen_for_fixes(branch: str, feedback: str = "") -> dict: + """Transition from COMPLETE back to ACTOR for post-review fixes. + + Called after /map-review finds issues in a completed workflow. + The workflow gate blocks edits during COMPLETE phase; this function + reopens the workflow so fixes can be applied. 
+ + Args: + branch: Git branch name (sanitized) + feedback: Review feedback text describing what needs fixing + + Returns: + Dict with status and new phase info + """ + state_file = Path(f".map/{branch}/step_state.json") + if not state_file.exists(): + return { + "status": "error", + "message": "No step_state.json found. Nothing to reopen.", + } + + state = StepState.load(state_file) + + if state.current_step_phase != "COMPLETE": + return { + "status": "error", + "message": ( + f"Workflow is in phase '{state.current_step_phase}', not COMPLETE. " + "Use monitor_failed for non-COMPLETE retry." + ), + } + + # Reset to ACTOR+MONITOR cycle + state.current_step_id = "2.3" + state.current_step_phase = "ACTOR" + state.pending_steps = ["2.3", "2.4"] + state.retry_count = 0 + + feedback_file = _write_feedback_file( + branch, + "review_feedback.md", + "Review Feedback (post-COMPLETE reopen)", + feedback, + ) + + state.save(state_file) + + return { + "status": "reopened", + "current_phase": "ACTOR", + "feedback_file": feedback_file, + "message": ( + "Workflow reopened from COMPLETE to ACTOR. " + "Edit gate is now unlocked for review fixes." 
+ ), + } + + SKIPPABLE_STEPS = {"2.2", "2.25", "2.26"} @@ -1330,6 +1574,9 @@ def main(): "advance_wave", "resume_single_subtask", "get_plan_progress", + "monitor_failed", + "wave_monitor_failed", + "reopen_for_fixes", ], help="Command to execute", ) @@ -1346,6 +1593,10 @@ def main(): parser.add_argument( "--tdd", action="store_true", help="Enable TDD mode (for resume_single_subtask)" ) + parser.add_argument( + "--feedback", + help="Feedback text (for monitor_failed / wave_monitor_failed / reopen_for_fixes)", + ) args = parser.parse_args() @@ -1489,6 +1740,29 @@ def main(): result = get_plan_progress(branch) print(json.dumps(result, indent=2)) + elif args.command == "monitor_failed": + feedback = args.feedback or "" + result = monitor_failed(branch, feedback) + print(json.dumps(result, indent=2)) + + elif args.command == "wave_monitor_failed": + if not args.task_or_step: + print( + json.dumps( + {"error": "subtask_id required. Usage: wave_monitor_failed ST-001 --feedback 'text'"} + ), + file=sys.stderr, + ) + sys.exit(1) + feedback = args.feedback or "" + result = wave_monitor_failed(args.task_or_step, branch, feedback) + print(json.dumps(result, indent=2)) + + elif args.command == "reopen_for_fixes": + feedback = args.feedback or "" + result = reopen_for_fixes(branch, feedback) + print(json.dumps(result, indent=2)) + except Exception as e: print(json.dumps({"error": str(e)}), file=sys.stderr) sys.exit(1) diff --git a/src/mapify_cli/templates/map/scripts/map_step_runner.py b/src/mapify_cli/templates/map/scripts/map_step_runner.py index e7bd320f..0a44ccb7 100755 --- a/src/mapify_cli/templates/map/scripts/map_step_runner.py +++ b/src/mapify_cli/templates/map/scripts/map_step_runner.py @@ -319,9 +319,7 @@ def read(name: str) -> str: if not path.exists(): return "" try: - return _sanitize_for_json( - path.read_text(encoding="utf-8", errors="replace") - ) + return _sanitize_for_json(path.read_text(encoding="utf-8", errors="replace")) except OSError: return "" @@ -381,9 +379,7 @@
def read(name: str) -> str: if not path.exists(): return "" try: - return _sanitize_for_json( - path.read_text(encoding="utf-8", errors="replace") - ) + return _sanitize_for_json(path.read_text(encoding="utf-8", errors="replace")) except OSError: return "" @@ -407,26 +403,22 @@ def read(name: str) -> str: "branch": branch_name, "plan_review_path": latest_plan_review_name or None, "code_review_path": latest_code_review_name or None, - "verification_summary_path": ( - "verification-summary.md" - if (branch_dir / "verification-summary.md").exists() - else None - ), + "verification_summary_path": "verification-summary.md" + if (branch_dir / "verification-summary.md").exists() + else None, "qa_path": "qa-001.md" if (branch_dir / "qa-001.md").exists() else None, - "pr_draft_path": ( - "pr-draft.md" if (branch_dir / "pr-draft.md").exists() else None - ), - "active_issues_path": ( - "active-issues.json" - if (branch_dir / "active-issues.json").exists() - else None - ), - "plan_review": ( - read(latest_plan_review_name) if latest_plan_review_name else None - ), - "code_review": ( - read(latest_code_review_name) if latest_code_review_name else None - ), + "pr_draft_path": "pr-draft.md" + if (branch_dir / "pr-draft.md").exists() + else None, + "active_issues_path": "active-issues.json" + if (branch_dir / "active-issues.json").exists() + else None, + "plan_review": read(latest_plan_review_name) + if latest_plan_review_name + else None, + "code_review": read(latest_code_review_name) + if latest_code_review_name + else None, "verification_summary": read("verification-summary.md"), "qa": read("qa-001.md"), "pr_draft": read("pr-draft.md"), @@ -845,10 +837,7 @@ def run_test_gate() -> dict: # Detect test runner runners = [ - ( - ["pytest.ini", "pyproject.toml", "setup.py", "setup.cfg"], - ["pytest", "--tb=short", "-q"], - ), + (["pytest.ini", "pyproject.toml", "setup.py", "setup.cfg"], ["pytest", "--tb=short", "-q"]), (["package.json"], ["npm", "test"]), (["go.mod"], ["go", "test", 
"./..."]), (["Cargo.toml"], ["cargo", "test"]), @@ -943,9 +932,7 @@ def _run_git(args: list[str]) -> str: git_ref = _run_git(["rev-parse", "HEAD"]) diff_stat = _run_git(["diff", "--stat", "HEAD"]) diff_names = _run_git(["diff", "--name-only", "HEAD"]) - files_changed = ( - [f for f in diff_names.splitlines() if f.strip()] if diff_names else [] - ) + files_changed = [f for f in diff_names.splitlines() if f.strip()] if diff_names else [] return { "status": "success", diff --git a/src/mapify_cli/templates/skills/map-cli-reference/SKILL.md b/src/mapify_cli/templates/skills/map-cli-reference/SKILL.md deleted file mode 100644 index 2a993c46..00000000 --- a/src/mapify_cli/templates/skills/map-cli-reference/SKILL.md +++ /dev/null @@ -1,124 +0,0 @@ ---- -name: map-cli-reference -description: >- - Quick reference for mapify CLI usage errors. Use when - encountering "no such command", "no such option", "parameter not found", - or when user asks "how to use mapify", "validate graph". - Do NOT use for workflow selection (use map-workflows-guide) or planning - methodology (use map-planning). -metadata: - author: azalio - version: 3.1.0 ---- - -# MAP CLI Quick Reference - -Fast lookup for commands, parameters, and common error corrections. - -**For comprehensive documentation**, see: -- [CLI_REFERENCE.json](../../../docs/CLI_REFERENCE.json) -- [CLI_COMMAND_REFERENCE.md](../../../docs/CLI_COMMAND_REFERENCE.md) - ---- - -## Quick Command Index - -### Validate Commands - -```bash -# Validate dependency graph -mapify validate graph task_plan.json -echo '{"subtasks":[...]}' | mapify validate graph - -# Visualize dependencies -mapify validate graph task_plan.json --visualize - -# Strict mode (fail on warnings) -mapify validate graph task_plan.json --strict -``` - -### Root Commands - -```bash -# Initialize project -mapify init my-project -mapify init . 
--mcp essential --force - -# System checks -mapify check -mapify check --debug - -# Upgrade agents -mapify upgrade -``` - ---- - -## Common Errors & Corrections - -### Error 1: Using Removed Commands - -**Issue**: `Error: No such command 'playbook'` or docs/examples mention `mapify playbook ...` - -**Solution**: -- The `playbook` command was removed in v4.0+ - ---- - -## Exit Codes (validate graph) - -- **0**: Valid graph (no critical errors) -- **1**: Invalid graph (critical errors or warnings with `--strict`) -- **2**: Malformed input (invalid JSON) - ---- - -## See Also - -**Related Skills**: -- [map-workflows-guide](../map-workflows-guide/SKILL.md) - -**Source Code**: -- `src/mapify_cli/__init__.py` - ---- - -## Examples - -### Example 1: Fixing a deprecated command error - -**User says:** "I'm getting `Error: No such command 'playbook'` when running mapify" - -**Actions:** -1. Identify error type — removed command usage -2. Explain: `playbook` command was removed in v4.0+ - -**Result:** User acknowledges the removed command. - -### Example 2: Validating a dependency graph - -**User says:** "How do I check if my task plan has circular dependencies?" - -**Actions:** -1. Show command: `mapify validate graph task_plan.json` -2. Explain exit codes: 0 = valid, 1 = invalid, 2 = malformed JSON -3. Suggest `--strict` flag for CI pipelines and `--visualize` for debugging - -**Result:** User validates their task plan and fixes dependency issues before running workflow. 
- ---- - -## Troubleshooting - -| Issue | Cause | Solution | -|-------|-------|----------| -| `No such command 'playbook'` | Removed in v4.0+ | Command no longer available | -| `No such option '--output'` | Wrong subcommand syntax | Check `mapify --help` for valid options | -| `validate graph` exit code 2 | Malformed JSON input | Validate JSON with `python -m json.tool < file.json` | -| `mapify init` overwrites files | Using `--force` flag | Omit `--force` to preserve existing configuration | - ---- - -**Version**: 1.1 -**Last Updated**: 2026-01-15 -**Lines**: ~200 (follows 500-line skill rule) diff --git a/src/mapify_cli/templates/skills/map-cli-reference/scripts/check-command.sh b/src/mapify_cli/templates/skills/map-cli-reference/scripts/check-command.sh deleted file mode 100755 index f7efaa49..00000000 --- a/src/mapify_cli/templates/skills/map-cli-reference/scripts/check-command.sh +++ /dev/null @@ -1,117 +0,0 @@ -#!/usr/bin/env bash -# Check if a mapify subcommand exists and show usage help. -# -# Usage: -# ./check-command.sh [option] -# -# Examples: -# ./check-command.sh validate graph -# ./check-command.sh init -# ./check-command.sh playbook # removed command -# -# Exit codes: -# 0 - Command exists -# 1 - Command not found -# 2 - Command removed - -set -euo pipefail - -SUBCOMMAND="${1:-}" -OPTION="${2:-}" - -if [ -z "$SUBCOMMAND" ]; then - echo "Usage: $0 [option]" - echo "" - echo "Checks if a mapify subcommand exists." 
- echo "" - echo "Available subcommands:" - echo " init - Initialize project with MAP framework" - echo " check - Run system checks" - echo " upgrade - Upgrade agent templates" - echo " validate - Validate dependency graphs" - echo "" - echo "Removed subcommands:" - echo " playbook - Removed in v4.0+" - exit 1 -fi - -# Removed subcommands -REMOVED_COMMANDS="playbook" - -# Known valid commands -VALID_COMMANDS="init check upgrade validate" - -# Check removed commands first -for dep in $REMOVED_COMMANDS; do - if [ "$SUBCOMMAND" = "$dep" ]; then - echo "ERROR: '$SUBCOMMAND' was removed in v4.0+" - exit 2 - fi -done - -# Check valid commands -FOUND=0 -for cmd in $VALID_COMMANDS; do - if [ "$SUBCOMMAND" = "$cmd" ]; then - FOUND=1 - break - fi -done - -if [ "$FOUND" -eq 0 ]; then - echo "ERROR: No such command '$SUBCOMMAND'" - echo "" - echo "Available commands: $VALID_COMMANDS" - echo "" - echo "Did you mean one of these?" - # Simple fuzzy match - for cmd in $VALID_COMMANDS; do - echo " mapify $cmd" - done - exit 1 -fi - -# Command exists, show help -echo "OK: 'mapify $SUBCOMMAND' is a valid command" - -# Show subcommand-specific help -case "$SUBCOMMAND" in - validate) - echo "" - echo "Usage: mapify validate graph [--strict] [--visualize]" - echo "" - echo "Options:" - echo " --strict Fail on warnings (exit code 1)" - echo " --visualize Show dependency graph" - echo "" - echo "Exit codes: 0=valid, 1=invalid, 2=malformed input" - if [ -n "$OPTION" ] && [ "$OPTION" != "graph" ]; then - echo "" - echo "WARNING: Unknown validate subcommand '$OPTION'. Did you mean 'graph'?" 
- fi - ;; - init) - echo "" - echo "Usage: mapify init [project-name] [--mcp essential|full] [--force]" - echo "" - echo "Options:" - echo " --mcp essential Install essential MCP tools only" - echo " --mcp full Install all MCP tools" - echo " --force Overwrite existing configuration" - ;; - check) - echo "" - echo "Usage: mapify check [--debug]" - echo "" - echo "Options:" - echo " --debug Show detailed diagnostic information" - ;; - upgrade) - echo "" - echo "Usage: mapify upgrade" - echo "" - echo "Upgrades agent templates to latest version." - ;; -esac - -exit 0 diff --git a/src/mapify_cli/templates/skills/map-workflows-guide/SKILL.md b/src/mapify_cli/templates/skills/map-workflows-guide/SKILL.md deleted file mode 100644 index 2512c00c..00000000 --- a/src/mapify_cli/templates/skills/map-workflows-guide/SKILL.md +++ /dev/null @@ -1,529 +0,0 @@ ---- -name: map-workflows-guide -description: >- - Guide for choosing the right MAP workflow based on task type, risk level, - and token budget. Use when user asks "which workflow should I use", - "difference between map-fast and map-efficient", "when to use map-debug", - or compares MAP workflows. Do NOT use for actual workflow execution — - use /map-efficient, /map-fast, etc. instead. Do NOT use for CLI errors - (use map-cli-reference). -version: 1.0 -metadata: - author: azalio - version: 3.1.0 ---- - -# MAP Workflows Guide - -This skill helps you choose the optimal MAP workflow for your development tasks. MAP Framework provides **12 workflow commands**: 4 primary workflows (`/map-fast`, `/map-efficient`, `/map-debug`, `/map-debate`) and 8 supporting commands (`/map-review`, `/map-check`, `/map-plan`, `/map-task`, `/map-tdd`, `/map-release`, `/map-resume`, `/map-learn`). Each is optimized for different scenarios with varying token costs, learning capabilities, and quality gates. Two additional workflows (`/map-feature`, `/map-refactor`) are planned but not yet implemented. 
- -## Quick Decision Tree - -Answer these 5 questions to find your workflow: - -``` -1. Is this a small, low-risk change with clear acceptance criteria? - YES → Use /map-fast (40-50% tokens, no learning) - NO → Continue to question 2 - -2. Are you debugging/fixing a specific bug or test failure? - YES → Use /map-debug (70-80% tokens, focused analysis) - NO → Continue to question 3 - -3. Do stakeholders need documented reasoning and trade-off analysis? - YES → Use /map-debate (3x cost, Opus arbiter, explicit reasoning) - NO → Continue to question 4 - -4. Is this critical infrastructure or security-sensitive code? - YES → Use /map-efficient (60-70% tokens, recommended default) - NO → Continue to question 5 - -5. Is this a change you'll maintain long-term or that has non-trivial impact? - YES → Use /map-efficient (60-70% tokens, batched learning) ← RECOMMENDED - NO → If still low-risk and localized, /map-fast may be acceptable -``` - ---- - -## Workflow Comparison Matrix - -| Aspect | `/map-fast` | `/map-efficient` | `/map-debug` | `/map-debate` | -|--------|-----------|-----------------|-------------|--------------| -| **Token Cost** | 40-50% | **60-70%** | 70-80% | ~3x baseline | -| **Learning** | ❌ None | ✅ Via /map-learn | ✅ Per-subtask | ✅ Via /map-learn | -| **Quality Gates** | Basic | Essential | Focused | Multi-variant | -| **Impact Analysis** | ❌ Skipped | ⚠️ Conditional | ✅ Yes | ⚠️ Conditional | -| **Multi-Variant** | ❌ Never | ⚠️ Optional (--self-moa) | ❌ Never | ✅ Always (3 variants) | -| **Synthesis Model** | N/A | Sonnet | N/A | **Opus** | -| **Best For** | Low-risk | **Production** | Bugs | Reasoning transparency | -| **Recommendation** | Use sparingly | **DEFAULT** | Issues | Complex decisions | - -> **Note:** `/map-feature` and `/map-refactor` are **planned but not yet implemented**. -> Use `/map-efficient` for critical features and refactoring tasks. -> See [Planned Workflows](#planned-workflows) below for details. 
- ---- - -## Detailed Workflow Descriptions - -### 1. /map-fast — Low-Risk Changes ⚡ - -**Use this when:** -- Small, localized changes with minimal blast radius -- Minor fixes and tweaks where speed matters -- Low-risk maintenance work - -**What you get:** -- ✅ Full implementation (Actor generates code) -- ✅ Basic validation (Monitor checks correctness) -- ❌ NO quality scoring (Evaluator skipped) -- ❌ NO impact analysis (Predictor skipped entirely) -- ❌ NO learning (Reflector skipped) - -**Trade-offs:** -- Saves 50-60% tokens vs full pipeline (every agent per subtask) -- Knowledge never accumulates -- Minimal quality gates (only basic checks) - -**Example tasks:** -- "Fix a small validation edge case" -- "Update error message wording" -- "Add a small CLI option with tests" - -**Command syntax:** -```bash -/map-fast [task description] -``` - -**When to AVOID:** -- ❌ Security-critical logic -- ❌ Wide refactors or multi-module changes -- ❌ High uncertainty / unclear requirements - -**See also:** [resources/map-fast-deep-dive.md](resources/map-fast-deep-dive.md) - ---- - -### 2. 
/map-efficient — Production Features (RECOMMENDED) 🎯 - -**Use this when:** -- Building production features (moderate complexity) -- Most of your development work -- You want full learning but need token efficiency -- Standard feature implementation with familiar patterns - -**What you get:** -- ✅ Full implementation (Actor) -- ✅ Comprehensive validation (Monitor with feedback loops) -- ✅ Impact analysis (Predictor runs conditionally) -- ✅ Tests gate + Linter gate per subtask -- ✅ Final-Verifier (adversarial verification at end) -- ✅ **Learning via /map-learn** (Reflector, optional after workflow) - -**Optimization strategy:** -- **Conditional Predictor:** Runs only if risk detected (security, breaking changes) -- **Batched Learning:** Reflector runs ONCE after all subtasks complete -- **Result:** 35-40% token savings vs full pipeline while preserving learning -- **Same quality gates:** Monitor still validates each subtask - -**When Predictor runs:** -- Modifies authentication/security code -- Introduces breaking changes -- High complexity detected -- Multiple files affected - -**Example tasks:** -- "Implement user registration with email validation" -- "Add pagination to blog posts API" -- "Create dashboard analytics component" -- "Build shopping cart feature" - -**Command syntax:** -```bash -/map-efficient [task description] -``` - -**Quality guarantee:** -Despite token optimization, preserves: -- Per-subtask validation (Monitor always checks) -- Complete implementation feedback loops -- Full learning (batched, not skipped) - -**See also:** [resources/map-efficient-deep-dive.md](resources/map-efficient-deep-dive.md) - ---- - -### 3. 
/map-debug — Bug Fixes 🐛 - -**Use this when:** -- Fixing specific bugs or defects -- Resolving test failures -- Investigating runtime errors -- Performing root cause analysis -- Diagnosing unexpected behavior - -**What you get:** -- ✅ Focused implementation (Actor targets root cause) -- ✅ Validation (Monitor verifies fix) -- ✅ Root cause analysis -- ✅ Impact assessment (Predictor) -- ✅ Learning (Reflector) - -**Specialized features:** -- Error log analysis -- Stack trace interpretation -- Test failure diagnosis -- Regression prevention - -**Example tasks:** -- "Fix failing tests in auth.test.ts" -- "Debug TypeError in user service" -- "Resolve race condition in async code" -- "Fix memory leak in notification handler" - -**Command syntax:** -```bash -/map-debug [issue description or error message] -``` - -**Include in request:** -- Error message/stack trace -- When it occurs (specific scenario) -- What the expected behavior is -- Relevant log files if available - -**See also:** [resources/map-debug-deep-dive.md](resources/map-debug-deep-dive.md) - ---- - -### Planned Workflows - -The following workflows are **planned but not yet implemented**. Use `/map-efficient` as a substitute for both. - -#### /map-feature — Critical Features (PLANNED) - -Intended for security-critical and high-risk features requiring maximum validation (100% token cost, per-subtask learning, Predictor always runs). **Not yet implemented.** Use `/map-efficient` instead — it provides the same agent pipeline with conditional Predictor and batched learning. - -**Design reference:** [resources/map-feature-deep-dive.md](resources/map-feature-deep-dive.md) - -#### /map-refactor — Code Restructuring (PLANNED) - -Intended for refactoring with dependency-focused impact analysis and breaking change detection. **Not yet implemented.** Use `/map-efficient` instead — describe the refactoring intent in the task description for appropriate Predictor analysis. 
- -**Design reference:** [resources/map-refactor-deep-dive.md](resources/map-refactor-deep-dive.md) - ---- - -## Understanding MAP Agents - -MAP workflows orchestrate **11 specialized agents**, each with specific responsibilities: - -### Execution & Validation Agents - -**TaskDecomposer** — Breaks goal into subtasks -- Analyzes requirements -- Creates atomic, implementable subtasks -- Defines acceptance criteria for each -- Estimates complexity - -**Actor** — Writes code and implements -- Generates implementation -- Makes file changes -- Uses existing patterns from previous workflows - -**Monitor** — Validates correctness -- Checks implementation against criteria -- Runs tests to verify -- Identifies issues -- Feedback loop: Returns to Actor if invalid - -**Evaluator** — Quality gates -- Scores implementation quality (0-10) -- Checks completeness -- Approves/rejects solution -- Feedback loop: Returns to Actor if score < threshold -- **Only in /map-debug, /map-review** (skipped in /map-efficient, /map-fast, /map-debate) - -### Analysis Agents - -**Predictor** — Impact analysis -- Analyzes dependencies -- Predicts side effects -- Identifies risks and breaking changes -- **Conditional in /map-efficient** (runs if risk detected) -- **Always in /map-debug** (focused analysis) - -### Learning Agents - -**Reflector** — Pattern extraction -- Analyzes what worked and failed -- Extracts reusable patterns -- Prevents duplicate pattern extraction -- **Batched in /map-efficient** (runs once at end, via /map-learn) -- **Skipped in /map-fast** (no learning) - -### Optional Agent - -**Documentation-Reviewer** — Documentation validation -- Reviews completeness -- Checks consistency -- Validates examples -- Verifies external dependency docs current - -### Synthesis Agents - -**Debate-Arbiter** — Multi-variant cross-evaluation (MAP Debate) -- Cross-evaluates Actor variants with explicit reasoning -- Synthesizes optimal solution from multiple approaches -- Uses Opus model for 
reasoning transparency -- **Only in /map-debate workflow** - -**Synthesizer** — Solution synthesis -- Extracts decisions from multiple variants -- Generates unified code from best elements (Self-MoA) -- Merges insights across Actor outputs -- **Used in /map-efficient with --self-moa flag** - -### Discovery & Verification Agents - -**Research-Agent** — Codebase discovery -- Heavy codebase reading with compressed output -- Gathers context proactively before Actor implementation -- Prevents context pollution in implementation agents -- **Used in /map-plan, /map-efficient, /map-debug** - -**Final-Verifier** — Adversarial verification (Ralph Loop) -- Root cause analysis via adversarial testing -- Terminal verification after all other agents -- Ensures no regressions or overlooked issues -- **Used in /map-check, /map-efficient** - ---- - -## Decision Flowchart - -``` -START: What type of development task? -│ -├─────────────────────────────────────┐ -│ Small, low-risk change? │ -│ (Localized, clear acceptance) │ -├─────────────────────────────────────┘ -│ YES → /map-fast (40-50% tokens, no learning) -│ -│ NO ↓ -│ -├─────────────────────────────────────┐ -│ Debugging/fixing a specific issue? │ -│ (Bug, test failure, error) │ -├─────────────────────────────────────┘ -│ YES → /map-debug (70-80% tokens, focused analysis) -│ -│ NO ↓ -│ -└─────────────────────────────────────┐ - Everything else (features, │ - refactoring, critical code) ←──────┘ - → /map-efficient (60-70% tokens, RECOMMENDED) -``` - ---- - -## Common Questions - -**Q: Which workflow should I use by default?** - -A: **`/map-efficient`** for 80% of tasks. 
-- Best balance of quality and token efficiency -- Full learning preserved (just batched) -- Suitable for all production code -- Default recommendation for feature development - -**Q: When is /map-fast actually acceptable?** - -A: When the change is small and low-risk: -- Localized fixes with minimal blast radius -- Small UI/text tweaks -- Minor maintenance changes - -Avoid /map-fast for: -- Security or critical infrastructure -- Broad refactors or multi-module changes -- High uncertainty requirements - -**Q: What about /map-feature and /map-refactor?** - -A: These are **planned but not yet implemented**. Use `/map-efficient` for all feature development and refactoring tasks. `/map-efficient` provides the full agent pipeline (Actor, Monitor, conditional Predictor, Tests/Linter gates, Final-Verifier) with optional learning via `/map-learn`. Describe the risk level and refactoring intent in your task description for appropriate Predictor analysis. - -**Q: Can I switch workflows mid-task?** - -A: No, each workflow is a complete pipeline. If you started with wrong workflow: -1. Complete current workflow -2. Start new workflow with correct one -3. 
Re-implement if needed - -**Q: How do I know if Predictor actually ran in /map-efficient?** - -A: Check agent output for indicators: -``` -✅ Predictor: [Risk detected - Full analysis] -⏭️ Predictor: [Skipped - Low risk item] -``` - -Predictor runs if: -- Subtask touches authentication/security code -- Breaking changes detected -- High complexity estimated -- Multiple files affected - ---- - -## Resources & Deep Dives - -For detailed information on each workflow: - -- **[map-fast Deep Dive](resources/map-fast-deep-dive.md)** — Token breakdown, skip conditions, risks -- **[map-efficient Deep Dive](resources/map-efficient-deep-dive.md)** — Optimization strategy, Predictor conditions, batching -- **[map-debug Deep Dive](resources/map-debug-deep-dive.md)** — Debugging strategies, error analysis, best practices -- **[map-feature Deep Dive](resources/map-feature-deep-dive.md)** — Design reference (PLANNED, not yet implemented) -- **[map-refactor Deep Dive](resources/map-refactor-deep-dive.md)** — Design reference (PLANNED, not yet implemented) - -Agent & system details: - -- **[Agent Architecture](resources/agent-architecture.md)** — How agents orchestrate and coordinate - ---- - -## Real-World Examples - -### Example 1: Choosing /map-efficient for a critical feature - -**Task:** "Add OAuth2 authentication" - -**Analysis:** -- Affects security (high-risk indicator) -- Affects multiple modules (breaking changes possible) -- First implementation of OAuth2 (high complexity) - -**Decision:** `/map-efficient` — describe the security-sensitive nature in the task description. Predictor will trigger conditionally on security-related subtasks. 
- -### Example 2: Choosing /map-debug - -**Task:** "Tests failing in checkout flow" - -**Analysis:** -- Specific issue (test failures) ✓ -- Not new feature (debugging) -- Needs root cause analysis ✓ - -**Decision:** `/map-debug` (focused on diagnosing failures) - -### Example 3: Choosing /map-efficient - -**Task:** "Add user profile page" - -**Analysis:** -- Standard production feature ✓ -- Moderate complexity (not first-time) ✓ -- No security implications -- No breaking changes - -**Decision:** `/map-efficient` (recommended default) - ---- - -## Integration with Auto-Activation - -This skill integrates with MAP's auto-activation system to suggest workflows: - -**Natural language request:** -``` -User: "Implement user registration" -MAP: 🎯 Suggests /map-efficient -``` - -**Questions from MAP:** -``` -MAP: "Is this for production?" -User: "Yes, but critical feature" -MAP: 🎯 Suggests /map-efficient with --self-moa instead -``` - -**Direct command:** -``` -User: "/map-efficient add pagination to blog API" -MAP: 📚 Loads this skill for context -``` - ---- - -## Tips for Effective Workflow Selection - -1. **Default to /map-efficient** — It's the recommended choice for 80% of tasks -2. **Use /map-fast sparingly** — Only for small, low-risk changes with clear scope -3. **Use /map-efficient for critical paths** — Describe risk context in the task description for appropriate Predictor triggers -4. **Trust the optimization** — /map-efficient preserves quality while cutting token usage -5. **Review deep dives** — When in doubt, check the appropriate deep-dive resource - ---- - -## Next Steps - -1. **First time using MAP?** Start with `/map-efficient` -2. **Have a critical feature?** Use `/map-efficient` with risk context in the task description -3. **Debugging an issue?** See [map-debug-deep-dive.md](resources/map-debug-deep-dive.md) -4. 
**Understanding agents?** See [Agent Architecture](resources/agent-architecture.md) ---- - -## Examples - -### Example 1: Choosing a workflow for a new feature - -**User says:** "I need to add JWT authentication to the API" - -**Actions:** -1. Assess risk level — security-sensitive (high-risk indicator) -2. Check if first implementation — yes, OAuth/JWT is new -3. Multiple modules affected — auth middleware, user service, token storage - -**Result:** Recommend `/map-efficient` — describe the security context in the task. Predictor will trigger on security-sensitive subtasks. Batched learning captures patterns at the end. - -### Example 2: Quick fix with clear scope - -**User says:** "Update the error message in the login form" - -**Actions:** -1. Assess risk — low, localized text change -2. Check blast radius — single file, no dependencies -3. No security implications - -**Result:** Recommend `/map-fast` — small, low-risk change with clear acceptance criteria. No learning needed. - -### Example 3: Debugging a test failure - -**User says:** "Tests in auth.test.ts are failing after the last merge" - -**Actions:** -1. Identify task type — debugging/fixing specific issue -2. Need root cause analysis — yes, regression after merge -3. Not a new feature or refactor - -**Result:** Recommend `/map-debug` — focused on diagnosing failures with root cause analysis and regression prevention. - ---- - -## Troubleshooting - -| Issue | Cause | Solution | -|-------|-------|----------| -| Wrong workflow chosen mid-task | Cannot switch workflows during execution | Complete current workflow, then restart with correct one | -| Predictor never runs in /map-efficient | Subtasks assessed as low-risk | Expected behavior; Predictor is conditional. 
Use /map-debug for guaranteed analysis | -| No patterns stored after /map-fast | /map-fast skips learning agents | By design — use /map-efficient + /map-learn for pattern accumulation | -| Skill suggests wrong workflow | Description trigger mismatch | Check skill-rules.json triggers; refine query wording | - ---- - -**Skill Version:** 1.0 -**Last Updated:** 2025-11-03 -**Recommended Reading Time:** 5-10 minutes -**Deep Dive Reading Time:** 15-20 minutes per resource diff --git a/src/mapify_cli/templates/skills/map-workflows-guide/resources/agent-architecture.md b/src/mapify_cli/templates/skills/map-workflows-guide/resources/agent-architecture.md deleted file mode 100644 index 1b8b212f..00000000 --- a/src/mapify_cli/templates/skills/map-workflows-guide/resources/agent-architecture.md +++ /dev/null @@ -1,266 +0,0 @@ -# Agent Architecture - -MAP Framework orchestrates 11 specialized agents in a coordinated workflow. - -## Agent Categories - -### Execution & Validation (Core Pipeline) - -**1. TaskDecomposer** -- **Role:** Breaks complex goals into atomic subtasks -- **Input:** User's high-level request -- **Output:** JSON with subtasks, dependencies, acceptance criteria -- **When it runs:** First step in every workflow - -**2. Actor** -- **Role:** Implements code changes -- **Input:** Subtask description, acceptance criteria -- **Output:** Code changes, rationale, test strategy -- **When it runs:** For each subtask (multiple times if revisions needed) - -**3. Monitor** -- **Role:** Validates correctness and standards compliance -- **Input:** Actor's implementation -- **Output:** Pass/fail verdict with specific issues -- **When it runs:** After every Actor output -- **Feedback loop:** Returns to Actor if validation fails (max 3-5 iterations) - -**4. 
Evaluator** -- **Role:** Quality scoring and final approval -- **Input:** Actor + Monitor results -- **Output:** Quality score (0-10), approve/reject decision -- **When it runs:** /map-debug, /map-review -- **Skipped in:** /map-efficient, /map-fast (Monitor provides sufficient validation) - -### Analysis - -**5. Predictor** -- **Role:** Impact analysis and dependency tracking -- **Input:** Planned changes -- **Output:** Affected files, breaking changes, risk assessment -- **When it runs:** - - /map-efficient: Conditional (only if Monitor flags high risk) - - /map-debug: Always (focused analysis) - - /map-debate: Conditional (same as /map-efficient) - - /map-fast: Never (skipped) - -### Learning - -**6. Reflector** -- **Role:** Extracts patterns and lessons learned -- **Input:** All agent outputs for subtask(s) -- **Output:** Insights, patterns discovered, pattern updates -- **When it runs:** - - /map-efficient, /map-debug, /map-debate: Batched (once at end, via /map-learn) - - /map-fast: Never (skipped) - -### Optional - -**7. Documentation-Reviewer** -- **Role:** Validates documentation completeness -- **Input:** Documentation files -- **Output:** Completeness assessment, dependency analysis -- **When it runs:** On-demand (not part of standard workflows) - -### Synthesis - -**8. Debate-Arbiter** -- **Role:** Cross-evaluates Actor variants with explicit reasoning -- **Input:** Multiple Actor outputs (variants) -- **Output:** Synthesized optimal solution with reasoning trace -- **When it runs:** /map-debate (per subtask, uses Opus model) - -**9. Synthesizer** -- **Role:** Extracts decisions from variants and generates unified code (Self-MoA) -- **Input:** Multiple Actor outputs -- **Output:** Merged implementation combining best elements -- **When it runs:** /map-efficient with --self-moa flag - -### Discovery & Verification - -**10. 
Research-Agent** -- **Role:** Heavy codebase reading with compressed output -- **Input:** Research question or exploration goal -- **Output:** Compressed context for implementation agents -- **When it runs:** /map-plan, /map-efficient, /map-debug (before Actor) - -**11. Final-Verifier** -- **Role:** Adversarial verification with Root Cause Analysis (Ralph Loop) -- **Input:** Complete implementation after all other agents -- **Output:** Verification verdict, regression analysis -- **When it runs:** /map-check, /map-efficient (terminal verification) - ---- - -## Orchestration Patterns - -### Linear Pipeline (map-fast) - -``` -TaskDecomposer → Actor → Monitor → Apply → Done -(No Evaluator, no Predictor, no learning) -``` - -### Conditional Pipeline (map-efficient) - -``` -TaskDecomposer - ↓ - For each subtask: - Actor → Monitor → [Predictor if high risk] → Tests → Linter → Apply - ↓ - Final-Verifier (adversarial verification of entire goal) - ↓ - Done! Optional: /map-learn → Reflector -``` - -### Multi-Variant Pipeline (map-debate) - -``` -TaskDecomposer - ↓ - For each subtask: - Actor×3 → Monitor×3 → debate-arbiter (Opus) - ↓ synthesized - Monitor → [Predictor if high risk] → Apply changes - ↓ - Batch learning (via /map-learn): - Reflector (all subtasks) → Done -``` - ---- - -## Feedback Loops - -### Actor ← Monitor Loop - -``` -Actor creates code - ↓ -Monitor validates - ↓ -Issues found? → YES → Feedback to Actor (iterate, max 3-5 times) - ↓ NO -Continue pipeline -``` - -### Actor ← Evaluator Loop - -``` -Monitor approved - ↓ -Evaluator scores quality - ↓ -Score < threshold? 
→ YES → Feedback to Actor (revise) - ↓ NO -Proceed to next stage -``` - ---- - -## Conditional Execution Logic - -### Predictor Conditions (map-efficient) - -Predictor runs if ANY of: -- Subtask modifies critical files (`auth/**`, `database/**`, `api/**`) -- Breaking API changes detected by Monitor -- High complexity score (≥8) from TaskDecomposer -- Multiple file modifications (>3 files) - -Otherwise: Skipped (token savings) - ---- - -## State Management - -### Per-Subtask State -- Actor output -- Monitor verdict -- Predictor analysis (if ran) -- Evaluator score (if ran) - -### Workflow State -- All subtask results -- Aggregated patterns (Reflector) - ---- - -## Communication Protocol - -Agents communicate via structured JSON: - -```json -{ - "agent": "Actor", - "subtask_id": "ST-001", - "output": { - "approach": "...", - "code_changes": [...], - "trade_offs": [...], - "used_bullets": [...] - } -} -``` - ---- - -## Error Handling - -### Actor Failures -- Monitor provides specific feedback -- Actor iterates (max 3-5 attempts) -- If still failing: Mark subtask as failed, continue with others - -### Learning Failures -- Reflector gracefully degrades -- Learning skipped but implementation continues -- Logged to stderr for debugging - ---- - -## Performance Optimization - -### Token Usage by Agent - -| Agent | Avg Tokens | Frequency | Workflow Impact | -|-------|------------|-----------|-----------------| -| TaskDecomposer | ~1.5K | Once | All workflows | -| Actor | ~2-3K | Per subtask | All workflows | -| Monitor | ~1K | Per Actor output | All workflows | -| Evaluator | ~0.8K | Per subtask | map-debug, map-review | -| Predictor | ~1.5K | Per subtask or conditional | Varies | -| Reflector | ~2K | Per subtask or batched | Varies | -| Debate-Arbiter | ~3-4K | Per subtask | map-debate only | -| Synthesizer | ~2K | Per subtask | map-efficient (--self-moa) | -| Research-Agent | ~2-3K | Once (before Actor) | map-plan, map-efficient, map-debug | -| Final-Verifier | ~2K | Once 
(terminal) | map-check, map-efficient | - -**map-efficient savings:** -- Skip Evaluator: ~0.8K per subtask -- Conditional Predictor: ~1.5K per low-risk subtask -- Batch Reflector: ~(N-1) × 2K for N subtasks - ---- - -## Extension Points - -### Adding New Agents - -To add a custom agent: -1. Create `.claude/agents/my-agent.md` with prompt template -2. Add to workflow command (e.g., `.claude/commands/map-efficient.md`) -3. Define when it runs (before/after which agents) -4. Specify input/output format - -### Custom Workflows - -Create `.claude/commands/map-custom.md`: -- Define agent sequence -- Specify conditional logic -- Document token cost and use cases - ---- - -**See also:** -- [map-efficient Deep Dive](map-efficient-deep-dive.md) - Conditional execution example diff --git a/src/mapify_cli/templates/skills/map-workflows-guide/resources/map-debug-deep-dive.md b/src/mapify_cli/templates/skills/map-workflows-guide/resources/map-debug-deep-dive.md deleted file mode 100644 index 959e1881..00000000 --- a/src/mapify_cli/templates/skills/map-workflows-guide/resources/map-debug-deep-dive.md +++ /dev/null @@ -1,258 +0,0 @@ -# /map-debug Deep Dive - -## When to Use - -**Bug fixes and error investigation:** -- Fixing failing tests -- Resolving runtime errors -- Investigating unexpected behavior -- Root cause analysis -- Performance debugging - -**Why /map-debug?** -- Focused on error analysis -- Root cause identification -- Pattern recognition for similar bugs - ---- - -## Debugging Workflow - -### Standard Pipeline - -``` -1. TaskDecomposer: Break down debugging into steps - - Reproduce the issue - - Identify root cause - - Implement fix - - Verify resolution - - Add regression tests - -2. For each subtask: - - Actor implements (with error context) - - Monitor validates (tests must pass) - - Predictor analyzes (impact of fix) - - Evaluator scores (completeness) - -3. Reflector extracts lessons: - - What caused the bug? - - How was it fixed? 
- - How to prevent similar bugs? - - Reflector also documents: - - Debugging techniques used - - Common pitfalls - - Prevention strategies -``` - ---- - -## Error Analysis Strategies - -### 1. Stack Trace Interpretation - -**Actor receives:** -``` -Error: TypeError: Cannot read property 'name' of undefined - at UserService.getDisplayName (user.service.ts:42) - at ProfileController.show (profile.controller.ts:18) -``` - -**Analysis:** -- Line 42: `user.name` but `user` is undefined -- Line 18: Called without null check -- Root cause: Missing user validation - -**Fix:** -```typescript -// Before -getDisplayName(user) { - return user.name; -} - -// After -getDisplayName(user) { - if (!user) { - throw new Error("User not found"); - } - return user.name; -} -``` - -### 2. Test Failure Diagnosis - -**Failed test:** -``` -Expected: 200 OK -Received: 404 Not Found -``` - -**Actor investigates:** -1. Check route configuration -2. Verify request format -3. Debug middleware chain -4. Check database state - -**Findings:** -- Route expects `/users/:id` (number) -- Test sends `/users/abc` (string) -- No type validation middleware - -**Fix:** Add parameter validation - ---- - -## Example: Debugging Race Condition - -**Task:** "Fix intermittent test failures in async code" - -**Decomposition:** -``` -ST-1: Reproduce the race condition reliably -ST-2: Identify critical section -ST-3: Implement synchronization -ST-4: Verify fix under load -ST-5: Add regression tests -``` - -**Execution:** - -``` -ST-1: Reproduce reliably -├─ Actor: Add test that fails consistently -│ └─ Strategy: Increase concurrency, reduce delays -├─ Monitor: ✅ Test fails reliably (good!) 
-└─ Predictor: Low risk (test code) - -ST-2: Identify critical section -├─ Actor: Add logging, trace execution order -│ └─ Finding: Two async operations modify same state -├─ Monitor: ✅ Issue identified -└─ Predictor: Medium risk (affects core logic) - -ST-3: Implement synchronization -├─ Actor: Add mutex/lock to critical section -├─ Monitor: ✅ Valid (tests pass) -├─ Predictor: ✅ RAN (affects async behavior) -│ └─ Impact: May reduce throughput -└─ Evaluator: ✅ Approved (score: 8/10) - -ST-4: Verify under load -├─ Actor: Run stress test (1000x concurrency) -├─ Monitor: ✅ All tests pass -└─ Evaluator: ✅ Approved - -ST-5: Regression tests -├─ Actor: Add concurrent test to test suite -├─ Monitor: ✅ Tests pass -└─ Evaluator: ✅ Approved - -Reflector: -├─ Pattern: "Race conditions in async state updates" -├─ Solution: "Use mutex for critical sections" -└─ Prevention: "Design for immutability" - -``` - ---- - -## Root Cause Analysis - -### 5 Whys Technique - -**Problem:** "Users can't log in" - -``` -Why 1: Login fails with 500 error - → Database query failing - -Why 2: Database query failing - → Connection pool exhausted - -Why 3: Connection pool exhausted - → Connections not being released - -Why 4: Connections not being released - → Missing finally block in async function - -Why 5: Missing finally block - → Copy-pasted code from old example - -Root cause: Improper async error handling -``` - -**Fix:** Add proper resource cleanup - ---- - -## Debugging Patterns Learned - -### Common Bug Categories - -**1. Null/Undefined Errors** -- Pattern: Missing validation -- Fix: Add null checks, use optional chaining -- Prevention: TypeScript strict mode - -**2. Async/Await Issues** -- Pattern: Unhandled promise rejections -- Fix: Add try/catch, .catch() handlers -- Prevention: ESLint rules for promises - -**3. State Management Bugs** -- Pattern: Race conditions, stale closures -- Fix: Immutable updates, proper locking -- Prevention: Use state management libraries - -**4. 
Off-by-One Errors** -- Pattern: Array indexing, loop bounds -- Fix: Use array methods (map, filter) -- Prevention: Code review, unit tests - ---- - -## Prevention Strategies - -**After debugging, Reflector asks:** -1. How could this bug have been prevented? -2. What test was missing? -3. What pattern should we follow? - -**Reflector documents:** -```json -{ - "type": "TESTING_STRATEGY", - "content": "Add integration test for async operations", - "code_example": "test('handles concurrent requests', async () => { ... })", - "tags": ["async", "testing", "race-conditions"] -} -``` - ---- - -## Troubleshooting the Debugger - -**Issue:** Can't reproduce bug consistently -**Solution:** -- Add extensive logging -- Use debugger breakpoints -- Increase test iterations -- Test in production-like environment - -**Issue:** Root cause unclear -**Solution:** -- Simplify reproduction case -- Remove variables one by one -- Use git bisect to find regression commit - -**Issue:** Fix introduces new bugs -**Solution:** -- Predictor should catch this -- Run full test suite -- Check Predictor impact analysis - ---- - -**See also:** -- [map-efficient-deep-dive.md](map-efficient-deep-dive.md) - For implementing fixes -- [agent-architecture.md](agent-architecture.md) - Predictor's impact analysis diff --git a/src/mapify_cli/templates/skills/map-workflows-guide/resources/map-efficient-deep-dive.md b/src/mapify_cli/templates/skills/map-workflows-guide/resources/map-efficient-deep-dive.md deleted file mode 100644 index 6a2afb01..00000000 --- a/src/mapify_cli/templates/skills/map-workflows-guide/resources/map-efficient-deep-dive.md +++ /dev/null @@ -1,202 +0,0 @@ -# /map-efficient Deep Dive - -## Optimization Strategy - -### Predictor: Conditional Execution - -**Logic:** -```python -def should_run_predictor(subtask): - # Run if ANY condition true: - return ( - subtask.complexity == "high" or - subtask.modifies_critical_files() or - subtask.has_breaking_changes() or - 
subtask.affects_dependencies() - ) -``` - -**Critical files patterns:** -- `**/auth/**` - Authentication -- `**/database/**` - Schema changes -- `**/api/**` - Public API -- `**/*.proto` - Service contracts - -**Example:** -``` -Subtask 1: Add validation helper (utils/validation.ts) -→ Predictor: ⏭️ SKIPPED (low risk, no dependencies) - -Subtask 2: Update auth middleware (auth/middleware.ts) -→ Predictor: ✅ RAN (critical file detected) - -Subtask 3: Add unit tests (tests/auth.test.ts) -→ Predictor: ⏭️ SKIPPED (test file, no side effects) -``` - -### Reflector: Batched Learning - -**Full pipeline (theoretical baseline):** -``` -Subtask 1 → Actor → Monitor → Predictor → Evaluator → Reflector -Subtask 2 → Actor → Monitor → Predictor → Evaluator → Reflector -Subtask 3 → Actor → Monitor → Predictor → Evaluator → Reflector -``` -Result: 3 × (Predictor + Evaluator + Reflector) cycles - -**Optimized workflow (/map-efficient):** -``` -Subtask 1 → Actor → Monitor → [Predictor if high risk] → Apply -Subtask 2 → Actor → Monitor → [Predictor if high risk] → Apply -Subtask 3 → Actor → Monitor → [Predictor if high risk] → Apply - ↓ - Final-Verifier (adversarial verification) - ↓ - Done! Optionally run /map-learn: - Reflector (analyzes ALL subtasks, consolidates patterns) -``` -Result: No Evaluator, no per-subtask Reflector. Learning decoupled to /map-learn. 
- -**Token savings:** 35-40% vs full pipeline - ---- - -## When to Use /map-efficient - -✅ **Use for:** -- Production features (moderate complexity) -- API endpoints -- UI components -- Database queries -- Business logic -- Most development work (80% of tasks) - -❌ **Don't use for:** -- Critical infrastructure (use /map-efficient with --self-moa or /map-debate) -- Small, low-risk changes (use /map-fast) -- Simple bug fixes (use /map-debug) - ---- - -## Quality Preservation - -**Myth:** "Optimized workflows sacrifice quality" - -**Reality:** /map-efficient preserves essential quality gates: -- ✅ Monitor validates every subtask (correctness gate) -- ✅ Predictor runs when needed (conditional impact analysis) -- ✅ Tests gate and linter gate run per subtask -- ✅ Final-Verifier checks entire goal at end (adversarial verification) -- ✅ Learning available via /map-learn after workflow completes - -**What's optimized (intentionally omitted per-subtask):** -- Evaluator — Monitor validates correctness directly -- Reflector — decoupled to /map-learn (optional, run after workflow) - ---- - -## Example Walkthrough - -**Task:** "Implement blog post pagination API" - -**Decomposition:** -- ST-1: Add pagination params to GET /posts endpoint -- ST-2: Update PostService to support offset/limit -- ST-3: Add integration tests - -**Execution trace:** - -``` -TaskDecomposer: -├─ ST-1: Add pagination params (complexity: low) -├─ ST-2: Update service (complexity: medium, affects API) -└─ ST-3: Add tests (complexity: low) - -ST-1: Pagination params -├─ Actor: Modify routes/posts.ts -├─ Monitor: ✅ Valid -├─ Predictor: ⏭️ SKIPPED (low risk) -├─ Tests gate: ✅ Passed -└─ Linter gate: ✅ Passed - -ST-2: Service update -├─ Actor: Modify services/PostService.ts -├─ Monitor: ✅ Valid -├─ Predictor: ✅ RAN (affects API contract) -│ └─ Impact: Breaking change if clients expect all posts -├─ Tests gate: ✅ Passed -└─ Note: "Add API versioning or deprecation notice" - -ST-3: Integration tests -├─ Actor: 
Add tests/posts.integration.test.ts -├─ Monitor: ✅ Valid (tests pass) -├─ Predictor: ⏭️ SKIPPED (test file) -├─ Tests gate: ✅ Passed -└─ Linter gate: ✅ Passed - -Final-Verifier: ✅ All subtasks verified, goal achieved - -Optional /map-learn: - Reflector (batched): - ├─ Analyzed: 3 subtasks - ├─ Found similar pagination patterns - ├─ Extracted: pagination pattern, API versioning, test structure - └─ Consolidated: 1 new pattern (API pagination), 1 updated (test coverage++) -``` - -**Token usage:** -- Full pipeline: ~12k tokens -- /map-efficient: ~7.5k tokens -- **Savings: 37.5%** - -**Quality: Identical** -- All validations passed -- Breaking change detected -- Tests written -- Patterns learned - ---- - -## Configuration - -Edit `.claude/commands/map-efficient.md` to customize: - -**Predictor conditions:** -```python -# Add custom critical paths -CRITICAL_PATHS = [ - "auth/**", - "database/**", - "api/**", - "config/**", # Your addition -] -``` - -**Batch size:** -```python -# Default: Batch all subtasks -# Override: Batch every N subtasks -BATCH_SIZE = None # or 5 for large tasks -``` - ---- - -## Troubleshooting - -**Issue:** Predictor always skips -**Cause:** No critical file patterns matched -**Fix:** Review `subtask.modifies_critical_files()` logic - -**Issue:** Learning not happening -**Cause:** Reflector not running -**Fix:** Check workflow completion (must finish all subtasks) - -**Issue:** Token usage higher than expected -**Cause:** Predictor running too often -**Fix:** Review risk detection conditions - ---- - -**See also:** -- [map-feature-deep-dive.md](map-feature-deep-dive.md) - Full validation approach -- [agent-architecture.md](agent-architecture.md) - How agents orchestrate diff --git a/src/mapify_cli/templates/skills/map-workflows-guide/resources/map-fast-deep-dive.md b/src/mapify_cli/templates/skills/map-workflows-guide/resources/map-fast-deep-dive.md deleted file mode 100644 index dce9fc9e..00000000 --- 
a/src/mapify_cli/templates/skills/map-workflows-guide/resources/map-fast-deep-dive.md +++ /dev/null @@ -1,234 +0,0 @@ -# /map-fast Deep Dive - -## When to Use (and When NOT to Use) - -### ✅ Acceptable Use Cases - -**ONLY for small, low-risk changes:** -- Localized bug fixes with clear acceptance criteria -- Small UI/text tweaks -- Narrow refactors confined to a single module/file -- Maintenance changes where impact is easy to validate - -### ❌ NEVER Use For - -**High-risk code paths:** -- Features that will be maintained -- Critical infrastructure -- Security-sensitive functionality -- Code that others will build on - -**Why?** No learning means: -- Patterns not captured → team doesn't learn -- Knowledge base not updated → knowledge lost -- Patterns not synced → other projects don't benefit -- Technical debt accumulates - ---- - -## What Gets Skipped - -### Agents NOT Called - -**Evaluator (Quality Scoring)** -- No quality scoring (0-10 scale) -- No approval/rejection gate -- Monitor handles basic correctness only - -**Predictor (Impact Analysis)** -- No dependency analysis -- Breaking changes undetected -- Side effects not predicted - -**Reflector (Pattern Extraction)** -- Successful patterns not captured -- Failures not documented -- Knowledge not extracted - -### What Remains - -**Actor + Monitor:** -- Basic implementation ✅ -- Correctness validation ✅ - -**Result:** Functional code, but zero learning and no quality scoring - ---- - -## Token Savings Breakdown - -| Agent | Tokens | Status | -|-------|--------|--------| -| TaskDecomposer | ~1.5K | ✅ Runs | -| Actor | ~2-3K | ✅ Runs | -| Monitor | ~1K | ✅ Runs | -| Evaluator | ~0.8K | ❌ Skipped | -| Predictor | ~1.5K | ❌ Skipped | -| Reflector | ~2K | ❌ Skipped | - -**Total saved:** ~4.3K per subtask -**Percentage:** 40-50% vs full pipeline - ---- - -## Example: When map-fast Makes Sense - -**Scenario:** "Fix a nil/None check in a request handler" - -**Why map-fast is acceptable:** -``` -Goal: Small, localized fix 
-Timeline: Short -Outcome: Production-quality fix with tests -Next step: If scope grows, switch to /map-efficient -``` - -**Execution:** -``` -TaskDecomposer: 2 subtasks -ST-1: Add the missing nil/None check in the request handler - Actor → Monitor → Apply -ST-2: Add a regression test for the nil/None case - Actor → Monitor → Apply -Done. No Evaluator, no Reflector, no patterns learned. -``` - -**Appropriate because:** -- Low blast radius -- Easy to verify with targeted tests -- Requirements are clear - ---- - -## Example: When map-fast is WRONG - -**Scenario:** "Implement user authentication" - -**Why map-fast is wrong:** -``` -Goal: Production authentication (critical!) -Timeline: Doesn't matter -Outcome: Must be secure, maintainable -Risk: High (security, breaking changes) -``` - -**Problems with using map-fast:** -1. No Predictor → Breaking changes undetected -2. No Reflector → Security patterns not learned -3. No learning → Team doesn't learn from mistakes -4. High risk for under-validation mindset - -**Correct choice:** `/map-efficient` (critical infrastructure) - ---- - -## Common Pitfalls - -### Pitfall 1: "I'll make it quick, then refactor" - -**Problem:** Refactoring rarely happens -**Reality:** Technical debt accumulates -**Solution:** Use /map-efficient from the start - -### Pitfall 2: "This is just a quick change" - -**Problem:** Under-validated changes become long-lived -**Reality:** "Quick" changes often stick around -**Solution:** Default to production-quality standards - -### Pitfall 3: "I don't need learning for simple tasks" - -**Problem:** Simple patterns are most valuable -**Reality:** Basic patterns repeated most often -**Solution:** Use /map-efficient (batched learning, minimal overhead) - ---- - -## Decision Flowchart - -``` -Is the change small and low-risk? -│ -├─ YES → /map-fast acceptable -│ Examples: -│ - Localized bug fix with existing tests -│ - Small UI tweak -│ - Narrow refactor within a single file -│ -└─ NO, or uncertain → Use /map-efficient instead - Why? 
- - Nearly the same speed (only ~10-15% slower) - - Full learning preserved - - Better safe than sorry -``` - ---- - -## When Scope Grows - -If a task starts small but grows in scope or risk, switch to `/map-efficient` for the remainder. - -Why? -- Impact analysis (conditional Predictor) -- Learning preserved -- Stronger guardrails for multi-file work - ---- - -## Alternatives to Consider - -### Instead of /map-fast, consider: - -**1. /map-efficient (recommended)** -- Only ~10-15% slower than /map-fast -- Full learning preserved -- Suitable for production - -**2. Manual implementation** -- No agents at all -- Faster for tiny tasks (<50 lines) -- Use when MAP overhead doesn't make sense - -**3. /map-efficient or /map-debate** -- For high-risk changes -- Security or infrastructure work - ---- - -## Best Practices - -### When using /map-fast: - -1. **Document reduced analysis** - Note that /map-fast was used and why -2. **Run tests** - Ensure relevant unit/integration tests pass -3. **Keep changes small** - Avoid scope creep; switch workflows if needed -4. **Review critical paths** - Error handling, input validation, and security - -### General guidance: - -**Ask yourself:** -- Will anyone build on this code? → Don't use /map-fast -- Is this security-related? → Don't use /map-fast -- Will this integrate with production? → Don't use /map-fast -- Am I uncertain about rewrites? 
→ Don't use /map-fast - -**If all answers are "No" → /map-fast is acceptable** - ---- - -## Troubleshooting - -**Issue:** Team keeps using /map-fast for production -**Solution:** Code review policy: Reject PRs with /map-fast code - -**Issue:** Low-analysis workflow used for risky changes -**Solution:** Team policy: use /map-efficient for anything beyond low-risk/localized - -**Issue:** No learning happening on the project -**Solution:** Audit workflow usage, reduce /map-fast usage to <5% - ---- - -**See also:** -- [map-efficient-deep-dive.md](map-efficient-deep-dive.md) - Better alternative for most tasks -- [map-feature-deep-dive.md](map-feature-deep-dive.md) - For critical features diff --git a/src/mapify_cli/templates/skills/map-workflows-guide/resources/map-feature-deep-dive.md b/src/mapify_cli/templates/skills/map-workflows-guide/resources/map-feature-deep-dive.md deleted file mode 100644 index 9b84fdf0..00000000 --- a/src/mapify_cli/templates/skills/map-workflows-guide/resources/map-feature-deep-dive.md +++ /dev/null @@ -1,235 +0,0 @@ -# /map-feature Deep Dive - -> **STATUS: PLANNED — NOT YET IMPLEMENTED.** -> This workflow is designed but not yet available as a command. -> Use `/map-efficient` for all feature development, including critical/high-risk features. - -## When to Use - -**Critical features requiring maximum confidence:** -- Authentication and authorization systems -- Payment processing -- Database schema migrations -- Security-sensitive functionality -- First-time complex features -- High-risk refactoring - -**Why /map-feature?** -- Maximum validation (all agents, all subtasks) -- Per-subtask learning (immediate feedback) -- Full impact analysis (Predictor always runs) -- Highest quality assurance - ---- - -## Full Pipeline - -### Per-Subtask Cycle - -``` -For each subtask: - 1. Actor implements - 2. Monitor validates - 3. Predictor analyzes impact (ALWAYS) - 4. Evaluator scores quality - 5. If approved: - 5a. Reflector extracts patterns - 5b. 
Apply changes - 6. If not approved: Return to Actor -``` - -**Key difference from /map-efficient:** -- Predictor runs EVERY subtask (not conditional) -- Reflector runs AFTER EVERY subtask (not batched) - ---- - -## Per-Subtask Learning Rationale - -### Why Learn Per-Subtask? - -**Immediate feedback loop:** -``` -Subtask 1: Implement JWT generation - ↓ completed -Reflector: "JWT secret storage pattern" - ↓ pattern extracted -Subtask 2: Implement JWT validation - ↓ starts -Actor uses learned pattern - ↓ applies pattern -Uses env vars (learned from Subtask 1) -``` - -**Benefit:** Each subtask benefits from previous subtask learnings - -### Trade-off vs Batched Learning - -**Per-subtask (/map-feature):** -- ✅ Immediate pattern application -- ✅ Error correction within workflow -- ❌ Higher token cost (N × Reflector) - -**Batched (/map-efficient):** -- ✅ Lower token cost (1 × Reflector) -- ⚠️ Patterns applied in next workflow -- ✅ Holistic insights (sees all subtasks together) - -**When per-subtask matters:** -- Complex multi-step features -- Interdependent subtasks -- Learning applies immediately - ---- - -## Example: Critical Authentication System - -**Task:** "Implement OAuth2 authentication" - -**Why /map-feature:** -- Security-critical (high risk) -- Complex (multiple components) -- First-time implementation - -**Execution:** - -``` -TaskDecomposer: -├─ ST-1: Setup OAuth2 provider config -├─ ST-2: Implement authorization code flow -├─ ST-3: Secure token storage -├─ ST-4: Add refresh token rotation -└─ ST-5: Implement logout - -ST-1: OAuth2 provider config -├─ Actor: Create config/oauth.ts -├─ Monitor: ✅ Valid -├─ Predictor: ✅ RAN (security-sensitive) -│ └─ Impact: Config must not be committed -├─ Evaluator: ✅ Approved (score: 9/10) -└─ Reflector: Pattern "Store OAuth secrets in env" - -ST-2: Authorization code flow -├─ Actor: Implement auth/oauth.ts -│ └─ Uses .env for secrets (learned from ST-1!) 
-├─ Monitor: ✅ Valid -├─ Predictor: ✅ RAN (affects auth flow) -│ └─ Impact: All protected routes need update -├─ Evaluator: ✅ Approved (score: 9/10) -└─ Reflector: Pattern "PKCE for public clients" - -[ST-3, ST-4, ST-5 continue with same pattern] -``` - -**Token usage:** ~18K tokens (full pipeline, 5 subtasks) - -**Quality achieved:** -- Zero security vulnerabilities -- All patterns documented -- Team learned OAuth2 best practices - ---- - -## Predictor: Always-On Analysis - -### What Predictor Catches - -**Breaking changes:** -- API signature modifications -- Database schema changes -- Configuration format updates - -**Dependencies:** -- Affected services -- Required migrations -- Client updates needed - -**Risks:** -- Backward compatibility issues -- Performance impacts -- Security implications - -### Example Output - -```json -{ - "affected_files": [ - {"path": "api/auth.ts", "impact": "high"}, - {"path": "database/users.sql", "impact": "medium"} - ], - "breaking_changes": [ - { - "type": "API", - "description": "User model no longer returns password field", - "mitigation": "Update all API clients to not expect password" - } - ], - "required_updates": [ - "Update client SDK to v2.0", - "Run migration: add_oauth_tokens_table" - ], - "risk_level": "high" -} -``` - ---- - -## When map-feature is Overkill - -**Don't use for:** -- Simple CRUD operations → Use /map-efficient -- Bug fixes → Use /map-debug -- Non-critical features → Use /map-efficient -- Code you understand well → Use /map-efficient - -**Cost vs benefit:** -- /map-feature: 100% token cost -- /map-efficient: 60-70% token cost -- **Savings: 30-40% by using /map-efficient** - -**Rule of thumb:** -- Critical/security = /map-feature -- Production/moderate = /map-efficient -- Everything else = /map-efficient - ---- - -## Quality Metrics - -### Success Indicators - -**All features implemented:** -- ✅ All acceptance criteria met -- ✅ All tests passing -- ✅ No security vulnerabilities - -**Knowledge captured:** 
-- ✅ Patterns extracted (N subtasks → N+ patterns) -- ✅ Team can apply patterns immediately - -**Impact understood:** -- ✅ All breaking changes documented -- ✅ Migration path clear -- ✅ Dependencies updated - ---- - -## Troubleshooting - -**Issue:** Workflow taking too long -**Cause:** Per-subtask learning overhead -**Solution:** Consider /map-efficient for next similar task - -**Issue:** Too many patterns created -**Cause:** Reflector suggesting redundant patterns -**Solution:** Review and deduplicate patterns more aggressively - -**Issue:** Predictor always says "high risk" -**Cause:** Overly conservative risk assessment -**Solution:** Tune Predictor thresholds in `.claude/agents/predictor.md` - ---- - -**See also:** -- [map-efficient-deep-dive.md](map-efficient-deep-dive.md) - Optimized alternative -- [agent-architecture.md](agent-architecture.md) - Understanding all agents diff --git a/src/mapify_cli/templates/skills/map-workflows-guide/resources/map-refactor-deep-dive.md b/src/mapify_cli/templates/skills/map-workflows-guide/resources/map-refactor-deep-dive.md deleted file mode 100644 index 8865244a..00000000 --- a/src/mapify_cli/templates/skills/map-workflows-guide/resources/map-refactor-deep-dive.md +++ /dev/null @@ -1,332 +0,0 @@ -# /map-refactor Deep Dive - -> **STATUS: PLANNED — NOT YET IMPLEMENTED.** -> This workflow is designed but not yet available as a command. -> Use `/map-efficient` for refactoring tasks. 
- -## When to Use - -**Code restructuring without behavior changes:** -- Improving code organization -- Renaming for clarity -- Extracting common logic -- Cleaning up technical debt -- Simplifying complex functions -- Reorganizing file structure - -**Why /map-refactor?** -- Focus on dependency analysis -- Breaking change detection -- Migration planning -- Preserving functionality - ---- - -## Refactoring Workflow - -### Key Principle: Behavior Preservation - -``` -Refactoring = Changing structure WITHOUT changing behavior -``` - -**Verification:** -- All existing tests must pass -- No new features added -- API contracts preserved (or versioned) - -### Standard Pipeline - -``` -1. TaskDecomposer: Break refactoring into safe steps - - Identify dependencies - - Plan incremental changes - - Define rollback points - -2. For each subtask: - - Actor refactors code - - Monitor validates (tests MUST pass) - - Predictor analyzes impact (CRITICAL) - - Evaluator checks completeness - -3. Reflector extracts: - - What patterns emerged? - - What dependencies were discovered? - - What risks were mitigated? - - Refactoring techniques - - Dependency patterns - - Migration strategies -``` - ---- - -## Dependency Impact Analysis - -### Predictor's Role in Refactoring - -**Always runs for refactoring** (high priority): - -**What Predictor tracks:** -1. **Direct dependencies:** - - Files that import refactored module - - Functions that call refactored functions - - Types that extend refactored types - -2. **Indirect dependencies:** - - Services that depend on direct dependencies - - Tests that rely on behavior - - Configuration that references paths - -3. 
**Breaking changes:** - - Renamed exports - - Changed function signatures - - Moved files - -**Example output:** -```json -{ - "affected_files": [ - {"path": "services/user.service.ts", "impact": "high", "reason": "imports renamed function"}, - {"path": "tests/user.test.ts", "impact": "medium", "reason": "tests old API"}, - {"path": "api/routes.ts", "impact": "low", "reason": "indirect dependency"} - ], - "breaking_changes": [ - { - "type": "rename", - "from": "getUserData", - "to": "fetchUserProfile", - "affected": 12 файлов - } - ], - "migration_steps": [ - "1. Update imports in user.service.ts", - "2. Update function calls (12 locations)", - "3. Update tests to use new name", - "4. Run full test suite" - ] -} -``` - ---- - -## Example: Extract Service Pattern - -**Task:** "Refactor auth logic into separate service" - -**Current state:** -```typescript -// controllers/auth.controller.ts (300 lines, mixed concerns) -class AuthController { - login(req, res) { - // JWT generation logic - // Database queries - // Response formatting - // All mixed together - } -} -``` - -**Goal:** -```typescript -// services/auth.service.ts (clean separation) -class AuthService { - generateToken(user) { ... } - validateCredentials(email, password) { ... 
} -} - -// controllers/auth.controller.ts (thin controller) -class AuthController { - constructor(private authService: AuthService) {} - - login(req, res) { - const user = await this.authService.validateCredentials(...); - const token = this.authService.generateToken(user); - res.json({ token }); - } -} -``` - -**Decomposition:** -``` -ST-1: Create AuthService class skeleton -ST-2: Extract token generation logic -ST-3: Extract credential validation logic -ST-4: Update AuthController to use AuthService -ST-5: Update dependency injection -ST-6: Update all tests -``` - -**Execution:** - -``` -ST-1: Create skeleton -├─ Actor: Create services/auth.service.ts -├─ Monitor: ✅ Valid (compiles, tests pass) -├─ Predictor: ⏭️ Low risk (new file, no impact) -└─ Apply - -ST-2: Extract token generation -├─ Actor: Move generateToken() to AuthService -├─ Monitor: ✅ Valid -├─ Predictor: ✅ RAN (affects auth flow) -│ └─ Impact: AuthController must be updated -└─ Migration: Update imports - -ST-3: Extract validation -├─ Actor: Move validateCredentials() to AuthService -├─ Monitor: ✅ Valid -├─ Predictor: ✅ RAN -│ └─ Impact: 3 files import this function -└─ Migration: Update all imports - -ST-4: Update AuthController -├─ Actor: Inject AuthService, call methods -├─ Monitor: ✅ Valid (all tests pass) -├─ Predictor: ✅ RAN -│ └─ Impact: DI container must provide AuthService -└─ Migration: Update DI config - -ST-5: Update DI -├─ Actor: Register AuthService in container -├─ Monitor: ✅ Valid -└─ Apply - -ST-6: Update tests -├─ Actor: Mock AuthService in controller tests -├─ Monitor: ✅ All tests pass -└─ Done - -Reflector: -├─ Pattern: "Separate business logic from controllers" -├─ Pattern: "Use dependency injection for services" -└─ Technique: "Incremental refactoring (6 safe steps)" - -``` - -**Token usage:** ~9K tokens (6 subtasks, Predictor always runs) -**Risk:** Low (tests pass at each step) -**Result:** Clean separation, no behavior changes - ---- - -## Breaking Change Detection - -### 
What Counts as Breaking - -**API changes:** -- Function renamed -- Parameters added/removed/reordered -- Return type changed - -**Module changes:** -- File moved -- Export renamed -- Public interface modified - -**Behavior changes:** -- Performance characteristics -- Error handling -- Side effects - -### Migration Planning - -**Predictor generates:** -1. **List of affected files** (with impact level) -2. **Migration checklist** (step-by-step) -3. **Rollback strategy** (if migration fails) -4. **Testing plan** (what to verify) - ---- - -## Refactoring Patterns - -### 1. Extract Function - -**Before:** -```typescript -function processOrder(order) { - // 50 lines of complex logic - const total = order.items.reduce((sum, item) => sum + item.price * item.qty, 0); - const tax = total * 0.08; - const shipping = total > 50 ? 0 : 5; - return total + tax + shipping; -} -``` - -**After:** -```typescript -function calculateTotal(items) { - return items.reduce((sum, item) => sum + item.price * item.qty, 0); -} - -function calculateTax(total) { - return total * 0.08; -} - -function calculateShipping(total) { - return total > 50 ? 0 : 5; -} - -function processOrder(order) { - const total = calculateTotal(order.items); - const tax = calculateTax(total); - const shipping = calculateShipping(total); - return total + tax + shipping; -} -``` - -**Predictor impact:** Low (internal refactoring, API unchanged) - -### 2. Rename for Clarity - -**Before:** -```typescript -function getData(id) { ... } // Vague -function updateInfo(data) { ... } // Unclear -``` - -**After:** -```typescript -function fetchUserProfile(userId) { ... } // Clear -function updateUserEmail(email) { ... } // Specific -``` - -**Predictor impact:** High (breaking change, all callers must update) - -### 3. 
Move to Shared Module - -**Before:** -``` -utils/helpers.ts (500 lines, mixed utilities) -``` - -**After:** -``` -utils/string-helpers.ts (string functions) -utils/date-helpers.ts (date functions) -utils/array-helpers.ts (array functions) -``` - -**Predictor impact:** Medium (import paths change, but behavior same) - ---- - -## Troubleshooting - -**Issue:** Tests fail after refactoring -**Cause:** Behavior inadvertently changed -**Solution:** Revert, refactor in smaller steps - -**Issue:** Too many breaking changes -**Cause:** Refactoring too aggressive -**Solution:** Use adapter pattern for backward compatibility - -**Issue:** Predictor didn't catch dependency -**Cause:** Indirect/runtime dependency -**Solution:** Improve static analysis, add integration tests - ---- - -**See also:** -- [agent-architecture.md](agent-architecture.md) - Predictor's dependency analysis -- [map-feature-deep-dive.md](map-feature-deep-dive.md) - When refactoring is risky diff --git a/src/mapify_cli/templates/skills/map-workflows-guide/scripts/validate-workflow-choice.py b/src/mapify_cli/templates/skills/map-workflows-guide/scripts/validate-workflow-choice.py deleted file mode 100755 index 6a576a06..00000000 --- a/src/mapify_cli/templates/skills/map-workflows-guide/scripts/validate-workflow-choice.py +++ /dev/null @@ -1,159 +0,0 @@ -#!/usr/bin/env python3 -"""Validate that a workflow choice matches task characteristics. 
- -Usage: - python validate-workflow-choice.py --workflow --risk --size --type - -Example: - python validate-workflow-choice.py --workflow map-efficient --risk medium --size medium --type feature - python validate-workflow-choice.py --workflow map-fast --risk high --size large --type security - -Exit codes: - 0 - Workflow choice is appropriate - 1 - Workflow choice is suboptimal (warning) - 2 - Workflow choice is inappropriate (error) -""" - -import argparse -import json -import sys - -# Workflow appropriateness rules -WORKFLOW_RULES = { - "map-fast": { - "allowed_risk": ["low"], - "allowed_size": ["small"], - "allowed_types": ["fix", "tweak", "maintenance", "docs"], - "forbidden_types": ["security", "auth", "payment", "database-schema"], - }, - "map-efficient": { - "allowed_risk": ["low", "medium", "high"], - "allowed_size": ["small", "medium", "large"], - "allowed_types": [ - "feature", - "enhancement", - "fix", - "tweak", - "maintenance", - "docs", - "security", - "auth", - "payment", - "database-schema", - "infrastructure", - "refactor", - "restructure", - "rename", - "extract", - "cleanup", - ], - "forbidden_types": [], - }, - "map-debug": { - "allowed_risk": ["low", "medium", "high"], - "allowed_size": ["small", "medium", "large"], - "allowed_types": ["bug", "fix", "test-failure", "error", "regression"], - "forbidden_types": ["feature", "refactor"], - }, -} - -# Recommendations for risky combinations -RISK_OVERRIDES = { - ("map-fast", "high"): "map-efficient", - ("map-fast", "medium"): "map-efficient", -} - - -def validate(workflow: str, risk: str, size: str, task_type: str) -> dict: - """Validate workflow choice against task characteristics. 
- - Returns dict with: - valid: bool - level: "ok" | "warning" | "error" - message: str - recommendation: str | None - """ - if workflow not in WORKFLOW_RULES: - return { - "valid": False, - "level": "error", - "message": f"Unknown workflow: {workflow}", - "recommendation": "map-efficient", - } - - rules = WORKFLOW_RULES[workflow] - issues = [] - - # Check risk level - if risk not in rules["allowed_risk"]: - issues.append(f"Risk level '{risk}' is too high for {workflow}") - - # Check size - if size not in rules["allowed_size"]: - issues.append(f"Size '{size}' is not suitable for {workflow}") - - # Check forbidden types - if task_type in rules["forbidden_types"]: - issues.append(f"Task type '{task_type}' is forbidden for {workflow}") - - # Check risk overrides - override_key = (workflow, risk) - recommendation = RISK_OVERRIDES.get(override_key) - - if issues: - level = "error" if any("forbidden" in i for i in issues) else "warning" - return { - "valid": False, - "level": level, - "message": "; ".join(issues), - "recommendation": recommendation or "map-efficient", - } - - return { - "valid": True, - "level": "ok", - "message": f"Workflow '{workflow}' is appropriate for {risk}-risk {size} {task_type} task", - "recommendation": None, - } - - -def main(): - parser = argparse.ArgumentParser(description="Validate MAP workflow choice") - parser.add_argument( - "--workflow", - required=True, - choices=list(WORKFLOW_RULES.keys()), - help="Chosen workflow", - ) - parser.add_argument( - "--risk", - required=True, - choices=["low", "medium", "high"], - help="Task risk level", - ) - parser.add_argument( - "--size", - required=True, - choices=["small", "medium", "large"], - help="Task size", - ) - parser.add_argument("--type", required=True, dest="task_type", help="Task type") - parser.add_argument("--json", action="store_true", help="Output as JSON") - - args = parser.parse_args() - result = validate(args.workflow, args.risk, args.size, args.task_type) - - if args.json: - 
print(json.dumps(result, indent=2)) - else: - status = {"ok": "OK", "warning": "WARNING", "error": "ERROR"}[result["level"]] - print(f"[{status}] {result['message']}") - if result["recommendation"]: - print(f" Recommendation: Use {result['recommendation']} instead") - - exit_codes = {"ok": 0, "warning": 1, "error": 2} - sys.exit(exit_codes[result["level"]]) - - -if __name__ == "__main__": - main() diff --git a/src/mapify_cli/templates/skills/skill-rules.json b/src/mapify_cli/templates/skills/skill-rules.json index eedaa7c8..c625c850 100644 --- a/src/mapify_cli/templates/skills/skill-rules.json +++ b/src/mapify_cli/templates/skills/skill-rules.json @@ -2,29 +2,6 @@ "version": "1.0", "description": "Skill activation triggers for MAP Framework", "skills": { - "map-workflows-guide": { - "type": "domain", - "enforcement": "suggest", - "priority": "high", - "description": "Guide for choosing the right MAP workflow", - "promptTriggers": { - "keywords": [ - "which workflow", - "map-fast or map-efficient", - "difference between workflows", - "when to use", - "workflow comparison", - "map workflow", - "choose workflow" - ], - "intentPatterns": [ - "(which|what).*?(workflow|mode).*?(use|choose)", - "(difference|compare).*?(map-fast|map-efficient|map-debug)", - "(when|how).*(choose|use|pick).*(workflow|map-\\w+)", - "explain.*?(workflow|map-fast|map-efficient)" - ] - } - }, "map-planning": { "type": "domain", "enforcement": "suggest", @@ -69,29 +46,6 @@ "map-learn" ] } - }, - "map-cli-reference": { - "type": "domain", - "enforcement": "suggest", - "priority": "high", - "description": "CLI error corrections", - "promptTriggers": { - "keywords": [ - "mapify command", - "mapify error", - "no such command", - "no such option", - "validate graph", - "mapify init", - "mapify check" - ], - "intentPatterns": [ - "mapify.*(error|command|help|usage)", - "(no such).*(command|option)", - "validate.*(graph|dependency)", - "(how to|how do).*mapify" - ] - } } } } diff --git 
a/src/mapify_cli/templates/workflow-rules.json b/src/mapify_cli/templates/workflow-rules.json index 01df164c..c99bed74 100644 --- a/src/mapify_cli/templates/workflow-rules.json +++ b/src/mapify_cli/templates/workflow-rules.json @@ -66,25 +66,6 @@ ] } }, - "map-debate": { - "priority": "medium", - "description": "Multi-variant synthesis with Opus arbiter for reasoning transparency", - "promptTriggers": { - "keywords": [ - "debate", - "compare approaches", - "trade-offs", - "reasoning", - "multiple variants", - "architectural decision" - ], - "intentPatterns": [ - "(compare|debate|weigh).*?(approaches|options|trade-offs)", - "(need|want).*?(reasoning|transparency|justification)", - "(architectural|design).*?(decision|choice)" - ] - } - }, "map-fast": { "priority": "low", "description": "Small, low-risk changes with reduced analysis (NO learning)", diff --git a/tests/test_command_templates.py b/tests/test_command_templates.py index c7d640ba..fa7c55a4 100644 --- a/tests/test_command_templates.py +++ b/tests/test_command_templates.py @@ -109,7 +109,6 @@ def test_all_command_templates_exist(self, templates_commands_dir): """Test that all 12 expected command template files exist.""" expected_commands = [ "map-check.md", # Quality gates - "map-debate.md", # Multi-variant with Opus arbiter "map-debug.md", # Debugging workflow "map-efficient.md", # Recommended workflow "map-fast.md", # Minimal workflow diff --git a/tests/test_map_orchestrator.py b/tests/test_map_orchestrator.py index bca1808b..ee40198e 100644 --- a/tests/test_map_orchestrator.py +++ b/tests/test_map_orchestrator.py @@ -1218,5 +1218,274 @@ def test_no_plan_file_does_not_crash(self, branch_dir, tmp_path): assert result["branch"] == branch_dir +class TestMonitorFailed: + """Tests for monitor_failed() — automatic ACTOR retry on Monitor failure.""" + + def _make_monitor_state(self, tmp_path, branch, **overrides): + """Create a step_state.json at MONITOR phase.""" + state = map_orchestrator.StepState() + 
state.current_subtask_id = "ST-001" + state.current_step_id = "2.4" + state.current_step_phase = "MONITOR" + state.pending_steps = ["2.4"] + state.completed_steps = ["2.3"] + for k, v in overrides.items(): + setattr(state, k, v) + state_file = tmp_path / ".map" / branch / "step_state.json" + state.save(state_file) + return state_file + + def test_phase_resets_to_actor(self, branch_dir, tmp_path): + state_file = self._make_monitor_state(tmp_path, branch_dir) + result = map_orchestrator.monitor_failed(branch_dir, "fix it") + assert result["status"] == "retrying" + assert result["current_phase"] == "ACTOR" + state = map_orchestrator.StepState.load(state_file) + assert state.current_step_phase == "ACTOR" + assert state.current_step_id == "2.3" + + def test_retry_count_increments(self, branch_dir, tmp_path): + state_file = self._make_monitor_state(tmp_path, branch_dir) + result = map_orchestrator.monitor_failed(branch_dir, "") + assert result["retry_count"] == 1 + state = map_orchestrator.StepState.load(state_file) + assert state.retry_count == 1 + + def test_pending_steps_are_actor_and_monitor(self, branch_dir, tmp_path): + state_file = self._make_monitor_state(tmp_path, branch_dir) + map_orchestrator.monitor_failed(branch_dir, "") + state = map_orchestrator.StepState.load(state_file) + assert state.pending_steps == ["2.3", "2.4"] + + def test_tdd_mode_still_requeues_only_actor_monitor(self, branch_dir, tmp_path): + """TDD pre-steps (2.25/2.26) are NOT re-run on retry.""" + self._make_monitor_state(tmp_path, branch_dir, tdd_mode=True) + result = map_orchestrator.monitor_failed(branch_dir, "") + assert result["status"] == "retrying" + state_file = tmp_path / ".map" / branch_dir / "step_state.json" + state = map_orchestrator.StepState.load(state_file) + assert state.pending_steps == ["2.3", "2.4"] + + def test_max_retries_escalation(self, branch_dir, tmp_path): + self._make_monitor_state(tmp_path, branch_dir, retry_count=5, max_retries=5) + result = 
map_orchestrator.monitor_failed(branch_dir, "still broken") + assert result["status"] == "max_retries" + assert result["retry_count"] == 6 + + def test_feedback_file_written_when_nonempty(self, branch_dir, tmp_path): + self._make_monitor_state(tmp_path, branch_dir) + result = map_orchestrator.monitor_failed(branch_dir, "Missing Reset()") + assert result["feedback_file"] is not None + fb = Path(result["feedback_file"]) + assert fb.exists() + content = fb.read_text() + assert "Missing Reset()" in content + assert "retry 1" in content + + def test_feedback_file_none_when_empty(self, branch_dir, tmp_path): + self._make_monitor_state(tmp_path, branch_dir) + result = map_orchestrator.monitor_failed(branch_dir, "") + assert result["feedback_file"] is None + + def test_feedback_file_none_when_whitespace(self, branch_dir, tmp_path): + self._make_monitor_state(tmp_path, branch_dir) + result = map_orchestrator.monitor_failed(branch_dir, " ") + assert result["feedback_file"] is None + + def test_feedback_files_numbered_per_retry(self, branch_dir, tmp_path): + """Each retry creates a separate feedback file, not overwriting.""" + state_file = self._make_monitor_state(tmp_path, branch_dir) + r1 = map_orchestrator.monitor_failed(branch_dir, "issue 1") + # Reset phase back to MONITOR so the second call passes the guard + state = map_orchestrator.StepState.load(state_file) + state.current_step_phase = "MONITOR" + state.save(state_file) + r2 = map_orchestrator.monitor_failed(branch_dir, "issue 2") + assert r1["feedback_file"] != r2["feedback_file"] + assert Path(r1["feedback_file"]).exists() + assert Path(r2["feedback_file"]).exists() + + def test_state_saved_on_max_retries(self, branch_dir, tmp_path): + """State is persisted even in the max_retries early-return branch.""" + state_file = self._make_monitor_state( + tmp_path, branch_dir, retry_count=5, max_retries=5 + ) + map_orchestrator.monitor_failed(branch_dir, "") + state = map_orchestrator.StepState.load(state_file) + assert 
state.retry_count == 6 # incremented and saved + + def test_phase_guard_rejects_non_monitor_phase(self, branch_dir, tmp_path): + """monitor_failed() returns error if called from non-MONITOR phase.""" + self._make_monitor_state( + tmp_path, branch_dir, current_step_phase="ACTOR" + ) + result = map_orchestrator.monitor_failed(branch_dir, "feedback") + assert result["status"] == "error" + assert "ACTOR" in result["message"] + assert "MONITOR" in result["message"] + + def test_monitor_failed_then_get_next_step(self, branch_dir, tmp_path): + """Integration: after monitor_failed(), get_next_step() returns ACTOR.""" + self._make_monitor_state(tmp_path, branch_dir) + map_orchestrator.monitor_failed(branch_dir, "fix the bug") + result = map_orchestrator.get_next_step(branch_dir) + assert result["phase"] == "ACTOR" + assert result["step_id"] == "2.3" + + +class TestWaveMonitorFailed: + """Tests for wave_monitor_failed() — per-subtask retry in wave execution.""" + + def _make_wave_state(self, tmp_path, branch, **overrides): + state = map_orchestrator.StepState() + state.execution_waves = [["ST-001", "ST-002"]] + state.current_wave_index = 0 + state.subtask_phases = {"ST-001": "2.4", "ST-002": "2.4"} + state.subtask_retry_counts = {"ST-001": 0, "ST-002": 0} + for k, v in overrides.items(): + setattr(state, k, v) + state_file = tmp_path / ".map" / branch / "step_state.json" + state.save(state_file) + return state_file + + def test_subtask_phase_resets_to_actor(self, branch_dir, tmp_path): + state_file = self._make_wave_state(tmp_path, branch_dir) + result = map_orchestrator.wave_monitor_failed("ST-001", branch_dir, "fix") + assert result["status"] == "retrying" + assert result["current_phase"] == "ACTOR" + state = map_orchestrator.StepState.load(state_file) + assert state.subtask_phases["ST-001"] == "2.3" + + def test_other_subtask_unaffected(self, branch_dir, tmp_path): + state_file = self._make_wave_state(tmp_path, branch_dir) + map_orchestrator.wave_monitor_failed("ST-001", 
branch_dir, "") + state = map_orchestrator.StepState.load(state_file) + assert state.subtask_phases["ST-002"] == "2.4" # unchanged + + def test_retry_count_per_subtask(self, branch_dir, tmp_path): + state_file = self._make_wave_state(tmp_path, branch_dir) + map_orchestrator.wave_monitor_failed("ST-001", branch_dir, "") + map_orchestrator.wave_monitor_failed("ST-001", branch_dir, "") + state = map_orchestrator.StepState.load(state_file) + assert state.subtask_retry_counts["ST-001"] == 2 + assert state.subtask_retry_counts["ST-002"] == 0 + + def test_max_retries_escalation(self, branch_dir, tmp_path): + self._make_wave_state( + tmp_path, + branch_dir, + subtask_retry_counts={"ST-001": 5, "ST-002": 0}, + ) + result = map_orchestrator.wave_monitor_failed("ST-001", branch_dir, "") + assert result["status"] == "max_retries" + assert result["retry_count"] == 6 + + def test_feedback_file_includes_subtask_id(self, branch_dir, tmp_path): + self._make_wave_state(tmp_path, branch_dir) + result = map_orchestrator.wave_monitor_failed( + "ST-002", branch_dir, "type mismatch" + ) + assert result["feedback_file"] is not None + assert "ST-002" in result["feedback_file"] + content = Path(result["feedback_file"]).read_text() + assert "type mismatch" in content + + def test_feedback_file_none_when_empty(self, branch_dir, tmp_path): + self._make_wave_state(tmp_path, branch_dir) + result = map_orchestrator.wave_monitor_failed("ST-001", branch_dir, "") + assert result["feedback_file"] is None + + def test_new_subtask_starts_at_zero_retries(self, branch_dir, tmp_path): + """A subtask not in subtask_retry_counts starts at 0.""" + self._make_wave_state( + tmp_path, + branch_dir, + subtask_retry_counts={}, + ) + result = map_orchestrator.wave_monitor_failed("ST-001", branch_dir, "") + assert result["retry_count"] == 1 + + def test_max_retries_does_not_reset_subtask_phase(self, branch_dir, tmp_path): + """subtask_phases is NOT modified when max_retries is hit.""" + state_file = 
self._make_wave_state( + tmp_path, + branch_dir, + subtask_retry_counts={"ST-001": 5, "ST-002": 0}, + ) + map_orchestrator.wave_monitor_failed("ST-001", branch_dir, "") + state = map_orchestrator.StepState.load(state_file) + assert state.subtask_phases["ST-001"] == "2.4" # not reset on escalation + + def test_wave_monitor_failed_then_get_wave_step(self, branch_dir, tmp_path): + """Integration: after wave_monitor_failed(), get_wave_step() shows ACTOR for reset subtask.""" + self._make_wave_state(tmp_path, branch_dir) + map_orchestrator.wave_monitor_failed("ST-001", branch_dir, "fix type") + result = map_orchestrator.get_wave_step(branch_dir) + subtask_map = {s["subtask_id"]: s for s in result["subtasks"]} + assert subtask_map["ST-001"]["step_id"] == "2.3" + assert subtask_map["ST-001"]["phase"] == "ACTOR" + assert subtask_map["ST-002"]["step_id"] == "2.4" # unchanged + + +class TestReopenForFixes: + """Tests for reopen_for_fixes() — transition COMPLETE → ACTOR for review fixes.""" + + def _make_complete_state(self, tmp_path, branch, **overrides): + state = map_orchestrator.StepState() + state.current_step_id = "COMPLETE" + state.current_step_phase = "COMPLETE" + state.pending_steps = [] + state.completed_steps = ["1.0", "1.5", "1.6", "2.3", "2.4"] + for k, v in overrides.items(): + setattr(state, k, v) + state_file = tmp_path / ".map" / branch / "step_state.json" + state.save(state_file) + return state_file + + def test_reopens_from_complete_to_actor(self, branch_dir, tmp_path): + state_file = self._make_complete_state(tmp_path, branch_dir) + result = map_orchestrator.reopen_for_fixes(branch_dir, "fix type error") + assert result["status"] == "reopened" + assert result["current_phase"] == "ACTOR" + state = map_orchestrator.StepState.load(state_file) + assert state.current_step_phase == "ACTOR" + assert state.current_step_id == "2.3" + assert state.pending_steps == ["2.3", "2.4"] + + def test_resets_retry_count(self, branch_dir, tmp_path): + 
self._make_complete_state(tmp_path, branch_dir, retry_count=3) + map_orchestrator.reopen_for_fixes(branch_dir, "") + state_file = tmp_path / ".map" / branch_dir / "step_state.json" + state = map_orchestrator.StepState.load(state_file) + assert state.retry_count == 0 + + def test_rejects_non_complete_phase(self, branch_dir, tmp_path): + self._make_complete_state( + tmp_path, branch_dir, current_step_phase="MONITOR" + ) + result = map_orchestrator.reopen_for_fixes(branch_dir, "") + assert result["status"] == "error" + assert "MONITOR" in result["message"] + + def test_no_state_file_returns_error(self, branch_dir, tmp_path): + result = map_orchestrator.reopen_for_fixes(branch_dir, "") + assert result["status"] == "error" + + def test_feedback_file_written(self, branch_dir, tmp_path): + self._make_complete_state(tmp_path, branch_dir) + result = map_orchestrator.reopen_for_fixes(branch_dir, "fix DRY violation") + assert result["feedback_file"] is not None + content = Path(result["feedback_file"]).read_text() + assert "fix DRY violation" in content + + def test_reopen_then_get_next_step(self, branch_dir, tmp_path): + """Integration: after reopen, get_next_step returns ACTOR.""" + self._make_complete_state(tmp_path, branch_dir) + map_orchestrator.reopen_for_fixes(branch_dir, "review fixes") + result = map_orchestrator.get_next_step(branch_dir) + assert result["phase"] == "ACTOR" + assert result["step_id"] == "2.3" + + if __name__ == "__main__": pytest.main([__file__, "-v"]) diff --git a/tests/test_template_sync.py b/tests/test_template_sync.py index ecfd315c..0b430b58 100644 --- a/tests/test_template_sync.py +++ b/tests/test_template_sync.py @@ -189,7 +189,6 @@ def templates_commands_dir(self, project_root): "command", [ "map-check.md", - "map-debate.md", "map-debug.md", "map-efficient.md", "map-fast.md",