diff --git a/.cursor/hooks.json b/.cursor/hooks.json new file mode 100644 index 00000000..f53ded98 --- /dev/null +++ b/.cursor/hooks.json @@ -0,0 +1,21 @@ +{ + "version": 1, + "hooks": { + "preToolUse": [ + { + "command": "context-mode hook cursor pretooluse", + "matcher": "Shell|Read|Grep|WebFetch|mcp_web_fetch|mcp_fetch_tool|Task|MCP:ctx_execute|MCP:ctx_execute_file|MCP:ctx_batch_execute" + } + ], + "postToolUse": [ + { + "command": "context-mode hook cursor posttooluse" + } + ], + "stop": [ + { + "command": "context-mode hook cursor stop" + } + ] + } +} diff --git a/.cursor/mcp.json b/.cursor/mcp.json new file mode 100644 index 00000000..64465dbb --- /dev/null +++ b/.cursor/mcp.json @@ -0,0 +1,7 @@ +{ + "mcpServers": { + "context-mode": { + "command": "context-mode" + } + } +} diff --git a/.cursor/rules/context-mode.mdc b/.cursor/rules/context-mode.mdc new file mode 100644 index 00000000..0583cfeb --- /dev/null +++ b/.cursor/rules/context-mode.mdc @@ -0,0 +1,85 @@ +--- +description: context-mode routing rules for context window protection +alwaysApply: true +--- + +# context-mode + +Raw tool output floods context window. Use context-mode MCP tools to keep raw data in sandbox. + +## Think in Code — MANDATORY + +Analyze/count/filter/compare/search/parse/transform data: **write code** via `ctx_execute(language, code)`, `console.log()` only the answer. Do NOT read raw data into context. PROGRAM the analysis, not COMPUTE it. Pure JavaScript — Node.js built-ins only (`fs`, `path`, `child_process`). `try/catch`, handle `null`/`undefined`. One script replaces ten tool calls. + +## Tool Selection + +0. **MEMORY**: `ctx_search(sort: "timeline")` — after resume, check prior context before asking user. +1. **GATHER**: `ctx_batch_execute(commands, queries)` — runs all commands, auto-indexes, searches. ONE call replaces many steps. +2. **FOLLOW-UP**: `ctx_search(queries: ["q1", "q2", ...])` — all follow-up questions, ONE call (default relevance mode). +3. 
**PROCESSING**: `ctx_execute(language, code)` | `ctx_execute_file(path, language, code)` — sandbox, only stdout enters context. +4. **WEB**: `ctx_fetch_and_index(url)` then `ctx_search(queries)` — never dump raw HTML. +5. **INDEX**: `ctx_index(content, source)` — store in FTS5 for later search. + +## Parallel I/O batches + +For multi-URL fetches or multi-API calls, **always** include `concurrency: N` (1-8): + +- `ctx_batch_execute(commands: [3+ network commands], concurrency: 5)` — gh, curl, dig, docker inspect, multi-region cloud queries +- `ctx_fetch_and_index(requests: [{url, source}, ...], concurrency: 5)` — multi-URL batch fetch + +**Use concurrency 4-8** for I/O-bound work (network calls, API queries). **Keep concurrency 1** for CPU-bound (npm test, build, lint) or commands sharing state (ports, lock files, same-repo writes). + +GitHub API rate-limit: cap at 4 for `gh` calls. + +## Auth (with `~/.tokens`) + +- GitHub: use `gh` when logged in; else `GITHUB_TOKEN` — see `gauntletci.mdc` / `auth-tokens.mdc`. +- Other keys: read from `%USERPROFILE%\.tokens` inside `ctx_execute` only; set env vars for the subprocess; **never** index or echo token file contents. +- Do not put secret paths or values in `mcp.json`. + +## Forbidden Actions + +- DO NOT use Bash for >20 lines output — use `ctx_execute` or `ctx_batch_execute`. +- DO NOT use Read for analysis — use `ctx_execute_file`. Read IS correct for Edit. +- DO NOT use WebFetch — use `ctx_fetch_and_index`. +- DO NOT use curl/wget in terminal — use `ctx_fetch_and_index`. +- Bash ONLY for git, mkdir, rm, mv, navigation, short commands. +- DO NOT use `ctx_execute`/`ctx_execute_file` to create/modify files. ctx_execute is for analysis and computation only. + +## File Writing Policy + +ALWAYS use native file editing tools to create/modify files. NEVER use `ctx_execute`, `ctx_execute_file`, or Bash to write file content. + +## Output + +Terse like caveman. Technical substance exact. Only fluff die. 
+Drop: articles, filler (just/really/basically), pleasantries, hedging. Fragments OK. Short synonyms. Code unchanged. +Pattern: [thing] [action] [reason]. [next step]. Auto-expand for: security warnings, irreversible actions, user confusion. +Write artifacts to FILES — never inline. Return: file path + 1-line description. + +## Session Continuity + +Skills, roles, and decisions persist for the entire session. Do not abandon them as the conversation grows. + +## Memory + +Session history is persistent and searchable. On resume, search BEFORE asking the user: + +| Need | Command | +|------|---------| +| What did we decide? | `ctx_search(queries: ["decision"], source: "decision", sort: "timeline")` | +| What constraints exist? | `ctx_search(queries: ["constraint"], source: "constraint")` | + +DO NOT ask "what were we working on?" — SEARCH FIRST. +If search returns 0 results, proceed as a fresh session. + +## ctx Commands + +| Command | Action | +|---------|--------| +| `ctx stats` | Call ctx_stats MCP tool, display full output verbatim. | +| `ctx doctor` | Call ctx_doctor MCP tool, run returned shell command, display as checklist. | +| `ctx upgrade` | Call ctx_upgrade MCP tool, run returned shell command, display as checklist. | +| `ctx purge` | Call ctx_purge MCP tool with confirm: true. Warn user this is irreversible. | + +After /clear or /compact: knowledge base and session stats preserved. Use `ctx purge` to start fresh. diff --git a/.cursor/rules/gauntletci-build-instructions.mdc b/.cursor/rules/gauntletci-build-instructions.mdc new file mode 100644 index 00000000..d527aa00 --- /dev/null +++ b/.cursor/rules/gauntletci-build-instructions.mdc @@ -0,0 +1,463 @@ +--- +description: GauntletCI product architecture and build instructions (from Copilot) +alwaysApply: true +--- + +# GauntletCI — Copilot Build Instructions + +## What this is + +GauntletCI is a pre-commit developer tool that runs every changeset through +an 18-rule LLM-driven audit before it is committed. 
It behaves like the best +senior engineer you know reading your diff with full attention and no +obligation to be polite about what they find. + +The name is the product promise: you run the gauntlet before you ship. You +do not get through by looking good. You get through by actually holding up. + +This is NOT a static heuristic engine. Do not build pattern-matching rules. +Do not build a classifier. Do not build a confidence scoring system. The model +does the judgment. Your job is context assembly, prompt structure, and +delivery. + +--- + +## Evaluation architecture + +### Full flow + +``` +git diff --cached (staged changes) + │ + ▼ +┌─────────────────────────────┐ +│ Pre-flight Gate 1 │ Branch currency check +│ git fetch --dry-run │ Abort if branch is behind upstream +└─────────────────────────────┘ + │ + ▼ +┌─────────────────────────────┐ +│ Pre-flight Gate 2 │ Test passage check +│ Run configured test cmd │ Abort if any test fails +└─────────────────────────────┘ + │ + ▼ +┌─────────────────────────────┐ +│ Context assembly │ Diff + repo config + test result +│ Trim to token budget │ ~8000 tokens max for diff portion +└─────────────────────────────┘ + │ + ▼ +┌─────────────────────────────┐ +│ Single LLM call │ All 18 rules in one structured prompt +│ Structured JSON response │ One finding object per rule that fired +└─────────────────────────────┘ + │ + ▼ +┌─────────────────────────────┐ +│ Rendered output │ Terminal (CLI) or chat panel (extension) +└─────────────────────────────┘ + │ + ▼ +┌─────────────────────────────┐ +│ Telemetry emission │ Anonymized, opt-in, no code content +└─────────────────────────────┘ +``` + +**One LLM call per evaluation. No sequential calls. No chaining.** +Total time budget: under 10 seconds from invocation to output. If it takes +longer than that, developers will disable it. This is a hard constraint. + +### Pre-flight gates + +These run synchronously before the LLM call. They are deterministic and fast.
+If either fails, abort immediately with a specific error message and exit +non-zero. Do not spend a token. + +**Gate 1 — Branch currency** +```bash +git fetch --dry-run +git rev-list HEAD..@{upstream} --count +``` +If count > 0: report how many commits behind, name the upstream branch, stop. +The developer must pull before GauntletCI will proceed. + +**Gate 2 — Test passage** +Run the test command configured in `.gauntletci.json` or auto-detected from +the project (dotnet test, npm test, pytest, etc.). Capture stdout/stderr. If +exit code is non-zero: surface the failure output, stop. Do not evaluate a +changeset that already breaks tests. + +### LLM call design + +**Model selection** (configurable, in order of preference): +- Depth mode (default): claude-sonnet-4-5, gpt-4o +- Speed mode (`--fast`): claude-haiku-4-5, gpt-4o-mini + +**Prompt structure:** +- System prompt: GauntletCI identity, operating principles, global constraints, + all 18 rules with full Must Do / Must Not Do / Output Requirement sections, + standard output format, final instruction +- User prompt: assembled context (see Context assembly below) + +**Response format:** JSON array of finding objects. Rules that find no issue +are omitted entirely from the response — not null, not empty string, absent. +Parse failures must surface a clear error, not a silent pass. + +**Token budget:** +- System prompt (rules): ~4000 tokens, fixed +- User prompt (context): ~8000 tokens max +- Response: ~3000 tokens max +- Total: stays inside 16k context window on all supported models + +If the diff exceeds the token budget: prioritize added lines over removed +lines, prioritize changed files over unchanged files, trim context lines +aggressively, never truncate mid-hunk. Log that trimming occurred. 
+ +### Finding schema + +```json +{ + "rule_id": "GCI003", + "rule_name": "Behavioral Change Detection", + "severity": "high", + "finding": "OrderProcessor.Submit now silently swallows ArgumentException on line 47, changing caller contract.", + "evidence": "OrderProcessor.cs:47 — catch block added without rethrow or logging", + "why_it_matters": "Callers that depended on the exception to detect invalid orders will now receive a false success response.", + "suggested_action": "Either rethrow the exception or log it and return a typed error result. Do not swallow it silently.", + "confidence": "High" +} +``` + +**Vague findings are build failures.** If the model cannot cite a specific +file, symbol, or line, the finding must not be emitted. A finding that says +"error handling may need review" is not a finding. It is noise. Noise trains +developers to ignore the tool. + +--- + +## The 18 rules + +These are embedded verbatim in the system prompt. They are the product. +Do not summarize them. Do not paraphrase them. Include the full text of each +rule including Intent, Must Do, Must Not Do, and Output Requirement. 
+ +### Pre-flight gates (deterministic — not sent to the LLM) + +| Gate | Check | Failure behavior | +|------|-------|-----------------| +| Gate 1 | Branch currency | Abort, report commits behind, stop | +| Gate 2 | Test passage | Abort, surface failure output, stop | + +### LLM-evaluated rules + +| ID | Name | What it catches | +|----|------|----------------| +| GCI001 | Diff Integrity and Scope | Mixed concerns, incomplete diffs, scope creep | +| GCI002 | Goal Alignment | Implementation doesn't accomplish apparent goal | +| GCI003 | Behavioral Change Detection | Runtime behavior changes, intended or not | +| GCI004 | Breaking Change Risk | Public contract violations, caller breakage | +| GCI005 | Test Coverage Relevance | Missing, weak, or irrelevant tests | +| GCI006 | Edge Case Handling | Unhandled nulls, boundaries, invalid inputs | +| GCI007 | Error Handling Integrity | Swallowed exceptions, lost diagnostic context | +| GCI008 | Complexity Control | Disproportionate branching, nesting, abstraction | +| GCI009 | Consistency with Existing Patterns | Meaningful deviations that create defect risk | +| GCI010 | Hardcoding and Configuration | Magic values, secrets, environment assumptions | +| GCI011 | Performance Risk | Loops, allocations, blocking calls, N+1 queries | +| GCI012 | Security Risk | Injection, auth regressions, secret exposure | +| GCI013 | Observability and Debuggability | Reduced logging, lost correlation context | +| GCI014 | Rollback Safety | Irreversible changes, migration risk | +| GCI015 | Data Integrity Risk | Silent corruption, mis-mapping, partial writes | +| GCI016 | Concurrency and State Risk | Race conditions, lost awaits, blocking async | +| GCI017 | Production Readiness | Synthesis verdict: Ready / Needs Work / High Risk | +| GCI018 | Accountability Standard | Would a senior engineer put their name on this? | + +The full rule definitions live in `gauntletci-rules.md`. That file is the +authoritative source. 
The system prompt is generated from it at build time. +Do not maintain two copies of rule text manually. + +--- + +## Delivery mechanisms + +### Phase 1 — CLI (build this first, nothing else until it works) + +**Install:** +```bash +dotnet tool install -g gauntletci +gauntletci install # installs git hook into current repo +``` + +**Usage:** +```bash +gauntletci # evaluates staged changes (default) +gauntletci --full # evaluates all changes since last commit +gauntletci --fast # uses speed-tier model +gauntletci --rule GCI005 # runs a single rule only +gauntletci --format json # machine-readable output, no color +gauntletci --no-telemetry +gauntletci config # open config file +``` + +**Git hook behavior:** +The hook runs on `pre-commit`. If GauntletCI exits non-zero, the commit is +blocked. The developer sees the findings in the terminal and decides whether +to fix, amend, or `git commit --no-verify` to override. + +Do not block the commit silently. Do not block it without output. The +developer must always be able to see exactly why the commit was held. + +**Exit codes:** +- `0` — all gates passed, no High severity findings +- `1` — pre-flight gate failure or High severity finding +- `2` — configuration error or missing API key +- `3` — model call failed or timed out + +**NuGet package:** `GauntletCI` +Single binary, self-contained, no runtime dependency beyond .NET 8. + +### Phase 2 — Copilot Extension (build this second) + +The extension surfaces the same evaluation inside VS Code and Visual Studio +Copilot chat. It uses `#changes` to get the diff natively. + +**Commands:** +``` +@gauntletci review +@gauntletci review --rule GCI012 +@gauntletci explain GCI007 +@gauntletci status +``` + +The extension does not replicate the CLI. It calls the same core evaluation +library (`GauntletCI.Core`). One engine, two surfaces. All business logic +lives in Core. + +**Output format in Copilot chat:** +Findings render as collapsible sections per rule. 
High severity findings +expand by default. Pass results are collapsed. The final GCI017 verdict +(Ready / Needs Work / High Risk) appears as a header above all findings. + +### Phase 3 — PR integration (build this third) + +GitHub App that runs the full evaluation on PR open and on each subsequent +push. Findings post as review comments anchored to the relevant diff lines +where possible. GCI017 verdict posts as a PR status check. + +This is what unlocks team-tier revenue. Do not build it before the CLI has +real users. + +--- + +## Project structure + +``` +GauntletCI/ +├── GauntletCI.sln +├── src/ +│ ├── GauntletCI.Core/ # evaluation engine — no CLI, no UI +│ │ ├── Gates/ +│ │ │ ├── BranchCurrencyGate.cs +│ │ │ └── TestPassageGate.cs +│ │ ├── Evaluation/ +│ │ │ ├── EvaluationEngine.cs # orchestrates gates + LLM call +│ │ │ ├── ContextAssembler.cs # diff + config + test result → prompt +│ │ │ ├── DiffParser.cs # retained from prior codebase +│ │ │ ├── PromptBuilder.cs # injects rules into system prompt +│ │ │ └── FindingParser.cs # JSON response → Finding[] +│ │ ├── Models/ +│ │ │ ├── Finding.cs +│ │ │ ├── EvaluationResult.cs +│ │ │ └── GauntletConfig.cs +│ │ └── Telemetry/ +│ │ └── TelemetryEmitter.cs +│ ├── GauntletCI.Cli/ # CLI entry point only +│ │ └── Program.cs +│ └── GauntletCI.CopilotExtension/ # extension entry point only +└── tests/ + ├── GauntletCI.Core.Tests/ + └── GauntletCI.Cli.Tests/ +``` + +`GauntletCI.Core` has no dependency on the CLI or the extension. It is the +only project that touches the LLM. Everything testable lives here. 
+ +--- + +## Telemetry design + +### What is collected (opt-in, stated plainly on first run) + +```json +{ + "session_id": "uuid-v4-anonymous", + "timestamp": "2026-04-05T14:22:00Z", + "schema_version": "1", + "rules_fired": ["GCI003", "GCI005", "GCI012"], + "severities": { + "high": 1, + "medium": 1, + "low": 0 + }, + "gates": { + "branch_currency": "pass", + "test_passage": "pass" + }, + "diff_metadata": { + "lines_added": 34, + "lines_removed": 12, + "files_changed": 3, + "test_files_touched": false, + "languages": ["csharp"], + "diff_trimmed": false + }, + "model": "claude-sonnet-4-5", + "action": "committed", + "time_to_action_seconds": 112, + "evaluation_duration_ms": 4300 +} +``` + +### What is NEVER collected + +- Any code content whatsoever +- File names or paths +- Symbol names, class names, method names +- Branch names or repository identifiers +- Finding text (it may contain code fragments) +- Commit messages +- Developer identity of any kind + +### Consent prompt (first run only) + +``` +GauntletCI is free. + +In exchange, anonymized metrics about which rules fired and whether you +acted on them are sent back to improve the product. No code content is +ever transmitted. You can verify this with a network proxy. + +Run `gauntletci config --no-telemetry` to opt out at any time. + +Collect anonymized usage metrics? [Y/n]: +``` + +Store consent in `~/.gauntletci/config.json`. Check it on every run before +emitting anything. Never emit if consent is absent or no. 
+ +--- + +## Configuration + +### User-level: `~/.gauntletci/config.json` +```json +{ + "telemetry": true, + "model": "claude-sonnet-4-5", + "api_key_env": "ANTHROPIC_API_KEY", + "default_mode": "staged" +} +``` + +### Repo-level: `.gauntletci.json` (committed to the repo) +```json +{ + "test_command": "dotnet test", + "disabled_rules": [], + "blocking_rules": ["GCI012", "GCI004"], + "telemetry": true, + "model": "claude-sonnet-4-5" +} +``` + +Repo-level config overrides user-level where both exist. `blocking_rules` +defines which rules cause a non-zero exit code (blocking the commit) versus +which rules surface as warnings only. By default all High severity findings +block. Teams can tighten or loosen this. + +Enterprise repos will set `"telemetry": false`. Make that one line and +make it respected immediately. + +--- + +## Prompt engineering rules + +### System prompt structure (in order) + +1. GauntletCI identity and purpose (3-4 sentences) +2. Operating principles (verbatim from rules doc) +3. Global constraints — must do and must not do (verbatim) +4. Review mindset (verbatim) +5. Standard output format with example finding (verbatim) +6. All 18 rules — full text, in order GCI001–GCI018 +7. Final instruction (verbatim) + +Do not summarize rules in the system prompt. Include full text. The model +needs the Must Not Do sections — they prevent the failure modes that make +findings useless. + +### User prompt structure (in order) + +1. Pre-flight gate results (pass/fail, brief) +2. Test run summary (pass, N tests, duration) +3. Repo config if `.gauntletci.json` exists (relevant fields only) +4. Recent commit messages (last 3, for working memory context) +5. Full diff (trimmed to token budget, with trimming note if applied) + +### Context trimming rules (when diff exceeds budget) + +1. Strip binary file diffs entirely +2. Strip generated file diffs (`.Designer.cs`, `*.g.cs`, migrations) +3. Trim context lines (lines starting with space) to 2 per hunk +4. 
If still over budget: drop whole hunks, leaving the largest hunks for last; never cut mid-hunk +5. Always prepend: `[Note: diff trimmed from N to M tokens. Some context + omitted. State uncertainty where relevant.]` + +Do not include: entire codebase files, dependency lock files, package +manifests, build output, or anything not directly changed. + +--- + +## What to keep from the prior codebase + +| Component | Decision | Reason | +|-----------|----------|--------| +| `DiffParser.cs` | Keep | Solid unified diff parsing, handles edge cases | +| `TestDetector.cs` (path/stem logic only) | Keep | Useful for detecting test files in diff metadata | +| `ChangeBlock.cs` | Keep | Clean model for diff structure | +| All 20 PCG rule classes | Discard | Heuristic engine, wrong approach | +| `RuleEngine.cs` | Discard | Replaced by EvaluationEngine | +| `RuleFactsExtractor.cs` | Discard | Replaced by ContextAssembler | +| `SpecRuleEngine.cs` | Discard | Parallel engine, undefined status | +| `CuratedTemplateEngine.cs` | Discard | Model writes the output now | +| `SqliteLearningStore.cs` | Discard | Premature — telemetry replaces this | +| `FindingRenderer.cs` | Rewrite | New finding schema, new output format | +| `Program.cs` | Rewrite | New CLI surface, new commands | + +Start from `GauntletCI.Core` as a new project. Copy `DiffParser.cs`, +`TestDetector.cs` (path logic only), and `ChangeBlock.cs` into it. +Build everything else from scratch against the architecture above.
+ +--- + +## What NOT to build + +- Pattern-matching rules that approximate LLM judgment — the model does this +- A confidence scoring system for heuristics — there are no heuristics +- A SQLite learning store — telemetry handles signal collection at this stage +- A curated template engine — the model writes the output +- Multiple sequential LLM calls for one evaluation — one call, always +- A web dashboard — build it when you have users, not before +- Rule-specific C# logic for GCI001–GCI018 — the rules live in the prompt + +--- + +## Success criteria for Phase 1 + +- `dotnet tool install -g gauntletci` works in under 2 minutes +- `gauntletci install` sets up the git hook in under 30 seconds +- A typical evaluation (50 changed lines, 3 files) completes in under 10 seconds +- Every finding cites a specific file, symbol, or line — no vague output +- Zero code content leaves the developer's machine (verifiable with a proxy) +- Pre-flight gate failures are clear, specific, and actionable +- Developers who run it for one week ask to keep running it diff --git a/.cursor/rules/gauntletci-efficiency-playbook.mdc b/.cursor/rules/gauntletci-efficiency-playbook.mdc new file mode 100644 index 00000000..ceaa7440 --- /dev/null +++ b/.cursor/rules/gauntletci-efficiency-playbook.mdc @@ -0,0 +1,214 @@ +--- +description: GauntletCI Copilot efficiency playbook (batch prompts) +alwaysApply: true +--- + +# GauntletCI Copilot Efficiency Playbook + +## Purpose +Minimize Copilot request usage while maximizing output per prompt. + +--- + +## Core Strategy + +Treat Copilot as a batch processor, not a conversation. 
+ +Each prompt should: +- Perform multiple steps +- Produce final output +- Avoid follow-up prompts + +--- + +## Model Usage Strategy + +### Included Models (Default) +Use for: +- Rule implementation +- Refactoring +- Test generation +- CLI wiring +- File-level analysis + +### Premium Models (Use Sparingly) +Use for: +- Architecture decisions +- Cross-file reasoning +- Complex debugging +- Corpus/scoring design + +Rule: +If the task fits in one file, do NOT use premium. + +--- + +## High-Efficiency Prompt Templates + +### Rule Implementation + +Implement a GauntletCI rule: + +Rule ID: GCI00XX +Purpose: [what risk it detects] + +Trigger: +- [exact condition] + +Must NOT trigger: +- [false positive case] + +Input: +- DiffContext (added/removed lines) + +Output: +- Use CreateFinding() +- Include summary, evidence, whyItMatters, suggestedAction + +Return: +- Complete C# class inheriting RuleBase +- No explanation + +--- + +### Multi-Step Refactor + +Refactor this file: + +Goals: +1. Improve readability +2. Remove duplication +3. Preserve behavior exactly + +Also: +- Fix any obvious bugs +- Keep method signatures unchanged +- Ensure it compiles + +Return: +- Full updated file +- No explanation + +--- + +### Test Generation + +Generate xUnit tests for this class: + +Requirements: +- Cover happy path +- Cover edge cases +- Cover failure scenarios + +Use: +- realistic test names +- Arrange/Act/Assert + +Return: +- Complete test file +- No explanation + +--- + +### Bug Hunt (Premium) + +Analyze this code and identify: + +1. The most likely bug +2. Why it occurs +3. Minimal fix + +Then: +- Show corrected code snippet only +- No extra commentary + +--- + +### Corpus Pipeline Task + +Implement the following in C#: + +1. PullRequestCandidate model +2. HydratedPullRequest model +3. 
SQLite schema for both + +Constraints: +- Use clean POCOs +- Include all required fields +- No business logic + +Return: +- All code in one response + +--- + +### Repo Understanding + +Read these files: +- [file list] + +Return: +1. What the system does +2. Top 3 design flaws +3. One concrete improvement + +Be concise + +--- + +## Anti-Patterns to Avoid + +### Avoid conversational prompts +- “what do you think?” +- “any suggestions?” +- “can you improve this?” + +### Avoid tiny step prompts +- writing small pieces iteratively + +### Avoid unnecessary agent usage +- Do not use agent for small tasks + +### Avoid blind exploration +- Always scope file list + +--- + +## Optimized Workflow + +### Rule Development +1. Use rule template +2. Paste result +3. One fix pass max + +### Corpus Development +1. Generate models + schema in one prompt +2. Generate CLI commands in one prompt +3. Generate hydration logic in one prompt + +### Debugging +1. Use premium +2. Ask for root cause + minimal fix + +### Repo Review +Ask for: +- Top 3 problems only +- No style issues + +--- + +## Expected Outcome + +- 50–70% fewer prompts +- 70–90% fewer premium requests +- Reduced iteration cycles + +--- + +## Mental Model + +Bad: +Think with Copilot + +Good: +Batch work through Copilot diff --git a/.cursor/skills/context-mode/SKILL.md b/.cursor/skills/context-mode/SKILL.md new file mode 100644 index 00000000..e034228d --- /dev/null +++ b/.cursor/skills/context-mode/SKILL.md @@ -0,0 +1,39 @@ +--- +name: context-mode +description: >- + Context window protection via context-mode MCP (sandbox execute, FTS5 search, + session memory). Use when analyzing large outputs, corpus data, logs, or when + the user says ctx stats, ctx doctor, or context-mode. +--- + +# context-mode (Cursor + GauntletCI) + +This project wires **context-mode** as an MCP server plus Cursor hooks and `.cursor/rules/context-mode.mdc`. + +## Verify setup + +1. Cursor Settings → MCP → **context-mode** connected +2. 
In agent chat: `ctx doctor` (runs `ctx_doctor` MCP tool) + +## Core tools (MCP) + +| Tool | Use | +|------|-----| +| `ctx_batch_execute` | Run multiple shell commands; auto-index; search in one step | +| `ctx_execute` | Sandbox JS for analysis — only stdout enters context | +| `ctx_execute_file` | Same, with a file path in the sandbox | +| `ctx_search` | FTS5/BM25 over indexed session + corpus | +| `ctx_fetch_and_index` | Fetch URL, index content (not raw HTML in chat) | +| `ctx_index` | Store arbitrary text for later search | +| `ctx_stats` / `ctx_doctor` / `ctx_upgrade` / `ctx_purge` | Ops | + +## GauntletCI-specific + +- API keys: load from `~/.tokens` into subprocess env only (see `.cursor/rules/gauntletci.mdc` and user `auth-tokens.mdc`); never index token files +- Prefer `ctx_execute` for corpus metrics, fixture counts, and large grep/read patterns +- Use native Edit/Write for file changes — never `ctx_execute` to write files +- Routing rules are always on via `context-mode.mdc` + +Install/update globally: `npm install -g context-mode` + +Docs: https://github.com/mksglu/context-mode diff --git a/.cursor/skills/graphify/SKILL.md b/.cursor/skills/graphify/SKILL.md new file mode 100644 index 00000000..605ed282 --- /dev/null +++ b/.cursor/skills/graphify/SKILL.md @@ -0,0 +1,1248 @@ +--- +name: graphify +description: >- + Any input (code, docs, papers, images) → knowledge graph → clustered communities + → HTML + JSON + audit report. Use for /graphify, codebase maps, or graph queries + on GauntletCI. +--- + +# /graphify + +Turn any folder of files into a navigable knowledge graph with community detection, an honest audit trail, and three outputs: interactive HTML, GraphRAG-ready JSON, and a plain-language GRAPH_REPORT.md. 
+ +## Usage + +``` +/graphify # full pipeline on current directory → Obsidian vault +/graphify <path> # full pipeline on specific path +/graphify --mode deep # thorough extraction, richer INFERRED edges +/graphify --update # incremental - re-extract only new/changed files +/graphify --directed # build directed graph (preserves edge direction: source→target) +/graphify --cluster-only # rerun clustering on existing graph +/graphify --no-viz # skip visualization, just report + JSON +/graphify --html # (HTML is generated by default - this flag is a no-op) +/graphify --svg # also export graph.svg (embeds in Notion, GitHub) +/graphify --graphml # export graph.graphml (Gephi, yEd) +/graphify --neo4j # generate graphify-out/cypher.txt for Neo4j +/graphify --neo4j-push bolt://localhost:7687 # push directly to Neo4j +/graphify --wiki # build agent-crawlable wiki (index.md + one article per community) +/graphify --obsidian --obsidian-dir ~/vaults/my-project # write vault to custom path (e.g. existing vault) +/graphify --mcp # start MCP stdio server for agent access +/graphify --watch # watch folder, auto-rebuild on code changes (no LLM needed) +/graphify add <url> # fetch URL, save to ./raw, update graph +/graphify add <url> --author "Name" # tag who wrote it +/graphify add <url> --contributor "Name" # tag who added it to the corpus +/graphify query "<question>" # BFS traversal - broad context +/graphify query "<question>" --dfs # DFS - trace a specific path +/graphify query "<question>" --budget 1500 # cap answer at N tokens +/graphify path "AuthModule" "Database" # shortest path between two concepts +/graphify explain "SwinTransformer" # plain-language explanation of a node +``` + +## What graphify is for + +graphify is built around Andrej Karpathy's /raw folder workflow: drop anything into a folder - papers, tweets, screenshots, code, notes - and get a structured knowledge graph that shows you what you didn't know was connected. + +Three things it does that your AI assistant alone cannot: +1.
**Persistent graph** - relationships are stored in `graphify-out/graph.json` and survive across sessions. Ask questions weeks later without re-reading everything. +2. **Honest audit trail** - every edge is tagged EXTRACTED, INFERRED, or AMBIGUOUS. You know what was found vs invented. +3. **Cross-document surprise** - community detection finds connections between concepts in different files that you would never think to ask about directly. + +Use it for: +- A codebase you're new to (understand architecture before touching anything) +- A reading list (papers + tweets + notes → one navigable graph) +- A research corpus (citation graph + concept graph in one) +- Your personal /raw folder (drop everything in, let it grow, query it) + +## What You Must Do When Invoked + +If no path was given, use `.` (current directory). Do not ask the user for a path. + +Follow these steps in order. Do not skip steps. + +### Step 1 - Ensure graphify is installed + +```powershell +# Detect Python and install graphify if needed +python -c "import graphify" 2>$null +if ($LASTEXITCODE -ne 0) { pip install graphifyy -q 2>&1 | Select-Object -Last 3 } +# Write interpreter path for all subsequent steps +python -c "import sys; open('.graphify_python', 'w').write(sys.executable)" +``` + +If the import succeeds, print nothing and move straight to Step 2. + +### Step 2 - Detect files + +```powershell +python -c " +import json +from graphify.detect import detect +from pathlib import Path +result = detect(Path('INPUT_PATH')) +print(json.dumps(result)) +" > .graphify_detect.json +``` + +Replace INPUT_PATH with the actual path the user provided. Do NOT cat or print the JSON - read it silently and present a clean summary instead: + +``` +Corpus: X files · ~Y words + code: N files (.py .ts .go ...) + docs: N files (.md .txt ...) + papers: N files (.pdf ...) + images: N files + video: N files (.mp4 .mp3 ...) +``` + +Omit any category with 0 files from the summary. 
+ +Then act on it: +- If `total_files` is 0: stop with "No supported files found in [path]." +- If `skipped_sensitive` is non-empty: mention file count skipped, not the file names. +- If `total_words` > 2,000,000 OR `total_files` > 200: show the warning and the top 5 subdirectories by file count, then ask which subfolder to run on. Wait for the user's answer before proceeding. +- Otherwise: proceed directly to Step 2.5 if video files were detected, or Step 3 if not. + +### Step 2.5 - Transcribe video / audio files (only if video files detected) + +Skip this step entirely if `detect` returned zero `video` files. + +Video and audio files cannot be read directly. Transcribe them to text first, then treat the transcripts as doc files in Step 3. + +**Strategy:** Read the god nodes from the detect output or analysis file. You are already a language model - write a one-sentence domain hint yourself from those labels. Then pass it to Whisper as the initial prompt. No separate API call needed. + +**However**, if the corpus has *only* video files and no other docs/code, use the generic fallback prompt: `"Use proper punctuation and paragraph breaks."` + +**Step 1 - Write the Whisper prompt yourself.** + +Read the top god node labels from detect output or analysis, then compose a short domain hint sentence, for example: + +- Labels: `transformer, attention, encoder, decoder` -> `"Machine learning research on transformer architectures and attention mechanisms. Use proper punctuation and paragraph breaks."` +- Labels: `kubernetes, deployment, pod, helm` -> `"DevOps discussion about Kubernetes deployments and Helm charts. Use proper punctuation and paragraph breaks."` + +Set it as `$env:GRAPHIFY_WHISPER_PROMPT` before running the transcription command. 
+ +**Step 2 - Transcribe (PowerShell):** + +```powershell +& (Get-Content graphify-out\.graphify_python) -c " +import json, os +from pathlib import Path +from graphify.transcribe import transcribe_all + +detect = json.loads(Path('graphify-out/.graphify_detect.json').read_text()) +video_files = detect.get('files', {}).get('video', []) +prompt = os.environ.get('GRAPHIFY_WHISPER_PROMPT', 'Use proper punctuation and paragraph breaks.') + +transcript_paths = transcribe_all(video_files, initial_prompt=prompt) +print(json.dumps(transcript_paths)) +" | Out-File -FilePath graphify-out\.graphify_transcripts.json -Encoding utf8 +``` + +After transcription: +- Read the transcript paths from `graphify-out\.graphify_transcripts.json` +- Add them to the docs list before dispatching semantic subagents in Step 3B +- Print how many transcripts were created: `Transcribed N video file(s) -> treating as docs` +- If transcription fails for a file, print a warning and continue with the rest + +**Whisper model:** Default is `base`. If the user passed `--whisper-model <name>`, set `$env:GRAPHIFY_WHISPER_MODEL = "<name>"` before running the command above. + +### Step 3 - Extract entities and relationships + +**Before starting:** note whether `--mode deep` was given. You must pass `DEEP_MODE=true` to every subagent in Step B2 if it was. Track this from the original invocation - do not lose it. + +This step has two parts: **structural extraction** (deterministic, free) and **semantic extraction** (your AI model, costs tokens). + +**Run Part A (AST) and Part B (semantic) in parallel. Dispatch all semantic subagents AND start AST extraction in the same message. Both can run simultaneously since they operate on different file types. Merge results in Part C.** + +Note: Parallelizing AST + semantic saves 5-15s on large corpora. AST is deterministic and fast; start it while subagents are processing docs/papers. 
+ +#### Part A - Structural extraction for code files + +For any code files detected, run AST extraction in parallel with Part B subagents: + +```powershell +python -c " +import sys, json +from graphify.extract import collect_files, extract +from pathlib import Path +import json + +code_files = [] +detect = json.loads(Path('.graphify_detect.json').read_text()) +for f in detect.get('files', {}).get('code', []): + code_files.extend(collect_files(Path(f)) if Path(f).is_dir() else [Path(f)]) + +if code_files: + result = extract(code_files) + Path('.graphify_ast.json').write_text(json.dumps(result, indent=2)) + print(f'AST: {len(result[\"nodes\"])} nodes, {len(result[\"edges\"])} edges') +else: + Path('.graphify_ast.json').write_text(json.dumps({'nodes':[],'edges':[],'input_tokens':0,'output_tokens':0})) + print('No code files - skipping AST extraction') +" +``` + +#### Part B - Semantic extraction (parallel subagents) + +**Fast path:** If detection found zero docs, papers, and images (code-only corpus), skip Part B entirely and go straight to Part C. AST handles code - there is nothing for semantic subagents to do. + +**MANDATORY: You MUST use the Agent tool here. Reading files yourself one-by-one is forbidden - it is 5-10x slower. 
If you do not use the Agent tool you are doing this wrong.** + +Before dispatching subagents, print a timing estimate: +- Load `total_words` and file counts from `.graphify_detect.json` +- Estimate agents needed: `ceil(uncached_non_code_files / 22)` (chunk size is 20-25) +- Estimate time: ~45s per agent batch (they run in parallel, so total ≈ 45s × ceil(agents/parallel_limit)) +- Print: "Semantic extraction: ~N files → X agents, estimated ~Ys" + +**Step B0 - Check extraction cache first** + +Before dispatching any subagents, check which files already have cached extraction results: + +```powershell +python -c " +import json +from graphify.cache import check_semantic_cache +from pathlib import Path + +detect = json.loads(Path('.graphify_detect.json').read_text()) +all_files = [f for files in detect['files'].values() for f in files] + +cached_nodes, cached_edges, cached_hyperedges, uncached = check_semantic_cache(all_files) + +if cached_nodes or cached_edges or cached_hyperedges: + Path('.graphify_cached.json').write_text(json.dumps({'nodes': cached_nodes, 'edges': cached_edges, 'hyperedges': cached_hyperedges})) +Path('.graphify_uncached.txt').write_text('\n'.join(uncached)) +print(f'Cache: {len(all_files)-len(uncached)} files hit, {len(uncached)} files need extraction') +" +``` + +Only dispatch subagents for files listed in `.graphify_uncached.txt`. If all files are cached, skip to Part C directly. + +**Step B1 - Split into chunks** + +Load files from `.graphify_uncached.txt`. Split into chunks of 20-25 files each. Each image gets its own chunk (vision needs separate context). + +**Step B2 - Dispatch ALL subagents in a single message** + +Call the Agent tool multiple times IN THE SAME RESPONSE - one call per chunk. This is the only way they run in parallel. If you make one Agent call, wait, then make another, you are doing it sequentially and defeating the purpose. 
+ +Concrete example for 3 chunks: +``` +[Agent tool call 1: files 1-22] +[Agent tool call 2: files 23-44] +[Agent tool call 3: files 45-66] +``` +All three in one message. Not three separate messages. + +Each subagent receives this exact prompt (substitute FILE_LIST, CHUNK_NUM, TOTAL_CHUNKS, and DEEP_MODE): + +``` +You are a graphify extraction subagent. Read the files listed and extract a knowledge graph fragment. +Output ONLY valid JSON matching the schema below - no explanation, no markdown fences, no preamble. + +Files (chunk CHUNK_NUM of TOTAL_CHUNKS): +FILE_LIST + +Rules: +- EXTRACTED: relationship explicit in source (import, call, citation, "see §3.2") +- INFERRED: reasonable inference (shared data structure, implied dependency) +- AMBIGUOUS: uncertain - flag for review, do not omit + +Code files: focus on semantic edges AST cannot find (call relationships, shared data, arch patterns). + Do not re-extract imports - AST already has those. +Doc/paper files: extract named concepts, entities, citations. For rationale (WHY decisions were made, trade-offs, design intent): store as a `rationale` attribute on the relevant concept node — do NOT create a separate rationale node or fragment node. Only create a node for something that is itself a named entity or concept. +Code files: when adding `calls` edges, source MUST be the caller (the function/class doing the calling), target MUST be the callee. Never reverse this direction. +Image files: use vision to understand what the image IS - do not just OCR. + UI screenshot: layout patterns, design decisions, key elements, purpose. + Chart: metric, trend/insight, data source. + Tweet/post: claim as node, author, concepts mentioned. + Diagram: components and connections. + Research figure: what it demonstrates, method, result. + Handwritten/whiteboard: ideas and arrows, mark uncertain readings AMBIGUOUS. 
+ +DEEP_MODE (if --mode deep was given): be aggressive with INFERRED edges - indirect deps, + shared assumptions, latent couplings. Mark uncertain ones AMBIGUOUS instead of omitting. + +Semantic similarity: if two concepts in this chunk solve the same problem or represent the same idea without any structural link (no import, no call, no citation), add a `semantically_similar_to` edge marked INFERRED with a confidence_score reflecting how similar they are (0.6-0.95). Examples: +- Two functions that both validate user input but never call each other +- A class in code and a concept in a paper that describe the same algorithm +- Two error types that handle the same failure mode differently +Only add these when the similarity is genuinely non-obvious and cross-cutting. Do not add them for trivially similar things. + +Hyperedges: if 3 or more nodes clearly participate together in a shared concept, flow, or pattern that is not captured by pairwise edges alone, add a hyperedge to a top-level `hyperedges` array. Examples: +- All classes that implement a common protocol or interface +- All functions in an authentication flow (even if they don't all call each other) +- All concepts from a paper section that form one coherent idea +Use sparingly — only when the group relationship adds information beyond the pairwise edges. Maximum 3 hyperedges per chunk. + +If a file has YAML frontmatter (--- ... ---), copy source_url, captured_at, author, + contributor onto every node from that file. + +confidence_score is REQUIRED on every edge - never omit it, never use 0.5 as a default: +- EXTRACTED edges: confidence_score = 1.0 always +- INFERRED edges: reason about each edge individually. + Direct structural evidence (shared data structure, clear dependency): 0.8-0.9. + Reasonable inference with some uncertainty: 0.6-0.7. + Weak or speculative: 0.4-0.5. Most edges should be 0.6-0.9, not 0.5. 
+- AMBIGUOUS edges: 0.1-0.3 + +Output exactly this JSON (no other text): +{"nodes":[{"id":"filestem_entityname","label":"Human Readable Name","file_type":"code|document|paper|image","source_file":"relative/path","source_location":null,"source_url":null,"captured_at":null,"author":null,"contributor":null}],"edges":[{"source":"node_id","target":"node_id","relation":"calls|implements|references|cites|conceptually_related_to|shares_data_with|semantically_similar_to|rationale_for","confidence":"EXTRACTED|INFERRED|AMBIGUOUS","confidence_score":1.0,"source_file":"relative/path","source_location":null,"weight":1.0}],"hyperedges":[{"id":"snake_case_id","label":"Human Readable Label","nodes":["node_id1","node_id2","node_id3"],"relation":"participate_in|implement|form","confidence":"EXTRACTED|INFERRED","confidence_score":0.75,"source_file":"relative/path"}],"input_tokens":0,"output_tokens":0} +``` + +**Step B3 - Collect, cache, and merge** + +Wait for all subagents. For each result: +- Check that `graphify-out/.graphify_chunk_NN.json` exists on disk — this is the success signal +- If the file exists and contains valid JSON with `nodes` and `edges`, include it and save to cache +- If the file is missing, the subagent was likely dispatched as read-only (Explore type) — print a warning: "chunk N missing from disk — subagent may have been read-only. Re-run with general-purpose agent." Do not silently skip. +- If a subagent failed or returned invalid JSON, print a warning and skip that chunk - do not abort + +If more than half the chunks failed or are missing, stop and tell the user to re-run and ensure `subagent_type="general-purpose"` is used. 
+ +Save new results to cache: +```powershell +python -c " +import json +from graphify.cache import save_semantic_cache +from pathlib import Path + +new = json.loads(Path('.graphify_semantic_new.json').read_text()) if Path('.graphify_semantic_new.json').exists() else {'nodes':[],'edges':[],'hyperedges':[]} +saved = save_semantic_cache(new.get('nodes', []), new.get('edges', []), new.get('hyperedges', [])) +print(f'Cached {saved} files') +" +``` + +Merge cached + new results into `.graphify_semantic.json`: +```powershell +python -c " +import json +from pathlib import Path + +cached = json.loads(Path('.graphify_cached.json').read_text()) if Path('.graphify_cached.json').exists() else {'nodes':[],'edges':[],'hyperedges':[]} +new = json.loads(Path('.graphify_semantic_new.json').read_text()) if Path('.graphify_semantic_new.json').exists() else {'nodes':[],'edges':[],'hyperedges':[]} + +all_nodes = cached['nodes'] + new.get('nodes', []) +all_edges = cached['edges'] + new.get('edges', []) +all_hyperedges = cached.get('hyperedges', []) + new.get('hyperedges', []) +seen = set() +deduped = [] +for n in all_nodes: + if n['id'] not in seen: + seen.add(n['id']) + deduped.append(n) + +merged = { + 'nodes': deduped, + 'edges': all_edges, + 'hyperedges': all_hyperedges, + 'input_tokens': new.get('input_tokens', 0), + 'output_tokens': new.get('output_tokens', 0), +} +Path('.graphify_semantic.json').write_text(json.dumps(merged, indent=2)) +print(f'Extraction complete - {len(deduped)} nodes, {len(all_edges)} edges ({len(cached[\"nodes\"])} from cache, {len(new.get(\"nodes\",[]))} new)') +" +``` +Clean up temp files: `Remove-Item -ErrorAction SilentlyContinue .graphify_cached.json, .graphify_uncached.txt, .graphify_semantic_new.json` + +#### Part C - Merge AST + semantic into final extraction + +```powershell +python -c " +import sys, json +from pathlib import Path + +ast = json.loads(Path('.graphify_ast.json').read_text()) +sem = json.loads(Path('.graphify_semantic.json').read_text()) 
+ +# Merge: AST nodes first, semantic nodes deduplicated by id +seen = {n['id'] for n in ast['nodes']} +merged_nodes = list(ast['nodes']) +for n in sem['nodes']: + if n['id'] not in seen: + merged_nodes.append(n) + seen.add(n['id']) + +merged_edges = ast['edges'] + sem['edges'] +merged_hyperedges = sem.get('hyperedges', []) +merged = { + 'nodes': merged_nodes, + 'edges': merged_edges, + 'hyperedges': merged_hyperedges, + 'input_tokens': sem.get('input_tokens', 0), + 'output_tokens': sem.get('output_tokens', 0), +} +Path('.graphify_extract.json').write_text(json.dumps(merged, indent=2)) +total = len(merged_nodes) +edges = len(merged_edges) +print(f'Merged: {total} nodes, {edges} edges ({len(ast[\"nodes\"])} AST + {len(sem[\"nodes\"])} semantic)') +" +``` + +### Step 4 - Build graph, cluster, analyze, generate outputs + +```powershell +New-Item -ItemType Directory -Force -Path graphify-out | Out-Null +python -c " +import sys, json +from graphify.build import build_from_json +from graphify.cluster import cluster, score_all +from graphify.analyze import god_nodes, surprising_connections, suggest_questions +from graphify.report import generate +from graphify.export import to_json +from pathlib import Path + +extraction = json.loads(Path('.graphify_extract.json').read_text()) +detection = json.loads(Path('.graphify_detect.json').read_text()) + +G = build_from_json(extraction) +communities = cluster(G) +cohesion = score_all(G, communities) +tokens = {'input': extraction.get('input_tokens', 0), 'output': extraction.get('output_tokens', 0)} +gods = god_nodes(G) +surprises = surprising_connections(G, communities) +labels = {cid: 'Community ' + str(cid) for cid in communities} +# Placeholder questions - regenerated with real labels in Step 5 +questions = suggest_questions(G, communities, labels) + +report = generate(G, communities, cohesion, labels, gods, surprises, detection, tokens, 'INPUT_PATH', suggested_questions=questions) 
+Path('graphify-out/GRAPH_REPORT.md').write_text(report) +to_json(G, communities, 'graphify-out/graph.json') + +analysis = { + 'communities': {str(k): v for k, v in communities.items()}, + 'cohesion': {str(k): v for k, v in cohesion.items()}, + 'gods': gods, + 'surprises': surprises, + 'questions': questions, +} +Path('.graphify_analysis.json').write_text(json.dumps(analysis, indent=2)) +if G.number_of_nodes() == 0: + print('ERROR: Graph is empty - extraction produced no nodes.') + print('Possible causes: all files were skipped, binary-only corpus, or extraction failed.') + raise SystemExit(1) +print(f'Graph: {G.number_of_nodes()} nodes, {G.number_of_edges()} edges, {len(communities)} communities') +" +``` + +If this step prints `ERROR: Graph is empty`, stop and tell the user what happened - do not proceed to labeling or visualization. + +Replace INPUT_PATH with the actual path. + +### Step 5 - Label communities + +Read `.graphify_analysis.json`. For each community key, look at its node labels and write a 2-5 word plain-language name (e.g. "Attention Mechanism", "Training Pipeline", "Data Loading"). 
+ +Then regenerate the report and save the labels for the visualizer: + +```powershell +python -c " +import sys, json +from graphify.build import build_from_json +from graphify.cluster import score_all +from graphify.analyze import god_nodes, surprising_connections, suggest_questions +from graphify.report import generate +from pathlib import Path + +extraction = json.loads(Path('.graphify_extract.json').read_text()) +detection = json.loads(Path('.graphify_detect.json').read_text()) +analysis = json.loads(Path('.graphify_analysis.json').read_text()) + +G = build_from_json(extraction) +communities = {int(k): v for k, v in analysis['communities'].items()} +cohesion = {int(k): v for k, v in analysis['cohesion'].items()} +tokens = {'input': extraction.get('input_tokens', 0), 'output': extraction.get('output_tokens', 0)} + +# LABELS - replace these with the names you chose above +labels = LABELS_DICT + +# Regenerate questions with real community labels (labels affect question phrasing) +questions = suggest_questions(G, communities, labels) + +report = generate(G, communities, cohesion, labels, analysis['gods'], analysis['surprises'], detection, tokens, 'INPUT_PATH', suggested_questions=questions) +Path('graphify-out/GRAPH_REPORT.md').write_text(report) +Path('.graphify_labels.json').write_text(json.dumps({str(k): v for k, v in labels.items()})) +print('Report updated with community labels') +" +``` + +Replace `LABELS_DICT` with the actual dict you constructed (e.g. `{0: "Attention Mechanism", 1: "Training Pipeline"}`). +Replace INPUT_PATH with the actual path. + +### Step 6 - Generate Obsidian vault (opt-in) + HTML + +**Generate HTML always** (unless `--no-viz`). **Obsidian vault only if `--obsidian` was explicitly given** — skip it otherwise, it generates one file per node. + +If `--obsidian` was given: + +- If `--obsidian-dir ` was also given, use that path as the vault directory. Otherwise default to `graphify-out/obsidian`. 
+ +```powershell +python -c " +import sys, json +from graphify.build import build_from_json +from graphify.export import to_obsidian, to_canvas +from pathlib import Path + +extraction = json.loads(Path('.graphify_extract.json').read_text()) +analysis = json.loads(Path('.graphify_analysis.json').read_text()) +labels_raw = json.loads(Path('.graphify_labels.json').read_text()) if Path('.graphify_labels.json').exists() else {} + +G = build_from_json(extraction) +communities = {int(k): v for k, v in analysis['communities'].items()} +cohesion = {int(k): v for k, v in analysis['cohesion'].items()} +labels = {int(k): v for k, v in labels_raw.items()} + +obsidian_dir = 'OBSIDIAN_DIR' # replace with --obsidian-dir value, or 'graphify-out/obsidian' if not given + +n = to_obsidian(G, communities, obsidian_dir, community_labels=labels or None, cohesion=cohesion) +print(f'Obsidian vault: {n} notes in {obsidian_dir}/') + +to_canvas(G, communities, f'{obsidian_dir}/graph.canvas', community_labels=labels or None) +print(f'Canvas: {obsidian_dir}/graph.canvas - open in Obsidian for structured community layout') +print() +print(f'Open {obsidian_dir}/ as a vault in Obsidian.') +print(' Graph view - nodes colored by community (set automatically)') +print(' graph.canvas - structured layout with communities as groups') +print(' _COMMUNITY_* - overview notes with cohesion scores and dataview queries') +" +``` + +Generate the HTML graph (always, unless `--no-viz`): + +```powershell +python -c " +import sys, json +from graphify.build import build_from_json +from graphify.export import to_html +from pathlib import Path + +extraction = json.loads(Path('.graphify_extract.json').read_text()) +analysis = json.loads(Path('.graphify_analysis.json').read_text()) +labels_raw = json.loads(Path('.graphify_labels.json').read_text()) if Path('.graphify_labels.json').exists() else {} + +G = build_from_json(extraction) +communities = {int(k): v for k, v in analysis['communities'].items()} +labels = 
{int(k): v for k, v in labels_raw.items()} + +if G.number_of_nodes() > 5000: + print(f'Graph has {G.number_of_nodes()} nodes - too large for HTML viz. Use Obsidian vault instead.') +else: + to_html(G, communities, 'graphify-out/graph.html', community_labels=labels or None) + print('graph.html written - open in any browser, no server needed') +" +``` + +### Step 7 - Neo4j export (only if --neo4j or --neo4j-push flag) + +**If `--neo4j`** - generate a Cypher file for manual import: + +```powershell +python -c " +import sys, json +from graphify.build import build_from_json +from graphify.export import to_cypher +from pathlib import Path + +G = build_from_json(json.loads(Path('.graphify_extract.json').read_text())) +to_cypher(G, 'graphify-out/cypher.txt') +print('cypher.txt written - import with: cypher-shell < graphify-out/cypher.txt') +" +``` + +**If `--neo4j-push `** - push directly to a running Neo4j instance. Ask the user for credentials if not provided: + +```powershell +python -c " +import sys, json +from graphify.build import build_from_json +from graphify.cluster import cluster +from graphify.export import push_to_neo4j +from pathlib import Path + +extraction = json.loads(Path('.graphify_extract.json').read_text()) +analysis = json.loads(Path('.graphify_analysis.json').read_text()) +G = build_from_json(extraction) +communities = {int(k): v for k, v in analysis['communities'].items()} + +result = push_to_neo4j(G, uri='NEO4J_URI', user='NEO4J_USER', password='NEO4J_PASSWORD', communities=communities) +print(f'Pushed to Neo4j: {result[\"nodes\"]} nodes, {result[\"edges\"]} edges') +" +``` + +Replace `NEO4J_URI`, `NEO4J_USER`, `NEO4J_PASSWORD` with actual values. Default URI is `bolt://localhost:7687`, default user is `neo4j`. Uses MERGE - safe to re-run without creating duplicates. 
+ +### Step 7b - SVG export (only if --svg flag) + +```powershell +python -c " +import sys, json +from graphify.build import build_from_json +from graphify.export import to_svg +from pathlib import Path + +extraction = json.loads(Path('.graphify_extract.json').read_text()) +analysis = json.loads(Path('.graphify_analysis.json').read_text()) +labels_raw = json.loads(Path('.graphify_labels.json').read_text()) if Path('.graphify_labels.json').exists() else {} + +G = build_from_json(extraction) +communities = {int(k): v for k, v in analysis['communities'].items()} +labels = {int(k): v for k, v in labels_raw.items()} + +to_svg(G, communities, 'graphify-out/graph.svg', community_labels=labels or None) +print('graph.svg written - embeds in Obsidian, Notion, GitHub READMEs') +" +``` + +### Step 7c - GraphML export (only if --graphml flag) + +```powershell +python -c " +import json +from graphify.build import build_from_json +from graphify.export import to_graphml +from pathlib import Path + +extraction = json.loads(Path('.graphify_extract.json').read_text()) +analysis = json.loads(Path('.graphify_analysis.json').read_text()) + +G = build_from_json(extraction) +communities = {int(k): v for k, v in analysis['communities'].items()} + +to_graphml(G, communities, 'graphify-out/graph.graphml') +print('graph.graphml written - open in Gephi, yEd, or any GraphML tool') +" +``` + +### Step 7d - MCP server (only if --mcp flag) + +```powershell +python -m graphify.serve graphify-out/graph.json +``` + +This starts a stdio MCP server that exposes tools: `query_graph`, `get_node`, `get_neighbors`, `get_community`, `god_nodes`, `graph_stats`, `shortest_path`. Add to Claude Desktop or any MCP-compatible agent orchestrator so other agents can query the graph live. 
+ +To configure in Claude Desktop, add to `claude_desktop_config.json`: +```json +{ + "mcpServers": { + "graphify": { + "command": "python", + "args": ["-m", "graphify.serve", "/absolute/path/to/graphify-out/graph.json"] + } + } +} +``` + +### Step 8 - Token reduction benchmark (only if total_words > 5000) + +If `total_words` from `.graphify_detect.json` is greater than 5,000, run: + +```powershell +python -c " +import json +from graphify.benchmark import run_benchmark, print_benchmark +from pathlib import Path + +detection = json.loads(Path('.graphify_detect.json').read_text()) +result = run_benchmark('graphify-out/graph.json', corpus_words=detection['total_words']) +print_benchmark(result) +" +``` + +Print the output directly in chat. If `total_words <= 5000`, skip silently - the graph value is structural clarity, not token compression, for small corpora. + +--- + +### Step 9 - Save manifest, update cost tracker, clean up, and report + +```powershell +python -c " +import json +from pathlib import Path +from datetime import datetime, timezone +from graphify.detect import save_manifest + +# Save manifest for --update +detect = json.loads(Path('.graphify_detect.json').read_text()) +save_manifest(detect['files']) + +# Update cumulative cost tracker +extract = json.loads(Path('.graphify_extract.json').read_text()) +input_tok = extract.get('input_tokens', 0) +output_tok = extract.get('output_tokens', 0) + +cost_path = Path('graphify-out/cost.json') +if cost_path.exists(): + cost = json.loads(cost_path.read_text()) +else: + cost = {'runs': [], 'total_input_tokens': 0, 'total_output_tokens': 0} + +cost['runs'].append({ + 'date': datetime.now(timezone.utc).isoformat(), + 'input_tokens': input_tok, + 'output_tokens': output_tok, + 'files': detect.get('total_files', 0), +}) +cost['total_input_tokens'] += input_tok +cost['total_output_tokens'] += output_tok +cost_path.write_text(json.dumps(cost, indent=2)) + +print(f'This run: {input_tok:,} input tokens, {output_tok:,} 
output tokens') +print(f'All time: {cost[\"total_input_tokens\"]:,} input, {cost[\"total_output_tokens\"]:,} output ({len(cost[\"runs\"])} runs)') +" +Remove-Item -ErrorAction SilentlyContinue .graphify_detect.json, .graphify_extract.json, .graphify_ast.json, .graphify_semantic.json, .graphify_analysis.json, .graphify_labels.json +Remove-Item -ErrorAction SilentlyContinue graphify-out/.needs_update +``` + +Tell the user (omit the obsidian line unless --obsidian was given): +``` +Graph complete. Outputs in PATH_TO_DIR/graphify-out/ + + graph.html - interactive graph, open in browser + GRAPH_REPORT.md - audit report + graph.json - raw graph data + obsidian/ - Obsidian vault (only if --obsidian was given) +``` + +If graphify saved you time, consider supporting it: https://github.com/sponsors/safishamsi + +Replace PATH_TO_DIR with the actual absolute path of the directory that was processed. + +Then paste these sections from GRAPH_REPORT.md directly into the chat: +- God Nodes +- Surprising Connections +- Suggested Questions + +Do NOT paste the full report - just those three sections. Keep it concise. + +Then immediately offer to explore. Pick the single most interesting suggested question from the report - the one that crosses the most community boundaries or has the most surprising bridge node - and ask: + +> "The most interesting question this graph can answer: **[question]**. Want me to trace it?" + +If the user says yes, run `/graphify query "[question]"` on the graph and walk them through the answer using the graph structure - which nodes connect, which community boundaries get crossed, what the path reveals. Keep going as long as they want to explore. Each answer should end with a natural follow-up ("this connects to X - want to go deeper?") so the session feels like navigation, not a one-shot report. + +The graph is the map. Your job after the pipeline is to be the guide. 
+ +--- + +## For --update (incremental re-extraction) + +Use when you've added or modified files since the last run. Only re-extracts changed files - saves tokens and time. + +```powershell +python -c " +import sys, json +from graphify.detect import detect_incremental, save_manifest +from pathlib import Path + +result = detect_incremental(Path('INPUT_PATH')) +new_total = result.get('new_total', 0) +print(json.dumps(result, indent=2)) +Path('.graphify_incremental.json').write_text(json.dumps(result)) +if new_total == 0: + print('No files changed since last run. Nothing to update.') + raise SystemExit(0) +print(f'{new_total} new/changed file(s) to re-extract.') +" +``` + +If new files exist, first check whether all changed files are code files: + +```powershell +python -c " +import json +from pathlib import Path + +result = json.loads(open('.graphify_incremental.json').read()) if Path('.graphify_incremental.json').exists() else {} +code_exts = {'.py','.ts','.js','.go','.rs','.java','.cpp','.c','.rb','.swift','.kt','.cs','.scala','.php','.cc','.cxx','.hpp','.h','.kts','.lua','.toc'} +new_files = result.get('new_files', {}) +all_changed = [f for files in new_files.values() for f in files] +code_only = all(Path(f).suffix.lower() in code_exts for f in all_changed) +print('code_only:', code_only) +" +``` + +If `code_only` is True: print `[graphify update] Code-only changes detected - skipping semantic extraction (no LLM needed)`, run only Step 3A (AST) on the changed files, skip Step 3B entirely (no subagents), then go straight to merge and Steps 4–8. + +If `code_only` is False (any changed file is a doc/paper/image): run the full Steps 3A–3C pipeline as normal. 
+ +Then: + +```powershell +python -c " +import sys, json +from graphify.build import build_from_json +from graphify.export import to_json +from networkx.readwrite import json_graph +import networkx as nx +from pathlib import Path + +# Load existing graph +existing_data = json.loads(Path('graphify-out/graph.json').read_text()) +G_existing = json_graph.node_link_graph(existing_data, edges='links') + +# Load new extraction +new_extraction = json.loads(Path('.graphify_extract.json').read_text()) +G_new = build_from_json(new_extraction) + +# Merge: new nodes/edges into existing graph +G_existing.update(G_new) +print(f'Merged: {G_existing.number_of_nodes()} nodes, {G_existing.number_of_edges()} edges') +" +``` + +Then run Steps 4–8 on the merged graph as normal. + +After Step 4, show the graph diff: + +```powershell +python -c " +import json +from graphify.analyze import graph_diff +from graphify.build import build_from_json +from networkx.readwrite import json_graph +import networkx as nx +from pathlib import Path + +# Load old graph (before update) from backup written before merge +old_data = json.loads(Path('.graphify_old.json').read_text()) if Path('.graphify_old.json').exists() else None +new_extract = json.loads(Path('.graphify_extract.json').read_text()) +G_new = build_from_json(new_extract) + +if old_data: + G_old = json_graph.node_link_graph(old_data, edges='links') + diff = graph_diff(G_old, G_new) + print(diff['summary']) + if diff['new_nodes']: + print('New nodes:', ', '.join(n['label'] for n in diff['new_nodes'][:5])) + if diff['new_edges']: + print('New edges:', len(diff['new_edges'])) +" +``` + +Before the merge step, save the old graph: `Copy-Item graphify-out/graph.json .graphify_old.json` +Clean up after: `Remove-Item -ErrorAction SilentlyContinue .graphify_old.json` + +--- + +## For --cluster-only + +Skip Steps 1–3. 
Load the existing graph from `graphify-out/graph.json` and re-run clustering: + +```powershell +python -c " +import sys, json +from graphify.cluster import cluster, score_all +from graphify.analyze import god_nodes, surprising_connections +from graphify.report import generate +from graphify.export import to_json +from networkx.readwrite import json_graph +import networkx as nx +from pathlib import Path + +data = json.loads(Path('graphify-out/graph.json').read_text()) +G = json_graph.node_link_graph(data, edges='links') + +detection = {'total_files': 0, 'total_words': 99999, 'needs_graph': True, 'warning': None, + 'files': {'code': [], 'document': [], 'paper': []}} +tokens = {'input': 0, 'output': 0} + +communities = cluster(G) +cohesion = score_all(G, communities) +gods = god_nodes(G) +surprises = surprising_connections(G, communities) +labels = {cid: 'Community ' + str(cid) for cid in communities} + +report = generate(G, communities, cohesion, labels, gods, surprises, detection, tokens, '.') +Path('graphify-out/GRAPH_REPORT.md').write_text(report) +to_json(G, communities, 'graphify-out/graph.json') + +analysis = { + 'communities': {str(k): v for k, v in communities.items()}, + 'cohesion': {str(k): v for k, v in cohesion.items()}, + 'gods': gods, + 'surprises': surprises, +} +Path('.graphify_analysis.json').write_text(json.dumps(analysis, indent=2)) +print(f'Re-clustered: {len(communities)} communities') +" +``` + +Then run Steps 5–9 as normal (label communities, generate viz, benchmark, clean up, report). + +--- + +## For /graphify query + +Two traversal modes - choose based on the question: + +| Mode | Flag | Best for | +|------|------|----------| +| BFS (default) | _(none)_ | "What is X connected to?" - broad context, nearest neighbors first | +| DFS | `--dfs` | "How does X reach Y?" 
- trace a specific chain or dependency path | + +First check the graph exists: +```powershell +python -c " +from pathlib import Path +if not Path('graphify-out/graph.json').exists(): + print('ERROR: No graph found. Run /graphify first to build the graph.') + raise SystemExit(1) +" +``` +If it fails, stop and tell the user to run `/graphify <path>` first. + +Load `graphify-out/graph.json`, then: + +1. Find the 1-3 nodes whose label best matches key terms in the question. +2. Run the appropriate traversal from each starting node. +3. Read the subgraph - node labels, edge relations, confidence tags, source locations. +4. Answer using **only** what the graph contains. Quote `source_location` when citing a specific fact. +5. If the graph lacks enough information, say so - do not hallucinate edges. + +```powershell +python -c " +import sys, json +from networkx.readwrite import json_graph +import networkx as nx +from pathlib import Path + +data = json.loads(Path('graphify-out/graph.json').read_text()) +G = json_graph.node_link_graph(data, edges='links') + +question = 'QUESTION' +mode = 'MODE' # 'bfs' or 'dfs' +terms = [t.lower() for t in question.split() if len(t) > 3] + +# Find best-matching start nodes +scored = [] +for nid, ndata in G.nodes(data=True): + label = ndata.get('label', '').lower() + score = sum(1 for t in terms if t in label) + if score > 0: + scored.append((score, nid)) +scored.sort(reverse=True) +start_nodes = [nid for _, nid in scored[:3]] + +if not start_nodes: + print('No matching nodes found for query terms:', terms) + sys.exit(0) + +subgraph_nodes = set() +subgraph_edges = [] + +if mode == 'dfs': + # DFS: follow one path as deep as possible before backtracking. + # Depth-limited to 6 to avoid traversing the whole graph. 
+ visited = set() + stack = [(n, 0) for n in reversed(start_nodes)] + while stack: + node, depth = stack.pop() + if node in visited or depth > 6: + continue + visited.add(node) + subgraph_nodes.add(node) + for neighbor in G.neighbors(node): + if neighbor not in visited: + stack.append((neighbor, depth + 1)) + subgraph_edges.append((node, neighbor)) +else: + # BFS: explore all neighbors layer by layer up to depth 3. + frontier = set(start_nodes) + subgraph_nodes = set(start_nodes) + for _ in range(3): + next_frontier = set() + for n in frontier: + for neighbor in G.neighbors(n): + if neighbor not in subgraph_nodes: + next_frontier.add(neighbor) + subgraph_edges.append((n, neighbor)) + subgraph_nodes.update(next_frontier) + frontier = next_frontier + +# Token-budget aware output: rank by relevance, cut at budget (~4 chars/token) +token_budget = BUDGET # default 2000 +char_budget = token_budget * 4 + +# Score each node by term overlap for ranked output +def relevance(nid): + label = G.nodes[nid].get('label', '').lower() + return sum(1 for t in terms if t in label) + +ranked_nodes = sorted(subgraph_nodes, key=relevance, reverse=True) + +lines = [f'Traversal: {mode.upper()} | Start: {[G.nodes[n].get(\"label\",n) for n in start_nodes]} | {len(subgraph_nodes)} nodes'] +for nid in ranked_nodes: + d = G.nodes[nid] + lines.append(f' NODE {d.get(\"label\", nid)} [src={d.get(\"source_file\",\"\")} loc={d.get(\"source_location\",\"\")}]') +for u, v in subgraph_edges: + if u in subgraph_nodes and v in subgraph_nodes: + d = G.edges[u, v] + lines.append(f' EDGE {G.nodes[u].get(\"label\",u)} --{d.get(\"relation\",\"\")} [{d.get(\"confidence\",\"\")}]--> {G.nodes[v].get(\"label\",v)}') + +output = '\n'.join(lines) +if len(output) > char_budget: + output = output[:char_budget] + f'\n... 
(truncated at ~{token_budget} token budget - use --budget N for more)' +print(output) +" +``` + +Replace `QUESTION` with the user's actual question, `MODE` with `bfs` or `dfs`, and `BUDGET` with the token budget (default `2000`, or whatever `--budget N` specifies). Then answer based on the subgraph output above. + +After writing the answer, save it back into the graph so it improves future queries: + +```powershell +python -m graphify save-result --question "QUESTION" --answer "ANSWER" --type query --nodes NODE1 NODE2 +``` + +Replace `QUESTION` with the question, `ANSWER` with your full answer text, and `NODE1 NODE2` with the labels of the nodes you cited (one argument per node). This closes the feedback loop: the next `--update` will extract this Q&A as a node in the graph. + +--- + +## For /graphify path + +Find the shortest path between two named concepts in the graph. + +First check the graph exists: +```powershell +python -c " +from pathlib import Path +if not Path('graphify-out/graph.json').exists(): + print('ERROR: No graph found. Run /graphify first to build the graph.') + raise SystemExit(1) +" +``` +If it fails, stop and tell the user to run `/graphify <path>` first. 
+ +```powershell +python -c " +import json, sys +import networkx as nx +from networkx.readwrite import json_graph +from pathlib import Path + +data = json.loads(Path('graphify-out/graph.json').read_text()) +G = json_graph.node_link_graph(data, edges='links') + +a_term = 'NODE_A' +b_term = 'NODE_B' + +def find_node(term): + term = term.lower() + scored = sorted( + [(sum(1 for w in term.split() if w in G.nodes[n].get('label','').lower()), n) + for n in G.nodes()], + reverse=True + ) + return scored[0][1] if scored and scored[0][0] > 0 else None + +src = find_node(a_term) +tgt = find_node(b_term) + +if not src or not tgt: + print(f'Could not find nodes matching: {a_term!r} or {b_term!r}') + sys.exit(0) + +try: + path = nx.shortest_path(G, src, tgt) + print(f'Shortest path ({len(path)-1} hops):') + for i, nid in enumerate(path): + label = G.nodes[nid].get('label', nid) + if i < len(path) - 1: + edge = G.edges[nid, path[i+1]] + rel = edge.get('relation', '') + conf = edge.get('confidence', '') + print(f' {label} --{rel}--> [{conf}]') + else: + print(f' {label}') +except nx.NetworkXNoPath: + print(f'No path found between {a_term!r} and {b_term!r}') +except nx.NodeNotFound as e: + print(f'Node not found: {e}') +" +``` + +Replace `NODE_A` and `NODE_B` with the actual concept names from the user. Then explain the path in plain language - what each hop means, why it's significant. + +After writing the explanation, save it back: + +```powershell +python -m graphify save-result --question "Path from NODE_A to NODE_B" --answer "ANSWER" --type path_query --nodes NODE_A NODE_B +``` + +--- + +## For /graphify explain + +Give a plain-language explanation of a single node - everything connected to it. + +First check the graph exists: +```powershell +python -c " +from pathlib import Path +if not Path('graphify-out/graph.json').exists(): + print('ERROR: No graph found. 
Run /graphify first to build the graph.') + raise SystemExit(1) +" +``` +If it fails, stop and tell the user to run `/graphify <path>` first. + +```powershell +python -c " +import json, sys +import networkx as nx +from networkx.readwrite import json_graph +from pathlib import Path + +data = json.loads(Path('graphify-out/graph.json').read_text()) +G = json_graph.node_link_graph(data, edges='links') + +term = 'NODE_NAME' +term_lower = term.lower() + +# Find best matching node +scored = sorted( + [(sum(1 for w in term_lower.split() if w in G.nodes[n].get('label','').lower()), n) + for n in G.nodes()], + reverse=True +) +if not scored or scored[0][0] == 0: + print(f'No node matching {term!r}') + sys.exit(0) + +nid = scored[0][1] +data_n = G.nodes[nid] +print(f'NODE: {data_n.get(\"label\", nid)}') +print(f' source: {data_n.get(\"source_file\",\"unknown\")}') +print(f' type: {data_n.get(\"file_type\",\"unknown\")}') +print(f' degree: {G.degree(nid)}') +print() +print('CONNECTIONS:') +for neighbor in G.neighbors(nid): + edge = G.edges[nid, neighbor] + nlabel = G.nodes[neighbor].get('label', neighbor) + rel = edge.get('relation', '') + conf = edge.get('confidence', '') + src_file = G.nodes[neighbor].get('source_file', '') + print(f' --{rel}--> {nlabel} [{conf}] ({src_file})') +" +``` + +Replace `NODE_NAME` with the concept the user asked about. Then write a 3-5 sentence explanation of what this node is, what it connects to, and why those connections are significant. Use the source locations as citations. + +After writing the explanation, save it back: + +```powershell +python -m graphify save-result --question "Explain NODE_NAME" --answer "ANSWER" --type explain --nodes NODE_NAME +``` + +--- + +## For /graphify add + +Fetch a URL and add it to the corpus, then update the graph. 
+ +```powershell +python -c " +import sys +from graphify.ingest import ingest +from pathlib import Path + +try: + out = ingest('URL', Path('./raw'), author='AUTHOR', contributor='CONTRIBUTOR') + print(f'Saved to {out}') +except ValueError as e: + print(f'error: {e}', file=sys.stderr) + sys.exit(1) +except RuntimeError as e: + print(f'error: {e}', file=sys.stderr) + sys.exit(1) +" +``` + +Replace `URL` with the actual URL, `AUTHOR` with the user's name if provided, `CONTRIBUTOR` likewise. If the command exits with an error, tell the user what went wrong - do not silently continue. After a successful save, automatically run the `--update` pipeline on `./raw` to merge the new file into the existing graph. + +Supported URL types (auto-detected): +- Twitter/X → fetched via oEmbed, saved as `.md` with tweet text and author +- arXiv → abstract + metadata saved as `.md` +- PDF → downloaded as `.pdf` +- Images (.png/.jpg/.webp) → downloaded, vision extraction runs on next build +- Any webpage → converted to markdown via html2text + +--- + +## For --watch + +Start a background watcher that monitors a folder and auto-updates the graph when files change. + +```powershell +python -m graphify.watch INPUT_PATH --debounce 3 +``` + +Replace INPUT_PATH with the folder to watch. Behavior depends on what changed: + +- **Code files only (.py, .ts, .go, etc.):** re-runs AST extraction + rebuild + cluster immediately, no LLM needed. `graph.json` and `GRAPH_REPORT.md` are updated automatically. +- **Docs, papers, or images:** writes a `graphify-out/needs_update` flag and prints a notification to run `/graphify --update` (LLM semantic re-extraction required). + +Debounce (default 3s): waits until file activity stops before triggering, so a wave of parallel agent writes doesn't trigger a rebuild per file. + +Press Ctrl+C to stop. + +For agentic workflows: run `--watch` in a background terminal. Code changes from agent waves are picked up automatically between waves. 
If agents are also writing docs or notes, you'll need a manual `/graphify --update` after those waves. + +--- + +## For git commit hook + +Install a post-commit hook that auto-rebuilds the graph after every commit. No background process needed - triggers once per commit, works with any editor. + +```bash +graphify hook install # install +graphify hook uninstall # remove +graphify hook status # check +``` + +After every `git commit`, the hook detects which code files changed (via `git diff HEAD~1`), re-runs AST extraction on those files, and rebuilds `graph.json` and `GRAPH_REPORT.md`. Doc/image changes are ignored by the hook - run `/graphify --update` manually for those. + +If a post-commit hook already exists, graphify appends to it rather than replacing it. + +--- + +## For native CLAUDE.md integration + +Run once per project to make graphify always-on in Claude Code sessions: + +```bash +graphify claude install +``` + +This writes a `## graphify` section to the local `CLAUDE.md` that instructs Claude to check the graph before answering codebase questions and rebuild it after code changes. No manual `/graphify` needed in future sessions. + +```bash +graphify claude uninstall # remove the section +``` + +--- + +## Troubleshooting + +### PowerShell 5.1: Vertical scrolling stops working + +If vertical scrolling breaks in PowerShell after running graphify, this is caused by ANSI escape sequences from the `graspologic` library. Graphify v0.3.10+ suppresses this output, but if you still see the issue: + +1. **Upgrade graphify**: `pip install --upgrade graphifyy` +2. **Use Windows Terminal** instead of the legacy PowerShell console — Windows Terminal handles ANSI codes correctly +3. **Reset your terminal**: close and reopen PowerShell +4. **Skip graspologic**: uninstall it (`pip uninstall graspologic`) and graphify will fall back to NetworkX's built-in Louvain algorithm, which produces no ANSI output + +--- + +## Honesty Rules + +- Never invent an edge. 
If unsure, use AMBIGUOUS. +- Never skip the corpus check warning. +- Always show token cost in the report. +- Never hide cohesion scores behind symbols - show the raw number. +- Never run HTML viz on a graph with more than 5,000 nodes without warning the user. diff --git a/.github/hooks/context-mode.json b/.github/hooks/context-mode.json new file mode 100644 index 00000000..1459587b --- /dev/null +++ b/.github/hooks/context-mode.json @@ -0,0 +1,48 @@ +{ + "hooks": { + "PreToolUse": [ + { + "matcher": "", + "hooks": [ + { + "type": "command", + "command": "\"C:/Users/ericc/AppData/Local/Programs/cursor/resources/app/resources/helpers/node.exe\" \"C:/Users/ericc/AppData/Roaming/npm/node_modules/context-mode/hooks/vscode-copilot/pretooluse.mjs\"" + } + ] + } + ], + "PostToolUse": [ + { + "matcher": "", + "hooks": [ + { + "type": "command", + "command": "\"C:/Users/ericc/AppData/Local/Programs/cursor/resources/app/resources/helpers/node.exe\" \"C:/Users/ericc/AppData/Roaming/npm/node_modules/context-mode/hooks/vscode-copilot/posttooluse.mjs\"" + } + ] + } + ], + "PreCompact": [ + { + "matcher": "", + "hooks": [ + { + "type": "command", + "command": "\"C:/Users/ericc/AppData/Local/Programs/cursor/resources/app/resources/helpers/node.exe\" \"C:/Users/ericc/AppData/Roaming/npm/node_modules/context-mode/hooks/vscode-copilot/precompact.mjs\"" + } + ] + } + ], + "SessionStart": [ + { + "matcher": "", + "hooks": [ + { + "type": "command", + "command": "\"C:/Users/ericc/AppData/Local/Programs/cursor/resources/app/resources/helpers/node.exe\" \"C:/Users/ericc/AppData/Roaming/npm/node_modules/context-mode/hooks/vscode-copilot/sessionstart.mjs\"" + } + ] + } + ] + } +} diff --git a/.gitignore b/.gitignore index 8f2b6087..6b02ca31 100644 --- a/.gitignore +++ b/.gitignore @@ -24,6 +24,11 @@ artifacts/ # Graphify knowledge graph (regenerable analysis artifact, stored in ~/.gauntletci/graphify/) graphify-out/ +# Cursor agent config (optional commit — un-ignore rules under .cursor/) 
+!.cursor/ +!.cursor/**/* +!.cursor/**/*.mdc + # GauntletCI model cache (Phi-4 Mini ONNX — ~2.8 GB, never commit) .gauntletci/ **/*.onnx @@ -50,10 +55,10 @@ graphify-out/ !docs/TROUBLESHOOTING.md !docs/features-benefits.md !Docker/docker.md -!worker/README.md -!github-app-server/README.md -!SUPPORT.md -!nuget-readme.md +!worker/README.md +!github-app-server/README.md +!SUPPORT.md +!nuget-readme.md !docs/noise-and-false-positives.md !docs/rules/GCI0003-behavioral-change-detection.md !docs/rules/GCI0004-breaking-change-risk.md @@ -154,12 +159,12 @@ BenchmarkDotNet.Artifacts/ # Node / Cloudflare Worker node_modules/ -worker/.wrangler/ -worker/dist/ -github-app-server/node_modules/ -github-app-server/dist/ -github-app-server/.env -**/.gauntletci-github-app-work/ +worker/.wrangler/ +worker/dist/ +github-app-server/node_modules/ +github-app-server/dist/ +github-app-server/.env +**/.gauntletci-github-app-work/ # Next.js site build artifacts site/out/ @@ -173,3 +178,8 @@ site/tsconfig.tsbuildinfo *.zip *.7z .aider* + +# Cursor agent config — allow commit after global *.md ignore above +!.cursor/**/*.md +!.cursor/**/*.mdc +!.cursor/skills/**/SKILL.md diff --git a/src/GauntletCI.Core/Configuration/DefaultSeverities.cs b/src/GauntletCI.Core/Configuration/DefaultSeverities.cs index 5ab0f499..315d89bf 100644 --- a/src/GauntletCI.Core/Configuration/DefaultSeverities.cs +++ b/src/GauntletCI.Core/Configuration/DefaultSeverities.cs @@ -13,31 +13,49 @@ internal static class DefaultSeverities private static readonly Dictionary Map = new(StringComparer.OrdinalIgnoreCase) { - // Block: commit-blocking by default + // Block: commit-blocking by default (rule-level; some rules use SeverityOverride per finding) ["GCI0003"] = RuleSeverity.Block, - ["GCI0004"] = RuleSeverity.Block, ["GCI0007"] = RuleSeverity.Block, ["GCI0010"] = RuleSeverity.Block, ["GCI0012"] = RuleSeverity.Block, ["GCI0015"] = RuleSeverity.Block, ["GCI0016"] = RuleSeverity.Block, + ["GCI0020"] = RuleSeverity.Block, 
["GCI0021"] = RuleSeverity.Block, - ["GCI0032"] = RuleSeverity.Block, ["GCI0036"] = RuleSeverity.Block, ["GCI0039"] = RuleSeverity.Block, - ["GCI0052"] = RuleSeverity.Block, // Warn: visible by default, non-blocking ["GCI0001"] = RuleSeverity.Warn, - ["GCI0053"] = RuleSeverity.Warn, + ["GCI0004"] = RuleSeverity.Warn, ["GCI0006"] = RuleSeverity.Warn, ["GCI0022"] = RuleSeverity.Warn, ["GCI0024"] = RuleSeverity.Warn, ["GCI0029"] = RuleSeverity.Warn, + ["GCI0032"] = RuleSeverity.Warn, ["GCI0035"] = RuleSeverity.Warn, ["GCI0038"] = RuleSeverity.Warn, ["GCI0041"] = RuleSeverity.Warn, + ["GCI0048"] = RuleSeverity.Warn, + ["GCI0050"] = RuleSeverity.Warn, + ["GCI0051"] = RuleSeverity.Warn, + ["GCI0053"] = RuleSeverity.Warn, + ["GCI0057"] = RuleSeverity.Warn, + + // Info: advisory / low-noise heuristics + ["GCI0042"] = RuleSeverity.Info, + ["GCI0043"] = RuleSeverity.Info, + ["GCI0044"] = RuleSeverity.Info, + ["GCI0045"] = RuleSeverity.Info, + ["GCI0046"] = RuleSeverity.Info, + ["GCI0047"] = RuleSeverity.Info, + ["GCI0049"] = RuleSeverity.Info, + ["GCI0056"] = RuleSeverity.Info, + + // None: disabled by default (duplicate coverage elsewhere) + ["GCI0054"] = RuleSeverity.None, // duplicate coverage — see GCI0016 + ["GCI0055"] = RuleSeverity.None, // signature changes — covered by GCI0003 }; /// Returns the built-in default severity for , or if not listed. diff --git a/src/GauntletCI.Core/Model/Finding.cs b/src/GauntletCI.Core/Model/Finding.cs index e2d2b006..9350714b 100644 --- a/src/GauntletCI.Core/Model/Finding.cs +++ b/src/GauntletCI.Core/Model/Finding.cs @@ -40,6 +40,12 @@ public class Finding /// public RuleSeverity Severity { get; set; } = RuleSeverity.Info; + /// + /// When set, the orchestrator uses this instead of the rule-level configured severity. + /// Allows one rule to emit findings at different impact tiers (e.g. GCI0003 compatible vs incompatible signatures). + /// + public RuleSeverity? 
SeverityOverride { get; set; } + /// The path of the file that contains the finding, if applicable. public string? FilePath { get; set; } diff --git a/src/GauntletCI.Core/Rules/Implementations/GCI0003_BehavioralChangeDetection.cs b/src/GauntletCI.Core/Rules/Implementations/GCI0003_BehavioralChangeDetection.cs index e386bd7c..02c055f0 100644 --- a/src/GauntletCI.Core/Rules/Implementations/GCI0003_BehavioralChangeDetection.cs +++ b/src/GauntletCI.Core/Rules/Implementations/GCI0003_BehavioralChangeDetection.cs @@ -156,7 +156,8 @@ private void CheckLogicRemovedWithoutTests(DiffContext diff, List findi evidence: $"Removed logic: {string.Join(" | ", examples)}", whyItMatters: "Removing control-flow logic without updating tests may silently break behaviour that was previously covered.", suggestedAction: "Add or update tests to verify the removed logic paths are intentionally no longer needed.", - confidence: Confidence.Low)); + confidence: Confidence.Low, + severityOverride: RuleSeverity.Warn)); } } @@ -208,7 +209,8 @@ private void CheckMethodSignatureChanges(DiffContext diff, List finding crossSummary: (total, fcount) => $"{total} method signatures changed (incompatible) across {fcount} files", whyItMatters: "Signature changes can break callers that haven't been updated.", suggestedAction: "Verify all callers are updated and consider adding an overload for backward compatibility.", - confidence: Confidence.Medium); + confidence: Confidence.Medium, + severityOverride: RuleSeverity.Block); EmitSigFindings(findings, fileCompatible, single1Summary: (name, file) => $"Backward-compatible signature extension: '{name}' in {file.NewPath}", @@ -216,7 +218,8 @@ private void CheckMethodSignatureChanges(DiffContext diff, List finding crossSummary: (total, fcount) => $"{total} backward-compatible signature extensions across {fcount} files", whyItMatters: "New parameters have default values (backward-compatible), but callers using positional arguments may need review.", 
suggestedAction: "Confirm all existing callers still compile and behave correctly with the new defaults.", - confidence: Confidence.Low); + confidence: Confidence.Low, + severityOverride: RuleSeverity.Info); } private void EmitSigFindings( @@ -227,7 +230,8 @@ private void EmitSigFindings( Func crossSummary, string whyItMatters, string suggestedAction, - Confidence confidence) + Confidence confidence, + RuleSeverity? severityOverride = null) { if (perFile.Count == 0) return; @@ -246,7 +250,7 @@ private void EmitSigFindings( var evidence = items.Count == 1 ? $"Was: {firstRemoved.Content.Trim()} | Now: {firstAdded.Content.Trim()}" : $"Changed: {names} | e.g. Was: {firstRemoved.Content.Trim()} | Now: {firstAdded.Content.Trim()}"; - findings.Add(CreateFinding(file, summary, evidence, whyItMatters, suggestedAction, adjustedConfidence, firstAdded)); + findings.Add(CreateFinding(file, summary, evidence, whyItMatters, suggestedAction, adjustedConfidence, firstAdded, severityOverride)); } } else @@ -258,7 +262,8 @@ private void EmitSigFindings( evidence: $"Files: {fileList}", whyItMatters: whyItMatters, suggestedAction: suggestedAction, - confidence: confidence)); + confidence: confidence, + severityOverride: severityOverride)); } } diff --git a/src/GauntletCI.Core/Rules/Implementations/GCI0004_BreakingChangeRisk.cs b/src/GauntletCI.Core/Rules/Implementations/GCI0004_BreakingChangeRisk.cs index 71e72abb..b670cd57 100644 --- a/src/GauntletCI.Core/Rules/Implementations/GCI0004_BreakingChangeRisk.cs +++ b/src/GauntletCI.Core/Rules/Implementations/GCI0004_BreakingChangeRisk.cs @@ -99,7 +99,8 @@ private void CheckObsoleteRemoved(DiffContext diff, List findings) evidence: $"Removed: {string.Join("; ", removedObsolete.Take(3).Select(l => l.Content.Trim()))}", whyItMatters: "Removing [Obsolete] may indicate unintentional removal of a deprecation guard, or premature deletion of an API still consumed externally.", suggestedAction: "Confirm the member is no longer referenced and remove 
only after verifying downstream consumers.", - confidence: Confidence.Medium)); + confidence: Confidence.Medium, + severityOverride: RuleSeverity.Block)); } } } diff --git a/src/GauntletCI.Core/Rules/Implementations/GCI0054_AsyncVoidAbuse.cs b/src/GauntletCI.Core/Rules/Implementations/GCI0054_AsyncVoidAbuse.cs index 6dcd9f2e..2b31a67a 100644 --- a/src/GauntletCI.Core/Rules/Implementations/GCI0054_AsyncVoidAbuse.cs +++ b/src/GauntletCI.Core/Rules/Implementations/GCI0054_AsyncVoidAbuse.cs @@ -7,10 +7,9 @@ namespace GauntletCI.Core.Rules.Implementations; /// -/// GCI0054, Async Void Abuse -/// Detects public async methods that return void instead of Task, which prevents callers -/// from awaiting and catching exceptions. Fire-and-forget async methods should only be -/// used in event handlers where Task return is impossible. +/// GCI0054, Async Void Abuse (disabled by default) +/// Detects public async void methods. Disabled via default severity None because GCI0016 covers the same pattern. +/// Re-enable in .gauntletci.json with severity Warn or Block if you want the stricter public-only filter. /// public class GCI0054_AsyncVoidAbuse : RuleBase { diff --git a/src/GauntletCI.Core/Rules/Implementations/GCI0055_MethodSignatureChange.cs b/src/GauntletCI.Core/Rules/Implementations/GCI0055_MethodSignatureChange.cs index 4309df2e..4079eaf7 100644 --- a/src/GauntletCI.Core/Rules/Implementations/GCI0055_MethodSignatureChange.cs +++ b/src/GauntletCI.Core/Rules/Implementations/GCI0055_MethodSignatureChange.cs @@ -7,9 +7,9 @@ namespace GauntletCI.Core.Rules.Implementations; /// -/// GCI0055, Method Signature Change Risk -/// Detects breaking method signature changes: parameter additions without defaults, -/// parameter type changes, and return type changes in public methods. +/// GCI0055, Method Signature Change Risk (disabled by default) +/// Regex-based signature change detection. 
Disabled via default severity None because GCI0003 +/// covers incompatible and compatible signature changes with cross-file deduplication. /// public class GCI0055_MethodSignatureChange : RuleBase { diff --git a/src/GauntletCI.Core/Rules/Implementations/GCI0057_BlockingAsyncViolation.cs b/src/GauntletCI.Core/Rules/Implementations/GCI0057_BlockingAsyncViolation.cs index 4d3cbdea..57b22be8 100644 --- a/src/GauntletCI.Core/Rules/Implementations/GCI0057_BlockingAsyncViolation.cs +++ b/src/GauntletCI.Core/Rules/Implementations/GCI0057_BlockingAsyncViolation.cs @@ -7,10 +7,10 @@ namespace GauntletCI.Core.Rules.Implementations; /// -/// GCI0057, Blocking Async Pattern Violations -/// Detects patterns where synchronous/blocking operations are used where async should be used: -/// - .Result, .Wait(), .GetAwaiter().GetResult() on Task operations (blocks thread, causes deadlock) -/// - Synchronous file I/O in production code (File.ReadAllText instead of ReadAllTextAsync) +/// GCI0057, Synchronous File I/O in Production Code +/// Detects synchronous File.* calls that block threads; async variants should be preferred. +/// Blocking async patterns (.Result, .Wait) are handled by GCI0016 and disabled here to avoid duplicate findings. +/// Default severity: Warn. Disabled when superseded — use GCI0016 for blocking-async detection. 
/// public class GCI0057_BlockingAsyncViolation : RuleBase { @@ -19,16 +19,7 @@ public GCI0057_BlockingAsyncViolation(IPatternProvider patterns) : base(patterns } public override string Id => "GCI0057"; - public override string Name => "Blocking Async Pattern Violation"; - - private static readonly Regex BlockingResultPattern = - new(@"\.\s*Result\s*(?:[;\,\)\]])", RegexOptions.Compiled); - - private static readonly Regex BlockingWaitPattern = - new(@"\.\s*Wait\s*\(\s*(?:\)|[^)]*\))", RegexOptions.Compiled); - - private static readonly Regex BlockingGetResultPattern = - new(@"\.GetAwaiter\s*\(\s*\)\s*\.GetResult\s*\(\s*\)", RegexOptions.Compiled); + public override string Name => "Synchronous File I/O"; private static readonly Regex SyncFileIoPattern = new(@"\bFile\.(ReadAllText|ReadAllLines|WriteAllText|WriteAllLines|Copy|ReadAllBytes|WriteAllBytes)\s*\(", @@ -37,14 +28,10 @@ public GCI0057_BlockingAsyncViolation(IPatternProvider patterns) : base(patterns private static readonly Regex AsyncMethodPattern = new(@"\basync\s+(?:Task|void)", RegexOptions.Compiled); - private static readonly string[] BlockingAsyncExemptFiles = new[] - { + private static readonly string[] SyncFileIoExemptFiles = + [ "Program.cs", "Startup.cs", "AssemblyInfo.cs" - }; - - private static readonly Regex ControllerMethodPattern = - new(@"public\s+(?:async\s+)?(?:Task|IActionResult|void)\s+\w+\s*\(", - RegexOptions.Compiled); + ]; public override Task> EvaluateAsync( AnalysisContext context, CancellationToken ct = default) @@ -54,84 +41,21 @@ public override Task> EvaluateAsync( foreach (var file in diff.Files) { - // Skip test files if (WellKnownPatterns.IsTestFile(file.NewPath)) continue; - CheckBlockingAsyncCalls(file, findings); CheckSyncFileIo(file, findings); } return Task.FromResult(findings); } - private void CheckBlockingAsyncCalls(DiffFile file, List findings) - { - // Pattern A: .Result, .Wait(), .GetAwaiter().GetResult() on Task operations - foreach (var line in file.AddedLines) - 
{ - var content = line.Content; - - // Skip string literals and comments - if (IsInStringOrComment(content)) - continue; - - // Check for .Result pattern - if (BlockingResultPattern.IsMatch(content)) - { - // High confidence if it looks like an async method call chain - if (content.Contains("Async") || content.Contains("Task")) - { - findings.Add(CreateFinding( - file, - summary: "Blocking call on async operation via .Result", - evidence: $"Line {line.LineNumber}: {content.Trim()}", - whyItMatters: ".Result blocks the current thread. In ASP.NET, Blazor, or WPF contexts, this can cause deadlock. The synchronization context needs the blocked thread to execute the continuation.", - suggestedAction: "Use await instead of .Result. If blocking is truly necessary, add a code comment explaining why and consider using .GetAwaiter().GetResult() with explicit intent.", - confidence: Confidence.High, - line: line)); - } - } - - // Check for .Wait() pattern - if (BlockingWaitPattern.IsMatch(content)) - { - if (content.Contains("Async") || content.Contains("Task")) - { - findings.Add(CreateFinding( - file, - summary: "Blocking call on async operation via .Wait()", - evidence: $"Line {line.LineNumber}: {content.Trim()}", - whyItMatters: ".Wait() blocks the current thread. In async contexts this can cause deadlock.", - suggestedAction: "Use await instead of .Wait().", - confidence: Confidence.High, - line: line)); - } - } - - // Check for .GetAwaiter().GetResult() pattern - if (BlockingGetResultPattern.IsMatch(content)) - { - findings.Add(CreateFinding( - file, - summary: "Blocking call on async operation via .GetAwaiter().GetResult()", - evidence: $"Line {line.LineNumber}: {content.Trim()}", - whyItMatters: ".GetAwaiter().GetResult() blocks the current thread and can cause deadlock in async contexts.", - suggestedAction: "Use await instead of .GetAwaiter().GetResult(). 
Only use this pattern in specific scenarios where blocking is unavoidable, with explicit justification.", - confidence: Confidence.High, - line: line)); - } - } - } - private void CheckSyncFileIo(DiffFile file, List findings) { - // Skip infrastructure files where some blocking I/O is acceptable var fileName = Path.GetFileName(file.NewPath); - if (BlockingAsyncExemptFiles.Any(f => f.Equals(fileName, StringComparison.OrdinalIgnoreCase))) + if (SyncFileIoExemptFiles.Any(f => f.Equals(fileName, StringComparison.OrdinalIgnoreCase))) return; - // Pattern B: Synchronous file I/O var allLines = file.Hunks.SelectMany(h => h.Lines).ToList(); foreach (var line in file.AddedLines) @@ -139,7 +63,6 @@ private void CheckSyncFileIo(DiffFile file, List findings) if (!SyncFileIoPattern.IsMatch(line.Content)) continue; - // Skip if in string/comment if (IsInStringOrComment(line.Content)) continue; @@ -148,11 +71,8 @@ private void CheckSyncFileIo(DiffFile file, List findings) continue; var method = match.Groups[1].Value; - - // Determine confidence based on context var confidence = DetermineFileIoConfidence(allLines, line); - // File.Copy has no direct async equivalent, mention streams var suggestion = method.Equals("Copy", StringComparison.OrdinalIgnoreCase) ? "For large files, consider using Stream.CopyToAsync() instead of File.Copy()." : $"Use await File.{method}Async(...) 
instead."; @@ -170,7 +90,6 @@ private void CheckSyncFileIo(DiffFile file, List findings) private static Confidence DetermineFileIoConfidence(List allLines, DiffLine targetLine) { - // Find the index of the target line by content comparison (safer than reference equality) var lineIndex = -1; for (int idx = 0; idx < allLines.Count; idx++) { @@ -182,37 +101,26 @@ private static Confidence DetermineFileIoConfidence(List allLines, Dif } if (lineIndex < 0) - return Confidence.Medium; // Conservative default if line not found + return Confidence.Medium; - // Look backwards to see if we're in an async method (30-line window) for (int i = lineIndex; i >= 0 && i >= lineIndex - 30; i--) { var prevLine = allLines[i].Content; if (AsyncMethodPattern.IsMatch(prevLine)) return Confidence.High; if (prevLine.Contains("public") || prevLine.Contains("private")) - break; // Reached method boundary + return Confidence.Medium; } - // In non-async context, it's still problematic but lower confidence return Confidence.Medium; } private static bool IsInStringOrComment(string line) { - // Simple heuristic: skip if line is a comment var trimmed = line.Trim(); if (trimmed.StartsWith("//") || trimmed.StartsWith("/*") || trimmed.StartsWith("*")) return true; - // Check if pattern appears before first string literal - // This is simplified and may have false negatives - var quoteIndex = line.IndexOf('"'); - if (quoteIndex == -1) - return false; // No quotes, pattern is likely code - - // If pattern appears after a quote, it might be in a string - // This is a heuristic and imperfect return false; } } diff --git a/src/GauntletCI.Core/Rules/RuleBase.cs b/src/GauntletCI.Core/Rules/RuleBase.cs index 93fbc066..b872889b 100644 --- a/src/GauntletCI.Core/Rules/RuleBase.cs +++ b/src/GauntletCI.Core/Rules/RuleBase.cs @@ -52,7 +52,8 @@ protected Finding CreateFinding( string evidence, string whyItMatters, string suggestedAction, - Confidence confidence) + Confidence confidence, + RuleSeverity? 
severityOverride = null) { var ruleId = RuleIdentifier.TryParse(Id, out var parsed) ? parsed.ToString() : Id; return new() @@ -63,7 +64,8 @@ protected Finding CreateFinding( Evidence = evidence, WhyItMatters = whyItMatters, SuggestedAction = suggestedAction, - Confidence = confidence + Confidence = confidence, + SeverityOverride = severityOverride, }; } @@ -85,7 +87,8 @@ protected Finding CreateFinding( string whyItMatters, string suggestedAction, Confidence confidence, - DiffLine? line = null) + DiffLine? line = null, + RuleSeverity? severityOverride = null) { var ruleId = RuleIdentifier.TryParse(Id, out var parsed) ? parsed.ToString() : Id; return new() @@ -99,6 +102,7 @@ protected Finding CreateFinding( Confidence = confidence, FilePath = file.NewPath, Line = (line?.LineNumber > 0) ? line.LineNumber : null, + SeverityOverride = severityOverride, }; } } diff --git a/src/GauntletCI.Core/Rules/RuleOrchestrator.cs b/src/GauntletCI.Core/Rules/RuleOrchestrator.cs index 602c2ca2..9916e4c5 100644 --- a/src/GauntletCI.Core/Rules/RuleOrchestrator.cs +++ b/src/GauntletCI.Core/Rules/RuleOrchestrator.cs @@ -194,7 +194,8 @@ public async Task RunAsync( try { var findings = await rule.EvaluateAsync(context, ruleCts.Token).ConfigureAwait(false); - foreach (var f in findings) f.Severity = severity; + foreach (var f in findings) + f.Severity = f.SeverityOverride ?? 
severity; allFindings.AddRange(findings); if (findings.Count > 0) outcome = RuleOutcome.Triggered; } diff --git a/src/GauntletCI.Tests/ConfigurationServiceTests.cs b/src/GauntletCI.Tests/ConfigurationServiceTests.cs index 5c0c87ae..a69f9da3 100644 --- a/src/GauntletCI.Tests/ConfigurationServiceTests.cs +++ b/src/GauntletCI.Tests/ConfigurationServiceTests.cs @@ -11,11 +11,18 @@ public class ConfigurationServiceTests [Theory] [InlineData("GCI0001", RuleSeverity.Warn)] [InlineData("GCI0003", RuleSeverity.Block)] + [InlineData("GCI0004", RuleSeverity.Warn)] [InlineData("GCI0012", RuleSeverity.Block)] + [InlineData("GCI0020", RuleSeverity.Block)] + [InlineData("GCI0032", RuleSeverity.Warn)] [InlineData("GCI0039", RuleSeverity.Block)] [InlineData("GCI0006", RuleSeverity.Warn)] [InlineData("GCI0035", RuleSeverity.Warn)] [InlineData("GCI0041", RuleSeverity.Warn)] + [InlineData("GCI0048", RuleSeverity.Warn)] + [InlineData("GCI0054", RuleSeverity.None)] + [InlineData("GCI0055", RuleSeverity.None)] + [InlineData("GCI0057", RuleSeverity.Warn)] [InlineData("GCI0099", RuleSeverity.Info)] // unknown → Info public void GetEffectiveSeverity_NoConfig_ReturnsDefault(string ruleId, RuleSeverity expected) { diff --git a/src/GauntletCI.Tests/OrchestratorTests.cs b/src/GauntletCI.Tests/OrchestratorTests.cs index 31b3d8aa..5561306b 100644 --- a/src/GauntletCI.Tests/OrchestratorTests.cs +++ b/src/GauntletCI.Tests/OrchestratorTests.cs @@ -21,7 +21,7 @@ index abc..def 100644 +var x = 1; """); var result = await orchestrator.RunAsync(diff); - Assert.Equal(36, result.RulesEvaluated); + Assert.Equal(34, result.RulesEvaluated); } [Fact] @@ -39,7 +39,7 @@ index abc..def 100644 """); var result = await orchestrator.RunAsync(diff); Assert.NotNull(result); - Assert.Equal(36, result.RulesEvaluated); + Assert.Equal(34, result.RulesEvaluated); } [Fact] diff --git a/src/GauntletCI.Tests/Phase5IntegrationTests.cs b/src/GauntletCI.Tests/Phase5IntegrationTests.cs index 6c054068..fb917c28 100644 --- 
a/src/GauntletCI.Tests/Phase5IntegrationTests.cs +++ b/src/GauntletCI.Tests/Phase5IntegrationTests.cs @@ -39,7 +39,7 @@ public class AuthService // Verify that behavioral change detection ran and context signals were evaluated Assert.NotNull(result); - Assert.Equal(36, result.RulesEvaluated); + Assert.Equal(34, result.RulesEvaluated); } [Fact] @@ -108,7 +108,7 @@ public class Service var result = await orchestrator.RunAsync(diff); Assert.NotNull(result); - Assert.Equal(36, result.RulesEvaluated); + Assert.Equal(34, result.RulesEvaluated); } [Fact] @@ -186,9 +186,9 @@ public async Task FetchAsync(string url) } [Fact] - public async Task Orchestrator_AllRulesPresent_36RulesEvaluated() + public async Task Orchestrator_AllRulesPresent_34EnabledRulesEvaluated() { - // Verify Phase 6 brought total to 36 rules (GCI0056 and GCI0057 added) + // 36 rule implementations; GCI0054 and GCI0055 disabled by default (duplicate coverage). var orchestrator = RuleOrchestrator.CreateDefault(); var cleanDiff = DiffParser.Parse(""" diff --git a/src/Clean.cs b/src/Clean.cs @@ -201,6 +201,6 @@ index abc..def 100644 """); var result = await orchestrator.RunAsync(cleanDiff); - Assert.Equal(36, result.RulesEvaluated); + Assert.Equal(34, result.RulesEvaluated); } } diff --git a/src/GauntletCI.Tests/Rules/GCI0003Tests.cs b/src/GauntletCI.Tests/Rules/GCI0003Tests.cs index 054ed2cc..fb6474e3 100644 --- a/src/GauntletCI.Tests/Rules/GCI0003Tests.cs +++ b/src/GauntletCI.Tests/Rules/GCI0003Tests.cs @@ -155,6 +155,7 @@ index abc..def 100644 var f = Assert.Single(findings, f => f.Summary.Contains("Backward-compatible", StringComparison.Ordinal)); Assert.Equal(Confidence.Low, f.Confidence); + Assert.Equal(RuleSeverity.Info, f.SeverityOverride); } [Fact] @@ -175,6 +176,7 @@ index abc..def 100644 var f = Assert.Single(findings, f => f.Summary.Contains("signature changed")); Assert.Equal(Confidence.Medium, f.Confidence); + Assert.Equal(RuleSeverity.Block, f.SeverityOverride); } [Fact] diff --git 
a/tests/GauntletCI.Core.Tests/Rules/GCI0057_BlockingAsyncViolationTests.cs b/tests/GauntletCI.Core.Tests/Rules/GCI0057_BlockingAsyncViolationTests.cs index 405711a9..b01c0f86 100644 --- a/tests/GauntletCI.Core.Tests/Rules/GCI0057_BlockingAsyncViolationTests.cs +++ b/tests/GauntletCI.Core.Tests/Rules/GCI0057_BlockingAsyncViolationTests.cs @@ -9,14 +9,9 @@ namespace GauntletCI.Core.Tests.Rules; public class GCI0057_BlockingAsyncViolationTests { - private GCI0057_BlockingAsyncViolation _rule; + private readonly GCI0057_BlockingAsyncViolation _rule = new(new DefaultPatternProvider()); - public GCI0057_BlockingAsyncViolationTests() - { - _rule = new GCI0057_BlockingAsyncViolation(new DefaultPatternProvider()); - } - - private DiffContext CreateDiff(string filePath, params string[] addedLines) + private static DiffContext CreateDiff(string filePath, params string[] addedLines) { var lines = addedLines .Select((content, idx) => new DiffLine @@ -45,62 +40,18 @@ private DiffContext CreateDiff(string filePath, params string[] addedLines) return new DiffContext { Files = new List { file } }; } - private AnalysisContext CreateContext(DiffContext diff) + private static AnalysisContext CreateContext(DiffContext diff) => new() { - return new AnalysisContext - { - EligibleFiles = [], - SkippedFiles = [], - Diff = diff - }; - } + EligibleFiles = [], + SkippedFiles = [], + Diff = diff + }; [Fact] - public async Task Finding_WhenUsingResult() + public async Task NoFinding_WhenBlockingAsyncOnly_GCI0016OwnsThatPattern() { var diff = CreateDiff("src/Service.cs", "var result = GetDataAsync().Result;"); - var context = CreateContext(diff); - - var findings = await _rule.EvaluateAsync(context); - - Assert.NotEmpty(findings); - var finding = findings.First(); - Assert.Equal("GCI0057", finding.RuleId); - Assert.Equal(Confidence.High, finding.Confidence); - } - - [Fact] - public async Task Finding_WhenUsingWait() - { - var diff = CreateDiff("src/Service.cs", "GetDataAsync().Wait();"); - var 
context = CreateContext(diff); - - var findings = await _rule.EvaluateAsync(context); - - Assert.NotEmpty(findings); - var finding = findings.First(); - Assert.Equal(Confidence.High, finding.Confidence); - } - - [Fact] - public async Task Finding_WhenUsingGetAwaiter() - { - var diff = CreateDiff("src/Service.cs", "var x = GetDataAsync().GetAwaiter().GetResult();"); - var context = CreateContext(diff); - - var findings = await _rule.EvaluateAsync(context); - - Assert.NotEmpty(findings); - } - - [Fact] - public async Task NoFinding_WhenUsingAwait() - { - var diff = CreateDiff("src/Service.cs", "var result = await GetDataAsync();"); - var context = CreateContext(diff); - - var findings = await _rule.EvaluateAsync(context); - + var findings = await _rule.EvaluateAsync(CreateContext(diff)); Assert.Empty(findings); } @@ -108,34 +59,24 @@ public async Task NoFinding_WhenUsingAwait() public async Task Finding_WhenUsingSyncFileRead() { var diff = CreateDiff("src/Service.cs", "var text = File.ReadAllText(path);"); - var context = CreateContext(diff); - - var findings = await _rule.EvaluateAsync(context); - + var findings = await _rule.EvaluateAsync(CreateContext(diff)); Assert.NotEmpty(findings); + Assert.Equal("GCI0057", findings[0].RuleId); } [Fact] public async Task NoFinding_WhenInProgramCs() { var diff = CreateDiff("Program.cs", "var text = File.ReadAllText(path);"); - var context = CreateContext(diff); - - var findings = await _rule.EvaluateAsync(context); - - // Infrastructure files like Program.cs are exempt + var findings = await _rule.EvaluateAsync(CreateContext(diff)); Assert.Empty(findings); } [Fact] public async Task NoFinding_WhenInTestFile() { - var diff = CreateDiff("tests/ServiceTests.cs", "var result = GetDataAsync().Result;"); - var context = CreateContext(diff); - - var findings = await _rule.EvaluateAsync(context); - - // Test files are exempt + var diff = CreateDiff("tests/ServiceTests.cs", "var text = File.ReadAllText(path);"); + var findings = await 
_rule.EvaluateAsync(CreateContext(diff)); Assert.Empty(findings); } @@ -143,10 +84,7 @@ public async Task NoFinding_WhenInTestFile() public async Task Finding_WhenUsingSyncFileWrite() { var diff = CreateDiff("src/Service.cs", "File.WriteAllText(path, content);"); - var context = CreateContext(diff); - - var findings = await _rule.EvaluateAsync(context); - + var findings = await _rule.EvaluateAsync(CreateContext(diff)); Assert.NotEmpty(findings); } @@ -154,24 +92,7 @@ public async Task Finding_WhenUsingSyncFileWrite() public async Task NoFinding_WhenUsingAsyncFileRead() { var diff = CreateDiff("src/Service.cs", "var text = await File.ReadAllTextAsync(path);"); - var context = CreateContext(diff); - - var findings = await _rule.EvaluateAsync(context); - + var findings = await _rule.EvaluateAsync(CreateContext(diff)); Assert.Empty(findings); } - - [Fact] - public async Task MultipleFindings_WhenMultipleViolations() - { - var diff = CreateDiff("src/Service.cs", - "var result = GetDataAsync().Result;", - "var text = File.ReadAllText(path);"); - var context = CreateContext(diff); - - var findings = await _rule.EvaluateAsync(context); - - Assert.True(findings.Count >= 2); - } } -