diff --git a/docs/GUARDLINK_REFERENCE.md b/docs/GUARDLINK_REFERENCE.md index d597a3f..63c5682 100644 --- a/docs/GUARDLINK_REFERENCE.md +++ b/docs/GUARDLINK_REFERENCE.md @@ -59,15 +59,107 @@ Append after severity: `cwe:CWE-89`, `owasp:A03:2021`, `capec:CAPEC-66`, `attack | Proprietary algorithm | `@shield:begin` ... `@shield:end` | | Unsure which annotation | `@comment -- "describe what you see"` | -## Commands +## CLI Commands ```bash -guardlink validate . # Check for errors -guardlink report . # Generate threat-model.md -guardlink status . # Coverage summary -guardlink suggest # Get annotation suggestions +# Core +guardlink init [dir] # Initialize .guardlink/ and agent instruction files +guardlink parse [dir] # Parse annotations → ThreatModel JSON +guardlink status [dir] # Risk grade + coverage summary +guardlink validate [dir] [--strict] # Syntax errors, dangling refs, unmitigated exposures + +# Reports & Export +guardlink report [dir] # Generate threat-model.md + optional JSON +guardlink dashboard [dir] # Interactive HTML dashboard with Mermaid diagrams +guardlink sarif [dir] [-o file] # SARIF 2.1.0 for GitHub Advanced Security / VS Code +guardlink diff [ref] # Compare threat model against a git ref (default: HEAD~1) + +# AI-Powered Analysis +guardlink threat-report <framework> # AI threat report (see frameworks below) +guardlink threat-reports # List saved threat reports +guardlink annotate # Launch coding agent to add annotations +guardlink config # Manage LLM provider / CLI agent configuration + +# Interactive +guardlink tui [dir] # Interactive TUI: slash commands + AI chat +guardlink mcp # Start MCP server (stdio) for Claude Code, Cursor, etc. 
+guardlink gal # Display GAL annotation language quick reference ``` +## Threat Report Frameworks + +```bash +guardlink threat-report stride # STRIDE (Spoofing, Tampering, Repudiation, Info Disclosure, DoS, Elevation) +guardlink threat-report dread # DREAD risk scoring +guardlink threat-report pasta # PASTA (Process for Attack Simulation and Threat Analysis) +guardlink threat-report attacker # Attacker-centric (personas, kill chains, attack trees) +guardlink threat-report rapid # RAPID threat model +guardlink threat-report general # General-purpose comprehensive analysis +guardlink threat-report "<prompt>" # Custom prompt — any free-text analysis instructions +``` + +## AI Agent Flags + +All AI commands (`threat-report`, `annotate`) support: + +```bash +--claude-code # Run via Claude Code CLI (inline) +--codex # Run via Codex CLI (inline) +--gemini # Run via Gemini CLI (inline) +--cursor # Open Cursor IDE with prompt on clipboard +--windsurf # Open Windsurf IDE with prompt on clipboard +--clipboard # Copy prompt to clipboard only +``` + +Additional `threat-report` flags: + +```bash +--thinking # Enable extended thinking / reasoning mode +--web-search # Enable web search grounding (OpenAI Responses API) +--provider <name> # Direct API: anthropic, openai, openrouter, deepseek +--model <name> # Override model name +``` + +## TUI Commands + +Run `guardlink tui` for the interactive terminal interface: + +``` +/init [name] Initialize project +/parse Parse annotations, build threat model +/status Risk grade + summary stats +/validate Check for errors + dangling refs +/exposures [--all] List open exposures by severity (--asset --severity --threat --file) +/show <n> Detail view + code context for exposure +/scan Coverage scanner — find unannotated symbols +/assets Asset tree with threat/control counts +/files Annotated file tree with exposure counts +/view <file> Show all annotations in a file with code context +/threat-report <framework> AI threat report (frameworks above or custom text) +/threat-reports List saved reports +/annotate Launch coding agent to annotate codebase +/model Set AI provider (API or CLI agent) +/report Generate markdown + JSON report +/dashboard Generate HTML dashboard + open browser +/diff [ref] Compare model vs git ref (default: HEAD~1) +/sarif [-o file] Export SARIF 2.1.0 +/gal GAL annotation language guide +(freeform text) Chat about your threat model with AI +``` + +## Critical Syntax Rules + +1. **@boundary requires TWO assets**: `@boundary between #A and #B` or `@boundary #A | #B`. +2. **@flows is ONE source → ONE target per line**: `@flows <source> -> <target> via <mechanism>`. +3. **@exposes / @mitigates require defined #id refs**: Every `#id` must have a definition in `.guardlink/definitions.*`. +4. **Severity in square brackets**: `[P0]` `[P1]` `[P2]` `[P3]` or `[critical]` `[high]` `[medium]` `[low]`. Goes AFTER the threat ref. +5. **Descriptions in double quotes after --**: `-- "description text here"`. +6. **IDs use parentheses in definitions, hash in references**: Define `(#sqli)`, reference `#sqli`. +7. **Asset references**: Use `#id` or `Dotted.Path` — no spaces or special chars. +8. **External refs space-separated after severity**: `cwe:CWE-89 owasp:A03:2021 capec:CAPEC-66`. +9. 
**@comment always needs -- and quotes**: `@comment -- "your note here"`. +10. **One annotation per comment line.** Do NOT put two @verbs on the same line. + ## MCP Tools When connected via `.mcp.json`, use: diff --git a/docs/SPEC.md b/docs/SPEC.md index 679b3c6..02318cf 100644 --- a/docs/SPEC.md +++ b/docs/SPEC.md @@ -866,7 +866,57 @@ A conforming GuardLink tool may expose a Model Context Protocol (MCP) server wit MCP integration enables real-time threat model awareness during coding sessions. Tools should support project-scoped MCP configuration (e.g., `.mcp.json` for Claude Code) so that the MCP server can be committed to the repository and automatically available to all developers. -### 8.3. `@shield` Compliance +### 8.3. AI-Powered Threat Analysis + +A conforming Level 4 implementation may provide AI-driven threat analysis that takes the parsed ThreatModel as input and produces structured reports using established threat modeling frameworks: + +| Framework | Description | +|-----------|-------------| +| STRIDE | Spoofing, Tampering, Repudiation, Information Disclosure, Denial of Service, Elevation of Privilege | +| DREAD | Risk scoring: Damage, Reproducibility, Exploitability, Affected Users, Discoverability | +| PASTA | Process for Attack Simulation and Threat Analysis — seven-stage methodology | +| Attacker-centric | Persona-based analysis with kill chains, attack trees, and adversary motivation | +| RAPID | Risk Assessment Process for Informed Decision-making | +| General | Comprehensive analysis combining multiple frameworks as appropriate for the codebase | + +The analysis workflow: +1. The tool serializes the ThreatModel (assets, threats, controls, flows, boundaries, exposures) into a prompt +2. Project context (README, package manifest, directory structure) is included for architecture understanding +3. Code snippets from annotated locations are extracted to provide real implementation context +4. 
The prompt is sent to an LLM (via direct API or CLI agent) with framework-specific system instructions +5. The AI reads the actual source files, cross-references annotations with code, and produces a structured report +6. Reports are saved as timestamped markdown files in `.guardlink/threat-reports/` + +Analysis can be performed through multiple execution paths: +- **Direct API**: Streaming LLM calls via Anthropic, OpenAI, OpenRouter, DeepSeek, or Ollama +- **CLI Agents**: Inline execution via Claude Code, Codex CLI, or Gemini CLI (the agent reads the codebase directly) +- **IDE Agents**: Prompt copied to clipboard for Cursor, Windsurf, or other IDE-integrated assistants + +Additional analysis capabilities: +- **Extended thinking / reasoning mode**: Enables chain-of-thought reasoning for deeper analysis +- **Web search grounding**: Augments analysis with real-time CVE, advisory, and vulnerability data +- **Custom prompts**: Free-text analysis instructions for domain-specific or mixed-framework analysis + +### 8.4. Interactive Dashboard + +A conforming implementation may generate an interactive HTML dashboard that visualizes the threat model. The dashboard should include: +- Risk grade and severity breakdown +- Asset graph with threat/control relationships +- Mermaid-based data flow diagrams generated from `@flows` and `@boundary` annotations +- Exposure triage view with severity filtering +- Annotation coverage statistics +- Integrated AI threat report summaries (loaded from `.guardlink/threat-reports/`) + +### 8.5. Interactive TUI + +A conforming implementation may provide an interactive terminal interface (TUI) that combines: +- Slash commands for all CLI operations (`/parse`, `/status`, `/validate`, `/exposures`, etc.) 
+- Freeform AI chat for conversational threat model exploration +- Exposure triage workflow (`/exposures` → `/show ` for detail + code context) +- Coverage scanning (`/scan`) to identify unannotated security-relevant symbols +- Integrated AI provider configuration (`/model`) supporting both direct API and CLI agent modes + +### 8.6. `@shield` Compliance AI tools claiming GuardLink Level 4 conformance (§9) must implement `@shield` exclusion as defined in §3.4. This is a compliance requirement for GuardLink-aware AI integrations. Code marked with `@shield` contains content the developer has explicitly decided should not be processed by external AI systems. @@ -905,9 +955,12 @@ A Level 3 conforming implementation (includes Level 2) additionally: ### Level 4: AI-Integrated A Level 4 conforming implementation (includes Level 3) additionally: -- Respects `@shield` exclusion markers (§3.4, §8.3) +- Respects `@shield` exclusion markers (§3.4, §8.6) - Provides MCP server integration (§8.2) or equivalent - Supports AI-assisted annotation generation +- May provide AI-powered threat analysis with framework-specific reports (§8.3) +- May provide interactive dashboard visualization (§8.4) +- May provide interactive TUI with exposure triage and AI chat (§8.5) ### Conformance Testing diff --git a/src/agents/config.ts b/src/agents/config.ts index 22bd939..5900376 100644 --- a/src/agents/config.ts +++ b/src/agents/config.ts @@ -24,13 +24,21 @@ interface SavedConfig { apiKey?: string; aiMode?: 'cli-agent' | 'api'; cliAgent?: string; + /** Enable extended thinking (Anthropic) / reasoning (DeepSeek) */ + extendedThinking?: boolean; + /** Enable web search grounding (OpenAI Responses API) */ + webSearch?: boolean; + /** Response format: 'text' or 'json' */ + responseFormat?: 'text' | 'json'; } const DEFAULT_MODELS: Record = { - anthropic: 'claude-sonnet-4-5-20250929', - openai: 'gpt-4o', - openrouter: 'anthropic/claude-sonnet-4-5-20250929', + anthropic: 'claude-sonnet-4-6', + openai: 
'gpt-5.2', + google: 'gemini-2.5-flash', + openrouter: 'anthropic/claude-sonnet-4-6', deepseek: 'deepseek-chat', + ollama: 'llama3.2', }; const CONFIG_FILE = 'config.json'; @@ -87,7 +95,7 @@ export function resolveConfig( const provider = flags.provider as LLMProvider; return { provider, - model: flags.model || DEFAULT_MODELS[provider] || 'gpt-4o', + model: flags.model || DEFAULT_MODELS[provider] || 'gpt-5.2', apiKey: flags.apiKey, }; } @@ -118,6 +126,7 @@ export function resolveConfig( provider: projectCfg.provider, model: flags?.model || projectCfg.model || DEFAULT_MODELS[projectCfg.provider], apiKey: projectCfg.apiKey, + ...savedConfigExtras(projectCfg), }; } @@ -128,17 +137,29 @@ export function resolveConfig( provider: globalCfg.provider, model: flags?.model || globalCfg.model || DEFAULT_MODELS[globalCfg.provider], apiKey: globalCfg.apiKey, + ...savedConfigExtras(globalCfg), }; } return null; } +/** Extract optional LLM config extras from saved config */ +function savedConfigExtras(cfg: SavedConfig): Partial { + const extras: Partial = {}; + if (cfg.extendedThinking) extras.extendedThinking = true; + if (cfg.webSearch) extras.webSearch = true; + if (cfg.responseFormat) extras.responseFormat = cfg.responseFormat; + return extras; +} + /** Resolve from provider-specific env vars (ANTHROPIC_API_KEY, etc.) 
*/ function resolveFromEnv(modelOverride?: string): LLMConfig | null { const checks: [string, LLMProvider][] = [ ['ANTHROPIC_API_KEY', 'anthropic'], ['OPENAI_API_KEY', 'openai'], + ['GOOGLE_API_KEY', 'google'], + ['GEMINI_API_KEY', 'google'], ['OPENROUTER_API_KEY', 'openrouter'], ['DEEPSEEK_API_KEY', 'deepseek'], ]; @@ -160,6 +181,7 @@ function detectProviderFromKey(key: string): LLMProvider | null { if (key.startsWith('sk-ant-')) return 'anthropic'; if (key.startsWith('sk-or-')) return 'openrouter'; if (key.startsWith('sk-')) return 'openai'; // OpenAI uses sk- prefix + if (key.startsWith('AIza')) return 'google'; // Google API keys start with AIza return null; // Can't detect — need GUARDLINK_LLM_PROVIDER } @@ -203,6 +225,8 @@ export function describeConfigSource( if (process.env.GUARDLINK_LLM_KEY) return 'GUARDLINK_LLM_KEY env var'; if (process.env.ANTHROPIC_API_KEY) return 'ANTHROPIC_API_KEY env var'; if (process.env.OPENAI_API_KEY) return 'OPENAI_API_KEY env var'; + if (process.env.GOOGLE_API_KEY) return 'GOOGLE_API_KEY env var'; + if (process.env.GEMINI_API_KEY) return 'GEMINI_API_KEY env var'; if (process.env.OPENROUTER_API_KEY) return 'OPENROUTER_API_KEY env var'; if (process.env.DEEPSEEK_API_KEY) return 'DEEPSEEK_API_KEY env var'; const pc = readJsonFile(projectConfigPath(root)); diff --git a/src/agents/index.ts b/src/agents/index.ts index 0783cbd..47dacc6 100644 --- a/src/agents/index.ts +++ b/src/agents/index.ts @@ -45,5 +45,6 @@ export function agentFromOpts(opts: Record): AgentEntry | null { return null; } -export { launchAgentForeground, launchAgentIDE, launchAgent, copyToClipboard } from './launcher.js'; +export { launchAgentForeground, launchAgentIDE, launchAgent, launchAgentInline, copyToClipboard } from './launcher.js'; +export type { InlineResult } from './launcher.js'; export { buildAnnotatePrompt } from './prompts.js'; diff --git a/src/agents/launcher.ts b/src/agents/launcher.ts index e7017e0..cdc8334 100644 --- a/src/agents/launcher.ts +++ 
b/src/agents/launcher.ts @@ -17,8 +17,11 @@ * @comment -- "copyToClipboard uses platform-specific clipboard commands (pbcopy, xclip, clip)" */ -import { spawnSync } from 'node:child_process'; +import { spawnSync, spawn } from 'node:child_process'; import { platform } from 'node:os'; +import { mkdtempSync, readFileSync, unlinkSync, existsSync } from 'node:fs'; +import { join } from 'node:path'; +import { tmpdir } from 'node:os'; import type { AgentEntry } from './index.js'; // ─── Clipboard ─────────────────────────────────────────────────────── @@ -135,6 +138,151 @@ export function launchAgentIDE(agent: AgentEntry, cwd: string): { } } +// ─── Inline (non-interactive) agent execution ──────────────────────── + +export interface InlineResult { + content: string; + error?: string; +} + +/** + * CLI agent command + args for non-interactive (print) mode. + * claude: `claude -p "" --dangerously-skip-permissions ...` + * codex: `codex exec "" --dangerously-bypass-approvals-and-sandbox --color never -o ` + * gemini: `gemini --prompt "" --approval-mode yolo` + * + * For codex, we use `-o ` to capture the final agent message to a file, + * which avoids any TTY/streaming issues. The tmpfile path is passed separately. + */ +function buildInlineArgs(agentId: string, prompt: string, codexOutputFile?: string): string[] | null { + switch (agentId) { + case 'claude-code': + return [ + '-p', prompt, + '--dangerously-skip-permissions', + '--allowedTools', 'Read,Bash(cat *),Bash(find *),Bash(head *),Bash(tail *)', + '--output-format', 'text', + ]; + case 'codex': + // `codex exec` runs non-interactively (no TTY needed). + // --color never: suppress ANSI escape codes in output. + // -o : write the final agent message to a file for clean extraction. + // --skip-git-repo-check: allow running outside a git repo. + return [ + 'exec', prompt, + '--dangerously-bypass-approvals-and-sandbox', + '--color', 'never', + '--skip-git-repo-check', + ...(codexOutputFile ? 
['-o', codexOutputFile] : []), + ]; + case 'gemini': + return [ + '--prompt', prompt, + '--approval-mode', 'yolo', + ]; + default: + return null; + } +} + +/** + * Run a CLI agent inline (non-interactive) and stream output. + * + * Instead of taking over the terminal, this spawns the agent with + * a print-mode flag and streams stdout back via onChunk. + * Returns the full collected output when done. + */ +export async function launchAgentInline( + agent: AgentEntry, + prompt: string, + cwd: string, + onChunk?: (text: string) => void, + opts?: { autoYes?: boolean } +): Promise { + if (!agent.cmd) { + return { content: '', error: `${agent.name} is not a terminal agent — cannot run inline` }; + } + + let cmd = agent.cmd; + let args = buildInlineArgs(agent.id, prompt); + if (!args) { + return { content: '', error: `Inline mode not supported for ${agent.name}` }; + } + + return new Promise((resolve) => { + try { + // For Codex: use `codex exec` which is designed for non-interactive/headless use. + // It does NOT require a TTY for stdin or stdout. + // We use -o so the final agent message is written to a file we can read + // back cleanly, avoiding any streaming/buffering issues with the live output. + let codexOutputFile: string | undefined; + if (agent.id === 'codex') { + const tmpDir = mkdtempSync(join(tmpdir(), 'guardlink-codex-')); + codexOutputFile = join(tmpDir, 'output.md'); + } + + args = buildInlineArgs(agent.id, prompt, codexOutputFile) as string[]; + + // Claude Code and Gemini still need stdin to be a real TTY (they check isatty(stdin)). + // Codex exec does not — it reads the prompt from the CLI arg, not stdin. + const stdinMode = agent.id === 'codex' ? 'pipe' : 'inherit'; + + const child = spawn(cmd, args, { + cwd, + stdio: [stdinMode, 'pipe', 'pipe'], + env: { ...process.env, NO_COLOR: '1' }, + }); + + // For codex, close stdin immediately so it knows there's no interactive input. 
+ if (agent.id === 'codex') { + child.stdin?.end(); + } + + let content = ''; + let stderr = ''; + + child.stdout?.on('data', (data: Buffer) => { + const text = data.toString(); + content += text; + if (onChunk) onChunk(text); + }); + + child.stderr?.on('data', (data: Buffer) => { + stderr += data.toString(); + }); + + child.on('error', (err: Error) => { + const msg = (err as any).code === 'ENOENT' + ? `${agent.name} (${agent.cmd}) not found. Install it first.` + : `Failed to launch ${agent.name}: ${err.message}`; + resolve({ content, error: msg }); + }); + + child.on('close', (code: number | null) => { + // For codex, prefer the -o output file (final agent message) over streamed stdout. + if (codexOutputFile && existsSync(codexOutputFile)) { + try { + const fileContent = readFileSync(codexOutputFile, 'utf-8').trim(); + unlinkSync(codexOutputFile); + if (fileContent) { + resolve({ content: fileContent }); + return; + } + } catch { /* fall through to stdout content */ } + } + + if (code !== 0 && code !== null && !content) { + resolve({ content, error: `${agent.name} exited with code ${code}${stderr ? 
': ' + stderr.slice(0, 200) : ''}` }); + } else { + resolve({ content }); + } + }); + } catch (err: any) { + resolve({ content: '', error: `Failed to launch ${agent.name}: ${err.message}` }); + } + }); +} + // ─── Unified agent launch ──────────────────────────────────────────── export interface LaunchResult { diff --git a/src/agents/prompts.ts b/src/agents/prompts.ts index c098108..7fa155f 100644 --- a/src/agents/prompts.ts +++ b/src/agents/prompts.ts @@ -72,15 +72,15 @@ export function buildAnnotatePrompt( } // Include unmitigated exposures so agent knows what still needs attention + // NOTE: Do NOT filter out @accepts — agents should see ALL exposures without real mitigations const unmitigatedExposures = model.exposures.filter(e => { - return !model.mitigations.some(m => m.asset === e.asset && m.threat === e.threat) - && !model.acceptances.some(a => a.asset === e.asset && a.threat === e.threat); + return !model.mitigations.some(m => m.asset === e.asset && m.threat === e.threat); }); if (unmitigatedExposures.length > 0) { const expLines = unmitigatedExposures.slice(0, 20).map(e => ` ${e.asset} exposed to ${e.threat} [${e.severity || 'unrated'}] (${e.location.file}:${e.location.line})` ); - existingExposures = `\n\nOpen exposures (unmitigated — add @mitigates or @accepts for these):\n${expLines.join('\n')}`; + existingExposures = `\n\nOpen exposures (no mitigation in code — add @mitigates if a control exists, or @audit to flag for human review):\n${expLines.join('\n')}`; if (unmitigatedExposures.length > 20) existingExposures += `\n ... and ${unmitigatedExposures.length - 20} more`; } } @@ -124,7 +124,7 @@ At each boundary crossing and data transformation, ask: - What validation/sanitization exists? (@mitigates) - What sensitive data passes through here? (@handles) - Is there an assumption that could be violated? (@assumes) -- Has the team accepted this risk intentionally? (@accepts) +- Does this need human security review? 
(@audit) - Is this risk handled by someone else? (@transfers) ### Step 4: Write Coupled Annotation Blocks @@ -226,6 +226,33 @@ Don't rate everything P0. A SQL injection in an admin-only internal tool is diff ### @comment — Always Add Context Every annotation block should include at least one @comment explaining non-obvious security decisions, assumptions, or context that helps future developers (and AI tools) understand the "why". +### @accepts — NEVER USE (Human-Only Decision) +@accepts marks a risk as intentionally unmitigated. This is a **human-only governance decision** — it requires conscious risk ownership by a person or team. +As an AI agent, you MUST NEVER write @accepts annotations. You cannot accept risk on behalf of humans. + +Instead, when you find an exposure with no mitigation in the code: +1. Write the @exposes annotation to document the risk +2. Add @audit to flag it for human security review +3. Add @comment explaining what controls COULD be added +4. Optionally add @assumes to document any assumptions the code makes + +Example — what to do when no mitigation exists: +\`\`\` +// @shield:begin -- "@accepts alternative examples, excluded from parsing" +// +// WRONG (AI rubber-stamping risk): +// @accepts #prompt-injection on #ai-endpoint -- "Relying on model safety filters" +// +// RIGHT (flag for human review): +// @exposes #ai-endpoint to #prompt-injection [P1] cwe:CWE-77 -- "User prompt passed directly to LLM API without sanitization" +// @audit #ai-endpoint -- "No prompt sanitization — needs human review to decide: add input filter or accept risk" +// @comment -- "Potential controls: #prompt-filter (input sanitization), #output-validator (response filtering)" +// +// @shield:end +\`\`\` + +Leaving exposures unmitigated is HONEST. The dashboard and reports will surface them as open risks for humans to triage. 
+ ### @shield — DO NOT USE Unless Explicitly Asked @shield and @shield:begin/@shield:end block AI coding assistants from reading the annotated code. This means any shielded code becomes invisible to AI tools — they cannot analyze, refactor, or annotate it. @@ -250,7 +277,7 @@ Definitions go in .guardlink/definitions.{ts,js,py,rs}. Source files use only re // @shield:begin -- "Relationship syntax examples, excluded from parsing" // @exposes #auth to #sqli [P0] cwe:CWE-89 owasp:A03:2021 -- "User input concatenated into query" // @mitigates #auth against #sqli using #prepared-stmts -- "Uses parameterized queries via sqlx" -// @accepts #timing-attack on #auth -- "Acceptable given bcrypt constant-time comparison" +// @audit #auth -- "Timing attack risk — needs human review to decide if bcrypt constant-time comparison is sufficient" // @transfers #ddos from #api to #cdn -- "Cloudflare handles L7 DDoS mitigation" // @flows req.body.username -> db.query via string-concat -- "User input flows to SQL" // @boundary between #frontend and #api (#web-boundary) -- "TLS-terminated public/private boundary" @@ -310,8 +337,9 @@ Definitions go in .guardlink/definitions.{ts,js,py,rs}. Source files use only re Group related definitions together with section comments. 4. **Annotate in coupled blocks.** For each security-relevant location, write the complete story: - @exposes + @mitigates (or @accepts) + @flows + @comment at minimum. + @exposes + @mitigates (or @audit if no mitigation exists) + @flows + @comment at minimum. Think: "what's the risk, what's the defense, how does data flow here, and what should the next developer know?" + NEVER write @accepts — that is a human-only governance decision. Use @audit to flag unmitigated risks for review. 5. **Use the project's comment style** (// for JS/TS/Go/Rust, # for Python/Ruby/Shell, etc.) 
diff --git a/src/analyze/index.ts b/src/analyze/index.ts index 28313cc..7a41165 100644 --- a/src/analyze/index.ts +++ b/src/analyze/index.ts @@ -15,13 +15,16 @@ */ import { existsSync, mkdirSync, writeFileSync, readdirSync, readFileSync } from 'node:fs'; -import { join } from 'node:path'; +import { join, relative } from 'node:path'; import type { ThreatModel } from '../types/index.js'; import { type AnalysisFramework, FRAMEWORK_LABELS, FRAMEWORK_PROMPTS, buildUserMessage } from './prompts.js'; import { type LLMConfig, buildConfig, chatCompletion } from './llm.js'; +import { GUARDLINK_TOOLS, createToolExecutor } from './tools.js'; export { type AnalysisFramework, FRAMEWORK_LABELS, FRAMEWORK_PROMPTS, buildUserMessage } from './prompts.js'; export { type LLMConfig, type LLMProvider, buildConfig, autoDetectConfig } from './llm.js'; +export { GUARDLINK_TOOLS, createToolExecutor } from './tools.js'; +export type { ToolDefinition, ToolCall, ToolResult, ToolExecutor } from './llm.js'; // ─── Types ─────────────────────────────────────────────────────────── @@ -33,6 +36,18 @@ export interface ThreatReportOptions { customPrompt?: string; stream?: boolean; onChunk?: (text: string) => void; + /** Max lines of context to include around each annotated line (default: 8) */ + snippetContext?: number; + /** Max total characters for all code snippets combined (default: 40000) */ + snippetBudget?: number; + /** Enable web search grounding (OpenAI Responses API) */ + webSearch?: boolean; + /** Enable extended thinking (Anthropic) / reasoning (DeepSeek) */ + extendedThinking?: boolean; + /** Token budget for thinking (default: 10000) */ + thinkingBudget?: number; + /** Enable agentic tool use (CVE lookup, model validation, codebase search) */ + enableTools?: boolean; } export interface ThreatReportResult { @@ -44,13 +59,270 @@ export interface ThreatReportResult { savedTo?: string; inputTokens?: number; outputTokens?: number; + /** Thinking/reasoning content (if extended thinking was 
enabled) */ + thinking?: string; + thinkingTokens?: number; +} + +// ─── Project context builder ───────────────────────────────────────── + +/** + * Collect project-level context for the LLM: language/framework, key + * dependencies, and deployment signals (Dockerfile, CI, etc.). + * Keeps output compact — targets ~2-4 KB. + */ +export function buildProjectContext(root: string): string { + const lines: string[] = []; + + // package.json — language, framework, key deps + const pkgPath = join(root, 'package.json'); + if (existsSync(pkgPath)) { + try { + const pkg = JSON.parse(readFileSync(pkgPath, 'utf-8')); + lines.push(`## package.json`); + if (pkg.name) lines.push(`name: ${pkg.name}`); + if (pkg.version) lines.push(`version: ${pkg.version}`); + if (pkg.description) lines.push(`description: ${pkg.description}`); + + const allDeps: Record = { + ...pkg.dependencies, + ...pkg.devDependencies, + }; + if (Object.keys(allDeps).length) { + lines.push(`dependencies (${Object.keys(allDeps).length} total):`); + // Include all deps — LLM needs them to reason about known-vulnerable packages + for (const [name, ver] of Object.entries(allDeps)) { + lines.push(` ${name}: ${ver}`); + } + } + + if (pkg.scripts && Object.keys(pkg.scripts).length) { + lines.push(`scripts: ${Object.keys(pkg.scripts).join(', ')}`); + } + if (pkg.engines) lines.push(`engines: ${JSON.stringify(pkg.engines)}`); + lines.push(''); + } catch { /* skip malformed */ } + } + + // requirements.txt — Python projects + const reqPath = join(root, 'requirements.txt'); + if (existsSync(reqPath)) { + try { + const reqs = readFileSync(reqPath, 'utf-8').trim(); + lines.push('## requirements.txt'); + lines.push(reqs); + lines.push(''); + } catch { /* skip */ } + } + + // pyproject.toml — Python projects + const pyprojectPath = join(root, 'pyproject.toml'); + if (existsSync(pyprojectPath)) { + try { + const content = readFileSync(pyprojectPath, 'utf-8'); + // Extract just the [tool.poetry.dependencies] or [project] 
section + const depsMatch = content.match(/\[(?:tool\.poetry\.)?dependencies\][\s\S]*?(?=\[|$)/m); + if (depsMatch) { + lines.push('## pyproject.toml (dependencies)'); + lines.push(depsMatch[0].trim()); + lines.push(''); + } + } catch { /* skip */ } + } + + // go.mod — Go projects + const gomodPath = join(root, 'go.mod'); + if (existsSync(gomodPath)) { + try { + const content = readFileSync(gomodPath, 'utf-8'); + lines.push('## go.mod'); + lines.push(content.trim()); + lines.push(''); + } catch { /* skip */ } + } + + // Dockerfile — deployment model + for (const name of ['Dockerfile', 'Dockerfile.prod', 'Dockerfile.production']) { + const dfPath = join(root, name); + if (existsSync(dfPath)) { + try { + const content = readFileSync(dfPath, 'utf-8').trim(); + lines.push(`## ${name}`); + lines.push(content); + lines.push(''); + break; + } catch { /* skip */ } + } + } + + // docker-compose.yml — service topology + for (const name of ['docker-compose.yml', 'docker-compose.yaml', 'compose.yml', 'compose.yaml']) { + const dcPath = join(root, name); + if (existsSync(dcPath)) { + try { + const content = readFileSync(dcPath, 'utf-8').trim(); + lines.push(`## ${name}`); + // Cap at 100 lines to avoid blowing token budget + const dcLines = content.split('\n'); + lines.push(dcLines.slice(0, 100).join('\n')); + if (dcLines.length > 100) lines.push(`... 
(${dcLines.length - 100} more lines)`); + lines.push(''); + break; + } catch { /* skip */ } + } + } + + // CI config — deployment signals + const ciFiles = [ + '.github/workflows', + '.gitlab-ci.yml', + '.circleci/config.yml', + 'Jenkinsfile', + '.travis.yml', + ]; + for (const ci of ciFiles) { + const ciPath = join(root, ci); + if (existsSync(ciPath)) { + lines.push(`## CI/CD: ${ci} (detected)`); + // Don't include full CI content — just note its presence + } + } + + // .env.example — environment variable signals + for (const name of ['.env.example', '.env.sample', '.env.template']) { + const envPath = join(root, name); + if (existsSync(envPath)) { + try { + const content = readFileSync(envPath, 'utf-8').trim(); + lines.push(`## ${name} (environment variables)`); + lines.push(content); + lines.push(''); + break; + } catch { /* skip */ } + } + } + + return lines.join('\n').trim(); +} + +// ─── Code snippet extractor ────────────────────────────────────────── + +/** + * Extract source code snippets around annotated lines. + * + * For each annotation that has a file + line location, reads the + * surrounding `contextLines` lines from disk and returns a formatted + * block. Deduplicates overlapping ranges within the same file. + * Respects a total character budget to keep token usage bounded. + */ +export function extractCodeSnippets( + root: string, + model: ThreatModel, + contextLines = 8, + budgetChars = 40_000, +): string { + // Collect all (file, line) pairs from security-relevant annotations. + // Prioritize: exposures > mitigations > acceptances > assumptions > flows/boundaries + type AnnotationRef = { file: string; line: number; label: string }; + const refs: AnnotationRef[] = []; + + for (const e of model.exposures) { + refs.push({ file: e.location.file, line: e.location.line, label: `@exposes ${e.asset} to ${e.threat} [${e.severity ?? 
'unset'}]` }); + } + for (const m of model.mitigations) { + refs.push({ file: m.location.file, line: m.location.line, label: `@mitigates ${m.asset} against ${m.threat}` }); + } + for (const a of model.acceptances) { + refs.push({ file: a.location.file, line: a.location.line, label: `@accepts ${a.threat} on ${a.asset}` }); + } + for (const a of model.assumptions) { + refs.push({ file: a.location.file, line: a.location.line, label: `@assumes on ${a.asset}` }); + } + for (const b of model.boundaries) { + refs.push({ file: b.location.file, line: b.location.line, label: `@boundary ${b.asset_a} | ${b.asset_b}` }); + } + for (const f of model.flows) { + refs.push({ file: f.location.file, line: f.location.line, label: `@flows ${f.source} -> ${f.target}` }); + } + + // Group by file, merge overlapping line ranges + const byFile = new Map>(); + for (const ref of refs) { + if (!ref.file || !ref.line) continue; + const absFile = ref.file.startsWith('/') ? ref.file : join(root, ref.file); + const start = Math.max(1, ref.line - contextLines); + const end = ref.line + contextLines; + + if (!byFile.has(absFile)) byFile.set(absFile, []); + const ranges = byFile.get(absFile)!; + + // Merge with existing range if overlapping + let merged = false; + for (const r of ranges) { + if (start <= r.end + 1 && end >= r.start - 1) { + r.start = Math.min(r.start, start); + r.end = Math.max(r.end, end); + r.labels.push(ref.label); + merged = true; + break; + } + } + if (!merged) ranges.push({ start, end, labels: [ref.label] }); + } + + const blocks: string[] = []; + let totalChars = 0; + + for (const [absFile, ranges] of byFile) { + if (totalChars >= budgetChars) break; + if (!existsSync(absFile)) continue; + + let fileLines: string[]; + try { + fileLines = readFileSync(absFile, 'utf-8').split('\n'); + } catch { continue; } + + const relPath = relative(root, absFile); + ranges.sort((a, b) => a.start - b.start); + + for (const range of ranges) { + if (totalChars >= budgetChars) break; + + const 
from = Math.max(0, range.start - 1); + const to = Math.min(fileLines.length, range.end); + const snippet = fileLines.slice(from, to) + .map((l, i) => `${String(from + i + 1).padStart(4)} | ${l}`) + .join('\n'); + + const uniqueLabels = [...new Set(range.labels)]; + const block = `### ${relPath}:${range.start}-${range.end} +// Annotations: ${uniqueLabels.join('; ')} +\`\`\` +${snippet} +\`\`\``; + + if (totalChars + block.length > budgetChars) { + // Include a truncated note and stop + blocks.push(`### ${relPath}:${range.start}-${range.end} +// [snippet omitted — budget exhausted]`); + totalChars = budgetChars; + break; + } + + blocks.push(block); + totalChars += block.length; + } + } + + return blocks.join('\n\n'); } // ─── Serialization ─────────────────────────────────────────────────── /** * Serialize the threat model to a compact representation for LLM context. - * Strips empty arrays and location details to save tokens. + * Includes file:line locations for all security-relevant annotations so + * the LLM can cross-reference with code snippets. */ export function serializeModel(model: ThreatModel): string { const compact: Record = { @@ -62,54 +334,67 @@ export function serializeModel(model: ThreatModel): string { // Only include non-empty sections if (model.assets.length) compact.assets = model.assets.map(a => ({ path: a.path.join('.'), id: a.id, description: a.description, + file: a.location.file, line: a.location.line, })); if (model.threats.length) compact.threats = model.threats.map(t => ({ name: t.name, id: t.id, severity: t.severity, refs: t.external_refs.length ? 
t.external_refs : undefined, description: t.description, + file: t.location.file, line: t.location.line, })); if (model.controls.length) compact.controls = model.controls.map(c => ({ name: c.name, id: c.id, description: c.description, + file: c.location.file, line: c.location.line, })); if (model.mitigations.length) compact.mitigations = model.mitigations.map(m => ({ asset: m.asset, threat: m.threat, control: m.control, - description: m.description, file: m.location.file, + description: m.description, + file: m.location.file, line: m.location.line, })); if (model.exposures.length) compact.exposures = model.exposures.map(e => ({ asset: e.asset, threat: e.threat, severity: e.severity, refs: e.external_refs.length ? e.external_refs : undefined, - description: e.description, file: e.location.file, + description: e.description, + file: e.location.file, line: e.location.line, })); if (model.acceptances.length) compact.acceptances = model.acceptances.map(a => ({ asset: a.asset, threat: a.threat, description: a.description, + file: a.location.file, line: a.location.line, })); if (model.transfers.length) compact.transfers = model.transfers.map(t => ({ threat: t.threat, source: t.source, target: t.target, + file: t.location.file, line: t.location.line, })); if (model.flows.length) compact.flows = model.flows.map(f => ({ source: f.source, target: f.target, mechanism: f.mechanism, + file: f.location.file, line: f.location.line, })); if (model.boundaries.length) compact.boundaries = model.boundaries.map(b => ({ a: b.asset_a, b: b.asset_b, id: b.id, description: b.description, + file: b.location.file, line: b.location.line, })); if (model.data_handling.length) compact.data_handling = model.data_handling.map(h => ({ classification: h.classification, asset: h.asset, + file: h.location.file, line: h.location.line, })); if (model.assumptions.length) compact.assumptions = model.assumptions.map(a => ({ asset: a.asset, description: a.description, + file: a.location.file, line: 
a.location.line, })); if (model.comments.length) compact.comments = model.comments.map(c => ({ - description: c.description, file: c.location.file, + description: c.description, file: c.location.file, line: c.location.line, })); if (model.validations.length) compact.validations = model.validations.map(v => ({ control: v.control, asset: v.asset, + file: v.location.file, line: v.location.line, })); - // Coverage summary + // Coverage summary — include unannotated critical symbols so LLM sees gaps compact.coverage = { total_symbols: model.coverage.total_symbols, annotated: model.coverage.annotated_symbols, percent: model.coverage.coverage_percent, + unannotated_critical: model.coverage.unannotated_critical, }; // Unmitigated exposures summary @@ -120,6 +405,7 @@ export function serializeModel(model: ThreatModel): string { if (unmitigated.length) { compact.unmitigated_exposures = unmitigated.map(e => ({ asset: e.asset, threat: e.threat, severity: e.severity, + file: e.location.file, line: e.location.line, })); } @@ -228,16 +514,32 @@ const LEGACY_ANALYSES_DIR = 'analyses'; export async function generateThreatReport(opts: ThreatReportOptions): Promise { const { root, model, framework, llmConfig, customPrompt } = opts; + const snippetContext = opts.snippetContext ?? 8; + const snippetBudget = opts.snippetBudget ?? 
40_000; const modelJson = serializeModel(model); + const projectContext = buildProjectContext(root); + const codeSnippets = extractCodeSnippets(root, model, snippetContext, snippetBudget); const systemPrompt = FRAMEWORK_PROMPTS[framework]; - const userMessage = buildUserMessage(modelJson, framework, customPrompt); + const userMessage = buildUserMessage(modelJson, framework, customPrompt, projectContext || undefined, codeSnippets || undefined); const timestamp = new Date().toISOString().replace(/[:.]/g, '-').slice(0, 19); + // Build enhanced config with optional upgrades + const enhancedConfig: LLMConfig = { ...llmConfig }; + if (opts.webSearch) enhancedConfig.webSearch = true; + if (opts.extendedThinking) { + enhancedConfig.extendedThinking = true; + if (opts.thinkingBudget) enhancedConfig.thinkingBudget = opts.thinkingBudget; + } + if (opts.enableTools !== false) { + enhancedConfig.tools = GUARDLINK_TOOLS; + enhancedConfig.toolExecutor = createToolExecutor(root, model); + } + // Call LLM const response = await chatCompletion( - llmConfig, + enhancedConfig, systemPrompt, userMessage, opts.stream ? opts.onChunk : undefined, @@ -281,6 +583,8 @@ annotations: ${model.annotations_parsed} savedTo: `.guardlink/${THREAT_REPORTS_DIR}/${filename}`, inputTokens: response.inputTokens, outputTokens: response.outputTokens, + thinking: response.thinking, + thinkingTokens: response.thinkingTokens, }; } diff --git a/src/analyze/llm.ts b/src/analyze/llm.ts index 94dd0c9..84f1e45 100644 --- a/src/analyze/llm.ts +++ b/src/analyze/llm.ts @@ -2,8 +2,11 @@ * GuardLink Threat Reports — Lightweight LLM client using raw fetch. * * Supports: - * - Anthropic Messages API (claude-sonnet-4-5-20250929, etc.) - * - OpenAI-compatible Chat Completions (GPT-4o, DeepSeek, OpenRouter) + * - Anthropic Messages API (claude-sonnet-4-6, claude-opus-4-6, etc.) with extended thinking + tool use + * - OpenAI Responses API (gpt-5.2, o3, etc.) 
with web search, tools, structured output + * - Google Gemini API (gemini-2.5-flash, gemini-3-pro, etc.) via OpenAI-compatible endpoint + * - OpenAI-compatible Chat Completions (DeepSeek, OpenRouter, Ollama) + * - DeepSeek reasoning mode (deepseek-reasoner) * * Zero dependencies — uses Node 20+ built-in fetch. * @@ -19,7 +22,42 @@ * @flows External_LLM_APIs -> #llm-client via response -- "Streaming or complete response from LLM provider" */ -export type LLMProvider = 'anthropic' | 'openai' | 'openrouter' | 'deepseek'; +export type LLMProvider = 'anthropic' | 'openai' | 'google' | 'openrouter' | 'deepseek' | 'ollama'; + +// ─── Tool definitions ──────────────────────────────────────────────── + +export interface ToolParameter { + type: string; + description?: string; + enum?: string[]; +} + +export interface ToolDefinition { + name: string; + description: string; + parameters: { + type: 'object'; + properties: Record; + required?: string[]; + additionalProperties?: boolean; + }; +} + +export interface ToolCall { + id: string; + name: string; + arguments: Record; +} + +export interface ToolResult { + id: string; + content: string; +} + +/** Handler that executes a tool call and returns its result string */ +export type ToolExecutor = (name: string, args: Record) => Promise; + +// ─── Config & Response types ───────────────────────────────────────── export interface LLMConfig { provider: LLMProvider; @@ -27,6 +65,21 @@ export interface LLMConfig { apiKey: string; baseUrl?: string; maxTokens?: number; + + /** Enable extended thinking (Anthropic) / reasoning (DeepSeek) */ + extendedThinking?: boolean; + /** Token budget for thinking (default: 10000) */ + thinkingBudget?: number; + /** Enable web search grounding (OpenAI Responses API) */ + webSearch?: boolean; + /** Response format: 'text' (default) or 'json' for structured output */ + responseFormat?: 'text' | 'json'; + /** Tool definitions for function calling */ + tools?: ToolDefinition[]; + /** Tool executor 
function — required if tools are provided */ + toolExecutor?: ToolExecutor; + /** Max tool-call rounds in agentic loop (default: 5) */ + maxToolRounds?: number; } export interface LLMResponse { @@ -34,55 +87,55 @@ export interface LLMResponse { model: string; inputTokens?: number; outputTokens?: number; + /** Thinking/reasoning content (extended thinking) */ + thinking?: string; + /** Thinking tokens used */ + thinkingTokens?: number; + /** Tool calls made during generation */ + toolCalls?: ToolCall[]; } +// ─── Defaults ──────────────────────────────────────────────────────── + const DEFAULT_MODELS: Record = { - anthropic: 'claude-sonnet-4-5-20250929', - openai: 'gpt-4o', - openrouter: 'anthropic/claude-sonnet-4-5-20250929', + anthropic: 'claude-sonnet-4-6', + openai: 'gpt-5.2', + google: 'gemini-2.5-flash', + openrouter: 'anthropic/claude-sonnet-4-6', deepseek: 'deepseek-chat', + ollama: 'llama3.2', }; const BASE_URLS: Record = { anthropic: 'https://api.anthropic.com', openai: 'https://api.openai.com', + google: 'https://generativelanguage.googleapis.com/v1beta/openai', openrouter: 'https://openrouter.ai/api', deepseek: 'https://api.deepseek.com', + ollama: 'http://localhost:11434', }; +// ─── Auto-detect ───────────────────────────────────────────────────── + /** * Auto-detect provider from environment variables. * Returns null if no API key found. 
*/ export function autoDetectConfig(): LLMConfig | null { - // Priority: Anthropic > OpenAI > OpenRouter > DeepSeek if (process.env.ANTHROPIC_API_KEY) { - return { - provider: 'anthropic', - model: DEFAULT_MODELS.anthropic, - apiKey: process.env.ANTHROPIC_API_KEY, - }; + return { provider: 'anthropic', model: DEFAULT_MODELS.anthropic, apiKey: process.env.ANTHROPIC_API_KEY }; } if (process.env.OPENAI_API_KEY) { - return { - provider: 'openai', - model: DEFAULT_MODELS.openai, - apiKey: process.env.OPENAI_API_KEY, - }; + return { provider: 'openai', model: DEFAULT_MODELS.openai, apiKey: process.env.OPENAI_API_KEY }; } if (process.env.OPENROUTER_API_KEY) { - return { - provider: 'openrouter', - model: DEFAULT_MODELS.openrouter, - apiKey: process.env.OPENROUTER_API_KEY, - }; + return { provider: 'openrouter', model: DEFAULT_MODELS.openrouter, apiKey: process.env.OPENROUTER_API_KEY }; + } + if (process.env.GOOGLE_API_KEY || process.env.GEMINI_API_KEY) { + return { provider: 'google', model: DEFAULT_MODELS.google, apiKey: (process.env.GOOGLE_API_KEY || process.env.GEMINI_API_KEY)! 
}; } if (process.env.DEEPSEEK_API_KEY) { - return { - provider: 'deepseek', - model: DEFAULT_MODELS.deepseek, - apiKey: process.env.DEEPSEEK_API_KEY, - }; + return { provider: 'deepseek', model: DEFAULT_MODELS.deepseek, apiKey: process.env.DEEPSEEK_API_KEY }; } return null; } @@ -95,36 +148,36 @@ export function buildConfig(opts: { model?: string; apiKey?: string; }): LLMConfig | null { - // If provider specified, use it if (opts.provider) { const provider = opts.provider as LLMProvider; const envKeyMap: Record = { anthropic: 'ANTHROPIC_API_KEY', openai: 'OPENAI_API_KEY', openrouter: 'OPENROUTER_API_KEY', + google: 'GOOGLE_API_KEY', deepseek: 'DEEPSEEK_API_KEY', }; const apiKey = opts.apiKey || process.env[envKeyMap[provider] || '']; if (!apiKey) return null; - return { provider, - model: opts.model || DEFAULT_MODELS[provider] || 'gpt-4o', + model: opts.model || DEFAULT_MODELS[provider] || 'gpt-5.2', apiKey, }; } - // Auto-detect const config = autoDetectConfig(); if (!config) return null; - - // Override model if specified if (opts.model) config.model = opts.model; return config; } +// ─── Main entry point ──────────────────────────────────────────────── + /** * Send a message to the LLM and return the response. + * Supports streaming, tool use (agentic loop), extended thinking, + * web search, and structured output. 
*/ export async function chatCompletion( config: LLMConfig, @@ -133,39 +186,114 @@ export async function chatCompletion( onChunk?: (text: string) => void, ): Promise { if (config.provider === 'anthropic') { - return callAnthropic(config, systemPrompt, userMessage, onChunk); + return callAnthropicWithTools(config, systemPrompt, userMessage, onChunk); + } else if (config.provider === 'openai') { + return callOpenAIResponses(config, systemPrompt, userMessage, onChunk); } else { + // Google Gemini, DeepSeek, OpenRouter, Ollama all use OpenAI-compatible Chat Completions return callOpenAICompatible(config, systemPrompt, userMessage, onChunk); } } -// ─── Anthropic Messages API ────────────────────────────────────────── +// ─── Anthropic Messages API (2025) ────────────────────────────────── -async function callAnthropic( +const ANTHROPIC_API_VERSION = '2025-04-14'; + +interface AnthropicRawResponse extends LLMResponse { + _rawContent?: any[]; +} + +/** Wrapper with agentic tool-call loop */ +async function callAnthropicWithTools( config: LLMConfig, systemPrompt: string, userMessage: string, onChunk?: (text: string) => void, ): Promise { + const maxRounds = config.maxToolRounds ?? 5; + let messages: any[] = [{ role: 'user', content: userMessage }]; + const allToolCalls: ToolCall[] = []; + let finalResponse: AnthropicRawResponse | null = null; + + for (let round = 0; round <= maxRounds; round++) { + const response = await callAnthropic(config, systemPrompt, messages, round === 0 ? 
onChunk : undefined); + + if (response.toolCalls?.length) allToolCalls.push(...response.toolCalls); + + if (!response.toolCalls?.length || !config.toolExecutor) { + finalResponse = response; + break; + } + + // Add assistant response and tool results for next round + messages.push({ role: 'assistant', content: response._rawContent }); + + for (const tc of response.toolCalls) { + let resultText: string; + try { + resultText = await config.toolExecutor(tc.name, tc.arguments); + } catch (err: any) { + resultText = `Error: ${err.message}`; + } + messages.push({ + role: 'user', + content: [{ type: 'tool_result', tool_use_id: tc.id, content: resultText }], + }); + } + } + + if (!finalResponse) throw new Error('Max tool call rounds exceeded'); + finalResponse.toolCalls = allToolCalls.length ? allToolCalls : undefined; + return finalResponse; +} + +async function callAnthropic( + config: LLMConfig, + systemPrompt: string, + messages: any[], + onChunk?: (text: string) => void, +): Promise { const baseUrl = config.baseUrl || BASE_URLS.anthropic; const maxTokens = config.maxTokens || 8192; + const headers: Record = { + 'Content-Type': 'application/json', + 'x-api-key': config.apiKey, + 'anthropic-version': ANTHROPIC_API_VERSION, + }; + + if (config.extendedThinking) { + headers['anthropic-beta'] = 'interleaved-thinking-2025-05-14'; + } + + const body: Record = { + model: config.model, + max_tokens: maxTokens, + system: systemPrompt, + messages, + }; + + if (config.extendedThinking) { + body.thinking = { type: 'enabled', budget_tokens: config.thinkingBudget || 10000 }; + } + + if (config.tools?.length) { + body.tools = config.tools.map(t => ({ + name: t.name, + description: t.description, + input_schema: { + type: 'object', + properties: t.parameters.properties, + required: t.parameters.required, + }, + })); + } + if (onChunk) { - // Streaming + body.stream = true; + const res = await fetch(`${baseUrl}/v1/messages`, { - method: 'POST', - headers: { - 'Content-Type': 
'application/json', - 'x-api-key': config.apiKey, - 'anthropic-version': '2023-06-01', - }, - body: JSON.stringify({ - model: config.model, - max_tokens: maxTokens, - system: systemPrompt, - stream: true, - messages: [{ role: 'user', content: userMessage }], - }), + method: 'POST', headers, body: JSON.stringify(body), }); if (!res.ok) { @@ -174,11 +302,16 @@ async function callAnthropic( } let content = ''; + let thinking = ''; let inputTokens = 0; let outputTokens = 0; + const toolCalls: ToolCall[] = []; + let curToolId = ''; + let curToolName = ''; + let curToolArgs = ''; + const reader = res.body?.getReader(); if (!reader) throw new Error('No response body'); - const decoder = new TextDecoder(); let buffer = ''; @@ -186,7 +319,6 @@ async function callAnthropic( const { done, value } = await reader.read(); if (done) break; buffer += decoder.decode(value, { stream: true }); - const lines = buffer.split('\n'); buffer = lines.pop() || ''; @@ -195,37 +327,47 @@ async function callAnthropic( const data = line.slice(6).trim(); if (data === '[DONE]') continue; try { - const event = JSON.parse(data); - if (event.type === 'content_block_delta' && event.delta?.text) { - content += event.delta.text; - onChunk(event.delta.text); + const ev = JSON.parse(data); + + if (ev.type === 'content_block_start' && ev.content_block?.type === 'tool_use') { + curToolId = ev.content_block.id || ''; + curToolName = ev.content_block.name || ''; + curToolArgs = ''; } - if (event.type === 'message_delta' && event.usage) { - outputTokens = event.usage.output_tokens || 0; + if (ev.type === 'content_block_delta') { + if (ev.delta?.type === 'text_delta' && ev.delta?.text) { + content += ev.delta.text; + onChunk(ev.delta.text); + } + if (ev.delta?.type === 'thinking_delta' && ev.delta?.thinking) { + thinking += ev.delta.thinking; + } + if (ev.delta?.type === 'input_json_delta' && ev.delta?.partial_json) { + curToolArgs += ev.delta.partial_json; + } } - if (event.type === 'message_start' && 
event.message?.usage) { - inputTokens = event.message.usage.input_tokens || 0; + if (ev.type === 'content_block_stop' && curToolId) { + try { + toolCalls.push({ id: curToolId, name: curToolName, arguments: JSON.parse(curToolArgs || '{}') }); + } catch { /* skip */ } + curToolId = ''; curToolName = ''; curToolArgs = ''; } - } catch { /* skip non-JSON lines */ } + if (ev.type === 'message_delta' && ev.usage) outputTokens = ev.usage.output_tokens || 0; + if (ev.type === 'message_start' && ev.message?.usage) inputTokens = ev.message.usage.input_tokens || 0; + } catch { /* skip */ } } } - return { content, model: config.model, inputTokens, outputTokens }; + return { + content, model: config.model, inputTokens, outputTokens, + thinking: thinking || undefined, thinkingTokens: undefined, + toolCalls: toolCalls.length ? toolCalls : undefined, + _rawContent: buildRawContent(content, thinking, toolCalls), + }; } else { // Non-streaming const res = await fetch(`${baseUrl}/v1/messages`, { - method: 'POST', - headers: { - 'Content-Type': 'application/json', - 'x-api-key': config.apiKey, - 'anthropic-version': '2023-06-01', - }, - body: JSON.stringify({ - model: config.model, - max_tokens: maxTokens, - system: systemPrompt, - messages: [{ role: 'user', content: userMessage }], - }), + method: 'POST', headers, body: JSON.stringify(body), }); if (!res.ok) { @@ -234,15 +376,236 @@ async function callAnthropic( } const data = await res.json() as any; + let content = ''; + let thinking = ''; + const toolCalls: ToolCall[] = []; + + for (const block of (data.content || [])) { + if (block.type === 'text') content += block.text; + if (block.type === 'thinking') thinking += block.thinking; + if (block.type === 'tool_use') { + toolCalls.push({ id: block.id, name: block.name, arguments: block.input || {} }); + } + } + return { - content: data.content?.[0]?.text || '', - model: data.model || config.model, + content, model: data.model || config.model, inputTokens: data.usage?.input_tokens, 
outputTokens: data.usage?.output_tokens, + thinking: thinking || undefined, + toolCalls: toolCalls.length ? toolCalls : undefined, + _rawContent: data.content, + }; + } +} + +function buildRawContent(content: string, thinking: string, toolCalls: ToolCall[]): any[] { + const blocks: any[] = []; + if (thinking) blocks.push({ type: 'thinking', thinking }); + if (content) blocks.push({ type: 'text', text: content }); + for (const tc of toolCalls) blocks.push({ type: 'tool_use', id: tc.id, name: tc.name, input: tc.arguments }); + return blocks; +} + +// ─── OpenAI Responses API ──────────────────────────────────────────── + +async function callOpenAIResponses( + config: LLMConfig, + systemPrompt: string, + userMessage: string, + onChunk?: (text: string) => void, +): Promise { + const baseUrl = config.baseUrl || BASE_URLS.openai; + const maxTokens = config.maxTokens || 8192; + + const headers: Record = { + 'Content-Type': 'application/json', + 'Authorization': `Bearer ${config.apiKey}`, + }; + + const input: any[] = [ + { role: 'developer', content: systemPrompt }, + { role: 'user', content: userMessage }, + ]; + + const tools: any[] = []; + if (config.webSearch) tools.push({ type: 'web_search' }); + if (config.tools?.length) { + for (const t of config.tools) { + tools.push({ + type: 'function', name: t.name, description: t.description, + parameters: t.parameters, strict: true, + }); + } + } + + const body: Record = { model: config.model, input, max_output_tokens: maxTokens }; + if (tools.length) body.tools = tools; + if (config.responseFormat === 'json') body.text = { format: { type: 'json_object' } }; + + if (onChunk) { + body.stream = true; + + const res = await fetch(`${baseUrl}/v1/responses`, { + method: 'POST', headers, body: JSON.stringify(body), + }); + + if (!res.ok) { + const err = await res.text(); + // Fallback to Chat Completions if Responses API not available + if (res.status === 404) return callOpenAICompatible(config, systemPrompt, userMessage, onChunk); 
+ throw new Error(`OpenAI API error ${res.status}: ${err}`); + } + + let content = ''; + let inputTokens = 0; + let outputTokens = 0; + const toolCalls: ToolCall[] = []; + + const reader = res.body?.getReader(); + if (!reader) throw new Error('No response body'); + const decoder = new TextDecoder(); + let buffer = ''; + + while (true) { + const { done, value } = await reader.read(); + if (done) break; + buffer += decoder.decode(value, { stream: true }); + const lines = buffer.split('\n'); + buffer = lines.pop() || ''; + + for (const line of lines) { + if (!line.startsWith('data: ')) continue; + const d = line.slice(6).trim(); + if (d === '[DONE]') continue; + try { + const ev = JSON.parse(d); + if (ev.type === 'response.output_text.delta' && ev.delta) { content += ev.delta; onChunk(ev.delta); } + if (ev.type === 'response.function_call_arguments.done') { + try { toolCalls.push({ id: ev.call_id || '', name: ev.name || '', arguments: JSON.parse(ev.arguments || '{}') }); } catch { /* skip */ } + } + if (ev.type === 'response.completed' && ev.response?.usage) { + inputTokens = ev.response.usage.input_tokens || 0; + outputTokens = ev.response.usage.output_tokens || 0; + } + } catch { /* skip */ } + } + } + + if (toolCalls.length && config.toolExecutor) { + return handleOpenAIToolLoop(config, baseUrl, headers, body, content, toolCalls, inputTokens, outputTokens, onChunk); + } + return { content, model: config.model, inputTokens, outputTokens, toolCalls: toolCalls.length ? 
toolCalls : undefined }; + } else { + // Non-streaming + const res = await fetch(`${baseUrl}/v1/responses`, { + method: 'POST', headers, body: JSON.stringify(body), + }); + + if (!res.ok) { + const err = await res.text(); + if (res.status === 404) return callOpenAICompatible(config, systemPrompt, userMessage, undefined); + throw new Error(`OpenAI API error ${res.status}: ${err}`); + } + + const data = await res.json() as any; + let content = ''; + const toolCalls: ToolCall[] = []; + + for (const item of (data.output || [])) { + if (item.type === 'message') { + for (const part of (item.content || [])) { + if (part.type === 'output_text') content += part.text; + } + } + if (item.type === 'function_call') { + try { toolCalls.push({ id: item.call_id || item.id || '', name: item.name || '', arguments: JSON.parse(item.arguments || '{}') }); } catch { /* skip */ } + } + } + if (!content && data.output_text) content = data.output_text; + + if (toolCalls.length && config.toolExecutor) { + return handleOpenAIToolLoop(config, baseUrl, headers, body, content, toolCalls, data.usage?.input_tokens, data.usage?.output_tokens, undefined); + } + + return { + content, model: data.model || config.model, + inputTokens: data.usage?.input_tokens, outputTokens: data.usage?.output_tokens, + toolCalls: toolCalls.length ? toolCalls : undefined, }; } } +/** Agentic tool-call loop for OpenAI Responses API */ +async function handleOpenAIToolLoop( + config: LLMConfig, baseUrl: string, headers: Record, + origBody: Record, partialContent: string, pending: ToolCall[], + inTok: number | undefined, outTok: number | undefined, + onChunk?: (text: string) => void, +): Promise { + const maxRounds = config.maxToolRounds ?? 
5; + const all = [...pending]; + let content = partialContent; + let inputTokens = inTok; + let outputTokens = outTok; + + for (let round = 0; round < maxRounds && pending.length; round++) { + const results: any[] = []; + for (const tc of pending) { + let r: string; + try { r = await config.toolExecutor!(tc.name, tc.arguments); } catch (e: any) { r = `Error: ${e.message}`; } + results.push({ type: 'function_call_output', call_id: tc.id, output: r }); + } + + const followUp: Record = { ...origBody, input: results, stream: !!onChunk }; + const res = await fetch(`${baseUrl}/v1/responses`, { method: 'POST', headers, body: JSON.stringify(followUp) }); + if (!res.ok) { const err = await res.text(); throw new Error(`OpenAI tool follow-up error ${res.status}: ${err}`); } + + pending = []; + + if (onChunk) { + const reader = res.body?.getReader(); + if (!reader) throw new Error('No response body'); + const dec = new TextDecoder(); + let buf = ''; + while (true) { + const { done, value } = await reader.read(); + if (done) break; + buf += dec.decode(value, { stream: true }); + const lines = buf.split('\n'); buf = lines.pop() || ''; + for (const ln of lines) { + if (!ln.startsWith('data: ')) continue; + const d = ln.slice(6).trim(); + if (d === '[DONE]') continue; + try { + const ev = JSON.parse(d); + if (ev.type === 'response.output_text.delta' && ev.delta) { content += ev.delta; onChunk(ev.delta); } + if (ev.type === 'response.function_call_arguments.done') { + try { const tc = { id: ev.call_id || '', name: ev.name || '', arguments: JSON.parse(ev.arguments || '{}') }; pending.push(tc); all.push(tc); } catch { /* skip */ } + } + if (ev.type === 'response.completed' && ev.response?.usage) { + inputTokens = (inputTokens || 0) + (ev.response.usage.input_tokens || 0); + outputTokens = (outputTokens || 0) + (ev.response.usage.output_tokens || 0); + } + } catch { /* skip */ } + } + } + } else { + const data = await res.json() as any; + for (const item of (data.output || [])) { + if 
(item.type === 'message') { for (const p of (item.content || [])) { if (p.type === 'output_text') content += p.text; } } + if (item.type === 'function_call') { + try { const tc = { id: item.call_id || item.id || '', name: item.name || '', arguments: JSON.parse(item.arguments || '{}') }; pending.push(tc); all.push(tc); } catch { /* skip */ } + } + } + if (data.output_text && !content) content = data.output_text; + if (data.usage) { inputTokens = (inputTokens || 0) + (data.usage.input_tokens || 0); outputTokens = (outputTokens || 0) + (data.usage.output_tokens || 0); } + } + } + + return { content, model: config.model, inputTokens, outputTokens, toolCalls: all.length ? all : undefined }; +} + // ─── OpenAI-compatible Chat Completions ────────────────────────────── async function callOpenAICompatible( @@ -259,26 +622,38 @@ async function callOpenAICompatible( 'Authorization': `Bearer ${config.apiKey}`, }; - // OpenRouter requires extra headers if (config.provider === 'openrouter') { headers['HTTP-Referer'] = 'https://guardlink.bugb.io'; headers['X-Title'] = 'GuardLink CLI'; } + const isDeepSeekReasoner = config.provider === 'deepseek' && config.model.includes('reasoner'); + + const body: Record = { + model: config.model, + max_tokens: maxTokens, + messages: [ + { role: 'system', content: systemPrompt }, + { role: 'user', content: userMessage }, + ], + }; + + if (config.responseFormat === 'json') { + body.response_format = { type: 'json_object' }; + } + + if (config.tools?.length) { + body.tools = config.tools.map(t => ({ + type: 'function', + function: { name: t.name, description: t.description, parameters: t.parameters }, + })); + } + if (onChunk) { - // Streaming + body.stream = true; + const res = await fetch(`${baseUrl}/v1/chat/completions`, { - method: 'POST', - headers, - body: JSON.stringify({ - model: config.model, - max_tokens: maxTokens, - stream: true, - messages: [ - { role: 'system', content: systemPrompt }, - { role: 'user', content: userMessage }, - ], 
- }), + method: 'POST', headers, body: JSON.stringify(body), }); if (!res.ok) { @@ -287,9 +662,9 @@ async function callOpenAICompatible( } let content = ''; + let reasoning = ''; const reader = res.body?.getReader(); if (!reader) throw new Error('No response body'); - const decoder = new TextDecoder(); let buffer = ''; @@ -297,7 +672,6 @@ async function callOpenAICompatible( const { done, value } = await reader.read(); if (done) break; buffer += decoder.decode(value, { stream: true }); - const lines = buffer.split('\n'); buffer = lines.pop() || ''; @@ -308,28 +682,17 @@ async function callOpenAICompatible( try { const event = JSON.parse(data); const delta = event.choices?.[0]?.delta?.content; - if (delta) { - content += delta; - onChunk(delta); - } + if (delta) { content += delta; onChunk(delta); } + const reasoningDelta = event.choices?.[0]?.delta?.reasoning_content; + if (reasoningDelta) reasoning += reasoningDelta; } catch { /* skip */ } } } - return { content, model: config.model }; + return { content, model: config.model, thinking: reasoning || undefined }; } else { - // Non-streaming const res = await fetch(`${baseUrl}/v1/chat/completions`, { - method: 'POST', - headers, - body: JSON.stringify({ - model: config.model, - max_tokens: maxTokens, - messages: [ - { role: 'system', content: systemPrompt }, - { role: 'user', content: userMessage }, - ], - }), + method: 'POST', headers, body: JSON.stringify(body), }); if (!res.ok) { @@ -338,11 +701,14 @@ async function callOpenAICompatible( } const data = await res.json() as any; + const choice = data.choices?.[0]; + return { - content: data.choices?.[0]?.message?.content || '', + content: choice?.message?.content || '', model: data.model || config.model, inputTokens: data.usage?.prompt_tokens, outputTokens: data.usage?.completion_tokens, + thinking: isDeepSeekReasoner ? 
(choice?.message?.reasoning_content || undefined) : undefined, }; } } diff --git a/src/analyze/prompts.ts b/src/analyze/prompts.ts index dfbfbac..5738180 100644 --- a/src/analyze/prompts.ts +++ b/src/analyze/prompts.ts @@ -16,201 +16,327 @@ export const FRAMEWORK_LABELS: Record = { general: 'General Threat Analysis', }; -const SYSTEM_BASE = `You are an expert Security Architect and Threat Modeler with 15+ years of experience. -You are analyzing a codebase that uses GuardLink annotations — structured security metadata embedded in source code comments. - -The threat model you receive contains: -- **Assets**: Components declared by developers -- **Threats**: Known threat vectors with severity and CWE references -- **Controls**: Security mechanisms in place -- **Mitigations**: Where controls defend assets against threats -- **Exposures**: Known vulnerabilities (asset exposed to threat) -- **Flows**: Data movement between components -- **Boundaries**: Trust boundaries between security zones -- **Comments**: Developer security notes - -Your analysis must be actionable, specific to THIS codebase, and reference the actual assets/threats/controls by name. -Never give generic advice — always tie recommendations to concrete annotations in the model.`; +const SYSTEM_BASE = `You are an expert Security Architect and Threat Modeler with 15+ years of experience in application security, secure design review, and red team operations. + +Your job is to **produce a complete, standalone threat model** for a real codebase. You are NOT simply summarizing annotations — you are using them as developer-provided hints to bootstrap a thorough security analysis. + +## Your inputs + +You will receive: +1. **Project context** — language/framework, dependencies, deployment signals (Dockerfile, CI config, etc.) +2. **Annotation graph** — structured security metadata extracted from source code comments (GuardLink annotations) +3. 
**Code snippets** — the actual source lines surrounding each annotation, so you can validate what developers claimed + +## How to use these inputs + +- Treat annotations as **developer hypotheses**, not ground truth. Validate them against the code snippets. +- Use the project context to reason about the **real attack surface** — what frameworks introduce, what dependencies are known-vulnerable, what the deployment model exposes. +- **Identify gaps**: what is NOT annotated but should be? Look at unannotated symbols, data flows with no security coverage, and dependency-level risks. +- Produce a threat model a **security team could hand to an auditor** — specific, evidence-based, and actionable. + +## Annotation semantics + +- **@asset** — a component the developer considers security-relevant +- **@threat** — a threat vector (with optional CWE reference and severity) +- **@control** — a security mechanism in place +- **@mitigates** — a real control exists in code defending an asset against a threat. This is a genuine defense. +- **@exposes** — a known vulnerability: this asset is exposed to this threat +- **@accepts** — risk acknowledged but **NO control in code**. This is a governance decision, not a technical fix. +- **@flows** — data movement between components +- **@boundary** — a trust boundary between security zones +- **@handles** — sensitive data classification (pii, phi, financial, secrets) +- **@assumes** — a security assumption the developer is relying on (potential blind spot) +- **@audit** — marks an asset as requiring human review + +## Critical rules + +- If you see **@accepts without @audit** on the same asset, flag it as a governance concern — risk may have been rubber-stamped without proper review. +- Treat accepted-but-unmitigated exposures as **OPEN RISKS**, not resolved findings. +- If a code snippet contradicts its annotation (e.g., a @mitigates annotation but the code shows no actual check), flag the annotation as **potentially inaccurate**. 
+- Challenge accepted risks: "You accepted this — is that reasonable given the severity and blast radius?" +- Always reference **specific files, assets, and threat IDs** from the model. Never give generic advice. + +## Output structure + +Your report must have two clearly separated sections: + +### Part 1 — Annotation Validation +For each significant annotation, assess: is the annotation accurate given the code? Did the developer miss anything in the surrounding code? Flag inaccurate, overstated, or missing annotations with specific evidence from the code snippets. + +### Part 2 — Threat Model +A complete, standalone threat model document produced from all available evidence (annotations + code + project context). Structure it with the sections appropriate to the framework you are applying. This is what a security team would hand to an auditor.`; export const FRAMEWORK_PROMPTS: Record = { stride: `${SYSTEM_BASE} -Perform a **STRIDE** analysis of this threat model. +Apply the **STRIDE** framework to produce a complete threat model. + +## Part 1 — Annotation Validation +For each @exposes and @mitigates annotation, cross-reference the provided code snippet: +- Does the code actually implement what the annotation claims? +- Is the severity rating appropriate given the code context? +- Flag any annotation that appears inaccurate or incomplete. -For each STRIDE category, evaluate the codebase: +## Part 2 — STRIDE Threat Model -## S — Spoofing -Identify where authentication can be bypassed. Check: are all assets with @exposes to auth-related threats properly mitigated? +For each STRIDE category, reason from ALL available evidence (annotations + code snippets + project context): -## T — Tampering -Identify where data integrity is at risk. Check: @flows without integrity controls, @handles with sensitive data lacking validation. +### S — Spoofing +Authentication bypass risks. 
Consider: framework-level auth mechanisms from project context, @exposes to auth threats, unannotated auth code paths visible in snippets. -## R — Repudiation -Identify where actions cannot be traced. Check: are there @audit annotations? Are critical operations logged? +### T — Tampering +Data integrity risks. Consider: @flows without integrity controls, @handles with sensitive data lacking validation, what the framework/dependencies do (or don't) provide. -## I — Information Disclosure -Identify where sensitive data leaks. Check: @exposes to info-disclosure/data-exposure threats, @handles pii/phi/secrets without encryption. +### R — Repudiation +Audit trail gaps. Consider: @audit annotations present vs. missing, critical operations in code snippets with no logging, framework logging capabilities. -## D — Denial of Service -Identify resource exhaustion risks. Check: @exposes to dos threats, rate limiting controls, boundary protections. +### I — Information Disclosure +Sensitive data leakage. Consider: @handles pii/phi/secrets, error handling visible in code snippets, dependency-level disclosure risks from project context. -## E — Elevation of Privilege -Identify privilege escalation paths. Check: @exposes to bac/idor threats, @boundary gaps, missing authorization controls. +### D — Denial of Service +Resource exhaustion. Consider: @exposes to dos threats, rate limiting in code snippets, framework/infrastructure protections from project context. + +### E — Elevation of Privilege +Privilege escalation paths. Consider: @exposes to bac/idor threats, @boundary gaps, authorization checks visible in code snippets. For each category: -1. List specific findings referencing actual assets and threats from the model -2. Rate severity (Critical/High/Medium/Low) -3. Recommend specific mitigations referencing existing controls or suggesting new ones -4. Identify gaps — what SHOULD be annotated but isn't? +1. 
Specific findings referencing actual assets, threats, and file locations +2. Severity (Critical/High/Medium/Low) with justification from code evidence +3. Concrete mitigations tied to existing controls or new ones needed +4. Annotation gaps — what @exposes or @mitigates are missing from the code? End with an Executive Summary and Priority Action Items.`, dread: `${SYSTEM_BASE} -Perform a **DREAD** risk scoring analysis of this threat model. +Apply **DREAD** risk scoring to produce a prioritized threat model. + +## Part 1 — Annotation Validation +Review each @exposes annotation against its code snippet. For each: +- Is the exposure real? Does the code confirm the vulnerability? +- Is the severity annotation accurate vs. DREAD scoring? +- Note any discrepancies between annotation claims and actual code. -For each unmitigated exposure and significant threat, calculate a DREAD score: +## Part 2 — DREAD Risk Model -- **D — Damage Potential** (0-10): How bad if exploited? -- **R — Reproducibility** (0-10): How easy to reproduce? -- **E — Exploitability** (0-10): How easy to launch the attack? -- **A — Affected Users** (0-10): How many users impacted? -- **D — Discoverability** (0-10): How easy to find the vulnerability? +For each unmitigated exposure and significant threat, calculate a DREAD score using ALL available evidence: + +- **D — Damage Potential** (0-10): How bad if exploited? Factor in @handles classifications and business context. +- **R — Reproducibility** (0-10): How easy to reproduce? Factor in code complexity from snippets. +- **E — Exploitability** (0-10): How easy to launch? Factor in known CVEs from dependencies (project context). +- **A — Affected Users** (0-10): How many users impacted? Factor in data flows and boundaries. +- **D — Discoverability** (0-10): How easy to find? Factor in public-facing surfaces from project context. 
Present results as a ranked table: -| Threat | Asset | D | R | E | A | D | Total | Risk Level | -|--------|-------|---|---|---|---|---|-------|------------| +| Threat | Asset | File | D | R | E | A | D | Total | Risk Level | +|--------|-------|------|---|---|---|---|---|-------|------------| Then provide: -1. Top 5 risks by DREAD score with detailed justification -2. Quick wins — high-score items with easy mitigations -3. Systemic risks — patterns across multiple exposures -4. Recommended priority order for remediation`, +1. Top 5 risks by DREAD score with detailed justification citing code evidence +2. Dependency-level risks from project context not captured in annotations +3. Quick wins — high-score items with straightforward mitigations +4. Systemic risks — patterns across multiple exposures +5. Recommended priority order for remediation`, pasta: `${SYSTEM_BASE} -Perform a **PASTA** (Process for Attack Simulation and Threat Analysis) assessment. +Apply the **PASTA** (Process for Attack Simulation and Threat Analysis) methodology. + +## Part 1 — Annotation Validation +Before the PASTA stages, validate the annotation graph against code snippets: +- Which @mitigates annotations are confirmed by actual code? +- Which @exposes annotations are confirmed by actual vulnerable code? +- Which @accepts decisions look unreasonable given the code evidence? -Work through all 7 PASTA stages: +## Part 2 — PASTA Assessment ### Stage 1: Define Objectives -What are the business-critical assets? Which @asset declarations represent the crown jewels? +Business-critical assets from @asset declarations. What are the crown jewels? Use @handles classifications to identify data sensitivity. ### Stage 2: Define Technical Scope -Map the attack surface from @flows, @boundary, and @handles annotations. What are the entry points? +Attack surface from @flows, @boundary, @handles, and project context (framework, deployment model, exposed ports/endpoints). 
What does the project context reveal that annotations miss? ### Stage 3: Application Decomposition -Analyze component relationships from flows and boundaries. Identify trust zones and data paths. +Component relationships from flows and boundaries. Trust zones, data paths, and dependency graph from project context. Identify components with no security annotations. ### Stage 4: Threat Analysis -Map declared @threat annotations to real-world attack techniques. Reference CWE/CAPEC where available. +Map @threat annotations to real-world attack techniques (CWE/CAPEC). Supplement with threats implied by the tech stack and dependencies from project context. ### Stage 5: Vulnerability Analysis -Evaluate each @exposes annotation. Which are most exploitable given the technical context? +Evaluate each @exposes annotation against code snippets. Which are confirmed? Which are most exploitable given the technical context and dependency versions? ### Stage 6: Attack Simulation -For the top 3 most critical exposures, describe a realistic attack scenario step-by-step. +For the top 3 most critical exposures, describe a realistic attack scenario step-by-step, referencing actual code paths from snippets and entry points from project context. ### Stage 7: Risk & Impact Analysis -Prioritized risk matrix with business impact assessment. +Prioritized risk matrix with business impact. Include dependency-level risks not captured in annotations. -End with concrete remediation recommendations tied to specific annotations.`, +End with concrete remediation recommendations tied to specific annotations and code locations.`, attacker: `${SYSTEM_BASE} -Perform an **Attacker Persona** analysis of this threat model. +Apply an **Attacker Persona** analysis to produce a complete threat model. 
-Adopt the mindset of different attacker types and evaluate the codebase: +## Part 1 — Annotation Validation +For each @exposes annotation, assess exploitability from an attacker's perspective: +- Does the code snippet confirm the vulnerability is real and reachable? +- Is the @accepts decision defensible against a motivated attacker? +- Flag any annotation that understates attacker capability. -### 1. Script Kiddie (Low Skill) -What can be exploited with publicly available tools? Which @exposes have known CVEs (check cwe: refs)? +## Part 2 — Attacker Persona Threat Model + +Adopt the mindset of each attacker type using ALL available evidence: + +### 1. Script Kiddie (Low Skill, Opportunistic) +What can be exploited with public tools? Check: CWE refs in @exposes, known-vulnerable dependency versions from project context, obvious misconfigurations visible in code snippets. ### 2. Opportunistic Attacker (Medium Skill) -What attack chains are possible? Can multiple exposures be combined? Check @flows for lateral movement paths. +What attack chains are possible? Check: @flows for lateral movement, multiple @exposes that can be chained, framework-level weaknesses from project context. -### 3. Targeted Attacker (High Skill) -What are the high-value targets (@handles pii/phi/financial/secrets)? What's the path from @boundary entry points to crown jewel assets? +### 3. Targeted Attacker (High Skill, Persistent) +Path from entry points to crown jewels. Check: @handles pii/phi/financial/secrets for targets, @boundary gaps for pivot points, @assumes for blind spots to exploit. -### 4. Insider Threat -Which @assumes annotations represent blind spots? Where does the model trust internal components without verification? +### 4. Insider Threat (Trusted Access) +What can a legitimate user or developer abuse? Check: @assumes that trust internal components, missing @audit annotations on sensitive operations, overprivileged data flows. For each persona: -1. 
Most likely attack vector (reference specific annotations) -2. Attack path (chain of assets/flows/boundaries) -3. Impact if successful -4. Current defenses (existing @mitigates) -5. Gaps in defense +1. Most likely attack vector (reference specific files and annotation IDs) +2. Step-by-step attack path through the system +3. Impact if successful (reference @handles data classifications) +4. Current defenses (@mitigates) and their effectiveness per code evidence +5. Gaps — what's missing that would stop this attacker? End with a prioritized defense improvement plan.`, rapid: `${SYSTEM_BASE} -Perform a **Rapid Risk Assessment** — concise, actionable, focused on the highest-impact items. +Produce a **Rapid Risk Assessment** — concise, actionable, highest-impact items only. + +## Part 1 — Annotation Validation (Brief) +Flag only significant discrepancies: annotations contradicted by code snippets, or @accepts decisions that look unreasonable. Keep this section under 20 lines. -### Critical Findings (Stop Everything) -List any P0/critical @exposes without @mitigates. These are active vulnerabilities. +## Part 2 — Rapid Risk Assessment + +### Critical Findings (Act Now) +Unmitigated @exposes at critical/high severity. Confirmed by code snippets where available. Include dependency CVEs from project context. ### High-Priority Gaps -- Unmitigated exposures by severity -- @assumes that could be violated -- @boundary without proper controls on crossing flows +- Unmitigated exposures by severity with file locations +- @assumes that could be violated given the tech stack +- @boundary crossings with no security controls in code +- Unannotated symbols handling sensitive data (from coverage stats) ### Coverage Assessment -- What percentage of assets have threat coverage? -- Which components have @flows but no security annotations? -- Are there @handles (sensitive data) without corresponding @mitigates? 
+- Annotation coverage % and what the unannotated symbols are +- Components with @flows but no security annotations +- @handles (sensitive data) without corresponding @mitigates ### Top 5 Recommendations -Numbered, actionable, with specific annotation suggestions (exact @mitigates lines to add). +Numbered, specific, actionable. For each: what to fix, where (file:line), and the exact GuardLink annotation to add. ### Risk Score -Rate overall security posture: A (excellent) through F (critical risk). Justify with data from the model. +A (excellent) through F (critical risk). Justify with specific data points from the model and code. -Keep the entire analysis under 500 lines. Be direct — no filler.`, +Keep the entire report under 500 lines. Be direct — no filler.`, general: `${SYSTEM_BASE} -Perform a comprehensive threat analysis of this codebase. +Produce a **complete threat model document** for this codebase. The document should be usable by a security team for audit, review, or a public trust center — not just a list of findings. + +## Part 1 — Annotation Validation + +For each significant annotation, assess accuracy against the code snippet: +- **@mitigates**: does the code actually implement the claimed control? +- **@exposes**: is the vulnerability real and reachable from the code? +- **@accepts**: is the risk acceptance reasonable given severity and blast radius? Challenge it. +- Flag inaccurate, missing, or overstated annotations with specific file:line evidence. + +## Part 2 — Threat Model Document + +Produce the following sections. Omit a section only if there is genuinely no relevant information for it — do not pad with boilerplate. + +### 1. Overview & Scope +What this system does, what it protects, and what is explicitly out of scope for this threat model. Derive from @asset declarations, @flows, and project context. -### Executive Summary -2-3 sentence overall assessment. +### 2. 
Architecture +How the system is structured: components, trust zones, and their relationships. Derive from @boundary, @flows, and project context (framework, deployment model). Include a prose description of the component topology — which components are internet-facing, which are internal, which handle sensitive data. -### Threat Landscape -What threats does this application face? Map @threat declarations to real-world attack patterns. +### 3. Key Flows & Data Paths +The most security-relevant data flows through the system. For each: source → destination, what data is carried (@handles classifications), what trust boundaries are crossed (@boundary), and what controls exist at each crossing. Reference specific file locations. -### Security Posture -- Strengths: well-mitigated areas, good control coverage -- Weaknesses: unmitigated exposures, missing controls -- Blind spots: areas with no annotations at all +### 4. Data Handling & AI/ML Data Use +All sensitive data in the system from @handles annotations: classification (pii, phi, financial, secrets), which assets hold it, how it moves, and where it is stored or logged. If the project uses ML/AI models: what data is fed to them, what is returned, and what the privacy/integrity implications are. -### Data Flow Analysis -Trace sensitive data through @flows and @boundary annotations. Where does data cross trust boundaries without protection? +### 5. Roles & Access +Who or what can access the system and at what privilege level. Derive from @flows, @boundary, and @assumes. Identify overprivileged paths and missing access controls. -### Missing Annotations -Based on the architecture visible in @flows and @boundary, what security annotations are likely missing? +### 6. Dependencies & Supply Chain +From project context: all third-party dependencies, their versions, and any known risk signals (outdated packages, packages with known CVEs, packages with excessive permissions). 
Flag dependencies not covered by any @mitigates annotation. -### Recommendations -Prioritized list with: -1. What to fix (specific exposure) -2. How to fix it (specific control/mitigation) -3. What annotation to add (exact syntax) +### 7. Secrets, Keys & Credential Management +All credentials, API keys, tokens, and secrets in the system. Derive from @handles secrets annotations, .env.example signals, and code snippets. How are they stored, rotated, and scoped? What happens if one is leaked? -### Compliance Considerations -Based on @handles classifications (pii, phi, financial), note relevant compliance requirements (GDPR, HIPAA, PCI-DSS).`, +### 8. Logging, Monitoring & Audit +What is logged, what is not, and what should be. Derive from @audit annotations (present and missing), @handles pii/phi (logged data must be scrubbed), and code snippets showing logging calls. Flag critical operations with no audit trail. + +### 9. Assumptions & Threat Actors +From @assumes annotations: what the system trusts without verification. List the threat actors relevant to this system (external attacker, insider, supply chain, automated scanner) and their assumed capabilities. Flag assumptions that a motivated attacker could violate. + +### 10. Abuse Scenarios & Findings +For each unmitigated @exposes and each significant gap found during annotation validation: a concrete abuse scenario. Format each as: +- **Finding**: what the vulnerability is (file:line) +- **Scenario**: how an attacker exploits it step by step +- **Impact**: what they gain (reference @handles data classifications) +- **Severity**: Critical / High / Medium / Low with justification +- **Remediation**: specific code change or control to add, plus the GuardLink annotation to reflect it + +Order by severity descending. + +### 11. 
Testing & Review Scope +What security testing is appropriate for this system given its architecture and findings above: unit tests for security controls, integration tests for auth/authz flows, fuzz targets, pen test scope (which endpoints/components), and any automated scanning recommendations. + +### 12. Open Risks & Accepted Risks +All @accepts annotations: for each, state the risk, why it was accepted, whether that acceptance is still reasonable given the code evidence, and what the residual blast radius is. Flag any @accepts without a corresponding @audit as an unreviewed acceptance. + +### 13. Priority Action Items +Top 5–10 items the team should act on, ordered by risk. For each: one-line description, severity, effort estimate (low/medium/high), and the specific GuardLink annotation change that would reflect the fix.`, }; /** - * Build the user message containing the serialized threat model. + * Build the user message containing the serialized threat model, + * optional project context, and optional code snippets. */ -export function buildUserMessage(modelJson: string, framework: AnalysisFramework, customPrompt?: string): string { +export function buildUserMessage( + modelJson: string, + framework: AnalysisFramework, + customPrompt?: string, + projectContext?: string, + codeSnippets?: string, +): string { const header = customPrompt - ? `Analyze this threat model. ${customPrompt}` - : `Produce a ${FRAMEWORK_LABELS[framework]} for this threat model.`; - - return `${header} - - -${modelJson} -`; + ? `Use these annotations as input to produce a threat model. 
Additional focus: ${customPrompt}` + : `Produce a ${FRAMEWORK_LABELS[framework]} for this codebase using all available evidence below.`; + + const parts: string[] = [header, '']; + + if (projectContext) { + parts.push(''); + parts.push(projectContext); + parts.push(''); + parts.push(''); + } + + parts.push(''); + parts.push(modelJson); + parts.push(''); + + if (codeSnippets) { + parts.push(''); + parts.push(''); + parts.push(codeSnippets); + parts.push(''); + } + + return parts.join('\n'); } diff --git a/src/analyze/tools.ts b/src/analyze/tools.ts new file mode 100644 index 0000000..10fbbaf --- /dev/null +++ b/src/analyze/tools.ts @@ -0,0 +1,246 @@ +/** + * GuardLink — Tool definitions for LLM function calling. + * + * Defines tools that the LLM can invoke during threat analysis: + * - lookup_cve: Search for CVE details (via web fetch) + * - validate_finding: Cross-reference a finding against the parsed model + * - search_codebase: Search project files for patterns + * + * @flows External_LLM_APIs -> #llm-tools via tool_call -- "LLM requests tool execution" + * @flows #llm-tools -> External_LLM_APIs via tool_result -- "Tool results returned to LLM" + * @exposes #llm-tools to #ssrf [medium] cwe:CWE-918 -- "lookup_cve fetches external URLs" + * @mitigates #llm-tools against #ssrf using #url-allowlist -- "Only fetches from known CVE databases" + */ + +import { readFileSync, readdirSync, statSync } from 'node:fs'; +import { join, relative } from 'node:path'; +import type { ToolDefinition, ToolExecutor } from './llm.js'; +import type { ThreatModel } from '../types/index.js'; + +// ─── Tool definitions ──────────────────────────────────────────────── + +export const GUARDLINK_TOOLS: ToolDefinition[] = [ + { + name: 'lookup_cve', + description: 'Look up a CVE identifier to get vulnerability details including severity, description, and affected products. 
Use this when analyzing exposures that reference specific CWEs or when you need current vulnerability intelligence.', + parameters: { + type: 'object', + properties: { + cve_id: { type: 'string', description: 'CVE identifier (e.g., CVE-2024-1234)' }, + }, + required: ['cve_id'], + additionalProperties: false, + }, + }, + { + name: 'validate_finding', + description: 'Cross-reference a potential finding against the parsed threat model. Check if an exposure, mitigation, or control already exists for a given asset+threat pair.', + parameters: { + type: 'object', + properties: { + asset: { type: 'string', description: 'Asset ID or path (e.g., #auth-api or Server.Auth)' }, + threat: { type: 'string', description: 'Threat ID or name (e.g., #sqli or SQL_Injection)' }, + check: { type: 'string', description: 'What to check', enum: ['exposure_exists', 'mitigation_exists', 'is_unmitigated'] }, + }, + required: ['asset', 'threat', 'check'], + additionalProperties: false, + }, + }, + { + name: 'search_codebase', + description: 'Search project source files for a pattern (case-insensitive substring match). Returns matching lines with file paths and line numbers. Use this to verify code-level claims during threat analysis.', + parameters: { + type: 'object', + properties: { + pattern: { type: 'string', description: 'Search pattern (substring, case-insensitive)' }, + }, + required: ['pattern'], + additionalProperties: false, + }, + }, +]; + +// ─── Tool executor ─────────────────────────────────────────────────── + +/** + * Create a tool executor bound to a project root and threat model. + * The executor handles all GuardLink tool calls. 
+ */ +export function createToolExecutor(root: string, model: ThreatModel | null): ToolExecutor { + return async (name: string, args: Record): Promise => { + switch (name) { + case 'lookup_cve': + return lookupCve(args.cve_id); + case 'validate_finding': + return validateFinding(model, args.asset, args.threat, args.check); + case 'search_codebase': + return searchCodebase(root, args.pattern, args.file_glob, parseInt(args.max_results || '20', 10)); + default: + return `Unknown tool: ${name}`; + } + }; +} + +// ─── Tool implementations ──────────────────────────────────────────── + +/** Fetch CVE details from NVD API */ +async function lookupCve(cveId: string): Promise { + if (!cveId || !cveId.match(/^CVE-\d{4}-\d{4,}$/i)) { + return `Invalid CVE ID format: ${cveId}. Expected format: CVE-YYYY-NNNNN`; + } + + try { + const url = `https://services.nvd.nist.gov/rest/json/cves/2.0?cveId=${encodeURIComponent(cveId.toUpperCase())}`; + const res = await fetch(url, { + headers: { 'User-Agent': 'GuardLink/1.0 (threat-modeling-tool)' }, + signal: AbortSignal.timeout(10000), + }); + + if (!res.ok) { + return `NVD API returned ${res.status} for ${cveId}`; + } + + const data = await res.json() as any; + const vuln = data.vulnerabilities?.[0]?.cve; + if (!vuln) return `No data found for ${cveId}`; + + const desc = vuln.descriptions?.find((d: any) => d.lang === 'en')?.value || 'No description'; + const metrics = vuln.metrics?.cvssMetricV31?.[0]?.cvssData || vuln.metrics?.cvssMetricV40?.[0]?.cvssData; + const score = metrics?.baseScore || 'N/A'; + const severity = metrics?.baseSeverity || 'N/A'; + + const cwes = vuln.weaknesses?.flatMap((w: any) => + w.description?.map((d: any) => d.value) + )?.filter(Boolean) || []; + + return JSON.stringify({ + id: cveId.toUpperCase(), + description: desc.slice(0, 500), + cvss_score: score, + severity, + cwes, + published: vuln.published, + last_modified: vuln.lastModified, + }); + } catch (err: any) { + return `CVE lookup failed: ${err.message}`; 
+ } +} + +/** Validate a finding against the parsed threat model */ +function validateFinding( + model: ThreatModel | null, + asset: string, + threat: string, + check: string, +): string { + if (!model) return 'No threat model available. Run guardlink parse first.'; + + const normalizeId = (s: string) => s.replace(/^#/, '').toLowerCase(); + const assetId = normalizeId(asset); + const threatId = normalizeId(threat); + + const matchAsset = (a: string) => normalizeId(a) === assetId; + const matchThreat = (t: string) => normalizeId(t) === threatId; + + switch (check) { + case 'exposure_exists': { + const found = model.exposures.filter(e => matchAsset(e.asset) && matchThreat(e.threat)); + if (found.length) { + return JSON.stringify({ + exists: true, + count: found.length, + exposures: found.map(e => ({ + severity: e.severity, + description: e.description, + file: e.location.file, + line: e.location.line, + })), + }); + } + return JSON.stringify({ exists: false }); + } + case 'mitigation_exists': { + const found = model.mitigations.filter(m => matchAsset(m.asset) && matchThreat(m.threat)); + if (found.length) { + return JSON.stringify({ + exists: true, + count: found.length, + mitigations: found.map(m => ({ + control: m.control, + description: m.description, + file: m.location.file, + line: m.location.line, + })), + }); + } + return JSON.stringify({ exists: false }); + } + case 'is_unmitigated': { + const exposed = model.exposures.some(e => matchAsset(e.asset) && matchThreat(e.threat)); + const mitigated = model.mitigations.some(m => matchAsset(m.asset) && matchThreat(m.threat)); + const accepted = model.acceptances.some(a => matchAsset(a.asset) && matchThreat(a.threat)); + return JSON.stringify({ exposed, mitigated, accepted, unmitigated: exposed && !mitigated && !accepted }); + } + default: + return `Unknown check type: ${check}. 
Use: exposure_exists, mitigation_exists, is_unmitigated`; + } +} + +/** Search project source files for a pattern */ +function searchCodebase( + root: string, + pattern: string, + fileGlob?: string, + maxResults = 20, +): string { + if (!pattern) return 'No search pattern provided'; + + const results: { file: string; line: number; text: string }[] = []; + const pat = pattern.toLowerCase(); + const ext = fileGlob ? fileGlob.toLowerCase() : null; + + // Walk source files (skip node_modules, .git, dist, etc.) + const skipDirs = new Set(['node_modules', '.git', 'dist', 'build', '.guardlink', '__pycache__', '.next', 'vendor', 'target']); + + function walk(dir: string) { + if (results.length >= maxResults) return; + let entries: string[]; + try { entries = readdirSync(dir); } catch { return; } + + for (const entry of entries) { + if (results.length >= maxResults) return; + const full = join(dir, entry); + let stat; + try { stat = statSync(full); } catch { continue; } + + if (stat.isDirectory()) { + if (!skipDirs.has(entry) && !entry.startsWith('.')) walk(full); + } else if (stat.isFile()) { + if (ext && !entry.toLowerCase().endsWith(ext)) continue; + // Skip binary / large files + if (stat.size > 500_000) continue; + if (/\.(png|jpg|gif|ico|woff|ttf|eot|svg|mp[34]|zip|tar|gz|lock|map)$/i.test(entry)) continue; + + try { + const content = readFileSync(full, 'utf-8'); + const lines = content.split('\n'); + for (let i = 0; i < lines.length && results.length < maxResults; i++) { + if (lines[i].toLowerCase().includes(pat)) { + results.push({ + file: relative(root, full), + line: i + 1, + text: lines[i].trim().slice(0, 200), + }); + } + } + } catch { /* skip unreadable */ } + } + } + } + + walk(root); + + if (!results.length) return `No matches found for "${pattern}"`; + return JSON.stringify(results); +} diff --git a/src/cli/index.ts b/src/cli/index.ts index dd009db..85e263b 100644 --- a/src/cli/index.ts +++ b/src/cli/index.ts @@ -4,13 +4,21 @@ * GuardLink CLI — Reference 
Implementation * * Usage: - * guardlink init [dir] Initialize GuardLink in a project - * guardlink parse [dir] Parse annotations, output ThreatModel JSON - * guardlink status [dir] Show annotation coverage summary - * guardlink validate [dir] Check for syntax errors and dangling refs - * guardlink analyze [framework] AI-powered threat analysis (STRIDE, DREAD, etc.) - * guardlink annotate Launch coding agent for annotation - * guardlink config Manage LLM provider configuration + * guardlink init [dir] Initialize GuardLink in a project + * guardlink parse [dir] Parse annotations, output ThreatModel JSON + * guardlink status [dir] Show annotation coverage summary + * guardlink validate [dir] Check for syntax errors and dangling refs + * guardlink report [dir] Generate markdown + JSON threat model report + * guardlink diff [ref] Compare threat model against a git ref + * guardlink sarif [dir] Export SARIF 2.1.0 for GitHub / VS Code + * guardlink threat-report AI-powered threat analysis (STRIDE, DREAD, PASTA, etc.) + * guardlink threat-reports List saved AI threat reports + * guardlink annotate Launch coding agent to add annotations + * guardlink config Manage LLM provider configuration + * guardlink dashboard [dir] Generate interactive HTML dashboard + * guardlink mcp Start MCP server (stdio) for Claude Code, Cursor, etc. 
+ * guardlink tui [dir] Interactive TUI with slash commands + AI chat + * guardlink gal Display GAL annotation language quick reference * * @exposes #cli to #path-traversal [high] cwe:CWE-22 -- "Accepts directory paths from command line arguments" * @exposes #cli to #arbitrary-write [high] cwe:CWE-73 -- "Writes reports and SARIF to user-specified output paths" @@ -25,8 +33,8 @@ import { Command } from 'commander'; import { resolve, basename } from 'node:path'; -import { readFileSync, existsSync } from 'node:fs'; -import { parseProject, findDanglingRefs, findUnmitigatedExposures } from '../parser/index.js'; +import { readFileSync, writeFileSync, existsSync, mkdirSync } from 'node:fs'; +import { parseProject, findDanglingRefs, findUnmitigatedExposures, findAcceptedWithoutAudit, findAcceptedExposures } from '../parser/index.js'; import { initProject, detectProject, promptAgentSelection } from '../init/index.js'; import { generateReport, generateMermaid } from '../report/index.js'; import { diffModels, formatDiff, formatDiffMarkdown, parseAtRef, getCurrentRef } from '../diff/index.js'; @@ -34,7 +42,7 @@ import { generateSarif } from '../analyzer/index.js'; import { startStdioServer } from '../mcp/index.js'; import { generateThreatReport, listThreatReports, loadThreatReportsForDashboard, buildConfig, FRAMEWORK_LABELS, FRAMEWORK_PROMPTS, serializeModel, buildUserMessage, type AnalysisFramework } from '../analyze/index.js'; import { generateDashboardHTML } from '../dashboard/index.js'; -import { AGENTS, agentFromOpts, launchAgent, buildAnnotatePrompt } from '../agents/index.js'; +import { AGENTS, agentFromOpts, launchAgent, launchAgentInline, buildAnnotatePrompt } from '../agents/index.js'; import { resolveConfig, saveProjectConfig, saveGlobalConfig, loadProjectConfig, loadGlobalConfig, maskKey, describeConfigSource } from '../agents/config.js'; import type { ThreatModel, ParseDiagnostic } from '../types/index.js'; import gradient from 'gradient-string'; @@ -216,11 
+224,18 @@ program // Check for dangling refs const danglingDiags = findDanglingRefs(model); - const allDiags = [...diagnostics, ...danglingDiags]; + + // Check for @accepts without @audit (governance concern) + const acceptAuditDiags = findAcceptedWithoutAudit(model); + + const allDiags = [...diagnostics, ...danglingDiags, ...acceptAuditDiags]; // Check for unmitigated exposures const unmitigated = findUnmitigatedExposures(model); + // Check for accepted-but-unmitigated exposures (risk acceptance without real controls) + const acceptedOnly = findAcceptedExposures(model); + printDiagnostics(allDiags); if (unmitigated.length > 0) { @@ -230,11 +245,20 @@ program } } + if (acceptedOnly.length > 0) { + console.error(`\n⚡ ${acceptedOnly.length} accepted-but-unmitigated exposure(s) (risk accepted, no control in code):`); + for (const a of acceptedOnly) { + console.error(` ${a.asset} → ${a.threat} [${a.severity || 'unset'}] (${a.location.file}:${a.location.line})`); + } + } + const errorCount = allDiags.filter(d => d.level === 'error').length; const hasUnmitigated = unmitigated.length > 0; - if (errorCount === 0 && !hasUnmitigated) { + if (errorCount === 0 && !hasUnmitigated && acceptedOnly.length === 0) { console.error('\n✓ All annotations valid, no unmitigated exposures.'); + } else if (errorCount === 0 && !hasUnmitigated && acceptedOnly.length > 0) { + console.error(`\nValidation passed. 
${acceptedOnly.length} exposure(s) accepted without mitigation — ensure these are intentional human decisions.`); } else if (errorCount === 0 && hasUnmitigated) { console.error(`\nValidation passed with ${unmitigated.length} unmitigated exposure(s).`); } @@ -385,39 +409,39 @@ program program .command('threat-report') - .description('Generate an AI threat report using a security framework (STRIDE, DREAD, PASTA, etc.)') - .argument('[framework]', 'Framework: stride, dread, pasta, attacker, rapid, general', 'general') - .argument('[dir]', 'Project directory', '.') + .description('Generate an AI threat report using a framework or custom prompt') + .argument('[prompt...]', 'Framework (stride, dread, pasta, attacker, rapid, general) or custom prompt text') + .option('-d, --dir

', 'Project directory', '.') .option('-p, --project ', 'Project name', 'unknown') - .option('--provider ', 'LLM provider: anthropic, openai, openrouter, deepseek (auto-detected from env)') + .option('--provider ', 'LLM provider: anthropic, openai, google, openrouter, deepseek (auto-detected from env)') .option('--model ', 'Model name (default: provider-specific)') .option('--api-key ', 'API key (default: from env variable)') .option('--no-stream', 'Disable streaming output') - .option('--custom ', 'Custom analysis prompt (replaces framework prompt header)') - .option('--claude-code', 'Launch Claude Code in foreground') - .option('--codex', 'Launch Codex CLI in foreground') - .option('--gemini', 'Launch Gemini CLI in foreground') + .option('--web-search', 'Enable web search grounding (OpenAI only)') + .option('--thinking', 'Enable extended thinking / reasoning (Anthropic, DeepSeek only)') + .option('--claude-code', 'Run via Claude Code (inline)') + .option('--codex', 'Run via Codex CLI (inline)') + .option('--gemini', 'Run via Gemini CLI (inline)') .option('--cursor', 'Open Cursor IDE with prompt on clipboard') .option('--windsurf', 'Open Windsurf IDE with prompt on clipboard') .option('--clipboard', 'Copy threat report prompt to clipboard only') - .action(async (framework: string, dir: string, opts: { - project: string; provider?: string; model?: string; apiKey?: string; - stream?: boolean; custom?: string; + .action(async (promptParts: string[], opts: { + dir: string; project: string; provider?: string; model?: string; apiKey?: string; + stream?: boolean; webSearch?: boolean; thinking?: boolean; claudeCode?: boolean; codex?: boolean; gemini?: boolean; cursor?: boolean; windsurf?: boolean; clipboard?: boolean; }) => { - const root = resolve(dir); + const root = resolve(opts.dir); const project = detectProjectName(root, opts.project); + const input = promptParts.join(' ').trim(); - // Validate framework + // Determine framework vs custom prompt const validFrameworks 
= ['stride', 'dread', 'pasta', 'attacker', 'rapid', 'general']; - if (!validFrameworks.includes(framework)) { - console.error(`Unknown framework: ${framework}`); - console.error(`Available: ${validFrameworks.join(', ')}`); - process.exit(1); - } - - const fw = framework as AnalysisFramework; + const inputLower = input.toLowerCase(); + const isStandard = validFrameworks.includes(inputLower); + const fw = (isStandard ? inputLower : 'general') as AnalysisFramework; + const customPrompt = isStandard ? undefined : (input || undefined); + const reportLabel = customPrompt ? 'Custom Threat Analysis' : FRAMEWORK_LABELS[fw]; // Parse project const { model, diagnostics } = await parseProject({ root, project }); @@ -429,69 +453,117 @@ program process.exit(1); } - // Resolve agent (same pattern as annotate) - const agent = agentFromOpts(opts); - - // ── Agent path: build prompt, launch agent ── - if (agent) { - const serialized = serializeModel(model); - const systemPrompt = FRAMEWORK_PROMPTS[fw] || FRAMEWORK_PROMPTS.general; - const userMessage = buildUserMessage(serialized, fw, opts.custom); - const fullPrompt = `${systemPrompt}\n\n${userMessage}\n\nAlso read the source files to understand code context. Save the report to .guardlink/threat-reports/ as a markdown file.`; - - console.log(`Generating ${FRAMEWORK_LABELS[fw]} via ${agent.name}...`); - if (agent.cmd) { - console.log(`${agent.name} will take over this terminal. 
Exit the agent to return.\n`); + // Build analysis prompt (shared by agent and API paths) + const serialized = serializeModel(model); + const { buildProjectContext, extractCodeSnippets } = await import('../analyze/index.js'); + const projectContext = buildProjectContext(root); + const codeSnippets = extractCodeSnippets(root, model); + const systemPrompt = FRAMEWORK_PROMPTS[fw]; + const userMessage = buildUserMessage(serialized, fw, customPrompt, projectContext || undefined, codeSnippets || undefined); + const analysisPrompt = `You are analyzing a codebase with GuardLink security annotations. +You have access to the full source code in the current directory. + +${systemPrompt} + +## Task +Read the source code and GuardLink annotations, then produce a thorough ${reportLabel}. + +## Threat Model (serialized from annotations) +${userMessage} + +## Instructions +1. Read the actual source files to understand the code — don't just rely on the serialized model above +2. Cross-reference the annotations with the real code to validate findings +3. Produce the full report as markdown +4. Be specific — reference actual files, functions, and line numbers from the codebase +5. Output ONLY the markdown report content — do NOT add any metadata comments, save confirmations, or file path messages +6. 
Do NOT include lines like "Generated by...", "Agent:", "Project:", or "The report file write was blocked..."`; + + // Resolve agent: explicit flag > project config CLI agent + let agent = agentFromOpts(opts); + if (!agent) { + const projCfg = loadProjectConfig(root); + if (projCfg?.aiMode === 'cli-agent' && projCfg?.cliAgent) { + agent = AGENTS.find(a => a.id === projCfg.cliAgent) || null; } + } - const result = launchAgent(agent, fullPrompt, root); + // ── Path 1: CLI Agent (inline, non-interactive) ── + if (agent && agent.cmd) { + console.error(`\n🔍 ${reportLabel}`); + console.error(` Agent: ${agent.name} (inline)`); + console.error(` Annotations: ${model.annotations_parsed} | Exposures: ${model.exposures.length}\n`); - if (result.clipboardCopied) { - console.log(`✓ Prompt copied to clipboard (${fullPrompt.length.toLocaleString()} chars)`); - } + const result = await launchAgentInline( + agent, + analysisPrompt, + root, + (text) => process.stdout.write(text), + { autoYes: true }, + ); if (result.error) { - console.error(`✗ ${result.error}`); - if (result.clipboardCopied) { - console.log('Prompt is on your clipboard — paste it manually.'); - } + console.error(`\n✗ ${result.error}`); process.exit(1); } - if (agent.cmd && result.launched) { - console.log(`\n✓ ${agent.name} session ended.`); - console.log(' Run: guardlink threat-reports to see saved reports.'); - } else if (agent.app && result.launched) { + process.stdout.write('\n'); + + // Save the agent's output as a report + if (result.content.trim()) { + const timestamp = new Date().toISOString().replace(/[:.]/g, '-').slice(0, 19); + const reportsDir = resolve(root, '.guardlink', 'threat-reports'); + if (!existsSync(reportsDir)) mkdirSync(reportsDir, { recursive: true }); + const filename = `${timestamp}-${fw}.md`; + const filepath = resolve(reportsDir, filename); + + // Clean ANSI codes and CLI artifacts from the output before saving + const { cleanCliArtifacts } = await import('../tui/format.js'); + const 
cleanedContent = cleanCliArtifacts(result.content); + + const header = `---\nframework: ${fw}\nlabel: ${FRAMEWORK_LABELS[fw]}\nmodel: ${agent.name}\ntimestamp: ${new Date().toISOString()}\nproject: ${project}\nannotations: ${model.annotations_parsed}\n---\n\n# ${FRAMEWORK_LABELS[fw]}\n\n> Generated by \`guardlink threat-report ${fw}\` on ${new Date().toISOString().slice(0, 10)}\n> Agent: ${agent.name} | Project: ${project} | Annotations: ${model.annotations_parsed}\n\n`; + writeFileSync(filepath, header + cleanedContent + '\n'); + console.error(`\n✓ Report saved to .guardlink/threat-reports/${filename}`); + } + return; + } + + // ── Path 2: Clipboard / IDE agent ── + if (agent && !agent.cmd) { + const result = launchAgent(agent, analysisPrompt, root); + if (result.clipboardCopied) { + console.log(`✓ Prompt copied to clipboard (${analysisPrompt.length.toLocaleString()} chars)`); + } + if (result.launched && agent.app) { console.log(`✓ ${agent.name} launched with project: ${project}`); console.log('\nPaste (Cmd+V) the prompt in the AI chat panel.'); console.log('When done, run: guardlink threat-reports'); } else if (agent.id === 'clipboard') { console.log('\nPaste the prompt into your preferred AI tool.'); console.log('When done, run: guardlink threat-reports'); + } else if (result.error) { + console.error(`✗ ${result.error}`); + process.exit(1); } return; } - // ── API path: direct LLM call (no agent flag) ── + // ── Path 3: Direct API call ── const llmConfig = buildConfig({ provider: opts.provider, model: opts.model, apiKey: opts.apiKey, - }); + }) || resolveConfig(root); if (!llmConfig) { - // No agent, no API key — show usage like annotate does - console.error('No agent or API key specified. 
Use one of:'); - for (const a of AGENTS) { - console.error(` ${a.flag.padEnd(16)} ${a.name}`); - } - console.error(''); - console.error('Or set an API key: ANTHROPIC_API_KEY, OPENAI_API_KEY, etc.'); - console.error('Or use: --provider anthropic --api-key sk-...'); + console.error('No AI provider configured. Use one of:'); + console.error(' guardlink config Configure API provider'); + console.error(' --claude-code / --codex Use a CLI agent'); + console.error(' ANTHROPIC_API_KEY=... Set env var'); process.exit(1); } - console.error(`\n🔍 ${FRAMEWORK_LABELS[fw]}`); + console.error(`\n🔍 ${reportLabel}`); console.error(` Provider: ${llmConfig.provider} | Model: ${llmConfig.model}`); console.error(` Annotations: ${model.annotations_parsed} | Exposures: ${model.exposures.length}\n`); @@ -501,9 +573,11 @@ program model, framework: fw, llmConfig, - customPrompt: opts.custom, + customPrompt, stream: opts.stream !== false, onChunk: opts.stream !== false ? (text) => process.stdout.write(text) : undefined, + webSearch: opts.webSearch, + extendedThinking: opts.thinking, }); if (opts.stream !== false) { @@ -516,6 +590,9 @@ program if (result.inputTokens || result.outputTokens) { console.error(` Tokens: ${result.inputTokens || '?'} in / ${result.outputTokens || '?'} out`); } + if (result.thinkingTokens) { + console.error(` Thinking: ${result.thinkingTokens} tokens`); + } } catch (err: any) { console.error(`\n✗ Threat report generation failed: ${err.message}`); process.exit(1); @@ -629,7 +706,7 @@ program .command('config') .description('Manage LLM provider configuration') .argument('', 'Action: set, show, clear') - .argument('[key]', 'Config key: provider, api-key, model') + .argument('[key]', 'Config key: provider, api-key, model, ai-mode, cli-agent') .argument('[value]', 'Value to set') .option('--global', 'Use global config (~/.config/guardlink/) instead of project') .action(async (action: string, key?: string, value?: string, opts?: { global?: boolean }) => { @@ -640,16 +717,24 
@@ program case 'show': { const config = resolveConfig(root); const source = describeConfigSource(root); + const projCfg = isGlobal ? loadGlobalConfig() : loadProjectConfig(root); + const aiMode = projCfg?.aiMode || 'api'; + const cliAgent = projCfg?.cliAgent; + + console.log(`AI Mode: ${aiMode}${cliAgent ? ` (${cliAgent})` : ''}`); if (config) { console.log(`Provider: ${config.provider}`); console.log(`Model: ${config.model}`); console.log(`API Key: ${maskKey(config.apiKey)}`); console.log(`Source: ${source}`); - } else { + } else if (aiMode !== 'cli-agent') { console.log('No LLM configuration found.'); console.log('\nSet one with:'); console.log(' guardlink config set provider anthropic'); console.log(' guardlink config set api-key sk-ant-...'); + console.log('\nOr use a CLI agent:'); + console.log(' guardlink config set ai-mode cli-agent'); + console.log(' guardlink config set cli-agent claude-code'); console.log('\nOr set environment variables:'); console.log(' export GUARDLINK_LLM_KEY=sk-ant-...'); console.log(' export GUARDLINK_LLM_PROVIDER=anthropic'); @@ -660,7 +745,7 @@ program case 'set': { if (!key || !value) { console.error('Usage: guardlink config set '); - console.error('Keys: provider, api-key, model'); + console.error('Keys: provider, api-key, model, ai-mode, cli-agent'); process.exit(1); } @@ -668,11 +753,14 @@ program ? 
loadGlobalConfig() || {} : loadProjectConfig(root) || {}; + const validProviders = ['anthropic', 'openai', 'google', 'openrouter', 'deepseek', 'ollama']; + const validAgentIds = AGENTS.map(a => a.id); + switch (key) { case 'provider': - if (!['anthropic', 'openai', 'openrouter', 'deepseek'].includes(value)) { + if (!validProviders.includes(value)) { console.error(`Unknown provider: ${value}`); - console.error('Available: anthropic, openai, openrouter, deepseek'); + console.error(`Available: ${validProviders.join(', ')}`); process.exit(1); } (existing as any).provider = value; @@ -683,8 +771,25 @@ program case 'model': (existing as any).model = value; break; + case 'ai-mode': + if (!['api', 'cli-agent'].includes(value)) { + console.error(`Unknown ai-mode: ${value}`); + console.error('Available: api, cli-agent'); + process.exit(1); + } + (existing as any).aiMode = value; + break; + case 'cli-agent': + if (!validAgentIds.includes(value)) { + console.error(`Unknown cli-agent: ${value}`); + console.error(`Available: ${validAgentIds.join(', ')}`); + process.exit(1); + } + (existing as any).cliAgent = value; + (existing as any).aiMode = 'cli-agent'; + break; default: - console.error(`Unknown config key: ${key}. Use: provider, api-key, model`); + console.error(`Unknown config key: ${key}. 
Use: provider, api-key, model, ai-mode, cli-agent`); process.exit(1); } @@ -766,7 +871,7 @@ program .command('tui') .description('Interactive TUI — slash commands, AI chat, exposure triage') .argument('[dir]', 'project directory', '.') - .option('--provider ', 'LLM provider for this session (anthropic, openai, openrouter, deepseek)') + .option('--provider ', 'LLM provider for this session (anthropic, openai, google, openrouter, deepseek)') .option('--api-key ', 'LLM API key for this session (not persisted)') .option('--model ', 'LLM model override') .action(async (dir: string, opts: { provider?: string; apiKey?: string; model?: string }) => { @@ -782,37 +887,154 @@ program .description('Display GuardLink Annotation Language (GAL) quick reference') .action(() => { import('chalk').then(({ default: c }) => { + const H = (s: string) => c.bold.cyan(s); + const V = (s: string) => c.bold.cyanBright(s); + const K = (s: string) => c.yellow(s); + const D = (s: string) => c.dim(s); + const EX = (s: string) => c.green(s); + console.log(gradient(['#00ff41', '#00d4ff'])(ASCII_LOGO)); - console.log(`${c.bold.bgCyan.black(' GUARDLINK ANNOTATION LANGUAGE (GAL) ')}\n`); - console.log(`${c.bold('Syntax:')}`); - console.log(` // @verb [qualifiers] [refs] -- "description"\n`); - - console.log(`${c.bold('Definition Verbs:')}`); - console.log(` ${c.green('@asset')} (#id) ${c.gray('Declare a component')}`); - console.log(` ${c.green('@threat')} (#id) [sev] ${c.gray('Declare a threat')}`); - console.log(` ${c.green('@control')} (#id) ${c.gray('Declare a security control')}\n`); - - console.log(`${c.bold('Relationship Verbs:')}`); - console.log(` ${c.green('@mitigates')} against using `); - console.log(` ${c.green('@exposes')} to [severity]`); - console.log(` ${c.green('@flows')} -> via `); - console.log(` ${c.green('@boundary')} between and (#id)\n`); - - console.log(`${c.bold('Lifecycle & Metadata:')}`); - console.log(` ${c.green('@handles')} on ${c.gray('Data classification')}`); - 
console.log(` ${c.green('@owns')} for ${c.gray('Security ownership')}`); - console.log(` ${c.green('@assumes')} ${c.gray('Security assumption')}`); - console.log(` ${c.green('@shield')} [-- "reason"] ${c.gray('AI exclusion marker')}\n`); - - console.log(`${c.bold('Severity Levels:')}`); - console.log(` [critical] | [high] | [medium] | [low]`); - console.log(` [P0] | [P1] | [P2] | [P3]\n`); - - console.log(`${c.bold('Data Classifications:')}`); - console.log(` pii | secrets | financial | phi | internal | public\n`); - - console.log(`${c.bold('Example:')}`); - console.log(` ${c.gray('// @mitigates #api against #sqli using #prepared-stmts -- "Parameterized query"')}\n`); + console.log(''); + console.log(H(' ══════════════════════════════════════════════════════════')); + console.log(H(' GAL — GuardLink Annotation Language')); + console.log(H(' ══════════════════════════════════════════════════════════')); + console.log(''); + console.log(D(' Annotations live in source code comments. GuardLink parses')); + console.log(D(' them to build a live threat model from your codebase.')); + console.log(''); + console.log(D(' Syntax: @verb subject [preposition object] [: description]')); + console.log(''); + + // ── DEFINITIONS ── + console.log(H(' ── Definitions ─────────────────────────────────────────────')); + console.log(''); + + console.log(` ${V('@asset')} ${K('')} ${D('[: description]')}`); + console.log(D(' Declare a named asset (component, service, data store).')); + console.log(D(' Path uses dot notation for hierarchy.')); + console.log(EX(' // @asset api.auth.token_store : Stores JWT refresh tokens')); + console.log(EX(' // @asset db.users')); + console.log(''); + + console.log(` ${V('@threat')} ${K('')} ${D('[severity: critical|high|medium|low] [: description]')}`); + console.log(D(' Declare a named threat. 
Severity aliases: P0=critical P1=high P2=medium P3=low.')); + console.log(EX(' // @threat SQL Injection severity:high : Unsanitized input reaches DB')); + console.log(EX(' // @threat Token Theft severity:P0')); + console.log(''); + + console.log(` ${V('@control')} ${K('')} ${D('[: description]')}`); + console.log(D(' Declare a security control (mitigation mechanism).')); + console.log(EX(' // @control Input Validation : Sanitize all user-supplied strings')); + console.log(EX(' // @control Rate Limiting')); + console.log(''); + + // ── RELATIONSHIPS ── + console.log(H(' ── Relationships ───────────────────────────────────────────')); + console.log(''); + + console.log(` ${V('@exposes')} ${K('')} ${D('to')} ${K('')} ${D('[severity: ...] [: description]')}`); + console.log(D(' Mark an asset as exposed to a threat at this code location.')); + console.log(D(' This is the primary annotation — every exposure creates a finding.')); + console.log(EX(' // @exposes api.auth to SQL Injection severity:high')); + console.log(EX(' // @exposes db.users to Token Theft severity:critical : No token rotation')); + console.log(''); + + console.log(` ${V('@mitigates')} ${K('')} ${D('against')} ${K('')} ${D('[with')} ${K('')}${D('] [: description]')}`); + console.log(D(' Mark that a control mitigates a threat on an asset.')); + console.log(D(' Closes the exposure — removes it from open findings.')); + console.log(EX(' // @mitigates api.auth against SQL Injection with Input Validation')); + console.log(EX(' // @mitigates db.users against Token Theft : Rotation implemented in v2')); + console.log(''); + + console.log(` ${V('@accepts')} ${K('')} ${D('on')} ${K('')} ${D('[: reason]')}`); + console.log(D(' Explicitly accept a risk. 
Removes it from open findings.')); + console.log(D(' Use when the risk is known and intentionally not mitigated.')); + console.log(EX(' // @accepts Timing Attack on api.auth : Acceptable for current threat model')); + console.log(''); + + console.log(` ${V('@transfers')} ${K('')} ${D('from')} ${K('')} ${D('to')} ${K('')} ${D('[: description]')}`); + console.log(D(' Transfer responsibility for a threat to another asset/team.')); + console.log(EX(' // @transfers DDoS from api.gateway to cdn.cloudflare : Handled by CDN layer')); + console.log(''); + + // ── DATA FLOWS ── + console.log(H(' ── Data Flows & Boundaries ─────────────────────────────────')); + console.log(''); + + console.log(` ${V('@flows')} ${K('')} ${D('to')} ${K('')} ${D('[via')} ${K('')}${D('] [: description]')}`); + console.log(D(' Document data movement between components.')); + console.log(D(' Appears in the Data Flow Diagram.')); + console.log(EX(' // @flows api.auth to db.users via TLS 1.3')); + console.log(EX(' // @flows mobile.app to api.gateway via HTTPS : User credentials')); + console.log(''); + + console.log(` ${V('@boundary')} ${K('')} ${D('and')} ${K('')} ${D('[: description]')}`); + console.log(D(' Declare a trust boundary between two assets.')); + console.log(D(' Groups assets in the Data Flow Diagram.')); + console.log(EX(' // @boundary internet and api.gateway : Public-facing edge')); + console.log(EX(' // @boundary api.gateway and db.users : Internal network boundary')); + console.log(''); + + // ── LIFECYCLE ── + console.log(H(' ── Lifecycle & Governance ──────────────────────────────────')); + console.log(''); + + console.log(` ${V('@handles')} ${K('')} ${D('on')} ${K('')} ${D('[: description]')}`); + console.log(D(' Declare data classification handled by an asset.')); + console.log(D(' Classifications: pii phi financial secrets internal public')); + console.log(EX(' // @handles pii on db.users : Stores name, email, phone')); + console.log(EX(' // @handles secrets on 
api.auth.token_store')); + console.log(''); + + console.log(` ${V('@owns')} ${K('')} ${K('')} ${D('[: description]')}`); + console.log(D(' Assign ownership of an asset to a team or person.')); + console.log(EX(' // @owns platform-team api.auth')); + console.log(''); + + console.log(` ${V('@validates')} ${K('')} ${D('on')} ${K('')} ${D('[: description]')}`); + console.log(D(' Assert that a control has been validated/tested on an asset.')); + console.log(EX(' // @validates Input Validation on api.auth : Pen-tested 2024-Q3')); + console.log(''); + + console.log(` ${V('@audit')} ${K('')} ${D('[: description]')}`); + console.log(D(' Mark that this code path is an audit trail point.')); + console.log(EX(' // @audit db.users : All writes logged to audit_log table')); + console.log(''); + + console.log(` ${V('@assumes')} ${K('')} ${D('[: description]')}`); + console.log(D(' Document a security assumption about an asset.')); + console.log(EX(' // @assumes api.gateway : Upstream WAF filters malformed requests')); + console.log(''); + + console.log(` ${V('@comment')} ${D('[: description]')}`); + console.log(D(' Free-form developer security note (no structural effect).')); + console.log(EX(' // @comment : TODO — add rate limiting before v2 launch')); + console.log(''); + + // ── SHIELD BLOCKS ── + console.log(H(' ── Shield Blocks ───────────────────────────────────────────')); + console.log(''); + console.log(` ${V('@shield:begin')} ${D('/')} ${V('@shield:end')}`); + console.log(D(' Wrap a code block to mark it as security-sensitive.')); + console.log(D(' GuardLink will flag unannotated symbols inside the block.')); + console.log(EX(' // @shield:begin')); + console.log(EX(' function verifyToken(token: string) { ... 
}')); + console.log(EX(' // @shield:end')); + console.log(''); + + // ── TIPS ── + console.log(H(' ── Tips ────────────────────────────────────────────────────')); + console.log(''); + console.log(D(' • Annotations work in any comment style: // /* # -- ')); + console.log(D(' • Place annotations on the line ABOVE the code they describe')); + console.log(D(' • Asset names are case-insensitive and normalized (spaces→underscores)')); + console.log(D(' • Threat/control names can reference IDs with #id syntax')); + console.log(D(' • Run guardlink parse after adding annotations to update the threat model')); + console.log(D(' • Run guardlink validate to check for syntax errors and dangling references')); + console.log(D(' • Run guardlink annotate to have an AI agent add annotations automatically')); + console.log(''); + console.log(H(' ══════════════════════════════════════════════════════════')); + console.log(''); }); }); diff --git a/src/dashboard/data.ts b/src/dashboard/data.ts index c28afef..83500ca 100644 --- a/src/dashboard/data.ts +++ b/src/dashboard/data.ts @@ -22,6 +22,11 @@ export interface DashboardStats { transfers: number; flows: number; boundaries: number; + validations: number; + ownership: number; + audits: number; + assumptions: number; + shields: number; comments: number; coveragePercent: number; coverageAnnotated: number; @@ -69,6 +74,11 @@ export function computeStats(model: ThreatModel): DashboardStats { transfers: model.transfers.length, flows: model.flows.length, boundaries: model.boundaries.length, + validations: model.validations.length, + ownership: model.ownership.length, + audits: model.audits.length, + assumptions: model.assumptions.length, + shields: model.shields.length, comments: model.comments.length, coveragePercent: model.coverage.coverage_percent, coverageAnnotated: model.coverage.annotated_symbols, diff --git a/src/dashboard/generate.ts b/src/dashboard/generate.ts index 1e0ac5e..61db759 100644 --- a/src/dashboard/generate.ts +++ 
b/src/dashboard/generate.ts @@ -83,15 +83,21 @@ ${CSS_CONTENT}
-
@@ -708,10 +727,11 @@ function renderAIAnalysisPage(analyses: ThreatReportWithContent[]): string { return `
Threat Reports
-
- -
+
+ +
+
`; } @@ -913,6 +933,79 @@ function renderDataPage(model: ThreatModel): string { ` : ''} + ${model.validations.length > 0 ? ` +
Validations (${model.validations.length})
+ + + + ${model.validations.map(v => ` + + + + + + `).join('')} + +
ControlAssetDescriptionLocation
${esc(v.control)}${esc(v.asset)}${esc(v.description || '—')}${v.location ? `${esc(v.location.file)}:${v.location.line}` : ''}
` : ''} + + ${model.ownership.length > 0 ? ` +
Ownership (${model.ownership.length})
+ + + + ${model.ownership.map(o => ` + + + + + + `).join('')} + +
AssetOwnerDescriptionLocation
${esc(o.asset)}${esc(o.owner)}${esc(o.description || '—')}${o.location ? `${esc(o.location.file)}:${o.location.line}` : ''}
` : ''} + + ${model.audits.length > 0 ? ` +
Audit Items (${model.audits.length})
+ + + + ${model.audits.map(a => ` + + + + + `).join('')} + +
AssetDescriptionLocation
${esc(a.asset)}${esc(a.description || 'Needs review')}${a.location ? `${esc(a.location.file)}:${a.location.line}` : ''}
` : ''} + + ${model.assumptions.length > 0 ? ` +
Assumptions (${model.assumptions.length})
+

Unverified assumptions that should be periodically reviewed.

+ + + + ${model.assumptions.map(a => ` + + + + + `).join('')} + +
AssetAssumptionLocation
${esc(a.asset)}${esc(a.description || 'Unverified assumption')}${a.location ? `${esc(a.location.file)}:${a.location.line}` : ''}
` : ''} + + ${model.shields.length > 0 ? ` +
Shielded Regions (${model.shields.length})
+

Code regions where annotations are intentionally suppressed via @shield.

+ + + + ${model.shields.map(s => ` + + + + `).join('')} + +
ReasonLocation
${esc(s.reason || 'No reason provided')}${s.location ? `${esc(s.location.file)}:${s.location.line}` : ''}
` : ''} + ${model.comments.length > 0 ? `
Developer Comments (${model.comments.length})
@@ -927,7 +1020,9 @@ function renderDataPage(model: ThreatModel): string {
` : ''} ${model.boundaries.length === 0 && model.data_handling.length === 0 && model.comments.length === 0 - ? '

No data classifications, trust boundaries, or comments found.

' : ''} + && model.validations.length === 0 && model.ownership.length === 0 && model.audits.length === 0 + && model.assumptions.length === 0 && model.shields.length === 0 + ? '

No data classifications, trust boundaries, or lifecycle annotations found.

' : ''}
`; } @@ -1015,6 +1110,12 @@ function buildFileAnnotations(model: ThreatModel, root?: string): FileAnnotation for (const t of model.transfers) addEntry('transfers', t as any, `${t.source} → ${t.target}`); for (const f of model.flows) addEntry('flow', f as any, `${f.source} → ${f.target}`); for (const b of model.boundaries) addEntry('boundary', b as any, `${b.asset_a} ↔ ${b.asset_b}`); + for (const h of model.data_handling) addEntry('handles', h as any, `${h.asset}: ${h.classification}`); + for (const v of model.validations) addEntry('validates', v as any, `${v.control} validates ${v.asset}`); + for (const o of model.ownership) addEntry('owns', o as any, `${o.owner} owns ${o.asset}`); + for (const a of model.audits) addEntry('audit', a as any, `Audit: ${a.asset}`); + for (const a of model.assumptions) addEntry('assumes', a as any, `Assumes: ${a.asset}`); + for (const s of model.shields) addEntry('shield', s as any, s.reason || 'Shielded region'); for (const c of model.comments) addEntry('comment', c as any, c.description || 'Developer note'); const result: FileAnnotationGroup[] = []; @@ -1128,12 +1229,23 @@ code { background: var(--border); padding: 1px 4px; border-radius: 3px; font-siz [data-theme="light"] .icon-moon { display: none; } /* ── Layout ── */ -.layout { display: flex; height: calc(100vh - 48px); } -.sidebar { width: var(--sidebar-w); min-width: var(--sidebar-w); background: var(--surface); border-right: 1px solid var(--border); overflow-y: auto; padding: .6rem 0; } +.layout { display: flex; height: calc(100vh - 48px); position: relative; } +.sidebar { width: var(--sidebar-w); min-width: var(--sidebar-w); background: var(--surface); border-right: 1px solid var(--border); display: flex; flex-direction: column; transition: all .25s ease; } +.sidebar-nav { flex: 1; overflow-y: auto; padding: .6rem 0; } +.sidebar.collapsed { width: 50px; min-width: 50px; } +.sidebar.collapsed .nav-text { display: none; } +.sidebar.collapsed .sep { margin: .5rem .5rem; } 
+.sidebar.collapsed .chevron-left { display: none; } +.sidebar.collapsed .chevron-right { display: block; } +#sidebarToggle { background: var(--surface2); border: none; border-top: 1px solid var(--border); padding: .8rem; cursor: pointer; color: var(--muted); transition: all .2s; display: flex; align-items: center; justify-content: center; width: 100%; } +#sidebarToggle:hover { background: var(--border); color: var(--accent); } +#sidebarToggle svg { display: block; } +#sidebarToggle .chevron-right { display: none; } .sidebar a { display: flex; align-items: center; gap: .6rem; padding: .55rem 1rem; font-size: .8rem; color: var(--muted); cursor: pointer; border-left: 3px solid transparent; transition: all .12s; user-select: none; } .sidebar a:hover { background: var(--surface2); color: var(--text); } .sidebar a.active { color: var(--accent); border-left-color: var(--accent); background: rgba(45,212,167,.08); } -.sidebar .nav-icon { font-size: 1rem; width: 20px; text-align: center; } +.sidebar .nav-icon { width: 20px; display: flex; align-items: center; justify-content: center; flex-shrink: 0; } +.sidebar .nav-icon svg { display: block; } .sidebar .sep { height: 1px; background: var(--border); margin: .5rem 1rem; } .main { flex: 1; overflow-y: auto; padding: 0; } .section-content { display: none; padding: 1.2rem 1.5rem; } .section-content.active { display: block; } @@ -1225,6 +1337,9 @@ tr.clickable { cursor: pointer; } tr.clickable:hover { background: var(--table-h .ann-mitigates { background: #1a3a1a; color: #3fb950; } .ann-accepts { background: #3a3a1a; color: #d29922; } .ann-transfers { background: #2a1a3a; color: #bc8cff; } .ann-flow { background: #2a2a2a; color: #8b949e; } .ann-boundary { background: #2a1a3a; color: #bc8cff; } .ann-data { background: #3a2a1a; color: #db6d28; } +.ann-handles { background: #3a2a1a; color: #db6d28; } .ann-validates { background: #1a3a1a; color: #3fb950; } +.ann-owns { background: #1c3a5e; color: #58a6ff; } .ann-audit { background: 
#3a3a1a; color: #d29922; } +.ann-assumes { background: #3a3a1a; color: #d29922; } .ann-shield { background: #2a2a2a; color: #8b949e; } .ann-comment { background: var(--surface2); color: var(--muted); border: 1px solid var(--border); } /* ── File Cards (Code Browser) ── */ @@ -1274,18 +1389,14 @@ tr.clickable { cursor: pointer; } tr.clickable:hover { background: var(--table-h .diagram-tab.active { color: var(--accent); border-bottom-color: var(--accent); } .diagram-panel { display: none; } .diagram-panel.active { display: block; } -/* ── AI Analysis Explorer ── */ -.ai-analysis-wrap { display: flex; gap: 1.5rem; margin-top: 0.75rem; min-height: 400px; } -.ai-analyses-explorer { width: 240px; min-width: 240px; max-height: calc(100vh - 220px); overflow-y: auto; padding-right: 0.5rem; border-right: 1px solid var(--border); } -.ai-analyses-explorer:empty { display: none; } -.ai-analysis-wrap:not(.has-explorer) .ai-analyses-explorer { display: none; } -.ai-analysis-main { flex: 1; min-width: 0; } -.ai-analysis-item { padding: 0.5rem 0.6rem; margin-bottom: 0.35rem; border-radius: 6px; font-size: 0.8rem; cursor: pointer; border: 1px solid transparent; transition: background 0.15s, border-color 0.15s; } -.ai-analysis-item:hover { background: var(--surface2); } -.ai-analysis-item.active { background: rgba(45,212,167,.12); border-color: var(--accent); } -.ai-analysis-item .aai-type { font-weight: 600; color: var(--accent); text-transform: capitalize; } -.ai-analysis-item .aai-date { color: var(--muted); font-size: 0.72rem; margin-top: 0.2rem; } -.ai-analysis-item .aai-model { color: var(--text-dim); font-size: 0.68rem; margin-top: 0.15rem; font-family: var(--font-mono); } +/* ── AI Analysis Controls ── */ +.ai-analysis-controls { display: flex; align-items: center; gap: 0.75rem; margin: 0.75rem 0 1.25rem; } +.report-selector-label { font-weight: 600; font-size: 0.88rem; color: var(--text); } +.report-selector { flex: 1; max-width: 600px; padding: 0.5rem 0.75rem; font-size: 
0.88rem; font-family: var(--font-base); background: var(--surface2); color: var(--text); border: 1px solid var(--border); border-radius: 6px; cursor: pointer; transition: border-color 0.15s, background 0.15s; } +.report-selector:hover { background: var(--surface3); border-color: var(--accent); } +.report-selector:focus { outline: none; border-color: var(--accent); box-shadow: 0 0 0 3px rgba(45,212,167,0.1); } +.report-selector option { background: var(--surface); color: var(--text); padding: 0.5rem; } +.ai-analysis-main { margin-top: 0.5rem; } .md-content h1 { font-size: 1.4rem; font-weight: 700; margin: 1.2rem 0 .6rem; color: var(--text); } .md-content h2 { font-size: 1.15rem; font-weight: 600; margin: 1rem 0 .5rem; color: var(--text); border-bottom: 1px solid var(--border); padding-bottom: .3rem; } .md-content h3 { font-size: 1rem; font-weight: 600; margin: .8rem 0 .4rem; color: var(--text); } @@ -1303,8 +1414,8 @@ tr.clickable { cursor: pointer; } tr.clickable:hover { background: var(--table-h /* ── Responsive ── */ @media (max-width: 768px) { - .sidebar { width: 50px; min-width: 50px; } .sidebar a span:not(.nav-icon) { display: none; } + .sidebar { width: 50px; min-width: 50px; } .sidebar .nav-text { display: none; } .topnav .tn-stat { display: none; } } -@media print { .topnav, .sidebar { display: none; } .main { margin: 0; } .layout { display: block; } #themeToggle { display: none; } } +@media print { .topnav, .sidebar, #sidebarToggle { display: none; } .main { margin: 0; } .layout { display: block; } #themeToggle { display: none; } } `; diff --git a/src/init/templates.ts b/src/init/templates.ts index 12e4014..947a0e0 100644 --- a/src/init/templates.ts +++ b/src/init/templates.ts @@ -57,7 +57,7 @@ Append after severity: \`cwe:CWE-89\`, \`owasp:A03:2021\`, \`capec:CAPEC-66\`, \ 1. **Define once, reference everywhere.** \`@asset\`, \`@threat\`, \`@control\` with \`(#id)\` go in \`.guardlink/definitions${project.definitionsExt}\`. 
Source files use relationship verbs only (\`@mitigates\`, \`@exposes\`, etc.). 2. **Read definitions before adding.** Check for existing IDs first — avoid duplicates. -3. **Every \`@exposes\` needs a plan.** Match with \`@mitigates\` (fix exists), \`@accepts\` (risk acknowledged), or TODO. +3. **Every \`@exposes\` needs a response.** Match with \`@mitigates\` (fix exists), \`@audit\` (flag for review), or \`@accepts\` (human-only risk acceptance). AI agents must NEVER write \`@accepts\` — use \`@audit\` instead. 4. **Always annotate security-relevant code.** At minimum, add \`@comment\` to document intent. 5. **Use the full verb set.** \`@flows\` for data movement, \`@handles\` for data classification, \`@boundary\` for trust boundaries. @@ -67,6 +67,7 @@ Append after severity: \`cwe:CWE-89\`, \`owasp:A03:2021\`, \`capec:CAPEC-66\`, \ |-----------|------------| | New service/component | \`@asset\` in definitions, then reference in source | | Security gap exists | \`@exposes Asset to #threat\` | +| Risk with no fix yet | \`@audit Asset\` + \`@comment\` explaining potential controls | | Implementing a fix | \`@mitigates Asset against #threat using #control\` | | Processing sensitive data | \`@handles pii on Asset\` | | Proprietary algorithm | \`@shield:begin\` ... \`@shield:end\` | @@ -107,22 +108,37 @@ This project uses [GuardLink](https://guardlink.bugb.io) annotations in source c ### Key Rules -1. Definitions (\`@asset\`, \`@threat\`, \`@control\` with \`#id\`) go in \`.guardlink/definitions${project.definitionsExt}\` — read it before adding new ones. -2. Source files use relationship verbs only: \`@mitigates\`, \`@exposes\`, \`@accepts\`, \`@flows\`, \`@handles\`, \`@boundary\`, \`@comment\`. -3. Every \`@exposes\` needs a matching \`@mitigates\` or \`@accepts\`. -4. Always add at least \`@comment\` on security-relevant code. -5. Run \`guardlink validate .\` after making changes. +1. Do not delete or mangle existing GuardLink annotations. 
Treat them as part of the code. Edit only when intentionally changing the threat model. +2. Definitions (\`@asset\`, \`@threat\`, \`@control\` with \`(#id)\`) live in \`.guardlink/definitions${project.definitionsExt}\`. Reuse existing \`#id\`s — never redefine. +3. Source files use relationship verbs only: \`@mitigates\`, \`@exposes\`, \`@flows\`, \`@handles\`, \`@boundary\`, \`@comment\`, \`@validates\`, \`@audit\`, \`@owns\`, \`@assumes\`, \`@transfers\`. +4. Every \`@exposes\` should be paired with \`@mitigates\` or \`@audit\` (for human review). NEVER write \`@accepts\` — that is a human-only governance decision. +5. Prefer coupled blocks that tell a complete story near the code they describe (risk + control + flow + note). +6. Avoid \`@shield\` unless a human explicitly asks to hide code from AI — it creates blind spots. -### MCP Tools Available +### Workflow (while coding) -Use \`guardlink_lookup\` to check existing definitions. Use \`guardlink_validate\` after annotating. Use \`guardlink_suggest \` for recommendations. +- Before adding new annotations: skim \`.guardlink/definitions${project.definitionsExt}\` to reuse IDs. +- After changes: run \`guardlink validate .\` to catch syntax/dangling refs; run \`guardlink status .\` to check coverage; commit updates with the code. +- When adding features: add or update annotations in the same PR. -### Quick Syntax +### Tools + +- MCP tools (when available, e.g., Claude Code): \`guardlink_lookup\`, \`guardlink_validate\`, \`guardlink_status\`, \`guardlink_parse\`, \`guardlink_suggest \`. +- CLI equivalents (always available): \`guardlink validate .\`, \`guardlink status .\`, \`guardlink parse .\`. 
+ +### Quick Syntax (common verbs) \`\`\` -@exposes Asset to #threat [P0] cwe:CWE-89 -- "description" -@mitigates Asset against #threat using #control -- "how" -@comment -- "security-relevant note" +@exposes App.API to #sqli [P0] cwe:CWE-89 -- "req.body.email concatenated into SQL" +@mitigates App.API against #sqli using #prepared-stmts -- "Parameterized queries via pg" +@audit App.API -- "Timing attack risk — needs human review to assess bcrypt constant-time comparison" +@flows User -> App.API via HTTPS -- "Login request path" +@boundary between #api and #db (#data-boundary) -- "App → DB trust change" +@handles pii on App.API -- "Processes email and session token" +@validates #prepared-stmts for App.API -- "sqlInjectionTest.ts ensures placeholders used" +@audit App.API -- "Token rotation logic needs crypto review" +@owns security-team for App.API -- "Team responsible for reviews" +@comment -- "Rate limit: 100 req/15min via express-rate-limit" \`\`\` `.trimStart(); } @@ -136,25 +152,29 @@ export function cursorRulesContent(project: ProjectInfo): string { This project uses GuardLink annotations in source code comments. -## Annotation Syntax -- @asset (#id) -- "description" -- @threat (#id) [P0|P1|P2|P3] cwe:CWE-NNN -- "description" -- @control (#id) -- "description" -- @mitigates against <#threat> using <#control> -- "how" -- @exposes to <#threat> [severity] cwe:CWE-NNN -- "what" -- @accepts <#threat> on -- "why" -- @flows -> via -- "details" -- @boundary between and (#id) -- "trust boundary" -- @handles on -- @shield:begin -- "reason" ... @shield:end - -## Rules -- All @asset, @threat, @control with (#id) go in .guardlink/definitions${project.definitionsExt}. Source files use only relationship verbs (@mitigates, @exposes, @accepts, @flows, etc). -- Read definitions file before adding — check for existing IDs first. -- Severity: P0=critical, P1=high, P2=medium, P3=low. Only P0-P3. 
-- External refs: cwe:CWE-89, owasp:A03:2021, capec:CAPEC-66 -- Every @exposes needs a matching @mitigates or @accepts. -- Run \`guardlink validate .\` to check annotations. +## Key Rules +- Preserve existing annotations — do not delete or mangle them. Edit only when intentionally changing the model. +- Definitions (@asset, @threat, @control with (#id)) live in .guardlink/definitions${project.definitionsExt}. Reuse IDs — never redefine. +- Source files use relationship verbs: @mitigates, @exposes, @flows, @handles, @boundary, @comment, @validates, @audit, @owns, @assumes, @transfers. +- Pair @exposes with @mitigates or @audit. NEVER write @accepts — that is a human-only governance decision. +- When no mitigation exists, use @audit to flag for human review + @comment to suggest potential controls. +- Avoid @shield unless a human explicitly asks to hide code from AI. + +## Workflow +- Before changes: skim .guardlink/definitions${project.definitionsExt}. +- After changes: run \`guardlink validate .\` and \`guardlink status .\`. 
+ +## Quick Syntax +- @exposes App.API to #sqli [P0] cwe:CWE-89 -- "req.body.email concatenated into SQL" +- @mitigates App.API against #sqli using #prepared-stmts -- "Parameterized queries via pg" +- @audit App.API -- "Timing attack risk — needs human review" +- @flows User -> App.API via HTTPS -- "Login request" +- @boundary between #api and #db (#data-boundary) -- "Trust change" +- @handles pii on App.API -- "Processes email, token" +- @validates #prepared-stmts for App.API -- "CI test ensures placeholders" +- @audit App.API -- "Token rotation review" +- @owns security-team for App.API -- "Team responsible" +- @comment -- "Rate limit: 100 req/15min" `.trimStart(); } diff --git a/src/mcp/server.ts b/src/mcp/server.ts index 7ab4ccd..68bcbcd 100644 --- a/src/mcp/server.ts +++ b/src/mcp/server.ts @@ -201,11 +201,13 @@ export function createServer(): McpServer { { root: z.string().describe('Project root directory').default('.'), framework: z.enum(['stride', 'dread', 'pasta', 'attacker', 'rapid', 'general']).describe('Analysis framework').default('general'), - provider: z.string().describe('LLM provider: anthropic, openai, openrouter, deepseek (auto-detected from env)').optional(), + provider: z.string().describe('LLM provider: anthropic, openai, google, openrouter, deepseek (auto-detected from env)').optional(), model: z.string().describe('Model name override').optional(), custom_prompt: z.string().describe('Custom analysis prompt to replace the framework header').optional(), + web_search: z.boolean().describe('Enable web search grounding for real-time vulnerability intelligence (OpenAI)').optional(), + thinking: z.boolean().describe('Enable extended thinking / reasoning mode (Anthropic, DeepSeek)').optional(), }, - async ({ root, framework, provider, model: modelName, custom_prompt }) => { + async ({ root, framework, provider, model: modelName, custom_prompt, web_search, thinking }) => { const { model: threatModel } = await getModel(root); if 
(threatModel.annotations_parsed === 0) { return { @@ -246,6 +248,8 @@ export function createServer(): McpServer { llmConfig, customPrompt: custom_prompt, stream: false, + webSearch: web_search, + extendedThinking: thinking, }); return { diff --git a/src/parser/index.ts b/src/parser/index.ts index 60c92b7..0ea0fd0 100644 --- a/src/parser/index.ts +++ b/src/parser/index.ts @@ -8,4 +8,4 @@ export type { ParseProjectOptions } from './parse-project.js'; export { parseLine } from './parse-line.js'; export { normalizeName, resolveSeverity, unescapeDescription } from './normalize.js'; export { stripCommentPrefix, commentStyleForExt } from './comment-strip.js'; -export { findDanglingRefs, findUnmitigatedExposures } from './validate.js'; +export { findDanglingRefs, findUnmitigatedExposures, findAcceptedWithoutAudit, findAcceptedExposures } from './validate.js'; diff --git a/src/parser/validate.ts b/src/parser/validate.ts index a0da1fa..a850898 100644 --- a/src/parser/validate.ts +++ b/src/parser/validate.ts @@ -101,3 +101,53 @@ export function findUnmitigatedExposures(model: ThreatModel): ThreatModelExposur !covered.has(`${normalizeRef(e.asset)}::${normalizeRef(e.threat)}`) ); } + +/** + * Find @accepts annotations where the accepted asset has no corresponding @audit. + * Risk acceptance without an audit trail is a governance concern — the acceptance + * may be rubber-stamped (e.g., by an AI agent) rather than a deliberate human decision. 
+ */ +export function findAcceptedWithoutAudit(model: ThreatModel): ParseDiagnostic[] { + const diagnostics: ParseDiagnostic[] = []; + + // Build set of audited assets (normalized) + const auditedAssets = new Set(); + for (const a of model.audits) { + auditedAssets.add(normalizeRef(a.asset)); + } + + for (const acc of model.acceptances) { + const assetNorm = normalizeRef(acc.asset); + if (!auditedAssets.has(assetNorm)) { + diagnostics.push({ + level: 'warning', + message: `@accepts ${acc.threat} on ${acc.asset} without @audit — risk acceptance should be paired with @audit for traceability`, + file: acc.location.file, + line: acc.location.line, + }); + } + } + + return diagnostics; +} + +/** + * Find exposures that are covered ONLY by @accepts (no real @mitigates). + * These are "accepted but unmitigated" — the risk exists and no control is in place. + * Useful for dashboards and reports to distinguish real mitigations from risk acceptance. + */ +export function findAcceptedExposures(model: ThreatModel): ThreatModelExposure[] { + const mitigated = new Set(); + for (const m of model.mitigations) { + mitigated.add(`${normalizeRef(m.asset)}::${normalizeRef(m.threat)}`); + } + const accepted = new Set(); + for (const a of model.acceptances) { + accepted.add(`${normalizeRef(a.asset)}::${normalizeRef(a.threat)}`); + } + + return model.exposures.filter(e => { + const key = `${normalizeRef(e.asset)}::${normalizeRef(e.threat)}`; + return accepted.has(key) && !mitigated.has(key); + }); +} diff --git a/src/report/report.ts b/src/report/report.ts index d192700..9f0898a 100644 --- a/src/report/report.ts +++ b/src/report/report.ts @@ -55,6 +55,10 @@ export function generateReport(model: ThreatModel): string { if (severityCounts.low > 0) lines.push(`| ↳ Low (P3) | ${severityCounts.low} |`); lines.push(`| Data flows | ${model.flows.length} |`); lines.push(`| Trust boundaries | ${model.boundaries.length} |`); + lines.push(`| Risk transfers | ${model.transfers.length} |`); + 
lines.push(`| Validations | ${model.validations.length} |`); + lines.push(`| Ownership records | ${model.ownership.length} |`); + if (model.shields.length > 0) lines.push(`| Shielded regions | ${model.shields.length} |`); lines.push(''); // ── Threat Model Diagram ── @@ -109,6 +113,20 @@ export function generateReport(model: ThreatModel): string { lines.push(''); } + // ── Trust Boundaries ── + if (model.boundaries.length > 0) { + lines.push('## 🔒 Trust Boundaries'); + lines.push(''); + lines.push('| Side A | Side B | Boundary ID | Description | Location |'); + lines.push('|--------|--------|-------------|-------------|----------|'); + for (const b of model.boundaries) { + const desc = b.description ? truncate(b.description, 50) : '—'; + const id = b.id || '—'; + lines.push(`| ${b.asset_a} | ${b.asset_b} | ${id} | ${desc} | ${b.location.file}:${b.location.line} |`); + } + lines.push(''); + } + // ── Data Flows ── if (model.flows.length > 0) { lines.push('## 📊 Data Flows'); @@ -136,6 +154,43 @@ export function generateReport(model: ThreatModel): string { lines.push(''); } + // ── Risk Transfers ── + if (model.transfers.length > 0) { + lines.push('## 🔀 Risk Transfers'); + lines.push(''); + lines.push('| Source | Threat | Target | Description | Location |'); + lines.push('|--------|--------|--------|-------------|----------|'); + for (const t of model.transfers) { + const desc = t.description ? truncate(t.description, 50) : '—'; + lines.push(`| ${t.source} | ${t.threat} | ${t.target} | ${desc} | ${t.location.file}:${t.location.line} |`); + } + lines.push(''); + } + + // ── Validations ── + if (model.validations.length > 0) { + lines.push('## ✔ Validations'); + lines.push(''); + lines.push('| Control | Asset | Description | Location |'); + lines.push('|---------|-------|-------------|----------|'); + for (const v of model.validations) { + const desc = v.description ? 
truncate(v.description, 50) : '—'; + lines.push(`| ${v.control} | ${v.asset} | ${desc} | ${v.location.file}:${v.location.line} |`); + } + lines.push(''); + } + + // ── Ownership ── + if (model.ownership.length > 0) { + lines.push('## 👤 Ownership'); + lines.push(''); + for (const o of model.ownership) { + const desc = o.description ? ` — ${o.description}` : ''; + lines.push(`- **${o.asset}** owned by **${o.owner}**${desc} (${o.location.file}:${o.location.line})`); + } + lines.push(''); + } + // ── Audit Items ── if (model.audits.length > 0) { lines.push('## 🔍 Audit Items'); @@ -160,6 +215,19 @@ export function generateReport(model: ThreatModel): string { lines.push(''); } + // ── Shielded Regions ── + if (model.shields.length > 0) { + lines.push('## 🛡️ Shielded Regions'); + lines.push(''); + lines.push('Code regions where annotations are intentionally suppressed via `@shield`.'); + lines.push(''); + for (const s of model.shields) { + const reason = s.reason || 'No reason provided'; + lines.push(`- ${reason} (${s.location.file}:${s.location.line})`); + } + lines.push(''); + } + // ── Developer Comments ── if (model.comments.length > 0) { lines.push('## 💬 Developer Comments'); diff --git a/src/tui/commands.ts b/src/tui/commands.ts index ea2d4fb..06e25a6 100644 --- a/src/tui/commands.ts +++ b/src/tui/commands.ts @@ -7,18 +7,18 @@ import { resolve, basename, isAbsolute } from 'node:path'; import { readFileSync, existsSync, writeFileSync, mkdirSync } from 'node:fs'; -import { parseProject, findDanglingRefs, findUnmitigatedExposures } from '../parser/index.js'; +import { parseProject, findDanglingRefs, findUnmitigatedExposures, findAcceptedWithoutAudit, findAcceptedExposures } from '../parser/index.js'; import { initProject, detectProject, promptAgentSelection } from '../init/index.js'; import { generateReport, generateMermaid } from '../report/index.js'; import { generateDashboardHTML } from '../dashboard/index.js'; -import { computeStats, computeSeverity } from 
'../dashboard/data.js'; -import { generateThreatReport, serializeModel, listThreatReports, loadThreatReportsForDashboard, FRAMEWORK_LABELS, FRAMEWORK_PROMPTS, buildUserMessage, type AnalysisFramework } from '../analyze/index.js'; +import { computeStats, computeSeverity, computeExposures } from '../dashboard/data.js'; +import { generateThreatReport, serializeModel, listThreatReports, loadThreatReportsForDashboard, FRAMEWORK_LABELS, FRAMEWORK_PROMPTS, buildUserMessage, buildProjectContext, extractCodeSnippets, type AnalysisFramework } from '../analyze/index.js'; import { diffModels, formatDiff, parseAtRef } from '../diff/index.js'; import { generateSarif } from '../analyzer/index.js'; -import type { ThreatModel, ParseDiagnostic } from '../types/index.js'; -import { C, severityBadge, severityText, severityOrder, computeGrade, gradeColored, formatTable, readCodeContext, trunc, bar, fileLink } from './format.js'; +import type { ThreatModel, ParseDiagnostic, ThreatModelExposure } from '../types/index.js'; +import { C, severityBadge, severityText, severityTextPad, severityOrder, computeGrade, gradeColored, formatTable, readCodeContext, trunc, bar, fileLink, fileLinkTrunc, cleanCliArtifacts } from './format.js'; import { resolveLLMConfig, saveTuiConfig, loadTuiConfig } from './config.js'; -import { AGENTS, parseAgentFlag, launchAgent, copyToClipboard, buildAnnotatePrompt, type AgentEntry } from '../agents/index.js'; +import { AGENTS, parseAgentFlag, launchAgent, launchAgentInline, copyToClipboard, buildAnnotatePrompt, type AgentEntry } from '../agents/index.js'; import { describeConfigSource } from '../agents/config.js'; // ─── Shared context ────────────────────────────────────────────────── @@ -46,6 +46,8 @@ export interface TuiContext { rl: import('node:readline').Interface; /** Guard: true while ask() is waiting for sub-prompt input */ _askActive?: boolean; + /** Cached exposure list from last /exposures call (used by /show) */ + lastExposures: ThreatModelExposure[]; } 
/** Re-parse the project and update context */ @@ -83,14 +85,17 @@ export function cmdHelp(): void { ['/status', 'Risk grade + summary stats'], ['/validate [--strict]', 'Check for syntax errors + dangling refs'], ['', ''], + ['/exposures [--all]', 'List open exposures by severity (filter: --asset --severity --threat --file)'], + ['/show ', 'Detail view + code context for an exposure (from /exposures list)'], + ['/scan', 'Annotation coverage scanner — find unannotated symbols'], ['/assets', 'Asset tree with threat/control counts'], ['/files', 'Annotated file tree with exposure counts'], ['/view ', 'Show all annotations in a file with code context'], ['', ''], - ['/threat-report ', 'AI threat report (stride|dread|pasta|attacker|rapid|general)'], + ['/threat-report ', 'AI threat report (stride|dread|pasta|attacker|rapid|general|custom)'], ['/threat-reports', 'List saved AI threat reports'], ['/annotate ', 'Launch coding agent to annotate codebase'], - ['/model', 'Set AI provider + API key'], + ['/model', 'Set AI provider (API or CLI agent: Claude Code, Codex, Gemini)'], ['(freeform text)', 'Chat about your threat model with AI'], ['', ''], ['/report', 'Generate markdown + JSON report'], @@ -98,6 +103,7 @@ export function cmdHelp(): void { ['/diff [ref]', 'Compare model against a git ref (default: HEAD~1)'], ['/sarif [-o file]', 'Export SARIF 2.1.0 for GitHub / VS Code'], ['', ''], + ['/gal', 'GAL annotation language guide'], ['/help', 'This help'], ['/quit', 'Exit'], ]; @@ -328,6 +334,131 @@ export function cmdStatus(ctx: TuiContext): void { console.log(''); } +// ─── /exposures ────────────────────────────────────────────────────── + +export function cmdExposures(args: string, ctx: TuiContext): void { + if (!ctx.model) { + console.log(C.warn(' No threat model. 
Run /parse first.')); + return; + } + + const rows = computeExposures(ctx.model); + let filtered = rows.filter(r => !r.mitigated && !r.accepted); // open only by default + + // Parse flags + const parts = args.split(/\s+/).filter(Boolean); + let showAll = false; + for (let i = 0; i < parts.length; i++) { + const flag = parts[i]; + const val = parts[i + 1]; + if (flag === '--asset' && val) { filtered = filtered.filter(r => r.asset.includes(val)); i++; } + else if (flag === '--severity' && val) { filtered = filtered.filter(r => r.severity === val.toLowerCase()); i++; } + else if (flag === '--file' && val) { filtered = filtered.filter(r => r.file.includes(val)); i++; } + else if (flag === '--threat' && val) { filtered = filtered.filter(r => r.threat.includes(val)); i++; } + else if (flag === '--all') { filtered = rows; showAll = true; } + } + + // Sort by severity + filtered.sort((a, b) => severityOrder(a.severity) - severityOrder(b.severity)); + + // Cache for /show + ctx.lastExposures = filtered.map(r => { + const original = ctx.model!.exposures.find(e => + e.asset === r.asset && e.threat === r.threat && e.location.file === r.file && e.location.line === r.line + ); + return original!; + }).filter(Boolean); + + if (filtered.length === 0) { + console.log(C.green(' No matching exposures found.')); + return; + } + + console.log(''); + + const termWidth = process.stdout.columns || 100; + const header = ` ${C.dim('#'.padEnd(4))}${C.dim('SEVERITY'.padEnd(12))}${C.dim('ASSET'.padEnd(18))}${C.dim('THREAT'.padEnd(20))}${C.dim('FILE'.padEnd(30))}${C.dim('LINE')}`; + console.log(header); + console.log(C.dim(' ' + '─'.repeat(Math.min(termWidth - 4, 96)))); + + for (const [i, r] of filtered.entries()) { + const num = String(i + 1).padEnd(4); + const sev = severityTextPad(r.severity, 12); + const asset = trunc(r.asset, 16).padEnd(18); + const threat = trunc(r.threat, 18).padEnd(20); + const linkedFile = fileLinkTrunc(r.file, 28, r.line, ctx.root); + const filePad = ' 
'.repeat(Math.max(0, 30 - trunc(r.file, 28).length)); + const line = ` ${num}${sev}${asset}${threat}${linkedFile}${filePad}${r.line}`; + console.log(line); + } + + console.log(''); + const countMsg = showAll + ? ` ${filtered.length} exposure(s) total` + : ` ${filtered.length} open exposure(s)`; + console.log(C.dim(countMsg + ' · /show for detail · --asset --severity --threat --file to filter')); + console.log(''); +} + +// ─── /show ─────────────────────────────────────────────────────────── + +export function cmdShow(args: string, ctx: TuiContext): void { + const num = parseInt(args.trim(), 10); + if (!num || num < 1 || num > ctx.lastExposures.length) { + console.log(C.warn(` Usage: /show where n is 1-${ctx.lastExposures.length || '?'}. Run /exposures first.`)); + return; + } + + const exp = ctx.lastExposures[num - 1]; + console.log(''); + console.log(` ${C.cyan('┌')} ${exp.asset} → ${exp.threat} ${severityBadge(exp.severity)}`); + if (exp.description) { + console.log(` ${C.cyan('│')} ${exp.description}`); + } + if (exp.external_refs.length > 0) { + console.log(` ${C.cyan('│')} ${C.dim(exp.external_refs.join(' · '))}`); + } + console.log(` ${C.cyan('│')} ${C.dim(fileLink(exp.location.file, exp.location.line, ctx.root))}`); + console.log(` ${C.cyan('│')}`); + + const { lines } = readCodeContext(exp.location.file, exp.location.line, ctx.root); + for (const l of lines) { + console.log(` ${C.cyan('│')} ${l}`); + } + console.log(` ${C.cyan('└')}`); + console.log(''); +} + +// ─── /scan ─────────────────────────────────────────────────────────── + +export function cmdScan(ctx: TuiContext): void { + if (!ctx.model) { + console.log(C.warn(' No threat model. 
Run /parse first.')); + return; + } + + const cov = ctx.model.coverage; + const pct = cov.coverage_percent; + console.log(''); + console.log(` ${C.bold('Coverage:')} ${cov.annotated_symbols}/${cov.total_symbols} symbols (${pct}%)`); + + const unannotated = cov.unannotated_critical || []; + if (unannotated.length === 0) { + console.log(C.green(' All security-relevant symbols are annotated!')); + } else { + console.log(C.warn(` ${unannotated.length} unannotated symbol(s):`)); + console.log(''); + const show = unannotated.slice(0, 25); + for (const u of show) { + console.log(` ${C.dim(fileLink(u.file, u.line, ctx.root))} ${u.kind} ${C.bold(u.name)}`); + } + if (unannotated.length > 25) { + console.log(C.dim(` ... and ${unannotated.length - 25} more`)); + } + } + console.log(''); +} + // ─── /assets ───────────────────────────────────────────────────────── export function cmdAssets(ctx: TuiContext): void { @@ -663,11 +794,18 @@ export async function cmdValidate(ctx: TuiContext): Promise { // Dangling refs const danglingDiags = findDanglingRefs(model); - const allDiags = [...diagnostics, ...danglingDiags]; + + // Check for @accepts without @audit (governance concern) + const acceptAuditDiags = findAcceptedWithoutAudit(model); + + const allDiags = [...diagnostics, ...danglingDiags, ...acceptAuditDiags]; // Unmitigated exposures const unmitigated = findUnmitigatedExposures(model); + // Accepted-but-unmitigated exposures + const acceptedOnly = findAcceptedExposures(model); + // Print diagnostics const errors = allDiags.filter(d => d.level === 'error'); const warnings = allDiags.filter(d => d.level === 'warning'); @@ -690,14 +828,24 @@ export async function cmdValidate(ctx: TuiContext): Promise { } } + if (acceptedOnly.length > 0) { + console.log(''); + console.log(C.warn(` ⚡ ${acceptedOnly.length} accepted-but-unmitigated exposure(s) (no control in code):`)); + for (const a of acceptedOnly) { + const sev = a.severity ? 
severityBadge(a.severity) : C.dim('unset'); + console.log(` ${sev} ${a.asset} → ${a.threat} ${C.dim(fileLink(a.location.file, a.location.line, ctx.root))}`); + } + } + console.log(''); - if (errors.length === 0 && unmitigated.length === 0) { + if (errors.length === 0 && unmitigated.length === 0 && acceptedOnly.length === 0) { console.log(C.success(' ✓ All annotations valid, no unmitigated exposures.')); } else { const parts: string[] = []; if (errors.length > 0) parts.push(`${errors.length} error(s)`); if (warnings.length > 0) parts.push(`${warnings.length} warning(s)`); if (unmitigated.length > 0) parts.push(`${unmitigated.length} unmitigated`); + if (acceptedOnly.length > 0) parts.push(`${acceptedOnly.length} accepted without mitigation`); console.log(` ${parts.join(', ')}`); } } catch (err: any) { @@ -784,11 +932,17 @@ export async function cmdSarif(args: string, ctx: TuiContext): Promise { // ─── /model ────────────────────────────────────────────────────────── -const CLI_AGENT_OPTIONS = [ - { id: 'claude-code', name: 'Claude Code' }, - { id: 'codex', name: 'Codex CLI' }, - { id: 'gemini', name: 'Gemini CLI' }, -] as const; +interface ModelOption { + id: string; + name: string; + desc: string; +} + +const CLI_AGENT_OPTIONS: ModelOption[] = [ + { id: 'claude-code', name: 'Claude Code', desc: 'Anthropic\'s coding agent (claude cli)' }, + { id: 'codex', name: 'Codex CLI', desc: 'OpenAI\'s coding agent (codex cli)' }, + { id: 'gemini', name: 'Gemini CLI', desc: 'Google\'s coding agent (gemini cli)' }, +]; const CLI_AGENT_NAMES: Record = { 'claude-code': 'Claude Code', @@ -796,6 +950,92 @@ const CLI_AGENT_NAMES: Record = { 'gemini': 'Gemini CLI', }; +/** Provider model catalogs — popular models per provider, ordered by capability */ +const PROVIDER_MODELS: Record = { + anthropic: [ + { id: 'claude-sonnet-4-6', name: 'Claude Sonnet 4.6', desc: 'Latest, frontier coding & agents' }, + { id: 'claude-opus-4-6', name: 'Claude Opus 4.6', desc: 'Most intelligent, complex 
reasoning' }, + { id: 'claude-sonnet-4-5', name: 'Claude Sonnet 4.5', desc: 'Previous gen, strong all-rounder' }, + { id: 'claude-opus-4-5', name: 'Claude Opus 4.5', desc: 'Previous gen, deep analysis' }, + { id: 'claude-haiku-4-5', name: 'Claude Haiku 4.5', desc: 'Fastest, lowest cost' }, + ], + openai: [ + { id: 'gpt-5.2', name: 'GPT-5.2', desc: 'Latest flagship, smartest & most precise' }, + { id: 'gpt-5.2-pro', name: 'GPT-5.2 Pro', desc: 'Enhanced GPT-5.2 for complex tasks' }, + { id: 'gpt-5', name: 'GPT-5', desc: 'Frontier model with reasoning' }, + { id: 'gpt-5-mini', name: 'GPT-5 Mini', desc: 'Fast and affordable' }, + { id: 'gpt-5-nano', name: 'GPT-5 Nano', desc: 'Fastest, lowest cost' }, + { id: 'gpt-5.1-codex', name: 'GPT-5.1 Codex', desc: 'Optimized for agentic coding' }, + { id: 'o3', name: 'o3', desc: 'Reasoning model, complex analysis' }, + { id: 'o4-mini', name: 'o4-mini', desc: 'Fast reasoning model' }, + { id: 'gpt-4.1', name: 'GPT-4.1', desc: 'Previous gen flagship' }, + { id: 'gpt-4.1-mini', name: 'GPT-4.1 Mini', desc: 'Previous gen, fast' }, + ], + google: [ + { id: 'gemini-2.5-flash', name: 'Gemini 2.5 Flash', desc: 'Best price-performance, reasoning' }, + { id: 'gemini-2.5-pro', name: 'Gemini 2.5 Pro', desc: 'Most advanced, deep reasoning & coding' }, + { id: 'gemini-2.5-flash-lite', name: 'Gemini 2.5 Flash-Lite', desc: 'Fastest, most budget-friendly' }, + { id: 'gemini-3-flash-preview', name: 'Gemini 3 Flash', desc: 'Preview: frontier-class at low cost' }, + { id: 'gemini-3-pro-preview', name: 'Gemini 3 Pro', desc: 'Preview: state-of-the-art reasoning' }, + { id: 'gemini-3.1-pro-preview', name: 'Gemini 3.1 Pro', desc: 'Preview: advanced agentic & coding' }, + ], + deepseek: [ + { id: 'deepseek-chat', name: 'DeepSeek V3.2', desc: 'General purpose, fast (128K context)' }, + { id: 'deepseek-reasoner', name: 'DeepSeek R1', desc: 'Thinking mode, best for analysis' }, + ], + openrouter: [ + { id: 'anthropic/claude-sonnet-4-6', name: 'Claude Sonnet 
/**
 * Show a numbered model menu for `provider` and return the chosen model ID.
 *
 * Providers with no entry (or an empty entry) in PROVIDER_MODELS fall back to a
 * free-text prompt. The menu always ends with a "Custom" entry that also asks
 * for a free-text model ID.
 *
 * @param ctx - TUI context forwarded to `ask`.
 * @param provider - Key looked up in PROVIDER_MODELS.
 * @returns The selected or typed model ID, or `null` when the answer is empty,
 *          not a number, or outside the menu range.
 */
async function pickModel(ctx: TuiContext, provider: string): Promise<string | null> {
  const models = PROVIDER_MODELS[provider];
  if (!models || models.length === 0) {
    // Fallback to free-text for unknown providers
    const model = await ask(ctx, ' Model name: ');
    return model || null;
  }

  // The "Custom" entry sits one past the last listed model.
  const customIndex = models.length;

  console.log('');
  console.log(' Select model:');
  models.forEach((m, i) => {
    console.log(` ${C.bold(String(i + 1))} ${m.name.padEnd(24)} ${C.dim(m.desc)}`);
  });
  console.log(` ${C.bold(String(customIndex + 1))} ${C.dim('Custom (enter model ID manually)')}`);
  console.log('');

  const choice = await ask(ctx, ` Model [1-${customIndex + 1}]: `);
  const idx = Number.parseInt(choice, 10) - 1;

  // parseInt yields NaN for empty or non-numeric input, and every ordered
  // comparison against NaN is false — so a plain range check would let it
  // through and `models[NaN].id` would throw. Reject it explicitly.
  if (!Number.isInteger(idx) || idx < 0 || idx > customIndex) {
    console.log(C.warn(' Cancelled.'));
    return null;
  }

  if (idx === customIndex) {
    // Custom model
    const custom = await ask(ctx, ' Model ID: ');
    return custom || null;
  }

  return models[idx].id;
}
const provider = providers[idx].id as import('../analyze/llm.js').LLMProvider; + + // Model selection — numbered menu + const modelId = await pickModel(ctx, provider); + if (!modelId) return; // API key let apiKey = ''; if (provider !== 'ollama') { + console.log(''); apiKey = await ask(ctx, ' API Key: '); if (!apiKey) { console.log(C.warn(' Cancelled — no API key provided.')); @@ -891,27 +1149,20 @@ export async function cmdModel(ctx: TuiContext): Promise { apiKey = 'ollama-local'; } - // Model selection - const defaults: Record = { - anthropic: 'claude-sonnet-4-5-20250929', - openai: 'gpt-4o', - openrouter: 'anthropic/claude-sonnet-4-5-20250929', - deepseek: 'deepseek-chat', - ollama: 'llama3.2', - }; - const model = await ask(ctx, ` Model [${defaults[provider]}]: `); - saveTuiConfig(ctx.root, { aiMode: 'api', provider, - model: model || defaults[provider], + model: modelId, apiKey, }); const displayKey = apiKey.length > 8 ? apiKey.slice(0, 6) + '•'.repeat(8) : '•'.repeat(8); + // Find display name for the model + const modelEntry = PROVIDER_MODELS[provider]?.find(m => m.id === modelId); + const modelDisplay = modelEntry ? `${modelEntry.name} (${modelId})` : modelId; console.log(''); - console.log(` ${C.success('✓')} Configured: ${C.bold(model || defaults[provider])} (${provider})`); - console.log(` Key: ${displayKey}`); + console.log(` ${C.success('✓')} Configured: ${C.bold(modelDisplay)}`); + console.log(` Provider: ${providers[idx].name} Key: ${displayKey}`); console.log(C.dim(' Saved to .guardlink/config.json')); console.log(''); } @@ -919,101 +1170,196 @@ export async function cmdModel(ctx: TuiContext): Promise { // ─── /threat-report ────────────────────────────────────────────────── +/** + * Build the full analysis prompt for CLI agents. + * Includes system prompt, serialized model, project context, code snippets, + * and instructions to read source code. 
/**
 * Assemble the single prompt handed to an agent for a threat report.
 *
 * The prompt is made of: a fixed preamble, the framework's system prompt, a
 * task line naming the report, the user message returned by
 * `buildUserMessage`, and a fixed list of output instructions.
 *
 * @param root - Project root passed to the project-context and snippet builders.
 * @param model - Threat model to serialize into the prompt.
 * @param fw - Analysis framework; selects the entry in FRAMEWORK_PROMPTS.
 * @param customPrompt - Free-text instructions forwarded to `buildUserMessage`.
 * @param reportLabel - Report name interpolated into the task line.
 * @returns The complete prompt text.
 */
function buildAgentAnalysisPrompt(
  root: string,
  model: ThreatModel,
  fw: AnalysisFramework,
  customPrompt: string | undefined,
  reportLabel: string,
): string {
  const serialized = serializeModel(model);
  const context = buildProjectContext(root);
  const snippets = extractCodeSnippets(root, model);
  const frameworkPrompt = FRAMEWORK_PROMPTS[fw];
  const threatModelSection = buildUserMessage(
    serialized,
    fw,
    customPrompt,
    // Falsy context/snippets are passed on as undefined.
    context || undefined,
    snippets || undefined,
  );

  // One array entry per output line; joined with '\n' below.
  const promptLines = [
    'You are analyzing a codebase with GuardLink security annotations.',
    'You have access to the full source code in the current directory.',
    '',
    `${frameworkPrompt}`,
    '',
    '## Task',
    `Read the source code and GuardLink annotations, then produce a thorough ${reportLabel}.`,
    '',
    '## Threat Model (serialized from annotations)',
    `${threatModelSection}`,
    '',
    '## Instructions',
    "1. Read the actual source files to understand the code — don't just rely on the serialized model above",
    '2. Cross-reference the annotations with the real code to validate findings',
    '3. Produce the full report as markdown',
    '4. Be specific — reference actual files, functions, and line numbers from the codebase',
    '5. Output ONLY the markdown report content — do NOT add any metadata comments, save confirmations, or file path messages',
    '6. Do NOT include lines like "Generated by...", "Agent:", "Project:", or "The report file write was blocked..."',
  ];

  return promptLines.join('\n');
}
/**
 * Save inline agent output as a threat-report markdown file.
 *
 * The content is passed through `cleanCliArtifacts`, prefixed with a
 * front-matter block and a short byline, and written to
 * `<root>/.guardlink/threat-reports/<timestamp>-<fw>.md`.
 *
 * @param root - Project root under which `.guardlink/threat-reports` is created.
 * @param content - Raw text captured from the agent.
 * @param fw - Framework used; appears in the filename, front matter and byline.
 * @param agentName - Agent name recorded in the front matter and byline.
 * @param project - Project name recorded in the front matter and byline.
 * @param annotationCount - Annotation count recorded in the front matter and byline.
 * @returns The report path relative to the project root.
 */
function saveInlineReport(
  root: string,
  content: string,
  fw: AnalysisFramework,
  agentName: string,
  project: string,
  annotationCount: number,
): string {
  // Read the clock once so the filename, the front-matter timestamp and the
  // byline date can never disagree (e.g. across a second or midnight boundary).
  const nowIso = new Date().toISOString();
  const timestamp = nowIso.replace(/[:.]/g, '-').slice(0, 19);

  const reportsDir = resolve(root, '.guardlink', 'threat-reports');
  // With `recursive: true` this is a no-op when the directory already exists,
  // so no separate existence check (and no check-then-create race) is needed.
  mkdirSync(reportsDir, { recursive: true });

  const filename = `${timestamp}-${fw}.md`;
  const filepath = resolve(reportsDir, filename);

  const cleanedContent = cleanCliArtifacts(content);
  const label = FRAMEWORK_LABELS[fw];

  const header = `---
framework: ${fw}
label: ${label}
model: ${agentName}
timestamp: ${nowIso}
project: ${project}
annotations: ${annotationCount}
---

# ${label}

> Generated by \`guardlink threat-report ${fw}\` on ${nowIso.slice(0, 10)}
> Agent: ${agentName} | Project: ${project} | Annotations: ${annotationCount}

`;

  writeFileSync(filepath, header + cleanedContent + '\n');
  return `.guardlink/threat-reports/${filename}`;
}
Run /parse first.')); return; } - const { agent, cleanArgs } = parseAgentFlag(args); - const framework = cleanArgs.trim().toLowerCase() || ''; + // Parse any explicit --agent flag override + const { agent: flagAgent, cleanArgs } = parseAgentFlag(args); + const input = cleanArgs.trim(); const validFrameworks = ['stride', 'dread', 'pasta', 'attacker', 'rapid', 'general']; - if (!framework) { + // Show help when no arguments given + if (!input) { console.log(''); console.log(` ${C.bold('Threat report frameworks:')}`); for (const fw of validFrameworks) { console.log(` ${C.bold('/threat-report ' + fw.padEnd(12))} ${C.dim(FRAMEWORK_LABELS[fw as AnalysisFramework])}`); } console.log(''); - console.log(C.dim(' Flags: --claude-code --codex --gemini --cursor --windsurf --clipboard')); - console.log(C.dim(' Without flag: uses configured API provider (see /model)')); - console.log(C.dim(' Example: /threat-report stride --claude-code')); + console.log(` ${C.bold('Custom prompt:')}`); + console.log(C.dim(' /threat-report Uses your text as the analysis prompt')); + console.log(C.dim(' Example: /threat-report Create a comprehensive report mixing STRIDE and DREAD')); + console.log(''); + console.log(C.dim(' Uses the AI provider configured via /model (API or CLI agent).')); + console.log(C.dim(' Override with: --claude-code --codex --gemini --clipboard')); console.log(''); return; } - const isStandard = validFrameworks.includes(framework); - const fw = (isStandard ? framework : 'general') as AnalysisFramework; - const customPrompt = isStandard ? undefined : cleanArgs.trim(); + // Determine framework vs custom prompt + const inputLower = input.toLowerCase(); + const isStandard = validFrameworks.includes(inputLower); + const fw = (isStandard ? inputLower : 'general') as AnalysisFramework; + const customPrompt = isStandard ? undefined : input; + const reportLabel = customPrompt ? 
'Custom Threat Analysis' : FRAMEWORK_LABELS[fw]; - // ── Agent path: spawn CLI agent or copy to clipboard ── - if (agent) { - const modelJson = serializeModel(ctx.model); - const systemPrompt = FRAMEWORK_PROMPTS[fw]; - const userMessage = buildUserMessage(modelJson, fw, customPrompt); + // ── Resolve execution method ── + // Priority: explicit --flag > /model config > env-var API + const tuiCfg = loadTuiConfig(ctx.root); - const analysisPrompt = `You are analyzing a codebase with GuardLink security annotations. -You have access to the full source code in the current directory. + // Resolve the agent to use (flag override or configured CLI agent) + let agent: AgentEntry | null = flagAgent; + if (!agent && tuiCfg?.aiMode === 'cli-agent' && tuiCfg?.cliAgent) { + agent = AGENTS.find(a => a.id === tuiCfg.cliAgent) || null; + } -${systemPrompt} + // ── Path 1: CLI Agent (inline, non-interactive) ── + if (agent && agent.cmd) { + const analysisPrompt = buildAgentAnalysisPrompt(ctx.root, ctx.model, fw, customPrompt, reportLabel); -## Task -Read the source code and GuardLink annotations, then produce a thorough ${FRAMEWORK_LABELS[fw]}. + console.log(` ${C.dim('Generating')} ${reportLabel} ${C.dim('via')} ${agent.name} ${C.dim('(inline)...')}`); + console.log(C.dim(` Annotations: ${ctx.model.annotations_parsed} | Exposures: ${ctx.model.exposures.length}`)); + console.log(''); -## Threat Model (serialized from annotations) -${userMessage} + const result = await launchAgentInline( + agent, + analysisPrompt, + ctx.root, + (text) => process.stdout.write(text), + { autoYes: true }, + ); -## Instructions -1. Read the actual source files to understand the code — don't just rely on the serialized model above -2. Cross-reference the annotations with the real code to validate findings -3. Produce the full report as markdown -4. Save the output to .guardlink/threat-reports/ with a timestamped filename -5. 
Be specific — reference actual files, functions, and line numbers from the codebase`; + if (result.error) { + console.log(C.error(`\n ✗ ${result.error}`)); + console.log(''); + return; + } - console.log(` ${C.dim('Sending')} ${FRAMEWORK_LABELS[fw]} ${C.dim('to')} ${agent.name}${C.dim('...')}`); - console.log(''); + process.stdout.write('\n'); - // Use shared launcher — foreground for terminal agents, IDE open for others - if (agent.cmd) { - const copied = copyToClipboard(analysisPrompt); - if (copied) { - console.log(C.success(` ✓ Prompt copied to clipboard (${analysisPrompt.length.toLocaleString()} chars)`)); - } - console.log(` ${C.dim('Launching')} ${agent.name} ${C.dim('in foreground...')}`); + // Save the agent's output as a report + if (result.content.trim()) { + const savedTo = saveInlineReport( + ctx.root, result.content, fw, agent.name, + ctx.model.project, ctx.model.annotations_parsed, + ); console.log(''); - const result = launchAgent(agent, analysisPrompt, ctx.root); - if (result.error) { - console.log(C.error(` ✗ ${result.error}`)); - } else { - console.log(`\n ${C.success('✓')} ${agent.name} session ended.`); - console.log(` Run ${C.bold('/threat-reports')} to see saved results.`); - } - } else { - const result = launchAgent(agent, analysisPrompt, ctx.root); - if (result.clipboardCopied) { - console.log(C.success(` ✓ Prompt copied to clipboard (${analysisPrompt.length.toLocaleString()} chars)`)); - } - if (result.launched && agent.app) { - console.log(` ${C.success('✓')} ${agent.name} launched with project: ${ctx.projectName}`); - console.log(`\n Paste (Cmd+V) the prompt in ${agent.name}.`); - } else if (result.error) { - console.log(C.error(` ✗ ${result.error}`)); - } + console.log(` ${C.success('✓')} Report saved to ${savedTo}`); } console.log(''); return; } - // ── API path: direct LLM call ── + // ── Path 2: Clipboard / IDE agent (copy prompt, open app) ── + if (agent && !agent.cmd) { + const analysisPrompt = buildAgentAnalysisPrompt(ctx.root, 
ctx.model, fw, customPrompt, reportLabel); + + const result = launchAgent(agent, analysisPrompt, ctx.root); + if (result.clipboardCopied) { + console.log(C.success(` ✓ Prompt copied to clipboard (${analysisPrompt.length.toLocaleString()} chars)`)); + } + if (result.launched && agent.app) { + console.log(` ${C.success('✓')} ${agent.name} launched with project: ${ctx.projectName}`); + console.log(`\n Paste (Cmd+V) the prompt in ${agent.name}.`); + } else if (result.error) { + console.log(C.error(` ✗ ${result.error}`)); + } + console.log(''); + return; + } + + // ── Path 3: Direct API call ── const llmConfig = resolveLLMConfig(ctx.root); if (!llmConfig) { - console.log(C.warn(' No AI provider configured. Run /model first, or use --claude-code / --codex.')); + console.log(C.warn(' No AI provider configured. Run /model first.')); console.log(C.dim(' Or set ANTHROPIC_API_KEY / OPENAI_API_KEY in environment.')); + console.log(C.dim(' Or use: /threat-report --claude-code')); return; } - console.log(` ${C.dim('Generating report with')} ${llmConfig.model}${C.dim('...')}`); + console.log(` ${C.dim('Generating')} ${reportLabel} ${C.dim('with')} ${llmConfig.model}${C.dim('...')}`); + console.log(C.dim(` Annotations: ${ctx.model.annotations_parsed} | Exposures: ${ctx.model.exposures.length}`)); console.log(''); try { @@ -1135,8 +1481,12 @@ export async function cmdAnnotate(args: string, ctx: TuiContext): Promise // ─── Freeform AI Chat ──────────────────────────────────────────────── export async function cmdChat(text: string, ctx: TuiContext): Promise { + const tuiCfg = loadTuiConfig(ctx.root); const llmConfig = resolveLLMConfig(ctx.root); - if (!llmConfig) { + + const useAgent = tuiCfg?.aiMode === 'cli-agent' && !!tuiCfg?.cliAgent; + + if (!useAgent && !llmConfig) { console.log(C.warn(' No AI provider configured. 
Run /model first, or set an API key in environment.')); return; } @@ -1162,23 +1512,51 @@ Keep responses under 500 words unless the user asks for detail.`; userMessage = `Threat model context:\n${JSON.stringify(compact, null, 2)}\n\nUser question: ${text}`; } - console.log(''); - console.log(C.dim(` Thinking via ${llmConfig.model}...`)); - console.log(''); + if (useAgent) { + const agent = AGENTS.find(a => a.id === tuiCfg.cliAgent); + if (!agent) { + console.log(C.error(` ✗ Configured agent ${tuiCfg.cliAgent} not found.`)); + return; + } - try { - const { chatCompletion } = await import('../analyze/llm.js'); - const response = await chatCompletion( - llmConfig, - systemPrompt, - userMessage, + console.log(''); + console.log(C.dim(` Thinking via ${agent.name}...`)); + console.log(''); + + const prompt = `${systemPrompt}\n\n${userMessage}`; + + const result = await launchAgentInline( + agent, + prompt, + ctx.root, (chunk) => process.stdout.write(chunk), + { autoYes: true } ); - process.stdout.write('\n\n'); - } catch (err: any) { - console.log(C.error(` ✗ AI request failed: ${err.message}`)); + if (result.error) { + console.log(C.error(`\n ✗ AI request failed: ${result.error}`)); + } else { + console.log('\n'); + } + } else { + console.log(''); + console.log(C.dim(` Thinking via ${llmConfig!.model}...`)); console.log(''); + + try { + const { chatCompletion } = await import('../analyze/llm.js'); + await chatCompletion( + llmConfig!, + systemPrompt, + userMessage, + (chunk) => process.stdout.write(chunk), + ); + + process.stdout.write('\n\n'); + } catch (err: any) { + console.log(C.error(` ✗ AI request failed: ${err.message}`)); + console.log(''); + } } } diff --git a/src/tui/format.ts b/src/tui/format.ts index c33e43a..e013768 100644 --- a/src/tui/format.ts +++ b/src/tui/format.ts @@ -38,6 +38,69 @@ export const C = { info: chalk.blue, }; +// ─── String Cleaning ───────────────────────────────────────────────── + +/** Strip ANSI escape codes from a string */ +export 
function stripAnsi(str: string): string {
  // Matches an ESC (U+001B) or single-byte CSI (U+009B) introducer, optional
  // intermediate characters and numeric parameters, then one final character.
  // eslint-disable-next-line no-control-regex
  return str.replace(/[\u001b\u009b][[()#;?]*(?:[0-9]{1,4}(?:;[0-9]{0,4})*)?[0-9A-ORZcf-nqry=><]/g, '');
}

/**
 * Clean CLI framing artifacts from agent output before saving as markdown.
 *
 * Outside fenced code blocks this drops terminal box lines (╭, │, ╰, ├), the
 * `>_` prompt marker, known banner/setup lines, echoed prompt lines and agent
 * progress lines, and stops at a "✓ Report saved to" confirmation. The result
 * starts at the first Markdown heading found outside a code fence, if any.
 *
 * Only the two-character `>_` prompt is treated as framing: a bare `>` or `_`
 * at the start of a line is ordinary Markdown (blockquote / emphasis) and is
 * kept.
 *
 * @param content - Raw agent output, possibly containing ANSI escapes.
 * @returns The cleaned markdown, trimmed of surrounding whitespace.
 */
export function cleanCliArtifacts(content: string): string {
  // Built once per call rather than once per line.
  const cliFrameLine = /^(?:[╭│╰├]|>_)/;
  const agentTraceLine = /^(?:• I’ll inspect|• Explored|─ Worked for|└ Read|└ Search|└ List)/;
  const markdownHeading = /^#+ [^\n]/;

  const isCliNoise = (line: string): boolean =>
    // CLI box drawing characters and terminal prompts
    cliFrameLine.test(line) ||
    line.includes('Update available!') ||
    // Codex/Claude specific framing messages
    (line.includes('model: ') && line.includes('/model to change')) ||
    line.includes('directory: ~') ||
    line.includes('Tip: New Try the Codex App') ||
    line.includes('You are analyzing a codebase with GuardLink') ||
    line.includes('You have access to the full source code') ||
    // Echoed progress / tool-trace lines
    agentTraceLine.test(line);

  const kept: string[] = [];
  let inCodeBlock = false;
  let firstHeading = -1; // index into `kept` of the first heading outside a fence

  for (const line of stripAnsi(content).split('\n')) {
    // Track code fences so nothing inside them is filtered or mistaken for a heading.
    if (line.trim().startsWith('```')) {
      inCodeBlock = !inCodeBlock;
    }

    if (!inCodeBlock) {
      if (isCliNoise(line)) continue;

      // Stop completely at the "Report saved" confirmation from the CLI.
      if (line.includes('✓ Report saved to')) break;

      // A `# ...` line inside a fence is a code comment, not the report start,
      // so headings are only recognised here, outside fences.
      if (firstHeading < 0 && markdownHeading.test(line)) {
        firstHeading = kept.length;
      }
    }

    kept.push(line);
  }

  // Drop any preamble before the first heading; keep everything if there is none.
  const reportLines = firstHeading >= 0 ? kept.slice(firstHeading) : kept;
  return reportLines.join('\n').trim();
}
['/view ','File annotations + code'], @@ -291,7 +301,7 @@ function printCommandList(): void { ['/dashboard', 'HTML dashboard'], ['/diff [ref]', 'Compare vs git ref'], ['/sarif', 'Export SARIF'], - ['/gal', 'GAL annotation language guide'], + ['/gal', 'GAL annotation guide'], ['/help', 'Full help'], ['/quit', 'Exit GuardLink CLI'], ]; @@ -330,6 +340,9 @@ async function dispatch(input: string, ctx: TuiContext): Promise { case '/help': cmdHelp(); break; case '/gal': cmdGal(); break; case '/status': cmdStatus(ctx); break; + case '/exposures': cmdExposures(args, ctx); break; + case '/show': cmdShow(args, ctx); break; + case '/scan': cmdScan(ctx); break; case '/assets': cmdAssets(ctx); break; case '/files': cmdFiles(ctx); break; case '/view': cmdView(args, ctx); break; @@ -390,6 +403,7 @@ export async function startTui(dir?: string): Promise { model: null, projectName, rl, + lastExposures: [], }; // Try loading existing model