From fb83ca8571f7855e6470fd5085b229323f9b5534 Mon Sep 17 00:00:00 2001 From: "Josh Grossman (Bounce Security)" <97975715+joshbouncesecurity@users.noreply.github.com> Date: Tue, 24 Mar 2026 08:15:38 +0200 Subject: [PATCH 1/6] feat: add `generate-context` CLI command with auto-discovery (#26) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a standalone `openant generate-context` command so users can generate application_context.json as a discrete pipeline step when running individual commands (parse → generate-context → analyze → verify). Also wire up auto-discovery of application_context.json in both the Go CLI (project scan dir) and Python CLI (output dir, repo path, input file dir) so `analyze` and `verify` pick it up automatically without requiring `--app-context` every time. Co-authored-by: Claude Opus 4.6 (1M context) --- apps/openant-cli/cmd/analyze.go | 3 + apps/openant-cli/cmd/generatecontext.go | 112 +++++++++++++++++++++++ apps/openant-cli/cmd/root.go | 22 ++--- apps/openant-cli/cmd/verify.go | 3 + libs/openant-core/openant/cli.py | 114 +++++++++++++++++++++++- libs/openant-core/tests/test_go_cli.py | 22 +++++ 6 files changed, 262 insertions(+), 14 deletions(-) create mode 100644 apps/openant-cli/cmd/generatecontext.go diff --git a/apps/openant-cli/cmd/analyze.go b/apps/openant-cli/cmd/analyze.go index 986213b..e9daf80 100644 --- a/apps/openant-cli/cmd/analyze.go +++ b/apps/openant-cli/cmd/analyze.go @@ -66,6 +66,9 @@ func runAnalyze(cmd *cobra.Command, args []string) { if analyzeAnalyzerOutput == "" { analyzeAnalyzerOutput = ctx.scanFile("analyzer_output.json") } + if analyzeAppContext == "" { + analyzeAppContext = ctx.scanFile("application_context.json") + } if analyzeRepoPath == "" { analyzeRepoPath = ctx.RepoPath } diff --git a/apps/openant-cli/cmd/generatecontext.go b/apps/openant-cli/cmd/generatecontext.go new file mode 100644 index 0000000..5f61aa2 --- /dev/null +++ 
b/apps/openant-cli/cmd/generatecontext.go @@ -0,0 +1,112 @@ +package cmd + +import ( + "fmt" + "os" + + "github.com/knostic/open-ant-cli/internal/output" + "github.com/knostic/open-ant-cli/internal/python" + "github.com/spf13/cobra" +) + +var generateContextCmd = &cobra.Command{ + Use: "generate-context [repository-path]", + Short: "Generate application security context for a repository", + Long: `Generate analyzes a repository and produces an application_context.json +file that describes the application type, trust boundaries, intended +behaviors, and patterns that should not be flagged as vulnerabilities. + +This context is automatically used by the analyze and verify commands +to reduce false positives. + +If no repository path is given, the active project is used (see: openant init). + +The command checks for a manual override file (OPENANT.md or OPENANT.json) +in the repository root before falling back to LLM-based generation. +Use --force to skip the manual override check.`, + Args: cobra.MaximumNArgs(1), + Run: runGenerateContext, +} + +var ( + gcOutput string + gcForce bool + gcShowPrompt bool +) + +func init() { + generateContextCmd.Flags().StringVarP(&gcOutput, "output", "o", "", "Output path (default: /application_context.json or /application_context.json)") + generateContextCmd.Flags().BoolVar(&gcForce, "force", false, "Force regeneration, ignoring OPENANT.md override files") + generateContextCmd.Flags().BoolVar(&gcShowPrompt, "show-prompt", false, "Include formatted prompt text in output") +} + +func runGenerateContext(cmd *cobra.Command, args []string) { + repoPath, ctx, err := resolveRepoArg(args) + if err != nil { + output.PrintError(err.Error()) + os.Exit(2) + } + + // Apply project defaults + if ctx != nil { + if gcOutput == "" { + gcOutput = ctx.scanFile("application_context.json") + } + } + + rt, err := ensurePython() + if err != nil { + output.PrintError(err.Error()) + os.Exit(2) + } + + // Build Python CLI args + pyArgs := 
[]string{"generate-context", repoPath} + if gcOutput != "" { + pyArgs = append(pyArgs, "--output", gcOutput) + } + if gcForce { + pyArgs = append(pyArgs, "--force") + } + if gcShowPrompt { + pyArgs = append(pyArgs, "--show-prompt") + } + + result, err := python.Invoke(rt.Path, pyArgs, "", quiet, requireAPIKey()) + if err != nil { + output.PrintError(err.Error()) + os.Exit(2) + } + + if jsonOutput { + output.PrintJSON(result.Envelope) + } else if result.Envelope.Status == "success" { + if data, ok := result.Envelope.Data.(map[string]any); ok { + printGenerateContextSummary(data) + } + } else { + output.PrintErrors(result.Envelope.Errors) + } + + os.Exit(result.ExitCode) +} + +func printGenerateContextSummary(data map[string]any) { + output.PrintHeader("Application Context Generated") + if v, ok := data["application_type"].(string); ok { + output.PrintKeyValue("Type", v) + } + if v, ok := data["purpose"].(string); ok { + output.PrintKeyValue("Purpose", v) + } + if v, ok := data["confidence"].(float64); ok { + output.PrintKeyValue("Confidence", fmt.Sprintf("%.0f%%", v*100)) + } + if v, ok := data["source"].(string); ok { + output.PrintKeyValue("Source", v) + } + if v, ok := data["app_context_path"].(string); ok { + output.PrintKeyValue("Output", v) + } + fmt.Println() +} diff --git a/apps/openant-cli/cmd/root.go b/apps/openant-cli/cmd/root.go index 334dc9a..e584308 100644 --- a/apps/openant-cli/cmd/root.go +++ b/apps/openant-cli/cmd/root.go @@ -31,16 +31,17 @@ Stage 1: Detect potential vulnerabilities via code analysis Stage 2: Simulate an attacker to eliminate false positives Commands: - scan Full pipeline: parse → enhance → detect → verify → report - diff Scan only code changed vs a base ref or GitHub PR - parse Extract code units from a repository - enhance Add security context to a parsed dataset - analyze Run Stage 1 vulnerability detection - verify Run Stage 2 attacker simulation - build-output Assemble pipeline_output.json from verified results - dynamic-test 
Docker-isolated exploit testing - report Generate reports from analysis results - config Manage CLI configuration (API key, etc.)`, + scan Full pipeline: parse → enhance → detect → verify → report + diff Scan only code changed vs a base ref or GitHub PR + parse Extract code units from a repository + generate-context Generate application security context + enhance Add security context to a parsed dataset + analyze Run Stage 1 vulnerability detection + verify Run Stage 2 attacker simulation + build-output Assemble pipeline_output.json from verified results + dynamic-test Docker-isolated exploit testing + report Generate reports from analysis results + config Manage CLI configuration (API key, etc.)`, } // Execute adds all child commands to the root command and sets flags appropriately. @@ -82,6 +83,7 @@ func init() { rootCmd.AddCommand(scanCmd) rootCmd.AddCommand(diffCmd) rootCmd.AddCommand(parseCmd) + rootCmd.AddCommand(generateContextCmd) rootCmd.AddCommand(enhanceCmd) rootCmd.AddCommand(analyzeCmd) rootCmd.AddCommand(verifyCmd) diff --git a/apps/openant-cli/cmd/verify.go b/apps/openant-cli/cmd/verify.go index cad9b8a..b486db5 100644 --- a/apps/openant-cli/cmd/verify.go +++ b/apps/openant-cli/cmd/verify.go @@ -61,6 +61,9 @@ func runVerify(cmd *cobra.Command, args []string) { if verifyAnalyzerOutput == "" { verifyAnalyzerOutput = ctx.scanFile("analyzer_output.json") } + if verifyAppContext == "" { + verifyAppContext = ctx.scanFile("application_context.json") + } if verifyRepoPath == "" { verifyRepoPath = ctx.RepoPath } diff --git a/libs/openant-core/openant/cli.py b/libs/openant-core/openant/cli.py index b0ce345..39034e1 100644 --- a/libs/openant-core/openant/cli.py +++ b/libs/openant-core/openant/cli.py @@ -5,6 +5,7 @@ Commands: openant scan /path/to/repo --output /tmp/results openant parse /path/to/repo --output /tmp/results + openant generate-context /path/to/repo -o /tmp/results/application_context.json openant enhance dataset.json --analyzer-output ao.json 
--repo-path /repo -o enhanced.json openant analyze dataset.json --output /tmp/results openant verify results.json --analyzer-output ao.json --output /tmp/results @@ -29,6 +30,20 @@ def _output_json(data: dict): sys.stdout.write("\n") +def _find_app_context(*candidate_dirs: str) -> str | None: + """Search candidate directories for application_context.json. + + Returns the first existing path, or None. + """ + for d in candidate_dirs: + if not d: + continue + path = os.path.join(d, "application_context.json") + if os.path.isfile(path): + return path + return None + + def _load_step_reports(directory: str) -> list[dict]: """Load all {step}.report.json files from a directory. @@ -152,6 +167,57 @@ def cmd_parse(args): return 2 +def cmd_generate_context(args): + """Generate application security context for a repository.""" + from pathlib import Path + from context.application_context import ( + generate_application_context, + save_context, + format_context_for_prompt, + ) + from core.schemas import success, error + from core.step_report import step_context + + output_path = args.output or os.path.join(args.repo, "application_context.json") + output_dir = os.path.dirname(os.path.abspath(output_path)) + + try: + with step_context("generate-context", output_dir, inputs={ + "repo_path": os.path.abspath(args.repo), + "force": args.force, + }) as ctx: + app_context = generate_application_context( + Path(args.repo), + force_regenerate=args.force, + ) + save_context(app_context, Path(output_path)) + + ctx.summary = { + "application_type": app_context.application_type, + "confidence": app_context.confidence, + "source": app_context.source, + } + ctx.outputs = {"app_context_path": os.path.abspath(output_path)} + + result = { + "app_context_path": os.path.abspath(output_path), + "application_type": app_context.application_type, + "purpose": app_context.purpose, + "confidence": app_context.confidence, + "source": app_context.source, + } + + if args.show_prompt: + 
result["prompt_format"] = format_context_for_prompt(app_context) + + _output_json(success(result)) + return 0 + + except Exception as e: + _output_json(error(str(e))) + return 2 + + def cmd_enhance(args): """Enhance a dataset with security context.""" from core.enhancer import enhance_dataset @@ -225,6 +291,18 @@ def cmd_analyze(args): exploitable_filter = "all" if args.exploitable_all else ("strict" if args.exploitable_only else None) + # Auto-discover application context if not explicitly provided + app_context_path = args.app_context + if not app_context_path: + app_context_path = _find_app_context( + output_dir, + args.repo_path, + os.path.dirname(os.path.abspath(args.dataset)), + ) + if app_context_path: + print(f"[Analyze] Auto-discovered application context: {app_context_path}", + file=sys.stderr) + try: with step_context("analyze", output_dir, inputs={ "dataset_path": os.path.abspath(args.dataset), @@ -236,7 +314,7 @@ def cmd_analyze(args): dataset_path=args.dataset, output_dir=output_dir, analyzer_output_path=args.analyzer_output, - app_context_path=args.app_context, + app_context_path=app_context_path, repo_path=args.repo_path, limit=args.limit, model=args.model, @@ -277,7 +355,7 @@ def cmd_analyze(args): results_path=result.results_path, output_dir=output_dir, analyzer_output_path=args.analyzer_output, - app_context_path=args.app_context, + app_context_path=app_context_path, repo_path=args.repo_path, workers=args.workers, backoff_seconds=args.backoff, @@ -322,18 +400,30 @@ def cmd_verify(args): output_dir = args.output or tempfile.mkdtemp(prefix="open_ant_verify_") + # Auto-discover application context if not explicitly provided + app_context_path = args.app_context + if not app_context_path: + app_context_path = _find_app_context( + output_dir, + args.repo_path, + os.path.dirname(os.path.abspath(args.results)), + ) + if app_context_path: + print(f"[Verify] Auto-discovered application context: {app_context_path}", + file=sys.stderr) + try: with 
step_context("verify", output_dir, inputs={ "results_path": os.path.abspath(args.results), "analyzer_output_path": os.path.abspath(args.analyzer_output), - "app_context_path": os.path.abspath(args.app_context) if args.app_context else None, + "app_context_path": os.path.abspath(app_context_path) if app_context_path else None, "repo_path": os.path.abspath(args.repo_path) if args.repo_path else None, }) as ctx: result = run_verification( results_path=args.results, output_dir=output_dir, analyzer_output_path=args.analyzer_output, - app_context_path=args.app_context, + app_context_path=app_context_path, repo_path=args.repo_path, workers=args.workers, checkpoint_path=getattr(args, "checkpoint", None), @@ -1019,6 +1109,22 @@ def main(): parse_p.add_argument("--diff-manifest", help="Path to diff_manifest.json; tags units with diff_selected") parse_p.set_defaults(func=cmd_parse) + # --------------------------------------------------------------- + # generate-context — generate application security context + # --------------------------------------------------------------- + gc_p = subparsers.add_parser( + "generate-context", + help="Generate application security context for a repository", + ) + gc_p.add_argument("repo", help="Path to repository") + gc_p.add_argument("--output", "-o", + help="Output path (default: /application_context.json)") + gc_p.add_argument("--force", action="store_true", + help="Force regeneration, ignoring OPENANT.md override files") + gc_p.add_argument("--show-prompt", action="store_true", + help="Include formatted prompt text in output") + gc_p.set_defaults(func=cmd_generate_context) + # --------------------------------------------------------------- # enhance — add security context to a dataset # --------------------------------------------------------------- diff --git a/libs/openant-core/tests/test_go_cli.py b/libs/openant-core/tests/test_go_cli.py index fc92113..519e6ae 100644 --- a/libs/openant-core/tests/test_go_cli.py +++ 
b/libs/openant-core/tests/test_go_cli.py @@ -163,6 +163,28 @@ def test_parse_json_output_is_valid(self, sample_python_repo, tmp_path): assert "status" in envelope +class TestGenerateContextHelp: + """Tests for `openant generate-context --help`.""" + + def test_help(self): + result = run_cli("generate-context", "--help") + assert result.returncode == 0 + output = result.stdout + result.stderr + assert "repository" in output.lower() + assert "context" in output.lower() + + +class TestGenerateContext: + """Tests for `openant generate-context` (no API key).""" + + def test_requires_api_key(self, sample_python_repo): + """generate-context should fail without an API key.""" + result = run_cli("generate-context", sample_python_repo) + output = result.stderr + result.stdout + assert result.returncode != 0 + assert "api key" in output.lower() + + class TestApiKeyHandling: def test_scan_requires_api_key(self, sample_python_repo): """Scan should fail without an API key.""" From 649aa22b2de6f8591f8531790de12c06dc1ffaf7 Mon Sep 17 00:00:00 2001 From: "Josh Grossman (Bounce Security)" <97975715+joshbouncesecurity@users.noreply.github.com> Date: Tue, 24 Mar 2026 09:06:38 +0200 Subject: [PATCH 2/6] docs: update PIPELINE_MANUAL.md for generate-context CLI command (#28) * docs: update PIPELINE_MANUAL.md for generate-context CLI command Update Step 4 to document the new `openant generate-context` command as the primary way to generate application context. Add note about auto-discovery in analyze/verify. Update examples and quick reference. Co-Authored-By: Claude Opus 4.6 (1M context) * docs: update remaining docs for generate-context CLI command Update CURRENT_IMPLEMENTATION.md, README.md, and DOCUMENTATION.md to reference `openant generate-context` as the primary command and note the auto-discovery behavior in analyze/verify. 
Co-Authored-By: Claude Opus 4.6 (1M context) --------- Co-authored-by: Claude Opus 4.6 (1M context) --- libs/openant-core/CURRENT_IMPLEMENTATION.md | 12 ++++++++---- libs/openant-core/DOCUMENTATION.md | 3 ++- libs/openant-core/PIPELINE_MANUAL.md | 21 ++++++++++++++++----- libs/openant-core/README.md | 16 +++++++++------- 4 files changed, 35 insertions(+), 17 deletions(-) diff --git a/libs/openant-core/CURRENT_IMPLEMENTATION.md b/libs/openant-core/CURRENT_IMPLEMENTATION.md index f2524c3..07f246a 100644 --- a/libs/openant-core/CURRENT_IMPLEMENTATION.md +++ b/libs/openant-core/CURRENT_IMPLEMENTATION.md @@ -227,13 +227,17 @@ Unsupported types (desktop apps, mobile apps, games, embedded systems) are rejec **Usage:** ```bash -# List supported types -python -m context.generate_context --list-types +# Generate context via CLI (recommended) +openant generate-context /path/to/repo -# Generate context for a repository +# Generate context via Python module python -m context.generate_context /path/to/repo -# Context is saved to application_context.json in the dataset directory +# List supported types +python -m context.generate_context --list-types + +# Context is saved to application_context.json in the scan/dataset directory +# analyze and verify auto-discover it when using a project ``` **Generated Context Structure:** diff --git a/libs/openant-core/DOCUMENTATION.md b/libs/openant-core/DOCUMENTATION.md index 5f1f434..beb1761 100644 --- a/libs/openant-core/DOCUMENTATION.md +++ b/libs/openant-core/DOCUMENTATION.md @@ -221,7 +221,8 @@ For AI assistants working on the code, here are the key source files: | File | Purpose | |------|---------| | `context/application_context.py` | Context detection & formatting | -| `context/generate_context.py` | CLI for context generation | +| `context/generate_context.py` | Python module CLI for context generation | +| `openant/cli.py` (`generate-context`) | Primary CLI command (`openant generate-context`) | ### Report Generator diff --git 
a/libs/openant-core/PIPELINE_MANUAL.md b/libs/openant-core/PIPELINE_MANUAL.md index fe77b78..ef1f590 100644 --- a/libs/openant-core/PIPELINE_MANUAL.md +++ b/libs/openant-core/PIPELINE_MANUAL.md @@ -534,15 +534,26 @@ For typical web applications, entry-point filtering achieves 60-95% reduction. Classifies the repository type to reduce false positives. -**Location:** `context/generate_context.py` +**Location:** `context/application_context.py`, `openant/cli.py` -**Command:** +**Command (via CLI):** +```bash +openant generate-context # Uses active project +openant generate-context /path/to/repo # Explicit repo path +openant generate-context /path/to/repo -o ctx.json # Custom output path +openant generate-context --force # Skip OPENANT.md override +openant generate-context --show-prompt # Include prompt format in output +``` + +**Command (via Python module):** ```bash python -m context.generate_context /path/to/repo python -m context.generate_context /path/to/repo -o application_context.json python -m context.generate_context --list-types # Show supported types ``` +When using a project (`openant init`), the output defaults to the project scan directory and is automatically discovered by `analyze` and `verify` — no need to pass `--app-context`. + **Supported Application Types:** | Type | Description | Attack Model | @@ -885,7 +896,7 @@ python parsers/python/parse_repository.py /path/to/flask-app \ python validate_dataset_schema.py datasets/flask-app/dataset.json # 3. Generate application context -python -m context.generate_context /path/to/flask-app +openant generate-context /path/to/flask-app # 4. Run Stage 1 + Stage 2 on first 20 units python experiment.py --dataset flask-app --verify --limit 20 @@ -907,7 +918,7 @@ python parsers/javascript/test_pipeline.py /path/to/node-app \ python validate_dataset_schema.py datasets/node-app/dataset.json # 3. 
Generate application context -python -m context.generate_context /path/to/node-app +openant generate-context /path/to/node-app # 4. Run full analysis python experiment.py --dataset node-app --verify @@ -953,7 +964,7 @@ python parsers/python/parse_repository.py /repo --output datasets/name/dataset.j python parsers/javascript/test_pipeline.py /repo --analyzer-path /analyzer.js --output datasets/name --processing-level codeql # Generate app context -python -m context.generate_context /repo +openant generate-context /repo # Run Stage 1 python experiment.py --dataset name diff --git a/libs/openant-core/README.md b/libs/openant-core/README.md index 9d466ed..fdc2d80 100644 --- a/libs/openant-core/README.md +++ b/libs/openant-core/README.md @@ -131,16 +131,18 @@ OpenAnt generates application context to understand what type of application is ### Generate Context ```bash -# Generate context for a repository -python -m context.generate_context /path/to/repo - -# View formatted prompt output -python -m context.generate_context /path/to/repo --show-prompt +# Generate context via CLI (recommended) +openant generate-context /path/to/repo +openant generate-context /path/to/repo --show-prompt # Include prompt format +openant generate-context --force # Skip OPENANT.md override -# List supported types -python -m context.generate_context --list-types +# Generate context via Python module +python -m context.generate_context /path/to/repo +python -m context.generate_context --list-types # Show supported types ``` +When using a project (`openant init`), `analyze` and `verify` auto-discover the generated context — no need to pass `--app-context`. + ### Manual Override Create `OPENANT.md` or `OPENANT.json` in your repository root to provide manual security context. 
This is useful when: From af82a17acecd3c932406c0d311d67e69208eabf9 Mon Sep 17 00:00:00 2001 From: joshbouncesecurity Date: Mon, 4 May 2026 21:12:43 +0300 Subject: [PATCH 3/6] test: add unit tests for application_context.json auto-discovery Cover the `_find_app_context` helper used by `analyze` and `verify` to locate application_context.json automatically when --app-context is not provided. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../tests/test_app_context_discovery.py | 86 +++++++++++++++++++ 1 file changed, 86 insertions(+) create mode 100644 libs/openant-core/tests/test_app_context_discovery.py diff --git a/libs/openant-core/tests/test_app_context_discovery.py b/libs/openant-core/tests/test_app_context_discovery.py new file mode 100644 index 0000000..74949d6 --- /dev/null +++ b/libs/openant-core/tests/test_app_context_discovery.py @@ -0,0 +1,86 @@ +"""Tests for application_context.json auto-discovery in the Python CLI. + +These tests exercise the `_find_app_context` helper used by `analyze` and +`verify` to locate `application_context.json` automatically when +`--app-context` is not passed. 
+""" +import json +from pathlib import Path + +from openant.cli import _find_app_context + + +def _write_dummy_context(path: Path) -> None: + path.write_text(json.dumps({ + "application_type": "web_app", + "purpose": "test", + "confidence": "high", + "source": "test", + })) + + +class TestFindAppContext: + def test_returns_none_when_no_dirs(self): + assert _find_app_context() is None + + def test_returns_none_when_dirs_empty(self): + assert _find_app_context("", None) is None + + def test_returns_none_when_no_file_present(self, tmp_path): + d1 = tmp_path / "out" + d1.mkdir() + d2 = tmp_path / "repo" + d2.mkdir() + assert _find_app_context(str(d1), str(d2)) is None + + def test_finds_in_first_dir(self, tmp_path): + out_dir = tmp_path / "out" + out_dir.mkdir() + ctx_path = out_dir / "application_context.json" + _write_dummy_context(ctx_path) + + result = _find_app_context(str(out_dir), str(tmp_path / "repo")) + assert result == str(ctx_path) + + def test_finds_in_second_dir_when_first_missing(self, tmp_path): + out_dir = tmp_path / "out" + out_dir.mkdir() + repo_dir = tmp_path / "repo" + repo_dir.mkdir() + ctx_path = repo_dir / "application_context.json" + _write_dummy_context(ctx_path) + + result = _find_app_context(str(out_dir), str(repo_dir)) + assert result == str(ctx_path) + + def test_first_match_wins(self, tmp_path): + out_dir = tmp_path / "out" + out_dir.mkdir() + repo_dir = tmp_path / "repo" + repo_dir.mkdir() + first = out_dir / "application_context.json" + second = repo_dir / "application_context.json" + _write_dummy_context(first) + _write_dummy_context(second) + + result = _find_app_context(str(out_dir), str(repo_dir)) + assert result == str(first) + + def test_skips_falsy_dirs(self, tmp_path): + repo_dir = tmp_path / "repo" + repo_dir.mkdir() + ctx_path = repo_dir / "application_context.json" + _write_dummy_context(ctx_path) + + # First two are falsy (empty / None) — should be skipped without error + result = _find_app_context("", None, str(repo_dir)) + 
assert result == str(ctx_path) + + def test_ignores_directory_named_application_context_json(self, tmp_path): + """A *directory* with the magic name should not be treated as a hit.""" + out_dir = tmp_path / "out" + out_dir.mkdir() + # Create a directory (not file) with the target name + (out_dir / "application_context.json").mkdir() + + assert _find_app_context(str(out_dir)) is None From ffb157086438da17f02f53a59fc11a03865e0944 Mon Sep 17 00:00:00 2001 From: "Josh Grossman (Bounce Security)" <97975715+joshbouncesecurity@users.noreply.github.com> Date: Tue, 24 Mar 2026 08:56:36 +0200 Subject: [PATCH 4/6] feat: add override merge mode for generate-context command (#27) When a manual override file (OPENANT.md) is detected, users can now choose how to handle it: use as-is, merge into LLM context, or ignore. An interactive prompt appears by default; --override-mode flag bypasses it. Co-authored-by: Claude Opus 4.6 (1M context) --- apps/openant-cli/cmd/generatecontext.go | 115 +++++++++++++++++- libs/openant-core/CLAUDE.md | 7 ++ libs/openant-core/CURRENT_IMPLEMENTATION.md | 7 ++ libs/openant-core/PIPELINE_MANUAL.md | 9 +- libs/openant-core/context/OPENANT_TEMPLATE.md | 7 ++ .../context/application_context.py | 78 ++++++++++-- libs/openant-core/openant/cli.py | 14 ++- libs/openant-core/tests/test_go_cli.py | 16 +++ 8 files changed, 235 insertions(+), 18 deletions(-) diff --git a/apps/openant-cli/cmd/generatecontext.go b/apps/openant-cli/cmd/generatecontext.go index 5f61aa2..f129dd2 100644 --- a/apps/openant-cli/cmd/generatecontext.go +++ b/apps/openant-cli/cmd/generatecontext.go @@ -1,8 +1,11 @@ package cmd import ( + "bufio" "fmt" "os" + "path/filepath" + "strings" "github.com/knostic/open-ant-cli/internal/output" "github.com/knostic/open-ant-cli/internal/python" @@ -23,20 +26,31 @@ If no repository path is given, the active project is used (see: openant init). 
The command checks for a manual override file (OPENANT.md or OPENANT.json) in the repository root before falling back to LLM-based generation. -Use --force to skip the manual override check.`, + +When an override file is found, you are prompted to choose how to handle it: + use - Use the override file as-is (skip LLM generation) + merge - Feed the override file into the LLM alongside other sources + ignore - Ignore the override and generate from scratch + +Use --override-mode to skip the prompt, or --force as a shortcut for --override-mode=ignore.`, Args: cobra.MaximumNArgs(1), Run: runGenerateContext, } var ( - gcOutput string - gcForce bool - gcShowPrompt bool + gcOutput string + gcForce bool + gcOverrideMode string + gcShowPrompt bool ) +// overrideFiles lists manual override filenames checked in the target repo. +var overrideFiles = []string{"OPENANT.md", "OPENANT.json", ".openant.md", ".openant.json"} + func init() { generateContextCmd.Flags().StringVarP(&gcOutput, "output", "o", "", "Output path (default: /application_context.json or /application_context.json)") generateContextCmd.Flags().BoolVar(&gcForce, "force", false, "Force regeneration, ignoring OPENANT.md override files") + generateContextCmd.Flags().StringVar(&gcOverrideMode, "override-mode", "", "How to handle OPENANT.md: use, merge, or ignore (skips interactive prompt)") generateContextCmd.Flags().BoolVar(&gcShowPrompt, "show-prompt", false, "Include formatted prompt text in output") } @@ -54,6 +68,13 @@ func runGenerateContext(cmd *cobra.Command, args []string) { } } + // Resolve effective override mode + effectiveMode, err := resolveOverrideMode(repoPath) + if err != nil { + output.PrintError(err.Error()) + os.Exit(2) + } + rt, err := ensurePython() if err != nil { output.PrintError(err.Error()) @@ -65,8 +86,8 @@ func runGenerateContext(cmd *cobra.Command, args []string) { if gcOutput != "" { pyArgs = append(pyArgs, "--output", gcOutput) } - if gcForce { - pyArgs = append(pyArgs, "--force") + if 
effectiveMode != "" { + pyArgs = append(pyArgs, "--override-mode", effectiveMode) } if gcShowPrompt { pyArgs = append(pyArgs, "--show-prompt") @@ -91,6 +112,88 @@ func runGenerateContext(cmd *cobra.Command, args []string) { os.Exit(result.ExitCode) } +// resolveOverrideMode determines the effective override mode based on flags +// and interactive prompting. +func resolveOverrideMode(repoPath string) (string, error) { + // --force and --override-mode are mutually exclusive + if gcForce && gcOverrideMode != "" { + return "", fmt.Errorf("--force and --override-mode are mutually exclusive") + } + + // --force is a shortcut for --override-mode=ignore + if gcForce { + return "ignore", nil + } + + // Explicit --override-mode takes precedence + if gcOverrideMode != "" { + mode := strings.ToLower(gcOverrideMode) + if mode != "use" && mode != "merge" && mode != "ignore" { + return "", fmt.Errorf("invalid --override-mode %q: must be use, merge, or ignore", gcOverrideMode) + } + return mode, nil + } + + // No explicit flag — check for override file + overrideFile := findOverrideFile(repoPath) + if overrideFile == "" { + // No override file exists; let Python use default LLM generation + return "", nil + } + + // Override file found — prompt if interactive, else default to "use" + if !isInteractiveTerminal() { + return "use", nil + } + + return promptOverrideMode(overrideFile), nil +} + +// findOverrideFile checks for manual override files in the repo root. +// Returns the filename if found, empty string otherwise. +func findOverrideFile(repoPath string) string { + for _, name := range overrideFiles { + path := filepath.Join(repoPath, name) + if info, err := os.Stat(path); err == nil && !info.IsDir() { + return name + } + } + return "" +} + +// isInteractiveTerminal returns true if stdin is a terminal (not piped/CI). 
+func isInteractiveTerminal() bool { + stat, err := os.Stdin.Stat() + if err != nil { + return false + } + return (stat.Mode() & os.ModeCharDevice) != 0 +} + +// promptOverrideMode shows an interactive prompt for how to handle the override file. +func promptOverrideMode(filename string) string { + fmt.Fprintf(os.Stderr, "\nFound manual override: %s\n\n", filename) + fmt.Fprintln(os.Stderr, " [u]se — Use as-is (skip LLM generation)") + fmt.Fprintln(os.Stderr, " [m]erge — Feed into LLM alongside other sources") + fmt.Fprintln(os.Stderr, " [i]gnore — Ignore, generate from scratch") + fmt.Fprintln(os.Stderr, "") + fmt.Fprint(os.Stderr, "Choice [u/m/i] (default: u): ") + + reader := bufio.NewReader(os.Stdin) + answer, _ := reader.ReadString('\n') + answer = strings.TrimSpace(strings.ToLower(answer)) + + switch answer { + case "m", "merge": + return "merge" + case "i", "ignore": + return "ignore" + default: + // "u", "use", or empty (default) + return "use" + } +} + func printGenerateContextSummary(data map[string]any) { output.PrintHeader("Application Context Generated") if v, ok := data["application_type"].(string); ok { diff --git a/libs/openant-core/CLAUDE.md b/libs/openant-core/CLAUDE.md index 3c61665..56c5754 100644 --- a/libs/openant-core/CLAUDE.md +++ b/libs/openant-core/CLAUDE.md @@ -68,6 +68,13 @@ python -m context.generate_context /path/to/repo --list-types # Show supported **Manual override:** Create `OPENANT.md` or `OPENANT.json` in repo root. See `context/OPENANT_TEMPLATE.md` for format. +**Override modes:** When a manual override file is detected, the CLI prompts for how to handle it: +- `use` — Use override as-is, skip LLM (default) +- `merge` — Feed override into LLM alongside other sources +- `ignore` — Ignore override, generate from scratch + +Use `--override-mode <mode>` to skip the prompt, or `--force` as a shortcut for `--override-mode ignore`. 
+ **Unsupported types:** If a repository doesn't match supported types, OpenAnt exits with error code 2 and instructions for creating a manual override. # Autopilot (Autonomous Pipeline) diff --git a/libs/openant-core/CURRENT_IMPLEMENTATION.md b/libs/openant-core/CURRENT_IMPLEMENTATION.md index 07f246a..a10f390 100644 --- a/libs/openant-core/CURRENT_IMPLEMENTATION.md +++ b/libs/openant-core/CURRENT_IMPLEMENTATION.md @@ -255,6 +255,13 @@ python -m context.generate_context --list-types **Manual Override:** Place `OPENANT.md` or `OPENANT.json` in repo root to provide explicit context. Manual overrides bypass type validation. +**Override Modes:** When a manual override file is detected, the CLI prompts for how to handle it: +- `use` — Use override as-is, skip LLM generation (default) +- `merge` — Feed override content into LLM alongside other sources (source="merged") +- `ignore` — Ignore override, generate from scratch + +Use `--override-mode <mode>` to skip the prompt, or `--force` as shortcut for `--override-mode ignore`. + **Integration:** Context automatically loaded in `experiment.py` and injected into Stage 1 and Stage 2 prompts. **Results on LangChain:** diff --git a/libs/openant-core/PIPELINE_MANUAL.md b/libs/openant-core/PIPELINE_MANUAL.md index ef1f590..facb1d9 100644 --- a/libs/openant-core/PIPELINE_MANUAL.md +++ b/libs/openant-core/PIPELINE_MANUAL.md @@ -586,7 +586,14 @@ When using a project (`openant init`), the output defaults to the project scan d **Manual Override:** -Create `OPENANT.md` or `OPENANT.json` in repo root to override automatic detection. +Create `OPENANT.md` or `OPENANT.json` in repo root to provide explicit context.
+ +When a manual override file is detected, the CLI prompts for how to handle it: +- `use` — Use override as-is, skip LLM generation (default) +- `merge` — Feed override content into LLM alongside other sources +- `ignore` — Ignore override, generate from scratch + +Use `--override-mode <mode>` to skip the prompt, or `--force` as shortcut for `--override-mode ignore`. --- diff --git a/libs/openant-core/context/OPENANT_TEMPLATE.md b/libs/openant-core/context/OPENANT_TEMPLATE.md index 751093b..d649b79 100644 --- a/libs/openant-core/context/OPENANT_TEMPLATE.md +++ b/libs/openant-core/context/OPENANT_TEMPLATE.md @@ -16,6 +16,13 @@ OpenAnt supports these four application types: **Note:** Manual overrides can use any `application_type` value (validation is skipped for manual overrides). Use this to analyze unsupported application types by mapping them to the closest supported type. +**Override modes:** When this file is detected, the `generate-context` command prompts for how to handle it: +- `use` — Use this file as-is, skip LLM generation (default) +- `merge` — Feed this file into the LLM alongside other repo sources (README, etc.) +- `ignore` — Ignore this file and generate context from scratch + +Use `--override-mode <mode>` to skip the prompt. + ## Format Include a JSON code block with the following structure: diff --git a/libs/openant-core/context/application_context.py b/libs/openant-core/context/application_context.py index f7fa55d..70b944f 100644 --- a/libs/openant-core/context/application_context.py +++ b/libs/openant-core/context/application_context.py @@ -192,17 +192,44 @@ def get_type_info(self) -> dict: } -def gather_context_sources(repo_path: Path) -> dict[str, str]: +def find_override_file(repo_path: Path) -> Path | None: + """Return path to first existing manual override file, or None. + + Args: + repo_path: Path to repository root. + + Returns: + Path to override file if found, None otherwise.
+ """ + for filename in MANUAL_OVERRIDE_FILES: + filepath = repo_path / filename + if filepath.exists(): + return filepath + return None + + +def gather_context_sources(repo_path: Path, override_path: Path | None = None) -> dict[str, str]: """Gather relevant files for context generation. Args: repo_path: Path to the repository root. + override_path: Optional path to override file to include as a source. Returns: Dictionary mapping filename to content. """ sources = {} + # Include override file content if provided (merge mode) + if override_path is not None: + try: + content = override_path.read_text(errors="ignore") + if len(content) > 10000: + content = content[:10000] + "\n\n[... truncated ...]" + sources[override_path.name] = content + except Exception as e: + print(f"Warning: Could not read {override_path.name}: {e}", file=sys.stderr) + # Read priority files for filename in CONTEXT_FILES: filepath = repo_path / filename @@ -384,12 +411,21 @@ def _build_type_descriptions() -> str: return "\n".join(lines) +MERGE_CONTEXT_SUPPLEMENT = """ +## Developer-Provided Context + +The repository maintainer provided a manual security context file (listed above +in the sources). Treat their classification of intended behaviors, trust +boundaries, and not-a-vulnerability entries as authoritative hints. Validate the +application type against the other source files and reconcile any conflicts. +""" + CONTEXT_GENERATION_PROMPT = """Analyze this software repository and generate a security analysis context. ## Repository Information {sources} - +{developer_context} --- ## Task @@ -464,6 +500,7 @@ def generate_application_context( repo_path: Path, model: str = "claude-sonnet-4-20250514", force_regenerate: bool = False, + override_mode: str | None = None, ) -> ApplicationContext: """Generate application context using LLM analysis. @@ -472,7 +509,9 @@ def generate_application_context( Args: repo_path: Path to the repository root. model: Anthropic model to use for generation. 
- force_regenerate: If True, skip manual override check. + force_regenerate: If True, skip manual override check (legacy, use override_mode). + override_mode: How to handle override files: "use" (verbatim), "merge" (feed + into LLM), "ignore" (skip override), or None (legacy behavior). Returns: ApplicationContext with security-relevant information. @@ -482,16 +521,29 @@ def generate_application_context( """ repo_path = Path(repo_path) - # Check for manual override first - if not force_regenerate: + # Resolve effective mode from override_mode or legacy force_regenerate + if override_mode is None: + effective_mode = "ignore" if force_regenerate else "use" + else: + effective_mode = override_mode + + # "use" mode: return manual override verbatim if found + if effective_mode == "use": manual_context = check_manual_override(repo_path) if manual_context: - print(f"Using manual override from repository", file=sys.stderr) + print("Using manual override from repository", file=sys.stderr) return manual_context - # Gather sources + # "merge" mode: find override file to include as LLM source + override_path = None + if effective_mode == "merge": + override_path = find_override_file(repo_path) + if override_path: + print(f"Merging {override_path.name} into LLM context", file=sys.stderr) + + # Gather sources (includes override file in merge mode) print(f"Gathering context sources from {repo_path}...", file=sys.stderr) - sources = gather_context_sources(repo_path) + sources = gather_context_sources(repo_path, override_path=override_path) if not sources: raise ValueError(f"No context sources found in {repo_path}") @@ -501,6 +553,9 @@ def generate_application_context( for name, content in sources.items(): sources_text += f"\n### {name}\n```\n{content}\n```\n" + # Add developer context supplement when merging + developer_context = MERGE_CONTEXT_SUPPLEMENT if override_path else "" + # Call LLM print(f"Generating context with {model}...", file=sys.stderr) client = Anthropic() @@ 
-509,7 +564,10 @@ def generate_application_context( max_tokens=2000, messages=[{ "role": "user", - "content": CONTEXT_GENERATION_PROMPT.format(sources=sources_text) + "content": CONTEXT_GENERATION_PROMPT.format( + sources=sources_text, + developer_context=developer_context, + ) }] ) @@ -529,7 +587,7 @@ def generate_application_context( except json.JSONDecodeError as e: raise ValueError(f"Failed to parse LLM response as JSON: {e}\nResponse: {response_text}") - data['source'] = 'llm' + data['source'] = 'merged' if override_path else 'llm' # Validate and create context (will raise UnsupportedApplicationTypeError if invalid) return ApplicationContext(**data) diff --git a/libs/openant-core/openant/cli.py b/libs/openant-core/openant/cli.py index 39034e1..66822ec 100644 --- a/libs/openant-core/openant/cli.py +++ b/libs/openant-core/openant/cli.py @@ -181,14 +181,23 @@ def cmd_generate_context(args): output_path = args.output or os.path.join(args.repo, "application_context.json") output_dir = os.path.dirname(os.path.abspath(output_path)) + # Resolve effective override mode + if args.override_mode: + effective_mode = args.override_mode + elif args.force: + effective_mode = "ignore" + else: + effective_mode = None # legacy default behavior + try: with step_context("generate-context", output_dir, inputs={ "repo_path": os.path.abspath(args.repo), "force": args.force, + "override_mode": effective_mode, }) as ctx: app_context = generate_application_context( Path(args.repo), - force_regenerate=args.force, + override_mode=effective_mode, ) save_context(app_context, Path(output_path)) @@ -1121,6 +1130,9 @@ def main(): help="Output path (default: /application_context.json)") gc_p.add_argument("--force", action="store_true", help="Force regeneration, ignoring OPENANT.md override files") + gc_p.add_argument("--override-mode", choices=["use", "ignore", "merge"], + default=None, + help="How to handle OPENANT.md: use (as-is), merge (into LLM), ignore") gc_p.add_argument("--show-prompt", 
action="store_true", help="Include formatted prompt text in output") gc_p.set_defaults(func=cmd_generate_context) diff --git a/libs/openant-core/tests/test_go_cli.py b/libs/openant-core/tests/test_go_cli.py index 519e6ae..e57508c 100644 --- a/libs/openant-core/tests/test_go_cli.py +++ b/libs/openant-core/tests/test_go_cli.py @@ -173,6 +173,12 @@ def test_help(self): assert "repository" in output.lower() assert "context" in output.lower() + def test_help_shows_override_mode(self): + result = run_cli("generate-context", "--help") + assert result.returncode == 0 + output = result.stdout + result.stderr + assert "override-mode" in output + class TestGenerateContext: """Tests for `openant generate-context` (no API key).""" @@ -184,6 +190,16 @@ def test_requires_api_key(self, sample_python_repo): assert result.returncode != 0 assert "api key" in output.lower() + def test_force_and_override_mode_mutually_exclusive(self, sample_python_repo): + """--force and --override-mode together should error.""" + result = run_cli( + "generate-context", sample_python_repo, + "--force", "--override-mode", "merge", + ) + output = result.stderr + result.stdout + assert result.returncode != 0 + assert "mutually exclusive" in output.lower() + class TestApiKeyHandling: def test_scan_requires_api_key(self, sample_python_repo): From 2864a722c02ddc4bca5f439414d6b39fbf31de12 Mon Sep 17 00:00:00 2001 From: joshbouncesecurity Date: Mon, 4 May 2026 21:33:32 +0300 Subject: [PATCH 5/6] test: add tests for override-mode and find_override_file Adds unit tests for the override-mode functionality of generate-context: - test_override_mode.py: covers find_override_file() (none/found/priority), gather_context_sources() merge behavior, and the dispatch logic of generate_application_context() across "use", "ignore", "merge", and legacy force_regenerate paths. Also covers the Python CLI's argparse validation of --override-mode choices. 
- test_go_cli.py: extends TestGenerateContext with parametrized tests for each --override-mode value, an invalid-value rejection test, and a no-TTY default test ensuring the interactive prompt is suppressed when stdin is piped. Also wires run_cli() to default stdin to a closed pipe so subprocess tests don't accidentally inherit pytest's TTY. All 128 tests pass. Co-Authored-By: Claude Opus 4.7 (1M context) --- libs/openant-core/tests/test_go_cli.py | 55 +++- libs/openant-core/tests/test_override_mode.py | 251 ++++++++++++++++++ 2 files changed, 304 insertions(+), 2 deletions(-) create mode 100644 libs/openant-core/tests/test_override_mode.py diff --git a/libs/openant-core/tests/test_go_cli.py b/libs/openant-core/tests/test_go_cli.py index e57508c..fc9c9f9 100644 --- a/libs/openant-core/tests/test_go_cli.py +++ b/libs/openant-core/tests/test_go_cli.py @@ -23,8 +23,12 @@ ) -def run_cli(*args, env_override=None): - """Run the openant CLI binary and return the CompletedProcess.""" +def run_cli(*args, env_override=None, stdin_input=""): + """Run the openant CLI binary and return the CompletedProcess. + + `stdin_input` defaults to "" (a piped, non-TTY stdin). Pass `stdin_input=None` + to inherit the parent's stdin instead. + """ env = os.environ.copy() # Don't let the test hit any real API env.pop("ANTHROPIC_API_KEY", None) @@ -45,6 +49,7 @@ def run_cli(*args, env_override=None): text=True, timeout=30, env=env, + input=stdin_input, ) @@ -200,6 +205,52 @@ def test_force_and_override_mode_mutually_exclusive(self, sample_python_repo): assert result.returncode != 0 assert "mutually exclusive" in output.lower() + @pytest.mark.parametrize("mode", ["use", "merge", "ignore"]) + def test_override_mode_accepts_valid_values(self, sample_python_repo, mode): + """All three valid override-mode values are accepted by the Go CLI. 
+ + We don't have an API key in this environment so the call still fails, + but the failure should NOT be a flag-validation error — it should be + the API-key check downstream. + """ + result = run_cli( + "generate-context", sample_python_repo, + "--override-mode", mode, + ) + output = (result.stderr + result.stdout).lower() + # Should NOT be rejected as an invalid mode value. + assert "invalid --override-mode" not in output + assert "must be use, merge, or ignore" not in output + + def test_override_mode_rejects_invalid_value(self, sample_python_repo): + """Unknown --override-mode value is rejected before any LLM call.""" + result = run_cli( + "generate-context", sample_python_repo, + "--override-mode", "bogus", + ) + output = (result.stderr + result.stdout).lower() + assert result.returncode != 0 + assert "invalid" in output + assert "use, merge, or ignore" in output + + def test_no_tty_default_is_use(self, sample_python_repo, tmp_path): + """When stdin is not a TTY (subprocess) and an override file exists, + the CLI should silently default to 'use' rather than prompt — i.e. + it should NOT print the interactive prompt menu.""" + # Copy the sample repo into tmp_path so we don't pollute the fixture + repo_copy = tmp_path / "repo" + shutil.copytree(sample_python_repo, repo_copy) + (repo_copy / "OPENANT.md").write_text( + '# manual override\n' + 'application_type: web_app\n' + ) + result = run_cli("generate-context", str(repo_copy)) + output = result.stderr + result.stdout + # The interactive prompt's text should NOT appear under non-TTY stdin. 
+ assert "[u]se" not in output + assert "[m]erge" not in output + assert "Choice [u/m/i]" not in output + class TestApiKeyHandling: def test_scan_requires_api_key(self, sample_python_repo): diff --git a/libs/openant-core/tests/test_override_mode.py b/libs/openant-core/tests/test_override_mode.py new file mode 100644 index 0000000..b186756 --- /dev/null +++ b/libs/openant-core/tests/test_override_mode.py @@ -0,0 +1,251 @@ +"""Unit tests for the override-mode functionality of generate-context. + +These tests cover the Python-side logic for `find_override_file()`, +`gather_context_sources()` merge behavior, and the override-mode dispatch +inside `generate_application_context()`. They do not invoke the LLM — +network calls are mocked or avoided by exercising the early-return paths. +""" +from pathlib import Path +from unittest.mock import patch + +import pytest + +from context.application_context import ( + MANUAL_OVERRIDE_FILES, + MERGE_CONTEXT_SUPPLEMENT, + find_override_file, + gather_context_sources, + generate_application_context, +) + + +class TestFindOverrideFile: + """Tests for the `find_override_file()` helper.""" + + def test_returns_none_when_no_override(self, tmp_path): + """No override files in repo -> returns None.""" + # Create a non-override file to ensure the directory is real + (tmp_path / "README.md").write_text("# repo") + assert find_override_file(tmp_path) is None + + def test_finds_openant_md(self, tmp_path): + """OPENANT.md is detected.""" + path = tmp_path / "OPENANT.md" + path.write_text("# override") + result = find_override_file(tmp_path) + assert result == path + + def test_finds_openant_json(self, tmp_path): + """OPENANT.json is detected when no OPENANT.md exists.""" + path = tmp_path / "OPENANT.json" + path.write_text('{"application_type": "web_app"}') + result = find_override_file(tmp_path) + assert result == path + + def test_finds_dot_openant_md(self, tmp_path): + """.openant.md is detected.""" + path = tmp_path / ".openant.md" + 
path.write_text("# override") + result = find_override_file(tmp_path) + assert result == path + + def test_priority_md_over_json(self, tmp_path): + """When both OPENANT.md and OPENANT.json exist, MD is preferred.""" + md = tmp_path / "OPENANT.md" + md.write_text("# md override") + js = tmp_path / "OPENANT.json" + js.write_text('{"application_type": "web_app"}') + result = find_override_file(tmp_path) + # OPENANT.md is listed first in MANUAL_OVERRIDE_FILES + assert result == md + assert MANUAL_OVERRIDE_FILES.index("OPENANT.md") < MANUAL_OVERRIDE_FILES.index("OPENANT.json") + + def test_directory_with_override_name_is_skipped(self, tmp_path): + """A directory named OPENANT.md should be detected by Path.exists() — confirm + the helper at least returns a Path object (not crashes).""" + # find_override_file uses .exists(), which is True for directories too, + # so this test simply documents current behavior: it returns the path. + d = tmp_path / "OPENANT.md" + d.mkdir() + result = find_override_file(tmp_path) + # The behavior is "first existing path wins" — this just documents it. 
+ assert result == d + + def test_accepts_str_path(self, tmp_path): + """Helper accepts a Path; calling with str via Path() conversion works.""" + (tmp_path / "OPENANT.md").write_text("# override") + result = find_override_file(Path(str(tmp_path))) + assert result is not None + assert result.name == "OPENANT.md" + + +class TestGatherContextSourcesMerge: + """Tests for `gather_context_sources()` with override_path (merge mode).""" + + def test_no_override_path(self, tmp_path): + """Without override_path, override file is not included as a source.""" + (tmp_path / "README.md").write_text("# readme") + sources = gather_context_sources(tmp_path) + assert "README.md" in sources + # No OPENANT.md key because we didn't pass override_path + assert "OPENANT.md" not in sources + + def test_override_path_included(self, tmp_path): + """When override_path is provided, its content is included.""" + readme = tmp_path / "README.md" + readme.write_text("# readme") + override = tmp_path / "OPENANT.md" + override.write_text("# manual override\nIntended behavior") + + sources = gather_context_sources(tmp_path, override_path=override) + assert "OPENANT.md" in sources + assert "manual override" in sources["OPENANT.md"] + + def test_override_truncated_when_huge(self, tmp_path): + """Override content >10000 chars is truncated.""" + override = tmp_path / "OPENANT.md" + override.write_text("x" * 12000) + sources = gather_context_sources(tmp_path, override_path=override) + content = sources["OPENANT.md"] + assert "[... truncated ...]" in content + # 10000 + truncation marker + assert len(content) < 12000 + + +class TestGenerateApplicationContextDispatch: + """Tests for the override-mode dispatch inside generate_application_context. + + These avoid hitting the LLM by exercising the "use" path (which returns + early when an override file is found). 
+ """ + + def _write_valid_override_md(self, repo_path: Path) -> Path: + """Write a minimal valid OPENANT.md that check_manual_override accepts.""" + # check_manual_override prefers OPENANT.json for structured input; + # use OPENANT.json with the schema generate_application_context expects. + path = repo_path / "OPENANT.json" + path.write_text( + '{"application_type": "web_app", "purpose": "test app", ' + '"confidence": "high", "intended_behaviors": [], ' + '"trust_boundaries": [], "not_a_vulnerability": []}' + ) + return path + + def test_use_mode_returns_override_without_llm(self, tmp_path): + """override_mode='use' with an override file returns it verbatim + without ever calling the LLM.""" + self._write_valid_override_md(tmp_path) + + # If the LLM is called, this will blow up because we don't patch it. + # Test passes if we get a context back without any Anthropic call. + with patch("context.application_context.Anthropic") as mock_anth: + ctx = generate_application_context(tmp_path, override_mode="use") + mock_anth.assert_not_called() + assert ctx.application_type == "web_app" + + def test_force_regenerate_ignores_override(self, tmp_path): + """force_regenerate=True (legacy) should NOT short-circuit to override.""" + self._write_valid_override_md(tmp_path) + + with patch("context.application_context.Anthropic") as mock_anth: + # The LLM would be called — we don't actually want to wait for it. + # We just confirm the early-return for "use" did NOT happen by + # asserting Anthropic was instantiated. We then bail with an + # exception inside the mock to avoid running the rest. 
+ mock_anth.side_effect = RuntimeError("LLM-call-attempted") + with pytest.raises(RuntimeError, match="LLM-call-attempted"): + generate_application_context(tmp_path, force_regenerate=True) + mock_anth.assert_called_once() + + def test_override_mode_ignore_skips_override(self, tmp_path): + """override_mode='ignore' should NOT short-circuit to override.""" + self._write_valid_override_md(tmp_path) + + with patch("context.application_context.Anthropic") as mock_anth: + mock_anth.side_effect = RuntimeError("LLM-call-attempted") + with pytest.raises(RuntimeError, match="LLM-call-attempted"): + generate_application_context(tmp_path, override_mode="ignore") + mock_anth.assert_called_once() + + def test_override_mode_merge_includes_supplement(self, tmp_path): + """override_mode='merge' should send the override content + supplement + to the LLM.""" + override = tmp_path / "OPENANT.md" + override.write_text("# manual override\nIntended behavior") + # Need at least one source so gather_context_sources doesn't raise + (tmp_path / "README.md").write_text("# readme") + + captured_prompt = {} + + class _FakeContent: + def __init__(self, text): + self.text = text + + class _FakeResponse: + def __init__(self, text): + self.content = [_FakeContent(text)] + + def _fake_create(**kwargs): + captured_prompt["content"] = kwargs["messages"][0]["content"] + return _FakeResponse( + '```json\n' + '{"application_type": "web_app", "purpose": "x", ' + '"confidence": "high", "intended_behaviors": [], ' + '"trust_boundaries": [], "not_a_vulnerability": []}\n' + '```' + ) + + with patch("context.application_context.Anthropic") as mock_anth: + instance = mock_anth.return_value + instance.messages.create.side_effect = _fake_create + ctx = generate_application_context(tmp_path, override_mode="merge") + + assert "OPENANT.md" in captured_prompt["content"] + assert MERGE_CONTEXT_SUPPLEMENT.strip() in captured_prompt["content"] + # Source should be marked as 'merged' when an override is merged. 
+ assert ctx.source == "merged" + + +class TestPythonCLIArgparse: + """Verify the argparse-level wiring of --override-mode and --force. + + Invokes the CLI via subprocess to verify the public surface. + """ + + @staticmethod + def _run_cli(*args): + import os + import subprocess + import sys + + env = os.environ.copy() + # Don't let the test reach a real LLM + env.pop("ANTHROPIC_API_KEY", None) + return subprocess.run( + [sys.executable, "-m", "openant.cli"] + list(args), + capture_output=True, + text=True, + timeout=15, + env=env, + ) + + def test_override_mode_choices_validation(self): + """--override-mode rejects values outside use/ignore/merge.""" + result = self._run_cli( + "generate-context", "/tmp/nonexistent-repo", + "--override-mode", "bogus", + ) + assert result.returncode != 0 + # argparse error mentions invalid choice and the offending value + assert "invalid choice" in (result.stderr + result.stdout).lower() + + def test_override_mode_help_lists_choices(self): + """`generate-context --help` advertises the override-mode flag.""" + result = self._run_cli("generate-context", "--help") + assert result.returncode == 0 + out = result.stdout + result.stderr + assert "--override-mode" in out + # All three valid values appear in help text + assert "use" in out + assert "merge" in out + assert "ignore" in out From 5bf3f664c41a0d4f8b426bd2c025c6a6039ede4a Mon Sep 17 00:00:00 2001 From: joshbouncesecurity Date: Mon, 4 May 2026 23:09:19 +0300 Subject: [PATCH 6/6] fix: harden override-mode prompt and skip directory matches Round 1 PR review fixes for the override-mode feature: - Add 30s timeout on the interactive override-mode prompt (Go CLI). The previous bufio.ReadString call would block forever if a TTY was detected but no user was actually present (detached terminals, some CI runners). Now the prompt clearly advertises the timeout and falls back to "use". - find_override_file (Python) now requires a regular file via .is_file() rather than .exists(). 
This matches the Go CLI's IsDir() guard and prevents merge mode from crashing with IsADirectoryError when a directory happens to share an override filename. Updated the test that previously documented the unsafe behavior, and added a regression test. - Extract the 10000-char merge-mode truncation cap into a named constant MAX_OVERRIDE_MERGE_CHARS so the magic number isn't duplicated. --- apps/openant-cli/cmd/generatecontext.go | 38 ++++++++++++++++--- .../context/application_context.py | 17 +++++++-- libs/openant-core/tests/test_override_mode.py | 19 +++++++--- 3 files changed, 60 insertions(+), 14 deletions(-) diff --git a/apps/openant-cli/cmd/generatecontext.go b/apps/openant-cli/cmd/generatecontext.go index f129dd2..633610b 100644 --- a/apps/openant-cli/cmd/generatecontext.go +++ b/apps/openant-cli/cmd/generatecontext.go @@ -6,12 +6,19 @@ import ( "os" "path/filepath" "strings" + "time" "github.com/knostic/open-ant-cli/internal/output" "github.com/knostic/open-ant-cli/internal/python" "github.com/spf13/cobra" ) +// promptTimeout is how long the interactive override-mode prompt waits for +// user input before falling back to the default ("use"). This protects +// against indefinite hangs if a TTY is detected but no user is actually +// available to respond (e.g. some CI runners, detached terminals). +const promptTimeout = 30 * time.Second + var generateContextCmd = &cobra.Command{ Use: "generate-context [repository-path]", Short: "Generate application security context for a repository", @@ -171,17 +178,38 @@ func isInteractiveTerminal() bool { } // promptOverrideMode shows an interactive prompt for how to handle the override file. +// The prompt times out after promptTimeout and falls back to the default ("use") +// if no input is received, so the CLI can never hang indefinitely waiting on a +// detached or unattended terminal. 
func promptOverrideMode(filename string) string { fmt.Fprintf(os.Stderr, "\nFound manual override: %s\n\n", filename) fmt.Fprintln(os.Stderr, " [u]se — Use as-is (skip LLM generation)") fmt.Fprintln(os.Stderr, " [m]erge — Feed into LLM alongside other sources") fmt.Fprintln(os.Stderr, " [i]gnore — Ignore, generate from scratch") fmt.Fprintln(os.Stderr, "") - fmt.Fprint(os.Stderr, "Choice [u/m/i] (default: u): ") - - reader := bufio.NewReader(os.Stdin) - answer, _ := reader.ReadString('\n') - answer = strings.TrimSpace(strings.ToLower(answer)) + fmt.Fprintf(os.Stderr, "Choice [u/m/i] (default: u, %ds timeout): ", + int(promptTimeout.Seconds())) + + // Read on a goroutine so we can race against a timeout. + type readResult struct { + line string + err error + } + ch := make(chan readResult, 1) + go func() { + reader := bufio.NewReader(os.Stdin) + line, err := reader.ReadString('\n') + ch <- readResult{line: line, err: err} + }() + + var answer string + select { + case res := <-ch: + answer = strings.TrimSpace(strings.ToLower(res.line)) + case <-time.After(promptTimeout): + fmt.Fprintln(os.Stderr, "\nNo response — defaulting to 'use'.") + return "use" + } switch answer { case "m", "merge": diff --git a/libs/openant-core/context/application_context.py b/libs/openant-core/context/application_context.py index 70b944f..c5f1c23 100644 --- a/libs/openant-core/context/application_context.py +++ b/libs/openant-core/context/application_context.py @@ -153,6 +153,12 @@ def get_type_info(self) -> dict: ".openant.json", ] +# Maximum size (chars) of an override file when included in merge-mode LLM +# input. Larger files are truncated with a marker so they don't blow the +# prompt budget. 10 KB is comfortably above a hand-written notes file but +# well below the 200K-token context window. 
+MAX_OVERRIDE_MERGE_CHARS = 10000 + # Priority files to read for context generation CONTEXT_FILES = [ "README.md", @@ -195,6 +201,9 @@ def get_type_info(self) -> dict: def find_override_file(repo_path: Path) -> Path | None: """Return path to first existing manual override file, or None. + Only regular files are considered — directories that happen to share + an override filename are skipped (matches the Go CLI's behavior). + Args: repo_path: Path to repository root. @@ -203,7 +212,7 @@ def find_override_file(repo_path: Path) -> Path | None: """ for filename in MANUAL_OVERRIDE_FILES: filepath = repo_path / filename - if filepath.exists(): + if filepath.is_file(): return filepath return None @@ -224,8 +233,10 @@ def gather_context_sources(repo_path: Path, override_path: Path | None = None) - if override_path is not None: try: content = override_path.read_text(errors="ignore") - if len(content) > 10000: - content = content[:10000] + "\n\n[... truncated ...]" + if len(content) > MAX_OVERRIDE_MERGE_CHARS: + content = ( + content[:MAX_OVERRIDE_MERGE_CHARS] + "\n\n[... truncated ...]" + ) sources[override_path.name] = content except Exception as e: print(f"Warning: Could not read {override_path.name}: {e}", file=sys.stderr) diff --git a/libs/openant-core/tests/test_override_mode.py b/libs/openant-core/tests/test_override_mode.py index b186756..9abeafc 100644 --- a/libs/openant-core/tests/test_override_mode.py +++ b/libs/openant-core/tests/test_override_mode.py @@ -61,15 +61,22 @@ def test_priority_md_over_json(self, tmp_path): assert MANUAL_OVERRIDE_FILES.index("OPENANT.md") < MANUAL_OVERRIDE_FILES.index("OPENANT.json") def test_directory_with_override_name_is_skipped(self, tmp_path): - """A directory named OPENANT.md should be detected by Path.exists() — confirm - the helper at least returns a Path object (not crashes).""" - # find_override_file uses .exists(), which is True for directories too, - # so this test simply documents current behavior: it returns the path. 
+ """A directory named OPENANT.md must NOT be returned — only regular + files are valid overrides, matching the Go CLI's behavior. Otherwise + merge mode would crash trying to read_text() on a directory.""" d = tmp_path / "OPENANT.md" d.mkdir() + # A real override file lower in priority should be picked up instead. + json_override = tmp_path / "OPENANT.json" + json_override.write_text('{"application_type": "web_app"}') + result = find_override_file(tmp_path) - # The behavior is "first existing path wins" — this just documents it. - assert result == d + assert result == json_override + + def test_directory_only_returns_none(self, tmp_path): + """If the only matching path is a directory, return None — not crash.""" + (tmp_path / "OPENANT.md").mkdir() + assert find_override_file(tmp_path) is None def test_accepts_str_path(self, tmp_path): """Helper accepts a Path; calling with str via Path() conversion works."""