vercel-labs · leap21ai · May 14, 2026
diff --git a/AGENTS.md b/AGENTS.md
@@ -0,0 +1,41 @@
+# AGENTS.md
+
+Pointer file for Codex working in this repo. User-facing docs are
+in [README.md](./README.md); contributor docs in
+[CONTRIBUTING.md](./CONTRIBUTING.md). Read those first.
+
+## Repo shape
+
+```
+packages/
+  core/        Types, schemas, plugin contracts, config loader (defineConfig)
+  scanner/     Regex matchers + scanning engine
+  processor/   AI agent integration (Claude Agent SDK, Codex SDK), enrich, triage, revalidate
+  deepsec/     Publishable package: bundled CLI + the `deepsec/config` sub-export + the @vercel/sandbox executor
+e2e/           End-to-end tests
+```
+
+## Commands
+
+```bash
+pnpm install
+pnpm test          # all packages, including e2e
+pnpm test:unit     # excludes e2e
+pnpm -r build      # tsc across all workspaces (typecheck)
+pnpm bundle        # esbuild bundle for distribution
+pnpm deepsec ...   # the CLI (runs via tsx)
+```
+
+## Patterns to keep in mind
+
+- Plugin contracts live in `packages/core/src/plugin.ts`. Internals route
+  through `getRegistry()` from `deepsec/config` rather than calling
+  organization-specific code directly.
+- The CLI auto-loads `deepsec.config.{ts,mjs,js,cjs}` from cwd upward
+  (via `packages/deepsec/src/load-config.ts`, jiti).
+- New matchers go in `packages/scanner/src/matchers/` and register in
+  `matchers/index.ts`. Org-specific matchers belong in a separate
+  plugin package, not in this tree.
+- The AI prompt template lives in `packages/processor/src/index.ts`. It
+  is intentionally generic. Don't add organization-specific context
+  there; use the project's `infoMarkdown` / `promptAppend` in `deepsec.config.ts`, or the legacy `data/<projectId>/INFO.md` / `config.json:promptAppend`.
diff --git a/docs/configuration.md b/docs/configuration.md
@@ -89,8 +89,13 @@ Some legacy fields still live in `data/<id>/config.json`:
 }
 ```
 
-This is read by `scan` and by the AI agents. It overrides the same fields
-on the project declaration if both are present.
+This is read by `scan` and by the AI agents. The project declaration in
+`deepsec.config.ts` takes precedence over these legacy files when the same
+field is present in both. The fallback order is:
+
+1. `deepsec.config.ts` project declaration (`infoMarkdown`, `promptAppend`, `priorityPaths`)
+2. `data/<id>/config.json` / `data/<id>/INFO.md`
+3. Defaults / omitted
 
 ## Environment variables
 
@@ -115,7 +120,7 @@ backend you're using.
 | `OPENAI_API_KEY` | `--agent codex` | Codex SDK token. Unset is fine if `AI_GATEWAY_API_KEY` is set, or if Codex routes through AI Gateway with the Anthropic token. |
 | `OPENAI_BASE_URL` | `--agent codex` | Default (when `AI_GATEWAY_API_KEY` is set): `https://ai-gateway.vercel.sh/v1`. |
 | `DEEPSEC_AGENT_DEBUG` | both backends | Set to `1` to enable verbose agent logging. |
-| `DEEPSEC_DATA_ROOT` | core | Override the data directory location. Equivalent to `dataDir` in config. |
+| `DEEPSEC_DATA_ROOT` | core | Override the data directory location. Takes precedence over `dataDir` in config. |
 
 ### Plugin-specific
 

diff --git a/packages/core/src/__tests__/paths.test.ts b/packages/core/src/__tests__/paths.test.ts
@@ -1,6 +1,7 @@
 import path from "node:path";
-import { describe, expect, it } from "vitest";
-import { dataDir, fileRecordPath, filesDir, runMetaPath, runsDir } from "../paths.js";
+import { afterEach, describe, expect, it } from "vitest";
+import { defineConfig, setLoadedConfig } from "../config.js";
+import { dataDir, fileRecordPath, filesDir, getDataRoot, runMetaPath, runsDir } from "../paths.js";
 
 describe("paths", () => {
   // path.join uses native separators (\\ on Windows, / elsewhere). Build
@@ -26,6 +27,33 @@ describe("paths", () => {
   // Path-traversal protection — any segment that could escape the per-project
   // mirror (`..`, absolute paths, separators, null bytes) must throw, since
   // these are the documented sandbox-round-trip and CLI-flag attack vectors.
+  describe("getDataRoot", () => {
+    afterEach(() => {
+      delete process.env.DEEPSEC_DATA_ROOT;
+      setLoadedConfig(defineConfig({ projects: [] }));
+    });
+
+    it("defaults to 'data'", () => {
+      expect(getDataRoot()).toBe("data");
+    });
+
+    it("respects DEEPSEC_DATA_ROOT env var", () => {
+      process.env.DEEPSEC_DATA_ROOT = "/custom/data";
+      expect(getDataRoot()).toBe("/custom/data");
+    });
+
+    it("respects config dataDir when env var is absent", () => {
+      setLoadedConfig(defineConfig({ projects: [], dataDir: "/cfg/data" }));
+      expect(getDataRoot()).toBe("/cfg/data");
+    });
+
+    it("prefers env var over config dataDir", () => {
+      process.env.DEEPSEC_DATA_ROOT = "/env/data";
+      setLoadedConfig(defineConfig({ projects: [], dataDir: "/cfg/data" }));
+      expect(getDataRoot()).toBe("/env/data");
+    });
+  });
+
   describe("path traversal", () => {
     it("dataDir rejects '..' projectId", () => {
       expect(() => dataDir("..")).toThrow(/Invalid projectId/);

diff --git a/packages/core/src/paths.ts b/packages/core/src/paths.ts
@@ -1,7 +1,8 @@
 import path from "node:path";
+import { getConfig } from "./config.js";
 
+/**
+ * Root directory for deepsec data. Resolution order: the `DEEPSEC_DATA_ROOT`
+ * env var, then `dataDir` from `getConfig()`, then the literal "data".
+ * `||` is used, so an empty-string value at either level falls through.
+ */
 export function getDataRoot(): string {
-  return process.env.DEEPSEC_DATA_ROOT || "data";
+  return process.env.DEEPSEC_DATA_ROOT || getConfig()?.dataDir || "data";
 }
 
 // Reject empty, '.', '..', absolute paths, null bytes, and any path

diff --git a/packages/processor/package.json b/packages/processor/package.json
@@ -13,6 +13,7 @@
     "@openai/codex": "^0.125.0",
     "@openai/codex-sdk": "^0.125.0",
     "@deepsec/core": "workspace:*",
-    "@deepsec/scanner": "workspace:*"
+    "@deepsec/scanner": "workspace:*",
+    "zod": "^3.24.0"
   }
 }
diff --git a/packages/processor/src/__tests__/process-revalidate.test.ts b/packages/processor/src/__tests__/process-revalidate.test.ts
@@ -123,6 +123,54 @@ describe("processor with stub agent", () => {
     expect(rec.lockedByRunId).toBeFalsy();
   });
 
+  it("process() prefers config project declaration over legacy data files", async () => {
+    const fx = setupProject({ files: ["app.ts"] });
+    fx.writeRecord(pendingRecord(fx.projectId, "app.ts"));
+
+    // Seed both legacy sources with sentinels that must not reach the prompt.
+    fs.writeFileSync(path.join(fx.dataRoot, fx.projectId, "INFO.md"), "from info.md");
+    fs.writeFileSync(
+      path.join(fx.dataRoot, fx.projectId, "config.json"),
+      JSON.stringify({ promptAppend: "from config.json" }),
+    );
+
+    // Stub agent reports no findings; only the captured prompt is inspected below.
+    const stub = new StubAgent({
+      investigateImpl: async function* (params) {
+        return {
+          results: params.batch.map((rec) => ({
+            filePath: rec.filePath,
+            findings: [],
+          })),
+          meta: { durationMs: 1 },
+        };
+      },
+    });
+
+    // NOTE(review): infoMarkdown and promptAppend share one sentinel, so the
+    // toContain check below passes if either of them reaches the prompt.
+    // Consider distinct values so each override is asserted on its own.
+    setLoadedConfig(
+      defineConfig({
+        projects: [
+          {
+            id: fx.projectId,
+            root: fx.targetRoot,
+            infoMarkdown: "from config ts",
+            promptAppend: "from config ts",
+          },
+        ],
+        plugins: [{ name: "stub", agents: [stub] }],
+      }),
+    );
+
+    await processProject({
+      projectId: fx.projectId,
+      agentType: "stub",
+      concurrency: 1,
+    });
+
+    // The config declaration must win over both legacy files.
+    const prompt = stub.calls.investigateCalls[0].promptTemplate;
+    expect(prompt).toContain("from config ts");
+    expect(prompt).not.toContain("from info.md");
+    expect(prompt).not.toContain("from config.json");
+  });
+
   it("process() respects --limit", async () => {
     const fx = setupProject({ files: ["a.ts", "b.ts", "c.ts"] });
     fx.writeRecord(pendingRecord(fx.projectId, "a.ts"));

diff --git a/packages/processor/src/__tests__/shared.test.ts b/packages/processor/src/__tests__/shared.test.ts
@@ -262,7 +262,8 @@ describe("parseInvestigateResults", () => {
   const batch = [{ filePath: "a.ts" } as any, { filePath: "b.ts" } as any];
 
   it("matches results to batch files; fills missing with empty findings", () => {
-    const text = '```json\n[{"filePath":"a.ts","findings":[{"severity":"HIGH"}]}]\n```';
+    const text =
+      '```json\n[{"filePath":"a.ts","findings":[{"severity":"HIGH","vulnSlug":"x","title":"t","description":"d","lineNumbers":[1],"recommendation":"r","confidence":"high"}]}]\n```';
     const out = parseInvestigateResults(text, batch);
     expect(out.find((r) => r.filePath === "a.ts")?.findings.length).toBe(1);
     expect(out.find((r) => r.filePath === "b.ts")?.findings).toEqual([]);
@@ -284,6 +285,24 @@ describe("parseInvestigateResults", () => {
       /not an array/,
     );
   });
+
+  it("throws when a finding has invalid severity", () => {
+    // Same finding as the passing case above, except for the severity value.
+    const text =
+      '```json\n[{"filePath":"a.ts","findings":[{"severity":"INVALID","vulnSlug":"x","title":"t","description":"d","lineNumbers":[1],"recommendation":"r","confidence":"high"}]}]\n```';
+    expect(() => parseInvestigateResults(text, batch)).toThrow(/schema validation/);
+  });
+
+  it("throws when a finding has invalid confidence", () => {
+    // Same finding as the passing case above, except for the confidence value.
+    const text =
+      '```json\n[{"filePath":"a.ts","findings":[{"severity":"HIGH","vulnSlug":"x","title":"t","description":"d","lineNumbers":[1],"recommendation":"r","confidence":"INVALID"}]}]\n```';
+    expect(() => parseInvestigateResults(text, batch)).toThrow(/schema validation/);
+  });
+
+  it("throws when a finding is missing a required field", () => {
+    // Only severity and vulnSlug are supplied; the remaining finding fields are omitted.
+    const text =
+      '```json\n[{"filePath":"a.ts","findings":[{"severity":"HIGH","vulnSlug":"x"}]}]\n```';
+    expect(() => parseInvestigateResults(text, batch)).toThrow(/schema validation/);
+  });
 });
 
 describe("parseRevalidateVerdicts", () => {
@@ -298,4 +317,10 @@ describe("parseRevalidateVerdicts", () => {
   it("throws on parse failure", () => {
     expect(() => parseRevalidateVerdicts("garbage")).toThrow(/wasn't parseable JSON/);
   });
+
+  it("throws when verdict is invalid", () => {
+    // Entry carries filePath, title and reasoning; only the verdict value is bogus.
+    const text =
+      '```json\n[{"filePath":"a.ts","title":"x","verdict":"INVALID","reasoning":"r"}]\n```';
+    expect(() => parseRevalidateVerdicts(text)).toThrow(/schema validation/);
+  });
 });
diff --git a/packages/processor/src/agents/shared.ts b/packages/processor/src/agents/shared.ts
@@ -1,5 +1,7 @@
 import { spawnSync } from "node:child_process";
-import type { FileRecord, Finding, RefusalReport } from "@deepsec/core";
+import type { FileRecord, RefusalReport } from "@deepsec/core";
+import { findingSchema } from "@deepsec/core";
+import { z } from "zod";
 import type { InvestigateResult, RevalidateVerdict } from "./types.js";
 
 // --- Retry / backoff -------------------------------------------------------
@@ -418,15 +420,23 @@ export function parseInvestigateResults(
     throw new Error(`Agent produced JSON but not an array of file findings. Got: ${typeof parsed}`);
   }
 
-  const typedParsed = parsed as Array<{ filePath: string; findings: Finding[] }>;
+  const investigateResultSchema = z.object({
+    filePath: z.string(),
+    findings: z.array(findingSchema),
+  });
+  const validation = z.array(investigateResultSchema).safeParse(parsed);
+  if (!validation.success) {
+    throw new Error(`Agent findings failed schema validation: ${validation.error.message}`);
+  }
+
   const results: InvestigateResult[] = [];
   const batchPaths = new Set(batch.map((r) => r.filePath));
 
-  for (const entry of typedParsed) {
+  for (const entry of validation.data) {
     if (batchPaths.has(entry.filePath)) {
       results.push({
         filePath: entry.filePath,
-        findings: entry.findings || [],
+        findings: entry.findings,
       });
       batchPaths.delete(entry.filePath);
     }
@@ -568,5 +578,19 @@ export function parseRevalidateVerdicts(resultText: string): RevalidateVerdict[]
   if (!Array.isArray(parsed)) {
     throw new Error(`Agent produced revalidation JSON but not an array. Got: ${typeof parsed}`);
   }
-  return parsed as RevalidateVerdict[];
+
+  const revalidateVerdictSchema = z.object({
+    filePath: z.string(),
+    title: z.string(),
+    verdict: z.enum(["true-positive", "false-positive", "fixed", "uncertain"]),
+    reasoning: z.string(),
+    adjustedSeverity: z.enum(["CRITICAL", "HIGH", "MEDIUM", "HIGH_BUG", "BUG"]).optional(),
+  });
+  const validation = z.array(revalidateVerdictSchema).safeParse(parsed);
+  if (!validation.success) {
+    throw new Error(
+      `Agent revalidation verdicts failed schema validation: ${validation.error.message}`,
+    );
+  }
+  return validation.data as RevalidateVerdict[];
 }
diff --git a/packages/processor/src/index.ts b/packages/processor/src/index.ts
@@ -7,6 +7,7 @@ import {
   createRunMeta,
   dataDir,
   defaultConcurrency,
+  findProject,
   getRegistry,
   loadAllFileRecords,
   readFileRecord,
@@ -179,14 +180,21 @@ export async function process(params: {
     manifestFilePaths = new Set(raw as string[]);
   }
 
-  // Load project INFO.md if it exists
+  // Resolve project context with clear precedence:
+  //   1. deepsec.config.ts project declaration (modern source of truth)
+  //   2. Legacy data/<id>/INFO.md and data/<id>/config.json
+  const projectDecl = findProject(projectId);
+
   const infoPath = path.join(dataDir(projectId), "INFO.md");
   let projectInfo = "";
   try {
     projectInfo = fs.readFileSync(infoPath, "utf-8");
   } catch {
     // No INFO.md — that's fine
   }
+  if (projectDecl?.infoMarkdown !== undefined) {
+    projectInfo = projectDecl.infoMarkdown;
+  }
 
   // Load project config.json for prompt customization and priority
   const projectConfigJsonPath = path.join(dataDir(projectId), "config.json");
@@ -199,6 +207,12 @@ export async function process(params: {
   } catch {
     // No config.json — that's fine
   }
+  if (projectDecl?.promptAppend !== undefined) {
+    projectConfig.promptAppend = projectDecl.promptAppend;
+  }
+  if (projectDecl?.priorityPaths !== undefined) {
+    projectConfig.priorityPaths = projectDecl.priorityPaths;
+  }
 
   // Tech detection result drives per-batch threat highlights. Read once
   // from `data/<id>/tech.json` (written by `scan()`); empty list when the
@@ -837,11 +851,16 @@ export async function revalidate(params: {
     manifestFilePaths = new Set(raw as string[]);
   }
 
+  const projectDecl = findProject(projectId);
+
   const infoPath = path.join(dataDir(projectId), "INFO.md");
   let projectInfo = "";
   try {
     projectInfo = fs.readFileSync(infoPath, "utf-8");
   } catch {}
+  if (projectDecl?.infoMarkdown !== undefined) {
+    projectInfo = projectDecl.infoMarkdown;
+  }
 
   const model = (config.model as string) ?? "claude-opus-4-7";