Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 41 additions & 0 deletions AGENTS.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
# AGENTS.md

Pointer file for Codex working in this repo. User-facing docs are
in [README.md](./README.md); contributor docs in
[CONTRIBUTING.md](./CONTRIBUTING.md). Read those first.

## Repo shape

```
packages/
core/ Types, schemas, plugin contracts, config loader (defineConfig)
scanner/ Regex matchers + scanning engine
processor/ AI agent integration (Codex Agent SDK, Codex SDK), enrich, triage, revalidate
deepsec/ Publishable package: bundled CLI + the `deepsec/config` sub-export + the @vercel/sandbox executor
e2e/ End-to-end tests
```

## Commands

```bash
pnpm install
pnpm test # all packages, including e2e
pnpm test:unit # excludes e2e
pnpm -r build # tsc across all workspaces (typecheck)
pnpm bundle # esbuild bundle for distribution
pnpm deepsec ... # the CLI (runs via tsx)
```

## Patterns to keep in mind

- Plugin contracts live in `packages/core/src/plugin.ts`. Internals route
through `getRegistry()` from `deepsec/config` rather than calling
organization-specific code directly.
- The CLI auto-loads `deepsec.config.{ts,mjs,js,cjs}` from cwd upward
(via `packages/deepsec/src/load-config.ts`, jiti).
- New matchers go in `packages/scanner/src/matchers/` and register in
`matchers/index.ts`. Org-specific matchers belong in a separate
plugin package, not in this tree.
- The AI prompt template lives in `packages/processor/src/index.ts`. It
is intentionally generic. Don't add organization-specific context
there; use `data/<projectId>/INFO.md` or `config.json:promptAppend`.
11 changes: 8 additions & 3 deletions docs/configuration.md
Original file line number Diff line number Diff line change
Expand Up @@ -89,8 +89,13 @@ Some legacy fields still live in `data/<id>/config.json`:
}
```

This is read by `scan` and by the AI agents. It overrides the same fields
on the project declaration if both are present.
This is read by `scan` and by the AI agents. The project declaration in
`deepsec.config.ts` takes precedence over these legacy files when the same
field is present in both. Each field is resolved independently from the first
source in this list that defines it:

1. `deepsec.config.ts` project declaration (`infoMarkdown`, `promptAppend`, `priorityPaths`)
2. `data/<id>/config.json` / `data/<id>/INFO.md`
3. Defaults / omitted

## Environment variables

Expand All @@ -115,7 +120,7 @@ backend you're using.
| `OPENAI_API_KEY` | `--agent codex` | Codex SDK token. Unset is fine if `AI_GATEWAY_API_KEY` is set, or if Codex routes through AI Gateway with the Anthropic token. |
| `OPENAI_BASE_URL` | `--agent codex` | Default (when `AI_GATEWAY_API_KEY` is set): `https://ai-gateway.vercel.sh/v1`. |
| `DEEPSEC_AGENT_DEBUG` | both backends | Set to `1` to enable verbose agent logging. |
| `DEEPSEC_DATA_ROOT` | core | Override the data directory location. Equivalent to `dataDir` in config. |
| `DEEPSEC_DATA_ROOT` | core | Override the data directory location. Takes precedence over `dataDir` in config. |

### Plugin-specific

Expand Down
32 changes: 30 additions & 2 deletions packages/core/src/__tests__/paths.test.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import path from "node:path";
import { describe, expect, it } from "vitest";
import { dataDir, fileRecordPath, filesDir, runMetaPath, runsDir } from "../paths.js";
import { afterEach, describe, expect, it } from "vitest";
import { defineConfig, setLoadedConfig } from "../config.js";
import { dataDir, fileRecordPath, filesDir, getDataRoot, runMetaPath, runsDir } from "../paths.js";

describe("paths", () => {
// path.join uses native separators (\\ on Windows, / elsewhere). Build
Expand All @@ -26,6 +27,33 @@ describe("paths", () => {
// Path-traversal protection — any segment that could escape the per-project
// mirror (`..`, absolute paths, separators, null bytes) must throw, since
// these are the documented sandbox-round-trip and CLI-flag attack vectors.
describe("getDataRoot", () => {
  // Snapshot the ambient value so these tests neither depend on nor clobber
  // a DEEPSEC_DATA_ROOT that the developer's shell or CI has exported.
  const ambientDataRoot = process.env.DEEPSEC_DATA_ROOT;

  afterEach(() => {
    // Assigning `undefined` to a process.env key stores the string
    // "undefined", so an originally-unset variable must be deleted instead.
    if (ambientDataRoot === undefined) {
      delete process.env.DEEPSEC_DATA_ROOT;
    } else {
      process.env.DEEPSEC_DATA_ROOT = ambientDataRoot;
    }
    // Reset the loaded config so a dataDir set by one test cannot leak into the next.
    setLoadedConfig(defineConfig({ projects: [] }));
  });

  it("defaults to 'data'", () => {
    // Start from a known-empty state: no env override and no config dataDir.
    delete process.env.DEEPSEC_DATA_ROOT;
    setLoadedConfig(defineConfig({ projects: [] }));
    expect(getDataRoot()).toBe("data");
  });

  it("respects DEEPSEC_DATA_ROOT env var", () => {
    process.env.DEEPSEC_DATA_ROOT = "/custom/data";
    expect(getDataRoot()).toBe("/custom/data");
  });

  it("respects config dataDir when env var is absent", () => {
    // Make "absent" true regardless of what the surrounding environment exports.
    delete process.env.DEEPSEC_DATA_ROOT;
    setLoadedConfig(defineConfig({ projects: [], dataDir: "/cfg/data" }));
    expect(getDataRoot()).toBe("/cfg/data");
  });

  it("prefers env var over config dataDir", () => {
    process.env.DEEPSEC_DATA_ROOT = "/env/data";
    setLoadedConfig(defineConfig({ projects: [], dataDir: "/cfg/data" }));
    expect(getDataRoot()).toBe("/env/data");
  });
});

describe("path traversal", () => {
it("dataDir rejects '..' projectId", () => {
expect(() => dataDir("..")).toThrow(/Invalid projectId/);
Expand Down
3 changes: 2 additions & 1 deletion packages/core/src/paths.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
import path from "node:path";
import { getConfig } from "./config.js";

/**
 * Resolve the data directory root.
 *
 * Sources are consulted in order and the first truthy value wins:
 *   1. the `DEEPSEC_DATA_ROOT` environment variable,
 *   2. `dataDir` from the loaded config (if a config is available),
 *   3. the literal `"data"`.
 *
 * @returns The data root path as configured, without further normalization.
 */
export function getDataRoot(): string {
  // `||` rather than `??`: an empty-string value is treated as "not set"
  // and falls through to the next source.
  return process.env.DEEPSEC_DATA_ROOT || getConfig()?.dataDir || "data";
}

// Reject empty, '.', '..', absolute paths, null bytes, and any path
Expand Down
3 changes: 2 additions & 1 deletion packages/processor/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
"@openai/codex": "^0.125.0",
"@openai/codex-sdk": "^0.125.0",
"@deepsec/core": "workspace:*",
"@deepsec/scanner": "workspace:*"
"@deepsec/scanner": "workspace:*",
"zod": "^3.24.0"
}
}
48 changes: 48 additions & 0 deletions packages/processor/src/__tests__/process-revalidate.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,54 @@ describe("processor with stub agent", () => {
expect(rec.lockedByRunId).toBeFalsy();
});

it("process() prefers config project declaration over legacy data files", async () => {
  const fx = setupProject({ files: ["app.ts"] });
  fx.writeRecord(pendingRecord(fx.projectId, "app.ts"));

  // Seed both legacy per-project sources with marker strings that must NOT
  // end up in the prompt once the config declaration supplies the same fields.
  const legacyDir = path.join(fx.dataRoot, fx.projectId);
  const legacyConfigJson = JSON.stringify({ promptAppend: "from config.json" });
  fs.writeFileSync(path.join(legacyDir, "INFO.md"), "from info.md");
  fs.writeFileSync(path.join(legacyDir, "config.json"), legacyConfigJson);

  // Stub agent that reports zero findings for every file in the batch; the
  // test only inspects the prompt it was handed.
  const stub = new StubAgent({
    investigateImpl: async function* (params) {
      const emptyResults = params.batch.map((record) => ({
        filePath: record.filePath,
        findings: [],
      }));
      return { results: emptyResults, meta: { durationMs: 1 } };
    },
  });

  // NOTE(review): infoMarkdown and promptAppend share one marker string, so
  // the positive assertion below cannot tell which of the two reached the
  // prompt — consider distinct markers.
  const projectDeclaration = {
    id: fx.projectId,
    root: fx.targetRoot,
    infoMarkdown: "from config ts",
    promptAppend: "from config ts",
  };
  setLoadedConfig(
    defineConfig({
      projects: [projectDeclaration],
      plugins: [{ name: "stub", agents: [stub] }],
    }),
  );

  await processProject({ projectId: fx.projectId, agentType: "stub", concurrency: 1 });

  const renderedPrompt = stub.calls.investigateCalls[0].promptTemplate;
  expect(renderedPrompt).toContain("from config ts");
  expect(renderedPrompt).not.toContain("from info.md");
  expect(renderedPrompt).not.toContain("from config.json");
});

it("process() respects --limit", async () => {
const fx = setupProject({ files: ["a.ts", "b.ts", "c.ts"] });
fx.writeRecord(pendingRecord(fx.projectId, "a.ts"));
Expand Down
27 changes: 26 additions & 1 deletion packages/processor/src/__tests__/shared.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -262,7 +262,8 @@ describe("parseInvestigateResults", () => {
const batch = [{ filePath: "a.ts" } as any, { filePath: "b.ts" } as any];

it("matches results to batch files; fills missing with empty findings", () => {
const text = '```json\n[{"filePath":"a.ts","findings":[{"severity":"HIGH"}]}]\n```';
const text =
'```json\n[{"filePath":"a.ts","findings":[{"severity":"HIGH","vulnSlug":"x","title":"t","description":"d","lineNumbers":[1],"recommendation":"r","confidence":"high"}]}]\n```';
const out = parseInvestigateResults(text, batch);
expect(out.find((r) => r.filePath === "a.ts")?.findings.length).toBe(1);
expect(out.find((r) => r.filePath === "b.ts")?.findings).toEqual([]);
Expand All @@ -284,6 +285,24 @@ describe("parseInvestigateResults", () => {
/not an array/,
);
});

// Each case feeds a fenced JSON payload with exactly one schema violation in
// the single finding for "a.ts" and expects a schema-validation error.
it("throws when a finding has invalid severity", () => {
  // `severity` is not one of the accepted values; every other field is present.
  const agentOutput =
    '```json\n[{"filePath":"a.ts","findings":[{"severity":"INVALID","vulnSlug":"x","title":"t","description":"d","lineNumbers":[1],"recommendation":"r","confidence":"high"}]}]\n```';
  const attempt = () => parseInvestigateResults(agentOutput, batch);
  expect(attempt).toThrow(/schema validation/);
});

it("throws when a finding has invalid confidence", () => {
  // `confidence` is not one of the accepted values; every other field is present.
  const agentOutput =
    '```json\n[{"filePath":"a.ts","findings":[{"severity":"HIGH","vulnSlug":"x","title":"t","description":"d","lineNumbers":[1],"recommendation":"r","confidence":"INVALID"}]}]\n```';
  const attempt = () => parseInvestigateResults(agentOutput, batch);
  expect(attempt).toThrow(/schema validation/);
});

it("throws when a finding is missing a required field", () => {
  // Only `severity` and `vulnSlug` are supplied.
  const agentOutput =
    '```json\n[{"filePath":"a.ts","findings":[{"severity":"HIGH","vulnSlug":"x"}]}]\n```';
  const attempt = () => parseInvestigateResults(agentOutput, batch);
  expect(attempt).toThrow(/schema validation/);
});
});

describe("parseRevalidateVerdicts", () => {
Expand All @@ -298,4 +317,10 @@ describe("parseRevalidateVerdicts", () => {
it("throws on parse failure", () => {
expect(() => parseRevalidateVerdicts("garbage")).toThrow(/wasn't parseable JSON/);
});

it("throws when verdict is invalid", () => {
  // A well-formed fenced JSON array whose only entry carries an unrecognised
  // `verdict` value.
  const agentOutput =
    '```json\n[{"filePath":"a.ts","title":"x","verdict":"INVALID","reasoning":"r"}]\n```';
  const attempt = () => parseRevalidateVerdicts(agentOutput);
  expect(attempt).toThrow(/schema validation/);
});
});
34 changes: 29 additions & 5 deletions packages/processor/src/agents/shared.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
import { spawnSync } from "node:child_process";
import type { FileRecord, Finding, RefusalReport } from "@deepsec/core";
import type { FileRecord, RefusalReport } from "@deepsec/core";
import { findingSchema } from "@deepsec/core";
import { z } from "zod";
import type { InvestigateResult, RevalidateVerdict } from "./types.js";

// --- Retry / backoff -------------------------------------------------------
Expand Down Expand Up @@ -418,15 +420,23 @@ export function parseInvestigateResults(
throw new Error(`Agent produced JSON but not an array of file findings. Got: ${typeof parsed}`);
}

const typedParsed = parsed as Array<{ filePath: string; findings: Finding[] }>;
const investigateResultSchema = z.object({
filePath: z.string(),
findings: z.array(findingSchema),
});
const validation = z.array(investigateResultSchema).safeParse(parsed);
if (!validation.success) {
throw new Error(`Agent findings failed schema validation: ${validation.error.message}`);
}

const results: InvestigateResult[] = [];
const batchPaths = new Set(batch.map((r) => r.filePath));

for (const entry of typedParsed) {
for (const entry of validation.data) {
if (batchPaths.has(entry.filePath)) {
results.push({
filePath: entry.filePath,
findings: entry.findings || [],
findings: entry.findings,
});
batchPaths.delete(entry.filePath);
}
Expand Down Expand Up @@ -568,5 +578,19 @@ export function parseRevalidateVerdicts(resultText: string): RevalidateVerdict[]
if (!Array.isArray(parsed)) {
throw new Error(`Agent produced revalidation JSON but not an array. Got: ${typeof parsed}`);
}
return parsed as RevalidateVerdict[];

const revalidateVerdictSchema = z.object({
filePath: z.string(),
title: z.string(),
verdict: z.enum(["true-positive", "false-positive", "fixed", "uncertain"]),
reasoning: z.string(),
adjustedSeverity: z.enum(["CRITICAL", "HIGH", "MEDIUM", "HIGH_BUG", "BUG"]).optional(),
});
const validation = z.array(revalidateVerdictSchema).safeParse(parsed);
if (!validation.success) {
throw new Error(
`Agent revalidation verdicts failed schema validation: ${validation.error.message}`,
);
}
return validation.data as RevalidateVerdict[];
}
21 changes: 20 additions & 1 deletion packages/processor/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import {
createRunMeta,
dataDir,
defaultConcurrency,
findProject,
getRegistry,
loadAllFileRecords,
readFileRecord,
Expand Down Expand Up @@ -179,14 +180,21 @@ export async function process(params: {
manifestFilePaths = new Set(raw as string[]);
}

// Load project INFO.md if it exists
// Resolve project context with clear precedence:
// 1. deepsec.config.ts project declaration (modern source of truth)
// 2. Legacy data/<id>/INFO.md and data/<id>/config.json
const projectDecl = findProject(projectId);

const infoPath = path.join(dataDir(projectId), "INFO.md");
let projectInfo = "";
try {
projectInfo = fs.readFileSync(infoPath, "utf-8");
} catch {
// No INFO.md — that's fine
}
if (projectDecl?.infoMarkdown !== undefined) {
projectInfo = projectDecl.infoMarkdown;
}

// Load project config.json for prompt customization and priority
const projectConfigJsonPath = path.join(dataDir(projectId), "config.json");
Expand All @@ -199,6 +207,12 @@ export async function process(params: {
} catch {
// No config.json — that's fine
}
if (projectDecl?.promptAppend !== undefined) {
projectConfig.promptAppend = projectDecl.promptAppend;
}
if (projectDecl?.priorityPaths !== undefined) {
projectConfig.priorityPaths = projectDecl.priorityPaths;
}

// Tech detection result drives per-batch threat highlights. Read once
// from `data/<id>/tech.json` (written by `scan()`); empty list when the
Expand Down Expand Up @@ -837,11 +851,16 @@ export async function revalidate(params: {
manifestFilePaths = new Set(raw as string[]);
}

const projectDecl = findProject(projectId);

const infoPath = path.join(dataDir(projectId), "INFO.md");
let projectInfo = "";
try {
projectInfo = fs.readFileSync(infoPath, "utf-8");
} catch {}
if (projectDecl?.infoMarkdown !== undefined) {
projectInfo = projectDecl.infoMarkdown;
}

const model = (config.model as string) ?? "claude-opus-4-7";

Expand Down
Loading