From b5718343e8984dd43e780bbb427f7d09a6ccf79a Mon Sep 17 00:00:00 2001 From: William Hill Date: Sun, 3 May 2026 11:05:17 -0400 Subject: [PATCH 1/2] feat(analyze): runtime FERPA guard for NLQ SELECT + vitest (#127) - Add lib/sql-inspector.ts: conservative SELECT-clause check (top-level FROM, * , excluded ids) - Return 422 FERPA exclusion violated from /api/analyze when LLM SQL violates - Tests for happy path, WHERE-only GUID, projections, alias, SELECT *, empty list - Document runtime guard in ai-transparency nlq-analyzer notes Co-authored-by: Cursor --- .../__tests__/ferpa-exclusions.test.ts | 39 ++++++++++ .../app/api/analyze/route.ts | 14 ++++ .../content/ai-transparency.ts | 2 +- codebenders-dashboard/lib/sql-inspector.ts | 74 +++++++++++++++++++ codebenders-dashboard/vitest.config.ts | 2 +- 5 files changed, 129 insertions(+), 2 deletions(-) create mode 100644 codebenders-dashboard/app/api/analyze/__tests__/ferpa-exclusions.test.ts create mode 100644 codebenders-dashboard/lib/sql-inspector.ts diff --git a/codebenders-dashboard/app/api/analyze/__tests__/ferpa-exclusions.test.ts b/codebenders-dashboard/app/api/analyze/__tests__/ferpa-exclusions.test.ts new file mode 100644 index 0000000..bcf28ab --- /dev/null +++ b/codebenders-dashboard/app/api/analyze/__tests__/ferpa-exclusions.test.ts @@ -0,0 +1,39 @@ +import { describe, it, expect } from "vitest" +import { inspectSelectForFerpaExclusions } from "@/lib/sql-inspector" + +const excluded = ["Student_GUID", "student_guid"] as const + +describe("inspectSelectForFerpaExclusions", () => { + it("happy path: cohort aggregate without GUID in SELECT", () => { + const sql = `SELECT "Cohort", AVG("Retention") FROM student_level_with_predictions GROUP BY "Cohort"` + expect(inspectSelectForFerpaExclusions(sql, excluded)).toEqual({ ok: true }) + }) + + it("allows GUID only in WHERE", () => { + const sql = `SELECT COUNT(*) FROM student_level_with_predictions WHERE "Student_GUID" = 'foo'` + expect(inspectSelectForFerpaExclusions(sql, excluded)).toEqual({ ok: true }) + }) + + it("rejects direct GUID projection", () => { + const sql = `SELECT "Student_GUID", "Cohort" FROM student_level_with_predictions` + const r = inspectSelectForFerpaExclusions(sql, excluded) + expect(r).toEqual({ ok: false, violation: "Student_GUID" }) + }) + + it("rejects aliased GUID projection", () => { + const sql = `SELECT "Student_GUID" AS sid FROM student_level_with_predictions` + const r = inspectSelectForFerpaExclusions(sql, excluded) + expect(r.ok).toBe(false) + if (!r.ok) expect(r.violation).toBe("Student_GUID") + }) + + it("rejects SELECT *", () => { + const sql = `SELECT * FROM student_level_with_predictions` + const r = inspectSelectForFerpaExclusions(sql, excluded) + expect(r).toEqual({ ok: false, violation: "*" }) + }) + + it("allows SELECT * when exclusion list is empty", () => { + expect(inspectSelectForFerpaExclusions(`SELECT * FROM t`, [])).toEqual({ ok: true }) + }) +}) diff --git a/codebenders-dashboard/app/api/analyze/route.ts b/codebenders-dashboard/app/api/analyze/route.ts index 042b26b..e4afa38 100644 --- a/codebenders-dashboard/app/api/analyze/route.ts +++ b/codebenders-dashboard/app/api/analyze/route.ts @@ -2,6 +2,7 @@ import { type NextRequest, NextResponse } from "next/server" import { streamObject } from "ai" import { createOpenAI } from "@ai-sdk/openai" import { z } from "zod" +import { inspectSelectForFerpaExclusions } from "@/lib/sql-inspector" const openai = createOpenAI({ apiKey: process.env.OPENAI_API_KEY || "", @@ -240,6 +241,19 @@ Make sure the SQL is valid PostgreSQL and addresses exactly what the user asked queryString: finalObject.queryString || "", } + const sql = typeof result.sql === "string" ? result.sql : "" + const ferpaExcluded = schemaInfo.ferpaExcluded ?? [] + if (ferpaExcluded.length > 0 && sql) { + const ferpaCheck = inspectSelectForFerpaExclusions(sql, ferpaExcluded) + if (!ferpaCheck.ok) { + console.warn("[analyze] FERPA exclusion violated:", ferpaCheck.violation) + return NextResponse.json( + { error: "FERPA exclusion violated", column: ferpaCheck.violation }, + { status: 422 } + ) + } + } + return NextResponse.json(result) } catch (error) { console.error("[analyze] Error:", error) diff --git a/codebenders-dashboard/content/ai-transparency.ts b/codebenders-dashboard/content/ai-transparency.ts index d713626..cb83f31 100644 --- a/codebenders-dashboard/content/ai-transparency.ts +++ b/codebenders-dashboard/content/ai-transparency.ts @@ -242,7 +242,7 @@ export const AI_SURFACES: AISurface[] = [ retentionPolicy: "OpenAI's API data-handling policy applies to the prompt and schema description. As of this writing, OpenAI states that data submitted via the API is not used to train their models. We do not separately log prompts or responses to a third-party store.", notes: - "If `OPENAI_API_KEY` is not configured, the route returns a 500 error and the client falls back to the rule-based analyzer (`prompt-analyzer.ts`, see next entry).", + "If `OPENAI_API_KEY` is not configured, the route returns a 500 error and the client falls back to the rule-based analyzer (`prompt-analyzer.ts`, see next entry). After the model returns SQL, `lib/sql-inspector.ts` runs a conservative SELECT-clause check against `schemaInfo.ferpaExcluded` (e.g. `Student_GUID` / `student_guid`); violations return HTTP 422 and are logged server-side only — not only prompt instructions.", }, { id: "nlq-rule-based-fallback", diff --git a/codebenders-dashboard/lib/sql-inspector.ts b/codebenders-dashboard/lib/sql-inspector.ts new file mode 100644 index 0000000..4574dd4 --- /dev/null +++ b/codebenders-dashboard/lib/sql-inspector.ts @@ -0,0 +1,74 @@ +/** + * Lightweight SELECT-clause inspection for FERPA-style column exclusions. + * Conservative: unknown shapes or unparseable SQL → not ok. Not a full SQL parser. + */ + +function escapeRegex(s: string): string { + return s.replace(/[.*+?^${}()|[\]\\]/g, "\\$&") +} + +function splitTopLevelCommaItems(expr: string): string[] { + const items: string[] = [] + let depth = 0 + let start = 0 + for (let i = 0; i < expr.length; i++) { + const ch = expr[i] + if (ch === "(") depth++ + else if (ch === ")") depth-- + else if (ch === "," && depth === 0) { + items.push(expr.slice(start, i).trim()) + start = i + 1 + } + } + items.push(expr.slice(start).trim()) + return items +} + +/** SELECT list only: stops at the first top-level FROM (parenthesis depth 0). */ +function extractSelectClause(sql: string): string | null { + const m = /\bselect\s+/i.exec(sql) + if (!m || m.index === undefined) return null + let i = m.index + m[0].length + let depth = 0 + for (; i < sql.length; i++) { + const ch = sql[i] + if (ch === "(") depth++ + else if (ch === ")") depth-- + else if (depth === 0 && /^from\b/i.test(sql.slice(i))) { + return sql.slice(m.index + m[0].length, i).trim() + } + } + return null +} + +export function inspectSelectForFerpaExclusions( + sql: string, + excluded: readonly string[] +): { ok: true } | { ok: false; violation: string } { + if (!excluded.length) return { ok: true } + + const rawClause = extractSelectClause(sql) + if (rawClause === null) return { ok: false, violation: excluded[0] } + + const inner = rawClause.replace(/^\s*distinct\s+/i, "").trim() + if (!inner) return { ok: false, violation: excluded[0] } + + for (const item of splitTopLevelCommaItems(inner)) { + if (/^\*\s*$/.test(item)) { + return { ok: false, violation: "*" } + } + } + + for (const col of excluded) { + const quoted = `"${col.replace(/"/g, '""')}"` + if (inner.includes(quoted)) { + return { ok: false, violation: col } + } + const re = new RegExp(`\\b${escapeRegex(col)}\\b`, "i") + if (re.test(inner)) { + return { ok: false, violation: col } + } + } + + return { ok: true } +} diff --git a/codebenders-dashboard/vitest.config.ts b/codebenders-dashboard/vitest.config.ts index d8e2da6..a4a4079 100644 --- a/codebenders-dashboard/vitest.config.ts +++ b/codebenders-dashboard/vitest.config.ts @@ -4,7 +4,7 @@ import path from "path" export default defineConfig({ test: { environment: "node", - include: ["lib/__tests__/**/*.test.ts"], + include: ["lib/__tests__/**/*.test.ts", "app/api/analyze/__tests__/**/*.test.ts"], }, resolve: { alias: { From a1b47d82c552a8994dce70d378794df0289ecbc2 Mon Sep 17 00:00:00 2001 From: William Hill Date: Sun, 3 May 2026 11:11:41 -0400 Subject: [PATCH 2/2] refactor: simplify FERPA sql inspector and analyze guard (code-simplifier) - sql-inspector: listStart, unified selectList empty handling, .some for * - route: ferpaBlockedResponse helper for 422 path - tests: table-driven rejection cases Co-authored-by: Cursor --- .../__tests__/ferpa-exclusions.test.ts | 42 +++++++++++-------- .../app/api/analyze/route.ts | 31 ++++++++------ codebenders-dashboard/lib/sql-inspector.ts | 25 +++++------ 3 files changed, 54 insertions(+), 44 deletions(-) diff --git a/codebenders-dashboard/app/api/analyze/__tests__/ferpa-exclusions.test.ts b/codebenders-dashboard/app/api/analyze/__tests__/ferpa-exclusions.test.ts index bcf28ab..615558e 100644 --- a/codebenders-dashboard/app/api/analyze/__tests__/ferpa-exclusions.test.ts +++ b/codebenders-dashboard/app/api/analyze/__tests__/ferpa-exclusions.test.ts @@ -14,24 +14,32 @@ describe("inspectSelectForFerpaExclusions", () => { expect(inspectSelectForFerpaExclusions(sql, excluded)).toEqual({ ok: true }) }) - it("rejects direct GUID projection", () => { - const sql = `SELECT "Student_GUID", "Cohort" FROM student_level_with_predictions` - const r = inspectSelectForFerpaExclusions(sql, excluded) - expect(r).toEqual({ ok: false, violation: "Student_GUID" }) - }) - - it("rejects aliased GUID projection", () => { - const sql = `SELECT "Student_GUID" AS sid FROM student_level_with_predictions` - const r = inspectSelectForFerpaExclusions(sql, excluded) - expect(r.ok).toBe(false) - if (!r.ok) expect(r.violation).toBe("Student_GUID") - }) + const rejections: { title: string; sql: string; violation: string }[] = [ + { + title: "direct GUID projection", + sql: `SELECT "Student_GUID", "Cohort" FROM student_level_with_predictions`, + violation: "Student_GUID", + }, + { + title: "aliased GUID projection", + sql: `SELECT "Student_GUID" AS sid FROM student_level_with_predictions`, + violation: "Student_GUID", + }, + { + title: "SELECT *", + sql: `SELECT * FROM student_level_with_predictions`, + violation: "*", + }, + ] - it("rejects SELECT *", () => { - const sql = `SELECT * FROM student_level_with_predictions` - const r = inspectSelectForFerpaExclusions(sql, excluded) - expect(r).toEqual({ ok: false, violation: "*" }) - }) + for (const { title, sql, violation } of rejections) { + it(`rejects ${title}`, () => { + expect(inspectSelectForFerpaExclusions(sql, excluded)).toEqual({ + ok: false, + violation, + }) + }) + } it("allows SELECT * when exclusion list is empty", () => { expect(inspectSelectForFerpaExclusions(`SELECT * FROM t`, [])).toEqual({ ok: true }) diff --git a/codebenders-dashboard/app/api/analyze/route.ts b/codebenders-dashboard/app/api/analyze/route.ts index e4afa38..2e96b0c 100644 --- a/codebenders-dashboard/app/api/analyze/route.ts +++ b/codebenders-dashboard/app/api/analyze/route.ts @@ -109,6 +109,20 @@ export async function GET() { return NextResponse.json({ status: "ok", message: "Analyze route is loaded" }) } +function ferpaBlockedResponse( + sql: string, + ferpaExcluded: readonly string[] +): NextResponse | null { + if (!ferpaExcluded.length || !sql) return null + const check = inspectSelectForFerpaExclusions(sql, ferpaExcluded) + if (check.ok) return null + console.warn("[analyze] FERPA exclusion violated:", check.violation) + return NextResponse.json( + { error: "FERPA exclusion violated", column: check.violation }, + { status: 422 } + ) +} + export async function POST(request: NextRequest) { try { const { prompt, institution } = await request.json() @@ -241,18 +255,11 @@ Make sure the SQL is valid PostgreSQL and addresses exactly what the user asked queryString: finalObject.queryString || "", } - const sql = typeof result.sql === "string" ? result.sql : "" - const ferpaExcluded = schemaInfo.ferpaExcluded ?? [] - if (ferpaExcluded.length > 0 && sql) { - const ferpaCheck = inspectSelectForFerpaExclusions(sql, ferpaExcluded) - if (!ferpaCheck.ok) { - console.warn("[analyze] FERPA exclusion violated:", ferpaCheck.violation) - return NextResponse.json( - { error: "FERPA exclusion violated", column: ferpaCheck.violation }, - { status: 422 } - ) - } - } + const blocked = ferpaBlockedResponse( + typeof result.sql === "string" ? result.sql : "", + schemaInfo.ferpaExcluded ?? [] + ) + if (blocked) return blocked return NextResponse.json(result) } catch (error) { diff --git a/codebenders-dashboard/lib/sql-inspector.ts b/codebenders-dashboard/lib/sql-inspector.ts index 4574dd4..782483e 100644 --- a/codebenders-dashboard/lib/sql-inspector.ts +++ b/codebenders-dashboard/lib/sql-inspector.ts @@ -28,14 +28,14 @@ function splitTopLevelCommaItems(expr: string): string[] { function extractSelectClause(sql: string): string | null { const m = /\bselect\s+/i.exec(sql) if (!m || m.index === undefined) return null - let i = m.index + m[0].length + const listStart = m.index + m[0].length let depth = 0 - for (; i < sql.length; i++) { + for (let i = listStart; i < sql.length; i++) { const ch = sql[i] if (ch === "(") depth++ else if (ch === ")") depth-- else if (depth === 0 && /^from\b/i.test(sql.slice(i))) { - return sql.slice(m.index + m[0].length, i).trim() + return sql.slice(listStart, i).trim() } } return null @@ -47,25 +47,20 @@ export function inspectSelectForFerpaExclusions( ): { ok: true } | { ok: false; violation: string } { if (!excluded.length) return { ok: true } - const rawClause = extractSelectClause(sql) - if (rawClause === null) return { ok: false, violation: excluded[0] } + const raw = extractSelectClause(sql) + const selectList = (raw?.replace(/^\s*distinct\s+/i, "").trim()) ?? "" + if (!selectList) return { ok: false, violation: excluded[0] } - const inner = rawClause.replace(/^\s*distinct\s+/i, "").trim() - if (!inner) return { ok: false, violation: excluded[0] } - - for (const item of splitTopLevelCommaItems(inner)) { - if (/^\*\s*$/.test(item)) { - return { ok: false, violation: "*" } - } + if (splitTopLevelCommaItems(selectList).some((item) => /^\*\s*$/.test(item))) { + return { ok: false, violation: "*" } } for (const col of excluded) { const quoted = `"${col.replace(/"/g, '""')}"` - if (inner.includes(quoted)) { + if (selectList.includes(quoted)) { return { ok: false, violation: col } } - const re = new RegExp(`\\b${escapeRegex(col)}\\b`, "i") - if (re.test(inner)) { + if (new RegExp(`\\b${escapeRegex(col)}\\b`, "i").test(selectList)) { return { ok: false, violation: col } } }