diff --git a/codebenders-dashboard/app/api/analyze/__tests__/ferpa-exclusions.test.ts b/codebenders-dashboard/app/api/analyze/__tests__/ferpa-exclusions.test.ts
new file mode 100644
index 0000000..615558e
--- /dev/null
+++ b/codebenders-dashboard/app/api/analyze/__tests__/ferpa-exclusions.test.ts
@@ -0,0 +1,82 @@
+import { describe, it, expect } from "vitest"
+import { inspectSelectForFerpaExclusions } from "@/lib/sql-inspector"
+
+const excluded = ["Student_GUID", "student_guid"] as const
+
+describe("inspectSelectForFerpaExclusions", () => {
+  it("happy path: cohort aggregate without GUID in SELECT", () => {
+    const sql = `SELECT "Cohort", AVG("Retention") FROM student_level_with_predictions GROUP BY "Cohort"`
+    expect(inspectSelectForFerpaExclusions(sql, excluded)).toEqual({ ok: true })
+  })
+
+  it("allows GUID only in WHERE", () => {
+    const sql = `SELECT COUNT(*) FROM student_level_with_predictions WHERE "Student_GUID" = 'foo'`
+    expect(inspectSelectForFerpaExclusions(sql, excluded)).toEqual({ ok: true })
+  })
+
+  it("allows arithmetic with * in the SELECT list", () => {
+    const sql = `SELECT "Cohort", AVG("Retention") * 100 FROM student_level_with_predictions GROUP BY "Cohort"`
+    expect(inspectSelectForFerpaExclusions(sql, excluded)).toEqual({ ok: true })
+  })
+
+  const rejections: { title: string; sql: string; violation: string }[] = [
+    {
+      title: "direct GUID projection",
+      sql: `SELECT "Student_GUID", "Cohort" FROM student_level_with_predictions`,
+      violation: "Student_GUID",
+    },
+    {
+      title: "aliased GUID projection",
+      sql: `SELECT "Student_GUID" AS sid FROM student_level_with_predictions`,
+      violation: "Student_GUID",
+    },
+    {
+      title: "SELECT *",
+      sql: `SELECT * FROM student_level_with_predictions`,
+      violation: "*",
+    },
+    {
+      title: "qualified wildcard",
+      sql: `SELECT s.* FROM student_level_with_predictions s`,
+      violation: "*",
+    },
+    {
+      title: "GUID in a later UNION branch",
+      sql: `SELECT "Cohort" FROM student_level_with_predictions UNION ALL SELECT "Student_GUID" FROM student_level_with_predictions`,
+      violation: "Student_GUID",
+    },
+    {
+      title: "GUID in the outer SELECT after a CTE",
+      sql: `WITH c AS (SELECT "Cohort" FROM student_level_with_predictions) SELECT "Student_GUID" FROM student_level_with_predictions`,
+      violation: "Student_GUID",
+    },
+    {
+      title: "GUID after a string literal containing FROM",
+      sql: `SELECT 'from' AS label, "Student_GUID" FROM student_level_with_predictions`,
+      violation: "Student_GUID",
+    },
+    {
+      title: "GUID after an identifier ending in from",
+      sql: `SELECT datefrom, "Student_GUID" FROM student_level_with_predictions`,
+      violation: "Student_GUID",
+    },
+    {
+      title: "GUID after IS DISTINCT FROM",
+      sql: `SELECT "Cohort" IS DISTINCT FROM 'x', "Student_GUID" FROM student_level_with_predictions`,
+      violation: "Student_GUID",
+    },
+  ]
+
+  for (const { title, sql, violation } of rejections) {
+    it(`rejects ${title}`, () => {
+      expect(inspectSelectForFerpaExclusions(sql, excluded)).toEqual({
+        ok: false,
+        violation,
+      })
+    })
+  }
+
+  it("allows SELECT * when exclusion list is empty", () => {
+    expect(inspectSelectForFerpaExclusions(`SELECT * FROM t`, [])).toEqual({ ok: true })
+  })
+})
diff --git a/codebenders-dashboard/app/api/analyze/route.ts b/codebenders-dashboard/app/api/analyze/route.ts
index 042b26b..2e96b0c 100644
--- a/codebenders-dashboard/app/api/analyze/route.ts
+++ b/codebenders-dashboard/app/api/analyze/route.ts
@@ -2,6 +2,7 @@ import { type NextRequest, NextResponse } from "next/server"
 import { streamObject } from "ai"
 import { createOpenAI } from "@ai-sdk/openai"
 import { z } from "zod"
+import { inspectSelectForFerpaExclusions } from "@/lib/sql-inspector"
 
 const openai = createOpenAI({
   apiKey: process.env.OPENAI_API_KEY || "",
@@ -108,6 +109,20 @@ export async function GET() {
   return NextResponse.json({ status: "ok", message: "Analyze route is loaded" })
 }
 
+function ferpaBlockedResponse(
+  sql: string,
+  ferpaExcluded: readonly string[]
+): NextResponse | null {
+  if (!ferpaExcluded.length || !sql) return null
+  const check = inspectSelectForFerpaExclusions(sql, ferpaExcluded)
+  if (check.ok) return null
+  console.warn("[analyze] FERPA exclusion violated:", check.violation)
+  return NextResponse.json(
+    { error: "FERPA exclusion violated", column: check.violation },
+    { status: 422 }
+  )
+}
+
 export async function POST(request: NextRequest) {
   try {
     const { prompt, institution } = await request.json()
@@ -240,6 +255,12 @@ Make sure the SQL is valid PostgreSQL and addresses exactly what the user asked
       queryString: finalObject.queryString || "",
     }
 
+    const blocked = ferpaBlockedResponse(
+      typeof result.sql === "string" ? result.sql : "",
+      schemaInfo.ferpaExcluded ?? []
+    )
+    if (blocked) return blocked
+
     return NextResponse.json(result)
   } catch (error) {
     console.error("[analyze] Error:", error)
diff --git a/codebenders-dashboard/content/ai-transparency.ts b/codebenders-dashboard/content/ai-transparency.ts
index d713626..cb83f31 100644
--- a/codebenders-dashboard/content/ai-transparency.ts
+++ b/codebenders-dashboard/content/ai-transparency.ts
@@ -242,7 +242,7 @@ export const AI_SURFACES: AISurface[] = [
     retentionPolicy:
       "OpenAI's API data-handling policy applies to the prompt and schema description. As of this writing, OpenAI states that data submitted via the API is not used to train their models. We do not separately log prompts or responses to a third-party store.",
     notes:
-      "If `OPENAI_API_KEY` is not configured, the route returns a 500 error and the client falls back to the rule-based analyzer (`prompt-analyzer.ts`, see next entry).",
+      "If `OPENAI_API_KEY` is not configured, the route returns a 500 error and the client falls back to the rule-based analyzer (`prompt-analyzer.ts`, see next entry). After the model returns SQL, `lib/sql-inspector.ts` runs a conservative check of every SELECT list against `schemaInfo.ferpaExcluded` (e.g. `Student_GUID` / `student_guid`); violations return HTTP 422 and are logged server-side only, so the exclusion is enforced in code rather than by prompt instructions alone.",
   },
   {
     id: "nlq-rule-based-fallback",
diff --git a/codebenders-dashboard/lib/sql-inspector.ts b/codebenders-dashboard/lib/sql-inspector.ts
new file mode 100644
index 0000000..782483e
--- /dev/null
+++ b/codebenders-dashboard/lib/sql-inspector.ts
@@ -0,0 +1,145 @@
+/**
+ * Lightweight SELECT-list inspection for FERPA-style column exclusions.
+ * Conservative: unknown shapes or unparseable SQL → not ok. Not a full SQL parser.
+ */
+
+/** String literals, quoted identifiers and comments: text that is not SQL syntax. */
+const NON_SYNTAX = /'(?:[^']|'')*'|"(?:[^"]|"")*"|--[^\n]*|\/\*[\s\S]*?\*\//g
+
+/** Survives masking only after an unterminated quote/comment or with dollar quoting. */
+const UNSCANNABLE = /['"]|\/\*|\*\/|\$(?:[A-Za-z_]\w*)?\$/
+
+interface SyntaxToken {
+  text: string
+  at: number
+}
+
+interface SelectList {
+  /** Original text, used for column-name matching. */
+  raw: string
+  /** Same span with non-syntax text blanked out, used for structure. */
+  syntax: string
+}
+
+function escapeRegex(s: string): string {
+  return s.replace(/[.*+?^${}()|[\]\\]/g, "\\$&")
+}
+
+/**
+ * Same-length copy of `sql` with string literals, quoted identifiers and
+ * comments replaced by spaces, so every keyword, parenthesis and comma left
+ * in the result is real syntax. Null when the input cannot be scanned safely.
+ */
+function maskNonSyntax(sql: string): string | null {
+  const masked = sql.replace(NON_SYNTAX, (m) => " ".repeat(m.length))
+  return UNSCANNABLE.test(masked) ? null : masked
+}
+
+/** Structural tokens of masked SQL, lower-cased, with their offsets. */
+function tokenize(syntax: string): SyntaxToken[] {
+  // `IS DISTINCT FROM` is matched first and dropped so that its FROM is not
+  // mistaken for the FROM clause.
+  const re = /[();]|\bdistinct\s+from\b|\b(?:select|from)\b/gi
+  const tokens: SyntaxToken[] = []
+  for (let m = re.exec(syntax); m !== null; m = re.exec(syntax)) {
+    const text = m[0].toLowerCase()
+    if (!text.startsWith("distinct")) tokens.push({ text, at: m.index })
+  }
+  return tokens
+}
+
+function splitTopLevelCommaItems(expr: string): string[] {
+  const items: string[] = []
+  let depth = 0
+  let start = 0
+  for (let i = 0; i < expr.length; i++) {
+    const ch = expr[i]
+    if (ch === "(") depth++
+    else if (ch === ")") depth--
+    else if (ch === "," && depth === 0) {
+      items.push(expr.slice(start, i).trim())
+      start = i + 1
+    }
+  }
+  items.push(expr.slice(start).trim())
+  return items
+}
+
+/**
+ * Every SELECT list in the statement: top level, CTEs, subqueries and
+ * UNION/INTERSECT/EXCEPT branches. Each list runs from its SELECT keyword to
+ * the first FROM at the same parenthesis depth. Null when the SQL cannot be
+ * scanned, contains no SELECT, or has a SELECT without such a FROM.
+ */
+function extractSelectLists(sql: string): SelectList[] | null {
+  const syntax = maskNonSyntax(sql)
+  if (syntax === null) return null
+
+  const tokens = tokenize(syntax)
+  const lists: SelectList[] = []
+  for (const [t, tok] of tokens.entries()) {
+    if (tok.text !== "select") continue
+    const start = tok.at + tok.text.length
+    let depth = 0
+    let end = -1
+    for (const next of tokens.slice(t + 1)) {
+      if (next.text === "(") {
+        depth++
+      } else if (next.text === ")") {
+        // Closing the parenthesis that encloses this SELECT: no FROM found.
+        if (depth === 0) break
+        depth--
+      } else if (depth === 0) {
+        // `from` ends the list and `;` ends the statement; a `select` here is
+        // skipped because it gets its own pass of the outer loop.
+        if (next.text === "from") end = next.at
+        if (next.text !== "select") break
+      }
+    }
+    if (end < 0) return null
+    lists.push({ raw: sql.slice(start, end), syntax: syntax.slice(start, end) })
+  }
+  return lists.length > 0 ? lists : null
+}
+
+/**
+ * Checks that no SELECT list in `sql` projects an excluded column.
+ *
+ * A list is rejected when one of its top-level items is a wildcard (`*` or
+ * `alias.*`, reported as `"*"`) or when its text mentions an excluded name,
+ * quoted or bare (case-insensitive). Names used only outside SELECT lists,
+ * e.g. in WHERE, are allowed. SQL that cannot be scanned is rejected and
+ * reported against the first excluded name. An empty exclusion list allows
+ * everything.
+ */
+export function inspectSelectForFerpaExclusions(
+  sql: string,
+  excluded: readonly string[]
+): { ok: true } | { ok: false; violation: string } {
+  const fallback = excluded[0]
+  if (fallback === undefined) return { ok: true }
+
+  const lists = extractSelectLists(sql)
+  if (lists === null) return { ok: false, violation: fallback }
+
+  // Built once: the same matchers are applied to every SELECT list.
+  const matchers = excluded.map((col) => ({
+    col,
+    quoted: `"${col.replace(/"/g, '""')}"`,
+    bare: new RegExp(`\\b${escapeRegex(col)}\\b`, "i"),
+  }))
+
+  for (const { raw, syntax } of lists) {
+    if (!raw.trim()) return { ok: false, violation: fallback }
+
+    // No valid expression ends in `*`, so such an item is `*` or `alias.*`.
+    if (splitTopLevelCommaItems(syntax).some((item) => item.endsWith("*"))) {
+      return { ok: false, violation: "*" }
+    }
+
+    const hit = matchers.find((m) => raw.includes(m.quoted) || m.bare.test(raw))
+    if (hit) return { ok: false, violation: hit.col }
+  }
+
+  return { ok: true }
+}
diff --git a/codebenders-dashboard/vitest.config.ts b/codebenders-dashboard/vitest.config.ts
index d8e2da6..a4a4079 100644
--- a/codebenders-dashboard/vitest.config.ts
+++ b/codebenders-dashboard/vitest.config.ts
@@ -4,7 +4,7 @@ import path from "path"
 export default defineConfig({
   test: {
     environment: "node",
-    include: ["lib/__tests__/**/*.test.ts"],
+    include: ["lib/__tests__/**/*.test.ts", "app/api/analyze/__tests__/**/*.test.ts"],
   },
   resolve: {
     alias: {