From 72922fdbb35839e3d5c02fe6b6fe74a877c47387 Mon Sep 17 00:00:00 2001 From: Simantak Dabhade Date: Wed, 27 May 2026 13:05:29 -0700 Subject: [PATCH] Harden PK validation: reject empty primary_keys, dedupe declared PKs - Reject empty primary_keys object in run_subagent schema with .refine() so the orchestrator gets a validation error instead of silently dispatching a subagent with no entity identity - Dedupe declared primary_key array in schema inference validation to catch LLM-produced duplicates like ["name", "name"] before they cause confusing membership check failures Follow-up to #97 per CodeRabbit review. Co-Authored-By: Claude Opus 4.6 --- backend/src/mastra/tools/investigate-tool.ts | 3 +++ backend/src/pipeline/types.ts | 4 +++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/backend/src/mastra/tools/investigate-tool.ts b/backend/src/mastra/tools/investigate-tool.ts index 7f56c43..7746ef2 100644 --- a/backend/src/mastra/tools/investigate-tool.ts +++ b/backend/src/mastra/tools/investigate-tool.ts @@ -12,6 +12,9 @@ const investigateInputSchema = z.object({ ), primary_keys: z .record(z.string(), z.string()) + .refine((v) => Object.keys(v).length > 0, { + message: "primary_keys must include at least one primary-key value", + }) .describe( "REQUIRED: the primary key column value(s) for this entity. e.g. {\"Company Name\": \"Stripe\"} or {\"First Name\": \"John\", \"Last Name\": \"Doe\"}. You MUST provide at least the primary key values you have found.", ), diff --git a/backend/src/pipeline/types.ts b/backend/src/pipeline/types.ts index 0968c2c..e0b95f9 100644 --- a/backend/src/pipeline/types.ts +++ b/backend/src/pipeline/types.ts @@ -61,10 +61,12 @@ export const datasetSchemaSchema = z } const pkNames = pkCols.map((c) => c.name); - const declaredPk = Array.isArray(data.primary_key) + const declaredPkRaw = Array.isArray(data.primary_key) ? data.primary_key : [data.primary_key]; + const declaredPk = [...new Set(declaredPkRaw)]; if ( + declaredPk.length !== declaredPkRaw.length || declaredPk.length !== pkNames.length || !declaredPk.every((n) => pkNames.includes(n)) ) {