Skip to content

Commit 757a378

Browse files
drAbreu and claude committed
Strengthen panel label guardrails in QC pipeline
Prepend panel label constraint to system prompt and add post-response filter that discards panels with labels not in the expected set, preventing downstream issues from AI-invented subdivisions or descriptive labels. Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
1 parent 252cbdb commit 757a378

4 files changed

Lines changed: 61 additions & 10 deletions

File tree

config.qc.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
qc_version: "2.3.2"
1+
qc_version: "2.3.3"
22
qc_check_metadata:
33
panel:
44
plot_axis_units:

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"
44

55
[tool.poetry]
66
name = "soda-curation"
7-
version = "2.5.7"
7+
version = "2.5.8"
88
description = "A professional Python package for data curation with AI capabilities"
99
authors = ["Dr. Jorge Abreu Vicente <jorge.abreu@embo.org>"]
1010
license = "MIT"

src/soda_curation/qc/base_analyzers.py

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,9 @@ def analyze_figure(
9898
# Process and validate the response
9999
result = self.process_response(response)
100100

101+
# Filter out panels with labels not in expected_panels
102+
result = self._filter_valid_panels(result, expected_panels)
103+
101104
# Check if the test passed
102105
passed = self.check_test_passed(result)
103106

@@ -107,6 +110,47 @@ def analyze_figure(
107110
logger.error(f"Error analyzing figure with {self.test_name}: {str(e)}")
108111
return False, self.create_empty_result()
109112

113+
@staticmethod
def _filter_valid_panels(
    result: Dict, expected_panels: Optional[List[str]]
) -> Dict:
    """Drop response outputs whose panel label is not an expected one.

    Filtering only happens when ``expected_panels`` is non-empty and
    ``result`` is a dict holding a non-empty ``"outputs"`` entry; in every
    other case ``result`` is handed back untouched.

    Args:
        result: Parsed response; its ``"outputs"`` entries may be dicts
            (label read from the ``"panel_label"`` key) or objects
            (label read from a ``panel_label`` attribute).
        expected_panels: Allowed panel labels for this figure.

    Returns:
        The same ``result`` object, with ``"outputs"`` replaced by a list
        containing only the entries whose label is in ``expected_panels``.
        A warning is logged for every entry that is discarded.
    """
    # Guard: nothing to validate against, or not a dict we can filter.
    if not (expected_panels and isinstance(result, dict)):
        return result

    candidates = result.get("outputs", [])
    if not candidates:
        return result

    def _label_of(entry):
        # Dict entries carry the label as a key, objects as an attribute;
        # anything else has no label and will therefore be rejected.
        if isinstance(entry, dict):
            return entry.get("panel_label")
        return getattr(entry, "panel_label", None)

    kept = []
    for entry in candidates:
        label = _label_of(entry)
        if label not in expected_panels:
            logger.warning(
                f"Discarded panel with invalid label '{label}' "
                f"(expected one of {expected_panels})"
            )
            continue
        kept.append(entry)

    # The input dict is updated in place and also returned.
    result["outputs"] = kept
    return result
153+
110154
def process_response(self, response: Any) -> Any:
111155
"""Process the response from the model API."""
112156
# If response is a string, try to parse it

src/soda_curation/qc/model_api.py

Lines changed: 15 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -69,15 +69,22 @@ def generate_response(
6969

7070
# Add expected panels instruction if provided
7171
if expected_panels:
72-
panels_instruction = (
73-
f"\n\n**IMPORTANT CONSTRAINT**: The `panel_label` field in your response MUST be EXACTLY "
74-
f"one of the following valid panel labels for this figure: {expected_panels}. "
75-
f"Do NOT use any other labels, sub-panel labels (like 'A-a', 'A-b'), descriptive labels "
76-
f"(like 'Rice cell', 'Figure 8'), or panel labels with modifiers (like 'C (plot)', 'C (right)'). "
77-
f"Use ONLY the exact labels from this list: {expected_panels}"
72+
panels_constraint = (
73+
f"**CRITICAL PANEL LABEL CONSTRAINT**:\n"
74+
f"The output panel labels for this task MUST be exactly equal to "
75+
f"those defined for this figure. The allowed panel labels are: {expected_panels}\n"
76+
f"Rules:\n"
77+
f"- You MUST ONLY use panel labels from the list above.\n"
78+
f"- Do NOT invent new panel labels or subdivisions.\n"
79+
f"- Do NOT use sub-panel labels (e.g., 'A-a', 'A-b', 'A-l').\n"
80+
f"- Do NOT use descriptive labels (e.g., 'Rice cell', 'Figure 8').\n"
81+
f"- Do NOT add modifiers to labels (e.g., 'C (plot)', 'C (right)').\n"
82+
f"- Each `panel_label` in your response MUST be exactly one of: {expected_panels}\n"
7883
)
79-
system_prompt += panels_instruction
80-
user_prompt += panels_instruction
84+
# Prepend to system prompt so it acts as a ground rule
85+
system_prompt = panels_constraint + "\n" + system_prompt
86+
# Append to user prompt for reinforcement
87+
user_prompt += "\n\n" + panels_constraint
8188

8289
# Determine the type of analysis and create appropriate messages
8390
if encoded_image is not None and caption is not None:

0 commit comments

Comments
 (0)