diff --git a/figure-claim-consistency-assistant/README.md b/figure-claim-consistency-assistant/README.md new file mode 100644 index 0000000..dbe0c39 --- /dev/null +++ b/figure-claim-consistency-assistant/README.md @@ -0,0 +1,29 @@ +# Figure Claim Consistency Assistant + +This module is a focused slice for SCIBASE.AI issue #16, AI-Powered Research Assistant Suite. + +It acts as a pre-submission assistant for manuscript figures and tables. It cross-checks manuscript claims against synthetic figure and table metadata, then emits reviewer-ready tasks when visual evidence does not support the written claim. + +## What It Covers + +- Claim-to-figure and claim-to-table linkage. +- Axis unit, effect direction, effect magnitude, and sample-size consistency. +- Caption caveat propagation into manuscript claims. +- Public reproducibility artifact checks for source data, notebooks, benchmark logs, manifests, and deployment plans. +- Reviewer tasks, research-gap signals, JSON audit packets, Markdown review packets, SVG summaries, and a short MP4 demo artifact. + +The sample data is synthetic. The module is dependency-free and does not call external services. 
+ +## Run + +```bash +node figure-claim-consistency-assistant/test.js +node figure-claim-consistency-assistant/demo.js +``` + +Demo output is written to: + +- `reports/claim-consistency-audit.json` +- `reports/review-packet.md` +- `reports/claim-consistency.svg` +- `reports/demo.mp4` diff --git a/figure-claim-consistency-assistant/demo.js b/figure-claim-consistency-assistant/demo.js new file mode 100644 index 0000000..b105db0 --- /dev/null +++ b/figure-claim-consistency-assistant/demo.js @@ -0,0 +1,25 @@ +const fs = require("node:fs"); +const path = require("node:path"); +const data = require("./sample-data"); +const { renderMarkdown, renderSvg, runAssistant } = require("./index"); + +const report = runAssistant(data); +const reportsDir = path.join(__dirname, "reports"); +fs.mkdirSync(reportsDir, { recursive: true }); +fs.writeFileSync(path.join(reportsDir, "claim-consistency-audit.json"), JSON.stringify(report, null, 2)); +fs.writeFileSync(path.join(reportsDir, "review-packet.md"), renderMarkdown(report)); +fs.writeFileSync(path.join(reportsDir, "claim-consistency.svg"), renderSvg(report)); + +console.log(JSON.stringify({ + decision: report.summary.releaseStatus, + averageScore: report.summary.averageScore, + issueCount: report.summary.totalIssues, + highRiskClaims: report.summary.highestRiskClaims, + auditDigest: report.auditDigest, + reports: [ + "reports/claim-consistency-audit.json", + "reports/review-packet.md", + "reports/claim-consistency.svg", + "reports/demo.mp4" + ] +}, null, 2)); diff --git a/figure-claim-consistency-assistant/index.js b/figure-claim-consistency-assistant/index.js new file mode 100644 index 0000000..7bc67c5 --- /dev/null +++ b/figure-claim-consistency-assistant/index.js @@ -0,0 +1,396 @@ +const crypto = require("node:crypto"); + +const SEVERITY_WEIGHT = { + blocker: 34, + high: 22, + medium: 12, + low: 5 +}; + +function round(value, digits = 2) { + const factor = 10 ** digits; + return Math.round(value * factor) / factor; +} + +function 
/**
 * Decide whether an observed effect (from a figure panel or a table row)
 * supports the effect size reported in a manuscript claim.
 *
 * - `expectedDirection === "below-threshold"`: the reported effect is a ceiling;
 *   the observation must be less than or equal to it.
 * - `expectedDirection === "at-least"`: the reported effect is a floor;
 *   the observation must be greater than or equal to it.
 * - any other direction: the observation must lie within a symmetric tolerance
 *   of the reported effect, where the tolerance is the larger of an absolute
 *   floor and a fraction of the reported magnitude.
 *
 * @param {{ expectedDirection: string, reportedEffect: number }} claim
 *   Manuscript claim; only these two fields are read.
 * @param {number} observedEffect Effect value taken from the evidence.
 * @param {{ absoluteFloor?: number, relativeTolerance?: number }} [options]
 *   Optional tolerance tuning. Defaults (0.03 and 0.12) reproduce the
 *   previously hard-coded behaviour.
 * @returns {boolean} `true` when the evidence is compatible with the claim.
 */
function effectWithinTolerance(
  claim,
  observedEffect,
  { absoluteFloor = 0.03, relativeTolerance = 0.12 } = {}
) {
  const reportedEffect = claim.reportedEffect;

  // Number.isFinite does not coerce, so null, undefined, strings, NaN and
  // +/-Infinity are all rejected here. Without this guard a `null` effect is
  // coerced to 0 by the relational operators and would silently satisfy
  // e.g. a "below-threshold" claim even though no evidence value exists.
  if (!Number.isFinite(observedEffect) || !Number.isFinite(reportedEffect)) {
    return false;
  }

  if (claim.expectedDirection === "below-threshold") {
    return observedEffect <= reportedEffect;
  }
  if (claim.expectedDirection === "at-least") {
    return observedEffect >= reportedEffect;
  }

  // The absolute floor keeps near-zero claims from demanding an exact match.
  const tolerance = Math.max(absoluteFloor, Math.abs(reportedEffect) * relativeTolerance);
  return Math.abs(reportedEffect - observedEffect) <= tolerance;
}
+ ); + } + + if (figure.axis?.yUnit !== claim.expectedUnit) { + addIssue( + issues, + "high", + "axis-unit-mismatch", + `${figure.id} uses ${figure.axis?.yUnit || "unknown units"} while claim ${claim.id} reports ${claim.expectedUnit}.`, + "Normalize units in the caption, manuscript claim, and source table." + ); + } + + for (const panel of figure.panels || []) { + if (!matchesDirection(claim.expectedDirection, panel.direction, 0.02, claim.reportedEffect)) { + addIssue( + issues, + "high", + "effect-direction-mismatch", + `${figure.id}${panel.id} shows ${panel.direction}, but claim ${claim.id} says ${claim.expectedDirection}.`, + "Revise the claim language or regenerate the panel from the intended analysis." + ); + } + + if (!effectWithinTolerance(claim, panel.effect)) { + addIssue( + issues, + "medium", + "effect-size-drift", + `${figure.id}${panel.id} effect ${panel.effect} does not match reported claim effect ${claim.reportedEffect}.`, + "Update the manuscript effect size or regenerate the plotted estimate." + ); + } + + if (Math.abs((panel.sampleSize || 0) - claim.reportedSampleSize) > 2) { + addIssue( + issues, + "medium", + "sample-size-drift", + `${figure.id}${panel.id} uses n=${panel.sampleSize}; claim ${claim.id} reports n=${claim.reportedSampleSize}.`, + "Add a sample-size reconciliation note and align the figure/table/manuscript counts." + ); + } + + if (panel.caveat && !claim.caveats.includes(panel.caveat)) { + addIssue( + issues, + "medium", + "caption-caveat-missing-from-claim", + `${figure.id}${panel.id} caveat "${panel.caveat}" is absent from claim ${claim.id}.`, + "Carry the figure caveat into the manuscript claim or downgrade the conclusion." + ); + } + + if (panel.colorEncodingSafe === false) { + addIssue( + issues, + "low", + "visual-accessibility-risk", + `${figure.id}${panel.id} uses an unsafe color encoding for review.`, + "Switch to a redundant encoding or color-safe palette before submission." 
+ ); + } + + if (panel.sourceDataId && !publicArtifactTypes.has("source-data")) { + addIssue( + issues, + "high", + "source-data-not-public", + `${figure.id}${panel.id} references source data that is not public.`, + "Publish the source-data artifact or mark the claim as internally unverified." + ); + } + + if (!panel.analysisNotebookId && claim.requiredArtifacts.includes("analysis-notebook")) { + addIssue( + issues, + "high", + "missing-analysis-notebook", + `${figure.id}${panel.id} has no linked analysis notebook.`, + "Attach the notebook that produces this figure and rerun the assistant." + ); + } + } + + return issues; +} + +function evaluateTable(claim, table) { + const issues = []; + if (!table) { + addIssue( + issues, + "high", + "missing-table", + `Claim ${claim.id} cites a table that is not present in the packet.`, + "Attach the referenced table or remove the table citation." + ); + return issues; + } + + const rows = (table.rows || []).filter((row) => row.claimId === claim.id || row.metric === claim.metric); + if (rows.length === 0) { + addIssue( + issues, + "high", + "table-row-missing", + `${table.id} has no row supporting claim ${claim.id}.`, + "Add a claim-linked table row with metric, unit, sample size, and adjustment notes." + ); + } + + for (const row of rows) { + if (row.unit !== claim.expectedUnit) { + addIssue( + issues, + "medium", + "table-unit-mismatch", + `${table.id} reports ${row.unit}, but claim ${claim.id} reports ${claim.expectedUnit}.`, + "Normalize the table row unit or update the claim." + ); + } + + if (!effectWithinTolerance(claim, row.effect)) { + addIssue( + issues, + "medium", + "table-effect-size-drift", + `${table.id} row effect ${row.effect} does not match claim effect ${claim.reportedEffect}.`, + "Reconcile the table value and manuscript claim." 
/**
 * Convert a list of issues into a 0-100 consistency score.
 *
 * Each issue subtracts the weight registered for its severity in the
 * module-level `SEVERITY_WEIGHT` table; the result is clamped at 0.
 *
 * An issue whose severity is missing or not an own key of `SEVERITY_WEIGHT`
 * is charged the heaviest registered weight. Previously such an issue
 * contributed `undefined`, which turned the whole score into `NaN`; every
 * `score < limit` comparison downstream is then false, so the claim was
 * reported as "ready-with-notes". Falling back to the heaviest weight
 * guarantees a malformed issue can only lower a score.
 *
 * @param {Array<{ severity: string }>} issues Issues raised for one claim.
 * @returns {number} Score between 0 and 100 (100 when there are no issues).
 */
function scoreIssues(issues) {
  const fallbackWeight = Math.max(...Object.values(SEVERITY_WEIGHT));
  const weightOf = (severity) =>
    // Own-property lookup so inherited names such as "constructor" or
    // "toString" are not mistaken for registered severities.
    Object.prototype.hasOwnProperty.call(SEVERITY_WEIGHT, severity)
      ? SEVERITY_WEIGHT[severity]
      : fallbackWeight;
  const penalty = issues.reduce((total, issue) => total + weightOf(issue.severity), 0);
  return Math.max(0, 100 - penalty);
}
/**
 * Roll per-claim reports up into the packet-level summary.
 *
 * @param {Array<{ claimId: string, score: number, status: string,
 *   issues: Array<{ severity: string }> }>} claimReports
 *   Reports in manuscript order.
 * @returns {{ averageScore: number, releaseStatus: string, totalIssues: number,
 *   countsBySeverity: Object<string, number>,
 *   highestRiskClaims: Array<{ claimId: string, score: number, status: string }> }}
 *   - `averageScore`: mean claim score rounded with the module's `round`
 *     helper; 100 for an empty packet (no claims means no penalties). The
 *     unguarded 0/0 division used to produce `NaN` here, which serialises as
 *     `null` in the audit JSON and prints as "NaN" in rendered output.
 *   - `releaseStatus`: "block-release" if any claim is blocked, otherwise
 *     "major-revision" when the average is below 70, else "ready-with-notes".
 *   - `countsBySeverity`: issue tally keyed by severity, in first-seen order.
 *   - `highestRiskClaims`: up to three lowest-scoring claims; ties keep
 *     manuscript order because Array.prototype.sort is stable.
 */
function summarizeReports(claimReports) {
  const allIssues = claimReports.flatMap((report) => report.issues);

  const countsBySeverity = {};
  for (const { severity } of allIssues) {
    countsBySeverity[severity] = (countsBySeverity[severity] || 0) + 1;
  }

  const totalScore = claimReports.reduce((sum, report) => sum + report.score, 0);
  const averageScore = claimReports.length === 0
    ? 100
    : round(totalScore / claimReports.length);

  let releaseStatus = "ready-with-notes";
  if (claimReports.some((report) => report.status === "block-release")) {
    releaseStatus = "block-release";
  } else if (averageScore < 70) {
    releaseStatus = "major-revision";
  }

  return {
    averageScore,
    releaseStatus,
    totalIssues: allIssues.length,
    countsBySeverity,
    highestRiskClaims: claimReports
      .slice() // copy first: sort mutates and the caller's order must survive
      .sort((a, b) => a.score - b.score)
      .slice(0, 3)
      .map((report) => ({ claimId: report.claimId, score: report.score, status: report.status }))
  };
}
/**
 * Render the review report as a standalone SVG scorecard: a title, a one-line
 * decision summary, and one row per claim with a score bar.
 *
 * Every interpolated value is XML-escaped, so manuscript-derived text (claim
 * ids, statuses) cannot break the document or inject markup.
 *
 * NOTE(review): the element markup below was rebuilt from the layout values
 * this function computes (`width`, `height`, `y`, `barWidth`, `color`); exact
 * coordinates, fonts and background colours are a reviewer's reconstruction --
 * confirm against the intended design. The visible text content is unchanged.
 *
 * @param {{ summary: { releaseStatus: string, averageScore: number },
 *   auditDigest: string,
 *   claimReports: Array<{ claimId: string, status: string, score: number,
 *     issueCount: number }> }} report Report as produced by `runAssistant`.
 * @returns {string} SVG document on a single line (no trailing newline).
 */
function renderSvg(report) {
  const escapeXml = (value) => String(value)
    .replace(/&/g, "&amp;")
    .replace(/</g, "&lt;")
    .replace(/>/g, "&gt;")
    .replace(/"/g, "&quot;")
    .replace(/'/g, "&apos;");

  const width = 960;
  const rowHeight = 82;
  const height = 160 + report.claimReports.length * rowHeight;
  const font = `font-family="Arial, sans-serif"`;

  const rows = report.claimReports.map((claim, index) => {
    const y = 132 + index * rowHeight;
    // 4.8px per score point gives a 480px bar at 100; the 16px minimum keeps
    // a zero-score bar visible.
    const barWidth = Math.max(16, claim.score * 4.8);
    // Colour bands mirror the per-claim status thresholds (82 and 60).
    const color = claim.score >= 82 ? "#2f855a" : claim.score >= 60 ? "#b7791f" : "#c53030";
    return [
      `<text x="48" y="${y}" ${font} font-size="18" font-weight="bold" fill="#1a202c">${escapeXml(claim.claimId)}</text>`,
      `<rect x="160" y="${y - 22}" width="480" height="28" rx="6" fill="#e2e8f0"/>`,
      `<rect x="160" y="${y - 22}" width="${barWidth}" height="28" rx="6" fill="${color}"/>`,
      `<text x="660" y="${y}" ${font} font-size="16" fill="#1a202c">${escapeXml(claim.status)} (${escapeXml(claim.score)})</text>`,
      `<text x="160" y="${y + 28}" ${font} font-size="13" fill="#4a5568">${escapeXml(claim.issueCount)} issue(s)</text>`
    ].join("");
  }).join("");

  return [
    `<svg xmlns="http://www.w3.org/2000/svg" width="${width}" height="${height}" viewBox="0 0 ${width} ${height}" role="img">`,
    `<rect width="${width}" height="${height}" fill="#f7fafc"/>`,
    `<rect x="24" y="24" width="${width - 48}" height="${height - 48}" rx="12" fill="#ffffff" stroke="#cbd5e0"/>`,
    `<text x="48" y="64" ${font} font-size="24" font-weight="bold" fill="#1a202c">Figure/Table Claim Consistency Assistant</text>`,
    `<text x="48" y="94" ${font} font-size="14" fill="#4a5568">Decision: ${escapeXml(report.summary.releaseStatus)} | Average score: ${escapeXml(report.summary.averageScore)} | Audit ${escapeXml(report.auditDigest.slice(0, 12))}</text>`,
    rows,
    `</svg>`
  ].join("");
}
+ "status": "major-revision" + }, + { + "claimId": "C4", + "score": 0, + "status": "block-release" + } + ] + }, + "claimReports": [ + { + "claimId": "C1", + "claim": "The adaptive imaging panel improved lesion detection by 18% across all enrolled cohorts.", + "score": 100, + "status": "ready-with-notes", + "issueCount": 0, + "issues": [], + "reviewerTasks": [] + }, + { + "claimId": "C2", + "claim": "The model remained stable in the low-signal scanner subgroup.", + "score": 0, + "status": "major-revision", + "issueCount": 9, + "issues": [ + { + "severity": "high", + "code": "effect-direction-mismatch", + "message": "fig-2A shows decrease, but claim C2 says stable.", + "task": "Revise the claim language or regenerate the panel from the intended analysis." + }, + { + "severity": "medium", + "code": "effect-size-drift", + "message": "fig-2A effect -0.06 does not match reported claim effect 0.01.", + "task": "Update the manuscript effect size or regenerate the plotted estimate." + }, + { + "severity": "medium", + "code": "sample-size-drift", + "message": "fig-2A uses n=47; claim C2 reports n=64.", + "task": "Add a sample-size reconciliation note and align the figure/table/manuscript counts." + }, + { + "severity": "medium", + "code": "caption-caveat-missing-from-claim", + "message": "fig-2A caveat \"exploratory subgroup\" is absent from claim C2.", + "task": "Carry the figure caveat into the manuscript claim or downgrade the conclusion." + }, + { + "severity": "low", + "code": "visual-accessibility-risk", + "message": "fig-2A uses an unsafe color encoding for review.", + "task": "Switch to a redundant encoding or color-safe palette before submission." + }, + { + "severity": "medium", + "code": "table-effect-size-drift", + "message": "table-2 row effect -0.06 does not match claim effect 0.01.", + "task": "Reconcile the table value and manuscript claim." 
+ }, + { + "severity": "medium", + "code": "table-sample-size-drift", + "message": "table-2 row uses n=47; claim C2 reports n=64.", + "task": "Align table and manuscript denominators." + }, + { + "severity": "medium", + "code": "multiplicity-adjustment-missing", + "message": "table-2 row for claim C2 lacks multiplicity-adjusted significance evidence.", + "task": "Add adjusted p-values or downgrade the subgroup conclusion." + }, + { + "severity": "high", + "code": "required-artifact-missing", + "message": "Claim C2 requires public subgroup-manifest, but none is attached.", + "task": "Attach a public subgroup-manifest artifact before submission." + } + ], + "reviewerTasks": [ + "Revise the claim language or regenerate the panel from the intended analysis.", + "Update the manuscript effect size or regenerate the plotted estimate.", + "Add a sample-size reconciliation note and align the figure/table/manuscript counts.", + "Carry the figure caveat into the manuscript claim or downgrade the conclusion.", + "Switch to a redundant encoding or color-safe palette before submission.", + "Reconcile the table value and manuscript claim.", + "Align table and manuscript denominators.", + "Add adjusted p-values or downgrade the subgroup conclusion.", + "Attach a public subgroup-manifest artifact before submission." + ] + }, + { + "claimId": "C3", + "claim": "Runtime overhead stayed below five minutes per scan.", + "score": 0, + "status": "major-revision", + "issueCount": 6, + "issues": [ + { + "severity": "high", + "code": "axis-unit-mismatch", + "message": "fig-3 uses seconds while claim C3 reports minutes.", + "task": "Normalize units in the caption, manuscript claim, and source table." + }, + { + "severity": "high", + "code": "effect-direction-mismatch", + "message": "fig-3A shows above-threshold, but claim C3 says below-threshold.", + "task": "Revise the claim language or regenerate the panel from the intended analysis." 
+ }, + { + "severity": "medium", + "code": "effect-size-drift", + "message": "fig-3A effect 7.8 does not match reported claim effect 5.", + "task": "Update the manuscript effect size or regenerate the plotted estimate." + }, + { + "severity": "medium", + "code": "caption-caveat-missing-from-claim", + "message": "fig-3A caveat \"GPU queue warm cache\" is absent from claim C3.", + "task": "Carry the figure caveat into the manuscript claim or downgrade the conclusion." + }, + { + "severity": "high", + "code": "missing-analysis-notebook", + "message": "fig-3A has no linked analysis notebook.", + "task": "Attach the notebook that produces this figure and rerun the assistant." + }, + { + "severity": "medium", + "code": "table-effect-size-drift", + "message": "table-3 row effect 7.8 does not match claim effect 5.", + "task": "Reconcile the table value and manuscript claim." + } + ], + "reviewerTasks": [ + "Normalize units in the caption, manuscript claim, and source table.", + "Revise the claim language or regenerate the panel from the intended analysis.", + "Update the manuscript effect size or regenerate the plotted estimate.", + "Carry the figure caveat into the manuscript claim or downgrade the conclusion.", + "Attach the notebook that produces this figure and rerun the assistant.", + "Reconcile the table value and manuscript claim." + ] + }, + { + "claimId": "C4", + "claim": "The proposed assistant is ready for multi-site prospective deployment.", + "score": 0, + "status": "block-release", + "issueCount": 4, + "issues": [ + { + "severity": "blocker", + "code": "missing-figure", + "message": "Claim C4 cites a figure that is not present in the packet.", + "task": "Attach the referenced figure or remove the unsupported claim before submission." + }, + { + "severity": "high", + "code": "missing-table", + "message": "Claim C4 cites a table that is not present in the packet.", + "task": "Attach the referenced table or remove the table citation." 
+ }, + { + "severity": "high", + "code": "required-artifact-missing", + "message": "Claim C4 requires public deployment-protocol, but none is attached.", + "task": "Attach a public deployment-protocol artifact before submission." + }, + { + "severity": "high", + "code": "required-artifact-missing", + "message": "Claim C4 requires public monitoring-plan, but none is attached.", + "task": "Attach a public monitoring-plan artifact before submission." + } + ], + "reviewerTasks": [ + "Attach the referenced figure or remove the unsupported claim before submission.", + "Attach the referenced table or remove the table citation.", + "Attach a public deployment-protocol artifact before submission.", + "Attach a public monitoring-plan artifact before submission." + ] + } + ], + "gapSignals": [ + { + "claimId": "C2", + "opportunity": "Run a targeted follow-up study for C2 before broadening the conclusion.", + "rationale": "fig-2A shows decrease, but claim C2 says stable. fig-2A effect -0.06 does not match reported claim effect 0.01.", + "priority": "high" + }, + { + "claimId": "C3", + "opportunity": "Run a targeted follow-up study for C3 before broadening the conclusion.", + "rationale": "fig-3 uses seconds while claim C3 reports minutes. fig-3A shows above-threshold, but claim C3 says below-threshold.", + "priority": "high" + }, + { + "claimId": "C4", + "opportunity": "Run a targeted follow-up study for C4 before broadening the conclusion.", + "rationale": "Claim C4 cites a figure that is not present in the packet. 
Claim C4 cites a table that is not present in the packet.", + "priority": "high" + } + ], + "auditDigest": "c41b98366bd40d07468f7fc416e0ccd099a84730651cb750d5acc4b15b1715ff" +} \ No newline at end of file diff --git a/figure-claim-consistency-assistant/reports/claim-consistency.svg b/figure-claim-consistency-assistant/reports/claim-consistency.svg new file mode 100644 index 0000000..4698247 --- /dev/null +++ b/figure-claim-consistency-assistant/reports/claim-consistency.svg @@ -0,0 +1 @@ +Figure/Table Claim Consistency AssistantDecision: block-release | Average score: 25 | Audit c41b98366bd4C1ready-with-notes (100)0 issue(s)C2major-revision (0)9 issue(s)C3major-revision (0)6 issue(s)C4block-release (0)4 issue(s) \ No newline at end of file diff --git a/figure-claim-consistency-assistant/reports/demo.mp4 b/figure-claim-consistency-assistant/reports/demo.mp4 new file mode 100644 index 0000000..e98195d Binary files /dev/null and b/figure-claim-consistency-assistant/reports/demo.mp4 differ diff --git a/figure-claim-consistency-assistant/reports/review-packet.md b/figure-claim-consistency-assistant/reports/review-packet.md new file mode 100644 index 0000000..2c448d1 --- /dev/null +++ b/figure-claim-consistency-assistant/reports/review-packet.md @@ -0,0 +1,74 @@ +# Figure Claim Consistency Review: Adaptive Imaging Biomarkers for Early Neuroinflammation + +Decision: **block-release** +Average score: **25** +Total issues: **19** + +## Highest Risk Claims +- C2: major-revision (0) +- C3: major-revision (0) +- C4: block-release (0) + +## Reviewer Tasks + +### C1 +Score: 100 +Status: ready-with-notes +- No blocking figure/table inconsistencies found. + +### C2 +Score: 0 +Status: major-revision +- [high] fig-2A shows decrease, but claim C2 says stable. + Task: Revise the claim language or regenerate the panel from the intended analysis. +- [medium] fig-2A effect -0.06 does not match reported claim effect 0.01. 
+ Task: Update the manuscript effect size or regenerate the plotted estimate. +- [medium] fig-2A uses n=47; claim C2 reports n=64. + Task: Add a sample-size reconciliation note and align the figure/table/manuscript counts. +- [medium] fig-2A caveat "exploratory subgroup" is absent from claim C2. + Task: Carry the figure caveat into the manuscript claim or downgrade the conclusion. +- [low] fig-2A uses an unsafe color encoding for review. + Task: Switch to a redundant encoding or color-safe palette before submission. +- [medium] table-2 row effect -0.06 does not match claim effect 0.01. + Task: Reconcile the table value and manuscript claim. +- [medium] table-2 row uses n=47; claim C2 reports n=64. + Task: Align table and manuscript denominators. +- [medium] table-2 row for claim C2 lacks multiplicity-adjusted significance evidence. + Task: Add adjusted p-values or downgrade the subgroup conclusion. +- [high] Claim C2 requires public subgroup-manifest, but none is attached. + Task: Attach a public subgroup-manifest artifact before submission. + +### C3 +Score: 0 +Status: major-revision +- [high] fig-3 uses seconds while claim C3 reports minutes. + Task: Normalize units in the caption, manuscript claim, and source table. +- [high] fig-3A shows above-threshold, but claim C3 says below-threshold. + Task: Revise the claim language or regenerate the panel from the intended analysis. +- [medium] fig-3A effect 7.8 does not match reported claim effect 5. + Task: Update the manuscript effect size or regenerate the plotted estimate. +- [medium] fig-3A caveat "GPU queue warm cache" is absent from claim C3. + Task: Carry the figure caveat into the manuscript claim or downgrade the conclusion. +- [high] fig-3A has no linked analysis notebook. + Task: Attach the notebook that produces this figure and rerun the assistant. +- [medium] table-3 row effect 7.8 does not match claim effect 5. + Task: Reconcile the table value and manuscript claim. 
+ +### C4 +Score: 0 +Status: block-release +- [blocker] Claim C4 cites a figure that is not present in the packet. + Task: Attach the referenced figure or remove the unsupported claim before submission. +- [high] Claim C4 cites a table that is not present in the packet. + Task: Attach the referenced table or remove the table citation. +- [high] Claim C4 requires public deployment-protocol, but none is attached. + Task: Attach a public deployment-protocol artifact before submission. +- [high] Claim C4 requires public monitoring-plan, but none is attached. + Task: Attach a public monitoring-plan artifact before submission. + +## Research Gap Signals +- C2 (high): Run a targeted follow-up study for C2 before broadening the conclusion. +- C3 (high): Run a targeted follow-up study for C3 before broadening the conclusion. +- C4 (high): Run a targeted follow-up study for C4 before broadening the conclusion. + +Audit digest: `c41b98366bd40d07468f7fc416e0ccd099a84730651cb750d5acc4b15b1715ff` diff --git a/figure-claim-consistency-assistant/requirements-map.md b/figure-claim-consistency-assistant/requirements-map.md new file mode 100644 index 0000000..0737ef5 --- /dev/null +++ b/figure-claim-consistency-assistant/requirements-map.md @@ -0,0 +1,16 @@ +# Requirements Map + +Source: SCIBASE.AI issue #16, AI-Powered Research Assistant Suite. + +| Requirement | Implementation | +| --- | --- | +| Auto peer review reports | `runAssistant()` emits reviewer-ready findings, severity labels, and correction tasks for figure/table evidence problems. | +| Claims vs. evidence alignment | `evaluateFigure()` and `evaluateTable()` compare claim text metadata with figure panels and table rows. | +| Statistical or methodological red flags | The assistant flags sample-size drift, direction mismatches, missing multiplicity adjustment, and threshold violations. | +| Clarity and coherence checks | Caption caveats, axis units, and table units are reconciled against manuscript claims. 
| +| Reproducibility checker | Required public artifact types are checked before a claim can be marked ready. | +| Dependency/version integrity equivalent | Audit packets include deterministic SHA-256 digests of the review payload. | +| Research gap finder | `buildResearchGapSignals()` converts unsupported visual claims into targeted follow-up study opportunities. | +| Adaptive templates per domain | Synthetic input includes manuscript domain metadata and claim-specific evidence requirements. | +| Reviewer-facing output | Demo emits JSON, Markdown, SVG, and MP4 artifacts under `reports/`. | +| Local verification | `test.js` covers release decision, claim scoring, issue detection, Markdown output, SVG output, and threshold helper behavior. | diff --git a/figure-claim-consistency-assistant/sample-data.js b/figure-claim-consistency-assistant/sample-data.js new file mode 100644 index 0000000..2af5b99 --- /dev/null +++ b/figure-claim-consistency-assistant/sample-data.js @@ -0,0 +1,181 @@ +const manuscript = { + id: "sci-vision-042", + title: "Adaptive Imaging Biomarkers for Early Neuroinflammation", + domain: "clinical-imaging", + claims: [ + { + id: "C1", + text: "The adaptive imaging panel improved lesion detection by 18% across all enrolled cohorts.", + figureRefs: ["fig-1"], + tableRefs: ["table-1"], + metric: "lesion_detection_delta", + expectedDirection: "increase", + expectedUnit: "percentage-points", + reportedEffect: 18, + reportedSampleSize: 240, + caveats: ["external validation pending"], + requiredArtifacts: ["analysis-notebook", "source-data"] + }, + { + id: "C2", + text: "The model remained stable in the low-signal scanner subgroup.", + figureRefs: ["fig-2"], + tableRefs: ["table-2"], + metric: "subgroup_auc_delta", + expectedDirection: "stable", + expectedUnit: "auc-delta", + reportedEffect: 0.01, + reportedSampleSize: 64, + caveats: [], + requiredArtifacts: ["subgroup-manifest", "analysis-notebook"] + }, + { + id: "C3", + text: "Runtime overhead stayed 
below five minutes per scan.", + figureRefs: ["fig-3"], + tableRefs: ["table-3"], + metric: "runtime_overhead_minutes", + expectedDirection: "below-threshold", + expectedUnit: "minutes", + reportedEffect: 5, + reportedSampleSize: 240, + caveats: [], + requiredArtifacts: ["benchmark-log", "source-data", "analysis-notebook"] + }, + { + id: "C4", + text: "The proposed assistant is ready for multi-site prospective deployment.", + figureRefs: ["fig-4"], + tableRefs: ["table-4"], + metric: "prospective_site_count", + expectedDirection: "at-least", + expectedUnit: "sites", + reportedEffect: 5, + reportedSampleSize: 5, + caveats: ["IRB amendments required"], + requiredArtifacts: ["deployment-protocol", "monitoring-plan"] + } + ] +}; + +const figures = [ + { + id: "fig-1", + title: "Lesion detection improvement", + caption: "Panel A shows a held-out validation comparison. External validation is pending.", + metric: "lesion_detection_delta", + axis: { xUnit: "cohort", yUnit: "percentage-points", yLabel: "Detection delta" }, + panels: [ + { + id: "A", + direction: "increase", + effect: 17.6, + sampleSize: 238, + caveat: "external validation pending", + sourceDataId: "source-data", + analysisNotebookId: "analysis-notebook", + colorEncodingSafe: true + } + ] + }, + { + id: "fig-2", + title: "Scanner subgroup robustness", + caption: "Low-signal scanner results are exploratory.", + metric: "subgroup_auc_delta", + axis: { xUnit: "scanner subgroup", yUnit: "auc-delta", yLabel: "AUC delta" }, + panels: [ + { + id: "A", + direction: "decrease", + effect: -0.06, + sampleSize: 47, + caveat: "exploratory subgroup", + sourceDataId: "source-data", + analysisNotebookId: "analysis-notebook", + colorEncodingSafe: false + } + ] + }, + { + id: "fig-3", + title: "Runtime overhead by scan", + caption: "Measured on the production GPU queue.", + metric: "runtime_overhead_minutes", + axis: { xUnit: "scan", yUnit: "seconds", yLabel: "Runtime overhead" }, + panels: [ + { + id: "A", + direction: 
"above-threshold", + effect: 7.8, + sampleSize: 240, + caveat: "GPU queue warm cache", + sourceDataId: "source-data", + analysisNotebookId: null, + colorEncodingSafe: true + } + ] + } +]; + +const tables = [ + { + id: "table-1", + title: "Primary detection endpoints", + rows: [ + { + claimId: "C1", + metric: "lesion_detection_delta", + effect: 17.6, + unit: "percentage-points", + sampleSize: 238, + adjustedPValue: 0.018, + notes: "External validation pending." + } + ] + }, + { + id: "table-2", + title: "Scanner subgroup analysis", + rows: [ + { + claimId: "C2", + metric: "subgroup_auc_delta", + effect: -0.06, + unit: "auc-delta", + sampleSize: 47, + adjustedPValue: null, + notes: "Exploratory subgroup; multiplicity adjustment not complete." + } + ] + }, + { + id: "table-3", + title: "Runtime benchmarks", + rows: [ + { + claimId: "C3", + metric: "runtime_overhead_minutes", + effect: 7.8, + unit: "minutes", + sampleSize: 240, + adjustedPValue: null, + notes: "Benchmark log available; analysis notebook missing." 
+ } + ] + } +]; + +const artifacts = [ + { id: "source-data", type: "source-data", hash: "sha256:source-vision-042", public: true }, + { id: "analysis-notebook", type: "analysis-notebook", hash: "sha256:notebook-vision-042", public: true }, + { id: "benchmark-log", type: "benchmark-log", hash: "sha256:runtime-log-042", public: true }, + { id: "subgroup-manifest", type: "subgroup-manifest", hash: "sha256:subgroup-042", public: false } +]; + +module.exports = { + manuscript, + figures, + tables, + artifacts +}; diff --git a/figure-claim-consistency-assistant/test.js b/figure-claim-consistency-assistant/test.js new file mode 100644 index 0000000..5201281 --- /dev/null +++ b/figure-claim-consistency-assistant/test.js @@ -0,0 +1,41 @@ +const assert = require("node:assert/strict"); +const data = require("./sample-data"); +const { effectWithinTolerance, renderMarkdown, renderSvg, runAssistant } = require("./index"); + +const report = runAssistant(data); +const byClaim = new Map(report.claimReports.map((claim) => [claim.claimId, claim])); + +assert.equal(report.assistant, "figure-claim-consistency-assistant"); +assert.equal(report.summary.releaseStatus, "block-release"); +assert.equal(report.claimReports.length, 4); +assert.match(report.auditDigest, /^[a-f0-9]{64}$/); + +assert.equal(byClaim.get("C1").status, "ready-with-notes"); +assert.ok(byClaim.get("C1").score >= 82); + +assert.equal(byClaim.get("C2").status, "major-revision"); +assert.ok(byClaim.get("C2").issues.some((issue) => issue.code === "effect-direction-mismatch")); +assert.ok(byClaim.get("C2").issues.some((issue) => issue.code === "multiplicity-adjustment-missing")); +assert.ok(byClaim.get("C2").issues.some((issue) => issue.code === "visual-accessibility-risk")); + +assert.equal(byClaim.get("C3").status, "major-revision"); +assert.ok(byClaim.get("C3").issues.some((issue) => issue.code === "axis-unit-mismatch")); +assert.ok(byClaim.get("C3").issues.some((issue) => issue.code === "missing-analysis-notebook")); + 
+assert.equal(byClaim.get("C4").status, "block-release"); +assert.ok(byClaim.get("C4").issues.some((issue) => issue.code === "missing-figure")); +assert.ok(byClaim.get("C4").issues.some((issue) => issue.code === "missing-table")); +assert.ok(byClaim.get("C4").issues.some((issue) => issue.code === "required-artifact-missing")); + +assert.equal(effectWithinTolerance({ expectedDirection: "below-threshold", reportedEffect: 5 }, 4.9), true); +assert.equal(effectWithinTolerance({ expectedDirection: "below-threshold", reportedEffect: 5 }, 6.1), false); + +const markdown = renderMarkdown(report); +assert.ok(markdown.includes("Figure Claim Consistency Review")); +assert.ok(markdown.includes("Audit digest")); + +const svg = renderSvg(report); +assert.ok(svg.startsWith("