diff --git a/analysis-variable-provenance-assistant/README.md b/analysis-variable-provenance-assistant/README.md
new file mode 100644
index 0000000..d36108f
--- /dev/null
+++ b/analysis-variable-provenance-assistant/README.md
@@ -0,0 +1,50 @@
+# Analysis Variable Provenance Assistant
+
+This is a focused slice of the AI-Powered Research Assistant Suite for SCIBASE issue #16. It audits whether manuscript analysis variables can be traced back to the project data dictionary, the pipeline transforms that produce them, cohort filters, transform hashes, and prior reproducibility attempts.
+
+## Scope
+
+- Checks manuscript variables against data dictionary ids and aliases.
+- Detects unit drift between manuscript text and dictionary definitions.
+- Checks cohort-filter alignment between manuscript analyses and producing transforms.
+- Flags incomplete derived-variable lineage.
+- Detects stale transform hashes and failing or non-deterministic pipelines.
+- Links failed reproducibility attempts to affected manuscript analyses.
+- Emits reviewer-ready findings, priority actions, confidence scores, and stable digests.
+
+It intentionally does not duplicate broad assistant-suite submissions, protocol-trace modules, evidence-grounding checks, statistical methods review, research-gap planners, rebuttal packs, ethics checks, citation-context reconciliation, reporting-guideline compliance, benchmark-leakage audits, or figure/table consistency modules.
+
+## Run
+
+```powershell
+node analysis-variable-provenance-assistant/test.js
+node analysis-variable-provenance-assistant/demo.js
+```
+
+The demo writes:
+
+- `analysis-variable-provenance-assistant/demo-output/provenance-audit.json`
+- `analysis-variable-provenance-assistant/demo-output/demo.svg`
+
+This PR also includes the required short MP4 demo artifact:
+
+- `analysis-variable-provenance-assistant/demo-output/demo.mp4`
+
+## API
+
+```js
+const {
+ auditVariableProvenance,
+ buildReviewerReport,
+ createFindingDigest,
+} = require("./analysis-variable-provenance-assistant");
+
+const audit = auditVariableProvenance({
+ manuscript,
+ dataDictionary,
+ pipelines,
+ reproducibilityAttempts,
+});
+```
+
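+`auditVariableProvenance` returns an audit object. Its `analysisPackets` array holds one packet per analysis with flags, findings, reviewer actions, a reproducibility confidence score, a decision, and deterministic finding digests; its `reviewerReport` holds suite-level counts and priority actions. Pass an optional `generatedAt` timestamp to pin the output; it defaults to the current time.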
diff --git a/analysis-variable-provenance-assistant/acceptance-notes.md b/analysis-variable-provenance-assistant/acceptance-notes.md
new file mode 100644
index 0000000..9bc376e
--- /dev/null
+++ b/analysis-variable-provenance-assistant/acceptance-notes.md
@@ -0,0 +1,27 @@
+# Acceptance Notes
+
+## What This Adds
+
+- Dependency-free Node.js module under `analysis-variable-provenance-assistant/`.
+- Deterministic analysis provenance audit packets for manuscript variables.
+- Tests for undefined variables, unit drift, incomplete lineage, stale transform hashes, non-deterministic pipelines, failed reproducibility links, suite-level reporting, and stable digests.
+- Demo JSON, SVG, and MP4 artifacts for bounty review.
+
+## Verification
+
+Use these commands from the repository root:
+
+```powershell
+node analysis-variable-provenance-assistant/test.js
+node analysis-variable-provenance-assistant/demo.js
+node --check analysis-variable-provenance-assistant/index.js
+node --check analysis-variable-provenance-assistant/test.js
+node --check analysis-variable-provenance-assistant/demo.js
+node --check analysis-variable-provenance-assistant/sample-data.js
+ffprobe -v error -show_entries format=duration,size -show_entries stream=codec_name,width,height -of default=noprint_wrappers=1 analysis-variable-provenance-assistant/demo-output/demo.mp4
+git diff --check
+```
+
+## AI Assistance Disclosure
+
+This contribution was prepared with AI assistance from OpenAI Codex and reviewed through local deterministic tests and artifact checks before submission.
diff --git a/analysis-variable-provenance-assistant/demo-output/demo.mp4 b/analysis-variable-provenance-assistant/demo-output/demo.mp4
new file mode 100644
index 0000000..cef2d0e
Binary files /dev/null and b/analysis-variable-provenance-assistant/demo-output/demo.mp4 differ
diff --git a/analysis-variable-provenance-assistant/demo-output/demo.svg b/analysis-variable-provenance-assistant/demo-output/demo.svg
new file mode 100644
index 0000000..59b05e1
--- /dev/null
+++ b/analysis-variable-provenance-assistant/demo-output/demo.svg
@@ -0,0 +1,57 @@
+
\ No newline at end of file
diff --git a/analysis-variable-provenance-assistant/demo-output/provenance-audit.json b/analysis-variable-provenance-assistant/demo-output/provenance-audit.json
new file mode 100644
index 0000000..575455a
--- /dev/null
+++ b/analysis-variable-provenance-assistant/demo-output/provenance-audit.json
@@ -0,0 +1,180 @@
+{
+ "generatedAt": "2026-05-20T12:30:00.000Z",
+ "projectId": "SCI-GLUCOSE-17",
+ "title": "Inflammation and glucose variability in post-acute cohorts",
+ "domain": "clinical trials",
+ "analysisPackets": [
+ {
+ "analysisId": "analysis-primary",
+ "claim": "Inflammation score predicts glucose variability in the post-acute cohort.",
+ "cohort": "post-acute",
+ "flags": [
+ "INCOMPLETE_DERIVATION_LINEAGE",
+ "TRANSFORM_HASH_STALE",
+ "UNIT_DRIFT",
+ "NONDETERMINISTIC_PIPELINE",
+ "PIPELINE_TEST_FAILING",
+ "FAILED_REPRODUCIBILITY_ATTEMPT"
+ ],
+ "findings": [
+ {
+ "analysisId": "analysis-primary",
+ "variableName": "inflammation_score",
+ "flag": "INCOMPLETE_DERIVATION_LINEAGE",
+ "severity": "medium",
+ "message": "inflammation_score lineage omits il6_pg_ml from transform biomarker-transform.",
+ "reviewerAction": "Add il6_pg_ml to biomarker-transform lineage for inflammation_score.",
+ "generatedAt": "2026-05-20T12:30:00.000Z",
+ "digest": "avpa_dfcd8f323eb93f1e52fcb80c"
+ },
+ {
+ "analysisId": "analysis-primary",
+ "variableName": "inflammation_score",
+ "flag": "TRANSFORM_HASH_STALE",
+ "severity": "medium",
+ "message": "biomarker-transform hash changed from sha256:old-glucose-transform to sha256:biomarker-transform.",
+ "reviewerAction": "Re-run analysis-primary or explain the biomarker-transform hash change.",
+ "generatedAt": "2026-05-20T12:30:00.000Z",
+ "digest": "avpa_326ea81261c504fdae920bd6"
+ },
+ {
+ "analysisId": "analysis-primary",
+ "variableName": "glucose_variability",
+ "flag": "UNIT_DRIFT",
+ "severity": "medium",
+ "message": "glucose_variability is reported as mg/dL but the data dictionary uses mmol/L.",
+ "reviewerAction": "Reconcile glucose_variability units between manuscript and data dictionary.",
+ "generatedAt": "2026-05-20T12:30:00.000Z",
+ "digest": "avpa_bbc1aeaec39194870b243c1e"
+ },
+ {
+ "analysisId": "analysis-primary",
+ "variableName": "glucose_variability",
+ "flag": "TRANSFORM_HASH_STALE",
+ "severity": "medium",
+ "message": "glucose-transform hash changed from sha256:old-glucose-transform to sha256:new-glucose-transform.",
+ "reviewerAction": "Re-run analysis-primary or explain the glucose-transform hash change.",
+ "generatedAt": "2026-05-20T12:30:00.000Z",
+ "digest": "avpa_1fd19d4fc0c424bfb0aa4965"
+ },
+ {
+ "analysisId": "analysis-primary",
+ "variableName": "glucose_variability",
+ "flag": "NONDETERMINISTIC_PIPELINE",
+ "severity": "medium",
+ "message": "glucose-transform is marked non-deterministic.",
+ "reviewerAction": "Stabilize glucose-transform seeds or document accepted variance.",
+ "generatedAt": "2026-05-20T12:30:00.000Z",
+ "digest": "avpa_6c6005f9b02216b7ac7bdd63"
+ },
+ {
+ "analysisId": "analysis-primary",
+ "variableName": "glucose_variability",
+ "flag": "PIPELINE_TEST_FAILING",
+ "severity": "high",
+ "message": "glucose-transform has test status fail.",
+ "reviewerAction": "Fix glucose-transform tests before relying on glucose_variability.",
+ "generatedAt": "2026-05-20T12:30:00.000Z",
+ "digest": "avpa_96929687755e683add74f88d"
+ },
+ {
+ "analysisId": "analysis-primary",
+ "variableName": "*analysis*",
+ "flag": "FAILED_REPRODUCIBILITY_ATTEMPT",
+ "severity": "high",
+ "message": "analysis-primary has a fail reproducibility attempt: Output variance changed after glucose transform rerun.",
+ "reviewerAction": "Re-run or explain failed reproducibility attempt from 2026-05-18T09:00:00.000Z.",
+ "generatedAt": "2026-05-20T12:30:00.000Z",
+ "digest": "avpa_292d6ab7fb77ed68a1dbd6d7"
+ }
+ ],
+ "reviewerActions": [
+ "Add il6_pg_ml to biomarker-transform lineage for inflammation_score.",
+ "Re-run analysis-primary or explain the biomarker-transform hash change.",
+ "Reconcile glucose_variability units between manuscript and data dictionary.",
+ "Re-run analysis-primary or explain the glucose-transform hash change.",
+ "Stabilize glucose-transform seeds or document accepted variance.",
+ "Fix glucose-transform tests before relying on glucose_variability.",
+ "Re-run or explain failed reproducibility attempt from 2026-05-18T09:00:00.000Z."
+ ],
+ "reproducibilityConfidence": 6,
+ "decision": "hold_for_provenance_fix"
+ },
+ {
+ "analysisId": "analysis-secondary",
+ "claim": "Sleep fragmentation explains residual glucose variance.",
+ "cohort": "sleep-substudy",
+ "flags": [
+ "UNDEFINED_VARIABLE",
+ "PIPELINE_MISSING"
+ ],
+ "findings": [
+ {
+ "analysisId": "analysis-secondary",
+ "variableName": "sleep_fragmentation_index",
+ "flag": "UNDEFINED_VARIABLE",
+ "severity": "high",
+ "message": "sleep_fragmentation_index is used in the manuscript but is not defined in the data dictionary.",
+ "reviewerAction": "Define sleep_fragmentation_index in the project data dictionary before review.",
+ "generatedAt": "2026-05-20T12:30:00.000Z",
+ "digest": "avpa_477e32e8eb9040183ed73e71"
+ },
+ {
+ "analysisId": "analysis-secondary",
+ "variableName": "sleep_fragmentation_index",
+ "flag": "PIPELINE_MISSING",
+ "severity": "high",
+ "message": "sleep_fragmentation_index has no producing pipeline transform.",
+ "reviewerAction": "Attach a producing transform or mark sleep_fragmentation_index as externally sourced.",
+ "generatedAt": "2026-05-20T12:30:00.000Z",
+ "digest": "avpa_bdc5a8bd4a299d99d80ab9e9"
+ }
+ ],
+ "reviewerActions": [
+ "Define sleep_fragmentation_index in the project data dictionary before review.",
+ "Attach a producing transform or mark sleep_fragmentation_index as externally sourced."
+ ],
+ "reproducibilityConfidence": 50,
+ "decision": "hold_for_provenance_fix"
+ },
+ {
+ "analysisId": "analysis-sensitivity",
+ "claim": "Inflammation score remains stable after excluding medication switchers.",
+ "cohort": "post-acute",
+ "flags": [
+ "INCOMPLETE_DERIVATION_LINEAGE"
+ ],
+ "findings": [
+ {
+ "analysisId": "analysis-sensitivity",
+ "variableName": "inflammation_score",
+ "flag": "INCOMPLETE_DERIVATION_LINEAGE",
+ "severity": "medium",
+ "message": "inflammation_score lineage omits il6_pg_ml from transform biomarker-transform.",
+ "reviewerAction": "Add il6_pg_ml to biomarker-transform lineage for inflammation_score.",
+ "generatedAt": "2026-05-20T12:30:00.000Z",
+ "digest": "avpa_60080ca167289503d0a3f4a7"
+ }
+ ],
+ "reviewerActions": [
+ "Add il6_pg_ml to biomarker-transform lineage for inflammation_score."
+ ],
+ "reproducibilityConfidence": 88,
+ "decision": "ready_for_review"
+ }
+ ],
+ "reviewerReport": {
+ "counts": {
+ "analyses": 3,
+ "highRiskAnalyses": 2,
+ "undefinedVariables": 1,
+ "unitDriftFindings": 1,
+ "failedReproducibilityLinks": 1
+ },
+ "priorityActions": [
+ "Resolve 2 high-risk analysis provenance packets before pre-submission review.",
+ "Define 1 manuscript variable in the project data dictionary.",
+ "Re-run or explain 1 failed reproducibility attempt linked to manuscript analyses."
+ ]
+ }
+}
diff --git a/analysis-variable-provenance-assistant/demo.js b/analysis-variable-provenance-assistant/demo.js
new file mode 100644
index 0000000..7fb4399
--- /dev/null
+++ b/analysis-variable-provenance-assistant/demo.js
@@ -0,0 +1,101 @@
+const fs = require("fs");
+const path = require("path");
+
+const { auditVariableProvenance } = require("./index");
+const {
+ manuscript,
+ dataDictionary,
+ pipelines,
+ reproducibilityAttempts,
+} = require("./sample-data");
+
+// Pinned timestamp handed to the audit so the demo output does not depend on the wall clock.
+const generatedAt = "2026-05-20T12:30:00.000Z";
+const outputDir = path.join(__dirname, "demo-output");
+const auditJsonPath = path.join(outputDir, "provenance-audit.json");
+const svgPath = path.join(outputDir, "demo.svg");
+
+fs.mkdirSync(outputDir, { recursive: true });
+
+const audit = auditVariableProvenance({
+  manuscript,
+  dataDictionary,
+  pipelines,
+  reproducibilityAttempts,
+  generatedAt,
+});
+
+// Persist the audit as pretty-printed JSON (with a trailing newline) and as an SVG summary.
+fs.writeFileSync(auditJsonPath, `${JSON.stringify(audit, null, 2)}\n`);
+fs.writeFileSync(svgPath, buildSvg(audit));
+
+// Console summary: suite-level counts followed by the paths that were written.
+const { counts } = audit.reviewerReport;
+const summaryLines = [
+  "Analysis variable provenance assistant demo",
+  `Project: ${audit.title}`,
+  `Analyses audited: ${counts.analyses}`,
+  `High-risk analyses: ${counts.highRiskAnalyses}`,
+  `Undefined variables: ${counts.undefinedVariables}`,
+  `Unit drift findings: ${counts.unitDriftFindings}`,
+  `Wrote ${auditJsonPath}`,
+  `Wrote ${svgPath}`,
+];
+for (const line of summaryLines) {
+  console.log(line);
+}
+
+/**
+ * Builds the SVG summary that the demo script writes to demo-output/demo.svg.
+ *
+ * Each analysis packet is mapped to one row: a vertical offset (196 + index * 82),
+ * a color picked from the packet decision, and the packet's flags joined with
+ * " | " ("No flags" when the list is empty). All interpolated text goes through
+ * escapeXml, except the numeric confidence.
+ *
+ * NOTE(review): in this view the row template contains only the interpolated text
+ * and the final return is an empty template, so `rows`, `y`, and `color` are never
+ * used. The SVG element markup appears to have been stripped from this copy;
+ * confirm against the real file before relying on this function's output.
+ *
+ * @param {object} audit - Audit object; `analysisPackets` is read here.
+ * @returns {string} SVG document text.
+ */
+function buildSvg(audit) {
+ const rows = audit.analysisPackets
+ .map((packet, index) => {
+ const y = 196 + index * 82;
+ const color = packet.decision === "ready_for_review"
+ ? "#1f8a5b"
+ : packet.decision === "needs_author_clarification"
+ ? "#ad6f00"
+ : "#b42318";
+ const flags = packet.flags.length === 0 ? "No flags" : packet.flags.join(" | ");
+ return `
+
+
+ ${escapeXml(packet.analysisId)}
+ ${escapeXml(formatDecision(packet.decision))} - confidence ${packet.reproducibilityConfidence}
+ ${escapeXml(flags)}
+ ${escapeXml(packet.claim)}
+ `;
+ })
+ .join("");
+
+ return ``;
+}
+
+/**
+ * Returns the template fragment for one metric card: the raw `value` followed by
+ * the XML-escaped `label`.
+ *
+ * NOTE(review): `x`, `y`, and `color` are not referenced in the visible template,
+ * and no call site is visible in this view. The surrounding SVG markup appears to
+ * have been stripped from this copy; confirm against the real file.
+ * `value` is interpolated without escaping.
+ *
+ * @param {number} x - Unused in the visible template.
+ * @param {number} y - Unused in the visible template.
+ * @param {string} label - Caption text; escaped with escapeXml.
+ * @param {*} value - Metric value, interpolated as-is.
+ * @param {string} color - Unused in the visible template.
+ * @returns {string} Template fragment.
+ */
+function metricCard(x, y, label, value, color) {
+ return `
+
+ ${value}
+ ${escapeXml(label)}
+ `;
+}
+
+/**
+ * Converts a snake_case decision code into a title-cased label,
+ * e.g. "ready_for_review" becomes "Ready For Review".
+ *
+ * Empty segments (an empty string, or doubled/leading/trailing underscores)
+ * are kept as empty words instead of throwing.
+ *
+ * @param {string} decision - Decision code with "_" separators.
+ * @returns {string} Space-separated label with each word capitalized.
+ */
+function formatDecision(decision) {
+  return decision
+    .split("_")
+    // charAt(0) yields "" for an empty segment, whereas part[0] would be undefined.
+    .map((part) => part.charAt(0).toUpperCase() + part.slice(1))
+    .join(" ");
+}
+
+/**
+ * Escapes a value for safe inclusion in XML/SVG text or double-quoted attributes.
+ *
+ * The ampersand is replaced first so the entities produced by the later
+ * replacements are not escaped a second time.
+ *
+ * @param {*} value - Any value; it is converted with String() before escaping.
+ * @returns {string} Text with &, <, >, and " replaced by their XML entities.
+ */
+function escapeXml(value) {
+  return String(value)
+    .replace(/&/g, "&amp;")
+    .replace(/</g, "&lt;")
+    .replace(/>/g, "&gt;")
+    .replace(/"/g, "&quot;");
+}
diff --git a/analysis-variable-provenance-assistant/index.js b/analysis-variable-provenance-assistant/index.js
new file mode 100644
index 0000000..db05b5c
--- /dev/null
+++ b/analysis-variable-provenance-assistant/index.js
@@ -0,0 +1,332 @@
+const crypto = require("crypto");
+
+// Penalty points per finding flag. auditAnalysis subtracts the weight of each
+// distinct flag raised for an analysis from 100 (never going below 0) to get its
+// reproducibility confidence; a flag missing from this table costs 5 points.
+// Frozen so the shared scoring table cannot be changed by accident at runtime.
+const FLAG_WEIGHTS = Object.freeze({
+  UNDEFINED_VARIABLE: 30,
+  PIPELINE_MISSING: 20,
+  UNIT_DRIFT: 12,
+  COHORT_FILTER_MISSING: 12,
+  INCOMPLETE_DERIVATION_LINEAGE: 12,
+  TRANSFORM_HASH_STALE: 15,
+  NONDETERMINISTIC_PIPELINE: 12,
+  PIPELINE_TEST_FAILING: 18,
+  FAILED_REPRODUCIBILITY_ATTEMPT: 25,
+});
+
+/**
+ * Audits every analysis in a manuscript against the data dictionary, the
+ * producing pipelines, and prior reproducibility attempts.
+ *
+ * @param {object} [input] - Audit inputs; omitting it fails validation with the
+ *   same error as a missing manuscript.
+ * @param {object} input.manuscript - Must carry an `analyses` array; `projectId`,
+ *   `title`, and `domain` are copied into the result.
+ * @param {object} input.dataDictionary - Must carry a `variables` array.
+ * @param {Array} input.pipelines - Pipeline transform records.
+ * @param {Array|null} [input.reproducibilityAttempts=[]] - Prior attempts, grouped
+ *   by their `analysisId`; `null` is treated as "no attempts".
+ * @param {string} [input.generatedAt] - Timestamp stamped on the audit and on each
+ *   finding; defaults to the current time as an ISO string.
+ * @returns {object} Audit with `generatedAt`, `projectId`, `title`, `domain`,
+ *   `analysisPackets` (one per analysis, in manuscript order), and `reviewerReport`.
+ * @throws {Error} When `manuscript.analyses` or `dataDictionary.variables` is not
+ *   an array, or when `pipelines` is not an array.
+ */
+function auditVariableProvenance({
+  manuscript,
+  dataDictionary,
+  pipelines,
+  reproducibilityAttempts = [],
+  generatedAt = new Date().toISOString(),
+} = {}) {
+  if (!manuscript || !Array.isArray(manuscript.analyses)) {
+    throw new Error("manuscript.analyses is required");
+  }
+  if (!dataDictionary || !Array.isArray(dataDictionary.variables)) {
+    throw new Error("dataDictionary.variables is required");
+  }
+  if (!Array.isArray(pipelines)) {
+    throw new Error("pipelines must be an array");
+  }
+
+  // The parameter default only covers `undefined`; an explicit null (for example
+  // from parsed JSON) would otherwise fail when the attempts are iterated.
+  const priorAttempts = reproducibilityAttempts === null ? [] : reproducibilityAttempts;
+
+  const dictionary = buildDictionaryIndex(dataDictionary.variables);
+  const pipelineByOutput = buildPipelineIndex(pipelines);
+  const attemptsByAnalysis = groupBy(priorAttempts, "analysisId");
+  const analysisPackets = manuscript.analyses.map((analysis) =>
+    auditAnalysis({
+      analysis,
+      dictionary,
+      pipelineByOutput,
+      attempts: attemptsByAnalysis.get(analysis.id) || [],
+      generatedAt,
+    })
+  );
+
+  const audit = {
+    generatedAt,
+    projectId: manuscript.projectId,
+    title: manuscript.title,
+    domain: manuscript.domain,
+    analysisPackets,
+  };
+  // The report is derived from the packets, so it is attached after they exist.
+  audit.reviewerReport = buildReviewerReport(audit);
+  return audit;
+}
+
+/**
+ * Audits one manuscript analysis and returns its provenance packet.
+ *
+ * For each variable the checks run in this order, and findings are recorded in
+ * that order:
+ *   1. dictionary lookup by normalized name; an unknown variable is reported
+ *      (plus a missing-pipeline finding when no pipeline outputs it) and the
+ *      remaining checks are skipped for that variable;
+ *   2. unit drift between manuscript and dictionary;
+ *   3. cohort not listed in the dictionary entry's `allowedCohorts`;
+ *   4. producing pipeline lookup; a missing pipeline skips the remaining checks;
+ *   5. dictionary lineage parents absent from the pipeline inputs;
+ *   6. pipeline cohort filter differing from the analysis' required filter;
+ *   7. pipeline hash differing from the analysis' reported hash;
+ *   8. pipeline marked non-deterministic;
+ *   9. pipeline test status other than "pass".
+ * Every attempt whose status is not "pass" then adds an analysis-level finding.
+ *
+ * Confidence is 100 minus the FLAG_WEIGHTS weight of each distinct flag (5 for a
+ * flag without a weight), floored at 0. The decision is "ready_for_review" at
+ * 80 or above, "needs_author_clarification" at 60 or above, otherwise
+ * "hold_for_provenance_fix".
+ *
+ * NOTE(review): the analysis carries a single `reportedTransformHash`, which is
+ * compared against the producing pipeline of every variable. An analysis whose
+ * variables come from different pipelines will therefore report a hash change
+ * for at least one of them — confirm whether per-variable hashes are intended.
+ *
+ * @param {object} params
+ * @param {object} params.analysis - Manuscript analysis record.
+ * @param {Map} params.dictionary - Dictionary entries keyed by normalized id/alias.
+ * @param {Map} params.pipelineByOutput - Pipelines keyed by normalized output name.
+ * @param {Array} params.attempts - Reproducibility attempts for this analysis.
+ * @param {string} params.generatedAt - Timestamp copied onto every finding.
+ * @returns {object} Packet with analysisId, claim, cohort, flags, findings,
+ *   reviewerActions, reproducibilityConfidence, and decision.
+ */
+function auditAnalysis({ analysis, dictionary, pipelineByOutput, attempts, generatedAt }) {
+  const findings = [];
+  const flags = [];
+
+  // All findings share the analysis id and timestamp. The property order of this
+  // literal is the order in which findings are serialized, so it is kept fixed.
+  const report = (variableName, flag, severity, message, reviewerAction) =>
+    addFinding(findings, flags, {
+      analysisId: analysis.id,
+      variableName,
+      flag,
+      severity,
+      message,
+      reviewerAction,
+      generatedAt,
+    });
+
+  for (const variable of analysis.variables || []) {
+    const normalizedName = normalize(variable.name);
+    const dictionaryEntry = dictionary.get(normalizedName);
+
+    if (!dictionaryEntry) {
+      report(
+        variable.name,
+        "UNDEFINED_VARIABLE",
+        "high",
+        `${variable.name} is used in the manuscript but is not defined in the data dictionary.`,
+        `Define ${variable.name} in the project data dictionary before review.`
+      );
+      if (!pipelineByOutput.has(normalizedName)) {
+        report(
+          variable.name,
+          "PIPELINE_MISSING",
+          "high",
+          `${variable.name} has no producing pipeline transform.`,
+          `Attach a producing transform or mark ${variable.name} as externally sourced.`
+        );
+      }
+      // Without a dictionary entry there is nothing further to compare against.
+      continue;
+    }
+
+    // From here on findings use the canonical dictionary id, not the manuscript spelling.
+    const variableId = dictionaryEntry.id;
+
+    if (
+      variable.unit &&
+      dictionaryEntry.unit &&
+      normalizeUnit(variable.unit) !== normalizeUnit(dictionaryEntry.unit)
+    ) {
+      report(
+        variableId,
+        "UNIT_DRIFT",
+        "medium",
+        `${variableId} is reported as ${variable.unit} but the data dictionary uses ${dictionaryEntry.unit}.`,
+        `Reconcile ${variableId} units between manuscript and data dictionary.`
+      );
+    }
+
+    if (
+      analysis.requiredCohortFilter &&
+      Array.isArray(dictionaryEntry.allowedCohorts) &&
+      !dictionaryEntry.allowedCohorts.includes(analysis.cohort)
+    ) {
+      report(
+        variableId,
+        "COHORT_FILTER_MISSING",
+        "medium",
+        `${variableId} is not listed for cohort ${analysis.cohort}.`,
+        `Document why ${variableId} can be used for ${analysis.cohort}.`
+      );
+    }
+
+    const pipeline = pipelineByOutput.get(normalize(variableId));
+    if (!pipeline) {
+      report(
+        variableId,
+        "PIPELINE_MISSING",
+        "high",
+        `${variableId} has no producing pipeline transform.`,
+        `Attach a producing transform for ${variableId}.`
+      );
+      // The remaining checks all inspect the producing pipeline.
+      continue;
+    }
+
+    // Normalize the pipeline inputs once instead of once per lineage parent.
+    const pipelineInputs = new Set((pipeline.inputs || []).map((input) => normalize(input)));
+    const missingLineageInputs = (dictionaryEntry.lineage || []).filter(
+      (parent) => !pipelineInputs.has(normalize(parent))
+    );
+    if (missingLineageInputs.length > 0) {
+      const missingList = missingLineageInputs.join(", ");
+      report(
+        variableId,
+        "INCOMPLETE_DERIVATION_LINEAGE",
+        "medium",
+        `${variableId} lineage omits ${missingList} from transform ${pipeline.id}.`,
+        `Add ${missingList} to ${pipeline.id} lineage for ${variableId}.`
+      );
+    }
+
+    if (analysis.requiredCohortFilter && pipeline.cohortFilter !== analysis.requiredCohortFilter) {
+      report(
+        variableId,
+        "COHORT_FILTER_MISSING",
+        "medium",
+        `${pipeline.id} cohort filter does not match ${analysis.requiredCohortFilter}.`,
+        `Align ${pipeline.id} cohort filter with ${analysis.id}.`
+      );
+    }
+
+    if (analysis.reportedTransformHash && pipeline.currentHash !== analysis.reportedTransformHash) {
+      report(
+        variableId,
+        "TRANSFORM_HASH_STALE",
+        "medium",
+        `${pipeline.id} hash changed from ${analysis.reportedTransformHash} to ${pipeline.currentHash}.`,
+        `Re-run ${analysis.id} or explain the ${pipeline.id} hash change.`
+      );
+    }
+
+    // Only an explicit `false` counts; a missing flag is not reported.
+    if (pipeline.deterministic === false) {
+      report(
+        variableId,
+        "NONDETERMINISTIC_PIPELINE",
+        "medium",
+        `${pipeline.id} is marked non-deterministic.`,
+        `Stabilize ${pipeline.id} seeds or document accepted variance.`
+      );
+    }
+
+    // A missing test status is not reported; any status other than "pass" is.
+    if (pipeline.testStatus && pipeline.testStatus !== "pass") {
+      report(
+        variableId,
+        "PIPELINE_TEST_FAILING",
+        "high",
+        `${pipeline.id} has test status ${pipeline.testStatus}.`,
+        `Fix ${pipeline.id} tests before relying on ${variableId}.`
+      );
+    }
+  }
+
+  for (const attempt of attempts.filter((item) => item.status !== "pass")) {
+    report(
+      "*analysis*",
+      "FAILED_REPRODUCIBILITY_ATTEMPT",
+      "high",
+      `${analysis.id} has a ${attempt.status} reproducibility attempt: ${attempt.reason}`,
+      `Re-run or explain failed reproducibility attempt from ${attempt.attemptedAt}.`
+    );
+  }
+
+  // `flags` holds each flag once, so repeated findings of a kind are penalized once.
+  const riskPenalty = flags.reduce((total, flag) => total + (FLAG_WEIGHTS[flag] || 5), 0);
+  const reproducibilityConfidence = Math.max(0, 100 - riskPenalty);
+
+  return {
+    analysisId: analysis.id,
+    claim: analysis.claim,
+    cohort: analysis.cohort,
+    flags,
+    findings,
+    reviewerActions: findings.map((finding) => finding.reviewerAction),
+    reproducibilityConfidence,
+    decision: reproducibilityConfidence >= 80
+      ? "ready_for_review"
+      : reproducibilityConfidence >= 60
+        ? "needs_author_clarification"
+        : "hold_for_provenance_fix",
+  };
+}
+
+/**
+ * Summarizes an audit into suite-level counts and priority actions.
+ *
+ * A packet counts as high-risk when its reproducibility confidence is below 70.
+ * An action line is emitted only for a count above zero, in the order:
+ * high-risk analyses, undefined variables, failed reproducibility attempts.
+ *
+ * @param {object} audit - Audit object; a missing `analysisPackets` is treated as empty.
+ * @returns {{counts: object, priorityActions: string[]}} Counts and action lines.
+ */
+function buildReviewerReport(audit) {
+  const packets = audit.analysisPackets || [];
+  const everyFinding = packets.flatMap((packet) => packet.findings || []);
+
+  const highRiskAnalyses = packets.reduce(
+    (total, packet) => (packet.reproducibilityConfidence < 70 ? total + 1 : total),
+    0
+  );
+
+  const counts = {
+    analyses: packets.length,
+    highRiskAnalyses,
+    undefinedVariables: countFlags(everyFinding, "UNDEFINED_VARIABLE"),
+    unitDriftFindings: countFlags(everyFinding, "UNIT_DRIFT"),
+    failedReproducibilityLinks: countFlags(everyFinding, "FAILED_REPRODUCIBILITY_ATTEMPT"),
+  };
+
+  // Each candidate pairs a count with the sentence rendered when that count is non-zero.
+  const candidates = [
+    [
+      counts.highRiskAnalyses,
+      (total) => `Resolve ${formatCount(total, "high-risk analysis provenance packet")} before pre-submission review.`,
+    ],
+    [
+      counts.undefinedVariables,
+      (total) => `Define ${formatCount(total, "manuscript variable")} in the project data dictionary.`,
+    ],
+    [
+      counts.failedReproducibilityLinks,
+      (total) => `Re-run or explain ${formatCount(total, "failed reproducibility attempt")} linked to manuscript analyses.`,
+    ],
+  ];
+  const priorityActions = candidates
+    .filter(([total]) => total > 0)
+    .map(([total, render]) => render(total));
+
+  return { counts, priorityActions };
+}
+
+/**
+ * Derives a short identifier for a finding from its content.
+ *
+ * Only analysisId, variableName, flag, severity, message, and reviewerAction are
+ * hashed, so other fields (such as `generatedAt` or an existing `digest`) do not
+ * affect the result.
+ *
+ * @param {object} finding - Finding record.
+ * @returns {string} "avpa_" followed by the first 24 hex characters of the
+ *   SHA-256 of the JSON-serialized fields.
+ */
+function createFindingDigest(finding) {
+  const { analysisId, variableName, flag, severity, message, reviewerAction } = finding;
+  // Property order is part of the hashed text, so it must stay fixed.
+  const canonical = JSON.stringify({
+    analysisId,
+    variableName,
+    flag,
+    severity,
+    message,
+    reviewerAction,
+  });
+  const hex = crypto.createHash("sha256").update(canonical).digest("hex");
+  return `avpa_${hex.slice(0, 24)}`;
+}
+
+/**
+ * Records a finding and registers its flag.
+ *
+ * A shallow copy of `finding` gets a `digest` and is appended to `findings`;
+ * the flag is appended to `flags` only if it is not already listed. Both arrays
+ * are mutated in place.
+ *
+ * @param {Array} findings - Findings collected so far.
+ * @param {string[]} flags - Distinct flags collected so far.
+ * @param {object} finding - Finding without a digest.
+ */
+function addFinding(findings, flags, finding) {
+  const record = { ...finding };
+  record.digest = createFindingDigest(record);
+  findings.push(record);
+
+  const alreadyFlagged = flags.includes(finding.flag);
+  if (alreadyFlagged) {
+    return;
+  }
+  flags.push(finding.flag);
+}
+
+/**
+ * Indexes data dictionary entries by normalized id and by each normalized alias.
+ *
+ * Names are registered in order (id first, then aliases), and a later entry
+ * overwrites an earlier one that normalizes to the same key.
+ *
+ * @param {Array<object>} variables - Dictionary entries with `id` and optional `aliases`.
+ * @returns {Map<string, object>} Entry lookup keyed by normalized name.
+ */
+function buildDictionaryIndex(variables) {
+  const byName = new Map();
+  for (const entry of variables) {
+    const names = [entry.id, ...(entry.aliases || [])];
+    names.forEach((name) => byName.set(normalize(name), entry));
+  }
+  return byName;
+}
+
+/**
+ * Indexes pipelines by the normalized name of each output they declare.
+ *
+ * A pipeline without `outputs` contributes nothing; when two pipelines declare
+ * the same output, the later one wins.
+ *
+ * @param {Array<object>} pipelines - Pipeline records with optional `outputs`.
+ * @returns {Map<string, object>} Pipeline lookup keyed by normalized output name.
+ */
+function buildPipelineIndex(pipelines) {
+  const byOutput = new Map();
+  for (const step of pipelines) {
+    const produced = [...(step.outputs || [])];
+    produced.forEach((name) => byOutput.set(normalize(name), step));
+  }
+  return byOutput;
+}
+
+/**
+ * Groups items into a Map keyed by the value of one property.
+ *
+ * Keys appear in first-seen order and each group keeps its items in input order.
+ *
+ * @param {Iterable<object>} items - Items to group.
+ * @param {string} key - Property name whose value selects the group.
+ * @returns {Map<*, object[]>} Groups keyed by property value.
+ */
+function groupBy(items, key) {
+  const buckets = new Map();
+  for (const entry of items) {
+    const groupKey = entry[key];
+    const bucket = buckets.get(groupKey);
+    if (bucket === undefined) {
+      buckets.set(groupKey, [entry]);
+    } else {
+      bucket.push(entry);
+    }
+  }
+  return buckets;
+}
+
+/**
+ * Counts the findings that carry a given flag.
+ *
+ * @param {Array<object>} findings - Finding records.
+ * @param {string} flag - Flag to match with strict equality.
+ * @returns {number} Number of matching findings.
+ */
+function countFlags(findings, flag) {
+  return findings.reduce((total, entry) => (entry.flag === flag ? total + 1 : total), 0);
+}
+
+/**
+ * Canonicalizes a variable name for lookups: trims it, lowercases it, and
+ * replaces each run of whitespace and/or hyphens with a single underscore.
+ * Falsy input becomes the empty string.
+ *
+ * @param {*} value - Name to canonicalize.
+ * @returns {string} Canonical name.
+ */
+function normalize(value) {
+  const text = String(value || "");
+  const lowered = text.trim().toLowerCase();
+  return lowered.replace(/[\s-]+/g, "_");
+}
+
+/**
+ * Canonicalizes a unit label for comparison: trims it, lowercases it, and
+ * removes all whitespace. Falsy input becomes the empty string.
+ *
+ * @param {*} value - Unit label.
+ * @returns {string} Canonical unit.
+ */
+function normalizeUnit(value) {
+  const text = String(value || "");
+  const lowered = text.trim().toLowerCase();
+  return lowered.replace(/\s+/g, "");
+}
+
+/**
+ * Formats a count with its label, appending "s" unless the count is exactly 1.
+ *
+ * @param {number} count - Quantity.
+ * @param {string} label - Singular label.
+ * @returns {string} For example "1 item" or "2 items".
+ */
+function formatCount(count, label) {
+  const suffix = count === 1 ? "" : "s";
+  return `${count} ${label}${suffix}`;
+}
+
+// Public API: the full audit entry point, the suite-level report builder, and
+// the digest function. The remaining helpers in this file stay module-private.
+module.exports = {
+ auditVariableProvenance,
+ buildReviewerReport,
+ createFindingDigest,
+};
diff --git a/analysis-variable-provenance-assistant/requirements-map.md b/analysis-variable-provenance-assistant/requirements-map.md
new file mode 100644
index 0000000..5ce49a0
--- /dev/null
+++ b/analysis-variable-provenance-assistant/requirements-map.md
@@ -0,0 +1,17 @@
+# Requirements Map
+
+| Issue #16 requirement | Implementation coverage |
+| --- | --- |
+| Auto peer review reports | Emits reviewer-ready findings and priority actions for manuscript analysis-variable risks. |
+| Statistical or methodological red flags | Flags unit drift, missing cohort filters, stale transform hashes, incomplete lineage, non-deterministic pipelines, and transforms whose pipeline tests are failing. |
+| Claims vs. evidence alignment | Maps manuscript analysis claims to variables, dictionary records, producing transforms, and prior reproducibility attempts. |
+| Reproducibility checker | Checks dependency-like transform hashes, deterministic flags, pipeline test status, and failed prior attempts. |
+| Output consistency with reported results | Flags stale transform hashes and failed reproducibility attempts that can change reported outputs. |
+| Dependency/version integrity | Treats transform hashes and producing pipeline records as deterministic provenance evidence. |
+| Presence of raw data, clean pipelines, and test sets | Requires data dictionary entries, producing transforms, lineage inputs, and passing pipeline tests. |
+| Reproducibility confidence score | Assigns each analysis a confidence score and decision. |
+| Links to previous reproducibility attempts | Links failed attempts to affected manuscript analyses and reviewer actions. |
+
+## Non-Overlap Statement
+
+This slice focuses on analysis-variable provenance. It does not duplicate broad assistant-suite foundations, protocol trace, evidence grounding, statistics review, gap planning, rebuttal response packs, ethics/data-availability checks, citation-context reconciliation, reporting-guideline compliance, benchmark-leakage auditing, or figure/table consistency checks.
diff --git a/analysis-variable-provenance-assistant/sample-data.js b/analysis-variable-provenance-assistant/sample-data.js
new file mode 100644
index 0000000..9dd083f
--- /dev/null
+++ b/analysis-variable-provenance-assistant/sample-data.js
@@ -0,0 +1,102 @@
+// Demo manuscript with three analyses:
+// - analysis-primary reports glucose_variability in mg/dL (the dictionary in this
+//   file uses mmol/L) and a transform hash that matches neither pipeline's
+//   currentHash below;
+// - analysis-secondary uses sleep_fragmentation_index, which is neither defined
+//   in the dictionary nor produced by any pipeline in this file;
+// - analysis-sensitivity reports the hash that biomarker-transform currently has.
+const manuscript = {
+ projectId: "SCI-GLUCOSE-17",
+ title: "Inflammation and glucose variability in post-acute cohorts",
+ domain: "clinical trials",
+ analyses: [
+ {
+ id: "analysis-primary",
+ claim: "Inflammation score predicts glucose variability in the post-acute cohort.",
+ cohort: "post-acute",
+ variables: [
+ { name: "inflammation_score", unit: "score", role: "predictor" },
+ { name: "glucose_variability", unit: "mg/dL", role: "outcome" },
+ ],
+ requiredCohortFilter: "post_acute == true",
+ reportedTransformHash: "sha256:old-glucose-transform",
+ },
+ {
+ id: "analysis-secondary",
+ claim: "Sleep fragmentation explains residual glucose variance.",
+ cohort: "sleep-substudy",
+ variables: [
+ { name: "sleep_fragmentation_index", unit: "index", role: "predictor" },
+ ],
+ requiredCohortFilter: "sleep_substudy == true",
+ reportedTransformHash: "sha256:sleep-transform",
+ },
+ {
+ id: "analysis-sensitivity",
+ claim: "Inflammation score remains stable after excluding medication switchers.",
+ cohort: "post-acute",
+ variables: [
+ { name: "inflammation_score", unit: "score", role: "predictor" },
+ ],
+ requiredCohortFilter: "post_acute == true",
+ reportedTransformHash: "sha256:biomarker-transform",
+ },
+ ],
+};
+
+// Demo data dictionary with two variables. inflammation_score lists two lineage
+// parents (crp_mg_l and il6_pg_ml); glucose_variability has an empty lineage and
+// is defined in mmol/L.
+const dataDictionary = {
+ variables: [
+ {
+ id: "inflammation_score",
+ aliases: ["inflammation score", "crp_il6_score"],
+ unit: "score",
+ dataset: "derived_biomarkers.csv",
+ lineage: ["crp_mg_l", "il6_pg_ml"],
+ allowedCohorts: ["post-acute", "all"],
+ },
+ {
+ id: "glucose_variability",
+ aliases: ["glucose variability", "gv"],
+ unit: "mmol/L",
+ dataset: "wearable_glucose.csv",
+ lineage: [],
+ allowedCohorts: ["post-acute"],
+ },
+ ],
+};
+
+// Demo pipeline transforms. biomarker-transform lists only crp_mg_l as input and
+// is deterministic with passing tests; glucose-transform is marked
+// non-deterministic and has a failing test status.
+const pipelines = [
+ {
+ id: "biomarker-transform",
+ outputs: ["inflammation_score"],
+ inputs: ["crp_mg_l"],
+ cohortFilter: "post_acute == true",
+ currentHash: "sha256:biomarker-transform",
+ deterministic: true,
+ testStatus: "pass",
+ },
+ {
+ id: "glucose-transform",
+ outputs: ["glucose_variability"],
+ inputs: ["glucose_reading"],
+ cohortFilter: "post_acute == true",
+ currentHash: "sha256:new-glucose-transform",
+ deterministic: false,
+ testStatus: "fail",
+ },
+];
+
+// Demo reproducibility attempts: one failed attempt for analysis-primary and one
+// passing attempt for analysis-sensitivity.
+const reproducibilityAttempts = [
+ {
+ analysisId: "analysis-primary",
+ status: "fail",
+ reason: "Output variance changed after glucose transform rerun.",
+ attemptedAt: "2026-05-18T09:00:00.000Z",
+ },
+ {
+ analysisId: "analysis-sensitivity",
+ status: "pass",
+ reason: "Sensitivity analysis reproduced from locked transform hash.",
+ attemptedAt: "2026-05-19T15:30:00.000Z",
+ },
+];
+
+// Exports the four demo fixtures defined in this file.
+module.exports = {
+ manuscript,
+ dataDictionary,
+ pipelines,
+ reproducibilityAttempts,
+};
diff --git a/analysis-variable-provenance-assistant/test.js b/analysis-variable-provenance-assistant/test.js
new file mode 100644
index 0000000..6175d31
--- /dev/null
+++ b/analysis-variable-provenance-assistant/test.js
@@ -0,0 +1,166 @@
+const assert = require("assert");
+
+const {
+ auditVariableProvenance,
+ buildReviewerReport,
+ createFindingDigest,
+} = require("./index");
+
+// Fixed timestamp passed to every audit call in this file.
+const generatedAt = "2026-05-20T12:30:00.000Z";
+
+// Test manuscript with two analyses:
+// - analysis-primary reports glucose_variability in mg/dL (the test dictionary
+//   uses mmol/L) and a transform hash that matches neither test pipeline;
+// - analysis-secondary uses sleep_fragmentation_index, which is neither defined
+//   in the test dictionary nor produced by any test pipeline.
+const manuscript = {
+ projectId: "SCI-GLUCOSE-17",
+ title: "Inflammation and glucose variability in post-acute cohorts",
+ domain: "clinical trials",
+ analyses: [
+ {
+ id: "analysis-primary",
+ claim: "Inflammation score predicts glucose variability in the post-acute cohort.",
+ cohort: "post-acute",
+ variables: [
+ { name: "inflammation_score", unit: "score", role: "predictor" },
+ { name: "glucose_variability", unit: "mg/dL", role: "outcome" },
+ ],
+ requiredCohortFilter: "post_acute == true",
+ reportedTransformHash: "sha256:old-glucose-transform",
+ },
+ {
+ id: "analysis-secondary",
+ claim: "Novel sleep fragmentation index explains residual variance.",
+ cohort: "sleep-substudy",
+ variables: [
+ { name: "sleep_fragmentation_index", unit: "index", role: "predictor" },
+ ],
+ requiredCohortFilter: "sleep_substudy == true",
+ reportedTransformHash: "sha256:sleep-transform",
+ },
+ ],
+};
+
+// Test data dictionary with two variables. inflammation_score lists crp_mg_l and
+// il6_pg_ml as lineage parents; glucose_variability is defined in mmol/L.
+const dataDictionary = {
+ variables: [
+ {
+ id: "inflammation_score",
+ aliases: ["inflammation score", "crp_il6_score"],
+ unit: "score",
+ dataset: "derived_biomarkers.csv",
+ lineage: ["crp_mg_l", "il6_pg_ml"],
+ allowedCohorts: ["post-acute", "all"],
+ },
+ {
+ id: "glucose_variability",
+ aliases: ["glucose variability", "gv"],
+ unit: "mmol/L",
+ dataset: "wearable_glucose.csv",
+ lineage: [],
+ allowedCohorts: ["post-acute"],
+ },
+ ],
+};
+
+// Test pipelines. biomarker-transform lists only crp_mg_l as input;
+// glucose-transform is marked non-deterministic and has a failing test status.
+const pipelines = [
+ {
+ id: "biomarker-transform",
+ outputs: ["inflammation_score"],
+ inputs: ["crp_mg_l"],
+ cohortFilter: "post_acute == true",
+ currentHash: "sha256:biomarker-transform",
+ deterministic: true,
+ testStatus: "pass",
+ },
+ {
+ id: "glucose-transform",
+ outputs: ["glucose_variability"],
+ inputs: ["glucose_reading"],
+ cohortFilter: "post_acute == true",
+ currentHash: "sha256:new-glucose-transform",
+ deterministic: false,
+ testStatus: "fail",
+ },
+];
+
+// Single failed reproducibility attempt, linked to analysis-primary.
+const reproducibilityAttempts = [
+ {
+ analysisId: "analysis-primary",
+ status: "fail",
+ reason: "Output variance changed after glucose transform rerun.",
+ attemptedAt: "2026-05-18T09:00:00.000Z",
+ },
+];
+
+/**
+ * Minimal test runner: runs `fn` and prints "ok - <name>" on success.
+ * On a thrown error it prints "not ok - <name>" and the error to stderr and sets
+ * the process exit code to 1, without stopping later tests.
+ *
+ * @param {string} name - Test description used in the output line.
+ * @param {Function} fn - Synchronous test body; throwing marks the test failed.
+ */
+function test(name, fn) {
+  const reportFailure = (error) => {
+    console.error(`not ok - ${name}`);
+    console.error(error);
+    process.exitCode = 1;
+  };
+
+  try {
+    fn();
+    console.log(`ok - ${name}`);
+  } catch (error) {
+    reportFailure(error);
+  }
+}
+
+// Covers the per-analysis packets: every flag the primary fixture is built to
+// trigger (including the failing glucose-transform tests), the exact flag list
+// for the undefined secondary variable, and the resulting decisions.
+test("audits variable provenance risks with reviewer-ready actions", () => {
+  const audit = auditVariableProvenance({
+    manuscript,
+    dataDictionary,
+    pipelines,
+    reproducibilityAttempts,
+    generatedAt,
+  });
+
+  const primary = audit.analysisPackets.find((packet) => packet.analysisId === "analysis-primary");
+  assert(primary, "expected primary analysis packet");
+  const expectedPrimaryFlags = [
+    "UNIT_DRIFT",
+    "INCOMPLETE_DERIVATION_LINEAGE",
+    "TRANSFORM_HASH_STALE",
+    "NONDETERMINISTIC_PIPELINE",
+    "PIPELINE_TEST_FAILING",
+    "FAILED_REPRODUCIBILITY_ATTEMPT",
+  ];
+  for (const flag of expectedPrimaryFlags) {
+    assert(primary.flags.includes(flag), `expected primary flag ${flag}`);
+  }
+  assert(primary.reviewerActions.some((action) => action.includes("glucose_variability")));
+  assert(primary.reproducibilityConfidence < 70);
+  assert.strictEqual(primary.decision, "hold_for_provenance_fix");
+
+  const secondary = audit.analysisPackets.find((packet) => packet.analysisId === "analysis-secondary");
+  assert(secondary, "expected secondary analysis packet");
+  assert.deepStrictEqual(secondary.flags, ["UNDEFINED_VARIABLE", "PIPELINE_MISSING"]);
+  assert(secondary.reviewerActions.some((action) => action.includes("Define sleep_fragmentation_index")));
+  assert.strictEqual(secondary.decision, "hold_for_provenance_fix");
+});
+
+// Covers the suite-level report: exact counts and the exact priority action
+// sentences for the two-analysis test fixture.
+test("builds a deterministic suite-level reviewer report", () => {
+  const audit = auditVariableProvenance({
+    manuscript,
+    dataDictionary,
+    pipelines,
+    reproducibilityAttempts,
+    generatedAt,
+  });
+  const { counts, priorityActions } = buildReviewerReport(audit);
+
+  const expectedCounts = {
+    analyses: 2,
+    highRiskAnalyses: 2,
+    undefinedVariables: 1,
+    unitDriftFindings: 1,
+    failedReproducibilityLinks: 1,
+  };
+  const expectedActions = [
+    "Resolve 2 high-risk analysis provenance packets before pre-submission review.",
+    "Define 1 manuscript variable in the project data dictionary.",
+    "Re-run or explain 1 failed reproducibility attempt linked to manuscript analyses.",
+  ];
+
+  assert.deepStrictEqual(counts, expectedCounts);
+  assert.deepStrictEqual(priorityActions, expectedActions);
+});
+
+// Covers digest stability: the digest must match the one stored on the finding,
+// must ignore the timestamp, must change when a hashed fact changes, and must
+// keep the "avpa_" + 24 hex character format.
+test("creates stable finding digests from finding facts", () => {
+  const audit = auditVariableProvenance({
+    manuscript,
+    dataDictionary,
+    pipelines,
+    reproducibilityAttempts,
+    generatedAt,
+  });
+  const finding = audit.analysisPackets[0].findings[0];
+
+  const first = createFindingDigest(finding);
+  // Same facts with a different timestamp: the digest must not move.
+  const second = createFindingDigest({ ...finding, generatedAt: "1999-01-01T00:00:00.000Z" });
+  // A changed fact must produce a different digest.
+  const changed = createFindingDigest({ ...finding, message: `${finding.message} (edited)` });
+
+  assert.strictEqual(first, second);
+  assert.strictEqual(first, finding.digest);
+  assert.notStrictEqual(first, changed);
+  assert.match(first, /^avpa_[a-f0-9]{24}$/);
+});