diff --git a/domain-review-template-selector/README.md b/domain-review-template-selector/README.md new file mode 100644 index 0000000..827ffe2 --- /dev/null +++ b/domain-review-template-selector/README.md @@ -0,0 +1,20 @@ +# Domain Review Template Selector + +This module covers the adaptive peer-review template slice of SCIBASE issue #16. + +It classifies a manuscript's scientific domain from title, keywords, methods, and artifact metadata, then selects a domain-specific review template for the AI research assistant. The output combines domain review sections with claim/evidence alignment, reproducibility readiness, reviewer expertise, blockers, warnings, and a deterministic audit digest. + +## What It Does + +- Selects clinical, machine-learning, wet-lab biology, materials/chemistry, computational, or general-science review templates. +- Uses domain-specific review sections instead of a one-size-fits-all peer-review checklist. +- Checks manuscript, data, code, evidence anchors, environment, run command, and expected output readiness. +- Blocks review packets with unanchored or missing-evidence claims. +- Emits reviewer questions, expertise routing, warnings, blockers, and an audit digest. + +## Run + +```bash +node domain-review-template-selector/test.js +node domain-review-template-selector/demo.js +``` diff --git a/domain-review-template-selector/acceptance-notes.md b/domain-review-template-selector/acceptance-notes.md new file mode 100644 index 0000000..7f34213 --- /dev/null +++ b/domain-review-template-selector/acceptance-notes.md @@ -0,0 +1,30 @@ +# Acceptance Notes + +## Review Scenarios + +1. Clinical manuscript + - Clinical terms select a clinical peer-review template. + - Patient eligibility, endpoint validity, safety reporting, and ethics/consent sections are included. + +2. Machine-learning manuscript + - Benchmark, model, training, and ablation terms select the machine-learning template. 
/*
NOTE(review): SOURCE is a whitespace-collapsed git patch. The patch text that
shared these physical lines with demo.js is preserved verbatim in the two
comment blocks of this section (line breaks restored, wording untouched).

--- tail of acceptance-notes.md, followed by the demo.js patch header ---

+ - Data split integrity, baseline comparison, ablation strength, and leakage risk sections are included.
+
+3. Blocked wet-lab review packet
+ - A CRISPR manuscript with unanchored claims is blocked before review.
+ - Missing data, code, environment, run command, and output manifests are surfaced.
+
+4. Computational reproducibility review
+ - A computational manuscript with missing expected outputs is marked needs-review and routed to reproducibility expertise.
+
+## Validation
+
+```bash
+node domain-review-template-selector/test.js
+node domain-review-template-selector/demo.js
+node --check domain-review-template-selector/index.js
+node --check domain-review-template-selector/test.js
+node --check domain-review-template-selector/demo.js
+```
+
+The included `demo.mp4` is a five-second visual walkthrough of domain classification, template selection, and review packet readiness.
diff --git a/domain-review-template-selector/demo.js b/domain-review-template-selector/demo.js
new file mode 100644
index 0000000..8d1b972
--- /dev/null
+++ b/domain-review-template-selector/demo.js
@@ -0,0 +1,36 @@
*/
"use strict"

// Demo script: runs the selector on one machine-learning style manuscript
// packet and prints a short summary of the resulting review packet.

const { selectDomainReviewTemplate } = require("./index")

// The packet parts are named individually so the selector call below reads as
// a plain summary of what is being submitted.
const artifacts = [
  { type: "manuscript", name: "paper.md" },
  { type: "data", name: "dataset-card.json" },
  { type: "code", name: "train.py" },
]

const evidence = [{ id: "ev-split" }, { id: "ev-ablation" }]

// Each claim cites an evidence id that exists in `evidence` above.
const claims = [
  { id: "claim-performance", evidenceIds: ["ev-split"] },
  { id: "claim-ablation", evidenceIds: ["ev-ablation"] },
]

const reproducibility = {
  environment: "Dockerfile",
  runCommand: "python train.py --config baseline.yaml",
  expectedOutputs: ["metrics.json", "ablation.csv"],
}

const result = selectDomainReviewTemplate({
  manuscript: {
    title: "Transformer baseline for satellite image segmentation",
    keywords: ["model", "benchmark", "training", "ablation"],
    methods: ["dataset split", "baseline comparison", "validation"],
    artifacts,
    evidence,
    claims,
    reproducibility,
  },
})

const {
  status,
  domain,
  templateId,
  prioritySections,
  reviewQuestions,
  recommendedReviewerExpertise,
  auditDigest,
} = result

console.log("Domain Review Template Selector Demo")
console.log("====================================")
console.log(`status: ${status}`)
console.log(`domain: ${domain}`)
console.log(`template: ${templateId}`)
console.log(`first section: ${prioritySections[0]}`)
console.log(`review question count: ${reviewQuestions.length}`)
console.log(`expertise: ${recommendedReviewerExpertise.join(", ")}`)
// Only a 16-character prefix of the digest is printed.
console.log(`digest: ${auditDigest.slice(0, 16)}...`)

/*
--- patch text that followed demo.js on the same physical line (preserved) ---

diff --git a/domain-review-template-selector/demo.mp4 b/domain-review-template-selector/demo.mp4
new file mode 100644
index 0000000..7110b41
Binary files /dev/null and b/domain-review-template-selector/demo.mp4 differ
diff --git a/domain-review-template-selector/demo.svg b/domain-review-template-selector/demo.svg
new file mode 100644
index 0000000..ef1979f
--- /dev/null
+++ b/domain-review-template-selector/demo.svg
@@ -0,0 +1,26 @@
+ + Domain Review Template Selector + A visual summary showing manuscript domain classification, template selection, evidence checks, and review packet routing.
+ + + Domain Review Template Selector + Adaptive peer-review templates for AI research assistant packets.
+ + + Classify + keywords + methods + + + + Template + domain-specific sections + + + + Check + evidence + reproducibility
+ + Output + template id, review questions, blockers, warnings, reviewer expertise, audit digest
+ Clinical, ML, wet-lab biology, materials/chemistry, computational, and general science.
*/
+ diff --git a/domain-review-template-selector/index.js b/domain-review-template-selector/index.js new file mode 100644 index 0000000..fce7527 --- /dev/null +++ b/domain-review-template-selector/index.js @@ -0,0 +1,176 @@ +"use strict" + +const crypto = require("node:crypto") + +function stableStringify(value) { + if (Array.isArray(value)) return `[${value.map(stableStringify).join(",")}]` + if (value && typeof value === "object") { + return `{${Object.keys(value) + .sort() + .map((key) => `${JSON.stringify(key)}:${stableStringify(value[key])}`) + .join(",")}}` + } + return JSON.stringify(value) +} + +function digest(value) { + return crypto.createHash("sha256").update(stableStringify(value)).digest("hex") +} + +const DOMAIN_RULES = [ + { + domain: "clinical", + terms: ["clinical", "trial", "patient", "cohort", "consort", "irb", "endpoint", "randomized"], + sections: ["patient eligibility", "endpoint validity", "safety reporting", "ethics and consent"], + }, + { + domain: "machine-learning", + terms: ["model", "benchmark", "dataset", "training", "validation", "baseline", "ablation", "auc", "accuracy"], + sections: ["data split integrity", "baseline comparison", "ablation strength", "leakage risk"], + }, + { + domain: "wet-lab-biology", + terms: ["assay", "cell", "gene", "protein", "western", "qpcr", "crispr", "replicate", "antibody"], + sections: ["biological replicates", "reagent validation", "protocol controls", "raw measurement evidence"], + }, + { + domain: "materials-chemistry", + terms: ["synthesis", "catalyst", "polymer", "crystal", "spectra", "xrd", "sem", "nmr", "yield"], + sections: ["synthesis reproducibility", "characterization evidence", "purity controls", "yield accounting"], + }, + { + domain: "computational", + terms: ["simulation", "notebook", "pipeline", "code", "parameter", "runtime", "dependency", "container"], + sections: ["runtime reproducibility", "parameter coverage", "artifact availability", "determinism checks"], + }, +] + +function 
tokenize(input) { + return String(input || "") + .toLowerCase() + .split(/[^a-z0-9+.-]+/) + .filter(Boolean) +} + +function classifyDomain(manuscript) { + const tokens = tokenize( + [ + manuscript.title, + manuscript.abstract, + ...(manuscript.keywords || []), + ...(manuscript.methods || []), + ...(manuscript.artifacts || []).map((artifact) => `${artifact.type || ""} ${artifact.name || ""}`), + ].join(" "), + ) + const tokenSet = new Set(tokens) + + const scores = DOMAIN_RULES.map((rule) => { + const matchedTerms = rule.terms.filter((term) => tokenSet.has(term)) + return { + domain: rule.domain, + score: matchedTerms.length, + matchedTerms, + sections: rule.sections, + } + }).sort((a, b) => b.score - a.score || a.domain.localeCompare(b.domain)) + + const winner = scores[0] + return winner.score > 0 + ? winner + : { + domain: "general-science", + score: 0, + matchedTerms: [], + sections: ["claim clarity", "methods adequacy", "evidence alignment", "reproducibility readiness"], + } +} + +function evaluateEvidence(manuscript) { + const artifacts = manuscript.artifacts || [] + const evidenceIds = new Set((manuscript.evidence || []).map((item) => item.id)) + const claims = manuscript.claims || [] + const warnings = [] + const blockers = [] + + for (const required of ["manuscript", "data", "code"]) { + if (!artifacts.some((artifact) => artifact.type === required)) { + warnings.push(`missing ${required} artifact`) + } + } + + for (const claim of claims) { + const claimEvidence = claim.evidenceIds || [] + if (claimEvidence.length === 0) { + blockers.push(`claim ${claim.id} has no evidence anchors`) + } else { + const missing = claimEvidence.filter((id) => !evidenceIds.has(id)) + if (missing.length > 0) blockers.push(`claim ${claim.id} references missing evidence: ${missing.join(", ")}`) + } + } + + if (!manuscript.reproducibility?.environment) warnings.push("missing reproducibility environment") + if (!manuscript.reproducibility?.runCommand) warnings.push("missing 
reproducibility run command") + if (!manuscript.reproducibility?.expectedOutputs?.length) warnings.push("missing expected output manifest") + + return { blockers, warnings } +} + +function buildReviewQuestions(domainResult, manuscript, evidenceResult) { + const questions = domainResult.sections.map((section) => ({ + section, + prompt: `Assess ${section} for ${manuscript.title || "the manuscript"} using cited evidence and reproducibility artifacts.`, + })) + + questions.push({ + section: "claim-evidence alignment", + prompt: "List every central claim and confirm whether the cited artifacts directly support it.", + }) + questions.push({ + section: "reproducibility check", + prompt: "Verify the data, code, environment, run command, and expected outputs are sufficient for rerun.", + }) + + if (evidenceResult.blockers.length > 0) { + questions.unshift({ + section: "release blocker", + prompt: "Resolve unanchored or missing-evidence claims before sending this manuscript to review.", + }) + } + + return questions +} + +function selectDomainReviewTemplate(input) { + const manuscript = input.manuscript || {} + const domain = classifyDomain(manuscript) + const evidence = evaluateEvidence(manuscript) + const questions = buildReviewQuestions(domain, manuscript, evidence) + const reviewerPacket = { + templateId: `${domain.domain}-peer-review`, + domain: domain.domain, + matchedTerms: domain.matchedTerms, + status: evidence.blockers.length > 0 ? "blocked" : evidence.warnings.length > 0 ? "needs-review" : "ready", + prioritySections: domain.sections, + blockers: evidence.blockers, + warnings: evidence.warnings, + reviewQuestions: questions, + recommendedReviewerExpertise: [ + domain.domain, + evidence.warnings.some((warning) => + ["reproducibility", "environment", "run command", "expected output"].some((term) => warning.includes(term)), + ) + ? "reproducibility" + : null, + evidence.blockers.length > 0 ? 
"evidence-audit" : null, + ].filter(Boolean), + } + + return { + ...reviewerPacket, + auditDigest: digest(reviewerPacket), + } +} + +module.exports = { + selectDomainReviewTemplate, +} diff --git a/domain-review-template-selector/requirements-map.md b/domain-review-template-selector/requirements-map.md new file mode 100644 index 0000000..e513403 --- /dev/null +++ b/domain-review-template-selector/requirements-map.md @@ -0,0 +1,13 @@ +# Requirements Map + +| Issue #16 requirement | Coverage in this module | +| --- | --- | +| Auto peer review reports | Builds reviewer-ready questions and sections for a manuscript before public release. | +| Adaptive templates per domain | Selects clinical, machine-learning, wet-lab biology, materials/chemistry, computational, or general-science templates. | +| Claims vs evidence alignment | Blocks unanchored claims and claims that reference missing evidence. | +| Reproducibility checker | Checks data/code artifacts, environment, run command, and expected output manifest readiness. | +| Equip researchers with editorial-quality feedback | Produces domain-specific review questions, blockers, warnings, and reviewer expertise routing. | + +## Non-Overlap Note + +This submission is distinct from broad assistant suites, evidence traces, protocol traces, statistics review, research-gap planners, rebuttal packs, ethics/data availability checks, citation-context reconciliation, reporting-guideline compliance, benchmark leakage audits, figure/table consistency checks, and analysis-variable provenance assistants. It focuses specifically on adaptive domain review template selection and review packet routing. 
/*
NOTE(review): SOURCE is a whitespace-collapsed git patch. The patch header
that shared this physical line with test.js is preserved verbatim below.

diff --git a/domain-review-template-selector/test.js b/domain-review-template-selector/test.js
new file mode 100644
index 0000000..8a5c730
--- /dev/null
+++ b/domain-review-template-selector/test.js
@@ -0,0 +1,98 @@
*/
"use strict"

const assert = require("node:assert/strict")
const { selectDomainReviewTemplate } = require("./index")

// Three of the four scenarios ship the same artifact roles; only the data and
// code file names differ.
const fullArtifactSet = (dataName, codeName) => [
  { type: "manuscript", name: "paper.md" },
  { type: "data", name: dataName },
  { type: "code", name: codeName },
]

// Wrap a manuscript in the input envelope the selector expects.
const review = (manuscript) => selectDomainReviewTemplate({ manuscript })

// Scenario 1: a fully documented clinical packet is classified and ready.
function checkClinicalPacketIsReady() {
  const result = review({
    title: "Randomized clinical trial of a sepsis biomarker",
    keywords: ["clinical", "trial", "patient"],
    artifacts: fullArtifactSet("cohort.csv", "analysis.py"),
    evidence: [{ id: "ev-primary-endpoint" }],
    claims: [{ id: "claim-survival", evidenceIds: ["ev-primary-endpoint"] }],
    reproducibility: {
      environment: "conda-lock.yml",
      runCommand: "python analysis.py",
      expectedOutputs: ["tables/primary.csv"],
    },
  })

  assert.equal(result.domain, "clinical")
  assert.equal(result.status, "ready")
  assert.ok(result.prioritySections.includes("patient eligibility"))
  assert.match(result.auditDigest, /^[0-9a-f]{64}$/)
}

// Scenario 2: benchmark/model vocabulary selects the machine-learning template.
function checkMachineLearningPacketIsReady() {
  const result = review({
    title: "Transformer baseline for satellite image segmentation",
    keywords: ["model", "benchmark", "training", "ablation"],
    artifacts: fullArtifactSet("dataset-card.json", "train.py"),
    evidence: [{ id: "ev-split" }],
    claims: [{ id: "claim-sota", evidenceIds: ["ev-split"] }],
    reproducibility: {
      environment: "Dockerfile",
      runCommand: "python train.py --config baseline.yaml",
      expectedOutputs: ["metrics.json"],
    },
  })

  assert.equal(result.domain, "machine-learning")
  assert.equal(result.status, "ready")
  assert.ok(result.prioritySections.includes("leakage risk"))
}

// Scenario 3: a claim without evidence anchors blocks the wet-lab packet.
function checkUnanchoredWetLabPacketIsBlocked() {
  const result = review({
    title: "CRISPR screen identifies stress response genes",
    keywords: ["crispr", "cell", "gene", "replicate"],
    artifacts: [{ type: "manuscript", name: "paper.md" }],
    evidence: [{ id: "ev-figure-1" }],
    claims: [{ id: "claim-gene-hit", evidenceIds: [] }],
    reproducibility: {},
  })

  assert.equal(result.domain, "wet-lab-biology")
  assert.equal(result.status, "blocked")
  assert.ok(result.blockers.includes("claim claim-gene-hit has no evidence anchors"))
  assert.ok(result.warnings.includes("missing data artifact"))
  assert.equal(result.reviewQuestions[0].section, "release blocker")
}

// Scenario 4: a computational packet without expected outputs needs review and
// is routed to reproducibility expertise.
function checkComputationalPacketNeedsReview() {
  const result = review({
    title: "A reusable simulation workflow",
    keywords: ["simulation", "notebook", "dependency"],
    artifacts: fullArtifactSet("input.json", "notebook.ipynb"),
    evidence: [],
    claims: [],
    reproducibility: { environment: "Dockerfile", runCommand: "jupyter nbconvert --execute notebook.ipynb" },
  })

  assert.equal(result.domain, "computational")
  assert.equal(result.status, "needs-review")
  assert.ok(result.warnings.includes("missing expected output manifest"))
  assert.ok(result.recommendedReviewerExpertise.includes("reproducibility"))
}

checkClinicalPacketIsReady()
checkMachineLearningPacketIsReady()
checkUnanchoredWetLabPacketIsBlocked()
checkComputationalPacketNeedsReview()

console.log("domain-review-template-selector tests passed")