diff --git a/evidence-anchored-summarizer/README.md b/evidence-anchored-summarizer/README.md new file mode 100644 index 0000000..e045985 --- /dev/null +++ b/evidence-anchored-summarizer/README.md @@ -0,0 +1,20 @@ +# Evidence-Anchored Summarizer + +This module covers the AI paper summarizer portion of SCIBASE issue #13. + +It turns claim/evidence inputs into abstract, executive, or layperson summaries while refusing to mark a summary ready if claims are not anchored to known evidence. This keeps AI-generated summaries useful without hiding provenance gaps. + +## What It Does + +- Supports `abstract`, `executive`, and `layperson` summary modes. +- Ranks objective, methods, results, findings, and limitations by evidence support. +- Keeps evidence IDs attached to every output bullet. +- Blocks summaries with missing or unanchored evidence. +- Emits implications, next steps, and a deterministic evidence digest. + +## Run + +```bash +node evidence-anchored-summarizer/test.js +node evidence-anchored-summarizer/demo.js +``` diff --git a/evidence-anchored-summarizer/acceptance-notes.md b/evidence-anchored-summarizer/acceptance-notes.md new file mode 100644 index 0000000..c17d302 --- /dev/null +++ b/evidence-anchored-summarizer/acceptance-notes.md @@ -0,0 +1,28 @@ +# Acceptance Notes + +## Review Scenarios + +1. Ready executive summary + - Objective, methods, and results claims are present. + - Every claim points to a known evidence anchor. + - The result is ready with ranked bullets and a stable digest. + +2. Layperson summary + - Technical terms such as p-values, regression, and confidence intervals are translated into plainer language. + - Evidence anchors remain attached to the output bullets. + +3. Blocked summary + - Unanchored claims and missing evidence IDs block readiness. + - Missing required sections are surfaced as warnings. 
"use strict"

const { buildSummary } = require("./index")

// Evidence anchors the demo claims can cite, one [id, source, locator] row each.
const EVIDENCE_ROWS = [
  ["ev-objective", "abstract", "sentence 1"],
  ["ev-methods", "methods", "paragraph 3"],
  ["ev-results", "results", "figure 2"],
  ["ev-limits", "discussion", "paragraph 5"],
]

// Demo claims, one [id, section, kind, text, cited evidence id] row each.
const CLAIM_ROWS = [
  [
    "claim-objective",
    "objective",
    "context",
    "The study evaluates adaptive sequencing for rare variant detection.",
    "ev-objective",
  ],
  [
    "claim-method",
    "methods",
    "method",
    "The pipeline compares targeted adaptive reads against baseline whole-genome sampling.",
    "ev-methods",
  ],
  [
    "claim-result",
    "results",
    "finding",
    "Adaptive sequencing improves recall for low-frequency variants while reducing total reads.",
    "ev-results",
  ],
  [
    "claim-limit",
    "limitations",
    "limitation",
    "The benchmark is limited to synthetic mixtures and needs external cohort validation.",
    "ev-limits",
  ],
]

// Build a layperson summary in which every claim cites exactly one known anchor.
const summary = buildSummary({
  sourceId: "preprint-42",
  title: "Adaptive Sequencing for Rare Variant Detection",
  mode: "layperson",
  evidence: EVIDENCE_ROWS.map(([id, source, locator]) => ({ id, source, locator })),
  claims: CLAIM_ROWS.map(([id, section, kind, text, evidenceId]) => ({
    id,
    section,
    kind,
    text,
    evidenceIds: [evidenceId],
  })),
})

// The highest-ranked bullet is the one highlighted in the report.
const topBullet = summary.bullets[0]

const reportLines = [
  "Evidence-Anchored Summarizer Demo",
  "=================================",
  `title: ${summary.title}`,
  `mode: ${summary.mode}`,
  `status: ${summary.status}`,
  `headline: ${summary.headline}`,
  `bullets: ${summary.bullets.length}`,
  `top claim: ${topBullet.text}`,
  `top evidence: ${topBullet.evidenceIds.join(", ")}`,
  // Only a digest prefix is printed to keep the demo output compact.
  `digest: ${summary.evidenceDigest.slice(0, 16)}...`,
]

for (const line of reportLines) {
  console.log(line)
}
+ diff --git a/evidence-anchored-summarizer/index.js b/evidence-anchored-summarizer/index.js new file mode 100644 index 0000000..8418c50 --- /dev/null +++ b/evidence-anchored-summarizer/index.js @@ -0,0 +1,144 @@ +"use strict" + +const crypto = require("node:crypto") + +const MODES = new Set(["abstract", "executive", "layperson"]) +const REQUIRED_SECTIONS = new Set(["objective", "methods", "results"]) + +function stableStringify(value) { + if (Array.isArray(value)) return `[${value.map(stableStringify).join(",")}]` + if (value && typeof value === "object") { + return `{${Object.keys(value) + .sort() + .map((key) => `${JSON.stringify(key)}:${stableStringify(value[key])}`) + .join(",")}}` + } + return JSON.stringify(value) +} + +function digest(value) { + return crypto.createHash("sha256").update(stableStringify(value)).digest("hex") +} + +function normalizeMode(mode) { + return MODES.has(mode) ? mode : "abstract" +} + +function splitSentences(text) { + return String(text || "") + .replace(/\s+/g, " ") + .split(/(?<=[.!?])\s+/) + .map((sentence) => sentence.trim()) + .filter(Boolean) +} + +function evidenceById(evidence) { + return new Map((evidence || []).map((item) => [item.id, item])) +} + +function claimScore(claim, evidenceMap) { + const anchors = claim.evidenceIds || [] + const supported = anchors.filter((id) => evidenceMap.has(id)) + const requiredBonus = REQUIRED_SECTIONS.has(claim.section) ? 2 : 0 + const findingBonus = claim.kind === "finding" ? 2 : claim.kind === "method" ? 1 : 0 + return supported.length * 3 + requiredBonus + findingBonus +} + +function summarizeClaim(claim, mode) { + const sentence = splitSentences(claim.text)[0] || claim.text || "No claim text provided." 
+ if (mode === "layperson") { + return sentence + .replace(/\bp\s*[<=>]\s*0\.\d+\b/gi, "a statistical signal") + .replace(/\bconfidence interval\b/gi, "uncertainty range") + .replace(/\bregression\b/gi, "trend analysis") + } + if (mode === "executive") { + return `${claim.section || "study"}: ${sentence}` + } + return sentence +} + +function buildSummary(input) { + const mode = normalizeMode(input.mode) + const evidenceMap = evidenceById(input.evidence) + const claims = [...(input.claims || [])] + .map((claim) => ({ + ...claim, + supportedEvidenceIds: (claim.evidenceIds || []).filter((id) => evidenceMap.has(id)), + missingEvidenceIds: (claim.evidenceIds || []).filter((id) => !evidenceMap.has(id)), + })) + .sort((a, b) => claimScore(b, evidenceMap) - claimScore(a, evidenceMap)) + + const blockers = [] + const warnings = [] + const sectionsCovered = new Set(claims.map((claim) => claim.section)) + for (const required of REQUIRED_SECTIONS) { + if (!sectionsCovered.has(required)) warnings.push(`missing ${required} claim in summary input`) + } + for (const claim of claims) { + if ((claim.evidenceIds || []).length === 0) { + blockers.push(`claim ${claim.id} has no evidence anchors`) + } + if (claim.missingEvidenceIds.length > 0) { + blockers.push(`claim ${claim.id} references missing evidence: ${claim.missingEvidenceIds.join(", ")}`) + } + } + + const selectedClaims = claims.slice(0, input.maxClaims || 5) + const bullets = selectedClaims.map((claim) => ({ + claimId: claim.id, + section: claim.section, + text: summarizeClaim(claim, mode), + evidenceIds: claim.supportedEvidenceIds, + })) + + const implications = selectedClaims + .filter((claim) => claim.kind === "finding" || claim.kind === "limitation") + .slice(0, 3) + .map((claim) => ({ + claimId: claim.id, + text: + claim.kind === "limitation" + ? 
`Treat ${claim.section || "this result"} cautiously until the limitation is resolved.` + : `Use ${claim.section || "this finding"} as a candidate next-step signal.`, + })) + + const status = blockers.length > 0 ? "blocked" : warnings.length > 0 ? "held" : "ready" + const summary = { + sourceId: input.sourceId || null, + title: input.title || "Untitled research source", + mode, + status, + headline: + mode === "layperson" + ? "Plain-language summary with evidence links" + : mode === "executive" + ? "Decision-ready research summary" + : "Evidence-anchored abstract summary", + bullets, + implications, + nextSteps: [ + "Review all cited evidence anchors before sharing externally.", + "Resolve blockers before treating the summary as publication-ready.", + "Regenerate after manuscript or dataset revisions.", + ], + blockers, + warnings, + } + + return { + ...summary, + evidenceDigest: digest({ + sourceId: summary.sourceId, + title: summary.title, + bullets, + implications, + blockers, + warnings, + }), + } +} + +module.exports = { + buildSummary, +} diff --git a/evidence-anchored-summarizer/requirements-map.md b/evidence-anchored-summarizer/requirements-map.md new file mode 100644 index 0000000..8e66462 --- /dev/null +++ b/evidence-anchored-summarizer/requirements-map.md @@ -0,0 +1,14 @@ +# Requirements Map + +| Issue #13 requirement | Coverage in this module | +| --- | --- | +| Generate concise summaries of project repositories, preprints, or uploaded PDFs | Builds compact summaries from structured research claims and source metadata. | +| Summarization modes | Supports abstract, executive, and layperson modes. | +| Auto-generate key findings, implications, and next steps | Produces ranked bullets, implications, and next-step guidance. | +| Save time reading new literature | Sorts claims by section importance and evidence support. | +| Generate overviews for collaborators, funders, or journal editors | Executive and layperson modes produce audience-specific phrasing. 
"use strict"

const assert = require("node:assert/strict")
const { buildSummary } = require("./index")

// Shared fixture: four anchored claims, one per section, in executive mode.
const input = {
  sourceId: "preprint-42",
  title: "Adaptive Sequencing for Rare Variant Detection",
  mode: "executive",
  evidence: [
    { id: "ev-objective", source: "abstract", locator: "sentence 1" },
    { id: "ev-methods", source: "methods", locator: "paragraph 3" },
    { id: "ev-results", source: "results", locator: "figure 2" },
    { id: "ev-limits", source: "discussion", locator: "paragraph 5" },
  ],
  claims: [
    {
      id: "claim-objective",
      section: "objective",
      kind: "context",
      text: "The study evaluates adaptive sequencing for rare variant detection.",
      evidenceIds: ["ev-objective"],
    },
    {
      id: "claim-method",
      section: "methods",
      kind: "method",
      text: "The pipeline compares targeted adaptive reads against baseline whole-genome sampling.",
      evidenceIds: ["ev-methods"],
    },
    {
      id: "claim-result",
      section: "results",
      kind: "finding",
      text: "Adaptive sequencing improves recall for low-frequency variants while reducing total reads.",
      evidenceIds: ["ev-results"],
    },
    {
      id: "claim-limit",
      section: "limitations",
      kind: "limitation",
      text: "The benchmark is limited to synthetic mixtures and needs external cohort validation.",
      evidenceIds: ["ev-limits"],
    },
  ],
}

// A fully anchored executive summary is ready and ranks the finding first.
function testReadyExecutiveSummary() {
  const summary = buildSummary(input)
  const [topBullet] = summary.bullets

  assert.equal(summary.status, "ready")
  assert.equal(summary.mode, "executive")
  assert.equal(summary.bullets.length, 4)
  assert.equal(topBullet.claimId, "claim-result")
  assert.deepEqual(topBullet.evidenceIds, ["ev-results"])
  assert.match(summary.evidenceDigest, /^[0-9a-f]{64}$/)
}

// Layperson mode rewrites statistical jargon in the bullet text.
function testLaypersonTranslation() {
  const statClaim = {
    id: "claim-stat",
    section: "results",
    kind: "finding",
    text: "The p < 0.05 regression result has a narrow confidence interval.",
    evidenceIds: ["ev-results"],
  }
  const summary = buildSummary({
    ...input,
    mode: "layperson",
    claims: [...input.claims, statClaim],
  })

  const statBullet = summary.bullets.find((bullet) => bullet.claimId === "claim-stat")
  for (const phrase of ["statistical signal", "trend analysis", "uncertainty range"]) {
    assert.ok(statBullet.text.includes(phrase))
  }
}

// Unanchored claims and unknown evidence ids block the summary; a missing
// required section is reported as a warning.
function testBlockedSummary() {
  const unanchored = {
    id: "claim-unanchored",
    section: "results",
    kind: "finding",
    text: "Unsupported claim.",
    evidenceIds: [],
  }
  const missingEvidence = {
    id: "claim-missing",
    section: "methods",
    kind: "method",
    text: "Missing evidence claim.",
    evidenceIds: ["does-not-exist"],
  }
  const summary = buildSummary({ ...input, claims: [unanchored, missingEvidence] })

  assert.equal(summary.status, "blocked")
  assert.ok(summary.blockers.includes("claim claim-unanchored has no evidence anchors"))
  assert.ok(summary.blockers.includes("claim claim-missing references missing evidence: does-not-exist"))
  assert.ok(summary.warnings.includes("missing objective claim in summary input"))
}

const scenarios = [testReadyExecutiveSummary, testLaypersonTranslation, testBlockedSummary]
for (const scenario of scenarios) {
  scenario()
}

console.log("evidence-anchored-summarizer tests passed")