diff --git a/scientific-roundtrip-fidelity-checker/README.md b/scientific-roundtrip-fidelity-checker/README.md new file mode 100644 index 0000000..a452f84 --- /dev/null +++ b/scientific-roundtrip-fidelity-checker/README.md @@ -0,0 +1,28 @@ +# Scientific Round-Trip Fidelity Checker + +This module is a focused slice for SCIBASE.AI issue #12, Real-Time Collaborative Editor. + +It verifies that scientific documents can move through Markdown, LaTeX, and notebook export/import paths without silently losing publication-critical context. The sample data is synthetic and dependency-free. + +## What It Covers + +- Equation LaTeX, labels, and citation keys. +- Figure and table anchors, captions, source assets, and table columns. +- Jupyter-style cell IDs, languages, source hashes, output hashes, and execution counts. +- Inline comments and unresolved suggestions that must survive review export. +- Cross-reference integrity across figures, tables, equations, and notebook cells. +- Reviewer repair queues, JSON audit packets, Markdown review packets, SVG summaries, and a short MP4 demo artifact. 
+ +## Run + +```bash +node scientific-roundtrip-fidelity-checker/test.js +node scientific-roundtrip-fidelity-checker/demo.js +``` + +Demo output is written to: + +- `reports/roundtrip-audit.json` +- `reports/review-packet.md` +- `reports/roundtrip-fidelity.svg` +- `reports/demo.mp4` diff --git a/scientific-roundtrip-fidelity-checker/demo.js b/scientific-roundtrip-fidelity-checker/demo.js new file mode 100644 index 0000000..6ea3b87 --- /dev/null +++ b/scientific-roundtrip-fidelity-checker/demo.js @@ -0,0 +1,25 @@ +const fs = require("node:fs"); +const path = require("node:path"); +const data = require("./sample-data"); +const { renderMarkdown, renderSvg, runRoundTripAudit } = require("./index"); + +const report = runRoundTripAudit(data); +const reportsDir = path.join(__dirname, "reports"); +fs.mkdirSync(reportsDir, { recursive: true }); +fs.writeFileSync(path.join(reportsDir, "roundtrip-audit.json"), JSON.stringify(report, null, 2)); +fs.writeFileSync(path.join(reportsDir, "review-packet.md"), renderMarkdown(report)); +fs.writeFileSync(path.join(reportsDir, "roundtrip-fidelity.svg"), renderSvg(report)); + +console.log(JSON.stringify({ + decision: report.summary.releaseDecision, + averageScore: report.summary.averageScore, + totalFindings: report.summary.totalFindings, + weakestFormats: report.summary.weakestFormats, + auditDigest: report.auditDigest, + reports: [ + "reports/roundtrip-audit.json", + "reports/review-packet.md", + "reports/roundtrip-fidelity.svg", + "reports/demo.mp4" + ] +}, null, 2)); diff --git a/scientific-roundtrip-fidelity-checker/index.js b/scientific-roundtrip-fidelity-checker/index.js new file mode 100644 index 0000000..6fbac7e --- /dev/null +++ b/scientific-roundtrip-fidelity-checker/index.js @@ -0,0 +1,279 @@ +const crypto = require("node:crypto"); + +const SEVERITY_WEIGHTS = { + blocker: 30, + high: 20, + medium: 10, + low: 4 +}; + +function arrayDiff(expected = [], actual = []) { + const actualSet = new Set(actual); + return 
expected.filter((item) => !actualSet.has(item)); +} + +function stableStringify(value) { + if (Array.isArray(value)) return `[${value.map(stableStringify).join(",")}]`; + if (value && typeof value === "object") { + return `{${Object.keys(value).sort().map((key) => `${JSON.stringify(key)}:${stableStringify(value[key])}`).join(",")}}`; + } + return JSON.stringify(value); +} + +function digest(value) { + return crypto.createHash("sha256").update(stableStringify(value)).digest("hex"); +} + +function addFinding(findings, severity, code, message, task, blockId = null) { + findings.push({ severity, code, message, task, blockId }); +} + +function compareScalar(findings, block, roundTripBlock, field, severity, code, label) { + if (block[field] !== roundTripBlock[field]) { + addFinding( + findings, + severity, + code, + `${label} changed for ${block.id}: expected ${JSON.stringify(block[field])}, got ${JSON.stringify(roundTripBlock[field])}.`, + `Regenerate the export/import adapter so ${label.toLowerCase()} survives round trip for ${block.id}.`, + block.id + ); + } +} + +function compareArray(findings, block, roundTripBlock, field, severity, code, label) { + const missing = arrayDiff(block[field], roundTripBlock[field]); + if (missing.length) { + addFinding( + findings, + severity, + code, + `${label} missing after round trip for ${block.id}: ${missing.join(", ")}.`, + `Preserve ${label.toLowerCase()} metadata for ${block.id} in this format.`, + block.id + ); + } +} + +function compareBlock(canonicalBlock, roundTripBlock, format) { + const findings = []; + if (!roundTripBlock) { + addFinding( + findings, + "blocker", + "block-dropped", + `${format} round trip dropped required block ${canonicalBlock.id}.`, + `Restore ${canonicalBlock.id} during ${format} import before enabling publication export.`, + canonicalBlock.id + ); + return findings; + } + + compareScalar(findings, canonicalBlock, roundTripBlock, "type", "blocker", "block-type-changed", "Block type"); + 
compareScalar(findings, canonicalBlock, roundTripBlock, "label", "high", "label-changed", "Anchor label"); + compareArray(findings, canonicalBlock, roundTripBlock, "references", "medium", "citation-key-lost", "Citation keys"); + compareArray(findings, canonicalBlock, roundTripBlock, "comments", "medium", "comment-thread-lost", "Comment threads"); + compareArray(findings, canonicalBlock, roundTripBlock, "suggestions", "medium", "suggestion-lost", "Unresolved suggestions"); + + if (canonicalBlock.type === "equation") { + compareScalar(findings, canonicalBlock, roundTripBlock, "latex", "high", "equation-mutated", "Equation LaTeX"); + } + + if (canonicalBlock.type === "figure") { + compareScalar(findings, canonicalBlock, roundTripBlock, "caption", "medium", "caption-mutated", "Figure caption"); + if (canonicalBlock.sourceAsset !== roundTripBlock.sourceAsset) { + addFinding( + findings, + "low", + "source-asset-transcoded", + `${canonicalBlock.id} source asset changed from ${canonicalBlock.sourceAsset} to ${roundTripBlock.sourceAsset}.`, + "Confirm the transcoded figure keeps resolution, accessibility metadata, and citation anchors.", + canonicalBlock.id + ); + } + } + + if (canonicalBlock.type === "table") { + compareScalar(findings, canonicalBlock, roundTripBlock, "caption", "medium", "table-caption-mutated", "Table caption"); + compareArray(findings, canonicalBlock, roundTripBlock, "columns", "high", "table-column-lost", "Table columns"); + } + + if (canonicalBlock.type === "notebook-cell") { + compareScalar(findings, canonicalBlock, roundTripBlock, "language", "high", "cell-language-changed", "Notebook language"); + compareScalar(findings, canonicalBlock, roundTripBlock, "sourceHash", "high", "cell-source-mutated", "Notebook source hash"); + compareScalar(findings, canonicalBlock, roundTripBlock, "outputHash", "high", "cell-output-mutated", "Notebook output hash"); + compareScalar(findings, canonicalBlock, roundTripBlock, "executionCount", "medium", 
"cell-execution-count-drift", "Execution count"); + } + + return findings; +} + +function requiredBlocksForFormat(document, format) { + return document.blocks.filter((block) => block.requiredFormats.includes(format)); +} + +function crossReferenceKey(ref) { + return `${ref.from}->${ref.to}:${ref.kind}`; +} + +function compareCrossReferences(document, roundTrip) { + const actual = new Set((roundTrip.crossReferences || []).map(crossReferenceKey)); + return document.crossReferences + .filter((ref) => { + const fromBlock = document.blocks.find((block) => block.id === ref.from); + const toBlock = document.blocks.find((block) => block.id === ref.to); + return fromBlock?.requiredFormats.includes(roundTrip.format) || toBlock?.requiredFormats.includes(roundTrip.format); + }) + .filter((ref) => !actual.has(crossReferenceKey(ref))) + .map((ref) => ({ + severity: "high", + code: "cross-reference-lost", + message: `${roundTrip.format} round trip lost ${ref.kind} link ${ref.from} -> ${ref.to}.`, + task: `Preserve cross-reference ${ref.from} -> ${ref.to} during ${roundTrip.format} import.`, + blockId: ref.from + })); +} + +function score(findings) { + const penalty = findings.reduce((total, finding) => total + SEVERITY_WEIGHTS[finding.severity], 0); + return Math.max(0, 100 - penalty); +} + +function decisionFor(findings, fidelityScore) { + if (findings.some((finding) => finding.severity === "blocker")) return "block-export"; + if (fidelityScore < 70) return "repair-before-submit"; + if (fidelityScore < 90) return "review-warnings"; + return "roundtrip-ready"; +} + +function evaluateRoundTrip(document, roundTrip) { + const blockMap = new Map(roundTrip.blocks.map((block) => [block.id, block])); + const findings = []; + for (const block of requiredBlocksForFormat(document, roundTrip.format)) { + findings.push(...compareBlock(block, blockMap.get(block.id), roundTrip.format)); + } + findings.push(...compareCrossReferences(document, roundTrip)); + const fidelityScore = 
score(findings); + return { + format: roundTrip.format, + exporter: roundTrip.exporter, + importer: roundTrip.importer, + timestamp: roundTrip.timestamp, + fidelityScore, + decision: decisionFor(findings, fidelityScore), + findingCount: findings.length, + findings, + repairedBy: findings.map((finding) => finding.task) + }; +} + +function summarize(formatReports) { /* Rolls per-format reports up into severity counts, a rounded mean score, a release decision, and a weakest-first ranking. */ + const allFindings = formatReports.flatMap((report) => report.findings); + const countsBySeverity = allFindings.reduce((counts, finding) => { + counts[finding.severity] = (counts[finding.severity] || 0) + 1; + return counts; + }, {}); + const averageScore = formatReports.length ? Math.round(formatReports.reduce((total, report) => total + report.fidelityScore, 0) / formatReports.length) : 0; /* zero exports used to divide by zero: NaN fell through to "roundtrip-ready"; 0 now yields "repair-before-submit" */ + const releaseDecision = formatReports.some((report) => report.decision === "block-export") /* any blocked format blocks the release; otherwise the mean score decides */ + ? "block-export" + : averageScore < 80 + ? "repair-before-submit" + : "roundtrip-ready"; + return { + averageScore, + releaseDecision, + totalFindings: allFindings.length, + countsBySeverity, + weakestFormats: formatReports + .slice() + .sort((a, b) => a.fidelityScore - b.fidelityScore) + .map((report) => ({ + format: report.format, + fidelityScore: report.fidelityScore, + decision: report.decision + })) + }; +} + +function runRoundTripAudit({ canonicalDocument, roundTripExports }) { /* Evaluates every export against the canonical document, then returns the packet plus a digest of it. */ + const formatReports = roundTripExports.map((roundTrip) => evaluateRoundTrip(canonicalDocument, roundTrip)); + const packet = { + document: { + id: canonicalDocument.id, + title: canonicalDocument.title, + version: canonicalDocument.version, + publicationStyle: canonicalDocument.publicationStyle + }, + checker: "scientific-roundtrip-fidelity-checker", + summary: summarize(formatReports), + formatReports + }; + return { + ...packet, + auditDigest: digest(packet) + }; +} + +function renderMarkdown(report) { + const lines = [ + `# Round-Trip Fidelity Review: ${report.document.title}`, + "", + `Decision: **${report.summary.releaseDecision}**`, + `Average fidelity: 
**${report.summary.averageScore}**`, + `Total findings: **${report.summary.totalFindings}**`, + "", + "## Formats", + ...report.summary.weakestFormats.map((item) => `- ${item.format}: ${item.decision} (${item.fidelityScore})`), + "", + "## Repair Queue" + ]; + for (const formatReport of report.formatReports) { + lines.push("", `### ${formatReport.format}`, `Score: ${formatReport.fidelityScore}`, `Decision: ${formatReport.decision}`); + if (!formatReport.findings.length) { + lines.push("- No fidelity loss detected."); + } else { + for (const finding of formatReport.findings) { + lines.push(`- [${finding.severity}] ${finding.message}`); + lines.push(` Task: ${finding.task}`); + } + } + } + lines.push("", `Audit digest: \`${report.auditDigest}\``); + return `${lines.join("\n")}\n`; +} + +function renderSvg(report) { + const width = 980; + const rowHeight = 94; + const height = 160 + report.formatReports.length * rowHeight; + const rows = report.formatReports.map((formatReport, index) => { + const y = 140 + index * rowHeight; + const barWidth = Math.max(10, formatReport.fidelityScore * 5.2); + const color = formatReport.fidelityScore >= 90 ? "#2f855a" : formatReport.fidelityScore >= 70 ? 
"#b7791f" : "#c53030"; + return [ + `${formatReport.format}`, + ``, + ``, + `${formatReport.decision} (${formatReport.fidelityScore})`, + `${formatReport.findingCount} finding(s)` + ].join(""); + }).join(""); + return [ + ``, + ``, + ``, + `Scientific Round-Trip Fidelity Checker`, + `Decision: ${report.summary.releaseDecision} | Avg ${report.summary.averageScore} | Audit ${report.auditDigest.slice(0, 12)}`, + rows, + `` + ].join(""); +} + +module.exports = { + runRoundTripAudit, + evaluateRoundTrip, + compareBlock, + renderMarkdown, + renderSvg, + digest +}; diff --git a/scientific-roundtrip-fidelity-checker/reports/demo.mp4 b/scientific-roundtrip-fidelity-checker/reports/demo.mp4 new file mode 100644 index 0000000..eb1e232 Binary files /dev/null and b/scientific-roundtrip-fidelity-checker/reports/demo.mp4 differ diff --git a/scientific-roundtrip-fidelity-checker/reports/review-packet.md b/scientific-roundtrip-fidelity-checker/reports/review-packet.md new file mode 100644 index 0000000..90a06cb --- /dev/null +++ b/scientific-roundtrip-fidelity-checker/reports/review-packet.md @@ -0,0 +1,54 @@ +# Round-Trip Fidelity Review: Hybrid Bayesian Imaging Biomarkers + +Decision: **repair-before-submit** +Average fidelity: **35** +Total findings: **14** + +## Formats +- notebook: repair-before-submit (10) +- latex: repair-before-submit (36) +- markdown: repair-before-submit (60) + +## Repair Queue + +### markdown +Score: 60 +Decision: repair-before-submit +- [medium] Citation keys missing after round trip for table-cohorts: harmonization-lockfile. + Task: Preserve citation keys metadata for table-cohorts in this format. +- [medium] Unresolved suggestions missing after round trip for table-cohorts: add missing data footnote. + Task: Preserve unresolved suggestions metadata for table-cohorts in this format. +- [high] markdown round trip lost produces-summary link cell-harmonize -> table-cohorts. 
+ Task: Preserve cross-reference cell-harmonize -> table-cohorts during markdown import. + +### latex +Score: 36 +Decision: repair-before-submit +- [medium] Citation keys missing after round trip for intro-eq-1: nguyen2025-imaging. + Task: Preserve citation keys metadata for intro-eq-1 in this format. +- [medium] Comment threads missing after round trip for intro-eq-1: resolve prior sensitivity note. + Task: Preserve comment threads metadata for intro-eq-1 in this format. +- [high] Equation LaTeX changed for intro-eq-1: expected "p(\\theta | y) \\propto p(y | \\theta)p(\\theta)", got "p(\\theta | y) = p(y | \\theta)p(\\theta)". + Task: Regenerate the export/import adapter so equation latex survives round trip for intro-eq-1. +- [low] fig-signal source asset changed from figures/signal-panel.svg to figures/signal-panel.pdf. + Task: Confirm the transcoded figure keeps resolution, accessibility metadata, and citation anchors. +- [high] Table columns missing after round trip for table-cohorts: median_age. + Task: Preserve table columns metadata for table-cohorts in this format. + +### notebook +Score: 10 +Decision: repair-before-submit +- [medium] Unresolved suggestions missing after round trip for intro-eq-1: explain weakly informative prior. + Task: Preserve unresolved suggestions metadata for intro-eq-1 in this format. +- [medium] Comment threads missing after round trip for fig-signal: confirm color-safe palette. + Task: Preserve comment threads metadata for fig-signal in this format. +- [high] Notebook output hash changed for cell-harmonize: expected "sha256:cell-output-harmonize", got "sha256:cell-output-stale". + Task: Regenerate the export/import adapter so notebook output hash survives round trip for cell-harmonize. +- [medium] Execution count changed for cell-harmonize: expected 18, got 12. + Task: Regenerate the export/import adapter so execution count survives round trip for cell-harmonize. 
+- [high] notebook round trip lost produces-summary link cell-harmonize -> table-cohorts. + Task: Preserve cross-reference cell-harmonize -> table-cohorts during notebook import. +- [high] notebook round trip lost shared-cohort link table-cohorts -> fig-signal. + Task: Preserve cross-reference table-cohorts -> fig-signal during notebook import. + +Audit digest: `da45ce37c71f4a4f7ffcaaca096fc93a959517aaee096c7bc15f3346ed557599` diff --git a/scientific-roundtrip-fidelity-checker/reports/roundtrip-audit.json b/scientific-roundtrip-fidelity-checker/reports/roundtrip-audit.json new file mode 100644 index 0000000..eab9b59 --- /dev/null +++ b/scientific-roundtrip-fidelity-checker/reports/roundtrip-audit.json @@ -0,0 +1,190 @@ +{ + "document": { + "id": "collab-ms-127", + "title": "Hybrid Bayesian Imaging Biomarkers", + "version": "v4.2.0", + "publicationStyle": "nature" + }, + "checker": "scientific-roundtrip-fidelity-checker", + "summary": { + "averageScore": 35, + "releaseDecision": "repair-before-submit", + "totalFindings": 14, + "countsBySeverity": { + "medium": 7, + "high": 6, + "low": 1 + }, + "weakestFormats": [ + { + "format": "notebook", + "fidelityScore": 10, + "decision": "repair-before-submit" + }, + { + "format": "latex", + "fidelityScore": 36, + "decision": "repair-before-submit" + }, + { + "format": "markdown", + "fidelityScore": 60, + "decision": "repair-before-submit" + } + ] + }, + "formatReports": [ + { + "format": "markdown", + "exporter": "collab-md-v2", + "importer": "collab-md-v2", + "timestamp": "2026-05-20T09:40:00Z", + "fidelityScore": 60, + "decision": "repair-before-submit", + "findingCount": 3, + "findings": [ + { + "severity": "medium", + "code": "citation-key-lost", + "message": "Citation keys missing after round trip for table-cohorts: harmonization-lockfile.", + "task": "Preserve citation keys metadata for table-cohorts in this format.", + "blockId": "table-cohorts" + }, + { + "severity": "medium", + "code": "suggestion-lost", + "message": 
"Unresolved suggestions missing after round trip for table-cohorts: add missing data footnote.", + "task": "Preserve unresolved suggestions metadata for table-cohorts in this format.", + "blockId": "table-cohorts" + }, + { + "severity": "high", + "code": "cross-reference-lost", + "message": "markdown round trip lost produces-summary link cell-harmonize -> table-cohorts.", + "task": "Preserve cross-reference cell-harmonize -> table-cohorts during markdown import.", + "blockId": "cell-harmonize" + } + ], + "repairedBy": [ + "Preserve citation keys metadata for table-cohorts in this format.", + "Preserve unresolved suggestions metadata for table-cohorts in this format.", + "Preserve cross-reference cell-harmonize -> table-cohorts during markdown import." + ] + }, + { + "format": "latex", + "exporter": "collab-tex-v1", + "importer": "collab-tex-v1", + "timestamp": "2026-05-20T09:42:00Z", + "fidelityScore": 36, + "decision": "repair-before-submit", + "findingCount": 5, + "findings": [ + { + "severity": "medium", + "code": "citation-key-lost", + "message": "Citation keys missing after round trip for intro-eq-1: nguyen2025-imaging.", + "task": "Preserve citation keys metadata for intro-eq-1 in this format.", + "blockId": "intro-eq-1" + }, + { + "severity": "medium", + "code": "comment-thread-lost", + "message": "Comment threads missing after round trip for intro-eq-1: resolve prior sensitivity note.", + "task": "Preserve comment threads metadata for intro-eq-1 in this format.", + "blockId": "intro-eq-1" + }, + { + "severity": "high", + "code": "equation-mutated", + "message": "Equation LaTeX changed for intro-eq-1: expected \"p(\\\\theta | y) \\\\propto p(y | \\\\theta)p(\\\\theta)\", got \"p(\\\\theta | y) = p(y | \\\\theta)p(\\\\theta)\".", + "task": "Regenerate the export/import adapter so equation latex survives round trip for intro-eq-1.", + "blockId": "intro-eq-1" + }, + { + "severity": "low", + "code": "source-asset-transcoded", + "message": "fig-signal source 
asset changed from figures/signal-panel.svg to figures/signal-panel.pdf.", + "task": "Confirm the transcoded figure keeps resolution, accessibility metadata, and citation anchors.", + "blockId": "fig-signal" + }, + { + "severity": "high", + "code": "table-column-lost", + "message": "Table columns missing after round trip for table-cohorts: median_age.", + "task": "Preserve table columns metadata for table-cohorts in this format.", + "blockId": "table-cohorts" + } + ], + "repairedBy": [ + "Preserve citation keys metadata for intro-eq-1 in this format.", + "Preserve comment threads metadata for intro-eq-1 in this format.", + "Regenerate the export/import adapter so equation latex survives round trip for intro-eq-1.", + "Confirm the transcoded figure keeps resolution, accessibility metadata, and citation anchors.", + "Preserve table columns metadata for table-cohorts in this format." + ] + }, + { + "format": "notebook", + "exporter": "collab-ipynb-v3", + "importer": "collab-ipynb-v3", + "timestamp": "2026-05-20T09:44:00Z", + "fidelityScore": 10, + "decision": "repair-before-submit", + "findingCount": 6, + "findings": [ + { + "severity": "medium", + "code": "suggestion-lost", + "message": "Unresolved suggestions missing after round trip for intro-eq-1: explain weakly informative prior.", + "task": "Preserve unresolved suggestions metadata for intro-eq-1 in this format.", + "blockId": "intro-eq-1" + }, + { + "severity": "medium", + "code": "comment-thread-lost", + "message": "Comment threads missing after round trip for fig-signal: confirm color-safe palette.", + "task": "Preserve comment threads metadata for fig-signal in this format.", + "blockId": "fig-signal" + }, + { + "severity": "high", + "code": "cell-output-mutated", + "message": "Notebook output hash changed for cell-harmonize: expected \"sha256:cell-output-harmonize\", got \"sha256:cell-output-stale\".", + "task": "Regenerate the export/import adapter so notebook output hash survives round trip for 
cell-harmonize.", + "blockId": "cell-harmonize" + }, + { + "severity": "medium", + "code": "cell-execution-count-drift", + "message": "Execution count changed for cell-harmonize: expected 18, got 12.", + "task": "Regenerate the export/import adapter so execution count survives round trip for cell-harmonize.", + "blockId": "cell-harmonize" + }, + { + "severity": "high", + "code": "cross-reference-lost", + "message": "notebook round trip lost produces-summary link cell-harmonize -> table-cohorts.", + "task": "Preserve cross-reference cell-harmonize -> table-cohorts during notebook import.", + "blockId": "cell-harmonize" + }, + { + "severity": "high", + "code": "cross-reference-lost", + "message": "notebook round trip lost shared-cohort link table-cohorts -> fig-signal.", + "task": "Preserve cross-reference table-cohorts -> fig-signal during notebook import.", + "blockId": "table-cohorts" + } + ], + "repairedBy": [ + "Preserve unresolved suggestions metadata for intro-eq-1 in this format.", + "Preserve comment threads metadata for fig-signal in this format.", + "Regenerate the export/import adapter so notebook output hash survives round trip for cell-harmonize.", + "Regenerate the export/import adapter so execution count survives round trip for cell-harmonize.", + "Preserve cross-reference cell-harmonize -> table-cohorts during notebook import.", + "Preserve cross-reference table-cohorts -> fig-signal during notebook import." 
+ ] + } + ], + "auditDigest": "da45ce37c71f4a4f7ffcaaca096fc93a959517aaee096c7bc15f3346ed557599" +} \ No newline at end of file diff --git a/scientific-roundtrip-fidelity-checker/reports/roundtrip-fidelity.svg b/scientific-roundtrip-fidelity-checker/reports/roundtrip-fidelity.svg new file mode 100644 index 0000000..ad64b6b --- /dev/null +++ b/scientific-roundtrip-fidelity-checker/reports/roundtrip-fidelity.svg @@ -0,0 +1 @@ +Scientific Round-Trip Fidelity CheckerDecision: repair-before-submit | Avg 35 | Audit da45ce37c71fmarkdownrepair-before-submit (60)3 finding(s)latexrepair-before-submit (36)5 finding(s)notebookrepair-before-submit (10)6 finding(s) \ No newline at end of file diff --git a/scientific-roundtrip-fidelity-checker/requirements-map.md b/scientific-roundtrip-fidelity-checker/requirements-map.md new file mode 100644 index 0000000..a94d9ac --- /dev/null +++ b/scientific-roundtrip-fidelity-checker/requirements-map.md @@ -0,0 +1,17 @@ +# Requirements Map + +Source: SCIBASE.AI issue #12, Real-Time Collaborative Editor. + +| Requirement | Implementation | +| --- | --- | +| Markdown and LaTeX support | The checker compares canonical scientific blocks against Markdown and LaTeX round-trip snapshots. | +| Inline equations | Equation LaTeX and anchor labels are validated for mutation and loss. | +| Cross-referencing figures, tables, and citations | Figure/table/equation labels, citation keys, and cross-reference edges are checked. | +| Reference manager integration | Citation keys are preserved as structured metadata across export/import paths. | +| Jupyter notebook integration | Notebook-cell language, source hash, output hash, and execution count are compared. | +| Output display preservation | Notebook output hashes are treated as high-severity fidelity evidence. | +| Inline comments and suggestions | Comment threads and unresolved suggestions are checked for loss in each format. 
| +| Version history and autosave compatibility | The audit includes document version metadata and deterministic digests for review history. | +| Publication export readiness | Each format report returns `block-export`, `repair-before-submit`, `review-warnings`, or `roundtrip-ready`; the overall summary returns `block-export`, `repair-before-submit`, or `roundtrip-ready`. | +| Reviewer-facing artifacts | Demo emits JSON, Markdown, and SVG artifacts under `reports/`; the MP4 is a checked-in artifact that `demo.js` does not regenerate. | +| Local verification | `test.js` covers format decisions, dropped blocks, equation mutation, citation loss, table column loss, notebook output drift, digest stability, Markdown output, and SVG output. | diff --git a/scientific-roundtrip-fidelity-checker/sample-data.js b/scientific-roundtrip-fidelity-checker/sample-data.js new file mode 100644 index 0000000..941dddb --- /dev/null +++ b/scientific-roundtrip-fidelity-checker/sample-data.js @@ -0,0 +1,192 @@ +const canonicalDocument = { + id: "collab-ms-127", + title: "Hybrid Bayesian Imaging Biomarkers", + version: "v4.2.0", + publicationStyle: "nature", + blocks: [ + { + id: "intro-eq-1", + type: "equation", + label: "eq:posterior-update", + latex: "p(\\theta | y) \\propto p(y | \\theta)p(\\theta)", + inlineMarkdown: "$p(\\theta | y) \\propto p(y | \\theta)p(\\theta)$", + references: ["smith2024-bayes", "nguyen2025-imaging"], + comments: ["resolve prior sensitivity note"], + suggestions: ["explain weakly informative prior"], + requiredFormats: ["markdown", "latex", "notebook"] + }, + { + id: "fig-signal", + type: "figure", + label: "fig:signal-panel", + caption: "Posterior signal trajectories by scanner cohort.", + sourceAsset: "figures/signal-panel.svg", + references: ["cohort-registry"], + comments: ["confirm color-safe palette"], + suggestions: [], + requiredFormats: ["markdown", "latex", "notebook"] + }, + { + id: "table-cohorts", + type: "table", + label: "tab:cohort-balance", + caption: "Cohort balance after scanner harmonization.", + columns: ["cohort", "n", "median_age", "scanner_family"], + references: ["cohort-registry", 
"harmonization-lockfile"], + comments: [], + suggestions: ["add missing data footnote"], + requiredFormats: ["markdown", "latex"] + }, + { + id: "cell-harmonize", + type: "notebook-cell", + label: "cell:harmonize", + language: "python", + sourceHash: "sha256:cell-source-harmonize", + outputHash: "sha256:cell-output-harmonize", + executionCount: 18, + references: ["harmonization-lockfile"], + comments: ["rerun after cohort freeze"], + suggestions: [], + requiredFormats: ["notebook"] + } + ], + crossReferences: [ + { from: "intro-eq-1", to: "fig-signal", kind: "supports-claim" }, + { from: "cell-harmonize", to: "table-cohorts", kind: "produces-summary" }, + { from: "table-cohorts", to: "fig-signal", kind: "shared-cohort" } + ] +}; + +const roundTripExports = [ + { + format: "markdown", + exporter: "collab-md-v2", + importer: "collab-md-v2", + timestamp: "2026-05-20T09:40:00Z", + blocks: [ + { + id: "intro-eq-1", + type: "equation", + label: "eq:posterior-update", + latex: "p(\\theta | y) \\propto p(y | \\theta)p(\\theta)", + references: ["smith2024-bayes", "nguyen2025-imaging"], + comments: ["resolve prior sensitivity note"], + suggestions: ["explain weakly informative prior"] + }, + { + id: "fig-signal", + type: "figure", + label: "fig:signal-panel", + caption: "Posterior signal trajectories by scanner cohort.", + sourceAsset: "figures/signal-panel.svg", + references: ["cohort-registry"], + comments: ["confirm color-safe palette"], + suggestions: [] + }, + { + id: "table-cohorts", + type: "table", + label: "tab:cohort-balance", + caption: "Cohort balance after scanner harmonization.", + columns: ["cohort", "n", "median_age", "scanner_family"], + references: ["cohort-registry"], + comments: [], + suggestions: [] + } + ], + crossReferences: [ + { from: "intro-eq-1", to: "fig-signal", kind: "supports-claim" }, + { from: "table-cohorts", to: "fig-signal", kind: "shared-cohort" } + ] + }, + { + format: "latex", + exporter: "collab-tex-v1", + importer: "collab-tex-v1", + 
timestamp: "2026-05-20T09:42:00Z", + blocks: [ + { + id: "intro-eq-1", + type: "equation", + label: "eq:posterior-update", + latex: "p(\\theta | y) = p(y | \\theta)p(\\theta)", + references: ["smith2024-bayes"], + comments: [], + suggestions: ["explain weakly informative prior"] + }, + { + id: "fig-signal", + type: "figure", + label: "fig:signal-panel", + caption: "Posterior signal trajectories by scanner cohort.", + sourceAsset: "figures/signal-panel.pdf", + references: ["cohort-registry"], + comments: ["confirm color-safe palette"], + suggestions: [] + }, + { + id: "table-cohorts", + type: "table", + label: "tab:cohort-balance", + caption: "Cohort balance after scanner harmonization.", + columns: ["cohort", "n", "scanner_family"], + references: ["cohort-registry", "harmonization-lockfile"], + comments: [], + suggestions: ["add missing data footnote"] + } + ], + crossReferences: [ + { from: "intro-eq-1", to: "fig-signal", kind: "supports-claim" }, + { from: "cell-harmonize", to: "table-cohorts", kind: "produces-summary" }, + { from: "table-cohorts", to: "fig-signal", kind: "shared-cohort" } + ] + }, + { + format: "notebook", + exporter: "collab-ipynb-v3", + importer: "collab-ipynb-v3", + timestamp: "2026-05-20T09:44:00Z", + blocks: [ + { + id: "intro-eq-1", + type: "equation", + label: "eq:posterior-update", + latex: "p(\\theta | y) \\propto p(y | \\theta)p(\\theta)", + references: ["smith2024-bayes", "nguyen2025-imaging"], + comments: ["resolve prior sensitivity note"], + suggestions: [] + }, + { + id: "fig-signal", + type: "figure", + label: "fig:signal-panel", + caption: "Posterior signal trajectories by scanner cohort.", + sourceAsset: "figures/signal-panel.svg", + references: ["cohort-registry"], + comments: [], + suggestions: [] + }, + { + id: "cell-harmonize", + type: "notebook-cell", + label: "cell:harmonize", + language: "python", + sourceHash: "sha256:cell-source-harmonize", + outputHash: "sha256:cell-output-stale", + executionCount: 12, + references: 
["harmonization-lockfile"], + comments: ["rerun after cohort freeze"], + suggestions: [] + } + ], + crossReferences: [ + { from: "intro-eq-1", to: "fig-signal", kind: "supports-claim" } + ] + } +]; + +module.exports = { + canonicalDocument, + roundTripExports +}; diff --git a/scientific-roundtrip-fidelity-checker/test.js b/scientific-roundtrip-fidelity-checker/test.js new file mode 100644 index 0000000..c340283 --- /dev/null +++ b/scientific-roundtrip-fidelity-checker/test.js @@ -0,0 +1,42 @@ +const assert = require("node:assert/strict"); +const data = require("./sample-data"); +const { digest, evaluateRoundTrip, renderMarkdown, renderSvg, runRoundTripAudit } = require("./index"); + +const report = runRoundTripAudit(data); +const byFormat = new Map(report.formatReports.map((formatReport) => [formatReport.format, formatReport])); + +assert.equal(report.checker, "scientific-roundtrip-fidelity-checker"); +assert.equal(report.summary.releaseDecision, "repair-before-submit"); +assert.equal(report.formatReports.length, 3); +assert.match(report.auditDigest, /^[a-f0-9]{64}$/); + +assert.equal(byFormat.get("markdown").decision, "repair-before-submit"); +assert.ok(byFormat.get("markdown").findings.some((finding) => finding.code === "suggestion-lost")); +assert.ok(byFormat.get("markdown").findings.some((finding) => finding.code === "cross-reference-lost")); + +assert.equal(byFormat.get("latex").decision, "repair-before-submit"); +assert.ok(byFormat.get("latex").findings.some((finding) => finding.code === "equation-mutated")); +assert.ok(byFormat.get("latex").findings.some((finding) => finding.code === "citation-key-lost")); +assert.ok(byFormat.get("latex").findings.some((finding) => finding.code === "table-column-lost")); + +assert.equal(byFormat.get("notebook").decision, "repair-before-submit"); +assert.ok(byFormat.get("notebook").findings.some((finding) => finding.code === "cell-output-mutated")); +assert.ok(byFormat.get("notebook").findings.some((finding) => finding.code 
=== "cell-execution-count-drift")); + +const emptyNotebook = { + format: "notebook", + exporter: "test", + importer: "test", + timestamp: "2026-05-20T10:00:00Z", + blocks: [], + crossReferences: [] +}; +const emptyReport = evaluateRoundTrip(data.canonicalDocument, emptyNotebook); +assert.equal(emptyReport.decision, "block-export"); +assert.ok(emptyReport.findings.some((finding) => finding.code === "block-dropped")); + +assert.equal(digest({ b: 2, a: 1 }), digest({ a: 1, b: 2 })); +assert.ok(renderMarkdown(report).includes("Round-Trip Fidelity Review")); +assert.ok(renderSvg(report).startsWith("