diff --git a/README.md b/README.md index d338cf6..4a5261b 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,6 @@ # deepevents.ai deepevents.ai main codebase + +## Scientific Data & Code Hosting + +- `raw-instrument-preview-integrity-gate/` adds a self-contained #14 slice for raw instrument preview safety, checksum/metadata drift checks, and DataCite/schema.org preview packets. diff --git a/raw-instrument-preview-integrity-gate/README.md b/raw-instrument-preview-integrity-gate/README.md new file mode 100644 index 0000000..7f3679d --- /dev/null +++ b/raw-instrument-preview-integrity-gate/README.md @@ -0,0 +1,32 @@ +# Raw Instrument Preview Integrity Gate + +This module is a focused Scientific/Engineering Data & Code Hosting slice for SCIBASE issue #14. It protects previews for raw instrument outputs so researchers can inspect hosted data without accidentally exposing raw restricted values or drifting away from the original instrument metadata. + +## What It Adds + +- Raw instrument type classification for `.fcs`, `.mzml`, `.nd2`, and `.h5` artifacts. +- Checksum, unit, calibration, instrument metadata, license, and embargo checks before preview generation. +- Preview transform validation for deterministic lineage-preserving downsampling. +- DataCite and schema.org packets that preserve preview decisions and technical metadata. +- Reviewer queues for blocked and metadata-only previews. +- Offline JSON and SVG demo output from synthetic instrument artifacts. + +## Why This Is Distinct + +Existing #14 submissions cover broad FAIR manifests, access/compute-run governance, executable environment drift, provenance chains, artifact quarantine, storage quotas, FAIR access gates, and artifact package integrity. This slice focuses specifically on raw instrument output preview safety and metadata-preserving transformations. 
+ +## Run + +```bash +node raw-instrument-preview-integrity-gate/test.js +node raw-instrument-preview-integrity-gate/demo.js +``` + +The demo writes: + +- `raw-instrument-preview-integrity-gate/preview-report.json` +- `raw-instrument-preview-integrity-gate/demo.svg` + +## Decision Policy + +Checksum drift or high-risk transform failures block preview generation. Unit or calibration drift falls back to metadata-only previews. Stable artifacts with lineage-preserving transforms receive safe preview descriptors. diff --git a/raw-instrument-preview-integrity-gate/demo.js b/raw-instrument-preview-integrity-gate/demo.js new file mode 100644 index 0000000..c8cd782 --- /dev/null +++ b/raw-instrument-preview-integrity-gate/demo.js @@ -0,0 +1,17 @@ +"use strict"; + +const fs = require("node:fs"); +const path = require("node:path"); +const { + buildPreviewIntegrityGate, + renderPreviewSvg +} = require("./index"); +const sampleData = require("./sample-data"); + +const report = buildPreviewIntegrityGate(sampleData); +const outDir = __dirname; + +fs.writeFileSync(path.join(outDir, "preview-report.json"), `${JSON.stringify(report, null, 2)}\n`); +fs.writeFileSync(path.join(outDir, "demo.svg"), renderPreviewSvg(report)); + +console.log(JSON.stringify(report, null, 2)); diff --git a/raw-instrument-preview-integrity-gate/demo.mp4 b/raw-instrument-preview-integrity-gate/demo.mp4 new file mode 100644 index 0000000..1fdd2e2 Binary files /dev/null and b/raw-instrument-preview-integrity-gate/demo.mp4 differ diff --git a/raw-instrument-preview-integrity-gate/demo.svg b/raw-instrument-preview-integrity-gate/demo.svg new file mode 100644 index 0000000..2bb21b2 --- /dev/null +++ b/raw-instrument-preview-integrity-gate/demo.svg @@ -0,0 +1 @@ +Raw Instrument Preview Integrity GatePreserves calibration, units, checksums, and safe preview policy for instrument outputs.metabolomics-run.mzmlmzml | block_preview | score 100sensor-sweep.h5h5 | metadata_only_preview | score 42microglia-panel.fcsfcs | 
"use strict";

/**
 * Raw instrument preview integrity gate.
 *
 * Classifies raw instrument artifacts by file extension, checks checksum /
 * metadata / unit / calibration drift and preview-transform safety, and emits
 * a per-artifact preview decision together with DataCite-style and
 * schema.org-style metadata packets.
 */

/**
 * Supported raw instrument types keyed by lower-case file extension.
 * Each spec lists the metadata fields that must be observed before a preview
 * is generated and the kind of preview produced when the artifact is safe.
 */
const SUPPORTED_RAW_TYPES = Object.freeze({
  fcs: {
    label: "Flow cytometry standard",
    requiredMetadata: ["instrumentModel", "detectorPanel", "compensationMatrix", "units"],
    previewKind: "gated_scatter_summary"
  },
  mzml: {
    label: "Mass spectrometry mzML",
    requiredMetadata: ["instrumentModel", "ionMode", "mzRange", "units"],
    previewKind: "peak_intensity_summary"
  },
  nd2: {
    label: "Microscopy ND2 image stack",
    requiredMetadata: ["instrumentModel", "objective", "channelMap", "pixelSizeMicrons", "units"],
    previewKind: "channel_thumbnail_contact_sheet"
  },
  h5: {
    label: "Instrument HDF5 bundle",
    requiredMetadata: ["instrumentModel", "schemaVersion", "axisMap", "units"],
    previewKind: "dataset_tree_summary"
  }
});

/** Relative calibration tolerance used when an artifact does not provide a valid one. */
const DEFAULT_CALIBRATION_TOLERANCE = 0.02;

/** Largest downsample ratio still considered a safe preview. */
const MAX_SAFE_DOWNSAMPLE_RATIO = 0.25;

/**
 * Throws when `value` is not an array.
 * @param {*} value - Value to check.
 * @param {string} name - Name used in the error message.
 * @throws {TypeError} When `value` is not an array.
 */
function assertArray(value, name) {
  if (!Array.isArray(value)) throw new TypeError(`${name} must be an array`);
}

/**
 * Rounds `value` to `digits` decimal places.
 * @param {number} value
 * @param {number} [digits=3]
 * @returns {number}
 */
function round(value, digits = 3) {
  const factor = 10 ** digits;
  return Math.round(value * factor) / factor;
}

/**
 * Returns the lower-cased extension of a file name, or "" when the name has
 * no dot (so "README" is not mistaken for a raw type called "readme").
 * @param {*} fileName
 * @returns {string}
 */
function extensionFor(fileName) {
  const name = String(fileName ?? "");
  const dot = name.lastIndexOf(".");
  return dot === -1 ? "" : name.slice(dot + 1).toLowerCase();
}

/**
 * Looks up the type spec for an extension using own properties only, so
 * extensions such as "constructor" never resolve to inherited Object members.
 * @param {string} type - Lower-case extension.
 * @returns {object|null}
 */
function typeSpecFor(type) {
  return Object.prototype.hasOwnProperty.call(SUPPORTED_RAW_TYPES, type) ? SUPPORTED_RAW_TYPES[type] : null;
}

/**
 * True when both checksums are present and differ.
 * @param {object} artifact
 * @returns {boolean}
 */
function checksumChanged(artifact) {
  return Boolean(
    artifact.expectedChecksum && artifact.actualChecksum && artifact.expectedChecksum !== artifact.actualChecksum
  );
}

/**
 * True when either checksum is absent, i.e. integrity cannot be verified.
 * @param {object} artifact
 * @returns {boolean}
 */
function checksumUnverifiable(artifact) {
  return !artifact.expectedChecksum || !artifact.actualChecksum;
}

/**
 * Serialises a value to JSON with object keys sorted at every level, so that
 * two structurally equal values compare equal regardless of key order.
 * @param {*} value
 * @returns {string|undefined}
 */
function canonicalJson(value) {
  return JSON.stringify(value, (key, inner) => {
    if (inner !== null && typeof inner === "object" && !Array.isArray(inner)) {
      return Object.fromEntries(
        Object.keys(inner)
          .sort()
          .map((name) => [name, inner[name]])
      );
    }
    return inner;
  });
}

/**
 * Lists top-level metadata fields whose expected and observed values differ.
 * @param {object} artifact
 * @returns {Array<{field: string, expected: *, observed: *}>}
 */
function metadataDiff(artifact) {
  const expected = artifact.expectedMetadata || {};
  const observed = artifact.observedMetadata || {};
  const keys = new Set([...Object.keys(expected), ...Object.keys(observed)]);
  const diffs = [];
  for (const key of keys) {
    if (canonicalJson(expected[key]) !== canonicalJson(observed[key])) {
      diffs.push({ field: key, expected: expected[key] ?? null, observed: observed[key] ?? null });
    }
  }
  return diffs;
}

/**
 * Returns the required metadata fields that are undefined, null or "" in the
 * observed metadata.
 * @param {object} artifact
 * @param {{requiredMetadata: string[]}} typeSpec
 * @returns {string[]}
 */
function missingRequiredMetadata(artifact, typeSpec) {
  const observed = artifact.observedMetadata || {};
  return typeSpec.requiredMetadata.filter(
    (field) => observed[field] === undefined || observed[field] === null || observed[field] === ""
  );
}

/**
 * Returns the unit fields whose expected and observed unit differ.
 * @param {object} artifact
 * @returns {string[]}
 */
function unitDrift(artifact) {
  const expectedUnits = artifact.expectedMetadata?.units || {};
  const observedUnits = artifact.observedMetadata?.units || {};
  const fields = new Set([...Object.keys(expectedUnits), ...Object.keys(observedUnits)]);
  return [...fields].filter((field) => expectedUnits[field] !== observedUnits[field]);
}

/**
 * Returns the calibration fields that drifted beyond the relative tolerance.
 * Numeric values are compared relative to max(|expected|, 1); non-numeric
 * values are compared by strict equality.
 * @param {object} artifact - May carry `calibrationTolerance` (>= 0).
 * @returns {string[]}
 */
function calibrationDrift(artifact) {
  const expected = artifact.expectedMetadata?.calibration || {};
  const observed = artifact.observedMetadata?.calibration || {};
  // `??` keeps an explicit tolerance of 0; an invalid tolerance falls back to
  // the default instead of silently disabling the check (NaN comparisons).
  const requested = Number(artifact.calibrationTolerance ?? DEFAULT_CALIBRATION_TOLERANCE);
  const tolerance = Number.isFinite(requested) && requested >= 0 ? requested : DEFAULT_CALIBRATION_TOLERANCE;
  const fields = new Set([...Object.keys(expected), ...Object.keys(observed)]);
  return [...fields].filter((field) => {
    const left = Number(expected[field]);
    const right = Number(observed[field]);
    if (!Number.isFinite(left) || !Number.isFinite(right)) return expected[field] !== observed[field];
    const denominator = Math.max(Math.abs(left), 1);
    return Math.abs(left - right) / denominator > tolerance;
  });
}

/**
 * Returns a reason code when a restricted-license artifact is not configured
 * for a metadata-only preview, otherwise null.
 * @param {object} artifact
 * @returns {string|null}
 */
function licenseRisk(artifact) {
  if (artifact.license === "restricted" && artifact.previewPolicy !== "metadata-only") {
    return "restricted_license_requires_metadata_only_preview";
  }
  return null;
}

/**
 * Validates a preview transform descriptor.
 * @param {object} [transform]
 * @returns {string[]} Human-readable failures; empty when the transform is safe.
 */
function validatePreviewTransform(transform) {
  const failures = [];
  if (!transform) {
    failures.push("preview transform is missing");
    return failures;
  }
  if (transform.requiresRawValueExport) failures.push("preview transform would expose raw values");
  if (!transform.deterministic) failures.push("preview transform is not deterministic");
  if (!transform.recordsLineage) failures.push("preview transform does not record lineage");
  if (transform.downsampleRatio !== undefined && Number(transform.downsampleRatio) > MAX_SAFE_DOWNSAMPLE_RATIO) {
    failures.push("preview transform downsample ratio is too high for safe preview");
  }
  return failures;
}

/**
 * Computes the preview decision for one artifact.
 *
 * Policy:
 * - `block_preview` when the type is unsupported, the checksum changed, the
 *   transform would export raw values, or the risk score reaches 70.
 * - `metadata_only_preview` when the score reaches 40 or any finding at all
 *   was raised (drift, missing metadata, licence/embargo, unverifiable
 *   checksum, transform failure).
 * - `allow_safe_preview` only when no finding was raised.
 *
 * @param {object} artifact
 * @returns {object} Decision record with reasons and metadata packets.
 * @throws {TypeError} When `artifact` is not an object.
 */
function previewDecision(artifact) {
  if (artifact === null || typeof artifact !== "object") {
    throw new TypeError("artifact must be an object");
  }
  const type = extensionFor(artifact.fileName);
  const typeSpec = typeSpecFor(type);
  if (!typeSpec) {
    return {
      artifactId: artifact.id,
      fileName: artifact.fileName,
      rawType: type,
      previewKind: "unsupported",
      decision: "block_preview",
      severity: "high",
      score: 76,
      reasons: [`unsupported raw instrument type: ${type || "(none)"}`],
      datacite: null,
      schemaOrg: null
    };
  }

  const checksumDrift = checksumChanged(artifact);
  const diffs = metadataDiff(artifact);
  const missing = missingRequiredMetadata(artifact, typeSpec);
  const unitFields = unitDrift(artifact);
  const calibrationFields = calibrationDrift(artifact);
  const transformFailures = validatePreviewTransform(artifact.previewTransform);
  const licenseFailure = licenseRisk(artifact);
  const embargoFailure = Boolean(artifact.embargoed) && artifact.previewPolicy !== "metadata-only";
  const exposesRawValues = Boolean(artifact.previewTransform?.requiresRawValueExport);

  const reasons = [];
  if (checksumDrift) reasons.push("raw artifact checksum changed");
  else if (checksumUnverifiable(artifact)) reasons.push("raw artifact checksum could not be verified");
  if (missing.length) reasons.push(`missing required metadata: ${missing.join(", ")}`);
  if (unitFields.length) reasons.push(`unit metadata drift: ${unitFields.join(", ")}`);
  if (calibrationFields.length) reasons.push(`calibration drift: ${calibrationFields.join(", ")}`);
  if (diffs.some((diff) => diff.field === "instrumentModel")) reasons.push("instrument model drift");
  reasons.push(...transformFailures);
  if (licenseFailure) reasons.push(licenseFailure);
  if (embargoFailure) reasons.push("embargoed artifact requires metadata-only preview");
  const uniqueReasons = [...new Set(reasons)];

  const score = Math.min(
    100,
    18 +
      (checksumDrift ? 32 : 0) +
      missing.length * 10 +
      unitFields.length * 12 +
      calibrationFields.length * 12 +
      transformFailures.length * 11 +
      (licenseFailure ? 20 : 0) +
      (embargoFailure ? 20 : 0)
  );

  // The score alone is not enough: a single finding (e.g. raw value export at
  // 29 points) must never yield a safe preview, so findings set a floor.
  let decision;
  if (score >= 70 || checksumDrift || exposesRawValues) {
    decision = "block_preview";
  } else if (score >= 40 || uniqueReasons.length > 0) {
    decision = "metadata_only_preview";
  } else {
    decision = "allow_safe_preview";
  }
  const severity =
    decision === "block_preview" ? "high" : decision === "metadata_only_preview" ? "medium" : "low";

  return {
    artifactId: artifact.id,
    fileName: artifact.fileName,
    rawType: type,
    previewKind: decision === "allow_safe_preview" ? typeSpec.previewKind : "metadata_packet",
    decision,
    severity,
    score: round(score),
    reasons: uniqueReasons,
    datacite: buildDataCitePacket(artifact, typeSpec, decision),
    schemaOrg: buildSchemaOrgPacket(artifact, typeSpec, decision)
  };
}

/**
 * Builds a DataCite-style metadata packet that records the preview decision.
 * @param {object} artifact
 * @param {{label: string}} typeSpec
 * @param {string} decision
 * @returns {object}
 */
function buildDataCitePacket(artifact, typeSpec, decision) {
  return {
    identifier: artifact.doi || artifact.id,
    creators: artifact.creators || [],
    titles: [{ title: artifact.title }],
    publisher: artifact.publisher || "SCIBASE synthetic repository",
    publicationYear: artifact.publicationYear || new Date().getUTCFullYear(),
    types: { resourceTypeGeneral: "Dataset", resourceType: typeSpec.label },
    rightsList: [{ rights: artifact.license || "unknown" }],
    descriptions: [
      {
        descriptionType: "TechnicalInfo",
        description: `Preview decision: ${decision}; transform: ${artifact.previewTransform?.name || "none"}`
      }
    ]
  };
}

/**
 * Builds a schema.org Dataset packet that records the preview decision.
 * @param {object} artifact
 * @param {{label: string, previewKind: string}} typeSpec
 * @param {string} decision
 * @returns {object}
 */
function buildSchemaOrgPacket(artifact, typeSpec, decision) {
  return {
    "@context": "https://schema.org",
    "@type": "Dataset",
    identifier: artifact.doi || artifact.id,
    name: artifact.title,
    measurementTechnique: typeSpec.label,
    license: artifact.license,
    encodingFormat: extensionFor(artifact.fileName),
    variableMeasured: Object.keys(artifact.observedMetadata?.units || {}),
    additionalProperty: [
      { "@type": "PropertyValue", name: "previewDecision", value: decision },
      { "@type": "PropertyValue", name: "previewKind", value: typeSpec.previewKind },
      { "@type": "PropertyValue", name: "actualChecksum", value: artifact.actualChecksum }
    ]
  };
}

/**
 * Runs the gate over every artifact and assembles the report.
 * Decisions are ordered by descending score, then by file name.
 * @param {{artifacts: object[], generatedAt?: string}} input - `generatedAt`
 *   is optional and lets callers pin the timestamp for reproducible reports.
 * @returns {{generatedAt: string, decisions: object[], stats: object, reviewerQueue: object[]}}
 * @throws {TypeError} When `input.artifacts` is not an array.
 */
function buildPreviewIntegrityGate(input) {
  const data = input || {};
  assertArray(data.artifacts, "artifacts");
  const decisions = data.artifacts
    .map((artifact) => previewDecision(artifact))
    .sort(
      (a, b) => b.score - a.score || String(a.fileName ?? "").localeCompare(String(b.fileName ?? ""))
    );
  const countOf = (kind) => decisions.filter((decision) => decision.decision === kind).length;
  return {
    generatedAt: data.generatedAt ?? new Date().toISOString(),
    decisions,
    stats: {
      artifactCount: decisions.length,
      blocked: countOf("block_preview"),
      metadataOnly: countOf("metadata_only_preview"),
      safePreview: countOf("allow_safe_preview")
    },
    reviewerQueue: decisions
      .filter((decision) => decision.decision !== "allow_safe_preview")
      .map((decision) => ({
        artifactId: decision.artifactId,
        fileName: decision.fileName,
        decision: decision.decision,
        reasons: decision.reasons
      }))
  };
}

/**
 * Renders the report as a standalone SVG: one card per decision with the file
 * name, a "type | decision | score" line and a score bar coloured by decision.
 * @param {{decisions: object[]}} report
 * @returns {string} SVG document.
 * @throws {TypeError} When `report.decisions` is not an array.
 */
function renderPreviewSvg(report) {
  const decisions = report?.decisions;
  assertArray(decisions, "report.decisions");
  const width = 940;
  const rowHeight = 84;
  const height = 124 + decisions.length * rowHeight;
  const barWidth = 240;
  const barX = width - 64 - barWidth;
  const font = `font-family="Arial, sans-serif"`;
  const rows = decisions
    .map((decision, index) => {
      const y = 90 + index * rowHeight;
      const color =
        decision.decision === "block_preview"
          ? "#be123c"
          : decision.decision === "metadata_only_preview"
            ? "#b45309"
            : "#15803d";
      const clampedScore = Math.min(100, Math.max(0, Number(decision.score) || 0));
      const filled = Math.round((barWidth * clampedScore) / 100);
      return [
        `<g>`,
        `<rect x="32" y="${y}" width="${width - 64}" height="${rowHeight - 14}" rx="8" fill="#ffffff" stroke="#e2e8f0"/>`,
        `<text x="52" y="${y + 28}" ${font} font-size="16" font-weight="700" fill="#0f172a">${escapeXml(decision.fileName)}</text>`,
        `<text x="52" y="${y + 52}" ${font} font-size="13" fill="${color}">${escapeXml(decision.rawType)} | ${escapeXml(decision.decision)} | score ${decision.score}</text>`,
        `<rect x="${barX}" y="${y + 28}" width="${barWidth}" height="14" rx="7" fill="#e2e8f0"/>`,
        `<rect x="${barX}" y="${y + 28}" width="${filled}" height="14" rx="7" fill="${color}"/>`,
        `</g>`
      ].join("");
    })
    .join("");
  return [
    `<svg xmlns="http://www.w3.org/2000/svg" width="${width}" height="${height}" viewBox="0 0 ${width} ${height}" role="img">`,
    `<rect width="${width}" height="${height}" fill="#f8fafc"/>`,
    `<text x="32" y="44" ${font} font-size="24" font-weight="700" fill="#0f172a">Raw Instrument Preview Integrity Gate</text>`,
    `<text x="32" y="70" ${font} font-size="14" fill="#475569">Preserves calibration, units, checksums, and safe preview policy for instrument outputs.</text>`,
    rows,
    `</svg>`
  ].join("");
}

/**
 * Escapes the characters that are significant in XML text and attributes.
 * `&` is replaced first so already-produced entities are not double-escaped.
 * @param {*} value
 * @returns {string}
 */
function escapeXml(value) {
  return String(value)
    .replace(/&/g, "&amp;")
    .replace(/</g, "&lt;")
    .replace(/>/g, "&gt;")
    .replace(/"/g, "&quot;");
}

const api = {
  SUPPORTED_RAW_TYPES,
  buildPreviewIntegrityGate,
  checksumChanged,
  metadataDiff,
  previewDecision,
  renderPreviewSvg,
  unitDrift,
  calibrationDrift
};

// Guarded so the file can also be evaluated where `module` is not defined
// (e.g. an ES-module harness); under CommonJS this is a plain export.
if (typeof module === "object" && module !== null) {
  module.exports = api;
}
spectrometry mzML" + }, + "rightsList": [ + { + "rights": "restricted" + } + ], + "descriptions": [ + { + "descriptionType": "TechnicalInfo", + "description": "Preview decision: block_preview; transform: peak-topline" + } + ] + }, + "schemaOrg": { + "@context": "https://schema.org", + "@type": "Dataset", + "identifier": "10.0000/scibase.mzml.002", + "name": "Embargoed metabolomics peak run", + "measurementTechnique": "Mass spectrometry mzML", + "license": "restricted", + "encodingFormat": "mzml", + "variableMeasured": [ + "mz", + "intensity" + ], + "additionalProperty": [ + { + "@type": "PropertyValue", + "name": "previewDecision", + "value": "block_preview" + }, + { + "@type": "PropertyValue", + "name": "previewKind", + "value": "peak_intensity_summary" + }, + { + "@type": "PropertyValue", + "name": "actualChecksum", + "value": "sha256:mzml-after" + } + ] + } + }, + { + "artifactId": "artifact:h5-004", + "fileName": "sensor-sweep.h5", + "rawType": "h5", + "previewKind": "metadata_packet", + "decision": "metadata_only_preview", + "severity": "medium", + "score": 42, + "reasons": [ + "unit metadata drift: impedance", + "calibration drift: impedance" + ], + "datacite": { + "identifier": "artifact:h5-004", + "creators": [ + "SCIBASE Materials Core" + ], + "titles": [ + { + "title": "Sensor HDF5 sweep bundle" + } + ], + "publisher": "SCIBASE synthetic repository", + "publicationYear": 2026, + "types": { + "resourceTypeGeneral": "Dataset", + "resourceType": "Instrument HDF5 bundle" + }, + "rightsList": [ + { + "rights": "CC0-1.0" + } + ], + "descriptions": [ + { + "descriptionType": "TechnicalInfo", + "description": "Preview decision: metadata_only_preview; transform: dataset-tree-only" + } + ] + }, + "schemaOrg": { + "@context": "https://schema.org", + "@type": "Dataset", + "identifier": "artifact:h5-004", + "name": "Sensor HDF5 sweep bundle", + "measurementTechnique": "Instrument HDF5 bundle", + "license": "CC0-1.0", + "encodingFormat": "h5", + "variableMeasured": [ + 
"frequency", + "impedance" + ], + "additionalProperty": [ + { + "@type": "PropertyValue", + "name": "previewDecision", + "value": "metadata_only_preview" + }, + { + "@type": "PropertyValue", + "name": "previewKind", + "value": "dataset_tree_summary" + }, + { + "@type": "PropertyValue", + "name": "actualChecksum", + "value": "sha256:h5-stable" + } + ] + } + }, + { + "artifactId": "artifact:fcs-001", + "fileName": "microglia-panel.fcs", + "rawType": "fcs", + "previewKind": "gated_scatter_summary", + "decision": "allow_safe_preview", + "severity": "low", + "score": 18, + "reasons": [], + "datacite": { + "identifier": "10.0000/scibase.fcs.001", + "creators": [ + "SCIBASE Flow Core" + ], + "titles": [ + { + "title": "Microglia cytokine flow panel" + } + ], + "publisher": "SCIBASE synthetic repository", + "publicationYear": 2026, + "types": { + "resourceTypeGeneral": "Dataset", + "resourceType": "Flow cytometry standard" + }, + "rightsList": [ + { + "rights": "CC-BY-4.0" + } + ], + "descriptions": [ + { + "descriptionType": "TechnicalInfo", + "description": "Preview decision: allow_safe_preview; transform: scatter-density-downsample" + } + ] + }, + "schemaOrg": { + "@context": "https://schema.org", + "@type": "Dataset", + "identifier": "10.0000/scibase.fcs.001", + "name": "Microglia cytokine flow panel", + "measurementTechnique": "Flow cytometry standard", + "license": "CC-BY-4.0", + "encodingFormat": "fcs", + "variableMeasured": [ + "FSC", + "IL6" + ], + "additionalProperty": [ + { + "@type": "PropertyValue", + "name": "previewDecision", + "value": "allow_safe_preview" + }, + { + "@type": "PropertyValue", + "name": "previewKind", + "value": "gated_scatter_summary" + }, + { + "@type": "PropertyValue", + "name": "actualChecksum", + "value": "sha256:fcs-original" + } + ] + } + }, + { + "artifactId": "artifact:nd2-003", + "fileName": "organoid-stack.nd2", + "rawType": "nd2", + "previewKind": "channel_thumbnail_contact_sheet", + "decision": "allow_safe_preview", + 
"severity": "low", + "score": 18, + "reasons": [], + "datacite": { + "identifier": "artifact:nd2-003", + "creators": [ + "SCIBASE Imaging Core" + ], + "titles": [ + { + "title": "Organoid microscopy stack" + } + ], + "publisher": "SCIBASE synthetic repository", + "publicationYear": 2026, + "types": { + "resourceTypeGeneral": "Dataset", + "resourceType": "Microscopy ND2 image stack" + }, + "rightsList": [ + { + "rights": "CC-BY-NC-4.0" + } + ], + "descriptions": [ + { + "descriptionType": "TechnicalInfo", + "description": "Preview decision: allow_safe_preview; transform: thumbnail-contact-sheet" + } + ] + }, + "schemaOrg": { + "@context": "https://schema.org", + "@type": "Dataset", + "identifier": "artifact:nd2-003", + "name": "Organoid microscopy stack", + "measurementTechnique": "Microscopy ND2 image stack", + "license": "CC-BY-NC-4.0", + "encodingFormat": "nd2", + "variableMeasured": [ + "x", + "y", + "intensity" + ], + "additionalProperty": [ + { + "@type": "PropertyValue", + "name": "previewDecision", + "value": "allow_safe_preview" + }, + { + "@type": "PropertyValue", + "name": "previewKind", + "value": "channel_thumbnail_contact_sheet" + }, + { + "@type": "PropertyValue", + "name": "actualChecksum", + "value": "sha256:nd2-stable" + } + ] + } + } + ], + "stats": { + "artifactCount": 4, + "blocked": 1, + "metadataOnly": 1, + "safePreview": 2 + }, + "reviewerQueue": [ + { + "artifactId": "artifact:mzml-002", + "fileName": "metabolomics-run.mzml", + "decision": "block_preview", + "reasons": [ + "raw artifact checksum changed", + "unit metadata drift: intensity", + "calibration drift: mz", + "preview transform would expose raw values", + "preview transform does not record lineage", + "preview transform downsample ratio is too high for safe preview", + "restricted_license_requires_metadata_only_preview", + "embargoed artifact requires metadata-only preview" + ] + }, + { + "artifactId": "artifact:h5-004", + "fileName": "sensor-sweep.h5", + "decision": 
"metadata_only_preview", + "reasons": [ + "unit metadata drift: impedance", + "calibration drift: impedance" + ] + } + ] +} diff --git a/raw-instrument-preview-integrity-gate/requirements-map.md b/raw-instrument-preview-integrity-gate/requirements-map.md new file mode 100644 index 0000000..b4d34c5 --- /dev/null +++ b/raw-instrument-preview-integrity-gate/requirements-map.md @@ -0,0 +1,21 @@ +# Requirements Map + +## Issue #14: Scientific/Engineering Data & Code Hosting + +| Requirement | Coverage | +| --- | --- | +| Support for raw instrument outputs | `SUPPORTED_RAW_TYPES` classifies `.fcs`, `.mzml`, `.nd2`, and `.h5` artifacts. | +| Metadata-aware previews | `previewDecision` verifies required instrument metadata before generating preview descriptors. | +| Structured metadata standards | `buildDataCitePacket` and `buildSchemaOrgPacket` emit DataCite-style and schema.org-compatible metadata. | +| FAIR reusable metadata | Decisions preserve checksums, licenses, units, calibration, preview policies, and creators. | +| Versioning and diff safety | Checksum and metadata drift block or downgrade previews before researchers inspect stale transformed data. | +| Access and licensing controls | Restricted licenses and embargoed artifacts require metadata-only previews. | +| Executable/reproducible preview transforms | `validatePreviewTransform` requires deterministic, lineage-recording transforms that do not expose raw values. | +| Tests and demo | `test.js` covers safe previews, checksum drift blocks, unit/calibration drift, restricted/embargo behavior, unsupported types, DataCite, and schema.org output. `demo.js` emits JSON and SVG artifacts. | + +## Acceptance Notes + +- Synthetic data only; no external service calls or private data. +- No dependencies beyond the Node.js standard library. +- This is preview-safety logic for raw instrument outputs, not another broad storage manifest. 
+- Blocked previews preserve reviewer-visible reasons and metadata packets rather than silently failing. diff --git a/raw-instrument-preview-integrity-gate/sample-data.js b/raw-instrument-preview-integrity-gate/sample-data.js new file mode 100644 index 0000000..b28a8f9 --- /dev/null +++ b/raw-instrument-preview-integrity-gate/sample-data.js @@ -0,0 +1,136 @@ +"use strict"; + +module.exports = { + artifacts: [ + { + id: "artifact:fcs-001", + title: "Microglia cytokine flow panel", + fileName: "microglia-panel.fcs", + doi: "10.0000/scibase.fcs.001", + expectedChecksum: "sha256:fcs-original", + actualChecksum: "sha256:fcs-original", + license: "CC-BY-4.0", + previewPolicy: "safe-preview", + creators: ["SCIBASE Flow Core"], + publicationYear: 2026, + expectedMetadata: { + instrumentModel: "Cytek Aurora", + detectorPanel: "IL6/TNF/CD11b", + compensationMatrix: "matrix-v3", + calibration: { laser405: 1.0, laser488: 1.0 }, + units: { FSC: "a.u.", IL6: "a.u." } + }, + observedMetadata: { + instrumentModel: "Cytek Aurora", + detectorPanel: "IL6/TNF/CD11b", + compensationMatrix: "matrix-v3", + calibration: { laser405: 1.01, laser488: 0.99 }, + units: { FSC: "a.u.", IL6: "a.u." 
} + }, + previewTransform: { + name: "scatter-density-downsample", + deterministic: true, + recordsLineage: true, + requiresRawValueExport: false, + downsampleRatio: 0.1 + } + }, + { + id: "artifact:mzml-002", + title: "Embargoed metabolomics peak run", + fileName: "metabolomics-run.mzml", + doi: "10.0000/scibase.mzml.002", + expectedChecksum: "sha256:mzml-before", + actualChecksum: "sha256:mzml-after", + license: "restricted", + previewPolicy: "safe-preview", + embargoed: true, + creators: ["SCIBASE Metabolomics Core"], + expectedMetadata: { + instrumentModel: "Orbitrap Eclipse", + ionMode: "positive", + mzRange: "50-1500", + calibration: { mz: 1.0 }, + units: { mz: "m/z", intensity: "counts" } + }, + observedMetadata: { + instrumentModel: "Orbitrap Eclipse", + ionMode: "positive", + mzRange: "50-1500", + calibration: { mz: 1.08 }, + units: { mz: "m/z", intensity: "relative" } + }, + previewTransform: { + name: "peak-topline", + deterministic: true, + recordsLineage: false, + requiresRawValueExport: true, + downsampleRatio: 0.5 + } + }, + { + id: "artifact:nd2-003", + title: "Organoid microscopy stack", + fileName: "organoid-stack.nd2", + expectedChecksum: "sha256:nd2-stable", + actualChecksum: "sha256:nd2-stable", + license: "CC-BY-NC-4.0", + previewPolicy: "safe-preview", + creators: ["SCIBASE Imaging Core"], + expectedMetadata: { + instrumentModel: "Nikon AX R", + objective: "40x", + channelMap: ["DAPI", "GFP"], + pixelSizeMicrons: 0.31, + calibration: { pixelSizeMicrons: 0.31 }, + units: { x: "micron", y: "micron", intensity: "a.u." } + }, + observedMetadata: { + instrumentModel: "Nikon AX R", + objective: "40x", + channelMap: ["DAPI", "GFP"], + pixelSizeMicrons: 0.31, + calibration: { pixelSizeMicrons: 0.31 }, + units: { x: "micron", y: "micron", intensity: "a.u." 
} + }, + previewTransform: { + name: "thumbnail-contact-sheet", + deterministic: true, + recordsLineage: true, + requiresRawValueExport: false, + downsampleRatio: 0.05 + } + }, + { + id: "artifact:h5-004", + title: "Sensor HDF5 sweep bundle", + fileName: "sensor-sweep.h5", + expectedChecksum: "sha256:h5-stable", + actualChecksum: "sha256:h5-stable", + license: "CC0-1.0", + previewPolicy: "safe-preview", + creators: ["SCIBASE Materials Core"], + expectedMetadata: { + instrumentModel: "Keysight E4990A", + schemaVersion: "2.0", + axisMap: { frequency: "Hz", impedance: "Ohm" }, + calibration: { impedance: 1.0 }, + units: { frequency: "Hz", impedance: "Ohm" } + }, + observedMetadata: { + instrumentModel: "Keysight E4990A", + schemaVersion: "2.0", + axisMap: { frequency: "Hz", impedance: "Ohm" }, + calibration: { impedance: 1.03 }, + units: { frequency: "Hz", impedance: "kOhm" } + }, + previewTransform: { + name: "dataset-tree-only", + deterministic: true, + recordsLineage: true, + requiresRawValueExport: false, + downsampleRatio: 0 + } + } + ] +}; diff --git a/raw-instrument-preview-integrity-gate/test.js b/raw-instrument-preview-integrity-gate/test.js new file mode 100644 index 0000000..affb3a2 --- /dev/null +++ b/raw-instrument-preview-integrity-gate/test.js @@ -0,0 +1,62 @@ +"use strict"; + +const assert = require("node:assert/strict"); +const { + buildPreviewIntegrityGate, + checksumChanged, + metadataDiff, + previewDecision, + unitDrift, + calibrationDrift +} = require("./index"); +const sampleData = require("./sample-data"); + +const report = buildPreviewIntegrityGate(sampleData); + +assert.equal(report.stats.artifactCount, 4); +assert.equal(report.stats.blocked, 1); +assert.equal(report.stats.metadataOnly, 1); +assert.equal(report.stats.safePreview, 2); + +const mzml = report.decisions.find((decision) => decision.artifactId === "artifact:mzml-002"); +assert.equal(mzml.decision, "block_preview"); +assert.equal(mzml.severity, "high"); 
+assert.ok(mzml.reasons.includes("raw artifact checksum changed")); +assert.ok(mzml.reasons.includes("unit metadata drift: intensity")); +assert.ok(mzml.reasons.includes("calibration drift: mz")); +assert.ok(mzml.reasons.includes("preview transform would expose raw values")); +assert.ok(mzml.reasons.includes("restricted_license_requires_metadata_only_preview")); +assert.ok(mzml.reasons.includes("embargoed artifact requires metadata-only preview")); +assert.equal(mzml.datacite.types.resourceType, "Mass spectrometry mzML"); +assert.equal(mzml.schemaOrg["@context"], "https://schema.org"); + +const fcs = report.decisions.find((decision) => decision.artifactId === "artifact:fcs-001"); +assert.equal(fcs.decision, "allow_safe_preview"); +assert.equal(fcs.previewKind, "gated_scatter_summary"); + +const h5 = report.decisions.find((decision) => decision.artifactId === "artifact:h5-004"); +assert.equal(h5.decision, "metadata_only_preview"); +assert.ok(h5.reasons.includes("unit metadata drift: impedance")); +assert.ok(h5.reasons.includes("calibration drift: impedance")); + +const stableArtifact = sampleData.artifacts[0]; +assert.equal(checksumChanged(stableArtifact), false); +assert.deepEqual(unitDrift(stableArtifact), []); +assert.deepEqual(calibrationDrift(stableArtifact), []); +assert.deepEqual(metadataDiff(stableArtifact), [ + { field: "calibration", expected: { laser405: 1, laser488: 1 }, observed: { laser405: 1.01, laser488: 0.99 } } +]); + +const unsupported = previewDecision({ + id: "artifact:raw-unknown", + title: "Unknown raw bundle", + fileName: "bundle.raw", + expectedChecksum: "sha256:a", + actualChecksum: "sha256:a", + observedMetadata: {}, + previewTransform: { deterministic: true, recordsLineage: true, requiresRawValueExport: false } +}); +assert.equal(unsupported.decision, "block_preview"); +assert.ok(unsupported.reasons[0].includes("unsupported raw instrument type")); + +console.log("raw-instrument-preview-integrity-gate tests passed");