diff --git a/dataset-curation-reuse-credit-ledger/README.md b/dataset-curation-reuse-credit-ledger/README.md new file mode 100644 index 0000000..cb58285 --- /dev/null +++ b/dataset-curation-reuse-credit-ledger/README.md @@ -0,0 +1,43 @@ +# Dataset Curation Reuse Credit Ledger + +This is a focused Community & User Reputation System slice for SCIBASE issue #15. It awards or quarantines reputation credit for downstream reuse of curated datasets and code, using citation evidence, license compatibility, self-reuse controls, duplicate evidence checks, and deterministic audit digests. + +## Scope + +- Allocates curator, validator, and maintainer credit from validated downstream reuse. +- Requires citation evidence before awarding public reputation. +- Checks license compatibility between source asset and reuse event. +- Quarantines self-reuse until independent evidence exists. +- Flags duplicate evidence hashes. +- Emits profile-ready reputation deltas and reuse badges. +- Emits stable `dcrcl_` audit digests for each reuse packet. + +It intentionally does not duplicate broad reputation ledgers, review-quality safeguards, abuse/appeals detectors, peer-review calibration, reviewer COI assignment, contributor credit attestation, transparency receipts, correction-impact ledgers, or mentorship impact ladders. 
+ +## Run + +```powershell +node dataset-curation-reuse-credit-ledger/test.js +node dataset-curation-reuse-credit-ledger/demo.js +``` + +The demo writes: + +- `dataset-curation-reuse-credit-ledger/demo-output/reuse-credit-ledger.json` +- `dataset-curation-reuse-credit-ledger/demo-output/demo.svg` + +This PR also includes the required short MP4 demo artifact: + +- `dataset-curation-reuse-credit-ledger/demo-output/demo.mp4` + +## API + +```js +const { + auditReuseCredits, + buildReputationSummary, + createReuseDigest, +} = require("./dataset-curation-reuse-credit-ledger"); + +const audit = auditReuseCredits({ assets, reuseEvents, generatedAt }); +``` diff --git a/dataset-curation-reuse-credit-ledger/acceptance-notes.md b/dataset-curation-reuse-credit-ledger/acceptance-notes.md new file mode 100644 index 0000000..b5643e6 --- /dev/null +++ b/dataset-curation-reuse-credit-ledger/acceptance-notes.md @@ -0,0 +1,27 @@ +# Acceptance Notes + +## What This Adds + +- Dependency-free Node.js module under `dataset-curation-reuse-credit-ledger/`. +- Deterministic reuse-credit audit packets for curated datasets and code. +- Tests for valid credit allocation, unsafe evidence quarantine, profile-ready reputation summaries, and stable reuse digests. +- Demo JSON, SVG, and MP4 artifacts for bounty review. 
+ +## Verification + +Use these commands from the repository root: + +```powershell +node dataset-curation-reuse-credit-ledger/test.js +node dataset-curation-reuse-credit-ledger/demo.js +node --check dataset-curation-reuse-credit-ledger/index.js +node --check dataset-curation-reuse-credit-ledger/test.js +node --check dataset-curation-reuse-credit-ledger/demo.js +node --check dataset-curation-reuse-credit-ledger/sample-data.js +ffprobe -v error -show_entries format=duration,size -show_entries stream=codec_name,width,height -of default=noprint_wrappers=1 dataset-curation-reuse-credit-ledger/demo-output/demo.mp4 +git diff --check +``` + +## AI Assistance Disclosure + +This contribution was prepared with AI assistance from OpenAI Codex and reviewed through local deterministic tests and artifact checks before submission. diff --git a/dataset-curation-reuse-credit-ledger/demo-output/demo.mp4 b/dataset-curation-reuse-credit-ledger/demo-output/demo.mp4 new file mode 100644 index 0000000..80124f9 Binary files /dev/null and b/dataset-curation-reuse-credit-ledger/demo-output/demo.mp4 differ diff --git a/dataset-curation-reuse-credit-ledger/demo-output/demo.svg b/dataset-curation-reuse-credit-ledger/demo-output/demo.svg new file mode 100644 index 0000000..59f52fa --- /dev/null +++ b/dataset-curation-reuse-credit-ledger/demo-output/demo.svg @@ -0,0 +1,57 @@ + + + + Dataset Curation Reuse Credit Ledger + Community reputation from downstream dataset and code reuse + + + 3 + Reuse + + + + 2 + Awarded + + + + 1 + Quarantine + + + + 1 + License + + + + + reuse-1 - Single-cell assay benchmark + Award Credit + Credit awarded + dcrcl_c48af1209078c13469ce3a4d + + + + reuse-2 - Single-cell assay benchmark + Quarantine Credit + MISSING_CITATION | LICENSE_MISMATCH | SELF_REUSE | DUPLICATE_EVIDENCE + dcrcl_1098b9741d89e72ac0581b45 + + + + reuse-3 - QC pipeline + Award Credit + Credit awarded + dcrcl_bca70185e357d2e0deffdc38 + + u-alina +21 u-minh +24 + \ No newline at end of file diff --git 
a/dataset-curation-reuse-credit-ledger/demo-output/reuse-credit-ledger.json b/dataset-curation-reuse-credit-ledger/demo-output/reuse-credit-ledger.json new file mode 100644 index 0000000..58c9021 --- /dev/null +++ b/dataset-curation-reuse-credit-ledger/demo-output/reuse-credit-ledger.json @@ -0,0 +1,101 @@ +{ + "generatedAt": "2026-05-20T13:30:00.000Z", + "reusePackets": [ + { + "eventId": "reuse-1", + "assetId": "dataset-cell-42", + "assetTitle": "Single-cell assay benchmark", + "assetType": "dataset", + "generatedAt": "2026-05-20T13:30:00.000Z", + "decision": "award_credit", + "flags": [], + "creditAllocations": [ + { + "userId": "u-alina", + "assetId": "dataset-cell-42", + "points": 21, + "reason": "curator reuse credit" + }, + { + "userId": "u-minh", + "assetId": "dataset-cell-42", + "points": 9, + "reason": "validator reuse credit" + } + ], + "actions": [ + "Publish reuse credit to curator profiles" + ], + "reuseDigest": "dcrcl_c48af1209078c13469ce3a4d" + }, + { + "eventId": "reuse-2", + "assetId": "dataset-cell-42", + "assetTitle": "Single-cell assay benchmark", + "assetType": "dataset", + "generatedAt": "2026-05-20T13:30:00.000Z", + "decision": "quarantine_credit", + "flags": [ + "MISSING_CITATION", + "LICENSE_MISMATCH", + "SELF_REUSE", + "DUPLICATE_EVIDENCE" + ], + "creditAllocations": [], + "actions": [ + "Add citation evidence for reuse-2", + "Resolve CC0 vs CC-BY-4.0 license mismatch", + "Require independent reuse evidence before awarding curator reputation", + "Deduplicate evidence hash sha256:evidence-1" + ], + "reuseDigest": "dcrcl_1098b9741d89e72ac0581b45" + }, + { + "eventId": "reuse-3", + "assetId": "code-qc-7", + "assetTitle": "QC pipeline", + "assetType": "code", + "generatedAt": "2026-05-20T13:30:00.000Z", + "decision": "award_credit", + "flags": [], + "creditAllocations": [ + { + "userId": "u-minh", + "assetId": "code-qc-7", + "points": 15, + "reason": "maintainer reuse credit" + } + ], + "actions": [ + "Publish reuse credit to curator 
/**
 * Characters that are significant in XML text and attribute values, mapped to
 * the predefined entity that represents them.
 */
const XML_ENTITIES = Object.freeze({
  "&": "&amp;",
  "<": "&lt;",
  ">": "&gt;",
  '"': "&quot;",
  "'": "&apos;",
});

/**
 * Convert a snake_case decision code into a title-cased label.
 *
 * Empty segments (from an empty string or doubled underscores) are dropped so
 * that the function never indexes into an empty string.
 *
 * @param {string} decision - Decision code such as "award_credit".
 * @returns {string} Human-readable label such as "Award Credit".
 */
function formatDecision(decision) {
  const text = decision == null ? "" : String(decision);
  return text
    .split("_")
    .filter((part) => part.length > 0)
    .map((part) => part[0].toUpperCase() + part.slice(1))
    .join(" ");
}

/**
 * Escape a value for safe interpolation into SVG/XML text or attribute values.
 *
 * All five predefined XML entities are handled in a single pass, so an
 * ampersand produced by one replacement is never escaped a second time.
 *
 * @param {*} value - Any value; it is stringified with String() first.
 * @returns {string} The escaped string.
 */
function escapeXml(value) {
  return String(value).replace(/[&<>"']/g, (character) => XML_ENTITIES[character]);
}
Map(assets.map((asset) => [asset.id, asset])); + const seenEvidenceHashes = new Set(); + const reusePackets = reuseEvents.map((event) => { + const asset = assetById.get(event.assetId); + if (!asset) { + throw new Error(`Unknown asset '${event.assetId}'`); + } + const packet = evaluateReuseEvent(event, asset, seenEvidenceHashes, generatedAt); + if (event.evidenceHash) { + seenEvidenceHashes.add(event.evidenceHash); + } + return packet; + }); + + const audit = { + generatedAt, + reusePackets, + }; + audit.reputationSummary = buildReputationSummary(audit); + return audit; +} + +function evaluateReuseEvent(event, asset, seenEvidenceHashes, generatedAt) { + const flags = []; + if (!String(event.citation || "").trim()) { + flags.push("MISSING_CITATION"); + } + if (event.licenseUsed !== asset.license) { + flags.push("LICENSE_MISMATCH"); + } + const maintainerIds = new Set((asset.maintainers || []).map((maintainer) => maintainer.userId)); + if (maintainerIds.has(event.reuserId)) { + flags.push("SELF_REUSE"); + } + if (event.evidenceHash && seenEvidenceHashes.has(event.evidenceHash)) { + flags.push("DUPLICATE_EVIDENCE"); + } + + const decision = flags.length === 0 ? "award_credit" : "quarantine_credit"; + const creditAllocations = decision === "award_credit" + ? 
allocateCredits(asset, event.validationStatus) + : []; + const packet = { + eventId: event.id, + assetId: asset.id, + assetTitle: asset.title, + assetType: asset.type, + generatedAt, + decision, + flags, + creditAllocations, + actions: buildActions(flags, event, asset), + }; + packet.reuseDigest = createReuseDigest(packet); + return packet; +} + +function allocateCredits(asset, validationStatus) { + const base = BASE_POINTS[validationStatus] || BASE_POINTS.unverified; + return (asset.maintainers || []).map((maintainer) => ({ + userId: maintainer.userId, + assetId: asset.id, + points: Math.round(base * maintainer.share), + reason: `${maintainer.role} reuse credit`, + })); +} + +function buildActions(flags, event, asset) { + if (flags.length === 0) { + return ["Publish reuse credit to curator profiles"]; + } + const actions = []; + if (flags.includes("MISSING_CITATION")) { + actions.push(`Add citation evidence for ${event.id}`); + } + if (flags.includes("LICENSE_MISMATCH")) { + actions.push(`Resolve ${event.licenseUsed} vs ${asset.license} license mismatch`); + } + if (flags.includes("SELF_REUSE")) { + actions.push("Require independent reuse evidence before awarding curator reputation"); + } + if (flags.includes("DUPLICATE_EVIDENCE")) { + actions.push(`Deduplicate evidence hash ${event.evidenceHash}`); + } + return actions; +} + +function buildReputationSummary(audit) { + const packets = audit.reusePackets || []; + const counts = { + reuseEvents: packets.length, + awarded: packets.filter((packet) => packet.decision === "award_credit").length, + quarantined: packets.filter((packet) => packet.decision === "quarantine_credit").length, + duplicateEvidence: countFlag(packets, "DUPLICATE_EVIDENCE"), + licenseIssues: countFlag(packets, "LICENSE_MISMATCH"), + }; + const pointsByUser = new Map(); + const badgesByUser = new Map(); + for (const packet of packets) { + for (const allocation of packet.creditAllocations) { + pointsByUser.set(allocation.userId, 
(pointsByUser.get(allocation.userId) || 0) + allocation.points); + if (!badgesByUser.has(allocation.userId)) { + badgesByUser.set(allocation.userId, new Set()); + } + badgesByUser.get(allocation.userId).add(`${packet.assetType}_reuse_${roleFromReason(allocation.reason)}`); + } + } + + const reputationDeltas = [...pointsByUser.keys()].sort().map((userId) => ({ + userId, + points: pointsByUser.get(userId), + badges: [...(badgesByUser.get(userId) || [])].sort(), + })); + + return { counts, reputationDeltas }; +} + +function createReuseDigest(packet) { + const stableFacts = { + eventId: packet.eventId, + assetId: packet.assetId, + decision: packet.decision, + flags: [...(packet.flags || [])].sort(), + creditAllocations: [...(packet.creditAllocations || [])].sort((left, right) => + `${left.userId}:${left.assetId}`.localeCompare(`${right.userId}:${right.assetId}`) + ), + }; + return `dcrcl_${crypto.createHash("sha256").update(JSON.stringify(stableFacts)).digest("hex").slice(0, 24)}`; +} + +function countFlag(packets, flag) { + return packets.filter((packet) => packet.flags.includes(flag)).length; +} + +function roleFromReason(reason) { + return String(reason).split(" ")[0]; +} + +module.exports = { + auditReuseCredits, + buildReputationSummary, + createReuseDigest, +}; diff --git a/dataset-curation-reuse-credit-ledger/requirements-map.md b/dataset-curation-reuse-credit-ledger/requirements-map.md new file mode 100644 index 0000000..7be75d6 --- /dev/null +++ b/dataset-curation-reuse-credit-ledger/requirements-map.md @@ -0,0 +1,15 @@ +# Requirements Map + +| Issue #15 requirement | Implementation coverage | +| --- | --- | +| Contributor credits | Allocates points to dataset curators, validators, and code maintainers. | +| Dataset uploads or curation | Focuses on downstream reuse of curated datasets and code assets. | +| Visible credit on researcher profiles | Emits profile-ready reputation deltas and badges. 
| +| Transparent reputation scoring | Uses deterministic point allocations from validation status and maintainer share. | +| Peer validation | Awards higher credit to replicated or validated reuse events. | +| Abuse-resistant scoring | Quarantines missing citations, license mismatches, self-reuse, and duplicate evidence. | +| Project timelines and auditability | Emits per-reuse packets with stable `dcrcl_` digests. | + +## Non-Overlap Statement + +This slice focuses on downstream reuse credit for curated datasets and code. It does not duplicate broad reputation ledgers, review safeguards, abuse/appeals modules, peer-review calibration, reviewer COI assignment, contributor credit attestation, transparency receipts, correction-impact ledgers, or mentorship impact ladders. diff --git a/dataset-curation-reuse-credit-ledger/sample-data.js b/dataset-curation-reuse-credit-ledger/sample-data.js new file mode 100644 index 0000000..d6782a6 --- /dev/null +++ b/dataset-curation-reuse-credit-ledger/sample-data.js @@ -0,0 +1,54 @@ +const assets = [ + { + id: "dataset-cell-42", + title: "Single-cell assay benchmark", + type: "dataset", + license: "CC-BY-4.0", + maintainers: [ + { userId: "u-alina", role: "curator", share: 0.7 }, + { userId: "u-minh", role: "validator", share: 0.3 }, + ], + }, + { + id: "code-qc-7", + title: "QC pipeline", + type: "code", + license: "MIT", + maintainers: [{ userId: "u-minh", role: "maintainer", share: 1 }], + }, +]; + +const reuseEvents = [ + { + id: "reuse-1", + assetId: "dataset-cell-42", + reuserId: "u-labwest", + projectId: "paper-9", + citation: "DOI:10.1000/cell42", + licenseUsed: "CC-BY-4.0", + evidenceHash: "sha256:evidence-1", + validationStatus: "replicated", + }, + { + id: "reuse-2", + assetId: "dataset-cell-42", + reuserId: "u-alina", + projectId: "paper-self", + citation: "", + licenseUsed: "CC0", + evidenceHash: "sha256:evidence-1", + validationStatus: "unverified", + }, + { + id: "reuse-3", + assetId: "code-qc-7", + reuserId: 
"u-labwest", + projectId: "paper-10", + citation: "GitHub release v1.4", + licenseUsed: "MIT", + evidenceHash: "sha256:evidence-3", + validationStatus: "validated", + }, +]; + +module.exports = { assets, reuseEvents }; diff --git a/dataset-curation-reuse-credit-ledger/test.js b/dataset-curation-reuse-credit-ledger/test.js new file mode 100644 index 0000000..840f0f7 --- /dev/null +++ b/dataset-curation-reuse-credit-ledger/test.js @@ -0,0 +1,123 @@ +const assert = require("assert"); + +const { + auditReuseCredits, + buildReputationSummary, + createReuseDigest, +} = require("./index"); + +const generatedAt = "2026-05-20T13:30:00.000Z"; + +const assets = [ + { + id: "dataset-cell-42", + title: "Single-cell assay benchmark", + type: "dataset", + license: "CC-BY-4.0", + maintainers: [ + { userId: "u-alina", role: "curator", share: 0.7 }, + { userId: "u-minh", role: "validator", share: 0.3 }, + ], + }, + { + id: "code-qc-7", + title: "QC pipeline", + type: "code", + license: "MIT", + maintainers: [{ userId: "u-minh", role: "maintainer", share: 1 }], + }, +]; + +const reuseEvents = [ + { + id: "reuse-1", + assetId: "dataset-cell-42", + reuserId: "u-labwest", + projectId: "paper-9", + citation: "DOI:10.1000/cell42", + licenseUsed: "CC-BY-4.0", + evidenceHash: "sha256:evidence-1", + validationStatus: "replicated", + }, + { + id: "reuse-2", + assetId: "dataset-cell-42", + reuserId: "u-alina", + projectId: "paper-self", + citation: "", + licenseUsed: "CC0", + evidenceHash: "sha256:evidence-1", + validationStatus: "unverified", + }, + { + id: "reuse-3", + assetId: "code-qc-7", + reuserId: "u-labwest", + projectId: "paper-10", + citation: "GitHub release v1.4", + licenseUsed: "MIT", + evidenceHash: "sha256:evidence-3", + validationStatus: "validated", + }, +]; + +function test(name, fn) { + try { + fn(); + console.log(`ok - ${name}`); + } catch (error) { + console.error(`not ok - ${name}`); + console.error(error); + process.exitCode = 1; + } +} + +test("allocates valid reuse 
const assert = require("assert");

const {
  auditReuseCredits,
  buildReputationSummary,
  createReuseDigest,
} = require("./index");
// Shared fixtures: the same data the demo renders, so the tests and the
// committed demo artifacts cannot silently drift apart.
const { assets, reuseEvents } = require("./sample-data");

const generatedAt = "2026-05-20T13:30:00.000Z";

/**
 * Minimal synchronous test runner: prints a TAP-like line per test and marks
 * the process as failed (without aborting later tests) when `fn` throws.
 */
function test(name, fn) {
  try {
    fn();
    console.log(`ok - ${name}`);
  } catch (error) {
    console.error(`not ok - ${name}`);
    console.error(error);
    process.exitCode = 1;
  }
}

/** Look up a packet by event id, failing the test when it is absent. */
function findPacket(audit, eventId) {
  const packet = audit.reusePackets.find((item) => item.eventId === eventId);
  assert(packet, `expected ${eventId} packet`);
  return packet;
}

test("allocates valid reuse credit and quarantines unsafe evidence", () => {
  const audit = auditReuseCredits({ assets, reuseEvents, generatedAt });

  const valid = findPacket(audit, "reuse-1");
  assert.strictEqual(valid.decision, "award_credit");
  assert.deepStrictEqual(valid.flags, []);
  assert.deepStrictEqual(valid.creditAllocations, [
    { userId: "u-alina", assetId: "dataset-cell-42", points: 21, reason: "curator reuse credit" },
    { userId: "u-minh", assetId: "dataset-cell-42", points: 9, reason: "validator reuse credit" },
  ]);

  const unsafe = findPacket(audit, "reuse-2");
  assert.strictEqual(unsafe.decision, "quarantine_credit");
  assert(unsafe.flags.includes("MISSING_CITATION"));
  assert(unsafe.flags.includes("LICENSE_MISMATCH"));
  assert(unsafe.flags.includes("SELF_REUSE"));
  assert(unsafe.flags.includes("DUPLICATE_EVIDENCE"));
  assert.deepStrictEqual(unsafe.creditAllocations, []);
  assert(unsafe.actions.some((action) => action.includes("Add citation evidence")));
});

test("builds profile-ready reputation summary", () => {
  const audit = auditReuseCredits({ assets, reuseEvents, generatedAt });
  const summary = buildReputationSummary(audit);

  assert.deepStrictEqual(summary.counts, {
    reuseEvents: 3,
    awarded: 2,
    quarantined: 1,
    duplicateEvidence: 1,
    licenseIssues: 1,
  });
  assert.deepStrictEqual(summary.reputationDeltas, [
    { userId: "u-alina", points: 21, badges: ["dataset_reuse_curator"] },
    { userId: "u-minh", points: 24, badges: ["code_reuse_maintainer", "dataset_reuse_validator"] },
  ]);
});

test("creates stable reuse digests", () => {
  const audit = auditReuseCredits({ assets, reuseEvents, generatedAt });
  const packet = findPacket(audit, "reuse-2");

  const first = createReuseDigest(packet);
  const second = createReuseDigest({ ...packet, actions: [...packet.actions] });

  assert.strictEqual(first, second);
  assert.strictEqual(first, packet.reuseDigest);
  assert.match(first, /^dcrcl_[a-f0-9]{24}$/);

  // A digest that never changes would also be "stable": make sure a changed
  // fact produces a different digest.
  const flipped = createReuseDigest({ ...packet, decision: "award_credit" });
  assert.notStrictEqual(flipped, first);
});

test("rejects malformed input", () => {
  assert.throws(() => auditReuseCredits({ assets: null, reuseEvents, generatedAt }), /assets must be an array/);
  assert.throws(() => auditReuseCredits({ assets, reuseEvents: null, generatedAt }), /reuseEvents must be an array/);
  assert.throws(
    () => auditReuseCredits({ assets, reuseEvents: [{ id: "reuse-x", assetId: "missing" }], generatedAt }),
    /Unknown asset 'missing'/,
  );
});