diff --git a/.github/workflows/index-freshness.yml b/.github/workflows/index-freshness.yml
new file mode 100644
index 0000000..c004aec
--- /dev/null
+++ b/.github/workflows/index-freshness.yml
@@ -0,0 +1,63 @@
+# SPDX-License-Identifier: PMPL-1.0-or-later
+# Copyright (c) 2026 Jonathan D.A. Jewell (hyperpolymath)
+#
+# Fails if the committed index.json drifts from what scripts/regen-index.sh
+# produces from scans/ on disk. Pair with branch protection so this check
+# is required on PRs into main.
+#
+# Deliberately no `paths:` filter on pull_request: a required check whose
+# workflow is skipped by path filtering stays "Pending" and blocks the
+# merge, so PRs that never touch scans/ or index.json could not land.
+# The job is cheap, so it simply runs on every PR.
+#
+# Closes V-L3-M1.
+
+name: index.json freshness
+
+on:
+  push:
+    branches: [main]
+  pull_request:
+    branches: [main]
+
+concurrency:
+  group: index-freshness-${{ github.ref }}
+  cancel-in-progress: true
+
+permissions:
+  contents: read
+
+jobs:
+  check:
+    name: index.json matches scans/
+    runs-on: ubuntu-latest
+    timeout-minutes: 5
+    steps:
+      # Full history: regen-index.sh derives last_updated from
+      # `git log -- scans/`, which can answer differently on a shallow clone.
+      - name: Checkout (with history; regen-index needs git log on scans/)
+        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
+        with:
+          fetch-depth: 0
+
+      - name: Regenerate index.json
+        run: bash scripts/regen-index.sh
+
+      - name: Fail if index.json drifted
+        run: |
+          # An untracked index.json never shows up in `git diff`, so a
+          # missing committed index would otherwise pass silently.
+          if ! git ls-files --error-unmatch index.json >/dev/null 2>&1; then
+            echo "::error::index.json is not committed."
+            echo "Run 'bash scripts/regen-index.sh' locally and commit the result."
+            exit 1
+          fi
+          if ! git diff --quiet --exit-code index.json; then
+            echo "::error::index.json is out of sync with scans/."
+            echo "Run 'bash scripts/regen-index.sh' locally and commit the result."
+            echo ""
+            echo "----- diff -----"
+            git --no-pager diff index.json
+            exit 1
+          fi
+          echo "index.json is in sync with scans/."
diff --git a/EXPLAINME.adoc b/EXPLAINME.adoc index e043b70..a4c6fc7 100644 --- a/EXPLAINME.adoc +++ b/EXPLAINME.adoc @@ -7,36 +7,54 @@ The README makes claims. This file backs them up. [quote, README] ____ -Git-backed flat-file storage for scan results and drift detection data. 
+This repository serves two explicit purposes: (1) a flat-file data +store for panic-attacker scan results, hardware-crash-team findings, +and drift snapshots; (2) an ABI dogfood site for the hyperpolymath +Idris2 + Zig ABI shared with `proven`, `burble`, `gossamer`. ____ -== Technology Choices +See `docs/decisions/ADR-0001-repo-purpose.adoc` for why both purposes +live in one repo. + +== Technology choices (Purpose 2 — ABI) [cols="1,2"] |=== | Technology | Learn More -| **Zig** | https://ziglang.org +| **Zig** | https://ziglang.org | **Idris2 ABI** | https://www.idris-lang.org |=== -== Dogfooded Across The Account - -Uses the hyperpolymath ABI/FFI standard (Idris2 + Zig). Same pattern used across +The Zig FFI implementation lives in `ffi/zig/` and follows the same +pattern as https://github.com/hyperpolymath/proven[proven], https://github.com/hyperpolymath/burble[burble], and https://github.com/hyperpolymath/gossamer[gossamer]. -== File Map +== File map -[cols="1,2"] +[cols="1,2,1"] |=== -| Path | What's There - -| `src/` | Source code -| `ffi/` | Foreign function interface +| Path | What's there | Purpose + +| `scans/` | Per-repo scan results (JSON; ~301 files) | data +| `dispatch/` | Dispatch records | data +| `patterns/` | Drift / scan pattern definitions | data +| `recipes/` | Ingest and aggregation recipes | data +| `outcomes/` | Outcome records | data +| `policy/` | Storage and retention policy notes | data +| `health/` | Health-state snapshots | data +| `index.json` | Master index of stored data | data +| `ffi/zig/` | Zig FFI implementation | ABI dogfood |=== +== Related + +`verisimiser` (https://github.com/hyperpolymath/verisimiser) is the +augmentation CLI; this repo holds its scan/drift output but is not a +runtime dependency. + == Questions? Open an issue or reach out directly — happy to explain anything in more detail. 
diff --git a/README.adoc b/README.adoc index 56016bb..6951328 100644 --- a/README.adoc +++ b/README.adoc @@ -1,25 +1,49 @@ +// SPDX-License-Identifier: PMPL-1.0-or-later +// Copyright (c) 2026 Jonathan D.A. Jewell (hyperpolymath) = VeriSimDB Data Repository -Git-backed flat-file storage for scan results and drift detection data. +This repository serves *two explicit purposes*, per +`docs/decisions/ADR-0001-repo-purpose.adoc`: -== Structure +. **Flat-file data store** for panic-attacker scan results, + hardware-crash-team findings, and drift snapshots. +. **ABI dogfood** for the hyperpolymath Idris2 + Zig ABI standard + shared with https://github.com/hyperpolymath/proven[`proven`], + https://github.com/hyperpolymath/burble[`burble`], and + https://github.com/hyperpolymath/gossamer[`gossamer`]. -- `scans/` - panic-attack scan results per repo -- `hardware/` - hardware-crash-team findings -- `drift/` - drift detection snapshots -- `index.json` - Master index of all stored data +== Related repos -== Usage +* https://github.com/hyperpolymath/verisimiser[`verisimiser`] — the + augmentation CLI. This repo holds its scan and drift output; + standalone consumers (`panic-attacker`, `hardware-crash-team`) also + write here. Verisimiser does not require this repo at runtime. -This repo receives scan results via GitHub Actions workflow_dispatch events -and stores them as JSON files. The ingest workflow updates the index -automatically. 
+== Layout (by purpose) -== Integration +[cols="1,2,1"] +|=== +| Path | What's there | Purpose -=== Sending Scan Results +| `scans/` | Per-repo scan results (JSON; ~301 files) | data +| `dispatch/` | Dispatch records | data +| `patterns/` | Drift / scan pattern definitions | data +| `recipes/` | Ingest and aggregation recipes | data +| `outcomes/` | Outcome records | data +| `policy/` | Storage and retention policy notes | data +| `health/` | Health-state snapshots | data +| `index.json` | Master index of stored data | data +| `ffi/zig/` | Zig FFI implementation | ABI dogfood +| `.machine_readable/` | Contractile machinery (org-wide) | both +| `.github/workflows/` | CI (ingest, mirrors, governance) | both +| `docs/decisions/` | ADRs | both +|=== -Other repos can send scan results using the reusable workflow: +== Purpose 1 — flat-file data store + +=== Receiving scan results + +Other repos send scan results using the reusable workflow: [source,yaml] ---- @@ -35,7 +59,7 @@ jobs: uses: hyperpolymath/panic-attacker/.github/workflows/scan-and-report.yml@main ---- -=== Querying Results +=== Querying results Clone this repo and read the JSON files: @@ -43,11 +67,11 @@ Clone this repo and read the JSON files: ---- git clone https://github.com/hyperpolymath/verisimdb-data cd verisimdb-data -cat scans/echidna.json # View specific repo results -jq '.repos' index.json # View all scan summaries +cat scans/echidna.json # specific repo +jq '.repos' index.json # all scan summaries ---- -== File Format +=== File format Each scan result in `scans/` is a JSON file with the structure: @@ -75,6 +99,19 @@ Each scan result in `scans/` is a JSON file with the structure: } ---- +=== Index freshness + +`index.json` is the master summary. A CI lane regenerates it from +`scans/` on every push and fails if the working tree changes (i.e. the +committed index must match what the regen script produces from the +files on disk). See `scripts/regen-index.sh`. 
+ +== Purpose 2 — ABI dogfood + +`ffi/zig/` is a working Zig FFI implementation of the hyperpolymath +Idris2 + Zig ABI standard. The same pattern is used in `proven`, +`burble`, and `gossamer`; this repo serves as the dogfood instance. + == License SPDX-License-Identifier: PMPL-1.0-or-later diff --git a/TOPOLOGY.md b/TOPOLOGY.md index 023dde3..dcf51d0 100644 --- a/TOPOLOGY.md +++ b/TOPOLOGY.md @@ -5,30 +5,50 @@ ## Purpose -VeriSimDB Data Repository is a git-backed flat-file storage system for scan results and drift detection data. Receives panic-attack scan results, hardware-crash-team findings, and drift detection snapshots via GitHub Actions workflow_dispatch events. Maintains master index and enables historical analysis of repository health and compliance drift over time. +This repository serves two explicit purposes per +[`docs/decisions/ADR-0001-repo-purpose.adoc`](docs/decisions/ADR-0001-repo-purpose.adoc): -## Module Map +1. **Flat-file data store** for panic-attacker scan results, + hardware-crash-team findings, and drift snapshots received via + GitHub Actions `workflow_dispatch` events. Maintains a master + index for historical analysis of repository health and compliance + drift over time. +2. **ABI dogfood** for the hyperpolymath Idris2 + Zig ABI standard + (shared with `proven`, `burble`, `gossamer`). Lives in `ffi/zig/`. 
+ +## Module map ``` verisimdb-data/ -├── scans/ # panic-attack scan results per repo -├── hardware/ # hardware-crash-team findings -├── drift/ # drift detection snapshots -├── index.json # Master index of all stored data -└── .github/workflows/ # Ingest workflows (receive results) +├── scans/ # panic-attacker scan results per repo (data) +├── dispatch/ # dispatch records (data) +├── patterns/ # drift / scan pattern definitions (data) +├── recipes/ # ingest and aggregation recipes (data) +├── outcomes/ # outcome records (data) +├── policy/ # storage and retention policy notes (data) +├── health/ # health-state snapshots (data) +├── index.json # master index of stored data (data) +├── ffi/ +│ └── zig/ # Zig FFI implementation (ABI dogfood) +├── scripts/ # ingest + index regen scripts (data) +├── docs/ +│ └── decisions/ # ADRs +└── .github/workflows/ # ingest + governance workflows ``` -## Data Flow +## Data flow (Purpose 1) ``` [Workflow Dispatch] ──► [Ingest Handler] ──► [JSON Validation] ──► [File Storage] - ↓ - [Index Update] ──► [Query Ready] + │ + [Index Regen on push] ──► [Query Ready] ``` -## Integration Points +## Integration points -- **panic-attack**: Upstream scanner sending results -- **hardware-crash-team**: Hardware failure analysis -- **Drift detection**: Compliance change tracking -- **Hyperpolymath CI/CD**: Automated data aggregation +- **panic-attacker**: Upstream scanner sending results into `scans/`. +- **hardware-crash-team**: Hardware failure analysis (Purpose 1 consumer). +- **Drift detection**: Compliance change tracking against `scans/` history. +- **verisimiser**: Consumes scan/drift data when wired to this repo as + its sidecar storage. Not a runtime dependency. +- **hyperpolymath CI/CD**: Automated data aggregation. 
diff --git a/docs/decisions/ADR-0001-repo-purpose.adoc b/docs/decisions/ADR-0001-repo-purpose.adoc new file mode 100644 index 0000000..ef0514f --- /dev/null +++ b/docs/decisions/ADR-0001-repo-purpose.adoc @@ -0,0 +1,72 @@ +// SPDX-License-Identifier: PMPL-1.0-or-later +// Copyright (c) 2026 Jonathan D.A. Jewell (hyperpolymath) += ADR-0001: verisimdb-data carries two explicit purposes +:revdate: 2026-05-13 +:status: Accepted + +== Status + +Accepted — 2026-05-13. + +Resolves: https://github.com/hyperpolymath/verisimdb-data/issues/2[V-L1-J1]. + +== Context + +This repository advertised three different identities across its top-level +docs: + +. `README.adoc` — "Git-backed flat-file storage for scan results and drift + detection data" with panic-attacker scan ingest workflow. +. `EXPLAINME.adoc` — "Uses the hyperpolymath ABI/FFI standard + (Idris2 + Zig). Same pattern used across `proven`, `burble`, and + `gossamer`." +. `TOPOLOGY.md` — "Receives panic-attack scan results, + hardware-crash-team findings, and drift detection snapshots… enables + historical analysis of repository health and compliance drift over time." + +The directory layout backs two of these stories with real content: + +* `scans/` holds ~301 JSON files of scan results (production data). +* `ffi/zig/` holds working Zig FFI code (`build.zig` + three `.zig` files). + +The contractile and `.machine_readable/` scaffolding is org-wide +infrastructure that applies to either purpose. + +== Decision + +The repository serves **two purposes**, explicitly declared in `README.adoc`: + +. **Flat-file data store** for panic-attacker scan output and drift + snapshots (`scans/`, `dispatch/`, `patterns/`, `recipes/`, `outcomes/`, + `policy/`, `health/`). +. **ABI dogfood site** for the hyperpolymath Idris2 + Zig ABI standard + shared with `proven`, `burble`, and `gossamer` (`ffi/zig/`). + +Both purposes coexist; neither is moved out. 
The accepted layout is +documented at the top of `README.adoc` and reflected in `TOPOLOGY.md`. + +Verisimiser does *not* depend on `ffi/zig/` here; if a downstream consumer +needs the Zig FFI extracted, that is a future ADR. + +== Consequences + +. `README.adoc`, `EXPLAINME.adoc`, and `TOPOLOGY.md` are brought into + agreement on the two-purpose framing (in the same PR as this ADR). +. The directory layout table in `README.adoc` lists which subtrees + serve which purpose. +. Future expansions to either purpose are fine — the repo is explicitly + multi-purpose, not "drifting" between purposes. + +== Alternatives considered + +Scan-store only (move `ffi/zig/` out):: +Rejected. The Zig FFI lives here for dogfooding reasons that predate +verisimiser; relocating it costs more than documenting the dual purpose. + +ABI dogfood only (move `scans/` out):: +Rejected. The scan ingest workflow is wired up and producing data; +moving 301 JSON files plus the ingest pipeline elsewhere has no payoff. + +Split into two repos:: +Rejected for both of the reasons above. Revisit in a year if either +purpose grows large enough to warrant its own repo and CI surface. 
diff --git a/index.json b/index.json index e836d35..1135408 100644 --- a/index.json +++ b/index.json @@ -1,12 +1,308 @@ { - "last_updated": "2026-03-07T22:19:01+00:00", - "total_scans": 301, + "last_updated": "2026-03-28T08:28:32+00:00", "repos": [ + "0-ai-gatekeeper-protocol", + "7-tentacles", + "Axiology.jl", + "Axiom.jl", + "BowtieRisk.jl", + "Causals.jl", + "Cladistics.jl", + "Cliodynamics.jl", + "Cliometrics.jl", + "Exnovation.jl", + "HackenbushGames.jl", + "IDApixiTIK", + "KnotTheory.jl", + "PolyglotFormalisms.jl", + "ProvenCrypto.jl", + "SMTLib.jl", + "ZeroProb.jl", + "_pathroot", + "a2ml", + "absolute-zero", + "academic-workflow-suite", + "accessibility-everywhere", + "accessibilitybot", + "ada-loom-registry", + "aerie", + "affinescript", + "aggregate-library", + "aletheia", + "algorithm-shield", + "alkahest-shell-transmuter", + "ambientops", + "amethe", + "anamnesis", + "anvomidav", + "asdf-augmenters", + "asdf-tool-plugins", + "assemblyline-20260307195240", + "ats2-tui", + "avatar-fabrication-facility", + "avow-protocol", + "axel-protocol", + "befunge93-vault-cracker", + "betlang", + "bgp-backbone-lab", + "bitfuckit", + "blocky-writer", + "blue-screen-of-app", + "bofig", + "boinc-boinc", + "branch-newspaper", + "bridge-nginx-zig", + "broad-spectrum", + "bunsenite", + "cadre-router", + "candy-crash", + "casket-ssg", + "cccp", + "cerro-torre", + "checky-monkey", + "chimichanga", + "cicada", + "cipherbot", + "civic-connect", + "civic-stream", + "claim-forge", + "claude-integrations", + "cloud-sync-tuner", + "cloudflare-dns-terraform", + "conative-gating", + "conflow", + "consent-aware-http", + "contractiles", + "coq-jr", + "czech-file-knife", + "defensive-multiplicity", + "defiant", + "deno-ecosystem", + "developer-ecosystem", + "devkit-risc-v", + "dicti0nary-attack", + "did-you-actually-do-that", + "disinfo-nesy-detector", + "displace", + "dnfinition", + "docmatrix", + "docudactyl", + "dotmatrix-fileprinter", + "double-track-browser", + "echidna", + 
"echidnabot", + "echomesh", + "eclexia", + "elegant-state", + "elixir-mcp-server", + "emergency-button", + "empty-linter", + "ephapax", + "error-lang", + "esn", + "etma-handler", + "excel-economic-numbers-tool", + "explicit-trust-plane", + "feedback-o-tron", + "ffmpeg-ffi", + "filesoup", + "finishingbot", + "fireflag", + "flat-mate", + "flatracoon", + "flatracoon-os", + "fogbinder", + "formatrix-docs", + "formdb-http", + "forth-dotmatrix", + "funfriendly-git", + "games", + "git-inflate", + "git-private-farm", + "gitbot-fleet", + "glambot", + "glyphbase", + "gql-dt", + "grim-repo", + "gsbot", + "hesiod-dns-map", + "heterogenous-mobile-computing", + "http-capability-gateway", + "hybrid-automation-router", + "hypatia", + "hyperpolymath", + "hyperpolymath-archive", + "hyperpolymath-sovereign-registry", + "hyperpolymath.github.io", + "idaptik", + "idris2-ecosystem", + "im-docs", + "immutable-linux-auditor", + "indieweb2-bastion", + "ipfs-overlay", + "ipv6-site-enforcer", + "ipv6-tools", + "januskey", + "julia-the-viper", + "k9-svc", + "k9-tools", + "kaldor-iiot", + "kea-tools", + "kith", + "laminar", + "language-bridges", + "language-interop-compiler", + "laniakea", + "lcb-website", + "libstamp", + "lithoglyph", + "live-files", + "llm-tools", + "lol", + "lsm", + "maa-framework", + "manifesto", + "mcp-repo-guardian", + "me-dialect", + "megadog", + "metadata-grammar", + "misinformation-defence-platform", + "modshells", + "must", + "my-lang", + "my-lang-archive", "nafa-app", + "nafa-app-ambient", + "nano-aider", + "nerdsafe-restart", + "network-dashboard", + "network-orchestrator", + "neural-foundations", + "neurophone", + "nexia-list", + "nextgen-languages", + "nick-shells", + "nickel-augmentation", + "no-nonsense-nntps", + "nuj-lcb-production", + "oblibeny", + "ochrance", + "odds-and-sods-package-manager", + "opam-repository", + "opm-canonicalizer", + "opsm-ui", "outreach", + "package-publishers", + "packages", + "palimpsest-license", + "palimpsest-plasma", + 
"panic-attacker", + "panll", + "panoptes", + "patallm-gallery", + "personal-sysadmin", + "phantom-metal-taste", + "php-aegis", + "phronesis", + "pimcore-fortress", "playbooks", + "poly-k8s-mcp", + "poly-observability-mcp", + "poly-secret-mcp", + "polyglot-formalisms-elixir", + "polyglot-formalisms-gleam", + "polyglot-i18n", + "polysafe-gitfixer", + "polystack", + "preference-injector", + "presswerk", "project-cb", - "v-ecosystem" + "project-wharf", + "proof-of-work", + "protocol-squisher", + "proven", + "proven-specialist-repos", + "qubes-sdp", + "raze-tui", + "reasonably-good-token-vault", + "recon-silly-ation", + "refugia", + "repo-batcher", + "repo-customiser", + "repo-guardian-fs", + "repo-slm-augmentor", + "reposystem", + "rescript-ecosystem", + "resource-record-fluctuator", + "rhodibot", + "rhodium-pipeline", + "rhodium-standard-repositories", + "riscv-guix-buildsys", + "robodog-ecm", + "robot-repo-automaton", + "robot-vacuum-cleaner", + "rpa-elysium", + "rrecord-verity", + "rsr-template-repo", + "safe-brute-force", + "sanctify-php", + "scaffoldia", + "scripts", + "sdp-hkdf-deployment", + "seambot", + "seamstress", + "selur", + "slopctl", + "snapcreate", + "social-media-tools", + "squisher-corpus", + "ssg-collection", + "standards", + "stapeln", + "stateful-artefacts", + "stateful-artefacts-for-gitforges", + "supernorma", + "sustainabot", + "svalinn", + "synapse", + "system-tools", + "tangle", + "techstack-filterlist", + "template-repo", + "the-hotchocolabot", + "thejeffparadox", + "theoneshow", + "thunderbird-template-reloaded", + "tma-mark2", + "total-recall", + "total-update", + "traffic-conditioner", + "tree-navigator", + "twingate-helm-deploy", + "ubicity", + "unified-dataset-vocab", + "union-policy-parser", + "universal-chat-extractor", + "universal-extension-format", + "universal-language-server-plugin", + "universal-project-manager", + "v-ecosystem", + "v3-templater", + "valence-shell", + "verified-container-spec", + "verisimdb", + "vex-tools", + 
"volumod", + "vordr", + "voyage-enterprise-decision-system", + "well-known-ecosystem", + "wokelang", + "wordpress-tools", + "zerostep", + "zerotier-k8s-link", + "zig-ffi", + "zotero-tools" ], - "total_repos": 5 + "total_repos": 301, + "total_scans": 301 } diff --git a/scripts/regen-index.sh b/scripts/regen-index.sh new file mode 100644 index 0000000..3919692 --- /dev/null +++ b/scripts/regen-index.sh @@ -0,0 +1,77 @@ +#!/usr/bin/env bash +# SPDX-License-Identifier: PMPL-1.0-or-later +# Copyright (c) 2026 Jonathan D.A. Jewell (hyperpolymath) +# +# regen-index.sh — rebuild index.json from scans/ on disk. +# +# Deterministic: output depends only on the contents of scans/ and on +# git history (for the last_updated timestamp). Running this script +# twice in a row produces byte-identical output. +# +# Used by: +# * scripts/ingest-scan.sh (after appending a new scan) +# * .github/workflows/index-freshness.yml (CI lane that fails if the +# committed index drifts from what this script produces). +# +# Schema (matches the historical shape): +# { +# "last_updated": "", +# "total_scans": , +# "total_repos": , +# "repos": [ "", … sorted ] +# } +# +# Subdirectories under scans/ (e.g. scans/octads/) are intentionally +# excluded from `repos` / `total_scans`. They are conceptually a +# different space; if a separate index is needed for them, file a +# follow-up issue. + +set -euo pipefail + +SCAN_DIR="${SCAN_DIR:-scans}" +OUT="${OUT:-index.json}" + +cd "$(git rev-parse --show-toplevel)" + +if [[ ! -d "$SCAN_DIR" ]]; then + echo "regen-index: $SCAN_DIR/ not found" >&2 + exit 1 +fi + +# Determine last_updated deterministically: ISO-8601 commit time of the +# latest commit that touched scans/. Falls back to epoch if there is no +# such commit yet (e.g. on a fresh clone before any scan landed). 
+last_updated="$(git log -1 --format=%cI -- "$SCAN_DIR" 2>/dev/null || true)" +if [[ -z "${last_updated}" ]]; then + last_updated="1970-01-01T00:00:00+00:00" +fi + +# Collect top-level *.json files, strip extension, sort. +mapfile -t repos < <( + find "$SCAN_DIR" -maxdepth 1 -mindepth 1 -type f -name '*.json' -printf '%f\n' \ + | sed -E 's/\.json$//' \ + | LC_ALL=C sort -u +) + +total_scans=${#repos[@]} + +# Build repos JSON array via jq (handles edge cases: empty, special chars). +repos_json="$(printf '%s\n' "${repos[@]:-}" | jq -R -s -c 'split("\n") | map(select(length > 0))')" + +# Compose final JSON. `--sort-keys` keeps output stable. +jq -n --sort-keys \ + --arg last_updated "$last_updated" \ + --argjson total_scans "$total_scans" \ + --argjson total_repos "$total_scans" \ + --argjson repos "$repos_json" \ + '{ + last_updated: $last_updated, + total_scans: $total_scans, + total_repos: $total_repos, + repos: $repos + }' > "$OUT.tmp" + +# Atomic replace. +mv "$OUT.tmp" "$OUT" + +echo "regen-index: wrote $OUT — $total_scans scans, last_updated=$last_updated"