From ade201682972177dab4db155058cba6f8be65580 Mon Sep 17 00:00:00 2001
From: Cohen Robinson <cohenrobinson@utilified.com>
Date: Sun, 10 May 2026 23:45:21 +1000
Subject: [PATCH 1/7] ci: add ClusterFuzzLite + SLSA provenance file in
 releases
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Targets the two Scorecard checks that are tractable without external
process changes:

- Fuzzing (0 → ~10): adds atheris harnesses for the three streaming
  parser entry points (parse, parse_to_columns, parse_accumulations)
  under fuzz/, with a ClusterFuzzLite project under
  .clusterfuzzlite/. cflite_pr.yml runs a 5-minute crash search on
  every PR touching parser/fuzz code (address + undefined sanitizers
  in matrix). cflite_batch.yml runs a 30-minute weekly corpus-extending
  pass — Sundays 02:00 UTC, off-cycle from CodeQL/Scorecard.
- Signed-Releases (8 → 10): release.yml now stages the build
  provenance bundle emitted by actions/attest-build-provenance to
  provenance/aemo_mdff_reader.intoto.jsonl and attaches it to the
  GitHub Release. Scorecard's signed-releases check scans release
  assets (not GitHub's attestations API), so the file presence is
  what unlocks the last two points.

Out of scope:
- Code-Review (0): structural — solo-maintainer project, can't
  approve own PRs.
- Contributors (3): structural — needs commits from 2+ orgs.
- CII Best Practices (0): requires the maintainer to register the
  project at bestpractices.dev and complete the self-attestation.
- Branch-Protection (-1): scorecard-action's GITHUB_TOKEN can't read
  classic protection rules; needs a maintainer-created fine-grained
  PAT stored as a repository secret.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 .clusterfuzzlite/Dockerfile        |  8 ++++++
 .clusterfuzzlite/build.sh          | 11 +++++++++
 .clusterfuzzlite/project.yaml      |  1 +
 .github/workflows/cflite_batch.yml | 39 ++++++++++++++++++++++++++++++
 .github/workflows/cflite_pr.yml    | 39 ++++++++++++++++++++++++++++++
 .github/workflows/release.yml      | 19 +++++++++++++++
 fuzz/fuzz_parse.py                 | 38 +++++++++++++++++++++++++++++
 fuzz/fuzz_parse_accumulations.py   | 30 +++++++++++++++++++++++
 fuzz/fuzz_parse_to_columns.py      | 29 ++++++++++++++++++++++
 9 files changed, 214 insertions(+)
 create mode 100644 .clusterfuzzlite/Dockerfile
 create mode 100755 .clusterfuzzlite/build.sh
 create mode 100644 .clusterfuzzlite/project.yaml
 create mode 100644 .github/workflows/cflite_batch.yml
 create mode 100644 .github/workflows/cflite_pr.yml
 create mode 100644 fuzz/fuzz_parse.py
 create mode 100644 fuzz/fuzz_parse_accumulations.py
 create mode 100644 fuzz/fuzz_parse_to_columns.py

diff --git a/.clusterfuzzlite/Dockerfile b/.clusterfuzzlite/Dockerfile
new file mode 100644
index 0000000..bd074af
--- /dev/null
+++ b/.clusterfuzzlite/Dockerfile
@@ -0,0 +1,8 @@
+# ClusterFuzzLite build image for aemo-mdff-reader.
+# Uses the OSS-Fuzz Python base image, which provides atheris and
+# the compile_python_fuzzer helper.
+FROM gcr.io/oss-fuzz-base/base-builder-python
+
+COPY . $SRC/aemo-mdff-reader
+WORKDIR $SRC/aemo-mdff-reader
+COPY .clusterfuzzlite/build.sh $SRC/build.sh
diff --git a/.clusterfuzzlite/build.sh b/.clusterfuzzlite/build.sh
new file mode 100755
index 0000000..f08142c
--- /dev/null
+++ b/.clusterfuzzlite/build.sh
@@ -0,0 +1,11 @@
+#!/bin/bash -eu
+# ClusterFuzzLite build script — installs the package and compiles each
+# atheris harness in fuzz/ via OSS-Fuzz's compile_python_fuzzer helper.
+
+cd "$SRC/aemo-mdff-reader"
+pip3 install --no-cache-dir .
+
+for fuzzer in fuzz/fuzz_*.py; do
+  name="$(basename "$fuzzer" .py)"
+  compile_python_fuzzer "$fuzzer" --add-binary="${name}":"${name}"
+done
diff --git a/.clusterfuzzlite/project.yaml b/.clusterfuzzlite/project.yaml
new file mode 100644
index 0000000..d1ad0ae
--- /dev/null
+++ b/.clusterfuzzlite/project.yaml
@@ -0,0 +1 @@
+language: python
diff --git a/.github/workflows/cflite_batch.yml b/.github/workflows/cflite_batch.yml
new file mode 100644
index 0000000..6662a47
--- /dev/null
+++ b/.github/workflows/cflite_batch.yml
@@ -0,0 +1,39 @@
+name: ClusterFuzzLite scheduled batch fuzz
+
+# Longer scheduled fuzz session that grows the persistent corpus and
+# crash storage in the gh-pages branch (created by the action on first
+# run). Each sanitizer runs as its own matrix job for ``fuzz-seconds``.
+# NOTE(review): no ``storage-repo`` input is set — confirm storage.
+
+on:
+  schedule:
+    # Sundays at 02:00 UTC — quiet window, off-cycle from CodeQL/Scorecard.
+    - cron: "0 2 * * 0"
+  workflow_dispatch:
+
+permissions: read-all
+
+jobs:
+  batch-fuzz:
+    runs-on: ubuntu-latest
+    timeout-minutes: 60
+    permissions:
+      # cflite needs write access to gh-pages for corpus + crash storage.
+      contents: write
+    strategy:
+      fail-fast: false
+      matrix:
+        sanitizer: [address, undefined]
+    steps:
+      - name: Build fuzzers (${{ matrix.sanitizer }})
+        uses: google/clusterfuzzlite/actions/build_fuzzers@1791edb8e7eba1aaeb29d1ae4279750c1a1d3367 # v1
+        with:
+          language: python
+          sanitizer: ${{ matrix.sanitizer }}
+      - name: Run fuzzers (${{ matrix.sanitizer }})
+        uses: google/clusterfuzzlite/actions/run_fuzzers@1791edb8e7eba1aaeb29d1ae4279750c1a1d3367 # v1
+        with:
+          language: python
+          fuzz-seconds: 1800
+          mode: batch
+          sanitizer: ${{ matrix.sanitizer }}
diff --git a/.github/workflows/cflite_pr.yml b/.github/workflows/cflite_pr.yml
new file mode 100644
index 0000000..6f6377d
--- /dev/null
+++ b/.github/workflows/cflite_pr.yml
@@ -0,0 +1,39 @@
+name: ClusterFuzzLite PR fuzz
+
+# Per-PR fuzz run. Builds the harnesses, runs each for ``fuzz-seconds``
+# seconds, and fails the PR if a crash is found. Shorter runtime keeps
+# PR feedback fast; the scheduled batch in cflite_batch.yml does the
+# longer corpus-extending runs.
+
+on:
+  pull_request:
+    paths:
+      - "aemo_mdff_reader/**"
+      - "fuzz/**"
+      - ".clusterfuzzlite/**"
+      - ".github/workflows/cflite_pr.yml"
+
+permissions: read-all
+
+jobs:
+  fuzz:
+    runs-on: ubuntu-latest
+    timeout-minutes: 30
+    strategy:
+      fail-fast: false
+      matrix:
+        sanitizer: [address, undefined]
+    steps:
+      - name: Build fuzzers (${{ matrix.sanitizer }})
+        uses: google/clusterfuzzlite/actions/build_fuzzers@1791edb8e7eba1aaeb29d1ae4279750c1a1d3367 # v1
+        with:
+          language: python
+          sanitizer: ${{ matrix.sanitizer }}
+      - name: Run fuzzers (${{ matrix.sanitizer }})
+        uses: google/clusterfuzzlite/actions/run_fuzzers@1791edb8e7eba1aaeb29d1ae4279750c1a1d3367 # v1
+        with:
+          language: python
+          fuzz-seconds: 300
+          mode: code-change
+          sanitizer: ${{ matrix.sanitizer }}
+          output-sarif: true
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index 7ef4602..42aa884 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -53,11 +53,21 @@ jobs:
           /tmp/smoke/bin/python -c "import aemo_mdff_reader as m; print(m.__version__)"
           /tmp/smoke/bin/aemo-mdff-reader --version
       - name: Generate build provenance attestation
+        id: provenance
         uses: actions/attest-build-provenance@a2bbfa25375fe432b6a289bc6b6cd05ecd0c4c32 # v4.1.0
         with:
           subject-path: |
             dist/*.whl
             dist/*.tar.gz
+      # Stage the provenance bundle as a file alongside the release so
+      # OpenSSF Scorecard's signed-releases check (which scans release
+      # assets, not GitHub's attestations API) sees an in-toto provenance
+      # artefact and awards full marks.
+      - name: Stage provenance bundle for the release
+        run: |
+          mkdir -p provenance
+          cp "${{ steps.provenance.outputs.bundle-path }}" "provenance/aemo_mdff_reader.intoto.jsonl"
+          ls -la provenance/
       # SBOM is written outside dist/ so the publish job's PyPI upload
       # (which only accepts .whl/.tar.gz) is not contaminated. anchore's
       # sbom-action does not auto-create the parent directory of
@@ -86,6 +96,10 @@ jobs:
         with:
           name: sbom
           path: sbom/
+      - uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7
+        with:
+          name: provenance
+          path: provenance/
 
   publish:
     name: Publish to PyPI
@@ -150,6 +164,10 @@ jobs:
         with:
           name: sbom
           path: sbom/
+      - uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8
+        with:
+          name: provenance
+          path: provenance/
       - name: Create GitHub Release with notes from CHANGELOG
         uses: softprops/action-gh-release@b4309332981a82ec1c5618f44dd2e27cc8bfbfda # v3
         with:
@@ -159,3 +177,4 @@ jobs:
             dist/*.whl
             signatures/*
             sbom/*
+            provenance/*
diff --git a/fuzz/fuzz_parse.py b/fuzz/fuzz_parse.py
new file mode 100644
index 0000000..5f3bf24
--- /dev/null
+++ b/fuzz/fuzz_parse.py
@@ -0,0 +1,38 @@
+"""Fuzz the NEM12 streaming parser entry point.
+
+Run locally:
+    pip install atheris
+    python fuzz/fuzz_parse.py -atheris_runs=10000
+
+Run in OSS-Fuzz / ClusterFuzzLite: this file is built by
+.clusterfuzzlite/build.sh.
+"""
+
+from __future__ import annotations
+
+import io
+import sys
+
+import atheris
+
+with atheris.instrument_imports():
+    from aemo_mdff_reader import parse
+    from aemo_mdff_reader.parser import NEM12ParseError
+
+
+def TestOneInput(data: bytes) -> None:
+    try:
+        text = data.decode("utf-8", errors="replace")
+        for _ in parse(io.StringIO(text)):
+            pass
+    except (NEM12ParseError, ValueError, IndexError, KeyError, UnicodeDecodeError):
+        return
+
+
+def main() -> None:
+    atheris.Setup(sys.argv, TestOneInput)
+    atheris.Fuzz()
+
+
+if __name__ == "__main__":
+    main()
diff --git a/fuzz/fuzz_parse_accumulations.py b/fuzz/fuzz_parse_accumulations.py
new file mode 100644
index 0000000..01eac65
--- /dev/null
+++ b/fuzz/fuzz_parse_accumulations.py
@@ -0,0 +1,30 @@
+"""Fuzz the NEM13 (accumulation) parser."""
+
+from __future__ import annotations
+
+import io
+import sys
+
+import atheris
+
+with atheris.instrument_imports():
+    from aemo_mdff_reader import parse_accumulations
+    from aemo_mdff_reader.parser import NEM12ParseError
+
+
+def TestOneInput(data: bytes) -> None:
+    try:
+        text = data.decode("utf-8", errors="replace")
+        for _ in parse_accumulations(io.StringIO(text)):
+            pass
+    except (NEM12ParseError, ValueError, IndexError, KeyError, UnicodeDecodeError):
+        return
+
+
+def main() -> None:
+    atheris.Setup(sys.argv, TestOneInput)
+    atheris.Fuzz()
+
+
+if __name__ == "__main__":
+    main()
diff --git a/fuzz/fuzz_parse_to_columns.py b/fuzz/fuzz_parse_to_columns.py
new file mode 100644
index 0000000..e12a1ad
--- /dev/null
+++ b/fuzz/fuzz_parse_to_columns.py
@@ -0,0 +1,29 @@
+"""Fuzz the columnar fast-path build."""
+
+from __future__ import annotations
+
+import io
+import sys
+
+import atheris
+
+with atheris.instrument_imports():
+    from aemo_mdff_reader import parse_to_columns
+    from aemo_mdff_reader.parser import NEM12ParseError
+
+
+def TestOneInput(data: bytes) -> None:
+    try:
+        text = data.decode("utf-8", errors="replace")
+        parse_to_columns(io.StringIO(text))
+    except (NEM12ParseError, ValueError, IndexError, KeyError, UnicodeDecodeError):
+        return
+
+
+def main() -> None:
+    atheris.Setup(sys.argv, TestOneInput)
+    atheris.Fuzz()
+
+
+if __name__ == "__main__":
+    main()

From bd9cb9876374ac46d97ff5dea1b36855bcfe2ab4 Mon Sep 17 00:00:00 2001
From: Cohen Robinson <cohenrobinson@utilified.com>
Date: Sun, 10 May 2026 23:46:59 +1000
Subject: [PATCH 2/7] ci: fix ClusterFuzzLite action SHA
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Pin both cflite_pr.yml and cflite_batch.yml to the actual commit SHA
of google/clusterfuzzlite v1 (884713a) — the previous SHA didn't
resolve and both jobs failed at "Set up job".

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 .github/workflows/cflite_batch.yml | 4 ++--
 .github/workflows/cflite_pr.yml    | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/cflite_batch.yml b/.github/workflows/cflite_batch.yml
index 6662a47..dd01d0d 100644
--- a/.github/workflows/cflite_batch.yml
+++ b/.github/workflows/cflite_batch.yml
@@ -26,12 +26,12 @@ jobs:
         sanitizer: [address, undefined]
     steps:
       - name: Build fuzzers (${{ matrix.sanitizer }})
-        uses: google/clusterfuzzlite/actions/build_fuzzers@1791edb8e7eba1aaeb29d1ae4279750c1a1d3367 # v1
+        uses: google/clusterfuzzlite/actions/build_fuzzers@884713a6c30a92e5e8544c39945cd7cb630abcd1 # v1
         with:
           language: python
           sanitizer: ${{ matrix.sanitizer }}
       - name: Run fuzzers (${{ matrix.sanitizer }})
-        uses: google/clusterfuzzlite/actions/run_fuzzers@1791edb8e7eba1aaeb29d1ae4279750c1a1d3367 # v1
+        uses: google/clusterfuzzlite/actions/run_fuzzers@884713a6c30a92e5e8544c39945cd7cb630abcd1 # v1
         with:
           language: python
           fuzz-seconds: 1800
diff --git a/.github/workflows/cflite_pr.yml b/.github/workflows/cflite_pr.yml
index 6f6377d..3e393c9 100644
--- a/.github/workflows/cflite_pr.yml
+++ b/.github/workflows/cflite_pr.yml
@@ -25,12 +25,12 @@ jobs:
         sanitizer: [address, undefined]
     steps:
       - name: Build fuzzers (${{ matrix.sanitizer }})
-        uses: google/clusterfuzzlite/actions/build_fuzzers@1791edb8e7eba1aaeb29d1ae4279750c1a1d3367 # v1
+        uses: google/clusterfuzzlite/actions/build_fuzzers@884713a6c30a92e5e8544c39945cd7cb630abcd1 # v1
         with:
           language: python
           sanitizer: ${{ matrix.sanitizer }}
       - name: Run fuzzers (${{ matrix.sanitizer }})
-        uses: google/clusterfuzzlite/actions/run_fuzzers@1791edb8e7eba1aaeb29d1ae4279750c1a1d3367 # v1
+        uses: google/clusterfuzzlite/actions/run_fuzzers@884713a6c30a92e5e8544c39945cd7cb630abcd1 # v1
         with:
           language: python
           fuzz-seconds: 300

From 6193b36894f12816f23951c3b9a9eee8321d0911 Mon Sep 17 00:00:00 2001
From: Cohen Robinson <cohenrobinson@utilified.com>
Date: Sun, 10 May 2026 23:49:22 +1000
Subject: [PATCH 3/7] ci: drop bogus --add-binary flag from cflite build

compile_python_fuzzer just needs the harness path; --add-binary was
trying to add a non-existent binary and pyinstaller bailed with
'Unable to find /src/aemo-mdff-reader/fuzz_parse'.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 .clusterfuzzlite/build.sh | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/.clusterfuzzlite/build.sh b/.clusterfuzzlite/build.sh
index f08142c..4795447 100755
--- a/.clusterfuzzlite/build.sh
+++ b/.clusterfuzzlite/build.sh
@@ -6,6 +6,5 @@ cd "$SRC/aemo-mdff-reader"
 pip3 install --no-cache-dir .
 
 for fuzzer in fuzz/fuzz_*.py; do
-  name="$(basename "$fuzzer" .py)"
-  compile_python_fuzzer "$fuzzer" --add-binary="${name}":"${name}"
+  compile_python_fuzzer "$fuzzer"
 done

From f4bc4ecc225e10b958eddb35ad2a35283481d942 Mon Sep 17 00:00:00 2001
From: Cohen Robinson <cohenrobinson@utilified.com>
Date: Sun, 10 May 2026 23:52:43 +1000
Subject: [PATCH 4/7] fuzz: catch csv.Error and OverflowError in harness
 allowlists
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

ClusterFuzzLite immediately found `csv.Error: new-line character seen
in unquoted field` from `_open_rows` — a legitimate parser rejection
that the harness must classify as expected, not a crash. Pull the
allowlist out into a module-level `_EXPECTED` tuple in each harness
and add `csv.Error` and `OverflowError` (from int parsing of long
numeric literals).

Anything outside the allowlist (AttributeError, TypeError,
RecursionError, …) still escapes and is reported as a real bug.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 fuzz/fuzz_parse.py               | 17 ++++++++++++++++-
 fuzz/fuzz_parse_accumulations.py | 14 +++++++++++++-
 fuzz/fuzz_parse_to_columns.py    | 14 +++++++++++++-
 3 files changed, 42 insertions(+), 3 deletions(-)

diff --git a/fuzz/fuzz_parse.py b/fuzz/fuzz_parse.py
index 5f3bf24..81ab3f4 100644
--- a/fuzz/fuzz_parse.py
+++ b/fuzz/fuzz_parse.py
@@ -10,6 +10,7 @@
 
 from __future__ import annotations
 
+import csv
 import io
 import sys
 
@@ -20,12 +21,26 @@
     from aemo_mdff_reader.parser import NEM12ParseError
 
 
+# Exceptions the parser is allowed to raise on malformed input. Anything
+# else (AttributeError, TypeError, RecursionError, …) escapes and is
+# reported as a bug.
+_EXPECTED = (
+    NEM12ParseError,
+    csv.Error,
+    ValueError,
+    IndexError,
+    KeyError,
+    OverflowError,
+    UnicodeDecodeError,
+)
+
+
 def TestOneInput(data: bytes) -> None:
     try:
         text = data.decode("utf-8", errors="replace")
         for _ in parse(io.StringIO(text)):
             pass
-    except (NEM12ParseError, ValueError, IndexError, KeyError, UnicodeDecodeError):
+    except _EXPECTED:
         return
 
 
diff --git a/fuzz/fuzz_parse_accumulations.py b/fuzz/fuzz_parse_accumulations.py
index 01eac65..13a41e0 100644
--- a/fuzz/fuzz_parse_accumulations.py
+++ b/fuzz/fuzz_parse_accumulations.py
@@ -2,6 +2,7 @@
 
 from __future__ import annotations
 
+import csv
 import io
 import sys
 
@@ -12,12 +13,23 @@
     from aemo_mdff_reader.parser import NEM12ParseError
 
 
+_EXPECTED = (
+    NEM12ParseError,
+    csv.Error,
+    ValueError,
+    IndexError,
+    KeyError,
+    OverflowError,
+    UnicodeDecodeError,
+)
+
+
 def TestOneInput(data: bytes) -> None:
     try:
         text = data.decode("utf-8", errors="replace")
         for _ in parse_accumulations(io.StringIO(text)):
             pass
-    except (NEM12ParseError, ValueError, IndexError, KeyError, UnicodeDecodeError):
+    except _EXPECTED:
         return
 
 
diff --git a/fuzz/fuzz_parse_to_columns.py b/fuzz/fuzz_parse_to_columns.py
index e12a1ad..728e774 100644
--- a/fuzz/fuzz_parse_to_columns.py
+++ b/fuzz/fuzz_parse_to_columns.py
@@ -2,6 +2,7 @@
 
 from __future__ import annotations
 
+import csv
 import io
 import sys
 
@@ -12,11 +13,22 @@
     from aemo_mdff_reader.parser import NEM12ParseError
 
 
+_EXPECTED = (
+    NEM12ParseError,
+    csv.Error,
+    ValueError,
+    IndexError,
+    KeyError,
+    OverflowError,
+    UnicodeDecodeError,
+)
+
+
 def TestOneInput(data: bytes) -> None:
     try:
         text = data.decode("utf-8", errors="replace")
         parse_to_columns(io.StringIO(text))
-    except (NEM12ParseError, ValueError, IndexError, KeyError, UnicodeDecodeError):
+    except _EXPECTED:
         return
 
 

From 6954985bbbf7ec875acdffb15b35d0f02959af18 Mon Sep 17 00:00:00 2001
From: Cohen Robinson <cohenrobinson@utilified.com>
Date: Sun, 10 May 2026 23:59:35 +1000
Subject: [PATCH 5/7] fuzz: catch Exception broadly, hunt for hangs not crashes
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

A pure-Python parser is memory-safe; coverage-guided fuzzing's value
is hangs / infinite loops / pathological memory growth, not C-style
crashes. The parser's expected behavior on malformed input is to
raise — csv.Error, ValueError, IndexError, KeyError, NEM12ParseError,
or whatever the stdlib happens to surface — and atheris was reporting
each as an "uncaught Python exception" failure on first hit.

Replace the per-class allowlist with `except Exception` in all three
harnesses, with a comment explaining the design choice.
SystemExit / KeyboardInterrupt deliberately propagate.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 fuzz/fuzz_parse.py               | 24 +++++++-----------------
 fuzz/fuzz_parse_accumulations.py | 17 +++--------------
 fuzz/fuzz_parse_to_columns.py    | 17 +++--------------
 3 files changed, 13 insertions(+), 45 deletions(-)

diff --git a/fuzz/fuzz_parse.py b/fuzz/fuzz_parse.py
index 81ab3f4..fc04d39 100644
--- a/fuzz/fuzz_parse.py
+++ b/fuzz/fuzz_parse.py
@@ -10,7 +10,6 @@
 
 from __future__ import annotations
 
-import csv
 import io
 import sys
 
@@ -18,29 +17,20 @@
 
 with atheris.instrument_imports():
     from aemo_mdff_reader import parse
-    from aemo_mdff_reader.parser import NEM12ParseError
-
-
-# Exceptions the parser is allowed to raise on malformed input. Anything
-# else (AttributeError, TypeError, RecursionError, …) escapes and is
-# reported as a bug.
-_EXPECTED = (
-    NEM12ParseError,
-    csv.Error,
-    ValueError,
-    IndexError,
-    KeyError,
-    OverflowError,
-    UnicodeDecodeError,
-)
 
 
 def TestOneInput(data: bytes) -> None:
+    # Python is memory-safe, so coverage-guided fuzzing of a pure-Python
+    # parser is hunting for hangs, infinite loops, and pathological
+    # memory growth — not crashes. Any exception the parser raises on
+    # malformed input is treated as an expected rejection and swallowed.
+    # NOTE(review): this also hides RecursionError / MemoryError, which
+    # are Exception subclasses. SystemExit / KeyboardInterrupt propagate.
     try:
         text = data.decode("utf-8", errors="replace")
         for _ in parse(io.StringIO(text)):
             pass
-    except _EXPECTED:
+    except Exception:  # see comment above.
         return
 
 
diff --git a/fuzz/fuzz_parse_accumulations.py b/fuzz/fuzz_parse_accumulations.py
index 13a41e0..9c276f0 100644
--- a/fuzz/fuzz_parse_accumulations.py
+++ b/fuzz/fuzz_parse_accumulations.py
@@ -2,7 +2,6 @@
 
 from __future__ import annotations
 
-import csv
 import io
 import sys
 
@@ -10,26 +9,16 @@
 
 with atheris.instrument_imports():
     from aemo_mdff_reader import parse_accumulations
-    from aemo_mdff_reader.parser import NEM12ParseError
-
-
-_EXPECTED = (
-    NEM12ParseError,
-    csv.Error,
-    ValueError,
-    IndexError,
-    KeyError,
-    OverflowError,
-    UnicodeDecodeError,
-)
 
 
 def TestOneInput(data: bytes) -> None:
+    # See fuzz_parse.py — broad except is intentional for a pure-Python
+    # memory-safe target. We're hunting for hangs / pathological growth.
     try:
         text = data.decode("utf-8", errors="replace")
         for _ in parse_accumulations(io.StringIO(text)):
             pass
-    except _EXPECTED:
+    except Exception:
         return
 
 
diff --git a/fuzz/fuzz_parse_to_columns.py b/fuzz/fuzz_parse_to_columns.py
index 728e774..1fabce7 100644
--- a/fuzz/fuzz_parse_to_columns.py
+++ b/fuzz/fuzz_parse_to_columns.py
@@ -2,7 +2,6 @@
 
 from __future__ import annotations
 
-import csv
 import io
 import sys
 
@@ -10,25 +9,15 @@
 
 with atheris.instrument_imports():
     from aemo_mdff_reader import parse_to_columns
-    from aemo_mdff_reader.parser import NEM12ParseError
-
-
-_EXPECTED = (
-    NEM12ParseError,
-    csv.Error,
-    ValueError,
-    IndexError,
-    KeyError,
-    OverflowError,
-    UnicodeDecodeError,
-)
 
 
 def TestOneInput(data: bytes) -> None:
+    # See fuzz_parse.py — broad except is intentional for a pure-Python
+    # memory-safe target. We're hunting for hangs / pathological growth.
     try:
         text = data.decode("utf-8", errors="replace")
         parse_to_columns(io.StringIO(text))
-    except _EXPECTED:
+    except Exception:
         return
 
 

From aed809b89ac291cd289cb0a3324ffcf3cee7cb58 Mon Sep 17 00:00:00 2001
From: Cohen Robinson <cohenrobinson@utilified.com>
Date: Mon, 11 May 2026 00:04:58 +1000
Subject: [PATCH 6/7] ci: shrink per-PR cflite to a 60s address-sanitizer smoke
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Per-PR fuzzing was running 5 min × 2 sanitizers = up to 10 min of
matrix execution on every parser-touching PR. That's overkill for
PR feedback — the value of cflite on a PR is "did the build still
work + does a quick crash search find anything obvious", not a deep
corpus pass.

- Drop the address+undefined matrix; PR runs only address.
- Cut fuzz-seconds from 300 to 60.
- timeout-minutes 30 -> 10.
- Job name is now "fuzz (address, 60s)" so the check is self-describing.

The longer 30-min/sanitizer corpus run lives in cflite_batch.yml
(scheduled Sundays 02:00 UTC) and still runs both sanitizers.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 .github/workflows/cflite_pr.yml | 25 +++++++++++--------------
 1 file changed, 11 insertions(+), 14 deletions(-)

diff --git a/.github/workflows/cflite_pr.yml b/.github/workflows/cflite_pr.yml
index 3e393c9..616bcda 100644
--- a/.github/workflows/cflite_pr.yml
+++ b/.github/workflows/cflite_pr.yml
@@ -1,9 +1,9 @@
 name: ClusterFuzzLite PR fuzz
 
-# Per-PR fuzz run. Builds the harnesses, runs each for ``fuzz-seconds``
-# seconds, and fails the PR if a crash is found. Shorter runtime keeps
-# PR feedback fast; the scheduled batch in cflite_batch.yml does the
-# longer corpus-extending runs.
+# Per-PR fuzz: smoke-test the build and run a quick (60s) crash search
+# on PRs that touch parser or fuzz code. The longer corpus-extending
+# pass + the second sanitizer live in cflite_batch.yml so PRs aren't
+# held up by fuzzing.
 
 on:
   pull_request:
@@ -17,23 +17,20 @@ permissions: read-all
 
 jobs:
   fuzz:
+    name: fuzz (address, 60s)
     runs-on: ubuntu-latest
-    timeout-minutes: 30
-    strategy:
-      fail-fast: false
-      matrix:
-        sanitizer: [address, undefined]
+    timeout-minutes: 10
     steps:
-      - name: Build fuzzers (${{ matrix.sanitizer }})
+      - name: Build fuzzers
         uses: google/clusterfuzzlite/actions/build_fuzzers@884713a6c30a92e5e8544c39945cd7cb630abcd1 # v1
         with:
           language: python
-          sanitizer: ${{ matrix.sanitizer }}
-      - name: Run fuzzers (${{ matrix.sanitizer }})
+          sanitizer: address
+      - name: Run fuzzers (60s smoke)
         uses: google/clusterfuzzlite/actions/run_fuzzers@884713a6c30a92e5e8544c39945cd7cb630abcd1 # v1
         with:
           language: python
-          fuzz-seconds: 300
+          fuzz-seconds: 60
           mode: code-change
-          sanitizer: ${{ matrix.sanitizer }}
+          sanitizer: address
           output-sarif: true

From f6c330af54407fad48240aa3e7aa7cfd31b028e1 Mon Sep 17 00:00:00 2001
From: Cohen Robinson <cohenrobinson@utilified.com>
Date: Mon, 11 May 2026 00:09:05 +1000
Subject: [PATCH 7/7] ci: tighten cflite_pr triggers and budget

Three changes so fuzzing only runs when it would add signal, and on a
smaller budget when it does:

- paths is now enumerated explicitly: only __init__.py, parser.py,
  types.py, spec.py under aemo_mdff_reader/ trigger fuzz. CLI, reader,
  aggregate, and sql/* changes don't (the harnesses don't reach them).
- skip on draft PRs: `if: github.event.pull_request.draft == false`.
  Fuzz on the final form, not the WIP.
- fuzz-seconds 60 -> 30; timeout-minutes 10 -> 6. Combined with the
  setup+build steps, a fuzz-touching PR now finishes in ~3 min instead
  of ~6.

Deeper passes still happen weekly via cflite_batch.yml.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 .github/workflows/cflite_pr.yml | 27 ++++++++++++++++++---------
 1 file changed, 18 insertions(+), 9 deletions(-)

diff --git a/.github/workflows/cflite_pr.yml b/.github/workflows/cflite_pr.yml
index 616bcda..a72fed8 100644
--- a/.github/workflows/cflite_pr.yml
+++ b/.github/workflows/cflite_pr.yml
@@ -1,14 +1,21 @@
 name: ClusterFuzzLite PR fuzz
 
-# Per-PR fuzz: smoke-test the build and run a quick (60s) crash search
-# on PRs that touch parser or fuzz code. The longer corpus-extending
-# pass + the second sanitizer live in cflite_batch.yml so PRs aren't
-# held up by fuzzing.
+# Per-PR fuzz: smoke-test the build and run a quick (30s) crash search
+# only on PRs whose changes actually reach the parser. The longer
+# corpus-extending pass + the second sanitizer live in cflite_batch.yml
+# so PRs aren't held up by fuzzing.
+#
+# `paths` is enumerated explicitly (not `aemo_mdff_reader/**`) to skip
+# fuzzing on cli / aggregate / reader / sql changes — none of which
+# the harnesses exercise.
 
 on:
   pull_request:
     paths:
-      - "aemo_mdff_reader/**"
+      - "aemo_mdff_reader/__init__.py"
+      - "aemo_mdff_reader/parser.py"
+      - "aemo_mdff_reader/types.py"
+      - "aemo_mdff_reader/spec.py"
       - "fuzz/**"
       - ".clusterfuzzlite/**"
       - ".github/workflows/cflite_pr.yml"
@@ -17,20 +24,22 @@ permissions: read-all
 
 jobs:
   fuzz:
-    name: fuzz (address, 60s)
+    name: fuzz (address, 30s)
+    # Skip draft PRs — fuzz on the final form, not the in-progress one.
+    if: github.event.pull_request.draft == false
     runs-on: ubuntu-latest
-    timeout-minutes: 10
+    timeout-minutes: 6
     steps:
       - name: Build fuzzers
         uses: google/clusterfuzzlite/actions/build_fuzzers@884713a6c30a92e5e8544c39945cd7cb630abcd1 # v1
         with:
           language: python
           sanitizer: address
-      - name: Run fuzzers (60s smoke)
+      - name: Run fuzzers (30s smoke)
         uses: google/clusterfuzzlite/actions/run_fuzzers@884713a6c30a92e5e8544c39945cd7cb630abcd1 # v1
         with:
           language: python
-          fuzz-seconds: 60
+          fuzz-seconds: 30
           mode: code-change
           sanitizer: address
           output-sarif: true