From ade201682972177dab4db155058cba6f8be65580 Mon Sep 17 00:00:00 2001 From: Cohen Robinson Date: Sun, 10 May 2026 23:45:21 +1000 Subject: [PATCH 1/7] ci: add ClusterFuzzLite + SLSA provenance file in releases MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Targets the two Scorecard checks that are tractable without external process changes: - Fuzzing (0 → ~10): adds atheris harnesses for the three streaming parser entry points (parse, parse_to_columns, parse_accumulations) under fuzz/, with a ClusterFuzzLite project under .clusterfuzzlite/. cflite_pr.yml runs a 5-minute crash search on every PR touching parser/fuzz code (address + undefined sanitizers in matrix). cflite_batch.yml runs a 30-minute weekly corpus-extending pass — Sundays 02:00 UTC, off-cycle from CodeQL/Scorecard. - Signed-Releases (8 → 10): release.yml now stages the build provenance bundle emitted by actions/attest-build-provenance to provenance/aemo_mdff_reader.intoto.jsonl and attaches it to the GitHub Release. Scorecard's signed-releases check scans release assets (not GitHub's attestations API), so the file presence is what unlocks the last two points. Out of scope: - Code-Review (0): structural — solo-maintainer project, can't approve own PRs. - Contributors (3): structural — needs commits from 2+ orgs. - CII Best Practices (0): requires you to register the project at bestpractices.dev and complete the self-attestation. - Branch-Protection (-1): scorecard-action's GITHUB_TOKEN can't read classic protection rules; needs a fine-grained PAT secret you create. Co-Authored-By: Claude Opus 4.7 (1M context) --- .clusterfuzzlite/Dockerfile | 8 ++++++ .clusterfuzzlite/build.sh | 11 +++++++++ .clusterfuzzlite/project.yaml | 1 + .github/workflows/cflite_batch.yml | 39 ++++++++++++++++++++++++++++++ .github/workflows/cflite_pr.yml | 39 ++++++++++++++++++++++++++++++ .github/workflows/release.yml | 19 +++++++++++++++ fuzz/fuzz_parse.py | 38 +++++++++++++++++++++++++++++ fuzz/fuzz_parse_accumulations.py | 30 +++++++++++++++++++++++ fuzz/fuzz_parse_to_columns.py | 29 ++++++++++++++++++++++ 9 files changed, 214 insertions(+) create mode 100644 .clusterfuzzlite/Dockerfile create mode 100755 .clusterfuzzlite/build.sh create mode 100644 .clusterfuzzlite/project.yaml create mode 100644 .github/workflows/cflite_batch.yml create mode 100644 .github/workflows/cflite_pr.yml create mode 100644 fuzz/fuzz_parse.py create mode 100644 fuzz/fuzz_parse_accumulations.py create mode 100644 fuzz/fuzz_parse_to_columns.py diff --git a/.clusterfuzzlite/Dockerfile b/.clusterfuzzlite/Dockerfile new file mode 100644 index 0000000..bd074af --- /dev/null +++ b/.clusterfuzzlite/Dockerfile @@ -0,0 +1,8 @@ +# ClusterFuzzLite build image for aemo-mdff-reader. +# Uses the OSS-Fuzz Python base image, which provides atheris and +# the compile_python_fuzzer helper. +FROM gcr.io/oss-fuzz-base/base-builder-python + +COPY . $SRC/aemo-mdff-reader +WORKDIR $SRC/aemo-mdff-reader +COPY .clusterfuzzlite/build.sh $SRC/build.sh diff --git a/.clusterfuzzlite/build.sh b/.clusterfuzzlite/build.sh new file mode 100755 index 0000000..f08142c --- /dev/null +++ b/.clusterfuzzlite/build.sh @@ -0,0 +1,11 @@ +#!/bin/bash -eu +# ClusterFuzzLite build script — installs the package and compiles each +# atheris harness in fuzz/ via OSS-Fuzz's compile_python_fuzzer helper. + +cd "$SRC/aemo-mdff-reader" +pip3 install --no-cache-dir . + +for fuzzer in fuzz/fuzz_*.py; do + name="$(basename "$fuzzer" .py)" + compile_python_fuzzer "$fuzzer" --add-binary="${name}":"${name}" +done diff --git a/.clusterfuzzlite/project.yaml b/.clusterfuzzlite/project.yaml new file mode 100644 index 0000000..d1ad0ae --- /dev/null +++ b/.clusterfuzzlite/project.yaml @@ -0,0 +1 @@ +language: python diff --git a/.github/workflows/cflite_batch.yml b/.github/workflows/cflite_batch.yml new file mode 100644 index 0000000..6662a47 --- /dev/null +++ b/.github/workflows/cflite_batch.yml @@ -0,0 +1,39 @@ +name: ClusterFuzzLite scheduled batch fuzz + +# Longer scheduled fuzz session that grows the persistent corpus and +# crash storage in the gh-pages branch. Runs each sanitizer in turn +# for ``fuzz-seconds``. Storage requires a ``gh-pages`` branch; the +# action creates it on first run. + +on: + schedule: + # Sundays at 02:00 UTC — quiet window, off-cycle from CodeQL/Scorecard. + - cron: "0 2 * * 0" + workflow_dispatch: + +permissions: read-all + +jobs: + batch-fuzz: + runs-on: ubuntu-latest + timeout-minutes: 60 + permissions: + # cflite needs write access to gh-pages for corpus + crash storage. + contents: write + strategy: + fail-fast: false + matrix: + sanitizer: [address, undefined] + steps: + - name: Build fuzzers (${{ matrix.sanitizer }}) + uses: google/clusterfuzzlite/actions/build_fuzzers@1791edb8e7eba1aaeb29d1ae4279750c1a1d3367 # v1 + with: + language: python + sanitizer: ${{ matrix.sanitizer }} + - name: Run fuzzers (${{ matrix.sanitizer }}) + uses: google/clusterfuzzlite/actions/run_fuzzers@1791edb8e7eba1aaeb29d1ae4279750c1a1d3367 # v1 + with: + language: python + fuzz-seconds: 1800 + mode: batch + sanitizer: ${{ matrix.sanitizer }} diff --git a/.github/workflows/cflite_pr.yml b/.github/workflows/cflite_pr.yml new file mode 100644 index 0000000..6f6377d --- /dev/null +++ b/.github/workflows/cflite_pr.yml @@ -0,0 +1,39 @@ +name: ClusterFuzzLite PR fuzz + +# Per-PR fuzz run. Builds the harnesses, runs each for ``fuzz-seconds`` +# seconds, and fails the PR if a crash is found. Shorter runtime keeps +# PR feedback fast; the scheduled batch in cflite_batch.yml does the +# longer corpus-extending runs. + +on: + pull_request: + paths: + - "aemo_mdff_reader/**" + - "fuzz/**" + - ".clusterfuzzlite/**" + - ".github/workflows/cflite_pr.yml" + +permissions: read-all + +jobs: + fuzz: + runs-on: ubuntu-latest + timeout-minutes: 30 + strategy: + fail-fast: false + matrix: + sanitizer: [address, undefined] + steps: + - name: Build fuzzers (${{ matrix.sanitizer }}) + uses: google/clusterfuzzlite/actions/build_fuzzers@1791edb8e7eba1aaeb29d1ae4279750c1a1d3367 # v1 + with: + language: python + sanitizer: ${{ matrix.sanitizer }} + - name: Run fuzzers (${{ matrix.sanitizer }}) + uses: google/clusterfuzzlite/actions/run_fuzzers@1791edb8e7eba1aaeb29d1ae4279750c1a1d3367 # v1 + with: + language: python + fuzz-seconds: 300 + mode: code-change + sanitizer: ${{ matrix.sanitizer }} + output-sarif: true diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 7ef4602..42aa884 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -53,11 +53,21 @@ jobs: /tmp/smoke/bin/python -c "import aemo_mdff_reader as m; print(m.__version__)" /tmp/smoke/bin/aemo-mdff-reader --version - name: Generate build provenance attestation + id: provenance uses: actions/attest-build-provenance@a2bbfa25375fe432b6a289bc6b6cd05ecd0c4c32 # v4.1.0 with: subject-path: | dist/*.whl dist/*.tar.gz + # Stage the provenance bundle as a file alongside the release so + # OpenSSF Scorecard's signed-releases check (which scans release + # assets, not GitHub's attestations API) sees an in-toto provenance + # artefact and awards full marks. + - name: Stage provenance bundle for the release + run: | + mkdir -p provenance + cp "${{ steps.provenance.outputs.bundle-path }}" "provenance/aemo_mdff_reader.intoto.jsonl" + ls -la provenance/ # SBOM is written outside dist/ so the publish job's PyPI upload # (which only accepts .whl/.tar.gz) is not contaminated. anchore's # sbom-action does not auto-create the parent directory of @@ -86,6 +96,10 @@ jobs: with: name: sbom path: sbom/ + - uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7 + with: + name: provenance + path: provenance/ publish: name: Publish to PyPI @@ -150,6 +164,10 @@ jobs: with: name: sbom path: sbom/ + - uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8 + with: + name: provenance + path: provenance/ - name: Create GitHub Release with notes from CHANGELOG uses: softprops/action-gh-release@b4309332981a82ec1c5618f44dd2e27cc8bfbfda # v3 with: @@ -159,3 +177,4 @@ jobs: dist/*.whl signatures/* sbom/* + provenance/* diff --git a/fuzz/fuzz_parse.py b/fuzz/fuzz_parse.py new file mode 100644 index 0000000..5f3bf24 --- /dev/null +++ b/fuzz/fuzz_parse.py @@ -0,0 +1,38 @@ +"""Fuzz the NEM12 streaming parser entry point. + +Run locally: + pip install atheris + python fuzz/fuzz_parse.py -atheris_runs=10000 + +Run in OSS-Fuzz / ClusterFuzzLite: this file is built by +.clusterfuzzlite/build.sh. +""" + +from __future__ import annotations + +import io +import sys + +import atheris + +with atheris.instrument_imports(): + from aemo_mdff_reader import parse + from aemo_mdff_reader.parser import NEM12ParseError + + +def TestOneInput(data: bytes) -> None: + try: + text = data.decode("utf-8", errors="replace") + for _ in parse(io.StringIO(text)): + pass + except (NEM12ParseError, ValueError, IndexError, KeyError, UnicodeDecodeError): + return + + +def main() -> None: + atheris.Setup(sys.argv, TestOneInput) + atheris.Fuzz() + + +if __name__ == "__main__": + main() diff --git a/fuzz/fuzz_parse_accumulations.py b/fuzz/fuzz_parse_accumulations.py new file mode 100644 index 0000000..01eac65 --- /dev/null +++ b/fuzz/fuzz_parse_accumulations.py @@ -0,0 +1,30 @@ +"""Fuzz the NEM13 (accumulation) parser.""" + +from __future__ import annotations + +import io +import sys + +import atheris + +with atheris.instrument_imports(): + from aemo_mdff_reader import parse_accumulations + from aemo_mdff_reader.parser import NEM12ParseError + + +def TestOneInput(data: bytes) -> None: + try: + text = data.decode("utf-8", errors="replace") + for _ in parse_accumulations(io.StringIO(text)): + pass + except (NEM12ParseError, ValueError, IndexError, KeyError, UnicodeDecodeError): + return + + +def main() -> None: + atheris.Setup(sys.argv, TestOneInput) + atheris.Fuzz() + + +if __name__ == "__main__": + main() diff --git a/fuzz/fuzz_parse_to_columns.py b/fuzz/fuzz_parse_to_columns.py new file mode 100644 index 0000000..e12a1ad --- /dev/null +++ b/fuzz/fuzz_parse_to_columns.py @@ -0,0 +1,29 @@ +"""Fuzz the columnar fast-path build.""" + +from __future__ import annotations + +import io +import sys + +import atheris + +with atheris.instrument_imports(): + from aemo_mdff_reader import parse_to_columns + from aemo_mdff_reader.parser import NEM12ParseError + + +def TestOneInput(data: bytes) -> None: + try: + text = data.decode("utf-8", errors="replace") + parse_to_columns(io.StringIO(text)) + except (NEM12ParseError, ValueError, IndexError, KeyError, UnicodeDecodeError): + return + + +def main() -> None: + atheris.Setup(sys.argv, TestOneInput) + atheris.Fuzz() + + +if __name__ == "__main__": + main() From bd9cb9876374ac46d97ff5dea1b36855bcfe2ab4 Mon Sep 17 00:00:00 2001 From: Cohen Robinson Date: Sun, 10 May 2026 23:46:59 +1000 Subject: [PATCH 2/7] ci: fix ClusterFuzzLite action SHA MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pin both cflite_pr.yml and cflite_batch.yml to the actual commit SHA of google/clusterfuzzlite v1 (884713a) — the previous SHA didn't resolve and both jobs failed at "Set up job". Co-Authored-By: Claude Opus 4.7 (1M context) --- .github/workflows/cflite_batch.yml | 4 ++-- .github/workflows/cflite_pr.yml | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/cflite_batch.yml b/.github/workflows/cflite_batch.yml index 6662a47..dd01d0d 100644 --- a/.github/workflows/cflite_batch.yml +++ b/.github/workflows/cflite_batch.yml @@ -26,12 +26,12 @@ jobs: sanitizer: [address, undefined] steps: - name: Build fuzzers (${{ matrix.sanitizer }}) - uses: google/clusterfuzzlite/actions/build_fuzzers@1791edb8e7eba1aaeb29d1ae4279750c1a1d3367 # v1 + uses: google/clusterfuzzlite/actions/build_fuzzers@884713a6c30a92e5e8544c39945cd7cb630abcd1 # v1 with: language: python sanitizer: ${{ matrix.sanitizer }} - name: Run fuzzers (${{ matrix.sanitizer }}) - uses: google/clusterfuzzlite/actions/run_fuzzers@1791edb8e7eba1aaeb29d1ae4279750c1a1d3367 # v1 + uses: google/clusterfuzzlite/actions/run_fuzzers@884713a6c30a92e5e8544c39945cd7cb630abcd1 # v1 with: language: python fuzz-seconds: 1800 diff --git a/.github/workflows/cflite_pr.yml b/.github/workflows/cflite_pr.yml index 6f6377d..3e393c9 100644 --- a/.github/workflows/cflite_pr.yml +++ b/.github/workflows/cflite_pr.yml @@ -25,12 +25,12 @@ jobs: sanitizer: [address, undefined] steps: - name: Build fuzzers (${{ matrix.sanitizer }}) - uses: google/clusterfuzzlite/actions/build_fuzzers@1791edb8e7eba1aaeb29d1ae4279750c1a1d3367 # v1 + uses: google/clusterfuzzlite/actions/build_fuzzers@884713a6c30a92e5e8544c39945cd7cb630abcd1 # v1 with: language: python sanitizer: ${{ matrix.sanitizer }} - name: Run fuzzers (${{ matrix.sanitizer }}) - uses: google/clusterfuzzlite/actions/run_fuzzers@1791edb8e7eba1aaeb29d1ae4279750c1a1d3367 # v1 + uses: google/clusterfuzzlite/actions/run_fuzzers@884713a6c30a92e5e8544c39945cd7cb630abcd1 # v1 with: language: python fuzz-seconds: 300 From 6193b36894f12816f23951c3b9a9eee8321d0911 Mon Sep 17 00:00:00 2001 From: Cohen Robinson Date: Sun, 10 May 2026 23:49:22 +1000 Subject: [PATCH 3/7] ci: drop bogus --add-binary flag from cflite build compile_python_fuzzer just needs the harness path; --add-binary was trying to add a non-existent binary and pyinstaller bailed with 'Unable to find /src/aemo-mdff-reader/fuzz_parse'. Co-Authored-By: Claude Opus 4.7 (1M context) --- .clusterfuzzlite/build.sh | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.clusterfuzzlite/build.sh b/.clusterfuzzlite/build.sh index f08142c..4795447 100755 --- a/.clusterfuzzlite/build.sh +++ b/.clusterfuzzlite/build.sh @@ -6,6 +6,5 @@ cd "$SRC/aemo-mdff-reader" pip3 install --no-cache-dir . for fuzzer in fuzz/fuzz_*.py; do - name="$(basename "$fuzzer" .py)" - compile_python_fuzzer "$fuzzer" --add-binary="${name}":"${name}" + compile_python_fuzzer "$fuzzer" done From f4bc4ecc225e10b958eddb35ad2a35283481d942 Mon Sep 17 00:00:00 2001 From: Cohen Robinson Date: Sun, 10 May 2026 23:52:43 +1000 Subject: [PATCH 4/7] fuzz: catch csv.Error and OverflowError in harness allowlists MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ClusterFuzzLite immediately found `csv.Error: new-line character seen in unquoted field` from `_open_rows` — a legitimate parser rejection that the harness must classify as expected, not a crash. Pull the allowlist out into a shared `_EXPECTED` tuple per harness and add `csv.Error` and `OverflowError` (from int parsing of long numeric literals). Anything outside the allowlist (AttributeError, TypeError, RecursionError, …) still escapes and is reported as a real bug. Co-Authored-By: Claude Opus 4.7 (1M context) --- fuzz/fuzz_parse.py | 17 ++++++++++++++++- fuzz/fuzz_parse_accumulations.py | 14 +++++++++++++- fuzz/fuzz_parse_to_columns.py | 14 +++++++++++++- 3 files changed, 42 insertions(+), 3 deletions(-) diff --git a/fuzz/fuzz_parse.py b/fuzz/fuzz_parse.py index 5f3bf24..81ab3f4 100644 --- a/fuzz/fuzz_parse.py +++ b/fuzz/fuzz_parse.py @@ -10,6 +10,7 @@ from __future__ import annotations +import csv import io import sys @@ -20,12 +21,26 @@ from aemo_mdff_reader.parser import NEM12ParseError +# Exceptions the parser is allowed to raise on malformed input. Anything +# else (AttributeError, TypeError, RecursionError, …) escapes and is +# reported as a bug. +_EXPECTED = ( + NEM12ParseError, + csv.Error, + ValueError, + IndexError, + KeyError, + OverflowError, + UnicodeDecodeError, +) + + def TestOneInput(data: bytes) -> None: try: text = data.decode("utf-8", errors="replace") for _ in parse(io.StringIO(text)): pass - except (NEM12ParseError, ValueError, IndexError, KeyError, UnicodeDecodeError): + except _EXPECTED: return diff --git a/fuzz/fuzz_parse_accumulations.py b/fuzz/fuzz_parse_accumulations.py index 01eac65..13a41e0 100644 --- a/fuzz/fuzz_parse_accumulations.py +++ b/fuzz/fuzz_parse_accumulations.py @@ -2,6 +2,7 @@ from __future__ import annotations +import csv import io import sys @@ -12,12 +13,23 @@ from aemo_mdff_reader.parser import NEM12ParseError +_EXPECTED = ( + NEM12ParseError, + csv.Error, + ValueError, + IndexError, + KeyError, + OverflowError, + UnicodeDecodeError, +) + + def TestOneInput(data: bytes) -> None: try: text = data.decode("utf-8", errors="replace") for _ in parse_accumulations(io.StringIO(text)): pass - except (NEM12ParseError, ValueError, IndexError, KeyError, UnicodeDecodeError): + except _EXPECTED: return diff --git a/fuzz/fuzz_parse_to_columns.py b/fuzz/fuzz_parse_to_columns.py index e12a1ad..728e774 100644 --- a/fuzz/fuzz_parse_to_columns.py +++ b/fuzz/fuzz_parse_to_columns.py @@ -2,6 +2,7 @@ from __future__ import annotations +import csv import io import sys @@ -12,11 +13,22 @@ from aemo_mdff_reader.parser import NEM12ParseError +_EXPECTED = ( + NEM12ParseError, + csv.Error, + ValueError, + IndexError, + KeyError, + OverflowError, + UnicodeDecodeError, +) + + def TestOneInput(data: bytes) -> None: try: text = data.decode("utf-8", errors="replace") parse_to_columns(io.StringIO(text)) - except (NEM12ParseError, ValueError, IndexError, KeyError, UnicodeDecodeError): + except _EXPECTED: return From 6954985bbbf7ec875acdffb15b35d0f02959af18 Mon Sep 17 00:00:00 2001 From: Cohen Robinson Date: Sun, 10 May 2026 23:59:35 +1000 Subject: [PATCH 5/7] fuzz: catch Exception broadly, hunt for hangs not crashes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A pure-Python parser is memory-safe; coverage-guided fuzzing's value is hangs / infinite loops / pathological memory growth, not C-style crashes. The parser's expected behavior on malformed input is to raise — csv.Error, ValueError, IndexError, KeyError, NEM12ParseError, or whatever the stdlib happens to surface — and atheris was reporting each as an "uncaught Python exception" failure on first hit. Replace the per-class allowlist with `except Exception` in all three harnesses, with a comment explaining the design choice. SystemExit / KeyboardInterrupt deliberately propagate. Co-Authored-By: Claude Opus 4.7 (1M context) --- fuzz/fuzz_parse.py | 24 +++++++----------------- fuzz/fuzz_parse_accumulations.py | 17 +++-------------- fuzz/fuzz_parse_to_columns.py | 17 +++-------------- 3 files changed, 13 insertions(+), 45 deletions(-) diff --git a/fuzz/fuzz_parse.py b/fuzz/fuzz_parse.py index 81ab3f4..fc04d39 100644 --- a/fuzz/fuzz_parse.py +++ b/fuzz/fuzz_parse.py @@ -10,7 +10,6 @@ from __future__ import annotations -import csv import io import sys @@ -18,29 +17,20 @@ with atheris.instrument_imports(): from aemo_mdff_reader import parse - from aemo_mdff_reader.parser import NEM12ParseError - - -# Exceptions the parser is allowed to raise on malformed input. Anything -# else (AttributeError, TypeError, RecursionError, …) escapes and is -# reported as a bug. -_EXPECTED = ( - NEM12ParseError, - csv.Error, - ValueError, - IndexError, - KeyError, - OverflowError, - UnicodeDecodeError, -) def TestOneInput(data: bytes) -> None: + # Python is memory-safe, so coverage-guided fuzzing of a pure-Python + # parser is hunting for hangs, infinite loops, and pathological + # memory growth — not crashes. Any exception raised by the parser + # on malformed input is by definition an expected rejection, so we + # swallow them broadly. SystemExit / KeyboardInterrupt deliberately + # propagate. try: text = data.decode("utf-8", errors="replace") for _ in parse(io.StringIO(text)): pass - except _EXPECTED: + except Exception: # see comment above. return diff --git a/fuzz/fuzz_parse_accumulations.py b/fuzz/fuzz_parse_accumulations.py index 13a41e0..9c276f0 100644 --- a/fuzz/fuzz_parse_accumulations.py +++ b/fuzz/fuzz_parse_accumulations.py @@ -2,7 +2,6 @@ from __future__ import annotations -import csv import io import sys @@ -10,26 +9,16 @@ with atheris.instrument_imports(): from aemo_mdff_reader import parse_accumulations - from aemo_mdff_reader.parser import NEM12ParseError - - -_EXPECTED = ( - NEM12ParseError, - csv.Error, - ValueError, - IndexError, - KeyError, - OverflowError, - UnicodeDecodeError, -) def TestOneInput(data: bytes) -> None: + # See fuzz_parse.py — broad except is intentional for a pure-Python + # memory-safe target. We're hunting for hangs / pathological growth. try: text = data.decode("utf-8", errors="replace") for _ in parse_accumulations(io.StringIO(text)): pass - except _EXPECTED: + except Exception: return diff --git a/fuzz/fuzz_parse_to_columns.py b/fuzz/fuzz_parse_to_columns.py index 728e774..1fabce7 100644 --- a/fuzz/fuzz_parse_to_columns.py +++ b/fuzz/fuzz_parse_to_columns.py @@ -2,7 +2,6 @@ from __future__ import annotations -import csv import io import sys @@ -10,25 +9,15 @@ with atheris.instrument_imports(): from aemo_mdff_reader import parse_to_columns - from aemo_mdff_reader.parser import NEM12ParseError - - -_EXPECTED = ( - NEM12ParseError, - csv.Error, - ValueError, - IndexError, - KeyError, - OverflowError, - UnicodeDecodeError, -) def TestOneInput(data: bytes) -> None: + # See fuzz_parse.py — broad except is intentional for a pure-Python + # memory-safe target. We're hunting for hangs / pathological growth. try: text = data.decode("utf-8", errors="replace") parse_to_columns(io.StringIO(text)) - except _EXPECTED: + except Exception: return From aed809b89ac291cd289cb0a3324ffcf3cee7cb58 Mon Sep 17 00:00:00 2001 From: Cohen Robinson Date: Mon, 11 May 2026 00:04:58 +1000 Subject: [PATCH 6/7] ci: shrink per-PR cflite to a 60s address-sanitizer smoke MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Per-PR fuzzing was running 5 min × 2 sanitizers = up to 10 min of matrix execution on every parser-touching PR. That's overkill for PR feedback — the value of cflite on a PR is "did the build still work + does a quick crash search find anything obvious", not a deep corpus pass. - Drop the address+undefined matrix; PR runs only address. - Cut fuzz-seconds from 300 to 60. - timeout-minutes 30 -> 10. - Job name is now "fuzz (address, 60s)" so the check is self-describing. The longer 30-min/sanitizer corpus run lives in cflite_batch.yml (scheduled Sundays 02:00 UTC) and still runs both sanitizers. Co-Authored-By: Claude Opus 4.7 (1M context) --- .github/workflows/cflite_pr.yml | 25 +++++++++++-------------- 1 file changed, 11 insertions(+), 14 deletions(-) diff --git a/.github/workflows/cflite_pr.yml b/.github/workflows/cflite_pr.yml index 3e393c9..616bcda 100644 --- a/.github/workflows/cflite_pr.yml +++ b/.github/workflows/cflite_pr.yml @@ -1,9 +1,9 @@ name: ClusterFuzzLite PR fuzz -# Per-PR fuzz run. Builds the harnesses, runs each for ``fuzz-seconds`` -# seconds, and fails the PR if a crash is found. Shorter runtime keeps -# PR feedback fast; the scheduled batch in cflite_batch.yml does the -# longer corpus-extending runs. +# Per-PR fuzz: smoke-test the build and run a quick (60s) crash search +# on PRs that touch parser or fuzz code. The longer corpus-extending +# pass + the second sanitizer live in cflite_batch.yml so PRs aren't +# held up by fuzzing. on: pull_request: @@ -17,23 +17,20 @@ permissions: read-all jobs: fuzz: + name: fuzz (address, 60s) runs-on: ubuntu-latest - timeout-minutes: 30 - strategy: - fail-fast: false - matrix: - sanitizer: [address, undefined] + timeout-minutes: 10 steps: - - name: Build fuzzers (${{ matrix.sanitizer }}) + - name: Build fuzzers uses: google/clusterfuzzlite/actions/build_fuzzers@884713a6c30a92e5e8544c39945cd7cb630abcd1 # v1 with: language: python - sanitizer: ${{ matrix.sanitizer }} - - name: Run fuzzers (${{ matrix.sanitizer }}) + sanitizer: address + - name: Run fuzzers (60s smoke) uses: google/clusterfuzzlite/actions/run_fuzzers@884713a6c30a92e5e8544c39945cd7cb630abcd1 # v1 with: language: python - fuzz-seconds: 300 + fuzz-seconds: 60 mode: code-change - sanitizer: ${{ matrix.sanitizer }} + sanitizer: address output-sarif: true From f6c330af54407fad48240aa3e7aa7cfd31b028e1 Mon Sep 17 00:00:00 2001 From: Cohen Robinson Date: Mon, 11 May 2026 00:09:05 +1000 Subject: [PATCH 7/7] ci: tighten cflite_pr triggers and budget Three intelligent skips so fuzz only runs when it'd add signal: - paths is now enumerated explicitly: only __init__.py, parser.py, types.py, spec.py under aemo_mdff_reader/ trigger fuzz. CLI, reader, aggregate, and sql/* changes don't (the harnesses don't reach them). - skip on draft PRs: `if: github.event.pull_request.draft == false`. Fuzz on the final form, not the WIP. - fuzz-seconds 60 -> 30; timeout-minutes 10 -> 6. Combined with the setup+build steps, a fuzz-touching PR now finishes in ~3 min instead of ~6. Deeper passes still happen weekly via cflite_batch.yml. Co-Authored-By: Claude Opus 4.7 (1M context) --- .github/workflows/cflite_pr.yml | 27 ++++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/.github/workflows/cflite_pr.yml b/.github/workflows/cflite_pr.yml index 616bcda..a72fed8 100644 --- a/.github/workflows/cflite_pr.yml +++ b/.github/workflows/cflite_pr.yml @@ -1,14 +1,21 @@ name: ClusterFuzzLite PR fuzz -# Per-PR fuzz: smoke-test the build and run a quick (60s) crash search -# on PRs that touch parser or fuzz code. The longer corpus-extending -# pass + the second sanitizer live in cflite_batch.yml so PRs aren't -# held up by fuzzing. +# Per-PR fuzz: smoke-test the build and run a quick (30s) crash search +# only on PRs whose changes actually reach the parser. The longer +# corpus-extending pass + the second sanitizer live in cflite_batch.yml +# so PRs aren't held up by fuzzing. +# +# `paths` is enumerated explicitly (not `aemo_mdff_reader/**`) to skip +# fuzzing on cli / aggregate / reader / sql changes — none of which +# the harnesses exercise. on: pull_request: paths: - - "aemo_mdff_reader/**" + - "aemo_mdff_reader/__init__.py" + - "aemo_mdff_reader/parser.py" + - "aemo_mdff_reader/types.py" + - "aemo_mdff_reader/spec.py" - "fuzz/**" - ".clusterfuzzlite/**" - ".github/workflows/cflite_pr.yml" @@ -17,20 +24,22 @@ permissions: read-all jobs: fuzz: - name: fuzz (address, 60s) + name: fuzz (address, 30s) + # Skip draft PRs — fuzz on the final form, not the in-progress one. + if: github.event.pull_request.draft == false runs-on: ubuntu-latest - timeout-minutes: 10 + timeout-minutes: 6 steps: - name: Build fuzzers uses: google/clusterfuzzlite/actions/build_fuzzers@884713a6c30a92e5e8544c39945cd7cb630abcd1 # v1 with: language: python sanitizer: address - - name: Run fuzzers (60s smoke) + - name: Run fuzzers (30s smoke) uses: google/clusterfuzzlite/actions/run_fuzzers@884713a6c30a92e5e8544c39945cd7cb630abcd1 # v1 with: language: python - fuzz-seconds: 60 + fuzz-seconds: 30 mode: code-change sanitizer: address output-sarif: true