From fc9f80f31ff320c144b05040879141d92b8a0075 Mon Sep 17 00:00:00 2001 From: harryswift01 Date: Thu, 7 May 2026 11:52:02 +0100 Subject: [PATCH 1/6] ci: disable regression cache and serialize tests for flake investigation --- .github/workflows/pr.yaml | 35 +++++++++++++++++++++++++++-------- 1 file changed, 27 insertions(+), 8 deletions(-) diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index 96aaa0c..b20327a 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -7,6 +7,13 @@ concurrency: group: pr-${{ github.ref }} cancel-in-progress: true +env: + OMP_NUM_THREADS: 1 + MKL_NUM_THREADS: 1 + OPENBLAS_NUM_THREADS: 1 + NUMEXPR_NUM_THREADS: 1 + PYTHONHASHSEED: 0 + jobs: unit: name: Unit @@ -34,7 +41,7 @@ jobs: python -m pip install -e .[testing] - name: Pytest (unit) • ${{ matrix.os }}, ${{ matrix.python-version }} - run: python -m pytest tests/unit + run: python -m pytest tests/unit --cache-clear discover-systems: name: Discover regression systems @@ -84,23 +91,34 @@ jobs: python-version: "3.14" cache: pip - - name: Cache testdata - uses: actions/cache@v4 - with: - path: .testdata - key: codeentropy-testdata-${{ runner.os }}-py314 + # Intentionally disabled while debugging CI-only regression flakes. + # The old key was static and could restore stale generated test data: + # codeentropy-testdata-${{ runner.os }}-py314 + # + # - name: Cache testdata + # uses: actions/cache@v4 + # with: + # path: .testdata + # key: codeentropy-testdata-${{ runner.os }}-py314 - name: Install testing dependencies run: | python -m pip install --upgrade pip python -m pip install -e .[testing] + - name: Clean local test caches + shell: bash + run: | + rm -rf .pytest_cache + rm -rf .testdata + find . -type d -name "__pycache__" -prune -exec rm -rf {} + + - name: Run fast regression tests (per system) run: | python -m pytest tests/regression \ -m "not slow" \ - -n auto \ - --dist=loadscope \ + --cache-clear \ + -n 1 \ -k "${{ matrix.system }}" \ -vv \ --durations=20 @@ -200,6 +218,7 @@ jobs: - name: Run coverage run: | pytest tests/unit \ + --cache-clear \ --cov CodeEntropy \ --cov-report term-missing \ --cov-report xml \ From e068296d27a9e5a97dce5ce2348383abace7bf16 Mon Sep 17 00:00:00 2001 From: harryswift01 Date: Thu, 7 May 2026 12:13:31 +0100 Subject: [PATCH 2/6] ci: add regression testdata integrity checks to CI debugging workflow --- .github/workflows/pr.yaml | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index b20327a..3bfe849 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -113,6 +113,15 @@ jobs: rm -rf .testdata find . -type d -name "__pycache__" -prune -exec rm -rf {} + + - name: Snapshot regression test data before tests + shell: bash + run: | + if [ -d .testdata ]; then + find .testdata -type f -exec sha256sum {} \; | sort > testdata.before.sha256 + else + touch testdata.before.sha256 + fi + - name: Run fast regression tests (per system) run: | python -m pytest tests/regression \ @@ -123,6 +132,22 @@ jobs: -vv \ --durations=20 + - name: Snapshot regression test data after tests + if: always() + shell: bash + run: | + if [ -d .testdata ]; then + find .testdata -type f -exec sha256sum {} \; | sort > testdata.after.sha256 + else + touch testdata.after.sha256 + fi + + - name: Check regression test data did not change + if: always() + shell: bash + run: | + diff -u testdata.before.sha256 testdata.after.sha256 + - name: Upload artifacts (failure) if: failure() uses: actions/upload-artifact@v4 @@ -130,6 +155,8 @@ jobs: name: quick-regression-failure-${{ matrix.system }} path: | .testdata/** + testdata.before.sha256 + testdata.after.sha256 /tmp/pytest-of-*/pytest-*/** docs: From 792158f187778146f2da711a7fa590a819614c96 Mon Sep 17 00:00:00 2001 From: harryswift01 Date: Thu, 7 May 2026 13:50:48 +0100 Subject: [PATCH 3/6] ci: prepare regression datasets before test execution and verify immutability --- .github/workflows/pr.yaml | 28 ++++++++------- tests/regression/conftest.py | 3 -- tests/regression/prepare_testdata.py | 54 ++++++++++++++++++++++++++++ 3 files changed, 69 insertions(+), 16 deletions(-) create mode 100644 tests/regression/prepare_testdata.py diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index 3bfe849..20cbdcf 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -110,37 +110,39 @@ jobs: shell: bash run: | rm -rf .pytest_cache - rm -rf .testdata find . -type d -name "__pycache__" -prune -exec rm -rf {} + - - name: Snapshot regression test data before tests + - name: Prepare regression test data shell: bash run: | - if [ -d .testdata ]; then - find .testdata -type f -exec sha256sum {} \; | sort > testdata.before.sha256 - else - touch testdata.before.sha256 + python -m tests.regression.prepare_testdata --system "${{ matrix.system }}" + + if [ ! -d .testdata ]; then + echo ".testdata was not created" + exit 1 fi + - name: Snapshot regression test data before tests + shell: bash + run: | + find .testdata -type f -exec sha256sum {} \; | sort > testdata.before.sha256 + - name: Run fast regression tests (per system) run: | python -m pytest tests/regression \ -m "not slow" \ --cache-clear \ -n 1 \ - -k "${{ matrix.system }}" \ + --system "${{ matrix.system }}" \ -vv \ - --durations=20 + --durations=20 \ + --codeentropy-debug - name: Snapshot regression test data after tests if: always() shell: bash run: | - if [ -d .testdata ]; then - find .testdata -type f -exec sha256sum {} \; | sort > testdata.after.sha256 - else - touch testdata.after.sha256 - fi + find .testdata -type f -exec sha256sum {} \; | sort > testdata.after.sha256 - name: Check regression test data did not change if: always() diff --git a/tests/regression/conftest.py b/tests/regression/conftest.py index 2dfd281..45f5635 100644 --- a/tests/regression/conftest.py +++ b/tests/regression/conftest.py @@ -1,6 +1,5 @@ from __future__ import annotations -import os import random import numpy as np @@ -48,8 +47,6 @@ def pytest_configure(config: pytest.Config) -> None: random.seed(seed) np.random.seed(seed) - os.environ["PYTHONHASHSEED"] = "0" - def pytest_collection_modifyitems( config: pytest.Config, items: list[pytest.Item] diff --git a/tests/regression/prepare_testdata.py b/tests/regression/prepare_testdata.py new file mode 100644 index 0000000..6d10112 --- /dev/null +++ b/tests/regression/prepare_testdata.py @@ -0,0 +1,54 @@ +from __future__ import annotations + +import argparse +from pathlib import Path + +import yaml + +from tests.regression.cases import discover_cases +from tests.regression.helpers import ( + _abspathify_config_paths, + ensure_testdata_for_system, +) + + +def required_paths_for_case(case) -> list[Path]: + raw = yaml.safe_load(case.config_path.read_text()) + cooked = _abspathify_config_paths(raw, base_dir=case.config_path.parent) + + required: list[Path] = [] + run1 = cooked.get("run1") + + if isinstance(run1, dict): + ff = run1.get("force_file") + if isinstance(ff, str) and ff: + required.append(Path(ff)) + + for p in run1.get("top_traj_file") or []: + if isinstance(p, str) and p: + required.append(Path(p)) + + return required + + +def main() -> None: + parser = argparse.ArgumentParser() + parser.add_argument("--system", action="append", default=None) + args = parser.parse_args() + + selected = set(args.system or []) + cases = discover_cases() + + for case in cases: + if selected and case.system not in selected: + continue + + required = required_paths_for_case(case) + if required: + ensure_testdata_for_system(case.system, required_paths=required) + + print("Regression test data prepared.") + + +if __name__ == "__main__": + main() From f5bfa10681eccada33f7d381fbcbd470b76347b0 Mon Sep 17 00:00:00 2001 From: harryswift01 Date: Thu, 7 May 2026 13:56:41 +0100 Subject: [PATCH 4/6] ci: fix regression testdata preparation for pytest parameterized cases --- .github/workflows/pr.yaml | 7 ++++++- tests/regression/prepare_testdata.py | 16 +++++++++++++--- 2 files changed, 19 insertions(+), 4 deletions(-) diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index 20cbdcf..66f339c 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -142,12 +142,17 @@ jobs: if: always() shell: bash run: | - find .testdata -type f -exec sha256sum {} \; | sort > testdata.after.sha256 + if [ -d .testdata ]; then + find .testdata -type f -exec sha256sum {} \; | sort > testdata.after.sha256 + else + touch testdata.after.sha256 + fi - name: Check regression test data did not change if: always() shell: bash run: | + touch testdata.before.sha256 testdata.after.sha256 diff -u testdata.before.sha256 testdata.after.sha256 - name: Upload artifacts (failure) diff --git a/tests/regression/prepare_testdata.py b/tests/regression/prepare_testdata.py index 6d10112..76df4c1 100644 --- a/tests/regression/prepare_testdata.py +++ b/tests/regression/prepare_testdata.py @@ -2,6 +2,7 @@ import argparse from pathlib import Path +from typing import Any import yaml @@ -12,7 +13,15 @@ ) -def required_paths_for_case(case) -> list[Path]: +def unwrap_case(case_or_param: Any) -> Any: + if hasattr(case_or_param, "values"): + values = case_or_param.values + if values: + return values[0] + return case_or_param + + +def required_paths_for_case(case: Any) -> list[Path]: raw = yaml.safe_load(case.config_path.read_text()) cooked = _abspathify_config_paths(raw, base_dir=case.config_path.parent) @@ -37,9 +46,10 @@ def main() -> None: args = parser.parse_args() selected = set(args.system or []) - cases = discover_cases() - for case in cases: + for case_or_param in discover_cases(): + case = unwrap_case(case_or_param) + if selected and case.system not in selected: continue From cd88bfcda8ed25afad5f7d9c00c36fa7d5e533cb Mon Sep 17 00:00:00 2001 From: harryswift01 Date: Thu, 7 May 2026 14:24:44 +0100 Subject: [PATCH 5/6] ci: stabilize regression test execution environment --- .github/workflows/pr.yaml | 44 --------------------------------------- 1 file changed, 44 deletions(-) diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index 66f339c..72fb8e6 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -91,16 +91,6 @@ jobs: python-version: "3.14" cache: pip - # Intentionally disabled while debugging CI-only regression flakes. - # The old key was static and could restore stale generated test data: - # codeentropy-testdata-${{ runner.os }}-py314 - # - # - name: Cache testdata - # uses: actions/cache@v4 - # with: - # path: .testdata - # key: codeentropy-testdata-${{ runner.os }}-py314 - - name: Install testing dependencies run: | python -m pip install --upgrade pip @@ -112,21 +102,6 @@ jobs: rm -rf .pytest_cache find . -type d -name "__pycache__" -prune -exec rm -rf {} + - - name: Prepare regression test data - shell: bash - run: | - python -m tests.regression.prepare_testdata --system "${{ matrix.system }}" - - if [ ! -d .testdata ]; then - echo ".testdata was not created" - exit 1 - fi - - - name: Snapshot regression test data before tests - shell: bash - run: | - find .testdata -type f -exec sha256sum {} \; | sort > testdata.before.sha256 - - name: Run fast regression tests (per system) run: | python -m pytest tests/regression \ @@ -138,23 +113,6 @@ jobs: --durations=20 \ --codeentropy-debug - - name: Snapshot regression test data after tests - if: always() - shell: bash - run: | - if [ -d .testdata ]; then - find .testdata -type f -exec sha256sum {} \; | sort > testdata.after.sha256 - else - touch testdata.after.sha256 - fi - - - name: Check regression test data did not change - if: always() - shell: bash - run: | - touch testdata.before.sha256 testdata.after.sha256 - diff -u testdata.before.sha256 testdata.after.sha256 - - name: Upload artifacts (failure) if: failure() uses: actions/upload-artifact@v4 @@ -162,8 +120,6 @@ jobs: name: quick-regression-failure-${{ matrix.system }} path: | .testdata/** - testdata.before.sha256 - testdata.after.sha256 /tmp/pytest-of-*/pytest-*/** docs: From d2cc920f8538ef35586cf5173a90bd740dace4c9 Mon Sep 17 00:00:00 2001 From: harryswift01 Date: Thu, 7 May 2026 15:54:01 +0100 Subject: [PATCH 6/6] ci: pin python version 3.13 within regression test --- .github/workflows/pr.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index 72fb8e6..c3cdb43 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -88,7 +88,7 @@ jobs: - name: Set up Python 3.14 uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 with: - python-version: "3.14" + python-version: "3.13" cache: pip - name: Install testing dependencies