From 90566cca618a895f435762d033c955304b18c532 Mon Sep 17 00:00:00 2001 From: Mike Kryjak Date: Tue, 17 Mar 2026 18:39:46 +0000 Subject: [PATCH 01/12] Add sudo apt-get update to actions Was erroring out on installing libcurl4-openssl-dev before. --- .github/workflows/master.yml | 2 ++ .github/workflows/pythonpackage.yml | 2 ++ 2 files changed, 4 insertions(+) diff --git a/.github/workflows/master.yml b/.github/workflows/master.yml index 33019e4a..b16fb83e 100644 --- a/.github/workflows/master.yml +++ b/.github/workflows/master.yml @@ -32,6 +32,7 @@ jobs: - name: Install dependencies run: | python -m pip install --upgrade pip + sudo apt-get update sudo apt-get -y install libhdf5-dev - name: Install package run: | @@ -61,6 +62,7 @@ jobs: - name: Install dependencies run: | python -m pip install --upgrade pip + sudo apt-get update sudo apt-get -y install libhdf5-dev pip install xarray~=2023.12.0 pandas~=2.2.0 - name: Install package diff --git a/.github/workflows/pythonpackage.yml b/.github/workflows/pythonpackage.yml index f0f9392c..2dab6935 100644 --- a/.github/workflows/pythonpackage.yml +++ b/.github/workflows/pythonpackage.yml @@ -32,6 +32,7 @@ jobs: - name: Install dependencies run: | python -m pip install --upgrade pip + sudo apt-get update sudo apt-get -y install libhdf5-dev - name: Install package run: | @@ -61,6 +62,7 @@ jobs: - name: Install dependencies run: | python -m pip install --upgrade pip + sudo apt-get update sudo apt-get -y install libhdf5-dev pip install xarray~=2023.12.0 pandas~=2.2.0 - name: Install package From 0261f6f204258b7d52edef5d2c55350cff909088 Mon Sep 17 00:00:00 2001 From: Mike Kryjak Date: Mon, 13 Apr 2026 12:01:28 +0100 Subject: [PATCH 02/12] Safe dataset loading in test_boutdataset::TestSaveRestart::test_to_restart Fixes CI hang on Python >= 3.13 --- xbout/tests/test_boutdataset.py | 39 ++++++++++++++++++--------------- 1 file changed, 21 insertions(+), 18 deletions(-) diff --git a/xbout/tests/test_boutdataset.py b/xbout/tests/test_boutdataset.py index 61c03d11..d8bfa503 100644 --- a/xbout/tests/test_boutdataset.py +++ b/xbout/tests/test_boutdataset.py @@ -2224,17 +2224,11 @@ def test_to_restart(self, tmp_path_factory, bout_xyt_example_files, tind): for proc_xind in range(nxpe): num = nxpe * proc_yind + proc_xind - restart_ds = open_dataset(savepath.joinpath(f"BOUT.restart.{num}.nc")) - if tind is None: t = -1 else: t = tind - # ignore guard cells - they are filled with NaN in the created restart - # files - restart_ds = restart_ds.isel(x=slice(2, -2), y=slice(2, -2)) - check_ds = ds.isel( t=t, x=slice(2 + proc_xind * mxsub, 2 + (proc_xind + 1) * mxsub), @@ -2249,6 +2243,13 @@ def test_to_restart(self, tmp_path_factory, bout_xyt_example_files, tind): # ignore variables that depend on the rank of the BOUT++ process - they # cannot be consistent with check_ds rank_dependent_vars = ["PE_XIND", "PE_YIND", "MYPE"] + with open_dataset(savepath.joinpath(f"BOUT.restart.{num}.nc")) as restart_file: + restart_ds = restart_file.load() + + # ignore guard cells - they are filled with NaN in the created restart + # files + restart_ds = restart_ds.isel(x=slice(2, -2), y=slice(2, -2)) + for v in restart_ds: if v in check_ds: xrt.assert_equal(restart_ds[v], check_ds[v]) @@ -2299,12 +2300,6 @@ def test_to_restart_change_npe(self, tmp_path_factory, bout_xyt_example_files): for proc_xind in range(nxpe): num = nxpe * proc_yind + proc_xind - restart_ds = open_dataset(savepath.joinpath(f"BOUT.restart.{num}.nc")) - - # ignore guard cells - they are filled with NaN in the created restart - # files - restart_ds = restart_ds.isel(x=slice(2, -2), y=slice(2, -2)) - check_ds = ds.isel( t=-1, x=slice(2 + proc_xind * mxsub, 2 + (proc_xind + 1) * mxsub), @@ -2319,6 +2314,13 @@ def test_to_restart_change_npe(self, tmp_path_factory, bout_xyt_example_files): # ignore variables that depend on the rank of the BOUT++ process - they # cannot be consistent with check_ds rank_dependent_vars = ["PE_XIND", "PE_YIND", "MYPE"] + with open_dataset(savepath.joinpath(f"BOUT.restart.{num}.nc")) as restart_file: + restart_ds = restart_file.load() + + # ignore guard cells - they are filled with NaN in the created restart + # files + restart_ds = restart_ds.isel(x=slice(2, -2), y=slice(2, -2)) + for v in restart_ds: if v in check_ds: xrt.assert_equal(restart_ds[v], check_ds[v]) @@ -2372,12 +2374,6 @@ def test_to_restart_change_npe_doublenull( for proc_xind in range(nxpe): num = nxpe * proc_yind + proc_xind - restart_ds = open_dataset(savepath.joinpath(f"BOUT.restart.{num}.nc")) - - # ignore guard cells - they are filled with NaN in the created restart - # files - restart_ds = restart_ds.isel(x=slice(2, -2), y=slice(2, -2)) - check_ds = ds.isel( t=-1, x=slice(2 + proc_xind * mxsub, 2 + (proc_xind + 1) * mxsub), @@ -2392,6 +2388,13 @@ def test_to_restart_change_npe_doublenull( # ignore variables that depend on the rank of the BOUT++ process - they # cannot be consistent with check_ds rank_dependent_vars = ["PE_XIND", "PE_YIND", "MYPE"] + with open_dataset(savepath.joinpath(f"BOUT.restart.{num}.nc")) as restart_file: + restart_ds = restart_file.load() + + # ignore guard cells - they are filled with NaN in the created restart + # files + restart_ds = restart_ds.isel(x=slice(2, -2), y=slice(2, -2)) + for v in restart_ds: if v in check_ds: xrt.assert_equal(restart_ds[v], check_ds[v]) From a9e8f05add68dcbe99847addddbc6b034576e44b Mon Sep 17 00:00:00 2001 From: mikekryjak Date: Mon, 13 Apr 2026 11:04:32 +0000 Subject: [PATCH 03/12] Apply black formatting --- xbout/tests/test_boutdataset.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/xbout/tests/test_boutdataset.py b/xbout/tests/test_boutdataset.py index d8bfa503..9810f941 100644 --- a/xbout/tests/test_boutdataset.py +++ b/xbout/tests/test_boutdataset.py @@ -2243,7 +2243,9 @@ def test_to_restart(self, tmp_path_factory, bout_xyt_example_files, tind): # ignore variables that depend on the rank of the BOUT++ process - they # cannot be consistent with check_ds rank_dependent_vars = ["PE_XIND", "PE_YIND", "MYPE"] - with open_dataset(savepath.joinpath(f"BOUT.restart.{num}.nc")) as restart_file: + with open_dataset( + savepath.joinpath(f"BOUT.restart.{num}.nc") + ) as restart_file: restart_ds = restart_file.load() # ignore guard cells - they are filled with NaN in the created restart @@ -2314,7 +2316,9 @@ def test_to_restart_change_npe(self, tmp_path_factory, bout_xyt_example_files): # ignore variables that depend on the rank of the BOUT++ process - they # cannot be consistent with check_ds rank_dependent_vars = ["PE_XIND", "PE_YIND", "MYPE"] - with open_dataset(savepath.joinpath(f"BOUT.restart.{num}.nc")) as restart_file: + with open_dataset( + savepath.joinpath(f"BOUT.restart.{num}.nc") + ) as restart_file: restart_ds = restart_file.load() # ignore guard cells - they are filled with NaN in the created restart @@ -2388,7 +2392,9 @@ def test_to_restart_change_npe_doublenull( # ignore variables that depend on the rank of the BOUT++ process - they # cannot be consistent with check_ds rank_dependent_vars = ["PE_XIND", "PE_YIND", "MYPE"] - with open_dataset(savepath.joinpath(f"BOUT.restart.{num}.nc")) as restart_file: + with open_dataset( + savepath.joinpath(f"BOUT.restart.{num}.nc") + ) as restart_file: restart_ds = restart_file.load() # ignore guard cells - they are filled with NaN in the created restart From 8107aa8c92f143b2bcbdacce843b80f20f67da84 Mon Sep 17 00:00:00 2001 From: David Bold Date: Mon, 13 Apr 2026 14:37:51 +0200 Subject: [PATCH 04/12] Close more files while testing --- xbout/tests/test_boutdataset.py | 37 ++++++++++++++++++++++++++++++++ xbout/tests/test_grid.py | 38 ++++++++++++++++----------------- 2 files changed, 56 insertions(+), 19 deletions(-) diff --git a/xbout/tests/test_boutdataset.py b/xbout/tests/test_boutdataset.py index 9810f941..7264caae 100644 --- a/xbout/tests/test_boutdataset.py +++ b/xbout/tests/test_boutdataset.py @@ -42,6 +42,8 @@ def test_concat(self, bout_xyt_example_files): ) result = concat([bd1, bd2], dim="run") assert result.sizes == {**bd1.sizes, "run": 2} + bd1.close() + bd2.close() def test_isel(self, bout_xyt_example_files): dataset_list = bout_xyt_example_files(None, nxpe=1, nype=1, nt=1) @@ -52,6 +54,7 @@ def test_isel(self, bout_xyt_example_files): actual = bd.isel(x=slice(None, None, 2)) expected = bd.bout.data.isel(x=slice(None, None, 2)) xrt.assert_equal(actual, expected) + bd.close() class TestBoutDatasetMethods: @@ -83,6 +86,7 @@ def test_get_field_aligned(self, bout_xyt_example_files): ds["n_aligned"] = ds["T"] ds["n_aligned"].attrs["direction_y"] = "Aligned" xrt.assert_allclose(ds.bout.get_field_aligned("n"), ds["T"]) + ds.close() @pytest.mark.parametrize("mxg", [0, pytest.param(2, marks=pytest.mark.long)]) @pytest.mark.parametrize("myg", [pytest.param(0, marks=pytest.mark.long), 2]) @@ -144,6 +148,8 @@ def test_remove_yboundaries(self, bout_xyt_example_files, mxg, myg): # don't need to delete MYG again xrt.assert_equal(ds, ds_no_yboundaries) + ds.close() + ds_no_yboundaries.close() @pytest.mark.parametrize( "nz", @@ -257,6 +263,7 @@ def test_to_field_aligned(self, bout_xyt_example_files, nz): rtol=1.0e-15, atol=0.0, ) # noqa: E501 + ds.close() def test_to_field_aligned_dask(self, bout_xyt_example_files): nz = 6 @@ -370,6 +377,7 @@ def test_to_field_aligned_dask(self, bout_xyt_example_files): rtol=1.0e-15, atol=0.0, ) # noqa: E501 + ds.close() @pytest.mark.parametrize( "nz", @@ -484,6 +492,8 @@ def test_from_field_aligned(self, bout_xyt_example_files, nz): atol=0.0, ) # noqa: E501 + ds.close() + @pytest.mark.parametrize("stag_location", ["CELL_XLOW", "CELL_YLOW", "CELL_ZLOW"]) def test_to_field_aligned_staggered(self, bout_xyt_example_files, stag_location): dataset_list = bout_xyt_example_files( @@ -542,6 +552,7 @@ def test_to_field_aligned_staggered(self, bout_xyt_example_files, stag_location) assert n_stag_al.direction_y == "Aligned" npt.assert_equal(n_stag_al.values, n_al.values) + ds.close() @pytest.mark.parametrize("stag_location", ["CELL_XLOW", "CELL_YLOW", "CELL_ZLOW"]) def test_from_field_aligned_staggered(self, bout_xyt_example_files, stag_location): @@ -615,6 +626,7 @@ def test_set_parallel_interpolation_factor(self): assert ds.metadata["fine_interpolation_factor"] == 42 assert ds["a"].metadata["fine_interpolation_factor"] == 42 + ds.close() @pytest.mark.parametrize(params_guards, params_guards_values) @pytest.mark.parametrize(params_boundaries, params_boundaries_values) @@ -1090,6 +1102,7 @@ def test_interpolate_parallel( ).values, v_lower_outer_SOL.isel(theta=slice(myg)).values, ) + ds.close() def test_interpolate_parallel_all_variables_arg(self, bout_xyt_example_files): # Check that passing 'variables=...' to interpolate_parallel() does actually @@ -1138,6 +1151,7 @@ def test_interpolate_parallel_all_variables_arg(self, bout_xyt_example_files): "psixy", ) ) + ds.close() def test_interpolate_parallel_limiter( self, @@ -1184,6 +1198,7 @@ def test_interpolate_parallel_limiter( # Remove attributes that are expected to be different del v_sol.attrs["regions"] xrt.assert_identical(v_noregions.isel(x=slice(ixs1 - mxg, None)), v_sol) + ds.close() def test_integrate_midpoints_slab(self, bout_xyt_example_files): # Create data @@ -1370,6 +1385,7 @@ def test_integrate_midpoints_slab(self, bout_xyt_example_files): (tintegral * xintegral * yintegral * zintegral), rtol=1.4e-4, ) + ds.close() @pytest.mark.parametrize( "location", ["CELL_CENTRE", "CELL_XLOW", "CELL_YLOW", "CELL_ZLOW"] @@ -1722,6 +1738,7 @@ def func(theta): rtol=1.0e-5, atol=0.0, ) + ds.close() def test_interpolate_from_unstructured(self, bout_xyt_example_files): dataset_list, grid_ds = bout_xyt_example_files( @@ -1771,6 +1788,7 @@ def test_interpolate_from_unstructured(self, bout_xyt_example_files): # Check there were non-nan values to compare in the previous assert_allclose assert int((~np.isnan(n_rect)).count()) == 851 + ds.close() def test_interpolate_from_unstructured_unstructured_output( self, bout_xyt_example_files @@ -1837,6 +1855,7 @@ def test_interpolate_from_unstructured_unstructured_output( n_check = R_unstruct + Z_unstruct n_unstruct = n_unstruct.isel(t=0, zeta=0) npt.assert_allclose(n_unstruct.values, n_check, atol=1.0e-7) + ds.close() def test_interpolate_to_cartesian(self, bout_xyt_example_files): dataset_list = bout_xyt_example_files( @@ -1880,6 +1899,7 @@ def test_interpolate_to_cartesian(self, bout_xyt_example_files): assert np.isnan(ds_cartesian["n"].isel(t=0, X=0, Y=0, Z=0).item()) # Check output is float32 assert ds_cartesian["n"].dtype == np.float32 + ds.close() def test_add_cartesian_coordinates(self, bout_xyt_example_files): dataset_list = bout_xyt_example_files(None, nxpe=1, nype=1, nt=1) @@ -1918,6 +1938,7 @@ def test_add_cartesian_coordinates(self, bout_xyt_example_files): ds["Z_cartesian"], Z[:, :, np.newaxis] * np.ones(ds.metadata["nz"])[np.newaxis, np.newaxis, :], ) + ds.close() class TestLoadInputFile: @@ -1938,6 +1959,7 @@ def test_load_options_in_dataset(self, bout_xyt_example_files): datapath=dataset_list, inputfilepath=EXAMPLE_OPTIONS_FILE_PATH ) assert isinstance(ds.options, BoutOptions) + ds.close() @pytest.mark.skip(reason="Not yet implemented") @@ -1966,6 +1988,7 @@ def test_save_all(self, tmp_path_factory, bout_xyt_example_files): # Compare equal (not identical because attributes are changed when saving) xrt.assert_equal(original, recovered) + original.close() @pytest.mark.parametrize("geometry", [None, "toroidal"]) def test_reload_all(self, tmp_path_factory, bout_xyt_example_files, geometry): @@ -2021,6 +2044,8 @@ def test_reload_all(self, tmp_path_factory, bout_xyt_example_files, geometry): ) # Datasets won't be exactly the same because different geometry was # applied + recovered.close() + original.close() @pytest.mark.parametrize("save_dtype", [np.float64, np.float32]) @pytest.mark.parametrize( @@ -2056,6 +2081,7 @@ def test_save_dtype( for v in original: assert recovered[v].values.dtype == np.dtype(save_dtype) + original.close() def test_save_separate_variables(self, tmp_path_factory, bout_xyt_example_files): path = bout_xyt_example_files( @@ -2083,6 +2109,8 @@ def test_save_separate_variables(self, tmp_path_factory, bout_xyt_example_files) savepath = savedir.joinpath("temp_boutdata_*.nc") recovered = open_boutdataset(savepath) xrt.assert_identical(original, recovered) + original.close() + recovered.close() @pytest.mark.parametrize("geometry", [None, "toroidal"]) def test_reload_separate_variables( @@ -2130,6 +2158,8 @@ def test_reload_separate_variables( # Compare xrt.assert_identical(recovered, original) + original.close() + recovered.close() @pytest.mark.parametrize("geometry", [None, "toroidal"]) def test_reload_separate_variables_time_split( @@ -2184,6 +2214,8 @@ def test_reload_separate_variables_time_split( # Compare xrt.assert_identical(recovered, original) + original.close() + recovered.close() class TestSaveRestart: @@ -2265,6 +2297,7 @@ def test_to_restart(self, tmp_path_factory, bout_xyt_example_files, tind): assert restart_ds[v].values == t_array elif v not in rank_dependent_vars: assert restart_ds[v].values == check_ds.metadata[v] + ds.close() def test_to_restart_change_npe(self, tmp_path_factory, bout_xyt_example_files): nxpe_in = 3 @@ -2337,6 +2370,7 @@ def test_to_restart_change_npe(self, tmp_path_factory, bout_xyt_example_files): assert restart_ds[v].values == t_array elif v not in rank_dependent_vars: assert restart_ds[v].values == check_ds.metadata[v] + ds.close() @pytest.mark.long def test_to_restart_change_npe_doublenull( @@ -2413,6 +2447,7 @@ def test_to_restart_change_npe_doublenull( assert restart_ds[v].values == t_array elif v not in rank_dependent_vars: assert restart_ds[v].values == check_ds.metadata[v] + ds.close() @pytest.mark.long @pytest.mark.parametrize("npes", [(2, 6), (3, 4)]) @@ -2445,6 +2480,7 @@ def test_to_restart_change_npe_doublenull_expect_fail( ) with pytest.raises(ValueError): ds.bout.to_restart(savepath=savepath, nxpe=nxpe, nype=nype) + ds.close() def test_from_restart_to_restart(self, tmp_path): datapath = Path(__file__).parent.joinpath( @@ -2453,3 +2489,4 @@ def test_from_restart_to_restart(self, tmp_path): ds = open_boutdataset(datapath, keep_xboundaries=True, keep_yboundaries=True) ds.bout.to_restart(savepath=tmp_path, nxpe=1, nype=4) + ds.close() diff --git a/xbout/tests/test_grid.py b/xbout/tests/test_grid.py index 87ff4a98..56d06cc9 100644 --- a/xbout/tests/test_grid.py +++ b/xbout/tests/test_grid.py @@ -36,10 +36,10 @@ class TestOpenGrid: def test_open_grid(self, create_example_grid_file): example_grid = create_example_grid_file with pytest.warns(UserWarning): - result = open_boutdataset(datapath=example_grid) - result = result.drop_vars(["x", "y"]) - assert_equal(result, open_dataset(example_grid)) - result.close() + with open_boutdataset(datapath=example_grid) as result: + result = result.drop_vars(["x", "y"]) + with open_dataset(example_grid) as tmp: + assert_equal(result, tmp) def test_open_grid_extra_dims(self, create_example_grid_file, tmp_path_factory): example_grid = open_dataset(create_example_grid_file) @@ -57,10 +57,10 @@ def test_open_grid_extra_dims(self, create_example_grid_file, tmp_path_factory): with pytest.warns( UserWarning, match="drop all variables containing " "the dimensions 'w'" ): - result = open_boutdataset(datapath=dodgy_grid_path) - result = result.drop_vars(["x", "y"]) + with open_boutdataset(datapath=dodgy_grid_path) as result: + result = result.drop_vars(["x", "y"]) assert_equal(result, example_grid) - result.close() + example_grid.close() def test_open_grid_apply_geometry(self, create_example_grid_file): @register_geometry(name="Schwarzschild") @@ -71,9 +71,7 @@ def add_schwarzschild_coords(ds, coordinates=None): example_grid = create_example_grid_file - result = result = open_boutdataset( - datapath=example_grid, geometry="Schwarzschild" - ) + result = open_boutdataset(datapath=example_grid, geometry="Schwarzschild") assert_equal(result["event_horizon"], DataArray(4.0)) # clean up @@ -83,17 +81,19 @@ def add_schwarzschild_coords(ds, coordinates=None): def test_open_grid_chunks(self, create_example_grid_file): example_grid = create_example_grid_file with pytest.warns(UserWarning): - result = open_boutdataset(datapath=example_grid, chunks={"x": 4, "y": 5}) - result = result.drop_vars(["x", "y"]) - assert_equal(result, open_dataset(example_grid)) - result.close() + with open_boutdataset( + datapath=example_grid, chunks={"x": 4, "y": 5} + ) as result: + result = result.drop_vars(["x", "y"]) + with open_dataset(example_grid) as tmp: + assert_equal(result, tmp) def test_open_grid_chunks_not_in_grid(self, create_example_grid_file): example_grid = create_example_grid_file with pytest.warns(UserWarning): - result = open_boutdataset( + with open_boutdataset( datapath=example_grid, chunks={"anonexistantdimension": 5} - ) - result = result.drop_vars(["x", "y"]) - assert_equal(result, open_dataset(example_grid)) - result.close() + ) as result: + result = result.drop_vars(["x", "y"]) + with open_dataset(example_grid) as tmp: + assert_equal(result, tmp) From 7e79284a9a3c5250cfeae801c4e91ef3a4ba4b21 Mon Sep 17 00:00:00 2001 From: David Bold Date: Tue, 10 Mar 2026 10:46:05 +0100 Subject: [PATCH 05/12] CI: get backtrace on timeout --- .github/workflows/pythonpackage.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/pythonpackage.yml b/.github/workflows/pythonpackage.yml index 2dab6935..66ee6182 100644 --- a/.github/workflows/pythonpackage.yml +++ b/.github/workflows/pythonpackage.yml @@ -40,7 +40,7 @@ jobs: pip install git+https://github.com/dschwoerer/xarray@netcdf4-locking-closing - name: Test with pytest run: | - pytest -vv --long --cov + timeout -s INT 90m pytest -vvv --long --cov --full-trace # test with oldest supported version of packages From 689613b00e48a80d951e2cae508e6ad3c4aa5b8e Mon Sep 17 00:00:00 2001 From: Mike Kryjak Date: Mon, 13 Apr 2026 14:35:10 +0100 Subject: [PATCH 06/12] Safe boutdataset loading in test_boutdataset::TestSave::test_reload_all Cause of the next hang locally. This is a different file now than last time, which means it may be that many tests need to be fixed. For now, I am pushing this to see if this is enough. --- xbout/tests/test_boutdataset.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/xbout/tests/test_boutdataset.py b/xbout/tests/test_boutdataset.py index 7264caae..5294a9e8 100644 --- a/xbout/tests/test_boutdataset.py +++ b/xbout/tests/test_boutdataset.py @@ -2002,19 +2002,21 @@ def test_reload_all(self, tmp_path_factory, bout_xyt_example_files, geometry): # Load it as a boutdataset if geometry is None: with pytest.warns(UserWarning): - original = open_boutdataset( + with open_boutdataset( datapath=path, inputfilepath=None, geometry=geometry, gridfilepath=None if geometry is None else gridpath, - ) + ) as original_file: + original = original_file.load() else: - original = open_boutdataset( + with open_boutdataset( datapath=path, inputfilepath=None, geometry=geometry, gridfilepath=None if geometry is None else gridpath, - ) + ) as original_file: + original = original_file.load() # Save it to a netCDF file savedir = tmp_path_factory.mktemp("test_reload_all") From e4104f4f3b5b325eb3be987496fb38f12b16cd01 Mon Sep 17 00:00:00 2001 From: Mike Kryjak Date: Tue, 14 Apr 2026 09:06:05 +0100 Subject: [PATCH 07/12] Make I/O safe all over test_boutdataset --- xbout/tests/test_boutdataset.py | 113 +++++++++++++++++--------------- 1 file changed, 59 insertions(+), 54 deletions(-) diff --git a/xbout/tests/test_boutdataset.py b/xbout/tests/test_boutdataset.py index 5294a9e8..c872a30b 100644 --- a/xbout/tests/test_boutdataset.py +++ b/xbout/tests/test_boutdataset.py @@ -1976,7 +1976,8 @@ def test_save_all(self, tmp_path_factory, bout_xyt_example_files): # Load it as a boutdataset with pytest.warns(UserWarning): - original = open_boutdataset(datapath=path, inputfilepath=None) + with open_boutdataset(datapath=path, inputfilepath=None) as original_file: + original = original_file.load().copy(deep=True) # Save it to a netCDF file savedir = tmp_path_factory.mktemp("test_save_all") @@ -1984,10 +1985,9 @@ def test_save_all(self, tmp_path_factory, bout_xyt_example_files): original.bout.save(savepath=savepath) # Load it again using bare xarray - recovered = open_dataset(savepath) - - # Compare equal (not identical because attributes are changed when saving) - xrt.assert_equal(original, recovered) + with open_dataset(savepath) as recovered: + # Compare equal (not identical because attributes are changed when saving) + xrt.assert_equal(original, recovered) original.close() @pytest.mark.parametrize("geometry", [None, "toroidal"]) @@ -2008,7 +2008,7 @@ def test_reload_all(self, tmp_path_factory, bout_xyt_example_files, geometry): geometry=geometry, gridfilepath=None if geometry is None else gridpath, ) as original_file: - original = original_file.load() + original = original_file.load().copy(deep=True) else: with open_boutdataset( datapath=path, @@ -2016,7 +2016,7 @@ def test_reload_all(self, tmp_path_factory, bout_xyt_example_files, geometry): geometry=geometry, gridfilepath=None if geometry is None else gridpath, ) as original_file: - original = original_file.load() + original = original_file.load().copy(deep=True) # Save it to a netCDF file savedir = tmp_path_factory.mktemp("test_reload_all") @@ -2024,29 +2024,28 @@ def test_reload_all(self, tmp_path_factory, bout_xyt_example_files, geometry): original.bout.save(savepath=savepath) # Load it again - recovered = open_boutdataset(savepath) - - xrt.assert_identical(original.load(), recovered.load()) + with open_boutdataset(savepath) as recovered: + xrt.assert_identical(original.load(), recovered.load()) # Check if we can load with a different geometry argument for reload_geometry in [None, "toroidal"]: if reload_geometry is None or geometry == reload_geometry: - recovered = open_boutdataset( + with open_boutdataset( savepath, geometry=reload_geometry, gridfilepath=None if reload_geometry is None else gridpath, - ) - xrt.assert_identical(original.load(), recovered.load()) + ) as recovered: + xrt.assert_identical(original.load(), recovered.load()) else: # Expect a warning because we change the geometry print("here", gridpath) with pytest.warns(UserWarning): - recovered = open_boutdataset( + with open_boutdataset( savepath, geometry=reload_geometry, gridfilepath=gridpath - ) + ): + pass # Datasets won't be exactly the same because different geometry was # applied - recovered.close() original.close() @pytest.mark.parametrize("save_dtype", [np.float64, np.float32]) @@ -2063,7 +2062,8 @@ def test_save_dtype( # Load it as a boutdataset with pytest.warns(UserWarning): - original = open_boutdataset(datapath=path, inputfilepath=None) + with open_boutdataset(datapath=path, inputfilepath=None) as original_file: + original = original_file.load().copy(deep=True) # Save it to a netCDF file savedir = tmp_path_factory.mktemp("test_save_dtype") @@ -2076,13 +2076,12 @@ def test_save_dtype( if separate_vars: for v in ["n", "T"]: savepath = savedir.joinpath(f"temp_boutdata_{v}.nc") - recovered = open_dataset(savepath) - assert recovered[v].values.dtype == np.dtype(save_dtype) + with open_dataset(savepath) as recovered: + assert recovered[v].values.dtype == np.dtype(save_dtype) else: - recovered = open_dataset(savepath) - - for v in original: - assert recovered[v].values.dtype == np.dtype(save_dtype) + with open_dataset(savepath) as recovered: + for v in original: + assert recovered[v].values.dtype == np.dtype(save_dtype) original.close() def test_save_separate_variables(self, tmp_path_factory, bout_xyt_example_files): @@ -2092,7 +2091,8 @@ def test_save_separate_variables(self, tmp_path_factory, bout_xyt_example_files) # Load it as a boutdataset with pytest.warns(UserWarning): - original = open_boutdataset(datapath=path, inputfilepath=None) + with open_boutdataset(datapath=path, inputfilepath=None) as original_file: + original = original_file.load().copy(deep=True) # Save it to a netCDF file savedir = tmp_path_factory.mktemp("test_save_separate_variables") @@ -2102,17 +2102,15 @@ def test_save_separate_variables(self, tmp_path_factory, bout_xyt_example_files) for var in ["n", "T"]: # Load it again using bare xarray savepath = savedir.joinpath(f"temp_boutdata_{var}.nc") - recovered = open_dataset(savepath) - - # Compare equal (not identical because attributes are changed when saving) - xrt.assert_equal(recovered[var], original[var]) + with open_dataset(savepath) as recovered: + # Compare equal (not identical because attributes are changed when saving) + xrt.assert_equal(recovered[var], original[var]) # test open_boutdataset() on dataset saved with separate_vars=True savepath = savedir.joinpath("temp_boutdata_*.nc") - recovered = open_boutdataset(savepath) - xrt.assert_identical(original, recovered) + with open_boutdataset(savepath) as recovered: + xrt.assert_identical(original, recovered) original.close() - recovered.close() @pytest.mark.parametrize("geometry", [None, "toroidal"]) def test_reload_separate_variables( @@ -2135,19 +2133,21 @@ def test_reload_separate_variables( # Load it as a boutdataset if geometry is None: with pytest.warns(UserWarning): - original = open_boutdataset( + with open_boutdataset( datapath=path, inputfilepath=None, geometry=geometry, gridfilepath=gridpath, - ) + ) as original_file: + original = original_file.load().copy(deep=True) else: - original = open_boutdataset( + with open_boutdataset( datapath=path, inputfilepath=None, geometry=geometry, gridfilepath=gridpath, - ) + ) as original_file: + original = original_file.load().copy(deep=True) # Save it to a netCDF file savedir = tmp_path_factory.mktemp("test_reload_separate_variables") @@ -2156,12 +2156,10 @@ def test_reload_separate_variables( # Load it again savepath = savedir.joinpath("temp_boutdata_*.nc") - recovered = open_boutdataset(savepath) - - # Compare - xrt.assert_identical(recovered, original) + with open_boutdataset(savepath) as recovered: + # Compare + xrt.assert_identical(recovered, original) original.close() - recovered.close() @pytest.mark.parametrize("geometry", [None, "toroidal"]) def test_reload_separate_variables_time_split( @@ -2184,19 +2182,21 @@ def test_reload_separate_variables_time_split( # Load it as a boutdataset if geometry is None: with pytest.warns(UserWarning): - original = open_boutdataset( + with open_boutdataset( datapath=path, inputfilepath=None, geometry=geometry, gridfilepath=gridpath, - ) + ) as original_file: + original = original_file.load().copy(deep=True) else: - original = open_boutdataset( + with open_boutdataset( datapath=path, inputfilepath=None, geometry=geometry, gridfilepath=gridpath, - ) + ) as original_file: + original = original_file.load().copy(deep=True) # Save it to a netCDF file tcoord = original.metadata.get("bout_tdim", "t") @@ -2212,12 +2212,10 @@ def test_reload_separate_variables_time_split( # Load it again savepath = savedir.joinpath("temp_boutdata_*.nc") - recovered = open_boutdataset(savepath) - - # Compare - xrt.assert_identical(recovered, original) + with open_boutdataset(savepath) as recovered: + # Compare + xrt.assert_identical(recovered, original) original.close() - recovered.close() class TestSaveRestart: @@ -2240,7 +2238,8 @@ def test_to_restart(self, tmp_path_factory, bout_xyt_example_files, tind): # Load it as a boutdataset with pytest.warns(UserWarning): - ds = open_boutdataset(datapath=path) + with open_boutdataset(datapath=path) as ds_file: + ds = ds_file.load().copy(deep=True) nx = ds.metadata["nx"] ny = ds.metadata["ny"] @@ -2322,7 +2321,8 @@ def test_to_restart_change_npe(self, tmp_path_factory, bout_xyt_example_files): # Load it as a boutdataset with pytest.warns(UserWarning): - ds = open_boutdataset(datapath=path) + with open_boutdataset(datapath=path) as ds_file: + ds = ds_file.load().copy(deep=True) nx = ds.metadata["nx"] ny = ds.metadata["ny"] @@ -2399,7 +2399,8 @@ def test_to_restart_change_npe_doublenull( # Load it as a boutdataset with pytest.warns(UserWarning): - ds = open_boutdataset(datapath=path) + with open_boutdataset(datapath=path) as ds_file: + ds = ds_file.load().copy(deep=True) nx = ds.metadata["nx"] ny = ds.metadata["ny"] @@ -2474,7 +2475,8 @@ def test_to_restart_change_npe_doublenull_expect_fail( # Load it as a boutdataset with pytest.warns(UserWarning): - ds = open_boutdataset(datapath=path) + with open_boutdataset(datapath=path) as ds_file: + ds = ds_file.load().copy(deep=True) # Save it to a netCDF file savepath = tmp_path_factory.mktemp( @@ -2488,7 +2490,10 @@ def test_from_restart_to_restart(self, tmp_path): datapath = Path(__file__).parent.joinpath( "data", "restart", "BOUT.restart.*.nc" ) - ds = open_boutdataset(datapath, keep_xboundaries=True, keep_yboundaries=True) + with open_boutdataset( + datapath, keep_xboundaries=True, keep_yboundaries=True + ) as ds_file: + ds = ds_file.load().copy(deep=True) ds.bout.to_restart(savepath=tmp_path, nxpe=1, nype=4) ds.close() From aad6212f7e23cd8b5eeb2f56b1270a2cc03c834e Mon Sep 17 00:00:00 2001 From: Mike Kryjak Date: Tue, 14 Apr 2026 09:06:30 +0100 Subject: [PATCH 08/12] Make boutdataset.save and boutdataset.to_restart use h5netcdf --- xbout/boutdataset.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/xbout/boutdataset.py b/xbout/boutdataset.py index 3a662fb2..474c9edd 100644 --- a/xbout/boutdataset.py +++ b/xbout/boutdataset.py @@ -29,6 +29,7 @@ from .region import _from_region from .utils import ( _add_cartesian_coordinates, + _check_filetype, _get_bounding_surfaces, _split_into_restarts, ) @@ -775,7 +776,7 @@ def get_bounding_surfaces(self, coords=("R", "Z")): def save( self, savepath="./boutdata.nc", - filetype="NETCDF4", + filetype="h5netcdf", variables=None, save_dtype=None, separate_vars=False, @@ -992,7 +993,12 @@ def to_restart( ) with ProgressBar(): - xr.save_mfdataset(restart_datasets, paths, compute=True) + xr.save_mfdataset( + restart_datasets, + paths, + compute=True, + engine=_check_filetype(paths[0]), + ) def animate_list( self, From 7e91494ceab1e3c84e12967c9d126bbdfd9fe094 Mon Sep 17 00:00:00 2001 From: Mike Kryjak Date: Tue, 14 Apr 2026 11:15:31 +0100 Subject: [PATCH 09/12] Fix boutdataset.save engine choice I got confused between engine and filetype - they are two separate things. filetype should be NETCDF4 and this uses the latest standard, but the engine for writing the file is a separate thing - now hooked up to the same tool as the other saves and defaulting to h5netcdf. --- xbout/boutdataset.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/xbout/boutdataset.py b/xbout/boutdataset.py index 474c9edd..b26297d3 100644 --- a/xbout/boutdataset.py +++ b/xbout/boutdataset.py @@ -776,7 +776,7 @@ def get_bounding_surfaces(self, coords=("R", "Z")): def save( self, savepath="./boutdata.nc", - filetype="h5netcdf", + filetype="NETCDF4", variables=None, save_dtype=None, separate_vars=False, @@ -789,6 +789,9 @@ def save( ---------- savepath : str, optional filetype : str, optional + netCDF format passed to xarray. NOT THE SAME as "engine", which + defaults to h5netcdf via ``_check_filetype()``. + See https://docs.xarray.dev/en/latest/generated/xarray.Dataset.to_netcdf.html variables : list of str, optional Variables from the dataset to save. Default is to save all of them. separate_vars: bool, optional @@ -911,6 +914,7 @@ def dict_to_attrs(obj, section): single_var_ds.to_netcdf( path=str(var_savepath), format=filetype, + engine=_check_filetype(Path(var_savepath)), compute=True, encoding=var_encoding, ) @@ -924,7 +928,11 @@ def dict_to_attrs(obj, section): print("Saving data...") with ProgressBar(): to_save.to_netcdf( - path=savepath, format=filetype, compute=True, encoding=encoding + path=savepath, + engine=_check_filetype(Path(savepath)), + format=filetype, + compute=True, + encoding=encoding, ) return From 6a85150b8961bd6fa6b7cffb33114f330582f932 Mon Sep 17 00:00:00 2001 From: Mike Kryjak Date: Tue, 14 Apr 2026 11:17:37 +0100 Subject: [PATCH 10/12] Formatting --- xbout/boutdataset.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/xbout/boutdataset.py b/xbout/boutdataset.py index b26297d3..c316884c 100644 --- a/xbout/boutdataset.py +++ b/xbout/boutdataset.py @@ -720,8 +720,7 @@ def remove_yboundaries(self, **kwargs): new_metadata = variables[-1].metadata elif ycoord in self.data[v].dims: raise ValueError( - f"{v} only has a {ycoord}-dimension so cannot split " - f"into regions." + f"{v} only has a {ycoord}-dimension so cannot split into regions." ) else: variable = self.data[v] @@ -790,7 +789,7 @@ def save( savepath : str, optional filetype : str, optional netCDF format passed to xarray. NOT THE SAME as "engine", which - defaults to h5netcdf via ``_check_filetype()``. + defaults to h5netcdf via ``_check_filetype()``. See https://docs.xarray.dev/en/latest/generated/xarray.Dataset.to_netcdf.html variables : list of str, optional Variables from the dataset to save. Default is to save all of them. From b083b08525651d74219830fcb5a5faafa2207075 Mon Sep 17 00:00:00 2001 From: Mike Kryjak Date: Tue, 14 Apr 2026 12:55:39 +0100 Subject: [PATCH 11/12] Make open_boutdataset default to h5netcdf --- xbout/load.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/xbout/load.py b/xbout/load.py index 4f565f8b..c3ddedd1 100644 --- a/xbout/load.py +++ b/xbout/load.py @@ -197,12 +197,15 @@ def open_boutdataset( # xr.open_mfdataset only accepts glob patterns as # strings, not Path objects datapath = str(datapath) + _, filetype = _expand_filepaths(datapath) + reload_kwargs = dict(kwargs) + reload_kwargs.setdefault("engine", file_engine or filetype) ds = xr.open_mfdataset( datapath, chunks=chunks, combine="by_coords", data_vars="minimal", - **kwargs, + **reload_kwargs, ) elif input_type == "reload_fake": ds = xr.combine_by_coords(datapath, data_vars="minimal").chunk(chunks) From 194dee8e12c238667be5176cb448ef7fb2a0eb74 Mon Sep 17 00:00:00 2001 From: Mike Kryjak Date: Tue, 14 Apr 2026 13:22:08 +0100 Subject: [PATCH 12/12] Use h5netcdf for reads in test_boutdataset.py --- xbout/tests/test_boutdataset.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/xbout/tests/test_boutdataset.py b/xbout/tests/test_boutdataset.py index c872a30b..bb94b8b3 100644 --- a/xbout/tests/test_boutdataset.py +++ b/xbout/tests/test_boutdataset.py @@ -17,7 +17,7 @@ ) from xbout import open_boutdataset from xbout.geometries import apply_geometry -from xbout.utils import _set_attrs_on_all_vars, _1d_coord_from_spacing +from xbout.utils import _check_filetype, _set_attrs_on_all_vars, _1d_coord_from_spacing from xbout.tests.utils_for_tests import set_geometry_from_input_file EXAMPLE_OPTIONS_FILE_PATH = "./xbout/tests/data/options/BOUT.inp" @@ -2276,8 +2276,10 @@ def test_to_restart(self, tmp_path_factory, bout_xyt_example_files, tind): # ignore variables that depend on the rank of the BOUT++ process - they # cannot be consistent with check_ds rank_dependent_vars = ["PE_XIND", "PE_YIND", "MYPE"] + restart_path = savepath.joinpath(f"BOUT.restart.{num}.nc") with open_dataset( - savepath.joinpath(f"BOUT.restart.{num}.nc") + restart_path, + engine=_check_filetype(restart_path), ) as restart_file: restart_ds = restart_file.load() @@ -2351,8 +2353,10 @@ def test_to_restart_change_npe(self, tmp_path_factory, bout_xyt_example_files): # ignore variables that depend on the rank of the BOUT++ process - they # cannot be consistent with check_ds rank_dependent_vars = ["PE_XIND", "PE_YIND", "MYPE"] + restart_path = savepath.joinpath(f"BOUT.restart.{num}.nc") with open_dataset( - savepath.joinpath(f"BOUT.restart.{num}.nc") + restart_path, + engine=_check_filetype(restart_path), ) as restart_file: restart_ds = restart_file.load() @@ -2429,8 +2433,10 @@ def test_to_restart_change_npe_doublenull( # ignore variables that depend on the rank of the BOUT++ process - they # cannot be consistent with check_ds rank_dependent_vars = ["PE_XIND", "PE_YIND", "MYPE"] + restart_path = savepath.joinpath(f"BOUT.restart.{num}.nc") with open_dataset( - savepath.joinpath(f"BOUT.restart.{num}.nc") + restart_path, + engine=_check_filetype(restart_path), ) as restart_file: restart_ds = restart_file.load()