diff --git a/.github/workflows/master.yml b/.github/workflows/master.yml index b16fb83e..e8a753dc 100644 --- a/.github/workflows/master.yml +++ b/.github/workflows/master.yml @@ -20,7 +20,7 @@ jobs: if: always() strategy: matrix: - python-version: ["3.12", "3.13", "3.14", "3.x"] + python-version: ["3.12", "3.13", "3.14"] fail-fast: false steps: diff --git a/.github/workflows/pythonpackage.yml b/.github/workflows/pythonpackage.yml index 66ee6182..55bfbef7 100644 --- a/.github/workflows/pythonpackage.yml +++ b/.github/workflows/pythonpackage.yml @@ -20,7 +20,7 @@ jobs: if: always() strategy: matrix: - python-version: ["3.12", "3.13", "3.14", "3.x"] + python-version: ["3.12", "3.13", "3.14"] fail-fast: false steps: diff --git a/xbout/lazyload.py b/xbout/lazyload.py index e4c53630..520ecfc8 100644 --- a/xbout/lazyload.py +++ b/xbout/lazyload.py @@ -226,7 +226,10 @@ def make_lazy_array( # Check x and y dimension sizes assert file_shape[xdim] == chunkinfo["nxsub"] - assert file_shape[ydim] == chunkinfo["nysub"] + assert ( + file_shape[ydim] == chunkinfo["nysub"] + ), """Maybe you are trying to read a squashed datafile, which is +not supported with lazyloading. Try loading with setting lazy_load=False""" # The name serves two purposes: # 1. Graph key prefix — it's the first element of every task key tuple @@ -329,9 +332,13 @@ def lazy_open_boutdataset( # Extract all scalars as metadata metadata = { - name: var.item() for name, var in ds.data_vars.items() if len(var.dims) == 0 + name: var.item() + for name, var in ds.data_vars.items() + if len(var.dims) == 0 and name != "dz" } + drop_vars = kwargs.get("drop_variables", []) + # Identify processor layout and the array slices from each file chunkinfo = make_chunkinfo( metadata, keep_xboundaries=keep_xboundaries, keep_yboundaries=keep_yboundaries @@ -340,6 +347,8 @@ def lazy_open_boutdataset( # Process all data variables data_vars = {} for name, var in ds.data_vars.items(): + if name in drop_vars: + continue if "x" in var.dims and "y" in var.dims: # Array distributed over processors in x and y data_vars[name] = xr.DataArray( @@ -350,6 +359,8 @@ def lazy_open_boutdataset( attrs=var.attrs, ) elif len(var.dims) == 0: + if name == "dz": + data_vars[name] = var continue # scalars already in metadata elif ("x" not in var.dims) and ("y" not in var.dims): # Take DataArray from first processor diff --git a/xbout/load.py b/xbout/load.py index 8cf714db..1a758317 100644 --- a/xbout/load.py +++ b/xbout/load.py @@ -91,7 +91,7 @@ def open_boutdataset( source term was changed between some of the restarts, but the source term is saved as time-independent, without a t-dimension). In this case one workaround is to pass a list of - variable names to the keyword argument ``drop_vars`` to ignore the + variable names to the keyword argument ``drop_variables`` to ignore the variables with conflicts, e.g. if ``"S1"`` and ``"S2"`` have conflicts:: @@ -291,7 +291,7 @@ def attrs_remove_section(obj, section): if "dump" in input_type or "restart" in input_type: def is_netcdf_collection(datapath): - if not isinstance(datapath, str): + if not _is_path(datapath): return None # Expand globs into a list of files p = Path(datapath) diff --git a/xbout/tests/test_grid.py b/xbout/tests/test_grid.py index 56d06cc9..aa8c42de 100644 --- a/xbout/tests/test_grid.py +++ b/xbout/tests/test_grid.py @@ -55,12 +55,14 @@ def test_open_grid_extra_dims(self, create_example_grid_file, tmp_path_factory): merge([example_grid, new_var]).to_netcdf(dodgy_grid_path, engine="h5netcdf") with pytest.warns( - UserWarning, match="drop all variables containing " "the dimensions 'w'" + UserWarning, match="drop all variables containing the dimensions 'w'" ): - with open_boutdataset(datapath=dodgy_grid_path) as result: - result = result.drop_vars(["x", "y"]) + with pytest.warns(UserWarning, match="o geometry type fou"): + with open_boutdataset(datapath=dodgy_grid_path) as result: + result = result.drop_vars(["x", "y"]) assert_equal(result, example_grid) example_grid.close() + result.close() def test_open_grid_apply_geometry(self, create_example_grid_file): @register_geometry(name="Schwarzschild") diff --git a/xbout/tests/test_load.py b/xbout/tests/test_load.py index 6b7ad323..75b1e33d 100644 --- a/xbout/tests/test_load.py +++ b/xbout/tests/test_load.py @@ -294,9 +294,9 @@ def test_single_file(self, tmp_path_factory, bout_xyt_example_files): with pytest.warns(UserWarning): actual = open_boutdataset(datapath=path, keep_xboundaries=False) expected = create_bout_ds() - expected = expected.set_coords(["t_array", "dx", "dy", "dz"]).rename( - t_array="t" - ) + expected = expected.set_coords(["t_array", "dx", "dy", "dz"]) + with pytest.warns(UserWarning): + expected = expected.rename(t_array="t") xrt.assert_equal( actual.drop_vars(["x", "y", "z"]).load(), expected.drop_vars( @@ -317,9 +317,9 @@ def test_squashed_file(self, tmp_path_factory, bout_xyt_example_files): with pytest.warns(UserWarning): actual = open_boutdataset(datapath=path, keep_xboundaries=False) expected = create_bout_ds(lengths=(6, 8, 12, 7)) - expected = expected.set_coords(["t_array", "dx", "dy", "dz"]).rename( - t_array="t" - ) + expected = expected.set_coords(["t_array", "dx", "dy", "dz"]) + with pytest.warns(UserWarning): + expected = expected.rename(t_array="t") xrt.assert_equal( actual.drop_vars(["x", "y", "z"]).load(), expected.drop_vars( @@ -400,6 +400,7 @@ def test_squashed_doublenull_file( datapath=path, keep_xboundaries=keep_xboundaries, keep_yboundaries=keep_yboundaries, + lazy_load=False, ) # bout_xyt_example_files when creating a 'squashed' file just makes it with @@ -428,9 +429,9 @@ def test_combine_along_x(self, tmp_path_factory, bout_xyt_example_files): dim="x", data_vars="minimal", ) - expected = expected.set_coords(["t_array", "dx", "dy", "dz"]).rename( - t_array="t" - ) + expected = expected.set_coords(["t_array", "dx", "dy", "dz"]) + with pytest.warns(UserWarning): + expected = expected.rename(t_array="t") xrt.assert_equal( actual.drop_vars(["x", "y", "z"]).load(), expected.drop_vars( @@ -462,9 +463,9 @@ def test_combine_along_y(self, tmp_path_factory, bout_xyt_example_files): expected = concat( [bout_ds(0), bout_ds(1), bout_ds(2)], dim="y", data_vars="minimal" ) - expected = expected.set_coords(["t_array", "dx", "dy", "dz"]).rename( - t_array="t" - ) + expected = expected.set_coords(["t_array", "dx", "dy", "dz"]) + with pytest.warns(UserWarning): + expected = expected.rename(t_array="t") xrt.assert_equal( actual.drop_vars(["x", "y", "z"]).load(), expected.drop_vars( @@ -525,9 +526,9 @@ def bout_ds(syn_data_type): data_vars="minimal", ) expected = concat([line1, line2, line3], dim="y", data_vars="minimal") - expected = expected.set_coords(["t_array", "dx", "dy", "dz"]).rename( - t_array="t" - ) + expected = expected.set_coords(["t_array", "dx", "dy", "dz"]) + with pytest.warns(UserWarning): + expected = expected.rename(t_array="t") vars_to_drop = METADATA_VARS + _BOUT_PER_PROC_VARIABLES xrt.assert_equal( actual.drop_vars(["x", "y", "z"]).load(), @@ -632,7 +633,10 @@ def test_restarts(self): datapath = Path(__file__).parent.joinpath( "data", "restart", "BOUT.restart.*.nc" ) - ds = open_boutdataset(datapath, keep_xboundaries=True, keep_yboundaries=True) + with pytest.warns(UserWarning): + ds = open_boutdataset( + datapath, keep_xboundaries=True, keep_yboundaries=True + ) assert "T" in ds @@ -860,7 +864,7 @@ def test_infer_boundaries_2d_parallelization_doublenull_by_filenum( @pytest.mark.parametrize("is_restart", [False, True]) def test_keep_xboundaries(self, is_restart): ds = create_test_data(0) - ds = ds.rename({"dim2": "x"}) + ds = ds.swap_dims({"dim2": "x"}) # Manually add filename - encoding normally added by xr.open_dataset ds.encoding["source"] = "folder0/BOUT.dmp.0.nc" @@ -882,7 +886,7 @@ def test_keep_xboundaries(self, is_restart): @pytest.mark.parametrize("is_restart", [False, True]) def test_keep_yboundaries(self, is_restart): ds = create_test_data(0) - ds = ds.rename({"dim2": "y"}) + ds = ds.swap_dims({"dim2": "y"}) # Manually add filename - encoding normally added by xr.open_dataset ds.encoding["source"] = "folder0/BOUT.dmp.0.nc" @@ -910,7 +914,7 @@ def test_keep_yboundaries_doublenull_by_filenum( self, filenum, lower, upper, is_restart ): ds = create_test_data(0) - ds = ds.rename({"dim2": "y"}) + ds = ds.swap_dims({"dim2": "y"}) # Manually add filename - encoding normally added by xr.open_dataset ds.encoding["source"] = "folder0/BOUT.dmp." + str(filenum) + ".nc"