diff --git a/README.md b/README.md
index 289e774..3bbfa6a 100644
--- a/README.md
+++ b/README.md
@@ -28,6 +28,8 @@ data.spectra # < xr.Dataset >
 
 If using Jupyter notebook, you can quickly preview the loaded metadata by simply running a cell with just `data` in it (or in regular python, by doing `print(data)`).
 
+> Note that, by default, `hdf5` support is disabled in `nt2py` (i.e., only the `ADIOS2` format is supported). To enable it, install the package with `pip install "nt2py[hdf5]"` instead of the plain `pip install nt2py`.
+
 #### Examples
 
 Plot a field (in cartesian space) at a specific time (or output step):
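Once spectra are read in, the new `SpectraDataset` slices like any other xarray object. A minimal sketch, mirroring the examples baked into `help_spectra` below (it assumes `data` was opened as in the README snippet above and that the output actually contains a spectrum named `N_1`):

```python
# energy distribution of species 1 at the 6th output step, restricted to E < 50
data.spectra.N_1.sel(E=slice(None, 50)).isel(t=5).plot()

# spectrum nearest in time to t = 0, over a fixed energy window
data.spectra.N_1.sel(E=slice(10.0, 20.0)).sel(t=0, method="nearest")
```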
diff --git a/dist/nt2py-1.3.0-py3-none-any.whl b/dist/nt2py-1.3.0-py3-none-any.whl
new file mode 100644
index 0000000..62f8ebd
Binary files /dev/null and b/dist/nt2py-1.3.0-py3-none-any.whl differ
diff --git a/dist/nt2py-1.3.0.tar.gz b/dist/nt2py-1.3.0.tar.gz
new file mode 100644
index 0000000..9470de8
Binary files /dev/null and b/dist/nt2py-1.3.0.tar.gz differ
diff --git a/nt2/__init__.py b/nt2/__init__.py
index fe39f4a..8679bb6 100644
--- a/nt2/__init__.py
+++ b/nt2/__init__.py
@@ -1,4 +1,4 @@
-__version__ = "1.2.1"
+__version__ = "1.3.0"
 
 import nt2.containers.data as nt2_data
diff --git a/nt2/containers/data.py b/nt2/containers/data.py
index 78b0a7a..a4f6e00 100644
--- a/nt2/containers/data.py
+++ b/nt2/containers/data.py
@@ -26,6 +26,7 @@ def override(method):
 from nt2.readers.adios2 import Reader as BP5Reader
 from nt2.containers.fields import Fields
 from nt2.containers.particles import Particles
+from nt2.containers.spectra import Spectra
 
 import nt2.plotters.polar as acc_polar
 import nt2.plotters.particles as acc_particles
@@ -64,7 +65,7 @@ class MoviePlotAccessor(acc_movie.accessor):
     pass
 
 
-class Data(Fields, Particles):
+class Data(Fields, Particles, Spectra):
     """Main class to manage all the data containers.
 
     Inherits from all category-specific containers.
@@ -283,7 +284,7 @@ def compactify(lst: list[Any] | KeysView[Any]) -> str:
             cntr = 0
             for l_ in lst:
                 if cntr > 5:
-                    c += "\n "
+                    c += "\n| "
                     cntr = 0
                 c += f"{l_}, "
                 cntr += 1
@@ -291,34 +292,67 @@
         string = ""
         if self.fields_defined:
-            string += "Fields:\n"
-            string += f" - coordinates: {self.coordinate_system.value}\n"
-            string += f" - data axes: {compactify(self.fields.indexes.keys())}\n"
+            string += "FieldsDataset:\n"
+            string += "==============\n"
+            string += f"| Coordinates:\n| {self.coordinate_system.value}\n|\n"
+            string += f"| Data axes:\n| {compactify(self.fields.indexes.keys())}\n|\n"
             delta_t = (
                 self.fields.coords["t"].values[1] - self.fields.coords["t"].values[0]
             ) / (self.fields.coords["s"].values[1] - self.fields.coords["s"].values[0])
-            string += f" - dt: {delta_t:.2e}\n"
+            string += f"| - dt: {delta_t:.2e}\n"
             for key in self.fields.coords.keys():
                 crd = self.fields.coords[key].values
                 fmt = ""
                 if key != "s":
                     fmt = ".2f"
-                string += f" - {key}: {crd.min():{fmt}} -> {crd.max():{fmt}} [{len(crd)}]\n"
-            string += (
-                f" - quantities: {compactify(sorted(self.fields.data_vars.keys()))}\n"
-            )
-            string += f" - total size: {ToHumanReadable(self.fields.nbytes)}\n\n"
+                string += f"| - {key}: {crd.min():{fmt}} -> {crd.max():{fmt}} [{len(crd)}]\n"
+            string += "|\n"
+            string += f"| Quantities:\n| {compactify(sorted(self.fields.data_vars.keys()))}\n|\n"
+            string += f"| Total size: {ToHumanReadable(self.fields.nbytes)}\n\n"
         else:
-            string += "Fields: empty\n\n"
+            string += "FieldsDataset:\n"
+            string += "==============\n"
+            string += " empty\n\n"
         if self.particles_defined and self.particles is not None:
             species = sorted(self.particles.species)
-            string += "Particle species:\n"
-            string += f" - species: {compactify(species)}\n"
-            string += f" - timesteps: {len(self.particles.times)}\n"
-            string += f" - quantities: {compactify(self.particles.columns)}\n"
-            string += f" - total size: {ToHumanReadable(self.particles.nbytes)}\n\n"
+            string += "ParticleDataset:\n"
+            string += "================\n"
+            string += f"| Species:\n| {compactify(species)}\n|\n"
+            string += f"| Timesteps:\n| {len(self.particles.times)}\n|\n"
+            string += f"| Quantities:\n| {compactify(self.particles.columns)}\n|\n"
+            string += f"| Total size: {ToHumanReadable(self.particles.nbytes)}\n|\n"
+            string += self.help_particles("| ")
+            string += "\n"
+        else:
+            string += "ParticleDataset:\n"
+            string += "================\n"
+            string += " empty\n\n"
+        if self.spectra_defined and self.spectra is not None:
+            string += "SpectraDataset:\n"
+            string += "===============\n"
+            string += (
+                f"| Data axes:\n| {compactify(self.spectra.indexes.keys())}\n|\n"
+            )
+            delta_t = (
+                self.spectra.coords["t"].values[1] - self.spectra.coords["t"].values[0]
+            ) / (
+                self.spectra.coords["s"].values[1] - self.spectra.coords["s"].values[0]
+            )
+            string += f"| - dt: {delta_t:.2e}\n"
+            for key in self.spectra.coords.keys():
+                crd = self.spectra.coords[key].values
+                fmt = ""
+                if key != "s":
+                    fmt = ".2f"
+                string += f"| - {key}: {crd.min():{fmt}} -> {crd.max():{fmt}} [{len(crd)}]\n"
+            string += "|\n"
+            string += f"| Quantities:\n| {compactify(sorted(self.spectra.data_vars.keys()))}\n|\n"
+            string += f"| Total size: {ToHumanReadable(self.spectra.nbytes)}\n|\n"
+            string += self.help_spectra("| ")
         else:
-            string += "Particles: empty\n\n"
+            string += "SpectraDataset:\n"
+            string += "===============\n"
+            string += " empty\n\n"
         return string
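For reference, the rewritten `__repr__` renders each category as a boxed, `|`-prefixed block; schematically (all values below are made up for illustration):

```text
FieldsDataset:
==============
| Coordinates:
| cartesian
|
| Data axes:
| t, x, y,
|
| - dt: 1.00e-01
| - t: 0.00 -> 10.00 [101]
|
| Quantities:
| Bx, By, Bz, Ex, Ey, Ez,
|
| Total size: 1.2 GB
```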
diff --git a/nt2/containers/particles.py b/nt2/containers/particles.py
index 5a01dfb..e7d4f0e 100644
--- a/nt2/containers/particles.py
+++ b/nt2/containers/particles.py
@@ -384,26 +384,26 @@ def _attach_columns(part: pd.DataFrame) -> pd.DataFrame:
             .drop(columns=["row"])
         )
 
-    def help(self) -> str:
-        ret = "- use .sel(...) to select particles based on criteria:\n"
-        ret += " t : time (float)\n"
-        ret += " st : step (int)\n"
-        ret += " sp : species (int)\n"
-        ret += " id : particle id (int)\n\n"
-        ret += " # example:\n"
-        ret += " # .sel(t=slice(10.0, 20.0), sp=[1, 2, 3], id=[42, 22])\n\n"
-        ret += "- use .isel(...) to select particles based on output step:\n"
-        ret += " t : timestamp index (int)\n"
-        ret += " st : step index (int)\n\n"
-        ret += " # example:\n"
-        ret += " # .isel(t=-1)\n"
-        ret += "\n"
-        ret += "- .sel and .isel can be chained together:\n\n"
-        ret += " # example:\n"
-        ret += " # .isel(t=-1).sel(sp=1).sel(id=[55, 66])\n\n"
-        ret += "- use .load(cols=[...]) to load data into a pandas DataFrame (`cols` defaults to all columns)\n\n"
-        ret += " # example:\n"
-        ret += " # .sel(...).load()\n"
+    def help(self, prepend="") -> str:
+        ret = f"{prepend}- use .sel(...) to select particles based on criteria:\n"
+        ret += f"{prepend} t : time (float)\n"
+        ret += f"{prepend} st : step (int)\n"
+        ret += f"{prepend} sp : species (int)\n"
+        ret += f"{prepend} id : particle id (int)\n{prepend}\n"
+        ret += f"{prepend} # example:\n"
+        ret += f"{prepend} # .sel(t=slice(10.0, 20.0), sp=[1, 2, 3], id=[42, 22])\n{prepend}\n"
+        ret += f"{prepend}- use .isel(...) to select particles based on output step:\n"
+        ret += f"{prepend} t : timestamp index (int)\n"
+        ret += f"{prepend} st : step index (int)\n{prepend}\n"
+        ret += f"{prepend} # example:\n"
+        ret += f"{prepend} # .isel(t=-1)\n"
+        ret += f"{prepend}\n"
+        ret += f"{prepend}- .sel and .isel can be chained together:\n{prepend}\n"
+        ret += f"{prepend} # example:\n"
+        ret += f"{prepend} # .isel(t=-1).sel(sp=1).sel(id=[55, 66])\n{prepend}\n"
+        ret += f"{prepend}- use .load(cols=[...]) to load data into a pandas DataFrame (`cols` defaults to all columns)\n{prepend}\n"
+        ret += f"{prepend} # example:\n"
+        ret += f"{prepend} # .sel(...).load()\n"
         return ret
 
     def __repr__(self) -> str:
@@ -726,3 +726,6 @@ def get_quantity_for_species(sp: int) -> np.ndarray:
         read_colnames=ReadColnames,
         read_column=ReadColumn,
     )
+
+    def help_particles(self, prepend="") -> str:
+        return self.particles.help(prepend) if self.particles is not None else ""
diff --git a/nt2/containers/spectra.py b/nt2/containers/spectra.py
new file mode 100644
index 0000000..7288b42
--- /dev/null
+++ b/nt2/containers/spectra.py
@@ -0,0 +1,137 @@
+from typing import Any
+
+import dask
+import dask.array as da
+import xarray as xr
+import numpy as np
+
+from nt2.containers.container import BaseContainer
+from nt2.readers.base import BaseReader
+
+
+class Spectra(BaseContainer):
+    """Parent class to manage the spectra dataset."""
+
+    @staticmethod
+    @dask.delayed
+    def __read_spectrum(path: str, reader: BaseReader, spectrum: str, step: int) -> Any:
+        """Reads a spectrum from the data.
+
+        This is a dask-delayed function used below to lazily build the dataset.
+
+        Parameters
+        ----------
+        path : str
+            Main path to the data.
+        reader : BaseReader
+            Reader to use to read the data.
+        spectrum : str
+            Spectrum array to read.
+        step : int
+            Step to read.
+
+        Returns
+        -------
+        Any
+            Spectrum data.
+
+        """
+        return reader.ReadArrayAtTimestep(path, "spectra", spectrum, step)
+
+    def __init__(self, **kwargs: Any) -> None:
+        super(Spectra, self).__init__(**kwargs)
+        if self.reader.DefinesCategory(self.path, "spectra"):
+            self.__spectra_defined = True
+            self.__spectra = self.__read_spectra()
+        else:
+            self.__spectra_defined = False
+            self.__spectra = xr.Dataset()
+
+    @property
+    def spectra_defined(self) -> bool:
+        """bool: Whether the spectra category is defined."""
+        return self.__spectra_defined
+
+    @property
+    def spectra(self) -> xr.Dataset:
+        """xr.Dataset: The spectra dataset."""
+        return self.__spectra
+
+    def __read_spectra(self) -> xr.Dataset:
+        self.reader.VerifySameCategoryNames(self.path, "spectra", "s")
+        valid_steps = sorted(self.reader.GetValidSteps(self.path, "spectra"))
+        spectra_names = self.reader.ReadCategoryNamesAtTimestep(
+            self.path, "spectra", "s", valid_steps[0]
+        )
+        spectra_names = set(s for s in sorted(spectra_names) if s.startswith("sN"))
+        ebin_name = "sEbn"
+        first_step = valid_steps[0]
+        first_spectrum_name = next(iter(spectra_names))
+        shape = self.reader.ReadArrayShapeExplicitlyAtTimestep(
+            self.path, "spectra", first_spectrum_name, first_step
+        )
+        times = self.reader.ReadPerTimestepVariable(self.path, "spectra", "Time", "t")
+        steps = self.reader.ReadPerTimestepVariable(self.path, "spectra", "Step", "s")
+
+        ebins = self.reader.ReadArrayAtTimestep(
+            self.path, "spectra", ebin_name, first_step
+        )
+
+        diffs = np.diff(ebins)
+        if np.isclose(diffs[1] - diffs[0], diffs[-1] - diffs[-2], atol=1e-2):
+            ebins = 0.5 * (ebins[1:] + ebins[:-1])
+        else:
+            ebins = (ebins[1:] * ebins[:-1]) ** 0.5
+
+        all_dims = {**times, "E": ebins}
+        all_coords = {**all_dims, "s": ("t", steps["s"])}
+
+        def remap_name(name: str) -> str:
+            return name[1:]
+
+        return xr.Dataset(
+            {
+                remap_name(spectrum): xr.DataArray(
+                    da.stack(
+                        [
+                            da.from_delayed(
+                                self.__read_spectrum(
+                                    path=self.path,
+                                    reader=self.reader,
+                                    spectrum=spectrum,
+                                    step=step,
+                                ),
+                                shape=shape,
+                                dtype="float",
+                            )
+                            for step in valid_steps
+                        ],
+                    ),
+                    name=remap_name(spectrum),
+                    dims=all_dims,
+                    coords=all_coords,
+                )
+                for spectrum in spectra_names
+            },
+            attrs=self.reader.ReadAttrsAtTimestep(
+                path=self.path, category="spectra", step=first_step
+            ),
+        )
+
+    def help_spectra(self, prepend="") -> str:
+        ret = f"{prepend}- use .sel(...) to select specific energy or time intervals\n"
+        ret += f"{prepend} t : time (float)\n"
+        ret += f"{prepend} st : step (int)\n"
+        ret += f"{prepend} E : energy bin (float)\n{prepend}\n"
+        ret += f"{prepend} # example:\n"
+        ret += f"{prepend} # .sel(E=slice(10.0, 20.0)).sel(t=0, method='nearest')\n{prepend}\n"
+        ret += f"{prepend}- use .isel(...) to select spectra based on energy bin or time index:\n"
+        ret += f"{prepend} t : timestamp index (int)\n"
+        ret += f"{prepend} st : step index (int)\n"
+        ret += f"{prepend} E : energy bin index (int)\n{prepend}\n"
+        ret += f"{prepend} # example:\n"
+        ret += f"{prepend} # .isel(t=-1, E=11)\n"
+        ret += f"{prepend}\n"
+        ret += f"{prepend} # example:\n"
+        ret += f"{prepend} # .spectra.N_1.sel(E=slice(None, 50)).isel(t=5).plot()\n"
+        return ret
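Two things in `__read_spectra` are worth spelling out. First, the energy-bin edges read from `sEbn` are converted to bin centers: arithmetic midpoints when the spacing looks linear, geometric means otherwise (log-spaced bins). Second, the dataset is assembled fully lazily: each per-step read is a `dask.delayed` call wrapped in `da.from_delayed`, and the steps are stacked into a single dask array, so no file I/O happens until the user computes or plots. The same stacking pattern distilled into a standalone sketch (`load_step` and the shapes are invented for illustration):

```python
import dask
import dask.array as da
import numpy as np


@dask.delayed
def load_step(step: int) -> np.ndarray:
    # stand-in for reader.ReadArrayAtTimestep(...): pretend each
    # output step stores a 100-bin spectrum on disk
    return np.full(100, float(step))


lazy = da.stack(
    [da.from_delayed(load_step(s), shape=(100,), dtype="float") for s in range(8)]
)
print(lazy.shape)            # (8, 100) -- no reads have happened yet
print(lazy.sum().compute())  # file access is deferred until .compute()
```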
diff --git a/nt2/readers/adios2.py b/nt2/readers/adios2.py
index ac352cf..e01d4a0 100644
--- a/nt2/readers/adios2.py
+++ b/nt2/readers/adios2.py
@@ -142,6 +142,24 @@ def ReadArrayShapeAtTimestep(
             f"{category.capitalize()} {quantity} not found in the {filename}"
         )
 
+    @override
+    def ReadArrayShapeExplicitlyAtTimestep(
+        self, path: str, category: str, quantity: str, step: int
+    ) -> tuple[int]:
+        with bp.FileReader(filename := self.FullPath(path, category, step)) as f:
+            if quantity in f.available_variables():
+                var = f.inquire_variable(quantity)
+                if var is not None and (read := f.read(var)) is not None:
+                    return read.shape
+                else:
+                    raise ValueError(
+                        f"{category.capitalize()} {quantity} could not be read from the {filename}"
+                    )
+            else:
+                raise ValueError(
+                    f"{category.capitalize()} {quantity} not found in the {filename}"
+                )
+
     @override
     def ReadFieldCoordsAtTimestep(
         self, path: str, step: int
diff --git a/nt2/readers/base.py b/nt2/readers/base.py
index 7f31551..1b21a06 100644
--- a/nt2/readers/base.py
+++ b/nt2/readers/base.py
@@ -226,6 +226,36 @@ def ReadArrayShapeAtTimestep(
         """
         raise NotImplementedError("ReadArrayShapeAtTimestep is not implemented")
 
+    def ReadArrayShapeExplicitlyAtTimestep(
+        self,
+        path: str,
+        category: str,
+        quantity: str,
+        step: int,
+    ) -> tuple[int]:
+        """Read the shape of an array at a given timestep, without relying on metadata.
+
+        Parameters
+        ----------
+        path : str
+            The path to the files.
+        category : str
+            The category of the files.
+        quantity : str
+            The name of the quantity to be read.
+        step : int
+            The timestep to be read.
+
+        Returns
+        -------
+        tuple[int]
+            The shape of the array at a given timestep.
+        """
+
+        raise NotImplementedError(
+            "ReadArrayShapeExplicitlyAtTimestep is not implemented"
+        )
+
     def ReadFieldCoordsAtTimestep(
         self,
         path: str,
diff --git a/nt2/readers/hdf5.py b/nt2/readers/hdf5.py
index 435185f..4217753 100644
--- a/nt2/readers/hdf5.py
+++ b/nt2/readers/hdf5.py
@@ -1,4 +1,6 @@
-from typing import Any
+from __future__ import annotations
+
+from typing import Any, TYPE_CHECKING
 
 import sys
 
@@ -15,18 +17,34 @@ def override(method):
 import numpy as np
 import numpy.typing as npt
 
-import h5py
+try:
+    import h5py  # runtime
+except ImportError:  # pragma: no cover
+    h5py = None  # type: ignore[assignment]
+
+if TYPE_CHECKING:
+    import h5py as _h5py
 
 from nt2.utils import Format, Layout
 from nt2.readers.base import BaseReader
 
 
+def _require_h5py():
+    if h5py is None:
+        raise ImportError(
+            "HDF5 support requires the optional dependency 'h5py'. "
+            'Install it with: pip install "nt2py[hdf5]"'
+        )
+    return h5py
+
+
 class Reader(BaseReader):
     @staticmethod
-    def __extract_step0(f: h5py.File) -> h5py.Group:
+    def __extract_step0(f: "_h5py.File") -> "_h5py.Group":
+        h5 = _require_h5py()
         if "Step0" in f.keys():
             f0 = f["Step0"]
-            if isinstance(f0, h5py.Group):
+            if isinstance(f0, h5.Group):
                 return f0
             else:
                 raise ValueError(f"Step0 is not a group in the HDF5 file")
@@ -42,8 +60,9 @@ def format(self) -> Format:
     @override
     def EnterFile(
         filename: str,
-    ) -> h5py.File:
-        return h5py.File(filename, "r")
+    ) -> "_h5py.File":
+        h5 = _require_h5py()
+        return h5.File(filename, "r")
 
     @override
     def ReadPerTimestepVariable(
@@ -54,15 +73,16 @@ def ReadPerTimestepVariable(
         newname: str,
     ) -> dict[str, npt.NDArray[Any]]:
         variables: list[Any] = []
+        h5 = _require_h5py()
         for filename in self.GetValidFiles(
             path=path,
             category=category,
         ):
-            with h5py.File(os.path.join(path, category, filename), "r") as f:
+            with h5.File(os.path.join(path, category, filename), "r") as f:
                 f0 = Reader.__extract_step0(f)
                 if varname in f0.keys():
                     var = f0[varname]
-                    if isinstance(var, h5py.Dataset):
+                    if isinstance(var, h5.Dataset):
                         variables.append(var[()])
                     else:
                         raise ValueError(
@@ -80,7 +100,8 @@ def ReadAttrsAtTimestep(
         category: str,
         step: int,
     ) -> dict[str, Any]:
-        with h5py.File(self.FullPath(path, category, step), "r") as f:
+        h5 = _require_h5py()
+        with h5.File(self.FullPath(path, category, step), "r") as f:
             return {k: v for k, v in f.attrs.items()}
 
 @override
@@ -89,7 +110,8 @@ def ReadEdgeCoordsAtTimestep(
         path: str,
         step: int,
     ) -> dict[str, npt.NDArray[Any]]:
-        with h5py.File(self.FullPath(path, "fields", step), "r") as f:
+        h5 = _require_h5py()
+        with h5.File(self.FullPath(path, "fields", step), "r") as f:
             f0 = Reader.__extract_step0(f)
             return {k: v[:] for k, v in f0.items() if k[0] == "X" and k[-1] == "e"}
 
@@ -101,11 +123,12 @@ def ReadArrayAtTimestep(
         quantity: str,
         step: int,
     ) -> npt.NDArray[Any]:
-        with h5py.File(filename := self.FullPath(path, category, step), "r") as f:
+        h5 = _require_h5py()
+        with h5.File(filename := self.FullPath(path, category, step), "r") as f:
             f0 = Reader.__extract_step0(f)
             if quantity in f0.keys():
                 var = f0[quantity]
-                if isinstance(var, h5py.Dataset):
+                if isinstance(var, h5.Dataset):
                     return np.array(var[:])
                 else:
                     raise ValueError(f"{quantity} is not a group in the {filename}")
@@ -120,7 +143,8 @@ def ReadCategoryNamesAtTimestep(
         prefix: str,
         step: int,
     ) -> set[str]:
-        with h5py.File(self.FullPath(path, category, step), "r") as f:
+        h5 = _require_h5py()
+        with h5.File(self.FullPath(path, category, step), "r") as f:
             f0 = Reader.__extract_step0(f)
             keys: list[str] = list(f0.keys())
             return set(c for c in keys if c.startswith(prefix))
@@ -129,11 +153,12 @@ def ReadArrayShapeAtTimestep(
     def ReadArrayShapeAtTimestep(
         self, path: str, category: str, quantity: str, step: int
     ) -> tuple[int]:
-        with h5py.File(filename := self.FullPath(path, category, step), "r") as f:
+        h5 = _require_h5py()
+        with h5.File(filename := self.FullPath(path, category, step), "r") as f:
             f0 = Reader.__extract_step0(f)
             if quantity in f0.keys():
                 var = f0[quantity]
-                if isinstance(var, h5py.Dataset):
+                if isinstance(var, h5.Dataset):
                     return var.shape
                 else:
                     raise ValueError(
@@ -144,16 +169,37 @@
                 f"{category.capitalize()} {quantity} not found in the {filename}"
             )
 
+    @override
+    def ReadArrayShapeExplicitlyAtTimestep(
+        self, path: str, category: str, quantity: str, step: int
+    ) -> tuple[int]:
+        h5 = _require_h5py()
+        with h5.File(self.FullPath(path, category, step), "r") as f:
+            f0 = Reader.__extract_step0(f)
+            if quantity in f0.keys():
+                var = f0[quantity]
+                if isinstance(var, h5.Dataset) and (read := var[:]) is not None:
+                    return read.shape
+                else:
+                    raise ValueError(
+                        f"{category.capitalize()} {quantity} is not a group in the HDF5 file"
+                    )
+            else:
+                raise ValueError(
+                    f"{category.capitalize()} {quantity} not found in the HDF5 file"
+                )
+
     @override
     def ReadFieldCoordsAtTimestep(
         self, path: str, step: int
     ) -> dict[str, npt.NDArray[Any]]:
-        with h5py.File(filename := self.FullPath(path, "fields", step), "r") as f:
+        h5 = _require_h5py()
+        with h5.File(filename := self.FullPath(path, "fields", step), "r") as f:
             f0 = Reader.__extract_step0(f)
 
             def get_coord(c: str) -> Any:
                 f0_c = f0[c]
-                if isinstance(f0_c, h5py.Dataset):
+                if isinstance(f0_c, h5.Dataset):
                     return f0_c[:]
                 else:
                     raise ValueError(f"Field {c} is not a group in the {filename}")
@@ -163,7 +209,8 @@ def get_coord(c: str) -> Any:
 
     @override
     def ReadFieldLayoutAtTimestep(self, path: str, step: int) -> Layout:
-        with h5py.File(filename := self.FullPath(path, "fields", step), "r") as f:
+        h5 = _require_h5py()
+        with h5.File(filename := self.FullPath(path, "fields", step), "r") as f:
             if "LayoutRight" not in f.attrs:
                 raise ValueError(f"LayoutRight attribute not found in the {filename}")
             return Layout.R if f.attrs["LayoutRight"] else Layout.L
diff --git a/pyproject.toml b/pyproject.toml
index 9d25aef..2ea2644 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -13,7 +13,6 @@ dependencies = [
     "xarray",
     "numpy",
     "scipy",
-    "h5py",
    "matplotlib",
     "tqdm",
     "contourpy",
@@ -41,6 +40,9 @@ classifiers = [
     "Programming Language :: Python :: 3.13",
 ]
 
+[project.optional-dependencies]
+hdf5 = ["h5py"]
+
 [project.urls]
 Repository = "https://github.com/entity-toolkit/nt2py"
diff --git a/shell.nix b/shell.nix
index d7ee1a0..77eed82 100644
--- a/shell.nix
+++ b/shell.nix
@@ -1,6 +1,6 @@
 {
   pkgs ? import <nixpkgs> { },
-  py ? "312",
+  py ? "313",
 }:
 pkgs.mkShell {
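The `pyproject.toml` extra and the guarded import in `nt2/readers/hdf5.py` together form a standard optional-dependency pattern: the package imports cleanly without `h5py`, type checkers still see the real types via the `TYPE_CHECKING` branch, and a missing dependency only surfaces at the first HDF5 call, with an actionable message. Distilled into a standalone sketch (the `open_h5` helper is illustrative, not part of the package):

```python
from __future__ import annotations

from typing import TYPE_CHECKING

try:
    import h5py  # present only when installed with the [hdf5] extra
except ImportError:
    h5py = None  # type: ignore[assignment]

if TYPE_CHECKING:
    # type checkers always resolve this branch, so annotations stay precise
    import h5py as _h5py


def _require_h5py():
    # defer the failure from import time to first use
    if h5py is None:
        raise ImportError(
            "HDF5 support requires the optional dependency 'h5py'. "
            'Install it with: pip install "nt2py[hdf5]"'
        )
    return h5py


def open_h5(filename: str) -> "_h5py.File":
    h5 = _require_h5py()
    return h5.File(filename, "r")
```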