From 3bf38ff45a0ba7838de4b35d8cb753006d2fd90d Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sat, 3 Jan 2026 10:58:19 +0000 Subject: [PATCH 01/12] Initial plan From 7e196dd9eff1ce04f52a9b739e2a51b6c5105a51 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sat, 3 Jan 2026 11:03:01 +0000 Subject: [PATCH 02/12] Add ND2Reader class and pixel_size field to ImageInfo Co-authored-by: AdvancedImagingUTSW <42547789+AdvancedImagingUTSW@users.noreply.github.com> --- pyproject.toml | 1 + src/clearex/io/read.py | 211 +++++++++++++++++++++++++++++++++++++++-- tests/io/test_read.py | 1 + 3 files changed, 207 insertions(+), 6 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 434fb36..8a93783 100755 --- a/pyproject.toml +++ b/pyproject.toml @@ -24,6 +24,7 @@ dependencies = [ "jupyterlab", "matplotlib", "napari[all]>0.6.1", + "nd2", "neuroglancer>=2.40.1,<3.0.0", "opencv-python", "pandas>=2.3.3", diff --git a/src/clearex/io/read.py b/src/clearex/io/read.py index ae1870c..9f7ac07 100644 --- a/src/clearex/io/read.py +++ b/src/clearex/io/read.py @@ -38,6 +38,7 @@ import tifffile import zarr import h5py +import nd2 from numpy.typing import NDArray # Local Imports @@ -54,8 +55,8 @@ class ImageInfo: """Container for image metadata. This dataclass stores metadata about an opened image file, including - its path, shape, data type, axis labels, and any additional metadata - extracted from the file format. + its path, shape, data type, axis labels, pixel size, and any additional + metadata extracted from the file format. Attributes ---------- @@ -70,6 +71,11 @@ class ImageInfo: A string describing the axis order, such as "TCZYX" for time-channel-Z-Y-X. Common in OME-TIFF and other formats. Defaults to None if not available. + pixel_size : Tuple[float, ...], optional + Physical pixel/voxel sizes in micrometers, typically ordered as + (X, Y, Z) or matching the axes order. For example, (0.65, 0.65, 2.0) + for a 3D image with 0.65 µm XY resolution and 2.0 µm Z spacing. + Defaults to None if not available. metadata : Dict[str, Any], optional Additional metadata extracted from the file format, such as attributes from Zarr/HDF5 or custom tags. Defaults to None. @@ -83,16 +89,20 @@ class ImageInfo: ... shape=(512, 512), ... dtype=np.uint16, ... axes="YX", + ... pixel_size=(0.65, 0.65), ... metadata={"scale": 1.0} ... ) >>> print(info.shape) (512, 512) + >>> print(info.pixel_size) + (0.65, 0.65) """ path: Path shape: Tuple[int, ...] dtype: Any axes: Optional[str] = None + pixel_size: Optional[Tuple[float, ...]] = None metadata: Optional[Dict[str, Any]] = None @@ -982,6 +992,195 @@ def open( return arr, info +class ND2Reader(Reader): + """Reader for Nikon ND2 files using the nd2 library. + + This reader handles Nikon's proprietary ND2 format files commonly used + in microscopy. It can extract comprehensive metadata including pixel/voxel + sizes, axis information, and experimental parameters. + + Attributes + ---------- + SUFFIXES : tuple of str + Supported file extensions: ('.nd2',). + + See Also + -------- + Reader : Abstract base class for all readers. + TiffReader : Reader for TIFF/OME-TIFF files. + ZarrReader : Reader for Zarr stores. + + Notes + ----- + The nd2 library provides native support for both NumPy arrays and Dask + arrays through its `to_dask()` method. 
When `prefer_dask=True`, this + reader uses the native Dask support for efficient lazy loading of large + ND2 files without loading them entirely into memory. + + Pixel size information is extracted from the Volume metadata and stored + in the ImageInfo as (X, Y, Z) calibration values in micrometers. + + Examples + -------- + >>> from pathlib import Path + >>> reader = ND2Reader() + >>> arr, info = reader.open(Path("image.nd2")) + >>> print(arr.shape) + (10, 512, 512) + >>> print(info.pixel_size) + (0.65, 0.65, 2.0) + + >>> # For large files, use Dask + >>> darr, info = reader.open(Path("large.nd2"), prefer_dask=True) + >>> print(type(darr).__name__) + Array + """ + + SUFFIXES = (".nd2",) + + def open( + self, + path: Path, + prefer_dask: bool = False, + chunks: Optional[Union[int, Tuple[int, ...]]] = None, + **kwargs: Any, + ) -> Tuple[NDArray[Any], ImageInfo]: + """Open an ND2 file and return the image data and metadata. + + This method reads Nikon ND2 files using the nd2 library. It extracts + comprehensive metadata including pixel sizes, axis order, and other + experimental parameters. + + Parameters + ---------- + path : Path + The path to the ND2 file. + prefer_dask : bool, optional + If True, return a Dask array for lazy evaluation. If False, load + the entire image into memory as a NumPy array. Defaults to False. + chunks : int or tuple of int, optional + Chunk size for Dask arrays. Can be a single integer (applied to + all dimensions) or a tuple specifying chunk size per dimension. + If None, uses nd2's default chunking. Only relevant when + `prefer_dask=True`. Defaults to None. + **kwargs : dict + Additional keyword arguments passed to `nd2.ND2File`. + + Returns + ------- + arr : NDArray[Any] or dask.array.Array + The loaded image data. Returns a NumPy ndarray if `prefer_dask=False`, + or a Dask array if `prefer_dask=True`. + info : ImageInfo + Metadata about the loaded image, including path, shape, dtype, + axes information, pixel size (in micrometers), and format-specific + metadata. + + Raises + ------ + ValueError + If the file cannot be read as a valid ND2 file. + FileNotFoundError + If the specified file does not exist. + + Notes + ----- + The reader extracts pixel/voxel sizes from the ND2 metadata's Volume + section. These are stored as (X, Y, Z) calibration values in micrometers + in the `pixel_size` field of ImageInfo. + + Axes information is extracted from the ND2 file's dimension names and + converted to a string format (e.g., "TCZYX"). + + Examples + -------- + >>> from pathlib import Path + >>> reader = ND2Reader() + + >>> # Load a standard ND2 file into memory + >>> arr, info = reader.open(Path("sample.nd2")) + >>> print(f"Shape: {info.shape}, dtype: {info.dtype}") + Shape: (5, 512, 512), dtype: uint16 + >>> print(f"Pixel size: {info.pixel_size}") + Pixel size: (0.325, 0.325, 1.0) + + >>> # Load an ND2 as a Dask array + >>> darr, info = reader.open(Path("large.nd2"), prefer_dask=True) + >>> print(f"Type: {type(darr).__name__}") + Type: Array + + >>> # Specify custom chunking for Dask + >>> darr, info = reader.open( + ... Path("timelapse.nd2"), + ... prefer_dask=True, + ... chunks=(1, 512, 512) + ... 
) + >>> print(darr.chunksize) + (1, 512, 512) + """ + + with nd2.ND2File(str(path), **kwargs) as nd2_file: + # Extract metadata + metadata_dict = {} + axes = None + pixel_size = None + + # Get axes information from sizes dict + if hasattr(nd2_file, 'sizes') and nd2_file.sizes: + # Convert sizes dict to axes string (e.g., {'T': 10, 'C': 3, 'Z': 5, 'Y': 512, 'X': 512} -> "TCZYX") + axes = "".join(nd2_file.sizes.keys()) + + # Extract pixel size from metadata + try: + if nd2_file.metadata and nd2_file.metadata.channels: + # Get the first channel's volume information + channel = nd2_file.metadata.channels[0] + if channel.volume and channel.volume.axesCalibration: + # axesCalibration is (X, Y, Z) in micrometers + pixel_size = channel.volume.axesCalibration + except (AttributeError, IndexError, TypeError): + # If metadata extraction fails, pixel_size remains None + pass + + # Store additional metadata + if nd2_file.metadata: + metadata_dict['metadata'] = nd2_file.metadata + if hasattr(nd2_file, 'attributes') and nd2_file.attributes: + metadata_dict['attributes'] = nd2_file.attributes + + if prefer_dask: + # Use nd2's native Dask support + darr = nd2_file.to_dask() + + # Apply custom chunking if specified + if chunks is not None: + darr = darr.rechunk(chunks) + + info = ImageInfo( + path=path, + shape=tuple(darr.shape), + dtype=darr.dtype, + axes=axes, + pixel_size=pixel_size, + metadata=metadata_dict, + ) + logger.info(f"Loaded {path.name} as a Dask array.") + return darr, info + else: + # Load to memory as NumPy + arr = nd2_file.asarray() + info = ImageInfo( + path=path, + shape=tuple(arr.shape), + dtype=arr.dtype, + axes=axes, + pixel_size=pixel_size, + metadata=metadata_dict, + ) + logger.info(f"Loaded {path.name} as NumPy array.") + return arr, info + + class ImageOpener: """Automatic image file format detection and reading. @@ -1041,9 +1240,9 @@ def __init__(self, readers: Optional[Iterable[Type[Reader]]] = None) -> None: ---------- readers : Iterable of Reader subclasses, optional An ordered sequence of reader classes to use. If None, uses the - default registry: (TiffReader, ZarrReader, NumpyReader, HDF5Reader). - The order determines priority when multiple readers could handle - the same file. + default registry: (TiffReader, ZarrReader, NumpyReader, HDF5Reader, + ND2Reader). The order determines priority when multiple readers + could handle the same file. Notes ----- @@ -1064,7 +1263,7 @@ def __init__(self, readers: Optional[Iterable[Type[Reader]]] = None) -> None: """ # Registry order is priority order self._readers: Tuple[Type[Reader], ...] 
= tuple( - readers or (TiffReader, ZarrReader, NumpyReader, HDF5Reader) + readers or (TiffReader, ZarrReader, NumpyReader, HDF5Reader, ND2Reader) ) def open( diff --git a/tests/io/test_read.py b/tests/io/test_read.py index a78c648..c9e7dd3 100644 --- a/tests/io/test_read.py +++ b/tests/io/test_read.py @@ -47,6 +47,7 @@ ZarrReader, HDF5Reader, NumpyReader, + ND2Reader, ImageOpener, ) from tests import download_test_registration_data From c93eda86e42e281de34dc6b92b1d21828de2fc80 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sat, 3 Jan 2026 11:07:10 +0000 Subject: [PATCH 03/12] Update existing readers to extract pixel_size from metadata Co-authored-by: AdvancedImagingUTSW <42547789+AdvancedImagingUTSW@users.noreply.github.com> --- src/clearex/io/read.py | 106 ++++++++++++++++++++++++++++++++++++++++- 1 file changed, 104 insertions(+), 2 deletions(-) diff --git a/src/clearex/io/read.py b/src/clearex/io/read.py index 9f7ac07..172ddf1 100644 --- a/src/clearex/io/read.py +++ b/src/clearex/io/read.py @@ -357,15 +357,68 @@ def open( (1, 512, 512) """ - # Try OME-axes and metadata + # Try OME-axes, pixel size, and metadata with tifffile.TiffFile(str(path)) as tf: ome_meta = getattr(tf, "omexml", None) axes = None + pixel_size = None + if ome_meta is not None: try: axes = ome_meta.image().pixels().DimensionOrder # e.g., "TCZYX" except Exception: axes = None + + # Try to extract pixel size from OME metadata + if hasattr(tf, 'ome_metadata') and tf.ome_metadata: + try: + from ome_types import from_xml + ome = from_xml(tf.ome_metadata) + if ome.images and ome.images[0].pixels: + pixels = ome.images[0].pixels + # Extract physical sizes (in micrometers by default in OME) + size_x = getattr(pixels, 'physical_size_x', None) + size_y = getattr(pixels, 'physical_size_y', None) + size_z = getattr(pixels, 'physical_size_z', None) + + # Build pixel_size tuple based on available dimensions + if size_x is not None and size_y is not None: + if size_z is not None: + pixel_size = (size_x, size_y, size_z) + else: + pixel_size = (size_x, size_y) + except Exception: + # If OME parsing fails, pixel_size remains None + pass + + # Fallback: try to extract from standard TIFF resolution tags + if pixel_size is None: + try: + page = tf.pages[0] + x_res = page.tags.get('XResolution') + y_res = page.tags.get('YResolution') + res_unit = page.tags.get('ResolutionUnit') + + if x_res and y_res and res_unit: + # Extract resolution values + x_val = x_res.value[0] / x_res.value[1] if isinstance(x_res.value, tuple) else x_res.value + y_val = y_res.value[0] / y_res.value[1] if isinstance(y_res.value, tuple) else y_res.value + + # Convert to micrometers per pixel based on unit + # Resolution unit: 1=none, 2=inch, 3=centimeter + if res_unit.value == 2: # inch + # pixels per inch -> um per pixel + x_um = 25400.0 / x_val # 1 inch = 25400 um + y_um = 25400.0 / y_val + pixel_size = (x_um, y_um) + elif res_unit.value == 3: # centimeter + # pixels per cm -> um per pixel + x_um = 10000.0 / x_val # 1 cm = 10000 um + y_um = 10000.0 / y_val + pixel_size = (x_um, y_um) + except Exception: + # If standard TIFF tag parsing fails, pixel_size remains None + pass if prefer_dask: # Option A: use tifffile's OME-as-zarr path if possible @@ -377,6 +430,7 @@ def open( shape=tuple(darr.shape), dtype=darr.dtype, axes=axes, + pixel_size=pixel_size, metadata={}, ) logger.info(f"Loaded {path.name} as a Dask array.") @@ -389,6 +443,7 @@ def open( shape=tuple(arr.shape), dtype=arr.dtype, axes=axes, + 
pixel_size=pixel_size, metadata={}, ) logger.info(f"Loaded {path.name} as NumPy array.") @@ -544,11 +599,37 @@ def open( array = max(arrays, key=lambda arr: np.prod(arr.shape)) axes = None + pixel_size = None meta = {} try: attrs = getattr(array, "attrs", {}) axes = attrs.get("multiscales", [{}])[0].get("axes") or attrs.get("axes") meta = dict(attrs) + + # Try to extract pixel size from Zarr attributes + # Check for OME-Zarr style multiscales metadata + if "multiscales" in attrs and attrs["multiscales"]: + multiscale = attrs["multiscales"][0] + if "axes" in multiscale and isinstance(multiscale["axes"], list): + # Look for scale/transform information + if "datasets" in multiscale and multiscale["datasets"]: + dataset = multiscale["datasets"][0] + if "coordinateTransformations" in dataset: + for transform in dataset["coordinateTransformations"]: + if transform.get("type") == "scale" and "scale" in transform: + # Scale values typically correspond to axis order + pixel_size = tuple(transform["scale"]) + break + + # Fallback: check for direct pixel_size or scale attributes + if pixel_size is None: + if "pixel_size" in attrs: + pixel_size = tuple(attrs["pixel_size"]) if isinstance(attrs["pixel_size"], (list, tuple)) else attrs["pixel_size"] + elif "scale" in attrs: + pixel_size = tuple(attrs["scale"]) if isinstance(attrs["scale"], (list, tuple)) else attrs["scale"] + elif "resolution" in attrs: + pixel_size = tuple(attrs["resolution"]) if isinstance(attrs["resolution"], (list, tuple)) else attrs["resolution"] + except Exception: pass @@ -560,6 +641,7 @@ def open( shape=tuple(darr.shape), dtype=darr.dtype, axes=axes, + pixel_size=pixel_size, metadata=meta, ) return darr, info @@ -572,6 +654,7 @@ def open( shape=tuple(np_arr.shape), dtype=np_arr.dtype, axes=axes, + pixel_size=pixel_size, metadata=meta, ) return np_arr, info @@ -743,8 +826,9 @@ def _collect_datasets(group: h5py.Group) -> list: # Highest resolution ≈ largest number of elements ds = max(datasets, key=lambda d: int(np.prod(d.shape))) - # Extract basic metadata / axes if present + # Extract basic metadata / axes / pixel_size if present axes = None + pixel_size = None meta: Dict[str, Any] = {} try: attrs = dict(ds.attrs) if hasattr(ds, "attrs") else {} @@ -755,6 +839,18 @@ def _collect_datasets(group: h5py.Group) -> list: or attrs.get("DimensionOrder") or attrs.get("DIMENSION_LABELS") # sometimes stored by other tools ) + + # Try to extract pixel size from HDF5 attributes + if "pixel_size" in attrs: + pixel_size = tuple(attrs["pixel_size"]) if isinstance(attrs["pixel_size"], (list, tuple, np.ndarray)) else attrs["pixel_size"] + elif "scale" in attrs: + pixel_size = tuple(attrs["scale"]) if isinstance(attrs["scale"], (list, tuple, np.ndarray)) else attrs["scale"] + elif "resolution" in attrs: + pixel_size = tuple(attrs["resolution"]) if isinstance(attrs["resolution"], (list, tuple, np.ndarray)) else attrs["resolution"] + # Check for individual axis scales + elif "element_size_um" in attrs: + pixel_size = tuple(attrs["element_size_um"]) if isinstance(attrs["element_size_um"], (list, tuple, np.ndarray)) else attrs["element_size_um"] + except Exception: pass @@ -779,6 +875,7 @@ def _collect_datasets(group: h5py.Group) -> list: shape=tuple(darr.shape), dtype=darr.dtype, axes=axes, + pixel_size=pixel_size, metadata=meta, ) logger.info( @@ -794,6 +891,7 @@ def _collect_datasets(group: h5py.Group) -> list: shape=tuple(np_arr.shape), dtype=np_arr.dtype, axes=axes, + pixel_size=pixel_size, metadata=meta, ) logger.info( @@ -953,6 +1051,7 @@ def 
open( shape=tuple(darr.shape), dtype=darr.dtype, axes=None, + pixel_size=None, metadata={}, ) return darr, info @@ -963,6 +1062,7 @@ def open( shape=tuple(arr.shape), dtype=arr.dtype, axes=None, + pixel_size=None, metadata={}, ) return arr, info @@ -978,6 +1078,7 @@ def open( shape=tuple(darr.shape), dtype=darr.dtype, axes=None, + pixel_size=None, metadata={"npz_key": first_key}, ) return darr, info @@ -987,6 +1088,7 @@ def open( shape=tuple(arr.shape), dtype=arr.dtype, axes=None, + pixel_size=None, metadata={"npz_key": first_key}, ) return arr, info From 42d009ee7145d34b79ab3748f81b7c29520c84cb Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sat, 3 Jan 2026 11:11:45 +0000 Subject: [PATCH 04/12] Address code review comments: move imports to top and add helper function Co-authored-by: AdvancedImagingUTSW <42547789+AdvancedImagingUTSW@users.noreply.github.com> --- src/clearex/io/read.py | 51 ++++++++++++++++++++++++++++++++++-------- 1 file changed, 42 insertions(+), 9 deletions(-) diff --git a/src/clearex/io/read.py b/src/clearex/io/read.py index 172ddf1..6e46f55 100644 --- a/src/clearex/io/read.py +++ b/src/clearex/io/read.py @@ -41,6 +41,13 @@ import nd2 from numpy.typing import NDArray +# Try to import ome-types for OME-TIFF metadata parsing +try: + from ome_types import from_xml + HAS_OME_TYPES = True +except ImportError: + HAS_OME_TYPES = False + # Local Imports ArrayLike = Union[NDArray[Any], da.Array] @@ -50,6 +57,33 @@ logger.addHandler(hdlr=logging.NullHandler()) +def _ensure_tuple(value: Any) -> Optional[Tuple[float, ...]]: + """Convert various types to tuple of floats for pixel_size. + + Parameters + ---------- + value : Any + Value to convert (list, tuple, ndarray, or single value) + + Returns + ------- + Optional[Tuple[float, ...]] + Tuple of floats if conversion successful, None otherwise + """ + if value is None: + return None + try: + if isinstance(value, (list, tuple)): + return tuple(float(x) for x in value) + elif isinstance(value, np.ndarray): + return tuple(float(x) for x in value.flat) + else: + # Single value + return (float(value),) + except (ValueError, TypeError): + return None + + @dataclass class ImageInfo: """Container for image metadata. 
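
A quick sanity check on the new `_ensure_tuple` helper above — a minimal sketch, assuming the helper exactly as defined in this commit; the sample values are purely illustrative:

import numpy as np

_ensure_tuple([0.2, 0.167, 0.167])          # list    -> (0.2, 0.167, 0.167)
_ensure_tuple(np.array([2.0, 0.65, 0.65]))  # ndarray -> (2.0, 0.65, 0.65)
_ensure_tuple(0.65)                         # scalar  -> (0.65,)
_ensure_tuple("n/a")                        # float("n/a") raises ValueError -> None
_ensure_tuple(None)                         # -> None

The same normalization path is reused by the Zarr and HDF5 readers below, so attribute values of any of these shapes end up as a plain tuple of floats in ImageInfo.pixel_size.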
@@ -370,9 +404,8 @@ def open( axes = None # Try to extract pixel size from OME metadata - if hasattr(tf, 'ome_metadata') and tf.ome_metadata: + if HAS_OME_TYPES and hasattr(tf, 'ome_metadata') and tf.ome_metadata: try: - from ome_types import from_xml ome = from_xml(tf.ome_metadata) if ome.images and ome.images[0].pixels: pixels = ome.images[0].pixels @@ -624,11 +657,11 @@ def open( # Fallback: check for direct pixel_size or scale attributes if pixel_size is None: if "pixel_size" in attrs: - pixel_size = tuple(attrs["pixel_size"]) if isinstance(attrs["pixel_size"], (list, tuple)) else attrs["pixel_size"] + pixel_size = _ensure_tuple(attrs["pixel_size"]) elif "scale" in attrs: - pixel_size = tuple(attrs["scale"]) if isinstance(attrs["scale"], (list, tuple)) else attrs["scale"] + pixel_size = _ensure_tuple(attrs["scale"]) elif "resolution" in attrs: - pixel_size = tuple(attrs["resolution"]) if isinstance(attrs["resolution"], (list, tuple)) else attrs["resolution"] + pixel_size = _ensure_tuple(attrs["resolution"]) except Exception: pass @@ -842,14 +875,14 @@ def _collect_datasets(group: h5py.Group) -> list: # Try to extract pixel size from HDF5 attributes if "pixel_size" in attrs: - pixel_size = tuple(attrs["pixel_size"]) if isinstance(attrs["pixel_size"], (list, tuple, np.ndarray)) else attrs["pixel_size"] + pixel_size = _ensure_tuple(attrs["pixel_size"]) elif "scale" in attrs: - pixel_size = tuple(attrs["scale"]) if isinstance(attrs["scale"], (list, tuple, np.ndarray)) else attrs["scale"] + pixel_size = _ensure_tuple(attrs["scale"]) elif "resolution" in attrs: - pixel_size = tuple(attrs["resolution"]) if isinstance(attrs["resolution"], (list, tuple, np.ndarray)) else attrs["resolution"] + pixel_size = _ensure_tuple(attrs["resolution"]) # Check for individual axis scales elif "element_size_um" in attrs: - pixel_size = tuple(attrs["element_size_um"]) if isinstance(attrs["element_size_um"], (list, tuple, np.ndarray)) else attrs["element_size_um"] + pixel_size = _ensure_tuple(attrs["element_size_um"]) except Exception: pass From 207a2e548a886534310b3c5c7b08786b166e6da4 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sat, 3 Jan 2026 11:42:45 +0000 Subject: [PATCH 05/12] Change axes to list format for OME-NGFF compatibility with 5D support Co-authored-by: AdvancedImagingUTSW <42547789+AdvancedImagingUTSW@users.noreply.github.com> --- src/clearex/io/read.py | 123 +++++++++++++++++++++++++++++++++-------- 1 file changed, 99 insertions(+), 24 deletions(-) diff --git a/src/clearex/io/read.py b/src/clearex/io/read.py index 6e46f55..9f0081f 100644 --- a/src/clearex/io/read.py +++ b/src/clearex/io/read.py @@ -84,6 +84,60 @@ def _ensure_tuple(value: Any) -> Optional[Tuple[float, ...]]: return None +def _normalize_axes(axes: Any) -> Optional[list]: + """Normalize axis information to OME-NGFF compatible list format. + + Converts various axis representations to a standardized list format + compatible with OME-NGFF (up to 5 dimensions: t, c, z, y, x). + + Parameters + ---------- + axes : Any + Axis information in various formats: + - String like "TCZYX" or "ZYX" + - List of axis names like ["t", "c", "z", "y", "x"] + - List of OME-NGFF axis dicts like [{"name": "t", "type": "time"}, ...] 
+ - Dict with keys like {'T': 10, 'C': 3, 'Z': 5, 'Y': 512, 'X': 512} + + Returns + ------- + Optional[list] + List of lowercase axis names (e.g., ["t", "c", "z", "y", "x"]) or None + + Notes + ----- + Following OME-NGFF conventions: + - Supports up to 5 dimensions + - Common axis names: 't' (time), 'c' (channel), 'z', 'y', 'x' (spatial) + - Preserves order from input + - Normalizes to lowercase for consistency + """ + if axes is None: + return None + + try: + # Handle string format (e.g., "TCZYX") + if isinstance(axes, str): + return [ax.lower() for ax in axes] + + # Handle list of dicts (OME-NGFF format) + if isinstance(axes, list): + if all(isinstance(item, dict) and "name" in item for item in axes): + return [item["name"].lower() for item in axes] + # Handle simple list of strings + elif all(isinstance(item, str) for item in axes): + return [ax.lower() for ax in axes] + + # Handle dict format (e.g., sizes dict from ND2) + if isinstance(axes, dict): + return [key.lower() for key in axes.keys()] + + except Exception: + pass + + return None + + @dataclass class ImageInfo: """Container for image metadata. @@ -101,15 +155,23 @@ class ImageInfo: (depth, height, width) for 3D, or arbitrary N-dimensional shapes). dtype : Any The NumPy dtype of the image data (e.g., np.uint8, np.float32). - axes : str, optional - A string describing the axis order, such as "TCZYX" for - time-channel-Z-Y-X. Common in OME-TIFF and other formats. + axes : list of str, optional + List of axis names describing dimension order, following OME-NGFF + conventions. Supports up to 5 dimensions with common names: + - 't' or 'time' for time dimension + - 'c' or 'channel' for channel dimension + - 'z' for Z spatial dimension + - 'y' for Y spatial dimension + - 'x' for X spatial dimension + + Examples: ["t", "c", "z", "y", "x"], ["z", "y", "x"], ["y", "x"] + Order matches the actual array dimension order. Defaults to None if not available. pixel_size : Tuple[float, ...], optional - Physical pixel/voxel sizes in micrometers, typically ordered as - (X, Y, Z) or matching the axes order. For example, (0.65, 0.65, 2.0) - for a 3D image with 0.65 µm XY resolution and 2.0 µm Z spacing. - Defaults to None if not available. + Physical pixel/voxel sizes in micrometers, ordered to match the + spatial axes in the axes list. For example, with axes=["z", "y", "x"] + and pixel_size=(2.0, 0.65, 0.65), the Z spacing is 2.0 µm and + XY resolution is 0.65 µm. Defaults to None if not available. metadata : Dict[str, Any], optional Additional metadata extracted from the file format, such as attributes from Zarr/HDF5 or custom tags. Defaults to None. @@ -120,22 +182,35 @@ class ImageInfo: >>> import numpy as np >>> info = ImageInfo( ... path=Path("image.tif"), - ... shape=(512, 512), + ... shape=(10, 512, 512), ... dtype=np.uint16, - ... axes="YX", - ... pixel_size=(0.65, 0.65), + ... axes=["z", "y", "x"], + ... pixel_size=(2.0, 0.65, 0.65), ... metadata={"scale": 1.0} ... ) >>> print(info.shape) - (512, 512) + (10, 512, 512) + >>> print(info.axes) + ['z', 'y', 'x'] >>> print(info.pixel_size) - (0.65, 0.65) + (2.0, 0.65, 0.65) + + >>> # 5D example + >>> info_5d = ImageInfo( + ... path=Path("timeseries.nd2"), + ... shape=(20, 3, 10, 512, 512), + ... dtype=np.uint16, + ... axes=["t", "c", "z", "y", "x"], + ... pixel_size=(2.0, 0.65, 0.65), # Z, Y, X only + ... ) + >>> print(info_5d.axes) + ['t', 'c', 'z', 'y', 'x'] """ path: Path shape: Tuple[int, ...] 
dtype: Any - axes: Optional[str] = None + axes: Optional[list] = None pixel_size: Optional[Tuple[float, ...]] = None metadata: Optional[Dict[str, Any]] = None @@ -379,7 +454,7 @@ def open( >>> # Load an OME-TIFF as a Dask array >>> darr, info = reader.open(Path("timelapse.ome.tif"), prefer_dask=True) >>> print(f"Axes: {info.axes}, Type: {type(darr).__name__}") - Axes: TCZYX, Type: Array + Axes: ['t', 'c', 'z', 'y', 'x'], Type: Array >>> # Specify custom chunking for Dask >>> darr, info = reader.open( @@ -462,7 +537,7 @@ def open( path=path, shape=tuple(darr.shape), dtype=darr.dtype, - axes=axes, + axes=_normalize_axes(axes), pixel_size=pixel_size, metadata={}, ) @@ -475,7 +550,7 @@ def open( path=path, shape=tuple(arr.shape), dtype=arr.dtype, - axes=axes, + axes=_normalize_axes(axes), pixel_size=pixel_size, metadata={}, ) @@ -673,7 +748,7 @@ def open( path=path, shape=tuple(darr.shape), dtype=darr.dtype, - axes=axes, + axes=_normalize_axes(axes), pixel_size=pixel_size, metadata=meta, ) @@ -686,7 +761,7 @@ def open( path=path, shape=tuple(np_arr.shape), dtype=np_arr.dtype, - axes=axes, + axes=_normalize_axes(axes), pixel_size=pixel_size, metadata=meta, ) @@ -907,7 +982,7 @@ def _collect_datasets(group: h5py.Group) -> list: path=Path(path_str), shape=tuple(darr.shape), dtype=darr.dtype, - axes=axes, + axes=_normalize_axes(axes), pixel_size=pixel_size, metadata=meta, ) @@ -923,7 +998,7 @@ def _collect_datasets(group: h5py.Group) -> list: path=Path(path_str), shape=tuple(np_arr.shape), dtype=np_arr.dtype, - axes=axes, + axes=_normalize_axes(axes), pixel_size=pixel_size, metadata=meta, ) @@ -1262,8 +1337,8 @@ def open( # Get axes information from sizes dict if hasattr(nd2_file, 'sizes') and nd2_file.sizes: - # Convert sizes dict to axes string (e.g., {'T': 10, 'C': 3, 'Z': 5, 'Y': 512, 'X': 512} -> "TCZYX") - axes = "".join(nd2_file.sizes.keys()) + # Pass the sizes dict to normalize_axes which will extract keys + axes = nd2_file.sizes # Extract pixel size from metadata try: @@ -1295,7 +1370,7 @@ def open( path=path, shape=tuple(darr.shape), dtype=darr.dtype, - axes=axes, + axes=_normalize_axes(axes), pixel_size=pixel_size, metadata=metadata_dict, ) @@ -1308,7 +1383,7 @@ def open( path=path, shape=tuple(arr.shape), dtype=arr.dtype, - axes=axes, + axes=_normalize_axes(axes), pixel_size=pixel_size, metadata=metadata_dict, ) From 0f1cd7541318625d88b9f47e1e9b4fc275513303 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sat, 3 Jan 2026 11:45:35 +0000 Subject: [PATCH 06/12] Address code review: add nd2 import guard and fix empty list checks Co-authored-by: AdvancedImagingUTSW <42547789+AdvancedImagingUTSW@users.noreply.github.com> --- src/clearex/io/read.py | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/src/clearex/io/read.py b/src/clearex/io/read.py index 9f0081f..5f978e5 100644 --- a/src/clearex/io/read.py +++ b/src/clearex/io/read.py @@ -38,7 +38,6 @@ import tifffile import zarr import h5py -import nd2 from numpy.typing import NDArray # Try to import ome-types for OME-TIFF metadata parsing @@ -48,6 +47,13 @@ except ImportError: HAS_OME_TYPES = False +# Try to import nd2 for ND2 file support +try: + import nd2 + HAS_ND2 = True +except ImportError: + HAS_ND2 = False + # Local Imports ArrayLike = Union[NDArray[Any], da.Array] @@ -118,10 +124,12 @@ def _normalize_axes(axes: Any) -> Optional[list]: try: # Handle string format (e.g., "TCZYX") if isinstance(axes, str): - return [ax.lower() for ax in axes] + 
return [ax.lower() for ax in axes] if axes else None # Handle list of dicts (OME-NGFF format) if isinstance(axes, list): + if not axes: # Empty list + return None if all(isinstance(item, dict) and "name" in item for item in axes): return [item["name"].lower() for item in axes] # Handle simple list of strings @@ -130,7 +138,7 @@ def _normalize_axes(axes: Any) -> Optional[list]: # Handle dict format (e.g., sizes dict from ND2) if isinstance(axes, dict): - return [key.lower() for key in axes.keys()] + return [key.lower() for key in axes.keys()] if axes else None except Exception: pass @@ -1328,6 +1336,12 @@ def open( >>> print(darr.chunksize) (1, 512, 512) """ + + if not HAS_ND2: + raise ImportError( + "The 'nd2' library is required to read ND2 files. " + "Install it with: pip install nd2" + ) with nd2.ND2File(str(path), **kwargs) as nd2_file: # Extract metadata From 62b8f47681934e7b4630a8b4cc0faf65db0ad2a0 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sat, 3 Jan 2026 11:48:01 +0000 Subject: [PATCH 07/12] Improve type annotations for axes field (Optional[List[str]]) Co-authored-by: AdvancedImagingUTSW <42547789+AdvancedImagingUTSW@users.noreply.github.com> --- src/clearex/io/read.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/clearex/io/read.py b/src/clearex/io/read.py index 5f978e5..e72eaa4 100644 --- a/src/clearex/io/read.py +++ b/src/clearex/io/read.py @@ -29,7 +29,7 @@ from abc import ABC, abstractmethod from dataclasses import dataclass from pathlib import Path -from typing import Any, Dict, Iterable, Optional, Tuple, Type, Union +from typing import Any, Dict, Iterable, List, Optional, Tuple, Type, Union import logging # Third Party Imports @@ -90,7 +90,7 @@ def _ensure_tuple(value: Any) -> Optional[Tuple[float, ...]]: return None -def _normalize_axes(axes: Any) -> Optional[list]: +def _normalize_axes(axes: Any) -> Optional[List[str]]: """Normalize axis information to OME-NGFF compatible list format. Converts various axis representations to a standardized list format @@ -218,7 +218,7 @@ class ImageInfo: path: Path shape: Tuple[int, ...] 
dtype: Any - axes: Optional[list] = None + axes: Optional[List[str]] = None pixel_size: Optional[Tuple[float, ...]] = None metadata: Optional[Dict[str, Any]] = None From 5369549b7d57600b043b6ad1154bf1eeec86be66 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sat, 3 Jan 2026 12:29:40 +0000 Subject: [PATCH 08/12] Add ome-types dependency and fix pixel_size ordering to ZYX convention Co-authored-by: AdvancedImagingUTSW <42547789+AdvancedImagingUTSW@users.noreply.github.com> --- pyproject.toml | 1 + src/clearex/io/read.py | 26 +++++++++++++++----------- 2 files changed, 16 insertions(+), 11 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 8a93783..912cc4c 100755 --- a/pyproject.toml +++ b/pyproject.toml @@ -26,6 +26,7 @@ dependencies = [ "napari[all]>0.6.1", "nd2", "neuroglancer>=2.40.1,<3.0.0", + "ome-types", "opencv-python", "pandas>=2.3.3", "pywavelets", diff --git a/src/clearex/io/read.py b/src/clearex/io/read.py index e72eaa4..6f0ae40 100644 --- a/src/clearex/io/read.py +++ b/src/clearex/io/read.py @@ -497,12 +497,12 @@ def open( size_y = getattr(pixels, 'physical_size_y', None) size_z = getattr(pixels, 'physical_size_z', None) - # Build pixel_size tuple based on available dimensions + # Build pixel_size tuple in ZYX order to match axes convention if size_x is not None and size_y is not None: if size_z is not None: - pixel_size = (size_x, size_y, size_z) + pixel_size = (size_z, size_y, size_x) else: - pixel_size = (size_x, size_y) + pixel_size = (size_y, size_x) except Exception: # If OME parsing fails, pixel_size remains None pass @@ -526,12 +526,12 @@ def open( # pixels per inch -> um per pixel x_um = 25400.0 / x_val # 1 inch = 25400 um y_um = 25400.0 / y_val - pixel_size = (x_um, y_um) + pixel_size = (y_um, x_um) # YX order elif res_unit.value == 3: # centimeter # pixels per cm -> um per pixel x_um = 10000.0 / x_val # 1 cm = 10000 um y_um = 10000.0 / y_val - pixel_size = (x_um, y_um) + pixel_size = (y_um, x_um) # YX order except Exception: # If standard TIFF tag parsing fails, pixel_size remains None pass @@ -1236,7 +1236,8 @@ class ND2Reader(Reader): ND2 files without loading them entirely into memory. Pixel size information is extracted from the Volume metadata and stored - in the ImageInfo as (X, Y, Z) calibration values in micrometers. + in the ImageInfo as (Z, Y, X) calibration values in micrometers, reordered + from the ND2 native (X, Y, Z) format to match our axes convention. Examples -------- @@ -1246,7 +1247,7 @@ class ND2Reader(Reader): >>> print(arr.shape) (10, 512, 512) >>> print(info.pixel_size) - (0.65, 0.65, 2.0) + (2.0, 0.65, 0.65) # Z, Y, X in micrometers >>> # For large files, use Dask >>> darr, info = reader.open(Path("large.nd2"), prefer_dask=True) @@ -1304,8 +1305,9 @@ def open( Notes ----- The reader extracts pixel/voxel sizes from the ND2 metadata's Volume - section. These are stored as (X, Y, Z) calibration values in micrometers - in the `pixel_size` field of ImageInfo. + section. The native ND2 format stores calibration as (X, Y, Z), but + these are reordered to (Z, Y, X) to match our axes convention and + stored in the `pixel_size` field of ImageInfo. Axes information is extracted from the ND2 file's dimension names and converted to a string format (e.g., "TCZYX"). 
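
The (X, Y, Z) -> (Z, Y, X) reordering this commit introduces, as a minimal sketch; the calibration values are illustrative (nd2 documents `Volume.axesCalibration` as (X, Y, Z) in micrometers):

calib_xyz = (0.325, 0.325, 1.0)   # hypothetical ND2 calibration, (X, Y, Z)
x, y, z = calib_xyz
pixel_size = (z, y, x)            # stored as (Z, Y, X) -> (1.0, 0.325, 0.325)

Storing (Z, Y, X) keeps pixel_size aligned with the spatial tail of axes lists such as ["t", "c", "z", "y", "x"], which is the convention ImageInfo now documents.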
@@ -1320,7 +1322,7 @@ def open( >>> print(f"Shape: {info.shape}, dtype: {info.dtype}") Shape: (5, 512, 512), dtype: uint16 >>> print(f"Pixel size: {info.pixel_size}") - Pixel size: (0.325, 0.325, 1.0) + Pixel size: (1.0, 0.325, 0.325) # Z, Y, X in micrometers >>> # Load an ND2 as a Dask array >>> darr, info = reader.open(Path("large.nd2"), prefer_dask=True) @@ -1361,7 +1363,9 @@ def open( channel = nd2_file.metadata.channels[0] if channel.volume and channel.volume.axesCalibration: # axesCalibration is (X, Y, Z) in micrometers - pixel_size = channel.volume.axesCalibration + # Reorder to (Z, Y, X) to match our axes convention + x, y, z = channel.volume.axesCalibration + pixel_size = (z, y, x) except (AttributeError, IndexError, TypeError): # If metadata extraction fails, pixel_size remains None pass From d5c1161e332c42fb17b19c65583189d96f681f95 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sat, 3 Jan 2026 12:31:42 +0000 Subject: [PATCH 09/12] Handle 2D and 3D ND2 calibration values safely Co-authored-by: AdvancedImagingUTSW <42547789+AdvancedImagingUTSW@users.noreply.github.com> --- src/clearex/io/read.py | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/src/clearex/io/read.py b/src/clearex/io/read.py index 6f0ae40..897c311 100644 --- a/src/clearex/io/read.py +++ b/src/clearex/io/read.py @@ -1362,11 +1362,19 @@ def open( # Get the first channel's volume information channel = nd2_file.metadata.channels[0] if channel.volume and channel.volume.axesCalibration: - # axesCalibration is (X, Y, Z) in micrometers + # axesCalibration is typically (X, Y, Z) in micrometers # Reorder to (Z, Y, X) to match our axes convention - x, y, z = channel.volume.axesCalibration - pixel_size = (z, y, x) - except (AttributeError, IndexError, TypeError): + calib = channel.volume.axesCalibration + if len(calib) == 3: + x, y, z = calib + pixel_size = (z, y, x) + elif len(calib) == 2: + x, y = calib + pixel_size = (y, x) + else: + # For other cases, use as-is + pixel_size = tuple(calib) + except (AttributeError, IndexError, TypeError, ValueError): # If metadata extraction fails, pixel_size remains None pass From 9397b74cab0c36a852946f125c4f1ee36945e71c Mon Sep 17 00:00:00 2001 From: Kevin Dean Date: Sat, 3 Jan 2026 08:40:19 -0600 Subject: [PATCH 10/12] Add ND2 image frame renumbering and export functions; enhance TIFF metadata parsing --- examples/notebooks/loading_data.ipynb | 242 ++++++++++++++++++++++++++ pyproject.toml | 1 + src/clearex/io/read.py | 100 ++++++++--- src/clearex/visualization/export.py | 126 ++++++++++++++ tests/io/test_read.py | 53 ++++++ uv.lock | 111 +++++++++++- 6 files changed, 600 insertions(+), 33 deletions(-) create mode 100644 examples/notebooks/loading_data.ipynb create mode 100644 src/clearex/visualization/export.py diff --git a/examples/notebooks/loading_data.ipynb b/examples/notebooks/loading_data.ipynb new file mode 100644 index 0000000..601014f --- /dev/null +++ b/examples/notebooks/loading_data.ipynb @@ -0,0 +1,242 @@ +{ + "cells": [ + { + "metadata": {}, + "cell_type": "markdown", + "source": [ + "### Examples for loading data using the ImageOpener class\n", + "\n", + "**Note:** If you've made code changes to clearex, run the next cell to reload the module:" + ], + "id": "75ca24c94a6de263" + }, + { + "metadata": { + "ExecuteTime": { + "end_time": "2026-01-03T14:37:37.071704943Z", + "start_time": "2026-01-03T14:37:37.013361003Z" + } + }, + "cell_type": "code", + "source": [ + "# OPTIONAL: 
Reload module to pick up code changes\n", + "# Only run this if you've edited the clearex source code\n", + "import importlib\n", + "import clearex.io.read\n", + "importlib.reload(clearex.io.read)" + ], + "id": "35a13da2a9189a62", + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "execution_count": 4 + }, + { + "metadata": { + "ExecuteTime": { + "end_time": "2026-01-03T14:37:38.589824061Z", + "start_time": "2026-01-03T14:37:38.579088264Z" + } + }, + "cell_type": "code", + "source": [ + "from clearex.io.read import ImageOpener\n", + "\n", + "reader = ImageOpener()" + ], + "id": "2d718ad83b4fbd2b", + "outputs": [], + "execution_count": 5 + }, + { + "metadata": { + "ExecuteTime": { + "end_time": "2026-01-03T14:38:12.799913710Z", + "start_time": "2026-01-03T14:37:46.029633536Z" + } + }, + "cell_type": "code", + "source": [ + "# ND2 file example\n", + "nd2_path = \"/archive/bioinformatics/Danuser_lab/Dean/Seweryn/SoRa/s_green_EEA1/1_non_expanded/full_large_8_sytox_EEA1_002.nd2\"\n", + "data, info = reader.open(nd2_path)\n", + "print(f\"ND2 pixel size: {info.pixel_size}\") # (0.9, 0.325, 0.325) µm" + ], + "id": "ee2276296eca1b46", + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "ND2 pixel size: (0.9, 0.325, 0.325)\n" + ] + } + ], + "execution_count": 7 + }, + { + "metadata": { + "ExecuteTime": { + "end_time": "2026-01-03T14:37:43.009163248Z", + "start_time": "2026-01-03T14:37:42.412701051Z" + } + }, + "cell_type": "code", + "source": [ + "# TIFF file example - extracts Z spacing from ImageDescription JSON\n", + "tiff_path = \"/archive/bioinformatics/Danuser_lab/Dean/dean/2024-10-18-yuanyuan/38_100umrange_0.2umstep_Cell_004/CH00_000000.tiff\"\n", + "data_tiff, info_tiff = reader.open(tiff_path)\n", + "print(f\"TIFF pixel size: {info_tiff.pixel_size}\") # (0.2, 0.167, 0.167) µm" + ], + "id": "5a95ea350ed12536", + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "TIFF pixel size: (0.2, 0.1670001075060573, 0.1670001075060573)\n" + ] + } + ], + "execution_count": 6 + }, + { + "metadata": { + "ExecuteTime": { + "end_time": "2026-01-03T14:38:37.480737331Z", + "start_time": "2026-01-03T14:38:37.063030889Z" + } + }, + "cell_type": "code", + "source": [ + "n5_path = \"/archive/bioinformatics/Danuser_lab/Dean/dean/2024-11-26-yuanyuan/fused.n5\"\n", + "data_n5, info_n5 = reader.open(n5_path)\n", + "print(info_n5.pixel_size)" + ], + "id": "76cf6052bd3fca69", + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/project/bioinformatics/Danuser_lab/Dean/dean/git/clearex/.venv/lib/python3.13/site-packages/zarr/n5.py:77: FutureWarning: The N5Store is deprecated and will be removed in a Zarr-Python version 3, see https://github.com/zarr-developers/zarr-python/issues/1274 for more information.\n", + " super().__init__(*args, **kwargs)\n", + "/project/bioinformatics/Danuser_lab/Dean/dean/git/clearex/.venv/lib/python3.13/site-packages/zarr/storage.py:179: FutureWarning: The N5Store is deprecated and will be removed in a Zarr-Python version 3, see https://github.com/zarr-developers/zarr-python/issues/1274 and https://github.com/zarr-developers/n5py for more information.\n", + " return N5Store(store)\n" + ] + }, + { + "ename": "ValueError", + "evalue": "('No suitable reader found for:', PosixPath('/archive/bioinformatics/Danuser_lab/Dean/dean/2024-11-26-yuanyuan/fused.n5'))", + "output_type": "error", + "traceback": [ + 
"\u001B[31m---------------------------------------------------------------------------\u001B[39m", + "\u001B[31mValueError\u001B[39m Traceback (most recent call last)", + "\u001B[36mCell\u001B[39m\u001B[36m \u001B[39m\u001B[32mIn[8]\u001B[39m\u001B[32m, line 2\u001B[39m\n\u001B[32m 1\u001B[39m n5_path = \u001B[33m\"\u001B[39m\u001B[33m/archive/bioinformatics/Danuser_lab/Dean/dean/2024-11-26-yuanyuan/fused.n5\u001B[39m\u001B[33m\"\u001B[39m\n\u001B[32m----> \u001B[39m\u001B[32m2\u001B[39m data_n5, info_n5 = \u001B[43mreader\u001B[49m\u001B[43m.\u001B[49m\u001B[43mopen\u001B[49m\u001B[43m(\u001B[49m\u001B[43mn5_path\u001B[49m\u001B[43m)\u001B[49m\n\u001B[32m 3\u001B[39m \u001B[38;5;28mprint\u001B[39m(info_n5.pixel_size)\n", + "\u001B[36mFile \u001B[39m\u001B[32m/project/bioinformatics/Danuser_lab/Dean/dean/git/clearex/src/clearex/io/read.py:1683\u001B[39m, in \u001B[36mImageOpener.open\u001B[39m\u001B[34m(self, path, prefer_dask, chunks, **kwargs)\u001B[39m\n\u001B[32m 1680\u001B[39m \u001B[38;5;28;01mcontinue\u001B[39;00m\n\u001B[32m 1682\u001B[39m logger.error(msg=\u001B[33mf\u001B[39m\u001B[33m\"\u001B[39m\u001B[33mNo suitable reader found for \u001B[39m\u001B[38;5;132;01m{\u001B[39;00mp\u001B[38;5;132;01m}\u001B[39;00m\u001B[33m\"\u001B[39m)\n\u001B[32m-> \u001B[39m\u001B[32m1683\u001B[39m \u001B[38;5;28;01mraise\u001B[39;00m \u001B[38;5;167;01mValueError\u001B[39;00m(\u001B[33m\"\u001B[39m\u001B[33mNo suitable reader found for:\u001B[39m\u001B[33m\"\u001B[39m, p)\n", + "\u001B[31mValueError\u001B[39m: ('No suitable reader found for:', PosixPath('/archive/bioinformatics/Danuser_lab/Dean/dean/2024-11-26-yuanyuan/fused.n5'))" + ] + } + ], + "execution_count": 8 + }, + { + "metadata": { + "ExecuteTime": { + "end_time": "2026-01-03T12:22:14.605995096Z", + "start_time": "2026-01-03T12:18:47.636065786Z" + } + }, + "cell_type": "code", + "source": [ + "h5_path = \"/archive/bioinformatics/Danuser_lab/Dean/dean/2023-11-13-Nicole/488myosin_561nuclear_647rfp/2023-11-08/Cell_001/CH00_000000.h5\"\n", + "data_h5, info_h5 = reader.open(h5_path)\n", + "info_h5.pixel_size" + ], + "id": "609d1ecb0dd284dc", + "outputs": [ + { + "ename": "ValueError", + "evalue": "('No suitable reader found for:', PosixPath('/archive/bioinformatics/Danuser_lab/Dean/dean/2023-11-13-Nicole/488myosin_561nuclear_647rfp/2023-11-08/Cell_001/CH00_000000.h5'))", + "output_type": "error", + "traceback": [ + "\u001B[31m---------------------------------------------------------------------------\u001B[39m", + "\u001B[31mValueError\u001B[39m Traceback (most recent call last)", + "\u001B[36mCell\u001B[39m\u001B[36m \u001B[39m\u001B[32mIn[12]\u001B[39m\u001B[32m, line 2\u001B[39m\n\u001B[32m 1\u001B[39m h5_path = \u001B[33m\"\u001B[39m\u001B[33m/archive/bioinformatics/Danuser_lab/Dean/dean/2023-11-13-Nicole/488myosin_561nuclear_647rfp/2023-11-08/Cell_001/CH00_000000.h5\u001B[39m\u001B[33m\"\u001B[39m\n\u001B[32m----> \u001B[39m\u001B[32m2\u001B[39m data_h5, info_h5 = \u001B[43mreader\u001B[49m\u001B[43m.\u001B[49m\u001B[43mopen\u001B[49m\u001B[43m(\u001B[49m\u001B[43mh5_path\u001B[49m\u001B[43m)\u001B[49m\n\u001B[32m 3\u001B[39m info_h5.pixel_size\n", + "\u001B[36mFile \u001B[39m\u001B[32m/project/bioinformatics/Danuser_lab/Dean/dean/git/clearex/src/clearex/io/read.py:1621\u001B[39m, in \u001B[36mImageOpener.open\u001B[39m\u001B[34m(self, path, prefer_dask, chunks, **kwargs)\u001B[39m\n\u001B[32m 1618\u001B[39m \u001B[38;5;28;01mcontinue\u001B[39;00m\n\u001B[32m 1620\u001B[39m 
logger.error(msg=\u001B[33mf\u001B[39m\u001B[33m\"\u001B[39m\u001B[33mNo suitable reader found for \u001B[39m\u001B[38;5;132;01m{\u001B[39;00mp\u001B[38;5;132;01m}\u001B[39;00m\u001B[33m\"\u001B[39m)\n\u001B[32m-> \u001B[39m\u001B[32m1621\u001B[39m \u001B[38;5;28;01mraise\u001B[39;00m \u001B[38;5;167;01mValueError\u001B[39;00m(\u001B[33m\"\u001B[39m\u001B[33mNo suitable reader found for:\u001B[39m\u001B[33m\"\u001B[39m, p)\n", + "\u001B[31mValueError\u001B[39m: ('No suitable reader found for:', PosixPath('/archive/bioinformatics/Danuser_lab/Dean/dean/2023-11-13-Nicole/488myosin_561nuclear_647rfp/2023-11-08/Cell_001/CH00_000000.h5'))" + ] + } + ], + "execution_count": 12 + }, + { + "metadata": { + "ExecuteTime": { + "end_time": "2026-01-03T12:23:31.758345762Z", + "start_time": "2026-01-03T12:23:31.694648815Z" + } + }, + "cell_type": "code", + "source": [ + "zarr_path = \"/archive/bioinformatics/Danuser_lab/Dean/dean/2024-05-21-tiling/cell5_fused_tp_0_ch_0.zarr\"\n", + "data_zarr, info_zarr = reader.open(zarr_path)\n", + "info_zarr.pixel_size" + ], + "id": "75008c08db1cd093", + "outputs": [ + { + "ename": "ValueError", + "evalue": "('No suitable reader found for:', PosixPath('/archive/bioinformatics/Danuser_lab/Dean/dean/2024-05-21-tiling/cell5_fused_tp_0_ch_0.zarr'))", + "output_type": "error", + "traceback": [ + "\u001B[31m---------------------------------------------------------------------------\u001B[39m", + "\u001B[31mValueError\u001B[39m Traceback (most recent call last)", + "\u001B[36mCell\u001B[39m\u001B[36m \u001B[39m\u001B[32mIn[13]\u001B[39m\u001B[32m, line 2\u001B[39m\n\u001B[32m 1\u001B[39m zarr_path = \u001B[33m\"\u001B[39m\u001B[33m/archive/bioinformatics/Danuser_lab/Dean/dean/2024-05-21-tiling/cell5_fused_tp_0_ch_0.zarr\u001B[39m\u001B[33m\"\u001B[39m\n\u001B[32m----> \u001B[39m\u001B[32m2\u001B[39m data_zarr, info_zarr = \u001B[43mreader\u001B[49m\u001B[43m.\u001B[49m\u001B[43mopen\u001B[49m\u001B[43m(\u001B[49m\u001B[43mzarr_path\u001B[49m\u001B[43m)\u001B[49m\n\u001B[32m 3\u001B[39m info_zarr.pixel_size\n", + "\u001B[36mFile \u001B[39m\u001B[32m/project/bioinformatics/Danuser_lab/Dean/dean/git/clearex/src/clearex/io/read.py:1621\u001B[39m, in \u001B[36mImageOpener.open\u001B[39m\u001B[34m(self, path, prefer_dask, chunks, **kwargs)\u001B[39m\n\u001B[32m 1618\u001B[39m \u001B[38;5;28;01mcontinue\u001B[39;00m\n\u001B[32m 1620\u001B[39m logger.error(msg=\u001B[33mf\u001B[39m\u001B[33m\"\u001B[39m\u001B[33mNo suitable reader found for \u001B[39m\u001B[38;5;132;01m{\u001B[39;00mp\u001B[38;5;132;01m}\u001B[39;00m\u001B[33m\"\u001B[39m)\n\u001B[32m-> \u001B[39m\u001B[32m1621\u001B[39m \u001B[38;5;28;01mraise\u001B[39;00m \u001B[38;5;167;01mValueError\u001B[39;00m(\u001B[33m\"\u001B[39m\u001B[33mNo suitable reader found for:\u001B[39m\u001B[33m\"\u001B[39m, p)\n", + "\u001B[31mValueError\u001B[39m: ('No suitable reader found for:', PosixPath('/archive/bioinformatics/Danuser_lab/Dean/dean/2024-05-21-tiling/cell5_fused_tp_0_ch_0.zarr'))" + ] + } + ], + "execution_count": 13 + }, + { + "metadata": {}, + "cell_type": "code", + "outputs": [], + "execution_count": null, + "source": "", + "id": "de201bd830e2a842" + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 2 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython2", + "version": "2.7.6" + } + }, 
+ "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/pyproject.toml b/pyproject.toml index 912cc4c..3e70819 100755 --- a/pyproject.toml +++ b/pyproject.toml @@ -29,6 +29,7 @@ dependencies = [ "ome-types", "opencv-python", "pandas>=2.3.3", + "pytest>=8.4.2", "pywavelets", "scikit-image", "scipy", diff --git a/src/clearex/io/read.py b/src/clearex/io/read.py index 897c311..f2db507 100644 --- a/src/clearex/io/read.py +++ b/src/clearex/io/read.py @@ -476,39 +476,65 @@ def open( # Try OME-axes, pixel size, and metadata with tifffile.TiffFile(str(path)) as tf: - ome_meta = getattr(tf, "omexml", None) + import json axes = None pixel_size = None - + size_x = None + size_y = None + size_z = None + + # Method 1: Try to extract from ImageDescription tag (may contain JSON dict with spacing) + try: + if tf.pages and "ImageDescription" in tf.pages[0].tags: + desc = tf.pages[0].tags["ImageDescription"].value + + # If desc is already a dict (tifffile auto-parsed it) + if isinstance(desc, dict): + if 'spacing' in desc: + unit = desc.get('unit', 'um') + if unit in ('um', 'µm'): + size_z = desc['spacing'] + # If desc is a string, try to parse as JSON + elif isinstance(desc, (str, bytes)): + if isinstance(desc, bytes): + desc = desc.decode("utf-8", errors="ignore") + # Check if it looks like JSON (not OME-XML) + if desc.strip().startswith('{'): + try: + desc_data = json.loads(desc) + if 'spacing' in desc_data: + unit = desc_data.get('unit', 'um') + if unit in ('um', 'µm'): + size_z = desc_data['spacing'] + except (json.JSONDecodeError, ValueError): + pass + except Exception: + pass + + # Method 2: Try OME metadata for axes and pixel sizes + ome_meta = getattr(tf, "ome_metadata", None) if ome_meta is not None: try: axes = ome_meta.image().pixels().DimensionOrder # e.g., "TCZYX" except Exception: axes = None - # Try to extract pixel size from OME metadata + # Try to extract X, Y, Z pixel sizes from OME metadata using ome-types if HAS_OME_TYPES and hasattr(tf, 'ome_metadata') and tf.ome_metadata: try: ome = from_xml(tf.ome_metadata) if ome.images and ome.images[0].pixels: pixels = ome.images[0].pixels - # Extract physical sizes (in micrometers by default in OME) size_x = getattr(pixels, 'physical_size_x', None) size_y = getattr(pixels, 'physical_size_y', None) - size_z = getattr(pixels, 'physical_size_z', None) - - # Build pixel_size tuple in ZYX order to match axes convention - if size_x is not None and size_y is not None: - if size_z is not None: - pixel_size = (size_z, size_y, size_x) - else: - pixel_size = (size_y, size_x) + # Only use PhysicalSizeZ if we didn't get it from Description + if size_z is None: + size_z = getattr(pixels, 'physical_size_z', None) except Exception: - # If OME parsing fails, pixel_size remains None pass - - # Fallback: try to extract from standard TIFF resolution tags - if pixel_size is None: + + # Method 3: Fallback to standard TIFF resolution tags for X and Y if not found + if size_x is None or size_y is None: try: page = tf.pages[0] x_res = page.tags.get('XResolution') @@ -523,19 +549,21 @@ def open( # Convert to micrometers per pixel based on unit # Resolution unit: 1=none, 2=inch, 3=centimeter if res_unit.value == 2: # inch - # pixels per inch -> um per pixel - x_um = 25400.0 / x_val # 1 inch = 25400 um - y_um = 25400.0 / y_val - pixel_size = (y_um, x_um) # YX order + size_x = 25400.0 / x_val # 1 inch = 25400 um + size_y = 25400.0 / y_val elif res_unit.value == 3: # centimeter - # pixels per cm -> um per pixel - x_um = 10000.0 / x_val # 1 cm = 10000 um - y_um = 10000.0 / y_val 
- pixel_size = (y_um, x_um) # YX order + size_x = 10000.0 / x_val # 1 cm = 10000 um + size_y = 10000.0 / y_val except Exception: - # If standard TIFF tag parsing fails, pixel_size remains None pass + # Build pixel_size tuple in ZYX order to match axes convention + if size_x is not None and size_y is not None: + if size_z is not None: + pixel_size = (size_z, size_y, size_x) + else: + pixel_size = (size_y, size_x) + if prefer_dask: # Option A: use tifffile's OME-as-zarr path if possible # This keeps it lazy and chunked without loading into RAM @@ -565,6 +593,28 @@ def open( logger.info(f"Loaded {path.name} as NumPy array.") return arr, info + @staticmethod + def find_ome(path, max_pages=None): + with tifffile.TiffFile(str(path)) as tf: + n = len(tf.pages) + lim = n if max_pages is None else min(n, max_pages) + + for idx in range(lim): + p = tf.pages[idx] + if "ImageDescription" not in p.tags: + continue + desc = p.tags["ImageDescription"].value + if isinstance(desc, bytes): + desc = desc.decode("utf-8", errors="ignore") + + print("Desc", desc) + if " + + + {json.dumps({"spacing": 0.2, "unit": "um", "axes": "ZYX", "shape": [10, 256, 256]})} + + + + + + +''' + + # Write TIFF with OME metadata + tifffile.imwrite(str(tiff_path), arr, metadata={'axes': 'TYX'}, description=ome_metadata) + + # Open and check that pixel_size includes Z spacing from Description + arr_out, info = tiff_reader.open(tiff_path) + + assert isinstance(arr_out, np.ndarray) + assert arr_out.shape == arr.shape + assert info.pixel_size is not None + + # Check that pixel_size tuple contains Z, Y, X values + # Z spacing should be 0.2 from Description, Y and X should be 0.167 from PhysicalSize + assert len(info.pixel_size) == 3 + assert abs(info.pixel_size[0] - 0.2) < 1e-6 # Z spacing from Description + assert abs(info.pixel_size[1] - 0.167) < 1e-3 # Y spacing from PhysicalSizeY + assert abs(info.pixel_size[2] - 0.167) < 1e-3 # X spacing from PhysicalSizeX + # ============================================================================= # Test ZarrReader Class diff --git a/uv.lock b/uv.lock index 1fdbf67..12592ad 100644 --- a/uv.lock +++ b/uv.lock @@ -660,10 +660,12 @@ dependencies = [ { name = "jupyterlab" }, { name = "matplotlib" }, { name = "napari", extra = ["all"] }, + { name = "nd2" }, { name = "neuroglancer" }, + { name = "ome-types" }, { name = "opencv-python" }, { name = "pandas" }, - { name = "pypetakit5d" }, + { name = "pytest" }, { name = "pywavelets" }, { name = "scikit-image" }, { name = "scipy" }, @@ -673,6 +675,9 @@ dependencies = [ ] [package.optional-dependencies] +decon = [ + { name = "pypetakit5d" }, +] dev = [ { name = "black" }, { name = "pandas-stubs" }, @@ -712,14 +717,17 @@ requires-dist = [ { name = "matplotlib" }, { name = "napari", extras = ["all"], specifier = ">0.6.1" }, { name = "nbconvert", marker = "extra == 'docs'" }, + { name = "nd2" }, { name = "neuroglancer", specifier = ">=2.40.1,<3.0.0" }, { name = "numpydoc", marker = "extra == 'docs'" }, + { name = "ome-types" }, { name = "opencv-python" }, { name = "pandas", specifier = ">=2.3.3" }, { name = "pandas-stubs", marker = "extra == 'dev'", specifier = "~=2.3.3" }, { name = "pre-commit", marker = "extra == 'dev'" }, { name = "pydata-sphinx-theme", marker = "extra == 'docs'" }, - { name = "pypetakit5d" }, + { name = "pypetakit5d", marker = "extra == 'decon'" }, + { name = "pytest", specifier = ">=8.4.2" }, { name = "pytest", marker = "extra == 'dev'" }, { name = "pytest-cov", marker = "extra == 'dev'" }, { name = "pytest-xvfb", marker = "extra == 
'dev'" }, @@ -728,7 +736,7 @@ requires-dist = [ { name = "ruff", marker = "extra == 'dev'", specifier = ">=0.14.8" }, { name = "scikit-image" }, { name = "scipy" }, - { name = "scipy-stubs", marker = "extra == 'dev'", specifier = "~=1.15.3" }, + { name = "scipy-stubs", marker = "extra == 'dev'", specifier = "~=1.16.3" }, { name = "seaborn" }, { name = "sphinx", marker = "extra == 'docs'" }, { name = "sphinx-copybutton", marker = "extra == 'docs'" }, @@ -740,7 +748,7 @@ requires-dist = [ { name = "ty", marker = "extra == 'dev'", specifier = ">=0.0.1a32" }, { name = "zarr", specifier = "<3.0" }, ] -provides-extras = ["dev", "docs"] +provides-extras = ["decon", "dev", "docs"] [[package]] name = "click" @@ -2552,6 +2560,22 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/a9/82/0340caa499416c78e5d8f5f05947ae4bc3cba53c9f038ab6e9ed964e22f1/nbformat-5.10.4-py3-none-any.whl", hash = "sha256:3b48d6c8fbca4b299bf3982ea7db1af21580e4fec269ad087b9e81588891200b", size = 78454, upload-time = "2024-04-04T11:20:34.895Z" }, ] +[[package]] +name = "nd2" +version = "0.10.4" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "numpy" }, + { name = "ome-types" }, + { name = "pydantic-core", marker = "python_full_version >= '3.13'" }, + { name = "resource-backed-dask-array" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/9a/19/99919c3d702b050ca1527020dd50ed5b39ecfec191f2424201946de27ea0/nd2-0.10.4.tar.gz", hash = "sha256:cce1abfb4e0ae31bc307b5f49b8c514190e29105af52c6d59de4105f9aade70a", size = 858647, upload-time = "2025-10-23T13:14:48.808Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/15/0f/9b9cd1e74849e75b4e4df3337232c084b99e2d3e2916fbc87276d2e49bea/nd2-0.10.4-py3-none-any.whl", hash = "sha256:1cd0f07d1cc9d2eb2caf19047884c7c9cbfb0a1f1701099ffd32fe4a8de377f5", size = 83673, upload-time = "2025-10-23T13:14:47.274Z" }, +] + [[package]] name = "nest-asyncio" version = "1.6.0" @@ -2713,6 +2737,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/67/0e/35082d13c09c02c011cf21570543d202ad929d961c02a147493cb0c2bdf5/numpy-2.2.6-cp313-cp313t-win_amd64.whl", hash = "sha256:6031dd6dfecc0cf9f668681a37648373bddd6421fff6c66ec1624eed0180ee06", size = 12771374, upload-time = "2025-05-17T21:43:35.479Z" }, ] +[[package]] +name = "numpy-typing-compat" +version = "20250818.2.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "numpy" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/d5/2c/6312cfc304b141bf9fa25a73dcd37d74a96cf411f0aa0f6df11f83e0b0ee/numpy_typing_compat-20250818.2.2.tar.gz", hash = "sha256:84f50c86908bf796857180856f1acb7da3c5bf22f461558de1cd225128c028ba", size = 4981, upload-time = "2025-08-18T23:46:42.17Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f3/36/4093816a4614df4d99ba71010736f1122acc2dbae316373575f17aaa2f8e/numpy_typing_compat-20250818.2.2-py3-none-any.whl", hash = "sha256:8b6c551952fd46e887ee905e75b6e4977d97defe1c63ae1b516343e9913e1534", size = 6292, upload-time = "2025-08-18T23:46:34.175Z" }, +] + [[package]] name = "numpydoc" version = "1.9.0" @@ -2741,6 +2777,21 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/95/a9/4f25a14d23f0786b64875b91784607c2277eff25d48f915e39ff0cff505a/oauth2client-4.1.3-py2.py3-none-any.whl", hash = "sha256:b8a81cc5d60e2d364f0b1b98f958dbd472887acaf1a5b05e21c28c31a2d6d3ac", size = 98206, upload-time = "2018-09-07T21:38:16.742Z" }, ] +[[package]] +name = "ome-types" +version = 
"0.6.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pydantic" }, + { name = "pydantic-core", marker = "python_full_version >= '3.13'" }, + { name = "pydantic-extra-types" }, + { name = "xsdata" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/48/4c/d252c1619c733eec9b4d2d21fe369fd21a2594954b396bf4352edea1e272/ome_types-0.6.3.tar.gz", hash = "sha256:eef4138cda5edfdcb2a44cfb90b714a59ead1b69e4c5ce5f9892ad397ccaaa68", size = 121784, upload-time = "2025-11-26T00:28:24.34Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/fc/6a/1000cad1700ab0af4d1b1d0a9c23c34badddb4f547c008bde2a6c61968f1/ome_types-0.6.3-py3-none-any.whl", hash = "sha256:ce9753ff351bbc534ee5c5038d3cf60b1e4c13d69ad2e6b5a5b75de2a52521a5", size = 245802, upload-time = "2025-11-26T00:28:22.853Z" }, +] + [[package]] name = "opencv-python" version = "4.12.0.88" @@ -2770,6 +2821,12 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/84/a6/11b0eb65eeafa87260d36858b69ec4e0072d09e37ea6714280960030bc93/optype-0.14.0-py3-none-any.whl", hash = "sha256:50d02edafd04edf2e5e27d6249760a51b2198adb9f6ffd778030b3d2806b026b", size = 89465, upload-time = "2025-10-01T04:49:54.674Z" }, ] +[package.optional-dependencies] +numpy = [ + { name = "numpy" }, + { name = "numpy-typing-compat" }, +] + [[package]] name = "packaging" version = "25.0" @@ -3386,6 +3443,19 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/f7/07/34573da085946b6a313d7c42f82f16e8920bfd730665de2d11c0c37a74b5/pydantic_core-2.41.5-graalpy312-graalpy250_312_native-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:76d0819de158cd855d1cbb8fcafdf6f5cf1eb8e470abe056d5d161106e38062b", size = 2139017, upload-time = "2025-11-04T13:42:59.471Z" }, ] +[[package]] +name = "pydantic-extra-types" +version = "2.11.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pydantic" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/fd/35/2fee58b1316a73e025728583d3b1447218a97e621933fc776fb8c0f2ebdd/pydantic_extra_types-2.11.0.tar.gz", hash = "sha256:4e9991959d045b75feb775683437a97991d02c138e00b59176571db9ce634f0e", size = 157226, upload-time = "2025-12-31T16:18:27.944Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/fe/17/fabd56da47096d240dd45ba627bead0333b0cf0ee8ada9bec579287dadf3/pydantic_extra_types-2.11.0-py3-none-any.whl", hash = "sha256:84b864d250a0fc62535b7ec591e36f2c5b4d1325fa0017eb8cda9aeb63b374a6", size = 74296, upload-time = "2025-12-31T16:18:26.38Z" }, +] + [[package]] name = "pydata-sphinx-theme" version = "0.16.1" @@ -3817,6 +3887,19 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/1e/db/4254e3eabe8020b458f1a747140d32277ec7a271daf1d235b70dc0b4e6e3/requests-2.32.5-py3-none-any.whl", hash = "sha256:2462f94637a34fd532264295e186976db0f5d453d1cdd31473c85a6a161affb6", size = 64738, upload-time = "2025-08-18T20:46:00.542Z" }, ] +[[package]] +name = "resource-backed-dask-array" +version = "0.1.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "dask", extra = ["array"] }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/62/80/b8952048ae1772d33b95dbf7d7107cf364c037cc229a2690fc8fa9ee8e48/resource_backed_dask_array-0.1.0.tar.gz", hash = "sha256:8fabcccf5c7e29059b5badd6786dd7675a258a203c58babf10077d9c90ada54f", size = 10300, upload-time = "2022-02-18T02:10:06.981Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/0d/b5/852f619e53fa7fb70d8915fcae66632df3958cac7e926c4ac38458958674/resource_backed_dask_array-0.1.0-py2.py3-none-any.whl", hash = "sha256:ec457fa72d81f0340a67ea6557a5a5919323a11cccc978a950df29fa69fe5679", size = 8044, upload-time = "2022-02-18T02:10:05.559Z" }, +] + [[package]] name = "rfc3339-validator" version = "0.1.4" @@ -4145,14 +4228,14 @@ wheels = [ [[package]] name = "scipy-stubs" -version = "1.15.3.0" +version = "1.16.3.3" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "optype" }, + { name = "optype", extra = ["numpy"] }, ] -sdist = { url = "https://files.pythonhosted.org/packages/0b/5f/35c43bd7d412add4adcd68475702571b2489b50c40b6564f808b2355e452/scipy_stubs-1.15.3.0.tar.gz", hash = "sha256:e8f76c9887461cf9424c1e2ad78ea5dac71dd4cbb383dc85f91adfe8f74d1e17", size = 275699, upload-time = "2025-05-08T16:58:35.139Z" } +sdist = { url = "https://files.pythonhosted.org/packages/08/91/1700d2a1a9f64f19bb019a547e510b99a6af1fef49641a0bce86bc85fb8e/scipy_stubs-1.16.3.3.tar.gz", hash = "sha256:af47578875d5557567225a16ec1b9b38a48c4c4377d92396413ebd65406c44ee", size = 361468, upload-time = "2025-12-08T13:45:38.37Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/6c/42/cd8dc81f8060de1f14960885ad5b2d2651f41de8b93d09f3f919d6567a5a/scipy_stubs-1.15.3.0-py3-none-any.whl", hash = "sha256:a251254cf4fd6e7fb87c55c1feee92d32ddbc1f542ecdf6a0159cdb81c2fb62d", size = 459062, upload-time = "2025-05-08T16:58:33.356Z" }, + { url = "https://files.pythonhosted.org/packages/7c/e2/3b8826f281f59301e3284989b19cfc56fdccf799134c1befedd38482a23a/scipy_stubs-1.16.3.3-py3-none-any.whl", hash = "sha256:f6316b36cd0fb272c994ae5b10c4a73c644a7e156ed8d32bcd9c35303d0e1b7e", size = 561750, upload-time = "2025-12-08T13:45:36.568Z" }, ] [[package]] @@ -4927,6 +5010,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/1f/f6/a933bd70f98e9cf3e08167fc5cd7aaaca49147e48411c0bd5ae701bb2194/wrapt-1.17.3-py3-none-any.whl", hash = "sha256:7171ae35d2c33d326ac19dd8facb1e82e5fd04ef8c6c0e394d7af55a55051c22", size = 23591, upload-time = "2025-08-12T05:53:20.674Z" }, ] +[[package]] +name = "xsdata" +version = "25.7" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/50/cf/d393286e40f7574c5d662a3ceefcf8e4cd65e73af6e54db0585c5b17c541/xsdata-25.7.tar.gz", hash = "sha256:1291ef759f4663baadb86562be4c25ebfc0003ca0debae3042b0067663f0c548", size = 345469, upload-time = "2025-07-06T16:40:03.19Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/79/10/c866e7b0fd57c92a4d5676884b81383005d81f8d7f07f1ac17e9c0ab3643/xsdata-25.7-py3-none-any.whl", hash = "sha256:d50b8c39389fd2b7283767a68a80cbf3bc51a3ede9cc3fefb30e84a52c999a9d", size = 234469, upload-time = "2025-07-06T16:40:01.656Z" }, +] + [[package]] name = "yarl" version = "1.22.0" From 0fc00845cbae97e6e7d215bd38d461537f380057 Mon Sep 17 00:00:00 2001 From: Kevin Dean Date: Sun, 4 Jan 2026 12:11:59 -0600 Subject: [PATCH 11/12] Add N5Reader for N5 format files; enhance file opening and error handling --- examples/notebooks/loading_data.ipynb | 172 ++++++++++++------ src/clearex/io/read.py | 246 ++++++++++++++++++++++++-- 2 files changed, 345 insertions(+), 73 deletions(-) diff --git a/examples/notebooks/loading_data.ipynb b/examples/notebooks/loading_data.ipynb index 601014f..6860279 100644 --- a/examples/notebooks/loading_data.ipynb +++ b/examples/notebooks/loading_data.ipynb @@ -13,12 
+13,14 @@ { "metadata": { "ExecuteTime": { - "end_time": "2026-01-03T14:37:37.071704943Z", - "start_time": "2026-01-03T14:37:37.013361003Z" + "end_time": "2026-01-04T13:08:43.391281834Z", + "start_time": "2026-01-04T13:08:41.874159158Z" } }, "cell_type": "code", "source": [ + "from pathlib import Path\n", + "\n", "# OPTIONAL: Reload module to pick up code changes\n", "# Only run this if you've edited the clearex source code\n", "import importlib\n", @@ -33,35 +35,94 @@ "" ] }, - "execution_count": 4, + "execution_count": 1, "metadata": {}, "output_type": "execute_result" } ], - "execution_count": 4 + "execution_count": 1 }, { "metadata": { "ExecuteTime": { - "end_time": "2026-01-03T14:37:38.589824061Z", - "start_time": "2026-01-03T14:37:38.579088264Z" + "end_time": "2026-01-04T13:01:14.348628027Z", + "start_time": "2026-01-04T13:01:14.343427809Z" } }, "cell_type": "code", "source": [ "from clearex.io.read import ImageOpener\n", - "\n", "reader = ImageOpener()" ], "id": "2d718ad83b4fbd2b", "outputs": [], - "execution_count": 5 + "execution_count": 8 + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": "### Troubleshooting cell - check if files exist and what type they are", + "id": "2e2fcb6884f2aa1f" + }, + { + "metadata": { + "ExecuteTime": { + "end_time": "2026-01-04T12:54:20.103374315Z", + "start_time": "2026-01-04T12:54:20.084388894Z" + } + }, + "cell_type": "code", + "source": [ + "# Check file/directory status\n", + "paths_to_check = [\n", + " \"/archive/bioinformatics/Danuser_lab/Dean/dean/2024-11-26-yuanyuan/fused.n5\",\n", + " \"/archive/bioinformatics/Danuser_lab/Dean/dean/2023-11-13-Nicole/488myosin_561nuclear_647rfp/2023-11-08/Cell_001/CH00_000000.h5\",\n", + " \"/archive/bioinformatics/Danuser_lab/Dean/dean/2024-05-21-tiling/cell5_fused_tp_0_ch_0.zarr\",\n", + "]\n", + "\n", + "for path_str in paths_to_check:\n", + " p = Path(path_str)\n", + " print(f\"{p.name}:\")\n", + " print(f\" Exists: {p.exists()}\")\n", + " print(f\" Is file: {p.is_file()}\")\n", + " print(f\" Is dir: {p.is_dir()}\")\n", + " print(f\" Suffix: {p.suffix}\")\n", + " print()" + ], + "id": "4a116e8acf32c0c0", + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "fused.n5:\n", + " Exists: True\n", + " Is file: False\n", + " Is dir: True\n", + " Suffix: .n5\n", + "\n", + "CH00_000000.h5:\n", + " Exists: True\n", + " Is file: True\n", + " Is dir: False\n", + " Suffix: .h5\n", + "\n", + "cell5_fused_tp_0_ch_0.zarr:\n", + " Exists: True\n", + " Is file: False\n", + " Is dir: True\n", + " Suffix: .zarr\n", + "\n" + ] + } + ], + "execution_count": 3 }, { "metadata": { "ExecuteTime": { - "end_time": "2026-01-03T14:38:12.799913710Z", - "start_time": "2026-01-03T14:37:46.029633536Z" + "end_time": "2026-01-04T12:55:35.864905675Z", + "start_time": "2026-01-04T12:54:38.365907936Z" } }, "cell_type": "code", @@ -81,13 +142,13 @@ ] } ], - "execution_count": 7 + "execution_count": 4 }, { "metadata": { "ExecuteTime": { - "end_time": "2026-01-03T14:37:43.009163248Z", - "start_time": "2026-01-03T14:37:42.412701051Z" + "end_time": "2026-01-04T12:56:18.440127781Z", + "start_time": "2026-01-04T12:56:17.514223975Z" } }, "cell_type": "code", @@ -107,53 +168,44 @@ ] } ], - "execution_count": 6 + "execution_count": 5 }, { "metadata": { "ExecuteTime": { - "end_time": "2026-01-03T14:38:37.480737331Z", - "start_time": "2026-01-03T14:38:37.063030889Z" + "end_time": "2026-01-04T18:10:10.857155914Z", + "start_time": "2026-01-04T18:10:10.809669800Z" } }, "cell_type": "code", "source": [ - "n5_path = 
\"/archive/bioinformatics/Danuser_lab/Dean/dean/2024-11-26-yuanyuan/fused.n5\"\n", - "data_n5, info_n5 = reader.open(n5_path)\n", - "print(info_n5.pixel_size)" + "importlib.reload(clearex.io.read)\n", + "from clearex.io.read import ImageOpener\n", + "reader = ImageOpener()\n", + "n5_path = \"/archive/bioinformatics/Danuser_lab/Dean/dean/2024-11-26-yuanyuan/fused.n5/\"\n", + "data_n5, info_n5 = reader.open(n5_path, prefer_dask=True)\n", + "print(info_n5.pixel_size)\n", + "print(data_n5.shape)" ], "id": "76cf6052bd3fca69", "outputs": [ { - "name": "stderr", + "name": "stdout", "output_type": "stream", "text": [ - "/project/bioinformatics/Danuser_lab/Dean/dean/git/clearex/.venv/lib/python3.13/site-packages/zarr/n5.py:77: FutureWarning: The N5Store is deprecated and will be removed in a Zarr-Python version 3, see https://github.com/zarr-developers/zarr-python/issues/1274 for more information.\n", - " super().__init__(*args, **kwargs)\n", - "/project/bioinformatics/Danuser_lab/Dean/dean/git/clearex/.venv/lib/python3.13/site-packages/zarr/storage.py:179: FutureWarning: The N5Store is deprecated and will be removed in a Zarr-Python version 3, see https://github.com/zarr-developers/zarr-python/issues/1274 and https://github.com/zarr-developers/n5py for more information.\n", - " return N5Store(store)\n" - ] - }, - { - "ename": "ValueError", - "evalue": "('No suitable reader found for:', PosixPath('/archive/bioinformatics/Danuser_lab/Dean/dean/2024-11-26-yuanyuan/fused.n5'))", - "output_type": "error", - "traceback": [ - "\u001B[31m---------------------------------------------------------------------------\u001B[39m", - "\u001B[31mValueError\u001B[39m Traceback (most recent call last)", - "\u001B[36mCell\u001B[39m\u001B[36m \u001B[39m\u001B[32mIn[8]\u001B[39m\u001B[32m, line 2\u001B[39m\n\u001B[32m 1\u001B[39m n5_path = \u001B[33m\"\u001B[39m\u001B[33m/archive/bioinformatics/Danuser_lab/Dean/dean/2024-11-26-yuanyuan/fused.n5\u001B[39m\u001B[33m\"\u001B[39m\n\u001B[32m----> \u001B[39m\u001B[32m2\u001B[39m data_n5, info_n5 = \u001B[43mreader\u001B[49m\u001B[43m.\u001B[49m\u001B[43mopen\u001B[49m\u001B[43m(\u001B[49m\u001B[43mn5_path\u001B[49m\u001B[43m)\u001B[49m\n\u001B[32m 3\u001B[39m \u001B[38;5;28mprint\u001B[39m(info_n5.pixel_size)\n", - "\u001B[36mFile \u001B[39m\u001B[32m/project/bioinformatics/Danuser_lab/Dean/dean/git/clearex/src/clearex/io/read.py:1683\u001B[39m, in \u001B[36mImageOpener.open\u001B[39m\u001B[34m(self, path, prefer_dask, chunks, **kwargs)\u001B[39m\n\u001B[32m 1680\u001B[39m \u001B[38;5;28;01mcontinue\u001B[39;00m\n\u001B[32m 1682\u001B[39m logger.error(msg=\u001B[33mf\u001B[39m\u001B[33m\"\u001B[39m\u001B[33mNo suitable reader found for \u001B[39m\u001B[38;5;132;01m{\u001B[39;00mp\u001B[38;5;132;01m}\u001B[39;00m\u001B[33m\"\u001B[39m)\n\u001B[32m-> \u001B[39m\u001B[32m1683\u001B[39m \u001B[38;5;28;01mraise\u001B[39;00m \u001B[38;5;167;01mValueError\u001B[39;00m(\u001B[33m\"\u001B[39m\u001B[33mNo suitable reader found for:\u001B[39m\u001B[33m\"\u001B[39m, p)\n", - "\u001B[31mValueError\u001B[39m: ('No suitable reader found for:', PosixPath('/archive/bioinformatics/Danuser_lab/Dean/dean/2024-11-26-yuanyuan/fused.n5'))" + "Reader N5Reader claims the file.\n", + "None\n", + "(1401, 13108, 13109)\n" ] } ], - "execution_count": 8 + "execution_count": 37 }, { "metadata": { "ExecuteTime": { - "end_time": "2026-01-03T12:22:14.605995096Z", - "start_time": "2026-01-03T12:18:47.636065786Z" + "end_time": "2026-01-04T18:11:23.676134437Z", + "start_time": "2026-01-04T18:11:17.158002498Z" 
} }, "cell_type": "code", @@ -164,50 +216,56 @@ ], "id": "609d1ecb0dd284dc", "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Reader HDF5Reader claims the file.\n", + "Reader HDF5Reader failed to open the file: [Errno 13] Unable to synchronously open file (file read failed: time = Sun Jan 4 12:11:17 2026\n", + ", filename = '/archive/bioinformatics/Danuser_lab/Dean/dean/2023-11-13-Nicole/488myosin_561nuclear_647rfp/2023-11-08/Cell_001/CH00_000000.h5', file descriptor = 60, errno = 13, error message = 'Permission denied', buf = 0x7ffc76276f48, total read size = 8, bytes this sub-read = 8, offset = 0)\n" + ] + }, { "ename": "ValueError", - "evalue": "('No suitable reader found for:', PosixPath('/archive/bioinformatics/Danuser_lab/Dean/dean/2023-11-13-Nicole/488myosin_561nuclear_647rfp/2023-11-08/Cell_001/CH00_000000.h5'))", + "evalue": "No suitable reader found for: /archive/bioinformatics/Danuser_lab/Dean/dean/2023-11-13-Nicole/488myosin_561nuclear_647rfp/2023-11-08/Cell_001/CH00_000000.h5", "output_type": "error", "traceback": [ "\u001B[31m---------------------------------------------------------------------------\u001B[39m", "\u001B[31mValueError\u001B[39m Traceback (most recent call last)", - "\u001B[36mCell\u001B[39m\u001B[36m \u001B[39m\u001B[32mIn[12]\u001B[39m\u001B[32m, line 2\u001B[39m\n\u001B[32m 1\u001B[39m h5_path = \u001B[33m\"\u001B[39m\u001B[33m/archive/bioinformatics/Danuser_lab/Dean/dean/2023-11-13-Nicole/488myosin_561nuclear_647rfp/2023-11-08/Cell_001/CH00_000000.h5\u001B[39m\u001B[33m\"\u001B[39m\n\u001B[32m----> \u001B[39m\u001B[32m2\u001B[39m data_h5, info_h5 = \u001B[43mreader\u001B[49m\u001B[43m.\u001B[49m\u001B[43mopen\u001B[49m\u001B[43m(\u001B[49m\u001B[43mh5_path\u001B[49m\u001B[43m)\u001B[49m\n\u001B[32m 3\u001B[39m info_h5.pixel_size\n", - "\u001B[36mFile \u001B[39m\u001B[32m/project/bioinformatics/Danuser_lab/Dean/dean/git/clearex/src/clearex/io/read.py:1621\u001B[39m, in \u001B[36mImageOpener.open\u001B[39m\u001B[34m(self, path, prefer_dask, chunks, **kwargs)\u001B[39m\n\u001B[32m 1618\u001B[39m \u001B[38;5;28;01mcontinue\u001B[39;00m\n\u001B[32m 1620\u001B[39m logger.error(msg=\u001B[33mf\u001B[39m\u001B[33m\"\u001B[39m\u001B[33mNo suitable reader found for \u001B[39m\u001B[38;5;132;01m{\u001B[39;00mp\u001B[38;5;132;01m}\u001B[39;00m\u001B[33m\"\u001B[39m)\n\u001B[32m-> \u001B[39m\u001B[32m1621\u001B[39m \u001B[38;5;28;01mraise\u001B[39;00m \u001B[38;5;167;01mValueError\u001B[39;00m(\u001B[33m\"\u001B[39m\u001B[33mNo suitable reader found for:\u001B[39m\u001B[33m\"\u001B[39m, p)\n", - "\u001B[31mValueError\u001B[39m: ('No suitable reader found for:', PosixPath('/archive/bioinformatics/Danuser_lab/Dean/dean/2023-11-13-Nicole/488myosin_561nuclear_647rfp/2023-11-08/Cell_001/CH00_000000.h5'))" + "\u001B[36mCell\u001B[39m\u001B[36m \u001B[39m\u001B[32mIn[40]\u001B[39m\u001B[32m, line 2\u001B[39m\n\u001B[32m 1\u001B[39m h5_path = \u001B[33m\"\u001B[39m\u001B[33m/archive/bioinformatics/Danuser_lab/Dean/dean/2023-11-13-Nicole/488myosin_561nuclear_647rfp/2023-11-08/Cell_001/CH00_000000.h5\u001B[39m\u001B[33m\"\u001B[39m\n\u001B[32m----> \u001B[39m\u001B[32m2\u001B[39m data_h5, info_h5 = \u001B[43mreader\u001B[49m\u001B[43m.\u001B[49m\u001B[43mopen\u001B[49m\u001B[43m(\u001B[49m\u001B[43mh5_path\u001B[49m\u001B[43m)\u001B[49m\n\u001B[32m 3\u001B[39m info_h5.pixel_size\n", + "\u001B[36mFile \u001B[39m\u001B[32m/project/bioinformatics/Danuser_lab/Dean/dean/git/clearex/src/clearex/io/read.py:1897\u001B[39m, in 
\u001B[36mImageOpener.open\u001B[39m\u001B[34m(self, path, prefer_dask, chunks, **kwargs)\u001B[39m\n\u001B[32m 1895\u001B[39m error_msg += \u001B[33mf\u001B[39m\u001B[33m\"\u001B[39m\u001B[33m - \u001B[39m\u001B[38;5;132;01m{\u001B[39;00mreader_name\u001B[38;5;132;01m}\u001B[39;00m\u001B[33m: \u001B[39m\u001B[38;5;132;01m{\u001B[39;00merror\u001B[38;5;132;01m}\u001B[39;00m\u001B[38;5;130;01m\\n\u001B[39;00m\u001B[33m\"\u001B[39m\n\u001B[32m 1896\u001B[39m logger.error(msg=error_msg)\n\u001B[32m-> \u001B[39m\u001B[32m1897\u001B[39m \u001B[38;5;28;01mraise\u001B[39;00m \u001B[38;5;167;01mValueError\u001B[39;00m(\u001B[33mf\u001B[39m\u001B[33m\"\u001B[39m\u001B[33mNo suitable reader found for: \u001B[39m\u001B[38;5;132;01m{\u001B[39;00mp\u001B[38;5;132;01m}\u001B[39;00m\u001B[33m\"\u001B[39m)\n", + "\u001B[31mValueError\u001B[39m: No suitable reader found for: /archive/bioinformatics/Danuser_lab/Dean/dean/2023-11-13-Nicole/488myosin_561nuclear_647rfp/2023-11-08/Cell_001/CH00_000000.h5" ] } ], - "execution_count": 12 + "execution_count": 40 }, { "metadata": { "ExecuteTime": { - "end_time": "2026-01-03T12:23:31.758345762Z", - "start_time": "2026-01-03T12:23:31.694648815Z" + "end_time": "2026-01-04T18:11:09.752524718Z", + "start_time": "2026-01-04T18:11:02.637067432Z" } }, "cell_type": "code", "source": [ "zarr_path = \"/archive/bioinformatics/Danuser_lab/Dean/dean/2024-05-21-tiling/cell5_fused_tp_0_ch_0.zarr\"\n", "data_zarr, info_zarr = reader.open(zarr_path)\n", - "info_zarr.pixel_size" + "info_zarr.pixel_size\n", + "print(data_zarr.shape)" ], "id": "75008c08db1cd093", "outputs": [ { - "ename": "ValueError", - "evalue": "('No suitable reader found for:', PosixPath('/archive/bioinformatics/Danuser_lab/Dean/dean/2024-05-21-tiling/cell5_fused_tp_0_ch_0.zarr'))", - "output_type": "error", - "traceback": [ - "\u001B[31m---------------------------------------------------------------------------\u001B[39m", - "\u001B[31mValueError\u001B[39m Traceback (most recent call last)", - "\u001B[36mCell\u001B[39m\u001B[36m \u001B[39m\u001B[32mIn[13]\u001B[39m\u001B[32m, line 2\u001B[39m\n\u001B[32m 1\u001B[39m zarr_path = \u001B[33m\"\u001B[39m\u001B[33m/archive/bioinformatics/Danuser_lab/Dean/dean/2024-05-21-tiling/cell5_fused_tp_0_ch_0.zarr\u001B[39m\u001B[33m\"\u001B[39m\n\u001B[32m----> \u001B[39m\u001B[32m2\u001B[39m data_zarr, info_zarr = \u001B[43mreader\u001B[49m\u001B[43m.\u001B[49m\u001B[43mopen\u001B[49m\u001B[43m(\u001B[49m\u001B[43mzarr_path\u001B[49m\u001B[43m)\u001B[49m\n\u001B[32m 3\u001B[39m info_zarr.pixel_size\n", - "\u001B[36mFile \u001B[39m\u001B[32m/project/bioinformatics/Danuser_lab/Dean/dean/git/clearex/src/clearex/io/read.py:1621\u001B[39m, in \u001B[36mImageOpener.open\u001B[39m\u001B[34m(self, path, prefer_dask, chunks, **kwargs)\u001B[39m\n\u001B[32m 1618\u001B[39m \u001B[38;5;28;01mcontinue\u001B[39;00m\n\u001B[32m 1620\u001B[39m logger.error(msg=\u001B[33mf\u001B[39m\u001B[33m\"\u001B[39m\u001B[33mNo suitable reader found for \u001B[39m\u001B[38;5;132;01m{\u001B[39;00mp\u001B[38;5;132;01m}\u001B[39;00m\u001B[33m\"\u001B[39m)\n\u001B[32m-> \u001B[39m\u001B[32m1621\u001B[39m \u001B[38;5;28;01mraise\u001B[39;00m \u001B[38;5;167;01mValueError\u001B[39;00m(\u001B[33m\"\u001B[39m\u001B[33mNo suitable reader found for:\u001B[39m\u001B[33m\"\u001B[39m, p)\n", - "\u001B[31mValueError\u001B[39m: ('No suitable reader found for:', PosixPath('/archive/bioinformatics/Danuser_lab/Dean/dean/2024-05-21-tiling/cell5_fused_tp_0_ch_0.zarr'))" + "name": "stdout", + "output_type": "stream", + "text": 
[ + "Reader ZarrReader claims the file.\n", + "(1, 1, 65, 5734, 9550)\n" ] } ], - "execution_count": 13 + "execution_count": 39 }, { "metadata": {}, diff --git a/src/clearex/io/read.py b/src/clearex/io/read.py index f2db507..945d02a 100644 --- a/src/clearex/io/read.py +++ b/src/clearex/io/read.py @@ -39,6 +39,7 @@ import zarr import h5py from numpy.typing import NDArray +from zarr import N5Store # Try to import ome-types for OME-TIFF metadata parsing try: @@ -616,6 +617,165 @@ def find_ome(path, max_pages=None): return None, None + +class N5Reader(Reader): + """Reader for N5 format files. + N5 is a chunked array storage format similar to Zarr but with a different + directory structure. This reader handles N5 stores by searching the filesystem + for attributes.json files that define arrays. + Attributes + ---------- + SUFFIXES : tuple of str + Supported file extensions: ('.n5', '.n5/'). + """ + SUFFIXES = (".n5", ".n5/") + @staticmethod + def _find_arrays_in_n5(base_path: Path, depth=0) -> List[Tuple[str, Dict[str, Any]]]: + """Recursively find all N5 arrays by looking for attributes.json files. + Parameters + ---------- + base_path : Path + Base directory to search + depth : int + Current recursion depth + Returns + ------- + List[Tuple[str, Dict]] + List of (relative_path, attributes_dict) for each array found + """ + import json + arrays = [] + if not base_path.is_dir(): + return arrays + # Check if this directory has an attributes.json that defines an array + attrs_file = base_path / "attributes.json" + if attrs_file.exists(): + try: + with open(attrs_file, 'r') as f: + attrs = json.load(f) + # N5 arrays have 'dimensions' attribute + if 'dimensions' in attrs: + arrays.append(("", attrs)) + return arrays # Don't recurse into array directories + except Exception: + pass + # Recurse into subdirectories + try: + for item in base_path.iterdir(): + if item.is_dir(): + sub_arrays = N5Reader._find_arrays_in_n5(item, depth + 1) + # Prepend the subdirectory name to the paths + for path, attrs in sub_arrays: + rel_path = item.name if not path else f"{item.name}/{path}" + arrays.append((rel_path, attrs)) + except Exception: + pass + return arrays + def open( + self, + path: Path, + prefer_dask: bool = False, + chunks: Optional[Union[int, Tuple[int, ...]]] = None, + **kwargs: Any, + ) -> Tuple[NDArray[Any], ImageInfo]: + """Open an N5 file and return the image data and metadata. + Parameters + ---------- + path : Path + The path to the N5 store directory (e.g., 'data.n5'). + prefer_dask : bool, optional + If True, return a Dask array for lazy evaluation. If False, load + the entire array into memory as a NumPy array. Defaults to False. + chunks : int or tuple of int, optional + Chunk size for Dask arrays. If None, uses the N5 store's native + chunking. Only relevant when `prefer_dask=True`. Defaults to None. + **kwargs : dict + Additional keyword arguments (unused for N5). + Returns + ------- + arr : NDArray[Any] or dask.array.Array + The loaded image data. + info : ImageInfo + Metadata about the loaded image. + Raises + ------ + ValueError + If the N5 store contains no arrays. 
+ """ + from zarr.n5 import N5Store + # Find all arrays in the N5 store + arrays_info = self._find_arrays_in_n5(path) + if not arrays_info: + logger.error(f"No arrays found in N5 store: {path}") + raise ValueError(f"No arrays found in N5 store: {path}") + logger.info(f"Found {len(arrays_info)} array(s) in N5 store") + # Open the N5 store at the root level + store = N5Store(str(path)) + root = zarr.open(store, mode='r') + # Access nested arrays by path + arrays = [] + for rel_path, attrs in arrays_info: + try: + if rel_path: + # Navigate to nested array + arr = root + for part in rel_path.split('/'): + arr = arr[part] + else: + # Root level array + arr = root + if isinstance(arr, zarr.Array): + arrays.append(arr) + logger.info(f" Loaded N5 array at '{rel_path}': shape={arr.shape}, dtype={arr.dtype}") + except Exception as e: + logger.warning(f"Failed to access N5 array at {rel_path}: {e}") + continue + + if not arrays: + logger.error(f"Could not open any arrays in N5 store: {path}") + raise ValueError(f"Could not open any arrays in N5 store: {path}") + # Pick array with the largest number of elements + array = max(arrays, key=lambda arr: np.prod(arr.shape)) + # Extract metadata + axes = None + pixel_size = None + meta = {} + try: + attrs = getattr(array, "attrs", {}) + axes = attrs.get("axes") + meta = dict(attrs) + if "pixel_size" in attrs: + pixel_size = _ensure_tuple(attrs["pixel_size"]) + elif "scale" in attrs: + pixel_size = _ensure_tuple(attrs["scale"]) + elif "resolution" in attrs: + pixel_size = _ensure_tuple(attrs["resolution"]) + except Exception: + pass + if prefer_dask: + darr = da.from_zarr(array, chunks=chunks) if chunks else da.from_zarr(array) + logger.info(f"Loaded {path.name} as a Dask array.") + info = ImageInfo( + path=path, + shape=tuple(darr.shape), + dtype=darr.dtype, + axes=_normalize_axes(axes), + pixel_size=pixel_size, + metadata=meta, + ) + return darr, info + else: + np_arr = np.array(array) if np is not None else array[:] + logger.info(f"Loaded {path.name} as a NumPy array.") + info = ImageInfo( + path=path, + shape=tuple(np_arr.shape), + dtype=np_arr.dtype, + axes=_normalize_axes(axes), + pixel_size=pixel_size, + metadata=meta, + ) + return np_arr, info class ZarrReader(Reader): """Reader for Zarr and N5 storage formats. @@ -659,7 +819,7 @@ class ZarrReader(Reader): ['z', 'y', 'x'] """ - SUFFIXES = (".zarr", ".zarr/", ".n5", ".n5/") + SUFFIXES = (".zarr", ".zarr/") def open( self, @@ -752,14 +912,12 @@ def open( """ grp = zarr.open_group(str(path), mode="r") - # collect all arrays - arrays = [] - if hasattr(grp, "array_keys") and callable(grp.array_keys): - arrays = [grp[k] for k in grp.array_keys()] + # Collect all arrays + arrays = self._collect_arrays(grp) if not arrays: - logger.error(f"No arrays found in Zarr group: {path}") - raise ValueError(f"No arrays found in Zarr group: {path}") + logger.error(f"No arrays found in Zarr store: {path}") + raise ValueError(f"No arrays found in Zarr store: {path}") # Pick array with the largest number of elements array = max(arrays, key=lambda arr: np.prod(arr.shape)) @@ -795,7 +953,7 @@ def open( pixel_size = _ensure_tuple(attrs["scale"]) elif "resolution" in attrs: pixel_size = _ensure_tuple(attrs["resolution"]) - + except Exception: pass @@ -825,6 +983,48 @@ def open( ) return np_arr, info + @staticmethod + def _collect_arrays(group, depth=0): + """Recursively collect all arrays from a Zarr/N5 group. 
+ + Parameters + ---------- + group : zarr.Group + The Zarr group to search + depth : int + Current recursion depth + + Returns + ------- + list + List of arrays found in the group and its children + """ + out = [] + + # Check if this is actually an Array + if isinstance(group, zarr.Array): + out.append(group) + return out + + # If it's a Group, iterate over children + if isinstance(group, zarr.Group): + try: + for key in group.keys(): + try: + item = group[key] + # Check if item is an Array + if isinstance(item, zarr.Array): + out.append(item) + # Check if item is a Group - recurse into it + elif isinstance(item, zarr.Group): + out.extend(ZarrReader._collect_arrays(item, depth + 1)) + except Exception: + continue + except Exception: + pass + + return out + class HDF5Reader(Reader): """Reader for HDF5 files using the h5py library. @@ -1005,7 +1205,7 @@ def _collect_datasets(group: h5py.Group) -> list: or attrs.get("DimensionOrder") or attrs.get("DIMENSION_LABELS") # sometimes stored by other tools ) - + # Try to extract pixel size from HDF5 attributes if "pixel_size" in attrs: pixel_size = _ensure_tuple(attrs["pixel_size"]) @@ -1016,7 +1216,7 @@ def _collect_datasets(group: h5py.Group) -> list: # Check for individual axis scales elif "element_size_um" in attrs: pixel_size = _ensure_tuple(attrs["element_size_um"]) - + except Exception: pass @@ -1549,7 +1749,7 @@ def __init__(self, readers: Optional[Iterable[Type[Reader]]] = None) -> None: """ # Registry order is priority order self._readers: Tuple[Type[Reader], ...] = tuple( - readers or (TiffReader, ZarrReader, NumpyReader, HDF5Reader, ND2Reader) + readers or (TiffReader, N5Reader, ZarrReader, NumpyReader, HDF5Reader, ND2Reader) ) def open( @@ -1653,18 +1853,24 @@ def open( logger.error(msg=f"File {p} does not exist") raise FileNotFoundError(p) + # Track errors for better diagnostics + errors = [] + # 1) Extension-based selection logger.info(msg=f"Opening {p}") for reader_cls in self._readers: try: if reader_cls.claims(p): + print(f"Reader {reader_cls.__name__} claims the file.") reader = reader_cls() logger.info(msg=f"Using reader: {reader_cls.__name__}.") return reader.open( path=p, prefer_dask=prefer_dask, chunks=chunks, **kwargs ) - except Exception: - pass + except Exception as e: + print(f"Reader {reader_cls.__name__} failed to open the file: {e}") + errors.append((reader_cls.__name__, str(e))) + logger.debug(msg=f"{reader_cls.__name__} failed: {e}") # 2) Fallback: probe readers that didn't claim the file logger.info( @@ -1673,14 +1879,22 @@ def open( for reader_cls in self._readers: try: reader: Reader = reader_cls() + logger.debug(msg=f"Trying fallback reader: {reader_cls.__name__}") return reader.open( path=p, prefer_dask=prefer_dask, chunks=chunks, **kwargs ) - except Exception: + except Exception as e: + errors.append((reader_cls.__name__, str(e))) + logger.debug(msg=f"{reader_cls.__name__} fallback failed: {e}") continue - logger.error(msg=f"No suitable reader found for {p}") - raise ValueError("No suitable reader found for:", p) + # Report all errors for debugging + error_msg = f"No suitable reader found for {p}.\n" + error_msg += "Attempted readers and their errors:\n" + for reader_name, error in errors: + error_msg += f" - {reader_name}: {error}\n" + logger.error(msg=error_msg) + raise ValueError(f"No suitable reader found for: {p}") def rename_tiff_to_tif(base_path: str, recursive: bool = True) -> int: From 1ba709352fb9b75c005b2b0aadd432268c6f4b8d Mon Sep 17 00:00:00 2001 From: Kevin Dean Date: Sun, 4 Jan 2026 12:21:28 
-0600 Subject: [PATCH 12/12] Add N5Reader for N5 format files; enhance file opening and error handling --- examples/notebooks/loading_data.ipynb | 18 ++-- pyproject.toml | 1 + src/clearex/io/read.py | 139 +++++++++++++++----------- uv.lock | 2 + 4 files changed, 91 insertions(+), 69 deletions(-) diff --git a/examples/notebooks/loading_data.ipynb b/examples/notebooks/loading_data.ipynb index 6860279..ba705af 100644 --- a/examples/notebooks/loading_data.ipynb +++ b/examples/notebooks/loading_data.ipynb @@ -204,13 +204,13 @@ { "metadata": { "ExecuteTime": { - "end_time": "2026-01-04T18:11:23.676134437Z", - "start_time": "2026-01-04T18:11:17.158002498Z" + "end_time": "2026-01-04T18:19:14.387967971Z", + "start_time": "2026-01-04T18:15:19.468743033Z" } }, "cell_type": "code", "source": [ - "h5_path = \"/archive/bioinformatics/Danuser_lab/Dean/dean/2023-11-13-Nicole/488myosin_561nuclear_647rfp/2023-11-08/Cell_001/CH00_000000.h5\"\n", + "h5_path = \"/archive/bioinformatics/Danuser_lab/Dean/dean/2023-11-13-Nicole/NA/488myosin_561nuclear_647rfp/2023-11-08/Cell_001/CH00_000000.h5\"\n", "data_h5, info_h5 = reader.open(h5_path)\n", "info_h5.pixel_size" ], @@ -221,24 +221,24 @@ "output_type": "stream", "text": [ "Reader HDF5Reader claims the file.\n", - "Reader HDF5Reader failed to open the file: [Errno 13] Unable to synchronously open file (file read failed: time = Sun Jan 4 12:11:17 2026\n", - ", filename = '/archive/bioinformatics/Danuser_lab/Dean/dean/2023-11-13-Nicole/488myosin_561nuclear_647rfp/2023-11-08/Cell_001/CH00_000000.h5', file descriptor = 60, errno = 13, error message = 'Permission denied', buf = 0x7ffc76276f48, total read size = 8, bytes this sub-read = 8, offset = 0)\n" + "Reader HDF5Reader failed to open the file: [Errno 13] Unable to synchronously open file (file read failed: time = Sun Jan 4 12:19:09 2026\n", + ", filename = '/archive/bioinformatics/Danuser_lab/Dean/dean/2023-11-13-Nicole/NA/488myosin_561nuclear_647rfp/2023-11-08/Cell_001/CH00_000000.h5', file descriptor = 56, errno = 13, error message = 'Permission denied', buf = 0x7ffc76276f48, total read size = 8, bytes this sub-read = 8, offset = 0)\n" ] }, { "ename": "ValueError", - "evalue": "No suitable reader found for: /archive/bioinformatics/Danuser_lab/Dean/dean/2023-11-13-Nicole/488myosin_561nuclear_647rfp/2023-11-08/Cell_001/CH00_000000.h5", + "evalue": "No suitable reader found for: /archive/bioinformatics/Danuser_lab/Dean/dean/2023-11-13-Nicole/NA/488myosin_561nuclear_647rfp/2023-11-08/Cell_001/CH00_000000.h5", "output_type": "error", "traceback": [ "\u001B[31m---------------------------------------------------------------------------\u001B[39m", "\u001B[31mValueError\u001B[39m Traceback (most recent call last)", - "\u001B[36mCell\u001B[39m\u001B[36m \u001B[39m\u001B[32mIn[40]\u001B[39m\u001B[32m, line 2\u001B[39m\n\u001B[32m 1\u001B[39m h5_path = \u001B[33m\"\u001B[39m\u001B[33m/archive/bioinformatics/Danuser_lab/Dean/dean/2023-11-13-Nicole/488myosin_561nuclear_647rfp/2023-11-08/Cell_001/CH00_000000.h5\u001B[39m\u001B[33m\"\u001B[39m\n\u001B[32m----> \u001B[39m\u001B[32m2\u001B[39m data_h5, info_h5 = \u001B[43mreader\u001B[49m\u001B[43m.\u001B[49m\u001B[43mopen\u001B[49m\u001B[43m(\u001B[49m\u001B[43mh5_path\u001B[49m\u001B[43m)\u001B[49m\n\u001B[32m 3\u001B[39m info_h5.pixel_size\n", + "\u001B[36mCell\u001B[39m\u001B[36m \u001B[39m\u001B[32mIn[41]\u001B[39m\u001B[32m, line 2\u001B[39m\n\u001B[32m 1\u001B[39m h5_path = 
\u001B[33m\"\u001B[39m\u001B[33m/archive/bioinformatics/Danuser_lab/Dean/dean/2023-11-13-Nicole/NA/488myosin_561nuclear_647rfp/2023-11-08/Cell_001/CH00_000000.h5\u001B[39m\u001B[33m\"\u001B[39m\n\u001B[32m----> \u001B[39m\u001B[32m2\u001B[39m data_h5, info_h5 = \u001B[43mreader\u001B[49m\u001B[43m.\u001B[49m\u001B[43mopen\u001B[49m\u001B[43m(\u001B[49m\u001B[43mh5_path\u001B[49m\u001B[43m)\u001B[49m\n\u001B[32m 3\u001B[39m info_h5.pixel_size\n", "\u001B[36mFile \u001B[39m\u001B[32m/project/bioinformatics/Danuser_lab/Dean/dean/git/clearex/src/clearex/io/read.py:1897\u001B[39m, in \u001B[36mImageOpener.open\u001B[39m\u001B[34m(self, path, prefer_dask, chunks, **kwargs)\u001B[39m\n\u001B[32m 1895\u001B[39m error_msg += \u001B[33mf\u001B[39m\u001B[33m\"\u001B[39m\u001B[33m - \u001B[39m\u001B[38;5;132;01m{\u001B[39;00mreader_name\u001B[38;5;132;01m}\u001B[39;00m\u001B[33m: \u001B[39m\u001B[38;5;132;01m{\u001B[39;00merror\u001B[38;5;132;01m}\u001B[39;00m\u001B[38;5;130;01m\\n\u001B[39;00m\u001B[33m\"\u001B[39m\n\u001B[32m 1896\u001B[39m logger.error(msg=error_msg)\n\u001B[32m-> \u001B[39m\u001B[32m1897\u001B[39m \u001B[38;5;28;01mraise\u001B[39;00m \u001B[38;5;167;01mValueError\u001B[39;00m(\u001B[33mf\u001B[39m\u001B[33m\"\u001B[39m\u001B[33mNo suitable reader found for: \u001B[39m\u001B[38;5;132;01m{\u001B[39;00mp\u001B[38;5;132;01m}\u001B[39;00m\u001B[33m\"\u001B[39m)\n", - "\u001B[31mValueError\u001B[39m: No suitable reader found for: /archive/bioinformatics/Danuser_lab/Dean/dean/2023-11-13-Nicole/488myosin_561nuclear_647rfp/2023-11-08/Cell_001/CH00_000000.h5" + "\u001B[31mValueError\u001B[39m: No suitable reader found for: /archive/bioinformatics/Danuser_lab/Dean/dean/2023-11-13-Nicole/NA/488myosin_561nuclear_647rfp/2023-11-08/Cell_001/CH00_000000.h5" ] } ], - "execution_count": 40 + "execution_count": 41 }, { "metadata": { diff --git a/pyproject.toml b/pyproject.toml index 3e70819..ae8a47b 100755 --- a/pyproject.toml +++ b/pyproject.toml @@ -14,6 +14,7 @@ requires-python = ">=3.12" dependencies = [ "antspyx", + "black>=25.11.0", "cython>=3.1.4", "dask==2025.1.0", "dask-image", diff --git a/src/clearex/io/read.py b/src/clearex/io/read.py index 945d02a..9fb76ad 100644 --- a/src/clearex/io/read.py +++ b/src/clearex/io/read.py @@ -31,6 +31,7 @@ from pathlib import Path from typing import Any, Dict, Iterable, List, Optional, Tuple, Type, Union import logging +import json # Third Party Imports import numpy as np @@ -39,11 +40,12 @@ import zarr import h5py from numpy.typing import NDArray -from zarr import N5Store +from zarr.n5 import N5Store # Try to import ome-types for OME-TIFF metadata parsing try: from ome_types import from_xml + HAS_OME_TYPES = True except ImportError: HAS_OME_TYPES = False @@ -51,12 +53,14 @@ # Try to import nd2 for ND2 file support try: import nd2 + HAS_ND2 = True except ImportError: HAS_ND2 = False # Local Imports +# Define a custom Typing object ArrayLike = Union[NDArray[Any], da.Array] # Start logging @@ -66,12 +70,12 @@ def _ensure_tuple(value: Any) -> Optional[Tuple[float, ...]]: """Convert various types to tuple of floats for pixel_size. - + Parameters ---------- value : Any Value to convert (list, tuple, ndarray, or single value) - + Returns ------- Optional[Tuple[float, ...]] @@ -93,10 +97,10 @@ def _ensure_tuple(value: Any) -> Optional[Tuple[float, ...]]: def _normalize_axes(axes: Any) -> Optional[List[str]]: """Normalize axis information to OME-NGFF compatible list format. 
- + Converts various axis representations to a standardized list format compatible with OME-NGFF (up to 5 dimensions: t, c, z, y, x). - + Parameters ---------- axes : Any @@ -105,12 +109,12 @@ def _normalize_axes(axes: Any) -> Optional[List[str]]: - List of axis names like ["t", "c", "z", "y", "x"] - List of OME-NGFF axis dicts like [{"name": "t", "type": "time"}, ...] - Dict with keys like {'T': 10, 'C': 3, 'Z': 5, 'Y': 512, 'X': 512} - + Returns ------- Optional[list] List of lowercase axis names (e.g., ["t", "c", "z", "y", "x"]) or None - + Notes ----- Following OME-NGFF conventions: @@ -121,12 +125,12 @@ def _normalize_axes(axes: Any) -> Optional[List[str]]: """ if axes is None: return None - + try: # Handle string format (e.g., "TCZYX") if isinstance(axes, str): return [ax.lower() for ax in axes] if axes else None - + # Handle list of dicts (OME-NGFF format) if isinstance(axes, list): if not axes: # Empty list @@ -136,14 +140,14 @@ def _normalize_axes(axes: Any) -> Optional[List[str]]: # Handle simple list of strings elif all(isinstance(item, str) for item in axes): return [ax.lower() for ax in axes] - + # Handle dict format (e.g., sizes dict from ND2) if isinstance(axes, dict): return [key.lower() for key in axes.keys()] if axes else None - + except Exception: pass - + return None @@ -172,7 +176,7 @@ class ImageInfo: - 'z' for Z spatial dimension - 'y' for Y spatial dimension - 'x' for X spatial dimension - + Examples: ["t", "c", "z", "y", "x"], ["z", "y", "x"], ["y", "x"] Order matches the actual array dimension order. Defaults to None if not available. @@ -203,7 +207,7 @@ class ImageInfo: ['z', 'y', 'x'] >>> print(info.pixel_size) (2.0, 0.65, 0.65) - + >>> # 5D example >>> info_5d = ImageInfo( ... path=Path("timeseries.nd2"), @@ -477,7 +481,6 @@ def open( # Try OME-axes, pixel size, and metadata with tifffile.TiffFile(str(path)) as tf: - import json axes = None pixel_size = None size_x = None @@ -491,22 +494,22 @@ def open( # If desc is already a dict (tifffile auto-parsed it) if isinstance(desc, dict): - if 'spacing' in desc: - unit = desc.get('unit', 'um') - if unit in ('um', 'µm'): - size_z = desc['spacing'] + if "spacing" in desc: + unit = desc.get("unit", "um") + if unit in ("um", "µm"): + size_z = desc["spacing"] # If desc is a string, try to parse as JSON elif isinstance(desc, (str, bytes)): if isinstance(desc, bytes): desc = desc.decode("utf-8", errors="ignore") # Check if it looks like JSON (not OME-XML) - if desc.strip().startswith('{'): + if desc.strip().startswith("{"): try: desc_data = json.loads(desc) - if 'spacing' in desc_data: - unit = desc_data.get('unit', 'um') - if unit in ('um', 'µm'): - size_z = desc_data['spacing'] + if "spacing" in desc_data: + unit = desc_data.get("unit", "um") + if unit in ("um", "µm"): + size_z = desc_data["spacing"] except (json.JSONDecodeError, ValueError): pass except Exception: @@ -519,18 +522,18 @@ def open( axes = ome_meta.image().pixels().DimensionOrder # e.g., "TCZYX" except Exception: axes = None - + # Try to extract X, Y, Z pixel sizes from OME metadata using ome-types - if HAS_OME_TYPES and hasattr(tf, 'ome_metadata') and tf.ome_metadata: + if HAS_OME_TYPES and hasattr(tf, "ome_metadata") and tf.ome_metadata: try: ome = from_xml(tf.ome_metadata) if ome.images and ome.images[0].pixels: pixels = ome.images[0].pixels - size_x = getattr(pixels, 'physical_size_x', None) - size_y = getattr(pixels, 'physical_size_y', None) + size_x = getattr(pixels, "physical_size_x", None) + size_y = getattr(pixels, "physical_size_y", None) # Only 
use PhysicalSizeZ if we didn't get it from Description
                     if size_z is None:
-                        size_z = getattr(pixels, 'physical_size_z', None)
+                        size_z = getattr(pixels, "physical_size_z", None)
             except Exception:
                 pass
 
@@ -538,15 +541,23 @@
             if size_x is None or size_y is None:
                 try:
                     page = tf.pages[0]
-                    x_res = page.tags.get('XResolution')
-                    y_res = page.tags.get('YResolution')
-                    res_unit = page.tags.get('ResolutionUnit')
-
+                    x_res = page.tags.get("XResolution")
+                    y_res = page.tags.get("YResolution")
+                    res_unit = page.tags.get("ResolutionUnit")
+
                     if x_res and y_res and res_unit:
                         # Extract resolution values
-                        x_val = x_res.value[0] / x_res.value[1] if isinstance(x_res.value, tuple) else x_res.value
-                        y_val = y_res.value[0] / y_res.value[1] if isinstance(y_res.value, tuple) else y_res.value
-
+                        x_val = (
+                            x_res.value[0] / x_res.value[1]
+                            if isinstance(x_res.value, tuple)
+                            else x_res.value
+                        )
+                        y_val = (
+                            y_res.value[0] / y_res.value[1]
+                            if isinstance(y_res.value, tuple)
+                            else y_res.value
+                        )
+
                         # Convert to micrometers per pixel based on unit
                         # Resolution unit: 1=none, 2=inch, 3=centimeter
                         if res_unit.value == 2:  # inch
@@ -608,7 +619,6 @@ def find_ome(path, max_pages=None):
                 if isinstance(desc, bytes):
                     desc = desc.decode("utf-8", errors="ignore")
 
-                print("Desc", desc)
                 if "<OME" in desc:
                     return idx, desc
 
         return None, None
 
@@ [hunk header lost to extraction] @@ class N5Reader(Reader):
     SUFFIXES = (".n5", ".n5/")
 
     @staticmethod
-    def _find_arrays_in_n5(base_path: Path, depth=0) -> List[Tuple[str, Dict[str, Any]]]:
+    def _find_arrays_in_n5(
+        base_path: Path, depth=0
+    ) -> List[Tuple[str, Dict[str, Any]]]:
@@ -643,7 +656,7 @@
         List[Tuple[str, Dict]]
             List of (relative_path, attributes_dict) for each array found
         """
-        import json
+
         arrays = []
         if not base_path.is_dir():
             return arrays
@@ -651,10 +664,10 @@
         attrs_file = base_path / "attributes.json"
         if attrs_file.exists():
             try:
-                with open(attrs_file, 'r') as f:
+                with open(attrs_file, "r") as f:
                     attrs = json.load(f)
                 # N5 arrays have 'dimensions' attribute
-                if 'dimensions' in attrs:
+                if "dimensions" in attrs:
                     arrays.append(("", attrs))
                     return arrays  # Don't recurse into array directories
             except Exception:
@@ -671,6 +684,7 @@
         except Exception:
             pass
         return arrays
+
     def open(
         self,
         path: Path,
@@ -702,7 +716,6 @@
         ValueError
             If the N5 store contains no arrays.
""" - from zarr.n5 import N5Store # Find all arrays in the N5 store arrays_info = self._find_arrays_in_n5(path) if not arrays_info: @@ -711,7 +724,7 @@ def open( logger.info(f"Found {len(arrays_info)} array(s) in N5 store") # Open the N5 store at the root level store = N5Store(str(path)) - root = zarr.open(store, mode='r') + root = zarr.open(store, mode="r") # Access nested arrays by path arrays = [] for rel_path, attrs in arrays_info: @@ -719,18 +732,20 @@ def open( if rel_path: # Navigate to nested array arr = root - for part in rel_path.split('/'): + for part in rel_path.split("/"): arr = arr[part] else: # Root level array arr = root if isinstance(arr, zarr.Array): arrays.append(arr) - logger.info(f" Loaded N5 array at '{rel_path}': shape={arr.shape}, dtype={arr.dtype}") + logger.info( + f" Loaded N5 array at '{rel_path}': shape={arr.shape}, dtype={arr.dtype}" + ) except Exception as e: logger.warning(f"Failed to access N5 array at {rel_path}: {e}") continue - + if not arrays: logger.error(f"Could not open any arrays in N5 store: {path}") raise ValueError(f"Could not open any arrays in N5 store: {path}") @@ -776,6 +791,8 @@ def open( metadata=meta, ) return np_arr, info + + class ZarrReader(Reader): """Reader for Zarr and N5 storage formats. @@ -929,7 +946,7 @@ def open( attrs = getattr(array, "attrs", {}) axes = attrs.get("multiscales", [{}])[0].get("axes") or attrs.get("axes") meta = dict(attrs) - + # Try to extract pixel size from Zarr attributes # Check for OME-Zarr style multiscales metadata if "multiscales" in attrs and attrs["multiscales"]: @@ -940,11 +957,14 @@ def open( dataset = multiscale["datasets"][0] if "coordinateTransformations" in dataset: for transform in dataset["coordinateTransformations"]: - if transform.get("type") == "scale" and "scale" in transform: + if ( + transform.get("type") == "scale" + and "scale" in transform + ): # Scale values typically correspond to axis order pixel_size = tuple(transform["scale"]) break - + # Fallback: check for direct pixel_size or scale attributes if pixel_size is None: if "pixel_size" in attrs: @@ -1588,7 +1608,7 @@ def open( >>> print(darr.chunksize) (1, 512, 512) """ - + if not HAS_ND2: raise ImportError( "The 'nd2' library is required to read ND2 files. " @@ -1602,7 +1622,7 @@ def open( pixel_size = None # Get axes information from sizes dict - if hasattr(nd2_file, 'sizes') and nd2_file.sizes: + if hasattr(nd2_file, "sizes") and nd2_file.sizes: # Pass the sizes dict to normalize_axes which will extract keys axes = nd2_file.sizes @@ -1630,18 +1650,18 @@ def open( # Store additional metadata if nd2_file.metadata: - metadata_dict['metadata'] = nd2_file.metadata - if hasattr(nd2_file, 'attributes') and nd2_file.attributes: - metadata_dict['attributes'] = nd2_file.attributes + metadata_dict["metadata"] = nd2_file.metadata + if hasattr(nd2_file, "attributes") and nd2_file.attributes: + metadata_dict["attributes"] = nd2_file.attributes if prefer_dask: # Use nd2's native Dask support darr = nd2_file.to_dask() - + # Apply custom chunking if specified if chunks is not None: darr = darr.rechunk(chunks) - + info = ImageInfo( path=path, shape=tuple(darr.shape), @@ -1749,7 +1769,8 @@ def __init__(self, readers: Optional[Iterable[Type[Reader]]] = None) -> None: """ # Registry order is priority order self._readers: Tuple[Type[Reader], ...] 
= tuple( - readers or (TiffReader, N5Reader, ZarrReader, NumpyReader, HDF5Reader, ND2Reader) + readers + or (TiffReader, N5Reader, ZarrReader, NumpyReader, HDF5Reader, ND2Reader) ) def open( @@ -1861,14 +1882,12 @@ def open( for reader_cls in self._readers: try: if reader_cls.claims(p): - print(f"Reader {reader_cls.__name__} claims the file.") reader = reader_cls() logger.info(msg=f"Using reader: {reader_cls.__name__}.") return reader.open( path=p, prefer_dask=prefer_dask, chunks=chunks, **kwargs ) except Exception as e: - print(f"Reader {reader_cls.__name__} failed to open the file: {e}") errors.append((reader_cls.__name__, str(e))) logger.debug(msg=f"{reader_cls.__name__} failed: {e}") diff --git a/uv.lock b/uv.lock index 12592ad..4c59e02 100644 --- a/uv.lock +++ b/uv.lock @@ -650,6 +650,7 @@ version = "0.1.1" source = { editable = "." } dependencies = [ { name = "antspyx" }, + { name = "black" }, { name = "cython" }, { name = "dask" }, { name = "dask-image" }, @@ -705,6 +706,7 @@ docs = [ [package.metadata] requires-dist = [ { name = "antspyx" }, + { name = "black", specifier = ">=25.11.0" }, { name = "black", marker = "extra == 'dev'", specifier = ">=25.11.0" }, { name = "cython", specifier = ">=3.1.4" }, { name = "dask", specifier = "==2025.1.0" },
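Taken together, PATCH 11 and 12 change how the reader registry reports failure: instead of silently swallowing exceptions, ImageOpener now collects every reader's error and logs the list before raising. A hedged usage sketch of the patched behavior (paths hypothetical; assumes the clearex package as it stands after these patches):

    from clearex.io.read import ImageOpener

    # Default priority after these patches: TiffReader, N5Reader, ZarrReader,
    # NumpyReader, HDF5Reader, ND2Reader -- suffix-based claims first, then
    # a fallback pass that probes every reader.
    opener = ImageOpener()

    try:
        # prefer_dask keeps large fused volumes lazy rather than loading into RAM
        data, info = opener.open("/data/fused.n5", prefer_dask=True)
        print(info.axes, info.pixel_size, data.shape)
    except ValueError as err:
        # The exception names the offending path; the log now records every
        # reader that was tried and why it failed (e.g. the Errno 13
        # permission error seen in the notebook above).
        print(err)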