From d78dab16dbea6b94a5e64b212e7f36280f8c4837 Mon Sep 17 00:00:00 2001 From: Martin Helm Date: Fri, 12 Dec 2025 12:43:24 +0000 Subject: [PATCH 1/3] Correctly parse chunks parameter for RasterModels when using DataArrays --- src/spatialdata/models/models.py | 23 ++++++++++++++++++- tests/models/test_models.py | 39 ++++++++++++++++++++++++++++++++ 2 files changed, 61 insertions(+), 1 deletion(-) diff --git a/src/spatialdata/models/models.py b/src/spatialdata/models/models.py index bed33ff1..2bdd9629 100644 --- a/src/spatialdata/models/models.py +++ b/src/spatialdata/models/models.py @@ -185,7 +185,7 @@ def parse( # if there are no dims in the data, use the model's dims or provided dims elif isinstance(data, np.ndarray | DaskArray): if not isinstance(data, DaskArray): # numpy -> dask - data = from_array(data) + data = from_array(data.data) if dims is None: dims = cls.dims.dims else: @@ -239,6 +239,10 @@ def parse( chunks=chunks, ) _parse_transformations(data, parsed_transform) + else: + if chunks is None: + chunks = "auto" # type: ignore[assignment] + data = data.chunk(chunks=chunks) cls()._check_chunk_size_not_too_large(data) # recompute coordinates for (multiscale) spatial image return compute_coordinates(data) @@ -1280,3 +1284,20 @@ def _get_region_metadata_from_region_key_column(table: AnnData) -> list[str]: annotated_regions = table.obs[region_key].cat.remove_unused_categories().cat.categories.unique().tolist() assert isinstance(annotated_regions, list) return annotated_regions + + +# def _parse_chunk_for_dataarray( +# model: type[RasterSchema], +# chunks: int | tuple[int, ...] | tuple[tuple[int, ...], ...] | Mapping[Any, int | tuple[int, ...] | None] | None, +# ) -> str | int | tuple[int, ...] 
| tuple[tuple[int, ...], ...]: +# if chunks is None: +# chunks_parsed = "auto" +# elif isinstance(chunks, dict): +# dims = np.array(model.dims.dims).tolist() +# assert set(dims) == set(chunks.keys()) +# chunks_parsed = tuple([int(chunks[dim]) for dim in dims]) +# else: +# # do nothing +# chunks_parsed = chunks + +# return chunks_parsed diff --git a/tests/models/test_models.py b/tests/models/test_models.py index 2ed108b7..7c7087b8 100644 --- a/tests/models/test_models.py +++ b/tests/models/test_models.py @@ -195,6 +195,45 @@ def test_raster_schema( with pytest.raises(ValueError): model.parse(image, **kwargs) + @pytest.mark.parametrize( + "model,chunks,expected", + [ + (Labels2DModel, None, (10, 10)), + (Labels2DModel, 5, (5, 5)), + (Labels2DModel, (5, 5), (5, 5)), + (Labels2DModel, {"x": 5, "y": 5}, (5, 5)), + (Labels3DModel, None, (1, 10, 10)), + (Labels3DModel, 5, (1, 5, 5)), + (Labels3DModel, (1, 5, 5), (1, 5, 5)), + (Labels3DModel, {"z": 1, "x": 5, "y": 5}, (1, 5, 5)), + (Image2DModel, None, (1, 10, 10)), # Image2D Models always have a c dimension + (Image2DModel, 5, (1, 5, 5)), + (Image2DModel, (1, 5, 5), (1, 5, 5)), + (Image2DModel, {"c": 1, "x": 5, "y": 5}, (1, 5, 5)), + (Image3DModel, None, (1, 1, 10, 10)), # Image3D models have z in addition, so 4 total dimensions + (Image3DModel, 5, (1, 1, 5, 5)), + (Image3DModel, (1, 1, 5, 5), (1, 1, 5, 5)), + ( + Image3DModel, + {"c": 1, "z": 1, "x": 5, "y": 5}, + (1, 1, 5, 5), + ), + ], + ) + def test_raster_models_parse_with_chunks_parameter(self, model, chunks, expected): + dims = np.array(model.dims.dims).tolist() + n_dims = len(dims) + + image: ArrayLike = np.arange(100).reshape((10, 10)) + if n_dims == 3: + image = np.expand_dims(image, axis=0) + + if n_dims == 4: + image = np.expand_dims(image, axis=(0, 1)) + + x = model.parse(image, chunks=chunks) + assert x.data.chunksize == expected + @pytest.mark.parametrize("model", [Labels2DModel, Labels3DModel]) def test_labels_model_with_multiscales(self, model): # Passing 
"scale_factors" should generate multiscales with a "method" appropriate for labels From 0b7352d7b6e8cd4e3b84ab549ec99cae260d4cc4 Mon Sep 17 00:00:00 2001 From: Martin Helm Date: Fri, 12 Dec 2025 12:44:05 +0000 Subject: [PATCH 2/3] Remove parsing logic as it is not necessary for spatialimage class --- src/spatialdata/models/models.py | 17 ----------------- 1 file changed, 17 deletions(-) diff --git a/src/spatialdata/models/models.py b/src/spatialdata/models/models.py index 2bdd9629..4ff25c16 100644 --- a/src/spatialdata/models/models.py +++ b/src/spatialdata/models/models.py @@ -1284,20 +1284,3 @@ def _get_region_metadata_from_region_key_column(table: AnnData) -> list[str]: annotated_regions = table.obs[region_key].cat.remove_unused_categories().cat.categories.unique().tolist() assert isinstance(annotated_regions, list) return annotated_regions - - -# def _parse_chunk_for_dataarray( -# model: type[RasterSchema], -# chunks: int | tuple[int, ...] | tuple[tuple[int, ...], ...] | Mapping[Any, int | tuple[int, ...] | None] | None, -# ) -> str | int | tuple[int, ...] 
| tuple[tuple[int, ...], ...]: -# if chunks is None: -# chunks_parsed = "auto" -# elif isinstance(chunks, dict): -# dims = np.array(model.dims.dims).tolist() -# assert set(dims) == set(chunks.keys()) -# chunks_parsed = tuple([int(chunks[dim]) for dim in dims]) -# else: -# # do nothing -# chunks_parsed = chunks - -# return chunks_parsed From 61573c35db5ec0e8c23f2c7b94e119e7dad79f32 Mon Sep 17 00:00:00 2001 From: Martin Helm Date: Fri, 12 Dec 2025 15:22:05 +0000 Subject: [PATCH 3/3] Don't overwrite chunks of input data if no specific chunksize is set by the user --- src/spatialdata/models/models.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/spatialdata/models/models.py b/src/spatialdata/models/models.py index 4ff25c16..9ddea9c4 100644 --- a/src/spatialdata/models/models.py +++ b/src/spatialdata/models/models.py @@ -240,9 +240,9 @@ def parse( ) _parse_transformations(data, parsed_transform) else: - if chunks is None: - chunks = "auto" # type: ignore[assignment] - data = data.chunk(chunks=chunks) + # Chunk single scale images + if chunks is not None: + data = data.chunk(chunks=chunks) cls()._check_chunk_size_not_too_large(data) # recompute coordinates for (multiscale) spatial image return compute_coordinates(data)