From 09b0a00fb1bab5097c7cc90479886005fe0116c9 Mon Sep 17 00:00:00 2001 From: Brendan Collins Date: Sun, 17 May 2026 21:41:16 -0700 Subject: [PATCH 1/2] rasterize: propagate like.attrs/coords and emit _FillValue (#2018) When the caller passed ``like=template``, ``rasterize()`` dropped all of the template's ``attrs`` (including ``crs``, ``res``, ``transform``, ``nodatavals``) and rebuilt the output coords via ``np.linspace`` from re-derived bounds. The output then no longer ``xr.align``-ed with the template, and chained pipelines like ``slope(rasterize(gdf, like=elevation))`` silently saw a no-CRS, no-res raster and recomputed cell-size from coords -- the same class of bug as the #1407 sky_view_factor cellsize issue. Three fixes, all at the same site (line 2227, where the output ``xr.DataArray`` is built): 1. ``_extract_grid_from_like`` now also returns the input ``x`` and ``y`` coords and ``attrs``. ``rasterize()`` copies ``like.attrs`` onto the output. 2. When the resolved output grid matches ``like`` (same width, height, and bounds), the output reuses ``like.coords['x']`` and ``like.coords['y']`` directly so the result is bit-identical to the template and ``xr.align`` keeps working. 3. The ``fill`` value is now emitted as ``attrs['_FillValue']`` and ``attrs['nodatavals']`` when ``fill`` is not NaN. Downstream tools (``to_geotiff``, custom masks) can identify nodata pixels. All four backends (numpy, cupy, dask+numpy, dask+cupy) route through the same final ``xr.DataArray`` constructor, so the fix is in one place and behaves identically across backends. Adds ``TestMetadataPropagation`` to ``test_rasterize.py`` with 9 cases covering attrs propagation, bit-identical coord reuse, fill-value emission, isolation from the template's attrs dict, and parity across all four backends. Closes #2018. --- .claude/sweep-metadata-state.csv | 1 + xrspatial/rasterize.py | 51 +++++++++++-- xrspatial/tests/test_rasterize.py | 120 ++++++++++++++++++++++++++++++ 3 files changed, 164 insertions(+), 8 deletions(-) diff --git a/.claude/sweep-metadata-state.csv b/.claude/sweep-metadata-state.csv index 7abec7c04..7ad7a401c 100644 --- a/.claude/sweep-metadata-state.csv +++ b/.claude/sweep-metadata-state.csv @@ -1,3 +1,4 @@ module,last_inspected,issue,severity_max,categories_found,notes geotiff,2026-05-15,1909,HIGH,4;5,"Re-audit 2026-05-15 (agent-a55b69cec1ef2a092 worktree, branch deep-sweep-metadata-geotiff-2026-05-15). 4-backend (numpy/cupy/dask+numpy/dask+cupy) parity reverified after the #1813 modular refactor: full reads, windowed reads, multi-band, band=N selection, no-georef integer pixel coords, crs/crs_wkt/transform/nodata/x_resolution/y_resolution/resolution_unit/image_description/gdal_metadata all agree across backends. DataArray .name and dims agree (y, x for 2D; y, x, band for 3D). NEW HIGH finding #1909: GDS chunked GPU path (_read_geotiff_gpu_chunked_gds) declared the dask graph dtype as float64 when source had an in-range integer nodata sentinel, matching the CPU dask path's #1597 contract, but the per-chunk _chunk_task did not cast its returned cupy array to declared_dtype -- chunks with no sentinel hit returned the raw uint16/int16 source dtype, producing a silent declared/actual dtype mismatch. Fix mirrors the #1597 + #1624 CPU dask pattern: compute declared_dtype before defining _chunk_task, cast inside the task only when arr.dtype != declared_dtype to skip the no-op astype(copy=True). 6 regression tests added in test_chunked_gpu_declared_dtype_1909.py covering declared vs computed parity, CPU/GPU dask declared-dtype agreement, eager paths preserve source dtype, no-nodata round-trip, explicit dtype= kwarg, and sentinel-hit float64 promotion. Pre-existing test failures in test_predictor2_big_endian_gpu_1517.py and test_size_param_validation_gpu_vrt_1776.py exist on main (read_to_array AttributeError after #1813 refactor, tile_size=4 rejected by stricter _validate_tile_size_arg) and are unrelated to this audit." +rasterize,2026-05-17,2018,HIGH,1;2;4,"rasterize() drops like.attrs, rebuilds like.coords via linspace (not bit-identical), and never emits _FillValue/nodatavals even when fill is non-NaN. Cat 1 HIGH: chained pipelines like slope(rasterize(gdf, like=elevation)) silently lose crs/res/transform. Cat 2 MEDIUM: linspace round-trip from re-derived bounds breaks xr.align with like. Cat 4 MEDIUM: rasterize(..., fill=-9999, dtype=int32) emits no _FillValue. All 4 backends share the same final return so the fix is one place. Fixed in deep-sweep-metadata-rasterize-2026-05-17-01 (worktree agent-ab7a9aee97c1e4cdf): _extract_grid_from_like now returns coords/attrs; rasterize() reuses like.coords directly when grid matches, copies like.attrs, and emits _FillValue + nodatavals when fill is not NaN. 9 new tests in TestMetadataPropagation cover attrs propagation, bit-identical coord reuse, fill-value emission, isolation from template attrs, and parity across numpy/cupy/dask+numpy/dask+cupy backends. Full test suite (193 passing) clean." reproject,2026-05-10,1572;1573,HIGH,1;3;4,geoid_height_raster dropped input attrs and used dims[-2:] for 3D inputs (#1572). reproject/merge ignored nodatavals (rasterio convention) when rioxarray absent (#1573). Fixed in same branch. diff --git a/xrspatial/rasterize.py b/xrspatial/rasterize.py index d668003b4..7df00b742 100644 --- a/xrspatial/rasterize.py +++ b/xrspatial/rasterize.py @@ -1928,7 +1928,13 @@ def _parse_input(geometries, column=None, columns=None): def _extract_grid_from_like(like): - """Extract width, height, bounds, dtype from a template DataArray.""" + """Extract width, height, bounds, dtype from a template DataArray. + + Also returns the input coords and attrs so the caller can propagate + them onto the output raster. Reusing ``like.coords`` directly (rather + than rebuilding with ``linspace``) keeps the output bit-identical to + the template and so preserves ``xr.align`` compatibility. + """ if not isinstance(like, xr.DataArray): raise TypeError("'like' must be an xr.DataArray") if like.ndim != 2 or 'y' not in like.dims or 'x' not in like.dims: @@ -1956,7 +1962,9 @@ def _extract_grid_from_like(like): ymin = float(np.min(y)) - py / 2 ymax = float(np.max(y)) + py / 2 - return width, height, (xmin, ymin, xmax, ymax), dt + return (width, height, (xmin, ymin, xmax, ymax), dt, + like.coords['x'].copy(), like.coords['y'].copy(), + dict(like.attrs)) # --------------------------------------------------------------------------- @@ -2119,8 +2127,11 @@ def rasterize( # Extract defaults from template raster like_width = like_height = like_bounds = like_dtype = None + like_x_coord = like_y_coord = None + like_attrs = None if like is not None: - like_width, like_height, like_bounds, like_dtype = \ + (like_width, like_height, like_bounds, like_dtype, + like_x_coord, like_y_coord, like_attrs) = \ _extract_grid_from_like(like) # Parse input geometries @@ -2218,15 +2229,39 @@ def rasterize( final_height, final_width, fill, final_dtype, all_touched, merge_fn) - # Build coordinates - px = (xmax - xmin) / final_width - py = (ymax - ymin) / final_height - x_coords = np.linspace(xmin + px / 2, xmax - px / 2, final_width) - y_coords = np.linspace(ymax - py / 2, ymin + py / 2, final_height) + # Build coordinates. When the caller passed ``like``, reuse its + # x/y coordinates directly (bit-identical, ``xr.align`` compatible) + # provided the resolved output grid matches. Otherwise build fresh + # coords from ``bounds``. + if (like_x_coord is not None + and like_x_coord.sizes['x'] == final_width + and like_y_coord.sizes['y'] == final_height + and like_bounds is not None + and final_bounds == like_bounds): + x_coords = like_x_coord + y_coords = like_y_coord + else: + px = (xmax - xmin) / final_width + py = (ymax - ymin) / final_height + x_coords = np.linspace(xmin + px / 2, xmax - px / 2, final_width) + y_coords = np.linspace(ymax - py / 2, ymin + py / 2, final_height) + + # Build attrs. Start from ``like.attrs`` if given so chained spatial + # pipelines (``slope(rasterize(gdf, like=elevation))``) see the same + # ``res``, ``crs``, ``transform``, etc. as the template. Emit the + # fill value as ``_FillValue`` (and ``nodatavals`` for xrspatial + # internal compatibility) whenever ``fill`` is not NaN -- otherwise + # downstream tools cannot identify which pixels are nodata. + out_attrs = dict(like_attrs) if like_attrs is not None else {} + fill_is_nan = isinstance(fill, float) and np.isnan(fill) + if not fill_is_nan: + out_attrs['_FillValue'] = fill + out_attrs['nodatavals'] = (fill,) return xr.DataArray( out, name=name, dims=['y', 'x'], coords={'y': y_coords, 'x': x_coords}, + attrs=out_attrs, ) diff --git a/xrspatial/tests/test_rasterize.py b/xrspatial/tests/test_rasterize.py index c27437fe1..f2ff0e9c3 100644 --- a/xrspatial/tests/test_rasterize.py +++ b/xrspatial/tests/test_rasterize.py @@ -1592,3 +1592,123 @@ def test_rasterize_public_path_still_works_after_int64_change(self): ) # The polygon covers a 6x6 inner block; just check it burned in. assert np.any(result.values == 3.0) + + +# --------------------------------------------------------------------------- +# Metadata propagation (attrs from `like`, coord reuse, _FillValue) +# --------------------------------------------------------------------------- + +def _make_like(width=10, height=10, attrs=None, dtype=np.float64): + """Build a 2D template DataArray with georeferenced coords and attrs.""" + x = np.linspace(0.5, width - 0.5, width) + y = np.linspace(height - 0.5, 0.5, height) + data = np.zeros((height, width), dtype=dtype) + return xr.DataArray( + data, dims=['y', 'x'], + coords={'y': y, 'x': x}, + attrs=dict(attrs or {}), + ) + + +class TestMetadataPropagation: + """Verify `like.attrs` propagates, `like.coords` are reused, and the + fill value lands in `_FillValue` / `nodatavals`. + """ + + def test_like_propagates_attrs(self): + attrs = { + 'crs': 'EPSG:32610', + 'transform': (1.0, 0.0, 0.0, 0.0, -1.0, 10.0), + 'res': (1.0, 1.0), + } + like = _make_like(attrs=attrs) + result = rasterize( + [(box(2, 2, 8, 8), 1.0)], like=like, fill=0, + ) + assert result.attrs.get('crs') == 'EPSG:32610' + assert result.attrs.get('transform') == \ + (1.0, 0.0, 0.0, 0.0, -1.0, 10.0) + assert result.attrs.get('res') == (1.0, 1.0) + + def test_like_preserves_coords_bit_identical(self): + like = _make_like() + result = rasterize( + [(box(2, 2, 8, 8), 1.0)], like=like, fill=0, + ) + # When the caller passed `like`, the output should reuse its + # coords exactly (not rebuild via linspace). Otherwise + # xr.align between the rasterized output and `like` breaks + # silently for round-off-prone bounds. + np.testing.assert_array_equal( + result.coords['x'].values, like.coords['x'].values) + np.testing.assert_array_equal( + result.coords['y'].values, like.coords['y'].values) + + def test_like_attrs_isolated_from_template(self): + """Mutating output attrs must not mutate the template's attrs.""" + attrs = {'crs': 'EPSG:32610'} + like = _make_like(attrs=attrs) + result = rasterize( + [(box(2, 2, 8, 8), 1.0)], like=like, fill=0, + ) + result.attrs['crs'] = 'EPSG:4326' + assert like.attrs['crs'] == 'EPSG:32610' + + def test_fill_value_recorded_when_not_nan(self): + result = rasterize( + [(box(2, 2, 8, 8), 1.0)], + width=10, height=10, bounds=(0, 0, 10, 10), + fill=-9999, dtype=np.int32, + ) + assert result.attrs.get('_FillValue') == -9999 + assert result.attrs.get('nodatavals') == (-9999,) + + def test_fill_value_omitted_for_nan(self): + """Default fill=NaN should not pollute attrs with _FillValue.""" + result = rasterize( + [(box(2, 2, 8, 8), 1.0)], + width=10, height=10, bounds=(0, 0, 10, 10), + ) + assert '_FillValue' not in result.attrs + assert 'nodatavals' not in result.attrs + + def test_no_like_no_attrs_pollution(self): + """Without `like` and with NaN fill, attrs must stay empty.""" + result = rasterize( + [(box(2, 2, 8, 8), 1.0)], + width=10, height=10, bounds=(0, 0, 10, 10), + ) + assert result.attrs == {} + + @skip_no_dask + def test_like_attrs_propagated_dask(self): + like = _make_like(attrs={'crs': 'EPSG:32610'}) + result = rasterize( + [(box(2, 2, 8, 8), 1.0)], like=like, fill=0, chunks=5, + ) + # The dask backend routes through the same final xr.DataArray + # constructor, so attrs / coords / _FillValue behave identically. + assert result.attrs.get('crs') == 'EPSG:32610' + assert result.attrs.get('_FillValue') == 0 + np.testing.assert_array_equal( + result.coords['x'].values, like.coords['x'].values) + + @skip_no_cuda + def test_like_attrs_propagated_cupy(self): + like = _make_like(attrs={'crs': 'EPSG:32610'}) + result = rasterize( + [(box(2, 2, 8, 8), 1.0)], like=like, fill=0, use_cuda=True, + ) + assert result.attrs.get('crs') == 'EPSG:32610' + assert result.attrs.get('_FillValue') == 0 + + @skip_no_cuda + @skip_no_dask + def test_like_attrs_propagated_dask_cupy(self): + like = _make_like(attrs={'crs': 'EPSG:32610'}) + result = rasterize( + [(box(2, 2, 8, 8), 1.0)], + like=like, fill=0, use_cuda=True, chunks=5, + ) + assert result.attrs.get('crs') == 'EPSG:32610' + assert result.attrs.get('_FillValue') == 0 From 9ff096544bc38a4d05e1ce839a4c1f21054cfd67 Mon Sep 17 00:00:00 2001 From: Brendan Collins Date: Mon, 18 May 2026 06:59:58 -0700 Subject: [PATCH 2/2] rasterize: address review suggestions on metadata propagation Strip inherited nodata / _FillValue / nodatavals from like.attrs before emitting a fresh triplet keyed off the actual fill, so stale sentinels from a prior round-trip can't outlive the new fill. Detect numpy-typed NaN via float() + np.isnan() so np.float32(np.nan) is treated as NaN. Propagate non-dim coords (e.g. rioxarray's spatial_ref). Replace the float-equal bounds check in the coord-reuse predicate with a check on "caller didn't override bounds/resolution". Adds tests for inherited nodata, numpy-scalar NaN, spatial_ref propagation, and a geotiff round-trip pinning the user-visible #2018 fix. --- xrspatial/rasterize.py | 74 ++++++++++++++------ xrspatial/tests/test_rasterize.py | 109 ++++++++++++++++++++++++++++-- 2 files changed, 157 insertions(+), 26 deletions(-) diff --git a/xrspatial/rasterize.py b/xrspatial/rasterize.py index 7df00b742..30a79a6d4 100644 --- a/xrspatial/rasterize.py +++ b/xrspatial/rasterize.py @@ -1962,9 +1962,17 @@ def _extract_grid_from_like(like): ymin = float(np.min(y)) - py / 2 ymax = float(np.max(y)) + py / 2 + # Carry through any non-dim coords (e.g. rioxarray's ``spatial_ref`` + # CRS coord). The y/x dim coords are returned separately because the + # caller decides whether to reuse them (bit-identical grid) or build + # fresh ones (resized grid). + extra_coords = { + k: v for k, v in like.coords.items() if k not in ('x', 'y') + } + return (width, height, (xmin, ymin, xmax, ymax), dt, - like.coords['x'].copy(), like.coords['y'].copy(), - dict(like.attrs)) + like.coords['x'], like.coords['y'], + extra_coords, dict(like.attrs)) # --------------------------------------------------------------------------- @@ -2128,10 +2136,12 @@ def rasterize( # Extract defaults from template raster like_width = like_height = like_bounds = like_dtype = None like_x_coord = like_y_coord = None + like_extra_coords = {} like_attrs = None + bounds_explicit = bounds is not None if like is not None: (like_width, like_height, like_bounds, like_dtype, - like_x_coord, like_y_coord, like_attrs) = \ + like_x_coord, like_y_coord, like_extra_coords, like_attrs) = \ _extract_grid_from_like(like) # Parse input geometries @@ -2229,15 +2239,20 @@ def rasterize( final_height, final_width, fill, final_dtype, all_touched, merge_fn) - # Build coordinates. When the caller passed ``like``, reuse its - # x/y coordinates directly (bit-identical, ``xr.align`` compatible) - # provided the resolved output grid matches. Otherwise build fresh - # coords from ``bounds``. - if (like_x_coord is not None - and like_x_coord.sizes['x'] == final_width - and like_y_coord.sizes['y'] == final_height - and like_bounds is not None - and final_bounds == like_bounds): + # Build coordinates. When the caller didn't override the grid (no + # explicit width/height/bounds/resolution that resizes the output), + # reuse like.coords directly so the output is bit-identical to the + # template and xr.align keeps working. Float-equal bound comparison + # would be fragile, so key the reuse on size + "bounds weren't + # overridden" instead. + reuse_like_coords = ( + like_x_coord is not None + and like_x_coord.sizes['x'] == final_width + and like_y_coord.sizes['y'] == final_height + and not bounds_explicit + and resolution is None + ) + if reuse_like_coords: x_coords = like_x_coord y_coords = like_y_coord else: @@ -2246,22 +2261,39 @@ def rasterize( x_coords = np.linspace(xmin + px / 2, xmax - px / 2, final_width) y_coords = np.linspace(ymax - py / 2, ymin + py / 2, final_height) - # Build attrs. Start from ``like.attrs`` if given so chained spatial - # pipelines (``slope(rasterize(gdf, like=elevation))``) see the same - # ``res``, ``crs``, ``transform``, etc. as the template. Emit the - # fill value as ``_FillValue`` (and ``nodatavals`` for xrspatial - # internal compatibility) whenever ``fill`` is not NaN -- otherwise - # downstream tools cannot identify which pixels are nodata. - out_attrs = dict(like_attrs) if like_attrs is not None else {} - fill_is_nan = isinstance(fill, float) and np.isnan(fill) + # Build attrs. Start from like.attrs if given so chained spatial + # pipelines (slope(rasterize(gdf, like=elevation))) see the same res, + # crs, transform, etc. as the template. Strip any inherited nodata + # keys first -- the template's old fill value almost certainly + # disagrees with this call's fill, and the geotiff writer's + # _resolve_nodata_attr would otherwise tag pixels with a stale + # sentinel. Then re-emit a consistent triplet keyed off the actual + # fill: nodata (xrspatial's primary key), _FillValue (CF), and + # nodatavals (rioxarray's per-band tuple). + out_attrs = like_attrs if like_attrs is not None else {} + for k in ('nodata', '_FillValue', 'nodatavals'): + out_attrs.pop(k, None) + try: + fill_as_float = float(fill) + fill_is_nan = np.isnan(fill_as_float) + except (TypeError, ValueError): + fill_is_nan = False if not fill_is_nan: + out_attrs['nodata'] = fill out_attrs['_FillValue'] = fill out_attrs['nodatavals'] = (fill,) + # Combine y/x dim coords with any non-dim coords carried from the + # template (e.g. rioxarray's spatial_ref CRS coord). + out_coords = {'y': y_coords, 'x': x_coords} + for k, v in like_extra_coords.items(): + if k not in out_coords: + out_coords[k] = v + return xr.DataArray( out, name=name, dims=['y', 'x'], - coords={'y': y_coords, 'x': x_coords}, + coords=out_coords, attrs=out_attrs, ) diff --git a/xrspatial/tests/test_rasterize.py b/xrspatial/tests/test_rasterize.py index f2ff0e9c3..df89328a8 100644 --- a/xrspatial/tests/test_rasterize.py +++ b/xrspatial/tests/test_rasterize.py @@ -1635,10 +1635,7 @@ def test_like_preserves_coords_bit_identical(self): result = rasterize( [(box(2, 2, 8, 8), 1.0)], like=like, fill=0, ) - # When the caller passed `like`, the output should reuse its - # coords exactly (not rebuild via linspace). Otherwise - # xr.align between the rasterized output and `like` breaks - # silently for round-off-prone bounds. + # Output reuses like.coords exactly so xr.align keeps working. np.testing.assert_array_equal( result.coords['x'].values, like.coords['x'].values) np.testing.assert_array_equal( @@ -1660,15 +1657,25 @@ def test_fill_value_recorded_when_not_nan(self): width=10, height=10, bounds=(0, 0, 10, 10), fill=-9999, dtype=np.int32, ) + # Emit the full triplet: xrspatial's primary (nodata), the CF + # alias (_FillValue), and rioxarray's per-band tuple (nodatavals). + # All three must agree with fill so downstream consumers all + # resolve to the same sentinel regardless of which key they read. + assert result.attrs.get('nodata') == -9999 assert result.attrs.get('_FillValue') == -9999 assert result.attrs.get('nodatavals') == (-9999,) + # Sentinel round-trips cleanly when the array is cast to its + # declared dtype -- pins #1973-style dtype mismatch regressions. + assert np.array(-9999, dtype=result.dtype) == \ + np.array(result.attrs['_FillValue'], dtype=result.dtype) def test_fill_value_omitted_for_nan(self): - """Default fill=NaN should not pollute attrs with _FillValue.""" + """Default fill=NaN should not pollute attrs with nodata keys.""" result = rasterize( [(box(2, 2, 8, 8), 1.0)], width=10, height=10, bounds=(0, 0, 10, 10), ) + assert 'nodata' not in result.attrs assert '_FillValue' not in result.attrs assert 'nodatavals' not in result.attrs @@ -1712,3 +1719,95 @@ def test_like_attrs_propagated_dask_cupy(self): ) assert result.attrs.get('crs') == 'EPSG:32610' assert result.attrs.get('_FillValue') == 0 + + def test_like_stale_nodata_keys_replaced_with_fill(self): + """Inherited nodata keys from like must not outlive the new fill. + + The geotiff writer's _resolve_nodata_attr checks ``nodata`` → + ``nodatavals`` → ``_FillValue``. If a previous round-trip left + any of them on the template, they have to be replaced (or + cleared) so the writer doesn't tag pixels with a stale sentinel + that disagrees with the actual fill in the new array. + """ + like = _make_like(attrs={ + 'nodata': -9999, + '_FillValue': -9999, + 'nodatavals': (-9999,), + 'crs': 'EPSG:32610', + }) + # Case 1: explicit fill replaces all three keys. + result = rasterize( + [(box(2, 2, 8, 8), 1.0)], like=like, fill=0, + ) + assert result.attrs.get('nodata') == 0 + assert result.attrs.get('_FillValue') == 0 + assert result.attrs.get('nodatavals') == (0,) + assert result.attrs.get('crs') == 'EPSG:32610' + # Case 2: NaN fill strips inherited nodata keys outright -- the + # actual unwritten pixels are NaN, not -9999, so advertising + # -9999 would lie to downstream tools. + result = rasterize( + [(box(2, 2, 8, 8), 1.0)], like=like, + ) + assert 'nodata' not in result.attrs + assert '_FillValue' not in result.attrs + assert 'nodatavals' not in result.attrs + assert result.attrs.get('crs') == 'EPSG:32610' + + def test_numpy_float_nan_fill_treated_as_nan(self): + """Numpy scalar NaN must be detected as NaN, not emitted as fill. + + ``isinstance(np.float32(np.nan), float)`` is False, so a naive + ``isinstance(fill, float) and np.isnan(fill)`` check would let a + numpy-typed NaN slip through and land in ``_FillValue``. + """ + result = rasterize( + [(box(2, 2, 8, 8), 1.0)], + width=10, height=10, bounds=(0, 0, 10, 10), + fill=np.float32(np.nan), + ) + assert 'nodata' not in result.attrs + assert '_FillValue' not in result.attrs + assert 'nodatavals' not in result.attrs + + def test_like_non_dim_coords_propagated(self): + """Non-dim coords on like (e.g. rioxarray's spatial_ref) carry over.""" + like = _make_like() + # rioxarray attaches the CRS as a scalar non-dim coord + # called ``spatial_ref``. rasterize must propagate it. + like = like.assign_coords(spatial_ref=0) + like['spatial_ref'].attrs['crs_wkt'] = ( + 'GEOGCS["WGS 84",DATUM["WGS_1984"]]' + ) + result = rasterize( + [(box(2, 2, 8, 8), 1.0)], like=like, fill=0, + ) + assert 'spatial_ref' in result.coords + assert (result.coords['spatial_ref'].attrs.get('crs_wkt') + == 'GEOGCS["WGS 84",DATUM["WGS_1984"]]') + + def test_geotiff_round_trip_preserves_fill(self, tmp_path): + """rasterize → to_geotiff → read → nodata matches. + + The user-visible motivation for #2018: a rasterized output + written to GeoTIFF must round-trip the fill sentinel so masks + and downstream readers identify the right nodata pixels. + """ + pytest.importorskip('tifffile') + from xrspatial.geotiff import to_geotiff, open_geotiff + + result = rasterize( + [(box(2, 2, 8, 8), 1.0)], + width=10, height=10, bounds=(0, 0, 10, 10), + fill=-9999.0, dtype=np.float32, + ) + path = str(tmp_path / 'rasterize_fill_round_trip.tif') + to_geotiff(result, path) + read_back = open_geotiff(path) + # GeoTIFF stores nodata as a scalar; either ``nodata`` or + # ``_FillValue`` should land on the read-back DataArray and + # match the fill we supplied. + nodata = (read_back.attrs.get('nodata') + or read_back.attrs.get('_FillValue')) + assert nodata is not None + assert float(nodata) == -9999.0