diff --git a/CHANGELOG.md b/CHANGELOG.md index 953a4d2d..fe5a3570 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -77,6 +77,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/). the broken `3.9.0` sdist under `--resolution lowest-direct`. - `pydantic` minimum raised to `>=2.10` so `--resolution lowest-direct` on Python 3.13 does not resolve to `pydantic-core==2.0.1`, which fails to build. +- Preserve `"datetime": null` and other explicit JSON null values during dehydration and hydration. Previously, `merge_jsonb` and `strip_jsonb` wrapped their object results in PostgreSQL's recursive `jsonb_strip_nulls()`, which removed null-valued keys at all depths of the JSON tree and produced STAC-invalid items when `start_datetime`/`end_datetime` were used (the STAC spec requires the `datetime` field to be present, set to `null` when only a start/end range is given). `jsonb_strip_nulls()` is now replaced with a non-recursive filter that drops only keys whose computed value is SQL NULL (sentinel-marked keys and equal-to-base values), leaving legitimate JSON nulls in place. ## [v0.9.11] diff --git a/src/pgstac/migrations/pgstac.0.9.11-unreleased.sql b/src/pgstac/migrations/pgstac.0.9.11-unreleased.sql index cddaf7eb..85924099 100644 --- a/src/pgstac/migrations/pgstac.0.9.11-unreleased.sql +++ b/src/pgstac/migrations/pgstac.0.9.11-unreleased.sql @@ -194,6 +194,96 @@ RETURNS timestamptz AS $$ ; $$ LANGUAGE SQL IMMUTABLE STRICT; -- BEGIN migra calculated SQL +set check_function_bodies = off; + +CREATE OR REPLACE FUNCTION pgstac.merge_jsonb(_a jsonb, _b jsonb) + RETURNS jsonb + LANGUAGE sql + IMMUTABLE +AS $function$ + SELECT + CASE + WHEN _a = '"𒍟※"'::jsonb THEN NULL + WHEN _a IS NULL THEN _b + WHEN jsonb_typeof(_a) = 'null' THEN coalesce(_b, 'null'::jsonb) + WHEN jsonb_typeof(_a) = 'object' AND jsonb_typeof(_b) = 'object' THEN + ( + SELECT coalesce(jsonb_object_agg(sub.key, sub.val), '{}'::jsonb) + FROM ( + SELECT key, merge_jsonb(a.value, b.value) AS val + FROM + jsonb_each(coalesce(_a,'{}'::jsonb)) as a + FULL JOIN + jsonb_each(coalesce(_b,'{}'::jsonb)) 
as b + USING (key) + ) sub + WHERE sub.val IS NOT NULL + ) + WHEN + jsonb_typeof(_a) = 'array' + AND jsonb_typeof(_b) = 'array' + AND jsonb_array_length(_a) = jsonb_array_length(_b) + THEN + ( + SELECT jsonb_agg(m) FROM + ( SELECT + merge_jsonb( + jsonb_array_elements(_a), + jsonb_array_elements(_b) + ) as m + ) as l + ) + ELSE _a + END + ; +$function$ +; + +CREATE OR REPLACE FUNCTION pgstac.strip_jsonb(_a jsonb, _b jsonb) + RETURNS jsonb + LANGUAGE sql + IMMUTABLE +AS $function$ + SELECT + CASE + + WHEN (_a IS NULL OR jsonb_typeof(_a) = 'null') AND _b IS NOT NULL AND jsonb_typeof(_b) != 'null' THEN '"𒍟※"'::jsonb + WHEN _b IS NULL OR jsonb_typeof(_a) = 'null' THEN _a + WHEN _a = _b AND jsonb_typeof(_a) = 'object' THEN '{}'::jsonb + WHEN _a = _b THEN NULL + WHEN jsonb_typeof(_a) = 'object' AND jsonb_typeof(_b) = 'object' THEN + ( + SELECT coalesce(jsonb_object_agg(sub.key, sub.val), '{}'::jsonb) + FROM ( + SELECT key, strip_jsonb(a.value, b.value) AS val + FROM + jsonb_each(_a) as a + FULL JOIN + jsonb_each(_b) as b + USING (key) + ) sub + WHERE sub.val IS NOT NULL + ) + WHEN + jsonb_typeof(_a) = 'array' + AND jsonb_typeof(_b) = 'array' + AND jsonb_array_length(_a) = jsonb_array_length(_b) + THEN + ( + SELECT jsonb_agg(m) FROM + ( SELECT + strip_jsonb( + jsonb_array_elements(_a), + jsonb_array_elements(_b) + ) as m + ) as l + ) + ELSE _a + END + ; +$function$ +; + -- END migra calculated SQL DO $$ BEGIN diff --git a/src/pgstac/migrations/pgstac.unreleased.sql b/src/pgstac/migrations/pgstac.unreleased.sql index 4c836762..ca9e06a7 100644 --- a/src/pgstac/migrations/pgstac.unreleased.sql +++ b/src/pgstac/migrations/pgstac.unreleased.sql @@ -671,21 +671,20 @@ CREATE OR REPLACE FUNCTION merge_jsonb(_a jsonb, _b jsonb) RETURNS jsonb AS $$ SELECT CASE WHEN _a = '"𒍟※"'::jsonb THEN NULL - WHEN _a IS NULL OR jsonb_typeof(_a) = 'null' THEN _b + WHEN _a IS NULL THEN _b + WHEN jsonb_typeof(_a) = 'null' THEN coalesce(_b, 'null'::jsonb) WHEN jsonb_typeof(_a) = 'object' AND 
jsonb_typeof(_b) = 'object' THEN ( - SELECT - jsonb_strip_nulls( - jsonb_object_agg( - key, - merge_jsonb(a.value, b.value) - ) - ) - FROM - jsonb_each(coalesce(_a,'{}'::jsonb)) as a - FULL JOIN - jsonb_each(coalesce(_b,'{}'::jsonb)) as b - USING (key) + SELECT coalesce(jsonb_object_agg(sub.key, sub.val), '{}'::jsonb) + FROM ( + SELECT key, merge_jsonb(a.value, b.value) AS val + FROM + jsonb_each(coalesce(_a,'{}'::jsonb)) as a + FULL JOIN + jsonb_each(coalesce(_b,'{}'::jsonb)) as b + USING (key) + ) sub + WHERE sub.val IS NOT NULL ) WHEN jsonb_typeof(_a) = 'array' @@ -716,18 +715,16 @@ CREATE OR REPLACE FUNCTION strip_jsonb(_a jsonb, _b jsonb) RETURNS jsonb AS $$ WHEN _a = _b THEN NULL WHEN jsonb_typeof(_a) = 'object' AND jsonb_typeof(_b) = 'object' THEN ( - SELECT - jsonb_strip_nulls( - jsonb_object_agg( - key, - strip_jsonb(a.value, b.value) - ) - ) - FROM - jsonb_each(_a) as a - FULL JOIN - jsonb_each(_b) as b - USING (key) + SELECT coalesce(jsonb_object_agg(sub.key, sub.val), '{}'::jsonb) + FROM ( + SELECT key, strip_jsonb(a.value, b.value) AS val + FROM + jsonb_each(_a) as a + FULL JOIN + jsonb_each(_b) as b + USING (key) + ) sub + WHERE sub.val IS NOT NULL ) WHEN jsonb_typeof(_a) = 'array' diff --git a/src/pgstac/pgstac.sql b/src/pgstac/pgstac.sql index 4c836762..ca9e06a7 100644 --- a/src/pgstac/pgstac.sql +++ b/src/pgstac/pgstac.sql @@ -671,21 +671,20 @@ CREATE OR REPLACE FUNCTION merge_jsonb(_a jsonb, _b jsonb) RETURNS jsonb AS $$ SELECT CASE WHEN _a = '"𒍟※"'::jsonb THEN NULL - WHEN _a IS NULL OR jsonb_typeof(_a) = 'null' THEN _b + WHEN _a IS NULL THEN _b + WHEN jsonb_typeof(_a) = 'null' THEN coalesce(_b, 'null'::jsonb) WHEN jsonb_typeof(_a) = 'object' AND jsonb_typeof(_b) = 'object' THEN ( - SELECT - jsonb_strip_nulls( - jsonb_object_agg( - key, - merge_jsonb(a.value, b.value) - ) - ) - FROM - jsonb_each(coalesce(_a,'{}'::jsonb)) as a - FULL JOIN - jsonb_each(coalesce(_b,'{}'::jsonb)) as b - USING (key) + SELECT coalesce(jsonb_object_agg(sub.key, sub.val), 
'{}'::jsonb) + FROM ( + SELECT key, merge_jsonb(a.value, b.value) AS val + FROM + jsonb_each(coalesce(_a,'{}'::jsonb)) as a + FULL JOIN + jsonb_each(coalesce(_b,'{}'::jsonb)) as b + USING (key) + ) sub + WHERE sub.val IS NOT NULL ) WHEN jsonb_typeof(_a) = 'array' @@ -716,18 +715,16 @@ CREATE OR REPLACE FUNCTION strip_jsonb(_a jsonb, _b jsonb) RETURNS jsonb AS $$ WHEN _a = _b THEN NULL WHEN jsonb_typeof(_a) = 'object' AND jsonb_typeof(_b) = 'object' THEN ( - SELECT - jsonb_strip_nulls( - jsonb_object_agg( - key, - strip_jsonb(a.value, b.value) - ) - ) - FROM - jsonb_each(_a) as a - FULL JOIN - jsonb_each(_b) as b - USING (key) + SELECT coalesce(jsonb_object_agg(sub.key, sub.val), '{}'::jsonb) + FROM ( + SELECT key, strip_jsonb(a.value, b.value) AS val + FROM + jsonb_each(_a) as a + FULL JOIN + jsonb_each(_b) as b + USING (key) + ) sub + WHERE sub.val IS NOT NULL ) WHEN jsonb_typeof(_a) = 'array' diff --git a/src/pgstac/sql/001a_jsonutils.sql b/src/pgstac/sql/001a_jsonutils.sql index 9fcb2375..4e9d0403 100644 --- a/src/pgstac/sql/001a_jsonutils.sql +++ b/src/pgstac/sql/001a_jsonutils.sql @@ -151,21 +151,20 @@ CREATE OR REPLACE FUNCTION merge_jsonb(_a jsonb, _b jsonb) RETURNS jsonb AS $$ SELECT CASE WHEN _a = '"𒍟※"'::jsonb THEN NULL - WHEN _a IS NULL OR jsonb_typeof(_a) = 'null' THEN _b + WHEN _a IS NULL THEN _b + WHEN jsonb_typeof(_a) = 'null' THEN coalesce(_b, 'null'::jsonb) WHEN jsonb_typeof(_a) = 'object' AND jsonb_typeof(_b) = 'object' THEN ( - SELECT - jsonb_strip_nulls( - jsonb_object_agg( - key, - merge_jsonb(a.value, b.value) - ) - ) - FROM - jsonb_each(coalesce(_a,'{}'::jsonb)) as a - FULL JOIN - jsonb_each(coalesce(_b,'{}'::jsonb)) as b - USING (key) + SELECT coalesce(jsonb_object_agg(sub.key, sub.val), '{}'::jsonb) + FROM ( + SELECT key, merge_jsonb(a.value, b.value) AS val + FROM + jsonb_each(coalesce(_a,'{}'::jsonb)) as a + FULL JOIN + jsonb_each(coalesce(_b,'{}'::jsonb)) as b + USING (key) + ) sub + WHERE sub.val IS NOT NULL ) WHEN jsonb_typeof(_a) = 
'array' @@ -196,18 +195,16 @@ CREATE OR REPLACE FUNCTION strip_jsonb(_a jsonb, _b jsonb) RETURNS jsonb AS $$ WHEN _a = _b THEN NULL WHEN jsonb_typeof(_a) = 'object' AND jsonb_typeof(_b) = 'object' THEN ( - SELECT - jsonb_strip_nulls( - jsonb_object_agg( - key, - strip_jsonb(a.value, b.value) - ) - ) - FROM - jsonb_each(_a) as a - FULL JOIN - jsonb_each(_b) as b - USING (key) + SELECT coalesce(jsonb_object_agg(sub.key, sub.val), '{}'::jsonb) + FROM ( + SELECT key, strip_jsonb(a.value, b.value) AS val + FROM + jsonb_each(_a) as a + FULL JOIN + jsonb_each(_b) as b + USING (key) + ) sub + WHERE sub.val IS NOT NULL ) WHEN jsonb_typeof(_a) = 'array' diff --git a/src/pypgstac/tests/hydration/test_dehydrate.py b/src/pypgstac/tests/hydration/test_dehydrate.py index a41efa3f..080258ca 100644 --- a/src/pypgstac/tests/hydration/test_dehydrate.py +++ b/src/pypgstac/tests/hydration/test_dehydrate.py @@ -243,3 +243,40 @@ def test_top_level_base_keys_marked(self) -> None: "triple": DO_NOT_MERGE_MARKER, "unique": "value", } + + def test_null_datetime_preserved_in_dehydration(self) -> None: + """Test that datetime: null survives dehydration. + + Per the STAC spec, items with start_datetime/end_datetime must have + datetime explicitly set to null. Dehydration must not strip this null. 
+ """ + base_item = { + "type": "Feature", + "stac_version": "1.1.0", + "collection": "test-collection", + } + item = { + "type": "Feature", + "stac_version": "1.1.0", + "collection": "test-collection", + "properties": { + "datetime": None, + "start_datetime": "2024-01-01T00:00:00Z", + "end_datetime": "2024-01-02T00:00:00Z", + }, + } + dehydrated = self.dehydrate(base_item, item) + assert "properties" in dehydrated + assert "datetime" in dehydrated["properties"], ( + "datetime key must survive dehydration" + ) + assert dehydrated["properties"]["datetime"] is None, ( + "datetime must be null after dehydration, not absent" + ) + + def test_nested_null_values_preserved_in_dehydration(self) -> None: + """Test that null values in nested objects survive dehydration.""" + base_item = {"a": "first"} + item = {"a": "first", "b": {"c": None, "d": "value"}} + dehydrated = self.dehydrate(base_item, item) + assert dehydrated == {"b": {"c": None, "d": "value"}} diff --git a/src/pypgstac/tests/hydration/test_hydrate.py b/src/pypgstac/tests/hydration/test_hydrate.py index efa49567..3ba17147 100644 --- a/src/pypgstac/tests/hydration/test_hydrate.py +++ b/src/pypgstac/tests/hydration/test_hydrate.py @@ -243,3 +243,44 @@ def test_base_none(self) -> None: dehydrated = {"value": {"a": "b"}} hydrated = self.hydrate(base_item, dehydrated) assert hydrated == {"value": {"a": "b"}} + + def test_null_datetime_preserved(self) -> None: + """Test that datetime: null is preserved during hydration. + + Per the STAC spec, items with start_datetime/end_datetime must have + datetime explicitly set to null. This null must survive the + dehydration/hydration round-trip. 
+ """ + base_item = { + "type": "Feature", + "stac_version": "1.1.0", + "collection": "test-collection", + } + dehydrated = { + "properties": { + "datetime": None, + "start_datetime": "2024-01-01T00:00:00Z", + "end_datetime": "2024-01-02T00:00:00Z", + }, + } + hydrated = self.hydrate(base_item, dehydrated) + assert "datetime" in hydrated["properties"], ( + "datetime key must be present in hydrated properties" + ) + assert hydrated["properties"]["datetime"] is None, ( + "datetime must be null, not absent" + ) + + def test_nested_null_values_preserved(self) -> None: + """Test that null values inside nested objects are preserved.""" + base_item = {"a": "first"} + dehydrated = {"b": {"c": None, "d": "value"}} + hydrated = self.hydrate(base_item, dehydrated) + assert hydrated == {"a": "first", "b": {"c": None, "d": "value"}} + + def test_null_overrides_base_value(self) -> None: + """Test that a sentinel-encoded null overrides a base item value.""" + base_item = {"a": "first", "b": "second"} + dehydrated = {"b": DO_NOT_MERGE_MARKER} + hydrated = self.hydrate(base_item, dehydrated) + assert "b" not in hydrated