diff --git a/CHANGELOG.md b/CHANGELOG.md index 3266f14c..d62475f4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,17 @@ and this project adheres to [Semantic Versioning](http://semver.org/). ## [Unreleased] ### Added + +- Add deterministic SHA-256 `content_hash` to STAC items to track data changes across migrations. +- Add `pgstac_updated_at` column to items table as part of separating STAC property updates from database metadata updates. + +### Changed + +- Replace expensive row-based trigger for item inserts with optimized SQL/PL/pgSQL hydration strategies to improve ingestion throughput. +- Update pypgstac loaders to dynamically generate hashes during ingestion where required, avoiding trigger recalculation. +- Add tombstone table `items_deleted_log` and `pgstac_updated_at` metadata column to items table. +- Add batched tombstone GC routines: `gc_deleted_items_log_batch(interval, integer)`, overloaded `gc_deleted_items_log(interval, integer)`, and `gc_deleted_items_log_committed(interval, integer)` for commit-per-batch cleanup of large tombstone backlogs. +- Add pgTAP coverage for batched tombstone GC signatures/behavior and read-only rejection paths. - New `pgstac-migrate` package under `src/pgstac-migrate/` with a standalone CLI, Python API, and tests for migration planning and execution. 
- New Rust crate under `src/pgstac-rs/` with updated CI/release wiring, diff --git a/src/pgstac/migrations/pgstac--0.9.11--unreleased.sql b/src/pgstac/migrations/pgstac--0.9.11--unreleased.sql index db90a279..e5733646 100644 --- a/src/pgstac/migrations/pgstac--0.9.11--unreleased.sql +++ b/src/pgstac/migrations/pgstac--0.9.11--unreleased.sql @@ -214,6 +214,22 @@ drop index if exists "pgstac"."search_wheres_where"; drop table "pgstac"."search_wheres"; +create table "pgstac"."items_deleted_log" ( /* tombstone log written by items_delete_log_trigger(): one row per item deleted from items */ + "id" bigint generated always as identity not null, + "item_id" text not null, + "collection" text not null, + "partition" text, + "datetime" timestamp with time zone, + "end_datetime" timestamp with time zone, + "content_hash" text not null default ''::text, + "deleted_at" timestamp with time zone not null default now() +); + + +alter table "pgstac"."items" add column "content_hash" text not null default ''::text; /* rows that already exist take the '' default; no backfill is visible in this hunk */ + +alter table "pgstac"."items" add column "pgstac_updated_at" timestamp with time zone not null default now(); + alter table "pgstac"."searches" add column "context_count" bigint; alter table "pgstac"."searches" add column "created_at" timestamp with time zone default now(); @@ -226,10 +242,16 @@ alter table "pgstac"."searches" add column "statslastupdated" timestamp with tim alter table "pgstac"."searches" alter column "hash" drop expression; +CREATE INDEX items_deleted_log_deleted_at_idx ON pgstac.items_deleted_log USING btree (deleted_at); + +CREATE UNIQUE INDEX items_deleted_log_pkey ON pgstac.items_deleted_log USING btree (id); + CREATE INDEX searches_lastused_anon_idx ON pgstac.searches USING btree (lastused) WHERE ((name IS NULL) AND (NOT pinned)); CREATE UNIQUE INDEX searches_name_key ON pgstac.searches USING btree (name); +alter table "pgstac"."items_deleted_log" add constraint "items_deleted_log_pkey" PRIMARY KEY using index "items_deleted_log_pkey"; + alter table "pgstac"."searches" add constraint "searches_name_key" UNIQUE using index 
"searches_name_key"; set check_function_bodies = off; @@ -258,6 +280,78 @@ AS $function$ $function$ ; +CREATE OR REPLACE FUNCTION pgstac.gc_deleted_items_log(retention_interval interval DEFAULT '30 days'::interval) + RETURNS bigint + LANGUAGE sql + SECURITY DEFINER +AS $function$ + SELECT gc_deleted_items_log(retention_interval, 10000); /* delegate to the two-argument overload with a batch size of 10000 */ +$function$ +; + +CREATE OR REPLACE FUNCTION pgstac.gc_deleted_items_log(retention_interval interval, batch_limit integer) + RETURNS bigint + LANGUAGE plpgsql + SECURITY DEFINER +AS $function$ +DECLARE + deleted_count bigint := 0; + batch_deleted bigint; +BEGIN + LOOP + batch_deleted := gc_deleted_items_log_batch(retention_interval, batch_limit); + deleted_count := deleted_count + batch_deleted; + EXIT WHEN batch_deleted = 0; /* stop once a batch removes nothing */ + END LOOP; + + RETURN deleted_count; +END; +$function$ +; + +CREATE OR REPLACE FUNCTION pgstac.gc_deleted_items_log_batch(retention_interval interval DEFAULT '30 days'::interval, batch_limit integer DEFAULT 10000) + RETURNS bigint + LANGUAGE plpgsql + SECURITY DEFINER +AS $function$ +DECLARE + batch_deleted bigint; +BEGIN + WITH to_delete AS ( + SELECT ctid + FROM items_deleted_log + WHERE deleted_at < now() - retention_interval + ORDER BY deleted_at + LIMIT GREATEST(COALESCE(batch_limit, 10000), 1) /* NULL batch_limit falls back to 10000; values below 1 are raised to 1 */ + ), + deleted AS ( + DELETE FROM items_deleted_log d + USING to_delete td + WHERE d.ctid = td.ctid + RETURNING 1 + ) + SELECT count(*)::bigint INTO batch_deleted FROM deleted; + + RETURN batch_deleted; +END; +$function$ +; + +CREATE OR REPLACE PROCEDURE pgstac.gc_deleted_items_log_committed(IN retention_interval interval DEFAULT '30 days'::interval, IN batch_limit integer DEFAULT 10000) + LANGUAGE plpgsql +AS $procedure$ +DECLARE + batch_deleted bigint; +BEGIN + LOOP + batch_deleted := gc_deleted_items_log_batch(retention_interval, batch_limit); + EXIT WHEN batch_deleted = 0; + COMMIT; /* commit after every non-empty batch */ + END LOOP; +END; +$procedure$ +; + CREATE OR REPLACE FUNCTION pgstac.gc_search_caches(retention_interval interval DEFAULT 
NULL::interval, conf jsonb DEFAULT NULL::jsonb) RETURNS jsonb LANGUAGE sql @@ -270,6 +364,47 @@ AS $function$ $function$ ; +CREATE OR REPLACE FUNCTION pgstac.items_delete_log_trigger() + RETURNS trigger + LANGUAGE plpgsql + SECURITY DEFINER +AS $function$ +BEGIN /* copy one tombstone row per deleted item from the old_rows transition table */ + INSERT INTO items_deleted_log ( + item_id, + collection, + partition, + datetime, + end_datetime, + content_hash + ) + SELECT + old_rows.id, + old_rows.collection, + (partition_name(old_rows.collection, old_rows.datetime)).partition_name, + old_rows.datetime, + old_rows.end_datetime, + old_rows.content_hash + FROM old_rows; + + RETURN NULL; +END; +$function$ +; + +CREATE OR REPLACE FUNCTION pgstac.items_touch_triggerfunc() + RETURNS trigger + LANGUAGE plpgsql + SECURITY DEFINER +AS $function$ +BEGIN + NEW.pgstac_updated_at := now(); /* stamp the updated row with the transaction start time */ + NEW.content_hash := encode(sha256(convert_to(content_hydrate(NEW)::text, 'UTF8')), 'hex'); /* convert_to() hashes the UTF-8 bytes of the hydrated JSON text; a ::text::bytea cast would run the text through bytea input parsing, so a backslash in the JSON (for example an escaped quote or newline) raises "invalid input syntax for type bytea" */ + RETURN NEW; +END; +$function$ +; + CREATE OR REPLACE FUNCTION pgstac.name_search(_search jsonb, _name text, _metadata jsonb DEFAULT '{}'::jsonb) RETURNS searches LANGUAGE plpgsql @@ -674,21 +809,55 @@ $function$ CREATE OR REPLACE FUNCTION pgstac.content_dehydrate(content jsonb) RETURNS items - LANGUAGE sql + LANGUAGE plpgsql STABLE AS $function$ - SELECT - content->>'id' as id, - stac_geom(content) as geometry, - content->>'collection' as collection, - stac_datetime(content) as datetime, - stac_end_datetime(content) as end_datetime, - strip_jsonb( - content - '{id,geometry,collection,type}'::text[], - collection_base_item(content->>'collection') - ) - '{id,geometry,collection,type}'::text[] as content, - null::jsonb as private - ; +DECLARE + out items; +BEGIN + out.id := content->>'id'; + out.geometry := stac_geom(content); + out.collection := content->>'collection'; + out.datetime := stac_datetime(content); + out.end_datetime := stac_end_datetime(content); + out.pgstac_updated_at := now(); + out.content_hash := encode(sha256(convert_to(content::text, 'UTF8')), 'hex'); /* hash the UTF-8 bytes of the incoming item JSON; a ::text::bytea cast would parse backslashes in the JSON text as bytea escapes and fail */ + out.content := strip_jsonb( + content 
- '{id,geometry,collection,type}'::text[], + collection_base_item(content->>'collection') + ) - '{id,geometry,collection,type}'::text[]; + out.private := null; + RETURN out; +END; +$function$ +; + +CREATE TRIGGER items_before_update_trigger BEFORE UPDATE ON pgstac.items FOR EACH ROW EXECUTE FUNCTION items_touch_triggerfunc(); + +CREATE TRIGGER items_delete_log_after_delete_trigger AFTER DELETE ON pgstac.items REFERENCING OLD TABLE AS old_rows FOR EACH STATEMENT EXECUTE FUNCTION items_delete_log_trigger(); + +CREATE OR REPLACE FUNCTION pgstac.content_dehydrate(content jsonb) + RETURNS items + LANGUAGE plpgsql + STABLE +AS $function$ +DECLARE + out items; +BEGIN /* build an items row from a full item document, assigning fields by name */ + out.id := content->>'id'; + out.geometry := stac_geom(content); + out.collection := content->>'collection'; + out.datetime := stac_datetime(content); + out.end_datetime := stac_end_datetime(content); + out.pgstac_updated_at := now(); + out.content_hash := encode(sha256(convert_to(content::text, 'UTF8')), 'hex'); /* hash the UTF-8 bytes of the incoming item JSON; a ::text::bytea cast would parse backslashes in the JSON text (for example an escaped quote or newline) as bytea escapes and raise "invalid input syntax for type bytea" */ + out.content := strip_jsonb( + content - '{id,geometry,collection,type}'::text[], + collection_base_item(content->>'collection') + ) - '{id,geometry,collection,type}'::text[]; + out.private := null; + RETURN out; +END; $function$ ; @@ -1075,6 +1244,9 @@ ALTER FUNCTION pin_search SECURITY DEFINER; ALTER FUNCTION unpin_search SECURITY DEFINER; ALTER FUNCTION gc_anonymous_searches(interval, jsonb) SECURITY DEFINER; ALTER FUNCTION gc_search_caches(interval, jsonb) SECURITY DEFINER; +ALTER FUNCTION gc_deleted_items_log_batch(interval, integer) SECURITY DEFINER; +ALTER FUNCTION gc_deleted_items_log(interval, integer) SECURITY DEFINER; +ALTER FUNCTION gc_deleted_items_log(interval) SECURITY DEFINER; ALTER FUNCTION format_item SECURITY DEFINER; ALTER FUNCTION maintain_index SECURITY DEFINER; @@ -1100,6 +1272,9 @@ GRANT ALL ON PROCEDURE run_queued_queries TO pgstac_admin; REVOKE ALL PRIVILEGES ON FUNCTION run_queued_queries_intransaction FROM public; GRANT ALL ON FUNCTION run_queued_queries_intransaction TO 
pgstac_admin; +REVOKE ALL PRIVILEGES ON PROCEDURE gc_deleted_items_log_committed(interval, integer) FROM public; +GRANT ALL ON PROCEDURE gc_deleted_items_log_committed(interval, integer) TO pgstac_admin; + RESET ROLE; SET ROLE pgstac_ingest; diff --git a/src/pgstac/migrations/pgstac--unreleased.sql b/src/pgstac/migrations/pgstac--unreleased.sql index 60426185..522595a7 100644 --- a/src/pgstac/migrations/pgstac--unreleased.sql +++ b/src/pgstac/migrations/pgstac--unreleased.sql @@ -2070,12 +2070,26 @@ CREATE TABLE items ( collection text NOT NULL, datetime timestamptz NOT NULL, end_datetime timestamptz NOT NULL, + pgstac_updated_at timestamptz NOT NULL DEFAULT now(), + content_hash text NOT NULL DEFAULT '', content JSONB NOT NULL, private jsonb ) PARTITION BY LIST (collection) ; +CREATE TABLE IF NOT EXISTS items_deleted_log ( + id bigint GENERATED ALWAYS AS IDENTITY PRIMARY KEY, + item_id text NOT NULL, + collection text NOT NULL, + partition text, + datetime timestamptz, + end_datetime timestamptz, + content_hash text NOT NULL DEFAULT '', + deleted_at timestamptz NOT NULL DEFAULT now() +); +CREATE INDEX IF NOT EXISTS items_deleted_log_deleted_at_idx ON items_deleted_log (deleted_at); + CREATE INDEX "datetime_idx" ON items USING BTREE (datetime DESC, end_datetime ASC); CREATE INDEX "geometry_idx" ON items USING GIST (geometry); @@ -2121,21 +2135,70 @@ REFERENCING NEW TABLE AS newdata FOR EACH STATEMENT EXECUTE FUNCTION partition_after_triggerfunc(); +CREATE OR REPLACE FUNCTION items_touch_triggerfunc() RETURNS TRIGGER AS $$ +BEGIN + NEW.pgstac_updated_at := now(); /* stamp the updated row with the transaction start time */ + NEW.content_hash := encode(sha256(convert_to(content_hydrate(NEW)::text, 'UTF8')), 'hex'); /* convert_to() hashes the UTF-8 bytes of the hydrated JSON text; a ::text::bytea cast would run the text through bytea input parsing, so a backslash in the JSON (for example an escaped quote or newline) raises "invalid input syntax for type bytea" */ + RETURN NEW; +END; +$$ LANGUAGE PLPGSQL SECURITY DEFINER; -CREATE OR REPLACE FUNCTION content_dehydrate(content jsonb) RETURNS items AS $$ +DROP TRIGGER IF EXISTS items_before_upsert_trigger ON items; +DROP TRIGGER IF EXISTS items_before_update_trigger ON items; +CREATE TRIGGER items_before_update_trigger +BEFORE UPDATE ON 
items +FOR EACH ROW +EXECUTE FUNCTION items_touch_triggerfunc(); + +CREATE OR REPLACE FUNCTION items_delete_log_trigger() RETURNS TRIGGER AS $$ +BEGIN /* copy one tombstone row per deleted item from the old_rows transition table */ + INSERT INTO items_deleted_log ( + item_id, + collection, + partition, + datetime, + end_datetime, + content_hash + ) SELECT - content->>'id' as id, - stac_geom(content) as geometry, - content->>'collection' as collection, - stac_datetime(content) as datetime, - stac_end_datetime(content) as end_datetime, - strip_jsonb( - content - '{id,geometry,collection,type}'::text[], - collection_base_item(content->>'collection') - ) - '{id,geometry,collection,type}'::text[] as content, - null::jsonb as private - ; -$$ LANGUAGE SQL STABLE; + old_rows.id, + old_rows.collection, + (partition_name(old_rows.collection, old_rows.datetime)).partition_name, + old_rows.datetime, + old_rows.end_datetime, + old_rows.content_hash + FROM old_rows; + + RETURN NULL; +END; +$$ LANGUAGE PLPGSQL SECURITY DEFINER; + +DROP TRIGGER IF EXISTS items_delete_log_after_delete_trigger ON items; +CREATE TRIGGER items_delete_log_after_delete_trigger + AFTER DELETE ON items + REFERENCING OLD TABLE AS old_rows + FOR EACH STATEMENT EXECUTE FUNCTION items_delete_log_trigger(); + + +CREATE OR REPLACE FUNCTION content_dehydrate(content jsonb) RETURNS items AS $$ +DECLARE + out items; +BEGIN /* build an items row from a full item document, assigning fields by name */ + out.id := content->>'id'; + out.geometry := stac_geom(content); + out.collection := content->>'collection'; + out.datetime := stac_datetime(content); + out.end_datetime := stac_end_datetime(content); + out.pgstac_updated_at := now(); + out.content_hash := encode(sha256(convert_to(content::text, 'UTF8')), 'hex'); /* hash the UTF-8 bytes of the incoming item JSON; a ::text::bytea cast would parse backslashes in the JSON text (for example an escaped quote or newline) as bytea escapes and raise "invalid input syntax for type bytea" */ + out.content := strip_jsonb( + content - '{id,geometry,collection,type}'::text[], + collection_base_item(content->>'collection') + ) - '{id,geometry,collection,type}'::text[]; + out.private := null; + RETURN out; +END; +$$ LANGUAGE PLPGSQL STABLE; CREATE OR REPLACE FUNCTION include_field(f text, fields jsonb DEFAULT '{}'::jsonb) RETURNS boolean AS $$ DECLARE @@ -4726,6 
+4789,69 @@ BEGIN RETURN NULL; END; $$ LANGUAGE PLPGSQL; + +CREATE OR REPLACE FUNCTION gc_deleted_items_log_batch( + retention_interval interval DEFAULT '30 days', + batch_limit integer DEFAULT 10000 +) RETURNS bigint AS $$ +DECLARE + batch_deleted bigint; +BEGIN + WITH to_delete AS ( + SELECT ctid + FROM items_deleted_log + WHERE deleted_at < now() - retention_interval + ORDER BY deleted_at + LIMIT GREATEST(COALESCE(batch_limit, 10000), 1) /* NULL batch_limit falls back to 10000; values below 1 are raised to 1 */ + ), + deleted AS ( + DELETE FROM items_deleted_log d + USING to_delete td + WHERE d.ctid = td.ctid + RETURNING 1 + ) + SELECT count(*)::bigint INTO batch_deleted FROM deleted; + + RETURN batch_deleted; +END; +$$ LANGUAGE PLPGSQL SECURITY DEFINER; + +CREATE OR REPLACE FUNCTION gc_deleted_items_log( + retention_interval interval, + batch_limit integer +) RETURNS bigint AS $$ +DECLARE + deleted_count bigint := 0; + batch_deleted bigint; +BEGIN + LOOP + batch_deleted := gc_deleted_items_log_batch(retention_interval, batch_limit); + deleted_count := deleted_count + batch_deleted; + EXIT WHEN batch_deleted = 0; /* stop once a batch removes nothing */ + END LOOP; + + RETURN deleted_count; +END; +$$ LANGUAGE PLPGSQL SECURITY DEFINER; + +CREATE OR REPLACE FUNCTION gc_deleted_items_log(retention_interval interval DEFAULT '30 days') RETURNS bigint AS $$ + SELECT gc_deleted_items_log(retention_interval, 10000); /* delegate to the two-argument overload with a batch size of 10000 */ +$$ LANGUAGE SQL SECURITY DEFINER; + +CREATE OR REPLACE PROCEDURE gc_deleted_items_log_committed( + retention_interval interval DEFAULT '30 days', + batch_limit integer DEFAULT 10000 +) AS $$ +DECLARE + batch_deleted bigint; +BEGIN + LOOP + batch_deleted := gc_deleted_items_log_batch(retention_interval, batch_limit); + EXIT WHEN batch_deleted = 0; + COMMIT; /* commit after every non-empty batch */ + END LOOP; +END; +$$ LANGUAGE PLPGSQL; -- END FRAGMENT: 997_maintenance.sql -- BEGIN FRAGMENT: 998_idempotent_post.sql @@ -4834,6 +4960,9 @@ ALTER FUNCTION pin_search SECURITY DEFINER; ALTER FUNCTION unpin_search SECURITY DEFINER; ALTER FUNCTION gc_anonymous_searches(interval, jsonb) SECURITY DEFINER; ALTER FUNCTION 
gc_search_caches(interval, jsonb) SECURITY DEFINER; +ALTER FUNCTION gc_deleted_items_log_batch(interval, integer) SECURITY DEFINER; +ALTER FUNCTION gc_deleted_items_log(interval, integer) SECURITY DEFINER; +ALTER FUNCTION gc_deleted_items_log(interval) SECURITY DEFINER; ALTER FUNCTION format_item SECURITY DEFINER; ALTER FUNCTION maintain_index SECURITY DEFINER; @@ -4859,6 +4988,9 @@ GRANT ALL ON PROCEDURE run_queued_queries TO pgstac_admin; REVOKE ALL PRIVILEGES ON FUNCTION run_queued_queries_intransaction FROM public; GRANT ALL ON FUNCTION run_queued_queries_intransaction TO pgstac_admin; +REVOKE ALL PRIVILEGES ON PROCEDURE gc_deleted_items_log_committed(interval, integer) FROM public; +GRANT ALL ON PROCEDURE gc_deleted_items_log_committed(interval, integer) TO pgstac_admin; + RESET ROLE; SET ROLE pgstac_ingest; diff --git a/src/pgstac/pgstac.sql b/src/pgstac/pgstac.sql index 60426185..522595a7 100644 --- a/src/pgstac/pgstac.sql +++ b/src/pgstac/pgstac.sql @@ -2070,12 +2070,26 @@ CREATE TABLE items ( collection text NOT NULL, datetime timestamptz NOT NULL, end_datetime timestamptz NOT NULL, + pgstac_updated_at timestamptz NOT NULL DEFAULT now(), + content_hash text NOT NULL DEFAULT '', /* NOTE(review): the 0.9.11 upgrade script adds content_hash and pgstac_updated_at with ALTER TABLE ... ADD COLUMN, which appends them after "private"; column order therefore differs between fresh and upgraded databases - confirm nothing depends on column position */ content JSONB NOT NULL, private jsonb ) PARTITION BY LIST (collection) ; +CREATE TABLE IF NOT EXISTS items_deleted_log ( /* tombstone log written by items_delete_log_trigger(): one row per item deleted from items */ + id bigint GENERATED ALWAYS AS IDENTITY PRIMARY KEY, + item_id text NOT NULL, + collection text NOT NULL, + partition text, + datetime timestamptz, + end_datetime timestamptz, + content_hash text NOT NULL DEFAULT '', + deleted_at timestamptz NOT NULL DEFAULT now() +); +CREATE INDEX IF NOT EXISTS items_deleted_log_deleted_at_idx ON items_deleted_log (deleted_at); + CREATE INDEX "datetime_idx" ON items USING BTREE (datetime DESC, end_datetime ASC); CREATE INDEX "geometry_idx" ON items USING GIST (geometry); @@ -2121,21 +2135,70 @@ REFERENCING NEW TABLE AS newdata FOR EACH STATEMENT EXECUTE FUNCTION partition_after_triggerfunc(); +CREATE OR REPLACE FUNCTION 
items_touch_triggerfunc() RETURNS TRIGGER AS $$ +BEGIN + NEW.pgstac_updated_at := now(); /* stamp the updated row with the transaction start time */ + NEW.content_hash := encode(sha256(convert_to(content_hydrate(NEW)::text, 'UTF8')), 'hex'); /* convert_to() hashes the UTF-8 bytes of the hydrated JSON text; a ::text::bytea cast would run the text through bytea input parsing, so a backslash in the JSON (for example an escaped quote or newline) raises "invalid input syntax for type bytea" */ + RETURN NEW; +END; +$$ LANGUAGE PLPGSQL SECURITY DEFINER; -CREATE OR REPLACE FUNCTION content_dehydrate(content jsonb) RETURNS items AS $$ +DROP TRIGGER IF EXISTS items_before_upsert_trigger ON items; +DROP TRIGGER IF EXISTS items_before_update_trigger ON items; +CREATE TRIGGER items_before_update_trigger +BEFORE UPDATE ON items +FOR EACH ROW +EXECUTE FUNCTION items_touch_triggerfunc(); + +CREATE OR REPLACE FUNCTION items_delete_log_trigger() RETURNS TRIGGER AS $$ +BEGIN /* copy one tombstone row per deleted item from the old_rows transition table */ + INSERT INTO items_deleted_log ( + item_id, + collection, + partition, + datetime, + end_datetime, + content_hash + ) SELECT - content->>'id' as id, - stac_geom(content) as geometry, - content->>'collection' as collection, - stac_datetime(content) as datetime, - stac_end_datetime(content) as end_datetime, - strip_jsonb( - content - '{id,geometry,collection,type}'::text[], - collection_base_item(content->>'collection') - ) - '{id,geometry,collection,type}'::text[] as content, - null::jsonb as private - ; -$$ LANGUAGE SQL STABLE; + old_rows.id, + old_rows.collection, + (partition_name(old_rows.collection, old_rows.datetime)).partition_name, + old_rows.datetime, + old_rows.end_datetime, + old_rows.content_hash + FROM old_rows; + + RETURN NULL; +END; +$$ LANGUAGE PLPGSQL SECURITY DEFINER; + +DROP TRIGGER IF EXISTS items_delete_log_after_delete_trigger ON items; +CREATE TRIGGER items_delete_log_after_delete_trigger + AFTER DELETE ON items + REFERENCING OLD TABLE AS old_rows + FOR EACH STATEMENT EXECUTE FUNCTION items_delete_log_trigger(); + + +CREATE OR REPLACE FUNCTION content_dehydrate(content jsonb) RETURNS items AS $$ +DECLARE + out items; +BEGIN + out.id := content->>'id'; + out.geometry := stac_geom(content); + out.collection := content->>'collection'; + out.datetime := stac_datetime(content); + out.end_datetime := 
stac_end_datetime(content); + out.pgstac_updated_at := now(); + out.content_hash := encode(sha256(convert_to(content::text, 'UTF8')), 'hex'); /* hash the UTF-8 bytes of the incoming item JSON; a ::text::bytea cast would parse backslashes in the JSON text (for example an escaped quote or newline) as bytea escapes and raise "invalid input syntax for type bytea" */ + out.content := strip_jsonb( + content - '{id,geometry,collection,type}'::text[], + collection_base_item(content->>'collection') + ) - '{id,geometry,collection,type}'::text[]; + out.private := null; + RETURN out; +END; +$$ LANGUAGE PLPGSQL STABLE; CREATE OR REPLACE FUNCTION include_field(f text, fields jsonb DEFAULT '{}'::jsonb) RETURNS boolean AS $$ DECLARE @@ -4726,6 +4789,69 @@ BEGIN RETURN NULL; END; $$ LANGUAGE PLPGSQL; + +CREATE OR REPLACE FUNCTION gc_deleted_items_log_batch( + retention_interval interval DEFAULT '30 days', + batch_limit integer DEFAULT 10000 +) RETURNS bigint AS $$ +DECLARE + batch_deleted bigint; +BEGIN + WITH to_delete AS ( + SELECT ctid + FROM items_deleted_log + WHERE deleted_at < now() - retention_interval + ORDER BY deleted_at + LIMIT GREATEST(COALESCE(batch_limit, 10000), 1) /* NULL batch_limit falls back to 10000; values below 1 are raised to 1 */ + ), + deleted AS ( + DELETE FROM items_deleted_log d + USING to_delete td + WHERE d.ctid = td.ctid + RETURNING 1 + ) + SELECT count(*)::bigint INTO batch_deleted FROM deleted; + + RETURN batch_deleted; +END; +$$ LANGUAGE PLPGSQL SECURITY DEFINER; + +CREATE OR REPLACE FUNCTION gc_deleted_items_log( + retention_interval interval, + batch_limit integer +) RETURNS bigint AS $$ +DECLARE + deleted_count bigint := 0; + batch_deleted bigint; +BEGIN + LOOP + batch_deleted := gc_deleted_items_log_batch(retention_interval, batch_limit); + deleted_count := deleted_count + batch_deleted; + EXIT WHEN batch_deleted = 0; /* stop once a batch removes nothing */ + END LOOP; + + RETURN deleted_count; +END; +$$ LANGUAGE PLPGSQL SECURITY DEFINER; + +CREATE OR REPLACE FUNCTION gc_deleted_items_log(retention_interval interval DEFAULT '30 days') RETURNS bigint AS $$ + SELECT gc_deleted_items_log(retention_interval, 10000); +$$ LANGUAGE SQL SECURITY DEFINER; + +CREATE OR REPLACE PROCEDURE gc_deleted_items_log_committed( + retention_interval interval DEFAULT '30 days', + batch_limit integer DEFAULT 
10000 +) AS $$ +DECLARE + batch_deleted bigint; +BEGIN + LOOP + batch_deleted := gc_deleted_items_log_batch(retention_interval, batch_limit); + EXIT WHEN batch_deleted = 0; + COMMIT; /* commit after every non-empty batch */ + END LOOP; +END; +$$ LANGUAGE PLPGSQL; -- END FRAGMENT: 997_maintenance.sql -- BEGIN FRAGMENT: 998_idempotent_post.sql @@ -4834,6 +4960,9 @@ ALTER FUNCTION pin_search SECURITY DEFINER; ALTER FUNCTION unpin_search SECURITY DEFINER; ALTER FUNCTION gc_anonymous_searches(interval, jsonb) SECURITY DEFINER; ALTER FUNCTION gc_search_caches(interval, jsonb) SECURITY DEFINER; +ALTER FUNCTION gc_deleted_items_log_batch(interval, integer) SECURITY DEFINER; +ALTER FUNCTION gc_deleted_items_log(interval, integer) SECURITY DEFINER; +ALTER FUNCTION gc_deleted_items_log(interval) SECURITY DEFINER; ALTER FUNCTION format_item SECURITY DEFINER; ALTER FUNCTION maintain_index SECURITY DEFINER; @@ -4859,6 +4988,9 @@ GRANT ALL ON PROCEDURE run_queued_queries TO pgstac_admin; REVOKE ALL PRIVILEGES ON FUNCTION run_queued_queries_intransaction FROM public; GRANT ALL ON FUNCTION run_queued_queries_intransaction TO pgstac_admin; +REVOKE ALL PRIVILEGES ON PROCEDURE gc_deleted_items_log_committed(interval, integer) FROM public; +GRANT ALL ON PROCEDURE gc_deleted_items_log_committed(interval, integer) TO pgstac_admin; + RESET ROLE; SET ROLE pgstac_ingest; diff --git a/src/pgstac/sql/003a_items.sql b/src/pgstac/sql/003a_items.sql index d1a3e7b2..b252e9cf 100644 --- a/src/pgstac/sql/003a_items.sql +++ b/src/pgstac/sql/003a_items.sql @@ -4,12 +4,26 @@ CREATE TABLE items ( collection text NOT NULL, datetime timestamptz NOT NULL, end_datetime timestamptz NOT NULL, + pgstac_updated_at timestamptz NOT NULL DEFAULT now(), + content_hash text NOT NULL DEFAULT '', /* NOTE(review): the 0.9.11 upgrade script adds content_hash and pgstac_updated_at with ALTER TABLE ... ADD COLUMN, which appends them after "private"; column order therefore differs between fresh and upgraded databases - confirm nothing depends on column position */ content JSONB NOT NULL, private jsonb ) PARTITION BY LIST (collection) ; +CREATE TABLE IF NOT EXISTS items_deleted_log ( /* tombstone log written by items_delete_log_trigger(): one row per item deleted from items */ + id bigint GENERATED ALWAYS AS IDENTITY PRIMARY KEY, + item_id text NOT NULL, + collection text NOT NULL, + partition text, + datetime
timestamptz, + end_datetime timestamptz, + content_hash text NOT NULL DEFAULT '', + deleted_at timestamptz NOT NULL DEFAULT now() +); +CREATE INDEX IF NOT EXISTS items_deleted_log_deleted_at_idx ON items_deleted_log (deleted_at); + CREATE INDEX "datetime_idx" ON items USING BTREE (datetime DESC, end_datetime ASC); CREATE INDEX "geometry_idx" ON items USING GIST (geometry); @@ -55,21 +69,70 @@ REFERENCING NEW TABLE AS newdata FOR EACH STATEMENT EXECUTE FUNCTION partition_after_triggerfunc(); +CREATE OR REPLACE FUNCTION items_touch_triggerfunc() RETURNS TRIGGER AS $$ +BEGIN + NEW.pgstac_updated_at := now(); /* stamp the updated row with the transaction start time */ + NEW.content_hash := encode(sha256(convert_to(content_hydrate(NEW)::text, 'UTF8')), 'hex'); /* convert_to() hashes the UTF-8 bytes of the hydrated JSON text; a ::text::bytea cast would run the text through bytea input parsing, so a backslash in the JSON (for example an escaped quote or newline) raises "invalid input syntax for type bytea" */ + RETURN NEW; +END; +$$ LANGUAGE PLPGSQL SECURITY DEFINER; + +DROP TRIGGER IF EXISTS items_before_upsert_trigger ON items; +DROP TRIGGER IF EXISTS items_before_update_trigger ON items; +CREATE TRIGGER items_before_update_trigger +BEFORE UPDATE ON items +FOR EACH ROW +EXECUTE FUNCTION items_touch_triggerfunc(); -CREATE OR REPLACE FUNCTION content_dehydrate(content jsonb) RETURNS items AS $$ +CREATE OR REPLACE FUNCTION items_delete_log_trigger() RETURNS TRIGGER AS $$ +BEGIN /* copy one tombstone row per deleted item from the old_rows transition table */ + INSERT INTO items_deleted_log ( + item_id, + collection, + partition, + datetime, + end_datetime, + content_hash + ) SELECT - content->>'id' as id, - stac_geom(content) as geometry, - content->>'collection' as collection, - stac_datetime(content) as datetime, - stac_end_datetime(content) as end_datetime, - strip_jsonb( - content - '{id,geometry,collection,type}'::text[], - collection_base_item(content->>'collection') - ) - '{id,geometry,collection,type}'::text[] as content, - null::jsonb as private - ; -$$ LANGUAGE SQL STABLE; + old_rows.id, + old_rows.collection, + (partition_name(old_rows.collection, old_rows.datetime)).partition_name, + old_rows.datetime, + old_rows.end_datetime, + old_rows.content_hash + FROM old_rows; + + RETURN NULL; +END; +$$ LANGUAGE PLPGSQL SECURITY DEFINER; + +DROP TRIGGER IF EXISTS 
items_delete_log_after_delete_trigger ON items; +CREATE TRIGGER items_delete_log_after_delete_trigger + AFTER DELETE ON items + REFERENCING OLD TABLE AS old_rows + FOR EACH STATEMENT EXECUTE FUNCTION items_delete_log_trigger(); + + +CREATE OR REPLACE FUNCTION content_dehydrate(content jsonb) RETURNS items AS $$ +DECLARE + out items; +BEGIN /* build an items row from a full item document, assigning fields by name */ + out.id := content->>'id'; + out.geometry := stac_geom(content); + out.collection := content->>'collection'; + out.datetime := stac_datetime(content); + out.end_datetime := stac_end_datetime(content); + out.pgstac_updated_at := now(); + out.content_hash := encode(sha256(convert_to(content::text, 'UTF8')), 'hex'); /* hash the UTF-8 bytes of the incoming item JSON; a ::text::bytea cast would parse backslashes in the JSON text (for example an escaped quote or newline) as bytea escapes and raise "invalid input syntax for type bytea" */ + out.content := strip_jsonb( + content - '{id,geometry,collection,type}'::text[], + collection_base_item(content->>'collection') + ) - '{id,geometry,collection,type}'::text[]; + out.private := null; + RETURN out; +END; +$$ LANGUAGE PLPGSQL STABLE; CREATE OR REPLACE FUNCTION include_field(f text, fields jsonb DEFAULT '{}'::jsonb) RETURNS boolean AS $$ DECLARE diff --git a/src/pgstac/sql/997_maintenance.sql b/src/pgstac/sql/997_maintenance.sql index df1175d2..bf758424 100644 --- a/src/pgstac/sql/997_maintenance.sql +++ b/src/pgstac/sql/997_maintenance.sql @@ -85,3 +85,66 @@ BEGIN RETURN NULL; END; $$ LANGUAGE PLPGSQL; + +CREATE OR REPLACE FUNCTION gc_deleted_items_log_batch( + retention_interval interval DEFAULT '30 days', + batch_limit integer DEFAULT 10000 +) RETURNS bigint AS $$ +DECLARE + batch_deleted bigint; +BEGIN + WITH to_delete AS ( + SELECT ctid + FROM items_deleted_log + WHERE deleted_at < now() - retention_interval + ORDER BY deleted_at + LIMIT GREATEST(COALESCE(batch_limit, 10000), 1) /* NULL batch_limit falls back to 10000; values below 1 are raised to 1 */ + ), + deleted AS ( + DELETE FROM items_deleted_log d + USING to_delete td + WHERE d.ctid = td.ctid + RETURNING 1 + ) + SELECT count(*)::bigint INTO batch_deleted FROM deleted; + + RETURN batch_deleted; +END; +$$ LANGUAGE PLPGSQL SECURITY DEFINER; + +CREATE OR REPLACE FUNCTION gc_deleted_items_log( + retention_interval interval, + 
batch_limit integer +) RETURNS bigint AS $$ +DECLARE + deleted_count bigint := 0; + batch_deleted bigint; +BEGIN + LOOP + batch_deleted := gc_deleted_items_log_batch(retention_interval, batch_limit); + deleted_count := deleted_count + batch_deleted; + EXIT WHEN batch_deleted = 0; /* stop once a batch removes nothing */ + END LOOP; + + RETURN deleted_count; +END; +$$ LANGUAGE PLPGSQL SECURITY DEFINER; + +CREATE OR REPLACE FUNCTION gc_deleted_items_log(retention_interval interval DEFAULT '30 days') RETURNS bigint AS $$ + SELECT gc_deleted_items_log(retention_interval, 10000); /* delegate to the two-argument overload with a batch size of 10000 */ +$$ LANGUAGE SQL SECURITY DEFINER; + +CREATE OR REPLACE PROCEDURE gc_deleted_items_log_committed( + retention_interval interval DEFAULT '30 days', + batch_limit integer DEFAULT 10000 +) AS $$ +DECLARE + batch_deleted bigint; +BEGIN + LOOP + batch_deleted := gc_deleted_items_log_batch(retention_interval, batch_limit); + EXIT WHEN batch_deleted = 0; + COMMIT; /* commit after every non-empty batch */ + END LOOP; +END; +$$ LANGUAGE PLPGSQL; diff --git a/src/pgstac/sql/998_idempotent_post.sql b/src/pgstac/sql/998_idempotent_post.sql index d99bc6b4..c74d7d2a 100644 --- a/src/pgstac/sql/998_idempotent_post.sql +++ b/src/pgstac/sql/998_idempotent_post.sql @@ -103,6 +103,9 @@ ALTER FUNCTION pin_search SECURITY DEFINER; ALTER FUNCTION unpin_search SECURITY DEFINER; ALTER FUNCTION gc_anonymous_searches(interval, jsonb) SECURITY DEFINER; ALTER FUNCTION gc_search_caches(interval, jsonb) SECURITY DEFINER; +ALTER FUNCTION gc_deleted_items_log_batch(interval, integer) SECURITY DEFINER; +ALTER FUNCTION gc_deleted_items_log(interval, integer) SECURITY DEFINER; +ALTER FUNCTION gc_deleted_items_log(interval) SECURITY DEFINER; ALTER FUNCTION format_item SECURITY DEFINER; ALTER FUNCTION maintain_index SECURITY DEFINER; @@ -128,6 +131,9 @@ GRANT ALL ON PROCEDURE run_queued_queries TO pgstac_admin; REVOKE ALL PRIVILEGES ON FUNCTION run_queued_queries_intransaction FROM public; GRANT ALL ON FUNCTION run_queued_queries_intransaction TO pgstac_admin; +REVOKE ALL PRIVILEGES ON PROCEDURE 
gc_deleted_items_log_committed(interval, integer) FROM public; +GRANT ALL ON PROCEDURE gc_deleted_items_log_committed(interval, integer) TO pgstac_admin; + RESET ROLE; SET ROLE pgstac_ingest; diff --git a/src/pgstac/tests/basic/crud_functions.sql b/src/pgstac/tests/basic/crud_functions.sql index 68eefa53..8c619777 100644 --- a/src/pgstac/tests/basic/crud_functions.sql +++ b/src/pgstac/tests/basic/crud_functions.sql @@ -18,30 +18,30 @@ INSERT INTO collections (content, partition_trunc) VALUES ('{"id":"pgstactest-cr -- Create an item SELECT create_item((SELECT content FROM test_items LIMIT 1)); -SELECT * FROM items WHERE collection='pgstactest-crudtest'; +SELECT id, geometry, collection, datetime, end_datetime, content, private FROM items WHERE collection='pgstactest-crudtest'; -- Check to see if extent got updated SELECT content->'extent' FROM collections WHERE id='pgstactest-crudtest'; -- Update item with new datetime that is in a different partition SELECT update_item((SELECT content || '{"properties":{"datetime":"2023-01-01 00:00:00Z"}}'::jsonb FROM test_items LIMIT 1)); -SELECT * FROM items WHERE collection='pgstactest-crudtest'; +SELECT id, geometry, collection, datetime, end_datetime, content, private FROM items WHERE collection='pgstactest-crudtest'; -- Check to see if extent got updated SELECT content->'extent' FROM collections WHERE id='pgstactest-crudtest'; -- Update item with new datetime that is in a different partition SELECT upsert_item((SELECT content || '{"properties":{"datetime":"2023-02-01 00:00:00Z"}}'::jsonb FROM test_items LIMIT 1)); -SELECT * FROM items WHERE collection='pgstactest-crudtest'; +SELECT id, geometry, collection, datetime, end_datetime, content, private FROM items WHERE collection='pgstactest-crudtest'; -- Delete an item SELECT delete_item('pgstactest-crudtest-1', 'pgstactest-crudtest'); -SELECT * FROM items WHERE collection='pgstactest-crudtest'; +SELECT id, geometry, collection, datetime, end_datetime, content, private FROM items 
WHERE collection='pgstactest-crudtest'; WITH c AS (SELECT content FROM test_items LIMIT 2), aggregated AS (SELECT jsonb_agg(content) as items FROM c) SELECT create_items(items) FROM aggregated; -SELECT * FROM items WHERE collection='pgstactest-crudtest'; +SELECT id, geometry, collection, datetime, end_datetime, content, private FROM items WHERE collection='pgstactest-crudtest'; DELETE FROM items WHERE collection='pgstactest-crudtest'; @@ -49,13 +49,13 @@ DELETE FROM items WHERE collection='pgstactest-crudtest'; WITH c AS (SELECT content FROM test_items LIMIT 2), aggregated AS (SELECT jsonb_agg(content) as items FROM c) SELECT upsert_items(items) FROM aggregated; -SELECT * FROM items WHERE collection='pgstactest-crudtest'; +SELECT id, geometry, collection, datetime, end_datetime, content, private FROM items WHERE collection='pgstactest-crudtest'; -- upsert items that already exist and are to be modified WITH c AS (SELECT content || '{"properties":{"datetime":"2023-02-01 00:00:00Z"}}'::jsonb as content FROM test_items LIMIT 2), aggregated AS (SELECT jsonb_agg(content) as items FROM c) SELECT upsert_items(items) FROM aggregated; -SELECT * FROM items WHERE collection='pgstactest-crudtest'; +SELECT id, geometry, collection, datetime, end_datetime, content, private FROM items WHERE collection='pgstactest-crudtest'; -- turn off update_collection_extent then add an item and verify that the extent did not get updated automatically SET pgstac.update_collection_extent=FALSE; diff --git a/src/pgstac/tests/basic/crud_functions.sql.out b/src/pgstac/tests/basic/crud_functions.sql.out index 44ec404c..8f059830 100644 --- a/src/pgstac/tests/basic/crud_functions.sql.out +++ b/src/pgstac/tests/basic/crud_functions.sql.out @@ -27,7 +27,7 @@ INSERT 0 1 SELECT create_item((SELECT content FROM test_items LIMIT 1)); -SELECT * FROM items WHERE collection='pgstactest-crudtest'; +SELECT id, geometry, collection, datetime, end_datetime, content, private FROM items WHERE 
collection='pgstactest-crudtest'; pgstactest-crudtest-1 | 0103000020E610000001000000050000005B3FFD67CD5355C0C4211B4817EF3E400CE6AF90B95355C0A112D731AE003F4004C93B87325855C0BEBC00FBE8003F40FA0AD28C455855C000E5EFDE51EF3E405B3FFD67CD5355C0C4211B4817EF3E40 | pgstactest-crudtest | 2020-01-01 00:00:00+00 | 2020-01-01 00:00:00+00 | {"properties": {"datetime": "2020-01-01 00:00:00+00"}} | -- Check to see if extent got updated @@ -38,7 +38,7 @@ SELECT content->'extent' FROM collections WHERE id='pgstactest-crudtest'; -- Update item with new datetime that is in a different partition SELECT update_item((SELECT content || '{"properties":{"datetime":"2023-01-01 00:00:00Z"}}'::jsonb FROM test_items LIMIT 1)); -SELECT * FROM items WHERE collection='pgstactest-crudtest'; +SELECT id, geometry, collection, datetime, end_datetime, content, private FROM items WHERE collection='pgstactest-crudtest'; pgstactest-crudtest-1 | 0103000020E610000001000000050000005B3FFD67CD5355C0C4211B4817EF3E400CE6AF90B95355C0A112D731AE003F4004C93B87325855C0BEBC00FBE8003F40FA0AD28C455855C000E5EFDE51EF3E405B3FFD67CD5355C0C4211B4817EF3E40 | pgstactest-crudtest | 2023-01-01 00:00:00+00 | 2023-01-01 00:00:00+00 | {"properties": {"datetime": "2023-01-01 00:00:00Z"}} | -- Check to see if extent got updated @@ -49,21 +49,21 @@ SELECT content->'extent' FROM collections WHERE id='pgstactest-crudtest'; SELECT upsert_item((SELECT content || '{"properties":{"datetime":"2023-02-01 00:00:00Z"}}'::jsonb FROM test_items LIMIT 1)); -SELECT * FROM items WHERE collection='pgstactest-crudtest'; +SELECT id, geometry, collection, datetime, end_datetime, content, private FROM items WHERE collection='pgstactest-crudtest'; pgstactest-crudtest-1 | 0103000020E610000001000000050000005B3FFD67CD5355C0C4211B4817EF3E400CE6AF90B95355C0A112D731AE003F4004C93B87325855C0BEBC00FBE8003F40FA0AD28C455855C000E5EFDE51EF3E405B3FFD67CD5355C0C4211B4817EF3E40 | pgstactest-crudtest | 2023-02-01 00:00:00+00 | 2023-02-01 00:00:00+00 | {"properties": 
{"datetime": "2023-02-01 00:00:00Z"}} | -- Delete an item SELECT delete_item('pgstactest-crudtest-1', 'pgstactest-crudtest'); -SELECT * FROM items WHERE collection='pgstactest-crudtest'; +SELECT id, geometry, collection, datetime, end_datetime, content, private FROM items WHERE collection='pgstactest-crudtest'; WITH c AS (SELECT content FROM test_items LIMIT 2), aggregated AS (SELECT jsonb_agg(content) as items FROM c) SELECT create_items(items) FROM aggregated; -SELECT * FROM items WHERE collection='pgstactest-crudtest'; +SELECT id, geometry, collection, datetime, end_datetime, content, private FROM items WHERE collection='pgstactest-crudtest'; pgstactest-crudtest-1 | 0103000020E610000001000000050000005B3FFD67CD5355C0C4211B4817EF3E400CE6AF90B95355C0A112D731AE003F4004C93B87325855C0BEBC00FBE8003F40FA0AD28C455855C000E5EFDE51EF3E405B3FFD67CD5355C0C4211B4817EF3E40 | pgstactest-crudtest | 2020-01-01 00:00:00+00 | 2020-01-01 00:00:00+00 | {"properties": {"datetime": "2020-01-01 00:00:00+00"}} | pgstactest-crudtest-2 | 0103000020E610000001000000050000005B3FFD67CD5355C0C4211B4817EF3E400CE6AF90B95355C0A112D731AE003F4004C93B87325855C0BEBC00FBE8003F40FA0AD28C455855C000E5EFDE51EF3E405B3FFD67CD5355C0C4211B4817EF3E40 | pgstactest-crudtest | 2020-02-01 00:00:00+00 | 2020-02-01 00:00:00+00 | {"properties": {"datetime": "2020-02-01 00:00:00+00"}} | @@ -75,7 +75,7 @@ aggregated AS (SELECT jsonb_agg(content) as items FROM c) SELECT upsert_items(items) FROM aggregated; -SELECT * FROM items WHERE collection='pgstactest-crudtest'; +SELECT id, geometry, collection, datetime, end_datetime, content, private FROM items WHERE collection='pgstactest-crudtest'; pgstactest-crudtest-1 | 0103000020E610000001000000050000005B3FFD67CD5355C0C4211B4817EF3E400CE6AF90B95355C0A112D731AE003F4004C93B87325855C0BEBC00FBE8003F40FA0AD28C455855C000E5EFDE51EF3E405B3FFD67CD5355C0C4211B4817EF3E40 | pgstactest-crudtest | 2020-01-01 00:00:00+00 | 2020-01-01 00:00:00+00 | {"properties": {"datetime": "2020-01-01 
00:00:00+00"}} | pgstactest-crudtest-2 | 0103000020E610000001000000050000005B3FFD67CD5355C0C4211B4817EF3E400CE6AF90B95355C0A112D731AE003F4004C93B87325855C0BEBC00FBE8003F40FA0AD28C455855C000E5EFDE51EF3E405B3FFD67CD5355C0C4211B4817EF3E40 | pgstactest-crudtest | 2020-02-01 00:00:00+00 | 2020-02-01 00:00:00+00 | {"properties": {"datetime": "2020-02-01 00:00:00+00"}} | @@ -85,7 +85,7 @@ aggregated AS (SELECT jsonb_agg(content) as items FROM c) SELECT upsert_items(items) FROM aggregated; -SELECT * FROM items WHERE collection='pgstactest-crudtest'; +SELECT id, geometry, collection, datetime, end_datetime, content, private FROM items WHERE collection='pgstactest-crudtest'; pgstactest-crudtest-1 | 0103000020E610000001000000050000005B3FFD67CD5355C0C4211B4817EF3E400CE6AF90B95355C0A112D731AE003F4004C93B87325855C0BEBC00FBE8003F40FA0AD28C455855C000E5EFDE51EF3E405B3FFD67CD5355C0C4211B4817EF3E40 | pgstactest-crudtest | 2023-02-01 00:00:00+00 | 2023-02-01 00:00:00+00 | {"properties": {"datetime": "2023-02-01 00:00:00Z"}} | pgstactest-crudtest-2 | 0103000020E610000001000000050000005B3FFD67CD5355C0C4211B4817EF3E400CE6AF90B95355C0A112D731AE003F4004C93B87325855C0BEBC00FBE8003F40FA0AD28C455855C000E5EFDE51EF3E405B3FFD67CD5355C0C4211B4817EF3E40 | pgstactest-crudtest | 2023-02-01 00:00:00+00 | 2023-02-01 00:00:00+00 | {"properties": {"datetime": "2023-02-01 00:00:00Z"}} | diff --git a/src/pgstac/tests/pgtap/003_items.sql b/src/pgstac/tests/pgtap/003_items.sql index ddebf80a..8412f18b 100644 --- a/src/pgstac/tests/pgtap/003_items.sql +++ b/src/pgstac/tests/pgtap/003_items.sql @@ -1,4 +1,5 @@ SELECT has_table('pgstac'::name, 'items'::name); +SELECT has_table('pgstac'::name, 'items_deleted_log'::name); SELECT is_indexed('pgstac'::name, 'items'::name, 'geometry'); @@ -13,6 +14,9 @@ SELECT has_function('pgstac'::name, 'update_item', ARRAY['jsonb']); SELECT has_function('pgstac'::name, 'upsert_item', ARRAY['jsonb']); SELECT has_function('pgstac'::name, 'create_items', ARRAY['jsonb']); SELECT 
has_function('pgstac'::name, 'upsert_items', ARRAY['jsonb']); +SELECT has_function('pgstac'::name, 'gc_deleted_items_log', ARRAY['interval']); +SELECT has_function('pgstac'::name, 'gc_deleted_items_log', ARRAY['interval', 'integer']); +SELECT has_function('pgstac'::name, 'gc_deleted_items_log_batch', ARRAY['interval', 'integer']); -- tools to update collection extents based on extents in items @@ -33,6 +37,15 @@ SELECT results_eq($$ 'Test create_item function' ); +SELECT ok( + (SELECT pgstac_updated_at IS NOT NULL FROM items WHERE id='pgstac-test-item-0003' AND collection='pgstac-test-collection'), + 'create_item populates pgstac_updated_at' +); +SELECT ok( + (SELECT length(content_hash) = 64 FROM items WHERE id='pgstac-test-item-0003' AND collection='pgstac-test-collection'), + 'create_item generates sha256 content_hash' +); + SELECT update_item('{"id": "pgstac-test-item-0003", "bbox": [-85.379245, 30.933949, -85.308201, 31.003555], "type": "Feature", "links": [], "assets": {"image": {"href": "https://naipeuwest.blob.core.windows.net/naip/v002/al/2011/al_100cm_2011/30085/m_3008506_nw_16_1_20110825.tif", "type": "image/tiff; application=geotiff; profile=cloud-optimized", "roles": ["data"], "title": "RGBIR COG tile", "eo:bands": [{"name": "Red", "common_name": "red"}, {"name": "Green", "common_name": "green"}, {"name": "Blue", "common_name": "blue"}, {"name": "NIR", "common_name": "nir", "description": "near-infrared"}]}, "metadata": {"href": "https://naipeuwest.blob.core.windows.net/naip/v002/al/2011/al_fgdc_2011/30085/m_3008506_nw_16_1_20110825.txt", "type": "text/plain", "roles": ["metadata"], "title": "FGDC Metdata"}, "thumbnail": {"href": "https://naipeuwest.blob.core.windows.net/naip/v002/al/2011/al_100cm_2011/30085/m_3008506_nw_16_1_20110825.200.jpg", "type": "image/jpeg", "roles": ["thumbnail"], "title": "Thumbnail"}}, "geometry": {"type": "Polygon", "coordinates": [[[-85.309412, 30.933949], [-85.308201, 31.002658], [-85.378084, 31.003555], [-85.379245, 
30.934843], [-85.309412, 30.933949]]]}, "collection": "pgstac-test-collection", "properties": {"gsd": 1, "datetime": "2011-08-25T00:00:00Z", "naip:year": "2011", "proj:bbox": [654842, 3423507, 661516, 3431125], "proj:epsg": 26916, "providers": [{"url": "https://www.fsa.usda.gov/programs-and-services/aerial-photography/imagery-programs/naip-imagery/", "name": "USDA Farm Service Agency", "roles": ["producer", "licensor"]}], "naip:state": "al", "proj:shape": [7618, 6674], "eo:cloud_cover": 29, "proj:transform": [1, 0, 654842, 0, -1, 3431125, 0, 0, 1]}, "stac_version": "1.0.0-beta.2", "stac_extensions": ["eo", "projection"]}'); SELECT results_eq($$ @@ -43,6 +56,27 @@ SELECT results_eq($$ 'Test update_item function' ); +SELECT results_eq($$ + WITH old_row AS ( + SELECT pgstac_updated_at FROM items WHERE id='pgstac-test-item-0003' AND collection='pgstac-test-collection' + ), + updated AS ( + UPDATE items + SET private = '{}'::jsonb + WHERE id='pgstac-test-item-0003' AND collection='pgstac-test-collection' + RETURNING pgstac_updated_at + ) + SELECT (SELECT pgstac_updated_at FROM updated) >= (SELECT pgstac_updated_at FROM old_row); + $$,$$ + SELECT TRUE; + $$, + 'updates refresh pgstac_updated_at through items_touch_triggerfunc' +); +SELECT ok( + (SELECT length(content_hash) = 64 FROM items WHERE id='pgstac-test-item-0003' AND collection='pgstac-test-collection'), + 'update path generates new sha256 content_hash' +); + select delete_item('pgstac-test-item-0003'); SELECT results_eq($$ @@ -52,3 +86,55 @@ SELECT results_eq($$ $$, 'Test delete_item function' ); + +SELECT ok( + EXISTS ( + SELECT 1 + FROM items_deleted_log + WHERE item_id='pgstac-test-item-0003' AND collection='pgstac-test-collection' + ), + 'delete_item writes tombstone rows to items_deleted_log' +); + +SELECT lives_ok($$ + UPDATE items_deleted_log + SET deleted_at = now() - '40 days'::interval + WHERE item_id='pgstac-test-item-0003' AND collection='pgstac-test-collection'; +$$, 'Age tombstone rows for 
gc_deleted_items_log test'); + +SELECT results_eq($$ + SELECT gc_deleted_items_log('30 days'::interval) > 0; + $$,$$ + SELECT TRUE; + $$, + 'gc_deleted_items_log removes aged tombstones' +); + +SELECT lives_ok($$ + INSERT INTO items_deleted_log ( + item_id, + collection, + partition, + datetime, + end_datetime, + content_hash, + deleted_at + ) + VALUES ( + 'pgstac-test-item-0003', + 'pgstac-test-collection', + NULL, + now() - '41 days'::interval, + now() - '41 days'::interval, + repeat('a', 64), + now() - '40 days'::interval + ); +$$, 'Insert aged tombstone row for batched gc_deleted_items_log test'); + +SELECT results_eq($$ + SELECT gc_deleted_items_log('30 days'::interval, 1) > 0; + $$,$$ + SELECT TRUE; + $$, + 'gc_deleted_items_log(interval, integer) removes aged tombstones in batches' +); diff --git a/src/pgstac/tests/pgtap/9999_readonly.sql b/src/pgstac/tests/pgtap/9999_readonly.sql index 679f0af1..efb21d4e 100644 --- a/src/pgstac/tests/pgtap/9999_readonly.sql +++ b/src/pgstac/tests/pgtap/9999_readonly.sql @@ -41,4 +41,16 @@ SELECT throws_ok( $$ SELECT gc_anonymous_searches(NULL, '{"search_gc_retention_interval":"1 second"}'::jsonb); $$, '25006' ); +SELECT throws_ok( + $$ SELECT gc_deleted_items_log('1 second'::interval); $$, + '25006' +); +SELECT throws_ok( + $$ SELECT gc_deleted_items_log('1 second'::interval, 1); $$, + '25006' +); +SELECT throws_ok( + $$ SELECT gc_deleted_items_log_batch('1 second'::interval, 1); $$, + '25006' +); RESET pgstac.readonly; diff --git a/src/pypgstac/src/pypgstac/load.py b/src/pypgstac/src/pypgstac/load.py index 76e39502..657580cb 100644 --- a/src/pypgstac/src/pypgstac/load.py +++ b/src/pypgstac/src/pypgstac/load.py @@ -378,7 +378,7 @@ def load_partition( """ DROP TABLE IF EXISTS items_ingest_temp; CREATE TEMP TABLE items_ingest_temp - ON COMMIT DROP AS SELECT * FROM items LIMIT 0; + (LIKE items INCLUDING DEFAULTS) ON COMMIT DROP; """, ) with cur.copy(