Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
34 commits
Select commit Hold shift + click to select a range
6677497
feat: add pgstac-migrate compatibility layer
bitner May 5, 2026
a5a2b5c
chore: switch pgpkg workflows to published packages
bitner May 5, 2026
3e853c6
chore: clean up test warnings
bitner May 5, 2026
392c04c
update changelog
bitner May 11, 2026
2dc9775
Merge branch 'main' into pgpkgmigrations
bitner May 11, 2026
f31bcd2
add more tests
bitner May 11, 2026
88c039d
pr1: switch search_wheres hashing to sha256 and stage unreleased migr…
bitner May 11, 2026
5982900
Update scripts/makemigration
bitner May 12, 2026
85a299f
Update .github/workflows/release.yml
bitner May 12, 2026
356bd17
Complete PR1: pin pgpkg 0.1.1, regenerate migrations, fix test assert…
bitner May 12, 2026
5a38220
Address PR review feedback and harden migration docs/deps
bitner May 12, 2026
adfdd4c
Merge remote-tracking branch 'origin/pgpkgmigrations' into v010-pr1-h…
bitner May 12, 2026
7561525
Merge remote-tracking branch 'origin/main' into v010-pr1-hash-and-dea…
bitner May 12, 2026
50d73bc
Move pgstac_hash into search SQL
bitner May 12, 2026
5932cd8
Refine unreleased changelog for search cache hardening
bitner May 12, 2026
d03f9be
Enable pg_stat_statements and pg_cron in test image
bitner May 12, 2026
2dff33e
Wire search_query updatestats into where_stats
bitner May 12, 2026
bba2f27
Update unreleased changelog for search stats refresh
bitner May 12, 2026
197587a
Move Rust crate under src
bitner May 12, 2026
999a74b
don't save _PLAN.md docs
bitner May 12, 2026
dd5e7de
Merge branch 'main' into v010-pr1-hash-and-dead-code-rerun
bitner May 12, 2026
cd9b9e1
Merge branch 'rustac_cleanup' into v010-pr1-hash-and-dead-code-rerun
bitner May 12, 2026
dd4a621
Document Rust crate move
bitner May 12, 2026
a0d5c3c
Fix server extension smoke test db selection
bitner May 12, 2026
9a253e3
Harden CI extension smoke tests and tighten changelog
bitner May 12, 2026
584861c
Remove content_slim and regenerate SQL artifacts
bitner May 12, 2026
1cd306b
items: add lifecycle metadata and tombstone GC
bitner May 12, 2026
9bd3c43
Merge branch 'main' of github.com:stac-utils/pgstac
bitner May 13, 2026
9ce73ce
Merge main into v010-pr2-sync-metadata-and-search-lifecycle
bitner May 13, 2026
fd7b439
Update CHANGELOG
bitner May 13, 2026
96b0a7f
Refactor item insert triggers, dehydration, and pypgstac loaders
bitner May 13, 2026
b1b1194
Update changelog for PR2
bitner May 13, 2026
11e57a7
Add batched tombstone GC with committed procedure
bitner May 18, 2026
d28e9ab
Document batched tombstone GC in changelog
bitner May 18, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,17 @@ and this project adheres to [Semantic Versioning](http://semver.org/).
## [Unreleased]

### Added

- Add deterministic SHA-256 `content_hash` to STAC items to track data changes across migrations.
- Add `pgstac_updated_at` column to items table as part of separating STAC property updates from database metadata updates.

### Changed

- Replace the expensive row-level trigger for item inserts with optimized SQL and PL/pgSQL hydration strategies to improve ingestion throughput.
- Update pypgstac loaders to dynamically generate hashes during ingestion where required, avoiding trigger recalculation.
- Add tombstone table `items_deleted_log` and `pgstac_updated_at` metadata column to items table.
- Add batched tombstone GC routines: `gc_deleted_items_log_batch(interval, integer)`, overloaded `gc_deleted_items_log(interval, integer)`, and `gc_deleted_items_log_committed(interval, integer)` for commit-per-batch cleanup of large tombstone backlogs.
- Add pgTAP coverage for batched tombstone GC signatures and behavior, and for read-only rejection paths.
- New `pgstac-migrate` package under `src/pgstac-migrate/` with a standalone
CLI, Python API, and tests for migration planning and execution.
- New Rust crate under `src/pgstac-rs/` with updated CI/release wiring,
Expand Down
201 changes: 188 additions & 13 deletions src/pgstac/migrations/pgstac--0.9.11--unreleased.sql
Original file line number Diff line number Diff line change
Expand Up @@ -214,6 +214,22 @@ drop index if exists "pgstac"."search_wheres_where";

drop table "pgstac"."search_wheres";

-- Tombstone log for items removed from pgstac.items. Rows are inserted by
-- items_delete_log_trigger() and pruned by the gc_deleted_items_log* routines.
CREATE TABLE "pgstac"."items_deleted_log" (
    -- Surrogate key; backs the primary key constraint added further down.
    "id"            bigint GENERATED ALWAYS AS IDENTITY NOT NULL,
    -- Identity of the deleted item.
    "item_id"       text NOT NULL,
    "collection"    text NOT NULL,
    -- Name of the partition the item lived in; nullable.
    "partition"     text,
    -- Temporal extent copied from the deleted row; nullable.
    "datetime"      timestamptz,
    "end_datetime"  timestamptz,
    -- Content hash copied from the deleted row; defaults to the empty string.
    "content_hash"  text NOT NULL DEFAULT ''::text,
    -- When the tombstone was written; drives retention-based cleanup.
    "deleted_at"    timestamptz NOT NULL DEFAULT now()
);


-- Add the lifecycle metadata columns to items in a single ALTER TABLE:
--   content_hash       hex digest of the item content; '' for rows that have not been hashed yet
--   pgstac_updated_at  time of the last database-side write to the row
ALTER TABLE "pgstac"."items"
    ADD COLUMN "content_hash" text NOT NULL DEFAULT ''::text,
    ADD COLUMN "pgstac_updated_at" timestamptz NOT NULL DEFAULT now();

alter table "pgstac"."searches" add column "context_count" bigint;

alter table "pgstac"."searches" add column "created_at" timestamp with time zone default now();
Expand All @@ -226,10 +242,16 @@ alter table "pgstac"."searches" add column "statslastupdated" timestamp with tim

alter table "pgstac"."searches" alter column "hash" drop expression;

-- Supports the retention scan (deleted_at < cutoff, ORDER BY deleted_at) used by tombstone GC.
CREATE INDEX items_deleted_log_deleted_at_idx
    ON pgstac.items_deleted_log USING btree (deleted_at);

-- Unique index that the items_deleted_log primary key is attached to below.
CREATE UNIQUE INDEX items_deleted_log_pkey
    ON pgstac.items_deleted_log USING btree (id);

-- Partial index over unnamed, unpinned searches, keyed by last use.
CREATE INDEX searches_lastused_anon_idx
    ON pgstac.searches USING btree (lastused)
    WHERE ((name IS NULL) AND (NOT pinned));

-- Unique index that the searches.name UNIQUE constraint is attached to below.
CREATE UNIQUE INDEX searches_name_key
    ON pgstac.searches USING btree (name);

ALTER TABLE "pgstac"."items_deleted_log"
    ADD CONSTRAINT "items_deleted_log_pkey" PRIMARY KEY USING INDEX "items_deleted_log_pkey";

ALTER TABLE "pgstac"."searches"
    ADD CONSTRAINT "searches_name_key" UNIQUE USING INDEX "searches_name_key";

set check_function_bodies = off;
Expand Down Expand Up @@ -258,6 +280,78 @@ AS $function$
$function$
;

-- Purge tombstones older than retention_interval from items_deleted_log,
-- using a fixed batch size of 10000 rows per delete statement.
--
-- Parameters:
--   retention_interval  age threshold; defaults to 30 days.
-- Returns: total number of tombstone rows removed.
--
-- The callee is schema-qualified because this function is SECURITY DEFINER:
-- an unqualified call would be resolved through the caller's search_path
-- while running with the owner's privileges.
CREATE OR REPLACE FUNCTION pgstac.gc_deleted_items_log(retention_interval interval DEFAULT '30 days'::interval)
 RETURNS bigint
 LANGUAGE sql
 SECURITY DEFINER
AS $function$
    SELECT pgstac.gc_deleted_items_log(retention_interval, 10000);
$function$
;

-- Purge tombstones older than retention_interval from items_deleted_log in
-- batches of at most batch_limit rows, repeating until a batch removes nothing.
--
-- Parameters:
--   retention_interval  age threshold passed through to the batch routine.
--   batch_limit         maximum rows per batch, passed through unchanged.
-- Returns: total number of tombstone rows removed across all batches.
--
-- All batches run inside the caller's transaction; use the
-- gc_deleted_items_log_committed procedure for commit-per-batch cleanup.
--
-- The batch routine is schema-qualified because this function is
-- SECURITY DEFINER: an unqualified call would be resolved through the
-- caller's search_path while running with the owner's privileges.
CREATE OR REPLACE FUNCTION pgstac.gc_deleted_items_log(retention_interval interval, batch_limit integer)
 RETURNS bigint
 LANGUAGE plpgsql
 SECURITY DEFINER
AS $function$
DECLARE
    deleted_count bigint := 0;
    batch_deleted bigint;
BEGIN
    LOOP
        batch_deleted := pgstac.gc_deleted_items_log_batch(retention_interval, batch_limit);
        deleted_count := deleted_count + batch_deleted;
        -- An empty batch means no rows older than the cutoff remain.
        EXIT WHEN batch_deleted = 0;
    END LOOP;

    RETURN deleted_count;
END;
$function$
;

-- Delete a single batch of tombstones older than retention_interval.
--
-- Parameters:
--   retention_interval  rows with deleted_at < now() - retention_interval are
--                       eligible; defaults to 30 days.
--   batch_limit         maximum rows to delete in this call; NULL falls back
--                       to 10000 and values below 1 are raised to 1.
-- Returns: number of rows deleted by this call (0 when nothing is eligible).
--
-- The table is schema-qualified because this function is SECURITY DEFINER:
-- an unqualified relation would be resolved through the caller's search_path
-- while running with the owner's privileges.
CREATE OR REPLACE FUNCTION pgstac.gc_deleted_items_log_batch(retention_interval interval DEFAULT '30 days'::interval, batch_limit integer DEFAULT 10000)
 RETURNS bigint
 LANGUAGE plpgsql
 SECURITY DEFINER
AS $function$
DECLARE
    batch_deleted bigint;
BEGIN
    -- Pick the oldest eligible rows by physical row id, delete exactly those,
    -- and count what was actually removed.
    WITH to_delete AS (
        SELECT ctid
        FROM pgstac.items_deleted_log
        WHERE deleted_at < now() - retention_interval
        ORDER BY deleted_at
        LIMIT GREATEST(COALESCE(batch_limit, 10000), 1)
    ),
    deleted AS (
        DELETE FROM pgstac.items_deleted_log d
        USING to_delete td
        WHERE d.ctid = td.ctid
        RETURNING 1
    )
    SELECT count(*)::bigint INTO batch_deleted FROM deleted;

    RETURN batch_deleted;
END;
$function$
;

-- Purge old tombstones batch by batch, committing after every non-empty batch.
--
-- Parameters:
--   retention_interval  age threshold handed to gc_deleted_items_log_batch;
--                       defaults to 30 days.
--   batch_limit         maximum rows per batch; defaults to 10000.
--
-- The loop ends on the first batch that removes no rows; that empty batch is
-- not followed by a COMMIT.
CREATE OR REPLACE PROCEDURE pgstac.gc_deleted_items_log_committed(IN retention_interval interval DEFAULT '30 days'::interval, IN batch_limit integer DEFAULT 10000)
 LANGUAGE plpgsql
AS $procedure$
DECLARE
    removed bigint;
BEGIN
    LOOP
        removed := gc_deleted_items_log_batch(retention_interval, batch_limit);

        IF removed = 0 THEN
            EXIT;
        END IF;

        -- Make this batch durable before starting the next one.
        COMMIT;
    END LOOP;
END;
$procedure$
;

CREATE OR REPLACE FUNCTION pgstac.gc_search_caches(retention_interval interval DEFAULT NULL::interval, conf jsonb DEFAULT NULL::jsonb)
RETURNS jsonb
LANGUAGE sql
Expand All @@ -270,6 +364,47 @@ AS $function$
$function$
;

-- Statement-level trigger function: writes one tombstone row into
-- items_deleted_log for every row in the old_rows transition table.
-- Returns NULL.
CREATE OR REPLACE FUNCTION pgstac.items_delete_log_trigger()
 RETURNS trigger
 LANGUAGE plpgsql
 SECURITY DEFINER
AS $function$
BEGIN
    INSERT INTO items_deleted_log (item_id, collection, partition, datetime, end_datetime, content_hash)
    SELECT
        gone.id,
        gone.collection,
        -- partition_name() returns a composite; only its partition_name field is stored.
        (partition_name(gone.collection, gone.datetime)).partition_name,
        gone.datetime,
        gone.end_datetime,
        gone.content_hash
    FROM old_rows AS gone;

    RETURN NULL;
END;
$function$
;

-- Row-level trigger function that refreshes lifecycle metadata on the row
-- being written:
--   pgstac_updated_at  set to now()
--   content_hash       hex SHA-256 of the text form of content_hydrate(NEW)
-- Returns the modified NEW row.
CREATE OR REPLACE FUNCTION pgstac.items_touch_triggerfunc()
 RETURNS trigger
 LANGUAGE plpgsql
 SECURITY DEFINER
AS $function$
BEGIN
    NEW.pgstac_updated_at := now();
    -- convert_to() encodes the text as raw bytes. A text::bytea cast instead
    -- runs the bytea input parser, which rejects or rewrites backslash
    -- sequences such as \" and \n that are common in JSON text.
    NEW.content_hash := encode(sha256(convert_to(content_hydrate(NEW)::text, 'UTF8')), 'hex');
    RETURN NEW;
END;
$function$
;

CREATE OR REPLACE FUNCTION pgstac.name_search(_search jsonb, _name text, _metadata jsonb DEFAULT '{}'::jsonb)
RETURNS searches
LANGUAGE plpgsql
Expand Down Expand Up @@ -674,21 +809,55 @@ $function$

CREATE OR REPLACE FUNCTION pgstac.content_dehydrate(content jsonb)
RETURNS items
LANGUAGE sql
LANGUAGE plpgsql
STABLE
AS $function$
SELECT
content->>'id' as id,
stac_geom(content) as geometry,
content->>'collection' as collection,
stac_datetime(content) as datetime,
stac_end_datetime(content) as end_datetime,
strip_jsonb(
content - '{id,geometry,collection,type}'::text[],
collection_base_item(content->>'collection')
) - '{id,geometry,collection,type}'::text[] as content,
null::jsonb as private
;
DECLARE
out items;
BEGIN
out.id := content->>'id';
out.geometry := stac_geom(content);
out.collection := content->>'collection';
out.datetime := stac_datetime(content);
out.end_datetime := stac_end_datetime(content);
out.pgstac_updated_at := now();
out.content_hash := encode(sha256(content::text::bytea), 'hex');
out.content := strip_jsonb(
content - '{id,geometry,collection,type}'::text[],
collection_base_item(content->>'collection')
) - '{id,geometry,collection,type}'::text[];
out.private := null;
RETURN out;
END;
$function$
;

-- Refresh pgstac_updated_at and content_hash on every updated items row.
CREATE TRIGGER items_before_update_trigger
    BEFORE UPDATE ON pgstac.items
    FOR EACH ROW
    EXECUTE FUNCTION items_touch_triggerfunc();

-- Record tombstones once per DELETE statement, reading the removed rows
-- from the old_rows transition table.
CREATE TRIGGER items_delete_log_after_delete_trigger
    AFTER DELETE ON pgstac.items
    REFERENCING OLD TABLE AS old_rows
    FOR EACH STATEMENT
    EXECUTE FUNCTION items_delete_log_trigger();

-- Convert a full STAC item document into an items row.
--
-- Parameters:
--   content  the item as jsonb.
-- Returns: an items record with
--   id, collection            taken from the matching top-level keys
--   geometry                  stac_geom(content)
--   datetime, end_datetime    stac_datetime(content) / stac_end_datetime(content)
--   pgstac_updated_at         now()
--   content_hash              hex SHA-256 of the text form of the incoming content
--   content                   the document minus id/geometry/collection/type,
--                             passed through strip_jsonb() against the
--                             collection's base item
--   private                   NULL
CREATE OR REPLACE FUNCTION pgstac.content_dehydrate(content jsonb)
 RETURNS items
 LANGUAGE plpgsql
 STABLE
AS $function$
DECLARE
    out items;
BEGIN
    out.id := content->>'id';
    out.geometry := stac_geom(content);
    out.collection := content->>'collection';
    out.datetime := stac_datetime(content);
    out.end_datetime := stac_end_datetime(content);
    out.pgstac_updated_at := now();
    -- convert_to() encodes the text as raw bytes. A text::bytea cast instead
    -- runs the bytea input parser, which rejects or rewrites backslash
    -- sequences such as \" and \n that are common in JSON text.
    out.content_hash := encode(sha256(convert_to(content::text, 'UTF8')), 'hex');
    out.content := strip_jsonb(
        content - '{id,geometry,collection,type}'::text[],
        collection_base_item(content->>'collection')
    ) - '{id,geometry,collection,type}'::text[];
    out.private := null;
    RETURN out;
END;
$function$
;

Expand Down Expand Up @@ -1075,6 +1244,9 @@ ALTER FUNCTION pin_search SECURITY DEFINER;
ALTER FUNCTION unpin_search SECURITY DEFINER;
ALTER FUNCTION gc_anonymous_searches(interval, jsonb) SECURITY DEFINER;
ALTER FUNCTION gc_search_caches(interval, jsonb) SECURITY DEFINER;
-- Mark the tombstone GC functions as SECURITY DEFINER.
-- NOTE(review): no REVOKE ... FROM public for these three functions is visible
-- in this hunk; confirm who is meant to be able to execute them.
ALTER FUNCTION gc_deleted_items_log_batch(interval, integer) SECURITY DEFINER;
ALTER FUNCTION gc_deleted_items_log(interval, integer) SECURITY DEFINER;
ALTER FUNCTION gc_deleted_items_log(interval) SECURITY DEFINER;
ALTER FUNCTION format_item SECURITY DEFINER;
ALTER FUNCTION maintain_index SECURITY DEFINER;

Expand All @@ -1100,6 +1272,9 @@ GRANT ALL ON PROCEDURE run_queued_queries TO pgstac_admin;
REVOKE ALL PRIVILEGES ON FUNCTION run_queued_queries_intransaction FROM public;
GRANT ALL ON FUNCTION run_queued_queries_intransaction TO pgstac_admin;

-- Remove all privileges on the commit-per-batch GC procedure from public and
-- grant them to pgstac_admin.
REVOKE ALL PRIVILEGES ON PROCEDURE gc_deleted_items_log_committed(interval, integer) FROM public;
GRANT ALL ON PROCEDURE gc_deleted_items_log_committed(interval, integer) TO pgstac_admin;

RESET ROLE;

SET ROLE pgstac_ingest;
Expand Down
Loading