diff --git a/.github/instructions/scripts.instructions.md b/.github/instructions/scripts.instructions.md index 40e7aa6f..536370c0 100644 --- a/.github/instructions/scripts.instructions.md +++ b/.github/instructions/scripts.instructions.md @@ -10,6 +10,7 @@ See CLAUDE.md "Development Workflow" for usage. All scripts require the Docker c - `runinpypgstac` uses the published-package path by default; set `PGPKG_LOCAL_REPO_DIR` to mount a local `pgpkg` checkout at `/pgpkg` when you need an override - `scripts/container-scripts/` contains the in-container script payload copied into the pypgstac image; keep host wrappers in `scripts/` - `stageversion` modifies version files AND generates migrations — see CLAUDE.md "Migration Process" +- `stageversion` regenerates `*unreleased*` migrations each run; if you hand-edit incremental SQL, rebuild the baked artifact with `uv run --directory src/pgstac-migrate pgstac-migrate build-artifact` and avoid rerunning `stageversion` unless you intend to overwrite edits - `scripts/container-scripts/stageversion` and `scripts/container-scripts/makemigration` now shell through `pgpkg` inside the container rather than assembling/diffing SQL directly - Set `PGPKG_LOCAL_REPO_DIR` on the host when you need to force a local pgpkg checkout for `stageversion`, `makemigration`, or related container-script testing - Tagged releases run `.github/workflows/release.yml`, which publishes both `pypgstac` and `pgstac-migrate` to PyPI via the GitHub `pypi` environment; PyPI trusted publishers must exist for both projects diff --git a/.github/workflows/continuous-integration.yml b/.github/workflows/continuous-integration.yml index 92f056e0..cce24c84 100644 --- a/.github/workflows/continuous-integration.yml +++ b/.github/workflows/continuous-integration.yml @@ -225,4 +225,4 @@ jobs: - name: Set search_path run: psql -c "ALTER ROLE username SET search_path TO pgstac, public;" - name: Test - run: cargo test -p pgstac --all-features --manifest-path 
rust/Cargo.toml + run: cargo test -p pgstac --all-features --manifest-path src/pgstac-rs/Cargo.toml diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 47ee8d97..baad2b58 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -261,7 +261,7 @@ jobs: - uses: rust-lang/crates-io-auth-action@bbd81622f20ce9e2dd9622e3218b975523e45bbe # v1.0.4 id: auth - name: Publish - working-directory: rust + working-directory: src/pgstac-rs env: CARGO_REGISTRY_TOKEN: ${{ steps.auth.outputs.token }} run: cargo publish diff --git a/.gitignore b/.gitignore index c646d853..eaeef9f5 100644 --- a/.gitignore +++ b/.gitignore @@ -19,3 +19,4 @@ src/pgstacrust/target/ src/pgstac-migrate/dist/ src/pgstac-migrate/src/pgstac_migrate/migrations.tar.zst src/pypgstac/uv.lock +*_PLAN.md diff --git a/AGENTS.md b/AGENTS.md index 73568f98..8e4ee00d 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -26,10 +26,11 @@ Migration specialist for PgSTAC. See CLAUDE.md "Migration Process" for full work 2. `src/pgstac/pyproject.toml` is the `pgpkg` project config for the SQL + migrations tree 3. `uv run --directory src/pgstac-migrate pgstac-migrate info|versions|plan` inspects the baked migration artifact during wrapper work 4. `uv run --directory src/pypgstac pypgstac migrate -- --help` remains a backwards-compatible wrapper over `pgstac-migrate`; put new runtime migration behavior in `src/pgstac-migrate/`, not `src/pypgstac/` -5. `scripts/stageversion VERSION` → generates canonical `pgstac--VERSION.sql` plus an incremental `.staged` migration; set `PGPKG_LOCAL_REPO_DIR` when `stageversion` or `makemigration` should run against a local pgpkg checkout. The Docker-backed flow mounts that override at `/pgpkg` and exports `PGPKG_REPO_DIR` to the container scripts. -6. Review `.staged` file (watch for DROPs, unsafe ALTERs, missing `CREATE OR REPLACE`) -7. Remove `.staged` suffix → `scripts/test --migrations` -8. 
Tagged releases publish both `pypgstac` and `pgstac-migrate` to PyPI from `.github/workflows/release.yml`; keep the PyPI trusted publisher registration aligned with the `pypi` environment and workflow path +5. `scripts/stageversion VERSION` regenerates canonical `pgstac--VERSION.sql` plus incremental `pgstac--FROM--TO.sql`; set `PGPKG_LOCAL_REPO_DIR` when `stageversion` or `makemigration` should run against a local pgpkg checkout. The Docker-backed flow mounts that override at `/pgpkg` and exports `PGPKG_REPO_DIR` to the container scripts. +6. Review the generated incremental migration (watch for DROPs, unsafe ALTERs, missing `CREATE OR REPLACE`) +7. If you hand-edit the incremental migration, rebuild the baked artifact: `uv run --directory src/pgstac-migrate pgstac-migrate build-artifact` +8. Run `scripts/test --migrations` (or full `scripts/test` gate) +9. Tagged releases publish both `pypgstac` and `pgstac-migrate` to PyPI from `.github/workflows/release.yml`; keep the PyPI trusted publisher registration aligned with the `pypi` environment and workflow path ### Review Checklist diff --git a/CHANGELOG.md b/CHANGELOG.md index 41a541a9..3266f14c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,8 @@ and this project adheres to [Semantic Versioning](http://semver.org/). ### Added - New `pgstac-migrate` package under `src/pgstac-migrate/` with a standalone CLI, Python API, and tests for migration planning and execution. +- New Rust crate under `src/pgstac-rs/` with updated CI/release wiring, + README guidance, and test coverage. - `src/pgstac/pyproject.toml` `tool.pgpkg` project metadata for canonical SQL + migration staging. - `scripts/makemigration` host wrapper for the in-container `makemigration` helper. @@ -29,6 +31,12 @@ and this project adheres to [Semantic Versioning](http://semver.org/). - `workflow_dispatch` trigger for manual CI runs. 
- `pg_tle` v1.5.2 built and pre-loaded in the `pgstacbase` image; database init runs `CREATE EXTENSION IF NOT EXISTS pg_tle`. +- `pg_stat_statements` and `pg_cron` are now installed in the pgstac Docker image, + added to `shared_preload_libraries`, and initialized during container bootstrap + (`pg_stat_statements` in the app database, `pg_cron` in `postgres`). +- `scripts/container-scripts/test` now includes extension smoke tests that verify + preload configuration plus basic runtime behavior for both + `pg_stat_statements` and `pg_cron`. - `pypgstac-runtime` Docker target: slim Python 3.13-trixie image without the Rust/build toolchain, for production deployments where the Rust build environment is not needed. - Dependabot coverage expanded to Docker base images and pip packages (two new @@ -48,6 +56,15 @@ and this project adheres to [Semantic Versioning](http://semver.org/). `PGPKG_REPO_DIR` override support. - `scripts/runinpypgstac` now supports a `PGPKG_LOCAL_REPO_DIR` mount override for local pgpkg development while keeping the default flow PyPI-first. +- Search cache hashing, storage, and concurrency control were reworked: SHA-256 + cache keys, canonical where-clause inputs, `searches`-backed lifecycle, + retention-driven GC, and less blocking row touch / update behavior. +- Search context stats updates now use optimistic compare-and-update guards on + `statslastupdated`, reducing stale overwrites when concurrent workers refresh + counts. +- GitHub Actions and release automation were refreshed for the current layout: + Rust crate path updates, workflow/action version bumps, and Dependabot group + adjustments. - Tagged releases now publish the new `pgstac-migrate` package to PyPI alongside `pypgstac` via trusted publishing in `.github/workflows/release.yml`. - In-container helper scripts moved from `docker/pypgstac/bin/` to `scripts/container-scripts/`; container `PATH` updated accordingly. 
@@ -61,22 +78,23 @@ and this project adheres to [Semantic Versioning](http://semver.org/). `--build` flag; `PGSTAC_BUILD_POLICY` env var provides a persistent default. - Dev tooling: `flake8`, `black`, and `mypy` removed in favour of `ruff==0.15.11` and `ty==0.0.31`. `pre-commit` pinned to `3.5.0`. `pre-commit-hooks` updated to v5.0.0. -- `pypgstac` package floor raised to Python 3.11; metadata now advertises 3.11-3.14. -- `pypgstac` settings now use `pydantic-settings` (`BaseSettings` from - `pydantic_settings`) and require `pydantic>=2,<3`. - `cachetools` upper bound removed (`cachetools>=5.3.0`) since `pypgstac` only uses `cachetools.func.lru_cache`; no known incompatible API changes affect this usage. - `pypgstac` developer tooling config now consistently targets Ruff + ty: removes stale mypy config, pins Ruff to `0.15.11` to match pre-commit, and adds minimal `[tool.ty]` project settings. +- `pypgstac` now requires Python 3.11+ and advertises support through 3.14; + settings now use `pydantic-settings` and require `pydantic>=2,<3`. - Formatting/type-check pipeline now uses `scripts/test --formatting` as the single pre-commit entry point (removing duplicate direct Ruff pre-commit hooks) and aligns Ruff line-length handling with the formatter (`E501` ignored; explicit `line-length = 88`). -- GitHub Actions updated: `dorny/paths-filter` v2→v3, `docker/build-push-action` - v4→v6, `astral-sh/setup-uv` v8.0.0→v8.1.0; all SHA pins refreshed. -- Dependabot groups reworked: `actions-all` (replaces `minor-and-patch`), new - `docker-base-images`, `python-dev-tooling`, and `python-runtime` groups. 
+- GitHub Actions and release automation were refreshed for the current layout: + Rust crate path updates, `dorny/paths-filter` v2→v3, + `docker/build-push-action` v4→v6, `astral-sh/setup-uv` v8.0.0→v8.1.0, + refreshed SHA pins, and Dependabot group updates (`actions-all` replaces + `minor-and-patch`, with new `docker-base-images`, `python-dev-tooling`, and + `python-runtime` groups). - `docker-compose.yml` removes explicit `container_name` entries to avoid conflicts between concurrent local instances. @@ -86,8 +104,11 @@ and this project adheres to [Semantic Versioning](http://semver.org/). - `flake8`, `black`, and `mypy` removed from dev dependencies. ### Fixed +- Explicit search stats refresh now propagates through cached and uncached search paths when `updatestats` is requested, keeping `numberMatched`/context counts current. - `scripts/container-scripts/test` now refreshes collation metadata for the `postgres` database during setup to avoid noisy warning output. +- Read-only search with context now returns `numberMatched` without requiring + cache writes, reducing failure risk for replica/read-only deployments. - `load.py`: Use timezone-aware `MIN_DATETIME_UTC` / `MAX_DATETIME_UTC` sentinel constants (instead of naive `datetime.min` / `datetime.max`) to avoid `TypeError: can't compare offset-naive and offset-aware datetimes`. @@ -97,6 +118,9 @@ and this project adheres to [Semantic Versioning](http://semver.org/). the broken `3.9.0` sdist under `--resolution lowest-direct`. - `pydantic` minimum raised to `>=2.10` so `--resolution lowest-direct` on Python 3.13 does not resolve to `pydantic-core==2.0.1`, which fails to build. +- `scripts/container-scripts/test` now derives the active database from + `PGDATABASE`/`POSTGRES_DB` when checking server extensions and refreshing + collation versions, instead of assuming `postgis`. 
## [v0.9.11] diff --git a/CLAUDE.md b/CLAUDE.md index 96fc13eb..364618f5 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -127,10 +127,12 @@ This runs inside Docker and: 4. Appends `998_idempotent_post.sql` and `SELECT set_version(...)` 5. Writes `migrations/pgstac--0.9.10--0.9.11.sql` -**Important**: The generated migration is created with a `.staged` suffix. You MUST: -1. Review the `.staged` file for correctness -2. Remove the `.staged` suffix to enable it -3. Run `scripts/test --migrations` to validate +**Important**: +1. `scripts/stageversion` regenerates `*unreleased*` migration files on each run. +2. If you hand-edit an incremental migration, do not rerun `stageversion` unless you want those edits overwritten. +3. After hand-editing an incremental migration, rebuild the baked artifact: + `uv run --directory src/pgstac-migrate pgstac-migrate build-artifact` +4. Validate with `scripts/test --migrations` (or `scripts/test` for the full gate). ### Running Migrations @@ -181,13 +183,14 @@ Tests create `pgstac_test_db_template` from `pgstac.sql`, then clone it per test ## Release Checklist 1. `scripts/stageversion VERSION` -2. Review `.staged` migration, remove suffix -3. `scripts/test --migrations` -4. Move CHANGELOG "Unreleased" → new version -5. Copy updated `CHANGELOG.md` to `docs/src/release-notes.md` (keep identical) -6. Create PR, merge -7. `git tag vVERSION && git push origin vVERSION` -8. CI publishes `pypgstac` and `pgstac-migrate` to PyPI plus the ghcr.io images (requires trusted publishers for both PyPI projects on `.github/workflows/release.yml` with the `pypi` environment) +2. Review generated incremental migration for correctness +3. If hand-edited, run `uv run --directory src/pgstac-migrate pgstac-migrate build-artifact` +4. `scripts/test --migrations` +5. Move CHANGELOG "Unreleased" → new version +6. Copy updated `CHANGELOG.md` to `docs/src/release-notes.md` (keep identical) +7. Create PR, merge +8. `git tag vVERSION && git push origin vVERSION` +9. 
CI publishes `pypgstac` and `pgstac-migrate` to PyPI plus the ghcr.io images (requires trusted publishers for both PyPI projects on `.github/workflows/release.yml` with the `pypi` environment) ## Common Patterns diff --git a/docker/pgstac/Dockerfile b/docker/pgstac/Dockerfile index 671ac025..61c941c8 100644 --- a/docker/pgstac/Dockerfile +++ b/docker/pgstac/Dockerfile @@ -16,6 +16,8 @@ RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \ && apt-get install -y --no-install-recommends \ postgresql-$PG_MAJOR-postgis-$POSTGIS_MAJOR \ postgresql-$PG_MAJOR-postgis-$POSTGIS_MAJOR-scripts \ + postgresql-$PG_MAJOR-cron \ + postgresql-contrib-$PG_MAJOR \ postgresql-$PG_MAJOR-pgtap \ postgresql-$PG_MAJOR-plpgsql-check \ postgresql-$PG_MAJOR-partman \ @@ -31,8 +33,8 @@ RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \ && make -C /tmp/pg_tle \ && make -C /tmp/pg_tle install \ && rm -rf /tmp/pg_tle \ - && sed -i "s/^#shared_preload_libraries = .*/shared_preload_libraries = 'pg_tle'/" /usr/share/postgresql/$PG_MAJOR/postgresql.conf.sample \ - && sed -i "s/^#shared_preload_libraries = .*/shared_preload_libraries = 'pg_tle'/" /usr/share/postgresql/postgresql.conf.sample \ + && sed -i "s/^#shared_preload_libraries = .*/shared_preload_libraries = 'pg_tle,pg_stat_statements,pg_cron'/" /usr/share/postgresql/$PG_MAJOR/postgresql.conf.sample \ + && sed -i "s/^#shared_preload_libraries = .*/shared_preload_libraries = 'pg_tle,pg_stat_statements,pg_cron'/" /usr/share/postgresql/postgresql.conf.sample \ && apt-get purge -y --auto-remove \ postgresql-server-dev-$PG_MAJOR \ build-essential \ diff --git a/docker/pgstac/dbinit/pgstac.sh b/docker/pgstac/dbinit/pgstac.sh index f4e97986..8751eb08 100644 --- a/docker/pgstac/dbinit/pgstac.sh +++ b/docker/pgstac/dbinit/pgstac.sh @@ -3,11 +3,14 @@ SHARED_BUFFERS=$(( $SYSMEM/4 )) EFFECTIVE_CACHE_SIZE=$(( $SYSMEM*3/4 )) MAINTENANCE_WORK_MEM=$(( $SYSMEM/8 )) WORK_MEM=$(( $SHARED_BUFFERS/50 )) - psql -X -q -v ON_ERROR_STOP=1 < 
/dev/null && pwd ) SRCDIR=${PGSTAC_REPO_DIR:-/opt/src} cd $SRCDIR @@ -29,13 +31,23 @@ Usage: $(basename "$0") [version] Create a new base migration, update pypgstac version metadata, and generate the incremental migration via makemigration. +Important workflow note: + This command regenerates *unreleased* migrations each time it runs. + If you hand-edit an incremental migration, do NOT rerun stageversion unless + you want those edits overwritten. After hand edits, rebuild the baked + artifact with: + uv run --directory src/pgstac-migrate pgstac-migrate build-artifact + Environment: PGSTAC_VERSION Default version when no positional version is provided. PGPKG_REPO_DIR Optional local pgpkg checkout to use instead of the installed package. EOF } -if [[ "$1" == "-h" || "$1" == "--help" ]]; then +ARG1=${1:-} +VERSION="" + +if [[ "$ARG1" == "-h" || "$ARG1" == "--help" ]]; then usage exit 0 fi @@ -45,8 +57,8 @@ fi find $MIGRATIONSDIR -name "*unreleased*" -exec rm {} \; # Get Version -if [[ -n "$1" ]]; then - VERSION=$1 +if [[ -n "$ARG1" ]]; then + VERSION=$ARG1 elif [[ -n "${PGSTAC_VERSION:-}" ]]; then VERSION=$PGSTAC_VERSION fi @@ -83,8 +95,13 @@ cd $PGSTACDIR echo "Setting pypgstac version to $PYVERSION" cat < $PYPGSTACDIR/src/pypgstac/version.py """Version.""" + __version__ = "${PYVERSION}" EOD sed -i "s/^version[ ]*=[ ]*.*$/version = \"${PYVERSION}\"/" $PYPGSTACDIR/pyproject.toml makemigration -f $OLDVERSION -t $VERSION + +echo "Stageversion complete." +echo "If you hand-edit the incremental migration, rebuild the baked artifact with:" +echo " uv run --directory src/pgstac-migrate pgstac-migrate build-artifact" diff --git a/scripts/container-scripts/test b/scripts/container-scripts/test index 4bbc525d..3bce8376 100755 --- a/scripts/container-scripts/test +++ b/scripts/container-scripts/test @@ -65,9 +65,73 @@ EOSQL function refresh_collation_versions(){ # Newer container libc versions can make template collation metadata stale. 
- psql -X -q -d postgres -c "ALTER DATABASE template1 REFRESH COLLATION VERSION;" >/dev/null 2>&1 || true - psql -X -q -d postgres -c "ALTER DATABASE postgres REFRESH COLLATION VERSION;" >/dev/null 2>&1 || true - psql -X -q -d postgres -c "ALTER DATABASE postgis REFRESH COLLATION VERSION;" >/dev/null 2>&1 || true + local appdb="${PGDATABASE:-${POSTGRES_DB:-postgres}}" + local db + + for db in template1 postgres "$appdb"; do + psql -X -q -d postgres -c "ALTER DATABASE ${db} REFRESH COLLATION VERSION;" >/dev/null 2>&1 || true + done +} + +function test_server_extensions(){ + local appdb="${PGDATABASE:-${POSTGRES_DB:-postgres}}" + local pgss_count + + # CI test jobs use the pgstacbase image (no init scripts), so create the + # extensions in the active databases before validating preload/runtime behavior. + psql -X -q -v ON_ERROR_STOP=1 -d "$appdb" -c "CREATE EXTENSION IF NOT EXISTS pg_stat_statements;" >/dev/null + psql -X -q -v ON_ERROR_STOP=1 -d postgres -c "CREATE EXTENSION IF NOT EXISTS pg_cron;" >/dev/null + + psql -X -q -v ON_ERROR_STOP=1 -d "$appdb" </dev/null + psql -X -q -v ON_ERROR_STOP=1 -d "$appdb" -c "SELECT count(*) FROM pg_class;" >/dev/null + pgss_count=$(psql -X -q -t -A -v ON_ERROR_STOP=1 -d "$appdb" -c "SELECT count(*) FROM pg_stat_statements;") + if [[ -z "$pgss_count" || "$pgss_count" -eq 0 ]]; then + echo "pg_stat_statements did not record statements for ${appdb}" >&2 + exit 1 + fi + + psql -X -q -v ON_ERROR_STOP=1 -d postgres < 'postgres' THEN + RAISE EXCEPTION 'cron.database_name expected postgres but was %', current_setting('cron.database_name'); + END IF; + IF NOT EXISTS (SELECT 1 FROM pg_extension WHERE extname = 'pg_cron') THEN + RAISE EXCEPTION 'pg_cron extension is not installed in %', current_database(); + END IF; + + PERFORM cron.unschedule(cron.schedule('* * * * *', 'SELECT 1')); +END +\$\$; +EOSQL + + echo "Server extension tests passed for ${appdb} + postgres." 
} function test_formatting(){ @@ -410,7 +474,11 @@ then fi [ $FORMATTING -eq 1 ] && test_formatting -[ $SETUPDB -eq 1 ] && refresh_collation_versions && setuptestdb +if [ $SETUPDB -eq 1 ]; then + refresh_collation_versions + test_server_extensions + setuptestdb +fi [ $PGTAP -eq 1 ] && test_pgtap [ $BASICSQL -eq 1 ] && test_basicsql [ $PYPGSTAC -eq 1 ] && test_pypgstac diff --git a/rust/.gitignore b/src/pgstac-rs/.gitignore similarity index 100% rename from rust/.gitignore rename to src/pgstac-rs/.gitignore diff --git a/rust/CHANGELOG.md b/src/pgstac-rs/CHANGELOG.md similarity index 100% rename from rust/CHANGELOG.md rename to src/pgstac-rs/CHANGELOG.md diff --git a/rust/Cargo.lock b/src/pgstac-rs/Cargo.lock similarity index 100% rename from rust/Cargo.lock rename to src/pgstac-rs/Cargo.lock diff --git a/rust/Cargo.toml b/src/pgstac-rs/Cargo.toml similarity index 100% rename from rust/Cargo.toml rename to src/pgstac-rs/Cargo.toml diff --git a/rust/README.md b/src/pgstac-rs/README.md similarity index 90% rename from rust/README.md rename to src/pgstac-rs/README.md index 09102376..6df92d00 100644 --- a/rust/README.md +++ b/src/pgstac-rs/README.md @@ -28,7 +28,7 @@ scripts/server Then, in another terminal: ```sh -cargo test --manifest-path rust/Cargo.toml +cargo test --manifest-path src/pgstac-rs/Cargo.toml ``` Each test is run in its own transaction, which is rolled back after the test. 
@@ -39,7 +39,7 @@ By default, the tests will connect to the database at `postgresql://username:pas If you need to customize the connection information for whatever reason, set your `PGSTAC_RS_TEST_DB` environment variable: ```shell -PGSTAC_RS_TEST_DB=postgresql://otherusername:otherpassword@otherhost:7822/otherdbname cargo test --manifest-path rust/Cargo.toml +PGSTAC_RS_TEST_DB=postgresql://otherusername:otherpassword@otherhost:7822/otherdbname cargo test --manifest-path src/pgstac-rs/Cargo.toml ``` ## Other info diff --git a/rust/src/client.rs b/src/pgstac-rs/src/client.rs similarity index 100% rename from rust/src/client.rs rename to src/pgstac-rs/src/client.rs diff --git a/rust/src/lib.rs b/src/pgstac-rs/src/lib.rs similarity index 100% rename from rust/src/lib.rs rename to src/pgstac-rs/src/lib.rs diff --git a/rust/src/page.rs b/src/pgstac-rs/src/page.rs similarity index 100% rename from rust/src/page.rs rename to src/pgstac-rs/src/page.rs diff --git a/src/pgstac/migrations/pgstac--0.9.11--unreleased.sql b/src/pgstac/migrations/pgstac--0.9.11--unreleased.sql index 82443707..db90a279 100644 --- a/src/pgstac/migrations/pgstac--0.9.11--unreleased.sql +++ b/src/pgstac/migrations/pgstac--0.9.11--unreleased.sql @@ -197,6 +197,779 @@ RETURNS timestamptz AS $$ END ; $$ LANGUAGE SQL IMMUTABLE STRICT; +drop function if exists "pgstac"."content_slim"(_item jsonb); + + +drop function if exists "pgstac"."search_rows"(_where text, _orderby text, partitions text[], _limit integer); + +drop function if exists "pgstac"."where_stats"(inwhere text, updatestats boolean, conf jsonb); + +alter table "pgstac"."search_wheres" drop constraint "search_wheres_pkey"; + +drop index if exists "pgstac"."search_wheres_partitions"; + +drop index if exists "pgstac"."search_wheres_pkey"; + +drop index if exists "pgstac"."search_wheres_where"; + +drop table "pgstac"."search_wheres"; + +alter table "pgstac"."searches" add column "context_count" bigint; + +alter table "pgstac"."searches" add column 
"created_at" timestamp with time zone default now(); + +alter table "pgstac"."searches" add column "name" text; + +alter table "pgstac"."searches" add column "pinned" boolean not null default false; + +alter table "pgstac"."searches" add column "statslastupdated" timestamp with time zone; + +alter table "pgstac"."searches" alter column "hash" drop expression; + +CREATE INDEX searches_lastused_anon_idx ON pgstac.searches USING btree (lastused) WHERE ((name IS NULL) AND (NOT pinned)); + +CREATE UNIQUE INDEX searches_name_key ON pgstac.searches USING btree (name); + +alter table "pgstac"."searches" add constraint "searches_name_key" UNIQUE using index "searches_name_key"; + +set check_function_bodies = off; + +CREATE OR REPLACE FUNCTION pgstac.gc_anonymous_searches(retention_interval interval DEFAULT NULL::interval, conf jsonb DEFAULT NULL::jsonb) + RETURNS bigint + LANGUAGE sql + SECURITY DEFINER +AS $function$ + WITH effective_retention AS ( + SELECT COALESCE( + retention_interval, + search_gc_retention_interval(conf) + ) AS i + ), + deleted AS ( + DELETE FROM searches + USING effective_retention + WHERE + name IS NULL + AND NOT pinned + AND lastused < now() - effective_retention.i + RETURNING 1 + ) + SELECT count(*)::bigint FROM deleted; +$function$ +; + +CREATE OR REPLACE FUNCTION pgstac.gc_search_caches(retention_interval interval DEFAULT NULL::interval, conf jsonb DEFAULT NULL::jsonb) + RETURNS jsonb + LANGUAGE sql + SECURITY DEFINER +AS $function$ + SELECT jsonb_build_object( + 'removed_searches', + gc_anonymous_searches(retention_interval, conf) + ); +$function$ +; + +CREATE OR REPLACE FUNCTION pgstac.name_search(_search jsonb, _name text, _metadata jsonb DEFAULT '{}'::jsonb) + RETURNS searches + LANGUAGE plpgsql + SECURITY DEFINER +AS $function$ +DECLARE + named searches%ROWTYPE; +BEGIN + named := search_query(_search, false, _metadata); + UPDATE searches + SET + name = _name, + lastused = now(), + usecount = searches.usecount + 1 + WHERE hash = named.hash + 
RETURNING * INTO named; + + IF named IS NULL THEN + RAISE EXCEPTION 'Could not name search for input: %', _search; + END IF; + + RETURN named; +END; +$function$ +; + +CREATE OR REPLACE FUNCTION pgstac.pgstac_hash(data text) + RETURNS text + LANGUAGE sql + IMMUTABLE PARALLEL SAFE STRICT +AS $function$ + SELECT encode(sha256(convert_to(data, 'UTF8')), 'hex'); -- hex-encoded SHA-256 digest of the UTF-8 bytes of data +$function$ +; + +CREATE OR REPLACE FUNCTION pgstac.pin_search(_name text) + RETURNS searches + LANGUAGE plpgsql + SECURITY DEFINER +AS $function$ +DECLARE + pinned_search searches%ROWTYPE; +BEGIN + UPDATE searches + SET + pinned = true, + lastused = now(), + usecount = searches.usecount + 1 + WHERE name = _name + RETURNING * INTO pinned_search; + + IF pinned_search IS NULL THEN -- all-NULL row: the UPDATE matched no search with this name + RAISE EXCEPTION 'Named search % not found', _name; + END IF; + + RETURN pinned_search; +END; +$function$ +; + +CREATE OR REPLACE FUNCTION pgstac.rename_search(_old_name text, _new_name text) + RETURNS searches + LANGUAGE plpgsql + SECURITY DEFINER +AS $function$ +DECLARE + renamed searches%ROWTYPE; +BEGIN + -- Serialize rename-pair operations to avoid deadlocks on concurrent name swaps. The lock key is built from least/greatest of the two names, so (a, b) and (b, a) contend on the same transaction-scoped advisory lock.
+ PERFORM pg_advisory_xact_lock( + hashtext( + least(_old_name, _new_name) + || '|' + || greatest(_old_name, _new_name) + ) + ); + + UPDATE searches + SET + name = _new_name, + lastused = now(), + usecount = searches.usecount + 1 + WHERE name = _old_name + RETURNING * INTO renamed; + + IF renamed IS NULL THEN + RAISE EXCEPTION 'Named search % not found', _old_name; + END IF; + + RETURN renamed; +END; +$function$ +; + +CREATE OR REPLACE FUNCTION pgstac.search_gc_retention_interval(conf jsonb DEFAULT NULL::jsonb) + RETURNS interval + LANGUAGE sql +AS $function$ + SELECT pgstac.get_setting('search_gc_retention_interval', conf)::interval; +$function$ +; + +CREATE OR REPLACE FUNCTION pgstac.search_hash(_search jsonb, _metadata jsonb DEFAULT '{}'::jsonb) + RETURNS text + LANGUAGE sql + STABLE PARALLEL SAFE +AS $function$ + SELECT search_hash_from_where( + stac_search_to_where(_search), + _metadata + ); +$function$ +; + +CREATE OR REPLACE FUNCTION pgstac.search_hash_from_where(_where text, _metadata jsonb DEFAULT '{}'::jsonb) + RETURNS text + LANGUAGE sql + IMMUTABLE PARALLEL SAFE +AS $function$ + SELECT pgstac_hash( + format( + '%s|%s', + _where, + coalesce(_metadata, '{}'::jsonb)::text + ) + ); +$function$ +; + +CREATE OR REPLACE FUNCTION pgstac.search_rows(_where text DEFAULT 'TRUE'::text, _orderby text DEFAULT 'datetime DESC, id DESC'::text, _limit integer DEFAULT 10) + RETURNS SETOF items + LANGUAGE plpgsql + SET search_path TO 'pgstac', 'public' +AS $function$ +DECLARE + base_query text; + query text; + sdate timestamptz; + edate timestamptz; + n int; + records_left int := _limit; + timer timestamptz := clock_timestamp(); + full_timer timestamptz := clock_timestamp(); +BEGIN +IF _where IS NULL OR trim(_where) = '' THEN + _where = ' TRUE '; +END IF; +RAISE NOTICE 'Getting chunks for % %', _where, _orderby; + +base_query := $q$ + SELECT * FROM items + WHERE + datetime >= %L AND datetime < %L + AND (%s) + ORDER BY %s + LIMIT %L +$q$; + +IF _orderby ILIKE 'datetime d%' 
THEN + FOR sdate, edate IN SELECT * FROM chunker(_where) ORDER BY 1 DESC LOOP + RAISE NOTICE 'Running Query for % to %. %', sdate, edate, age_ms(full_timer); + query := format( + base_query, + sdate, + edate, + _where, + _orderby, + records_left + ); + RAISE DEBUG 'QUERY: %', query; + timer := clock_timestamp(); + RETURN QUERY EXECUTE query; + + GET DIAGNOSTICS n = ROW_COUNT; + records_left := records_left - n; + RAISE NOTICE 'Returned %/% Rows From % to %. % to go. Time: %ms', n, _limit, sdate, edate, records_left, age_ms(timer); + timer := clock_timestamp(); + IF records_left <= 0 THEN + RAISE NOTICE 'SEARCH_ROWS TOOK %ms', age_ms(full_timer); + RETURN; + END IF; + END LOOP; +ELSIF _orderby ILIKE 'datetime a%' THEN + FOR sdate, edate IN SELECT * FROM chunker(_where) ORDER BY 1 ASC LOOP + RAISE NOTICE 'Running Query for % to %. %', sdate, edate, age_ms(full_timer); + query := format( + base_query, + sdate, + edate, + _where, + _orderby, + records_left + ); + RAISE DEBUG 'QUERY: %', query; + timer := clock_timestamp(); + RETURN QUERY EXECUTE query; + + GET DIAGNOSTICS n = ROW_COUNT; + records_left := records_left - n; + RAISE NOTICE 'Returned %/% Rows From % to %. % to go. 
Time: %ms', n, _limit, sdate, edate, records_left, age_ms(timer); + timer := clock_timestamp(); + IF records_left <= 0 THEN + RAISE NOTICE 'SEARCH_ROWS TOOK %ms', age_ms(full_timer); + RETURN; + END IF; + END LOOP; +ELSE + query := format($q$ + SELECT * FROM items + WHERE %s + ORDER BY %s + LIMIT %L + $q$, _where, _orderby, _limit + ); + RAISE DEBUG 'QUERY: %', query; + timer := clock_timestamp(); + RETURN QUERY EXECUTE query; + RAISE NOTICE 'FULL QUERY TOOK %ms', age_ms(timer); +END IF; +RAISE NOTICE 'SEARCH_ROWS TOOK %ms', age_ms(full_timer); +RETURN; +END; +$function$ +; + +CREATE OR REPLACE FUNCTION pgstac.unname_search(_name text) + RETURNS searches + LANGUAGE plpgsql + SECURITY DEFINER +AS $function$ +DECLARE + unnamed searches%ROWTYPE; +BEGIN + UPDATE searches + SET + name = NULL, + pinned = false, + lastused = now(), + usecount = searches.usecount + 1 + WHERE name = _name + RETURNING * INTO unnamed; + + IF unnamed IS NULL THEN + RAISE EXCEPTION 'Named search % not found', _name; + END IF; + + RETURN unnamed; +END; +$function$ +; + +CREATE OR REPLACE FUNCTION pgstac.unpin_search(_name text) + RETURNS searches + LANGUAGE plpgsql + SECURITY DEFINER +AS $function$ +DECLARE + unpinned_search searches%ROWTYPE; +BEGIN + UPDATE searches + SET + pinned = false, + lastused = now(), + usecount = searches.usecount + 1 + WHERE name = _name + RETURNING * INTO unpinned_search; + + IF unpinned_search IS NULL THEN + RAISE EXCEPTION 'Named search % not found', _name; + END IF; + + RETURN unpinned_search; +END; +$function$ +; + +CREATE OR REPLACE FUNCTION pgstac.where_stats(inhash text, inwhere text, updatestats boolean DEFAULT false, conf jsonb DEFAULT NULL::jsonb) + RETURNS searches + LANGUAGE plpgsql + SECURITY DEFINER +AS $function$ +DECLARE + t timestamptz; + i interval; + explain_json jsonb; + sw searches%ROWTYPE; + sw_statslastupdated timestamptz; + sw_estimated_count bigint; + sw_estimated_cost float; + _context text := lower(context(conf)); + _stats_ttl interval := 
context_stats_ttl(conf); + _estimated_cost_threshold float := context_estimated_cost(conf); + _estimated_count_threshold int := context_estimated_count(conf); + ro bool := pgstac.readonly(conf); +BEGIN + -- A forced refresh (updatestats) zeroes the TTL so cached stats are treated as expired + IF updatestats THEN + RAISE DEBUG 'Updatestats set to TRUE, setting TTL to 0'; + _stats_ttl := '0'::interval; + END IF; + + -- Context disabled: return sw as declared (never populated) without reading the table + IF _context = 'off' THEN + RETURN sw; + END IF; + + -- Read current stats state without holding row locks during expensive + -- estimate/count operations. + SELECT * INTO sw FROM searches WHERE hash = inhash; + + IF sw IS NULL THEN + -- In read-only mode, searches may not be persisted. Continue with + -- non-persistent estimate/count calculation so context can still be + -- returned to callers. + sw.hash := inhash; + sw._where := inwhere; + sw_statslastupdated := NULL; + ELSE + sw_statslastupdated := sw.statslastupdated; + END IF; + + -- Serve cached stats while fresh. Test sw.hash rather than the whole row: a row-valued IS NOT NULL is false when any column (e.g. name on anonymous searches) is NULL + IF + sw.hash IS NOT NULL + AND sw.statslastupdated IS NOT NULL + AND sw.context_count IS NOT NULL + AND now() - sw.statslastupdated <= _stats_ttl + THEN + -- We have a cached row with data that is within our ttl. + RAISE DEBUG 'Stats present in table and lastupdated within ttl: %', sw; + RAISE DEBUG 'Returning cached counts. 
%', sw; + RETURN sw; + END IF; + + -- Calculate estimated cost and rows + -- Use explain to get estimated count/cost + RAISE DEBUG 'Calculating estimated stats'; + t := clock_timestamp(); + EXECUTE format('EXPLAIN (format json) SELECT 1 FROM items WHERE %s', inwhere) + INTO explain_json; + RAISE DEBUG 'Time for just the explain: %', clock_timestamp() - t; + i := clock_timestamp() - t; + + sw_estimated_count := (explain_json->0->'Plan'->>'Plan Rows')::bigint; + sw_estimated_cost := (explain_json->0->'Plan'->>'Total Cost')::float; + + RAISE DEBUG 'ESTIMATED_COUNT: %, THRESHOLD %', sw_estimated_count, _estimated_count_threshold; + RAISE DEBUG 'ESTIMATED_COST: %, THRESHOLD %', sw_estimated_cost, _estimated_cost_threshold; + + -- In auto mode, when the estimated count and cost both meet or exceed their thresholds, skip the exact count and return the planner's row estimate + IF + _context = 'auto' + AND sw_estimated_count >= _estimated_count_threshold + AND sw_estimated_cost >= _estimated_cost_threshold + THEN + sw.context_count := sw_estimated_count; + IF NOT ro THEN + UPDATE searches SET + statslastupdated = now(), + context_count = sw.context_count + WHERE + hash = inhash + AND statslastupdated IS NOT DISTINCT FROM sw_statslastupdated -- optimistic guard: write only if the stats are unchanged since they were read above + RETURNING * INTO sw; + + IF sw IS NULL THEN -- no row was updated: re-read whatever is stored for this hash now + SELECT * INTO sw FROM searches WHERE hash = inhash; + END IF; + END IF; + RAISE DEBUG 'Estimates are within thresholds, returning estimates. 
%', sw; + RETURN sw; + END IF; + + -- Calculate Actual Count + t := clock_timestamp(); + RAISE NOTICE 'Calculating actual count...'; + EXECUTE format( + 'SELECT count(*) FROM items WHERE %s', + inwhere + ) INTO sw.context_count; + i := clock_timestamp() - t; + RAISE NOTICE 'Actual Count: % -- %', sw.context_count, i; + + IF NOT ro THEN + UPDATE searches SET + statslastupdated = now(), + context_count = sw.context_count + WHERE + hash = inhash + AND statslastupdated IS NOT DISTINCT FROM sw_statslastupdated + RETURNING * INTO sw; + + IF sw IS NULL THEN + SELECT * INTO sw FROM searches WHERE hash = inhash; + END IF; + END IF; + RAISE DEBUG 'Returning with actual count. %', sw; + RETURN sw; +END; +$function$ +; + +CREATE OR REPLACE FUNCTION pgstac.content_dehydrate(content jsonb) + RETURNS items + LANGUAGE sql + STABLE +AS $function$ + SELECT + content->>'id' as id, + stac_geom(content) as geometry, + content->>'collection' as collection, + stac_datetime(content) as datetime, + stac_end_datetime(content) as end_datetime, + strip_jsonb( + content - '{id,geometry,collection,type}'::text[], + collection_base_item(content->>'collection') + ) - '{id,geometry,collection,type}'::text[] as content, + null::jsonb as private + ; +$function$ +; + +CREATE OR REPLACE FUNCTION pgstac.search(_search jsonb DEFAULT '{}'::jsonb) + RETURNS jsonb + LANGUAGE plpgsql +AS $function$ +DECLARE + searches searches%ROWTYPE; + _where text; + orderby text; + search_where searches%ROWTYPE; + total_count bigint; + token record; + token_prev boolean; + token_item items%ROWTYPE; + token_where text; + full_where text; + init_ts timestamptz := clock_timestamp(); + timer timestamptz := clock_timestamp(); + hydrate bool := NOT (_search->'conf'->>'nohydrate' IS NOT NULL AND (_search->'conf'->>'nohydrate')::boolean = true); + prev text; + next text; + collection jsonb; + out_records jsonb; + out_len int; + _limit int := coalesce((_search->>'limit')::int, 10); + _querylimit int; + _fields jsonb := 
coalesce(_search->'fields', '{}'::jsonb); + has_prev boolean := FALSE; + has_next boolean := FALSE; + links jsonb := '[]'::jsonb; + base_url text:= concat(rtrim(base_url(_search->'conf'),'/')); +BEGIN + searches := search_query(_search); + _where := searches._where; + orderby := searches.orderby; + search_where := where_stats(searches.hash, _where, false, _search->'conf'); + total_count := search_where.context_count; + RAISE NOTICE 'SEARCH:TOKEN: %', _search->>'token'; + token := get_token_record(_search->>'token'); + RAISE NOTICE '***TOKEN: %', token; + _querylimit := _limit + 1; + IF token IS NOT NULL THEN + token_prev := token.prev; + token_item := token.item; + token_where := get_token_filter(_search->'sortby', token_item, token_prev, FALSE); + RAISE DEBUG 'TOKEN_WHERE: % (%ms from search start)', token_where, age_ms(timer); + IF token_prev THEN -- if we are using a prev token, we know has_next is true + RAISE DEBUG 'There is a previous token, so automatically setting has_next to true'; + has_next := TRUE; + orderby := sort_sqlorderby(_search, TRUE); + ELSE + RAISE DEBUG 'There is a next token, so automatically setting has_prev to true'; + has_prev := TRUE; + + END IF; + ELSE -- if there was no token, we know there is no prev + RAISE DEBUG 'There is no token, so we know there is no prev. 
setting has_prev to false'; + has_prev := FALSE; + END IF; + + full_where := concat_ws(' AND ', _where, token_where); + RAISE NOTICE 'FULL WHERE CLAUSE: %', full_where; + RAISE NOTICE 'Time to get counts and build query %', age_ms(timer); + timer := clock_timestamp(); + + IF hydrate THEN + RAISE NOTICE 'Getting hydrated data.'; + ELSE + RAISE NOTICE 'Getting non-hydrated data.'; + END IF; + RAISE NOTICE 'CACHE SET TO %', get_setting_bool('format_cache'); + RAISE NOTICE 'Time to set hydration/formatting %', age_ms(timer); + timer := clock_timestamp(); + SELECT jsonb_agg(format_item(i, _fields, hydrate)) INTO out_records + FROM search_rows( + full_where, + orderby, + _querylimit + ) as i; + + RAISE NOTICE 'Time to fetch rows %', age_ms(timer); + timer := clock_timestamp(); + + + IF token_prev THEN + out_records := flip_jsonb_array(out_records); + END IF; + + RAISE NOTICE 'Query returned % records.', jsonb_array_length(out_records); + RAISE DEBUG 'TOKEN: % %', token_item.id, token_item.collection; + RAISE DEBUG 'RECORD_1: % %', out_records->0->>'id', out_records->0->>'collection'; + RAISE DEBUG 'RECORD-1: % %', out_records->-1->>'id', out_records->-1->>'collection'; + + -- REMOVE records that were from our token + IF out_records->0->>'id' = token_item.id AND out_records->0->>'collection' = token_item.collection THEN + out_records := out_records - 0; + ELSIF out_records->-1->>'id' = token_item.id AND out_records->-1->>'collection' = token_item.collection THEN + out_records := out_records - -1; + END IF; + + out_len := jsonb_array_length(out_records); + + IF out_len = _limit + 1 THEN + IF token_prev THEN + has_prev := TRUE; + out_records := out_records - 0; + ELSE + has_next := TRUE; + out_records := out_records - -1; + END IF; + END IF; + + + links := links || jsonb_build_object( + 'rel', 'root', + 'type', 'application/json', + 'href', base_url + ) || jsonb_build_object( + 'rel', 'self', + 'type', 'application/json', + 'href', concat(base_url, '/search') + ); + + IF 
has_next THEN + next := concat(out_records->-1->>'collection', ':', out_records->-1->>'id'); + RAISE NOTICE 'HAS NEXT | %', next; + links := links || jsonb_build_object( + 'rel', 'next', + 'type', 'application/geo+json', + 'method', 'GET', + 'href', concat(base_url, '/search?token=next:', next) + ); + END IF; + + IF has_prev THEN + prev := concat(out_records->0->>'collection', ':', out_records->0->>'id'); + RAISE NOTICE 'HAS PREV | %', prev; + links := links || jsonb_build_object( + 'rel', 'prev', + 'type', 'application/geo+json', + 'method', 'GET', + 'href', concat(base_url, '/search?token=prev:', prev) + ); + END IF; + + RAISE NOTICE 'Time to get prev/next %', age_ms(timer); + timer := clock_timestamp(); + + + collection := jsonb_build_object( + 'type', 'FeatureCollection', + 'features', coalesce(out_records, '[]'::jsonb), + 'links', links + ); + + + + IF context(_search->'conf') != 'off' THEN + collection := collection || jsonb_strip_nulls(jsonb_build_object( + 'numberMatched', total_count, + 'numberReturned', coalesce(jsonb_array_length(out_records), 0) + )); + ELSE + collection := collection || jsonb_strip_nulls(jsonb_build_object( + 'numberReturned', coalesce(jsonb_array_length(out_records), 0) + )); + END IF; + + IF get_setting_bool('timing', _search->'conf') THEN + collection = collection || jsonb_build_object('timing', age_ms(init_ts)); + END IF; + + RAISE NOTICE 'Time to build final json %', age_ms(timer); + timer := clock_timestamp(); + + RAISE NOTICE 'Total Time: %', age_ms(current_timestamp); + RAISE NOTICE 'RETURNING % records. NEXT: %. 
PREV: %', collection->>'numberReturned', collection->>'next', collection->>'prev'; + RETURN collection; +END; +$function$ +; + +CREATE OR REPLACE FUNCTION pgstac.search_fromhash(_hash text) + RETURNS searches + LANGUAGE sql + STRICT +AS $function$ + SELECT * FROM searches WHERE hash = _hash LIMIT 1; +$function$ +; + +CREATE OR REPLACE FUNCTION pgstac.search_query(_search jsonb DEFAULT '{}'::jsonb, updatestats boolean DEFAULT false, _metadata jsonb DEFAULT '{}'::jsonb) + RETURNS searches + LANGUAGE plpgsql + SECURITY DEFINER +AS $function$ +DECLARE + search searches%ROWTYPE; + cached_search searches%ROWTYPE; + search_where searches%ROWTYPE; + ro boolean := pgstac.readonly(); +BEGIN + RAISE NOTICE 'SEARCH: %', _search; + -- Calculate hash, where clause, and order by statement + search.search := _search; + search.metadata := _metadata; + search._where := stac_search_to_where(_search); + search.hash := search_hash_from_where(search._where, search.metadata); + search.orderby := sort_sqlorderby(_search); + search.lastused := now(); + search.usecount := 1; + + -- If we are in read only mode, directly return search + IF ro THEN + RETURN search; + END IF; + + -- Cache bookkeeping is best-effort and non-blocking. We always return + -- canonical hash + where, even if cache touch cannot be acquired quickly. 
+ UPDATE searches + SET + lastused = now(), + usecount = searches.usecount + 1 + WHERE ctid = ( + SELECT ctid + FROM searches + WHERE hash = search.hash + FOR UPDATE SKIP LOCKED + LIMIT 1 + ) + RETURNING * INTO cached_search; + + IF cached_search IS NULL THEN + IF pg_try_advisory_xact_lock(hashtext(search.hash)) THEN + INSERT INTO searches (hash, search, _where, orderby, lastused, usecount, metadata) + VALUES (search.hash, search.search, search._where, search.orderby, now(), 1, search.metadata) + ON CONFLICT (hash) DO UPDATE SET + lastused = EXCLUDED.lastused, + usecount = searches.usecount + 1 + RETURNING * INTO cached_search; + END IF; + + IF cached_search IS NULL THEN + SELECT * INTO cached_search FROM searches WHERE hash = search.hash; + END IF; + END IF; + + IF cached_search IS NOT NULL THEN + cached_search._where = search._where; + cached_search.orderby = search.orderby; + IF updatestats THEN + search_where := where_stats( + cached_search.hash, + cached_search._where, + true, + _search->'conf' + ); + cached_search.context_count := search_where.context_count; + cached_search.statslastupdated := search_where.statslastupdated; + END IF; + RETURN cached_search; + END IF; + + IF updatestats THEN + search_where := where_stats( + search.hash, + search._where, + true, + _search->'conf' + ); + search.context_count := search_where.context_count; + search.statslastupdated := search_where.statslastupdated; + END IF; + + RETURN search; + +END; +$function$ +; DO $$ BEGIN INSERT INTO queryables (name, definition, property_wrapper, property_index_type) VALUES @@ -233,6 +1006,7 @@ INSERT INTO pgstac_settings (name, value) VALUES ('context_estimated_count', '100000'), ('context_estimated_cost', '100000'), ('context_stats_ttl', '1 day'), + ('search_gc_retention_interval', '7 days'), ('default_filter_lang', 'cql2-json'), ('additional_properties', 'true'), ('use_queue', 'false'), @@ -292,8 +1066,15 @@ ALTER FUNCTION drop_table_constraints SECURITY DEFINER; ALTER FUNCTION 
create_table_constraints SECURITY DEFINER; ALTER FUNCTION check_partition SECURITY DEFINER; ALTER FUNCTION repartition SECURITY DEFINER; -ALTER FUNCTION where_stats SECURITY DEFINER; +ALTER FUNCTION where_stats(text, text, boolean, jsonb) SECURITY DEFINER; ALTER FUNCTION search_query SECURITY DEFINER; +ALTER FUNCTION name_search SECURITY DEFINER; +ALTER FUNCTION rename_search SECURITY DEFINER; +ALTER FUNCTION unname_search SECURITY DEFINER; +ALTER FUNCTION pin_search SECURITY DEFINER; +ALTER FUNCTION unpin_search SECURITY DEFINER; +ALTER FUNCTION gc_anonymous_searches(interval, jsonb) SECURITY DEFINER; +ALTER FUNCTION gc_search_caches(interval, jsonb) SECURITY DEFINER; ALTER FUNCTION format_item SECURITY DEFINER; ALTER FUNCTION maintain_index SECURITY DEFINER; diff --git a/src/pgstac/migrations/pgstac--unreleased.sql b/src/pgstac/migrations/pgstac--unreleased.sql index e12c4bd6..60426185 100644 --- a/src/pgstac/migrations/pgstac--unreleased.sql +++ b/src/pgstac/migrations/pgstac--unreleased.sql @@ -282,6 +282,10 @@ CREATE OR REPLACE FUNCTION context_stats_ttl(conf jsonb DEFAULT NULL) RETURNS in SELECT pgstac.get_setting('context_stats_ttl', conf)::interval; $$ LANGUAGE SQL; +CREATE OR REPLACE FUNCTION search_gc_retention_interval(conf jsonb DEFAULT NULL) RETURNS interval AS $$ + SELECT pgstac.get_setting('search_gc_retention_interval', conf)::interval; +$$ LANGUAGE SQL; + CREATE OR REPLACE FUNCTION t2s(text) RETURNS text AS $$ SELECT extract(epoch FROM $1::interval)::text || ' s'; $$ LANGUAGE SQL IMMUTABLE PARALLEL SAFE STRICT; @@ -290,7 +294,6 @@ CREATE OR REPLACE FUNCTION age_ms(a timestamptz, b timestamptz DEFAULT clock_tim SELECT abs(extract(epoch from age(a,b)) * 1000); $$ LANGUAGE SQL IMMUTABLE PARALLEL SAFE; - CREATE OR REPLACE FUNCTION queue_timeout() RETURNS interval AS $$ SELECT t2s(coalesce( get_setting('queue_timeout'), @@ -2119,10 +2122,6 @@ FOR EACH STATEMENT EXECUTE FUNCTION partition_after_triggerfunc(); -CREATE OR REPLACE FUNCTION 
content_slim(_item jsonb) RETURNS jsonb AS $$ - SELECT strip_jsonb(_item - '{id,geometry,collection,type}'::text[], collection_base_item(_item->>'collection')) - '{id,geometry,collection,type}'::text[]; -$$ LANGUAGE SQL IMMUTABLE PARALLEL SAFE; - CREATE OR REPLACE FUNCTION content_dehydrate(content jsonb) RETURNS items AS $$ SELECT content->>'id' as id, @@ -2130,7 +2129,10 @@ CREATE OR REPLACE FUNCTION content_dehydrate(content jsonb) RETURNS items AS $$ content->>'collection' as collection, stac_datetime(content) as datetime, stac_end_datetime(content) as end_datetime, - content_slim(content) as content, + strip_jsonb( + content - '{id,geometry,collection,type}'::text[], + collection_base_item(content->>'collection') + ) - '{id,geometry,collection,type}'::text[] as content, null::jsonb as private ; $$ LANGUAGE SQL STABLE; @@ -3522,50 +3524,74 @@ BEGIN $$ LANGUAGE PLPGSQL SET transform_null_equals TO TRUE ; -CREATE OR REPLACE FUNCTION search_hash(jsonb, jsonb) RETURNS text AS $$ - SELECT md5(concat(($1 - '{token,limit,context,includes,excludes}'::text[])::text,$2::text)); +-- ============================================================================ +-- Search Hashing +-- ============================================================================ + +CREATE OR REPLACE FUNCTION pgstac_hash(data text) RETURNS text AS $$ + SELECT encode(sha256(convert_to(data, 'UTF8')), 'hex'); +$$ LANGUAGE SQL IMMUTABLE PARALLEL SAFE STRICT; + +-- Central hash helper: one canonical where-clause + metadata payload to hash. 
+CREATE OR REPLACE FUNCTION search_hash_from_where(_where text, _metadata jsonb DEFAULT '{}'::jsonb) RETURNS text AS $$ + SELECT pgstac_hash( + format( + '%s|%s', + _where, + coalesce(_metadata, '{}'::jsonb)::text + ) + ); $$ LANGUAGE SQL IMMUTABLE PARALLEL SAFE; -DROP FUNCTION IF EXISTS search_tohash(jsonb); +CREATE OR REPLACE FUNCTION search_hash(_search jsonb, _metadata jsonb DEFAULT '{}'::jsonb) RETURNS text AS $$ + SELECT search_hash_from_where( + stac_search_to_where(_search), + _metadata + ); +$$ LANGUAGE SQL STABLE PARALLEL SAFE; + +-- ============================================================================ +-- Search Cache Table +-- ============================================================================ + +-- Search lifecycle and context cache now live on searches; search_wheres is retired. CREATE TABLE IF NOT EXISTS searches( - hash text GENERATED ALWAYS AS (search_hash(search, metadata)) STORED PRIMARY KEY, + hash text PRIMARY KEY, + name text UNIQUE, search jsonb NOT NULL, _where text, orderby text, lastused timestamptz DEFAULT now(), usecount bigint DEFAULT 0, - metadata jsonb DEFAULT '{}'::jsonb NOT NULL -); - -CREATE TABLE IF NOT EXISTS search_wheres( - id bigint generated always as identity primary key, - _where text NOT NULL, - lastused timestamptz DEFAULT now(), - usecount bigint DEFAULT 0, + metadata jsonb DEFAULT '{}'::jsonb NOT NULL, + pinned boolean NOT NULL DEFAULT false, + created_at timestamptz DEFAULT now(), statslastupdated timestamptz, - estimated_count bigint, - estimated_cost float, - time_to_estimate float, - total_count bigint, - time_to_count float, - partitions text[] + context_count bigint ); +CREATE INDEX IF NOT EXISTS searches_lastused_anon_idx + ON searches (lastused) WHERE name IS NULL AND NOT pinned; -CREATE INDEX IF NOT EXISTS search_wheres_partitions ON search_wheres USING GIN (partitions); -CREATE UNIQUE INDEX IF NOT EXISTS search_wheres_where ON search_wheres ((md5(_where))); +DROP TABLE IF EXISTS search_wheres; 
+ +-- ============================================================================ +-- Context Stats (estimate/count/TTL) +-- ============================================================================ CREATE OR REPLACE FUNCTION where_stats( + inhash text, inwhere text, updatestats boolean default false, conf jsonb default null -) RETURNS search_wheres AS $$ +) RETURNS searches AS $$ DECLARE t timestamptz; i interval; explain_json jsonb; - partitions text[]; - sw search_wheres%ROWTYPE; - inwhere_hash text := md5(inwhere); + sw searches%ROWTYPE; + sw_statslastupdated timestamptz; + sw_estimated_count bigint; + sw_estimated_cost float; _context text := lower(context(conf)); _stats_ttl interval := context_stats_ttl(conf); _estimated_cost_threshold float := context_estimated_cost(conf); @@ -3580,96 +3606,71 @@ BEGIN -- If we don't need to calculate context, just return IF _context = 'off' THEN - sw._where = inwhere; RETURN sw; END IF; - -- Get any stats that we have. - IF NOT ro THEN - -- If there is a lock where another process is - -- updating the stats, wait so that we don't end up calculating a bunch of times. - SELECT * INTO sw FROM search_wheres WHERE md5(_where)=inwhere_hash FOR UPDATE; + -- Read current stats state without holding row locks during expensive + -- estimate/count operations. + SELECT * INTO sw FROM searches WHERE hash = inhash; + + IF sw IS NULL THEN + -- In read-only mode, searches may not be persisted. Continue with + -- non-persistent estimate/count calculation so context can still be + -- returned to callers. 
+ sw.hash := inhash; + sw._where := inwhere; + sw_statslastupdated := NULL; ELSE - SELECT * INTO sw FROM search_wheres WHERE md5(_where)=inwhere_hash; + sw_statslastupdated := sw.statslastupdated; END IF; -- If there is a cached row, figure out if we need to update IF sw IS NOT NULL AND sw.statslastupdated IS NOT NULL - AND sw.total_count IS NOT NULL + AND sw.context_count IS NOT NULL AND now() - sw.statslastupdated <= _stats_ttl THEN - -- we have a cached row with data that is within our ttl + -- We have a cached row with data that is within our ttl. RAISE DEBUG 'Stats present in table and lastupdated within ttl: %', sw; - IF NOT ro THEN - RAISE DEBUG 'Updating search_wheres only bumping lastused and usecount'; - UPDATE search_wheres SET - lastused = now(), - usecount = search_wheres.usecount + 1 - WHERE md5(_where) = inwhere_hash - RETURNING * INTO sw; - END IF; RAISE DEBUG 'Returning cached counts. %', sw; RETURN sw; END IF; -- Calculate estimated cost and rows -- Use explain to get estimated count/cost - IF sw.estimated_count IS NULL OR sw.estimated_cost IS NULL THEN - RAISE DEBUG 'Calculating estimated stats'; - t := clock_timestamp(); - EXECUTE format('EXPLAIN (format json) SELECT 1 FROM items WHERE %s', inwhere) - INTO explain_json; - RAISE DEBUG 'Time for just the explain: %', clock_timestamp() - t; - i := clock_timestamp() - t; + RAISE DEBUG 'Calculating estimated stats'; + t := clock_timestamp(); + EXECUTE format('EXPLAIN (format json) SELECT 1 FROM items WHERE %s', inwhere) + INTO explain_json; + RAISE DEBUG 'Time for just the explain: %', clock_timestamp() - t; + i := clock_timestamp() - t; - sw.estimated_count := explain_json->0->'Plan'->'Plan Rows'; - sw.estimated_cost := explain_json->0->'Plan'->'Total Cost'; - sw.time_to_estimate := extract(epoch from i); - END IF; + sw_estimated_count := (explain_json->0->'Plan'->>'Plan Rows')::bigint; + sw_estimated_cost := (explain_json->0->'Plan'->>'Total Cost')::float; - RAISE DEBUG 'ESTIMATED_COUNT: %, 
THRESHOLD %', sw.estimated_count, _estimated_count_threshold; - RAISE DEBUG 'ESTIMATED_COST: %, THRESHOLD %', sw.estimated_cost, _estimated_cost_threshold; + RAISE DEBUG 'ESTIMATED_COUNT: %, THRESHOLD %', sw_estimated_count, _estimated_count_threshold; + RAISE DEBUG 'ESTIMATED_COST: %, THRESHOLD %', sw_estimated_cost, _estimated_cost_threshold; -- If context is set to auto and the costs are within the threshold return the estimated costs IF _context = 'auto' - AND sw.estimated_count >= _estimated_count_threshold - AND sw.estimated_cost >= _estimated_cost_threshold + AND sw_estimated_count >= _estimated_count_threshold + AND sw_estimated_cost >= _estimated_cost_threshold THEN + sw.context_count := sw_estimated_count; IF NOT ro THEN - INSERT INTO search_wheres ( - _where, - lastused, - usecount, - statslastupdated, - estimated_count, - estimated_cost, - time_to_estimate, - total_count, - time_to_count - ) VALUES ( - inwhere, - now(), - 1, - now(), - sw.estimated_count, - sw.estimated_cost, - sw.time_to_estimate, - null, - null - ) ON CONFLICT ((md5(_where))) - DO UPDATE SET - lastused = EXCLUDED.lastused, - usecount = search_wheres.usecount + 1, - statslastupdated = EXCLUDED.statslastupdated, - estimated_count = EXCLUDED.estimated_count, - estimated_cost = EXCLUDED.estimated_cost, - time_to_estimate = EXCLUDED.time_to_estimate, - total_count = EXCLUDED.total_count, - time_to_count = EXCLUDED.time_to_count + UPDATE searches SET + statslastupdated = now(), + context_count = sw.context_count + WHERE + hash = inhash + AND statslastupdated IS NOT DISTINCT FROM sw_statslastupdated RETURNING * INTO sw; + + IF sw IS NULL THEN + SELECT * INTO sw FROM searches WHERE hash = inhash; + END IF; END IF; RAISE DEBUG 'Estimates are within thresholds, returning estimates. 
%', sw; RETURN sw; @@ -3681,43 +3682,22 @@ BEGIN EXECUTE format( 'SELECT count(*) FROM items WHERE %s', inwhere - ) INTO sw.total_count; + ) INTO sw.context_count; i := clock_timestamp() - t; - RAISE NOTICE 'Actual Count: % -- %', sw.total_count, i; - sw.time_to_count := extract(epoch FROM i); + RAISE NOTICE 'Actual Count: % -- %', sw.context_count, i; IF NOT ro THEN - INSERT INTO search_wheres ( - _where, - lastused, - usecount, - statslastupdated, - estimated_count, - estimated_cost, - time_to_estimate, - total_count, - time_to_count - ) VALUES ( - inwhere, - now(), - 1, - now(), - sw.estimated_count, - sw.estimated_cost, - sw.time_to_estimate, - sw.total_count, - sw.time_to_count - ) ON CONFLICT ((md5(_where))) - DO UPDATE SET - lastused = EXCLUDED.lastused, - usecount = search_wheres.usecount + 1, - statslastupdated = EXCLUDED.statslastupdated, - estimated_count = EXCLUDED.estimated_count, - estimated_cost = EXCLUDED.estimated_cost, - time_to_estimate = EXCLUDED.time_to_estimate, - total_count = EXCLUDED.total_count, - time_to_count = EXCLUDED.time_to_count + UPDATE searches SET + statslastupdated = now(), + context_count = sw.context_count + WHERE + hash = inhash + AND statslastupdated IS NOT DISTINCT FROM sw_statslastupdated RETURNING * INTO sw; + + IF sw IS NULL THEN + SELECT * INTO sw FROM searches WHERE hash = inhash; + END IF; END IF; RAISE DEBUG 'Returning with actual count. 
%', sw; RETURN sw; @@ -3725,6 +3705,12 @@ END; $$ LANGUAGE PLPGSQL SECURITY DEFINER; +-- ============================================================================ +-- Search Cache Lifecycle (create, name, pin, GC) +-- ============================================================================ + +DROP FUNCTION IF EXISTS search_query(jsonb, boolean, jsonb); + CREATE OR REPLACE FUNCTION search_query( _search jsonb = '{}'::jsonb, updatestats boolean = false, @@ -3733,20 +3719,15 @@ CREATE OR REPLACE FUNCTION search_query( DECLARE search searches%ROWTYPE; cached_search searches%ROWTYPE; - pexplain jsonb; - t timestamptz; - i interval; - doupdate boolean := FALSE; - insertfound boolean := FALSE; + search_where searches%ROWTYPE; ro boolean := pgstac.readonly(); - found_search text; BEGIN RAISE NOTICE 'SEARCH: %', _search; -- Calculate hash, where clause, and order by statement search.search := _search; search.metadata := _metadata; - search.hash := search_hash(_search, _metadata); search._where := stac_search_to_where(_search); + search.hash := search_hash_from_where(search._where, search.metadata); search.orderby := sort_sqlorderby(_search); search.lastused := now(); search.usecount := 1; @@ -3756,31 +3737,63 @@ BEGIN RETURN search; END IF; - RAISE NOTICE 'Updating Statistics for search: %s', search; - -- Update statistics for times used and and when last used - -- If the entry is locked, rather than waiting, skip updating the stats - INSERT INTO searches (search, lastused, usecount, metadata) - VALUES (search.search, now(), 1, search.metadata) - ON CONFLICT DO NOTHING - RETURNING * INTO cached_search - ; + -- Cache bookkeeping is best-effort and non-blocking. We always return + -- canonical hash + where, even if cache touch cannot be acquired quickly. 
+ UPDATE searches + SET + lastused = now(), + usecount = searches.usecount + 1 + WHERE ctid = ( + SELECT ctid + FROM searches + WHERE hash = search.hash + FOR UPDATE SKIP LOCKED + LIMIT 1 + ) + RETURNING * INTO cached_search; + + IF cached_search IS NULL THEN + IF pg_try_advisory_xact_lock(hashtext(search.hash)) THEN + INSERT INTO searches (hash, search, _where, orderby, lastused, usecount, metadata) + VALUES (search.hash, search.search, search._where, search.orderby, now(), 1, search.metadata) + ON CONFLICT (hash) DO UPDATE SET + lastused = EXCLUDED.lastused, + usecount = searches.usecount + 1 + RETURNING * INTO cached_search; + END IF; - IF NOT FOUND OR cached_search IS NULL THEN - UPDATE searches SET - lastused = now(), - usecount = searches.usecount + 1 - WHERE hash = ( - SELECT hash FROM searches WHERE hash=search.hash FOR UPDATE SKIP LOCKED - ) - RETURNING * INTO cached_search - ; + IF cached_search IS NULL THEN + SELECT * INTO cached_search FROM searches WHERE hash = search.hash; + END IF; END IF; IF cached_search IS NOT NULL THEN cached_search._where = search._where; cached_search.orderby = search.orderby; + IF updatestats THEN + search_where := where_stats( + cached_search.hash, + cached_search._where, + true, + _search->'conf' + ); + cached_search.context_count := search_where.context_count; + cached_search.statslastupdated := search_where.statslastupdated; + END IF; RETURN cached_search; END IF; + + IF updatestats THEN + search_where := where_stats( + search.hash, + search._where, + true, + _search->'conf' + ); + search.context_count := search_where.context_count; + search.statslastupdated := search_where.statslastupdated; + END IF; + RETURN search; END; @@ -3789,13 +3802,153 @@ $$ LANGUAGE PLPGSQL SECURITY DEFINER; CREATE OR REPLACE FUNCTION search_fromhash( _hash text ) RETURNS searches AS $$ - SELECT * FROM search_query((SELECT search FROM searches WHERE hash=_hash LIMIT 1)); + SELECT * FROM searches WHERE hash = _hash LIMIT 1; $$ LANGUAGE SQL STRICT; 
+CREATE OR REPLACE FUNCTION name_search( + _search jsonb, + _name text, + _metadata jsonb DEFAULT '{}'::jsonb +) RETURNS searches AS $$ +DECLARE + named searches%ROWTYPE; +BEGIN + named := search_query(_search, false, _metadata); + UPDATE searches + SET + name = _name, + lastused = now(), + usecount = searches.usecount + 1 + WHERE hash = named.hash + RETURNING * INTO named; + + IF named IS NULL THEN + RAISE EXCEPTION 'Could not name search for input: %', _search; + END IF; + + RETURN named; +END; +$$ LANGUAGE PLPGSQL SECURITY DEFINER; + +CREATE OR REPLACE FUNCTION rename_search(_old_name text, _new_name text) RETURNS searches AS $$ +DECLARE + renamed searches%ROWTYPE; +BEGIN + -- Serialize rename-pair operations to avoid deadlocks on concurrent name swaps. + PERFORM pg_advisory_xact_lock( + hashtext( + least(_old_name, _new_name) + || '|' + || greatest(_old_name, _new_name) + ) + ); + + UPDATE searches + SET + name = _new_name, + lastused = now(), + usecount = searches.usecount + 1 + WHERE name = _old_name + RETURNING * INTO renamed; + + IF renamed IS NULL THEN + RAISE EXCEPTION 'Named search % not found', _old_name; + END IF; + + RETURN renamed; +END; +$$ LANGUAGE PLPGSQL SECURITY DEFINER; + +CREATE OR REPLACE FUNCTION unname_search(_name text) RETURNS searches AS $$ +DECLARE + unnamed searches%ROWTYPE; +BEGIN + UPDATE searches + SET + name = NULL, + pinned = false, + lastused = now(), + usecount = searches.usecount + 1 + WHERE name = _name + RETURNING * INTO unnamed; + + IF unnamed IS NULL THEN + RAISE EXCEPTION 'Named search % not found', _name; + END IF; + + RETURN unnamed; +END; +$$ LANGUAGE PLPGSQL SECURITY DEFINER; + +CREATE OR REPLACE FUNCTION pin_search(_name text) RETURNS searches AS $$ +DECLARE + pinned_search searches%ROWTYPE; +BEGIN + UPDATE searches + SET + pinned = true, + lastused = now(), + usecount = searches.usecount + 1 + WHERE name = _name + RETURNING * INTO pinned_search; + + IF pinned_search IS NULL THEN + RAISE EXCEPTION 'Named search % not 
found', _name; + END IF; + + RETURN pinned_search; +END; +$$ LANGUAGE PLPGSQL SECURITY DEFINER; + +CREATE OR REPLACE FUNCTION unpin_search(_name text) RETURNS searches AS $$ +DECLARE + unpinned_search searches%ROWTYPE; +BEGIN + UPDATE searches + SET + pinned = false, + lastused = now(), + usecount = searches.usecount + 1 + WHERE name = _name + RETURNING * INTO unpinned_search; + + IF unpinned_search IS NULL THEN + RAISE EXCEPTION 'Named search % not found', _name; + END IF; + + RETURN unpinned_search; +END; +$$ LANGUAGE PLPGSQL SECURITY DEFINER; + +CREATE OR REPLACE FUNCTION gc_anonymous_searches(retention_interval interval DEFAULT NULL, conf jsonb DEFAULT NULL) RETURNS bigint AS $$ + WITH effective_retention AS ( + SELECT COALESCE( + retention_interval, + search_gc_retention_interval(conf) + ) AS i + ), + deleted AS ( + DELETE FROM searches + USING effective_retention + WHERE + name IS NULL + AND NOT pinned + AND lastused < now() - effective_retention.i + RETURNING 1 + ) + SELECT count(*)::bigint FROM deleted; +$$ LANGUAGE SQL SECURITY DEFINER; + +CREATE OR REPLACE FUNCTION gc_search_caches(retention_interval interval DEFAULT NULL, conf jsonb DEFAULT NULL) RETURNS jsonb AS $$ + SELECT jsonb_build_object( + 'removed_searches', + gc_anonymous_searches(retention_interval, conf) + ); +$$ LANGUAGE SQL SECURITY DEFINER; + CREATE OR REPLACE FUNCTION search_rows( IN _where text DEFAULT 'TRUE', IN _orderby text DEFAULT 'datetime DESC, id DESC', - IN partitions text[] DEFAULT NULL, IN _limit int DEFAULT 10 ) RETURNS SETOF items AS $$ DECLARE @@ -3932,13 +4085,14 @@ BEGIN END; $$ LANGUAGE PLPGSQL SECURITY DEFINER; +DROP FUNCTION IF EXISTS search(jsonb); CREATE OR REPLACE FUNCTION search(_search jsonb = '{}'::jsonb) RETURNS jsonb AS $$ DECLARE searches searches%ROWTYPE; _where text; orderby text; - search_where search_wheres%ROWTYPE; + search_where searches%ROWTYPE; total_count bigint; token record; token_prev boolean; @@ -3950,7 +4104,6 @@ DECLARE hydrate bool := NOT 
(_search->'conf'->>'nohydrate' IS NOT NULL AND (_search->'conf'->>'nohydrate')::boolean = true); prev text; next text; - context jsonb; collection jsonb; out_records jsonb; out_len int; @@ -3965,8 +4118,8 @@ BEGIN searches := search_query(_search); _where := searches._where; orderby := searches.orderby; - search_where := where_stats(_where); - total_count := coalesce(search_where.total_count, search_where.estimated_count); + search_where := where_stats(searches.hash, _where, false, _search->'conf'); + total_count := search_where.context_count; RAISE NOTICE 'SEARCH:TOKEN: %', _search->>'token'; token := get_token_record(_search->>'token'); RAISE NOTICE '***TOKEN: %', token; @@ -4007,7 +4160,6 @@ BEGIN FROM search_rows( full_where, orderby, - search_where.partitions, _querylimit ) as i; @@ -4613,6 +4765,7 @@ INSERT INTO pgstac_settings (name, value) VALUES ('context_estimated_count', '100000'), ('context_estimated_cost', '100000'), ('context_stats_ttl', '1 day'), + ('search_gc_retention_interval', '7 days'), ('default_filter_lang', 'cql2-json'), ('additional_properties', 'true'), ('use_queue', 'false'), @@ -4672,8 +4825,15 @@ ALTER FUNCTION drop_table_constraints SECURITY DEFINER; ALTER FUNCTION create_table_constraints SECURITY DEFINER; ALTER FUNCTION check_partition SECURITY DEFINER; ALTER FUNCTION repartition SECURITY DEFINER; -ALTER FUNCTION where_stats SECURITY DEFINER; +ALTER FUNCTION where_stats(text, text, boolean, jsonb) SECURITY DEFINER; ALTER FUNCTION search_query SECURITY DEFINER; +ALTER FUNCTION name_search SECURITY DEFINER; +ALTER FUNCTION rename_search SECURITY DEFINER; +ALTER FUNCTION unname_search SECURITY DEFINER; +ALTER FUNCTION pin_search SECURITY DEFINER; +ALTER FUNCTION unpin_search SECURITY DEFINER; +ALTER FUNCTION gc_anonymous_searches(interval, jsonb) SECURITY DEFINER; +ALTER FUNCTION gc_search_caches(interval, jsonb) SECURITY DEFINER; ALTER FUNCTION format_item SECURITY DEFINER; ALTER FUNCTION maintain_index SECURITY DEFINER; diff --git 
a/src/pgstac/pgstac.sql b/src/pgstac/pgstac.sql index e12c4bd6..60426185 100644 --- a/src/pgstac/pgstac.sql +++ b/src/pgstac/pgstac.sql @@ -282,6 +282,10 @@ CREATE OR REPLACE FUNCTION context_stats_ttl(conf jsonb DEFAULT NULL) RETURNS in SELECT pgstac.get_setting('context_stats_ttl', conf)::interval; $$ LANGUAGE SQL; +CREATE OR REPLACE FUNCTION search_gc_retention_interval(conf jsonb DEFAULT NULL) RETURNS interval AS $$ + SELECT pgstac.get_setting('search_gc_retention_interval', conf)::interval; +$$ LANGUAGE SQL; + CREATE OR REPLACE FUNCTION t2s(text) RETURNS text AS $$ SELECT extract(epoch FROM $1::interval)::text || ' s'; $$ LANGUAGE SQL IMMUTABLE PARALLEL SAFE STRICT; @@ -290,7 +294,6 @@ CREATE OR REPLACE FUNCTION age_ms(a timestamptz, b timestamptz DEFAULT clock_tim SELECT abs(extract(epoch from age(a,b)) * 1000); $$ LANGUAGE SQL IMMUTABLE PARALLEL SAFE; - CREATE OR REPLACE FUNCTION queue_timeout() RETURNS interval AS $$ SELECT t2s(coalesce( get_setting('queue_timeout'), @@ -2119,10 +2122,6 @@ FOR EACH STATEMENT EXECUTE FUNCTION partition_after_triggerfunc(); -CREATE OR REPLACE FUNCTION content_slim(_item jsonb) RETURNS jsonb AS $$ - SELECT strip_jsonb(_item - '{id,geometry,collection,type}'::text[], collection_base_item(_item->>'collection')) - '{id,geometry,collection,type}'::text[]; -$$ LANGUAGE SQL IMMUTABLE PARALLEL SAFE; - CREATE OR REPLACE FUNCTION content_dehydrate(content jsonb) RETURNS items AS $$ SELECT content->>'id' as id, @@ -2130,7 +2129,10 @@ CREATE OR REPLACE FUNCTION content_dehydrate(content jsonb) RETURNS items AS $$ content->>'collection' as collection, stac_datetime(content) as datetime, stac_end_datetime(content) as end_datetime, - content_slim(content) as content, + strip_jsonb( + content - '{id,geometry,collection,type}'::text[], + collection_base_item(content->>'collection') + ) - '{id,geometry,collection,type}'::text[] as content, null::jsonb as private ; $$ LANGUAGE SQL STABLE; @@ -3522,50 +3524,74 @@ BEGIN $$ LANGUAGE PLPGSQL SET 
transform_null_equals TO TRUE ; -CREATE OR REPLACE FUNCTION search_hash(jsonb, jsonb) RETURNS text AS $$ - SELECT md5(concat(($1 - '{token,limit,context,includes,excludes}'::text[])::text,$2::text)); +-- ============================================================================ +-- Search Hashing +-- ============================================================================ + +CREATE OR REPLACE FUNCTION pgstac_hash(data text) RETURNS text AS $$ + SELECT encode(sha256(convert_to(data, 'UTF8')), 'hex'); +$$ LANGUAGE SQL IMMUTABLE PARALLEL SAFE STRICT; + +-- Central hash helper: one canonical where-clause + metadata payload to hash. +CREATE OR REPLACE FUNCTION search_hash_from_where(_where text, _metadata jsonb DEFAULT '{}'::jsonb) RETURNS text AS $$ + SELECT pgstac_hash( + format( + '%s|%s', + _where, + coalesce(_metadata, '{}'::jsonb)::text + ) + ); $$ LANGUAGE SQL IMMUTABLE PARALLEL SAFE; -DROP FUNCTION IF EXISTS search_tohash(jsonb); +CREATE OR REPLACE FUNCTION search_hash(_search jsonb, _metadata jsonb DEFAULT '{}'::jsonb) RETURNS text AS $$ + SELECT search_hash_from_where( + stac_search_to_where(_search), + _metadata + ); +$$ LANGUAGE SQL STABLE PARALLEL SAFE; + +-- ============================================================================ +-- Search Cache Table +-- ============================================================================ + +-- Search lifecycle and context cache now live on searches; search_wheres is retired. 
CREATE TABLE IF NOT EXISTS searches( - hash text GENERATED ALWAYS AS (search_hash(search, metadata)) STORED PRIMARY KEY, + hash text PRIMARY KEY, + name text UNIQUE, search jsonb NOT NULL, _where text, orderby text, lastused timestamptz DEFAULT now(), usecount bigint DEFAULT 0, - metadata jsonb DEFAULT '{}'::jsonb NOT NULL -); - -CREATE TABLE IF NOT EXISTS search_wheres( - id bigint generated always as identity primary key, - _where text NOT NULL, - lastused timestamptz DEFAULT now(), - usecount bigint DEFAULT 0, + metadata jsonb DEFAULT '{}'::jsonb NOT NULL, + pinned boolean NOT NULL DEFAULT false, + created_at timestamptz DEFAULT now(), statslastupdated timestamptz, - estimated_count bigint, - estimated_cost float, - time_to_estimate float, - total_count bigint, - time_to_count float, - partitions text[] + context_count bigint ); +CREATE INDEX IF NOT EXISTS searches_lastused_anon_idx + ON searches (lastused) WHERE name IS NULL AND NOT pinned; -CREATE INDEX IF NOT EXISTS search_wheres_partitions ON search_wheres USING GIN (partitions); -CREATE UNIQUE INDEX IF NOT EXISTS search_wheres_where ON search_wheres ((md5(_where))); +DROP TABLE IF EXISTS search_wheres; + +-- ============================================================================ +-- Context Stats (estimate/count/TTL) +-- ============================================================================ CREATE OR REPLACE FUNCTION where_stats( + inhash text, inwhere text, updatestats boolean default false, conf jsonb default null -) RETURNS search_wheres AS $$ +) RETURNS searches AS $$ DECLARE t timestamptz; i interval; explain_json jsonb; - partitions text[]; - sw search_wheres%ROWTYPE; - inwhere_hash text := md5(inwhere); + sw searches%ROWTYPE; + sw_statslastupdated timestamptz; + sw_estimated_count bigint; + sw_estimated_cost float; _context text := lower(context(conf)); _stats_ttl interval := context_stats_ttl(conf); _estimated_cost_threshold float := context_estimated_cost(conf); @@ -3580,96 +3606,71 @@ 
BEGIN -- If we don't need to calculate context, just return IF _context = 'off' THEN - sw._where = inwhere; RETURN sw; END IF; - -- Get any stats that we have. - IF NOT ro THEN - -- If there is a lock where another process is - -- updating the stats, wait so that we don't end up calculating a bunch of times. - SELECT * INTO sw FROM search_wheres WHERE md5(_where)=inwhere_hash FOR UPDATE; + -- Read current stats state without holding row locks during expensive + -- estimate/count operations. + SELECT * INTO sw FROM searches WHERE hash = inhash; + + IF sw IS NULL THEN + -- In read-only mode, searches may not be persisted. Continue with + -- non-persistent estimate/count calculation so context can still be + -- returned to callers. + sw.hash := inhash; + sw._where := inwhere; + sw_statslastupdated := NULL; ELSE - SELECT * INTO sw FROM search_wheres WHERE md5(_where)=inwhere_hash; + sw_statslastupdated := sw.statslastupdated; END IF; -- If there is a cached row, figure out if we need to update IF sw IS NOT NULL AND sw.statslastupdated IS NOT NULL - AND sw.total_count IS NOT NULL + AND sw.context_count IS NOT NULL AND now() - sw.statslastupdated <= _stats_ttl THEN - -- we have a cached row with data that is within our ttl + -- We have a cached row with data that is within our ttl. RAISE DEBUG 'Stats present in table and lastupdated within ttl: %', sw; - IF NOT ro THEN - RAISE DEBUG 'Updating search_wheres only bumping lastused and usecount'; - UPDATE search_wheres SET - lastused = now(), - usecount = search_wheres.usecount + 1 - WHERE md5(_where) = inwhere_hash - RETURNING * INTO sw; - END IF; RAISE DEBUG 'Returning cached counts. 
%', sw; RETURN sw; END IF; -- Calculate estimated cost and rows -- Use explain to get estimated count/cost - IF sw.estimated_count IS NULL OR sw.estimated_cost IS NULL THEN - RAISE DEBUG 'Calculating estimated stats'; - t := clock_timestamp(); - EXECUTE format('EXPLAIN (format json) SELECT 1 FROM items WHERE %s', inwhere) - INTO explain_json; - RAISE DEBUG 'Time for just the explain: %', clock_timestamp() - t; - i := clock_timestamp() - t; + RAISE DEBUG 'Calculating estimated stats'; + t := clock_timestamp(); + EXECUTE format('EXPLAIN (format json) SELECT 1 FROM items WHERE %s', inwhere) + INTO explain_json; + RAISE DEBUG 'Time for just the explain: %', clock_timestamp() - t; + i := clock_timestamp() - t; - sw.estimated_count := explain_json->0->'Plan'->'Plan Rows'; - sw.estimated_cost := explain_json->0->'Plan'->'Total Cost'; - sw.time_to_estimate := extract(epoch from i); - END IF; + sw_estimated_count := (explain_json->0->'Plan'->>'Plan Rows')::bigint; + sw_estimated_cost := (explain_json->0->'Plan'->>'Total Cost')::float; - RAISE DEBUG 'ESTIMATED_COUNT: %, THRESHOLD %', sw.estimated_count, _estimated_count_threshold; - RAISE DEBUG 'ESTIMATED_COST: %, THRESHOLD %', sw.estimated_cost, _estimated_cost_threshold; + RAISE DEBUG 'ESTIMATED_COUNT: %, THRESHOLD %', sw_estimated_count, _estimated_count_threshold; + RAISE DEBUG 'ESTIMATED_COST: %, THRESHOLD %', sw_estimated_cost, _estimated_cost_threshold; -- If context is set to auto and the costs are within the threshold return the estimated costs IF _context = 'auto' - AND sw.estimated_count >= _estimated_count_threshold - AND sw.estimated_cost >= _estimated_cost_threshold + AND sw_estimated_count >= _estimated_count_threshold + AND sw_estimated_cost >= _estimated_cost_threshold THEN + sw.context_count := sw_estimated_count; IF NOT ro THEN - INSERT INTO search_wheres ( - _where, - lastused, - usecount, - statslastupdated, - estimated_count, - estimated_cost, - time_to_estimate, - total_count, - time_to_count - ) 
VALUES ( - inwhere, - now(), - 1, - now(), - sw.estimated_count, - sw.estimated_cost, - sw.time_to_estimate, - null, - null - ) ON CONFLICT ((md5(_where))) - DO UPDATE SET - lastused = EXCLUDED.lastused, - usecount = search_wheres.usecount + 1, - statslastupdated = EXCLUDED.statslastupdated, - estimated_count = EXCLUDED.estimated_count, - estimated_cost = EXCLUDED.estimated_cost, - time_to_estimate = EXCLUDED.time_to_estimate, - total_count = EXCLUDED.total_count, - time_to_count = EXCLUDED.time_to_count + UPDATE searches SET + statslastupdated = now(), + context_count = sw.context_count + WHERE + hash = inhash + AND statslastupdated IS NOT DISTINCT FROM sw_statslastupdated RETURNING * INTO sw; + + IF sw IS NULL THEN + SELECT * INTO sw FROM searches WHERE hash = inhash; + END IF; END IF; RAISE DEBUG 'Estimates are within thresholds, returning estimates. %', sw; RETURN sw; @@ -3681,43 +3682,22 @@ BEGIN EXECUTE format( 'SELECT count(*) FROM items WHERE %s', inwhere - ) INTO sw.total_count; + ) INTO sw.context_count; i := clock_timestamp() - t; - RAISE NOTICE 'Actual Count: % -- %', sw.total_count, i; - sw.time_to_count := extract(epoch FROM i); + RAISE NOTICE 'Actual Count: % -- %', sw.context_count, i; IF NOT ro THEN - INSERT INTO search_wheres ( - _where, - lastused, - usecount, - statslastupdated, - estimated_count, - estimated_cost, - time_to_estimate, - total_count, - time_to_count - ) VALUES ( - inwhere, - now(), - 1, - now(), - sw.estimated_count, - sw.estimated_cost, - sw.time_to_estimate, - sw.total_count, - sw.time_to_count - ) ON CONFLICT ((md5(_where))) - DO UPDATE SET - lastused = EXCLUDED.lastused, - usecount = search_wheres.usecount + 1, - statslastupdated = EXCLUDED.statslastupdated, - estimated_count = EXCLUDED.estimated_count, - estimated_cost = EXCLUDED.estimated_cost, - time_to_estimate = EXCLUDED.time_to_estimate, - total_count = EXCLUDED.total_count, - time_to_count = EXCLUDED.time_to_count + UPDATE searches SET + statslastupdated = now(), + 
context_count = sw.context_count + WHERE + hash = inhash + AND statslastupdated IS NOT DISTINCT FROM sw_statslastupdated RETURNING * INTO sw; + + IF sw IS NULL THEN + SELECT * INTO sw FROM searches WHERE hash = inhash; + END IF; END IF; RAISE DEBUG 'Returning with actual count. %', sw; RETURN sw; @@ -3725,6 +3705,12 @@ END; $$ LANGUAGE PLPGSQL SECURITY DEFINER; +-- ============================================================================ +-- Search Cache Lifecycle (create, name, pin, GC) +-- ============================================================================ + +DROP FUNCTION IF EXISTS search_query(jsonb, boolean, jsonb); + CREATE OR REPLACE FUNCTION search_query( _search jsonb = '{}'::jsonb, updatestats boolean = false, @@ -3733,20 +3719,15 @@ CREATE OR REPLACE FUNCTION search_query( DECLARE search searches%ROWTYPE; cached_search searches%ROWTYPE; - pexplain jsonb; - t timestamptz; - i interval; - doupdate boolean := FALSE; - insertfound boolean := FALSE; + search_where searches%ROWTYPE; ro boolean := pgstac.readonly(); - found_search text; BEGIN RAISE NOTICE 'SEARCH: %', _search; -- Calculate hash, where clause, and order by statement search.search := _search; search.metadata := _metadata; - search.hash := search_hash(_search, _metadata); search._where := stac_search_to_where(_search); + search.hash := search_hash_from_where(search._where, search.metadata); search.orderby := sort_sqlorderby(_search); search.lastused := now(); search.usecount := 1; @@ -3756,31 +3737,63 @@ BEGIN RETURN search; END IF; - RAISE NOTICE 'Updating Statistics for search: %s', search; - -- Update statistics for times used and and when last used - -- If the entry is locked, rather than waiting, skip updating the stats - INSERT INTO searches (search, lastused, usecount, metadata) - VALUES (search.search, now(), 1, search.metadata) - ON CONFLICT DO NOTHING - RETURNING * INTO cached_search - ; + -- Cache bookkeeping is best-effort and non-blocking. 
We always return + -- canonical hash + where, even if cache touch cannot be acquired quickly. + UPDATE searches + SET + lastused = now(), + usecount = searches.usecount + 1 + WHERE ctid = ( + SELECT ctid + FROM searches + WHERE hash = search.hash + FOR UPDATE SKIP LOCKED + LIMIT 1 + ) + RETURNING * INTO cached_search; + + IF cached_search IS NULL THEN + IF pg_try_advisory_xact_lock(hashtext(search.hash)) THEN + INSERT INTO searches (hash, search, _where, orderby, lastused, usecount, metadata) + VALUES (search.hash, search.search, search._where, search.orderby, now(), 1, search.metadata) + ON CONFLICT (hash) DO UPDATE SET + lastused = EXCLUDED.lastused, + usecount = searches.usecount + 1 + RETURNING * INTO cached_search; + END IF; - IF NOT FOUND OR cached_search IS NULL THEN - UPDATE searches SET - lastused = now(), - usecount = searches.usecount + 1 - WHERE hash = ( - SELECT hash FROM searches WHERE hash=search.hash FOR UPDATE SKIP LOCKED - ) - RETURNING * INTO cached_search - ; + IF cached_search IS NULL THEN + SELECT * INTO cached_search FROM searches WHERE hash = search.hash; + END IF; END IF; IF cached_search IS NOT NULL THEN cached_search._where = search._where; cached_search.orderby = search.orderby; + IF updatestats THEN + search_where := where_stats( + cached_search.hash, + cached_search._where, + true, + _search->'conf' + ); + cached_search.context_count := search_where.context_count; + cached_search.statslastupdated := search_where.statslastupdated; + END IF; RETURN cached_search; END IF; + + IF updatestats THEN + search_where := where_stats( + search.hash, + search._where, + true, + _search->'conf' + ); + search.context_count := search_where.context_count; + search.statslastupdated := search_where.statslastupdated; + END IF; + RETURN search; END; @@ -3789,13 +3802,153 @@ $$ LANGUAGE PLPGSQL SECURITY DEFINER; CREATE OR REPLACE FUNCTION search_fromhash( _hash text ) RETURNS searches AS $$ - SELECT * FROM search_query((SELECT search FROM searches WHERE 
hash=_hash LIMIT 1)); + SELECT * FROM searches WHERE hash = _hash LIMIT 1; $$ LANGUAGE SQL STRICT; +CREATE OR REPLACE FUNCTION name_search( + _search jsonb, + _name text, + _metadata jsonb DEFAULT '{}'::jsonb +) RETURNS searches AS $$ +DECLARE + named searches%ROWTYPE; +BEGIN + named := search_query(_search, false, _metadata); + UPDATE searches + SET + name = _name, + lastused = now(), + usecount = searches.usecount + 1 + WHERE hash = named.hash + RETURNING * INTO named; + + IF named IS NULL THEN + RAISE EXCEPTION 'Could not name search for input: %', _search; + END IF; + + RETURN named; +END; +$$ LANGUAGE PLPGSQL SECURITY DEFINER; + +CREATE OR REPLACE FUNCTION rename_search(_old_name text, _new_name text) RETURNS searches AS $$ +DECLARE + renamed searches%ROWTYPE; +BEGIN + -- Serialize rename-pair operations to avoid deadlocks on concurrent name swaps. + PERFORM pg_advisory_xact_lock( + hashtext( + least(_old_name, _new_name) + || '|' + || greatest(_old_name, _new_name) + ) + ); + + UPDATE searches + SET + name = _new_name, + lastused = now(), + usecount = searches.usecount + 1 + WHERE name = _old_name + RETURNING * INTO renamed; + + IF renamed IS NULL THEN + RAISE EXCEPTION 'Named search % not found', _old_name; + END IF; + + RETURN renamed; +END; +$$ LANGUAGE PLPGSQL SECURITY DEFINER; + +CREATE OR REPLACE FUNCTION unname_search(_name text) RETURNS searches AS $$ +DECLARE + unnamed searches%ROWTYPE; +BEGIN + UPDATE searches + SET + name = NULL, + pinned = false, + lastused = now(), + usecount = searches.usecount + 1 + WHERE name = _name + RETURNING * INTO unnamed; + + IF unnamed IS NULL THEN + RAISE EXCEPTION 'Named search % not found', _name; + END IF; + + RETURN unnamed; +END; +$$ LANGUAGE PLPGSQL SECURITY DEFINER; + +CREATE OR REPLACE FUNCTION pin_search(_name text) RETURNS searches AS $$ +DECLARE + pinned_search searches%ROWTYPE; +BEGIN + UPDATE searches + SET + pinned = true, + lastused = now(), + usecount = searches.usecount + 1 + WHERE name = _name + 
RETURNING * INTO pinned_search; + + IF pinned_search IS NULL THEN + RAISE EXCEPTION 'Named search % not found', _name; + END IF; + + RETURN pinned_search; +END; +$$ LANGUAGE PLPGSQL SECURITY DEFINER; + +CREATE OR REPLACE FUNCTION unpin_search(_name text) RETURNS searches AS $$ +DECLARE + unpinned_search searches%ROWTYPE; +BEGIN + UPDATE searches + SET + pinned = false, + lastused = now(), + usecount = searches.usecount + 1 + WHERE name = _name + RETURNING * INTO unpinned_search; + + IF unpinned_search IS NULL THEN + RAISE EXCEPTION 'Named search % not found', _name; + END IF; + + RETURN unpinned_search; +END; +$$ LANGUAGE PLPGSQL SECURITY DEFINER; + +CREATE OR REPLACE FUNCTION gc_anonymous_searches(retention_interval interval DEFAULT NULL, conf jsonb DEFAULT NULL) RETURNS bigint AS $$ + WITH effective_retention AS ( + SELECT COALESCE( + retention_interval, + search_gc_retention_interval(conf) + ) AS i + ), + deleted AS ( + DELETE FROM searches + USING effective_retention + WHERE + name IS NULL + AND NOT pinned + AND lastused < now() - effective_retention.i + RETURNING 1 + ) + SELECT count(*)::bigint FROM deleted; +$$ LANGUAGE SQL SECURITY DEFINER; + +CREATE OR REPLACE FUNCTION gc_search_caches(retention_interval interval DEFAULT NULL, conf jsonb DEFAULT NULL) RETURNS jsonb AS $$ + SELECT jsonb_build_object( + 'removed_searches', + gc_anonymous_searches(retention_interval, conf) + ); +$$ LANGUAGE SQL SECURITY DEFINER; + CREATE OR REPLACE FUNCTION search_rows( IN _where text DEFAULT 'TRUE', IN _orderby text DEFAULT 'datetime DESC, id DESC', - IN partitions text[] DEFAULT NULL, IN _limit int DEFAULT 10 ) RETURNS SETOF items AS $$ DECLARE @@ -3932,13 +4085,14 @@ BEGIN END; $$ LANGUAGE PLPGSQL SECURITY DEFINER; +DROP FUNCTION IF EXISTS search(jsonb); CREATE OR REPLACE FUNCTION search(_search jsonb = '{}'::jsonb) RETURNS jsonb AS $$ DECLARE searches searches%ROWTYPE; _where text; orderby text; - search_where search_wheres%ROWTYPE; + search_where searches%ROWTYPE; 
total_count bigint; token record; token_prev boolean; @@ -3950,7 +4104,6 @@ DECLARE hydrate bool := NOT (_search->'conf'->>'nohydrate' IS NOT NULL AND (_search->'conf'->>'nohydrate')::boolean = true); prev text; next text; - context jsonb; collection jsonb; out_records jsonb; out_len int; @@ -3965,8 +4118,8 @@ BEGIN searches := search_query(_search); _where := searches._where; orderby := searches.orderby; - search_where := where_stats(_where); - total_count := coalesce(search_where.total_count, search_where.estimated_count); + search_where := where_stats(searches.hash, _where, false, _search->'conf'); + total_count := search_where.context_count; RAISE NOTICE 'SEARCH:TOKEN: %', _search->>'token'; token := get_token_record(_search->>'token'); RAISE NOTICE '***TOKEN: %', token; @@ -4007,7 +4160,6 @@ BEGIN FROM search_rows( full_where, orderby, - search_where.partitions, _querylimit ) as i; @@ -4613,6 +4765,7 @@ INSERT INTO pgstac_settings (name, value) VALUES ('context_estimated_count', '100000'), ('context_estimated_cost', '100000'), ('context_stats_ttl', '1 day'), + ('search_gc_retention_interval', '7 days'), ('default_filter_lang', 'cql2-json'), ('additional_properties', 'true'), ('use_queue', 'false'), @@ -4672,8 +4825,15 @@ ALTER FUNCTION drop_table_constraints SECURITY DEFINER; ALTER FUNCTION create_table_constraints SECURITY DEFINER; ALTER FUNCTION check_partition SECURITY DEFINER; ALTER FUNCTION repartition SECURITY DEFINER; -ALTER FUNCTION where_stats SECURITY DEFINER; +ALTER FUNCTION where_stats(text, text, boolean, jsonb) SECURITY DEFINER; ALTER FUNCTION search_query SECURITY DEFINER; +ALTER FUNCTION name_search SECURITY DEFINER; +ALTER FUNCTION rename_search SECURITY DEFINER; +ALTER FUNCTION unname_search SECURITY DEFINER; +ALTER FUNCTION pin_search SECURITY DEFINER; +ALTER FUNCTION unpin_search SECURITY DEFINER; +ALTER FUNCTION gc_anonymous_searches(interval, jsonb) SECURITY DEFINER; +ALTER FUNCTION gc_search_caches(interval, jsonb) SECURITY DEFINER; 
ALTER FUNCTION format_item SECURITY DEFINER; ALTER FUNCTION maintain_index SECURITY DEFINER; diff --git a/src/pgstac/sql/001_core.sql b/src/pgstac/sql/001_core.sql index 46d71dbd..10c42b14 100644 --- a/src/pgstac/sql/001_core.sql +++ b/src/pgstac/sql/001_core.sql @@ -81,6 +81,10 @@ CREATE OR REPLACE FUNCTION context_stats_ttl(conf jsonb DEFAULT NULL) RETURNS in SELECT pgstac.get_setting('context_stats_ttl', conf)::interval; $$ LANGUAGE SQL; +CREATE OR REPLACE FUNCTION search_gc_retention_interval(conf jsonb DEFAULT NULL) RETURNS interval AS $$ + SELECT pgstac.get_setting('search_gc_retention_interval', conf)::interval; +$$ LANGUAGE SQL; + CREATE OR REPLACE FUNCTION t2s(text) RETURNS text AS $$ SELECT extract(epoch FROM $1::interval)::text || ' s'; $$ LANGUAGE SQL IMMUTABLE PARALLEL SAFE STRICT; @@ -89,7 +93,6 @@ CREATE OR REPLACE FUNCTION age_ms(a timestamptz, b timestamptz DEFAULT clock_tim SELECT abs(extract(epoch from age(a,b)) * 1000); $$ LANGUAGE SQL IMMUTABLE PARALLEL SAFE; - CREATE OR REPLACE FUNCTION queue_timeout() RETURNS interval AS $$ SELECT t2s(coalesce( get_setting('queue_timeout'), diff --git a/src/pgstac/sql/003a_items.sql b/src/pgstac/sql/003a_items.sql index 88924d17..d1a3e7b2 100644 --- a/src/pgstac/sql/003a_items.sql +++ b/src/pgstac/sql/003a_items.sql @@ -56,10 +56,6 @@ FOR EACH STATEMENT EXECUTE FUNCTION partition_after_triggerfunc(); -CREATE OR REPLACE FUNCTION content_slim(_item jsonb) RETURNS jsonb AS $$ - SELECT strip_jsonb(_item - '{id,geometry,collection,type}'::text[], collection_base_item(_item->>'collection')) - '{id,geometry,collection,type}'::text[]; -$$ LANGUAGE SQL IMMUTABLE PARALLEL SAFE; - CREATE OR REPLACE FUNCTION content_dehydrate(content jsonb) RETURNS items AS $$ SELECT content->>'id' as id, @@ -67,7 +63,10 @@ CREATE OR REPLACE FUNCTION content_dehydrate(content jsonb) RETURNS items AS $$ content->>'collection' as collection, stac_datetime(content) as datetime, stac_end_datetime(content) as end_datetime, - 
content_slim(content) as content, + strip_jsonb( + content - '{id,geometry,collection,type}'::text[], + collection_base_item(content->>'collection') + ) - '{id,geometry,collection,type}'::text[] as content, null::jsonb as private ; $$ LANGUAGE SQL STABLE; diff --git a/src/pgstac/sql/004_search.sql b/src/pgstac/sql/004_search.sql index a00f569e..1ced3795 100644 --- a/src/pgstac/sql/004_search.sql +++ b/src/pgstac/sql/004_search.sql @@ -501,50 +501,74 @@ BEGIN $$ LANGUAGE PLPGSQL SET transform_null_equals TO TRUE ; -CREATE OR REPLACE FUNCTION search_hash(jsonb, jsonb) RETURNS text AS $$ - SELECT md5(concat(($1 - '{token,limit,context,includes,excludes}'::text[])::text,$2::text)); +-- ============================================================================ +-- Search Hashing +-- ============================================================================ + +CREATE OR REPLACE FUNCTION pgstac_hash(data text) RETURNS text AS $$ + SELECT encode(sha256(convert_to(data, 'UTF8')), 'hex'); +$$ LANGUAGE SQL IMMUTABLE PARALLEL SAFE STRICT; + +-- Central hash helper: one canonical where-clause + metadata payload to hash. +CREATE OR REPLACE FUNCTION search_hash_from_where(_where text, _metadata jsonb DEFAULT '{}'::jsonb) RETURNS text AS $$ + SELECT pgstac_hash( + format( + '%s|%s', + _where, + coalesce(_metadata, '{}'::jsonb)::text + ) + ); $$ LANGUAGE SQL IMMUTABLE PARALLEL SAFE; -DROP FUNCTION IF EXISTS search_tohash(jsonb); +CREATE OR REPLACE FUNCTION search_hash(_search jsonb, _metadata jsonb DEFAULT '{}'::jsonb) RETURNS text AS $$ + SELECT search_hash_from_where( + stac_search_to_where(_search), + _metadata + ); +$$ LANGUAGE SQL STABLE PARALLEL SAFE; + +-- ============================================================================ +-- Search Cache Table +-- ============================================================================ + +-- Search lifecycle and context cache now live on searches; search_wheres is retired. 
CREATE TABLE IF NOT EXISTS searches( - hash text GENERATED ALWAYS AS (search_hash(search, metadata)) STORED PRIMARY KEY, + hash text PRIMARY KEY, + name text UNIQUE, search jsonb NOT NULL, _where text, orderby text, lastused timestamptz DEFAULT now(), usecount bigint DEFAULT 0, - metadata jsonb DEFAULT '{}'::jsonb NOT NULL -); - -CREATE TABLE IF NOT EXISTS search_wheres( - id bigint generated always as identity primary key, - _where text NOT NULL, - lastused timestamptz DEFAULT now(), - usecount bigint DEFAULT 0, + metadata jsonb DEFAULT '{}'::jsonb NOT NULL, + pinned boolean NOT NULL DEFAULT false, + created_at timestamptz DEFAULT now(), statslastupdated timestamptz, - estimated_count bigint, - estimated_cost float, - time_to_estimate float, - total_count bigint, - time_to_count float, - partitions text[] + context_count bigint ); +CREATE INDEX IF NOT EXISTS searches_lastused_anon_idx + ON searches (lastused) WHERE name IS NULL AND NOT pinned; + +DROP TABLE IF EXISTS search_wheres; -CREATE INDEX IF NOT EXISTS search_wheres_partitions ON search_wheres USING GIN (partitions); -CREATE UNIQUE INDEX IF NOT EXISTS search_wheres_where ON search_wheres ((md5(_where))); +-- ============================================================================ +-- Context Stats (estimate/count/TTL) +-- ============================================================================ CREATE OR REPLACE FUNCTION where_stats( + inhash text, inwhere text, updatestats boolean default false, conf jsonb default null -) RETURNS search_wheres AS $$ +) RETURNS searches AS $$ DECLARE t timestamptz; i interval; explain_json jsonb; - partitions text[]; - sw search_wheres%ROWTYPE; - inwhere_hash text := md5(inwhere); + sw searches%ROWTYPE; + sw_statslastupdated timestamptz; + sw_estimated_count bigint; + sw_estimated_cost float; _context text := lower(context(conf)); _stats_ttl interval := context_stats_ttl(conf); _estimated_cost_threshold float := context_estimated_cost(conf); @@ -559,96 +583,71 @@ 
BEGIN -- If we don't need to calculate context, just return IF _context = 'off' THEN - sw._where = inwhere; RETURN sw; END IF; - -- Get any stats that we have. - IF NOT ro THEN - -- If there is a lock where another process is - -- updating the stats, wait so that we don't end up calculating a bunch of times. - SELECT * INTO sw FROM search_wheres WHERE md5(_where)=inwhere_hash FOR UPDATE; + -- Read current stats state without holding row locks during expensive + -- estimate/count operations. + SELECT * INTO sw FROM searches WHERE hash = inhash; + + IF sw IS NULL THEN + -- In read-only mode, searches may not be persisted. Continue with + -- non-persistent estimate/count calculation so context can still be + -- returned to callers. + sw.hash := inhash; + sw._where := inwhere; + sw_statslastupdated := NULL; ELSE - SELECT * INTO sw FROM search_wheres WHERE md5(_where)=inwhere_hash; + sw_statslastupdated := sw.statslastupdated; END IF; -- If there is a cached row, figure out if we need to update IF sw IS NOT NULL AND sw.statslastupdated IS NOT NULL - AND sw.total_count IS NOT NULL + AND sw.context_count IS NOT NULL AND now() - sw.statslastupdated <= _stats_ttl THEN - -- we have a cached row with data that is within our ttl + -- We have a cached row with data that is within our ttl. RAISE DEBUG 'Stats present in table and lastupdated within ttl: %', sw; - IF NOT ro THEN - RAISE DEBUG 'Updating search_wheres only bumping lastused and usecount'; - UPDATE search_wheres SET - lastused = now(), - usecount = search_wheres.usecount + 1 - WHERE md5(_where) = inwhere_hash - RETURNING * INTO sw; - END IF; RAISE DEBUG 'Returning cached counts. 
%', sw; RETURN sw; END IF; -- Calculate estimated cost and rows -- Use explain to get estimated count/cost - IF sw.estimated_count IS NULL OR sw.estimated_cost IS NULL THEN - RAISE DEBUG 'Calculating estimated stats'; - t := clock_timestamp(); - EXECUTE format('EXPLAIN (format json) SELECT 1 FROM items WHERE %s', inwhere) - INTO explain_json; - RAISE DEBUG 'Time for just the explain: %', clock_timestamp() - t; - i := clock_timestamp() - t; - - sw.estimated_count := explain_json->0->'Plan'->'Plan Rows'; - sw.estimated_cost := explain_json->0->'Plan'->'Total Cost'; - sw.time_to_estimate := extract(epoch from i); - END IF; + RAISE DEBUG 'Calculating estimated stats'; + t := clock_timestamp(); + EXECUTE format('EXPLAIN (format json) SELECT 1 FROM items WHERE %s', inwhere) + INTO explain_json; + RAISE DEBUG 'Time for just the explain: %', clock_timestamp() - t; + i := clock_timestamp() - t; + + sw_estimated_count := (explain_json->0->'Plan'->>'Plan Rows')::bigint; + sw_estimated_cost := (explain_json->0->'Plan'->>'Total Cost')::float; - RAISE DEBUG 'ESTIMATED_COUNT: %, THRESHOLD %', sw.estimated_count, _estimated_count_threshold; - RAISE DEBUG 'ESTIMATED_COST: %, THRESHOLD %', sw.estimated_cost, _estimated_cost_threshold; + RAISE DEBUG 'ESTIMATED_COUNT: %, THRESHOLD %', sw_estimated_count, _estimated_count_threshold; + RAISE DEBUG 'ESTIMATED_COST: %, THRESHOLD %', sw_estimated_cost, _estimated_cost_threshold; -- If context is set to auto and the costs are within the threshold return the estimated costs IF _context = 'auto' - AND sw.estimated_count >= _estimated_count_threshold - AND sw.estimated_cost >= _estimated_cost_threshold + AND sw_estimated_count >= _estimated_count_threshold + AND sw_estimated_cost >= _estimated_cost_threshold THEN + sw.context_count := sw_estimated_count; IF NOT ro THEN - INSERT INTO search_wheres ( - _where, - lastused, - usecount, - statslastupdated, - estimated_count, - estimated_cost, - time_to_estimate, - total_count, - time_to_count - ) 
VALUES ( - inwhere, - now(), - 1, - now(), - sw.estimated_count, - sw.estimated_cost, - sw.time_to_estimate, - null, - null - ) ON CONFLICT ((md5(_where))) - DO UPDATE SET - lastused = EXCLUDED.lastused, - usecount = search_wheres.usecount + 1, - statslastupdated = EXCLUDED.statslastupdated, - estimated_count = EXCLUDED.estimated_count, - estimated_cost = EXCLUDED.estimated_cost, - time_to_estimate = EXCLUDED.time_to_estimate, - total_count = EXCLUDED.total_count, - time_to_count = EXCLUDED.time_to_count + UPDATE searches SET + statslastupdated = now(), + context_count = sw.context_count + WHERE + hash = inhash + AND statslastupdated IS NOT DISTINCT FROM sw_statslastupdated RETURNING * INTO sw; + + IF sw IS NULL THEN + SELECT * INTO sw FROM searches WHERE hash = inhash; + END IF; END IF; RAISE DEBUG 'Estimates are within thresholds, returning estimates. %', sw; RETURN sw; @@ -660,43 +659,22 @@ BEGIN EXECUTE format( 'SELECT count(*) FROM items WHERE %s', inwhere - ) INTO sw.total_count; + ) INTO sw.context_count; i := clock_timestamp() - t; - RAISE NOTICE 'Actual Count: % -- %', sw.total_count, i; - sw.time_to_count := extract(epoch FROM i); + RAISE NOTICE 'Actual Count: % -- %', sw.context_count, i; IF NOT ro THEN - INSERT INTO search_wheres ( - _where, - lastused, - usecount, - statslastupdated, - estimated_count, - estimated_cost, - time_to_estimate, - total_count, - time_to_count - ) VALUES ( - inwhere, - now(), - 1, - now(), - sw.estimated_count, - sw.estimated_cost, - sw.time_to_estimate, - sw.total_count, - sw.time_to_count - ) ON CONFLICT ((md5(_where))) - DO UPDATE SET - lastused = EXCLUDED.lastused, - usecount = search_wheres.usecount + 1, - statslastupdated = EXCLUDED.statslastupdated, - estimated_count = EXCLUDED.estimated_count, - estimated_cost = EXCLUDED.estimated_cost, - time_to_estimate = EXCLUDED.time_to_estimate, - total_count = EXCLUDED.total_count, - time_to_count = EXCLUDED.time_to_count + UPDATE searches SET + statslastupdated = now(), + 
context_count = sw.context_count + WHERE + hash = inhash + AND statslastupdated IS NOT DISTINCT FROM sw_statslastupdated RETURNING * INTO sw; + + IF sw IS NULL THEN + SELECT * INTO sw FROM searches WHERE hash = inhash; + END IF; END IF; RAISE DEBUG 'Returning with actual count. %', sw; RETURN sw; @@ -704,6 +682,12 @@ END; $$ LANGUAGE PLPGSQL SECURITY DEFINER; +-- ============================================================================ +-- Search Cache Lifecycle (create, name, pin, GC) +-- ============================================================================ + +DROP FUNCTION IF EXISTS search_query(jsonb, boolean, jsonb); + CREATE OR REPLACE FUNCTION search_query( _search jsonb = '{}'::jsonb, updatestats boolean = false, @@ -712,20 +696,15 @@ CREATE OR REPLACE FUNCTION search_query( DECLARE search searches%ROWTYPE; cached_search searches%ROWTYPE; - pexplain jsonb; - t timestamptz; - i interval; - doupdate boolean := FALSE; - insertfound boolean := FALSE; + search_where searches%ROWTYPE; ro boolean := pgstac.readonly(); - found_search text; BEGIN RAISE NOTICE 'SEARCH: %', _search; -- Calculate hash, where clause, and order by statement search.search := _search; search.metadata := _metadata; - search.hash := search_hash(_search, _metadata); search._where := stac_search_to_where(_search); + search.hash := search_hash_from_where(search._where, search.metadata); search.orderby := sort_sqlorderby(_search); search.lastused := now(); search.usecount := 1; @@ -735,31 +714,63 @@ BEGIN RETURN search; END IF; - RAISE NOTICE 'Updating Statistics for search: %s', search; - -- Update statistics for times used and and when last used - -- If the entry is locked, rather than waiting, skip updating the stats - INSERT INTO searches (search, lastused, usecount, metadata) - VALUES (search.search, now(), 1, search.metadata) - ON CONFLICT DO NOTHING - RETURNING * INTO cached_search - ; + -- Cache bookkeeping is best-effort and non-blocking. 
We always return + -- canonical hash + where, even if cache touch cannot be acquired quickly. + UPDATE searches + SET + lastused = now(), + usecount = searches.usecount + 1 + WHERE ctid = ( + SELECT ctid + FROM searches + WHERE hash = search.hash + FOR UPDATE SKIP LOCKED + LIMIT 1 + ) + RETURNING * INTO cached_search; + + IF cached_search IS NULL THEN + IF pg_try_advisory_xact_lock(hashtext(search.hash)) THEN + INSERT INTO searches (hash, search, _where, orderby, lastused, usecount, metadata) + VALUES (search.hash, search.search, search._where, search.orderby, now(), 1, search.metadata) + ON CONFLICT (hash) DO UPDATE SET + lastused = EXCLUDED.lastused, + usecount = searches.usecount + 1 + RETURNING * INTO cached_search; + END IF; - IF NOT FOUND OR cached_search IS NULL THEN - UPDATE searches SET - lastused = now(), - usecount = searches.usecount + 1 - WHERE hash = ( - SELECT hash FROM searches WHERE hash=search.hash FOR UPDATE SKIP LOCKED - ) - RETURNING * INTO cached_search - ; + IF cached_search IS NULL THEN + SELECT * INTO cached_search FROM searches WHERE hash = search.hash; + END IF; END IF; IF cached_search IS NOT NULL THEN cached_search._where = search._where; cached_search.orderby = search.orderby; + IF updatestats THEN + search_where := where_stats( + cached_search.hash, + cached_search._where, + true, + _search->'conf' + ); + cached_search.context_count := search_where.context_count; + cached_search.statslastupdated := search_where.statslastupdated; + END IF; RETURN cached_search; END IF; + + IF updatestats THEN + search_where := where_stats( + search.hash, + search._where, + true, + _search->'conf' + ); + search.context_count := search_where.context_count; + search.statslastupdated := search_where.statslastupdated; + END IF; + RETURN search; END; @@ -768,13 +779,153 @@ $$ LANGUAGE PLPGSQL SECURITY DEFINER; CREATE OR REPLACE FUNCTION search_fromhash( _hash text ) RETURNS searches AS $$ - SELECT * FROM search_query((SELECT search FROM searches WHERE 
hash=_hash LIMIT 1)); + SELECT * FROM searches WHERE hash = _hash LIMIT 1; $$ LANGUAGE SQL STRICT; +CREATE OR REPLACE FUNCTION name_search( + _search jsonb, + _name text, + _metadata jsonb DEFAULT '{}'::jsonb +) RETURNS searches AS $$ +DECLARE + named searches%ROWTYPE; +BEGIN + named := search_query(_search, false, _metadata); + UPDATE searches + SET + name = _name, + lastused = now(), + usecount = searches.usecount + 1 + WHERE hash = named.hash + RETURNING * INTO named; + + IF named IS NULL THEN + RAISE EXCEPTION 'Could not name search for input: %', _search; + END IF; + + RETURN named; +END; +$$ LANGUAGE PLPGSQL SECURITY DEFINER; + +CREATE OR REPLACE FUNCTION rename_search(_old_name text, _new_name text) RETURNS searches AS $$ +DECLARE + renamed searches%ROWTYPE; +BEGIN + -- Serialize rename-pair operations to avoid deadlocks on concurrent name swaps. + PERFORM pg_advisory_xact_lock( + hashtext( + least(_old_name, _new_name) + || '|' + || greatest(_old_name, _new_name) + ) + ); + + UPDATE searches + SET + name = _new_name, + lastused = now(), + usecount = searches.usecount + 1 + WHERE name = _old_name + RETURNING * INTO renamed; + + IF renamed IS NULL THEN + RAISE EXCEPTION 'Named search % not found', _old_name; + END IF; + + RETURN renamed; +END; +$$ LANGUAGE PLPGSQL SECURITY DEFINER; + +CREATE OR REPLACE FUNCTION unname_search(_name text) RETURNS searches AS $$ +DECLARE + unnamed searches%ROWTYPE; +BEGIN + UPDATE searches + SET + name = NULL, + pinned = false, + lastused = now(), + usecount = searches.usecount + 1 + WHERE name = _name + RETURNING * INTO unnamed; + + IF unnamed IS NULL THEN + RAISE EXCEPTION 'Named search % not found', _name; + END IF; + + RETURN unnamed; +END; +$$ LANGUAGE PLPGSQL SECURITY DEFINER; + +CREATE OR REPLACE FUNCTION pin_search(_name text) RETURNS searches AS $$ +DECLARE + pinned_search searches%ROWTYPE; +BEGIN + UPDATE searches + SET + pinned = true, + lastused = now(), + usecount = searches.usecount + 1 + WHERE name = _name + 
RETURNING * INTO pinned_search; + + IF pinned_search IS NULL THEN + RAISE EXCEPTION 'Named search % not found', _name; + END IF; + + RETURN pinned_search; +END; +$$ LANGUAGE PLPGSQL SECURITY DEFINER; + +CREATE OR REPLACE FUNCTION unpin_search(_name text) RETURNS searches AS $$ +DECLARE + unpinned_search searches%ROWTYPE; +BEGIN + UPDATE searches + SET + pinned = false, + lastused = now(), + usecount = searches.usecount + 1 + WHERE name = _name + RETURNING * INTO unpinned_search; + + IF unpinned_search IS NULL THEN + RAISE EXCEPTION 'Named search % not found', _name; + END IF; + + RETURN unpinned_search; +END; +$$ LANGUAGE PLPGSQL SECURITY DEFINER; + +CREATE OR REPLACE FUNCTION gc_anonymous_searches(retention_interval interval DEFAULT NULL, conf jsonb DEFAULT NULL) RETURNS bigint AS $$ + WITH effective_retention AS ( + SELECT COALESCE( + retention_interval, + search_gc_retention_interval(conf) + ) AS i + ), + deleted AS ( + DELETE FROM searches + USING effective_retention + WHERE + name IS NULL + AND NOT pinned + AND lastused < now() - effective_retention.i + RETURNING 1 + ) + SELECT count(*)::bigint FROM deleted; +$$ LANGUAGE SQL SECURITY DEFINER; + +CREATE OR REPLACE FUNCTION gc_search_caches(retention_interval interval DEFAULT NULL, conf jsonb DEFAULT NULL) RETURNS jsonb AS $$ + SELECT jsonb_build_object( + 'removed_searches', + gc_anonymous_searches(retention_interval, conf) + ); +$$ LANGUAGE SQL SECURITY DEFINER; + CREATE OR REPLACE FUNCTION search_rows( IN _where text DEFAULT 'TRUE', IN _orderby text DEFAULT 'datetime DESC, id DESC', - IN partitions text[] DEFAULT NULL, IN _limit int DEFAULT 10 ) RETURNS SETOF items AS $$ DECLARE @@ -911,13 +1062,14 @@ BEGIN END; $$ LANGUAGE PLPGSQL SECURITY DEFINER; +DROP FUNCTION IF EXISTS search(jsonb); CREATE OR REPLACE FUNCTION search(_search jsonb = '{}'::jsonb) RETURNS jsonb AS $$ DECLARE searches searches%ROWTYPE; _where text; orderby text; - search_where search_wheres%ROWTYPE; + search_where searches%ROWTYPE; 
total_count bigint; token record; token_prev boolean; @@ -929,7 +1081,6 @@ DECLARE hydrate bool := NOT (_search->'conf'->>'nohydrate' IS NOT NULL AND (_search->'conf'->>'nohydrate')::boolean = true); prev text; next text; - context jsonb; collection jsonb; out_records jsonb; out_len int; @@ -944,8 +1095,8 @@ BEGIN searches := search_query(_search); _where := searches._where; orderby := searches.orderby; - search_where := where_stats(_where); - total_count := coalesce(search_where.total_count, search_where.estimated_count); + search_where := where_stats(searches.hash, _where, false, _search->'conf'); + total_count := search_where.context_count; RAISE NOTICE 'SEARCH:TOKEN: %', _search->>'token'; token := get_token_record(_search->>'token'); RAISE NOTICE '***TOKEN: %', token; @@ -986,7 +1137,6 @@ BEGIN FROM search_rows( full_where, orderby, - search_where.partitions, _querylimit ) as i; diff --git a/src/pgstac/sql/998_idempotent_post.sql b/src/pgstac/sql/998_idempotent_post.sql index 2a6cad7c..d99bc6b4 100644 --- a/src/pgstac/sql/998_idempotent_post.sql +++ b/src/pgstac/sql/998_idempotent_post.sql @@ -34,6 +34,7 @@ INSERT INTO pgstac_settings (name, value) VALUES ('context_estimated_count', '100000'), ('context_estimated_cost', '100000'), ('context_stats_ttl', '1 day'), + ('search_gc_retention_interval', '7 days'), ('default_filter_lang', 'cql2-json'), ('additional_properties', 'true'), ('use_queue', 'false'), @@ -93,8 +94,15 @@ ALTER FUNCTION drop_table_constraints SECURITY DEFINER; ALTER FUNCTION create_table_constraints SECURITY DEFINER; ALTER FUNCTION check_partition SECURITY DEFINER; ALTER FUNCTION repartition SECURITY DEFINER; -ALTER FUNCTION where_stats SECURITY DEFINER; +ALTER FUNCTION where_stats(text, text, boolean, jsonb) SECURITY DEFINER; ALTER FUNCTION search_query SECURITY DEFINER; +ALTER FUNCTION name_search SECURITY DEFINER; +ALTER FUNCTION rename_search SECURITY DEFINER; +ALTER FUNCTION unname_search SECURITY DEFINER; +ALTER FUNCTION pin_search 
SECURITY DEFINER; +ALTER FUNCTION unpin_search SECURITY DEFINER; +ALTER FUNCTION gc_anonymous_searches(interval, jsonb) SECURITY DEFINER; +ALTER FUNCTION gc_search_caches(interval, jsonb) SECURITY DEFINER; ALTER FUNCTION format_item SECURITY DEFINER; ALTER FUNCTION maintain_index SECURITY DEFINER; diff --git a/src/pgstac/tests/basic/cql_searches.sql.out b/src/pgstac/tests/basic/cql_searches.sql.out index 3db697a0..98dd7e9b 100644 --- a/src/pgstac/tests/basic/cql_searches.sql.out +++ b/src/pgstac/tests/basic/cql_searches.sql.out @@ -57,10 +57,10 @@ SELECT usecount IS NOT NULL and usecount > 0 AND lastused IS NOT NULL AND lastus t SELECT hash, search, _where, orderby, metadata from search_query('{"collections":["pgstac-test-collection"]}'::jsonb, _metadata=>'{"meta":"value"}'::jsonb); - 06efe6c09f0d61fd212e882325041a73 | {"collections": ["pgstac-test-collection"]} | collection = ANY ('{pgstac-test-collection}') | datetime DESC, id DESC | {"meta": "value"} + 5caf5ff614e63896266921420f5aa36823dd5be253542f204ab24fd402002574 | {"collections": ["pgstac-test-collection"]} | collection = ANY ('{pgstac-test-collection}') | datetime DESC, id DESC | {"meta": "value"} SELECT hash, search, _where, orderby, metadata from search_query('{"collections":["pgstac-test-collection"]}'::jsonb, _metadata=>'{"meta":"value"}'::jsonb); - 06efe6c09f0d61fd212e882325041a73 | {"collections": ["pgstac-test-collection"]} | collection = ANY ('{pgstac-test-collection}') | datetime DESC, id DESC | {"meta": "value"} + 5caf5ff614e63896266921420f5aa36823dd5be253542f204ab24fd402002574 | {"collections": ["pgstac-test-collection"]} | collection = ANY ('{pgstac-test-collection}') | datetime DESC, id DESC | {"meta": "value"} SELECT usecount IS NOT NULL and usecount > 0 AND lastused IS NOT NULL AND lastused < clock_timestamp() FROM search_query('{"collections":["pgstac-test-collection"]}'); t diff --git a/src/pgstac/tests/basic/xyz_searches.sql b/src/pgstac/tests/basic/xyz_searches.sql index 
841e194c..f2af5cb8 100644 --- a/src/pgstac/tests/basic/xyz_searches.sql +++ b/src/pgstac/tests/basic/xyz_searches.sql @@ -2,18 +2,18 @@ SET pgstac."default_filter_lang" TO 'cql-json'; SELECT hash from search_query('{"collections":["pgstac-test-collection"]}'); -SELECT hash, search, metadata FROM search_fromhash('2bbae9a0ef0bbb5ffaca06603ce621d7'); +SELECT hash, search, metadata FROM search_fromhash('fd8daf2e208762fc3eedb83e8a9213421c7372bbd23723f31f51d18330f0bec7'); -SELECT xyzsearch(8615, 13418, 15, '2bbae9a0ef0bbb5ffaca06603ce621d7', '{"include":["id"]}'::jsonb); +SELECT xyzsearch(8615, 13418, 15, 'fd8daf2e208762fc3eedb83e8a9213421c7372bbd23723f31f51d18330f0bec7', '{"include":["id"]}'::jsonb); -SELECT xyzsearch(1048, 1682, 12, '2bbae9a0ef0bbb5ffaca06603ce621d7', '{"include":["id"]}'::jsonb); +SELECT xyzsearch(1048, 1682, 12, 'fd8daf2e208762fc3eedb83e8a9213421c7372bbd23723f31f51d18330f0bec7', '{"include":["id"]}'::jsonb); -SELECT xyzsearch(1048, 1682, 12, '2bbae9a0ef0bbb5ffaca06603ce621d7', '{"include":["id"]}'::jsonb, NULL, 1); +SELECT xyzsearch(1048, 1682, 12, 'fd8daf2e208762fc3eedb83e8a9213421c7372bbd23723f31f51d18330f0bec7', '{"include":["id"]}'::jsonb, NULL, 1); -SELECT xyzsearch(16792, 26892, 16, '2bbae9a0ef0bbb5ffaca06603ce621d7', '{"include":["id"]}'::jsonb, exitwhenfull => true); +SELECT xyzsearch(16792, 26892, 16, 'fd8daf2e208762fc3eedb83e8a9213421c7372bbd23723f31f51d18330f0bec7', '{"include":["id"]}'::jsonb, exitwhenfull => true); -SELECT xyzsearch(16792, 26892, 16, '2bbae9a0ef0bbb5ffaca06603ce621d7', '{"include":["id"]}'::jsonb, exitwhenfull => false, skipcovered => false); +SELECT xyzsearch(16792, 26892, 16, 'fd8daf2e208762fc3eedb83e8a9213421c7372bbd23723f31f51d18330f0bec7', '{"include":["id"]}'::jsonb, exitwhenfull => false, skipcovered => false); -SELECT geojsonsearch('{"type": "Point","coordinates": [-87.75608539581299,30.692471153735646]}', '2bbae9a0ef0bbb5ffaca06603ce621d7', '{"include":["id"]}'::jsonb, exitwhenfull => true, skipcovered => true); 
+SELECT geojsonsearch('{"type": "Point","coordinates": [-87.75608539581299,30.692471153735646]}', 'fd8daf2e208762fc3eedb83e8a9213421c7372bbd23723f31f51d18330f0bec7', '{"include":["id"]}'::jsonb, exitwhenfull => true, skipcovered => true); -SELECT geojsonsearch('{"type": "Point","coordinates": [-87.75608539581299,30.692471153735646]}', '2bbae9a0ef0bbb5ffaca06603ce621d7', '{"include":["id"]}'::jsonb, exitwhenfull => false, skipcovered => false) s; +SELECT geojsonsearch('{"type": "Point","coordinates": [-87.75608539581299,30.692471153735646]}', 'fd8daf2e208762fc3eedb83e8a9213421c7372bbd23723f31f51d18330f0bec7', '{"include":["id"]}'::jsonb, exitwhenfull => false, skipcovered => false) s; diff --git a/src/pgstac/tests/basic/xyz_searches.sql.out b/src/pgstac/tests/basic/xyz_searches.sql.out index efec8aff..46f94c88 100644 --- a/src/pgstac/tests/basic/xyz_searches.sql.out +++ b/src/pgstac/tests/basic/xyz_searches.sql.out @@ -1,28 +1,28 @@ SET pgstac."default_filter_lang" TO 'cql-json'; SET SELECT hash from search_query('{"collections":["pgstac-test-collection"]}'); - 2bbae9a0ef0bbb5ffaca06603ce621d7 + fd8daf2e208762fc3eedb83e8a9213421c7372bbd23723f31f51d18330f0bec7 -SELECT hash, search, metadata FROM search_fromhash('2bbae9a0ef0bbb5ffaca06603ce621d7'); - 2bbae9a0ef0bbb5ffaca06603ce621d7 | {"collections": ["pgstac-test-collection"]} | {} +SELECT hash, search, metadata FROM search_fromhash('fd8daf2e208762fc3eedb83e8a9213421c7372bbd23723f31f51d18330f0bec7'); + fd8daf2e208762fc3eedb83e8a9213421c7372bbd23723f31f51d18330f0bec7 | {"collections": ["pgstac-test-collection"]} | {} -SELECT xyzsearch(8615, 13418, 15, '2bbae9a0ef0bbb5ffaca06603ce621d7', '{"include":["id"]}'::jsonb); +SELECT xyzsearch(8615, 13418, 15, 'fd8daf2e208762fc3eedb83e8a9213421c7372bbd23723f31f51d18330f0bec7', '{"include":["id"]}'::jsonb); {"type": "FeatureCollection", "features": [{"id": "pgstac-test-item-0003", "collection": "pgstac-test-collection"}]} -SELECT xyzsearch(1048, 1682, 12, 
'2bbae9a0ef0bbb5ffaca06603ce621d7', '{"include":["id"]}'::jsonb); +SELECT xyzsearch(1048, 1682, 12, 'fd8daf2e208762fc3eedb83e8a9213421c7372bbd23723f31f51d18330f0bec7', '{"include":["id"]}'::jsonb); {"type": "FeatureCollection", "features": [{"id": "pgstac-test-item-0050", "collection": "pgstac-test-collection"}, {"id": "pgstac-test-item-0049", "collection": "pgstac-test-collection"}, {"id": "pgstac-test-item-0048", "collection": "pgstac-test-collection"}, {"id": "pgstac-test-item-0047", "collection": "pgstac-test-collection"}, {"id": "pgstac-test-item-0100", "collection": "pgstac-test-collection"}, {"id": "pgstac-test-item-0089", "collection": "pgstac-test-collection"}]} -SELECT xyzsearch(1048, 1682, 12, '2bbae9a0ef0bbb5ffaca06603ce621d7', '{"include":["id"]}'::jsonb, NULL, 1); +SELECT xyzsearch(1048, 1682, 12, 'fd8daf2e208762fc3eedb83e8a9213421c7372bbd23723f31f51d18330f0bec7', '{"include":["id"]}'::jsonb, NULL, 1); {"type": "FeatureCollection", "features": [{"id": "pgstac-test-item-0050", "collection": "pgstac-test-collection"}]} -SELECT xyzsearch(16792, 26892, 16, '2bbae9a0ef0bbb5ffaca06603ce621d7', '{"include":["id"]}'::jsonb, exitwhenfull => true); +SELECT xyzsearch(16792, 26892, 16, 'fd8daf2e208762fc3eedb83e8a9213421c7372bbd23723f31f51d18330f0bec7', '{"include":["id"]}'::jsonb, exitwhenfull => true); {"type": "FeatureCollection", "features": [{"id": "pgstac-test-item-0098", "collection": "pgstac-test-collection"}, {"id": "pgstac-test-item-0097", "collection": "pgstac-test-collection"}]} -SELECT xyzsearch(16792, 26892, 16, '2bbae9a0ef0bbb5ffaca06603ce621d7', '{"include":["id"]}'::jsonb, exitwhenfull => false, skipcovered => false); +SELECT xyzsearch(16792, 26892, 16, 'fd8daf2e208762fc3eedb83e8a9213421c7372bbd23723f31f51d18330f0bec7', '{"include":["id"]}'::jsonb, exitwhenfull => false, skipcovered => false); {"type": "FeatureCollection", "features": [{"id": "pgstac-test-item-0098", "collection": "pgstac-test-collection"}, {"id": "pgstac-test-item-0097", 
"collection": "pgstac-test-collection"}, {"id": "pgstac-test-item-0091", "collection": "pgstac-test-collection"}]} -SELECT geojsonsearch('{"type": "Point","coordinates": [-87.75608539581299,30.692471153735646]}', '2bbae9a0ef0bbb5ffaca06603ce621d7', '{"include":["id"]}'::jsonb, exitwhenfull => true, skipcovered => true); +SELECT geojsonsearch('{"type": "Point","coordinates": [-87.75608539581299,30.692471153735646]}', 'fd8daf2e208762fc3eedb83e8a9213421c7372bbd23723f31f51d18330f0bec7', '{"include":["id"]}'::jsonb, exitwhenfull => true, skipcovered => true); {"type": "FeatureCollection", "features": [{"id": "pgstac-test-item-0097", "collection": "pgstac-test-collection"}]} -SELECT geojsonsearch('{"type": "Point","coordinates": [-87.75608539581299,30.692471153735646]}', '2bbae9a0ef0bbb5ffaca06603ce621d7', '{"include":["id"]}'::jsonb, exitwhenfull => false, skipcovered => false) s; +SELECT geojsonsearch('{"type": "Point","coordinates": [-87.75608539581299,30.692471153735646]}', 'fd8daf2e208762fc3eedb83e8a9213421c7372bbd23723f31f51d18330f0bec7', '{"include":["id"]}'::jsonb, exitwhenfull => false, skipcovered => false) s; {"type": "FeatureCollection", "features": [{"id": "pgstac-test-item-0097", "collection": "pgstac-test-collection"}]} diff --git a/src/pgstac/tests/pgtap.sql b/src/pgstac/tests/pgtap.sql index a2819eaa..ed61bceb 100644 --- a/src/pgstac/tests/pgtap.sql +++ b/src/pgstac/tests/pgtap.sql @@ -17,7 +17,7 @@ CREATE EXTENSION IF NOT EXISTS pgtap; SET SEARCH_PATH TO pgstac, pgtap, public; -- Plan the tests. -SELECT plan(229); +SELECT plan(248); --SELECT * FROM no_plan(); -- Run the tests. 
diff --git a/src/pgstac/tests/pgtap/001_core.sql b/src/pgstac/tests/pgtap/001_core.sql index ee8ecbbf..9ae836d7 100644 --- a/src/pgstac/tests/pgtap/001_core.sql +++ b/src/pgstac/tests/pgtap/001_core.sql @@ -14,6 +14,18 @@ SELECT results_eq( 'to_text_array returns text[] from jsonb array' ); +SELECT has_function('pgstac'::name, 'pgstac_hash', ARRAY['text']); +SELECT results_eq( + $$ SELECT pgstac_hash('abc') $$, + $$ SELECT 'ba7816bf8f01cfea414140de5dae2223b00361a396177a9cb410ff61f20015ad'::text $$, + 'pgstac_hash returns the expected sha256 hex digest' +); +SELECT is( + pgstac_hash(NULL), + NULL, + 'pgstac_hash is strict and returns NULL for NULL input' +); + SET pgstac.readonly to 'false'; SELECT results_eq( diff --git a/src/pgstac/tests/pgtap/004_search.sql b/src/pgstac/tests/pgtap/004_search.sql index cbbc6678..a5b81a45 100644 --- a/src/pgstac/tests/pgtap/004_search.sql +++ b/src/pgstac/tests/pgtap/004_search.sql @@ -69,6 +69,143 @@ SELECT results_eq($$ SELECT has_function('pgstac'::name, 'search_query', ARRAY['jsonb','boolean','jsonb']); +SELECT has_function('pgstac'::name, 'name_search', ARRAY['jsonb','text','jsonb']); +SELECT has_function('pgstac'::name, 'rename_search', ARRAY['text','text']); +SELECT has_function('pgstac'::name, 'unname_search', ARRAY['text']); +SELECT has_function('pgstac'::name, 'pin_search', ARRAY['text']); +SELECT has_function('pgstac'::name, 'unpin_search', ARRAY['text']); +SELECT has_function('pgstac'::name, 'search_gc_retention_interval', ARRAY['jsonb']); +SELECT has_function('pgstac'::name, 'gc_anonymous_searches', ARRAY['interval','jsonb']); +SELECT has_function('pgstac'::name, 'gc_search_caches', ARRAY['interval','jsonb']); + +SELECT results_eq( + $$ SELECT (name_search('{"collections":["pgstac-test-collection"]}'::jsonb, 'pgstac-test-named-search')).name $$, + $$ SELECT 'pgstac-test-named-search'::text $$, + 'name_search assigns a stable name' +); +SELECT results_eq( + $$ SELECT (rename_search('pgstac-test-named-search', 
'pgstac-test-renamed-search')).name $$, + $$ SELECT 'pgstac-test-renamed-search'::text $$, + 'rename_search renames an existing named search' +); +SELECT results_eq( + $$ SELECT (pin_search('pgstac-test-renamed-search')).pinned $$, + $$ SELECT TRUE $$, + 'pin_search sets pinned=true' +); +SELECT results_eq( + $$ SELECT (unpin_search('pgstac-test-renamed-search')).pinned $$, + $$ SELECT FALSE $$, + 'unpin_search sets pinned=false' +); +SELECT results_eq( + $$ SELECT (unname_search('pgstac-test-renamed-search')).name IS NULL $$, + $$ SELECT TRUE $$, + 'unname_search clears search name' +); +SELECT results_eq( + $$ SELECT search_gc_retention_interval('{"search_gc_retention_interval":"3 days"}'::jsonb) $$, + $$ SELECT '3 days'::interval $$, + 'GC retention interval honors conf override' +); +SELECT lives_ok( + $$ + INSERT INTO searches ( + hash, + search, + _where, + orderby, + metadata, + lastused, + usecount, + pinned, + name + ) VALUES ( + pgstac_hash('gc-test-row-' || clock_timestamp()::text), + '{}'::jsonb, + 'TRUE', + 'datetime DESC, id DESC', + '{}'::jsonb, + now() - '2 days'::interval, + 1, + false, + NULL + ) + $$, + 'Seed an old anonymous search row for GC test' +); +SELECT results_eq( + $$ SELECT gc_anonymous_searches(NULL, '{"search_gc_retention_interval":"1 day"}'::jsonb) > 0 $$, + $$ SELECT TRUE $$, + 'gc_anonymous_searches uses retention from conf when interval arg is null' +); + +SELECT ok( + to_regclass('pgstac.search_wheres') IS NULL, + 'search_wheres table removed' +); +SELECT ok( + EXISTS ( + SELECT 1 + FROM information_schema.columns + WHERE + table_schema = 'pgstac' + AND table_name = 'searches' + AND column_name = 'context_count' + ), + 'searches table stores context_count cache' +); +SELECT ok( + EXISTS ( + SELECT 1 + FROM information_schema.columns + WHERE + table_schema = 'pgstac' + AND table_name = 'searches' + AND column_name = 'statslastupdated' + ), + 'searches table stores statslastupdated for TTL' +); +SELECT results_eq( + $$ + SELECT 
to_jsonb(array_agg(column_name ORDER BY column_name)) + FROM information_schema.columns + WHERE table_schema = 'pgstac' AND table_name = 'searches' + $$, + $$ + SELECT to_jsonb(ARRAY[ + '_where', + 'context_count', + 'created_at', + 'hash', + 'lastused', + 'metadata', + 'name', + 'orderby', + 'pinned', + 'search', + 'statslastupdated', + 'usecount' + ]::text[]) + $$, + 'searches table has only expected columns' +); + +SELECT results_eq( + $$ + SELECT search_hash( + '{"collections":["pgstac-test-collection"],"limit":10,"token":"next:abc","context":"on","sortby":[{"field":"id","direction":"asc"}]}'::jsonb, + '{}'::jsonb + ) + $$, + $$ + SELECT search_hash( + '{"collections":["pgstac-test-collection"],"limit":1,"token":"prev:def","context":"off","sortby":[{"field":"datetime","direction":"desc"}]}'::jsonb, + '{}'::jsonb + ) + $$, + 'search_hash ignores pagination, token, context, and sort fields' +); SELECT results_eq($$ diff --git a/src/pgstac/tests/pgtap/9999_readonly.sql b/src/pgstac/tests/pgtap/9999_readonly.sql index 4c7c474f..679f0af1 100644 --- a/src/pgstac/tests/pgtap/9999_readonly.sql +++ b/src/pgstac/tests/pgtap/9999_readonly.sql @@ -28,4 +28,17 @@ SELECT lives_ok( $$ SELECT search('{}'); $$, 'Search works with readonly mode set to on in readonly mode and the context extension enabled.' ); +SELECT results_eq( + $$ SELECT (search('{}')->>'numberMatched') IS NOT NULL; $$, + $$ SELECT TRUE; $$, + 'Readonly search with context on returns numberMatched without requiring cache writes.' +); +SELECT throws_ok( + $$ SELECT name_search('{"collections":["pgstac-test-collection"]}'::jsonb, 'readonly-should-fail'); $$, + '25006' +); +SELECT throws_ok( + $$ SELECT gc_anonymous_searches(NULL, '{"search_gc_retention_interval":"1 second"}'::jsonb); $$, + '25006' +); RESET pgstac.readonly;