stac-utils · bitner · May 21, 2026 · May 20, 2026 · May 20, 2026 · May 20, 2026
diff --git a/.github/instructions/scripts.instructions.md b/.github/instructions/scripts.instructions.md
@@ -15,3 +15,7 @@ See CLAUDE.md "Development Workflow" for usage. All scripts require the Docker c
 - Set `PGPKG_LOCAL_REPO_DIR` on the host when you need to force a local pgpkg checkout for `stageversion`, `makemigration`, or related container-script testing
 - Tagged releases run `.github/workflows/release.yml`, which publishes both `pypgstac` and `pgstac-migrate` to PyPI via the GitHub `pypi` environment; PyPI trusted publishers must exist for both projects
 - DO NOT run `stageversion` without understanding its side effects
+- Benchmark fixture/reporting scripts:
+  - `scripts/benchmark_fetch_pc_fixtures.py` materializes deterministic Planetary Computer fixtures from `benchmarks/fixtures/planetary-computer/manifest.json`
+  - `scripts/benchmark_run.py` runs ingest/hydrate/storage benchmarks and writes JSON/CSV/Markdown artifacts
+  - `scripts/benchmark_compare_results.py` compares two benchmark JSON reports and emits machine-readable deltas
diff --git a/.github/workflows/benchmark-compare.yml b/.github/workflows/benchmark-compare.yml
@@ -0,0 +1,110 @@
+# Manually dispatched workflow: benchmarks a base ref and the checked-out head
+# against the same fixtures, then publishes a comparison report.
+name: Benchmark Compare (manual)
+
+on:
+  workflow_dispatch:
+    inputs:
+      base_ref:
+        description: "Base ref to compare against"
+        required: true
+        default: "origin/main"
+      hydrate_iterations:
+        description: "Hydrate benchmark iterations"
+        required: true
+        default: "5"
+
+permissions:
+  contents: read
+
+jobs:
+  compare:
+    runs-on: ubuntu-latest
+    services:
+      postgres:
+        image: ghcr.io/stac-utils/pgstac-postgres:main-pg17
+        env:
+          POSTGRES_USER: username
+          POSTGRES_PASSWORD: password
+          POSTGRES_DB: postgis
+        ports:
+          - 5439:5432
+        options: >-
+          --health-cmd pg_isready
+          --health-interval 10s
+          --health-timeout 5s
+          --health-retries 10
+    env:
+      PGHOST: localhost
+      PGPORT: 5439
+      PGUSER: username
+      PGPASSWORD: password
+      PGDATABASE: postgis
+
+    steps:
+      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+        with:
+          # Fetch all history so the base ref is available for `git worktree add`.
+          fetch-depth: 0
+
+      - uses: astral-sh/setup-uv@08807647e7069bb48b6ef5acd8ec9567f424441b # v8.1.0
+
+      - name: Fetch deterministic benchmark fixtures
+        run: |
+          uv run --no-project --with 'psycopg[binary]' \
+            python scripts/benchmark_fetch_pc_fixtures.py \
+            --manifest benchmarks/fixtures/planetary-computer/manifest.json \
+            --output-dir /tmp/pgstac-benchmark-fixtures
+
+      # Dispatch inputs reach the scripts through env vars, never through ${{ }}
+      # inside `run:`, so their values cannot be parsed as shell syntax.
+      - name: Add base worktree
+        env:
+          BASE_REF: ${{ inputs.base_ref }}
+        run: |
+          git fetch --no-tags origin main
+          git worktree add /tmp/pgstac-benchmark-base "$BASE_REF"
+
+      - name: Benchmark base ref
+        env:
+          HYDRATE_ITERATIONS: ${{ inputs.hydrate_iterations }}
+        run: |
+          uv run --no-project --with 'psycopg[binary]' \
+            python scripts/benchmark_run.py \
+            --fixtures-dir /tmp/pgstac-benchmark-fixtures \
+            --repo-root /tmp/pgstac-benchmark-base \
+            --label base \
+            --hydrate-iterations "$HYDRATE_ITERATIONS" \
+            --output-dir /tmp/pgstac-benchmark-results
+
+      - name: Benchmark head ref
+        env:
+          HYDRATE_ITERATIONS: ${{ inputs.hydrate_iterations }}
+        run: |
+          uv run --no-project --with 'psycopg[binary]' \
+            python scripts/benchmark_run.py \
+            --fixtures-dir /tmp/pgstac-benchmark-fixtures \
+            --repo-root "$GITHUB_WORKSPACE" \
+            --label head \
+            --hydrate-iterations "$HYDRATE_ITERATIONS" \
+            --output-dir /tmp/pgstac-benchmark-results
+
+      - name: Compare benchmark outputs
+        run: |
+          uv run --no-project --with 'psycopg[binary]' \
+            python scripts/benchmark_compare_results.py \
+            --base /tmp/pgstac-benchmark-results/base.json \
+            --head /tmp/pgstac-benchmark-results/head.json \
+            --output-dir /tmp/pgstac-benchmark-comparison
+
+      - name: Publish comparison summary
+        run: cat /tmp/pgstac-benchmark-comparison/comparison.md >> "$GITHUB_STEP_SUMMARY"
+
+      - name: Upload benchmark comparison artifacts
+        uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4.3.3
+        with:
+          name: benchmark-compare-${{ github.sha }}
+          path: |
+            /tmp/pgstac-benchmark-fixtures/fixture-summary.json
+            /tmp/pgstac-benchmark-results/
+            /tmp/pgstac-benchmark-comparison/
diff --git a/.github/workflows/benchmark-fixtures.yml b/.github/workflows/benchmark-fixtures.yml
@@ -0,0 +1,78 @@
+# Runs the fixture-based benchmark suite against a pgstac-postgres service
+# container and uploads the generated reports as workflow artifacts.
+name: Benchmark Fixtures
+
+on:
+  pull_request:
+    paths:
+      - '.github/workflows/benchmark-fixtures.yml'
+      - 'scripts/benchmark_*'
+      - 'benchmarks/fixtures/**'
+      - 'src/pgstac/sql/**'
+      - 'src/pypgstac/**'
+  workflow_dispatch:
+  schedule:
+    # Weekly run: Mondays at 06:17 UTC.
+    - cron: '17 6 * * 1'
+
+permissions:
+  contents: read
+
+jobs:
+  benchmark:
+    runs-on: ubuntu-latest
+    services:
+      postgres:
+        image: ghcr.io/stac-utils/pgstac-postgres:main-pg17
+        env:
+          POSTGRES_USER: username
+          POSTGRES_PASSWORD: password
+          POSTGRES_DB: postgis
+        ports:
+          - 5439:5432
+        options: >-
+          --health-cmd pg_isready
+          --health-interval 10s
+          --health-timeout 5s
+          --health-retries 10
+    # Standard libpq PG* variables; they match the service's mapped port 5439.
+    env:
+      PGHOST: localhost
+      PGPORT: 5439
+      PGUSER: username
+      PGPASSWORD: password
+      PGDATABASE: postgis
+
+    steps:
+      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+
+      - uses: astral-sh/setup-uv@08807647e7069bb48b6ef5acd8ec9567f424441b # v8.1.0
+
+      # The extras spec is quoted so the shell never treats [binary] as a glob.
+      - name: Fetch deterministic benchmark fixtures
+        run: |
+          uv run --no-project --with 'psycopg[binary]' \
+            python scripts/benchmark_fetch_pc_fixtures.py \
+            --manifest benchmarks/fixtures/planetary-computer/manifest.json \
+            --output-dir /tmp/pgstac-benchmark-fixtures
+
+      - name: Run benchmark suite
+        run: |
+          uv run --no-project --with 'psycopg[binary]' \
+            python scripts/benchmark_run.py \
+            --fixtures-dir /tmp/pgstac-benchmark-fixtures \
+            --repo-root "$GITHUB_WORKSPACE" \
+            --label "$GITHUB_SHA" \
+            --hydrate-iterations 5 \
+            --output-dir /tmp/pgstac-benchmark-results
+
+      - name: Publish benchmark summary
+        run: cat /tmp/pgstac-benchmark-results/"$GITHUB_SHA".md >> "$GITHUB_STEP_SUMMARY"
+
+      - name: Upload benchmark artifacts
+        uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4.3.3
+        with:
+          name: benchmark-fixtures-${{ github.sha }}
+          path: |
+            /tmp/pgstac-benchmark-fixtures/fixture-summary.json
+            /tmp/pgstac-benchmark-results/
diff --git a/AGENTS.md b/AGENTS.md
@@ -53,3 +53,18 @@ Specialist in pypgstac bulk loading (`src/pypgstac/src/pypgstac/load.py`). See C
 - **Retry scope**: `CheckViolation`, `DeadlockDetected`, `SerializationFailure`, `LockNotAvailable`, `ObjectInUse`
 - **Load modes**: `insert`, `ignore`/`insert_ignore`, `upsert`, `delsert`
 - Test: `scripts/runinpypgstac --build test --pypgstac`
+
+---
+
+## benchmark-engineer
+
+Benchmark fixture and reporting specialist for PgSTAC load/hydrate/storage comparisons.
+
+### Commands
+
+- Generate deterministic Planetary Computer fixtures (1000 per collection):
+  `uv run --no-project --with 'psycopg[binary]' python scripts/benchmark_fetch_pc_fixtures.py --manifest benchmarks/fixtures/planetary-computer/manifest.json --output-dir benchmarks/fixtures/planetary-computer/data`
+- Run benchmark suite:
+  `uv run --no-project --with 'psycopg[binary]' python scripts/benchmark_run.py --fixtures-dir benchmarks/fixtures/planetary-computer/data --repo-root "$PWD" --label local --output-dir /tmp/pgstac-benchmark-results`
+- Compare benchmark JSON outputs:
+  `uv run --no-project --with 'psycopg[binary]' python scripts/benchmark_compare_results.py --base <base.json> --head <head.json> --output-dir /tmp/pgstac-benchmark-comparison`
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -12,6 +12,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/).
 
 - Add deterministic SHA-256 `content_hash` to STAC items to track data changes across migrations.
 - Add `pgstac_updated_at` column to items table as part of separating STAC property updates from database metadata updates.
+- Add deterministic Planetary Computer benchmark fixture manifest and fetch tooling for `naip`, `sentinel-2-l2a`, and `landsat-c2-l2` (1000 items per collection), plus CI/manual benchmark workflows that emit JSON/CSV/Markdown artifacts and branch comparison reports.
 
 ### Changed
 

diff --git a/CLAUDE.md b/CLAUDE.md
@@ -93,6 +93,24 @@ scripts/test --pgdump           # pg_dump/pg_restore round-trip test
 
 All tests run inside Docker via `scripts/runinpypgstac`. Use `--build` to rebuild images first.
 
+### Benchmark Fixtures and Reporting
+
+```bash
+uv run --no-project --with 'psycopg[binary]' python scripts/benchmark_fetch_pc_fixtures.py \
+  --manifest benchmarks/fixtures/planetary-computer/manifest.json \
+  --output-dir benchmarks/fixtures/planetary-computer/data
+
+uv run --no-project --with 'psycopg[binary]' python scripts/benchmark_run.py \
+  --fixtures-dir benchmarks/fixtures/planetary-computer/data \
+  --repo-root "$PWD" \
+  --label local \
+  --output-dir /tmp/pgstac-benchmark-results
+```
+
+GitHub Actions:
+- `.github/workflows/benchmark-fixtures.yml` for fixture-based benchmark artifact generation
+- `.github/workflows/benchmark-compare.yml` for manual base-vs-head comparison reports
+
 ### Docker Architecture
 
 - **pgstac** container: PostgreSQL 17 + PostGIS 3 + extensions, port 5439→5432

diff --git a/benchmarks/fixtures/planetary-computer/.gitignore b/benchmarks/fixtures/planetary-computer/.gitignore
@@ -0,0 +1,2 @@
+data/**
+!data/.gitkeep
diff --git a/benchmarks/fixtures/planetary-computer/README.md b/benchmarks/fixtures/planetary-computer/README.md
@@ -0,0 +1,38 @@
+# Planetary Computer benchmark fixtures
+
+This directory defines reproducible benchmark fixtures for PgSTAC load-path benchmarking.
+
+## Collections
+
+- `naip`
+- `sentinel-2-l2a`
+- `landsat-c2-l2`
+
+Each fixture set materializes:
+
+- one collection document (`collection.json`)
+- exactly 1000 STAC items (`items.ndjson`)
+
+## Why fixtures are generated
+
+Committing 3000 raw Planetary Computer items would add a large and frequently changing payload to the repository.
+Instead, this directory commits a deterministic fixture manifest plus a fetch script.
+
+## Generate fixtures
+
+From repository root:
+
+```bash
+uv run --no-project --with 'psycopg[binary]' python scripts/benchmark_fetch_pc_fixtures.py \
+  --manifest benchmarks/fixtures/planetary-computer/manifest.json \
+  --output-dir benchmarks/fixtures/planetary-computer/data
+```
+
+## Validate generated fixtures
+
+```bash
+uv run --no-project --with 'psycopg[binary]' python scripts/benchmark_fetch_pc_fixtures.py \
+  --manifest benchmarks/fixtures/planetary-computer/manifest.json \
+  --output-dir benchmarks/fixtures/planetary-computer/data \
+  --validate-only
+```
diff --git a/benchmarks/fixtures/planetary-computer/data/.gitkeep b/benchmarks/fixtures/planetary-computer/data/.gitkeep
diff --git a/benchmarks/fixtures/planetary-computer/manifest.json b/benchmarks/fixtures/planetary-computer/manifest.json
@@ -0,0 +1,36 @@
+{
+  "api_url": "https://planetarycomputer.microsoft.com/api/stac/v1",
+  "item_count": 1000,
+  "collections": [
+    {
+      "id": "naip",
+      "datetime": "../2025-01-01T00:00:00Z",
+      "sortby": [
+        {
+          "field": "id",
+          "direction": "asc"
+        }
+      ]
+    },
+    {
+      "id": "sentinel-2-l2a",
+      "datetime": "../2025-01-01T00:00:00Z",
+      "sortby": [
+        {
+          "field": "id",
+          "direction": "asc"
+        }
+      ]
+    },
+    {
+      "id": "landsat-c2-l2",
+      "datetime": "../2025-01-01T00:00:00Z",
+      "sortby": [
+        {
+          "field": "id",
+          "direction": "asc"
+        }
+      ]
+    }
+  ]
+}