Skip to content

Aggregate and Publish Metadata #855

Aggregate and Publish Metadata

Aggregate and Publish Metadata #855

name: Aggregate and Publish Metadata

on:
  # Allow the workflow to be started manually.
  workflow_dispatch:
  # Also run after any of the listed workflows finishes on main.
  # `completed` fires whether the upstream run succeeded or failed.
  workflow_run:
    workflows:
      - "Build & Publish CRCP Snapshots"
      - "Planet GeoDesk v1"
      - "Planet GeoDesk v2"
      - "Planet GeoParquet"
      - "Planet PBF"
    types:
      - completed
    branches:
      - main
# Read-only token scopes for this workflow.
permissions:
  actions: read
  contents: read

env:
  # Base URL the individual metadata files are fetched from.
  BASE_URL: 'https://download.openplanetdata.com'
  # Name of the rclone remote the aggregated file is uploaded to.
  RCLONE_REMOTE: 'openplanetdata-r2'
jobs:
  # Downloads every known per-dataset metadata file, merges them into a single
  # sorted JSON array and uploads that array to the R2 bucket.
  aggregate:
    runs-on: self-hosted
    # Several trigger workflows can finish close together; run one aggregation
    # at a time so an older run cannot upload after (and overwrite) a newer one.
    concurrency:
      group: aggregate-publish-metadata
      cancel-in-progress: false
    steps:
      - name: Install core tools
        uses: openplanetdata/actions/install-packages@main
        with:
          packages: curl jq

      - name: Install Rclone
        uses: openplanetdata/actions/install-rclone@main
        env:
          RCLONE_CONFIG_DATA: ${{ secrets.RCLONE_CONFIG_DATA }}

      - name: Fetch and aggregate metadata
        # The script relies on bash arrays and `pipefail`.
        shell: bash
        run: |
          set -euo pipefail

          # Private scratch directory. Deliberately not named TMPDIR so the
          # variable other tools consult for temp files is left untouched.
          # The trap removes it on every exit path, success or failure.
          work_dir=$(mktemp -d "${RUNNER_TEMP:-/tmp}/metadata.XXXXXX")
          trap 'rm -rf "$work_dir"' EXIT
          echo "Working directory: $work_dir"

          # Initialize empty array
          jq -n '[]' > "$work_dir/out.json"

          # List of metadata file URLs to aggregate
          # Format: {remote_path}/v{remote_version}/{remote_filename}.metadata
          metadata_paths=(
            "osm/cities/geoparquet/v1/cities-latest.osm.parquet.metadata"
            "osm/countries/geoparquet/v1/countries-latest.osm.parquet.metadata"
            "osm/planet/geoparquet/v1/planet-latest.osm.parquet.metadata"
            "osm/planet/gob/v2/planet-latest.osm.gob.metadata"
            "osm/planet/gol/v1/planet-latest.osm.gol.metadata"
            "osm/planet/gol/v2/planet-latest.osm.gol.metadata"
            "osm/planet/pbf/v1/planet-latest.osm.pbf.metadata"
            "osm/postalcodes/geoparquet/v1/postalcodes-latest.osm.parquet.metadata"
            "osm/regions/geoparquet/v1/regions-latest.osm.parquet.metadata"
          )

          # Fetch and aggregate each metadata file
          for relpath in "${metadata_paths[@]}"; do
            metadata_url="$BASE_URL/$relpath"
            echo "::group::Fetching: $relpath"
            echo "URL: $metadata_url"

            # -S keeps the failure reason visible, --retry covers transient
            # errors, and -o lets curl truncate the file between retries.
            if curl -sSf --retry 3 -o "$work_dir/file.meta" "$metadata_url"; then
              echo "✓ Successfully fetched"

              # An empty or non-object body must not be merged into the index.
              if ! jq -e 'type == "object"' "$work_dir/file.meta" > /dev/null; then
                echo "::error::$relpath did not return a JSON object"
                exit 1
              fi

              # Extract format from remote_filename (its file extension)
              remote_filename=$(jq -r '.remote_filename // empty' "$work_dir/file.meta")
              if [ -n "$remote_filename" ]; then
                format="${remote_filename##*.}"
              else
                format="unknown"
              fi

              # Add format field and append to array. jq and mv are separate
              # statements on purpose: inside an `&&` list a failing jq would
              # be exempt from `set -e` and the failure would go unnoticed.
              jq --slurpfile meta "$work_dir/file.meta" \
                --arg format "$format" \
                '. + [$meta[0] + {format: $format}]' \
                "$work_dir/out.json" > "$work_dir/tmp"
              mv "$work_dir/tmp" "$work_dir/out.json"
              echo "✓ Added to aggregated metadata (format: $format)"
            else
              echo "⚠️ Failed to fetch (may not exist yet)"
            fi
            echo "::endgroup::"
          done

          # Never replace the published index with an empty one, e.g. when the
          # download host is unreachable and every fetch above failed.
          entry_count=$(jq 'length' "$work_dir/out.json")
          if [ "$entry_count" -eq 0 ]; then
            echo "::error::No metadata could be fetched; refusing to publish an empty index"
            exit 1
          fi

          # Sort array by format (asc) then by remote_version (asc), and sort
          # keys in each object. A missing or non-numeric remote_version sorts
          # as 0 instead of aborting the whole sort.
          jq 'sort_by(.format, (.remote_version | tonumber? // 0)) | map(to_entries | sort_by(.key) | from_entries)' \
            "$work_dir/out.json" > "$work_dir/sorted.json"
          mv "$work_dir/sorted.json" metadata.json

          echo "::group::Final aggregated metadata"
          jq . metadata.json
          echo "::endgroup::"

      - name: Upload to R2
        run: |
          rclone copyto metadata.json "$RCLONE_REMOTE:openplanetdata/osm/planet.metadata"

      - name: Cleanup downloaded and generated files
        if: always()
        run: |
          # The scratch directory is removed by the EXIT trap of the aggregate
          # step, so only the generated output file is left to delete here.
          rm -f metadata.json