Aggregate and Publish Metadata #855
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Workflow header: display name, triggers, token permissions, and the
# environment variables shared by every step.
name: Aggregate and Publish Metadata

on:
  # Allow manual runs.
  workflow_dispatch:
  # Also run whenever one of the listed workflows completes on main.
  workflow_run:
    workflows:
      - "Build & Publish CRCP Snapshots"
      - "Planet GeoDesk v1"
      - "Planet GeoDesk v2"
      - "Planet GeoParquet"
      - "Planet PBF"
    types:
      - completed
    branches:
      - main

# Read-only token scopes.
permissions:
  actions: read
  contents: read

env:
  # Prefix prepended to each relative metadata path before it is fetched.
  BASE_URL: https://download.openplanetdata.com
  # Name of the rclone remote used as the upload destination.
  RCLONE_REMOTE: openplanetdata-r2
jobs:
  # Downloads each dataset's ".metadata" JSON document, merges them into one
  # sorted JSON array, and uploads that array with rclone.
  aggregate:
    runs-on: self-hosted
    steps:
      - name: Install core tools
        uses: openplanetdata/actions/install-packages@main
        with:
          packages: curl jq

      # NOTE(review): presumably this action writes the rclone configuration
      # from RCLONE_CONFIG_DATA - confirm against the action's source.
      - name: Install Rclone
        uses: openplanetdata/actions/install-rclone@main
        env:
          RCLONE_CONFIG_DATA: ${{ secrets.RCLONE_CONFIG_DATA }}

      - name: Fetch and aggregate metadata
        # The script uses bash arrays, so do not rely on the runner's default shell.
        shell: bash
        run: |
          set -euo pipefail

          # Private scratch directory. A dedicated variable is used instead of
          # overwriting TMPDIR (which mktemp and child processes consult), and
          # the EXIT trap removes the directory on success and on failure.
          workdir=$(mktemp -d)
          trap 'rm -rf -- "$workdir"' EXIT
          echo "Working directory: $workdir"

          # Start from an empty array; every fetched document is appended to it.
          jq -n '[]' > "$workdir/out.json"

          # Metadata documents to aggregate, relative to BASE_URL.
          # Format: {remote_path}/v{remote_version}/{remote_filename}.metadata
          metadata_paths=(
            "osm/cities/geoparquet/v1/cities-latest.osm.parquet.metadata"
            "osm/countries/geoparquet/v1/countries-latest.osm.parquet.metadata"
            "osm/planet/geoparquet/v1/planet-latest.osm.parquet.metadata"
            "osm/planet/gob/v2/planet-latest.osm.gob.metadata"
            "osm/planet/gol/v1/planet-latest.osm.gol.metadata"
            "osm/planet/gol/v2/planet-latest.osm.gol.metadata"
            "osm/planet/pbf/v1/planet-latest.osm.pbf.metadata"
            "osm/postalcodes/geoparquet/v1/postalcodes-latest.osm.parquet.metadata"
            "osm/regions/geoparquet/v1/regions-latest.osm.parquet.metadata"
          )

          for relpath in "${metadata_paths[@]}"; do
            metadata_url="$BASE_URL/$relpath"
            echo "::group::Fetching: $relpath"
            echo "URL: $metadata_url"

            # -f turns HTTP errors (e.g. a dataset that is not published yet)
            # into a non-zero exit; --retry covers transient failures so a
            # network blip does not silently drop an entry. -o (rather than a
            # shell redirect) lets curl restart the file cleanly on a retry.
            if curl -sSf --retry 3 --retry-delay 2 \
                 --connect-timeout 10 --max-time 60 \
                 -o "$workdir/file.meta" "$metadata_url"; then
              echo "✓ Successfully fetched"

              # Anything other than a single JSON object cannot be merged below;
              # fail loudly instead of appending a bogus entry.
              if ! jq -e 'type == "object"' "$workdir/file.meta" > /dev/null; then
                echo "::error::$relpath did not return a JSON object"
                exit 1
              fi

              # The format label is the extension of remote_filename, or
              # "unknown" when the document has no remote_filename.
              remote_filename=$(jq -r '.remote_filename // empty' "$workdir/file.meta")
              if [[ -n "$remote_filename" ]]; then
                format="${remote_filename##*.}"
              else
                format="unknown"
              fi

              # --slurpfile reads the document from disk, so its size is not
              # limited by the maximum command-line length.
              jq --slurpfile meta "$workdir/file.meta" \
                 --arg format "$format" \
                 '. + [$meta[0] + {format: $format}]' \
                 "$workdir/out.json" > "$workdir/tmp"
              mv "$workdir/tmp" "$workdir/out.json"
              echo "✓ Added to aggregated metadata (format: $format)"
            else
              echo "⚠️ Failed to fetch (may not exist yet)"
            fi
            echo "::endgroup::"
          done

          # If nothing could be fetched (e.g. the download host is unreachable),
          # stop here rather than overwrite the published index with "[]".
          entry_count=$(jq 'length' "$workdir/out.json")
          if (( entry_count == 0 )); then
            echo "::error::No metadata documents could be fetched; refusing to publish an empty index"
            exit 1
          fi

          # Sort by format, then numerically by remote_version, and order the
          # keys inside each object. "tonumber? // 0" keeps one document with a
          # missing or non-numeric remote_version from aborting the whole sort.
          jq 'sort_by(.format, (.remote_version | tonumber? // 0))
              | map(to_entries | sort_by(.key) | from_entries)' \
            "$workdir/out.json" > "$workdir/sorted.json"
          mv "$workdir/sorted.json" metadata.json

          echo "::group::Final aggregated metadata"
          jq . metadata.json
          echo "::endgroup::"

      - name: Upload to R2
        run: |
          rclone copyto metadata.json "${RCLONE_REMOTE}:openplanetdata/osm/planet.metadata"

      - name: Cleanup downloaded and generated files
        if: always()
        run: |
          # The aggregate step removes its own scratch directory through its
          # EXIT trap, so only the workspace file is left to delete. A sweep of
          # /tmp for "tmp.*" entries is deliberately not done: it cannot remove
          # a non-empty scratch directory and may delete temp files belonging
          # to other processes of the same user on a shared runner.
          rm -f -- metadata.json