Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 5 additions & 6 deletions scripts/item_create.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,14 +149,13 @@ def load_validation_cache(urls_to_check: list[str]) -> dict:
existing_urls = set()
logger.info("No existing validation cache found, will validate all URLs")

# Detect old-format rows missing spatial metadata
has_spatial = set()
# Detect old-format rows missing spatial metadata. A null `transform` is used
# as the marker: rows that were extracted but have epsg=None (no CRS) are NOT
# re-upgraded, because extraction still populates height/width/transform.
needs_upgrade = set()
if "epsg" in df_existing.columns:
if "transform" in df_existing.columns:
for _, row in df_existing.iterrows():
if pd.notna(row.get("epsg")):
has_spatial.add(row["url"])
elif row.get("is_geotiff"):
if row.get("is_geotiff") and pd.isna(row.get("transform")):
needs_upgrade.add(row["url"])
else:
needs_upgrade = {row["url"] for _, row in df_existing.iterrows() if row["is_geotiff"]}
Expand Down
17 changes: 11 additions & 6 deletions scripts/stac_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
"""

import json
import logging
import re
from datetime import datetime, timezone

Expand All @@ -19,6 +20,8 @@
from rio_cogeo.cogeo import cog_validate
from shapely.geometry import box, mapping

logger = logging.getLogger(__name__)


# =============================================================================
# Path Configuration
Expand Down Expand Up @@ -93,11 +96,12 @@ def datetime_parse_item(s: str | None) -> datetime | None:
# =============================================================================

def geotiff_extract_metadata(url: str) -> dict:
"""Extract spatial metadata and validate GeoTIFF/COG status in one remote read.
"""Extract spatial metadata and validate GeoTIFF/COG status.

Opens the remote GeoTIFF via /vsicurl/, extracts CRS, bounds, shape, and
transform, then validates COG status. All metadata needed for STAC item
creation is returned so subsequent builds can skip the remote read.
Opens the remote GeoTIFF via /vsicurl/ to extract CRS, bounds, shape, and
transform, then validates COG status (second remote read via cog_validate).
All metadata needed for STAC item creation is cached so subsequent builds
skip remote reads entirely.

Returns a dict with url, is_geotiff, is_cog, epsg (None when the file has no
CRS), height, width, transform, bounds.
"""
Expand All @@ -106,7 +110,7 @@ def geotiff_extract_metadata(url: str) -> dict:

try:
with rasterio.open(vsicurl_path) as src:
epsg = src.crs.to_epsg()
epsg = src.crs.to_epsg() if src.crs else None
height = src.height
width = src.width
transform = list(src.transform)[:6]
Expand All @@ -124,7 +128,8 @@ def geotiff_extract_metadata(url: str) -> dict:
"transform": json.dumps(transform),
"bounds": json.dumps(bounds),
}
except Exception:
except Exception as e:
logger.warning("Failed to read %s: %s", url, e)
return {
"url": url,
"is_geotiff": False,
Expand Down