Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions data/stac_geotiff_checks.csv
Original file line number Diff line number Diff line change
Expand Up @@ -60323,3 +60323,4 @@ https://nrs.objectstore.gov.bc.ca/gdwuts/albers10k2m/_completed_dem/dem_104_045.
https://nrs.objectstore.gov.bc.ca/gdwuts/albers10k2m/_completed_dem/dem_127_052.tif,True,False
https://nrs.objectstore.gov.bc.ca/gdwuts/albers10k2m/_completed_dem/dem_062_089.tif,True,False
https://nrs.objectstore.gov.bc.ca/gdwuts/albers10k2m/_completed_dem/dem_126_052.tif,True,False
https:/nrs.objectstore.gov.bc.ca/gdwuts/082/082e/2018/dem/bc_082e003_xli1m_utm11_2018 (2).tif,True,True
2,107 changes: 2,107 additions & 0 deletions data/urls_list.txt

Large diffs are not rendered by default.

5 changes: 4 additions & 1 deletion scripts/item_create.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
check_geotiff_cog,
date_extract_from_path,
datetime_parse_item,
encode_url_for_gdal,
fix_url,
get_output_dir,
url_to_item_id,
Expand Down Expand Up @@ -80,8 +81,10 @@ def process_item(path_item: str, collection_id: str, path_local: str,
)

try:
# Encode for GDAL/vsicurl (spaces → %20), but keep original for asset href
gdal_path = encode_url_for_gdal(path_item)
item = rio_stac.stac.create_stac_item(
path_item,
gdal_path,
id=item_id,
asset_media_type=media_type,
asset_name='image',
Expand Down
4 changes: 3 additions & 1 deletion scripts/item_reprocess.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
from stac_utils import (
date_extract_from_path,
datetime_parse_item,
encode_url_for_gdal,
fix_url,
url_to_item_id,
get_output_dir,
Expand Down Expand Up @@ -73,8 +74,9 @@ def process_item(path_item: str, collection, results_lookup) -> dict | None:
)

try:
gdal_path = encode_url_for_gdal(path_item)
item = rio_stac.stac.create_stac_item(
path_item,
gdal_path,
id=item_id,
asset_media_type=media_type,
asset_name='image',
Expand Down
11 changes: 10 additions & 1 deletion scripts/stac_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,8 +93,9 @@ def check_geotiff_cog(url: str) -> dict:
Returns dict with url, is_geotiff (readable), and is_cog (cloud-optimized).
"""
try:
gdal_url = encode_url_for_gdal(url)
result = subprocess.run(
["rio", "cogeo", "validate", f"/vsicurl/{url}"],
["rio", "cogeo", "validate", f"/vsicurl/{gdal_url}"],
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
check=False
Expand Down Expand Up @@ -144,6 +145,14 @@ def fix_url(url: str) -> str:
return url


def encode_url_for_gdal(url: str) -> str:
    """Percent-encode a URL so it can be appended to ``/vsicurl/``.

    Spaces in filenames cause CURL errors with /vsicurl/, so they are
    rewritten as ``%20``. Any other character that may not appear literally
    in a URL (control characters, non-ASCII letters, quotes, angle brackets,
    braces, ...) is percent-encoded as UTF-8 as well.

    URL delimiters (``:/?#[]@!$&'()*+,;=``) are left untouched so the
    scheme, host, path and query structure is preserved, and ``%`` is left
    untouched so an already-encoded URL is not double-encoded.

    Args:
        url: The URL to encode; it may already contain ``%XX`` escapes.

    Returns:
        The URL with unsafe characters percent-encoded. A URL whose only
        unsafe characters are spaces is returned with each space replaced
        by ``%20``.
    """
    # Local import keeps this helper self-contained.
    from urllib.parse import quote

    # RFC 3986 reserved characters plus "%"; unreserved characters
    # (letters, digits, "-", ".", "_", "~") are never quoted by quote().
    return quote(url, safe=":/?#[]@!$&'()*+,;=%")


def url_to_item_id(url: str) -> str:
    """Derive a STAC item ID from a GeoTIFF URL.

    The first ``len(PATH_S3)`` characters of *url* are dropped (the prefix
    is not checked, only its length is used), leading slashes are stripped,
    the remaining ``/`` separators become ``-``, and a trailing ``.tif``
    suffix is removed.
    """
    relative_key = url[len(PATH_S3):].lstrip("/")
    flattened = relative_key.replace("/", "-")
    return flattened.removesuffix(".tif")
8 changes: 2 additions & 6 deletions scripts/urls_fetch.R
Original file line number Diff line number Diff line change
Expand Up @@ -29,10 +29,6 @@ if (test_only && file.exists("data/urls_list.txt")) {
pattern = c("dem", "*.tif")
)

# Remove paths with ( in them (all fail validation - see issue #8)
keys_clean <- keys[!stringr::str_detect(keys, "\\(")]

readr::write_lines(keys_clean, "data/urls_list.txt")
cat(sprintf("Fetched and saved %d URLs (excluded %d with parentheses)\n",
length(keys_clean), length(keys) - length(keys_clean)))
readr::write_lines(keys, "data/urls_list.txt")
cat(sprintf("Fetched and saved %d URLs\n", length(keys)))
}
Loading