GeoscienceAustralia · arcisad · Jun 11, 2026 · Jun 11, 2026 · Jun 11, 2026 · Jun 11, 2026
diff --git a/.gitignore b/.gitignore
@@ -24,6 +24,9 @@ build
 .pixi
 *.egg-info
 
+# ignore .env files
+.env
+
 
 # ignore the version file made by setuptools_scm
 dem_handler/_version.py
diff --git a/dem_handler/__init__.py b/dem_handler/__init__.py
@@ -5,10 +5,14 @@
 DATA_DIR = Path(__file__).parent / Path("data")
 REMA_GPKG_PATH = DATA_DIR / Path("REMA_Mosaic_Index_v2.gpkg")
 COP30_GPKG_PATH = DATA_DIR / Path("copdem_tindex_filename.gpkg")
+REMA_SERIES_CONFIG_PATH = DATA_DIR / Path("rema_series_config.json")
 
-REMAResolutions = typing.Literal[2, 10, 32]
+REMAResolutions = typing.Literal[2, 10, 30, 32]
 COPResolutions = typing.Literal[30]
 ValidDEMResolutions = typing.Literal[REMAResolutions, COPResolutions]
 
+REMABoundCheckSkipResolutions = typing.Literal[30]
+
 REMA_VALID_RESOLUTIONS = typing.get_args(REMAResolutions)
 COP_VALID_RESOLUTIONS = typing.get_args(COPResolutions)
+REMA_BOUND_CHECK_SKIP_RESOLUTIONS = typing.get_args(REMABoundCheckSkipResolutions)
diff --git a/dem_handler/data/rema_series_config.json b/dem_handler/data/rema_series_config.json
@@ -0,0 +1,24 @@
+{ 
+    "resolution": {
+        "30": "v0.5",
+        "32": "v0.4",
+        "10": "v0.4",
+        "2": "v0.4"
+    },
+    "aoi_name": {
+        "v0.4": "thwaites",
+        "v0.5": null
+    },
+    "v0.4": {
+        "endpoint": "umn1.osn.mghpcc.org",
+        "s3_bucket": "cse-pgc-test",
+        "remote_folder": "digital_earth_antarctica/v0.4/mosaics",
+        "indexing_file": null
+    },
+    "v0.5": {
+        "endpoint": "umn1.osn.mghpcc.org",
+        "s3_bucket": "cse-pgc-test",
+        "remote_folder": "digital_earth_antarctica/v0.5",
+        "indexing_file": "MultiTemporalREMAIndex.parquet"
+    }
+}
diff --git a/dem_handler/dem/rema.py b/dem_handler/dem/rema.py
@@ -1,4 +1,5 @@
 from __future__ import annotations
+import json
 from pathlib import Path
 import shapely
 from shapely import box
@@ -20,11 +21,15 @@
 from dem_handler.dem.geoid import apply_geoid
 from dem_handler.download.aws import download_egm_08_geoid
 
-
 # Create a custom type that allows use of BoundingBox or tuple(left, bottom, right, top)
 BBox = BoundingBox | tuple[float | int, float | int, float | int, float | int]
 
-from dem_handler import REMA_GPKG_PATH, REMA_VALID_RESOLUTIONS, REMAResolutions
+from dem_handler import (
+    REMA_GPKG_PATH,
+    REMA_SERIES_CONFIG_PATH,
+    REMA_VALID_RESOLUTIONS,
+    REMAResolutions,
+)
 
 
 @log_timing
@@ -45,6 +50,7 @@ def get_rema_dem_for_bounds(
     num_tasks: int | None = None,
     return_paths: bool = False,
     download_dir: Path | str = "rema_dems_temp_folder",
+    rema_series_config_file: Path | str | None = None,
 ) -> tuple[np.ndarray, Profile | list[Path]] | list[Path] | tuple[None, None, None]:
     """Finds the REMA DEM tiles in a given bounding box and merges them into a single tile.
 
@@ -86,6 +92,12 @@ def get_rema_dem_for_bounds(
         Flag to return the local paths for downloaded DEMs only, by default False
     download_dir: Path | str , optional
         Directory to download the REMA DEMs to, by default "rema_dems_temp_folder"
+    rema_series_config_file: Path | str | None, optional
+        Path to the REMA series config file, by default None.
+        If None, it will look for a file named "rema_series_config.json" in the same directory as this script.
+        This config file contains information on the remote folder and indexing file names for different versions of the REMA timeseries product.
+        This is only used when reading the REMA timeseries product.
+
     Returns
     -------
     tuple[np.ndarray, Profile | list[Path]] | list[Path]
@@ -214,12 +226,32 @@ def get_rema_dem_for_bounds(
 
     else:
         # Read in the rema timeseries dem from appropriate vrt
+
+        if (rema_series_config_file is None) or (rema_series_config_file == ""):
+            rema_series_config_file = REMA_SERIES_CONFIG_PATH
+        else:
+            rema_series_config_file = Path(rema_series_config_file).resolve()
+            if not rema_series_config_file.exists():
+                raise FileNotFoundError(
+                    f"REMA series config file not found at {rema_series_config_file}. "
+                    f"Please provide the correct path to the config file."
+                )
+
+        with open(rema_series_config_file) as f:
+            rema_remote_config_data = json.load(f)
+
+        rema_version = rema_remote_config_data["resolution"][str(resolution)]
+        rema_remote_config = rema_remote_config_data[rema_version]
+        aoi_name = rema_remote_config_data["aoi_name"][rema_version]
+
         logging.info("Reading in REMA timeseries .vrt")
         dem_array, dem_profile = read_rema_timeseries_vrt(
             year=rema_year,
             bounds=bounds,
             save_path=save_path,
             resolution=resolution,
+            aoi_name=aoi_name,
+            **rema_remote_config,
         )
         raster_paths = []
 
@@ -353,16 +385,86 @@ def _round_up_to_step(coord, origin, res):
 import tempfile
 import boto3
 from rasterio.session import AWSSession
-from dem_handler.utils.raster import read_raster_from_vrt
+from dem_handler.utils.raster import read_raster_from_vrt, read_raster_from_gdf
+
+
+def read_indexing_file(indexing_file: str, storage_options: dict | None = None):
+    """Reads the indexing file for the REMA timeseries product,
+    which contains the urls for the individual DEM tiles and their metadata.
+    The indexing file can be read from a local path or from a remote datastore using the provided storage options.
+
+    Parameters
+    ----------
+    indexing_file : str
+        Path to the indexing file, either local or remote.
+    storage_options : dict, optional
+        Storage options for reading the indexing file from a remote datastore, by default None
+
+    Returns
+    -------
+    gpd.GeoDataFrame
+        GeoDataFrame containing the metadata and urls for the REMA DEM tiles
+    """
+    if os.path.exists(indexing_file):
+        print(f"Reading indexing file from local path: {indexing_file}")
+        indexing_gdf = gpd.read_parquet(indexing_file)
+    else:
+        try:
+            indexing_gdf = gpd.read_parquet(
+                indexing_file, storage_options=storage_options
+            )
+        except Exception as e:
+            raise FileNotFoundError(
+                f"Indexing file not found at {indexing_file}. Please provide the correct path to the indexing file."
+            ) from e
+    return indexing_gdf
 
 
 def read_rema_timeseries_vrt(
-    year,
-    bounds,
-    save_path,
-    resolution,
-    version_number=0.3,
-):
+    year: int,
+    bounds: BBox,
+    save_path: str | None,
+    resolution: int,
+    aoi_name: (
+        str | None
+    ) = "thwaites",  # Reading data using indexing file doesn't need the aoi name.
+    endpoint: str = "umn1.osn.mghpcc.org",
+    s3_bucket: str = "cse-pgc-test",
+    remote_folder: str = "digital_earth_antarctica/v0.4/mosaics",  # or "digital_earth_antarctica/v0.5" for version 0.5
+    indexing_file: (
+        str | gpd.GeoDataFrame | None
+    ) = None,  # or "MultiTemporalREMAIndex.parquet" for version 0.5
+) -> tuple[np.ndarray, Profile]:
+    """Reads the REMA timeseries .vrt or a provided indexing file for a given year and bounds, merging the intersecting tiles into one raster.
+
+    Parameters
+    ----------
+    year : int
+        The year for the DEM if the timeseries product is required.
+    bounds : BBox
+        Bounding box for the area of interest in 3031.
+    save_path : str | None
+        Local path to save the output tile.
+    resolution : int
+        Resolution of the required tiles, for example 10.
+    aoi_name : str | None, optional
+        Name of the area of interest, by default "thwaites". This is only used
+        when reading data without an indexing file.
+    endpoint : str, optional
+        Endpoint for the s3 datastore, by default "umn1.osn.mghpcc.org"
+    s3_bucket : str, optional
+        S3 bucket for the datastore, by default "cse-pgc-test"
+    remote_folder : str, optional
+        Remote folder in the s3 bucket where the data is stored, by default "digital_earth_antarctica/v0.4/mosaics"
+    indexing_file : str | gpd.GeoDataFrame | None, optional
+        Name of the indexing file in the remote datastore or a GeoDataFrame.
+        If None, it will attempt to read directly from the vrt, which is the case for version 0.4 of the REMA timeseries product
+
+    Returns
+    -------
+    tuple[np.ndarray, Profile]
+        Tuple of the output tile array and its profile
+    """
 
     logging.info(f"Reading DEM .vrt from remote datastore")
     if not os.environ.get("REMA_AWS_ACCESS_KEY_ID"):
@@ -376,66 +478,103 @@ def read_rema_timeseries_vrt(
     else:
         aws_secret_access_key = os.environ.get("REMA_AWS_SECRET_ACCESS_KEY")
 
-    # S3-compatible endpoint
-    endpoint = "umn1.osn.mghpcc.org"
-    endpoint_url = "https://umn1.osn.mghpcc.org"
-
-    # File path inside the bucket
-    s3_bucket = "cse-pgc-test"
-    vrt_folder = f"digital_earth_antarctica/v{version_number}/mosaics/thwaites_remote"
-
     # Start boto3 session
     session = boto3.Session(
         aws_access_key_id=aws_access_key_id,
         aws_secret_access_key=aws_secret_access_key,
     )
 
-    # download and edit the vrt as the remote vrt references browse images and not the data
-    s3 = session.client("s3", endpoint_url=endpoint_url)
-
-    # check to ensure the year is there
-    vrt_pattern = re.compile(rf"thwaites_{resolution}m_remote_z_(\d+)\.vrt$")
-    # List all objects under prefix
-    paginator = s3.get_paginator("list_objects_v2")
-    pages = paginator.paginate(Bucket=s3_bucket, Prefix=vrt_folder)
-    valid_years = set()
-
-    for page in pages:
-        for obj in page.get("Contents", []):
-            key = obj["Key"]
-            match = vrt_pattern.search(key)
-            if match:
-                valid_years.add(int(match.group(1)))
-
-    years_list = sorted(valid_years)
-
-    if year not in valid_years:
-        raise ValueError(
-            f'The requested year "{year}" does not have a rema timeseries. '
-            f"valid years are : {years_list}"
-        )
-
-    vrt_path = f"{vrt_folder}/thwaites_{resolution}m_remote_z_{year}.vrt"
-    logging.info(f"Downloading .vrt from : {endpoint_url}/{vrt_path}")
-
-    response = s3.get_object(Bucket=s3_bucket, Key=vrt_path)
-    vrt_content = response["Body"].read()
-
-    with tempfile.NamedTemporaryFile("w", delete=False) as f:
-        f.write(vrt_content.decode("utf-8"))
-        temp_vrt = f.name
+    endpoint_url = f"https://{endpoint}"
+
+    if indexing_file is not None:
+        if isinstance(indexing_file, gpd.GeoDataFrame):
+            print("Using provided GeoDataFrame as indexing file")
+            indexing_gdf = indexing_file
+        else:
+            if os.path.exists(indexing_file):
+                print(f"Reading indexing file from local path: {indexing_file}")
+                indexing_file_path = indexing_file
+                storage_options = None
+            else:
+                indexing_file_prefix = f"{remote_folder}/{indexing_file}"
+                print(
+                    f"Looking for indexing file at s3://{s3_bucket}/{indexing_file_prefix}"
+                )
+                storage_options = {
+                    "key": aws_access_key_id,
+                    "secret": aws_secret_access_key,
+                    "client_kwargs": {"endpoint_url": endpoint_url},
+                }
+                indexing_file_path = f"s3://{s3_bucket}/{indexing_file_prefix}"
+
+            indexing_gdf = read_indexing_file(indexing_file_path, storage_options)
+
+        indexing_gdf = indexing_gdf[indexing_gdf.year == str(year)]
+        bounds_poly = box(*bounds)
+        indexing_gdf = indexing_gdf[indexing_gdf.intersects(bounds_poly)]
+        indexing_gdf = indexing_gdf[
+            indexing_gdf.dem_url.apply(lambda x: os.path.basename(x).split("_")[2])
+            == str(resolution) + "m"
+        ]
 
-    logging.info("Adjusting rema bounds to stop small offsets")
-    logging.info(f"original bounds : {bounds}")
-    bounds = adjust_bounds_for_rema_profile(bounds, [temp_vrt])
-    logging.info(f"adjusted bounds : {bounds}")
+        with rasterio.Env(
+            AWSSession(session),
+            AWS_S3_ENDPOINT=endpoint,
+            AWS_VIRTUAL_HOSTING=False,  # critical for non-AWS S3 providers
+        ):
+            logging.info(f"Reading raster for bounds from remote indexing file")
+            dem_array, dem_profile = read_raster_from_gdf(
+                indexing_gdf, bounds_poly, save_path
+            )
+    else:
+        # download and edit the vrt as the remote vrt references browse images and not the data
+        s3 = session.client("s3", endpoint_url=endpoint_url)
+
+        # check to ensure the year is there
+        vrt_pattern = re.compile(rf"{aoi_name}_.*?{resolution}m_dem_(\d+)\.vrt$")
+        # List all objects under prefix
+        paginator = s3.get_paginator("list_objects_v2")
+        pages = paginator.paginate(Bucket=s3_bucket, Prefix=remote_folder)
+        valid_years = set()
+        valid_files = {}
+
+        for page in pages:
+            for obj in page.get("Contents", []):
+                key = obj["Key"]
+                match = vrt_pattern.search(key)
+                if match:
+                    year_found = int(match.group(1))
+                    valid_years.add(year_found)
+                    valid_files[year_found] = key
+
+        years_list = sorted(valid_years)
+
+        if year not in valid_years:
+            raise ValueError(
+                f'The requested year "{year}" does not have a rema timeseries. '
+                f"valid years are : {years_list}"
+            )
 
-    with rasterio.Env(
-        AWSSession(session),
-        AWS_S3_ENDPOINT=endpoint,
-        AWS_VIRTUAL_HOSTING=False,  # critical for non-AWS S3 providers
-    ):
-        logging.info(f"Reading raster for bounds from remote vrt")
-        dem_array, dem_profile = read_raster_from_vrt(temp_vrt, bounds, save_path)
+        vrt_path = valid_files[year]
+        logging.info(f"Downloading .vrt from : {endpoint_url}/{vrt_path}")
+        response = s3.get_object(Bucket=s3_bucket, Key=vrt_path)
+        vrt_content = response["Body"].read()
+
+        with tempfile.NamedTemporaryFile("w", delete=False) as f:
+            f.write(vrt_content.decode("utf-8"))
+            temp_vrt = f.name
+
+        logging.info("Adjusting rema bounds to stop small offsets")
+        logging.info(f"original bounds : {bounds}")
+        bounds = adjust_bounds_for_rema_profile(bounds, [temp_vrt])
+        logging.info(f"adjusted bounds : {bounds}")
+
+        with rasterio.Env(
+            AWSSession(session),
+            AWS_S3_ENDPOINT=endpoint,
+            AWS_VIRTUAL_HOSTING=False,  # critical for non-AWS S3 providers
+        ):
+            logging.info(f"Reading raster for bounds from remote vrt")
+            dem_array, dem_profile = read_raster_from_vrt(temp_vrt, bounds, save_path)
 
     return dem_array, dem_profile