From b86dbb8d94bc064892d6386d149e0b3cfca8d459 Mon Sep 17 00:00:00 2001 From: Matthew Hoffman Date: Thu, 7 May 2026 17:00:35 -0600 Subject: [PATCH 1/7] Add ASTE tile concatenator from ASTE matlab tool Based on files here: https://arcticdata.io/data/10.18739/A2CV4BS5K/matlab_tools/ASTE_mlibrary/ nctiles2aste_v2.m tiles2field.m Converted to python by copilot --- landice/mesh_tools_li/misc/nctiles2aste_v2.py | 305 ++++++++++++++++++ 1 file changed, 305 insertions(+) create mode 100644 landice/mesh_tools_li/misc/nctiles2aste_v2.py diff --git a/landice/mesh_tools_li/misc/nctiles2aste_v2.py b/landice/mesh_tools_li/misc/nctiles2aste_v2.py new file mode 100644 index 000000000..1069e13d1 --- /dev/null +++ b/landice/mesh_tools_li/misc/nctiles2aste_v2.py @@ -0,0 +1,305 @@ +from __future__ import annotations + +from pathlib import Path +from typing import Any, Iterable, Sequence + +import numpy as np +import xarray as xr + + +def _get_struct_value(struct: Any, key: str) -> Any: + """Get a field from dict-like or attribute-style MATLAB-loaded structs.""" + if isinstance(struct, dict): + if key not in struct: + raise KeyError(f"Missing key '{key}' in tiles structure.") + return struct[key] + + if hasattr(struct, key): + return getattr(struct, key) + + dtype = getattr(struct, "dtype", None) + if dtype is not None and getattr(dtype, "names", None) and key in dtype.names: + return struct[key] + + raise KeyError(f"Missing key '{key}' in tiles structure.") + + +def _to_numpy(value: Any) -> np.ndarray: + """Convert nested scalar/object values (common in MATLAB imports) to ndarray.""" + arr = np.asarray(value) + while arr.dtype == object and arr.size == 1: + arr = np.asarray(arr.item()) + return arr + + +def _to_int_tuple_2(value: Any, name: str) -> tuple[int, int]: + arr = _to_numpy(value).squeeze() + if arr.size < 2: + raise ValueError(f"{name} must contain at least 2 values.") + return int(arr.flat[0]), int(arr.flat[1]) + + +def _to_int(value: Any, name: str) -> int: + arr = _to_numpy(value).squeeze() + if arr.size != 1: + raise ValueError(f"{name} must be scalar.") + return int(arr.item()) + + +def _matlab_cell_to_list(value: Any) -> list[Any]: + """Convert MATLAB cell-like arrays to a Python list.""" + arr = _to_numpy(value) + if arr.dtype == object: + return [arr.flat[idx] for idx in range(arr.size)] + return [arr] + + +def _get_tile_indices(tiles_index: Any, tile_num: int) -> np.ndarray: + """Get 0-based linear indices for a 1-based tile number.""" + index_list = _matlab_cell_to_list(tiles_index) + if tile_num < 1 or tile_num > len(index_list): + raise ValueError( + f"Requested tile number {tile_num} is out of range 1..{len(index_list)}" + ) + + idx = _to_numpy(index_list[tile_num - 1]).astype(np.int64).ravel() + # ASTE MATLAB indices are 1-based linear indices in column-major order. + return idx - 1 + + +def tiles2field( + field_tiles: Sequence[np.ndarray] | np.ndarray, + indices: Sequence[np.ndarray] | np.ndarray, + field_size: Sequence[int], +) -> np.ndarray: + """ + Reconstruct a compact ASTE field from tile arrays. + + Parameters + ---------- + field_tiles + A list of 2D/3D tile arrays (or a single tile array). + indices + A list of linear indices (or one index array), one per tile. + Indices are expected to be 0-based and follow MATLAB column-major + flattening semantics. + field_size + Final compact field size ``(nx, ny[, nz])``. + + Returns + ------- + np.ndarray + Compact ASTE field of shape ``(nx, ny, nz)`` with zeros in locations + not covered by provided tiles. + """ + field_size_arr = np.asarray(field_size, dtype=int).ravel() + if field_size_arr.size < 2: + raise ValueError("field_size must contain at least nx and ny.") + + dfieldx = int(field_size_arr[0]) + dfieldy = int(field_size_arr[1]) + + tile_list = list(field_tiles) if isinstance(field_tiles, (list, tuple)) else [field_tiles] + index_list = list(indices) if isinstance(indices, (list, tuple)) else [indices] + + if len(tile_list) != len(index_list): + raise ValueError("There must be one set of indices for each tile.") + + first_tile = np.asarray(tile_list[0]) + if first_tile.ndim == 2: + dfieldz = 1 + tile_shape_xy = first_tile.shape + elif first_tile.ndim == 3: + dfieldz = first_tile.shape[2] + tile_shape_xy = first_tile.shape[:2] + else: + raise ValueError("Tiles must be 2D or 3D arrays.") + + field = np.zeros((dfieldx, dfieldy, dfieldz), dtype=float) + + for tile, idx in zip(tile_list, index_list): + tile_arr = np.asarray(tile) + idx_arr = np.asarray(idx, dtype=np.int64).ravel() + + if tile_arr.ndim == 2: + tile_arr = tile_arr[:, :, np.newaxis] + + if tile_arr.shape[:2] != tile_shape_xy: + raise ValueError("All tiles must have the same x/y dimensions.") + + if tile_arr.shape[0] * tile_arr.shape[1] != idx_arr.size: + raise ValueError( + "Number of points in indices does not match tile x*y size." + ) + + for iz in range(dfieldz): + tmpf = field[:, :, iz] + flat_tmpf = tmpf.reshape(-1, order="F") + tile_flat = tile_arr[:, :, iz].reshape(-1, order="F") + flat_tmpf[idx_arr] = tile_flat + field[:, :, iz] = flat_tmpf.reshape((dfieldx, dfieldy), order="F") + + return field + + +def nctiles2aste_v2( + var_name: str, + root_dir: str | Path, + tile_list: Iterable[int], + levels_list: Iterable[int], + times_list: Iterable[int], + tiles: Any, + flag_aste: bool, +) -> tuple[np.ndarray | list[np.ndarray], np.ndarray]: + """ + Read tile NetCDF files and return either per-tile arrays or compact ASTE fields. + + Notes + ----- + ``tile_list``, ``levels_list``, and ``times_list`` are interpreted as + 1-based indices to match MATLAB behavior. + """ + root_dir = Path(root_dir) + data_dir = root_dir / var_name + if not data_dir.is_dir(): + raise FileNotFoundError(f"Data directory not found: {data_dir}") + + file_list = sorted(data_dir.glob("*.nc")) + if not file_list: + raise FileNotFoundError(f"No NetCDF files found in: {data_dir}") + + tile_list = [int(tile) for tile in tile_list] + levels_list = [int(level) for level in levels_list] + times_list = [int(time) for time in times_list] + + nx, ny = _to_int_tuple_2(_get_struct_value(tiles, "fsize"), "tiles.fsize") + dtilex, dtiley = _to_int_tuple_2(_get_struct_value(tiles, "tsize"), "tiles.tsize") + wet_tiles = _to_int(_get_struct_value(tiles, "wet"), "tiles.wet") + tiles_index = _get_struct_value(tiles, "index") + + sample_ds = xr.open_dataset(file_list[0]) + try: + if var_name not in sample_ds: + raise KeyError(f"Variable '{var_name}' not found in {file_list[0].name}") + if "timstep" not in sample_ds: + raise KeyError(f"Variable 'timstep' not found in {file_list[0].name}") + + sample_var = sample_ds[var_name] + if sample_var.ndim == 3: + nz = 1 + ntimes_max = int(sample_var.shape[2]) + file_ndims = 4 + elif sample_var.ndim == 4: + nz = int(sample_var.shape[2]) + ntimes_max = int(sample_var.shape[3]) + file_ndims = 5 + else: + raise ValueError( + f"Unsupported variable rank for '{var_name}': {sample_var.ndim}. " + "Expected 3D (x, y, time) or 4D (x, y, z, time)." + ) + + if dtilex != int(sample_var.shape[0]) or dtiley != int(sample_var.shape[1]): + raise ValueError( + "nctiles2aste ERROR: Dimensions from tiles.tsize " + f"{dtilex}x{dtiley} do not match NetCDF variable dimensions " + f"{sample_var.shape[0]}x{sample_var.shape[1]}." + ) + finally: + sample_ds.close() + + ntiles_max = len(file_list) + ntiles = len(tile_list) + nlevels = len(levels_list) + ntimes = len(times_list) + + if ntiles_max != wet_tiles: + print( + "nctiles2aste WARNING: number of tiles in directory " + f"{ntiles_max} is different from number of wet tiles {wet_tiles}" + ) + + if ntiles > wet_tiles: + raise ValueError( + "nctiles2aste ERROR: number of requested tiles exceeds wet tile count " + f"({ntiles} > {wet_tiles})." + ) + + if max(tile_list) > wet_tiles: + raise ValueError( + "nctiles2aste ERROR: requested tile number exceeds wet tile count " + f"({max(tile_list)} > {wet_tiles})." + ) + + if nlevels > nz: + raise ValueError( + "nctiles2aste ERROR: number of requested levels exceeds available " + f"levels ({nlevels} > {nz})." + ) + + if max(levels_list) > nz: + raise ValueError( + "nctiles2aste ERROR: requested level exceeds available levels " + f"({max(levels_list)} > {nz})." + ) + + if max(times_list) > ntimes_max: + raise ValueError( + "nctiles2aste ERROR: requested time step exceeds available time steps " + f"({max(times_list)} > {ntimes_max})." + ) + + field_tiles: list[np.ndarray] = [] + indices: list[np.ndarray] = [] + time_step = np.full((ntimes,), np.nan) + + for itile, tile_num in enumerate(tile_list): + tile_indices = _get_tile_indices(tiles_index, tile_num) + indices.append(tile_indices) + + tile_fname = data_dir / f"{var_name}.{tile_num:04d}.nc" + if not tile_fname.is_file(): + raise FileNotFoundError(f"Tile file not found: {tile_fname}") + + tmp = np.full((dtilex, dtiley, nlevels, ntimes), np.nan) + + ds = xr.open_dataset(tile_fname) + try: + var = ds[var_name] + if itile == 0: + for itime, time_1based in enumerate(times_list): + time_step[itime] = ( + ds["timstep"].isel({ds["timstep"].dims[0]: time_1based - 1}).item() + ) + + if file_ndims == 4: + x_dim, y_dim, t_dim = var.dims + for itime, time_1based in enumerate(times_list): + slice2d = var.isel({t_dim: time_1based - 1}).values + tmp[:, :, 0, itime] = np.asarray(slice2d) + else: + x_dim, y_dim, z_dim, t_dim = var.dims + for itime, time_1based in enumerate(times_list): + for ilev, level_1based in enumerate(levels_list): + slice2d = var.isel( + {z_dim: level_1based - 1, t_dim: time_1based - 1} + ).values + tmp[:, :, ilev, itime] = np.asarray(slice2d) + + field_tiles.append(tmp) + finally: + ds.close() + + if flag_aste: + print( + "WARNING: putting fields into full ASTE compact format; " + "this can be memory intensive" + ) + field = np.full((nx, ny, nlevels, ntimes), np.nan) + for itime in range(ntimes): + time_tiles = [tile[:, :, :, itime] for tile in field_tiles] + field[:, :, :, itime] = tiles2field(time_tiles, indices, (nx, ny, nlevels)) + else: + field = field_tiles + + return field, time_step From c7b8d34ca53cd54fc87b8af813e57758e1c1ba08 Mon Sep 17 00:00:00 2001 From: Matthew Hoffman Date: Thu, 7 May 2026 17:06:11 -0600 Subject: [PATCH 2/7] Add CLI entrypoint and output of result to netcdf --- landice/mesh_tools_li/misc/nctiles2aste_v2.py | 207 +++++++++++++++++- 1 file changed, 205 insertions(+), 2 deletions(-) diff --git a/landice/mesh_tools_li/misc/nctiles2aste_v2.py b/landice/mesh_tools_li/misc/nctiles2aste_v2.py index 1069e13d1..52e497c5e 100644 --- a/landice/mesh_tools_li/misc/nctiles2aste_v2.py +++ b/landice/mesh_tools_li/misc/nctiles2aste_v2.py @@ -1,5 +1,7 @@ from __future__ import annotations +import argparse +import json from pathlib import Path from typing import Any, Iterable, Sequence @@ -273,12 +275,13 @@ def nctiles2aste_v2( ) if file_ndims == 4: - x_dim, y_dim, t_dim = var.dims + t_dim = var.dims[2] for itime, time_1based in enumerate(times_list): slice2d = var.isel({t_dim: time_1based - 1}).values tmp[:, :, 0, itime] = np.asarray(slice2d) else: - x_dim, y_dim, z_dim, t_dim = var.dims + z_dim = var.dims[2] + t_dim = var.dims[3] for itime, time_1based in enumerate(times_list): for ilev, level_1based in enumerate(levels_list): slice2d = var.isel( @@ -303,3 +306,203 @@ def nctiles2aste_v2( field = field_tiles return field, time_step + + +def _load_tiles_structure(tiles_file: str | Path, tiles_key: str = "tiles") -> Any: + """Load the tiles structure from .mat or .json.""" + tiles_path = Path(tiles_file) + if not tiles_path.is_file(): + raise FileNotFoundError(f"Tiles file not found: {tiles_path}") + + suffix = tiles_path.suffix.lower() + if suffix == ".mat": + try: + from scipy.io import loadmat + except ImportError as exc: + raise ImportError( + "Reading MATLAB .mat tiles files requires scipy. " + "Install it with: pip install scipy" + ) from exc + + data = loadmat(tiles_path, squeeze_me=True, struct_as_record=False) + if tiles_key not in data: + raise KeyError( + f"Key '{tiles_key}' not found in {tiles_path}. " + f"Available top-level keys: {sorted(k for k in data if not k.startswith('__'))}" + ) + return data[tiles_key] + + if suffix == ".json": + with tiles_path.open("r", encoding="utf-8") as handle: + payload = json.load(handle) + if tiles_key in payload: + return payload[tiles_key] + return payload + + raise ValueError("Unsupported tiles file type. Use .mat or .json") + + +def _field_to_data_array( + field: np.ndarray, + var_name: str, + time_step: np.ndarray, + has_vertical_dim: bool, +) -> xr.DataArray: + """Convert compact field output to an xarray DataArray.""" + arr = np.asarray(field) + if arr.ndim != 4: + raise ValueError( + f"Expected compact field with 4 dimensions (x, y, level, time), got {arr.ndim}." + ) + + nx, ny, nlevels, ntimes = arr.shape + if has_vertical_dim: + return xr.DataArray( + arr, + dims=("x", "y", "level", "time"), + coords={ + "x": np.arange(1, nx + 1), + "y": np.arange(1, ny + 1), + "level": np.arange(1, nlevels + 1), + "time": np.arange(1, ntimes + 1), + "timstep": ("time", np.asarray(time_step)), + }, + name=var_name, + ) + + arr2d = arr[:, :, 0, :] + return xr.DataArray( + arr2d, + dims=("x", "y", "time"), + coords={ + "x": np.arange(1, nx + 1), + "y": np.arange(1, ny + 1), + "time": np.arange(1, ntimes + 1), + "timstep": ("time", np.asarray(time_step)), + }, + name=var_name, + ) + + +def _var_has_vertical_dim(root_dir: str | Path, var_name: str) -> bool: + """Return True when the tile variable is (x, y, z, time).""" + root_path = Path(root_dir) + var_dir = root_path / var_name + files = sorted(var_dir.glob("*.nc")) + if not files: + raise FileNotFoundError(f"No NetCDF files found in: {var_dir}") + + ds = xr.open_dataset(files[0]) + try: + if var_name not in ds: + raise KeyError(f"Variable '{var_name}' not found in {files[0].name}") + return ds[var_name].ndim == 4 + finally: + ds.close() + + +def _build_parser() -> argparse.ArgumentParser: + parser = argparse.ArgumentParser( + description=( + "Read ASTE tile NetCDF files and write one selected variable as a compact " + "ASTE NetCDF file." + ) + ) + parser.add_argument( + "--root-dir", + required=True, + help="Directory that contains per-variable folders (e.g. .../SALT, .../THETA).", + ) + parser.add_argument( + "--tiles-file", + required=True, + help="Path to tiles metadata file (.mat or .json).", + ) + parser.add_argument( + "--tiles-key", + default="tiles", + help="Top-level variable/key name inside --tiles-file (default: tiles).", + ) + parser.add_argument("--var", required=True, help="Variable name to extract.") + parser.add_argument( + "--tile-list", + nargs="+", + type=int, + required=True, + help="1-based tile numbers to extract.", + ) + parser.add_argument( + "--levels-list", + nargs="+", + type=int, + required=True, + help="1-based vertical level numbers to extract.", + ) + parser.add_argument( + "--times-list", + nargs="+", + type=int, + required=True, + help="1-based time indices to extract.", + ) + parser.add_argument( + "--output", + required=True, + help="Output NetCDF filename.", + ) + parser.add_argument( + "--flag-aste", + action="store_true", + help="Assemble selected tiles into compact ASTE field before writing.", + ) + return parser + + +def main() -> None: + parser = _build_parser() + args = parser.parse_args() + + tiles = _load_tiles_structure(args.tiles_file, args.tiles_key) + + var_name = args.var + has_vertical_dim = _var_has_vertical_dim(args.root_dir, var_name) + levels_for_var = args.levels_list if has_vertical_dim else [1] + + field, timesteps = nctiles2aste_v2( + var_name=var_name, + root_dir=args.root_dir, + tile_list=args.tile_list, + levels_list=levels_for_var, + times_list=args.times_list, + tiles=tiles, + flag_aste=args.flag_aste, + ) + + if not args.flag_aste: + raise ValueError( + "CLI NetCDF writing currently requires --flag-aste so each variable " + "is represented on a compact (x, y, level, time) grid." + ) + + ds_out = xr.Dataset() + ds_out[var_name] = _field_to_data_array( + field, + var_name, + timesteps, + has_vertical_dim=has_vertical_dim, + ) + + ds_out.attrs["source"] = "Generated by nctiles2aste_v2.py" + ds_out.attrs["tile_list"] = ",".join(str(tile) for tile in args.tile_list) + ds_out.attrs["levels_list"] = ",".join(str(level) for level in args.levels_list) + ds_out.attrs["times_list"] = ",".join(str(time) for time in args.times_list) + ds_out.attrs["variable"] = var_name + + output_path = Path(args.output) + output_path.parent.mkdir(parents=True, exist_ok=True) + ds_out.to_netcdf(output_path) + print(f"Wrote: {output_path}") + + +if __name__ == "__main__": + main() From eef114ad74233a8aec8f43115f24448ab2c6cc5c Mon Sep 17 00:00:00 2001 From: Matthew Hoffman Date: Thu, 7 May 2026 17:11:03 -0600 Subject: [PATCH 3/7] Use single flat dir for ASTE data --- landice/mesh_tools_li/misc/nctiles2aste_v2.py | 20 ++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/landice/mesh_tools_li/misc/nctiles2aste_v2.py b/landice/mesh_tools_li/misc/nctiles2aste_v2.py index 52e497c5e..d43acb8e0 100644 --- a/landice/mesh_tools_li/misc/nctiles2aste_v2.py +++ b/landice/mesh_tools_li/misc/nctiles2aste_v2.py @@ -162,13 +162,15 @@ def nctiles2aste_v2( 1-based indices to match MATLAB behavior. """ root_dir = Path(root_dir) - data_dir = root_dir / var_name + data_dir = root_dir if not data_dir.is_dir(): raise FileNotFoundError(f"Data directory not found: {data_dir}") - file_list = sorted(data_dir.glob("*.nc")) + file_list = sorted(data_dir.glob(f"{var_name}.*.nc")) if not file_list: - raise FileNotFoundError(f"No NetCDF files found in: {data_dir}") + raise FileNotFoundError( + f"No NetCDF files found for variable '{var_name}' in: {data_dir}" + ) tile_list = [int(tile) for tile in tile_list] levels_list = [int(level) for level in levels_list] @@ -387,10 +389,11 @@ def _field_to_data_array( def _var_has_vertical_dim(root_dir: str | Path, var_name: str) -> bool: """Return True when the tile variable is (x, y, z, time).""" root_path = Path(root_dir) - var_dir = root_path / var_name - files = sorted(var_dir.glob("*.nc")) + files = sorted(root_path.glob(f"{var_name}.*.nc")) if not files: - raise FileNotFoundError(f"No NetCDF files found in: {var_dir}") + raise FileNotFoundError( + f"No NetCDF files found for variable '{var_name}' in: {root_path}" + ) ds = xr.open_dataset(files[0]) try: @@ -411,7 +414,10 @@ def _build_parser() -> argparse.ArgumentParser: parser.add_argument( "--root-dir", required=True, - help="Directory that contains per-variable folders (e.g. .../SALT, .../THETA).", + help=( + "Flat directory containing ASTE tile files such as " + "SALT.0005.nc, THETA.0005.nc, SIarea.0005.nc." + ), ) parser.add_argument( "--tiles-file", From a051ed339daeb66ebdecbe17b54f3200ff83b01e Mon Sep 17 00:00:00 2001 From: Matthew Hoffman Date: Thu, 7 May 2026 17:14:13 -0600 Subject: [PATCH 4/7] make time and level lists optional --- landice/mesh_tools_li/misc/nctiles2aste_v2.py | 40 ++++++++++++------- 1 file changed, 26 insertions(+), 14 deletions(-) diff --git a/landice/mesh_tools_li/misc/nctiles2aste_v2.py b/landice/mesh_tools_li/misc/nctiles2aste_v2.py index d43acb8e0..84a1d78f6 100644 --- a/landice/mesh_tools_li/misc/nctiles2aste_v2.py +++ b/landice/mesh_tools_li/misc/nctiles2aste_v2.py @@ -386,8 +386,8 @@ def _field_to_data_array( ) -def _var_has_vertical_dim(root_dir: str | Path, var_name: str) -> bool: - """Return True when the tile variable is (x, y, z, time).""" +def _get_var_grid_info(root_dir: str | Path, var_name: str) -> tuple[bool, int, int]: + """Return (has_vertical_dim, nz, ntimes) for a variable in flat tile files.""" root_path = Path(root_dir) files = sorted(root_path.glob(f"{var_name}.*.nc")) if not files: @@ -399,7 +399,15 @@ def _var_has_vertical_dim(root_dir: str | Path, var_name: str) -> bool: try: if var_name not in ds: raise KeyError(f"Variable '{var_name}' not found in {files[0].name}") - return ds[var_name].ndim == 4 + var = ds[var_name] + if var.ndim == 4: + return True, int(var.shape[2]), int(var.shape[3]) + if var.ndim == 3: + return False, 1, int(var.shape[2]) + raise ValueError( + f"Unsupported variable rank for '{var_name}': {var.ndim}. " + "Expected 3D (x, y, time) or 4D (x, y, z, time)." + ) finally: ds.close() @@ -439,17 +447,15 @@ def _build_parser() -> argparse.ArgumentParser: ) parser.add_argument( "--levels-list", - nargs="+", + nargs="*", type=int, - required=True, - help="1-based vertical level numbers to extract.", + help="1-based vertical level numbers to extract (default: all available levels).", ) parser.add_argument( "--times-list", - nargs="+", + nargs="*", type=int, - required=True, - help="1-based time indices to extract.", + help="1-based time indices to extract (default: all available times).", ) parser.add_argument( "--output", @@ -471,15 +477,21 @@ def main() -> None: tiles = _load_tiles_structure(args.tiles_file, args.tiles_key) var_name = args.var - has_vertical_dim = _var_has_vertical_dim(args.root_dir, var_name) - levels_for_var = args.levels_list if has_vertical_dim else [1] + has_vertical_dim, nz, ntimes_max = _get_var_grid_info(args.root_dir, var_name) + + if has_vertical_dim: + levels_for_var = args.levels_list if args.levels_list else list(range(1, nz + 1)) + else: + levels_for_var = [1] + + times_for_var = args.times_list if args.times_list else list(range(1, ntimes_max + 1)) field, timesteps = nctiles2aste_v2( var_name=var_name, root_dir=args.root_dir, tile_list=args.tile_list, levels_list=levels_for_var, - times_list=args.times_list, + times_list=times_for_var, tiles=tiles, flag_aste=args.flag_aste, ) @@ -500,8 +512,8 @@ def main() -> None: ds_out.attrs["source"] = "Generated by nctiles2aste_v2.py" ds_out.attrs["tile_list"] = ",".join(str(tile) for tile in args.tile_list) - ds_out.attrs["levels_list"] = ",".join(str(level) for level in args.levels_list) - ds_out.attrs["times_list"] = ",".join(str(time) for time in args.times_list) + ds_out.attrs["levels_list"] = ",".join(str(level) for level in levels_for_var) + ds_out.attrs["times_list"] = ",".join(str(time) for time in times_for_var) ds_out.attrs["variable"] = var_name output_path = Path(args.output) From 85d4fe743c2c56a393208f1c50963b24a18dd18d Mon Sep 17 00:00:00 2001 From: Matthew Hoffman Date: Thu, 7 May 2026 17:18:37 -0600 Subject: [PATCH 5/7] dimension issues being fixed --- landice/mesh_tools_li/misc/nctiles2aste_v2.py | 136 +++++++++++++----- 1 file changed, 101 insertions(+), 35 deletions(-) diff --git a/landice/mesh_tools_li/misc/nctiles2aste_v2.py b/landice/mesh_tools_li/misc/nctiles2aste_v2.py index 84a1d78f6..bfc2a4df9 100644 --- a/landice/mesh_tools_li/misc/nctiles2aste_v2.py +++ b/landice/mesh_tools_li/misc/nctiles2aste_v2.py @@ -144,6 +144,78 @@ def tiles2field( return field +def _infer_var_layout( + var: xr.DataArray, + timstep_len: int, + dtilex: int | None = None, + dtiley: int | None = None, +) -> dict[str, Any]: + """Infer dimension roles for ASTE tile variables. + + Supported ranks are 3D (2D field in time) and 4D (3D field in time). + """ + if var.ndim not in (3, 4): + raise ValueError( + f"Unsupported variable rank for '{var.name}': {var.ndim}. " + "Expected 3D or 4D." + ) + + dims = list(var.dims) + shape = list(var.shape) + + if dtilex is not None and dtiley is not None: + if shape[-2] == dtilex and shape[-1] == dtiley: + x_idx, y_idx = var.ndim - 2, var.ndim - 1 + elif shape[0] == dtilex and shape[1] == dtiley: + x_idx, y_idx = 0, 1 + else: + raise ValueError( + "nctiles2aste ERROR: could not find tile dimensions in variable " + f"'{var.name}' with shape {tuple(shape)} for tiles.tsize " + f"{dtilex}x{dtiley}." + ) + else: + x_idx, y_idx = var.ndim - 2, var.ndim - 1 + + time_candidates = [idx for idx, size in enumerate(shape) if size == timstep_len] + spatial_idxs = {x_idx, y_idx} + time_idx = next((idx for idx in time_candidates if idx not in spatial_idxs), None) + if time_idx is None: + time_idx = next((idx for idx in range(var.ndim) if idx not in spatial_idxs), None) + + if time_idx is None: + raise ValueError( + f"Could not infer time dimension for '{var.name}' with shape {tuple(shape)}." + ) + + if var.ndim == 4: + z_candidates = [ + idx for idx in range(var.ndim) if idx not in spatial_idxs and idx != time_idx + ] + if len(z_candidates) != 1: + raise ValueError( + f"Could not infer vertical dimension for '{var.name}' with shape {tuple(shape)}." + ) + z_idx = z_candidates[0] + z_dim = dims[z_idx] + nz = int(shape[z_idx]) + has_vertical_dim = True + else: + z_dim = None + nz = 1 + has_vertical_dim = False + + return { + "has_vertical_dim": has_vertical_dim, + "x_dim": dims[x_idx], + "y_dim": dims[y_idx], + "time_dim": dims[time_idx], + "z_dim": z_dim, + "nz": nz, + "ntimes": int(shape[time_idx]), + } + + def nctiles2aste_v2( var_name: str, root_dir: str | Path, @@ -189,26 +261,11 @@ def nctiles2aste_v2( raise KeyError(f"Variable 'timstep' not found in {file_list[0].name}") sample_var = sample_ds[var_name] - if sample_var.ndim == 3: - nz = 1 - ntimes_max = int(sample_var.shape[2]) - file_ndims = 4 - elif sample_var.ndim == 4: - nz = int(sample_var.shape[2]) - ntimes_max = int(sample_var.shape[3]) - file_ndims = 5 - else: - raise ValueError( - f"Unsupported variable rank for '{var_name}': {sample_var.ndim}. " - "Expected 3D (x, y, time) or 4D (x, y, z, time)." - ) - - if dtilex != int(sample_var.shape[0]) or dtiley != int(sample_var.shape[1]): - raise ValueError( - "nctiles2aste ERROR: Dimensions from tiles.tsize " - f"{dtilex}x{dtiley} do not match NetCDF variable dimensions " - f"{sample_var.shape[0]}x{sample_var.shape[1]}." - ) + timstep_len = int(sample_ds["timstep"].size) + layout = _infer_var_layout(sample_var, timstep_len, dtilex=dtilex, dtiley=dtiley) + nz = int(layout["nz"]) + ntimes_max = int(layout["ntimes"]) + has_vertical_dim = bool(layout["has_vertical_dim"]) finally: sample_ds.close() @@ -276,19 +333,33 @@ def nctiles2aste_v2( ds["timstep"].isel({ds["timstep"].dims[0]: time_1based - 1}).item() ) - if file_ndims == 4: - t_dim = var.dims[2] + if not has_vertical_dim: + t_dim = layout["time_dim"] + x_dim = layout["x_dim"] + y_dim = layout["y_dim"] for itime, time_1based in enumerate(times_list): - slice2d = var.isel({t_dim: time_1based - 1}).values + slice2d = var.isel({t_dim: time_1based - 1}).transpose(x_dim, y_dim).values + if slice2d.shape != (dtilex, dtiley): + raise ValueError( + f"Unexpected 2D slice shape {slice2d.shape} for '{var_name}', " + f"expected {(dtilex, dtiley)}." + ) tmp[:, :, 0, itime] = np.asarray(slice2d) else: - z_dim = var.dims[2] - t_dim = var.dims[3] + z_dim = layout["z_dim"] + t_dim = layout["time_dim"] + x_dim = layout["x_dim"] + y_dim = layout["y_dim"] for itime, time_1based in enumerate(times_list): for ilev, level_1based in enumerate(levels_list): slice2d = var.isel( {z_dim: level_1based - 1, t_dim: time_1based - 1} - ).values + ).transpose(x_dim, y_dim).values + if slice2d.shape != (dtilex, dtiley): + raise ValueError( + f"Unexpected 2D slice shape {slice2d.shape} for '{var_name}', " + f"expected {(dtilex, dtiley)}." + ) tmp[:, :, ilev, itime] = np.asarray(slice2d) field_tiles.append(tmp) @@ -399,15 +470,10 @@ def _get_var_grid_info(root_dir: str | Path, var_name: str) -> tuple[bool, int, try: if var_name not in ds: raise KeyError(f"Variable '{var_name}' not found in {files[0].name}") - var = ds[var_name] - if var.ndim == 4: - return True, int(var.shape[2]), int(var.shape[3]) - if var.ndim == 3: - return False, 1, int(var.shape[2]) - raise ValueError( - f"Unsupported variable rank for '{var_name}': {var.ndim}. " - "Expected 3D (x, y, time) or 4D (x, y, z, time)." - ) + if "timstep" not in ds: + raise KeyError(f"Variable 'timstep' not found in {files[0].name}") + layout = _infer_var_layout(ds[var_name], int(ds["timstep"].size)) + return bool(layout["has_vertical_dim"]), int(layout["nz"]), int(layout["ntimes"]) finally: ds.close() From 2060f1496ccb980dfb85f74d1c951f2ea48786b3 Mon Sep 17 00:00:00 2001 From: Matthew Hoffman Date: Thu, 7 May 2026 19:00:21 -0700 Subject: [PATCH 6/7] reorder dims --- landice/mesh_tools_li/misc/nctiles2aste_v2.py | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/landice/mesh_tools_li/misc/nctiles2aste_v2.py b/landice/mesh_tools_li/misc/nctiles2aste_v2.py index bfc2a4df9..278d2cec1 100644 --- a/landice/mesh_tools_li/misc/nctiles2aste_v2.py +++ b/landice/mesh_tools_li/misc/nctiles2aste_v2.py @@ -430,27 +430,31 @@ def _field_to_data_array( nx, ny, nlevels, ntimes = arr.shape if has_vertical_dim: + # Keep internal assembly order (x, y, level, time), but write NetCDF + # variable dimensions as time-first to match ASTE tile input ordering. + arr_out = np.transpose(arr, (3, 2, 0, 1)) return xr.DataArray( - arr, - dims=("x", "y", "level", "time"), + arr_out, + dims=("time", "level", "x", "y"), coords={ + "time": np.arange(1, ntimes + 1), + "level": np.arange(1, nlevels + 1), "x": np.arange(1, nx + 1), "y": np.arange(1, ny + 1), - "level": np.arange(1, nlevels + 1), - "time": np.arange(1, ntimes + 1), "timstep": ("time", np.asarray(time_step)), }, name=var_name, ) arr2d = arr[:, :, 0, :] + arr2d_out = np.transpose(arr2d, (2, 0, 1)) return xr.DataArray( - arr2d, - dims=("x", "y", "time"), + arr2d_out, + dims=("time", "x", "y"), coords={ + "time": np.arange(1, ntimes + 1), "x": np.arange(1, nx + 1), "y": np.arange(1, ny + 1), - "time": np.arange(1, ntimes + 1), "timstep": ("time", np.asarray(time_step)), }, name=var_name, From 697197e3ca27ea867c2a9e647faa77843e18b4a2 Mon Sep 17 00:00:00 2001 From: Matthew Hoffman Date: Thu, 7 May 2026 21:16:55 -0700 Subject: [PATCH 7/7] Refactor seam logic to support all Greenland tiles --- .../mesh_tools_li/interpolate_ecco_to_mali.py | 597 +++++++++--------- 1 file changed, 291 insertions(+), 306 deletions(-) diff --git a/landice/mesh_tools_li/interpolate_ecco_to_mali.py b/landice/mesh_tools_li/interpolate_ecco_to_mali.py index a35b28451..6bcba3d9f 100755 --- a/landice/mesh_tools_li/interpolate_ecco_to_mali.py +++ b/landice/mesh_tools_li/interpolate_ecco_to_mali.py @@ -9,11 +9,9 @@ option. ECCO grid files are unconventional for MITgcm simulations and do not contain -adequate grid cell corner information for tile edge cells (necessary for -creating scrip file and remapping). Therefore, this information needed to be -hardcoded into interpolate_ecco_to_mali.py, using grid corner information from -adjacent MITgcm tiles. Currently supported tiles are 14 and 27, which cover the -western and northern coasts of Greenland. +adequate grid cell corner information for all tile edge cells (necessary for +creating scrip file and remapping). This script includes seam-aware stitching +for the Greenland ASTE subset (tiles 5, 11, 12, 14, 15, 27). ''' import os @@ -24,15 +22,18 @@ import pandas as pd from mpas_tools.logging import check_call from mpas_tools.scrip.from_mpas import scrip_from_mpas -from mpas_tools.ocean.depth import compute_zmid from argparse import ArgumentParser import time import cftime -import glob import json -from shapely.geometry import shape -from shapely.prepared import prep -from shapely.geometry import Point +try: + from shapely.geometry import shape + from shapely.prepared import prep + from shapely.geometry import Point +except ImportError: + shape = None + prep = None + Point = None class eccoToMaliInterp: @@ -57,150 +58,217 @@ def __init__(self): self.options = args self.mapping_file_name = f'ecco_to_mali_mapping_{self.options.method}.nc' - self.tileNums = [14, 27] # Hardcoding ECCO tiles for now. Indexing is dependent on tile order, do not change + self.supportedTiles = {5, 11, 12, 14, 15, 27} + self.tileNums = sorted(self.supportedTiles) + # Hardcoded seams for Greenland ASTE subset: tile_edge -> neighbor_edge. + self.greenlandSeams = { + 5: {'right': (14, 'bottom', True)}, + 11: {'right': (14, 'left', False), 'top': (12, 'bottom', False)}, + 12: {'right': (15, 'left', False)}, + 14: {'right': (27, 'bottom', True), 'top': (15, 'bottom', False)}, + 27: {'right': (5, 'bottom', True)} + } + self._tileGeom = None + + @staticmethod + def _edge_vector(xg, yg, edge_name): + if edge_name == 'left': + return xg[0, :].copy(), yg[0, :].copy() + if edge_name == 'right': + return xg[-1, :].copy(), yg[-1, :].copy() + if edge_name == 'bottom': + return xg[:, 0].copy(), yg[:, 0].copy() + if edge_name == 'top': + return xg[:, -1].copy(), yg[:, -1].copy() + raise ValueError(f"Unknown edge name: {edge_name}") + + def _build_greenland_tile_geometry(self): + if self._tileGeom is not None: + return self._tileGeom + + grids = {} + for tile in self.tileNums: + padded_tile = str(tile).zfill(4) + grid_file = os.path.join(self.options.eccoDir, f'GRID.{padded_tile}.nc') + if not os.path.exists(grid_file): + raise ValueError(f"GRID netcdf file for tile {tile} not found in {self.options.eccoDir}") + with xr.open_dataset(grid_file) as grd: + grids[tile] = { + 'XC': grd['XC'][:].values, + 'YC': grd['YC'][:].values, + 'XG': grd['XG'][:].values, + 'YG': grd['YG'][:].values + } + + tile_geom = {} + for tile in self.tileNums: + xc = grids[tile]['XC'] + yc = grids[tile]['YC'] + xg = grids[tile]['XG'] + yg = grids[tile]['YG'] + nx, ny = xc.shape + + right_vec_lon = None + right_vec_lat = None + top_vec_lon = None + top_vec_lat = None + + tile_seams = self.greenlandSeams.get(tile, {}) + + if 'right' in tile_seams: + neighbor_tile, neighbor_edge, reverse_order = tile_seams['right'] + if neighbor_tile in grids: + rv_lon, rv_lat = self._edge_vector(grids[neighbor_tile]['XG'], grids[neighbor_tile]['YG'], neighbor_edge) + if reverse_order: + rv_lon = rv_lon[::-1] + rv_lat = rv_lat[::-1] + right_vec_lon, right_vec_lat = rv_lon, rv_lat + + if 'top' in tile_seams: + neighbor_tile, neighbor_edge, reverse_order = tile_seams['top'] + if neighbor_tile in grids: + tv_lon, tv_lat = self._edge_vector(grids[neighbor_tile]['XG'], grids[neighbor_tile]['YG'], neighbor_edge) + if reverse_order: + tv_lon = tv_lon[::-1] + tv_lat = tv_lat[::-1] + top_vec_lon, top_vec_lat = tv_lon, tv_lat + + i_cells = [] + j_cells = [] + lon_cells = [] + lat_cells = [] + c1_lon = [] + c2_lon = [] + c3_lon = [] + c4_lon = [] + c1_lat = [] + c2_lat = [] + c3_lat = [] + c4_lat = [] + + for i in range(nx): + for j in range(ny): + se_lon = xg[i, j] + se_lat = yg[i, j] + + if i + 1 < nx: + sw_lon = xg[i + 1, j] + sw_lat = yg[i + 1, j] + elif right_vec_lon is not None: + sw_lon = right_vec_lon[j] + sw_lat = right_vec_lat[j] + else: + continue + + if j + 1 < ny: + ne_lon = xg[i, j + 1] + ne_lat = yg[i, j + 1] + elif top_vec_lon is not None: + ne_lon = top_vec_lon[i] + ne_lat = top_vec_lat[i] + else: + continue + + if i + 1 < nx and j + 1 < ny: + nw_lon = xg[i + 1, j + 1] + nw_lat = yg[i + 1, j + 1] + elif i + 1 == nx and j + 1 < ny and right_vec_lon is not None: + nw_lon = right_vec_lon[j + 1] + nw_lat = right_vec_lat[j + 1] + elif i + 1 < nx and j + 1 == ny and top_vec_lon is not None: + nw_lon = top_vec_lon[i + 1] + nw_lat = top_vec_lat[i + 1] + else: + # Corner cell where both +i and +j need cross-face corner. + continue + + i_cells.append(i) + j_cells.append(j) + lon_cells.append(xc[i, j]) + lat_cells.append(yc[i, j]) + c1_lon.append(se_lon) + c2_lon.append(sw_lon) + c3_lon.append(nw_lon) + c4_lon.append(ne_lon) + c1_lat.append(se_lat) + c2_lat.append(sw_lat) + c3_lat.append(nw_lat) + c4_lat.append(ne_lat) + + tile_geom[tile] = { + 'i': np.array(i_cells, dtype=np.int32), + 'j': np.array(j_cells, dtype=np.int32), + 'center_lon': np.array(lon_cells, dtype=np.float64), + 'center_lat': np.array(lat_cells, dtype=np.float64), + 'corner_lon': np.stack((c1_lon, c2_lon, c3_lon, c4_lon), axis=1).astype(np.float64), + 'corner_lat': np.stack((c1_lat, c2_lat, c3_lat, c4_lat), axis=1).astype(np.float64) + } + + self._tileGeom = tile_geom + return tile_geom def merge_MITgcm_files_to_unstruct_grid(self): - ncfiles = glob.glob(os.path.join(self.options.eccoDir, "*.nc")) - saltBool = 0 - thetaBool = 0 - siaBool = 0 - - #loop through MITgcm tile files and create unstructured arrays for each remapped variable + tile_geom = self._build_greenland_tile_geometry() + + sal_tiles = [] + theta_tiles = [] + sia_tiles = [] + mask_tiles = [] + z = None + time_vals = None + + # Loop through selected MITgcm tile files and create unstructured arrays. for tile in self.tileNums: - print(f"Processing Tile {tile}") - o3dmBool = 0 - gridBool = 0 - for file in ncfiles: - print(f"Processing File {file}") - paddedTile = str(tile).zfill(4) - print(f"Padded Tile: {paddedTile}") - if paddedTile in file: - print("Padded tile is in file") - - if 'SALT' in file: - validFile = self.options.eccoDir + '/SALT.' + paddedTile + '.nc' - ds_ecco = xr.open_dataset(validFile) - - # only open depth variable once, regardless of which variables we are interested in - if saltBool == 0 and thetaBool == 0: - z = ds_ecco['dep'].values - - if tile == 14: - sal = ds_ecco['SALT'][:].values - # omit rows/columns where no corner information in scrip file - sal = sal[:,:,:,1:-1] - nt,nz,nx,ny = np.shape(sal) - sal = sal.reshape(nt, nz, nx * ny) - elif tile == 27: - sl = ds_ecco['SALT'][:].values - # omit rows/columns where no corner information in scrip file - sl = sl[:,:,0:-1,0:-1] - nt,nz,nx,ny = np.shape(sl) - sl = sl.reshape(nt, nz, nx * ny) - sal = np.concatenate((sal,sl), axis=2) - - # ECCO 'land' variable is equivalent to MALI's orig3dOceanMask, despite the counterintuitive name. - # Only need to call this once per tile, but needs to be using a theta/salinity file because it includes depth - # No need to call this if only interpolating SIarea - if tile == 14 and o3dmBool == 0: - orig3dOceanMask = ds_ecco['land'].values - # omit rows/columns where no corner information in scrip file - orig3dOceanMask = orig3dOceanMask[:,:,1:-1] - nz,nx,ny = np.shape(orig3dOceanMask) - orig3dOceanMask = orig3dOceanMask.reshape(nz, nx * ny) - od3mBool = 1 - elif tile == 27 and o3dmBool == 0: - o3dm = ds_ecco['land'].values - # omit rows/columns where no corner information in scrip file - o3dm = o3dm[:,0:-1,0:-1] - nz,nx,ny = np.shape(o3dm) - o3dm = o3dm.reshape(nz, nx * ny) - orig3dOceanMask = np.concatenate((orig3dOceanMask, o3dm), axis=1) - o3dmBool = 1 - - # set MALI invalid value - boolZ,boolXY = np.where(orig3dOceanMask == 0) - for iZ,iXY in zip(boolZ,boolXY): - sal[:,iZ,iXY] = 1e36 - - saltBool = 1 - - # Repeat for potential temperature if needed - elif 'THETA' in file: - validFile = self.options.eccoDir + '/THETA.' + paddedTile + '.nc' - ds_ecco = xr.open_dataset(validFile) - - if saltBool == 0 and thetaBool == 0: - z = ds_ecco['dep'].values - - if tile == 14: - theta = ds_ecco['THETA'][:].values - # omit rows/columns where no corner information in scrip file - theta = theta[:,:,:,1:-1] - nt,nz,nx,ny = np.shape(theta) - theta = theta.reshape(nt, nz, nx * ny) - elif tile == 27: - th = ds_ecco['THETA'][:].values - # omit rows/columns where no corner information in scrip file - th = th[:,:,0:-1,0:-1] - nt,nz,nx,ny = np.shape(th) - th = th.reshape(nt, nz, nx * ny) - theta = np.concatenate((theta,th), axis=2) - - # ECCO 'land' variable is equivalent to MALI's orig3dOceanMask, despite the counterintuitive name. - # Only need to call this once per tile, but needs to be using a theta/salinity file because it includes depth - # No need to call this if only interpolating SIarea - if tile == 14 and o3dmBool == 0: - orig3dOceanMask = ds_ecco['land'].values - # omit rows/columns where no corner information in scrip file - orig3dOceanMask = orig3dOceanMask[:,:,1:-1] - nz,nx,ny = np.shape(orig3dOceanMask) - orig3dOceanMask = orig3dOceanMask.reshape(nz, nx * ny) - od3mBool = 1 - elif tile == 27 and o3dmBool == 0: - o3dm = ds_ecco['land'].values - # omit rows/columns where no corner information in scrip file - o3dm = o3dm[:,0:-1,0:-1] - nz,nx,ny = np.shape(o3dm) - o3dm = o3dm.reshape(nz, nx * ny) - orig3dOceanMask = np.concatenate((orig3dOceanMask, o3dm), axis=1) - o3dmBool = 1 - - # set MALI invalid value - boolZ,boolXY = np.where(orig3dOceanMask == 0) - for iZ,iXY in zip(boolZ,boolXY): - theta[:,iZ,iXY] = 1e36 - - thetaBool = 1 - - # Repeat for sea ice fraction if needed - elif 'SIarea' in file and self.options.sia: - siaBool = 1 - validFile = self.options.eccoDir + '/SIarea.' + paddedTile + '.nc' - ds_ecco = xr.open_dataset(validFile) - if tile == 14: - sia = ds_ecco['SIarea'][:].values - # omit rows/columns where no corner information in scrip file - sia = sia[:,:,1:-1] - nt,nx,ny = np.shape(sia) - sia = sia.reshape(nt, nx * ny) - elif tile == 27: - si = ds_ecco['SIarea'][:].values - # omit rows/columns where no corner information in scrip file - si = si[:,0:-1,0:-1] - nt,nx,ny = np.shape(si) - si = si.reshape(nt, nx * ny) - sia = np.concatenate((sia,si), axis=1) - - if (saltBool == 0): - raise ValueError(f"SALT netcdf file for tile {tile} not found in {self.options.eccoDir}") + paddedTile = str(tile).zfill(4) + print(f"Processing tile {paddedTile}") + + salt_file = os.path.join(self.options.eccoDir, f"SALT.{paddedTile}.nc") + theta_file = os.path.join(self.options.eccoDir, f"THETA.{paddedTile}.nc") + sia_file = os.path.join(self.options.eccoDir, f"SIarea.{paddedTile}.nc") - if (thetaBool == 0): + if not os.path.exists(salt_file): + raise ValueError(f"SALT netcdf file for tile {tile} not found in {self.options.eccoDir}") + if not os.path.exists(theta_file): raise ValueError(f"THETA netcdf file for tile {tile} not found in {self.options.eccoDir}") - - if (self.options.sia and siaBool == 0): + if self.options.sia and not os.path.exists(sia_file): raise ValueError(f"SIarea netcdf file for tile {tile} not found in {self.options.eccoDir}") + with xr.open_dataset(salt_file) as ds_salt: + if z is None: + z = ds_salt['dep'].values + if time_vals is None: + time_vals = ds_salt['tim'].values + i_idx = tile_geom[tile]['i'] + j_idx = tile_geom[tile]['j'] + sal_tile = ds_salt['SALT'][:].values + nt, nz, nx, ny = np.shape(sal_tile) + sal_tiles.append(sal_tile[:, :, i_idx, j_idx]) + + # ECCO 'land' is equivalent to MALI's orig3dOceanMask. + mask_tile = ds_salt['land'][:].values + mask_tiles.append(mask_tile[:, i_idx, j_idx]) + + with xr.open_dataset(theta_file) as ds_theta: + theta_tile = ds_theta['THETA'][:].values + theta_tiles.append(theta_tile[:, :, i_idx, j_idx]) + + if self.options.sia: + with xr.open_dataset(sia_file) as ds_sia_in: + sia_tile = ds_sia_in['SIarea'][:].values + sia_tiles.append(sia_tile[:, i_idx, j_idx]) + + sal = np.concatenate(sal_tiles, axis=2) + theta = np.concatenate(theta_tiles, axis=2) + orig3dOceanMask = np.concatenate(mask_tiles, axis=1) + nt = sal.shape[0] + if self.options.sia: + sia = np.concatenate(sia_tiles, axis=1) + + # Set MALI invalid value over non-ocean cells. + ocean_mask = orig3dOceanMask[np.newaxis, :, :] > 0 + sal = np.where(ocean_mask, sal, 1e36) + theta = np.where(ocean_mask, theta, 1e36) + # combine into new netcdf. Need to load depth and time from original ecco files. Assuming time/depth is consistent # across all files, just load info from most recent @@ -211,7 +279,7 @@ def merge_MITgcm_files_to_unstruct_grid(self): # Save ECCO time variable as datetime string. Will be converted into xtime format when saving final output file # Subtract one month from each time step. Ecco to output every month as an average of the previous month. We # want to force MALI with ECCO data from the same month (avoid 1 month lag) - time = ds_ecco['tim'].values + time = time_vals self.xtime_str = [(pd.to_datetime(dt) - pd.DateOffset(months=1)).strftime("%Y-%m-%d_%H:%M:%S").ljust(64) for dt in time] # save unstructured variables with MALI/MPAS-O names @@ -375,6 +443,8 @@ def remap_ecco_to_mali(self): # Use geojson to add any additional cells to icebergFjordMask if self.options.geojson is not None: + if shape is None or prep is None or Point is None: + raise ImportError("shapely is required when using --geojson") with open(self.options.geojson) as f: geo = json.load(f) polygon = prep(shape(geo["features"][0]["geometry"])) @@ -477,165 +547,80 @@ def remap_ecco_to_mali(self): def create_ECCO_scrip_file(self): print("Creating ECCO scrip file") + tile_geom = self._build_greenland_tile_geometry() + + lon_parts = [] + lat_parts = [] + lon_corner1_parts = [] + lon_corner2_parts = [] + lon_corner3_parts = [] + lon_corner4_parts = [] + lat_corner1_parts = [] + lat_corner2_parts = [] + lat_corner3_parts = [] + lat_corner4_parts = [] - for tile in self.tileNums : - # start processing with grid files - paddedTile = str(tile).zfill(4) - gridFile = self.options.eccoDir + '/GRID.' + paddedTile + '.nc' - grd = xr.open_dataset(gridFile) - - # identify cell centers and corners - # lat/lon centers - XC = grd['XC'][:].values - YC = grd['YC'][:].values - - # lat/lon corners - XG = grd['XG'][:].values - YG = grd['YG'][:].values - - Xse_corner = np.zeros((XC.shape)) - Xsw_corner = np.zeros((XC.shape)) - Xnw_corner = np.zeros((XC.shape)) - Xne_corner = np.zeros((XC.shape)) - - Yse_corner = np.zeros((YC.shape)) - Ysw_corner = np.zeros((YC.shape)) - Ynw_corner = np.zeros((YC.shape)) - Yne_corner = np.zeros((YC.shape)) - - nx,ny = XC.shape - for i in np.arange(0,nx-1): - for ii in np.arange(0,ny-1): - Xse_corner[i,ii] = XG[i,ii] - Xsw_corner[i,ii] = XG[i+1,ii] - Xnw_corner[i,ii] = XG[i+1,ii+1] - Xne_corner[i,ii] = XG[i,ii+1] - - Yse_corner[i,ii] = YG[i,ii] - Ysw_corner[i,ii] = YG[i+1,ii] - Ynw_corner[i,ii] = YG[i+1,ii+1] - Yne_corner[i,ii] = YG[i,ii+1] - - if tile == 14: - # Stitch together corner info from neighboring tile - gridFile = self.options.eccoDir + '/GRID.0027.nc' - grd27 = xr.open_dataset(gridFile) - xg27 = grd27['XG'][:].values - yg27 = grd27['YG'][:].values - xg = np.flip(xg27[1:,0]) - yg = np.flip(yg27[1:,0]) - - for i in np.arange(0,ny-1): - Xse_corner[nx-1,i] = XG[nx-1,i] - Xne_corner[nx-1,i] = XG[nx-1,i+1] - Xnw_corner[nx-1,i] = xg[i] - Xsw_corner[nx-1,i] = xg[i-1] - - Yse_corner[nx-1,i] = YG[nx-1,i] - Yne_corner[nx-1,i] = YG[nx-1,i+1] - Ynw_corner[nx-1,i] = yg[i] - Ysw_corner[nx-1,i] = yg[i-1] - - # Remove edge cells without corner information - XC = XC[:,1:-1] - YC = YC[:,1:-1] - Xse_corner = Xse_corner[:,1:-1] - Xne_corner = Xne_corner[:,1:-1] - Xnw_corner = Xnw_corner[:,1:-1] - Xsw_corner = Xsw_corner[:,1:-1] - Yse_corner = Yse_corner[:,1:-1] - Yne_corner = Yne_corner[:,1:-1] - Ynw_corner = Ynw_corner[:,1:-1] - Ysw_corner = Ysw_corner[:,1:-1] - - lon = XC.flatten() - lat = YC.flatten() - lon_corner1 = Xse_corner.flatten() - lon_corner2 = Xsw_corner.flatten() - lon_corner3 = Xnw_corner.flatten() - lon_corner4 = Xne_corner.flatten() - lat_corner1 = Yse_corner.flatten() - lat_corner2 = Ysw_corner.flatten() - lat_corner3 = Ynw_corner.flatten() - lat_corner4 = Yne_corner.flatten() - - elif tile == 27 : - # Remove edge cells without corner information - XC = XC[0:-1,0:-1] - YC = YC[0:-1,0:-1] - Xse_corner = Xse_corner[0:-1,0:-1] - Xne_corner = Xne_corner[0:-1,0:-1] - Xnw_corner = Xnw_corner[0:-1,0:-1] - Xsw_corner = Xsw_corner[0:-1,0:-1] - Yse_corner = Yse_corner[0:-1,0:-1] - Yne_corner = Yne_corner[0:-1,0:-1] - Ynw_corner = Ynw_corner[0:-1,0:-1] - Ysw_corner = Ysw_corner[0:-1,0:-1] - - ln = XC.flatten() - lon = np.concatenate((lon, ln)) - lt = YC.flatten() - lat = np.concatenate((lat,lt)) - lnc1 = Xse_corner.flatten() - lon_corner1 = np.concatenate((lon_corner1,lnc1)) - lnc2 = Xsw_corner.flatten() - lon_corner2 = np.concatenate((lon_corner2,lnc2)) - lnc3 = Xnw_corner.flatten() - lon_corner3 = np.concatenate((lon_corner3,lnc3)) - lnc4 = Xne_corner.flatten() - lon_corner4 = np.concatenate((lon_corner4,lnc4)) - - ltc1 = Yse_corner.flatten() - lat_corner1 = np.concatenate((lat_corner1,ltc1)) - ltc2 = Ysw_corner.flatten() - lat_corner2 = np.concatenate((lat_corner2,ltc2)) - ltc3 = Ynw_corner.flatten() - lat_corner3 = np.concatenate((lat_corner3,ltc3)) - ltc4 = Yne_corner.flatten() - lat_corner4 = np.concatenate((lat_corner4,ltc4)) - - # Create scrip file for combined unstructured grid - grid_corner_lon = np.zeros((len(lon_corner1),4)) - grid_corner_lat = np.zeros((len(lat_corner1),4)) - grid_center_lon = np.zeros((len(lon),)) - grid_center_lat = np.zeros((len(lat),)) - grid_corner_lon[:,0] = lon_corner1 - grid_corner_lon[:,1] = lon_corner2 - grid_corner_lon[:,2] = lon_corner3 - grid_corner_lon[:,3] = lon_corner4 - grid_corner_lat[:,0] = lat_corner1 - grid_corner_lat[:,1] = lat_corner2 - grid_corner_lat[:,2] = lat_corner3 - grid_corner_lat[:,3] = lat_corner4 - grid_center_lon = lon - grid_center_lat = lat - grid_imask = np.ones((len(lon),)) # assume using all points - grid_dims = np.array(lon.shape) - - scrip = xr.Dataset() - scrip['grid_corner_lon'] = xr.DataArray(grid_corner_lon.astype('float64'), - dims=('grid_size','grid_corners'), - attrs={'units':'degrees'}) - scrip['grid_corner_lat'] = xr.DataArray(grid_corner_lat.astype('float64'), - dims=('grid_size','grid_corners'), - attrs={'units':'degrees'}) - scrip['grid_center_lon'] = xr.DataArray(grid_center_lon.astype('float64'), - dims=('grid_size'), - attrs={'units':'degrees'}) - scrip['grid_center_lat'] = xr.DataArray(grid_center_lat.astype('float64'), - dims=('grid_size'), - attrs={'units':'degrees'}) - scrip['grid_imask'] = xr.DataArray(grid_imask.astype('int32'), - dims=('grid_size'), - attrs={'units':'unitless'}) - scrip['grid_dims'] = xr.DataArray(grid_dims.astype('int32'), - dims=('grid_rank'), - attrs={'units':'unitless'}) - self.ecco_scripfile = 'ecco_combined_unstructured.scrip.nc' - scrip.to_netcdf(self.ecco_scripfile) - scrip.close() - subprocess.run(["ncatted", "-a", "_FillValue,,d,,", self.ecco_scripfile]) - print("ECCO scrip file complete") + for tile in self.tileNums: + lon_parts.append(tile_geom[tile]['center_lon']) + lat_parts.append(tile_geom[tile]['center_lat']) + lon_corner1_parts.append(tile_geom[tile]['corner_lon'][:, 0]) + lon_corner2_parts.append(tile_geom[tile]['corner_lon'][:, 1]) + lon_corner3_parts.append(tile_geom[tile]['corner_lon'][:, 2]) + lon_corner4_parts.append(tile_geom[tile]['corner_lon'][:, 3]) + lat_corner1_parts.append(tile_geom[tile]['corner_lat'][:, 0]) + lat_corner2_parts.append(tile_geom[tile]['corner_lat'][:, 1]) + lat_corner3_parts.append(tile_geom[tile]['corner_lat'][:, 2]) + lat_corner4_parts.append(tile_geom[tile]['corner_lat'][:, 3]) + + lon = np.concatenate(lon_parts) + lat = np.concatenate(lat_parts) + lon_corner1 = np.concatenate(lon_corner1_parts) + lon_corner2 = np.concatenate(lon_corner2_parts) + lon_corner3 = np.concatenate(lon_corner3_parts) + lon_corner4 = np.concatenate(lon_corner4_parts) + lat_corner1 = np.concatenate(lat_corner1_parts) + lat_corner2 = np.concatenate(lat_corner2_parts) + lat_corner3 = np.concatenate(lat_corner3_parts) + lat_corner4 = np.concatenate(lat_corner4_parts) + + # Create scrip file for combined unstructured grid. + grid_corner_lon = np.zeros((len(lon_corner1), 4)) + grid_corner_lat = np.zeros((len(lat_corner1), 4)) + grid_corner_lon[:, 0] = lon_corner1 + grid_corner_lon[:, 1] = lon_corner2 + grid_corner_lon[:, 2] = lon_corner3 + grid_corner_lon[:, 3] = lon_corner4 + grid_corner_lat[:, 0] = lat_corner1 + grid_corner_lat[:, 1] = lat_corner2 + grid_corner_lat[:, 2] = lat_corner3 + grid_corner_lat[:, 3] = lat_corner4 + grid_imask = np.ones((len(lon),)) + grid_dims = np.array(lon.shape) + + scrip = xr.Dataset() + scrip['grid_corner_lon'] = xr.DataArray(grid_corner_lon.astype('float64'), + dims=('grid_size', 'grid_corners'), + attrs={'units': 'degrees'}) + scrip['grid_corner_lat'] = xr.DataArray(grid_corner_lat.astype('float64'), + dims=('grid_size', 'grid_corners'), + attrs={'units': 'degrees'}) + scrip['grid_center_lon'] = xr.DataArray(lon.astype('float64'), + dims=('grid_size'), + attrs={'units': 'degrees'}) + scrip['grid_center_lat'] = xr.DataArray(lat.astype('float64'), + dims=('grid_size'), + attrs={'units': 'degrees'}) + scrip['grid_imask'] = xr.DataArray(grid_imask.astype('int32'), + dims=('grid_size'), + attrs={'units': 'unitless'}) + scrip['grid_dims'] = xr.DataArray(grid_dims.astype('int32'), + dims=('grid_rank'), + attrs={'units': 'unitless'}) + self.ecco_scripfile = 'ecco_combined_unstructured.scrip.nc' + scrip.to_netcdf(self.ecco_scripfile) + scrip.close() + subprocess.run(["ncatted", "-a", "_FillValue,,d,,", self.ecco_scripfile]) + print("ECCO scrip file complete") def main(): run = eccoToMaliInterp()