diff --git a/regrid_common.py b/regrid_common.py new file mode 100644 index 0000000..b34fad9 --- /dev/null +++ b/regrid_common.py @@ -0,0 +1,252 @@ +# Copyright 2026 ACCESS-NRI and contributors. See the top-level COPYRIGHT file for details. +# SPDX-License-Identifier: Apache-2.0 + +# ========================================================================================= +# These are common functions/classes which assist with regridding +# ========================================================================================= + +import os +import xesmf as xe +import xarray as xr +import argparse + +from scripts_common import md5sum + + +def _guess_longitude_name(ds): + coords = ds.cf.coordinates + if "longitude" in coords: + if len(coords["longitude"]) == 1: + return coords["longitude"][0] + else: + raise KeyError("Multiple longitude variables exist. Please pass --lon-name") + elif "lon" in ds: + return "lon" + else: + raise KeyError( + "Cannot automatically determine the name of the longitude variable. Please pass --lon-name" + ) + + +def _guess_latitude_name(ds): + coords = ds.cf.coordinates + if "latitude" in coords: + if len(coords["latitude"]) == 1: + return coords["latitude"][0] + else: + raise KeyError("Multiple latitude variables exist. Please pass --lat-name") + elif "lat" in ds: + return "lat" + else: + raise KeyError( + "Cannot automatically determine the name of the latitude variable. 
Please pass --lat-name" + ) + + +class Regrid_Common: + + def __init__(self): + self.method = "bilinear" + self.extrap = "nearest_s2d" + self.periodic = True + self.parser = argparse.ArgumentParser( + description=( + "Interpolate a provided forcing file to a grid specified by a provided MOM supergrid file" + ) + ) + + def parse_cli(self): + + parser = self.parser + + parser.add_argument( + "--hgrid-filename", + type=str, + required=True, + help="The path to the MOM supergrid file to use as a grid for interpolation.", + ) + + parser.add_argument( + "--output-filename", + type=str, + required=True, + help="The path to the file to be outputted.", + ) + + parser.add_argument( + "--mask-filename", + type=str, + default=None, + help=( + "The path to a land-sea (0-1) mask for the forcing file. Cells that fall on land in " + "the destination grid will take their values from the nearest source grid cell." + ), + ) + + parser.add_argument( + "--lon-name", + type=str, + default=None, + help=( + "The name of the longitude variable in the input grid. If not passed, an attempt will " + "be made to guess the name." + ), + ) + + parser.add_argument( + "--lat-name", + type=str, + default=None, + help=( + "The name of the latitude variable in the input grid. If not passed, an attempt will " + "be made to guess the name." 
+            ),
+        )
+
+        args = parser.parse_args()
+        self.args = args
+        self.hgrid_filename = os.path.abspath(args.hgrid_filename)
+        self.output_filename = os.path.abspath(args.output_filename)
+        self.mask_filename = args.mask_filename
+        self.lon_name = args.lon_name
+        self.lat_name = args.lat_name
+
+        if self.mask_filename:
+            self.mask_filename = os.path.abspath(self.mask_filename)
+
+        # some info about how the file was generated
+
+        runcmd_args = f"--hgrid-filename={self.hgrid_filename} --output-filename={self.output_filename}"
+
+        # NOTE: these must reference the instance attributes; bare mask_filename /
+        # lon_name / lat_name are not local names in this method (NameError).
+        if self.mask_filename:
+            runcmd_args += f" --mask-filename={self.mask_filename}"
+        if self.lon_name:
+            runcmd_args += f" --lon-name={self.lon_name}"
+        if self.lat_name:
+            runcmd_args += f" --lat-name={self.lat_name}"
+
+        self.runcmd_args = runcmd_args
+
+        ## NOTE: it's implied that forcing_filename is set outside this class
+
+    def open_datasets(self):
+
+        forcing_filename = self.forcing_filename
+        hgrid_filename = self.hgrid_filename
+        mask_filename = self.mask_filename
+        lon_name = self.lon_name
+        lat_name = self.lat_name
+
+        # Load the input data
+        forcing_src = xr.open_dataset(forcing_filename).compute()
+        hgrid = xr.open_dataset(hgrid_filename).compute()
+        if mask_filename:
+            forcing_mask = xr.open_dataset(mask_filename).compute()
+
+        # Drop "mask" variable from forcing_src if it exists
+        if "mask" in forcing_src:
+            forcing_src = forcing_src.drop_vars("mask")
+
+        # Standardise lon/lat coordinate names
+        if not lon_name:
+            lon_name = _guess_longitude_name(forcing_src)
+
+        if not lat_name:
+            lat_name = _guess_latitude_name(forcing_src)
+
+        forcing_src = forcing_src.rename({lon_name: "lon", lat_name: "lat"})
+
+        # Get source and destination grid
+        grid_src = forcing_src[["lon", "lat"]]
+        if mask_filename:
+            # Add the mask to the source grid so that land values are extrapolated
+            if "mask" in forcing_mask:
+                grid_src = grid_src.assign(mask=forcing_mask["mask"])
+            else:
+                raise ValueError(
+                    "Input mask-filename must contain a variable named 'mask'"
+                )
+
+        # 
Destination grid is tracer cell centres + # hgrid = xr.open_dataset(hgrid_path) + hgrid_x = hgrid.x[1::2, 1::2] + hgrid_y = hgrid.y[1::2, 1::2] + hgrid_xc = hgrid.x[::2, ::2] + hgrid_yc = hgrid.y[::2, ::2] + grid_dest = xr.Dataset( + coords={ + "lon": (("ny", "nx"), hgrid_x.values), + "lat": (("ny", "nx"), hgrid_y.values), + "lon_b": (("nyp", "nxp"), hgrid_xc.values), + "lat_b": (("nyp", "nxp"), hgrid_yc.values), + }, + ) + + self.forcing_src = forcing_src + if mask_filename: + self.forcing_mask = forcing_mask + self.grid_src = grid_src + self.grid_dest = grid_dest + + def regrid_forcing(self): + + # Regrid using bilinear interpolation with nearest neighbour extrapolation + # NOTE: This will not conserve global quantities + regridder = xe.Regridder( + self.grid_src, + self.grid_dest, + method=self.method, + extrap_method=self.extrap, + periodic=self.periodic, + ) + forcing_regrid = regridder(self.forcing_src, keep_attrs=True) + + # Add coodinates (required by data_table) + forcing_regrid = forcing_regrid.assign_coords( + lon=self.grid_dest["lon"], lat=self.grid_dest["lat"] + ) + + forcing_regrid["lat"].attrs = dict( + long_name="Latitude of T-cell center", + standard_name="latitude", + units="degree_north", + ) + forcing_regrid["lon"].attrs = dict( + long_name="Longitude of T-cell center", + standard_name="longitude", + units="degrees_east", + ) + + self.forcing_regrid = forcing_regrid + + def save_output(self): + + forcing_regrid = self.forcing_regrid + + # Info about input data used + file_hashes = [ + f"{self.forcing_filename} (md5 hash: {md5sum(self.forcing_filename)})", + f"{self.hgrid_filename} (md5 hash: {md5sum(self.hgrid_filename)})", + ] + if self.mask_filename: + file_hashes.append( + f"{self.mask_filename} (md5 hash: {md5sum(self.mask_filename)})" + ) + + global_attrs = { + "inputFile": ", ".join(file_hashes), + } + forcing_regrid.attrs = forcing_regrid.attrs | global_attrs + + # Save output + # _FillValue is required by older (MOM5-era) versions of FMS 
+ var_encoding = dict(zlib=True, complevel=4, _FillValue=-1.0e10) + for var in forcing_regrid.data_vars: + forcing_regrid[var].encoding |= var_encoding + # Coordinates should not have _FillValue + coord_encoding = dict(_FillValue=None) + for coord in forcing_regrid.coords: + forcing_regrid[coord].encoding |= coord_encoding + + unlimited_dims = "time" if "time" in forcing_regrid.dims else None + forcing_regrid.to_netcdf(self.output_filename, unlimited_dims=unlimited_dims) diff --git a/rof_pattern_generation/generate_rofi_pattern.py b/rof_pattern_generation/generate_rofi_pattern.py new file mode 100755 index 0000000..453e2b2 --- /dev/null +++ b/rof_pattern_generation/generate_rofi_pattern.py @@ -0,0 +1,229 @@ +# Copyright 2026 ACCESS-NRI and contributors. See the top-level COPYRIGHT file for details. +# SPDX-License-Identifier: Apache-2.0 + +# ========================================================================================= +# Generate a spreading pattern to spread iceberge runoff fluxes, based on regridding +# the Mankoff 2025 spreading climatology dataset and moving any results on land to the nearest ocean cell +# +# To run: +# python generate_rofi_pattern.py --hgrid-filename= +# --output-filename= --topog-file= +# +# This script currently supports using the hgrid file and topog file in MOM6 formats +# for the ocean grid and mask +# +# The run command and full github url of the current version of this script is added to the +# metadata of the generated weights file. This is to uniquely identify the script and inputs used +# to generate the mesh file. To produce weights files for sharing, ensure you are using a version +# of this script which is committed and pushed to github. For mesh files intended for released +# configurations, use the latest version checked in to the main branch of the github repository. 
+#
+# Contact:
+#    Anton Steketee
+#
+# Dependencies:
+#    xesmf, xarray and sklearn
+# =========================================================================================
+
+
+import os
+import sys
+from copy import copy
+from sklearn.neighbors import BallTree
+from xesmf.util import cell_area
+import xarray as xr
+import numpy as np
+
+from pathlib import Path
+
+path_root = Path(__file__).parents[1]
+sys.path.append(str(path_root))
+
+from regrid_common import Regrid_Common
+from scripts_common import get_provenance_metadata, md5sum
+from mesh_generation.generate_mesh import mom6_mask_detection
+
+# in a climatology, with a 365 day calendar, what's the day of the middle of each month
+DAY_IN_MONTH = [15.5, 45, 74.5, 105, 135.5, 166, 196.5, 227.5, 258, 288.5, 319, 349.5]
+
+# source data to regrid
+AQ_MELT_PATTERN = "/g/data/av17/access-nri/OM3/Mankoff_2025_V9/AQ_iceberg_melt.nc"
+GL_MELT_PATTERN = "/g/data/av17/access-nri/OM3/Mankoff_2025_V9/GL_iceberg_melt.nc"
+
+# attributes to copy from source data
+ATTRS = [
+    "title",
+    "history",
+    "original_data_source",
+]
+
+
+def move_runoff_on_land(grid_dest, mask, forcing_regrid_glob):
+    """
+    For a provided grid, land mask, and DataArray, move any data which is on land
+    into ocean, using a nearest neighbour algorithm
+    """
+    # NOTE: the parameter must be named grid_dest — the body below uses that name
+    # throughout (a mismatched name here raises NameError on every call).
+
+    grid_dest["mask"] = xr.DataArray(mask, dims=["ny", "nx"])
+
+    # Use .sizes for dimension lengths: nx/ny are dimensions without coordinate
+    # variables on this grid, so attribute access (grid_dest.nx) would fail.
+    nx = grid_dest.sizes["nx"]
+    ny = grid_dest.sizes["ny"]
+
+    # convert destination grid into 1d (lat, lon)
+    # source cells are all grid cells, target cells are ocean cells only
+    grid_dest["i"] = xr.DataArray(
+        np.arange(0, ny * nx).reshape(ny, nx), dims=["ny", "nx"]
+    )
+    grid_stacked = grid_dest[["lat", "lon", "i"]].stack(points=["ny", "nx"])
+    source_index = grid_stacked.i.values
+    source_cells = list(zip(grid_stacked["lat"].values, grid_stacked["lon"].values))
+    source_cells_rad = np.deg2rad(source_cells)
+
+    mask_stacked = grid_stacked.where(
+        grid_dest["mask"].stack(points=["ny", "nx"]), drop=True
+    )
+    target_cells = 
list(zip(mask_stacked["lat"].values, mask_stacked["lon"].values))
+    target_index = mask_stacked["i"].values
+    target_cells_rad = np.deg2rad(target_cells)
+
+    # create a BallTree (nearest neighbour) and query for all source cells
+    mask_tree = BallTree(target_cells_rad, metric="haversine")
+    ii = mask_tree.query(source_cells_rad, return_distance=False)
+    # ii is index in target_cells, convert to grid index
+    new_index = mask_stacked.i.values[ii[:, 0]].astype(np.int64)
+
+    # adjustment for fraction of old_area/new_area, use esmf grid areas for consistency with CMEPS
+    area = cell_area(grid_dest)
+    area_1d = area.values.flatten()
+    area_frac = area_1d / area_1d[new_index]
+
+    # convert regrid result into 1d, move to nearest neighbour where needed
+    weights = np.reshape(forcing_regrid_glob.values, (12, nx * ny))
+    weights_adj = copy(weights)
+    for i, new_i in enumerate(new_index):
+        if i != new_i:
+            weights_adj[:, i] = 0
+            weights_adj[:, new_i] += weights[:, i] * area_frac[i]
+
+    # new output
+    weights_da = xr.DataArray(
+        np.reshape(weights_adj, (12, ny, nx)), dims=["time", "rof_ny", "rof_nx"]
+    )
+
+    return weights_da
+
+
+def main():
+
+    regrid_aq = Regrid_Common()
+
+    regrid_aq.parser.description = (
+        "Interpolate Mankoff 2025 climatology of iceberg spreading to a grid "
+        "specified by a provided MOM supergrid file and landmask from a MOM topog "
+        "file. For use in CMEPS."
+ ) + + regrid_aq.parser.add_argument( + "--topog-file", + type=str, + required=True, + help="Path to the model topography file, which is used to generate the model mask.", + ) + + regrid_aq.parse_cli() + + # We need one regrid instance for each hemisphere: + regrid_gl = copy(regrid_aq) + regrid_aq.forcing_filename = AQ_MELT_PATTERN + regrid_gl.forcing_filename = GL_MELT_PATTERN + + # stash some metadata for later and then regrid each hemsiphere + global_attrs = {} + for regrid in [regrid_aq, regrid_gl]: + regrid.open_datasets() + for k in ATTRS: + val = regrid.forcing_src.attrs.get(k) + if val is not None: + if k in global_attrs: + global_attrs[k] += f"\n{val}" + else: + global_attrs[k] = val + + global_attrs["Mankoff_doi"] = regrid.forcing_src.attrs["DOI"] + + # combine forcing for all regions + regrid.forcing_src = ( + regrid.forcing_src.drop_vars(["region_map", "region_map_expanded"]) + .sum("region") + .melt + ) + + regrid.regrid_forcing() + + # combine two regridding results + regrid = regrid_aq + forcing_regrid_glob = regrid_aq.forcing_regrid + regrid_gl.forcing_regrid + + # find the ocean mask using the bathymetry + topo = xr.open_dataset(regrid.args.topog_file) + mask = mom6_mask_detection(topo) + + # After doing the regridding, map any runoff on land cells into the ocean + weights_da = move_runoff_on_land(regrid.grid_dest, mask, forcing_regrid_glob) + + weights_ds = weights_da.to_dataset(name="pattern_Forr_rofi") + weights_ds["lat"] = forcing_regrid_glob["lat"] + weights_ds["lon"] = forcing_regrid_glob["lon"] + + # Set calendar + weights_ds["time"] = DAY_IN_MONTH + weights_ds.time.attrs = { + "standard_name": "time", + "calendar": "proleptic_gregorian", + "units": "days since 0001-01-01 00:00:00", + } + + # Add some info about how the file was generated + this_file = os.path.normpath(__file__) + + runcmd = ( + f"python3 {os.path.basename(this_file)} --topog-file={regrid.args.topog_file} " + f"{regrid.runcmd_args}" + ) + + # Info about input data used + 
file_hashes = [ + f"{AQ_MELT_PATTERN} (md5 hash: {md5sum(AQ_MELT_PATTERN)})", + f"{GL_MELT_PATTERN} (md5 hash: {md5sum(GL_MELT_PATTERN)})", + f"{regrid.hgrid_filename} (md5 hash: {md5sum(regrid.hgrid_filename)})", + f"{regrid.args.topog_file} (md5 hash: {md5sum(regrid.args.topog_file)})", + ] + if regrid.mask_filename: + file_hashes.append( + f"{regrid.mask_filename} (md5 hash: {md5sum(regrid.mask_filename)})" + ) + + global_attrs |= { + "description": "Mankoff 2025 iceberg spreading climatology remapped onto an ACCESS-OM3 grid", + "history": get_provenance_metadata(this_file, runcmd), + "inputFile": ", ".join(file_hashes), + } + + weights_ds.attrs = weights_ds.attrs | global_attrs + + # Save output + var_encoding = dict(zlib=True, complevel=1, _FillValue=-1.0e10) + for var in weights_ds.data_vars: + weights_ds[var].encoding |= var_encoding + # Coordinates should not have _FillValue + coord_encoding = dict(_FillValue=None) + for coord in weights_ds.coords: + weights_ds[coord].encoding |= coord_encoding + + unlimited_dims = "time" if "time" in weights_ds.dims else None + weights_ds.to_netcdf(regrid.output_filename, unlimited_dims=unlimited_dims) + + +if __name__ == "__main__": + + main() diff --git a/wombat_ic_generation/regrid_forcing.py b/wombat_ic_generation/regrid_forcing.py index 3e8df26..4c50cb0 100644 --- a/wombat_ic_generation/regrid_forcing.py +++ b/wombat_ic_generation/regrid_forcing.py @@ -1,4 +1,4 @@ -# Copyright 2025 ACCESS-NRI and contributors. See the top-level COPYRIGHT file for details. +# Copyright 2026 ACCESS-NRI and contributors. See the top-level COPYRIGHT file for details. 
# SPDX-License-Identifier: Apache-2.0 # ========================================================================================= @@ -30,191 +30,41 @@ import sys from pathlib import Path -import xarray as xr -import xesmf as xe - path_root = Path(__file__).parents[1] sys.path.append(str(path_root)) +from regrid_common import Regrid_Common from scripts_common import get_provenance_metadata, md5sum -def _guess_longitude_name(ds): - coords = ds.cf.coordinates - if "longitude" in coords: - if len(coords["longitude"]) == 1: - return coords["longitude"][0] - else: - raise KeyError("Multiple longitude variables exist. Please pass --lon-name") - elif "lon" in ds: - return "lon" - else: - raise KeyError( - "Cannot automatically determine the name of the longitude variable. Please pass --lon-name" - ) - - -def _guess_latitude_name(ds): - coords = ds.cf.coordinates - if "latitude" in coords: - if len(coords["latitude"]) == 1: - return coords["latitude"][0] - else: - raise KeyError("Multiple latitude variables exist. Please pass --lat-name") - elif "lat" in ds: - return "lat" - else: - raise KeyError( - "Cannot automatically determine the name of the latitude variable. Please pass --lat-name" - ) - - def main(): - parser = argparse.ArgumentParser( - description=( - "Interpolate a provided forcing file to a grid specified by a provided MOM supergrid" - "file for use in the MOM data_table." 
- ) - ) - parser.add_argument( + # Init + regrid = Regrid_Common() + + regrid.parser.add_argument( "--forcing-filename", type=str, required=True, help="The path to the forcing file to interpolate.", ) - parser.add_argument( - "--hgrid-filename", - type=str, - required=True, - help="The path to the MOM supergrid file to use as a grid for interpolation.", - ) - - parser.add_argument( - "--output-filename", - type=str, - required=True, - help="The path to the file to be outputted.", - ) - - parser.add_argument( - "--mask-filename", - type=str, - default=None, - help=( - "The path to a land-sea (0-1) mask for the forcing file. Cells that fall on land in " - "the destination grid will take their values from the nearest source grid cell." - ), - ) - - parser.add_argument( - "--lon-name", - type=str, - default=None, - help=( - "The name of the longitude variable in the input grid. If not passed, an attempt will " - "be made to guess the name." - ), - ) - - parser.add_argument( - "--lat-name", - type=str, - default=None, - help=( - "The name of the latitude variable in the input grid. If not passed, an attempt will " - "be made to guess the name." - ), - ) - - args = parser.parse_args() - forcing_filename = os.path.abspath(args.forcing_filename) - hgrid_filename = os.path.abspath(args.hgrid_filename) - output_filename = os.path.abspath(args.output_filename) - mask_filename = args.mask_filename - lon_name = args.lon_name - lat_name = args.lat_name - - if mask_filename: - mask_filename = os.path.abspath(mask_filename) - - this_file = os.path.normpath(__file__) - - # Add some info about how the file was generated - runcmd = ( - f"python3 {os.path.basename(this_file)} --forcing-filename={forcing_filename} " - f"--hgrid-filename={hgrid_filename} --output-filename={output_filename}" + regrid.parser.description = ( + regrid.parser.description + " For use in the MOM data_table." 
) - if mask_filename: - runcmd += f" --mask-filename={mask_filename}" - if lon_name: - runcmd += f" --lon-name={lon_name}" - if lat_name: - runcmd += f" --lat-name={lat_name}" - - file_hashes = [ - f"{forcing_filename} (md5 hash: {md5sum(forcing_filename)})", - f"{hgrid_filename} (md5 hash: {md5sum(hgrid_filename)})", - ] - if mask_filename: - file_hashes.append(f"{mask_filename} (md5 hash: {md5sum(mask_filename)})") - global_attrs = { - "history": get_provenance_metadata(this_file, runcmd), - "inputFile": ", ".join(file_hashes), - } - - # Load the input data - forcing_src = xr.open_dataset(forcing_filename).compute() - hgrid = xr.open_dataset(hgrid_filename).compute() - if mask_filename: - forcing_mask = xr.open_dataset(mask_filename).compute() - - # Drop "mask" variable from forcing_src if it exists - if "mask" in forcing_src: - forcing_src = forcing_src.drop_vars("mask") - - # Standardise lon/lat coordinate names - if not lon_name: - lon_name = _guess_longitude_name(forcing_src) - if not lat_name: - lat_name = _guess_latitude_name(forcing_src) + # Parse + regrid.parse_cli() + regrid.forcing_filename = os.path.abspath(regrid.args.forcing_filename) - forcing_src = forcing_src.rename({lon_name: "lon", lat_name: "lat"}) + regrid.open_datasets() - # Get source and destination grid - grid_src = forcing_src[["lon", "lat"]] - if mask_filename: - # Add the mask to the source grid so that land values are extrapolated - if "mask" in forcing_mask: - grid_src = grid_src.assign(mask=forcing_mask["mask"]) - else: - raise ValueError( - f"Input mask-filename must contain a variable named 'mask'" - ) + regrid.regrid_forcing() - # Destination grid is tracer cell centres - lon_dest = hgrid["x"][1:-1:2, 1:-1:2].to_dataset(name="lon") - lat_dest = hgrid["y"][1:-1:2, 1:-1:2].to_dataset(name="lat") - grid_dest = xr.merge((lon_dest, lat_dest)) + forcing_regrid = regrid.forcing_regrid - # Regrid using bilinear interpolation with nearest neighbour extrapolation - # NOTE: This will not 
conserve global quantities - regridder = xe.Regridder( - grid_src, - grid_dest, - method="bilinear", - extrap_method="nearest_s2d", - periodic=True, - ) - forcing_regrid = regridder(forcing_src, keep_attrs=True) - - # Add coodinates and metadata required by data_table - forcing_regrid = forcing_regrid.assign_coords( - lon=lon_dest["lon"], lat=lat_dest["lat"] - ) - forcing_regrid = forcing_regrid.rename({"nyp": "ny", "nxp": "nx"}) + # Add metadata required by data_table + # forcing_regrid = forcing_regrid.rename({"nyp": "ny", "nxp": "nx"}) forcing_regrid = forcing_regrid.assign_coords( { "ny": ("ny", range(forcing_regrid.sizes["ny"])), @@ -226,35 +76,30 @@ def main(): # (MOM5-era) versions of FMS forcing_regrid["ny"].attrs = dict(axis="Y", cartesian_axis="Y") forcing_regrid["nx"].attrs = dict(axis="X", cartesian_axis="X") - forcing_regrid["lat"].attrs = dict( - long_name="Latitude of T-cell center", - standard_name="latitude", - units="degree_north", - ) - forcing_regrid["lon"].attrs = dict( - long_name="Longitude of T-cell center", - standard_name="longitude", - units="degrees_east", - ) - forcing_regrid.attrs = forcing_regrid.attrs | global_attrs - # Save output - # _FillValue is required by older (MOM5-era) versions of FMS - var_encoding = dict(zlib=True, complevel=4, _FillValue=-1.0e36) - for var in forcing_regrid.data_vars: - forcing_regrid[var].encoding |= var_encoding - # Coordinates should not have _FillValue - coord_encoding = dict(_FillValue=None) - for coord in forcing_regrid.coords: - forcing_regrid[coord].encoding |= coord_encoding # Older (MOM5-era) versions of FMS can't handle integer type dimensions forcing_regrid["nx"].encoding |= {"dtype": "float32"} forcing_regrid["ny"].encoding |= {"dtype": "float32"} - unlimited_dims = "time" if "time" in forcing_regrid.dims else None - forcing_regrid.to_netcdf(output_filename, unlimited_dims=unlimited_dims) + + # Add some info about how the file was generated + this_file = os.path.normpath(__file__) + + runcmd 
= ( + f"python3 {os.path.basename(this_file)} --forcing-filename={regrid.forcing_filename} " + f"{regrid.runcmd_args}" + ) + + global_attrs = { + "history": get_provenance_metadata(this_file, runcmd), + } + + forcing_regrid.attrs = forcing_regrid.attrs | global_attrs + + regrid.forcing_regrid = forcing_regrid + + regrid.save_output() if __name__ == "__main__": - import argparse main()