From bb04706024ac2fa0e4e39110df3a2c3e3d94b58f Mon Sep 17 00:00:00 2001 From: Chris Blanton Date: Mon, 15 Dec 2025 18:27:01 -0500 Subject: [PATCH 01/12] Use xarray instead of CDO for merging the climatologies --- fre/app/generate_time_averages/combine.py | 42 ++++++++++++----------- 1 file changed, 22 insertions(+), 20 deletions(-) diff --git a/fre/app/generate_time_averages/combine.py b/fre/app/generate_time_averages/combine.py index ee3cff863..4452cab85 100644 --- a/fre/app/generate_time_averages/combine.py +++ b/fre/app/generate_time_averages/combine.py @@ -8,6 +8,7 @@ import subprocess import metomi.isodatetime.parsers +import xarray as xr from ..helpers import change_directory @@ -39,21 +40,29 @@ def form_bronx_directory_name(frequency: str, return frequency_label + '_' + str(interval_object.years) + 'yr' -def check_glob(target: str) -> None: +def merge_netcdfs(source: str, target: str) -> None: """ - Verify that at least one file is resolved by the glob. - Raises FileNotFoundError if no files are found. - - :param target: Glob target to resolve - :type target: str - :raises FileNotFoundError: No files found + Merge a glob string identifying a group of NetCDF files + into one combined NetCDF file. 
+ + :param source: Glob target of input files + :type source: str + :param target: Output file + :type source: str + :raises FileNotFoundError: Input files not found + :raises FileExistsError: Output file already exists :rtype: None """ - files = glob.glob(target) - if len(files) >= 1: - fre_logger.debug("%s has %s files", target, len(files)) + input_files = glob.glob(source) + if len(input_files) >= 1: + fre_logger.debug(f"'{source}' has {len(input_files)} files") else: - raise FileNotFoundError(f"target={target} resolves to no files") + raise FileNotFoundError(f"'{source}' resolves to no files") + if Path(target).exists(): + raise FileExistsError(f"Output file '{target}' already exists") + + ds = xr.open_mfdataset(input_files, compat='override', coords='minimal') + ds.to_netcdf(target) def combine( root_in_dir: str, @@ -107,8 +116,7 @@ def combine( root_in_dir: str, if frequency == 'yr': source = component + '.' + date_string + '.*.nc' target = component + '.' + date_string + '.nc' - check_glob(source) - subprocess.run(['cdo', '-O', 'merge', source, target], check=True) + merge_netcdfs(source, target) fre_logger.debug("Output file created: %s", target) fre_logger.debug("Copying to %s", outdir) subprocess.run(['cp', '-v', target, outdir], check=True) @@ -116,11 +124,5 @@ def combine( root_in_dir: str, for month_int in range(1,13): source = f"{component}.{date_string}.*.{month_int:02d}.nc" target = f"{component}.{date_string}.{month_int:02d}.nc" - check_glob(source) - - # does there exist a python-cdo way of doing the merge? 
- subprocess.run(['cdo', '-O', 'merge', source, target], check=True) - fre_logger.debug("Output file created: %s", target) - fre_logger.debug("Copying to %s", outdir) - + merge_netcdfs(source, target) subprocess.run(['cp', '-v', target, outdir], check=True) From 0276cf48e173f3b5c30736c02d6b343e2edf1f6b Mon Sep 17 00:00:00 2001 From: Chris Blanton Date: Tue, 16 Dec 2025 16:15:06 -0500 Subject: [PATCH 02/12] Regrid the static history files as well as the temporal ones --- fre/app/regrid_xy/regrid_xy.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/fre/app/regrid_xy/regrid_xy.py b/fre/app/regrid_xy/regrid_xy.py index 3e261c2e1..a7164f247 100644 --- a/fre/app/regrid_xy/regrid_xy.py +++ b/fre/app/regrid_xy/regrid_xy.py @@ -326,11 +326,18 @@ def regrid_xy(yamlfile: str, datadict["remap_dir"] = remap_dir datadict["input_date"] = input_date[:8] + # add temporal and static history files components = [] for component in yamldict["postprocess"]["components"]: - for this_source in component["sources"]: - if this_source["history_file"] == source: + for temporal_history in component["sources"]: + if temporal_history["history_file"] == source: components.append(component) + try: + for static_history in component["static"]: + if static_history["source"] == source: + components.append(component) + except KeyError: + pass # submit fregrid job for each component for component in components: From 877a792eef54c1154ecc22aaf89162a8a08af7a4 Mon Sep 17 00:00:00 2001 From: Chris Blanton Date: Tue, 16 Dec 2025 17:44:46 -0500 Subject: [PATCH 03/12] Update catalogbuilder import structure after upstream change --- fre/catalog/frecatalog.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fre/catalog/frecatalog.py b/fre/catalog/frecatalog.py index 7c65a6b7d..227ea35ca 100644 --- a/fre/catalog/frecatalog.py +++ b/fre/catalog/frecatalog.py @@ -5,7 +5,7 @@ import click from catalogbuilder.scripts import gen_intake_gfdl -from catalogbuilder.tests 
import compval +from catalogbuilder.scripts import compval from catalogbuilder.scripts import combine_cats From bae7bb23d28e16ae14039295eebd3fcded4919c1 Mon Sep 17 00:00:00 2001 From: Chris Blanton Date: Wed, 17 Dec 2025 09:09:07 -0500 Subject: [PATCH 04/12] Set time to be a record dimension so analysis scripts can use them --- fre/app/generate_time_averages/combine.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fre/app/generate_time_averages/combine.py b/fre/app/generate_time_averages/combine.py index 4452cab85..dc93411cb 100644 --- a/fre/app/generate_time_averages/combine.py +++ b/fre/app/generate_time_averages/combine.py @@ -62,7 +62,7 @@ def merge_netcdfs(source: str, target: str) -> None: raise FileExistsError(f"Output file '{target}' already exists") ds = xr.open_mfdataset(input_files, compat='override', coords='minimal') - ds.to_netcdf(target) + ds.to_netcdf(target, unlimited_dims=['time']) def combine( root_in_dir: str, From bb51b3cee467563b6daffae2d1f2efa42981c8bd Mon Sep 17 00:00:00 2001 From: Chris Blanton Date: Mon, 15 Dec 2025 18:27:01 -0500 Subject: [PATCH 05/12] Use xarray instead of CDO for merging the climatologies --- fre/app/generate_time_averages/combine.py | 42 ++++++++++++----------- 1 file changed, 22 insertions(+), 20 deletions(-) diff --git a/fre/app/generate_time_averages/combine.py b/fre/app/generate_time_averages/combine.py index ee3cff863..4452cab85 100644 --- a/fre/app/generate_time_averages/combine.py +++ b/fre/app/generate_time_averages/combine.py @@ -8,6 +8,7 @@ import subprocess import metomi.isodatetime.parsers +import xarray as xr from ..helpers import change_directory @@ -39,21 +40,29 @@ def form_bronx_directory_name(frequency: str, return frequency_label + '_' + str(interval_object.years) + 'yr' -def check_glob(target: str) -> None: +def merge_netcdfs(source: str, target: str) -> None: """ - Verify that at least one file is resolved by the glob. - Raises FileNotFoundError if no files are found. 
- - :param target: Glob target to resolve - :type target: str - :raises FileNotFoundError: No files found + Merge a glob string identifying a group of NetCDF files + into one combined NetCDF file. + + :param source: Glob target of input files + :type source: str + :param target: Output file + :type source: str + :raises FileNotFoundError: Input files not found + :raises FileExistsError: Output file already exists :rtype: None """ - files = glob.glob(target) - if len(files) >= 1: - fre_logger.debug("%s has %s files", target, len(files)) + input_files = glob.glob(source) + if len(input_files) >= 1: + fre_logger.debug(f"'{source}' has {len(input_files)} files") else: - raise FileNotFoundError(f"target={target} resolves to no files") + raise FileNotFoundError(f"'{source}' resolves to no files") + if Path(target).exists(): + raise FileExistsError(f"Output file '{target}' already exists") + + ds = xr.open_mfdataset(input_files, compat='override', coords='minimal') + ds.to_netcdf(target) def combine( root_in_dir: str, @@ -107,8 +116,7 @@ def combine( root_in_dir: str, if frequency == 'yr': source = component + '.' + date_string + '.*.nc' target = component + '.' + date_string + '.nc' - check_glob(source) - subprocess.run(['cdo', '-O', 'merge', source, target], check=True) + merge_netcdfs(source, target) fre_logger.debug("Output file created: %s", target) fre_logger.debug("Copying to %s", outdir) subprocess.run(['cp', '-v', target, outdir], check=True) @@ -116,11 +124,5 @@ def combine( root_in_dir: str, for month_int in range(1,13): source = f"{component}.{date_string}.*.{month_int:02d}.nc" target = f"{component}.{date_string}.{month_int:02d}.nc" - check_glob(source) - - # does there exist a python-cdo way of doing the merge? 
- subprocess.run(['cdo', '-O', 'merge', source, target], check=True) - fre_logger.debug("Output file created: %s", target) - fre_logger.debug("Copying to %s", outdir) - + merge_netcdfs(source, target) subprocess.run(['cp', '-v', target, outdir], check=True) From 85ee6a5b6a3c089e2577f9d091ed60e9ce9af3c8 Mon Sep 17 00:00:00 2001 From: Chris Blanton Date: Tue, 16 Dec 2025 16:15:06 -0500 Subject: [PATCH 06/12] Regrid the static history files as well as the temporal ones --- fre/app/regrid_xy/regrid_xy.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/fre/app/regrid_xy/regrid_xy.py b/fre/app/regrid_xy/regrid_xy.py index 3e261c2e1..a7164f247 100644 --- a/fre/app/regrid_xy/regrid_xy.py +++ b/fre/app/regrid_xy/regrid_xy.py @@ -326,11 +326,18 @@ def regrid_xy(yamlfile: str, datadict["remap_dir"] = remap_dir datadict["input_date"] = input_date[:8] + # add temporal and static history files components = [] for component in yamldict["postprocess"]["components"]: - for this_source in component["sources"]: - if this_source["history_file"] == source: + for temporal_history in component["sources"]: + if temporal_history["history_file"] == source: components.append(component) + try: + for static_history in component["static"]: + if static_history["source"] == source: + components.append(component) + except KeyError: + pass # submit fregrid job for each component for component in components: From e3b1016a8fb9ff1d3997c52395bee1f2197b4b21 Mon Sep 17 00:00:00 2001 From: Chris Blanton Date: Wed, 17 Dec 2025 09:09:07 -0500 Subject: [PATCH 07/12] Set time to be a record dimension so analysis scripts can use them --- fre/app/generate_time_averages/combine.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fre/app/generate_time_averages/combine.py b/fre/app/generate_time_averages/combine.py index 4452cab85..dc93411cb 100644 --- a/fre/app/generate_time_averages/combine.py +++ b/fre/app/generate_time_averages/combine.py @@ -62,7 +62,7 @@ def 
merge_netcdfs(source: str, target: str) -> None: raise FileExistsError(f"Output file '{target}' already exists") ds = xr.open_mfdataset(input_files, compat='override', coords='minimal') - ds.to_netcdf(target) + ds.to_netcdf(target, unlimited_dims=['time']) def combine( root_in_dir: str, From b989cfa63255e27ec5f21c44ce7cf1489cf0ac86 Mon Sep 17 00:00:00 2001 From: Chris Blanton Date: Mon, 22 Dec 2025 16:49:19 -0500 Subject: [PATCH 08/12] Modify fre pp mask-atmos-plevel to accept ".false." instead of "false" which is how users are now writing it. Also, remove the _unmsk note from the error message so folks don't use it --- fre/app/mask_atmos_plevel/mask_atmos_plevel.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fre/app/mask_atmos_plevel/mask_atmos_plevel.py b/fre/app/mask_atmos_plevel/mask_atmos_plevel.py index d60bd48bb..b7c409720 100755 --- a/fre/app/mask_atmos_plevel/mask_atmos_plevel.py +++ b/fre/app/mask_atmos_plevel/mask_atmos_plevel.py @@ -76,7 +76,7 @@ def mask_atmos_plevel_subtool(infile: str = None, for var in list(ds_in.variables): fre_logger.debug('for var = %s', var) if 'pressure_mask' in ds_in[var].attrs: - if ds_in[var].attrs['pressure_mask'].lower() == 'false': + if ds_in[var].attrs['pressure_mask'].lower() == '.false.': fre_logger.debug('first pressure masking trigger passed. 
processing data.') ds_out[var] = mask_field_above_surface_pressure(ds_in, var, ds_ps) ds_out[var].attrs['pressure_mask'] = "True" @@ -103,7 +103,7 @@ def mask_atmos_plevel_subtool(infile: str = None, fre_logger.info("Finished processing %s, wrote %s, pressure_mask is True", var, masked_var) else: - fre_logger.debug("Not processing %s, no pressure_mask attr, nor _unmsk in the variable name", var) + fre_logger.debug("Not processing '%s', no 'pressure_mask' attribute", var) fre_logger.info('Write the output file if any unmasked variables were masked') if ds_out.variables: From 0fe44fda6bb826a0939c7aa1f5ab77d0bc8e701d Mon Sep 17 00:00:00 2001 From: Chris Blanton Date: Tue, 23 Dec 2025 10:36:33 -0500 Subject: [PATCH 09/12] Adjust mask-atmos-plevel test case .false. is now replacing False as the variable attribute trigger --- .../test_files/reduced_ascii_files/atmos_cmip.ua_unmsk.cdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fre/tests/test_files/reduced_ascii_files/atmos_cmip.ua_unmsk.cdl b/fre/tests/test_files/reduced_ascii_files/atmos_cmip.ua_unmsk.cdl index def2bc5f1..899bf6433 100644 --- a/fre/tests/test_files/reduced_ascii_files/atmos_cmip.ua_unmsk.cdl +++ b/fre/tests/test_files/reduced_ascii_files/atmos_cmip.ua_unmsk.cdl @@ -50,7 +50,7 @@ variables: ua_unmsk:cell_measures = "area: area" ; ua_unmsk:standard_name = "eastward_wind" ; ua_unmsk:interp_method = "conserve_order2" ; - ua_unmsk:pressure_mask = "False" ; + ua_unmsk:pressure_mask = ".false." 
; // global attributes: :title = "c96L65_am5f9d7r0_amip" ; From d4ab40ac4f932baeb79720b60e7040070690f45c Mon Sep 17 00:00:00 2001 From: Chris Blanton Date: Tue, 23 Dec 2025 12:06:41 -0500 Subject: [PATCH 10/12] Add static history file parsing to regrid-xy test --- fre/app/regrid_xy/tests/generate_files.py | 7 ++++++ fre/app/regrid_xy/tests/test_regrid_xy.py | 30 +++++++++++++++++++++-- 2 files changed, 35 insertions(+), 2 deletions(-) diff --git a/fre/app/regrid_xy/tests/generate_files.py b/fre/app/regrid_xy/tests/generate_files.py index a39de7b12..8e2d022e5 100644 --- a/fre/app/regrid_xy/tests/generate_files.py +++ b/fre/app/regrid_xy/tests/generate_files.py @@ -152,6 +152,13 @@ def make_data(): history_file = source["history_file"] for i in range(1, ntiles+1): dataset.to_netcdf(f"{input_dir}/{date}.{history_file}.tile{i}.nc") + try: + for static_source in component["static"]: + history_file = static_source["source"] + for i in range(1, ntiles+1): + dataset.to_netcdf(f"{input_dir}/{date}.{history_file}.tile{i}.nc") + except KeyError: + pass def make_all(): diff --git a/fre/app/regrid_xy/tests/test_regrid_xy.py b/fre/app/regrid_xy/tests/test_regrid_xy.py index af5831d3e..74482091f 100644 --- a/fre/app/regrid_xy/tests/test_regrid_xy.py +++ b/fre/app/regrid_xy/tests/test_regrid_xy.py @@ -21,12 +21,14 @@ components = [] pp_input_files = [{"history_file":"pemberley"}, {"history_file":"longbourn"}] +pp_input_static_files = [{"source": "my_static_history"}, {"source": "my_static2"}] components.append({"xyInterp": f"{nxy},{nxy}", "interpMethod": "conserve_order2", "inputRealm": "atmos", "type": f"pride_and_prejudice", "sources": pp_input_files, - "postprocess_on": True} + "postprocess_on": True, + "static": pp_input_static_files} ) emma_input_files = [{"history_file":"hartfield"}, {"history_file":"donwell_abbey"}] components.append({"xyInterp": f"{nxy},{nxy}", @@ -91,10 +93,21 @@ def test_regrid_xy(): source=source, input_date=date+"TTTT") + # regrid the static inputs + 
for static_source_dict in pp_input_static_files: + source = static_source_dict['source'] + regrid_xy.regrid_xy(yamlfile=str(yamlfile), + input_dir=str(input_dir), + output_dir=str(output_dir), + work_dir=str(work_dir), + remap_dir=str(remap_dir), + source=source, + input_date=date+"TTTT") + #check answers + output_subdir = output_dir/f"{nxy}_{nxy}.conserve_order2" for source_dict in pp_input_files + emma_input_files: # Files are now output to a subdirectory based on grid size and interpolation method - output_subdir = output_dir/f"{nxy}_{nxy}.conserve_order2" outfile = output_subdir/f"{date}.{source_dict['history_file']}.nc" test = xr.load_dataset(outfile) @@ -108,6 +121,19 @@ def test_regrid_xy(): assert np.all(test["darcy"].values==np.float64(2.0)) assert np.all(test["wins"].values==np.float64(3.0)) + for static_source_dict in pp_input_static_files: + outfile = output_subdir/f"{date}.{static_source_dict['source']}.nc" + test = xr.load_dataset(outfile) + + assert "wet_c" not in test + assert "mister" in test + assert "darcy" in test + assert "wins" in test + + assert np.all(test["mister"].values==np.float64(1.0)) + assert np.all(test["darcy"].values==np.float64(2.0)) + assert np.all(test["wins"].values==np.float64(3.0)) + #check answers, these shouldn't have been regridded for source_dict in here_input_files: ifile = source_dict["history_file"] From 3a8795c214a00d17f4fdbd9843a66138c86a85d9 Mon Sep 17 00:00:00 2001 From: Chris Blanton Date: Tue, 23 Dec 2025 13:27:27 -0500 Subject: [PATCH 11/12] After a second thought (thanks Ian) we will use "false" as the trigger not ".false." 
--- fre/app/mask_atmos_plevel/mask_atmos_plevel.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fre/app/mask_atmos_plevel/mask_atmos_plevel.py b/fre/app/mask_atmos_plevel/mask_atmos_plevel.py index b7c409720..a43ea272d 100755 --- a/fre/app/mask_atmos_plevel/mask_atmos_plevel.py +++ b/fre/app/mask_atmos_plevel/mask_atmos_plevel.py @@ -76,7 +76,7 @@ def mask_atmos_plevel_subtool(infile: str = None, for var in list(ds_in.variables): fre_logger.debug('for var = %s', var) if 'pressure_mask' in ds_in[var].attrs: - if ds_in[var].attrs['pressure_mask'].lower() == '.false.': + if ds_in[var].attrs['pressure_mask'].lower() == 'false': fre_logger.debug('first pressure masking trigger passed. processing data.') ds_out[var] = mask_field_above_surface_pressure(ds_in, var, ds_ps) ds_out[var].attrs['pressure_mask'] = "True" From 22af975be02e8eb45671eebe9245672966aaa3d5 Mon Sep 17 00:00:00 2001 From: Chris Blanton Date: Tue, 23 Dec 2025 13:30:26 -0500 Subject: [PATCH 12/12] Revert "Adjust mask-atmos-plevel test case" This reverts commit 0fe44fda6bb826a0939c7aa1f5ab77d0bc8e701d. --- .../test_files/reduced_ascii_files/atmos_cmip.ua_unmsk.cdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fre/tests/test_files/reduced_ascii_files/atmos_cmip.ua_unmsk.cdl b/fre/tests/test_files/reduced_ascii_files/atmos_cmip.ua_unmsk.cdl index 899bf6433..def2bc5f1 100644 --- a/fre/tests/test_files/reduced_ascii_files/atmos_cmip.ua_unmsk.cdl +++ b/fre/tests/test_files/reduced_ascii_files/atmos_cmip.ua_unmsk.cdl @@ -50,7 +50,7 @@ variables: ua_unmsk:cell_measures = "area: area" ; ua_unmsk:standard_name = "eastward_wind" ; ua_unmsk:interp_method = "conserve_order2" ; - ua_unmsk:pressure_mask = ".false." ; + ua_unmsk:pressure_mask = "False" ; // global attributes: :title = "c96L65_am5f9d7r0_amip" ;