Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 22 additions & 20 deletions fre/app/generate_time_averages/combine.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

import subprocess
import metomi.isodatetime.parsers
import xarray as xr

from ..helpers import change_directory

Expand Down Expand Up @@ -39,21 +40,29 @@
return frequency_label + '_' + str(interval_object.years) + 'yr'


def check_glob(target: str) -> None:
def merge_netcdfs(source: str, target: str) -> None:
"""
Verify that at least one file is resolved by the glob.
Raises FileNotFoundError if no files are found.
:param target: Glob target to resolve
:type target: str
:raises FileNotFoundError: No files found
Merge a glob string identifying a group of NetCDF files
into one combined NetCDF file.
:param source: Glob target of input files
:type source: str
:param target: Output file
    :type target: str
:raises FileNotFoundError: Input files not found
:raises FileExistsError: Output file already exists
:rtype: None
"""
files = glob.glob(target)
if len(files) >= 1:
fre_logger.debug("%s has %s files", target, len(files))
input_files = glob.glob(source)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

the term source is being used too often and too inconsistently. Here, source is really just a string input to glob, which uses it to find "source files".

Thus, if anything, input_files is actually the sources, not the input to glob

if len(input_files) >= 1:
fre_logger.debug(f"'{source}' has {len(input_files)} files")
else:
raise FileNotFoundError(f"target={target} resolves to no files")
raise FileNotFoundError(f"'{source}' resolves to no files")
if Path(target).exists():
raise FileExistsError(f"Output file '{target}' already exists")

ds = xr.open_mfdataset(input_files, compat='override', coords='minimal')

Check warning on line 64 in fre/app/generate_time_averages/combine.py

View workflow job for this annotation

GitHub Actions / spellcheck

Unknown word (mfdataset)
ds.to_netcdf(target, unlimited_dims=['time'])


def combine( root_in_dir: str,
Expand Down Expand Up @@ -107,20 +116,13 @@
if frequency == 'yr':
source = component + '.' + date_string + '.*.nc'
target = component + '.' + date_string + '.nc'
check_glob(source)
subprocess.run(['cdo', '-O', 'merge', source, target], check=True)
merge_netcdfs(source, target)
fre_logger.debug("Output file created: %s", target)
fre_logger.debug("Copying to %s", outdir)
subprocess.run(['cp', '-v', target, outdir], check=True)
elif frequency == 'mon':
for month_int in range(1,13):
source = f"{component}.{date_string}.*.{month_int:02d}.nc"
target = f"{component}.{date_string}.{month_int:02d}.nc"
check_glob(source)

# does there exist a python-cdo way of doing the merge?
subprocess.run(['cdo', '-O', 'merge', source, target], check=True)
fre_logger.debug("Output file created: %s", target)
fre_logger.debug("Copying to %s", outdir)

merge_netcdfs(source, target)
subprocess.run(['cp', '-v', target, outdir], check=True)
2 changes: 1 addition & 1 deletion fre/app/mask_atmos_plevel/mask_atmos_plevel.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ def mask_atmos_plevel_subtool(infile: str = None,

fre_logger.info("Finished processing %s, wrote %s, pressure_mask is True", var, masked_var)
else:
fre_logger.debug("Not processing %s, no pressure_mask attr, nor _unmsk in the variable name", var)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why would we hide an error message that clearly explains the code's behavior?

fre_logger.debug("Not processing '%s', no 'pressure_mask' attribute", var)

fre_logger.info('Write the output file if any unmasked variables were masked')
if ds_out.variables:
Expand Down
11 changes: 9 additions & 2 deletions fre/app/regrid_xy/regrid_xy.py
Original file line number Diff line number Diff line change
Expand Up @@ -326,11 +326,18 @@ def regrid_xy(yamlfile: str,
datadict["remap_dir"] = remap_dir
datadict["input_date"] = input_date[:8]

# add temporal and static history files
components = []
for component in yamldict["postprocess"]["components"]:
for this_source in component["sources"]:
if this_source["history_file"] == source:
for temporal_history in component["sources"]:
if temporal_history["history_file"] == source:
components.append(component)
try:
for static_history in component["static"]:
if static_history["source"] == source:
components.append(component)
except KeyError:
pass
Comment on lines +339 to +340
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

silent passing drives people (including me) nuts! maybe:

Suggested change
except KeyError:
pass
except KeyError:
fre_logger.warning( ... )
pass


# submit fregrid job for each component
for component in components:
Expand Down
7 changes: 7 additions & 0 deletions fre/app/regrid_xy/tests/generate_files.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,13 @@ def make_data():
history_file = source["history_file"]
for i in range(1, ntiles+1):
dataset.to_netcdf(f"{input_dir}/{date}.{history_file}.tile{i}.nc")
try:
for static_source in component["static"]:
history_file = static_source["source"]
for i in range(1, ntiles+1):
dataset.to_netcdf(f"{input_dir}/{date}.{history_file}.tile{i}.nc")
except KeyError:
pass
Comment on lines +155 to +161
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

try/except with a silent pass in a pytest set-up function seems ill-advised and likely to spawn silent problems. given that this is only run for testing, we should eschew try/except to cut down on the number of branching possibilities, and add an assert to enforce what this block is supposed to do for test setup



def make_all():
Expand Down
30 changes: 28 additions & 2 deletions fre/app/regrid_xy/tests/test_regrid_xy.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,14 @@

components = []
pp_input_files = [{"history_file":"pemberley"}, {"history_file":"longbourn"}]
pp_input_static_files = [{"source": "my_static_history"}, {"source": "my_static2"}]
components.append({"xyInterp": f"{nxy},{nxy}",
"interpMethod": "conserve_order2",
"inputRealm": "atmos",
"type": f"pride_and_prejudice",
"sources": pp_input_files,
"postprocess_on": True}
"postprocess_on": True,
"static": pp_input_static_files}
)
emma_input_files = [{"history_file":"hartfield"}, {"history_file":"donwell_abbey"}]
components.append({"xyInterp": f"{nxy},{nxy}",
Expand Down Expand Up @@ -91,10 +93,21 @@ def test_regrid_xy():
source=source,
input_date=date+"TTTT")

# regrid the static inputs
for static_source_dict in pp_input_static_files:
source = static_source_dict['source']
regrid_xy.regrid_xy(yamlfile=str(yamlfile),
input_dir=str(input_dir),
output_dir=str(output_dir),
work_dir=str(work_dir),
remap_dir=str(remap_dir),
source=source,
input_date=date+"TTTT")

#check answers
output_subdir = output_dir/f"{nxy}_{nxy}.conserve_order2"
for source_dict in pp_input_files + emma_input_files:
# Files are now output to a subdirectory based on grid size and interpolation method
output_subdir = output_dir/f"{nxy}_{nxy}.conserve_order2"
outfile = output_subdir/f"{date}.{source_dict['history_file']}.nc"

test = xr.load_dataset(outfile)
Expand All @@ -108,6 +121,19 @@ def test_regrid_xy():
assert np.all(test["darcy"].values==np.float64(2.0))
assert np.all(test["wins"].values==np.float64(3.0))

for static_source_dict in pp_input_static_files:
outfile = output_subdir/f"{date}.{static_source_dict['source']}.nc"
test = xr.load_dataset(outfile)

assert "wet_c" not in test
assert "mister" in test
assert "darcy" in test
assert "wins" in test

assert np.all(test["mister"].values==np.float64(1.0))
assert np.all(test["darcy"].values==np.float64(2.0))
assert np.all(test["wins"].values==np.float64(3.0))
Comment on lines +124 to +135
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

how many static test cases are there? can we be more careful/explicit with checking the static case? We're overloading the variable test here... within a pytest test routine, at a different scope level. Gives me anxiety!


#check answers, these shouldn't have been regridded
for source_dict in here_input_files:
ifile = source_dict["history_file"]
Expand Down
Loading