From 5f0948360da1b6f55af029d0c8080fc9173042d3 Mon Sep 17 00:00:00 2001 From: Kieran Ricardo Date: Mon, 9 Feb 2026 14:22:32 +1100 Subject: [PATCH 1/3] update ocean archiving to handle monthly single variable files --- scripts/cleanup_output/cleanup_output.py | 93 +++--------------------- 1 file changed, 11 insertions(+), 82 deletions(-) diff --git a/scripts/cleanup_output/cleanup_output.py b/scripts/cleanup_output/cleanup_output.py index 1d253c4..776d763 100755 --- a/scripts/cleanup_output/cleanup_output.py +++ b/scripts/cleanup_output/cleanup_output.py @@ -124,93 +124,22 @@ def move_atmos(year, share_dir, atmosphere_archive_dir): def move_ocean(year, work_dirs, ocean_archive_dir): # Move static ocean file - static_file = "access-cm3.mom6.h.static.nc" - if not (ocean_archive_dir / static_file).is_file(): - shutil.copy2(work_dirs[0] / static_file, ocean_archive_dir / static_file) + file_pattern = rf"access-((cm3)|(om3)).mom6.static.nc" + + for file in os.listdir(work_dirs[0]): + if re.match(file_pattern, file): + shutil.copy2(work_dirs[0] / file, ocean_archive_dir / file.replace('om3', 'cm3')) # Process non-statc files: # - Concatenate into years - # - Separate non-1d vars into individual files - # - 1D variables combined into single file - file_patterns = { - "native": rf"access-cm3\.mom6.h\.native_{year}_([0-9]{{2}})\.nc", - # "sfc": rf"access-cm3\.mom6\.h\.sfc_{year}_([0-9]{{2}})\.nc", - "z": rf"access-cm3\.mom6\.h\.z_{year}_([0-9]{{2}})\.nc", - "rho2": rf"access-cm3\.mom6\.h\.rho2_{year}_([0-9]{{2}})\.nc", - } - for output_type, pattern in file_patterns.items(): - matches = [] - for dir in work_dirs: - for file in os.listdir(dir): - if re.match(pattern, file): - filepath = dir / file - matches.append(filepath) + file_pattern = rf"access-((om3)|(cm3))\.mom6\.((2d)|(3d)|(scalar)).*\.nc" - # Sanity check - if (matches != []) and (len(matches) != 12): - raise FileNotFoundError( - f"Only {len(matches)} file found for pattern {pattern}" - ) + for file in os.listdir(work_dirs[0]): + if re.match(file_pattern, file): + out_filepath = ocean_archive_dir / re.sub(r'_(?=\d{4})', '', file).replace('om3', 'cm3') - # Concatenate all files matching the current pattern - working_file = xr.open_mfdataset(matches, - decode_times=False, - preprocess=to_proleptic) - - # File wide attributes - frequency = frequency = get_frequency(working_file.time) - data_years = working_file["time.year"] - check_year(year, data_years) - - scalar_fields = [] - groups_to_save = [] - # Loop through variables in dataset, saving each one to file - for var_name in working_file: - if var_name in AUX_VARS: - continue - - single_var_da = working_file[var_name] - - dim_label = get_ndims(single_var_da.dims) - if output_type == "z": - dim_label = f"{dim_label}_z" - elif output_type == "rho2": - dim_label = f"{dim_label}_rho2" - - reduction_method = parse_cell_methods( - single_var_da.attrs["cell_methods"] - )["time"] - - # Handle scalar fields separately - if is_scalar_var(single_var_da.dims): - scalar_fields.append(var_name) - continue - - file_name = set_ocn_file_name(dim_label, - var_name, - frequency, - reduction_method, - year) - file_path = ocean_archive_dir / file_name - single_var_ds = working_file[[var_name] + AUX_VARS] - - groups_to_save.append((single_var_ds, file_path)) - - # Generate file name for scalar variables - if scalar_fields: - scalar_file_name = set_scalar_name(working_file, scalar_fields, frequency, year) - scalar_ds = working_file[scalar_fields + AUX_VARS] - groups_to_save.append((scalar_ds, ocean_archive_dir / scalar_file_name)) - - # Save files in parallel - datasets, filepaths = zip(*groups_to_save) - for path in filepaths: - check_exists(path) - print("Saving ocean variables") - - for dataset, filepath in groups_to_save: - dataset.load().to_netcdf(filepath) - # xr.save_mfdataset(datasets, filepaths) + dataset = xr.open_mfdataset([folder / file for folder in work_dirs], decode_times=False, preprocess=to_proleptic) + dataset.load().to_netcdf(out_filepath) def is_scalar_var(dims): From fda5cef7d4f1eabee8c57868617ba61ce0db6950 Mon Sep 17 00:00:00 2001 From: Kieran Ricardo Date: Mon, 9 Feb 2026 14:50:19 +1100 Subject: [PATCH 2/3] update expected number of ocean files --- scripts/cleanup_output/check-archive-run.sh | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/scripts/cleanup_output/check-archive-run.sh b/scripts/cleanup_output/check-archive-run.sh index 053de4f..c78fbd4 100644 --- a/scripts/cleanup_output/check-archive-run.sh +++ b/scripts/cleanup_output/check-archive-run.sh @@ -1,7 +1,7 @@ -start_year=2031 -end_year=2040 +start_year=1981 +end_year=1981 -ARCHIVE_DIR=/g/data/zv30/non-cmip/ACCESS-CM3/cm3-run-11-08-2025-25km-beta-om3-new-um-params-continued +ARCHIVE_DIR=/g/data/zv30/non-cmip/ACCESS-CM3/cm3-run-20-01-2026-om3-update for year in $(seq $start_year $end_year) do @@ -18,7 +18,7 @@ do fi nfiles=$(ls -l $ARCHIVE_DIR/archive/$year/ocean | wc -l) - if [ "$nfiles" -ne "97" ]; then + if [ "$nfiles" -ne "86" ]; then echo "MOM number of files incorrect" fi done \ No newline at end of file From 9466f8a70f7cf580e4baa1161387f922cf10d0dc Mon Sep 17 00:00:00 2001 From: Kieran Ricardo Date: Tue, 17 Mar 2026 16:26:18 +1100 Subject: [PATCH 3/3] remove unused functions --- scripts/cleanup_output/cleanup_output.py | 94 ------------------------ 1 file changed, 94 deletions(-) diff --git a/scripts/cleanup_output/cleanup_output.py b/scripts/cleanup_output/cleanup_output.py index 776d763..8f3b0c8 100755 --- a/scripts/cleanup_output/cleanup_output.py +++ b/scripts/cleanup_output/cleanup_output.py @@ -142,100 +142,6 @@ def move_ocean(year, work_dirs, ocean_archive_dir): dataset.load().to_netcdf(out_filepath) -def is_scalar_var(dims): - return ("scalar_axis" in dims) - - -def get_ndims(dims): - non_time_dims = [dim for dim in dims - if dim != "time"] - return f"{len(non_time_dims)}d" - - -def get_frequency(times): - """ - Find whether variable frequency is daily or monthly - """ - time_deltas = [(times[i+1] - times[i]).astype('timedelta64[D]') for i in range(len(times) - 1)] - - if all([delta in MONTH_LENGTHS for delta in time_deltas]): - frequency = "1mon" - elif all([delta == np.timedelta64(1,'D') for delta in time_deltas]): - frequency = "1day" - else: - raise RuntimeError( - f"Unable to extract frequency from times {times}" - ) - return frequency - - -def check_year(year, da_years): - if all([da_year == year for da_year in da_years]): - return - else: - raise ValueError( - f"Data years {da_years} do not all match specified year {year}." - ) - - -def set_scalar_name(ds, scalar_vars, frequency, year): - """ - Set file name for scalar output. - """ - reduction_methods = [ - parse_cell_methods(ds[var].attrs["cell_methods"])["time"] - for var in scalar_vars - ] - if len(set(reduction_methods)) == 1: - reduction_method = reduction_methods[0] - else: - raise RuntimeError( - f"Require single reduction method. Instead recieved {reduction_methods}" - ) - - name = f"access-cm3.mom6.scalar.{frequency}.{reduction_method}.{year}.nc" - return name - - -def set_ocn_file_name(ndims, - field_name, - frequency, - reduction_method, - year): - """ - Set the file name for a single variable. - File names follow format: - 'access-cm3.mom6.h......nc - """ - return( - f"access-cm3.mom6.{ndims}.{field_name}.{frequency}.{reduction_method}.{year}.nc" - ) - - -def parse_cell_methods(methods_string): - """ - Return each cell method from a string of form - 'area:mean yh:mean xh:mean time: mean' - """ - pattern = r"(.+?:\s?[a-z]+)" - method_list = re.findall(pattern, methods_string) - # Strip any whitespace - method_list = ["".join(method.split()) for method in method_list] - - method_pattern = r"(.+):([a-z]+)" - - cell_methods = {} - for method_str in method_list: - match = re.match(method_pattern, method_str) - if not match: - raise RuntimeError("Failed to parse cell methods") - dim = match[1] - method = match[2] - cell_methods[dim] = method - - return cell_methods - - def move_ice(work_dirs, ice_archive_dir): pattern = r"access-cm3\.cice\.1mon\.(?!r)" datasets = []