From 5f0948360da1b6f55af029d0c8080fc9173042d3 Mon Sep 17 00:00:00 2001
From: Kieran Ricardo <kieran.ricardo@anu.edu.au>
Date: Mon, 9 Feb 2026 14:22:32 +1100
Subject: [PATCH 1/3] update ocean archiving to handle monthly single variable
 files

---
 scripts/cleanup_output/cleanup_output.py | 93 +++---------------------
 1 file changed, 11 insertions(+), 82 deletions(-)

diff --git a/scripts/cleanup_output/cleanup_output.py b/scripts/cleanup_output/cleanup_output.py
index 1d253c4..776d763 100755
--- a/scripts/cleanup_output/cleanup_output.py
+++ b/scripts/cleanup_output/cleanup_output.py
@@ -124,93 +124,22 @@ def move_atmos(year, share_dir, atmosphere_archive_dir):
 
 def move_ocean(year, work_dirs, ocean_archive_dir):
     # Move static ocean file
-    static_file = "access-cm3.mom6.h.static.nc"
-    if not (ocean_archive_dir / static_file).is_file():
-        shutil.copy2(work_dirs[0] / static_file, ocean_archive_dir / static_file)
+    file_pattern = r"access-(cm3|om3)\.mom6\.static\.nc"
+    # Dots are escaped so they only match a literal "."; om3-named files are archived under the cm3 name.
+    for file in os.listdir(work_dirs[0]):
+        if re.match(file_pattern, file):
+            shutil.copy2(work_dirs[0] / file, ocean_archive_dir / file.replace('om3', 'cm3'))
 
     # Process non-statc files:
     # - Concatenate into years
-    # - Separate non-1d vars into individual files
-    # - 1D variables combined into single file
-    file_patterns = {
-        "native": rf"access-cm3\.mom6.h\.native_{year}_([0-9]{{2}})\.nc",
-        # "sfc": rf"access-cm3\.mom6\.h\.sfc_{year}_([0-9]{{2}})\.nc",
-        "z": rf"access-cm3\.mom6\.h\.z_{year}_([0-9]{{2}})\.nc",
-        "rho2": rf"access-cm3\.mom6\.h\.rho2_{year}_([0-9]{{2}})\.nc",
-    }
-    for output_type, pattern in file_patterns.items():
-        matches = []
-        for dir in work_dirs:
-            for file in os.listdir(dir):
-                if re.match(pattern, file):
-                    filepath = dir / file
-                    matches.append(filepath)
+    file_pattern = r"access-(om3|cm3)\.mom6\.(2d|3d|scalar).*\.nc"  # NOTE(review): `year` is not part of this pattern - confirm work_dirs only hold this year's files
 
-        # Sanity check
-        if (matches != []) and (len(matches) != 12):
-            raise FileNotFoundError(
-                f"Only {len(matches)} file found for pattern {pattern}"
-            )
+    for file in os.listdir(work_dirs[0]):
+        if re.match(file_pattern, file):
+            out_filepath = ocean_archive_dir / re.sub(r'_(?=\d{4})', '', file).replace('om3', 'cm3')
 
-        # Concatenate all files matching the current pattern
-        working_file = xr.open_mfdataset(matches,
-                                         decode_times=False,
-                                         preprocess=to_proleptic)
-
-        # File wide attributes
-        frequency = frequency = get_frequency(working_file.time)
-        data_years = working_file["time.year"]
-        check_year(year, data_years)
-
-        scalar_fields = []
-        groups_to_save = []
-        # Loop through variables in dataset, saving each one to file
-        for var_name in working_file:
-            if var_name in AUX_VARS:
-                continue
-
-            single_var_da = working_file[var_name]
-
-            dim_label = get_ndims(single_var_da.dims)
-            if output_type == "z":
-                dim_label = f"{dim_label}_z"
-            elif output_type == "rho2":
-                dim_label = f"{dim_label}_rho2"
-
-            reduction_method = parse_cell_methods(
-                single_var_da.attrs["cell_methods"]
-            )["time"]
-
-            # Handle scalar fields separately
-            if is_scalar_var(single_var_da.dims):
-                scalar_fields.append(var_name)
-                continue
-
-            file_name = set_ocn_file_name(dim_label,
-                                          var_name,
-                                          frequency,
-                                          reduction_method,
-                                          year)
-            file_path = ocean_archive_dir / file_name
-            single_var_ds = working_file[[var_name] + AUX_VARS]
-
-            groups_to_save.append((single_var_ds, file_path))
-
-        # Generate file name for scalar variables
-        if scalar_fields:
-            scalar_file_name = set_scalar_name(working_file, scalar_fields, frequency, year)
-            scalar_ds = working_file[scalar_fields + AUX_VARS]
-            groups_to_save.append((scalar_ds, ocean_archive_dir / scalar_file_name))
-
-        # Save files in parallel
-        datasets, filepaths = zip(*groups_to_save)
-        for path in filepaths:
-            check_exists(path)
-        print("Saving ocean variables")
-
-        for dataset, filepath in groups_to_save:
-            dataset.load().to_netcdf(filepath)
-        # xr.save_mfdataset(datasets, filepaths)
+            dataset = xr.open_mfdataset([folder / file for folder in work_dirs], decode_times=False, preprocess=to_proleptic)
+            dataset.load().to_netcdf(out_filepath)
 
 
 def is_scalar_var(dims):

From fda5cef7d4f1eabee8c57868617ba61ce0db6950 Mon Sep 17 00:00:00 2001
From: Kieran Ricardo <kieran.ricardo@anu.edu.au>
Date: Mon, 9 Feb 2026 14:50:19 +1100
Subject: [PATCH 2/3] update expected number of ocean files

---
 scripts/cleanup_output/check-archive-run.sh | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/scripts/cleanup_output/check-archive-run.sh b/scripts/cleanup_output/check-archive-run.sh
index 053de4f..c78fbd4 100644
--- a/scripts/cleanup_output/check-archive-run.sh
+++ b/scripts/cleanup_output/check-archive-run.sh
@@ -1,7 +1,7 @@
-start_year=2031
-end_year=2040
+start_year=1981
+end_year=1981
 
-ARCHIVE_DIR=/g/data/zv30/non-cmip/ACCESS-CM3/cm3-run-11-08-2025-25km-beta-om3-new-um-params-continued
+ARCHIVE_DIR=/g/data/zv30/non-cmip/ACCESS-CM3/cm3-run-20-01-2026-om3-update
 
 for year in $(seq $start_year $end_year)
 do
@@ -18,7 +18,7 @@ do
   fi
 
   nfiles=$(ls -l $ARCHIVE_DIR/archive/$year/ocean | wc -l)
-  if [ "$nfiles" -ne "97" ]; then
+  if [ "$nfiles" -ne "86" ]; then
     echo "MOM number of files incorrect"
   fi
 done
\ No newline at end of file

From 9466f8a70f7cf580e4baa1161387f922cf10d0dc Mon Sep 17 00:00:00 2001
From: Kieran Ricardo <kieran.ricardo@anu.edu.au>
Date: Tue, 17 Mar 2026 16:26:18 +1100
Subject: [PATCH 3/3] remove unused functions

---
 scripts/cleanup_output/cleanup_output.py | 94 ------------------------
 1 file changed, 94 deletions(-)

diff --git a/scripts/cleanup_output/cleanup_output.py b/scripts/cleanup_output/cleanup_output.py
index 776d763..8f3b0c8 100755
--- a/scripts/cleanup_output/cleanup_output.py
+++ b/scripts/cleanup_output/cleanup_output.py
@@ -142,100 +142,6 @@ def move_ocean(year, work_dirs, ocean_archive_dir):
             dataset.load().to_netcdf(out_filepath)
 
 
-def is_scalar_var(dims):
-    return ("scalar_axis" in dims)
-
-
-def get_ndims(dims):
-    non_time_dims = [dim for dim in dims
-                     if dim != "time"]
-    return f"{len(non_time_dims)}d"
-
-
-def get_frequency(times):
-    """
-    Find whether variable frequency is daily or monthly
-    """
-    time_deltas = [(times[i+1] - times[i]).astype('timedelta64[D]') for i in range(len(times) - 1)]
-
-    if all([delta in MONTH_LENGTHS for delta in time_deltas]):
-        frequency = "1mon"
-    elif all([delta == np.timedelta64(1,'D') for delta in time_deltas]):
-        frequency = "1day"
-    else:
-        raise RuntimeError(
-            f"Unable to extract frequency from times {times}"
-        )
-    return frequency
-
-
-def check_year(year, da_years):
-    if all([da_year == year for da_year in da_years]):
-        return
-    else:
-        raise ValueError(
-            f"Data years {da_years} do not all match specified year {year}."
-        )
-
-
-def set_scalar_name(ds, scalar_vars, frequency, year):
-    """
-    Set file name for scalar output.
-    """
-    reduction_methods = [
-        parse_cell_methods(ds[var].attrs["cell_methods"])["time"]
-        for var in scalar_vars
-    ]
-    if len(set(reduction_methods)) == 1:
-        reduction_method = reduction_methods[0]
-    else:
-        raise RuntimeError(
-            f"Require single reduction method. Instead recieved {reduction_methods}"
-        )
-
-    name = f"access-cm3.mom6.scalar.{frequency}.{reduction_method}.{year}.nc"
-    return name
-
-
-def set_ocn_file_name(ndims,
-                      field_name,
-                      frequency,
-                      reduction_method,
-                      year):
-    """
-    Set the file name for a single variable.
-    File names follow format:
-    'access-cm3.mom6.h.<dimension>.<field-name>.<frequency>.<reduction-method>.<year>.nc
-    """
-    return(
-        f"access-cm3.mom6.{ndims}.{field_name}.{frequency}.{reduction_method}.{year}.nc"
-    )
-
-
-def parse_cell_methods(methods_string):
-    """
-    Return each cell method from a string of form
-    'area:mean yh:mean xh:mean time: mean'
-    """
-    pattern = r"(.+?:\s?[a-z]+)"
-    method_list = re.findall(pattern, methods_string)
-    # Strip any whitespace
-    method_list = ["".join(method.split()) for method in method_list]
-
-    method_pattern = r"(.+):([a-z]+)"
-
-    cell_methods = {}
-    for method_str in method_list:
-        match = re.match(method_pattern, method_str)
-        if not match:
-            raise RuntimeError("Failed to parse cell methods")
-        dim = match[1]
-        method = match[2]
-        cell_methods[dim] = method
-
-    return cell_methods
-
-
 def move_ice(work_dirs, ice_archive_dir):
     pattern = r"access-cm3\.cice\.1mon\.(?!r)"
     datasets = []