Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions scripts/cleanup_output/check-archive-run.sh
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
start_year=2031
end_year=2040
start_year=1981
end_year=1981

ARCHIVE_DIR=/g/data/zv30/non-cmip/ACCESS-CM3/cm3-run-11-08-2025-25km-beta-om3-new-um-params-continued
ARCHIVE_DIR=/g/data/zv30/non-cmip/ACCESS-CM3/cm3-run-20-01-2026-om3-update

for year in $(seq $start_year $end_year)
do
Expand All @@ -18,7 +18,7 @@ do
fi

nfiles=$(ls -l $ARCHIVE_DIR/archive/$year/ocean | wc -l)
if [ "$nfiles" -ne "97" ]; then
if [ "$nfiles" -ne "86" ]; then
echo "MOM number of files incorrect"
fi
done
187 changes: 11 additions & 176 deletions scripts/cleanup_output/cleanup_output.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,187 +124,22 @@ def move_atmos(year, share_dir, atmosphere_archive_dir):

def move_ocean(year, work_dirs, ocean_archive_dir):
# Move static ocean file
static_file = "access-cm3.mom6.h.static.nc"
if not (ocean_archive_dir / static_file).is_file():
shutil.copy2(work_dirs[0] / static_file, ocean_archive_dir / static_file)
file_pattern = rf"access-((cm3)|(om3)).mom6.static.nc"

for file in os.listdir(work_dirs[0]):
if re.match(file_pattern, file):
shutil.copy2(work_dirs[0] / file, ocean_archive_dir / file.replace('om3', 'cm3'))

# Process non-statc files:
# - Concatenate into years
# - Separate non-1d vars into individual files
# - 1D variables combined into single file
file_patterns = {
"native": rf"access-cm3\.mom6.h\.native_{year}_([0-9]{{2}})\.nc",
# "sfc": rf"access-cm3\.mom6\.h\.sfc_{year}_([0-9]{{2}})\.nc",
"z": rf"access-cm3\.mom6\.h\.z_{year}_([0-9]{{2}})\.nc",
"rho2": rf"access-cm3\.mom6\.h\.rho2_{year}_([0-9]{{2}})\.nc",
}
for output_type, pattern in file_patterns.items():
matches = []
for dir in work_dirs:
for file in os.listdir(dir):
if re.match(pattern, file):
filepath = dir / file
matches.append(filepath)

# Sanity check
if (matches != []) and (len(matches) != 12):
raise FileNotFoundError(
f"Only {len(matches)} file found for pattern {pattern}"
)

# Concatenate all files matching the current pattern
working_file = xr.open_mfdataset(matches,
decode_times=False,
preprocess=to_proleptic)

# File wide attributes
frequency = frequency = get_frequency(working_file.time)
data_years = working_file["time.year"]
check_year(year, data_years)

scalar_fields = []
groups_to_save = []
# Loop through variables in dataset, saving each one to file
for var_name in working_file:
if var_name in AUX_VARS:
continue

single_var_da = working_file[var_name]

dim_label = get_ndims(single_var_da.dims)
if output_type == "z":
dim_label = f"{dim_label}_z"
elif output_type == "rho2":
dim_label = f"{dim_label}_rho2"

reduction_method = parse_cell_methods(
single_var_da.attrs["cell_methods"]
)["time"]

# Handle scalar fields separately
if is_scalar_var(single_var_da.dims):
scalar_fields.append(var_name)
continue

file_name = set_ocn_file_name(dim_label,
var_name,
frequency,
reduction_method,
year)
file_path = ocean_archive_dir / file_name
single_var_ds = working_file[[var_name] + AUX_VARS]

groups_to_save.append((single_var_ds, file_path))

# Generate file name for scalar variables
if scalar_fields:
scalar_file_name = set_scalar_name(working_file, scalar_fields, frequency, year)
scalar_ds = working_file[scalar_fields + AUX_VARS]
groups_to_save.append((scalar_ds, ocean_archive_dir / scalar_file_name))

# Save files in parallel
datasets, filepaths = zip(*groups_to_save)
for path in filepaths:
check_exists(path)
print("Saving ocean variables")

for dataset, filepath in groups_to_save:
dataset.load().to_netcdf(filepath)
# xr.save_mfdataset(datasets, filepaths)


def is_scalar_var(dims):
    """Return True when 'scalar_axis' appears among the variable's dimensions."""
    return "scalar_axis" in dims


def get_ndims(dims):
    """Return a dimensionality label such as '2d', counting every dimension except 'time'."""
    spatial_count = sum(1 for dim in dims if dim != "time")
    return f"{spatial_count}d"
file_pattern = rf"access-((om3)|(cm3))\.mom6\.((2d)|(3d)|(scalar)).*\.nc"

for file in os.listdir(work_dirs[0]):
if re.match(file_pattern, file):
out_filepath = ocean_archive_dir / re.sub(r'_(?=\d{4})', '', file).replace('om3', 'cm3')

def get_frequency(times):
    """
    Classify a time axis as monthly ('1mon') or daily ('1day') from the
    spacing between consecutive timestamps.

    Raises RuntimeError when the spacing matches neither pattern.
    NOTE(review): with fewer than two timestamps both checks pass vacuously
    and '1mon' is returned — confirm callers never supply such an axis.
    """
    # Day-resolution deltas between each consecutive pair of timestamps.
    deltas = [
        (later - earlier).astype('timedelta64[D]')
        for earlier, later in zip(times[:-1], times[1:])
    ]

    if all(delta in MONTH_LENGTHS for delta in deltas):
        return "1mon"
    if all(delta == np.timedelta64(1, 'D') for delta in deltas):
        return "1day"
    raise RuntimeError(
        f"Unable to extract frequency from times {times}"
    )


def check_year(year, da_years):
    """
    Verify that every year in *da_years* equals *year*.

    Returns None on success; raises ValueError on any mismatch.
    """
    if any(da_year != year for da_year in da_years):
        raise ValueError(
            f"Data years {da_years} do not all match specified year {year}."
        )


def set_scalar_name(ds, scalar_vars, frequency, year):
    """
    Build the output file name for the combined scalar-variable dataset.

    Each variable's time reduction method is read from its 'cell_methods'
    attribute via parse_cell_methods; all scalar variables must share a
    single method, otherwise a RuntimeError is raised.
    """
    reduction_methods = [
        parse_cell_methods(ds[var].attrs["cell_methods"])["time"]
        for var in scalar_vars
    ]
    if len(set(reduction_methods)) == 1:
        reduction_method = reduction_methods[0]
    else:
        # Fixed typo in the error message ("recieved" -> "received").
        raise RuntimeError(
            f"Require single reduction method. Instead received {reduction_methods}"
        )

    name = f"access-cm3.mom6.scalar.{frequency}.{reduction_method}.{year}.nc"
    return name


def set_ocn_file_name(ndims,
                      field_name,
                      frequency,
                      reduction_method,
                      year):
    """
    Build the archive file name for a single ocean variable.

    File names follow the format:
    'access-cm3.mom6.<dimension>.<field-name>.<frequency>.<reduction-method>.<year>.nc'
    (Docstring corrected: the previous version showed an '.h.' segment that
    the returned name never contained.)
    """
    return f"access-cm3.mom6.{ndims}.{field_name}.{frequency}.{reduction_method}.{year}.nc"


def parse_cell_methods(methods_string):
    """
    Parse a cell_methods string of the form
    'area:mean yh:mean xh:mean time: mean' into a {dimension: method} dict.

    Raises RuntimeError when an extracted entry cannot be split into a
    dimension/method pair.
    """
    # Pull out each 'dim: method' chunk, then squeeze out all whitespace
    # so 'time: mean' and 'time:mean' are handled identically.
    raw_entries = re.findall(r"(.+?:\s?[a-z]+)", methods_string)
    entries = ["".join(raw.split()) for raw in raw_entries]

    cell_methods = {}
    for entry in entries:
        parsed = re.match(r"(.+):([a-z]+)", entry)
        if parsed is None:
            raise RuntimeError("Failed to parse cell methods")
        cell_methods[parsed[1]] = parsed[2]

    return cell_methods
dataset = xr.open_mfdataset([folder / file for folder in work_dirs], decode_times=False, preprocess=to_proleptic)
dataset.load().to_netcdf(out_filepath)


def move_ice(work_dirs, ice_archive_dir):
Expand Down