From 88ac5b6d3cd24b75e8d7d449b09ba919f58f58a6 Mon Sep 17 00:00:00 2001 From: Stefan Poll Date: Thu, 26 Feb 2026 15:15:12 +0100 Subject: [PATCH 1/2] add compress_extract_nc-files.sh script --- README.md | 5 ++ .../compress_extract_nc-files.sh | 86 +++++++++++++++++++ 2 files changed, 91 insertions(+) create mode 100644 datahandling_prepostprocessing_tsmp2/compress_extract_nc-files.sh diff --git a/README.md b/README.md index a71988d..7d0912e 100644 --- a/README.md +++ b/README.md @@ -30,3 +30,8 @@ These are scripts and the accompanying software environment under `env/` (`sou - [convert_phead2volsoilmoist.py](ideal-fs/convert_phead2volsoilmoist.py): Python script convert pressure from ParFlow to volumetric soil moisture and relative saturation. +## [datahandling_prepostprocessing_tsmp2](datahandling_prepostprocessing_tsmp2/) + +Shell script(s) to handle tsmp2 data for pre- and postprocessing. + +- [compress_extract_nc-files.sh](datahandling_prepostprocessing_tsmp2/compress_extract_nc-files.sh): Shell script to extract or compress netcdf data. 
#!/usr/bin/env bash
#SBATCH --export=ALL
#SBATCH --account=slts
#SBATCH --partition=dc-cpu
#SBATCH --nodes=1
#SBATCH --ntasks=128
#SBATCH --job-name=nc-archive
#SBATCH --time=01:25:00
#SBATCH --output=%x_%j.out
#SBATCH --error=%x_%j.err
#
# compress_extract_nc-files.sh
#
# Pack directories of NetCDF files into tarballs, or unpack them again.
#
#   compress  gzip every *.nc file below each given directory (in parallel),
#             then write <dir>.tar next to each directory. The archive
#             stores paths relative to the directory's parent.
#   extract   unpack each given tarball into the directory that holds the
#             tarball, then gunzip every unpacked *.nc.gz file (in parallel).
#
# Usage:
#   sbatch compress_extract_nc-files.sh compress folder*
#   sbatch compress_extract_nc-files.sh extract folder*.tar
#
# Environment:
#   SLURM_NTASKS  number of parallel gzip/gunzip processes (default: 1)
#
# Requires GNU find, xargs, sort, tar and realpath.

set -euo pipefail

# Print an error message to stderr and abort with status 1.
die() {
  printf 'ERROR: %s\n' "$*" >&2
  exit 1
}

# Print the usage text to stderr and abort with status 1.
usage() {
  {
    echo "Usage:"
    echo "  sbatch compress_extract_nc-files.sh compress folder*"
    echo "  sbatch compress_extract_nc-files.sh extract folder*.tar"
  } >&2
  exit 1
}

#######################################
# gzip all *.nc files below the given directories, then tar each directory.
# Arguments: $1 - number of parallel gzip processes
#            $2.. - directories to archive
# Outputs:   progress messages on stdout; <dir>.tar next to each directory
#######################################
compress_inputs() {
  local max_parallel=$1
  shift
  local dir abs_dir parent_dir base_dir

  # Validate first: a non-directory input would be gzipped (and renamed)
  # by the find step and then make the tar step fail half-way through.
  for dir in "$@"; do
    [[ -d "$dir" ]] || die "'$dir' is not a directory"
  done

  echo "Starting gzip step..."

  # -r: do not run gzip at all when no *.nc file is found; without it
  # xargs would start gzip once with no operands.
  find "$@" -type f -name '*.nc' -print0 \
    | xargs -0 -r -n 1 -P "$max_parallel" gzip --

  echo "Gzip finished, starting tar step..."

  for dir in "$@"; do
    abs_dir=$(realpath -- "$dir")
    parent_dir=$(dirname -- "$abs_dir")
    base_dir=$(basename -- "$abs_dir")

    # -C keeps only the directory name (not the absolute path) in the archive.
    tar -C "$parent_dir" -cf "$parent_dir/${base_dir}.tar" "$base_dir"
  done
}

#######################################
# Untar the given tarballs in place, then gunzip the unpacked *.nc.gz files.
# Arguments: $1 - number of parallel gunzip processes
#            $2.. - tarballs to unpack
# Outputs:   progress messages on stdout; unpacked files next to each tarball
#######################################
extract_inputs() {
  local max_parallel=$1
  shift
  local tarfile abs_tar tar_dir tar_base unpacked_dir
  local -a search_roots=()

  for tarfile in "$@"; do
    [[ -f "$tarfile" ]] || die "'$tarfile' is not a regular file"
  done

  echo "Starting untar step..."

  for tarfile in "$@"; do
    abs_tar=$(realpath -- "$tarfile")
    tar_dir=$(dirname -- "$abs_tar")
    tar_base=$(basename -- "$abs_tar")

    tar -C "$tar_dir" -xf "$abs_tar"

    # Archives written by the compress mode unpack to <name>/ for <name>.tar.
    # Search only there so unrelated *.nc.gz files are left alone; fall back
    # to the tarball's directory for archives with a different layout.
    unpacked_dir="$tar_dir/${tar_base%.tar}"
    if [[ -d "$unpacked_dir" ]]; then
      search_roots+=("$unpacked_dir")
    else
      search_roots+=("$tar_dir")
    fi
  done

  echo "Untar finished, starting gunzip step..."

  # The search roots are where the tarballs were unpacked, not the current
  # working directory. sort -zu drops duplicate paths that appear when two
  # roots coincide or overlap, so no file is handed to gunzip twice.
  find "${search_roots[@]}" -type f -name '*.nc.gz' -print0 \
    | sort -zu \
    | xargs -0 -r -n 1 -P "$max_parallel" gunzip --
}

#######################################
# Entry point: parse arguments, run the requested mode, report the runtime.
# Arguments: $1 - mode ("compress" or "extract")
#            $2.. - directories (compress) or tarballs (extract)
#######################################
main() {
  (( $# >= 2 )) || usage

  local mode=$1
  shift
  local max_parallel=${SLURM_NTASKS:-1}
  local start_time end_time elapsed

  # ---- timer start ----
  start_time=$(date +%s)
  echo "Job started at: $(date)"
  echo "Mode: $mode"
  echo "Parallel jobs: $max_parallel"
  echo "Inputs:"
  printf "  %s\n" "$@"

  case "$mode" in
    compress)
      compress_inputs "$max_parallel" "$@"
      ;;
    extract)
      extract_inputs "$max_parallel" "$@"
      ;;
    *)
      die "Unknown mode '$mode' (use compress or extract)"
      ;;
  esac

  # ---- timer end ----
  end_time=$(date +%s)
  elapsed=$((end_time - start_time))

  printf "Job finished at: %s\n" "$(date)"
  printf "Total runtime: %02d:%02d:%02d (hh:mm:ss)\n" \
    "$((elapsed / 3600))" "$((elapsed % 3600 / 60))" "$((elapsed % 60))"
}

main "$@"