-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathauto_eval.sh
More file actions
executable file
·40 lines (34 loc) · 1.1 KB
/
auto_eval.sh
File metadata and controls
executable file
·40 lines (34 loc) · 1.1 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
#!/usr/bin/env bash
#
# auto_eval.sh - run the model selector, then evaluate the selected trial of
# every "*-livneh" model directory in parallel, assigning the evaluation jobs
# round-robin to the GPUs reported by nvidia-smi.
#
# Requires on PATH: nvidia-smi, jq, python.
# Exit status: 0 when every evaluation succeeded; non-zero when a prerequisite
# is missing, a selection file is unusable, or any evaluation job failed.
set -euo pipefail

ROOT="/pscratch/sd/k/kas7897/dCLIMAD_BA"
BASE_DIR="$ROOT/outputs/spatial2_Adam_harmonic0/jobs_LOCAspatioTempConv1d"

# Fail early on missing tools, before any background job has been started.
for tool in nvidia-smi jq python; do
  if ! command -v "$tool" >/dev/null 2>&1; then
    echo "error: required command '$tool' not found on PATH" >&2
    exit 1
  fi
done

# Detect number of GPUs (nvidia-smi prints one line per device).
NUM_GPUS=$(nvidia-smi --query-gpu=name --format=csv,noheader | wc -l)
echo "Detected $NUM_GPUS GPUs."
# With zero GPUs the round-robin modulo further down would divide by zero.
if (( NUM_GPUS < 1 )); then
  echo "error: no GPUs detected; cannot schedule evaluations" >&2
  exit 1
fi

# 1. Run model selector
bash "$ROOT/run_model_selector.sh"

# 2. For each model, extract best trial info and run eval_exp.py on a different GPU
# nullglob: when nothing matches, iterate zero times instead of processing the
# literal, unexpanded pattern as if it were a model directory.
shopt -s nullglob
gpu=0
pids=()
failed=0
for model in "$BASE_DIR"/*-livneh; do
  out_json="$model/demo_select_livneh.json"
  if [[ ! -f "$out_json" ]]; then
    echo "No best trial found for $model"
    continue
  fi

  # '// empty' turns a missing or null field into an empty string rather than
  # the literal text "null", so absent values can be detected below.
  # A jq failure (e.g. malformed JSON) is recorded and the model skipped, so
  # jobs that are already running are still waited for at the end.
  if ! run_id=$(jq -r '.best.run_id // empty' "$out_json") \
    || ! best_epoch=$(jq -r '.best.best_epoch // empty' "$out_json"); then
    echo "error: could not parse $out_json" >&2
    failed=1
    continue
  fi
  if [[ -z "$run_id" || -z "$best_epoch" ]]; then
    echo "error: $out_json has no .best.run_id / .best.best_epoch" >&2
    failed=1
    continue
  fi

  echo "[eval] $model: run_id=$run_id, epoch=$best_epoch on GPU $gpu"
  CUDA_VISIBLE_DEVICES=$gpu python "$ROOT/eval_exp.py" \
    --run_id "$run_id" \
    --testepoch "$best_epoch" \
    --base_dir "$BASE_DIR" \
    --test_period "1990,2014" \
    --spatial_extent "02" &
  pids+=("$!")
  gpu=$(( (gpu + 1) % NUM_GPUS ))
done

# Wait for all jobs to finish.
# Every job is waited for, even after one fails, so no evaluation is left
# running unattended and every failure is reported.
# The ${arr[@]+...} form keeps an empty array from tripping 'set -u' on
# Bash versions older than 4.4.
for pid in ${pids[@]+"${pids[@]}"}; do
  if ! wait "$pid"; then
    echo "error: evaluation job with pid $pid failed" >&2
    failed=1
  fi
done

if (( failed )); then
  exit 1
fi