#!/bin/bash
#SBATCH --job-name=haptic_experiments # Job name
#SBATCH --requeue # Requeue the job if it is preempted or a node fails
#SBATCH --output=slurm/slurm_output_%A_%a.out # Output log (%A=job ID, %a=array task ID)
#SBATCH --error=slurm/slurm_error_%A_%a.err # Error log
#SBATCH --mail-type=ALL # Email notifications
#SBATCH --mail-user=pnt8@cornell.edu # Email address
#SBATCH --array=0-3 # Array index; modify range as needed
#SBATCH --get-user-env # Retrieve user's environment
#SBATCH --cpus-per-task=8 # CPU cores per task
#SBATCH --mem=24G # Memory per node
#SBATCH --gres=gpu:a6000:1 # One NVIDIA A6000 GPU
#SBATCH --partition=gpu # GPU partition
#SBATCH --time=100:00:00 # Wall-clock limit (HH:MM:SS)
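# Let PyTorch's CUDA caching allocator grow allocations in place
# (expandable segments), which can reduce fragmentation-related OOMs.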
export PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True
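# Default to task 0 so the script also runs outside a SLURM array
# (e.g., "bash visuohaptic_experiments.sub" for local testing).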
export SLURM_ARRAY_TASK_ID=${SLURM_ARRAY_TASK_ID:-0}
experiment_name="KL divergence test (output is target distribution)"
# Source Conda's initialization script
source "$(conda info --base)/etc/profile.d/conda.sh"
conda activate hd_learn2
# Define hyperparameter options
learning_rates=(1e-5)
batch_sizes=(64)
filters=(256)
epochs=(100)
lambda_kl=(0 0.01 0.1 0.5)
seeds=(18)
# Compute number of options for each hyperparameter
num_lr=${#learning_rates[@]}
num_bs=${#batch_sizes[@]}
num_filters=${#filters[@]}
num_epochs=${#epochs[@]}
num_lambda_kl=${#lambda_kl[@]}
num_seeds=${#seeds[@]}
# Calculate total number of combinations
total_combinations=$(( num_lr * num_bs * num_filters * num_epochs * num_lambda_kl * num_seeds ))
# Get the current SLURM array index
idx=$SLURM_ARRAY_TASK_ID
# Check that idx is within total combinations
if [ "$idx" -ge "$total_combinations" ]; then
echo "SLURM_ARRAY_TASK_ID $idx exceeds total combinations $total_combinations."
exit 1
fi
# Calculate indices for each hyperparameter combination using modulo arithmetic
lr_idx=$(( idx % num_lr ))
bs_idx=$(( (idx / num_lr) % num_bs ))
filters_idx=$(( (idx / (num_lr * num_bs)) % num_filters ))
epochs_idx=$(( (idx / (num_lr * num_bs * num_filters)) % num_epochs ))
lambda_kl_idx=$(( (idx / (num_lr * num_bs * num_filters * num_epochs)) % num_lambda_kl ))
seed_idx=$(( (idx / (num_lr * num_bs * num_filters * num_epochs * num_lambda_kl)) % num_seeds ))
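# Worked example: with the arrays above, every size is 1 except
# num_lambda_kl=4, so total_combinations=4 and idx indexes lambda_kl
# directly (idx=2 -> lambda_kl=0.1). For larger grids, idx acts as a
# mixed-radix number whose digits are the per-hyperparameter indices,
# with lr varying fastest.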
# Extract hyperparameter values for this combination
lr=${learning_rates[$lr_idx]}
bs=${batch_sizes[$bs_idx]}
nf=${filters[$filters_idx]}
ep=${epochs[$epochs_idx]}
lambda=${lambda_kl[$lambda_kl_idx]}
seed=${seeds[$seed_idx]}
# Print selected hyperparameters for debugging
echo "Selected hyperparameters:"
echo "Learning Rate: $lr"
echo "Batch Size: $bs"
echo "Filters: $nf"
echo "Epochs: $ep"
echo "Lambda KL: $lambda"
echo "Seed: $seed"
# Run the Hydra-enabled Python script.
# Note: single quotes keep the shell from expanding ${env:...}, so Hydra's env
# resolver reads SLURM_ARRAY_TASK_ID itself (falling back to 0). The experiment
# name contains spaces and parentheses, so it is additionally wrapped in single
# quotes for Hydra's override grammar.
python learning/visuohaptic_learner.py \
'hydra.job.num=${env:SLURM_ARRAY_TASK_ID,0}' \
experiment_name="${experiment_name}" \
lr="${lr}" \
batch_size="${bs}" \
nf="${nf}" \
epochs="${ep}" \
lambda_kl="${lambda}" \
seed="${seed}"