Skip to content

Commit 0c2089a

Browse files
authored
Updating compute budget on JURECA/JSC (hwai) to the active one (#148)
* Update simple_jureca.sbatch: update the compute budget account to the active one, westai0066
* Update process_shards_jureca.sbatch: update the compute budget on hwai to the active one, westai0066
* Update SETUP_JURECA.md: update the compute budget on hwai to the active one, westai0066
1 parent ce5cea9 commit 0c2089a

3 files changed

Lines changed: 5 additions & 5 deletions

File tree

eval/distributed/SETUP_JURECA.md

Lines changed: 1 addition & 1 deletion
```diff
@@ -88,7 +88,7 @@ huggingface-cli download mlfoundations-dev/evalset_2870 --repo-type dataset
 huggingface-cli download open-thoughts/OpenThinker-7B

 # Request an interactive node for testing
-salloc --nodes=1 --ntasks-per-node=1 --gres=gpu:1 --cpus-per-task=12 -p dc-hwai -A westai0007
+salloc --nodes=1 --ntasks-per-node=1 --gres=gpu:1 --cpus-per-task=12 -p dc-hwai -A westai0066

 # Verify GPU is available
 srun bash -c 'nvidia-smi'
```

eval/distributed/process_shards_jureca.sbatch

Lines changed: 2 additions & 2 deletions
```diff
@@ -5,7 +5,7 @@
 #SBATCH --gres=gpu:1
 #SBATCH --time=01:00:00
 #SBATCH --cpus-per-task=12
-#SBATCH --account=westai0007
+#SBATCH --account=westai0066
 #SBATCH --partition=dc-hwai

 # ENVIRONMENT VARIABLES
@@ -23,4 +23,4 @@ export OUTPUT_DATASET="$DCFT_DATA/evalchemy_results/${MODEL_NAME##*--}_${INPUT_D

 # RUN SHARDED INFERENCE
 srun echo -e "GLOBAL_SIZE: ${GLOBAL_SIZE}\nRANK: ${RANK}\nMODEL: ${MODEL_NAME}\nINPUT_DATASET: ${INPUT_DATASET}\nOUTPUT_DATASET: ${OUTPUT_DATASET}"
-srun python $EVALCHEMY/eval/distributed/process_shard.py --global_size ${GLOBAL_SIZE} --rank ${RANK} --input_dataset ${INPUT_DATASET} --model_name ${MODEL_NAME} --output_dataset ${OUTPUT_DATASET}
+srun python $EVALCHEMY/eval/distributed/process_shard.py --global_size ${GLOBAL_SIZE} --rank ${RANK} --input_dataset ${INPUT_DATASET} --model_name ${MODEL_NAME} --output_dataset ${OUTPUT_DATASET}
```
(The last removed/added lines are textually identical as scraped; the underlying change is presumably a trailing newline at end of file, which the page capture does not show — verify against the original commit.)

eval/distributed/simple_jureca.sbatch

Lines changed: 2 additions & 2 deletions
```diff
@@ -4,7 +4,7 @@
 #SBATCH --gres=gpu:4
 #SBATCH --time={time_limit}
 #SBATCH --cpus-per-task=12
-#SBATCH --account=westai0007
+#SBATCH --account=westai0066
 #SBATCH --partition=dc-hwai
 #SBATCH --job-name={job_name}
 #SBATCH --output={logs_dir}/%x_%j.out
@@ -25,4 +25,4 @@ OUTPUT_DATASET={output_dataset}

 # RUN SHARDED INFERENCE
 srun --output={logs_dir}/%x_%j_%t.out bash -c 'echo -e "GLOBAL_SIZE: ${SLURM_STEP_NUM_TASKS}\nRANK: ${SLURM_PROCID}\nMODEL: '$MODEL_NAME'\nINPUT_DATASET: '$INPUT_DATASET'\nOUTPUT_DATASET: '$OUTPUT_DATASET'"'
-srun --output={logs_dir}/%x_%j_%t.out bash -c 'CUDA_VISIBLE_DEVICES=${SLURM_LOCALID} python $EVALCHEMY/eval/distributed/process_shard.py --global_size ${SLURM_STEP_NUM_TASKS} --rank ${SLURM_PROCID} --input_dataset '${INPUT_DATASET}' --model_name '${MODEL_NAME}' --output_dataset '${OUTPUT_DATASET}''
+srun --output={logs_dir}/%x_%j_%t.out bash -c 'CUDA_VISIBLE_DEVICES=${SLURM_LOCALID} python $EVALCHEMY/eval/distributed/process_shard.py --global_size ${SLURM_STEP_NUM_TASKS} --rank ${SLURM_PROCID} --input_dataset '${INPUT_DATASET}' --model_name '${MODEL_NAME}' --output_dataset '${OUTPUT_DATASET}''
```
(The last removed/added lines are textually identical as scraped; the underlying change is presumably a trailing newline at end of file, which the page capture does not show — verify against the original commit.)

0 commit comments

Comments (0)