@@ -88,7 +88,7 @@ huggingface-cli download mlfoundations-dev/evalset_2870 --repo-type dataset
 huggingface-cli download open-thoughts/OpenThinker-7B
 
 # Request an interactive node for testing
-salloc --nodes=1 --ntasks-per-node=1 --gres=gpu:1 --cpus-per-task=12 -p dc-hwai -A westai0007
+salloc --nodes=1 --ntasks-per-node=1 --gres=gpu:1 --cpus-per-task=12 -p dc-hwai -A westai0066
 
 # Verify GPU is available
 srun bash -c 'nvidia-smi'
@@ -5,7 +5,7 @@
 #SBATCH --gres=gpu:1
 #SBATCH --time=01:00:00
 #SBATCH --cpus-per-task=12
-#SBATCH --account=westai0007
+#SBATCH --account=westai0066
 #SBATCH --partition=dc-hwai
 
 # ENVIRONMENT VARIABLES
@@ -23,4 +23,4 @@ export OUTPUT_DATASET="$DCFT_DATA/evalchemy_results/${MODEL_NAME##*--}_${INPUT_D
 
 # RUN SHARDED INFERENCE
 srun echo -e "GLOBAL_SIZE: ${GLOBAL_SIZE}\nRANK: ${RANK}\nMODEL: ${MODEL_NAME}\nINPUT_DATASET: ${INPUT_DATASET}\nOUTPUT_DATASET: ${OUTPUT_DATASET}"
-srun python $EVALCHEMY/eval/distributed/process_shard.py --global_size ${GLOBAL_SIZE} --rank ${RANK} --input_dataset ${INPUT_DATASET} --model_name ${MODEL_NAME} --output_dataset ${OUTPUT_DATASET}
+srun python $EVALCHEMY/eval/distributed/process_shard.py --global_size ${GLOBAL_SIZE} --rank ${RANK} --input_dataset ${INPUT_DATASET} --model_name ${MODEL_NAME} --output_dataset ${OUTPUT_DATASET}
@@ -4,7 +4,7 @@
 #SBATCH --gres=gpu:4
 #SBATCH --time={time_limit}
 #SBATCH --cpus-per-task=12
-#SBATCH --account=westai0007
+#SBATCH --account=westai0066
 #SBATCH --partition=dc-hwai
 #SBATCH --job-name={job_name}
 #SBATCH --output={logs_dir}/%x_%j.out
@@ -25,4 +25,4 @@ OUTPUT_DATASET={output_dataset}
 
 # RUN SHARDED INFERENCE
 srun --output={logs_dir}/%x_%j_%t.out bash -c 'echo -e "GLOBAL_SIZE: ${SLURM_STEP_NUM_TASKS}\nRANK: ${SLURM_PROCID}\nMODEL: '$MODEL_NAME'\nINPUT_DATASET: '$INPUT_DATASET'\nOUTPUT_DATASET: '$OUTPUT_DATASET'"'
-srun --output={logs_dir}/%x_%j_%t.out bash -c 'CUDA_VISIBLE_DEVICES=${SLURM_LOCALID} python $EVALCHEMY/eval/distributed/process_shard.py --global_size ${SLURM_STEP_NUM_TASKS} --rank ${SLURM_PROCID} --input_dataset '${INPUT_DATASET}' --model_name '${MODEL_NAME}' --output_dataset '${OUTPUT_DATASET}''
+srun --output={logs_dir}/%x_%j_%t.out bash -c 'CUDA_VISIBLE_DEVICES=${SLURM_LOCALID} python $EVALCHEMY/eval/distributed/process_shard.py --global_size ${SLURM_STEP_NUM_TASKS} --rank ${SLURM_PROCID} --input_dataset '${INPUT_DATASET}' --model_name '${MODEL_NAME}' --output_dataset '${OUTPUT_DATASET}''
You can’t perform that action at this time.
0 commit comments