-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathsubmit_2gpu.sub
More file actions
22 lines (19 loc) · 898 Bytes
/
submit_2gpu.sub
File metadata and controls
22 lines (19 loc) · 898 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
#!/bin/bash
#SBATCH --job-name=bml_proj2 # Job name
#SBATCH --output=logs/%x_%j.out # Standard output log (%x = job name, %j = job id)
#SBATCH --error=logs/%x_%j.err # Standard error log
#SBATCH --gres=gpu:2 # Request two GPUs
#SBATCH --partition=plgrid-gpu-a100 # Specify GPU partition
#SBATCH --cpus-per-gpu=8 # Number of CPUs per GPU
#SBATCH --mem=100G # Memory requested
#SBATCH --account=plgllmparamgr-gpu-a100 # Account name
#SBATCH --time=0-00:15:00 # Time limit (D-HH:MM:SS)
#
# Slurm batch script: launches main.py on 2 GPUs of a single node via torchrun
# with FSDP enabled.
#
# NOTE(review): the logs/ directory used by --output/--error is resolved
# relative to the submission directory and is presumably not created by Slurm;
# make sure it exists before running sbatch.

# Print an error message to stderr and abort the job.
die() {
  printf 'error: %s\n' "$*" >&2
  exit 1
}

# Set up the environment
readonly WORK_DIR="/home/j321m_a100/bml/bml_project2"
# Abort instead of continuing from the wrong directory: every path below is
# relative to WORK_DIR.
cd -- "$WORK_DIR" || die "cannot cd to ${WORK_DIR}"

# Activate the existing virtual environment (it is not created here).
# shellcheck disable=SC1091
source bml/bin/activate || die "cannot activate virtual environment ${WORK_DIR}/bml"

# Load project settings (expected to define NEPTUNE_PROJECT, echoed below).
# shellcheck disable=SC1091
source .env || die "cannot source ${WORK_DIR}/.env"
# Log the configured Neptune project so it shows up in the job's stdout.
printf '%s\n' "${NEPTUNE_PROJECT:-}"

# Run the Python script using torchrun.
# --nproc_per_node must match the GPU count requested with --gres above.
torchrun --nproc_per_node=2 main.py \
  --dataset_path /local_storage_2/llm-random/datasets/c4_ \
  --use_fsdp true