diff --git a/apps/grpo/qwen3_32b.yaml b/apps/grpo/qwen3_32b.yaml
index 639f6669e..e32dbd8b7 100644
--- a/apps/grpo/qwen3_32b.yaml
+++ b/apps/grpo/qwen3_32b.yaml
@@ -4,7 +4,7 @@
 
 # Global configuration
 group_size: 16
-local_batch_size: 32 # per-device batch size
+local_batch_size: 2 # per-device batch size
 max_req_tokens: 1024
 max_res_tokens: 1024
 model: "Qwen/Qwen3-32B"
diff --git a/apps/grpo/qwen3_8b.yaml b/apps/grpo/qwen3_8b.yaml
index a2815c5c0..50ed08b68 100644
--- a/apps/grpo/qwen3_8b.yaml
+++ b/apps/grpo/qwen3_8b.yaml
@@ -12,6 +12,7 @@ off_by_n: 1 # Off by one by default
 # Observability configuration
 metric_logging:
   wandb:
+    entity: agentic-models
     project: grpo-training
     group: grpo_exp_${oc.env:USER}
     logging_mode: global_reduce # global_reduce, per_rank_reduce, per_rank_no_reduce
diff --git a/src/forge/controller/launcher.py b/src/forge/controller/launcher.py
index c40982634..e39955aa0 100644
--- a/src/forge/controller/launcher.py
+++ b/src/forge/controller/launcher.py
@@ -136,14 +136,19 @@ async def get_allocator(self, name: str, num_hosts: int) -> tuple[Any, Any, str]
         for role in appdef.roles:
             # Note - this is hardcoded to SLURM
             # We got this with sinfo
-            role.resource.memMB = 2062607
-            role.resource.cpu = 128
+            role.resource.memMB = 2047962
+            role.resource.cpu = 192
             role.resource.gpu = 8
 
         # Note - we cannot add in an empty workspace, so we create a fake temporary one
         temp_workspace = tempfile.mkdtemp(prefix="forge_workspace_")
         server_config = Config(
             scheduler="slurm",
+            scheduler_args={
+                "account": "agentic-models",
+                "qos": "h100_lowest",
+                "time": "72:00:00"
+            },
             appdef=appdef,
             workspace=monarch.tools.config.workspace.Workspace(dirs=[temp_workspace]),
         )
diff --git a/submit_grpo.sh b/submit_grpo.sh
new file mode 100755
index 000000000..d99730ea2
--- /dev/null
+++ b/submit_grpo.sh
@@ -0,0 +1,24 @@
+#!/bin/bash
+#SBATCH --job-name=grpo-qwen3-32b
+#SBATCH --qos=h200_agentic-models_high
+#SBATCH --account=agentic-models
+#SBATCH --nodes=1
+#SBATCH --ntasks-per-node=1
+#SBATCH --gpus-per-node=8
+#SBATCH --cpus-per-task=128
+#SBATCH --mem=500G
+#SBATCH --time=72:00:00
+
+echo "Starting GRPO training job"
+
+eval "$(conda shell.bash hook)"
+
+conda activate forge
+
+export TORCH_COMPILE_DISABLE=1
+unset SLURM_MEM_PER_CPU SLURM_MEM_PER_GPU SLURM_MEM_PER_NODE
+export TORCHSTORE_RDMA_ENABLED=0
+
+cd /storage/home/daniellepintz/torchforge
+
+python -m apps.grpo.main --config apps/grpo/qwen3_32b.yaml
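
For reference, a minimal sketch of how the new submit_grpo.sh would typically be launched and monitored on a SLURM cluster. This is standard SLURM usage, not part of the patch; it assumes SLURM's default slurm-<jobid>.out output naming, and the job name comes from the #SBATCH --job-name directive in the script above.

# Submit the batch script; sbatch prints the assigned job ID.
sbatch submit_grpo.sh

# Check queue state for this job by the name set in the script.
squeue --me --name=grpo-qwen3-32b

# Stream the job's stdout/stderr (written to slurm-<jobid>.out by default;
# substitute the job ID printed by sbatch).
tail -f slurm-<jobid>.out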