From 47a850c150a366f7e1b2c8bae7d6dd752ff8e2b7 Mon Sep 17 00:00:00 2001
From: daniellepintz <daniellepintz@gmail.com>
Date: Thu, 20 Nov 2025 16:07:29 +0000
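Subject: [PATCH 1/4] add slurm batch script

Add submit_grpo.sh, an sbatch script that activates the forge conda
environment and launches apps.grpo.main with apps/grpo/qwen3_8b.yaml
via srun on a single 8-GPU node.

Also set the wandb entity to agentic-models in apps/grpo/qwen3_8b.yaml.
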
---
 apps/grpo/qwen3_8b.yaml |  1 +
 submit_grpo.sh          | 31 +++++++++++++++++++++++++++++++
 2 files changed, 32 insertions(+)
 create mode 100755 submit_grpo.sh

diff --git a/apps/grpo/qwen3_8b.yaml b/apps/grpo/qwen3_8b.yaml
index a2815c5c0..50ed08b68 100644
--- a/apps/grpo/qwen3_8b.yaml
+++ b/apps/grpo/qwen3_8b.yaml
@@ -12,6 +12,7 @@ off_by_n: 1 # Off by one by default
 # Observability configuration
 metric_logging:
   wandb:
+    entity: agentic-models
     project: grpo-training
     group: grpo_exp_${oc.env:USER}
     logging_mode: global_reduce # global_reduce, per_rank_reduce, per_rank_no_reduce
diff --git a/submit_grpo.sh b/submit_grpo.sh
new file mode 100755
index 000000000..db44042dd
--- /dev/null
+++ b/submit_grpo.sh
@@ -0,0 +1,31 @@
+#!/bin/bash
+#SBATCH --job-name=grpo-qwen3-32b
+#SBATCH --qos=h200_agentic-models_high
+#SBATCH --account=agentic-models
+#SBATCH --nodes=1
+#SBATCH --ntasks-per-node=1
+#SBATCH --gpus-per-node=8
+#SBATCH --cpus-per-task=128
+#SBATCH --mem=500G
+#SBATCH --time=72:00:00
+
+echo "Starting GRPO training job"
+
+# Initialize conda
+eval "$(conda shell.bash hook)"
+
+# Activate the conda environment (replace 'forge' with your actual environment name if different)
+conda activate forge
+
+# # Option 1: Set wandb API key (replace with your actual API key)
+# export "WANDB_API_KEY=REDACTED"
+
+# export WANDB_MODE=offline
+# export WANDB_DIR="/mnt/wsfuse/teamforge/wandb/$(cat /dev/urandom | tr -dc 'a-zA-Z0-9' | fold -w 5 | head -n 1)"
+# mkdir -p "$WANDB_DIR"
+
+# Change to the torchforge directory
+cd /storage/home/daniellepintz/torchforge
+
+# Run the GRPO training
+srun python -m apps.grpo.main --config apps/grpo/qwen3_8b.yaml

From 9eef5f488146e390e537427eaedd3c1afd595528 Mon Sep 17 00:00:00 2001
From: daniellepintz <daniellepintz@gmail.com>
Date: Thu, 20 Nov 2025 16:10:51 +0000
Subject: [PATCH 2/4] remove commented-out wandb setup from submit_grpo.sh

Drop the commented-out WANDB_* exports and the inline explanatory
comments from the batch script.

The API key that the removed comment originally contained has been
redacted in this series; because it was committed, the real key should
be rotated.

---
 submit_grpo.sh | 11 -----------
 1 file changed, 11 deletions(-)

diff --git a/submit_grpo.sh b/submit_grpo.sh
index db44042dd..e64662b4a 100755
--- a/submit_grpo.sh
+++ b/submit_grpo.sh
@@ -11,21 +11,10 @@
 
 echo "Starting GRPO training job"
 
-# Initialize conda
 eval "$(conda shell.bash hook)"
 
-# Activate the conda environment (replace 'forge' with your actual environment name if different)
 conda activate forge
 
-# # Option 1: Set wandb API key (replace with your actual API key)
-# export "WANDB_API_KEY=REDACTED"
-
-# export WANDB_MODE=offline
-# export WANDB_DIR="/mnt/wsfuse/teamforge/wandb/$(cat /dev/urandom | tr -dc 'a-zA-Z0-9' | fold -w 5 | head -n 1)"
-# mkdir -p "$WANDB_DIR"
-
-# Change to the torchforge directory
 cd /storage/home/daniellepintz/torchforge
 
-# Run the GRPO training
 srun python -m apps.grpo.main --config apps/grpo/qwen3_8b.yaml

From 6177f21c7ec78ea12391afd014ca716ccb1ed574 Mon Sep 17 00:00:00 2001
From: daniellepintz <daniellepintz@gmail.com>
Date: Fri, 21 Nov 2025 15:33:13 +0000
Subject: [PATCH 3/4] disable torchstore RDMA and run the qwen3_32b config

---
 submit_grpo.sh | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/submit_grpo.sh b/submit_grpo.sh
index e64662b4a..1f18ea034 100755
--- a/submit_grpo.sh
+++ b/submit_grpo.sh
@@ -15,6 +15,8 @@ eval "$(conda shell.bash hook)"
 
 conda activate forge
 
+export TORCHSTORE_RDMA_ENABLED=0
+
 cd /storage/home/daniellepintz/torchforge
 
-srun python -m apps.grpo.main --config apps/grpo/qwen3_8b.yaml
+srun python -m apps.grpo.main --config apps/grpo/qwen3_32b.yaml

From 797989bfb3e90d096225fc5967003ea1e7b75141 Mon Sep 17 00:00:00 2001
From: daniellepintz <daniellepintz@gmail.com>
Date: Sun, 23 Nov 2025 13:22:52 +0000
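Subject: [PATCH 4/4] tune 32b run: smaller batch, slurm scheduler args, no srun

- apps/grpo/qwen3_32b.yaml: reduce local_batch_size from 32 to 2.
- src/forge/controller/launcher.py: update the hardcoded memMB/cpu
  values and pass account, qos and time as scheduler_args.
- submit_grpo.sh: export TORCH_COMPILE_DISABLE=1, unset the
  SLURM_MEM_PER_* variables, and invoke python directly instead of
  through srun.
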
---
 apps/grpo/qwen3_32b.yaml         | 2 +-
 src/forge/controller/launcher.py | 9 +++++++--
 submit_grpo.sh                   | 4 +++-
 3 files changed, 11 insertions(+), 4 deletions(-)

diff --git a/apps/grpo/qwen3_32b.yaml b/apps/grpo/qwen3_32b.yaml
index 639f6669e..e32dbd8b7 100644
--- a/apps/grpo/qwen3_32b.yaml
+++ b/apps/grpo/qwen3_32b.yaml
@@ -4,7 +4,7 @@
 
 # Global configuration
 group_size: 16
-local_batch_size: 32 # per-device batch size
+local_batch_size: 2 # per-device batch size
 max_req_tokens: 1024
 max_res_tokens: 1024
 model: "Qwen/Qwen3-32B"
diff --git a/src/forge/controller/launcher.py b/src/forge/controller/launcher.py
index c40982634..e39955aa0 100644
--- a/src/forge/controller/launcher.py
+++ b/src/forge/controller/launcher.py
@@ -136,14 +136,19 @@ async def get_allocator(self, name: str, num_hosts: int) -> tuple[Any, Any, str]
         for role in appdef.roles:
             # Note - this is hardcoded to SLURM
             # We got this with sinfo
-            role.resource.memMB = 2062607
-            role.resource.cpu = 128
+            role.resource.memMB = 2047962
+            role.resource.cpu = 192
             role.resource.gpu = 8
 
         # Note - we cannot add in an empty workspace, so we create a fake temporary one
         temp_workspace = tempfile.mkdtemp(prefix="forge_workspace_")
         server_config = Config(
             scheduler="slurm",
+            scheduler_args={
+                "account": "agentic-models",
+                "qos": "h100_lowest",
+                "time": "72:00:00"
+            },
             appdef=appdef,
             workspace=monarch.tools.config.workspace.Workspace(dirs=[temp_workspace]),
         )
diff --git a/submit_grpo.sh b/submit_grpo.sh
index 1f18ea034..d99730ea2 100755
--- a/submit_grpo.sh
+++ b/submit_grpo.sh
@@ -15,8 +15,10 @@ eval "$(conda shell.bash hook)"
 
 conda activate forge
 
+export TORCH_COMPILE_DISABLE=1
+unset SLURM_MEM_PER_CPU SLURM_MEM_PER_GPU SLURM_MEM_PER_NODE
 export TORCHSTORE_RDMA_ENABLED=0
 
 cd /storage/home/daniellepintz/torchforge
 
-srun python -m apps.grpo.main --config apps/grpo/qwen3_32b.yaml
+python -m apps.grpo.main --config apps/grpo/qwen3_32b.yaml