25 commits
38d6334
can run grpo workflow, but reward abnormal
RayenTian Dec 14, 2025
1465c7d
update paraname map from vllm lora manager
RayenTian Dec 24, 2025
c605850
add functional test
RayenTian Dec 29, 2025
95d5b62
add unit test and functional test
RayenTian Jan 2, 2026
ae698f8
update default value
RayenTian Jan 5, 2026
dc96866
remove debug print
RayenTian Jan 9, 2026
02e55d3
Implement LoRA assertions in GRPO setup and add functional test for L…
RayenTian Jan 9, 2026
4e7aa40
add nightly test
RayenTian Jan 9, 2026
2a7975d
Update LoRA configuration in YAML files to disable LoRA in grpo_math_…
RayenTian Jan 9, 2026
6d7e746
remove non-colocated unit test
RayenTian Jan 9, 2026
6e306c3
remove debug code and update functional test
RayenTian Jan 9, 2026
999790f
remove unused comment
RayenTian Jan 9, 2026
41d735d
chmod for execution
RayenTian Jan 9, 2026
b8a8c5b
support lora grpo on sync + co-located config
RayenTian Jan 13, 2026
b65cb83
Added megatron specific peft checks
vadam5 Jan 13, 2026
98a4ae5
resolved merge conflicts
vadam5 Jan 13, 2026
ded5cc8
allow megatron to set lora_enabled
vadam5 Jan 14, 2026
00eb4aa
added lora aware parameter iteration for weight streaming and broadca…
vadam5 Jan 14, 2026
13a42aa
Convert lora weights only to HF format then stream for rollout
vadam5 Jan 20, 2026
20f8cbf
added merge lora weight function
vadam5 Jan 21, 2026
c6c0249
Update Megatron submodule pins
yaoyu-33 Jan 16, 2026
9823469
Bump Megatron submodules
yaoyu-33 Jan 16, 2026
4ca187b
fix CACHED_DEPENDENCIES
ashors1 Jan 16, 2026
470f196
API updates
ashors1 Jan 17, 2026
e6a062c
updated .gitmodules
vadam5 Jan 21, 2026
4 changes: 2 additions & 2 deletions .gitmodules
@@ -1,7 +1,7 @@
 [submodule "3rdparty/Megatron-LM"]
     path = 3rdparty/Megatron-LM-workspace/Megatron-LM
-    url = https://github.com/terrykong/Megatron-LM.git
-    branch = yuya/nemo-rl-use-dev
+    url = https://github.com/NVIDIA-NeMo/Megatron-LM.git
+    branch = main
     shallow = true
 [submodule "3rdparty/Megatron-Bridge"]
     path = 3rdparty/Megatron-Bridge-workspace/Megatron-Bridge
2 changes: 1 addition & 1 deletion 3rdparty/Megatron-Bridge-workspace/Megatron-Bridge
Submodule Megatron-Bridge updated 324 files
5 changes: 3 additions & 2 deletions 3rdparty/Megatron-Bridge-workspace/setup.py
@@ -26,7 +26,8 @@
 bridge_package_name = "megatron.bridge"
 
 CACHED_DEPENDENCIES = [
-    "transformers>=4.57.1",
+    "accelerate",
+    "transformers==4.57.1",
     "datasets",
     "omegaconf>=2.3.0",
     "tensorboard>=2.19.0",
@@ -40,7 +41,7 @@
"hydra-core>1.3,<=1.3.2",
"megatron-core[dev,mlm]>=0.15.0a0,<0.17.0",
"qwen-vl-utils",
"transformer-engine[pytorch]>=2.9.0a0,<2.10.0",
"transformer-engine[pytorch]>=2.10.0a0,<2.12.0",
"mamba-ssm",
"nvidia-resiliency-ext",
"causal-conv1d",
2 changes: 1 addition & 1 deletion 3rdparty/Megatron-LM-workspace/Megatron-LM
16 changes: 8 additions & 8 deletions 3rdparty/Megatron-LM-workspace/setup.py
@@ -44,30 +44,30 @@
 CACHED_DEPENDENCIES = [
     # Default dependencies from pyproject.toml
     "torch",
-    "numpy<2.0.0",
+    "numpy",
     "packaging>=24.2",
     # Dev dependencies from pyproject.toml
-    "nvidia-modelopt[torch]>=0.33.0a0,<0.34.0; sys_platform != 'darwin'",
-    "transformer-engine[pytorch]>=2.9.0a0,<2.10.0",
-    "nvidia-resiliency-ext>=0.4.0a0,<0.5.0",
+    "nvidia-modelopt[torch]; sys_platform != 'darwin'",
+    "transformer-engine[pytorch,core_cu13]>=2.9.0a0,<2.12.0",
+    "nvidia-resiliency-ext",
     "tqdm",
     "einops~=0.8",
     "tensorstore~=0.1,!=0.1.46,!=0.1.72",
     "nvtx~=0.2",
     "multi-storage-client~=0.27",
     "opentelemetry-api~=1.33.1",
     "setuptools<80.0.0",
     "mamba-ssm~=2.2",
     "causal-conv1d~=1.5",
     "nv-grouped-gemm~=1.1",
     "megatron-energon[av_decode]~=6.0",
-    "av<16.0.0",
-    "flashinfer-python",
+    "av",
+    "flashinfer-python~=0.5.0",
     "wget",
     "onnxscript",
     "flash-linear-attention~=0.3.2",
     # VCS dependency - must match pyproject.toml [tool.uv.sources]
     "emerging_optimizers @ git+https://github.com/NVIDIA-NeMo/Emerging-Optimizers.git@v0.1.0",
     "datasets",
     "fastapi~=0.50",
 ]


12 changes: 12 additions & 0 deletions examples/configs/grpo_math_1B.yaml
@@ -90,6 +90,18 @@ policy:
     tensor_parallel_size: 1
     context_parallel_size: 1
     custom_parallel_plan: null
+    # LoRA (Low-Rank Adaptation) Configuration
+    lora_cfg:
+      enabled: False # Set to True to enable LoRA fine-tuning
+      target_modules: [] # List of module names to apply LoRA (empty list with match_all_linear=true applies to all linear layers)
+      exclude_modules: [] # List of module names to exclude from LoRA
+      match_all_linear: true # If True, applies LoRA to all linear layers (overrides target_modules)
+      dim: 8 # LoRA rank (r): lower rank = fewer parameters but less capacity. Typical values: 4, 8, 16, 32, 64
+      alpha: 32 # LoRA scaling factor: effective learning rate multiplier = alpha/dim. Typical values: 16, 32, 64
+      dropout: 0.0 # Dropout probability applied to LoRA layers (0.0 = no dropout)
+      dropout_position: "post" # Where to apply dropout: "pre" (before LoRA) or "post" (after LoRA)
+      lora_A_init: "xavier" # Initialization method for LoRA A matrix: "xavier" or "uniform"
+      use_triton: true # Use Triton-optimized kernels for LoRA (faster but requires flash-attn). Disable when tensor_parallel_size > 1
 
   megatron_cfg:
     enabled: false
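For reference, the knobs added above typically combine inside a LoRA linear layer as follows: the adapter update is scaled by alpha/dim, dropout is applied either to the layer input ("pre") or to the adapter output ("post"), A is xavier-initialized, and B starts at zero so the adapter is a no-op before training. The snippet below is a minimal sketch under those assumptions, not NeMo RL's implementation; the class and argument names are hypothetical.

# Minimal sketch of a LoRA-wrapped linear layer driven by the lora_cfg
# fields above. Illustrative only; not the code added in this PR.
import torch
import torch.nn as nn

class LoRALinear(nn.Module):
    def __init__(self, base: nn.Linear, dim: int = 8, alpha: float = 32.0,
                 dropout: float = 0.0, dropout_position: str = "post"):
        super().__init__()
        self.base = base
        # lora_A_init: "xavier"; B is zero so the initial update is zero.
        self.lora_a = nn.Parameter(torch.empty(dim, base.in_features))
        nn.init.xavier_uniform_(self.lora_a)
        self.lora_b = nn.Parameter(torch.zeros(base.out_features, dim))
        self.scaling = alpha / dim  # effective multiplier alpha/dim
        self.dropout = nn.Dropout(dropout)
        self.dropout_position = dropout_position

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        # dropout_position "pre": drop the adapter input; "post": drop its output.
        h = self.dropout(x) if self.dropout_position == "pre" else x
        delta = (h @ self.lora_a.T) @ self.lora_b.T * self.scaling
        if self.dropout_position == "post":
            delta = self.dropout(delta)
        return self.base(x) + delta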