megatron/core/safe_globals.py (1 addition, 0 deletions)

```diff
@@ -33,6 +33,7 @@
     RerunState,
     BytesIO,
     Signals,
+    torch._C.Generator,  # Needed for torch format ckpt loading after weights_only default change
 ]
```
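For context, here is a minimal sketch of how an allowlist like this one is typically registered; the `SAFE_GLOBALS` variable and checkpoint path below are assumptions for illustration, not the actual Megatron-LM wiring. Since PyTorch 2.6, `torch.load` defaults to `weights_only=True`, which rejects any pickled type not explicitly allowlisted, including the RNG state objects (`torch._C.Generator`) stored in torch-format checkpoints.

```python
# Hedged sketch: registering a safe-globals allowlist before loading a
# torch-format checkpoint. SAFE_GLOBALS and the checkpoint path are
# hypothetical stand-ins for the real list in megatron/core/safe_globals.py.
import torch

SAFE_GLOBALS = [
    torch._C.Generator,  # RNG state objects embedded in the checkpoint
]

# Allowlist these types for weights_only unpickling (PyTorch >= 2.4 API).
torch.serialization.add_safe_globals(SAFE_GLOBALS)

# With the allowlist in place, the default weights_only=True load succeeds
# even though the checkpoint pickles a torch._C.Generator.
state = torch.load("model_checkpoint.pt")
```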


megatron/training/arguments.py (0 additions, 4 deletions)

```diff
@@ -1483,10 +1483,6 @@ def validate_args(args, defaults={}):
         args.use_layer_wise_distributed_optimizer = True
         args.use_distributed_optimizer = False
 
-        if args.optimizer == 'muon':
-            assert not args.overlap_grad_reduce, "Muon optimizer does not support overlap grad reduce. Use dist_muon instead."
-            assert not args.overlap_param_gather, "Muon optimizer does not support overlap param gather. Use dist_muon instead."
-
         assert not args.use_distributed_optimizer, "Muon optimizer does not support distributed optimizer for now."
         assert not args.use_torch_fsdp2, "Muon optimizer does not support Torch-FSDP2 for now."
         assert not args.use_megatron_fsdp, "Muon optimizer does not support Megatron-FSDP for now."
```
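To make the effect of the deletion concrete, here is a hedged sketch: a hypothetical helper (not the real `validate_args`) capturing the Muon constraints that remain, plus a flag combination that previously failed validation and now passes.

```python
# Hypothetical reconstruction of the remaining Muon checks; not the actual
# Megatron-LM validate_args code.
from types import SimpleNamespace

def check_muon_constraints(args):
    if args.optimizer == 'muon':
        assert not args.use_distributed_optimizer, \
            "Muon optimizer does not support distributed optimizer for now."
        assert not args.use_torch_fsdp2, \
            "Muon optimizer does not support Torch-FSDP2 for now."
        assert not args.use_megatron_fsdp, \
            "Muon optimizer does not support Megatron-FSDP for now."

# Before this PR, overlap_grad_reduce / overlap_param_gather with plain 'muon'
# tripped an assertion; after it, only the distributed-optimizer and FSDP
# restrictions remain.
args = SimpleNamespace(
    optimizer='muon',
    overlap_grad_reduce=True,      # no longer rejected
    overlap_param_gather=True,     # no longer rejected
    use_distributed_optimizer=False,
    use_torch_fsdp2=False,
    use_megatron_fsdp=False,
)
check_muon_constraints(args)  # passes
```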