# gradnorm.py -- module state after applying the five-patch series
# ("Create gradnorm.py" followed by four "Update gradnorm.py" commits),
# with the review fixes described in the class docstring.
from typing import Optional

import torch
from torch import Tensor, nn

from ._scalarizer_base import Scalarizer


class GradNormScalarizer(Scalarizer):
    """Combine per-task losses into one scalar using GradNorm-style weights.

    The first vector of losses seen by :meth:`forward` is stored as the
    reference ("initial") losses. Whenever a model is passed to
    :meth:`forward`, the task weights are recomputed in closed form so that
    tasks whose loss has decreased the least (relative to their initial loss)
    and whose gradient norm is small receive a larger weight.

    Args:
        num_tasks: Number of losses that will be passed to :meth:`forward`.
        alpha: Exponent applied to the loss ratios; larger values push the
            weights harder towards the slow-training tasks.

    Raises:
        ValueError: If ``num_tasks`` is smaller than 1.
    """

    def __init__(self, num_tasks: int, alpha: float = 1.5) -> None:
        super().__init__()
        if num_tasks < 1:
            raise ValueError(f"num_tasks must be >= 1, got {num_tasks}")
        self.num_tasks = num_tasks
        self.weights = nn.Parameter(torch.ones(num_tasks))
        self.alpha = alpha
        # Filled lazily with the first losses seen by forward().
        self.register_buffer("initial_losses", None)

    def forward(self, values: Tensor, model: Optional[nn.Module] = None) -> Tensor:
        """Return the weighted sum of ``values``.

        Args:
            values: 1-D tensor holding one loss per task.
            model: When given, the task weights are refreshed from the
                gradient norms of each loss w.r.t. the model's trainable
                parameters before the weighted sum is computed.

        Returns:
            A 0-d tensor equal to ``(values * self.weights).sum()``.

        Raises:
            ValueError: If ``values`` does not have shape ``(num_tasks,)``.
        """
        if tuple(values.shape) != (self.num_tasks,):
            raise ValueError(
                f"values must have shape ({self.num_tasks},), "
                f"got {tuple(values.shape)}"
            )

        if self.initial_losses is None:
            self.initial_losses = values.detach().clone()

        if model is not None:
            norms = self._compute_gradient_norms(values, model)
            # The weight update is bookkeeping, not part of the differentiable
            # loss: run it without autograd and write in place so the
            # parameter keeps its identity, dtype and device.
            with torch.no_grad():
                eps = torch.finfo(norms.dtype).eps
                initial = self.initial_losses
                # Sign-preserving guard against a zero initial loss.
                safe_initial = torch.where(
                    initial.abs() < eps, torch.full_like(initial, eps), initial
                )
                # NOTE(review): a negative ratio raised to a fractional alpha
                # yields NaN; losses are assumed to be non-negative.
                loss_ratios = values.detach() / safe_initial
                target_norm = norms.mean() * loss_ratios**self.alpha
                new_weights = target_norm / norms.clamp_min(eps)
                # GradNorm renormalises the weights so they sum to the number
                # of tasks, which keeps the scale of the combined loss stable.
                total = new_weights.sum().clamp_min(eps)
                new_weights = new_weights * (self.num_tasks / total)
                self.weights.copy_(new_weights)

        # NOTE(review): self.weights is a Parameter, so this sum also sends
        # gradients to the weights; confirm they are not handed to an
        # optimizer unintentionally.
        return (values * self.weights).sum()

    def _compute_gradient_norms(self, values: Tensor, model: nn.Module) -> Tensor:
        """Return the L2 norm of each loss's gradient w.r.t. the model.

        Args:
            values: 1-D tensor of losses that are still attached to the graph.
            model: Module whose trainable parameters are differentiated.

        Returns:
            A 1-D tensor with one gradient norm per entry of ``values``.

        Raises:
            ValueError: If ``model`` has no trainable parameters.
        """
        # Frozen parameters cannot be differentiated; collect the trainable
        # ones once instead of re-walking the module for every loss.
        params = [p for p in model.parameters() if p.requires_grad]
        if not params:
            raise ValueError("model has no parameters that require gradients")

        norms = []
        for loss in values:
            # allow_unused: a task may not touch every parameter (for
            # example, task-specific heads); those gradients come back None.
            grads = torch.autograd.grad(
                loss, params, retain_graph=True, allow_unused=True
            )
            # reshape (not view) also handles non-contiguous gradients.
            flat = [g.reshape(-1) for g in grads if g is not None]
            if flat:
                norms.append(torch.norm(torch.cat(flat)))
            else:
                norms.append(params[0].new_zeros(()))
        return torch.stack(norms)