From adc332cb075c3e0c9229f0d5a754e934aeb1e3b2 Mon Sep 17 00:00:00 2001
From: powerofaisinstudy-debug <powerofaisinstudy@gmail.com>
Date: Thu, 11 Jun 2026 17:10:09 +0530
Subject: [PATCH 1/5] Create gradnorm.py

---
 gradnorm.py | 35 +++++++++++++++++++++++++++++++++++
 1 file changed, 35 insertions(+)
 create mode 100644 gradnorm.py

diff --git a/gradnorm.py b/gradnorm.py
new file mode 100644
index 00000000..2ed5db0a
--- /dev/null
+++ b/gradnorm.py
@@ -0,0 +1,35 @@
+from torch import Tensor, nn
+import torch
+from torchjd.src.torchjd.scalarization._scalarizer_base import Scalarizer
+
+class GradNormScalarizer(Scalarizer):
+    def __init__(self, num_tasks: int, alpha: float = 1.5):
+        super().__init__()
+        self.num_tasks = num_tasks
+        self.weights = nn.Parameter(torch.ones(num_tasks))
+        self.alpha = alpha
+        self.register_buffer("initial_losses", None)
+
+    def forward(self, values: Tensor, model: nn.Module = None) -> Tensor:
+        # 1. Initialize losses if this is the first step
+        if self.initial_losses is None:
+            self.initial_losses = values.detach().clone()
+        
+        # 2. IF model is provided, calculate the GradNorm balancing
+        if model is not None:
+            # We move your logic inside here so it actually runs
+            norms = self._compute_gradient_norms(values, model)
+            # ... (add your GradNorm balancing calculation here) ...
+            # You would update self.weights based on the norms
+        
+        # 3. Always return the weighted sum
+        return (values * self.weights).sum()
+    
+    def _compute_gradient_norms(self, values: Tensor, model: nn.Module) -> Tensor:
+        # This helper remains, but is now called by forward()
+        norms = []
+        for loss in values:
+            grads = torch.autograd.grad(loss, model.parameters(), retain_graph=True)
+            norm = torch.norm(torch.cat([g.view(-1) for g in grads]))
+            norms.append(norm)
+        return torch.stack(norms)

From 7bdb1df26f4132f0819af06b4275f4cf8dc48eeb Mon Sep 17 00:00:00 2001
From: powerofaisinstudy-debug <powerofaisinstudy@gmail.com>
Date: Thu, 11 Jun 2026 17:29:35 +0530
Subject: [PATCH 2/5] Update gradnorm.py: drop numbered step comments in forward()

---
 gradnorm.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/gradnorm.py b/gradnorm.py
index 2ed5db0a..5a1f1d11 100644
--- a/gradnorm.py
+++ b/gradnorm.py
@@ -11,18 +11,18 @@ def __init__(self, num_tasks: int, alpha: float = 1.5):
         self.register_buffer("initial_losses", None)
 
     def forward(self, values: Tensor, model: nn.Module = None) -> Tensor:
-        # 1. Initialize losses if this is the first step
+        
         if self.initial_losses is None:
             self.initial_losses = values.detach().clone()
         
-        # 2. IF model is provided, calculate the GradNorm balancing
+        
         if model is not None:
             # We move your logic inside here so it actually runs
             norms = self._compute_gradient_norms(values, model)
             # ... (add your GradNorm balancing calculation here) ...
             # You would update self.weights based on the norms
         
-        # 3. Always return the weighted sum
+        
         return (values * self.weights).sum()
     
     def _compute_gradient_norms(self, values: Tensor, model: nn.Module) -> Tensor:

From ebcaa698a7f916c1d2000073f9d1dc3c3f08c480 Mon Sep 17 00:00:00 2001
From: powerofaisinstudy-debug <powerofaisinstudy@gmail.com>
Date: Thu, 11 Jun 2026 17:36:42 +0530
Subject: [PATCH 3/5] Update gradnorm.py: compute weights from gradient norms

---
 gradnorm.py | 14 +++++---------
 1 file changed, 5 insertions(+), 9 deletions(-)

diff --git a/gradnorm.py b/gradnorm.py
index 5a1f1d11..d886750b 100644
--- a/gradnorm.py
+++ b/gradnorm.py
@@ -1,5 +1,5 @@
-from torch import Tensor, nn
 import torch
+from torch import Tensor, nn
 from torchjd.src.torchjd.scalarization._scalarizer_base import Scalarizer
 
 class GradNormScalarizer(Scalarizer):
@@ -11,22 +11,18 @@ def __init__(self, num_tasks: int, alpha: float = 1.5):
         self.register_buffer("initial_losses", None)
 
     def forward(self, values: Tensor, model: nn.Module = None) -> Tensor:
-        
         if self.initial_losses is None:
             self.initial_losses = values.detach().clone()
         
-        
         if model is not None:
-            # We move your logic inside here so it actually runs
             norms = self._compute_gradient_norms(values, model)
-            # ... (add your GradNorm balancing calculation here) ...
-            # You would update self.weights based on the norms
-        
-        
+            loss_ratios = values / self.initial_losses
+            target_norm = torch.mean(norms) * (loss_ratios ** self.alpha)
+            self.weights.data = target_norm / norms
+            
         return (values * self.weights).sum()
     
     def _compute_gradient_norms(self, values: Tensor, model: nn.Module) -> Tensor:
-        # This helper remains, but is now called by forward()
         norms = []
         for loss in values:
             grads = torch.autograd.grad(loss, model.parameters(), retain_graph=True)

From 30d497f40bc578aa7050fa4826af2156a6df0da2 Mon Sep 17 00:00:00 2001
From: SundaramGupta <powerofaisinstudy@gmail.com>
Date: Fri, 12 Jun 2026 08:06:56 +0530
Subject: [PATCH 4/5] Update gradnorm.py: use relative import for Scalarizer

---
 gradnorm.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gradnorm.py b/gradnorm.py
index d886750b..bf8e3897 100644
--- a/gradnorm.py
+++ b/gradnorm.py
@@ -1,6 +1,6 @@
 import torch
 from torch import Tensor, nn
-from torchjd.src.torchjd.scalarization._scalarizer_base import Scalarizer
+from ._scalarizer_base import Scalarizer
 
 class GradNormScalarizer(Scalarizer):
     def __init__(self, num_tasks: int, alpha: float = 1.5):

From ab6f2bc7805a46b9461475de6bf5f386222fd199 Mon Sep 17 00:00:00 2001
From: SundaramGupta <powerofaisinstudy@gmail.com>
Date: Fri, 12 Jun 2026 08:44:13 +0530
Subject: [PATCH 5/5] Update gradnorm.py: annotate __init__ return type

---
 gradnorm.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gradnorm.py b/gradnorm.py
index bf8e3897..c37fa0af 100644
--- a/gradnorm.py
+++ b/gradnorm.py
@@ -3,7 +3,7 @@
 from ._scalarizer_base import Scalarizer
 
 class GradNormScalarizer(Scalarizer):
-    def __init__(self, num_tasks: int, alpha: float = 1.5):
+    def __init__(self, num_tasks: int, alpha: float = 1.5) -> None:
         super().__init__()
         self.num_tasks = num_tasks
         self.weights = nn.Parameter(torch.ones(num_tasks))