SimplexLab · powerofaisinstudy-debug · Jun 12, 2026 · Jun 13, 2026 · Jun 14, 2026
@@ -0,0 +1,15 @@
+# TorchJD: GradNorm Integration
+
+This fork adds the `GradNormScalarizer` to the `TorchJD` library to support dynamic loss balancing in multi-task learning.
+
+## Key Features
+- Dynamic gradient norm balancing.
+- Easy integration with the existing `Scalarizer` interface.
+
+## Usage
+```python
+from torchjd.scalarization import GradNormScalarizer
+
+# Initialize the scalarizer
+scalarizer = GradNormScalarizer(num_tasks=3)
+```
@@ -3,7 +3,6 @@
 import torch
 from torch import Tensor, nn, vmap
 from torch.autograd.graph import get_gradient_edge
-
 from torchjd._linalg import flatten, movedim, reshape
 from torchjd.linalg import PSDMatrix
 

@@ -3,11 +3,11 @@
 
 from torch import Tensor
 from torch.utils._pytree import PyTree
-
 from torchjd._linalg import compute_gramian
-from torchjd.autogram._jacobian_computer import JacobianComputer
 from torchjd.linalg import Matrix, PSDMatrix
 
+from torchjd.autogram._jacobian_computer import JacobianComputer
+
 
 class GramianComputer(ABC):
     @abstractmethod

@@ -7,7 +7,6 @@
 from torch.nn import Parameter
 from torch.overrides import is_tensor_like
 from torch.utils._pytree import PyTree, tree_flatten, tree_map, tree_map_only
-
 from torchjd.linalg import Matrix
 
 # Note about import from protected _pytree module:

@@ -0,0 +1,8 @@
+from setuptools import find_packages, setup
+
+setup(  # minimal setuptools configuration for a src/ layout
+    name="torchjd",
+    version="0.1.0",  # NOTE(review): hard-coded; confirm it matches the version declared elsewhere in the project
+    package_dir={"": "src"},  # the root package namespace maps to the src/ directory
+    packages=find_packages(where="src"),  # discover every package found under src/
+)
@@ -0,0 +1,33 @@
+import torch
+from torch import Tensor, nn
+
+from ._scalarizer_base import Scalarizer
+
+
+class GradNormScalarizer(Scalarizer):
+    def __init__(self, num_tasks: int, alpha: float = 1.5) -> None:
+        super().__init__()
+        self.num_tasks = num_tasks
+        self.weights = nn.Parameter(torch.ones(num_tasks))
+        self.alpha = alpha
+        self.register_buffer("initial_losses", None)
+
+    def forward(self, values: Tensor, model: nn.Module = None) -> Tensor:
+        if self.initial_losses is None:
+            self.initial_losses = values.detach().clone()
+
+        if model is not None:
+            norms = self._compute_gradient_norms(values, model)
+            loss_ratios = values / self.initial_losses
+            target_norm = torch.mean(norms) * (loss_ratios**self.alpha)
+            self.weights.data = target_norm / norms
+
+        return (values * self.weights).sum()
+
+    def _compute_gradient_norms(self, values: Tensor, model: nn.Module) -> Tensor:
+        norms = []
+        for loss in values:
+            grads = torch.autograd.grad(loss, model.parameters(), retain_graph=True)
+            norm = torch.norm(torch.cat([g.view(-1) for g in grads]))
+            norms.append(norm)
+        return torch.stack(norms)