From b4978131c8cfa63524a04a21ab8929174a805f76 Mon Sep 17 00:00:00 2001
From: Pierre Quinton <pierre.quinton@gmail.com>
Date: Thu, 11 Jun 2026 10:56:53 +0200
Subject: [PATCH 1/3] docs: Rename docs/ to reference/ to comply with diataxis

---
 docs/source/{docs => reference}/aggregation/aligned_mtl.rst      | 0
 docs/source/{docs => reference}/aggregation/cagrad.rst           | 0
 docs/source/{docs => reference}/aggregation/config.rst           | 0
 docs/source/{docs => reference}/aggregation/constant.rst         | 0
 docs/source/{docs => reference}/aggregation/cr_mogm.rst          | 0
 docs/source/{docs => reference}/aggregation/dualproj.rst         | 0
 docs/source/{docs => reference}/aggregation/fairgrad.rst         | 0
 docs/source/{docs => reference}/aggregation/graddrop.rst         | 0
 docs/source/{docs => reference}/aggregation/gradvac.rst          | 0
 docs/source/{docs => reference}/aggregation/imtl_g.rst           | 0
 docs/source/{docs => reference}/aggregation/index.rst            | 0
 docs/source/{docs => reference}/aggregation/krum.rst             | 0
 docs/source/{docs => reference}/aggregation/mean.rst             | 0
 docs/source/{docs => reference}/aggregation/mgda.rst             | 0
 docs/source/{docs => reference}/aggregation/modo.rst             | 0
 docs/source/{docs => reference}/aggregation/nash_mtl.rst         | 0
 docs/source/{docs => reference}/aggregation/pcgrad.rst           | 0
 docs/source/{docs => reference}/aggregation/random.rst           | 0
 docs/source/{docs => reference}/aggregation/sum.rst              | 0
 docs/source/{docs => reference}/aggregation/trimmed_mean.rst     | 0
 docs/source/{docs => reference}/aggregation/upgrad.rst           | 0
 docs/source/{docs => reference}/autogram/engine.rst              | 0
 docs/source/{docs => reference}/autogram/index.rst               | 0
 docs/source/{docs => reference}/autojac/backward.rst             | 0
 docs/source/{docs => reference}/autojac/index.rst                | 0
 docs/source/{docs => reference}/autojac/jac.rst                  | 0
 docs/source/{docs => reference}/autojac/jac_to_grad.rst          | 0
 docs/source/{docs => reference}/autojac/mtl_backward.rst         | 0
 docs/source/{docs => reference}/linalg/dual_cone.rst             | 0
 docs/source/{docs => reference}/linalg/index.rst                 | 0
 docs/source/{docs => reference}/linalg/matrix.rst                | 0
 docs/source/{docs => reference}/linalg/psd_matrix.rst            | 0
 docs/source/{docs => reference}/scalarization/constant.rst       | 0
 docs/source/{docs => reference}/scalarization/geometric_mean.rst | 0
 docs/source/{docs => reference}/scalarization/imtl_l.rst         | 0
 docs/source/{docs => reference}/scalarization/index.rst          | 0
 docs/source/{docs => reference}/scalarization/mean.rst           | 0
 docs/source/{docs => reference}/scalarization/random.rst         | 0
 docs/source/{docs => reference}/scalarization/stch.rst           | 0
 docs/source/{docs => reference}/scalarization/sum.rst            | 0
 docs/source/{docs => reference}/scalarization/uw.rst             | 0
 docs/source/{docs => reference}/stateful.rst                     | 0
 42 files changed, 0 insertions(+), 0 deletions(-)
 rename docs/source/{docs => reference}/aggregation/aligned_mtl.rst (100%)
 rename docs/source/{docs => reference}/aggregation/cagrad.rst (100%)
 rename docs/source/{docs => reference}/aggregation/config.rst (100%)
 rename docs/source/{docs => reference}/aggregation/constant.rst (100%)
 rename docs/source/{docs => reference}/aggregation/cr_mogm.rst (100%)
 rename docs/source/{docs => reference}/aggregation/dualproj.rst (100%)
 rename docs/source/{docs => reference}/aggregation/fairgrad.rst (100%)
 rename docs/source/{docs => reference}/aggregation/graddrop.rst (100%)
 rename docs/source/{docs => reference}/aggregation/gradvac.rst (100%)
 rename docs/source/{docs => reference}/aggregation/imtl_g.rst (100%)
 rename docs/source/{docs => reference}/aggregation/index.rst (100%)
 rename docs/source/{docs => reference}/aggregation/krum.rst (100%)
 rename docs/source/{docs => reference}/aggregation/mean.rst (100%)
 rename docs/source/{docs => reference}/aggregation/mgda.rst (100%)
 rename docs/source/{docs => reference}/aggregation/modo.rst (100%)
 rename docs/source/{docs => reference}/aggregation/nash_mtl.rst (100%)
 rename docs/source/{docs => reference}/aggregation/pcgrad.rst (100%)
 rename docs/source/{docs => reference}/aggregation/random.rst (100%)
 rename docs/source/{docs => reference}/aggregation/sum.rst (100%)
 rename docs/source/{docs => reference}/aggregation/trimmed_mean.rst (100%)
 rename docs/source/{docs => reference}/aggregation/upgrad.rst (100%)
 rename docs/source/{docs => reference}/autogram/engine.rst (100%)
 rename docs/source/{docs => reference}/autogram/index.rst (100%)
 rename docs/source/{docs => reference}/autojac/backward.rst (100%)
 rename docs/source/{docs => reference}/autojac/index.rst (100%)
 rename docs/source/{docs => reference}/autojac/jac.rst (100%)
 rename docs/source/{docs => reference}/autojac/jac_to_grad.rst (100%)
 rename docs/source/{docs => reference}/autojac/mtl_backward.rst (100%)
 rename docs/source/{docs => reference}/linalg/dual_cone.rst (100%)
 rename docs/source/{docs => reference}/linalg/index.rst (100%)
 rename docs/source/{docs => reference}/linalg/matrix.rst (100%)
 rename docs/source/{docs => reference}/linalg/psd_matrix.rst (100%)
 rename docs/source/{docs => reference}/scalarization/constant.rst (100%)
 rename docs/source/{docs => reference}/scalarization/geometric_mean.rst (100%)
 rename docs/source/{docs => reference}/scalarization/imtl_l.rst (100%)
 rename docs/source/{docs => reference}/scalarization/index.rst (100%)
 rename docs/source/{docs => reference}/scalarization/mean.rst (100%)
 rename docs/source/{docs => reference}/scalarization/random.rst (100%)
 rename docs/source/{docs => reference}/scalarization/stch.rst (100%)
 rename docs/source/{docs => reference}/scalarization/sum.rst (100%)
 rename docs/source/{docs => reference}/scalarization/uw.rst (100%)
 rename docs/source/{docs => reference}/stateful.rst (100%)

diff --git a/docs/source/docs/aggregation/aligned_mtl.rst b/docs/source/reference/aggregation/aligned_mtl.rst
similarity index 100%
rename from docs/source/docs/aggregation/aligned_mtl.rst
rename to docs/source/reference/aggregation/aligned_mtl.rst
diff --git a/docs/source/docs/aggregation/cagrad.rst b/docs/source/reference/aggregation/cagrad.rst
similarity index 100%
rename from docs/source/docs/aggregation/cagrad.rst
rename to docs/source/reference/aggregation/cagrad.rst
diff --git a/docs/source/docs/aggregation/config.rst b/docs/source/reference/aggregation/config.rst
similarity index 100%
rename from docs/source/docs/aggregation/config.rst
rename to docs/source/reference/aggregation/config.rst
diff --git a/docs/source/docs/aggregation/constant.rst b/docs/source/reference/aggregation/constant.rst
similarity index 100%
rename from docs/source/docs/aggregation/constant.rst
rename to docs/source/reference/aggregation/constant.rst
diff --git a/docs/source/docs/aggregation/cr_mogm.rst b/docs/source/reference/aggregation/cr_mogm.rst
similarity index 100%
rename from docs/source/docs/aggregation/cr_mogm.rst
rename to docs/source/reference/aggregation/cr_mogm.rst
diff --git a/docs/source/docs/aggregation/dualproj.rst b/docs/source/reference/aggregation/dualproj.rst
similarity index 100%
rename from docs/source/docs/aggregation/dualproj.rst
rename to docs/source/reference/aggregation/dualproj.rst
diff --git a/docs/source/docs/aggregation/fairgrad.rst b/docs/source/reference/aggregation/fairgrad.rst
similarity index 100%
rename from docs/source/docs/aggregation/fairgrad.rst
rename to docs/source/reference/aggregation/fairgrad.rst
diff --git a/docs/source/docs/aggregation/graddrop.rst b/docs/source/reference/aggregation/graddrop.rst
similarity index 100%
rename from docs/source/docs/aggregation/graddrop.rst
rename to docs/source/reference/aggregation/graddrop.rst
diff --git a/docs/source/docs/aggregation/gradvac.rst b/docs/source/reference/aggregation/gradvac.rst
similarity index 100%
rename from docs/source/docs/aggregation/gradvac.rst
rename to docs/source/reference/aggregation/gradvac.rst
diff --git a/docs/source/docs/aggregation/imtl_g.rst b/docs/source/reference/aggregation/imtl_g.rst
similarity index 100%
rename from docs/source/docs/aggregation/imtl_g.rst
rename to docs/source/reference/aggregation/imtl_g.rst
diff --git a/docs/source/docs/aggregation/index.rst b/docs/source/reference/aggregation/index.rst
similarity index 100%
rename from docs/source/docs/aggregation/index.rst
rename to docs/source/reference/aggregation/index.rst
diff --git a/docs/source/docs/aggregation/krum.rst b/docs/source/reference/aggregation/krum.rst
similarity index 100%
rename from docs/source/docs/aggregation/krum.rst
rename to docs/source/reference/aggregation/krum.rst
diff --git a/docs/source/docs/aggregation/mean.rst b/docs/source/reference/aggregation/mean.rst
similarity index 100%
rename from docs/source/docs/aggregation/mean.rst
rename to docs/source/reference/aggregation/mean.rst
diff --git a/docs/source/docs/aggregation/mgda.rst b/docs/source/reference/aggregation/mgda.rst
similarity index 100%
rename from docs/source/docs/aggregation/mgda.rst
rename to docs/source/reference/aggregation/mgda.rst
diff --git a/docs/source/docs/aggregation/modo.rst b/docs/source/reference/aggregation/modo.rst
similarity index 100%
rename from docs/source/docs/aggregation/modo.rst
rename to docs/source/reference/aggregation/modo.rst
diff --git a/docs/source/docs/aggregation/nash_mtl.rst b/docs/source/reference/aggregation/nash_mtl.rst
similarity index 100%
rename from docs/source/docs/aggregation/nash_mtl.rst
rename to docs/source/reference/aggregation/nash_mtl.rst
diff --git a/docs/source/docs/aggregation/pcgrad.rst b/docs/source/reference/aggregation/pcgrad.rst
similarity index 100%
rename from docs/source/docs/aggregation/pcgrad.rst
rename to docs/source/reference/aggregation/pcgrad.rst
diff --git a/docs/source/docs/aggregation/random.rst b/docs/source/reference/aggregation/random.rst
similarity index 100%
rename from docs/source/docs/aggregation/random.rst
rename to docs/source/reference/aggregation/random.rst
diff --git a/docs/source/docs/aggregation/sum.rst b/docs/source/reference/aggregation/sum.rst
similarity index 100%
rename from docs/source/docs/aggregation/sum.rst
rename to docs/source/reference/aggregation/sum.rst
diff --git a/docs/source/docs/aggregation/trimmed_mean.rst b/docs/source/reference/aggregation/trimmed_mean.rst
similarity index 100%
rename from docs/source/docs/aggregation/trimmed_mean.rst
rename to docs/source/reference/aggregation/trimmed_mean.rst
diff --git a/docs/source/docs/aggregation/upgrad.rst b/docs/source/reference/aggregation/upgrad.rst
similarity index 100%
rename from docs/source/docs/aggregation/upgrad.rst
rename to docs/source/reference/aggregation/upgrad.rst
diff --git a/docs/source/docs/autogram/engine.rst b/docs/source/reference/autogram/engine.rst
similarity index 100%
rename from docs/source/docs/autogram/engine.rst
rename to docs/source/reference/autogram/engine.rst
diff --git a/docs/source/docs/autogram/index.rst b/docs/source/reference/autogram/index.rst
similarity index 100%
rename from docs/source/docs/autogram/index.rst
rename to docs/source/reference/autogram/index.rst
diff --git a/docs/source/docs/autojac/backward.rst b/docs/source/reference/autojac/backward.rst
similarity index 100%
rename from docs/source/docs/autojac/backward.rst
rename to docs/source/reference/autojac/backward.rst
diff --git a/docs/source/docs/autojac/index.rst b/docs/source/reference/autojac/index.rst
similarity index 100%
rename from docs/source/docs/autojac/index.rst
rename to docs/source/reference/autojac/index.rst
diff --git a/docs/source/docs/autojac/jac.rst b/docs/source/reference/autojac/jac.rst
similarity index 100%
rename from docs/source/docs/autojac/jac.rst
rename to docs/source/reference/autojac/jac.rst
diff --git a/docs/source/docs/autojac/jac_to_grad.rst b/docs/source/reference/autojac/jac_to_grad.rst
similarity index 100%
rename from docs/source/docs/autojac/jac_to_grad.rst
rename to docs/source/reference/autojac/jac_to_grad.rst
diff --git a/docs/source/docs/autojac/mtl_backward.rst b/docs/source/reference/autojac/mtl_backward.rst
similarity index 100%
rename from docs/source/docs/autojac/mtl_backward.rst
rename to docs/source/reference/autojac/mtl_backward.rst
diff --git a/docs/source/docs/linalg/dual_cone.rst b/docs/source/reference/linalg/dual_cone.rst
similarity index 100%
rename from docs/source/docs/linalg/dual_cone.rst
rename to docs/source/reference/linalg/dual_cone.rst
diff --git a/docs/source/docs/linalg/index.rst b/docs/source/reference/linalg/index.rst
similarity index 100%
rename from docs/source/docs/linalg/index.rst
rename to docs/source/reference/linalg/index.rst
diff --git a/docs/source/docs/linalg/matrix.rst b/docs/source/reference/linalg/matrix.rst
similarity index 100%
rename from docs/source/docs/linalg/matrix.rst
rename to docs/source/reference/linalg/matrix.rst
diff --git a/docs/source/docs/linalg/psd_matrix.rst b/docs/source/reference/linalg/psd_matrix.rst
similarity index 100%
rename from docs/source/docs/linalg/psd_matrix.rst
rename to docs/source/reference/linalg/psd_matrix.rst
diff --git a/docs/source/docs/scalarization/constant.rst b/docs/source/reference/scalarization/constant.rst
similarity index 100%
rename from docs/source/docs/scalarization/constant.rst
rename to docs/source/reference/scalarization/constant.rst
diff --git a/docs/source/docs/scalarization/geometric_mean.rst b/docs/source/reference/scalarization/geometric_mean.rst
similarity index 100%
rename from docs/source/docs/scalarization/geometric_mean.rst
rename to docs/source/reference/scalarization/geometric_mean.rst
diff --git a/docs/source/docs/scalarization/imtl_l.rst b/docs/source/reference/scalarization/imtl_l.rst
similarity index 100%
rename from docs/source/docs/scalarization/imtl_l.rst
rename to docs/source/reference/scalarization/imtl_l.rst
diff --git a/docs/source/docs/scalarization/index.rst b/docs/source/reference/scalarization/index.rst
similarity index 100%
rename from docs/source/docs/scalarization/index.rst
rename to docs/source/reference/scalarization/index.rst
diff --git a/docs/source/docs/scalarization/mean.rst b/docs/source/reference/scalarization/mean.rst
similarity index 100%
rename from docs/source/docs/scalarization/mean.rst
rename to docs/source/reference/scalarization/mean.rst
diff --git a/docs/source/docs/scalarization/random.rst b/docs/source/reference/scalarization/random.rst
similarity index 100%
rename from docs/source/docs/scalarization/random.rst
rename to docs/source/reference/scalarization/random.rst
diff --git a/docs/source/docs/scalarization/stch.rst b/docs/source/reference/scalarization/stch.rst
similarity index 100%
rename from docs/source/docs/scalarization/stch.rst
rename to docs/source/reference/scalarization/stch.rst
diff --git a/docs/source/docs/scalarization/sum.rst b/docs/source/reference/scalarization/sum.rst
similarity index 100%
rename from docs/source/docs/scalarization/sum.rst
rename to docs/source/reference/scalarization/sum.rst
diff --git a/docs/source/docs/scalarization/uw.rst b/docs/source/reference/scalarization/uw.rst
similarity index 100%
rename from docs/source/docs/scalarization/uw.rst
rename to docs/source/reference/scalarization/uw.rst
diff --git a/docs/source/docs/stateful.rst b/docs/source/reference/stateful.rst
similarity index 100%
rename from docs/source/docs/stateful.rst
rename to docs/source/reference/stateful.rst

From 204a6ba8f68ed6c710ffe703666117ce6652b33d Mon Sep 17 00:00:00 2001
From: Pierre Quinton <pierre.quinton@gmail.com>
Date: Thu, 11 Jun 2026 11:36:17 +0200
Subject: [PATCH 2/3] Update references accordingly. Add a _redirects.py file to
 redirect the old locations to the new ones.

---
 .github/workflows/checks.yml                  |   4 +
 README.md                                     |  66 ++---
 docs/Makefile                                 |  29 +-
 docs/_redirects.py                            | 262 ++++++++++++++++++
 docs/source/examples/amp.rst                  |   2 +-
 docs/source/examples/basic_usage.rst          |   4 +-
 docs/source/examples/index.rst                |   8 +-
 docs/source/examples/iwmtl.rst                |   2 +-
 docs/source/examples/iwrm.rst                 |  16 +-
 .../source/examples/lightning_integration.rst |   4 +-
 docs/source/examples/monitoring.rst           |   8 +-
 docs/source/index.rst                         |  16 +-
 12 files changed, 357 insertions(+), 64 deletions(-)
 create mode 100644 docs/_redirects.py

diff --git a/.github/workflows/checks.yml b/.github/workflows/checks.yml
index bdc857fed..c2f36cbf2 100644
--- a/.github/workflows/checks.yml
+++ b/.github/workflows/checks.yml
@@ -87,6 +87,10 @@ jobs:
         working-directory: docs
         run: uv run make dirhtml
 
+      - name: Verify Redirects
+        working-directory: docs
+        run: uv run make redirects-check
+
       - name: Test Documentation
         working-directory: docs
         run: uv run make doctest
diff --git a/README.md b/README.md
index 3d9b1d58f..d870de1ac 100644
--- a/README.md
+++ b/README.md
@@ -49,7 +49,7 @@ Gradients $\mathcal A_{\text{UPGrad}}$: it
 projects each gradient onto the dual cone, and averages the projections. This ensures that the
 update will always be beneficial to each individual objective (given a sufficiently small step
 size). In addition to $\mathcal A_{\text{UPGrad}}$, TorchJD supports
-[more than 10 aggregators from the literature](https://torchjd.org/stable/docs/aggregation).
+[more than 10 aggregators from the literature](https://torchjd.org/stable/reference/aggregation).
 
 ## Installation
 <!-- start installation -->
@@ -76,28 +76,28 @@ In standard `torch`, you generally combine your `losses` into a single scalar `l
 `loss.backward()` to compute the gradient of the loss with respect to each model parameter and to
 store it in the `.grad` fields of those parameters. The basic usage of `torchjd` is to replace this
 `loss.backward()` by a call to
-[`torchjd.autojac.backward(losses)`](https://torchjd.org/stable/docs/autojac/backward/). Instead of
+[`torchjd.autojac.backward(losses)`](https://torchjd.org/stable/reference/autojac/backward/). Instead of
 computing the gradient of a scalar loss, it will compute the Jacobian of a vector of losses, and
 store it in the `.jac` fields of the model parameters. You then have to call
-[`torchjd.autojac.jac_to_grad`](https://torchjd.org/stable/docs/autojac/jac_to_grad/) to aggregate
+[`torchjd.autojac.jac_to_grad`](https://torchjd.org/stable/reference/autojac/jac_to_grad/) to aggregate
 this Jacobian using the specified
-[`Aggregator`](https://torchjd.org/stable/docs/aggregation#torchjd.aggregation.Aggregator), and to
+[`Aggregator`](https://torchjd.org/stable/reference/aggregation#torchjd.aggregation.Aggregator), and to
 store the result into the `.grad` fields of the model parameters. See this
 [usage example](https://torchjd.org/stable/examples/basic_usage/) for more details.
 
 #### 2. `mtl_backward` + `jac_to_grad`
 In the case of multi-task learning, an alternative to
-[`torchjd.autojac.backward`](https://torchjd.org/stable/docs/autojac/backward/) is
-[`torchjd.autojac.mtl_backward`](https://torchjd.org/stable/docs/autojac/mtl_backward/). It computes
+[`torchjd.autojac.backward`](https://torchjd.org/stable/reference/autojac/backward/) is
+[`torchjd.autojac.mtl_backward`](https://torchjd.org/stable/reference/autojac/mtl_backward/). It computes
 the gradient of each task-specific loss with respect to the corresponding task's parameters, and
 stores it in their `.grad` fields. It also computes the Jacobian of the vector of losses with
 respect to the shared parameters and stores it in their `.jac` field. Then, the
-[`torchjd.autojac.jac_to_grad`](https://torchjd.org/stable/docs/autojac/jac_to_grad/) function can
+[`torchjd.autojac.jac_to_grad`](https://torchjd.org/stable/reference/autojac/jac_to_grad/) function can
 be called to aggregate this Jacobian and replace the `.jac` fields by `.grad` fields for the shared
 parameters.
 
 The following example shows how to use TorchJD to train a multi-task model with Jacobian descent,
-using [UPGrad](https://torchjd.org/stable/docs/aggregation/upgrad/).
+using [UPGrad](https://torchjd.org/stable/reference/aggregation/upgrad/).
 
 ```diff
   import torch
@@ -151,7 +151,7 @@ using [UPGrad](https://torchjd.org/stable/docs/aggregation/upgrad/).
 #### 3. `jac`
 
 If you're simply interested in computing Jacobians without storing them in the `.jac` fields, you
-can also use the [`torchjd.autojac.jac`](https://torchjd.org/stable/docs/autojac/jac/) function,
+can also use the [`torchjd.autojac.jac`](https://torchjd.org/stable/reference/autojac/jac/) function,
 that is analog to
 [`torch.autograd.grad`](https://docs.pytorch.org/docs/stable/generated/torch.autograd.grad.html),
 except that it computes the Jacobian of a vector of losses rather than the gradient of a scalar
@@ -162,23 +162,23 @@ loss.
 The Gramian of the Jacobian, defined as the Jacobian multiplied by its transpose, contains all the
 dot products between individual gradients. It thus contains all the information about conflict and
 gradient imbalance. It turns out that most aggregators from the literature
-(e.g. [UPGrad](https://torchjd.org/stable/docs/aggregation/upgrad/)) make a linear combination of
+(e.g. [UPGrad](https://torchjd.org/stable/reference/aggregation/upgrad/)) make a linear combination of
 the rows of the Jacobian, whose weights only depend on the Gramian of the Jacobian.
 
 An alternative implementation of Jacobian descent is thus to:
 - Compute this Gramian incrementally (layer by layer), without ever storing the full Jacobian in
   memory.
 - Extract the weights from it using a
-  [`Weighting`](https://torchjd.org/stable/docs/aggregation#torchjd.aggregation.Weighting).
+  [`Weighting`](https://torchjd.org/stable/reference/aggregation#torchjd.aggregation.Weighting).
 - Combine the losses using those weights and make a step of gradient descent on the combined loss.
 
 The main advantage of this approach is to save memory because the Jacobian (that is typically large)
 never has to be stored in memory. The
-[`torchjd.autogram.Engine`](https://torchjd.org/stable/docs/autogram/engine/) is precisely made to
+[`torchjd.autogram.Engine`](https://torchjd.org/stable/reference/autogram/engine/) is precisely made to
 compute the Gramian of the Jacobian efficiently.
 
 The following example shows how to use the `autogram` engine to minimize the vector of per-instance
-losses with Jacobian descent using [UPGrad](https://torchjd.org/stable/docs/aggregation/upgrad/).
+losses with Jacobian descent using [UPGrad](https://torchjd.org/stable/reference/aggregation/upgrad/).
 
 ```diff
   import torch
@@ -224,26 +224,26 @@ TorchJD provides many existing aggregators from the literature, listed in the fo
 <!-- recommended aggregators first, then alphabetical order -->
 | Aggregator                                                                                                 | Weighting                                                                                                              | Publication                                                                                                                                                          |
 |------------------------------------------------------------------------------------------------------------|------------------------------------------------------------------------------------------------------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------|
-| [UPGrad](https://torchjd.org/stable/docs/aggregation/upgrad/#torchjd.aggregation.UPGrad) (recommended) | [UPGradWeighting](https://torchjd.org/stable/docs/aggregation/upgrad/#torchjd.aggregation.UPGradWeighting)              | [Jacobian Descent For Multi-Objective Optimization](https://arxiv.org/pdf/2406.16232)                                                                                |
-| [AlignedMTL](https://torchjd.org/stable/docs/aggregation/aligned_mtl#torchjd.aggregation.AlignedMTL)       | [AlignedMTLWeighting](https://torchjd.org/stable/docs/aggregation/aligned_mtl#torchjd.aggregation.AlignedMTLWeighting) | [Independent Component Alignment for Multi-Task Learning](https://arxiv.org/pdf/2305.19000)                                                                          |
-| [CAGrad](https://torchjd.org/stable/docs/aggregation/cagrad#torchjd.aggregation.CAGrad)                    | [CAGradWeighting](https://torchjd.org/stable/docs/aggregation/cagrad#torchjd.aggregation.CAGradWeighting)              | [Conflict-Averse Gradient Descent for Multi-task Learning](https://arxiv.org/pdf/2110.14048)                                                                         |
-| [ConFIG](https://torchjd.org/stable/docs/aggregation/config#torchjd.aggregation.ConFIG)                    | -                                                                                                                      | [ConFIG: Towards Conflict-free Training of Physics Informed Neural Networks](https://arxiv.org/pdf/2408.11104)                                                       |
-| [Constant](https://torchjd.org/stable/docs/aggregation/constant#torchjd.aggregation.Constant)              | [ConstantWeighting](https://torchjd.org/stable/docs/aggregation/constant#torchjd.aggregation.ConstantWeighting)        | -                                                                                                                                                                    |
-| -                                                                                                           | [CRMOGMWeighting](https://torchjd.org/stable/docs/aggregation/cr_mogm/#torchjd.aggregation.CRMOGMWeighting)           | [On the Convergence of Stochastic Multi-Objective Gradient Manipulation and Beyond](https://proceedings.neurips.cc/paper_files/paper/2022/file/f91bd64a3620aad8e70a27ad9cb3ca57-Paper-Conference.pdf) |
-| [DualProj](https://torchjd.org/stable/docs/aggregation/dualproj#torchjd.aggregation.DualProj)              | [DualProjWeighting](https://torchjd.org/stable/docs/aggregation/dualproj#torchjd.aggregation.DualProjWeighting)        | [Gradient Episodic Memory for Continual Learning](https://arxiv.org/pdf/1706.08840)                                                                                  |
-| [FairGrad](https://torchjd.org/stable/docs/aggregation/fairgrad#torchjd.aggregation.FairGrad)              | [FairGradWeighting](https://torchjd.org/stable/docs/aggregation/fairgrad#torchjd.aggregation.FairGradWeighting)        | [Fair Resource Allocation in Multi-Task Learning](https://arxiv.org/pdf/2402.15638)                                                                                  |
-| [GradDrop](https://torchjd.org/stable/docs/aggregation/graddrop#torchjd.aggregation.GradDrop)              | -                                                                                                                      | [Just Pick a Sign: Optimizing Deep Multitask Models with Gradient Sign Dropout](https://arxiv.org/pdf/2010.06808)                                                    |
-| [GradVac](https://torchjd.org/stable/docs/aggregation/gradvac#torchjd.aggregation.GradVac)              | [GradVacWeighting](https://torchjd.org/stable/docs/aggregation/gradvac#torchjd.aggregation.GradVacWeighting)                                                                                                                      | [Gradient Vaccine: Investigating and Improving Multi-task Optimization in Massively Multilingual Models](https://arxiv.org/pdf/2010.05874)                                                    |
-| [IMTLG](https://torchjd.org/stable/docs/aggregation/imtl_g#torchjd.aggregation.IMTLG)                      | [IMTLGWeighting](https://torchjd.org/stable/docs/aggregation/imtl_g#torchjd.aggregation.IMTLGWeighting)                | [Towards Impartial Multi-task Learning](https://discovery.ucl.ac.uk/id/eprint/10120667/)                                                                             |
-| [Krum](https://torchjd.org/stable/docs/aggregation/krum#torchjd.aggregation.Krum)                          | [KrumWeighting](https://torchjd.org/stable/docs/aggregation/krum#torchjd.aggregation.KrumWeighting)                    | [Machine Learning with Adversaries: Byzantine Tolerant Gradient Descent](https://proceedings.neurips.cc/paper/2017/file/f4b9ec30ad9f68f89b29639786cb62ef-Paper.pdf)  |
-| [Mean](https://torchjd.org/stable/docs/aggregation/mean#torchjd.aggregation.Mean)                          | [MeanWeighting](https://torchjd.org/stable/docs/aggregation/mean#torchjd.aggregation.MeanWeighting)                    | -                                                                                                                                                                    |
-| [MGDA](https://torchjd.org/stable/docs/aggregation/mgda#torchjd.aggregation.MGDA)                          | [MGDAWeighting](https://torchjd.org/stable/docs/aggregation/mgda#torchjd.aggregation.MGDAWeighting)                    | [Multiple-gradient descent algorithm (MGDA) for multiobjective optimization](https://comptes-rendus.academie-sciences.fr/mathematique/articles/10.1016/j.crma.2012.03.014/)                    |
-| -                                                                                                           | [MoDoWeighting](https://torchjd.org/stable/docs/aggregation/modo/#torchjd.aggregation.MoDoWeighting)                  | [Three-Way Trade-Off in Multi-Objective Learning: Optimization, Generalization and Conflict-Avoidance](https://www.jmlr.org/papers/volume25/23-1287/23-1287.pdf)     |
-| [NashMTL](https://torchjd.org/stable/docs/aggregation/nash_mtl#torchjd.aggregation.NashMTL)                | -                                                                                                                      | [Multi-Task Learning as a Bargaining Game](https://arxiv.org/pdf/2202.01017)                                                                                         |
-| [PCGrad](https://torchjd.org/stable/docs/aggregation/pcgrad#torchjd.aggregation.PCGrad)                    | [PCGradWeighting](https://torchjd.org/stable/docs/aggregation/pcgrad#torchjd.aggregation.PCGradWeighting)              | [Gradient Surgery for Multi-Task Learning](https://arxiv.org/pdf/2001.06782)                                                                                         |
-| [Random](https://torchjd.org/stable/docs/aggregation/random#torchjd.aggregation.Random)                    | [RandomWeighting](https://torchjd.org/stable/docs/aggregation/random#torchjd.aggregation.RandomWeighting)              | [Reasonable Effectiveness of Random Weighting: A Litmus Test for Multi-Task Learning](https://arxiv.org/pdf/2111.10603)                                              |
-| [Sum](https://torchjd.org/stable/docs/aggregation/sum#torchjd.aggregation.Sum)                             | [SumWeighting](https://torchjd.org/stable/docs/aggregation/sum#torchjd.aggregation.SumWeighting)                       | -                                                                                                                                                                    |
-| [Trimmed Mean](https://torchjd.org/stable/docs/aggregation/trimmed_mean#torchjd.aggregation.TrimmedMean)   | -                                                                                                                      | [Byzantine-Robust Distributed Learning: Towards Optimal Statistical Rates](https://proceedings.mlr.press/v80/yin18a/yin18a.pdf)                                      |
+| [UPGrad](https://torchjd.org/stable/reference/aggregation/upgrad/#torchjd.aggregation.UPGrad) (recommended) | [UPGradWeighting](https://torchjd.org/stable/reference/aggregation/upgrad/#torchjd.aggregation.UPGradWeighting)              | [Jacobian Descent For Multi-Objective Optimization](https://arxiv.org/pdf/2406.16232)                                                                                |
+| [AlignedMTL](https://torchjd.org/stable/reference/aggregation/aligned_mtl#torchjd.aggregation.AlignedMTL)       | [AlignedMTLWeighting](https://torchjd.org/stable/reference/aggregation/aligned_mtl#torchjd.aggregation.AlignedMTLWeighting) | [Independent Component Alignment for Multi-Task Learning](https://arxiv.org/pdf/2305.19000)                                                                          |
+| [CAGrad](https://torchjd.org/stable/reference/aggregation/cagrad#torchjd.aggregation.CAGrad)                    | [CAGradWeighting](https://torchjd.org/stable/reference/aggregation/cagrad#torchjd.aggregation.CAGradWeighting)              | [Conflict-Averse Gradient Descent for Multi-task Learning](https://arxiv.org/pdf/2110.14048)                                                                         |
+| [ConFIG](https://torchjd.org/stable/reference/aggregation/config#torchjd.aggregation.ConFIG)                    | -                                                                                                                      | [ConFIG: Towards Conflict-free Training of Physics Informed Neural Networks](https://arxiv.org/pdf/2408.11104)                                                       |
+| [Constant](https://torchjd.org/stable/reference/aggregation/constant#torchjd.aggregation.Constant)              | [ConstantWeighting](https://torchjd.org/stable/reference/aggregation/constant#torchjd.aggregation.ConstantWeighting)        | -                                                                                                                                                                    |
+| -                                                                                                           | [CRMOGMWeighting](https://torchjd.org/stable/reference/aggregation/cr_mogm/#torchjd.aggregation.CRMOGMWeighting)           | [On the Convergence of Stochastic Multi-Objective Gradient Manipulation and Beyond](https://proceedings.neurips.cc/paper_files/paper/2022/file/f91bd64a3620aad8e70a27ad9cb3ca57-Paper-Conference.pdf) |
+| [DualProj](https://torchjd.org/stable/reference/aggregation/dualproj#torchjd.aggregation.DualProj)              | [DualProjWeighting](https://torchjd.org/stable/reference/aggregation/dualproj#torchjd.aggregation.DualProjWeighting)        | [Gradient Episodic Memory for Continual Learning](https://arxiv.org/pdf/1706.08840)                                                                                  |
+| [FairGrad](https://torchjd.org/stable/reference/aggregation/fairgrad#torchjd.aggregation.FairGrad)              | [FairGradWeighting](https://torchjd.org/stable/reference/aggregation/fairgrad#torchjd.aggregation.FairGradWeighting)        | [Fair Resource Allocation in Multi-Task Learning](https://arxiv.org/pdf/2402.15638)                                                                                  |
+| [GradDrop](https://torchjd.org/stable/reference/aggregation/graddrop#torchjd.aggregation.GradDrop)              | -                                                                                                                      | [Just Pick a Sign: Optimizing Deep Multitask Models with Gradient Sign Dropout](https://arxiv.org/pdf/2010.06808)                                                    |
+| [GradVac](https://torchjd.org/stable/reference/aggregation/gradvac#torchjd.aggregation.GradVac)              | [GradVacWeighting](https://torchjd.org/stable/reference/aggregation/gradvac#torchjd.aggregation.GradVacWeighting)                                                                                                                      | [Gradient Vaccine: Investigating and Improving Multi-task Optimization in Massively Multilingual Models](https://arxiv.org/pdf/2010.05874)                                                    |
+| [IMTLG](https://torchjd.org/stable/reference/aggregation/imtl_g#torchjd.aggregation.IMTLG)                      | [IMTLGWeighting](https://torchjd.org/stable/reference/aggregation/imtl_g#torchjd.aggregation.IMTLGWeighting)                | [Towards Impartial Multi-task Learning](https://discovery.ucl.ac.uk/id/eprint/10120667/)                                                                             |
+| [Krum](https://torchjd.org/stable/reference/aggregation/krum#torchjd.aggregation.Krum)                          | [KrumWeighting](https://torchjd.org/stable/reference/aggregation/krum#torchjd.aggregation.KrumWeighting)                    | [Machine Learning with Adversaries: Byzantine Tolerant Gradient Descent](https://proceedings.neurips.cc/paper/2017/file/f4b9ec30ad9f68f89b29639786cb62ef-Paper.pdf)  |
+| [Mean](https://torchjd.org/stable/reference/aggregation/mean#torchjd.aggregation.Mean)                          | [MeanWeighting](https://torchjd.org/stable/reference/aggregation/mean#torchjd.aggregation.MeanWeighting)                    | -                                                                                                                                                                    |
+| [MGDA](https://torchjd.org/stable/reference/aggregation/mgda#torchjd.aggregation.MGDA)                          | [MGDAWeighting](https://torchjd.org/stable/reference/aggregation/mgda#torchjd.aggregation.MGDAWeighting)                    | [Multiple-gradient descent algorithm (MGDA) for multiobjective optimization](https://comptes-rendus.academie-sciences.fr/mathematique/articles/10.1016/j.crma.2012.03.014/)                    |
+| -                                                                                                           | [MoDoWeighting](https://torchjd.org/stable/reference/aggregation/modo/#torchjd.aggregation.MoDoWeighting)                  | [Three-Way Trade-Off in Multi-Objective Learning: Optimization, Generalization and Conflict-Avoidance](https://www.jmlr.org/papers/volume25/23-1287/23-1287.pdf)     |
+| [NashMTL](https://torchjd.org/stable/reference/aggregation/nash_mtl#torchjd.aggregation.NashMTL)                | -                                                                                                                      | [Multi-Task Learning as a Bargaining Game](https://arxiv.org/pdf/2202.01017)                                                                                         |
+| [PCGrad](https://torchjd.org/stable/reference/aggregation/pcgrad#torchjd.aggregation.PCGrad)                    | [PCGradWeighting](https://torchjd.org/stable/reference/aggregation/pcgrad#torchjd.aggregation.PCGradWeighting)              | [Gradient Surgery for Multi-Task Learning](https://arxiv.org/pdf/2001.06782)                                                                                         |
+| [Random](https://torchjd.org/stable/reference/aggregation/random#torchjd.aggregation.Random)                    | [RandomWeighting](https://torchjd.org/stable/reference/aggregation/random#torchjd.aggregation.RandomWeighting)              | [Reasonable Effectiveness of Random Weighting: A Litmus Test for Multi-Task Learning](https://arxiv.org/pdf/2111.10603)                                              |
+| [Sum](https://torchjd.org/stable/reference/aggregation/sum#torchjd.aggregation.Sum)                             | [SumWeighting](https://torchjd.org/stable/reference/aggregation/sum#torchjd.aggregation.SumWeighting)                       | -                                                                                                                                                                    |
+| [Trimmed Mean](https://torchjd.org/stable/reference/aggregation/trimmed_mean#torchjd.aggregation.TrimmedMean)   | -                                                                                                                      | [Byzantine-Robust Distributed Learning: Towards Optimal Statistical Rates](https://proceedings.mlr.press/v80/yin18a/yin18a.pdf)                                      |
 
 ## Release Methodology
 
diff --git a/docs/Makefile b/docs/Makefile
index f248e33eb..074ac70d8 100644
--- a/docs/Makefile
+++ b/docs/Makefile
@@ -12,7 +12,34 @@ BUILDDIR      = build
 help:
 	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
 
-.PHONY: help Makefile
+.PHONY: help Makefile redirects redirects-check html dirhtml
+
+redirects:
+	@if [ -d "$(BUILDDIR)/dirhtml" ]; then \
+		python _redirects.py "$(BUILDDIR)/dirhtml"; \
+	elif [ -d "$(BUILDDIR)/html" ]; then \
+		python _redirects.py "$(BUILDDIR)/html"; \
+	else \
+		echo "No build directory found; run make html or make dirhtml first."; \
+	fi
+
+redirects-check:
+	@if [ -d "$(BUILDDIR)/dirhtml" ]; then \
+		python _redirects.py --check "$(BUILDDIR)/dirhtml"; \
+	elif [ -d "$(BUILDDIR)/html" ]; then \
+		python _redirects.py --check "$(BUILDDIR)/html"; \
+	else \
+		echo "No build directory found; run make html or make dirhtml first."; \
+		exit 1; \
+	fi
+# html and dirhtml are explicit (not left to the catch-all) so redirects follow every build.
+html:
+	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
+	@python _redirects.py "$(BUILDDIR)/html"
+
+dirhtml:
+	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
+	@python _redirects.py "$(BUILDDIR)/dirhtml"
 
 # Catch-all target: route all unknown targets to Sphinx using the new
 # "make mode" option.  $(O) is meant as a shortcut for $(SPHINXOPTS).
diff --git a/docs/_redirects.py b/docs/_redirects.py
new file mode 100644
index 000000000..f1f4eabdf
--- /dev/null
+++ b/docs/_redirects.py
@@ -0,0 +1,262 @@
+import os
+import re
+import sys
+
+
+def generate_redirects(build_dir: str) -> None:
+    """
+    Generates HTML redirect pages from the old ``docs/`` paths to the new ``reference/`` paths.
+
+    For each page found under ``<build_dir>/reference/``, this creates a corresponding redirect
+    HTML file under ``<build_dir>/docs/`` that redirects browser visits to the new location.
+
+    Supports both the ``dirhtml`` and ``html`` Sphinx builders. If ``<build_dir>/reference/`` does
+    not exist, a message is printed and no redirect is written.
+
+    :param build_dir: Path to the build output directory (e.g. ``build/dirhtml`` or ``build/html``).
+    """
+    reference_dir = os.path.join(build_dir, "reference")
+    if not os.path.isdir(reference_dir):
+        print(f"No reference directory found at {reference_dir}, skipping redirects.")
+        return
+
+    if _is_dirhtml_builder(reference_dir):
+        _generate_redirects_dirhtml(build_dir, reference_dir)
+    else:
+        _generate_redirects_html(build_dir, reference_dir)
+
+
+def verify_redirects(build_dir: str) -> bool:
+    """
+    Verifies that all redirect files are correct.
+
+    Checks:
+
+    - Every ``reference/`` page has a corresponding redirect in ``docs/``.
+    - Every redirect target resolves to an existing file (``html``) or directory (``dirhtml``).
+    - No orphaned redirects (redirect files with no matching ``reference/`` page).
+
+    :param build_dir: Path to the build output directory.
+    :returns: ``True`` if all checks pass; ``False`` otherwise or if ``reference/`` is missing.
+    """
+    reference_dir = os.path.join(build_dir, "reference")
+    if not os.path.isdir(reference_dir):
+        print(f"Error: no reference directory found at {reference_dir}")
+        return False
+
+    if _is_dirhtml_builder(reference_dir):
+        return _verify_redirects_dirhtml(build_dir, reference_dir)
+    return _verify_redirects_html(build_dir, reference_dir)
+
+
+def _is_dirhtml_builder(reference_dir: str) -> bool:
+    """Treats the build as ``dirhtml`` when ``<reference_dir>/aggregation/upgrad`` is a directory."""
+    return os.path.isdir(os.path.join(reference_dir, "aggregation", "upgrad"))
+
+
+# ── generation ────────────────────────────────────────────────────────────────────
+
+
+def _generate_redirects_dirhtml(build_dir: str, reference_dir: str) -> None:
+    """Writes a ``docs/<page>/index.html`` redirect for every directory under ``reference/``."""
+    for root, _dirs, _files in os.walk(reference_dir):
+        rel_path = os.path.relpath(root, reference_dir)
+        if rel_path == ".":
+            rel_path = ""
+        # URLs always use "/", whatever the platform's path separator is.
+        url_path = rel_path.replace(os.sep, "/")
+        depth = url_path.count("/") + int(url_path != "")
+        relative_prefix = "/".join([".."] * (depth + 1))
+        url_suffix = f"{url_path}/" if url_path else ""
+        redirect_target = f"{relative_prefix}/reference/{url_suffix}"
+
+        dest_dir = os.path.join(build_dir, "docs", rel_path)
+        os.makedirs(dest_dir, exist_ok=True)
+        _write_redirect_file(os.path.join(dest_dir, "index.html"), redirect_target)
+        print(f"Redirect: docs/{rel_path}/ -> reference/{rel_path}/")
+
+
+def _generate_redirects_html(build_dir: str, reference_dir: str) -> None:
+    """Writes a ``docs/<page>.html`` redirect for every ``.html`` file under ``reference/``."""
+    for root, _dirs, files in os.walk(reference_dir):
+        rel_dir = os.path.relpath(root, reference_dir)
+        if rel_dir == ".":
+            rel_dir = ""
+        for file in files:
+            if not file.endswith(".html"):
+                continue
+            rel_file = os.path.join(rel_dir, file) if rel_dir else file
+            # URLs always use "/", whatever the platform's path separator is.
+            url_file = rel_file.replace(os.sep, "/")
+            relative_prefix = "/".join([".."] * (url_file.count("/") + 1))
+            redirect_target = f"{relative_prefix}/reference/{url_file}"
+
+            dest_file = os.path.join(build_dir, "docs", rel_file)
+            os.makedirs(os.path.dirname(dest_file), exist_ok=True)
+            _write_redirect_file(dest_file, redirect_target)
+            print(f"Redirect: docs/{rel_file} -> reference/{rel_file}")
+
+
+def _write_redirect_file(filepath: str, target: str) -> None:
+    """Writes to ``filepath`` an HTML page that meta-refreshes to the relative URL ``target``."""
+    # The canonical URL is absolute: drop the leading "../" segments of the relative target.
+    site_path = re.sub(r"^(?:\.\./)+", "", target)
+    canonical_url = f"https://torchjd.org/stable/{site_path}"
+    page = (
+        "<!DOCTYPE html>\n"
+        "<html>\n"
+        "<head>\n"
+        f'    <meta http-equiv="refresh" content="0; url={target}">\n'
+        f'    <link rel="canonical" href="{canonical_url}">\n'
+        "</head>\n"
+        "<body>\n"
+        f'    <p>This page has moved to <a href="{target}">{target}</a>.</p>\n'
+        "</body>\n"
+        "</html>\n"
+    )
+    with open(filepath, "w") as redirect_file:
+        redirect_file.write(page)
+
+
+# ── verification ───────────────────────────────────────────────────────────────────
+# Captures the URL of the meta refresh tag, in the exact form written by _write_redirect_file.
+_META_REFRESH_RE = re.compile(r'<meta\s+http-equiv="refresh"\s+content="0;\s*url=([^"]+)"')
+
+
+def _verify_redirects_dirhtml(build_dir: str, reference_dir: str) -> bool:
+    """Verifies ``dirhtml`` redirects; prints each problem and returns ``True`` if there is none."""
+    ok = True
+    docs_dir = os.path.join(build_dir, "docs")
+    # Collect all reference pages (every directory in reference/ is a page).
+    ref_pages = set()
+    for root, _dirs, _files in os.walk(reference_dir):
+        rel_path = os.path.relpath(root, reference_dir)
+        if rel_path == ".":
+            rel_path = ""
+        ref_pages.add(rel_path)
+
+    # Collect all redirect pages (directories of docs/ that contain an index.html).
+    redirect_pages = set()
+    for root, _dirs, files in os.walk(docs_dir):
+        if "index.html" in files:
+            rel_path = os.path.relpath(root, docs_dir)
+            if rel_path == ".":
+                rel_path = ""
+            redirect_pages.add(rel_path)
+
+    # Check every reference page has a redirect.
+    for page in sorted(ref_pages):
+        if page not in redirect_pages:
+            print(f"Missing redirect: docs/{page}/")
+            ok = False
+
+    # Check no orphaned redirects.
+    for page in sorted(redirect_pages):
+        if page not in ref_pages:
+            print(f"Orphaned redirect: docs/{page}/ (no matching reference page)")
+            ok = False
+
+    # Check redirect targets exist (targets are resolved relative to the redirect file's directory).
+    for page in sorted(ref_pages & redirect_pages):
+        redirect_file = os.path.join(docs_dir, page, "index.html")
+        target = _extract_redirect_target(redirect_file)
+        if target is None:
+            print(f"Broken redirect (no meta refresh): docs/{page}/index.html")
+            ok = False
+            continue
+        resolved = os.path.normpath(os.path.join(os.path.dirname(redirect_file), target))
+        if not os.path.isdir(resolved):
+            print(f"Broken redirect (target directory not found): docs/{page}/ -> {target}")
+            ok = False
+
+    return ok
+
+
+def _verify_redirects_html(build_dir: str, reference_dir: str) -> bool:
+    """Verifies ``html`` redirects; prints each problem and returns ``True`` if there is none."""
+    ok = True
+    docs_dir = os.path.join(build_dir, "docs")
+    # Collect all reference pages (.html files).
+    ref_pages = set()
+    for root, _dirs, files in os.walk(reference_dir):
+        rel_dir = os.path.relpath(root, reference_dir)
+        if rel_dir == ".":
+            rel_dir = ""
+        for file in files:
+            if file.endswith(".html"):
+                rel_file = os.path.join(rel_dir, file) if rel_dir else file
+                ref_pages.add(rel_file)
+
+    # Collect all redirect pages (.html files).
+    redirect_pages = set()
+    for root, _dirs, files in os.walk(docs_dir):
+        rel_dir = os.path.relpath(root, docs_dir)
+        if rel_dir == ".":
+            rel_dir = ""
+        for file in files:
+            if file.endswith(".html"):
+                rel_file = os.path.join(rel_dir, file) if rel_dir else file
+                redirect_pages.add(rel_file)
+
+    # Check every reference page has a redirect.
+    for page in sorted(ref_pages):
+        if page not in redirect_pages:
+            print(f"Missing redirect: docs/{page}")
+            ok = False
+
+    # Check no orphaned redirects.
+    for page in sorted(redirect_pages):
+        if page not in ref_pages:
+            print(f"Orphaned redirect: docs/{page} (no matching reference page)")
+            ok = False
+
+    # Check redirect targets exist (targets are resolved relative to the redirect file's directory).
+    for page in sorted(ref_pages & redirect_pages):
+        redirect_file = os.path.join(docs_dir, page)
+        target = _extract_redirect_target(redirect_file)
+        if target is None:
+            print(f"Broken redirect (no meta refresh): docs/{page}")
+            ok = False
+            continue
+        resolved = os.path.normpath(os.path.join(os.path.dirname(redirect_file), target))
+        if not os.path.isfile(resolved):
+            print(f"Broken redirect (target not found): docs/{page} -> {target}")
+            ok = False
+
+    return ok
+
+
+def _extract_redirect_target(filepath: str) -> str | None:
+    """Returns the meta-refresh URL found in ``filepath``, or ``None`` if unreadable or absent."""
+    try:
+        with open(filepath) as redirect_file:
+            html = redirect_file.read()
+    except OSError:
+        # Unreadable files are reported like files without a meta refresh.
+        return None
+    found = _META_REFRESH_RE.search(html)
+    return found.group(1) if found else None
+
+
+# ── cli ─────────────────────────────────────────────────────────────────────────────
+
+
+if __name__ == "__main__":
+    # Usage: python _redirects.py [--check] [BUILD_DIR]
+    #   without --check: generate the docs/ -> reference/ redirect pages;
+    #   with --check: verify them and exit with status 1 on any problem.
+    cli_args = sys.argv[1:]
+    check_only = "--check" in cli_args
+    positional = [arg for arg in cli_args if arg != "--check"]
+
+    # Default to the dirhtml output when it exists, otherwise to the html output.
+    if positional:
+        build_dir = positional[0]
+    elif os.path.isdir(os.path.join("build", "dirhtml")):
+        build_dir = os.path.join("build", "dirhtml")
+    else:
+        build_dir = os.path.join("build", "html")
+
+    if check_only:
+        sys.exit(0 if verify_redirects(build_dir) else 1)
+    generate_redirects(build_dir)
diff --git a/docs/source/examples/amp.rst b/docs/source/examples/amp.rst
index a5b0fc389..3df0ecbd3 100644
--- a/docs/source/examples/amp.rst
+++ b/docs/source/examples/amp.rst
@@ -62,4 +62,4 @@ following example shows the resulting code for a multi-task learning use-case.
     behavior of PyTorch, that would also compute all gradients in ``float32`` type.
 
 .. note::
-    :doc:`torchjd.backward <../docs/autojac/backward>` can be similarly combined with AMP.
+    :doc:`torchjd.backward <../reference/autojac/backward>` can be similarly combined with AMP.
diff --git a/docs/source/examples/basic_usage.rst b/docs/source/examples/basic_usage.rst
index 1d82d1d63..e7d0a5ac7 100644
--- a/docs/source/examples/basic_usage.rst
+++ b/docs/source/examples/basic_usage.rst
@@ -5,7 +5,7 @@ This example shows how to use TorchJD to perform an iteration of Jacobian descen
 model with two objectives. In this example, a batch of inputs is forwarded through the model and two
 corresponding batches of labels are used to compute two losses. These losses are then backwarded
 through the model. The obtained Jacobian matrix, consisting of the gradients of the two losses with
-respect to the parameters, is then aggregated using :doc:`UPGrad <../docs/aggregation/upgrad>`, and
+respect to the parameters, is then aggregated using :doc:`UPGrad <../reference/aggregation/upgrad>`, and
 the parameters are updated using the resulting aggregation.
 
 
@@ -35,7 +35,7 @@ Define the aggregator that will be used to combine the Jacobian matrix:
 
     aggregator = UPGrad()
 
-In essence, :doc:`UPGrad <../docs/aggregation/upgrad>` projects each gradient onto the dual cone of
+In essence, :doc:`UPGrad <../reference/aggregation/upgrad>` projects each gradient onto the dual cone of
 the rows of the Jacobian and averages the results. This ensures that locally, no loss will be
 negatively affected by the update.
 
diff --git a/docs/source/examples/index.rst b/docs/source/examples/index.rst
index 603c8a3f1..91b0f4b06 100644
--- a/docs/source/examples/index.rst
+++ b/docs/source/examples/index.rst
@@ -4,8 +4,8 @@ Examples
 This section contains some usage examples for TorchJD.
 
 - :doc:`Basic Usage <basic_usage>` provides a toy example using :doc:`torchjd.backward
-  <../docs/autojac/backward>` to make a step of Jacobian descent with the :doc:`UPGrad
-  <../docs/aggregation/upgrad>` aggregator.
+  <../reference/autojac/backward>` to make a step of Jacobian descent with the :doc:`UPGrad
+  <../reference/aggregation/upgrad>` aggregator.
 - :doc:`Instance-Wise Risk Minimization (IWRM) <iwrm>` provides an example in which we minimize the
   vector of per-instance losses, using stochastic sub-Jacobian descent (SSJD). It is compared to the
   usual minimization of the average loss, called empirical risk minimization (ERM), using stochastic
@@ -17,10 +17,10 @@ This section contains some usage examples for TorchJD.
   aggregation precision.
 - :doc:`Multi-Task Learning (MTL) <mtl>` provides an example of multi-task learning where Jacobian
   descent is used to optimize the vector of per-task losses of a multi-task model, using the
-  dedicated backpropagation function :doc:`mtl_backward <../docs/autojac/mtl_backward>`.
+  dedicated backpropagation function :doc:`mtl_backward <../reference/autojac/mtl_backward>`.
 - :doc:`Instance-Wise Multi-Task Learning (IWMTL) <iwmtl>` shows how to combine multi-task learning
   with instance-wise risk minimization: one loss per task and per element of the batch, using the
-  :doc:`autogram.Engine <../docs/autogram/engine>`.
+  :doc:`autogram.Engine <../reference/autogram/engine>`.
 - :doc:`Recurrent Neural Network (RNN) <rnn>` shows how to apply Jacobian descent to RNN training,
   with one loss per output sequence element.
 - :doc:`Monitoring Aggregations <monitoring>` shows how to monitor the aggregation performed by the
diff --git a/docs/source/examples/iwmtl.rst b/docs/source/examples/iwmtl.rst
index dd76f3dbb..7d9213b7e 100644
--- a/docs/source/examples/iwmtl.rst
+++ b/docs/source/examples/iwmtl.rst
@@ -3,7 +3,7 @@ Instance-Wise Multi-Task Learning (IWMTL)
 
 When training a model with multiple tasks, the gradients of the individual tasks are likely to
 conflict. This is particularly true when looking at the individual (per-sample) gradients.
-The :doc:`autogram engine <../docs/autogram/engine>` can be used to efficiently compute the Gramian
+The :doc:`autogram engine <../reference/autogram/engine>` can be used to efficiently compute the Gramian
 of the Jacobian of the matrix of per-sample and per-task losses. Weights can then be extracted from
 this Gramian to reweight the gradients and resolve conflict entirely.
 
diff --git a/docs/source/examples/iwrm.rst b/docs/source/examples/iwrm.rst
index 4d553f435..8c91f22ee 100644
--- a/docs/source/examples/iwrm.rst
+++ b/docs/source/examples/iwrm.rst
@@ -4,7 +4,7 @@ Instance-Wise Risk Minimization (IWRM)
 This example shows how to use TorchJD to minimize the vector of per-instance losses. This learning
 paradigm, called IWRM, is multi-objective, as opposed to the usual empirical risk minimization
 (ERM), which seeks to minimize the average loss. While a step of ERM may increase the loss of some
-samples of the batch, a step of IWRM using :doc:`UPGrad <../docs/aggregation/upgrad>` guarantees
+samples of the batch, a step of IWRM using :doc:`UPGrad <../reference/aggregation/upgrad>` guarantees
 that no loss from the batch is increased (given a sufficiently small learning rate).
 
 .. hint::
@@ -12,31 +12,31 @@ that no loss from the batch is increased (given a sufficiently small learning ra
     available in `Jacobian Descent For Multi-Objective Optimization
     <https://arxiv.org/pdf/2406.16232>`_.
 
-TorchJD offers two methods to perform IWRM. The :doc:`autojac <../docs/autojac/index>` engine
+TorchJD offers two methods to perform IWRM. The :doc:`autojac <../reference/autojac/index>` engine
 backpropagates the Jacobian of each sample's loss. It uses an
-:doc:`Aggregator <../docs/aggregation/index>` to combine the rows of this Jacobian to fill the
+:doc:`Aggregator <../reference/aggregation/index>` to combine the rows of this Jacobian to fill the
 ``.grad`` fields of the model's parameters. Because it has to store the full Jacobian, this approach
 uses a lot of memory.
 
-The recommended approach, called the :doc:`autogram engine <../docs/autogram/engine>`, works by
+The recommended approach, called the :doc:`autogram engine <../reference/autogram/engine>`, works by
 backpropagating the Gramian of the Jacobian of each sample's loss with respect to the model's
 parameters. This method is more memory-efficient and generally much faster because it avoids
 storing the full Jacobians. A vector of weights is then computed by applying a
-:doc:`Weighting <../docs/aggregation/index>` to the obtained Gramian, and a normal step of gradient
+:doc:`Weighting <../reference/aggregation/index>` to the obtained Gramian, and a normal step of gradient
 descent is then done on the weighted sum of the losses.
 
 Both approaches (autojac and autogram) are mathematically equivalent, and should thus give the same
 results up to small numerical differences. Even though the autogram engine is generally much faster
 than the autojac engine, there are some layers that are incompatible with it. These limitations are
-documented :doc:`here <../docs/autogram/engine>`.
+documented :doc:`here <../reference/autogram/engine>`.
 
 For the sake of the example, we generate a fake dataset consisting of 8 batches of 16 random input
 vectors of dimension 10, and their corresponding scalar labels. We train a very simple regression
 model to retrieve the label from the corresponding input. To minimize the average loss (ERM), we use
 stochastic gradient descent (SGD), where each gradient is computed from the average loss over a
 batch of data. When minimizing per-instance losses (IWRM), we use either autojac, with
-:doc:`UPGrad <../docs/aggregation/upgrad>` to aggregate the Jacobian, or autogram, with
-:doc:`UPGradWeighting <../docs/aggregation/upgrad>` to extract weights from the Gramian.
+:doc:`UPGrad <../reference/aggregation/upgrad>` to aggregate the Jacobian, or autogram, with
+:doc:`UPGradWeighting <../reference/aggregation/upgrad>` to extract weights from the Gramian.
 
 .. tab-set::
     .. tab-item:: autograd (baseline)
diff --git a/docs/source/examples/lightning_integration.rst b/docs/source/examples/lightning_integration.rst
index 115f42268..319a54574 100644
--- a/docs/source/examples/lightning_integration.rst
+++ b/docs/source/examples/lightning_integration.rst
@@ -4,11 +4,11 @@ PyTorch Lightning Integration
 To use Jacobian descent with TorchJD in a :class:`~lightning.pytorch.core.LightningModule`, you need
 to turn off automatic optimization by setting ``automatic_optimization`` to ``False`` and to
 customize the ``training_step`` method to make it call the appropriate TorchJD method
-(:doc:`backward <../docs/autojac/backward>` or :doc:`mtl_backward <../docs/autojac/mtl_backward>`).
+(:doc:`backward <../reference/autojac/backward>` or :doc:`mtl_backward <../reference/autojac/mtl_backward>`).
 
 The following code example demonstrates a basic multi-task learning setup using a
 :class:`~lightning.pytorch.core.LightningModule` that will call :doc:`mtl_backward
-<../docs/autojac/mtl_backward>` at each training iteration.
+<../reference/autojac/mtl_backward>` at each training iteration.
 
 .. testsetup::
 
diff --git a/docs/source/examples/monitoring.rst b/docs/source/examples/monitoring.rst
index 0570c9f10..cae38af54 100644
--- a/docs/source/examples/monitoring.rst
+++ b/docs/source/examples/monitoring.rst
@@ -1,17 +1,17 @@
 Monitoring aggregations
 =======================
 
-The :doc:`Aggregator <../docs/aggregation/index>` class is a subclass of :class:`torch.nn.Module`.
+The :doc:`Aggregator <../reference/aggregation/index>` class is a subclass of :class:`torch.nn.Module`.
 This allows registering hooks, which can be used to monitor some information about aggregations.
 The following code example demonstrates registering a hook to compute and print the cosine
-similarity between the aggregation performed by :doc:`UPGrad <../docs/aggregation/upgrad>` and the
+similarity between the aggregation performed by :doc:`UPGrad <../reference/aggregation/upgrad>` and the
 average of the gradients, and another hook to compute and print the weights of the weighting of
-:doc:`UPGrad <../docs/aggregation/upgrad>`.
+:doc:`UPGrad <../reference/aggregation/upgrad>`.
 
 Updating the parameters of the model with the average gradient is equivalent to using gradient
 descent on the average of the losses. Observing a cosine similarity smaller than 1 means that
 Jacobian descent is doing something different than gradient descent. With
-:doc:`UPGrad <../docs/aggregation/upgrad>`, this happens when the original gradients conflict (i.e.
+:doc:`UPGrad <../reference/aggregation/upgrad>`, this happens when the original gradients conflict (i.e.
 they have a negative inner product).
 
 .. testsetup::
diff --git a/docs/source/index.rst b/docs/source/index.rst
index 20d0b6db8..6df77372f 100644
--- a/docs/source/index.rst
+++ b/docs/source/index.rst
@@ -28,10 +28,10 @@ optimization. To get started, check out our :doc:`basic usage example
 Gradient descent relies on gradients to optimize a single objective. Jacobian descent takes this
 idea a step further, using the Jacobian to optimize multiple objectives. An important component of
 Jacobian descent is the aggregator, which maps the Jacobian to an optimization step. In the page
-:doc:`Aggregation <docs/aggregation/index>`, we provide an overview of the various aggregators
+:doc:`Aggregation <reference/aggregation/index>`, we provide an overview of the various aggregators
 available in TorchJD, and their corresponding weightings.
 
-For comparison against simple baselines, the :doc:`Scalarization <docs/scalarization/index>`
+For comparison against simple baselines, the :doc:`Scalarization <reference/scalarization/index>`
 package provides scalarizers that combine a tensor of losses into a single scalar loss, allowing
 standard gradient descent to be used.
 
@@ -50,7 +50,7 @@ the gradient of the obtained weighted loss. The iterative computation of the Gra
 Algorithm 3 of
 `Jacobian Descent For Multi-Objective Optimization <https://arxiv.org/pdf/2406.16232>`_. The
 documentation and usage example of this algorithm is provided in
-:doc:`autogram.Engine <docs/autogram/engine>`.
+:doc:`autogram.Engine <reference/autogram/engine>`.
 
 The original usage of the autogram engine is to compute the Gramian of the Jacobian very efficiently
 for :doc:`IWRM <examples/iwrm>`. Another direct application is when considering one loss per element
@@ -71,8 +71,8 @@ TorchJD is open-source, under MIT License. The source code is available on
     :caption: API Reference
     :hidden:
 
-    docs/autogram/index.rst
-    docs/autojac/index.rst
-    docs/aggregation/index.rst
-    docs/scalarization/index.rst
-    docs/linalg/index.rst
+    reference/autogram/index.rst
+    reference/autojac/index.rst
+    reference/aggregation/index.rst
+    reference/scalarization/index.rst
+    reference/linalg/index.rst

From 3fb9cbbcf3de7e7dc1e13ce1a2aea43c010622ab Mon Sep 17 00:00:00 2001
From: Pierre Quinton <pierre.quinton@gmail.com>
Date: Sat, 13 Jun 2026 10:38:15 +0200
Subject: [PATCH 3/3] Split `examples` into `how_to` and `tutorials`

---
 README.md                                     |   6 +-
 docs/_redirects.py                            | 232 ++++++++++++++++--
 docs/source/examples/index.rst                |  48 ----
 .../{examples => how_to}/basic_usage.rst      |   0
 docs/source/how_to/index.rst                  |  13 +
 docs/source/index.rst                         |  15 +-
 docs/source/{examples => tutorials}/amp.rst   |   0
 .../{examples => tutorials}/grouping.rst      |   0
 docs/source/tutorials/index.rst               |  41 ++++
 docs/source/{examples => tutorials}/iwmtl.rst |   0
 docs/source/{examples => tutorials}/iwrm.rst  |   0
 .../lightning_integration.rst                 |   0
 .../{examples => tutorials}/monitoring.rst    |   0
 docs/source/{examples => tutorials}/mtl.rst   |   0
 .../{examples => tutorials}/partial_jd.rst    |   0
 docs/source/{examples => tutorials}/rnn.rst   |   0
 src/torchjd/aggregation/_gradvac.py           |   2 +-
 src/torchjd/autogram/_engine.py               |   2 +-
 src/torchjd/autojac/_mtl_backward.py          |   2 +-
 19 files changed, 285 insertions(+), 76 deletions(-)
 delete mode 100644 docs/source/examples/index.rst
 rename docs/source/{examples => how_to}/basic_usage.rst (100%)
 create mode 100644 docs/source/how_to/index.rst
 rename docs/source/{examples => tutorials}/amp.rst (100%)
 rename docs/source/{examples => tutorials}/grouping.rst (100%)
 create mode 100644 docs/source/tutorials/index.rst
 rename docs/source/{examples => tutorials}/iwmtl.rst (100%)
 rename docs/source/{examples => tutorials}/iwrm.rst (100%)
 rename docs/source/{examples => tutorials}/lightning_integration.rst (100%)
 rename docs/source/{examples => tutorials}/monitoring.rst (100%)
 rename docs/source/{examples => tutorials}/mtl.rst (100%)
 rename docs/source/{examples => tutorials}/partial_jd.rst (100%)
 rename docs/source/{examples => tutorials}/rnn.rst (100%)

diff --git a/README.md b/README.md
index d870de1ac..71e176ac4 100644
--- a/README.md
+++ b/README.md
@@ -83,7 +83,7 @@ store it in the `.jac` fields of the model parameters. You then have to call
 this Jacobian using the specified
 [`Aggregator`](https://torchjd.org/stable/reference/aggregation#torchjd.aggregation.Aggregator), and to
 store the result into the `.grad` fields of the model parameters. See this
-[usage example](https://torchjd.org/stable/examples/basic_usage/) for more details.
+[usage example](https://torchjd.org/stable/how_to/basic_usage/) for more details.
 
 #### 2. `mtl_backward` + `jac_to_grad`
 In the case of multi-task learning, an alternative to
@@ -214,9 +214,9 @@ losses with Jacobian descent using [UPGrad](https://torchjd.org/stable/reference
 ```
 
 You can even go one step further by considering the multiple tasks and each element of the batch
-independently (Instance-Wise Multitask Learning). See [this example](https://torchjd.org/stable/examples/iwmtl/) for more details.
+independently (Instance-Wise Multitask Learning). See [this example](https://torchjd.org/stable/tutorials/iwmtl/) for more details.
 
-More usage examples can be found [here](https://torchjd.org/stable/examples/).
+More usage examples can be found [here](https://torchjd.org/stable/tutorials/).
 
 ## Supported Aggregators and Weightings
 TorchJD provides many existing aggregators from the literature, listed in the following table.
diff --git a/docs/_redirects.py b/docs/_redirects.py
index f1f4eabdf..85f0bc8a2 100644
--- a/docs/_redirects.py
+++ b/docs/_redirects.py
@@ -5,11 +5,15 @@
 
 def generate_redirects(build_dir: str) -> None:
     """
-    Generates HTML redirect pages from the old ``docs/`` paths to the new ``reference/`` paths.
+    Generates HTML redirect pages from the old ``docs/`` paths to the new ``reference/`` paths,
+    and from the old ``examples/`` paths to the new ``how_to/`` and ``tutorials/`` paths.
 
     For each page found under ``<build_dir>/reference/``, this creates a corresponding redirect
     HTML file under ``<build_dir>/docs/`` that redirects browser visits to the new location.
 
+    For each page found under ``<build_dir>/how_to/`` or ``<build_dir>/tutorials/``, this creates
+    a corresponding redirect HTML file under ``<build_dir>/examples/``.
+
     Supports both the ``dirhtml`` and ``html`` Sphinx builders.
 
     :param build_dir: Path to the build output directory (e.g. ``build/dirhtml`` or ``build/html``).
@@ -17,13 +21,14 @@ def generate_redirects(build_dir: str) -> None:
 
     reference_dir = os.path.join(build_dir, "reference")
     if not os.path.isdir(reference_dir):
-        print(f"No reference directory found at {reference_dir}, skipping redirects.")
-        return
-
-    if _is_dirhtml_builder(reference_dir):
-        _generate_redirects_dirhtml(build_dir, reference_dir)
+        print(f"No reference directory found at {reference_dir}, skipping reference redirects.")
     else:
-        _generate_redirects_html(build_dir, reference_dir)
+        if _is_dirhtml_builder(build_dir):
+            _generate_redirects_dirhtml(build_dir, reference_dir)
+        else:
+            _generate_redirects_html(build_dir, reference_dir)
+
+    _generate_examples_redirects(build_dir)
 
 
 def verify_redirects(build_dir: str) -> bool:
@@ -32,8 +37,9 @@ def verify_redirects(build_dir: str) -> bool:
 
     Checks:
     - Every ``reference/`` page has a corresponding redirect in ``docs/``.
+    - Every ``how_to/`` and ``tutorials/`` page has a corresponding redirect in ``examples/``.
     - Every redirect target resolves to an existing file on disk.
-    - No orphaned redirects (redirect files with no matching ``reference/`` page).
+    - No orphaned redirects (redirect files with no matching source page).
 
     :param build_dir: Path to the build output directory.
     :returns: ``True`` if all checks pass, ``False`` otherwise.
@@ -44,14 +50,21 @@ def verify_redirects(build_dir: str) -> bool:
         print(f"Error: no reference directory found at {reference_dir}")
         return False
 
-    if _is_dirhtml_builder(reference_dir):
-        return _verify_redirects_dirhtml(build_dir, reference_dir)
-    return _verify_redirects_html(build_dir, reference_dir)
+    if _is_dirhtml_builder(build_dir):
+        ok_ref = _verify_redirects_dirhtml(build_dir, reference_dir)
+    else:
+        ok_ref = _verify_redirects_html(build_dir, reference_dir)
+
+    ok_examples = _verify_examples_redirects(build_dir)
+    return ok_ref and ok_examples
 
 
-def _is_dirhtml_builder(reference_dir: str) -> bool:
-    upgrad_path = os.path.join(reference_dir, "aggregation", "upgrad")
-    return os.path.isdir(upgrad_path)
+def _is_dirhtml_builder(build_dir: str) -> bool:
+    upgrad_path = os.path.join(build_dir, "reference", "aggregation", "upgrad")
+    if os.path.isdir(upgrad_path):
+        return True
+    how_to_path = os.path.join(build_dir, "how_to", "basic_usage")
+    return os.path.isdir(how_to_path)
 
 
 # ── generation ────────────────────────────────────────────────────────────────────
@@ -118,7 +131,196 @@ def _write_redirect_file(filepath: str, target: str) -> None:
         f.write(content)
 
 
-# ── verification ───────────────────────────────────────────────────────────────────
+# ── examples redirects ──────────────────────────────────────────────────────────────
+
+_EXAMPLES_REDIRECTS = {"basic_usage": "how_to"}
+
+
+def _target_category(rel_path: str) -> str:
+    """Returns 'how_to' for basic_usage, 'tutorials' for everything else."""
+    base = rel_path.rstrip("/").split("/")[0]
+    return _EXAMPLES_REDIRECTS.get(base, "tutorials")
+
+
+def _generate_examples_redirects(build_dir: str) -> None:
+    how_to_dir = os.path.join(build_dir, "how_to")
+    tutorials_dir = os.path.join(build_dir, "tutorials")
+
+    if _is_dirhtml_builder(build_dir):
+        _generate_examples_redirects_dirhtml(build_dir, how_to_dir, tutorials_dir)
+    else:
+        _generate_examples_redirects_html(build_dir, how_to_dir, tutorials_dir)
+
+
+def _generate_examples_redirects_dirhtml(
+    build_dir: str, how_to_dir: str, tutorials_dir: str
+) -> None:
+    for src_dir in (how_to_dir, tutorials_dir):
+        if not os.path.isdir(src_dir):
+            continue
+        for root, _dirs, _files in os.walk(src_dir):
+            rel_path = os.path.relpath(root, src_dir)
+            if rel_path == ".":
+                rel_path = ""
+            category = "how_to" if src_dir == how_to_dir else "tutorials"
+            depth = rel_path.count(os.sep) + int(rel_path != "")
+            relative_prefix = os.sep.join([".."] * (depth + 1))
+            if rel_path:
+                redirect_target = f"{relative_prefix}/{category}/{rel_path}/"
+            else:
+                redirect_target = f"{relative_prefix}/{category}/"
+
+            dest_dir = os.path.join(build_dir, "examples", rel_path)
+            os.makedirs(dest_dir, exist_ok=True)
+            _write_redirect_file(os.path.join(dest_dir, "index.html"), redirect_target)
+            print(f"Redirect: examples/{rel_path}/ -> {category}/{rel_path}/")
+
+
+def _generate_examples_redirects_html(build_dir: str, how_to_dir: str, tutorials_dir: str) -> None:
+    for src_dir in (how_to_dir, tutorials_dir):
+        if not os.path.isdir(src_dir):
+            continue
+        for root, _dirs, files in os.walk(src_dir):
+            rel_dir = os.path.relpath(root, src_dir)
+            if rel_dir == ".":
+                rel_dir = ""
+            category = "how_to" if src_dir == how_to_dir else "tutorials"
+            for file in files:
+                if not file.endswith(".html"):
+                    continue
+                rel_file = os.path.join(rel_dir, file) if rel_dir else file
+
+                depth = rel_file.count(os.sep) + 1
+                relative_prefix = os.sep.join([".."] * depth)
+                redirect_target = f"{relative_prefix}/{category}/{rel_file}"
+
+                dest_file = os.path.join(build_dir, "examples", rel_file)
+                os.makedirs(os.path.dirname(dest_file), exist_ok=True)
+                _write_redirect_file(dest_file, redirect_target)
+                print(f"Redirect: examples/{rel_file} -> {category}/{rel_file}")
+
+
+def _verify_examples_redirects(build_dir: str) -> bool:
+    how_to_dir = os.path.join(build_dir, "how_to")
+    tutorials_dir = os.path.join(build_dir, "tutorials")
+
+    if _is_dirhtml_builder(build_dir):
+        return _verify_examples_redirects_dirhtml(build_dir, how_to_dir, tutorials_dir)
+    return _verify_examples_redirects_html(build_dir, how_to_dir, tutorials_dir)
+
+
+def _verify_examples_redirects_dirhtml(build_dir: str, how_to_dir: str, tutorials_dir: str) -> bool:
+    ok = True
+    examples_dir = os.path.join(build_dir, "examples")
+
+    # Collect expected target pages from how_to/ and tutorials/.
+    target_pages = set()
+    for src_dir, category in [(how_to_dir, "how_to"), (tutorials_dir, "tutorials")]:
+        if not os.path.isdir(src_dir):
+            continue
+        for root, _dirs, _files in os.walk(src_dir):
+            rel_path = os.path.relpath(root, src_dir)
+            if rel_path == ".":
+                rel_path = ""
+            target_pages.add((rel_path, category))
+
+    # Collect existing redirect pages.
+    redirect_pages = set()
+    if os.path.isdir(examples_dir):
+        for root, _dirs, files in os.walk(examples_dir):
+            if "index.html" in files:
+                rel_path = os.path.relpath(root, examples_dir)
+                if rel_path == ".":
+                    rel_path = ""
+                redirect_pages.add(rel_path)
+
+    # Check every target has a redirect.
+    for page, category in sorted(target_pages):
+        if page not in redirect_pages:
+            print(f"Missing redirect: examples/{page}/ -> {category}/{page}/")
+            ok = False
+
+    # Check no orphaned redirects.
+    for page in sorted(redirect_pages):
+        if page not in {p for p, _ in target_pages}:
+            print(f"Orphaned redirect: examples/{page}/ (no matching how_to/ or tutorials/ page)")
+            ok = False
+
+    # Check redirect targets exist.
+    for page, _category in target_pages:
+        if page in redirect_pages:
+            redirect_file = os.path.join(examples_dir, page, "index.html")
+            target = _extract_redirect_target(redirect_file)
+            if target is None:
+                print(f"Broken redirect (no meta refresh): examples/{page}/index.html")
+                ok = False
+                continue
+            resolved = os.path.normpath(os.path.join(os.path.dirname(redirect_file), target))
+            if not os.path.isdir(resolved):
+                print(f"Broken redirect (target directory not found): examples/{page}/ -> {target}")
+                ok = False
+
+    return ok
+
+
+def _verify_examples_redirects_html(build_dir: str, how_to_dir: str, tutorials_dir: str) -> bool:
+    ok = True
+    examples_dir = os.path.join(build_dir, "examples")
+
+    # Collect expected target pages from how_to/ and tutorials/.
+    target_pages = set()
+    for src_dir, category in [(how_to_dir, "how_to"), (tutorials_dir, "tutorials")]:
+        if not os.path.isdir(src_dir):
+            continue
+        for root, _dirs, files in os.walk(src_dir):
+            rel_dir = os.path.relpath(root, src_dir)
+            if rel_dir == ".":
+                rel_dir = ""
+            for file in files:
+                if file.endswith(".html"):
+                    rel_file = os.path.join(rel_dir, file) if rel_dir else file
+                    target_pages.add((rel_file, category))
+
+    # Collect existing redirect pages.
+    redirect_pages = set()
+    if os.path.isdir(examples_dir):
+        for root, _dirs, files in os.walk(examples_dir):
+            rel_dir = os.path.relpath(root, examples_dir)
+            if rel_dir == ".":
+                rel_dir = ""
+            for file in files:
+                if file.endswith(".html"):
+                    rel_file = os.path.join(rel_dir, file) if rel_dir else file
+                    redirect_pages.add(rel_file)
+
+    # Check every target has a redirect.
+    for page, category in sorted(target_pages):
+        if page not in redirect_pages:
+            print(f"Missing redirect: examples/{page} -> {category}/{page}")
+            ok = False
+
+    # Check no orphaned redirects.
+    for page in sorted(redirect_pages):
+        if page not in {p for p, _ in target_pages}:
+            print(f"Orphaned redirect: examples/{page} (no matching how_to/ or tutorials/ page)")
+            ok = False
+
+    # Check redirect targets exist.
+    for page, _category in target_pages:
+        if page in redirect_pages:
+            redirect_file = os.path.join(examples_dir, page)
+            target = _extract_redirect_target(redirect_file)
+            if target is None:
+                print(f"Broken redirect (no meta refresh): examples/{page}")
+                ok = False
+                continue
+            resolved = os.path.normpath(os.path.join(os.path.dirname(redirect_file), target))
+            if not os.path.isfile(resolved):
+                print(f"Broken redirect (target not found): examples/{page} -> {target}")
+                ok = False
+
+    return ok
+
 
 _META_REFRESH_RE = re.compile(r'<meta\s+http-equiv="refresh"\s+content="0;\s*url=([^"]+)"')
 
diff --git a/docs/source/examples/index.rst b/docs/source/examples/index.rst
deleted file mode 100644
index 91b0f4b06..000000000
--- a/docs/source/examples/index.rst
+++ /dev/null
@@ -1,48 +0,0 @@
-Examples
-========
-
-This section contains some usage examples for TorchJD.
-
-- :doc:`Basic Usage <basic_usage>` provides a toy example using :doc:`torchjd.backward
-  <../reference/autojac/backward>` to make a step of Jacobian descent with the :doc:`UPGrad
-  <../reference/aggregation/upgrad>` aggregator.
-- :doc:`Instance-Wise Risk Minimization (IWRM) <iwrm>` provides an example in which we minimize the
-  vector of per-instance losses, using stochastic sub-Jacobian descent (SSJD). It is compared to the
-  usual minimization of the average loss, called empirical risk minimization (ERM), using stochastic
-  gradient descent (SGD).
-- :doc:`Partial Jacobian Descent for IWRM <partial_jd>` provides an example in which we minimize the
-  vector of per-instance losses using stochastic sub-Jacobian descent, similar to our :doc:`IWRM <iwrm>`
-  example. However, this method bases the aggregation decision on the Jacobian of the losses with respect
-  to **only a subset** of the model's parameters, offering a trade-off between computational cost and
-  aggregation precision.
-- :doc:`Multi-Task Learning (MTL) <mtl>` provides an example of multi-task learning where Jacobian
-  descent is used to optimize the vector of per-task losses of a multi-task model, using the
-  dedicated backpropagation function :doc:`mtl_backward <../reference/autojac/mtl_backward>`.
-- :doc:`Instance-Wise Multi-Task Learning (IWMTL) <iwmtl>` shows how to combine multi-task learning
-  with instance-wise risk minimization: one loss per task and per element of the batch, using the
-  :doc:`autogram.Engine <../reference/autogram/engine>`.
-- :doc:`Recurrent Neural Network (RNN) <rnn>` shows how to apply Jacobian descent to RNN training,
-  with one loss per output sequence element.
-- :doc:`Monitoring Aggregations <monitoring>` shows how to monitor the aggregation performed by the
-  aggregator, to check if Jacobian descent is prescribed for your use-case.
-- :doc:`PyTorch Lightning Integration <lightning_integration>` showcases how to combine
-  TorchJD with PyTorch Lightning, by providing an example implementation of a multi-task
-  ``LightningModule`` optimized by Jacobian descent.
-- :doc:`Grouping <grouping>` shows how to apply an aggregator independently per parameter group
-  (e.g. per layer), so that conflict resolution happens at a finer granularity than the full
-  parameter vector.
-- :doc:`Automatic Mixed Precision <amp>` shows how to combine mixed precision training with TorchJD.
-
-.. toctree::
-    :hidden:
-
-    basic_usage.rst
-    iwrm.rst
-    partial_jd.rst
-    mtl.rst
-    iwmtl.rst
-    rnn.rst
-    monitoring.rst
-    lightning_integration.rst
-    amp.rst
-    grouping.rst
diff --git a/docs/source/examples/basic_usage.rst b/docs/source/how_to/basic_usage.rst
similarity index 100%
rename from docs/source/examples/basic_usage.rst
rename to docs/source/how_to/basic_usage.rst
diff --git a/docs/source/how_to/index.rst b/docs/source/how_to/index.rst
new file mode 100644
index 000000000..4e8e6924d
--- /dev/null
+++ b/docs/source/how_to/index.rst
@@ -0,0 +1,13 @@
+How-To Guides
+=============
+
+This section provides step-by-step instructions for common tasks with TorchJD.
+
+- :doc:`Basic Usage <basic_usage>` shows how to use TorchJD to perform an iteration of
+  Jacobian descent on a regression model with two objectives. This is the recommended starting
+  point for new users.
+
+.. toctree::
+    :hidden:
+
+    basic_usage.rst
diff --git a/docs/source/index.rst b/docs/source/index.rst
index 6df77372f..19372e500 100644
--- a/docs/source/index.rst
+++ b/docs/source/index.rst
@@ -22,8 +22,8 @@ objectives. It is based on the theory from `Jacobian Descent For Multi-Objective
 
 The main purpose is to jointly optimize multiple objectives without combining them into a single
 scalar loss. When the objectives are conflicting, this can be the key to a successful and stable
-optimization. To get started, check out our :doc:`basic usage example
-<examples/basic_usage>`.
+optimization. To get started, check out our :doc:`basic usage guide
+<how_to/basic_usage>`.
 
 Gradient descent relies on gradients to optimize a single objective. Jacobian descent takes this
 idea a step further, using the Jacobian to optimize multiple objectives. An important component of
@@ -37,10 +37,10 @@ standard gradient descent to be used.
 
 A straightforward application of Jacobian descent is multi-task learning, in which the vector of
 per-task losses has to be minimized. To start using TorchJD for multi-task learning, follow our
-:doc:`MTL example <examples/mtl>`.
+:doc:`MTL tutorial <tutorials/mtl>`.
 
 Another more interesting application is to consider separately the loss of each element in the
-batch. This is what we define as :doc:`Instance-Wise Risk Minimization <examples/iwrm>` (IWRM).
+batch. This is what we define as :doc:`Instance-Wise Risk Minimization <tutorials/iwrm>` (IWRM).
 
 The Gramian-based Jacobian descent algorithm provides a very efficient alternative way of
 performing Jacobian descent. It consists in computing
@@ -53,9 +53,9 @@ documentation and usage example of this algorithm is provided in
 :doc:`autogram.Engine <reference/autogram/engine>`.
 
 The original usage of the autogram engine is to compute the Gramian of the Jacobian very efficiently
-for :doc:`IWRM <examples/iwrm>`. Another direct application is when considering one loss per element
+for :doc:`IWRM <tutorials/iwrm>`. Another direct application is when considering one loss per element
 of the batch and per task, in the context of multi-task learning. We call this
-:doc:`Instance-Wise Risk Multi-Task Learning <examples/iwmtl>` (IWMTL).
+:doc:`Instance-Wise Multi-Task Learning <tutorials/iwmtl>` (IWMTL).
 
 TorchJD is open-source, under MIT License. The source code is available on
 `GitHub <https://github.com/SimplexLab/TorchJD>`_.
@@ -65,7 +65,8 @@ TorchJD is open-source, under MIT License. The source code is available on
     :hidden:
 
     installation.md
-    examples/index.rst
+    how_to/index.rst
+    tutorials/index.rst
 
 .. toctree::
     :caption: API Reference
diff --git a/docs/source/examples/amp.rst b/docs/source/tutorials/amp.rst
similarity index 100%
rename from docs/source/examples/amp.rst
rename to docs/source/tutorials/amp.rst
diff --git a/docs/source/examples/grouping.rst b/docs/source/tutorials/grouping.rst
similarity index 100%
rename from docs/source/examples/grouping.rst
rename to docs/source/tutorials/grouping.rst
diff --git a/docs/source/tutorials/index.rst b/docs/source/tutorials/index.rst
new file mode 100644
index 000000000..5a43b2b9d
--- /dev/null
+++ b/docs/source/tutorials/index.rst
@@ -0,0 +1,41 @@
+Tutorials
+=========
+
+This section contains tutorials that walk you through more advanced usage patterns of TorchJD.
+
+- :doc:`Instance-Wise Risk Minimization (IWRM) <iwrm>` demonstrates how to minimize the vector of
+  per-instance losses, using stochastic sub-Jacobian descent (SSJD), compared to the usual
+  minimization of the average loss (ERM) with stochastic gradient descent (SGD).
+- :doc:`Partial Jacobian Descent for IWRM <partial_jd>` shows how to base the aggregation decision
+  on the Jacobian of the losses with respect to only a subset of the model's parameters, offering a
+  trade-off between computational cost and aggregation precision.
+- :doc:`Multi-Task Learning (MTL) <mtl>` walks through multi-task learning where Jacobian descent
+  optimizes the vector of per-task losses of a multi-task model, using the dedicated
+  backpropagation function :doc:`mtl_backward <../reference/autojac/mtl_backward>`.
+- :doc:`Instance-Wise Multi-Task Learning (IWMTL) <iwmtl>` shows how to combine multi-task
+  learning with instance-wise risk minimization: one loss per task and per element of the batch,
+  using the :doc:`autogram.Engine <../reference/autogram/engine>`.
+- :doc:`Recurrent Neural Network (RNN) <rnn>` shows how to apply Jacobian descent to RNN training,
+  with one loss per output sequence element.
+- :doc:`Monitoring Aggregations <monitoring>` shows how to monitor the aggregation performed by the
+  aggregator, to check whether Jacobian descent is appropriate for your use case.
+- :doc:`PyTorch Lightning Integration <lightning_integration>` showcases how to combine TorchJD
+  with PyTorch Lightning, by providing an example implementation of a multi-task
+  ``LightningModule`` optimized by Jacobian descent.
+- :doc:`Grouping <grouping>` shows how to apply an aggregator independently per parameter group
+  (e.g. per layer), so that conflict resolution happens at a finer granularity than the full
+  parameter vector.
+- :doc:`Automatic Mixed Precision <amp>` shows how to combine mixed precision training with TorchJD.
+
+.. toctree::
+    :hidden:
+
+    iwrm.rst
+    partial_jd.rst
+    mtl.rst
+    iwmtl.rst
+    rnn.rst
+    monitoring.rst
+    lightning_integration.rst
+    amp.rst
+    grouping.rst
diff --git a/docs/source/examples/iwmtl.rst b/docs/source/tutorials/iwmtl.rst
similarity index 100%
rename from docs/source/examples/iwmtl.rst
rename to docs/source/tutorials/iwmtl.rst
diff --git a/docs/source/examples/iwrm.rst b/docs/source/tutorials/iwrm.rst
similarity index 100%
rename from docs/source/examples/iwrm.rst
rename to docs/source/tutorials/iwrm.rst
diff --git a/docs/source/examples/lightning_integration.rst b/docs/source/tutorials/lightning_integration.rst
similarity index 100%
rename from docs/source/examples/lightning_integration.rst
rename to docs/source/tutorials/lightning_integration.rst
diff --git a/docs/source/examples/monitoring.rst b/docs/source/tutorials/monitoring.rst
similarity index 100%
rename from docs/source/examples/monitoring.rst
rename to docs/source/tutorials/monitoring.rst
diff --git a/docs/source/examples/mtl.rst b/docs/source/tutorials/mtl.rst
similarity index 100%
rename from docs/source/examples/mtl.rst
rename to docs/source/tutorials/mtl.rst
diff --git a/docs/source/examples/partial_jd.rst b/docs/source/tutorials/partial_jd.rst
similarity index 100%
rename from docs/source/examples/partial_jd.rst
rename to docs/source/tutorials/partial_jd.rst
diff --git a/docs/source/examples/rnn.rst b/docs/source/tutorials/rnn.rst
similarity index 100%
rename from docs/source/examples/rnn.rst
rename to docs/source/tutorials/rnn.rst
diff --git a/src/torchjd/aggregation/_gradvac.py b/src/torchjd/aggregation/_gradvac.py
index 54b6d0c3e..a33e754ed 100644
--- a/src/torchjd/aggregation/_gradvac.py
+++ b/src/torchjd/aggregation/_gradvac.py
@@ -159,7 +159,7 @@ class GradVac(GramianWeightedAggregator, Stateful, _NonDifferentiable):
 
     .. note::
         To apply GradVac with the `whole_model`, `enc_dec`, `all_layer` or `all_matrix` grouping
-        strategy, please refer to the :doc:`Grouping </examples/grouping>` examples.
+        strategy, please refer to the :doc:`Grouping </tutorials/grouping>` tutorial.
     """
 
     gramian_weighting: GradVacWeighting
diff --git a/src/torchjd/autogram/_engine.py b/src/torchjd/autogram/_engine.py
index 8a8633df9..b2b4deb31 100644
--- a/src/torchjd/autogram/_engine.py
+++ b/src/torchjd/autogram/_engine.py
@@ -257,7 +257,7 @@ def compute_gramian(self, output: Tensor, /) -> Tensor:
                   standard setting of Jacobian descent).
                 - Matrix ``output`` of dimension :math:`m_1\times m_2`: :math:`m_1 m_2 \times m_1 m_2`
                   Gramian (this can be used for :doc:`Instance-Wise Multi-Task Learning (IWMTL)
-                  <../../examples/iwmtl>`, as each sample in the batch has one loss per task).
+                  <../../tutorials/iwmtl>`, as each sample in the batch has one loss per task).
         """
 
         if self._batch_dim is not None:
diff --git a/src/torchjd/autojac/_mtl_backward.py b/src/torchjd/autojac/_mtl_backward.py
index cb34bc526..22bfbc8c5 100644
--- a/src/torchjd/autojac/_mtl_backward.py
+++ b/src/torchjd/autojac/_mtl_backward.py
@@ -74,7 +74,7 @@ def mtl_backward(
         Example
 
         A usage example of ``mtl_backward`` is provided in
-        :doc:`Multi-Task Learning (MTL) <../../examples/mtl>`.
+        :doc:`Multi-Task Learning (MTL) <../../tutorials/mtl>`.
 
     .. note::
         ``shared_params`` should contain no parameter in common with ``tasks_params``. The different