32 changes: 32 additions & 0 deletions index.html
@@ -36,9 +36,11 @@

<meta name="author" content="Daniel Smilkov and Shan Carter">
<title>A Neural Network Playground</title>
<link rel="stylesheet" href="katex.min.css" type="text/css">
<link rel="stylesheet" href="bundle.css" type="text/css">
<link href="https://fonts.googleapis.com/css?family=Roboto:300,400,500|Material+Icons" rel="stylesheet" type="text/css">
<script src="lib.js"></script>
<script src="katex.min.js"></script>
</head>
<body>
<!-- GitHub link -->
@@ -142,6 +144,7 @@ <h1 class="l--page">Tinker With a <b>Neural Network</b> <span class="optional">R

<!-- Main Part -->
<div id="main-part" class="l--page">
<div class="main-part-columns">

<!-- Data Column-->
<div class="column data">
Expand Down Expand Up @@ -308,6 +311,35 @@ <h4>Output</h4>
</div>
</div>

</div>

<div id="nn-equation-panel" class="nn-equation-panel ui-equationPanel">
<div class="nn-equation-header">
<h5 class="nn-equation-title">Model equations</h5>
</div>
<div class="nn-equation-block">
<h6 class="nn-equation-subtitle">Forward pass</h6>
<div id="nn-equation-forward" class="nn-equation"></div>
<div id="nn-equation-forward-desc" class="nn-equation-desc" aria-label="Forward pass terms"></div>
</div>
<div class="nn-equation-block">
<h6 class="nn-equation-subtitle">Training objective</h6>
<div id="nn-equation-objective" class="nn-equation"></div>
<div id="nn-equation-objective-desc" class="nn-equation-desc" aria-label="Training objective terms"></div>
</div>
<div class="nn-equation-block">
<h6 class="nn-equation-subtitle">Backpropagation (per example)</h6>
<div id="nn-equation-backprop" class="nn-equation"></div>
<div id="nn-equation-backprop-desc" class="nn-equation-desc" aria-label="Backpropagation terms"></div>
</div>
<div class="nn-equation-block">
<h6 class="nn-equation-subtitle">Weight and bias updates (mini-batch)</h6>
<div id="nn-equation-update" class="nn-equation"></div>
<div id="nn-equation-update-desc" class="nn-equation-desc" aria-label="Weight update terms"></div>
</div>
<div id="nn-equation-legend" class="nn-equation-legend" aria-label="Equation symbol reference"></div>
</div>

</div>

<!-- More -->
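A minimal sketch of how the panel above could be typeset, assuming the playground builds its LaTeX strings elsewhere and that katex.min.js (loaded in the <head> above) exposes the global katex object; buildForwardTex is a hypothetical helper, not part of this PR:

declare const katex: any;  // global provided by the katex.min.js script tag

// Hypothetical: the real LaTeX strings are generated from the live network.
declare function buildForwardTex(): string;

function renderForwardEquation(): void {
  let target = document.getElementById("nn-equation-forward");
  if (target == null) {
    return;
  }
  // katex.render typesets a TeX string directly into a DOM element.
  katex.render(buildForwardTex(), target, {throwOnError: false});
}
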
39 changes: 39 additions & 0 deletions package-lock.json

Some generated files are not rendered by default.

3 changes: 2 additions & 1 deletion package.json
@@ -6,7 +6,7 @@
"scripts": {
"clean": "rimraf dist",
"start": "npm run serve-watch",
"prep": "copyfiles analytics.js dist && concat node_modules/material-design-lite/material.min.js node_modules/seedrandom/seedrandom.min.js > dist/lib.js",
"prep": "copyfiles analytics.js dist && copyfiles -u 3 node_modules/katex/dist/katex.min.css dist && copyfiles -u 3 node_modules/katex/dist/katex.min.js dist && copyfiles -u 3 \"node_modules/katex/dist/fonts/*.woff2\" dist && concat node_modules/material-design-lite/material.min.js node_modules/seedrandom/seedrandom.min.js > dist/lib.js",
"build-css": "concat node_modules/material-design-lite/material.min.css styles.css > dist/bundle.css",
"watch-css": "concat node_modules/material-design-lite/material.min.css styles.css -o dist/bundle.css",
"build-html": "copyfiles index.html dist",
@@ -32,6 +32,7 @@
},
"dependencies": {
"d3": "^3.5.16",
"katex": "^0.16.11",
"material-design-lite": "^1.3.0",
"seedrandom": "^2.4.3"
}
232 changes: 232 additions & 0 deletions src/equation-guides.ts
@@ -0,0 +1,232 @@
/* Copyright 2016 Google Inc. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

interface GuideItem {
term: string;
detail: string;
}

/** Renders the term/detail pairs as a definition list inside container. */
function fillDescriptionList(
container: HTMLElement | null, items: GuideItem[]): void {
if (container == null) {
return;
}
container.innerHTML = "";
let dl = document.createElement("dl");
dl.className = "nn-equation-desc-list";
for (let i = 0; i < items.length; i++) {
let it = items[i];
let dt = document.createElement("dt");
dt.textContent = it.term;
let dd = document.createElement("dd");
dd.textContent = it.detail;
dl.appendChild(dt);
dl.appendChild(dd);
}
container.appendChild(dl);
}
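
// Usage sketch (editor's note): each guide below is rendered into its panel
// element via fillDescriptionList, e.g.
//   fillDescriptionList(
//       document.getElementById("nn-equation-forward-desc"), FORWARD_GUIDE);
// See fillEquationPanelGuides at the bottom of this file.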

const FORWARD_GUIDE: GuideItem[] = [
{
term: "Layered formula",
detail: "Each line is one neuron: affine map (bias plus weighted sum of " +
"incoming values) then the layer activation. Numbers are the live " +
"weights and biases from the diagram."
},
{
term: "Input symbols (X1, X2, …)",
detail: "Values from the Features column for one point, in top-to-bottom " +
"order of input nodes."
},
{
term: "h_j^(l)",
detail: "Hidden unit j in hidden layer l (column l after inputs), top " +
"to bottom in the diagram."
},
{
term: "ŷ",
detail: "Scalar network output after the output activation (regression: " +
"linear; classification: tanh in this demo)."
}
];

const OBJECTIVE_GUIDE: GuideItem[] = [
{
term: "L (calligraphic in the formula)",
detail: "Scalar objective minimized during training: average prediction " +
"error on the training set, plus an optional weight penalty."
},
{
term: "N",
detail: "Number of training examples used in the average."
},
{
term: "ŷ_i",
detail: "Network output for training example i (same definition as ŷ in " +
"the forward pass)."
},
{
term: "y_i",
detail: "Target label for example i (class or numeric target from the " +
"dataset)."
},
{
term: "½(ŷ_i − y_i)²",
detail: "Half squared error for one example; the mean over i is the data " +
"loss shown as training loss (up to the same formula)."
},
{
term: "λ and ∑_w",
detail: "When regularization is on: λ is the regularization-rate slider; " +
"the sum is over all connection weights. L1 adds λ|w|; L2 adds λ·½w² " +
"per weight (matching the implementation)."
},
{
term: "Gradient of L w.r.t. a weight (data + regularization)",
detail: "If L = (1/N)Σ_i E_i + λ Σ_w R(w), then for a fixed connection " +
"weight w, ∂L/∂w = (1/N)Σ_i ∂E_i/∂w + λ R′(w). The first term is what " +
"backprop builds from the chain rule through the network (prediction → " +
"layers → that edge). The second term does not pass through ŷ: R depends " +
"only on w, so its contribution is just λ R′(w). This demo averages " +
"per-example ∂E/∂w over the mini-batch, then applies a separate " +
"regularization step—equivalent to adding those two parts of the gradient."
}
];
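
// Worked form of the last entry above, assuming L = (1/N)·Σ_i E_i + λ·Σ_w R(w):
//   ∂L/∂w = (1/N)·Σ_i ∂E_i/∂w + λ·R′(w)
// where L2 uses R(w) = ½w² so R′(w) = w, and L1 uses R(w) = |w| so
// R′(w) = sign(w). The first term is built by backprop; the second needs no
// chain rule because R depends only on w.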

const BACKPROP_GUIDE: GuideItem[] = [
{
term: "Chain rule (overall)",
detail: "E depends on ŷ, which depends on a long composition of affine maps " +
"(z = b + Σ w a) and activations (a = σ(z)). Backprop is the chain rule " +
"applied in reverse order: start from ∂E/∂ŷ and propagate factors " +
"∂E/∂z, ∂E/∂w, and ∂E/∂a layer by layer until every weight has a " +
"∂E/∂w."
},
{
term: "E",
detail: "Error for a single training example only: E = ½(ŷ − y)². " +
"Backpropagation computes derivatives of this E (not the full-batch " +
"average) with respect to weights and activations."
},
{
term: "ŷ and y",
detail: "Prediction and label for that one example—the same ŷ as after " +
"forward pass, y from the data point."
},
{
term: "∂E/∂ŷ",
detail: "Derivative of E with respect to the network output. For this " +
"square loss it equals ŷ − y; it is the starting signal at the output " +
"neuron before applying the output activation derivative. This is the " +
"first factor in the chain: ∂E/∂z_out = (∂E/∂ŷ)(∂ŷ/∂z_out) with " +
"∂ŷ/∂z_out = σ′(z_out) when ŷ = σ(z_out)."
},
{
term: "δ^(z) and ∂E/∂z",
detail: "Same quantity: error signal for a neuron’s total input z (bias " +
"plus weighted sum). In code this is inputDer. It is ∂E/∂z: how much E " +
"changes if z nudges, after all later layers are folded in by the chain " +
"rule."
},
{
term: "Chain rule through the activation (σ)",
detail: "Here a = σ(z). E depends on a, and a depends on z, so by the " +
"chain rule ∂E/∂z = (∂E/∂a)(da/dz) = (∂E/∂a)σ′(z). That product is " +
"exactly δ^(z). For a linear output, σ′ = 1 so ∂E/∂z = ∂E/∂a."
},
{
term: "z and a",
detail: "z is pre-activation input to the neuron; a = σ(z) is the value " +
"sent along outgoing edges (what the diagram shows on the node)."
},
{
term: "∂E/∂a (before σ′)",
detail: "How E changes with the neuron’s output a. At the output node, " +
"∂E/∂a is ∂E/∂ŷ (or merges with it depending on notation). Deeper " +
"layers receive ∂E/∂a as the sum of backward messages from the next " +
"column (see the sum formula)."
},
{
term: "Chain rule for one weight",
detail: "The affine map gives z_to = b + Σ w a_from, so ∂z_to/∂w = a_from " +
"when w is one of those edges. Hence ∂E/∂w = (∂E/∂z_to)(∂z_to/∂w) = " +
"δ^(z)_to · a_from (errorDer on that link in code)."
},
{
term: "Chain rule into an earlier activation (the sum)",
detail: "Each downstream z depends on a_from through z = … + w a_from + …, " +
"so ∂z/∂a_from = w for that edge. If several units feed from the same " +
"a_from, E changes through each path: ∂E/∂a_from = Σ_out " +
"(∂E/∂z_out)(∂z_out/∂a_from) = Σ_out w_out δ^(z)_out. That is the " +
"displayed sum; it becomes ∂E/∂a for the previous layer’s nodes."
}
];
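
// Editor's sketch (hypothetical helper, not the playground's nn.ts API): the
// chain rule above for one example, a single output neuron, and one edge.
function backpropOneEdge(
    yHat: number, y: number, zOut: number, aFrom: number, w: number,
    sigmaPrime: (z: number) => number): {dEdw: number, dEdaFrom: number} {
  let dEdyHat = yHat - y;                  // ∂E/∂ŷ for E = ½(ŷ − y)²
  let delta = dEdyHat * sigmaPrime(zOut);  // δ = ∂E/∂z, chained through σ
  return {
    dEdw: delta * aFrom,   // ∂E/∂w = δ · a_from (errorDer on the link)
    dEdaFrom: w * delta    // one term of ∂E/∂a_from = Σ_out w_out·δ_out
  };
}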

const UPDATE_GUIDE: GuideItem[] = [
{
term: "w and b",
detail: "Connection weight and neuron bias updated after a mini-batch of " +
"examples (each example ran forward and backward and contributed to " +
"running sums)."
},
{
term: "η (eta)",
detail: "Learning rate from the UI; scales how far each update moves " +
"along the negative gradient."
},
{
term: "n_b and the overline",
detail: "Mini-batch size (batch size control). The bar means average: " +
"sum of per-example ∂E/∂w or ∂E/∂b over the batch, divided by n_b."
},
{
term: "Second w line (regularization)",
detail: "If L1 or L2 is enabled: after the gradient step, each weight is " +
"moved again to shrink the penalty R(w), scaled by η and λ."
},
{
term: "R(w) and R′(w)",
detail: "Regularization term and its derivative: L2 uses ½w² and R′=w; L1 " +
"uses |w| with a subgradient in {−1, 0, 1} (0 at w = 0), as in the " +
"implementation."
},
{
term: "Chain rule: data loss vs. regularization in the update",
detail: "The averaged term uses ∂E/∂w from backprop—every factor is a chain " +
"rule through ŷ and the layers. The term λ R′(w) is not chained through " +
"the network: ∂(λ R(w))/∂w = λ R′(w) because R depends only on w. The " +
"code applies −(η/n_b)·(sum of ∂E/∂w) and then −ηλ R′(w), which matches " +
"descending the sum of data-gradient and regularization-gradient on w."
}
];
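
// Editor's sketch (hypothetical helper; the real updates live in the
// playground's training loop): one mini-batch step for a single weight,
// gradient step first, then the separate regularization step.
function updateWeight(
    w: number, sumErrDer: number, nb: number, eta: number, lambda: number,
    rPrime: (w: number) => number): number {
  w -= (eta / nb) * sumErrDer;    // −η · (averaged ∂E/∂w over the batch)
  w -= eta * lambda * rPrime(w);  // −η·λ·R′(w), shrinking the penalty
  return w;
}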

/** Populate glossary lists under each equation block. */
export function fillEquationPanelGuides(
forwardDesc: HTMLElement | null,
objectiveDesc: HTMLElement | null,
backpropDesc: HTMLElement | null,
updateDesc: HTMLElement | null): void {
fillDescriptionList(forwardDesc, FORWARD_GUIDE);
fillDescriptionList(objectiveDesc, OBJECTIVE_GUIDE);
fillDescriptionList(backpropDesc, BACKPROP_GUIDE);
fillDescriptionList(updateDesc, UPDATE_GUIDE);
}

export const EQUATION_DESC_ELEMENT_IDS: string[] = [
"nn-equation-forward-desc",
"nn-equation-objective-desc",
"nn-equation-backprop-desc",
"nn-equation-update-desc"
];
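
A possible call site (editor's sketch; the actual wiring into the playground's startup code is not shown in this diff):

import {fillEquationPanelGuides} from "./equation-guides";

// Populate the four glossary containers added to index.html once at startup.
fillEquationPanelGuides(
    document.getElementById("nn-equation-forward-desc"),
    document.getElementById("nn-equation-objective-desc"),
    document.getElementById("nn-equation-backprop-desc"),
    document.getElementById("nn-equation-update-desc"));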