diff --git a/Chapter01/Multi_Head_Attention_Sub_Layer.ipynb b/Chapter01/Multi_Head_Attention_Sub_Layer.ipynb
index 9dcb907..3a2eb47 100644
--- a/Chapter01/Multi_Head_Attention_Sub_Layer.ipynb
+++ b/Chapter01/Multi_Head_Attention_Sub_Layer.ipynb
@@ -1,273 +1,10 @@
 {
-  "nbformat": 4,
-  "nbformat_minor": 0,
-  "metadata": {
-    "colab": {
-      "name": "Multi-Head Attention Sub-Layer.ipynb",
-      "provenance": [],
-      "collapsed_sections": []
-    },
-    "kernelspec": {
-      "name": "python3",
-      "display_name": "Python 3"
-    },
-    "accelerator": "GPU",
-    "widgets": {
-      "application/vnd.jupyter.widget-state+json": {
-        "946c90b82f7f46caa25c885668b75eab": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_name": "HBoxModel",
-          "state": {
-            "_view_name": "HBoxView",
-            "_dom_classes": [],
-            "_model_name": "HBoxModel",
-            "_view_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_view_count": null,
-            "_view_module_version": "1.5.0",
-            "box_style": "",
-            "layout": "IPY_MODEL_4191af78535e4da8bb797690eff84e00",
-            "_model_module": "@jupyter-widgets/controls",
-            "children": [
-              "IPY_MODEL_9ce3d57b96b64da0b15e3f3626bacb30",
-              "IPY_MODEL_f8da2c91156342a69d9b262f4f993aa4"
-            ]
-          }
-        },
-        "4191af78535e4da8bb797690eff84e00": {
-          "model_module": "@jupyter-widgets/base",
-          "model_name": "LayoutModel",
-          "state": {
-            "_view_name": "LayoutView",
-            "grid_template_rows": null,
-            "right": null,
-            "justify_content": null,
-            "_view_module": "@jupyter-widgets/base",
-            "overflow": null,
-            "_model_module_version": "1.2.0",
-            "_view_count": null,
-            "flex_flow": null,
-            "width": null,
-            "min_width": null,
-            "border": null,
-            "align_items": null,
-            "bottom": null,
-            "_model_module": "@jupyter-widgets/base",
-            "top": null,
-            "grid_column": null,
-            "overflow_y": null,
-            "overflow_x": null,
-            "grid_auto_flow": null,
-            "grid_area": null,
-            "grid_template_columns": null,
-            "flex": null,
-            "_model_name": "LayoutModel",
-            "justify_items": null,
-            "grid_row": null,
-            "max_height": null,
-            "align_content": null,
-            "visibility": null,
-            "align_self": null,
-            "height": null,
-            "min_height": null,
-            "padding": null,
-            "grid_auto_rows": null,
-            "grid_gap": null,
-            "max_width": null,
-            "order": null,
-            "_view_module_version": "1.2.0",
-            "grid_template_areas": null,
-            "object_position": null,
-            "object_fit": null,
-            "grid_auto_columns": null,
-            "margin": null,
-            "display": null,
-            "left": null
-          }
-        },
-        "9ce3d57b96b64da0b15e3f3626bacb30": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_name": "FloatProgressModel",
-          "state": {
-            "_view_name": "ProgressView",
-            "style": "IPY_MODEL_97370923218945c5b80ab468751ac8a7",
-            "_dom_classes": [],
-            "description": "Downloading: 100%",
-            "_model_name": "FloatProgressModel",
-            "bar_style": "success",
-            "max": 230,
-            "_view_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "value": 230,
-            "_view_count": null,
-            "_view_module_version": "1.5.0",
-            "orientation": "horizontal",
-            "min": 0,
-            "description_tooltip": null,
-            "_model_module": "@jupyter-widgets/controls",
-            "layout": "IPY_MODEL_0ba4a91f472e4c41ba80ab4025288446"
-          }
-        },
-        "f8da2c91156342a69d9b262f4f993aa4": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_name": "HTMLModel",
-          "state": {
-            "_view_name": "HTMLView",
-            "style": "IPY_MODEL_15aa4b6f8f784c74804107be249126b9",
-            "_dom_classes": [],
-            "description": "",
-            "_model_name": "HTMLModel",
-            "placeholder": "​",
-            "_view_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "value": " 230/230 [00:01&lt;00:00, 185B/s]",
-            "_view_count": null,
-            "_view_module_version": "1.5.0",
-            "description_tooltip": null,
-            "_model_module": "@jupyter-widgets/controls",
-            "layout": "IPY_MODEL_edea457617ed4792aeeb65292019ceb4"
-          }
-        },
-        "97370923218945c5b80ab468751ac8a7": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_name": "ProgressStyleModel",
-          "state": {
-            "_view_name": "StyleView",
-            "_model_name": "ProgressStyleModel",
-            "description_width": "initial",
-            "_view_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.5.0",
-            "_view_count": null,
-            "_view_module_version": "1.2.0",
-            "bar_color": null,
-            "_model_module": "@jupyter-widgets/controls"
-          }
-        },
-        "0ba4a91f472e4c41ba80ab4025288446": {
-          "model_module": "@jupyter-widgets/base",
-          "model_name": "LayoutModel",
-          "state": {
-            "_view_name": "LayoutView",
-            "grid_template_rows": null,
-            "right": null,
-            "justify_content": null,
-            "_view_module": "@jupyter-widgets/base",
-            "overflow": null,
-            "_model_module_version": "1.2.0",
-            "_view_count": null,
-            "flex_flow": null,
-            "width": null,
-            "min_width": null,
-            "border": null,
-            "align_items": null,
-            "bottom": null,
-            "_model_module": "@jupyter-widgets/base",
-            "top": null,
-            "grid_column": null,
-            "overflow_y": null,
-            "overflow_x": null,
-            "grid_auto_flow": null,
-            "grid_area": null,
-            "grid_template_columns": null,
-            "flex": null,
-            "_model_name": "LayoutModel",
-            "justify_items": null,
-            "grid_row": null,
-            "max_height": null,
-            "align_content": null,
-            "visibility": null,
-            "align_self": null,
-            "height": null,
-            "min_height": null,
-            "padding": null,
-            "grid_auto_rows": null,
-            "grid_gap": null,
-            "max_width": null,
-            "order": null,
-            "_view_module_version": "1.2.0",
-            "grid_template_areas": null,
-            "object_position": null,
-            "object_fit": null,
-            "grid_auto_columns": null,
-            "margin": null,
-            "display": null,
-            "left": null
-          }
-        },
-        "15aa4b6f8f784c74804107be249126b9": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_name": "DescriptionStyleModel",
-          "state": {
-            "_view_name": "StyleView",
-            "_model_name": "DescriptionStyleModel",
-            "description_width": "",
-            "_view_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.5.0",
-            "_view_count": null,
-            "_view_module_version": "1.2.0",
-            "_model_module": "@jupyter-widgets/controls"
-          }
-        },
-        "edea457617ed4792aeeb65292019ceb4": {
-          "model_module": "@jupyter-widgets/base",
-          "model_name": "LayoutModel",
-          "state": {
-            "_view_name": "LayoutView",
-            "grid_template_rows": null,
-            "right": null,
-            "justify_content": null,
-            "_view_module": "@jupyter-widgets/base",
-            "overflow": null,
-            "_model_module_version": "1.2.0",
-            "_view_count": null,
-            "flex_flow": null,
-            "width": null,
-            "min_width": null,
-            "border": null,
-            "align_items": null,
-            "bottom": null,
-            "_model_module": "@jupyter-widgets/base",
-            "top": null,
-            "grid_column": null,
-            "overflow_y": null,
-            "overflow_x": null,
-            "grid_auto_flow": null,
-            "grid_area": null,
-            "grid_template_columns": null,
-            "flex": null,
-            "_model_name": "LayoutModel",
-            "justify_items": null,
-            "grid_row": null,
-            "max_height": null,
-            "align_content": null,
-            "visibility": null,
-            "align_self": null,
-            "height": null,
-            "min_height": null,
-            "padding": null,
-            "grid_auto_rows": null,
-            "grid_gap": null,
-            "max_width": null,
-            "order": null,
-            "_view_module_version": "1.2.0",
-            "grid_template_areas": null,
-            "object_position": null,
-            "object_fit": null,
-            "grid_auto_columns": null,
-            "margin": null,
-            "display": null,
-            "left": null
-          }
-        }
-      }
-    }
-  },
   "cells": [
     {
       "cell_type": "markdown",
       "metadata": {
-        "id": "aXACkAtfNpG0",
-        "colab_type": "text"
+        "colab_type": "text",
+        "id": "aXACkAtfNpG0"
       },
       "source": [
         "# The Attention Mechanism\n",
@@ -280,73 +17,65 @@
     },
     {
       "cell_type": "code",
+      "execution_count": null,
       "metadata": {
-        "id": "veRoFjFRNXwJ",
+        "colab": {},
         "colab_type": "code",
-        "colab": {}
+        "id": "veRoFjFRNXwJ"
       },
+      "outputs": [],
       "source": [
         "import numpy as np\n",
         "from scipy.special import softmax"
-      ],
-      "execution_count": 0,
-      "outputs": []
+      ]
     },
     {
       "cell_type": "code",
+      "execution_count": null,
       "metadata": {
-        "id": "JLe9lWCJNogW",
-        "colab_type": "code",
-        "outputId": "733e039b-343e-4161-9919-19b3a1ec130f",
         "colab": {
           "base_uri": "https://localhost:8080/",
           "height": 90
-        }
+        },
+        "colab_type": "code",
+        "id": "JLe9lWCJNogW",
+        "outputId": "733e039b-343e-4161-9919-19b3a1ec130f"
       },
-      "source": [
-        "print(\"Step 1: Input : 3 inputs, d_model=4\")\n",
-        "x =np.array([[1.0, 0.0, 1.0, 0.0],   # Input 1\n",
-        "             [0.0, 2.0, 0.0, 2.0],   # Input 2\n",
-        "             [1.0, 1.0, 1.0, 1.0]])  # Input 3\n",
-        "print(x)"
-      ],
-      "execution_count": 0,
       "outputs": [
         {
+          "name": "stdout",
           "output_type": "stream",
           "text": [
             "Step 1: Input : 3 inputs, d_model=4\n",
             "[[1. 0. 1. 0.]\n",
             " [0. 2. 0. 2.]\n",
             " [1. 1. 1. 1.]]\n"
-          ],
-          "name": "stdout"
+          ]
         }
+      ],
+      "source": [
+        "print(\"Step 1: Input : 3 inputs, d_model=4\")\n",
+        "x = np.array([[1.0, 0.0, 1.0, 0.0],   # Input 1\n",
+        "              [0.0, 2.0, 0.0, 2.0],   # Input 2\n",
+        "              [1.0, 1.0, 1.0, 1.0]])  # Input 3\n",
+        "print(x)"
       ]
     },
     {
       "cell_type": "code",
+      "execution_count": null,
       "metadata": {
-        "id": "JZImwtHPN91V",
-        "colab_type": "code",
-        "outputId": "07706940-e200-4956-b957-fe9681139d0d",
         "colab": {
           "base_uri": "https://localhost:8080/",
           "height": 126
-        }
+        },
+        "colab_type": "code",
+        "id": "JZImwtHPN91V",
+        "outputId": "07706940-e200-4956-b957-fe9681139d0d"
       },
-      "source": [
-        "print(\"Step 2: weights 3 dimensions x d_model=4\")\n",
-        "print(\"w_query\")\n",
-        "w_query =np.array([[1, 0, 1],\n",
-        "                   [1, 0, 0],\n",
-        "                   [0, 0, 1],\n",
-        "                   [0, 1, 1]])\n",
-        "print(w_query)"
-      ],
-      "execution_count": 0,
       "outputs": [
         {
+          "name": "stdout",
           "output_type": "stream",
           "text": [
             "Step 2: weights 3 dimensions x d_model=4\n",
@@ -355,33 +84,34 @@
             " [1 0 0]\n",
             " [0 0 1]\n",
             " [0 1 1]]\n"
-          ],
-          "name": "stdout"
+          ]
         }
+      ],
+      "source": [
+        "print(\"Step 2: weights 3 dimensions x d_model=4\")\n",
+        "print(\"w_query\")\n",
+        "w_query = np.array([[1, 0, 1],\n",
+        "                    [1, 0, 0],\n",
+        "                    [0, 0, 1],\n",
+        "                    [0, 1, 1]])\n",
+        "print(w_query)"
       ]
     },
     {
       "cell_type": "code",
+      "execution_count": null,
       "metadata": {
-        "id": "7kRBS7MUOFgV",
-        "colab_type": "code",
-        "outputId": "8b0bcc03-88b1-4e8d-a483-dacc91ffa9ee",
         "colab": {
           "base_uri": "https://localhost:8080/",
           "height": 108
-        }
+        },
+        "colab_type": "code",
+        "id": "7kRBS7MUOFgV",
+        "outputId": "8b0bcc03-88b1-4e8d-a483-dacc91ffa9ee"
       },
-      "source": [
-        "print(\"w_key\")\n",
-        "w_key =np.array([[0, 0, 1],\n",
-        "                 [1, 1, 0],\n",
-        "                 [0, 1, 0],\n",
-        "                 [1, 1, 0]])\n",
-        "print(w_key)"
-      ],
-      "execution_count": 0,
       "outputs": [
         {
+          "name": "stdout",
           "output_type": "stream",
           "text": [
             "w_key\n",
@@ -389,33 +119,33 @@
             " [1 1 0]\n",
             " [0 1 0]\n",
             " [1 1 0]]\n"
-          ],
-          "name": "stdout"
+          ]
         }
+      ],
+      "source": [
+        "print(\"w_key\")\n",
+        "w_key = np.array([[0, 0, 1],\n",
+        "                  [1, 1, 0],\n",
+        "                  [0, 1, 0],\n",
+        "                  [1, 1, 0]])\n",
+        "print(w_key)"
       ]
     },
     {
       "cell_type": "code",
+      "execution_count": null,
       "metadata": {
-        "id": "Napm2VtkOIEN",
-        "colab_type": "code",
-        "outputId": "7331eb08-64d5-4a36-eeef-0a0a556f130b",
         "colab": {
           "base_uri": "https://localhost:8080/",
           "height": 108
-        }
+        },
+        "colab_type": "code",
+        "id": "Napm2VtkOIEN",
+        "outputId": "7331eb08-64d5-4a36-eeef-0a0a556f130b"
       },
-      "source": [
-        "print(\"w_value\")\n",
-        "w_value = np.array([[0, 2, 0],\n",
-        "                    [0, 3, 0],\n",
-        "                    [1, 0, 3],\n",
-        "                    [1, 1, 0]])\n",
-        "print(w_value)"
-      ],
-      "execution_count": 0,
       "outputs": [
         {
+          "name": "stdout",
           "output_type": "stream",
           "text": [
             "w_value\n",
@@ -423,32 +153,33 @@
             " [0 3 0]\n",
             " [1 0 3]\n",
             " [1 1 0]]\n"
-          ],
-          "name": "stdout"
+          ]
         }
+      ],
+      "source": [
+        "print(\"w_value\")\n",
+        "w_value = np.array([[0, 2, 0],\n",
+        "                    [0, 3, 0],\n",
+        "                    [1, 0, 3],\n",
+        "                    [1, 1, 0]])\n",
+        "print(w_value)"
       ]
     },
     {
       "cell_type": "code",
+      "execution_count": null,
       "metadata": {
-        "id": "JqapIgfDOQ7d",
-        "colab_type": "code",
-        "outputId": "fd610d7a-968a-47e6-d614-40ad03c1d172",
         "colab": {
           "base_uri": "https://localhost:8080/",
           "height": 108
-        }
+        },
+        "colab_type": "code",
+        "id": "JqapIgfDOQ7d",
+        "outputId": "fd610d7a-968a-47e6-d614-40ad03c1d172"
       },
-      "source": [
-        "print(\"Step 3: Matrix multiplication to obtain Q,K,V\")\n",
-        "\n",
-        "print(\"Queries: x * w_query\")\n",
-        "Q=np.matmul(x,w_query)\n",
-        "print(Q)"
-      ],
-      "execution_count": 0,
       "outputs": [
         {
+          "name": "stdout",
           "output_type": "stream",
           "text": [
             "Step 3: Matrix multiplication to obtain Q,K,V\n",
@@ -456,32 +187,32 @@
             "[[1. 0. 2.]\n",
             " [2. 2. 2.]\n",
             " [2. 1. 3.]]\n"
-          ],
-          "name": "stdout"
+          ]
         }
+      ],
+      "source": [
+        "print(\"Step 3: Matrix multiplication to obtain Q,K,V\")\n",
+        "\n",
+        "print(\"Queries: x * w_query\")\n",
+        "Q = np.matmul(x, w_query)\n",
+        "print(Q)"
       ]
     },
     {
       "cell_type": "code",
+      "execution_count": null,
       "metadata": {
-        "id": "NmfMln1Wmv73",
-        "colab_type": "code",
-        "outputId": "065b63ba-7584-4302-97cd-d5e1765470ed",
         "colab": {
           "base_uri": "https://localhost:8080/",
           "height": 108
-        }
+        },
+        "colab_type": "code",
+        "id": "NmfMln1Wmv73",
+        "outputId": "065b63ba-7584-4302-97cd-d5e1765470ed"
       },
-      "source": [
-        "print(\"Step 3: Matrix multiplication to obtain Q,K,V\")\n",
-        "\n",
-        "print(\"Keys: x * w_key\")\n",
-        "K=np.matmul(x,w_key)\n",
-        "print(K)"
-      ],
-      "execution_count": 0,
       "outputs": [
         {
+          "name": "stdout",
           "output_type": "stream",
           "text": [
             "Step 3: Matrix multiplication to obtain Q,K,V\n",
@@ -489,138 +220,127 @@
             "[[0. 1. 1.]\n",
             " [4. 4. 0.]\n",
             " [2. 3. 1.]]\n"
-          ],
-          "name": "stdout"
+          ]
         }
+      ],
+      "source": [
+        "print(\"Step 3: Matrix multiplication to obtain Q,K,V\")\n",
+        "\n",
+        "print(\"Keys: x * w_key\")\n",
+        "K = np.matmul(x, w_key)\n",
+        "print(K)"
       ]
     },
     {
       "cell_type": "code",
+      "execution_count": null,
       "metadata": {
-        "id": "v3Asv-8mOWkN",
-        "colab_type": "code",
-        "outputId": "2ec71310-0486-46f4-d9f5-d12a1a6ad0e6",
         "colab": {
           "base_uri": "https://localhost:8080/",
           "height": 90
-        }
+        },
+        "colab_type": "code",
+        "id": "v3Asv-8mOWkN",
+        "outputId": "2ec71310-0486-46f4-d9f5-d12a1a6ad0e6"
       },
-      "source": [
-        "print(\"Values: x * w_value\")\n",
-        "V=np.matmul(x,w_value)\n",
-        "print(V)"
-      ],
-      "execution_count": 0,
       "outputs": [
         {
+          "name": "stdout",
           "output_type": "stream",
           "text": [
             "Values: x * w_value\n",
             "[[1. 2. 3.]\n",
             " [2. 8. 0.]\n",
             " [2. 6. 3.]]\n"
-          ],
-          "name": "stdout"
+          ]
         }
+      ],
+      "source": [
+        "print(\"Values: x * w_value\")\n",
+        "V = np.matmul(x, w_value)\n",
+        "print(V)"
       ]
     },
     {
       "cell_type": "code",
+      "execution_count": null,
       "metadata": {
-        "id": "gfgRAHUuOp5c",
-        "colab_type": "code",
-        "outputId": "ad02f055-11e0-4b9a-eb15-b66e4846c95e",
         "colab": {
           "base_uri": "https://localhost:8080/",
           "height": 90
-        }
+        },
+        "colab_type": "code",
+        "id": "gfgRAHUuOp5c",
+        "outputId": "ad02f055-11e0-4b9a-eb15-b66e4846c95e"
       },
-      "source": [
-        "print(\"Step 4: Scaled Attention Scores\")\n",
-        "k_d=1   #square root of k_d=3 rounded down to 1 for this example\n",
-        "attention_scores = (Q @ K.transpose())/k_d\n",
-        "print(attention_scores)"
-      ],
-      "execution_count": 0,
       "outputs": [
         {
+          "name": "stdout",
           "output_type": "stream",
           "text": [
             "Step 4: Scaled Attention Scores\n",
             "[[ 2.  4.  4.]\n",
             " [ 4. 16. 12.]\n",
             " [ 4. 12. 10.]]\n"
-          ],
-          "name": "stdout"
+          ]
         }
+      ],
+      "source": [
+        "print(\"Step 4: Scaled Attention Scores\")\n",
+        "k_d = 1  # scaling factor: square root of the key dimension (3), rounded down to 1 for this example\n",
+        "attention_scores = np.matmul(Q, K.T) / k_d\n",
+        "print(attention_scores)"
       ]
     },
     {
       "cell_type": "code",
+      "execution_count": null,
       "metadata": {
-        "id": "hg2t6KuNOjzM",
-        "colab_type": "code",
-        "outputId": "c0610f91-cd1d-4b0f-b5ce-f6445481186a",
         "colab": {
           "base_uri": "https://localhost:8080/",
           "height": 90
-        }
+        },
+        "colab_type": "code",
+        "id": "hg2t6KuNOjzM",
+        "outputId": "c0610f91-cd1d-4b0f-b5ce-f6445481186a"
       },
-      "source": [
-        "print(\"Step 5: Scaled softmax attention_scores for each vector\")\n",
-        "attention_scores[0]=softmax(attention_scores[0])\n",
-        "attention_scores[1]=softmax(attention_scores[1])\n",
-        "attention_scores[2]=softmax(attention_scores[2])\n",
-        "print(attention_scores[0])\n",
-        "print(attention_scores[1])\n",
-        "print(attention_scores[2])"
-      ],
-      "execution_count": 0,
       "outputs": [
         {
+          "name": "stdout",
           "output_type": "stream",
           "text": [
             "Step 5: Scaled softmax attention_scores for each vector\n",
             "[0.06337894 0.46831053 0.46831053]\n",
             "[6.03366485e-06 9.82007865e-01 1.79861014e-02]\n",
             "[2.95387223e-04 8.80536902e-01 1.19167711e-01]\n"
-          ],
-          "name": "stdout"
+          ]
         }
+      ],
+      "source": [
+        "print(\"Step 5: Scaled softmax attention_scores for each vector\")\n",
+        "attention_scores[0] = softmax(attention_scores[0])\n",
+        "attention_scores[1] = softmax(attention_scores[1])\n",
+        "attention_scores[2] = softmax(attention_scores[2])\n",
+        "print(attention_scores[0])\n",
+        "print(attention_scores[1])\n",
+        "print(attention_scores[2])"
       ]
     },
     {
       "cell_type": "code",
+      "execution_count": null,
       "metadata": {
-        "id": "R4Es7A7NOvjD",
-        "colab_type": "code",
-        "outputId": "b86060fe-1292-47c5-93f6-ddeeca1bfb62",
         "colab": {
           "base_uri": "https://localhost:8080/",
           "height": 199
-        }
+        },
+        "colab_type": "code",
+        "id": "R4Es7A7NOvjD",
+        "outputId": "b86060fe-1292-47c5-93f6-ddeeca1bfb62"
       },
-      "source": [
-        "print(\"Step 6: attention value obtained by score1/k_d * V\")\n",
-        "print(V[0])\n",
-        "print(V[1])\n",
-        "print(V[2])\n",
-        "print(\"Attention 1\")\n",
-        "attention1=attention_scores[0].reshape(-1,1)\n",
-        "attention1=attention_scores[0][0]*V[0]\n",
-        "print(attention1)\n",
-        "\n",
-        "print(\"Attention 2\")\n",
-        "attention2=attention_scores[0][1]*V[1]\n",
-        "print(attention2)\n",
-        "\n",
-        "print(\"Attention 3\")\n",
-        "attention3=attention_scores[0][2]*V[2]\n",
-        "print(attention3)"
-      ],
-      "execution_count": 0,
       "outputs": [
         {
+          "name": "stdout",
           "output_type": "stream",
           "text": [
             "Step 6: attention value obtained by score1/k_d * V\n",
@@ -633,60 +353,71 @@
             "[0.93662106 3.74648425 0.        ]\n",
             "Attention 3\n",
             "[0.93662106 2.80986319 1.40493159]\n"
-          ],
-          "name": "stdout"
+          ]
         }
+      ],
+      "source": [
+        "print(\"Step 6: attention value obtained by score1/k_d * V\")\n",
+        "print(V[0])\n",
+        "print(V[1])\n",
+        "print(V[2])\n",
+        "print(\"Attention 1\")\n",
+        "attention1 = attention_scores[0].reshape(-1,1)\n",
+        "attention1 = attention_scores[0][0] * V[0]\n",
+        "print(attention1)\n",
+        "\n",
+        "print(\"Attention 2\")\n",
+        "attention2 = attention_scores[0][1] * V[1]\n",
+        "print(attention2)\n",
+        "\n",
+        "print(\"Attention 3\")\n",
+        "attention3 = attention_scores[0][2] * V[2]\n",
+        "print(attention3)"
       ]
     },
     {
       "cell_type": "code",
+      "execution_count": null,
       "metadata": {
-        "id": "uBDKhaCvOzXj",
-        "colab_type": "code",
-        "outputId": "138901d8-0aa9-4db9-b8b1-76ad557e6688",
         "colab": {
           "base_uri": "https://localhost:8080/",
           "height": 54
-        }
+        },
+        "colab_type": "code",
+        "id": "uBDKhaCvOzXj",
+        "outputId": "138901d8-0aa9-4db9-b8b1-76ad557e6688"
       },
-      "source": [
-        "print(\"Step 7: summed the results to create the first line of the output matrix\")\n",
-        "attention_input1=attention1+attention2+attention3\n",
-        "print(attention_input1)"
-      ],
-      "execution_count": 0,
       "outputs": [
         {
+          "name": "stdout",
           "output_type": "stream",
           "text": [
             "Step 7: summed the results to create the first line of the output matrix\n",
             "[1.93662106 6.68310531 1.59506841]\n"
-          ],
-          "name": "stdout"
+          ]
         }
+      ],
+      "source": [
+        "print(\"Step 7: summed the results to create the first line of the output matrix\")\n",
+        "attention_input1 = attention1 + attention2 + attention3\n",
+        "print(attention_input1)"
       ]
     },
     {
       "cell_type": "code",
+      "execution_count": null,
       "metadata": {
-        "id": "iEjgRcqHO4ik",
-        "colab_type": "code",
-        "outputId": "675a154b-a305-4c0c-e314-353541abfd3e",
         "colab": {
           "base_uri": "https://localhost:8080/",
           "height": 635
-        }
+        },
+        "colab_type": "code",
+        "id": "iEjgRcqHO4ik",
+        "outputId": "675a154b-a305-4c0c-e314-353541abfd3e"
       },
-      "source": [
-        "print(\"Step 8: Step 1 to 7 for inputs 1 to 3\")\n",
-        "#We assume we have 3 results with learned weights (they were not trained in this example)\n",
-        "#We assume we are implementing the original Transformer paper. We will have 3 results of 64 dimensions each\n",
-        "attention_head1=np.random.random((3, 64))\n",
-        "print(attention_head1)"
-      ],
-      "execution_count": 0,
       "outputs": [
         {
+          "name": "stdout",
           "output_type": "stream",
           "text": [
             "Step 8: Step 1 to 7 for inputs 1 to 3\n",
@@ -723,81 +454,87 @@
             "  0.04674047 0.97762416 0.72747288 0.75616534 0.68105477 0.06914679\n",
             "  0.14054312 0.42816012 0.66792325 0.03168237 0.68685758 0.43487164\n",
             "  0.08064005 0.23444144 0.60360253 0.21423994]]\n"
-          ],
-          "name": "stdout"
+          ]
         }
+      ],
+      "source": [
+        "print(\"Step 8: Step 1 to 7 for inputs 1 to 3\")\n",
+        "# We assume we have 3 results with learned weights (they were not trained in this example).\n",
+        "# We assume we are implementing the original Transformer paper, so we will have 3 results of 64 dimensions each.\n",
+        "attention_head1 = np.random.random((3, 64))\n",
+        "print(attention_head1)"
       ]
     },
     {
       "cell_type": "code",
+      "execution_count": null,
       "metadata": {
-        "id": "QI50dkZ1O630",
-        "colab_type": "code",
-        "outputId": "7d467842-f837-4e41-e099-534549b6fc05",
         "colab": {
           "base_uri": "https://localhost:8080/",
           "height": 54
-        }
+        },
+        "colab_type": "code",
+        "id": "QI50dkZ1O630",
+        "outputId": "7d467842-f837-4e41-e099-534549b6fc05"
       },
-      "source": [
-        "print(\"Step 9: We assume we have trained the 8 heads of the attention sub-layer\")\n",
-        "z0h1=np.random.random((3, 64))\n",
-        "z1h2=np.random.random((3, 64))\n",
-        "z2h3=np.random.random((3, 64))\n",
-        "z3h4=np.random.random((3, 64))\n",
-        "z4h5=np.random.random((3, 64))\n",
-        "z5h6=np.random.random((3, 64))\n",
-        "z6h7=np.random.random((3, 64))\n",
-        "z7h8=np.random.random((3, 64))\n",
-        "print(\"shape of one head\",z0h1.shape,\"dimension of 8 heads\",64*8)"
-      ],
-      "execution_count": 0,
       "outputs": [
         {
+          "name": "stdout",
           "output_type": "stream",
           "text": [
             "Step 9: We assume we have trained the 8 heads of the attention sub-layer\n",
             "shape of one head (3, 64) dimension of 8 heads 512\n"
-          ],
-          "name": "stdout"
+          ]
         }
+      ],
+      "source": [
+        "print(\"Step 9: We assume we have trained the 8 heads of the attention sub-layer\")\n",
+        "z0h1 = np.random.random((3, 64))\n",
+        "z1h2 = np.random.random((3, 64))\n",
+        "z2h3 = np.random.random((3, 64))\n",
+        "z3h4 = np.random.random((3, 64))\n",
+        "z4h5 = np.random.random((3, 64))\n",
+        "z5h6 = np.random.random((3, 64))\n",
+        "z6h7 = np.random.random((3, 64))\n",
+        "z7h8 = np.random.random((3, 64))\n",
+        "print(\"shape of one head\", z0h1.shape, \"dimension of 8 heads\", 64 * 8)"
       ]
     },
     {
       "cell_type": "code",
+      "execution_count": null,
       "metadata": {
-        "id": "3n87LE92_Puf",
-        "colab_type": "code",
-        "outputId": "55d00415-ebea-43a6-b4c5-ff13e02c3052",
         "colab": {
           "base_uri": "https://localhost:8080/",
           "height": 90
-        }
+        },
+        "colab_type": "code",
+        "id": "3n87LE92_Puf",
+        "outputId": "55d00415-ebea-43a6-b4c5-ff13e02c3052"
       },
-      "source": [
-        "print(\"Step 10: Concatenation of heads 1 to 8 to obtain the original 8x64=512 output dimension of the model\")\n",
-        "output_attention=np.hstack((z0h1,z1h2,z2h3,z3h4,z4h5,z5h6,z6h7,z7h8))\n",
-        "print(output_attention)"
-      ],
-      "execution_count": 0,
       "outputs": [
         {
+          "name": "stdout",
           "output_type": "stream",
           "text": [
             "Step 10: Concantenation of heads 1 to 8 to obtain the original 8x64=512 ouput dimension of the model\n",
             "[[0.46950893 0.88546586 0.47615937 ... 0.08285802 0.16577096 0.61094461]\n",
             " [0.31638247 0.24246402 0.30390966 ... 0.42283366 0.62127905 0.64414042]\n",
             " [0.1922683  0.7017995  0.60116595 ... 0.20012387 0.16264044 0.93645276]]\n"
-          ],
-          "name": "stdout"
+          ]
         }
+      ],
+      "source": [
+        "print(\"Step 10: Concatenation of heads 1 to 8 to obtain the original 8x64=512 output dimension of the model\")\n",
+        "output_attention = np.hstack((z0h1, z1h2, z2h3, z3h4, z4h5, z5h6, z6h7, z7h8))\n",
+        "print(output_attention)"
       ]
     },
     {
       "cell_type": "markdown",
       "metadata": {
-        "id": "PJLl4Jf3fPLh",
-        "colab_type": "text"
+        "colab_type": "text",
+        "id": "PJLl4Jf3fPLh"
       },
       "source": [
         "And now with Hugging Face in one line!"
@@ -805,24 +542,22 @@
     },
     {
       "cell_type": "code",
+      "execution_count": null,
       "metadata": {
-        "id": "CZIRvcRmfTPb",
+        "colab": {},
         "colab_type": "code",
-        "colab": {}
+        "id": "CZIRvcRmfTPb"
       },
+      "outputs": [],
       "source": [
         "#@title Transformer Installation\n",
         "!pip -qq install transformers"
-      ],
-      "execution_count": 0,
-      "outputs": []
+      ]
     },
     {
       "cell_type": "code",
+      "execution_count": null,
       "metadata": {
-        "id": "cNwLYc-SfXdF",
-        "colab_type": "code",
-        "outputId": "d1314cc6-74d6-45cf-b8d6-0a903e58ac60",
         "colab": {
           "base_uri": "https://localhost:8080/",
           "height": 85,
@@ -836,24 +571,18 @@
             "15aa4b6f8f784c74804107be249126b9",
             "edea457617ed4792aeeb65292019ceb4"
           ]
-        }
+        },
+        "colab_type": "code",
+        "id": "cNwLYc-SfXdF",
+        "outputId": "d1314cc6-74d6-45cf-b8d6-0a903e58ac60"
       },
-      "source": [
-        "#@title Retrieve pipeline of modules and choose English to French translation\n",
-        "from transformers import pipeline\n",
-        "translator = pipeline(\"translation_en_to_fr\")\n",
-        "#One line of code!\n",
-        "print(translator(\"It is easy to translate languages with transformers\", max_length=40))"
-      ],
-      "execution_count": 0,
       "outputs": [
         {
-          "output_type": "display_data",
           "data": {
             "application/vnd.jupyter.widget-view+json": {
               "model_id": "946c90b82f7f46caa25c885668b75eab",
-              "version_minor": 0,
-              "version_major": 2
+              "version_major": 2,
+              "version_minor": 0
             },
             "text/plain": [
               "HBox(children=(FloatProgress(value=0.0, description='Downloading', max=230.0, style=ProgressStyle(description_…"
@@ -861,17 +590,298 @@
           },
           "metadata": {
             "tags": []
-          }
+          },
+          "output_type": "display_data"
         },
         {
+          "name": "stdout",
           "output_type": "stream",
           "text": [
             "\n",
             "[{'translation_text': 'Il est facile de traduire des langues avec des transformateurs.'}]\n"
-          ],
-          "name": "stdout"
+          ]
         }
+      ],
+      "source": [
+        "#@title Retrieve pipeline of modules and choose English to French translation\n",
+        "from transformers import pipeline\n",
+        "translator = pipeline(\"translation_en_to_fr\")\n",
+        "#One line of code!\n",
+        "print(translator(\"It is easy to translate languages with transformers\", max_length=40))"
       ]
     }
-  ]
-}
\ No newline at end of file
+  ],
+  "metadata": {
+    "accelerator": "GPU",
+    "colab": {
+      "collapsed_sections": [],
+      "name": "Multi-Head Attention Sub-Layer.ipynb",
+      "provenance": []
+    },
+    "kernelspec": {
+      "display_name": "Python 3",
+      "language": "python",
+      "name": "python3"
+    },
+    "language_info": {
+      "name": "python",
+      "version": "3.10.6 (main, Mar 10 2023, 10:55:28) [GCC 11.3.0]"
+    },
+    "vscode": {
+      "interpreter": {
+        "hash": "916dbcbb3f70747c44a77c7bcd40155683ae19c65e1c03b4aa3499c5328201f1"
+      }
+    },
+    "widgets": {
+      "application/vnd.jupyter.widget-state+json": {
+        "0ba4a91f472e4c41ba80ab4025288446": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "15aa4b6f8f784c74804107be249126b9": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "DescriptionStyleModel",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "DescriptionStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "description_width": ""
+          }
+        },
+        "4191af78535e4da8bb797690eff84e00": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "946c90b82f7f46caa25c885668b75eab": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "HBoxModel",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HBoxModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HBoxView",
+            "box_style": "",
+            "children": [
+              "IPY_MODEL_9ce3d57b96b64da0b15e3f3626bacb30",
+              "IPY_MODEL_f8da2c91156342a69d9b262f4f993aa4"
+            ],
+            "layout": "IPY_MODEL_4191af78535e4da8bb797690eff84e00"
+          }
+        },
+        "97370923218945c5b80ab468751ac8a7": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "ProgressStyleModel",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "ProgressStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "bar_color": null,
+            "description_width": "initial"
+          }
+        },
+        "9ce3d57b96b64da0b15e3f3626bacb30": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "FloatProgressModel",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "FloatProgressModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "ProgressView",
+            "bar_style": "success",
+            "description": "Downloading: 100%",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_0ba4a91f472e4c41ba80ab4025288446",
+            "max": 230,
+            "min": 0,
+            "orientation": "horizontal",
+            "style": "IPY_MODEL_97370923218945c5b80ab468751ac8a7",
+            "value": 230
+          }
+        },
+        "edea457617ed4792aeeb65292019ceb4": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "f8da2c91156342a69d9b262f4f993aa4": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "HTMLModel",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HTMLModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HTMLView",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_edea457617ed4792aeeb65292019ceb4",
+            "placeholder": "​",
+            "style": "IPY_MODEL_15aa4b6f8f784c74804107be249126b9",
+            "value": " 230/230 [00:01&lt;00:00, 185B/s]"
+          }
+        }
+      }
+    }
+  },
+  "nbformat": 4,
+  "nbformat_minor": 0
+}