From 23679634c208d7f9a36bee0163cfd94c63ef5bd8 Mon Sep 17 00:00:00 2001 From: Ahmed Mejbri Date: Sun, 9 Feb 2020 20:15:43 +0300 Subject: [PATCH 1/3] Update Sentiment Analysis with RNN.ipynb --- Sentiment Analysis with RNN.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Sentiment Analysis with RNN.ipynb b/Sentiment Analysis with RNN.ipynb index 48b8b45..32f1eb8 100644 --- a/Sentiment Analysis with RNN.ipynb +++ b/Sentiment Analysis with RNN.ipynb @@ -140,7 +140,7 @@ ], "source": [ "print('Minimum review length: {}'.format(\n", - "len(min((X_test + X_test), key=len))))" + "len(min((X_train + X_test), key=len))))" ] }, { From baed947690e8138a90c9ef83fb1c444c65e9108c Mon Sep 17 00:00:00 2001 From: Ahmed Mejbri Date: Mon, 10 Feb 2020 01:32:38 +0300 Subject: [PATCH 2/3] Created using Colaboratory --- sentiment_analysis_1st.ipynb | 488 +++++++++++++++++++++++++++++++++++ 1 file changed, 488 insertions(+) create mode 100644 sentiment_analysis_1st.ipynb diff --git a/sentiment_analysis_1st.ipynb b/sentiment_analysis_1st.ipynb new file mode 100644 index 0000000..60408fc --- /dev/null +++ b/sentiment_analysis_1st.ipynb @@ -0,0 +1,488 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "name": "sentiment-analysis-1st.ipynb", + "provenance": [], + "collapsed_sections": [], + "authorship_tag": "ABX9TyP2K1/X8LCE5kyxnFPznaia", + "include_colab_link": true + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + } + }, + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "view-in-github", + "colab_type": "text" + }, + "source": [ + "\"Open" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "2jYwItyXgLRJ", + "colab_type": "text" + }, + "source": [ + "We will use an RNN (LSTM: long short-term memory) to perform sentiment analysis.\n", + "Keras provides the IMDb movie review dataset, which we can use directly.\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "iPzuJGaJgGLa", + "colab_type": "code", + "colab": {} + }, + "source": [ + "from keras.datasets import imdb" + ], + "execution_count": 0, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "a5SKWBi-hyzg", + "colab_type": "code", + "outputId": "7cb08e48-9b32-44a9-b7d0-161439f735f5", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 34 + } + }, + "source": [ + "vocabulary_size = 5000\n", + "\n", + "(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words = vocabulary_size)\n", + "print(\"Loaded dataset with {} training samples and {} testing samples\".format(len(x_train), len(x_test)))" + ], + "execution_count": 105, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Loaded dataset with 25000 training samples and 25000 testing samples\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "1uv0NSctlB06", + "colab_type": "code", + "outputId": "42ba41b9-fe9f-4e86-d220-25a1743b76f3", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 105 + } + }, + "source": [ + "print('---review---')\n", + "print(x_train[6])\n", + "print('---label---')\n", + "print(y_train[6])" + ], + "execution_count": 106, + "outputs": [ + { + "output_type": "stream", + "text": [ + "---review---\n", + "[1, 2, 365, 1234, 5, 1156, 354, 11, 14, 2, 2, 7, 1016, 2, 2, 356, 44, 4, 1349, 500, 746, 5, 200, 4, 4132, 11, 2, 2, 1117, 1831, 2, 5, 4831, 26, 6, 2, 4183, 17, 369, 37, 215, 1345, 143, 2, 5, 1838, 8, 1974, 15, 36, 119, 257, 85, 52, 486, 9, 6, 2, 2, 63, 271, 6, 196, 96, 949, 4121, 4, 2, 7, 4, 2212, 2436, 819,
63, 47, 77, 2, 180, 6, 227, 11, 94, 2494, 2, 13, 423, 4, 168, 7, 4, 22, 5, 89, 665, 71, 270, 56, 5, 13, 197, 12, 161, 2, 99, 76, 23, 2, 7, 419, 665, 40, 91, 85, 108, 7, 4, 2084, 5, 4773, 81, 55, 52, 1901]\n", + "---label---\n", + "1\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "Xd14L9WVlQW0", + "colab_type": "code", + "outputId": "c0447049-799e-4154-c988-3e1739b24eca", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 54 + } + }, + "source": [ + "word2id = imdb.get_word_index()\n", + "id2word = {}\n", + "for word, i in word2id.items():\n", + " id2word[i] = word\n", + "\n", + "#testing id2word\n", + "print ([id2word.get(i, \" \") for i in x_train[6]])" + ], + "execution_count": 107, + "outputs": [ + { + "output_type": "stream", + "text": [ + "['the', 'and', 'full', 'involving', 'to', 'impressive', 'boring', 'this', 'as', 'and', 'and', 'br', 'villain', 'and', 'and', 'need', 'has', 'of', 'costumes', 'b', 'message', 'to', 'may', 'of', 'props', 'this', 'and', 'and', 'concept', 'issue', 'and', 'to', \"god's\", 'he', 'is', 'and', 'unfolds', 'movie', 'women', 'like', \"isn't\", 'surely', \"i'm\", 'and', 'to', 'toward', 'in', \"here's\", 'for', 'from', 'did', 'having', 'because', 'very', 'quality', 'it', 'is', 'and', 'and', 'really', 'book', 'is', 'both', 'too', 'worked', 'carl', 'of', 'and', 'br', 'of', 'reviewer', 'closer', 'figure', 'really', 'there', 'will', 'and', 'things', 'is', 'far', 'this', 'make', 'mistakes', 'and', 'was', \"couldn't\", 'of', 'few', 'br', 'of', 'you', 'to', \"don't\", 'female', 'than', 'place', 'she', 'to', 'was', 'between', 'that', 'nothing', 'and', 'movies', 'get', 'are', 'and', 'br', 'yes', 'female', 'just', 'its', 'because', 'many', 'br', 'of', 'overly', 'to', 'descent', 'people', 'time', 'very', 'bland']\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "N1gCyYOU6tzb", + "colab_type": "text" + }, + "source": [ + "Review words : \n", + "\n", + "['the', 'and', 'full', 'involving', 'to', 'impressive', 'boring', 'this', 'as', 'and', 'and', 'br', 'villain', 'and', 'and', 'need', 'has', 'of', 'costumes', 'b', 'message', 'to', 'may', 'of', 'props', 'this', 'and', 'and', 'concept', 'issue', 'and', 'to', \"god's\", 'he', 'is', 'and', 'unfolds', 'movie', 'women', 'like', \"isn't\", 'surely', \"i'm\", 'and', 'to', 'toward', 'in', \"here's\", 'for', 'from', 'did', 'having', 'because', 'very', 'quality', 'it', 'is', 'and', 'and', 'really', 'book', 'is', 'both', 'too', 'worked', 'carl', 'of', 'and', 'br', 'of', 'reviewer', 'closer', 'figure', 'really', 'there', 'will', 'and', 'things', 'is', 'far', 'this', 'make', 'mistakes', 'and', 'was', \"couldn't\", 'of', 'few', 'br', 'of', 'you', 'to', \"don't\", 'female', 'than', 'place', 'she', 'to', 'was', 'between', 'that', 'nothing', 'and', 'movies', 'get', 'are', 'and', 'br', 'yes', 'female', 'just', 'its', 'because', 'many', 'br', 'of', 'overly', 'to', 'descent', 'people', 'time', 'very', 'bland']" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "NaSu8KsR7IAR", + "colab_type": "text" + }, + "source": [ + "# Padding reviews\n", + "In order to be fed to the RNN, all reviews must have the same length, which is now set for 500 words. 
Longer reviews are truncated, and shorter ones are padded with zeros at the front." + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "UubP-aPo7bTM", + "colab_type": "code", + "colab": {} + }, + "source": [ + "from keras.preprocessing import sequence \n", + "max_words = 500\n", + "x_train = sequence.pad_sequences(x_train, maxlen=max_words)\n", + "x_test = sequence.pad_sequences(x_test, maxlen=max_words)\n", + "\n" + ], + "execution_count": 0, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ZnXqsB8iOzAx", + "colab_type": "text" + }, + "source": [ + "# Building the Model\n", + "\n", + "Input: a sequence of words represented by their IDs.\n", + "Output: a binary sentiment label.\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "5P10k7S17who", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 272 + }, + "outputId": "b79bfd58-fec3-4dce-82ed-4703fd74a76a" + }, + "source": [ + "from keras import Sequential\n", + "from keras.layers import Embedding, LSTM, Dense, Dropout\n", + "\n", + "embedding_size=32\n", + "model=Sequential()\n", + "model.add(Embedding(vocabulary_size, embedding_size, input_length=max_words))\n", + "model.add(LSTM(100))\n", + "model.add(Dense(1, activation='sigmoid'))\n", + "\n", + "print(model.summary())\n" ], + "execution_count": 109, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Model: \"sequential_3\"\n", + "_________________________________________________________________\n", + "Layer (type) Output Shape Param # \n", + "=================================================================\n", + "embedding_3 (Embedding) (None, 500, 32) 160000 \n", + "_________________________________________________________________\n", + "lstm_3 (LSTM) (None, 100) 53200 \n", + "_________________________________________________________________\n", + "dense_3 (Dense) (None, 1) 101 \n", + "=================================================================\n", + "Total params: 213,301\n", + "Trainable params: 213,301\n", + "Non-trainable params: 0\n", + "_________________________________________________________________\n", + "None\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "wBR-qK4rR07n", + "colab_type": "text" + }, + "source": [ + "## Training\n", + "\n", + "Here we specify the loss function, the optimizer, and the evaluation metrics (here, just accuracy)." + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "SkO16QZIQP6W", + "colab_type": "code", + "colab": {} + }, + "source": [ + "model.compile(loss='binary_crossentropy', \n", + " optimizer='adam', \n", + " metrics=['accuracy'])" + ], + "execution_count": 0, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "2TnjWYbVShY9", + "colab_type": "text" + }, + "source": [ + "Now we run the actual training, holding out the first 64 samples as a small validation set." + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "Oi0oVVB9SKSm", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 153 + }, + "outputId": "4e12ace0-bced-4df1-b8e0-e410a1d043da" + }, + "source": [ + "batch_size = 64\n", + "num_epochs = 3\n", + "\n", + "x_valid, y_valid = x_train[:batch_size], y_train[:batch_size]\n", + "x_train2, y_train2 = x_train[batch_size:], y_train[batch_size:]\n", + "\n", + "model.fit(x_train2, y_train2, validation_data=(x_valid, y_valid), batch_size=batch_size, epochs=num_epochs)" + ], + "execution_count": 111, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Train on 24936 samples, validate on 64 samples\n", +
"Epoch 1/3\n", + "24936/24936 [==============================] - 304s 12ms/step - loss: 0.4639 - acc: 0.7776 - val_loss: 0.2284 - val_acc: 0.9062\n", + "Epoch 2/3\n", + "24936/24936 [==============================] - 307s 12ms/step - loss: 0.3038 - acc: 0.8787 - val_loss: 0.2659 - val_acc: 0.9062\n", + "Epoch 3/3\n", + "24936/24936 [==============================] - 311s 12ms/step - loss: 0.2418 - acc: 0.9065 - val_loss: 0.2280 - val_acc: 0.9219\n" + ], + "name": "stdout" + }, + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 111 + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "pyA6zlxaSuly", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 34 + }, + "outputId": "7412c433-21f1-4763-df8e-cc30e3b7654e" + }, + "source": [ + "scores = model.evaluate(x_test, y_test, verbose=0)\n", + "print('Test accuracy:', scores[1])" + ], + "execution_count": 112, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Test accuracy: 0.87364\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "HR8D8-BmXAbc", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 799 + }, + "outputId": "725c6220-1b6e-4c44-c4cf-17c46b2e294e" + }, + "source": [ + "x_train[6]" + ], + "execution_count": 113, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "array([ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", + " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", + " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", + " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", + " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", + " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", + " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", + " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", + " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", + " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", + " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", + " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", + " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", + " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", + " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", + " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", + " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", + " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", + " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", + " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", + " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", + " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", + " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", + " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", + " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", + " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", + " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", + " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", + " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", + " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", + " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", + " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", + " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", + " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", + " 0, 0, 0, 1, 2, 365, 1234, 5, 1156, 354, 11,\n", + " 14, 2, 2, 7, 1016, 2, 2, 356, 44, 4, 1349,\n", + " 500, 746, 5, 200, 4, 4132, 11, 2, 2, 1117, 1831,\n", + " 2, 5, 4831, 26, 6, 2, 4183, 17, 369, 37, 215,\n", + " 1345, 143, 2, 5, 1838, 8, 1974, 15, 36, 119, 257,\n", + " 85, 52, 486, 9, 6, 2, 2, 63, 271, 6, 196,\n", + " 96, 949, 4121, 4, 2, 7, 4, 2212, 2436, 819, 63,\n", + " 47, 77, 2, 180, 6, 227, 11, 94, 2494, 2, 13,\n", + " 423, 4, 168, 7, 4, 22, 5, 89, 665, 71, 270,\n", + " 56, 5, 13, 197, 12, 161, 2, 99, 76, 23, 2,\n", + " 7, 419, 665, 40, 91, 85, 108, 7, 4, 2084, 5,\n", + " 4773, 81, 55, 52, 1901], dtype=int32)" + ] + }, + "metadata": { + 
"tags": [] + }, + "execution_count": 113 + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "2BxZcRMnXdsv", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 340 + }, + "outputId": "3efd95b1-52fe-425d-b3d2-00b1e9303141" + }, + "source": [ + "import numpy as np\n", + "\n", + "ex_str = [[\"Hey I don't think you're getting what your users are hoping for br what a shame\"]]\n", + "\n", + "\n", + "#for word in ex_arr:\n", + " #print(word2id.get(word, 0))\n", + "\n", + "ex = np.zeros([500-len(ex_arr),], dtype=int)\n", + "\n", + "ex = np.append(ex, [word2id.get(word, 0) for word in ex_arr])\n", + "\n", + "model.predict(ex)" + ], + "execution_count": 154, + "outputs": [ + { + "output_type": "error", + "ename": "ValueError", + "evalue": "ignored", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 11\u001b[0m \u001b[0mex\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mex\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0mword2id\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mword\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m0\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mword\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mex_arr\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 12\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 13\u001b[0;31m \u001b[0mmodel\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpredict\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mex\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;32m/usr/local/lib/python3.6/dist-packages/keras/engine/training.py\u001b[0m in \u001b[0;36mpredict\u001b[0;34m(self, x, batch_size, verbose, steps, callbacks, max_queue_size, workers, use_multiprocessing)\u001b[0m\n\u001b[1;32m 1378\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1379\u001b[0m \u001b[0;31m# Case 2: Symbolic tensors or Numpy array-like.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1380\u001b[0;31m \u001b[0mx\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0m_\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0m_\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_standardize_user_data\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1381\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstateful\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1382\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mx\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m>\u001b[0m \u001b[0mbatch_size\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0mx\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m%\u001b[0m \u001b[0mbatch_size\u001b[0m 
\u001b[0;34m!=\u001b[0m \u001b[0;36m0\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/lib/python3.6/dist-packages/keras/engine/training.py\u001b[0m in \u001b[0;36m_standardize_user_data\u001b[0;34m(self, x, y, sample_weight, class_weight, check_array_lengths, batch_size)\u001b[0m\n\u001b[1;32m 755\u001b[0m \u001b[0mfeed_input_shapes\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 756\u001b[0m \u001b[0mcheck_batch_axis\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mFalse\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;31m# Don't enforce the batch size.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 757\u001b[0;31m exception_prefix='input')\n\u001b[0m\u001b[1;32m 758\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 759\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0my\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/lib/python3.6/dist-packages/keras/engine/training_utils.py\u001b[0m in \u001b[0;36mstandardize_input_data\u001b[0;34m(data, names, shapes, check_batch_axis, exception_prefix)\u001b[0m\n\u001b[1;32m 139\u001b[0m \u001b[0;34m': expected '\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0mnames\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0;34m' to have shape '\u001b[0m \u001b[0;34m+\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 140\u001b[0m \u001b[0mstr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0;34m' but got array with shape '\u001b[0m \u001b[0;34m+\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 141\u001b[0;31m str(data_shape))\n\u001b[0m\u001b[1;32m 142\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mdata\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 143\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mValueError\u001b[0m: Error when checking input: expected embedding_3_input to have shape (500,) but got array with shape (1,)" + ] + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "UpGihYlpdoZ6", + "colab_type": "code", + "colab": {} + }, + "source": [ + "" + ], + "execution_count": 0, + "outputs": [] + } + ] +} \ No newline at end of file From a820169fbb469f1c42bd4a2d1e6a0e3ed0d40b17 Mon Sep 17 00:00:00 2001 From: Ahmed Mejbri Date: Mon, 10 Feb 2020 01:39:34 +0300 Subject: [PATCH 3/3] Delete sentiment_analysis_1st.ipynb --- sentiment_analysis_1st.ipynb | 488 ----------------------------------- 1 file changed, 488 deletions(-) delete mode 100644 sentiment_analysis_1st.ipynb diff --git a/sentiment_analysis_1st.ipynb b/sentiment_analysis_1st.ipynb deleted file mode 100644 index 60408fc..0000000 --- a/sentiment_analysis_1st.ipynb +++ /dev/null @@ -1,488 +0,0 @@ -{ - "nbformat": 4, - "nbformat_minor": 0, - "metadata": { - "colab": { - "name": "sentiment-analysis-1st.ipynb", - "provenance": [], - "collapsed_sections": [], - "authorship_tag": "ABX9TyP2K1/X8LCE5kyxnFPznaia", - "include_colab_link": true - }, - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - } - }, - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "id": "view-in-github", - "colab_type": "text" - }, - "source": [ - "\"Open" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "2jYwItyXgLRJ", - 
"colab_type": "text" - }, - "source": [ - "We would use RNN (LSTM : long short time memory) to perform sentiment analysis.\n", - "Keras has IMDb movie reviews that we can use.\n" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "iPzuJGaJgGLa", - "colab_type": "code", - "colab": {} - }, - "source": [ - "from keras.datasets import imdb" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "a5SKWBi-hyzg", - "colab_type": "code", - "outputId": "7cb08e48-9b32-44a9-b7d0-161439f735f5", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 34 - } - }, - "source": [ - "vocabulary_size = 5000\n", - "\n", - "(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words = vocabulary_size)\n", - "print(\"Loaded dataset with {} training samples and {} testing samples\".format(len(x_train), len(x_test)))" - ], - "execution_count": 105, - "outputs": [ - { - "output_type": "stream", - "text": [ - "Loaded dataset with 25000 training samples and 25000 testing samples\n" - ], - "name": "stdout" - } - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "1uv0NSctlB06", - "colab_type": "code", - "outputId": "42ba41b9-fe9f-4e86-d220-25a1743b76f3", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 105 - } - }, - "source": [ - "print('---review---')\n", - "print(x_train[6])\n", - "print('---label---')\n", - "print(y_train[6])" - ], - "execution_count": 106, - "outputs": [ - { - "output_type": "stream", - "text": [ - "---review---\n", - "[1, 2, 365, 1234, 5, 1156, 354, 11, 14, 2, 2, 7, 1016, 2, 2, 356, 44, 4, 1349, 500, 746, 5, 200, 4, 4132, 11, 2, 2, 1117, 1831, 2, 5, 4831, 26, 6, 2, 4183, 17, 369, 37, 215, 1345, 143, 2, 5, 1838, 8, 1974, 15, 36, 119, 257, 85, 52, 486, 9, 6, 2, 2, 63, 271, 6, 196, 96, 949, 4121, 4, 2, 7, 4, 2212, 2436, 819, 63, 47, 77, 2, 180, 6, 227, 11, 94, 2494, 2, 13, 423, 4, 168, 7, 4, 22, 5, 89, 665, 71, 270, 56, 5, 13, 197, 12, 161, 2, 99, 76, 23, 2, 7, 419, 665, 40, 91, 85, 108, 7, 4, 2084, 5, 4773, 81, 55, 52, 1901]\n", - "---label---\n", - "1\n" - ], - "name": "stdout" - } - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "Xd14L9WVlQW0", - "colab_type": "code", - "outputId": "c0447049-799e-4154-c988-3e1739b24eca", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 54 - } - }, - "source": [ - "word2id = imdb.get_word_index()\n", - "id2word = {}\n", - "for word, i in word2id.items():\n", - " id2word[i] = word\n", - "\n", - "#testing id2word\n", - "print ([id2word.get(i, \" \") for i in x_train[6]])" - ], - "execution_count": 107, - "outputs": [ - { - "output_type": "stream", - "text": [ - "['the', 'and', 'full', 'involving', 'to', 'impressive', 'boring', 'this', 'as', 'and', 'and', 'br', 'villain', 'and', 'and', 'need', 'has', 'of', 'costumes', 'b', 'message', 'to', 'may', 'of', 'props', 'this', 'and', 'and', 'concept', 'issue', 'and', 'to', \"god's\", 'he', 'is', 'and', 'unfolds', 'movie', 'women', 'like', \"isn't\", 'surely', \"i'm\", 'and', 'to', 'toward', 'in', \"here's\", 'for', 'from', 'did', 'having', 'because', 'very', 'quality', 'it', 'is', 'and', 'and', 'really', 'book', 'is', 'both', 'too', 'worked', 'carl', 'of', 'and', 'br', 'of', 'reviewer', 'closer', 'figure', 'really', 'there', 'will', 'and', 'things', 'is', 'far', 'this', 'make', 'mistakes', 'and', 'was', \"couldn't\", 'of', 'few', 'br', 'of', 'you', 'to', \"don't\", 'female', 'than', 'place', 'she', 'to', 'was', 'between', 'that', 'nothing', 'and', 'movies', 'get', 'are', 'and', 'br', 'yes', 'female', 
'just', 'its', 'because', 'many', 'br', 'of', 'overly', 'to', 'descent', 'people', 'time', 'very', 'bland']\n" - ], - "name": "stdout" - } - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "N1gCyYOU6tzb", - "colab_type": "text" - }, - "source": [ - "Review words : \n", - "\n", - "['the', 'and', 'full', 'involving', 'to', 'impressive', 'boring', 'this', 'as', 'and', 'and', 'br', 'villain', 'and', 'and', 'need', 'has', 'of', 'costumes', 'b', 'message', 'to', 'may', 'of', 'props', 'this', 'and', 'and', 'concept', 'issue', 'and', 'to', \"god's\", 'he', 'is', 'and', 'unfolds', 'movie', 'women', 'like', \"isn't\", 'surely', \"i'm\", 'and', 'to', 'toward', 'in', \"here's\", 'for', 'from', 'did', 'having', 'because', 'very', 'quality', 'it', 'is', 'and', 'and', 'really', 'book', 'is', 'both', 'too', 'worked', 'carl', 'of', 'and', 'br', 'of', 'reviewer', 'closer', 'figure', 'really', 'there', 'will', 'and', 'things', 'is', 'far', 'this', 'make', 'mistakes', 'and', 'was', \"couldn't\", 'of', 'few', 'br', 'of', 'you', 'to', \"don't\", 'female', 'than', 'place', 'she', 'to', 'was', 'between', 'that', 'nothing', 'and', 'movies', 'get', 'are', 'and', 'br', 'yes', 'female', 'just', 'its', 'because', 'many', 'br', 'of', 'overly', 'to', 'descent', 'people', 'time', 'very', 'bland']" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "NaSu8KsR7IAR", - "colab_type": "text" - }, - "source": [ - "# Padding reviews\n", - "In order to be fed to the RNN, all reviews must have the same length, which is now set for 500 words. Longer reviews are truncated and shorter ones are padded" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "UubP-aPo7bTM", - "colab_type": "code", - "colab": {} - }, - "source": [ - "from keras.preprocessing import sequence \n", - "max_words = 500\n", - "x_train = sequence.pad_sequences(x_train, maxlen=max_words)\n", - "x_test = sequence.pad_sequences(x_test, maxlen=max_words)\n", - "\n" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "ZnXqsB8iOzAx", - "colab_type": "text" - }, - "source": [ - "# Building the Model\n", - "\n", - "Input : sequence of words represented by their IDs\n", - "Output : binary sentiment\n" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "5P10k7S17who", - "colab_type": "code", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 272 - }, - "outputId": "b79bfd58-fec3-4dce-82ed-4703fd74a76a" - }, - "source": [ - "from keras import Sequential\n", - "from keras.layers import Embedding, LSTM, Dense, Dropout\n", - "\n", - "embedding_size=32\n", - "model=Sequential()\n", - "model.add(Embedding(vocabulary_size, embedding_size, input_length=max_words))\n", - "model.add(LSTM(100))\n", - "model.add(Dense(1, activation='sigmoid'))\n", - "\n", - "print(model.summary())\n" - ], - "execution_count": 109, - "outputs": [ - { - "output_type": "stream", - "text": [ - "Model: \"sequential_3\"\n", - "_________________________________________________________________\n", - "Layer (type) Output Shape Param # \n", - "=================================================================\n", - "embedding_3 (Embedding) (None, 500, 32) 160000 \n", - "_________________________________________________________________\n", - "lstm_3 (LSTM) (None, 100) 53200 \n", - "_________________________________________________________________\n", - "dense_3 (Dense) (None, 1) 101 \n", - "=================================================================\n", - "Total params: 213,301\n", - "Trainable 
params: 213,301\n", - "Non-trainable params: 0\n", - "_________________________________________________________________\n", - "None\n" - ], - "name": "stdout" - } - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "wBR-qK4rR07n", - "colab_type": "text" - }, - "source": [ - "## Training\n", - "\n", - "Here we specify the loss function as well as the optimizer, and evaluation metrics (here, it's just \"accuracy\")" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "SkO16QZIQP6W", - "colab_type": "code", - "colab": {} - }, - "source": [ - "model.compile(loss='binary_crossentropy', \n", - " optimizer='adam', \n", - " metrics=['accuracy'])" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "2TnjWYbVShY9", - "colab_type": "text" - }, - "source": [ - "Actual training" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "Oi0oVVB9SKSm", - "colab_type": "code", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 153 - }, - "outputId": "4e12ace0-bced-4df1-b8e0-e410a1d043da" - }, - "source": [ - "batch_size = 64\n", - "num_epochs = 3\n", - "\n", - "x_valid, y_valid = x_train[:batch_size], y_train[:batch_size]\n", - "x_train2, y_train2 = x_train[batch_size:], y_train[batch_size:]\n", - "\n", - "model.fit(x_train2, y_train2, validation_data=(x_valid, y_valid), batch_size=batch_size, epochs=num_epochs)" - ], - "execution_count": 111, - "outputs": [ - { - "output_type": "stream", - "text": [ - "Train on 24936 samples, validate on 64 samples\n", - "Epoch 1/3\n", - "24936/24936 [==============================] - 304s 12ms/step - loss: 0.4639 - acc: 0.7776 - val_loss: 0.2284 - val_acc: 0.9062\n", - "Epoch 2/3\n", - "24936/24936 [==============================] - 307s 12ms/step - loss: 0.3038 - acc: 0.8787 - val_loss: 0.2659 - val_acc: 0.9062\n", - "Epoch 3/3\n", - "24936/24936 [==============================] - 311s 12ms/step - loss: 0.2418 - acc: 0.9065 - val_loss: 0.2280 - val_acc: 0.9219\n" - ], - "name": "stdout" - }, - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "" - ] - }, - "metadata": { - "tags": [] - }, - "execution_count": 111 - } - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "pyA6zlxaSuly", - "colab_type": "code", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 34 - }, - "outputId": "7412c433-21f1-4763-df8e-cc30e3b7654e" - }, - "source": [ - "scores = model.evaluate(x_test, y_test, verbose=0)\n", - "print('Test accuracy:', scores[1])" - ], - "execution_count": 112, - "outputs": [ - { - "output_type": "stream", - "text": [ - "Test accuracy: 0.87364\n" - ], - "name": "stdout" - } - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "HR8D8-BmXAbc", - "colab_type": "code", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 799 - }, - "outputId": "725c6220-1b6e-4c44-c4cf-17c46b2e294e" - }, - "source": [ - "x_train[6]" - ], - "execution_count": 113, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "array([ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", - " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", - " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", - " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", - " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", - " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", - " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", - " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", - " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", - " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", - " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", - " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", - " 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", - " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", - " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", - " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", - " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", - " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", - " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", - " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", - " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", - " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", - " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", - " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", - " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", - " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", - " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", - " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", - " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", - " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", - " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", - " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", - " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", - " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", - " 0, 0, 0, 1, 2, 365, 1234, 5, 1156, 354, 11,\n", - " 14, 2, 2, 7, 1016, 2, 2, 356, 44, 4, 1349,\n", - " 500, 746, 5, 200, 4, 4132, 11, 2, 2, 1117, 1831,\n", - " 2, 5, 4831, 26, 6, 2, 4183, 17, 369, 37, 215,\n", - " 1345, 143, 2, 5, 1838, 8, 1974, 15, 36, 119, 257,\n", - " 85, 52, 486, 9, 6, 2, 2, 63, 271, 6, 196,\n", - " 96, 949, 4121, 4, 2, 7, 4, 2212, 2436, 819, 63,\n", - " 47, 77, 2, 180, 6, 227, 11, 94, 2494, 2, 13,\n", - " 423, 4, 168, 7, 4, 22, 5, 89, 665, 71, 270,\n", - " 56, 5, 13, 197, 12, 161, 2, 99, 76, 23, 2,\n", - " 7, 419, 665, 40, 91, 85, 108, 7, 4, 2084, 5,\n", - " 4773, 81, 55, 52, 1901], dtype=int32)" - ] - }, - "metadata": { - "tags": [] - }, - "execution_count": 113 - } - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "2BxZcRMnXdsv", - "colab_type": "code", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 340 - }, - "outputId": "3efd95b1-52fe-425d-b3d2-00b1e9303141" - }, - "source": [ - "import numpy as np\n", - "\n", - "ex_str = [[\"Hey I don't think you're getting what your users are hoping for br what a shame\"]]\n", - "\n", - "\n", - "#for word in ex_arr:\n", - " #print(word2id.get(word, 0))\n", - "\n", - "ex = np.zeros([500-len(ex_arr),], dtype=int)\n", - "\n", - "ex = np.append(ex, [word2id.get(word, 0) for word in ex_arr])\n", - "\n", - "model.predict(ex)" - ], - "execution_count": 154, - "outputs": [ - { - "output_type": "error", - "ename": "ValueError", - "evalue": "ignored", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", - "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 11\u001b[0m \u001b[0mex\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mex\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0mword2id\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mword\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m0\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mword\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mex_arr\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 12\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 13\u001b[0;31m \u001b[0mmodel\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpredict\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mex\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", - 
"\u001b[0;32m/usr/local/lib/python3.6/dist-packages/keras/engine/training.py\u001b[0m in \u001b[0;36mpredict\u001b[0;34m(self, x, batch_size, verbose, steps, callbacks, max_queue_size, workers, use_multiprocessing)\u001b[0m\n\u001b[1;32m 1378\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1379\u001b[0m \u001b[0;31m# Case 2: Symbolic tensors or Numpy array-like.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1380\u001b[0;31m \u001b[0mx\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0m_\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0m_\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_standardize_user_data\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1381\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstateful\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1382\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mx\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m>\u001b[0m \u001b[0mbatch_size\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0mx\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m%\u001b[0m \u001b[0mbatch_size\u001b[0m \u001b[0;34m!=\u001b[0m \u001b[0;36m0\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/usr/local/lib/python3.6/dist-packages/keras/engine/training.py\u001b[0m in \u001b[0;36m_standardize_user_data\u001b[0;34m(self, x, y, sample_weight, class_weight, check_array_lengths, batch_size)\u001b[0m\n\u001b[1;32m 755\u001b[0m \u001b[0mfeed_input_shapes\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 756\u001b[0m \u001b[0mcheck_batch_axis\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mFalse\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;31m# Don't enforce the batch size.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 757\u001b[0;31m exception_prefix='input')\n\u001b[0m\u001b[1;32m 758\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 759\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0my\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/usr/local/lib/python3.6/dist-packages/keras/engine/training_utils.py\u001b[0m in \u001b[0;36mstandardize_input_data\u001b[0;34m(data, names, shapes, check_batch_axis, exception_prefix)\u001b[0m\n\u001b[1;32m 139\u001b[0m \u001b[0;34m': expected '\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0mnames\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0;34m' to have shape '\u001b[0m \u001b[0;34m+\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 140\u001b[0m \u001b[0mstr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0;34m' but got array with shape '\u001b[0m \u001b[0;34m+\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 141\u001b[0;31m str(data_shape))\n\u001b[0m\u001b[1;32m 142\u001b[0m \u001b[0;32mreturn\u001b[0m 
\u001b[0mdata\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 143\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;31mValueError\u001b[0m: Error when checking input: expected embedding_3_input to have shape (500,) but got array with shape (1,)" - ] - } - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "UpGihYlpdoZ6", - "colab_type": "code", - "colab": {} - }, - "source": [ - "" - ], - "execution_count": 0, - "outputs": [] - } - ] -} \ No newline at end of file
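Note on the failing prediction cell in PATCH 2/3: the source defines ex_str but then references an undefined ex_arr, and even with that repaired, model.predict is handed a flat length-500 vector while the Embedding layer expects a batch of shape (1, 500), which is why the recorded output ends in a ValueError. Below is a minimal sketch of a working version, assuming the notebook's word2id, vocabulary_size, max_words, and trained model are still in scope, and keeping the notebook's simplified indexing (a fully faithful encoder would also add the index offset of 3 that imdb.load_data applies to word ids):

from keras.preprocessing import sequence

ex_str = "Hey I don't think you're getting what your users are hoping for br what a shame"

# Tokenize and map words to ids; unknown words fall back to id 2 (the OOV
# marker), and ids at or above vocabulary_size are clamped to 2 as well so
# every index stays inside the Embedding layer's input range.
ex_ids = [word2id.get(word, 2) for word in ex_str.lower().split()]
ex_ids = [i if i < vocabulary_size else 2 for i in ex_ids]

# pad_sequences left-pads to max_words and keeps the leading batch
# dimension, so ex has shape (1, 500), matching how the training data
# was prepared.
ex = sequence.pad_sequences([ex_ids], maxlen=max_words)

print(model.predict(ex))  # sigmoid output: probability the review is positive

Wrapping the single review in a one-element list before pad_sequences is what supplies the batch dimension the model expects; passing the padded vector directly reproduces the shape error shown above.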