From 7f7e88aa6e5f9b71d1caf8d29ae8eec9816a5ade Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Harald=20L=C3=B8nsethagen?= <haraldlons@gmail.com>
Date: Sun, 19 Apr 2020 12:29:12 +0200
Subject: [PATCH 1/5] change to GaussianNB from DecisionTreeClassifier

After a copy-paste, DecisionTreeClassifier was never replaced with GaussianNB in the Naive Bayes section, so that cell trained a decision tree instead of a Naive Bayes model. Fixed this.
---
 Tutorial.ipynb | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Tutorial.ipynb b/Tutorial.ipynb
index 862db3d..23a845d 100644
--- a/Tutorial.ipynb
+++ b/Tutorial.ipynb
@@ -302,7 +302,7 @@
    "source": [
     "from sklearn.naive_bayes import GaussianNB\n",
     "\n",
-    "clf_gnb = DecisionTreeClassifier()\n",
+    "clf_gnb = GaussianNB()\n",
     "clf_gnb.fit(train_x_vectors, train_y)\n",
     "\n",
     "clf_gnb.predict(test_x_vectors[0])\n"

From 817d4dfc33cdac9dcc8ac1a44397a3933184c282 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Harald=20L=C3=B8nsethagen?= <haraldlons@gmail.com>
Date: Sun, 19 Apr 2020 12:39:46 +0200
Subject: [PATCH 2/5] add .todense() for GaussianNB classifier

---
 Tutorial.ipynb | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/Tutorial.ipynb b/Tutorial.ipynb
index 23a845d..4d7e797 100644
--- a/Tutorial.ipynb
+++ b/Tutorial.ipynb
@@ -303,9 +303,9 @@
     "from sklearn.naive_bayes import GaussianNB\n",
     "\n",
     "clf_gnb = GaussianNB()\n",
-    "clf_gnb.fit(train_x_vectors, train_y)\n",
+    "clf_gnb.fit(train_x_vectors.todense(), train_y)\n",
     "\n",
-    "clf_gnb.predict(test_x_vectors[0])\n"
+    "clf_gnb.predict(test_x_vectors.todense()[0])"
    ]
   },
   {
@@ -376,7 +376,7 @@
     "# Mean Accuracy\n",
     "print(clf_svm.score(test_x_vectors, test_y))\n",
     "print(clf_dec.score(test_x_vectors, test_y))\n",
-    "print(clf_gnb.score(test_x_vectors, test_y))\n",
+    "print(clf_gnb.score(test_x_vectors.todense(), test_y))\n",
     "print(clf_log.score(test_x_vectors, test_y))"
    ]
   },

From d721d83cb2aed14e1d2068c2ecd51038391b4a35 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Harald=20L=C3=B8nsethagen?= <haraldlons@gmail.com>
Date: Sun, 19 Apr 2020 12:43:20 +0200
Subject: [PATCH 3/5] fix typo in filename

---
 Tutorial.ipynb | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Tutorial.ipynb b/Tutorial.ipynb
index 4d7e797..2616efb 100644
--- a/Tutorial.ipynb
+++ b/Tutorial.ipynb
@@ -80,7 +80,7 @@
    "source": [
     "import json\n",
     "\n",
-    "file_name = './data/sentiment/books_small_10000.json'\n",
+    "file_name = './data/sentiment/Books_small_10000.json'\n",
     "\n",
     "reviews = []\n",
     "with open(file_name) as f:\n",

From 872da77cbc0f927bc64abae6820ad660076db33b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Harald=20L=C3=B8nsethagen?= <haraldlons@gmail.com>
Date: Sun, 19 Apr 2020 12:43:46 +0200
Subject: [PATCH 4/5] fix another typo in filename

---
 Tutorial.ipynb | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Tutorial.ipynb b/Tutorial.ipynb
index 2616efb..30218a8 100644
--- a/Tutorial.ipynb
+++ b/Tutorial.ipynb
@@ -549,7 +549,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "with open('./models/entiment_classifier.pkl', 'rb') as f:\n",
+    "with open('./models/sentiment_classifier.pkl', 'rb') as f:\n",
     "    loaded_clf = pickle.load(f)"
    ]
   },

From 260c9b99cd85a3ae25310f9535a1085959381877 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Harald=20L=C3=B8nsethagen?= <haraldlons@gmail.com>
Date: Sun, 19 Apr 2020 12:47:52 +0200
Subject: [PATCH 5/5] remove unnecessary spaces

---
 Tutorial.ipynb | 133 ++++++++++++++++++++-----------------------------
 1 file changed, 53 insertions(+), 80 deletions(-)

diff --git a/Tutorial.ipynb b/Tutorial.ipynb
index 30218a8..f9cefed 100644
--- a/Tutorial.ipynb
+++ b/Tutorial.ipynb
@@ -49,9 +49,7 @@
     "        positive = list(filter(lambda x: x.sentiment == Sentiment.POSITIVE, self.reviews))\n",
     "        positive_shrunk = positive[:len(negative)]\n",
     "        self.reviews = negative + positive_shrunk\n",
-    "        random.shuffle(self.reviews)\n",
-    "        \n",
-    "        "
+    "        random.shuffle(self.reviews)"
    ]
   },
   {
@@ -88,8 +86,7 @@
     "        review = json.loads(line)\n",
     "        reviews.append(Review(review['reviewText'], review['overall']))\n",
     "        \n",
-    "reviews[5].text\n",
-    "        "
+    "reviews[5].text    "
    ]
   },
   {
@@ -101,7 +98,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 39,
+   "execution_count": 3,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -116,7 +113,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 40,
+   "execution_count": 4,
    "metadata": {},
    "outputs": [
     {
@@ -150,14 +147,14 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 49,
+   "execution_count": 5,
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "I read this book over a year ago & enjoyed the various stories, the author takes you on a journey of life as it pretty much is in today's world & society, as you end one story you look forward to starting the next, relaxed reading I highly recommend it for peps who enjoy stories from back in their grand-ma & grand-dad days in the South.  I will peruse more books by this author for future purchase.\n",
+      "I was very disappointed with this book, not up to snuff by Deaver. Too many filler words, too expensive. Not interesting.\n",
       "[[0. 0. 0. ... 0. 0. 0.]]\n"
      ]
     }
@@ -174,10 +171,7 @@
     "test_x_vectors = vectorizer.transform(test_x)\n",
     "\n",
     "print(train_x[0])\n",
-    "print(train_x_vectors[0].toarray())\n",
-    "\n",
-    "\n",
-    "\n"
+    "print(train_x_vectors[0].toarray())"
    ]
   },
   {
@@ -213,16 +207,16 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 50,
+   "execution_count": 6,
    "metadata": {},
    "outputs": [
     {
      "data": {
       "text/plain": [
-       "array(['POSITIVE'], dtype='<U8')"
+       "array(['NEGATIVE'], dtype='<U8')"
       ]
      },
-     "execution_count": 50,
+     "execution_count": 6,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -236,12 +230,7 @@
     "\n",
     "test_x[0]\n",
     "\n",
-    "clf_svm.predict(test_x_vectors[0])\n",
-    "\n",
-    "\n",
-    "\n",
-    "\n",
-    "\n"
+    "clf_svm.predict(test_x_vectors[0])"
    ]
   },
   {
@@ -253,7 +242,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 51,
+   "execution_count": 7,
    "metadata": {},
    "outputs": [
     {
@@ -262,7 +251,7 @@
        "array(['NEGATIVE'], dtype='<U8')"
       ]
      },
-     "execution_count": 51,
+     "execution_count": 7,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -273,7 +262,7 @@
     "clf_dec = DecisionTreeClassifier()\n",
     "clf_dec.fit(train_x_vectors, train_y)\n",
     "\n",
-    "clf_dec.predict(test_x_vectors[0])\n"
+    "clf_dec.predict(test_x_vectors[0])"
    ]
   },
   {
@@ -285,7 +274,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 52,
+   "execution_count": 8,
    "metadata": {},
    "outputs": [
     {
@@ -294,7 +283,7 @@
        "array(['NEGATIVE'], dtype='<U8')"
       ]
      },
-     "execution_count": 52,
+     "execution_count": 8,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -317,7 +306,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 53,
+   "execution_count": 9,
    "metadata": {},
    "outputs": [
     {
@@ -334,7 +323,7 @@
        "array(['POSITIVE'], dtype='<U8')"
       ]
      },
-     "execution_count": 53,
+     "execution_count": 9,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -345,8 +334,7 @@
     "clf_log = LogisticRegression()\n",
     "clf_log.fit(train_x_vectors, train_y)\n",
     "\n",
-    "clf_log.predict(test_x_vectors[0])\n",
-    "\n"
+    "clf_log.predict(test_x_vectors[0])"
    ]
   },
   {
@@ -358,7 +346,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 54,
+   "execution_count": 10,
    "metadata": {},
    "outputs": [
     {
@@ -366,8 +354,8 @@
      "output_type": "stream",
      "text": [
       "0.8076923076923077\n",
-      "0.65625\n",
-      "0.6706730769230769\n",
+      "0.6225961538461539\n",
+      "0.6610576923076923\n",
       "0.8028846153846154\n"
      ]
     }
@@ -382,7 +370,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 55,
+   "execution_count": 11,
    "metadata": {},
    "outputs": [
     {
@@ -391,7 +379,7 @@
        "array([0.80582524, 0.80952381])"
       ]
      },
-     "execution_count": 55,
+     "execution_count": 11,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -401,32 +389,30 @@
     "from sklearn.metrics import f1_score\n",
     "\n",
     "f1_score(test_y, clf_svm.predict(test_x_vectors), average=None, labels=[Sentiment.POSITIVE, Sentiment.NEGATIVE])\n",
-    "#f1_score(test_y, clf_log.predict(test_x_vectors), average=None, labels=[Sentiment.POSITIVE, Sentiment.NEUTRAL, Sentiment.NEGATIVE])\n",
-    "\n",
-    "\n"
+    "#f1_score(test_y, clf_log.predict(test_x_vectors), average=None, labels=[Sentiment.POSITIVE, Sentiment.NEUTRAL, Sentiment.NEGATIVE])"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 56,
+   "execution_count": 12,
    "metadata": {},
    "outputs": [
     {
      "data": {
       "text/plain": [
-       "array(['POSITIVE', 'NEGATIVE', 'NEGATIVE'], dtype='<U8')"
+       "array(['NEGATIVE', 'NEGATIVE', 'NEGATIVE'], dtype='<U8')"
       ]
      },
-     "execution_count": 56,
+     "execution_count": 12,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
    "source": [
-    "test_set = ['very fun', \"bad book do not buy\", 'horrible waste of time']\n",
+    "test_set = ['I would never read this book again', \"bad book do not buy\", 'horrible waste of time']\n",
     "new_test = vectorizer.transform(test_set)\n",
     "\n",
-    "clf_svm.predict(new_test)\n"
+    "clf_svm.predict(new_test)"
    ]
   },
   {
@@ -438,7 +424,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 58,
+   "execution_count": 13,
    "metadata": {},
    "outputs": [
     {
@@ -453,18 +439,17 @@
      "data": {
       "text/plain": [
        "GridSearchCV(cv=5, error_score='raise-deprecating',\n",
-       "             estimator=SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,\n",
-       "                           decision_function_shape='ovr', degree=3,\n",
-       "                           gamma='auto_deprecated', kernel='rbf', max_iter=-1,\n",
-       "                           probability=False, random_state=None, shrinking=True,\n",
-       "                           tol=0.001, verbose=False),\n",
-       "             iid='warn', n_jobs=None,\n",
-       "             param_grid={'C': (1, 4, 8, 16, 32), 'kernel': ('linear', 'rbf')},\n",
-       "             pre_dispatch='2*n_jobs', refit=True, return_train_score=False,\n",
-       "             scoring=None, verbose=0)"
+       "       estimator=SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,\n",
+       "  decision_function_shape='ovr', degree=3, gamma='auto_deprecated',\n",
+       "  kernel='rbf', max_iter=-1, probability=False, random_state=None,\n",
+       "  shrinking=True, tol=0.001, verbose=False),\n",
+       "       fit_params=None, iid='warn', n_jobs=None,\n",
+       "       param_grid={'kernel': ('linear', 'rbf'), 'C': (1, 4, 8, 16, 32)},\n",
+       "       pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',\n",
+       "       scoring=None, verbose=0)"
       ]
      },
-     "execution_count": 58,
+     "execution_count": 13,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -476,33 +461,19 @@
     "\n",
     "svc = svm.SVC()\n",
     "clf = GridSearchCV(svc, parameters, cv=5)\n",
-    "clf.fit(train_x_vectors, train_y)\n",
-    "\n",
-    "\n",
-    "\n",
-    "\n",
-    "\n",
-    "\n",
-    "\n",
-    "\n",
-    "\n",
-    "\n",
-    "\n",
-    "\n",
-    "\n",
-    "\n"
+    "clf.fit(train_x_vectors, train_y)"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 66,
+   "execution_count": 14,
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "0.8076923076923077\n"
+      "0.8028846153846154\n"
      ]
     }
    ],
@@ -526,7 +497,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 68,
+   "execution_count": 15,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -545,7 +516,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 70,
+   "execution_count": 16,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -555,23 +526,25 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 72,
-   "metadata": {},
+   "execution_count": 17,
+   "metadata": {
+    "scrolled": false
+   },
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "I loved this book and the previous books in this series. It brings out every emotion you can think of. I look forward to reading more books by this author.\n"
+      "Nothing too cosmic and scary, but a great story of first college age lost love and finding yourself. Stephen King is BACK!\n"
      ]
     },
     {
      "data": {
       "text/plain": [
-       "array(['POSITIVE'], dtype='<U8')"
+       "array(['NEGATIVE'], dtype='<U8')"
       ]
      },
-     "execution_count": 72,
+     "execution_count": 17,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -611,7 +584,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.7.3"
+   "version": "3.6.8"
   }
  },
  "nbformat": 4,