From 7f7e88aa6e5f9b71d1caf8d29ae8eec9816a5ade Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Harald=20L=C3=B8nsethagen?= Date: Sun, 19 Apr 2020 12:29:12 +0200 Subject: [PATCH 1/5] change to GaussianNB from DecisionTreeClassifier after copy-paste someone forgot to replace DecisionTreeClassifier with GaussianNB for the Naive Bayes section. Fixed this. --- Tutorial.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Tutorial.ipynb b/Tutorial.ipynb index 862db3d..23a845d 100644 --- a/Tutorial.ipynb +++ b/Tutorial.ipynb @@ -302,7 +302,7 @@ "source": [ "from sklearn.naive_bayes import GaussianNB\n", "\n", - "clf_gnb = DecisionTreeClassifier()\n", + "clf_gnb = GaussianNB()\n", "clf_gnb.fit(train_x_vectors, train_y)\n", "\n", "clf_gnb.predict(test_x_vectors[0])\n" From 817d4dfc33cdac9dcc8ac1a44397a3933184c282 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Harald=20L=C3=B8nsethagen?= Date: Sun, 19 Apr 2020 12:39:46 +0200 Subject: [PATCH 2/5] add .todense() for GaussianNB classifier --- Tutorial.ipynb | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Tutorial.ipynb b/Tutorial.ipynb index 23a845d..4d7e797 100644 --- a/Tutorial.ipynb +++ b/Tutorial.ipynb @@ -303,9 +303,9 @@ "from sklearn.naive_bayes import GaussianNB\n", "\n", "clf_gnb = GaussianNB()\n", - "clf_gnb.fit(train_x_vectors, train_y)\n", + "clf_gnb.fit(train_x_vectors.todense(), train_y)\n", "\n", - "clf_gnb.predict(test_x_vectors[0])\n" + "clf_gnb.predict(test_x_vectors.todense()[0])" ] }, { @@ -376,7 +376,7 @@ "# Mean Accuracy\n", "print(clf_svm.score(test_x_vectors, test_y))\n", "print(clf_dec.score(test_x_vectors, test_y))\n", - "print(clf_gnb.score(test_x_vectors, test_y))\n", + "print(clf_gnb.score(test_x_vectors.todense(), test_y))\n", "print(clf_log.score(test_x_vectors, test_y))" ] }, From d721d83cb2aed14e1d2068c2ecd51038391b4a35 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Harald=20L=C3=B8nsethagen?= Date: Sun, 19 Apr 2020 12:43:20 +0200 Subject: [PATCH 3/5] fix typo in filename --- Tutorial.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Tutorial.ipynb b/Tutorial.ipynb index 4d7e797..2616efb 100644 --- a/Tutorial.ipynb +++ b/Tutorial.ipynb @@ -80,7 +80,7 @@ "source": [ "import json\n", "\n", - "file_name = './data/sentiment/books_small_10000.json'\n", + "file_name = './data/sentiment/Books_small_10000.json'\n", "\n", "reviews = []\n", "with open(file_name) as f:\n", From 872da77cbc0f927bc64abae6820ad660076db33b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Harald=20L=C3=B8nsethagen?= Date: Sun, 19 Apr 2020 12:43:46 +0200 Subject: [PATCH 4/5] fix another typo in filename --- Tutorial.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Tutorial.ipynb b/Tutorial.ipynb index 2616efb..30218a8 100644 --- a/Tutorial.ipynb +++ b/Tutorial.ipynb @@ -549,7 +549,7 @@ "metadata": {}, "outputs": [], "source": [ - "with open('./models/entiment_classifier.pkl', 'rb') as f:\n", + "with open('./models/sentiment_classifier.pkl', 'rb') as f:\n", " loaded_clf = pickle.load(f)" ] }, From 260c9b99cd85a3ae25310f9535a1085959381877 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Harald=20L=C3=B8nsethagen?= Date: Sun, 19 Apr 2020 12:47:52 +0200 Subject: [PATCH 5/5] remove unnecessary spaces --- Tutorial.ipynb | 133 ++++++++++++++++++++----------------------------- 1 file changed, 53 insertions(+), 80 deletions(-) diff --git a/Tutorial.ipynb b/Tutorial.ipynb index 30218a8..f9cefed 100644 --- a/Tutorial.ipynb +++ b/Tutorial.ipynb @@ -49,9 +49,7 @@ " positive = list(filter(lambda x: x.sentiment == Sentiment.POSITIVE, self.reviews))\n", " positive_shrunk = positive[:len(negative)]\n", " self.reviews = negative + positive_shrunk\n", - " random.shuffle(self.reviews)\n", - " \n", - " " + " random.shuffle(self.reviews)" ] }, { @@ -88,8 +86,7 @@ " review = json.loads(line)\n", " reviews.append(Review(review['reviewText'], review['overall']))\n", " \n", - "reviews[5].text\n", - " " + "reviews[5].text " ] }, { @@ -101,7 +98,7 @@ }, { "cell_type": "code", - "execution_count": 39, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ @@ -116,7 +113,7 @@ }, { "cell_type": "code", - "execution_count": 40, + "execution_count": 4, "metadata": {}, "outputs": [ { @@ -150,14 +147,14 @@ }, { "cell_type": "code", - "execution_count": 49, + "execution_count": 5, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "I read this book over a year ago & enjoyed the various stories, the author takes you on a journey of life as it pretty much is in today's world & society, as you end one story you look forward to starting the next, relaxed reading I highly recommend it for peps who enjoy stories from back in their grand-ma & grand-dad days in the South. I will peruse more books by this author for future purchase.\n", + "I was very disappointed with this book, not up to snuff by Deaver. Too many filler words, too expensive. Not interesting.\n", "[[0. 0. 0. ... 0. 0. 0.]]\n" ] } @@ -174,10 +171,7 @@ "test_x_vectors = vectorizer.transform(test_x)\n", "\n", "print(train_x[0])\n", - "print(train_x_vectors[0].toarray())\n", - "\n", - "\n", - "\n" + "print(train_x_vectors[0].toarray())" ] }, { @@ -213,16 +207,16 @@ }, { "cell_type": "code", - "execution_count": 50, + "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "array(['POSITIVE'], dtype='