-
Notifications
You must be signed in to change notification settings - Fork 3
Expand file tree
/
Copy pathAmazon_LSTM
More file actions
1 lines (1 loc) · 28.7 KB
/
Amazon_LSTM
File metadata and controls
1 lines (1 loc) · 28.7 KB
1
{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"Amazon_LSTM","version":"0.3.2","provenance":[],"collapsed_sections":[]},"kernelspec":{"name":"python3","display_name":"Python 3"}},"cells":[{"metadata":{"id":"cKS9tDrQ6suR","colab_type":"code","colab":{"base_uri":"https://localhost:8080/","height":121},"outputId":"71d8491d-8d90-4e74-b4d0-be8fba923ece","executionInfo":{"status":"ok","timestamp":1549623786898,"user_tz":-330,"elapsed":980597,"user":{"displayName":"Akash Jain","photoUrl":"https://lh6.googleusercontent.com/-URkka8RJQGo/AAAAAAAAAAI/AAAAAAAAAKQ/rFW0wYatpjA/s64/photo.jpg","userId":"13748978918988779747"}}},"cell_type":"code","source":["from google.colab import drive\n","drive.mount('/content/drive/')"],"execution_count":1,"outputs":[{"output_type":"stream","text":["Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3Aietf%3Awg%3Aoauth%3A2.0%3Aoob&scope=email%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdocs.test%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive.photos.readonly%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fpeopleapi.readonly&response_type=code\n","\n","Enter your authorization code:\n","··········\n","Mounted at /content/drive/\n"],"name":"stdout"}]},{"metadata":{"id":"qXqJ5cLWApoL","colab_type":"code","colab":{"base_uri":"https://localhost:8080/","height":50},"outputId":"073f5eb6-b14f-4be4-b7a2-b3acfdfc30c6","executionInfo":{"status":"ok","timestamp":1549623789839,"user_tz":-330,"elapsed":983426,"user":{"displayName":"Akash Jain","photoUrl":"https://lh6.googleusercontent.com/-URkka8RJQGo/AAAAAAAAAAI/AAAAAAAAAKQ/rFW0wYatpjA/s64/photo.jpg","userId":"13748978918988779747"}}},"cell_type":"code","source":["!ls \"/content/drive/My Drive\""],"execution_count":2,"outputs":[{"output_type":"stream","text":["'Colab Notebooks' LSTM_IMDB.ipynb Untitled0.ipynb\n","'Colab 
Uploads' Reviews.csv\n"],"name":"stdout"}]},{"metadata":{"id":"xhTZS0gLA9QO","colab_type":"code","colab":{"base_uri":"https://localhost:8080/","height":34},"outputId":"dd1027d3-6eb5-4270-db2b-a842eec7eb72","executionInfo":{"status":"ok","timestamp":1549623790853,"user_tz":-330,"elapsed":984388,"user":{"displayName":"Akash Jain","photoUrl":"https://lh6.googleusercontent.com/-URkka8RJQGo/AAAAAAAAAAI/AAAAAAAAAKQ/rFW0wYatpjA/s64/photo.jpg","userId":"13748978918988779747"}}},"cell_type":"code","source":["#importing all libraries\n","import pandas as pd\n","import numpy as np\n","import matplotlib.pyplot as plt\n","import seaborn as sns\n","from tqdm import tqdm\n","import warnings\n","warnings.filterwarnings(\"ignore\")\n","from keras.models import Sequential\n","from keras.layers import Dense,LSTM,Dropout\n","from keras.layers.embeddings import Embedding\n","from keras.preprocessing import sequence"],"execution_count":3,"outputs":[{"output_type":"stream","text":["Using TensorFlow backend.\n"],"name":"stderr"}]},{"metadata":{"id":"EzloSMWoBbYJ","colab_type":"code","colab":{}},"cell_type":"code","source":["data = pd.read_csv('/content/drive/My Drive/Reviews.csv')"],"execution_count":0,"outputs":[]},{"metadata":{"id":"cKbVTuUTCPr0","colab_type":"code","colab":{}},"cell_type":"code","source":["data = data[:10000]"],"execution_count":0,"outputs":[]},{"metadata":{"id":"Ft_zV04MCYO3","colab_type":"code","colab":{"base_uri":"https://localhost:8080/","height":106},"outputId":"9f744f83-db88-4dc7-d425-98258a2d027a","executionInfo":{"status":"ok","timestamp":1549623799620,"user_tz":-330,"elapsed":992918,"user":{"displayName":"Akash Jain","photoUrl":"https://lh6.googleusercontent.com/-URkka8RJQGo/AAAAAAAAAAI/AAAAAAAAAKQ/rFW0wYatpjA/s64/photo.jpg","userId":"13748978918988779747"}}},"cell_type":"code","source":["data.head(2)"],"execution_count":6,"outputs":[{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n"," .dataframe tbody tr th:only-of-type {\n"," 
vertical-align: middle;\n"," }\n","\n"," .dataframe tbody tr th {\n"," vertical-align: top;\n"," }\n","\n"," .dataframe thead th {\n"," text-align: right;\n"," }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n"," <thead>\n"," <tr style=\"text-align: right;\">\n"," <th></th>\n"," <th>Id</th>\n"," <th>ProductId</th>\n"," <th>UserId</th>\n"," <th>ProfileName</th>\n"," <th>HelpfulnessNumerator</th>\n"," <th>HelpfulnessDenominator</th>\n"," <th>Score</th>\n"," <th>Time</th>\n"," <th>Summary</th>\n"," <th>Text</th>\n"," </tr>\n"," </thead>\n"," <tbody>\n"," <tr>\n"," <th>0</th>\n"," <td>1</td>\n"," <td>B001E4KFG0</td>\n"," <td>A3SGXH7AUHU8GW</td>\n"," <td>delmartian</td>\n"," <td>1</td>\n"," <td>1</td>\n"," <td>5</td>\n"," <td>1303862400</td>\n"," <td>Good Quality Dog Food</td>\n"," <td>I have bought several of the Vitality canned d...</td>\n"," </tr>\n"," <tr>\n"," <th>1</th>\n"," <td>2</td>\n"," <td>B00813GRG4</td>\n"," <td>A1D87F6ZCVE5NK</td>\n"," <td>dll pa</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>1</td>\n"," <td>1346976000</td>\n"," <td>Not as Advertised</td>\n"," <td>Product arrived labeled as Jumbo Salted Peanut...</td>\n"," </tr>\n"," </tbody>\n","</table>\n","</div>"],"text/plain":[" Id ProductId UserId ProfileName HelpfulnessNumerator \\\n","0 1 B001E4KFG0 A3SGXH7AUHU8GW delmartian 1 \n","1 2 B00813GRG4 A1D87F6ZCVE5NK dll pa 0 \n","\n"," HelpfulnessDenominator Score Time Summary \\\n","0 1 5 1303862400 Good Quality Dog Food \n","1 0 1 1346976000 Not as Advertised \n","\n"," Text \n","0 I have bought several of the Vitality canned d... \n","1 Product arrived labeled as Jumbo Salted Peanut... 
"]},"metadata":{"tags":[]},"execution_count":6}]},{"metadata":{"id":"zl26tkCWCaza","colab_type":"code","colab":{}},"cell_type":"code","source":["data = data.drop_duplicates(subset = ['UserId','ProfileName','Time','Summary'])"],"execution_count":0,"outputs":[]},{"metadata":{"id":"6uMgYohOCqND","colab_type":"code","colab":{}},"cell_type":"code","source":["data = data.dropna()"],"execution_count":0,"outputs":[]},{"metadata":{"id":"XLn2z9hXCtvh","colab_type":"code","colab":{}},"cell_type":"code","source":["data = data.sort_values(by = 'Time')"],"execution_count":0,"outputs":[]},{"metadata":{"id":"Hh7LTggdC1N7","colab_type":"code","colab":{}},"cell_type":"code","source":["def classifier(x):\n"," if x>3:\n"," return 1\n"," return 0 "],"execution_count":0,"outputs":[]},{"metadata":{"id":"RiOiMa7MC_yh","colab_type":"code","colab":{}},"cell_type":"code","source":["data.Score = data.Score.map(classifier)"],"execution_count":0,"outputs":[]},{"metadata":{"id":"M_bRONNYDWPo","colab_type":"code","colab":{"base_uri":"https://localhost:8080/","height":140},"outputId":"b6d9f6c2-fc46-4d59-f16d-c4957b313535","executionInfo":{"status":"ok","timestamp":1549623799633,"user_tz":-330,"elapsed":992753,"user":{"displayName":"Akash Jain","photoUrl":"https://lh6.googleusercontent.com/-URkka8RJQGo/AAAAAAAAAAI/AAAAAAAAAKQ/rFW0wYatpjA/s64/photo.jpg","userId":"13748978918988779747"}}},"cell_type":"code","source":["data.head(2)"],"execution_count":12,"outputs":[{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n"," .dataframe tbody tr th:only-of-type {\n"," vertical-align: middle;\n"," }\n","\n"," .dataframe tbody tr th {\n"," vertical-align: top;\n"," }\n","\n"," .dataframe thead th {\n"," text-align: right;\n"," }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n"," <thead>\n"," <tr style=\"text-align: right;\">\n"," <th></th>\n"," <th>Id</th>\n"," <th>ProductId</th>\n"," <th>UserId</th>\n"," <th>ProfileName</th>\n"," <th>HelpfulnessNumerator</th>\n"," 
<th>HelpfulnessDenominator</th>\n"," <th>Score</th>\n"," <th>Time</th>\n"," <th>Summary</th>\n"," <th>Text</th>\n"," </tr>\n"," </thead>\n"," <tbody>\n"," <tr>\n"," <th>1244</th>\n"," <td>1245</td>\n"," <td>B00002Z754</td>\n"," <td>A29Z5PI9BW2PU3</td>\n"," <td>Robbie</td>\n"," <td>7</td>\n"," <td>7</td>\n"," <td>1</td>\n"," <td>961718400</td>\n"," <td>Great Product</td>\n"," <td>This was a really good idea and the final prod...</td>\n"," </tr>\n"," <tr>\n"," <th>1243</th>\n"," <td>1244</td>\n"," <td>B00002Z754</td>\n"," <td>A3B8RCEI0FXFI6</td>\n"," <td>B G Chase</td>\n"," <td>10</td>\n"," <td>10</td>\n"," <td>1</td>\n"," <td>962236800</td>\n"," <td>WOW Make your own 'slickers' !</td>\n"," <td>I just received my shipment and could hardly w...</td>\n"," </tr>\n"," </tbody>\n","</table>\n","</div>"],"text/plain":[" Id ProductId UserId ProfileName HelpfulnessNumerator \\\n","1244 1245 B00002Z754 A29Z5PI9BW2PU3 Robbie 7 \n","1243 1244 B00002Z754 A3B8RCEI0FXFI6 B G Chase 10 \n","\n"," HelpfulnessDenominator Score Time \\\n","1244 7 1 961718400 \n","1243 10 1 962236800 \n","\n"," Summary \\\n","1244 Great Product \n","1243 WOW Make your own 'slickers' ! \n","\n"," Text \n","1244 This was a really good idea and the final prod... \n","1243 I just received my shipment and could hardly w... 
"]},"metadata":{"tags":[]},"execution_count":12}]},{"metadata":{"id":"fgl44FWyDYGY","colab_type":"code","colab":{}},"cell_type":"code","source":["text_data = data['Summary']+' '+data['Summary']+' '+data['Summary'] +' '+ data['Text']"],"execution_count":0,"outputs":[]},{"metadata":{"id":"FnClhBsrD8Wa","colab_type":"code","colab":{}},"cell_type":"code","source":["#cleaning the data\n","# importing the libraries needed\n","import re\n","import nltk\n","import string\n","from bs4 import BeautifulSoup"],"execution_count":0,"outputs":[]},{"metadata":{"id":"QPmLiLI1EH8s","colab_type":"code","colab":{"base_uri":"https://localhost:8080/","height":101},"outputId":"e4772d17-d33c-4cc1-b5a2-1d81c3ddbb43","executionInfo":{"status":"ok","timestamp":1549623801412,"user_tz":-330,"elapsed":993616,"user":{"displayName":"Akash Jain","photoUrl":"https://lh6.googleusercontent.com/-URkka8RJQGo/AAAAAAAAAAI/AAAAAAAAAKQ/rFW0wYatpjA/s64/photo.jpg","userId":"13748978918988779747"}}},"cell_type":"code","source":["nltk.download('wordnet')\n","nltk.download('stopwords')"],"execution_count":15,"outputs":[{"output_type":"stream","text":["[nltk_data] Downloading package wordnet to /root/nltk_data...\n","[nltk_data] Unzipping corpora/wordnet.zip.\n","[nltk_data] Downloading package stopwords to /root/nltk_data...\n","[nltk_data] Unzipping corpora/stopwords.zip.\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/plain":["True"]},"metadata":{"tags":[]},"execution_count":15}]},{"metadata":{"id":"Siv1Klw7WI-8","colab_type":"code","colab":{}},"cell_type":"code","source":["stopword = nltk.corpus.stopwords.words('english')\n","lemm = nltk.stem.wordnet.WordNetLemmatizer()\n","stopword = set(stopword)\n","def cleaner(phrase):\n"," # decontraction\n"," phrase = re.sub(r\"won't\", \"will not\", phrase)\n"," phrase = re.sub(r\"can\\'t\", \"can not\", phrase)\n"," phrase = re.sub(r\"n\\'t\", \" not\", phrase)\n"," phrase = re.sub(r\"\\'re\", \" are\", phrase) # r means raw string\n"," phrase = 
re.sub(r\"\'s\", \" is\", phrase)\n"," phrase = re.sub(r\"\'d\", \" would\", phrase)\n"," phrase = re.sub(r\"\'ll\", \" will\", phrase)\n"," phrase = re.sub(r\"\'t\", \" not\", phrase)\n"," phrase = re.sub(r\"\'ve\", \" have\", phrase)\n"," phrase = re.sub(r\"\'m\", \" am\", phrase)\n"," # drop every whitespace-delimited token that contains a digit; the pattern has to end in a\n"," # greedy non-space run, otherwise only part of the token is removed and fragments remain\n"," phrase = re.sub(r\"\S*\d\S*\",\"\", phrase).strip()\n"," # collapse each run of non-alphanumeric characters into one space\n"," phrase = re.sub(r\"[^A-Za-z0-9]+\",\" \",phrase)\n"," return phrase"],"execution_count":0,"outputs":[]},{"metadata":{"id":"7sPovd5xWJC2","colab_type":"code","colab":{"base_uri":"https://localhost:8080/","height":34},"outputId":"27acc6f2-126c-4a71-ef90-a5851836528c","executionInfo":{"status":"ok","timestamp":1549623810428,"user_tz":-330,"elapsed":1002513,"user":{"displayName":"Akash Jain","photoUrl":"https://lh6.googleusercontent.com/-URkka8RJQGo/AAAAAAAAAAI/AAAAAAAAAKQ/rFW0wYatpjA/s64/photo.jpg","userId":"13748978918988779747"}}},"cell_type":"code","source":["from tqdm import tqdm\n","cleaned_text = []\n","words = []\n","for sent in tqdm(text_data):\n"," sent = BeautifulSoup(sent,'lxml').get_text()\n"," sent = cleaner(sent)\n"," sent = ' '.join(e.lower() for e in sent.split() if e.lower() not in stopword)\n"," sent = ' '.join(lemm.lemmatize(e) for e in sent.split())\n"," cleaned_text.append(sent)"],"execution_count":17,"outputs":[{"output_type":"stream","text":["100%|██████████| 9510/9510 [00:08<00:00, 1064.07it/s]\n"],"name":"stderr"}]},{"metadata":{"id":"8lIWfNFAGvRA","colab_type":"code","colab":{}},"cell_type":"code","source":["from collections import Counter\n","# count tokens review by review: str(cleaned_text) is the repr of the whole list, so\n","# splitting it glued brackets, quotes and commas onto each review's first and last word\n","vocab_with_freq = Counter(word for sent in cleaned_text for word in sent.split())"],"execution_count":0,"outputs":[]},{"metadata":{"id":"isqa-AfhGwJ2","colab_type":"code","colab":{}},"cell_type":"code","source":["vocab = 
vocab_with_freq.most_common(10000)"],"execution_count":0,"outputs":[]},{"metadata":{"id":"Hs9PlFACM35n","colab_type":"code","colab":{"base_uri":"https://localhost:8080/","height":403},"outputId":"9e5fa969-c2ae-4bdb-a610-c69bc2ce11fd","executionInfo":{"status":"ok","timestamp":1549623811019,"user_tz":-330,"elapsed":1003038,"user":{"displayName":"Akash Jain","photoUrl":"https://lh6.googleusercontent.com/-URkka8RJQGo/AAAAAAAAAAI/AAAAAAAAAKQ/rFW0wYatpjA/s64/photo.jpg","userId":"13748978918988779747"}}},"cell_type":"code","source":["cleaned_text[0].split()"],"execution_count":20,"outputs":[{"output_type":"execute_result","data":{"text/plain":["['great',\n"," 'product',\n"," 'great',\n"," 'product',\n"," 'great',\n"," 'product',\n"," 'really',\n"," 'good',\n"," 'idea',\n"," 'final',\n"," 'product',\n"," 'outstanding',\n"," 'use',\n"," 'decal',\n"," 'car',\n"," 'window',\n"," 'everybody',\n"," 'asks',\n"," 'bought',\n"," 'decal',\n"," 'made',\n"," 'two',\n"," 'thumb']"]},"metadata":{"tags":[]},"execution_count":20}]},{"metadata":{"id":"2z3jnsotNcHA","colab_type":"code","colab":{}},"cell_type":"code","source":["top_words = []\n","for (word,freq) in vocab:\n"," top_words.append(word)"],"execution_count":0,"outputs":[]},{"metadata":{"id":"5mwUtmhiT1hg","colab_type":"code","colab":{}},"cell_type":"code","source":["dic = dict(vocab)"],"execution_count":0,"outputs":[]},{"metadata":{"id":"PV2I_hKYUKof","colab_type":"code","colab":{"base_uri":"https://localhost:8080/","height":34},"outputId":"49271133-f060-4af8-9f90-20bebe68d034","executionInfo":{"status":"ok","timestamp":1549623811027,"user_tz":-330,"elapsed":1002973,"user":{"displayName":"Akash 
Jain","photoUrl":"https://lh6.googleusercontent.com/-URkka8RJQGo/AAAAAAAAAAI/AAAAAAAAAKQ/rFW0wYatpjA/s64/photo.jpg","userId":"13748978918988779747"}}},"cell_type":"code","source":["dic.get('like')"],"execution_count":23,"outputs":[{"output_type":"execute_result","data":{"text/plain":["4760"]},"metadata":{"tags":[]},"execution_count":23}]},{"metadata":{"id":"sMl80fLcUDdp","colab_type":"code","colab":{}},"cell_type":"code","source":["# Encode every token as its frequency rank (1 = most frequent word); 0 marks\n","# out-of-vocabulary tokens and is also the value pad_sequences pads with.\n","# dic maps word -> raw count, so feeding its values to the model gave words with\n","# equal counts the same id and allowed ids above the Embedding input_dim (10000).\n","# Only the first 9999 words are ranked so that every id stays below 10000.\n","word_to_index = {word: rank for rank, word in enumerate(top_words[:9999], start=1)}\n","final_text_data = [\n","    [word_to_index.get(wor, 0) for wor in str(sent).split()]\n","    for sent in cleaned_text\n","]\n"],"execution_count":0,"outputs":[]},{"metadata":{"id":"VV81G_CNY2io","colab_type":"code","colab":{"base_uri":"https://localhost:8080/","height":34},"outputId":"bb53fb1a-1a6b-4df5-ec69-67f536d62782","executionInfo":{"status":"ok","timestamp":1549623821942,"user_tz":-330,"elapsed":1013859,"user":{"displayName":"Akash Jain","photoUrl":"https://lh6.googleusercontent.com/-URkka8RJQGo/AAAAAAAAAAI/AAAAAAAAAKQ/rFW0wYatpjA/s64/photo.jpg","userId":"13748978918988779747"}}},"cell_type":"code","source":["len(final_text_data)"],"execution_count":25,"outputs":[{"output_type":"execute_result","data":{"text/plain":["9510"]},"metadata":{"tags":[]},"execution_count":25}]},{"metadata":{"id":"hCO79zdwY3ro","colab_type":"code","colab":{}},"cell_type":"code","source":["# number of tokens in each encoded review\n","max_length_of_text = [len(lst) for lst in final_text_data]"],"execution_count":0,"outputs":[]},{"metadata":{"id":"yjvtdrwnZPUZ","colab_type":"code","colab":{"base_uri":"https://localhost:8080/","height":34},"outputId":"8dbd0b14-da00-4a92-8b68-84f24ae654c0","executionInfo":{"status":"ok","timestamp":1549623821945,"user_tz":-330,"elapsed":1013820,"user":{"displayName":"Akash 
Jain","photoUrl":"https://lh6.googleusercontent.com/-URkka8RJQGo/AAAAAAAAAAI/AAAAAAAAAKQ/rFW0wYatpjA/s64/photo.jpg","userId":"13748978918988779747"}}},"cell_type":"code","source":["max(max_length_of_text)"],"execution_count":27,"outputs":[{"output_type":"execute_result","data":{"text/plain":["874"]},"metadata":{"tags":[]},"execution_count":27}]},{"metadata":{"id":"cFCag_T5akuO","colab_type":"text"},"cell_type":"markdown","source":["Maximum Length of text data is 874"]},{"metadata":{"id":"TeHlSeFma4SB","colab_type":"code","colab":{"base_uri":"https://localhost:8080/","height":34},"outputId":"f7d735bc-37c1-4cb5-eeb1-7d3967a6cdf5","executionInfo":{"status":"ok","timestamp":1549623821947,"user_tz":-330,"elapsed":1013799,"user":{"displayName":"Akash Jain","photoUrl":"https://lh6.googleusercontent.com/-URkka8RJQGo/AAAAAAAAAAI/AAAAAAAAAKQ/rFW0wYatpjA/s64/photo.jpg","userId":"13748978918988779747"}}},"cell_type":"code","source":["data.shape"],"execution_count":28,"outputs":[{"output_type":"execute_result","data":{"text/plain":["(9510, 10)"]},"metadata":{"tags":[]},"execution_count":28}]},{"metadata":{"id":"TuPA4Ztscva3","colab_type":"code","colab":{}},"cell_type":"code","source":["y = data['Score']"],"execution_count":0,"outputs":[]},{"metadata":{"id":"U0AE4PX_czD7","colab_type":"code","colab":{}},"cell_type":"code","source":["from sklearn.model_selection import train_test_split\n","# fixed seed so the split (and the reported accuracy) is reproducible; stratify keeps\n","# the positive/negative ratio the same in the train and test parts\n","xtr,xtest,ytr,ytest = train_test_split(final_text_data,y,test_size = 0.2,random_state = 42,stratify = y)"],"execution_count":0,"outputs":[]},{"metadata":{"id":"_EOKQ5JRc_xz","colab_type":"code","colab":{"base_uri":"https://localhost:8080/","height":1092},"outputId":"f7b27863-d70b-453e-b991-4e87b0e47e41","executionInfo":{"status":"ok","timestamp":1549623821959,"user_tz":-330,"elapsed":1013771,"user":{"displayName":"Akash Jain","photoUrl":"https://lh6.googleusercontent.com/-URkka8RJQGo/AAAAAAAAAAI/AAAAAAAAAKQ/rFW0wYatpjA/s64/photo.jpg","userId":"13748978918988779747"}}},"cell_type":"code","source":["# 
truncate and/or pad input sequences\n","max_review_length = 874\n","X_train = sequence.pad_sequences(xtr, maxlen=max_review_length)\n","X_test = sequence.pad_sequences(xtest, maxlen=max_review_length)\n","\n","print(X_train.shape)\n","print(X_train[1])"],"execution_count":31,"outputs":[{"output_type":"stream","text":["(7608, 874)\n","[ 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n"," 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n"," 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n"," 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n"," 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n"," 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n"," 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n"," 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n"," 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n"," 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n"," 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n"," 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n"," 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n"," 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n"," 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n"," 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n"," 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n"," 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n"," 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n"," 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n"," 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n"," 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n"," 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n"," 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n"," 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n"," 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n"," 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n"," 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n"," 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n"," 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n"," 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n"," 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n"," 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n"," 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n"," 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n"," 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n"," 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n"," 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n"," 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n"," 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n"," 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n"," 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n"," 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n"," 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n"," 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n"," 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n"," 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n"," 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n"," 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n"," 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n"," 0 0 0 0 0 0 
0 0 0 0 0 0 0 0\n"," 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n"," 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n"," 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n"," 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n"," 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n"," 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n"," 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n"," 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n"," 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n"," 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n"," 0 0 0 122 122 122 261 177 793 1576 601 156 0 3801\n"," 248 793 1576 2077 890 122]\n"],"name":"stdout"}]},{"metadata":{"id":"9Xvjk474do68","colab_type":"code","colab":{"base_uri":"https://localhost:8080/","height":440},"outputId":"f4845b47-5845-4f4f-cbb8-be097231492e","executionInfo":{"status":"ok","timestamp":1549623821960,"user_tz":-330,"elapsed":1013727,"user":{"displayName":"Akash Jain","photoUrl":"https://lh6.googleusercontent.com/-URkka8RJQGo/AAAAAAAAAAI/AAAAAAAAAKQ/rFW0wYatpjA/s64/photo.jpg","userId":"13748978918988779747"}}},"cell_type":"code","source":["# create the model\n","embedding_vecor_length = 32\n","model = Sequential()\n","model.add(Embedding(10000, embedding_vecor_length, input_length=max_review_length))\n","model.add(LSTM(100,return_sequences=True))\n","model.add(LSTM(100))\n","model.add(Dropout(0.2))\n","model.add(Dense(1, activation='sigmoid'))\n","model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])\n","print(model.summary())\n","#Refer: https://datascience.stackexchange.com/questions/10615/number-of-parameters-in-an-lstm-model"],"execution_count":32,"outputs":[{"output_type":"stream","text":["WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/op_def_library.py:263: colocate_with (from tensorflow.python.framework.ops) is deprecated and will be removed in a future version.\n","Instructions for updating:\n","Colocations handled automatically by placer.\n","WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:3445: calling dropout (from tensorflow.python.ops.nn_ops) with keep_prob is 
deprecated and will be removed in a future version.\n","Instructions for updating:\n","Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.\n","_________________________________________________________________\n","Layer (type) Output Shape Param # \n","=================================================================\n","embedding_1 (Embedding) (None, 874, 32) 320000 \n","_________________________________________________________________\n","lstm_1 (LSTM) (None, 874, 100) 53200 \n","_________________________________________________________________\n","lstm_2 (LSTM) (None, 100) 80400 \n","_________________________________________________________________\n","dropout_1 (Dropout) (None, 100) 0 \n","_________________________________________________________________\n","dense_1 (Dense) (None, 1) 101 \n","=================================================================\n","Total params: 453,701\n","Trainable params: 453,701\n","Non-trainable params: 0\n","_________________________________________________________________\n","None\n"],"name":"stdout"}]},{"metadata":{"id":"-A9YqmJej2cP","colab_type":"code","colab":{"base_uri":"https://localhost:8080/","height":440},"outputId":"be3d9f75-af2e-4d17-ed48-c91d9ef70776","executionInfo":{"status":"ok","timestamp":1549629221365,"user_tz":-330,"elapsed":1490,"user":{"displayName":"Akash Jain","photoUrl":"https://lh6.googleusercontent.com/-URkka8RJQGo/AAAAAAAAAAI/AAAAAAAAAKQ/rFW0wYatpjA/s64/photo.jpg","userId":"13748978918988779747"}}},"cell_type":"code","source":["# epochs is the current name of the legacy Keras 1 keyword nb_epoch\n","model.fit(X_train, ytr, epochs=10, batch_size=64)\n","# Final evaluation of the model\n","scores = model.evaluate(X_test, ytest, verbose=0)\n","print(\"Accuracy: %.2f%%\" % (scores[1]*100))"],"execution_count":33,"outputs":[{"output_type":"stream","text":["WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/math_ops.py:3066: to_int32 (from tensorflow.python.ops.math_ops) is deprecated and will be removed 
in a future version.\n","Instructions for updating:\n","Use tf.cast instead.\n","Epoch 1/10\n","7608/7608 [==============================] - 447s 59ms/step - loss: 0.5441 - acc: 0.7869\n","Epoch 2/10\n","7608/7608 [==============================] - 448s 59ms/step - loss: 0.3555 - acc: 0.8456\n","Epoch 3/10\n","7608/7608 [==============================] - 448s 59ms/step - loss: 0.3251 - acc: 0.8647\n","Epoch 4/10\n","7608/7608 [==============================] - 448s 59ms/step - loss: 0.3158 - acc: 0.8695\n","Epoch 5/10\n","7608/7608 [==============================] - 447s 59ms/step - loss: 0.3202 - acc: 0.8663\n","Epoch 6/10\n","7608/7608 [==============================] - 447s 59ms/step - loss: 0.3129 - acc: 0.8704\n","Epoch 7/10\n","7608/7608 [==============================] - 448s 59ms/step - loss: 0.2954 - acc: 0.8810\n","Epoch 8/10\n","7608/7608 [==============================] - 448s 59ms/step - loss: 0.2832 - acc: 0.8871\n","Epoch 9/10\n","7608/7608 [==============================] - 448s 59ms/step - loss: 0.2739 - acc: 0.8921\n","Epoch 10/10\n","7608/7608 [==============================] - 446s 59ms/step - loss: 0.2660 - acc: 0.8968\n","Accuracy: 84.96%\n"],"name":"stdout"}]},{"metadata":{"id":"NSQj6HGIlUvA","colab_type":"code","colab":{}},"cell_type":"code","source":[""],"execution_count":0,"outputs":[]}]}