-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathsvm_model.py
More file actions
55 lines (32 loc) · 1.14 KB
/
svm_model.py
File metadata and controls
55 lines (32 loc) · 1.14 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
# coding: utf-8
# In[48]:
from keras.models import Sequential
from keras import layers
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.linear_model import SGDClassifier
from sklearn.pipeline import Pipeline
from sklearn.feature_extraction.text import TfidfTransformer
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.metrics import classification_report
# file import
import data_cleaner as dc
import model_helper as mh
# Load the item data through the project-local data_cleaner helpers.
# NOTE(review): the meaning of the `0` argument is defined in data_cleaner,
# not here -- confirm against that module.
df = dc.clean_item_data(0)
df = dc.cleanup_categoryid(df)

# In[49]:

# Model input is the `item_title` column; the target is the `categoryId` column.
X = df.item_title
Y = df.categoryId

# In[50]:

# Hold out half of the rows for evaluation. The split is seeded so that the
# metrics printed below are reproducible from run to run (the classifier is
# already seeded with random_state=42; an unseeded split defeats that).
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.5, random_state=42
)

# In[51]:

# Text classification pipeline:
#   1. CountVectorizer   - raw text -> token count matrix
#   2. TfidfTransformer  - token counts -> TF-IDF weights
#   3. SGDClassifier     - linear model fitted with SGD; with loss='hinge'
#                          this is a linear SVM (see scikit-learn docs)
sgd = Pipeline([
    ('vect', CountVectorizer()),
    ('tfidf', TfidfTransformer()),
    ('clf', SGDClassifier(loss='hinge', penalty='l2', alpha=1e-3,
                          random_state=42, max_iter=5, tol=None)),
])
sgd.fit(X_train, Y_train)
Y_pred = sgd.predict(X_test)

# scikit-learn metrics take (y_true, y_pred) in that order; keep both calls
# consistent so that swapping in an asymmetric metric later stays correct.
print('accuracy %s' % accuracy_score(Y_test, Y_pred))
print(classification_report(Y_test, Y_pred))
# Accuracy ~81% as recorded by the original author (measured with an
# unseeded split; re-measure with the fixed seed above).