-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathimdb_review_binary_classification.py
More file actions
112 lines (82 loc) · 3.44 KB
/
imdb_review_binary_classification.py
File metadata and controls
112 lines (82 loc) · 3.44 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
from keras.datasets import imdb
from keras import models
from keras import layers
import numpy as np
import matplotlib.pyplot as plt
# Load the IMDB review dataset, keeping only word indices below the
# ``num_words`` cap.
(train_data, train_labels), (test_data, test_labels) = imdb.load_data(
    num_words=10000,
)

# Report the size of each split, in the same order for train and test.
for _caption, _value in (
    ('train_data shape:', train_data.shape),
    ('train_labels length:', len(train_labels)),
    ('test_data shape:', test_data.shape),
    ('test_labels length:', len(test_labels)),
):
    print(_caption, _value)

# Uncomment to print the largest word index present in the training reviews:
# print('train_data max value:', max([max(sequence) for sequence in train_data]))
def decode_review(review):
    """Print the text of an integer-encoded review.

    Every value in ``review`` is shifted down by 3 and looked up in the
    inverted ``imdb.get_word_index()`` mapping. Values with no matching word
    are rendered as ``'?'``. The words are joined with single spaces and
    printed; nothing is returned.
    """
    # Invert the word -> index mapping so indices can be turned back into words.
    index_to_word = {index: word for word, index in imdb.get_word_index().items()}

    words = []
    for value in review:
        # NOTE(review): the -3 shift presumably compensates for the index
        # offset applied by imdb.load_data (default index_from=3) - confirm
        # against the Keras documentation.
        words.append(index_to_word.get(value - 3, '?'))

    print(' '.join(words))
# Uncomment to print the first training review as readable text:
# decode_review(train_data[0])
def vectorize_sequences(sequences, dimension=10000):
    """Multi-hot encode integer sequences into a 2-D float array.

    Args:
        sequences: Sized iterable of integer-index sequences, one per sample.
        dimension: Width of every output row. Defaults to 10000, the value
            this script passes explicitly.

    Returns:
        A ``numpy.ndarray`` of shape ``(len(sequences), dimension)`` (NumPy's
        default float dtype). ``results[i, j]`` is 1.0 when ``j`` occurs in
        ``sequences[i]`` and 0.0 otherwise; repeated indices are set once.

    Raises:
        IndexError: If a sequence contains an index that does not fit in a
            row of width ``dimension``.
    """
    # ``dimension`` used to be read in the body without being a parameter,
    # although callers pass it as the second argument; it is now a real
    # (optional) parameter.
    results = np.zeros((len(sequences), dimension))
    for i, sequence in enumerate(sequences):
        # Integer-array indexing: switch on every column listed in this sample.
        results[i, sequence] = 1.
    return results
# Width of each multi-hot input vector; also the model's input size.
DATA_VECTOR_DIMENSION = 10000

# Turn every review (a sequence of word indices) into a fixed-width vector.
x_train = vectorize_sequences(train_data, DATA_VECTOR_DIMENSION)
x_test = vectorize_sequences(test_data, DATA_VECTOR_DIMENSION)

# Labels are copied into float32 arrays.
y_train = np.array(train_labels, dtype='float32')
y_test = np.array(test_labels, dtype='float32')
def train_validate():
    """Train with a held-out validation split and plot the learning curves.

    The first 10000 rows of the module-level ``x_train`` / ``y_train`` are
    held out for validation; the remaining rows are used for fitting. A
    Dense(16, relu) -> Dense(16, relu) -> Dense(1, sigmoid) network is trained
    for 20 epochs with batch size 512, then training/validation loss and
    accuracy per epoch are drawn on a single matplotlib figure and shown.
    Returns nothing.
    """
    holdout = 10000
    x_val, partial_x_train = x_train[:holdout], x_train[holdout:]
    y_val, partial_y_train = y_train[:holdout], y_train[holdout:]

    model = models.Sequential([
        layers.Dense(16, activation='relu', input_shape=(DATA_VECTOR_DIMENSION,)),
        layers.Dense(16, activation='relu'),
        layers.Dense(1, activation='sigmoid'),
    ])
    model.compile(
        optimizer='rmsprop',
        loss='binary_crossentropy',
        metrics=['accuracy'],
    )

    history = model.fit(
        partial_x_train,
        partial_y_train,
        epochs=20,
        batch_size=512,
        validation_data=(x_val, y_val),
    )

    # (history key, matplotlib format string, legend label), in drawing order.
    curve_specs = (
        ('loss', 'bo', 'Training loss'),
        ('val_loss', 'b', 'Validation loss'),
        ('accuracy', 'ro', "Training accuracy"),
        ('val_accuracy', 'r', "Validation accuracy"),
    )
    recorded = history.history
    # Resolve every metric up front so a missing key fails before any drawing.
    curves = [(recorded[key], fmt, label) for key, fmt, label in curve_specs]

    # Epochs are numbered from 1 on the x axis.
    epochs = range(1, len(curves[0][0]) + 1)
    for values, fmt, label in curves:
        plt.plot(epochs, values, fmt, label=label)

    plt.title('Training and validation loss with acc')
    plt.xlabel('Epochs')
    plt.ylabel('Rate')
    plt.legend()
    plt.show()
# Uncomment to run the training/validation experiment and show its plot:
# train_validate()
def train_evaluate():
    """Train on the full training set, report test metrics, print predictions.

    Builds a Dense(16, relu) -> Dense(16, relu) -> Dense(1, sigmoid) network,
    fits it on the module-level ``x_train`` / ``y_train`` for 4 epochs with
    batch size 512, prints the loss and accuracy measured on
    ``x_test`` / ``y_test``, and finally prints one
    ``(model output, true label)`` tuple per test sample. Returns nothing.
    """
    model = models.Sequential([
        layers.Dense(16, activation='relu', input_shape=(DATA_VECTOR_DIMENSION,)),
        layers.Dense(16, activation='relu'),
        layers.Dense(1, activation='sigmoid'),
    ])
    model.compile(
        optimizer='rmsprop',
        loss='binary_crossentropy',
        metrics=['accuracy'],
    )
    model.fit(x_train, y_train, epochs=4, batch_size=512)

    loss, accuracy = model.evaluate(x_test, y_test)
    print('evaluate loss:', loss)
    print('evaluate accuracy:', accuracy)

    def predict():
        """Print the model output next to the true label for every test sample."""
        # Nested so that it can read the trained ``model`` from the enclosing scope.
        scores = model.predict(x_test)
        # Each prediction row holds a single sigmoid output; pair it with its label.
        pairs = [(row[0], label) for row, label in zip(scores, y_test)]
        for pair in pairs:
            print(pair)

    predict()
# Script entry point: train on the full training set and print the test metrics.
train_evaluate()