-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathxray_classifier
More file actions
178 lines (140 loc) · 7.33 KB
/
xray_classifier
File metadata and controls
178 lines (140 loc) · 7.33 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
from keras.preprocessing.image import ImageDataGenerator
import keras
from keras.models import Sequential
from keras.layers import Dense, Conv2D, BatchNormalization, Dropout, MaxPooling2D, Flatten, Activation
from keras.callbacks import ModelCheckpoint, ReduceLROnPlateau, EarlyStopping
import matplotlib.pyplot as plt
# --- Batch size and image geometry -----------------------------------------
batch_size = 16
im_height = 150
im_width = 150

# The channel axis sits first or last depending on the active Keras backend
# configuration, so derive the per-image input shape from what it reports.
inp_shape = (
    (3, im_width, im_height)
    if keras.backend.image_data_format() == 'channels_first'
    else (im_width, im_height, 3)
)

# --- Dataset folders (relative to the working directory) --------------------
train_dir = "chest_xray/train"
val_dir = "chest_xray/val"
test_dir = "chest_xray/test"


def _augmenting_datagen():
    """Return a new ImageDataGenerator that rescales and randomly perturbs images.

    A fresh instance (and a fresh ``brightness_range`` list) is built on every
    call so the two augmented pipelines below share no state.
    """
    return ImageDataGenerator(
        rescale=1. / 255,
        shear_range=0.2,
        zoom_range=0.2,
        horizontal_flip=True,
        rotation_range=30,
        width_shift_range=0.,
        channel_shift_range=0.9,
        brightness_range=[0.5, 1.5],
    )


# --- Image pipelines --------------------------------------------------------
# The "_1" pipelines only rescale pixel values to [0, 1]; the "_2" pipelines
# additionally apply the random augmentations configured above.
train_1_datagen = ImageDataGenerator(rescale=1. / 255)
train_2_datagen = _augmenting_datagen()
test_datagen = ImageDataGenerator(rescale=1. / 255)
test_datagen_2 = _augmenting_datagen()

# --- Directory iterators ----------------------------------------------------
# Every iterator resizes to the same target size and uses the same batch size.
# class_mode is not passed, so the Keras default applies; the two-unit output
# layer built in create_model() is what these labels are fed to.  Switching to
# class_mode="binary" would require a single-unit output layer instead.
_flow_options = {
    'target_size': (im_width, im_height),
    'batch_size': batch_size,
}
train_generator_1 = train_1_datagen.flow_from_directory(train_dir, **_flow_options)
test_generator_1 = test_datagen.flow_from_directory(test_dir, **_flow_options)
# NOTE(review): despite its name, this generator reads from val_dir rather
# than train_dir -- confirm that fine-tuning on the validation folder is
# intended.
train_generator_2 = train_2_datagen.flow_from_directory(val_dir, **_flow_options)
test_generator_2 = test_datagen_2.flow_from_directory(test_dir, **_flow_options)
def image_show(image_generator):
    """Display up to nine images from the next batch of ``image_generator``.

    The images are laid out on a 3x3 grid in a single 8x8 inch figure, and
    the figure is shown once all subplots have been filled.

    Args:
        image_generator: Iterator yielding ``(images, labels)`` batches, such
            as the directory iterators created at module level.  One batch is
            consumed per call.
    """
    images, _labels = next(image_generator)
    rows = 3
    columns = 3
    # imshow expects the channel axis last; channels-first batches need their
    # axes moved from (channels, rows, cols) to (rows, cols, channels).
    channels_first = keras.backend.image_data_format() == 'channels_first'

    fig = plt.figure(figsize=(8, 8))
    # Slicing keeps the first image of the batch and copes with batches that
    # hold fewer than nine images; subplot positions are 1-based.
    for position, image in enumerate(images[:rows * columns], start=1):
        if channels_first:
            image = image.transpose(1, 2, 0)
        fig.add_subplot(rows, columns, position)
        plt.imshow(image)
    plt.show()
# Preview one batch from each of the two training generators before training.
# If a plotting error crashes the run, stop showing these images.
for _preview_source in (train_generator_1, train_generator_2):
    image_show(_preview_source)
def create_model(input_shape=None):
    """Build and compile the X-ray CNN classifier.

    The network stacks four Conv2D -> ReLU -> MaxPooling2D ->
    BatchNormalization stages (64, 128, 256 and 256 filters, 3x3 kernels,
    2x2 pooling), flattens the result, and finishes with two
    dropout-regularised dense layers (128 and 64 units) and a two-unit
    sigmoid output layer.

    Args:
        input_shape: Shape of a single input image, without the batch axis.
            When omitted, the module-level ``inp_shape`` is used so the model
            follows the backend's channel ordering instead of assuming a
            channels-last ``(150, 150, 3)`` input.

    Returns:
        The compiled ``Sequential`` model (binary cross-entropy loss, Adam
        optimizer, accuracy metric).
    """
    if input_shape is None:
        # Follow the backend-dependent shape computed at module level.
        input_shape = inp_shape

    # BatchNormalization should normalise over the channel axis: axis 1 for
    # channels-first data, the last axis (the Keras default) otherwise.
    channels_first = keras.backend.image_data_format() == 'channels_first'
    bn_axis = 1 if channels_first else -1

    model = Sequential()

    # Convolutional feature extractor: the filter count grows with depth.
    for stage, filters in enumerate((64, 128, 256, 256)):
        # Only the first layer of a Sequential model declares the input shape.
        first_layer_kwargs = {'input_shape': input_shape} if stage == 0 else {}
        model.add(Conv2D(filters, (3, 3), **first_layer_kwargs))
        model.add(Activation("relu"))
        model.add(MaxPooling2D(pool_size=(2, 2)))
        model.add(BatchNormalization(axis=bn_axis))

    # Dense layers need a 1-D input, so flatten the feature maps first.
    model.add(Flatten())
    model.add(Dense(128, activation='relu'))
    model.add(Dropout(0.2))
    model.add(Dense(64, activation='relu'))
    model.add(Dropout(0.2))
    # Two output units, one per class.
    model.add(Dense(2, activation='sigmoid'))

    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model
# --- Stage one: train a fresh model on the rescale-only training generator ---
model = create_model()

# The best weights (by validation accuracy) are checkpointed to this file.
filepath = "weights_training_1.hdf5"
# NOTE(review): 'val_acc' here and 'acc'/'val_acc' below must match the metric
# names the installed Keras version reports -- confirm they are not
# 'accuracy'/'val_accuracy' in this environment.
callbacks = [
    ModelCheckpoint(
        filepath,
        monitor='val_acc',
        verbose=1,
        save_best_only=True,
        mode='max',
    ),
    ReduceLROnPlateau(
        monitor='val_loss',
        factor=0.2,
        patience=3,
        verbose=1,
        mode='min',
        min_lr=0.00001,
    ),
    EarlyStopping(
        monitor='val_loss',
        min_delta=1e-10,
        patience=15,
        verbose=1,
        restore_best_weights=True,
    ),
]

# NOTE(review): the test generator doubles as validation data here -- confirm
# that this is intended.
records = model.fit_generator(
    train_generator_1,
    steps_per_epoch=100,
    epochs=25,
    validation_data=test_generator_1,
    validation_steps=7,
    verbose=1,
    callbacks=callbacks,
)

# Per-epoch curves recorded during training.
_curves = records.history
t_loss = _curves['loss']
v_loss = _curves['val_loss']
t_acc = _curves['acc']
v_acc = _curves['val_acc']

# 1-based epoch numbers for however many epochs actually ran.
train_length = range(1, 1 + len(t_acc))
def evaluation(model, train_length, training_acc, val_acc, training_loss, validation_loss, steps, generator):
    """Plot the training curves, then print the model's score on ``generator``.

    Two figures are drawn -- loss per epoch and accuracy per epoch, each with
    a training and a validation curve -- and shown together.  Afterwards the
    model is evaluated on ``generator`` and its second reported metric is
    printed as a percentage.

    Args:
        model: Compiled model to evaluate.
        train_length: X-axis values for the curves (epoch numbers).
        training_acc: Per-epoch training accuracy.
        val_acc: Per-epoch validation accuracy.
        training_loss: Per-epoch training loss.
        validation_loss: Per-epoch validation loss.
        steps: Number of batches to draw from ``generator`` when evaluating.
        generator: Batch generator passed to ``model.evaluate_generator``.
    """
    # Figure 1: loss across epochs.
    plt.figure()
    loss_curves = (
        (training_loss, 'Training Loss'),
        (validation_loss, 'Validation Loss'),
    )
    for curve, legend_label in loss_curves:
        plt.plot(train_length, curve, label=legend_label)
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.legend()

    # Figure 2: accuracy across epochs.
    plt.figure()
    accuracy_curves = (
        (training_acc, 'Training Accuracy'),
        (val_acc, 'Validation Accuracy'),
    )
    for curve, legend_label in accuracy_curves:
        plt.plot(train_length, curve, label=legend_label)
    plt.title('Training and validation accuracy')
    plt.xlabel('Epochs')
    plt.ylabel('Accuracy')
    plt.legend()

    plt.show()

    # Score the model on the supplied generator and report the metric at
    # index 1 of its results (index 0 is not printed).
    scores = model.evaluate_generator(generator, steps=steps)
    metric_name = model.metrics_names[1]
    metric_percent = scores[1] * 100
    print("\n%s: %.2f%%" % (metric_name, metric_percent))
# Report the stage-one curves and score the stage-one model on the
# rescale-only test generator.
evaluation(model, train_length, t_acc, v_acc, t_loss, v_loss, 100, test_generator_1)

# --- Stage two: fine-tune the head of the network on augmented data ---------
# Rebuild the same architecture and restore the checkpoint written during
# stage one.
model_2 = create_model()
model_2.load_weights("weights_training_1.hdf5")

# Freeze everything except the last five layers so only those are updated.
for layer in model_2.layers[:-5]:
    layer.trainable = False

# Log which layers remain trainable.
for layer in model_2.layers:
    print(layer, layer.trainable)

# Compile again after changing the trainable flags so that the new settings
# are the ones used for training.
model_2.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# Checkpoint next to the stage-one weights.  The previous value was an
# absolute path inside one user's Windows profile, which cannot be written on
# any other machine.
filepath = "weights_training_2.hdf5"
callbacks = [
    ModelCheckpoint(filepath, monitor='val_acc', verbose=1, save_best_only=True, mode='max'),
    ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=3, verbose=1, mode='min', min_lr=0.00001),
    EarlyStopping(monitor='val_loss', min_delta=1e-10, patience=15, verbose=1, restore_best_weights=True),
]

# NOTE(review): validation and the final evaluation below both use the
# augmented test generator -- confirm that scoring on augmented test images
# is intended.
records = model_2.fit_generator(
    train_generator_2,
    steps_per_epoch=85,
    epochs=20,
    validation_data=test_generator_2,
    validation_steps=7,
    verbose=1,
    callbacks=callbacks,
)

# Per-epoch curves recorded during fine-tuning.
t_loss = records.history['loss']
v_loss = records.history['val_loss']
t_acc = records.history['acc']
v_acc = records.history['val_acc']

# 1-based epoch numbers for however many epochs actually ran.
train_length = range(1, len(t_acc) + 1)

evaluation(model_2, train_length, t_acc, v_acc, t_loss, v_loss, 80, test_generator_2)