Merge branch 'develop'

amsukdu · amsukdu · commit 8cbbfb04ffb8 · 2017-08-02T21:23:47.000+09:00
diff --git a/classes/conv_layer.py b/classes/conv_layer.py
@@ -1,6 +1,6 @@
 import numpy as np
-from neural_layer import NeuralLayer
-import utils as u
+from classes.neural_layer import NeuralLayer
+import classes.utils as u
 
 
 class ConvLayer(NeuralLayer):
@@ -69,7 +69,7 @@ def backward(self, d, need_d=True):
             d = d.reshape(self.w2, self.h2, self.k, -1).T
 
         delta = d * u.relu_d(self.forward_result)
-        padding = ((self.w - 1) * self.s + self.f - self.w2) / 2
+        padding = ((self.w - 1) * self.s + self.f - self.w2) // 2
         cols = u.im2col_indices(delta, self.f, self.f, padding=padding, stride=self.s)
         sum_weights = []
         for index, n in enumerate(self.neurons):
diff --git a/classes/neural_layer.py b/classes/neural_layer.py
@@ -1,7 +1,7 @@
 from classes.layer import Layer
-from neuron import Neuron
+from classes.neuron import Neuron
 import numpy as np
-import utils as u
+import classes.utils as u
 
 
 class NeuralLayer(Layer):
@@ -17,7 +17,7 @@ def __init__(self, input_size, k, u_type='adam', a_type='relu'):
         if isinstance(input_size, tuple):
             input_size = np.prod(input_size)
 
-        for n in xrange(k):
+        for n in range(k):
             self.neurons.append(Neuron(input_size))
 
     def predict(self, batch):
diff --git a/classes/neural_net.py b/classes/neural_net.py
@@ -1,8 +1,8 @@
-from neural_layer import NeuralLayer
-from conv_layer import ConvLayer
-from pool_layer import PoolLayer
+from classes.neural_layer import NeuralLayer
+from classes.conv_layer import ConvLayer
+from classes.pool_layer import PoolLayer
+import classes.utils as utils
 import numpy as np
-import utils
 
 class NeuralNetwork(object):
     def __init__(self, input_shape, layer_list, lr, l2_reg=0, dropout_p=1, loss='softmax'):
@@ -44,7 +44,6 @@ def __init__(self, input_shape, layer_list, lr, l2_reg=0, dropout_p=1, loss='sof
                 self.layers.append(fc)
                 next_input_size = fc.output_size()
 
-
     def predict(self, batch, label):
         next_input = batch
         for index, layer in enumerate(self.layers):
@@ -97,10 +96,8 @@ def epoch(self, batch, label):
                 dropout_mask = self.dropout_masks.pop()
                 back_input *= dropout_mask
 
-
         # update
         for index, layer in enumerate(self.layers):
             layer.update(self.lr, l2_reg=self.l2_reg, t=self.t)
 
-
         return loss + self.l2_reg * l2 / 2, correct_count / float(len(max_result)) * 100
diff --git a/classes/pool_layer.py b/classes/pool_layer.py
@@ -1,6 +1,6 @@
 import numpy as np
-from layer import Layer
-import utils as u
+from classes.layer import Layer
+import classes.utils as u
 
 
 # TODO only not overlapping f & s works...
@@ -30,16 +30,16 @@ def __init__(self, input_size, f=2, s=2):
 
         offset = 0
         i_offset = 0
-        for i in xrange(self.h):
+        for i in range(self.h):
             if i % self.f == 0:
                 start = self.w * i
                 offset = start
                 i_offset = i
             else:
                 start = self.f * (i - i_offset) + offset
 
-            for j in xrange(self.w / self.f):
-                self.indices += xrange(start, start + self.f)
+            for j in range(int(self.w / self.f)):
+                self.indices += range(start, start + self.f)
                 start += field_size
 
 
diff --git a/classes/utils.py b/classes/utils.py
@@ -1,13 +1,14 @@
 import numpy as np
 
+
 def softmax_loss(x, y):
     x = x.T
     probs = np.exp(x - np.max(x, axis=1, keepdims=True))
     probs /= np.sum(probs, axis=1, keepdims=True)
     N = x.shape[0]
-    loss = -np.sum(np.log(probs[xrange(N), y])) / N
+    loss = -np.sum(np.log(probs[range(N), y])) / N
     dx = probs
-    dx[xrange(N), y] -= 1
+    dx[range(N), y] -= 1
     dx /= N
     return loss, dx
 
@@ -18,12 +19,13 @@ def logistic_loss(x, y):
     dx = -(y - x)
     return loss, dx.T
 
+
 def get_im2col_indices(x_shape, field_height, field_width, padding=1, stride=1):
     N, C, H, W = x_shape
     assert (H + 2 * padding - field_height) % stride == 0
     assert (W + 2 * padding - field_height) % stride == 0
-    out_height = (H + 2 * padding - field_height) / stride + 1
-    out_width = (W + 2 * padding - field_width) / stride + 1
+    out_height = (H + 2 * padding - field_height) // stride + 1
+    out_width = (W + 2 * padding - field_width) // stride + 1
 
     i0 = np.repeat(np.arange(field_height), field_width)
     i0 = np.tile(i0, C)
@@ -78,6 +80,7 @@ def adam_update(neurons, lr, t, l2_reg=0, beta1=np.float32(0.9), beta2=np.float3
         n.weights -= lr * m / (np.sqrt(v) + 1e-8) + l2
         n.b -= lr * d_bias
 
+
 def nag_update(neurons, lr, l2_reg=0, mu=np.float32(0.9)):
     for n in neurons:
         l2 = l2_reg * n.weights
@@ -90,6 +93,7 @@ def nag_update(neurons, lr, l2_reg=0, mu=np.float32(0.9)):
         n.weights += -mu * n.v_prev + (1 + mu) * n.v - l2
         n.b -= lr * d_bias
 
+
 def momentum_update(neurons, lr, l2_reg=0, mu=np.float32(0.9)):
     for n in neurons:
         l2 = l2_reg * n.weights
@@ -111,14 +115,18 @@ def vanila_update(neurons, lr, l2_reg=0):
         n.weights -= lr * dx + l2
         n.b -= lr * d_bias
 
+
 def sigmoid(input):
     return 1/(1+np.exp(-input))
 
+
 def relu(input):
     return np.maximum(0, input)
 
+
 def sigmoid_d(input):
     return input * (1 - input)
 
+
 def relu_d(input):
     return input > 0
diff --git a/example/cifar-10-batches-py/main.py b/example/cifar-10-batches-py/main.py
@@ -1,14 +1,15 @@
 import sys, os
 sys.path.insert(1, os.path.split(os.path.split(sys.path[0])[0])[0])
-import cPickle as pkl
+import pickle as pkl
 import numpy as np
 from sklearn import preprocessing
 from sklearn.utils import shuffle
 from classes.neural_net import NeuralNetwork
 
+
 def unpickle(file):
     fo = open(file, 'rb')
-    dict = pkl.load(fo)
+    dict = pkl.load(fo, encoding='latin1')
     fo.close()
     return dict
 
@@ -20,7 +21,7 @@ def unpickle(file):
 
 test_images = None
 test_labels = []
-for i in xrange(1, 6):
+for i in range(1, 6):
     data = unpickle(sys.path[0] + '/data_batch_'+str(i))
     if train_images is None:
         train_images = data['data']
@@ -29,75 +30,66 @@ def unpickle(file):
 
     train_labels += data['labels']
 
-# train_images = train_images[:100]
-# train_labels = train_labels[:100]
-
 train_images = train_images.reshape(-1, 3, 32, 32)
 train_images = train_images.astype(np.float128)
-train_images /= 255.0
-
-r_mean = np.average(train_images[:, 0])
-g_mean = np.average(train_images[:, 1])
-b_mean = np.average(train_images[:, 2])
 
-train_images[:, 0] -= r_mean
-train_images[:, 1] -= g_mean
-train_images[:, 2] -= b_mean
+mean_image= np.mean(train_images, axis=0)
+train_images -= mean_image
+std = np.std(train_images, axis=0)
+train_images /= std
 
 train_images = train_images.astype(np.float32)
 
 data = unpickle(sys.path[0] + '/test_batch')
 test_images = data['data'].reshape(-1, 3, 32, 32)
 test_images = test_images.astype(np.float128)
-test_images /= 255.0
 
-test_images[:, 0] -= r_mean
-test_images[:, 1] -= g_mean
-test_images[:, 2] -= b_mean
+test_images -= mean_image
+test_images /= std
 
-test_images.astype(np.float32)
+test_images = test_images.astype(np.float32)
 
 test_labels = data['labels']
 
 lr = 1e-4
-dropout_percent = 0.5
-l2_reg = 4e-7
-learning_rate_decay = np.float32(99e-2)
-batch_size = 10
+dropout_percent = 0.4
+l2_reg = 3e-6
+learning_rate_decay = np.float32(100e-2)
+batch_size = 1
 
 cnn = NeuralNetwork(train_images.shape[1:],
                     [
-                        {'type': 'conv', 'k': 16, 'u_type': 'adam', 'f': 5, 's': 1, 'p': 2},
+                        {'type': 'conv', 'k': 16, 'u_type': 'nag', 'f': 5, 's': 1, 'p': 2},
                         {'type': 'pool'},
-                        {'type': 'conv', 'k': 20, 'u_type': 'adam', 'f': 5, 's': 1, 'p': 2},
+                        {'type': 'conv', 'k': 20, 'u_type': 'nag', 'f': 5, 's': 1, 'p': 2},
                         {'type': 'pool'},
-                        {'type': 'conv', 'k': 20, 'u_type': 'adam', 'f': 5, 's': 1, 'p': 2},
+                        {'type': 'conv', 'k': 20, 'u_type': 'nag', 'f': 5, 's': 1, 'p': 2},
                         {'type': 'pool'},
                         {'type': 'output', 'k': len(le.classes_), 'u_type': 'adam'}
                     ]
                     , lr, l2_reg=l2_reg, dropout_p=dropout_percent)
 
 cnn.epoch_count = 0
 
-for i in xrange(60000000):
+for i in range(60000000):
     start = i * batch_size % len(train_images)
     end = start + batch_size
 
     if start == 0 and i != 0:
         cnn.epoch_count += 1
         train_images, train_labels = shuffle(train_images, train_labels)
-        print '{} epoch finish. learning rate is {}'.format(str(cnn.epoch_count), str(cnn.lr))
+        print('{} epoch finish. learning rate is {}'.format(str(cnn.epoch_count), str(cnn.lr)))
         cnn.lr *= learning_rate_decay
 
-        loss, acc = cnn.predict(train_images[:4000], train_labels[:4000])
-        print 'training acc:{}'.format(acc)
-        print 'training loss:{}'.format(loss)
+        loss, acc = cnn.predict(train_images[:2000], train_labels[:2000])
+        print('training acc:{}'.format(acc))
+        print('training loss:{}'.format(loss))
 
-        test_loss, test_acc = cnn.predict(test_images, test_labels)
-        print 'test acc:{}'.format(test_acc)
-        print 'test loss:{}'.format(test_loss)
+        test_loss, test_acc = cnn.predict(test_images[:5000], test_labels[:5000])
+        print('test acc:{}'.format(test_acc))
+        print('test loss:{}'.format(test_loss))
 
     cnn.t += 1
     loss, acc = cnn.epoch(train_images[start:end], train_labels[start:end])
-    # print loss
-    # print acc
+    # print(loss)
+    # print(acc)