Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,7 @@ pip install numpy
python simple_mlp.py
```

Make sure you have Python 2 installed, since this does not run in Python 3.
As for prior knowledge, know how Python works and the basics of calculus/matrix algebra.
A basic knowledge of Python and of calculus/matrix algebra is recommended to fully understand this tutorial.

I don't go over how numpy works in this tutorial; just think of it as an easy, powerful library for working with matrices and much more. Every numpy method call is prefixed with ```np```. If you don't understand what a call is doing, refer to this concise guide: http://cs231n.github.io/python-numpy-tutorial/

Expand Down
104 changes: 59 additions & 45 deletions fancy_mlp.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,64 +2,78 @@


def applySigmoid(x, giveMeTheDerivative = False):
    """Elementwise logistic sigmoid, 1 / (1 + e^-x).

    Args:
        x: scalar or numpy array.
        giveMeTheDerivative: if True, return sigmoid(x) * (1 - sigmoid(x)),
            i.e. the derivative of the sigmoid evaluated at x.

    Returns:
        The sigmoid of x (or its derivative), same shape as x.

    NOTE(review): callers in this tutorial pass an *already sigmoided*
    prediction when asking for the derivative, mirroring simple_mlp.py's
    convention — left unchanged here.
    """
    if giveMeTheDerivative:
        # Compute the sigmoid once instead of recursing twice as before.
        s = 1 / (1 + np.exp(-x))
        return s * (1 - s)
    return 1 / (1 + np.exp(-x))

def print_data(iter, inputs, keys, layer_one_weights, layer_two_weights, prediction):
    """Print a snapshot of the two-layer network's training state.

    Args:
        iter: current iteration number (int).
        inputs: the original training inputs.
        keys: the original target outputs.
        layer_one_weights: first-layer weight matrix at this iteration.
        layer_two_weights: second-layer weight matrix at this iteration.
        prediction: the network's output at this iteration.
    """
    print("This is iteration # %d" % iter)
    print("Your original input data was...\n%s" % inputs)
    # Typo fix: "orignal" -> "original" in the user-facing message.
    print("Your original keys were...\n%s" % keys)
    print("Layer one weights at this specific iteration are... \n%s" % layer_one_weights)
    print("Layer two weights at this specific iteration are... \n%s" % layer_two_weights)
    print("Our prediction at this iteration was...\n%s" % prediction)
    print("--------------------------------------------------\n")

def train(inputs, keys, layer_one_weights, layer_two_weights):
    """Train a two-layer MLP with plain full-batch gradient descent.

    Args:
        inputs: (n_samples, n_features) array of training examples.
        keys: (n_samples, 1) array of target outputs.
        layer_one_weights: (n_features, n_hidden) weights, updated in place.
        layer_two_weights: (n_hidden, 1) weights, updated in place.

    Side effects:
        Mutates both weight matrices in place and prints progress snapshots
        plus the final prediction.
    """
    for iter in range(40000):
        # Layer one consumes the raw training examples directly.
        layer_one_inputs = inputs

        # Forward pass: hidden activations, then the final output,
        # exactly as in simple_mlp.py but stacked twice.
        layer_one_prediction = applySigmoid(np.dot(layer_one_inputs, layer_one_weights))

        # Forward-propagate the hidden activations through the second
        # layer of weights for the final output.
        layer_two_prediction = applySigmoid(np.dot(layer_one_prediction, layer_two_weights))

        # How much were we off by at the output layer?
        layer_two_error = keys - layer_two_prediction
        layer_two_change_in_error = layer_two_error * applySigmoid(layer_two_prediction, True)

        # Backpropagation: blame for the hidden layer comes from the output
        # delta pushed back through the second weight matrix.
        layer_one_error = np.dot(layer_two_change_in_error, layer_two_weights.T)
        # Delta uses the layer's own activation (layer_one_prediction),
        # NOT layer_one_error — using the error here prevents convergence.
        layer_one_change_in_error = layer_one_error * applySigmoid(layer_one_prediction, True)

        # Weight update: each layer's *input* activations times its delta
        # (inputs feed layer one; hidden activations feed layer two).
        layer_one_weights += np.dot(inputs.T, layer_one_change_in_error)
        layer_two_weights += np.dot(layer_one_prediction.T, layer_two_change_in_error)

        if iter == 0 or iter == 5000 or iter == 9999:
            print_data(iter,
                       inputs,
                       keys,
                       layer_one_weights,
                       layer_two_weights,
                       layer_two_prediction)

    print("Output After Training:\n%s" % layer_two_prediction)

def main():
    """Build the XOR-style toy dataset and train the two-layer network."""
    # Fixed seed so every run produces the same weights and output.
    np.random.seed(1)
    inputs = np.array([[0,0,1],
                       [1,0,1],
                       [0,1,1],
                       [1,1,1]])
    # Target is XOR of the first two input columns — not linearly
    # separable, which is why this network needs a hidden layer.
    keys = np.array([[0,1,1,0]]).T
    # Weights initialized uniformly in [-1, 1).
    layer_one_weights = 2 * np.random.random((3,4)) - 1
    layer_two_weights = 2 * np.random.random((4,1)) - 1
    train(inputs, keys, layer_one_weights, layer_two_weights)

if __name__ == "__main__":
    main()
32 changes: 15 additions & 17 deletions simple_mlp.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,34 +7,32 @@ def applySigmoid(x, giveMeTheDerivative = False):
return 1 / (1 + np.exp(-x))

def print_data(iter, inputs, keys, weights, prediction):
    """Print a snapshot of the single-layer network's training state.

    Args:
        iter: current iteration number (int).
        inputs: the original training inputs.
        keys: the original target outputs.
        weights: the weight matrix at this iteration.
        prediction: the network's output at this iteration.
    """
    print("This is iteration # %d" % iter)
    print("Your original input data was...\n%s" % inputs)
    # Typo fix: "orignal" -> "original" in the user-facing message.
    print("Your original keys were...\n%s" % keys)
    print("Your weights at this specific iteration are...\n%s" % weights)
    print("Our prediction at this iteration was...\n%s" % prediction)
    print("--------------------------------------------------\n")

def train(inputs, keys, weights):
    """Train a single-layer perceptron with plain full-batch gradient descent.

    Args:
        inputs: (n_samples, n_features) array of training examples.
        keys: (n_samples, 1) array of target outputs.
        weights: (n_features, 1) weight matrix, updated in place.

    Side effects:
        Mutates `weights` in place and prints progress snapshots plus the
        final prediction.
    """
    for iter in range(20000):
        # Forward pass: one matrix product through the sigmoid.
        prediction = applySigmoid(np.dot(inputs, weights))
        # How much were we off by?
        error = keys - prediction
        # Scale the error by the sigmoid slope at the prediction.
        change_in_error = error * applySigmoid(prediction, True)
        # Gradient step: input activations times the delta.
        weights += np.dot(inputs.T, change_in_error)
        if iter == 0 or iter == 5000 or iter == 9999:
            print_data(iter, inputs, keys, weights, prediction)

    print("Output After Training:\n%s" % prediction)

def main():
    """Build a linearly separable toy dataset and train the perceptron."""
    # Fixed seed so every run produces the same weights and output.
    np.random.seed(1)
    inputs = np.array([[0,0,1],
                       [1,0,1],
                       [0,1,1],
                       [1,1,1]])
    # Target equals the first input column — linearly separable, so a
    # single layer suffices.
    keys = np.array([[0,1,0,1]]).T
    # Weights initialized uniformly in [-1, 1).
    weights = 2 * np.random.random((3,1)) - 1
    train(inputs, keys, weights)

if __name__ == "__main__":
Expand Down