2 changes: 2 additions & 0 deletions .gitignore
@@ -1 +1,3 @@
.ipynb_checkpoints/
__pycache__/
test/*.json
109 changes: 91 additions & 18 deletions micrograd/engine.py
@@ -1,94 +1,167 @@

class Value:
""" stores a single scalar value and its gradient """
"""stores a single scalar value and its gradient"""

def __init__(self, data, _children=(), _op=''):
def __init__(self, data, _children=(), _op=""):
self.data = data
self.grad = 0
# internal variables used for autograd graph construction
self._backward = lambda: None
self._prev = set(_children)
self._op = _op # the op that produced this node, for graphviz / debugging / etc
self._op = _op # the op that produced this node, for graphviz / debugging / etc

def __add__(self, other):
other = other if isinstance(other, Value) else Value(other)
out = Value(self.data + other.data, (self, other), '+')
out = Value(self.data + other.data, (self, other), "+")

def _backward():
self.grad += out.grad
other.grad += out.grad

out._backward = _backward

return out

def __mul__(self, other):
other = other if isinstance(other, Value) else Value(other)
out = Value(self.data * other.data, (self, other), '*')
out = Value(self.data * other.data, (self, other), "*")

def _backward():
self.grad += other.data * out.grad
other.grad += self.data * out.grad

out._backward = _backward

return out

def __pow__(self, other):
assert isinstance(other, (int, float)), "only supporting int/float powers for now"
out = Value(self.data**other, (self,), f'**{other}')
assert isinstance(other, (int, float)), (
"only supporting int/float powers for now"
)
# keep the exponent in _op so Value.data_(data) can rebuild the pow backward pass
out = Value(self.data**other, (self,), f"**{other}")

def _backward():
self.grad += (other * self.data**(other-1)) * out.grad
self.grad += (other * self.data ** (other - 1)) * out.grad

out._backward = _backward

return out

def relu(self):
out = Value(0 if self.data < 0 else self.data, (self,), 'ReLU')
out = Value(0 if self.data < 0 else self.data, (self,), "ReLU")

def _backward():
self.grad += (out.data > 0) * out.grad

out._backward = _backward

return out

def backward(self):

# topological order all of the children in the graph
topo = []
visited = set()

def build_topo(v):
if v not in visited:
visited.add(v)
for child in v._prev:
build_topo(child)
topo.append(v)

build_topo(self)

# go one variable at a time and apply the chain rule to get its gradient
self.grad = 1
for v in reversed(topo):
v._backward()

def __neg__(self): # -self
def __neg__(self): # -self
return self * -1

def __radd__(self, other): # other + self
def __radd__(self, other): # other + self
return self + other

def __sub__(self, other): # self - other
def __sub__(self, other): # self - other
return self + (-other)

def __rsub__(self, other): # other - self
def __rsub__(self, other): # other - self
return other + (-self)

def __rmul__(self, other): # other * self
def __rmul__(self, other): # other * self
return self * other

def __truediv__(self, other): # self / other
def __truediv__(self, other): # self / other
return self * other**-1

def __rtruediv__(self, other): # other / self
def __rtruediv__(self, other): # other / self
return other * self**-1

def __repr__(self):
return f"Value(data={self.data}, grad={self.grad})"

# SAVE: base object structure as json
def _data(self):
"""
Return a dict representing this Value and its computation graph.
"""
return {
"d": self.data,
"o": self._op,
"c": [child._data() for child in self._prev],
"g": self.grad,
}

@staticmethod
def data_(data):
"""
Create a Value from a saved dictionary (as from _data).
"""
inst = Value(data["d"], [], data["o"])
inst.grad = data["g"]
if not data["c"]:
return inst
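# use a list (not a set) so the indexed closures below see a stable child order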
inst._prev = []
for cd in data["c"]:
inst._prev.append(Value.data_(cd))

# recreate the _backward closure from the operator and the restored children
backward = lambda: None
match data["o"]:
case "*":

def back():
inst._prev[0].grad += inst._prev[1].data * inst.grad
inst._prev[1].grad += inst._prev[0].data * inst.grad

backward = back
case "+":

def back():
inst._prev[0].grad += 1.0 * inst.grad
inst._prev[1].grad += 1.0 * inst.grad

backward = back
case "**":

def back():
inst._prev[0].grad += (
inst.data * inst._prev[0].data ** (inst.data - 1)
) * inst.grad

backward = back
case "tanh":

def back():
inst._prev[0].grad += (1 - (inst.data) ** 2) * inst.grad

backward = back
case "ReLU":

def back():
inst._prev[0].grad += (inst.data > 0) * inst.grad

backward = back

inst._backward = backward
return inst
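To make the round trip concrete, here is a minimal usage sketch of the new _data/data_ pair (the variable names are illustrative, and it assumes the exponent is kept in _op as above):

from micrograd.engine import Value

a = Value(2.0)
b = Value(3.0)
c = a * b + a**2  # c.data == 10.0
c.backward()  # dc/da = b + 2a = 7, dc/db = a = 2

saved = c._data()  # a plain dict: {"d": 10.0, "o": "+", "c": [...], "g": 1}
restored = Value.data_(saved)

assert restored.data == c.data
assert restored.grad == c.grad

Note that _data walks the graph as a tree, so a node reachable along two paths (like a here) is serialized twice; the restored copies are independent Value objects rather than one shared node.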
95 changes: 88 additions & 7 deletions micrograd/nn.py
@@ -1,34 +1,54 @@
import json
import os
import random

from micrograd.engine import Value

class Module:

class Module:
def zero_grad(self):
for p in self.parameters():
p.grad = 0

def parameters(self):
return []

class Neuron(Module):

class Neuron(Module):
def __init__(self, nin, nonlin=True):
self.w = [Value(random.uniform(-1,1)) for _ in range(nin)]
self.w = [Value(random.uniform(-1, 1)) for _ in range(nin)]
self.b = Value(0)
self.nonlin = nonlin

def __call__(self, x):
act = sum((wi*xi for wi,xi in zip(self.w, x)), self.b)
act = sum((wi * xi for wi, xi in zip(self.w, x)), self.b)
return act.relu() if self.nonlin else act

def parameters(self):
return self.w + [self.b]

# SAVE neuron data
def _data(self):
return {
"b": self.b._data(),
"il": self.nonlin,
"w": [cw._data() for cw in self.w],
}

# LOAD neuron from saved data
@staticmethod
def data_(data):
inst = Neuron(0, nonlin=data["il"])
inst.w = [Value.data_(rw) for rw in data["w"]]
inst.b = Value.data_(data["b"])

return inst

def __repr__(self):
return f"{'ReLU' if self.nonlin else 'Linear'}Neuron({len(self.w)})"

class Layer(Module):

class Layer(Module):
def __init__(self, nin, nout, **kwargs):
self.neurons = [Neuron(nin, **kwargs) for _ in range(nout)]

@@ -39,14 +59,29 @@ def __call__(self, x):
def parameters(self):
return [p for n in self.neurons for p in n.parameters()]

# SAVE layer
def _data(self):
return {"ns": [n._data() for n in self.neurons]}

# LOAD layer
@staticmethod
def data_(data):
inst = Layer(0, 0)
inst.neurons = [Neuron.data_(nr) for nr in data["ns"]]

return inst

def __repr__(self):
return f"Layer of [{', '.join(str(n) for n in self.neurons)}]"

class MLP(Module):

class MLP(Module):
def __init__(self, nin, nouts):
sz = [nin] + nouts
self.layers = [Layer(sz[i], sz[i+1], nonlin=i!=len(nouts)-1) for i in range(len(nouts))]
self.layers = [
Layer(sz[i], sz[i + 1], nonlin=i != len(nouts) - 1)
for i in range(len(nouts))
]

def __call__(self, x):
for layer in self.layers:
@@ -58,3 +93,49 @@ def parameters(self):

def __repr__(self):
return f"MLP of [{', '.join(str(layer) for layer in self.layers)}]"

# optional helper that automates a simple training loop
def fit(self, X, y, epochs=1000, lr=0.001):
for epoch in range(epochs):
# Forward pass
out = [self(x) for x in X] # each self(x) returns a single Value

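# mean squared error over the dataset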
loss = sum((ya - yp) ** 2 for yp, ya in zip(out, y)) / len(y)

loss.backward() # loss is a Value, so this backpropagates to every parameter

for param in self.parameters():
param.data -= lr * param.grad # Gradient descent update
param.grad = 0 # Reset gradients for next iteration

if epoch % 100 == 0:
print(f"Epoch: {epoch}, Loss: {loss.data}")

# SAVE mlp
def _data(self):
return {"ls": [ly._data() for ly in self.layers]}

# LOAD mlp
@staticmethod
def data_(data):
inst = MLP(0, [])
inst.layers = [Layer.data_(ly) for ly in data["ls"]]
return inst

# Interface point for saving model
def save(self, fp: str):
if fp == "" or fp is None:
raise ValueError("File path is not specified")

with open(fp, "w", encoding="utf-8") as f:
json.dump(self._data(), f, indent=2)
print("SUCCESS! Model saved!")

# Interface point for loading the model
@staticmethod
def load(fp: str):
if not os.path.exists(fp):
raise FileNotFoundError(fp)
with open(fp, "r", encoding="utf-8") as f:
contents = json.load(f)
return MLP.data_(contents)
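Here is an end-to-end sketch of the new training/saving/loading surface (the toy dataset and the file name model.json are made up for illustration):

import random

from micrograd.nn import MLP

random.seed(42)

# tiny toy dataset: y = 2*x0 - x1
X = [[0.5, 1.0], [1.0, 0.0], [0.0, 1.0], [1.0, 1.0]]
y = [0.0, 2.0, -1.0, 1.0]

model = MLP(2, [4, 1])
model.fit(X, y, epochs=500, lr=0.05)

model.save("model.json")  # serializes every layer, neuron, and weight as JSON
clone = MLP.load("model.json")  # rebuilds the same structure and weights

for x in X:
    assert abs(model(x).data - clone(x).data) < 1e-9

One caveat: save also stores each parameter's grad, so it is worth calling zero_grad() on a loaded model before resuming training, since fit accumulates into whatever grads are already present.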