GradientDescent.py
import numpy as np
from DifferentiableFunction import IDifferentiableFunction, DifferentiableFunction
from Set import AffineSpace
from typing import Callable


class GradientDescent(object):
    def __init__(self):
        super().__init__()

    def Minimize(self, function: IDifferentiableFunction, startingpoint: np.ndarray, iterations: int = 100, learningrate: float = 0.1) -> np.ndarray:
        """
        Plain (full-batch) gradient descent.
        :param function: The differentiable function to minimize
        :param startingpoint: The starting point
        :param iterations: The number of iterations
        :param learningrate: The learning rate (step size)
        :return: The point reached after the last iteration
        """
        x = startingpoint
        for step in range(iterations):
            # The Jacobian of the scalar-valued function is its gradient; flatten it to match x
            gradient = function.jacobian(x).reshape([-1])
            x = x - learningrate * gradient
        return x
    def StochasticMinimize(self, toLoss: Callable, data_x: np.ndarray, data_y: np.ndarray, startingpoint: np.ndarray, iterations: int = 100, learningrate: float = 0.1, batch_size: int = 1) -> np.ndarray:
        """
        Stochastic (mini-batch) gradient descent.
        :param toLoss: A function that maps a batch (x, y) to a differentiable loss function
        :param data_x: The input data
        :param data_y: The output data
        :param startingpoint: The starting point
        :param iterations: The number of epochs (full passes over the data)
        :param learningrate: The learning rate
        :param batch_size: The mini-batch size
        :return: The optimized parameters
        Complexity: O(iterations * n / batch_size * d * h^2),
        where n is the number of data points, d is the dimension of the data points, and h is the hidden dimension.
        """
        x = startingpoint
        n = data_x.shape[0]
        for step in range(iterations):
            # Shuffle the data at the beginning of each epoch
            indices = np.arange(n)
            np.random.shuffle(indices)
            for batch_start in range(0, n, batch_size):
                batch_end = min(batch_start + batch_size, n)
                batch_indices = indices[batch_start:batch_end]
                batch_x = data_x[batch_indices]
                batch_y = data_y[batch_indices]
                # Compute the gradient for the mini-batch.
                # Complexity of toLoss: O(n * d * h^2),
                # where n is the number of data points, d is the dimension of the data points, h is the hidden dimension
                loss = toLoss(batch_x, batch_y)
                gradient = loss.jacobian(x).reshape([-1])
                x = x - learningrate * gradient
        return x
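

# ---------------------------------------------------------------------------
# Hypothetical usage sketch (not part of the original module). It relies only
# on the behaviour visible above: both methods call `jacobian(x)` on the
# (loss) function and flatten the result. The helper classes `_Quadratic` and
# `_BatchLoss` are illustrative stand-ins, not the project's actual
# DifferentiableFunction implementation, whose constructor may differ.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    class _Quadratic:
        """f(x) = ||x - target||^2 with jacobian 2 * (x - target)."""

        def __init__(self, target: np.ndarray):
            self.target = target

        def jacobian(self, x: np.ndarray) -> np.ndarray:
            return 2.0 * (x - self.target)

    gd = GradientDescent()

    # Full-batch gradient descent: should approach the target [1, -2].
    x_min = gd.Minimize(_Quadratic(np.array([1.0, -2.0])), startingpoint=np.zeros(2))
    print("Minimize result:", x_min)

    class _BatchLoss:
        """Mean squared error of a linear model on one mini-batch."""

        def __init__(self, batch_x: np.ndarray, batch_y: np.ndarray):
            self.batch_x = batch_x
            self.batch_y = batch_y

        def jacobian(self, w: np.ndarray) -> np.ndarray:
            residual = self.batch_x @ w - self.batch_y
            return 2.0 * self.batch_x.T @ residual / len(self.batch_y)

    # Stochastic gradient descent: recover the weights of a noiseless linear model.
    rng = np.random.default_rng(0)
    true_w = np.array([2.0, -3.0])
    data_x = rng.normal(size=(100, 2))
    data_y = data_x @ true_w
    w_fit = gd.StochasticMinimize(_BatchLoss, data_x, data_y,
                                  startingpoint=np.zeros(2),
                                  iterations=20, learningrate=0.05, batch_size=10)
    print("StochasticMinimize result:", w_fit)  # expected to approach [2, -3]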