# main.py
# Logistic regression with a polynomial hypothesis, fitted by gradient descent.
import math
import matplotlib.pyplot as plt
import csv
import random
# --- Data loading and preparation ---
# Read DATA.csv, skipping the header row. Column 0 is used as the label (y)
# and column 1 as the feature (x).
# newline="" is what the csv module documents for files handed to csv.reader,
# so newlines embedded in quoted fields are parsed correctly.
with open("DATA.csv", newline="") as dt:
    reader = csv.reader(dt)
    next(reader, None)  # drop the header row (no error if the file is empty)
    # csv.reader yields an empty list for a blank line; skip those so the
    # column lookups below cannot raise IndexError.
    c = [row for row in reader if row]
test_x = [float(jl[1]) for jl in c]
test_y = [float(jl[0]) for jl in c]

# Comment out next 2 lines to use CSV data rather than test data.
test_x = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
test_y = [0, 0, 0, 1, 1, 1, 1, 0, 0, 0]

# Shuffle the (x, y) pairs together so every x keeps its own label.
_t = list(zip(test_x, test_y))
random.shuffle(_t)
test_x, test_y = (list(column) for column in zip(*_t))

# Divide x by its range to normalise the values. Note this only rescales;
# the minimum is not subtracted.
ran = max(test_x) - min(test_x)
if ran == 0:
    # All x values are identical: dividing by zero would crash, so leave
    # the values unscaled instead.
    ran = 1
test_x = [i / ran for i in test_x]
size = len(test_x)  # number of data points
# ADJUSTABLE PARAMETERS
order = 5      # degree of the hypothesis polynomial
batch = size   # data points used per parameter update (size = whole data set)
lrate = 5      # learning rate: scales each gradient step
iters = 1000   # number of passes over the data
detail = 50    # number of intervals sampled when plotting the hypothesis

# One coefficient per power of x (x**0 .. x**order), all starting at 1.
thetas = [1 for _ in range(order + 1)]
def sigmoid(x):
    """Return the logistic function 1 / (1 + e**-x), a value in [0, 1].

    The two branches are mathematically identical; they are chosen so that
    math.exp is only ever called with a non-positive argument. That avoids
    the OverflowError that math.exp(-x) raises for very negative x, so
    inputs of any magnitude saturate smoothly to 0.0 or 1.0.
    """
    if x >= 0:
        return 1 / (1 + math.exp(-x))
    # x < 0: exp(x) is in (0, 1) and simply underflows to 0.0 for huge |x|.
    z = math.exp(x)
    return z / (1 + z)
def hyp(x):
    """Evaluate the hypothesis at x.

    Computes the polynomial thetas[0] + thetas[1]*x + ... +
    thetas[order]*x**order from the module-level coefficients and passes
    the result through sigmoid.
    """
    polynomial = 0
    for power in range(order + 1):
        term = x ** power * thetas[power]
        polynomial += term
    return sigmoid(polynomial)
# Sample points for drawing the hypothesis curve: detail + 1 evenly spaced
# x values running from the smallest to the largest x in the data.
lo = min(test_x)
ran = max(test_x) - lo
inc = ran / detail  # spacing between neighbouring sample points
scdata = [lo + inc * step for step in range(detail + 1)]
costs = []  # mean cost of each batch, recorded for the analysis plot

# Probabilities are clamped to [_EPS, 1 - _EPS] before taking the log in the
# cost. Once the sigmoid saturates to exactly 0.0 or 1.0, math.log(0) would
# otherwise raise ValueError. Values already inside the interval are left
# untouched, and the gradient itself always uses the unclamped hypothesis.
_EPS = 1e-12

# Mini-batch gradient descent on the polynomial coefficients in thetas.
# NOTE(review): the original indentation was lost; the parameter update and
# the cost record are assumed to run once per batch - confirm.
for _ in range(iters):
    for start in range(0, size, batch):
        xs = test_x[start:start + batch]
        ys = test_y[start:start + batch]
        ts = len(xs)  # number of points in this batch
        updates = [0] * (order + 1)  # summed gradient, one entry per theta
        cost = 0  # COST (only for analysis)
        for _x, _y in zip(xs, ys):
            _hypo = hyp(_x)
            _f = _hypo - _y  # prediction error for this point
            for j in range(order + 1):
                # Each "feature" is just x raised to an increasing power.
                updates[j] += _f * (_x ** j)
            # Cross-entropy cost; labels other than 0 or 1 add nothing.
            clamped = min(max(_hypo, _EPS), 1 - _EPS)
            if _y == 1:
                cost -= math.log(clamped)
            elif _y == 0:
                cost -= math.log(1 - clamped)
        # Step every coefficient against the batch-averaged gradient.
        _g = lrate / ts
        for tindex in range(order + 1):
            thetas[tindex] -= _g * updates[tindex]
        costs.append(cost / ts)  # mean cost over the batch
# PLOTTING
# Top panel: the data points with the fitted hypothesis curve drawn over them.
top = plt.subplot(2, 1, 1)
top.plot(test_x, test_y, 'o')
fitted = [hyp(sample) for sample in scdata]
top.plot(scdata, fitted)
top.set_xlabel("X")
top.set_ylabel("Y")
top.set_title("Hypothesis and data")

# Bottom panel: the recorded cost values in the order they were appended.
bottom = plt.subplot(2, 1, 2)
bottom.plot(costs)
bottom.set_xlabel("Iterations")
bottom.set_ylabel("Cost")
bottom.set_title("Cost over iterations")

plt.tight_layout()  # keep the two panels' labels from overlapping
plt.show()