-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathpredict_stocks.py
More file actions
49 lines (40 loc) · 1.26 KB
/
predict_stocks.py
File metadata and controls
49 lines (40 loc) · 1.26 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
import pandas as pd
from datetime import datetime
from sklearn.linear_model import LinearRegression
# Shared estimator; it is refit for each indicator further down the script.
reg = LinearRegression()

# Load the price history and parse the dates so rows can be ordered and
# filtered chronologically.
df = pd.read_csv("sphist.csv")
df["Date"] = pd.to_datetime(df["Date"])

# Rolling windows must run oldest -> newest. Sort explicitly rather than
# assuming the CSV happens to be stored newest-first.
df = df.sort_values("Date", ascending=True)

# Each indicator is the mean Close of the PREVIOUS N rows. shift(1) keeps the
# current day's Close out of its own feature; without it the model would be
# given part of the value it is supposed to predict.
# Window sizes count rows (trading days). NOTE(review): 20 and 252 presumably
# approximate 30 and 365 calendar days, matching the column names -- confirm.
previous_close = df["Close"].shift(1)
for column, window in (("day_5", 5), ("day_30", 20), ("day_365", 252)):
    df[column] = previous_close.rolling(window=window).mean()

# Back to newest-first ordering, as the script did originally.
df = df.iloc[::-1]

# Keep rows dated after 1951-01-02, then drop any row that still has a missing
# value (e.g. rows without enough history to fill the longest window).
df = df[df["Date"] > datetime(year=1951, month=1, day=2)]
df = df.dropna(axis=0)
# Chronological hold-out split: rows dated before 2013-01-01 form the training
# set, rows dated on or after it form the test set.
split_date = datetime(year=2013, month=1, day=1)
train = df.loc[df["Date"] < split_date]
test = df.loc[df["Date"] >= split_date]

# Indicator columns used as candidate features (one model per column later).
predictors = train.loc[:, ["day_5", "day_30", "day_365"]]

# Targets are kept as one-column DataFrames (double brackets), not Series.
to_fit = train.loc[:, ["Close"]]
to_predict = test.loc[:, ["Close"]]
# Fit one single-feature linear regression per indicator and keep its
# predictions for the test period, keyed by the indicator's column name.
predict = {}
for item in predictors:  # iterating a DataFrame yields its column labels
    # `reg` is refit on every pass, so after the loop it holds the model for
    # the last indicator only.
    reg.fit(train[[item]], to_fit)
    # Predict from the test rows' values of the SAME indicator the model was
    # fitted on -- not from the actual Close prices, which are the target.
    predict[item] = reg.predict(test[[item]])
# Mean squared error of each indicator's predictions against the actual test
# Close prices, stored as a plain float per indicator.
mse = {}
actual_close = to_predict["Close"].to_numpy()
for key, value in predict.items():
    # Flatten the predictions so they line up element-wise with the 1-D
    # array of actual prices regardless of whether they are 1-D or (n, 1).
    errors = value.reshape(-1) - actual_close
    mse[key] = float((errors ** 2).mean())
import matplotlib.pyplot as plt
# Scatter the training rows (5-day indicator vs. actual Close) and overlay the
# regression line fitted on that same indicator.
plt.scatter(train["day_5"], train["Close"])
# `reg` was last fitted on a different indicator in the loop above, so fit a
# dedicated model on day_5 and predict from day_5 (not from Close itself).
day_5_reg = LinearRegression().fit(train[["day_5"]], train["Close"])
plt.plot(train["day_5"], day_5_reg.predict(train[["day_5"]]))
plt.show()