-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathstacking.py
More file actions
116 lines (98 loc) · 3.57 KB
/
stacking.py
File metadata and controls
116 lines (98 loc) · 3.57 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
import os
import sys
import glob
import joblib
import numpy as np
from time import *
import pandas as pd
from scipy.stats import pearsonr
from sklearn import linear_model
from sklearn.metrics import mean_squared_error, mean_absolute_error
# Wall-clock start timestamp; subtracted from the end timestamp at the bottom
# of the script to report the total run time.
begin_time = time()
class Logger(object):
    """Tee-style text stream that mirrors every write into a log file.

    Intended to be assigned to ``sys.stdout`` so that ``print`` output reaches
    both the stream that was ``sys.stdout`` at construction time and the file
    ``filename``.
    """

    def __init__(self, filename="Default.log"):
        """Capture the current ``sys.stdout`` and open ``filename`` for appending.

        Args:
            filename: Path of the log file. It is opened in append mode, so
                earlier runs are preserved. The parent directory must exist.

        Raises:
            OSError: If ``filename`` cannot be opened for appending.
        """
        self.terminal = sys.stdout
        self.log = open(filename, "a")

    def write(self, message):
        """Write ``message`` to the captured stream and to the log file."""
        self.terminal.write(message)
        self.log.write(message)

    def flush(self):
        """Flush both underlying streams.

        This used to be a no-op, which meant ``print(..., flush=True)`` and
        ``sys.stdout.flush()`` had no effect and buffered log text could be
        lost if the process terminated abnormally.
        """
        self.terminal.flush()
        self.log.flush()
# Absolute directory of this script; printed below as a run diagnostic.
path = os.path.abspath(os.path.dirname(__file__))
# File-system encoding of the host. Previously bound to the name `type`,
# which shadowed the builtin for the rest of the module.
fs_encoding = sys.getfilesystemencoding()
# Logger opens its file in append mode and cannot create a missing parent
# directory, so make sure ./log exists before redirecting stdout.
os.makedirs('./log', exist_ok=True)
# From here on, everything printed is written to the console and to the log file.
sys.stdout = Logger('./log/layer2_result.txt')
print(path)
print(os.path.dirname(__file__))
print('----------prediction results----------')
if __name__ == "__main__":
    # Build the meta dataset: one row per (Hybrid, Yield, kfold) with the
    # columns of every base-model prediction file merged alongside.
    #
    # The pattern is assembled with os.path.join instead of the literal
    # ".\Results\Yield_model_ture_pred\*.csv": that literal relied on invalid
    # escape sequences (\R, \Y, \*) and only matched files on Windows.
    # Sorting makes the merge order (and therefore the column order of the
    # output) deterministic; glob returns matches in arbitrary order and
    # run_training selects its features by column position.
    files = sorted(
        glob.glob(os.path.join(".", "Results", "Yield_model_ture_pred", "*.csv"))
    )  # Base-model prediction values
    if not files:
        # Fail with a clear message instead of an AttributeError on None below.
        raise FileNotFoundError(
            "No base-model prediction CSV files found in "
            "./Results/Yield_model_ture_pred"
        )

    df = None
    for f in files:
        if df is None:
            # The first file seeds the frame.
            df = pd.read_csv(f)
        else:
            # Later files are left-joined on the shared key columns.
            temp_df = pd.read_csv(f)
            df = df.merge(temp_df, on=['Hybrid', 'Yield', 'kfold'], how='left')

    # Collapse duplicate keys, keeping the first value of each column per group;
    # reset_index puts Hybrid, Yield, kfold back as the first three columns.
    df = df.groupby(['Hybrid', 'Yield', 'kfold']).agg('first').reset_index()
    print(df)
    # Generate the new (meta) dataset consumed by run_training.
    df.to_csv("./data/train_set/New_Yield_values.csv", index=False)
def run_training(fold: int) -> pd.DataFrame:
    """Fit the layer-2 Ridge meta-model for one fold and score it.

    Rows of ``./data/train_set/New_Yield_values.csv`` whose ``kfold`` differs
    from ``fold`` are used for fitting; rows whose ``kfold`` equals ``fold``
    are used for validation. The fitted model is also applied to
    ``./data/test_set/New_test_values.csv``.

    Side effects:
        * writes the fitted model to ``./models/layer2model.pkl`` (the same
          path for every fold, so each call overwrites the previous model);
        * writes the test-set predictions to
          ``./Results/Metamodel_LARS_pred/fold{fold}_layer2_test_pred.csv``;
        * prints the data shape and the MSE / RMSE / MAE / Pearson correlation
          of the validation predictions.

    Args:
        fold: Value of the ``kfold`` column that is held out for validation.

    Returns:
        The validation rows with columns ``Hybrid``, ``Yield``, ``kfold`` and
        the new ``Layer2_pred`` column holding the meta-model predictions.
    """
    df = pd.read_csv("./data/train_set/New_Yield_values.csv")
    df_train = df[df.kfold != fold].reset_index(drop=True)
    df_valid = df[df.kfold == fold].reset_index(drop=True)
    print(np.shape(df))

    # Meta-features are taken by position: columns 3..6.
    # NOTE(review): assumes columns 0..2 are Hybrid, Yield, kfold and that
    # exactly four base-model columns follow - confirm against the meta dataset.
    xtrain = df_train.iloc[:, 3:7]
    xvalid = df_valid.iloc[:, 3:7]
    ytrain = df_train.Yield.values
    yvalid = df_valid.Yield.values

    # `normalize` was removed from Ridge in scikit-learn 1.2 and passing it
    # raises TypeError there. False was its default, so dropping the argument
    # keeps the fitted model identical on older versions.
    clf = linear_model.Ridge(alpha=0.0001)
    clf.fit(xtrain, ytrain)
    pred = clf.predict(xvalid)

    test = pd.read_csv("./data/test_set/New_test_values.csv")
    # NOTE(review): test features are columns 1..4 by position - confirm they
    # line up with the four training feature columns.
    test_x = test.iloc[:, 1:5]

    # Save the model, then load it back and predict with the loaded copy.
    model_save_path = './models/layer2model.pkl'
    os.makedirs(os.path.dirname(model_save_path), exist_ok=True)
    joblib.dump(clf, model_save_path)
    reloaded_model = joblib.load(model_save_path)
    test_pred = reloaded_model.predict(test_x)

    # Collect fields 1 and 2 of every data row of the test file as the row label.
    # NOTE(review): the original comment says these should be the "Hybrid" and
    # "Env" columns, but fields 1 and 2 overlap the feature slice above -
    # confirm the intended columns.
    label_pairs = []
    with open("./data/test_set/New_test_values.csv", 'r') as fr:
        non_blank = (line for line in fr if line.strip())
        # Skip the header line: pd.read_csv consumed it as column names, so
        # keeping it here shifted every label one row against test_pred.
        next(non_blank, None)
        for line in non_blank:
            # Remove quote characters (the previous replace('"', '"') was a no-op).
            # Plain split: assumes no field contains an embedded comma.
            d = line.replace('"', '').split(',')
            label_pairs.append((d[1], d[2]))

    os.makedirs("./Results/Metamodel_LARS_pred", exist_ok=True)
    # Two rows (labels, predictions) transposed into two columns.
    out_x = pd.DataFrame([label_pairs, test_pred])
    out_x.T.to_csv("./Results/Metamodel_LARS_pred/fold{}_layer2_test_pred.csv".format(fold), index=False)

    MSE = mean_squared_error(yvalid, pred)
    RMSE = np.sqrt(MSE)  # reuse MSE instead of recomputing it
    MAE = mean_absolute_error(yvalid, pred)
    PCCs = pearsonr(yvalid, pred)
    print(f"fold={fold}, MSE={MSE}")
    print(f"fold={fold}, RMSE={RMSE}")
    print(f"fold={fold}, MAE={MAE}")
    print(f"fold={fold}, PCCs={PCCs}")

    df_valid.loc[:, "Layer2_pred"] = pred
    return df_valid[["Hybrid", "Yield", "kfold", "Layer2_pred"]]
if __name__ == "__main__":
    # Run the meta-model once per fold (folds 0..9) and stack the per-fold
    # validation predictions into a single frame.
    fold_frames = [run_training(fold_id) for fold_id in range(10)]
    fin_valid_df = pd.concat(fold_frames)
    print(fin_valid_df.shape)
    fin_valid_df.to_csv("./Results/layer2 ture result/LARS_pred.csv",index=False)

    # Report the elapsed wall-clock time since the script started.
    run_time = time() - begin_time
    print("LARS program run time:" , run_time)