You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
# Feature selection by mutual information (regression target).
#
# Usage:
#   python3 SCRIPT TRAIN_FILE TEST_FILE PERCENTILE NEW_TRAIN_FILE NEW_TEST_FILE
#
# TRAIN_FILE and TEST_FILE are tab-delimited numeric tables in which column 0
# is the target Y and the remaining columns are the features X. The selector
# is fitted on the training set only and then applied to both sets; the
# reduced tables are written back in the same layout (Y first, then X).
import sys

import numpy as np


def split_target(table):
    """Split a 2-D table into ``(X, y)``, where column 0 is the target."""
    return table[:, 1:], table[:, 0]


def join_target(y, X):
    """Return a table with ``y`` as column 0 followed by the columns of ``X``."""
    return np.hstack((y.reshape(-1, 1), X))


def main(argv=None):
    """Run the mutual-information feature selection from the command line.

    Args:
        argv: Argument list without the program name; defaults to
            ``sys.argv[1:]``. Arguments beyond the fifth are ignored.

    Raises:
        SystemExit: If fewer than five arguments are supplied.
        ValueError: If PERCENTILE is not an integer.
    """
    args = sys.argv[1:] if argv is None else list(argv)
    if len(args) < 5:
        sys.exit('usage: python3 %s TRAIN_FILE TEST_FILE PERCENTILE '
                 'NEW_TRAIN_FILE NEW_TEST_FILE' % sys.argv[0])
    train_file, test_file = args[0], args[1]
    percentile = int(args[2])  # Percent of features to keep
    new_train_file, new_test_file = args[3], args[4]

    # Imported here so that a bad command line fails before sklearn is loaded.
    from sklearn.feature_selection import SelectPercentile, mutual_info_regression

    # ndmin=2 keeps a single-row file two-dimensional so the column slicing
    # in split_target() still works.
    train = np.loadtxt(train_file, delimiter='\t', ndmin=2)  # load training set
    test = np.loadtxt(test_file, delimiter='\t', ndmin=2)  # load test set
    trX, trY = split_target(train)
    teX, teY = split_target(test)

    # MI-based SelectPercentile: fit on the training data, reuse for the test data.
    selector = SelectPercentile(mutual_info_regression, percentile=percentile)
    trX = selector.fit_transform(trX, trY)
    teX = selector.transform(teX)

    # Put Y back in front of the selected X columns and save both sets.
    np.savetxt(new_train_file, join_target(trY, trX), fmt='%g', delimiter='\t')
    np.savetxt(new_test_file, join_target(teY, teX), fmt='%g', delimiter='\t')
    print('%d features are selected.' % trX.shape[1])
    print('New training set is saved into: %s\nNew test set is saved into: %s' % (new_train_file, new_test_file))


if __name__ == "__main__":
    main()
# Feature selection with a random forest (SelectFromModel).
#
# Usage:
#   python3 SCRIPT TRAIN_FILE TEST_FILE MAX_FEATURES NEW_TRAIN_FILE NEW_TEST_FILE
#
# TRAIN_FILE and TEST_FILE are comma-delimited numeric tables in which column 0
# is the label Y and the remaining columns are the features X. MAX_FEATURES is
# forwarded to RandomForestClassifier(max_features=...). The forest is fitted
# on the training set only; the same selection is applied to both sets and the
# reduced tables are written back in the same layout (Y first, then X).
import sys

import numpy as np


def parse_max_features(value):
    """Convert the MAX_FEATURES command-line string for RandomForestClassifier.

    Command-line arguments are always strings, but the classifier expects an
    int, a float, ``None`` or a keyword such as ``'sqrt'`` / ``'log2'``.

    Args:
        value: Raw command-line text.

    Returns:
        ``int`` or ``float`` for numeric text, ``None`` for ``'none'``,
        ``'sqrt'`` for the legacy ``'auto'`` keyword (its meaning for
        classifiers before scikit-learn removed it), otherwise the stripped
        text unchanged.
    """
    text = value.strip()
    keyword = text.lower()
    if keyword == 'auto':
        return 'sqrt'
    if keyword == 'none':
        return None
    for convert in (int, float):
        try:
            return convert(text)
        except ValueError:
            continue
    return text


def split_target(table):
    """Split a 2-D table into ``(X, y)``, where column 0 is the label."""
    return table[:, 1:], table[:, 0]


def join_target(y, X):
    """Return a table with ``y`` as column 0 followed by the columns of ``X``."""
    return np.hstack((y.reshape(-1, 1), X))


def main(argv=None):
    """Run the random-forest feature selection from the command line.

    Args:
        argv: Argument list without the program name; defaults to
            ``sys.argv[1:]``. Arguments beyond the fifth are ignored.

    Raises:
        SystemExit: If fewer than five arguments are supplied.
    """
    args = sys.argv[1:] if argv is None else list(argv)
    if len(args) < 5:
        sys.exit('usage: python3 %s TRAIN_FILE TEST_FILE MAX_FEATURES '
                 'NEW_TRAIN_FILE NEW_TEST_FILE' % sys.argv[0])
    train_file, test_file = args[0], args[1]
    n_features = parse_max_features(args[2])
    new_train_file, new_test_file = args[3], args[4]

    # Imported here so that a bad command line fails before these are loaded.
    from sklearn.feature_selection import SelectFromModel
    from sklearn.ensemble import RandomForestClassifier
    from pytictoc import TicToc

    # ndmin=2 keeps a single-row file two-dimensional so the column slicing
    # in split_target() still works.
    train = np.loadtxt(train_file, delimiter=',', ndmin=2)  # load training set
    test = np.loadtxt(test_file, delimiter=',', ndmin=2)  # load test set
    trX, trY = split_target(train)
    teX, teY = split_target(test)

    t = TicToc()
    t.tic()
    # Fit the forest first, then wrap the fitted model (prefit=True) so the
    # selector only applies the already-learned feature importances.
    clf = RandomForestClassifier(max_depth=2, random_state=0, max_features=n_features)
    clf = clf.fit(trX, trY)
    selector = SelectFromModel(clf, prefit=True)
    trX = selector.transform(trX)
    teX = selector.transform(teX)
    print('Time cost in selecting fetures with Random-Forest: %gs' % t.tocvalue())

    # Put Y back in front of the selected X columns and save both sets.
    np.savetxt(new_train_file, join_target(trY, trX), fmt='%g', delimiter=',')
    np.savetxt(new_test_file, join_target(teY, teX), fmt='%g', delimiter=',')
    print('%d features are selected.' % trX.shape[1])
    print('New training set is saved into: %s\nNew test set is saved into: %s' % (new_train_file, new_test_file))


if __name__ == "__main__":
    main()
# max_features (looking for the best split): set as 'auto'
$ python3 myRandomForest.py EI_train.txt EI_test.txt auto EI_train_rf.txt EI_test_rf.txt
# 以SVR建模预测:规格化、rbf核、10次交叉寻优
$ python3 ../lab_04/mySVC.py EI_train_rf.txt EI_test_rf.txt 1 rbf 1 10