-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathmod_ave_funcs.R
More file actions
128 lines (92 loc) · 3.89 KB
/
mod_ave_funcs.R
File metadata and controls
128 lines (92 loc) · 3.89 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
# this script contians functions for model averaging and comparison of methods
#function. Takes data, data type, vector of model averaging methods, and covariate columns to use. Returns a list of RMSE, predictions,weights, and run time for each method.
all_methods_MA<-function(data_type,dat,methods,covs){
n_methods<-length(methods) # number of methods
methods_vec<-numeric(n_methods) # vector that will go in list below
names(methods_vec)=methods
n_test_data<-nrow(dat)/2 #number of data points in the test data
n_mods<-2^length(covs) # number of models being averaged
#list to hold outputs from model averaging methods: weights, predictions, rmse, and run time
compiled_outputs<-list(
rmse=methods_vec,
weights=matrix(NA,nrow=n_mods,ncol=n_methods,dimnames = list(1:n_mods,methods)),
ma_preds=matrix(NA,nrow=n_test_data,ncol=n_methods,dimnames = list(1:n_test_data,methods)),
time=methods_vec
)
for(i in 1:n_methods){
ma_out<-model_average_function(method= methods[i],data_in = dat,covs=covs,data_type = data_type)
compiled_outputs$rmse[i] <- ma_out$rmse
compiled_outputs$weights[,i] <- ma_out$weights
compiled_outputs$ma_preds[,i] <- ma_out$wtpreds
compiled_outputs$time[i] <- ma_out$run_time
}
return(compiled_outputs)
}
# The functions take as inputs training and testing data, and return 1) model weights, 2) model predictions to test data, 3) RMSE for test data
# methods include: CP, minimum variance, bottstrapping, AIC, BIC, leave one out cross validation, jackknife, and stacking
model_average_function<-function(
method, #model averaing method
data_in, #data
covs, # vector of columns of covariates to include
data_type # for flm function "family" argument. e.g., "binomial" or "gaussian"
){
#training data
train<-data_in[1:(nrow(data_in)/2),c(1,covs)]
#testing response
test_y<-data_in[((nrow(data_in)/2)+1):(nrow(data_in)),1]
#testing covariate values
test_covs<-data_in[((nrow(data_in)/2)+1):(nrow(data_in)),covs]
start_time<-Sys.time()
## fit global model
fm1 <- glm(y~., data=train, na.action = "na.fail", family=data_type,x=TRUE)
#get all possible models
dfm1<-dredge(fm1)
#get all of the model output
fm1.mods.all<-get.models(dfm1, subset=TRUE)
#calculate weights
#information criterion
if(method%in%c("AIC","BIC","Cp","loo")){
#Get the information criterion value
if(method=="loo"){
mod_criterion_value<-sapply(fm1.mods.all, method,type="rmse")
}else{
mod_criterion_value<-sapply(fm1.mods.all, method)
}
# Calculate the weights for each model based on Cp
weights<-exp(-0.5*(mod_criterion_value-min(mod_criterion_value)))/sum(exp(-0.5*(mod_criterion_value-min(mod_criterion_value))))
}else
#Bates-granger/minimal variance weights
if(method=="minimum_variance"){
weights<-(BGWeights(fm1.mods.all, data=train))#some are negative
}else
if(method=="bootstrap"){
set.seed(2020)
weights<-(bootWeights(fm1.mods.all, data=train,R=1000))
}else
if(method=="stacking"){
weights<-(stackingWeights(fm1.mods.all, data=train, R=1000))[1,]
}else
if(method=="jackknife"){
weights<-(jackknifeWeights(fm1.mods.all, data=train))
}else
if(method=="cos_squared"){
weights<-(cos2Weights(fm1.mods.all, data=train))
}
#predict for all models
preds_all_mods <-sapply(fm1.mods.all, predict, newdata = test_covs,family=data_type, type = "response")
#weigh the models by the BG/MV weights
wtpreds<-preds_all_mods%*%weights
#RMSE
rmse<-sqrt(mean((wtpreds-test_y )^2))
#time it took to run
total_time<-Sys.time()-start_time
#return
return(list(
weights=weights,
wtpreds=wtpreds,
rmse=rmse,
run_time=total_time,
method=method,
true_test=test_y
))
}