-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathKulas.R
More file actions
62 lines (47 loc) · 2.16 KB
/
Kulas.R
File metadata and controls
62 lines (47 loc) · 2.16 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
# Load the team-season data.
data <- read.csv("cbb.csv")

# Recode the qualitative tournament outcome as the number of rounds won.
# A single lookup table replaces the chain of `==` assignments: the result is
# numeric straight away (no character -> numeric coercion step), and going
# through as.character() makes it work whether the column was read as
# character or as factor. Any label not listed here, and any missing value,
# becomes NA.
rounds_won <- c(
  Champions = 6,
  "2ND" = 5,
  F4 = 4,
  E8 = 3,
  S16 = 2,
  R32 = 1,
  R64 = 0
)
data$POSTSEASON <- unname(rounds_won[as.character(data$POSTSEASON)])

# Quick look at the distribution of the numeric outcome.
hist(data$POSTSEASON)
library(caret)
# Resampling setup handed to every model fit: 10-fold cross-validation with
# per-iteration progress output.
control <- trainControl(
  method = "cv",
  number = 10,
  verboseIter = TRUE
)

# Name of the performance metric passed to train().
metric <- "RMSE"

# Keep only the rows with no missing values (NAs caused problems, 3/17/21).
use <- data[complete.cases(data), ]

# Earlier attempt on the full column set, kept for reference; it had too many
# predictors, so the set is trimmed down below.
# fit.svm <- train(POSTSEASON ~ ., data=use, method="lm", metric=metric, trControl=control)
# psych::describe(use)

# Drop columns 1, 2, 3 and 24 by position.
# NOTE(review): presumably identifier / non-predictor columns; confirm against
# the header of cbb.csv, since positional indices break if the layout changes.
use2 <- use[, -c(1L, 2L, 3L, 24L)]
# Every model is fitted after resetting the RNG to the same seed, so that
# train() draws the same cross-validation folds for each of them. The
# resamples() comparison further down is then made fold-by-fold on identical
# splits rather than on a different random partition per model.
cv_seed <- 32

# Fit one caret model of the given `method` on use2, predicting POSTSEASON
# from all remaining columns, using the shared `control` and `metric` and
# caret's "knnImpute" pre-processing.
fit_model <- function(method) {
  set.seed(cv_seed)
  train(
    POSTSEASON ~ .,
    data = use2,
    method = method,
    metric = metric,
    trControl = control,
    preProcess = "knnImpute"
  )
}

# Linear regression
fit.lm <- fit_model("lm")
# caret method "WM"
fit.wm <- fit_model("WM")
# fit.evtree <- train(POSTSEASON ~ ., data=use2, method="evtree", metric=metric, trControl=control)
# CART
fit.cart <- fit_model("rpart")
# kNN
fit.knn <- fit_model("knn")
# SVM (radial kernel)
fit.svm <- fit_model("svmRadial")
# Random forest
fit.rf <- fit_model("rf")

# Summarize and plot the cross-validated performance of all fitted models.
results <- resamples(list(
  cart = fit.cart,
  knn = fit.knn,
  svm = fit.svm,
  rf = fit.rf,
  lm = fit.lm,
  wm = fit.wm
))
summary(results)
dotplot(results)
# Narrower comparison of just the two baseline fits, kept for reference:
# results <- resamples(list(lm = fit.lm, wm = fit.wm))
# summary(results)
# dotplot(results)
# Read the validation file and drop its first three columns by position.
# NOTE(review): presumably the same leading columns removed from the training
# data; confirm that cbb22.csv shares the layout of cbb.csv.
validate <- read.csv(file = "cbb22.csv")
validate <- validate[, -seq_len(3)]

# Score the validation rows with the linear model and export the predictions.
predictions <- predict(fit.lm, newdata = validate)
write.csv(predictions, file = "winner.csv")