forked from MariekeDirk/ML_project
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathDec_TreeR2.Rmd
More file actions
138 lines (83 loc) · 2.82 KB
/
Dec_TreeR2.Rmd
File metadata and controls
138 lines (83 loc) · 2.82 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
---
title: "Dec_Tree"
author: "Eva Kleingeld"
date: "December 22, 2016"
output: html_document
---
First, clear your working environment and install any missing packages (the install commands below are commented out; uncomment them if `caret` is not yet installed).
```{r}
# Remove all (non-hidden) objects from the current environment so the
# analysis starts from an empty workspace.
rm(list = ls())

# One-time package installation; left disabled. Uncomment whichever variant
# is needed (the second one also pulls in the suggested packages).
# install.packages("caret")
# install.packages("caret", dependencies = c("Imports", "Depends", "Suggests"))
```
Now load the training and test sets and split each of them into a target vector (`TRoad`) and a set of predictors.
```{r}
# Restore the saved data sets. The code below presumably relies on these
# files recreating the objects Train_set and Test_set in the workspace.
load("/usr/people/kleingel/Projects/MLProject/Train_3D_noPCA.Rda")
load("/usr/people/kleingel/Projects/MLProject/Test_3D_noPCA.Rda")

# Pull the response column (TRoad) out of each set before dropping it.
Target_Train <- Train_set[["TRoad"]]
Target_Test <- Test_set[["TRoad"]]

# Keep only the predictor columns, i.e. everything except TRoad.
Train_set <- subset(Train_set, select = -TRoad)
Test_set <- subset(Test_set, select = -TRoad)
```
# Build a decision tree
```{r}
library(caret)
library(doParallel)
library(parallel)
library(rpart)
library(rattle)

# Start a 3-worker cluster and register it as the foreach backend so that
# caret can run the resampling iterations in parallel.
cluster_1 <- makeCluster(3)
registerDoParallel(cluster_1)
getDoParWorkers()

# 10-fold cross-validation, repeated 5 times.
trainC <- trainControl(
  method = "repeatedcv",
  repeats = 5,
  number = 10,
  allowParallel = TRUE
)

# Fit the rpart decision tree, trying 10 candidate tuning values.
# The `finally` clause shuts the cluster down and restores the sequential
# backend even when train() fails, so no worker processes are left behind.
DecTree <- tryCatch(
  train(
    x = Train_set,
    y = Target_Train,
    method = "rpart",
    trControl = trainC,
    tuneLength = 10
  ),
  finally = {
    stopCluster(cluster_1)
    registerDoSEQ()
  }
)

# Save the model
save(DecTree, file = "/usr/people/kleingel/Projects/MLProject/DecTree_noPCA.Rda")
```
# Analysis
```{r}
# Load the model from the location the training chunk saved it to, so the
# analysis below describes the model that was trained by this document
# (the path previously pointed at a different directory).
load("/usr/people/kleingel/Projects/MLProject/DecTree_noPCA.Rda")

# summary(DecTree) is left disabled: it prints a large amount of numbers that
# are hard to interpret.
# summary(DecTree)

# Resampled performance (RMSE, R2) for each candidate value of the tuning
# parameter, summarised over the cross-validation resamples. These are not
# per-fold values; per the caret documentation the per-resample metrics of
# the selected model are kept in DecTree$resample.
DecTree$results

# Plot the final decision tree and write it to a PNG file
png("TreePlot_3D_NoPCA")
fancyRpartPlot(DecTree$finalModel, sub = "Three Days train set without PCA")
dev.off()

# Variable importance (stored for inspection; not printed or plotted here)
DecTree_Imp <- varImp(DecTree)

# Predictions together with the matching observed values. Per the caret
# documentation the result covers the training data as well as the supplied
# test data, so the plots below contain both sets.
DecTree_Predict <- extractPrediction(
  models = list(DecTree),
  testX = Test_set,
  testY = Target_Test
)

# plotObsVsPred() is too computationally intensive here, so it stays disabled.
# png("DecTree_ObsPred")
# plotObsVsPred(DecTree_Predict)
# dev.off()

# Observed versus predicted, drawn manually with ggplot2. The plot is printed
# explicitly so it reaches the png device even when this code is not run at
# top level (e.g. through source()), where ggplot objects are not auto-printed.
png("DecTree_ObsPred_3D_NoPCA")
obs_pred_plot <- ggplot() +
  geom_point(aes(x = DecTree_Predict$obs, y = DecTree_Predict$pred)) +
  xlab("Observed road temperature (K)") +
  ylab("Predicted road temperature (K)")
print(obs_pred_plot)
dev.off()

# Residuals (observed minus predicted) for the train/test predictions
DecTree_Residuals <- DecTree_Predict$obs - DecTree_Predict$pred

# Residuals versus observed values, with a horizontal reference line at zero
png("DecTree_ResPlot_3D_noPCA")
plot(DecTree_Predict$obs, DecTree_Residuals,
     ylab = "Residuals", xlab = "Observed road temperature")
abline(h = 0)
dev.off()
```