ML_project/Neuralnet_R2.Rmd at master · KNMI-DataLab/ML_project · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
---
title: "Neuralnet_R2"
author: "Eva Kleingeld"
date: "December 19, 2016"
output: html_document
---


First, clear your working environment and install any necessary packages (the install commands below are commented out; uncomment them if the packages are missing).

```{r}
# Start from an empty workspace: remove every object in the current environment.
rm(list = ls())

# Package installation (disabled; run once by hand if caret is not installed).
# The second form also pulls in caret's suggested packages.
#install.packages("caret")
#install.packages("caret", dependencies = c("Imports", "Depends", "Suggests"))


```

Now load in the test and train sets and split them into target and predictor sets

```{r}
library(caret)
# Read the serialized train and test data. The code below relies on these
# files restoring objects named Train_set and Test_set.
load("/usr/people/kleingel/Projects/MLProject/Train_3D_noPCA.Rda")
load("/usr/people/kleingel/Projects/MLProject/Test_3D_noPCA.Rda")

# Disabled experiment: train on a 1% stratified sample of the train set.
# The_One_Percent <- createDataPartition(Train_set$TRoad, p = 0.01, list = FALSE)
# One_Percent_Train <- Train_set[The_One_Percent, ]
# Target_Train <- One_Percent_Train$TRoad
# Train_set <- subset(One_Percent_Train, select=-c(TRoad))

# Disabled experiment: center/scale the target variable.
# Author's note: DOESN'T WORK, neuralnet somehow treats the problem as
# classification once the target is centered and scaled.
#xTrans <- preProcess(as.data.frame(Target_Train), method = c("center", "scale"), na.remove = TRUE)
#Target_Train <- as.vector(predict(xTrans, as.data.frame(Target_Train)))

# Train set ----
# Pull out the target (TRoad) and keep the remaining columns as predictors.
Target_Train <- Train_set$TRoad
Train_set <- subset(Train_set, select = -TRoad)

# Keep only the first seven predictor columns; per the author's note the
# remaining predictors don't say much about the target (written as "TW").
Train_set <- Train_set[, seq_len(7)]

# TD is highly correlated with TL: print the correlation, then drop TD.
cor(Train_set$TL, Train_set$TD)
Train_set <- subset(Train_set, select = -TD)

# Test set ----
# Same steps as for the train set, applied to the test set.
Target_Test <- Test_set$TRoad
Test_set <- subset(Test_set, select = -TRoad)

Test_set <- Test_set[, seq_len(7)]

cor(Test_set$TL, Test_set$TD)
Test_set <- subset(Test_set, select = -TD)

```

# Build a neural network with neuralnet

Here I train a neural network using the `neuralnet` package through caret's `train()` interface, with 10-fold cross-validation.

```{r}
library(caret)
library(doParallel)
library(parallel)
library(neuralnet)
library(doMC)

# Parallel backend ----
# The doMC backend is registered directly with a core count; no cluster object
# is created for it, so there is nothing to stop afterwards.
# Disabled doParallel alternative (needs a matching stopCluster(cluster_1)
# after training if it is ever re-enabled):
#cluster_1<-makeCluster(3)
#registerDoParallel(cluster_1)
registerDoMC(cores = 4)
getDoParWorkers()

# Resampling setup ----
# 10-fold cross-validation, resamples may run in parallel, and the training
# data is not stored inside the fitted object (returnData = FALSE).
NN_trainC <- trainControl(
  method = "cv",
  number = 10,
  allowParallel = TRUE,
  returnData = FALSE
)

# Model fit ----
# tuneLength = 3 asks caret to try three candidate values per tuning parameter.
NeuralNetw <- train(
  x = Train_set,
  y = Target_Train,
  method = "neuralnet",
  trControl = NN_trainC,
  tuneLength = 3
)
# Arguments tried earlier and left disabled:
#   hidden = c(6), savePredictions = "none",
#   linear.output = TRUE, lifesign = 'minimal'

# Switch foreach back to sequential execution.
# The previous stopCluster(cluster_1) call was removed: cluster_1 is never
# created (its makeCluster() line is commented out), so the call failed with
# "object 'cluster_1' not found" after training had already finished.
registerDoSEQ()

# Results ----
summary(NeuralNetw)

print(NeuralNetw$results)

# Missing-value diagnostics ----
# For every predictor column print its index, whether it contains any NA,
# and a summary of its values.
for (i in seq_along(colnames(Train_set))) {
  print(i)
  print(any(is.na(Train_set[, i])))
  print(summary(Train_set[, i]))
}

# Does the test set contain any NA at all?
any(is.na(Test_set))


```