forked from MariekeDirk/ML_project
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathAnalyze_Env.R
More file actions
59 lines (43 loc) · 1.85 KB
/
Analyze_Env.R
File metadata and controls
59 lines (43 loc) · 1.85 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
##############################################################################################################
# In this script the environmental data is analyzed.
# Distributions are analyzed and plotted
# Correlations between variables are analyzed
##############################################################################################################
# Read in environmental data
# The workspace is deliberately NOT cleared with rm(list = ls()): that call
# silently deletes every object in the session of whoever runs this script,
# while leaving loaded packages and options untouched. Run the script in a
# fresh R session instead.
# The file location can be overridden with the ENV_DATA_PATH environment
# variable; the default is the original project location.
env_data_path <- Sys.getenv(
  "ENV_DATA_PATH",
  unset = "/usr/people/kleingel/Projects/MLProject/Env_Data.Rda"
)
if (!file.exists(env_data_path)) {
  stop("Environmental data file not found: ", env_data_path, call. = FALSE)
}
# load() returns the names of the objects it restored; the rest of the script
# reads its data from an object called Env_Data_4, so fail early if it is
# missing.
loaded_objects <- load(env_data_path)
if (!"Env_Data_4" %in% loaded_objects) {
  stop(
    "Expected object 'Env_Data_4' was not found in ", env_data_path,
    call. = FALSE
  )
}
# Plot histograms of LAT/LON/ALT
# The altitude data is especially skewed
# xlab is set explicitly; otherwise hist() labels the x-axis with the raw
# expression that was passed in (e.g. "Env_Data_4$LAT").
hist(Env_Data_4$LAT, main = "Histogram of Latitude", xlab = "Latitude")
hist(Env_Data_4$LON, main = "Histogram of Longitude", xlab = "Longitude")
hist(Env_Data_4$ALT, main = "Histogram of Altitude", xlab = "Altitude")
# Shapiro-Wilk test
# The null hypothesis of the test is that the sample comes from a normal
# distribution.
# - p < 0.05: the null hypothesis is rejected, i.e. there is evidence that
#   the data is NOT normally distributed.
# - p >= 0.05: the null hypothesis cannot be rejected. This does NOT prove
#   that the data is normally distributed; the test merely found no evidence
#   against normality.
# See research log for the Stackoverflow comments discussing this.
# ALT, LAT and LON all have p < 0.05, so normality is rejected for all three.
# Note: shapiro.test() only accepts between 3 and 5000 non-missing values.
# print() is used so the results are also shown when the script is run
# through source(), which does not auto-print top-level values.
print(shapiro.test(Env_Data_4$LAT))
print(shapiro.test(Env_Data_4$LON))
print(shapiro.test(Env_Data_4$ALT))
# Q-Q plots
# Each plot gets its own title (qqnorm() otherwise titles every plot
# "Normal Q-Q Plot", making the three indistinguishable) and a qqline()
# reference line through the first and third quartiles, so departures from
# normality can be judged against a straight line.
# The Q-Q plots suggest that ALT is definitely not normally distributed.
# LAT and LON are approximately normally distributed
qqnorm(Env_Data_4$LAT, main = "Normal Q-Q Plot of Latitude")
qqline(Env_Data_4$LAT)
qqnorm(Env_Data_4$LON, main = "Normal Q-Q Plot of Longitude")
qqline(Env_Data_4$LON)
qqnorm(Env_Data_4$ALT, main = "Normal Q-Q Plot of Altitude")
qqline(Env_Data_4$ALT)
# Determine the skewness of LAT/LON/ALT
# A positive value means right skewed (long right tail), a negative value
# means left skewed (long left tail).
# LAT is slightly right skewed
# LON is slightly left skewed
# ALT is right skewed
library(e1071)
# The three values are collected in a named vector and printed explicitly, so
# the output is labelled and is also shown when the script is run through
# source(), which does not auto-print top-level values.
print(c(
  LAT = skewness(Env_Data_4$LAT),
  LON = skewness(Env_Data_4$LON),
  ALT = skewness(Env_Data_4$ALT)
))
# Determine kurtosis
# kurtosis() from e1071 reports excess kurtosis, so a normal distribution
# scores 0: negative values indicate a flatter distribution, positive values
# a more peaked one.
# The LAT and LON are slightly flattened distributions
# ALT is strongly peaked (kurtosis is more than 26) (also called leptokurtic)
# The three values are collected in a named vector and printed explicitly, so
# the output is labelled and is also shown when the script is run through
# source(), which does not auto-print top-level values.
print(c(
  LAT = kurtosis(Env_Data_4$LAT),
  LON = kurtosis(Env_Data_4$LON),
  ALT = kurtosis(Env_Data_4$ALT)
))
# You can conclude that the altitude has a positive skew = right skewed