-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathrun_analysis.R
More file actions
84 lines (56 loc) · 2.38 KB
/
run_analysis.R
File metadata and controls
84 lines (56 loc) · 2.38 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
# Script for analysing data from a wearable device
# assumes that data is present in the current directory (under subfolders)
library(dplyr)
####
# read the descriptive data
####
# Root folder of the unzipped data set, relative to the working directory.
data_dir <- "UCI HAR Dataset"
# Fail early with a clear message instead of a cryptic read.table() error.
if (!dir.exists(data_dir)) {
  stop("Directory '", data_dir, "' not found in ", getwd(), call. = FALSE)
}
# Paths are assembled with file.path() so the script is not tied to
# Windows-style "\\" separators.
features <- read.table(file.path(data_dir, "features.txt"))
activity_labels <- read.table(file.path(data_dir, "activity_labels.txt"))
####
# read the data sets, both training and test
####
# NOTE: the measurement files are spelled with a capital "X" (X_train.txt,
# X_test.txt); a lowercase name is only found on case-insensitive file systems.
subject_train <- read.table(
  file.path(data_dir, "train", "subject_train.txt"),
  header = FALSE
)
activity_train <- read.table(
  file.path(data_dir, "train", "y_train.txt"),
  header = FALSE
)
features_train <- read.table(
  file.path(data_dir, "train", "X_train.txt"),
  header = FALSE
)
subject_test <- read.table(
  file.path(data_dir, "test", "subject_test.txt"),
  header = FALSE
)
activity_test <- read.table(
  file.path(data_dir, "test", "y_test.txt"),
  header = FALSE
)
features_test <- read.table(
  file.path(data_dir, "test", "X_test.txt"),
  header = FALSE
)
####
# merge the data together
####
# Training rows come first, then test rows; the same order is used for all
# three tables so that their rows stay aligned.
subject_complete <- rbind(subject_train, subject_test)
activity_complete <- rbind(activity_train, activity_test)
features_complete <- rbind(features_train, features_test)
####
# set the column names
####
# The feature names are the second column of features.txt. They are taken as a
# plain character vector (also correct if read.table() produced a factor).
feature_names <- as.character(features[[2]])
# Guard against a mismatch between the name list and the measurement columns,
# which would otherwise silently produce NA column names.
stopifnot(length(feature_names) == ncol(features_complete))
colnames(features_complete) <- feature_names
# then give column names to the activity and subject data
colnames(activity_complete) <- "Activity"
colnames(subject_complete) <- "Subject"
####
# join the data together
####
# All three tables must describe the same observations, row for row.
stopifnot(
  nrow(subject_complete) == nrow(features_complete),
  nrow(activity_complete) == nrow(features_complete)
)
# Resulting column order: Subject, Activity, then every feature column.
har_dataset <- cbind(subject_complete, activity_complete, features_complete)
####
# keep only the mean and standard deviation measurements
####
# Flag every column whose name contains "-mean" or "-std".
# NOTE(review): the pattern is not anchored, so a name such as "...-meanFreq()"
# would be selected as well -- confirm that this is intended.
is_mean_or_std <- grepl(".*-mean|.*-std", colnames(har_dataset))
cols_for_mean_and_std <- which(is_mean_or_std)
# Subject and Activity occupy the first two positions and are always kept.
cols_to_keep <- c(1, 2, cols_for_mean_and_std)
har_mean_and_std_dataset <- har_dataset[, cols_to_keep]
####
# replace the numeric activity codes with their descriptive labels
####
# activity_labels holds the code in its first column and the label in its
# second. The lookup is done by code (not by row position) and for however many
# activities the label file defines, instead of a hard-coded 1:6 loop.
activity_codes <- as.character(har_mean_and_std_dataset$Activity)
label_position <- match(activity_codes, as.character(activity_labels[[1]]))
# Codes without an entry in activity_labels keep their original value (as a
# string) rather than turning into NA.
har_mean_and_std_dataset$Activity <- ifelse(
  is.na(label_position),
  activity_codes,
  as.character(activity_labels[[2]])[label_position]
)
####
# aggregate the data by subject and activity
####
# One row per (Subject, Activity) pair holding the mean of every measurement
# column. across() replaces the deprecated summarize_each()/funs() pair; the
# grouping columns are excluded from everything() automatically and the output
# column names stay the same as the input names.
final_output <- har_mean_and_std_dataset %>%
  group_by(Subject, Activity) %>%
  summarise(
    across(everything(), function(x) mean(x, na.rm = TRUE)),
    .groups = "drop" # return an ungrouped table
  )
####
# save the results as text files in the working directory
####
# Row names are suppressed in both files.
# 1) the labelled mean/std measurements, one row per observation
write.table(
  x = har_mean_and_std_dataset,
  file = "har_mean_and_std_dataset.txt",
  row.names = FALSE
)
# 2) the per-subject, per-activity averages
write.table(
  x = final_output,
  file = "final_output.txt",
  row.names = FALSE
)