-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathrun_analysis.R
More file actions
120 lines (67 loc) · 3.05 KB
/
run_analysis.R
File metadata and controls
120 lines (67 loc) · 3.05 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
#' Build the per-subject, per-activity averages table from the UCI HAR files
#'
#' Reads `activity_labels.txt`, `features.txt` and the `test/` and `train/`
#' splits found below `data_dir`, keeps only the measurements whose feature
#' name contains `mean(` or `std`, and averages each of them for every
#' (SUBJECT_ID, ACTIVITY_ID) pair.
#'
#' Files are addressed with `file.path()`, so the caller's working directory
#' is never changed.
#'
#' @param data_dir Directory containing `activity_labels.txt`, `features.txt`,
#'   `test/{X,y,subject}_test.txt` and `train/{X,y,subject}_train.txt`.
#'   Defaults to the location the function historically used.
#' @return A data frame with one row per subject/activity pair and the
#'   columns `ACTIVITY_ID`, `SUBJECT_ID`, one `AVG_<feature>` column per
#'   retained measurement, and `ACTIVITY_NAME`.
run_analysis <- function(data_dir = "c:/temp/UCI HAR Dataset") {
  if (!file.exists(data_dir)) {
    stop("Data directory not found: ", data_dir, call. = FALSE)
  }

  # Lookup table: activity id -> activity name.
  activity_labels <- read.csv(
    file.path(data_dir, "activity_labels.txt"),
    sep = " ", header = FALSE
  )
  colnames(activity_labels) <- c("ACTIVITY_ID", "ACTIVITY_NAME")

  # The second column of features.txt names the columns of X_<split>.txt.
  features <- read.csv(
    file.path(data_dir, "features.txt"),
    sep = " ", header = FALSE
  )
  feature_names <- as.character(features[, 2])

  # Only mean() and std() measurements are kept; "mean\\(" deliberately
  # excludes names such as meanFreq().
  wanted <- grep("mean\\(|std", feature_names)
  if (length(wanted) == 0L) {
    stop("No mean()/std features listed in features.txt", call. = FALSE)
  }

  # Read one split ("test" or "train") and return SUBJECT_ID, ACTIVITY_ID
  # and the retained measurement columns, in that order.
  read_split <- function(split) {
    split_file <- function(prefix) {
      file.path(data_dir, split, paste0(prefix, "_", split, ".txt"))
    }

    measurements <- read.table(split_file("X"))
    if (ncol(measurements) != length(feature_names)) {
      stop(
        "X_", split, ".txt has ", ncol(measurements), " columns but ",
        "features.txt lists ", length(feature_names), " features",
        call. = FALSE
      )
    }
    # Subset by position before naming so that duplicated names among the
    # discarded features can never interfere with column selection.
    measurements <- measurements[, wanted, drop = FALSE]
    colnames(measurements) <- feature_names[wanted]

    subjects <- read.table(split_file("subject"), col.names = "SUBJECT_ID")
    activities <- read.table(split_file("y"), col.names = "ACTIVITY_ID")

    cbind(subjects, activities, measurements)
  }

  whole_data_set <- rbind(read_split("train"), read_split("test"))

  # Columns 1-2 are the ids; everything after them is a measurement.
  metric_idx <- 2L + seq_along(wanted)

  # Average the numeric measurements only; the ids come from the grouping
  # keys instead of being averaged themselves.
  averages <- aggregate(
    whole_data_set[, metric_idx, drop = FALSE],
    by = list(
      SUBJECT_ID = whole_data_set$SUBJECT_ID,
      ACTIVITY_ID = whole_data_set$ACTIVITY_ID
    ),
    FUN = mean
  )
  names(averages)[metric_idx] <- paste0("AVG_", names(averages)[metric_idx])

  # Resolve the activity name once, after aggregation. merge() puts the key
  # first, then the remaining columns of `averages`, then ACTIVITY_NAME.
  merge(averages, activity_labels, by = "ACTIVITY_ID", all = TRUE)
}