HumanActivityRecognition/run_analysis.R at master · shahramjoon/HumanActivityRecognition · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
#' Build the tidy "average per subject and activity" data set from the
#' UCI HAR Dataset files.
#'
#' Reads `activity_labels.txt` and `features.txt` from `data_dir`, plus the
#' `X_*.txt`, `subject_*.txt` and `y_*.txt` files from its `test` and `train`
#' sub-directories. Train rows are stacked on top of test rows, only the
#' features whose name contains `mean(` or `std` are kept, and each of those
#' features is averaged per (SUBJECT_ID, ACTIVITY_ID) pair.
#'
#' The working directory is never changed: every file is addressed through
#' `file.path()`. Only base/stats/utils functions are used, so no extra
#' package is attached.
#'
#' @param data_dir Path to the unpacked data set directory. Defaults to the
#'   location that was previously hard-coded.
#' @return A data frame with one row per observed activity/subject pair,
#'   ordered by ACTIVITY_ID then SUBJECT_ID. Columns, in order: ACTIVITY_ID,
#'   SUBJECT_ID, one `AVG_<feature>` column per retained feature, and
#'   ACTIVITY_NAME (NA when an activity id has no entry in the labels file).
run_analysis <- function(data_dir = "c:/temp/UCI HAR Dataset") {
  stopifnot(is.character(data_dir), length(data_dir) == 1L, !is.na(data_dir))
  if (!dir.exists(data_dir)) {
    stop("Data directory not found: ", data_dir, call. = FALSE)
  }

  # Activity id -> activity name lookup table.
  activity_labels_data <- read.table(
    file.path(data_dir, "activity_labels.txt"),
    col.names = c("ACTIVITY_ID", "ACTIVITY_NAME"),
    stringsAsFactors = FALSE
  )

  # Second column of features.txt holds the metric names. Quote and comment
  # handling are disabled so the names are taken verbatim.
  feature_names <- read.table(
    file.path(data_dir, "features.txt"),
    stringsAsFactors = FALSE,
    quote = "",
    comment.char = ""
  )[[2]]

  # Positions of the metrics to keep: names containing "mean(" or "std".
  keep <- grep("mean\\(|std", feature_names)
  if (length(keep) == 0L) {
    stop("No 'mean(' or 'std' features listed in features.txt", call. = FALSE)
  }

  # Read one split ("test" or "train") and return SUBJECT_ID, ACTIVITY_ID and
  # the retained metric columns side by side.
  read_split <- function(split) {
    split_file <- function(prefix) {
      file.path(data_dir, split, paste0(prefix, "_", split, ".txt"))
    }

    measurements <- read.table(split_file("X"))
    if (ncol(measurements) != length(feature_names)) {
      stop(
        "Column count of ", split_file("X"), " (", ncol(measurements),
        ") does not match the number of features (", length(feature_names),
        ")",
        call. = FALSE
      )
    }

    # Subset by position *before* naming, so duplicated names among the
    # discarded features never end up as duplicated column names.
    measurements <- measurements[, keep, drop = FALSE]
    names(measurements) <- feature_names[keep]

    cbind(
      read.table(split_file("subject"), col.names = "SUBJECT_ID"),
      read.table(split_file("y"), col.names = "ACTIVITY_ID"),
      measurements
    )
  }

  # Combine train and test data (train first, as before).
  whole_data_set <- rbind(read_split("train"), read_split("test"))

  # Mean of each metric per SUBJECT_ID, per ACTIVITY_ID. Only the metric
  # columns (everything after the two id columns) are passed to mean(); the
  # ids are supplied as named grouping variables and come back as the first
  # two columns of the result.
  averages <- aggregate(
    whole_data_set[-(1:2)],
    by = list(
      SUBJECT_ID = whole_data_set$SUBJECT_ID,
      ACTIVITY_ID = whole_data_set$ACTIVITY_ID
    ),
    FUN = mean
  )
  averages <- averages[
    order(averages$ACTIVITY_ID, averages$SUBJECT_ID), ,
    drop = FALSE
  ]

  # Output layout: ACTIVITY_ID, SUBJECT_ID, metrics. aggregate() returned
  # SUBJECT_ID first, so swap the first two positions.
  metric_positions <- seq_along(averages)[-(1:2)]
  tidy_dataset <- averages[c(2L, 1L, metric_positions)]

  # Add 'AVG_' as prefix to every metric column, however many there are.
  names(tidy_dataset)[metric_positions] <-
    paste0("AVG_", names(tidy_dataset)[metric_positions])

  # Resolve the name of each activity by lookup. This cannot add or drop rows.
  tidy_dataset$ACTIVITY_NAME <- activity_labels_data$ACTIVITY_NAME[
    match(tidy_dataset$ACTIVITY_ID, activity_labels_data$ACTIVITY_ID)
  ]
  rownames(tidy_dataset) <- NULL

  tidy_dataset
}