run_analysis/run_analysis.R at master · yannbu/run_analysis · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
## Build a tidy summary of the "UCI HAR Dataset" and write it to disk.
##
## The function expects a directory named "UCI HAR Dataset" in the current
## working directory. It:
##   1. stacks the test and train measurement tables (test rows first),
##   2. keeps only the columns whose feature name contains "mean" or "std",
##      excluding the "meanFreq" features,
##   3. appends the activity id and subject id of every row and joins the
##      activity label from activity_labels.txt,
##   4. strips "(", ")" and "-" from the column names and lower-cases them,
##   5. averages every numeric column per (activity label, subject) pair and
##      writes the result to "data_summary.txt" in the working directory.
##
## Args:
##   x: Unused. Kept so that existing calls that pass an argument still work.
##
## Returns:
##   The value of write.table() (invisible NULL); the function is called for
##   its side effect of creating "data_summary.txt".
##
## Requires the 'plyr' package to be installed (it is not attached).
run_anal2 <- function(x) {

        data_dir <- "UCI HAR Dataset"

        if (!dir.exists(data_dir)) {
                stop("Directory '", data_dir, "' not found in ", getwd(),
                     call. = FALSE)
        }
        if (!requireNamespace("plyr", quietly = TRUE)) {
                stop("Package 'plyr' is required but not installed.",
                     call. = FALSE)
        }

        ## Reads the three files of one split ("test" or "train").
        ## The archive names them in lower case (test/X_test.txt,
        ## test/y_test.txt, ...); using the exact case matters on
        ## case-sensitive file systems.
        read_split <- function(split) {
                split_file <- function(prefix) {
                        file.path(data_dir, split,
                                  paste0(prefix, "_", split, ".txt"))
                }
                list(
                        x = read.table(split_file("X")),
                        y = read.table(split_file("y")),
                        subject = read.table(split_file("subject"))
                )
        }

        ## STEP 0 : Initialize Features names

        features_names <- read.table(file.path(data_dir, "features.txt"))

        ## Second column of features.txt holds the feature names.
        col_names <- as.character(features_names[[2]])

        ## STEP 1 : Merges the training and the test sets to create one data set.

        test_set <- read_split("test")
        train_set <- read_split("train")

        merge_x <- rbind(test_set$x, train_set$x)
        ## Names are assigned after reading: read.table(col.names = ) would
        ## run them through make.names() and alter them.
        colnames(merge_x) <- col_names

        ## STEP 2 : Extracts only the measurements on the mean and standard deviation for each measurement.

        ## "mean" columns first, then "std" columns (same order as before);
        ## "meanFreq" features also match "mean" and are dropped explicitly.
        to_keep <- setdiff(
                union(grep("mean", col_names), grep("std", col_names)),
                grep("meanFreq", col_names)
        )

        merge_clean <- merge_x[, to_keep, drop = FALSE]

        ## STEP 2.5 : Merge the Y and subject together and then merge it with the measurements

        merge_y <- rbind(test_set$y, train_set$y)
        colnames(merge_y)[1] <- "Activity"

        merge_subject <- rbind(test_set$subject, train_set$subject)
        colnames(merge_subject)[1] <- "Subject"

        ## Row order is still test-then-train in all three tables, so a
        ## positional cbind lines the ids up with their measurements.
        merge_global <- cbind(merge_clean, merge_y, merge_subject)

        ## STEP 3 : Uses descriptive activity names to name the activities in the data set

        activities_match <- read.table(file.path(data_dir, "activity_labels.txt"))

        names(activities_match)[names(activities_match) == "V2"] <- "activity_explicit"

        merge_global <- merge(merge_global, activities_match,
                              by.x = "Activity", by.y = "V1", all = TRUE)

        ## STEP 4 : Appropriately labels the data set with descriptive variable names.

        ## gsub() and tolower() are vectorised; no mapply() needed.
        names(merge_global) <- tolower(gsub("[()-]", "", names(merge_global)))

        ## STEP 5 : Creates a second, independent tidy data set with the average of each variable for each activity and each subject.

        ## numcolwise(mean) averages every numeric column, which includes
        ## the numeric "activity" id alongside the measurements.
        data_summary <- plyr::ddply(merge_global,
                                    c("activity_explicit", "subject"),
                                    plyr::numcolwise(mean))

        write.table(data_summary, "data_summary.txt", row.names = FALSE)

}