diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..5b6a065
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,6 @@
+.Rproj.user
+.Rhistory
+.RData
+.Ruserdata
+# macOS Finder metadata: machine-specific, never part of the project
+.DS_Store
diff --git a/Class 7 Instructions.Rmd b/Class 7 Instructions.Rmd index 5ae641a..4ba7520 100644 --- a/Class 7 Instructions.Rmd +++ b/Class 7 Instructions.Rmd @@ -1,3 +1,4 @@ + --- title: "Assignment 3" author: "Charles Lang" @@ -18,7 +19,7 @@ library(tidyr, dplyr) ##Upload wide format instructor data (instructor_activity_wide.csv) ```{r} -data_wide <- read.table("~/Documents/NYU/EDCT2550/Assignments/Assignment 3/instructor_activity_wide.csv", sep = ",", header = TRUE) +data_wide <- read.table("~/class7/instructor_activity_wide.csv", sep = ",", header = TRUE) #Now view the data you have uploaded and notice how its structure: each variable is a date and each row is a type of measure. View(data_wide) @@ -59,7 +60,9 @@ instructor_data <- spread(data_long, variables, measure) ##Now we have a workable instructor data set!The next step is to create a workable student data set. Upload the data "student_activity.csv". View your file once you have uploaded it and then draw on a piece of paper the structure that you want before you attempt to code it. Write the code you use in the chunk below. (Hint: you can do it in one step) ```{r} - +data_student <- read.table("~/class7/student_activity.csv", sep = ",", header = TRUE) +student_data <- spread(data_student, variable, measure) +View(student_data) ``` ##Now that you have workable student data set, subset it to create a data set that only includes data from the second class. @@ -75,7 +78,7 @@ student_data_2 <- dplyr::filter(student_data, date == 20160204) Now subset the student_activity data frame to create a data frame that only includes students who have sat at table 4. 
Write your code in the following chunk: ```{r} - +student_data_3 <- dplyr::filter(student_data, table == 4) ``` ##Make a new variable @@ -89,7 +92,7 @@ instructor_data <- dplyr::mutate(instructor_data, total_sleep = s_deep + s_light Now, refering to the cheat sheet, create a data frame called "instructor_sleep" that contains ONLY the total_sleep variable. Write your code in the following code chunk: ```{r} - +instructor_sleep <- dplyr::select(instructor_data, total_sleep) ``` Now, we can combine several commands together to create a new variable that contains a grouping. The following code creates a weekly grouping variable called "week" in the instructor data set: @@ -100,7 +103,7 @@ instructor_data <- dplyr::mutate(instructor_data, week = dplyr::ntile(date, 3)) Create the same variables for the student data frame, write your code in the code chunk below: ```{r} - +student_data <- dplyr::mutate(student_data, week = dplyr::ntile(date, 3)) ``` ##Sumaraizing @@ -117,7 +120,8 @@ student_data %>% dplyr::group_by(date) %>% dplyr::summarise(mean(motivation)) Create two new data sets using this method. One that sumarizes average motivation for students for each week (student_week) and another than sumarizes "m_active_time" for the instructor per week (instructor_week). Write your code in the following chunk: ```{r} - +student_week <- student_data %>% dplyr::group_by(week) %>% dplyr::summarise(mean(motivation)) +instructor_week <- instructor_data %>% dplyr::group_by(week) %>% dplyr::summarise(mean(m_active_time)) ``` ##Merging @@ -130,8 +134,33 @@ merge <- dplyr::full_join(instructor_week, student_week, "week") ##Visualize Visualize the relationship between these two variables (mean motivation and mean instructor activity) with the "plot" command and then run a Pearson correlation test (hint: cor.test()). 
Write the code for the these commands below: + + ```{r} +z <- dplyr::select(merge, -week) +plot(z) + +# cor.test() needs numeric vectors, so pull each column out with [[ ]] +aa <- z[[1]] +bb <- z[[2]] + +# Pearson correlation between the two weekly means +cor.test(aa, bb) ``` +##visualization result + +image: ![](Rplot.png) + +Pearson's product-moment correlation + +data: aa and bb +## t = -2.0734, df = 1, p-value = 0.2861 (not significant correlation) +alternative hypothesis: true correlation is not equal to 0 +sample estimates: + cor +-0.9007108 + + Fnally save your markdown document and your plot to this folder and comit, push and pull your repo to submit.
diff --git a/Rplot.png b/Rplot.png
new file mode 100644
index 0000000..7cdc359
Binary files /dev/null and b/Rplot.png differ
diff --git a/class7script.R b/class7script.R
new file mode 100644
index 0000000..1bebd64
--- /dev/null
+++ b/class7script.R
@@ -0,0 +1,54 @@
+# Attach the packages this script calls unqualified: gather()/spread() come
+# from tidyr, and the %>% pipe is re-exported by dplyr.
+library(tidyr)
+library(dplyr)
+
+data_wide <- read.table("~/class7/instructor_activity_wide.csv", sep = ",", header = TRUE)
+
+#Now view the data you have uploaded and notice how its structure: each variable is a date and each row is a type of measure.
+View(data_wide)
+
+data_long <- gather(data_wide, date, variables)
+#Rename the variables so we don't get confused about what is what!
+names(data_long) <- c("variables", "date", "measure")
+#Take a look at your new data, looks weird huh?
+View(data_long)
+
+# Widen again: every value of `variables` becomes a column filled from `measure`.
+instructor_data <- spread(data_long, variables, measure)
+
+# Student activity has `variable`/`measure` columns, so a single spread() widens it.
+data_student <- read.table("~/class7/student_activity.csv", sep = ",", header = TRUE)
+student_data <- spread(data_student, variable, measure)
+View(student_data)
+
+# Subsets: rows dated 20160204, and rows recorded at table 4.
+student_data_2 <- dplyr::filter(student_data, date == 20160204)
+student_data_3 <- dplyr::filter(student_data, table == 4)
+
+# Derived columns: total sleep, then a three-bucket `week` grouping built from date.
+instructor_data <- dplyr::mutate(instructor_data, total_sleep = s_deep + s_light)
+instructor_sleep <- dplyr::select(instructor_data, total_sleep)
+instructor_data <- dplyr::mutate(instructor_data, week = dplyr::ntile(date, 3))
+student_data <- dplyr::mutate(student_data, week = dplyr::ntile(date, 3))
+
+# Overall mean motivation across all rows.
+student_data %>% dplyr::summarise(mean(motivation))
+
+# That isn't super interesting, so let's break it down by class date:
+student_data %>% dplyr::group_by(date) %>% dplyr::summarise(mean(motivation))
+
+# Per-week means for students and instructor, joined on `week`.
+student_week <- student_data %>% dplyr::group_by(week) %>% dplyr::summarise(mean(motivation))
+instructor_week <- instructor_data %>% dplyr::group_by(week) %>% dplyr::summarise(mean(m_active_time))
+merge <- dplyr::full_join(instructor_week, student_week, "week")
+
+# Drop the key so only the two mean columns are plotted against each other.
+z <- dplyr::select(merge, -week)
+plot(z)
+
+# cor.test() expects numeric vectors; [[ ]] pulls each column out as one
+# (a transposed one-column data frame would be a 1 x n matrix instead).
+aa <- z[[1]]
+bb <- z[[2]]
+cor.test(aa, bb)