You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
{{ message }}
This repository was archived by the owner on Jan 13, 2020. It is now read-only.
## steps date interval
## 1 NA 2012-10-01 0
## 2 NA 2012-10-01 5
## 3 NA 2012-10-01 10
## 4 NA 2012-10-01 15
## 5 NA 2012-10-01 20
## 6 NA 2012-10-01 25
What is the mean total number of steps taken per day?
# Preprocess: keep only the rows that have a recorded step count and parse
# the date column so it can be placed on a date scale.
activity_by_day <- activity[!is.na(activity$steps), ]
activity_by_day$date <- as.Date(activity_by_day$date)

# Plot the sum of steps per date as a bar chart.
# NOTE(review): `fun.y` was renamed to `fun` in ggplot2 3.3.0; it is kept here
# so the script still runs on the older ggplot2 it was written against.
plot <- ggplot(
  data = activity_by_day,
  aes(x = date, y = steps, width = 0.5, color = "steps")
)
plot <- plot + stat_summary(fun.y = sum, geom = "bar")
plot <- plot +
  scale_x_date(labels = date_format("%Y-%m-%d"), breaks = "1 week")
# Rotate the date labels so weekly ticks do not overlap.
plot <- plot + theme(axis.text.x = element_text(angle = 90, hjust = 1))
plot <- plot + xlab("") + ylab("steps (sum)")
plot
# Sum the steps per day, then report the mean and median of the daily totals.
activity_steps_by_day <- activity_by_day %>%
  group_by(date) %>%
  summarize(sum = sum(steps))
mean_steps_per_day <- mean(activity_steps_by_day$sum)
median_steps_per_day <- median(activity_steps_by_day$sum)
print("Median and Mean stats for overall dataset")
## [1] "Median and Mean stats for overall dataset"
# c() coerces the number to character, so label and value print as strings.
print(c("Mean", mean_steps_per_day))
## [1] "Mean" "10766.1886792453"
print(c("Median", median_steps_per_day))
## [1] "Median" "10765"
What is the average daily activity pattern?
# Preprocess: keep only the rows that have a recorded step count, parse the
# date column and group the rows by 5-minute interval.
activity_by_interval <- activity[!is.na(activity$steps), ]
# Parse this data frame's own date column rather than borrowing the column
# from `activity_by_day` (which only matched because both use the same filter).
activity_by_interval$date <- as.Date(activity_by_interval$date)
activity_by_interval <- activity_by_interval %>% group_by(interval)

# Plot the average number of steps for each interval as a line.
# NOTE(review): `fun.y` was renamed to `fun` in ggplot2 3.3.0; it is kept here
# so the script still runs on the older ggplot2 it was written against.
plot <- ggplot(
  data = activity_by_interval,
  aes(x = interval, y = steps, width = 0.5)
)
plot <- plot +
  stat_summary(fun.y = mean, geom = "line", aes(color = "mean/5 min"))
plot <- plot + theme(axis.text.x = element_text(angle = 90, hjust = 1))
plot <- plot + xlab("") + ylab("steps (mean)")
plot
# Compute the mean number of steps per interval and pick the interval with the
# highest mean. The summary column keeps its original name `max` so the
# printed output is unchanged; mean() already returns a single value per
# group, so no additional max() is needed around it.
activity_steps_max_by_interval <- activity_by_interval %>%
  group_by(interval) %>%
  summarize(max = mean(steps))
# The summary has exactly two columns (interval, max), so the whole row is
# selected.
highest <- activity_steps_max_by_interval[
  which.max(activity_steps_max_by_interval$max),
]
print("The 5-minute interval that, on average, contains the maximum number of steps")
## [1] "The 5-minute interval that, on average, contains the maximum number of steps"
print(highest)
## Source: local data frame [1 x 2]
##
## interval max
## (int) (dbl)
## 1 835 206.1698
Imputing missing values
# Report the number of rows in the original dataset whose step count is NA.
activity_incomplete <- is.na(activity$steps)
# Summing a logical vector counts its TRUE entries.
sum_activity_incomplete <- sum(activity_incomplete)
print(c("Number of records without a value: ", sum_activity_incomplete))
## [1] "Number of records without a value: "
## [2] "2304"
# Create a new dataset in which every NA step count is replaced by the mean
# step count of the same 5-minute interval.

# Lookup table mapping each interval to its mean step count (interval x mean).
activity_by_interval_and_mean <- activity_by_interval %>%
  group_by(interval) %>%
  summarize(mean = mean(steps))

# Work on a copy so the original `activity` data stays untouched.
activity_imputed <- activity
activity_imputed$date <- as.Date(activity_imputed$date)

# Vectorized replacement: match() finds, for each row with a missing step
# count, the position of its interval in the lookup table.
missing_steps <- is.na(activity_imputed$steps)
activity_imputed$steps[missing_steps] <- activity_by_interval_and_mean$mean[
  match(
    activity_imputed$interval[missing_steps],
    activity_by_interval_and_mean$interval
  )
]
# Plot the sum of steps per date for the imputed dataset as a bar chart.
# NOTE(review): `fun.y` was renamed to `fun` in ggplot2 3.3.0; it is kept here
# so the script still runs on the older ggplot2 it was written against.
plot <- ggplot(
  data = activity_imputed,
  aes(x = date, y = steps, width = 0.5, color = "steps")
)
plot <- plot + stat_summary(fun.y = sum, geom = "bar")
plot <- plot +
  scale_x_date(labels = date_format("%Y-%m-%d"), breaks = "1 week")
# Rotate the date labels so weekly ticks do not overlap.
plot <- plot + theme(axis.text.x = element_text(angle = 90, hjust = 1))
plot <- plot + xlab("") + ylab("steps (sum)")
plot
# Sum the imputed steps per day, then report the mean and median of the daily
# totals.
activity_imputed_by_day <- activity_imputed %>%
  group_by(date) %>%
  summarize(sum = sum(steps))
mean_steps_per_day_imputed <- mean(activity_imputed_by_day$sum)
median_steps_per_day_imputed <- median(activity_imputed_by_day$sum)
print("Median and Mean stats for overall dataset (imputed)")
## [1] "Median and Mean stats for overall dataset (imputed)"
# c() coerces the number to character, so label and value print as strings.
print(c("Mean", mean_steps_per_day_imputed))
## [1] "Mean" "10766.1886792453"
print(c("Median", median_steps_per_day_imputed))
## [1] "Median" "10766.1886792453"
print("In contrast to unimputed data there are values available for all days now. Means and Medians are not really affected. That might be due to the overall low amount of missing data (2304 of 17,568)")
## [1] "In contrast to unimputed data there are values available for all days now. Means and Medians are not really affected. That might be due to the overall low amount of missing data (2304 of 17,568)"
Are there differences in activity patterns between weekdays and weekends?
# Create a factor variable telling whether each date is a weekday or a
# weekend day. format(date, "%u") yields the ISO weekday number as a string
# ("1" = Monday ... "7" = Sunday), which does not depend on the session
# locale the way the day names returned by weekdays() do.
activity_imputed_with_weekend_factor <- activity_imputed %>%
  mutate(
    is_weekend = as.factor(
      ifelse(format(date, "%u") %in% c("6", "7"), "Weekend", "Weekday")
    )
  )

# Split the data so each group can be drawn as its own line.
activity_imputed_by_weekday <- activity_imputed_with_weekend_factor[
  activity_imputed_with_weekend_factor$is_weekend == "Weekday",
]
activity_imputed_by_weekend <- activity_imputed_with_weekend_factor[
  activity_imputed_with_weekend_factor$is_weekend == "Weekend",
]

# Both groups are drawn in one panel so they are easier to compare.
# NOTE(review): `fun.y` was renamed to `fun` in ggplot2 3.3.0; it is kept here
# so the script still runs on the older ggplot2 it was written against.
plot <- ggplot(
  data = activity_imputed_with_weekend_factor,
  aes(x = interval, y = steps, width = 0.5)
)
plot <- plot +
  stat_summary(
    data = activity_imputed_by_weekday,
    fun.y = mean,
    geom = "line",
    aes(color = "mean/weekday")
  )
plot <- plot +
  stat_summary(
    data = activity_imputed_by_weekend,
    fun.y = mean,
    geom = "line",
    aes(color = "mean/weekend")
  )
plot <- plot + theme(axis.text.x = element_text(angle = 90, hjust = 1))
plot <- plot + xlab("") + ylab("steps (mean)")
plot