-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathscript.R
More file actions
194 lines (153 loc) · 5.93 KB
/
script.R
File metadata and controls
194 lines (153 loc) · 5.93 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
#load required packages
library(tidyr)
library(readr)
library(kaggler)
library(dplyr)
library(ggplot2)
# Download the OpenPowerlifting dataset from Kaggle and load it ----
# kgl_datasets_download_all() comes from the kaggler package; the download
# location is read from the `url` element of its return value.
# NOTE(review): presumably Kaggle API credentials must already be configured
# for this call to succeed -- confirm against the kaggler setup instructions.

# download.file() cannot create "data/temp.zip" when the "data" directory is
# missing, so make sure it exists first (no-op if it is already there).
dir.create("data", showWarnings = FALSE, recursive = TRUE)

response <- kgl_datasets_download_all(owner_dataset = "ashley93/openpowerlifting")
# mode = "wb" keeps the zip archive intact on platforms that translate newlines
download.file(response[["url"]], "data/temp.zip", mode = "wb")
unzip_result <- unzip("data/temp.zip", exdir = "data/", overwrite = TRUE)
powerlift_data <- read.csv("data/openpowerlifting.csv")

# Show the column names available in the raw data
colnames(powerlift_data)
# Keep only the columns used in the analysis, then discard every row that has
# a missing value in any of the kept columns.
final_df <- powerlift_data %>%
  select(
    Name, Sex, Age, Division, BodyweightKg, WeightClassKg,
    BestSquatKg, BestBenchKg, BestDeadliftKg, TotalKg, Place
  ) %>%
  drop_na()
# Define age groups ----
# Adds an AgeGroup label derived from Age. case_when() uses the first condition
# that is TRUE; rows matching none of the age ranges (e.g. a missing Age) fall
# through to "Unknown".
final_df <- final_df %>%
  mutate(AgeGroup = case_when(
    Age < 20 ~ "Teen",
    Age >= 20 & Age < 40 ~ "Open",
    Age >= 40 & Age < 60 ~ "Master 40-59",
    Age >= 60 ~ "Master 60+",
    TRUE ~ "Unknown"
  ))

# View() opens a data viewer window, which only makes sense when a person is
# driving the session; skip it when the script runs non-interactively.
if (interactive()) {
  View(final_df)
}
# Split the data by sex ----
# The column is referenced directly inside filter() rather than through
# `final_df$Sex`, so the condition is always evaluated against the rows that
# are actually being filtered.

# Male lifters
male_data <- final_df %>%
  filter(Sex == "M")

# Female lifters
female_data <- final_df %>%
  filter(Sex == "F")
#View(female_data)
# Number of competitors in each weight class (both sexes combined)
weight_class_count <- count(final_df, WeightClassKg)

# Competitors per weight class and sex, drawn as a horizontal bar chart with
# the bars for each sex placed side by side
gender_weight_class_count <- count(final_df, WeightClassKg, Sex)

ggplot(
  gender_weight_class_count,
  aes(x = reorder(WeightClassKg, n), y = n, fill = Sex)
) +
  # stat = "identity": bar length is the precomputed count `n`;
  # position = "dodge": bars for each sex sit next to each other
  geom_bar(stat = "identity", position = "dodge") +
  # Swap the axes so the bars run horizontally
  coord_flip() +
  labs(
    title = "Number of Competitors per Weight Class by Gender",
    x = "Weight Class (kg)",
    y = "Number of Competitors"
  ) +
  theme_minimal() +
  theme(
    # Centre the title and set the size of the weight-class labels
    plot.title = element_text(hjust = 0.5),
    axis.text.y = element_text(size = 10)
  )
# Most and least populated weight classes ----
# Male competitors

# Competitors per weight class, largest class first
male_weight_class_count <- male_data %>%
  count(WeightClassKg, sort = TRUE)

# Weight class with the most male competitors
highest_male_weight_class <- male_weight_class_count %>%
  arrange(desc(n)) %>%
  slice_head(n = 1)

# Weight class with the fewest male competitors
lowest_male_weight_class <- male_weight_class_count %>%
  arrange(n) %>%
  slice_head(n = 1)

# Show both results
print(highest_male_weight_class)
print(lowest_male_weight_class)
# Female competitors

# Competitors per weight class, largest class first
female_weight_class_count <- female_data %>%
  count(WeightClassKg, sort = TRUE)

# Weight class with the most female competitors
highest_female_weight_class <- female_weight_class_count %>%
  arrange(desc(n)) %>%
  slice_head(n = 1)

# Weight class with the fewest female competitors
lowest_female_weight_class <- female_weight_class_count %>%
  arrange(n) %>%
  slice_head(n = 1)

# Show both results
print(highest_female_weight_class)
print(lowest_female_weight_class)
#' Top finishers for one division and weight class
#'
#' Restricts `df` to a single division and weight class, orders the remaining
#' rows by `TotalKg` (largest first) and keeps the leading `n_places` rows.
#' The `Place` column of the result is the rank within this selection
#' (1 = largest total); a `Place` column already present in `df` is
#' overwritten.
#'
#' @param df Data frame with the columns `Division`, `WeightClassKg`,
#'   `BestBenchKg`, `BestSquatKg`, `BestDeadliftKg` and `TotalKg`.
#' @param division_input Value compared against `Division` with `==`.
#' @param weight_class_input Value compared against `WeightClassKg` with `==`.
#' @param n_places Number of top rows to keep. Defaults to 3, the number that
#'   was previously hard-coded.
#' @return Data frame with at most `n_places` rows and the columns
#'   `WeightClassKg`, `BestBenchKg`, `BestSquatKg`, `BestDeadliftKg`,
#'   `TotalKg` and `Place`, ordered by `Place`. Zero rows are returned when
#'   nothing matches.
get_top_places <- function(df, division_input, weight_class_input,
                           n_places = 3) {
  stopifnot(
    is.numeric(n_places),
    length(n_places) == 1,
    !is.na(n_places),
    n_places >= 0
  )

  df %>%
    filter(Division == division_input, WeightClassKg == weight_class_input) %>%
    arrange(desc(TotalKg)) %>%
    # slice_head() takes the row count as a plain argument, so it cannot be
    # confused with a column of `df`
    slice_head(n = n_places) %>%
    mutate(Place = row_number()) %>%
    select(
      WeightClassKg, BestBenchKg, BestSquatKg, BestDeadliftKg, TotalKg, Place
    ) %>%
    arrange(Place)
}
# Example: best male totals in the 67.5 kg "Amateur Junior (20-23)" class
result <- get_top_places(
  df = male_data,
  division_input = "Amateur Junior (20-23)",
  weight_class_input = 67.5
)
print(result)
# Box plot of total weight lifted by male age group ----
# Each box shows the TotalKg distribution for one AgeGroup; the group mean is
# overlaid as a diamond together with its rounded value.
ggplot(male_data, aes(x = AgeGroup, y = TotalKg, fill = AgeGroup)) +
  geom_boxplot(outlier.colour = "black", outlier.size = 2) +
  # Mean of each group drawn as a point (shape 18 = filled diamond)
  stat_summary(
    fun = mean,
    geom = "point",
    color = "black",
    size = 3,
    shape = 18
  ) +
  # Mean of each group printed as text just above the point.
  # after_stat(y) refers to the value computed by the stat (the mean); it
  # replaces the deprecated `..y..` notation.
  stat_summary(
    fun = mean,
    geom = "text",
    color = "black",
    size = 3,
    vjust = -1,
    aes(label = round(after_stat(y), 1))
  ) +
  labs(
    x = "Age Group", y = "Total Weight Lifted (kg)",
    title = "Distribution of Total Weight Lifted by Male Age Group"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(hjust = 0.5) # Center the plot title
  )
# Box plot of total weight lifted by female age group ----
# Each box shows the TotalKg distribution for one AgeGroup; the group mean is
# overlaid as a diamond together with its rounded value.
ggplot(female_data, aes(x = AgeGroup, y = TotalKg, fill = AgeGroup)) +
  geom_boxplot(outlier.colour = "black", outlier.size = 2) +
  # Mean of each group drawn as a point (shape 18 = filled diamond)
  stat_summary(
    fun = mean,
    geom = "point",
    color = "black",
    size = 3,
    shape = 18
  ) +
  # Mean of each group printed as text just above the point.
  # after_stat(y) refers to the value computed by the stat (the mean); it
  # replaces the deprecated `..y..` notation.
  stat_summary(
    fun = mean,
    geom = "text",
    color = "black",
    size = 3,
    vjust = -1,
    aes(label = round(after_stat(y), 1))
  ) +
  # Title spelling corrected ("Female")
  labs(
    x = "Age Group", y = "Total Weight Lifted (kg)",
    title = "Distribution of Total Weight Lifted by Female Age Group"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(hjust = 0.5) # Center the plot title
  )
# One-way ANOVA: does mean TotalKg differ between age groups?
# Note that the model is fitted on male_data only; female lifters are not
# part of this test.
anova_result <- aov(TotalKg ~ AgeGroup, data = male_data)
# ANOVA table for the fitted model
summary(anova_result)
# Post-hoc test: Tukey's Honest Significant Differences on the fitted model,
# comparing the AgeGroup levels pairwise
tukey_result <- TukeyHSD(anova_result)
print(tukey_result)