@@ -56,7 +56,7 @@ This document presents the cleaning code and summary of the West Coast US Annual
5656
5757## Data cleaning in R
5858
59- ``` {r cleaning_code, code = readLines(here("./cleaning.codes/get.wcann .R")), eval = FALSE}
59+ ``` {r cleaning_code, code = readLines(here("./cleaning_codes/get_wcann .R")), eval = FALSE}
6060
6161```
6262
@@ -92,22 +92,22 @@ World_map <- rnaturalearth::ne_countries(scale = 'medium', returnclass = c("sf")
9292## 1. Overview of the survey data table
9393
9494``` {r head_survey, eval = T, echo = F}
95- kable(survey[1:5,1:7 ], format = "latex", booktabs = T) %>%
95+ kable(survey[1:5,1:6 ], format = "latex", booktabs = T) %>%
9696kable_styling(latex_options = c("striped","HOLD_position"))
9797
98- kable(survey[1:5,8 :15], format = "latex", booktabs = T) %>%
98+ kable(survey[1:5,7 :15], format = "latex", booktabs = T) %>%
9999kable_styling(latex_options = c("striped","HOLD_position"))
100100
101- kable(survey[1:5,16:21 ], format = "latex", booktabs = T) %>%
101+ kable(survey[1:5,16:23 ], format = "latex", booktabs = T) %>%
102102kable_styling(latex_options = c("striped","HOLD_position"))
103103
104- kable(survey[1:5,22:27 ], format = "latex", booktabs = T) %>%
104+ kable(survey[1:5,24:30 ], format = "latex", booktabs = T) %>%
105105kable_styling(latex_options = c("striped","HOLD_position"))
106106
107- kable(survey[1:5,28:32 ], format = "latex", booktabs = T) %>%
107+ kable(survey[1:5,31:35 ], format = "latex", booktabs = T) %>%
108108kable_styling(latex_options = c("striped","HOLD_position"))
109109
110- kable(survey[1:5,33:39 ], format = "latex", booktabs = T) %>%
110+ kable(survey[1:5,36:42 ], format = "latex", booktabs = T) %>%
111111kable_styling(latex_options = c("striped","HOLD_position"))
112112
113113
@@ -184,18 +184,18 @@ var_plot
184184
185185Here we display the yearly total and average across hauls of the following variables recorded in the data:
186186
187- - * num_cpue * , number of individuals (abundance) in $\frac{individuals}{km^2}$
188- - * num_h * , number of individuals (abundance) in $\frac{individuals}{h}$
187+ - * num_cpua * , number of individuals (abundance) in $\frac{individuals}{km^2}$
188+ - * num_cpue * , number of individuals (abundance) in $\frac{individuals}{h}$
189189- * num* , number of individuals (abundance)
190- - * wgt_cpue * , weight in $\frac{kg}{km^2}$
191- - * wgt_h * , weight in $\frac{kg}{h}$
190+ - * wgt_cpua * , weight in $\frac{kg}{km^2}$
191+ - * wgt_cpue * , weight in $\frac{kg}{h}$
192192- * wgt* , weight in ${kg}$
193193
194194``` {r summary_var_plot, eval = T, echo = F, message = F,warning = F}
195195
196196var_plot <- survey %>%
197197 group_by(year) %>%
198- summarise_at(vars(num:wgt_cpue ),
198+ summarise_at(vars(num:wgt_cpua ),
199199 funs(sum,mean),na.rm=T) %>%
200200 # head()
201201 gather("var","val",2:13) %>%
@@ -234,15 +234,15 @@ var_plot
234234
235235Here we show a yearly total distribution of the biomass data to visualize outliers:
236236
237- - * wgt * , total weight in ${kg}$ per haul and year per haul and year, if available in the survey data
238- - * num * , total number of individuals, if available in the survey data
+ - * num_cpua * , number of individuals (abundance) in $\frac{individuals}{km^2}$
+ - * wgt_cpua * , weight in $\frac{kg}{km^2}$
239239
240240``` {r extreme_biomass, eval = T, echo = F, message = F,warning = F}
241241
242- if(!is.na(mean(survey$num_cpue , na.rm=T)) & !is.na(mean(survey$wgt_cpue , na.rm=T))){
242+ if(!is.na(mean(survey$num_cpua , na.rm=T)) & !is.na(mean(survey$wgt_cpua , na.rm=T))){
243243 var_plot <- survey %>%
244244 group_by(year, haul_id) %>%
245- summarize(Weight = sum(wgt_cpue ), Abundance = sum(num_cpue )) %>%
245+ summarize(Weight = sum(wgt_cpua ), Abundance = sum(num_cpua )) %>%
246246 gather("var","val",3:4) %>%
247247 ggplot() +
248248 geom_boxplot(
@@ -259,10 +259,10 @@ if(!is.na(mean(survey$num_cpue, na.rm=T)) & !is.na(mean(survey$wgt_cpue, na.rm=T
259259 theme(axis.text.x = element_text(angle = 90))
260260}
261261
262- if(!is.na(mean(survey$num_cpue , na.rm=T)) & is.na(mean(survey$wgt_cpue , na.rm=T))){
262+ if(!is.na(mean(survey$num_cpua , na.rm=T)) & is.na(mean(survey$wgt_cpua , na.rm=T))){
263263var_plot <- survey %>%
264264 group_by(year, haul_id) %>%
265- summarize(Abundance = sum(num_cpue )) %>%
265+ summarize(Abundance = sum(num_cpua )) %>%
266266 # head()
267267 ggplot() +
268268 geom_boxplot(
@@ -278,10 +278,10 @@ var_plot <- survey %>%
278278 theme(axis.text.x = element_text(angle = 90))
279279}
280280
281- if(is.na(mean(survey$num_cpue , na.rm=T)) & !is.na(mean(survey$wgt_cpue , na.rm=T))){
281+ if(is.na(mean(survey$num_cpua , na.rm=T)) & !is.na(mean(survey$wgt_cpua , na.rm=T))){
282282var_plot <- survey %>%
283283 group_by(year, haul_id) %>%
284- summarize(Weight = sum(wgt_cpue )) %>%
284+ summarize(Weight = sum(wgt_cpua )) %>%
285285 # head()
286286 ggplot() +
287287 geom_boxplot(
@@ -297,7 +297,7 @@ var_plot <- survey %>%
297297 theme(axis.text.x = element_text(angle = 90))
298298}
299299var_plot
300- rm(var_plot)
300+
301301```
302302
303303
@@ -308,16 +308,17 @@ rm(var_plot)
308308Here we show the total abundance and number of taxa relationships with the area swept:
309309
310310- * nbr_taxa* , number of marine fish taxa after taxonomic data cleaning
311- - * num* , number of individuals, if available in the survey data
312- - * wgt* , weight in ${kg}$, if available in the survey data
311+ - * num_cpua* , number of individuals (abundance) in $\frac{individuals}{km^2}$
312+ - * wgt_cpua* , weight in $\frac{kg}{km^2}$
313+
313314
314315
315316``` {r summary_var_swept, eval = T, echo = F, message = F,warning = F}
316317
317- if(!is.na(mean(survey$num , na.rm=T)) & !is.na(mean(survey$wgt , na.rm=T))){
318+ if(!is.na(mean(survey$num_cpua , na.rm=T)) & !is.na(mean(survey$wgt_cpua , na.rm=T))){
318319 var_plot <- survey %>%
319320 group_by(haul_id, haul_dur, area_swept) %>%
320- summarize(Number_Taxa = length(accepted_name), Abundance = sum(num ),Weight = sum(wgt )) %>%
321+ summarize(Number_Taxa = length(accepted_name), Abundance = sum(num_cpua ),Weight = sum(wgt_cpua )) %>%
321322 gather("var","val",4:6) %>%
322323 # head()
323324 ggplot() +
@@ -327,10 +328,10 @@ if(!is.na(mean(survey$num, na.rm=T)) & !is.na(mean(survey$wgt, na.rm=T))){
327328 theme_bw()
328329}
329330
330- if(!is.na(mean(survey$num , na.rm=T)) & is.na( mean(survey$wgt , na.rm=T) )){
+ if(!is.na(mean(survey$num_cpua, na.rm=T)) & is.na(mean(survey$wgt_cpua, na.rm=T))){ # abundance-only case: num_cpua present, wgt_cpua entirely missing
331332 var_plot <- survey %>%
332333 group_by(haul_id, haul_dur, area_swept) %>%
- summarize(Number_Taxa = length(accepted_name), Abundance = sum(num )) %>%
+ summarize(Number_Taxa = length(accepted_name), Abundance = sum(num_cpua)) %>%
334335 gather("var","val",4:5) %>%
335336 # head()
336337 ggplot() +
@@ -340,10 +341,10 @@ if(!is.na(mean(survey$num, na.rm=T)) & is.na(mean(survey$wgt, na.rm=T))){
340341 theme_bw()
341342}
342343
343- if(is.na(mean(survey$num , na.rm=T)) & !is.na(mean(survey$wgt , na.rm=T))){
344+ if(is.na(mean(survey$num_cpua , na.rm=T)) & !is.na(mean(survey$wgt_cpua , na.rm=T))){
344345 var_plot <- survey %>%
345346 group_by(haul_id, haul_dur, area_swept) %>%
346- summarize(Number_Taxa = length(accepted_name), Weight = sum(wgt )) %>%
347+ summarize(Number_Taxa = length(accepted_name), Weight = sum(wgt_cpua )) %>%
347348 gather("var","val",4:5) %>%
348349 # head()
349350 ggplot() +
@@ -354,7 +355,6 @@ if(is.na(mean(survey$num, na.rm=T)) & !is.na(mean(survey$wgt, na.rm=T))){
354355}
355356
356357var_plot
357-
358358```
359359
360360\clearpage
@@ -363,10 +363,10 @@ var_plot
363363
364364``` {r abundant_spp, eval=T, echo=F, message=F, warning=F}
365365
366- if(!is.na(mean(survey$wgt_cpue , na.rm=T))){
+ if(!is.na(mean(survey$wgt_cpua, na.rm=T))){ # weight-based ranking; guard must test the column summed below (wgt_cpua)
367367spp <- survey %>%
368368 group_by(year, accepted_name) %>%
369- summarize(wgt = sum(wgt_cpue ), nbr_years = length(year)) %>%
369+ summarize(wgt = sum(wgt_cpua ), nbr_years = length(year)) %>%
370370 filter(nbr_years>10) %>%
371371 group_by(accepted_name) %>%
372372 summarize(wgt = median(wgt)) %>%
@@ -377,7 +377,7 @@ spp <- survey %>%
377377spp_plot <- survey %>%
378378 filter(accepted_name %in% spp) %>%
379379 group_by(year, accepted_name) %>%
380- summarize(wgt = sum(wgt_cpue , na.rm=T)) %>%
380+ summarize(wgt = sum(wgt_cpua , na.rm=T)) %>%
381381 ggplot() +
382382 geom_point( aes(x = year, y = wgt), size=0.5 ) +
383383 geom_line(aes(x = year,y = wgt), size=0.5) +
@@ -386,10 +386,10 @@ spp_plot <- survey %>%
386386 ylab("Species Weight (kg)") + xlab("Year")
387387}
388388
389- if(is.na(mean(survey$wgt_cpue , na.rm=T))){
389+ if(is.na(mean(survey$wgt_cpua , na.rm=T))){
390390 spp <- survey %>%
391391 group_by(year, accepted_name) %>%
392- summarize(num = sum(num_cpue ), nbr_years = length(year)) %>%
392+ summarize(num = sum(num_cpua ), nbr_years = length(year)) %>%
393393 filter(nbr_years>10) %>%
394394 group_by(accepted_name) %>%
395395 summarize(num = median(num)) %>%
@@ -400,7 +400,7 @@ if(is.na(mean(survey$wgt_cpue, na.rm=T))){
400400spp_plot <- survey %>%
401401 filter(accepted_name %in% spp) %>%
402402 group_by(year, accepted_name) %>%
403- summarize(num = sum(num_cpue , na.rm=T)) %>%
403+ summarize(num = sum(num_cpua , na.rm=T)) %>%
404404 ggplot() +
405405 geom_point( aes(x = year, y = num), size=0.5 ) +
406406 geom_line(aes(x = year,y = num), size=0.5) +
@@ -410,7 +410,6 @@ spp_plot <- survey %>%
410410}
411411
412412spp_plot
413-
414413```
415414
416415\clearpage
@@ -420,8 +419,6 @@ spp_plot
420419Map of the sampling distribution in space. Note that we only show one year per coordinate.
421420
422421``` {r fixed_point_map, eval = T, echo = F, fig.width=10, fig.height= 5, message = F,warning = F}
423-
424- # Fixed map
425422survey %>%
426423 select(longitude,latitude) %>%
427424 distinct() %>%
@@ -441,7 +438,7 @@ survey %>%
441438
442439```
443440
444-
441+ \clearpage
445442
446443## 9. Taxonomic flagging
447444
@@ -450,16 +447,18 @@ This species flagging method was adapted from https://github.com/pinskylab/Ocean
450447Visualization of flagged taxa
451448
452449``` {r, echo=FALSE, out.width = '80%'}
453- knitr::include_graphics(here::here("standardization_steps ", "outputs", "taxonomic_flagging", paste0(survey$survey[1],"_taxonomic_flagging.png")))
450+ knitr::include_graphics(here::here("outputs ", "Flags", "taxonomic_flagging", paste0(survey$survey[1],"_taxonomic_flagging.png")))
454451```
455452
456453Statistics related to the taxonomic flagging outputs
457454
458455``` {r, echo=FALSE}
459- df <- read.csv(here::here("standardization_steps ", "outputs", "taxonomic_flagging", paste0(survey$survey[1],'_stats.csv')))
456+ df <- read.csv(here::here("outputs ", "Flags", "taxonomic_flagging", paste0(survey$survey[1],'_stats.csv')))
460457knitr::kable(df, col.names = NULL)
461458```
462459
460+ \clearpage
461+
463462## 10. Spatio-temporal standardization
464463
465464### a. Standardization method 1
@@ -471,32 +470,32 @@ It was run for hex resolution 7 and 8.
471470Plot of number of cells x years with overlaid flagging options
472471
473472``` {r, echo=FALSE, out.width = '80%'}
474- knitr::include_graphics(here::here("standardization_steps ", "outputs", "trimming_method1", "hex_res7", paste0(survey$survey[1],"_hex_res_7_plot.png")))
473+ knitr::include_graphics(here::here("outputs ", "Flags", "trimming_method1", "hex_res7", paste0(survey$survey[1],"_hex_res_7_plot.png")))
475474```
476475``` {r, echo=FALSE, out.width = '80%'}
477- knitr::include_graphics(here::here("standardization_steps ", "outputs", "trimming_method1", "hex_res8", paste0(survey$survey[1],"_hex_res_8_plot.png")))
476+ knitr::include_graphics(here::here("outputs ", "Flags", "trimming_method1", "hex_res8", paste0(survey$survey[1],"_hex_res_8_plot.png")))
478477```
479478
480479Map of hauls retained and removed per flagging method and threshold
481480
482481``` {r, echo=FALSE, out.width = '100%'}
483- knitr::include_graphics(here::here("standardization_steps ", "outputs", "trimming_method1", "hex_res7", paste0(survey$survey[1],"_hex_res_7_map_per_haul.png")))
482+ knitr::include_graphics(here::here("outputs ", "Flags", "trimming_method1", "hex_res7", paste0(survey$survey[1],"_hex_res_7_map_per_haul.png")))
484483```
485484
486485``` {r, echo=FALSE, out.width = '100%'}
487- knitr::include_graphics(here::here("standardization_steps ", "outputs ", "trimming_method1", "hex_res8", paste0(survey$survey[1],"_hex_res_8_map_per_haul.png")))
486+ knitr::include_graphics(here::here("outputs ", "Flags ", "trimming_method1", "hex_res8", paste0(survey$survey[1],"_hex_res_8_map_per_haul.png")))
488487```
489488
490489
491490Map of numbers of years removed per grid cell and flagging method/threshold
492491
493492``` {r, echo=FALSE, out.width = '100%'}
494- knitr::include_graphics(here::here("standardization_steps ", "outputs", "trimming_method1", "hex_res7", paste0(survey$survey[1],"_hex_res_7_map_per_grid_nyears.png")))
493+ knitr::include_graphics(here::here("outputs ", "Flags", "trimming_method1", "hex_res7", paste0(survey$survey[1],"_hex_res_7_map_per_grid_nyears.png")))
495494```
496495
497496
498497``` {r, echo=FALSE, out.width = '100%'}
499- knitr::include_graphics(here::here("standardization_steps ", "outputs", "trimming_method1", "hex_res8", paste0(survey$survey[1],"_hex_res_8_map_per_grid_nyears.png")))
498+ knitr::include_graphics(here::here("outputs ", "Flags", "trimming_method1", "hex_res8", paste0(survey$survey[1],"_hex_res_8_map_per_grid_nyears.png")))
500499```
501500
502501
@@ -507,7 +506,7 @@ This standardization method was adapted from BioTIME code from https://github.co
507506Map of hauls retained and removed
508507
509508``` {r, echo=FALSE, out.width = '100%'}
510- knitr::include_graphics(here::here("standardization_steps ", "outputs", "trimming_method2",
509+ knitr::include_graphics(here::here("outputs ", "Flags", "trimming_method2",
511510 paste0(survey$survey[1],"_map_per_haul.png")))
512511```
513512
@@ -516,9 +515,9 @@ knitr::include_graphics(here::here("standardization_steps", "outputs", "trimming
516515Statistics of hauls removed for each standardization method
517516
518517``` {r, echo=FALSE}
519- met1_7 <- read.csv(here::here("standardization_steps ", "outputs", "trimming_method1", "hex_res7", paste0(survey$survey[1],"_hex_res_7_stats_hauls.csv")))
520- met1_8 <- read.csv(here::here("standardization_steps ", "outputs", "trimming_method1", "hex_res8", paste0(survey$survey[1],"_hex_res_8_stats_hauls.csv")))
521- met2 <- read.csv(here::here("standardization_steps ", "outputs ", "trimming_method2",
518+ met1_7 <- read.csv(here::here("outputs ", "Flags", "trimming_method1", "hex_res7", paste0(survey$survey[1],"_hex_res_7_stats_hauls.csv")))
519+ met1_8 <- read.csv(here::here("outputs ", "Flags", "trimming_method1", "hex_res8", paste0(survey$survey[1],"_hex_res_8_stats_hauls.csv")))
520+ met2 <- read.csv(here::here("outputs ", "Flags ", "trimming_method2",
522521 paste0(survey$survey[1],"_stats_hauls.csv")))
523522knitr::kable(cbind(met1_7, met1_8[,2:3], met2[,2]),
524523 col.names = c("summary", "grid cell 7, 0% threshold", "grid cell 7, 2% threshold",
0 commit comments