From a30efec25f89e94ec81e961b2190a3ce9c9ca5f9 Mon Sep 17 00:00:00 2001 From: erosquesada Date: Wed, 15 Oct 2025 14:32:39 +0200 Subject: [PATCH 01/30] With dplyr code --- R/createTableOfRDBESIds.r | 161 +++++++++++++++++++++++++++----------- 1 file changed, 116 insertions(+), 45 deletions(-) diff --git a/R/createTableOfRDBESIds.r b/R/createTableOfRDBESIds.r index 4431d356..5e02fbd7 100644 --- a/R/createTableOfRDBESIds.r +++ b/R/createTableOfRDBESIds.r @@ -1,3 +1,4 @@ + #' Create a table of RDBES Ids #' #' examples for now see @@ -18,51 +19,121 @@ #' myTableOfIds<- createTableOfRDBESIds(myH1RawObject) #' } -createTableOfRDBESIds<-function(x, addSAseqNums=TRUE) -{ - -# note: needs developments for different lower hierarchies - -# x is RDBESobj -# hierarchy is hierarchy (integer) -# outputs a table with ids for matching - - -CStableNames<- getTablesInRDBESHierarchy(hierarchy = x$DE$DEhierarchy[1], - includeOptTables = FALSE, - includeLowHierTables = TRUE, - includeTablesNotInSampHier = FALSE) - -for (i in 1:(length(CStableNames)-1)) -{ -id_1<-paste0(CStableNames[i],"id") -id_2<-paste0(CStableNames[i+1],"id") -if (i==1) df_1<-data.frame(x[[CStableNames[i]]][,list(get(id_1))]); colnames(df_1)<-id_1 -if((CStableNames[i+1] == "SA" & addSAseqNums == TRUE) | CStableNames[i+1] %in% c("BV")){ - if(CStableNames[i+1]=="SA") {df_2<-data.frame(x[[CStableNames[i+1]]][,list(get(id_1), get(id_2), get("SAseqNum"), get("SAparSequNum"))]); colnames(df_2)<-c(id_1,id_2,"SAseqNum","SAparSequNum")} - if(CStableNames[i+1]=="BV") {df_2<-data.frame(x[[CStableNames[i+1]]][,list(get(id_1), get(id_2), get("BVfishId"))]); colnames(df_2)<-c(id_1,id_2,"BVfishId")} - } else { - df_2<-data.frame(x[[CStableNames[i+1]]][,list(get(id_1), get(id_2))]); colnames(df_2)<-c(id_1,id_2) - } - -if (i==1) out<-merge(df_1,df_2, all.x=T) else out<-merge(out, df_2, all.x=T) - -#colnames(out)<-c(id_1,id_2) - -} -# reorders -if(addSAseqNums==TRUE){ - out<-out[,c(paste0(CStableNames,"id"),"BVfishId","SAseqNum","SAparSequNum")] - } else { - out<-out[,c(paste0(CStableNames,"id"),"BVfishId")] - - - } -out +createTableOfRDBESIds<-function(x, addSAseqNums=TRUE){ + + # note: needs developments for different lower hierarchies + + # x is RDBESobj + # hierarchy is hierarchy (integer) + # outputs a table with ids for matching + + + CStableNames<- getTablesInRDBESHierarchy(hierarchy = x$DE$DEhierarchy[1], + includeOptTables = FALSE, + includeLowHierTables = TRUE, + includeTablesNotInSampHier = FALSE) + + for (i in 1:(length(CStableNames)-1)){ + cat("Processing", CStableNames[i], "table. \n") + cat("Merging", CStableNames[i], " with ", CStableNames[i+1], " tables. \n") + id_1<-paste0(CStableNames[i],"id") + id_2<-paste0(CStableNames[i+1],"id") + + if(i==1){ + cat("Using", id_1, " from ", CStableNames[i], "table and", id_2, " from ", CStableNames[i+1], "table. \n") + df_1<-data.frame(x[[CStableNames[i]]][,list(get(id_1))]); colnames(df_1)<-id_1 + } + + if((CStableNames[i+1] == "SA" & addSAseqNums == TRUE) | CStableNames[i+1] %in% c("BV")){ + + if(CStableNames[i+1]=="SA"){ + + cat("Using", id_1, " from ", CStableNames[i], "table and", id_2, "SAseqNum, and SAparSequNum from ", CStableNames[i+1], "table. \n") + df_2<-data.frame(x[[CStableNames[i+1]]][,list(get(id_1), get(id_2), get("SAseqNum"), get("SAparSequNum"))]); colnames(df_2)<-c(id_1,id_2,"SAseqNum","SAparSequNum") + + } + + if(CStableNames[i+1]=="BV"){ + + cat("Using", id_1, " from ", CStableNames[i], "table and", id_2, "and BVfishId from ", CStableNames[i+1], "table. \n") + df_2<-data.frame(x[[CStableNames[i+1]]][,list(get(id_1), get(id_2), get("BVfishId"))]); colnames(df_2)<-c(id_1,id_2,"BVfishId") + # We also need a reference reporting the SAid, for when the lower hierarchy is C, see below. + df_2C<-data.frame(rdbesobj[[CStableNames[i+1]]][,list(get("SAid"), get(id_1), get(id_2))]); colnames(df_2C)<-c("SAid", id_1,id_2) + + } + + }else{ + + cat("Using", id_1, " from ", CStableNames[i], "table and", id_2, " from ", CStableNames[i+1], "table. \n") + df_2<-data.frame(x[[CStableNames[i+1]]][,list(get(id_1), get(id_2))]); colnames(df_2)<-c(id_1,id_2) + + } + + if (i==1){ + + out<-merge(df_1,df_2, all.x=T) else out<-merge(out, df_2, all.x=T) + + }else{ + + if(CStableNames[i+1]=="BV"){ + + # The lower hierarchy (A:D) implies that FM is used or not. + # Due to this, we need to do a conditional merging by either FMid (where present) or SAid. + outTmp = merge(out, rdbesobj$SA[,c("SAid","SAlowHierarchy")]) + + # Convert to data.table if not already + setDT(outTmp) + setDT(out) + setDT(df_2) + setDT(df_2C) + + # Filter and get SAid groups + keepA <- outTmp[SAlowHierarchy == "A", SAid] + keepB <- outTmp[SAlowHierarchy == "B", SAid] + keepC <- outTmp[SAlowHierarchy == "C", SAid] + keepD <- outTmp[SAlowHierarchy == "D", SAid] + + # Split 'out' accordingly + toMergeA <- out[SAid %in% keepA] + toMergeB <- out[SAid %in% keepB] + toMergeC <- out[SAid %in% keepC] + toMergeD <- out[SAid %in% keepD] + + # Conditional merges + mergedA <- df_2[toMergeA, on = "FMid"] # left join equivalent (all.x = TRUE) + mergedB <- df_2[toMergeB, on = "FMid"] # same for group B + mergedC <- df_2C[toMergeC, on = "SAid", nomatch = 0][, .SD, .SDcols = c(names(toMergeC), "BVid")] + mergedD <- toMergeD # unchanged group D + + # Combine back + out <- rbindlist(list(mergedA, mergedB, mergedC, mergedD), use.names = TRUE, fill = TRUE) + + }else{ + + out<-merge(out, df_2, all.x=T) + + } + + } + } + + # reorders + if(addSAseqNums==TRUE){ + + out<-out[,c(paste0(CStableNames,"id"),"BVfishId","SAseqNum","SAparSequNum")] + + } else { + + out<-out[,c(paste0(CStableNames,"id"),"BVfishId")] + + } + + out + } # e.g., - ## default adds "SAseqNum","SAparSequNum" - #head(createTableOfRDBESIds(x = RDBESprepObj)) - ## if addSAseqNums is set to FALSE, "SAseqNum" and "SAparSequNum" are not added to output - # head(createTableOfRDBESIds(x = RDBESprepObj, addSAseqNums=FALSE)) +## default adds "SAseqNum","SAparSequNum" +#head(createTableOfRDBESIds(x = RDBESprepObj)) +## if addSAseqNums is set to FALSE, "SAseqNum" and "SAparSequNum" are not added to output +# head(createTableOfRDBESIds(x = RDBESprepObj, addSAseqNums=FALSE)) From f9069e270c34c49fb9512ec187c748c0c9c2dc21 Mon Sep 17 00:00:00 2001 From: erosquesada Date: Wed, 15 Oct 2025 14:50:33 +0200 Subject: [PATCH 02/30] Fixed with data.table code, to check --- R/createTableOfRDBESIds.r | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/R/createTableOfRDBESIds.r b/R/createTableOfRDBESIds.r index 5e02fbd7..47e2c837 100644 --- a/R/createTableOfRDBESIds.r +++ b/R/createTableOfRDBESIds.r @@ -71,7 +71,7 @@ createTableOfRDBESIds<-function(x, addSAseqNums=TRUE){ if (i==1){ - out<-merge(df_1,df_2, all.x=T) else out<-merge(out, df_2, all.x=T) + out<-merge(df_1,df_2, all.x=T) }else{ @@ -109,15 +109,17 @@ createTableOfRDBESIds<-function(x, addSAseqNums=TRUE){ out <- rbindlist(list(mergedA, mergedB, mergedC, mergedD), use.names = TRUE, fill = TRUE) }else{ - + out<-merge(out, df_2, all.x=T) } } } - - # reorders + + out <- as.data.frame(out) + + # reorders if(addSAseqNums==TRUE){ out<-out[,c(paste0(CStableNames,"id"),"BVfishId","SAseqNum","SAparSequNum")] @@ -129,11 +131,9 @@ createTableOfRDBESIds<-function(x, addSAseqNums=TRUE){ } out - + } # e.g., -## default adds "SAseqNum","SAparSequNum" -#head(createTableOfRDBESIds(x = RDBESprepObj)) -## if addSAseqNums is set to FALSE, "SAseqNum" and "SAparSequNum" are not added to output -# head(createTableOfRDBESIds(x = RDBESprepObj, addSAseqNums=FALSE)) +# check <- createTableOfRDBESIds(rdbesobj, addSAseqNums = T) +# check From c26a9690601718944a0673caaa23e4cf11e0f411 Mon Sep 17 00:00:00 2001 From: erosquesada Date: Wed, 15 Oct 2025 14:51:07 +0200 Subject: [PATCH 03/30] Fixed with data.table code, to check on #237 --- R/createTableOfRDBESIds.r | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/createTableOfRDBESIds.r b/R/createTableOfRDBESIds.r index 47e2c837..2f169e57 100644 --- a/R/createTableOfRDBESIds.r +++ b/R/createTableOfRDBESIds.r @@ -18,7 +18,7 @@ #' #' myTableOfIds<- createTableOfRDBESIds(myH1RawObject) #' } - + createTableOfRDBESIds<-function(x, addSAseqNums=TRUE){ # note: needs developments for different lower hierarchies From 3f757e628f0b72df9c65510ed6e1188b593a92d6 Mon Sep 17 00:00:00 2001 From: nmprista <31890653+nmprista@users.noreply.github.com> Date: Wed, 15 Oct 2025 15:31:22 +0200 Subject: [PATCH 04/30] ISrecordType was SLrecordType ISrecordType value updated --- .../aux_TextBookExamples/IS_base.rds | Bin 212 -> 211 bytes 1 file changed, 0 insertions(+), 0 deletions(-) diff --git a/data-raw/exampleData/TextBookExamples/aux_TextBookExamples/IS_base.rds b/data-raw/exampleData/TextBookExamples/aux_TextBookExamples/IS_base.rds index 0e31f5c55f65887b94b409112156a62172eedd32..bc9453bd28575e36d954330c6969bb06c11e5e1d 100644 GIT binary patch literal 211 zcmV;^04)C>iwFP!000002CYv`3&JoA&E|S=IGG6k3+L6NAP7Bgf=6qY!JuoU9jG^d zw})xe3c9m_myeg^h3sMg7(m1V7%)nKZHvu(N!;|+07wY|L@=Zn@|^tm$*gXRJ-n?~ zN8#7T(3(@GMT3rt6(T5(YL!&H_OdjgZJ)-75sROV1s z7AqoW9-T+}{`R#n;<-g>Y*}26ANdK-rD+1;ki4atkJDa*+0!b5B^Oa Nx(5s>D;b#q007T?UvB^a literal 212 zcmV;_04x6=iwFP!000002CYv`3xYrp9Ze5Kh7$BIvTMg62!w7LbZlEk5LQ=qh3eMt z?qH3&LV0fB&BvRWH_Ukk026Q=3nnh5;GUAhdQ050YXJBJ3mlkIwBsZBMa%47n+x}W zktxiGTp2V5e*bN^Fppw|T&gsARD*mF#atEzDxNcuu)8cx!;4~FA Date: Wed, 15 Oct 2025 15:32:48 +0200 Subject: [PATCH 05/30] replacing with x the object --- R/createTableOfRDBESIds.r | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/R/createTableOfRDBESIds.r b/R/createTableOfRDBESIds.r index 2f169e57..16c3d644 100644 --- a/R/createTableOfRDBESIds.r +++ b/R/createTableOfRDBESIds.r @@ -23,7 +23,7 @@ createTableOfRDBESIds<-function(x, addSAseqNums=TRUE){ # note: needs developments for different lower hierarchies - # x is RDBESobj + # x is x # hierarchy is hierarchy (integer) # outputs a table with ids for matching @@ -58,7 +58,7 @@ createTableOfRDBESIds<-function(x, addSAseqNums=TRUE){ cat("Using", id_1, " from ", CStableNames[i], "table and", id_2, "and BVfishId from ", CStableNames[i+1], "table. \n") df_2<-data.frame(x[[CStableNames[i+1]]][,list(get(id_1), get(id_2), get("BVfishId"))]); colnames(df_2)<-c(id_1,id_2,"BVfishId") # We also need a reference reporting the SAid, for when the lower hierarchy is C, see below. - df_2C<-data.frame(rdbesobj[[CStableNames[i+1]]][,list(get("SAid"), get(id_1), get(id_2))]); colnames(df_2C)<-c("SAid", id_1,id_2) + df_2C<-data.frame(x[[CStableNames[i+1]]][,list(get("SAid"), get(id_1), get(id_2))]); colnames(df_2C)<-c("SAid", id_1,id_2) } @@ -79,7 +79,7 @@ createTableOfRDBESIds<-function(x, addSAseqNums=TRUE){ # The lower hierarchy (A:D) implies that FM is used or not. # Due to this, we need to do a conditional merging by either FMid (where present) or SAid. - outTmp = merge(out, rdbesobj$SA[,c("SAid","SAlowHierarchy")]) + outTmp = merge(out, x$SA[,c("SAid","SAlowHierarchy")]) # Convert to data.table if not already setDT(outTmp) @@ -135,5 +135,5 @@ createTableOfRDBESIds<-function(x, addSAseqNums=TRUE){ } # e.g., -# check <- createTableOfRDBESIds(rdbesobj, addSAseqNums = T) +# check <- createTableOfRDBESIds(x, addSAseqNums = T) # check From 124ea7506be86d95398ece6ff069c186380885d9 Mon Sep 17 00:00:00 2001 From: Kasia-MIR <60775640+Kasia-MIR@users.noreply.github.com> Date: Wed, 15 Oct 2025 15:42:40 +0200 Subject: [PATCH 06/30] required library --- R/createTableOfRDBESIds.r | 84 +++++++++++++++++++-------------------- 1 file changed, 41 insertions(+), 43 deletions(-) diff --git a/R/createTableOfRDBESIds.r b/R/createTableOfRDBESIds.r index 16c3d644..a2c42265 100644 --- a/R/createTableOfRDBESIds.r +++ b/R/createTableOfRDBESIds.r @@ -18,102 +18,104 @@ #' #' myTableOfIds<- createTableOfRDBESIds(myH1RawObject) #' } - + createTableOfRDBESIds<-function(x, addSAseqNums=TRUE){ - + # note: needs developments for different lower hierarchies - + # x is x # hierarchy is hierarchy (integer) # outputs a table with ids for matching - - + + #libraries + require(data.table) + CStableNames<- getTablesInRDBESHierarchy(hierarchy = x$DE$DEhierarchy[1], includeOptTables = FALSE, includeLowHierTables = TRUE, includeTablesNotInSampHier = FALSE) - + for (i in 1:(length(CStableNames)-1)){ cat("Processing", CStableNames[i], "table. \n") cat("Merging", CStableNames[i], " with ", CStableNames[i+1], " tables. \n") id_1<-paste0(CStableNames[i],"id") id_2<-paste0(CStableNames[i+1],"id") - + if(i==1){ cat("Using", id_1, " from ", CStableNames[i], "table and", id_2, " from ", CStableNames[i+1], "table. \n") df_1<-data.frame(x[[CStableNames[i]]][,list(get(id_1))]); colnames(df_1)<-id_1 } - + if((CStableNames[i+1] == "SA" & addSAseqNums == TRUE) | CStableNames[i+1] %in% c("BV")){ - + if(CStableNames[i+1]=="SA"){ - + cat("Using", id_1, " from ", CStableNames[i], "table and", id_2, "SAseqNum, and SAparSequNum from ", CStableNames[i+1], "table. \n") df_2<-data.frame(x[[CStableNames[i+1]]][,list(get(id_1), get(id_2), get("SAseqNum"), get("SAparSequNum"))]); colnames(df_2)<-c(id_1,id_2,"SAseqNum","SAparSequNum") - + } - + if(CStableNames[i+1]=="BV"){ - + cat("Using", id_1, " from ", CStableNames[i], "table and", id_2, "and BVfishId from ", CStableNames[i+1], "table. \n") df_2<-data.frame(x[[CStableNames[i+1]]][,list(get(id_1), get(id_2), get("BVfishId"))]); colnames(df_2)<-c(id_1,id_2,"BVfishId") - # We also need a reference reporting the SAid, for when the lower hierarchy is C, see below. + # We also need a reference reporting the SAid, for when the lower hierarchy is C, see below. df_2C<-data.frame(x[[CStableNames[i+1]]][,list(get("SAid"), get(id_1), get(id_2))]); colnames(df_2C)<-c("SAid", id_1,id_2) - + } - + }else{ - + cat("Using", id_1, " from ", CStableNames[i], "table and", id_2, " from ", CStableNames[i+1], "table. \n") df_2<-data.frame(x[[CStableNames[i+1]]][,list(get(id_1), get(id_2))]); colnames(df_2)<-c(id_1,id_2) - + } - + if (i==1){ - - out<-merge(df_1,df_2, all.x=T) - + + out<-merge(df_1,df_2, all.x=T) + }else{ - + if(CStableNames[i+1]=="BV"){ - - # The lower hierarchy (A:D) implies that FM is used or not. - # Due to this, we need to do a conditional merging by either FMid (where present) or SAid. + + # The lower hierarchy (A:D) implies that FM is used or not. + # Due to this, we need to do a conditional merging by either FMid (where present) or SAid. outTmp = merge(out, x$SA[,c("SAid","SAlowHierarchy")]) - + # Convert to data.table if not already setDT(outTmp) setDT(out) setDT(df_2) setDT(df_2C) - + # Filter and get SAid groups keepA <- outTmp[SAlowHierarchy == "A", SAid] keepB <- outTmp[SAlowHierarchy == "B", SAid] keepC <- outTmp[SAlowHierarchy == "C", SAid] keepD <- outTmp[SAlowHierarchy == "D", SAid] - + # Split 'out' accordingly toMergeA <- out[SAid %in% keepA] toMergeB <- out[SAid %in% keepB] toMergeC <- out[SAid %in% keepC] toMergeD <- out[SAid %in% keepD] - + # Conditional merges mergedA <- df_2[toMergeA, on = "FMid"] # left join equivalent (all.x = TRUE) mergedB <- df_2[toMergeB, on = "FMid"] # same for group B mergedC <- df_2C[toMergeC, on = "SAid", nomatch = 0][, .SD, .SDcols = c(names(toMergeC), "BVid")] mergedD <- toMergeD # unchanged group D - + # Combine back out <- rbindlist(list(mergedA, mergedB, mergedC, mergedD), use.names = TRUE, fill = TRUE) - + }else{ out<-merge(out, df_2, all.x=T) - + } - + } } @@ -121,19 +123,15 @@ createTableOfRDBESIds<-function(x, addSAseqNums=TRUE){ # reorders if(addSAseqNums==TRUE){ - + out<-out[,c(paste0(CStableNames,"id"),"BVfishId","SAseqNum","SAparSequNum")] - + } else { - + out<-out[,c(paste0(CStableNames,"id"),"BVfishId")] - + } - + out } - -# e.g., -# check <- createTableOfRDBESIds(x, addSAseqNums = T) -# check From 566ac671ea54012779cb944bb6b02ad4d62ff494 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 15 Oct 2025 14:43:41 +0000 Subject: [PATCH 07/30] Initial plan From 2092d0c3610b316460766328651c9e0dc3448429 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 15 Oct 2025 14:49:18 +0000 Subject: [PATCH 08/30] Add warning for combining objects from different hierarchies Co-authored-by: rix133 <6965560+rix133@users.noreply.github.com> --- R/combineRDBESDataObjects.R | 33 +++++++++++++++++++ tests/testthat/test-combineRDBESDataObjects.R | 26 +++++++++++++++ 2 files changed, 59 insertions(+) diff --git a/R/combineRDBESDataObjects.R b/R/combineRDBESDataObjects.R index e58831f6..865171e2 100644 --- a/R/combineRDBESDataObjects.R +++ b/R/combineRDBESDataObjects.R @@ -9,6 +9,13 @@ #' @param strict (Optional) This function validates its input data - should #' the validation be strict? The default is TRUE. #' +#' @details +#' When combining RDBESDataObjects from different hierarchies (e.g., H1 and H5), +#' a warning is issued. The resulting combined object will have a mixed hierarchy, +#' which may be structurally and statistically invalid for some analyses. However, +#' such combinations can be useful for fisheries overviews, annual reports, or +#' countries performing broader estimations. +#' #' @return the combination of \code{RDBESDataObject1} and \code{RDBESDataObject2} #' @seealso \link[data.table]{rbindlist} #' @export @@ -31,6 +38,32 @@ combineRDBESDataObjects <- function(RDBESDataObject1, validateRDBESDataObject(RDBESDataObject1, verbose = verbose, strict = strict) validateRDBESDataObject(RDBESDataObject2, verbose = verbose, strict = strict) + + # Check for multiple hierarchies + hierarchy1 <- NULL + hierarchy2 <- NULL + + if (!is.null(RDBESDataObject1$DE) && nrow(RDBESDataObject1$DE) > 0) { + hierarchy1 <- unique(RDBESDataObject1$DE$DEhierarchy) + } + + if (!is.null(RDBESDataObject2$DE) && nrow(RDBESDataObject2$DE) > 0) { + hierarchy2 <- unique(RDBESDataObject2$DE$DEhierarchy) + } + + # Warn if combining different hierarchies + if (!is.null(hierarchy1) && !is.null(hierarchy2) && + length(hierarchy1) > 0 && length(hierarchy2) > 0) { + if (!all(hierarchy1 %in% hierarchy2) || !all(hierarchy2 %in% hierarchy1)) { + warning("Combining RDBESDataObjects from different hierarchies (", + paste(hierarchy1, collapse = ", "), " and ", + paste(hierarchy2, collapse = ", "), + "). This creates a mixed hierarchy object that may be structurally ", + "and statistically invalid for some analyses.", + call. = FALSE) + } + } + # Create an empty RDBESDataObject as the basis of what we will return myRDBESDataObject <- createRDBESDataObject() diff --git a/tests/testthat/test-combineRDBESDataObjects.R b/tests/testthat/test-combineRDBESDataObjects.R index 92425576..2c7c533a 100644 --- a/tests/testthat/test-combineRDBESDataObjects.R +++ b/tests/testthat/test-combineRDBESDataObjects.R @@ -40,4 +40,30 @@ test_that("combineRDBESDataObjects returns valid RDBESDataObject when supplied expect_error(validateRDBESDataObject(myCombinedObject), NA) }) +test_that("combineRDBESDataObjects warns when combining objects from different hierarchies", { + + myObject1 <- importRDBESDataCSV(rdbesExtractPath = "./h1_v_20250211") + myObject2 <- importRDBESDataCSV(rdbesExtractPath = "./h5_v_20250211") + + # Check these are valid objects before we try and combine them + expect_error(validateRDBESDataObject(myObject1), NA) + expect_error(validateRDBESDataObject(myObject2), NA) + + # Expect a warning about different hierarchies + expect_warning(combineRDBESDataObjects(RDBESDataObject1=myObject1, + RDBESDataObject2=myObject2), + "Combining RDBESDataObjects from different hierarchies") +}) + +test_that("combineRDBESDataObjects does not warn when combining objects from same hierarchy", { + + myObject1 <- importRDBESDataCSV(rdbesExtractPath = "./h1_v_20250211") + myObject2 <- importRDBESDataCSV(rdbesExtractPath = "./h1_v_20250211") + + # Expect no warning about different hierarchies (but will have duplicate rows error later) + expect_warning(combineRDBESDataObjects(RDBESDataObject1=myObject1, + RDBESDataObject2=myObject2), + NA) +}) + }) ## end capture.output From af7946d10e5837ea5f1277db3315596fa8076ca6 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 15 Oct 2025 14:51:49 +0000 Subject: [PATCH 09/30] Add comprehensive tests for hierarchy warning edge cases Co-authored-by: rix133 <6965560+rix133@users.noreply.github.com> --- tests/testthat/test-combineRDBESDataObjects.R | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/tests/testthat/test-combineRDBESDataObjects.R b/tests/testthat/test-combineRDBESDataObjects.R index 2c7c533a..5cf38035 100644 --- a/tests/testthat/test-combineRDBESDataObjects.R +++ b/tests/testthat/test-combineRDBESDataObjects.R @@ -66,4 +66,15 @@ test_that("combineRDBESDataObjects does not warn when combining objects from sam NA) }) +test_that("combineRDBESDataObjects does not warn when one object has no DE table", { + + myObject1 <- importRDBESDataCSV(rdbesExtractPath = "./h1_v_20250211") + myObject2 <- createRDBESDataObject() # Empty object with no DE + + # Expect no warning because one object has no hierarchy + expect_warning(combineRDBESDataObjects(RDBESDataObject1=myObject1, + RDBESDataObject2=myObject2), + NA) +}) + }) ## end capture.output From b92ce853d100b34e0389fbb09c99ce1776cfc1a3 Mon Sep 17 00:00:00 2001 From: Richard Meitern Date: Wed, 15 Oct 2025 18:10:15 +0300 Subject: [PATCH 10/30] Add branch to pull request trigger in pkgdown.yaml Only when main updates --- .github/workflows/pkgdown.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/pkgdown.yaml b/.github/workflows/pkgdown.yaml index bfc9f4db..3de2a835 100644 --- a/.github/workflows/pkgdown.yaml +++ b/.github/workflows/pkgdown.yaml @@ -4,6 +4,7 @@ on: push: branches: [main, master] pull_request: + branches: [ main ] release: types: [published] workflow_dispatch: From 644c8ca3df45665d15a7a62e9081f48f3abf4376 Mon Sep 17 00:00:00 2001 From: nmprista <31890653+nmprista@users.noreply.github.com> Date: Wed, 15 Oct 2025 17:21:00 +0200 Subject: [PATCH 11/30] updates to new version of RDBES --- ...ookDataUpload_Pckg_SDAResources_agsrs_H1.R | 53 ++++++++++----- ...kDataUpload_Pckg_SDAResources_agstrat_H1.R | 64 +++++++++++-------- ...kDataUpload_Pckg_SDAResources_algebra_H1.R | 10 ++- 3 files changed, 81 insertions(+), 46 deletions(-) diff --git a/data-raw/exampleData/TextBookExamples/BuildTextBookDataUpload_Pckg_SDAResources_agsrs_H1.R b/data-raw/exampleData/TextBookExamples/BuildTextBookDataUpload_Pckg_SDAResources_agsrs_H1.R index f464573f..c627b825 100644 --- a/data-raw/exampleData/TextBookExamples/BuildTextBookDataUpload_Pckg_SDAResources_agsrs_H1.R +++ b/data-raw/exampleData/TextBookExamples/BuildTextBookDataUpload_Pckg_SDAResources_agsrs_H1.R @@ -38,6 +38,7 @@ baseDir <- "" VD_base <- readRDS(paste0(baseDir,"aux_TextBookExamples/VD_base.rds")) SL_base <- readRDS(paste0(baseDir,"aux_TextBookExamples/SL_base.rds")) + IS_base <- readRDS(paste0(baseDir,"aux_TextBookExamples/IS_base.rds")) #nameof the directory where the outputs are saved currently @@ -234,6 +235,7 @@ VS_df$VSnumberTotal<-3078 # 21 FTarrivalLocation [M] - Harbour_LOCODE # 22 FTarrivalDate [M] - Date # 23 FTarrivalTime [M/O] - StringLength60 +# FTdominantLandingDate # 24 FTnumberTotal [DV,O] - int # 25 FTnumberSampled [DV,O] - int # 26 FTselectionProb [DV,O] - Decimal0-1 @@ -277,6 +279,7 @@ FT_df <- data.frame( FTarrivalLocation = "ZWHWN", #[M] - Harbour_LOCODE FTarrivalDate=seq(from = as.Date("1968-01-01", format='%Y-%m-%d'), by=1, length.out=nrow(dataset)), #[M] - date FTarrivalTime="", #[O] - time + FTdominantLandingDate= "", FTnumberTotal= 1, #[DV,O] - int FTnumberSampled=1, #[DV,O] - int FTselectionProb=1, #[DV,O] - DecimalPrec10 @@ -320,6 +323,7 @@ FT_df <- data.frame( # 16 FOendDate [M] - Date # 17 FOendTime [M/O] - Time # 18 FOduration [M/O] - int +# FOfishingDurationDataBasis # 19 FOdurationSource [M] - DurationSource # 20 FOhandlingTime [O] - int # 21 FOstartLat [O] - Decimal-90.000000-90.000000 @@ -387,6 +391,7 @@ FO_df <- data.frame( FOendDate = as.Date(FT_df$FTdepartureDate, format='%Y-%m-%d'),#M FOendTime ="",#M FOduration = 60, #M ATTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT!!!! + FOfishingDurationDataBasis = "Measured", FOdurationSource = "Crew", FOhandlingTime = "", FOstartLat="", # ATT! @@ -399,12 +404,16 @@ FO_df <- data.frame( FOfisheriesManagementUnit = "", FOgsaSubarea = "NotApplicable", #M FOjurisdictionArea = "", + FOgeographicalDataBasis = "Measured", + FOgeographicalSource = "", FOfishingDepth = "", FOwaterDepth = "", FOnationalFishingActivity = "", FOmetier5 = "", FOmetier6 = "OTT_CRU_70-89_2_35",#M FOgear = "OTT",#M + FOgearDataBasis = "Measured", + FOgearSource = "", FOmeshSize = "", FOselectionDevice = "", FOselectionDeviceMeshSize = "", @@ -500,8 +509,10 @@ SS_df<-data.frame( SSspeciesListName = project_name_outputs, #M SSuseForCalculateZero = "N", #M SStimeTotal = "", + SStimeTotalDataBasis = "Measured", SStimeSampled = "", SSnumberTotal = 1, + SSnumberTotalDataBasis = "Measured", SSnumberSampled = 1, SSselectionProb = 1, SSinclusionProb = 1, @@ -604,10 +615,14 @@ SA_df<-data.frame( SAfisheriesManagementUnit = "", SAgsaSubarea = "NotApplicable", #M SAjurisdictionArea = "", + SAgeographicalDataBasis = "Measured", + SAgeographicalSource = "", SAnationalFishingActivity = "", SAmetier5 = "", SAmetier6 = "", SAgear = "", + SAgearDataBasis = "Measured", + SAgearSource = "", SAmeshSize = "", SAselectionDevice = "", SAselectionDeviceMeshSize = "", @@ -628,6 +643,7 @@ SA_df<-data.frame( SAreasonNotSampledFM = "", SAreasonNotSampledBV = "", SAtotalWeightMeasured = dataset[[target_var]], + SAtotalWeightMeasuredDataBasis = "Measured", SAsampleWeightMeasured = dataset[[target_var]], SAconversionFactorMeasLive = 1, SAauxiliaryVariableTotal = "", @@ -687,15 +703,14 @@ RDBESlist[[i]][which(grepl(colnames(RDBESlist[[i]]),pat="[A-Z]id"))]<-NULL #===Save============ - dir_outputs<-paste0(base_dir_outputs,"/", - project_name_outputs,"/") - dir.create(dir_outputs, recursive=T, showWarnings=FALSE) - filename_output_CS <- paste0("H1.csv") - filename_output_SL <- paste0("HSL.csv") - filename_output_VD <- paste0("HVD.csv") + dir_outputs<-paste0(base_dir_outputs,"/", project_name_outputs,"/") + dir.create(dir_outputs, recursive=T, showWarnings=FALSE) + filename_output_CS <- paste0(dir_outputs,"H1.csv") + filename_output_SL <- paste0(dir_outputs,"HSL.csv") + filename_output_VD <- paste0(dir_outputs,"HVD.csv") -lapply(RDBESlist, function(x, filename1 = paste0(dir_outputs,filename_output_CS)){ +lapply(RDBESlist, function(x, filename1 = filename_output_CS){ if("DErecordType" %in% colnames(x)){ write.table(x, file = filename1, append = FALSE, quote = FALSE, sep = ",", eol = "\n", na = "NA", dec = ".", row.names = FALSE, @@ -707,7 +722,7 @@ write.table(x, file = filename1, append = TRUE, quote = FALSE, sep = ",", } }) -b<-read.table(file=paste0(dir_outputs,filename_output_CS), header=F, sep=";") +b<-read.table(file = filename_output_CS, header=F, sep=";") b<-cbind(key,b) b<-b[order(as.character(b$key), decreasing=FALSE),] b<-b[!is.na(key),] @@ -715,24 +730,30 @@ b$key<-NULL b$V1<-as.character(b$V1) # saves CS output -write.table(b$V1, file=paste0(dir_outputs,filename_output_CS), col.names=FALSE, row.names = FALSE, quote=FALSE,sep=",") +write.table(b$V1, file = filename_output_CS, col.names=FALSE, row.names = FALSE, quote=FALSE,sep=",") -# -----Builds and saves dummySL----------------- +# -----Builds and saves dummySL and dummyIS----------------- SL_base$SLspeciesListName<-project_name_outputs SL_base$SLyear<-DEyear SL_base$SLinstitute<-SDinstitution -# saves SL output - write.table(SL_base, file=paste0(dir_outputs,filename_output_SL), col.names=FALSE, row.names = FALSE, quote=FALSE,sep=",") - + + # saves SL output + write.table(rbind(paste(SL_base, collapse=","), paste(IS_base, collapse=",")), file=filename_output_SL, col.names=FALSE, row.names = FALSE, quote=FALSE,sep=",") # -----Builds and saves dummyVD----------------- - # saves VD output VD_base$VDyear<-DEyear - write.table(VD_base, file=paste0(dir_outputs,filename_output_VD), col.names=FALSE, row.names = FALSE, quote=FALSE,sep=",") + write.table(VD_base, file=filename_output_VD, col.names=FALSE, row.names = FALSE, quote=FALSE,sep=",") + + + + + +# check - is this needed? +if(1==2){ @@ -753,5 +774,5 @@ write.csv(tmp, file=paste0(dir_outputs,"tmp/SpeciesList.csv"), quote=F, row.name zip::zip(zipfile = paste0(project_name_outputs,".zip"), files =paste0(dir_outputs,"tmp/",dir(paste0(dir_outputs,"tmp"))), mode="cherry-pick") unlink(paste0(dir_outputs,"tmp"), force=T, recursive = T) - +} diff --git a/data-raw/exampleData/TextBookExamples/BuildTextBookDataUpload_Pckg_SDAResources_agstrat_H1.R b/data-raw/exampleData/TextBookExamples/BuildTextBookDataUpload_Pckg_SDAResources_agstrat_H1.R index 64872c3a..a15781af 100644 --- a/data-raw/exampleData/TextBookExamples/BuildTextBookDataUpload_Pckg_SDAResources_agstrat_H1.R +++ b/data-raw/exampleData/TextBookExamples/BuildTextBookDataUpload_Pckg_SDAResources_agstrat_H1.R @@ -32,7 +32,7 @@ DEstratumName <- "Pckg_SDAResources_agstrat_H1" project_name_outputs <- gsub(" ","_", paste0(DEsamplingScheme,"_", DEstratumName)) baseDir <- "./data-raw/exampleData/TextBookExamples/" - #baseDir <- "" + baseDir <- "" VD_base <- readRDS(paste0(baseDir,"aux_TextBookExamples/VD_base.rds")) SL_base <- readRDS(paste0(baseDir,"aux_TextBookExamples/SL_base.rds")) IS_base <- readRDS(paste0(baseDir,"aux_TextBookExamples/IS_base.rds")) @@ -231,6 +231,7 @@ VS_df$VSnumberTotal<-strataSize[VS_df$VSstratumName] # 21 FTarrivalLocation [M] - Harbour_LOCODE # 22 FTarrivalDate [M] - Date # 23 FTarrivalTime [M/O] - StringLength60 +# FTdominantLandingDate # 24 FTnumberTotal [DV,O] - int # 25 FTnumberSampled [DV,O] - int # 26 FTselectionProb [DV,O] - Decimal0-1 @@ -274,6 +275,7 @@ FT_df <- data.frame( FTarrivalLocation = "ZWHWN", #[M] - Harbour_LOCODE FTarrivalDate=seq(from = as.Date("1968-01-01", format='%Y-%m-%d'), by=1, length.out=nrow(dataset)), #[M] - date FTarrivalTime="", #[O] - time + FTdominantLandingDate= "", FTnumberTotal= 1, #[DV,O] - int FTnumberSampled=1, #[DV,O] - int FTselectionProb=1, #[DV,O] - DecimalPrec10 @@ -317,6 +319,7 @@ FT_df <- data.frame( # 16 FOendDate [M] - Date # 17 FOendTime [M/O] - Time # 18 FOduration [M/O] - int +# FOfishingDurationDataBasis # 19 FOdurationSource [M] - DurationSource # 20 FOhandlingTime [O] - int # 21 FOstartLat [O] - Decimal-90.000000-90.000000 @@ -384,6 +387,7 @@ FO_df <- data.frame( FOendDate = as.Date(FT_df$FTdepartureDate, format='%Y-%m-%d'),#M FOendTime ="",#M FOduration = 60, #M ATTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT!!!! + FOfishingDurationDataBasis = "Measured", FOdurationSource = "Crew", FOhandlingTime = "", FOstartLat="", # ATT! @@ -396,12 +400,16 @@ FO_df <- data.frame( FOfisheriesManagementUnit = "", FOgsaSubarea = "NotApplicable", #M FOjurisdictionArea = "", + FOgeographicalDataBasis = "Measured", + FOgeographicalSource = "", FOfishingDepth = "", FOwaterDepth = "", FOnationalFishingActivity = "", FOmetier5 = "", FOmetier6 = "OTT_CRU_70-89_2_35",#M FOgear = "OTT",#M + FOgearDataBasis = "Measured", + FOgearSource = "", FOmeshSize = "", FOselectionDevice = "", FOselectionDeviceMeshSize = "", @@ -497,8 +505,10 @@ SS_df<-data.frame( SSspeciesListName = project_name_outputs, #M SSuseForCalculateZero = "N", #M SStimeTotal = "", + SStimeTotalDataBasis = "Measured", SStimeSampled = "", SSnumberTotal = 1, + SSnumberTotalDataBasis = "Measured", SSnumberSampled = 1, SSselectionProb = 1, SSinclusionProb = 1, @@ -601,10 +611,14 @@ SA_df<-data.frame( SAfisheriesManagementUnit = "", SAgsaSubarea = "NotApplicable", #M SAjurisdictionArea = "", + SAgeographicalDataBasis = "Measured", + SAgeographicalSource = "", SAnationalFishingActivity = "", SAmetier5 = "", SAmetier6 = "", SAgear = "", + SAgearDataBasis = "Measured", + SAgearSource = "", SAmeshSize = "", SAselectionDevice = "", SAselectionDeviceMeshSize = "", @@ -625,6 +639,7 @@ SA_df<-data.frame( SAreasonNotSampledFM = "", SAreasonNotSampledBV = "", SAtotalWeightMeasured = dataset[[target_var]], + SAtotalWeightMeasuredDataBasis = "Measured", SAsampleWeightMeasured = dataset[[target_var]], SAconversionFactorMeasLive = 1, SAauxiliaryVariableTotal = "", @@ -636,14 +651,12 @@ SA_df<-data.frame( - - #====Builds final format=========== RDBESlist = list(DE = DE_df,SD = SD_df, VS = VS_df, FT = FT_df, FO = FO_df, SS = SS_df, SA = SA_df) - +if(1==2){ a<-RDBESlist a[c("TE","LO","OS","LE","CL","CE","BV","FM")] <- NULL a$SA[c("SAgeoDatBas","SAgeoSou","SAgeaDatBas","SAgearSou","SAtotWtMeaDatBas")] <- NA @@ -656,7 +669,7 @@ Pckg_SDAResources_agstrat_H1 <- createRDBESDataObject(a, verbose = F) usethis::use_data(Pckg_SDAResources_agstrat_H1, overwrite = TRUE) stop("The new code stops here!") - +} #id table a<-merge(DE_df["DEid"],SD_df[c("DEid","SDid")]) a<-merge(a, VS_df[c("SDid","VSid")], all.x=T) @@ -700,16 +713,14 @@ RDBESlist[[i]][which(grepl(colnames(RDBESlist[[i]]),pat="[A-Z]id"))]<-NULL #===Save============ - dir_outputs<-paste0(base_dir_outputs,"/", - project_name_outputs,"/") - dir.create(dir_outputs, recursive=T, showWarnings=FALSE) - filename_output_CS <- paste0("H1.csv") - filename_output_SL <- paste0("HSL.csv") - filename_output_IS <- paste0("IS.csv") - filename_output_VD <- paste0("HVD.csv") + dir_outputs<-paste0(base_dir_outputs,"/", project_name_outputs,"/") + dir.create(dir_outputs, recursive=T, showWarnings=FALSE) + filename_output_CS <- paste0(dir_outputs,"H1.csv") + filename_output_SL <- paste0(dir_outputs,"HSL.csv") + filename_output_VD <- paste0(dir_outputs,"HVD.csv") -lapply(RDBESlist, function(x, filename1 = paste0(dir_outputs,filename_output_CS)){ +lapply(RDBESlist, function(x, filename1 = filename_output_CS){ if("DErecordType" %in% colnames(x)){ write.table(x, file = filename1, append = FALSE, quote = FALSE, sep = ",", eol = "\n", na = "NA", dec = ".", row.names = FALSE, @@ -721,7 +732,7 @@ write.table(x, file = filename1, append = TRUE, quote = FALSE, sep = ",", } }) -b<-read.table(file=paste0(dir_outputs,filename_output_CS), header=F, sep=";") +b<-read.table(file = filename_output_CS, header=F, sep=";") b<-cbind(key,b) b<-b[order(as.character(b$key), decreasing=FALSE),] b<-b[!is.na(key),] @@ -729,34 +740,30 @@ b$key<-NULL b$V1<-as.character(b$V1) # saves CS output -write.table(b$V1, file=paste0(dir_outputs,filename_output_CS), col.names=FALSE, row.names = FALSE, quote=FALSE,sep=",") +write.table(b$V1, file = filename_output_CS, col.names=FALSE, row.names = FALSE, quote=FALSE,sep=",") -# -----Builds and saves dummySL----------------- +# -----Builds and saves dummySL and dummyIS----------------- SL_base$SLspeciesListName<-project_name_outputs SL_base$SLyear<-DEyear SL_base$SLinstitute<-SDinstitution -# saves SL output - write.table(SL_base, file=paste0(dir_outputs,filename_output_SL), col.names=FALSE, row.names = FALSE, quote=FALSE,sep=",") - - # -----Builds and saves dummyIS----------------- - - - # saves IS output - write.table(IS_base, file=paste0(dir_outputs,filename_output_IS), col.names=FALSE, row.names = FALSE, quote=FALSE,sep=",") - - + + # saves SL output + write.table(rbind(paste(SL_base, collapse=","), paste(IS_base, collapse=",")), file=filename_output_SL, col.names=FALSE, row.names = FALSE, quote=FALSE,sep=",") # -----Builds and saves dummyVD----------------- - # saves VD output VD_base$VDyear<-DEyear - write.table(VD_base, file=paste0(dir_outputs,filename_output_VD), col.names=FALSE, row.names = FALSE, quote=FALSE,sep=",") + write.table(VD_base, file=filename_output_VD, col.names=FALSE, row.names = FALSE, quote=FALSE,sep=",") +# check - is this needed? +if(1==2){ + + # -----Clean SL after dowload----------------- @@ -776,4 +783,5 @@ write.table(b$V1, file=paste0(dir_outputs,filename_output_CS), col.names=FALSE, #zip::zip(zipfile = paste0(project_name_outputs,".zip"), files =paste0(dir_outputs,"tmp/",dir(paste0(dir_outputs,"tmp"))), mode="cherry-pick") #unlink(paste0(dir_outputs,"tmp"), force=T, recursive = T) +} diff --git a/data-raw/exampleData/TextBookExamples/BuildTextBookDataUpload_Pckg_SDAResources_algebra_H1.R b/data-raw/exampleData/TextBookExamples/BuildTextBookDataUpload_Pckg_SDAResources_algebra_H1.R index c859449f..6cec408f 100644 --- a/data-raw/exampleData/TextBookExamples/BuildTextBookDataUpload_Pckg_SDAResources_algebra_H1.R +++ b/data-raw/exampleData/TextBookExamples/BuildTextBookDataUpload_Pckg_SDAResources_algebra_H1.R @@ -45,10 +45,11 @@ baseDir <- "" VD_base <- readRDS(paste0(baseDir,"aux_TextBookExamples/VD_base.rds")) SL_base <- readRDS(paste0(baseDir,"aux_TextBookExamples/SL_base.rds")) + IS_base <- readRDS(paste0(baseDir,"aux_TextBookExamples/IS_base.rds")) #nameof the directory where the outputs are saved currently base_dir_outputs <- paste0(baseDir,"BuiltUploads") - if(!file.exists(base_dir_outputs)) dir.create(base_dir_outputs, recursive=T, showWarnings=FALSE) + dir.create(base_dir_outputs, recursive=T, showWarnings=FALSE) #========Outline of Hierarchy 1================ @@ -102,7 +103,7 @@ DE_df<-data.frame( DEauxiliaryVariableTotal = "", DEauxiliaryVariableValue = "", DEauxiliaryVariableName = "", - DEauxiliaryVariableUnit = "", + DEauxiliaryVariableUnit = "", stringsAsFactors=FALSE ) @@ -252,6 +253,7 @@ VS_df$VSinclusionProbCluster<-"" # 21 FTarrivalLocation [M] - Harbour_LOCODE # 22 FTarrivalDate [M] - Date # 23 FTarrivalTime [M/O] - StringLength60 +# FTdominantLandingDate # 24 FTnumberTotal [DV,O] - int # 25 FTnumberSampled [DV,O] - int # 26 FTselectionProb [DV,O] - Decimal0-1 @@ -338,6 +340,7 @@ FT_df <- data.frame( # 16 FOendDate [M] - Date # 17 FOendTime [M/O] - Time # 18 FOduration [M/O] - int +# FOfishingDurationDataBasis # 19 FOdurationSource [M] - DurationSource # 20 FOhandlingTime [O] - int # 21 FOstartLat [O] - Decimal-90.000000-90.000000 @@ -405,6 +408,7 @@ FO_df <- data.frame( FOendDate = as.Date(FT_df$FTdepartureDate, format='%Y-%m-%d'),#M FOendTime ="",#M FOduration = 60, #M ATTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT!!!! + FOfishingDurationDataBasis ="", FOdurationSource = "Crew", FOhandlingTime = "", FOstartLat="", # ATT! @@ -417,6 +421,8 @@ FO_df <- data.frame( FOfisheriesManagementUnit = "", FOgsaSubarea = "NotApplicable", #M FOjurisdictionArea = "", + FOgeographicalDataBasis = "", + FOgeographicalSource = "", FOfishingDepth = "", FOwaterDepth = "", FOnationalFishingActivity = "", From 5ad4ecac3742b2e0dda860984c1763f755baed72 Mon Sep 17 00:00:00 2001 From: Richard Meitern Date: Wed, 15 Oct 2025 18:41:30 +0300 Subject: [PATCH 12/30] make the strict work closes #242 --- R/combineRDBESDataObjects.R | 28 +++++++++++-------- tests/testthat/test-combineRDBESDataObjects.R | 13 ++++----- 2 files changed, 22 insertions(+), 19 deletions(-) diff --git a/R/combineRDBESDataObjects.R b/R/combineRDBESDataObjects.R index 865171e2..9bfc7cb0 100644 --- a/R/combineRDBESDataObjects.R +++ b/R/combineRDBESDataObjects.R @@ -9,7 +9,7 @@ #' @param strict (Optional) This function validates its input data - should #' the validation be strict? The default is TRUE. #' -#' @details +#' @details #' When combining RDBESDataObjects from different hierarchies (e.g., H1 and H5), #' a warning is issued. The resulting combined object will have a mixed hierarchy, #' which may be structurally and statistically invalid for some analyses. However, @@ -38,32 +38,36 @@ combineRDBESDataObjects <- function(RDBESDataObject1, validateRDBESDataObject(RDBESDataObject1, verbose = verbose, strict = strict) validateRDBESDataObject(RDBESDataObject2, verbose = verbose, strict = strict) - + # Check for multiple hierarchies hierarchy1 <- NULL hierarchy2 <- NULL - + if (!is.null(RDBESDataObject1$DE) && nrow(RDBESDataObject1$DE) > 0) { hierarchy1 <- unique(RDBESDataObject1$DE$DEhierarchy) } - + if (!is.null(RDBESDataObject2$DE) && nrow(RDBESDataObject2$DE) > 0) { hierarchy2 <- unique(RDBESDataObject2$DE$DEhierarchy) } - + # Warn if combining different hierarchies - if (!is.null(hierarchy1) && !is.null(hierarchy2) && + if (!is.null(hierarchy1) && !is.null(hierarchy2) && length(hierarchy1) > 0 && length(hierarchy2) > 0) { if (!all(hierarchy1 %in% hierarchy2) || !all(hierarchy2 %in% hierarchy1)) { - warning("Combining RDBESDataObjects from different hierarchies (", - paste(hierarchy1, collapse = ", "), " and ", - paste(hierarchy2, collapse = ", "), + warnMsg <- paste("Combining RDBESDataObjects from different hierarchies (", + paste(hierarchy1, collapse = ", "), " and ", + paste(hierarchy2, collapse = ", "), "). This creates a mixed hierarchy object that may be structurally ", - "and statistically invalid for some analyses.", - call. = FALSE) + "and statistically invalid for some analyses.") + if(strict){ + stop(warnMsg, call. = F) + } else{ + warning(warnMsg, call. = F) + } } } - + # Create an empty RDBESDataObject as the basis of what we will return myRDBESDataObject <- createRDBESDataObject() diff --git a/tests/testthat/test-combineRDBESDataObjects.R b/tests/testthat/test-combineRDBESDataObjects.R index 5cf38035..347e153a 100644 --- a/tests/testthat/test-combineRDBESDataObjects.R +++ b/tests/testthat/test-combineRDBESDataObjects.R @@ -24,7 +24,7 @@ test_that("combineRDBESDataObjects returns invalid RDBESDataObject when expect_error(validateRDBESDataObject(myCombinedObject), "duplicate rows") }) -test_that("combineRDBESDataObjects returns valid RDBESDataObject when supplied +test_that("combineRDBESDataObjects warns when supplied with valid, different RDBESDataObjects", { myObject1 <- importRDBESDataCSV(rdbesExtractPath = "./h1_v_20250211") @@ -34,13 +34,12 @@ test_that("combineRDBESDataObjects returns valid RDBESDataObject when supplied expect_error(validateRDBESDataObject(myObject1), NA) expect_error(validateRDBESDataObject(myObject2), NA) - myCombinedObject <- combineRDBESDataObjects(RDBESDataObject1=myObject1, - RDBESDataObject2=myObject2) - - expect_error(validateRDBESDataObject(myCombinedObject), NA) + expect_warning(combineRDBESDataObjects(RDBESDataObject1=myObject1, + RDBESDataObject2=myObject2, strict = F), + "Combining RDBESDataObjects from different hierarchies") }) -test_that("combineRDBESDataObjects warns when combining objects from different hierarchies", { +test_that("combineRDBESDataObjects stops when combining objects from different hierarchies", { myObject1 <- importRDBESDataCSV(rdbesExtractPath = "./h1_v_20250211") myObject2 <- importRDBESDataCSV(rdbesExtractPath = "./h5_v_20250211") @@ -50,7 +49,7 @@ test_that("combineRDBESDataObjects warns when combining objects from different h expect_error(validateRDBESDataObject(myObject2), NA) # Expect a warning about different hierarchies - expect_warning(combineRDBESDataObjects(RDBESDataObject1=myObject1, + expect_error(combineRDBESDataObjects(RDBESDataObject1=myObject1, RDBESDataObject2=myObject2), "Combining RDBESDataObjects from different hierarchies") }) From daa8eb7d06938a4f44f946441315f82ace53d0d9 Mon Sep 17 00:00:00 2001 From: Richard Meitern Date: Wed, 15 Oct 2025 19:06:19 +0300 Subject: [PATCH 13/30] presentation update --- package_overview.Rmd | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/package_overview.Rmd b/package_overview.Rmd index f1570360..2968eb92 100644 --- a/package_overview.Rmd +++ b/package_overview.Rmd @@ -1,11 +1,16 @@ --- -title: "RDBEScore: Using R for ICES Regional Database & Estimation System (RDBES) Data" +title: 'RDBEScore: Using R for ICES Regional Database & Estimation System (RDBES) + Data' output: - powerpoint_presentation: + ioslides_presentation: + incremental: true + powerpoint_presentation: reference_doc: "./data-raw/styles.pptx" slide_level: 2 keep_md: true incremental: true + beamer_presentation: + incremental: true --- ## Importing RDBES Data @@ -15,7 +20,7 @@ The aim of this document is to outline the basic workflow of importing data down The function **createRDBESDataObject** is intended to directly import Commercial Landing (CL), Commercial Effort (CE) and Commercial Sampling (CS) tables downloaded from [RDBES](https://rdbes.ices.dk/#/). ## Introducton - +2 RDBEScore is an R package developed to facilitate the analysis of data from the ICES Regional Database and Estimation System (RDBES). The package provides functions to: - import functions like `createRDBESDataObject` and `validateRDBESDataObject` @@ -52,7 +57,7 @@ library(RDBEScore) To see the complete list of vignettes available in the package use the following command: -```{rm echo=TRUE, eval=FALSE} +```{r echo=TRUE, eval=FALSE} browseVignettes(package = "RDBEScore") ``` @@ -84,9 +89,9 @@ importedH1 It can import the CL, CE, VD or SL tables `.zip` archives, but will include all other tables as `NULL`: ```{r} -importedH5 <- createRDBESDataObject(input = "./data-raw/exampleData/H5_2025_02_11.zip") +importedHSL <- createRDBESDataObject(input = "./inst/extdata/HSL_Example.zip") #print the not NULL table names -importedH5 +importedHSL ``` ## Importing a List of Data Frames @@ -103,7 +108,7 @@ importedList <- createRDBESDataObject(listOfDfsH1) ## Object class RDBESDataObject -It should be noted that the objects created are of the S3 class "**RDBESDataObject**". The class has defined **print()**, **summary()** and **sort()** methods. For more info on these see vignette [Manipulating RDBESDataObjects](https://ices-tools-dev.github.io/RDBEScore/articles/manipulating-rdbesdataobjects.html). +It should be noted that the objects created are of the S3 class "**RDBESDataObject**". The class has defined **print()**, **summary()** and **sort()** methods. For more info on these see vignettes @@ -147,7 +152,7 @@ validateRDBESDataObject(myFilteredObjectNoOrphans, verbose = FALSE) ``` -Again to see more details on the functions see the vignette [Manipulating RDBESDataObjects](https://ices-tools-dev.github.io/RDBEScore/articles/manipulating-rdbesdataobjects.html). +Again to see more details on the functions see the vignettes. ## Getting Subsets of RDBESDataObject Tables @@ -208,7 +213,7 @@ Right now estimation actually is done on a *RDBESEstObject* that is generate fr In the next sections we will use data from R packages [survey](https://CRAN.R-project.org/package=survey) and [SDAResources](https://CRAN.R-project.org/package=SDAResources) that are converted into the *RDBESDataObject* to demonstrate the estimation procedure. -For more detailed information on the estimation functions see the vignette [Estimating Population parameters from RDBESDataObjects](https://ices-tools-dev.github.io/RDBEScore/articles/estimating-rdbesdataobjects.html). +For more detailed information on the estimation functions see the vignettes ```{r} @@ -274,6 +279,7 @@ strataListCL <- list(CLarea="27.3.d.28.1", There is a function in development **addCLtoLowerCS(...)** that can be used to add the CL data to the lower hierarchy. + ```{r} #we are using the lower hierarchy C meaning that we are extracting the BV data #as the biological data @@ -293,7 +299,7 @@ lenCANUMQ1 <- doBVestimCANUM(biolCLQ1, c("sumCLoffWeight"), classBreaks = seq(70,130,10), verbose = FALSE) -knitr::kable(lenCANUMQ1[, c("Group", "WeightgMean", "LengthmmMean", "totNum")], +knitr::kable(lenCANUMQ1[order(lenCANUMQ1$LengthmmMean), c("Group", "WeightgMean", "LengthmmMean", "totNum")], digits = 2) ``` From 96bcfb5f49a2b6b6106a88e10f088f3beac08e90 Mon Sep 17 00:00:00 2001 From: Richard Meitern Date: Wed, 15 Oct 2025 19:07:26 +0300 Subject: [PATCH 14/30] ignore knitted --- .gitignore | 19 ++++--------------- 1 file changed, 4 insertions(+), 15 deletions(-) diff --git a/.gitignore b/.gitignore index 37912496..428307b6 100644 --- a/.gitignore +++ b/.gitignore @@ -1,57 +1,46 @@ # History files .Rhistory .Rapp.history - # Session Data files .RData - # User-specific files .Ruserdata .DS_Store - # Example code in package build process *-Ex.R - # Output files from R CMD build /*.tar.gz - # Output files from R CMD check /*.Rcheck/ - # RStudio files .Rproj.user/ - # produced vignettes vignettes/*.html vignettes/*.pdf - # OAuth2 token, see https://github.com/hadley/httr/releases/tag/v0.3 .httr-oauth - # knitr and R markdown default cache directories *_cache/ /cache/ - # Temporary files created by R markdown *.utf8.md *.knit.md - # R Environment Variables .Renviron - # FishNCo raw test data FishNCo/testData/RegionalTestData/* - # MS Excel temp files ~$* - /doc/ /Meta/ <<<<<<< Updated upstream .vscode/ docs ======= - # Data to develop fun /NLdata/ >>>>>>> Stashed changes +package_overview.tex +package_overview.html +package_overview.pptx +package_overview.md From 8412cdef1223ec6d7f68bedf51b6c45f0bbfc167 Mon Sep 17 00:00:00 2001 From: karolinamg Date: Thu, 16 Oct 2025 09:43:38 +0200 Subject: [PATCH 15/30] Changes ratio est --- R/doEstimationRatio.R | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/R/doEstimationRatio.R b/R/doEstimationRatio.R index 73cf5c79..278dbb72 100644 --- a/R/doEstimationRatio.R +++ b/R/doEstimationRatio.R @@ -75,11 +75,9 @@ doEstimationRatio <- function(RDBESDataObj, idx <- utils::menu(weightVar, title = "Select the BV weight type to use:") if (idx == 0L) stop("Selection cancelled.") wcol <- weightVar[idx] - weightName <- paste0("BV", wcol) } else { message("Only one weight type present. Using: ", weightVar[1L]) wcol <- weightVar[1L] - weightName <- paste0("BV", wcol) } } } @@ -151,13 +149,14 @@ doEstimationRatio <- function(RDBESDataObj, bv <- setDT(RDBESEstRatioObj$BV) bv <- bv[, unique(.SD), .SDcols = c("SAid", "BVfishId", "BVtypeMeas", "BVvalueMeas")] bv <- dcast(bv, ... ~ BVtypeMeas , value.var = c("BVvalueMeas"), drop = TRUE) - bv[, (weightName) := as.numeric(get(wcol))] + bv[, BVweight := as.numeric(get(wcol))] + bv[, LengthTotal := as.numeric(LengthTotal)] # TODO this probably needs to be an argument # or needs to be defined later on? bv$LengthClass <- floor(bv$LengthTotal/10) # TODO This needs to be defined by the user bv1 <- bv[ - , .(BVMeanWeight = mean(get(wcol), na.rm = TRUE), + , .(BVMeanWeight = mean(BVweight, na.rm = TRUE), BVNumbersAtLength = .N), by = .(SAid, LengthClass) ][ @@ -165,7 +164,7 @@ doEstimationRatio <- function(RDBESDataObj, , BVTotCount := sum(BVNumbersAtLength), by = SAid ][ # add total weight per SAid - bv[, .(BVTotWeight = sum(get(wcol), na.rm = TRUE)), by = SAid], + bv[, .(BVTotWeight = sum(BVweight, na.rm = TRUE)), by = SAid], on = "SAid" ] @@ -249,12 +248,12 @@ doEstimationRatio <- function(RDBESDataObj, bv <- setDT(RDBESEstRatioObj$BV) bv <- bv[, unique(.SD), .SDcols = c("SAid", "BVfishId", "BVtypeMeas", "BVvalueMeas")] bv <- dcast(bv, ... ~ BVtypeMeas , value.var = c("BVvalueMeas"), drop = TRUE) - bv[, `:=`( WeightMeasured = as.numeric(get(wcol)))] + bv[, BVweight := as.numeric(get(wcol))] # TODO this probably needs to be an argument # or needs to be defined later on? bv1 <- bv[ - , .(BVMeanWeight = mean(get(wcol), na.rm = TRUE), + , .(BVMeanWeight = mean(BVweight, na.rm = TRUE), BVNumbersAtAge = .N), by = .(SAid, Age) ][ @@ -262,7 +261,7 @@ doEstimationRatio <- function(RDBESDataObj, , BVTotCount := sum(BVNumbersAtAge), by = SAid ][ # add total weight per SAid - bv[, .(BVTotWeight = sum(get(wcol), na.rm = TRUE)), by = SAid], + bv[, .(BVTotWeight = sum(BVweight, na.rm = TRUE)), by = SAid], on = "SAid" ] @@ -325,6 +324,8 @@ doEstimationRatio <- function(RDBESDataObj, bv <- dcast(bv, ... ~ BVtypeMeas , value.var = c("BVvalueMeas"), drop = TRUE) bv[, `:=`(wcol = as.numeric(wcol ))] + # subsample -> sample weights -> weight from where the sample came from + # if age exists # if indv weights + lengths exist From 4daa09bb958043f28032d255e106de685e6195a0 Mon Sep 17 00:00:00 2001 From: Richard Meitern Date: Thu, 16 Oct 2025 11:36:43 +0300 Subject: [PATCH 16/30] added tests to #229 --- tests/testthat/test-importRDBESDataCSV.R | 34 ++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/tests/testthat/test-importRDBESDataCSV.R b/tests/testthat/test-importRDBESDataCSV.R index 63d7e52a..d3b6ea46 100644 --- a/tests/testthat/test-importRDBESDataCSV.R +++ b/tests/testthat/test-importRDBESDataCSV.R @@ -48,6 +48,40 @@ capture.output({ ## suppresses printing of console output when running test() }) + test_that("importRDBESDataCSV keeps extra long BVfishId if character", { + #make a temporary BV table from the package data + BV <- H1Example$BV[1,] + #make a 40 char length dummy ID this is the max length in RDBES + id <- paste0(rep("A", 40), collapse = "") + BV$BVfishId <- id + tempBVfNname <- tempfile(fileext = ".csv") + write.csv(BV, file=tempBVfNname, quote=F, row.names = F) + fname <- basename(tempBVfNname) + dirname <- dirname(tempBVfNname) + BV <- importRDBESDataCSV(rdbesExtractPath = dirname, listOfFileNames = list("BV"=fname))$BV + file.remove(tempBVfNname) + expect_equal(BV$BVfishId[1],id) + }) + + test_that("importRDBESDataCSV keeps extra long BVfishId if int", { + #make a temporary BV table from the package data + BV <- H1Example$BV[1,] + #make a 40 char length dummy ID this is the max length in RDBES + id <- paste0(rep("1", 40), collapse = "") + #its not possible to get the exact number if we save as numeric + id <- format(as.numeric(id), scientific = FALSE, trim = TRUE) + #the thing we want to test is if we have a long numeric string in there + #do we get the same string back in when importing + BV$BVfishId <- id + tempBVfNname <- tempfile(fileext = ".csv") + write.csv(BV, file=tempBVfNname, quote=F, row.names = F) + fname <- basename(tempBVfNname) + dirname <- dirname(tempBVfNname) + BV <- importRDBESDataCSV(rdbesExtractPath = dirname, listOfFileNames = list("BV"=fname))$BV + file.remove(tempBVfNname) + expect_equal(BV$BVfishId[1],id) + }) + From bac6e52713d2b453ef85945fba9f9d301db421bc Mon Sep 17 00:00:00 2001 From: karolinamg Date: Thu, 16 Oct 2025 14:09:04 +0200 Subject: [PATCH 17/30] Upd ratio --- R/doEstimationRatio.R | 102 +++++++++++++++++++++++++++++++++--------- 1 file changed, 81 insertions(+), 21 deletions(-) diff --git a/R/doEstimationRatio.R b/R/doEstimationRatio.R index 278dbb72..8a4965a8 100644 --- a/R/doEstimationRatio.R +++ b/R/doEstimationRatio.R @@ -62,6 +62,11 @@ doEstimationRatio <- function(RDBESDataObj, # Filter out NULL tables RDBESEstRatioObj <- Filter(Negate(is.null), RDBESEstRatioObj) + # If raiseVar == possible + possibleValues <- unique(RDBESDataObj$BV$BVtypeMeas) + + if(!raiseVar %in% possibleValues){ + #---------------------- if(raiseVar == "Weight"){ if(unique(RDBESDataObj$SA$SAlowHierarchy) == "B" ){ @@ -82,6 +87,9 @@ doEstimationRatio <- function(RDBESDataObj, } } } + } + + # Do we need both CL and CE? Allow the user to define the population (i.e. effort or landings or both)? @@ -108,35 +116,54 @@ doEstimationRatio <- function(RDBESDataObj, # LH A & B ---------------------------------------------------------------- if(unique(RDBESEstRatioObj$SA$SAlowHierarchy) %in% c("A", "B")){ - # bv <- setDT(RDBESEstRatioObj$BV) - # fm <- setDT(RDBESEstRatioObj$FM) - # bv <- bv[, unique(.SD), .SDcols = c( "FMid","BVfishId", "BVtypeMeas", "BVvalueMeas")] - # bv <- dcast(bv, ... ~ BVtypeMeas , value.var = c("BVvalueMeas"), drop = TRUE) - # bv[, `:=`(LengthTotal = as.numeric(LengthTotal), WeightMeasured = as.numeric(WeightMeasured))] + # TODO mean weight at length - # Select only FM data for now - BV possibly used for ALK - warning("Only FM table used. BV is not yet implemented") + if(!is.null(LWparam)){ + + stop("Not yet implemented") + }else{ + # else stop + stop("Nor an auxiliary variable nor lw params are provided. Not possible to produce the mean weight at length") + } + # Select only FM data for now - BV possibly used for ALK + warning("If lower hierarchy A, only the FM table is used to calculate the numbers at length.") + fm <- data.table::setDT(RDBESEstRatioObj$FM) + sa <- data.table::setDT(RDBESEstRatioObj$SA) + fm <- fm[fm, unique(.SD), .SDcols = c("SAid", "FMid", "FMclassMeas", "FMnumAtUnit")] + sa <- sa[, unique(.SD), .SDcols = c("SAid", "SAlowHierarchy", "SAtotalWtMes" , "SAsampWtMes", "SAnumTotal", "SAnumSamp", "SAauxVarValue", "SAauxVarUnit" )] + bv$LengthClass <- floor(bv$LengthTotal/10) # TODO This needs to be defined by the user + fm1 <- fm[ + , .(FMNumbersAtLength = .N), + by = .(SAid, LengthClass) + ][ + # add total count per SAid + , FMTotCount := sum(FMNumbersAtLength), by = SAid + ] + su <- merge(fm1, sa, by = c("SAid")) + if(raiseVar == "Weight"){ - if(!is.null(LWparam)){ + su$raiseFactor <- su$SAtotalWtMes/su$SAsampWtMes + su$NumbersAtLength <- su$raiseFactor*su$FMNumbersAtLength - stop("Not yet implemented") + }else if(raiseVar == "Count"){ + su$raiseFactor <- su$SAnumTotal/su$SAnumSamp + su$NumbersAtLength <- su$raiseFactor*su$FMNumbersAtLength }else{ - # else stop - stop("Nor an auxiliary variable nor lw params are provided. Not possible to produce the mean weight at length") - } - # otherwise check if you can calculate it - # otherwise stop + su$NumbersAtLength <- su$SAauxVarValue*su$FMNumbersAtLength + } + + return(su) @@ -146,7 +173,7 @@ doEstimationRatio <- function(RDBESDataObj, }else if(unique(RDBESEstRatioObj$SA$SAlowHierarchy) == "C"){ - bv <- setDT(RDBESEstRatioObj$BV) + bv <- data.table::setDT(RDBESEstRatioObj$BV) bv <- bv[, unique(.SD), .SDcols = c("SAid", "BVfishId", "BVtypeMeas", "BVvalueMeas")] bv <- dcast(bv, ... ~ BVtypeMeas , value.var = c("BVvalueMeas"), drop = TRUE) bv[, BVweight := as.numeric(get(wcol))] @@ -171,7 +198,7 @@ doEstimationRatio <- function(RDBESDataObj, # bv1$BVLengthClassProp <- bv1$BVNumbersAtLength/bv1$TotCount - sa <- setDT(RDBESEstRatioObj$SA) + sa <- data.table::setDT(RDBESEstRatioObj$SA) sa <- sa[, unique(.SD), .SDcols = c("SAid", "SAlowHierarchy", "SAtotalWtMes" , "SAsampWtMes", "SAnumTotal", "SAnumSamp", "SAauxVarValue", "SAauxVarUnit" )] # Do not need the # species, the filtering of the "strata" variables will be done before the estimation # To test @@ -245,7 +272,7 @@ doEstimationRatio <- function(RDBESDataObj, if(unique(RDBESDataObj$SA$SAlowHierarchy) == "C"){ - bv <- setDT(RDBESEstRatioObj$BV) + bv <- data.table::setDT(RDBESEstRatioObj$BV) bv <- bv[, unique(.SD), .SDcols = c("SAid", "BVfishId", "BVtypeMeas", "BVvalueMeas")] bv <- dcast(bv, ... ~ BVtypeMeas , value.var = c("BVvalueMeas"), drop = TRUE) bv[, BVweight := as.numeric(get(wcol))] @@ -268,7 +295,7 @@ doEstimationRatio <- function(RDBESDataObj, # bv1$BVLengthClassProp <- bv1$BVNumbersAtLength/bv1$TotCount - sa <- setDT(RDBESEstRatioObj$SA) + sa <- data.table::setDT(RDBESEstRatioObj$SA) sa <- sa[, unique(.SD), .SDcols = c("SAid", "SAlowHierarchy", "SAtotalWtMes" , "SAsampWtMes", "SAnumTotal", "SAnumSamp", "SAauxVarValue", "SAauxVarUnit" )] # Do not need the # species, the filtering of the "strata" variables will be done before the estimation # To test @@ -318,11 +345,42 @@ doEstimationRatio <- function(RDBESDataObj, }else if(unique(RDBESDataObj$SA$SAlowHierarchy) == "A"){ - bv <- setDT(RDBESEstRatioObj$BV) - fm <- setDT(RDBESEstRatioObj$FM) + bv <- data.table::setDT(RDBESEstRatioObj$BV) + fm <- data.table::setDT(RDBESEstRatioObj$FM) + sa <- data.table::setDT(RDBESEstRatioObj$SA) bv <- bv[, unique(.SD), .SDcols = c( "FMid","BVfishId", "BVtypeMeas", "BVvalueMeas")] bv <- dcast(bv, ... ~ BVtypeMeas , value.var = c("BVvalueMeas"), drop = TRUE) - bv[, `:=`(wcol = as.numeric(wcol ))] + bv[, BVweight := as.numeric(get(wcol))] + + bv1 <- bv[ + , .(BVMeanWeight = mean(BVweight, na.rm = TRUE), + BVNumbersAtAge = .N), + by = .(FMid, Age) + ][ + # add total count per SAid + , BVTotCount := sum(BVNumbersAtAge), by = FMid + ][ + # add total weight per SAid + bv[, .(BVTotWeight = sum(BVweight, na.rm = TRUE)), by = FMid], + on = "SAid" + ] + + + fm <- fm[fm, unique(.SD), .SDcols = c("SAid", "FMid", "FMclassMeas", "FMnumAtUnit")] + sa <- sa[, unique(.SD), .SDcols = c("SAid", "SAlowHierarchy", "SAtotalWtMes" , "SAsampWtMes", "SAnumTotal", "SAnumSamp", "SAauxVarValue", "SAauxVarUnit" )] + + fm1 <- unique( + fm[FMclassMeas %chin% c("LengthTotal","LengthMeasured","Length"), + .(FMid, SAid, FMnumAtUnit)] + ) + + bv1 <- fm_len[bv1, on = "FMid"][, + num_raise := fifelse(BVTotCount > 0, FMnumAtUnit / BVTotCount, NA_real_) + ][ + , N_at_age := BVNumbersAtAge * num_raise + ] + + # subsample -> sample weights -> weight from where the sample came from @@ -341,6 +399,8 @@ doEstimationRatio <- function(RDBESDataObj, # else stop # TODO include FM. For now the FM is not yet implemented + }else{ + stop("Age composition can't be calculated with lower hierachy B.") } } From 7462c96936ceb1be977b9c1efc57b8f0fbea9a20 Mon Sep 17 00:00:00 2001 From: Kasia-MIR <60775640+Kasia-MIR@users.noreply.github.com> Date: Thu, 16 Oct 2025 15:29:32 +0200 Subject: [PATCH 18/30] fix bug with BVfishId passed the test: https://github.com/ices-tools-dev/RDBEScore/pull/246#issuecomment-3406981773 --- R/createTableOfRDBESIds.r | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/createTableOfRDBESIds.r b/R/createTableOfRDBESIds.r index a2c42265..0ba86b1b 100644 --- a/R/createTableOfRDBESIds.r +++ b/R/createTableOfRDBESIds.r @@ -60,7 +60,7 @@ createTableOfRDBESIds<-function(x, addSAseqNums=TRUE){ cat("Using", id_1, " from ", CStableNames[i], "table and", id_2, "and BVfishId from ", CStableNames[i+1], "table. \n") df_2<-data.frame(x[[CStableNames[i+1]]][,list(get(id_1), get(id_2), get("BVfishId"))]); colnames(df_2)<-c(id_1,id_2,"BVfishId") # We also need a reference reporting the SAid, for when the lower hierarchy is C, see below. - df_2C<-data.frame(x[[CStableNames[i+1]]][,list(get("SAid"), get(id_1), get(id_2))]); colnames(df_2C)<-c("SAid", id_1,id_2) + df_2C<-data.frame(x[[CStableNames[i+1]]][,list(get("SAid"), get(id_1), get(id_2),get("BVfishId"))]); colnames(df_2C)<-c("SAid", id_1,id_2,"BVfishId") } @@ -104,7 +104,7 @@ createTableOfRDBESIds<-function(x, addSAseqNums=TRUE){ # Conditional merges mergedA <- df_2[toMergeA, on = "FMid"] # left join equivalent (all.x = TRUE) mergedB <- df_2[toMergeB, on = "FMid"] # same for group B - mergedC <- df_2C[toMergeC, on = "SAid", nomatch = 0][, .SD, .SDcols = c(names(toMergeC), "BVid")] + mergedC <- df_2C[toMergeC, on = "SAid", nomatch = 0][, .SD, .SDcols = c(names(toMergeC), "BVid","BVfishId")] mergedD <- toMergeD # unchanged group D # Combine back From ec03f63328746c5be2ad7581ee15352abfc5291c Mon Sep 17 00:00:00 2001 From: Richard Meitern Date: Thu, 16 Oct 2025 17:21:53 +0300 Subject: [PATCH 19/30] fixes #235 documenting inverse filtering --- R/filterRDBESDataObject.R | 11 +++++ .../v01b-manipulating-rdbesdataobjects.Rmd | 41 ++++++++++++++++++- 2 files changed, 51 insertions(+), 1 deletion(-) diff --git a/R/filterRDBESDataObject.R b/R/filterRDBESDataObject.R index 2c00b547..f2839b11 100644 --- a/R/filterRDBESDataObject.R +++ b/R/filterRDBESDataObject.R @@ -39,6 +39,17 @@ #' fieldsToFilter = myFields, #' valuesToFilter = myValues #' ) +#' +#' # Inverse filtering (exclude certain values) +#' # Example: keep all DE rows except those with DEid in `excludedValues` +#' # Compute the complement of the excluded set using setdiff +#' allValues <- unique(myH1RawObject$DE$DEid) +#' excludedValues <- c(5351) +#' myInverseFiltered <- filterRDBESDataObject( +#' myH1RawObject, +#' fieldsToFilter = "DEid", +#' valuesToFilter = setdiff(allValues, excludedValues) +#' ) #' } filterRDBESDataObject <- function(RDBESDataObjectToFilter, fieldsToFilter, diff --git a/vignettes/v01b-manipulating-rdbesdataobjects.Rmd b/vignettes/v01b-manipulating-rdbesdataobjects.Rmd index fe8ce108..7a31f435 100644 --- a/vignettes/v01b-manipulating-rdbesdataobjects.Rmd +++ b/vignettes/v01b-manipulating-rdbesdataobjects.Rmd @@ -125,7 +125,10 @@ myH1RawObject <- H1Example # Number of rows in each non-null table print(myH1RawObject) +``` + +```{r filter} myFields <- c("SDctry","VDctry","VDflgCtry","FTarvLoc") myValues <- c("ZW","ZWBZH","ZWVFA" ) @@ -147,9 +150,44 @@ myFilteredObjectNoOrphans <- validateRDBESDataObject(myFilteredObjectNoOrphans, verbose = FALSE) +myFilteredObjectNoOrphans +``` +**NB!** Currently filtering happens to all fields together i.e it is not possible to filter same codelist differently in the same filter call. Imagine a situation where you want to filter on both "SDCtry" and "VDflgCtry" ie vessels from EH country sampled by ZW institution. Ie tow calls are needed + +```{r} +myFilteredObject <- filterRDBESDataObject(myH1RawObject, + fieldsToFilter = "SDctry", + valuesToFilter = "ZW", + killOrphans = T) + +filterRDBESDataObject(myFilteredObject, + fieldsToFilter = "VDflgCtry", + valuesToFilter = "EH", + killOrphans = T) + ``` +Sometimes you might to do the inverse filter eg exclude something. You can do this by selecting the complement set of values using `setdiff`. + +```{r} +# Exclude specific DEid values by selecting all others +allValues <- unique(myH1RawObject$DE$DEid) +excludedValues <- c(5351) +myInverseFiltered <- filterRDBESDataObject( + myH1RawObject, + fieldsToFilter = "DEid", + valuesToFilter = setdiff(allValues, excludedValues), + killOrphans = TRUE +) + +validateRDBESDataObject(myInverseFiltered, verbose = FALSE) +print(myInverseFiltered) +``` + + + + You can also remove any records that are not linking to a row in the VesselDetails (VD) table using the **removeBrokenVesselLinks()** function. ```{r clean2} @@ -188,7 +226,7 @@ validateRDBESDataObject(myFilteredObjectValidSpeciesLinks, verbose = FALSE) ## Getting Subsets of RDBESDataObject Tables -Sometimes it we want to see how a field or values in the **RDBESDataObject** are connected to otther tables. +Sometimes it we want to see how a field or values in the **RDBESDataObject** are connected to other tables. One use case would be e.g. to see when a specific Landing Event (LE) occured.For this we can use the **getLinkedDataFromLevel()** function. ```{r} @@ -203,6 +241,7 @@ getLinkedDataFromLevel("TEstratumName", c("November"), H8ExampleEE1, "LE", verbo ``` Several values can be used to get a subset of the table. + ```{r} #get the SA table corresponding to the first 2 TEids in the H8ExampleEE1 object getLinkedDataFromLevel("TEid", c(1,2), H8ExampleEE1, "SA", verbose = TRUE) From 910a82a2f0e2b31ba7f01994853d2c6457059056 Mon Sep 17 00:00:00 2001 From: Richard Meitern Date: Thu, 16 Oct 2025 17:22:50 +0300 Subject: [PATCH 20/30] fixing documentation --- R/createRDBESDataObject.R | 19 +++++++++++++++-- R/importRDBESDataZIP.R | 2 +- man/combineRDBESDataObjects.Rd | 7 +++++++ man/createRDBESDataObject.Rd | 21 +++++++++++++++++-- man/filterRDBESDataObject.Rd | 11 ++++++++++ tests/testthat/test-createRDBESDataObject.R | 4 ++-- tests/testthat/test-validateRDBESDataObject.R | 4 ++-- 7 files changed, 59 insertions(+), 9 deletions(-) diff --git a/R/createRDBESDataObject.R b/R/createRDBESDataObject.R index 74c8a7bf..b9652c9b 100644 --- a/R/createRDBESDataObject.R +++ b/R/createRDBESDataObject.R @@ -20,6 +20,11 @@ #' given. You should not input different hierarchy files; this function will not #' combine them. #' +#' If the zip contains multiple hierarchies (e.g., H1 and H5 within the same +#' archive), you can select which one to import by passing `Hierarchy` via +#' `...`, for example: `Hierarchy = 1`. If `Hierarchy` is not specified and the +#' zip contains multiple hierarchies, an error is raised prompting you to set it. +#' #' ***CSV file inputs*** #' This `input` should be a path to a folder of `csv` files. These can be the #' `csv` files downloaded from RDBES (e.g. an unzipped hierarchy), or *any* set @@ -56,8 +61,17 @@ #' in. Default is `TRUE`. #' @param verbose (Optional) Set to TRUE if you want informative text printed #' out, or FALSE if you don't. The default is FALSE. -#' @param ... parameters passed to validateRDBESDataObject -#' e.g.`strict=FALSE` +#' @param ... Additional parameters forwarded to helper functions used by this +#' function. Most commonly these are forwarded to +#' `validateRDBESDataObject()` during the validation step. Common options: +#' - `strict` (logical, default `TRUE`): if `FALSE`, validation issues result +#' in warnings instead of stopping with an error. +#' - `verbose` (logical, default `FALSE`): request extra informational output +#' from validation. +#' - `Hierarchy` (integer, e.g. `1`, optional; zip inputs only): when the zip +#' file contains multiple hierarchies, selects which hierarchy to import. +#' Note: `checkDataTypes` is controlled by the `castToCorrectDataTypes` +#' argument of this function and should not be supplied via `...`. #' @importFrom utils file_test #' #' @return A RDBESDataObject @@ -65,6 +79,7 @@ #' @md #' #' @examples +#' # Create an empty object #' myEmptyRDBESObject <- createRDBESDataObject(input = NULL) createRDBESDataObject <- function(input = NULL, diff --git a/R/importRDBESDataZIP.R b/R/importRDBESDataZIP.R index 94d66a40..b8f29b0c 100644 --- a/R/importRDBESDataZIP.R +++ b/R/importRDBESDataZIP.R @@ -73,7 +73,7 @@ importRDBESDataZIP <- function(filenames, hdirs <- dirs[grepl("H[0-9]+", dirs)] if(length(hdirs) > 1) { valid_hierarchies <- as.numeric(gsub("H", "", hdirs)) - example <- paste0("Hierachy = ", valid_hierarchies[1]) + example <- paste0("Hierarchy = ", valid_hierarchies[1]) if(is.null(Hierarchy)) { stop("The zip file contains multiple hierarchies.\n", "To import a selected hierarchy, please provide the hierarchy ", diff --git a/man/combineRDBESDataObjects.Rd b/man/combineRDBESDataObjects.Rd index 1dd38e55..76ce514f 100644 --- a/man/combineRDBESDataObjects.Rd +++ b/man/combineRDBESDataObjects.Rd @@ -32,6 +32,13 @@ Combine Two RDBES Raw Objects combines 2 RDBESDataObjects into a single RDBESDataObject by merging individual tables one by one } +\details{ +When combining RDBESDataObjects from different hierarchies (e.g., H1 and H5), +a warning is issued. The resulting combined object will have a mixed hierarchy, +which may be structurally and statistically invalid for some analyses. However, +such combinations can be useful for fisheries overviews, annual reports, or +countries performing broader estimations. +} \examples{ \dontrun{ diff --git a/man/createRDBESDataObject.Rd b/man/createRDBESDataObject.Rd index 783496f5..8895db41 100644 --- a/man/createRDBESDataObject.Rd +++ b/man/createRDBESDataObject.Rd @@ -33,8 +33,19 @@ in. Default is \code{TRUE}.} \item{verbose}{(Optional) Set to TRUE if you want informative text printed out, or FALSE if you don't. The default is FALSE.} -\item{...}{parameters passed to validateRDBESDataObject -e.g.\code{strict=FALSE}} +\item{...}{Additional parameters forwarded to helper functions used by this +function. Most commonly these are forwarded to +\code{validateRDBESDataObject()} during the validation step. Common options: +\itemize{ +\item \code{strict} (logical, default \code{TRUE}): if \code{FALSE}, validation issues result +in warnings instead of stopping with an error. +\item \code{verbose} (logical, default \code{FALSE}): request extra informational output +from validation. +\item \code{Hierarchy} (integer, e.g. \code{1}, optional; zip inputs only): when the zip +file contains multiple hierarchies, selects which hierarchy to import. +Note: \code{checkDataTypes} is controlled by the \code{castToCorrectDataTypes} +argument of this function and should not be supplied via \code{...}. +}} } \value{ A RDBESDataObject @@ -61,6 +72,11 @@ any tables in the first input are overwritten by other inputs a warning is given. You should not input different hierarchy files; this function will not combine them. +If the zip contains multiple hierarchies (e.g., H1 and H5 within the same +archive), you can select which one to import by passing \code{Hierarchy} via +\code{...}, for example: \code{Hierarchy = 1}. If \code{Hierarchy} is not specified and the +zip contains multiple hierarchies, an error is raised prompting you to set it. + \emph{\strong{CSV file inputs}} This \code{input} should be a path to a folder of \code{csv} files. These can be the \code{csv} files downloaded from RDBES (e.g. an unzipped hierarchy), or \emph{any} set @@ -81,5 +97,6 @@ This \code{input} produces an empty \code{RDBESDataObject}, i.e. all tables with correct data classes but the tables will be empty. } \examples{ +# Create an empty object myEmptyRDBESObject <- createRDBESDataObject(input = NULL) } diff --git a/man/filterRDBESDataObject.Rd b/man/filterRDBESDataObject.Rd index bc6f2952..51ec44a7 100644 --- a/man/filterRDBESDataObject.Rd +++ b/man/filterRDBESDataObject.Rd @@ -58,5 +58,16 @@ myFilteredObject <- filterRDBESDataObject(myH1RawObject, fieldsToFilter = myFields, valuesToFilter = myValues ) + +# Inverse filtering (exclude certain values) +# Example: keep all DE rows except those with DEid in `excludedValues` +# Compute the complement of the excluded set using setdiff +allValues <- unique(myH1RawObject$DE$DEid) +excludedValues <- c(5351) +myInverseFiltered <- filterRDBESDataObject( + myH1RawObject, + fieldsToFilter = "DEid", + valuesToFilter = setdiff(allValues, excludedValues) +) } } diff --git a/tests/testthat/test-createRDBESDataObject.R b/tests/testthat/test-createRDBESDataObject.R index 6feb1eec..32d4e9b0 100644 --- a/tests/testthat/test-createRDBESDataObject.R +++ b/tests/testthat/test-createRDBESDataObject.R @@ -75,7 +75,7 @@ capture.output({ ## suppresses printing of console output when running test() genObj <- expect_error( createRDBESDataObject(paste0(dirH1, zipFiles), castToCorrectDataTypes = TRUE), - "The zip file contains multiple hierarchies.\nTo import a selected hierarchy, please provide the hierarchy as an argument e.g like:\nHierachy = 1" + "The zip file contains multiple hierarchies.\nTo import a selected hierarchy, please provide the hierarchy as an argument e.g like:\nHierarchy = 1" ) }) @@ -102,7 +102,7 @@ capture.output({ ## suppresses printing of console output when running test() createRDBESDataObject(paste0(dirH1, zipFiles), castToCorrectDataTypes = TRUE, Hierarchy = "H1"), - "The zip file does not contain the hierarchy specified. The options are: 1, 2\nPlease provide a valid hierarchy as an argument. e.g like:\nHierachy = 1" + "The zip file does not contain the hierarchy specified. The options are: 1, 2\nPlease provide a valid hierarchy as an argument. e.g like:\nHierarchy = 1" ) }) diff --git a/tests/testthat/test-validateRDBESDataObject.R b/tests/testthat/test-validateRDBESDataObject.R index 6fd4fbb6..6730e992 100644 --- a/tests/testthat/test-validateRDBESDataObject.R +++ b/tests/testthat/test-validateRDBESDataObject.R @@ -218,9 +218,9 @@ test_that("validateRDBESDataObject produces correct text output", { # Tests for CHECK 5b: SL non-empty requires non-empty IS test_that("validateRDBESDataObject errors when SL has rows and IS is NULL", { # Build minimal object: non-empty SL, NULL IS - myObject <- H1Example + myObject <- data.table::copy(H1Example) - myObject["IS"]<- list(NULL) + myObject["IS"] <- list(NULL) expect_error( validateRDBESDataObject(objectToCheck = myObject, verbose = FALSE), From 6c9bb1bfaac161bdeaf2293620368c5d754194d3 Mon Sep 17 00:00:00 2001 From: Richard Meitern Date: Thu, 16 Oct 2025 17:41:41 +0300 Subject: [PATCH 21/30] closes #251 numeric and integer are considered compatible both ways now --- R/validateRDBESDataObjectDataTypes.R | 9 ++++--- .../test-validateRDBESDataObjectDataTypes.R | 25 +++++++++++++++++++ 2 files changed, 30 insertions(+), 4 deletions(-) diff --git a/R/validateRDBESDataObjectDataTypes.R b/R/validateRDBESDataObjectDataTypes.R index 881d1071..b72e418c 100644 --- a/R/validateRDBESDataObjectDataTypes.R +++ b/R/validateRDBESDataObjectDataTypes.R @@ -61,10 +61,11 @@ validateRDBESDataObjectDataTypes <- function(objectToCheck){ names(myDiffs)[which(names(myDiffs) == "RDataType.x")] <- "RDataType_expected" names(myDiffs)[which(names(myDiffs) == "RDataType.y")] <- "RDataType_actual" - # Let's say that if we were expecting a numeric but got an intger that it's ok - myDiffs <- - myDiffs[!(myDiffs$RDataType_expected == "numeric" & - myDiffs$RDataType_actual == "integer"),] + # Treat integer and numeric as compatible in both directions (issue #251) + myDiffs <- myDiffs[! + ((myDiffs$RDataType_expected == "numeric" & myDiffs$RDataType_actual == "integer") | + (myDiffs$RDataType_expected == "integer" & myDiffs$RDataType_actual == "numeric")) + ,] # Return the differences myDiffs diff --git a/tests/testthat/test-validateRDBESDataObjectDataTypes.R b/tests/testthat/test-validateRDBESDataObjectDataTypes.R index 2bf5baa2..5577d4d5 100644 --- a/tests/testthat/test-validateRDBESDataObjectDataTypes.R +++ b/tests/testthat/test-validateRDBESDataObjectDataTypes.R @@ -46,4 +46,29 @@ test_that("checkRDBESDataObjectDataTypes returns 2 differences for an object wit expect_equal(numberOfDifferences,2) }) +test_that("checkRDBESDataObjectDataTypes treats expected integer vs actual numeric as compatible (issue #251)", { + + # Here DEid, DEyear and DEhierarchy are provided as numeric (double) + # while the mapping expects integer. We want this to be considered OK. + myDE <- data.frame("DEid" = c(1,2), # numeric (not 1L, 2L) + "DErecType" = c("DE","DE"), + "DEsampScheme" = c("WGRDBES-EST TEST","WGRDBES-EST TEST"), + "DEsampSchemeType" = c("NatPilCF","NatPilCF"), + "DEyear" = c(1965,1965), # numeric + "DEstratumName" = c("1","2"), + "DEhierarchyCor" = c("Y","N"), + "DEhierarchy" = c(1,1), # numeric + "DEsamp" = c("Y","N"), + "DEnoSampReason" = c ("",""), + stringsAsFactors = FALSE + ) + + myObject <- list() + myObject[["DE"]] <- myDE + + myDiffs <- validateRDBESDataObjectDataTypes(myObject) + numberOfDifferences <- nrow(myDiffs) + expect_equal(numberOfDifferences,0) +}) + }) ## end capture.output From ee48e2a7786c2fad8bec7544079172207b020ba1 Mon Sep 17 00:00:00 2001 From: nmprista <31890653+nmprista@users.noreply.github.com> Date: Thu, 16 Oct 2025 17:24:54 +0200 Subject: [PATCH 22/30] update textbook data code to new RDBES version update to latest RDBES version (April 2025) code made more consistent and easy to compare between different scripts deleted old migration code created a new run_all script textbooks successfully uploaded 16/10/2025 #248 #247 --- .../BuildTextBookDataUpload_All.R | 11 + ...ookDataUpload_Pckg_SDAResources_agsrs_H1.R | 386 +----- ...kDataUpload_Pckg_SDAResources_agstrat_H1.R | 488 ++------ ...kDataUpload_Pckg_SDAResources_algebra_H1.R | 326 +---- ...ookDataUpload_Pckg_SDAResources_coots_H1.R | 328 +---- ...ad_Pckg_SDAResources_coots_multistage_H1.R | 1064 +++++++---------- ...tBookDataUpload_Pckg_SDAResources_gpa_H1.R | 431 ++----- ...kDataUpload_Pckg_SDAResources_schools_H1.R | 433 ++----- ...ookDataUpload_Pckg_survey_apiclus1_v2_H1.R | 521 +++----- ...xtBookDataUpload_Pckg_survey_apiclus2_H1.R | 569 +++------ ...ookDataUpload_Pckg_survey_apiclus2_v2_H1.R | 514 +++----- ...xtBookDataUpload_Pckg_survey_apistrat_H1.R | 502 ++------ ...kg_SDAResources_agstrat_H1_to_new_format.R | 50 - ...te_Pckg_survey_apiclus2_H1_to_new_format.R | 50 - ...te_Pckg_survey_apistrat_H1_to_new_format.R | 50 - 15 files changed, 1451 insertions(+), 4272 deletions(-) create mode 100644 data-raw/exampleData/TextBookExamples/BuildTextBookDataUpload_All.R delete mode 100644 data-raw/exampleData/TextBookExamples/Migrate_Pckg_SDAResources_agstrat_H1_to_new_format.R delete mode 100644 data-raw/exampleData/TextBookExamples/Migrate_Pckg_survey_apiclus2_H1_to_new_format.R delete mode 100644 data-raw/exampleData/TextBookExamples/Migrate_Pckg_survey_apistrat_H1_to_new_format.R diff --git a/data-raw/exampleData/TextBookExamples/BuildTextBookDataUpload_All.R b/data-raw/exampleData/TextBookExamples/BuildTextBookDataUpload_All.R new file mode 100644 index 00000000..b9e98c61 --- /dev/null +++ b/data-raw/exampleData/TextBookExamples/BuildTextBookDataUpload_All.R @@ -0,0 +1,11 @@ +source("BuildTextBookDataUpload_Pckg_SDAResources_agsrs_H1.R") +source("BuildTextBookDataUpload_Pckg_SDAResources_agstrat_H1.R") +source("BuildTextBookDataUpload_Pckg_SDAResources_algebra_H1.R") +source("BuildTextBookDataUpload_Pckg_SDAResources_coots_H1.R") +source("BuildTextBookDataUpload_Pckg_SDAResources_coots_multistage_H1.R") +source("BuildTextBookDataUpload_Pckg_SDAResources_gpa_H1.R") +source("BuildTextBookDataUpload_Pckg_SDAResources_schools_H1.R") +source("BuildTextBookDataUpload_Pckg_survey_apiclus1_v2_H1.R") +source("BuildTextBookDataUpload_Pckg_survey_apiclus2_H1.R") +source("BuildTextBookDataUpload_Pckg_survey_apiclus2_v2_H1.R") +source("BuildTextBookDataUpload_Pckg_survey_apistrat_H1.R") diff --git a/data-raw/exampleData/TextBookExamples/BuildTextBookDataUpload_Pckg_SDAResources_agsrs_H1.R b/data-raw/exampleData/TextBookExamples/BuildTextBookDataUpload_Pckg_SDAResources_agsrs_H1.R index c627b825..2999be55 100644 --- a/data-raw/exampleData/TextBookExamples/BuildTextBookDataUpload_Pckg_SDAResources_agsrs_H1.R +++ b/data-raw/exampleData/TextBookExamples/BuildTextBookDataUpload_Pckg_SDAResources_agsrs_H1.R @@ -63,22 +63,6 @@ -# 1 DEid [] - int -# 2 DErecordType [M] - string -# 3 DEsamplingScheme [M] - SamplingScheme -# 4 DEsamplingSchemeType [M] - SamplingSchemeType -# 5 DEyear [M] - Year -# 6 DEstratumName [M] - StringLength100 -# 7 DEhierarchyCorrect [M] - YesNoFields -# 8 DEhierarchy [M] - RDBESUpperHierarchy -# 9 DEsampled [DV,M] - YesNoFields -# 10 DEreasonNotSampled [DV,O] - ReasonForNotSampling -# 11 DEnonResponseCollected [DV,O] - YesNoFields -# 12 DEauxiliaryVariableTotal [DV,O] - DecimalPrec3 -# 13 DEauxiliaryVariableValue [DV,O] - DecimalPrec3 -# 14 DEauxiliaryVariableName [DV,O] - AuxiliaryVariableName -# 15 DEauxiliaryVariableUnit[DV,O]-MUNIT - DE_df_base<-expand.grid(DEyear=DEyear, DEstratumName="U",stringsAsFactors=F) @@ -104,13 +88,6 @@ DE_df<-data.frame( #===SD============ - # x -# 1 SDid [M] - int -# 2 DEid [M] - int -# 3 SDrecordType [M] - string -# 4 SDcountry [M] - ISO_3166 -# 5 SDinstitution [M] - EDMO - SD_df<-data.frame( SDid=DE_df$DEid, DEid=DE_df$DEid, @@ -122,41 +99,6 @@ SD_df<-data.frame( #===VS============ - - # x -# 1 VSid [M] - int -# 2 SDid [M] - int -# 3 VDid [M] - int -# 4 TEid [M/O] - int -# 5 VSrecordType [M] - string -# 6 VSsequenceNumber [M] - int -# 7 VSencryptedVesselCode [M] - StringLength100 -# 8 VSstratification [DV,M] - YesNoFields -# 9 VSstratumName [DV,M] - StringLength100 -# 10 VSclustering [DV,M] - Clustering -# 11 VSclusterName [DV,M] - StringLength100 -# 12 VSsampler [O] - Sampler -# 13 VSnumberTotal [DV,O] - int -# 14 VSnumberSampled [DV,O] - int -# 15 VSselectionProb [DV,O] - Decimal0.0000000000000001-1 -# 16 VSinclusionProb [DV,O] - Decimal0.0000000000000001-1 -# 17 VSselectionMethod [DV,M] - SelectionMethod -# 18 VSunitName [DV,M] - StringLength100 -# 19 VSselectionMethodCluster [DV,O] - SelectionMethod -# 20 VSnumberTotalClusters [DV,O] - int -# 21 VSnumberSampledClusters [DV,O] - int -# 22 VSselectionProbCluster [DV,O] - Decimal0.0000000000000001-1 -# 23 VSinclusionProbCluster [DV,O] - Decimal0.0000000000000001-1 -# 24 VSsampled [DV,M] - YesNoFields -# 25 VSreasonNotSampled [DV,O] - ReasonForNotSampling -# 26 VSnonResponseCollected [DV,O] - YesNoFields -# 27 VSauxiliaryVariableTotal [DV,O] - DecimalPrec3 -# 28 VSauxiliaryVariableValue [DV,O] - DecimalPrec3 -# 29 VSauxiliaryVariableName [DV,O] - AuxiliaryVariableName -# 30 VSauxiliaryVariableUnit [DV,O] - MUNIT - -#check_All_fields("VS") - # adds VSid to dataset dataset$VSid <- 1:nrow(dataset) @@ -211,50 +153,6 @@ VS_df$VSnumberTotal<-3078 #====FT=========== - -# 1 FTid [M] - int -# 2 OSid [M/O] - int -# 3 VSid [M/O] - int -# 4 VDid [M] - int -# 5 SDid [M/O] - int -# 6 FOid [M/O] - int -# 7 TEid [M/O] - int -# 8 FTrecordType [M] - string -# 9 FTencryptedVesselCode [M] - StringLength100 -# 10 FTsequenceNumber [M] - int -# 11 FTstratification [DV,M] - YesNoFields -# 12 FTstratumName [DV,M] - StringLength100 -# 13 FTclustering [DV,M] - Clustering -# 14 FTclusterName [DV,M] - StringLength100 -# 15 FTsampler [O] - Sampler -# 16 FTsamplingType [M] - SamplingContext -# 17 FTnumberOfHaulsOrSets [M/O] - IntZeroToMax -# 18 FTdepartureLocation [M/O] - Harbour_LOCODE -# 19 FTdepartureDate [M/O] - Date -# 20 FTdepartureTime [M/O] - StringLength60 -# 21 FTarrivalLocation [M] - Harbour_LOCODE -# 22 FTarrivalDate [M] - Date -# 23 FTarrivalTime [M/O] - StringLength60 -# FTdominantLandingDate -# 24 FTnumberTotal [DV,O] - int -# 25 FTnumberSampled [DV,O] - int -# 26 FTselectionProb [DV,O] - Decimal0-1 -# 27 FTinclusionProb [DV,O] - Decimal0-1 -# 28 FTselectionMethod [DV,M] - SelectionMethod -# 29 FTunitName [DV,M] - StringLength100 -# 30 FTselectionMethodCluster [DV,O] - SelectionMethod -# 31 FTnumberTotalClusters [DV,O] - int -# 32 FTnumberSampledClusters [DV,O] - int -# 33 FTselectionProbCluster [DV,O] - Decimal0-1 -# 34 FTinclusionProbCluster [DV,O] - Decimal0-1 -# 35 FTsampled [DV,M] - YesNoFields -# 36 FTreasonNotSampled [DV,O] - ReasonForNotSampling -# 37 FTnonResponseCollected [DV,O] - YesNoFields -# 38 FTauxiliaryVariableTotal [DV,O] - DecimalPrec3 -# 39 FTauxiliaryVariableValue [DV,O] - DecimalPrec3 -# 40 FTauxiliaryVariableName [DV,O] - AuxiliaryVariableName -# 41 FTauxiliaryVariableUnit [DV,O] - MUNIT - FT_df <- data.frame( FTid = dataset$VSid,#[M] - int OSid = "",# [M/O] - int @@ -305,73 +203,6 @@ FT_df <- data.frame( #====FO=========== -# 1 FOid [M] - int -# 2 FTid [M/O] - int -# 3 SDid [M/O] - int -# 4 FOrecordType [M] - string -# 5 FOstratification [DV,M] - YesNoFields -# 6 FOsequenceNumber [M] - int -# 7 FOstratumName [DV,M] - StringLength100 -# 8 FOclustering [DV,M] - Clustering -# 9 FOclusterName [DV,M] - StringLength100 -# 10 FOsampler [O] - Sampler -# 11 FOaggregationLevel [M] - AggregationLevel -# 12 FOvalidity [M] - ValidityFlag -# 13 FOcatchReg [M] - CatchRegistration -# 14 FOstartDate [O] - Date -# 15 FOstartTime [O] - Time -# 16 FOendDate [M] - Date -# 17 FOendTime [M/O] - Time -# 18 FOduration [M/O] - int -# FOfishingDurationDataBasis -# 19 FOdurationSource [M] - DurationSource -# 20 FOhandlingTime [O] - int -# 21 FOstartLat [O] - Decimal-90.000000-90.000000 -# 22 FOstartLon [O] - Decimal-180.000000-180.000000 -# 23 FOstopLat [O] - Decimal-90.000000-90.000000 -# 24 FOstopLon [O] - Decimal-180.000000-180.000000 -# 25 FOexclusiveEconomicZoneIndicator [O] - ISO_3166 -# 26 FOarea [M] - ICES_Area -# 27 FOrectangle [O] - StatRec -# 28 FOfisheriesManagementUnit [O] - AreaNonFAO -# 29 FOgsaSubarea [M] - Areas_GFCM_GSA -# 30 FOjurisdictionArea [O] - JurisdictionArea -# 31 FOfishingDepth [O] - int -# 32 FOwaterDepth [O] - int -# 33 FOnationalFishingActivity [O] - NationalFishingActivity -# 34 FOmetier5 [O] - Metier5_FishingActivity -# 35 FOmetier6 [M] - Metier6_FishingActivity -# 36 FOgear [M] - GearType -# 37 FOmeshSize [O] - int -# 38 FOselectionDevice [O] - SelectionDevice -# 39 FOselectionDeviceMeshSize [O] - int -# 40 FOtargetSpecies [O] - TargetSpecies -# 41 FOincidentalByCatchMitigationDeviceFirst [M] - BycatchMitigationDevice -# 42 FOincidentalByCatchMitigationDeviceTargetFirst [M] - BycatchMitigationDeviceTarget -# 43 FOincidentalByCatchMitigationDeviceSecond [M] - BycatchMitigationDevice -# 44 FOincidentalByCatchMitigationDeviceTargetSecond [M] - BycatchMitigationDeviceTarget -# 45 FOgearDimensions [O] - int -# 46 FOobservationCode [M] - ObservationCode -# 47 FOnumberTotal [DV,O] - int -# 48 FOnumberSampled [DV,O] - int -# 49 FOselectionProb [DV,O] - Decimal0.0000000000000001-1 -# 50 FOinclusionProb [DV,O] - Decimal0.0000000000000001-1 -# 51 FOselectionMethod [DV,M] - SelectionMethod -# 52 FOunitName [DV,M] - StringLength100 -# 53 FOselectionMethodCluster [DV,O] - SelectionMethod -# 54 FOnumberTotalClusters [DV,O] - int -# 55 FOnumberSampledClusters [DV,O] - int -# 56 FOselectionProbCluster [DV,O] - Decimal0.0000000000000001-1 -# 57 FOinclusionProbCluster [DV,O] - Decimal0.0000000000000001-1 -# 58 FOsampled [DV,M] - YesNoFields -# 59 FOreasonNotSampled [DV,O] - ReasonForNotSampling -# 60 FOnonResponseCollected [DV,O] - YesNoFields -# 61 FOauxiliaryVariableTotal [DV,O] - DecimalPrec3 -# 62 FOauxiliaryVariableValue [DV,O] - DecimalPrec3 -# 63 FOauxiliaryVariableName [DV,O] - AuxiliaryVariableName -# 64 FOauxiliaryVariableUnit [DV,O] - MUNIT - - FO_df <- data.frame( FOid = dataset$VSid, FTid = dataset$VSid, @@ -398,14 +229,14 @@ FO_df <- data.frame( FOstartLon="", # ATT! FOstopLat="", FOstopLon="", - FOexclusiveEconomicZoneIndicator = "", # might differ!! + FOexclusiveEconomicZoneIndicator = "", # FOarea = "27.3.a.21", #M FOrectangle = "", FOfisheriesManagementUnit = "", FOgsaSubarea = "NotApplicable", #M FOjurisdictionArea = "", FOgeographicalDataBasis = "Measured", - FOgeographicalSource = "", + FOgeographicalSource = "Crew", FOfishingDepth = "", FOwaterDepth = "", FOnationalFishingActivity = "", @@ -448,45 +279,6 @@ stringsAsFactors=FALSE #===SS============ -# 1 SSid [] - int -# 2 LEid [M/O] - int -# 3 FOid [M/O] - int -# 4 FTid [M/O] - int -# 5 OSid [M/O] - int -# 6 TEid [M/O] - int -# 7 SLid [M] - int -# 8 SSrecordType [M] - string -# 9 SSsequenceNumber [M] - int -# 10 SSstratification [DV,M] - YesNoFields -# 11 SSstratumName [DV,M] - StringLength100 -# 12 SSclustering [DV,M] - Clustering -# 13 SSclusterName [DV,M] - StringLength100 -# 14 SSobservationActivityType [DV,M] - ObservationActivityType -# 15 SScatchFraction [M] - CatchFraction -# 16 SSobservationType [DV,M] - ObservationMethod -# 17 SSsampler [O] - Sampler -# 18 SSspeciesListName [M] - StringLength100 -# 19 SSuseForCalculateZero [M] - YesNoFields -# 20 SStimeTotal [O] - int -# 21 SStimeSampled [O] - int -# 22 SSnumberTotal [DV,O] - int -# 23 SSnumberSampled [DV,O] - int -# 24 SSselectionProb [DV,O] - Decimal0.0000000000000001-1 -# 25 SSinclusionProb [DV,O] - Decimal0.0000000000000001-1 -# 26 SSselectionMethod [DV,M] - SelectionMethod -# 27 SSunitName [DV,M] - StringLength100 -# 28 SSselectionMethodCluster [DV,O] - SelectionMethod -# 29 SSnumberTotalClusters [DV,O] - int -# 30 SSnumberSampledClusters [DV,O] - int -# 31 SSselectionProbCluster [DV,O] - Decimal0.0000000000000001-1 -# 32 SSinclusionProbCluster [DV,O] - Decimal0.0000000000000001-1 -# 33 SSsampled [DV,M] - YesNoFields -# 34 SSreasonNotSampled [DV,O] - ReasonForNotSampling -# 35 SSnonResponseCollected [DV,O] - YesNoFields -# 36 SSauxiliaryVariableTotal [DV,O] - DecimalPrec3 -# 37 SSauxiliaryVariableValue [DV,O] - DecimalPrec3 -# 38 SSauxiliaryVariableName [DV,O] - AuxiliaryVariableName -# 39 SSauxiliaryVariableUnit [DV,O] - MUNIT SS_df<-data.frame( SSid = dataset$VSid, @@ -535,122 +327,66 @@ SS_df<-data.frame( #====SA=========== - -# 1 SAid [] - int -# 2 SSid [] - int -# 3 SArecordType [M] - string -# 4 SAsequenceNumber [M] - int -# 5 SAparentSequenceNumber [O] - int -# 6 SAstratification [DV,M] - YesNoFields -# 7 SAstratumName [DV,M] - StringLength100 -# 8 SAspeciesCode [M] - SpecWoRMS -# 9 SAspeciesCodeFAO [O] - SpecASFIS -# 10 SAstateOfProcessing [M] - StateOfProcessing -# 11 SApresentation [M] - ProductPresentation -# 12 SAspecimensState [M] - SpecimensState -# 13 SAcatchCategory [M] - CatchCategory -# 14 SAlandingCategory [O] - LandingCategory -# 15 SAcommSizeCatScale [O] - CommercialSizeCategoryScale -# 16 SAcommSizeCat [O] - CommercialSizeCategory -# 17 SAsex [M] - SEXCO -# 18 SAexclusiveEconomicZoneIndicator [O] - ISO_3166 -# 19 SAarea [O] - ICES_Area -# 20 SArectangle [O] - StatRec -# 21 SAfisheriesManagementUnit [O] - AreaNonFAO -# 22 SAgsaSubarea [M] - Areas_GFCM_GSA -# 23 SAjurisdictionArea [O] - JurisdictionArea -# 24 SAnationalFishingActivity [O] - NationalFishingActivity -# 25 SAmetier5 [O] - Metier5_FishingActivity -# 26 SAmetier6 [O] - Metier6_FishingActivity -# 27 SAgear [O] - GearType -# 28 SAmeshSize [O] - int -# 29 SAselectionDevice [O] - SelectionDevice -# 30 SAselectionDeviceMeshSize [O] - int -# 31 SAunitType [M] - SamplingUnit -# 32 SAtotalWeightLive [O] - int -# 33 SAsampleWeightLive [O] - int -# 34 SAnumberTotal [DV,O] - Decimal0.1-999999999 -# 35 SAnumberSampled [DV,O] - Decimal0.1-999999999 -# 36 SAselectionProb [DV,O] - Decimal0-1 -# 37 SAinclusionProb [DV,O] - Decimal0-1 -# 38 SAselectionMethod [DV,M] - SelectionMethod -# 39 SAunitName [DV,M] - StringLength100 -# 40 SAlowerHierarchy [M/O] - LowerHierarchy -# 41 SAsampler [O] - Sampler -# 42 SAsampled [DV,M] - YesNoFields -# 43 SAreasonNotSampled [DV,O] - ReasonForNotSampling -# 44 SAnonResponseCollected [DV,O] - YesNoFields -# 45 SAreasonNotSampledFM [DV,O] - ReasonForNotSampling -# 46 SAreasonNotSampledBV [DV,O] - ReasonForNotSampling -# 47 SAtotalWeightMeasured [O] - int -# 48 SAsampleWeightMeasured [O] - int -# 49 SAconversionFactorMeasLive [O] - Decimal0.900-10 -# 50 SAauxiliaryVariableTotal [DV,O] - DecimalPrec3 -# 51 SAauxiliaryVariableValue [DV,O] - DecimalPrec3 -# 52 SAauxiliaryVariableName [DV,O] - AuxiliaryVariableName -# 53 SAauxiliaryVariableUnit [DV,O] - MUNIT - - SA_df<-data.frame( - SAid = dataset$VSid, - SSid = dataset$VSid, - SArecordType = "SA", #M - SAsequenceNumber = dataset$VSid, #M - SAparentSequenceNumber = "", - SAstratification = "N", #M - SAstratumName = "U", #M - SAspeciesCode = 107254, #M - SAspeciesCodeFAO = "NEP", - SAstateOfProcessing = "FRE", #M - SApresentation = "WHL", #M - SAspecimensState = "DeadOrZeroProbSurvival", #M - SAcatchCategory = "Lan", #M - SAlandingCategory = "", - SAcommSizeCatScale = "", - SAcommSizeCat = "", - SAsex = "U", #M - SAexclusiveEconomicZoneIndicator = "", - SAarea = ifelse(dataset$farmcat=='large','27.4.a','27.3.a.20'), # this is the domain - SArectangle = "", - SAfisheriesManagementUnit = "", - SAgsaSubarea = "NotApplicable", #M - SAjurisdictionArea = "", - SAgeographicalDataBasis = "Measured", - SAgeographicalSource = "", - SAnationalFishingActivity = "", - SAmetier5 = "", - SAmetier6 = "", - SAgear = "", - SAgearDataBasis = "Measured", - SAgearSource = "", - SAmeshSize = "", - SAselectionDevice = "", - SAselectionDeviceMeshSize = "", - SAunitType = "Box", #M - SAtotalWeightLive = "", - SAsampleWeightLive = "", - SAnumberTotal = 1, - SAnumberSampled = 1, - SAselectionProb = 1, - SAinclusionProb = 1, - SAselectionMethod = "CENSUS", #M - SAunitName = dataset$VSid, #M - SAlowerHierarchy = "D", - SAsampler = "Observer", - SAsampled = "N", #M - SAreasonNotSampled = "", - SAnonResponseCollected = "N", - SAreasonNotSampledFM = "", - SAreasonNotSampledBV = "", - SAtotalWeightMeasured = dataset[[target_var]], - SAtotalWeightMeasuredDataBasis = "Measured", - SAsampleWeightMeasured = dataset[[target_var]], - SAconversionFactorMeasLive = 1, - SAauxiliaryVariableTotal = "", - SAauxiliaryVariableValue = "", - SAauxiliaryVariableName = "", - SAauxiliaryVariableUnit = "", - stringsAsFactors=FALSE +SAid = dataset$VSid, +SSid = dataset$VSid, +SArecordType = "SA", #M +SAsequenceNumber = dataset$VSid, #M +SAparentSequenceNumber = "", +SAstratification = "N", #M +SAstratumName = "U", #M +SAspeciesCode = 107254, #M +SAspeciesCodeFAO = "NEP", +SAstateOfProcessing = "FRE", #M +SApresentation = "WHL", #M +SAspecimensState = "DeadOrZeroProbSurvival", #M +SAcatchCategory = "Lan", #M +SAlandingCategory = "", +SAcommSizeCatScale = "", +SAcommSizeCat = "", +SAsex = "U", #M +SAexclusiveEconomicZoneIndicator = "", +SAarea = ifelse(dataset$farmcat=='large','27.4.a','27.3.a.20'), # this is the domain +SArectangle = "", +SAfisheriesManagementUnit = "", +SAgsaSubarea = "NotApplicable", #M +SAjurisdictionArea = "", +SAgeographicalDataBasis = "Measured", +SAgeographicalSource = "", +SAnationalFishingActivity = "", +SAmetier5 = "", +SAmetier6 = "", +SAgear = "", +SAgearDataBasis = "Measured", +SAgearSource = "", +SAmeshSize = "", +SAselectionDevice = "", +SAselectionDeviceMeshSize = "", +SAunitType = "Box", #M +SAtotalWeightLive = "", +SAsampleWeightLive = "", +SAnumberTotal = 1, +SAnumberSampled = 1, +SAselectionProb = 1, +SAinclusionProb = 1, +SAselectionMethod = "CENSUS", #M +SAunitName = dataset$VSid, #M +SAlowerHierarchy = "D", +SAsampler = "Observer", +SAsampled = "N", #M +SAreasonNotSampled = "", +SAnonResponseCollected = "N", +SAreasonNotSampledFM = "", +SAreasonNotSampledBV = "", +SAtotalWeightMeasured = dataset[[target_var]], +SAtotalWeightMeasuredDataBasis = "Measured", +SAsampleWeightMeasured = dataset[[target_var]], +SAconversionFactorMeasLive = 1, +SAauxiliaryVariableTotal = "", +SAauxiliaryVariableValue = "", +SAauxiliaryVariableName = "", +SAauxiliaryVariableUnit = "", +stringsAsFactors=FALSE ) diff --git a/data-raw/exampleData/TextBookExamples/BuildTextBookDataUpload_Pckg_SDAResources_agstrat_H1.R b/data-raw/exampleData/TextBookExamples/BuildTextBookDataUpload_Pckg_SDAResources_agstrat_H1.R index a15781af..dba43362 100644 --- a/data-raw/exampleData/TextBookExamples/BuildTextBookDataUpload_Pckg_SDAResources_agstrat_H1.R +++ b/data-raw/exampleData/TextBookExamples/BuildTextBookDataUpload_Pckg_SDAResources_agstrat_H1.R @@ -59,22 +59,6 @@ -# 1 DEid [] - int -# 2 DErecordType [M] - string -# 3 DEsamplingScheme [M] - SamplingScheme -# 4 DEsamplingSchemeType [M] - SamplingSchemeType -# 5 DEyear [M] - Year -# 6 DEstratumName [M] - StringLength100 -# 7 DEhierarchyCorrect [M] - YesNoFields -# 8 DEhierarchy [M] - RDBESUpperHierarchy -# 9 DEsampled [DV,M] - YesNoFields -# 10 DEreasonNotSampled [DV,O] - ReasonForNotSampling -# 11 DEnonResponseCollected [DV,O] - YesNoFields -# 12 DEauxiliaryVariableTotal [DV,O] - DecimalPrec3 -# 13 DEauxiliaryVariableValue [DV,O] - DecimalPrec3 -# 14 DEauxiliaryVariableName [DV,O] - AuxiliaryVariableName -# 15 DEauxiliaryVariableUnit[DV,O]-MUNIT - DE_df_base<-expand.grid(DEyear=DEyear, DEstratumName="U",stringsAsFactors=F) @@ -100,13 +84,6 @@ DE_df<-data.frame( #===SD============ - # x -# 1 SDid [M] - int -# 2 DEid [M] - int -# 3 SDrecordType [M] - string -# 4 SDcountry [M] - ISO_3166 -# 5 SDinstitution [M] - EDMO - SD_df<-data.frame( SDid=DE_df$DEid, DEid=DE_df$DEid, @@ -118,41 +95,6 @@ SD_df<-data.frame( #===VS============ - - # x -# 1 VSid [M] - int -# 2 SDid [M] - int -# 3 VDid [M] - int -# 4 TEid [M/O] - int -# 5 VSrecordType [M] - string -# 6 VSsequenceNumber [M] - int -# 7 VSencryptedVesselCode [M] - StringLength100 -# 8 VSstratification [DV,M] - YesNoFields -# 9 VSstratumName [DV,M] - StringLength100 -# 10 VSclustering [DV,M] - Clustering -# 11 VSclusterName [DV,M] - StringLength100 -# 12 VSsampler [O] - Sampler -# 13 VSnumberTotal [DV,O] - int -# 14 VSnumberSampled [DV,O] - int -# 15 VSselectionProb [DV,O] - Decimal0.0000000000000001-1 -# 16 VSinclusionProb [DV,O] - Decimal0.0000000000000001-1 -# 17 VSselectionMethod [DV,M] - SelectionMethod -# 18 VSunitName [DV,M] - StringLength100 -# 19 VSselectionMethodCluster [DV,O] - SelectionMethod -# 20 VSnumberTotalClusters [DV,O] - int -# 21 VSnumberSampledClusters [DV,O] - int -# 22 VSselectionProbCluster [DV,O] - Decimal0.0000000000000001-1 -# 23 VSinclusionProbCluster [DV,O] - Decimal0.0000000000000001-1 -# 24 VSsampled [DV,M] - YesNoFields -# 25 VSreasonNotSampled [DV,O] - ReasonForNotSampling -# 26 VSnonResponseCollected [DV,O] - YesNoFields -# 27 VSauxiliaryVariableTotal [DV,O] - DecimalPrec3 -# 28 VSauxiliaryVariableValue [DV,O] - DecimalPrec3 -# 29 VSauxiliaryVariableName [DV,O] - AuxiliaryVariableName -# 30 VSauxiliaryVariableUnit [DV,O] - MUNIT - -#check_All_fields("VS") - # adds VSid to dataset dataset$VSid <- 1:nrow(dataset) @@ -207,167 +149,56 @@ VS_df$VSnumberTotal<-strataSize[VS_df$VSstratumName] #====FT=========== - -# 1 FTid [M] - int -# 2 OSid [M/O] - int -# 3 VSid [M/O] - int -# 4 VDid [M] - int -# 5 SDid [M/O] - int -# 6 FOid [M/O] - int -# 7 TEid [M/O] - int -# 8 FTrecordType [M] - string -# 9 FTencryptedVesselCode [M] - StringLength100 -# 10 FTsequenceNumber [M] - int -# 11 FTstratification [DV,M] - YesNoFields -# 12 FTstratumName [DV,M] - StringLength100 -# 13 FTclustering [DV,M] - Clustering -# 14 FTclusterName [DV,M] - StringLength100 -# 15 FTsampler [O] - Sampler -# 16 FTsamplingType [M] - SamplingContext -# 17 FTnumberOfHaulsOrSets [M/O] - IntZeroToMax -# 18 FTdepartureLocation [M/O] - Harbour_LOCODE -# 19 FTdepartureDate [M/O] - Date -# 20 FTdepartureTime [M/O] - StringLength60 -# 21 FTarrivalLocation [M] - Harbour_LOCODE -# 22 FTarrivalDate [M] - Date -# 23 FTarrivalTime [M/O] - StringLength60 -# FTdominantLandingDate -# 24 FTnumberTotal [DV,O] - int -# 25 FTnumberSampled [DV,O] - int -# 26 FTselectionProb [DV,O] - Decimal0-1 -# 27 FTinclusionProb [DV,O] - Decimal0-1 -# 28 FTselectionMethod [DV,M] - SelectionMethod -# 29 FTunitName [DV,M] - StringLength100 -# 30 FTselectionMethodCluster [DV,O] - SelectionMethod -# 31 FTnumberTotalClusters [DV,O] - int -# 32 FTnumberSampledClusters [DV,O] - int -# 33 FTselectionProbCluster [DV,O] - Decimal0-1 -# 34 FTinclusionProbCluster [DV,O] - Decimal0-1 -# 35 FTsampled [DV,M] - YesNoFields -# 36 FTreasonNotSampled [DV,O] - ReasonForNotSampling -# 37 FTnonResponseCollected [DV,O] - YesNoFields -# 38 FTauxiliaryVariableTotal [DV,O] - DecimalPrec3 -# 39 FTauxiliaryVariableValue [DV,O] - DecimalPrec3 -# 40 FTauxiliaryVariableName [DV,O] - AuxiliaryVariableName -# 41 FTauxiliaryVariableUnit [DV,O] - MUNIT - FT_df <- data.frame( - FTid = dataset$VSid,#[M] - int - OSid = "",# [M/O] - int - VSid = dataset$VSid, #[M/O] - VDid = "", #[M] - int - SDid = "", #[M/O] - int - FOid = "", #[M/O] - int - TEid = "", #[M/O] - int - FTrecordType='FT', #[M] - string - FTencryptedVesselCode = dataset$VSencryptedVesselCode, #[M] - FTsequenceNumber = as.integer(1:nrow(dataset)), #[M] - string - FTstratification = "N", #[DV,M] - RS_Stratification - FTstratumName = "U", #[DV,M] - string - FTclustering = "N", #[DV,M] - RS_Clustering - FTclusterName = "U", #[DV,M] - string - FTsampler = "Observer", #[M] - RS_Sampler - FTsamplingType = "AtSea" , #[M] - RS_SamplingType - FTnumberOfHaulsOrSets = 1, #[O] - int - FTdepartureLocation="ZWHWN", #[O] - Harbour_LOCODE - FTdepartureDate=seq(from = as.Date("1968-01-01", format='%Y-%m-%d'), by=1, length.out=nrow(dataset)), #[M/O] - date - FTdepartureTime="", #[O] - time - FTarrivalLocation = "ZWHWN", #[M] - Harbour_LOCODE - FTarrivalDate=seq(from = as.Date("1968-01-01", format='%Y-%m-%d'), by=1, length.out=nrow(dataset)), #[M] - date - FTarrivalTime="", #[O] - time - FTdominantLandingDate= "", - FTnumberTotal= 1, #[DV,O] - int - FTnumberSampled=1, #[DV,O] - int - FTselectionProb=1, #[DV,O] - DecimalPrec10 - FTinclusionProb=1, #[DV,O] - DecimalPrec10 - FTselectionMethod="CENSUS", #[DV,M] - RS_SelectionMethod - FTunitName = dataset$VSid, #[DV,M] - string - FTselectionMethodCluster="", #[DV,O] - RS_SelectionMethod - FTnumberTotalClusters="", #[DV,O] - int - FTnumberSampledClusters="", #[DV,O] - int - FTselectionProbCluster="", #[DV,O] - DecimalPrec10 - FTinclusionProbCluster="", #[DV,O] - DecimalPrec10 - FTsampled="Y", #[DV,M] - YesNoFields - FTreasonNotSampled= "", #[DV,O] - RS_ReasonForNotSampling - FTnonResponseCollected = "N", - FTauxiliaryVariableTotal = "", - FTauxiliaryVariableValue = "", - FTauxiliaryVariableName = "", - FTauxiliaryVariableUnit = "", - stringsAsFactors=FALSE +FTid = dataset$VSid,#[M] - int +OSid = "",# [M/O] - int +VSid = dataset$VSid, #[M/O] +VDid = "", #[M] - int +SDid = "", #[M/O] - int +FOid = "", #[M/O] - int +TEid = "", #[M/O] - int +FTrecordType='FT', #[M] - string +FTencryptedVesselCode = dataset$VSencryptedVesselCode, #[M] +FTsequenceNumber = as.integer(1:nrow(dataset)), #[M] - string +FTstratification = "N", #[DV,M] - RS_Stratification +FTstratumName = "U", #[DV,M] - string +FTclustering = "N", #[DV,M] - RS_Clustering +FTclusterName = "U", #[DV,M] - string +FTsampler = "Observer", #[M] - RS_Sampler +FTsamplingType = "AtSea" , #[M] - RS_SamplingType +FTnumberOfHaulsOrSets = 1, #[O] - int +FTdepartureLocation="ZWHWN", #[O] - Harbour_LOCODE +FTdepartureDate=seq(from = as.Date("1968-01-01", format='%Y-%m-%d'), by=1, length.out=nrow(dataset)), #[M/O] - date +FTdepartureTime="", #[O] - time +FTarrivalLocation = "ZWHWN", #[M] - Harbour_LOCODE +FTarrivalDate=seq(from = as.Date("1968-01-01", format='%Y-%m-%d'), by=1, length.out=nrow(dataset)), #[M] - date +FTarrivalTime="", #[O] - time +FTdominantLandingDate= "", +FTnumberTotal= 1, #[DV,O] - int +FTnumberSampled=1, #[DV,O] - int +FTselectionProb=1, #[DV,O] - DecimalPrec10 +FTinclusionProb=1, #[DV,O] - DecimalPrec10 +FTselectionMethod="CENSUS", #[DV,M] - RS_SelectionMethod +FTunitName = dataset$VSid, #[DV,M] - string +FTselectionMethodCluster="", #[DV,O] - RS_SelectionMethod +FTnumberTotalClusters="", #[DV,O] - int +FTnumberSampledClusters="", #[DV,O] - int +FTselectionProbCluster="", #[DV,O] - DecimalPrec10 +FTinclusionProbCluster="", #[DV,O] - DecimalPrec10 +FTsampled="Y", #[DV,M] - YesNoFields +FTreasonNotSampled= "", #[DV,O] - RS_ReasonForNotSampling +FTnonResponseCollected = "N", +FTauxiliaryVariableTotal = "", +FTauxiliaryVariableValue = "", +FTauxiliaryVariableName = "", +FTauxiliaryVariableUnit = "", +stringsAsFactors=FALSE ) #====FO=========== -# 1 FOid [M] - int -# 2 FTid [M/O] - int -# 3 SDid [M/O] - int -# 4 FOrecordType [M] - string -# 5 FOstratification [DV,M] - YesNoFields -# 6 FOsequenceNumber [M] - int -# 7 FOstratumName [DV,M] - StringLength100 -# 8 FOclustering [DV,M] - Clustering -# 9 FOclusterName [DV,M] - StringLength100 -# 10 FOsampler [O] - Sampler -# 11 FOaggregationLevel [M] - AggregationLevel -# 12 FOvalidity [M] - ValidityFlag -# 13 FOcatchReg [M] - CatchRegistration -# 14 FOstartDate [O] - Date -# 15 FOstartTime [O] - Time -# 16 FOendDate [M] - Date -# 17 FOendTime [M/O] - Time -# 18 FOduration [M/O] - int -# FOfishingDurationDataBasis -# 19 FOdurationSource [M] - DurationSource -# 20 FOhandlingTime [O] - int -# 21 FOstartLat [O] - Decimal-90.000000-90.000000 -# 22 FOstartLon [O] - Decimal-180.000000-180.000000 -# 23 FOstopLat [O] - Decimal-90.000000-90.000000 -# 24 FOstopLon [O] - Decimal-180.000000-180.000000 -# 25 FOexclusiveEconomicZoneIndicator [O] - ISO_3166 -# 26 FOarea [M] - ICES_Area -# 27 FOrectangle [O] - StatRec -# 28 FOfisheriesManagementUnit [O] - AreaNonFAO -# 29 FOgsaSubarea [M] - Areas_GFCM_GSA -# 30 FOjurisdictionArea [O] - JurisdictionArea -# 31 FOfishingDepth [O] - int -# 32 FOwaterDepth [O] - int -# 33 FOnationalFishingActivity [O] - NationalFishingActivity -# 34 FOmetier5 [O] - Metier5_FishingActivity -# 35 FOmetier6 [M] - Metier6_FishingActivity -# 36 FOgear [M] - GearType -# 37 FOmeshSize [O] - int -# 38 FOselectionDevice [O] - SelectionDevice -# 39 FOselectionDeviceMeshSize [O] - int -# 40 FOtargetSpecies [O] - TargetSpecies -# 41 FOincidentalByCatchMitigationDeviceFirst [M] - BycatchMitigationDevice -# 42 FOincidentalByCatchMitigationDeviceTargetFirst [M] - BycatchMitigationDeviceTarget -# 43 FOincidentalByCatchMitigationDeviceSecond [M] - BycatchMitigationDevice -# 44 FOincidentalByCatchMitigationDeviceTargetSecond [M] - BycatchMitigationDeviceTarget -# 45 FOgearDimensions [O] - int -# 46 FOobservationCode [M] - ObservationCode -# 47 FOnumberTotal [DV,O] - int -# 48 FOnumberSampled [DV,O] - int -# 49 FOselectionProb [DV,O] - Decimal0.0000000000000001-1 -# 50 FOinclusionProb [DV,O] - Decimal0.0000000000000001-1 -# 51 FOselectionMethod [DV,M] - SelectionMethod -# 52 FOunitName [DV,M] - StringLength100 -# 53 FOselectionMethodCluster [DV,O] - SelectionMethod -# 54 FOnumberTotalClusters [DV,O] - int -# 55 FOnumberSampledClusters [DV,O] - int -# 56 FOselectionProbCluster [DV,O] - Decimal0.0000000000000001-1 -# 57 FOinclusionProbCluster [DV,O] - Decimal0.0000000000000001-1 -# 58 FOsampled [DV,M] - YesNoFields -# 59 FOreasonNotSampled [DV,O] - ReasonForNotSampling -# 60 FOnonResponseCollected [DV,O] - YesNoFields -# 61 FOauxiliaryVariableTotal [DV,O] - DecimalPrec3 -# 62 FOauxiliaryVariableValue [DV,O] - DecimalPrec3 -# 63 FOauxiliaryVariableName [DV,O] - AuxiliaryVariableName -# 64 FOauxiliaryVariableUnit [DV,O] - MUNIT - - FO_df <- data.frame( FOid = dataset$VSid, FTid = dataset$VSid, @@ -394,14 +225,14 @@ FO_df <- data.frame( FOstartLon="", # ATT! FOstopLat="", FOstopLon="", - FOexclusiveEconomicZoneIndicator = "", # might differ!! + FOexclusiveEconomicZoneIndicator = "", # FOarea = "27.3.a.21", #M FOrectangle = "", FOfisheriesManagementUnit = "", FOgsaSubarea = "NotApplicable", #M FOjurisdictionArea = "", FOgeographicalDataBasis = "Measured", - FOgeographicalSource = "", + FOgeographicalSource = "Crew", FOfishingDepth = "", FOwaterDepth = "", FOnationalFishingActivity = "", @@ -444,45 +275,6 @@ stringsAsFactors=FALSE #===SS============ -# 1 SSid [] - int -# 2 LEid [M/O] - int -# 3 FOid [M/O] - int -# 4 FTid [M/O] - int -# 5 OSid [M/O] - int -# 6 TEid [M/O] - int -# 7 SLid [M] - int -# 8 SSrecordType [M] - string -# 9 SSsequenceNumber [M] - int -# 10 SSstratification [DV,M] - YesNoFields -# 11 SSstratumName [DV,M] - StringLength100 -# 12 SSclustering [DV,M] - Clustering -# 13 SSclusterName [DV,M] - StringLength100 -# 14 SSobservationActivityType [DV,M] - ObservationActivityType -# 15 SScatchFraction [M] - CatchFraction -# 16 SSobservationType [DV,M] - ObservationMethod -# 17 SSsampler [O] - Sampler -# 18 SSspeciesListName [M] - StringLength100 -# 19 SSuseForCalculateZero [M] - YesNoFields -# 20 SStimeTotal [O] - int -# 21 SStimeSampled [O] - int -# 22 SSnumberTotal [DV,O] - int -# 23 SSnumberSampled [DV,O] - int -# 24 SSselectionProb [DV,O] - Decimal0.0000000000000001-1 -# 25 SSinclusionProb [DV,O] - Decimal0.0000000000000001-1 -# 26 SSselectionMethod [DV,M] - SelectionMethod -# 27 SSunitName [DV,M] - StringLength100 -# 28 SSselectionMethodCluster [DV,O] - SelectionMethod -# 29 SSnumberTotalClusters [DV,O] - int -# 30 SSnumberSampledClusters [DV,O] - int -# 31 SSselectionProbCluster [DV,O] - Decimal0.0000000000000001-1 -# 32 SSinclusionProbCluster [DV,O] - Decimal0.0000000000000001-1 -# 33 SSsampled [DV,M] - YesNoFields -# 34 SSreasonNotSampled [DV,O] - ReasonForNotSampling -# 35 SSnonResponseCollected [DV,O] - YesNoFields -# 36 SSauxiliaryVariableTotal [DV,O] - DecimalPrec3 -# 37 SSauxiliaryVariableValue [DV,O] - DecimalPrec3 -# 38 SSauxiliaryVariableName [DV,O] - AuxiliaryVariableName -# 39 SSauxiliaryVariableUnit [DV,O] - MUNIT SS_df<-data.frame( SSid = dataset$VSid, @@ -531,122 +323,66 @@ SS_df<-data.frame( #====SA=========== - -# 1 SAid [] - int -# 2 SSid [] - int -# 3 SArecordType [M] - string -# 4 SAsequenceNumber [M] - int -# 5 SAparentSequenceNumber [O] - int -# 6 SAstratification [DV,M] - YesNoFields -# 7 SAstratumName [DV,M] - StringLength100 -# 8 SAspeciesCode [M] - SpecWoRMS -# 9 SAspeciesCodeFAO [O] - SpecASFIS -# 10 SAstateOfProcessing [M] - StateOfProcessing -# 11 SApresentation [M] - ProductPresentation -# 12 SAspecimensState [M] - SpecimensState -# 13 SAcatchCategory [M] - CatchCategory -# 14 SAlandingCategory [O] - LandingCategory -# 15 SAcommSizeCatScale [O] - CommercialSizeCategoryScale -# 16 SAcommSizeCat [O] - CommercialSizeCategory -# 17 SAsex [M] - SEXCO -# 18 SAexclusiveEconomicZoneIndicator [O] - ISO_3166 -# 19 SAarea [O] - ICES_Area -# 20 SArectangle [O] - StatRec -# 21 SAfisheriesManagementUnit [O] - AreaNonFAO -# 22 SAgsaSubarea [M] - Areas_GFCM_GSA -# 23 SAjurisdictionArea [O] - JurisdictionArea -# 24 SAnationalFishingActivity [O] - NationalFishingActivity -# 25 SAmetier5 [O] - Metier5_FishingActivity -# 26 SAmetier6 [O] - Metier6_FishingActivity -# 27 SAgear [O] - GearType -# 28 SAmeshSize [O] - int -# 29 SAselectionDevice [O] - SelectionDevice -# 30 SAselectionDeviceMeshSize [O] - int -# 31 SAunitType [M] - SamplingUnit -# 32 SAtotalWeightLive [O] - int -# 33 SAsampleWeightLive [O] - int -# 34 SAnumberTotal [DV,O] - Decimal0.1-999999999 -# 35 SAnumberSampled [DV,O] - Decimal0.1-999999999 -# 36 SAselectionProb [DV,O] - Decimal0-1 -# 37 SAinclusionProb [DV,O] - Decimal0-1 -# 38 SAselectionMethod [DV,M] - SelectionMethod -# 39 SAunitName [DV,M] - StringLength100 -# 40 SAlowerHierarchy [M/O] - LowerHierarchy -# 41 SAsampler [O] - Sampler -# 42 SAsampled [DV,M] - YesNoFields -# 43 SAreasonNotSampled [DV,O] - ReasonForNotSampling -# 44 SAnonResponseCollected [DV,O] - YesNoFields -# 45 SAreasonNotSampledFM [DV,O] - ReasonForNotSampling -# 46 SAreasonNotSampledBV [DV,O] - ReasonForNotSampling -# 47 SAtotalWeightMeasured [O] - int -# 48 SAsampleWeightMeasured [O] - int -# 49 SAconversionFactorMeasLive [O] - Decimal0.900-10 -# 50 SAauxiliaryVariableTotal [DV,O] - DecimalPrec3 -# 51 SAauxiliaryVariableValue [DV,O] - DecimalPrec3 -# 52 SAauxiliaryVariableName [DV,O] - AuxiliaryVariableName -# 53 SAauxiliaryVariableUnit [DV,O] - MUNIT - - SA_df<-data.frame( - SAid = dataset$VSid, - SSid = dataset$VSid, - SArecordType = "SA", #M - SAsequenceNumber = dataset$VSid, #M - SAparentSequenceNumber = "", - SAstratification = "N", #M - SAstratumName = "U", #M - SAspeciesCode = 107254, #M - SAspeciesCodeFAO = "NEP", - SAstateOfProcessing = "FRE", #M - SApresentation = "WHL", #M - SAspecimensState = "DeadOrZeroProbSurvival", #M - SAcatchCategory = "Lan", #M - SAlandingCategory = "", - SAcommSizeCatScale = "", - SAcommSizeCat = "", - SAsex = "U", #M - SAexclusiveEconomicZoneIndicator = "", - SAarea = "", - SArectangle = "", - SAfisheriesManagementUnit = "", - SAgsaSubarea = "NotApplicable", #M - SAjurisdictionArea = "", - SAgeographicalDataBasis = "Measured", - SAgeographicalSource = "", - SAnationalFishingActivity = "", - SAmetier5 = "", - SAmetier6 = "", - SAgear = "", - SAgearDataBasis = "Measured", - SAgearSource = "", - SAmeshSize = "", - SAselectionDevice = "", - SAselectionDeviceMeshSize = "", - SAunitType = "Box", #M - SAtotalWeightLive = "", - SAsampleWeightLive = "", - SAnumberTotal = 1, - SAnumberSampled = 1, - SAselectionProb = 1, - SAinclusionProb = 1, - SAselectionMethod = "CENSUS", #M - SAunitName = dataset$VSid, #M - SAlowerHierarchy = "D", - SAsampler = "Observer", - SAsampled = "N", #M - SAreasonNotSampled = "", - SAnonResponseCollected = "N", - SAreasonNotSampledFM = "", - SAreasonNotSampledBV = "", - SAtotalWeightMeasured = dataset[[target_var]], - SAtotalWeightMeasuredDataBasis = "Measured", - SAsampleWeightMeasured = dataset[[target_var]], - SAconversionFactorMeasLive = 1, - SAauxiliaryVariableTotal = "", - SAauxiliaryVariableValue = "", - SAauxiliaryVariableName = "", - SAauxiliaryVariableUnit = "", - stringsAsFactors=FALSE +SAid = dataset$VSid, +SSid = dataset$VSid, +SArecordType = "SA", #M +SAsequenceNumber = dataset$VSid, #M +SAparentSequenceNumber = "", +SAstratification = "N", #M +SAstratumName = "U", #M +SAspeciesCode = 107254, #M +SAspeciesCodeFAO = "NEP", +SAstateOfProcessing = "FRE", #M +SApresentation = "WHL", #M +SAspecimensState = "DeadOrZeroProbSurvival", #M +SAcatchCategory = "Lan", #M +SAlandingCategory = "", +SAcommSizeCatScale = "", +SAcommSizeCat = "", +SAsex = "U", #M +SAexclusiveEconomicZoneIndicator = "", +SAarea = "", +SArectangle = "", +SAfisheriesManagementUnit = "", +SAgsaSubarea = "NotApplicable", #M +SAjurisdictionArea = "", +SAgeographicalDataBasis = "Measured", +SAgeographicalSource = "", +SAnationalFishingActivity = "", +SAmetier5 = "", +SAmetier6 = "", +SAgear = "", +SAgearDataBasis = "Measured", +SAgearSource = "", +SAmeshSize = "", +SAselectionDevice = "", +SAselectionDeviceMeshSize = "", +SAunitType = "Box", #M +SAtotalWeightLive = "", +SAsampleWeightLive = "", +SAnumberTotal = 1, +SAnumberSampled = 1, +SAselectionProb = 1, +SAinclusionProb = 1, +SAselectionMethod = "CENSUS", #M +SAunitName = dataset$VSid, #M +SAlowerHierarchy = "D", +SAsampler = "Observer", +SAsampled = "N", #M +SAreasonNotSampled = "", +SAnonResponseCollected = "N", +SAreasonNotSampledFM = "", +SAreasonNotSampledBV = "", +SAtotalWeightMeasured = dataset[[target_var]], +SAtotalWeightMeasuredDataBasis = "Measured", +SAsampleWeightMeasured = dataset[[target_var]], +SAconversionFactorMeasLive = 1, +SAauxiliaryVariableTotal = "", +SAauxiliaryVariableValue = "", +SAauxiliaryVariableName = "", +SAauxiliaryVariableUnit = "", +stringsAsFactors=FALSE ) @@ -656,20 +392,6 @@ SA_df<-data.frame( RDBESlist = list(DE = DE_df,SD = SD_df, VS = VS_df, FT = FT_df, FO = FO_df, SS = SS_df, SA = SA_df) -if(1==2){ -a<-RDBESlist -a[c("TE","LO","OS","LE","CL","CE","BV","FM")] <- NULL -a$SA[c("SAgeoDatBas","SAgeoSou","SAgeaDatBas","SAgearSou","SAtotWtMeaDatBas")] <- NA -a$FO[c("FOfishDuraDatBas","FOgeoDatBas","FOgeoSou","FOgeaDatBas","FOgearSou")] <- NA -a$SS[c("SStimeTotalDatBas","SSnumTotalDatBas")] <- NA -a$FT$FTdomLanDate <- NA -#a$IS <- data.frame(ISid = 1, SLid = 47865, IScommTaxon = 107254, ISsppCode = 107254) -a$SL[c("SLcommTaxon","SLsppCode")] <- NULL -Pckg_SDAResources_agstrat_H1 <- createRDBESDataObject(a, verbose = F) -usethis::use_data(Pckg_SDAResources_agstrat_H1, overwrite = TRUE) - -stop("The new code stops here!") -} #id table a<-merge(DE_df["DEid"],SD_df[c("DEid","SDid")]) a<-merge(a, VS_df[c("SDid","VSid")], all.x=T) @@ -760,6 +482,8 @@ write.table(b$V1, file = filename_output_CS, col.names=FALSE, row.names = FALSE, + + # check - is this needed? if(1==2){ diff --git a/data-raw/exampleData/TextBookExamples/BuildTextBookDataUpload_Pckg_SDAResources_algebra_H1.R b/data-raw/exampleData/TextBookExamples/BuildTextBookDataUpload_Pckg_SDAResources_algebra_H1.R index 6cec408f..9ac961f7 100644 --- a/data-raw/exampleData/TextBookExamples/BuildTextBookDataUpload_Pckg_SDAResources_algebra_H1.R +++ b/data-raw/exampleData/TextBookExamples/BuildTextBookDataUpload_Pckg_SDAResources_algebra_H1.R @@ -69,22 +69,6 @@ -# 1 DEid [] - int -# 2 DErecordType [M] - string -# 3 DEsamplingScheme [M] - SamplingScheme -# 4 DEsamplingSchemeType [M] - SamplingSchemeType -# 5 DEyear [M] - Year -# 6 DEstratumName [M] - StringLength100 -# 7 DEhierarchyCorrect [M] - YesNoFields -# 8 DEhierarchy [M] - RDBESUpperHierarchy -# 9 DEsampled [DV,M] - YesNoFields -# 10 DEreasonNotSampled [DV,O] - ReasonForNotSampling -# 11 DEnonResponseCollected [DV,O] - YesNoFields -# 12 DEauxiliaryVariableTotal [DV,O] - DecimalPrec3 -# 13 DEauxiliaryVariableValue [DV,O] - DecimalPrec3 -# 14 DEauxiliaryVariableName [DV,O] - AuxiliaryVariableName -# 15 DEauxiliaryVariableUnit[DV,O]-MUNIT - DE_df_base<-expand.grid(DEyear=DEyear, DEstratumName="U",stringsAsFactors=F) @@ -110,13 +94,6 @@ DE_df<-data.frame( #===SD============ - # x -# 1 SDid [M] - int -# 2 DEid [M] - int -# 3 SDrecordType [M] - string -# 4 SDcountry [M] - ISO_3166 -# 5 SDinstitution [M] - EDMO - SD_df<-data.frame( SDid=DE_df$DEid, DEid=DE_df$DEid, @@ -128,41 +105,6 @@ SD_df<-data.frame( #===VS============ - - # x -# 1 VSid [M] - int -# 2 SDid [M] - int -# 3 VDid [M] - int -# 4 TEid [M/O] - int -# 5 VSrecordType [M] - string -# 6 VSsequenceNumber [M] - int -# 7 VSencryptedVesselCode [M] - StringLength100 -# 8 VSstratification [DV,M] - YesNoFields -# 9 VSstratumName [DV,M] - StringLength100 -# 10 VSclustering [DV,M] - Clustering -# 11 VSclusterName [DV,M] - StringLength100 -# 12 VSsampler [O] - Sampler -# 13 VSnumberTotal [DV,O] - int -# 14 VSnumberSampled [DV,O] - int -# 15 VSselectionProb [DV,O] - Decimal0.0000000000000001-1 -# 16 VSinclusionProb [DV,O] - Decimal0.0000000000000001-1 -# 17 VSselectionMethod [DV,M] - SelectionMethod -# 18 VSunitName [DV,M] - StringLength100 -# 19 VSselectionMethodCluster [DV,O] - SelectionMethod -# 20 VSnumberTotalClusters [DV,O] - int -# 21 VSnumberSampledClusters [DV,O] - int -# 22 VSselectionProbCluster [DV,O] - Decimal0.0000000000000001-1 -# 23 VSinclusionProbCluster [DV,O] - Decimal0.0000000000000001-1 -# 24 VSsampled [DV,M] - YesNoFields -# 25 VSreasonNotSampled [DV,O] - ReasonForNotSampling -# 26 VSnonResponseCollected [DV,O] - YesNoFields -# 27 VSauxiliaryVariableTotal [DV,O] - DecimalPrec3 -# 28 VSauxiliaryVariableValue [DV,O] - DecimalPrec3 -# 29 VSauxiliaryVariableName [DV,O] - AuxiliaryVariableName -# 30 VSauxiliaryVariableUnit [DV,O] - MUNIT - -#check_All_fields("VS") - # adds VSid to dataset dataset$VSid <- 1:nrow(dataset) @@ -229,50 +171,6 @@ VS_df$VSinclusionProbCluster<-"" #====FT=========== - -# 1 FTid [M] - int -# 2 OSid [M/O] - int -# 3 VSid [M/O] - int -# 4 VDid [M] - int -# 5 SDid [M/O] - int -# 6 FOid [M/O] - int -# 7 TEid [M/O] - int -# 8 FTrecordType [M] - string -# 9 FTencryptedVesselCode [M] - StringLength100 -# 10 FTsequenceNumber [M] - int -# 11 FTstratification [DV,M] - YesNoFields -# 12 FTstratumName [DV,M] - StringLength100 -# 13 FTclustering [DV,M] - Clustering -# 14 FTclusterName [DV,M] - StringLength100 -# 15 FTsampler [O] - Sampler -# 16 FTsamplingType [M] - SamplingContext -# 17 FTnumberOfHaulsOrSets [M/O] - IntZeroToMax -# 18 FTdepartureLocation [M/O] - Harbour_LOCODE -# 19 FTdepartureDate [M/O] - Date -# 20 FTdepartureTime [M/O] - StringLength60 -# 21 FTarrivalLocation [M] - Harbour_LOCODE -# 22 FTarrivalDate [M] - Date -# 23 FTarrivalTime [M/O] - StringLength60 -# FTdominantLandingDate -# 24 FTnumberTotal [DV,O] - int -# 25 FTnumberSampled [DV,O] - int -# 26 FTselectionProb [DV,O] - Decimal0-1 -# 27 FTinclusionProb [DV,O] - Decimal0-1 -# 28 FTselectionMethod [DV,M] - SelectionMethod -# 29 FTunitName [DV,M] - StringLength100 -# 30 FTselectionMethodCluster [DV,O] - SelectionMethod -# 31 FTnumberTotalClusters [DV,O] - int -# 32 FTnumberSampledClusters [DV,O] - int -# 33 FTselectionProbCluster [DV,O] - Decimal0-1 -# 34 FTinclusionProbCluster [DV,O] - Decimal0-1 -# 35 FTsampled [DV,M] - YesNoFields -# 36 FTreasonNotSampled [DV,O] - ReasonForNotSampling -# 37 FTnonResponseCollected [DV,O] - YesNoFields -# 38 FTauxiliaryVariableTotal [DV,O] - DecimalPrec3 -# 39 FTauxiliaryVariableValue [DV,O] - DecimalPrec3 -# 40 FTauxiliaryVariableName [DV,O] - AuxiliaryVariableName -# 41 FTauxiliaryVariableUnit [DV,O] - MUNIT - FT_df <- data.frame( FTid = dataset$VSid,#[M] - int OSid = "",# [M/O] - int @@ -297,6 +195,7 @@ FT_df <- data.frame( FTarrivalLocation = "ZWHWN", #[M] - Harbour_LOCODE FTarrivalDate=seq(from = as.Date("1965-01-01", format='%Y-%m-%d'), by=1, length.out=nrow(dataset)), #[M] - date FTarrivalTime="", #[O] - time + FTdominantLandingDate= "", FTnumberTotal= 1, #[DV,O] - int FTnumberSampled=1, #[DV,O] - int FTselectionProb=1, #[DV,O] - DecimalPrec10 @@ -314,7 +213,7 @@ FT_df <- data.frame( FTauxiliaryVariableTotal = "", FTauxiliaryVariableValue = "", FTauxiliaryVariableName = "", - FTauxiliaryVariableUnit = "", + FTauxiliaryVariableUnit = "", stringsAsFactors=FALSE ) @@ -322,73 +221,6 @@ FT_df <- data.frame( #====FO=========== -# 1 FOid [M] - int -# 2 FTid [M/O] - int -# 3 SDid [M/O] - int -# 4 FOrecordType [M] - string -# 5 FOstratification [DV,M] - YesNoFields -# 6 FOsequenceNumber [M] - int -# 7 FOstratumName [DV,M] - StringLength100 -# 8 FOclustering [DV,M] - Clustering -# 9 FOclusterName [DV,M] - StringLength100 -# 10 FOsampler [O] - Sampler -# 11 FOaggregationLevel [M] - AggregationLevel -# 12 FOvalidity [M] - ValidityFlag -# 13 FOcatchReg [M] - CatchRegistration -# 14 FOstartDate [O] - Date -# 15 FOstartTime [O] - Time -# 16 FOendDate [M] - Date -# 17 FOendTime [M/O] - Time -# 18 FOduration [M/O] - int -# FOfishingDurationDataBasis -# 19 FOdurationSource [M] - DurationSource -# 20 FOhandlingTime [O] - int -# 21 FOstartLat [O] - Decimal-90.000000-90.000000 -# 22 FOstartLon [O] - Decimal-180.000000-180.000000 -# 23 FOstopLat [O] - Decimal-90.000000-90.000000 -# 24 FOstopLon [O] - Decimal-180.000000-180.000000 -# 25 FOexclusiveEconomicZoneIndicator [O] - ISO_3166 -# 26 FOarea [M] - ICES_Area -# 27 FOrectangle [O] - StatRec -# 28 FOfisheriesManagementUnit [O] - AreaNonFAO -# 29 FOgsaSubarea [M] - Areas_GFCM_GSA -# 30 FOjurisdictionArea [O] - JurisdictionArea -# 31 FOfishingDepth [O] - int -# 32 FOwaterDepth [O] - int -# 33 FOnationalFishingActivity [O] - NationalFishingActivity -# 34 FOmetier5 [O] - Metier5_FishingActivity -# 35 FOmetier6 [M] - Metier6_FishingActivity -# 36 FOgear [M] - GearType -# 37 FOmeshSize [O] - int -# 38 FOselectionDevice [O] - SelectionDevice -# 39 FOselectionDeviceMeshSize [O] - int -# 40 FOtargetSpecies [O] - TargetSpecies -# 41 FOincidentalByCatchMitigationDeviceFirst [M] - BycatchMitigationDevice -# 42 FOincidentalByCatchMitigationDeviceTargetFirst [M] - BycatchMitigationDeviceTarget -# 43 FOincidentalByCatchMitigationDeviceSecond [M] - BycatchMitigationDevice -# 44 FOincidentalByCatchMitigationDeviceTargetSecond [M] - BycatchMitigationDeviceTarget -# 45 FOgearDimensions [O] - int -# 46 FOobservationCode [M] - ObservationCode -# 47 FOnumberTotal [DV,O] - int -# 48 FOnumberSampled [DV,O] - int -# 49 FOselectionProb [DV,O] - Decimal0.0000000000000001-1 -# 50 FOinclusionProb [DV,O] - Decimal0.0000000000000001-1 -# 51 FOselectionMethod [DV,M] - SelectionMethod -# 52 FOunitName [DV,M] - StringLength100 -# 53 FOselectionMethodCluster [DV,O] - SelectionMethod -# 54 FOnumberTotalClusters [DV,O] - int -# 55 FOnumberSampledClusters [DV,O] - int -# 56 FOselectionProbCluster [DV,O] - Decimal0.0000000000000001-1 -# 57 FOinclusionProbCluster [DV,O] - Decimal0.0000000000000001-1 -# 58 FOsampled [DV,M] - YesNoFields -# 59 FOreasonNotSampled [DV,O] - ReasonForNotSampling -# 60 FOnonResponseCollected [DV,O] - YesNoFields -# 61 FOauxiliaryVariableTotal [DV,O] - DecimalPrec3 -# 62 FOauxiliaryVariableValue [DV,O] - DecimalPrec3 -# 63 FOauxiliaryVariableName [DV,O] - AuxiliaryVariableName -# 64 FOauxiliaryVariableUnit [DV,O] - MUNIT - - FO_df <- data.frame( FOid = dataset$VSid, FTid = dataset$VSid, @@ -408,20 +240,20 @@ FO_df <- data.frame( FOendDate = as.Date(FT_df$FTdepartureDate, format='%Y-%m-%d'),#M FOendTime ="",#M FOduration = 60, #M ATTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT!!!! - FOfishingDurationDataBasis ="", + FOfishingDurationDataBasis = "Measured", FOdurationSource = "Crew", FOhandlingTime = "", FOstartLat="", # ATT! FOstartLon="", # ATT! FOstopLat="", FOstopLon="", - FOexclusiveEconomicZoneIndicator = "", # + FOexclusiveEconomicZoneIndicator = "", # FOarea = "27.3.a.21", #M FOrectangle = "", FOfisheriesManagementUnit = "", FOgsaSubarea = "NotApplicable", #M FOjurisdictionArea = "", - FOgeographicalDataBasis = "", + FOgeographicalDataBasis = "Measured", FOgeographicalSource = "", FOfishingDepth = "", FOwaterDepth = "", @@ -429,6 +261,8 @@ FO_df <- data.frame( FOmetier5 = "", FOmetier6 = "OTT_CRU_70-89_2_35",#M FOgear = "OTT",#M + FOgearDataBasis = "Measured", + FOgearSource = "", FOmeshSize = "", FOselectionDevice = "", FOselectionDeviceMeshSize = "", @@ -456,52 +290,13 @@ FO_df <- data.frame( FOauxiliaryVariableTotal = "", FOauxiliaryVariableValue = "", FOauxiliaryVariableName = "", - FOauxiliaryVariableUnit = "", + FOauxiliaryVariableUnit = "", stringsAsFactors=FALSE ) #===SS============ -# 1 SSid [] - int -# 2 LEid [M/O] - int -# 3 FOid [M/O] - int -# 4 FTid [M/O] - int -# 5 OSid [M/O] - int -# 6 TEid [M/O] - int -# 7 SLid [M] - int -# 8 SSrecordType [M] - string -# 9 SSsequenceNumber [M] - int -# 10 SSstratification [DV,M] - YesNoFields -# 11 SSstratumName [DV,M] - StringLength100 -# 12 SSclustering [DV,M] - Clustering -# 13 SSclusterName [DV,M] - StringLength100 -# 14 SSobservationActivityType [DV,M] - ObservationActivityType -# 15 SScatchFraction [M] - CatchFraction -# 16 SSobservationType [DV,M] - ObservationMethod -# 17 SSsampler [O] - Sampler -# 18 SSspeciesListName [M] - StringLength100 -# 19 SSuseForCalculateZero [M] - YesNoFields -# 20 SStimeTotal [O] - int -# 21 SStimeSampled [O] - int -# 22 SSnumberTotal [DV,O] - int -# 23 SSnumberSampled [DV,O] - int -# 24 SSselectionProb [DV,O] - Decimal0.0000000000000001-1 -# 25 SSinclusionProb [DV,O] - Decimal0.0000000000000001-1 -# 26 SSselectionMethod [DV,M] - SelectionMethod -# 27 SSunitName [DV,M] - StringLength100 -# 28 SSselectionMethodCluster [DV,O] - SelectionMethod -# 29 SSnumberTotalClusters [DV,O] - int -# 30 SSnumberSampledClusters [DV,O] - int -# 31 SSselectionProbCluster [DV,O] - Decimal0.0000000000000001-1 -# 32 SSinclusionProbCluster [DV,O] - Decimal0.0000000000000001-1 -# 33 SSsampled [DV,M] - YesNoFields -# 34 SSreasonNotSampled [DV,O] - ReasonForNotSampling -# 35 SSnonResponseCollected [DV,O] - YesNoFields -# 36 SSauxiliaryVariableTotal [DV,O] - DecimalPrec3 -# 37 SSauxiliaryVariableValue [DV,O] - DecimalPrec3 -# 38 SSauxiliaryVariableName [DV,O] - AuxiliaryVariableName -# 39 SSauxiliaryVariableUnit [DV,O] - MUNIT SS_df<-data.frame( SSid = dataset$VSid, @@ -524,8 +319,10 @@ SS_df<-data.frame( SSspeciesListName = project_name_outputs, #M SSuseForCalculateZero = "N", #M SStimeTotal = "", + SStimeTotalDataBasis = "Measured", SStimeSampled = "", SSnumberTotal = 1, + SSnumberTotalDataBasis = "Measured", SSnumberSampled = 1, SSselectionProb = 1, SSinclusionProb = 1, @@ -542,68 +339,12 @@ SS_df<-data.frame( SSauxiliaryVariableTotal = "", SSauxiliaryVariableValue = "", SSauxiliaryVariableName = "", - SSauxiliaryVariableUnit = "", - stringsAsFactors=FALSE + SSauxiliaryVariableUnit = "", + stringsAsFactors=FALSE ) #====SA=========== - -# 1 SAid [] - int -# 2 SSid [] - int -# 3 SArecordType [M] - string -# 4 SAsequenceNumber [M] - int -# 5 SAparentSequenceNumber [O] - int -# 6 SAstratification [DV,M] - YesNoFields -# 7 SAstratumName [DV,M] - StringLength100 -# 8 SAspeciesCode [M] - SpecWoRMS -# 9 SAspeciesCodeFAO [O] - SpecASFIS -# 10 SAstateOfProcessing [M] - StateOfProcessing -# 11 SApresentation [M] - ProductPresentation -# 12 SAspecimensState [M] - SpecimensState -# 13 SAcatchCategory [M] - CatchCategory -# 14 SAlandingCategory [O] - LandingCategory -# 15 SAcommSizeCatScale [O] - CommercialSizeCategoryScale -# 16 SAcommSizeCat [O] - CommercialSizeCategory -# 17 SAsex [M] - SEXCO -# 18 SAexclusiveEconomicZoneIndicator [O] - ISO_3166 -# 19 SAarea [O] - ICES_Area -# 20 SArectangle [O] - StatRec -# 21 SAfisheriesManagementUnit [O] - AreaNonFAO -# 22 SAgsaSubarea [M] - Areas_GFCM_GSA -# 23 SAjurisdictionArea [O] - JurisdictionArea -# 24 SAnationalFishingActivity [O] - NationalFishingActivity -# 25 SAmetier5 [O] - Metier5_FishingActivity -# 26 SAmetier6 [O] - Metier6_FishingActivity -# 27 SAgear [O] - GearType -# 28 SAmeshSize [O] - int -# 29 SAselectionDevice [O] - SelectionDevice -# 30 SAselectionDeviceMeshSize [O] - int -# 31 SAunitType [M] - SamplingUnit -# 32 SAtotalWeightLive [O] - int -# 33 SAsampleWeightLive [O] - int -# 34 SAnumberTotal [DV,O] - Decimal0.1-999999999 -# 35 SAnumberSampled [DV,O] - Decimal0.1-999999999 -# 36 SAselectionProb [DV,O] - Decimal0-1 -# 37 SAinclusionProb [DV,O] - Decimal0-1 -# 38 SAselectionMethod [DV,M] - SelectionMethod -# 39 SAunitName [DV,M] - StringLength100 -# 40 SAlowerHierarchy [M/O] - LowerHierarchy -# 41 SAsampler [O] - Sampler -# 42 SAsampled [DV,M] - YesNoFields -# 43 SAreasonNotSampled [DV,O] - ReasonForNotSampling -# 44 SAnonResponseCollected [DV,O] - YesNoFields -# 45 SAreasonNotSampledFM [DV,O] - ReasonForNotSampling -# 46 SAreasonNotSampledBV [DV,O] - ReasonForNotSampling -# 47 SAtotalWeightMeasured [O] - int -# 48 SAsampleWeightMeasured [O] - int -# 49 SAconversionFactorMeasLive [O] - Decimal0.900-10 -# 50 SAauxiliaryVariableTotal [DV,O] - DecimalPrec3 -# 51 SAauxiliaryVariableValue [DV,O] - DecimalPrec3 -# 52 SAauxiliaryVariableName [DV,O] - AuxiliaryVariableName -# 53 SAauxiliaryVariableUnit [DV,O] - MUNIT - - SA_df<-data.frame( SAid = 1:nrow(dataset), SSid = dataset$VSid, @@ -628,10 +369,14 @@ SA_df<-data.frame( SAfisheriesManagementUnit = "", SAgsaSubarea = "NotApplicable", #M SAjurisdictionArea = "", + SAgeographicalDataBasis = "Measured", + SAgeographicalSource = "", SAnationalFishingActivity = "", SAmetier5 = "", SAmetier6 = "", SAgear = "", + SAgearDataBasis = "Measured", + SAgearSource = "", SAmeshSize = "", SAselectionDevice = "", SAselectionDeviceMeshSize = "", @@ -652,12 +397,13 @@ SA_df<-data.frame( SAreasonNotSampledFM = "", SAreasonNotSampledBV = "", SAtotalWeightMeasured = dataset[[target_var]], # + SAtotalWeightMeasuredDataBasis = "Measured", SAsampleWeightMeasured = dataset[[target_var]], # SAconversionFactorMeasLive = 1, SAauxiliaryVariableTotal = "", SAauxiliaryVariableValue = "", SAauxiliaryVariableName = "", - SAauxiliaryVariableUnit = "", + SAauxiliaryVariableUnit = "", stringsAsFactors=FALSE ) @@ -711,15 +457,14 @@ RDBESlist[[i]][which(grepl(colnames(RDBESlist[[i]]),pat="[A-Z]id"))]<-NULL #===Save============ - dir_outputs<-paste0(base_dir_outputs,"/", - project_name_outputs,"/") - dir.create(dir_outputs, recursive=T, showWarnings=FALSE) - filename_output_CS <- paste0("H1.csv") - filename_output_SL <- paste0("HSL.csv") - filename_output_VD <- paste0("HVD.csv") + dir_outputs<-paste0(base_dir_outputs,"/", project_name_outputs,"/") + dir.create(dir_outputs, recursive=T, showWarnings=FALSE) + filename_output_CS <- paste0(dir_outputs,"H1.csv") + filename_output_SL <- paste0(dir_outputs,"HSL.csv") + filename_output_VD <- paste0(dir_outputs,"HVD.csv") -lapply(RDBESlist, function(x, filename1 = paste0(dir_outputs,filename_output_CS)){ +lapply(RDBESlist, function(x, filename1 = filename_output_CS){ if("DErecordType" %in% colnames(x)){ write.table(x, file = filename1, append = FALSE, quote = FALSE, sep = ",", eol = "\n", na = "NA", dec = ".", row.names = FALSE, @@ -731,7 +476,7 @@ write.table(x, file = filename1, append = TRUE, quote = FALSE, sep = ",", } }) -b<-read.table(file=paste0(dir_outputs,filename_output_CS), header=F, sep=";") +b<-read.table(file = filename_output_CS, header=F, sep=";") b<-cbind(key,b) b<-b[order(as.character(b$key), decreasing=FALSE),] b<-b[!is.na(key),] @@ -739,24 +484,30 @@ b$key<-NULL b$V1<-as.character(b$V1) # saves CS output -write.table(b$V1, file=paste0(dir_outputs,filename_output_CS), col.names=FALSE, row.names = FALSE, quote=FALSE,sep=",") +write.table(b$V1, file = filename_output_CS, col.names=FALSE, row.names = FALSE, quote=FALSE,sep=",") -# -----Builds and saves dummySL----------------- +# -----Builds and saves dummySL and dummyIS----------------- SL_base$SLspeciesListName<-project_name_outputs SL_base$SLyear<-DEyear SL_base$SLinstitute<-SDinstitution -# saves SL output - write.table(SL_base, file=paste0(dir_outputs,filename_output_SL), col.names=FALSE, row.names = FALSE, quote=FALSE,sep=",") - + + # saves SL output + write.table(rbind(paste(SL_base, collapse=","), paste(IS_base, collapse=",")), file=filename_output_SL, col.names=FALSE, row.names = FALSE, quote=FALSE,sep=",") # -----Builds and saves dummyVD----------------- - # saves VD output VD_base$VDyear<-DEyear - write.table(VD_base, file=paste0(dir_outputs,filename_output_VD), col.names=FALSE, row.names = FALSE, quote=FALSE,sep=",") + write.table(VD_base, file=filename_output_VD, col.names=FALSE, row.names = FALSE, quote=FALSE,sep=",") + + + + + +# check - is this needed? +if(1==2){ @@ -778,4 +529,5 @@ library(zip) zip::zip(zipfile = paste0(project_name_outputs,".zip"), files =paste0(dir_outputs,"tmp/",dir(paste0(dir_outputs,"tmp"))), mode="cherry-pick") unlink(paste0(dir_outputs,"tmp"), force=T, recursive = T) +} diff --git a/data-raw/exampleData/TextBookExamples/BuildTextBookDataUpload_Pckg_SDAResources_coots_H1.R b/data-raw/exampleData/TextBookExamples/BuildTextBookDataUpload_Pckg_SDAResources_coots_H1.R index c2791a24..39d4dbd9 100644 --- a/data-raw/exampleData/TextBookExamples/BuildTextBookDataUpload_Pckg_SDAResources_coots_H1.R +++ b/data-raw/exampleData/TextBookExamples/BuildTextBookDataUpload_Pckg_SDAResources_coots_H1.R @@ -46,10 +46,11 @@ baseDir <- "" VD_base <- readRDS(paste0(baseDir,"aux_TextBookExamples/VD_base.rds")) SL_base <- readRDS(paste0(baseDir,"aux_TextBookExamples/SL_base.rds")) + IS_base <- readRDS(paste0(baseDir,"aux_TextBookExamples/IS_base.rds")) #nameof the directory where the outputs are saved currently base_dir_outputs <- paste0(baseDir,"BuiltUploads") - if(!file.exists(base_dir_outputs)) dir.create(base_dir_outputs, recursive=T, showWarnings=FALSE) + dir.create(base_dir_outputs, recursive=T, showWarnings=FALSE) #========Outline of Hierarchy 1================ @@ -69,22 +70,6 @@ -# 1 DEid [] - int -# 2 DErecordType [M] - string -# 3 DEsamplingScheme [M] - SamplingScheme -# 4 DEsamplingSchemeType [M] - SamplingSchemeType -# 5 DEyear [M] - Year -# 6 DEstratumName [M] - StringLength100 -# 7 DEhierarchyCorrect [M] - YesNoFields -# 8 DEhierarchy [M] - RDBESUpperHierarchy -# 9 DEsampled [DV,M] - YesNoFields -# 10 DEreasonNotSampled [DV,O] - ReasonForNotSampling -# 11 DEnonResponseCollected [DV,O] - YesNoFields -# 12 DEauxiliaryVariableTotal [DV,O] - DecimalPrec3 -# 13 DEauxiliaryVariableValue [DV,O] - DecimalPrec3 -# 14 DEauxiliaryVariableName [DV,O] - AuxiliaryVariableName -# 15 DEauxiliaryVariableUnit[DV,O]-MUNIT - DE_df_base<-expand.grid(DEyear=DEyear, DEstratumName="U",stringsAsFactors=F) @@ -103,20 +88,13 @@ DE_df<-data.frame( DEauxiliaryVariableTotal = "", DEauxiliaryVariableValue = "", DEauxiliaryVariableName = "", - DEauxiliaryVariableUnit = "", + DEauxiliaryVariableUnit = "", stringsAsFactors=FALSE ) #===SD============ - # x -# 1 SDid [M] - int -# 2 DEid [M] - int -# 3 SDrecordType [M] - string -# 4 SDcountry [M] - ISO_3166 -# 5 SDinstitution [M] - EDMO - SD_df<-data.frame( SDid=DE_df$DEid, DEid=DE_df$DEid, @@ -128,41 +106,6 @@ SD_df<-data.frame( #===VS============ - - # x -# 1 VSid [M] - int -# 2 SDid [M] - int -# 3 VDid [M] - int -# 4 TEid [M/O] - int -# 5 VSrecordType [M] - string -# 6 VSsequenceNumber [M] - int -# 7 VSencryptedVesselCode [M] - StringLength100 -# 8 VSstratification [DV,M] - YesNoFields -# 9 VSstratumName [DV,M] - StringLength100 -# 10 VSclustering [DV,M] - Clustering -# 11 VSclusterName [DV,M] - StringLength100 -# 12 VSsampler [O] - Sampler -# 13 VSnumberTotal [DV,O] - int -# 14 VSnumberSampled [DV,O] - int -# 15 VSselectionProb [DV,O] - Decimal0.0000000000000001-1 -# 16 VSinclusionProb [DV,O] - Decimal0.0000000000000001-1 -# 17 VSselectionMethod [DV,M] - SelectionMethod -# 18 VSunitName [DV,M] - StringLength100 -# 19 VSselectionMethodCluster [DV,O] - SelectionMethod -# 20 VSnumberTotalClusters [DV,O] - int -# 21 VSnumberSampledClusters [DV,O] - int -# 22 VSselectionProbCluster [DV,O] - Decimal0.0000000000000001-1 -# 23 VSinclusionProbCluster [DV,O] - Decimal0.0000000000000001-1 -# 24 VSsampled [DV,M] - YesNoFields -# 25 VSreasonNotSampled [DV,O] - ReasonForNotSampling -# 26 VSnonResponseCollected [DV,O] - YesNoFields -# 27 VSauxiliaryVariableTotal [DV,O] - DecimalPrec3 -# 28 VSauxiliaryVariableValue [DV,O] - DecimalPrec3 -# 29 VSauxiliaryVariableName [DV,O] - AuxiliaryVariableName -# 30 VSauxiliaryVariableUnit [DV,O] - MUNIT - -#check_All_fields("VS") - # adds VSid to dataset dataset$VSid <- 1:nrow(dataset) @@ -229,49 +172,6 @@ VS_df$VSinclusionProbCluster<-"" # not known see page 58 #====FT=========== - -# 1 FTid [M] - int -# 2 OSid [M/O] - int -# 3 VSid [M/O] - int -# 4 VDid [M] - int -# 5 SDid [M/O] - int -# 6 FOid [M/O] - int -# 7 TEid [M/O] - int -# 8 FTrecordType [M] - string -# 9 FTencryptedVesselCode [M] - StringLength100 -# 10 FTsequenceNumber [M] - int -# 11 FTstratification [DV,M] - YesNoFields -# 12 FTstratumName [DV,M] - StringLength100 -# 13 FTclustering [DV,M] - Clustering -# 14 FTclusterName [DV,M] - StringLength100 -# 15 FTsampler [O] - Sampler -# 16 FTsamplingType [M] - SamplingContext -# 17 FTnumberOfHaulsOrSets [M/O] - IntZeroToMax -# 18 FTdepartureLocation [M/O] - Harbour_LOCODE -# 19 FTdepartureDate [M/O] - Date -# 20 FTdepartureTime [M/O] - StringLength60 -# 21 FTarrivalLocation [M] - Harbour_LOCODE -# 22 FTarrivalDate [M] - Date -# 23 FTarrivalTime [M/O] - StringLength60 -# 24 FTnumberTotal [DV,O] - int -# 25 FTnumberSampled [DV,O] - int -# 26 FTselectionProb [DV,O] - Decimal0-1 -# 27 FTinclusionProb [DV,O] - Decimal0-1 -# 28 FTselectionMethod [DV,M] - SelectionMethod -# 29 FTunitName [DV,M] - StringLength100 -# 30 FTselectionMethodCluster [DV,O] - SelectionMethod -# 31 FTnumberTotalClusters [DV,O] - int -# 32 FTnumberSampledClusters [DV,O] - int -# 33 FTselectionProbCluster [DV,O] - Decimal0-1 -# 34 FTinclusionProbCluster [DV,O] - Decimal0-1 -# 35 FTsampled [DV,M] - YesNoFields -# 36 FTreasonNotSampled [DV,O] - ReasonForNotSampling -# 37 FTnonResponseCollected [DV,O] - YesNoFields -# 38 FTauxiliaryVariableTotal [DV,O] - DecimalPrec3 -# 39 FTauxiliaryVariableValue [DV,O] - DecimalPrec3 -# 40 FTauxiliaryVariableName [DV,O] - AuxiliaryVariableName -# 41 FTauxiliaryVariableUnit [DV,O] - MUNIT - FT_df <- data.frame( FTid = dataset$VSid,#[M] - int OSid = "",# [M/O] - int @@ -296,6 +196,7 @@ FT_df <- data.frame( FTarrivalLocation = "ZWHWN", #[M] - Harbour_LOCODE FTarrivalDate=seq(from = as.Date("1965-01-01", format='%Y-%m-%d'), by=1, length.out=nrow(dataset)), #[M] - date FTarrivalTime="", #[O] - time + FTdominantLandingDate= "", FTnumberTotal= 1, #[DV,O] - int FTnumberSampled=1, #[DV,O] - int FTselectionProb=1, #[DV,O] - DecimalPrec10 @@ -313,7 +214,7 @@ FT_df <- data.frame( FTauxiliaryVariableTotal = "", FTauxiliaryVariableValue = "", FTauxiliaryVariableName = "", - FTauxiliaryVariableUnit = "", + FTauxiliaryVariableUnit = "", stringsAsFactors=FALSE ) @@ -321,72 +222,6 @@ FT_df <- data.frame( #====FO=========== -# 1 FOid [M] - int -# 2 FTid [M/O] - int -# 3 SDid [M/O] - int -# 4 FOrecordType [M] - string -# 5 FOstratification [DV,M] - YesNoFields -# 6 FOsequenceNumber [M] - int -# 7 FOstratumName [DV,M] - StringLength100 -# 8 FOclustering [DV,M] - Clustering -# 9 FOclusterName [DV,M] - StringLength100 -# 10 FOsampler [O] - Sampler -# 11 FOaggregationLevel [M] - AggregationLevel -# 12 FOvalidity [M] - ValidityFlag -# 13 FOcatchReg [M] - CatchRegistration -# 14 FOstartDate [O] - Date -# 15 FOstartTime [O] - Time -# 16 FOendDate [M] - Date -# 17 FOendTime [M/O] - Time -# 18 FOduration [M/O] - int -# 19 FOdurationSource [M] - DurationSource -# 20 FOhandlingTime [O] - int -# 21 FOstartLat [O] - Decimal-90.000000-90.000000 -# 22 FOstartLon [O] - Decimal-180.000000-180.000000 -# 23 FOstopLat [O] - Decimal-90.000000-90.000000 -# 24 FOstopLon [O] - Decimal-180.000000-180.000000 -# 25 FOexclusiveEconomicZoneIndicator [O] - ISO_3166 -# 26 FOarea [M] - ICES_Area -# 27 FOrectangle [O] - StatRec -# 28 FOfisheriesManagementUnit [O] - AreaNonFAO -# 29 FOgsaSubarea [M] - Areas_GFCM_GSA -# 30 FOjurisdictionArea [O] - JurisdictionArea -# 31 FOfishingDepth [O] - int -# 32 FOwaterDepth [O] - int -# 33 FOnationalFishingActivity [O] - NationalFishingActivity -# 34 FOmetier5 [O] - Metier5_FishingActivity -# 35 FOmetier6 [M] - Metier6_FishingActivity -# 36 FOgear [M] - GearType -# 37 FOmeshSize [O] - int -# 38 FOselectionDevice [O] - SelectionDevice -# 39 FOselectionDeviceMeshSize [O] - int -# 40 FOtargetSpecies [O] - TargetSpecies -# 41 FOincidentalByCatchMitigationDeviceFirst [M] - BycatchMitigationDevice -# 42 FOincidentalByCatchMitigationDeviceTargetFirst [M] - BycatchMitigationDeviceTarget -# 43 FOincidentalByCatchMitigationDeviceSecond [M] - BycatchMitigationDevice -# 44 FOincidentalByCatchMitigationDeviceTargetSecond [M] - BycatchMitigationDeviceTarget -# 45 FOgearDimensions [O] - int -# 46 FOobservationCode [M] - ObservationCode -# 47 FOnumberTotal [DV,O] - int -# 48 FOnumberSampled [DV,O] - int -# 49 FOselectionProb [DV,O] - Decimal0.0000000000000001-1 -# 50 FOinclusionProb [DV,O] - Decimal0.0000000000000001-1 -# 51 FOselectionMethod [DV,M] - SelectionMethod -# 52 FOunitName [DV,M] - StringLength100 -# 53 FOselectionMethodCluster [DV,O] - SelectionMethod -# 54 FOnumberTotalClusters [DV,O] - int -# 55 FOnumberSampledClusters [DV,O] - int -# 56 FOselectionProbCluster [DV,O] - Decimal0.0000000000000001-1 -# 57 FOinclusionProbCluster [DV,O] - Decimal0.0000000000000001-1 -# 58 FOsampled [DV,M] - YesNoFields -# 59 FOreasonNotSampled [DV,O] - ReasonForNotSampling -# 60 FOnonResponseCollected [DV,O] - YesNoFields -# 61 FOauxiliaryVariableTotal [DV,O] - DecimalPrec3 -# 62 FOauxiliaryVariableValue [DV,O] - DecimalPrec3 -# 63 FOauxiliaryVariableName [DV,O] - AuxiliaryVariableName -# 64 FOauxiliaryVariableUnit [DV,O] - MUNIT - - FO_df <- data.frame( FOid = dataset$VSid, FTid = dataset$VSid, @@ -406,24 +241,29 @@ FO_df <- data.frame( FOendDate = as.Date(FT_df$FTdepartureDate, format='%Y-%m-%d'),#M FOendTime ="",#M FOduration = 60, #M ATTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT!!!! + FOfishingDurationDataBasis = "Measured", FOdurationSource = "Crew", FOhandlingTime = "", FOstartLat="", # ATT! FOstartLon="", # ATT! FOstopLat="", FOstopLon="", - FOexclusiveEconomicZoneIndicator = "", # + FOexclusiveEconomicZoneIndicator = "", # FOarea = "27.3.a.21", #M FOrectangle = "", FOfisheriesManagementUnit = "", FOgsaSubarea = "NotApplicable", #M FOjurisdictionArea = "", + FOgeographicalDataBasis = "Measured", + FOgeographicalSource = "", FOfishingDepth = "", FOwaterDepth = "", FOnationalFishingActivity = "", FOmetier5 = "", FOmetier6 = "OTT_CRU_70-89_2_35",#M FOgear = "OTT",#M + FOgearDataBasis = "Measured", + FOgearSource = "", FOmeshSize = "", FOselectionDevice = "", FOselectionDeviceMeshSize = "", @@ -451,52 +291,13 @@ FO_df <- data.frame( FOauxiliaryVariableTotal = "", FOauxiliaryVariableValue = "", FOauxiliaryVariableName = "", - FOauxiliaryVariableUnit = "", + FOauxiliaryVariableUnit = "", stringsAsFactors=FALSE ) #===SS============ -# 1 SSid [] - int -# 2 LEid [M/O] - int -# 3 FOid [M/O] - int -# 4 FTid [M/O] - int -# 5 OSid [M/O] - int -# 6 TEid [M/O] - int -# 7 SLid [M] - int -# 8 SSrecordType [M] - string -# 9 SSsequenceNumber [M] - int -# 10 SSstratification [DV,M] - YesNoFields -# 11 SSstratumName [DV,M] - StringLength100 -# 12 SSclustering [DV,M] - Clustering -# 13 SSclusterName [DV,M] - StringLength100 -# 14 SSobservationActivityType [DV,M] - ObservationActivityType -# 15 SScatchFraction [M] - CatchFraction -# 16 SSobservationType [DV,M] - ObservationMethod -# 17 SSsampler [O] - Sampler -# 18 SSspeciesListName [M] - StringLength100 -# 19 SSuseForCalculateZero [M] - YesNoFields -# 20 SStimeTotal [O] - int -# 21 SStimeSampled [O] - int -# 22 SSnumberTotal [DV,O] - int -# 23 SSnumberSampled [DV,O] - int -# 24 SSselectionProb [DV,O] - Decimal0.0000000000000001-1 -# 25 SSinclusionProb [DV,O] - Decimal0.0000000000000001-1 -# 26 SSselectionMethod [DV,M] - SelectionMethod -# 27 SSunitName [DV,M] - StringLength100 -# 28 SSselectionMethodCluster [DV,O] - SelectionMethod -# 29 SSnumberTotalClusters [DV,O] - int -# 30 SSnumberSampledClusters [DV,O] - int -# 31 SSselectionProbCluster [DV,O] - Decimal0.0000000000000001-1 -# 32 SSinclusionProbCluster [DV,O] - Decimal0.0000000000000001-1 -# 33 SSsampled [DV,M] - YesNoFields -# 34 SSreasonNotSampled [DV,O] - ReasonForNotSampling -# 35 SSnonResponseCollected [DV,O] - YesNoFields -# 36 SSauxiliaryVariableTotal [DV,O] - DecimalPrec3 -# 37 SSauxiliaryVariableValue [DV,O] - DecimalPrec3 -# 38 SSauxiliaryVariableName [DV,O] - AuxiliaryVariableName -# 39 SSauxiliaryVariableUnit [DV,O] - MUNIT SS_df<-data.frame( SSid = dataset$VSid, @@ -519,8 +320,10 @@ SS_df<-data.frame( SSspeciesListName = project_name_outputs, #M SSuseForCalculateZero = "N", #M SStimeTotal = "", + SStimeTotalDataBasis = "Measured", SStimeSampled = "", SSnumberTotal = 1, + SSnumberTotalDataBasis = "Measured", SSnumberSampled = 1, SSselectionProb = 1, SSinclusionProb = 1, @@ -537,68 +340,12 @@ SS_df<-data.frame( SSauxiliaryVariableTotal = "", SSauxiliaryVariableValue = "", SSauxiliaryVariableName = "", - SSauxiliaryVariableUnit = "", - stringsAsFactors=FALSE + SSauxiliaryVariableUnit = "", + stringsAsFactors=FALSE ) #====SA=========== - -# 1 SAid [] - int -# 2 SSid [] - int -# 3 SArecordType [M] - string -# 4 SAsequenceNumber [M] - int -# 5 SAparentSequenceNumber [O] - int -# 6 SAstratification [DV,M] - YesNoFields -# 7 SAstratumName [DV,M] - StringLength100 -# 8 SAspeciesCode [M] - SpecWoRMS -# 9 SAspeciesCodeFAO [O] - SpecASFIS -# 10 SAstateOfProcessing [M] - StateOfProcessing -# 11 SApresentation [M] - ProductPresentation -# 12 SAspecimensState [M] - SpecimensState -# 13 SAcatchCategory [M] - CatchCategory -# 14 SAlandingCategory [O] - LandingCategory -# 15 SAcommSizeCatScale [O] - CommercialSizeCategoryScale -# 16 SAcommSizeCat [O] - CommercialSizeCategory -# 17 SAsex [M] - SEXCO -# 18 SAexclusiveEconomicZoneIndicator [O] - ISO_3166 -# 19 SAarea [O] - ICES_Area -# 20 SArectangle [O] - StatRec -# 21 SAfisheriesManagementUnit [O] - AreaNonFAO -# 22 SAgsaSubarea [M] - Areas_GFCM_GSA -# 23 SAjurisdictionArea [O] - JurisdictionArea -# 24 SAnationalFishingActivity [O] - NationalFishingActivity -# 25 SAmetier5 [O] - Metier5_FishingActivity -# 26 SAmetier6 [O] - Metier6_FishingActivity -# 27 SAgear [O] - GearType -# 28 SAmeshSize [O] - int -# 29 SAselectionDevice [O] - SelectionDevice -# 30 SAselectionDeviceMeshSize [O] - int -# 31 SAunitType [M] - SamplingUnit -# 32 SAtotalWeightLive [O] - int -# 33 SAsampleWeightLive [O] - int -# 34 SAnumberTotal [DV,O] - Decimal0.1-999999999 -# 35 SAnumberSampled [DV,O] - Decimal0.1-999999999 -# 36 SAselectionProb [DV,O] - Decimal0-1 -# 37 SAinclusionProb [DV,O] - Decimal0-1 -# 38 SAselectionMethod [DV,M] - SelectionMethod -# 39 SAunitName [DV,M] - StringLength100 -# 40 SAlowerHierarchy [M/O] - LowerHierarchy -# 41 SAsampler [O] - Sampler -# 42 SAsampled [DV,M] - YesNoFields -# 43 SAreasonNotSampled [DV,O] - ReasonForNotSampling -# 44 SAnonResponseCollected [DV,O] - YesNoFields -# 45 SAreasonNotSampledFM [DV,O] - ReasonForNotSampling -# 46 SAreasonNotSampledBV [DV,O] - ReasonForNotSampling -# 47 SAtotalWeightMeasured [O] - int -# 48 SAsampleWeightMeasured [O] - int -# 49 SAconversionFactorMeasLive [O] - Decimal0.900-10 -# 50 SAauxiliaryVariableTotal [DV,O] - DecimalPrec3 -# 51 SAauxiliaryVariableValue [DV,O] - DecimalPrec3 -# 52 SAauxiliaryVariableName [DV,O] - AuxiliaryVariableName -# 53 SAauxiliaryVariableUnit [DV,O] - MUNIT - - SA_df<-data.frame( SAid = 1:nrow(dataset), SSid = dataset$VSid, @@ -623,10 +370,14 @@ SA_df<-data.frame( SAfisheriesManagementUnit = "", SAgsaSubarea = "NotApplicable", #M SAjurisdictionArea = "", + SAgeographicalDataBasis = "Measured", + SAgeographicalSource = "", SAnationalFishingActivity = "", SAmetier5 = "", SAmetier6 = "", SAgear = "", + SAgearDataBasis = "Measured", + SAgearSource = "", SAmeshSize = "", SAselectionDevice = "", SAselectionDeviceMeshSize = "", @@ -647,12 +398,13 @@ SA_df<-data.frame( SAreasonNotSampledFM = "", SAreasonNotSampledBV = "", SAtotalWeightMeasured = dataset[[target_var]]*100000000, # *100000000 to meet type required (integer) + SAtotalWeightMeasuredDataBasis = "Measured", SAsampleWeightMeasured = dataset[[target_var]]*100000000, # *100000000 to meet type required (integer) SAconversionFactorMeasLive = 1, SAauxiliaryVariableTotal = "", SAauxiliaryVariableValue = "", SAauxiliaryVariableName = "", - SAauxiliaryVariableUnit = "", + SAauxiliaryVariableUnit = "", stringsAsFactors=FALSE ) @@ -706,15 +458,14 @@ RDBESlist[[i]][which(grepl(colnames(RDBESlist[[i]]),pat="[A-Z]id"))]<-NULL #===Save============ - dir_outputs<-paste0(base_dir_outputs,"/", - project_name_outputs,"/") - dir.create(dir_outputs, recursive=T, showWarnings=FALSE) - filename_output_CS <- paste0("H1.csv") - filename_output_SL <- paste0("HSL.csv") - filename_output_VD <- paste0("HVD.csv") + dir_outputs<-paste0(base_dir_outputs,"/", project_name_outputs,"/") + dir.create(dir_outputs, recursive=T, showWarnings=FALSE) + filename_output_CS <- paste0(dir_outputs,"H1.csv") + filename_output_SL <- paste0(dir_outputs,"HSL.csv") + filename_output_VD <- paste0(dir_outputs,"HVD.csv") -lapply(RDBESlist, function(x, filename1 = paste0(dir_outputs,filename_output_CS)){ +lapply(RDBESlist, function(x, filename1 = filename_output_CS){ if("DErecordType" %in% colnames(x)){ write.table(x, file = filename1, append = FALSE, quote = FALSE, sep = ",", eol = "\n", na = "NA", dec = ".", row.names = FALSE, @@ -726,7 +477,7 @@ write.table(x, file = filename1, append = TRUE, quote = FALSE, sep = ",", } }) -b<-read.table(file=paste0(dir_outputs,filename_output_CS), header=F, sep=";") +b<-read.table(file = filename_output_CS, header=F, sep=";") b<-cbind(key,b) b<-b[order(as.character(b$key), decreasing=FALSE),] b<-b[!is.na(key),] @@ -734,24 +485,30 @@ b$key<-NULL b$V1<-as.character(b$V1) # saves CS output -write.table(b$V1, file=paste0(dir_outputs,filename_output_CS), col.names=FALSE, row.names = FALSE, quote=FALSE,sep=",") +write.table(b$V1, file = filename_output_CS, col.names=FALSE, row.names = FALSE, quote=FALSE,sep=",") -# -----Builds and saves dummySL----------------- +# -----Builds and saves dummySL and dummyIS----------------- SL_base$SLspeciesListName<-project_name_outputs SL_base$SLyear<-DEyear SL_base$SLinstitute<-SDinstitution -# saves SL output - write.table(SL_base, file=paste0(dir_outputs,filename_output_SL), col.names=FALSE, row.names = FALSE, quote=FALSE,sep=",") - + + # saves SL output + write.table(rbind(paste(SL_base, collapse=","), paste(IS_base, collapse=",")), file=filename_output_SL, col.names=FALSE, row.names = FALSE, quote=FALSE,sep=",") # -----Builds and saves dummyVD----------------- - # saves VD output VD_base$VDyear<-DEyear - write.table(VD_base, file=paste0(dir_outputs,filename_output_VD), col.names=FALSE, row.names = FALSE, quote=FALSE,sep=",") + write.table(VD_base, file=filename_output_VD, col.names=FALSE, row.names = FALSE, quote=FALSE,sep=",") + + + + + +# check - is this needed? +if(1==2){ @@ -773,4 +530,5 @@ library(zip) zip::zip(zipfile = paste0(project_name_outputs,".zip"), files =paste0(dir_outputs,"tmp/",dir(paste0(dir_outputs,"tmp"))), mode="cherry-pick") unlink(paste0(dir_outputs,"tmp"), force=T, recursive = T) +} diff --git a/data-raw/exampleData/TextBookExamples/BuildTextBookDataUpload_Pckg_SDAResources_coots_multistage_H1.R b/data-raw/exampleData/TextBookExamples/BuildTextBookDataUpload_Pckg_SDAResources_coots_multistage_H1.R index 4d33eb0d..533b09bb 100644 --- a/data-raw/exampleData/TextBookExamples/BuildTextBookDataUpload_Pckg_SDAResources_coots_multistage_H1.R +++ b/data-raw/exampleData/TextBookExamples/BuildTextBookDataUpload_Pckg_SDAResources_coots_multistage_H1.R @@ -48,13 +48,15 @@ DEstratumName <- "Pckg_SDAResources_coots_multistage_H1" project_name_outputs <- gsub(" ","_", paste0(DEsamplingScheme,"_", DEstratumName)) baseDir <- "./data-raw/exampleData/TextBookExamples/" - #baseDir <- "" + baseDir <- "" VD_base <- readRDS(paste0(baseDir,"aux_TextBookExamples/VD_base.rds")) SL_base <- readRDS(paste0(baseDir,"aux_TextBookExamples/SL_base.rds")) + IS_base <- readRDS(paste0(baseDir,"aux_TextBookExamples/IS_base.rds")) #nameof the directory where the outputs are saved currently - base_dir_outputs <- paste0(baseDir,"BuiltUploads/") - if(!file.exists(base_dir_outputs)) dir.create(base_dir_outputs, recursive=T, showWarnings=FALSE) + + base_dir_outputs <- paste0(baseDir,"BuiltUploads") + dir.create(base_dir_outputs, recursive=T, showWarnings=FALSE) #========Outline of Hierarchy 1================ @@ -70,26 +72,10 @@ - - #====DE=========== +#===DE============ - # 1 DEid [] - int - # 2 DErecordType [M] - string - # 3 DEsamplingScheme [M] - SamplingScheme - # 4 DEsamplingSchemeType [M] - SamplingSchemeType - # 5 DEyear [M] - Year - # 6 DEstratumName [M] - StringLength100 - # 7 DEhierarchyCorrect [M] - YesNoFields - # 8 DEhierarchy [M] - RDBESUpperHierarchy - # 9 DEsampled [DV,M] - YesNoFields - # 10 DEreasonNotSampled [DV,O] - ReasonForNotSampling - # 11 DEnonResponseCollected [DV,O] - YesNoFields - # 12 DEauxiliaryVariableTotal [DV,O] - DecimalPrec3 - # 13 DEauxiliaryVariableValue [DV,O] - DecimalPrec3 - # 14 DEauxiliaryVariableName [DV,O] - AuxiliaryVariableName - # 15 DEauxiliaryVariableUnit[DV,O]-MUNIT DE_df_base<-expand.grid(DEyear=DEyear, DEstratumName="U",stringsAsFactors=F) @@ -111,628 +97,422 @@ DEauxiliaryVariableName = "", DEauxiliaryVariableUnit = "", stringsAsFactors=FALSE - ) - - #====SD=========== - - - # x - # 1 SDid [M] - int - # 2 DEid [M] - int - # 3 SDrecordType [M] - string - # 4 SDcountry [M] - ISO_3166 - # 5 SDinstitution [M] - EDMO + ) + +#===SD============ + + +SD_df<-data.frame( + SDid=DE_df$DEid, + DEid=DE_df$DEid, + SDrecordType="SD", + SDcountry="ZW", + SDinstitution=as.integer(SDinstitution), + stringsAsFactors=FALSE +) + +#===VS============ + + + + # The PSU will be Vessels and the SSU will be Trips + df_vessel <- df[!duplicated(df[c("clutch")]),] + # adds VSid to df + df_vessel$VSid <- 1:nrow(df_vessel) + + # creates a dummyVD and adds df + # restricts VD_base to what is needed + VD_base <- VD_base[1:nrow(df_vessel),] + + df_vessel$VSencryptedVesselCode<-VD_base$VDencryptedVesselCode + # should be 0 + test<-sum(duplicated(df_vessel$VSencryptedVesselCode))==0 + if(!test) stop( "duplicated VSencryptedVesselCode") + + +VS_df <- data.frame( + VSid = df_vessel$VSid, + SDid = as.integer(1), + VDid = "", + TEid = "", + VSrecordType = 'VS', + VSsequenceNumber = 1:nrow(df_vessel),# M + VSencryptedVesselCode = df_vessel$VSencryptedVesselCode, #M + VSstratification = "Y", + VSstratumName = "U", #M + VSclustering = "N", #M + VSclusterName = "U", #M + VSsampler = "Observer", #M + VSnumberTotal = "", + VSnumberSampled = nrow(df_vessel), + VSselectionProb = "", + VSinclusionProb = "", + VSselectionMethod = "SRSWOR", #M + VSunitName = paste0("clutch_",df_vessel$clutch),#M + VSselectionMethodCluster = "", + VSnumberTotalClusters = "", + VSnumberSampledClusters = "", + VSselectionProbCluster = "", + VSinclusionProbCluster = "", + VSsampled = "Y",#M + VSreasonNotSampled = "", + VSnonResponseCollected = "", + VSauxiliaryVariableTotal = "", + VSauxiliaryVariableValue = "", + VSauxiliaryVariableName = "", + VSauxiliaryVariableUnit = "", + stringsAsFactors=FALSE + ) + + + +#====FT=========== + + + + makeChildTbl <- function(parent, data, tbl="FT", by = "dname"){ + if(tbl !="FT"){stop("Not implemented!")} + mergeCols <- c(setdiff(colnames(parent), colnames(data)), by) + df <- merge(data, parent[mergeCols], by=by) + sc <- table(df[[by]]) + df$sampled_count <- as.vector(sc[df[[by]]]) + +FT_df <- data.frame( + FTid = 1:nrow(df),#[M] - int + OSid = "",# [M/O] - int + VSid = df$VSid, #[M/O] + VDid = "", #[M] - int + SDid = "", #[M/O] - int + FOid = "", #[M/O] - int + TEid = "", #[M/O] - int + FTrecordType='FT', #[M] - string + FTencryptedVesselCode = df$VSencryptedVesselCode, #[M] + FTsequenceNumber = as.integer(1:nrow(df)), #[M] - string + FTstratification = "N", #[DV,M] - RS_Stratification + FTstratumName = "U", #[DV,M] - string + FTclustering = "N", #[DV,M] - RS_Clustering + FTclusterName = "U", #[DV,M] - string + FTsampler = "Observer", #[M] - RS_Sampler + FTsamplingType = "AtSea" , #[M] - RS_SamplingType + FTnumberOfHaulsOrSets = 1, #[O] - int + FTdepartureLocation="ZWHWN", #[O] - Harbour_LOCODE + FTdepartureDate=seq(from = as.Date("1968-01-01", format='%Y-%m-%d'), by=1, length.out=nrow(df)), #[M/O] - date + FTdepartureTime="", #[O] - time + FTarrivalLocation = "ZWHWN", #[M] - Harbour_LOCODE + FTarrivalDate=seq(from = as.Date("1968-01-01", format='%Y-%m-%d'), by=1, length.out=nrow(df)), #[M] - date + FTarrivalTime="", #[O] - time + FTdominantLandingDate= "", + FTnumberTotal= as.vector(df$csize), #[DV,O] - int + FTnumberSampled=df$sampled_count, #[DV,O] - int + FTselectionProb="", #[DV,O] - DecimalPrec10 + FTinclusionProb="", #[DV,O] - DecimalPrec10 + FTselectionMethod="SRSWOR", #[DV,M] - RS_SelectionMethod + FTunitName = df$VSid, #[DV,M] - string + FTselectionMethodCluster="", #[DV,O] - RS_SelectionMethod + FTnumberTotalClusters="", #[DV,O] - int + FTnumberSampledClusters="", #[DV,O] - int + FTselectionProbCluster="", #[DV,O] - DecimalPrec10 + FTinclusionProbCluster="", #[DV,O] - DecimalPrec10 + FTsampled="Y", #[DV,M] - YesNoFields + FTreasonNotSampled= "", #[DV,O] - RS_ReasonForNotSampling, + FTnonResponseCollected = factor("N", levels = c('N', 'Y')), + FTauxiliaryVariableTotal = "", + FTauxiliaryVariableValue = "", + FTauxiliaryVariableName = "", + FTauxiliaryVariableUnit = "", + stringsAsFactors=FALSE +) + +if(any(FT_df$FTnumberTotal < FT_df$FTnumberSampled)){ +stop("Sampled is more than total") +} +FT_df +} +FT_df <- makeChildTbl(df_vessel, df, by="clutch") + +#====FO=========== + + +FO_df <- data.frame( + FOid = FT_df$FTid, + FTid = FT_df$FTid, + SDid = "", + FOrecordType = "FO",#M + FOstratification = "N",#M + FOsequenceNumber = as.integer(1:nrow(FT_df)),#M + FOstratumName = "U",#M + FOclustering = "N",#M + FOclusterName = "U",#M + FOsampler = "Observer", + FOaggregationLevel = "H",#M + FOvalidity = "V",#M + FOcatchReg = "Lan",#M + FOstartDate = as.Date(FT_df$FTdepartureDate, format='%Y-%m-%d'), + FOstartTime = "", + FOendDate = as.Date(FT_df$FTdepartureDate, format='%Y-%m-%d'),#M + FOendTime ="",#M + FOduration = 60, #M ATTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT!!!! + FOfishingDurationDataBasis = "Measured", + FOdurationSource = "Crew", + FOhandlingTime = "", + FOstartLat="", # ATT! + FOstartLon="", # ATT! + FOstopLat="", + FOstopLon="", + FOexclusiveEconomicZoneIndicator = "", # + FOarea = "27.3.a.21", #M + FOrectangle = "", + FOfisheriesManagementUnit = "", + FOgsaSubarea = "NotApplicable", #M + FOjurisdictionArea = "", + FOgeographicalDataBasis = "Measured", + FOgeographicalSource = "", + FOfishingDepth = "", + FOwaterDepth = "", + FOnationalFishingActivity = "", + FOmetier5 = "", + FOmetier6 = "OTT_CRU_70-89_2_35",#M + FOgear = "OTT",#M + FOgearDataBasis = "Measured", + FOgearSource = "", + FOmeshSize = "", + FOselectionDevice = "", + FOselectionDeviceMeshSize = "", + FOtargetSpecies = "", + FOincidentalByCatchMitigationDeviceFirst = "NotRecorded",#M + FOincidentalByCatchMitigationDeviceTargetFirst = "NotApplicable",# [M] + FOincidentalByCatchMitigationDeviceSecond = "NotRecorded",#M + FOincidentalByCatchMitigationDeviceTargetSecond = "NotApplicable",#M + FOgearDimensions = "", + FOobservationCode = 'So', #M + FOnumberTotal = 10, + FOnumberSampled = 10, + FOselectionProb = 1, + FOinclusionProb = 1, + FOselectionMethod = "CENSUS", #M + FOunitName = FT_df$FTid, #M + FOselectionMethodCluster = "", + FOnumberTotalClusters = "", + FOnumberSampledClusters = "", + FOselectionProbCluster = "", + FOinclusionProbCluster = "", + FOsampled = "Y", #M + FOreasonNotSampled = "", + FOnonResponseCollected = "N", + FOauxiliaryVariableTotal = "", + FOauxiliaryVariableValue = "", + FOauxiliaryVariableName = "", + FOauxiliaryVariableUnit = "", + stringsAsFactors=FALSE +) + + +#===SS============ + + +SS_df<-data.frame( + SSid = FO_df$FOid, + LEid = "", + FOid = FO_df$FOid, + FTid = "", + OSid = "", + TEid = "", + SLid = "", + SSrecordType = "SS", #M + SSsequenceNumber = FO_df$FOid, #M + SSstratification = "N", #M + SSstratumName = "U", #M + SSclustering = "N", #M + SSclusterName = "U", #M + SSobservationActivityType = "Sort", #M + SScatchFraction = "Lan", #M + SSobservationType = "Volume", #M + SSsampler = "Observer", #M + SSspeciesListName = project_name_outputs, #M + SSuseForCalculateZero = "N", #M + SStimeTotal = "", + SStimeTotalDataBasis = "Measured", + SStimeSampled = "", + SSnumberTotal = 1, + SSnumberTotalDataBasis = "Measured", + SSnumberSampled = 1, + SSselectionProb = 1, + SSinclusionProb = 1, + SSselectionMethod = "CENSUS", + SSunitName = as.character(FO_df$FOid), + SSselectionMethodCluster = "", + SSnumberTotalClusters = "", + SSnumberSampledClusters = "", + SSselectionProbCluster = "", + SSinclusionProbCluster = "", + SSsampled = "Y", #M, + SSreasonNotSampled = "", + SSnonResponseCollected = "N", + SSauxiliaryVariableTotal = "", + SSauxiliaryVariableValue = "", + SSauxiliaryVariableName = "", + SSauxiliaryVariableUnit = "", + stringsAsFactors=FALSE +) + +#====SA=========== + +SA_df<-data.frame( +SAid = SS_df$SSid, +SSid = SS_df$SSid, +SArecordType = "SA", #M +SAsequenceNumber = SS_df$SSid, #M +SAparentSequenceNumber = "", +SAstratification = "N", #M +SAstratumName = "U", #M +SAspeciesCode = 107254, #M +SAspeciesCodeFAO = "NEP", +SAstateOfProcessing = "FRE", #M +SApresentation = "WHL", #M +SAspecimensState = "DeadOrZeroProbSurvival", #M +SAcatchCategory = "Lan", #M +SAlandingCategory = "", +SAcommSizeCatScale = "", +SAcommSizeCat = "", +SAsex = "U", #M +SAexclusiveEconomicZoneIndicator = "", +SAarea = "", +SArectangle = "", +SAfisheriesManagementUnit = "", +SAgsaSubarea = "NotApplicable", #M +SAjurisdictionArea = "", +SAgeographicalDataBasis = "Measured", +SAgeographicalSource = "", +SAnationalFishingActivity = "", +SAmetier5 = "", +SAmetier6 = "", +SAgear = "", +SAgearDataBasis = "Measured", +SAgearSource = "", +SAmeshSize = "", +SAselectionDevice = "", +SAselectionDeviceMeshSize = "", +SAunitType = "Box", #M +SAtotalWeightLive = "", +SAsampleWeightLive = "", +SAnumberTotal = 1, +SAnumberSampled = 1, +SAselectionProb = 1, +SAinclusionProb = 1, +SAselectionMethod = "CENSUS", #M +SAunitName = SS_df$SSid, #M +SAlowerHierarchy = "D", +SAsampler = "Observer", +SAsampled = ifelse(!is.na(df[[target_var]]),"Y","N"), #M +SAreasonNotSampled = "", +SAnonResponseCollected = "Y", +SAreasonNotSampledFM = "", +SAreasonNotSampledBV = "", +SAtotalWeightMeasured = ifelse(!is.na(df[[target_var]]),df[[target_var]]*targetMultiplyer,""), # +SAtotalWeightMeasuredDataBasis = "Measured", +SAsampleWeightMeasured = ifelse(!is.na(df[[target_var]]),df[[target_var]]*targetMultiplyer,""), # +SAconversionFactorMeasLive = 1, +SAauxiliaryVariableTotal = "", +SAauxiliaryVariableValue = "", +SAauxiliaryVariableName = "", +SAauxiliaryVariableUnit = "", +stringsAsFactors=FALSE +) + + + +#====Builds final format=========== + + +RDBESlist = list(DE = DE_df,SD = SD_df, VS = VS_df, FT = FT_df, FO = FO_df, SS = SS_df, SA = SA_df) + +#id table +a<-merge(DE_df["DEid"],SD_df[c("DEid","SDid")]) +a<-merge(a, VS_df[c("SDid","VSid")], all.x=T) +a<-merge(a, FT_df[c("VSid","FTid")], all.x=T) +a<-merge(a, FO_df[c("FTid","FOid")], all.x=T) +a<-merge(a, SS_df[c("FOid","SSid")], all.x=T) +a<-merge(a, SA_df[c("SSid","SAid")], all.x=T) + +# reorder columns +a<-a[c("DEid","SDid","VSid","FTid","FOid","SSid","SAid")] +# reorder rows +a<-data.table(a) +a<-a[order(DEid,SDid,VSid,FTid,FOid,SSid,SAid),] + +a$DEindex=apply(a[,1:which(colnames(a)=="DEid")],1,paste, collapse="_") +a$SDindex=apply(a[,1:which(colnames(a)=="SDid")],1,paste, collapse="_") +a$VSindex=apply(a[,1:which(colnames(a)=="VSid")],1,paste, collapse="_") +a$FTindex=apply(a[,1:which(colnames(a)=="FTid")],1,paste, collapse="_") +a$FOindex=apply(a[,1:which(colnames(a)=="FOid")],1,paste, collapse="_") +a$SSindex=apply(a[,1:which(colnames(a)=="SSid")],1,paste, collapse="_") +a$SAindex=apply(a[,1:which(colnames(a)=="SAid")],1,paste, collapse="_") + +key<-c(a$DEindex[match(DE_df$DEid,a$DEid)], +a$SDindex[match(SD_df$SDid,a$SDid)], +a$VSindex[match(VS_df$VSid,a$VSid)], +a$FTindex[match(FT_df$FTid,a$FTid)], +a$FOindex[match(FO_df$FOid,a$FOid)], +a$SSindex[match(SS_df$SSid,a$SSid)], +a$SAindex[match(SA_df$SAid,a$SAid)] +) + +# file production +Oldscipen<-.Options$scipen +options(scipen=500) + +#remove all id +for (i in names(RDBESlist)) +{ +RDBESlist[[i]][which(grepl(colnames(RDBESlist[[i]]),pat="[A-Z]id"))]<-NULL +} + +#===Save============ + + dir_outputs<-paste0(base_dir_outputs,"/", project_name_outputs,"/") + dir.create(dir_outputs, recursive=T, showWarnings=FALSE) + filename_output_CS <- paste0(dir_outputs,"H1.csv") + filename_output_SL <- paste0(dir_outputs,"HSL.csv") + filename_output_VD <- paste0(dir_outputs,"HVD.csv") + + +lapply(RDBESlist, function(x, filename1 = filename_output_CS){ +if("DErecordType" %in% colnames(x)){ + write.table(x, file = filename1, append = FALSE, quote = FALSE, sep = ",", + eol = "\n", na = "NA", dec = ".", row.names = FALSE, + col.names = FALSE, qmethod = c("escape", "double")) + } else { +write.table(x, file = filename1, append = TRUE, quote = FALSE, sep = ",", + eol = "\n", na = "NA", dec = ".", row.names = FALSE, + col.names = FALSE, qmethod = c("escape", "double")) + } +}) + +b<-read.table(file = filename_output_CS, header=F, sep=";") +b<-cbind(key,b) +b<-b[order(as.character(b$key), decreasing=FALSE),] +b<-b[!is.na(key),] +b$key<-NULL +b$V1<-as.character(b$V1) + +# saves CS output +write.table(b$V1, file = filename_output_CS, col.names=FALSE, row.names = FALSE, quote=FALSE,sep=",") + +# -----Builds and saves dummySL and dummyIS----------------- + + + SL_base$SLspeciesListName<-project_name_outputs + SL_base$SLyear<-DEyear + SL_base$SLinstitute<-SDinstitution + + # saves SL output + write.table(rbind(paste(SL_base, collapse=","), paste(IS_base, collapse=",")), file=filename_output_SL, col.names=FALSE, row.names = FALSE, quote=FALSE,sep=",") + +# -----Builds and saves dummyVD----------------- + +# saves VD output + VD_base$VDyear<-DEyear + write.table(VD_base, file=filename_output_VD, col.names=FALSE, row.names = FALSE, quote=FALSE,sep=",") - SD_df<-data.frame( - SDid=DE_df$DEid, - DEid=DE_df$DEid, - SDrecordType="SD", - SDcountry="ZW", - SDinstitution=as.integer(SDinstitution), - stringsAsFactors=FALSE - ) - - #===VS============ - - # x - # 1 VSid [M] - int - # 2 SDid [M] - int - # 3 VDid [M] - int - # 4 TEid [M/O] - int - # 5 VSrecordType [M] - string - # 6 VSsequenceNumber [M] - int - # 7 VSencryptedVesselCode [M] - StringLength100 - # 8 VSstratification [DV,M] - YesNoFields - # 9 VSstratumName [DV,M] - StringLength100 - # 10 VSclustering [DV,M] - Clustering - # 11 VSclusterName [DV,M] - StringLength100 - # 12 VSsampler [O] - Sampler - # 13 VSnumberTotal [DV,O] - int - # 14 VSnumberSampled [DV,O] - int - # 15 VSselectionProb [DV,O] - Decimal0.0000000000000001-1 - # 16 VSinclusionProb [DV,O] - Decimal0.0000000000000001-1 - # 17 VSselectionMethod [DV,M] - SelectionMethod - # 18 VSunitName [DV,M] - StringLength100 - # 19 VSselectionMethodCluster [DV,O] - SelectionMethod - # 20 VSnumberTotalClusters [DV,O] - int - # 21 VSnumberSampledClusters [DV,O] - int - # 22 VSselectionProbCluster [DV,O] - Decimal0.0000000000000001-1 - # 23 VSinclusionProbCluster [DV,O] - Decimal0.0000000000000001-1 - # 24 VSsampled [DV,M] - YesNoFields - # 25 VSreasonNotSampled [DV,O] - ReasonForNotSampling - - - # The PSU will be Vessels and the SSU will be Trips - df_vessel <- df[!duplicated(df[c("clutch")]),] - # adds VSid to df - df_vessel$VSid <- 1:nrow(df_vessel) - - # creates a dummyVD and adds df - # restricts VD_base to what is needed - VD_base <- VD_base[1:nrow(df_vessel),] - - df_vessel$VSencryptedVesselCode<-VD_base$VDencryptedVesselCode - # should be 0 - test<-sum(duplicated(df_vessel$VSencryptedVesselCode))==0 - if(!test) stop( "duplicated VSencryptedVesselCode") - - - VS_df <- data.frame( - VSid = df_vessel$VSid, - SDid = as.integer(1), - VDid = "", - TEid = "", - VSrecordType = 'VS', - VSsequenceNumber = 1:nrow(df_vessel),# M - VSencryptedVesselCode = df_vessel$VSencryptedVesselCode, #M - VSstratification = "Y", - VSstratumName = "U", #M - VSclustering = "N", #M - VSclusterName = "U", #M - VSsampler = "Observer", #M - VSnumberTotal = "", - VSnumberSampled = nrow(df_vessel), - VSselectionProb = "", - VSinclusionProb = "", - VSselectionMethod = "SRSWOR", #M - VSunitName = paste0("clutch_",df_vessel$clutch),#M - VSselectionMethodCluster = "", - VSnumberTotalClusters = "", - VSnumberSampledClusters = "", - VSselectionProbCluster = "", - VSinclusionProbCluster = "", - VSsampled = "Y",#M - VSreasonNotSampled = "", - VSnonResponseCollected = "", - VSauxiliaryVariableTotal = "", - VSauxiliaryVariableValue = "", - VSauxiliaryVariableName = "", - VSauxiliaryVariableUnit = "", - stringsAsFactors=FALSE - ) - - - #===FT============ - - - # 1 FTid [M] - int - # 2 OSid [M/O] - int - # 3 VSid [M/O] - int - # 4 VDid [M] - int - # 5 SDid [M/O] - int - # 6 FOid [M/O] - int - # 7 TEid [M/O] - int - # 8 FTrecordType [M] - string - # 9 FTencryptedVesselCode [M] - StringLength100 - # 10 FTsequenceNumber [M] - StringLength100 - # 11 FTstratification [DV,M] - YesNoFields - # 12 FTstratumName [DV,M] - StringLength100 - # 13 FTclustering [DV,M] - Clustering - # 14 FTclusterName [DV,M] - StringLength100 - # 15 FTsampler [O] - Sampler - # 16 FTsamplingType [M] - SamplingContext - # 17 FTnumberOfHaulsOrSets [M/O] - IntZeroToMax - # 18 FTdepartureLocation [M/O] - Harbour_LOCODE - # 19 FTdepartureDate [M/O] - Date - # 20 FTdepartureTime [M/O] - Time - # 21 FTarrivalLocation [M] - Harbour_LOCODE - # 22 FTarrivalDate [M] - Date - # 23 FTarrivalTime [M/O] - Time - # 24 FTnumberTotal [DV,O] - int - # 25 FTnumberSampled [DV,O] - int - # 26 FTselectionProb [DV,O] - Decimal0.0000000000000001-1 - # 27 FTinclusionProb [DV,O] - Decimal0.0000000000000001-1 - # 28 FTselectionMethod [DV,M] - SelectionMethod - # 29 FTunitName [DV,M] - StringLength100 - # 30 FTselectionMethodCluster [DV,O] - SelectionMethod - # 31 FTnumberTotalClusters [DV,O] - int - # 32 FTnumberSampledClusters [DV,O] - int - # 33 FTselectionProbCluster [DV,O] - Decimal0.0000000000000001-1 - # 34 FTinclusionProbCluster [DV,O] - Decimal0.0000000000000001-1 - # 35 FTsampled [DV,M] - YesNoFields - # 36 FTreasonNotSampled [DV,O] - ReasonForNotSampling - - makeChildTbl <- function(parent, data, tbl="FT", by = "dname"){ - - if(tbl !="FT"){stop("Not implemented!")} - mergeCols <- c(setdiff(colnames(parent), colnames(data)), by) - df <- merge(data, parent[mergeCols], by=by) - sc <- table(df[[by]]) - df$sampled_count <- as.vector(sc[df[[by]]]) - FT_df <- data.frame( - FTid = 1:nrow(df),#[M] - int - OSid = "",# [M/O] - int - VSid = df$VSid, #[M/O] - VDid = "", #[M] - int - SDid = "", #[M/O] - int - FOid = "", #[M/O] - int - TEid = "", #[M/O] - int - FTrecordType='FT', #[M] - string - FTencryptedVesselCode = df$VSencryptedVesselCode, #[M] - FTsequenceNumber = as.integer(1:nrow(df)), #[M] - string - FTstratification = "N", #[DV,M] - RS_Stratification - FTstratumName = "U", #[DV,M] - string - FTclustering = "N", #[DV,M] - RS_Clustering - FTclusterName = "U", #[DV,M] - string - FTsampler = "Observer", #[M] - RS_Sampler - FTsamplingType = "AtSea" , #[M] - RS_SamplingType - FTnumberOfHaulsOrSets = 1, #[O] - int - FTdepartureLocation="ZWHWN", #[O] - Harbour_LOCODE - FTdepartureDate=seq(from = as.Date("1968-01-01", format='%Y-%m-%d'), by=1, length.out=nrow(df)), #[M/O] - date - FTdepartureTime="", #[O] - time - FTarrivalLocation = "ZWHWN", #[M] - Harbour_LOCODE - FTarrivalDate=seq(from = as.Date("1968-01-01", format='%Y-%m-%d'), by=1, length.out=nrow(df)), #[M] - date - FTarrivalTime="", #[O] - time - FTnumberTotal= as.vector(df$csize), #[DV,O] - int - FTnumberSampled=df$sampled_count, #[DV,O] - int - FTselectionProb="", #[DV,O] - DecimalPrec10 - FTinclusionProb="", #[DV,O] - DecimalPrec10 - FTselectionMethod="SRSWOR", #[DV,M] - RS_SelectionMethod - FTunitName = df$VSid, #[DV,M] - string - FTselectionMethodCluster="", #[DV,O] - RS_SelectionMethod - FTnumberTotalClusters="", #[DV,O] - int - FTnumberSampledClusters="", #[DV,O] - int - FTselectionProbCluster="", #[DV,O] - DecimalPrec10 - FTinclusionProbCluster="", #[DV,O] - DecimalPrec10 - FTsampled="Y", #[DV,M] - YesNoFields - FTreasonNotSampled= "", #[DV,O] - RS_ReasonForNotSampling, - FTnonResponseCollected = factor("N", levels = c('N', 'Y')), - FTauxiliaryVariableTotal = "", - FTauxiliaryVariableValue = "", - FTauxiliaryVariableName = "", - FTauxiliaryVariableUnit = "", - stringsAsFactors=FALSE) - if(any(FT_df$FTnumberTotal < FT_df$FTnumberSampled)){ - stop("Sampled is more than total") - } - FT_df - } - - - FT_df <- makeChildTbl(df_vessel, df, by="clutch") - - - #====FO=========== - - - # 1 FOid [M] - int - # 2 FTid [M/O] - int - # 3 SDid [M/O] - int - # 4 FOrecordType [M] - string - # 5 FOstratification [DV,M] - YesNoFields - # 6 FOsequenceNumber [M] - int - # 7 FOstratumName [DV,M] - StringLength100 - # 8 FOclustering [DV,M] - Clustering - # 9 FOclusterName [DV,M] - StringLength100 - # 10 FOsampler [O] - Sampler - # 11 FOaggregationLevel [M] - AggregationLevel - # 12 FOvalidity [M] - ValidityFlag - # 13 FOcatchReg [M] - CatchRegistration - # 14 FOstartDate [O] - Date - # 15 FOstartTime [O] - Time - # 16 FOendDate [M] - Date - # 17 FOendTime [M/O] - Time - # 18 FOduration [M/O] - int - # 19 FOdurationSource [M] - DurationSource - # 20 FOhandlingTime [O] - int - # 21 FOstartLat [O] - Decimal-90.000000-90.000000 - # 22 FOstartLon [O] - Decimal-180.000000-180.000000 - # 23 FOstopLat [O] - Decimal-90.000000-90.000000 - # 24 FOstopLon [O] - Decimal-180.000000-180.000000 - # 25 FOexclusiveEconomicZoneIndicator [O] - ISO_3166 - # 26 FOarea [M] - ICES_Area - # 27 FOrectangle [O] - StatRec - # 28 FOgsaSubarea [M] - Areas_GFCM_GSA - # 29 FOjurisdictionArea [O] - JurisdictionArea - # 30 FOfishingDepth [O] - int - # 31 FOwaterDepth [O] - int - # 32 FOnationalFishingActivity [O] - NationalFishingActivity - # 33 FOmetier5 [O] - Metier5_FishingActivity - # 34 FOmetier6 [M] - Metier6_FishingActivity - # 35 FOgear [M] - GearType - # 36 FOmeshSize [O] - int - # 37 FOselectionDevice [O] - SelectionDevice - # 38 FOselectionDeviceMeshSize [O] - int - # 39 FOtargetSpecies [O] - TargetSpecies - # 40 FOincidentalByCatchMitigationDeviceFirst [M] - BycatchMitigationDevice - # 41 FOincidentalByCatchMitigationDeviceTargetFirst [M] - BycatchMitigationDeviceTarget - # 42 FOincidentalByCatchMitigationDeviceSecond [M] - BycatchMitigationDevice - # 43 FOincidentalByCatchMitigationDeviceTargetSecond [M] - BycatchMitigationDeviceTarget - # 44 FOgearDimensions [O] - int - # 45 FOobservationCode [M] - ObservationCode - # 46 FOnumberTotal [DV,O] - int - # 47 FOnumberSampled [DV,O] - int - # 48 FOselectionProb [DV,O] - Decimal0.0000000000000001-1 - # 49 FOinclusionProb [DV,O] - Decimal0.0000000000000001-1 - # 50 FOselectionMethod [DV,M] - SelectionMethod - # 51 FOunitName [DV,M] - StringLength100 - # 52 FOselectionMethodCluster [DV,O] - SelectionMethod - # 53 FOnumberTotalClusters [DV,O] - int - # 54 FOnumberSampledClusters [DV,O] - int - # 55 FOselectionProbCluster [DV,O] - Decimal0.0000000000000001-1 - # 56 FOinclusionProbCluster [DV,O] - Decimal0.0000000000000001-1 - # 57 FOsampled [DV,M] - YesNoFields - # 58 FOreasonNotSampled [DV,O] - ReasonForNotSampling - - - FO_df <- data.frame( - FOid = FT_df$FTid, - FTid = FT_df$FTid, - SDid = "", - FOrecordType = "FO",#M - FOstratification = "N",#M - FOsequenceNumber = as.integer(1:nrow(FT_df)),#M - FOstratumName = "U",#M - FOclustering = "N",#M - FOclusterName = "U",#M - FOsampler = "Observer", - FOaggregationLevel = "H",#M - FOvalidity = "V",#M - FOcatchReg = "Lan",#M - FOstartDate = as.Date(FT_df$FTdepartureDate, format='%Y-%m-%d'), - FOstartTime = "", - FOendDate = as.Date(FT_df$FTdepartureDate, format='%Y-%m-%d'),#M - FOendTime ="",#M - FOduration = 60, #M ATTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT!!!! - FOdurationSource = "Crew", - FOhandlingTime = "", - FOstartLat="", # ATT! - FOstartLon="", # ATT! - FOstopLat="", - FOstopLon="", - FOexclusiveEconomicZoneIndicator = "", # - FOarea = "27.3.a.21", #M - FOrectangle = "", - FOfisheriesManagementUnit = "", - FOgsaSubarea = "NotApplicable", #M - FOjurisdictionArea = "", - FOfishingDepth = "", - FOwaterDepth = "", - FOnationalFishingActivity = "", - FOmetier5 = "", - FOmetier6 = "OTT_CRU_70-89_2_35",#M - FOgear = "OTT",#M - FOmeshSize = "", - FOselectionDevice = "", - FOselectionDeviceMeshSize = "", - FOtargetSpecies = "", - FOincidentalByCatchMitigationDeviceFirst = "NotRecorded",#M - FOincidentalByCatchMitigationDeviceTargetFirst = "NotApplicable",# [M] - FOincidentalByCatchMitigationDeviceSecond = "NotRecorded",#M - FOincidentalByCatchMitigationDeviceTargetSecond = "NotApplicable",#M - FOgearDimensions = "", - FOobservationCode = 'So', #M - FOnumberTotal = 10, - FOnumberSampled = 10, - FOselectionProb = 1, - FOinclusionProb = 1, - FOselectionMethod = "CENSUS", #M - FOunitName = FT_df$FTid, #M - FOselectionMethodCluster = "", - FOnumberTotalClusters = "", - FOnumberSampledClusters = "", - FOselectionProbCluster = "", - FOinclusionProbCluster = "", - FOsampled = "Y", #M - FOreasonNotSampled = "", - FOnonResponseCollected = factor("N", levels = c('N', 'Y')), - FOfisheriesManagementUnit = "", - FOauxiliaryVariableTotal = "", - FOauxiliaryVariableValue = "", - FOauxiliaryVariableName = "", - FOauxiliaryVariableUnit = "", - stringsAsFactors=FALSE - ) - - - - # 1 SSid [] - int - # 2 LEid [M/O] - int - # 3 FOid [M/O] - int - # 4 FTid [M/O] - int - # 5 OSid [M/O] - int - # 6 TEid [M/O] - int - # 7 SLid [M] - int - # 8 SSrecordType [M] - string - # 9 SSsequenceNumber [M] - int - # 10 SSstratification [DV,M] - YesNoFields - # 11 SSstratumName [DV,M] - StringLength100 - # 12 SSclustering [DV,M] - Clustering - # 13 SSclusterName [DV,M] - StringLength100 - # 14 SSobservationActivityType [DV,M] - ObservationActivityType - # 15 SScatchFraction [M] - CatchFraction - # 16 SSobservationType [DV,M] - ObservationMethod - # 17 SSsampler [O] - Sampler - # 18 SSspeciesListName [M] - StringLength100 - # 19 SSuseForCalculateZero [M] - YesNoFields - # 20 SStimeTotal [O] - int - # 21 SStimeSampled [O] - int - # 22 SSnumberTotal [DV,O] - int - # 23 SSnumberSampled [DV,O] - int - # 24 SSselectionProb [DV,O] - Decimal0.0000000000000001-1 - # 25 SSinclusionProb [DV,O] - Decimal0.0000000000000001-1 - # 26 SSselectionMethod [DV,M] - SelectionMethod - # 27 SSunitName [DV,M] - StringLength100 - # 28 SSselectionMethodCluster [DV,O] - SelectionMethod - # 29 SSnumberTotalClusters [DV,O] - int - # 30 SSnumberSampledClusters [DV,O] - int - # 31 SSselectionProbCluster [DV,O] - Decimal0.0000000000000001-1 - # 32 SSinclusionProbCluster [DV,O] - Decimal0.0000000000000001-1 - # 33 SSsampled [DV,M] - YesNoFields - # 34 SSreasonNotSampled [DV,O] - ReasonForNotSampling - - #====SS=========== - - SS_df<-data.frame( - SSid = FO_df$FOid, - LEid = "", - FOid = FO_df$FOid, - FTid = "", - OSid = "", - TEid = "", - SLid = "", - SSrecordType = "SS", #M - SSsequenceNumber = FO_df$FOid, #M - SSstratification = "N", #M - SSstratumName = "U", #M - SSclustering = "N", #M - SSclusterName = "U", #M - SSobservationActivityType = "Sort", #M - SScatchFraction = "Lan", #M - SSobservationType = "Volume", #M - SSsampler = "Observer", #M - SSspeciesListName = project_name_outputs, #M - SSuseForCalculateZero = "N", #M - SStimeTotal = "", - SStimeSampled = "", - SSnumberTotal = 1, - SSnumberSampled = 1, - SSselectionProb = 1, - SSinclusionProb = 1, - SSselectionMethod = "CENSUS", - SSunitName = as.character(FO_df$FOid), - SSselectionMethodCluster = "", - SSnumberTotalClusters = "", - SSnumberSampledClusters = "", - SSselectionProbCluster = "", - SSinclusionProbCluster = "", - SSsampled = "Y", #M, - SSreasonNotSampled = "", - SSnonResponseCollected = "N", - SSauxiliaryVariableTotal = "", - SSauxiliaryVariableValue = "", - SSauxiliaryVariableName = "", - SSauxiliaryVariableUnit = "", - stringsAsFactors = F - ) - - #====SA=========== - - - # 1 SAid [] - int - # 2 SSid [] - int - # 3 SArecordType [M] - string - # 4 SAsequenceNumber [M] - int - # 5 SAparentSequenceNumber [O] - int - # 6 SAstratification [DV,M] - YesNoFields - # 7 SAstratumName [DV,M] - StringLength100 - # 8 SAspeciesCode [M] - SpecWoRMS - # 9 SAspeciesCodeFAO [O] - SpecASFIS - # 10 SAstateOfProcessing [M] - StateOfProcessing - # 11 SApresentation [M] - ProductPresentation - # 12 SAspecimensState [M] - SpecimensState - # 13 SAcatchCategory [M] - CatchCategory - # 14 SAlandingCategory [O] - LandingCategory - # 15 SAcommSizeCatScale [O] - CommercialSizeCategoryScale - # 16 SAcommSizeCat [O] - CommercialSizeCategory - # 17 SAsex [M] - SEXCO - # 18 SAexclusiveEconomicZoneIndicator [O] - ISO_3166 - # 19 SAarea [O] - ICES_Area - # 20 SArectangle [O] - StatRec - # 21 SAgsaSubarea [M] - Areas_GFCM_GSA - # 22 SAjurisdictionArea [O] - JurisdictionArea - # 23 SAnationalFishingActivity [O] - NationalFishingActivity - # 24 SAmetier5 [O] - Metier5_FishingActivity - # 25 SAmetier6 [O] - Metier6_FishingActivity - # 26 SAgear [O] - GearType - # 27 SAmeshSize [O] - int - # 28 SAselectionDevice [O] - SelectionDevice - # 29 SAselectionDeviceMeshSize [O] - int - # 30 SAunitType [M] - SamplingUnit - # 31 SAtotalWeightLive [O] - int - # 32 SAsampleWeightLive [O] - int - # 33 SAnumberTotal [DV,O] - Decimal0.1-999999999 - # 34 SAnumberSampled [DV,O] - Decimal0.1-999999999 - # 35 SAselectionProb [DV,O] - Decimal0.0000000000000001-1 - # 36 SAinclusionProb [DV,O] - Decimal0.0000000000000001-1 - # 37 SAselectionMethod [DV,M] - SelectionMethod - # 38 SAunitName [DV,M] - StringLength100 - # 39 SAlowerHierarchy [M/O] - LowerHierarchy - # 40 SAsampler [O] - Sampler - # 41 SAsampled [DV,M] - YesNoFields - # 42 SAreasonNotSampledFM [DV,O] - ReasonForNotSampling - # 43 SAreasonNotSampledBV [DV,O] - ReasonForNotSampling - # 44 SAtotalWeightMeasured [O] - int - # 45 SAsampleWeightMeasured [O] - int - # 46 SAconversionFactorMeasLive [O] - Decimal0.900-10 - - SA_df<-data.frame( - SAid = SS_df$SSid, - SSid = SS_df$SSid, - SArecordType = "SA", #M - SAsequenceNumber = SS_df$SSid, #M - SAparentSequenceNumber = "", - SAstratification = "N", #M - SAstratumName = "U", #M - SAspeciesCode = 107254, #M - SAspeciesCodeFAO = "NEP", - SAstateOfProcessing = "FRE", #M - SApresentation = "WHL", #M - SAspecimensState = "DeadOrZeroProbSurvival", #M - SAcatchCategory = "Lan", #M - SAlandingCategory = "", - SAcommSizeCatScale = "", - SAcommSizeCat = "", - SAsex = "U", #M - SAexclusiveEconomicZoneIndicator = "", - SAarea = "", - SArectangle = "", - SAfisheriesManagementUnit = "", - SAgsaSubarea = "NotApplicable", #M - SAjurisdictionArea = "", - SAnationalFishingActivity = "", - SAmetier5 = "", - SAmetier6 = "", - SAgear = "", - SAmeshSize = "", - SAselectionDevice = "", - SAselectionDeviceMeshSize = "", - SAunitType = "Box", #M - SAtotalWeightLive = "", - SAsampleWeightLive = "", - SAnumberTotal = 1, - SAnumberSampled = 1, - SAselectionProb = 1, - SAinclusionProb = 1, - SAselectionMethod = "CENSUS", #M - SAunitName = SS_df$SSid, #M - SAlowerHierarchy = "D", - SAsampler = "Observer", - SAsampled = ifelse(!is.na(df[[target_var]]),"Y","N"), #M - SAreasonNotSampled = "", - SAnonResponseCollected = "Y", - SAreasonNotSampledFM = "", - SAreasonNotSampledBV = "", - SAtotalWeightMeasured = ifelse(!is.na(df[[target_var]]),df[[target_var]]*targetMultiplyer,""), # - SAsampleWeightMeasured = ifelse(!is.na(df[[target_var]]),df[[target_var]]*targetMultiplyer,""), # - SAconversionFactorMeasLive = 1, - SAauxiliaryVariableTotal = "", - SAauxiliaryVariableValue = "", - SAauxiliaryVariableName = "", - SAauxiliaryVariableUnit = "", - stringsAsFactors=FALSE - ) - - - - #====Builds final format=========== - - RDBESlist <- list(DE = DE_df, - SD = SD_df, - VS = VS_df, - FT = FT_df, - FO = FO_df, - SS = SS_df, - SA = SA_df) - - #id table - a<-merge(DE_df["DEid"],SD_df[c("DEid","SDid")]) - a<-merge(a, VS_df[c("SDid","VSid")], all.x=T) - a<-merge(a, FT_df[c("VSid","FTid")], all.x=T) - a<-merge(a, FO_df[c("FTid","FOid")], all.x=T) - a<-merge(a, SS_df[c("FOid","SSid")], all.x=T) - a<-merge(a, SA_df[c("SSid","SAid")], all.x=T) - - # reorder columns - a<-a[c("DEid","SDid","VSid","FTid","FOid","SSid","SAid")] - # reorder rows - a<-data.table(a) - a<-a[order(DEid,SDid,VSid,FTid,FOid,SSid,SAid),] - - a$DEindex=apply(a[,1:which(colnames(a)=="DEid")],1,paste, collapse="_") - a$SDindex=apply(a[,1:which(colnames(a)=="SDid")],1,paste, collapse="_") - a$VSindex=apply(a[,1:which(colnames(a)=="VSid")],1,paste, collapse="_") - a$FTindex=apply(a[,1:which(colnames(a)=="FTid")],1,paste, collapse="_") - a$FOindex=apply(a[,1:which(colnames(a)=="FOid")],1,paste, collapse="_") - a$SSindex=apply(a[,1:which(colnames(a)=="SSid")],1,paste, collapse="_") - a$SAindex=apply(a[,1:which(colnames(a)=="SAid")],1,paste, collapse="_") - - key<-c(a$DEindex[match(DE_df$DEid,a$DEid)], - a$SDindex[match(SD_df$SDid,a$SDid)], - a$VSindex[match(VS_df$VSid,a$VSid)], - a$FTindex[match(FT_df$FTid,a$FTid)], - a$FOindex[match(FO_df$FOid,a$FOid)], - a$SSindex[match(SS_df$SSid,a$SSid)], - a$SAindex[match(SA_df$SAid,a$SAid)] - ) - - # file production - Oldscipen<-.Options$scipen - options(scipen=500) - - #remove all id - for (i in names(RDBESlist)) - { - RDBESlist[[i]][which(grepl(colnames(RDBESlist[[i]]),pat="[A-Z]id"))]<-NULL - } - - #===Save============= - - dir_outputs<-paste0(base_dir_outputs, - project_name_outputs,"/") - dir.create(dir_outputs, recursive=T, showWarnings=FALSE) - filename_output_CS <- paste0("H1.csv") - filename_output_SL <- paste0("HSL.csv") - filename_output_VD <- paste0("HVD.csv") - - - - lapply(RDBESlist, function(x, filename1 = paste0(dir_outputs,filename_output_CS)){ - if("DErecordType" %in% colnames(x)){ - write.table(x, file = filename1, append = FALSE, quote = FALSE, sep = ",", - eol = "\n", na = "NA", dec = ".", row.names = FALSE, - col.names = FALSE, qmethod = c("escape", "double")) - } else { - write.table(x, file = filename1, append = TRUE, quote = FALSE, sep = ",", - eol = "\n", na = "NA", dec = ".", row.names = FALSE, - col.names = FALSE, qmethod = c("escape", "double")) - } - }) - - b<-read.table(file=paste0(dir_outputs,filename_output_CS), header=F, sep=";") - b<-cbind(key,b) - b<-b[order(as.character(b$key), decreasing=FALSE),] - b<-b[!is.na(key),] - b$key<-NULL - b$V1<-as.character(b$V1) - - # saves CS output - write.table(b$V1, file=paste0(dir_outputs,filename_output_CS), col.names=FALSE, row.names = FALSE, quote=FALSE,sep=",") - - #---- Builds and saves dummySL ------------ - - SL_base$SLspeciesListName<-project_name_outputs - SL_base$SLyear<-DEyear - SL_base$SLinstitute<-SDinstitution - # saves SL output - write.table(SL_base, file=paste0(dir_outputs,filename_output_SL), col.names=FALSE, row.names = FALSE, quote=FALSE,sep=",") - - - #-------Builds and saves dummyVD--------------- - - - # saves VD output - - write.table(VD_base, file=paste0(dir_outputs,filename_output_VD), col.names=FALSE, row.names = FALSE, quote=FALSE,sep=",") diff --git a/data-raw/exampleData/TextBookExamples/BuildTextBookDataUpload_Pckg_SDAResources_gpa_H1.R b/data-raw/exampleData/TextBookExamples/BuildTextBookDataUpload_Pckg_SDAResources_gpa_H1.R index 417116c2..231036eb 100644 --- a/data-raw/exampleData/TextBookExamples/BuildTextBookDataUpload_Pckg_SDAResources_gpa_H1.R +++ b/data-raw/exampleData/TextBookExamples/BuildTextBookDataUpload_Pckg_SDAResources_gpa_H1.R @@ -18,7 +18,6 @@ # in SA each gpa score is a SAsampleWeightMeasured # ATT gpa scores are *100 to meet type requirement (integer) - rm(list=ls()) library(data.table) @@ -41,10 +40,12 @@ baseDir <- "" VD_base <- readRDS(paste0(baseDir,"aux_TextBookExamples/VD_base.rds")) SL_base <- readRDS(paste0(baseDir,"aux_TextBookExamples/SL_base.rds")) + IS_base <- readRDS(paste0(baseDir,"aux_TextBookExamples/IS_base.rds")) #nameof the directory where the outputs are saved currently + base_dir_outputs <- paste0(baseDir,"BuiltUploads") - if(!file.exists(base_dir_outputs)) dir.create(base_dir_outputs, recursive=T, showWarnings=FALSE) + dir.create(base_dir_outputs, recursive=T, showWarnings=FALSE) #========Outline of Hierarchy 1================ @@ -64,21 +65,6 @@ -# 1 DEid [] - int -# 2 DErecordType [M] - string -# 3 DEsamplingScheme [M] - SamplingScheme -# 4 DEsamplingSchemeType [M] - SamplingSchemeType -# 5 DEyear [M] - Year -# 6 DEstratumName [M] - StringLength100 -# 7 DEhierarchyCorrect [M] - YesNoFields -# 8 DEhierarchy [M] - RDBESUpperHierarchy -# 9 DEsampled [DV,M] - YesNoFields -# 10 DEreasonNotSampled [DV,O] - ReasonForNotSampling -# 11 DEnonResponseCollected [DV,O] - YesNoFields -# 12 DEauxiliaryVariableTotal [DV,O] - DecimalPrec3 -# 13 DEauxiliaryVariableValue [DV,O] - DecimalPrec3 -# 14 DEauxiliaryVariableName [DV,O] - AuxiliaryVariableName -# 15 DEauxiliaryVariableUnit[DV,O]-MUNIT DE_df_base<-expand.grid(DEyear=DEyear, DEstratumName="U",stringsAsFactors=F) @@ -98,20 +84,13 @@ DE_df<-data.frame( DEauxiliaryVariableTotal = "", DEauxiliaryVariableValue = "", DEauxiliaryVariableName = "", - DEauxiliaryVariableUnit = "", + DEauxiliaryVariableUnit = "", stringsAsFactors=FALSE ) #===SD============ - # x -# 1 SDid [M] - int -# 2 DEid [M] - int -# 3 SDrecordType [M] - string -# 4 SDcountry [M] - ISO_3166 -# 5 SDinstitution [M] - EDMO - SD_df<-data.frame( SDid=DE_df$DEid, DEid=DE_df$DEid, @@ -124,37 +103,6 @@ SD_df<-data.frame( #===VS============ - # x -# 1 VSid [M] - int -# 2 SDid [M] - int -# 3 VDid [M] - int -# 4 TEid [M/O] - int -# 5 VSrecordType [M] - string -# 6 VSsequenceNumber [M] - int -# 7 VSencryptedVesselCode [M] - StringLength100 -# 8 VSstratification [DV,M] - YesNoFields -# 9 VSstratumName [DV,M] - StringLength100 -# 10 VSclustering [DV,M] - Clustering -# 11 VSclusterName [DV,M] - StringLength100 -# 12 VSsampler [O] - Sampler -# 13 VSnumberTotal [DV,O] - int -# 14 VSnumberSampled [DV,O] - int -# 15 VSselectionProb [DV,O] - Decimal0.0000000000000001-1 -# 16 VSinclusionProb [DV,O] - Decimal0.0000000000000001-1 -# 17 VSselectionMethod [DV,M] - SelectionMethod -# 18 VSunitName [DV,M] - StringLength100 -# 19 VSselectionMethodCluster [DV,O] - SelectionMethod -# 20 VSnumberTotalClusters [DV,O] - int -# 21 VSnumberSampledClusters [DV,O] - int -# 22 VSselectionProbCluster [DV,O] - Decimal0.0000000000000001-1 -# 23 VSinclusionProbCluster [DV,O] - Decimal0.0000000000000001-1 -# 24 VSsampled [DV,M] - YesNoFields -# 25 VSreasonNotSampled [DV,O] - ReasonForNotSampling -# 26 VSnonResponseCollected [DV,O] - YesNoFields -# 27 VSauxiliaryVariableTotal [DV,O] - DecimalPrec3 -# 28 VSauxiliaryVariableValue [DV,O] - DecimalPrec3 -# 29 VSauxiliaryVariableName [DV,O] - AuxiliaryVariableName -# 30 VSauxiliaryVariableUnit [DV,O] - MUNIT #check_All_fields("VS") @@ -191,7 +139,7 @@ VS_df <- data.frame( VSselectionMethod = "SRSWOR", #M VSunitName = dataset$VSid,#M VSselectionMethodCluster = "SRSWOR", - VSnumberTotalClusters = "", + VSnumberTotalClusters = "", VSnumberSampledClusters = "", VSselectionProbCluster = "", VSinclusionProbCluster = "", @@ -201,7 +149,7 @@ VS_df <- data.frame( VSauxiliaryVariableTotal = "", VSauxiliaryVariableValue = "", VSauxiliaryVariableName = "", - VSauxiliaryVariableUnit = "", + VSauxiliaryVariableUnit = "", stringsAsFactors=FALSE ) @@ -223,49 +171,6 @@ VS_df$VSinclusionProbCluster<-5/100 #====FT=========== - -# 1 FTid [M] - int -# 2 OSid [M/O] - int -# 3 VSid [M/O] - int -# 4 VDid [M] - int -# 5 SDid [M/O] - int -# 6 FOid [M/O] - int -# 7 TEid [M/O] - int -# 8 FTrecordType [M] - string -# 9 FTencryptedVesselCode [M] - StringLength100 -# 10 FTsequenceNumber [M] - int -# 11 FTstratification [DV,M] - YesNoFields -# 12 FTstratumName [DV,M] - StringLength100 -# 13 FTclustering [DV,M] - Clustering -# 14 FTclusterName [DV,M] - StringLength100 -# 15 FTsampler [O] - Sampler -# 16 FTsamplingType [M] - SamplingContext -# 17 FTnumberOfHaulsOrSets [M/O] - IntZeroToMax -# 18 FTdepartureLocation [M/O] - Harbour_LOCODE -# 19 FTdepartureDate [M/O] - Date -# 20 FTdepartureTime [M/O] - StringLength60 -# 21 FTarrivalLocation [M] - Harbour_LOCODE -# 22 FTarrivalDate [M] - Date -# 23 FTarrivalTime [M/O] - StringLength60 -# 24 FTnumberTotal [DV,O] - int -# 25 FTnumberSampled [DV,O] - int -# 26 FTselectionProb [DV,O] - Decimal0-1 -# 27 FTinclusionProb [DV,O] - Decimal0-1 -# 28 FTselectionMethod [DV,M] - SelectionMethod -# 29 FTunitName [DV,M] - StringLength100 -# 30 FTselectionMethodCluster [DV,O] - SelectionMethod -# 31 FTnumberTotalClusters [DV,O] - int -# 32 FTnumberSampledClusters [DV,O] - int -# 33 FTselectionProbCluster [DV,O] - Decimal0-1 -# 34 FTinclusionProbCluster [DV,O] - Decimal0-1 -# 35 FTsampled [DV,M] - YesNoFields -# 36 FTreasonNotSampled [DV,O] - ReasonForNotSampling -# 37 FTnonResponseCollected [DV,O] - YesNoFields -# 38 FTauxiliaryVariableTotal [DV,O] - DecimalPrec3 -# 39 FTauxiliaryVariableValue [DV,O] - DecimalPrec3 -# 40 FTauxiliaryVariableName [DV,O] - AuxiliaryVariableName -# 41 FTauxiliaryVariableUnit [DV,O] - MUNIT - FT_df <- data.frame( FTid = dataset$VSid,#[M] - int OSid = "",# [M/O] - int @@ -290,6 +195,7 @@ FT_df <- data.frame( FTarrivalLocation = "ZWHWN", #[M] - Harbour_LOCODE FTarrivalDate=seq(from = as.Date("1965-01-01", format='%Y-%m-%d'), by=1, length.out=nrow(dataset)), #[M] - date FTarrivalTime="", #[O] - time + FTdominantLandingDate= "", FTnumberTotal= 1, #[DV,O] - int FTnumberSampled=1, #[DV,O] - int FTselectionProb=1, #[DV,O] - DecimalPrec10 @@ -307,7 +213,7 @@ FT_df <- data.frame( FTauxiliaryVariableTotal = "", FTauxiliaryVariableValue = "", FTauxiliaryVariableName = "", - FTauxiliaryVariableUnit = "", + FTauxiliaryVariableUnit = "", stringsAsFactors=FALSE ) @@ -315,72 +221,6 @@ FT_df <- data.frame( #====FO=========== -# 1 FOid [M] - int -# 2 FTid [M/O] - int -# 3 SDid [M/O] - int -# 4 FOrecordType [M] - string -# 5 FOstratification [DV,M] - YesNoFields -# 6 FOsequenceNumber [M] - int -# 7 FOstratumName [DV,M] - StringLength100 -# 8 FOclustering [DV,M] - Clustering -# 9 FOclusterName [DV,M] - StringLength100 -# 10 FOsampler [O] - Sampler -# 11 FOaggregationLevel [M] - AggregationLevel -# 12 FOvalidity [M] - ValidityFlag -# 13 FOcatchReg [M] - CatchRegistration -# 14 FOstartDate [O] - Date -# 15 FOstartTime [O] - Time -# 16 FOendDate [M] - Date -# 17 FOendTime [M/O] - Time -# 18 FOduration [M/O] - int -# 19 FOdurationSource [M] - DurationSource -# 20 FOhandlingTime [O] - int -# 21 FOstartLat [O] - Decimal-90.000000-90.000000 -# 22 FOstartLon [O] - Decimal-180.000000-180.000000 -# 23 FOstopLat [O] - Decimal-90.000000-90.000000 -# 24 FOstopLon [O] - Decimal-180.000000-180.000000 -# 25 FOexclusiveEconomicZoneIndicator [O] - ISO_3166 -# 26 FOarea [M] - ICES_Area -# 27 FOrectangle [O] - StatRec -# 28 FOfisheriesManagementUnit [O] - AreaNonFAO -# 29 FOgsaSubarea [M] - Areas_GFCM_GSA -# 30 FOjurisdictionArea [O] - JurisdictionArea -# 31 FOfishingDepth [O] - int -# 32 FOwaterDepth [O] - int -# 33 FOnationalFishingActivity [O] - NationalFishingActivity -# 34 FOmetier5 [O] - Metier5_FishingActivity -# 35 FOmetier6 [M] - Metier6_FishingActivity -# 36 FOgear [M] - GearType -# 37 FOmeshSize [O] - int -# 38 FOselectionDevice [O] - SelectionDevice -# 39 FOselectionDeviceMeshSize [O] - int -# 40 FOtargetSpecies [O] - TargetSpecies -# 41 FOincidentalByCatchMitigationDeviceFirst [M] - BycatchMitigationDevice -# 42 FOincidentalByCatchMitigationDeviceTargetFirst [M] - BycatchMitigationDeviceTarget -# 43 FOincidentalByCatchMitigationDeviceSecond [M] - BycatchMitigationDevice -# 44 FOincidentalByCatchMitigationDeviceTargetSecond [M] - BycatchMitigationDeviceTarget -# 45 FOgearDimensions [O] - int -# 46 FOobservationCode [M] - ObservationCode -# 47 FOnumberTotal [DV,O] - int -# 48 FOnumberSampled [DV,O] - int -# 49 FOselectionProb [DV,O] - Decimal0.0000000000000001-1 -# 50 FOinclusionProb [DV,O] - Decimal0.0000000000000001-1 -# 51 FOselectionMethod [DV,M] - SelectionMethod -# 52 FOunitName [DV,M] - StringLength100 -# 53 FOselectionMethodCluster [DV,O] - SelectionMethod -# 54 FOnumberTotalClusters [DV,O] - int -# 55 FOnumberSampledClusters [DV,O] - int -# 56 FOselectionProbCluster [DV,O] - Decimal0.0000000000000001-1 -# 57 FOinclusionProbCluster [DV,O] - Decimal0.0000000000000001-1 -# 58 FOsampled [DV,M] - YesNoFields -# 59 FOreasonNotSampled [DV,O] - ReasonForNotSampling -# 60 FOnonResponseCollected [DV,O] - YesNoFields -# 61 FOauxiliaryVariableTotal [DV,O] - DecimalPrec3 -# 62 FOauxiliaryVariableValue [DV,O] - DecimalPrec3 -# 63 FOauxiliaryVariableName [DV,O] - AuxiliaryVariableName -# 64 FOauxiliaryVariableUnit [DV,O] - MUNIT - - FO_df <- data.frame( FOid = dataset$VSid, FTid = dataset$VSid, @@ -400,24 +240,29 @@ FO_df <- data.frame( FOendDate = as.Date(FT_df$FTdepartureDate, format='%Y-%m-%d'),#M FOendTime ="",#M FOduration = 60, #M ATTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT!!!! + FOfishingDurationDataBasis = "Measured", FOdurationSource = "Crew", FOhandlingTime = "", FOstartLat="", # ATT! FOstartLon="", # ATT! FOstopLat="", FOstopLon="", - FOexclusiveEconomicZoneIndicator = "", # + FOexclusiveEconomicZoneIndicator = "", # FOarea = "27.3.a.21", #M FOrectangle = "", FOfisheriesManagementUnit = "", FOgsaSubarea = "NotApplicable", #M FOjurisdictionArea = "", + FOgeographicalDataBasis = "Measured", + FOgeographicalSource = "Crew", FOfishingDepth = "", FOwaterDepth = "", FOnationalFishingActivity = "", FOmetier5 = "", FOmetier6 = "OTT_CRU_70-89_2_35",#M FOgear = "OTT",#M + FOgearDataBasis = "Measured", + FOgearSource = "", FOmeshSize = "", FOselectionDevice = "", FOselectionDeviceMeshSize = "", @@ -445,52 +290,13 @@ FO_df <- data.frame( FOauxiliaryVariableTotal = "", FOauxiliaryVariableValue = "", FOauxiliaryVariableName = "", - FOauxiliaryVariableUnit = "", + FOauxiliaryVariableUnit = "", stringsAsFactors=FALSE ) #===SS============ -# 1 SSid [] - int -# 2 LEid [M/O] - int -# 3 FOid [M/O] - int -# 4 FTid [M/O] - int -# 5 OSid [M/O] - int -# 6 TEid [M/O] - int -# 7 SLid [M] - int -# 8 SSrecordType [M] - string -# 9 SSsequenceNumber [M] - int -# 10 SSstratification [DV,M] - YesNoFields -# 11 SSstratumName [DV,M] - StringLength100 -# 12 SSclustering [DV,M] - Clustering -# 13 SSclusterName [DV,M] - StringLength100 -# 14 SSobservationActivityType [DV,M] - ObservationActivityType -# 15 SScatchFraction [M] - CatchFraction -# 16 SSobservationType [DV,M] - ObservationMethod -# 17 SSsampler [O] - Sampler -# 18 SSspeciesListName [M] - StringLength100 -# 19 SSuseForCalculateZero [M] - YesNoFields -# 20 SStimeTotal [O] - int -# 21 SStimeSampled [O] - int -# 22 SSnumberTotal [DV,O] - int -# 23 SSnumberSampled [DV,O] - int -# 24 SSselectionProb [DV,O] - Decimal0.0000000000000001-1 -# 25 SSinclusionProb [DV,O] - Decimal0.0000000000000001-1 -# 26 SSselectionMethod [DV,M] - SelectionMethod -# 27 SSunitName [DV,M] - StringLength100 -# 28 SSselectionMethodCluster [DV,O] - SelectionMethod -# 29 SSnumberTotalClusters [DV,O] - int -# 30 SSnumberSampledClusters [DV,O] - int -# 31 SSselectionProbCluster [DV,O] - Decimal0.0000000000000001-1 -# 32 SSinclusionProbCluster [DV,O] - Decimal0.0000000000000001-1 -# 33 SSsampled [DV,M] - YesNoFields -# 34 SSreasonNotSampled [DV,O] - ReasonForNotSampling -# 35 SSnonResponseCollected [DV,O] - YesNoFields -# 36 SSauxiliaryVariableTotal [DV,O] - DecimalPrec3 -# 37 SSauxiliaryVariableValue [DV,O] - DecimalPrec3 -# 38 SSauxiliaryVariableName [DV,O] - AuxiliaryVariableName -# 39 SSauxiliaryVariableUnit [DV,O] - MUNIT SS_df<-data.frame( SSid = dataset$VSid, @@ -513,8 +319,10 @@ SS_df<-data.frame( SSspeciesListName = project_name_outputs, #M SSuseForCalculateZero = "N", #M SStimeTotal = "", + SStimeTotalDataBasis = "Measured", SStimeSampled = "", SSnumberTotal = 1, + SSnumberTotalDataBasis = "Measured", SSnumberSampled = 1, SSselectionProb = 1, SSinclusionProb = 1, @@ -531,123 +339,72 @@ SS_df<-data.frame( SSauxiliaryVariableTotal = "", SSauxiliaryVariableValue = "", SSauxiliaryVariableName = "", - SSauxiliaryVariableUnit = "", - stringsAsFactors=FALSE + SSauxiliaryVariableUnit = "", + stringsAsFactors=FALSE ) #====SA=========== - -# 1 SAid [] - int -# 2 SSid [] - int -# 3 SArecordType [M] - string -# 4 SAsequenceNumber [M] - int -# 5 SAparentSequenceNumber [O] - int -# 6 SAstratification [DV,M] - YesNoFields -# 7 SAstratumName [DV,M] - StringLength100 -# 8 SAspeciesCode [M] - SpecWoRMS -# 9 SAspeciesCodeFAO [O] - SpecASFIS -# 10 SAstateOfProcessing [M] - StateOfProcessing -# 11 SApresentation [M] - ProductPresentation -# 12 SAspecimensState [M] - SpecimensState -# 13 SAcatchCategory [M] - CatchCategory -# 14 SAlandingCategory [O] - LandingCategory -# 15 SAcommSizeCatScale [O] - CommercialSizeCategoryScale -# 16 SAcommSizeCat [O] - CommercialSizeCategory -# 17 SAsex [M] - SEXCO -# 18 SAexclusiveEconomicZoneIndicator [O] - ISO_3166 -# 19 SAarea [O] - ICES_Area -# 20 SArectangle [O] - StatRec -# 21 SAfisheriesManagementUnit [O] - AreaNonFAO -# 22 SAgsaSubarea [M] - Areas_GFCM_GSA -# 23 SAjurisdictionArea [O] - JurisdictionArea -# 24 SAnationalFishingActivity [O] - NationalFishingActivity -# 25 SAmetier5 [O] - Metier5_FishingActivity -# 26 SAmetier6 [O] - Metier6_FishingActivity -# 27 SAgear [O] - GearType -# 28 SAmeshSize [O] - int -# 29 SAselectionDevice [O] - SelectionDevice -# 30 SAselectionDeviceMeshSize [O] - int -# 31 SAunitType [M] - SamplingUnit -# 32 SAtotalWeightLive [O] - int -# 33 SAsampleWeightLive [O] - int -# 34 SAnumberTotal [DV,O] - Decimal0.1-999999999 -# 35 SAnumberSampled [DV,O] - Decimal0.1-999999999 -# 36 SAselectionProb [DV,O] - Decimal0-1 -# 37 SAinclusionProb [DV,O] - Decimal0-1 -# 38 SAselectionMethod [DV,M] - SelectionMethod -# 39 SAunitName [DV,M] - StringLength100 -# 40 SAlowerHierarchy [M/O] - LowerHierarchy -# 41 SAsampler [O] - Sampler -# 42 SAsampled [DV,M] - YesNoFields -# 43 SAreasonNotSampled [DV,O] - ReasonForNotSampling -# 44 SAnonResponseCollected [DV,O] - YesNoFields -# 45 SAreasonNotSampledFM [DV,O] - ReasonForNotSampling -# 46 SAreasonNotSampledBV [DV,O] - ReasonForNotSampling -# 47 SAtotalWeightMeasured [O] - int -# 48 SAsampleWeightMeasured [O] - int -# 49 SAconversionFactorMeasLive [O] - Decimal0.900-10 -# 50 SAauxiliaryVariableTotal [DV,O] - DecimalPrec3 -# 51 SAauxiliaryVariableValue [DV,O] - DecimalPrec3 -# 52 SAauxiliaryVariableName [DV,O] - AuxiliaryVariableName -# 53 SAauxiliaryVariableUnit [DV,O] - MUNIT - - SA_df<-data.frame( - SAid = dataset$VSid, - SSid = dataset$VSid, - SArecordType = "SA", #M - SAsequenceNumber = dataset$VSid, #M - SAparentSequenceNumber = "", - SAstratification = "N", #M - SAstratumName = "U", #M - SAspeciesCode = 107254, #M - SAspeciesCodeFAO = "NEP", - SAstateOfProcessing = "FRE", #M - SApresentation = "WHL", #M - SAspecimensState = "DeadOrZeroProbSurvival", #M - SAcatchCategory = "Lan", #M - SAlandingCategory = "", - SAcommSizeCatScale = "", - SAcommSizeCat = "", - SAsex = "U", #M - SAexclusiveEconomicZoneIndicator = "", - SAarea = "", - SArectangle = "", - SAfisheriesManagementUnit = "", - SAgsaSubarea = "NotApplicable", #M - SAjurisdictionArea = "", - SAnationalFishingActivity = "", - SAmetier5 = "", - SAmetier6 = "", - SAgear = "", - SAmeshSize = "", - SAselectionDevice = "", - SAselectionDeviceMeshSize = "", - SAunitType = "Box", #M - SAtotalWeightLive = "", - SAsampleWeightLive = "", - SAnumberTotal = 1, - SAnumberSampled = 1, - SAselectionProb = 1, - SAinclusionProb = 1, - SAselectionMethod = "CENSUS", #M - SAunitName = dataset$VSid, #M - SAlowerHierarchy = "D", - SAsampler = "Observer", - SAsampled = "N", #M - SAreasonNotSampled = "", - SAnonResponseCollected = "N", - SAreasonNotSampledFM = "", - SAreasonNotSampledBV = "", - SAtotalWeightMeasured = dataset[[target_var]]*100, # *100 to meet type required (integer) - SAsampleWeightMeasured = dataset[[target_var]]*100, # *100 to meet type required (integer) - SAconversionFactorMeasLive = 1, - SAauxiliaryVariableTotal = "", - SAauxiliaryVariableValue = "", - SAauxiliaryVariableName = "", - SAauxiliaryVariableUnit = "", - stringsAsFactors=FALSE +SAid = dataset$VSid, +SSid = dataset$VSid, +SArecordType = "SA", #M +SAsequenceNumber = dataset$VSid, #M +SAparentSequenceNumber = "", +SAstratification = "N", #M +SAstratumName = "U", #M +SAspeciesCode = 107254, #M +SAspeciesCodeFAO = "NEP", +SAstateOfProcessing = "FRE", #M +SApresentation = "WHL", #M +SAspecimensState = "DeadOrZeroProbSurvival", #M +SAcatchCategory = "Lan", #M +SAlandingCategory = "", +SAcommSizeCatScale = "", +SAcommSizeCat = "", +SAsex = "U", #M +SAexclusiveEconomicZoneIndicator = "", +SAarea = "", +SArectangle = "", +SAfisheriesManagementUnit = "", +SAgsaSubarea = "NotApplicable", #M +SAjurisdictionArea = "", +SAgeographicalDataBasis = "Measured", +SAgeographicalSource = "", +SAnationalFishingActivity = "", +SAmetier5 = "", +SAmetier6 = "", +SAgear = "", +SAgearDataBasis = "Measured", +SAgearSource = "", +SAmeshSize = "", +SAselectionDevice = "", +SAselectionDeviceMeshSize = "", +SAunitType = "Box", #M +SAtotalWeightLive = "", +SAsampleWeightLive = "", +SAnumberTotal = 1, +SAnumberSampled = 1, +SAselectionProb = 1, +SAinclusionProb = 1, +SAselectionMethod = "CENSUS", #M +SAunitName = dataset$VSid, #M +SAlowerHierarchy = "D", +SAsampler = "Observer", +SAsampled = "N", #M +SAreasonNotSampled = "", +SAnonResponseCollected = "N", +SAreasonNotSampledFM = "", +SAreasonNotSampledBV = "", +SAtotalWeightMeasured = dataset[[target_var]]*100, # *100 to meet type required (integer) +SAtotalWeightMeasuredDataBasis = "Measured", +SAsampleWeightMeasured = dataset[[target_var]]*100, # *100 to meet type required (integer) +SAconversionFactorMeasLive = 1, +SAauxiliaryVariableTotal = "", +SAauxiliaryVariableValue = "", +SAauxiliaryVariableName = "", +SAauxiliaryVariableUnit = "", +stringsAsFactors=FALSE ) @@ -700,15 +457,14 @@ RDBESlist[[i]][which(grepl(colnames(RDBESlist[[i]]),pat="[A-Z]id"))]<-NULL #===Save============ - dir_outputs<-paste0(base_dir_outputs,"/", - project_name_outputs,"/") - dir.create(dir_outputs, recursive=T, showWarnings=FALSE) - filename_output_CS <- paste0("H1.csv") - filename_output_SL <- paste0("HSL.csv") - filename_output_VD <- paste0("HVD.csv") + dir_outputs<-paste0(base_dir_outputs,"/", project_name_outputs,"/") + dir.create(dir_outputs, recursive=T, showWarnings=FALSE) + filename_output_CS <- paste0(dir_outputs,"H1.csv") + filename_output_SL <- paste0(dir_outputs,"HSL.csv") + filename_output_VD <- paste0(dir_outputs,"HVD.csv") -lapply(RDBESlist, function(x, filename1 = paste0(dir_outputs,filename_output_CS)){ +lapply(RDBESlist, function(x, filename1 = filename_output_CS){ if("DErecordType" %in% colnames(x)){ write.table(x, file = filename1, append = FALSE, quote = FALSE, sep = ",", eol = "\n", na = "NA", dec = ".", row.names = FALSE, @@ -720,7 +476,7 @@ write.table(x, file = filename1, append = TRUE, quote = FALSE, sep = ",", } }) -b<-read.table(file=paste0(dir_outputs,filename_output_CS), header=F, sep=";") +b<-read.table(file = filename_output_CS, header=F, sep=";") b<-cbind(key,b) b<-b[order(as.character(b$key), decreasing=FALSE),] b<-b[!is.na(key),] @@ -728,27 +484,29 @@ b$key<-NULL b$V1<-as.character(b$V1) # saves CS output -write.table(b$V1, file=paste0(dir_outputs,filename_output_CS), col.names=FALSE, row.names = FALSE, quote=FALSE,sep=",") +write.table(b$V1, file = filename_output_CS, col.names=FALSE, row.names = FALSE, quote=FALSE,sep=",") -# -----Builds and saves dummySL----------------- +# -----Builds and saves dummySL and dummyIS----------------- SL_base$SLspeciesListName<-project_name_outputs SL_base$SLyear<-DEyear SL_base$SLinstitute<-SDinstitution -# saves SL output - write.table(SL_base, file=paste0(dir_outputs,filename_output_SL), col.names=FALSE, row.names = FALSE, quote=FALSE,sep=",") - + + # saves SL output + write.table(rbind(paste(SL_base, collapse=","), paste(IS_base, collapse=",")), file=filename_output_SL, col.names=FALSE, row.names = FALSE, quote=FALSE,sep=",") # -----Builds and saves dummyVD----------------- - # saves VD output VD_base$VDyear<-DEyear - write.table(VD_base, file=paste0(dir_outputs,filename_output_VD), col.names=FALSE, row.names = FALSE, quote=FALSE,sep=",") + write.table(VD_base, file=filename_output_VD, col.names=FALSE, row.names = FALSE, quote=FALSE,sep=",") +# check - is this needed? +if(1==2){ + # -----Clean SL after dowload----------------- # cleans excess of SL rows frequently present in download @@ -767,4 +525,5 @@ library(zip) zip::zip(zipfile = paste0(project_name_outputs,".zip"), files =paste0(dir_outputs,"tmp/",dir(paste0(dir_outputs,"tmp"))), mode="cherry-pick") unlink(paste0(dir_outputs,"tmp"), force=T, recursive = T) +} diff --git a/data-raw/exampleData/TextBookExamples/BuildTextBookDataUpload_Pckg_SDAResources_schools_H1.R b/data-raw/exampleData/TextBookExamples/BuildTextBookDataUpload_Pckg_SDAResources_schools_H1.R index 16020d49..ebc7649e 100644 --- a/data-raw/exampleData/TextBookExamples/BuildTextBookDataUpload_Pckg_SDAResources_schools_H1.R +++ b/data-raw/exampleData/TextBookExamples/BuildTextBookDataUpload_Pckg_SDAResources_schools_H1.R @@ -48,10 +48,11 @@ baseDir <- "" VD_base <- readRDS(paste0(baseDir,"aux_TextBookExamples/VD_base.rds")) SL_base <- readRDS(paste0(baseDir,"aux_TextBookExamples/SL_base.rds")) + IS_base <- readRDS(paste0(baseDir,"aux_TextBookExamples/IS_base.rds")) #nameof the directory where the outputs are saved currently base_dir_outputs <- paste0(baseDir,"BuiltUploads") - if(!file.exists(base_dir_outputs)) dir.create(base_dir_outputs, recursive=T, showWarnings=FALSE) + dir.create(base_dir_outputs, recursive=T, showWarnings=FALSE) #========Outline of Hierarchy 1================ @@ -71,22 +72,6 @@ -# 1 DEid [] - int -# 2 DErecordType [M] - string -# 3 DEsamplingScheme [M] - SamplingScheme -# 4 DEsamplingSchemeType [M] - SamplingSchemeType -# 5 DEyear [M] - Year -# 6 DEstratumName [M] - StringLength100 -# 7 DEhierarchyCorrect [M] - YesNoFields -# 8 DEhierarchy [M] - RDBESUpperHierarchy -# 9 DEsampled [DV,M] - YesNoFields -# 10 DEreasonNotSampled [DV,O] - ReasonForNotSampling -# 11 DEnonResponseCollected [DV,O] - YesNoFields -# 12 DEauxiliaryVariableTotal [DV,O] - DecimalPrec3 -# 13 DEauxiliaryVariableValue [DV,O] - DecimalPrec3 -# 14 DEauxiliaryVariableName [DV,O] - AuxiliaryVariableName -# 15 DEauxiliaryVariableUnit[DV,O]-MUNIT - DE_df_base<-expand.grid(DEyear=DEyear, DEstratumName="U",stringsAsFactors=F) @@ -105,20 +90,13 @@ DE_df<-data.frame( DEauxiliaryVariableTotal = "", DEauxiliaryVariableValue = "", DEauxiliaryVariableName = "", - DEauxiliaryVariableUnit = "", + DEauxiliaryVariableUnit = "", stringsAsFactors=FALSE ) #===SD============ - # x -# 1 SDid [M] - int -# 2 DEid [M] - int -# 3 SDrecordType [M] - string -# 4 SDcountry [M] - ISO_3166 -# 5 SDinstitution [M] - EDMO - SD_df<-data.frame( SDid=DE_df$DEid, DEid=DE_df$DEid, @@ -131,37 +109,6 @@ SD_df<-data.frame( #===VS============ - # x -# 1 VSid [M] - int -# 2 SDid [M] - int -# 3 VDid [M] - int -# 4 TEid [M/O] - int -# 5 VSrecordType [M] - string -# 6 VSsequenceNumber [M] - int -# 7 VSencryptedVesselCode [M] - StringLength100 -# 8 VSstratification [DV,M] - YesNoFields -# 9 VSstratumName [DV,M] - StringLength100 -# 10 VSclustering [DV,M] - Clustering -# 11 VSclusterName [DV,M] - StringLength100 -# 12 VSsampler [O] - Sampler -# 13 VSnumberTotal [DV,O] - int -# 14 VSnumberSampled [DV,O] - int -# 15 VSselectionProb [DV,O] - Decimal0.0000000000000001-1 -# 16 VSinclusionProb [DV,O] - Decimal0.0000000000000001-1 -# 17 VSselectionMethod [DV,M] - SelectionMethod -# 18 VSunitName [DV,M] - StringLength100 -# 19 VSselectionMethodCluster [DV,O] - SelectionMethod -# 20 VSnumberTotalClusters [DV,O] - int -# 21 VSnumberSampledClusters [DV,O] - int -# 22 VSselectionProbCluster [DV,O] - Decimal0.0000000000000001-1 -# 23 VSinclusionProbCluster [DV,O] - Decimal0.0000000000000001-1 -# 24 VSsampled [DV,M] - YesNoFields -# 25 VSreasonNotSampled [DV,O] - ReasonForNotSampling -# 26 VSnonResponseCollected [DV,O] - YesNoFields -# 27 VSauxiliaryVariableTotal [DV,O] - DecimalPrec3 -# 28 VSauxiliaryVariableValue [DV,O] - DecimalPrec3 -# 29 VSauxiliaryVariableName [DV,O] - AuxiliaryVariableName -# 30 VSauxiliaryVariableUnit [DV,O] - MUNIT #check_All_fields("VS") @@ -180,7 +127,6 @@ SD_df<-data.frame( if(!test) stop( "duplicated VSencryptedVesselCode") - VS_df <- data.frame( VSid = dataset$VSid, SDid = as.integer(1), @@ -211,7 +157,7 @@ VS_df <- data.frame( VSauxiliaryVariableTotal = "", VSauxiliaryVariableValue = "", VSauxiliaryVariableName = "", - VSauxiliaryVariableUnit = "", + VSauxiliaryVariableUnit = "", stringsAsFactors=FALSE ) @@ -231,49 +177,6 @@ VS_df$VSinclusionProbCluster<-"" #====FT=========== - -# 1 FTid [M] - int -# 2 OSid [M/O] - int -# 3 VSid [M/O] - int -# 4 VDid [M] - int -# 5 SDid [M/O] - int -# 6 FOid [M/O] - int -# 7 TEid [M/O] - int -# 8 FTrecordType [M] - string -# 9 FTencryptedVesselCode [M] - StringLength100 -# 10 FTsequenceNumber [M] - int -# 11 FTstratification [DV,M] - YesNoFields -# 12 FTstratumName [DV,M] - StringLength100 -# 13 FTclustering [DV,M] - Clustering -# 14 FTclusterName [DV,M] - StringLength100 -# 15 FTsampler [O] - Sampler -# 16 FTsamplingType [M] - SamplingContext -# 17 FTnumberOfHaulsOrSets [M/O] - IntZeroToMax -# 18 FTdepartureLocation [M/O] - Harbour_LOCODE -# 19 FTdepartureDate [M/O] - Date -# 20 FTdepartureTime [M/O] - StringLength60 -# 21 FTarrivalLocation [M] - Harbour_LOCODE -# 22 FTarrivalDate [M] - Date -# 23 FTarrivalTime [M/O] - StringLength60 -# 24 FTnumberTotal [DV,O] - int -# 25 FTnumberSampled [DV,O] - int -# 26 FTselectionProb [DV,O] - Decimal0-1 -# 27 FTinclusionProb [DV,O] - Decimal0-1 -# 28 FTselectionMethod [DV,M] - SelectionMethod -# 29 FTunitName [DV,M] - StringLength100 -# 30 FTselectionMethodCluster [DV,O] - SelectionMethod -# 31 FTnumberTotalClusters [DV,O] - int -# 32 FTnumberSampledClusters [DV,O] - int -# 33 FTselectionProbCluster [DV,O] - Decimal0-1 -# 34 FTinclusionProbCluster [DV,O] - Decimal0-1 -# 35 FTsampled [DV,M] - YesNoFields -# 36 FTreasonNotSampled [DV,O] - ReasonForNotSampling -# 37 FTnonResponseCollected [DV,O] - YesNoFields -# 38 FTauxiliaryVariableTotal [DV,O] - DecimalPrec3 -# 39 FTauxiliaryVariableValue [DV,O] - DecimalPrec3 -# 40 FTauxiliaryVariableName [DV,O] - AuxiliaryVariableName -# 41 FTauxiliaryVariableUnit [DV,O] - MUNIT - FT_df <- data.frame( FTid = dataset$VSid,#[M] - int OSid = "",# [M/O] - int @@ -298,6 +201,7 @@ FT_df <- data.frame( FTarrivalLocation = "ZWHWN", #[M] - Harbour_LOCODE FTarrivalDate=seq(from = as.Date("1965-01-01", format='%Y-%m-%d'), by=1, length.out=nrow(dataset)), #[M] - date FTarrivalTime="", #[O] - time + FTdominantLandingDate= "", FTnumberTotal= 1, #[DV,O] - int FTnumberSampled=1, #[DV,O] - int FTselectionProb=1, #[DV,O] - DecimalPrec10 @@ -315,7 +219,7 @@ FT_df <- data.frame( FTauxiliaryVariableTotal = "", FTauxiliaryVariableValue = "", FTauxiliaryVariableName = "", - FTauxiliaryVariableUnit = "", + FTauxiliaryVariableUnit = "", stringsAsFactors=FALSE ) @@ -323,72 +227,6 @@ FT_df <- data.frame( #====FO=========== -# 1 FOid [M] - int -# 2 FTid [M/O] - int -# 3 SDid [M/O] - int -# 4 FOrecordType [M] - string -# 5 FOstratification [DV,M] - YesNoFields -# 6 FOsequenceNumber [M] - int -# 7 FOstratumName [DV,M] - StringLength100 -# 8 FOclustering [DV,M] - Clustering -# 9 FOclusterName [DV,M] - StringLength100 -# 10 FOsampler [O] - Sampler -# 11 FOaggregationLevel [M] - AggregationLevel -# 12 FOvalidity [M] - ValidityFlag -# 13 FOcatchReg [M] - CatchRegistration -# 14 FOstartDate [O] - Date -# 15 FOstartTime [O] - Time -# 16 FOendDate [M] - Date -# 17 FOendTime [M/O] - Time -# 18 FOduration [M/O] - int -# 19 FOdurationSource [M] - DurationSource -# 20 FOhandlingTime [O] - int -# 21 FOstartLat [O] - Decimal-90.000000-90.000000 -# 22 FOstartLon [O] - Decimal-180.000000-180.000000 -# 23 FOstopLat [O] - Decimal-90.000000-90.000000 -# 24 FOstopLon [O] - Decimal-180.000000-180.000000 -# 25 FOexclusiveEconomicZoneIndicator [O] - ISO_3166 -# 26 FOarea [M] - ICES_Area -# 27 FOrectangle [O] - StatRec -# 28 FOfisheriesManagementUnit [O] - AreaNonFAO -# 29 FOgsaSubarea [M] - Areas_GFCM_GSA -# 30 FOjurisdictionArea [O] - JurisdictionArea -# 31 FOfishingDepth [O] - int -# 32 FOwaterDepth [O] - int -# 33 FOnationalFishingActivity [O] - NationalFishingActivity -# 34 FOmetier5 [O] - Metier5_FishingActivity -# 35 FOmetier6 [M] - Metier6_FishingActivity -# 36 FOgear [M] - GearType -# 37 FOmeshSize [O] - int -# 38 FOselectionDevice [O] - SelectionDevice -# 39 FOselectionDeviceMeshSize [O] - int -# 40 FOtargetSpecies [O] - TargetSpecies -# 41 FOincidentalByCatchMitigationDeviceFirst [M] - BycatchMitigationDevice -# 42 FOincidentalByCatchMitigationDeviceTargetFirst [M] - BycatchMitigationDeviceTarget -# 43 FOincidentalByCatchMitigationDeviceSecond [M] - BycatchMitigationDevice -# 44 FOincidentalByCatchMitigationDeviceTargetSecond [M] - BycatchMitigationDeviceTarget -# 45 FOgearDimensions [O] - int -# 46 FOobservationCode [M] - ObservationCode -# 47 FOnumberTotal [DV,O] - int -# 48 FOnumberSampled [DV,O] - int -# 49 FOselectionProb [DV,O] - Decimal0.0000000000000001-1 -# 50 FOinclusionProb [DV,O] - Decimal0.0000000000000001-1 -# 51 FOselectionMethod [DV,M] - SelectionMethod -# 52 FOunitName [DV,M] - StringLength100 -# 53 FOselectionMethodCluster [DV,O] - SelectionMethod -# 54 FOnumberTotalClusters [DV,O] - int -# 55 FOnumberSampledClusters [DV,O] - int -# 56 FOselectionProbCluster [DV,O] - Decimal0.0000000000000001-1 -# 57 FOinclusionProbCluster [DV,O] - Decimal0.0000000000000001-1 -# 58 FOsampled [DV,M] - YesNoFields -# 59 FOreasonNotSampled [DV,O] - ReasonForNotSampling -# 60 FOnonResponseCollected [DV,O] - YesNoFields -# 61 FOauxiliaryVariableTotal [DV,O] - DecimalPrec3 -# 62 FOauxiliaryVariableValue [DV,O] - DecimalPrec3 -# 63 FOauxiliaryVariableName [DV,O] - AuxiliaryVariableName -# 64 FOauxiliaryVariableUnit [DV,O] - MUNIT - - FO_df <- data.frame( FOid = dataset$VSid, FTid = dataset$VSid, @@ -408,24 +246,29 @@ FO_df <- data.frame( FOendDate = as.Date(FT_df$FTdepartureDate, format='%Y-%m-%d'),#M FOendTime ="",#M FOduration = 60, #M ATTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT!!!! + FOfishingDurationDataBasis = "Measured", FOdurationSource = "Crew", FOhandlingTime = "", FOstartLat="", # ATT! FOstartLon="", # ATT! FOstopLat="", FOstopLon="", - FOexclusiveEconomicZoneIndicator = "", # + FOexclusiveEconomicZoneIndicator = "", # FOarea = "27.3.a.21", #M FOrectangle = "", FOfisheriesManagementUnit = "", FOgsaSubarea = "NotApplicable", #M FOjurisdictionArea = "", + FOgeographicalDataBasis = "Measured", + FOgeographicalSource = "Crew", FOfishingDepth = "", FOwaterDepth = "", FOnationalFishingActivity = "", FOmetier5 = "", FOmetier6 = "OTT_CRU_70-89_2_35",#M FOgear = "OTT",#M + FOgearDataBasis = "Measured", + FOgearSource = "", FOmeshSize = "", FOselectionDevice = "", FOselectionDeviceMeshSize = "", @@ -453,52 +296,13 @@ FO_df <- data.frame( FOauxiliaryVariableTotal = "", FOauxiliaryVariableValue = "", FOauxiliaryVariableName = "", - FOauxiliaryVariableUnit = "", + FOauxiliaryVariableUnit = "", stringsAsFactors=FALSE ) #===SS============ -# 1 SSid [] - int -# 2 LEid [M/O] - int -# 3 FOid [M/O] - int -# 4 FTid [M/O] - int -# 5 OSid [M/O] - int -# 6 TEid [M/O] - int -# 7 SLid [M] - int -# 8 SSrecordType [M] - string -# 9 SSsequenceNumber [M] - int -# 10 SSstratification [DV,M] - YesNoFields -# 11 SSstratumName [DV,M] - StringLength100 -# 12 SSclustering [DV,M] - Clustering -# 13 SSclusterName [DV,M] - StringLength100 -# 14 SSobservationActivityType [DV,M] - ObservationActivityType -# 15 SScatchFraction [M] - CatchFraction -# 16 SSobservationType [DV,M] - ObservationMethod -# 17 SSsampler [O] - Sampler -# 18 SSspeciesListName [M] - StringLength100 -# 19 SSuseForCalculateZero [M] - YesNoFields -# 20 SStimeTotal [O] - int -# 21 SStimeSampled [O] - int -# 22 SSnumberTotal [DV,O] - int -# 23 SSnumberSampled [DV,O] - int -# 24 SSselectionProb [DV,O] - Decimal0.0000000000000001-1 -# 25 SSinclusionProb [DV,O] - Decimal0.0000000000000001-1 -# 26 SSselectionMethod [DV,M] - SelectionMethod -# 27 SSunitName [DV,M] - StringLength100 -# 28 SSselectionMethodCluster [DV,O] - SelectionMethod -# 29 SSnumberTotalClusters [DV,O] - int -# 30 SSnumberSampledClusters [DV,O] - int -# 31 SSselectionProbCluster [DV,O] - Decimal0.0000000000000001-1 -# 32 SSinclusionProbCluster [DV,O] - Decimal0.0000000000000001-1 -# 33 SSsampled [DV,M] - YesNoFields -# 34 SSreasonNotSampled [DV,O] - ReasonForNotSampling -# 35 SSnonResponseCollected [DV,O] - YesNoFields -# 36 SSauxiliaryVariableTotal [DV,O] - DecimalPrec3 -# 37 SSauxiliaryVariableValue [DV,O] - DecimalPrec3 -# 38 SSauxiliaryVariableName [DV,O] - AuxiliaryVariableName -# 39 SSauxiliaryVariableUnit [DV,O] - MUNIT SS_df<-data.frame( SSid = dataset$VSid, @@ -521,8 +325,10 @@ SS_df<-data.frame( SSspeciesListName = project_name_outputs, #M SSuseForCalculateZero = "N", #M SStimeTotal = "", + SStimeTotalDataBasis = "Measured", SStimeSampled = "", SSnumberTotal = 1, + SSnumberTotalDataBasis = "Measured", SSnumberSampled = 1, SSselectionProb = 1, SSinclusionProb = 1, @@ -539,123 +345,72 @@ SS_df<-data.frame( SSauxiliaryVariableTotal = "", SSauxiliaryVariableValue = "", SSauxiliaryVariableName = "", - SSauxiliaryVariableUnit = "", - stringsAsFactors=FALSE + SSauxiliaryVariableUnit = "", + stringsAsFactors=FALSE ) #====SA=========== - -# 1 SAid [] - int -# 2 SSid [] - int -# 3 SArecordType [M] - string -# 4 SAsequenceNumber [M] - int -# 5 SAparentSequenceNumber [O] - int -# 6 SAstratification [DV,M] - YesNoFields -# 7 SAstratumName [DV,M] - StringLength100 -# 8 SAspeciesCode [M] - SpecWoRMS -# 9 SAspeciesCodeFAO [O] - SpecASFIS -# 10 SAstateOfProcessing [M] - StateOfProcessing -# 11 SApresentation [M] - ProductPresentation -# 12 SAspecimensState [M] - SpecimensState -# 13 SAcatchCategory [M] - CatchCategory -# 14 SAlandingCategory [O] - LandingCategory -# 15 SAcommSizeCatScale [O] - CommercialSizeCategoryScale -# 16 SAcommSizeCat [O] - CommercialSizeCategory -# 17 SAsex [M] - SEXCO -# 18 SAexclusiveEconomicZoneIndicator [O] - ISO_3166 -# 19 SAarea [O] - ICES_Area -# 20 SArectangle [O] - StatRec -# 21 SAfisheriesManagementUnit [O] - AreaNonFAO -# 22 SAgsaSubarea [M] - Areas_GFCM_GSA -# 23 SAjurisdictionArea [O] - JurisdictionArea -# 24 SAnationalFishingActivity [O] - NationalFishingActivity -# 25 SAmetier5 [O] - Metier5_FishingActivity -# 26 SAmetier6 [O] - Metier6_FishingActivity -# 27 SAgear [O] - GearType -# 28 SAmeshSize [O] - int -# 29 SAselectionDevice [O] - SelectionDevice -# 30 SAselectionDeviceMeshSize [O] - int -# 31 SAunitType [M] - SamplingUnit -# 32 SAtotalWeightLive [O] - int -# 33 SAsampleWeightLive [O] - int -# 34 SAnumberTotal [DV,O] - Decimal0.1-999999999 -# 35 SAnumberSampled [DV,O] - Decimal0.1-999999999 -# 36 SAselectionProb [DV,O] - Decimal0-1 -# 37 SAinclusionProb [DV,O] - Decimal0-1 -# 38 SAselectionMethod [DV,M] - SelectionMethod -# 39 SAunitName [DV,M] - StringLength100 -# 40 SAlowerHierarchy [M/O] - LowerHierarchy -# 41 SAsampler [O] - Sampler -# 42 SAsampled [DV,M] - YesNoFields -# 43 SAreasonNotSampled [DV,O] - ReasonForNotSampling -# 44 SAnonResponseCollected [DV,O] - YesNoFields -# 45 SAreasonNotSampledFM [DV,O] - ReasonForNotSampling -# 46 SAreasonNotSampledBV [DV,O] - ReasonForNotSampling -# 47 SAtotalWeightMeasured [O] - int -# 48 SAsampleWeightMeasured [O] - int -# 49 SAconversionFactorMeasLive [O] - Decimal0.900-10 -# 50 SAauxiliaryVariableTotal [DV,O] - DecimalPrec3 -# 51 SAauxiliaryVariableValue [DV,O] - DecimalPrec3 -# 52 SAauxiliaryVariableName [DV,O] - AuxiliaryVariableName -# 53 SAauxiliaryVariableUnit [DV,O] - MUNIT - - SA_df<-data.frame( - SAid = 1:nrow(dataset), - SSid = dataset$VSid, - SArecordType = "SA", #M - SAsequenceNumber = dataset$VSid, #M - SAparentSequenceNumber = "", - SAstratification = "N", #M - SAstratumName = "U", #M - SAspeciesCode = 107254, #M - SAspeciesCodeFAO = "NEP", - SAstateOfProcessing = "FRE", #M - SApresentation = "WHL", #M - SAspecimensState = "DeadOrZeroProbSurvival", #M - SAcatchCategory = "Lan", #M - SAlandingCategory = "", - SAcommSizeCatScale = "", - SAcommSizeCat = "", - SAsex = "U", #M - SAexclusiveEconomicZoneIndicator = "", - SAarea = "", - SArectangle = "", - SAfisheriesManagementUnit = "", - SAgsaSubarea = "NotApplicable", #M - SAjurisdictionArea = "", - SAnationalFishingActivity = "", - SAmetier5 = "", - SAmetier6 = "", - SAgear = "", - SAmeshSize = "", - SAselectionDevice = "", - SAselectionDeviceMeshSize = "", - SAunitType = "Box", #M - SAtotalWeightLive = "", - SAsampleWeightLive = "", - SAnumberTotal = 1, - SAnumberSampled = 1, - SAselectionProb = 1, - SAinclusionProb = 1, - SAselectionMethod = "CENSUS", #M - SAunitName = dataset$VSid, #M - SAlowerHierarchy = "D", - SAsampler = "Observer", - SAsampled = "N", #M - SAreasonNotSampled = "", - SAnonResponseCollected = "N", - SAreasonNotSampledFM = "", - SAreasonNotSampledBV = "", - SAtotalWeightMeasured = dataset[[target_var]], # - SAsampleWeightMeasured = dataset[[target_var]], # - SAconversionFactorMeasLive = 1, - SAauxiliaryVariableTotal = "", - SAauxiliaryVariableValue = "", - SAauxiliaryVariableName = "", - SAauxiliaryVariableUnit = "", - stringsAsFactors=FALSE +SAid = 1:nrow(dataset), +SSid = dataset$VSid, +SArecordType = "SA", #M +SAsequenceNumber = dataset$VSid, #M +SAparentSequenceNumber = "", +SAstratification = "N", #M +SAstratumName = "U", #M +SAspeciesCode = 107254, #M +SAspeciesCodeFAO = "NEP", +SAstateOfProcessing = "FRE", #M +SApresentation = "WHL", #M +SAspecimensState = "DeadOrZeroProbSurvival", #M +SAcatchCategory = "Lan", #M +SAlandingCategory = "", +SAcommSizeCatScale = "", +SAcommSizeCat = "", +SAsex = "U", #M +SAexclusiveEconomicZoneIndicator = "", +SAarea = "", +SArectangle = "", +SAfisheriesManagementUnit = "", +SAgsaSubarea = "NotApplicable", #M +SAjurisdictionArea = "", +SAgeographicalDataBasis = "Measured", +SAgeographicalSource = "", +SAnationalFishingActivity = "", +SAmetier5 = "", +SAmetier6 = "", +SAgear = "", +SAgearDataBasis = "Measured", +SAgearSource = "", +SAmeshSize = "", +SAselectionDevice = "", +SAselectionDeviceMeshSize = "", +SAunitType = "Box", #M +SAtotalWeightLive = "", +SAsampleWeightLive = "", +SAnumberTotal = 1, +SAnumberSampled = 1, +SAselectionProb = 1, +SAinclusionProb = 1, +SAselectionMethod = "CENSUS", #M +SAunitName = dataset$VSid, #M +SAlowerHierarchy = "D", +SAsampler = "Observer", +SAsampled = "N", #M +SAreasonNotSampled = "", +SAnonResponseCollected = "N", +SAreasonNotSampledFM = "", +SAreasonNotSampledBV = "", +SAtotalWeightMeasured = dataset[[target_var]], # +SAtotalWeightMeasuredDataBasis = "Measured", +SAsampleWeightMeasured = dataset[[target_var]], # +SAconversionFactorMeasLive = 1, +SAauxiliaryVariableTotal = "", +SAauxiliaryVariableValue = "", +SAauxiliaryVariableName = "", +SAauxiliaryVariableUnit = "", +stringsAsFactors=FALSE ) @@ -708,15 +463,14 @@ RDBESlist[[i]][which(grepl(colnames(RDBESlist[[i]]),pat="[A-Z]id"))]<-NULL #===Save============ - dir_outputs<-paste0(base_dir_outputs,"/", - project_name_outputs,"/") - dir.create(dir_outputs, recursive=T, showWarnings=FALSE) - filename_output_CS <- paste0("H1.csv") - filename_output_SL <- paste0("HSL.csv") - filename_output_VD <- paste0("HVD.csv") + dir_outputs<-paste0(base_dir_outputs,"/", project_name_outputs,"/") + dir.create(dir_outputs, recursive=T, showWarnings=FALSE) + filename_output_CS <- paste0(dir_outputs,"H1.csv") + filename_output_SL <- paste0(dir_outputs,"HSL.csv") + filename_output_VD <- paste0(dir_outputs,"HVD.csv") -lapply(RDBESlist, function(x, filename1 = paste0(dir_outputs,filename_output_CS)){ +lapply(RDBESlist, function(x, filename1 = filename_output_CS){ if("DErecordType" %in% colnames(x)){ write.table(x, file = filename1, append = FALSE, quote = FALSE, sep = ",", eol = "\n", na = "NA", dec = ".", row.names = FALSE, @@ -728,7 +482,7 @@ write.table(x, file = filename1, append = TRUE, quote = FALSE, sep = ",", } }) -b<-read.table(file=paste0(dir_outputs,filename_output_CS), header=F, sep=";") +b<-read.table(file = filename_output_CS, header=F, sep=";") b<-cbind(key,b) b<-b[order(as.character(b$key), decreasing=FALSE),] b<-b[!is.na(key),] @@ -736,24 +490,30 @@ b$key<-NULL b$V1<-as.character(b$V1) # saves CS output -write.table(b$V1, file=paste0(dir_outputs,filename_output_CS), col.names=FALSE, row.names = FALSE, quote=FALSE,sep=",") +write.table(b$V1, file = filename_output_CS, col.names=FALSE, row.names = FALSE, quote=FALSE,sep=",") -# -----Builds and saves dummySL----------------- +# -----Builds and saves dummySL and dummyIS----------------- SL_base$SLspeciesListName<-project_name_outputs SL_base$SLyear<-DEyear SL_base$SLinstitute<-SDinstitution -# saves SL output - write.table(SL_base, file=paste0(dir_outputs,filename_output_SL), col.names=FALSE, row.names = FALSE, quote=FALSE,sep=",") - + + # saves SL output + write.table(rbind(paste(SL_base, collapse=","), paste(IS_base, collapse=",")), file=filename_output_SL, col.names=FALSE, row.names = FALSE, quote=FALSE,sep=",") # -----Builds and saves dummyVD----------------- - # saves VD output VD_base$VDyear<-DEyear - write.table(VD_base, file=paste0(dir_outputs,filename_output_VD), col.names=FALSE, row.names = FALSE, quote=FALSE,sep=",") + write.table(VD_base, file=filename_output_VD, col.names=FALSE, row.names = FALSE, quote=FALSE,sep=",") + + + + + +# check - is this needed? +if(1==2){ @@ -775,4 +535,5 @@ library(zip) zip::zip(zipfile = paste0(project_name_outputs,".zip"), files =paste0(dir_outputs,"tmp/",dir(paste0(dir_outputs,"tmp"))), mode="cherry-pick") unlink(paste0(dir_outputs,"tmp"), force=T, recursive = T) +} diff --git a/data-raw/exampleData/TextBookExamples/BuildTextBookDataUpload_Pckg_survey_apiclus1_v2_H1.R b/data-raw/exampleData/TextBookExamples/BuildTextBookDataUpload_Pckg_survey_apiclus1_v2_H1.R index fc2b09e2..03fbeb0a 100644 --- a/data-raw/exampleData/TextBookExamples/BuildTextBookDataUpload_Pckg_survey_apiclus1_v2_H1.R +++ b/data-raw/exampleData/TextBookExamples/BuildTextBookDataUpload_Pckg_survey_apiclus1_v2_H1.R @@ -50,13 +50,14 @@ baseDir <- "" VD_base <- readRDS(paste0(baseDir,"aux_TextBookExamples/VD_base.rds")) SL_base <- readRDS(paste0(baseDir,"aux_TextBookExamples/SL_base.rds")) + IS_base <- readRDS(paste0(baseDir,"aux_TextBookExamples/IS_base.rds")) #nameof the directory where the outputs are saved currently base_dir_outputs <- paste0(baseDir,"BuiltUploads") - if(!file.exists(base_dir_outputs)) dir.create(base_dir_outputs, recursive=T, showWarnings=FALSE) + dir.create(base_dir_outputs, recursive=T, showWarnings=FALSE) -#=========Outline of Hierarchy 1=============== +#========Outline of Hierarchy 1================ # Design # Sampling details # Vessel Selection @@ -69,26 +70,10 @@ -#====DE=========== +#===DE============ -# 1 DEid [] - int -# 2 DErecordType [M] - string -# 3 DEsamplingScheme [M] - SamplingScheme -# 4 DEsamplingSchemeType [M] - SamplingSchemeType -# 5 DEyear [M] - Year -# 6 DEstratumName [M] - StringLength100 -# 7 DEhierarchyCorrect [M] - YesNoFields -# 8 DEhierarchy [M] - RDBESUpperHierarchy -# 9 DEsampled [DV,M] - YesNoFields -# 10 DEreasonNotSampled [DV,O] - ReasonForNotSampling -# 11 DEnonResponseCollected [DV,O] - YesNoFields -# 12 DEauxiliaryVariableTotal [DV,O] - DecimalPrec3 -# 13 DEauxiliaryVariableValue [DV,O] - DecimalPrec3 -# 14 DEauxiliaryVariableName [DV,O] - AuxiliaryVariableName -# 15 DEauxiliaryVariableUnit[DV,O]-MUNIT - DE_df_base<-expand.grid(DEyear=DEyear, DEstratumName="U",stringsAsFactors=F) @@ -107,19 +92,12 @@ DE_df<-data.frame( DEauxiliaryVariableTotal = "", DEauxiliaryVariableValue = "", DEauxiliaryVariableName = "", - DEauxiliaryVariableUnit = "", + DEauxiliaryVariableUnit = "", stringsAsFactors=FALSE ) -#====SD=========== - +#===SD============ - # x -# 1 SDid [M] - int -# 2 DEid [M] - int -# 3 SDrecordType [M] - string -# 4 SDcountry [M] - ISO_3166 -# 5 SDinstitution [M] - EDMO SD_df<-data.frame( SDid=DE_df$DEid, @@ -132,37 +110,8 @@ SD_df<-data.frame( #===VS============ - # x -# 1 VSid [M] - int -# 2 SDid [M] - int -# 3 VDid [M] - int -# 4 TEid [M/O] - int -# 5 VSrecordType [M] - string -# 6 VSsequenceNumber [M] - int -# 7 VSencryptedVesselCode [M] - StringLength100 -# 8 VSstratification [DV,M] - YesNoFields -# 9 VSstratumName [DV,M] - StringLength100 -# 10 VSclustering [DV,M] - Clustering -# 11 VSclusterName [DV,M] - StringLength100 -# 12 VSsampler [O] - Sampler -# 13 VSnumberTotal [DV,O] - int -# 14 VSnumberSampled [DV,O] - int -# 15 VSselectionProb [DV,O] - Decimal0.0000000000000001-1 -# 16 VSinclusionProb [DV,O] - Decimal0.0000000000000001-1 -# 17 VSselectionMethod [DV,M] - SelectionMethod -# 18 VSunitName [DV,M] - StringLength100 -# 19 VSselectionMethodCluster [DV,O] - SelectionMethod -# 20 VSnumberTotalClusters [DV,O] - int -# 21 VSnumberSampledClusters [DV,O] - int -# 22 VSselectionProbCluster [DV,O] - Decimal0.0000000000000001-1 -# 23 VSinclusionProbCluster [DV,O] - Decimal0.0000000000000001-1 -# 24 VSsampled [DV,M] - YesNoFields -# 25 VSreasonNotSampled [DV,O] - ReasonForNotSampling -# 26 VSnonResponseCollected [DV,O] - YesNoFields -# 27 VSauxiliaryVariableTotal [DV,O] - DecimalPrec3 -# 28 VSauxiliaryVariableValue [DV,O] - DecimalPrec3 -# 29 VSauxiliaryVariableName [DV,O] - AuxiliaryVariableName -# 30 VSauxiliaryVariableUnit [DV,O] - MUNIT + + #check_All_fields("VS") dataset$VSid <- 1:nrow(dataset) @@ -225,165 +174,59 @@ VS_df$VSselectionMethodCluster<-"SRSWOR" VS_df$VSselectionProbCluster<-"" VS_df$VSinclusionProbCluster<-1/VS_base$pw # based on the original weights provided in apiclus1 -#===FT============ - - -# 1 FTid [M] - int -# 2 OSid [M/O] - int -# 3 VSid [M/O] - int -# 4 VDid [M] - int -# 5 SDid [M/O] - int -# 6 FOid [M/O] - int -# 7 TEid [M/O] - int -# 8 FTrecordType [M] - string -# 9 FTencryptedVesselCode [M] - StringLength100 -# 10 FTsequenceNumber [M] - int -# 11 FTstratification [DV,M] - YesNoFields -# 12 FTstratumName [DV,M] - StringLength100 -# 13 FTclustering [DV,M] - Clustering -# 14 FTclusterName [DV,M] - StringLength100 -# 15 FTsampler [O] - Sampler -# 16 FTsamplingType [M] - SamplingContext -# 17 FTnumberOfHaulsOrSets [M/O] - IntZeroToMax -# 18 FTdepartureLocation [M/O] - Harbour_LOCODE -# 19 FTdepartureDate [M/O] - Date -# 20 FTdepartureTime [M/O] - StringLength60 -# 21 FTarrivalLocation [M] - Harbour_LOCODE -# 22 FTarrivalDate [M] - Date -# 23 FTarrivalTime [M/O] - StringLength60 -# 24 FTnumberTotal [DV,O] - int -# 25 FTnumberSampled [DV,O] - int -# 26 FTselectionProb [DV,O] - Decimal0-1 -# 27 FTinclusionProb [DV,O] - Decimal0-1 -# 28 FTselectionMethod [DV,M] - SelectionMethod -# 29 FTunitName [DV,M] - StringLength100 -# 30 FTselectionMethodCluster [DV,O] - SelectionMethod -# 31 FTnumberTotalClusters [DV,O] - int -# 32 FTnumberSampledClusters [DV,O] - int -# 33 FTselectionProbCluster [DV,O] - Decimal0-1 -# 34 FTinclusionProbCluster [DV,O] - Decimal0-1 -# 35 FTsampled [DV,M] - YesNoFields -# 36 FTreasonNotSampled [DV,O] - ReasonForNotSampling -# 37 FTnonResponseCollected [DV,O] - YesNoFields -# 38 FTauxiliaryVariableTotal [DV,O] - DecimalPrec3 -# 39 FTauxiliaryVariableValue [DV,O] - DecimalPrec3 -# 40 FTauxiliaryVariableName [DV,O] - AuxiliaryVariableName -# 41 FTauxiliaryVariableUnit [DV,O] - MUNIT +#====FT=========== + + FT_df <- data.frame( - FTid = dataset$VSid,#[M] - int - OSid = "",# [M/O] - int - VSid = dataset$VSid, #[M/O] - VDid = "", #[M] - int - SDid = "", #[M/O] - int - FOid = "", #[M/O] - int - TEid = "", #[M/O] - int - FTrecordType='FT', #[M] - string - FTencryptedVesselCode = VS_base$VSencryptedVesselCode, #[M] - FTsequenceNumber = as.integer(1:nrow(dataset)), #[M] - string - FTstratification = "N", #[DV,M] - RS_Stratification - FTstratumName = "U", #[DV,M] - string - FTclustering = "N", #[DV,M] - RS_Clustering - FTclusterName = "U", #[DV,M] - string - FTsampler = "Observer", #[M] - RS_Sampler - FTsamplingType = "AtSea" , #[M] - RS_SamplingType - FTnumberOfHaulsOrSets = 1, #[O] - int - FTdepartureLocation="ZWHWN", #[O] - Harbour_LOCODE - FTdepartureDate=seq(from = as.Date("1965-01-01", format='%Y-%m-%d'), by=1, length.out=nrow(dataset)), #[M/O] - date - FTdepartureTime="", #[O] - time - FTarrivalLocation = "ZWHWN", #[M] - Harbour_LOCODE - FTarrivalDate=seq(from = as.Date("1965-01-01", format='%Y-%m-%d'), by=1, length.out=nrow(dataset)), #[M] - date - FTarrivalTime="", #[O] - time - FTnumberTotal= 1, #[DV,O] - int - FTnumberSampled=1, #[DV,O] - int - FTselectionProb=1, #[DV,O] - DecimalPrec10 - FTinclusionProb=1, #[DV,O] - DecimalPrec10 - FTselectionMethod="CENSUS", #[DV,M] - RS_SelectionMethod - FTunitName = VS_base$VSencryptedVesselCode, #[DV,M] - string - FTselectionMethodCluster="", #[DV,O] - RS_SelectionMethod - FTnumberTotalClusters="", #[DV,O] - int - FTnumberSampledClusters="", #[DV,O] - int - FTselectionProbCluster="", #[DV,O] - DecimalPrec10 - FTinclusionProbCluster="", #[DV,O] - DecimalPrec10 - FTsampled="Y", #[DV,M] - YesNoFields - FTreasonNotSampled= "", #[DV,O] - RS_ReasonForNotSampling - FTnonResponseCollected = "N", - FTauxiliaryVariableTotal = "", - FTauxiliaryVariableValue = "", - FTauxiliaryVariableName = "", - FTauxiliaryVariableUnit = "", - stringsAsFactors=FALSE +FTid = dataset$VSid,#[M] - int +OSid = "",# [M/O] - int +VSid = dataset$VSid, #[M/O] +VDid = "", #[M] - int +SDid = "", #[M/O] - int +FOid = "", #[M/O] - int +TEid = "", #[M/O] - int +FTrecordType='FT', #[M] - string +FTencryptedVesselCode = VS_base$VSencryptedVesselCode, #[M] +FTsequenceNumber = as.integer(1:nrow(dataset)), #[M] - string +FTstratification = "N", #[DV,M] - RS_Stratification +FTstratumName = "U", #[DV,M] - string +FTclustering = "N", #[DV,M] - RS_Clustering +FTclusterName = "U", #[DV,M] - string +FTsampler = "Observer", #[M] - RS_Sampler +FTsamplingType = "AtSea" , #[M] - RS_SamplingType +FTnumberOfHaulsOrSets = 1, #[O] - int +FTdepartureLocation="ZWHWN", #[O] - Harbour_LOCODE +FTdepartureDate=seq(from = as.Date("1965-01-01", format='%Y-%m-%d'), by=1, length.out=nrow(dataset)), #[M/O] - date +FTdepartureTime="", #[O] - time +FTarrivalLocation = "ZWHWN", #[M] - Harbour_LOCODE +FTarrivalDate=seq(from = as.Date("1965-01-01", format='%Y-%m-%d'), by=1, length.out=nrow(dataset)), #[M] - date +FTarrivalTime="", #[O] - time +FTdominantLandingDate= "", +FTnumberTotal= 1, #[DV,O] - int +FTnumberSampled=1, #[DV,O] - int +FTselectionProb=1, #[DV,O] - DecimalPrec10 +FTinclusionProb=1, #[DV,O] - DecimalPrec10 +FTselectionMethod="CENSUS", #[DV,M] - RS_SelectionMethod +FTunitName = VS_base$VSencryptedVesselCode, #[DV,M] - string +FTselectionMethodCluster="", #[DV,O] - RS_SelectionMethod +FTnumberTotalClusters="", #[DV,O] - int +FTnumberSampledClusters="", #[DV,O] - int +FTselectionProbCluster="", #[DV,O] - DecimalPrec10 +FTinclusionProbCluster="", #[DV,O] - DecimalPrec10 +FTsampled="Y", #[DV,M] - YesNoFields +FTreasonNotSampled= "", #[DV,O] - RS_ReasonForNotSampling +FTnonResponseCollected = "N", +FTauxiliaryVariableTotal = "", +FTauxiliaryVariableValue = "", +FTauxiliaryVariableName = "", +FTauxiliaryVariableUnit = "", +stringsAsFactors=FALSE ) #====FO=========== -# 1 FOid [M] - int -# 2 FTid [M/O] - int -# 3 SDid [M/O] - int -# 4 FOrecordType [M] - string -# 5 FOstratification [DV,M] - YesNoFields -# 6 FOsequenceNumber [M] - int -# 7 FOstratumName [DV,M] - StringLength100 -# 8 FOclustering [DV,M] - Clustering -# 9 FOclusterName [DV,M] - StringLength100 -# 10 FOsampler [O] - Sampler -# 11 FOaggregationLevel [M] - AggregationLevel -# 12 FOvalidity [M] - ValidityFlag -# 13 FOcatchReg [M] - CatchRegistration -# 14 FOstartDate [O] - Date -# 15 FOstartTime [O] - Time -# 16 FOendDate [M] - Date -# 17 FOendTime [M/O] - Time -# 18 FOduration [M/O] - int -# 19 FOdurationSource [M] - DurationSource -# 20 FOhandlingTime [O] - int -# 21 FOstartLat [O] - Decimal-90.000000-90.000000 -# 22 FOstartLon [O] - Decimal-180.000000-180.000000 -# 23 FOstopLat [O] - Decimal-90.000000-90.000000 -# 24 FOstopLon [O] - Decimal-180.000000-180.000000 -# 25 FOexclusiveEconomicZoneIndicator [O] - ISO_3166 -# 26 FOarea [M] - ICES_Area -# 27 FOrectangle [O] - StatRec -# 28 FOfisheriesManagementUnit [O] - AreaNonFAO -# 29 FOgsaSubarea [M] - Areas_GFCM_GSA -# 30 FOjurisdictionArea [O] - JurisdictionArea -# 31 FOfishingDepth [O] - int -# 32 FOwaterDepth [O] - int -# 33 FOnationalFishingActivity [O] - NationalFishingActivity -# 34 FOmetier5 [O] - Metier5_FishingActivity -# 35 FOmetier6 [M] - Metier6_FishingActivity -# 36 FOgear [M] - GearType -# 37 FOmeshSize [O] - int -# 38 FOselectionDevice [O] - SelectionDevice -# 39 FOselectionDeviceMeshSize [O] - int -# 40 FOtargetSpecies [O] - TargetSpecies -# 41 FOincidentalByCatchMitigationDeviceFirst [M] - BycatchMitigationDevice -# 42 FOincidentalByCatchMitigationDeviceTargetFirst [M] - BycatchMitigationDeviceTarget -# 43 FOincidentalByCatchMitigationDeviceSecond [M] - BycatchMitigationDevice -# 44 FOincidentalByCatchMitigationDeviceTargetSecond [M] - BycatchMitigationDeviceTarget -# 45 FOgearDimensions [O] - int -# 46 FOobservationCode [M] - ObservationCode -# 47 FOnumberTotal [DV,O] - int -# 48 FOnumberSampled [DV,O] - int -# 49 FOselectionProb [DV,O] - Decimal0.0000000000000001-1 -# 50 FOinclusionProb [DV,O] - Decimal0.0000000000000001-1 -# 51 FOselectionMethod [DV,M] - SelectionMethod -# 52 FOunitName [DV,M] - StringLength100 -# 53 FOselectionMethodCluster [DV,O] - SelectionMethod -# 54 FOnumberTotalClusters [DV,O] - int -# 55 FOnumberSampledClusters [DV,O] - int -# 56 FOselectionProbCluster [DV,O] - Decimal0.0000000000000001-1 -# 57 FOinclusionProbCluster [DV,O] - Decimal0.0000000000000001-1 -# 58 FOsampled [DV,M] - YesNoFields -# 59 FOreasonNotSampled [DV,O] - ReasonForNotSampling -# 60 FOnonResponseCollected [DV,O] - YesNoFields -# 61 FOauxiliaryVariableTotal [DV,O] - DecimalPrec3 -# 62 FOauxiliaryVariableValue [DV,O] - DecimalPrec3 -# 63 FOauxiliaryVariableName [DV,O] - AuxiliaryVariableName -# 64 FOauxiliaryVariableUnit [DV,O] - MUNIT - - FO_df <- data.frame( FOid = dataset$VSid, FTid = dataset$VSid, @@ -403,24 +246,29 @@ FO_df <- data.frame( FOendDate = as.Date(FT_df$FTdepartureDate, format='%Y-%m-%d'),#M FOendTime ="",#M FOduration = 60, #M ATTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT!!!! + FOfishingDurationDataBasis = "Measured", FOdurationSource = "Crew", FOhandlingTime = "", FOstartLat="", # ATT! FOstartLon="", # ATT! FOstopLat="", FOstopLon="", - FOexclusiveEconomicZoneIndicator = "", # + FOexclusiveEconomicZoneIndicator = "", # FOarea = "27.3.a.21", #M FOrectangle = "", FOfisheriesManagementUnit = "", FOgsaSubarea = "NotApplicable", #M FOjurisdictionArea = "", + FOgeographicalDataBasis = "Measured", + FOgeographicalSource = "Crew", FOfishingDepth = "", FOwaterDepth = "", FOnationalFishingActivity = "", FOmetier5 = "", FOmetier6 = "OTT_CRU_70-89_2_35",#M FOgear = "OTT",#M + FOgearDataBasis = "Measured", + FOgearSource = "", FOmeshSize = "", FOselectionDevice = "", FOselectionDeviceMeshSize = "", @@ -455,45 +303,6 @@ stringsAsFactors=FALSE #===SS============ -# 1 SSid [] - int -# 2 LEid [M/O] - int -# 3 FOid [M/O] - int -# 4 FTid [M/O] - int -# 5 OSid [M/O] - int -# 6 TEid [M/O] - int -# 7 SLid [M] - int -# 8 SSrecordType [M] - string -# 9 SSsequenceNumber [M] - int -# 10 SSstratification [DV,M] - YesNoFields -# 11 SSstratumName [DV,M] - StringLength100 -# 12 SSclustering [DV,M] - Clustering -# 13 SSclusterName [DV,M] - StringLength100 -# 14 SSobservationActivityType [DV,M] - ObservationActivityType -# 15 SScatchFraction [M] - CatchFraction -# 16 SSobservationType [DV,M] - ObservationMethod -# 17 SSsampler [O] - Sampler -# 18 SSspeciesListName [M] - StringLength100 -# 19 SSuseForCalculateZero [M] - YesNoFields -# 20 SStimeTotal [O] - int -# 21 SStimeSampled [O] - int -# 22 SSnumberTotal [DV,O] - int -# 23 SSnumberSampled [DV,O] - int -# 24 SSselectionProb [DV,O] - Decimal0.0000000000000001-1 -# 25 SSinclusionProb [DV,O] - Decimal0.0000000000000001-1 -# 26 SSselectionMethod [DV,M] - SelectionMethod -# 27 SSunitName [DV,M] - StringLength100 -# 28 SSselectionMethodCluster [DV,O] - SelectionMethod -# 29 SSnumberTotalClusters [DV,O] - int -# 30 SSnumberSampledClusters [DV,O] - int -# 31 SSselectionProbCluster [DV,O] - Decimal0.0000000000000001-1 -# 32 SSinclusionProbCluster [DV,O] - Decimal0.0000000000000001-1 -# 33 SSsampled [DV,M] - YesNoFields -# 34 SSreasonNotSampled [DV,O] - ReasonForNotSampling -# 35 SSnonResponseCollected [DV,O] - YesNoFields -# 36 SSauxiliaryVariableTotal [DV,O] - DecimalPrec3 -# 37 SSauxiliaryVariableValue [DV,O] - DecimalPrec3 -# 38 SSauxiliaryVariableName [DV,O] - AuxiliaryVariableName -# 39 SSauxiliaryVariableUnit [DV,O] - MUNIT SS_df<-data.frame( SSid = dataset$VSid, @@ -516,8 +325,10 @@ SS_df<-data.frame( SSspeciesListName = project_name_outputs, #M SSuseForCalculateZero = "N", #M SStimeTotal = "", + SStimeTotalDataBasis = "Measured", SStimeSampled = "", SSnumberTotal = 1, + SSnumberTotalDataBasis = "Measured", SSnumberSampled = 1, SSselectionProb = 1, SSinclusionProb = 1, @@ -535,122 +346,71 @@ SS_df<-data.frame( SSauxiliaryVariableValue = "", SSauxiliaryVariableName = "", SSauxiliaryVariableUnit = "", - stringsAsFactors=FALSE + stringsAsFactors=FALSE ) #====SA=========== - -# 1 SAid [] - int -# 2 SSid [] - int -# 3 SArecordType [M] - string -# 4 SAsequenceNumber [M] - int -# 5 SAparentSequenceNumber [O] - int -# 6 SAstratification [DV,M] - YesNoFields -# 7 SAstratumName [DV,M] - StringLength100 -# 8 SAspeciesCode [M] - SpecWoRMS -# 9 SAspeciesCodeFAO [O] - SpecASFIS -# 10 SAstateOfProcessing [M] - StateOfProcessing -# 11 SApresentation [M] - ProductPresentation -# 12 SAspecimensState [M] - SpecimensState -# 13 SAcatchCategory [M] - CatchCategory -# 14 SAlandingCategory [O] - LandingCategory -# 15 SAcommSizeCatScale [O] - CommercialSizeCategoryScale -# 16 SAcommSizeCat [O] - CommercialSizeCategory -# 17 SAsex [M] - SEXCO -# 18 SAexclusiveEconomicZoneIndicator [O] - ISO_3166 -# 19 SAarea [O] - ICES_Area -# 20 SArectangle [O] - StatRec -# 21 SAfisheriesManagementUnit [O] - AreaNonFAO -# 22 SAgsaSubarea [M] - Areas_GFCM_GSA -# 23 SAjurisdictionArea [O] - JurisdictionArea -# 24 SAnationalFishingActivity [O] - NationalFishingActivity -# 25 SAmetier5 [O] - Metier5_FishingActivity -# 26 SAmetier6 [O] - Metier6_FishingActivity -# 27 SAgear [O] - GearType -# 28 SAmeshSize [O] - int -# 29 SAselectionDevice [O] - SelectionDevice -# 30 SAselectionDeviceMeshSize [O] - int -# 31 SAunitType [M] - SamplingUnit -# 32 SAtotalWeightLive [O] - int -# 33 SAsampleWeightLive [O] - int -# 34 SAnumberTotal [DV,O] - Decimal0.1-999999999 -# 35 SAnumberSampled [DV,O] - Decimal0.1-999999999 -# 36 SAselectionProb [DV,O] - Decimal0-1 -# 37 SAinclusionProb [DV,O] - Decimal0-1 -# 38 SAselectionMethod [DV,M] - SelectionMethod -# 39 SAunitName [DV,M] - StringLength100 -# 40 SAlowerHierarchy [M/O] - LowerHierarchy -# 41 SAsampler [O] - Sampler -# 42 SAsampled [DV,M] - YesNoFields -# 43 SAreasonNotSampled [DV,O] - ReasonForNotSampling -# 44 SAnonResponseCollected [DV,O] - YesNoFields -# 45 SAreasonNotSampledFM [DV,O] - ReasonForNotSampling -# 46 SAreasonNotSampledBV [DV,O] - ReasonForNotSampling -# 47 SAtotalWeightMeasured [O] - int -# 48 SAsampleWeightMeasured [O] - int -# 49 SAconversionFactorMeasLive [O] - Decimal0.900-10 -# 50 SAauxiliaryVariableTotal [DV,O] - DecimalPrec3 -# 51 SAauxiliaryVariableValue [DV,O] - DecimalPrec3 -# 52 SAauxiliaryVariableName [DV,O] - AuxiliaryVariableName -# 53 SAauxiliaryVariableUnit [DV,O] - MUNIT - - SA_df<-data.frame( - SAid = 1:nrow(dataset), - SSid = dataset$VSid, - SArecordType = "SA", #M - SAsequenceNumber = dataset$VSid, #M - SAparentSequenceNumber = "", - SAstratification = "N", #M - SAstratumName = "U", #M - SAspeciesCode = 107254, #M - SAspeciesCodeFAO = "NEP", - SAstateOfProcessing = "FRE", #M - SApresentation = "WHL", #M - SAspecimensState = "DeadOrZeroProbSurvival", #M - SAcatchCategory = "Lan", #M - SAlandingCategory = "", - SAcommSizeCatScale = "", - SAcommSizeCat = "", - SAsex = "U", #M - SAexclusiveEconomicZoneIndicator = "", - SAarea = "", - SArectangle = "", - SAfisheriesManagementUnit = "", - SAgsaSubarea = "NotApplicable", #M - SAjurisdictionArea = "", - SAnationalFishingActivity = "", - SAmetier5 = "", - SAmetier6 = "", - SAgear = "", - SAmeshSize = "", - SAselectionDevice = "", - SAselectionDeviceMeshSize = "", - SAunitType = "Box", #M - SAtotalWeightLive = "", - SAsampleWeightLive = "", - SAnumberTotal = 1, - SAnumberSampled = 1, - SAselectionProb = 1, - SAinclusionProb = 1, - SAselectionMethod = "CENSUS", #M - SAunitName = dataset$VSid, #M - SAlowerHierarchy = "D", - SAsampler = "Observer", - SAsampled = ifelse(!is.na(dataset[[target_var]]),"Y","N"), #M - SAreasonNotSampled = "", - SAnonResponseCollected = "Y", - SAreasonNotSampledFM = "", - SAreasonNotSampledBV = "", - SAtotalWeightMeasured = ifelse(!is.na(dataset[[target_var]]),dataset[[target_var]],""), # - SAsampleWeightMeasured = ifelse(!is.na(dataset[[target_var]]),dataset[[target_var]],""), # - SAconversionFactorMeasLive = 1, - SAauxiliaryVariableTotal = "", - SAauxiliaryVariableValue = "", - SAauxiliaryVariableName = "", - SAauxiliaryVariableUnit = "", - stringsAsFactors=FALSE +SAid = 1:nrow(dataset), +SSid = dataset$VSid, +SArecordType = "SA", #M +SAsequenceNumber = dataset$VSid, #M +SAparentSequenceNumber = "", +SAstratification = "N", #M +SAstratumName = "U", #M +SAspeciesCode = 107254, #M +SAspeciesCodeFAO = "NEP", +SAstateOfProcessing = "FRE", #M +SApresentation = "WHL", #M +SAspecimensState = "DeadOrZeroProbSurvival", #M +SAcatchCategory = "Lan", #M +SAlandingCategory = "", +SAcommSizeCatScale = "", +SAcommSizeCat = "", +SAsex = "U", #M +SAexclusiveEconomicZoneIndicator = "", +SAarea = "", +SArectangle = "", +SAfisheriesManagementUnit = "", +SAgsaSubarea = "NotApplicable", #M +SAjurisdictionArea = "", +SAgeographicalDataBasis = "Measured", +SAgeographicalSource = "", +SAnationalFishingActivity = "", +SAmetier5 = "", +SAmetier6 = "", +SAgear = "", +SAgearDataBasis = "Measured", +SAgearSource = "", +SAmeshSize = "", +SAselectionDevice = "", +SAselectionDeviceMeshSize = "", +SAunitType = "Box", #M +SAtotalWeightLive = "", +SAsampleWeightLive = "", +SAnumberTotal = 1, +SAnumberSampled = 1, +SAselectionProb = 1, +SAinclusionProb = 1, +SAselectionMethod = "CENSUS", #M +SAunitName = dataset$VSid, #M +SAlowerHierarchy = "D", +SAsampler = "Observer", +SAsampled = ifelse(!is.na(dataset[[target_var]]),"Y","N"), #M +SAreasonNotSampled = "", +SAnonResponseCollected = "Y", +SAreasonNotSampledFM = "", +SAreasonNotSampledBV = "", +SAtotalWeightMeasured = ifelse(!is.na(dataset[[target_var]]),dataset[[target_var]],""), # +SAtotalWeightMeasuredDataBasis = "Measured", +SAsampleWeightMeasured = ifelse(!is.na(dataset[[target_var]]),dataset[[target_var]],""), # +SAconversionFactorMeasLive = 1, +SAauxiliaryVariableTotal = "", +SAauxiliaryVariableValue = "", +SAauxiliaryVariableName = "", +SAauxiliaryVariableUnit = "", +stringsAsFactors=FALSE ) @@ -703,16 +463,14 @@ RDBESlist[[i]][which(grepl(colnames(RDBESlist[[i]]),pat="[A-Z]id"))]<-NULL #===Save============ - dir_outputs<-paste0(base_dir_outputs,"/", - project_name_outputs,"/") - dir.create(dir_outputs, recursive=T, showWarnings=FALSE) - filename_output_CS <- paste0("H1.csv") - filename_output_SL <- paste0("HSL.csv") - filename_output_VD <- paste0("HVD.csv") - + dir_outputs<-paste0(base_dir_outputs,"/", project_name_outputs,"/") + dir.create(dir_outputs, recursive=T, showWarnings=FALSE) + filename_output_CS <- paste0(dir_outputs,"H1.csv") + filename_output_SL <- paste0(dir_outputs,"HSL.csv") + filename_output_VD <- paste0(dir_outputs,"HVD.csv") -lapply(RDBESlist, function(x, filename1 = paste0(dir_outputs,filename_output_CS)){ +lapply(RDBESlist, function(x, filename1 = filename_output_CS){ if("DErecordType" %in% colnames(x)){ write.table(x, file = filename1, append = FALSE, quote = FALSE, sep = ",", eol = "\n", na = "NA", dec = ".", row.names = FALSE, @@ -724,7 +482,7 @@ write.table(x, file = filename1, append = TRUE, quote = FALSE, sep = ",", } }) -b<-read.table(file=paste0(dir_outputs,filename_output_CS), header=F, sep=";") +b<-read.table(file = filename_output_CS, header=F, sep=";") b<-cbind(key,b) b<-b[order(as.character(b$key), decreasing=FALSE),] b<-b[!is.na(key),] @@ -732,24 +490,30 @@ b$key<-NULL b$V1<-as.character(b$V1) # saves CS output -write.table(b$V1, file=paste0(dir_outputs,filename_output_CS), col.names=FALSE, row.names = FALSE, quote=FALSE,sep=",") +write.table(b$V1, file = filename_output_CS, col.names=FALSE, row.names = FALSE, quote=FALSE,sep=",") -# -----Builds and saves dummySL----------------- +# -----Builds and saves dummySL and dummyIS----------------- SL_base$SLspeciesListName<-project_name_outputs SL_base$SLyear<-DEyear SL_base$SLinstitute<-SDinstitution -# saves SL output - write.table(SL_base, file=paste0(dir_outputs,filename_output_SL), col.names=FALSE, row.names = FALSE, quote=FALSE,sep=",") - + + # saves SL output + write.table(rbind(paste(SL_base, collapse=","), paste(IS_base, collapse=",")), file=filename_output_SL, col.names=FALSE, row.names = FALSE, quote=FALSE,sep=",") # -----Builds and saves dummyVD----------------- - # saves VD output VD_base$VDyear<-DEyear - write.table(VD_base, file=paste0(dir_outputs,filename_output_VD), col.names=FALSE, row.names = FALSE, quote=FALSE,sep=",") + write.table(VD_base, file=filename_output_VD, col.names=FALSE, row.names = FALSE, quote=FALSE,sep=",") + + + + + +# check - is this needed? +if(1==2){ @@ -770,5 +534,6 @@ write.csv(tmp, file=paste0(dir_outputs,"tmp/SpeciesList.csv"), quote=F, row.name library(zip) zip::zip(zipfile = paste0(project_name_outputs,".zip"), files =paste0(dir_outputs,"tmp/",dir(paste0(dir_outputs,"tmp"))), mode="cherry-pick") unlink(paste0(dir_outputs,"tmp"), force=T, recursive = T) +} diff --git a/data-raw/exampleData/TextBookExamples/BuildTextBookDataUpload_Pckg_survey_apiclus2_H1.R b/data-raw/exampleData/TextBookExamples/BuildTextBookDataUpload_Pckg_survey_apiclus2_H1.R index 702c497c..c286a7d3 100644 --- a/data-raw/exampleData/TextBookExamples/BuildTextBookDataUpload_Pckg_survey_apiclus2_H1.R +++ b/data-raw/exampleData/TextBookExamples/BuildTextBookDataUpload_Pckg_survey_apiclus2_H1.R @@ -43,79 +43,56 @@ DEsamplingScheme<-"WGRDBES-EST TEST 1" DEstratumName <- "Pckg_survey_apiclus2_H1" project_name_outputs <- gsub(" ","_", paste0(DEsamplingScheme,"_", DEstratumName)) baseDir <- "./data-raw/exampleData/TextBookExamples/" -#baseDir <- "" -VD_base <- readRDS(paste0(baseDir,"aux_TextBookExamples/VD_base.rds")) -SL_base <- readRDS(paste0(baseDir,"aux_TextBookExamples/SL_base.rds")) +baseDir <- "" + VD_base <- readRDS(paste0(baseDir,"aux_TextBookExamples/VD_base.rds")) + SL_base <- readRDS(paste0(baseDir,"aux_TextBookExamples/SL_base.rds")) + IS_base <- readRDS(paste0(baseDir,"aux_TextBookExamples/IS_base.rds")) + #nameof the directory where the outputs are saved currently + base_dir_outputs <- paste0(baseDir,"BuiltUploads") + dir.create(base_dir_outputs, recursive=T, showWarnings=FALSE) -#nameof the directory where the outputs are saved currently -base_dir_outputs <- paste0(baseDir,"BuiltUploads/") -if(!file.exists(base_dir_outputs)) dir.create(base_dir_outputs, recursive=T, showWarnings=FALSE) +#========Outline of Hierarchy 1================ + # Design + # Sampling details + # Vessel Selection + # Fishing Trip + # Fishing Operation + # Species Selection + # Sample + # Length + # Biological variables -#=========Outline of Hierarchy 1=============== -# Design -# Sampling details -# Vessel Selection -# Fishing Trip -# Fishing Operation -# Species Selection -# Sample -# Length -# Biological variables +#===DE============ -#====DE=========== - -# 1 DEid [] - int -# 2 DErecordType [M] - string -# 3 DEsamplingScheme [M] - SamplingScheme -# 4 DEsamplingSchemeType [M] - SamplingSchemeType -# 5 DEyear [M] - Year -# 6 DEstratumName [M] - StringLength100 -# 7 DEhierarchyCorrect [M] - YesNoFields -# 8 DEhierarchy [M] - RDBESUpperHierarchy -# 9 DEsampled [DV,M] - YesNoFields -# 10 DEreasonNotSampled [DV,O] - ReasonForNotSampling -# 11 DEnonResponseCollected [DV,O] - YesNoFields -# 12 DEauxiliaryVariableTotal [DV,O] - DecimalPrec3 -# 13 DEauxiliaryVariableValue [DV,O] - DecimalPrec3 -# 14 DEauxiliaryVariableName [DV,O] - AuxiliaryVariableName -# 15 DEauxiliaryVariableUnit[DV,O]-MUNIT - DE_df_base<-expand.grid(DEyear=DEyear, DEstratumName="U",stringsAsFactors=F) DE_df<-data.frame( - DEid = 1:nrow(DE_df_base), - DErecordType = "DE", - DEsamplingScheme = DEsamplingScheme, - DEsamplingSchemeType = "NatRouCF", - DEyear = as.integer(DEyear), - DEstratumName = DEstratumName, - DEhierarchyCorrect = "Y", - DEhierarchy = 1, - DEsampled = "Y", - DEreasonNotSampled = "", - DEnonResponseCollected = "Y", - DEauxiliaryVariableTotal = "", - DEauxiliaryVariableValue = "", - DEauxiliaryVariableName = "", - DEauxiliaryVariableUnit = "", - stringsAsFactors=FALSE -) - -#====SD=========== - + DEid = 1:nrow(DE_df_base), + DErecordType = "DE", + DEsamplingScheme = DEsamplingScheme, + DEsamplingSchemeType = "NatRouCF", + DEyear = as.integer(DEyear), + DEstratumName = DEstratumName, + DEhierarchyCorrect = "Y", + DEhierarchy = 1, + DEsampled = "Y", + DEreasonNotSampled = "", + DEnonResponseCollected = "Y", + DEauxiliaryVariableTotal = "", + DEauxiliaryVariableValue = "", + DEauxiliaryVariableName = "", + DEauxiliaryVariableUnit = "", + stringsAsFactors=FALSE + ) + +#===SD============ -# x -# 1 SDid [M] - int -# 2 DEid [M] - int -# 3 SDrecordType [M] - string -# 4 SDcountry [M] - ISO_3166 -# 5 SDinstitution [M] - EDMO SD_df<-data.frame( SDid=DE_df$DEid, @@ -128,33 +105,8 @@ SD_df<-data.frame( #===VS============ - # x -# 1 VSid [M] - int -# 2 SDid [M] - int -# 3 VDid [M] - int -# 4 TEid [M/O] - int -# 5 VSrecordType [M] - string -# 6 VSsequenceNumber [M] - int -# 7 VSencryptedVesselCode [M] - StringLength100 -# 8 VSstratification [DV,M] - YesNoFields -# 9 VSstratumName [DV,M] - StringLength100 -# 10 VSclustering [DV,M] - Clustering -# 11 VSclusterName [DV,M] - StringLength100 -# 12 VSsampler [O] - Sampler -# 13 VSnumberTotal [DV,O] - int -# 14 VSnumberSampled [DV,O] - int -# 15 VSselectionProb [DV,O] - Decimal0.0000000000000001-1 -# 16 VSinclusionProb [DV,O] - Decimal0.0000000000000001-1 -# 17 VSselectionMethod [DV,M] - SelectionMethod -# 18 VSunitName [DV,M] - StringLength100 -# 19 VSselectionMethodCluster [DV,O] - SelectionMethod -# 20 VSnumberTotalClusters [DV,O] - int -# 21 VSnumberSampledClusters [DV,O] - int -# 22 VSselectionProbCluster [DV,O] - Decimal0.0000000000000001-1 -# 23 VSinclusionProbCluster [DV,O] - Decimal0.0000000000000001-1 -# 24 VSsampled [DV,M] - YesNoFields -# 25 VSreasonNotSampled [DV,O] - ReasonForNotSampling + # The PSU will be Vessels and the SSU will be Trips df_vessel <- df[!duplicated(df[c("dname")]),] @@ -206,45 +158,9 @@ VS_df <- data.frame( ) -#===FT============ - - -# 1 FTid [M] - int -# 2 OSid [M/O] - int -# 3 VSid [M/O] - int -# 4 VDid [M] - int -# 5 SDid [M/O] - int -# 6 FOid [M/O] - int -# 7 TEid [M/O] - int -# 8 FTrecordType [M] - string -# 9 FTencryptedVesselCode [M] - StringLength100 -# 10 FTsequenceNumber [M] - StringLength100 -# 11 FTstratification [DV,M] - YesNoFields -# 12 FTstratumName [DV,M] - StringLength100 -# 13 FTclustering [DV,M] - Clustering -# 14 FTclusterName [DV,M] - StringLength100 -# 15 FTsampler [O] - Sampler -# 16 FTsamplingType [M] - SamplingContext -# 17 FTnumberOfHaulsOrSets [M/O] - IntZeroToMax -# 18 FTdepartureLocation [M/O] - Harbour_LOCODE -# 19 FTdepartureDate [M/O] - Date -# 20 FTdepartureTime [M/O] - Time -# 21 FTarrivalLocation [M] - Harbour_LOCODE -# 22 FTarrivalDate [M] - Date -# 23 FTarrivalTime [M/O] - Time -# 24 FTnumberTotal [DV,O] - int -# 25 FTnumberSampled [DV,O] - int -# 26 FTselectionProb [DV,O] - Decimal0.0000000000000001-1 -# 27 FTinclusionProb [DV,O] - Decimal0.0000000000000001-1 -# 28 FTselectionMethod [DV,M] - SelectionMethod -# 29 FTunitName [DV,M] - StringLength100 -# 30 FTselectionMethodCluster [DV,O] - SelectionMethod -# 31 FTnumberTotalClusters [DV,O] - int -# 32 FTnumberSampledClusters [DV,O] - int -# 33 FTselectionProbCluster [DV,O] - Decimal0.0000000000000001-1 -# 34 FTinclusionProbCluster [DV,O] - Decimal0.0000000000000001-1 -# 35 FTsampled [DV,M] - YesNoFields -# 36 FTreasonNotSampled [DV,O] - ReasonForNotSampling +#====FT=========== + + makeChildTbl <- function(parent, data, tbl="FT", by = "dname"){ @@ -253,49 +169,53 @@ makeChildTbl <- function(parent, data, tbl="FT", by = "dname"){ df <- merge(data, parent[mergeCols], by=by) sc <- table(df[[by]]) df$sampled_count <- as.vector(sc[df[[by]]]) - FT_df <- data.frame( - FTid = 1:nrow(df),#[M] - int - OSid = "",# [M/O] - int - VSid = df$VSid, #[M/O] - VDid = "", #[M] - int - SDid = "", #[M/O] - int - FOid = "", #[M/O] - int - TEid = "", #[M/O] - int - FTrecordType='FT', #[M] - string - FTencryptedVesselCode = df$VSencryptedVesselCode, #[M] - FTsequenceNumber = as.integer(1:nrow(df)), #[M] - string - FTstratification = "N", #[DV,M] - RS_Stratification - FTstratumName = "U", #[DV,M] - string - FTclustering = "N", #[DV,M] - RS_Clustering - FTclusterName = "U", #[DV,M] - string - FTsampler = "Observer", #[M] - RS_Sampler - FTsamplingType = "AtSea" , #[M] - RS_SamplingType - FTnumberOfHaulsOrSets = 1, #[O] - int - FTdepartureLocation="ZWHWN", #[O] - Harbour_LOCODE - FTdepartureDate=seq(from = as.Date("1968-01-01", format='%Y-%m-%d'), by=1, length.out=nrow(df)), #[M/O] - date - FTdepartureTime="", #[O] - time - FTarrivalLocation = "ZWHWN", #[M] - Harbour_LOCODE - FTarrivalDate=seq(from = as.Date("1968-01-01", format='%Y-%m-%d'), by=1, length.out=nrow(df)), #[M] - date - FTarrivalTime="", #[O] - time - FTnumberTotal= as.vector(df$fpc2), #[DV,O] - int - FTnumberSampled=df$sampled_count, #[DV,O] - int - FTselectionProb="", #[DV,O] - DecimalPrec10 - FTinclusionProb="", #[DV,O] - DecimalPrec10 - FTselectionMethod="SRSWOR", #[DV,M] - RS_SelectionMethod - FTunitName = df$VSid, #[DV,M] - string - FTselectionMethodCluster="", #[DV,O] - RS_SelectionMethod - FTnumberTotalClusters="", #[DV,O] - int - FTnumberSampledClusters="", #[DV,O] - int - FTselectionProbCluster="", #[DV,O] - DecimalPrec10 - FTinclusionProbCluster="", #[DV,O] - DecimalPrec10 - FTsampled="Y", #[DV,M] - YesNoFields - FTreasonNotSampled= "", #[DV,O] - RS_ReasonForNotSampling, - FTnonResponseCollected = factor("N", levels = c('N', 'Y')), - FTauxiliaryVariableTotal = "", - FTauxiliaryVariableValue = "", - FTauxiliaryVariableName = "", - FTauxiliaryVariableUnit = "", - stringsAsFactors=FALSE) + +FT_df <- data.frame( +FTid = 1:nrow(df),#[M] - int +OSid = "",# [M/O] - int +VSid = df$VSid, #[M/O] +VDid = "", #[M] - int +SDid = "", #[M/O] - int +FOid = "", #[M/O] - int +TEid = "", #[M/O] - int +FTrecordType='FT', #[M] - string +FTencryptedVesselCode = df$VSencryptedVesselCode, #[M] +FTsequenceNumber = as.integer(1:nrow(df)), #[M] - string +FTstratification = "N", #[DV,M] - RS_Stratification +FTstratumName = "U", #[DV,M] - string +FTclustering = "N", #[DV,M] - RS_Clustering +FTclusterName = "U", #[DV,M] - string +FTsampler = "Observer", #[M] - RS_Sampler +FTsamplingType = "AtSea" , #[M] - RS_SamplingType +FTnumberOfHaulsOrSets = 1, #[O] - int +FTdepartureLocation="ZWHWN", #[O] - Harbour_LOCODE +FTdepartureDate=seq(from = as.Date("1968-01-01", format='%Y-%m-%d'), by=1, length.out=nrow(df)), #[M/O] - date +FTdepartureTime="", #[O] - time +FTarrivalLocation = "ZWHWN", #[M] - Harbour_LOCODE +FTarrivalDate=seq(from = as.Date("1968-01-01", format='%Y-%m-%d'), by=1, length.out=nrow(df)), #[M] - date +FTarrivalTime="", #[O] - time +FTdominantLandingDate= "", +FTnumberTotal= as.vector(df$fpc2), #[DV,O] - int +FTnumberSampled=df$sampled_count, #[DV,O] - int +FTselectionProb="", #[DV,O] - DecimalPrec10 +FTinclusionProb="", #[DV,O] - DecimalPrec10 +FTselectionMethod="SRSWOR", #[DV,M] - RS_SelectionMethod +FTunitName = df$VSid, #[DV,M] - string +FTselectionMethodCluster="", #[DV,O] - RS_SelectionMethod +FTnumberTotalClusters="", #[DV,O] - int +FTnumberSampledClusters="", #[DV,O] - int +FTselectionProbCluster="", #[DV,O] - DecimalPrec10 +FTinclusionProbCluster="", #[DV,O] - DecimalPrec10 +FTsampled="Y", #[DV,M] - YesNoFields +FTreasonNotSampled= "", #[DV,O] - RS_ReasonForNotSampling, +FTnonResponseCollected = factor("N", levels = c('N', 'Y')), +FTauxiliaryVariableTotal = "", +FTauxiliaryVariableValue = "", +FTauxiliaryVariableName = "", +FTauxiliaryVariableUnit = "", +stringsAsFactors=FALSE +) + if(any(FT_df$FTnumberTotal < FT_df$FTnumberSampled)){ stop("Sampled is more than total") } @@ -309,66 +229,6 @@ FT_df <- makeChildTbl(df_vessel, df, by="dname") #====FO=========== -# 1 FOid [M] - int -# 2 FTid [M/O] - int -# 3 SDid [M/O] - int -# 4 FOrecordType [M] - string -# 5 FOstratification [DV,M] - YesNoFields -# 6 FOsequenceNumber [M] - int -# 7 FOstratumName [DV,M] - StringLength100 -# 8 FOclustering [DV,M] - Clustering -# 9 FOclusterName [DV,M] - StringLength100 -# 10 FOsampler [O] - Sampler -# 11 FOaggregationLevel [M] - AggregationLevel -# 12 FOvalidity [M] - ValidityFlag -# 13 FOcatchReg [M] - CatchRegistration -# 14 FOstartDate [O] - Date -# 15 FOstartTime [O] - Time -# 16 FOendDate [M] - Date -# 17 FOendTime [M/O] - Time -# 18 FOduration [M/O] - int -# 19 FOdurationSource [M] - DurationSource -# 20 FOhandlingTime [O] - int -# 21 FOstartLat [O] - Decimal-90.000000-90.000000 -# 22 FOstartLon [O] - Decimal-180.000000-180.000000 -# 23 FOstopLat [O] - Decimal-90.000000-90.000000 -# 24 FOstopLon [O] - Decimal-180.000000-180.000000 -# 25 FOexclusiveEconomicZoneIndicator [O] - ISO_3166 -# 26 FOarea [M] - ICES_Area -# 27 FOrectangle [O] - StatRec -# 28 FOgsaSubarea [M] - Areas_GFCM_GSA -# 29 FOjurisdictionArea [O] - JurisdictionArea -# 30 FOfishingDepth [O] - int -# 31 FOwaterDepth [O] - int -# 32 FOnationalFishingActivity [O] - NationalFishingActivity -# 33 FOmetier5 [O] - Metier5_FishingActivity -# 34 FOmetier6 [M] - Metier6_FishingActivity -# 35 FOgear [M] - GearType -# 36 FOmeshSize [O] - int -# 37 FOselectionDevice [O] - SelectionDevice -# 38 FOselectionDeviceMeshSize [O] - int -# 39 FOtargetSpecies [O] - TargetSpecies -# 40 FOincidentalByCatchMitigationDeviceFirst [M] - BycatchMitigationDevice -# 41 FOincidentalByCatchMitigationDeviceTargetFirst [M] - BycatchMitigationDeviceTarget -# 42 FOincidentalByCatchMitigationDeviceSecond [M] - BycatchMitigationDevice -# 43 FOincidentalByCatchMitigationDeviceTargetSecond [M] - BycatchMitigationDeviceTarget -# 44 FOgearDimensions [O] - int -# 45 FOobservationCode [M] - ObservationCode -# 46 FOnumberTotal [DV,O] - int -# 47 FOnumberSampled [DV,O] - int -# 48 FOselectionProb [DV,O] - Decimal0.0000000000000001-1 -# 49 FOinclusionProb [DV,O] - Decimal0.0000000000000001-1 -# 50 FOselectionMethod [DV,M] - SelectionMethod -# 51 FOunitName [DV,M] - StringLength100 -# 52 FOselectionMethodCluster [DV,O] - SelectionMethod -# 53 FOnumberTotalClusters [DV,O] - int -# 54 FOnumberSampledClusters [DV,O] - int -# 55 FOselectionProbCluster [DV,O] - Decimal0.0000000000000001-1 -# 56 FOinclusionProbCluster [DV,O] - Decimal0.0000000000000001-1 -# 57 FOsampled [DV,M] - YesNoFields -# 58 FOreasonNotSampled [DV,O] - ReasonForNotSampling - - FO_df <- data.frame( FOid = FT_df$FTid, FTid = FT_df$FTid, @@ -388,6 +248,7 @@ FO_df <- data.frame( FOendDate = as.Date(FT_df$FTdepartureDate, format='%Y-%m-%d'),#M FOendTime ="",#M FOduration = 60, #M ATTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT!!!! + FOfishingDurationDataBasis = "Measured", FOdurationSource = "Crew", FOhandlingTime = "", FOstartLat="", # ATT! @@ -400,12 +261,16 @@ FO_df <- data.frame( FOfisheriesManagementUnit = "", FOgsaSubarea = "NotApplicable", #M FOjurisdictionArea = "", + FOgeographicalDataBasis = "Measured", + FOgeographicalSource = "Crew", FOfishingDepth = "", FOwaterDepth = "", FOnationalFishingActivity = "", FOmetier5 = "", FOmetier6 = "OTT_CRU_70-89_2_35",#M FOgear = "OTT",#M + FOgearDataBasis = "Measured", + FOgearSource = "", FOmeshSize = "", FOselectionDevice = "", FOselectionDeviceMeshSize = "", @@ -430,7 +295,6 @@ FO_df <- data.frame( FOsampled = "Y", #M FOreasonNotSampled = "", FOnonResponseCollected = factor("N", levels = c('N', 'Y')), - FOfisheriesManagementUnit = "", FOauxiliaryVariableTotal = "", FOauxiliaryVariableValue = "", FOauxiliaryVariableName = "", @@ -439,43 +303,8 @@ stringsAsFactors=FALSE ) +#===SS============ -# 1 SSid [] - int -# 2 LEid [M/O] - int -# 3 FOid [M/O] - int -# 4 FTid [M/O] - int -# 5 OSid [M/O] - int -# 6 TEid [M/O] - int -# 7 SLid [M] - int -# 8 SSrecordType [M] - string -# 9 SSsequenceNumber [M] - int -# 10 SSstratification [DV,M] - YesNoFields -# 11 SSstratumName [DV,M] - StringLength100 -# 12 SSclustering [DV,M] - Clustering -# 13 SSclusterName [DV,M] - StringLength100 -# 14 SSobservationActivityType [DV,M] - ObservationActivityType -# 15 SScatchFraction [M] - CatchFraction -# 16 SSobservationType [DV,M] - ObservationMethod -# 17 SSsampler [O] - Sampler -# 18 SSspeciesListName [M] - StringLength100 -# 19 SSuseForCalculateZero [M] - YesNoFields -# 20 SStimeTotal [O] - int -# 21 SStimeSampled [O] - int -# 22 SSnumberTotal [DV,O] - int -# 23 SSnumberSampled [DV,O] - int -# 24 SSselectionProb [DV,O] - Decimal0.0000000000000001-1 -# 25 SSinclusionProb [DV,O] - Decimal0.0000000000000001-1 -# 26 SSselectionMethod [DV,M] - SelectionMethod -# 27 SSunitName [DV,M] - StringLength100 -# 28 SSselectionMethodCluster [DV,O] - SelectionMethod -# 29 SSnumberTotalClusters [DV,O] - int -# 30 SSnumberSampledClusters [DV,O] - int -# 31 SSselectionProbCluster [DV,O] - Decimal0.0000000000000001-1 -# 32 SSinclusionProbCluster [DV,O] - Decimal0.0000000000000001-1 -# 33 SSsampled [DV,M] - YesNoFields -# 34 SSreasonNotSampled [DV,O] - ReasonForNotSampling - -#====SS=========== SS_df<-data.frame( SSid = FO_df$FOid, @@ -498,8 +327,10 @@ SS_df<-data.frame( SSspeciesListName = project_name_outputs, #M SSuseForCalculateZero = "N", #M SStimeTotal = "", + SStimeTotalDataBasis = "Measured", SStimeSampled = "", SSnumberTotal = 1, + SSnumberTotalDataBasis = "Measured", SSnumberSampled = 1, SSselectionProb = 1, SSinclusionProb = 1, @@ -517,127 +348,79 @@ SS_df<-data.frame( SSauxiliaryVariableValue = "", SSauxiliaryVariableName = "", SSauxiliaryVariableUnit = "", - stringsAsFactors = F + stringsAsFactors=FALSE ) #====SA=========== - -# 1 SAid [] - int -# 2 SSid [] - int -# 3 SArecordType [M] - string -# 4 SAsequenceNumber [M] - int -# 5 SAparentSequenceNumber [O] - int -# 6 SAstratification [DV,M] - YesNoFields -# 7 SAstratumName [DV,M] - StringLength100 -# 8 SAspeciesCode [M] - SpecWoRMS -# 9 SAspeciesCodeFAO [O] - SpecASFIS -# 10 SAstateOfProcessing [M] - StateOfProcessing -# 11 SApresentation [M] - ProductPresentation -# 12 SAspecimensState [M] - SpecimensState -# 13 SAcatchCategory [M] - CatchCategory -# 14 SAlandingCategory [O] - LandingCategory -# 15 SAcommSizeCatScale [O] - CommercialSizeCategoryScale -# 16 SAcommSizeCat [O] - CommercialSizeCategory -# 17 SAsex [M] - SEXCO -# 18 SAexclusiveEconomicZoneIndicator [O] - ISO_3166 -# 19 SAarea [O] - ICES_Area -# 20 SArectangle [O] - StatRec -# 21 SAgsaSubarea [M] - Areas_GFCM_GSA -# 22 SAjurisdictionArea [O] - JurisdictionArea -# 23 SAnationalFishingActivity [O] - NationalFishingActivity -# 24 SAmetier5 [O] - Metier5_FishingActivity -# 25 SAmetier6 [O] - Metier6_FishingActivity -# 26 SAgear [O] - GearType -# 27 SAmeshSize [O] - int -# 28 SAselectionDevice [O] - SelectionDevice -# 29 SAselectionDeviceMeshSize [O] - int -# 30 SAunitType [M] - SamplingUnit -# 31 SAtotalWeightLive [O] - int -# 32 SAsampleWeightLive [O] - int -# 33 SAnumberTotal [DV,O] - Decimal0.1-999999999 -# 34 SAnumberSampled [DV,O] - Decimal0.1-999999999 -# 35 SAselectionProb [DV,O] - Decimal0.0000000000000001-1 -# 36 SAinclusionProb [DV,O] - Decimal0.0000000000000001-1 -# 37 SAselectionMethod [DV,M] - SelectionMethod -# 38 SAunitName [DV,M] - StringLength100 -# 39 SAlowerHierarchy [M/O] - LowerHierarchy -# 40 SAsampler [O] - Sampler -# 41 SAsampled [DV,M] - YesNoFields -# 42 SAreasonNotSampledFM [DV,O] - ReasonForNotSampling -# 43 SAreasonNotSampledBV [DV,O] - ReasonForNotSampling -# 44 SAtotalWeightMeasured [O] - int -# 45 SAsampleWeightMeasured [O] - int -# 46 SAconversionFactorMeasLive [O] - Decimal0.900-10 - SA_df<-data.frame( - SAid = SS_df$SSid, - SSid = SS_df$SSid, - SArecordType = "SA", #M - SAsequenceNumber = SS_df$SSid, #M - SAparentSequenceNumber = "", - SAstratification = "N", #M - SAstratumName = "U", #M - SAspeciesCode = 107254, #M - SAspeciesCodeFAO = "NEP", - SAstateOfProcessing = "FRE", #M - SApresentation = "WHL", #M - SAspecimensState = "DeadOrZeroProbSurvival", #M - SAcatchCategory = "Lan", #M - SAlandingCategory = "", - SAcommSizeCatScale = "", - SAcommSizeCat = "", - SAsex = "U", #M - SAexclusiveEconomicZoneIndicator = "", - SAarea = "", - SArectangle = "", - SAfisheriesManagementUnit = "", - SAgsaSubarea = "NotApplicable", #M - SAjurisdictionArea = "", - SAnationalFishingActivity = "", - SAmetier5 = "", - SAmetier6 = "", - SAgear = "", - SAmeshSize = "", - SAselectionDevice = "", - SAselectionDeviceMeshSize = "", - SAunitType = "Box", #M - SAtotalWeightLive = "", - SAsampleWeightLive = "", - SAnumberTotal = 1, - SAnumberSampled = 1, - SAselectionProb = 1, - SAinclusionProb = 1, - SAselectionMethod = "CENSUS", #M - SAunitName = SS_df$SSid, #M - SAlowerHierarchy = "D", - SAsampler = "Observer", - SAsampled = ifelse(!is.na(df[[target_var]]),"Y","N"), #M - SAreasonNotSampled = "", - SAnonResponseCollected = "Y", - SAreasonNotSampledFM = "", - SAreasonNotSampledBV = "", - SAtotalWeightMeasured = ifelse(!is.na(df[[target_var]]),df[[target_var]],""), # - SAsampleWeightMeasured = ifelse(!is.na(df[[target_var]]),df[[target_var]],""), # - SAconversionFactorMeasLive = 1, - SAauxiliaryVariableTotal = "", - SAauxiliaryVariableValue = "", - SAauxiliaryVariableName = "", - SAauxiliaryVariableUnit = "", - stringsAsFactors=FALSE +SAid = SS_df$SSid, +SSid = SS_df$SSid, +SArecordType = "SA", #M +SAsequenceNumber = SS_df$SSid, #M +SAparentSequenceNumber = "", +SAstratification = "N", #M +SAstratumName = "U", #M +SAspeciesCode = 107254, #M +SAspeciesCodeFAO = "NEP", +SAstateOfProcessing = "FRE", #M +SApresentation = "WHL", #M +SAspecimensState = "DeadOrZeroProbSurvival", #M +SAcatchCategory = "Lan", #M +SAlandingCategory = "", +SAcommSizeCatScale = "", +SAcommSizeCat = "", +SAsex = "U", #M +SAexclusiveEconomicZoneIndicator = "", +SAarea = "", +SArectangle = "", +SAfisheriesManagementUnit = "", +SAgsaSubarea = "NotApplicable", #M +SAjurisdictionArea = "", +SAgeographicalDataBasis = "Measured", +SAgeographicalSource = "", +SAnationalFishingActivity = "", +SAmetier5 = "", +SAmetier6 = "", +SAgear = "", +SAgearDataBasis = "Measured", +SAgearSource = "", +SAmeshSize = "", +SAselectionDevice = "", +SAselectionDeviceMeshSize = "", +SAunitType = "Box", #M +SAtotalWeightLive = "", +SAsampleWeightLive = "", +SAnumberTotal = 1, +SAnumberSampled = 1, +SAselectionProb = 1, +SAinclusionProb = 1, +SAselectionMethod = "CENSUS", #M +SAunitName = SS_df$SSid, #M +SAlowerHierarchy = "D", +SAsampler = "Observer", +SAsampled = ifelse(!is.na(df[[target_var]]),"Y","N"), #M +SAreasonNotSampled = "", +SAnonResponseCollected = "Y", +SAreasonNotSampledFM = "", +SAreasonNotSampledBV = "", +SAtotalWeightMeasured = ifelse(!is.na(df[[target_var]]),df[[target_var]],""), # +SAtotalWeightMeasuredDataBasis = "Measured", +SAsampleWeightMeasured = ifelse(!is.na(df[[target_var]]),df[[target_var]],""), # +SAconversionFactorMeasLive = 1, +SAauxiliaryVariableTotal = "", +SAauxiliaryVariableValue = "", +SAauxiliaryVariableName = "", +SAauxiliaryVariableUnit = "", +stringsAsFactors=FALSE ) #====Builds final format=========== -RDBESlist <- list(DE = DE_df, - SD = SD_df, - VS = VS_df, - FT = FT_df, - FO = FO_df, - SS = SS_df, - SA = SA_df) + +RDBESlist = list(DE = DE_df,SD = SD_df, VS = VS_df, FT = FT_df, FO = FO_df, SS = SS_df, SA = SA_df) #id table a<-merge(DE_df["DEid"],SD_df[c("DEid","SDid")]) @@ -680,18 +463,16 @@ for (i in names(RDBESlist)) RDBESlist[[i]][which(grepl(colnames(RDBESlist[[i]]),pat="[A-Z]id"))]<-NULL } -#===Save============= +#===Save============ - dir_outputs<-paste0(base_dir_outputs, - project_name_outputs,"/") - dir.create(dir_outputs, recursive=T, showWarnings=FALSE) - filename_output_CS <- paste0("H1.csv") - filename_output_SL <- paste0("HSL.csv") - filename_output_VD <- paste0("HVD.csv") + dir_outputs<-paste0(base_dir_outputs,"/", project_name_outputs,"/") + dir.create(dir_outputs, recursive=T, showWarnings=FALSE) + filename_output_CS <- paste0(dir_outputs,"H1.csv") + filename_output_SL <- paste0(dir_outputs,"HSL.csv") + filename_output_VD <- paste0(dir_outputs,"HVD.csv") - -lapply(RDBESlist, function(x, filename1 = paste0(dir_outputs,filename_output_CS)){ +lapply(RDBESlist, function(x, filename1 = filename_output_CS){ if("DErecordType" %in% colnames(x)){ write.table(x, file = filename1, append = FALSE, quote = FALSE, sep = ",", eol = "\n", na = "NA", dec = ".", row.names = FALSE, @@ -703,7 +484,7 @@ write.table(x, file = filename1, append = TRUE, quote = FALSE, sep = ",", } }) -b<-read.table(file=paste0(dir_outputs,filename_output_CS), header=F, sep=";") +b<-read.table(file = filename_output_CS, header=F, sep=";") b<-cbind(key,b) b<-b[order(as.character(b$key), decreasing=FALSE),] b<-b[!is.na(key),] @@ -711,22 +492,22 @@ b$key<-NULL b$V1<-as.character(b$V1) # saves CS output -write.table(b$V1, file=paste0(dir_outputs,filename_output_CS), col.names=FALSE, row.names = FALSE, quote=FALSE,sep=",") +write.table(b$V1, file = filename_output_CS, col.names=FALSE, row.names = FALSE, quote=FALSE,sep=",") + +# -----Builds and saves dummySL and dummyIS----------------- -#---- Builds and saves dummySL ------------ SL_base$SLspeciesListName<-project_name_outputs SL_base$SLyear<-DEyear SL_base$SLinstitute<-SDinstitution -# saves SL output - write.table(SL_base, file=paste0(dir_outputs,filename_output_SL), col.names=FALSE, row.names = FALSE, quote=FALSE,sep=",") - - -#-------Builds and saves dummyVD--------------- + + # saves SL output + write.table(rbind(paste(SL_base, collapse=","), paste(IS_base, collapse=",")), file=filename_output_SL, col.names=FALSE, row.names = FALSE, quote=FALSE,sep=",") +# -----Builds and saves dummyVD----------------- # saves VD output - -write.table(VD_base, file=paste0(dir_outputs,filename_output_VD), col.names=FALSE, row.names = FALSE, quote=FALSE,sep=",") + VD_base$VDyear<-DEyear + write.table(VD_base, file=filename_output_VD, col.names=FALSE, row.names = FALSE, quote=FALSE,sep=",") diff --git a/data-raw/exampleData/TextBookExamples/BuildTextBookDataUpload_Pckg_survey_apiclus2_v2_H1.R b/data-raw/exampleData/TextBookExamples/BuildTextBookDataUpload_Pckg_survey_apiclus2_v2_H1.R index 83b6e138..2c93b663 100644 --- a/data-raw/exampleData/TextBookExamples/BuildTextBookDataUpload_Pckg_survey_apiclus2_v2_H1.R +++ b/data-raw/exampleData/TextBookExamples/BuildTextBookDataUpload_Pckg_survey_apiclus2_v2_H1.R @@ -45,10 +45,10 @@ baseDir <- "" VD_base <- readRDS(paste0(baseDir,"aux_TextBookExamples/VD_base.rds")) SL_base <- readRDS(paste0(baseDir,"aux_TextBookExamples/SL_base.rds")) - + IS_base <- readRDS(paste0(baseDir,"aux_TextBookExamples/IS_base.rds")) #nameof the directory where the outputs are saved currently base_dir_outputs <- paste0(baseDir,"BuiltUploads") - if(!file.exists(base_dir_outputs)) dir.create(base_dir_outputs, recursive=T, showWarnings=FALSE) + dir.create(base_dir_outputs, recursive=T, showWarnings=FALSE) #=========Outline of Hierarchy 1=============== @@ -64,28 +64,12 @@ -#====DE=========== - +#===DE============ -# 1 DEid [] - int -# 2 DErecordType [M] - string -# 3 DEsamplingScheme [M] - SamplingScheme -# 4 DEsamplingSchemeType [M] - SamplingSchemeType -# 5 DEyear [M] - Year -# 6 DEstratumName [M] - StringLength100 -# 7 DEhierarchyCorrect [M] - YesNoFields -# 8 DEhierarchy [M] - RDBESUpperHierarchy -# 9 DEsampled [DV,M] - YesNoFields -# 10 DEreasonNotSampled [DV,O] - ReasonForNotSampling -# 11 DEnonResponseCollected [DV,O] - YesNoFields -# 12 DEauxiliaryVariableTotal [DV,O] - DecimalPrec3 -# 13 DEauxiliaryVariableValue [DV,O] - DecimalPrec3 -# 14 DEauxiliaryVariableName [DV,O] - AuxiliaryVariableName -# 15 DEauxiliaryVariableUnit[DV,O]-MUNIT DE_df_base<-expand.grid(DEyear=DEyear, - DEstratumName="U",stringsAsFactors=F) + DEstratumName="U",stringsAsFactors=F) DE_df<-data.frame( DEid = 1:nrow(DE_df_base), @@ -106,15 +90,8 @@ DE_df<-data.frame( stringsAsFactors=FALSE ) -#====SD=========== - +#===SD============ - # x -# 1 SDid [M] - int -# 2 DEid [M] - int -# 3 SDrecordType [M] - string -# 4 SDcountry [M] - ISO_3166 -# 5 SDinstitution [M] - EDMO SD_df<-data.frame( SDid=DE_df$DEid, @@ -127,39 +104,8 @@ SD_df<-data.frame( #===VS============ - # x -# 1 VSid [M] - int -# 2 SDid [M] - int -# 3 VDid [M] - int -# 4 TEid [M/O] - int -# 5 VSrecordType [M] - string -# 6 VSsequenceNumber [M] - int -# 7 VSencryptedVesselCode [M] - StringLength100 -# 8 VSstratification [DV,M] - YesNoFields -# 9 VSstratumName [DV,M] - StringLength100 -# 10 VSclustering [DV,M] - Clustering -# 11 VSclusterName [DV,M] - StringLength100 -# 12 VSsampler [O] - Sampler -# 13 VSnumberTotal [DV,O] - int -# 14 VSnumberSampled [DV,O] - int -# 15 VSselectionProb [DV,O] - Decimal0.0000000000000001-1 -# 16 VSinclusionProb [DV,O] - Decimal0.0000000000000001-1 -# 17 VSselectionMethod [DV,M] - SelectionMethod -# 18 VSunitName [DV,M] - StringLength100 -# 19 VSselectionMethodCluster [DV,O] - SelectionMethod -# 20 VSnumberTotalClusters [DV,O] - int -# 21 VSnumberSampledClusters [DV,O] - int -# 22 VSselectionProbCluster [DV,O] - Decimal0.0000000000000001-1 -# 23 VSinclusionProbCluster [DV,O] - Decimal0.0000000000000001-1 -# 24 VSsampled [DV,M] - YesNoFields -# 25 VSreasonNotSampled [DV,O] - ReasonForNotSampling -# 26 VSnonResponseCollected [DV,O] - YesNoFields -# 27 VSauxiliaryVariableTotal [DV,O] - DecimalPrec3 -# 28 VSauxiliaryVariableValue [DV,O] - DecimalPrec3 -# 29 VSauxiliaryVariableName [DV,O] - AuxiliaryVariableName -# 30 VSauxiliaryVariableUnit [DV,O] - MUNIT - -#check_All_fields("VS") + + dataset$VSid <- 1:nrow(dataset) aux_sampleSizes<-as.data.table(dataset)[,list(Npsu=757,npsu=40, Nssu="",nssu=.N),list(dnum, dname)] @@ -220,165 +166,59 @@ VS_df$VSselectionMethodCluster<-"SRSWOR" VS_df$VSselectionProbCluster<-"" VS_df$VSinclusionProbCluster<-"" -#===FT============ - - -# 1 FTid [M] - int -# 2 OSid [M/O] - int -# 3 VSid [M/O] - int -# 4 VDid [M] - int -# 5 SDid [M/O] - int -# 6 FOid [M/O] - int -# 7 TEid [M/O] - int -# 8 FTrecordType [M] - string -# 9 FTencryptedVesselCode [M] - StringLength100 -# 10 FTsequenceNumber [M] - int -# 11 FTstratification [DV,M] - YesNoFields -# 12 FTstratumName [DV,M] - StringLength100 -# 13 FTclustering [DV,M] - Clustering -# 14 FTclusterName [DV,M] - StringLength100 -# 15 FTsampler [O] - Sampler -# 16 FTsamplingType [M] - SamplingContext -# 17 FTnumberOfHaulsOrSets [M/O] - IntZeroToMax -# 18 FTdepartureLocation [M/O] - Harbour_LOCODE -# 19 FTdepartureDate [M/O] - Date -# 20 FTdepartureTime [M/O] - StringLength60 -# 21 FTarrivalLocation [M] - Harbour_LOCODE -# 22 FTarrivalDate [M] - Date -# 23 FTarrivalTime [M/O] - StringLength60 -# 24 FTnumberTotal [DV,O] - int -# 25 FTnumberSampled [DV,O] - int -# 26 FTselectionProb [DV,O] - Decimal0-1 -# 27 FTinclusionProb [DV,O] - Decimal0-1 -# 28 FTselectionMethod [DV,M] - SelectionMethod -# 29 FTunitName [DV,M] - StringLength100 -# 30 FTselectionMethodCluster [DV,O] - SelectionMethod -# 31 FTnumberTotalClusters [DV,O] - int -# 32 FTnumberSampledClusters [DV,O] - int -# 33 FTselectionProbCluster [DV,O] - Decimal0-1 -# 34 FTinclusionProbCluster [DV,O] - Decimal0-1 -# 35 FTsampled [DV,M] - YesNoFields -# 36 FTreasonNotSampled [DV,O] - ReasonForNotSampling -# 37 FTnonResponseCollected [DV,O] - YesNoFields -# 38 FTauxiliaryVariableTotal [DV,O] - DecimalPrec3 -# 39 FTauxiliaryVariableValue [DV,O] - DecimalPrec3 -# 40 FTauxiliaryVariableName [DV,O] - AuxiliaryVariableName -# 41 FTauxiliaryVariableUnit [DV,O] - MUNIT +#====FT=========== + + FT_df <- data.frame( - FTid = dataset$VSid,#[M] - int - OSid = "",# [M/O] - int - VSid = dataset$VSid, #[M/O] - VDid = "", #[M] - int - SDid = "", #[M/O] - int - FOid = "", #[M/O] - int - TEid = "", #[M/O] - int - FTrecordType='FT', #[M] - string - FTencryptedVesselCode = VS_base$VSencryptedVesselCode, #[M] - FTsequenceNumber = as.integer(1:nrow(dataset)), #[M] - string - FTstratification = "N", #[DV,M] - RS_Stratification - FTstratumName = "U", #[DV,M] - string - FTclustering = "N", #[DV,M] - RS_Clustering - FTclusterName = "U", #[DV,M] - string - FTsampler = "Observer", #[M] - RS_Sampler - FTsamplingType = "AtSea" , #[M] - RS_SamplingType - FTnumberOfHaulsOrSets = 1, #[O] - int - FTdepartureLocation="ZWHWN", #[O] - Harbour_LOCODE - FTdepartureDate=seq(from = as.Date("1965-01-01", format='%Y-%m-%d'), by=1, length.out=nrow(dataset)), #[M/O] - date - FTdepartureTime="", #[O] - time - FTarrivalLocation = "ZWHWN", #[M] - Harbour_LOCODE - FTarrivalDate=seq(from = as.Date("1965-01-01", format='%Y-%m-%d'), by=1, length.out=nrow(dataset)), #[M] - date - FTarrivalTime="", #[O] - time - FTnumberTotal= 1, #[DV,O] - int - FTnumberSampled=1, #[DV,O] - int - FTselectionProb=1, #[DV,O] - DecimalPrec10 - FTinclusionProb=1, #[DV,O] - DecimalPrec10 - FTselectionMethod="CENSUS", #[DV,M] - RS_SelectionMethod - FTunitName = VS_base$VSencryptedVesselCode, #[DV,M] - string - FTselectionMethodCluster="", #[DV,O] - RS_SelectionMethod - FTnumberTotalClusters="", #[DV,O] - int - FTnumberSampledClusters="", #[DV,O] - int - FTselectionProbCluster="", #[DV,O] - DecimalPrec10 - FTinclusionProbCluster="", #[DV,O] - DecimalPrec10 - FTsampled="Y", #[DV,M] - YesNoFields - FTreasonNotSampled= "", #[DV,O] - RS_ReasonForNotSampling - FTnonResponseCollected = "N", - FTauxiliaryVariableTotal = "", - FTauxiliaryVariableValue = "", - FTauxiliaryVariableName = "", - FTauxiliaryVariableUnit = "", - stringsAsFactors=FALSE +FTid = dataset$VSid,#[M] - int +OSid = "",# [M/O] - int +VSid = dataset$VSid, #[M/O] +VDid = "", #[M] - int +SDid = "", #[M/O] - int +FOid = "", #[M/O] - int +TEid = "", #[M/O] - int +FTrecordType='FT', #[M] - string +FTencryptedVesselCode = VS_base$VSencryptedVesselCode, #[M] +FTsequenceNumber = as.integer(1:nrow(dataset)), #[M] - string +FTstratification = "N", #[DV,M] - RS_Stratification +FTstratumName = "U", #[DV,M] - string +FTclustering = "N", #[DV,M] - RS_Clustering +FTclusterName = "U", #[DV,M] - string +FTsampler = "Observer", #[M] - RS_Sampler +FTsamplingType = "AtSea" , #[M] - RS_SamplingType +FTnumberOfHaulsOrSets = 1, #[O] - int +FTdepartureLocation="ZWHWN", #[O] - Harbour_LOCODE +FTdepartureDate=seq(from = as.Date("1965-01-01", format='%Y-%m-%d'), by=1, length.out=nrow(dataset)), #[M/O] - date +FTdepartureTime="", #[O] - time +FTarrivalLocation = "ZWHWN", #[M] - Harbour_LOCODE +FTarrivalDate=seq(from = as.Date("1965-01-01", format='%Y-%m-%d'), by=1, length.out=nrow(dataset)), #[M] - date +FTarrivalTime="", #[O] - time +FTdominantLandingDate= "", +FTnumberTotal= 1, #[DV,O] - int +FTnumberSampled=1, #[DV,O] - int +FTselectionProb=1, #[DV,O] - DecimalPrec10 +FTinclusionProb=1, #[DV,O] - DecimalPrec10 +FTselectionMethod="CENSUS", #[DV,M] - RS_SelectionMethod +FTunitName = VS_base$VSencryptedVesselCode, #[DV,M] - string +FTselectionMethodCluster="", #[DV,O] - RS_SelectionMethod +FTnumberTotalClusters="", #[DV,O] - int +FTnumberSampledClusters="", #[DV,O] - int +FTselectionProbCluster="", #[DV,O] - DecimalPrec10 +FTinclusionProbCluster="", #[DV,O] - DecimalPrec10 +FTsampled="Y", #[DV,M] - YesNoFields +FTreasonNotSampled= "", #[DV,O] - RS_ReasonForNotSampling +FTnonResponseCollected = "N", +FTauxiliaryVariableTotal = "", +FTauxiliaryVariableValue = "", +FTauxiliaryVariableName = "", +FTauxiliaryVariableUnit = "", +stringsAsFactors=FALSE ) #====FO=========== -# 1 FOid [M] - int -# 2 FTid [M/O] - int -# 3 SDid [M/O] - int -# 4 FOrecordType [M] - string -# 5 FOstratification [DV,M] - YesNoFields -# 6 FOsequenceNumber [M] - int -# 7 FOstratumName [DV,M] - StringLength100 -# 8 FOclustering [DV,M] - Clustering -# 9 FOclusterName [DV,M] - StringLength100 -# 10 FOsampler [O] - Sampler -# 11 FOaggregationLevel [M] - AggregationLevel -# 12 FOvalidity [M] - ValidityFlag -# 13 FOcatchReg [M] - CatchRegistration -# 14 FOstartDate [O] - Date -# 15 FOstartTime [O] - Time -# 16 FOendDate [M] - Date -# 17 FOendTime [M/O] - Time -# 18 FOduration [M/O] - int -# 19 FOdurationSource [M] - DurationSource -# 20 FOhandlingTime [O] - int -# 21 FOstartLat [O] - Decimal-90.000000-90.000000 -# 22 FOstartLon [O] - Decimal-180.000000-180.000000 -# 23 FOstopLat [O] - Decimal-90.000000-90.000000 -# 24 FOstopLon [O] - Decimal-180.000000-180.000000 -# 25 FOexclusiveEconomicZoneIndicator [O] - ISO_3166 -# 26 FOarea [M] - ICES_Area -# 27 FOrectangle [O] - StatRec -# 28 FOfisheriesManagementUnit [O] - AreaNonFAO -# 29 FOgsaSubarea [M] - Areas_GFCM_GSA -# 30 FOjurisdictionArea [O] - JurisdictionArea -# 31 FOfishingDepth [O] - int -# 32 FOwaterDepth [O] - int -# 33 FOnationalFishingActivity [O] - NationalFishingActivity -# 34 FOmetier5 [O] - Metier5_FishingActivity -# 35 FOmetier6 [M] - Metier6_FishingActivity -# 36 FOgear [M] - GearType -# 37 FOmeshSize [O] - int -# 38 FOselectionDevice [O] - SelectionDevice -# 39 FOselectionDeviceMeshSize [O] - int -# 40 FOtargetSpecies [O] - TargetSpecies -# 41 FOincidentalByCatchMitigationDeviceFirst [M] - BycatchMitigationDevice -# 42 FOincidentalByCatchMitigationDeviceTargetFirst [M] - BycatchMitigationDeviceTarget -# 43 FOincidentalByCatchMitigationDeviceSecond [M] - BycatchMitigationDevice -# 44 FOincidentalByCatchMitigationDeviceTargetSecond [M] - BycatchMitigationDeviceTarget -# 45 FOgearDimensions [O] - int -# 46 FOobservationCode [M] - ObservationCode -# 47 FOnumberTotal [DV,O] - int -# 48 FOnumberSampled [DV,O] - int -# 49 FOselectionProb [DV,O] - Decimal0.0000000000000001-1 -# 50 FOinclusionProb [DV,O] - Decimal0.0000000000000001-1 -# 51 FOselectionMethod [DV,M] - SelectionMethod -# 52 FOunitName [DV,M] - StringLength100 -# 53 FOselectionMethodCluster [DV,O] - SelectionMethod -# 54 FOnumberTotalClusters [DV,O] - int -# 55 FOnumberSampledClusters [DV,O] - int -# 56 FOselectionProbCluster [DV,O] - Decimal0.0000000000000001-1 -# 57 FOinclusionProbCluster [DV,O] - Decimal0.0000000000000001-1 -# 58 FOsampled [DV,M] - YesNoFields -# 59 FOreasonNotSampled [DV,O] - ReasonForNotSampling -# 60 FOnonResponseCollected [DV,O] - YesNoFields -# 61 FOauxiliaryVariableTotal [DV,O] - DecimalPrec3 -# 62 FOauxiliaryVariableValue [DV,O] - DecimalPrec3 -# 63 FOauxiliaryVariableName [DV,O] - AuxiliaryVariableName -# 64 FOauxiliaryVariableUnit [DV,O] - MUNIT - - FO_df <- data.frame( FOid = dataset$VSid, FTid = dataset$VSid, @@ -398,6 +238,7 @@ FO_df <- data.frame( FOendDate = as.Date(FT_df$FTdepartureDate, format='%Y-%m-%d'),#M FOendTime ="",#M FOduration = 60, #M ATTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT!!!! + FOfishingDurationDataBasis = "Measured", FOdurationSource = "Crew", FOhandlingTime = "", FOstartLat="", # ATT! @@ -410,12 +251,16 @@ FO_df <- data.frame( FOfisheriesManagementUnit = "", FOgsaSubarea = "NotApplicable", #M FOjurisdictionArea = "", + FOgeographicalDataBasis = "Measured", + FOgeographicalSource = "Crew", FOfishingDepth = "", FOwaterDepth = "", FOnationalFishingActivity = "", FOmetier5 = "", FOmetier6 = "OTT_CRU_70-89_2_35",#M FOgear = "OTT",#M + FOgearDataBasis = "Measured", + FOgearSource = "", FOmeshSize = "", FOselectionDevice = "", FOselectionDeviceMeshSize = "", @@ -450,45 +295,6 @@ stringsAsFactors=FALSE #===SS============ -# 1 SSid [] - int -# 2 LEid [M/O] - int -# 3 FOid [M/O] - int -# 4 FTid [M/O] - int -# 5 OSid [M/O] - int -# 6 TEid [M/O] - int -# 7 SLid [M] - int -# 8 SSrecordType [M] - string -# 9 SSsequenceNumber [M] - int -# 10 SSstratification [DV,M] - YesNoFields -# 11 SSstratumName [DV,M] - StringLength100 -# 12 SSclustering [DV,M] - Clustering -# 13 SSclusterName [DV,M] - StringLength100 -# 14 SSobservationActivityType [DV,M] - ObservationActivityType -# 15 SScatchFraction [M] - CatchFraction -# 16 SSobservationType [DV,M] - ObservationMethod -# 17 SSsampler [O] - Sampler -# 18 SSspeciesListName [M] - StringLength100 -# 19 SSuseForCalculateZero [M] - YesNoFields -# 20 SStimeTotal [O] - int -# 21 SStimeSampled [O] - int -# 22 SSnumberTotal [DV,O] - int -# 23 SSnumberSampled [DV,O] - int -# 24 SSselectionProb [DV,O] - Decimal0.0000000000000001-1 -# 25 SSinclusionProb [DV,O] - Decimal0.0000000000000001-1 -# 26 SSselectionMethod [DV,M] - SelectionMethod -# 27 SSunitName [DV,M] - StringLength100 -# 28 SSselectionMethodCluster [DV,O] - SelectionMethod -# 29 SSnumberTotalClusters [DV,O] - int -# 30 SSnumberSampledClusters [DV,O] - int -# 31 SSselectionProbCluster [DV,O] - Decimal0.0000000000000001-1 -# 32 SSinclusionProbCluster [DV,O] - Decimal0.0000000000000001-1 -# 33 SSsampled [DV,M] - YesNoFields -# 34 SSreasonNotSampled [DV,O] - ReasonForNotSampling -# 35 SSnonResponseCollected [DV,O] - YesNoFields -# 36 SSauxiliaryVariableTotal [DV,O] - DecimalPrec3 -# 37 SSauxiliaryVariableValue [DV,O] - DecimalPrec3 -# 38 SSauxiliaryVariableName [DV,O] - AuxiliaryVariableName -# 39 SSauxiliaryVariableUnit [DV,O] - MUNIT SS_df<-data.frame( SSid = dataset$VSid, @@ -511,8 +317,10 @@ SS_df<-data.frame( SSspeciesListName = project_name_outputs, #M SSuseForCalculateZero = "N", #M SStimeTotal = "", + SStimeTotalDataBasis = "Measured", SStimeSampled = "", SSnumberTotal = 1, + SSnumberTotalDataBasis = "Measured", SSnumberSampled = 1, SSselectionProb = 1, SSinclusionProb = 1, @@ -535,117 +343,66 @@ SS_df<-data.frame( #====SA=========== - -# 1 SAid [] - int -# 2 SSid [] - int -# 3 SArecordType [M] - string -# 4 SAsequenceNumber [M] - int -# 5 SAparentSequenceNumber [O] - int -# 6 SAstratification [DV,M] - YesNoFields -# 7 SAstratumName [DV,M] - StringLength100 -# 8 SAspeciesCode [M] - SpecWoRMS -# 9 SAspeciesCodeFAO [O] - SpecASFIS -# 10 SAstateOfProcessing [M] - StateOfProcessing -# 11 SApresentation [M] - ProductPresentation -# 12 SAspecimensState [M] - SpecimensState -# 13 SAcatchCategory [M] - CatchCategory -# 14 SAlandingCategory [O] - LandingCategory -# 15 SAcommSizeCatScale [O] - CommercialSizeCategoryScale -# 16 SAcommSizeCat [O] - CommercialSizeCategory -# 17 SAsex [M] - SEXCO -# 18 SAexclusiveEconomicZoneIndicator [O] - ISO_3166 -# 19 SAarea [O] - ICES_Area -# 20 SArectangle [O] - StatRec -# 21 SAfisheriesManagementUnit [O] - AreaNonFAO -# 22 SAgsaSubarea [M] - Areas_GFCM_GSA -# 23 SAjurisdictionArea [O] - JurisdictionArea -# 24 SAnationalFishingActivity [O] - NationalFishingActivity -# 25 SAmetier5 [O] - Metier5_FishingActivity -# 26 SAmetier6 [O] - Metier6_FishingActivity -# 27 SAgear [O] - GearType -# 28 SAmeshSize [O] - int -# 29 SAselectionDevice [O] - SelectionDevice -# 30 SAselectionDeviceMeshSize [O] - int -# 31 SAunitType [M] - SamplingUnit -# 32 SAtotalWeightLive [O] - int -# 33 SAsampleWeightLive [O] - int -# 34 SAnumberTotal [DV,O] - Decimal0.1-999999999 -# 35 SAnumberSampled [DV,O] - Decimal0.1-999999999 -# 36 SAselectionProb [DV,O] - Decimal0-1 -# 37 SAinclusionProb [DV,O] - Decimal0-1 -# 38 SAselectionMethod [DV,M] - SelectionMethod -# 39 SAunitName [DV,M] - StringLength100 -# 40 SAlowerHierarchy [M/O] - LowerHierarchy -# 41 SAsampler [O] - Sampler -# 42 SAsampled [DV,M] - YesNoFields -# 43 SAreasonNotSampled [DV,O] - ReasonForNotSampling -# 44 SAnonResponseCollected [DV,O] - YesNoFields -# 45 SAreasonNotSampledFM [DV,O] - ReasonForNotSampling -# 46 SAreasonNotSampledBV [DV,O] - ReasonForNotSampling -# 47 SAtotalWeightMeasured [O] - int -# 48 SAsampleWeightMeasured [O] - int -# 49 SAconversionFactorMeasLive [O] - Decimal0.900-10 -# 50 SAauxiliaryVariableTotal [DV,O] - DecimalPrec3 -# 51 SAauxiliaryVariableValue [DV,O] - DecimalPrec3 -# 52 SAauxiliaryVariableName [DV,O] - AuxiliaryVariableName -# 53 SAauxiliaryVariableUnit [DV,O] - MUNIT - - SA_df<-data.frame( - SAid = 1:nrow(dataset), - SSid = dataset$VSid, - SArecordType = "SA", #M - SAsequenceNumber = dataset$VSid, #M - SAparentSequenceNumber = "", - SAstratification = "N", #M - SAstratumName = "U", #M - SAspeciesCode = 107254, #M - SAspeciesCodeFAO = "NEP", - SAstateOfProcessing = "FRE", #M - SApresentation = "WHL", #M - SAspecimensState = "DeadOrZeroProbSurvival", #M - SAcatchCategory = "Lan", #M - SAlandingCategory = "", - SAcommSizeCatScale = "", - SAcommSizeCat = "", - SAsex = "U", #M - SAexclusiveEconomicZoneIndicator = "", - SAarea = "", - SArectangle = "", - SAfisheriesManagementUnit = "", - SAgsaSubarea = "NotApplicable", #M - SAjurisdictionArea = "", - SAnationalFishingActivity = "", - SAmetier5 = "", - SAmetier6 = "", - SAgear = "", - SAmeshSize = "", - SAselectionDevice = "", - SAselectionDeviceMeshSize = "", - SAunitType = "Box", #M - SAtotalWeightLive = "", - SAsampleWeightLive = "", - SAnumberTotal = 1, - SAnumberSampled = 1, - SAselectionProb = 1, - SAinclusionProb = 1, - SAselectionMethod = "CENSUS", #M - SAunitName = dataset$VSid, #M - SAlowerHierarchy = "D", - SAsampler = "Observer", - SAsampled = ifelse(!is.na(dataset[[target_var]]),"Y","N"), #M - SAreasonNotSampled = "", - SAnonResponseCollected = "Y", - SAreasonNotSampledFM = "", - SAreasonNotSampledBV = "", - SAtotalWeightMeasured = ifelse(!is.na(dataset[[target_var]]),dataset[[target_var]],""), # - SAsampleWeightMeasured = ifelse(!is.na(dataset[[target_var]]),dataset[[target_var]],""), # - SAconversionFactorMeasLive = 1, - SAauxiliaryVariableTotal = "", - SAauxiliaryVariableValue = "", - SAauxiliaryVariableName = "", - SAauxiliaryVariableUnit = "", - stringsAsFactors=FALSE +SAid = 1:nrow(dataset), +SSid = dataset$VSid, +SArecordType = "SA", #M +SAsequenceNumber = dataset$VSid, #M +SAparentSequenceNumber = "", +SAstratification = "N", #M +SAstratumName = "U", #M +SAspeciesCode = 107254, #M +SAspeciesCodeFAO = "NEP", +SAstateOfProcessing = "FRE", #M +SApresentation = "WHL", #M +SAspecimensState = "DeadOrZeroProbSurvival", #M +SAcatchCategory = "Lan", #M +SAlandingCategory = "", +SAcommSizeCatScale = "", +SAcommSizeCat = "", +SAsex = "U", #M +SAexclusiveEconomicZoneIndicator = "", +SAarea = "", +SArectangle = "", +SAfisheriesManagementUnit = "", +SAgsaSubarea = "NotApplicable", #M +SAjurisdictionArea = "", +SAgeographicalDataBasis = "Measured", +SAgeographicalSource = "", +SAnationalFishingActivity = "", +SAmetier5 = "", +SAmetier6 = "", +SAgear = "", +SAgearDataBasis = "Measured", +SAgearSource = "", +SAmeshSize = "", +SAselectionDevice = "", +SAselectionDeviceMeshSize = "", +SAunitType = "Box", #M +SAtotalWeightLive = "", +SAsampleWeightLive = "", +SAnumberTotal = 1, +SAnumberSampled = 1, +SAselectionProb = 1, +SAinclusionProb = 1, +SAselectionMethod = "CENSUS", #M +SAunitName = dataset$VSid, #M +SAlowerHierarchy = "D", +SAsampler = "Observer", +SAsampled = ifelse(!is.na(dataset[[target_var]]),"Y","N"), #M +SAreasonNotSampled = "", +SAnonResponseCollected = "Y", +SAreasonNotSampledFM = "", +SAreasonNotSampledBV = "", +SAtotalWeightMeasured = ifelse(!is.na(dataset[[target_var]]),dataset[[target_var]],""), # +SAtotalWeightMeasuredDataBasis = "Measured", +SAsampleWeightMeasured = ifelse(!is.na(dataset[[target_var]]),dataset[[target_var]],""), # +SAconversionFactorMeasLive = 1, +SAauxiliaryVariableTotal = "", +SAauxiliaryVariableValue = "", +SAauxiliaryVariableName = "", +SAauxiliaryVariableUnit = "", +stringsAsFactors=FALSE ) @@ -698,16 +455,14 @@ RDBESlist[[i]][which(grepl(colnames(RDBESlist[[i]]),pat="[A-Z]id"))]<-NULL #===Save============ - dir_outputs<-paste0(base_dir_outputs,"/", - project_name_outputs,"/") - dir.create(dir_outputs, recursive=T, showWarnings=FALSE) - filename_output_CS <- paste0("H1.csv") - filename_output_SL <- paste0("HSL.csv") - filename_output_VD <- paste0("HVD.csv") - + dir_outputs<-paste0(base_dir_outputs,"/", project_name_outputs,"/") + dir.create(dir_outputs, recursive=T, showWarnings=FALSE) + filename_output_CS <- paste0(dir_outputs,"H1.csv") + filename_output_SL <- paste0(dir_outputs,"HSL.csv") + filename_output_VD <- paste0(dir_outputs,"HVD.csv") -lapply(RDBESlist, function(x, filename1 = paste0(dir_outputs,filename_output_CS)){ +lapply(RDBESlist, function(x, filename1 = filename_output_CS){ if("DErecordType" %in% colnames(x)){ write.table(x, file = filename1, append = FALSE, quote = FALSE, sep = ",", eol = "\n", na = "NA", dec = ".", row.names = FALSE, @@ -719,7 +474,7 @@ write.table(x, file = filename1, append = TRUE, quote = FALSE, sep = ",", } }) -b<-read.table(file=paste0(dir_outputs,filename_output_CS), header=F, sep=";") +b<-read.table(file = filename_output_CS, header=F, sep=";") b<-cbind(key,b) b<-b[order(as.character(b$key), decreasing=FALSE),] b<-b[!is.na(key),] @@ -727,27 +482,26 @@ b$key<-NULL b$V1<-as.character(b$V1) # saves CS output -write.table(b$V1, file=paste0(dir_outputs,filename_output_CS), col.names=FALSE, row.names = FALSE, quote=FALSE,sep=",") +write.table(b$V1, file = filename_output_CS, col.names=FALSE, row.names = FALSE, quote=FALSE,sep=",") -# -----Builds and saves dummySL----------------- +# -----Builds and saves dummySL and dummyIS----------------- SL_base$SLspeciesListName<-project_name_outputs SL_base$SLyear<-DEyear SL_base$SLinstitute<-SDinstitution -# saves SL output - write.table(SL_base, file=paste0(dir_outputs,filename_output_SL), col.names=FALSE, row.names = FALSE, quote=FALSE,sep=",") - + + # saves SL output + write.table(rbind(paste(SL_base, collapse=","), paste(IS_base, collapse=",")), file=filename_output_SL, col.names=FALSE, row.names = FALSE, quote=FALSE,sep=",") # -----Builds and saves dummyVD----------------- - # saves VD output VD_base$VDyear<-DEyear - write.table(VD_base, file=paste0(dir_outputs,filename_output_VD), col.names=FALSE, row.names = FALSE, quote=FALSE,sep=",") - + write.table(VD_base, file=filename_output_VD, col.names=FALSE, row.names = FALSE, quote=FALSE,sep=",") +if(1==2){ # -----Clean SL after dowload----------------- # cleans excess of SL rows frequently present in download @@ -765,5 +519,5 @@ write.csv(tmp, file=paste0(dir_outputs,"tmp/SpeciesList.csv"), quote=F, row.name library(zip) zip::zip(zipfile = paste0(project_name_outputs,".zip"), files =paste0(dir_outputs,"tmp/",dir(paste0(dir_outputs,"tmp"))), mode="cherry-pick") unlink(paste0(dir_outputs,"tmp"), force=T, recursive = T) - +} diff --git a/data-raw/exampleData/TextBookExamples/BuildTextBookDataUpload_Pckg_survey_apistrat_H1.R b/data-raw/exampleData/TextBookExamples/BuildTextBookDataUpload_Pckg_survey_apistrat_H1.R index 55f0acdf..b373a4a1 100644 --- a/data-raw/exampleData/TextBookExamples/BuildTextBookDataUpload_Pckg_survey_apistrat_H1.R +++ b/data-raw/exampleData/TextBookExamples/BuildTextBookDataUpload_Pckg_survey_apistrat_H1.R @@ -20,14 +20,14 @@ DEstratumName <- "Pckg_survey_apistrat_H1" project_name_outputs <- gsub(" ","_", paste0(DEsamplingScheme,"_", DEstratumName)) baseDir <- "./data-raw/exampleData/TextBookExamples/" - #baseDir <- "" + baseDir <- "" VD_base <- readRDS(paste0(baseDir,"aux_TextBookExamples/VD_base.rds")) SL_base <- readRDS(paste0(baseDir,"aux_TextBookExamples/SL_base.rds")) IS_base <- readRDS(paste0(baseDir,"aux_TextBookExamples/IS_base.rds")) #nameof the directory where the outputs are saved currently base_dir_outputs <- paste0(baseDir,"BuiltUploads") - if(!file.exists(base_dir_outputs)) dir.create(base_dir_outputs, recursive=T, showWarnings=FALSE) + dir.create(base_dir_outputs, recursive=T, showWarnings=FALSE) #========Outline of Hierarchy 1================ @@ -47,22 +47,6 @@ -# 1 DEid [] - int -# 2 DErecordType [M] - string -# 3 DEsamplingScheme [M] - SamplingScheme -# 4 DEsamplingSchemeType [M] - SamplingSchemeType -# 5 DEyear [M] - Year -# 6 DEstratumName [M] - StringLength100 -# 7 DEhierarchyCorrect [M] - YesNoFields -# 8 DEhierarchy [M] - RDBESUpperHierarchy -# 9 DEsampled [DV,M] - YesNoFields -# 10 DEreasonNotSampled [DV,O] - ReasonForNotSampling -# 11 DEnonResponseCollected [DV,O] - YesNoFields -# 12 DEauxiliaryVariableTotal [DV,O] - DecimalPrec3 -# 13 DEauxiliaryVariableValue [DV,O] - DecimalPrec3 -# 14 DEauxiliaryVariableName [DV,O] - AuxiliaryVariableName -# 15 DEauxiliaryVariableUnit[DV,O]-MUNIT - DE_df_base<-expand.grid(DEyear=DEyear, DEstratumName="U",stringsAsFactors=F) @@ -88,13 +72,6 @@ DE_df<-data.frame( #===SD============ - # x -# 1 SDid [M] - int -# 2 DEid [M] - int -# 3 SDrecordType [M] - string -# 4 SDcountry [M] - ISO_3166 -# 5 SDinstitution [M] - EDMO - SD_df<-data.frame( SDid=DE_df$DEid, DEid=DE_df$DEid, @@ -106,41 +83,6 @@ SD_df<-data.frame( #===VS============ - - # x -# 1 VSid [M] - int -# 2 SDid [M] - int -# 3 VDid [M] - int -# 4 TEid [M/O] - int -# 5 VSrecordType [M] - string -# 6 VSsequenceNumber [M] - int -# 7 VSencryptedVesselCode [M] - StringLength100 -# 8 VSstratification [DV,M] - YesNoFields -# 9 VSstratumName [DV,M] - StringLength100 -# 10 VSclustering [DV,M] - Clustering -# 11 VSclusterName [DV,M] - StringLength100 -# 12 VSsampler [O] - Sampler -# 13 VSnumberTotal [DV,O] - int -# 14 VSnumberSampled [DV,O] - int -# 15 VSselectionProb [DV,O] - Decimal0.0000000000000001-1 -# 16 VSinclusionProb [DV,O] - Decimal0.0000000000000001-1 -# 17 VSselectionMethod [DV,M] - SelectionMethod -# 18 VSunitName [DV,M] - StringLength100 -# 19 VSselectionMethodCluster [DV,O] - SelectionMethod -# 20 VSnumberTotalClusters [DV,O] - int -# 21 VSnumberSampledClusters [DV,O] - int -# 22 VSselectionProbCluster [DV,O] - Decimal0.0000000000000001-1 -# 23 VSinclusionProbCluster [DV,O] - Decimal0.0000000000000001-1 -# 24 VSsampled [DV,M] - YesNoFields -# 25 VSreasonNotSampled [DV,O] - ReasonForNotSampling -# 26 VSnonResponseCollected [DV,O] - YesNoFields -# 27 VSauxiliaryVariableTotal [DV,O] - DecimalPrec3 -# 28 VSauxiliaryVariableValue [DV,O] - DecimalPrec3 -# 29 VSauxiliaryVariableName [DV,O] - AuxiliaryVariableName -# 30 VSauxiliaryVariableUnit [DV,O] - MUNIT - - - # adds VSid to dataset dataset$VSid <- 1:nrow(dataset) @@ -195,164 +137,56 @@ VS_df$VSnumberTotal<-strataSize[VS_df$VSstratumName] #====FT=========== - -# 1 FTid [M] - int -# 2 OSid [M/O] - int -# 3 VSid [M/O] - int -# 4 VDid [M] - int -# 5 SDid [M/O] - int -# 6 FOid [M/O] - int -# 7 TEid [M/O] - int -# 8 FTrecordType [M] - string -# 9 FTencryptedVesselCode [M] - StringLength100 -# 10 FTsequenceNumber [M] - int -# 11 FTstratification [DV,M] - YesNoFields -# 12 FTstratumName [DV,M] - StringLength100 -# 13 FTclustering [DV,M] - Clustering -# 14 FTclusterName [DV,M] - StringLength100 -# 15 FTsampler [O] - Sampler -# 16 FTsamplingType [M] - SamplingContext -# 17 FTnumberOfHaulsOrSets [M/O] - IntZeroToMax -# 18 FTdepartureLocation [M/O] - Harbour_LOCODE -# 19 FTdepartureDate [M/O] - Date -# 20 FTdepartureTime [M/O] - StringLength60 -# 21 FTarrivalLocation [M] - Harbour_LOCODE -# 22 FTarrivalDate [M] - Date -# 23 FTarrivalTime [M/O] - StringLength60 -# 24 FTnumberTotal [DV,O] - int -# 25 FTnumberSampled [DV,O] - int -# 26 FTselectionProb [DV,O] - Decimal0-1 -# 27 FTinclusionProb [DV,O] - Decimal0-1 -# 28 FTselectionMethod [DV,M] - SelectionMethod -# 29 FTunitName [DV,M] - StringLength100 -# 30 FTselectionMethodCluster [DV,O] - SelectionMethod -# 31 FTnumberTotalClusters [DV,O] - int -# 32 FTnumberSampledClusters [DV,O] - int -# 33 FTselectionProbCluster [DV,O] - Decimal0-1 -# 34 FTinclusionProbCluster [DV,O] - Decimal0-1 -# 35 FTsampled [DV,M] - YesNoFields -# 36 FTreasonNotSampled [DV,O] - ReasonForNotSampling -# 37 FTnonResponseCollected [DV,O] - YesNoFields -# 38 FTauxiliaryVariableTotal [DV,O] - DecimalPrec3 -# 39 FTauxiliaryVariableValue [DV,O] - DecimalPrec3 -# 40 FTauxiliaryVariableName [DV,O] - AuxiliaryVariableName -# 41 FTauxiliaryVariableUnit [DV,O] - MUNIT - FT_df <- data.frame( - FTid = dataset$VSid,#[M] - int - OSid = "",# [M/O] - int - VSid = dataset$VSid, #[M/O] - VDid = "", #[M] - int - SDid = "", #[M/O] - int - FOid = "", #[M/O] - int - TEid = "", #[M/O] - int - FTrecordType='FT', #[M] - string - FTencryptedVesselCode = dataset$VSencryptedVesselCode, #[M] - FTsequenceNumber = as.integer(1:nrow(dataset)), #[M] - string - FTstratification = "N", #[DV,M] - RS_Stratification - FTstratumName = "U", #[DV,M] - string - FTclustering = "N", #[DV,M] - RS_Clustering - FTclusterName = "U", #[DV,M] - string - FTsampler = "Observer", #[M] - RS_Sampler - FTsamplingType = "AtSea" , #[M] - RS_SamplingType - FTnumberOfHaulsOrSets = 1, #[O] - int - FTdepartureLocation="ZWHWN", #[O] - Harbour_LOCODE - FTdepartureDate=seq(from = as.Date("1968-01-01", format='%Y-%m-%d'), by=1, length.out=nrow(dataset)), #[M/O] - date - FTdepartureTime="", #[O] - time - FTarrivalLocation = "ZWHWN", #[M] - Harbour_LOCODE - FTarrivalDate=seq(from = as.Date("1968-01-01", format='%Y-%m-%d'), by=1, length.out=nrow(dataset)), #[M] - date - FTarrivalTime="", #[O] - time - FTnumberTotal= 1, #[DV,O] - int - FTnumberSampled=1, #[DV,O] - int - FTselectionProb=1, #[DV,O] - DecimalPrec10 - FTinclusionProb=1, #[DV,O] - DecimalPrec10 - FTselectionMethod="CENSUS", #[DV,M] - RS_SelectionMethod - FTunitName = dataset$VSid, #[DV,M] - string - FTselectionMethodCluster="", #[DV,O] - RS_SelectionMethod - FTnumberTotalClusters="", #[DV,O] - int - FTnumberSampledClusters="", #[DV,O] - int - FTselectionProbCluster="", #[DV,O] - DecimalPrec10 - FTinclusionProbCluster="", #[DV,O] - DecimalPrec10 - FTsampled="Y", #[DV,M] - YesNoFields - FTreasonNotSampled= "", #[DV,O] - RS_ReasonForNotSampling - FTnonResponseCollected = "N", - FTauxiliaryVariableTotal = "", - FTauxiliaryVariableValue = "", - FTauxiliaryVariableName = "", - FTauxiliaryVariableUnit = "", - stringsAsFactors=FALSE +FTid = dataset$VSid,#[M] - int +OSid = "",# [M/O] - int +VSid = dataset$VSid, #[M/O] +VDid = "", #[M] - int +SDid = "", #[M/O] - int +FOid = "", #[M/O] - int +TEid = "", #[M/O] - int +FTrecordType='FT', #[M] - string +FTencryptedVesselCode = dataset$VSencryptedVesselCode, #[M] +FTsequenceNumber = as.integer(1:nrow(dataset)), #[M] - string +FTstratification = "N", #[DV,M] - RS_Stratification +FTstratumName = "U", #[DV,M] - string +FTclustering = "N", #[DV,M] - RS_Clustering +FTclusterName = "U", #[DV,M] - string +FTsampler = "Observer", #[M] - RS_Sampler +FTsamplingType = "AtSea" , #[M] - RS_SamplingType +FTnumberOfHaulsOrSets = 1, #[O] - int +FTdepartureLocation="ZWHWN", #[O] - Harbour_LOCODE +FTdepartureDate=seq(from = as.Date("1968-01-01", format='%Y-%m-%d'), by=1, length.out=nrow(dataset)), #[M/O] - date +FTdepartureTime="", #[O] - time +FTarrivalLocation = "ZWHWN", #[M] - Harbour_LOCODE +FTarrivalDate=seq(from = as.Date("1968-01-01", format='%Y-%m-%d'), by=1, length.out=nrow(dataset)), #[M] - date +FTarrivalTime="", #[O] - time +FTdominantLandingDate= "", +FTnumberTotal= 1, #[DV,O] - int +FTnumberSampled=1, #[DV,O] - int +FTselectionProb=1, #[DV,O] - DecimalPrec10 +FTinclusionProb=1, #[DV,O] - DecimalPrec10 +FTselectionMethod="CENSUS", #[DV,M] - RS_SelectionMethod +FTunitName = dataset$VSid, #[DV,M] - string +FTselectionMethodCluster="", #[DV,O] - RS_SelectionMethod +FTnumberTotalClusters="", #[DV,O] - int +FTnumberSampledClusters="", #[DV,O] - int +FTselectionProbCluster="", #[DV,O] - DecimalPrec10 +FTinclusionProbCluster="", #[DV,O] - DecimalPrec10 +FTsampled="Y", #[DV,M] - YesNoFields +FTreasonNotSampled= "", #[DV,O] - RS_ReasonForNotSampling +FTnonResponseCollected = "N", +FTauxiliaryVariableTotal = "", +FTauxiliaryVariableValue = "", +FTauxiliaryVariableName = "", +FTauxiliaryVariableUnit = "", +stringsAsFactors=FALSE ) #====FO=========== -# 1 FOid [M] - int -# 2 FTid [M/O] - int -# 3 SDid [M/O] - int -# 4 FOrecordType [M] - string -# 5 FOstratification [DV,M] - YesNoFields -# 6 FOsequenceNumber [M] - int -# 7 FOstratumName [DV,M] - StringLength100 -# 8 FOclustering [DV,M] - Clustering -# 9 FOclusterName [DV,M] - StringLength100 -# 10 FOsampler [O] - Sampler -# 11 FOaggregationLevel [M] - AggregationLevel -# 12 FOvalidity [M] - ValidityFlag -# 13 FOcatchReg [M] - CatchRegistration -# 14 FOstartDate [O] - Date -# 15 FOstartTime [O] - Time -# 16 FOendDate [M] - Date -# 17 FOendTime [M/O] - Time -# 18 FOduration [M/O] - int -# 19 FOdurationSource [M] - DurationSource -# 20 FOhandlingTime [O] - int -# 21 FOstartLat [O] - Decimal-90.000000-90.000000 -# 22 FOstartLon [O] - Decimal-180.000000-180.000000 -# 23 FOstopLat [O] - Decimal-90.000000-90.000000 -# 24 FOstopLon [O] - Decimal-180.000000-180.000000 -# 25 FOexclusiveEconomicZoneIndicator [O] - ISO_3166 -# 26 FOarea [M] - ICES_Area -# 27 FOrectangle [O] - StatRec -# 28 FOfisheriesManagementUnit [O] - AreaNonFAO -# 29 FOgsaSubarea [M] - Areas_GFCM_GSA -# 30 FOjurisdictionArea [O] - JurisdictionArea -# 31 FOfishingDepth [O] - int -# 32 FOwaterDepth [O] - int -# 33 FOnationalFishingActivity [O] - NationalFishingActivity -# 34 FOmetier5 [O] - Metier5_FishingActivity -# 35 FOmetier6 [M] - Metier6_FishingActivity -# 36 FOgear [M] - GearType -# 37 FOmeshSize [O] - int -# 38 FOselectionDevice [O] - SelectionDevice -# 39 FOselectionDeviceMeshSize [O] - int -# 40 FOtargetSpecies [O] - TargetSpecies -# 41 FOincidentalByCatchMitigationDeviceFirst [M] - BycatchMitigationDevice -# 42 FOincidentalByCatchMitigationDeviceTargetFirst [M] - BycatchMitigationDeviceTarget -# 43 FOincidentalByCatchMitigationDeviceSecond [M] - BycatchMitigationDevice -# 44 FOincidentalByCatchMitigationDeviceTargetSecond [M] - BycatchMitigationDeviceTarget -# 45 FOgearDimensions [O] - int -# 46 FOobservationCode [M] - ObservationCode -# 47 FOnumberTotal [DV,O] - int -# 48 FOnumberSampled [DV,O] - int -# 49 FOselectionProb [DV,O] - Decimal0.0000000000000001-1 -# 50 FOinclusionProb [DV,O] - Decimal0.0000000000000001-1 -# 51 FOselectionMethod [DV,M] - SelectionMethod -# 52 FOunitName [DV,M] - StringLength100 -# 53 FOselectionMethodCluster [DV,O] - SelectionMethod -# 54 FOnumberTotalClusters [DV,O] - int -# 55 FOnumberSampledClusters [DV,O] - int -# 56 FOselectionProbCluster [DV,O] - Decimal0.0000000000000001-1 -# 57 FOinclusionProbCluster [DV,O] - Decimal0.0000000000000001-1 -# 58 FOsampled [DV,M] - YesNoFields -# 59 FOreasonNotSampled [DV,O] - ReasonForNotSampling -# 60 FOnonResponseCollected [DV,O] - YesNoFields -# 61 FOauxiliaryVariableTotal [DV,O] - DecimalPrec3 -# 62 FOauxiliaryVariableValue [DV,O] - DecimalPrec3 -# 63 FOauxiliaryVariableName [DV,O] - AuxiliaryVariableName -# 64 FOauxiliaryVariableUnit [DV,O] - MUNIT - - FO_df <- data.frame( FOid = dataset$VSid, FTid = dataset$VSid, @@ -372,24 +206,29 @@ FO_df <- data.frame( FOendDate = as.Date(FT_df$FTdepartureDate, format='%Y-%m-%d'),#M FOendTime ="",#M FOduration = 60, #M ATTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT!!!! + FOfishingDurationDataBasis = "Measured", FOdurationSource = "Crew", FOhandlingTime = "", FOstartLat="", # ATT! FOstartLon="", # ATT! FOstopLat="", FOstopLon="", - FOexclusiveEconomicZoneIndicator = "", # might differ!! + FOexclusiveEconomicZoneIndicator = "", # FOarea = "27.3.a.21", #M FOrectangle = "", FOfisheriesManagementUnit = "", FOgsaSubarea = "NotApplicable", #M FOjurisdictionArea = "", + FOgeographicalDataBasis = "Measured", + FOgeographicalSource = "Crew", FOfishingDepth = "", FOwaterDepth = "", FOnationalFishingActivity = "", FOmetier5 = "", FOmetier6 = "OTT_CRU_70-89_2_35",#M FOgear = "OTT",#M + FOgearDataBasis = "Measured", + FOgearSource = "", FOmeshSize = "", FOselectionDevice = "", FOselectionDeviceMeshSize = "", @@ -424,45 +263,6 @@ stringsAsFactors=FALSE #===SS============ -# 1 SSid [] - int -# 2 LEid [M/O] - int -# 3 FOid [M/O] - int -# 4 FTid [M/O] - int -# 5 OSid [M/O] - int -# 6 TEid [M/O] - int -# 7 SLid [M] - int -# 8 SSrecordType [M] - string -# 9 SSsequenceNumber [M] - int -# 10 SSstratification [DV,M] - YesNoFields -# 11 SSstratumName [DV,M] - StringLength100 -# 12 SSclustering [DV,M] - Clustering -# 13 SSclusterName [DV,M] - StringLength100 -# 14 SSobservationActivityType [DV,M] - ObservationActivityType -# 15 SScatchFraction [M] - CatchFraction -# 16 SSobservationType [DV,M] - ObservationMethod -# 17 SSsampler [O] - Sampler -# 18 SSspeciesListName [M] - StringLength100 -# 19 SSuseForCalculateZero [M] - YesNoFields -# 20 SStimeTotal [O] - int -# 21 SStimeSampled [O] - int -# 22 SSnumberTotal [DV,O] - int -# 23 SSnumberSampled [DV,O] - int -# 24 SSselectionProb [DV,O] - Decimal0.0000000000000001-1 -# 25 SSinclusionProb [DV,O] - Decimal0.0000000000000001-1 -# 26 SSselectionMethod [DV,M] - SelectionMethod -# 27 SSunitName [DV,M] - StringLength100 -# 28 SSselectionMethodCluster [DV,O] - SelectionMethod -# 29 SSnumberTotalClusters [DV,O] - int -# 30 SSnumberSampledClusters [DV,O] - int -# 31 SSselectionProbCluster [DV,O] - Decimal0.0000000000000001-1 -# 32 SSinclusionProbCluster [DV,O] - Decimal0.0000000000000001-1 -# 33 SSsampled [DV,M] - YesNoFields -# 34 SSreasonNotSampled [DV,O] - ReasonForNotSampling -# 35 SSnonResponseCollected [DV,O] - YesNoFields -# 36 SSauxiliaryVariableTotal [DV,O] - DecimalPrec3 -# 37 SSauxiliaryVariableValue [DV,O] - DecimalPrec3 -# 38 SSauxiliaryVariableName [DV,O] - AuxiliaryVariableName -# 39 SSauxiliaryVariableUnit [DV,O] - MUNIT SS_df<-data.frame( SSid = dataset$VSid, @@ -485,8 +285,10 @@ SS_df<-data.frame( SSspeciesListName = project_name_outputs, #M SSuseForCalculateZero = "N", #M SStimeTotal = "", + SStimeTotalDataBasis = "Measured", SStimeSampled = "", SSnumberTotal = 1, + SSnumberTotalDataBasis = "Measured", SSnumberSampled = 1, SSselectionProb = 1, SSinclusionProb = 1, @@ -509,117 +311,66 @@ SS_df<-data.frame( #====SA=========== - -# 1 SAid [] - int -# 2 SSid [] - int -# 3 SArecordType [M] - string -# 4 SAsequenceNumber [M] - int -# 5 SAparentSequenceNumber [O] - int -# 6 SAstratification [DV,M] - YesNoFields -# 7 SAstratumName [DV,M] - StringLength100 -# 8 SAspeciesCode [M] - SpecWoRMS -# 9 SAspeciesCodeFAO [O] - SpecASFIS -# 10 SAstateOfProcessing [M] - StateOfProcessing -# 11 SApresentation [M] - ProductPresentation -# 12 SAspecimensState [M] - SpecimensState -# 13 SAcatchCategory [M] - CatchCategory -# 14 SAlandingCategory [O] - LandingCategory -# 15 SAcommSizeCatScale [O] - CommercialSizeCategoryScale -# 16 SAcommSizeCat [O] - CommercialSizeCategory -# 17 SAsex [M] - SEXCO -# 18 SAexclusiveEconomicZoneIndicator [O] - ISO_3166 -# 19 SAarea [O] - ICES_Area -# 20 SArectangle [O] - StatRec -# 21 SAfisheriesManagementUnit [O] - AreaNonFAO -# 22 SAgsaSubarea [M] - Areas_GFCM_GSA -# 23 SAjurisdictionArea [O] - JurisdictionArea -# 24 SAnationalFishingActivity [O] - NationalFishingActivity -# 25 SAmetier5 [O] - Metier5_FishingActivity -# 26 SAmetier6 [O] - Metier6_FishingActivity -# 27 SAgear [O] - GearType -# 28 SAmeshSize [O] - int -# 29 SAselectionDevice [O] - SelectionDevice -# 30 SAselectionDeviceMeshSize [O] - int -# 31 SAunitType [M] - SamplingUnit -# 32 SAtotalWeightLive [O] - int -# 33 SAsampleWeightLive [O] - int -# 34 SAnumberTotal [DV,O] - Decimal0.1-999999999 -# 35 SAnumberSampled [DV,O] - Decimal0.1-999999999 -# 36 SAselectionProb [DV,O] - Decimal0-1 -# 37 SAinclusionProb [DV,O] - Decimal0-1 -# 38 SAselectionMethod [DV,M] - SelectionMethod -# 39 SAunitName [DV,M] - StringLength100 -# 40 SAlowerHierarchy [M/O] - LowerHierarchy -# 41 SAsampler [O] - Sampler -# 42 SAsampled [DV,M] - YesNoFields -# 43 SAreasonNotSampled [DV,O] - ReasonForNotSampling -# 44 SAnonResponseCollected [DV,O] - YesNoFields -# 45 SAreasonNotSampledFM [DV,O] - ReasonForNotSampling -# 46 SAreasonNotSampledBV [DV,O] - ReasonForNotSampling -# 47 SAtotalWeightMeasured [O] - int -# 48 SAsampleWeightMeasured [O] - int -# 49 SAconversionFactorMeasLive [O] - Decimal0.900-10 -# 50 SAauxiliaryVariableTotal [DV,O] - DecimalPrec3 -# 51 SAauxiliaryVariableValue [DV,O] - DecimalPrec3 -# 52 SAauxiliaryVariableName [DV,O] - AuxiliaryVariableName -# 53 SAauxiliaryVariableUnit [DV,O] - MUNIT - - SA_df<-data.frame( - SAid = dataset$VSid, - SSid = dataset$VSid, - SArecordType = "SA", #M - SAsequenceNumber = dataset$VSid, #M - SAparentSequenceNumber = "", - SAstratification = "N", #M - SAstratumName = "U", #M - SAspeciesCode = 107254, #M - SAspeciesCodeFAO = "NEP", - SAstateOfProcessing = "FRE", #M - SApresentation = "WHL", #M - SAspecimensState = "DeadOrZeroProbSurvival", #M - SAcatchCategory = "Lan", #M - SAlandingCategory = "", - SAcommSizeCatScale = "", - SAcommSizeCat = "", - SAsex = "U", #M - SAexclusiveEconomicZoneIndicator = "", - SAarea = "", - SArectangle = "", - SAfisheriesManagementUnit = "", - SAgsaSubarea = "NotApplicable", #M - SAjurisdictionArea = "", - SAnationalFishingActivity = "", - SAmetier5 = "", - SAmetier6 = "", - SAgear = "", - SAmeshSize = "", - SAselectionDevice = "", - SAselectionDeviceMeshSize = "", - SAunitType = "Box", #M - SAtotalWeightLive = "", - SAsampleWeightLive = "", - SAnumberTotal = 1, - SAnumberSampled = 1, - SAselectionProb = 1, - SAinclusionProb = 1, - SAselectionMethod = "CENSUS", #M - SAunitName = dataset$VSid, #M - SAlowerHierarchy = "D", - SAsampler = "Observer", - SAsampled = "N", #M - SAreasonNotSampled = "", - SAnonResponseCollected = "N", - SAreasonNotSampledFM = "", - SAreasonNotSampledBV = "", - SAtotalWeightMeasured = dataset[[target_var]], - SAsampleWeightMeasured = dataset[[target_var]], - SAconversionFactorMeasLive = 1, - SAauxiliaryVariableTotal = "", - SAauxiliaryVariableValue = "", - SAauxiliaryVariableName = "", - SAauxiliaryVariableUnit = "", - stringsAsFactors=FALSE +SAid = dataset$VSid, +SSid = dataset$VSid, +SArecordType = "SA", #M +SAsequenceNumber = dataset$VSid, #M +SAparentSequenceNumber = "", +SAstratification = "N", #M +SAstratumName = "U", #M +SAspeciesCode = 107254, #M +SAspeciesCodeFAO = "NEP", +SAstateOfProcessing = "FRE", #M +SApresentation = "WHL", #M +SAspecimensState = "DeadOrZeroProbSurvival", #M +SAcatchCategory = "Lan", #M +SAlandingCategory = "", +SAcommSizeCatScale = "", +SAcommSizeCat = "", +SAsex = "U", #M +SAexclusiveEconomicZoneIndicator = "", +SAarea = "", +SArectangle = "", +SAfisheriesManagementUnit = "", +SAgsaSubarea = "NotApplicable", #M +SAjurisdictionArea = "", +SAgeographicalDataBasis = "Measured", +SAgeographicalSource = "", +SAnationalFishingActivity = "", +SAmetier5 = "", +SAmetier6 = "", +SAgear = "", +SAgearDataBasis = "Measured", +SAgearSource = "", +SAmeshSize = "", +SAselectionDevice = "", +SAselectionDeviceMeshSize = "", +SAunitType = "Box", #M +SAtotalWeightLive = "", +SAsampleWeightLive = "", +SAnumberTotal = 1, +SAnumberSampled = 1, +SAselectionProb = 1, +SAinclusionProb = 1, +SAselectionMethod = "CENSUS", #M +SAunitName = dataset$VSid, #M +SAlowerHierarchy = "D", +SAsampler = "Observer", +SAsampled = "N", #M +SAreasonNotSampled = "", +SAnonResponseCollected = "N", +SAreasonNotSampledFM = "", +SAreasonNotSampledBV = "", +SAtotalWeightMeasured = dataset[[target_var]], +SAtotalWeightMeasuredDataBasis = "Measured", +SAsampleWeightMeasured = dataset[[target_var]], +SAconversionFactorMeasLive = 1, +SAauxiliaryVariableTotal = "", +SAauxiliaryVariableValue = "", +SAauxiliaryVariableName = "", +SAauxiliaryVariableUnit = "", +stringsAsFactors=FALSE ) @@ -672,16 +423,14 @@ RDBESlist[[i]][which(grepl(colnames(RDBESlist[[i]]),pat="[A-Z]id"))]<-NULL #===Save============ - dir_outputs<-paste0(base_dir_outputs,"/", - project_name_outputs,"/") - dir.create(dir_outputs, recursive=T, showWarnings=FALSE) - filename_output_CS <- paste0("H1.csv") - filename_output_SL <- paste0("HSL.csv") - filename_output_VD <- paste0("HVD.csv") - + dir_outputs<-paste0(base_dir_outputs,"/", project_name_outputs,"/") + dir.create(dir_outputs, recursive=T, showWarnings=FALSE) + filename_output_CS <- paste0(dir_outputs,"H1.csv") + filename_output_SL <- paste0(dir_outputs,"HSL.csv") + filename_output_VD <- paste0(dir_outputs,"HVD.csv") -lapply(RDBESlist, function(x, filename1 = paste0(dir_outputs,filename_output_CS)){ +lapply(RDBESlist, function(x, filename1 = filename_output_CS){ if("DErecordType" %in% colnames(x)){ write.table(x, file = filename1, append = FALSE, quote = FALSE, sep = ",", eol = "\n", na = "NA", dec = ".", row.names = FALSE, @@ -693,7 +442,7 @@ write.table(x, file = filename1, append = TRUE, quote = FALSE, sep = ",", } }) -b<-read.table(file=paste0(dir_outputs,filename_output_CS), header=F, sep=";") +b<-read.table(file = filename_output_CS, header=F, sep=";") b<-cbind(key,b) b<-b[order(as.character(b$key), decreasing=FALSE),] b<-b[!is.na(key),] @@ -701,22 +450,21 @@ b$key<-NULL b$V1<-as.character(b$V1) # saves CS output -write.table(b$V1, file=paste0(dir_outputs,filename_output_CS), col.names=FALSE, row.names = FALSE, quote=FALSE,sep=",") +write.table(b$V1, file = filename_output_CS, col.names=FALSE, row.names = FALSE, quote=FALSE,sep=",") -# -----Builds and saves dummySL----------------- +# -----Builds and saves dummySL and dummyIS----------------- SL_base$SLspeciesListName<-project_name_outputs SL_base$SLyear<-DEyear SL_base$SLinstitute<-SDinstitution -# saves SL output - write.table(SL_base, file=paste0(dir_outputs,filename_output_SL), col.names=FALSE, row.names = FALSE, quote=FALSE,sep=",") - + + # saves SL output + write.table(rbind(paste(SL_base, collapse=","), paste(IS_base, collapse=",")), file=filename_output_SL, col.names=FALSE, row.names = FALSE, quote=FALSE,sep=",") # -----Builds and saves dummyVD----------------- - # saves VD output VD_base$VDyear<-DEyear - write.table(VD_base, file=paste0(dir_outputs,filename_output_VD), col.names=FALSE, row.names = FALSE, quote=FALSE,sep=",") + write.table(VD_base, file=filename_output_VD, col.names=FALSE, row.names = FALSE, quote=FALSE,sep=",") diff --git a/data-raw/exampleData/TextBookExamples/Migrate_Pckg_SDAResources_agstrat_H1_to_new_format.R b/data-raw/exampleData/TextBookExamples/Migrate_Pckg_SDAResources_agstrat_H1_to_new_format.R deleted file mode 100644 index f559e4da..00000000 --- a/data-raw/exampleData/TextBookExamples/Migrate_Pckg_SDAResources_agstrat_H1_to_new_format.R +++ /dev/null @@ -1,50 +0,0 @@ -# 15/10/2025 -# Update "Pckg_SDAResources_agstrat_H1" to the new data format - -library(RDBEScore) -# Get aux data -load(file='./data/Pckg_survey_apiclus2_H1.rda') -# Get the data -load(file='./data/Pckg_SDAResources_agstrat_H1.rda') -validateRDBESDataObject(Pckg_SDAResources_agstrat_H1, strict = FALSE, verbose = TRUE) -newH1 <- Pckg_SDAResources_agstrat_H1 - -# Add an entry for IS and create the entries using SL -newH1$IS <- readRDS(paste0(baseDir,"aux_TextBookExamples/IS_base.rds")) -names(newH1$IS) <- c("ISrecordType","IScommTaxon","ISsppCode") -#head(newH1$IS) -newH1[["IS"]]$ISrecType <- "IS" -newH1[["IS"]]$SLid <- newH1[["SL"]]$SLid -newH1[["IS"]]$ISid <- 1001 -newH1$IS <- newH1$IS[,c("ISid", "SLid", "ISrecType", "IScommTaxon", "ISsppCode")] -data.table::setkey(newH1[["IS"]],ISid) -# Remove the unneccessary columns from SL -#newH1$SL <- newH1$SL[,c("SLid","SLrecType","SLcou","SLinst","SLspeclistName","SLyear","SLcatchFrac")] -# Add column to FT -#newH1[["FT"]]$FTdomLanDate <- NA -# Add columns to FO -#newH1[["FO"]]$FOfishDuraDatBas <- NA -#newH1[["FO"]]$FOgeoDatBas <- NA -#newH1[["FO"]]$FOgeoSou <- NA -#newH1[["FO"]]$FOgeaDatBas <- NA -#newH1[["FO"]]$FOgearSou <- NA -# Add columns to SS -#newH1[["SS"]]$SStimeTotalDatBas <- NA -#newH1[["SS"]]$SSnumTotalDatBas <- NA -# Add columns to SA -#newH1[["SA"]]$SAgeoDatBas <- NA -#newH1[["SA"]]$SAgeoSou <- NA -#newH1[["SA"]]$SAgeaDatBas <- NA -#newH1[["SA"]]$SAgearSou <- NA -#newH1[["SA"]]$SAtotWtMeaDatBas <- NA -# generate empty BV and FM -newH1[["FM"]]<-Pckg_survey_apiclus2_H1$FM -newH1[["BV"]]<-Pckg_survey_apiclus2_H1$BV - - -validateRDBESDataObject(newH1, strict = TRUE, verbose = TRUE) - -# Save the data -Pckg_SDAResources_agstrat_H1 <- newH1 -usethis::use_data(Pckg_SDAResources_agstrat_H1, overwrite = TRUE) - diff --git a/data-raw/exampleData/TextBookExamples/Migrate_Pckg_survey_apiclus2_H1_to_new_format.R b/data-raw/exampleData/TextBookExamples/Migrate_Pckg_survey_apiclus2_H1_to_new_format.R deleted file mode 100644 index c01f7262..00000000 --- a/data-raw/exampleData/TextBookExamples/Migrate_Pckg_survey_apiclus2_H1_to_new_format.R +++ /dev/null @@ -1,50 +0,0 @@ -# 12/2/2025 -# Update "Pckg_survey_apiclus2_H1" to the new data format - -library(RDBEScore) -# Get the data -load(file='./data/Pckg_survey_apiclus2_H1.rda') -validateRDBESDataObject(Pckg_survey_apiclus2_H1, strict = TRUE, verbose = TRUE) -newH1 <- Pckg_survey_apiclus2_H1 -validateRDBESDataObject(newH1, strict = TRUE, verbose = TRUE) - -# Add an entry for IS and create the entries using SL -newH1$IS <- newH1$SL -newH1$IS <- newH1$IS[,c("SLid","SLrecType","SLcommTaxon","SLsppCode")] -names(newH1$IS) <- c("ISid","ISrecType","IScommTaxon","ISsppCode" ) -#head(newH1$IS) -newH1[["IS"]]$ISrecType <- "IS" -newH1[["IS"]]$SLid <- 47891 -newH1[["IS"]]$ISid <- 1001 -newH1$IS <- newH1$IS[,c("ISid", "SLid", "ISrecType", "IScommTaxon", "ISsppCode")] -data.table::setkey(newH1[["IS"]],ISid) -# Remove the unneccessary columns from SL -newH1$SL <- newH1$SL[,c("SLid","SLrecType","SLcou","SLinst","SLspeclistName","SLyear","SLcatchFrac")] -# Add column to FT -newH1[["FT"]]$FTdomLanDate <- NA -# Add columns to FO -newH1[["FO"]]$FOfishDuraDatBas <- NA -newH1[["FO"]]$FOgeoDatBas <- NA -newH1[["FO"]]$FOgeoSou <- NA -newH1[["FO"]]$FOgeaDatBas <- NA -newH1[["FO"]]$FOgearSou <- NA -# Add columns to SS -newH1[["SS"]]$SStimeTotalDatBas <- NA -newH1[["SS"]]$SSnumTotalDatBas <- NA -# Add columns to SA -newH1[["SA"]]$SAgeoDatBas <- NA -newH1[["SA"]]$SAgeoSou <- NA -newH1[["SA"]]$SAgeaDatBas <- NA -newH1[["SA"]]$SAgearSou <- NA -newH1[["SA"]]$SAtotWtMeaDatBas <- NA -# Rename cols in BV -newH1[["BV"]]$BVspecType <- newH1[["BV"]]$BVmethod -newH1[["BV"]]$BVanalysisType <- newH1[["BV"]]$BVmeasEquip -newH1[["BV"]]$BVmethod <- NULL -newH1[["BV"]]$BVmeasEquip <- NULL -validateRDBESDataObject(newH1, strict = TRUE, verbose = TRUE) - -# Save the data -Pckg_survey_apiclus2_H1 <- newH1 -usethis::use_data(Pckg_survey_apiclus2_H1, overwrite = TRUE) - diff --git a/data-raw/exampleData/TextBookExamples/Migrate_Pckg_survey_apistrat_H1_to_new_format.R b/data-raw/exampleData/TextBookExamples/Migrate_Pckg_survey_apistrat_H1_to_new_format.R deleted file mode 100644 index db5669f3..00000000 --- a/data-raw/exampleData/TextBookExamples/Migrate_Pckg_survey_apistrat_H1_to_new_format.R +++ /dev/null @@ -1,50 +0,0 @@ -# 12/2/2025 -# Update "Pckg_survey_apistrat_H1" to the new data format - -library(RDBEScore) -# Get the data -load(file='./data/Pckg_survey_apistrat_H1.rda') -validateRDBESDataObject(Pckg_survey_apistrat_H1, strict = TRUE, verbose = TRUE) -newH1 <- Pckg_survey_apistrat_H1 -validateRDBESDataObject(newH1, strict = TRUE, verbose = TRUE) - -# Add an entry for IS and create the entries using SL -newH1$IS <- newH1$SL -newH1$IS <- newH1$IS[,c("SLid","SLrecType","SLcommTaxon","SLsppCode")] -names(newH1$IS) <- c("ISid","ISrecType","IScommTaxon","ISsppCode" ) -#head(newH1$IS) -newH1[["IS"]]$ISrecType <- "IS" -newH1[["IS"]]$SLid <- 47891 -newH1[["IS"]]$ISid <- 1001 -newH1$IS <- newH1$IS[,c("ISid", "SLid", "ISrecType", "IScommTaxon", "ISsppCode")] -data.table::setkey(newH1[["IS"]],ISid) -# Remove the unneccessary columns from SL -newH1$SL <- newH1$SL[,c("SLid","SLrecType","SLcou","SLinst","SLspeclistName","SLyear","SLcatchFrac")] -# Add column to FT -newH1[["FT"]]$FTdomLanDate <- NA -# Add columns to FO -newH1[["FO"]]$FOfishDuraDatBas <- NA -newH1[["FO"]]$FOgeoDatBas <- NA -newH1[["FO"]]$FOgeoSou <- NA -newH1[["FO"]]$FOgeaDatBas <- NA -newH1[["FO"]]$FOgearSou <- NA -# Add columns to SS -newH1[["SS"]]$SStimeTotalDatBas <- NA -newH1[["SS"]]$SSnumTotalDatBas <- NA -# Add columns to SA -newH1[["SA"]]$SAgeoDatBas <- NA -newH1[["SA"]]$SAgeoSou <- NA -newH1[["SA"]]$SAgeaDatBas <- NA -newH1[["SA"]]$SAgearSou <- NA -newH1[["SA"]]$SAtotWtMeaDatBas <- NA -# Rename cols in BV -newH1[["BV"]]$BVspecType <- newH1[["BV"]]$BVmethod -newH1[["BV"]]$BVanalysisType <- newH1[["BV"]]$BVmeasEquip -newH1[["BV"]]$BVmethod <- NULL -newH1[["BV"]]$BVmeasEquip <- NULL -validateRDBESDataObject(newH1, strict = TRUE, verbose = TRUE) - -# Save the data -Pckg_survey_apistrat_H1 <- newH1 -usethis::use_data(Pckg_survey_apistrat_H1, overwrite = TRUE) - From b1a1e428ef5cf3f6944c214fae70e3e4de81c40e Mon Sep 17 00:00:00 2001 From: Richard Meitern Date: Thu, 16 Oct 2025 18:47:39 +0300 Subject: [PATCH 23/30] lowerTbl data keeps merge IDs --- R/getLowerTableSubsets.R | 2 +- R/lowerTblData.R | 46 ++++++++++++++++++++++++++---- man/lowerTblData.Rd | 2 +- tests/testthat/test-lowerTblData.R | 8 +++--- 4 files changed, 47 insertions(+), 11 deletions(-) diff --git a/R/getLowerTableSubsets.R b/R/getLowerTableSubsets.R index 830bfcae..fe169005 100644 --- a/R/getLowerTableSubsets.R +++ b/R/getLowerTableSubsets.R @@ -47,7 +47,7 @@ getLowerTableSubsets <- function(subsets, tblName, rdbesTables, combineStrata = } # Bind the data together and filter based on intersected IDs - res <- data.table::rbindlist(res) + res <- data.table::rbindlist(res, fill = TRUE) res <- res[get(paste0(tblName, "id")) %in% ids] res <- unique(res, by = paste0(tblName, "id")) diff --git a/R/lowerTblData.R b/R/lowerTblData.R index 12fa7264..98469a08 100644 --- a/R/lowerTblData.R +++ b/R/lowerTblData.R @@ -28,8 +28,7 @@ #' #' RDBEScore:::lowerTblData("TEid", c(4), tblsSprat, "LE", TRUE) #' @keywords internal -lowerTblData <- function(field, values, tbls, level, verbose = FALSE) { - #check if tables are of correct type +lowerTblData <- function(field, values, tbls, level, verbose = FALSE, path_order = NULL) { if(!is.list(tbls)) stop("tbls must be a list") start <- substr(field, start = 1, stop = 2) @@ -39,10 +38,9 @@ lowerTblData <- function(field, values, tbls, level, verbose = FALSE) { } currTbl <- which(start == names(tbls)) - #this assumes that the tables are in the correct order and no empty tables + # assumes tables are in correct order and no empty tables tc <- 1 nextTbl <- tbls[[currTbl + tc]] - #skip tables that are NULL while (is.null(nextTbl)) { tc <- tc + 1 if(currTbl + tc > length(tbls)) stop("No more lower tables found") @@ -59,6 +57,44 @@ lowerTblData <- function(field, values, tbls, level, verbose = FALSE) { values <- tbl[get(field) %in% values, get(curTblId)] field <- curTblId } + + # Initialize and update path order to ensure the original search field stays first + if (is.null(path_order)) path_order <- field + path_order <- unique(c(path_order, nextTblField)) + + # values in next table for recursion nextTblValues <- nextTbl[get(field) %in% values, get(nextTblField)] - lowerTblData(nextTblField, nextTblValues, tbls, level, verbose) + + # NEW: build link (current id -> next id) so we can keep intermediate IDs + linkDT <- unique(nextTbl[get(field) %in% values, + .(from = get(field), to = get(nextTblField))]) + data.table::setnames(linkDT, c("from","to"), c(field, nextTblField)) + + # recurse + res <- lowerTblData(nextTblField, nextTblValues, tbls, level, verbose, path_order) + + # NEW: merge link back so the current level's ID is preserved + # Always attach the authoritative parent id from the link. + # If a column with the same name already exists (from another variant), drop it first, + # then merge in the value derived from the current hierarchy. + if (field %in% names(res)) { + res[, (field) := NULL] + } + res <- merge( + linkDT[, c(nextTblField, field), with = FALSE], + res, + by = nextTblField, + all.y = TRUE, + allow.cartesian = TRUE + ) + + #keep path columns toward the front with stable overall path order + path_cols <- intersect(path_order, names(res)) + other_cols <- setdiff(names(res), path_cols) + if (length(path_cols) > 0) data.table::setcolorder(res, c(path_cols, other_cols)) + + # Ensure no data.table key is set on the result + data.table::setkeyv(res, NULL) + + res } diff --git a/man/lowerTblData.Rd b/man/lowerTblData.Rd index ae28d2b8..20673c43 100644 --- a/man/lowerTblData.Rd +++ b/man/lowerTblData.Rd @@ -4,7 +4,7 @@ \alias{lowerTblData} \title{Get lower table data from upper table id} \usage{ -lowerTblData(field, values, tbls, level, verbose = FALSE) +lowerTblData(field, values, tbls, level, verbose = FALSE, path_order = NULL) } \arguments{ \item{field}{A character string specifying the name of the upper table field.} diff --git a/tests/testthat/test-lowerTblData.R b/tests/testthat/test-lowerTblData.R index 5fd240a9..cd29eee9 100644 --- a/tests/testthat/test-lowerTblData.R +++ b/tests/testthat/test-lowerTblData.R @@ -10,21 +10,21 @@ tblsSprat <- list(DE = DE, SD = SD, TE = TE, VS = VS, LE = LE) # Begin test cases test_that("Function returns correct data for valid inputs", { result <- lowerTblData("TEid", c(1), tblsSprat, "LE", FALSE) - expected <- data.table(VSid = 1,LEid=1, value = 10) + expected <- data.table(TEid = 1, VSid = 1, LEid=1, value = 10) expect_equal(result, expected) }) test_that("Function handles multiple values correctly", { result <- lowerTblData("TEid", c(1, 4), tblsSprat, "LE", FALSE) - expected <- data.table(VSid = c(1, 4),LEid=c(1,4) , value = c(10, 4)) + expected <- data.table(TEid=c(1,4) ,VSid = c(1, 4),LEid=c(1,4) , value = c(10, 4)) expect_equal(result, expected) }) test_that("Function handles non-existent field gracefully", { result <- lowerTblData("TEid", c(99), tblsSprat, "LE", FALSE) - expected <- data.table(VSid = integer(),LEid=integer() , value = integer()) + expected <- data.table(TEid = integer(),VSid = integer(), LEid=integer() , value = integer()) expect_equal(result, expected) }) @@ -35,7 +35,7 @@ test_that("Function works with printLevels = TRUE", { test_that("Recursive functionality works", { result <- lowerTblData("SDid", c(1), tblsSprat, "LE", FALSE) - expected <- data.table(VSid = 1, LEid=1, value = 10) + expected <- data.table(SDid = 1, TEid = 1, VSid = 1, LEid=1, value = 10) expect_equal(result, expected) }) From bd8b386be5dbaeb975f7d907a16fe15a0b0ff314 Mon Sep 17 00:00:00 2001 From: karolinamg Date: Thu, 16 Oct 2025 21:32:34 +0200 Subject: [PATCH 24/30] clean up ratio --- R/doEstimationRatio.R | 137 +++++++++++++++++++++++++++--------------- 1 file changed, 87 insertions(+), 50 deletions(-) diff --git a/R/doEstimationRatio.R b/R/doEstimationRatio.R index 8a4965a8..6449b82a 100644 --- a/R/doEstimationRatio.R +++ b/R/doEstimationRatio.R @@ -38,76 +38,97 @@ doEstimationRatio <- function(RDBESDataObj, LWparam = NULL, # vector of two values lowerAux = NULL, # you can use a strongly correlated value present in your data for the estimation of the values of interest verbose = FALSE){ - +RDBESDataObj <- myFilteredObject +targetValue <- "AgeComp" raiseVar <- "Weight" - RDBESDataObj <- H8ExampleEE1 + # H1 <- H1Example + # + # myFields <- c("SAlowHierarchy") + # myValues <- c("A") + # RDBESDataObj <- filterRDBESDataObject(H1, + # fieldsToFilter = myFields, + # valuesToFilter = myValues, + # strict = FALSE, # this is to skip the validation function + # killOrphans = TRUE) # Check we have a valid RDBESEstObject before doing anything else - RDBEScore::validateRDBESDataObject(RDBESDataObj, verbose = FALSE) - # RDBESDataObj <- createRDBESDataObject(input = c("./NLdata/2025_10_14_093927.zip", - # "./NLdata/HCL_2025_10_06_102840215.zip")) - # validateRDBESDataObject(h1, verbose = TRUE) + classUnits = "mm" + classBreaks = c(100, 300, 10) + - # Check upper hierarchy - DEhierarchy <- unique(RDBESDataObj$DE$DEhierarchy) - if(length(unique(DEhierarchy )) > 1){ - stop("Multiple upper hierarchies not yet implemented")} - # Check lower hierarchy +# Checks ------------------------------------------------------------------ + + RDBEScore::validateRDBESDataObject(RDBESDataObj, verbose = FALSE) + + # Unique upper hierarchy + if(length(unique(RDBESDataObj$DE$DEhierarchy)) > 1){ + stop("Multiple upper hierarchies not implemented")} + # Unique lower hierarchy if(length(unique(RDBESDataObj$SA$SAlowHierarchy)) > 1){ stop("Multiple lower hierarchies not allowed") } - RDBESEstRatioObj <- RDBESDataObj[c("CL", "CE", RDBEScore::getTablesInRDBESHierarchy(DEhierarchy))] - # Filter out NULL tables - RDBESEstRatioObj <- Filter(Negate(is.null), RDBESEstRatioObj) - - # If raiseVar == possible - possibleValues <- unique(RDBESDataObj$BV$BVtypeMeas) + RDBESEstRatioObj <- Filter(Negate(is.null),RDBESDataObj) + + # If no individual weight of fish in BV, then can't run raiseVar = Weight + # because we don't have the weight of the subsample + if(unique(RDBESEstRatioObj$SA$SAlowHierarchy) == "A") { + weightVar <- grep("(?i)weight", unique(RDBESEstRatioObj$BV$BVtypeMeas), value = TRUE) + if (is.null( weightVar) || length(weightVar) == 0 || all(is.na(weightVar))){ + stop("no individual weight measured") + } + } - if(!raiseVar %in% possibleValues){ + # Does anything exist after SA? + # Do we need that? -#---------------------- - if(raiseVar == "Weight"){ - if(unique(RDBESDataObj$SA$SAlowHierarchy) == "B" ){ - stop("Lower hierarchy B not yet implemented for weight") - }else{ - weightVar <- grep("(?i)weight", unique(RDBESDataObj$BV$BVtypeMeas), value = TRUE) - # weightVar <- c("WeightLive", "WeightGutted", "WeightMeasured") - if (interactive()) { - if(length(unique(weightVar)) > 1) { - # Print a numbered menu and get user's selection - idx <- utils::menu(weightVar, title = "Select the BV weight type to use:") - if (idx == 0L) stop("Selection cancelled.") - wcol <- weightVar[idx] - } else { - message("Only one weight type present. Using: ", weightVar[1L]) - wcol <- weightVar[1L] - } - } + if(length(unique(names(RDBESEstRatioObj))) > 1){ + if(!tail(names(RDBESEstRatioObj), n = 1) %in% c("FM", "BV")){ + stop("No FM or BV tables provided") } } - } + # TODO (to be developed) match with pop (Landings or Effort) + # RDBESEstRatioObj <- RDBESDataObj[c("CL", "CE", RDBEScore::getTablesInRDBESHierarchy(DEhierarchy))] -# Do we need both CL and CE? Allow the user to define the population (i.e. effort or landings or both)? - # Not working check - # if(!names(RDBESDataObj) %in% c("CL", "CE")){ - # stop("The object does not have population data") - # } -# Check which tables exist after SA +# raiseVar options -------------------------------------------------------- + + # Can have multiple types of weight measured for the same individual + # If the user defined in the raiseVar argument one of the options in the ICES vocab for + # the weight codes in the field BVtypeMeas + # If there is only one present, this is used by default + # If more than one are present, allow the user to choose + possibleValues <- unique(RDBESDataObj$BV$BVtypeMeas) + if(!raiseVar %in% possibleValues){ + if(raiseVar == "Weight"){ + if(unique(RDBESDataObj$SA$SAlowHierarchy) == "B" ){ + stop("Lower hierarchy B not implemented for weight") + }else{ + weightVar <- grep("(?i)weight", unique(RDBESDataObj$BV$BVtypeMeas), value = TRUE) + if (interactive()) { + if(length(unique(weightVar)) > 1) { + # Print a numbered menu and get user's selection + idx <- utils::menu(weightVar, title = "Select the BV weight type to use:") + if (idx == 0L) stop("Selection cancelled.") + wcol <- weightVar[idx] + } else { + message("Only one weight type present. Using: ", weightVar[1L]) + wcol <- weightVar[1L] + } + } + } + } + } + + + -# Does anything exist after SA? - if(length(unique(names(RDBESEstRatioObj))) > 1){ - if(!tail(names(RDBESEstRatioObj), n = 1) %in% c("FM", "BV")){ - stop("No FM or BV tables provided") - } - } # Length composition ------------------------------------------------------ if(targetValue == "LengthComp"){ @@ -136,7 +157,22 @@ doEstimationRatio <- function(RDBESDataObj, sa <- data.table::setDT(RDBESEstRatioObj$SA) fm <- fm[fm, unique(.SD), .SDcols = c("SAid", "FMid", "FMclassMeas", "FMnumAtUnit")] sa <- sa[, unique(.SD), .SDcols = c("SAid", "SAlowHierarchy", "SAtotalWtMes" , "SAsampWtMes", "SAnumTotal", "SAnumSamp", "SAauxVarValue", "SAauxVarUnit" )] - bv$LengthClass <- floor(bv$LengthTotal/10) # TODO This needs to be defined by the user + + # need to identify current units and + u2mm <- c(mm = 1, cm = 10, m = 1000) + conv <- u2mm[[classUnits]] + vals <- fm$FMclassMeas / conv + + brks <- seq(classBreaks[1], classBreaks[2], by = classBreaks[3]) + + fm[, LengthClass := cut( + vals, + breaks = brks, + right = FALSE, # [) + include.lowest = TRUE, + labels = head(brks, -1) + )] + fm1 <- fm[ , .(FMNumbersAtLength = .N), by = .(SAid, LengthClass) @@ -366,6 +402,7 @@ doEstimationRatio <- function(RDBESDataObj, ] + fm <- fm[fm, unique(.SD), .SDcols = c("SAid", "FMid", "FMclassMeas", "FMnumAtUnit")] sa <- sa[, unique(.SD), .SDcols = c("SAid", "SAlowHierarchy", "SAtotalWtMes" , "SAsampWtMes", "SAnumTotal", "SAnumSamp", "SAauxVarValue", "SAauxVarUnit" )] From 7c3ac3f10704e281c82a6cfd5bd076900f8f5e9b Mon Sep 17 00:00:00 2001 From: Richard Meitern Date: Fri, 17 Oct 2025 13:02:07 +0300 Subject: [PATCH 25/30] new release --- DESCRIPTION | 2 +- NEWS.md | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index ae242e48..972c2d39 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: RDBEScore Type: Package Title: Functions for the ICES Regional Database and Estimation System (RDBES) -Version: 0.3.4 +Version: 0.3.5 Author: c( person(given = "David", family = "Currie", diff --git a/NEWS.md b/NEWS.md index 2752c99e..44ec5bf4 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,5 @@ +# RDBEScore 0.3.5 + # RDBEScore 0.3.4 - Defaults: `createRDBESDataObject()` now runs validation by default From bcf74a12ec30d3329ca6125ff54fac296976abb7 Mon Sep 17 00:00:00 2001 From: Richard Meitern Date: Fri, 17 Oct 2025 13:08:13 +0300 Subject: [PATCH 26/30] add news --- NEWS.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/NEWS.md b/NEWS.md index 44ec5bf4..cfbcdcee 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,11 @@ # RDBEScore 0.3.5 +- Bug fix: addressed [#251](https://github.com/ices-tools-dev/RDBEScore/issues/251). +- Docs/params: expanded docs for `combineRDBESDataObjects()` and `createRDBESDataObject()`; clarified hierarchy behavior and `...` options (strict, verbose, hierarchy). +- Mixed hierarchies: `combineRDBESDataObjects()` now warns/errors when objects use different hierarchies (`strict=TRUE` for error). +- Estimation: major refactor of `doEstimationRatio()` — stronger validation, hierarchy‑specific logic (A/B/C), standardized names (e.g., `BVweight`), clearer messages; uses `data.table`. +- ID tables: `createTableOfRDBESIds()` merging more robust by hierarchy; clearer BV handling and console output. + # RDBEScore 0.3.4 - Defaults: `createRDBESDataObject()` now runs validation by default From 0d7d6ae4fef9b0a5b7286e2acc1e9b72fe6617d2 Mon Sep 17 00:00:00 2001 From: Richard Meitern Date: Fri, 17 Oct 2025 13:23:58 +0300 Subject: [PATCH 27/30] #242 change --- vignettes/v01b-manipulating-rdbesdataobjects.Rmd | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/vignettes/v01b-manipulating-rdbesdataobjects.Rmd b/vignettes/v01b-manipulating-rdbesdataobjects.Rmd index 7a31f435..ff45131b 100644 --- a/vignettes/v01b-manipulating-rdbesdataobjects.Rmd +++ b/vignettes/v01b-manipulating-rdbesdataobjects.Rmd @@ -107,7 +107,8 @@ RDBESDataObjects can be combined using the **combineRDBESDataObjects()** functio ```{r combine} myCombinedRawObject <- combineRDBESDataObjects(myH1RawObject, - myH5RawObject) + myH5RawObject, + strict = FALSE) # Number of rows in each non-null table and hierarchies print(myCombinedRawObject) From 9648d1f810d8dd9b89b153131769fc36b53713c6 Mon Sep 17 00:00:00 2001 From: Richard Meitern Date: Fri, 17 Oct 2025 13:30:13 +0300 Subject: [PATCH 28/30] double chunk naming fixed --- vignettes/v01b-manipulating-rdbesdataobjects.Rmd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vignettes/v01b-manipulating-rdbesdataobjects.Rmd b/vignettes/v01b-manipulating-rdbesdataobjects.Rmd index ff45131b..e8849bd0 100644 --- a/vignettes/v01b-manipulating-rdbesdataobjects.Rmd +++ b/vignettes/v01b-manipulating-rdbesdataobjects.Rmd @@ -129,7 +129,7 @@ print(myH1RawObject) ``` -```{r filter} +```{r filter2} myFields <- c("SDctry","VDctry","VDflgCtry","FTarvLoc") myValues <- c("ZW","ZWBZH","ZWVFA" ) From 7d850a77e7cb40d83655729b15ffa9fed8a89738 Mon Sep 17 00:00:00 2001 From: Richard Meitern Date: Fri, 17 Oct 2025 15:11:53 +0300 Subject: [PATCH 29/30] fix for CRAN checks --- NEWS.md | 1 - R/createTableOfRDBESIds.r | 18 +-- R/doEstimationRatio.R | 242 ++++++++++++-------------------------- R/utils.R | 2 +- 4 files changed, 83 insertions(+), 180 deletions(-) diff --git a/NEWS.md b/NEWS.md index cfbcdcee..fe60ad83 100644 --- a/NEWS.md +++ b/NEWS.md @@ -3,7 +3,6 @@ - Bug fix: addressed [#251](https://github.com/ices-tools-dev/RDBEScore/issues/251). - Docs/params: expanded docs for `combineRDBESDataObjects()` and `createRDBESDataObject()`; clarified hierarchy behavior and `...` options (strict, verbose, hierarchy). - Mixed hierarchies: `combineRDBESDataObjects()` now warns/errors when objects use different hierarchies (`strict=TRUE` for error). -- Estimation: major refactor of `doEstimationRatio()` — stronger validation, hierarchy‑specific logic (A/B/C), standardized names (e.g., `BVweight`), clearer messages; uses `data.table`. - ID tables: `createTableOfRDBESIds()` merging more robust by hierarchy; clearer BV handling and console output. # RDBEScore 0.3.4 diff --git a/R/createTableOfRDBESIds.r b/R/createTableOfRDBESIds.r index 0ba86b1b..434803ae 100644 --- a/R/createTableOfRDBESIds.r +++ b/R/createTableOfRDBESIds.r @@ -21,14 +21,16 @@ createTableOfRDBESIds<-function(x, addSAseqNums=TRUE){ + # Avoid R CMD check notes for data.table's NSE column references + SAlowHierarchy <- SAid <- NULL + # note: needs developments for different lower hierarchies # x is x # hierarchy is hierarchy (integer) # outputs a table with ids for matching - #libraries - require(data.table) + # data.table is listed in Imports and loaded via NAMESPACE; avoid require/library in package code CStableNames<- getTablesInRDBESHierarchy(hierarchy = x$DE$DEhierarchy[1], includeOptTables = FALSE, @@ -46,7 +48,7 @@ createTableOfRDBESIds<-function(x, addSAseqNums=TRUE){ df_1<-data.frame(x[[CStableNames[i]]][,list(get(id_1))]); colnames(df_1)<-id_1 } - if((CStableNames[i+1] == "SA" & addSAseqNums == TRUE) | CStableNames[i+1] %in% c("BV")){ + if(((CStableNames[i+1] == "SA" && addSAseqNums == TRUE) || CStableNames[i+1] %in% c("BV"))){ if(CStableNames[i+1]=="SA"){ @@ -84,10 +86,10 @@ createTableOfRDBESIds<-function(x, addSAseqNums=TRUE){ outTmp = merge(out, x$SA[,c("SAid","SAlowHierarchy")]) # Convert to data.table if not already - setDT(outTmp) - setDT(out) - setDT(df_2) - setDT(df_2C) + data.table::setDT(outTmp) + data.table::setDT(out) + data.table::setDT(df_2) + data.table::setDT(df_2C) # Filter and get SAid groups keepA <- outTmp[SAlowHierarchy == "A", SAid] @@ -108,7 +110,7 @@ createTableOfRDBESIds<-function(x, addSAseqNums=TRUE){ mergedD <- toMergeD # unchanged group D # Combine back - out <- rbindlist(list(mergedA, mergedB, mergedC, mergedD), use.names = TRUE, fill = TRUE) + out <- data.table::rbindlist(list(mergedA, mergedB, mergedC, mergedD), use.names = TRUE, fill = TRUE) }else{ diff --git a/R/doEstimationRatio.R b/R/doEstimationRatio.R index 6449b82a..73cf5c79 100644 --- a/R/doEstimationRatio.R +++ b/R/doEstimationRatio.R @@ -38,97 +38,70 @@ doEstimationRatio <- function(RDBESDataObj, LWparam = NULL, # vector of two values lowerAux = NULL, # you can use a strongly correlated value present in your data for the estimation of the values of interest verbose = FALSE){ -RDBESDataObj <- myFilteredObject -targetValue <- "AgeComp" + raiseVar <- "Weight" - # H1 <- H1Example - # - # myFields <- c("SAlowHierarchy") - # myValues <- c("A") - # RDBESDataObj <- filterRDBESDataObject(H1, - # fieldsToFilter = myFields, - # valuesToFilter = myValues, - # strict = FALSE, # this is to skip the validation function - # killOrphans = TRUE) + RDBESDataObj <- H8ExampleEE1 # Check we have a valid RDBESEstObject before doing anything else - - classUnits = "mm" - classBreaks = c(100, 300, 10) - - -# Checks ------------------------------------------------------------------ - RDBEScore::validateRDBESDataObject(RDBESDataObj, verbose = FALSE) - # Unique upper hierarchy - if(length(unique(RDBESDataObj$DE$DEhierarchy)) > 1){ - stop("Multiple upper hierarchies not implemented")} - # Unique lower hierarchy + # RDBESDataObj <- createRDBESDataObject(input = c("./NLdata/2025_10_14_093927.zip", + # "./NLdata/HCL_2025_10_06_102840215.zip")) + # validateRDBESDataObject(h1, verbose = TRUE) + + # Check upper hierarchy + DEhierarchy <- unique(RDBESDataObj$DE$DEhierarchy) + if(length(unique(DEhierarchy )) > 1){ + stop("Multiple upper hierarchies not yet implemented")} + # Check lower hierarchy if(length(unique(RDBESDataObj$SA$SAlowHierarchy)) > 1){ stop("Multiple lower hierarchies not allowed") } - # Filter out NULL tables - RDBESEstRatioObj <- Filter(Negate(is.null),RDBESDataObj) - - # If no individual weight of fish in BV, then can't run raiseVar = Weight - # because we don't have the weight of the subsample - if(unique(RDBESEstRatioObj$SA$SAlowHierarchy) == "A") { - weightVar <- grep("(?i)weight", unique(RDBESEstRatioObj$BV$BVtypeMeas), value = TRUE) - if (is.null( weightVar) || length(weightVar) == 0 || all(is.na(weightVar))){ - stop("no individual weight measured") - } - } - - # Does anything exist after SA? - # Do we need that? - - if(length(unique(names(RDBESEstRatioObj))) > 1){ - if(!tail(names(RDBESEstRatioObj), n = 1) %in% c("FM", "BV")){ - stop("No FM or BV tables provided") - } - } - - # TODO (to be developed) match with pop (Landings or Effort) - # RDBESEstRatioObj <- RDBESDataObj[c("CL", "CE", RDBEScore::getTablesInRDBESHierarchy(DEhierarchy))] - - - + RDBESEstRatioObj <- RDBESDataObj[c("CL", "CE", RDBEScore::getTablesInRDBESHierarchy(DEhierarchy))] + # Filter out NULL tables + RDBESEstRatioObj <- Filter(Negate(is.null), RDBESEstRatioObj) -# raiseVar options -------------------------------------------------------- - - # Can have multiple types of weight measured for the same individual - # If the user defined in the raiseVar argument one of the options in the ICES vocab for - # the weight codes in the field BVtypeMeas - # If there is only one present, this is used by default - # If more than one are present, allow the user to choose - possibleValues <- unique(RDBESDataObj$BV$BVtypeMeas) - if(!raiseVar %in% possibleValues){ - if(raiseVar == "Weight"){ - if(unique(RDBESDataObj$SA$SAlowHierarchy) == "B" ){ - stop("Lower hierarchy B not implemented for weight") - }else{ - weightVar <- grep("(?i)weight", unique(RDBESDataObj$BV$BVtypeMeas), value = TRUE) - if (interactive()) { - if(length(unique(weightVar)) > 1) { - # Print a numbered menu and get user's selection - idx <- utils::menu(weightVar, title = "Select the BV weight type to use:") - if (idx == 0L) stop("Selection cancelled.") - wcol <- weightVar[idx] - } else { - message("Only one weight type present. Using: ", weightVar[1L]) - wcol <- weightVar[1L] - } +#---------------------- + if(raiseVar == "Weight"){ + if(unique(RDBESDataObj$SA$SAlowHierarchy) == "B" ){ + stop("Lower hierarchy B not yet implemented for weight") + }else{ + weightVar <- grep("(?i)weight", unique(RDBESDataObj$BV$BVtypeMeas), value = TRUE) + # weightVar <- c("WeightLive", "WeightGutted", "WeightMeasured") + if (interactive()) { + if(length(unique(weightVar)) > 1) { + # Print a numbered menu and get user's selection + idx <- utils::menu(weightVar, title = "Select the BV weight type to use:") + if (idx == 0L) stop("Selection cancelled.") + wcol <- weightVar[idx] + weightName <- paste0("BV", wcol) + } else { + message("Only one weight type present. Using: ", weightVar[1L]) + wcol <- weightVar[1L] + weightName <- paste0("BV", wcol) } } } } +# Do we need both CL and CE? Allow the user to define the population (i.e. effort or landings or both)? + # Not working check + # if(!names(RDBESDataObj) %in% c("CL", "CE")){ + # stop("The object does not have population data") + # } +# Check which tables exist after SA +# Does anything exist after SA? + + if(length(unique(names(RDBESEstRatioObj))) > 1){ + if(!tail(names(RDBESEstRatioObj), n = 1) %in% c("FM", "BV")){ + stop("No FM or BV tables provided") + } + } # Length composition ------------------------------------------------------ if(targetValue == "LengthComp"){ @@ -137,69 +110,35 @@ targetValue <- "AgeComp" # LH A & B ---------------------------------------------------------------- if(unique(RDBESEstRatioObj$SA$SAlowHierarchy) %in% c("A", "B")){ - # TODO mean weight at length + # bv <- setDT(RDBESEstRatioObj$BV) + # fm <- setDT(RDBESEstRatioObj$FM) + # bv <- bv[, unique(.SD), .SDcols = c( "FMid","BVfishId", "BVtypeMeas", "BVvalueMeas")] + # bv <- dcast(bv, ... ~ BVtypeMeas , value.var = c("BVvalueMeas"), drop = TRUE) + # bv[, `:=`(LengthTotal = as.numeric(LengthTotal), WeightMeasured = as.numeric(WeightMeasured))] - if(!is.null(LWparam)){ + # Select only FM data for now - BV possibly used for ALK + warning("Only FM table used. BV is not yet implemented") - stop("Not yet implemented") - }else{ - # else stop - stop("Nor an auxiliary variable nor lw params are provided. Not possible to produce the mean weight at length") - } - # Select only FM data for now - BV possibly used for ALK - warning("If lower hierarchy A, only the FM table is used to calculate the numbers at length.") - - fm <- data.table::setDT(RDBESEstRatioObj$FM) - sa <- data.table::setDT(RDBESEstRatioObj$SA) - fm <- fm[fm, unique(.SD), .SDcols = c("SAid", "FMid", "FMclassMeas", "FMnumAtUnit")] - sa <- sa[, unique(.SD), .SDcols = c("SAid", "SAlowHierarchy", "SAtotalWtMes" , "SAsampWtMes", "SAnumTotal", "SAnumSamp", "SAauxVarValue", "SAauxVarUnit" )] - - # need to identify current units and - u2mm <- c(mm = 1, cm = 10, m = 1000) - conv <- u2mm[[classUnits]] - vals <- fm$FMclassMeas / conv - - brks <- seq(classBreaks[1], classBreaks[2], by = classBreaks[3]) - - fm[, LengthClass := cut( - vals, - breaks = brks, - right = FALSE, # [) - include.lowest = TRUE, - labels = head(brks, -1) - )] - - fm1 <- fm[ - , .(FMNumbersAtLength = .N), - by = .(SAid, LengthClass) - ][ - # add total count per SAid - , FMTotCount := sum(FMNumbersAtLength), by = SAid - ] - su <- merge(fm1, sa, by = c("SAid")) - if(raiseVar == "Weight"){ - su$raiseFactor <- su$SAtotalWtMes/su$SAsampWtMes - su$NumbersAtLength <- su$raiseFactor*su$FMNumbersAtLength + if(!is.null(LWparam)){ - }else if(raiseVar == "Count"){ + stop("Not yet implemented") - su$raiseFactor <- su$SAnumTotal/su$SAnumSamp - su$NumbersAtLength <- su$raiseFactor*su$FMNumbersAtLength }else{ - - su$NumbersAtLength <- su$SAauxVarValue*su$FMNumbersAtLength - + # else stop + stop("Nor an auxiliary variable nor lw params are provided. Not possible to produce the mean weight at length") } - return(su) + # otherwise check if you can calculate it + # otherwise stop + @@ -209,17 +148,16 @@ targetValue <- "AgeComp" }else if(unique(RDBESEstRatioObj$SA$SAlowHierarchy) == "C"){ - bv <- data.table::setDT(RDBESEstRatioObj$BV) + bv <- setDT(RDBESEstRatioObj$BV) bv <- bv[, unique(.SD), .SDcols = c("SAid", "BVfishId", "BVtypeMeas", "BVvalueMeas")] bv <- dcast(bv, ... ~ BVtypeMeas , value.var = c("BVvalueMeas"), drop = TRUE) - bv[, BVweight := as.numeric(get(wcol))] - bv[, LengthTotal := as.numeric(LengthTotal)] + bv[, (weightName) := as.numeric(get(wcol))] # TODO this probably needs to be an argument # or needs to be defined later on? bv$LengthClass <- floor(bv$LengthTotal/10) # TODO This needs to be defined by the user bv1 <- bv[ - , .(BVMeanWeight = mean(BVweight, na.rm = TRUE), + , .(BVMeanWeight = mean(get(wcol), na.rm = TRUE), BVNumbersAtLength = .N), by = .(SAid, LengthClass) ][ @@ -227,14 +165,14 @@ targetValue <- "AgeComp" , BVTotCount := sum(BVNumbersAtLength), by = SAid ][ # add total weight per SAid - bv[, .(BVTotWeight = sum(BVweight, na.rm = TRUE)), by = SAid], + bv[, .(BVTotWeight = sum(get(wcol), na.rm = TRUE)), by = SAid], on = "SAid" ] # bv1$BVLengthClassProp <- bv1$BVNumbersAtLength/bv1$TotCount - sa <- data.table::setDT(RDBESEstRatioObj$SA) + sa <- setDT(RDBESEstRatioObj$SA) sa <- sa[, unique(.SD), .SDcols = c("SAid", "SAlowHierarchy", "SAtotalWtMes" , "SAsampWtMes", "SAnumTotal", "SAnumSamp", "SAauxVarValue", "SAauxVarUnit" )] # Do not need the # species, the filtering of the "strata" variables will be done before the estimation # To test @@ -308,15 +246,15 @@ targetValue <- "AgeComp" if(unique(RDBESDataObj$SA$SAlowHierarchy) == "C"){ - bv <- data.table::setDT(RDBESEstRatioObj$BV) + bv <- setDT(RDBESEstRatioObj$BV) bv <- bv[, unique(.SD), .SDcols = c("SAid", "BVfishId", "BVtypeMeas", "BVvalueMeas")] bv <- dcast(bv, ... ~ BVtypeMeas , value.var = c("BVvalueMeas"), drop = TRUE) - bv[, BVweight := as.numeric(get(wcol))] + bv[, `:=`( WeightMeasured = as.numeric(get(wcol)))] # TODO this probably needs to be an argument # or needs to be defined later on? bv1 <- bv[ - , .(BVMeanWeight = mean(BVweight, na.rm = TRUE), + , .(BVMeanWeight = mean(get(wcol), na.rm = TRUE), BVNumbersAtAge = .N), by = .(SAid, Age) ][ @@ -324,14 +262,14 @@ targetValue <- "AgeComp" , BVTotCount := sum(BVNumbersAtAge), by = SAid ][ # add total weight per SAid - bv[, .(BVTotWeight = sum(BVweight, na.rm = TRUE)), by = SAid], + bv[, .(BVTotWeight = sum(get(wcol), na.rm = TRUE)), by = SAid], on = "SAid" ] # bv1$BVLengthClassProp <- bv1$BVNumbersAtLength/bv1$TotCount - sa <- data.table::setDT(RDBESEstRatioObj$SA) + sa <- setDT(RDBESEstRatioObj$SA) sa <- sa[, unique(.SD), .SDcols = c("SAid", "SAlowHierarchy", "SAtotalWtMes" , "SAsampWtMes", "SAnumTotal", "SAnumSamp", "SAauxVarValue", "SAauxVarUnit" )] # Do not need the # species, the filtering of the "strata" variables will be done before the estimation # To test @@ -381,45 +319,11 @@ targetValue <- "AgeComp" }else if(unique(RDBESDataObj$SA$SAlowHierarchy) == "A"){ - bv <- data.table::setDT(RDBESEstRatioObj$BV) - fm <- data.table::setDT(RDBESEstRatioObj$FM) - sa <- data.table::setDT(RDBESEstRatioObj$SA) + bv <- setDT(RDBESEstRatioObj$BV) + fm <- setDT(RDBESEstRatioObj$FM) bv <- bv[, unique(.SD), .SDcols = c( "FMid","BVfishId", "BVtypeMeas", "BVvalueMeas")] bv <- dcast(bv, ... ~ BVtypeMeas , value.var = c("BVvalueMeas"), drop = TRUE) - bv[, BVweight := as.numeric(get(wcol))] - - bv1 <- bv[ - , .(BVMeanWeight = mean(BVweight, na.rm = TRUE), - BVNumbersAtAge = .N), - by = .(FMid, Age) - ][ - # add total count per SAid - , BVTotCount := sum(BVNumbersAtAge), by = FMid - ][ - # add total weight per SAid - bv[, .(BVTotWeight = sum(BVweight, na.rm = TRUE)), by = FMid], - on = "SAid" - ] - - - - fm <- fm[fm, unique(.SD), .SDcols = c("SAid", "FMid", "FMclassMeas", "FMnumAtUnit")] - sa <- sa[, unique(.SD), .SDcols = c("SAid", "SAlowHierarchy", "SAtotalWtMes" , "SAsampWtMes", "SAnumTotal", "SAnumSamp", "SAauxVarValue", "SAauxVarUnit" )] - - fm1 <- unique( - fm[FMclassMeas %chin% c("LengthTotal","LengthMeasured","Length"), - .(FMid, SAid, FMnumAtUnit)] - ) - - bv1 <- fm_len[bv1, on = "FMid"][, - num_raise := fifelse(BVTotCount > 0, FMnumAtUnit / BVTotCount, NA_real_) - ][ - , N_at_age := BVNumbersAtAge * num_raise - ] - - - - # subsample -> sample weights -> weight from where the sample came from + bv[, `:=`(wcol = as.numeric(wcol ))] # if age exists @@ -436,8 +340,6 @@ targetValue <- "AgeComp" # else stop # TODO include FM. For now the FM is not yet implemented - }else{ - stop("Age composition can't be calculated with lower hierachy B.") } } diff --git a/R/utils.R b/R/utils.R index 8a1e7741..0e982554 100644 --- a/R/utils.R +++ b/R/utils.R @@ -28,7 +28,7 @@ evenMoreUsedVars <- c("Weightg", "Lengthmm", "Group", "WeightIndexSum", "WeightIndex", "H8ExampleEE1", "suLevels", "LengthTotal", "WeightMeasured", "LengthClass", "TotCount", "BVNumbersAtLength", "SAauxVarValue", "ISid", - "ISrecType", "..x","n_matches", + "ISrecType", "..x","n_matches","SAlowHierarchy", "SAparSequNum", "i.SAid", "SAparentID", "BVTotCount", "Age", "BVNumbersAtAge", "i.SAparentID", "i.subSampleLevel", "i.topLevelSAid", From 82a6e3e40c4eaaefe66e03e941e0c7dee14617a6 Mon Sep 17 00:00:00 2001 From: Richard Meitern Date: Fri, 17 Oct 2025 15:32:00 +0300 Subject: [PATCH 30/30] fix spelling and missing parameter docs --- R/lowerTblData.R | 3 +++ package_overview.Rmd | 2 +- vignettes/v01b-manipulating-rdbesdataobjects.Rmd | 4 ++-- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/R/lowerTblData.R b/R/lowerTblData.R index 98469a08..8f539bfa 100644 --- a/R/lowerTblData.R +++ b/R/lowerTblData.R @@ -11,6 +11,9 @@ #' @param tbls A named list of data frames representing the tables. #' @param level A character string specifying the name of the target lower level table. #' @param verbose A logical value indicating whether to print intermediate levels during recursion. +#' @param path_order Internal: character vector tracking the traversal path of IDs during +#' recursion to preserve a stable column order in the returned result. Users should not +#' normally set this; it is maintained by recursive calls (default NULL). #' #' @return A data frame containing the rows of the target lower level table that are associated with #' the given values of the upper table field. diff --git a/package_overview.Rmd b/package_overview.Rmd index 2968eb92..6bc617cc 100644 --- a/package_overview.Rmd +++ b/package_overview.Rmd @@ -19,7 +19,7 @@ The aim of this document is to outline the basic workflow of importing data down The function **createRDBESDataObject** is intended to directly import Commercial Landing (CL), Commercial Effort (CE) and Commercial Sampling (CS) tables downloaded from [RDBES](https://rdbes.ices.dk/#/). -## Introducton +## Introduction 2 RDBEScore is an R package developed to facilitate the analysis of data from the ICES Regional Database and Estimation System (RDBES). The package provides functions to: diff --git a/vignettes/v01b-manipulating-rdbesdataobjects.Rmd b/vignettes/v01b-manipulating-rdbesdataobjects.Rmd index e8849bd0..457f81b6 100644 --- a/vignettes/v01b-manipulating-rdbesdataobjects.Rmd +++ b/vignettes/v01b-manipulating-rdbesdataobjects.Rmd @@ -153,7 +153,7 @@ validateRDBESDataObject(myFilteredObjectNoOrphans, verbose = FALSE) myFilteredObjectNoOrphans ``` -**NB!** Currently filtering happens to all fields together i.e it is not possible to filter same codelist differently in the same filter call. Imagine a situation where you want to filter on both "SDCtry" and "VDflgCtry" ie vessels from EH country sampled by ZW institution. Ie tow calls are needed +**NB!** Currently filtering happens to all fields together i.e it is not possible to filter same codelist differently in the same filter call. Imagine a situation where you want to filter on both "SDCtry" and "VDflgCtry" ie vessels from EH country sampled by ZW institution. Ie two calls are needed ```{r} myFilteredObject <- filterRDBESDataObject(myH1RawObject, @@ -169,7 +169,7 @@ filterRDBESDataObject(myFilteredObject, ``` -Sometimes you might to do the inverse filter eg exclude something. You can do this by selecting the complement set of values using `setdiff`. +Sometimes you might want to do the inverse filter eg exclude something. You can do this by selecting the complement set of values using `setdiff`. ```{r} # Exclude specific DEid values by selecting all others