diff --git a/.github/workflows/pkgdown.yaml b/.github/workflows/pkgdown.yaml index bfc9f4db..3de2a835 100644 --- a/.github/workflows/pkgdown.yaml +++ b/.github/workflows/pkgdown.yaml @@ -4,6 +4,7 @@ on: push: branches: [main, master] pull_request: + branches: [ main ] release: types: [published] workflow_dispatch: diff --git a/.gitignore b/.gitignore index 37912496..428307b6 100644 --- a/.gitignore +++ b/.gitignore @@ -1,57 +1,46 @@ # History files .Rhistory .Rapp.history - # Session Data files .RData - # User-specific files .Ruserdata .DS_Store - # Example code in package build process *-Ex.R - # Output files from R CMD build /*.tar.gz - # Output files from R CMD check /*.Rcheck/ - # RStudio files .Rproj.user/ - # produced vignettes vignettes/*.html vignettes/*.pdf - # OAuth2 token, see https://github.com/hadley/httr/releases/tag/v0.3 .httr-oauth - # knitr and R markdown default cache directories *_cache/ /cache/ - # Temporary files created by R markdown *.utf8.md *.knit.md - # R Environment Variables .Renviron - # FishNCo raw test data FishNCo/testData/RegionalTestData/* - # MS Excel temp files ~$* - /doc/ /Meta/ <<<<<<< Updated upstream .vscode/ docs ======= - # Data to develop fun /NLdata/ >>>>>>> Stashed changes +package_overview.tex +package_overview.html +package_overview.pptx +package_overview.md diff --git a/DESCRIPTION b/DESCRIPTION index ae242e48..972c2d39 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: RDBEScore Type: Package Title: Functions for the ICES Regional Database and Estimation System (RDBES) -Version: 0.3.4 +Version: 0.3.5 Author: c( person(given = "David", family = "Currie", diff --git a/NEWS.md b/NEWS.md index 2752c99e..fe60ad83 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,10 @@ +# RDBEScore 0.3.5 + +- Bug fix: addressed [#251](https://github.com/ices-tools-dev/RDBEScore/issues/251). +- Docs/params: expanded docs for `combineRDBESDataObjects()` and `createRDBESDataObject()`; clarified hierarchy behavior and `...` options (strict, verbose, hierarchy). +- Mixed hierarchies: `combineRDBESDataObjects()` now warns/errors when objects use different hierarchies (`strict=TRUE` for error). +- ID tables: `createTableOfRDBESIds()` merging more robust by hierarchy; clearer BV handling and console output. + # RDBEScore 0.3.4 - Defaults: `createRDBESDataObject()` now runs validation by default diff --git a/R/combineRDBESDataObjects.R b/R/combineRDBESDataObjects.R index e58831f6..9bfc7cb0 100644 --- a/R/combineRDBESDataObjects.R +++ b/R/combineRDBESDataObjects.R @@ -9,6 +9,13 @@ #' @param strict (Optional) This function validates its input data - should #' the validation be strict? The default is TRUE. #' +#' @details +#' When combining RDBESDataObjects from different hierarchies (e.g., H1 and H5), +#' a warning is issued. The resulting combined object will have a mixed hierarchy, +#' which may be structurally and statistically invalid for some analyses. However, +#' such combinations can be useful for fisheries overviews, annual reports, or +#' countries performing broader estimations. +#' #' @return the combination of \code{RDBESDataObject1} and \code{RDBESDataObject2} #' @seealso \link[data.table]{rbindlist} #' @export @@ -31,6 +38,36 @@ combineRDBESDataObjects <- function(RDBESDataObject1, validateRDBESDataObject(RDBESDataObject1, verbose = verbose, strict = strict) validateRDBESDataObject(RDBESDataObject2, verbose = verbose, strict = strict) + + # Check for multiple hierarchies + hierarchy1 <- NULL + hierarchy2 <- NULL + + if (!is.null(RDBESDataObject1$DE) && nrow(RDBESDataObject1$DE) > 0) { + hierarchy1 <- unique(RDBESDataObject1$DE$DEhierarchy) + } + + if (!is.null(RDBESDataObject2$DE) && nrow(RDBESDataObject2$DE) > 0) { + hierarchy2 <- unique(RDBESDataObject2$DE$DEhierarchy) + } + + # Warn if combining different hierarchies + if (!is.null(hierarchy1) && !is.null(hierarchy2) && + length(hierarchy1) > 0 && length(hierarchy2) > 0) { + if (!all(hierarchy1 %in% hierarchy2) || !all(hierarchy2 %in% hierarchy1)) { + warnMsg <- paste("Combining RDBESDataObjects from different hierarchies (", + paste(hierarchy1, collapse = ", "), " and ", + paste(hierarchy2, collapse = ", "), + "). This creates a mixed hierarchy object that may be structurally ", + "and statistically invalid for some analyses.") + if(strict){ + stop(warnMsg, call. = F) + } else{ + warning(warnMsg, call. = F) + } + } + } + # Create an empty RDBESDataObject as the basis of what we will return myRDBESDataObject <- createRDBESDataObject() diff --git a/R/createRDBESDataObject.R b/R/createRDBESDataObject.R index 74c8a7bf..b9652c9b 100644 --- a/R/createRDBESDataObject.R +++ b/R/createRDBESDataObject.R @@ -20,6 +20,11 @@ #' given. You should not input different hierarchy files; this function will not #' combine them. #' +#' If the zip contains multiple hierarchies (e.g., H1 and H5 within the same +#' archive), you can select which one to import by passing `Hierarchy` via +#' `...`, for example: `Hierarchy = 1`. If `Hierarchy` is not specified and the +#' zip contains multiple hierarchies, an error is raised prompting you to set it. +#' #' ***CSV file inputs*** #' This `input` should be a path to a folder of `csv` files. These can be the #' `csv` files downloaded from RDBES (e.g. an unzipped hierarchy), or *any* set @@ -56,8 +61,17 @@ #' in. Default is `TRUE`. #' @param verbose (Optional) Set to TRUE if you want informative text printed #' out, or FALSE if you don't. The default is FALSE. -#' @param ... parameters passed to validateRDBESDataObject -#' e.g.`strict=FALSE` +#' @param ... Additional parameters forwarded to helper functions used by this +#' function. Most commonly these are forwarded to +#' `validateRDBESDataObject()` during the validation step. Common options: +#' - `strict` (logical, default `TRUE`): if `FALSE`, validation issues result +#' in warnings instead of stopping with an error. +#' - `verbose` (logical, default `FALSE`): request extra informational output +#' from validation. +#' - `Hierarchy` (integer, e.g. `1`, optional; zip inputs only): when the zip +#' file contains multiple hierarchies, selects which hierarchy to import. +#' Note: `checkDataTypes` is controlled by the `castToCorrectDataTypes` +#' argument of this function and should not be supplied via `...`. #' @importFrom utils file_test #' #' @return A RDBESDataObject @@ -65,6 +79,7 @@ #' @md #' #' @examples +#' # Create an empty object #' myEmptyRDBESObject <- createRDBESDataObject(input = NULL) createRDBESDataObject <- function(input = NULL, diff --git a/R/createTableOfRDBESIds.r b/R/createTableOfRDBESIds.r index 4431d356..434803ae 100644 --- a/R/createTableOfRDBESIds.r +++ b/R/createTableOfRDBESIds.r @@ -1,3 +1,4 @@ + #' Create a table of RDBES Ids #' #' examples for now see @@ -18,51 +19,121 @@ #' myTableOfIds<- createTableOfRDBESIds(myH1RawObject) #' } -createTableOfRDBESIds<-function(x, addSAseqNums=TRUE) -{ +createTableOfRDBESIds<-function(x, addSAseqNums=TRUE){ -# note: needs developments for different lower hierarchies + # Avoid R CMD check notes for data.table's NSE column references + SAlowHierarchy <- SAid <- NULL -# x is RDBESobj -# hierarchy is hierarchy (integer) -# outputs a table with ids for matching + # note: needs developments for different lower hierarchies + # x is x + # hierarchy is hierarchy (integer) + # outputs a table with ids for matching -CStableNames<- getTablesInRDBESHierarchy(hierarchy = x$DE$DEhierarchy[1], - includeOptTables = FALSE, - includeLowHierTables = TRUE, - includeTablesNotInSampHier = FALSE) + # data.table is listed in Imports and loaded via NAMESPACE; avoid require/library in package code -for (i in 1:(length(CStableNames)-1)) -{ -id_1<-paste0(CStableNames[i],"id") -id_2<-paste0(CStableNames[i+1],"id") -if (i==1) df_1<-data.frame(x[[CStableNames[i]]][,list(get(id_1))]); colnames(df_1)<-id_1 -if((CStableNames[i+1] == "SA" & addSAseqNums == TRUE) | CStableNames[i+1] %in% c("BV")){ - if(CStableNames[i+1]=="SA") {df_2<-data.frame(x[[CStableNames[i+1]]][,list(get(id_1), get(id_2), get("SAseqNum"), get("SAparSequNum"))]); colnames(df_2)<-c(id_1,id_2,"SAseqNum","SAparSequNum")} - if(CStableNames[i+1]=="BV") {df_2<-data.frame(x[[CStableNames[i+1]]][,list(get(id_1), get(id_2), get("BVfishId"))]); colnames(df_2)<-c(id_1,id_2,"BVfishId")} - } else { - df_2<-data.frame(x[[CStableNames[i+1]]][,list(get(id_1), get(id_2))]); colnames(df_2)<-c(id_1,id_2) - } + CStableNames<- getTablesInRDBESHierarchy(hierarchy = x$DE$DEhierarchy[1], + includeOptTables = FALSE, + includeLowHierTables = TRUE, + includeTablesNotInSampHier = FALSE) -if (i==1) out<-merge(df_1,df_2, all.x=T) else out<-merge(out, df_2, all.x=T) + for (i in 1:(length(CStableNames)-1)){ + cat("Processing", CStableNames[i], "table. \n") + cat("Merging", CStableNames[i], " with ", CStableNames[i+1], " tables. \n") + id_1<-paste0(CStableNames[i],"id") + id_2<-paste0(CStableNames[i+1],"id") -#colnames(out)<-c(id_1,id_2) + if(i==1){ + cat("Using", id_1, " from ", CStableNames[i], "table and", id_2, " from ", CStableNames[i+1], "table. \n") + df_1<-data.frame(x[[CStableNames[i]]][,list(get(id_1))]); colnames(df_1)<-id_1 + } -} -# reorders -if(addSAseqNums==TRUE){ - out<-out[,c(paste0(CStableNames,"id"),"BVfishId","SAseqNum","SAparSequNum")] - } else { - out<-out[,c(paste0(CStableNames,"id"),"BVfishId")] + if(((CStableNames[i+1] == "SA" && addSAseqNums == TRUE) || CStableNames[i+1] %in% c("BV"))){ + if(CStableNames[i+1]=="SA"){ - } -out -} + cat("Using", id_1, " from ", CStableNames[i], "table and", id_2, "SAseqNum, and SAparSequNum from ", CStableNames[i+1], "table. \n") + df_2<-data.frame(x[[CStableNames[i+1]]][,list(get(id_1), get(id_2), get("SAseqNum"), get("SAparSequNum"))]); colnames(df_2)<-c(id_1,id_2,"SAseqNum","SAparSequNum") + + } + + if(CStableNames[i+1]=="BV"){ + + cat("Using", id_1, " from ", CStableNames[i], "table and", id_2, "and BVfishId from ", CStableNames[i+1], "table. \n") + df_2<-data.frame(x[[CStableNames[i+1]]][,list(get(id_1), get(id_2), get("BVfishId"))]); colnames(df_2)<-c(id_1,id_2,"BVfishId") + # We also need a reference reporting the SAid, for when the lower hierarchy is C, see below. + df_2C<-data.frame(x[[CStableNames[i+1]]][,list(get("SAid"), get(id_1), get(id_2),get("BVfishId"))]); colnames(df_2C)<-c("SAid", id_1,id_2,"BVfishId") + + } + + }else{ + + cat("Using", id_1, " from ", CStableNames[i], "table and", id_2, " from ", CStableNames[i+1], "table. \n") + df_2<-data.frame(x[[CStableNames[i+1]]][,list(get(id_1), get(id_2))]); colnames(df_2)<-c(id_1,id_2) + + } + + if (i==1){ + + out<-merge(df_1,df_2, all.x=T) + + }else{ + + if(CStableNames[i+1]=="BV"){ + + # The lower hierarchy (A:D) implies that FM is used or not. + # Due to this, we need to do a conditional merging by either FMid (where present) or SAid. + outTmp = merge(out, x$SA[,c("SAid","SAlowHierarchy")]) + + # Convert to data.table if not already + data.table::setDT(outTmp) + data.table::setDT(out) + data.table::setDT(df_2) + data.table::setDT(df_2C) -# e.g., - ## default adds "SAseqNum","SAparSequNum" - #head(createTableOfRDBESIds(x = RDBESprepObj)) - ## if addSAseqNums is set to FALSE, "SAseqNum" and "SAparSequNum" are not added to output - # head(createTableOfRDBESIds(x = RDBESprepObj, addSAseqNums=FALSE)) + # Filter and get SAid groups + keepA <- outTmp[SAlowHierarchy == "A", SAid] + keepB <- outTmp[SAlowHierarchy == "B", SAid] + keepC <- outTmp[SAlowHierarchy == "C", SAid] + keepD <- outTmp[SAlowHierarchy == "D", SAid] + + # Split 'out' accordingly + toMergeA <- out[SAid %in% keepA] + toMergeB <- out[SAid %in% keepB] + toMergeC <- out[SAid %in% keepC] + toMergeD <- out[SAid %in% keepD] + + # Conditional merges + mergedA <- df_2[toMergeA, on = "FMid"] # left join equivalent (all.x = TRUE) + mergedB <- df_2[toMergeB, on = "FMid"] # same for group B + mergedC <- df_2C[toMergeC, on = "SAid", nomatch = 0][, .SD, .SDcols = c(names(toMergeC), "BVid","BVfishId")] + mergedD <- toMergeD # unchanged group D + + # Combine back + out <- data.table::rbindlist(list(mergedA, mergedB, mergedC, mergedD), use.names = TRUE, fill = TRUE) + + }else{ + + out<-merge(out, df_2, all.x=T) + + } + + } + } + + out <- as.data.frame(out) + + # reorders + if(addSAseqNums==TRUE){ + + out<-out[,c(paste0(CStableNames,"id"),"BVfishId","SAseqNum","SAparSequNum")] + + } else { + + out<-out[,c(paste0(CStableNames,"id"),"BVfishId")] + + } + + out + +} diff --git a/R/filterRDBESDataObject.R b/R/filterRDBESDataObject.R index 2c00b547..f2839b11 100644 --- a/R/filterRDBESDataObject.R +++ b/R/filterRDBESDataObject.R @@ -39,6 +39,17 @@ #' fieldsToFilter = myFields, #' valuesToFilter = myValues #' ) +#' +#' # Inverse filtering (exclude certain values) +#' # Example: keep all DE rows except those with DEid in `excludedValues` +#' # Compute the complement of the excluded set using setdiff +#' allValues <- unique(myH1RawObject$DE$DEid) +#' excludedValues <- c(5351) +#' myInverseFiltered <- filterRDBESDataObject( +#' myH1RawObject, +#' fieldsToFilter = "DEid", +#' valuesToFilter = setdiff(allValues, excludedValues) +#' ) #' } filterRDBESDataObject <- function(RDBESDataObjectToFilter, fieldsToFilter, diff --git a/R/getLowerTableSubsets.R b/R/getLowerTableSubsets.R index 830bfcae..fe169005 100644 --- a/R/getLowerTableSubsets.R +++ b/R/getLowerTableSubsets.R @@ -47,7 +47,7 @@ getLowerTableSubsets <- function(subsets, tblName, rdbesTables, combineStrata = } # Bind the data together and filter based on intersected IDs - res <- data.table::rbindlist(res) + res <- data.table::rbindlist(res, fill = TRUE) res <- res[get(paste0(tblName, "id")) %in% ids] res <- unique(res, by = paste0(tblName, "id")) diff --git a/R/importRDBESDataZIP.R b/R/importRDBESDataZIP.R index 94d66a40..b8f29b0c 100644 --- a/R/importRDBESDataZIP.R +++ b/R/importRDBESDataZIP.R @@ -73,7 +73,7 @@ importRDBESDataZIP <- function(filenames, hdirs <- dirs[grepl("H[0-9]+", dirs)] if(length(hdirs) > 1) { valid_hierarchies <- as.numeric(gsub("H", "", hdirs)) - example <- paste0("Hierachy = ", valid_hierarchies[1]) + example <- paste0("Hierarchy = ", valid_hierarchies[1]) if(is.null(Hierarchy)) { stop("The zip file contains multiple hierarchies.\n", "To import a selected hierarchy, please provide the hierarchy ", diff --git a/R/lowerTblData.R b/R/lowerTblData.R index 12fa7264..8f539bfa 100644 --- a/R/lowerTblData.R +++ b/R/lowerTblData.R @@ -11,6 +11,9 @@ #' @param tbls A named list of data frames representing the tables. #' @param level A character string specifying the name of the target lower level table. #' @param verbose A logical value indicating whether to print intermediate levels during recursion. +#' @param path_order Internal: character vector tracking the traversal path of IDs during +#' recursion to preserve a stable column order in the returned result. Users should not +#' normally set this; it is maintained by recursive calls (default NULL). #' #' @return A data frame containing the rows of the target lower level table that are associated with #' the given values of the upper table field. @@ -28,8 +31,7 @@ #' #' RDBEScore:::lowerTblData("TEid", c(4), tblsSprat, "LE", TRUE) #' @keywords internal -lowerTblData <- function(field, values, tbls, level, verbose = FALSE) { - #check if tables are of correct type +lowerTblData <- function(field, values, tbls, level, verbose = FALSE, path_order = NULL) { if(!is.list(tbls)) stop("tbls must be a list") start <- substr(field, start = 1, stop = 2) @@ -39,10 +41,9 @@ lowerTblData <- function(field, values, tbls, level, verbose = FALSE) { } currTbl <- which(start == names(tbls)) - #this assumes that the tables are in the correct order and no empty tables + # assumes tables are in correct order and no empty tables tc <- 1 nextTbl <- tbls[[currTbl + tc]] - #skip tables that are NULL while (is.null(nextTbl)) { tc <- tc + 1 if(currTbl + tc > length(tbls)) stop("No more lower tables found") @@ -59,6 +60,44 @@ lowerTblData <- function(field, values, tbls, level, verbose = FALSE) { values <- tbl[get(field) %in% values, get(curTblId)] field <- curTblId } + + # Initialize and update path order to ensure the original search field stays first + if (is.null(path_order)) path_order <- field + path_order <- unique(c(path_order, nextTblField)) + + # values in next table for recursion nextTblValues <- nextTbl[get(field) %in% values, get(nextTblField)] - lowerTblData(nextTblField, nextTblValues, tbls, level, verbose) + + # NEW: build link (current id -> next id) so we can keep intermediate IDs + linkDT <- unique(nextTbl[get(field) %in% values, + .(from = get(field), to = get(nextTblField))]) + data.table::setnames(linkDT, c("from","to"), c(field, nextTblField)) + + # recurse + res <- lowerTblData(nextTblField, nextTblValues, tbls, level, verbose, path_order) + + # NEW: merge link back so the current level's ID is preserved + # Always attach the authoritative parent id from the link. + # If a column with the same name already exists (from another variant), drop it first, + # then merge in the value derived from the current hierarchy. + if (field %in% names(res)) { + res[, (field) := NULL] + } + res <- merge( + linkDT[, c(nextTblField, field), with = FALSE], + res, + by = nextTblField, + all.y = TRUE, + allow.cartesian = TRUE + ) + + #keep path columns toward the front with stable overall path order + path_cols <- intersect(path_order, names(res)) + other_cols <- setdiff(names(res), path_cols) + if (length(path_cols) > 0) data.table::setcolorder(res, c(path_cols, other_cols)) + + # Ensure no data.table key is set on the result + data.table::setkeyv(res, NULL) + + res } diff --git a/R/utils.R b/R/utils.R index 8a1e7741..0e982554 100644 --- a/R/utils.R +++ b/R/utils.R @@ -28,7 +28,7 @@ evenMoreUsedVars <- c("Weightg", "Lengthmm", "Group", "WeightIndexSum", "WeightIndex", "H8ExampleEE1", "suLevels", "LengthTotal", "WeightMeasured", "LengthClass", "TotCount", "BVNumbersAtLength", "SAauxVarValue", "ISid", - "ISrecType", "..x","n_matches", + "ISrecType", "..x","n_matches","SAlowHierarchy", "SAparSequNum", "i.SAid", "SAparentID", "BVTotCount", "Age", "BVNumbersAtAge", "i.SAparentID", "i.subSampleLevel", "i.topLevelSAid", diff --git a/R/validateRDBESDataObjectDataTypes.R b/R/validateRDBESDataObjectDataTypes.R index 881d1071..b72e418c 100644 --- a/R/validateRDBESDataObjectDataTypes.R +++ b/R/validateRDBESDataObjectDataTypes.R @@ -61,10 +61,11 @@ validateRDBESDataObjectDataTypes <- function(objectToCheck){ names(myDiffs)[which(names(myDiffs) == "RDataType.x")] <- "RDataType_expected" names(myDiffs)[which(names(myDiffs) == "RDataType.y")] <- "RDataType_actual" - # Let's say that if we were expecting a numeric but got an intger that it's ok - myDiffs <- - myDiffs[!(myDiffs$RDataType_expected == "numeric" & - myDiffs$RDataType_actual == "integer"),] + # Treat integer and numeric as compatible in both directions (issue #251) + myDiffs <- myDiffs[! + ((myDiffs$RDataType_expected == "numeric" & myDiffs$RDataType_actual == "integer") | + (myDiffs$RDataType_expected == "integer" & myDiffs$RDataType_actual == "numeric")) + ,] # Return the differences myDiffs diff --git a/data-raw/exampleData/TextBookExamples/BuildTextBookDataUpload_All.R b/data-raw/exampleData/TextBookExamples/BuildTextBookDataUpload_All.R new file mode 100644 index 00000000..b9e98c61 --- /dev/null +++ b/data-raw/exampleData/TextBookExamples/BuildTextBookDataUpload_All.R @@ -0,0 +1,11 @@ +source("BuildTextBookDataUpload_Pckg_SDAResources_agsrs_H1.R") +source("BuildTextBookDataUpload_Pckg_SDAResources_agstrat_H1.R") +source("BuildTextBookDataUpload_Pckg_SDAResources_algebra_H1.R") +source("BuildTextBookDataUpload_Pckg_SDAResources_coots_H1.R") +source("BuildTextBookDataUpload_Pckg_SDAResources_coots_multistage_H1.R") +source("BuildTextBookDataUpload_Pckg_SDAResources_gpa_H1.R") +source("BuildTextBookDataUpload_Pckg_SDAResources_schools_H1.R") +source("BuildTextBookDataUpload_Pckg_survey_apiclus1_v2_H1.R") +source("BuildTextBookDataUpload_Pckg_survey_apiclus2_H1.R") +source("BuildTextBookDataUpload_Pckg_survey_apiclus2_v2_H1.R") +source("BuildTextBookDataUpload_Pckg_survey_apistrat_H1.R") diff --git a/data-raw/exampleData/TextBookExamples/BuildTextBookDataUpload_Pckg_SDAResources_agsrs_H1.R b/data-raw/exampleData/TextBookExamples/BuildTextBookDataUpload_Pckg_SDAResources_agsrs_H1.R index f464573f..2999be55 100644 --- a/data-raw/exampleData/TextBookExamples/BuildTextBookDataUpload_Pckg_SDAResources_agsrs_H1.R +++ b/data-raw/exampleData/TextBookExamples/BuildTextBookDataUpload_Pckg_SDAResources_agsrs_H1.R @@ -38,6 +38,7 @@ baseDir <- "" VD_base <- readRDS(paste0(baseDir,"aux_TextBookExamples/VD_base.rds")) SL_base <- readRDS(paste0(baseDir,"aux_TextBookExamples/SL_base.rds")) + IS_base <- readRDS(paste0(baseDir,"aux_TextBookExamples/IS_base.rds")) #nameof the directory where the outputs are saved currently @@ -62,22 +63,6 @@ -# 1 DEid [] - int -# 2 DErecordType [M] - string -# 3 DEsamplingScheme [M] - SamplingScheme -# 4 DEsamplingSchemeType [M] - SamplingSchemeType -# 5 DEyear [M] - Year -# 6 DEstratumName [M] - StringLength100 -# 7 DEhierarchyCorrect [M] - YesNoFields -# 8 DEhierarchy [M] - RDBESUpperHierarchy -# 9 DEsampled [DV,M] - YesNoFields -# 10 DEreasonNotSampled [DV,O] - ReasonForNotSampling -# 11 DEnonResponseCollected [DV,O] - YesNoFields -# 12 DEauxiliaryVariableTotal [DV,O] - DecimalPrec3 -# 13 DEauxiliaryVariableValue [DV,O] - DecimalPrec3 -# 14 DEauxiliaryVariableName [DV,O] - AuxiliaryVariableName -# 15 DEauxiliaryVariableUnit[DV,O]-MUNIT - DE_df_base<-expand.grid(DEyear=DEyear, DEstratumName="U",stringsAsFactors=F) @@ -103,13 +88,6 @@ DE_df<-data.frame( #===SD============ - # x -# 1 SDid [M] - int -# 2 DEid [M] - int -# 3 SDrecordType [M] - string -# 4 SDcountry [M] - ISO_3166 -# 5 SDinstitution [M] - EDMO - SD_df<-data.frame( SDid=DE_df$DEid, DEid=DE_df$DEid, @@ -121,41 +99,6 @@ SD_df<-data.frame( #===VS============ - - # x -# 1 VSid [M] - int -# 2 SDid [M] - int -# 3 VDid [M] - int -# 4 TEid [M/O] - int -# 5 VSrecordType [M] - string -# 6 VSsequenceNumber [M] - int -# 7 VSencryptedVesselCode [M] - StringLength100 -# 8 VSstratification [DV,M] - YesNoFields -# 9 VSstratumName [DV,M] - StringLength100 -# 10 VSclustering [DV,M] - Clustering -# 11 VSclusterName [DV,M] - StringLength100 -# 12 VSsampler [O] - Sampler -# 13 VSnumberTotal [DV,O] - int -# 14 VSnumberSampled [DV,O] - int -# 15 VSselectionProb [DV,O] - Decimal0.0000000000000001-1 -# 16 VSinclusionProb [DV,O] - Decimal0.0000000000000001-1 -# 17 VSselectionMethod [DV,M] - SelectionMethod -# 18 VSunitName [DV,M] - StringLength100 -# 19 VSselectionMethodCluster [DV,O] - SelectionMethod -# 20 VSnumberTotalClusters [DV,O] - int -# 21 VSnumberSampledClusters [DV,O] - int -# 22 VSselectionProbCluster [DV,O] - Decimal0.0000000000000001-1 -# 23 VSinclusionProbCluster [DV,O] - Decimal0.0000000000000001-1 -# 24 VSsampled [DV,M] - YesNoFields -# 25 VSreasonNotSampled [DV,O] - ReasonForNotSampling -# 26 VSnonResponseCollected [DV,O] - YesNoFields -# 27 VSauxiliaryVariableTotal [DV,O] - DecimalPrec3 -# 28 VSauxiliaryVariableValue [DV,O] - DecimalPrec3 -# 29 VSauxiliaryVariableName [DV,O] - AuxiliaryVariableName -# 30 VSauxiliaryVariableUnit [DV,O] - MUNIT - -#check_All_fields("VS") - # adds VSid to dataset dataset$VSid <- 1:nrow(dataset) @@ -210,49 +153,6 @@ VS_df$VSnumberTotal<-3078 #====FT=========== - -# 1 FTid [M] - int -# 2 OSid [M/O] - int -# 3 VSid [M/O] - int -# 4 VDid [M] - int -# 5 SDid [M/O] - int -# 6 FOid [M/O] - int -# 7 TEid [M/O] - int -# 8 FTrecordType [M] - string -# 9 FTencryptedVesselCode [M] - StringLength100 -# 10 FTsequenceNumber [M] - int -# 11 FTstratification [DV,M] - YesNoFields -# 12 FTstratumName [DV,M] - StringLength100 -# 13 FTclustering [DV,M] - Clustering -# 14 FTclusterName [DV,M] - StringLength100 -# 15 FTsampler [O] - Sampler -# 16 FTsamplingType [M] - SamplingContext -# 17 FTnumberOfHaulsOrSets [M/O] - IntZeroToMax -# 18 FTdepartureLocation [M/O] - Harbour_LOCODE -# 19 FTdepartureDate [M/O] - Date -# 20 FTdepartureTime [M/O] - StringLength60 -# 21 FTarrivalLocation [M] - Harbour_LOCODE -# 22 FTarrivalDate [M] - Date -# 23 FTarrivalTime [M/O] - StringLength60 -# 24 FTnumberTotal [DV,O] - int -# 25 FTnumberSampled [DV,O] - int -# 26 FTselectionProb [DV,O] - Decimal0-1 -# 27 FTinclusionProb [DV,O] - Decimal0-1 -# 28 FTselectionMethod [DV,M] - SelectionMethod -# 29 FTunitName [DV,M] - StringLength100 -# 30 FTselectionMethodCluster [DV,O] - SelectionMethod -# 31 FTnumberTotalClusters [DV,O] - int -# 32 FTnumberSampledClusters [DV,O] - int -# 33 FTselectionProbCluster [DV,O] - Decimal0-1 -# 34 FTinclusionProbCluster [DV,O] - Decimal0-1 -# 35 FTsampled [DV,M] - YesNoFields -# 36 FTreasonNotSampled [DV,O] - ReasonForNotSampling -# 37 FTnonResponseCollected [DV,O] - YesNoFields -# 38 FTauxiliaryVariableTotal [DV,O] - DecimalPrec3 -# 39 FTauxiliaryVariableValue [DV,O] - DecimalPrec3 -# 40 FTauxiliaryVariableName [DV,O] - AuxiliaryVariableName -# 41 FTauxiliaryVariableUnit [DV,O] - MUNIT - FT_df <- data.frame( FTid = dataset$VSid,#[M] - int OSid = "",# [M/O] - int @@ -277,6 +177,7 @@ FT_df <- data.frame( FTarrivalLocation = "ZWHWN", #[M] - Harbour_LOCODE FTarrivalDate=seq(from = as.Date("1968-01-01", format='%Y-%m-%d'), by=1, length.out=nrow(dataset)), #[M] - date FTarrivalTime="", #[O] - time + FTdominantLandingDate= "", FTnumberTotal= 1, #[DV,O] - int FTnumberSampled=1, #[DV,O] - int FTselectionProb=1, #[DV,O] - DecimalPrec10 @@ -302,72 +203,6 @@ FT_df <- data.frame( #====FO=========== -# 1 FOid [M] - int -# 2 FTid [M/O] - int -# 3 SDid [M/O] - int -# 4 FOrecordType [M] - string -# 5 FOstratification [DV,M] - YesNoFields -# 6 FOsequenceNumber [M] - int -# 7 FOstratumName [DV,M] - StringLength100 -# 8 FOclustering [DV,M] - Clustering -# 9 FOclusterName [DV,M] - StringLength100 -# 10 FOsampler [O] - Sampler -# 11 FOaggregationLevel [M] - AggregationLevel -# 12 FOvalidity [M] - ValidityFlag -# 13 FOcatchReg [M] - CatchRegistration -# 14 FOstartDate [O] - Date -# 15 FOstartTime [O] - Time -# 16 FOendDate [M] - Date -# 17 FOendTime [M/O] - Time -# 18 FOduration [M/O] - int -# 19 FOdurationSource [M] - DurationSource -# 20 FOhandlingTime [O] - int -# 21 FOstartLat [O] - Decimal-90.000000-90.000000 -# 22 FOstartLon [O] - Decimal-180.000000-180.000000 -# 23 FOstopLat [O] - Decimal-90.000000-90.000000 -# 24 FOstopLon [O] - Decimal-180.000000-180.000000 -# 25 FOexclusiveEconomicZoneIndicator [O] - ISO_3166 -# 26 FOarea [M] - ICES_Area -# 27 FOrectangle [O] - StatRec -# 28 FOfisheriesManagementUnit [O] - AreaNonFAO -# 29 FOgsaSubarea [M] - Areas_GFCM_GSA -# 30 FOjurisdictionArea [O] - JurisdictionArea -# 31 FOfishingDepth [O] - int -# 32 FOwaterDepth [O] - int -# 33 FOnationalFishingActivity [O] - NationalFishingActivity -# 34 FOmetier5 [O] - Metier5_FishingActivity -# 35 FOmetier6 [M] - Metier6_FishingActivity -# 36 FOgear [M] - GearType -# 37 FOmeshSize [O] - int -# 38 FOselectionDevice [O] - SelectionDevice -# 39 FOselectionDeviceMeshSize [O] - int -# 40 FOtargetSpecies [O] - TargetSpecies -# 41 FOincidentalByCatchMitigationDeviceFirst [M] - BycatchMitigationDevice -# 42 FOincidentalByCatchMitigationDeviceTargetFirst [M] - BycatchMitigationDeviceTarget -# 43 FOincidentalByCatchMitigationDeviceSecond [M] - BycatchMitigationDevice -# 44 FOincidentalByCatchMitigationDeviceTargetSecond [M] - BycatchMitigationDeviceTarget -# 45 FOgearDimensions [O] - int -# 46 FOobservationCode [M] - ObservationCode -# 47 FOnumberTotal [DV,O] - int -# 48 FOnumberSampled [DV,O] - int -# 49 FOselectionProb [DV,O] - Decimal0.0000000000000001-1 -# 50 FOinclusionProb [DV,O] - Decimal0.0000000000000001-1 -# 51 FOselectionMethod [DV,M] - SelectionMethod -# 52 FOunitName [DV,M] - StringLength100 -# 53 FOselectionMethodCluster [DV,O] - SelectionMethod -# 54 FOnumberTotalClusters [DV,O] - int -# 55 FOnumberSampledClusters [DV,O] - int -# 56 FOselectionProbCluster [DV,O] - Decimal0.0000000000000001-1 -# 57 FOinclusionProbCluster [DV,O] - Decimal0.0000000000000001-1 -# 58 FOsampled [DV,M] - YesNoFields -# 59 FOreasonNotSampled [DV,O] - ReasonForNotSampling -# 60 FOnonResponseCollected [DV,O] - YesNoFields -# 61 FOauxiliaryVariableTotal [DV,O] - DecimalPrec3 -# 62 FOauxiliaryVariableValue [DV,O] - DecimalPrec3 -# 63 FOauxiliaryVariableName [DV,O] - AuxiliaryVariableName -# 64 FOauxiliaryVariableUnit [DV,O] - MUNIT - - FO_df <- data.frame( FOid = dataset$VSid, FTid = dataset$VSid, @@ -387,24 +222,29 @@ FO_df <- data.frame( FOendDate = as.Date(FT_df$FTdepartureDate, format='%Y-%m-%d'),#M FOendTime ="",#M FOduration = 60, #M ATTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT!!!! + FOfishingDurationDataBasis = "Measured", FOdurationSource = "Crew", FOhandlingTime = "", FOstartLat="", # ATT! FOstartLon="", # ATT! FOstopLat="", FOstopLon="", - FOexclusiveEconomicZoneIndicator = "", # might differ!! + FOexclusiveEconomicZoneIndicator = "", # FOarea = "27.3.a.21", #M FOrectangle = "", FOfisheriesManagementUnit = "", FOgsaSubarea = "NotApplicable", #M FOjurisdictionArea = "", + FOgeographicalDataBasis = "Measured", + FOgeographicalSource = "Crew", FOfishingDepth = "", FOwaterDepth = "", FOnationalFishingActivity = "", FOmetier5 = "", FOmetier6 = "OTT_CRU_70-89_2_35",#M FOgear = "OTT",#M + FOgearDataBasis = "Measured", + FOgearSource = "", FOmeshSize = "", FOselectionDevice = "", FOselectionDeviceMeshSize = "", @@ -439,45 +279,6 @@ stringsAsFactors=FALSE #===SS============ -# 1 SSid [] - int -# 2 LEid [M/O] - int -# 3 FOid [M/O] - int -# 4 FTid [M/O] - int -# 5 OSid [M/O] - int -# 6 TEid [M/O] - int -# 7 SLid [M] - int -# 8 SSrecordType [M] - string -# 9 SSsequenceNumber [M] - int -# 10 SSstratification [DV,M] - YesNoFields -# 11 SSstratumName [DV,M] - StringLength100 -# 12 SSclustering [DV,M] - Clustering -# 13 SSclusterName [DV,M] - StringLength100 -# 14 SSobservationActivityType [DV,M] - ObservationActivityType -# 15 SScatchFraction [M] - CatchFraction -# 16 SSobservationType [DV,M] - ObservationMethod -# 17 SSsampler [O] - Sampler -# 18 SSspeciesListName [M] - StringLength100 -# 19 SSuseForCalculateZero [M] - YesNoFields -# 20 SStimeTotal [O] - int -# 21 SStimeSampled [O] - int -# 22 SSnumberTotal [DV,O] - int -# 23 SSnumberSampled [DV,O] - int -# 24 SSselectionProb [DV,O] - Decimal0.0000000000000001-1 -# 25 SSinclusionProb [DV,O] - Decimal0.0000000000000001-1 -# 26 SSselectionMethod [DV,M] - SelectionMethod -# 27 SSunitName [DV,M] - StringLength100 -# 28 SSselectionMethodCluster [DV,O] - SelectionMethod -# 29 SSnumberTotalClusters [DV,O] - int -# 30 SSnumberSampledClusters [DV,O] - int -# 31 SSselectionProbCluster [DV,O] - Decimal0.0000000000000001-1 -# 32 SSinclusionProbCluster [DV,O] - Decimal0.0000000000000001-1 -# 33 SSsampled [DV,M] - YesNoFields -# 34 SSreasonNotSampled [DV,O] - ReasonForNotSampling -# 35 SSnonResponseCollected [DV,O] - YesNoFields -# 36 SSauxiliaryVariableTotal [DV,O] - DecimalPrec3 -# 37 SSauxiliaryVariableValue [DV,O] - DecimalPrec3 -# 38 SSauxiliaryVariableName [DV,O] - AuxiliaryVariableName -# 39 SSauxiliaryVariableUnit [DV,O] - MUNIT SS_df<-data.frame( SSid = dataset$VSid, @@ -500,8 +301,10 @@ SS_df<-data.frame( SSspeciesListName = project_name_outputs, #M SSuseForCalculateZero = "N", #M SStimeTotal = "", + SStimeTotalDataBasis = "Measured", SStimeSampled = "", SSnumberTotal = 1, + SSnumberTotalDataBasis = "Measured", SSnumberSampled = 1, SSselectionProb = 1, SSinclusionProb = 1, @@ -524,117 +327,66 @@ SS_df<-data.frame( #====SA=========== - -# 1 SAid [] - int -# 2 SSid [] - int -# 3 SArecordType [M] - string -# 4 SAsequenceNumber [M] - int -# 5 SAparentSequenceNumber [O] - int -# 6 SAstratification [DV,M] - YesNoFields -# 7 SAstratumName [DV,M] - StringLength100 -# 8 SAspeciesCode [M] - SpecWoRMS -# 9 SAspeciesCodeFAO [O] - SpecASFIS -# 10 SAstateOfProcessing [M] - StateOfProcessing -# 11 SApresentation [M] - ProductPresentation -# 12 SAspecimensState [M] - SpecimensState -# 13 SAcatchCategory [M] - CatchCategory -# 14 SAlandingCategory [O] - LandingCategory -# 15 SAcommSizeCatScale [O] - CommercialSizeCategoryScale -# 16 SAcommSizeCat [O] - CommercialSizeCategory -# 17 SAsex [M] - SEXCO -# 18 SAexclusiveEconomicZoneIndicator [O] - ISO_3166 -# 19 SAarea [O] - ICES_Area -# 20 SArectangle [O] - StatRec -# 21 SAfisheriesManagementUnit [O] - AreaNonFAO -# 22 SAgsaSubarea [M] - Areas_GFCM_GSA -# 23 SAjurisdictionArea [O] - JurisdictionArea -# 24 SAnationalFishingActivity [O] - NationalFishingActivity -# 25 SAmetier5 [O] - Metier5_FishingActivity -# 26 SAmetier6 [O] - Metier6_FishingActivity -# 27 SAgear [O] - GearType -# 28 SAmeshSize [O] - int -# 29 SAselectionDevice [O] - SelectionDevice -# 30 SAselectionDeviceMeshSize [O] - int -# 31 SAunitType [M] - SamplingUnit -# 32 SAtotalWeightLive [O] - int -# 33 SAsampleWeightLive [O] - int -# 34 SAnumberTotal [DV,O] - Decimal0.1-999999999 -# 35 SAnumberSampled [DV,O] - Decimal0.1-999999999 -# 36 SAselectionProb [DV,O] - Decimal0-1 -# 37 SAinclusionProb [DV,O] - Decimal0-1 -# 38 SAselectionMethod [DV,M] - SelectionMethod -# 39 SAunitName [DV,M] - StringLength100 -# 40 SAlowerHierarchy [M/O] - LowerHierarchy -# 41 SAsampler [O] - Sampler -# 42 SAsampled [DV,M] - YesNoFields -# 43 SAreasonNotSampled [DV,O] - ReasonForNotSampling -# 44 SAnonResponseCollected [DV,O] - YesNoFields -# 45 SAreasonNotSampledFM [DV,O] - ReasonForNotSampling -# 46 SAreasonNotSampledBV [DV,O] - ReasonForNotSampling -# 47 SAtotalWeightMeasured [O] - int -# 48 SAsampleWeightMeasured [O] - int -# 49 SAconversionFactorMeasLive [O] - Decimal0.900-10 -# 50 SAauxiliaryVariableTotal [DV,O] - DecimalPrec3 -# 51 SAauxiliaryVariableValue [DV,O] - DecimalPrec3 -# 52 SAauxiliaryVariableName [DV,O] - AuxiliaryVariableName -# 53 SAauxiliaryVariableUnit [DV,O] - MUNIT - - SA_df<-data.frame( - SAid = dataset$VSid, - SSid = dataset$VSid, - SArecordType = "SA", #M - SAsequenceNumber = dataset$VSid, #M - SAparentSequenceNumber = "", - SAstratification = "N", #M - SAstratumName = "U", #M - SAspeciesCode = 107254, #M - SAspeciesCodeFAO = "NEP", - SAstateOfProcessing = "FRE", #M - SApresentation = "WHL", #M - SAspecimensState = "DeadOrZeroProbSurvival", #M - SAcatchCategory = "Lan", #M - SAlandingCategory = "", - SAcommSizeCatScale = "", - SAcommSizeCat = "", - SAsex = "U", #M - SAexclusiveEconomicZoneIndicator = "", - SAarea = ifelse(dataset$farmcat=='large','27.4.a','27.3.a.20'), # this is the domain - SArectangle = "", - SAfisheriesManagementUnit = "", - SAgsaSubarea = "NotApplicable", #M - SAjurisdictionArea = "", - SAnationalFishingActivity = "", - SAmetier5 = "", - SAmetier6 = "", - SAgear = "", - SAmeshSize = "", - SAselectionDevice = "", - SAselectionDeviceMeshSize = "", - SAunitType = "Box", #M - SAtotalWeightLive = "", - SAsampleWeightLive = "", - SAnumberTotal = 1, - SAnumberSampled = 1, - SAselectionProb = 1, - SAinclusionProb = 1, - SAselectionMethod = "CENSUS", #M - SAunitName = dataset$VSid, #M - SAlowerHierarchy = "D", - SAsampler = "Observer", - SAsampled = "N", #M - SAreasonNotSampled = "", - SAnonResponseCollected = "N", - SAreasonNotSampledFM = "", - SAreasonNotSampledBV = "", - SAtotalWeightMeasured = dataset[[target_var]], - SAsampleWeightMeasured = dataset[[target_var]], - SAconversionFactorMeasLive = 1, - SAauxiliaryVariableTotal = "", - SAauxiliaryVariableValue = "", - SAauxiliaryVariableName = "", - SAauxiliaryVariableUnit = "", - stringsAsFactors=FALSE +SAid = dataset$VSid, +SSid = dataset$VSid, +SArecordType = "SA", #M +SAsequenceNumber = dataset$VSid, #M +SAparentSequenceNumber = "", +SAstratification = "N", #M +SAstratumName = "U", #M +SAspeciesCode = 107254, #M +SAspeciesCodeFAO = "NEP", +SAstateOfProcessing = "FRE", #M +SApresentation = "WHL", #M +SAspecimensState = "DeadOrZeroProbSurvival", #M +SAcatchCategory = "Lan", #M +SAlandingCategory = "", +SAcommSizeCatScale = "", +SAcommSizeCat = "", +SAsex = "U", #M +SAexclusiveEconomicZoneIndicator = "", +SAarea = ifelse(dataset$farmcat=='large','27.4.a','27.3.a.20'), # this is the domain +SArectangle = "", +SAfisheriesManagementUnit = "", +SAgsaSubarea = "NotApplicable", #M +SAjurisdictionArea = "", +SAgeographicalDataBasis = "Measured", +SAgeographicalSource = "", +SAnationalFishingActivity = "", +SAmetier5 = "", +SAmetier6 = "", +SAgear = "", +SAgearDataBasis = "Measured", +SAgearSource = "", +SAmeshSize = "", +SAselectionDevice = "", +SAselectionDeviceMeshSize = "", +SAunitType = "Box", #M +SAtotalWeightLive = "", +SAsampleWeightLive = "", +SAnumberTotal = 1, +SAnumberSampled = 1, +SAselectionProb = 1, +SAinclusionProb = 1, +SAselectionMethod = "CENSUS", #M +SAunitName = dataset$VSid, #M +SAlowerHierarchy = "D", +SAsampler = "Observer", +SAsampled = "N", #M +SAreasonNotSampled = "", +SAnonResponseCollected = "N", +SAreasonNotSampledFM = "", +SAreasonNotSampledBV = "", +SAtotalWeightMeasured = dataset[[target_var]], +SAtotalWeightMeasuredDataBasis = "Measured", +SAsampleWeightMeasured = dataset[[target_var]], +SAconversionFactorMeasLive = 1, +SAauxiliaryVariableTotal = "", +SAauxiliaryVariableValue = "", +SAauxiliaryVariableName = "", +SAauxiliaryVariableUnit = "", +stringsAsFactors=FALSE ) @@ -687,15 +439,14 @@ RDBESlist[[i]][which(grepl(colnames(RDBESlist[[i]]),pat="[A-Z]id"))]<-NULL #===Save============ - dir_outputs<-paste0(base_dir_outputs,"/", - project_name_outputs,"/") - dir.create(dir_outputs, recursive=T, showWarnings=FALSE) - filename_output_CS <- paste0("H1.csv") - filename_output_SL <- paste0("HSL.csv") - filename_output_VD <- paste0("HVD.csv") + dir_outputs<-paste0(base_dir_outputs,"/", project_name_outputs,"/") + dir.create(dir_outputs, recursive=T, showWarnings=FALSE) + filename_output_CS <- paste0(dir_outputs,"H1.csv") + filename_output_SL <- paste0(dir_outputs,"HSL.csv") + filename_output_VD <- paste0(dir_outputs,"HVD.csv") -lapply(RDBESlist, function(x, filename1 = paste0(dir_outputs,filename_output_CS)){ +lapply(RDBESlist, function(x, filename1 = filename_output_CS){ if("DErecordType" %in% colnames(x)){ write.table(x, file = filename1, append = FALSE, quote = FALSE, sep = ",", eol = "\n", na = "NA", dec = ".", row.names = FALSE, @@ -707,7 +458,7 @@ write.table(x, file = filename1, append = TRUE, quote = FALSE, sep = ",", } }) -b<-read.table(file=paste0(dir_outputs,filename_output_CS), header=F, sep=";") +b<-read.table(file = filename_output_CS, header=F, sep=";") b<-cbind(key,b) b<-b[order(as.character(b$key), decreasing=FALSE),] b<-b[!is.na(key),] @@ -715,24 +466,30 @@ b$key<-NULL b$V1<-as.character(b$V1) # saves CS output -write.table(b$V1, file=paste0(dir_outputs,filename_output_CS), col.names=FALSE, row.names = FALSE, quote=FALSE,sep=",") +write.table(b$V1, file = filename_output_CS, col.names=FALSE, row.names = FALSE, quote=FALSE,sep=",") -# -----Builds and saves dummySL----------------- +# -----Builds and saves dummySL and dummyIS----------------- SL_base$SLspeciesListName<-project_name_outputs SL_base$SLyear<-DEyear SL_base$SLinstitute<-SDinstitution -# saves SL output - write.table(SL_base, file=paste0(dir_outputs,filename_output_SL), col.names=FALSE, row.names = FALSE, quote=FALSE,sep=",") - + + # saves SL output + write.table(rbind(paste(SL_base, collapse=","), paste(IS_base, collapse=",")), file=filename_output_SL, col.names=FALSE, row.names = FALSE, quote=FALSE,sep=",") # -----Builds and saves dummyVD----------------- - # saves VD output VD_base$VDyear<-DEyear - write.table(VD_base, file=paste0(dir_outputs,filename_output_VD), col.names=FALSE, row.names = FALSE, quote=FALSE,sep=",") + write.table(VD_base, file=filename_output_VD, col.names=FALSE, row.names = FALSE, quote=FALSE,sep=",") + + + + + +# check - is this needed? +if(1==2){ @@ -753,5 +510,5 @@ write.csv(tmp, file=paste0(dir_outputs,"tmp/SpeciesList.csv"), quote=F, row.name zip::zip(zipfile = paste0(project_name_outputs,".zip"), files =paste0(dir_outputs,"tmp/",dir(paste0(dir_outputs,"tmp"))), mode="cherry-pick") unlink(paste0(dir_outputs,"tmp"), force=T, recursive = T) - +} diff --git a/data-raw/exampleData/TextBookExamples/BuildTextBookDataUpload_Pckg_SDAResources_agstrat_H1.R b/data-raw/exampleData/TextBookExamples/BuildTextBookDataUpload_Pckg_SDAResources_agstrat_H1.R index 64872c3a..dba43362 100644 --- a/data-raw/exampleData/TextBookExamples/BuildTextBookDataUpload_Pckg_SDAResources_agstrat_H1.R +++ b/data-raw/exampleData/TextBookExamples/BuildTextBookDataUpload_Pckg_SDAResources_agstrat_H1.R @@ -32,7 +32,7 @@ DEstratumName <- "Pckg_SDAResources_agstrat_H1" project_name_outputs <- gsub(" ","_", paste0(DEsamplingScheme,"_", DEstratumName)) baseDir <- "./data-raw/exampleData/TextBookExamples/" - #baseDir <- "" + baseDir <- "" VD_base <- readRDS(paste0(baseDir,"aux_TextBookExamples/VD_base.rds")) SL_base <- readRDS(paste0(baseDir,"aux_TextBookExamples/SL_base.rds")) IS_base <- readRDS(paste0(baseDir,"aux_TextBookExamples/IS_base.rds")) @@ -59,22 +59,6 @@ -# 1 DEid [] - int -# 2 DErecordType [M] - string -# 3 DEsamplingScheme [M] - SamplingScheme -# 4 DEsamplingSchemeType [M] - SamplingSchemeType -# 5 DEyear [M] - Year -# 6 DEstratumName [M] - StringLength100 -# 7 DEhierarchyCorrect [M] - YesNoFields -# 8 DEhierarchy [M] - RDBESUpperHierarchy -# 9 DEsampled [DV,M] - YesNoFields -# 10 DEreasonNotSampled [DV,O] - ReasonForNotSampling -# 11 DEnonResponseCollected [DV,O] - YesNoFields -# 12 DEauxiliaryVariableTotal [DV,O] - DecimalPrec3 -# 13 DEauxiliaryVariableValue [DV,O] - DecimalPrec3 -# 14 DEauxiliaryVariableName [DV,O] - AuxiliaryVariableName -# 15 DEauxiliaryVariableUnit[DV,O]-MUNIT - DE_df_base<-expand.grid(DEyear=DEyear, DEstratumName="U",stringsAsFactors=F) @@ -100,13 +84,6 @@ DE_df<-data.frame( #===SD============ - # x -# 1 SDid [M] - int -# 2 DEid [M] - int -# 3 SDrecordType [M] - string -# 4 SDcountry [M] - ISO_3166 -# 5 SDinstitution [M] - EDMO - SD_df<-data.frame( SDid=DE_df$DEid, DEid=DE_df$DEid, @@ -118,41 +95,6 @@ SD_df<-data.frame( #===VS============ - - # x -# 1 VSid [M] - int -# 2 SDid [M] - int -# 3 VDid [M] - int -# 4 TEid [M/O] - int -# 5 VSrecordType [M] - string -# 6 VSsequenceNumber [M] - int -# 7 VSencryptedVesselCode [M] - StringLength100 -# 8 VSstratification [DV,M] - YesNoFields -# 9 VSstratumName [DV,M] - StringLength100 -# 10 VSclustering [DV,M] - Clustering -# 11 VSclusterName [DV,M] - StringLength100 -# 12 VSsampler [O] - Sampler -# 13 VSnumberTotal [DV,O] - int -# 14 VSnumberSampled [DV,O] - int -# 15 VSselectionProb [DV,O] - Decimal0.0000000000000001-1 -# 16 VSinclusionProb [DV,O] - Decimal0.0000000000000001-1 -# 17 VSselectionMethod [DV,M] - SelectionMethod -# 18 VSunitName [DV,M] - StringLength100 -# 19 VSselectionMethodCluster [DV,O] - SelectionMethod -# 20 VSnumberTotalClusters [DV,O] - int -# 21 VSnumberSampledClusters [DV,O] - int -# 22 VSselectionProbCluster [DV,O] - Decimal0.0000000000000001-1 -# 23 VSinclusionProbCluster [DV,O] - Decimal0.0000000000000001-1 -# 24 VSsampled [DV,M] - YesNoFields -# 25 VSreasonNotSampled [DV,O] - ReasonForNotSampling -# 26 VSnonResponseCollected [DV,O] - YesNoFields -# 27 VSauxiliaryVariableTotal [DV,O] - DecimalPrec3 -# 28 VSauxiliaryVariableValue [DV,O] - DecimalPrec3 -# 29 VSauxiliaryVariableName [DV,O] - AuxiliaryVariableName -# 30 VSauxiliaryVariableUnit [DV,O] - MUNIT - -#check_All_fields("VS") - # adds VSid to dataset dataset$VSid <- 1:nrow(dataset) @@ -207,164 +149,56 @@ VS_df$VSnumberTotal<-strataSize[VS_df$VSstratumName] #====FT=========== - -# 1 FTid [M] - int -# 2 OSid [M/O] - int -# 3 VSid [M/O] - int -# 4 VDid [M] - int -# 5 SDid [M/O] - int -# 6 FOid [M/O] - int -# 7 TEid [M/O] - int -# 8 FTrecordType [M] - string -# 9 FTencryptedVesselCode [M] - StringLength100 -# 10 FTsequenceNumber [M] - int -# 11 FTstratification [DV,M] - YesNoFields -# 12 FTstratumName [DV,M] - StringLength100 -# 13 FTclustering [DV,M] - Clustering -# 14 FTclusterName [DV,M] - StringLength100 -# 15 FTsampler [O] - Sampler -# 16 FTsamplingType [M] - SamplingContext -# 17 FTnumberOfHaulsOrSets [M/O] - IntZeroToMax -# 18 FTdepartureLocation [M/O] - Harbour_LOCODE -# 19 FTdepartureDate [M/O] - Date -# 20 FTdepartureTime [M/O] - StringLength60 -# 21 FTarrivalLocation [M] - Harbour_LOCODE -# 22 FTarrivalDate [M] - Date -# 23 FTarrivalTime [M/O] - StringLength60 -# 24 FTnumberTotal [DV,O] - int -# 25 FTnumberSampled [DV,O] - int -# 26 FTselectionProb [DV,O] - Decimal0-1 -# 27 FTinclusionProb [DV,O] - Decimal0-1 -# 28 FTselectionMethod [DV,M] - SelectionMethod -# 29 FTunitName [DV,M] - StringLength100 -# 30 FTselectionMethodCluster [DV,O] - SelectionMethod -# 31 FTnumberTotalClusters [DV,O] - int -# 32 FTnumberSampledClusters [DV,O] - int -# 33 FTselectionProbCluster [DV,O] - Decimal0-1 -# 34 FTinclusionProbCluster [DV,O] - Decimal0-1 -# 35 FTsampled [DV,M] - YesNoFields -# 36 FTreasonNotSampled [DV,O] - ReasonForNotSampling -# 37 FTnonResponseCollected [DV,O] - YesNoFields -# 38 FTauxiliaryVariableTotal [DV,O] - DecimalPrec3 -# 39 FTauxiliaryVariableValue [DV,O] - DecimalPrec3 -# 40 FTauxiliaryVariableName [DV,O] - AuxiliaryVariableName -# 41 FTauxiliaryVariableUnit [DV,O] - MUNIT - FT_df <- data.frame( - FTid = dataset$VSid,#[M] - int - OSid = "",# [M/O] - int - VSid = dataset$VSid, #[M/O] - VDid = "", #[M] - int - SDid = "", #[M/O] - int - FOid = "", #[M/O] - int - TEid = "", #[M/O] - int - FTrecordType='FT', #[M] - string - FTencryptedVesselCode = dataset$VSencryptedVesselCode, #[M] - FTsequenceNumber = as.integer(1:nrow(dataset)), #[M] - string - FTstratification = "N", #[DV,M] - RS_Stratification - FTstratumName = "U", #[DV,M] - string - FTclustering = "N", #[DV,M] - RS_Clustering - FTclusterName = "U", #[DV,M] - string - FTsampler = "Observer", #[M] - RS_Sampler - FTsamplingType = "AtSea" , #[M] - RS_SamplingType - FTnumberOfHaulsOrSets = 1, #[O] - int - FTdepartureLocation="ZWHWN", #[O] - Harbour_LOCODE - FTdepartureDate=seq(from = as.Date("1968-01-01", format='%Y-%m-%d'), by=1, length.out=nrow(dataset)), #[M/O] - date - FTdepartureTime="", #[O] - time - FTarrivalLocation = "ZWHWN", #[M] - Harbour_LOCODE - FTarrivalDate=seq(from = as.Date("1968-01-01", format='%Y-%m-%d'), by=1, length.out=nrow(dataset)), #[M] - date - FTarrivalTime="", #[O] - time - FTnumberTotal= 1, #[DV,O] - int - FTnumberSampled=1, #[DV,O] - int - FTselectionProb=1, #[DV,O] - DecimalPrec10 - FTinclusionProb=1, #[DV,O] - DecimalPrec10 - FTselectionMethod="CENSUS", #[DV,M] - RS_SelectionMethod - FTunitName = dataset$VSid, #[DV,M] - string - FTselectionMethodCluster="", #[DV,O] - RS_SelectionMethod - FTnumberTotalClusters="", #[DV,O] - int - FTnumberSampledClusters="", #[DV,O] - int - FTselectionProbCluster="", #[DV,O] - DecimalPrec10 - FTinclusionProbCluster="", #[DV,O] - DecimalPrec10 - FTsampled="Y", #[DV,M] - YesNoFields - FTreasonNotSampled= "", #[DV,O] - RS_ReasonForNotSampling - FTnonResponseCollected = "N", - FTauxiliaryVariableTotal = "", - FTauxiliaryVariableValue = "", - FTauxiliaryVariableName = "", - FTauxiliaryVariableUnit = "", - stringsAsFactors=FALSE +FTid = dataset$VSid,#[M] - int +OSid = "",# [M/O] - int +VSid = dataset$VSid, #[M/O] +VDid = "", #[M] - int +SDid = "", #[M/O] - int +FOid = "", #[M/O] - int +TEid = "", #[M/O] - int +FTrecordType='FT', #[M] - string +FTencryptedVesselCode = dataset$VSencryptedVesselCode, #[M] +FTsequenceNumber = as.integer(1:nrow(dataset)), #[M] - string +FTstratification = "N", #[DV,M] - RS_Stratification +FTstratumName = "U", #[DV,M] - string +FTclustering = "N", #[DV,M] - RS_Clustering +FTclusterName = "U", #[DV,M] - string +FTsampler = "Observer", #[M] - RS_Sampler +FTsamplingType = "AtSea" , #[M] - RS_SamplingType +FTnumberOfHaulsOrSets = 1, #[O] - int +FTdepartureLocation="ZWHWN", #[O] - Harbour_LOCODE +FTdepartureDate=seq(from = as.Date("1968-01-01", format='%Y-%m-%d'), by=1, length.out=nrow(dataset)), #[M/O] - date +FTdepartureTime="", #[O] - time +FTarrivalLocation = "ZWHWN", #[M] - Harbour_LOCODE +FTarrivalDate=seq(from = as.Date("1968-01-01", format='%Y-%m-%d'), by=1, length.out=nrow(dataset)), #[M] - date +FTarrivalTime="", #[O] - time +FTdominantLandingDate= "", +FTnumberTotal= 1, #[DV,O] - int +FTnumberSampled=1, #[DV,O] - int +FTselectionProb=1, #[DV,O] - DecimalPrec10 +FTinclusionProb=1, #[DV,O] - DecimalPrec10 +FTselectionMethod="CENSUS", #[DV,M] - RS_SelectionMethod +FTunitName = dataset$VSid, #[DV,M] - string +FTselectionMethodCluster="", #[DV,O] - RS_SelectionMethod +FTnumberTotalClusters="", #[DV,O] - int +FTnumberSampledClusters="", #[DV,O] - int +FTselectionProbCluster="", #[DV,O] - DecimalPrec10 +FTinclusionProbCluster="", #[DV,O] - DecimalPrec10 +FTsampled="Y", #[DV,M] - YesNoFields +FTreasonNotSampled= "", #[DV,O] - RS_ReasonForNotSampling +FTnonResponseCollected = "N", +FTauxiliaryVariableTotal = "", +FTauxiliaryVariableValue = "", +FTauxiliaryVariableName = "", +FTauxiliaryVariableUnit = "", +stringsAsFactors=FALSE ) #====FO=========== -# 1 FOid [M] - int -# 2 FTid [M/O] - int -# 3 SDid [M/O] - int -# 4 FOrecordType [M] - string -# 5 FOstratification [DV,M] - YesNoFields -# 6 FOsequenceNumber [M] - int -# 7 FOstratumName [DV,M] - StringLength100 -# 8 FOclustering [DV,M] - Clustering -# 9 FOclusterName [DV,M] - StringLength100 -# 10 FOsampler [O] - Sampler -# 11 FOaggregationLevel [M] - AggregationLevel -# 12 FOvalidity [M] - ValidityFlag -# 13 FOcatchReg [M] - CatchRegistration -# 14 FOstartDate [O] - Date -# 15 FOstartTime [O] - Time -# 16 FOendDate [M] - Date -# 17 FOendTime [M/O] - Time -# 18 FOduration [M/O] - int -# 19 FOdurationSource [M] - DurationSource -# 20 FOhandlingTime [O] - int -# 21 FOstartLat [O] - Decimal-90.000000-90.000000 -# 22 FOstartLon [O] - Decimal-180.000000-180.000000 -# 23 FOstopLat [O] - Decimal-90.000000-90.000000 -# 24 FOstopLon [O] - Decimal-180.000000-180.000000 -# 25 FOexclusiveEconomicZoneIndicator [O] - ISO_3166 -# 26 FOarea [M] - ICES_Area -# 27 FOrectangle [O] - StatRec -# 28 FOfisheriesManagementUnit [O] - AreaNonFAO -# 29 FOgsaSubarea [M] - Areas_GFCM_GSA -# 30 FOjurisdictionArea [O] - JurisdictionArea -# 31 FOfishingDepth [O] - int -# 32 FOwaterDepth [O] - int -# 33 FOnationalFishingActivity [O] - NationalFishingActivity -# 34 FOmetier5 [O] - Metier5_FishingActivity -# 35 FOmetier6 [M] - Metier6_FishingActivity -# 36 FOgear [M] - GearType -# 37 FOmeshSize [O] - int -# 38 FOselectionDevice [O] - SelectionDevice -# 39 FOselectionDeviceMeshSize [O] - int -# 40 FOtargetSpecies [O] - TargetSpecies -# 41 FOincidentalByCatchMitigationDeviceFirst [M] - BycatchMitigationDevice -# 42 FOincidentalByCatchMitigationDeviceTargetFirst [M] - BycatchMitigationDeviceTarget -# 43 FOincidentalByCatchMitigationDeviceSecond [M] - BycatchMitigationDevice -# 44 FOincidentalByCatchMitigationDeviceTargetSecond [M] - BycatchMitigationDeviceTarget -# 45 FOgearDimensions [O] - int -# 46 FOobservationCode [M] - ObservationCode -# 47 FOnumberTotal [DV,O] - int -# 48 FOnumberSampled [DV,O] - int -# 49 FOselectionProb [DV,O] - Decimal0.0000000000000001-1 -# 50 FOinclusionProb [DV,O] - Decimal0.0000000000000001-1 -# 51 FOselectionMethod [DV,M] - SelectionMethod -# 52 FOunitName [DV,M] - StringLength100 -# 53 FOselectionMethodCluster [DV,O] - SelectionMethod -# 54 FOnumberTotalClusters [DV,O] - int -# 55 FOnumberSampledClusters [DV,O] - int -# 56 FOselectionProbCluster [DV,O] - Decimal0.0000000000000001-1 -# 57 FOinclusionProbCluster [DV,O] - Decimal0.0000000000000001-1 -# 58 FOsampled [DV,M] - YesNoFields -# 59 FOreasonNotSampled [DV,O] - ReasonForNotSampling -# 60 FOnonResponseCollected [DV,O] - YesNoFields -# 61 FOauxiliaryVariableTotal [DV,O] - DecimalPrec3 -# 62 FOauxiliaryVariableValue [DV,O] - DecimalPrec3 -# 63 FOauxiliaryVariableName [DV,O] - AuxiliaryVariableName -# 64 FOauxiliaryVariableUnit [DV,O] - MUNIT - - FO_df <- data.frame( FOid = dataset$VSid, FTid = dataset$VSid, @@ -384,24 +218,29 @@ FO_df <- data.frame( FOendDate = as.Date(FT_df$FTdepartureDate, format='%Y-%m-%d'),#M FOendTime ="",#M FOduration = 60, #M ATTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT!!!! + FOfishingDurationDataBasis = "Measured", FOdurationSource = "Crew", FOhandlingTime = "", FOstartLat="", # ATT! FOstartLon="", # ATT! FOstopLat="", FOstopLon="", - FOexclusiveEconomicZoneIndicator = "", # might differ!! + FOexclusiveEconomicZoneIndicator = "", # FOarea = "27.3.a.21", #M FOrectangle = "", FOfisheriesManagementUnit = "", FOgsaSubarea = "NotApplicable", #M FOjurisdictionArea = "", + FOgeographicalDataBasis = "Measured", + FOgeographicalSource = "Crew", FOfishingDepth = "", FOwaterDepth = "", FOnationalFishingActivity = "", FOmetier5 = "", FOmetier6 = "OTT_CRU_70-89_2_35",#M FOgear = "OTT",#M + FOgearDataBasis = "Measured", + FOgearSource = "", FOmeshSize = "", FOselectionDevice = "", FOselectionDeviceMeshSize = "", @@ -436,45 +275,6 @@ stringsAsFactors=FALSE #===SS============ -# 1 SSid [] - int -# 2 LEid [M/O] - int -# 3 FOid [M/O] - int -# 4 FTid [M/O] - int -# 5 OSid [M/O] - int -# 6 TEid [M/O] - int -# 7 SLid [M] - int -# 8 SSrecordType [M] - string -# 9 SSsequenceNumber [M] - int -# 10 SSstratification [DV,M] - YesNoFields -# 11 SSstratumName [DV,M] - StringLength100 -# 12 SSclustering [DV,M] - Clustering -# 13 SSclusterName [DV,M] - StringLength100 -# 14 SSobservationActivityType [DV,M] - ObservationActivityType -# 15 SScatchFraction [M] - CatchFraction -# 16 SSobservationType [DV,M] - ObservationMethod -# 17 SSsampler [O] - Sampler -# 18 SSspeciesListName [M] - StringLength100 -# 19 SSuseForCalculateZero [M] - YesNoFields -# 20 SStimeTotal [O] - int -# 21 SStimeSampled [O] - int -# 22 SSnumberTotal [DV,O] - int -# 23 SSnumberSampled [DV,O] - int -# 24 SSselectionProb [DV,O] - Decimal0.0000000000000001-1 -# 25 SSinclusionProb [DV,O] - Decimal0.0000000000000001-1 -# 26 SSselectionMethod [DV,M] - SelectionMethod -# 27 SSunitName [DV,M] - StringLength100 -# 28 SSselectionMethodCluster [DV,O] - SelectionMethod -# 29 SSnumberTotalClusters [DV,O] - int -# 30 SSnumberSampledClusters [DV,O] - int -# 31 SSselectionProbCluster [DV,O] - Decimal0.0000000000000001-1 -# 32 SSinclusionProbCluster [DV,O] - Decimal0.0000000000000001-1 -# 33 SSsampled [DV,M] - YesNoFields -# 34 SSreasonNotSampled [DV,O] - ReasonForNotSampling -# 35 SSnonResponseCollected [DV,O] - YesNoFields -# 36 SSauxiliaryVariableTotal [DV,O] - DecimalPrec3 -# 37 SSauxiliaryVariableValue [DV,O] - DecimalPrec3 -# 38 SSauxiliaryVariableName [DV,O] - AuxiliaryVariableName -# 39 SSauxiliaryVariableUnit [DV,O] - MUNIT SS_df<-data.frame( SSid = dataset$VSid, @@ -497,8 +297,10 @@ SS_df<-data.frame( SSspeciesListName = project_name_outputs, #M SSuseForCalculateZero = "N", #M SStimeTotal = "", + SStimeTotalDataBasis = "Measured", SStimeSampled = "", SSnumberTotal = 1, + SSnumberTotalDataBasis = "Measured", SSnumberSampled = 1, SSselectionProb = 1, SSinclusionProb = 1, @@ -521,142 +323,75 @@ SS_df<-data.frame( #====SA=========== - -# 1 SAid [] - int -# 2 SSid [] - int -# 3 SArecordType [M] - string -# 4 SAsequenceNumber [M] - int -# 5 SAparentSequenceNumber [O] - int -# 6 SAstratification [DV,M] - YesNoFields -# 7 SAstratumName [DV,M] - StringLength100 -# 8 SAspeciesCode [M] - SpecWoRMS -# 9 SAspeciesCodeFAO [O] - SpecASFIS -# 10 SAstateOfProcessing [M] - StateOfProcessing -# 11 SApresentation [M] - ProductPresentation -# 12 SAspecimensState [M] - SpecimensState -# 13 SAcatchCategory [M] - CatchCategory -# 14 SAlandingCategory [O] - LandingCategory -# 15 SAcommSizeCatScale [O] - CommercialSizeCategoryScale -# 16 SAcommSizeCat [O] - CommercialSizeCategory -# 17 SAsex [M] - SEXCO -# 18 SAexclusiveEconomicZoneIndicator [O] - ISO_3166 -# 19 SAarea [O] - ICES_Area -# 20 SArectangle [O] - StatRec -# 21 SAfisheriesManagementUnit [O] - AreaNonFAO -# 22 SAgsaSubarea [M] - Areas_GFCM_GSA -# 23 SAjurisdictionArea [O] - JurisdictionArea -# 24 SAnationalFishingActivity [O] - NationalFishingActivity -# 25 SAmetier5 [O] - Metier5_FishingActivity -# 26 SAmetier6 [O] - Metier6_FishingActivity -# 27 SAgear [O] - GearType -# 28 SAmeshSize [O] - int -# 29 SAselectionDevice [O] - SelectionDevice -# 30 SAselectionDeviceMeshSize [O] - int -# 31 SAunitType [M] - SamplingUnit -# 32 SAtotalWeightLive [O] - int -# 33 SAsampleWeightLive [O] - int -# 34 SAnumberTotal [DV,O] - Decimal0.1-999999999 -# 35 SAnumberSampled [DV,O] - Decimal0.1-999999999 -# 36 SAselectionProb [DV,O] - Decimal0-1 -# 37 SAinclusionProb [DV,O] - Decimal0-1 -# 38 SAselectionMethod [DV,M] - SelectionMethod -# 39 SAunitName [DV,M] - StringLength100 -# 40 SAlowerHierarchy [M/O] - LowerHierarchy -# 41 SAsampler [O] - Sampler -# 42 SAsampled [DV,M] - YesNoFields -# 43 SAreasonNotSampled [DV,O] - ReasonForNotSampling -# 44 SAnonResponseCollected [DV,O] - YesNoFields -# 45 SAreasonNotSampledFM [DV,O] - ReasonForNotSampling -# 46 SAreasonNotSampledBV [DV,O] - ReasonForNotSampling -# 47 SAtotalWeightMeasured [O] - int -# 48 SAsampleWeightMeasured [O] - int -# 49 SAconversionFactorMeasLive [O] - Decimal0.900-10 -# 50 SAauxiliaryVariableTotal [DV,O] - DecimalPrec3 -# 51 SAauxiliaryVariableValue [DV,O] - DecimalPrec3 -# 52 SAauxiliaryVariableName [DV,O] - AuxiliaryVariableName -# 53 SAauxiliaryVariableUnit [DV,O] - MUNIT - - SA_df<-data.frame( - SAid = dataset$VSid, - SSid = dataset$VSid, - SArecordType = "SA", #M - SAsequenceNumber = dataset$VSid, #M - SAparentSequenceNumber = "", - SAstratification = "N", #M - SAstratumName = "U", #M - SAspeciesCode = 107254, #M - SAspeciesCodeFAO = "NEP", - SAstateOfProcessing = "FRE", #M - SApresentation = "WHL", #M - SAspecimensState = "DeadOrZeroProbSurvival", #M - SAcatchCategory = "Lan", #M - SAlandingCategory = "", - SAcommSizeCatScale = "", - SAcommSizeCat = "", - SAsex = "U", #M - SAexclusiveEconomicZoneIndicator = "", - SAarea = "", - SArectangle = "", - SAfisheriesManagementUnit = "", - SAgsaSubarea = "NotApplicable", #M - SAjurisdictionArea = "", - SAnationalFishingActivity = "", - SAmetier5 = "", - SAmetier6 = "", - SAgear = "", - SAmeshSize = "", - SAselectionDevice = "", - SAselectionDeviceMeshSize = "", - SAunitType = "Box", #M - SAtotalWeightLive = "", - SAsampleWeightLive = "", - SAnumberTotal = 1, - SAnumberSampled = 1, - SAselectionProb = 1, - SAinclusionProb = 1, - SAselectionMethod = "CENSUS", #M - SAunitName = dataset$VSid, #M - SAlowerHierarchy = "D", - SAsampler = "Observer", - SAsampled = "N", #M - SAreasonNotSampled = "", - SAnonResponseCollected = "N", - SAreasonNotSampledFM = "", - SAreasonNotSampledBV = "", - SAtotalWeightMeasured = dataset[[target_var]], - SAsampleWeightMeasured = dataset[[target_var]], - SAconversionFactorMeasLive = 1, - SAauxiliaryVariableTotal = "", - SAauxiliaryVariableValue = "", - SAauxiliaryVariableName = "", - SAauxiliaryVariableUnit = "", - stringsAsFactors=FALSE +SAid = dataset$VSid, +SSid = dataset$VSid, +SArecordType = "SA", #M +SAsequenceNumber = dataset$VSid, #M +SAparentSequenceNumber = "", +SAstratification = "N", #M +SAstratumName = "U", #M +SAspeciesCode = 107254, #M +SAspeciesCodeFAO = "NEP", +SAstateOfProcessing = "FRE", #M +SApresentation = "WHL", #M +SAspecimensState = "DeadOrZeroProbSurvival", #M +SAcatchCategory = "Lan", #M +SAlandingCategory = "", +SAcommSizeCatScale = "", +SAcommSizeCat = "", +SAsex = "U", #M +SAexclusiveEconomicZoneIndicator = "", +SAarea = "", +SArectangle = "", +SAfisheriesManagementUnit = "", +SAgsaSubarea = "NotApplicable", #M +SAjurisdictionArea = "", +SAgeographicalDataBasis = "Measured", +SAgeographicalSource = "", +SAnationalFishingActivity = "", +SAmetier5 = "", +SAmetier6 = "", +SAgear = "", +SAgearDataBasis = "Measured", +SAgearSource = "", +SAmeshSize = "", +SAselectionDevice = "", +SAselectionDeviceMeshSize = "", +SAunitType = "Box", #M +SAtotalWeightLive = "", +SAsampleWeightLive = "", +SAnumberTotal = 1, +SAnumberSampled = 1, +SAselectionProb = 1, +SAinclusionProb = 1, +SAselectionMethod = "CENSUS", #M +SAunitName = dataset$VSid, #M +SAlowerHierarchy = "D", +SAsampler = "Observer", +SAsampled = "N", #M +SAreasonNotSampled = "", +SAnonResponseCollected = "N", +SAreasonNotSampledFM = "", +SAreasonNotSampledBV = "", +SAtotalWeightMeasured = dataset[[target_var]], +SAtotalWeightMeasuredDataBasis = "Measured", +SAsampleWeightMeasured = dataset[[target_var]], +SAconversionFactorMeasLive = 1, +SAauxiliaryVariableTotal = "", +SAauxiliaryVariableValue = "", +SAauxiliaryVariableName = "", +SAauxiliaryVariableUnit = "", +stringsAsFactors=FALSE ) - - #====Builds final format=========== RDBESlist = list(DE = DE_df,SD = SD_df, VS = VS_df, FT = FT_df, FO = FO_df, SS = SS_df, SA = SA_df) - -a<-RDBESlist -a[c("TE","LO","OS","LE","CL","CE","BV","FM")] <- NULL -a$SA[c("SAgeoDatBas","SAgeoSou","SAgeaDatBas","SAgearSou","SAtotWtMeaDatBas")] <- NA -a$FO[c("FOfishDuraDatBas","FOgeoDatBas","FOgeoSou","FOgeaDatBas","FOgearSou")] <- NA -a$SS[c("SStimeTotalDatBas","SSnumTotalDatBas")] <- NA -a$FT$FTdomLanDate <- NA -#a$IS <- data.frame(ISid = 1, SLid = 47865, IScommTaxon = 107254, ISsppCode = 107254) -a$SL[c("SLcommTaxon","SLsppCode")] <- NULL -Pckg_SDAResources_agstrat_H1 <- createRDBESDataObject(a, verbose = F) -usethis::use_data(Pckg_SDAResources_agstrat_H1, overwrite = TRUE) - -stop("The new code stops here!") - #id table a<-merge(DE_df["DEid"],SD_df[c("DEid","SDid")]) a<-merge(a, VS_df[c("SDid","VSid")], all.x=T) @@ -700,16 +435,14 @@ RDBESlist[[i]][which(grepl(colnames(RDBESlist[[i]]),pat="[A-Z]id"))]<-NULL #===Save============ - dir_outputs<-paste0(base_dir_outputs,"/", - project_name_outputs,"/") - dir.create(dir_outputs, recursive=T, showWarnings=FALSE) - filename_output_CS <- paste0("H1.csv") - filename_output_SL <- paste0("HSL.csv") - filename_output_IS <- paste0("IS.csv") - filename_output_VD <- paste0("HVD.csv") + dir_outputs<-paste0(base_dir_outputs,"/", project_name_outputs,"/") + dir.create(dir_outputs, recursive=T, showWarnings=FALSE) + filename_output_CS <- paste0(dir_outputs,"H1.csv") + filename_output_SL <- paste0(dir_outputs,"HSL.csv") + filename_output_VD <- paste0(dir_outputs,"HVD.csv") -lapply(RDBESlist, function(x, filename1 = paste0(dir_outputs,filename_output_CS)){ +lapply(RDBESlist, function(x, filename1 = filename_output_CS){ if("DErecordType" %in% colnames(x)){ write.table(x, file = filename1, append = FALSE, quote = FALSE, sep = ",", eol = "\n", na = "NA", dec = ".", row.names = FALSE, @@ -721,7 +454,7 @@ write.table(x, file = filename1, append = TRUE, quote = FALSE, sep = ",", } }) -b<-read.table(file=paste0(dir_outputs,filename_output_CS), header=F, sep=";") +b<-read.table(file = filename_output_CS, header=F, sep=";") b<-cbind(key,b) b<-b[order(as.character(b$key), decreasing=FALSE),] b<-b[!is.na(key),] @@ -729,32 +462,30 @@ b$key<-NULL b$V1<-as.character(b$V1) # saves CS output -write.table(b$V1, file=paste0(dir_outputs,filename_output_CS), col.names=FALSE, row.names = FALSE, quote=FALSE,sep=",") +write.table(b$V1, file = filename_output_CS, col.names=FALSE, row.names = FALSE, quote=FALSE,sep=",") -# -----Builds and saves dummySL----------------- +# -----Builds and saves dummySL and dummyIS----------------- SL_base$SLspeciesListName<-project_name_outputs SL_base$SLyear<-DEyear SL_base$SLinstitute<-SDinstitution -# saves SL output - write.table(SL_base, file=paste0(dir_outputs,filename_output_SL), col.names=FALSE, row.names = FALSE, quote=FALSE,sep=",") - - # -----Builds and saves dummyIS----------------- - + + # saves SL output + write.table(rbind(paste(SL_base, collapse=","), paste(IS_base, collapse=",")), file=filename_output_SL, col.names=FALSE, row.names = FALSE, quote=FALSE,sep=",") - # saves IS output - write.table(IS_base, file=paste0(dir_outputs,filename_output_IS), col.names=FALSE, row.names = FALSE, quote=FALSE,sep=",") +# -----Builds and saves dummyVD----------------- +# saves VD output + VD_base$VDyear<-DEyear + write.table(VD_base, file=filename_output_VD, col.names=FALSE, row.names = FALSE, quote=FALSE,sep=",") -# -----Builds and saves dummyVD----------------- -# saves VD output - VD_base$VDyear<-DEyear - write.table(VD_base, file=paste0(dir_outputs,filename_output_VD), col.names=FALSE, row.names = FALSE, quote=FALSE,sep=",") +# check - is this needed? +if(1==2){ @@ -776,4 +507,5 @@ write.table(b$V1, file=paste0(dir_outputs,filename_output_CS), col.names=FALSE, #zip::zip(zipfile = paste0(project_name_outputs,".zip"), files =paste0(dir_outputs,"tmp/",dir(paste0(dir_outputs,"tmp"))), mode="cherry-pick") #unlink(paste0(dir_outputs,"tmp"), force=T, recursive = T) +} diff --git a/data-raw/exampleData/TextBookExamples/BuildTextBookDataUpload_Pckg_SDAResources_algebra_H1.R b/data-raw/exampleData/TextBookExamples/BuildTextBookDataUpload_Pckg_SDAResources_algebra_H1.R index c859449f..9ac961f7 100644 --- a/data-raw/exampleData/TextBookExamples/BuildTextBookDataUpload_Pckg_SDAResources_algebra_H1.R +++ b/data-raw/exampleData/TextBookExamples/BuildTextBookDataUpload_Pckg_SDAResources_algebra_H1.R @@ -45,10 +45,11 @@ baseDir <- "" VD_base <- readRDS(paste0(baseDir,"aux_TextBookExamples/VD_base.rds")) SL_base <- readRDS(paste0(baseDir,"aux_TextBookExamples/SL_base.rds")) + IS_base <- readRDS(paste0(baseDir,"aux_TextBookExamples/IS_base.rds")) #nameof the directory where the outputs are saved currently base_dir_outputs <- paste0(baseDir,"BuiltUploads") - if(!file.exists(base_dir_outputs)) dir.create(base_dir_outputs, recursive=T, showWarnings=FALSE) + dir.create(base_dir_outputs, recursive=T, showWarnings=FALSE) #========Outline of Hierarchy 1================ @@ -68,22 +69,6 @@ -# 1 DEid [] - int -# 2 DErecordType [M] - string -# 3 DEsamplingScheme [M] - SamplingScheme -# 4 DEsamplingSchemeType [M] - SamplingSchemeType -# 5 DEyear [M] - Year -# 6 DEstratumName [M] - StringLength100 -# 7 DEhierarchyCorrect [M] - YesNoFields -# 8 DEhierarchy [M] - RDBESUpperHierarchy -# 9 DEsampled [DV,M] - YesNoFields -# 10 DEreasonNotSampled [DV,O] - ReasonForNotSampling -# 11 DEnonResponseCollected [DV,O] - YesNoFields -# 12 DEauxiliaryVariableTotal [DV,O] - DecimalPrec3 -# 13 DEauxiliaryVariableValue [DV,O] - DecimalPrec3 -# 14 DEauxiliaryVariableName [DV,O] - AuxiliaryVariableName -# 15 DEauxiliaryVariableUnit[DV,O]-MUNIT - DE_df_base<-expand.grid(DEyear=DEyear, DEstratumName="U",stringsAsFactors=F) @@ -102,20 +87,13 @@ DE_df<-data.frame( DEauxiliaryVariableTotal = "", DEauxiliaryVariableValue = "", DEauxiliaryVariableName = "", - DEauxiliaryVariableUnit = "", + DEauxiliaryVariableUnit = "", stringsAsFactors=FALSE ) #===SD============ - # x -# 1 SDid [M] - int -# 2 DEid [M] - int -# 3 SDrecordType [M] - string -# 4 SDcountry [M] - ISO_3166 -# 5 SDinstitution [M] - EDMO - SD_df<-data.frame( SDid=DE_df$DEid, DEid=DE_df$DEid, @@ -127,41 +105,6 @@ SD_df<-data.frame( #===VS============ - - # x -# 1 VSid [M] - int -# 2 SDid [M] - int -# 3 VDid [M] - int -# 4 TEid [M/O] - int -# 5 VSrecordType [M] - string -# 6 VSsequenceNumber [M] - int -# 7 VSencryptedVesselCode [M] - StringLength100 -# 8 VSstratification [DV,M] - YesNoFields -# 9 VSstratumName [DV,M] - StringLength100 -# 10 VSclustering [DV,M] - Clustering -# 11 VSclusterName [DV,M] - StringLength100 -# 12 VSsampler [O] - Sampler -# 13 VSnumberTotal [DV,O] - int -# 14 VSnumberSampled [DV,O] - int -# 15 VSselectionProb [DV,O] - Decimal0.0000000000000001-1 -# 16 VSinclusionProb [DV,O] - Decimal0.0000000000000001-1 -# 17 VSselectionMethod [DV,M] - SelectionMethod -# 18 VSunitName [DV,M] - StringLength100 -# 19 VSselectionMethodCluster [DV,O] - SelectionMethod -# 20 VSnumberTotalClusters [DV,O] - int -# 21 VSnumberSampledClusters [DV,O] - int -# 22 VSselectionProbCluster [DV,O] - Decimal0.0000000000000001-1 -# 23 VSinclusionProbCluster [DV,O] - Decimal0.0000000000000001-1 -# 24 VSsampled [DV,M] - YesNoFields -# 25 VSreasonNotSampled [DV,O] - ReasonForNotSampling -# 26 VSnonResponseCollected [DV,O] - YesNoFields -# 27 VSauxiliaryVariableTotal [DV,O] - DecimalPrec3 -# 28 VSauxiliaryVariableValue [DV,O] - DecimalPrec3 -# 29 VSauxiliaryVariableName [DV,O] - AuxiliaryVariableName -# 30 VSauxiliaryVariableUnit [DV,O] - MUNIT - -#check_All_fields("VS") - # adds VSid to dataset dataset$VSid <- 1:nrow(dataset) @@ -228,49 +171,6 @@ VS_df$VSinclusionProbCluster<-"" #====FT=========== - -# 1 FTid [M] - int -# 2 OSid [M/O] - int -# 3 VSid [M/O] - int -# 4 VDid [M] - int -# 5 SDid [M/O] - int -# 6 FOid [M/O] - int -# 7 TEid [M/O] - int -# 8 FTrecordType [M] - string -# 9 FTencryptedVesselCode [M] - StringLength100 -# 10 FTsequenceNumber [M] - int -# 11 FTstratification [DV,M] - YesNoFields -# 12 FTstratumName [DV,M] - StringLength100 -# 13 FTclustering [DV,M] - Clustering -# 14 FTclusterName [DV,M] - StringLength100 -# 15 FTsampler [O] - Sampler -# 16 FTsamplingType [M] - SamplingContext -# 17 FTnumberOfHaulsOrSets [M/O] - IntZeroToMax -# 18 FTdepartureLocation [M/O] - Harbour_LOCODE -# 19 FTdepartureDate [M/O] - Date -# 20 FTdepartureTime [M/O] - StringLength60 -# 21 FTarrivalLocation [M] - Harbour_LOCODE -# 22 FTarrivalDate [M] - Date -# 23 FTarrivalTime [M/O] - StringLength60 -# 24 FTnumberTotal [DV,O] - int -# 25 FTnumberSampled [DV,O] - int -# 26 FTselectionProb [DV,O] - Decimal0-1 -# 27 FTinclusionProb [DV,O] - Decimal0-1 -# 28 FTselectionMethod [DV,M] - SelectionMethod -# 29 FTunitName [DV,M] - StringLength100 -# 30 FTselectionMethodCluster [DV,O] - SelectionMethod -# 31 FTnumberTotalClusters [DV,O] - int -# 32 FTnumberSampledClusters [DV,O] - int -# 33 FTselectionProbCluster [DV,O] - Decimal0-1 -# 34 FTinclusionProbCluster [DV,O] - Decimal0-1 -# 35 FTsampled [DV,M] - YesNoFields -# 36 FTreasonNotSampled [DV,O] - ReasonForNotSampling -# 37 FTnonResponseCollected [DV,O] - YesNoFields -# 38 FTauxiliaryVariableTotal [DV,O] - DecimalPrec3 -# 39 FTauxiliaryVariableValue [DV,O] - DecimalPrec3 -# 40 FTauxiliaryVariableName [DV,O] - AuxiliaryVariableName -# 41 FTauxiliaryVariableUnit [DV,O] - MUNIT - FT_df <- data.frame( FTid = dataset$VSid,#[M] - int OSid = "",# [M/O] - int @@ -295,6 +195,7 @@ FT_df <- data.frame( FTarrivalLocation = "ZWHWN", #[M] - Harbour_LOCODE FTarrivalDate=seq(from = as.Date("1965-01-01", format='%Y-%m-%d'), by=1, length.out=nrow(dataset)), #[M] - date FTarrivalTime="", #[O] - time + FTdominantLandingDate= "", FTnumberTotal= 1, #[DV,O] - int FTnumberSampled=1, #[DV,O] - int FTselectionProb=1, #[DV,O] - DecimalPrec10 @@ -312,7 +213,7 @@ FT_df <- data.frame( FTauxiliaryVariableTotal = "", FTauxiliaryVariableValue = "", FTauxiliaryVariableName = "", - FTauxiliaryVariableUnit = "", + FTauxiliaryVariableUnit = "", stringsAsFactors=FALSE ) @@ -320,72 +221,6 @@ FT_df <- data.frame( #====FO=========== -# 1 FOid [M] - int -# 2 FTid [M/O] - int -# 3 SDid [M/O] - int -# 4 FOrecordType [M] - string -# 5 FOstratification [DV,M] - YesNoFields -# 6 FOsequenceNumber [M] - int -# 7 FOstratumName [DV,M] - StringLength100 -# 8 FOclustering [DV,M] - Clustering -# 9 FOclusterName [DV,M] - StringLength100 -# 10 FOsampler [O] - Sampler -# 11 FOaggregationLevel [M] - AggregationLevel -# 12 FOvalidity [M] - ValidityFlag -# 13 FOcatchReg [M] - CatchRegistration -# 14 FOstartDate [O] - Date -# 15 FOstartTime [O] - Time -# 16 FOendDate [M] - Date -# 17 FOendTime [M/O] - Time -# 18 FOduration [M/O] - int -# 19 FOdurationSource [M] - DurationSource -# 20 FOhandlingTime [O] - int -# 21 FOstartLat [O] - Decimal-90.000000-90.000000 -# 22 FOstartLon [O] - Decimal-180.000000-180.000000 -# 23 FOstopLat [O] - Decimal-90.000000-90.000000 -# 24 FOstopLon [O] - Decimal-180.000000-180.000000 -# 25 FOexclusiveEconomicZoneIndicator [O] - ISO_3166 -# 26 FOarea [M] - ICES_Area -# 27 FOrectangle [O] - StatRec -# 28 FOfisheriesManagementUnit [O] - AreaNonFAO -# 29 FOgsaSubarea [M] - Areas_GFCM_GSA -# 30 FOjurisdictionArea [O] - JurisdictionArea -# 31 FOfishingDepth [O] - int -# 32 FOwaterDepth [O] - int -# 33 FOnationalFishingActivity [O] - NationalFishingActivity -# 34 FOmetier5 [O] - Metier5_FishingActivity -# 35 FOmetier6 [M] - Metier6_FishingActivity -# 36 FOgear [M] - GearType -# 37 FOmeshSize [O] - int -# 38 FOselectionDevice [O] - SelectionDevice -# 39 FOselectionDeviceMeshSize [O] - int -# 40 FOtargetSpecies [O] - TargetSpecies -# 41 FOincidentalByCatchMitigationDeviceFirst [M] - BycatchMitigationDevice -# 42 FOincidentalByCatchMitigationDeviceTargetFirst [M] - BycatchMitigationDeviceTarget -# 43 FOincidentalByCatchMitigationDeviceSecond [M] - BycatchMitigationDevice -# 44 FOincidentalByCatchMitigationDeviceTargetSecond [M] - BycatchMitigationDeviceTarget -# 45 FOgearDimensions [O] - int -# 46 FOobservationCode [M] - ObservationCode -# 47 FOnumberTotal [DV,O] - int -# 48 FOnumberSampled [DV,O] - int -# 49 FOselectionProb [DV,O] - Decimal0.0000000000000001-1 -# 50 FOinclusionProb [DV,O] - Decimal0.0000000000000001-1 -# 51 FOselectionMethod [DV,M] - SelectionMethod -# 52 FOunitName [DV,M] - StringLength100 -# 53 FOselectionMethodCluster [DV,O] - SelectionMethod -# 54 FOnumberTotalClusters [DV,O] - int -# 55 FOnumberSampledClusters [DV,O] - int -# 56 FOselectionProbCluster [DV,O] - Decimal0.0000000000000001-1 -# 57 FOinclusionProbCluster [DV,O] - Decimal0.0000000000000001-1 -# 58 FOsampled [DV,M] - YesNoFields -# 59 FOreasonNotSampled [DV,O] - ReasonForNotSampling -# 60 FOnonResponseCollected [DV,O] - YesNoFields -# 61 FOauxiliaryVariableTotal [DV,O] - DecimalPrec3 -# 62 FOauxiliaryVariableValue [DV,O] - DecimalPrec3 -# 63 FOauxiliaryVariableName [DV,O] - AuxiliaryVariableName -# 64 FOauxiliaryVariableUnit [DV,O] - MUNIT - - FO_df <- data.frame( FOid = dataset$VSid, FTid = dataset$VSid, @@ -405,24 +240,29 @@ FO_df <- data.frame( FOendDate = as.Date(FT_df$FTdepartureDate, format='%Y-%m-%d'),#M FOendTime ="",#M FOduration = 60, #M ATTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT!!!! + FOfishingDurationDataBasis = "Measured", FOdurationSource = "Crew", FOhandlingTime = "", FOstartLat="", # ATT! FOstartLon="", # ATT! FOstopLat="", FOstopLon="", - FOexclusiveEconomicZoneIndicator = "", # + FOexclusiveEconomicZoneIndicator = "", # FOarea = "27.3.a.21", #M FOrectangle = "", FOfisheriesManagementUnit = "", FOgsaSubarea = "NotApplicable", #M FOjurisdictionArea = "", + FOgeographicalDataBasis = "Measured", + FOgeographicalSource = "", FOfishingDepth = "", FOwaterDepth = "", FOnationalFishingActivity = "", FOmetier5 = "", FOmetier6 = "OTT_CRU_70-89_2_35",#M FOgear = "OTT",#M + FOgearDataBasis = "Measured", + FOgearSource = "", FOmeshSize = "", FOselectionDevice = "", FOselectionDeviceMeshSize = "", @@ -450,52 +290,13 @@ FO_df <- data.frame( FOauxiliaryVariableTotal = "", FOauxiliaryVariableValue = "", FOauxiliaryVariableName = "", - FOauxiliaryVariableUnit = "", + FOauxiliaryVariableUnit = "", stringsAsFactors=FALSE ) #===SS============ -# 1 SSid [] - int -# 2 LEid [M/O] - int -# 3 FOid [M/O] - int -# 4 FTid [M/O] - int -# 5 OSid [M/O] - int -# 6 TEid [M/O] - int -# 7 SLid [M] - int -# 8 SSrecordType [M] - string -# 9 SSsequenceNumber [M] - int -# 10 SSstratification [DV,M] - YesNoFields -# 11 SSstratumName [DV,M] - StringLength100 -# 12 SSclustering [DV,M] - Clustering -# 13 SSclusterName [DV,M] - StringLength100 -# 14 SSobservationActivityType [DV,M] - ObservationActivityType -# 15 SScatchFraction [M] - CatchFraction -# 16 SSobservationType [DV,M] - ObservationMethod -# 17 SSsampler [O] - Sampler -# 18 SSspeciesListName [M] - StringLength100 -# 19 SSuseForCalculateZero [M] - YesNoFields -# 20 SStimeTotal [O] - int -# 21 SStimeSampled [O] - int -# 22 SSnumberTotal [DV,O] - int -# 23 SSnumberSampled [DV,O] - int -# 24 SSselectionProb [DV,O] - Decimal0.0000000000000001-1 -# 25 SSinclusionProb [DV,O] - Decimal0.0000000000000001-1 -# 26 SSselectionMethod [DV,M] - SelectionMethod -# 27 SSunitName [DV,M] - StringLength100 -# 28 SSselectionMethodCluster [DV,O] - SelectionMethod -# 29 SSnumberTotalClusters [DV,O] - int -# 30 SSnumberSampledClusters [DV,O] - int -# 31 SSselectionProbCluster [DV,O] - Decimal0.0000000000000001-1 -# 32 SSinclusionProbCluster [DV,O] - Decimal0.0000000000000001-1 -# 33 SSsampled [DV,M] - YesNoFields -# 34 SSreasonNotSampled [DV,O] - ReasonForNotSampling -# 35 SSnonResponseCollected [DV,O] - YesNoFields -# 36 SSauxiliaryVariableTotal [DV,O] - DecimalPrec3 -# 37 SSauxiliaryVariableValue [DV,O] - DecimalPrec3 -# 38 SSauxiliaryVariableName [DV,O] - AuxiliaryVariableName -# 39 SSauxiliaryVariableUnit [DV,O] - MUNIT SS_df<-data.frame( SSid = dataset$VSid, @@ -518,8 +319,10 @@ SS_df<-data.frame( SSspeciesListName = project_name_outputs, #M SSuseForCalculateZero = "N", #M SStimeTotal = "", + SStimeTotalDataBasis = "Measured", SStimeSampled = "", SSnumberTotal = 1, + SSnumberTotalDataBasis = "Measured", SSnumberSampled = 1, SSselectionProb = 1, SSinclusionProb = 1, @@ -536,68 +339,12 @@ SS_df<-data.frame( SSauxiliaryVariableTotal = "", SSauxiliaryVariableValue = "", SSauxiliaryVariableName = "", - SSauxiliaryVariableUnit = "", - stringsAsFactors=FALSE + SSauxiliaryVariableUnit = "", + stringsAsFactors=FALSE ) #====SA=========== - -# 1 SAid [] - int -# 2 SSid [] - int -# 3 SArecordType [M] - string -# 4 SAsequenceNumber [M] - int -# 5 SAparentSequenceNumber [O] - int -# 6 SAstratification [DV,M] - YesNoFields -# 7 SAstratumName [DV,M] - StringLength100 -# 8 SAspeciesCode [M] - SpecWoRMS -# 9 SAspeciesCodeFAO [O] - SpecASFIS -# 10 SAstateOfProcessing [M] - StateOfProcessing -# 11 SApresentation [M] - ProductPresentation -# 12 SAspecimensState [M] - SpecimensState -# 13 SAcatchCategory [M] - CatchCategory -# 14 SAlandingCategory [O] - LandingCategory -# 15 SAcommSizeCatScale [O] - CommercialSizeCategoryScale -# 16 SAcommSizeCat [O] - CommercialSizeCategory -# 17 SAsex [M] - SEXCO -# 18 SAexclusiveEconomicZoneIndicator [O] - ISO_3166 -# 19 SAarea [O] - ICES_Area -# 20 SArectangle [O] - StatRec -# 21 SAfisheriesManagementUnit [O] - AreaNonFAO -# 22 SAgsaSubarea [M] - Areas_GFCM_GSA -# 23 SAjurisdictionArea [O] - JurisdictionArea -# 24 SAnationalFishingActivity [O] - NationalFishingActivity -# 25 SAmetier5 [O] - Metier5_FishingActivity -# 26 SAmetier6 [O] - Metier6_FishingActivity -# 27 SAgear [O] - GearType -# 28 SAmeshSize [O] - int -# 29 SAselectionDevice [O] - SelectionDevice -# 30 SAselectionDeviceMeshSize [O] - int -# 31 SAunitType [M] - SamplingUnit -# 32 SAtotalWeightLive [O] - int -# 33 SAsampleWeightLive [O] - int -# 34 SAnumberTotal [DV,O] - Decimal0.1-999999999 -# 35 SAnumberSampled [DV,O] - Decimal0.1-999999999 -# 36 SAselectionProb [DV,O] - Decimal0-1 -# 37 SAinclusionProb [DV,O] - Decimal0-1 -# 38 SAselectionMethod [DV,M] - SelectionMethod -# 39 SAunitName [DV,M] - StringLength100 -# 40 SAlowerHierarchy [M/O] - LowerHierarchy -# 41 SAsampler [O] - Sampler -# 42 SAsampled [DV,M] - YesNoFields -# 43 SAreasonNotSampled [DV,O] - ReasonForNotSampling -# 44 SAnonResponseCollected [DV,O] - YesNoFields -# 45 SAreasonNotSampledFM [DV,O] - ReasonForNotSampling -# 46 SAreasonNotSampledBV [DV,O] - ReasonForNotSampling -# 47 SAtotalWeightMeasured [O] - int -# 48 SAsampleWeightMeasured [O] - int -# 49 SAconversionFactorMeasLive [O] - Decimal0.900-10 -# 50 SAauxiliaryVariableTotal [DV,O] - DecimalPrec3 -# 51 SAauxiliaryVariableValue [DV,O] - DecimalPrec3 -# 52 SAauxiliaryVariableName [DV,O] - AuxiliaryVariableName -# 53 SAauxiliaryVariableUnit [DV,O] - MUNIT - - SA_df<-data.frame( SAid = 1:nrow(dataset), SSid = dataset$VSid, @@ -622,10 +369,14 @@ SA_df<-data.frame( SAfisheriesManagementUnit = "", SAgsaSubarea = "NotApplicable", #M SAjurisdictionArea = "", + SAgeographicalDataBasis = "Measured", + SAgeographicalSource = "", SAnationalFishingActivity = "", SAmetier5 = "", SAmetier6 = "", SAgear = "", + SAgearDataBasis = "Measured", + SAgearSource = "", SAmeshSize = "", SAselectionDevice = "", SAselectionDeviceMeshSize = "", @@ -646,12 +397,13 @@ SA_df<-data.frame( SAreasonNotSampledFM = "", SAreasonNotSampledBV = "", SAtotalWeightMeasured = dataset[[target_var]], # + SAtotalWeightMeasuredDataBasis = "Measured", SAsampleWeightMeasured = dataset[[target_var]], # SAconversionFactorMeasLive = 1, SAauxiliaryVariableTotal = "", SAauxiliaryVariableValue = "", SAauxiliaryVariableName = "", - SAauxiliaryVariableUnit = "", + SAauxiliaryVariableUnit = "", stringsAsFactors=FALSE ) @@ -705,15 +457,14 @@ RDBESlist[[i]][which(grepl(colnames(RDBESlist[[i]]),pat="[A-Z]id"))]<-NULL #===Save============ - dir_outputs<-paste0(base_dir_outputs,"/", - project_name_outputs,"/") - dir.create(dir_outputs, recursive=T, showWarnings=FALSE) - filename_output_CS <- paste0("H1.csv") - filename_output_SL <- paste0("HSL.csv") - filename_output_VD <- paste0("HVD.csv") + dir_outputs<-paste0(base_dir_outputs,"/", project_name_outputs,"/") + dir.create(dir_outputs, recursive=T, showWarnings=FALSE) + filename_output_CS <- paste0(dir_outputs,"H1.csv") + filename_output_SL <- paste0(dir_outputs,"HSL.csv") + filename_output_VD <- paste0(dir_outputs,"HVD.csv") -lapply(RDBESlist, function(x, filename1 = paste0(dir_outputs,filename_output_CS)){ +lapply(RDBESlist, function(x, filename1 = filename_output_CS){ if("DErecordType" %in% colnames(x)){ write.table(x, file = filename1, append = FALSE, quote = FALSE, sep = ",", eol = "\n", na = "NA", dec = ".", row.names = FALSE, @@ -725,7 +476,7 @@ write.table(x, file = filename1, append = TRUE, quote = FALSE, sep = ",", } }) -b<-read.table(file=paste0(dir_outputs,filename_output_CS), header=F, sep=";") +b<-read.table(file = filename_output_CS, header=F, sep=";") b<-cbind(key,b) b<-b[order(as.character(b$key), decreasing=FALSE),] b<-b[!is.na(key),] @@ -733,24 +484,30 @@ b$key<-NULL b$V1<-as.character(b$V1) # saves CS output -write.table(b$V1, file=paste0(dir_outputs,filename_output_CS), col.names=FALSE, row.names = FALSE, quote=FALSE,sep=",") +write.table(b$V1, file = filename_output_CS, col.names=FALSE, row.names = FALSE, quote=FALSE,sep=",") -# -----Builds and saves dummySL----------------- +# -----Builds and saves dummySL and dummyIS----------------- SL_base$SLspeciesListName<-project_name_outputs SL_base$SLyear<-DEyear SL_base$SLinstitute<-SDinstitution -# saves SL output - write.table(SL_base, file=paste0(dir_outputs,filename_output_SL), col.names=FALSE, row.names = FALSE, quote=FALSE,sep=",") - + + # saves SL output + write.table(rbind(paste(SL_base, collapse=","), paste(IS_base, collapse=",")), file=filename_output_SL, col.names=FALSE, row.names = FALSE, quote=FALSE,sep=",") # -----Builds and saves dummyVD----------------- - # saves VD output VD_base$VDyear<-DEyear - write.table(VD_base, file=paste0(dir_outputs,filename_output_VD), col.names=FALSE, row.names = FALSE, quote=FALSE,sep=",") + write.table(VD_base, file=filename_output_VD, col.names=FALSE, row.names = FALSE, quote=FALSE,sep=",") + + + + + +# check - is this needed? +if(1==2){ @@ -772,4 +529,5 @@ library(zip) zip::zip(zipfile = paste0(project_name_outputs,".zip"), files =paste0(dir_outputs,"tmp/",dir(paste0(dir_outputs,"tmp"))), mode="cherry-pick") unlink(paste0(dir_outputs,"tmp"), force=T, recursive = T) +} diff --git a/data-raw/exampleData/TextBookExamples/BuildTextBookDataUpload_Pckg_SDAResources_coots_H1.R b/data-raw/exampleData/TextBookExamples/BuildTextBookDataUpload_Pckg_SDAResources_coots_H1.R index c2791a24..39d4dbd9 100644 --- a/data-raw/exampleData/TextBookExamples/BuildTextBookDataUpload_Pckg_SDAResources_coots_H1.R +++ b/data-raw/exampleData/TextBookExamples/BuildTextBookDataUpload_Pckg_SDAResources_coots_H1.R @@ -46,10 +46,11 @@ baseDir <- "" VD_base <- readRDS(paste0(baseDir,"aux_TextBookExamples/VD_base.rds")) SL_base <- readRDS(paste0(baseDir,"aux_TextBookExamples/SL_base.rds")) + IS_base <- readRDS(paste0(baseDir,"aux_TextBookExamples/IS_base.rds")) #nameof the directory where the outputs are saved currently base_dir_outputs <- paste0(baseDir,"BuiltUploads") - if(!file.exists(base_dir_outputs)) dir.create(base_dir_outputs, recursive=T, showWarnings=FALSE) + dir.create(base_dir_outputs, recursive=T, showWarnings=FALSE) #========Outline of Hierarchy 1================ @@ -69,22 +70,6 @@ -# 1 DEid [] - int -# 2 DErecordType [M] - string -# 3 DEsamplingScheme [M] - SamplingScheme -# 4 DEsamplingSchemeType [M] - SamplingSchemeType -# 5 DEyear [M] - Year -# 6 DEstratumName [M] - StringLength100 -# 7 DEhierarchyCorrect [M] - YesNoFields -# 8 DEhierarchy [M] - RDBESUpperHierarchy -# 9 DEsampled [DV,M] - YesNoFields -# 10 DEreasonNotSampled [DV,O] - ReasonForNotSampling -# 11 DEnonResponseCollected [DV,O] - YesNoFields -# 12 DEauxiliaryVariableTotal [DV,O] - DecimalPrec3 -# 13 DEauxiliaryVariableValue [DV,O] - DecimalPrec3 -# 14 DEauxiliaryVariableName [DV,O] - AuxiliaryVariableName -# 15 DEauxiliaryVariableUnit[DV,O]-MUNIT - DE_df_base<-expand.grid(DEyear=DEyear, DEstratumName="U",stringsAsFactors=F) @@ -103,20 +88,13 @@ DE_df<-data.frame( DEauxiliaryVariableTotal = "", DEauxiliaryVariableValue = "", DEauxiliaryVariableName = "", - DEauxiliaryVariableUnit = "", + DEauxiliaryVariableUnit = "", stringsAsFactors=FALSE ) #===SD============ - # x -# 1 SDid [M] - int -# 2 DEid [M] - int -# 3 SDrecordType [M] - string -# 4 SDcountry [M] - ISO_3166 -# 5 SDinstitution [M] - EDMO - SD_df<-data.frame( SDid=DE_df$DEid, DEid=DE_df$DEid, @@ -128,41 +106,6 @@ SD_df<-data.frame( #===VS============ - - # x -# 1 VSid [M] - int -# 2 SDid [M] - int -# 3 VDid [M] - int -# 4 TEid [M/O] - int -# 5 VSrecordType [M] - string -# 6 VSsequenceNumber [M] - int -# 7 VSencryptedVesselCode [M] - StringLength100 -# 8 VSstratification [DV,M] - YesNoFields -# 9 VSstratumName [DV,M] - StringLength100 -# 10 VSclustering [DV,M] - Clustering -# 11 VSclusterName [DV,M] - StringLength100 -# 12 VSsampler [O] - Sampler -# 13 VSnumberTotal [DV,O] - int -# 14 VSnumberSampled [DV,O] - int -# 15 VSselectionProb [DV,O] - Decimal0.0000000000000001-1 -# 16 VSinclusionProb [DV,O] - Decimal0.0000000000000001-1 -# 17 VSselectionMethod [DV,M] - SelectionMethod -# 18 VSunitName [DV,M] - StringLength100 -# 19 VSselectionMethodCluster [DV,O] - SelectionMethod -# 20 VSnumberTotalClusters [DV,O] - int -# 21 VSnumberSampledClusters [DV,O] - int -# 22 VSselectionProbCluster [DV,O] - Decimal0.0000000000000001-1 -# 23 VSinclusionProbCluster [DV,O] - Decimal0.0000000000000001-1 -# 24 VSsampled [DV,M] - YesNoFields -# 25 VSreasonNotSampled [DV,O] - ReasonForNotSampling -# 26 VSnonResponseCollected [DV,O] - YesNoFields -# 27 VSauxiliaryVariableTotal [DV,O] - DecimalPrec3 -# 28 VSauxiliaryVariableValue [DV,O] - DecimalPrec3 -# 29 VSauxiliaryVariableName [DV,O] - AuxiliaryVariableName -# 30 VSauxiliaryVariableUnit [DV,O] - MUNIT - -#check_All_fields("VS") - # adds VSid to dataset dataset$VSid <- 1:nrow(dataset) @@ -229,49 +172,6 @@ VS_df$VSinclusionProbCluster<-"" # not known see page 58 #====FT=========== - -# 1 FTid [M] - int -# 2 OSid [M/O] - int -# 3 VSid [M/O] - int -# 4 VDid [M] - int -# 5 SDid [M/O] - int -# 6 FOid [M/O] - int -# 7 TEid [M/O] - int -# 8 FTrecordType [M] - string -# 9 FTencryptedVesselCode [M] - StringLength100 -# 10 FTsequenceNumber [M] - int -# 11 FTstratification [DV,M] - YesNoFields -# 12 FTstratumName [DV,M] - StringLength100 -# 13 FTclustering [DV,M] - Clustering -# 14 FTclusterName [DV,M] - StringLength100 -# 15 FTsampler [O] - Sampler -# 16 FTsamplingType [M] - SamplingContext -# 17 FTnumberOfHaulsOrSets [M/O] - IntZeroToMax -# 18 FTdepartureLocation [M/O] - Harbour_LOCODE -# 19 FTdepartureDate [M/O] - Date -# 20 FTdepartureTime [M/O] - StringLength60 -# 21 FTarrivalLocation [M] - Harbour_LOCODE -# 22 FTarrivalDate [M] - Date -# 23 FTarrivalTime [M/O] - StringLength60 -# 24 FTnumberTotal [DV,O] - int -# 25 FTnumberSampled [DV,O] - int -# 26 FTselectionProb [DV,O] - Decimal0-1 -# 27 FTinclusionProb [DV,O] - Decimal0-1 -# 28 FTselectionMethod [DV,M] - SelectionMethod -# 29 FTunitName [DV,M] - StringLength100 -# 30 FTselectionMethodCluster [DV,O] - SelectionMethod -# 31 FTnumberTotalClusters [DV,O] - int -# 32 FTnumberSampledClusters [DV,O] - int -# 33 FTselectionProbCluster [DV,O] - Decimal0-1 -# 34 FTinclusionProbCluster [DV,O] - Decimal0-1 -# 35 FTsampled [DV,M] - YesNoFields -# 36 FTreasonNotSampled [DV,O] - ReasonForNotSampling -# 37 FTnonResponseCollected [DV,O] - YesNoFields -# 38 FTauxiliaryVariableTotal [DV,O] - DecimalPrec3 -# 39 FTauxiliaryVariableValue [DV,O] - DecimalPrec3 -# 40 FTauxiliaryVariableName [DV,O] - AuxiliaryVariableName -# 41 FTauxiliaryVariableUnit [DV,O] - MUNIT - FT_df <- data.frame( FTid = dataset$VSid,#[M] - int OSid = "",# [M/O] - int @@ -296,6 +196,7 @@ FT_df <- data.frame( FTarrivalLocation = "ZWHWN", #[M] - Harbour_LOCODE FTarrivalDate=seq(from = as.Date("1965-01-01", format='%Y-%m-%d'), by=1, length.out=nrow(dataset)), #[M] - date FTarrivalTime="", #[O] - time + FTdominantLandingDate= "", FTnumberTotal= 1, #[DV,O] - int FTnumberSampled=1, #[DV,O] - int FTselectionProb=1, #[DV,O] - DecimalPrec10 @@ -313,7 +214,7 @@ FT_df <- data.frame( FTauxiliaryVariableTotal = "", FTauxiliaryVariableValue = "", FTauxiliaryVariableName = "", - FTauxiliaryVariableUnit = "", + FTauxiliaryVariableUnit = "", stringsAsFactors=FALSE ) @@ -321,72 +222,6 @@ FT_df <- data.frame( #====FO=========== -# 1 FOid [M] - int -# 2 FTid [M/O] - int -# 3 SDid [M/O] - int -# 4 FOrecordType [M] - string -# 5 FOstratification [DV,M] - YesNoFields -# 6 FOsequenceNumber [M] - int -# 7 FOstratumName [DV,M] - StringLength100 -# 8 FOclustering [DV,M] - Clustering -# 9 FOclusterName [DV,M] - StringLength100 -# 10 FOsampler [O] - Sampler -# 11 FOaggregationLevel [M] - AggregationLevel -# 12 FOvalidity [M] - ValidityFlag -# 13 FOcatchReg [M] - CatchRegistration -# 14 FOstartDate [O] - Date -# 15 FOstartTime [O] - Time -# 16 FOendDate [M] - Date -# 17 FOendTime [M/O] - Time -# 18 FOduration [M/O] - int -# 19 FOdurationSource [M] - DurationSource -# 20 FOhandlingTime [O] - int -# 21 FOstartLat [O] - Decimal-90.000000-90.000000 -# 22 FOstartLon [O] - Decimal-180.000000-180.000000 -# 23 FOstopLat [O] - Decimal-90.000000-90.000000 -# 24 FOstopLon [O] - Decimal-180.000000-180.000000 -# 25 FOexclusiveEconomicZoneIndicator [O] - ISO_3166 -# 26 FOarea [M] - ICES_Area -# 27 FOrectangle [O] - StatRec -# 28 FOfisheriesManagementUnit [O] - AreaNonFAO -# 29 FOgsaSubarea [M] - Areas_GFCM_GSA -# 30 FOjurisdictionArea [O] - JurisdictionArea -# 31 FOfishingDepth [O] - int -# 32 FOwaterDepth [O] - int -# 33 FOnationalFishingActivity [O] - NationalFishingActivity -# 34 FOmetier5 [O] - Metier5_FishingActivity -# 35 FOmetier6 [M] - Metier6_FishingActivity -# 36 FOgear [M] - GearType -# 37 FOmeshSize [O] - int -# 38 FOselectionDevice [O] - SelectionDevice -# 39 FOselectionDeviceMeshSize [O] - int -# 40 FOtargetSpecies [O] - TargetSpecies -# 41 FOincidentalByCatchMitigationDeviceFirst [M] - BycatchMitigationDevice -# 42 FOincidentalByCatchMitigationDeviceTargetFirst [M] - BycatchMitigationDeviceTarget -# 43 FOincidentalByCatchMitigationDeviceSecond [M] - BycatchMitigationDevice -# 44 FOincidentalByCatchMitigationDeviceTargetSecond [M] - BycatchMitigationDeviceTarget -# 45 FOgearDimensions [O] - int -# 46 FOobservationCode [M] - ObservationCode -# 47 FOnumberTotal [DV,O] - int -# 48 FOnumberSampled [DV,O] - int -# 49 FOselectionProb [DV,O] - Decimal0.0000000000000001-1 -# 50 FOinclusionProb [DV,O] - Decimal0.0000000000000001-1 -# 51 FOselectionMethod [DV,M] - SelectionMethod -# 52 FOunitName [DV,M] - StringLength100 -# 53 FOselectionMethodCluster [DV,O] - SelectionMethod -# 54 FOnumberTotalClusters [DV,O] - int -# 55 FOnumberSampledClusters [DV,O] - int -# 56 FOselectionProbCluster [DV,O] - Decimal0.0000000000000001-1 -# 57 FOinclusionProbCluster [DV,O] - Decimal0.0000000000000001-1 -# 58 FOsampled [DV,M] - YesNoFields -# 59 FOreasonNotSampled [DV,O] - ReasonForNotSampling -# 60 FOnonResponseCollected [DV,O] - YesNoFields -# 61 FOauxiliaryVariableTotal [DV,O] - DecimalPrec3 -# 62 FOauxiliaryVariableValue [DV,O] - DecimalPrec3 -# 63 FOauxiliaryVariableName [DV,O] - AuxiliaryVariableName -# 64 FOauxiliaryVariableUnit [DV,O] - MUNIT - - FO_df <- data.frame( FOid = dataset$VSid, FTid = dataset$VSid, @@ -406,24 +241,29 @@ FO_df <- data.frame( FOendDate = as.Date(FT_df$FTdepartureDate, format='%Y-%m-%d'),#M FOendTime ="",#M FOduration = 60, #M ATTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT!!!! + FOfishingDurationDataBasis = "Measured", FOdurationSource = "Crew", FOhandlingTime = "", FOstartLat="", # ATT! FOstartLon="", # ATT! FOstopLat="", FOstopLon="", - FOexclusiveEconomicZoneIndicator = "", # + FOexclusiveEconomicZoneIndicator = "", # FOarea = "27.3.a.21", #M FOrectangle = "", FOfisheriesManagementUnit = "", FOgsaSubarea = "NotApplicable", #M FOjurisdictionArea = "", + FOgeographicalDataBasis = "Measured", + FOgeographicalSource = "", FOfishingDepth = "", FOwaterDepth = "", FOnationalFishingActivity = "", FOmetier5 = "", FOmetier6 = "OTT_CRU_70-89_2_35",#M FOgear = "OTT",#M + FOgearDataBasis = "Measured", + FOgearSource = "", FOmeshSize = "", FOselectionDevice = "", FOselectionDeviceMeshSize = "", @@ -451,52 +291,13 @@ FO_df <- data.frame( FOauxiliaryVariableTotal = "", FOauxiliaryVariableValue = "", FOauxiliaryVariableName = "", - FOauxiliaryVariableUnit = "", + FOauxiliaryVariableUnit = "", stringsAsFactors=FALSE ) #===SS============ -# 1 SSid [] - int -# 2 LEid [M/O] - int -# 3 FOid [M/O] - int -# 4 FTid [M/O] - int -# 5 OSid [M/O] - int -# 6 TEid [M/O] - int -# 7 SLid [M] - int -# 8 SSrecordType [M] - string -# 9 SSsequenceNumber [M] - int -# 10 SSstratification [DV,M] - YesNoFields -# 11 SSstratumName [DV,M] - StringLength100 -# 12 SSclustering [DV,M] - Clustering -# 13 SSclusterName [DV,M] - StringLength100 -# 14 SSobservationActivityType [DV,M] - ObservationActivityType -# 15 SScatchFraction [M] - CatchFraction -# 16 SSobservationType [DV,M] - ObservationMethod -# 17 SSsampler [O] - Sampler -# 18 SSspeciesListName [M] - StringLength100 -# 19 SSuseForCalculateZero [M] - YesNoFields -# 20 SStimeTotal [O] - int -# 21 SStimeSampled [O] - int -# 22 SSnumberTotal [DV,O] - int -# 23 SSnumberSampled [DV,O] - int -# 24 SSselectionProb [DV,O] - Decimal0.0000000000000001-1 -# 25 SSinclusionProb [DV,O] - Decimal0.0000000000000001-1 -# 26 SSselectionMethod [DV,M] - SelectionMethod -# 27 SSunitName [DV,M] - StringLength100 -# 28 SSselectionMethodCluster [DV,O] - SelectionMethod -# 29 SSnumberTotalClusters [DV,O] - int -# 30 SSnumberSampledClusters [DV,O] - int -# 31 SSselectionProbCluster [DV,O] - Decimal0.0000000000000001-1 -# 32 SSinclusionProbCluster [DV,O] - Decimal0.0000000000000001-1 -# 33 SSsampled [DV,M] - YesNoFields -# 34 SSreasonNotSampled [DV,O] - ReasonForNotSampling -# 35 SSnonResponseCollected [DV,O] - YesNoFields -# 36 SSauxiliaryVariableTotal [DV,O] - DecimalPrec3 -# 37 SSauxiliaryVariableValue [DV,O] - DecimalPrec3 -# 38 SSauxiliaryVariableName [DV,O] - AuxiliaryVariableName -# 39 SSauxiliaryVariableUnit [DV,O] - MUNIT SS_df<-data.frame( SSid = dataset$VSid, @@ -519,8 +320,10 @@ SS_df<-data.frame( SSspeciesListName = project_name_outputs, #M SSuseForCalculateZero = "N", #M SStimeTotal = "", + SStimeTotalDataBasis = "Measured", SStimeSampled = "", SSnumberTotal = 1, + SSnumberTotalDataBasis = "Measured", SSnumberSampled = 1, SSselectionProb = 1, SSinclusionProb = 1, @@ -537,68 +340,12 @@ SS_df<-data.frame( SSauxiliaryVariableTotal = "", SSauxiliaryVariableValue = "", SSauxiliaryVariableName = "", - SSauxiliaryVariableUnit = "", - stringsAsFactors=FALSE + SSauxiliaryVariableUnit = "", + stringsAsFactors=FALSE ) #====SA=========== - -# 1 SAid [] - int -# 2 SSid [] - int -# 3 SArecordType [M] - string -# 4 SAsequenceNumber [M] - int -# 5 SAparentSequenceNumber [O] - int -# 6 SAstratification [DV,M] - YesNoFields -# 7 SAstratumName [DV,M] - StringLength100 -# 8 SAspeciesCode [M] - SpecWoRMS -# 9 SAspeciesCodeFAO [O] - SpecASFIS -# 10 SAstateOfProcessing [M] - StateOfProcessing -# 11 SApresentation [M] - ProductPresentation -# 12 SAspecimensState [M] - SpecimensState -# 13 SAcatchCategory [M] - CatchCategory -# 14 SAlandingCategory [O] - LandingCategory -# 15 SAcommSizeCatScale [O] - CommercialSizeCategoryScale -# 16 SAcommSizeCat [O] - CommercialSizeCategory -# 17 SAsex [M] - SEXCO -# 18 SAexclusiveEconomicZoneIndicator [O] - ISO_3166 -# 19 SAarea [O] - ICES_Area -# 20 SArectangle [O] - StatRec -# 21 SAfisheriesManagementUnit [O] - AreaNonFAO -# 22 SAgsaSubarea [M] - Areas_GFCM_GSA -# 23 SAjurisdictionArea [O] - JurisdictionArea -# 24 SAnationalFishingActivity [O] - NationalFishingActivity -# 25 SAmetier5 [O] - Metier5_FishingActivity -# 26 SAmetier6 [O] - Metier6_FishingActivity -# 27 SAgear [O] - GearType -# 28 SAmeshSize [O] - int -# 29 SAselectionDevice [O] - SelectionDevice -# 30 SAselectionDeviceMeshSize [O] - int -# 31 SAunitType [M] - SamplingUnit -# 32 SAtotalWeightLive [O] - int -# 33 SAsampleWeightLive [O] - int -# 34 SAnumberTotal [DV,O] - Decimal0.1-999999999 -# 35 SAnumberSampled [DV,O] - Decimal0.1-999999999 -# 36 SAselectionProb [DV,O] - Decimal0-1 -# 37 SAinclusionProb [DV,O] - Decimal0-1 -# 38 SAselectionMethod [DV,M] - SelectionMethod -# 39 SAunitName [DV,M] - StringLength100 -# 40 SAlowerHierarchy [M/O] - LowerHierarchy -# 41 SAsampler [O] - Sampler -# 42 SAsampled [DV,M] - YesNoFields -# 43 SAreasonNotSampled [DV,O] - ReasonForNotSampling -# 44 SAnonResponseCollected [DV,O] - YesNoFields -# 45 SAreasonNotSampledFM [DV,O] - ReasonForNotSampling -# 46 SAreasonNotSampledBV [DV,O] - ReasonForNotSampling -# 47 SAtotalWeightMeasured [O] - int -# 48 SAsampleWeightMeasured [O] - int -# 49 SAconversionFactorMeasLive [O] - Decimal0.900-10 -# 50 SAauxiliaryVariableTotal [DV,O] - DecimalPrec3 -# 51 SAauxiliaryVariableValue [DV,O] - DecimalPrec3 -# 52 SAauxiliaryVariableName [DV,O] - AuxiliaryVariableName -# 53 SAauxiliaryVariableUnit [DV,O] - MUNIT - - SA_df<-data.frame( SAid = 1:nrow(dataset), SSid = dataset$VSid, @@ -623,10 +370,14 @@ SA_df<-data.frame( SAfisheriesManagementUnit = "", SAgsaSubarea = "NotApplicable", #M SAjurisdictionArea = "", + SAgeographicalDataBasis = "Measured", + SAgeographicalSource = "", SAnationalFishingActivity = "", SAmetier5 = "", SAmetier6 = "", SAgear = "", + SAgearDataBasis = "Measured", + SAgearSource = "", SAmeshSize = "", SAselectionDevice = "", SAselectionDeviceMeshSize = "", @@ -647,12 +398,13 @@ SA_df<-data.frame( SAreasonNotSampledFM = "", SAreasonNotSampledBV = "", SAtotalWeightMeasured = dataset[[target_var]]*100000000, # *100000000 to meet type required (integer) + SAtotalWeightMeasuredDataBasis = "Measured", SAsampleWeightMeasured = dataset[[target_var]]*100000000, # *100000000 to meet type required (integer) SAconversionFactorMeasLive = 1, SAauxiliaryVariableTotal = "", SAauxiliaryVariableValue = "", SAauxiliaryVariableName = "", - SAauxiliaryVariableUnit = "", + SAauxiliaryVariableUnit = "", stringsAsFactors=FALSE ) @@ -706,15 +458,14 @@ RDBESlist[[i]][which(grepl(colnames(RDBESlist[[i]]),pat="[A-Z]id"))]<-NULL #===Save============ - dir_outputs<-paste0(base_dir_outputs,"/", - project_name_outputs,"/") - dir.create(dir_outputs, recursive=T, showWarnings=FALSE) - filename_output_CS <- paste0("H1.csv") - filename_output_SL <- paste0("HSL.csv") - filename_output_VD <- paste0("HVD.csv") + dir_outputs<-paste0(base_dir_outputs,"/", project_name_outputs,"/") + dir.create(dir_outputs, recursive=T, showWarnings=FALSE) + filename_output_CS <- paste0(dir_outputs,"H1.csv") + filename_output_SL <- paste0(dir_outputs,"HSL.csv") + filename_output_VD <- paste0(dir_outputs,"HVD.csv") -lapply(RDBESlist, function(x, filename1 = paste0(dir_outputs,filename_output_CS)){ +lapply(RDBESlist, function(x, filename1 = filename_output_CS){ if("DErecordType" %in% colnames(x)){ write.table(x, file = filename1, append = FALSE, quote = FALSE, sep = ",", eol = "\n", na = "NA", dec = ".", row.names = FALSE, @@ -726,7 +477,7 @@ write.table(x, file = filename1, append = TRUE, quote = FALSE, sep = ",", } }) -b<-read.table(file=paste0(dir_outputs,filename_output_CS), header=F, sep=";") +b<-read.table(file = filename_output_CS, header=F, sep=";") b<-cbind(key,b) b<-b[order(as.character(b$key), decreasing=FALSE),] b<-b[!is.na(key),] @@ -734,24 +485,30 @@ b$key<-NULL b$V1<-as.character(b$V1) # saves CS output -write.table(b$V1, file=paste0(dir_outputs,filename_output_CS), col.names=FALSE, row.names = FALSE, quote=FALSE,sep=",") +write.table(b$V1, file = filename_output_CS, col.names=FALSE, row.names = FALSE, quote=FALSE,sep=",") -# -----Builds and saves dummySL----------------- +# -----Builds and saves dummySL and dummyIS----------------- SL_base$SLspeciesListName<-project_name_outputs SL_base$SLyear<-DEyear SL_base$SLinstitute<-SDinstitution -# saves SL output - write.table(SL_base, file=paste0(dir_outputs,filename_output_SL), col.names=FALSE, row.names = FALSE, quote=FALSE,sep=",") - + + # saves SL output + write.table(rbind(paste(SL_base, collapse=","), paste(IS_base, collapse=",")), file=filename_output_SL, col.names=FALSE, row.names = FALSE, quote=FALSE,sep=",") # -----Builds and saves dummyVD----------------- - # saves VD output VD_base$VDyear<-DEyear - write.table(VD_base, file=paste0(dir_outputs,filename_output_VD), col.names=FALSE, row.names = FALSE, quote=FALSE,sep=",") + write.table(VD_base, file=filename_output_VD, col.names=FALSE, row.names = FALSE, quote=FALSE,sep=",") + + + + + +# check - is this needed? +if(1==2){ @@ -773,4 +530,5 @@ library(zip) zip::zip(zipfile = paste0(project_name_outputs,".zip"), files =paste0(dir_outputs,"tmp/",dir(paste0(dir_outputs,"tmp"))), mode="cherry-pick") unlink(paste0(dir_outputs,"tmp"), force=T, recursive = T) +} diff --git a/data-raw/exampleData/TextBookExamples/BuildTextBookDataUpload_Pckg_SDAResources_coots_multistage_H1.R b/data-raw/exampleData/TextBookExamples/BuildTextBookDataUpload_Pckg_SDAResources_coots_multistage_H1.R index 4d33eb0d..533b09bb 100644 --- a/data-raw/exampleData/TextBookExamples/BuildTextBookDataUpload_Pckg_SDAResources_coots_multistage_H1.R +++ b/data-raw/exampleData/TextBookExamples/BuildTextBookDataUpload_Pckg_SDAResources_coots_multistage_H1.R @@ -48,13 +48,15 @@ DEstratumName <- "Pckg_SDAResources_coots_multistage_H1" project_name_outputs <- gsub(" ","_", paste0(DEsamplingScheme,"_", DEstratumName)) baseDir <- "./data-raw/exampleData/TextBookExamples/" - #baseDir <- "" + baseDir <- "" VD_base <- readRDS(paste0(baseDir,"aux_TextBookExamples/VD_base.rds")) SL_base <- readRDS(paste0(baseDir,"aux_TextBookExamples/SL_base.rds")) + IS_base <- readRDS(paste0(baseDir,"aux_TextBookExamples/IS_base.rds")) #nameof the directory where the outputs are saved currently - base_dir_outputs <- paste0(baseDir,"BuiltUploads/") - if(!file.exists(base_dir_outputs)) dir.create(base_dir_outputs, recursive=T, showWarnings=FALSE) + + base_dir_outputs <- paste0(baseDir,"BuiltUploads") + dir.create(base_dir_outputs, recursive=T, showWarnings=FALSE) #========Outline of Hierarchy 1================ @@ -70,26 +72,10 @@ - - #====DE=========== +#===DE============ - # 1 DEid [] - int - # 2 DErecordType [M] - string - # 3 DEsamplingScheme [M] - SamplingScheme - # 4 DEsamplingSchemeType [M] - SamplingSchemeType - # 5 DEyear [M] - Year - # 6 DEstratumName [M] - StringLength100 - # 7 DEhierarchyCorrect [M] - YesNoFields - # 8 DEhierarchy [M] - RDBESUpperHierarchy - # 9 DEsampled [DV,M] - YesNoFields - # 10 DEreasonNotSampled [DV,O] - ReasonForNotSampling - # 11 DEnonResponseCollected [DV,O] - YesNoFields - # 12 DEauxiliaryVariableTotal [DV,O] - DecimalPrec3 - # 13 DEauxiliaryVariableValue [DV,O] - DecimalPrec3 - # 14 DEauxiliaryVariableName [DV,O] - AuxiliaryVariableName - # 15 DEauxiliaryVariableUnit[DV,O]-MUNIT DE_df_base<-expand.grid(DEyear=DEyear, DEstratumName="U",stringsAsFactors=F) @@ -111,628 +97,422 @@ DEauxiliaryVariableName = "", DEauxiliaryVariableUnit = "", stringsAsFactors=FALSE - ) - - #====SD=========== - - - # x - # 1 SDid [M] - int - # 2 DEid [M] - int - # 3 SDrecordType [M] - string - # 4 SDcountry [M] - ISO_3166 - # 5 SDinstitution [M] - EDMO + ) + +#===SD============ + + +SD_df<-data.frame( + SDid=DE_df$DEid, + DEid=DE_df$DEid, + SDrecordType="SD", + SDcountry="ZW", + SDinstitution=as.integer(SDinstitution), + stringsAsFactors=FALSE +) + +#===VS============ + + + + # The PSU will be Vessels and the SSU will be Trips + df_vessel <- df[!duplicated(df[c("clutch")]),] + # adds VSid to df + df_vessel$VSid <- 1:nrow(df_vessel) + + # creates a dummyVD and adds df + # restricts VD_base to what is needed + VD_base <- VD_base[1:nrow(df_vessel),] + + df_vessel$VSencryptedVesselCode<-VD_base$VDencryptedVesselCode + # should be 0 + test<-sum(duplicated(df_vessel$VSencryptedVesselCode))==0 + if(!test) stop( "duplicated VSencryptedVesselCode") + + +VS_df <- data.frame( + VSid = df_vessel$VSid, + SDid = as.integer(1), + VDid = "", + TEid = "", + VSrecordType = 'VS', + VSsequenceNumber = 1:nrow(df_vessel),# M + VSencryptedVesselCode = df_vessel$VSencryptedVesselCode, #M + VSstratification = "Y", + VSstratumName = "U", #M + VSclustering = "N", #M + VSclusterName = "U", #M + VSsampler = "Observer", #M + VSnumberTotal = "", + VSnumberSampled = nrow(df_vessel), + VSselectionProb = "", + VSinclusionProb = "", + VSselectionMethod = "SRSWOR", #M + VSunitName = paste0("clutch_",df_vessel$clutch),#M + VSselectionMethodCluster = "", + VSnumberTotalClusters = "", + VSnumberSampledClusters = "", + VSselectionProbCluster = "", + VSinclusionProbCluster = "", + VSsampled = "Y",#M + VSreasonNotSampled = "", + VSnonResponseCollected = "", + VSauxiliaryVariableTotal = "", + VSauxiliaryVariableValue = "", + VSauxiliaryVariableName = "", + VSauxiliaryVariableUnit = "", + stringsAsFactors=FALSE + ) + + + +#====FT=========== + + + + makeChildTbl <- function(parent, data, tbl="FT", by = "dname"){ + if(tbl !="FT"){stop("Not implemented!")} + mergeCols <- c(setdiff(colnames(parent), colnames(data)), by) + df <- merge(data, parent[mergeCols], by=by) + sc <- table(df[[by]]) + df$sampled_count <- as.vector(sc[df[[by]]]) + +FT_df <- data.frame( + FTid = 1:nrow(df),#[M] - int + OSid = "",# [M/O] - int + VSid = df$VSid, #[M/O] + VDid = "", #[M] - int + SDid = "", #[M/O] - int + FOid = "", #[M/O] - int + TEid = "", #[M/O] - int + FTrecordType='FT', #[M] - string + FTencryptedVesselCode = df$VSencryptedVesselCode, #[M] + FTsequenceNumber = as.integer(1:nrow(df)), #[M] - string + FTstratification = "N", #[DV,M] - RS_Stratification + FTstratumName = "U", #[DV,M] - string + FTclustering = "N", #[DV,M] - RS_Clustering + FTclusterName = "U", #[DV,M] - string + FTsampler = "Observer", #[M] - RS_Sampler + FTsamplingType = "AtSea" , #[M] - RS_SamplingType + FTnumberOfHaulsOrSets = 1, #[O] - int + FTdepartureLocation="ZWHWN", #[O] - Harbour_LOCODE + FTdepartureDate=seq(from = as.Date("1968-01-01", format='%Y-%m-%d'), by=1, length.out=nrow(df)), #[M/O] - date + FTdepartureTime="", #[O] - time + FTarrivalLocation = "ZWHWN", #[M] - Harbour_LOCODE + FTarrivalDate=seq(from = as.Date("1968-01-01", format='%Y-%m-%d'), by=1, length.out=nrow(df)), #[M] - date + FTarrivalTime="", #[O] - time + FTdominantLandingDate= "", + FTnumberTotal= as.vector(df$csize), #[DV,O] - int + FTnumberSampled=df$sampled_count, #[DV,O] - int + FTselectionProb="", #[DV,O] - DecimalPrec10 + FTinclusionProb="", #[DV,O] - DecimalPrec10 + FTselectionMethod="SRSWOR", #[DV,M] - RS_SelectionMethod + FTunitName = df$VSid, #[DV,M] - string + FTselectionMethodCluster="", #[DV,O] - RS_SelectionMethod + FTnumberTotalClusters="", #[DV,O] - int + FTnumberSampledClusters="", #[DV,O] - int + FTselectionProbCluster="", #[DV,O] - DecimalPrec10 + FTinclusionProbCluster="", #[DV,O] - DecimalPrec10 + FTsampled="Y", #[DV,M] - YesNoFields + FTreasonNotSampled= "", #[DV,O] - RS_ReasonForNotSampling, + FTnonResponseCollected = factor("N", levels = c('N', 'Y')), + FTauxiliaryVariableTotal = "", + FTauxiliaryVariableValue = "", + FTauxiliaryVariableName = "", + FTauxiliaryVariableUnit = "", + stringsAsFactors=FALSE +) + +if(any(FT_df$FTnumberTotal < FT_df$FTnumberSampled)){ +stop("Sampled is more than total") +} +FT_df +} +FT_df <- makeChildTbl(df_vessel, df, by="clutch") + +#====FO=========== + + +FO_df <- data.frame( + FOid = FT_df$FTid, + FTid = FT_df$FTid, + SDid = "", + FOrecordType = "FO",#M + FOstratification = "N",#M + FOsequenceNumber = as.integer(1:nrow(FT_df)),#M + FOstratumName = "U",#M + FOclustering = "N",#M + FOclusterName = "U",#M + FOsampler = "Observer", + FOaggregationLevel = "H",#M + FOvalidity = "V",#M + FOcatchReg = "Lan",#M + FOstartDate = as.Date(FT_df$FTdepartureDate, format='%Y-%m-%d'), + FOstartTime = "", + FOendDate = as.Date(FT_df$FTdepartureDate, format='%Y-%m-%d'),#M + FOendTime ="",#M + FOduration = 60, #M ATTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT!!!! + FOfishingDurationDataBasis = "Measured", + FOdurationSource = "Crew", + FOhandlingTime = "", + FOstartLat="", # ATT! + FOstartLon="", # ATT! + FOstopLat="", + FOstopLon="", + FOexclusiveEconomicZoneIndicator = "", # + FOarea = "27.3.a.21", #M + FOrectangle = "", + FOfisheriesManagementUnit = "", + FOgsaSubarea = "NotApplicable", #M + FOjurisdictionArea = "", + FOgeographicalDataBasis = "Measured", + FOgeographicalSource = "", + FOfishingDepth = "", + FOwaterDepth = "", + FOnationalFishingActivity = "", + FOmetier5 = "", + FOmetier6 = "OTT_CRU_70-89_2_35",#M + FOgear = "OTT",#M + FOgearDataBasis = "Measured", + FOgearSource = "", + FOmeshSize = "", + FOselectionDevice = "", + FOselectionDeviceMeshSize = "", + FOtargetSpecies = "", + FOincidentalByCatchMitigationDeviceFirst = "NotRecorded",#M + FOincidentalByCatchMitigationDeviceTargetFirst = "NotApplicable",# [M] + FOincidentalByCatchMitigationDeviceSecond = "NotRecorded",#M + FOincidentalByCatchMitigationDeviceTargetSecond = "NotApplicable",#M + FOgearDimensions = "", + FOobservationCode = 'So', #M + FOnumberTotal = 10, + FOnumberSampled = 10, + FOselectionProb = 1, + FOinclusionProb = 1, + FOselectionMethod = "CENSUS", #M + FOunitName = FT_df$FTid, #M + FOselectionMethodCluster = "", + FOnumberTotalClusters = "", + FOnumberSampledClusters = "", + FOselectionProbCluster = "", + FOinclusionProbCluster = "", + FOsampled = "Y", #M + FOreasonNotSampled = "", + FOnonResponseCollected = "N", + FOauxiliaryVariableTotal = "", + FOauxiliaryVariableValue = "", + FOauxiliaryVariableName = "", + FOauxiliaryVariableUnit = "", + stringsAsFactors=FALSE +) + + +#===SS============ + + +SS_df<-data.frame( + SSid = FO_df$FOid, + LEid = "", + FOid = FO_df$FOid, + FTid = "", + OSid = "", + TEid = "", + SLid = "", + SSrecordType = "SS", #M + SSsequenceNumber = FO_df$FOid, #M + SSstratification = "N", #M + SSstratumName = "U", #M + SSclustering = "N", #M + SSclusterName = "U", #M + SSobservationActivityType = "Sort", #M + SScatchFraction = "Lan", #M + SSobservationType = "Volume", #M + SSsampler = "Observer", #M + SSspeciesListName = project_name_outputs, #M + SSuseForCalculateZero = "N", #M + SStimeTotal = "", + SStimeTotalDataBasis = "Measured", + SStimeSampled = "", + SSnumberTotal = 1, + SSnumberTotalDataBasis = "Measured", + SSnumberSampled = 1, + SSselectionProb = 1, + SSinclusionProb = 1, + SSselectionMethod = "CENSUS", + SSunitName = as.character(FO_df$FOid), + SSselectionMethodCluster = "", + SSnumberTotalClusters = "", + SSnumberSampledClusters = "", + SSselectionProbCluster = "", + SSinclusionProbCluster = "", + SSsampled = "Y", #M, + SSreasonNotSampled = "", + SSnonResponseCollected = "N", + SSauxiliaryVariableTotal = "", + SSauxiliaryVariableValue = "", + SSauxiliaryVariableName = "", + SSauxiliaryVariableUnit = "", + stringsAsFactors=FALSE +) + +#====SA=========== + +SA_df<-data.frame( +SAid = SS_df$SSid, +SSid = SS_df$SSid, +SArecordType = "SA", #M +SAsequenceNumber = SS_df$SSid, #M +SAparentSequenceNumber = "", +SAstratification = "N", #M +SAstratumName = "U", #M +SAspeciesCode = 107254, #M +SAspeciesCodeFAO = "NEP", +SAstateOfProcessing = "FRE", #M +SApresentation = "WHL", #M +SAspecimensState = "DeadOrZeroProbSurvival", #M +SAcatchCategory = "Lan", #M +SAlandingCategory = "", +SAcommSizeCatScale = "", +SAcommSizeCat = "", +SAsex = "U", #M +SAexclusiveEconomicZoneIndicator = "", +SAarea = "", +SArectangle = "", +SAfisheriesManagementUnit = "", +SAgsaSubarea = "NotApplicable", #M +SAjurisdictionArea = "", +SAgeographicalDataBasis = "Measured", +SAgeographicalSource = "", +SAnationalFishingActivity = "", +SAmetier5 = "", +SAmetier6 = "", +SAgear = "", +SAgearDataBasis = "Measured", +SAgearSource = "", +SAmeshSize = "", +SAselectionDevice = "", +SAselectionDeviceMeshSize = "", +SAunitType = "Box", #M +SAtotalWeightLive = "", +SAsampleWeightLive = "", +SAnumberTotal = 1, +SAnumberSampled = 1, +SAselectionProb = 1, +SAinclusionProb = 1, +SAselectionMethod = "CENSUS", #M +SAunitName = SS_df$SSid, #M +SAlowerHierarchy = "D", +SAsampler = "Observer", +SAsampled = ifelse(!is.na(df[[target_var]]),"Y","N"), #M +SAreasonNotSampled = "", +SAnonResponseCollected = "Y", +SAreasonNotSampledFM = "", +SAreasonNotSampledBV = "", +SAtotalWeightMeasured = ifelse(!is.na(df[[target_var]]),df[[target_var]]*targetMultiplyer,""), # +SAtotalWeightMeasuredDataBasis = "Measured", +SAsampleWeightMeasured = ifelse(!is.na(df[[target_var]]),df[[target_var]]*targetMultiplyer,""), # +SAconversionFactorMeasLive = 1, +SAauxiliaryVariableTotal = "", +SAauxiliaryVariableValue = "", +SAauxiliaryVariableName = "", +SAauxiliaryVariableUnit = "", +stringsAsFactors=FALSE +) + + + +#====Builds final format=========== + + +RDBESlist = list(DE = DE_df,SD = SD_df, VS = VS_df, FT = FT_df, FO = FO_df, SS = SS_df, SA = SA_df) + +#id table +a<-merge(DE_df["DEid"],SD_df[c("DEid","SDid")]) +a<-merge(a, VS_df[c("SDid","VSid")], all.x=T) +a<-merge(a, FT_df[c("VSid","FTid")], all.x=T) +a<-merge(a, FO_df[c("FTid","FOid")], all.x=T) +a<-merge(a, SS_df[c("FOid","SSid")], all.x=T) +a<-merge(a, SA_df[c("SSid","SAid")], all.x=T) + +# reorder columns +a<-a[c("DEid","SDid","VSid","FTid","FOid","SSid","SAid")] +# reorder rows +a<-data.table(a) +a<-a[order(DEid,SDid,VSid,FTid,FOid,SSid,SAid),] + +a$DEindex=apply(a[,1:which(colnames(a)=="DEid")],1,paste, collapse="_") +a$SDindex=apply(a[,1:which(colnames(a)=="SDid")],1,paste, collapse="_") +a$VSindex=apply(a[,1:which(colnames(a)=="VSid")],1,paste, collapse="_") +a$FTindex=apply(a[,1:which(colnames(a)=="FTid")],1,paste, collapse="_") +a$FOindex=apply(a[,1:which(colnames(a)=="FOid")],1,paste, collapse="_") +a$SSindex=apply(a[,1:which(colnames(a)=="SSid")],1,paste, collapse="_") +a$SAindex=apply(a[,1:which(colnames(a)=="SAid")],1,paste, collapse="_") + +key<-c(a$DEindex[match(DE_df$DEid,a$DEid)], +a$SDindex[match(SD_df$SDid,a$SDid)], +a$VSindex[match(VS_df$VSid,a$VSid)], +a$FTindex[match(FT_df$FTid,a$FTid)], +a$FOindex[match(FO_df$FOid,a$FOid)], +a$SSindex[match(SS_df$SSid,a$SSid)], +a$SAindex[match(SA_df$SAid,a$SAid)] +) + +# file production +Oldscipen<-.Options$scipen +options(scipen=500) + +#remove all id +for (i in names(RDBESlist)) +{ +RDBESlist[[i]][which(grepl(colnames(RDBESlist[[i]]),pat="[A-Z]id"))]<-NULL +} + +#===Save============ + + dir_outputs<-paste0(base_dir_outputs,"/", project_name_outputs,"/") + dir.create(dir_outputs, recursive=T, showWarnings=FALSE) + filename_output_CS <- paste0(dir_outputs,"H1.csv") + filename_output_SL <- paste0(dir_outputs,"HSL.csv") + filename_output_VD <- paste0(dir_outputs,"HVD.csv") + + +lapply(RDBESlist, function(x, filename1 = filename_output_CS){ +if("DErecordType" %in% colnames(x)){ + write.table(x, file = filename1, append = FALSE, quote = FALSE, sep = ",", + eol = "\n", na = "NA", dec = ".", row.names = FALSE, + col.names = FALSE, qmethod = c("escape", "double")) + } else { +write.table(x, file = filename1, append = TRUE, quote = FALSE, sep = ",", + eol = "\n", na = "NA", dec = ".", row.names = FALSE, + col.names = FALSE, qmethod = c("escape", "double")) + } +}) + +b<-read.table(file = filename_output_CS, header=F, sep=";") +b<-cbind(key,b) +b<-b[order(as.character(b$key), decreasing=FALSE),] +b<-b[!is.na(key),] +b$key<-NULL +b$V1<-as.character(b$V1) + +# saves CS output +write.table(b$V1, file = filename_output_CS, col.names=FALSE, row.names = FALSE, quote=FALSE,sep=",") + +# -----Builds and saves dummySL and dummyIS----------------- + + + SL_base$SLspeciesListName<-project_name_outputs + SL_base$SLyear<-DEyear + SL_base$SLinstitute<-SDinstitution + + # saves SL output + write.table(rbind(paste(SL_base, collapse=","), paste(IS_base, collapse=",")), file=filename_output_SL, col.names=FALSE, row.names = FALSE, quote=FALSE,sep=",") + +# -----Builds and saves dummyVD----------------- + +# saves VD output + VD_base$VDyear<-DEyear + write.table(VD_base, file=filename_output_VD, col.names=FALSE, row.names = FALSE, quote=FALSE,sep=",") - SD_df<-data.frame( - SDid=DE_df$DEid, - DEid=DE_df$DEid, - SDrecordType="SD", - SDcountry="ZW", - SDinstitution=as.integer(SDinstitution), - stringsAsFactors=FALSE - ) - - #===VS============ - - # x - # 1 VSid [M] - int - # 2 SDid [M] - int - # 3 VDid [M] - int - # 4 TEid [M/O] - int - # 5 VSrecordType [M] - string - # 6 VSsequenceNumber [M] - int - # 7 VSencryptedVesselCode [M] - StringLength100 - # 8 VSstratification [DV,M] - YesNoFields - # 9 VSstratumName [DV,M] - StringLength100 - # 10 VSclustering [DV,M] - Clustering - # 11 VSclusterName [DV,M] - StringLength100 - # 12 VSsampler [O] - Sampler - # 13 VSnumberTotal [DV,O] - int - # 14 VSnumberSampled [DV,O] - int - # 15 VSselectionProb [DV,O] - Decimal0.0000000000000001-1 - # 16 VSinclusionProb [DV,O] - Decimal0.0000000000000001-1 - # 17 VSselectionMethod [DV,M] - SelectionMethod - # 18 VSunitName [DV,M] - StringLength100 - # 19 VSselectionMethodCluster [DV,O] - SelectionMethod - # 20 VSnumberTotalClusters [DV,O] - int - # 21 VSnumberSampledClusters [DV,O] - int - # 22 VSselectionProbCluster [DV,O] - Decimal0.0000000000000001-1 - # 23 VSinclusionProbCluster [DV,O] - Decimal0.0000000000000001-1 - # 24 VSsampled [DV,M] - YesNoFields - # 25 VSreasonNotSampled [DV,O] - ReasonForNotSampling - - - # The PSU will be Vessels and the SSU will be Trips - df_vessel <- df[!duplicated(df[c("clutch")]),] - # adds VSid to df - df_vessel$VSid <- 1:nrow(df_vessel) - - # creates a dummyVD and adds df - # restricts VD_base to what is needed - VD_base <- VD_base[1:nrow(df_vessel),] - - df_vessel$VSencryptedVesselCode<-VD_base$VDencryptedVesselCode - # should be 0 - test<-sum(duplicated(df_vessel$VSencryptedVesselCode))==0 - if(!test) stop( "duplicated VSencryptedVesselCode") - - - VS_df <- data.frame( - VSid = df_vessel$VSid, - SDid = as.integer(1), - VDid = "", - TEid = "", - VSrecordType = 'VS', - VSsequenceNumber = 1:nrow(df_vessel),# M - VSencryptedVesselCode = df_vessel$VSencryptedVesselCode, #M - VSstratification = "Y", - VSstratumName = "U", #M - VSclustering = "N", #M - VSclusterName = "U", #M - VSsampler = "Observer", #M - VSnumberTotal = "", - VSnumberSampled = nrow(df_vessel), - VSselectionProb = "", - VSinclusionProb = "", - VSselectionMethod = "SRSWOR", #M - VSunitName = paste0("clutch_",df_vessel$clutch),#M - VSselectionMethodCluster = "", - VSnumberTotalClusters = "", - VSnumberSampledClusters = "", - VSselectionProbCluster = "", - VSinclusionProbCluster = "", - VSsampled = "Y",#M - VSreasonNotSampled = "", - VSnonResponseCollected = "", - VSauxiliaryVariableTotal = "", - VSauxiliaryVariableValue = "", - VSauxiliaryVariableName = "", - VSauxiliaryVariableUnit = "", - stringsAsFactors=FALSE - ) - - - #===FT============ - - - # 1 FTid [M] - int - # 2 OSid [M/O] - int - # 3 VSid [M/O] - int - # 4 VDid [M] - int - # 5 SDid [M/O] - int - # 6 FOid [M/O] - int - # 7 TEid [M/O] - int - # 8 FTrecordType [M] - string - # 9 FTencryptedVesselCode [M] - StringLength100 - # 10 FTsequenceNumber [M] - StringLength100 - # 11 FTstratification [DV,M] - YesNoFields - # 12 FTstratumName [DV,M] - StringLength100 - # 13 FTclustering [DV,M] - Clustering - # 14 FTclusterName [DV,M] - StringLength100 - # 15 FTsampler [O] - Sampler - # 16 FTsamplingType [M] - SamplingContext - # 17 FTnumberOfHaulsOrSets [M/O] - IntZeroToMax - # 18 FTdepartureLocation [M/O] - Harbour_LOCODE - # 19 FTdepartureDate [M/O] - Date - # 20 FTdepartureTime [M/O] - Time - # 21 FTarrivalLocation [M] - Harbour_LOCODE - # 22 FTarrivalDate [M] - Date - # 23 FTarrivalTime [M/O] - Time - # 24 FTnumberTotal [DV,O] - int - # 25 FTnumberSampled [DV,O] - int - # 26 FTselectionProb [DV,O] - Decimal0.0000000000000001-1 - # 27 FTinclusionProb [DV,O] - Decimal0.0000000000000001-1 - # 28 FTselectionMethod [DV,M] - SelectionMethod - # 29 FTunitName [DV,M] - StringLength100 - # 30 FTselectionMethodCluster [DV,O] - SelectionMethod - # 31 FTnumberTotalClusters [DV,O] - int - # 32 FTnumberSampledClusters [DV,O] - int - # 33 FTselectionProbCluster [DV,O] - Decimal0.0000000000000001-1 - # 34 FTinclusionProbCluster [DV,O] - Decimal0.0000000000000001-1 - # 35 FTsampled [DV,M] - YesNoFields - # 36 FTreasonNotSampled [DV,O] - ReasonForNotSampling - - makeChildTbl <- function(parent, data, tbl="FT", by = "dname"){ - - if(tbl !="FT"){stop("Not implemented!")} - mergeCols <- c(setdiff(colnames(parent), colnames(data)), by) - df <- merge(data, parent[mergeCols], by=by) - sc <- table(df[[by]]) - df$sampled_count <- as.vector(sc[df[[by]]]) - FT_df <- data.frame( - FTid = 1:nrow(df),#[M] - int - OSid = "",# [M/O] - int - VSid = df$VSid, #[M/O] - VDid = "", #[M] - int - SDid = "", #[M/O] - int - FOid = "", #[M/O] - int - TEid = "", #[M/O] - int - FTrecordType='FT', #[M] - string - FTencryptedVesselCode = df$VSencryptedVesselCode, #[M] - FTsequenceNumber = as.integer(1:nrow(df)), #[M] - string - FTstratification = "N", #[DV,M] - RS_Stratification - FTstratumName = "U", #[DV,M] - string - FTclustering = "N", #[DV,M] - RS_Clustering - FTclusterName = "U", #[DV,M] - string - FTsampler = "Observer", #[M] - RS_Sampler - FTsamplingType = "AtSea" , #[M] - RS_SamplingType - FTnumberOfHaulsOrSets = 1, #[O] - int - FTdepartureLocation="ZWHWN", #[O] - Harbour_LOCODE - FTdepartureDate=seq(from = as.Date("1968-01-01", format='%Y-%m-%d'), by=1, length.out=nrow(df)), #[M/O] - date - FTdepartureTime="", #[O] - time - FTarrivalLocation = "ZWHWN", #[M] - Harbour_LOCODE - FTarrivalDate=seq(from = as.Date("1968-01-01", format='%Y-%m-%d'), by=1, length.out=nrow(df)), #[M] - date - FTarrivalTime="", #[O] - time - FTnumberTotal= as.vector(df$csize), #[DV,O] - int - FTnumberSampled=df$sampled_count, #[DV,O] - int - FTselectionProb="", #[DV,O] - DecimalPrec10 - FTinclusionProb="", #[DV,O] - DecimalPrec10 - FTselectionMethod="SRSWOR", #[DV,M] - RS_SelectionMethod - FTunitName = df$VSid, #[DV,M] - string - FTselectionMethodCluster="", #[DV,O] - RS_SelectionMethod - FTnumberTotalClusters="", #[DV,O] - int - FTnumberSampledClusters="", #[DV,O] - int - FTselectionProbCluster="", #[DV,O] - DecimalPrec10 - FTinclusionProbCluster="", #[DV,O] - DecimalPrec10 - FTsampled="Y", #[DV,M] - YesNoFields - FTreasonNotSampled= "", #[DV,O] - RS_ReasonForNotSampling, - FTnonResponseCollected = factor("N", levels = c('N', 'Y')), - FTauxiliaryVariableTotal = "", - FTauxiliaryVariableValue = "", - FTauxiliaryVariableName = "", - FTauxiliaryVariableUnit = "", - stringsAsFactors=FALSE) - if(any(FT_df$FTnumberTotal < FT_df$FTnumberSampled)){ - stop("Sampled is more than total") - } - FT_df - } - - - FT_df <- makeChildTbl(df_vessel, df, by="clutch") - - - #====FO=========== - - - # 1 FOid [M] - int - # 2 FTid [M/O] - int - # 3 SDid [M/O] - int - # 4 FOrecordType [M] - string - # 5 FOstratification [DV,M] - YesNoFields - # 6 FOsequenceNumber [M] - int - # 7 FOstratumName [DV,M] - StringLength100 - # 8 FOclustering [DV,M] - Clustering - # 9 FOclusterName [DV,M] - StringLength100 - # 10 FOsampler [O] - Sampler - # 11 FOaggregationLevel [M] - AggregationLevel - # 12 FOvalidity [M] - ValidityFlag - # 13 FOcatchReg [M] - CatchRegistration - # 14 FOstartDate [O] - Date - # 15 FOstartTime [O] - Time - # 16 FOendDate [M] - Date - # 17 FOendTime [M/O] - Time - # 18 FOduration [M/O] - int - # 19 FOdurationSource [M] - DurationSource - # 20 FOhandlingTime [O] - int - # 21 FOstartLat [O] - Decimal-90.000000-90.000000 - # 22 FOstartLon [O] - Decimal-180.000000-180.000000 - # 23 FOstopLat [O] - Decimal-90.000000-90.000000 - # 24 FOstopLon [O] - Decimal-180.000000-180.000000 - # 25 FOexclusiveEconomicZoneIndicator [O] - ISO_3166 - # 26 FOarea [M] - ICES_Area - # 27 FOrectangle [O] - StatRec - # 28 FOgsaSubarea [M] - Areas_GFCM_GSA - # 29 FOjurisdictionArea [O] - JurisdictionArea - # 30 FOfishingDepth [O] - int - # 31 FOwaterDepth [O] - int - # 32 FOnationalFishingActivity [O] - NationalFishingActivity - # 33 FOmetier5 [O] - Metier5_FishingActivity - # 34 FOmetier6 [M] - Metier6_FishingActivity - # 35 FOgear [M] - GearType - # 36 FOmeshSize [O] - int - # 37 FOselectionDevice [O] - SelectionDevice - # 38 FOselectionDeviceMeshSize [O] - int - # 39 FOtargetSpecies [O] - TargetSpecies - # 40 FOincidentalByCatchMitigationDeviceFirst [M] - BycatchMitigationDevice - # 41 FOincidentalByCatchMitigationDeviceTargetFirst [M] - BycatchMitigationDeviceTarget - # 42 FOincidentalByCatchMitigationDeviceSecond [M] - BycatchMitigationDevice - # 43 FOincidentalByCatchMitigationDeviceTargetSecond [M] - BycatchMitigationDeviceTarget - # 44 FOgearDimensions [O] - int - # 45 FOobservationCode [M] - ObservationCode - # 46 FOnumberTotal [DV,O] - int - # 47 FOnumberSampled [DV,O] - int - # 48 FOselectionProb [DV,O] - Decimal0.0000000000000001-1 - # 49 FOinclusionProb [DV,O] - Decimal0.0000000000000001-1 - # 50 FOselectionMethod [DV,M] - SelectionMethod - # 51 FOunitName [DV,M] - StringLength100 - # 52 FOselectionMethodCluster [DV,O] - SelectionMethod - # 53 FOnumberTotalClusters [DV,O] - int - # 54 FOnumberSampledClusters [DV,O] - int - # 55 FOselectionProbCluster [DV,O] - Decimal0.0000000000000001-1 - # 56 FOinclusionProbCluster [DV,O] - Decimal0.0000000000000001-1 - # 57 FOsampled [DV,M] - YesNoFields - # 58 FOreasonNotSampled [DV,O] - ReasonForNotSampling - - - FO_df <- data.frame( - FOid = FT_df$FTid, - FTid = FT_df$FTid, - SDid = "", - FOrecordType = "FO",#M - FOstratification = "N",#M - FOsequenceNumber = as.integer(1:nrow(FT_df)),#M - FOstratumName = "U",#M - FOclustering = "N",#M - FOclusterName = "U",#M - FOsampler = "Observer", - FOaggregationLevel = "H",#M - FOvalidity = "V",#M - FOcatchReg = "Lan",#M - FOstartDate = as.Date(FT_df$FTdepartureDate, format='%Y-%m-%d'), - FOstartTime = "", - FOendDate = as.Date(FT_df$FTdepartureDate, format='%Y-%m-%d'),#M - FOendTime ="",#M - FOduration = 60, #M ATTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT!!!! - FOdurationSource = "Crew", - FOhandlingTime = "", - FOstartLat="", # ATT! - FOstartLon="", # ATT! - FOstopLat="", - FOstopLon="", - FOexclusiveEconomicZoneIndicator = "", # - FOarea = "27.3.a.21", #M - FOrectangle = "", - FOfisheriesManagementUnit = "", - FOgsaSubarea = "NotApplicable", #M - FOjurisdictionArea = "", - FOfishingDepth = "", - FOwaterDepth = "", - FOnationalFishingActivity = "", - FOmetier5 = "", - FOmetier6 = "OTT_CRU_70-89_2_35",#M - FOgear = "OTT",#M - FOmeshSize = "", - FOselectionDevice = "", - FOselectionDeviceMeshSize = "", - FOtargetSpecies = "", - FOincidentalByCatchMitigationDeviceFirst = "NotRecorded",#M - FOincidentalByCatchMitigationDeviceTargetFirst = "NotApplicable",# [M] - FOincidentalByCatchMitigationDeviceSecond = "NotRecorded",#M - FOincidentalByCatchMitigationDeviceTargetSecond = "NotApplicable",#M - FOgearDimensions = "", - FOobservationCode = 'So', #M - FOnumberTotal = 10, - FOnumberSampled = 10, - FOselectionProb = 1, - FOinclusionProb = 1, - FOselectionMethod = "CENSUS", #M - FOunitName = FT_df$FTid, #M - FOselectionMethodCluster = "", - FOnumberTotalClusters = "", - FOnumberSampledClusters = "", - FOselectionProbCluster = "", - FOinclusionProbCluster = "", - FOsampled = "Y", #M - FOreasonNotSampled = "", - FOnonResponseCollected = factor("N", levels = c('N', 'Y')), - FOfisheriesManagementUnit = "", - FOauxiliaryVariableTotal = "", - FOauxiliaryVariableValue = "", - FOauxiliaryVariableName = "", - FOauxiliaryVariableUnit = "", - stringsAsFactors=FALSE - ) - - - - # 1 SSid [] - int - # 2 LEid [M/O] - int - # 3 FOid [M/O] - int - # 4 FTid [M/O] - int - # 5 OSid [M/O] - int - # 6 TEid [M/O] - int - # 7 SLid [M] - int - # 8 SSrecordType [M] - string - # 9 SSsequenceNumber [M] - int - # 10 SSstratification [DV,M] - YesNoFields - # 11 SSstratumName [DV,M] - StringLength100 - # 12 SSclustering [DV,M] - Clustering - # 13 SSclusterName [DV,M] - StringLength100 - # 14 SSobservationActivityType [DV,M] - ObservationActivityType - # 15 SScatchFraction [M] - CatchFraction - # 16 SSobservationType [DV,M] - ObservationMethod - # 17 SSsampler [O] - Sampler - # 18 SSspeciesListName [M] - StringLength100 - # 19 SSuseForCalculateZero [M] - YesNoFields - # 20 SStimeTotal [O] - int - # 21 SStimeSampled [O] - int - # 22 SSnumberTotal [DV,O] - int - # 23 SSnumberSampled [DV,O] - int - # 24 SSselectionProb [DV,O] - Decimal0.0000000000000001-1 - # 25 SSinclusionProb [DV,O] - Decimal0.0000000000000001-1 - # 26 SSselectionMethod [DV,M] - SelectionMethod - # 27 SSunitName [DV,M] - StringLength100 - # 28 SSselectionMethodCluster [DV,O] - SelectionMethod - # 29 SSnumberTotalClusters [DV,O] - int - # 30 SSnumberSampledClusters [DV,O] - int - # 31 SSselectionProbCluster [DV,O] - Decimal0.0000000000000001-1 - # 32 SSinclusionProbCluster [DV,O] - Decimal0.0000000000000001-1 - # 33 SSsampled [DV,M] - YesNoFields - # 34 SSreasonNotSampled [DV,O] - ReasonForNotSampling - - #====SS=========== - - SS_df<-data.frame( - SSid = FO_df$FOid, - LEid = "", - FOid = FO_df$FOid, - FTid = "", - OSid = "", - TEid = "", - SLid = "", - SSrecordType = "SS", #M - SSsequenceNumber = FO_df$FOid, #M - SSstratification = "N", #M - SSstratumName = "U", #M - SSclustering = "N", #M - SSclusterName = "U", #M - SSobservationActivityType = "Sort", #M - SScatchFraction = "Lan", #M - SSobservationType = "Volume", #M - SSsampler = "Observer", #M - SSspeciesListName = project_name_outputs, #M - SSuseForCalculateZero = "N", #M - SStimeTotal = "", - SStimeSampled = "", - SSnumberTotal = 1, - SSnumberSampled = 1, - SSselectionProb = 1, - SSinclusionProb = 1, - SSselectionMethod = "CENSUS", - SSunitName = as.character(FO_df$FOid), - SSselectionMethodCluster = "", - SSnumberTotalClusters = "", - SSnumberSampledClusters = "", - SSselectionProbCluster = "", - SSinclusionProbCluster = "", - SSsampled = "Y", #M, - SSreasonNotSampled = "", - SSnonResponseCollected = "N", - SSauxiliaryVariableTotal = "", - SSauxiliaryVariableValue = "", - SSauxiliaryVariableName = "", - SSauxiliaryVariableUnit = "", - stringsAsFactors = F - ) - - #====SA=========== - - - # 1 SAid [] - int - # 2 SSid [] - int - # 3 SArecordType [M] - string - # 4 SAsequenceNumber [M] - int - # 5 SAparentSequenceNumber [O] - int - # 6 SAstratification [DV,M] - YesNoFields - # 7 SAstratumName [DV,M] - StringLength100 - # 8 SAspeciesCode [M] - SpecWoRMS - # 9 SAspeciesCodeFAO [O] - SpecASFIS - # 10 SAstateOfProcessing [M] - StateOfProcessing - # 11 SApresentation [M] - ProductPresentation - # 12 SAspecimensState [M] - SpecimensState - # 13 SAcatchCategory [M] - CatchCategory - # 14 SAlandingCategory [O] - LandingCategory - # 15 SAcommSizeCatScale [O] - CommercialSizeCategoryScale - # 16 SAcommSizeCat [O] - CommercialSizeCategory - # 17 SAsex [M] - SEXCO - # 18 SAexclusiveEconomicZoneIndicator [O] - ISO_3166 - # 19 SAarea [O] - ICES_Area - # 20 SArectangle [O] - StatRec - # 21 SAgsaSubarea [M] - Areas_GFCM_GSA - # 22 SAjurisdictionArea [O] - JurisdictionArea - # 23 SAnationalFishingActivity [O] - NationalFishingActivity - # 24 SAmetier5 [O] - Metier5_FishingActivity - # 25 SAmetier6 [O] - Metier6_FishingActivity - # 26 SAgear [O] - GearType - # 27 SAmeshSize [O] - int - # 28 SAselectionDevice [O] - SelectionDevice - # 29 SAselectionDeviceMeshSize [O] - int - # 30 SAunitType [M] - SamplingUnit - # 31 SAtotalWeightLive [O] - int - # 32 SAsampleWeightLive [O] - int - # 33 SAnumberTotal [DV,O] - Decimal0.1-999999999 - # 34 SAnumberSampled [DV,O] - Decimal0.1-999999999 - # 35 SAselectionProb [DV,O] - Decimal0.0000000000000001-1 - # 36 SAinclusionProb [DV,O] - Decimal0.0000000000000001-1 - # 37 SAselectionMethod [DV,M] - SelectionMethod - # 38 SAunitName [DV,M] - StringLength100 - # 39 SAlowerHierarchy [M/O] - LowerHierarchy - # 40 SAsampler [O] - Sampler - # 41 SAsampled [DV,M] - YesNoFields - # 42 SAreasonNotSampledFM [DV,O] - ReasonForNotSampling - # 43 SAreasonNotSampledBV [DV,O] - ReasonForNotSampling - # 44 SAtotalWeightMeasured [O] - int - # 45 SAsampleWeightMeasured [O] - int - # 46 SAconversionFactorMeasLive [O] - Decimal0.900-10 - - SA_df<-data.frame( - SAid = SS_df$SSid, - SSid = SS_df$SSid, - SArecordType = "SA", #M - SAsequenceNumber = SS_df$SSid, #M - SAparentSequenceNumber = "", - SAstratification = "N", #M - SAstratumName = "U", #M - SAspeciesCode = 107254, #M - SAspeciesCodeFAO = "NEP", - SAstateOfProcessing = "FRE", #M - SApresentation = "WHL", #M - SAspecimensState = "DeadOrZeroProbSurvival", #M - SAcatchCategory = "Lan", #M - SAlandingCategory = "", - SAcommSizeCatScale = "", - SAcommSizeCat = "", - SAsex = "U", #M - SAexclusiveEconomicZoneIndicator = "", - SAarea = "", - SArectangle = "", - SAfisheriesManagementUnit = "", - SAgsaSubarea = "NotApplicable", #M - SAjurisdictionArea = "", - SAnationalFishingActivity = "", - SAmetier5 = "", - SAmetier6 = "", - SAgear = "", - SAmeshSize = "", - SAselectionDevice = "", - SAselectionDeviceMeshSize = "", - SAunitType = "Box", #M - SAtotalWeightLive = "", - SAsampleWeightLive = "", - SAnumberTotal = 1, - SAnumberSampled = 1, - SAselectionProb = 1, - SAinclusionProb = 1, - SAselectionMethod = "CENSUS", #M - SAunitName = SS_df$SSid, #M - SAlowerHierarchy = "D", - SAsampler = "Observer", - SAsampled = ifelse(!is.na(df[[target_var]]),"Y","N"), #M - SAreasonNotSampled = "", - SAnonResponseCollected = "Y", - SAreasonNotSampledFM = "", - SAreasonNotSampledBV = "", - SAtotalWeightMeasured = ifelse(!is.na(df[[target_var]]),df[[target_var]]*targetMultiplyer,""), # - SAsampleWeightMeasured = ifelse(!is.na(df[[target_var]]),df[[target_var]]*targetMultiplyer,""), # - SAconversionFactorMeasLive = 1, - SAauxiliaryVariableTotal = "", - SAauxiliaryVariableValue = "", - SAauxiliaryVariableName = "", - SAauxiliaryVariableUnit = "", - stringsAsFactors=FALSE - ) - - - - #====Builds final format=========== - - RDBESlist <- list(DE = DE_df, - SD = SD_df, - VS = VS_df, - FT = FT_df, - FO = FO_df, - SS = SS_df, - SA = SA_df) - - #id table - a<-merge(DE_df["DEid"],SD_df[c("DEid","SDid")]) - a<-merge(a, VS_df[c("SDid","VSid")], all.x=T) - a<-merge(a, FT_df[c("VSid","FTid")], all.x=T) - a<-merge(a, FO_df[c("FTid","FOid")], all.x=T) - a<-merge(a, SS_df[c("FOid","SSid")], all.x=T) - a<-merge(a, SA_df[c("SSid","SAid")], all.x=T) - - # reorder columns - a<-a[c("DEid","SDid","VSid","FTid","FOid","SSid","SAid")] - # reorder rows - a<-data.table(a) - a<-a[order(DEid,SDid,VSid,FTid,FOid,SSid,SAid),] - - a$DEindex=apply(a[,1:which(colnames(a)=="DEid")],1,paste, collapse="_") - a$SDindex=apply(a[,1:which(colnames(a)=="SDid")],1,paste, collapse="_") - a$VSindex=apply(a[,1:which(colnames(a)=="VSid")],1,paste, collapse="_") - a$FTindex=apply(a[,1:which(colnames(a)=="FTid")],1,paste, collapse="_") - a$FOindex=apply(a[,1:which(colnames(a)=="FOid")],1,paste, collapse="_") - a$SSindex=apply(a[,1:which(colnames(a)=="SSid")],1,paste, collapse="_") - a$SAindex=apply(a[,1:which(colnames(a)=="SAid")],1,paste, collapse="_") - - key<-c(a$DEindex[match(DE_df$DEid,a$DEid)], - a$SDindex[match(SD_df$SDid,a$SDid)], - a$VSindex[match(VS_df$VSid,a$VSid)], - a$FTindex[match(FT_df$FTid,a$FTid)], - a$FOindex[match(FO_df$FOid,a$FOid)], - a$SSindex[match(SS_df$SSid,a$SSid)], - a$SAindex[match(SA_df$SAid,a$SAid)] - ) - - # file production - Oldscipen<-.Options$scipen - options(scipen=500) - - #remove all id - for (i in names(RDBESlist)) - { - RDBESlist[[i]][which(grepl(colnames(RDBESlist[[i]]),pat="[A-Z]id"))]<-NULL - } - - #===Save============= - - dir_outputs<-paste0(base_dir_outputs, - project_name_outputs,"/") - dir.create(dir_outputs, recursive=T, showWarnings=FALSE) - filename_output_CS <- paste0("H1.csv") - filename_output_SL <- paste0("HSL.csv") - filename_output_VD <- paste0("HVD.csv") - - - - lapply(RDBESlist, function(x, filename1 = paste0(dir_outputs,filename_output_CS)){ - if("DErecordType" %in% colnames(x)){ - write.table(x, file = filename1, append = FALSE, quote = FALSE, sep = ",", - eol = "\n", na = "NA", dec = ".", row.names = FALSE, - col.names = FALSE, qmethod = c("escape", "double")) - } else { - write.table(x, file = filename1, append = TRUE, quote = FALSE, sep = ",", - eol = "\n", na = "NA", dec = ".", row.names = FALSE, - col.names = FALSE, qmethod = c("escape", "double")) - } - }) - - b<-read.table(file=paste0(dir_outputs,filename_output_CS), header=F, sep=";") - b<-cbind(key,b) - b<-b[order(as.character(b$key), decreasing=FALSE),] - b<-b[!is.na(key),] - b$key<-NULL - b$V1<-as.character(b$V1) - - # saves CS output - write.table(b$V1, file=paste0(dir_outputs,filename_output_CS), col.names=FALSE, row.names = FALSE, quote=FALSE,sep=",") - - #---- Builds and saves dummySL ------------ - - SL_base$SLspeciesListName<-project_name_outputs - SL_base$SLyear<-DEyear - SL_base$SLinstitute<-SDinstitution - # saves SL output - write.table(SL_base, file=paste0(dir_outputs,filename_output_SL), col.names=FALSE, row.names = FALSE, quote=FALSE,sep=",") - - - #-------Builds and saves dummyVD--------------- - - - # saves VD output - - write.table(VD_base, file=paste0(dir_outputs,filename_output_VD), col.names=FALSE, row.names = FALSE, quote=FALSE,sep=",") diff --git a/data-raw/exampleData/TextBookExamples/BuildTextBookDataUpload_Pckg_SDAResources_gpa_H1.R b/data-raw/exampleData/TextBookExamples/BuildTextBookDataUpload_Pckg_SDAResources_gpa_H1.R index 417116c2..231036eb 100644 --- a/data-raw/exampleData/TextBookExamples/BuildTextBookDataUpload_Pckg_SDAResources_gpa_H1.R +++ b/data-raw/exampleData/TextBookExamples/BuildTextBookDataUpload_Pckg_SDAResources_gpa_H1.R @@ -18,7 +18,6 @@ # in SA each gpa score is a SAsampleWeightMeasured # ATT gpa scores are *100 to meet type requirement (integer) - rm(list=ls()) library(data.table) @@ -41,10 +40,12 @@ baseDir <- "" VD_base <- readRDS(paste0(baseDir,"aux_TextBookExamples/VD_base.rds")) SL_base <- readRDS(paste0(baseDir,"aux_TextBookExamples/SL_base.rds")) + IS_base <- readRDS(paste0(baseDir,"aux_TextBookExamples/IS_base.rds")) #nameof the directory where the outputs are saved currently + base_dir_outputs <- paste0(baseDir,"BuiltUploads") - if(!file.exists(base_dir_outputs)) dir.create(base_dir_outputs, recursive=T, showWarnings=FALSE) + dir.create(base_dir_outputs, recursive=T, showWarnings=FALSE) #========Outline of Hierarchy 1================ @@ -64,21 +65,6 @@ -# 1 DEid [] - int -# 2 DErecordType [M] - string -# 3 DEsamplingScheme [M] - SamplingScheme -# 4 DEsamplingSchemeType [M] - SamplingSchemeType -# 5 DEyear [M] - Year -# 6 DEstratumName [M] - StringLength100 -# 7 DEhierarchyCorrect [M] - YesNoFields -# 8 DEhierarchy [M] - RDBESUpperHierarchy -# 9 DEsampled [DV,M] - YesNoFields -# 10 DEreasonNotSampled [DV,O] - ReasonForNotSampling -# 11 DEnonResponseCollected [DV,O] - YesNoFields -# 12 DEauxiliaryVariableTotal [DV,O] - DecimalPrec3 -# 13 DEauxiliaryVariableValue [DV,O] - DecimalPrec3 -# 14 DEauxiliaryVariableName [DV,O] - AuxiliaryVariableName -# 15 DEauxiliaryVariableUnit[DV,O]-MUNIT DE_df_base<-expand.grid(DEyear=DEyear, DEstratumName="U",stringsAsFactors=F) @@ -98,20 +84,13 @@ DE_df<-data.frame( DEauxiliaryVariableTotal = "", DEauxiliaryVariableValue = "", DEauxiliaryVariableName = "", - DEauxiliaryVariableUnit = "", + DEauxiliaryVariableUnit = "", stringsAsFactors=FALSE ) #===SD============ - # x -# 1 SDid [M] - int -# 2 DEid [M] - int -# 3 SDrecordType [M] - string -# 4 SDcountry [M] - ISO_3166 -# 5 SDinstitution [M] - EDMO - SD_df<-data.frame( SDid=DE_df$DEid, DEid=DE_df$DEid, @@ -124,37 +103,6 @@ SD_df<-data.frame( #===VS============ - # x -# 1 VSid [M] - int -# 2 SDid [M] - int -# 3 VDid [M] - int -# 4 TEid [M/O] - int -# 5 VSrecordType [M] - string -# 6 VSsequenceNumber [M] - int -# 7 VSencryptedVesselCode [M] - StringLength100 -# 8 VSstratification [DV,M] - YesNoFields -# 9 VSstratumName [DV,M] - StringLength100 -# 10 VSclustering [DV,M] - Clustering -# 11 VSclusterName [DV,M] - StringLength100 -# 12 VSsampler [O] - Sampler -# 13 VSnumberTotal [DV,O] - int -# 14 VSnumberSampled [DV,O] - int -# 15 VSselectionProb [DV,O] - Decimal0.0000000000000001-1 -# 16 VSinclusionProb [DV,O] - Decimal0.0000000000000001-1 -# 17 VSselectionMethod [DV,M] - SelectionMethod -# 18 VSunitName [DV,M] - StringLength100 -# 19 VSselectionMethodCluster [DV,O] - SelectionMethod -# 20 VSnumberTotalClusters [DV,O] - int -# 21 VSnumberSampledClusters [DV,O] - int -# 22 VSselectionProbCluster [DV,O] - Decimal0.0000000000000001-1 -# 23 VSinclusionProbCluster [DV,O] - Decimal0.0000000000000001-1 -# 24 VSsampled [DV,M] - YesNoFields -# 25 VSreasonNotSampled [DV,O] - ReasonForNotSampling -# 26 VSnonResponseCollected [DV,O] - YesNoFields -# 27 VSauxiliaryVariableTotal [DV,O] - DecimalPrec3 -# 28 VSauxiliaryVariableValue [DV,O] - DecimalPrec3 -# 29 VSauxiliaryVariableName [DV,O] - AuxiliaryVariableName -# 30 VSauxiliaryVariableUnit [DV,O] - MUNIT #check_All_fields("VS") @@ -191,7 +139,7 @@ VS_df <- data.frame( VSselectionMethod = "SRSWOR", #M VSunitName = dataset$VSid,#M VSselectionMethodCluster = "SRSWOR", - VSnumberTotalClusters = "", + VSnumberTotalClusters = "", VSnumberSampledClusters = "", VSselectionProbCluster = "", VSinclusionProbCluster = "", @@ -201,7 +149,7 @@ VS_df <- data.frame( VSauxiliaryVariableTotal = "", VSauxiliaryVariableValue = "", VSauxiliaryVariableName = "", - VSauxiliaryVariableUnit = "", + VSauxiliaryVariableUnit = "", stringsAsFactors=FALSE ) @@ -223,49 +171,6 @@ VS_df$VSinclusionProbCluster<-5/100 #====FT=========== - -# 1 FTid [M] - int -# 2 OSid [M/O] - int -# 3 VSid [M/O] - int -# 4 VDid [M] - int -# 5 SDid [M/O] - int -# 6 FOid [M/O] - int -# 7 TEid [M/O] - int -# 8 FTrecordType [M] - string -# 9 FTencryptedVesselCode [M] - StringLength100 -# 10 FTsequenceNumber [M] - int -# 11 FTstratification [DV,M] - YesNoFields -# 12 FTstratumName [DV,M] - StringLength100 -# 13 FTclustering [DV,M] - Clustering -# 14 FTclusterName [DV,M] - StringLength100 -# 15 FTsampler [O] - Sampler -# 16 FTsamplingType [M] - SamplingContext -# 17 FTnumberOfHaulsOrSets [M/O] - IntZeroToMax -# 18 FTdepartureLocation [M/O] - Harbour_LOCODE -# 19 FTdepartureDate [M/O] - Date -# 20 FTdepartureTime [M/O] - StringLength60 -# 21 FTarrivalLocation [M] - Harbour_LOCODE -# 22 FTarrivalDate [M] - Date -# 23 FTarrivalTime [M/O] - StringLength60 -# 24 FTnumberTotal [DV,O] - int -# 25 FTnumberSampled [DV,O] - int -# 26 FTselectionProb [DV,O] - Decimal0-1 -# 27 FTinclusionProb [DV,O] - Decimal0-1 -# 28 FTselectionMethod [DV,M] - SelectionMethod -# 29 FTunitName [DV,M] - StringLength100 -# 30 FTselectionMethodCluster [DV,O] - SelectionMethod -# 31 FTnumberTotalClusters [DV,O] - int -# 32 FTnumberSampledClusters [DV,O] - int -# 33 FTselectionProbCluster [DV,O] - Decimal0-1 -# 34 FTinclusionProbCluster [DV,O] - Decimal0-1 -# 35 FTsampled [DV,M] - YesNoFields -# 36 FTreasonNotSampled [DV,O] - ReasonForNotSampling -# 37 FTnonResponseCollected [DV,O] - YesNoFields -# 38 FTauxiliaryVariableTotal [DV,O] - DecimalPrec3 -# 39 FTauxiliaryVariableValue [DV,O] - DecimalPrec3 -# 40 FTauxiliaryVariableName [DV,O] - AuxiliaryVariableName -# 41 FTauxiliaryVariableUnit [DV,O] - MUNIT - FT_df <- data.frame( FTid = dataset$VSid,#[M] - int OSid = "",# [M/O] - int @@ -290,6 +195,7 @@ FT_df <- data.frame( FTarrivalLocation = "ZWHWN", #[M] - Harbour_LOCODE FTarrivalDate=seq(from = as.Date("1965-01-01", format='%Y-%m-%d'), by=1, length.out=nrow(dataset)), #[M] - date FTarrivalTime="", #[O] - time + FTdominantLandingDate= "", FTnumberTotal= 1, #[DV,O] - int FTnumberSampled=1, #[DV,O] - int FTselectionProb=1, #[DV,O] - DecimalPrec10 @@ -307,7 +213,7 @@ FT_df <- data.frame( FTauxiliaryVariableTotal = "", FTauxiliaryVariableValue = "", FTauxiliaryVariableName = "", - FTauxiliaryVariableUnit = "", + FTauxiliaryVariableUnit = "", stringsAsFactors=FALSE ) @@ -315,72 +221,6 @@ FT_df <- data.frame( #====FO=========== -# 1 FOid [M] - int -# 2 FTid [M/O] - int -# 3 SDid [M/O] - int -# 4 FOrecordType [M] - string -# 5 FOstratification [DV,M] - YesNoFields -# 6 FOsequenceNumber [M] - int -# 7 FOstratumName [DV,M] - StringLength100 -# 8 FOclustering [DV,M] - Clustering -# 9 FOclusterName [DV,M] - StringLength100 -# 10 FOsampler [O] - Sampler -# 11 FOaggregationLevel [M] - AggregationLevel -# 12 FOvalidity [M] - ValidityFlag -# 13 FOcatchReg [M] - CatchRegistration -# 14 FOstartDate [O] - Date -# 15 FOstartTime [O] - Time -# 16 FOendDate [M] - Date -# 17 FOendTime [M/O] - Time -# 18 FOduration [M/O] - int -# 19 FOdurationSource [M] - DurationSource -# 20 FOhandlingTime [O] - int -# 21 FOstartLat [O] - Decimal-90.000000-90.000000 -# 22 FOstartLon [O] - Decimal-180.000000-180.000000 -# 23 FOstopLat [O] - Decimal-90.000000-90.000000 -# 24 FOstopLon [O] - Decimal-180.000000-180.000000 -# 25 FOexclusiveEconomicZoneIndicator [O] - ISO_3166 -# 26 FOarea [M] - ICES_Area -# 27 FOrectangle [O] - StatRec -# 28 FOfisheriesManagementUnit [O] - AreaNonFAO -# 29 FOgsaSubarea [M] - Areas_GFCM_GSA -# 30 FOjurisdictionArea [O] - JurisdictionArea -# 31 FOfishingDepth [O] - int -# 32 FOwaterDepth [O] - int -# 33 FOnationalFishingActivity [O] - NationalFishingActivity -# 34 FOmetier5 [O] - Metier5_FishingActivity -# 35 FOmetier6 [M] - Metier6_FishingActivity -# 36 FOgear [M] - GearType -# 37 FOmeshSize [O] - int -# 38 FOselectionDevice [O] - SelectionDevice -# 39 FOselectionDeviceMeshSize [O] - int -# 40 FOtargetSpecies [O] - TargetSpecies -# 41 FOincidentalByCatchMitigationDeviceFirst [M] - BycatchMitigationDevice -# 42 FOincidentalByCatchMitigationDeviceTargetFirst [M] - BycatchMitigationDeviceTarget -# 43 FOincidentalByCatchMitigationDeviceSecond [M] - BycatchMitigationDevice -# 44 FOincidentalByCatchMitigationDeviceTargetSecond [M] - BycatchMitigationDeviceTarget -# 45 FOgearDimensions [O] - int -# 46 FOobservationCode [M] - ObservationCode -# 47 FOnumberTotal [DV,O] - int -# 48 FOnumberSampled [DV,O] - int -# 49 FOselectionProb [DV,O] - Decimal0.0000000000000001-1 -# 50 FOinclusionProb [DV,O] - Decimal0.0000000000000001-1 -# 51 FOselectionMethod [DV,M] - SelectionMethod -# 52 FOunitName [DV,M] - StringLength100 -# 53 FOselectionMethodCluster [DV,O] - SelectionMethod -# 54 FOnumberTotalClusters [DV,O] - int -# 55 FOnumberSampledClusters [DV,O] - int -# 56 FOselectionProbCluster [DV,O] - Decimal0.0000000000000001-1 -# 57 FOinclusionProbCluster [DV,O] - Decimal0.0000000000000001-1 -# 58 FOsampled [DV,M] - YesNoFields -# 59 FOreasonNotSampled [DV,O] - ReasonForNotSampling -# 60 FOnonResponseCollected [DV,O] - YesNoFields -# 61 FOauxiliaryVariableTotal [DV,O] - DecimalPrec3 -# 62 FOauxiliaryVariableValue [DV,O] - DecimalPrec3 -# 63 FOauxiliaryVariableName [DV,O] - AuxiliaryVariableName -# 64 FOauxiliaryVariableUnit [DV,O] - MUNIT - - FO_df <- data.frame( FOid = dataset$VSid, FTid = dataset$VSid, @@ -400,24 +240,29 @@ FO_df <- data.frame( FOendDate = as.Date(FT_df$FTdepartureDate, format='%Y-%m-%d'),#M FOendTime ="",#M FOduration = 60, #M ATTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT!!!! + FOfishingDurationDataBasis = "Measured", FOdurationSource = "Crew", FOhandlingTime = "", FOstartLat="", # ATT! FOstartLon="", # ATT! FOstopLat="", FOstopLon="", - FOexclusiveEconomicZoneIndicator = "", # + FOexclusiveEconomicZoneIndicator = "", # FOarea = "27.3.a.21", #M FOrectangle = "", FOfisheriesManagementUnit = "", FOgsaSubarea = "NotApplicable", #M FOjurisdictionArea = "", + FOgeographicalDataBasis = "Measured", + FOgeographicalSource = "Crew", FOfishingDepth = "", FOwaterDepth = "", FOnationalFishingActivity = "", FOmetier5 = "", FOmetier6 = "OTT_CRU_70-89_2_35",#M FOgear = "OTT",#M + FOgearDataBasis = "Measured", + FOgearSource = "", FOmeshSize = "", FOselectionDevice = "", FOselectionDeviceMeshSize = "", @@ -445,52 +290,13 @@ FO_df <- data.frame( FOauxiliaryVariableTotal = "", FOauxiliaryVariableValue = "", FOauxiliaryVariableName = "", - FOauxiliaryVariableUnit = "", + FOauxiliaryVariableUnit = "", stringsAsFactors=FALSE ) #===SS============ -# 1 SSid [] - int -# 2 LEid [M/O] - int -# 3 FOid [M/O] - int -# 4 FTid [M/O] - int -# 5 OSid [M/O] - int -# 6 TEid [M/O] - int -# 7 SLid [M] - int -# 8 SSrecordType [M] - string -# 9 SSsequenceNumber [M] - int -# 10 SSstratification [DV,M] - YesNoFields -# 11 SSstratumName [DV,M] - StringLength100 -# 12 SSclustering [DV,M] - Clustering -# 13 SSclusterName [DV,M] - StringLength100 -# 14 SSobservationActivityType [DV,M] - ObservationActivityType -# 15 SScatchFraction [M] - CatchFraction -# 16 SSobservationType [DV,M] - ObservationMethod -# 17 SSsampler [O] - Sampler -# 18 SSspeciesListName [M] - StringLength100 -# 19 SSuseForCalculateZero [M] - YesNoFields -# 20 SStimeTotal [O] - int -# 21 SStimeSampled [O] - int -# 22 SSnumberTotal [DV,O] - int -# 23 SSnumberSampled [DV,O] - int -# 24 SSselectionProb [DV,O] - Decimal0.0000000000000001-1 -# 25 SSinclusionProb [DV,O] - Decimal0.0000000000000001-1 -# 26 SSselectionMethod [DV,M] - SelectionMethod -# 27 SSunitName [DV,M] - StringLength100 -# 28 SSselectionMethodCluster [DV,O] - SelectionMethod -# 29 SSnumberTotalClusters [DV,O] - int -# 30 SSnumberSampledClusters [DV,O] - int -# 31 SSselectionProbCluster [DV,O] - Decimal0.0000000000000001-1 -# 32 SSinclusionProbCluster [DV,O] - Decimal0.0000000000000001-1 -# 33 SSsampled [DV,M] - YesNoFields -# 34 SSreasonNotSampled [DV,O] - ReasonForNotSampling -# 35 SSnonResponseCollected [DV,O] - YesNoFields -# 36 SSauxiliaryVariableTotal [DV,O] - DecimalPrec3 -# 37 SSauxiliaryVariableValue [DV,O] - DecimalPrec3 -# 38 SSauxiliaryVariableName [DV,O] - AuxiliaryVariableName -# 39 SSauxiliaryVariableUnit [DV,O] - MUNIT SS_df<-data.frame( SSid = dataset$VSid, @@ -513,8 +319,10 @@ SS_df<-data.frame( SSspeciesListName = project_name_outputs, #M SSuseForCalculateZero = "N", #M SStimeTotal = "", + SStimeTotalDataBasis = "Measured", SStimeSampled = "", SSnumberTotal = 1, + SSnumberTotalDataBasis = "Measured", SSnumberSampled = 1, SSselectionProb = 1, SSinclusionProb = 1, @@ -531,123 +339,72 @@ SS_df<-data.frame( SSauxiliaryVariableTotal = "", SSauxiliaryVariableValue = "", SSauxiliaryVariableName = "", - SSauxiliaryVariableUnit = "", - stringsAsFactors=FALSE + SSauxiliaryVariableUnit = "", + stringsAsFactors=FALSE ) #====SA=========== - -# 1 SAid [] - int -# 2 SSid [] - int -# 3 SArecordType [M] - string -# 4 SAsequenceNumber [M] - int -# 5 SAparentSequenceNumber [O] - int -# 6 SAstratification [DV,M] - YesNoFields -# 7 SAstratumName [DV,M] - StringLength100 -# 8 SAspeciesCode [M] - SpecWoRMS -# 9 SAspeciesCodeFAO [O] - SpecASFIS -# 10 SAstateOfProcessing [M] - StateOfProcessing -# 11 SApresentation [M] - ProductPresentation -# 12 SAspecimensState [M] - SpecimensState -# 13 SAcatchCategory [M] - CatchCategory -# 14 SAlandingCategory [O] - LandingCategory -# 15 SAcommSizeCatScale [O] - CommercialSizeCategoryScale -# 16 SAcommSizeCat [O] - CommercialSizeCategory -# 17 SAsex [M] - SEXCO -# 18 SAexclusiveEconomicZoneIndicator [O] - ISO_3166 -# 19 SAarea [O] - ICES_Area -# 20 SArectangle [O] - StatRec -# 21 SAfisheriesManagementUnit [O] - AreaNonFAO -# 22 SAgsaSubarea [M] - Areas_GFCM_GSA -# 23 SAjurisdictionArea [O] - JurisdictionArea -# 24 SAnationalFishingActivity [O] - NationalFishingActivity -# 25 SAmetier5 [O] - Metier5_FishingActivity -# 26 SAmetier6 [O] - Metier6_FishingActivity -# 27 SAgear [O] - GearType -# 28 SAmeshSize [O] - int -# 29 SAselectionDevice [O] - SelectionDevice -# 30 SAselectionDeviceMeshSize [O] - int -# 31 SAunitType [M] - SamplingUnit -# 32 SAtotalWeightLive [O] - int -# 33 SAsampleWeightLive [O] - int -# 34 SAnumberTotal [DV,O] - Decimal0.1-999999999 -# 35 SAnumberSampled [DV,O] - Decimal0.1-999999999 -# 36 SAselectionProb [DV,O] - Decimal0-1 -# 37 SAinclusionProb [DV,O] - Decimal0-1 -# 38 SAselectionMethod [DV,M] - SelectionMethod -# 39 SAunitName [DV,M] - StringLength100 -# 40 SAlowerHierarchy [M/O] - LowerHierarchy -# 41 SAsampler [O] - Sampler -# 42 SAsampled [DV,M] - YesNoFields -# 43 SAreasonNotSampled [DV,O] - ReasonForNotSampling -# 44 SAnonResponseCollected [DV,O] - YesNoFields -# 45 SAreasonNotSampledFM [DV,O] - ReasonForNotSampling -# 46 SAreasonNotSampledBV [DV,O] - ReasonForNotSampling -# 47 SAtotalWeightMeasured [O] - int -# 48 SAsampleWeightMeasured [O] - int -# 49 SAconversionFactorMeasLive [O] - Decimal0.900-10 -# 50 SAauxiliaryVariableTotal [DV,O] - DecimalPrec3 -# 51 SAauxiliaryVariableValue [DV,O] - DecimalPrec3 -# 52 SAauxiliaryVariableName [DV,O] - AuxiliaryVariableName -# 53 SAauxiliaryVariableUnit [DV,O] - MUNIT - - SA_df<-data.frame( - SAid = dataset$VSid, - SSid = dataset$VSid, - SArecordType = "SA", #M - SAsequenceNumber = dataset$VSid, #M - SAparentSequenceNumber = "", - SAstratification = "N", #M - SAstratumName = "U", #M - SAspeciesCode = 107254, #M - SAspeciesCodeFAO = "NEP", - SAstateOfProcessing = "FRE", #M - SApresentation = "WHL", #M - SAspecimensState = "DeadOrZeroProbSurvival", #M - SAcatchCategory = "Lan", #M - SAlandingCategory = "", - SAcommSizeCatScale = "", - SAcommSizeCat = "", - SAsex = "U", #M - SAexclusiveEconomicZoneIndicator = "", - SAarea = "", - SArectangle = "", - SAfisheriesManagementUnit = "", - SAgsaSubarea = "NotApplicable", #M - SAjurisdictionArea = "", - SAnationalFishingActivity = "", - SAmetier5 = "", - SAmetier6 = "", - SAgear = "", - SAmeshSize = "", - SAselectionDevice = "", - SAselectionDeviceMeshSize = "", - SAunitType = "Box", #M - SAtotalWeightLive = "", - SAsampleWeightLive = "", - SAnumberTotal = 1, - SAnumberSampled = 1, - SAselectionProb = 1, - SAinclusionProb = 1, - SAselectionMethod = "CENSUS", #M - SAunitName = dataset$VSid, #M - SAlowerHierarchy = "D", - SAsampler = "Observer", - SAsampled = "N", #M - SAreasonNotSampled = "", - SAnonResponseCollected = "N", - SAreasonNotSampledFM = "", - SAreasonNotSampledBV = "", - SAtotalWeightMeasured = dataset[[target_var]]*100, # *100 to meet type required (integer) - SAsampleWeightMeasured = dataset[[target_var]]*100, # *100 to meet type required (integer) - SAconversionFactorMeasLive = 1, - SAauxiliaryVariableTotal = "", - SAauxiliaryVariableValue = "", - SAauxiliaryVariableName = "", - SAauxiliaryVariableUnit = "", - stringsAsFactors=FALSE +SAid = dataset$VSid, +SSid = dataset$VSid, +SArecordType = "SA", #M +SAsequenceNumber = dataset$VSid, #M +SAparentSequenceNumber = "", +SAstratification = "N", #M +SAstratumName = "U", #M +SAspeciesCode = 107254, #M +SAspeciesCodeFAO = "NEP", +SAstateOfProcessing = "FRE", #M +SApresentation = "WHL", #M +SAspecimensState = "DeadOrZeroProbSurvival", #M +SAcatchCategory = "Lan", #M +SAlandingCategory = "", +SAcommSizeCatScale = "", +SAcommSizeCat = "", +SAsex = "U", #M +SAexclusiveEconomicZoneIndicator = "", +SAarea = "", +SArectangle = "", +SAfisheriesManagementUnit = "", +SAgsaSubarea = "NotApplicable", #M +SAjurisdictionArea = "", +SAgeographicalDataBasis = "Measured", +SAgeographicalSource = "", +SAnationalFishingActivity = "", +SAmetier5 = "", +SAmetier6 = "", +SAgear = "", +SAgearDataBasis = "Measured", +SAgearSource = "", +SAmeshSize = "", +SAselectionDevice = "", +SAselectionDeviceMeshSize = "", +SAunitType = "Box", #M +SAtotalWeightLive = "", +SAsampleWeightLive = "", +SAnumberTotal = 1, +SAnumberSampled = 1, +SAselectionProb = 1, +SAinclusionProb = 1, +SAselectionMethod = "CENSUS", #M +SAunitName = dataset$VSid, #M +SAlowerHierarchy = "D", +SAsampler = "Observer", +SAsampled = "N", #M +SAreasonNotSampled = "", +SAnonResponseCollected = "N", +SAreasonNotSampledFM = "", +SAreasonNotSampledBV = "", +SAtotalWeightMeasured = dataset[[target_var]]*100, # *100 to meet type required (integer) +SAtotalWeightMeasuredDataBasis = "Measured", +SAsampleWeightMeasured = dataset[[target_var]]*100, # *100 to meet type required (integer) +SAconversionFactorMeasLive = 1, +SAauxiliaryVariableTotal = "", +SAauxiliaryVariableValue = "", +SAauxiliaryVariableName = "", +SAauxiliaryVariableUnit = "", +stringsAsFactors=FALSE ) @@ -700,15 +457,14 @@ RDBESlist[[i]][which(grepl(colnames(RDBESlist[[i]]),pat="[A-Z]id"))]<-NULL #===Save============ - dir_outputs<-paste0(base_dir_outputs,"/", - project_name_outputs,"/") - dir.create(dir_outputs, recursive=T, showWarnings=FALSE) - filename_output_CS <- paste0("H1.csv") - filename_output_SL <- paste0("HSL.csv") - filename_output_VD <- paste0("HVD.csv") + dir_outputs<-paste0(base_dir_outputs,"/", project_name_outputs,"/") + dir.create(dir_outputs, recursive=T, showWarnings=FALSE) + filename_output_CS <- paste0(dir_outputs,"H1.csv") + filename_output_SL <- paste0(dir_outputs,"HSL.csv") + filename_output_VD <- paste0(dir_outputs,"HVD.csv") -lapply(RDBESlist, function(x, filename1 = paste0(dir_outputs,filename_output_CS)){ +lapply(RDBESlist, function(x, filename1 = filename_output_CS){ if("DErecordType" %in% colnames(x)){ write.table(x, file = filename1, append = FALSE, quote = FALSE, sep = ",", eol = "\n", na = "NA", dec = ".", row.names = FALSE, @@ -720,7 +476,7 @@ write.table(x, file = filename1, append = TRUE, quote = FALSE, sep = ",", } }) -b<-read.table(file=paste0(dir_outputs,filename_output_CS), header=F, sep=";") +b<-read.table(file = filename_output_CS, header=F, sep=";") b<-cbind(key,b) b<-b[order(as.character(b$key), decreasing=FALSE),] b<-b[!is.na(key),] @@ -728,27 +484,29 @@ b$key<-NULL b$V1<-as.character(b$V1) # saves CS output -write.table(b$V1, file=paste0(dir_outputs,filename_output_CS), col.names=FALSE, row.names = FALSE, quote=FALSE,sep=",") +write.table(b$V1, file = filename_output_CS, col.names=FALSE, row.names = FALSE, quote=FALSE,sep=",") -# -----Builds and saves dummySL----------------- +# -----Builds and saves dummySL and dummyIS----------------- SL_base$SLspeciesListName<-project_name_outputs SL_base$SLyear<-DEyear SL_base$SLinstitute<-SDinstitution -# saves SL output - write.table(SL_base, file=paste0(dir_outputs,filename_output_SL), col.names=FALSE, row.names = FALSE, quote=FALSE,sep=",") - + + # saves SL output + write.table(rbind(paste(SL_base, collapse=","), paste(IS_base, collapse=",")), file=filename_output_SL, col.names=FALSE, row.names = FALSE, quote=FALSE,sep=",") # -----Builds and saves dummyVD----------------- - # saves VD output VD_base$VDyear<-DEyear - write.table(VD_base, file=paste0(dir_outputs,filename_output_VD), col.names=FALSE, row.names = FALSE, quote=FALSE,sep=",") + write.table(VD_base, file=filename_output_VD, col.names=FALSE, row.names = FALSE, quote=FALSE,sep=",") +# check - is this needed? +if(1==2){ + # -----Clean SL after dowload----------------- # cleans excess of SL rows frequently present in download @@ -767,4 +525,5 @@ library(zip) zip::zip(zipfile = paste0(project_name_outputs,".zip"), files =paste0(dir_outputs,"tmp/",dir(paste0(dir_outputs,"tmp"))), mode="cherry-pick") unlink(paste0(dir_outputs,"tmp"), force=T, recursive = T) +} diff --git a/data-raw/exampleData/TextBookExamples/BuildTextBookDataUpload_Pckg_SDAResources_schools_H1.R b/data-raw/exampleData/TextBookExamples/BuildTextBookDataUpload_Pckg_SDAResources_schools_H1.R index 16020d49..ebc7649e 100644 --- a/data-raw/exampleData/TextBookExamples/BuildTextBookDataUpload_Pckg_SDAResources_schools_H1.R +++ b/data-raw/exampleData/TextBookExamples/BuildTextBookDataUpload_Pckg_SDAResources_schools_H1.R @@ -48,10 +48,11 @@ baseDir <- "" VD_base <- readRDS(paste0(baseDir,"aux_TextBookExamples/VD_base.rds")) SL_base <- readRDS(paste0(baseDir,"aux_TextBookExamples/SL_base.rds")) + IS_base <- readRDS(paste0(baseDir,"aux_TextBookExamples/IS_base.rds")) #nameof the directory where the outputs are saved currently base_dir_outputs <- paste0(baseDir,"BuiltUploads") - if(!file.exists(base_dir_outputs)) dir.create(base_dir_outputs, recursive=T, showWarnings=FALSE) + dir.create(base_dir_outputs, recursive=T, showWarnings=FALSE) #========Outline of Hierarchy 1================ @@ -71,22 +72,6 @@ -# 1 DEid [] - int -# 2 DErecordType [M] - string -# 3 DEsamplingScheme [M] - SamplingScheme -# 4 DEsamplingSchemeType [M] - SamplingSchemeType -# 5 DEyear [M] - Year -# 6 DEstratumName [M] - StringLength100 -# 7 DEhierarchyCorrect [M] - YesNoFields -# 8 DEhierarchy [M] - RDBESUpperHierarchy -# 9 DEsampled [DV,M] - YesNoFields -# 10 DEreasonNotSampled [DV,O] - ReasonForNotSampling -# 11 DEnonResponseCollected [DV,O] - YesNoFields -# 12 DEauxiliaryVariableTotal [DV,O] - DecimalPrec3 -# 13 DEauxiliaryVariableValue [DV,O] - DecimalPrec3 -# 14 DEauxiliaryVariableName [DV,O] - AuxiliaryVariableName -# 15 DEauxiliaryVariableUnit[DV,O]-MUNIT - DE_df_base<-expand.grid(DEyear=DEyear, DEstratumName="U",stringsAsFactors=F) @@ -105,20 +90,13 @@ DE_df<-data.frame( DEauxiliaryVariableTotal = "", DEauxiliaryVariableValue = "", DEauxiliaryVariableName = "", - DEauxiliaryVariableUnit = "", + DEauxiliaryVariableUnit = "", stringsAsFactors=FALSE ) #===SD============ - # x -# 1 SDid [M] - int -# 2 DEid [M] - int -# 3 SDrecordType [M] - string -# 4 SDcountry [M] - ISO_3166 -# 5 SDinstitution [M] - EDMO - SD_df<-data.frame( SDid=DE_df$DEid, DEid=DE_df$DEid, @@ -131,37 +109,6 @@ SD_df<-data.frame( #===VS============ - # x -# 1 VSid [M] - int -# 2 SDid [M] - int -# 3 VDid [M] - int -# 4 TEid [M/O] - int -# 5 VSrecordType [M] - string -# 6 VSsequenceNumber [M] - int -# 7 VSencryptedVesselCode [M] - StringLength100 -# 8 VSstratification [DV,M] - YesNoFields -# 9 VSstratumName [DV,M] - StringLength100 -# 10 VSclustering [DV,M] - Clustering -# 11 VSclusterName [DV,M] - StringLength100 -# 12 VSsampler [O] - Sampler -# 13 VSnumberTotal [DV,O] - int -# 14 VSnumberSampled [DV,O] - int -# 15 VSselectionProb [DV,O] - Decimal0.0000000000000001-1 -# 16 VSinclusionProb [DV,O] - Decimal0.0000000000000001-1 -# 17 VSselectionMethod [DV,M] - SelectionMethod -# 18 VSunitName [DV,M] - StringLength100 -# 19 VSselectionMethodCluster [DV,O] - SelectionMethod -# 20 VSnumberTotalClusters [DV,O] - int -# 21 VSnumberSampledClusters [DV,O] - int -# 22 VSselectionProbCluster [DV,O] - Decimal0.0000000000000001-1 -# 23 VSinclusionProbCluster [DV,O] - Decimal0.0000000000000001-1 -# 24 VSsampled [DV,M] - YesNoFields -# 25 VSreasonNotSampled [DV,O] - ReasonForNotSampling -# 26 VSnonResponseCollected [DV,O] - YesNoFields -# 27 VSauxiliaryVariableTotal [DV,O] - DecimalPrec3 -# 28 VSauxiliaryVariableValue [DV,O] - DecimalPrec3 -# 29 VSauxiliaryVariableName [DV,O] - AuxiliaryVariableName -# 30 VSauxiliaryVariableUnit [DV,O] - MUNIT #check_All_fields("VS") @@ -180,7 +127,6 @@ SD_df<-data.frame( if(!test) stop( "duplicated VSencryptedVesselCode") - VS_df <- data.frame( VSid = dataset$VSid, SDid = as.integer(1), @@ -211,7 +157,7 @@ VS_df <- data.frame( VSauxiliaryVariableTotal = "", VSauxiliaryVariableValue = "", VSauxiliaryVariableName = "", - VSauxiliaryVariableUnit = "", + VSauxiliaryVariableUnit = "", stringsAsFactors=FALSE ) @@ -231,49 +177,6 @@ VS_df$VSinclusionProbCluster<-"" #====FT=========== - -# 1 FTid [M] - int -# 2 OSid [M/O] - int -# 3 VSid [M/O] - int -# 4 VDid [M] - int -# 5 SDid [M/O] - int -# 6 FOid [M/O] - int -# 7 TEid [M/O] - int -# 8 FTrecordType [M] - string -# 9 FTencryptedVesselCode [M] - StringLength100 -# 10 FTsequenceNumber [M] - int -# 11 FTstratification [DV,M] - YesNoFields -# 12 FTstratumName [DV,M] - StringLength100 -# 13 FTclustering [DV,M] - Clustering -# 14 FTclusterName [DV,M] - StringLength100 -# 15 FTsampler [O] - Sampler -# 16 FTsamplingType [M] - SamplingContext -# 17 FTnumberOfHaulsOrSets [M/O] - IntZeroToMax -# 18 FTdepartureLocation [M/O] - Harbour_LOCODE -# 19 FTdepartureDate [M/O] - Date -# 20 FTdepartureTime [M/O] - StringLength60 -# 21 FTarrivalLocation [M] - Harbour_LOCODE -# 22 FTarrivalDate [M] - Date -# 23 FTarrivalTime [M/O] - StringLength60 -# 24 FTnumberTotal [DV,O] - int -# 25 FTnumberSampled [DV,O] - int -# 26 FTselectionProb [DV,O] - Decimal0-1 -# 27 FTinclusionProb [DV,O] - Decimal0-1 -# 28 FTselectionMethod [DV,M] - SelectionMethod -# 29 FTunitName [DV,M] - StringLength100 -# 30 FTselectionMethodCluster [DV,O] - SelectionMethod -# 31 FTnumberTotalClusters [DV,O] - int -# 32 FTnumberSampledClusters [DV,O] - int -# 33 FTselectionProbCluster [DV,O] - Decimal0-1 -# 34 FTinclusionProbCluster [DV,O] - Decimal0-1 -# 35 FTsampled [DV,M] - YesNoFields -# 36 FTreasonNotSampled [DV,O] - ReasonForNotSampling -# 37 FTnonResponseCollected [DV,O] - YesNoFields -# 38 FTauxiliaryVariableTotal [DV,O] - DecimalPrec3 -# 39 FTauxiliaryVariableValue [DV,O] - DecimalPrec3 -# 40 FTauxiliaryVariableName [DV,O] - AuxiliaryVariableName -# 41 FTauxiliaryVariableUnit [DV,O] - MUNIT - FT_df <- data.frame( FTid = dataset$VSid,#[M] - int OSid = "",# [M/O] - int @@ -298,6 +201,7 @@ FT_df <- data.frame( FTarrivalLocation = "ZWHWN", #[M] - Harbour_LOCODE FTarrivalDate=seq(from = as.Date("1965-01-01", format='%Y-%m-%d'), by=1, length.out=nrow(dataset)), #[M] - date FTarrivalTime="", #[O] - time + FTdominantLandingDate= "", FTnumberTotal= 1, #[DV,O] - int FTnumberSampled=1, #[DV,O] - int FTselectionProb=1, #[DV,O] - DecimalPrec10 @@ -315,7 +219,7 @@ FT_df <- data.frame( FTauxiliaryVariableTotal = "", FTauxiliaryVariableValue = "", FTauxiliaryVariableName = "", - FTauxiliaryVariableUnit = "", + FTauxiliaryVariableUnit = "", stringsAsFactors=FALSE ) @@ -323,72 +227,6 @@ FT_df <- data.frame( #====FO=========== -# 1 FOid [M] - int -# 2 FTid [M/O] - int -# 3 SDid [M/O] - int -# 4 FOrecordType [M] - string -# 5 FOstratification [DV,M] - YesNoFields -# 6 FOsequenceNumber [M] - int -# 7 FOstratumName [DV,M] - StringLength100 -# 8 FOclustering [DV,M] - Clustering -# 9 FOclusterName [DV,M] - StringLength100 -# 10 FOsampler [O] - Sampler -# 11 FOaggregationLevel [M] - AggregationLevel -# 12 FOvalidity [M] - ValidityFlag -# 13 FOcatchReg [M] - CatchRegistration -# 14 FOstartDate [O] - Date -# 15 FOstartTime [O] - Time -# 16 FOendDate [M] - Date -# 17 FOendTime [M/O] - Time -# 18 FOduration [M/O] - int -# 19 FOdurationSource [M] - DurationSource -# 20 FOhandlingTime [O] - int -# 21 FOstartLat [O] - Decimal-90.000000-90.000000 -# 22 FOstartLon [O] - Decimal-180.000000-180.000000 -# 23 FOstopLat [O] - Decimal-90.000000-90.000000 -# 24 FOstopLon [O] - Decimal-180.000000-180.000000 -# 25 FOexclusiveEconomicZoneIndicator [O] - ISO_3166 -# 26 FOarea [M] - ICES_Area -# 27 FOrectangle [O] - StatRec -# 28 FOfisheriesManagementUnit [O] - AreaNonFAO -# 29 FOgsaSubarea [M] - Areas_GFCM_GSA -# 30 FOjurisdictionArea [O] - JurisdictionArea -# 31 FOfishingDepth [O] - int -# 32 FOwaterDepth [O] - int -# 33 FOnationalFishingActivity [O] - NationalFishingActivity -# 34 FOmetier5 [O] - Metier5_FishingActivity -# 35 FOmetier6 [M] - Metier6_FishingActivity -# 36 FOgear [M] - GearType -# 37 FOmeshSize [O] - int -# 38 FOselectionDevice [O] - SelectionDevice -# 39 FOselectionDeviceMeshSize [O] - int -# 40 FOtargetSpecies [O] - TargetSpecies -# 41 FOincidentalByCatchMitigationDeviceFirst [M] - BycatchMitigationDevice -# 42 FOincidentalByCatchMitigationDeviceTargetFirst [M] - BycatchMitigationDeviceTarget -# 43 FOincidentalByCatchMitigationDeviceSecond [M] - BycatchMitigationDevice -# 44 FOincidentalByCatchMitigationDeviceTargetSecond [M] - BycatchMitigationDeviceTarget -# 45 FOgearDimensions [O] - int -# 46 FOobservationCode [M] - ObservationCode -# 47 FOnumberTotal [DV,O] - int -# 48 FOnumberSampled [DV,O] - int -# 49 FOselectionProb [DV,O] - Decimal0.0000000000000001-1 -# 50 FOinclusionProb [DV,O] - Decimal0.0000000000000001-1 -# 51 FOselectionMethod [DV,M] - SelectionMethod -# 52 FOunitName [DV,M] - StringLength100 -# 53 FOselectionMethodCluster [DV,O] - SelectionMethod -# 54 FOnumberTotalClusters [DV,O] - int -# 55 FOnumberSampledClusters [DV,O] - int -# 56 FOselectionProbCluster [DV,O] - Decimal0.0000000000000001-1 -# 57 FOinclusionProbCluster [DV,O] - Decimal0.0000000000000001-1 -# 58 FOsampled [DV,M] - YesNoFields -# 59 FOreasonNotSampled [DV,O] - ReasonForNotSampling -# 60 FOnonResponseCollected [DV,O] - YesNoFields -# 61 FOauxiliaryVariableTotal [DV,O] - DecimalPrec3 -# 62 FOauxiliaryVariableValue [DV,O] - DecimalPrec3 -# 63 FOauxiliaryVariableName [DV,O] - AuxiliaryVariableName -# 64 FOauxiliaryVariableUnit [DV,O] - MUNIT - - FO_df <- data.frame( FOid = dataset$VSid, FTid = dataset$VSid, @@ -408,24 +246,29 @@ FO_df <- data.frame( FOendDate = as.Date(FT_df$FTdepartureDate, format='%Y-%m-%d'),#M FOendTime ="",#M FOduration = 60, #M ATTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT!!!! + FOfishingDurationDataBasis = "Measured", FOdurationSource = "Crew", FOhandlingTime = "", FOstartLat="", # ATT! FOstartLon="", # ATT! FOstopLat="", FOstopLon="", - FOexclusiveEconomicZoneIndicator = "", # + FOexclusiveEconomicZoneIndicator = "", # FOarea = "27.3.a.21", #M FOrectangle = "", FOfisheriesManagementUnit = "", FOgsaSubarea = "NotApplicable", #M FOjurisdictionArea = "", + FOgeographicalDataBasis = "Measured", + FOgeographicalSource = "Crew", FOfishingDepth = "", FOwaterDepth = "", FOnationalFishingActivity = "", FOmetier5 = "", FOmetier6 = "OTT_CRU_70-89_2_35",#M FOgear = "OTT",#M + FOgearDataBasis = "Measured", + FOgearSource = "", FOmeshSize = "", FOselectionDevice = "", FOselectionDeviceMeshSize = "", @@ -453,52 +296,13 @@ FO_df <- data.frame( FOauxiliaryVariableTotal = "", FOauxiliaryVariableValue = "", FOauxiliaryVariableName = "", - FOauxiliaryVariableUnit = "", + FOauxiliaryVariableUnit = "", stringsAsFactors=FALSE ) #===SS============ -# 1 SSid [] - int -# 2 LEid [M/O] - int -# 3 FOid [M/O] - int -# 4 FTid [M/O] - int -# 5 OSid [M/O] - int -# 6 TEid [M/O] - int -# 7 SLid [M] - int -# 8 SSrecordType [M] - string -# 9 SSsequenceNumber [M] - int -# 10 SSstratification [DV,M] - YesNoFields -# 11 SSstratumName [DV,M] - StringLength100 -# 12 SSclustering [DV,M] - Clustering -# 13 SSclusterName [DV,M] - StringLength100 -# 14 SSobservationActivityType [DV,M] - ObservationActivityType -# 15 SScatchFraction [M] - CatchFraction -# 16 SSobservationType [DV,M] - ObservationMethod -# 17 SSsampler [O] - Sampler -# 18 SSspeciesListName [M] - StringLength100 -# 19 SSuseForCalculateZero [M] - YesNoFields -# 20 SStimeTotal [O] - int -# 21 SStimeSampled [O] - int -# 22 SSnumberTotal [DV,O] - int -# 23 SSnumberSampled [DV,O] - int -# 24 SSselectionProb [DV,O] - Decimal0.0000000000000001-1 -# 25 SSinclusionProb [DV,O] - Decimal0.0000000000000001-1 -# 26 SSselectionMethod [DV,M] - SelectionMethod -# 27 SSunitName [DV,M] - StringLength100 -# 28 SSselectionMethodCluster [DV,O] - SelectionMethod -# 29 SSnumberTotalClusters [DV,O] - int -# 30 SSnumberSampledClusters [DV,O] - int -# 31 SSselectionProbCluster [DV,O] - Decimal0.0000000000000001-1 -# 32 SSinclusionProbCluster [DV,O] - Decimal0.0000000000000001-1 -# 33 SSsampled [DV,M] - YesNoFields -# 34 SSreasonNotSampled [DV,O] - ReasonForNotSampling -# 35 SSnonResponseCollected [DV,O] - YesNoFields -# 36 SSauxiliaryVariableTotal [DV,O] - DecimalPrec3 -# 37 SSauxiliaryVariableValue [DV,O] - DecimalPrec3 -# 38 SSauxiliaryVariableName [DV,O] - AuxiliaryVariableName -# 39 SSauxiliaryVariableUnit [DV,O] - MUNIT SS_df<-data.frame( SSid = dataset$VSid, @@ -521,8 +325,10 @@ SS_df<-data.frame( SSspeciesListName = project_name_outputs, #M SSuseForCalculateZero = "N", #M SStimeTotal = "", + SStimeTotalDataBasis = "Measured", SStimeSampled = "", SSnumberTotal = 1, + SSnumberTotalDataBasis = "Measured", SSnumberSampled = 1, SSselectionProb = 1, SSinclusionProb = 1, @@ -539,123 +345,72 @@ SS_df<-data.frame( SSauxiliaryVariableTotal = "", SSauxiliaryVariableValue = "", SSauxiliaryVariableName = "", - SSauxiliaryVariableUnit = "", - stringsAsFactors=FALSE + SSauxiliaryVariableUnit = "", + stringsAsFactors=FALSE ) #====SA=========== - -# 1 SAid [] - int -# 2 SSid [] - int -# 3 SArecordType [M] - string -# 4 SAsequenceNumber [M] - int -# 5 SAparentSequenceNumber [O] - int -# 6 SAstratification [DV,M] - YesNoFields -# 7 SAstratumName [DV,M] - StringLength100 -# 8 SAspeciesCode [M] - SpecWoRMS -# 9 SAspeciesCodeFAO [O] - SpecASFIS -# 10 SAstateOfProcessing [M] - StateOfProcessing -# 11 SApresentation [M] - ProductPresentation -# 12 SAspecimensState [M] - SpecimensState -# 13 SAcatchCategory [M] - CatchCategory -# 14 SAlandingCategory [O] - LandingCategory -# 15 SAcommSizeCatScale [O] - CommercialSizeCategoryScale -# 16 SAcommSizeCat [O] - CommercialSizeCategory -# 17 SAsex [M] - SEXCO -# 18 SAexclusiveEconomicZoneIndicator [O] - ISO_3166 -# 19 SAarea [O] - ICES_Area -# 20 SArectangle [O] - StatRec -# 21 SAfisheriesManagementUnit [O] - AreaNonFAO -# 22 SAgsaSubarea [M] - Areas_GFCM_GSA -# 23 SAjurisdictionArea [O] - JurisdictionArea -# 24 SAnationalFishingActivity [O] - NationalFishingActivity -# 25 SAmetier5 [O] - Metier5_FishingActivity -# 26 SAmetier6 [O] - Metier6_FishingActivity -# 27 SAgear [O] - GearType -# 28 SAmeshSize [O] - int -# 29 SAselectionDevice [O] - SelectionDevice -# 30 SAselectionDeviceMeshSize [O] - int -# 31 SAunitType [M] - SamplingUnit -# 32 SAtotalWeightLive [O] - int -# 33 SAsampleWeightLive [O] - int -# 34 SAnumberTotal [DV,O] - Decimal0.1-999999999 -# 35 SAnumberSampled [DV,O] - Decimal0.1-999999999 -# 36 SAselectionProb [DV,O] - Decimal0-1 -# 37 SAinclusionProb [DV,O] - Decimal0-1 -# 38 SAselectionMethod [DV,M] - SelectionMethod -# 39 SAunitName [DV,M] - StringLength100 -# 40 SAlowerHierarchy [M/O] - LowerHierarchy -# 41 SAsampler [O] - Sampler -# 42 SAsampled [DV,M] - YesNoFields -# 43 SAreasonNotSampled [DV,O] - ReasonForNotSampling -# 44 SAnonResponseCollected [DV,O] - YesNoFields -# 45 SAreasonNotSampledFM [DV,O] - ReasonForNotSampling -# 46 SAreasonNotSampledBV [DV,O] - ReasonForNotSampling -# 47 SAtotalWeightMeasured [O] - int -# 48 SAsampleWeightMeasured [O] - int -# 49 SAconversionFactorMeasLive [O] - Decimal0.900-10 -# 50 SAauxiliaryVariableTotal [DV,O] - DecimalPrec3 -# 51 SAauxiliaryVariableValue [DV,O] - DecimalPrec3 -# 52 SAauxiliaryVariableName [DV,O] - AuxiliaryVariableName -# 53 SAauxiliaryVariableUnit [DV,O] - MUNIT - - SA_df<-data.frame( - SAid = 1:nrow(dataset), - SSid = dataset$VSid, - SArecordType = "SA", #M - SAsequenceNumber = dataset$VSid, #M - SAparentSequenceNumber = "", - SAstratification = "N", #M - SAstratumName = "U", #M - SAspeciesCode = 107254, #M - SAspeciesCodeFAO = "NEP", - SAstateOfProcessing = "FRE", #M - SApresentation = "WHL", #M - SAspecimensState = "DeadOrZeroProbSurvival", #M - SAcatchCategory = "Lan", #M - SAlandingCategory = "", - SAcommSizeCatScale = "", - SAcommSizeCat = "", - SAsex = "U", #M - SAexclusiveEconomicZoneIndicator = "", - SAarea = "", - SArectangle = "", - SAfisheriesManagementUnit = "", - SAgsaSubarea = "NotApplicable", #M - SAjurisdictionArea = "", - SAnationalFishingActivity = "", - SAmetier5 = "", - SAmetier6 = "", - SAgear = "", - SAmeshSize = "", - SAselectionDevice = "", - SAselectionDeviceMeshSize = "", - SAunitType = "Box", #M - SAtotalWeightLive = "", - SAsampleWeightLive = "", - SAnumberTotal = 1, - SAnumberSampled = 1, - SAselectionProb = 1, - SAinclusionProb = 1, - SAselectionMethod = "CENSUS", #M - SAunitName = dataset$VSid, #M - SAlowerHierarchy = "D", - SAsampler = "Observer", - SAsampled = "N", #M - SAreasonNotSampled = "", - SAnonResponseCollected = "N", - SAreasonNotSampledFM = "", - SAreasonNotSampledBV = "", - SAtotalWeightMeasured = dataset[[target_var]], # - SAsampleWeightMeasured = dataset[[target_var]], # - SAconversionFactorMeasLive = 1, - SAauxiliaryVariableTotal = "", - SAauxiliaryVariableValue = "", - SAauxiliaryVariableName = "", - SAauxiliaryVariableUnit = "", - stringsAsFactors=FALSE +SAid = 1:nrow(dataset), +SSid = dataset$VSid, +SArecordType = "SA", #M +SAsequenceNumber = dataset$VSid, #M +SAparentSequenceNumber = "", +SAstratification = "N", #M +SAstratumName = "U", #M +SAspeciesCode = 107254, #M +SAspeciesCodeFAO = "NEP", +SAstateOfProcessing = "FRE", #M +SApresentation = "WHL", #M +SAspecimensState = "DeadOrZeroProbSurvival", #M +SAcatchCategory = "Lan", #M +SAlandingCategory = "", +SAcommSizeCatScale = "", +SAcommSizeCat = "", +SAsex = "U", #M +SAexclusiveEconomicZoneIndicator = "", +SAarea = "", +SArectangle = "", +SAfisheriesManagementUnit = "", +SAgsaSubarea = "NotApplicable", #M +SAjurisdictionArea = "", +SAgeographicalDataBasis = "Measured", +SAgeographicalSource = "", +SAnationalFishingActivity = "", +SAmetier5 = "", +SAmetier6 = "", +SAgear = "", +SAgearDataBasis = "Measured", +SAgearSource = "", +SAmeshSize = "", +SAselectionDevice = "", +SAselectionDeviceMeshSize = "", +SAunitType = "Box", #M +SAtotalWeightLive = "", +SAsampleWeightLive = "", +SAnumberTotal = 1, +SAnumberSampled = 1, +SAselectionProb = 1, +SAinclusionProb = 1, +SAselectionMethod = "CENSUS", #M +SAunitName = dataset$VSid, #M +SAlowerHierarchy = "D", +SAsampler = "Observer", +SAsampled = "N", #M +SAreasonNotSampled = "", +SAnonResponseCollected = "N", +SAreasonNotSampledFM = "", +SAreasonNotSampledBV = "", +SAtotalWeightMeasured = dataset[[target_var]], # +SAtotalWeightMeasuredDataBasis = "Measured", +SAsampleWeightMeasured = dataset[[target_var]], # +SAconversionFactorMeasLive = 1, +SAauxiliaryVariableTotal = "", +SAauxiliaryVariableValue = "", +SAauxiliaryVariableName = "", +SAauxiliaryVariableUnit = "", +stringsAsFactors=FALSE ) @@ -708,15 +463,14 @@ RDBESlist[[i]][which(grepl(colnames(RDBESlist[[i]]),pat="[A-Z]id"))]<-NULL #===Save============ - dir_outputs<-paste0(base_dir_outputs,"/", - project_name_outputs,"/") - dir.create(dir_outputs, recursive=T, showWarnings=FALSE) - filename_output_CS <- paste0("H1.csv") - filename_output_SL <- paste0("HSL.csv") - filename_output_VD <- paste0("HVD.csv") + dir_outputs<-paste0(base_dir_outputs,"/", project_name_outputs,"/") + dir.create(dir_outputs, recursive=T, showWarnings=FALSE) + filename_output_CS <- paste0(dir_outputs,"H1.csv") + filename_output_SL <- paste0(dir_outputs,"HSL.csv") + filename_output_VD <- paste0(dir_outputs,"HVD.csv") -lapply(RDBESlist, function(x, filename1 = paste0(dir_outputs,filename_output_CS)){ +lapply(RDBESlist, function(x, filename1 = filename_output_CS){ if("DErecordType" %in% colnames(x)){ write.table(x, file = filename1, append = FALSE, quote = FALSE, sep = ",", eol = "\n", na = "NA", dec = ".", row.names = FALSE, @@ -728,7 +482,7 @@ write.table(x, file = filename1, append = TRUE, quote = FALSE, sep = ",", } }) -b<-read.table(file=paste0(dir_outputs,filename_output_CS), header=F, sep=";") +b<-read.table(file = filename_output_CS, header=F, sep=";") b<-cbind(key,b) b<-b[order(as.character(b$key), decreasing=FALSE),] b<-b[!is.na(key),] @@ -736,24 +490,30 @@ b$key<-NULL b$V1<-as.character(b$V1) # saves CS output -write.table(b$V1, file=paste0(dir_outputs,filename_output_CS), col.names=FALSE, row.names = FALSE, quote=FALSE,sep=",") +write.table(b$V1, file = filename_output_CS, col.names=FALSE, row.names = FALSE, quote=FALSE,sep=",") -# -----Builds and saves dummySL----------------- +# -----Builds and saves dummySL and dummyIS----------------- SL_base$SLspeciesListName<-project_name_outputs SL_base$SLyear<-DEyear SL_base$SLinstitute<-SDinstitution -# saves SL output - write.table(SL_base, file=paste0(dir_outputs,filename_output_SL), col.names=FALSE, row.names = FALSE, quote=FALSE,sep=",") - + + # saves SL output + write.table(rbind(paste(SL_base, collapse=","), paste(IS_base, collapse=",")), file=filename_output_SL, col.names=FALSE, row.names = FALSE, quote=FALSE,sep=",") # -----Builds and saves dummyVD----------------- - # saves VD output VD_base$VDyear<-DEyear - write.table(VD_base, file=paste0(dir_outputs,filename_output_VD), col.names=FALSE, row.names = FALSE, quote=FALSE,sep=",") + write.table(VD_base, file=filename_output_VD, col.names=FALSE, row.names = FALSE, quote=FALSE,sep=",") + + + + + +# check - is this needed? +if(1==2){ @@ -775,4 +535,5 @@ library(zip) zip::zip(zipfile = paste0(project_name_outputs,".zip"), files =paste0(dir_outputs,"tmp/",dir(paste0(dir_outputs,"tmp"))), mode="cherry-pick") unlink(paste0(dir_outputs,"tmp"), force=T, recursive = T) +} diff --git a/data-raw/exampleData/TextBookExamples/BuildTextBookDataUpload_Pckg_survey_apiclus1_v2_H1.R b/data-raw/exampleData/TextBookExamples/BuildTextBookDataUpload_Pckg_survey_apiclus1_v2_H1.R index fc2b09e2..03fbeb0a 100644 --- a/data-raw/exampleData/TextBookExamples/BuildTextBookDataUpload_Pckg_survey_apiclus1_v2_H1.R +++ b/data-raw/exampleData/TextBookExamples/BuildTextBookDataUpload_Pckg_survey_apiclus1_v2_H1.R @@ -50,13 +50,14 @@ baseDir <- "" VD_base <- readRDS(paste0(baseDir,"aux_TextBookExamples/VD_base.rds")) SL_base <- readRDS(paste0(baseDir,"aux_TextBookExamples/SL_base.rds")) + IS_base <- readRDS(paste0(baseDir,"aux_TextBookExamples/IS_base.rds")) #nameof the directory where the outputs are saved currently base_dir_outputs <- paste0(baseDir,"BuiltUploads") - if(!file.exists(base_dir_outputs)) dir.create(base_dir_outputs, recursive=T, showWarnings=FALSE) + dir.create(base_dir_outputs, recursive=T, showWarnings=FALSE) -#=========Outline of Hierarchy 1=============== +#========Outline of Hierarchy 1================ # Design # Sampling details # Vessel Selection @@ -69,26 +70,10 @@ -#====DE=========== +#===DE============ -# 1 DEid [] - int -# 2 DErecordType [M] - string -# 3 DEsamplingScheme [M] - SamplingScheme -# 4 DEsamplingSchemeType [M] - SamplingSchemeType -# 5 DEyear [M] - Year -# 6 DEstratumName [M] - StringLength100 -# 7 DEhierarchyCorrect [M] - YesNoFields -# 8 DEhierarchy [M] - RDBESUpperHierarchy -# 9 DEsampled [DV,M] - YesNoFields -# 10 DEreasonNotSampled [DV,O] - ReasonForNotSampling -# 11 DEnonResponseCollected [DV,O] - YesNoFields -# 12 DEauxiliaryVariableTotal [DV,O] - DecimalPrec3 -# 13 DEauxiliaryVariableValue [DV,O] - DecimalPrec3 -# 14 DEauxiliaryVariableName [DV,O] - AuxiliaryVariableName -# 15 DEauxiliaryVariableUnit[DV,O]-MUNIT - DE_df_base<-expand.grid(DEyear=DEyear, DEstratumName="U",stringsAsFactors=F) @@ -107,19 +92,12 @@ DE_df<-data.frame( DEauxiliaryVariableTotal = "", DEauxiliaryVariableValue = "", DEauxiliaryVariableName = "", - DEauxiliaryVariableUnit = "", + DEauxiliaryVariableUnit = "", stringsAsFactors=FALSE ) -#====SD=========== - +#===SD============ - # x -# 1 SDid [M] - int -# 2 DEid [M] - int -# 3 SDrecordType [M] - string -# 4 SDcountry [M] - ISO_3166 -# 5 SDinstitution [M] - EDMO SD_df<-data.frame( SDid=DE_df$DEid, @@ -132,37 +110,8 @@ SD_df<-data.frame( #===VS============ - # x -# 1 VSid [M] - int -# 2 SDid [M] - int -# 3 VDid [M] - int -# 4 TEid [M/O] - int -# 5 VSrecordType [M] - string -# 6 VSsequenceNumber [M] - int -# 7 VSencryptedVesselCode [M] - StringLength100 -# 8 VSstratification [DV,M] - YesNoFields -# 9 VSstratumName [DV,M] - StringLength100 -# 10 VSclustering [DV,M] - Clustering -# 11 VSclusterName [DV,M] - StringLength100 -# 12 VSsampler [O] - Sampler -# 13 VSnumberTotal [DV,O] - int -# 14 VSnumberSampled [DV,O] - int -# 15 VSselectionProb [DV,O] - Decimal0.0000000000000001-1 -# 16 VSinclusionProb [DV,O] - Decimal0.0000000000000001-1 -# 17 VSselectionMethod [DV,M] - SelectionMethod -# 18 VSunitName [DV,M] - StringLength100 -# 19 VSselectionMethodCluster [DV,O] - SelectionMethod -# 20 VSnumberTotalClusters [DV,O] - int -# 21 VSnumberSampledClusters [DV,O] - int -# 22 VSselectionProbCluster [DV,O] - Decimal0.0000000000000001-1 -# 23 VSinclusionProbCluster [DV,O] - Decimal0.0000000000000001-1 -# 24 VSsampled [DV,M] - YesNoFields -# 25 VSreasonNotSampled [DV,O] - ReasonForNotSampling -# 26 VSnonResponseCollected [DV,O] - YesNoFields -# 27 VSauxiliaryVariableTotal [DV,O] - DecimalPrec3 -# 28 VSauxiliaryVariableValue [DV,O] - DecimalPrec3 -# 29 VSauxiliaryVariableName [DV,O] - AuxiliaryVariableName -# 30 VSauxiliaryVariableUnit [DV,O] - MUNIT + + #check_All_fields("VS") dataset$VSid <- 1:nrow(dataset) @@ -225,165 +174,59 @@ VS_df$VSselectionMethodCluster<-"SRSWOR" VS_df$VSselectionProbCluster<-"" VS_df$VSinclusionProbCluster<-1/VS_base$pw # based on the original weights provided in apiclus1 -#===FT============ - - -# 1 FTid [M] - int -# 2 OSid [M/O] - int -# 3 VSid [M/O] - int -# 4 VDid [M] - int -# 5 SDid [M/O] - int -# 6 FOid [M/O] - int -# 7 TEid [M/O] - int -# 8 FTrecordType [M] - string -# 9 FTencryptedVesselCode [M] - StringLength100 -# 10 FTsequenceNumber [M] - int -# 11 FTstratification [DV,M] - YesNoFields -# 12 FTstratumName [DV,M] - StringLength100 -# 13 FTclustering [DV,M] - Clustering -# 14 FTclusterName [DV,M] - StringLength100 -# 15 FTsampler [O] - Sampler -# 16 FTsamplingType [M] - SamplingContext -# 17 FTnumberOfHaulsOrSets [M/O] - IntZeroToMax -# 18 FTdepartureLocation [M/O] - Harbour_LOCODE -# 19 FTdepartureDate [M/O] - Date -# 20 FTdepartureTime [M/O] - StringLength60 -# 21 FTarrivalLocation [M] - Harbour_LOCODE -# 22 FTarrivalDate [M] - Date -# 23 FTarrivalTime [M/O] - StringLength60 -# 24 FTnumberTotal [DV,O] - int -# 25 FTnumberSampled [DV,O] - int -# 26 FTselectionProb [DV,O] - Decimal0-1 -# 27 FTinclusionProb [DV,O] - Decimal0-1 -# 28 FTselectionMethod [DV,M] - SelectionMethod -# 29 FTunitName [DV,M] - StringLength100 -# 30 FTselectionMethodCluster [DV,O] - SelectionMethod -# 31 FTnumberTotalClusters [DV,O] - int -# 32 FTnumberSampledClusters [DV,O] - int -# 33 FTselectionProbCluster [DV,O] - Decimal0-1 -# 34 FTinclusionProbCluster [DV,O] - Decimal0-1 -# 35 FTsampled [DV,M] - YesNoFields -# 36 FTreasonNotSampled [DV,O] - ReasonForNotSampling -# 37 FTnonResponseCollected [DV,O] - YesNoFields -# 38 FTauxiliaryVariableTotal [DV,O] - DecimalPrec3 -# 39 FTauxiliaryVariableValue [DV,O] - DecimalPrec3 -# 40 FTauxiliaryVariableName [DV,O] - AuxiliaryVariableName -# 41 FTauxiliaryVariableUnit [DV,O] - MUNIT +#====FT=========== + + FT_df <- data.frame( - FTid = dataset$VSid,#[M] - int - OSid = "",# [M/O] - int - VSid = dataset$VSid, #[M/O] - VDid = "", #[M] - int - SDid = "", #[M/O] - int - FOid = "", #[M/O] - int - TEid = "", #[M/O] - int - FTrecordType='FT', #[M] - string - FTencryptedVesselCode = VS_base$VSencryptedVesselCode, #[M] - FTsequenceNumber = as.integer(1:nrow(dataset)), #[M] - string - FTstratification = "N", #[DV,M] - RS_Stratification - FTstratumName = "U", #[DV,M] - string - FTclustering = "N", #[DV,M] - RS_Clustering - FTclusterName = "U", #[DV,M] - string - FTsampler = "Observer", #[M] - RS_Sampler - FTsamplingType = "AtSea" , #[M] - RS_SamplingType - FTnumberOfHaulsOrSets = 1, #[O] - int - FTdepartureLocation="ZWHWN", #[O] - Harbour_LOCODE - FTdepartureDate=seq(from = as.Date("1965-01-01", format='%Y-%m-%d'), by=1, length.out=nrow(dataset)), #[M/O] - date - FTdepartureTime="", #[O] - time - FTarrivalLocation = "ZWHWN", #[M] - Harbour_LOCODE - FTarrivalDate=seq(from = as.Date("1965-01-01", format='%Y-%m-%d'), by=1, length.out=nrow(dataset)), #[M] - date - FTarrivalTime="", #[O] - time - FTnumberTotal= 1, #[DV,O] - int - FTnumberSampled=1, #[DV,O] - int - FTselectionProb=1, #[DV,O] - DecimalPrec10 - FTinclusionProb=1, #[DV,O] - DecimalPrec10 - FTselectionMethod="CENSUS", #[DV,M] - RS_SelectionMethod - FTunitName = VS_base$VSencryptedVesselCode, #[DV,M] - string - FTselectionMethodCluster="", #[DV,O] - RS_SelectionMethod - FTnumberTotalClusters="", #[DV,O] - int - FTnumberSampledClusters="", #[DV,O] - int - FTselectionProbCluster="", #[DV,O] - DecimalPrec10 - FTinclusionProbCluster="", #[DV,O] - DecimalPrec10 - FTsampled="Y", #[DV,M] - YesNoFields - FTreasonNotSampled= "", #[DV,O] - RS_ReasonForNotSampling - FTnonResponseCollected = "N", - FTauxiliaryVariableTotal = "", - FTauxiliaryVariableValue = "", - FTauxiliaryVariableName = "", - FTauxiliaryVariableUnit = "", - stringsAsFactors=FALSE +FTid = dataset$VSid,#[M] - int +OSid = "",# [M/O] - int +VSid = dataset$VSid, #[M/O] +VDid = "", #[M] - int +SDid = "", #[M/O] - int +FOid = "", #[M/O] - int +TEid = "", #[M/O] - int +FTrecordType='FT', #[M] - string +FTencryptedVesselCode = VS_base$VSencryptedVesselCode, #[M] +FTsequenceNumber = as.integer(1:nrow(dataset)), #[M] - string +FTstratification = "N", #[DV,M] - RS_Stratification +FTstratumName = "U", #[DV,M] - string +FTclustering = "N", #[DV,M] - RS_Clustering +FTclusterName = "U", #[DV,M] - string +FTsampler = "Observer", #[M] - RS_Sampler +FTsamplingType = "AtSea" , #[M] - RS_SamplingType +FTnumberOfHaulsOrSets = 1, #[O] - int +FTdepartureLocation="ZWHWN", #[O] - Harbour_LOCODE +FTdepartureDate=seq(from = as.Date("1965-01-01", format='%Y-%m-%d'), by=1, length.out=nrow(dataset)), #[M/O] - date +FTdepartureTime="", #[O] - time +FTarrivalLocation = "ZWHWN", #[M] - Harbour_LOCODE +FTarrivalDate=seq(from = as.Date("1965-01-01", format='%Y-%m-%d'), by=1, length.out=nrow(dataset)), #[M] - date +FTarrivalTime="", #[O] - time +FTdominantLandingDate= "", +FTnumberTotal= 1, #[DV,O] - int +FTnumberSampled=1, #[DV,O] - int +FTselectionProb=1, #[DV,O] - DecimalPrec10 +FTinclusionProb=1, #[DV,O] - DecimalPrec10 +FTselectionMethod="CENSUS", #[DV,M] - RS_SelectionMethod +FTunitName = VS_base$VSencryptedVesselCode, #[DV,M] - string +FTselectionMethodCluster="", #[DV,O] - RS_SelectionMethod +FTnumberTotalClusters="", #[DV,O] - int +FTnumberSampledClusters="", #[DV,O] - int +FTselectionProbCluster="", #[DV,O] - DecimalPrec10 +FTinclusionProbCluster="", #[DV,O] - DecimalPrec10 +FTsampled="Y", #[DV,M] - YesNoFields +FTreasonNotSampled= "", #[DV,O] - RS_ReasonForNotSampling +FTnonResponseCollected = "N", +FTauxiliaryVariableTotal = "", +FTauxiliaryVariableValue = "", +FTauxiliaryVariableName = "", +FTauxiliaryVariableUnit = "", +stringsAsFactors=FALSE ) #====FO=========== -# 1 FOid [M] - int -# 2 FTid [M/O] - int -# 3 SDid [M/O] - int -# 4 FOrecordType [M] - string -# 5 FOstratification [DV,M] - YesNoFields -# 6 FOsequenceNumber [M] - int -# 7 FOstratumName [DV,M] - StringLength100 -# 8 FOclustering [DV,M] - Clustering -# 9 FOclusterName [DV,M] - StringLength100 -# 10 FOsampler [O] - Sampler -# 11 FOaggregationLevel [M] - AggregationLevel -# 12 FOvalidity [M] - ValidityFlag -# 13 FOcatchReg [M] - CatchRegistration -# 14 FOstartDate [O] - Date -# 15 FOstartTime [O] - Time -# 16 FOendDate [M] - Date -# 17 FOendTime [M/O] - Time -# 18 FOduration [M/O] - int -# 19 FOdurationSource [M] - DurationSource -# 20 FOhandlingTime [O] - int -# 21 FOstartLat [O] - Decimal-90.000000-90.000000 -# 22 FOstartLon [O] - Decimal-180.000000-180.000000 -# 23 FOstopLat [O] - Decimal-90.000000-90.000000 -# 24 FOstopLon [O] - Decimal-180.000000-180.000000 -# 25 FOexclusiveEconomicZoneIndicator [O] - ISO_3166 -# 26 FOarea [M] - ICES_Area -# 27 FOrectangle [O] - StatRec -# 28 FOfisheriesManagementUnit [O] - AreaNonFAO -# 29 FOgsaSubarea [M] - Areas_GFCM_GSA -# 30 FOjurisdictionArea [O] - JurisdictionArea -# 31 FOfishingDepth [O] - int -# 32 FOwaterDepth [O] - int -# 33 FOnationalFishingActivity [O] - NationalFishingActivity -# 34 FOmetier5 [O] - Metier5_FishingActivity -# 35 FOmetier6 [M] - Metier6_FishingActivity -# 36 FOgear [M] - GearType -# 37 FOmeshSize [O] - int -# 38 FOselectionDevice [O] - SelectionDevice -# 39 FOselectionDeviceMeshSize [O] - int -# 40 FOtargetSpecies [O] - TargetSpecies -# 41 FOincidentalByCatchMitigationDeviceFirst [M] - BycatchMitigationDevice -# 42 FOincidentalByCatchMitigationDeviceTargetFirst [M] - BycatchMitigationDeviceTarget -# 43 FOincidentalByCatchMitigationDeviceSecond [M] - BycatchMitigationDevice -# 44 FOincidentalByCatchMitigationDeviceTargetSecond [M] - BycatchMitigationDeviceTarget -# 45 FOgearDimensions [O] - int -# 46 FOobservationCode [M] - ObservationCode -# 47 FOnumberTotal [DV,O] - int -# 48 FOnumberSampled [DV,O] - int -# 49 FOselectionProb [DV,O] - Decimal0.0000000000000001-1 -# 50 FOinclusionProb [DV,O] - Decimal0.0000000000000001-1 -# 51 FOselectionMethod [DV,M] - SelectionMethod -# 52 FOunitName [DV,M] - StringLength100 -# 53 FOselectionMethodCluster [DV,O] - SelectionMethod -# 54 FOnumberTotalClusters [DV,O] - int -# 55 FOnumberSampledClusters [DV,O] - int -# 56 FOselectionProbCluster [DV,O] - Decimal0.0000000000000001-1 -# 57 FOinclusionProbCluster [DV,O] - Decimal0.0000000000000001-1 -# 58 FOsampled [DV,M] - YesNoFields -# 59 FOreasonNotSampled [DV,O] - ReasonForNotSampling -# 60 FOnonResponseCollected [DV,O] - YesNoFields -# 61 FOauxiliaryVariableTotal [DV,O] - DecimalPrec3 -# 62 FOauxiliaryVariableValue [DV,O] - DecimalPrec3 -# 63 FOauxiliaryVariableName [DV,O] - AuxiliaryVariableName -# 64 FOauxiliaryVariableUnit [DV,O] - MUNIT - - FO_df <- data.frame( FOid = dataset$VSid, FTid = dataset$VSid, @@ -403,24 +246,29 @@ FO_df <- data.frame( FOendDate = as.Date(FT_df$FTdepartureDate, format='%Y-%m-%d'),#M FOendTime ="",#M FOduration = 60, #M ATTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT!!!! + FOfishingDurationDataBasis = "Measured", FOdurationSource = "Crew", FOhandlingTime = "", FOstartLat="", # ATT! FOstartLon="", # ATT! FOstopLat="", FOstopLon="", - FOexclusiveEconomicZoneIndicator = "", # + FOexclusiveEconomicZoneIndicator = "", # FOarea = "27.3.a.21", #M FOrectangle = "", FOfisheriesManagementUnit = "", FOgsaSubarea = "NotApplicable", #M FOjurisdictionArea = "", + FOgeographicalDataBasis = "Measured", + FOgeographicalSource = "Crew", FOfishingDepth = "", FOwaterDepth = "", FOnationalFishingActivity = "", FOmetier5 = "", FOmetier6 = "OTT_CRU_70-89_2_35",#M FOgear = "OTT",#M + FOgearDataBasis = "Measured", + FOgearSource = "", FOmeshSize = "", FOselectionDevice = "", FOselectionDeviceMeshSize = "", @@ -455,45 +303,6 @@ stringsAsFactors=FALSE #===SS============ -# 1 SSid [] - int -# 2 LEid [M/O] - int -# 3 FOid [M/O] - int -# 4 FTid [M/O] - int -# 5 OSid [M/O] - int -# 6 TEid [M/O] - int -# 7 SLid [M] - int -# 8 SSrecordType [M] - string -# 9 SSsequenceNumber [M] - int -# 10 SSstratification [DV,M] - YesNoFields -# 11 SSstratumName [DV,M] - StringLength100 -# 12 SSclustering [DV,M] - Clustering -# 13 SSclusterName [DV,M] - StringLength100 -# 14 SSobservationActivityType [DV,M] - ObservationActivityType -# 15 SScatchFraction [M] - CatchFraction -# 16 SSobservationType [DV,M] - ObservationMethod -# 17 SSsampler [O] - Sampler -# 18 SSspeciesListName [M] - StringLength100 -# 19 SSuseForCalculateZero [M] - YesNoFields -# 20 SStimeTotal [O] - int -# 21 SStimeSampled [O] - int -# 22 SSnumberTotal [DV,O] - int -# 23 SSnumberSampled [DV,O] - int -# 24 SSselectionProb [DV,O] - Decimal0.0000000000000001-1 -# 25 SSinclusionProb [DV,O] - Decimal0.0000000000000001-1 -# 26 SSselectionMethod [DV,M] - SelectionMethod -# 27 SSunitName [DV,M] - StringLength100 -# 28 SSselectionMethodCluster [DV,O] - SelectionMethod -# 29 SSnumberTotalClusters [DV,O] - int -# 30 SSnumberSampledClusters [DV,O] - int -# 31 SSselectionProbCluster [DV,O] - Decimal0.0000000000000001-1 -# 32 SSinclusionProbCluster [DV,O] - Decimal0.0000000000000001-1 -# 33 SSsampled [DV,M] - YesNoFields -# 34 SSreasonNotSampled [DV,O] - ReasonForNotSampling -# 35 SSnonResponseCollected [DV,O] - YesNoFields -# 36 SSauxiliaryVariableTotal [DV,O] - DecimalPrec3 -# 37 SSauxiliaryVariableValue [DV,O] - DecimalPrec3 -# 38 SSauxiliaryVariableName [DV,O] - AuxiliaryVariableName -# 39 SSauxiliaryVariableUnit [DV,O] - MUNIT SS_df<-data.frame( SSid = dataset$VSid, @@ -516,8 +325,10 @@ SS_df<-data.frame( SSspeciesListName = project_name_outputs, #M SSuseForCalculateZero = "N", #M SStimeTotal = "", + SStimeTotalDataBasis = "Measured", SStimeSampled = "", SSnumberTotal = 1, + SSnumberTotalDataBasis = "Measured", SSnumberSampled = 1, SSselectionProb = 1, SSinclusionProb = 1, @@ -535,122 +346,71 @@ SS_df<-data.frame( SSauxiliaryVariableValue = "", SSauxiliaryVariableName = "", SSauxiliaryVariableUnit = "", - stringsAsFactors=FALSE + stringsAsFactors=FALSE ) #====SA=========== - -# 1 SAid [] - int -# 2 SSid [] - int -# 3 SArecordType [M] - string -# 4 SAsequenceNumber [M] - int -# 5 SAparentSequenceNumber [O] - int -# 6 SAstratification [DV,M] - YesNoFields -# 7 SAstratumName [DV,M] - StringLength100 -# 8 SAspeciesCode [M] - SpecWoRMS -# 9 SAspeciesCodeFAO [O] - SpecASFIS -# 10 SAstateOfProcessing [M] - StateOfProcessing -# 11 SApresentation [M] - ProductPresentation -# 12 SAspecimensState [M] - SpecimensState -# 13 SAcatchCategory [M] - CatchCategory -# 14 SAlandingCategory [O] - LandingCategory -# 15 SAcommSizeCatScale [O] - CommercialSizeCategoryScale -# 16 SAcommSizeCat [O] - CommercialSizeCategory -# 17 SAsex [M] - SEXCO -# 18 SAexclusiveEconomicZoneIndicator [O] - ISO_3166 -# 19 SAarea [O] - ICES_Area -# 20 SArectangle [O] - StatRec -# 21 SAfisheriesManagementUnit [O] - AreaNonFAO -# 22 SAgsaSubarea [M] - Areas_GFCM_GSA -# 23 SAjurisdictionArea [O] - JurisdictionArea -# 24 SAnationalFishingActivity [O] - NationalFishingActivity -# 25 SAmetier5 [O] - Metier5_FishingActivity -# 26 SAmetier6 [O] - Metier6_FishingActivity -# 27 SAgear [O] - GearType -# 28 SAmeshSize [O] - int -# 29 SAselectionDevice [O] - SelectionDevice -# 30 SAselectionDeviceMeshSize [O] - int -# 31 SAunitType [M] - SamplingUnit -# 32 SAtotalWeightLive [O] - int -# 33 SAsampleWeightLive [O] - int -# 34 SAnumberTotal [DV,O] - Decimal0.1-999999999 -# 35 SAnumberSampled [DV,O] - Decimal0.1-999999999 -# 36 SAselectionProb [DV,O] - Decimal0-1 -# 37 SAinclusionProb [DV,O] - Decimal0-1 -# 38 SAselectionMethod [DV,M] - SelectionMethod -# 39 SAunitName [DV,M] - StringLength100 -# 40 SAlowerHierarchy [M/O] - LowerHierarchy -# 41 SAsampler [O] - Sampler -# 42 SAsampled [DV,M] - YesNoFields -# 43 SAreasonNotSampled [DV,O] - ReasonForNotSampling -# 44 SAnonResponseCollected [DV,O] - YesNoFields -# 45 SAreasonNotSampledFM [DV,O] - ReasonForNotSampling -# 46 SAreasonNotSampledBV [DV,O] - ReasonForNotSampling -# 47 SAtotalWeightMeasured [O] - int -# 48 SAsampleWeightMeasured [O] - int -# 49 SAconversionFactorMeasLive [O] - Decimal0.900-10 -# 50 SAauxiliaryVariableTotal [DV,O] - DecimalPrec3 -# 51 SAauxiliaryVariableValue [DV,O] - DecimalPrec3 -# 52 SAauxiliaryVariableName [DV,O] - AuxiliaryVariableName -# 53 SAauxiliaryVariableUnit [DV,O] - MUNIT - - SA_df<-data.frame( - SAid = 1:nrow(dataset), - SSid = dataset$VSid, - SArecordType = "SA", #M - SAsequenceNumber = dataset$VSid, #M - SAparentSequenceNumber = "", - SAstratification = "N", #M - SAstratumName = "U", #M - SAspeciesCode = 107254, #M - SAspeciesCodeFAO = "NEP", - SAstateOfProcessing = "FRE", #M - SApresentation = "WHL", #M - SAspecimensState = "DeadOrZeroProbSurvival", #M - SAcatchCategory = "Lan", #M - SAlandingCategory = "", - SAcommSizeCatScale = "", - SAcommSizeCat = "", - SAsex = "U", #M - SAexclusiveEconomicZoneIndicator = "", - SAarea = "", - SArectangle = "", - SAfisheriesManagementUnit = "", - SAgsaSubarea = "NotApplicable", #M - SAjurisdictionArea = "", - SAnationalFishingActivity = "", - SAmetier5 = "", - SAmetier6 = "", - SAgear = "", - SAmeshSize = "", - SAselectionDevice = "", - SAselectionDeviceMeshSize = "", - SAunitType = "Box", #M - SAtotalWeightLive = "", - SAsampleWeightLive = "", - SAnumberTotal = 1, - SAnumberSampled = 1, - SAselectionProb = 1, - SAinclusionProb = 1, - SAselectionMethod = "CENSUS", #M - SAunitName = dataset$VSid, #M - SAlowerHierarchy = "D", - SAsampler = "Observer", - SAsampled = ifelse(!is.na(dataset[[target_var]]),"Y","N"), #M - SAreasonNotSampled = "", - SAnonResponseCollected = "Y", - SAreasonNotSampledFM = "", - SAreasonNotSampledBV = "", - SAtotalWeightMeasured = ifelse(!is.na(dataset[[target_var]]),dataset[[target_var]],""), # - SAsampleWeightMeasured = ifelse(!is.na(dataset[[target_var]]),dataset[[target_var]],""), # - SAconversionFactorMeasLive = 1, - SAauxiliaryVariableTotal = "", - SAauxiliaryVariableValue = "", - SAauxiliaryVariableName = "", - SAauxiliaryVariableUnit = "", - stringsAsFactors=FALSE +SAid = 1:nrow(dataset), +SSid = dataset$VSid, +SArecordType = "SA", #M +SAsequenceNumber = dataset$VSid, #M +SAparentSequenceNumber = "", +SAstratification = "N", #M +SAstratumName = "U", #M +SAspeciesCode = 107254, #M +SAspeciesCodeFAO = "NEP", +SAstateOfProcessing = "FRE", #M +SApresentation = "WHL", #M +SAspecimensState = "DeadOrZeroProbSurvival", #M +SAcatchCategory = "Lan", #M +SAlandingCategory = "", +SAcommSizeCatScale = "", +SAcommSizeCat = "", +SAsex = "U", #M +SAexclusiveEconomicZoneIndicator = "", +SAarea = "", +SArectangle = "", +SAfisheriesManagementUnit = "", +SAgsaSubarea = "NotApplicable", #M +SAjurisdictionArea = "", +SAgeographicalDataBasis = "Measured", +SAgeographicalSource = "", +SAnationalFishingActivity = "", +SAmetier5 = "", +SAmetier6 = "", +SAgear = "", +SAgearDataBasis = "Measured", +SAgearSource = "", +SAmeshSize = "", +SAselectionDevice = "", +SAselectionDeviceMeshSize = "", +SAunitType = "Box", #M +SAtotalWeightLive = "", +SAsampleWeightLive = "", +SAnumberTotal = 1, +SAnumberSampled = 1, +SAselectionProb = 1, +SAinclusionProb = 1, +SAselectionMethod = "CENSUS", #M +SAunitName = dataset$VSid, #M +SAlowerHierarchy = "D", +SAsampler = "Observer", +SAsampled = ifelse(!is.na(dataset[[target_var]]),"Y","N"), #M +SAreasonNotSampled = "", +SAnonResponseCollected = "Y", +SAreasonNotSampledFM = "", +SAreasonNotSampledBV = "", +SAtotalWeightMeasured = ifelse(!is.na(dataset[[target_var]]),dataset[[target_var]],""), # +SAtotalWeightMeasuredDataBasis = "Measured", +SAsampleWeightMeasured = ifelse(!is.na(dataset[[target_var]]),dataset[[target_var]],""), # +SAconversionFactorMeasLive = 1, +SAauxiliaryVariableTotal = "", +SAauxiliaryVariableValue = "", +SAauxiliaryVariableName = "", +SAauxiliaryVariableUnit = "", +stringsAsFactors=FALSE ) @@ -703,16 +463,14 @@ RDBESlist[[i]][which(grepl(colnames(RDBESlist[[i]]),pat="[A-Z]id"))]<-NULL #===Save============ - dir_outputs<-paste0(base_dir_outputs,"/", - project_name_outputs,"/") - dir.create(dir_outputs, recursive=T, showWarnings=FALSE) - filename_output_CS <- paste0("H1.csv") - filename_output_SL <- paste0("HSL.csv") - filename_output_VD <- paste0("HVD.csv") - + dir_outputs<-paste0(base_dir_outputs,"/", project_name_outputs,"/") + dir.create(dir_outputs, recursive=T, showWarnings=FALSE) + filename_output_CS <- paste0(dir_outputs,"H1.csv") + filename_output_SL <- paste0(dir_outputs,"HSL.csv") + filename_output_VD <- paste0(dir_outputs,"HVD.csv") -lapply(RDBESlist, function(x, filename1 = paste0(dir_outputs,filename_output_CS)){ +lapply(RDBESlist, function(x, filename1 = filename_output_CS){ if("DErecordType" %in% colnames(x)){ write.table(x, file = filename1, append = FALSE, quote = FALSE, sep = ",", eol = "\n", na = "NA", dec = ".", row.names = FALSE, @@ -724,7 +482,7 @@ write.table(x, file = filename1, append = TRUE, quote = FALSE, sep = ",", } }) -b<-read.table(file=paste0(dir_outputs,filename_output_CS), header=F, sep=";") +b<-read.table(file = filename_output_CS, header=F, sep=";") b<-cbind(key,b) b<-b[order(as.character(b$key), decreasing=FALSE),] b<-b[!is.na(key),] @@ -732,24 +490,30 @@ b$key<-NULL b$V1<-as.character(b$V1) # saves CS output -write.table(b$V1, file=paste0(dir_outputs,filename_output_CS), col.names=FALSE, row.names = FALSE, quote=FALSE,sep=",") +write.table(b$V1, file = filename_output_CS, col.names=FALSE, row.names = FALSE, quote=FALSE,sep=",") -# -----Builds and saves dummySL----------------- +# -----Builds and saves dummySL and dummyIS----------------- SL_base$SLspeciesListName<-project_name_outputs SL_base$SLyear<-DEyear SL_base$SLinstitute<-SDinstitution -# saves SL output - write.table(SL_base, file=paste0(dir_outputs,filename_output_SL), col.names=FALSE, row.names = FALSE, quote=FALSE,sep=",") - + + # saves SL output + write.table(rbind(paste(SL_base, collapse=","), paste(IS_base, collapse=",")), file=filename_output_SL, col.names=FALSE, row.names = FALSE, quote=FALSE,sep=",") # -----Builds and saves dummyVD----------------- - # saves VD output VD_base$VDyear<-DEyear - write.table(VD_base, file=paste0(dir_outputs,filename_output_VD), col.names=FALSE, row.names = FALSE, quote=FALSE,sep=",") + write.table(VD_base, file=filename_output_VD, col.names=FALSE, row.names = FALSE, quote=FALSE,sep=",") + + + + + +# check - is this needed? +if(1==2){ @@ -770,5 +534,6 @@ write.csv(tmp, file=paste0(dir_outputs,"tmp/SpeciesList.csv"), quote=F, row.name library(zip) zip::zip(zipfile = paste0(project_name_outputs,".zip"), files =paste0(dir_outputs,"tmp/",dir(paste0(dir_outputs,"tmp"))), mode="cherry-pick") unlink(paste0(dir_outputs,"tmp"), force=T, recursive = T) +} diff --git a/data-raw/exampleData/TextBookExamples/BuildTextBookDataUpload_Pckg_survey_apiclus2_H1.R b/data-raw/exampleData/TextBookExamples/BuildTextBookDataUpload_Pckg_survey_apiclus2_H1.R index 702c497c..c286a7d3 100644 --- a/data-raw/exampleData/TextBookExamples/BuildTextBookDataUpload_Pckg_survey_apiclus2_H1.R +++ b/data-raw/exampleData/TextBookExamples/BuildTextBookDataUpload_Pckg_survey_apiclus2_H1.R @@ -43,79 +43,56 @@ DEsamplingScheme<-"WGRDBES-EST TEST 1" DEstratumName <- "Pckg_survey_apiclus2_H1" project_name_outputs <- gsub(" ","_", paste0(DEsamplingScheme,"_", DEstratumName)) baseDir <- "./data-raw/exampleData/TextBookExamples/" -#baseDir <- "" -VD_base <- readRDS(paste0(baseDir,"aux_TextBookExamples/VD_base.rds")) -SL_base <- readRDS(paste0(baseDir,"aux_TextBookExamples/SL_base.rds")) +baseDir <- "" + VD_base <- readRDS(paste0(baseDir,"aux_TextBookExamples/VD_base.rds")) + SL_base <- readRDS(paste0(baseDir,"aux_TextBookExamples/SL_base.rds")) + IS_base <- readRDS(paste0(baseDir,"aux_TextBookExamples/IS_base.rds")) + #nameof the directory where the outputs are saved currently + base_dir_outputs <- paste0(baseDir,"BuiltUploads") + dir.create(base_dir_outputs, recursive=T, showWarnings=FALSE) -#nameof the directory where the outputs are saved currently -base_dir_outputs <- paste0(baseDir,"BuiltUploads/") -if(!file.exists(base_dir_outputs)) dir.create(base_dir_outputs, recursive=T, showWarnings=FALSE) +#========Outline of Hierarchy 1================ + # Design + # Sampling details + # Vessel Selection + # Fishing Trip + # Fishing Operation + # Species Selection + # Sample + # Length + # Biological variables -#=========Outline of Hierarchy 1=============== -# Design -# Sampling details -# Vessel Selection -# Fishing Trip -# Fishing Operation -# Species Selection -# Sample -# Length -# Biological variables +#===DE============ -#====DE=========== - -# 1 DEid [] - int -# 2 DErecordType [M] - string -# 3 DEsamplingScheme [M] - SamplingScheme -# 4 DEsamplingSchemeType [M] - SamplingSchemeType -# 5 DEyear [M] - Year -# 6 DEstratumName [M] - StringLength100 -# 7 DEhierarchyCorrect [M] - YesNoFields -# 8 DEhierarchy [M] - RDBESUpperHierarchy -# 9 DEsampled [DV,M] - YesNoFields -# 10 DEreasonNotSampled [DV,O] - ReasonForNotSampling -# 11 DEnonResponseCollected [DV,O] - YesNoFields -# 12 DEauxiliaryVariableTotal [DV,O] - DecimalPrec3 -# 13 DEauxiliaryVariableValue [DV,O] - DecimalPrec3 -# 14 DEauxiliaryVariableName [DV,O] - AuxiliaryVariableName -# 15 DEauxiliaryVariableUnit[DV,O]-MUNIT - DE_df_base<-expand.grid(DEyear=DEyear, DEstratumName="U",stringsAsFactors=F) DE_df<-data.frame( - DEid = 1:nrow(DE_df_base), - DErecordType = "DE", - DEsamplingScheme = DEsamplingScheme, - DEsamplingSchemeType = "NatRouCF", - DEyear = as.integer(DEyear), - DEstratumName = DEstratumName, - DEhierarchyCorrect = "Y", - DEhierarchy = 1, - DEsampled = "Y", - DEreasonNotSampled = "", - DEnonResponseCollected = "Y", - DEauxiliaryVariableTotal = "", - DEauxiliaryVariableValue = "", - DEauxiliaryVariableName = "", - DEauxiliaryVariableUnit = "", - stringsAsFactors=FALSE -) - -#====SD=========== - + DEid = 1:nrow(DE_df_base), + DErecordType = "DE", + DEsamplingScheme = DEsamplingScheme, + DEsamplingSchemeType = "NatRouCF", + DEyear = as.integer(DEyear), + DEstratumName = DEstratumName, + DEhierarchyCorrect = "Y", + DEhierarchy = 1, + DEsampled = "Y", + DEreasonNotSampled = "", + DEnonResponseCollected = "Y", + DEauxiliaryVariableTotal = "", + DEauxiliaryVariableValue = "", + DEauxiliaryVariableName = "", + DEauxiliaryVariableUnit = "", + stringsAsFactors=FALSE + ) + +#===SD============ -# x -# 1 SDid [M] - int -# 2 DEid [M] - int -# 3 SDrecordType [M] - string -# 4 SDcountry [M] - ISO_3166 -# 5 SDinstitution [M] - EDMO SD_df<-data.frame( SDid=DE_df$DEid, @@ -128,33 +105,8 @@ SD_df<-data.frame( #===VS============ - # x -# 1 VSid [M] - int -# 2 SDid [M] - int -# 3 VDid [M] - int -# 4 TEid [M/O] - int -# 5 VSrecordType [M] - string -# 6 VSsequenceNumber [M] - int -# 7 VSencryptedVesselCode [M] - StringLength100 -# 8 VSstratification [DV,M] - YesNoFields -# 9 VSstratumName [DV,M] - StringLength100 -# 10 VSclustering [DV,M] - Clustering -# 11 VSclusterName [DV,M] - StringLength100 -# 12 VSsampler [O] - Sampler -# 13 VSnumberTotal [DV,O] - int -# 14 VSnumberSampled [DV,O] - int -# 15 VSselectionProb [DV,O] - Decimal0.0000000000000001-1 -# 16 VSinclusionProb [DV,O] - Decimal0.0000000000000001-1 -# 17 VSselectionMethod [DV,M] - SelectionMethod -# 18 VSunitName [DV,M] - StringLength100 -# 19 VSselectionMethodCluster [DV,O] - SelectionMethod -# 20 VSnumberTotalClusters [DV,O] - int -# 21 VSnumberSampledClusters [DV,O] - int -# 22 VSselectionProbCluster [DV,O] - Decimal0.0000000000000001-1 -# 23 VSinclusionProbCluster [DV,O] - Decimal0.0000000000000001-1 -# 24 VSsampled [DV,M] - YesNoFields -# 25 VSreasonNotSampled [DV,O] - ReasonForNotSampling + # The PSU will be Vessels and the SSU will be Trips df_vessel <- df[!duplicated(df[c("dname")]),] @@ -206,45 +158,9 @@ VS_df <- data.frame( ) -#===FT============ - - -# 1 FTid [M] - int -# 2 OSid [M/O] - int -# 3 VSid [M/O] - int -# 4 VDid [M] - int -# 5 SDid [M/O] - int -# 6 FOid [M/O] - int -# 7 TEid [M/O] - int -# 8 FTrecordType [M] - string -# 9 FTencryptedVesselCode [M] - StringLength100 -# 10 FTsequenceNumber [M] - StringLength100 -# 11 FTstratification [DV,M] - YesNoFields -# 12 FTstratumName [DV,M] - StringLength100 -# 13 FTclustering [DV,M] - Clustering -# 14 FTclusterName [DV,M] - StringLength100 -# 15 FTsampler [O] - Sampler -# 16 FTsamplingType [M] - SamplingContext -# 17 FTnumberOfHaulsOrSets [M/O] - IntZeroToMax -# 18 FTdepartureLocation [M/O] - Harbour_LOCODE -# 19 FTdepartureDate [M/O] - Date -# 20 FTdepartureTime [M/O] - Time -# 21 FTarrivalLocation [M] - Harbour_LOCODE -# 22 FTarrivalDate [M] - Date -# 23 FTarrivalTime [M/O] - Time -# 24 FTnumberTotal [DV,O] - int -# 25 FTnumberSampled [DV,O] - int -# 26 FTselectionProb [DV,O] - Decimal0.0000000000000001-1 -# 27 FTinclusionProb [DV,O] - Decimal0.0000000000000001-1 -# 28 FTselectionMethod [DV,M] - SelectionMethod -# 29 FTunitName [DV,M] - StringLength100 -# 30 FTselectionMethodCluster [DV,O] - SelectionMethod -# 31 FTnumberTotalClusters [DV,O] - int -# 32 FTnumberSampledClusters [DV,O] - int -# 33 FTselectionProbCluster [DV,O] - Decimal0.0000000000000001-1 -# 34 FTinclusionProbCluster [DV,O] - Decimal0.0000000000000001-1 -# 35 FTsampled [DV,M] - YesNoFields -# 36 FTreasonNotSampled [DV,O] - ReasonForNotSampling +#====FT=========== + + makeChildTbl <- function(parent, data, tbl="FT", by = "dname"){ @@ -253,49 +169,53 @@ makeChildTbl <- function(parent, data, tbl="FT", by = "dname"){ df <- merge(data, parent[mergeCols], by=by) sc <- table(df[[by]]) df$sampled_count <- as.vector(sc[df[[by]]]) - FT_df <- data.frame( - FTid = 1:nrow(df),#[M] - int - OSid = "",# [M/O] - int - VSid = df$VSid, #[M/O] - VDid = "", #[M] - int - SDid = "", #[M/O] - int - FOid = "", #[M/O] - int - TEid = "", #[M/O] - int - FTrecordType='FT', #[M] - string - FTencryptedVesselCode = df$VSencryptedVesselCode, #[M] - FTsequenceNumber = as.integer(1:nrow(df)), #[M] - string - FTstratification = "N", #[DV,M] - RS_Stratification - FTstratumName = "U", #[DV,M] - string - FTclustering = "N", #[DV,M] - RS_Clustering - FTclusterName = "U", #[DV,M] - string - FTsampler = "Observer", #[M] - RS_Sampler - FTsamplingType = "AtSea" , #[M] - RS_SamplingType - FTnumberOfHaulsOrSets = 1, #[O] - int - FTdepartureLocation="ZWHWN", #[O] - Harbour_LOCODE - FTdepartureDate=seq(from = as.Date("1968-01-01", format='%Y-%m-%d'), by=1, length.out=nrow(df)), #[M/O] - date - FTdepartureTime="", #[O] - time - FTarrivalLocation = "ZWHWN", #[M] - Harbour_LOCODE - FTarrivalDate=seq(from = as.Date("1968-01-01", format='%Y-%m-%d'), by=1, length.out=nrow(df)), #[M] - date - FTarrivalTime="", #[O] - time - FTnumberTotal= as.vector(df$fpc2), #[DV,O] - int - FTnumberSampled=df$sampled_count, #[DV,O] - int - FTselectionProb="", #[DV,O] - DecimalPrec10 - FTinclusionProb="", #[DV,O] - DecimalPrec10 - FTselectionMethod="SRSWOR", #[DV,M] - RS_SelectionMethod - FTunitName = df$VSid, #[DV,M] - string - FTselectionMethodCluster="", #[DV,O] - RS_SelectionMethod - FTnumberTotalClusters="", #[DV,O] - int - FTnumberSampledClusters="", #[DV,O] - int - FTselectionProbCluster="", #[DV,O] - DecimalPrec10 - FTinclusionProbCluster="", #[DV,O] - DecimalPrec10 - FTsampled="Y", #[DV,M] - YesNoFields - FTreasonNotSampled= "", #[DV,O] - RS_ReasonForNotSampling, - FTnonResponseCollected = factor("N", levels = c('N', 'Y')), - FTauxiliaryVariableTotal = "", - FTauxiliaryVariableValue = "", - FTauxiliaryVariableName = "", - FTauxiliaryVariableUnit = "", - stringsAsFactors=FALSE) + +FT_df <- data.frame( +FTid = 1:nrow(df),#[M] - int +OSid = "",# [M/O] - int +VSid = df$VSid, #[M/O] +VDid = "", #[M] - int +SDid = "", #[M/O] - int +FOid = "", #[M/O] - int +TEid = "", #[M/O] - int +FTrecordType='FT', #[M] - string +FTencryptedVesselCode = df$VSencryptedVesselCode, #[M] +FTsequenceNumber = as.integer(1:nrow(df)), #[M] - string +FTstratification = "N", #[DV,M] - RS_Stratification +FTstratumName = "U", #[DV,M] - string +FTclustering = "N", #[DV,M] - RS_Clustering +FTclusterName = "U", #[DV,M] - string +FTsampler = "Observer", #[M] - RS_Sampler +FTsamplingType = "AtSea" , #[M] - RS_SamplingType +FTnumberOfHaulsOrSets = 1, #[O] - int +FTdepartureLocation="ZWHWN", #[O] - Harbour_LOCODE +FTdepartureDate=seq(from = as.Date("1968-01-01", format='%Y-%m-%d'), by=1, length.out=nrow(df)), #[M/O] - date +FTdepartureTime="", #[O] - time +FTarrivalLocation = "ZWHWN", #[M] - Harbour_LOCODE +FTarrivalDate=seq(from = as.Date("1968-01-01", format='%Y-%m-%d'), by=1, length.out=nrow(df)), #[M] - date +FTarrivalTime="", #[O] - time +FTdominantLandingDate= "", +FTnumberTotal= as.vector(df$fpc2), #[DV,O] - int +FTnumberSampled=df$sampled_count, #[DV,O] - int +FTselectionProb="", #[DV,O] - DecimalPrec10 +FTinclusionProb="", #[DV,O] - DecimalPrec10 +FTselectionMethod="SRSWOR", #[DV,M] - RS_SelectionMethod +FTunitName = df$VSid, #[DV,M] - string +FTselectionMethodCluster="", #[DV,O] - RS_SelectionMethod +FTnumberTotalClusters="", #[DV,O] - int +FTnumberSampledClusters="", #[DV,O] - int +FTselectionProbCluster="", #[DV,O] - DecimalPrec10 +FTinclusionProbCluster="", #[DV,O] - DecimalPrec10 +FTsampled="Y", #[DV,M] - YesNoFields +FTreasonNotSampled= "", #[DV,O] - RS_ReasonForNotSampling, +FTnonResponseCollected = factor("N", levels = c('N', 'Y')), +FTauxiliaryVariableTotal = "", +FTauxiliaryVariableValue = "", +FTauxiliaryVariableName = "", +FTauxiliaryVariableUnit = "", +stringsAsFactors=FALSE +) + if(any(FT_df$FTnumberTotal < FT_df$FTnumberSampled)){ stop("Sampled is more than total") } @@ -309,66 +229,6 @@ FT_df <- makeChildTbl(df_vessel, df, by="dname") #====FO=========== -# 1 FOid [M] - int -# 2 FTid [M/O] - int -# 3 SDid [M/O] - int -# 4 FOrecordType [M] - string -# 5 FOstratification [DV,M] - YesNoFields -# 6 FOsequenceNumber [M] - int -# 7 FOstratumName [DV,M] - StringLength100 -# 8 FOclustering [DV,M] - Clustering -# 9 FOclusterName [DV,M] - StringLength100 -# 10 FOsampler [O] - Sampler -# 11 FOaggregationLevel [M] - AggregationLevel -# 12 FOvalidity [M] - ValidityFlag -# 13 FOcatchReg [M] - CatchRegistration -# 14 FOstartDate [O] - Date -# 15 FOstartTime [O] - Time -# 16 FOendDate [M] - Date -# 17 FOendTime [M/O] - Time -# 18 FOduration [M/O] - int -# 19 FOdurationSource [M] - DurationSource -# 20 FOhandlingTime [O] - int -# 21 FOstartLat [O] - Decimal-90.000000-90.000000 -# 22 FOstartLon [O] - Decimal-180.000000-180.000000 -# 23 FOstopLat [O] - Decimal-90.000000-90.000000 -# 24 FOstopLon [O] - Decimal-180.000000-180.000000 -# 25 FOexclusiveEconomicZoneIndicator [O] - ISO_3166 -# 26 FOarea [M] - ICES_Area -# 27 FOrectangle [O] - StatRec -# 28 FOgsaSubarea [M] - Areas_GFCM_GSA -# 29 FOjurisdictionArea [O] - JurisdictionArea -# 30 FOfishingDepth [O] - int -# 31 FOwaterDepth [O] - int -# 32 FOnationalFishingActivity [O] - NationalFishingActivity -# 33 FOmetier5 [O] - Metier5_FishingActivity -# 34 FOmetier6 [M] - Metier6_FishingActivity -# 35 FOgear [M] - GearType -# 36 FOmeshSize [O] - int -# 37 FOselectionDevice [O] - SelectionDevice -# 38 FOselectionDeviceMeshSize [O] - int -# 39 FOtargetSpecies [O] - TargetSpecies -# 40 FOincidentalByCatchMitigationDeviceFirst [M] - BycatchMitigationDevice -# 41 FOincidentalByCatchMitigationDeviceTargetFirst [M] - BycatchMitigationDeviceTarget -# 42 FOincidentalByCatchMitigationDeviceSecond [M] - BycatchMitigationDevice -# 43 FOincidentalByCatchMitigationDeviceTargetSecond [M] - BycatchMitigationDeviceTarget -# 44 FOgearDimensions [O] - int -# 45 FOobservationCode [M] - ObservationCode -# 46 FOnumberTotal [DV,O] - int -# 47 FOnumberSampled [DV,O] - int -# 48 FOselectionProb [DV,O] - Decimal0.0000000000000001-1 -# 49 FOinclusionProb [DV,O] - Decimal0.0000000000000001-1 -# 50 FOselectionMethod [DV,M] - SelectionMethod -# 51 FOunitName [DV,M] - StringLength100 -# 52 FOselectionMethodCluster [DV,O] - SelectionMethod -# 53 FOnumberTotalClusters [DV,O] - int -# 54 FOnumberSampledClusters [DV,O] - int -# 55 FOselectionProbCluster [DV,O] - Decimal0.0000000000000001-1 -# 56 FOinclusionProbCluster [DV,O] - Decimal0.0000000000000001-1 -# 57 FOsampled [DV,M] - YesNoFields -# 58 FOreasonNotSampled [DV,O] - ReasonForNotSampling - - FO_df <- data.frame( FOid = FT_df$FTid, FTid = FT_df$FTid, @@ -388,6 +248,7 @@ FO_df <- data.frame( FOendDate = as.Date(FT_df$FTdepartureDate, format='%Y-%m-%d'),#M FOendTime ="",#M FOduration = 60, #M ATTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT!!!! + FOfishingDurationDataBasis = "Measured", FOdurationSource = "Crew", FOhandlingTime = "", FOstartLat="", # ATT! @@ -400,12 +261,16 @@ FO_df <- data.frame( FOfisheriesManagementUnit = "", FOgsaSubarea = "NotApplicable", #M FOjurisdictionArea = "", + FOgeographicalDataBasis = "Measured", + FOgeographicalSource = "Crew", FOfishingDepth = "", FOwaterDepth = "", FOnationalFishingActivity = "", FOmetier5 = "", FOmetier6 = "OTT_CRU_70-89_2_35",#M FOgear = "OTT",#M + FOgearDataBasis = "Measured", + FOgearSource = "", FOmeshSize = "", FOselectionDevice = "", FOselectionDeviceMeshSize = "", @@ -430,7 +295,6 @@ FO_df <- data.frame( FOsampled = "Y", #M FOreasonNotSampled = "", FOnonResponseCollected = factor("N", levels = c('N', 'Y')), - FOfisheriesManagementUnit = "", FOauxiliaryVariableTotal = "", FOauxiliaryVariableValue = "", FOauxiliaryVariableName = "", @@ -439,43 +303,8 @@ stringsAsFactors=FALSE ) +#===SS============ -# 1 SSid [] - int -# 2 LEid [M/O] - int -# 3 FOid [M/O] - int -# 4 FTid [M/O] - int -# 5 OSid [M/O] - int -# 6 TEid [M/O] - int -# 7 SLid [M] - int -# 8 SSrecordType [M] - string -# 9 SSsequenceNumber [M] - int -# 10 SSstratification [DV,M] - YesNoFields -# 11 SSstratumName [DV,M] - StringLength100 -# 12 SSclustering [DV,M] - Clustering -# 13 SSclusterName [DV,M] - StringLength100 -# 14 SSobservationActivityType [DV,M] - ObservationActivityType -# 15 SScatchFraction [M] - CatchFraction -# 16 SSobservationType [DV,M] - ObservationMethod -# 17 SSsampler [O] - Sampler -# 18 SSspeciesListName [M] - StringLength100 -# 19 SSuseForCalculateZero [M] - YesNoFields -# 20 SStimeTotal [O] - int -# 21 SStimeSampled [O] - int -# 22 SSnumberTotal [DV,O] - int -# 23 SSnumberSampled [DV,O] - int -# 24 SSselectionProb [DV,O] - Decimal0.0000000000000001-1 -# 25 SSinclusionProb [DV,O] - Decimal0.0000000000000001-1 -# 26 SSselectionMethod [DV,M] - SelectionMethod -# 27 SSunitName [DV,M] - StringLength100 -# 28 SSselectionMethodCluster [DV,O] - SelectionMethod -# 29 SSnumberTotalClusters [DV,O] - int -# 30 SSnumberSampledClusters [DV,O] - int -# 31 SSselectionProbCluster [DV,O] - Decimal0.0000000000000001-1 -# 32 SSinclusionProbCluster [DV,O] - Decimal0.0000000000000001-1 -# 33 SSsampled [DV,M] - YesNoFields -# 34 SSreasonNotSampled [DV,O] - ReasonForNotSampling - -#====SS=========== SS_df<-data.frame( SSid = FO_df$FOid, @@ -498,8 +327,10 @@ SS_df<-data.frame( SSspeciesListName = project_name_outputs, #M SSuseForCalculateZero = "N", #M SStimeTotal = "", + SStimeTotalDataBasis = "Measured", SStimeSampled = "", SSnumberTotal = 1, + SSnumberTotalDataBasis = "Measured", SSnumberSampled = 1, SSselectionProb = 1, SSinclusionProb = 1, @@ -517,127 +348,79 @@ SS_df<-data.frame( SSauxiliaryVariableValue = "", SSauxiliaryVariableName = "", SSauxiliaryVariableUnit = "", - stringsAsFactors = F + stringsAsFactors=FALSE ) #====SA=========== - -# 1 SAid [] - int -# 2 SSid [] - int -# 3 SArecordType [M] - string -# 4 SAsequenceNumber [M] - int -# 5 SAparentSequenceNumber [O] - int -# 6 SAstratification [DV,M] - YesNoFields -# 7 SAstratumName [DV,M] - StringLength100 -# 8 SAspeciesCode [M] - SpecWoRMS -# 9 SAspeciesCodeFAO [O] - SpecASFIS -# 10 SAstateOfProcessing [M] - StateOfProcessing -# 11 SApresentation [M] - ProductPresentation -# 12 SAspecimensState [M] - SpecimensState -# 13 SAcatchCategory [M] - CatchCategory -# 14 SAlandingCategory [O] - LandingCategory -# 15 SAcommSizeCatScale [O] - CommercialSizeCategoryScale -# 16 SAcommSizeCat [O] - CommercialSizeCategory -# 17 SAsex [M] - SEXCO -# 18 SAexclusiveEconomicZoneIndicator [O] - ISO_3166 -# 19 SAarea [O] - ICES_Area -# 20 SArectangle [O] - StatRec -# 21 SAgsaSubarea [M] - Areas_GFCM_GSA -# 22 SAjurisdictionArea [O] - JurisdictionArea -# 23 SAnationalFishingActivity [O] - NationalFishingActivity -# 24 SAmetier5 [O] - Metier5_FishingActivity -# 25 SAmetier6 [O] - Metier6_FishingActivity -# 26 SAgear [O] - GearType -# 27 SAmeshSize [O] - int -# 28 SAselectionDevice [O] - SelectionDevice -# 29 SAselectionDeviceMeshSize [O] - int -# 30 SAunitType [M] - SamplingUnit -# 31 SAtotalWeightLive [O] - int -# 32 SAsampleWeightLive [O] - int -# 33 SAnumberTotal [DV,O] - Decimal0.1-999999999 -# 34 SAnumberSampled [DV,O] - Decimal0.1-999999999 -# 35 SAselectionProb [DV,O] - Decimal0.0000000000000001-1 -# 36 SAinclusionProb [DV,O] - Decimal0.0000000000000001-1 -# 37 SAselectionMethod [DV,M] - SelectionMethod -# 38 SAunitName [DV,M] - StringLength100 -# 39 SAlowerHierarchy [M/O] - LowerHierarchy -# 40 SAsampler [O] - Sampler -# 41 SAsampled [DV,M] - YesNoFields -# 42 SAreasonNotSampledFM [DV,O] - ReasonForNotSampling -# 43 SAreasonNotSampledBV [DV,O] - ReasonForNotSampling -# 44 SAtotalWeightMeasured [O] - int -# 45 SAsampleWeightMeasured [O] - int -# 46 SAconversionFactorMeasLive [O] - Decimal0.900-10 - SA_df<-data.frame( - SAid = SS_df$SSid, - SSid = SS_df$SSid, - SArecordType = "SA", #M - SAsequenceNumber = SS_df$SSid, #M - SAparentSequenceNumber = "", - SAstratification = "N", #M - SAstratumName = "U", #M - SAspeciesCode = 107254, #M - SAspeciesCodeFAO = "NEP", - SAstateOfProcessing = "FRE", #M - SApresentation = "WHL", #M - SAspecimensState = "DeadOrZeroProbSurvival", #M - SAcatchCategory = "Lan", #M - SAlandingCategory = "", - SAcommSizeCatScale = "", - SAcommSizeCat = "", - SAsex = "U", #M - SAexclusiveEconomicZoneIndicator = "", - SAarea = "", - SArectangle = "", - SAfisheriesManagementUnit = "", - SAgsaSubarea = "NotApplicable", #M - SAjurisdictionArea = "", - SAnationalFishingActivity = "", - SAmetier5 = "", - SAmetier6 = "", - SAgear = "", - SAmeshSize = "", - SAselectionDevice = "", - SAselectionDeviceMeshSize = "", - SAunitType = "Box", #M - SAtotalWeightLive = "", - SAsampleWeightLive = "", - SAnumberTotal = 1, - SAnumberSampled = 1, - SAselectionProb = 1, - SAinclusionProb = 1, - SAselectionMethod = "CENSUS", #M - SAunitName = SS_df$SSid, #M - SAlowerHierarchy = "D", - SAsampler = "Observer", - SAsampled = ifelse(!is.na(df[[target_var]]),"Y","N"), #M - SAreasonNotSampled = "", - SAnonResponseCollected = "Y", - SAreasonNotSampledFM = "", - SAreasonNotSampledBV = "", - SAtotalWeightMeasured = ifelse(!is.na(df[[target_var]]),df[[target_var]],""), # - SAsampleWeightMeasured = ifelse(!is.na(df[[target_var]]),df[[target_var]],""), # - SAconversionFactorMeasLive = 1, - SAauxiliaryVariableTotal = "", - SAauxiliaryVariableValue = "", - SAauxiliaryVariableName = "", - SAauxiliaryVariableUnit = "", - stringsAsFactors=FALSE +SAid = SS_df$SSid, +SSid = SS_df$SSid, +SArecordType = "SA", #M +SAsequenceNumber = SS_df$SSid, #M +SAparentSequenceNumber = "", +SAstratification = "N", #M +SAstratumName = "U", #M +SAspeciesCode = 107254, #M +SAspeciesCodeFAO = "NEP", +SAstateOfProcessing = "FRE", #M +SApresentation = "WHL", #M +SAspecimensState = "DeadOrZeroProbSurvival", #M +SAcatchCategory = "Lan", #M +SAlandingCategory = "", +SAcommSizeCatScale = "", +SAcommSizeCat = "", +SAsex = "U", #M +SAexclusiveEconomicZoneIndicator = "", +SAarea = "", +SArectangle = "", +SAfisheriesManagementUnit = "", +SAgsaSubarea = "NotApplicable", #M +SAjurisdictionArea = "", +SAgeographicalDataBasis = "Measured", +SAgeographicalSource = "", +SAnationalFishingActivity = "", +SAmetier5 = "", +SAmetier6 = "", +SAgear = "", +SAgearDataBasis = "Measured", +SAgearSource = "", +SAmeshSize = "", +SAselectionDevice = "", +SAselectionDeviceMeshSize = "", +SAunitType = "Box", #M +SAtotalWeightLive = "", +SAsampleWeightLive = "", +SAnumberTotal = 1, +SAnumberSampled = 1, +SAselectionProb = 1, +SAinclusionProb = 1, +SAselectionMethod = "CENSUS", #M +SAunitName = SS_df$SSid, #M +SAlowerHierarchy = "D", +SAsampler = "Observer", +SAsampled = ifelse(!is.na(df[[target_var]]),"Y","N"), #M +SAreasonNotSampled = "", +SAnonResponseCollected = "Y", +SAreasonNotSampledFM = "", +SAreasonNotSampledBV = "", +SAtotalWeightMeasured = ifelse(!is.na(df[[target_var]]),df[[target_var]],""), # +SAtotalWeightMeasuredDataBasis = "Measured", +SAsampleWeightMeasured = ifelse(!is.na(df[[target_var]]),df[[target_var]],""), # +SAconversionFactorMeasLive = 1, +SAauxiliaryVariableTotal = "", +SAauxiliaryVariableValue = "", +SAauxiliaryVariableName = "", +SAauxiliaryVariableUnit = "", +stringsAsFactors=FALSE ) #====Builds final format=========== -RDBESlist <- list(DE = DE_df, - SD = SD_df, - VS = VS_df, - FT = FT_df, - FO = FO_df, - SS = SS_df, - SA = SA_df) + +RDBESlist = list(DE = DE_df,SD = SD_df, VS = VS_df, FT = FT_df, FO = FO_df, SS = SS_df, SA = SA_df) #id table a<-merge(DE_df["DEid"],SD_df[c("DEid","SDid")]) @@ -680,18 +463,16 @@ for (i in names(RDBESlist)) RDBESlist[[i]][which(grepl(colnames(RDBESlist[[i]]),pat="[A-Z]id"))]<-NULL } -#===Save============= +#===Save============ - dir_outputs<-paste0(base_dir_outputs, - project_name_outputs,"/") - dir.create(dir_outputs, recursive=T, showWarnings=FALSE) - filename_output_CS <- paste0("H1.csv") - filename_output_SL <- paste0("HSL.csv") - filename_output_VD <- paste0("HVD.csv") + dir_outputs<-paste0(base_dir_outputs,"/", project_name_outputs,"/") + dir.create(dir_outputs, recursive=T, showWarnings=FALSE) + filename_output_CS <- paste0(dir_outputs,"H1.csv") + filename_output_SL <- paste0(dir_outputs,"HSL.csv") + filename_output_VD <- paste0(dir_outputs,"HVD.csv") - -lapply(RDBESlist, function(x, filename1 = paste0(dir_outputs,filename_output_CS)){ +lapply(RDBESlist, function(x, filename1 = filename_output_CS){ if("DErecordType" %in% colnames(x)){ write.table(x, file = filename1, append = FALSE, quote = FALSE, sep = ",", eol = "\n", na = "NA", dec = ".", row.names = FALSE, @@ -703,7 +484,7 @@ write.table(x, file = filename1, append = TRUE, quote = FALSE, sep = ",", } }) -b<-read.table(file=paste0(dir_outputs,filename_output_CS), header=F, sep=";") +b<-read.table(file = filename_output_CS, header=F, sep=";") b<-cbind(key,b) b<-b[order(as.character(b$key), decreasing=FALSE),] b<-b[!is.na(key),] @@ -711,22 +492,22 @@ b$key<-NULL b$V1<-as.character(b$V1) # saves CS output -write.table(b$V1, file=paste0(dir_outputs,filename_output_CS), col.names=FALSE, row.names = FALSE, quote=FALSE,sep=",") +write.table(b$V1, file = filename_output_CS, col.names=FALSE, row.names = FALSE, quote=FALSE,sep=",") + +# -----Builds and saves dummySL and dummyIS----------------- -#---- Builds and saves dummySL ------------ SL_base$SLspeciesListName<-project_name_outputs SL_base$SLyear<-DEyear SL_base$SLinstitute<-SDinstitution -# saves SL output - write.table(SL_base, file=paste0(dir_outputs,filename_output_SL), col.names=FALSE, row.names = FALSE, quote=FALSE,sep=",") - - -#-------Builds and saves dummyVD--------------- + + # saves SL output + write.table(rbind(paste(SL_base, collapse=","), paste(IS_base, collapse=",")), file=filename_output_SL, col.names=FALSE, row.names = FALSE, quote=FALSE,sep=",") +# -----Builds and saves dummyVD----------------- # saves VD output - -write.table(VD_base, file=paste0(dir_outputs,filename_output_VD), col.names=FALSE, row.names = FALSE, quote=FALSE,sep=",") + VD_base$VDyear<-DEyear + write.table(VD_base, file=filename_output_VD, col.names=FALSE, row.names = FALSE, quote=FALSE,sep=",") diff --git a/data-raw/exampleData/TextBookExamples/BuildTextBookDataUpload_Pckg_survey_apiclus2_v2_H1.R b/data-raw/exampleData/TextBookExamples/BuildTextBookDataUpload_Pckg_survey_apiclus2_v2_H1.R index 83b6e138..2c93b663 100644 --- a/data-raw/exampleData/TextBookExamples/BuildTextBookDataUpload_Pckg_survey_apiclus2_v2_H1.R +++ b/data-raw/exampleData/TextBookExamples/BuildTextBookDataUpload_Pckg_survey_apiclus2_v2_H1.R @@ -45,10 +45,10 @@ baseDir <- "" VD_base <- readRDS(paste0(baseDir,"aux_TextBookExamples/VD_base.rds")) SL_base <- readRDS(paste0(baseDir,"aux_TextBookExamples/SL_base.rds")) - + IS_base <- readRDS(paste0(baseDir,"aux_TextBookExamples/IS_base.rds")) #nameof the directory where the outputs are saved currently base_dir_outputs <- paste0(baseDir,"BuiltUploads") - if(!file.exists(base_dir_outputs)) dir.create(base_dir_outputs, recursive=T, showWarnings=FALSE) + dir.create(base_dir_outputs, recursive=T, showWarnings=FALSE) #=========Outline of Hierarchy 1=============== @@ -64,28 +64,12 @@ -#====DE=========== - +#===DE============ -# 1 DEid [] - int -# 2 DErecordType [M] - string -# 3 DEsamplingScheme [M] - SamplingScheme -# 4 DEsamplingSchemeType [M] - SamplingSchemeType -# 5 DEyear [M] - Year -# 6 DEstratumName [M] - StringLength100 -# 7 DEhierarchyCorrect [M] - YesNoFields -# 8 DEhierarchy [M] - RDBESUpperHierarchy -# 9 DEsampled [DV,M] - YesNoFields -# 10 DEreasonNotSampled [DV,O] - ReasonForNotSampling -# 11 DEnonResponseCollected [DV,O] - YesNoFields -# 12 DEauxiliaryVariableTotal [DV,O] - DecimalPrec3 -# 13 DEauxiliaryVariableValue [DV,O] - DecimalPrec3 -# 14 DEauxiliaryVariableName [DV,O] - AuxiliaryVariableName -# 15 DEauxiliaryVariableUnit[DV,O]-MUNIT DE_df_base<-expand.grid(DEyear=DEyear, - DEstratumName="U",stringsAsFactors=F) + DEstratumName="U",stringsAsFactors=F) DE_df<-data.frame( DEid = 1:nrow(DE_df_base), @@ -106,15 +90,8 @@ DE_df<-data.frame( stringsAsFactors=FALSE ) -#====SD=========== - +#===SD============ - # x -# 1 SDid [M] - int -# 2 DEid [M] - int -# 3 SDrecordType [M] - string -# 4 SDcountry [M] - ISO_3166 -# 5 SDinstitution [M] - EDMO SD_df<-data.frame( SDid=DE_df$DEid, @@ -127,39 +104,8 @@ SD_df<-data.frame( #===VS============ - # x -# 1 VSid [M] - int -# 2 SDid [M] - int -# 3 VDid [M] - int -# 4 TEid [M/O] - int -# 5 VSrecordType [M] - string -# 6 VSsequenceNumber [M] - int -# 7 VSencryptedVesselCode [M] - StringLength100 -# 8 VSstratification [DV,M] - YesNoFields -# 9 VSstratumName [DV,M] - StringLength100 -# 10 VSclustering [DV,M] - Clustering -# 11 VSclusterName [DV,M] - StringLength100 -# 12 VSsampler [O] - Sampler -# 13 VSnumberTotal [DV,O] - int -# 14 VSnumberSampled [DV,O] - int -# 15 VSselectionProb [DV,O] - Decimal0.0000000000000001-1 -# 16 VSinclusionProb [DV,O] - Decimal0.0000000000000001-1 -# 17 VSselectionMethod [DV,M] - SelectionMethod -# 18 VSunitName [DV,M] - StringLength100 -# 19 VSselectionMethodCluster [DV,O] - SelectionMethod -# 20 VSnumberTotalClusters [DV,O] - int -# 21 VSnumberSampledClusters [DV,O] - int -# 22 VSselectionProbCluster [DV,O] - Decimal0.0000000000000001-1 -# 23 VSinclusionProbCluster [DV,O] - Decimal0.0000000000000001-1 -# 24 VSsampled [DV,M] - YesNoFields -# 25 VSreasonNotSampled [DV,O] - ReasonForNotSampling -# 26 VSnonResponseCollected [DV,O] - YesNoFields -# 27 VSauxiliaryVariableTotal [DV,O] - DecimalPrec3 -# 28 VSauxiliaryVariableValue [DV,O] - DecimalPrec3 -# 29 VSauxiliaryVariableName [DV,O] - AuxiliaryVariableName -# 30 VSauxiliaryVariableUnit [DV,O] - MUNIT - -#check_All_fields("VS") + + dataset$VSid <- 1:nrow(dataset) aux_sampleSizes<-as.data.table(dataset)[,list(Npsu=757,npsu=40, Nssu="",nssu=.N),list(dnum, dname)] @@ -220,165 +166,59 @@ VS_df$VSselectionMethodCluster<-"SRSWOR" VS_df$VSselectionProbCluster<-"" VS_df$VSinclusionProbCluster<-"" -#===FT============ - - -# 1 FTid [M] - int -# 2 OSid [M/O] - int -# 3 VSid [M/O] - int -# 4 VDid [M] - int -# 5 SDid [M/O] - int -# 6 FOid [M/O] - int -# 7 TEid [M/O] - int -# 8 FTrecordType [M] - string -# 9 FTencryptedVesselCode [M] - StringLength100 -# 10 FTsequenceNumber [M] - int -# 11 FTstratification [DV,M] - YesNoFields -# 12 FTstratumName [DV,M] - StringLength100 -# 13 FTclustering [DV,M] - Clustering -# 14 FTclusterName [DV,M] - StringLength100 -# 15 FTsampler [O] - Sampler -# 16 FTsamplingType [M] - SamplingContext -# 17 FTnumberOfHaulsOrSets [M/O] - IntZeroToMax -# 18 FTdepartureLocation [M/O] - Harbour_LOCODE -# 19 FTdepartureDate [M/O] - Date -# 20 FTdepartureTime [M/O] - StringLength60 -# 21 FTarrivalLocation [M] - Harbour_LOCODE -# 22 FTarrivalDate [M] - Date -# 23 FTarrivalTime [M/O] - StringLength60 -# 24 FTnumberTotal [DV,O] - int -# 25 FTnumberSampled [DV,O] - int -# 26 FTselectionProb [DV,O] - Decimal0-1 -# 27 FTinclusionProb [DV,O] - Decimal0-1 -# 28 FTselectionMethod [DV,M] - SelectionMethod -# 29 FTunitName [DV,M] - StringLength100 -# 30 FTselectionMethodCluster [DV,O] - SelectionMethod -# 31 FTnumberTotalClusters [DV,O] - int -# 32 FTnumberSampledClusters [DV,O] - int -# 33 FTselectionProbCluster [DV,O] - Decimal0-1 -# 34 FTinclusionProbCluster [DV,O] - Decimal0-1 -# 35 FTsampled [DV,M] - YesNoFields -# 36 FTreasonNotSampled [DV,O] - ReasonForNotSampling -# 37 FTnonResponseCollected [DV,O] - YesNoFields -# 38 FTauxiliaryVariableTotal [DV,O] - DecimalPrec3 -# 39 FTauxiliaryVariableValue [DV,O] - DecimalPrec3 -# 40 FTauxiliaryVariableName [DV,O] - AuxiliaryVariableName -# 41 FTauxiliaryVariableUnit [DV,O] - MUNIT +#====FT=========== + + FT_df <- data.frame( - FTid = dataset$VSid,#[M] - int - OSid = "",# [M/O] - int - VSid = dataset$VSid, #[M/O] - VDid = "", #[M] - int - SDid = "", #[M/O] - int - FOid = "", #[M/O] - int - TEid = "", #[M/O] - int - FTrecordType='FT', #[M] - string - FTencryptedVesselCode = VS_base$VSencryptedVesselCode, #[M] - FTsequenceNumber = as.integer(1:nrow(dataset)), #[M] - string - FTstratification = "N", #[DV,M] - RS_Stratification - FTstratumName = "U", #[DV,M] - string - FTclustering = "N", #[DV,M] - RS_Clustering - FTclusterName = "U", #[DV,M] - string - FTsampler = "Observer", #[M] - RS_Sampler - FTsamplingType = "AtSea" , #[M] - RS_SamplingType - FTnumberOfHaulsOrSets = 1, #[O] - int - FTdepartureLocation="ZWHWN", #[O] - Harbour_LOCODE - FTdepartureDate=seq(from = as.Date("1965-01-01", format='%Y-%m-%d'), by=1, length.out=nrow(dataset)), #[M/O] - date - FTdepartureTime="", #[O] - time - FTarrivalLocation = "ZWHWN", #[M] - Harbour_LOCODE - FTarrivalDate=seq(from = as.Date("1965-01-01", format='%Y-%m-%d'), by=1, length.out=nrow(dataset)), #[M] - date - FTarrivalTime="", #[O] - time - FTnumberTotal= 1, #[DV,O] - int - FTnumberSampled=1, #[DV,O] - int - FTselectionProb=1, #[DV,O] - DecimalPrec10 - FTinclusionProb=1, #[DV,O] - DecimalPrec10 - FTselectionMethod="CENSUS", #[DV,M] - RS_SelectionMethod - FTunitName = VS_base$VSencryptedVesselCode, #[DV,M] - string - FTselectionMethodCluster="", #[DV,O] - RS_SelectionMethod - FTnumberTotalClusters="", #[DV,O] - int - FTnumberSampledClusters="", #[DV,O] - int - FTselectionProbCluster="", #[DV,O] - DecimalPrec10 - FTinclusionProbCluster="", #[DV,O] - DecimalPrec10 - FTsampled="Y", #[DV,M] - YesNoFields - FTreasonNotSampled= "", #[DV,O] - RS_ReasonForNotSampling - FTnonResponseCollected = "N", - FTauxiliaryVariableTotal = "", - FTauxiliaryVariableValue = "", - FTauxiliaryVariableName = "", - FTauxiliaryVariableUnit = "", - stringsAsFactors=FALSE +FTid = dataset$VSid,#[M] - int +OSid = "",# [M/O] - int +VSid = dataset$VSid, #[M/O] +VDid = "", #[M] - int +SDid = "", #[M/O] - int +FOid = "", #[M/O] - int +TEid = "", #[M/O] - int +FTrecordType='FT', #[M] - string +FTencryptedVesselCode = VS_base$VSencryptedVesselCode, #[M] +FTsequenceNumber = as.integer(1:nrow(dataset)), #[M] - string +FTstratification = "N", #[DV,M] - RS_Stratification +FTstratumName = "U", #[DV,M] - string +FTclustering = "N", #[DV,M] - RS_Clustering +FTclusterName = "U", #[DV,M] - string +FTsampler = "Observer", #[M] - RS_Sampler +FTsamplingType = "AtSea" , #[M] - RS_SamplingType +FTnumberOfHaulsOrSets = 1, #[O] - int +FTdepartureLocation="ZWHWN", #[O] - Harbour_LOCODE +FTdepartureDate=seq(from = as.Date("1965-01-01", format='%Y-%m-%d'), by=1, length.out=nrow(dataset)), #[M/O] - date +FTdepartureTime="", #[O] - time +FTarrivalLocation = "ZWHWN", #[M] - Harbour_LOCODE +FTarrivalDate=seq(from = as.Date("1965-01-01", format='%Y-%m-%d'), by=1, length.out=nrow(dataset)), #[M] - date +FTarrivalTime="", #[O] - time +FTdominantLandingDate= "", +FTnumberTotal= 1, #[DV,O] - int +FTnumberSampled=1, #[DV,O] - int +FTselectionProb=1, #[DV,O] - DecimalPrec10 +FTinclusionProb=1, #[DV,O] - DecimalPrec10 +FTselectionMethod="CENSUS", #[DV,M] - RS_SelectionMethod +FTunitName = VS_base$VSencryptedVesselCode, #[DV,M] - string +FTselectionMethodCluster="", #[DV,O] - RS_SelectionMethod +FTnumberTotalClusters="", #[DV,O] - int +FTnumberSampledClusters="", #[DV,O] - int +FTselectionProbCluster="", #[DV,O] - DecimalPrec10 +FTinclusionProbCluster="", #[DV,O] - DecimalPrec10 +FTsampled="Y", #[DV,M] - YesNoFields +FTreasonNotSampled= "", #[DV,O] - RS_ReasonForNotSampling +FTnonResponseCollected = "N", +FTauxiliaryVariableTotal = "", +FTauxiliaryVariableValue = "", +FTauxiliaryVariableName = "", +FTauxiliaryVariableUnit = "", +stringsAsFactors=FALSE ) #====FO=========== -# 1 FOid [M] - int -# 2 FTid [M/O] - int -# 3 SDid [M/O] - int -# 4 FOrecordType [M] - string -# 5 FOstratification [DV,M] - YesNoFields -# 6 FOsequenceNumber [M] - int -# 7 FOstratumName [DV,M] - StringLength100 -# 8 FOclustering [DV,M] - Clustering -# 9 FOclusterName [DV,M] - StringLength100 -# 10 FOsampler [O] - Sampler -# 11 FOaggregationLevel [M] - AggregationLevel -# 12 FOvalidity [M] - ValidityFlag -# 13 FOcatchReg [M] - CatchRegistration -# 14 FOstartDate [O] - Date -# 15 FOstartTime [O] - Time -# 16 FOendDate [M] - Date -# 17 FOendTime [M/O] - Time -# 18 FOduration [M/O] - int -# 19 FOdurationSource [M] - DurationSource -# 20 FOhandlingTime [O] - int -# 21 FOstartLat [O] - Decimal-90.000000-90.000000 -# 22 FOstartLon [O] - Decimal-180.000000-180.000000 -# 23 FOstopLat [O] - Decimal-90.000000-90.000000 -# 24 FOstopLon [O] - Decimal-180.000000-180.000000 -# 25 FOexclusiveEconomicZoneIndicator [O] - ISO_3166 -# 26 FOarea [M] - ICES_Area -# 27 FOrectangle [O] - StatRec -# 28 FOfisheriesManagementUnit [O] - AreaNonFAO -# 29 FOgsaSubarea [M] - Areas_GFCM_GSA -# 30 FOjurisdictionArea [O] - JurisdictionArea -# 31 FOfishingDepth [O] - int -# 32 FOwaterDepth [O] - int -# 33 FOnationalFishingActivity [O] - NationalFishingActivity -# 34 FOmetier5 [O] - Metier5_FishingActivity -# 35 FOmetier6 [M] - Metier6_FishingActivity -# 36 FOgear [M] - GearType -# 37 FOmeshSize [O] - int -# 38 FOselectionDevice [O] - SelectionDevice -# 39 FOselectionDeviceMeshSize [O] - int -# 40 FOtargetSpecies [O] - TargetSpecies -# 41 FOincidentalByCatchMitigationDeviceFirst [M] - BycatchMitigationDevice -# 42 FOincidentalByCatchMitigationDeviceTargetFirst [M] - BycatchMitigationDeviceTarget -# 43 FOincidentalByCatchMitigationDeviceSecond [M] - BycatchMitigationDevice -# 44 FOincidentalByCatchMitigationDeviceTargetSecond [M] - BycatchMitigationDeviceTarget -# 45 FOgearDimensions [O] - int -# 46 FOobservationCode [M] - ObservationCode -# 47 FOnumberTotal [DV,O] - int -# 48 FOnumberSampled [DV,O] - int -# 49 FOselectionProb [DV,O] - Decimal0.0000000000000001-1 -# 50 FOinclusionProb [DV,O] - Decimal0.0000000000000001-1 -# 51 FOselectionMethod [DV,M] - SelectionMethod -# 52 FOunitName [DV,M] - StringLength100 -# 53 FOselectionMethodCluster [DV,O] - SelectionMethod -# 54 FOnumberTotalClusters [DV,O] - int -# 55 FOnumberSampledClusters [DV,O] - int -# 56 FOselectionProbCluster [DV,O] - Decimal0.0000000000000001-1 -# 57 FOinclusionProbCluster [DV,O] - Decimal0.0000000000000001-1 -# 58 FOsampled [DV,M] - YesNoFields -# 59 FOreasonNotSampled [DV,O] - ReasonForNotSampling -# 60 FOnonResponseCollected [DV,O] - YesNoFields -# 61 FOauxiliaryVariableTotal [DV,O] - DecimalPrec3 -# 62 FOauxiliaryVariableValue [DV,O] - DecimalPrec3 -# 63 FOauxiliaryVariableName [DV,O] - AuxiliaryVariableName -# 64 FOauxiliaryVariableUnit [DV,O] - MUNIT - - FO_df <- data.frame( FOid = dataset$VSid, FTid = dataset$VSid, @@ -398,6 +238,7 @@ FO_df <- data.frame( FOendDate = as.Date(FT_df$FTdepartureDate, format='%Y-%m-%d'),#M FOendTime ="",#M FOduration = 60, #M ATTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT!!!! + FOfishingDurationDataBasis = "Measured", FOdurationSource = "Crew", FOhandlingTime = "", FOstartLat="", # ATT! @@ -410,12 +251,16 @@ FO_df <- data.frame( FOfisheriesManagementUnit = "", FOgsaSubarea = "NotApplicable", #M FOjurisdictionArea = "", + FOgeographicalDataBasis = "Measured", + FOgeographicalSource = "Crew", FOfishingDepth = "", FOwaterDepth = "", FOnationalFishingActivity = "", FOmetier5 = "", FOmetier6 = "OTT_CRU_70-89_2_35",#M FOgear = "OTT",#M + FOgearDataBasis = "Measured", + FOgearSource = "", FOmeshSize = "", FOselectionDevice = "", FOselectionDeviceMeshSize = "", @@ -450,45 +295,6 @@ stringsAsFactors=FALSE #===SS============ -# 1 SSid [] - int -# 2 LEid [M/O] - int -# 3 FOid [M/O] - int -# 4 FTid [M/O] - int -# 5 OSid [M/O] - int -# 6 TEid [M/O] - int -# 7 SLid [M] - int -# 8 SSrecordType [M] - string -# 9 SSsequenceNumber [M] - int -# 10 SSstratification [DV,M] - YesNoFields -# 11 SSstratumName [DV,M] - StringLength100 -# 12 SSclustering [DV,M] - Clustering -# 13 SSclusterName [DV,M] - StringLength100 -# 14 SSobservationActivityType [DV,M] - ObservationActivityType -# 15 SScatchFraction [M] - CatchFraction -# 16 SSobservationType [DV,M] - ObservationMethod -# 17 SSsampler [O] - Sampler -# 18 SSspeciesListName [M] - StringLength100 -# 19 SSuseForCalculateZero [M] - YesNoFields -# 20 SStimeTotal [O] - int -# 21 SStimeSampled [O] - int -# 22 SSnumberTotal [DV,O] - int -# 23 SSnumberSampled [DV,O] - int -# 24 SSselectionProb [DV,O] - Decimal0.0000000000000001-1 -# 25 SSinclusionProb [DV,O] - Decimal0.0000000000000001-1 -# 26 SSselectionMethod [DV,M] - SelectionMethod -# 27 SSunitName [DV,M] - StringLength100 -# 28 SSselectionMethodCluster [DV,O] - SelectionMethod -# 29 SSnumberTotalClusters [DV,O] - int -# 30 SSnumberSampledClusters [DV,O] - int -# 31 SSselectionProbCluster [DV,O] - Decimal0.0000000000000001-1 -# 32 SSinclusionProbCluster [DV,O] - Decimal0.0000000000000001-1 -# 33 SSsampled [DV,M] - YesNoFields -# 34 SSreasonNotSampled [DV,O] - ReasonForNotSampling -# 35 SSnonResponseCollected [DV,O] - YesNoFields -# 36 SSauxiliaryVariableTotal [DV,O] - DecimalPrec3 -# 37 SSauxiliaryVariableValue [DV,O] - DecimalPrec3 -# 38 SSauxiliaryVariableName [DV,O] - AuxiliaryVariableName -# 39 SSauxiliaryVariableUnit [DV,O] - MUNIT SS_df<-data.frame( SSid = dataset$VSid, @@ -511,8 +317,10 @@ SS_df<-data.frame( SSspeciesListName = project_name_outputs, #M SSuseForCalculateZero = "N", #M SStimeTotal = "", + SStimeTotalDataBasis = "Measured", SStimeSampled = "", SSnumberTotal = 1, + SSnumberTotalDataBasis = "Measured", SSnumberSampled = 1, SSselectionProb = 1, SSinclusionProb = 1, @@ -535,117 +343,66 @@ SS_df<-data.frame( #====SA=========== - -# 1 SAid [] - int -# 2 SSid [] - int -# 3 SArecordType [M] - string -# 4 SAsequenceNumber [M] - int -# 5 SAparentSequenceNumber [O] - int -# 6 SAstratification [DV,M] - YesNoFields -# 7 SAstratumName [DV,M] - StringLength100 -# 8 SAspeciesCode [M] - SpecWoRMS -# 9 SAspeciesCodeFAO [O] - SpecASFIS -# 10 SAstateOfProcessing [M] - StateOfProcessing -# 11 SApresentation [M] - ProductPresentation -# 12 SAspecimensState [M] - SpecimensState -# 13 SAcatchCategory [M] - CatchCategory -# 14 SAlandingCategory [O] - LandingCategory -# 15 SAcommSizeCatScale [O] - CommercialSizeCategoryScale -# 16 SAcommSizeCat [O] - CommercialSizeCategory -# 17 SAsex [M] - SEXCO -# 18 SAexclusiveEconomicZoneIndicator [O] - ISO_3166 -# 19 SAarea [O] - ICES_Area -# 20 SArectangle [O] - StatRec -# 21 SAfisheriesManagementUnit [O] - AreaNonFAO -# 22 SAgsaSubarea [M] - Areas_GFCM_GSA -# 23 SAjurisdictionArea [O] - JurisdictionArea -# 24 SAnationalFishingActivity [O] - NationalFishingActivity -# 25 SAmetier5 [O] - Metier5_FishingActivity -# 26 SAmetier6 [O] - Metier6_FishingActivity -# 27 SAgear [O] - GearType -# 28 SAmeshSize [O] - int -# 29 SAselectionDevice [O] - SelectionDevice -# 30 SAselectionDeviceMeshSize [O] - int -# 31 SAunitType [M] - SamplingUnit -# 32 SAtotalWeightLive [O] - int -# 33 SAsampleWeightLive [O] - int -# 34 SAnumberTotal [DV,O] - Decimal0.1-999999999 -# 35 SAnumberSampled [DV,O] - Decimal0.1-999999999 -# 36 SAselectionProb [DV,O] - Decimal0-1 -# 37 SAinclusionProb [DV,O] - Decimal0-1 -# 38 SAselectionMethod [DV,M] - SelectionMethod -# 39 SAunitName [DV,M] - StringLength100 -# 40 SAlowerHierarchy [M/O] - LowerHierarchy -# 41 SAsampler [O] - Sampler -# 42 SAsampled [DV,M] - YesNoFields -# 43 SAreasonNotSampled [DV,O] - ReasonForNotSampling -# 44 SAnonResponseCollected [DV,O] - YesNoFields -# 45 SAreasonNotSampledFM [DV,O] - ReasonForNotSampling -# 46 SAreasonNotSampledBV [DV,O] - ReasonForNotSampling -# 47 SAtotalWeightMeasured [O] - int -# 48 SAsampleWeightMeasured [O] - int -# 49 SAconversionFactorMeasLive [O] - Decimal0.900-10 -# 50 SAauxiliaryVariableTotal [DV,O] - DecimalPrec3 -# 51 SAauxiliaryVariableValue [DV,O] - DecimalPrec3 -# 52 SAauxiliaryVariableName [DV,O] - AuxiliaryVariableName -# 53 SAauxiliaryVariableUnit [DV,O] - MUNIT - - SA_df<-data.frame( - SAid = 1:nrow(dataset), - SSid = dataset$VSid, - SArecordType = "SA", #M - SAsequenceNumber = dataset$VSid, #M - SAparentSequenceNumber = "", - SAstratification = "N", #M - SAstratumName = "U", #M - SAspeciesCode = 107254, #M - SAspeciesCodeFAO = "NEP", - SAstateOfProcessing = "FRE", #M - SApresentation = "WHL", #M - SAspecimensState = "DeadOrZeroProbSurvival", #M - SAcatchCategory = "Lan", #M - SAlandingCategory = "", - SAcommSizeCatScale = "", - SAcommSizeCat = "", - SAsex = "U", #M - SAexclusiveEconomicZoneIndicator = "", - SAarea = "", - SArectangle = "", - SAfisheriesManagementUnit = "", - SAgsaSubarea = "NotApplicable", #M - SAjurisdictionArea = "", - SAnationalFishingActivity = "", - SAmetier5 = "", - SAmetier6 = "", - SAgear = "", - SAmeshSize = "", - SAselectionDevice = "", - SAselectionDeviceMeshSize = "", - SAunitType = "Box", #M - SAtotalWeightLive = "", - SAsampleWeightLive = "", - SAnumberTotal = 1, - SAnumberSampled = 1, - SAselectionProb = 1, - SAinclusionProb = 1, - SAselectionMethod = "CENSUS", #M - SAunitName = dataset$VSid, #M - SAlowerHierarchy = "D", - SAsampler = "Observer", - SAsampled = ifelse(!is.na(dataset[[target_var]]),"Y","N"), #M - SAreasonNotSampled = "", - SAnonResponseCollected = "Y", - SAreasonNotSampledFM = "", - SAreasonNotSampledBV = "", - SAtotalWeightMeasured = ifelse(!is.na(dataset[[target_var]]),dataset[[target_var]],""), # - SAsampleWeightMeasured = ifelse(!is.na(dataset[[target_var]]),dataset[[target_var]],""), # - SAconversionFactorMeasLive = 1, - SAauxiliaryVariableTotal = "", - SAauxiliaryVariableValue = "", - SAauxiliaryVariableName = "", - SAauxiliaryVariableUnit = "", - stringsAsFactors=FALSE +SAid = 1:nrow(dataset), +SSid = dataset$VSid, +SArecordType = "SA", #M +SAsequenceNumber = dataset$VSid, #M +SAparentSequenceNumber = "", +SAstratification = "N", #M +SAstratumName = "U", #M +SAspeciesCode = 107254, #M +SAspeciesCodeFAO = "NEP", +SAstateOfProcessing = "FRE", #M +SApresentation = "WHL", #M +SAspecimensState = "DeadOrZeroProbSurvival", #M +SAcatchCategory = "Lan", #M +SAlandingCategory = "", +SAcommSizeCatScale = "", +SAcommSizeCat = "", +SAsex = "U", #M +SAexclusiveEconomicZoneIndicator = "", +SAarea = "", +SArectangle = "", +SAfisheriesManagementUnit = "", +SAgsaSubarea = "NotApplicable", #M +SAjurisdictionArea = "", +SAgeographicalDataBasis = "Measured", +SAgeographicalSource = "", +SAnationalFishingActivity = "", +SAmetier5 = "", +SAmetier6 = "", +SAgear = "", +SAgearDataBasis = "Measured", +SAgearSource = "", +SAmeshSize = "", +SAselectionDevice = "", +SAselectionDeviceMeshSize = "", +SAunitType = "Box", #M +SAtotalWeightLive = "", +SAsampleWeightLive = "", +SAnumberTotal = 1, +SAnumberSampled = 1, +SAselectionProb = 1, +SAinclusionProb = 1, +SAselectionMethod = "CENSUS", #M +SAunitName = dataset$VSid, #M +SAlowerHierarchy = "D", +SAsampler = "Observer", +SAsampled = ifelse(!is.na(dataset[[target_var]]),"Y","N"), #M +SAreasonNotSampled = "", +SAnonResponseCollected = "Y", +SAreasonNotSampledFM = "", +SAreasonNotSampledBV = "", +SAtotalWeightMeasured = ifelse(!is.na(dataset[[target_var]]),dataset[[target_var]],""), # +SAtotalWeightMeasuredDataBasis = "Measured", +SAsampleWeightMeasured = ifelse(!is.na(dataset[[target_var]]),dataset[[target_var]],""), # +SAconversionFactorMeasLive = 1, +SAauxiliaryVariableTotal = "", +SAauxiliaryVariableValue = "", +SAauxiliaryVariableName = "", +SAauxiliaryVariableUnit = "", +stringsAsFactors=FALSE ) @@ -698,16 +455,14 @@ RDBESlist[[i]][which(grepl(colnames(RDBESlist[[i]]),pat="[A-Z]id"))]<-NULL #===Save============ - dir_outputs<-paste0(base_dir_outputs,"/", - project_name_outputs,"/") - dir.create(dir_outputs, recursive=T, showWarnings=FALSE) - filename_output_CS <- paste0("H1.csv") - filename_output_SL <- paste0("HSL.csv") - filename_output_VD <- paste0("HVD.csv") - + dir_outputs<-paste0(base_dir_outputs,"/", project_name_outputs,"/") + dir.create(dir_outputs, recursive=T, showWarnings=FALSE) + filename_output_CS <- paste0(dir_outputs,"H1.csv") + filename_output_SL <- paste0(dir_outputs,"HSL.csv") + filename_output_VD <- paste0(dir_outputs,"HVD.csv") -lapply(RDBESlist, function(x, filename1 = paste0(dir_outputs,filename_output_CS)){ +lapply(RDBESlist, function(x, filename1 = filename_output_CS){ if("DErecordType" %in% colnames(x)){ write.table(x, file = filename1, append = FALSE, quote = FALSE, sep = ",", eol = "\n", na = "NA", dec = ".", row.names = FALSE, @@ -719,7 +474,7 @@ write.table(x, file = filename1, append = TRUE, quote = FALSE, sep = ",", } }) -b<-read.table(file=paste0(dir_outputs,filename_output_CS), header=F, sep=";") +b<-read.table(file = filename_output_CS, header=F, sep=";") b<-cbind(key,b) b<-b[order(as.character(b$key), decreasing=FALSE),] b<-b[!is.na(key),] @@ -727,27 +482,26 @@ b$key<-NULL b$V1<-as.character(b$V1) # saves CS output -write.table(b$V1, file=paste0(dir_outputs,filename_output_CS), col.names=FALSE, row.names = FALSE, quote=FALSE,sep=",") +write.table(b$V1, file = filename_output_CS, col.names=FALSE, row.names = FALSE, quote=FALSE,sep=",") -# -----Builds and saves dummySL----------------- +# -----Builds and saves dummySL and dummyIS----------------- SL_base$SLspeciesListName<-project_name_outputs SL_base$SLyear<-DEyear SL_base$SLinstitute<-SDinstitution -# saves SL output - write.table(SL_base, file=paste0(dir_outputs,filename_output_SL), col.names=FALSE, row.names = FALSE, quote=FALSE,sep=",") - + + # saves SL output + write.table(rbind(paste(SL_base, collapse=","), paste(IS_base, collapse=",")), file=filename_output_SL, col.names=FALSE, row.names = FALSE, quote=FALSE,sep=",") # -----Builds and saves dummyVD----------------- - # saves VD output VD_base$VDyear<-DEyear - write.table(VD_base, file=paste0(dir_outputs,filename_output_VD), col.names=FALSE, row.names = FALSE, quote=FALSE,sep=",") - + write.table(VD_base, file=filename_output_VD, col.names=FALSE, row.names = FALSE, quote=FALSE,sep=",") +if(1==2){ # -----Clean SL after dowload----------------- # cleans excess of SL rows frequently present in download @@ -765,5 +519,5 @@ write.csv(tmp, file=paste0(dir_outputs,"tmp/SpeciesList.csv"), quote=F, row.name library(zip) zip::zip(zipfile = paste0(project_name_outputs,".zip"), files =paste0(dir_outputs,"tmp/",dir(paste0(dir_outputs,"tmp"))), mode="cherry-pick") unlink(paste0(dir_outputs,"tmp"), force=T, recursive = T) - +} diff --git a/data-raw/exampleData/TextBookExamples/BuildTextBookDataUpload_Pckg_survey_apistrat_H1.R b/data-raw/exampleData/TextBookExamples/BuildTextBookDataUpload_Pckg_survey_apistrat_H1.R index 55f0acdf..b373a4a1 100644 --- a/data-raw/exampleData/TextBookExamples/BuildTextBookDataUpload_Pckg_survey_apistrat_H1.R +++ b/data-raw/exampleData/TextBookExamples/BuildTextBookDataUpload_Pckg_survey_apistrat_H1.R @@ -20,14 +20,14 @@ DEstratumName <- "Pckg_survey_apistrat_H1" project_name_outputs <- gsub(" ","_", paste0(DEsamplingScheme,"_", DEstratumName)) baseDir <- "./data-raw/exampleData/TextBookExamples/" - #baseDir <- "" + baseDir <- "" VD_base <- readRDS(paste0(baseDir,"aux_TextBookExamples/VD_base.rds")) SL_base <- readRDS(paste0(baseDir,"aux_TextBookExamples/SL_base.rds")) IS_base <- readRDS(paste0(baseDir,"aux_TextBookExamples/IS_base.rds")) #nameof the directory where the outputs are saved currently base_dir_outputs <- paste0(baseDir,"BuiltUploads") - if(!file.exists(base_dir_outputs)) dir.create(base_dir_outputs, recursive=T, showWarnings=FALSE) + dir.create(base_dir_outputs, recursive=T, showWarnings=FALSE) #========Outline of Hierarchy 1================ @@ -47,22 +47,6 @@ -# 1 DEid [] - int -# 2 DErecordType [M] - string -# 3 DEsamplingScheme [M] - SamplingScheme -# 4 DEsamplingSchemeType [M] - SamplingSchemeType -# 5 DEyear [M] - Year -# 6 DEstratumName [M] - StringLength100 -# 7 DEhierarchyCorrect [M] - YesNoFields -# 8 DEhierarchy [M] - RDBESUpperHierarchy -# 9 DEsampled [DV,M] - YesNoFields -# 10 DEreasonNotSampled [DV,O] - ReasonForNotSampling -# 11 DEnonResponseCollected [DV,O] - YesNoFields -# 12 DEauxiliaryVariableTotal [DV,O] - DecimalPrec3 -# 13 DEauxiliaryVariableValue [DV,O] - DecimalPrec3 -# 14 DEauxiliaryVariableName [DV,O] - AuxiliaryVariableName -# 15 DEauxiliaryVariableUnit[DV,O]-MUNIT - DE_df_base<-expand.grid(DEyear=DEyear, DEstratumName="U",stringsAsFactors=F) @@ -88,13 +72,6 @@ DE_df<-data.frame( #===SD============ - # x -# 1 SDid [M] - int -# 2 DEid [M] - int -# 3 SDrecordType [M] - string -# 4 SDcountry [M] - ISO_3166 -# 5 SDinstitution [M] - EDMO - SD_df<-data.frame( SDid=DE_df$DEid, DEid=DE_df$DEid, @@ -106,41 +83,6 @@ SD_df<-data.frame( #===VS============ - - # x -# 1 VSid [M] - int -# 2 SDid [M] - int -# 3 VDid [M] - int -# 4 TEid [M/O] - int -# 5 VSrecordType [M] - string -# 6 VSsequenceNumber [M] - int -# 7 VSencryptedVesselCode [M] - StringLength100 -# 8 VSstratification [DV,M] - YesNoFields -# 9 VSstratumName [DV,M] - StringLength100 -# 10 VSclustering [DV,M] - Clustering -# 11 VSclusterName [DV,M] - StringLength100 -# 12 VSsampler [O] - Sampler -# 13 VSnumberTotal [DV,O] - int -# 14 VSnumberSampled [DV,O] - int -# 15 VSselectionProb [DV,O] - Decimal0.0000000000000001-1 -# 16 VSinclusionProb [DV,O] - Decimal0.0000000000000001-1 -# 17 VSselectionMethod [DV,M] - SelectionMethod -# 18 VSunitName [DV,M] - StringLength100 -# 19 VSselectionMethodCluster [DV,O] - SelectionMethod -# 20 VSnumberTotalClusters [DV,O] - int -# 21 VSnumberSampledClusters [DV,O] - int -# 22 VSselectionProbCluster [DV,O] - Decimal0.0000000000000001-1 -# 23 VSinclusionProbCluster [DV,O] - Decimal0.0000000000000001-1 -# 24 VSsampled [DV,M] - YesNoFields -# 25 VSreasonNotSampled [DV,O] - ReasonForNotSampling -# 26 VSnonResponseCollected [DV,O] - YesNoFields -# 27 VSauxiliaryVariableTotal [DV,O] - DecimalPrec3 -# 28 VSauxiliaryVariableValue [DV,O] - DecimalPrec3 -# 29 VSauxiliaryVariableName [DV,O] - AuxiliaryVariableName -# 30 VSauxiliaryVariableUnit [DV,O] - MUNIT - - - # adds VSid to dataset dataset$VSid <- 1:nrow(dataset) @@ -195,164 +137,56 @@ VS_df$VSnumberTotal<-strataSize[VS_df$VSstratumName] #====FT=========== - -# 1 FTid [M] - int -# 2 OSid [M/O] - int -# 3 VSid [M/O] - int -# 4 VDid [M] - int -# 5 SDid [M/O] - int -# 6 FOid [M/O] - int -# 7 TEid [M/O] - int -# 8 FTrecordType [M] - string -# 9 FTencryptedVesselCode [M] - StringLength100 -# 10 FTsequenceNumber [M] - int -# 11 FTstratification [DV,M] - YesNoFields -# 12 FTstratumName [DV,M] - StringLength100 -# 13 FTclustering [DV,M] - Clustering -# 14 FTclusterName [DV,M] - StringLength100 -# 15 FTsampler [O] - Sampler -# 16 FTsamplingType [M] - SamplingContext -# 17 FTnumberOfHaulsOrSets [M/O] - IntZeroToMax -# 18 FTdepartureLocation [M/O] - Harbour_LOCODE -# 19 FTdepartureDate [M/O] - Date -# 20 FTdepartureTime [M/O] - StringLength60 -# 21 FTarrivalLocation [M] - Harbour_LOCODE -# 22 FTarrivalDate [M] - Date -# 23 FTarrivalTime [M/O] - StringLength60 -# 24 FTnumberTotal [DV,O] - int -# 25 FTnumberSampled [DV,O] - int -# 26 FTselectionProb [DV,O] - Decimal0-1 -# 27 FTinclusionProb [DV,O] - Decimal0-1 -# 28 FTselectionMethod [DV,M] - SelectionMethod -# 29 FTunitName [DV,M] - StringLength100 -# 30 FTselectionMethodCluster [DV,O] - SelectionMethod -# 31 FTnumberTotalClusters [DV,O] - int -# 32 FTnumberSampledClusters [DV,O] - int -# 33 FTselectionProbCluster [DV,O] - Decimal0-1 -# 34 FTinclusionProbCluster [DV,O] - Decimal0-1 -# 35 FTsampled [DV,M] - YesNoFields -# 36 FTreasonNotSampled [DV,O] - ReasonForNotSampling -# 37 FTnonResponseCollected [DV,O] - YesNoFields -# 38 FTauxiliaryVariableTotal [DV,O] - DecimalPrec3 -# 39 FTauxiliaryVariableValue [DV,O] - DecimalPrec3 -# 40 FTauxiliaryVariableName [DV,O] - AuxiliaryVariableName -# 41 FTauxiliaryVariableUnit [DV,O] - MUNIT - FT_df <- data.frame( - FTid = dataset$VSid,#[M] - int - OSid = "",# [M/O] - int - VSid = dataset$VSid, #[M/O] - VDid = "", #[M] - int - SDid = "", #[M/O] - int - FOid = "", #[M/O] - int - TEid = "", #[M/O] - int - FTrecordType='FT', #[M] - string - FTencryptedVesselCode = dataset$VSencryptedVesselCode, #[M] - FTsequenceNumber = as.integer(1:nrow(dataset)), #[M] - string - FTstratification = "N", #[DV,M] - RS_Stratification - FTstratumName = "U", #[DV,M] - string - FTclustering = "N", #[DV,M] - RS_Clustering - FTclusterName = "U", #[DV,M] - string - FTsampler = "Observer", #[M] - RS_Sampler - FTsamplingType = "AtSea" , #[M] - RS_SamplingType - FTnumberOfHaulsOrSets = 1, #[O] - int - FTdepartureLocation="ZWHWN", #[O] - Harbour_LOCODE - FTdepartureDate=seq(from = as.Date("1968-01-01", format='%Y-%m-%d'), by=1, length.out=nrow(dataset)), #[M/O] - date - FTdepartureTime="", #[O] - time - FTarrivalLocation = "ZWHWN", #[M] - Harbour_LOCODE - FTarrivalDate=seq(from = as.Date("1968-01-01", format='%Y-%m-%d'), by=1, length.out=nrow(dataset)), #[M] - date - FTarrivalTime="", #[O] - time - FTnumberTotal= 1, #[DV,O] - int - FTnumberSampled=1, #[DV,O] - int - FTselectionProb=1, #[DV,O] - DecimalPrec10 - FTinclusionProb=1, #[DV,O] - DecimalPrec10 - FTselectionMethod="CENSUS", #[DV,M] - RS_SelectionMethod - FTunitName = dataset$VSid, #[DV,M] - string - FTselectionMethodCluster="", #[DV,O] - RS_SelectionMethod - FTnumberTotalClusters="", #[DV,O] - int - FTnumberSampledClusters="", #[DV,O] - int - FTselectionProbCluster="", #[DV,O] - DecimalPrec10 - FTinclusionProbCluster="", #[DV,O] - DecimalPrec10 - FTsampled="Y", #[DV,M] - YesNoFields - FTreasonNotSampled= "", #[DV,O] - RS_ReasonForNotSampling - FTnonResponseCollected = "N", - FTauxiliaryVariableTotal = "", - FTauxiliaryVariableValue = "", - FTauxiliaryVariableName = "", - FTauxiliaryVariableUnit = "", - stringsAsFactors=FALSE +FTid = dataset$VSid,#[M] - int +OSid = "",# [M/O] - int +VSid = dataset$VSid, #[M/O] +VDid = "", #[M] - int +SDid = "", #[M/O] - int +FOid = "", #[M/O] - int +TEid = "", #[M/O] - int +FTrecordType='FT', #[M] - string +FTencryptedVesselCode = dataset$VSencryptedVesselCode, #[M] +FTsequenceNumber = as.integer(1:nrow(dataset)), #[M] - string +FTstratification = "N", #[DV,M] - RS_Stratification +FTstratumName = "U", #[DV,M] - string +FTclustering = "N", #[DV,M] - RS_Clustering +FTclusterName = "U", #[DV,M] - string +FTsampler = "Observer", #[M] - RS_Sampler +FTsamplingType = "AtSea" , #[M] - RS_SamplingType +FTnumberOfHaulsOrSets = 1, #[O] - int +FTdepartureLocation="ZWHWN", #[O] - Harbour_LOCODE +FTdepartureDate=seq(from = as.Date("1968-01-01", format='%Y-%m-%d'), by=1, length.out=nrow(dataset)), #[M/O] - date +FTdepartureTime="", #[O] - time +FTarrivalLocation = "ZWHWN", #[M] - Harbour_LOCODE +FTarrivalDate=seq(from = as.Date("1968-01-01", format='%Y-%m-%d'), by=1, length.out=nrow(dataset)), #[M] - date +FTarrivalTime="", #[O] - time +FTdominantLandingDate= "", +FTnumberTotal= 1, #[DV,O] - int +FTnumberSampled=1, #[DV,O] - int +FTselectionProb=1, #[DV,O] - DecimalPrec10 +FTinclusionProb=1, #[DV,O] - DecimalPrec10 +FTselectionMethod="CENSUS", #[DV,M] - RS_SelectionMethod +FTunitName = dataset$VSid, #[DV,M] - string +FTselectionMethodCluster="", #[DV,O] - RS_SelectionMethod +FTnumberTotalClusters="", #[DV,O] - int +FTnumberSampledClusters="", #[DV,O] - int +FTselectionProbCluster="", #[DV,O] - DecimalPrec10 +FTinclusionProbCluster="", #[DV,O] - DecimalPrec10 +FTsampled="Y", #[DV,M] - YesNoFields +FTreasonNotSampled= "", #[DV,O] - RS_ReasonForNotSampling +FTnonResponseCollected = "N", +FTauxiliaryVariableTotal = "", +FTauxiliaryVariableValue = "", +FTauxiliaryVariableName = "", +FTauxiliaryVariableUnit = "", +stringsAsFactors=FALSE ) #====FO=========== -# 1 FOid [M] - int -# 2 FTid [M/O] - int -# 3 SDid [M/O] - int -# 4 FOrecordType [M] - string -# 5 FOstratification [DV,M] - YesNoFields -# 6 FOsequenceNumber [M] - int -# 7 FOstratumName [DV,M] - StringLength100 -# 8 FOclustering [DV,M] - Clustering -# 9 FOclusterName [DV,M] - StringLength100 -# 10 FOsampler [O] - Sampler -# 11 FOaggregationLevel [M] - AggregationLevel -# 12 FOvalidity [M] - ValidityFlag -# 13 FOcatchReg [M] - CatchRegistration -# 14 FOstartDate [O] - Date -# 15 FOstartTime [O] - Time -# 16 FOendDate [M] - Date -# 17 FOendTime [M/O] - Time -# 18 FOduration [M/O] - int -# 19 FOdurationSource [M] - DurationSource -# 20 FOhandlingTime [O] - int -# 21 FOstartLat [O] - Decimal-90.000000-90.000000 -# 22 FOstartLon [O] - Decimal-180.000000-180.000000 -# 23 FOstopLat [O] - Decimal-90.000000-90.000000 -# 24 FOstopLon [O] - Decimal-180.000000-180.000000 -# 25 FOexclusiveEconomicZoneIndicator [O] - ISO_3166 -# 26 FOarea [M] - ICES_Area -# 27 FOrectangle [O] - StatRec -# 28 FOfisheriesManagementUnit [O] - AreaNonFAO -# 29 FOgsaSubarea [M] - Areas_GFCM_GSA -# 30 FOjurisdictionArea [O] - JurisdictionArea -# 31 FOfishingDepth [O] - int -# 32 FOwaterDepth [O] - int -# 33 FOnationalFishingActivity [O] - NationalFishingActivity -# 34 FOmetier5 [O] - Metier5_FishingActivity -# 35 FOmetier6 [M] - Metier6_FishingActivity -# 36 FOgear [M] - GearType -# 37 FOmeshSize [O] - int -# 38 FOselectionDevice [O] - SelectionDevice -# 39 FOselectionDeviceMeshSize [O] - int -# 40 FOtargetSpecies [O] - TargetSpecies -# 41 FOincidentalByCatchMitigationDeviceFirst [M] - BycatchMitigationDevice -# 42 FOincidentalByCatchMitigationDeviceTargetFirst [M] - BycatchMitigationDeviceTarget -# 43 FOincidentalByCatchMitigationDeviceSecond [M] - BycatchMitigationDevice -# 44 FOincidentalByCatchMitigationDeviceTargetSecond [M] - BycatchMitigationDeviceTarget -# 45 FOgearDimensions [O] - int -# 46 FOobservationCode [M] - ObservationCode -# 47 FOnumberTotal [DV,O] - int -# 48 FOnumberSampled [DV,O] - int -# 49 FOselectionProb [DV,O] - Decimal0.0000000000000001-1 -# 50 FOinclusionProb [DV,O] - Decimal0.0000000000000001-1 -# 51 FOselectionMethod [DV,M] - SelectionMethod -# 52 FOunitName [DV,M] - StringLength100 -# 53 FOselectionMethodCluster [DV,O] - SelectionMethod -# 54 FOnumberTotalClusters [DV,O] - int -# 55 FOnumberSampledClusters [DV,O] - int -# 56 FOselectionProbCluster [DV,O] - Decimal0.0000000000000001-1 -# 57 FOinclusionProbCluster [DV,O] - Decimal0.0000000000000001-1 -# 58 FOsampled [DV,M] - YesNoFields -# 59 FOreasonNotSampled [DV,O] - ReasonForNotSampling -# 60 FOnonResponseCollected [DV,O] - YesNoFields -# 61 FOauxiliaryVariableTotal [DV,O] - DecimalPrec3 -# 62 FOauxiliaryVariableValue [DV,O] - DecimalPrec3 -# 63 FOauxiliaryVariableName [DV,O] - AuxiliaryVariableName -# 64 FOauxiliaryVariableUnit [DV,O] - MUNIT - - FO_df <- data.frame( FOid = dataset$VSid, FTid = dataset$VSid, @@ -372,24 +206,29 @@ FO_df <- data.frame( FOendDate = as.Date(FT_df$FTdepartureDate, format='%Y-%m-%d'),#M FOendTime ="",#M FOduration = 60, #M ATTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT!!!! + FOfishingDurationDataBasis = "Measured", FOdurationSource = "Crew", FOhandlingTime = "", FOstartLat="", # ATT! FOstartLon="", # ATT! FOstopLat="", FOstopLon="", - FOexclusiveEconomicZoneIndicator = "", # might differ!! + FOexclusiveEconomicZoneIndicator = "", # FOarea = "27.3.a.21", #M FOrectangle = "", FOfisheriesManagementUnit = "", FOgsaSubarea = "NotApplicable", #M FOjurisdictionArea = "", + FOgeographicalDataBasis = "Measured", + FOgeographicalSource = "Crew", FOfishingDepth = "", FOwaterDepth = "", FOnationalFishingActivity = "", FOmetier5 = "", FOmetier6 = "OTT_CRU_70-89_2_35",#M FOgear = "OTT",#M + FOgearDataBasis = "Measured", + FOgearSource = "", FOmeshSize = "", FOselectionDevice = "", FOselectionDeviceMeshSize = "", @@ -424,45 +263,6 @@ stringsAsFactors=FALSE #===SS============ -# 1 SSid [] - int -# 2 LEid [M/O] - int -# 3 FOid [M/O] - int -# 4 FTid [M/O] - int -# 5 OSid [M/O] - int -# 6 TEid [M/O] - int -# 7 SLid [M] - int -# 8 SSrecordType [M] - string -# 9 SSsequenceNumber [M] - int -# 10 SSstratification [DV,M] - YesNoFields -# 11 SSstratumName [DV,M] - StringLength100 -# 12 SSclustering [DV,M] - Clustering -# 13 SSclusterName [DV,M] - StringLength100 -# 14 SSobservationActivityType [DV,M] - ObservationActivityType -# 15 SScatchFraction [M] - CatchFraction -# 16 SSobservationType [DV,M] - ObservationMethod -# 17 SSsampler [O] - Sampler -# 18 SSspeciesListName [M] - StringLength100 -# 19 SSuseForCalculateZero [M] - YesNoFields -# 20 SStimeTotal [O] - int -# 21 SStimeSampled [O] - int -# 22 SSnumberTotal [DV,O] - int -# 23 SSnumberSampled [DV,O] - int -# 24 SSselectionProb [DV,O] - Decimal0.0000000000000001-1 -# 25 SSinclusionProb [DV,O] - Decimal0.0000000000000001-1 -# 26 SSselectionMethod [DV,M] - SelectionMethod -# 27 SSunitName [DV,M] - StringLength100 -# 28 SSselectionMethodCluster [DV,O] - SelectionMethod -# 29 SSnumberTotalClusters [DV,O] - int -# 30 SSnumberSampledClusters [DV,O] - int -# 31 SSselectionProbCluster [DV,O] - Decimal0.0000000000000001-1 -# 32 SSinclusionProbCluster [DV,O] - Decimal0.0000000000000001-1 -# 33 SSsampled [DV,M] - YesNoFields -# 34 SSreasonNotSampled [DV,O] - ReasonForNotSampling -# 35 SSnonResponseCollected [DV,O] - YesNoFields -# 36 SSauxiliaryVariableTotal [DV,O] - DecimalPrec3 -# 37 SSauxiliaryVariableValue [DV,O] - DecimalPrec3 -# 38 SSauxiliaryVariableName [DV,O] - AuxiliaryVariableName -# 39 SSauxiliaryVariableUnit [DV,O] - MUNIT SS_df<-data.frame( SSid = dataset$VSid, @@ -485,8 +285,10 @@ SS_df<-data.frame( SSspeciesListName = project_name_outputs, #M SSuseForCalculateZero = "N", #M SStimeTotal = "", + SStimeTotalDataBasis = "Measured", SStimeSampled = "", SSnumberTotal = 1, + SSnumberTotalDataBasis = "Measured", SSnumberSampled = 1, SSselectionProb = 1, SSinclusionProb = 1, @@ -509,117 +311,66 @@ SS_df<-data.frame( #====SA=========== - -# 1 SAid [] - int -# 2 SSid [] - int -# 3 SArecordType [M] - string -# 4 SAsequenceNumber [M] - int -# 5 SAparentSequenceNumber [O] - int -# 6 SAstratification [DV,M] - YesNoFields -# 7 SAstratumName [DV,M] - StringLength100 -# 8 SAspeciesCode [M] - SpecWoRMS -# 9 SAspeciesCodeFAO [O] - SpecASFIS -# 10 SAstateOfProcessing [M] - StateOfProcessing -# 11 SApresentation [M] - ProductPresentation -# 12 SAspecimensState [M] - SpecimensState -# 13 SAcatchCategory [M] - CatchCategory -# 14 SAlandingCategory [O] - LandingCategory -# 15 SAcommSizeCatScale [O] - CommercialSizeCategoryScale -# 16 SAcommSizeCat [O] - CommercialSizeCategory -# 17 SAsex [M] - SEXCO -# 18 SAexclusiveEconomicZoneIndicator [O] - ISO_3166 -# 19 SAarea [O] - ICES_Area -# 20 SArectangle [O] - StatRec -# 21 SAfisheriesManagementUnit [O] - AreaNonFAO -# 22 SAgsaSubarea [M] - Areas_GFCM_GSA -# 23 SAjurisdictionArea [O] - JurisdictionArea -# 24 SAnationalFishingActivity [O] - NationalFishingActivity -# 25 SAmetier5 [O] - Metier5_FishingActivity -# 26 SAmetier6 [O] - Metier6_FishingActivity -# 27 SAgear [O] - GearType -# 28 SAmeshSize [O] - int -# 29 SAselectionDevice [O] - SelectionDevice -# 30 SAselectionDeviceMeshSize [O] - int -# 31 SAunitType [M] - SamplingUnit -# 32 SAtotalWeightLive [O] - int -# 33 SAsampleWeightLive [O] - int -# 34 SAnumberTotal [DV,O] - Decimal0.1-999999999 -# 35 SAnumberSampled [DV,O] - Decimal0.1-999999999 -# 36 SAselectionProb [DV,O] - Decimal0-1 -# 37 SAinclusionProb [DV,O] - Decimal0-1 -# 38 SAselectionMethod [DV,M] - SelectionMethod -# 39 SAunitName [DV,M] - StringLength100 -# 40 SAlowerHierarchy [M/O] - LowerHierarchy -# 41 SAsampler [O] - Sampler -# 42 SAsampled [DV,M] - YesNoFields -# 43 SAreasonNotSampled [DV,O] - ReasonForNotSampling -# 44 SAnonResponseCollected [DV,O] - YesNoFields -# 45 SAreasonNotSampledFM [DV,O] - ReasonForNotSampling -# 46 SAreasonNotSampledBV [DV,O] - ReasonForNotSampling -# 47 SAtotalWeightMeasured [O] - int -# 48 SAsampleWeightMeasured [O] - int -# 49 SAconversionFactorMeasLive [O] - Decimal0.900-10 -# 50 SAauxiliaryVariableTotal [DV,O] - DecimalPrec3 -# 51 SAauxiliaryVariableValue [DV,O] - DecimalPrec3 -# 52 SAauxiliaryVariableName [DV,O] - AuxiliaryVariableName -# 53 SAauxiliaryVariableUnit [DV,O] - MUNIT - - SA_df<-data.frame( - SAid = dataset$VSid, - SSid = dataset$VSid, - SArecordType = "SA", #M - SAsequenceNumber = dataset$VSid, #M - SAparentSequenceNumber = "", - SAstratification = "N", #M - SAstratumName = "U", #M - SAspeciesCode = 107254, #M - SAspeciesCodeFAO = "NEP", - SAstateOfProcessing = "FRE", #M - SApresentation = "WHL", #M - SAspecimensState = "DeadOrZeroProbSurvival", #M - SAcatchCategory = "Lan", #M - SAlandingCategory = "", - SAcommSizeCatScale = "", - SAcommSizeCat = "", - SAsex = "U", #M - SAexclusiveEconomicZoneIndicator = "", - SAarea = "", - SArectangle = "", - SAfisheriesManagementUnit = "", - SAgsaSubarea = "NotApplicable", #M - SAjurisdictionArea = "", - SAnationalFishingActivity = "", - SAmetier5 = "", - SAmetier6 = "", - SAgear = "", - SAmeshSize = "", - SAselectionDevice = "", - SAselectionDeviceMeshSize = "", - SAunitType = "Box", #M - SAtotalWeightLive = "", - SAsampleWeightLive = "", - SAnumberTotal = 1, - SAnumberSampled = 1, - SAselectionProb = 1, - SAinclusionProb = 1, - SAselectionMethod = "CENSUS", #M - SAunitName = dataset$VSid, #M - SAlowerHierarchy = "D", - SAsampler = "Observer", - SAsampled = "N", #M - SAreasonNotSampled = "", - SAnonResponseCollected = "N", - SAreasonNotSampledFM = "", - SAreasonNotSampledBV = "", - SAtotalWeightMeasured = dataset[[target_var]], - SAsampleWeightMeasured = dataset[[target_var]], - SAconversionFactorMeasLive = 1, - SAauxiliaryVariableTotal = "", - SAauxiliaryVariableValue = "", - SAauxiliaryVariableName = "", - SAauxiliaryVariableUnit = "", - stringsAsFactors=FALSE +SAid = dataset$VSid, +SSid = dataset$VSid, +SArecordType = "SA", #M +SAsequenceNumber = dataset$VSid, #M +SAparentSequenceNumber = "", +SAstratification = "N", #M +SAstratumName = "U", #M +SAspeciesCode = 107254, #M +SAspeciesCodeFAO = "NEP", +SAstateOfProcessing = "FRE", #M +SApresentation = "WHL", #M +SAspecimensState = "DeadOrZeroProbSurvival", #M +SAcatchCategory = "Lan", #M +SAlandingCategory = "", +SAcommSizeCatScale = "", +SAcommSizeCat = "", +SAsex = "U", #M +SAexclusiveEconomicZoneIndicator = "", +SAarea = "", +SArectangle = "", +SAfisheriesManagementUnit = "", +SAgsaSubarea = "NotApplicable", #M +SAjurisdictionArea = "", +SAgeographicalDataBasis = "Measured", +SAgeographicalSource = "", +SAnationalFishingActivity = "", +SAmetier5 = "", +SAmetier6 = "", +SAgear = "", +SAgearDataBasis = "Measured", +SAgearSource = "", +SAmeshSize = "", +SAselectionDevice = "", +SAselectionDeviceMeshSize = "", +SAunitType = "Box", #M +SAtotalWeightLive = "", +SAsampleWeightLive = "", +SAnumberTotal = 1, +SAnumberSampled = 1, +SAselectionProb = 1, +SAinclusionProb = 1, +SAselectionMethod = "CENSUS", #M +SAunitName = dataset$VSid, #M +SAlowerHierarchy = "D", +SAsampler = "Observer", +SAsampled = "N", #M +SAreasonNotSampled = "", +SAnonResponseCollected = "N", +SAreasonNotSampledFM = "", +SAreasonNotSampledBV = "", +SAtotalWeightMeasured = dataset[[target_var]], +SAtotalWeightMeasuredDataBasis = "Measured", +SAsampleWeightMeasured = dataset[[target_var]], +SAconversionFactorMeasLive = 1, +SAauxiliaryVariableTotal = "", +SAauxiliaryVariableValue = "", +SAauxiliaryVariableName = "", +SAauxiliaryVariableUnit = "", +stringsAsFactors=FALSE ) @@ -672,16 +423,14 @@ RDBESlist[[i]][which(grepl(colnames(RDBESlist[[i]]),pat="[A-Z]id"))]<-NULL #===Save============ - dir_outputs<-paste0(base_dir_outputs,"/", - project_name_outputs,"/") - dir.create(dir_outputs, recursive=T, showWarnings=FALSE) - filename_output_CS <- paste0("H1.csv") - filename_output_SL <- paste0("HSL.csv") - filename_output_VD <- paste0("HVD.csv") - + dir_outputs<-paste0(base_dir_outputs,"/", project_name_outputs,"/") + dir.create(dir_outputs, recursive=T, showWarnings=FALSE) + filename_output_CS <- paste0(dir_outputs,"H1.csv") + filename_output_SL <- paste0(dir_outputs,"HSL.csv") + filename_output_VD <- paste0(dir_outputs,"HVD.csv") -lapply(RDBESlist, function(x, filename1 = paste0(dir_outputs,filename_output_CS)){ +lapply(RDBESlist, function(x, filename1 = filename_output_CS){ if("DErecordType" %in% colnames(x)){ write.table(x, file = filename1, append = FALSE, quote = FALSE, sep = ",", eol = "\n", na = "NA", dec = ".", row.names = FALSE, @@ -693,7 +442,7 @@ write.table(x, file = filename1, append = TRUE, quote = FALSE, sep = ",", } }) -b<-read.table(file=paste0(dir_outputs,filename_output_CS), header=F, sep=";") +b<-read.table(file = filename_output_CS, header=F, sep=";") b<-cbind(key,b) b<-b[order(as.character(b$key), decreasing=FALSE),] b<-b[!is.na(key),] @@ -701,22 +450,21 @@ b$key<-NULL b$V1<-as.character(b$V1) # saves CS output -write.table(b$V1, file=paste0(dir_outputs,filename_output_CS), col.names=FALSE, row.names = FALSE, quote=FALSE,sep=",") +write.table(b$V1, file = filename_output_CS, col.names=FALSE, row.names = FALSE, quote=FALSE,sep=",") -# -----Builds and saves dummySL----------------- +# -----Builds and saves dummySL and dummyIS----------------- SL_base$SLspeciesListName<-project_name_outputs SL_base$SLyear<-DEyear SL_base$SLinstitute<-SDinstitution -# saves SL output - write.table(SL_base, file=paste0(dir_outputs,filename_output_SL), col.names=FALSE, row.names = FALSE, quote=FALSE,sep=",") - + + # saves SL output + write.table(rbind(paste(SL_base, collapse=","), paste(IS_base, collapse=",")), file=filename_output_SL, col.names=FALSE, row.names = FALSE, quote=FALSE,sep=",") # -----Builds and saves dummyVD----------------- - # saves VD output VD_base$VDyear<-DEyear - write.table(VD_base, file=paste0(dir_outputs,filename_output_VD), col.names=FALSE, row.names = FALSE, quote=FALSE,sep=",") + write.table(VD_base, file=filename_output_VD, col.names=FALSE, row.names = FALSE, quote=FALSE,sep=",") diff --git a/data-raw/exampleData/TextBookExamples/Migrate_Pckg_SDAResources_agstrat_H1_to_new_format.R b/data-raw/exampleData/TextBookExamples/Migrate_Pckg_SDAResources_agstrat_H1_to_new_format.R deleted file mode 100644 index f559e4da..00000000 --- a/data-raw/exampleData/TextBookExamples/Migrate_Pckg_SDAResources_agstrat_H1_to_new_format.R +++ /dev/null @@ -1,50 +0,0 @@ -# 15/10/2025 -# Update "Pckg_SDAResources_agstrat_H1" to the new data format - -library(RDBEScore) -# Get aux data -load(file='./data/Pckg_survey_apiclus2_H1.rda') -# Get the data -load(file='./data/Pckg_SDAResources_agstrat_H1.rda') -validateRDBESDataObject(Pckg_SDAResources_agstrat_H1, strict = FALSE, verbose = TRUE) -newH1 <- Pckg_SDAResources_agstrat_H1 - -# Add an entry for IS and create the entries using SL -newH1$IS <- readRDS(paste0(baseDir,"aux_TextBookExamples/IS_base.rds")) -names(newH1$IS) <- c("ISrecordType","IScommTaxon","ISsppCode") -#head(newH1$IS) -newH1[["IS"]]$ISrecType <- "IS" -newH1[["IS"]]$SLid <- newH1[["SL"]]$SLid -newH1[["IS"]]$ISid <- 1001 -newH1$IS <- newH1$IS[,c("ISid", "SLid", "ISrecType", "IScommTaxon", "ISsppCode")] -data.table::setkey(newH1[["IS"]],ISid) -# Remove the unneccessary columns from SL -#newH1$SL <- newH1$SL[,c("SLid","SLrecType","SLcou","SLinst","SLspeclistName","SLyear","SLcatchFrac")] -# Add column to FT -#newH1[["FT"]]$FTdomLanDate <- NA -# Add columns to FO -#newH1[["FO"]]$FOfishDuraDatBas <- NA -#newH1[["FO"]]$FOgeoDatBas <- NA -#newH1[["FO"]]$FOgeoSou <- NA -#newH1[["FO"]]$FOgeaDatBas <- NA -#newH1[["FO"]]$FOgearSou <- NA -# Add columns to SS -#newH1[["SS"]]$SStimeTotalDatBas <- NA -#newH1[["SS"]]$SSnumTotalDatBas <- NA -# Add columns to SA -#newH1[["SA"]]$SAgeoDatBas <- NA -#newH1[["SA"]]$SAgeoSou <- NA -#newH1[["SA"]]$SAgeaDatBas <- NA -#newH1[["SA"]]$SAgearSou <- NA -#newH1[["SA"]]$SAtotWtMeaDatBas <- NA -# generate empty BV and FM -newH1[["FM"]]<-Pckg_survey_apiclus2_H1$FM -newH1[["BV"]]<-Pckg_survey_apiclus2_H1$BV - - -validateRDBESDataObject(newH1, strict = TRUE, verbose = TRUE) - -# Save the data -Pckg_SDAResources_agstrat_H1 <- newH1 -usethis::use_data(Pckg_SDAResources_agstrat_H1, overwrite = TRUE) - diff --git a/data-raw/exampleData/TextBookExamples/Migrate_Pckg_survey_apiclus2_H1_to_new_format.R b/data-raw/exampleData/TextBookExamples/Migrate_Pckg_survey_apiclus2_H1_to_new_format.R deleted file mode 100644 index c01f7262..00000000 --- a/data-raw/exampleData/TextBookExamples/Migrate_Pckg_survey_apiclus2_H1_to_new_format.R +++ /dev/null @@ -1,50 +0,0 @@ -# 12/2/2025 -# Update "Pckg_survey_apiclus2_H1" to the new data format - -library(RDBEScore) -# Get the data -load(file='./data/Pckg_survey_apiclus2_H1.rda') -validateRDBESDataObject(Pckg_survey_apiclus2_H1, strict = TRUE, verbose = TRUE) -newH1 <- Pckg_survey_apiclus2_H1 -validateRDBESDataObject(newH1, strict = TRUE, verbose = TRUE) - -# Add an entry for IS and create the entries using SL -newH1$IS <- newH1$SL -newH1$IS <- newH1$IS[,c("SLid","SLrecType","SLcommTaxon","SLsppCode")] -names(newH1$IS) <- c("ISid","ISrecType","IScommTaxon","ISsppCode" ) -#head(newH1$IS) -newH1[["IS"]]$ISrecType <- "IS" -newH1[["IS"]]$SLid <- 47891 -newH1[["IS"]]$ISid <- 1001 -newH1$IS <- newH1$IS[,c("ISid", "SLid", "ISrecType", "IScommTaxon", "ISsppCode")] -data.table::setkey(newH1[["IS"]],ISid) -# Remove the unneccessary columns from SL -newH1$SL <- newH1$SL[,c("SLid","SLrecType","SLcou","SLinst","SLspeclistName","SLyear","SLcatchFrac")] -# Add column to FT -newH1[["FT"]]$FTdomLanDate <- NA -# Add columns to FO -newH1[["FO"]]$FOfishDuraDatBas <- NA -newH1[["FO"]]$FOgeoDatBas <- NA -newH1[["FO"]]$FOgeoSou <- NA -newH1[["FO"]]$FOgeaDatBas <- NA -newH1[["FO"]]$FOgearSou <- NA -# Add columns to SS -newH1[["SS"]]$SStimeTotalDatBas <- NA -newH1[["SS"]]$SSnumTotalDatBas <- NA -# Add columns to SA -newH1[["SA"]]$SAgeoDatBas <- NA -newH1[["SA"]]$SAgeoSou <- NA -newH1[["SA"]]$SAgeaDatBas <- NA -newH1[["SA"]]$SAgearSou <- NA -newH1[["SA"]]$SAtotWtMeaDatBas <- NA -# Rename cols in BV -newH1[["BV"]]$BVspecType <- newH1[["BV"]]$BVmethod -newH1[["BV"]]$BVanalysisType <- newH1[["BV"]]$BVmeasEquip -newH1[["BV"]]$BVmethod <- NULL -newH1[["BV"]]$BVmeasEquip <- NULL -validateRDBESDataObject(newH1, strict = TRUE, verbose = TRUE) - -# Save the data -Pckg_survey_apiclus2_H1 <- newH1 -usethis::use_data(Pckg_survey_apiclus2_H1, overwrite = TRUE) - diff --git a/data-raw/exampleData/TextBookExamples/Migrate_Pckg_survey_apistrat_H1_to_new_format.R b/data-raw/exampleData/TextBookExamples/Migrate_Pckg_survey_apistrat_H1_to_new_format.R deleted file mode 100644 index db5669f3..00000000 --- a/data-raw/exampleData/TextBookExamples/Migrate_Pckg_survey_apistrat_H1_to_new_format.R +++ /dev/null @@ -1,50 +0,0 @@ -# 12/2/2025 -# Update "Pckg_survey_apistrat_H1" to the new data format - -library(RDBEScore) -# Get the data -load(file='./data/Pckg_survey_apistrat_H1.rda') -validateRDBESDataObject(Pckg_survey_apistrat_H1, strict = TRUE, verbose = TRUE) -newH1 <- Pckg_survey_apistrat_H1 -validateRDBESDataObject(newH1, strict = TRUE, verbose = TRUE) - -# Add an entry for IS and create the entries using SL -newH1$IS <- newH1$SL -newH1$IS <- newH1$IS[,c("SLid","SLrecType","SLcommTaxon","SLsppCode")] -names(newH1$IS) <- c("ISid","ISrecType","IScommTaxon","ISsppCode" ) -#head(newH1$IS) -newH1[["IS"]]$ISrecType <- "IS" -newH1[["IS"]]$SLid <- 47891 -newH1[["IS"]]$ISid <- 1001 -newH1$IS <- newH1$IS[,c("ISid", "SLid", "ISrecType", "IScommTaxon", "ISsppCode")] -data.table::setkey(newH1[["IS"]],ISid) -# Remove the unneccessary columns from SL -newH1$SL <- newH1$SL[,c("SLid","SLrecType","SLcou","SLinst","SLspeclistName","SLyear","SLcatchFrac")] -# Add column to FT -newH1[["FT"]]$FTdomLanDate <- NA -# Add columns to FO -newH1[["FO"]]$FOfishDuraDatBas <- NA -newH1[["FO"]]$FOgeoDatBas <- NA -newH1[["FO"]]$FOgeoSou <- NA -newH1[["FO"]]$FOgeaDatBas <- NA -newH1[["FO"]]$FOgearSou <- NA -# Add columns to SS -newH1[["SS"]]$SStimeTotalDatBas <- NA -newH1[["SS"]]$SSnumTotalDatBas <- NA -# Add columns to SA -newH1[["SA"]]$SAgeoDatBas <- NA -newH1[["SA"]]$SAgeoSou <- NA -newH1[["SA"]]$SAgeaDatBas <- NA -newH1[["SA"]]$SAgearSou <- NA -newH1[["SA"]]$SAtotWtMeaDatBas <- NA -# Rename cols in BV -newH1[["BV"]]$BVspecType <- newH1[["BV"]]$BVmethod -newH1[["BV"]]$BVanalysisType <- newH1[["BV"]]$BVmeasEquip -newH1[["BV"]]$BVmethod <- NULL -newH1[["BV"]]$BVmeasEquip <- NULL -validateRDBESDataObject(newH1, strict = TRUE, verbose = TRUE) - -# Save the data -Pckg_survey_apistrat_H1 <- newH1 -usethis::use_data(Pckg_survey_apistrat_H1, overwrite = TRUE) - diff --git a/data-raw/exampleData/TextBookExamples/aux_TextBookExamples/IS_base.rds b/data-raw/exampleData/TextBookExamples/aux_TextBookExamples/IS_base.rds index 0e31f5c5..bc9453bd 100644 Binary files a/data-raw/exampleData/TextBookExamples/aux_TextBookExamples/IS_base.rds and b/data-raw/exampleData/TextBookExamples/aux_TextBookExamples/IS_base.rds differ diff --git a/man/combineRDBESDataObjects.Rd b/man/combineRDBESDataObjects.Rd index 1dd38e55..76ce514f 100644 --- a/man/combineRDBESDataObjects.Rd +++ b/man/combineRDBESDataObjects.Rd @@ -32,6 +32,13 @@ Combine Two RDBES Raw Objects combines 2 RDBESDataObjects into a single RDBESDataObject by merging individual tables one by one } +\details{ +When combining RDBESDataObjects from different hierarchies (e.g., H1 and H5), +a warning is issued. The resulting combined object will have a mixed hierarchy, +which may be structurally and statistically invalid for some analyses. However, +such combinations can be useful for fisheries overviews, annual reports, or +countries performing broader estimations. +} \examples{ \dontrun{ diff --git a/man/createRDBESDataObject.Rd b/man/createRDBESDataObject.Rd index 783496f5..8895db41 100644 --- a/man/createRDBESDataObject.Rd +++ b/man/createRDBESDataObject.Rd @@ -33,8 +33,19 @@ in. Default is \code{TRUE}.} \item{verbose}{(Optional) Set to TRUE if you want informative text printed out, or FALSE if you don't. The default is FALSE.} -\item{...}{parameters passed to validateRDBESDataObject -e.g.\code{strict=FALSE}} +\item{...}{Additional parameters forwarded to helper functions used by this +function. Most commonly these are forwarded to +\code{validateRDBESDataObject()} during the validation step. Common options: +\itemize{ +\item \code{strict} (logical, default \code{TRUE}): if \code{FALSE}, validation issues result +in warnings instead of stopping with an error. +\item \code{verbose} (logical, default \code{FALSE}): request extra informational output +from validation. +\item \code{Hierarchy} (integer, e.g. \code{1}, optional; zip inputs only): when the zip +file contains multiple hierarchies, selects which hierarchy to import. +Note: \code{checkDataTypes} is controlled by the \code{castToCorrectDataTypes} +argument of this function and should not be supplied via \code{...}. +}} } \value{ A RDBESDataObject @@ -61,6 +72,11 @@ any tables in the first input are overwritten by other inputs a warning is given. You should not input different hierarchy files; this function will not combine them. +If the zip contains multiple hierarchies (e.g., H1 and H5 within the same +archive), you can select which one to import by passing \code{Hierarchy} via +\code{...}, for example: \code{Hierarchy = 1}. If \code{Hierarchy} is not specified and the +zip contains multiple hierarchies, an error is raised prompting you to set it. + \emph{\strong{CSV file inputs}} This \code{input} should be a path to a folder of \code{csv} files. These can be the \code{csv} files downloaded from RDBES (e.g. an unzipped hierarchy), or \emph{any} set @@ -81,5 +97,6 @@ This \code{input} produces an empty \code{RDBESDataObject}, i.e. all tables with correct data classes but the tables will be empty. } \examples{ +# Create an empty object myEmptyRDBESObject <- createRDBESDataObject(input = NULL) } diff --git a/man/filterRDBESDataObject.Rd b/man/filterRDBESDataObject.Rd index bc6f2952..51ec44a7 100644 --- a/man/filterRDBESDataObject.Rd +++ b/man/filterRDBESDataObject.Rd @@ -58,5 +58,16 @@ myFilteredObject <- filterRDBESDataObject(myH1RawObject, fieldsToFilter = myFields, valuesToFilter = myValues ) + +# Inverse filtering (exclude certain values) +# Example: keep all DE rows except those with DEid in `excludedValues` +# Compute the complement of the excluded set using setdiff +allValues <- unique(myH1RawObject$DE$DEid) +excludedValues <- c(5351) +myInverseFiltered <- filterRDBESDataObject( + myH1RawObject, + fieldsToFilter = "DEid", + valuesToFilter = setdiff(allValues, excludedValues) +) } } diff --git a/man/lowerTblData.Rd b/man/lowerTblData.Rd index ae28d2b8..20673c43 100644 --- a/man/lowerTblData.Rd +++ b/man/lowerTblData.Rd @@ -4,7 +4,7 @@ \alias{lowerTblData} \title{Get lower table data from upper table id} \usage{ -lowerTblData(field, values, tbls, level, verbose = FALSE) +lowerTblData(field, values, tbls, level, verbose = FALSE, path_order = NULL) } \arguments{ \item{field}{A character string specifying the name of the upper table field.} diff --git a/package_overview.Rmd b/package_overview.Rmd index f1570360..6bc617cc 100644 --- a/package_overview.Rmd +++ b/package_overview.Rmd @@ -1,11 +1,16 @@ --- -title: "RDBEScore: Using R for ICES Regional Database & Estimation System (RDBES) Data" +title: 'RDBEScore: Using R for ICES Regional Database & Estimation System (RDBES) + Data' output: - powerpoint_presentation: + ioslides_presentation: + incremental: true + powerpoint_presentation: reference_doc: "./data-raw/styles.pptx" slide_level: 2 keep_md: true incremental: true + beamer_presentation: + incremental: true --- ## Importing RDBES Data @@ -14,8 +19,8 @@ The aim of this document is to outline the basic workflow of importing data down The function **createRDBESDataObject** is intended to directly import Commercial Landing (CL), Commercial Effort (CE) and Commercial Sampling (CS) tables downloaded from [RDBES](https://rdbes.ices.dk/#/). -## Introducton - +## Introduction +2 RDBEScore is an R package developed to facilitate the analysis of data from the ICES Regional Database and Estimation System (RDBES). The package provides functions to: - import functions like `createRDBESDataObject` and `validateRDBESDataObject` @@ -52,7 +57,7 @@ library(RDBEScore) To see the complete list of vignettes available in the package use the following command: -```{rm echo=TRUE, eval=FALSE} +```{r echo=TRUE, eval=FALSE} browseVignettes(package = "RDBEScore") ``` @@ -84,9 +89,9 @@ importedH1 It can import the CL, CE, VD or SL tables `.zip` archives, but will include all other tables as `NULL`: ```{r} -importedH5 <- createRDBESDataObject(input = "./data-raw/exampleData/H5_2025_02_11.zip") +importedHSL <- createRDBESDataObject(input = "./inst/extdata/HSL_Example.zip") #print the not NULL table names -importedH5 +importedHSL ``` ## Importing a List of Data Frames @@ -103,7 +108,7 @@ importedList <- createRDBESDataObject(listOfDfsH1) ## Object class RDBESDataObject -It should be noted that the objects created are of the S3 class "**RDBESDataObject**". The class has defined **print()**, **summary()** and **sort()** methods. For more info on these see vignette [Manipulating RDBESDataObjects](https://ices-tools-dev.github.io/RDBEScore/articles/manipulating-rdbesdataobjects.html). +It should be noted that the objects created are of the S3 class "**RDBESDataObject**". The class has defined **print()**, **summary()** and **sort()** methods. For more info on these see vignettes @@ -147,7 +152,7 @@ validateRDBESDataObject(myFilteredObjectNoOrphans, verbose = FALSE) ``` -Again to see more details on the functions see the vignette [Manipulating RDBESDataObjects](https://ices-tools-dev.github.io/RDBEScore/articles/manipulating-rdbesdataobjects.html). +Again to see more details on the functions see the vignettes. ## Getting Subsets of RDBESDataObject Tables @@ -208,7 +213,7 @@ Right now estimation actually is done on a *RDBESEstObject* that is generate fr In the next sections we will use data from R packages [survey](https://CRAN.R-project.org/package=survey) and [SDAResources](https://CRAN.R-project.org/package=SDAResources) that are converted into the *RDBESDataObject* to demonstrate the estimation procedure. -For more detailed information on the estimation functions see the vignette [Estimating Population parameters from RDBESDataObjects](https://ices-tools-dev.github.io/RDBEScore/articles/estimating-rdbesdataobjects.html). +For more detailed information on the estimation functions see the vignettes ```{r} @@ -274,6 +279,7 @@ strataListCL <- list(CLarea="27.3.d.28.1", There is a function in development **addCLtoLowerCS(...)** that can be used to add the CL data to the lower hierarchy. + ```{r} #we are using the lower hierarchy C meaning that we are extracting the BV data #as the biological data @@ -293,7 +299,7 @@ lenCANUMQ1 <- doBVestimCANUM(biolCLQ1, c("sumCLoffWeight"), classBreaks = seq(70,130,10), verbose = FALSE) -knitr::kable(lenCANUMQ1[, c("Group", "WeightgMean", "LengthmmMean", "totNum")], +knitr::kable(lenCANUMQ1[order(lenCANUMQ1$LengthmmMean), c("Group", "WeightgMean", "LengthmmMean", "totNum")], digits = 2) ``` diff --git a/tests/testthat/test-combineRDBESDataObjects.R b/tests/testthat/test-combineRDBESDataObjects.R index 92425576..347e153a 100644 --- a/tests/testthat/test-combineRDBESDataObjects.R +++ b/tests/testthat/test-combineRDBESDataObjects.R @@ -24,7 +24,7 @@ test_that("combineRDBESDataObjects returns invalid RDBESDataObject when expect_error(validateRDBESDataObject(myCombinedObject), "duplicate rows") }) -test_that("combineRDBESDataObjects returns valid RDBESDataObject when supplied +test_that("combineRDBESDataObjects warns when supplied with valid, different RDBESDataObjects", { myObject1 <- importRDBESDataCSV(rdbesExtractPath = "./h1_v_20250211") @@ -34,10 +34,46 @@ test_that("combineRDBESDataObjects returns valid RDBESDataObject when supplied expect_error(validateRDBESDataObject(myObject1), NA) expect_error(validateRDBESDataObject(myObject2), NA) - myCombinedObject <- combineRDBESDataObjects(RDBESDataObject1=myObject1, - RDBESDataObject2=myObject2) + expect_warning(combineRDBESDataObjects(RDBESDataObject1=myObject1, + RDBESDataObject2=myObject2, strict = F), + "Combining RDBESDataObjects from different hierarchies") +}) + +test_that("combineRDBESDataObjects stops when combining objects from different hierarchies", { + + myObject1 <- importRDBESDataCSV(rdbesExtractPath = "./h1_v_20250211") + myObject2 <- importRDBESDataCSV(rdbesExtractPath = "./h5_v_20250211") + + # Check these are valid objects before we try and combine them + expect_error(validateRDBESDataObject(myObject1), NA) + expect_error(validateRDBESDataObject(myObject2), NA) + + # Expect a warning about different hierarchies + expect_error(combineRDBESDataObjects(RDBESDataObject1=myObject1, + RDBESDataObject2=myObject2), + "Combining RDBESDataObjects from different hierarchies") +}) + +test_that("combineRDBESDataObjects does not warn when combining objects from same hierarchy", { + + myObject1 <- importRDBESDataCSV(rdbesExtractPath = "./h1_v_20250211") + myObject2 <- importRDBESDataCSV(rdbesExtractPath = "./h1_v_20250211") + + # Expect no warning about different hierarchies (but will have duplicate rows error later) + expect_warning(combineRDBESDataObjects(RDBESDataObject1=myObject1, + RDBESDataObject2=myObject2), + NA) +}) + +test_that("combineRDBESDataObjects does not warn when one object has no DE table", { + + myObject1 <- importRDBESDataCSV(rdbesExtractPath = "./h1_v_20250211") + myObject2 <- createRDBESDataObject() # Empty object with no DE - expect_error(validateRDBESDataObject(myCombinedObject), NA) + # Expect no warning because one object has no hierarchy + expect_warning(combineRDBESDataObjects(RDBESDataObject1=myObject1, + RDBESDataObject2=myObject2), + NA) }) }) ## end capture.output diff --git a/tests/testthat/test-createRDBESDataObject.R b/tests/testthat/test-createRDBESDataObject.R index 6feb1eec..32d4e9b0 100644 --- a/tests/testthat/test-createRDBESDataObject.R +++ b/tests/testthat/test-createRDBESDataObject.R @@ -75,7 +75,7 @@ capture.output({ ## suppresses printing of console output when running test() genObj <- expect_error( createRDBESDataObject(paste0(dirH1, zipFiles), castToCorrectDataTypes = TRUE), - "The zip file contains multiple hierarchies.\nTo import a selected hierarchy, please provide the hierarchy as an argument e.g like:\nHierachy = 1" + "The zip file contains multiple hierarchies.\nTo import a selected hierarchy, please provide the hierarchy as an argument e.g like:\nHierarchy = 1" ) }) @@ -102,7 +102,7 @@ capture.output({ ## suppresses printing of console output when running test() createRDBESDataObject(paste0(dirH1, zipFiles), castToCorrectDataTypes = TRUE, Hierarchy = "H1"), - "The zip file does not contain the hierarchy specified. The options are: 1, 2\nPlease provide a valid hierarchy as an argument. e.g like:\nHierachy = 1" + "The zip file does not contain the hierarchy specified. The options are: 1, 2\nPlease provide a valid hierarchy as an argument. e.g like:\nHierarchy = 1" ) }) diff --git a/tests/testthat/test-importRDBESDataCSV.R b/tests/testthat/test-importRDBESDataCSV.R index 63d7e52a..d3b6ea46 100644 --- a/tests/testthat/test-importRDBESDataCSV.R +++ b/tests/testthat/test-importRDBESDataCSV.R @@ -48,6 +48,40 @@ capture.output({ ## suppresses printing of console output when running test() }) + test_that("importRDBESDataCSV keeps extra long BVfishId if character", { + #make a temporary BV table from the package data + BV <- H1Example$BV[1,] + #make a 40 char length dummy ID this is the max length in RDBES + id <- paste0(rep("A", 40), collapse = "") + BV$BVfishId <- id + tempBVfNname <- tempfile(fileext = ".csv") + write.csv(BV, file=tempBVfNname, quote=F, row.names = F) + fname <- basename(tempBVfNname) + dirname <- dirname(tempBVfNname) + BV <- importRDBESDataCSV(rdbesExtractPath = dirname, listOfFileNames = list("BV"=fname))$BV + file.remove(tempBVfNname) + expect_equal(BV$BVfishId[1],id) + }) + + test_that("importRDBESDataCSV keeps extra long BVfishId if int", { + #make a temporary BV table from the package data + BV <- H1Example$BV[1,] + #make a 40 char length dummy ID this is the max length in RDBES + id <- paste0(rep("1", 40), collapse = "") + #its not possible to get the exact number if we save as numeric + id <- format(as.numeric(id), scientific = FALSE, trim = TRUE) + #the thing we want to test is if we have a long numeric string in there + #do we get the same string back in when importing + BV$BVfishId <- id + tempBVfNname <- tempfile(fileext = ".csv") + write.csv(BV, file=tempBVfNname, quote=F, row.names = F) + fname <- basename(tempBVfNname) + dirname <- dirname(tempBVfNname) + BV <- importRDBESDataCSV(rdbesExtractPath = dirname, listOfFileNames = list("BV"=fname))$BV + file.remove(tempBVfNname) + expect_equal(BV$BVfishId[1],id) + }) + diff --git a/tests/testthat/test-lowerTblData.R b/tests/testthat/test-lowerTblData.R index 5fd240a9..cd29eee9 100644 --- a/tests/testthat/test-lowerTblData.R +++ b/tests/testthat/test-lowerTblData.R @@ -10,21 +10,21 @@ tblsSprat <- list(DE = DE, SD = SD, TE = TE, VS = VS, LE = LE) # Begin test cases test_that("Function returns correct data for valid inputs", { result <- lowerTblData("TEid", c(1), tblsSprat, "LE", FALSE) - expected <- data.table(VSid = 1,LEid=1, value = 10) + expected <- data.table(TEid = 1, VSid = 1, LEid=1, value = 10) expect_equal(result, expected) }) test_that("Function handles multiple values correctly", { result <- lowerTblData("TEid", c(1, 4), tblsSprat, "LE", FALSE) - expected <- data.table(VSid = c(1, 4),LEid=c(1,4) , value = c(10, 4)) + expected <- data.table(TEid=c(1,4) ,VSid = c(1, 4),LEid=c(1,4) , value = c(10, 4)) expect_equal(result, expected) }) test_that("Function handles non-existent field gracefully", { result <- lowerTblData("TEid", c(99), tblsSprat, "LE", FALSE) - expected <- data.table(VSid = integer(),LEid=integer() , value = integer()) + expected <- data.table(TEid = integer(),VSid = integer(), LEid=integer() , value = integer()) expect_equal(result, expected) }) @@ -35,7 +35,7 @@ test_that("Function works with printLevels = TRUE", { test_that("Recursive functionality works", { result <- lowerTblData("SDid", c(1), tblsSprat, "LE", FALSE) - expected <- data.table(VSid = 1, LEid=1, value = 10) + expected <- data.table(SDid = 1, TEid = 1, VSid = 1, LEid=1, value = 10) expect_equal(result, expected) }) diff --git a/tests/testthat/test-validateRDBESDataObject.R b/tests/testthat/test-validateRDBESDataObject.R index 6fd4fbb6..6730e992 100644 --- a/tests/testthat/test-validateRDBESDataObject.R +++ b/tests/testthat/test-validateRDBESDataObject.R @@ -218,9 +218,9 @@ test_that("validateRDBESDataObject produces correct text output", { # Tests for CHECK 5b: SL non-empty requires non-empty IS test_that("validateRDBESDataObject errors when SL has rows and IS is NULL", { # Build minimal object: non-empty SL, NULL IS - myObject <- H1Example + myObject <- data.table::copy(H1Example) - myObject["IS"]<- list(NULL) + myObject["IS"] <- list(NULL) expect_error( validateRDBESDataObject(objectToCheck = myObject, verbose = FALSE), diff --git a/tests/testthat/test-validateRDBESDataObjectDataTypes.R b/tests/testthat/test-validateRDBESDataObjectDataTypes.R index 2bf5baa2..5577d4d5 100644 --- a/tests/testthat/test-validateRDBESDataObjectDataTypes.R +++ b/tests/testthat/test-validateRDBESDataObjectDataTypes.R @@ -46,4 +46,29 @@ test_that("checkRDBESDataObjectDataTypes returns 2 differences for an object wit expect_equal(numberOfDifferences,2) }) +test_that("checkRDBESDataObjectDataTypes treats expected integer vs actual numeric as compatible (issue #251)", { + + # Here DEid, DEyear and DEhierarchy are provided as numeric (double) + # while the mapping expects integer. We want this to be considered OK. + myDE <- data.frame("DEid" = c(1,2), # numeric (not 1L, 2L) + "DErecType" = c("DE","DE"), + "DEsampScheme" = c("WGRDBES-EST TEST","WGRDBES-EST TEST"), + "DEsampSchemeType" = c("NatPilCF","NatPilCF"), + "DEyear" = c(1965,1965), # numeric + "DEstratumName" = c("1","2"), + "DEhierarchyCor" = c("Y","N"), + "DEhierarchy" = c(1,1), # numeric + "DEsamp" = c("Y","N"), + "DEnoSampReason" = c ("",""), + stringsAsFactors = FALSE + ) + + myObject <- list() + myObject[["DE"]] <- myDE + + myDiffs <- validateRDBESDataObjectDataTypes(myObject) + numberOfDifferences <- nrow(myDiffs) + expect_equal(numberOfDifferences,0) +}) + }) ## end capture.output diff --git a/vignettes/v01b-manipulating-rdbesdataobjects.Rmd b/vignettes/v01b-manipulating-rdbesdataobjects.Rmd index fe8ce108..457f81b6 100644 --- a/vignettes/v01b-manipulating-rdbesdataobjects.Rmd +++ b/vignettes/v01b-manipulating-rdbesdataobjects.Rmd @@ -107,7 +107,8 @@ RDBESDataObjects can be combined using the **combineRDBESDataObjects()** functio ```{r combine} myCombinedRawObject <- combineRDBESDataObjects(myH1RawObject, - myH5RawObject) + myH5RawObject, + strict = FALSE) # Number of rows in each non-null table and hierarchies print(myCombinedRawObject) @@ -125,7 +126,10 @@ myH1RawObject <- H1Example # Number of rows in each non-null table print(myH1RawObject) +``` + +```{r filter2} myFields <- c("SDctry","VDctry","VDflgCtry","FTarvLoc") myValues <- c("ZW","ZWBZH","ZWVFA" ) @@ -147,9 +151,44 @@ myFilteredObjectNoOrphans <- validateRDBESDataObject(myFilteredObjectNoOrphans, verbose = FALSE) +myFilteredObjectNoOrphans +``` +**NB!** Currently filtering happens to all fields together i.e it is not possible to filter same codelist differently in the same filter call. Imagine a situation where you want to filter on both "SDCtry" and "VDflgCtry" ie vessels from EH country sampled by ZW institution. Ie two calls are needed + +```{r} +myFilteredObject <- filterRDBESDataObject(myH1RawObject, + fieldsToFilter = "SDctry", + valuesToFilter = "ZW", + killOrphans = T) + +filterRDBESDataObject(myFilteredObject, + fieldsToFilter = "VDflgCtry", + valuesToFilter = "EH", + killOrphans = T) + + +``` + +Sometimes you might want to do the inverse filter eg exclude something. You can do this by selecting the complement set of values using `setdiff`. + +```{r} +# Exclude specific DEid values by selecting all others +allValues <- unique(myH1RawObject$DE$DEid) +excludedValues <- c(5351) +myInverseFiltered <- filterRDBESDataObject( + myH1RawObject, + fieldsToFilter = "DEid", + valuesToFilter = setdiff(allValues, excludedValues), + killOrphans = TRUE +) +validateRDBESDataObject(myInverseFiltered, verbose = FALSE) +print(myInverseFiltered) ``` + + + You can also remove any records that are not linking to a row in the VesselDetails (VD) table using the **removeBrokenVesselLinks()** function. ```{r clean2} @@ -188,7 +227,7 @@ validateRDBESDataObject(myFilteredObjectValidSpeciesLinks, verbose = FALSE) ## Getting Subsets of RDBESDataObject Tables -Sometimes it we want to see how a field or values in the **RDBESDataObject** are connected to otther tables. +Sometimes it we want to see how a field or values in the **RDBESDataObject** are connected to other tables. One use case would be e.g. to see when a specific Landing Event (LE) occured.For this we can use the **getLinkedDataFromLevel()** function. ```{r} @@ -203,6 +242,7 @@ getLinkedDataFromLevel("TEstratumName", c("November"), H8ExampleEE1, "LE", verbo ``` Several values can be used to get a subset of the table. + ```{r} #get the SA table corresponding to the first 2 TEids in the H8ExampleEE1 object getLinkedDataFromLevel("TEid", c(1,2), H8ExampleEE1, "SA", verbose = TRUE)