ices-tools-dev · nmprista · Oct 17, 2025 · Oct 15, 2025 · Oct 15, 2025 · Oct 15, 2025
diff --git a/.github/workflows/pkgdown.yaml b/.github/workflows/pkgdown.yaml
@@ -4,6 +4,7 @@ on:
   push:
     branches: [main, master]
   pull_request:
+    branches: [ main ] 
   release:
     types: [published]
   workflow_dispatch:

diff --git a/.gitignore b/.gitignore
@@ -1,57 +1,46 @@
 # History files
 .Rhistory
 .Rapp.history
-
 # Session Data files
 .RData
-
 # User-specific files
 .Ruserdata
 .DS_Store
-
 # Example code in package build process
 *-Ex.R
-
 # Output files from R CMD build
 /*.tar.gz
-
 # Output files from R CMD check
 /*.Rcheck/
-
 # RStudio files
 .Rproj.user/
-
 # produced vignettes
 vignettes/*.html
 vignettes/*.pdf
-
 # OAuth2 token, see https://github.com/hadley/httr/releases/tag/v0.3
 .httr-oauth
-
 # knitr and R markdown default cache directories
 *_cache/
 /cache/
-
 # Temporary files created by R markdown
 *.utf8.md
 *.knit.md
-
 # R Environment Variables
 .Renviron
-
 # FishNCo raw test data
 FishNCo/testData/RegionalTestData/*
-
 # MS Excel temp files
 ~$*
-
 /doc/
 /Meta/
 <<<<<<< Updated upstream
 .vscode/
 docs
 =======
-
 # Data to develop fun
 /NLdata/
 >>>>>>> Stashed changes
+package_overview.tex
+package_overview.html
+package_overview.pptx
+package_overview.md
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: RDBEScore
 Type: Package
 Title: Functions for the ICES Regional Database and Estimation System (RDBES)
-Version: 0.3.4
+Version: 0.3.5
 Author: c(
     person(given = "David", 
            family = "Currie", 

diff --git a/NEWS.md b/NEWS.md
@@ -1,3 +1,10 @@
+# RDBEScore 0.3.5
+
+- Bug fix: addressed [#251](https://github.com/ices-tools-dev/RDBEScore/issues/251).
+- Docs/params: expanded docs for `combineRDBESDataObjects()` and `createRDBESDataObject()`; clarified hierarchy behavior and `...` options (strict, verbose, hierarchy).
+- Mixed hierarchies: `combineRDBESDataObjects()` now warns/errors when objects use different hierarchies (`strict=TRUE` for error).
+- ID tables: `createTableOfRDBESIds()` merging more robust by hierarchy; clearer BV handling and console output.
+
 # RDBEScore 0.3.4
 
 - Defaults: `createRDBESDataObject()`  now runs validation by default

diff --git a/R/combineRDBESDataObjects.R b/R/combineRDBESDataObjects.R
@@ -9,6 +9,13 @@
 #' @param strict (Optional) This function validates its input data - should
 #' the validation be strict? The default is TRUE.
 #'
+#' @details
+#' Combining RDBESDataObjects from different hierarchies (e.g., H1 and H5)
+#' raises an error when \code{strict = TRUE} and a warning otherwise. The mixed
+#' hierarchy object may be structurally and statistically invalid for some
+#' analyses, but can be useful for fisheries overviews, annual reports, or
+#' countries performing broader estimations.
+#'
 #' @return the combination of \code{RDBESDataObject1} and  \code{RDBESDataObject2}
 #' @seealso  \link[data.table]{rbindlist}
 #' @export
@@ -31,6 +38,36 @@ combineRDBESDataObjects <- function(RDBESDataObject1,
 
   validateRDBESDataObject(RDBESDataObject1, verbose = verbose, strict = strict)
   validateRDBESDataObject(RDBESDataObject2, verbose = verbose, strict = strict)
+
+  # Check for multiple hierarchies
+  hierarchy1 <- NULL
+  hierarchy2 <- NULL
+
+  if (!is.null(RDBESDataObject1$DE) && nrow(RDBESDataObject1$DE) > 0) {
+    hierarchy1 <- unique(RDBESDataObject1$DE$DEhierarchy)
+  }
+
+  if (!is.null(RDBESDataObject2$DE) && nrow(RDBESDataObject2$DE) > 0) {
+    hierarchy2 <- unique(RDBESDataObject2$DE$DEhierarchy)
+  }
+
+  # Warn if combining different hierarchies
+  if (!is.null(hierarchy1) && !is.null(hierarchy2) &&
+      length(hierarchy1) > 0 && length(hierarchy2) > 0) {
+    if (!all(hierarchy1 %in% hierarchy2) || !all(hierarchy2 %in% hierarchy1)) {
+      warnMsg <- paste("Combining RDBESDataObjects from different hierarchies (",
+              paste(hierarchy1, collapse = ", "), " and ",
+              paste(hierarchy2, collapse = ", "),
+              "). This creates a mixed hierarchy object that may be structurally ",
+              "and statistically invalid for some analyses.")
+      if(strict){
+        stop(warnMsg, call. = F)
+      } else{
+        warning(warnMsg, call. = F)
+      }
+    }
+  }
+
   # Create an empty RDBESDataObject as the basis of what we will return
   myRDBESDataObject <- createRDBESDataObject()
 

diff --git a/R/createRDBESDataObject.R b/R/createRDBESDataObject.R
@@ -20,6 +20,11 @@
 #' given. You should not input different hierarchy files; this function will not
 #' combine them.
 #'
+#' If the zip contains multiple hierarchies (e.g., H1 and H5 within the same
+#' archive), you can select which one to import by passing `Hierarchy` via
+#' `...`, for example: `Hierarchy = 1`. If `Hierarchy` is not specified and the
+#' zip contains multiple hierarchies, an error is raised prompting you to set it.
+#'
 #' ***CSV file inputs***
 #' This `input` should be a path to a folder of `csv` files. These can be the
 #' `csv` files downloaded from RDBES (e.g. an unzipped hierarchy), or *any* set
@@ -56,15 +61,25 @@
 #'   in. Default is `TRUE`.
 #' @param verbose (Optional) Set to TRUE if you want informative text printed
 #'  out, or FALSE if you don't.  The default is FALSE.
-#' @param ... parameters passed to validateRDBESDataObject
-#'  e.g.`strict=FALSE`
+#' @param ... Additional parameters forwarded to helper functions used by this
+#'   function. Most commonly these are forwarded to
+#'   `validateRDBESDataObject()` during the validation step. Common options:
+#'   - `strict` (logical, default `TRUE`): if `FALSE`, validation issues result
+#'     in warnings instead of stopping with an error.
+#'   - `verbose` (logical, default `FALSE`): request extra informational output
+#'     from validation.
+#'   - `Hierarchy` (integer, e.g. `1`, optional; zip inputs only): when the zip
+#'     file contains multiple hierarchies, selects which hierarchy to import.
+#'   Note: `checkDataTypes` is controlled by the `castToCorrectDataTypes`
+#'   argument of this function and should not be supplied via `...`.
 #' @importFrom utils file_test
 #'
 #' @return A RDBESDataObject
 #' @export
 #' @md
 #'
 #' @examples
+#' # Create an empty object
 #' myEmptyRDBESObject <- createRDBESDataObject(input = NULL)
 
 createRDBESDataObject <- function(input = NULL,

diff --git a/R/createTableOfRDBESIds.r b/R/createTableOfRDBESIds.r
@@ -1,3 +1,4 @@
+
 #' Create a table of RDBES Ids
 #'
 #' examples for now see
@@ -18,51 +19,121 @@
 #' myTableOfIds<- createTableOfRDBESIds(myH1RawObject)
 #' }
 
-createTableOfRDBESIds<-function(x, addSAseqNums=TRUE)
-{
+createTableOfRDBESIds<-function(x, addSAseqNums=TRUE){
 
-# note: needs developments for different lower hierarchies
+  # Avoid R CMD check notes for data.table's NSE column references
+  SAlowHierarchy <- SAid <- NULL
 
-# x is RDBESobj
-# hierarchy is hierarchy (integer)
-# outputs a table with ids for matching
+  # note: needs developments for different lower hierarchies
 
+  # x is an RDBESDataObject
+  # the hierarchy is taken from the first DE row (x$DE$DEhierarchy[1])
+  # outputs a table with ids for matching
 
-CStableNames<- getTablesInRDBESHierarchy(hierarchy = x$DE$DEhierarchy[1],
-                                      includeOptTables = FALSE,
-                                      includeLowHierTables = TRUE,
-                                      includeTablesNotInSampHier = FALSE)
+  # data.table is listed in Imports and loaded via NAMESPACE; avoid require/library in package code
 
-for (i in 1:(length(CStableNames)-1))
-{
-id_1<-paste0(CStableNames[i],"id")
-id_2<-paste0(CStableNames[i+1],"id")
-if (i==1) df_1<-data.frame(x[[CStableNames[i]]][,list(get(id_1))]); colnames(df_1)<-id_1
-if((CStableNames[i+1] == "SA" & addSAseqNums == TRUE) | CStableNames[i+1] %in% c("BV")){
-							if(CStableNames[i+1]=="SA") {df_2<-data.frame(x[[CStableNames[i+1]]][,list(get(id_1), get(id_2), get("SAseqNum"), get("SAparSequNum"))]); colnames(df_2)<-c(id_1,id_2,"SAseqNum","SAparSequNum")}
-							if(CStableNames[i+1]=="BV") {df_2<-data.frame(x[[CStableNames[i+1]]][,list(get(id_1), get(id_2), get("BVfishId"))]); colnames(df_2)<-c(id_1,id_2,"BVfishId")}
-							} else {
-								df_2<-data.frame(x[[CStableNames[i+1]]][,list(get(id_1), get(id_2))]); colnames(df_2)<-c(id_1,id_2)
-								}
+  CStableNames<- getTablesInRDBESHierarchy(hierarchy = x$DE$DEhierarchy[1],
+                                           includeOptTables = FALSE,
+                                           includeLowHierTables = TRUE,
+                                           includeTablesNotInSampHier = FALSE)
 
-if (i==1) out<-merge(df_1,df_2, all.x=T) else out<-merge(out, df_2, all.x=T)
+  for (i in 1:(length(CStableNames)-1)){
+    cat("Processing", CStableNames[i], "table. \n")
+    cat("Merging", CStableNames[i], " with ", CStableNames[i+1], " tables. \n")
+    id_1<-paste0(CStableNames[i],"id")
+    id_2<-paste0(CStableNames[i+1],"id")
 
-#colnames(out)<-c(id_1,id_2)
+    if(i==1){
+      cat("Using", id_1, " from ", CStableNames[i], "table and", id_2, " from ", CStableNames[i+1], "table. \n")
+      df_1<-data.frame(x[[CStableNames[i]]][,list(get(id_1))]); colnames(df_1)<-id_1
+    }
 
-}
-# reorders
-if(addSAseqNums==TRUE){
-	out<-out[,c(paste0(CStableNames,"id"),"BVfishId","SAseqNum","SAparSequNum")]
-	} else {
-		out<-out[,c(paste0(CStableNames,"id"),"BVfishId")]
+  if(((CStableNames[i+1] == "SA" && addSAseqNums == TRUE) || CStableNames[i+1] %in% c("BV"))){
 
+      if(CStableNames[i+1]=="SA"){
 
-		}
-out
-}
+        cat("Using", id_1, " from ", CStableNames[i], "table and", id_2, "SAseqNum, and SAparSequNum from ", CStableNames[i+1], "table. \n")
+        df_2<-data.frame(x[[CStableNames[i+1]]][,list(get(id_1), get(id_2), get("SAseqNum"), get("SAparSequNum"))]); colnames(df_2)<-c(id_1,id_2,"SAseqNum","SAparSequNum")
+
+      }
+
+      if(CStableNames[i+1]=="BV"){
+
+        cat("Using", id_1, " from ", CStableNames[i], "table and", id_2, "and BVfishId from ", CStableNames[i+1], "table. \n")
+        df_2<-data.frame(x[[CStableNames[i+1]]][,list(get(id_1), get(id_2), get("BVfishId"))]); colnames(df_2)<-c(id_1,id_2,"BVfishId")
+        # We also need a reference reporting the SAid, for when the lower hierarchy is C, see below.
+        df_2C<-data.frame(x[[CStableNames[i+1]]][,list(get("SAid"), get(id_1), get(id_2),get("BVfishId"))]); colnames(df_2C)<-c("SAid", id_1,id_2,"BVfishId")
+
+      }
+
+    }else{
+
+      cat("Using", id_1, " from ", CStableNames[i], "table and", id_2, " from ", CStableNames[i+1], "table. \n")
+      df_2<-data.frame(x[[CStableNames[i+1]]][,list(get(id_1), get(id_2))]); colnames(df_2)<-c(id_1,id_2)
+
+    }
+
+    if (i==1){
+
+      out<-merge(df_1,df_2, all.x=T)
+
+    }else{
+
+      if(CStableNames[i+1]=="BV"){
+
+        # The lower hierarchy (A to D) determines whether the FM table is used.
+        # So BV is merged conditionally: on FMid (A, B) or on SAid (C); D is left unchanged.
+        outTmp = merge(out, x$SA[,c("SAid","SAlowHierarchy")])
+
+        # Convert to data.table if not already
+        data.table::setDT(outTmp)
+        data.table::setDT(out)
+        data.table::setDT(df_2)
+        data.table::setDT(df_2C)
 
-# e.g.,
- ## default adds "SAseqNum","SAparSequNum"
- #head(createTableOfRDBESIds(x = RDBESprepObj))
- ## if addSAseqNums is set to FALSE, "SAseqNum" and "SAparSequNum" are not added to output
- # head(createTableOfRDBESIds(x = RDBESprepObj, addSAseqNums=FALSE))
+        # Filter and get SAid groups
+        keepA <- outTmp[SAlowHierarchy == "A", SAid]
+        keepB <- outTmp[SAlowHierarchy == "B", SAid]
+        keepC <- outTmp[SAlowHierarchy == "C", SAid]
+        keepD <- outTmp[SAlowHierarchy == "D", SAid]
+
+        # Split 'out' accordingly
+        toMergeA <- out[SAid %in% keepA]
+        toMergeB <- out[SAid %in% keepB]
+        toMergeC <- out[SAid %in% keepC]
+        toMergeD <- out[SAid %in% keepD]
+
+        # Conditional merges
+        mergedA <- df_2[toMergeA, on = "FMid"]              # left join equivalent (all.x = TRUE)
+        mergedB <- df_2[toMergeB, on = "FMid"]              # same for group B
+        mergedC <- df_2C[toMergeC, on = "SAid", nomatch = 0][, .SD, .SDcols = c(names(toMergeC), "BVid","BVfishId")]
+        mergedD <- toMergeD                                 # unchanged group D
+
+        # Combine back
+        out <- data.table::rbindlist(list(mergedA, mergedB, mergedC, mergedD), use.names = TRUE, fill = TRUE)
+
+      }else{
+
+        out<-merge(out, df_2, all.x=T)
+
+      }
+
+    }
+  }
+
+  out <- as.data.frame(out)
+
+    # reorders
+  if(addSAseqNums==TRUE){
+
+    out<-out[,c(paste0(CStableNames,"id"),"BVfishId","SAseqNum","SAparSequNum")]
+
+  } else {
+
+    out<-out[,c(paste0(CStableNames,"id"),"BVfishId")]
+
+  }
+
+  out
+
+}
diff --git a/R/filterRDBESDataObject.R b/R/filterRDBESDataObject.R
@@ -39,6 +39,17 @@
 #'   fieldsToFilter = myFields,
 #'   valuesToFilter = myValues
 #' )
+#'
+#' # Inverse filtering (exclude certain values)
+#' # Example: keep all DE rows except those with DEid in `excludedValues`
+#' # Compute the complement of the excluded set using setdiff
+#' allValues <- unique(myH1RawObject$DE$DEid)
+#' excludedValues <- c(5351)
+#' myInverseFiltered <- filterRDBESDataObject(
+#'   myH1RawObject,
+#'   fieldsToFilter = "DEid",
+#'   valuesToFilter = setdiff(allValues, excludedValues)
+#' )
 #' }
 filterRDBESDataObject <- function(RDBESDataObjectToFilter,
                                  fieldsToFilter,

diff --git a/R/getLowerTableSubsets.R b/R/getLowerTableSubsets.R
@@ -47,7 +47,7 @@ getLowerTableSubsets <- function(subsets, tblName, rdbesTables, combineStrata =
   }
 
   # Bind the data together and filter based on intersected IDs
-  res <- data.table::rbindlist(res)
+  res <- data.table::rbindlist(res, fill = TRUE)
   res <- res[get(paste0(tblName, "id")) %in% ids]
   res <- unique(res, by = paste0(tblName, "id"))
 

diff --git a/R/importRDBESDataZIP.R b/R/importRDBESDataZIP.R
@@ -73,7 +73,7 @@ importRDBESDataZIP <- function(filenames,
     hdirs <- dirs[grepl("H[0-9]+", dirs)]
     if(length(hdirs) > 1) {
       valid_hierarchies <- as.numeric(gsub("H", "", hdirs))
-      example  <- paste0("Hierachy = ", valid_hierarchies[1])
+      example  <- paste0("Hierarchy = ", valid_hierarchies[1])
       if(is.null(Hierarchy)) {
         stop("The zip file contains multiple hierarchies.\n",
              "To import a selected hierarchy, please provide the hierarchy ",