From a30efec25f89e94ec81e961b2190a3ce9c9ca5f9 Mon Sep 17 00:00:00 2001 From: erosquesada Date: Wed, 15 Oct 2025 14:32:39 +0200 Subject: [PATCH 1/6] With dplyr code --- R/createTableOfRDBESIds.r | 161 +++++++++++++++++++++++++++----------- 1 file changed, 116 insertions(+), 45 deletions(-) diff --git a/R/createTableOfRDBESIds.r b/R/createTableOfRDBESIds.r index 4431d356..5e02fbd7 100644 --- a/R/createTableOfRDBESIds.r +++ b/R/createTableOfRDBESIds.r @@ -1,3 +1,4 @@ + #' Create a table of RDBES Ids #' #' examples for now see @@ -18,51 +19,121 @@ #' myTableOfIds<- createTableOfRDBESIds(myH1RawObject) #' } -createTableOfRDBESIds<-function(x, addSAseqNums=TRUE) -{ - -# note: needs developments for different lower hierarchies - -# x is RDBESobj -# hierarchy is hierarchy (integer) -# outputs a table with ids for matching - - -CStableNames<- getTablesInRDBESHierarchy(hierarchy = x$DE$DEhierarchy[1], - includeOptTables = FALSE, - includeLowHierTables = TRUE, - includeTablesNotInSampHier = FALSE) - -for (i in 1:(length(CStableNames)-1)) -{ -id_1<-paste0(CStableNames[i],"id") -id_2<-paste0(CStableNames[i+1],"id") -if (i==1) df_1<-data.frame(x[[CStableNames[i]]][,list(get(id_1))]); colnames(df_1)<-id_1 -if((CStableNames[i+1] == "SA" & addSAseqNums == TRUE) | CStableNames[i+1] %in% c("BV")){ - if(CStableNames[i+1]=="SA") {df_2<-data.frame(x[[CStableNames[i+1]]][,list(get(id_1), get(id_2), get("SAseqNum"), get("SAparSequNum"))]); colnames(df_2)<-c(id_1,id_2,"SAseqNum","SAparSequNum")} - if(CStableNames[i+1]=="BV") {df_2<-data.frame(x[[CStableNames[i+1]]][,list(get(id_1), get(id_2), get("BVfishId"))]); colnames(df_2)<-c(id_1,id_2,"BVfishId")} - } else { - df_2<-data.frame(x[[CStableNames[i+1]]][,list(get(id_1), get(id_2))]); colnames(df_2)<-c(id_1,id_2) - } - -if (i==1) out<-merge(df_1,df_2, all.x=T) else out<-merge(out, df_2, all.x=T) - -#colnames(out)<-c(id_1,id_2) - -} -# reorders -if(addSAseqNums==TRUE){ - out<-out[,c(paste0(CStableNames,"id"),"BVfishId","SAseqNum","SAparSequNum")] - } else { - out<-out[,c(paste0(CStableNames,"id"),"BVfishId")] - - - } -out +createTableOfRDBESIds<-function(x, addSAseqNums=TRUE){ + + # note: needs developments for different lower hierarchies + + # x is RDBESobj + # hierarchy is hierarchy (integer) + # outputs a table with ids for matching + + + CStableNames<- getTablesInRDBESHierarchy(hierarchy = x$DE$DEhierarchy[1], + includeOptTables = FALSE, + includeLowHierTables = TRUE, + includeTablesNotInSampHier = FALSE) + + for (i in 1:(length(CStableNames)-1)){ + cat("Processing", CStableNames[i], "table. \n") + cat("Merging", CStableNames[i], " with ", CStableNames[i+1], " tables. \n") + id_1<-paste0(CStableNames[i],"id") + id_2<-paste0(CStableNames[i+1],"id") + + if(i==1){ + cat("Using", id_1, " from ", CStableNames[i], "table and", id_2, " from ", CStableNames[i+1], "table. \n") + df_1<-data.frame(x[[CStableNames[i]]][,list(get(id_1))]); colnames(df_1)<-id_1 + } + + if((CStableNames[i+1] == "SA" & addSAseqNums == TRUE) | CStableNames[i+1] %in% c("BV")){ + + if(CStableNames[i+1]=="SA"){ + + cat("Using", id_1, " from ", CStableNames[i], "table and", id_2, "SAseqNum, and SAparSequNum from ", CStableNames[i+1], "table. \n") + df_2<-data.frame(x[[CStableNames[i+1]]][,list(get(id_1), get(id_2), get("SAseqNum"), get("SAparSequNum"))]); colnames(df_2)<-c(id_1,id_2,"SAseqNum","SAparSequNum") + + } + + if(CStableNames[i+1]=="BV"){ + + cat("Using", id_1, " from ", CStableNames[i], "table and", id_2, "and BVfishId from ", CStableNames[i+1], "table. \n") + df_2<-data.frame(x[[CStableNames[i+1]]][,list(get(id_1), get(id_2), get("BVfishId"))]); colnames(df_2)<-c(id_1,id_2,"BVfishId") + # We also need a reference reporting the SAid, for when the lower hierarchy is C, see below. + df_2C<-data.frame(rdbesobj[[CStableNames[i+1]]][,list(get("SAid"), get(id_1), get(id_2))]); colnames(df_2C)<-c("SAid", id_1,id_2) + + } + + }else{ + + cat("Using", id_1, " from ", CStableNames[i], "table and", id_2, " from ", CStableNames[i+1], "table. \n") + df_2<-data.frame(x[[CStableNames[i+1]]][,list(get(id_1), get(id_2))]); colnames(df_2)<-c(id_1,id_2) + + } + + if (i==1){ + + out<-merge(df_1,df_2, all.x=T) else out<-merge(out, df_2, all.x=T) + + }else{ + + if(CStableNames[i+1]=="BV"){ + + # The lower hierarchy (A:D) implies that FM is used or not. + # Due to this, we need to do a conditional merging by either FMid (where present) or SAid. + outTmp = merge(out, rdbesobj$SA[,c("SAid","SAlowHierarchy")]) + + # Convert to data.table if not already + setDT(outTmp) + setDT(out) + setDT(df_2) + setDT(df_2C) + + # Filter and get SAid groups + keepA <- outTmp[SAlowHierarchy == "A", SAid] + keepB <- outTmp[SAlowHierarchy == "B", SAid] + keepC <- outTmp[SAlowHierarchy == "C", SAid] + keepD <- outTmp[SAlowHierarchy == "D", SAid] + + # Split 'out' accordingly + toMergeA <- out[SAid %in% keepA] + toMergeB <- out[SAid %in% keepB] + toMergeC <- out[SAid %in% keepC] + toMergeD <- out[SAid %in% keepD] + + # Conditional merges + mergedA <- df_2[toMergeA, on = "FMid"] # left join equivalent (all.x = TRUE) + mergedB <- df_2[toMergeB, on = "FMid"] # same for group B + mergedC <- df_2C[toMergeC, on = "SAid", nomatch = 0][, .SD, .SDcols = c(names(toMergeC), "BVid")] + mergedD <- toMergeD # unchanged group D + + # Combine back + out <- rbindlist(list(mergedA, mergedB, mergedC, mergedD), use.names = TRUE, fill = TRUE) + + }else{ + + out<-merge(out, df_2, all.x=T) + + } + + } + } + + # reorders + if(addSAseqNums==TRUE){ + + out<-out[,c(paste0(CStableNames,"id"),"BVfishId","SAseqNum","SAparSequNum")] + + } else { + + out<-out[,c(paste0(CStableNames,"id"),"BVfishId")] + + } + + out + } # e.g., - ## default adds "SAseqNum","SAparSequNum" - #head(createTableOfRDBESIds(x = RDBESprepObj)) - ## if addSAseqNums is set to FALSE, "SAseqNum" and "SAparSequNum" are not added to output - # head(createTableOfRDBESIds(x = RDBESprepObj, addSAseqNums=FALSE)) +## default adds "SAseqNum","SAparSequNum" +#head(createTableOfRDBESIds(x = RDBESprepObj)) +## if addSAseqNums is set to FALSE, "SAseqNum" and "SAparSequNum" are not added to output +# head(createTableOfRDBESIds(x = RDBESprepObj, addSAseqNums=FALSE)) From f9069e270c34c49fb9512ec187c748c0c9c2dc21 Mon Sep 17 00:00:00 2001 From: erosquesada Date: Wed, 15 Oct 2025 14:50:33 +0200 Subject: [PATCH 2/6] Fixed with data.table code, to check --- R/createTableOfRDBESIds.r | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/R/createTableOfRDBESIds.r b/R/createTableOfRDBESIds.r index 5e02fbd7..47e2c837 100644 --- a/R/createTableOfRDBESIds.r +++ b/R/createTableOfRDBESIds.r @@ -71,7 +71,7 @@ createTableOfRDBESIds<-function(x, addSAseqNums=TRUE){ if (i==1){ - out<-merge(df_1,df_2, all.x=T) else out<-merge(out, df_2, all.x=T) + out<-merge(df_1,df_2, all.x=T) }else{ @@ -109,15 +109,17 @@ createTableOfRDBESIds<-function(x, addSAseqNums=TRUE){ out <- rbindlist(list(mergedA, mergedB, mergedC, mergedD), use.names = TRUE, fill = TRUE) }else{ - + out<-merge(out, df_2, all.x=T) } } } - - # reorders + + out <- as.data.frame(out) + + # reorders if(addSAseqNums==TRUE){ out<-out[,c(paste0(CStableNames,"id"),"BVfishId","SAseqNum","SAparSequNum")] @@ -129,11 +131,9 @@ createTableOfRDBESIds<-function(x, addSAseqNums=TRUE){ } out - + } # e.g., -## default adds "SAseqNum","SAparSequNum" -#head(createTableOfRDBESIds(x = RDBESprepObj)) -## if addSAseqNums is set to FALSE, "SAseqNum" and "SAparSequNum" are not added to output -# head(createTableOfRDBESIds(x = RDBESprepObj, addSAseqNums=FALSE)) +# check <- createTableOfRDBESIds(rdbesobj, addSAseqNums = T) +# check From c26a9690601718944a0673caaa23e4cf11e0f411 Mon Sep 17 00:00:00 2001 From: erosquesada Date: Wed, 15 Oct 2025 14:51:07 +0200 Subject: [PATCH 3/6] Fixed with data.table code, to check on #237 --- R/createTableOfRDBESIds.r | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/createTableOfRDBESIds.r b/R/createTableOfRDBESIds.r index 47e2c837..2f169e57 100644 --- a/R/createTableOfRDBESIds.r +++ b/R/createTableOfRDBESIds.r @@ -18,7 +18,7 @@ #' #' myTableOfIds<- createTableOfRDBESIds(myH1RawObject) #' } - + createTableOfRDBESIds<-function(x, addSAseqNums=TRUE){ # note: needs developments for different lower hierarchies From 8ce31ccddb9a62220762ddcea387ffba15fdd80e Mon Sep 17 00:00:00 2001 From: erosquesada Date: Wed, 15 Oct 2025 15:32:48 +0200 Subject: [PATCH 4/6] replacing with x the object --- R/createTableOfRDBESIds.r | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/R/createTableOfRDBESIds.r b/R/createTableOfRDBESIds.r index 2f169e57..16c3d644 100644 --- a/R/createTableOfRDBESIds.r +++ b/R/createTableOfRDBESIds.r @@ -23,7 +23,7 @@ createTableOfRDBESIds<-function(x, addSAseqNums=TRUE){ # note: needs developments for different lower hierarchies - # x is RDBESobj + # x is x # hierarchy is hierarchy (integer) # outputs a table with ids for matching @@ -58,7 +58,7 @@ createTableOfRDBESIds<-function(x, addSAseqNums=TRUE){ cat("Using", id_1, " from ", CStableNames[i], "table and", id_2, "and BVfishId from ", CStableNames[i+1], "table. \n") df_2<-data.frame(x[[CStableNames[i+1]]][,list(get(id_1), get(id_2), get("BVfishId"))]); colnames(df_2)<-c(id_1,id_2,"BVfishId") # We also need a reference reporting the SAid, for when the lower hierarchy is C, see below. - df_2C<-data.frame(rdbesobj[[CStableNames[i+1]]][,list(get("SAid"), get(id_1), get(id_2))]); colnames(df_2C)<-c("SAid", id_1,id_2) + df_2C<-data.frame(x[[CStableNames[i+1]]][,list(get("SAid"), get(id_1), get(id_2))]); colnames(df_2C)<-c("SAid", id_1,id_2) } @@ -79,7 +79,7 @@ createTableOfRDBESIds<-function(x, addSAseqNums=TRUE){ # The lower hierarchy (A:D) implies that FM is used or not. # Due to this, we need to do a conditional merging by either FMid (where present) or SAid. - outTmp = merge(out, rdbesobj$SA[,c("SAid","SAlowHierarchy")]) + outTmp = merge(out, x$SA[,c("SAid","SAlowHierarchy")]) # Convert to data.table if not already setDT(outTmp) @@ -135,5 +135,5 @@ createTableOfRDBESIds<-function(x, addSAseqNums=TRUE){ } # e.g., -# check <- createTableOfRDBESIds(rdbesobj, addSAseqNums = T) +# check <- createTableOfRDBESIds(x, addSAseqNums = T) # check From 124ea7506be86d95398ece6ff069c186380885d9 Mon Sep 17 00:00:00 2001 From: Kasia-MIR <60775640+Kasia-MIR@users.noreply.github.com> Date: Wed, 15 Oct 2025 15:42:40 +0200 Subject: [PATCH 5/6] required library --- R/createTableOfRDBESIds.r | 84 +++++++++++++++++++-------------------- 1 file changed, 41 insertions(+), 43 deletions(-) diff --git a/R/createTableOfRDBESIds.r b/R/createTableOfRDBESIds.r index 16c3d644..a2c42265 100644 --- a/R/createTableOfRDBESIds.r +++ b/R/createTableOfRDBESIds.r @@ -18,102 +18,104 @@ #' #' myTableOfIds<- createTableOfRDBESIds(myH1RawObject) #' } - + createTableOfRDBESIds<-function(x, addSAseqNums=TRUE){ - + # note: needs developments for different lower hierarchies - + # x is x # hierarchy is hierarchy (integer) # outputs a table with ids for matching - - + + #libraries + require(data.table) + CStableNames<- getTablesInRDBESHierarchy(hierarchy = x$DE$DEhierarchy[1], includeOptTables = FALSE, includeLowHierTables = TRUE, includeTablesNotInSampHier = FALSE) - + for (i in 1:(length(CStableNames)-1)){ cat("Processing", CStableNames[i], "table. \n") cat("Merging", CStableNames[i], " with ", CStableNames[i+1], " tables. \n") id_1<-paste0(CStableNames[i],"id") id_2<-paste0(CStableNames[i+1],"id") - + if(i==1){ cat("Using", id_1, " from ", CStableNames[i], "table and", id_2, " from ", CStableNames[i+1], "table. \n") df_1<-data.frame(x[[CStableNames[i]]][,list(get(id_1))]); colnames(df_1)<-id_1 } - + if((CStableNames[i+1] == "SA" & addSAseqNums == TRUE) | CStableNames[i+1] %in% c("BV")){ - + if(CStableNames[i+1]=="SA"){ - + cat("Using", id_1, " from ", CStableNames[i], "table and", id_2, "SAseqNum, and SAparSequNum from ", CStableNames[i+1], "table. \n") df_2<-data.frame(x[[CStableNames[i+1]]][,list(get(id_1), get(id_2), get("SAseqNum"), get("SAparSequNum"))]); colnames(df_2)<-c(id_1,id_2,"SAseqNum","SAparSequNum") - + } - + if(CStableNames[i+1]=="BV"){ - + cat("Using", id_1, " from ", CStableNames[i], "table and", id_2, "and BVfishId from ", CStableNames[i+1], "table. \n") df_2<-data.frame(x[[CStableNames[i+1]]][,list(get(id_1), get(id_2), get("BVfishId"))]); colnames(df_2)<-c(id_1,id_2,"BVfishId") - # We also need a reference reporting the SAid, for when the lower hierarchy is C, see below. + # We also need a reference reporting the SAid, for when the lower hierarchy is C, see below. df_2C<-data.frame(x[[CStableNames[i+1]]][,list(get("SAid"), get(id_1), get(id_2))]); colnames(df_2C)<-c("SAid", id_1,id_2) - + } - + }else{ - + cat("Using", id_1, " from ", CStableNames[i], "table and", id_2, " from ", CStableNames[i+1], "table. \n") df_2<-data.frame(x[[CStableNames[i+1]]][,list(get(id_1), get(id_2))]); colnames(df_2)<-c(id_1,id_2) - + } - + if (i==1){ - - out<-merge(df_1,df_2, all.x=T) - + + out<-merge(df_1,df_2, all.x=T) + }else{ - + if(CStableNames[i+1]=="BV"){ - - # The lower hierarchy (A:D) implies that FM is used or not. - # Due to this, we need to do a conditional merging by either FMid (where present) or SAid. + + # The lower hierarchy (A:D) implies that FM is used or not. + # Due to this, we need to do a conditional merging by either FMid (where present) or SAid. outTmp = merge(out, x$SA[,c("SAid","SAlowHierarchy")]) - + # Convert to data.table if not already setDT(outTmp) setDT(out) setDT(df_2) setDT(df_2C) - + # Filter and get SAid groups keepA <- outTmp[SAlowHierarchy == "A", SAid] keepB <- outTmp[SAlowHierarchy == "B", SAid] keepC <- outTmp[SAlowHierarchy == "C", SAid] keepD <- outTmp[SAlowHierarchy == "D", SAid] - + # Split 'out' accordingly toMergeA <- out[SAid %in% keepA] toMergeB <- out[SAid %in% keepB] toMergeC <- out[SAid %in% keepC] toMergeD <- out[SAid %in% keepD] - + # Conditional merges mergedA <- df_2[toMergeA, on = "FMid"] # left join equivalent (all.x = TRUE) mergedB <- df_2[toMergeB, on = "FMid"] # same for group B mergedC <- df_2C[toMergeC, on = "SAid", nomatch = 0][, .SD, .SDcols = c(names(toMergeC), "BVid")] mergedD <- toMergeD # unchanged group D - + # Combine back out <- rbindlist(list(mergedA, mergedB, mergedC, mergedD), use.names = TRUE, fill = TRUE) - + }else{ out<-merge(out, df_2, all.x=T) - + } - + } } @@ -121,19 +123,15 @@ createTableOfRDBESIds<-function(x, addSAseqNums=TRUE){ # reorders if(addSAseqNums==TRUE){ - + out<-out[,c(paste0(CStableNames,"id"),"BVfishId","SAseqNum","SAparSequNum")] - + } else { - + out<-out[,c(paste0(CStableNames,"id"),"BVfishId")] - + } - + out } - -# e.g., -# check <- createTableOfRDBESIds(x, addSAseqNums = T) -# check From 7462c96936ceb1be977b9c1efc57b8f0fbea9a20 Mon Sep 17 00:00:00 2001 From: Kasia-MIR <60775640+Kasia-MIR@users.noreply.github.com> Date: Thu, 16 Oct 2025 15:29:32 +0200 Subject: [PATCH 6/6] fix bug with BVfishId passed the test: https://github.com/ices-tools-dev/RDBEScore/pull/246#issuecomment-3406981773 --- R/createTableOfRDBESIds.r | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/createTableOfRDBESIds.r b/R/createTableOfRDBESIds.r index a2c42265..0ba86b1b 100644 --- a/R/createTableOfRDBESIds.r +++ b/R/createTableOfRDBESIds.r @@ -60,7 +60,7 @@ createTableOfRDBESIds<-function(x, addSAseqNums=TRUE){ cat("Using", id_1, " from ", CStableNames[i], "table and", id_2, "and BVfishId from ", CStableNames[i+1], "table. \n") df_2<-data.frame(x[[CStableNames[i+1]]][,list(get(id_1), get(id_2), get("BVfishId"))]); colnames(df_2)<-c(id_1,id_2,"BVfishId") # We also need a reference reporting the SAid, for when the lower hierarchy is C, see below. - df_2C<-data.frame(x[[CStableNames[i+1]]][,list(get("SAid"), get(id_1), get(id_2))]); colnames(df_2C)<-c("SAid", id_1,id_2) + df_2C<-data.frame(x[[CStableNames[i+1]]][,list(get("SAid"), get(id_1), get(id_2),get("BVfishId"))]); colnames(df_2C)<-c("SAid", id_1,id_2,"BVfishId") } @@ -104,7 +104,7 @@ createTableOfRDBESIds<-function(x, addSAseqNums=TRUE){ # Conditional merges mergedA <- df_2[toMergeA, on = "FMid"] # left join equivalent (all.x = TRUE) mergedB <- df_2[toMergeB, on = "FMid"] # same for group B - mergedC <- df_2C[toMergeC, on = "SAid", nomatch = 0][, .SD, .SDcols = c(names(toMergeC), "BVid")] + mergedC <- df_2C[toMergeC, on = "SAid", nomatch = 0][, .SD, .SDcols = c(names(toMergeC), "BVid","BVfishId")] mergedD <- toMergeD # unchanged group D # Combine back