diff --git a/DESCRIPTION b/DESCRIPTION index 69a4f50f..2d27bf2f 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: BGmisc Title: An R Package for Extended Behavior Genetics Analysis -Version: 1.6.0 +Version: 1.6.0.1 Authors@R: c( person("S. Mason", "Garrison", , "garrissm@wfu.edu", role = c("aut", "cre"), comment = c(ORCID = "0000-0002-4804-6003")), diff --git a/NAMESPACE b/NAMESPACE index babeb73d..2b4a4878 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -17,12 +17,10 @@ export(computeParentAdjacency) export(createGenDataFrame) export(determineSex) export(dropLink) -export(evenInsert) export(fitComponentModel) export(getWikiTreeSummary) export(identifyComponentModel) export(inferRelatedness) -export(insertEven) export(makeInbreeding) export(makeTwins) export(ped2add) diff --git a/NEWS.md b/NEWS.md index ed537985..92292f4a 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,6 +1,9 @@ # BGmisc NEWS +# Development version: 1.6.0.9000 -# BGmisc 1.6 +# BGmisc 1.6.0.1 +* Add helper functions for checkParents etc +* fixed incorrect direction so that parents are pointing to children in the graphs * Optimize simulatePedigree and helpers for speed and memory usage * Major gains in speed for deeper pedigrees * Added more tests for simulatePedigree diff --git a/R/checkIDs.R b/R/checkIDs.R index 3b518388..05f104c1 100644 --- a/R/checkIDs.R +++ b/R/checkIDs.R @@ -41,7 +41,7 @@ checkIDs <- function(ped, verbose = FALSE, repair = FALSE) { cat("Validation Results:\n") message(validation_results) } - if (repair) { + if (repair == TRUE) { if (verbose == TRUE) { cat("Attempting to repair:\n") cat("Step 1: Attempting to repair non-unique IDs...\n") @@ -56,19 +56,12 @@ checkIDs <- function(ped, verbose = FALSE, repair = FALSE) { # if there are non-unique IDs if (length(validation_results$non_unique_ids) > 0) { # loop through each non-unique ID - for (id in validation_results$non_unique_ids) { - rows_with_id <- repaired_ped[repaired_ped$ID == id, ] - # If all rows with the same ID are truly identical, keep only the first occurrence - if (nrow(unique(rows_with_id)) == 1) { - # Mark as removed in the changes list - changes[[paste0("ID", id)]] <- "Removed duplicates" - # Keep only the first row, remove the rest - repaired_ped <- repaired_ped[-which(repaired_ped$ID == id)[-1], ] # Remove all but the first occurrence - } else { - # Mark as kept in the changes list - changes[[paste0("ID", id)]] <- "Kept duplicates" - } - } + + processed <- dropIdenticalDuplicateIDs(ped = repaired_ped, + ids = validation_results$non_unique_ids, + changes = changes) + repaired_ped <- processed$ped + changes <- processed$changes } if (verbose == TRUE) { cat("Step 2: No repair for parents who are their children at this time\n") diff --git a/R/checkParents.R b/R/checkParents.R index 3a2216d6..2abe3e24 100644 --- a/R/checkParents.R +++ b/R/checkParents.R @@ -52,14 +52,14 @@ checkParentIDs <- function(ped, verbose = FALSE, repair = FALSE, missing_mothers <- ped$ID[which(!is.na(ped$dadID) & is.na(ped$momID))] # Update the validation_results list - if (length(missing_fathers) > 0) { - validation_results$missing_fathers <- missing_fathers - } - if (length(missing_mothers) > 0) { - validation_results$missing_mothers <- missing_mothers - } - validation_results$single_parents <- length(validation_results) > 0 + validation_results <- addIfAny(validation_results, "missing_fathers", missing_fathers) + validation_results <- addIfAny(validation_results, "missing_mothers", missing_mothers) + + validation_results$single_parents <- (length(missing_fathers) + length(missing_mothers)) > 0 + + + if (verbose && validation_results$single_parents) cat("Missing single parents found.\n") @@ -269,11 +269,12 @@ checkParentIDs <- function(ped, verbose = FALSE, repair = FALSE, } # restore orginal names that the user orginally provided - names(ped)[names(ped) == "ID"] <- personID - names(ped)[names(ped) == "momID"] <- momID - names(ped)[names(ped) == "dadID"] <- dadID - names(ped)[names(ped) == "famID"] <- famID - return(ped) + ped <- restorePedColnames(ped, + famID = famID, + personID = personID, + momID = momID, + dadID = dadID) + } #' Repair Parent IDs #' diff --git a/R/helpChecks.R b/R/helpChecks.R new file mode 100644 index 00000000..eb233bb3 --- /dev/null +++ b/R/helpChecks.R @@ -0,0 +1,47 @@ +#' Drop Identical Duplicate IDs from Pedigree Data Frame +#' +#' #' This function identifies and removes duplicate entries in a pedigree data frame +#' based on a list of specified IDs. If multiple rows share the same ID and are +#' identical, only one instance is retained. The function returns the modified pedigree +#' data frame along with a log of changes made. +#' @param ped A data frame representing the pedigree. +#' @param ids A vector of IDs to check for duplicates in the pedigree. +#' @param changes An optional list to log changes made during the process. +dropIdenticalDuplicateIDs <- function(ped, ids, changes = NULL +) { + if (!is.data.frame(ped)) { + stop("ped must be a data frame") + } + if (is.null(changes)) { + changes <- list() + } else if (!is.list(changes)) { + stop("changes must be a list or NULL") + } + + out <- ped + + if (!is.null(ids) && length(ids) > 0) { + for (id in ids) { + rows_with_id <- out[out$ID == id, , drop = FALSE] + if (nrow(unique(rows_with_id)) == 1) { + changes[[paste0("ID", id)]] <- "Removed duplicates" + out <- out[-which(out$ID == id)[-1], , drop = FALSE] + } else { + changes[[paste0("ID", id)]] <- "Kept duplicates" + } + } + } + + list(ped = out, changes = changes) +} + +#' Helper function to conditionally add elements to a list +#' @param validation A list to which elements may be added. +#' @param name A character string representing the name of the element to add. +#' @param value The value to add to the list if it is not NULL or empty +#' @return The updated list with the new element added if applicable. +#' @keywords internal +addIfAny <- function(validation, name, value) { + if (!is.null(value) && length(value) > 0) validation[[name]] <- value + validation +} diff --git a/R/insertEven.R b/R/helpInsertEven.R similarity index 98% rename from R/insertEven.R rename to R/helpInsertEven.R index 3c35b915..284e8445 100644 --- a/R/insertEven.R +++ b/R/helpInsertEven.R @@ -9,7 +9,6 @@ #' The function takes two vectors, m and n, and inserts the elements of m evenly into n. #' If the length of m is greater than the length of n, the vectors are swapped, and the insertion proceeds. #' The resulting vector is a combination of m and n, with the elements of m evenly distributed within n. -#' @export #' @seealso \code{\link{SimPed}} for the main function that uses this supporting function. insertEven <- function(m, n, verbose = FALSE) { @@ -55,5 +54,4 @@ insertEven <- function(m, n, verbose = FALSE) { } #' @rdname insertEven -#' @export evenInsert <- insertEven diff --git a/R/cleanPedigree.R b/R/helpNames.R similarity index 67% rename from R/cleanPedigree.R rename to R/helpNames.R index 1605bc60..71b0cb69 100644 --- a/R/cleanPedigree.R +++ b/R/helpNames.R @@ -41,6 +41,81 @@ standardizeColnames <- function(df, verbose = FALSE) { return(df) } +#' Restore Original Column Names in a Pedigree Dataframe +#' +#' This function restores the original column names of a pedigree dataframe +#' based on user-specified names. It is useful for reverting standardized column +#' names back to their original names after processing. +#' @param ped A pedigree dataframe with standardized column names. +#' @param famID The original name for the family ID column. Default is "fam +#' ID". +#' @param personID The original name for the person ID column. Default is "ID". +#' @param momID The original name for the mother ID column. Default is "momID". +#' @param dadID The original name for the father ID column. Default is "dadID". +#' @param gen The original name for the generation column. Default is "gen". +#' @param patID The original name for the paternal ID column. Default is "patID". +#' @param matID The original name for the maternal ID column. Default is "matID". +#' @param spID The original name for the spouse ID column. Default is "spID". +#' @param twinID The original name for the twin ID column. Default is "twinID". +#' @param zygosity The original name for the zygosity column. Default is "zygosity". +#' @param sex The original name for the sex column. Default is "sex". +#' @param verbose A logical indicating whether to print progress messages. +#' @return A pedigree dataframe with restored original column names. +restorePedColnames <- function(ped, + famID = "famID", + personID = "ID", + momID = "momID", + dadID = "dadID", + gen = "gen", + patID = "patID", + matID = "matID", + spID = "spID", + twinID = "twinID", + zygosity = "zygosity", + sex = "sex", + verbose = FALSE) { + if (verbose == TRUE) { + message("Restoring original column names...") + } + if (!inherits(ped, "data.frame")) { + stop("ped should be a data.frame or inherit to a data.frame") + } + if (!is.null(personID) && !is.null(ped$ID)) { + names(ped)[names(ped) == "ID"] <- personID + } + if (!is.null(momID) && !is.null(ped$momID)) { + names(ped)[names(ped) == "momID"] <- momID + } + if (!is.null(dadID) && !is.null(ped$dadID)) { + names(ped)[names(ped) == "dadID"] <- dadID + } + + if (!is.null(famID) && !is.null(ped$famID)) { + names(ped)[names(ped) == "famID"] <- famID + } + if (!is.null(gen) && !is.null(ped$gen)) { + names(ped)[names(ped) == "gen"] <- gen + } + if (!is.null(patID) && !is.null(ped$patID)) { + names(ped)[names(ped) == "patID"] <- patID + } + if (!is.null(matID) && !is.null(ped$matID)) { + names(ped)[names(ped) == "matID"] <- matID + } + if (!is.null(spID) && !is.null(ped$spID)) { + names(ped)[names(ped) == "spID"] <- spID + } + if (!is.null(twinID) && !is.null(ped$twinID)) { + names(ped)[names(ped) == "twinID"] <- twinID + } + if (!is.null(zygosity) && !is.null(ped$zygosity)) { + names(ped)[names(ped) == "zygosity"] <- zygosity + } + if (!is.null(sex) && !is.null(ped$sex)) { + names(ped)[names(ped) == "sex"] <- sex + } + ped +} # Repair Pedigree diff --git a/R/segmentPedigree.R b/R/segmentPedigree.R index 2fbf3e44..3179dbfd 100644 --- a/R/segmentPedigree.R +++ b/R/segmentPedigree.R @@ -139,12 +139,13 @@ ped2graph <- function(ped, ) edges <- rbind( as.matrix(data.frame( - personID = as.character(ped[[personID]]), - momID = as.character(ped[[momID]]) + # need to be parent to child for igraph + momID = as.character(ped[[momID]]), + personID = as.character(ped[[personID]]) )), as.matrix(data.frame( - personID = as.character(ped[[personID]]), - dadID = as.character(ped[[dadID]]) + dadID = as.character(ped[[dadID]]), + personID = as.character(ped[[personID]]) )) ) } else if (adjacent == "mothers") { @@ -154,8 +155,8 @@ ped2graph <- function(ped, ) ) edges <- as.matrix(data.frame( - personID = as.character(ped[[personID]]), - momID = as.character(ped[[momID]]) + momID = as.character(ped[[momID]]), + personID = as.character(ped[[personID]]) )) } else if (adjacent == "fathers") { nodes <- unique( @@ -164,8 +165,8 @@ ped2graph <- function(ped, ) ) edges <- as.matrix(data.frame( - personID = as.character(ped[[personID]]), - dadID = as.character(ped[[dadID]]) + dadID = as.character(ped[[dadID]]), + personID = as.character(ped[[personID]]) )) } edges <- edges[stats::complete.cases(edges), ] diff --git a/man/addIfAny.Rd b/man/addIfAny.Rd new file mode 100644 index 00000000..6d57a15d --- /dev/null +++ b/man/addIfAny.Rd @@ -0,0 +1,22 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/helpChecks.R +\name{addIfAny} +\alias{addIfAny} +\title{Helper function to conditionally add elements to a list} +\usage{ +addIfAny(validation, name, value) +} +\arguments{ +\item{validation}{A list to which elements may be added.} + +\item{name}{A character string representing the name of the element to add.} + +\item{value}{The value to add to the list if it is not NULL or empty} +} +\value{ +The updated list with the new element added if applicable. +} +\description{ +Helper function to conditionally add elements to a list +} +\keyword{internal} diff --git a/man/dropIdenticalDuplicateIDs.Rd b/man/dropIdenticalDuplicateIDs.Rd new file mode 100644 index 00000000..b084fe64 --- /dev/null +++ b/man/dropIdenticalDuplicateIDs.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/helpChecks.R +\name{dropIdenticalDuplicateIDs} +\alias{dropIdenticalDuplicateIDs} +\title{Drop Identical Duplicate IDs from Pedigree Data Frame} +\usage{ +dropIdenticalDuplicateIDs(ped, ids, changes = NULL) +} +\arguments{ +\item{ped}{A data frame representing the pedigree.} + +\item{ids}{A vector of IDs to check for duplicates in the pedigree.} + +\item{changes}{An optional list to log changes made during the process.} +} +\description{ +#' This function identifies and removes duplicate entries in a pedigree data frame +based on a list of specified IDs. If multiple rows share the same ID and are +identical, only one instance is retained. The function returns the modified pedigree +data frame along with a log of changes made. +} diff --git a/man/insertEven.Rd b/man/insertEven.Rd index 7fc08138..324e910e 100644 --- a/man/insertEven.Rd +++ b/man/insertEven.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/insertEven.R +% Please edit documentation in R/helpInsertEven.R \name{insertEven} \alias{insertEven} \alias{evenInsert} diff --git a/man/restorePedColnames.Rd b/man/restorePedColnames.Rd new file mode 100644 index 00000000..352f9c2a --- /dev/null +++ b/man/restorePedColnames.Rd @@ -0,0 +1,58 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/helpNames.R +\name{restorePedColnames} +\alias{restorePedColnames} +\title{Restore Original Column Names in a Pedigree Dataframe} +\usage{ +restorePedColnames( + ped, + famID = "famID", + personID = "ID", + momID = "momID", + dadID = "dadID", + gen = "gen", + patID = "patID", + matID = "matID", + spID = "spID", + twinID = "twinID", + zygosity = "zygosity", + sex = "sex", + verbose = FALSE +) +} +\arguments{ +\item{ped}{A pedigree dataframe with standardized column names.} + +\item{famID}{The original name for the family ID column. Default is "fam +ID".} + +\item{personID}{The original name for the person ID column. Default is "ID".} + +\item{momID}{The original name for the mother ID column. Default is "momID".} + +\item{dadID}{The original name for the father ID column. Default is "dadID".} + +\item{gen}{The original name for the generation column. Default is "gen".} + +\item{patID}{The original name for the paternal ID column. Default is "patID".} + +\item{matID}{The original name for the maternal ID column. Default is "matID".} + +\item{spID}{The original name for the spouse ID column. Default is "spID".} + +\item{twinID}{The original name for the twin ID column. Default is "twinID".} + +\item{zygosity}{The original name for the zygosity column. Default is "zygosity".} + +\item{sex}{The original name for the sex column. Default is "sex".} + +\item{verbose}{A logical indicating whether to print progress messages.} +} +\value{ +A pedigree dataframe with restored original column names. +} +\description{ +This function restores the original column names of a pedigree dataframe +based on user-specified names. It is useful for reverting standardized column +names back to their original names after processing. +} diff --git a/man/standardizeColnames.Rd b/man/standardizeColnames.Rd index 96e3c5d0..46b2e1b4 100644 --- a/man/standardizeColnames.Rd +++ b/man/standardizeColnames.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/cleanPedigree.R +% Please edit documentation in R/helpNames.R \name{standardizeColnames} \alias{standardizeColnames} \title{Standardize Column Names in a Dataframe (Internal)} diff --git a/tests/testthat/test-segmentPedigree.R b/tests/testthat/test-segmentPedigree.R index bd2ad541..c7224c6d 100644 --- a/tests/testthat/test-segmentPedigree.R +++ b/tests/testthat/test-segmentPedigree.R @@ -46,6 +46,8 @@ test_that("ped2graph produces a graph for hazard data with mothers", { expect_true(inherits(g, "igraph")) }) + + test_that("ped2graph produces a graph for hazard data with fathers", { expect_silent(data(hazard)) g <- ped2graph(hazard, adjacent = "fathers") @@ -57,3 +59,25 @@ test_that("ped2graph produces a graph for inbreeding data", { g <- ped2graph(inbreeding) expect_true(inherits(g, "igraph")) }) + +# are parents pointing to children? +test_that("ped2graph produces correct edges for potter data", { + expect_silent(data(potter)) + potter_df <- potter + potter_df$ID <- potter$personID + g <- ped2graph(potter_df) + expect_true(inherits(g, "igraph")) + vertices_df <- igraph::V(g) + expect_all_true(igraph::as_ids(vertices_df) %in% as.character(potter_df$personID)) + edges_df <- igraph::E(g) + # are all edges from parent to child? + expect_all_true( + igraph::as_ids(edges_df) %in% paste0(potter_df$dadID, "|", potter_df$personID) | + igraph::as_ids(edges_df) %in% paste0(potter_df$momID, "|", potter_df$personID) + ) + # are there any edges from child to parent? + expect_all_false( + igraph::as_ids(edges_df) %in% paste0(potter_df$personID, "|", potter_df$dadID) | + igraph::as_ids(edges_df) %in% paste0(potter_df$personID, "|", potter_df$momID) + ) +})