Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: BGmisc
Title: An R Package for Extended Behavior Genetics Analysis
Version: 1.6.0
Version: 1.6.0.1
Authors@R: c(
person("S. Mason", "Garrison", , "garrissm@wfu.edu", role = c("aut", "cre"),
comment = c(ORCID = "0000-0002-4804-6003")),
Expand Down
2 changes: 0 additions & 2 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,10 @@ export(computeParentAdjacency)
export(createGenDataFrame)
export(determineSex)
export(dropLink)
export(evenInsert)
export(fitComponentModel)
export(getWikiTreeSummary)
export(identifyComponentModel)
export(inferRelatedness)
export(insertEven)
export(makeInbreeding)
export(makeTwins)
export(ped2add)
Expand Down
5 changes: 4 additions & 1 deletion NEWS.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
# BGmisc NEWS
# Development version: 1.6.0.9000

# BGmisc 1.6
# BGmisc 1.6.0.1
* Add helper functions for checkParents etc
* fixed incorrect direction so that parents are pointing to children in the graphs
* Optimize simulatePedigree and helpers for speed and memory usage
* Major gains in speed for deeper pedigrees
* Added more tests for simulatePedigree
Expand Down
21 changes: 7 additions & 14 deletions R/checkIDs.R
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ checkIDs <- function(ped, verbose = FALSE, repair = FALSE) {
cat("Validation Results:\n")
message(validation_results)
}
if (repair) {
if (repair == TRUE) {
if (verbose == TRUE) {
cat("Attempting to repair:\n")
cat("Step 1: Attempting to repair non-unique IDs...\n")
Expand All @@ -56,19 +56,12 @@ checkIDs <- function(ped, verbose = FALSE, repair = FALSE) {
# if there are non-unique IDs
if (length(validation_results$non_unique_ids) > 0) {
# loop through each non-unique ID
for (id in validation_results$non_unique_ids) {
rows_with_id <- repaired_ped[repaired_ped$ID == id, ]
# If all rows with the same ID are truly identical, keep only the first occurrence
if (nrow(unique(rows_with_id)) == 1) {
# Mark as removed in the changes list
changes[[paste0("ID", id)]] <- "Removed duplicates"
# Keep only the first row, remove the rest
repaired_ped <- repaired_ped[-which(repaired_ped$ID == id)[-1], ] # Remove all but the first occurrence
} else {
# Mark as kept in the changes list
changes[[paste0("ID", id)]] <- "Kept duplicates"
}
}

processed <- dropIdenticalDuplicateIDs(ped = repaired_ped,
ids = validation_results$non_unique_ids,
changes = changes)
repaired_ped <- processed$ped
changes <- processed$changes
}
if (verbose == TRUE) {
cat("Step 2: No repair for parents who are their children at this time\n")
Expand Down
25 changes: 13 additions & 12 deletions R/checkParents.R
Original file line number Diff line number Diff line change
Expand Up @@ -52,14 +52,14 @@ checkParentIDs <- function(ped, verbose = FALSE, repair = FALSE,
missing_mothers <- ped$ID[which(!is.na(ped$dadID) & is.na(ped$momID))]

# Update the validation_results list
if (length(missing_fathers) > 0) {
validation_results$missing_fathers <- missing_fathers
}
if (length(missing_mothers) > 0) {
validation_results$missing_mothers <- missing_mothers
}

validation_results$single_parents <- length(validation_results) > 0
validation_results <- addIfAny(validation_results, "missing_fathers", missing_fathers)
validation_results <- addIfAny(validation_results, "missing_mothers", missing_mothers)

validation_results$single_parents <- (length(missing_fathers) + length(missing_mothers)) > 0





if (verbose && validation_results$single_parents) cat("Missing single parents found.\n")
Expand Down Expand Up @@ -269,11 +269,12 @@ checkParentIDs <- function(ped, verbose = FALSE, repair = FALSE,
}

# restore orginal names that the user orginally provided
names(ped)[names(ped) == "ID"] <- personID
names(ped)[names(ped) == "momID"] <- momID
names(ped)[names(ped) == "dadID"] <- dadID
names(ped)[names(ped) == "famID"] <- famID
return(ped)
ped <- restorePedColnames(ped,
famID = famID,
personID = personID,
momID = momID,
dadID = dadID)

}
#' Repair Parent IDs
#'
Expand Down
47 changes: 47 additions & 0 deletions R/helpChecks.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
#' Drop Identical Duplicate IDs from Pedigree Data Frame
#'
#' #' This function identifies and removes duplicate entries in a pedigree data frame
#' based on a list of specified IDs. If multiple rows share the same ID and are
#' identical, only one instance is retained. The function returns the modified pedigree
#' data frame along with a log of changes made.
#' @param ped A data frame representing the pedigree.
#' @param ids A vector of IDs to check for duplicates in the pedigree.
#' @param changes An optional list to log changes made during the process.
dropIdenticalDuplicateIDs <- function(ped, ids, changes = NULL
) {
if (!is.data.frame(ped)) {
stop("ped must be a data frame")
}
if (is.null(changes)) {
changes <- list()
} else if (!is.list(changes)) {
stop("changes must be a list or NULL")
}

out <- ped

if (!is.null(ids) && length(ids) > 0) {
for (id in ids) {
rows_with_id <- out[out$ID == id, , drop = FALSE]
if (nrow(unique(rows_with_id)) == 1) {
changes[[paste0("ID", id)]] <- "Removed duplicates"
out <- out[-which(out$ID == id)[-1], , drop = FALSE]
} else {
changes[[paste0("ID", id)]] <- "Kept duplicates"
}
}
}

list(ped = out, changes = changes)
}

#' Helper function to conditionally add elements to a list
#' @param validation A list to which elements may be added.
#' @param name A character string representing the name of the element to add.
#' @param value The value to add to the list if it is not NULL or empty
#' @return The updated list with the new element added if applicable.
#' @keywords internal
addIfAny <- function(validation, name, value) {
if (!is.null(value) && length(value) > 0) validation[[name]] <- value
validation
}
2 changes: 0 additions & 2 deletions R/insertEven.R → R/helpInsertEven.R
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
#' The function takes two vectors, m and n, and inserts the elements of m evenly into n.
#' If the length of m is greater than the length of n, the vectors are swapped, and the insertion proceeds.
#' The resulting vector is a combination of m and n, with the elements of m evenly distributed within n.
#' @export
#' @seealso \code{\link{SimPed}} for the main function that uses this supporting function.

insertEven <- function(m, n, verbose = FALSE) {
Expand Down Expand Up @@ -55,5 +54,4 @@ insertEven <- function(m, n, verbose = FALSE) {
}

#' @rdname insertEven
#' @export
evenInsert <- insertEven
75 changes: 75 additions & 0 deletions R/cleanPedigree.R → R/helpNames.R
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,81 @@ standardizeColnames <- function(df, verbose = FALSE) {

return(df)
}
#' Restore Original Column Names in a Pedigree Dataframe
#'
#' This function restores the original column names of a pedigree dataframe
#' based on user-specified names. It is useful for reverting standardized column
#' names back to their original names after processing.
#' @param ped A pedigree dataframe with standardized column names.
#' @param famID The original name for the family ID column. Default is "fam
#' ID".
#' @param personID The original name for the person ID column. Default is "ID".
#' @param momID The original name for the mother ID column. Default is "momID".
#' @param dadID The original name for the father ID column. Default is "dadID".
#' @param gen The original name for the generation column. Default is "gen".
#' @param patID The original name for the paternal ID column. Default is "patID".
#' @param matID The original name for the maternal ID column. Default is "matID".
#' @param spID The original name for the spouse ID column. Default is "spID".
#' @param twinID The original name for the twin ID column. Default is "twinID".
#' @param zygosity The original name for the zygosity column. Default is "zygosity".
#' @param sex The original name for the sex column. Default is "sex".
#' @param verbose A logical indicating whether to print progress messages.
#' @return A pedigree dataframe with restored original column names.
restorePedColnames <- function(ped,
famID = "famID",
personID = "ID",
momID = "momID",
dadID = "dadID",
gen = "gen",
patID = "patID",
matID = "matID",
spID = "spID",
twinID = "twinID",
zygosity = "zygosity",
sex = "sex",
verbose = FALSE) {
if (verbose == TRUE) {
message("Restoring original column names...")
}
if (!inherits(ped, "data.frame")) {
stop("ped should be a data.frame or inherit to a data.frame")
}
if (!is.null(personID) && !is.null(ped$ID)) {
names(ped)[names(ped) == "ID"] <- personID
}
if (!is.null(momID) && !is.null(ped$momID)) {
names(ped)[names(ped) == "momID"] <- momID
}
if (!is.null(dadID) && !is.null(ped$dadID)) {
names(ped)[names(ped) == "dadID"] <- dadID
}

if (!is.null(famID) && !is.null(ped$famID)) {
names(ped)[names(ped) == "famID"] <- famID
}
if (!is.null(gen) && !is.null(ped$gen)) {
names(ped)[names(ped) == "gen"] <- gen
}
if (!is.null(patID) && !is.null(ped$patID)) {
names(ped)[names(ped) == "patID"] <- patID
}
if (!is.null(matID) && !is.null(ped$matID)) {
names(ped)[names(ped) == "matID"] <- matID
}
if (!is.null(spID) && !is.null(ped$spID)) {
names(ped)[names(ped) == "spID"] <- spID
}
if (!is.null(twinID) && !is.null(ped$twinID)) {
names(ped)[names(ped) == "twinID"] <- twinID
}
if (!is.null(zygosity) && !is.null(ped$zygosity)) {
names(ped)[names(ped) == "zygosity"] <- zygosity
}
if (!is.null(sex) && !is.null(ped$sex)) {
names(ped)[names(ped) == "sex"] <- sex
}
ped
}


# Repair Pedigree
Expand Down
17 changes: 9 additions & 8 deletions R/segmentPedigree.R
Original file line number Diff line number Diff line change
Expand Up @@ -139,12 +139,13 @@ ped2graph <- function(ped,
)
edges <- rbind(
as.matrix(data.frame(
personID = as.character(ped[[personID]]),
momID = as.character(ped[[momID]])
# need to be parent to child for igraph
momID = as.character(ped[[momID]]),
personID = as.character(ped[[personID]])
)),
as.matrix(data.frame(
personID = as.character(ped[[personID]]),
dadID = as.character(ped[[dadID]])
dadID = as.character(ped[[dadID]]),
personID = as.character(ped[[personID]])
))
)
} else if (adjacent == "mothers") {
Expand All @@ -154,8 +155,8 @@ ped2graph <- function(ped,
)
)
edges <- as.matrix(data.frame(
personID = as.character(ped[[personID]]),
momID = as.character(ped[[momID]])
momID = as.character(ped[[momID]]),
personID = as.character(ped[[personID]])
))
} else if (adjacent == "fathers") {
nodes <- unique(
Expand All @@ -164,8 +165,8 @@ ped2graph <- function(ped,
)
)
edges <- as.matrix(data.frame(
personID = as.character(ped[[personID]]),
dadID = as.character(ped[[dadID]])
dadID = as.character(ped[[dadID]]),
personID = as.character(ped[[personID]])
))
}
edges <- edges[stats::complete.cases(edges), ]
Expand Down
22 changes: 22 additions & 0 deletions man/addIfAny.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

21 changes: 21 additions & 0 deletions man/dropIdenticalDuplicateIDs.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion man/insertEven.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading
Loading