Skip to content

Commit 5bc027b

Browse files
Add helper functions for pedigree checks and refactor
Introduces helper functions addIfAny and dropIdenticalDuplicateIDs to streamline pedigree validation and repair logic. Refactors checkIDs and checkParents to use these helpers, improving code clarity and modularity. Enhances restorePedColnames to be more robust and adds documentation for new helpers.
1 parent a7a2096 commit 5bc027b

11 files changed

Lines changed: 208 additions & 41 deletions

NAMESPACE

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,12 +17,10 @@ export(computeParentAdjacency)
1717
export(createGenDataFrame)
1818
export(determineSex)
1919
export(dropLink)
20-
export(evenInsert)
2120
export(fitComponentModel)
2221
export(getWikiTreeSummary)
2322
export(identifyComponentModel)
2423
export(inferRelatedness)
25-
export(insertEven)
2624
export(makeInbreeding)
2725
export(makeTwins)
2826
export(ped2add)

NEWS.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,7 @@
11
# BGmisc NEWS
2+
# Development version: 1.6.0.9000
3+
4+
* Add helper functions for checkParents etc
25

36
# BGmisc 1.6
47
* Optimize simulatePedigree and helpers for speed and memory usage

R/checkIDs.R

Lines changed: 6 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -56,19 +56,12 @@ checkIDs <- function(ped, verbose = FALSE, repair = FALSE) {
5656
# if there are non-unique IDs
5757
if (length(validation_results$non_unique_ids) > 0) {
5858
# loop through each non-unique ID
59-
for (id in validation_results$non_unique_ids) {
60-
rows_with_id <- repaired_ped[repaired_ped$ID == id, ]
61-
# If all rows with the same ID are truly identical, keep only the first occurrence
62-
if (nrow(unique(rows_with_id)) == 1) {
63-
# Mark as removed in the changes list
64-
changes[[paste0("ID", id)]] <- "Removed duplicates"
65-
# Keep only the first row, remove the rest
66-
repaired_ped <- repaired_ped[-which(repaired_ped$ID == id)[-1], ] # Remove all but the first occurrence
67-
} else {
68-
# Mark as kept in the changes list
69-
changes[[paste0("ID", id)]] <- "Kept duplicates"
70-
}
71-
}
59+
60+
processed <- dropIdenticalDuplicateIDs(ped = repaired_ped,
61+
ids = validation_results$non_unique_ids,
62+
changes = changes)
63+
repaired_ped <- processed$ped
64+
changes <- processed$changes
7265
}
7366
if (verbose == TRUE) {
7467
cat("Step 2: No repair for parents who are their children at this time\n")

R/checkParents.R

Lines changed: 13 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -52,14 +52,14 @@ checkParentIDs <- function(ped, verbose = FALSE, repair = FALSE,
5252
missing_mothers <- ped$ID[which(!is.na(ped$dadID) & is.na(ped$momID))]
5353

5454
# Update the validation_results list
55-
if (length(missing_fathers) > 0) {
56-
validation_results$missing_fathers <- missing_fathers
57-
}
58-
if (length(missing_mothers) > 0) {
59-
validation_results$missing_mothers <- missing_mothers
60-
}
6155

62-
validation_results$single_parents <- length(validation_results) > 0
56+
validation_results <- addIfAny(validation_results, "missing_fathers", missing_fathers)
57+
validation_results <- addIfAny(validation_results, "missing_mothers", missing_mothers)
58+
59+
validation_results$single_parents <- (length(missing_fathers) + length(missing_mothers)) > 0
60+
61+
62+
6363

6464

6565
if (verbose && validation_results$single_parents) cat("Missing single parents found.\n")
@@ -269,11 +269,12 @@ checkParentIDs <- function(ped, verbose = FALSE, repair = FALSE,
269269
}
270270

271271
# restore orginal names that the user orginally provided
272-
names(ped)[names(ped) == "ID"] <- personID
273-
names(ped)[names(ped) == "momID"] <- momID
274-
names(ped)[names(ped) == "dadID"] <- dadID
275-
names(ped)[names(ped) == "famID"] <- famID
276-
return(ped)
272+
ped <- restorePedColnames(ped,
273+
famID = famID,
274+
personID = personID,
275+
momID = momID,
276+
dadID = dadID)
277+
277278
}
278279
#' Repair Parent IDs
279280
#'

R/helpChecks.R

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
#' Drop Identical Duplicate IDs from Pedigree Data Frame
2+
#'
3+
#' #' This function identifies and removes duplicate entries in a pedigree data frame
4+
#' based on a list of specified IDs. If multiple rows share the same ID and are
5+
#' identical, only one instance is retained. The function returns the modified pedigree
6+
#' data frame along with a log of changes made.
7+
#' @param ped A data frame representing the pedigree.
8+
#' @param ids A vector of IDs to check for duplicates in the pedigree.
9+
#' @param changes An optional list to log changes made during the process.
10+
dropIdenticalDuplicateIDs <- function(ped, ids, changes = NULL
11+
) {
12+
if (!is.data.frame(ped)) {
13+
stop("ped must be a data frame")
14+
}
15+
if (is.null(changes)) {
16+
changes <- list()
17+
} else if (!is.list(changes)) {
18+
stop("changes must be a list or NULL")
19+
}
20+
21+
out <- ped
22+
23+
if (!is.null(ids) && length(ids) > 0) {
24+
for (id in ids) {
25+
rows_with_id <- out[out$ID == id, , drop = FALSE]
26+
if (nrow(unique(rows_with_id)) == 1) {
27+
changes[[paste0("ID", id)]] <- "Removed duplicates"
28+
out <- out[-which(out$ID == id)[-1], , drop = FALSE]
29+
} else {
30+
changes[[paste0("ID", id)]] <- "Kept duplicates"
31+
}
32+
}
33+
}
34+
35+
list(ped = out, changes = changes)
36+
}
37+
38+
#' Helper function to conditionally add elements to a list
39+
#' @param validation A list to which elements may be added.
40+
#' @param name A character string representing the name of the element to add.
41+
#' @param value The value to add to the list if it is not NULL or empty
42+
#' @return The updated list with the new element added if applicable.
43+
#' @keywords internal
44+
addIfAny <- function(validation, name, value) {
45+
if (!is.null(value) && length(value) > 0) validation[[name]] <- value
46+
validation
47+
}

R/helpNames.R

Lines changed: 41 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -71,20 +71,49 @@ restorePedColnames <- function(ped,
7171
spID = "spID",
7272
twinID = "twinID",
7373
zygosity = "zygosity",
74-
sex = "sex"
74+
sex = "sex",
75+
verbose = FALSE
7576
) {
77+
if (verbose == TRUE) {
78+
message("Restoring original column names...")
79+
}
80+
if (!inherits(ped, "data.frame")) {
81+
stop("ped should be a data.frame or inherit to a data.frame")
82+
}
83+
if (!is.null(personID) && !is.null(ped$ID)) {
84+
names(ped)[names(ped) == "ID"] <- personID
85+
}
86+
if (!is.null(momID) && !is.null(ped$momID)) {
87+
names(ped)[names(ped) == "momID"] <- momID
88+
}
89+
if (!is.null(dadID) && !is.null(ped$dadID)) {
90+
names(ped)[names(ped) == "dadID"] <- dadID
91+
}
7692

77-
names(ped)[names(ped) == "ID"] <- personID
78-
names(ped)[names(ped) == "momID"] <- momID
79-
names(ped)[names(ped) == "dadID"] <- dadID
80-
names(ped)[names(ped) == "famID"] <- famID
81-
names(ped)[names(ped) == "gen"] <- gen
82-
names(ped)[names(ped) == "patID"] <- patID
83-
names(ped)[names(ped) == "matID"] <- matID
84-
names(ped)[names(ped) == "spID"] <- spID
85-
names(ped)[names(ped) == "twinID"] <- twinID
86-
names(ped)[names(ped) == "zygosity"] <- zygosity
87-
names(ped)[names(ped) == "sex"] <- sex
93+
if (!is.null(famID) && !is.null(ped$famID)) {
94+
names(ped)[names(ped) == "famID"] <- famID
95+
}
96+
if (!is.null(gen) && !is.null(ped$gen)) {
97+
names(ped)[names(ped) == "gen"] <- gen
98+
}
99+
if (!is.null(patID) && !is.null(ped$patID)) {
100+
names(ped)[names(ped) == "patID"] <- patID
101+
}
102+
if (!is.null(matID) && !is.null(ped$matID)) {
103+
names(ped)[names(ped) == "matID"] <- matID
104+
}
105+
if (!is.null(spID) && !is.null(ped$spID)) {
106+
names(ped)[names(ped) == "spID"] <- spID
107+
}
108+
if (!is.null(twinID) && !is.null(ped$twinID)) {
109+
names(ped)[names(ped) == "twinID"] <- twinID
110+
}
111+
if (!is.null(zygosity) && !is.null(ped$zygosity)) {
112+
names(ped)[names(ped) == "zygosity"] <- zygosity
113+
}
114+
if (!is.null(sex) && !is.null(ped$sex)) {
115+
names(ped)[names(ped) == "sex"] <- sex
116+
}
88117
ped
89118
}
90119

man/addIfAny.Rd

Lines changed: 22 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/dropIdenticalDuplicateIDs.Rd

Lines changed: 19 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/insertEven.Rd

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/restorePedColnames.Rd

Lines changed: 55 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)