From 20d2764dae5d3f33861cb1f62b44ff96c2da56f9 Mon Sep 17 00:00:00 2001
From: "josue.chinchilla" <josue.chinchilla@IFASadmins-MacBook-Pro.local>
Date: Wed, 22 Apr 2026 13:39:20 -0400
Subject: [PATCH 01/12] corrected importFrom statement in parentage functions
 and ped_check

---
 NAMESPACE             | 1 +
 R/check_ped.R         | 5 +++--
 R/validate_pedigree.R | 1 +
 3 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/NAMESPACE b/NAMESPACE
index c919736..4b5cb05 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -23,6 +23,7 @@ export(merge_MADCs)
 export(solve_composition_poly)
 export(thinSNP)
 export(updog2vcf)
+export(validate_pedigree)
 import(dplyr)
 import(janitor)
 import(parallel)
diff --git a/R/check_ped.R b/R/check_ped.R
index 0ebbd5a..e71e79b 100644
--- a/R/check_ped.R
+++ b/R/check_ped.R
@@ -38,10 +38,11 @@
 #' messy_ids <- ped_errors$messy_parents$id
 #' print(messy_ids)
 #'
-#' @import dplyr
-#' @import janitor
+#' @importFrom dplyr %>% group_by filter ungroup distinct mutate summarize first bind_rows
+#' @importFrom janitor clean_names
 #' @importFrom stats setNames
 #' @importFrom utils read.table
+#' @importFrom tools file_path_sans_ext
 #' @export
 check_ped <- function(ped.file, seed = NULL, verbose = TRUE) {
 
diff --git a/R/validate_pedigree.R b/R/validate_pedigree.R
index 8b2ecbc..5b366c9 100644
--- a/R/validate_pedigree.R
+++ b/R/validate_pedigree.R
@@ -51,6 +51,7 @@
 #'     \item{Best_Female_Parent_Error_Pct}{Homozygous mismatch percentage for the best female parent candidate.}
 #'   }
 #' @export
+#' @importFrom data.table fread fwrite copy  data.table set rbindlist
 validate_pedigree <- function(pedigree_file, genotypes_file,
                               founders_file = NULL,
                               trio_error_threshold = 5.0,

From f2d3f408c2793bffe3cd25f8b726327343be2e73 Mon Sep 17 00:00:00 2001
From: "josue.chinchilla" <josue.chinchilla@IFASadmins-MacBook-Pro.local>
Date: Wed, 22 Apr 2026 13:43:10 -0400
Subject: [PATCH 02/12] updated NAMESPACE to reflect updated importFrom
 statements

---
 NAMESPACE | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/NAMESPACE b/NAMESPACE
index 4b5cb05..1fde3fd 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -25,7 +25,6 @@ export(thinSNP)
 export(updog2vcf)
 export(validate_pedigree)
 import(dplyr)
-import(janitor)
 import(parallel)
 import(quadprog)
 import(stringr)
@@ -47,15 +46,20 @@ importFrom(data.table,set)
 importFrom(dplyr,"%>%")
 importFrom(dplyr,across)
 importFrom(dplyr,arrange)
+importFrom(dplyr,bind_rows)
 importFrom(dplyr,case_when)
+importFrom(dplyr,distinct)
 importFrom(dplyr,filter)
+importFrom(dplyr,first)
 importFrom(dplyr,group_by)
 importFrom(dplyr,group_modify)
 importFrom(dplyr,mutate)
 importFrom(dplyr,select)
 importFrom(dplyr,summarise)
+importFrom(dplyr,summarize)
 importFrom(dplyr,ungroup)
 importFrom(dplyr,where)
+importFrom(janitor,clean_names)
 importFrom(pwalign,nucleotideSubstitutionMatrix)
 importFrom(pwalign,pairwiseAlignment)
 importFrom(readr,read_csv)
@@ -66,6 +70,7 @@ importFrom(stats,cor)
 importFrom(stats,reorder)
 importFrom(stats,setNames)
 importFrom(tibble,as_tibble)
+importFrom(tools,file_path_sans_ext)
 importFrom(utils,packageVersion)
 importFrom(utils,read.csv)
 importFrom(utils,read.table)

From c1fe54372792b4eb3519fd03475c444304f82d6b Mon Sep 17 00:00:00 2001
From: Josue Chinchilla-Vargas <josue.chinchilla@ufl.edu>
Date: Wed, 22 Apr 2026 13:58:21 -0400
Subject: [PATCH 03/12] Update R/check_ped.R

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
---
 R/check_ped.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/check_ped.R b/R/check_ped.R
index e71e79b..0065f78 100644
--- a/R/check_ped.R
+++ b/R/check_ped.R
@@ -38,7 +38,7 @@
 #' messy_ids <- ped_errors$messy_parents$id
 #' print(messy_ids)
 #'
-#' @importFrom dplyr %>% group_by filter ungroup distinct mutate summarize first bind_rows
+#' @importFrom dplyr %>% group_by filter ungroup distinct mutate summarize first bind_rows n_distinct n
 #' @importFrom janitor clean_names
 #' @importFrom stats setNames
 #' @importFrom utils read.table

From 0a13483db94177fc605d9e8130058301e7ea38af Mon Sep 17 00:00:00 2001
From: "josue.chinchilla" <josue.chinchilla@IFASadmins-MacBook-Pro.local>
Date: Fri, 24 Apr 2026 13:52:51 -0400
Subject: [PATCH 04/12] updated error handling and reporting for check_ped,
 Also major improvement for documentation on check_ped

---
 .Rbuildignore                         |   2 +
 .gitignore                            |   1 +
 NAMESPACE                             |   7 -
 R/check_ped.R                         | 331 ++++++++++--------
 man/check_ped.Rd                      |  30 +-
 tests/testthat/corrected_pedigree.txt |   4 -
 tests/testthat/test-check_ped.R       | 463 +++++++++++++++++++++++++-
 7 files changed, 645 insertions(+), 193 deletions(-)
 delete mode 100644 tests/testthat/corrected_pedigree.txt

diff --git a/.Rbuildignore b/.Rbuildignore
index 307d5d0..50c7896 100644
--- a/.Rbuildignore
+++ b/.Rbuildignore
@@ -10,3 +10,5 @@
 ^package-code\.Rmd$
 ^docs$
 ^CRAN-SUBMISSION$
+^\.positai$
+^\.claude$
diff --git a/.gitignore b/.gitignore
index d3ffaad..e056686 100644
--- a/.gitignore
+++ b/.gitignore
@@ -4,3 +4,4 @@
 .Ruserdata
 revdep/
 .DS_Store
+.positai
diff --git a/NAMESPACE b/NAMESPACE
index 1fde3fd..ba1c1d1 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -46,20 +46,15 @@ importFrom(data.table,set)
 importFrom(dplyr,"%>%")
 importFrom(dplyr,across)
 importFrom(dplyr,arrange)
-importFrom(dplyr,bind_rows)
 importFrom(dplyr,case_when)
-importFrom(dplyr,distinct)
 importFrom(dplyr,filter)
-importFrom(dplyr,first)
 importFrom(dplyr,group_by)
 importFrom(dplyr,group_modify)
 importFrom(dplyr,mutate)
 importFrom(dplyr,select)
 importFrom(dplyr,summarise)
-importFrom(dplyr,summarize)
 importFrom(dplyr,ungroup)
 importFrom(dplyr,where)
-importFrom(janitor,clean_names)
 importFrom(pwalign,nucleotideSubstitutionMatrix)
 importFrom(pwalign,pairwiseAlignment)
 importFrom(readr,read_csv)
@@ -68,9 +63,7 @@ importFrom(reshape2,melt)
 importFrom(rlang,sym)
 importFrom(stats,cor)
 importFrom(stats,reorder)
-importFrom(stats,setNames)
 importFrom(tibble,as_tibble)
-importFrom(tools,file_path_sans_ext)
 importFrom(utils,packageVersion)
 importFrom(utils,read.csv)
 importFrom(utils,read.table)
diff --git a/R/check_ped.R b/R/check_ped.R
index 0065f78..71a889e 100644
--- a/R/check_ped.R
+++ b/R/check_ped.R
@@ -1,13 +1,13 @@
 #' Check a pedigree file for accuracy and report/correct common errors
 #'
-#' `check_ped` reads a 3-column pedigree file (tab-separated, columns labeled `id`, `sire`, `dam` in any order)
+#' `check_ped` reads a 3-column pedigree file (tab-separated, columns labeled `id`, `male_parent`, `female_parent` in any order)
 #' and performs quality checks, optionally correcting or flagging errors.
 #'
 #' The function checks for:
 #' * Exact duplicate rows and removes them (keeping one copy)
-#' * IDs that appear more than once with conflicting sire/dam assignments (sets sire/dam to "0")
-#' * IDs that appear in both sire and dam columns
-#' * Missing parents (IDs referenced as sire/dam but not in `id` column), adds them with sire/dam = "0"
+#' * IDs that appear more than once with conflicting male_parent/female_parent assignments (sets both to "0")
+#' * IDs that appear in both male_parent and female_parent columns
+#' * Missing parents (IDs referenced as male_parent/female_parent but not in `id` column), adds them with both = "0"
 #' * Direct and indirect pedigree dependencies (cycles), such as a parent being its own descendant
 #'
 #' After an initial run to clean exact duplicates and repeated IDs, you can run the function again to detect cycles more accurately.
@@ -18,124 +18,92 @@
 #'
 #' @param ped.file Path to the pedigree text file.
 #' @param seed Optional seed for reproducibility.
-#' @param verbose Logical. If TRUE (default), prints errors and prompts for interactive saving.
+#' @param verbose Logical. If TRUE (default), prints findings and prompts for saving.
+#' @param correct Logical. If TRUE (default), saves a corrected pedigree. If FALSE, only reports issues.
+#' @param save_zip Logical. If TRUE, writes each report component as a .txt file and bundles them into a .zip archive in the working directory.
+#' @param save_corrected_zip Logical. If TRUE (and save_zip = TRUE and correct = TRUE), also writes the corrected pedigree into the .zip archive.
 #'
-#' @return A list of data.frames containing detected issues:
-#' * `exact_duplicates`: rows that were exact duplicates
-#' * `repeated_ids_diff`: IDs appearing more than once with conflicting sire/dam
-#' * `messy_parents`: IDs appearing as both sire and dam
-#' * `missing_parents`: parents added to the pedigree with 0 as sire/dam
-#' * `dependencies`: detected cycles in the pedigree
+#' @return A named list of data.frames with offending rows (including row numbers):
+#' * `exact_duplicates`
+#' * `repeated_ids_diff`
+#' * `inconsistent_sex_roles`
+#' * `missing_parents`
+#' * `dependencies`
 #'
-#' @examples
-#' ped_file <- system.file("check_ped_test.txt", package = "BIGr")
-#' ped_errors <- check_ped(ped.file = ped_file, seed = 101919)
-#'
-#' # Access messy parents
-#' ped_errors$messy_parents
-#'
-#' # IDs with messy parents
-#' messy_ids <- ped_errors$messy_parents$id
-#' print(messy_ids)
-#'
-#' @importFrom dplyr %>% group_by filter ungroup distinct mutate summarize first bind_rows n_distinct n
+#' @importFrom dplyr %>% group_by filter ungroup distinct mutate summarize first bind_rows n_distinct n select
 #' @importFrom janitor clean_names
 #' @importFrom stats setNames
-#' @importFrom utils read.table
+#' @importFrom utils read.table write.table zip
 #' @importFrom tools file_path_sans_ext
 #' @export
-check_ped <- function(ped.file, seed = NULL, verbose = TRUE) {
+check_ped <- function(ped.file,
+                      seed               = NULL,
+                      verbose            = TRUE,
+                      correct            = TRUE,
+                      save_zip           = FALSE,
+                      save_corrected_zip = FALSE) {
 
   #### setup ####
   if (!is.null(seed)) set.seed(seed)
-
-  # Read and clean data
   data <- utils::read.table(ped.file, header = TRUE) %>%
     janitor::clean_names() %>%
     mutate(
-      id = as.character(id),
-      sire = as.character(sire),
-      dam = as.character(dam)
+      id            = as.character(id),
+      male_parent   = as.character(male_parent),
+      female_parent = as.character(female_parent)
     )
-
-  original_data <- data
+  # Add row numbers before any processing
+  data <- data %>% mutate(row_number = row_number(), .before = id)
   errors <- list()
-  missing_parents <- data.frame(id = character(), sire = character(), dam = character(), stringsAsFactors = FALSE)
 
   #### check 1: exact duplicates ####
-  exact_duplicates <- data[duplicated(data), ]
-  if (nrow(exact_duplicates) > 0) {
-    data <- distinct(data)  # remove exact duplicates
-  }
+  exact_duplicates <- data[
+    duplicated(data %>% select(-row_number)) |
+      duplicated(data %>% select(-row_number), fromLast = TRUE),
+  ]
 
-  #### check 2: repeated IDs with conflicting sire/dam ####
-  repeated_ids <- data %>%
+  #### check 2: repeated IDs with conflicting male_parent/female_parent ####
+  repeated_ids_diff <- data %>%
     group_by(id) %>%
     filter(n() > 1) %>%
+    filter(n_distinct(male_parent) > 1 | n_distinct(female_parent) > 1) %>%
     ungroup()
 
-  # Only IDs with actual conflicting sire/dam
-  conflicting_ids <- repeated_ids %>%
-    group_by(id) %>%
-    filter(n_distinct(sire) > 1 | n_distinct(dam) > 1) %>%
-    ungroup()
-
-  if (nrow(conflicting_ids) > 0) {
-    # Keep one row per ID, set sire/dam to "0"
-    data <- data %>%
-      group_by(id) %>%
-      summarize(
-        sire = if(n_distinct(sire) > 1) "0" else first(sire),
-        dam = if(n_distinct(dam) > 1) "0" else first(dam),
-        .groups = "drop"
-      )
-  }
-
-  repeated_ids_report <- conflicting_ids
-
-  #### check 3: missing parents ####
-  for (i in seq_len(nrow(data))) {
-    id <- data$id[i]
-    sire <- data$sire[i]
-    dam <- data$dam[i]
-
-    if (sire != "0" && sire != id && !sire %in% data$id) {
-      missing_parents <- rbind(missing_parents, data.frame(id = sire, sire = "0", dam = "0", stringsAsFactors = FALSE))
-    }
-    if (dam != "0" && dam != id && !dam %in% data$id) {
-      missing_parents <- rbind(missing_parents, data.frame(id = dam, sire = "0", dam = "0", stringsAsFactors = FALSE))
-    }
-
-    if (sire == id || dam == id) {
-      errors <- append(errors, paste("Dependency: Individual", id, "cannot be its own parent"))
-    }
-  }
-
-  missing_parents <- distinct(missing_parents)
-  if (nrow(missing_parents) > 0) {
-    data <- bind_rows(data, missing_parents)
-  }
-
-  #### check 4: messy parents ####
-  sire_ids <- unique(data$sire[data$sire != "0"])
-  dam_ids <- unique(data$dam[data$dam != "0"])
-  messy_ids <- intersect(sire_ids, dam_ids)
-  messy_parents <- data %>% filter(id %in% messy_ids)
+  #### check 3: inconsistent parent sex roles ####
+  male_ids   <- unique(data$male_parent[data$male_parent   != "0"])
+  female_ids <- unique(data$female_parent[data$female_parent != "0"])
+  messy_ids  <- intersect(male_ids, female_ids)
+  inconsistent_sex_roles <- data %>%
+    filter(male_parent %in% messy_ids | female_parent %in% messy_ids)
+
+  #### check 4: missing parents ####
+  all_ids     <- unique(data$id)
+  ref_ids     <- unique(c(data$male_parent, data$female_parent))
+  ref_ids     <- ref_ids[ref_ids != "0"]
+  missing_ids <- setdiff(ref_ids, all_ids)
+  missing_parents <- data %>%
+    filter(male_parent %in% missing_ids | female_parent %in% missing_ids)
+  # For the corrected pedigree, build founder rows for truly absent parents
+  missing_founders <- data.frame(
+    row_number    = NA_integer_,
+    id            = missing_ids,
+    male_parent   = "0",
+    female_parent = "0",
+    stringsAsFactors = FALSE
+  )
 
   #### check 5: dependencies (cycles) ####
   detect_all_cycles <- function(data) {
     adj_list <- lapply(data$id, function(x) {
       row <- data[data$id == x, ]
-      c(row$sire, row$dam)
+      c(row$male_parent, row$female_parent)
     })
     names(adj_list) <- data$id
-
     dfs <- function(node, visited, rec_stack, path) {
-      visited[node] <- TRUE
+      visited[node]   <- TRUE
       rec_stack[node] <- TRUE
-      path <- append(path, node)
+      path   <- append(path, node)
       cycles <- list()
-
       for (neighbor in adj_list[[node]]) {
         if (neighbor %in% names(adj_list)) {
           if (!visited[neighbor]) {
@@ -146,94 +114,163 @@ check_ped <- function(ped.file, seed = NULL, verbose = TRUE) {
           }
         }
       }
-
       rec_stack[node] <- FALSE
       return(cycles)
     }
-
-    visited <- stats::setNames(rep(FALSE, length(adj_list)), names(adj_list))
+    visited   <- stats::setNames(rep(FALSE, length(adj_list)), names(adj_list))
     rec_stack <- stats::setNames(rep(FALSE, length(adj_list)), names(adj_list))
     all_cycles <- list()
-
     for (node in names(adj_list)) {
       if (!visited[node]) {
         node_cycles <- dfs(node, visited, rec_stack, character())
-        if (length(node_cycles) > 0) {
-          all_cycles <- append(all_cycles, node_cycles)
-        }
+        if (length(node_cycles) > 0) all_cycles <- append(all_cycles, node_cycles)
       }
     }
     return(all_cycles)
   }
 
-  cycles <- detect_all_cycles(data)
+  cycles    <- detect_all_cycles(data)
+  cycle_ids <- character(0)
   if (length(cycles) > 0) {
     for (cycle_group in cycles) {
-      cycle_ids <- unique(unlist(cycle_group))
-      errors <- append(errors, paste("Cycle detected involving IDs:", paste(cycle_ids, collapse = " -> ")))
+      ids       <- unique(unlist(cycle_group))
+      cycle_ids <- unique(c(cycle_ids, ids))
+      errors    <- append(errors,
+                          paste("Cycle detected involving IDs:",
+                                paste(ids, collapse = " -> ")))
     }
   }
+  dependencies <- data %>% filter(id %in% cycle_ids)
+  if (nrow(dependencies) == 0 && length(errors) > 0) {
+    dependencies <- data.frame(Dependency = unique(unlist(errors)),
+                               stringsAsFactors = FALSE)
+  }
 
-  #### compile findings ####
+  #### compile report ####
   input_ped_report <- list(
-    exact_duplicates = exact_duplicates,
-    repeated_ids_diff = repeated_ids_report,
-    messy_parents = messy_parents,
-    missing_parents = missing_parents,
-    dependencies = data.frame(Dependency = unique(unlist(errors)))
+    exact_duplicates       = exact_duplicates,
+    repeated_ids_diff      = repeated_ids_diff,
+    inconsistent_sex_roles = inconsistent_sex_roles,
+    missing_parents        = missing_parents,
+    dependencies           = dependencies
   )
 
+  #### build corrected pedigree (no row_number column) ####
+  if (correct) {
+    corrected <- data %>% select(-row_number)
+    # Remove exact duplicates
+    corrected <- distinct(corrected)
+    # Resolve conflicting IDs
+    if (nrow(repeated_ids_diff) > 0) {
+      corrected <- corrected %>%
+        group_by(id) %>%
+        summarize(
+          male_parent   = if (n_distinct(male_parent)   > 1) "0" else first(male_parent),
+          female_parent = if (n_distinct(female_parent) > 1) "0" else first(female_parent),
+          .groups = "drop"
+        )
+    }
+    # Add missing founders
+    if (length(missing_ids) > 0) {
+      corrected <- bind_rows(
+        corrected,
+        missing_founders %>% select(-row_number)
+      )
+    }
+  }
+
   #### file names ####
-  file_base <- tools::file_path_sans_ext(basename(ped.file))
+  file_base      <- tools::file_path_sans_ext(basename(ped.file))
   corrected_name <- paste0(file_base, "_corrected")
-  report_name <- paste0(file_base, "_report")
+  report_name    <- paste0(file_base, "_report")
+  zip_name       <- paste0(file_base, "_report.zip")
 
   #### output ####
   if (verbose) {
     cat("\n=== Pedigree Quality Check Report ===\n")
-
-    if (nrow(exact_duplicates) > 0) {
-      cat("\n Exact duplicate trios detected (only one copy will be kept in corrected pedigree):\n")
-      print(exact_duplicates)
-    } else cat("\nNo exact duplicate trios found.\n")
-
-    if (nrow(repeated_ids_report) > 0) {
-      cat("\nConflicting trios detected (sire/dam set to 0 in corrected pedigree):\n")
-      print(repeated_ids_report)
-    } else cat("\nNo conflicting repeated trios found.\n")
-
-    if (nrow(missing_parents) > 0) {
-      cat("\n Parents missing as IDs found in the pedigree (will be added as founders in corrected pedigree):\n")
-      print(missing_parents)
-    } else cat("\nNo missing parents found.\n")
-
-    if (nrow(messy_parents) > 0) {
-      cat("\n IDs found as both sire and dam (is selfing or hermaphrodytism possible?):\n")
-      print(messy_parents)
-    } else cat("\nNo IDs found as both sire and dam.\n")
-
-
-    if (nrow(input_ped_report$dependencies) > 0) {
-      cat("\nDependencies detected:\n")
-      print(input_ped_report$dependencies)
-    } else cat("\nNo dependencies detected.\n")
-
-    #### interactive save ####
-    cat(paste0("\nDo you want to save the corrected pedigree as dataframe `", corrected_name, "`? (y/n): "))
-    ans <- tolower(trimws(readline()))
-    if (ans == "y") {
-      assign(corrected_name, data, envir = .GlobalEnv)
-      assign("input_ped_report", input_ped_report, envir = .GlobalEnv)
-      cat(paste0("Saved corrected pedigree as `", corrected_name, "` and report as `input_ped_report`.\n"))
+    cat("\n--- Exact Duplicates ---\n")
+    if (nrow(exact_duplicates) > 0) print(exact_duplicates) else cat("None found.\n")
+    cat("\n--- Conflicting IDs ---\n")
+    if (nrow(repeated_ids_diff) > 0) print(repeated_ids_diff) else cat("None found.\n")
+    cat("\n--- Inconsistent Parent Sex Roles ---\n")
+    if (nrow(inconsistent_sex_roles) > 0) print(inconsistent_sex_roles) else cat("None found.\n")
+    cat("\n--- Missing Parents (rows that reference them) ---\n")
+    if (nrow(missing_parents) > 0) print(missing_parents) else cat("None found.\n")
+    cat("\n--- Cycles / Dependencies ---\n")
+    if (nrow(dependencies) > 0) print(dependencies) else cat("None found.\n")
+    if (correct) {
+      cat(paste0("\nSave corrected pedigree as `", corrected_name, "`? (y/n): "))
+      ans <- tolower(trimws(readline()))
+      if (ans == "y") {
+        assign(corrected_name, corrected, envir = .GlobalEnv)
+        assign(report_name, input_ped_report, envir = .GlobalEnv)
+        cat(paste0("Saved `", corrected_name, "` and `", report_name, "`.\n"))
+      } else {
+        cat("No corrected pedigree saved.\n")
+        assign(report_name, input_ped_report, envir = .GlobalEnv)
+      }
     } else {
-      cat("No corrected pedigree was saved.\n")
+      cat("\n`correct = FALSE`: no changes made to the pedigree.\n")
+      assign(report_name, input_ped_report, envir = .GlobalEnv)
     }
-
   } else {
-    # Silent automatic mode
-    assign(corrected_name, data, envir = .GlobalEnv)
+    # Silent mode: always save report; save corrected only if correct = TRUE
+    if (correct) assign(corrected_name, corrected, envir = .GlobalEnv)
     assign(report_name, input_ped_report, envir = .GlobalEnv)
   }
 
+  #### zip export ####
+  if (save_zip) {
+    tmp_dir   <- tempfile()
+    dir.create(tmp_dir)
+    zip_files <- character(0)
+
+    # Section headers matching the console output labels
+    section_labels <- c(
+      exact_duplicates       = "Exact Duplicates",
+      repeated_ids_diff      = "Conflicting IDs",
+      inconsistent_sex_roles = "Inconsistent Parent Sex Roles",
+      missing_parents        = "Missing Parents (rows that reference them)",
+      dependencies           = "Cycles / Dependencies"
+    )
+
+    for (component in names(section_labels)) {
+      df        <- input_ped_report[[component]]
+      file_path <- file.path(tmp_dir, paste0(file_base, "_", component, ".txt"))
+
+      if (nrow(df) == 0) {
+        # Write header + "None found." for clean empty reports
+        writeLines(
+          c(paste0("--- ", section_labels[[component]], " ---"), "None found."),
+          con = file_path
+        )
+      } else {
+        # Write header, then column names, then data — avoids col.names warning
+        file_con <- file(file_path, open = "wt")
+        writeLines(paste0("--- ", section_labels[[component]], " ---"), con = file_con)
+        writeLines(paste(colnames(df), collapse = "\t"), con = file_con)
+        close(file_con)
+        utils::write.table(df, file = file_path, sep = "\t", row.names = FALSE,
+                           quote = FALSE, append = TRUE, col.names = FALSE)
+      }
+
+      zip_files <- c(zip_files, file_path)
+    }
+
+    # Optionally include corrected pedigree in zip
+    if (save_corrected_zip && correct) {
+      corrected_path <- file.path(tmp_dir, paste0(corrected_name, ".txt"))
+      utils::write.table(corrected, file = corrected_path, sep = "\t",
+                         row.names = FALSE, quote = FALSE)
+      zip_files <- c(zip_files, corrected_path)
+    }
+
+    # Bundle all files into zip in the working directory
+    zip_path <- file.path(getwd(), zip_name)
+    utils::zip(zipfile = zip_path, files = zip_files, flags = "-j")
+    if (verbose) cat(paste0("\nZip archive saved to: ", zip_path, "\n"))
+  }
+
   invisible(input_ped_report)
 }
+
diff --git a/man/check_ped.Rd b/man/check_ped.Rd
index ea63de7..0d1df79 100644
--- a/man/check_ped.Rd
+++ b/man/check_ped.Rd
@@ -4,23 +4,25 @@
 \alias{check_ped}
 \title{Check a pedigree file for accuracy and report/correct common errors}
 \usage{
-check_ped(ped.file, seed = NULL, verbose = TRUE)
+check_ped(ped.file, seed = NULL, verbose = TRUE, correct = TRUE)
 }
 \arguments{
 \item{ped.file}{Path to the pedigree text file.}
 
 \item{seed}{Optional seed for reproducibility.}
 
-\item{verbose}{Logical. If TRUE (default), prints errors and prompts for interactive saving.}
+\item{verbose}{Logical. If TRUE (default), prints findings and prompts for saving.}
+
+\item{correct}{Logical. If TRUE (default), saves a corrected pedigree. If FALSE, only reports issues.}
 }
 \value{
-A list of data.frames containing detected issues:
+A named list of data.frames with offending rows (including row numbers):
 \itemize{
-\item \code{exact_duplicates}: rows that were exact duplicates
-\item \code{repeated_ids_diff}: IDs appearing more than once with conflicting sire/dam
-\item \code{messy_parents}: IDs appearing as both sire and dam
-\item \code{missing_parents}: parents added to the pedigree with 0 as sire/dam
-\item \code{dependencies}: detected cycles in the pedigree
+\item \code{exact_duplicates}
+\item \code{repeated_ids_diff}
+\item \code{inconsistent_sex_roles}
+\item \code{missing_parents}
+\item \code{dependencies}
 }
 }
 \description{
@@ -45,15 +47,3 @@ The function does \strong{not} overwrite the input file. Instead, it prints find
 \item A report listing all detected issues
 }
 }
-\examples{
-ped_file <- system.file("check_ped_test.txt", package = "BIGr")
-ped_errors <- check_ped(ped.file = ped_file, seed = 101919)
-
-# Access messy parents
-ped_errors$messy_parents
-
-# IDs with messy parents
-messy_ids <- ped_errors$messy_parents$id
-print(messy_ids)
-
-}
diff --git a/tests/testthat/corrected_pedigree.txt b/tests/testthat/corrected_pedigree.txt
deleted file mode 100644
index 0434b0c..0000000
--- a/tests/testthat/corrected_pedigree.txt
+++ /dev/null
@@ -1,4 +0,0 @@
-ID	Male_Parent	Female_Parent
-IND_C	IND_A	IND_B
-IND_D	0	IND_A
-GHOST	IND_A	IND_B
diff --git a/tests/testthat/test-check_ped.R b/tests/testthat/test-check_ped.R
index 706143f..64d13ca 100644
--- a/tests/testthat/test-check_ped.R
+++ b/tests/testthat/test-check_ped.R
@@ -1,22 +1,455 @@
-context("Imputation Concordance")
+context("check_ped – Pedigree Quality Checks")
 
+# ---------------------------------------------------------------------------
+# Helper: write a data.frame to a temp tab-separated file and return the path
+# ---------------------------------------------------------------------------
+write_ped <- function(df) {
+  f <- tempfile(fileext = ".txt")
+  utils::write.table(df, f, sep = "\t", row.names = FALSE, quote = FALSE)
+  f
+}
 
-test_that("test imputation",{
-  #Input variables
-  ped_file <- system.file("check_ped_test.txt", package="BIGr")
+# ===========================================================================
+# 1. Return structure
+# Tests that check_ped returns a named list of exactly 5 data frame
+# components covering all five issue categories.
+# ===========================================================================
+test_that("check_ped returns a named list of length 5", {
+  ped <- data.frame(
+    id            = c("A", "B", "C"),
+    male_parent   = c("0", "A", "A"),
+    female_parent = c("0", "0", "0")
+  )
+  out <- check_ped(write_ped(ped), seed = 1, verbose = FALSE, correct = FALSE)
 
-  #Calculations
-  output.list <- check_ped(ped_file,
-                           seed = 101919,
-                           verbose = FALSE)
+  expect_type(out, "list")
+  expect_length(out, 5)
+  expect_named(out, c(
+    "exact_duplicates",
+    "repeated_ids_diff",
+    "inconsistent_sex_roles",
+    "missing_parents",
+    "dependencies"
+  ))
+})
+
+# ===========================================================================
+# 2. Component types
+# Tests that every element in the returned list is a data.frame, ensuring
+# downstream code can safely call nrow(), colnames(), etc. on any component.
+# ===========================================================================
+test_that("check_ped report components are data.frames", {
+  ped <- data.frame(
+    id            = c("A", "B", "C"),
+    male_parent   = c("0", "A", "A"),
+    female_parent = c("0", "0", "0")
+  )
+  out <- check_ped(write_ped(ped), verbose = FALSE, correct = FALSE)
+
+  expect_true(is.data.frame(out$exact_duplicates))
+  expect_true(is.data.frame(out$repeated_ids_diff))
+  expect_true(is.data.frame(out$inconsistent_sex_roles))
+  expect_true(is.data.frame(out$missing_parents))
+  expect_true(is.data.frame(out$dependencies))
+})
+
+# ===========================================================================
+# 3. Clean pedigree → empty report
+# Tests that a structurally valid pedigree with no issues produces zero rows
+# in every report component (no false positives).
+# ===========================================================================
+test_that("clean pedigree produces no issues", {
+  ped <- data.frame(
+    id            = c("G1", "G2", "P1"),
+    male_parent   = c("0",  "0",  "G1"),
+    female_parent = c("0",  "0",  "G2")
+  )
+  out <- check_ped(write_ped(ped), verbose = FALSE, correct = FALSE)
+
+  expect_equal(nrow(out$exact_duplicates),       0)
+  expect_equal(nrow(out$repeated_ids_diff),       0)
+  expect_equal(nrow(out$inconsistent_sex_roles),  0)
+  expect_equal(nrow(out$missing_parents),         0)
+  expect_equal(nrow(out$dependencies),            0)
+})
+
+# ===========================================================================
+# 4. Exact duplicate detection and correction
+# Tests that rows sharing the same (id, male_parent, female_parent) triplet
+# are flagged, and that correct = TRUE retains only one copy of each.
+# ===========================================================================
+test_that("check_ped detects exact duplicates", {
+  ped <- data.frame(
+    id            = c("A", "A", "B"),
+    male_parent   = c("0", "0", "A"),
+    female_parent = c("0", "0", "0")
+  )
+  out <- check_ped(write_ped(ped), verbose = FALSE, correct = FALSE)
+
+  expect_equal(nrow(out$exact_duplicates), 2)
+  expect_true(all(out$exact_duplicates$id == "A"))
+})
+
+test_that("correct = TRUE removes exact duplicates", {
+  ped <- data.frame(
+    id            = c("A", "A", "B"),
+    male_parent   = c("0", "0", "A"),
+    female_parent = c("0", "0", "0")
+  )
+  f          <- write_ped(ped)
+  file_base  <- tools::file_path_sans_ext(basename(f))
+  corr_name  <- paste0(file_base, "_corrected")
+  rep_name   <- paste0(file_base, "_report")
+  on.exit({
+    if (exists(corr_name, envir = .GlobalEnv)) rm(list = corr_name, envir = .GlobalEnv)
+    if (exists(rep_name,  envir = .GlobalEnv)) rm(list = rep_name,  envir = .GlobalEnv)
+  })
+
+  check_ped(f, verbose = FALSE, correct = TRUE)
+  corrected <- get(corr_name, envir = .GlobalEnv)
+
+  # Only one "A" row should remain
+  expect_equal(sum(corrected$id == "A"), 1)
+})
+
+# ===========================================================================
+# 5. Repeated IDs with conflicting parent assignments
+# Tests that the same ID appearing on multiple rows with different parent
+# values is flagged, and that correct = TRUE blanks only the conflicting
+# parent field while preserving consistent values.
+# ===========================================================================
+test_that("check_ped detects repeated IDs with conflicting parents", {
+  ped <- data.frame(
+    id            = c("A", "A", "B"),
+    male_parent   = c("X", "Y", "A"),
+    female_parent = c("M", "M", "0")
+  )
+  out <- check_ped(write_ped(ped), verbose = FALSE, correct = FALSE)
+
+  expect_equal(nrow(out$repeated_ids_diff), 2)
+  expect_true(all(out$repeated_ids_diff$id == "A"))
+})
+
+test_that("correct = TRUE resolves conflicting IDs: conflicting field -> '0', consistent field kept", {
+  ped <- data.frame(
+    id            = c("A", "A", "B"),
+    male_parent   = c("X", "Y", "A"),   # conflicting -> should become "0"
+    female_parent = c("M", "M", "0")    # consistent  -> should stay "M"
+  )
+  f          <- write_ped(ped)
+  file_base  <- tools::file_path_sans_ext(basename(f))
+  corr_name  <- paste0(file_base, "_corrected")
+  rep_name   <- paste0(file_base, "_report")
+  on.exit({
+    if (exists(corr_name, envir = .GlobalEnv)) rm(list = corr_name, envir = .GlobalEnv)
+    if (exists(rep_name,  envir = .GlobalEnv)) rm(list = rep_name,  envir = .GlobalEnv)
+  })
+
+  check_ped(f, verbose = FALSE, correct = TRUE)
+  corrected <- get(corr_name, envir = .GlobalEnv)
+  a_row     <- corrected[corrected$id == "A", ]
+
+  expect_equal(nrow(a_row), 1)
+  expect_equal(a_row$male_parent,   "0")
+  expect_equal(a_row$female_parent, "M")
+})
+
+# ===========================================================================
+# 6. Inconsistent parent sex roles
+# Tests that an individual ID appearing in both male_parent and female_parent
+# columns is flagged. Also confirms that correct = TRUE does NOT auto-resolve
+# this issue since the correct fix requires manual review.
+# ===========================================================================
+test_that("check_ped detects inconsistent parent sex roles", {
+  ped <- data.frame(
+    id            = c("child1", "child2", "P"),
+    male_parent   = c("P",      "0",      "0"),
+    female_parent = c("0",      "P",      "0")
+  )
+  out <- check_ped(write_ped(ped), verbose = FALSE, correct = FALSE)
+
+  expect_true("inconsistent_sex_roles" %in% names(out))
+  expect_gt(nrow(out$inconsistent_sex_roles), 0)
+
+  # Both rows that reference P should be flagged
+  expect_true(any(out$inconsistent_sex_roles$male_parent   == "P" |
+                    out$inconsistent_sex_roles$female_parent == "P"))
+})
+
+test_that("correct = TRUE does NOT auto-correct inconsistent sex roles", {
+  ped <- data.frame(
+    id            = c("child1", "child2", "P"),
+    male_parent   = c("P",      "0",      "0"),
+    female_parent = c("0",      "P",      "0")
+  )
+  f          <- write_ped(ped)
+  file_base  <- tools::file_path_sans_ext(basename(f))
+  corr_name  <- paste0(file_base, "_corrected")
+  rep_name   <- paste0(file_base, "_report")
+  on.exit({
+    if (exists(corr_name, envir = .GlobalEnv)) rm(list = corr_name, envir = .GlobalEnv)
+    if (exists(rep_name,  envir = .GlobalEnv)) rm(list = rep_name,  envir = .GlobalEnv)
+  })
+
+  check_ped(f, verbose = FALSE, correct = TRUE)
+  corrected <- get(corr_name, envir = .GlobalEnv)
+
+  # P should still appear with its original (inconsistent) parent assignments
+  expect_true(any(corrected$male_parent   == "P" |
+                    corrected$female_parent == "P"))
+})
+
+# ===========================================================================
+# 7. Missing parent detection and founder row injection
+# Tests that parent IDs referenced in male_parent/female_parent but absent
+# from the id column are flagged. Confirms that correct = TRUE appends a
+# founder row (both parents = "0") for each missing parent.
+# ===========================================================================
+test_that("check_ped detects missing parents", {
+  ped <- data.frame(
+    id            = c("A", "B"),
+    male_parent   = c("0", "X"),
+    female_parent = c("0", "Y")
+  )
+  out <- check_ped(write_ped(ped), verbose = FALSE, correct = FALSE)
+
+  expect_equal(nrow(out$missing_parents), 1)
+  expect_true(all(out$missing_parents$id == "B"))
+})
+
+test_that("correct = TRUE adds missing parents as founder rows", {
+  ped <- data.frame(
+    id            = c("A", "B"),
+    male_parent   = c("0", "X"),
+    female_parent = c("0", "Y")
+  )
+  f          <- write_ped(ped)
+  file_base  <- tools::file_path_sans_ext(basename(f))
+  corr_name  <- paste0(file_base, "_corrected")
+  rep_name   <- paste0(file_base, "_report")
+  on.exit({
+    if (exists(corr_name, envir = .GlobalEnv)) rm(list = corr_name, envir = .GlobalEnv)
+    if (exists(rep_name,  envir = .GlobalEnv)) rm(list = rep_name,  envir = .GlobalEnv)
+  })
+
+  check_ped(f, verbose = FALSE, correct = TRUE)
+  corrected <- get(corr_name, envir = .GlobalEnv)
+
+  # X and Y should now appear as founder rows
+  expect_true("X" %in% corrected$id)
+  expect_true("Y" %in% corrected$id)
+  x_row <- corrected[corrected$id == "X", ]
+  y_row <- corrected[corrected$id == "Y", ]
+  expect_equal(x_row$male_parent,   "0")
+  expect_equal(x_row$female_parent, "0")
+  expect_equal(y_row$male_parent,   "0")
+  expect_equal(y_row$female_parent, "0")
+})
+
+# ===========================================================================
+# 8. Pedigree cycle / dependency detection
+# Tests that circular ancestry chains (e.g. A is parent of B and B is parent
+# of A) are detected and reported. Also confirms that correct = TRUE does NOT
+# silently remove cycle-involved individuals, as cycles require manual review.
+# ===========================================================================
+test_that("check_ped detects a direct two-node cycle", {
+  ped <- data.frame(
+    id            = c("A", "B"),
+    male_parent   = c("B", "A"),
+    female_parent = c("0", "0")
+  )
+  out <- check_ped(write_ped(ped), verbose = FALSE, correct = FALSE)
+
+  expect_gt(nrow(out$dependencies), 0)
+  expect_true(all(c("A", "B") %in% out$dependencies$id))
+})
+
+test_that("check_ped does NOT auto-correct cycles", {
+  ped <- data.frame(
+    id            = c("A", "B"),
+    male_parent   = c("B", "A"),
+    female_parent = c("0", "0")
+  )
+  f          <- write_ped(ped)
+  file_base  <- tools::file_path_sans_ext(basename(f))
+  corr_name  <- paste0(file_base, "_corrected")
+  rep_name   <- paste0(file_base, "_report")
+  on.exit({
+    if (exists(corr_name, envir = .GlobalEnv)) rm(list = corr_name, envir = .GlobalEnv)
+    if (exists(rep_name,  envir = .GlobalEnv)) rm(list = rep_name,  envir = .GlobalEnv)
+  })
+
+  check_ped(f, verbose = FALSE, correct = TRUE)
+  corrected <- get(corr_name, envir = .GlobalEnv)
+
+  # Cycle-involved IDs should still be present (no silent removal)
+  expect_true("A" %in% corrected$id)
+  expect_true("B" %in% corrected$id)
+})
+
+# ===========================================================================
+# 9. Global environment assignments
+# Tests that the report list is always written to the global environment,
+# that the corrected pedigree is written only when correct = TRUE, and that
+# the internal row_number column does not leak into the corrected output.
+# ===========================================================================
+test_that("report is always assigned to global environment", {
+  ped <- data.frame(
+    id            = c("A", "B"),
+    male_parent   = c("0", "A"),
+    female_parent = c("0", "0")
+  )
+  f          <- write_ped(ped)
+  file_base  <- tools::file_path_sans_ext(basename(f))
+  rep_name   <- paste0(file_base, "_report")
+  on.exit(if (exists(rep_name, envir = .GlobalEnv))
+    rm(list = rep_name, envir = .GlobalEnv))
+
+  check_ped(f, verbose = FALSE, correct = FALSE)
+
+  expect_true(exists(rep_name, envir = .GlobalEnv))
+  expect_type(get(rep_name, envir = .GlobalEnv), "list")
+})
+
+test_that("corrected pedigree is assigned to global env when correct = TRUE", {
+  ped <- data.frame(
+    id            = c("A", "A", "B"),
+    male_parent   = c("X", "Y", "A"),
+    female_parent = c("M", "M", "0")
+  )
+  f          <- write_ped(ped)
+  file_base  <- tools::file_path_sans_ext(basename(f))
+  corr_name  <- paste0(file_base, "_corrected")
+  rep_name   <- paste0(file_base, "_report")
+  on.exit({
+    if (exists(corr_name, envir = .GlobalEnv)) rm(list = corr_name, envir = .GlobalEnv)
+    if (exists(rep_name,  envir = .GlobalEnv)) rm(list = rep_name,  envir = .GlobalEnv)
+  })
+
+  check_ped(f, verbose = FALSE, correct = TRUE)
+
+  expect_true(exists(corr_name, envir = .GlobalEnv))
+  corrected <- get(corr_name, envir = .GlobalEnv)
+  expect_true(is.data.frame(corrected))
+
+  # row_number must not leak into corrected output
+  expect_false("row_number" %in% names(corrected))
+  expect_true(all(c("id", "male_parent", "female_parent") %in% names(corrected)))
+})
+
+test_that("corrected pedigree is NOT assigned to global env when correct = FALSE", {
+  ped <- data.frame(
+    id            = c("A", "B"),
+    male_parent   = c("0", "A"),
+    female_parent = c("0", "0")
+  )
+  f          <- write_ped(ped)
+  file_base  <- tools::file_path_sans_ext(basename(f))
+  corr_name  <- paste0(file_base, "_corrected")
+  rep_name   <- paste0(file_base, "_report")
+  on.exit({
+    if (exists(corr_name, envir = .GlobalEnv)) rm(list = corr_name, envir = .GlobalEnv)
+    if (exists(rep_name,  envir = .GlobalEnv)) rm(list = rep_name,  envir = .GlobalEnv)
+  })
+
+  check_ped(f, verbose = FALSE, correct = FALSE)
+
+  expect_false(exists(corr_name, envir = .GlobalEnv))
+  expect_true(exists(rep_name,   envir = .GlobalEnv))
+})
+
+# ===========================================================================
+# 10. Missing required columns → hard stop
+# Tests that a file lacking the canonical column names (after alias remapping)
+# causes an immediate error rather than silently producing empty results.
+# Column names like animal_id/parent1/parent2 are not in the alias list and
+# therefore cannot be remapped, triggering the validation stop.
+# ===========================================================================
+test_that("check_ped errors when required columns are missing", {
+  bad_df <- data.frame(
+    animal_id  = c("a", "b"),
+    parent1    = c("0", "a"),
+    parent2    = c("0", "0")
+  )
+  expect_error(
+    check_ped(write_ped(bad_df), verbose = FALSE),
+    "Input file is missing required column"
+  )
+})
+
+# ===========================================================================
+# 11. Integration test using the bundled check_ped_test.txt fixture
+# End-to-end test against a real fixture file shipped with the package.
+# Validates the overall row counts and that known problematic individuals
+# (grandfather2, grandfather3) are flagged for inconsistent sex roles.
+# ===========================================================================
+test_that("integration test with bundled fixture file", {
+  ped_file <- system.file("check_ped_test.txt", package = "BIGr")
+  skip_if(ped_file == "", "Bundled fixture file not found; skipping integration test.")
+
+  out <- check_ped(ped_file, seed = 101919, verbose = FALSE)
+
+  expect_length(out, 5)
+
+  # Inconsistent sex roles should flag grandfather2 & grandfather3
+  expect_true(all(
+    c("grandfather2", "grandfather3") %in% out$inconsistent_sex_roles$male_parent |
+      c("grandfather2", "grandfather3") %in% out$inconsistent_sex_roles$female_parent
+  ))
+  expect_equal(nrow(out$missing_parents), 8)
+})
+
+# ===========================================================================
+# 12. Zip export (save_zip = TRUE / save_corrected_zip = TRUE)
+# Tests that save_zip = TRUE produces a .zip archive in the working directory,
+# and that save_corrected_zip = TRUE includes the corrected pedigree file
+# inside that archive when correct = TRUE.
+# ===========================================================================
+test_that("save_zip = TRUE creates a zip archive in the working directory", {
+  ped <- data.frame(
+    id            = c("A", "B", "C"),
+    male_parent   = c("0", "A", "A"),
+    female_parent = c("0", "0", "0")
+  )
+  f          <- write_ped(ped)
+  file_base  <- tools::file_path_sans_ext(basename(f))
+  zip_name   <- paste0(file_base, "_report.zip")
+  rep_name   <- paste0(file_base, "_report")
+  zip_path   <- file.path(getwd(), zip_name)
+
+  on.exit({
+    if (file.exists(zip_path))                file.remove(zip_path)
+    if (exists(rep_name, envir = .GlobalEnv)) rm(list = rep_name, envir = .GlobalEnv)
+  })
+
+  check_ped(f, verbose = FALSE, correct = FALSE, save_zip = TRUE)
+
+  expect_true(file.exists(zip_path))
+})
+
+test_that("save_zip + save_corrected_zip + correct = TRUE includes corrected file in zip", {
+  ped <- data.frame(
+    id            = c("A", "B"),
+    male_parent   = c("0", "X"),
+    female_parent = c("0", "Y")
+  )
+  f          <- write_ped(ped)
+  file_base  <- tools::file_path_sans_ext(basename(f))
+  zip_name   <- paste0(file_base, "_report.zip")
+  corr_name  <- paste0(file_base, "_corrected")
+  rep_name   <- paste0(file_base, "_report")
+  zip_path   <- file.path(getwd(), zip_name)
 
-  #Check
-  df_length <- length(output.list)
-  messy_parents <- output.list$messy_parents
-  missing_parents <- output.list$missing_parents
+  on.exit({
+    if (file.exists(zip_path))                 file.remove(zip_path)
+    if (exists(corr_name, envir = .GlobalEnv)) rm(list = corr_name, envir = .GlobalEnv)
+    if (exists(rep_name,  envir = .GlobalEnv)) rm(list = rep_name,  envir = .GlobalEnv)
+  })
 
-  expect_true(df_length == 5) # Before was 4
-  expect_true(all(messy_parents$id == c("grandfather2","grandfather3")))
-  expect_true(nrow(missing_parents) == 13)
+  check_ped(f, verbose = FALSE, correct = TRUE,
+            save_zip = TRUE, save_corrected_zip = TRUE)
 
+  expect_true(file.exists(zip_path))
+  zip_contents <- utils::unzip(zip_path, list = TRUE)$Name
+  expect_true(any(grepl("_corrected", zip_contents)))
 })

From 9bbc5077612b73e356b0498ab88036ead189b88b Mon Sep 17 00:00:00 2001
From: "josue.chinchilla" <josue.chinchilla@IFASadmins-MacBook-Pro.local>
Date: Fri, 24 Apr 2026 13:52:51 -0400
Subject: [PATCH 05/12] updated error handling and reporting for check_ped

---
 NAMESPACE        |  11 ++++
 R/check_ped.R    | 164 ++++++++++++++++++++++++-----------------------
 man/check_ped.Rd |  64 +++++++++---------
 3 files changed, 129 insertions(+), 110 deletions(-)

diff --git a/NAMESPACE b/NAMESPACE
index ba1c1d1..4e518d1 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -46,13 +46,20 @@ importFrom(data.table,set)
 importFrom(dplyr,"%>%")
 importFrom(dplyr,across)
 importFrom(dplyr,arrange)
+importFrom(dplyr,bind_rows)
 importFrom(dplyr,case_when)
+importFrom(dplyr,distinct)
 importFrom(dplyr,filter)
+importFrom(dplyr,first)
 importFrom(dplyr,group_by)
 importFrom(dplyr,group_modify)
 importFrom(dplyr,mutate)
+importFrom(dplyr,n)
+importFrom(dplyr,n_distinct)
+importFrom(dplyr,row_number)
 importFrom(dplyr,select)
 importFrom(dplyr,summarise)
+importFrom(dplyr,summarize)
 importFrom(dplyr,ungroup)
 importFrom(dplyr,where)
 importFrom(pwalign,nucleotideSubstitutionMatrix)
@@ -63,13 +70,17 @@ importFrom(reshape2,melt)
 importFrom(rlang,sym)
 importFrom(stats,cor)
 importFrom(stats,reorder)
+importFrom(stats,setNames)
 importFrom(tibble,as_tibble)
+importFrom(tools,file_path_sans_ext)
+importFrom(utils,capture.output)
 importFrom(utils,packageVersion)
 importFrom(utils,read.csv)
 importFrom(utils,read.table)
 importFrom(utils,tail)
 importFrom(utils,write.csv)
 importFrom(utils,write.table)
+importFrom(utils,zip)
 importFrom(vcfR,extract.gt)
 importFrom(vcfR,maf)
 importFrom(vcfR,read.vcfR)
diff --git a/R/check_ped.R b/R/check_ped.R
index 71a889e..e1d9407 100644
--- a/R/check_ped.R
+++ b/R/check_ped.R
@@ -1,39 +1,37 @@
-#' Check a pedigree file for accuracy and report/correct common errors
+#' Check a pedigree file for structural issues and optionally apply corrections
 #'
-#' `check_ped` reads a 3-column pedigree file (tab-separated, columns labeled `id`, `male_parent`, `female_parent` in any order)
-#' and performs quality checks, optionally correcting or flagging errors.
+#' Reads a tab-separated pedigree file with columns `id`, `male_parent`, and
+#' `female_parent` and checks for five classes of issues: missing required
+#' columns, exact duplicate rows, repeated IDs with conflicting parent
+#' assignments, inconsistent parent sex roles, and pedigree cycles. The input
+#' file is never modified. A report list is always assigned to the global
+#' environment; when `correct = TRUE`, a corrected pedigree is assigned as well.
 #'
-#' The function checks for:
-#' * Exact duplicate rows and removes them (keeping one copy)
-#' * IDs that appear more than once with conflicting male_parent/female_parent assignments (sets both to "0")
-#' * IDs that appear in both male_parent and female_parent columns
-#' * Missing parents (IDs referenced as male_parent/female_parent but not in `id` column), adds them with both = "0"
-#' * Direct and indirect pedigree dependencies (cycles), such as a parent being its own descendant
-#'
-#' After an initial run to clean exact duplicates and repeated IDs, you can run the function again to detect cycles more accurately.
-#'
-#' The function does **not** overwrite the input file. Instead, it prints findings to the console and optionally saves:
-#' * Corrected pedigree as a dataframe in the global environment
-#' * A report listing all detected issues
+#' When `correct = TRUE`, the function removes exact duplicates, collapses
+#' conflicting repeated IDs (replacing the inconsistent parent field with `"0"`),
+#' and appends missing parents as founder rows. Inconsistent sex roles and
+#' pedigree cycles are reported but not corrected and must be resolved manually.
 #'
 #' @param ped.file Path to the pedigree text file.
-#' @param seed Optional seed for reproducibility.
-#' @param verbose Logical. If TRUE (default), prints findings and prompts for saving.
-#' @param correct Logical. If TRUE (default), saves a corrected pedigree. If FALSE, only reports issues.
-#' @param save_zip Logical. If TRUE, writes each report component as a .txt file and bundles them into a .zip archive in the working directory.
-#' @param save_corrected_zip Logical. If TRUE (and save_zip = TRUE and correct = TRUE), also writes the corrected pedigree into the .zip archive.
+#' @param seed Optional integer seed for reproducibility.
+#' @param verbose Logical. If `TRUE` (default), prints findings to the console.
+#' @param correct Logical. If `TRUE` (default), builds and assigns a corrected
+#'   pedigree to the global environment. If `FALSE`, issues are only reported.
+#' @param save_zip Logical. If `TRUE`, bundles report components into a `.zip`
+#'   archive in the working directory.
+#' @param save_corrected_zip Logical. If `TRUE` (requires `save_zip = TRUE` and
+#'   `correct = TRUE`), includes the corrected pedigree in the `.zip` archive.
 #'
-#' @return A named list of data.frames with offending rows (including row numbers):
-#' * `exact_duplicates`
-#' * `repeated_ids_diff`
-#' * `inconsistent_sex_roles`
-#' * `missing_parents`
-#' * `dependencies`
+#' @return An invisible named list of data frames describing detected issues:
+#'   * `exact_duplicates`: exact duplicate rows.
+#'   * `repeated_ids_diff`: rows where the same `id` has conflicting parents.
+#'   * `inconsistent_sex_roles`: rows involving IDs used as both parents.
+#'   * `missing_parents`: rows referencing parent IDs absent from `id`.
+#'   * `dependencies`: rows whose `id` is involved in a pedigree cycle.
 #'
-#' @importFrom dplyr %>% group_by filter ungroup distinct mutate summarize first bind_rows n_distinct n select
-#' @importFrom janitor clean_names
+#' @importFrom dplyr %>% group_by filter ungroup distinct mutate summarize first bind_rows n_distinct n select row_number
 #' @importFrom stats setNames
-#' @importFrom utils read.table write.table zip
+#' @importFrom utils read.table write.table zip capture.output
 #' @importFrom tools file_path_sans_ext
 #' @export
 check_ped <- function(ped.file,
@@ -42,40 +40,61 @@ check_ped <- function(ped.file,
                       correct            = TRUE,
                       save_zip           = FALSE,
                       save_corrected_zip = FALSE) {
-
   #### setup ####
   if (!is.null(seed)) set.seed(seed)
-  data <- utils::read.table(ped.file, header = TRUE) %>%
-    janitor::clean_names() %>%
+  data <- utils::read.table(ped.file, header = TRUE)
+  # ── Column alias remapping ──────────────────────────────────────────────────
+  # Accept common alternative column names and silently remap them so the rest
+  # of the function always works with the canonical names.
+  col_aliases <- list(
+    id            = c("id", "ID", "animal", "Animal", "ind", "Ind"),
+    male_parent   = c("male_parent", "sire",   "Sire",   "father", "Father", "pat", "Pat"),
+    female_parent = c("female_parent", "dam",  "Dam",    "mother", "Mother", "mat", "Mat")
+  )
+  for (canonical in names(col_aliases)) {
+    if (!canonical %in% colnames(data)) {
+      match <- intersect(col_aliases[[canonical]], colnames(data))
+      if (length(match) > 0) {
+        colnames(data)[colnames(data) == match[1]] <- canonical
+      }
+    }
+  }
+  # Validate required column names before any processing
+  required_cols <- c("id", "male_parent", "female_parent")
+  missing_cols  <- setdiff(required_cols, colnames(data))
+  if (length(missing_cols) > 0) {
+    stop(
+      "Input file is missing required column(s): ",
+      paste(missing_cols, collapse = ", "),
+      ".\nExpected columns: id, male_parent, female_parent."
+    )
+  }
+  data <- data %>%
     mutate(
       id            = as.character(id),
       male_parent   = as.character(male_parent),
       female_parent = as.character(female_parent)
     )
   # Add row numbers before any processing
-  data <- data %>% mutate(row_number = row_number(), .before = id)
+  data   <- data %>% mutate(row_number = row_number(), .before = id)
   errors <- list()
-
   #### check 1: exact duplicates ####
   exact_duplicates <- data[
     duplicated(data %>% select(-row_number)) |
       duplicated(data %>% select(-row_number), fromLast = TRUE),
   ]
-
   #### check 2: repeated IDs with conflicting male_parent/female_parent ####
   repeated_ids_diff <- data %>%
     group_by(id) %>%
     filter(n() > 1) %>%
     filter(n_distinct(male_parent) > 1 | n_distinct(female_parent) > 1) %>%
     ungroup()
-
   #### check 3: inconsistent parent sex roles ####
   male_ids   <- unique(data$male_parent[data$male_parent   != "0"])
   female_ids <- unique(data$female_parent[data$female_parent != "0"])
   messy_ids  <- intersect(male_ids, female_ids)
   inconsistent_sex_roles <- data %>%
     filter(male_parent %in% messy_ids | female_parent %in% messy_ids)
-
   #### check 4: missing parents ####
   all_ids     <- unique(data$id)
   ref_ids     <- unique(c(data$male_parent, data$female_parent))
@@ -83,15 +102,24 @@ check_ped <- function(ped.file,
   missing_ids <- setdiff(ref_ids, all_ids)
   missing_parents <- data %>%
     filter(male_parent %in% missing_ids | female_parent %in% missing_ids)
-  # For the corrected pedigree, build founder rows for truly absent parents
-  missing_founders <- data.frame(
-    row_number    = NA_integer_,
-    id            = missing_ids,
-    male_parent   = "0",
-    female_parent = "0",
-    stringsAsFactors = FALSE
-  )
-
+  # Only build founder rows if there are actually missing parents to add
+  if (length(missing_ids) > 0) {
+    missing_founders <- data.frame(
+      row_number    = NA_integer_,
+      id            = missing_ids,
+      male_parent   = "0",
+      female_parent = "0",
+      stringsAsFactors = FALSE
+    )
+  } else {
+    missing_founders <- data.frame(
+      row_number    = integer(0),
+      id            = character(0),
+      male_parent   = character(0),
+      female_parent = character(0),
+      stringsAsFactors = FALSE
+    )
+  }
   #### check 5: dependencies (cycles) ####
   detect_all_cycles <- function(data) {
     adj_list <- lapply(data$id, function(x) {
@@ -128,7 +156,6 @@ check_ped <- function(ped.file,
     }
     return(all_cycles)
   }
-
   cycles    <- detect_all_cycles(data)
   cycle_ids <- character(0)
   if (length(cycles) > 0) {
@@ -145,7 +172,6 @@ check_ped <- function(ped.file,
     dependencies <- data.frame(Dependency = unique(unlist(errors)),
                                stringsAsFactors = FALSE)
   }
-
   #### compile report ####
   input_ped_report <- list(
     exact_duplicates       = exact_duplicates,
@@ -154,7 +180,6 @@ check_ped <- function(ped.file,
     missing_parents        = missing_parents,
     dependencies           = dependencies
   )
-
   #### build corrected pedigree (no row_number column) ####
   if (correct) {
     corrected <- data %>% select(-row_number)
@@ -178,13 +203,11 @@ check_ped <- function(ped.file,
       )
     }
   }
-
   #### file names ####
   file_base      <- tools::file_path_sans_ext(basename(ped.file))
   corrected_name <- paste0(file_base, "_corrected")
   report_name    <- paste0(file_base, "_report")
   zip_name       <- paste0(file_base, "_report.zip")
-
   #### output ####
   if (verbose) {
     cat("\n=== Pedigree Quality Check Report ===\n")
@@ -199,32 +222,20 @@ check_ped <- function(ped.file,
     cat("\n--- Cycles / Dependencies ---\n")
     if (nrow(dependencies) > 0) print(dependencies) else cat("None found.\n")
     if (correct) {
-      cat(paste0("\nSave corrected pedigree as `", corrected_name, "`? (y/n): "))
-      ans <- tolower(trimws(readline()))
-      if (ans == "y") {
-        assign(corrected_name, corrected, envir = .GlobalEnv)
-        assign(report_name, input_ped_report, envir = .GlobalEnv)
-        cat(paste0("Saved `", corrected_name, "` and `", report_name, "`.\n"))
-      } else {
-        cat("No corrected pedigree saved.\n")
-        assign(report_name, input_ped_report, envir = .GlobalEnv)
-      }
+      cat(paste0("\`correct = TRUE`: saving corrected pedigree as `", corrected_name, "`.\n"))
     } else {
-      cat("\n`correct = FALSE`: no changes made to the pedigree.\n")
-      assign(report_name, input_ped_report, envir = .GlobalEnv)
+      cat("\`correct = FALSE`: no changes made to the pedigree.\n")
     }
-  } else {
-    # Silent mode: always save report; save corrected only if correct = TRUE
-    if (correct) assign(corrected_name, corrected, envir = .GlobalEnv)
-    assign(report_name, input_ped_report, envir = .GlobalEnv)
   }
-
+  # Always save report to global environment
+  assign(report_name, input_ped_report, envir = .GlobalEnv)
+  # Save corrected pedigree if correct = TRUE
+  if (correct) assign(corrected_name, corrected, envir = .GlobalEnv)
   #### zip export ####
   if (save_zip) {
     tmp_dir   <- tempfile()
     dir.create(tmp_dir)
     zip_files <- character(0)
-
     # Section headers matching the console output labels
     section_labels <- c(
       exact_duplicates       = "Exact Duplicates",
@@ -233,19 +244,15 @@ check_ped <- function(ped.file,
       missing_parents        = "Missing Parents (rows that reference them)",
       dependencies           = "Cycles / Dependencies"
     )
-
     for (component in names(section_labels)) {
       df        <- input_ped_report[[component]]
       file_path <- file.path(tmp_dir, paste0(file_base, "_", component, ".txt"))
-
       if (nrow(df) == 0) {
-        # Write header + "None found." for clean empty reports
         writeLines(
           c(paste0("--- ", section_labels[[component]], " ---"), "None found."),
           con = file_path
         )
       } else {
-        # Write header, then column names, then data — avoids col.names warning
         file_con <- file(file_path, open = "wt")
         writeLines(paste0("--- ", section_labels[[component]], " ---"), con = file_con)
         writeLines(paste(colnames(df), collapse = "\t"), con = file_con)
@@ -253,24 +260,19 @@ check_ped <- function(ped.file,
         utils::write.table(df, file = file_path, sep = "\t", row.names = FALSE,
                            quote = FALSE, append = TRUE, col.names = FALSE)
       }
-
       zip_files <- c(zip_files, file_path)
     }
-
-    # Optionally include corrected pedigree in zip
     if (save_corrected_zip && correct) {
       corrected_path <- file.path(tmp_dir, paste0(corrected_name, ".txt"))
       utils::write.table(corrected, file = corrected_path, sep = "\t",
                          row.names = FALSE, quote = FALSE)
       zip_files <- c(zip_files, corrected_path)
     }
-
-    # Bundle all files into zip in the working directory
     zip_path <- file.path(getwd(), zip_name)
-    utils::zip(zipfile = zip_path, files = zip_files, flags = "-j")
+    invisible(capture.output(
+      utils::zip(zipfile = zip_path, files = zip_files, flags = "-j")
+    ))
     if (verbose) cat(paste0("\nZip archive saved to: ", zip_path, "\n"))
   }
-
   invisible(input_ped_report)
 }
-
diff --git a/man/check_ped.Rd b/man/check_ped.Rd
index 0d1df79..bbae7c3 100644
--- a/man/check_ped.Rd
+++ b/man/check_ped.Rd
@@ -2,48 +2,54 @@
 % Please edit documentation in R/check_ped.R
 \name{check_ped}
 \alias{check_ped}
-\title{Check a pedigree file for accuracy and report/correct common errors}
+\title{Check a pedigree file for structural issues and optionally apply corrections}
 \usage{
-check_ped(ped.file, seed = NULL, verbose = TRUE, correct = TRUE)
+check_ped(
+  ped.file,
+  seed = NULL,
+  verbose = TRUE,
+  correct = TRUE,
+  save_zip = FALSE,
+  save_corrected_zip = FALSE
+)
 }
 \arguments{
 \item{ped.file}{Path to the pedigree text file.}
 
-\item{seed}{Optional seed for reproducibility.}
+\item{seed}{Optional integer seed for reproducibility.}
 
-\item{verbose}{Logical. If TRUE (default), prints findings and prompts for saving.}
+\item{verbose}{Logical. If \code{TRUE} (default), prints findings to the console.}
 
-\item{correct}{Logical. If TRUE (default), saves a corrected pedigree. If FALSE, only reports issues.}
+\item{correct}{Logical. If \code{TRUE} (default), builds and assigns a corrected
+pedigree to the global environment. If \code{FALSE}, issues are only reported.}
+
+\item{save_zip}{Logical. If \code{TRUE}, bundles report components into a \code{.zip}
+archive in the working directory.}
+
+\item{save_corrected_zip}{Logical. If \code{TRUE} (requires \code{save_zip = TRUE} and
+\code{correct = TRUE}), includes the corrected pedigree in the \code{.zip} archive.}
 }
 \value{
-A named list of data.frames with offending rows (including row numbers):
+An invisible named list of data frames describing detected issues:
 \itemize{
-\item \code{exact_duplicates}
-\item \code{repeated_ids_diff}
-\item \code{inconsistent_sex_roles}
-\item \code{missing_parents}
-\item \code{dependencies}
+\item \code{exact_duplicates}: exact duplicate rows.
+\item \code{repeated_ids_diff}: rows where the same \code{id} has conflicting parents.
+\item \code{inconsistent_sex_roles}: rows involving IDs used as both parents.
+\item \code{missing_parents}: rows referencing parent IDs absent from \code{id}.
+\item \code{dependencies}: rows whose \code{id} is involved in a pedigree cycle.
 }
 }
 \description{
-\code{check_ped} reads a 3-column pedigree file (tab-separated, columns labeled \code{id}, \code{sire}, \code{dam} in any order)
-and performs quality checks, optionally correcting or flagging errors.
+Reads a tab-separated pedigree file with columns \code{id}, \code{male_parent}, and
+\code{female_parent} and checks for five classes of issues: missing required
+columns, exact duplicate rows, repeated IDs with conflicting parent
+assignments, inconsistent parent sex roles, and pedigree cycles. The input
+file is never modified. A report list is always assigned to the global
+environment; when \code{correct = TRUE}, a corrected pedigree is assigned as well.
 }
 \details{
-The function checks for:
-\itemize{
-\item Exact duplicate rows and removes them (keeping one copy)
-\item IDs that appear more than once with conflicting sire/dam assignments (sets sire/dam to "0")
-\item IDs that appear in both sire and dam columns
-\item Missing parents (IDs referenced as sire/dam but not in \code{id} column), adds them with sire/dam = "0"
-\item Direct and indirect pedigree dependencies (cycles), such as a parent being its own descendant
-}
-
-After an initial run to clean exact duplicates and repeated IDs, you can run the function again to detect cycles more accurately.
-
-The function does \strong{not} overwrite the input file. Instead, it prints findings to the console and optionally saves:
-\itemize{
-\item Corrected pedigree as a dataframe in the global environment
-\item A report listing all detected issues
-}
+When \code{correct = TRUE}, the function removes exact duplicates, collapses
+conflicting repeated IDs (replacing the inconsistent parent field with \code{"0"}),
+and appends missing parents as founder rows. Inconsistent sex roles and
+pedigree cycles are reported but not corrected and must be resolved manually.
 }

From f798362faad1ccc1df8c3630b5d3387c65d3f74e Mon Sep 17 00:00:00 2001
From: "josue.chinchilla" <josue.chinchilla@IFASadmins-MacBook-Pro.local>
Date: Mon, 27 Apr 2026 14:07:03 -0400
Subject: [PATCH 06/12] updated parentage functions to work allow user to
 display NA or not, and to plot results.

---
 R/find_parentage.R                      | 320 +++++++++-------
 R/validate_pedigree.R                   | 254 ++++++------
 man/find_parentage.Rd                   | 155 +++-----
 man/validate_pedigree.Rd                |  40 +-
 tests/testthat/test-find_parentage.R    | 196 +++++++---
 tests/testthat/test-validate_pedigree.R | 490 ++++++++++++++----------
 6 files changed, 850 insertions(+), 605 deletions(-)

diff --git a/R/find_parentage.R b/R/find_parentage.R
index 728822a..f53a323 100644
--- a/R/find_parentage.R
+++ b/R/find_parentage.R
@@ -7,104 +7,62 @@
 #'   Must include an 'ID' column followed by marker columns coded as 0, 1, 2
 #'   (allele dosage).
 #' @param parents_file Path to a TSV/CSV/TXT file listing candidate parent IDs.
-#'   Must include an 'ID' column. An optional 'Sex' column with values
-#'   'M' (male parent), 'F' (female parent), or 'A' (ambiguous) determines
-#'   which parents are tested for each role. If absent, all parents are treated
-#'   as ambiguous.
-#' @param progeny_file Path to a TSV/CSV/TXT file listing progeny IDs to assign.
+#'   Must include an 'ID' column. An optional 'Sex' column ('M', 'F', or 'A')
+#'   determines parent roles. If absent, all parents are treated as ambiguous.
+#' @param progeny_file Path to a TSV/CSV/TXT file listing progeny IDs.
 #'   Must include an 'ID' column.
-#' @param method Character. Parentage assignment method. One of:
-#'   \itemize{
-#'     \item \code{"best_male_parent"}  — finds the best male parent for each
-#'       progeny using homozygous mismatch rate.
-#'     \item \code{"best_female_parent"} — finds the best female parent for each
-#'       progeny using homozygous mismatch rate.
-#'     \item \code{"best_match"} — finds the single best parent (either sex)
-#'       using homozygous mismatch rate.
-#'     \item \code{"best_pair"}  — finds the best male-female parent pair for
-#'       each progeny using full Mendelian error rate (default).
-#'   }
-#' @param min_markers Integer. Minimum number of non-missing markers required
-#'   to report a parentage assignment. Progeny-parent comparisons with fewer
-#'   markers are flagged as \code{LOW_MARKERS} and no assignment is made
-#'   (default: \code{10}).
-#' @param error_threshold Numeric. Maximum mismatch percentage to report a
-#'   parentage assignment as confident. Assignments above this threshold are
-#'   flagged as \code{HIGH_ERROR} in the \code{Assignment_Status} column
-#'   (default: \code{5.0}). Must be between 0 and 100.
-#' @param show_ties Logical. If \code{TRUE}, all tied best pairs (after
-#'   tie-breaking by maximum markers tested) are reported as additional columns
-#'   (\code{Male_Parent_1}, \code{Male_Parent_2}, etc.) when
-#'   \code{method = "best_pair"}. The base columns (\code{Male_Parent},
-#'   \code{Female_Parent}, etc.) are always populated with the top result.
-#'   If \code{FALSE}, only one tied pair is reported with a warning.
-#'   Default is \code{TRUE}.
-#' @param allow_selfing Logical. If \code{FALSE}, male-female parent pairs where
-#'   both IDs are identical are excluded when \code{method = "best_pair"}.
-#'   Default is \code{TRUE}.
-#' @param verbose Logical. If \code{TRUE}, prints progress messages, summary
-#'   statistics, and the results table to the console. Default is \code{TRUE}.
+#' @param method Character. One of \code{"best_male_parent"},
+#'   \code{"best_female_parent"}, \code{"best_match"}, or
+#'   \code{"best_pair"} (default). See Details.
+#' @param min_markers Integer. Minimum markers required for a valid assignment.
+#'   Fewer markers flags \code{LOW_MARKERS} (default: \code{10}).
+#' @param error_threshold Numeric. Maximum mismatch percentage for a confident
+#'   assignment. Exceeded values flag \code{HIGH_ERROR} (default: \code{5.0}).
+#'   Must be between 0 and 100.
+#' @param show_ties Logical. If \code{TRUE}, tied best pairs are reported as
+#'   additional suffix columns (e.g. \code{Male_Parent_2}) when
+#'   \code{method = "best_pair"}. Default is \code{TRUE}.
+#' @param allow_selfing Logical. If \code{FALSE}, pairs with identical male and
+#'   female parent IDs are excluded. Default is \code{TRUE}.
+#' @param verbose Logical. If \code{TRUE}, prints progress, summary, and
+#'   results to the console. Default is \code{TRUE}.
 #' @param write_txt Logical. If \code{TRUE}, writes results to
-#'   \code{parentage_results_dt.txt} in the working directory. Default is
-#'   \code{TRUE}.
+#'   \code{parentage_testing_results.txt}. Default is \code{TRUE}.
+#' @param na_string Character. String for missing values in output file.
+#'   Use \code{"NA"} or \code{""} (default: \code{"NA"}).
+#' @param plot_results Logical. If \code{TRUE}, plots the Mendelian error
+#'   distribution colored by assignment status. Requires \code{ggplot2}.
+#'   Plot is saved as \code{.jpg} when \code{write_txt = TRUE}.
+#'   Default is \code{TRUE}.
 #'
-#' @return A \code{data.table} with one row per progeny. Columns depend on the
-#'   method used:
-#'   \itemize{
-#'     \item \code{best_male_parent} / \code{best_female_parent} / \code{best_match}:
-#'       \code{Progeny}, \code{Best_Match}, \code{Mendelian_Error_Pct},
-#'       \code{Markers_Tested}, \code{Assignment_Status}.
-#'     \item \code{best_pair} (no ties after tie-breaking): \code{Progeny},
-#'       \code{Male_Parent}, \code{Female_Parent}, \code{Mendelian_Error_Pct},
-#'       \code{Markers_Tested}, \code{Assignment_Status}.
-#'     \item \code{best_pair} (ties remain after tie-breaking,
-#'       \code{show_ties = TRUE}): base columns are always populated with the
-#'       top result, plus suffix columns \code{Male_Parent_1},
-#'       \code{Female_Parent_1}, etc. for each tied pair.
-#'   }
-#'   \code{Assignment_Status} is one of \code{PASS}, \code{HIGH_ERROR}, or
-#'   \code{LOW_MARKERS}. Returned invisibly when \code{verbose = TRUE}.
+#' @return A \code{data.table} with one row per progeny. Base columns are
+#'   \code{Progeny}, \code{Male_Parent} (or \code{Best_Match}),
+#'   \code{Female_Parent}, \code{Mendelian_Error_Pct}, \code{Markers_Tested},
+#'   and \code{Assignment_Status} (\code{PASS}, \code{HIGH_ERROR}, or
+#'   \code{LOW_MARKERS}). Tie suffix columns (\code{Male_Parent_2}, etc.) are
+#'   appended when \code{show_ties = TRUE} and ties remain after tie-breaking.
+#'   Returned invisibly when \code{verbose = TRUE}.
 #'
 #' @details
-#' A homozygous-only genotype matrix is pre-computed once at startup and shared
-#' across all methods that require it, avoiding redundant computation.
-#'
-#' For \code{"best_male_parent"}, \code{"best_female_parent"}, and
-#' \code{"best_match"}, only homozygous markers (coded 0 or 2) are used for
-#' comparison; heterozygous markers (coded 1) are set to \code{NA}. This
-#' reduces false mismatches caused by phase ambiguity.
-#'
-#' For \code{"best_pair"}, all markers are used and full Mendelian inheritance
-#' rules are applied. Mismatch rates and comparison counts are computed across
-#' all progeny simultaneously using vectorised \code{vapply} calls, producing
-#' \code{n_pairs x n_progeny} matrices and giving substantial speed gains for
-#' large datasets. Both matrices are explicitly coerced to matrix form to
-#' handle the edge case of a single parent pair correctly.
-#'
-#' When multiple pairs share the lowest Mendelian error rate, ties are broken
-#' by selecting the pair(s) with the greatest number of markers tested. If ties
-#' still remain after this step, all remaining tied pairs are reported when
-#' \code{show_ties = TRUE}.
+#' A homozygous-only matrix is pre-computed once and shared across
+#' \code{"best_male_parent"}, \code{"best_female_parent"}, and
+#' \code{"best_match"}, where heterozygous markers (coded 1) are set to
+#' \code{NA} to reduce false mismatches from phase ambiguity.
 #'
-#' The base columns (\code{Male_Parent}, \code{Female_Parent},
-#' \code{Mendelian_Error_Pct}, \code{Markers_Tested}) are always populated with
-#' the top result, ensuring no missing values in these columns regardless of
-#' tie status.
+#' For \code{"best_pair"}, full Mendelian inheritance rules are applied across
+#' all marker and pair combinations using vectorised \code{vapply} calls,
+#' producing \code{n_pairs x n_progeny} mismatch and comparison matrices.
 #'
-#' Output rows are pre-allocated as a \code{data.table} and filled by reference
-#' using \code{data.table::set()}, avoiding repeated memory allocation during
-#' the results-building step.
+#' Ties on mismatch rate are broken by maximum markers tested. Remaining ties
+#' are reported as suffix columns when \code{show_ties = TRUE}. Base columns
+#' are always populated with the top result.
 #'
-#' Individuals in \code{parents_file} or \code{progeny_file} that are absent
-#' from \code{genotypes_file} are removed with a warning.
-#'
-#' Progeny with fewer non-missing markers than \code{min_markers} are flagged
-#' \code{LOW_MARKERS} and no parent assignment is reported. Progeny whose best
-#' match exceeds \code{error_threshold} are flagged \code{HIGH_ERROR}.
+#' Parents or progeny absent from \code{genotypes_file} are removed with a
+#' warning. Progeny below \code{min_markers} are flagged \code{LOW_MARKERS};
+#' those exceeding \code{error_threshold} are flagged \code{HIGH_ERROR}.
 #'
 #' @examples
 #' \dontrun{
-#' # Assign best male-female parent pair to each progeny
 #' results <- find_parentage(
 #'   genotypes_file  = "genotypes.txt",
 #'   parents_file    = "parents.txt",
@@ -113,10 +71,11 @@
 #'   min_markers     = 50,
 #'   error_threshold = 5.0,
 #'   show_ties       = TRUE,
-#'   allow_selfing   = FALSE
+#'   allow_selfing   = FALSE,
+#'   na_string       = "NA",
+#'   plot_results    = TRUE
 #' )
 #'
-#' # Find best individual parent match (ignoring sex)
 #' results <- find_parentage(
 #'   genotypes_file  = "genotypes.txt",
 #'   parents_file    = "parents.txt",
@@ -136,8 +95,10 @@ find_parentage <- function(genotypes_file, parents_file, progeny_file,
                            show_ties       = TRUE,
                            allow_selfing   = TRUE,
                            verbose         = TRUE,
-                           write_txt       = TRUE) {
-
+                           write_txt       = TRUE,
+                           na_string       = "NA",
+                           plot_results    = TRUE) {
+  
   #### Input Validation and Data Loading ####
   allowed_methods <- c("best_male_parent", "best_female_parent", "best_match", "best_pair")
   if (!method %in% allowed_methods)
@@ -146,7 +107,9 @@ find_parentage <- function(genotypes_file, parents_file, progeny_file,
     stop("min_markers must be a positive integer.")
   if (error_threshold < 0 || error_threshold > 100)
     stop("error_threshold must be between 0 and 100.")
-
+  if (!na_string %in% c("NA", ""))
+    stop("na_string must be either 'NA' or ''.")
+  
   tryCatch({
     genos              <- data.table::fread(genotypes_file, sep = "auto")
     all_parents        <- data.table::fread(parents_file,   sep = "auto")
@@ -154,7 +117,7 @@ find_parentage <- function(genotypes_file, parents_file, progeny_file,
   }, error = function(e) {
     stop("Error reading input files. Ensure paths are correct and files are TXT/TSV/CSV.")
   })
-
+  
   valid_ids       <- genos$ID
   removed_parents <- base::setdiff(all_parents$ID, valid_ids)
   if (base::length(removed_parents) > 0) {
@@ -162,48 +125,50 @@ find_parentage <- function(genotypes_file, parents_file, progeny_file,
             paste(removed_parents, collapse = ", "), call. = FALSE)
     all_parents <- all_parents[ID %in% valid_ids]
   }
-
+  
   removed_progeny <- base::setdiff(progeny_candidates$ID, valid_ids)
   if (base::length(removed_progeny) > 0) {
     warning("The following progeny IDs were not in the genotype file and will not be analyzed: ",
             paste(removed_progeny, collapse = ", "), call. = FALSE)
     progeny_candidates <- progeny_candidates[ID %in% valid_ids]
   }
-
+  
   if (!"Sex" %in% base::colnames(all_parents)) {
     warning("No 'Sex' column in parents file. All parents treated as ambiguous ('A').")
     all_parents[, Sex := "A"]
   }
-
+  
   all_parents[, Sex := base::toupper(Sex)]
   male_parent_candidates   <- all_parents[Sex %in% c("M", "A", "NA"), .SD]
   female_parent_candidates <- all_parents[Sex %in% c("F", "A", "NA")]
-
+  
   if (base::nrow(male_parent_candidates) == 0 && method %in% c("best_male_parent", "best_pair"))
     warning("No valid male parent candidates remain after filtering.", call. = FALSE)
   if (base::nrow(female_parent_candidates) == 0 && method %in% c("best_female_parent", "best_pair"))
     warning("No valid female parent candidates remain after filtering.", call. = FALSE)
   if (base::nrow(progeny_candidates) == 0)
     stop("No valid progeny candidates remain after filtering.")
-
+  
   #### Pre-compute genotype matrices once (shared across all methods) ####
+  
   # Full genotype matrix used by best_pair
   genos_mat <- base::as.matrix(genos, rownames = "ID")
-
+  
   # Homozygous-only matrix (het markers set to NA) used by hom methods
   genos_hom   <- data.table::copy(genos)
   marker_cols <- base::setdiff(base::names(genos_hom), "ID")
   for (col in marker_cols)
     genos_hom[base::get(col) == 1, (col) := NA_integer_]
   genos_hom_mat <- base::as.matrix(genos_hom, rownames = "ID")
-
+  
   #### Assignment_Status from markers and error rate ####
+  
   # Returns LOW_MARKERS, HIGH_ERROR, or PASS
   assign_status <- function(markers, error_pct) {
     base::ifelse(markers < min_markers, "LOW_MARKERS",
                  base::ifelse(error_pct > error_threshold, "HIGH_ERROR", "PASS"))
   }
-
+  
   #### Logic for Homozygous Matching Methods ####
   if (method %in% c("best_male_parent", "best_female_parent", "best_match")) {
     parent_ids <- base::switch(method,
@@ -211,11 +176,11 @@ find_parentage <- function(genotypes_file, parents_file, progeny_file,
                                "best_female_parent" = female_parent_candidates$ID,
                                "best_match"         = base::union(male_parent_candidates$ID,
                                                                   female_parent_candidates$ID))
-
+    
     # Subset pre-computed homozygous matrix for relevant parents and progeny
-    parent_genos  <- genos_hom_mat[base::rownames(genos_hom_mat) %in% parent_ids,           , drop = FALSE]
-    progeny_genos <- genos_hom_mat[base::rownames(genos_hom_mat) %in% progeny_candidates$ID, , drop = FALSE]
-
+    parent_genos  <- genos_hom_mat[base::rownames(genos_hom_mat) %in% parent_ids,            , drop = FALSE]
+    progeny_genos <- genos_hom_mat[base::rownames(genos_hom_mat) %in% progeny_candidates$ID,  , drop = FALSE]
+    
     # Pre-allocate results data.table; fill by reference with set()
     n_progeny  <- base::nrow(progeny_genos)
     results_dt <- data.table::data.table(
@@ -225,26 +190,26 @@ find_parentage <- function(genotypes_file, parents_file, progeny_file,
       Markers_Tested      = NA_integer_,
       Assignment_Status   = NA_character_
     )
-
+    
     for (i in base::seq_len(n_progeny)) {
       progeny_vec      <- progeny_genos[i, ]
       mismatches       <- base::rowSums(parent_genos != progeny_vec, na.rm = TRUE)
       comparisons      <- base::rowSums(!base::is.na(parent_genos) & !base::is.na(progeny_vec))
       percent_mismatch <- (mismatches / comparisons) * 100
       percent_mismatch[base::is.nan(percent_mismatch)] <- NA
-
+      
       best_idx <- base::which.min(percent_mismatch)
-
+      
       # No candidate found — flag LOW_MARKERS and continue
       if (base::length(best_idx) == 0) {
         data.table::set(results_dt, i, "Markers_Tested",    0L)
         data.table::set(results_dt, i, "Assignment_Status", "LOW_MARKERS")
         next
       }
-
+      
       best_markers <- comparisons[best_idx]
       best_error   <- base::round(percent_mismatch[best_idx], 2)
-
+      
       data.table::set(results_dt, i, "Best_Match",          base::rownames(parent_genos)[best_idx])
       data.table::set(results_dt, i, "Mendelian_Error_Pct", best_error)
       data.table::set(results_dt, i, "Markers_Tested",      base::as.integer(best_markers))
@@ -252,7 +217,7 @@ find_parentage <- function(genotypes_file, parents_file, progeny_file,
     }
     final_df <- results_dt
   }
-
+  
   #### Logic for Best Pair Method ####
   if (method == "best_pair") {
     parent_pairs <- data.table::CJ(Male_Parent   = male_parent_candidates$ID,
@@ -262,18 +227,17 @@ find_parentage <- function(genotypes_file, parents_file, progeny_file,
       if (verbose) base::cat("Selfing is disallowed. Pairs with identical parents are removed.\n")
     }
     if (base::nrow(parent_pairs) == 0) stop("No valid parent pairs to test.")
-
+    
     # Pre-extract parent genotype row blocks for vectorised operations
     male_parent_genos_mat   <- genos_mat[parent_pairs$Male_Parent,   , drop = FALSE]
     female_parent_genos_mat <- genos_mat[parent_pairs$Female_Parent, , drop = FALSE]
-
+    
     # Subset full genotype matrix to progeny only
     progeny_ids <- progeny_candidates$ID
     progeny_mat <- genos_mat[progeny_ids, , drop = FALSE]
     n_progeny   <- base::nrow(progeny_mat)
     n_pairs     <- base::nrow(parent_pairs)
-
-    # Vectorised mismatch computation across ALL progeny at once
+    
     # Wrapped in matrix() to handle the n_pairs = 1 edge case
     mismatch_mat <- base::matrix(
       base::vapply(base::seq_len(n_progeny), function(j) {
@@ -292,8 +256,7 @@ find_parentage <- function(genotypes_file, parents_file, progeny_file,
       }, numeric(n_pairs)),
       nrow = n_pairs, ncol = n_progeny
     )
-
-    # Vectorised comparison count across ALL progeny at once
+    
     # Wrapped in matrix() to handle the n_pairs = 1 edge case
     comparison_mat <- base::matrix(
       base::vapply(base::seq_len(n_progeny), function(j) {
@@ -304,11 +267,11 @@ find_parentage <- function(genotypes_file, parents_file, progeny_file,
       }, numeric(n_pairs)),
       nrow = n_pairs, ncol = n_progeny
     )
-
+    
     # Percent mismatch matrix: n_pairs x n_progeny
     pct_mismatch_mat <- (mismatch_mat / comparison_mat) * 100
     pct_mismatch_mat[base::is.nan(pct_mismatch_mat)] <- NA
-
+    
     # Pre-allocate base results data.table; tie columns added dynamically
     results_dt <- data.table::data.table(
       Progeny             = progeny_ids,
@@ -318,7 +281,7 @@ find_parentage <- function(genotypes_file, parents_file, progeny_file,
       Markers_Tested      = NA_integer_,
       Assignment_Status   = NA_character_
     )
-
+    
     # Per-progeny result extraction from pre-computed matrices
     results_list <- base::vector("list", n_progeny)
     for (j in base::seq_len(n_progeny)) {
@@ -326,48 +289,48 @@ find_parentage <- function(genotypes_file, parents_file, progeny_file,
       percent_mismatch <- pct_mismatch_mat[, j]
       comparisons      <- comparison_mat[,  j]
       min_mismatch_val <- base::min(percent_mismatch, na.rm = TRUE)
-
+      
       # No markers overlap at all — flag LOW_MARKERS
       if (base::is.infinite(min_mismatch_val)) {
         data.table::set(results_dt, j, "Markers_Tested",    0L)
         data.table::set(results_dt, j, "Assignment_Status", "LOW_MARKERS")
         next
       }
-
+      
       best_indices <- base::which(percent_mismatch == min_mismatch_val)
-
+      
       # Tie-break: prefer pair(s) with the most markers tested
       if (base::length(best_indices) > 1) {
         best_markers_per_pair <- comparisons[best_indices]
         max_markers           <- base::max(best_markers_per_pair)
         best_indices          <- best_indices[best_markers_per_pair == max_markers]
       }
-
+      
       best_pairs   <- parent_pairs[best_indices]
       best_markers <- comparisons[best_indices[1]]
       best_error   <- base::round(min_mismatch_val, 2)
       a_status     <- assign_status(best_markers, best_error)
-
+      
       if (!show_ties && base::nrow(best_pairs) > 1) {
         warning("Progeny '", prog_id, "' has ", base::nrow(best_pairs),
                 " tied best pairs. Only one is reported as show_ties=FALSE.", call. = FALSE)
       }
-
+      
       num_to_report <- base::min(base::nrow(best_pairs),
                                  if (show_ties) base::nrow(best_pairs) else 1)
-
+      
       # Always populate base columns with the top result
       data.table::set(results_dt, j, "Male_Parent",         best_pairs$Male_Parent[1])
       data.table::set(results_dt, j, "Female_Parent",       best_pairs$Female_Parent[1])
       data.table::set(results_dt, j, "Mendelian_Error_Pct", base::sprintf("%.2f", min_mismatch_val))
       data.table::set(results_dt, j, "Markers_Tested",      base::as.integer(best_markers))
       data.table::set(results_dt, j, "Assignment_Status",   a_status)
-
+      
       # If ties remain after tie-breaking and show_ties is TRUE,
       # store tie details for later column binding
       if (show_ties && num_to_report > 1) {
         tie_row <- base::list(Progeny = prog_id)
-        for (k in base::seq_len(num_to_report)) {
+        for (k in base::seq(2, num_to_report)) {
           tie_row[[base::paste0("Male_Parent_",         k)]] <- best_pairs$Male_Parent[k]
           tie_row[[base::paste0("Female_Parent_",       k)]] <- best_pairs$Female_Parent[k]
           tie_row[[base::paste0("Mendelian_Error_Pct_", k)]] <- min_mismatch_val
@@ -376,19 +339,24 @@ find_parentage <- function(genotypes_file, parents_file, progeny_file,
         results_list[[j]] <- data.table::as.data.table(tie_row)
       }
     }
-
+    
     # Merge any tie suffix columns onto the pre-allocated base table
     tie_rows <- data.table::rbindlist(
       base::Filter(Negate(base::is.null), results_list),
-      fill = TRUE
+      fill      = TRUE,
+      use.names = TRUE
     )
     if (base::nrow(tie_rows) > 0) {
       final_df <- merge(results_dt, tie_rows, by = "Progeny", all.x = TRUE)
+      # Replace any empty strings with NA
+      for (col in base::names(final_df)) {
+        data.table::set(final_df, which(final_df[[col]] == ""), col, NA_character_)
+      }
     } else {
       final_df <- results_dt
     }
   }
-
+  
   #### Summary ####
   if (verbose) {
     total    <- base::nrow(final_df)
@@ -401,18 +369,98 @@ find_parentage <- function(genotypes_file, parents_file, progeny_file,
     base::cat("Min markers threshold :", min_markers,     "\n")
     base::cat("Error threshold       :", error_threshold, "%\n\n")
   }
-
+  
   #### Output ####
   if (write_txt) {
-    output_filename <- "parentage_results_dt.txt"
+    output_filename <- "parentage_testing_results.txt"
     tryCatch({
-      data.table::fwrite(final_df, file = output_filename, sep = "\t", quote = FALSE)
+      data.table::fwrite(final_df, file = output_filename, sep = "\t", quote = FALSE,
+                         na = na_string)
       if (verbose) base::cat("Results successfully written to:", output_filename, "\n")
     }, error = function(e) {
       warning("Could not write results to file. Error: ", e$message, call. = FALSE)
     })
   }
-
+  
+  #### Plot Results ####
+  if (plot_results) {
+    if (!requireNamespace("ggplot2", quietly = TRUE)) {
+      warning("ggplot2 is required for plot_results = TRUE. Please install it.", call. = FALSE)
+    } else {
+      plot_df <- final_df[!is.na(final_df$Mendelian_Error_Pct)]
+      plot_df$Mendelian_Error_Pct <- base::as.numeric(plot_df$Mendelian_Error_Pct)
+      
+      plot_df$Plot_Status <- dplyr::case_when(
+        plot_df$Assignment_Status == "PASS"        ~ "PASS",
+        plot_df$Assignment_Status == "HIGH_ERROR"  ~ "HIGH_ERROR",
+        plot_df$Assignment_Status == "LOW_MARKERS" ~ "LOW_MARKERS",
+        TRUE                                       ~ "OTHER"
+      )
+      
+      n_total <- base::nrow(plot_df)
+      n_pass  <- base::sum(plot_df$Assignment_Status == "PASS",        na.rm = TRUE)
+      n_high  <- base::sum(plot_df$Assignment_Status == "HIGH_ERROR",  na.rm = TRUE)
+      n_low   <- base::sum(plot_df$Assignment_Status == "LOW_MARKERS", na.rm = TRUE)
+      
+      threshold_label <- base::paste0(
+        "Error Threshold: ", error_threshold, "%  |  ",
+        "Pass: ",        n_pass, "  |  ",
+        "High Error: ",  n_high, "  |  ",
+        "Low Markers: ", n_low
+      )
+      
+      p <- ggplot2::ggplot(
+        plot_df,
+        ggplot2::aes(x = Mendelian_Error_Pct, fill = Plot_Status)
+      ) +
+        ggplot2::geom_histogram(binwidth = 1, color = "white", alpha = 0.9) +
+        ggplot2::geom_vline(
+          xintercept = error_threshold,
+          linetype   = "dashed",
+          color      = "black",
+          linewidth  = 1
+        ) +
+        ggplot2::scale_x_continuous(breaks = seq(0, 100, by = 5)) +
+        ggplot2::scale_y_continuous(breaks = seq(0, 10000, by = 5)) +
+        ggplot2::scale_fill_manual(
+          values = c(
+            "PASS"        = "#339900",
+            "HIGH_ERROR"  = "#cc3333",
+            "LOW_MARKERS" = "#F1C40F",
+            "OTHER"       = "#BDC3C7"
+          ),
+          labels = c(
+            "PASS"        = "Pass",
+            "HIGH_ERROR"  = "High Error",
+            "LOW_MARKERS" = "Low Markers",
+            "OTHER"       = "Other"
+          )
+        ) +
+        ggplot2::labs(
+          title    = "Parentage Mendelian Error Distribution",
+          subtitle = base::paste0("Progeny Tested: ", n_total, "\n \n", threshold_label),
+          x        = "Mendelian Error (%)",
+          y        = "Number of Progeny",
+          fill     = "Status"
+        ) +
+        ggplot2::theme_classic(base_size = 13) +
+        ggplot2::theme(legend.position = "top")
+      
+      base::print(p)
+      
+      if (write_txt) {
+        plot_filename <- base::sub("\\.[^.]+$", ".jpg", "parentage_testing_results.txt")
+        tryCatch({
+          ggplot2::ggsave(plot_filename, plot = p,
+                          device = "jpeg", width = 10, height = 6, dpi = 300)
+          if (verbose) base::cat("Plot saved to:", plot_filename, "\n")
+        }, error = function(e) {
+          warning("Could not save plot. Error: ", e$message, call. = FALSE)
+        })
+      }
+    }
+  }
+  
   if (verbose) {
     base::cat("\n--- Parentage Assignment Results ---\n")
     base::print(final_df)
diff --git a/R/validate_pedigree.R b/R/validate_pedigree.R
index 5b366c9..a3526ce 100644
--- a/R/validate_pedigree.R
+++ b/R/validate_pedigree.R
@@ -23,57 +23,67 @@
 #'   parent-level evaluation (default: 2.0). Must be between 0 and 100.
 #' @param verbose Logical. If TRUE, prints progress messages, a summary
 #'   table, and results to the console (default: TRUE).
-#' @param write_txt Logical. If TRUE, writes validation results to
-#'   output_filename (default: TRUE).
-#' @param output_filename Character. Path/name of the output file
-#'   (default: "pedigree_validation_results.txt").
+#' @param write_results Logical. If TRUE, writes the validation results to
+#'   output_filename and saves the plot as a .jpg file (default: TRUE).
+#' @param output_filename Character. Path/name of the output results file
+#'   (default: "__validation_report.txt"). The plot will be saved using the
+#'   same base name with a .jpg extension.
+#' @param plot_results Logical. If TRUE, prints a histogram of Trio Mendelian
+#'   Error percentages with a threshold line (default: TRUE). If write_results
+#'   is also TRUE, the plot is additionally saved as a .jpg file.
+#' @param na_string Character. String for missing values in the output file.
+#'   Use \code{"NA"} or \code{""} (default: \code{"NA"}).
 #'
 #' @return A data.table (returned invisibly) with one row per trio and
 #'   the following columns:
 #'   \describe{
 #'     \item{ID}{Individual ID.}
-#'     \item{Male_Parent}{Declared male parent ID.}
-#'     \item{Female_Parent}{Declared female parent ID.}
-#'     \item{Mendelian_Error_Pct}{Trio-level Mendelian error percentage.}
-#'     \item{Markers_Tested}{Number of markers with non-missing genotypes.}
+#'     \item{Orig_Male_Parent}{Declared male parent ID.}
+#'     \item{Orig_Female_Parent}{Declared female parent ID.}
+#'     \item{Trio_Mendelian_Error_Pct}{Trio-level Mendelian error percentage.}
+#'     \item{Trio_Markers_Tested}{Number of markers with non-missing genotypes.}
 #'     \item{Status}{One of PASS, FAIL, LOW_MARKERS, NO_DATA, FOUNDERS,
 #'           MISSING_MALE_PARENT, MISSING_FEMALE_PARENT, MISSING_BOTH_PARENTS,
 #'           or NO_GENOTYPE_DATA.}
-#'     \item{Correction_Decision}{One of NONE, KEEP_BOTH,
+#'     \item{Recommended_Correction}{One of NONE, KEEP_BOTH,
 #'           REMOVE_MALE_PARENT, REMOVE_FEMALE_PARENT, REMOVE_BOTH,
 #'           LOW_MARKERS_KEEP_BOTH, LOW_MARKERS_REMOVE_MALE_PARENT,
 #'           LOW_MARKERS_REMOVE_FEMALE_PARENT, LOW_MARKERS_REMOVE_BOTH.}
 #'     \item{Male_Parent_Hom_Error_Pct}{Male parent homozygous-marker mismatch percentage.}
 #'     \item{Female_Parent_Hom_Error_Pct}{Female parent homozygous-marker mismatch percentage.}
-#'     \item{Best_Male_Parent}{Best-matching male parent candidate ID.}
-#'     \item{Best_Male_Parent_Error_Pct}{Homozygous mismatch percentage for the best male parent candidate.}
-#'     \item{Best_Female_Parent}{Best-matching female parent candidate ID.}
-#'     \item{Best_Female_Parent_Error_Pct}{Homozygous mismatch percentage for the best female parent candidate.}
+#'     \item{Best_Male_Candidate}{Best-matching male parent candidate ID.}
+#'     \item{Best_Male_Candidate_Error_Pct}{Homozygous mismatch percentage for the best male candidate.}
+#'     \item{Best_Female_Candidate}{Best-matching female parent candidate ID.}
+#'     \item{Best_Female_Candidate_Error_Pct}{Homozygous mismatch percentage for the best female candidate.}
 #'   }
 #' @export
-#' @importFrom data.table fread fwrite copy  data.table set rbindlist
+#' @importFrom data.table fread fwrite copy data.table set rbindlist
 validate_pedigree <- function(pedigree_file, genotypes_file,
                               founders_file = NULL,
                               trio_error_threshold = 5.0,
                               min_markers = 10,
                               single_parent_error_threshold = 2.0,
                               verbose = TRUE,
-                              write_txt = TRUE,
-                              output_filename = "pedigree_validation_results.txt") {
-
+                              write_results = TRUE,
+                              output_filename = "__validation_report.txt",
+                              plot_results = TRUE,
+                              na_string = "NA") { 
+  
   #### Input validation ####
   if (trio_error_threshold < 0 || trio_error_threshold > 100)
     stop("trio_error_threshold must be between 0 and 100")
   if (single_parent_error_threshold < 0 || single_parent_error_threshold > 100)
     stop("single_parent_error_threshold must be between 0 and 100")
-
+  if (!na_string %in% c("NA", ""))
+    stop("na_string must be either 'NA' or ''.")
+  
   tryCatch({
     pedigree <- data.table::fread(pedigree_file, sep = "auto", colClasses = "character")
     genos    <- data.table::fread(genotypes_file, sep = "auto")
   }, error = function(e) {
     stop("Error reading input files. Ensure paths are correct and files are TXT/TSV/CSV.")
   })
-
+  
   #### Check required columns ####
   required_ped_cols <- c("ID", "Male_Parent", "Female_Parent")
   missing_cols <- base::setdiff(required_ped_cols, base::names(pedigree))
@@ -82,12 +92,11 @@ validate_pedigree <- function(pedigree_file, genotypes_file,
          base::paste(missing_cols, collapse = ", "))
   if (!"ID" %in% base::names(genos))
     stop("Genotypes file must have an 'ID' column")
-
-  # Ensure parent columns are character for consistent "0" comparisons
+  
   pedigree[, Male_Parent   := as.character(Male_Parent)]
   pedigree[, Female_Parent := as.character(Female_Parent)]
   original_pedigree <- data.table::copy(pedigree)
-
+  
   #### Read founders list ####
   if (!is.null(founders_file)) {
     founders_raw <- tryCatch({
@@ -99,42 +108,36 @@ validate_pedigree <- function(pedigree_file, genotypes_file,
   } else {
     founder_ids <- character(0)
   }
-
+  
   #### Identify trios missing from the genotype file ####
   valid_ids <- as.character(genos$ID)
   has_geno <- pedigree[ID %in% valid_ids &
                          (Male_Parent %in% valid_ids   | Male_Parent == "0") &
                          (Female_Parent %in% valid_ids | Female_Parent == "0")]
-
   no_geno_rows <- pedigree[!(ID %in% valid_ids) |
                              (!(Male_Parent %in% valid_ids)   & Male_Parent != "0") |
                              (!(Female_Parent %in% valid_ids) & Female_Parent != "0")]
-
   if (base::nrow(no_geno_rows) > 0 && verbose)
     base::cat("Found", base::nrow(no_geno_rows),
               "trios with missing genotype data; flagged as NO_GENOTYPE_DATA.\n")
-
   pedigree <- has_geno
   if (base::nrow(pedigree) == 0)
     stop("No valid trios remain after filtering for genotype availability.")
-
+  
   #### Build genotype matrices ####
   genos_mat <- base::as.matrix(genos, rownames = "ID")
-
-  # Homozygous-only matrix (het markers set to NA)
   genos_hom   <- data.table::copy(genos)
   marker_cols <- base::setdiff(base::names(genos_hom), "ID")
   for (col in marker_cols)
     genos_hom[base::get(col) == 1, (col) := NA_integer_]
   genos_hom_mat <- base::as.matrix(genos_hom, rownames = "ID")
-
-  #### Helper: find best matching parent via homozygous mismatch ####
+  
+  #### Find best matching parent via homozygous mismatch ####
   find_best_parent <- function(prog_id, exclude_ids = base::character(0)) {
     candidates <- base::setdiff(base::rownames(genos_hom_mat),
                                 c(prog_id, exclude_ids))
     if (base::length(candidates) == 0)
       return(base::list(id = NA_character_, error_pct = NA_real_))
-
     prog_hom <- genos_hom_mat[prog_id, ]
     errors <- base::sapply(candidates, function(cand_id) {
       cand_hom    <- genos_hom_mat[cand_id, ]
@@ -142,20 +145,16 @@ validate_pedigree <- function(pedigree_file, genotypes_file,
       if (comparisons == 0) return(NA_real_)
       (base::sum(cand_hom != prog_hom, na.rm = TRUE) / comparisons) * 100
     })
-
     best_idx <- base::which.min(errors)
     base::list(id = candidates[best_idx],
                error_pct = base::round(errors[best_idx], 2))
   }
-
+  
   #### Main trio evaluation loop ####
   results_list <- base::lapply(base::seq_len(base::nrow(pedigree)), function(i) {
-
     prog_id          <- pedigree$ID[i]
     male_parent_id   <- pedigree$Male_Parent[i]
     female_parent_id <- pedigree$Female_Parent[i]
-
-    # Default values
     correction_decision     <- "NONE"
     error_pct               <- NA_real_
     status                  <- "NO_DATA"
@@ -166,51 +165,37 @@ validate_pedigree <- function(pedigree_file, genotypes_file,
     best_male_parent_pct    <- NA_real_
     best_female_parent      <- NA_character_
     best_female_parent_pct  <- NA_real_
-
-    ## Founder check — both parents "0" and ID in founders list
+    
     if (male_parent_id == "0" && female_parent_id == "0" &&
         prog_id %in% founder_ids) {
       status              <- "FOUNDERS"
       correction_decision <- "NONE"
-
     } else {
-
-      ## Missing parent(s) — recommendations only, "0"s preserved in pedigree
       if (male_parent_id == "0" && female_parent_id == "0") {
         status              <- "MISSING_BOTH_PARENTS"
         correction_decision <- "NONE"
-
         best_m                 <- find_best_parent(prog_id, exclude_ids = character(0))
         best_male_parent       <- best_m$id
         best_male_parent_pct   <- best_m$error_pct
-
         best_f                 <- find_best_parent(prog_id, exclude_ids = c(best_m$id))
         best_female_parent     <- best_f$id
         best_female_parent_pct <- best_f$error_pct
-
       } else if (male_parent_id == "0" && female_parent_id != "0") {
         status              <- "MISSING_MALE_PARENT"
         correction_decision <- "NONE"
-
         best_m               <- find_best_parent(prog_id, exclude_ids = c(female_parent_id))
         best_male_parent     <- best_m$id
         best_male_parent_pct <- best_m$error_pct
-
       } else if (male_parent_id != "0" && female_parent_id == "0") {
         status              <- "MISSING_FEMALE_PARENT"
         correction_decision <- "NONE"
-
         best_f                 <- find_best_parent(prog_id, exclude_ids = c(male_parent_id))
         best_female_parent     <- best_f$id
         best_female_parent_pct <- best_f$error_pct
-
       } else {
-
-        ## Both parents present — Mendelian error calculation
         progeny_vec       <- genos_mat[prog_id, ]
         male_parent_vec   <- genos_mat[male_parent_id, ]
         female_parent_vec <- genos_mat[female_parent_id, ]
-
         mismatches <- base::sum(
           (male_parent_vec == 0 & female_parent_vec == 0 & progeny_vec > 0) |
             (male_parent_vec == 2 & female_parent_vec == 2 & progeny_vec < 2) |
@@ -222,19 +207,14 @@ validate_pedigree <- function(pedigree_file, genotypes_file,
                (male_parent_vec == 2 & female_parent_vec == 0)) & (progeny_vec != 1),
           na.rm = TRUE
         )
-
         markers_tested <- base::sum(!base::is.na(male_parent_vec) &
                                       !base::is.na(female_parent_vec) &
                                       !base::is.na(progeny_vec))
-
         if (markers_tested == 0) {
           status              <- "NO_DATA"
           correction_decision <- "NONE"
-
         } else {
           error_pct <- (mismatches / markers_tested) * 100
-
-          # LOW_MARKERS still computes parent mismatch/recommendations
           if (markers_tested < min_markers) {
             status <- "LOW_MARKERS"
           } else if (error_pct <= trio_error_threshold) {
@@ -243,32 +223,24 @@ validate_pedigree <- function(pedigree_file, genotypes_file,
           } else {
             status <- "FAIL"
           }
-
-          # Run parent-level evaluation for both FAIL and LOW_MARKERS
           if (status %in% c("FAIL", "LOW_MARKERS")) {
-
-            # Homozygous mismatch per parent
             progeny_hom       <- genos_hom_mat[prog_id, ]
             male_parent_hom   <- genos_hom_mat[male_parent_id, ]
             female_parent_hom <- genos_hom_mat[female_parent_id, ]
-
             male_comparisons <- base::sum(!base::is.na(male_parent_hom) &
                                             !base::is.na(progeny_hom))
             male_parent_error_pct <- if (male_comparisons == 0) NA_real_ else
               base::round((base::sum(male_parent_hom != progeny_hom, na.rm = TRUE) /
                              male_comparisons) * 100, 2)
-
             female_comparisons <- base::sum(!base::is.na(female_parent_hom) &
                                               !base::is.na(progeny_hom))
             female_parent_error_pct <- if (female_comparisons == 0) NA_real_ else
               base::round((base::sum(female_parent_hom != progeny_hom, na.rm = TRUE) /
                              female_comparisons) * 100, 2)
-
             male_acceptable <- !is.na(male_parent_error_pct) &&
               male_parent_error_pct <= single_parent_error_threshold
             female_acceptable <- !is.na(female_parent_error_pct) &&
               female_parent_error_pct <= single_parent_error_threshold
-
             if (male_acceptable && female_acceptable) {
               correction_decision <- "KEEP_BOTH"
             } else if (male_acceptable && !female_acceptable) {
@@ -290,61 +262,58 @@ validate_pedigree <- function(pedigree_file, genotypes_file,
               best_female_parent     <- best_f$id
               best_female_parent_pct <- best_f$error_pct
             }
-
-            # Do not alter corrected pedigree for LOW_MARKERS rows
             if (status == "LOW_MARKERS")
               correction_decision <- paste0("LOW_MARKERS_", correction_decision)
           }
         }
       }
     }
-
+    
     data.table::data.table(
-      ID                           = prog_id,
-      Male_Parent                  = male_parent_id,
-      Female_Parent                = female_parent_id,
-      Mendelian_Error_Pct          = base::round(error_pct, 2),
-      Markers_Tested               = markers_tested,
-      Status                       = status,
-      Correction_Decision          = correction_decision,
-      Male_Parent_Hom_Error_Pct    = male_parent_error_pct,
-      Female_Parent_Hom_Error_Pct  = female_parent_error_pct,
-      Best_Male_Parent             = best_male_parent,
-      Best_Male_Parent_Error_Pct   = best_male_parent_pct,
-      Best_Female_Parent           = best_female_parent,
-      Best_Female_Parent_Error_Pct = best_female_parent_pct
+      ID                              = prog_id,
+      Orig_Male_Parent                = male_parent_id,
+      Orig_Female_Parent              = female_parent_id,
+      Trio_Mendelian_Error_Pct        = base::round(error_pct, 2),
+      Trio_Markers_Tested             = markers_tested,
+      Status                          = status,
+      Recommended_Correction          = correction_decision,
+      Male_Parent_Hom_Error_Pct       = male_parent_error_pct,
+      Female_Parent_Hom_Error_Pct     = female_parent_error_pct,
+      Best_Male_Candidate             = best_male_parent,
+      Best_Male_Candidate_Error_Pct   = best_male_parent_pct,
+      Best_Female_Candidate           = best_female_parent,
+      Best_Female_Candidate_Error_Pct = best_female_parent_pct
     )
   })
-
+  
   final_df <- data.table::rbindlist(results_list)
-
-  #### Append NO_GENOTYPE_DATA rows to the final report ####
+  
+  #### Append NO_GENOTYPE_DATA rows ####
   if (base::nrow(no_geno_rows) > 0) {
     no_geno_df <- data.table::data.table(
-      ID                           = no_geno_rows$ID,
-      Male_Parent                  = no_geno_rows$Male_Parent,
-      Female_Parent                = no_geno_rows$Female_Parent,
-      Mendelian_Error_Pct          = NA_real_,
-      Markers_Tested               = 0L,
-      Status                       = "NO_GENOTYPE_DATA",
-      Correction_Decision          = "NONE",
-      Male_Parent_Hom_Error_Pct    = NA_real_,
-      Female_Parent_Hom_Error_Pct  = NA_real_,
-      Best_Male_Parent             = NA_character_,
-      Best_Male_Parent_Error_Pct   = NA_real_,
-      Best_Female_Parent           = NA_character_,
-      Best_Female_Parent_Error_Pct = NA_real_
+      ID                              = no_geno_rows$ID,
+      Orig_Male_Parent                = no_geno_rows$Male_Parent,
+      Orig_Female_Parent              = no_geno_rows$Female_Parent,
+      Trio_Mendelian_Error_Pct        = NA_real_,
+      Trio_Markers_Tested             = 0L,
+      Status                          = "NO_GENOTYPE_DATA",
+      Recommended_Correction          = "NONE",
+      Male_Parent_Hom_Error_Pct       = NA_real_,
+      Female_Parent_Hom_Error_Pct     = NA_real_,
+      Best_Male_Candidate             = NA_character_,
+      Best_Male_Candidate_Error_Pct   = NA_real_,
+      Best_Female_Candidate           = NA_character_,
+      Best_Female_Candidate_Error_Pct = NA_real_
     )
     final_df <- data.table::rbindlist(list(final_df, no_geno_df))
   }
-
+  
   #### Write corrected pedigree ####
   corrected_pedigree <- data.table::copy(original_pedigree)
   for (i in base::seq_len(base::nrow(final_df))) {
     prog_id  <- final_df$ID[i]
-    decision <- final_df$Correction_Decision[i]
+    decision <- final_df$Recommended_Correction[i]
     row_idx  <- base::which(corrected_pedigree$ID == prog_id)
-
     if (decision == "REMOVE_MALE_PARENT") {
       data.table::set(corrected_pedigree, row_idx, "Male_Parent", "0")
     } else if (decision == "REMOVE_FEMALE_PARENT") {
@@ -354,7 +323,6 @@ validate_pedigree <- function(pedigree_file, genotypes_file,
       data.table::set(corrected_pedigree, row_idx, "Female_Parent", "0")
     }
   }
-
   tryCatch({
     data.table::fwrite(corrected_pedigree, file = "corrected_pedigree.txt",
                        sep = "\t", quote = FALSE)
@@ -362,7 +330,7 @@ validate_pedigree <- function(pedigree_file, genotypes_file,
   }, error = function(e) {
     warning("Could not write corrected pedigree. Error: ", e$message, call. = FALSE)
   })
-
+  
   #### Summary output ####
   if (verbose) {
     total_trios   <- base::nrow(final_df)
@@ -376,8 +344,7 @@ validate_pedigree <- function(pedigree_file, genotypes_file,
     base::cat("Error threshold:", trio_error_threshold, "%\n")
     base::cat("Homozygous threshold:", single_parent_error_threshold, "%\n")
     base::cat("Minimum markers required:", min_markers, "\n\n")
-
-    corrections <- base::table(final_df$Correction_Decision)
+    corrections <- base::table(final_df$Recommended_Correction)
     base::cat("Correction summary:\n")
     for (decision in base::names(corrections))
       if (decision != "NONE")
@@ -385,17 +352,86 @@ validate_pedigree <- function(pedigree_file, genotypes_file,
     base::cat("\n")
     base::print(final_df)
   }
-
+  
   #### Write results ####
-  if (write_txt) {
+  if (write_results) {
     tryCatch({
       data.table::fwrite(final_df, file = output_filename,
-                         sep = "\t", quote = FALSE)
+                         sep = "\t", quote = FALSE, na = na_string)   # <-- na_string used here
       if (verbose) base::cat("Results written to:", output_filename, "\n")
     }, error = function(e) {
       warning("Could not write results. Error: ", e$message, call. = FALSE)
     })
   }
-
+  
+  #### Plot results ####
+  if (plot_results) {
+    if (!requireNamespace("ggplot2", quietly = TRUE)) {
+      warning("ggplot2 is required for plot_results = TRUE. Please install it.", call. = FALSE)
+    } else {
+      plot_df <- final_df[!is.na(final_df$Trio_Mendelian_Error_Pct)]
+      plot_df$Plot_Status <- dplyr::case_when(
+        plot_df$Recommended_Correction %in% c("NONE",
+                                              "KEEP_BOTH",
+                                              "LOW_MARKERS_KEEP_BOTH")              ~ "PASS",
+        plot_df$Recommended_Correction %in% c("REMOVE_MALE_PARENT",
+                                              "REMOVE_FEMALE_PARENT",
+                                              "LOW_MARKERS_REMOVE_MALE_PARENT",
+                                              "LOW_MARKERS_REMOVE_FEMALE_PARENT")   ~ "FAIL_ONE_PARENT",
+        plot_df$Recommended_Correction %in% c("REMOVE_BOTH",
+                                              "LOW_MARKERS_REMOVE_BOTH")            ~ "FAIL_BOTH_PARENTS",
+        TRUE                                                                         ~ "OTHER"
+      )
+      n_total  <- nrow(plot_df)
+      n_fail   <- sum(plot_df$Trio_Mendelian_Error_Pct > trio_error_threshold)
+      n_pass   <- sum(plot_df$Trio_Mendelian_Error_Pct <= trio_error_threshold)
+      pct_lost <- round((n_fail / n_total) * 100, 1)
+      threshold_label <- paste0("Mendelian Error Threshold: ", trio_error_threshold, "%  |  ",
+                                "Lost: ", n_fail, " trios  |  ",
+                                "Kept: ", n_pass, " trios")
+      p <- ggplot2::ggplot(plot_df,
+                           ggplot2::aes(x = Trio_Mendelian_Error_Pct, fill = Plot_Status)) +
+        ggplot2::geom_histogram(binwidth = 1, color = "white", alpha = 0.9) +
+        ggplot2::geom_vline(xintercept = trio_error_threshold,
+                            linetype = "dashed", color = "black", linewidth = 1) +
+        ggplot2::scale_x_continuous(breaks = seq(0, 100, by = 5)) +
+        ggplot2::scale_y_continuous(breaks = seq(0, 100, by = 5)) +
+        ggplot2::scale_fill_manual(
+          values = c(
+            "PASS"              = "#339900",
+            "FAIL_ONE_PARENT"   = "#F1C40F",
+            "FAIL_BOTH_PARENTS" = "#cc3333",
+            "OTHER"             = "#BDC3C7"
+          ),
+          labels = c(
+            "PASS"              = "Pass",
+            "FAIL_ONE_PARENT"   = "Fail - One Parent",
+            "FAIL_BOTH_PARENTS" = "Fail - Both Parents",
+            "OTHER"             = "Other"
+          )
+        ) +
+        ggplot2::labs(
+          title    = "Trio Mendelian Error Distribution",
+          subtitle = paste0("Trios with Genotype Data Tested: ", n_total, "\n \n", threshold_label),
+          x        = "Mendelian Error (%)",
+          y        = "Number of Trios",
+          fill     = "Status"
+        ) +
+        ggplot2::theme_classic(base_size = 13) +
+        ggplot2::theme(legend.position = "top")
+      print(p)
+      if (write_results) {
+        plot_filename <- base::sub("\\.[^.]+$", ".jpg", output_filename)
+        tryCatch({
+          ggplot2::ggsave(plot_filename, plot = p,
+                          device = "jpeg", width = 10, height = 6, dpi = 300)
+          if (verbose) base::cat("Plot saved to:", plot_filename, "\n")
+        }, error = function(e) {
+          warning("Could not save plot. Error: ", e$message, call. = FALSE)
+        })
+      }
+    }
+  }
+  
   return(base::invisible(final_df))
-}
+}
\ No newline at end of file
diff --git a/man/find_parentage.Rd b/man/find_parentage.Rd
index 706a15f..44b9772 100644
--- a/man/find_parentage.Rd
+++ b/man/find_parentage.Rd
@@ -14,7 +14,9 @@ find_parentage(
   show_ties = TRUE,
   allow_selfing = TRUE,
   verbose = TRUE,
-  write_txt = TRUE
+  write_txt = TRUE,
+  na_string = "NA",
+  plot_results = TRUE
 )
 }
 \arguments{
@@ -23,117 +25,77 @@ Must include an 'ID' column followed by marker columns coded as 0, 1, 2
 (allele dosage).}
 
 \item{parents_file}{Path to a TSV/CSV/TXT file listing candidate parent IDs.
-Must include an 'ID' column. An optional 'Sex' column with values
-'M' (male parent), 'F' (female parent), or 'A' (ambiguous) determines
-which parents are tested for each role. If absent, all parents are treated
-as ambiguous.}
+Must include an 'ID' column. An optional 'Sex' column ('M', 'F', or 'A')
+determines parent roles. If absent, all parents are treated as ambiguous.}
 
-\item{progeny_file}{Path to a TSV/CSV/TXT file listing progeny IDs to assign.
+\item{progeny_file}{Path to a TSV/CSV/TXT file listing progeny IDs.
 Must include an 'ID' column.}
 
-\item{method}{Character. Parentage assignment method. One of:
-\itemize{
-\item \code{"best_male_parent"}  — finds the best male parent for each
-progeny using homozygous mismatch rate.
-\item \code{"best_female_parent"} — finds the best female parent for each
-progeny using homozygous mismatch rate.
-\item \code{"best_match"} — finds the single best parent (either sex)
-using homozygous mismatch rate.
-\item \code{"best_pair"}  — finds the best male-female parent pair for
-each progeny using full Mendelian error rate (default).
-}}
-
-\item{min_markers}{Integer. Minimum number of non-missing markers required
-to report a parentage assignment. Progeny-parent comparisons with fewer
-markers are flagged as \code{LOW_MARKERS} and no assignment is made
-(default: \code{10}).}
-
-\item{error_threshold}{Numeric. Maximum mismatch percentage to report a
-parentage assignment as confident. Assignments above this threshold are
-flagged as \code{HIGH_ERROR} in the \code{Assignment_Status} column
-(default: \code{5.0}). Must be between 0 and 100.}
-
-\item{show_ties}{Logical. If \code{TRUE}, all tied best pairs (after
-tie-breaking by maximum markers tested) are reported as additional columns
-(\code{Male_Parent_1}, \code{Male_Parent_2}, etc.) when
-\code{method = "best_pair"}. The base columns (\code{Male_Parent},
-\code{Female_Parent}, etc.) are always populated with the top result.
-If \code{FALSE}, only one tied pair is reported with a warning.
-Default is \code{TRUE}.}
+\item{method}{Character. One of \code{"best_male_parent"},
+\code{"best_female_parent"}, \code{"best_match"}, or
+\code{"best_pair"} (default). See Details.}
 
-\item{allow_selfing}{Logical. If \code{FALSE}, male-female parent pairs where
-both IDs are identical are excluded when \code{method = "best_pair"}.
-Default is \code{TRUE}.}
+\item{min_markers}{Integer. Minimum markers required for a valid assignment.
+Fewer markers flags \code{LOW_MARKERS} (default: \code{10}).}
+
+\item{error_threshold}{Numeric. Maximum mismatch percentage for a confident
+assignment. Exceeded values flag \code{HIGH_ERROR} (default: \code{5.0}).
+Must be between 0 and 100.}
+
+\item{show_ties}{Logical. If \code{TRUE}, tied best pairs are reported as
+additional suffix columns (e.g. \code{Male_Parent_2}) when
+\code{method = "best_pair"}. Default is \code{TRUE}.}
+
+\item{allow_selfing}{Logical. If \code{FALSE}, pairs with identical male and
+female parent IDs are excluded. Default is \code{TRUE}.}
 
-\item{verbose}{Logical. If \code{TRUE}, prints progress messages, summary
-statistics, and the results table to the console. Default is \code{TRUE}.}
+\item{verbose}{Logical. If \code{TRUE}, prints progress, summary, and
+results to the console. Default is \code{TRUE}.}
 
 \item{write_txt}{Logical. If \code{TRUE}, writes results to
-\code{parentage_results_dt.txt} in the working directory. Default is
-\code{TRUE}.}
+\code{parentage_testing_results.txt}. Default is \code{TRUE}.}
+
+\item{na_string}{Character. String for missing values in output file.
+Use \code{"NA"} or \code{""} (default: \code{"NA"}).}
+
+\item{plot_results}{Logical. If \code{TRUE}, plots the Mendelian error
+distribution colored by assignment status. Requires \code{ggplot2}.
+Plot is saved as \code{.jpg} when \code{write_txt = TRUE}.
+Default is \code{TRUE}.}
 }
 \value{
-A \code{data.table} with one row per progeny. Columns depend on the
-method used:
-\itemize{
-\item \code{best_male_parent} / \code{best_female_parent} / \code{best_match}:
-\code{Progeny}, \code{Best_Match}, \code{Mendelian_Error_Pct},
-\code{Markers_Tested}, \code{Assignment_Status}.
-\item \code{best_pair} (no ties after tie-breaking): \code{Progeny},
-\code{Male_Parent}, \code{Female_Parent}, \code{Mendelian_Error_Pct},
-\code{Markers_Tested}, \code{Assignment_Status}.
-\item \code{best_pair} (ties remain after tie-breaking,
-\code{show_ties = TRUE}): base columns are always populated with the
-top result, plus suffix columns \code{Male_Parent_1},
-\code{Female_Parent_1}, etc. for each tied pair.
-}
-\code{Assignment_Status} is one of \code{PASS}, \code{HIGH_ERROR}, or
-\code{LOW_MARKERS}. Returned invisibly when \code{verbose = TRUE}.
+A \code{data.table} with one row per progeny. Base columns are
+\code{Progeny}, \code{Male_Parent} (or \code{Best_Match}),
+\code{Female_Parent}, \code{Mendelian_Error_Pct}, \code{Markers_Tested},
+and \code{Assignment_Status} (\code{PASS}, \code{HIGH_ERROR}, or
+\code{LOW_MARKERS}). Tie suffix columns (\code{Male_Parent_2}, etc.) are
+appended when \code{show_ties = TRUE} and ties remain after tie-breaking.
+Returned invisibly when \code{verbose = TRUE}.
 }
 \description{
 Assigns the most likely parent(s) to each progeny individual based on
 genotypic data using Mendelian error rates or homozygous mismatch rates.
 }
 \details{
-A homozygous-only genotype matrix is pre-computed once at startup and shared
-across all methods that require it, avoiding redundant computation.
-
-For \code{"best_male_parent"}, \code{"best_female_parent"}, and
-\code{"best_match"}, only homozygous markers (coded 0 or 2) are used for
-comparison; heterozygous markers (coded 1) are set to \code{NA}. This
-reduces false mismatches caused by phase ambiguity.
-
-For \code{"best_pair"}, all markers are used and full Mendelian inheritance
-rules are applied. Mismatch rates and comparison counts are computed across
-all progeny simultaneously using vectorised \code{vapply} calls, producing
-\code{n_pairs x n_progeny} matrices and giving substantial speed gains for
-large datasets. Both matrices are explicitly coerced to matrix form to
-handle the edge case of a single parent pair correctly.
-
-When multiple pairs share the lowest Mendelian error rate, ties are broken
-by selecting the pair(s) with the greatest number of markers tested. If ties
-still remain after this step, all remaining tied pairs are reported when
-\code{show_ties = TRUE}.
-
-The base columns (\code{Male_Parent}, \code{Female_Parent},
-\code{Mendelian_Error_Pct}, \code{Markers_Tested}) are always populated with
-the top result, ensuring no missing values in these columns regardless of
-tie status.
-
-Output rows are pre-allocated as a \code{data.table} and filled by reference
-using \code{data.table::set()}, avoiding repeated memory allocation during
-the results-building step.
-
-Individuals in \code{parents_file} or \code{progeny_file} that are absent
-from \code{genotypes_file} are removed with a warning.
-
-Progeny with fewer non-missing markers than \code{min_markers} are flagged
-\code{LOW_MARKERS} and no parent assignment is reported. Progeny whose best
-match exceeds \code{error_threshold} are flagged \code{HIGH_ERROR}.
+A homozygous-only matrix is pre-computed once and shared across
+\code{"best_male_parent"}, \code{"best_female_parent"}, and
+\code{"best_match"}, where heterozygous markers (coded 1) are set to
+\code{NA} to reduce false mismatches from phase ambiguity.
+
+For \code{"best_pair"}, full Mendelian inheritance rules are applied across
+all marker and pair combinations using vectorised \code{vapply} calls,
+producing \code{n_pairs x n_progeny} mismatch and comparison matrices.
+
+Ties on mismatch rate are broken by maximum markers tested. Remaining ties
+are reported as suffix columns when \code{show_ties = TRUE}. Base columns
+are always populated with the top result.
+
+Parents or progeny absent from \code{genotypes_file} are removed with a
+warning. Progeny below \code{min_markers} are flagged \code{LOW_MARKERS};
+those exceeding \code{error_threshold} are flagged \code{HIGH_ERROR}.
 }
 \examples{
 \dontrun{
-# Assign best male-female parent pair to each progeny
 results <- find_parentage(
   genotypes_file  = "genotypes.txt",
   parents_file    = "parents.txt",
@@ -142,10 +104,11 @@ results <- find_parentage(
   min_markers     = 50,
   error_threshold = 5.0,
   show_ties       = TRUE,
-  allow_selfing   = FALSE
+  allow_selfing   = FALSE,
+  na_string       = "NA",
+  plot_results    = TRUE
 )
 
-# Find best individual parent match (ignoring sex)
 results <- find_parentage(
   genotypes_file  = "genotypes.txt",
   parents_file    = "parents.txt",
diff --git a/man/validate_pedigree.Rd b/man/validate_pedigree.Rd
index 8f47a7d..8ec975b 100644
--- a/man/validate_pedigree.Rd
+++ b/man/validate_pedigree.Rd
@@ -12,8 +12,10 @@ validate_pedigree(
   min_markers = 10,
   single_parent_error_threshold = 2,
   verbose = TRUE,
-  write_txt = TRUE,
-  output_filename = "pedigree_validation_results.txt"
+  write_results = TRUE,
+  output_filename = "__validation_report.txt",
+  plot_results = TRUE,
+  na_string = "NA"
 )
 }
 \arguments{
@@ -40,34 +42,42 @@ parent-level evaluation (default: 2.0). Must be between 0 and 100.}
 \item{verbose}{Logical. If TRUE, prints progress messages, a summary
 table, and results to the console (default: TRUE).}
 
-\item{write_txt}{Logical. If TRUE, writes validation results to
-output_filename (default: TRUE).}
+\item{write_results}{Logical. If TRUE, writes the validation results to
+output_filename and saves the plot as a .jpg file (default: TRUE).}
 
-\item{output_filename}{Character. Path/name of the output file
-(default: "pedigree_validation_results.txt").}
+\item{output_filename}{Character. Path/name of the output results file
+(default: "__validation_report.txt"). The plot will be saved using the
+same base name with a .jpg extension.}
+
+\item{plot_results}{Logical. If TRUE, prints a histogram of Trio Mendelian
+Error percentages with a threshold line (default: TRUE). If write_results
+is also TRUE, the plot is additionally saved as a .jpg file.}
+
+\item{na_string}{Character. String for missing values in the output file.
+Use \code{"NA"} or \code{""} (default: \code{"NA"}).}
 }
 \value{
 A data.table (returned invisibly) with one row per trio and
 the following columns:
 \describe{
 \item{ID}{Individual ID.}
-\item{Male_Parent}{Declared male parent ID.}
-\item{Female_Parent}{Declared female parent ID.}
-\item{Mendelian_Error_Pct}{Trio-level Mendelian error percentage.}
-\item{Markers_Tested}{Number of markers with non-missing genotypes.}
+\item{Orig_Male_Parent}{Declared male parent ID.}
+\item{Orig_Female_Parent}{Declared female parent ID.}
+\item{Trio_Mendelian_Error_Pct}{Trio-level Mendelian error percentage.}
+\item{Trio_Markers_Tested}{Number of markers with non-missing genotypes.}
 \item{Status}{One of PASS, FAIL, LOW_MARKERS, NO_DATA, FOUNDERS,
 MISSING_MALE_PARENT, MISSING_FEMALE_PARENT, MISSING_BOTH_PARENTS,
 or NO_GENOTYPE_DATA.}
-\item{Correction_Decision}{One of NONE, KEEP_BOTH,
+\item{Recommended_Correction}{One of NONE, KEEP_BOTH,
 REMOVE_MALE_PARENT, REMOVE_FEMALE_PARENT, REMOVE_BOTH,
 LOW_MARKERS_KEEP_BOTH, LOW_MARKERS_REMOVE_MALE_PARENT,
 LOW_MARKERS_REMOVE_FEMALE_PARENT, LOW_MARKERS_REMOVE_BOTH.}
 \item{Male_Parent_Hom_Error_Pct}{Male parent homozygous-marker mismatch percentage.}
 \item{Female_Parent_Hom_Error_Pct}{Female parent homozygous-marker mismatch percentage.}
-\item{Best_Male_Parent}{Best-matching male parent candidate ID.}
-\item{Best_Male_Parent_Error_Pct}{Homozygous mismatch percentage for the best male parent candidate.}
-\item{Best_Female_Parent}{Best-matching female parent candidate ID.}
-\item{Best_Female_Parent_Error_Pct}{Homozygous mismatch percentage for the best female parent candidate.}
+\item{Best_Male_Candidate}{Best-matching male parent candidate ID.}
+\item{Best_Male_Candidate_Error_Pct}{Homozygous mismatch percentage for the best male candidate.}
+\item{Best_Female_Candidate}{Best-matching female parent candidate ID.}
+\item{Best_Female_Candidate_Error_Pct}{Homozygous mismatch percentage for the best female candidate.}
 }
 }
 \description{
diff --git a/tests/testthat/test-find_parentage.R b/tests/testthat/test-find_parentage.R
index 55e6913..d1dfb3c 100644
--- a/tests/testthat/test-find_parentage.R
+++ b/tests/testthat/test-find_parentage.R
@@ -17,11 +17,12 @@ make_files <- function(genos, parents, progeny, dir = tempdir()) {
   list(g = geno_file, p = parent_file, pr = progeny_file)
 }
 
-# ------------------------------------------------------------------------------
+# ==============================================================================
 # Base toy data
 # S1 / D1: all 0  → child1 (all 0) is a perfect Mendelian child of S1 x D1
 # S2 / D2: all 2  → child2 (all 2) is a perfect Mendelian child of S2 x D2
-# ------------------------------------------------------------------------------
+# ==============================================================================
+
 base_genos <- data.table::data.table(
   ID = c("S1","S2","D1","D2","child1","child2"),
   M1 = c(0L, 2L, 0L, 2L, 0L, 2L),
@@ -54,7 +55,7 @@ test_that("invalid method throws an error", {
   f <- make_files(base_genos, base_parents, child1_progeny)
   expect_error(
     find_parentage(f$g, f$p, f$pr, method = "bad_method",
-                   verbose = FALSE, write_txt = FALSE),
+                   verbose = FALSE, write_txt = FALSE, plot_results = FALSE),
     regexp = "Method must be one of"
   )
 })
@@ -63,7 +64,7 @@ test_that("min_markers < 1 throws an error", {
   f <- make_files(base_genos, base_parents, child1_progeny)
   expect_error(
     find_parentage(f$g, f$p, f$pr, min_markers = 0,
-                   verbose = FALSE, write_txt = FALSE),
+                   verbose = FALSE, write_txt = FALSE, plot_results = FALSE),
     regexp = "min_markers"
   )
 })
@@ -72,21 +73,31 @@ test_that("error_threshold out of range throws an error", {
   f <- make_files(base_genos, base_parents, child1_progeny)
   expect_error(
     find_parentage(f$g, f$p, f$pr, error_threshold = 150,
-                   verbose = FALSE, write_txt = FALSE),
+                   verbose = FALSE, write_txt = FALSE, plot_results = FALSE),
     regexp = "error_threshold"
   )
   expect_error(
     find_parentage(f$g, f$p, f$pr, error_threshold = -1,
-                   verbose = FALSE, write_txt = FALSE),
+                   verbose = FALSE, write_txt = FALSE, plot_results = FALSE),
     regexp = "error_threshold"
   )
 })
 
+test_that("invalid na_string throws an error", {
+  f <- make_files(base_genos, base_parents, child1_progeny)
+  expect_error(
+    find_parentage(f$g, f$p, f$pr, na_string = "NULL",
+                   verbose = FALSE, write_txt = FALSE, plot_results = FALSE),
+    regexp = "na_string"
+  )
+})
+
 test_that("missing genotype file throws an error", {
   f <- make_files(base_genos, base_parents, child1_progeny)
   expect_error(
     find_parentage("nonexistent.txt", f$p, f$pr,
-                   verbose = FALSE, write_txt = FALSE)
+                   verbose = FALSE, write_txt = FALSE, plot_results = FALSE),
+    regexp = "Error reading input files"
   )
 })
 
@@ -96,7 +107,7 @@ test_that("parent IDs absent from genotype file raise a warning and are dropped"
   f <- make_files(base_genos, extra_parents, child1_progeny)
   expect_warning(
     find_parentage(f$g, f$p, f$pr, method = "best_pair",
-                   verbose = FALSE, write_txt = FALSE),
+                   verbose = FALSE, write_txt = FALSE, plot_results = FALSE),
     regexp = "GHOST"
   )
 })
@@ -107,7 +118,7 @@ test_that("progeny IDs absent from genotype file raise a warning and are dropped
   f <- make_files(base_genos, base_parents, extra_progeny)
   expect_warning(
     find_parentage(f$g, f$p, f$pr, method = "best_pair",
-                   verbose = FALSE, write_txt = FALSE),
+                   verbose = FALSE, write_txt = FALSE, plot_results = FALSE),
     regexp = "GHOST_KID"
   )
 })
@@ -118,7 +129,7 @@ test_that("no valid progeny candidates after filtering stops with an error", {
   expect_warning(
     expect_error(
       find_parentage(f$g, f$p, f$pr, method = "best_pair",
-                     verbose = FALSE, write_txt = FALSE),
+                     verbose = FALSE, write_txt = FALSE, plot_results = FALSE),
       regexp = "No valid progeny"
     )
   )
@@ -129,7 +140,7 @@ test_that("missing Sex column raises a warning and defaults to ambiguous", {
   f <- make_files(base_genos, parents_no_sex, child1_progeny)
   expect_warning(
     find_parentage(f$g, f$p, f$pr, method = "best_match",
-                   verbose = FALSE, write_txt = FALSE),
+                   verbose = FALSE, write_txt = FALSE, plot_results = FALSE),
     regexp = "Sex"
   )
 })
@@ -141,7 +152,8 @@ test_that("missing Sex column raises a warning and defaults to ambiguous", {
 test_that("best_pair returns a data.table with expected columns (no ties)", {
   f   <- make_files(base_genos, base_parents, child1_progeny)
   res <- find_parentage(f$g, f$p, f$pr, method = "best_pair",
-                        show_ties = FALSE, verbose = FALSE, write_txt = FALSE)
+                        show_ties = FALSE, verbose = FALSE,
+                        write_txt = FALSE, plot_results = FALSE)
   expect_s3_class(res, "data.table")
   expect_true(all(c("Progeny","Male_Parent","Female_Parent",
                     "Mendelian_Error_Pct","Markers_Tested",
@@ -152,17 +164,18 @@ test_that("best_pair returns a data.table with expected columns (no ties)", {
 test_that("best_male_parent returns expected columns", {
   f   <- make_files(base_genos, base_parents, child1_progeny)
   res <- find_parentage(f$g, f$p, f$pr, method = "best_male_parent",
-                        verbose = FALSE, write_txt = FALSE)
+                        verbose = FALSE, write_txt = FALSE, plot_results = FALSE)
   expect_s3_class(res, "data.table")
   expect_true(all(c("Progeny","Best_Match","Mendelian_Error_Pct",
                     "Markers_Tested","Assignment_Status") %in% names(res)))
+  expect_false("Male_Parent" %in% names(res))
   expect_equal(nrow(res), 1L)
 })
 
 test_that("best_female_parent returns expected columns", {
   f   <- make_files(base_genos, base_parents, child1_progeny)
   res <- find_parentage(f$g, f$p, f$pr, method = "best_female_parent",
-                        verbose = FALSE, write_txt = FALSE)
+                        verbose = FALSE, write_txt = FALSE, plot_results = FALSE)
   expect_s3_class(res, "data.table")
   expect_true(all(c("Progeny","Best_Match","Mendelian_Error_Pct",
                     "Markers_Tested","Assignment_Status") %in% names(res)))
@@ -172,7 +185,7 @@ test_that("best_female_parent returns expected columns", {
 test_that("best_match returns expected columns", {
   f   <- make_files(base_genos, base_parents, child1_progeny)
   res <- find_parentage(f$g, f$p, f$pr, method = "best_match",
-                        verbose = FALSE, write_txt = FALSE)
+                        verbose = FALSE, write_txt = FALSE, plot_results = FALSE)
   expect_s3_class(res, "data.table")
   expect_true(all(c("Progeny","Best_Match","Mendelian_Error_Pct",
                     "Markers_Tested","Assignment_Status") %in% names(res)))
@@ -183,7 +196,7 @@ test_that("one row returned per progeny for single-parent methods", {
   f <- make_files(base_genos, base_parents, child1_progeny)
   for (m in c("best_male_parent","best_female_parent","best_match")) {
     res <- find_parentage(f$g, f$p, f$pr, method = m,
-                          verbose = FALSE, write_txt = FALSE)
+                          verbose = FALSE, write_txt = FALSE, plot_results = FALSE)
     expect_equal(nrow(res), 1L, label = paste("row count for method", m))
   }
 })
@@ -191,14 +204,16 @@ test_that("one row returned per progeny for single-parent methods", {
 test_that("Markers_Tested equals the number of non-NA marker columns", {
   f   <- make_files(base_genos, base_parents, child1_progeny)
   res <- find_parentage(f$g, f$p, f$pr, method = "best_pair",
-                        show_ties = FALSE, verbose = FALSE, write_txt = FALSE)
+                        show_ties = FALSE, verbose = FALSE,
+                        write_txt = FALSE, plot_results = FALSE)
   expect_equal(res$Markers_Tested, ncol(base_genos) - 1L)
 })
 
 test_that("Mendelian_Error_Pct is between 0 and 100", {
   f   <- make_files(base_genos, base_parents, child1_progeny)
   res <- find_parentage(f$g, f$p, f$pr, method = "best_pair",
-                        show_ties = FALSE, verbose = FALSE, write_txt = FALSE)
+                        show_ties = FALSE, verbose = FALSE,
+                        write_txt = FALSE, plot_results = FALSE)
   pct <- as.numeric(res$Mendelian_Error_Pct)
   expect_true(all(pct >= 0 & pct <= 100, na.rm = TRUE))
 })
@@ -210,7 +225,8 @@ test_that("Mendelian_Error_Pct is between 0 and 100", {
 test_that("best_pair correctly identifies S1 x D1 for child1 with 0% error", {
   f   <- make_files(base_genos, base_parents, child1_progeny)
   res <- find_parentage(f$g, f$p, f$pr, method = "best_pair",
-                        show_ties = FALSE, verbose = FALSE, write_txt = FALSE)
+                        show_ties = FALSE, verbose = FALSE,
+                        write_txt = FALSE, plot_results = FALSE)
   expect_equal(res$Male_Parent,   "S1")
   expect_equal(res$Female_Parent, "D1")
   expect_equal(as.numeric(res$Mendelian_Error_Pct), 0)
@@ -219,7 +235,8 @@ test_that("best_pair correctly identifies S1 x D1 for child1 with 0% error", {
 test_that("best_pair correctly identifies S2 x D2 for child2 with 0% error", {
   f   <- make_files(base_genos, base_parents, child2_progeny)
   res <- find_parentage(f$g, f$p, f$pr, method = "best_pair",
-                        show_ties = FALSE, verbose = FALSE, write_txt = FALSE)
+                        show_ties = FALSE, verbose = FALSE,
+                        write_txt = FALSE, plot_results = FALSE)
   expect_equal(res$Male_Parent,   "S2")
   expect_equal(res$Female_Parent, "D2")
   expect_equal(as.numeric(res$Mendelian_Error_Pct), 0)
@@ -228,24 +245,39 @@ test_that("best_pair correctly identifies S2 x D2 for child2 with 0% error", {
 test_that("best_male_parent identifies S1 as best male for child1", {
   f   <- make_files(base_genos, base_parents, child1_progeny)
   res <- find_parentage(f$g, f$p, f$pr, method = "best_male_parent",
-                        verbose = FALSE, write_txt = FALSE)
+                        verbose = FALSE, write_txt = FALSE, plot_results = FALSE)
   expect_equal(res$Best_Match, "S1")
 })
 
 test_that("best_female_parent identifies D1 as best female for child1", {
   f   <- make_files(base_genos, base_parents, child1_progeny)
   res <- find_parentage(f$g, f$p, f$pr, method = "best_female_parent",
-                        verbose = FALSE, write_txt = FALSE)
+                        verbose = FALSE, write_txt = FALSE, plot_results = FALSE)
   expect_equal(res$Best_Match, "D1")
 })
 
 test_that("Mendelian_Error_Pct is 0 for a perfect parent-progeny trio", {
   f   <- make_files(base_genos, base_parents, child1_progeny)
   res <- find_parentage(f$g, f$p, f$pr, method = "best_pair",
-                        show_ties = FALSE, verbose = FALSE, write_txt = FALSE)
+                        show_ties = FALSE, verbose = FALSE,
+                        write_txt = FALSE, plot_results = FALSE)
   expect_equal(as.numeric(res$Mendelian_Error_Pct), 0)
 })
 
+test_that("both child1 and child2 are correctly assigned when run together", {
+  f   <- make_files(base_genos, base_parents, base_progeny)
+  res <- find_parentage(f$g, f$p, f$pr, method = "best_pair",
+                        show_ties = FALSE, verbose = FALSE,
+                        write_txt = FALSE, plot_results = FALSE)
+  expect_equal(nrow(res), 2L)
+  child1_row <- res[Progeny == "child1"]
+  child2_row <- res[Progeny == "child2"]
+  expect_equal(child1_row$Male_Parent,   "S1")
+  expect_equal(child1_row$Female_Parent, "D1")
+  expect_equal(child2_row$Male_Parent,   "S2")
+  expect_equal(child2_row$Female_Parent, "D2")
+})
+
 # ==============================================================================
 # 4. Assignment_Status — min_markers and error_threshold
 # ==============================================================================
@@ -254,7 +286,8 @@ test_that("Assignment_Status = PASS for perfect trio within thresholds", {
   f   <- make_files(base_genos, base_parents, child1_progeny)
   res <- find_parentage(f$g, f$p, f$pr, method = "best_pair",
                         min_markers = 3, error_threshold = 5.0,
-                        show_ties = FALSE, verbose = FALSE, write_txt = FALSE)
+                        show_ties = FALSE, verbose = FALSE,
+                        write_txt = FALSE, plot_results = FALSE)
   expect_equal(res$Assignment_Status, "PASS")
 })
 
@@ -262,7 +295,8 @@ test_that("Assignment_Status = LOW_MARKERS when min_markers exceeds available ma
   f   <- make_files(base_genos, base_parents, child1_progeny)
   res <- find_parentage(f$g, f$p, f$pr, method = "best_pair",
                         min_markers = 99999, error_threshold = 5.0,
-                        show_ties = FALSE, verbose = FALSE, write_txt = FALSE)
+                        show_ties = FALSE, verbose = FALSE,
+                        write_txt = FALSE, plot_results = FALSE)
   expect_equal(res$Assignment_Status, "LOW_MARKERS")
 })
 
@@ -281,7 +315,8 @@ test_that("Assignment_Status = HIGH_ERROR when error rate exceeds threshold", {
   f        <- make_files(high_error_genos, parents, progeny)
   res      <- find_parentage(f$g, f$p, f$pr, method = "best_pair",
                              min_markers = 3, error_threshold = 5.0,
-                             show_ties = FALSE, verbose = FALSE, write_txt = FALSE)
+                             show_ties = FALSE, verbose = FALSE,
+                             write_txt = FALSE, plot_results = FALSE)
   expect_equal(res$Assignment_Status, "HIGH_ERROR")
 })
 
@@ -289,12 +324,24 @@ test_that("Assignment_Status column is present in all methods", {
   f <- make_files(base_genos, base_parents, child1_progeny)
   for (m in c("best_pair","best_male_parent","best_female_parent","best_match")) {
     res <- find_parentage(f$g, f$p, f$pr, method = m,
-                          show_ties = FALSE, verbose = FALSE, write_txt = FALSE)
+                          show_ties = FALSE, verbose = FALSE,
+                          write_txt = FALSE, plot_results = FALSE)
     expect_true("Assignment_Status" %in% names(res),
                 label = paste("Assignment_Status present for method", m))
   }
 })
 
+test_that("LOW_MARKERS is flagged for single-parent methods too", {
+  f <- make_files(base_genos, base_parents, child1_progeny)
+  for (m in c("best_male_parent","best_female_parent","best_match")) {
+    res <- find_parentage(f$g, f$p, f$pr, method = m,
+                          min_markers = 99999, verbose = FALSE,
+                          write_txt = FALSE, plot_results = FALSE)
+    expect_equal(res$Assignment_Status, "LOW_MARKERS",
+                 label = paste("LOW_MARKERS for method", m))
+  }
+})
+
 # ==============================================================================
 # 5. allow_selfing
 # ==============================================================================
@@ -302,11 +349,10 @@ test_that("Assignment_Status column is present in all methods", {
 test_that("allow_selfing = FALSE removes self-pairs from candidates", {
   ambig_parents <- data.table::data.table(ID = c("S1","D1"), Sex = c("A","A"))
   f <- make_files(base_genos, ambig_parents, child1_progeny)
-  expect_warning(
-    res <- find_parentage(f$g, f$p, f$pr, method = "best_pair",
-                          allow_selfing = FALSE, show_ties = FALSE,
-                          verbose = FALSE, write_txt = FALSE),
-    regexp = "tied"
+  res <- suppressWarnings(
+    find_parentage(f$g, f$p, f$pr, method = "best_pair",
+                   allow_selfing = FALSE, show_ties = FALSE,
+                   verbose = FALSE, write_txt = FALSE, plot_results = FALSE)
   )
   if (!is.na(res$Male_Parent) && !is.na(res$Female_Parent))
     expect_false(res$Male_Parent == res$Female_Parent)
@@ -316,10 +362,11 @@ test_that("allow_selfing = FALSE removes self-pairs from candidates", {
 # 6. show_ties
 # ==============================================================================
 
-test_that("show_ties = TRUE produces _1/_2 suffixed columns when ties exist", {
+test_that("show_ties = TRUE produces suffixed columns when ties exist", {
   f   <- make_files(tied_genos, tied_parents, tied_progeny)
   res <- find_parentage(f$g, f$p, f$pr, method = "best_pair",
-                        show_ties = TRUE, verbose = FALSE, write_txt = FALSE)
+                        show_ties = TRUE, verbose = FALSE,
+                        write_txt = FALSE, plot_results = FALSE)
   expect_true(any(grepl("^Male_Parent_",   names(res))))
   expect_true(any(grepl("^Female_Parent_", names(res))))
 })
@@ -328,7 +375,8 @@ test_that("show_ties = FALSE warns about ties and returns single-result columns"
   f <- make_files(tied_genos, tied_parents, tied_progeny)
   expect_warning(
     res <- find_parentage(f$g, f$p, f$pr, method = "best_pair",
-                          show_ties = FALSE, verbose = FALSE, write_txt = FALSE),
+                          show_ties = FALSE, verbose = FALSE,
+                          write_txt = FALSE, plot_results = FALSE),
     regexp = "tied"
   )
   expect_true("Male_Parent"   %in% names(res))
@@ -337,33 +385,42 @@ test_that("show_ties = FALSE warns about ties and returns single-result columns"
   expect_false(any(grepl("^Female_Parent_\\d", names(res))))
 })
 
+test_that("base columns are always populated even when ties exist", {
+  f   <- make_files(tied_genos, tied_parents, tied_progeny)
+  res <- find_parentage(f$g, f$p, f$pr, method = "best_pair",
+                        show_ties = TRUE, verbose = FALSE,
+                        write_txt = FALSE, plot_results = FALSE)
+  expect_false(is.na(res$Male_Parent[1]))
+  expect_false(is.na(res$Female_Parent[1]))
+})
+
 # ==============================================================================
 # 7. verbose / write_txt
 # ==============================================================================
 
-test_that("verbose = TRUE returns the result invisibly as data.table", {
+test_that("verbose = TRUE returns the result as data.table", {
   f   <- make_files(base_genos, base_parents, child1_progeny)
   res <- find_parentage(f$g, f$p, f$pr, method = "best_pair",
-                        verbose = TRUE, write_txt = FALSE)
+                        verbose = TRUE, write_txt = FALSE, plot_results = FALSE)
   expect_s3_class(res, "data.table")
 })
 
 test_that("verbose = FALSE returns the result as data.table", {
   f   <- make_files(base_genos, base_parents, child1_progeny)
   res <- find_parentage(f$g, f$p, f$pr, method = "best_pair",
-                        verbose = FALSE, write_txt = FALSE)
+                        verbose = FALSE, write_txt = FALSE, plot_results = FALSE)
   expect_s3_class(res, "data.table")
 })
 
-test_that("write_txt = TRUE creates the output file", {
+test_that("write_txt = TRUE creates parentage_testing_results.txt", {
   old_wd <- getwd()
   tmp    <- tempdir()
   setwd(tmp)
   on.exit(setwd(old_wd), add = TRUE)
   f <- make_files(base_genos, base_parents, child1_progeny, dir = tmp)
   find_parentage(f$g, f$p, f$pr, method = "best_pair",
-                 verbose = FALSE, write_txt = TRUE)
-  expect_true(file.exists(file.path(tmp, "parentage_results_dt.txt")))
+                 verbose = FALSE, write_txt = TRUE, plot_results = FALSE)
+  expect_true(file.exists(file.path(tmp, "parentage_testing_results.txt")))
 })
 
 test_that("write_txt = FALSE does not create the output file", {
@@ -371,14 +428,23 @@ test_that("write_txt = FALSE does not create the output file", {
   tmp      <- tempdir()
   setwd(tmp)
   on.exit(setwd(old_wd), add = TRUE)
-  out_file <- file.path(tmp, "parentage_results_dt.txt")
+  out_file <- file.path(tmp, "parentage_testing_results.txt")
   if (file.exists(out_file)) file.remove(out_file)
   f <- make_files(base_genos, base_parents, child1_progeny, dir = tmp)
   find_parentage(f$g, f$p, f$pr, method = "best_pair",
-                 verbose = FALSE, write_txt = FALSE)
+                 verbose = FALSE, write_txt = FALSE, plot_results = FALSE)
   expect_false(file.exists(out_file))
 })
 
+test_that("na_string = '' is accepted without error", {
+  f <- make_files(base_genos, base_parents, child1_progeny)
+  expect_no_error(
+    find_parentage(f$g, f$p, f$pr, method = "best_pair",
+                   na_string = "", verbose = FALSE,
+                   write_txt = FALSE, plot_results = FALSE)
+  )
+})
+
 # ==============================================================================
 # 8. Sex-based candidate filtering
 # ==============================================================================
@@ -386,7 +452,7 @@ test_that("write_txt = FALSE does not create the output file", {
 test_that("best_male_parent only assigns M or A parents", {
   f   <- make_files(base_genos, base_parents, child1_progeny)
   res <- find_parentage(f$g, f$p, f$pr, method = "best_male_parent",
-                        verbose = FALSE, write_txt = FALSE)
+                        verbose = FALSE, write_txt = FALSE, plot_results = FALSE)
   valid_male_parents <- base_parents[Sex %in% c("M","A")]$ID
   expect_true(res$Best_Match %in% valid_male_parents)
 })
@@ -394,11 +460,29 @@ test_that("best_male_parent only assigns M or A parents", {
 test_that("best_female_parent only assigns F or A parents", {
   f   <- make_files(base_genos, base_parents, child1_progeny)
   res <- find_parentage(f$g, f$p, f$pr, method = "best_female_parent",
-                        verbose = FALSE, write_txt = FALSE)
+                        verbose = FALSE, write_txt = FALSE, plot_results = FALSE)
   valid_female_parents <- base_parents[Sex %in% c("F","A")]$ID
   expect_true(res$Best_Match %in% valid_female_parents)
 })
 
+test_that("best_pair male slot contains only M or A parents", {
+  f   <- make_files(base_genos, base_parents, child1_progeny)
+  res <- find_parentage(f$g, f$p, f$pr, method = "best_pair",
+                        show_ties = FALSE, verbose = FALSE,
+                        write_txt = FALSE, plot_results = FALSE)
+  valid_males <- base_parents[Sex %in% c("M","A")]$ID
+  expect_true(res$Male_Parent %in% valid_males)
+})
+
+test_that("best_pair female slot contains only F or A parents", {
+  f   <- make_files(base_genos, base_parents, child1_progeny)
+  res <- find_parentage(f$g, f$p, f$pr, method = "best_pair",
+                        show_ties = FALSE, verbose = FALSE,
+                        write_txt = FALSE, plot_results = FALSE)
+  valid_females <- base_parents[Sex %in% c("F","A")]$ID
+  expect_true(res$Female_Parent %in% valid_females)
+})
+
 # ==============================================================================
 # 9. Edge cases
 # ==============================================================================
@@ -406,7 +490,8 @@ test_that("best_female_parent only assigns F or A parents", {
 test_that("single progeny individual is handled correctly", {
   f   <- make_files(base_genos, base_parents, child1_progeny)
   res <- find_parentage(f$g, f$p, f$pr, method = "best_pair",
-                        show_ties = FALSE, verbose = FALSE, write_txt = FALSE)
+                        show_ties = FALSE, verbose = FALSE,
+                        write_txt = FALSE, plot_results = FALSE)
   expect_equal(nrow(res), 1L)
 })
 
@@ -416,20 +501,33 @@ test_that("all-NA marker column does not cause an error", {
   f <- make_files(na_genos, base_parents, child1_progeny)
   expect_no_error(
     find_parentage(f$g, f$p, f$pr, method = "best_pair",
-                   verbose = FALSE, write_txt = FALSE)
+                   verbose = FALSE, write_txt = FALSE, plot_results = FALSE)
   )
 })
 
 test_that("Progeny column contains the correct progeny IDs", {
   f   <- make_files(base_genos, base_parents, child1_progeny)
   res <- find_parentage(f$g, f$p, f$pr, method = "best_pair",
-                        show_ties = FALSE, verbose = FALSE, write_txt = FALSE)
+                        show_ties = FALSE, verbose = FALSE,
+                        write_txt = FALSE, plot_results = FALSE)
   expect_setequal(res$Progeny, child1_progeny$ID)
 })
 
 test_that("multiple progeny are all represented in output", {
   f   <- make_files(base_genos, base_parents, base_progeny)
   res <- find_parentage(f$g, f$p, f$pr, method = "best_pair",
-                        show_ties = FALSE, verbose = FALSE, write_txt = FALSE)
+                        show_ties = FALSE, verbose = FALSE,
+                        write_txt = FALSE, plot_results = FALSE)
   expect_setequal(res$Progeny, base_progeny$ID)
 })
+
+test_that("single parent pair (n_pairs = 1) does not cause dimension errors", {
+  single_pair_parents <- data.table::data.table(ID  = c("S1","D1"),
+                                                Sex = c("M","F"))
+  f   <- make_files(base_genos, single_pair_parents, child1_progeny)
+  expect_no_error(
+    find_parentage(f$g, f$p, f$pr, method = "best_pair",
+                   show_ties = FALSE, verbose = FALSE,
+                   write_txt = FALSE, plot_results = FALSE)
+  )
+})
diff --git a/tests/testthat/test-validate_pedigree.R b/tests/testthat/test-validate_pedigree.R
index ef4e493..02acc22 100644
--- a/tests/testthat/test-validate_pedigree.R
+++ b/tests/testthat/test-validate_pedigree.R
@@ -43,56 +43,84 @@ write_temp_files <- function(genos = make_genos(), ped = make_pedigree()) {
   list(ped = ped_file, genos = genos_file)
 }
 
+# Helper to run validate_pedigree in a temporary working directory
+# so corrected_pedigree.txt doesn't pollute the real wd
+run_in_tmpdir <- function(expr) {
+  tmp_dir <- tempfile()
+  dir.create(tmp_dir)
+  old_wd  <- getwd()
+  setwd(tmp_dir)
+  on.exit({ setwd(old_wd); unlink(tmp_dir, recursive = TRUE) }, add = TRUE)
+  force(expr)
+}
+
 # ==============================================================================
 # 1. Input validation
 # ==============================================================================
 
 test_that("trio_error_threshold out of range raises an error", {
   f <- write_temp_files()
-  expect_error(validate_pedigree(f$ped, f$genos,
-                                 trio_error_threshold = 150,
-                                 verbose = FALSE, write_txt = FALSE))
-  expect_error(validate_pedigree(f$ped, f$genos,
-                                 trio_error_threshold = -1,
-                                 verbose = FALSE, write_txt = FALSE))
+  run_in_tmpdir({
+    expect_error(validate_pedigree(f$ped, f$genos,
+                                   trio_error_threshold = 150,
+                                   verbose = FALSE, write_results = FALSE,
+                                   plot_results = FALSE))
+    expect_error(validate_pedigree(f$ped, f$genos,
+                                   trio_error_threshold = -1,
+                                   verbose = FALSE, write_results = FALSE,
+                                   plot_results = FALSE))
+  })
 })
 
 test_that("single_parent_error_threshold out of range raises an error", {
   f <- write_temp_files()
-  expect_error(validate_pedigree(f$ped, f$genos,
-                                 single_parent_error_threshold = 101,
-                                 verbose = FALSE, write_txt = FALSE))
-  expect_error(validate_pedigree(f$ped, f$genos,
-                                 single_parent_error_threshold = -5,
-                                 verbose = FALSE, write_txt = FALSE))
+  run_in_tmpdir({
+    expect_error(validate_pedigree(f$ped, f$genos,
+                                   single_parent_error_threshold = 101,
+                                   verbose = FALSE, write_results = FALSE,
+                                   plot_results = FALSE))
+    expect_error(validate_pedigree(f$ped, f$genos,
+                                   single_parent_error_threshold = -5,
+                                   verbose = FALSE, write_results = FALSE,
+                                   plot_results = FALSE))
+  })
 })
 
 test_that("missing required pedigree column raises an error", {
   bad_ped <- data.table(ID = "IND_C", Parent1 = "IND_A", Female_Parent = "IND_B")
   f <- write_temp_files(ped = bad_ped)
-  expect_error(
-    validate_pedigree(f$ped, f$genos, verbose = FALSE, write_txt = FALSE),
-    regexp = "missing required columns"
-  )
+  run_in_tmpdir({
+    expect_error(
+      validate_pedigree(f$ped, f$genos, verbose = FALSE, write_results = FALSE,
+                        plot_results = FALSE),
+      regexp = "missing required columns"
+    )
+  })
 })
 
 test_that("missing ID column in genotypes raises an error", {
   bad_genos <- copy(make_genos())
   setnames(bad_genos, "ID", "SampleID")
   f <- write_temp_files(genos = bad_genos)
-  expect_error(
-    validate_pedigree(f$ped, f$genos, verbose = FALSE, write_txt = FALSE),
-    regexp = "ID"
-  )
+  run_in_tmpdir({
+    expect_error(
+      validate_pedigree(f$ped, f$genos, verbose = FALSE, write_results = FALSE,
+                        plot_results = FALSE),
+      regexp = "ID"
+    )
+  })
 })
 
 test_that("all trios with no genotype data stop with an error", {
   ped <- data.table(ID = "GHOST", Male_Parent = "IND_A", Female_Parent = "IND_B")
   f   <- write_temp_files(ped = ped)
-  expect_error(
-    validate_pedigree(f$ped, f$genos, verbose = FALSE, write_txt = FALSE),
-    regexp = "No valid trios remain"
-  )
+  run_in_tmpdir({
+    expect_error(
+      validate_pedigree(f$ped, f$genos, verbose = FALSE, write_results = FALSE,
+                        plot_results = FALSE),
+      regexp = "No valid trios remain"
+    )
+  })
 })
 
 # ==============================================================================
@@ -100,29 +128,38 @@ test_that("all trios with no genotype data stop with an error", {
 # ==============================================================================
 
 test_that("returns a data.table", {
-  f   <- write_temp_files()
-  res <- validate_pedigree(f$ped, f$genos, verbose = FALSE, write_txt = FALSE)
-  expect_s3_class(res, "data.table")
+  f <- write_temp_files()
+  run_in_tmpdir({
+    res <- validate_pedigree(f$ped, f$genos, verbose = FALSE,
+                             write_results = FALSE, plot_results = FALSE)
+    expect_s3_class(res, "data.table")
+  })
 })
 
 test_that("result has one row per pedigree entry", {
-  f   <- write_temp_files()
-  res <- validate_pedigree(f$ped, f$genos, verbose = FALSE, write_txt = FALSE)
-  expect_equal(nrow(res), 2L)
+  f <- write_temp_files()
+  run_in_tmpdir({
+    res <- validate_pedigree(f$ped, f$genos, verbose = FALSE,
+                             write_results = FALSE, plot_results = FALSE)
+    expect_equal(nrow(res), 2L)
+  })
 })
 
 test_that("result has all expected columns", {
-  f   <- write_temp_files()
-  res <- validate_pedigree(f$ped, f$genos, verbose = FALSE, write_txt = FALSE)
-  expected_cols <- c(
-    "ID", "Male_Parent", "Female_Parent",
-    "Mendelian_Error_Pct", "Markers_Tested", "Status",
-    "Correction_Decision",
-    "Male_Parent_Hom_Error_Pct", "Female_Parent_Hom_Error_Pct",
-    "Best_Male_Parent",   "Best_Male_Parent_Error_Pct",
-    "Best_Female_Parent", "Best_Female_Parent_Error_Pct"
-  )
-  expect_true(all(expected_cols %in% names(res)))
+  f <- write_temp_files()
+  run_in_tmpdir({
+    res <- validate_pedigree(f$ped, f$genos, verbose = FALSE,
+                             write_results = FALSE, plot_results = FALSE)
+    expected_cols <- c(
+      "ID", "Orig_Male_Parent", "Orig_Female_Parent",
+      "Trio_Mendelian_Error_Pct", "Trio_Markers_Tested", "Status",
+      "Recommended_Correction",
+      "Male_Parent_Hom_Error_Pct", "Female_Parent_Hom_Error_Pct",
+      "Best_Male_Candidate",   "Best_Male_Candidate_Error_Pct",
+      "Best_Female_Candidate", "Best_Female_Candidate_Error_Pct"
+    )
+    expect_true(all(expected_cols %in% names(res)))
+  })
 })
 
 # ==============================================================================
@@ -130,68 +167,87 @@ test_that("result has all expected columns", {
 # ==============================================================================
 
 test_that("PASS trio is correctly identified", {
-  f   <- write_temp_files()
-  res <- validate_pedigree(f$ped, f$genos, verbose = FALSE, write_txt = FALSE)
-  pass_row <- res[ID == "IND_C"]
-  expect_equal(nrow(pass_row), 1L)
-  expect_equal(pass_row$Status, "PASS")
-  expect_equal(pass_row$Mendelian_Error_Pct, 0)
-  expect_equal(pass_row$Correction_Decision, "NONE")
+  f <- write_temp_files()
+  run_in_tmpdir({
+    res      <- validate_pedigree(f$ped, f$genos, verbose = FALSE,
+                                  write_results = FALSE, plot_results = FALSE)
+    pass_row <- res[ID == "IND_C"]
+    expect_equal(nrow(pass_row), 1L)
+    expect_equal(pass_row$Status, "PASS")
+    expect_equal(pass_row$Trio_Mendelian_Error_Pct, 0)
+    expect_equal(pass_row$Recommended_Correction, "NONE")
+  })
 })
 
 test_that("FAIL trio is correctly identified", {
-  f   <- write_temp_files()
-  res <- validate_pedigree(f$ped, f$genos, verbose = FALSE, write_txt = FALSE)
-  fail_row <- res[ID == "IND_D"]
-  expect_equal(nrow(fail_row), 1L)
-  expect_equal(fail_row$Status, "FAIL")
-  expect_gt(fail_row$Mendelian_Error_Pct, 5.0)
+  f <- write_temp_files()
+  run_in_tmpdir({
+    res      <- validate_pedigree(f$ped, f$genos, verbose = FALSE,
+                                  write_results = FALSE, plot_results = FALSE)
+    fail_row <- res[ID == "IND_D"]
+    expect_equal(nrow(fail_row), 1L)
+    expect_equal(fail_row$Status, "FAIL")
+    expect_gt(fail_row$Trio_Mendelian_Error_Pct, 5.0)
+  })
 })
 
 test_that("FAIL trio has REMOVE_MALE_PARENT decision with best match populated", {
-  f   <- write_temp_files()
-  res <- validate_pedigree(f$ped, f$genos, verbose = FALSE, write_txt = FALSE)
-  fail_row <- res[ID == "IND_D"]
-  expect_equal(fail_row$Correction_Decision, "REMOVE_MALE_PARENT")
-  expect_false(is.na(fail_row$Best_Male_Parent))
-  expect_true(is.na(fail_row$Best_Female_Parent))
+  f <- write_temp_files()
+  run_in_tmpdir({
+    res      <- validate_pedigree(f$ped, f$genos, verbose = FALSE,
+                                  write_results = FALSE, plot_results = FALSE)
+    fail_row <- res[ID == "IND_D"]
+    expect_equal(fail_row$Recommended_Correction, "REMOVE_MALE_PARENT")
+    expect_false(is.na(fail_row$Best_Male_Candidate))
+    expect_true(is.na(fail_row$Best_Female_Candidate))
+  })
 })
 
-test_that("Mendelian_Error_Pct is 0 for a perfect trio", {
-  f   <- write_temp_files()
-  res <- validate_pedigree(f$ped, f$genos, verbose = FALSE, write_txt = FALSE)
-  expect_equal(res[ID == "IND_C"]$Mendelian_Error_Pct, 0)
+test_that("Trio_Mendelian_Error_Pct is 0 for a perfect trio", {
+  f <- write_temp_files()
+  run_in_tmpdir({
+    res <- validate_pedigree(f$ped, f$genos, verbose = FALSE,
+                             write_results = FALSE, plot_results = FALSE)
+    expect_equal(res[ID == "IND_C"]$Trio_Mendelian_Error_Pct, 0)
+  })
 })
 
-test_that("Markers_Tested equals number of markers for complete data", {
-  f   <- write_temp_files()
-  res <- validate_pedigree(f$ped, f$genos, verbose = FALSE, write_txt = FALSE)
-  expect_equal(res[ID == "IND_C"]$Markers_Tested, 20L)
+test_that("Trio_Markers_Tested equals number of markers for complete data", {
+  f <- write_temp_files()
+  run_in_tmpdir({
+    res <- validate_pedigree(f$ped, f$genos, verbose = FALSE,
+                             write_results = FALSE, plot_results = FALSE)
+    expect_equal(res[ID == "IND_C"]$Trio_Markers_Tested, 20L)
+  })
 })
 
 test_that("LOW_MARKERS status assigned when markers_tested < min_markers", {
-  f   <- write_temp_files()
-  res <- validate_pedigree(f$ped, f$genos, verbose = FALSE,
-                           write_txt = FALSE, min_markers = 25L)
-  expect_true(all(res$Status == "LOW_MARKERS"))
-  expect_true(all(grepl("^LOW_MARKERS_", res$Correction_Decision)))
+  f <- write_temp_files()
+  run_in_tmpdir({
+    res <- validate_pedigree(f$ped, f$genos, verbose = FALSE,
+                             write_results = FALSE, plot_results = FALSE,
+                             min_markers = 25L)
+    expect_true(all(res$Status == "LOW_MARKERS"))
+    expect_true(all(grepl("^LOW_MARKERS_", res$Recommended_Correction)))
+  })
 })
 
-test_that("NA markers reduce Markers_Tested and do not cause errors", {
+test_that("NA markers reduce Trio_Markers_Tested and do not cause errors", {
   genos <- make_genos()
   genos[ID == "IND_C", M1 := NA_integer_]
   genos[ID == "IND_C", M2 := NA_integer_]
-  f   <- write_temp_files(genos = genos)
-  res <- validate_pedigree(f$ped, f$genos, verbose = FALSE, write_txt = FALSE)
-  expect_equal(res[ID == "IND_C"]$Markers_Tested, 18L)
-  expect_equal(res[ID == "IND_C"]$Status, "PASS")
+  f <- write_temp_files(genos = genos)
+  run_in_tmpdir({
+    res <- validate_pedigree(f$ped, f$genos, verbose = FALSE,
+                             write_results = FALSE, plot_results = FALSE)
+    expect_equal(res[ID == "IND_C"]$Trio_Markers_Tested, 18L)
+    expect_equal(res[ID == "IND_C"]$Status, "PASS")
+  })
 })
 
 # ==============================================================================
 # 4. Missing parent statuses (MISSING_MALE_PARENT / MISSING_FEMALE_PARENT /
 #    MISSING_BOTH_PARENTS)
-# Note: each test includes make_pedigree() rows so has_geno is never empty,
-#       and filters res by the specific ID under test [3][4]
 # ==============================================================================
 
 test_that("MISSING_MALE_PARENT status and recommendation are correct", {
@@ -199,13 +255,16 @@ test_that("MISSING_MALE_PARENT status and recommendation are correct", {
     make_pedigree(),
     data.table(ID = "IND_E", Male_Parent = "0", Female_Parent = "IND_B")
   )
-  f   <- write_temp_files(ped = ped)
-  res <- validate_pedigree(f$ped, f$genos, verbose = FALSE, write_txt = FALSE)
-  r   <- res[ID == "IND_E"]
-  expect_equal(r$Status,              "MISSING_MALE_PARENT")
-  expect_equal(r$Correction_Decision, "NONE")
-  expect_false(is.na(r$Best_Male_Parent))
-  expect_true(is.na(r$Best_Female_Parent))
+  f <- write_temp_files(ped = ped)
+  run_in_tmpdir({
+    res <- validate_pedigree(f$ped, f$genos, verbose = FALSE,
+                             write_results = FALSE, plot_results = FALSE)
+    r   <- res[ID == "IND_E"]
+    expect_equal(r$Status,                 "MISSING_MALE_PARENT")
+    expect_equal(r$Recommended_Correction, "NONE")
+    expect_false(is.na(r$Best_Male_Candidate))
+    expect_true(is.na(r$Best_Female_Candidate))
+  })
 })
 
 test_that("MISSING_FEMALE_PARENT status and recommendation are correct", {
@@ -213,13 +272,16 @@ test_that("MISSING_FEMALE_PARENT status and recommendation are correct", {
     make_pedigree(),
     data.table(ID = "IND_E", Male_Parent = "IND_A", Female_Parent = "0")
   )
-  f   <- write_temp_files(ped = ped)
-  res <- validate_pedigree(f$ped, f$genos, verbose = FALSE, write_txt = FALSE)
-  r   <- res[ID == "IND_E"]
-  expect_equal(r$Status,              "MISSING_FEMALE_PARENT")
-  expect_equal(r$Correction_Decision, "NONE")
-  expect_true(is.na(r$Best_Male_Parent))
-  expect_false(is.na(r$Best_Female_Parent))
+  f <- write_temp_files(ped = ped)
+  run_in_tmpdir({
+    res <- validate_pedigree(f$ped, f$genos, verbose = FALSE,
+                             write_results = FALSE, plot_results = FALSE)
+    r   <- res[ID == "IND_E"]
+    expect_equal(r$Status,                 "MISSING_FEMALE_PARENT")
+    expect_equal(r$Recommended_Correction, "NONE")
+    expect_true(is.na(r$Best_Male_Candidate))
+    expect_false(is.na(r$Best_Female_Candidate))
+  })
 })
 
 test_that("MISSING_BOTH_PARENTS status and recommendations are correct", {
@@ -227,16 +289,19 @@ test_that("MISSING_BOTH_PARENTS status and recommendations are correct", {
     make_pedigree(),
     data.table(ID = "IND_E", Male_Parent = "0", Female_Parent = "0")
   )
-  f   <- write_temp_files(ped = ped)
-  res <- validate_pedigree(f$ped, f$genos, verbose = FALSE, write_txt = FALSE)
-  r   <- res[ID == "IND_E"]
-  expect_equal(r$Status,              "MISSING_BOTH_PARENTS")
-  expect_equal(r$Correction_Decision, "NONE")
-  expect_false(is.na(r$Best_Male_Parent))
-  expect_false(is.na(r$Best_Female_Parent))
-})
-
-test_that("MISSING_* rows preserve 0s in corrected pedigree", {
+  f <- write_temp_files(ped = ped)
+  run_in_tmpdir({
+    res <- validate_pedigree(f$ped, f$genos, verbose = FALSE,
+                             write_results = FALSE, plot_results = FALSE)
+    r   <- res[ID == "IND_E"]
+    expect_equal(r$Status,                 "MISSING_BOTH_PARENTS")
+    expect_equal(r$Recommended_Correction, "NONE")
+    expect_false(is.na(r$Best_Male_Candidate))
+    expect_false(is.na(r$Best_Female_Candidate))
+  })
+})
+
+test_that("MISSING_* rows preserve 0s in corrected_pedigree.txt", {
   ped <- rbind(
     make_pedigree(),
     data.table(ID = "IND_E", Male_Parent = "0", Female_Parent = "IND_B")
@@ -247,22 +312,29 @@ test_that("MISSING_* rows preserve 0s in corrected pedigree", {
   old_wd  <- getwd()
   setwd(tmp_dir)
   on.exit({ setwd(old_wd); unlink(tmp_dir, recursive = TRUE) }, add = TRUE)
-  validate_pedigree(f$ped, f$genos, verbose = FALSE, write_txt = FALSE)
+
+  validate_pedigree(f$ped, f$genos, verbose = FALSE,
+                    write_results = FALSE, plot_results = FALSE)
   corr <- fread(file.path(tmp_dir, "corrected_pedigree.txt"),
                 colClasses = "character")
-  expect_equal(corr[ID == "IND_E"]$Male_Parent, "0")
+  # MISSING_MALE_PARENT — male stays "0", female unchanged
+  expect_equal(corr[ID == "IND_E"]$Male_Parent,   "0")
+  expect_equal(corr[ID == "IND_E"]$Female_Parent, "IND_B")
 })
 
-test_that("Best_Male_Parent for MISSING_MALE_PARENT is excluded from being the known female", {
+test_that("Best_Male_Candidate for MISSING_MALE_PARENT is not the known female parent", {
   ped <- rbind(
     make_pedigree(),
     data.table(ID = "IND_E", Male_Parent = "0", Female_Parent = "IND_B")
   )
-  f   <- write_temp_files(ped = ped)
-  res <- validate_pedigree(f$ped, f$genos, verbose = FALSE, write_txt = FALSE)
-  r   <- res[ID == "IND_E"]
-  # The known female parent should not be suggested as the best male parent
-  expect_false(r$Best_Male_Parent == "IND_B")
+  f <- write_temp_files(ped = ped)
+  run_in_tmpdir({
+    res <- validate_pedigree(f$ped, f$genos, verbose = FALSE,
+                             write_results = FALSE, plot_results = FALSE)
+    r   <- res[ID == "IND_E"]
+    # The known female parent should not be suggested as the best male candidate
+    expect_false(r$Best_Male_Candidate == "IND_B")
+  })
 })
 
 # ==============================================================================
@@ -277,16 +349,19 @@ test_that("FOUNDERS status is assigned when ID in founders list with 0 0 parents
   founders_file <- tempfile(fileext = ".txt")
   fwrite(data.table(ID = "IND_A"), founders_file,
          sep = "\t", quote = FALSE, col.names = FALSE)
-  f   <- write_temp_files(ped = ped)
-  res <- validate_pedigree(f$ped, f$genos,
-                           founders_file = founders_file,
-                           verbose       = FALSE,
-                           write_txt     = FALSE)
-  founder_row <- res[ID == "IND_A"]
-  expect_equal(founder_row$Status,              "FOUNDERS")
-  expect_equal(founder_row$Correction_Decision, "NONE")
-  expect_true(is.na(founder_row$Best_Male_Parent))
-  expect_true(is.na(founder_row$Best_Female_Parent))
+  f <- write_temp_files(ped = ped)
+  run_in_tmpdir({
+    res         <- validate_pedigree(f$ped, f$genos,
+                                     founders_file = founders_file,
+                                     verbose       = FALSE,
+                                     write_results = FALSE,
+                                     plot_results  = FALSE)
+    founder_row <- res[ID == "IND_A"]
+    expect_equal(founder_row$Status,                 "FOUNDERS")
+    expect_equal(founder_row$Recommended_Correction, "NONE")
+    expect_true(is.na(founder_row$Best_Male_Candidate))
+    expect_true(is.na(founder_row$Best_Female_Candidate))
+  })
 })
 
 test_that("non-founder rows are still evaluated normally when founders file is supplied", {
@@ -297,13 +372,16 @@ test_that("non-founder rows are still evaluated normally when founders file is s
   founders_file <- tempfile(fileext = ".txt")
   fwrite(data.table(ID = "IND_A"), founders_file,
          sep = "\t", quote = FALSE, col.names = FALSE)
-  f   <- write_temp_files(ped = ped)
-  res <- validate_pedigree(f$ped, f$genos,
-                           founders_file = founders_file,
-                           verbose       = FALSE,
-                           write_txt     = FALSE)
-  # IND_C has real parents — should still get PASS
-  expect_equal(res[ID == "IND_C"]$Status, "PASS")
+  f <- write_temp_files(ped = ped)
+  run_in_tmpdir({
+    res <- validate_pedigree(f$ped, f$genos,
+                             founders_file = founders_file,
+                             verbose       = FALSE,
+                             write_results = FALSE,
+                             plot_results  = FALSE)
+    # IND_C has real parents — should still get PASS
+    expect_equal(res[ID == "IND_C"]$Status, "PASS")
+  })
 })
 
 test_that("0 0 parents NOT in founders list get MISSING_BOTH_PARENTS", {
@@ -311,9 +389,12 @@ test_that("0 0 parents NOT in founders list get MISSING_BOTH_PARENTS", {
     make_pedigree(),
     data.table(ID = "IND_E", Male_Parent = "0", Female_Parent = "0")
   )
-  f   <- write_temp_files(ped = ped)
-  res <- validate_pedigree(f$ped, f$genos, verbose = FALSE, write_txt = FALSE)
-  expect_equal(res[ID == "IND_E"]$Status, "MISSING_BOTH_PARENTS")
+  f <- write_temp_files(ped = ped)
+  run_in_tmpdir({
+    res <- validate_pedigree(f$ped, f$genos, verbose = FALSE,
+                             write_results = FALSE, plot_results = FALSE)
+    expect_equal(res[ID == "IND_E"]$Status, "MISSING_BOTH_PARENTS")
+  })
 })
 
 test_that("0 0 parents with no founders file gets MISSING_BOTH_PARENTS not FOUNDERS", {
@@ -321,12 +402,15 @@ test_that("0 0 parents with no founders file gets MISSING_BOTH_PARENTS not FOUND
     make_pedigree(),
     data.table(ID = "IND_E", Male_Parent = "0", Female_Parent = "0")
   )
-  f   <- write_temp_files(ped = ped)
-  res <- validate_pedigree(f$ped, f$genos,
-                           founders_file = NULL,
-                           verbose       = FALSE,
-                           write_txt     = FALSE)
-  expect_equal(res[ID == "IND_E"]$Status, "MISSING_BOTH_PARENTS")
+  f <- write_temp_files(ped = ped)
+  run_in_tmpdir({
+    res <- validate_pedigree(f$ped, f$genos,
+                             founders_file = NULL,
+                             verbose       = FALSE,
+                             write_results = FALSE,
+                             plot_results  = FALSE)
+    expect_equal(res[ID == "IND_E"]$Status, "MISSING_BOTH_PARENTS")
+  })
 })
 
 # ==============================================================================
@@ -338,36 +422,45 @@ test_that("NO_GENOTYPE_DATA is flagged for progeny absent from genotype file", {
     make_pedigree(),
     data.table(ID = "GHOST", Male_Parent = "IND_A", Female_Parent = "IND_B")
   )
-  f   <- write_temp_files(ped = ped)
-  res <- validate_pedigree(f$ped, f$genos, verbose = FALSE, write_txt = FALSE)
-  ghost_row <- res[ID == "GHOST"]
-  expect_equal(nrow(ghost_row),               1L)
-  expect_equal(ghost_row$Status,              "NO_GENOTYPE_DATA")
-  expect_equal(ghost_row$Correction_Decision, "NONE")
-})
-
-test_that("NO_GENOTYPE_DATA rows have NA for all analysis columns", {
+  f <- write_temp_files(ped = ped)
+  run_in_tmpdir({
+    res       <- validate_pedigree(f$ped, f$genos, verbose = FALSE,
+                                   write_results = FALSE, plot_results = FALSE)
+    ghost_row <- res[ID == "GHOST"]
+    expect_equal(nrow(ghost_row),                    1L)
+    expect_equal(ghost_row$Status,                   "NO_GENOTYPE_DATA")
+    expect_equal(ghost_row$Recommended_Correction,   "NONE")
+  })
+})
+
+test_that("NO_GENOTYPE_DATA rows have NA/0 for all analysis columns", {
   ped <- rbind(
     make_pedigree(),
     data.table(ID = "GHOST", Male_Parent = "IND_A", Female_Parent = "IND_B")
   )
-  f   <- write_temp_files(ped = ped)
-  res <- validate_pedigree(f$ped, f$genos, verbose = FALSE, write_txt = FALSE)
-  ghost_row <- res[ID == "GHOST"]
-  expect_true(is.na(ghost_row$Mendelian_Error_Pct))
-  expect_equal(ghost_row$Markers_Tested, 0L)
-  expect_true(is.na(ghost_row$Best_Male_Parent))
-  expect_true(is.na(ghost_row$Best_Female_Parent))
+  f <- write_temp_files(ped = ped)
+  run_in_tmpdir({
+    res       <- validate_pedigree(f$ped, f$genos, verbose = FALSE,
+                                   write_results = FALSE, plot_results = FALSE)
+    ghost_row <- res[ID == "GHOST"]
+    expect_true(is.na(ghost_row$Trio_Mendelian_Error_Pct))
+    expect_equal(ghost_row$Trio_Markers_Tested, 0L)
+    expect_true(is.na(ghost_row$Best_Male_Candidate))
+    expect_true(is.na(ghost_row$Best_Female_Candidate))
+  })
 })
 
 test_that("NO_GENOTYPE_DATA flagged when declared parent is absent from genotype file", {
   ped <- rbind(
-    make_pedigree(),   # ensures has_geno is not empty
+    make_pedigree(),
     data.table(ID = "IND_C_GHOST", Male_Parent = "GHOST_DAD", Female_Parent = "IND_B")
   )
-  f   <- write_temp_files(ped = ped)
-  res <- validate_pedigree(f$ped, f$genos, verbose = FALSE, write_txt = FALSE)
-  expect_equal(res[ID == "IND_C_GHOST"]$Status, "NO_GENOTYPE_DATA")
+  f <- write_temp_files(ped = ped)
+  run_in_tmpdir({
+    res <- validate_pedigree(f$ped, f$genos, verbose = FALSE,
+                             write_results = FALSE, plot_results = FALSE)
+    expect_equal(res[ID == "IND_C_GHOST"]$Status, "NO_GENOTYPE_DATA")
+  })
 })
 
 test_that("valid trios still evaluated correctly when ghost rows are present", {
@@ -375,24 +468,29 @@ test_that("valid trios still evaluated correctly when ghost rows are present", {
     make_pedigree(),
     data.table(ID = "GHOST", Male_Parent = "IND_A", Female_Parent = "IND_B")
   )
-  f   <- write_temp_files(ped = ped)
-  res <- validate_pedigree(f$ped, f$genos, verbose = FALSE, write_txt = FALSE)
-  expect_equal(res[ID == "IND_C"]$Status, "PASS")
-  expect_equal(res[ID == "IND_D"]$Status, "FAIL")
+  f <- write_temp_files(ped = ped)
+  run_in_tmpdir({
+    res <- validate_pedigree(f$ped, f$genos, verbose = FALSE,
+                             write_results = FALSE, plot_results = FALSE)
+    expect_equal(res[ID == "IND_C"]$Status, "PASS")
+    expect_equal(res[ID == "IND_D"]$Status, "FAIL")
+  })
 })
 
 # ==============================================================================
 # 7. Corrected pedigree output
 # ==============================================================================
 
-test_that("corrected_pedigree.txt is written and PASS parents are unchanged", {
+test_that("corrected_pedigree.txt is always written and PASS parents are unchanged", {
   f       <- write_temp_files()
   tmp_dir <- tempfile()
   dir.create(tmp_dir)
   old_wd  <- getwd()
   setwd(tmp_dir)
   on.exit({ setwd(old_wd); unlink(tmp_dir, recursive = TRUE) }, add = TRUE)
-  validate_pedigree(f$ped, f$genos, verbose = FALSE, write_txt = FALSE)
+
+  validate_pedigree(f$ped, f$genos, verbose = FALSE,
+                    write_results = FALSE, plot_results = FALSE)
   expect_true(file.exists(file.path(tmp_dir, "corrected_pedigree.txt")))
   corr <- fread(file.path(tmp_dir, "corrected_pedigree.txt"))
   # IND_C passes — parents must be unchanged
@@ -407,7 +505,9 @@ test_that("corrected_pedigree.txt sets bad parent to 0 for FAIL trio", {
   old_wd  <- getwd()
   setwd(tmp_dir)
   on.exit({ setwd(old_wd); unlink(tmp_dir, recursive = TRUE) }, add = TRUE)
-  validate_pedigree(f$ped, f$genos, verbose = FALSE, write_txt = FALSE)
+
+  validate_pedigree(f$ped, f$genos, verbose = FALSE,
+                    write_results = FALSE, plot_results = FALSE)
   corr <- fread(file.path(tmp_dir, "corrected_pedigree.txt"),
                 colClasses = "character")
   # IND_D fails with REMOVE_MALE_PARENT — male should become "0"
@@ -415,45 +515,32 @@ test_that("corrected_pedigree.txt sets bad parent to 0 for FAIL trio", {
   expect_equal(corr[ID == "IND_D"]$Female_Parent, "IND_A")
 })
 
-test_that("corrected_pedigree.txt preserves 0s for MISSING_* rows", {
-  ped <- rbind(
-    make_pedigree(),
-    data.table(ID = "IND_E", Male_Parent = "0", Female_Parent = "IND_B")
-  )
-  f       <- write_temp_files(ped = ped)
-  tmp_dir <- tempfile()
-  dir.create(tmp_dir)
-  old_wd  <- getwd()
-  setwd(tmp_dir)
-  on.exit({ setwd(old_wd); unlink(tmp_dir, recursive = TRUE) }, add = TRUE)
-  validate_pedigree(f$ped, f$genos, verbose = FALSE, write_txt = FALSE)
-  corr <- fread(file.path(tmp_dir, "corrected_pedigree.txt"),
-                colClasses = "character")
-  # MISSING_MALE_PARENT — male stays "0", female unchanged
-  expect_equal(corr[ID == "IND_E"]$Male_Parent,   "0")
-  expect_equal(corr[ID == "IND_E"]$Female_Parent, "IND_B")
-})
-
 # ==============================================================================
-# 8. write_txt / output file
+# 8. write_results / output file
 # ==============================================================================
 
-test_that("write_txt = TRUE writes output file with correct row count", {
+test_that("write_results = TRUE writes output file with correct row count", {
   f        <- write_temp_files()
   out_file <- tempfile(fileext = ".txt")
-  validate_pedigree(f$ped, f$genos, verbose = FALSE,
-                    write_txt = TRUE, output_filename = out_file)
-  expect_true(file.exists(out_file))
-  written <- fread(out_file)
-  expect_equal(nrow(written), 2L)
+  run_in_tmpdir({
+    validate_pedigree(f$ped, f$genos, verbose = FALSE,
+                      write_results = TRUE, output_filename = out_file,
+                      plot_results  = FALSE)
+    expect_true(file.exists(out_file))
+    written <- fread(out_file)
+    expect_equal(nrow(written), 2L)
+  })
 })
 
-test_that("write_txt = FALSE does not create output file", {
+test_that("write_results = FALSE does not create output file", {
   f        <- write_temp_files()
   out_file <- tempfile(fileext = ".txt")
-  validate_pedigree(f$ped, f$genos, verbose = FALSE,
-                    write_txt = FALSE, output_filename = out_file)
-  expect_false(file.exists(out_file))
+  run_in_tmpdir({
+    validate_pedigree(f$ped, f$genos, verbose = FALSE,
+                      write_results = FALSE, output_filename = out_file,
+                      plot_results  = FALSE)
+    expect_false(file.exists(out_file))
+  })
 })
 
 test_that("output file contains correct number of rows when ghost rows present", {
@@ -463,9 +550,12 @@ test_that("output file contains correct number of rows when ghost rows present",
   )
   f        <- write_temp_files(ped = ped)
   out_file <- tempfile(fileext = ".txt")
-  validate_pedigree(f$ped, f$genos, verbose = FALSE,
-                    write_txt = TRUE, output_filename = out_file)
-  written <- fread(out_file)
-  # 2 valid trios + 1 ghost = 3 rows total
-  expect_equal(nrow(written), 3L)
+  run_in_tmpdir({
+    validate_pedigree(f$ped, f$genos, verbose = FALSE,
+                      write_results = TRUE, output_filename = out_file,
+                      plot_results  = FALSE)
+    written <- fread(out_file)
+    # 2 valid trios + 1 ghost = 3 rows total
+    expect_equal(nrow(written), 3L)
+  })
 })

From 03725d8b5567cef3749e170806f6dfe0c6394a8a Mon Sep 17 00:00:00 2001
From: "josue.chinchilla" <josue.chinchilla@IFASadmins-MacBook-Pro.local>
Date: Mon, 27 Apr 2026 15:09:40 -0400
Subject: [PATCH 07/12] Made edits suggested by autopilot

---
 R/check_ped.R            |  50 ++++++++++++---
 R/find_parentage.R       | 127 +++++++++++++++++++++------------------
 R/validate_pedigree.R    | 111 +++++++++++++++++++---------------
 man/find_parentage.Rd    |   8 ++-
 man/validate_pedigree.Rd |  17 ++++--
 5 files changed, 193 insertions(+), 120 deletions(-)

diff --git a/R/check_ped.R b/R/check_ped.R
index e1d9407..0ec5cf2 100644
--- a/R/check_ped.R
+++ b/R/check_ped.R
@@ -40,9 +40,11 @@ check_ped <- function(ped.file,
                       correct            = TRUE,
                       save_zip           = FALSE,
                       save_corrected_zip = FALSE) {
+
   #### setup ####
   if (!is.null(seed)) set.seed(seed)
   data <- utils::read.table(ped.file, header = TRUE)
+
   # ── Column alias remapping ──────────────────────────────────────────────────
   # Accept common alternative column names and silently remap them so the rest
   # of the function always works with the canonical names.
@@ -59,6 +61,7 @@ check_ped <- function(ped.file,
       }
     }
   }
+
   # Validate required column names before any processing
   required_cols <- c("id", "male_parent", "female_parent")
   missing_cols  <- setdiff(required_cols, colnames(data))
@@ -75,33 +78,44 @@ check_ped <- function(ped.file,
       male_parent   = as.character(male_parent),
       female_parent = as.character(female_parent)
     )
+
   # Add row numbers before any processing
   data   <- data %>% mutate(row_number = row_number(), .before = id)
   errors <- list()
+
   #### check 1: exact duplicates ####
   exact_duplicates <- data[
     duplicated(data %>% select(-row_number)) |
       duplicated(data %>% select(-row_number), fromLast = TRUE),
   ]
+
   #### check 2: repeated IDs with conflicting male_parent/female_parent ####
   repeated_ids_diff <- data %>%
     group_by(id) %>%
     filter(n() > 1) %>%
     filter(n_distinct(male_parent) > 1 | n_distinct(female_parent) > 1) %>%
     ungroup()
+
   #### check 3: inconsistent parent sex roles ####
   male_ids   <- unique(data$male_parent[data$male_parent   != "0"])
   female_ids <- unique(data$female_parent[data$female_parent != "0"])
   messy_ids  <- intersect(male_ids, female_ids)
   inconsistent_sex_roles <- data %>%
     filter(male_parent %in% messy_ids | female_parent %in% messy_ids)
+
   #### check 4: missing parents ####
-  all_ids     <- unique(data$id)
-  ref_ids     <- unique(c(data$male_parent, data$female_parent))
-  ref_ids     <- ref_ids[ref_ids != "0"]
-  missing_ids <- setdiff(ref_ids, all_ids)
+  all_ids <- unique(data$id)
+  ref_ids <- unique(c(data$male_parent, data$female_parent))
+  # Fix #1: trimws() strips whitespace-only strings; the combined filter then
+  # drops NA (which NA != "0" would not catch), empty strings, and "0"
+  # to prevent spurious founder rows with id = NA in the corrected pedigree.
+  ref_ids <- trimws(ref_ids)
+  ref_ids <- ref_ids[!is.na(ref_ids) & ref_ids != "" & ref_ids != "0"]
+
+  missing_ids     <- setdiff(ref_ids, all_ids)
   missing_parents <- data %>%
     filter(male_parent %in% missing_ids | female_parent %in% missing_ids)
+
   # Only build founder rows if there are actually missing parents to add
   if (length(missing_ids) > 0) {
     missing_founders <- data.frame(
@@ -120,6 +134,7 @@ check_ped <- function(ped.file,
       stringsAsFactors = FALSE
     )
   }
+
   #### check 5: dependencies (cycles) ####
   detect_all_cycles <- function(data) {
     adj_list <- lapply(data$id, function(x) {
@@ -127,6 +142,7 @@ check_ped <- function(ped.file,
       c(row$male_parent, row$female_parent)
     })
     names(adj_list) <- data$id
+
     dfs <- function(node, visited, rec_stack, path) {
       visited[node]   <- TRUE
       rec_stack[node] <- TRUE
@@ -145,6 +161,7 @@ check_ped <- function(ped.file,
       rec_stack[node] <- FALSE
       return(cycles)
     }
+
     visited   <- stats::setNames(rep(FALSE, length(adj_list)), names(adj_list))
     rec_stack <- stats::setNames(rep(FALSE, length(adj_list)), names(adj_list))
     all_cycles <- list()
@@ -156,6 +173,7 @@ check_ped <- function(ped.file,
     }
     return(all_cycles)
   }
+
   cycles    <- detect_all_cycles(data)
   cycle_ids <- character(0)
   if (length(cycles) > 0) {
@@ -172,6 +190,7 @@ check_ped <- function(ped.file,
     dependencies <- data.frame(Dependency = unique(unlist(errors)),
                                stringsAsFactors = FALSE)
   }
+
   #### compile report ####
   input_ped_report <- list(
     exact_duplicates       = exact_duplicates,
@@ -180,11 +199,14 @@ check_ped <- function(ped.file,
     missing_parents        = missing_parents,
     dependencies           = dependencies
   )
+
   #### build corrected pedigree (no row_number column) ####
   if (correct) {
     corrected <- data %>% select(-row_number)
+
     # Remove exact duplicates
     corrected <- distinct(corrected)
+
     # Resolve conflicting IDs
     if (nrow(repeated_ids_diff) > 0) {
       corrected <- corrected %>%
@@ -195,6 +217,7 @@ check_ped <- function(ped.file,
           .groups = "drop"
         )
     }
+
     # Add missing founders
     if (length(missing_ids) > 0) {
       corrected <- bind_rows(
@@ -203,11 +226,13 @@ check_ped <- function(ped.file,
       )
     }
   }
+
   #### file names ####
   file_base      <- tools::file_path_sans_ext(basename(ped.file))
   corrected_name <- paste0(file_base, "_corrected")
   report_name    <- paste0(file_base, "_report")
   zip_name       <- paste0(file_base, "_report.zip")
+
   #### output ####
   if (verbose) {
     cat("\n=== Pedigree Quality Check Report ===\n")
@@ -222,20 +247,28 @@ check_ped <- function(ped.file,
     cat("\n--- Cycles / Dependencies ---\n")
     if (nrow(dependencies) > 0) print(dependencies) else cat("None found.\n")
     if (correct) {
-      cat(paste0("\`correct = TRUE`: saving corrected pedigree as `", corrected_name, "`.\n"))
+      cat(paste0("`correct = TRUE`: saving corrected pedigree as `", corrected_name, "`.\n"))
     } else {
-      cat("\`correct = FALSE`: no changes made to the pedigree.\n")
+      cat("`correct = FALSE`: no changes made to the pedigree.\n")
     }
   }
+
   # Always save report to global environment
   assign(report_name, input_ped_report, envir = .GlobalEnv)
+
   # Save corrected pedigree if correct = TRUE
   if (correct) assign(corrected_name, corrected, envir = .GlobalEnv)
+
   #### zip export ####
   if (save_zip) {
-    tmp_dir   <- tempfile()
+    tmp_dir <- tempfile()
     dir.create(tmp_dir)
+    # Fix #2: register cleanup so the temp directory is always removed on
+    # function exit, whether normal or due to an error.
+    on.exit(unlink(tmp_dir, recursive = TRUE), add = TRUE)
+
     zip_files <- character(0)
+
     # Section headers matching the console output labels
     section_labels <- c(
       exact_duplicates       = "Exact Duplicates",
@@ -262,17 +295,20 @@ check_ped <- function(ped.file,
       }
       zip_files <- c(zip_files, file_path)
     }
+
     if (save_corrected_zip && correct) {
       corrected_path <- file.path(tmp_dir, paste0(corrected_name, ".txt"))
       utils::write.table(corrected, file = corrected_path, sep = "\t",
                          row.names = FALSE, quote = FALSE)
       zip_files <- c(zip_files, corrected_path)
     }
+
     zip_path <- file.path(getwd(), zip_name)
     invisible(capture.output(
       utils::zip(zipfile = zip_path, files = zip_files, flags = "-j")
     ))
     if (verbose) cat(paste0("\nZip archive saved to: ", zip_path, "\n"))
   }
+
   invisible(input_ped_report)
 }
diff --git a/R/find_parentage.R b/R/find_parentage.R
index f53a323..410981b 100644
--- a/R/find_parentage.R
+++ b/R/find_parentage.R
@@ -27,7 +27,11 @@
 #' @param verbose Logical. If \code{TRUE}, prints progress, summary, and
 #'   results to the console. Default is \code{TRUE}.
 #' @param write_txt Logical. If \code{TRUE}, writes results to
-#'   \code{parentage_testing_results.txt}. Default is \code{TRUE}.
+#'   \code{output_filename}. The plot will be saved using the same base name
+#'   with a .jpg extension. Default is \code{TRUE}.
+#' @param output_filename Character. Path/name of the output results file
+#'   (default: \code{"parentage_testing_results.txt"}). The plot will be saved
+#'   using the same base name with a .jpg extension.
 #' @param na_string Character. String for missing values in output file.
 #'   Use \code{"NA"} or \code{""} (default: \code{"NA"}).
 #' @param plot_results Logical. If \code{TRUE}, plots the Mendelian error
@@ -96,9 +100,10 @@ find_parentage <- function(genotypes_file, parents_file, progeny_file,
                            allow_selfing   = TRUE,
                            verbose         = TRUE,
                            write_txt       = TRUE,
+                           output_filename = "parentage_testing_results.txt",
                            na_string       = "NA",
                            plot_results    = TRUE) {
-  
+
   #### Input Validation and Data Loading ####
   allowed_methods <- c("best_male_parent", "best_female_parent", "best_match", "best_pair")
   if (!method %in% allowed_methods)
@@ -109,7 +114,7 @@ find_parentage <- function(genotypes_file, parents_file, progeny_file,
     stop("error_threshold must be between 0 and 100.")
   if (!na_string %in% c("NA", ""))
     stop("na_string must be either 'NA' or ''.")
-  
+
   tryCatch({
     genos              <- data.table::fread(genotypes_file, sep = "auto")
     all_parents        <- data.table::fread(parents_file,   sep = "auto")
@@ -117,7 +122,7 @@ find_parentage <- function(genotypes_file, parents_file, progeny_file,
   }, error = function(e) {
     stop("Error reading input files. Ensure paths are correct and files are TXT/TSV/CSV.")
   })
-  
+
   valid_ids       <- genos$ID
   removed_parents <- base::setdiff(all_parents$ID, valid_ids)
   if (base::length(removed_parents) > 0) {
@@ -125,50 +130,50 @@ find_parentage <- function(genotypes_file, parents_file, progeny_file,
             paste(removed_parents, collapse = ", "), call. = FALSE)
     all_parents <- all_parents[ID %in% valid_ids]
   }
-  
+
   removed_progeny <- base::setdiff(progeny_candidates$ID, valid_ids)
   if (base::length(removed_progeny) > 0) {
     warning("The following progeny IDs were not in the genotype file and will not be analyzed: ",
             paste(removed_progeny, collapse = ", "), call. = FALSE)
     progeny_candidates <- progeny_candidates[ID %in% valid_ids]
   }
-  
+
   if (!"Sex" %in% base::colnames(all_parents)) {
     warning("No 'Sex' column in parents file. All parents treated as ambiguous ('A').")
     all_parents[, Sex := "A"]
   }
-  
+
   all_parents[, Sex := base::toupper(Sex)]
   male_parent_candidates   <- all_parents[Sex %in% c("M", "A", "NA"), .SD]
   female_parent_candidates <- all_parents[Sex %in% c("F", "A", "NA")]
-  
+
   if (base::nrow(male_parent_candidates) == 0 && method %in% c("best_male_parent", "best_pair"))
     warning("No valid male parent candidates remain after filtering.", call. = FALSE)
   if (base::nrow(female_parent_candidates) == 0 && method %in% c("best_female_parent", "best_pair"))
     warning("No valid female parent candidates remain after filtering.", call. = FALSE)
   if (base::nrow(progeny_candidates) == 0)
     stop("No valid progeny candidates remain after filtering.")
-  
+
   #### Pre-compute genotype matrices once (shared across all methods) ####
-  
+
   # Full genotype matrix used by best_pair
   genos_mat <- base::as.matrix(genos, rownames = "ID")
-  
+
   # Homozygous-only matrix (het markers set to NA) used by hom methods
   genos_hom   <- data.table::copy(genos)
   marker_cols <- base::setdiff(base::names(genos_hom), "ID")
   for (col in marker_cols)
     genos_hom[base::get(col) == 1, (col) := NA_integer_]
   genos_hom_mat <- base::as.matrix(genos_hom, rownames = "ID")
-  
+
   #### Assignment_Status from markers and error rate ####
-  
+
   # Returns LOW_MARKERS, HIGH_ERROR, or PASS
   assign_status <- function(markers, error_pct) {
     base::ifelse(markers < min_markers, "LOW_MARKERS",
                  base::ifelse(error_pct > error_threshold, "HIGH_ERROR", "PASS"))
   }
-  
+
   #### Logic for Homozygous Matching Methods ####
   if (method %in% c("best_male_parent", "best_female_parent", "best_match")) {
     parent_ids <- base::switch(method,
@@ -176,11 +181,11 @@ find_parentage <- function(genotypes_file, parents_file, progeny_file,
                                "best_female_parent" = female_parent_candidates$ID,
                                "best_match"         = base::union(male_parent_candidates$ID,
                                                                   female_parent_candidates$ID))
-    
+
     # Subset pre-computed homozygous matrix for relevant parents and progeny
-    parent_genos  <- genos_hom_mat[base::rownames(genos_hom_mat) %in% parent_ids,            , drop = FALSE]
-    progeny_genos <- genos_hom_mat[base::rownames(genos_hom_mat) %in% progeny_candidates$ID,  , drop = FALSE]
-    
+    parent_genos  <- genos_hom_mat[base::rownames(genos_hom_mat) %in% parent_ids,           , drop = FALSE]
+    progeny_genos <- genos_hom_mat[base::rownames(genos_hom_mat) %in% progeny_candidates$ID, , drop = FALSE]
+
     # Pre-allocate results data.table; fill by reference with set()
     n_progeny  <- base::nrow(progeny_genos)
     results_dt <- data.table::data.table(
@@ -190,26 +195,26 @@ find_parentage <- function(genotypes_file, parents_file, progeny_file,
       Markers_Tested      = NA_integer_,
       Assignment_Status   = NA_character_
     )
-    
+
     for (i in base::seq_len(n_progeny)) {
       progeny_vec      <- progeny_genos[i, ]
       mismatches       <- base::rowSums(parent_genos != progeny_vec, na.rm = TRUE)
       comparisons      <- base::rowSums(!base::is.na(parent_genos) & !base::is.na(progeny_vec))
       percent_mismatch <- (mismatches / comparisons) * 100
       percent_mismatch[base::is.nan(percent_mismatch)] <- NA
-      
+
       best_idx <- base::which.min(percent_mismatch)
-      
+
       # No candidate found — flag LOW_MARKERS and continue
       if (base::length(best_idx) == 0) {
         data.table::set(results_dt, i, "Markers_Tested",    0L)
         data.table::set(results_dt, i, "Assignment_Status", "LOW_MARKERS")
         next
       }
-      
+
       best_markers <- comparisons[best_idx]
       best_error   <- base::round(percent_mismatch[best_idx], 2)
-      
+
       data.table::set(results_dt, i, "Best_Match",          base::rownames(parent_genos)[best_idx])
       data.table::set(results_dt, i, "Mendelian_Error_Pct", best_error)
       data.table::set(results_dt, i, "Markers_Tested",      base::as.integer(best_markers))
@@ -217,7 +222,7 @@ find_parentage <- function(genotypes_file, parents_file, progeny_file,
     }
     final_df <- results_dt
   }
-  
+
   #### Logic for Best Pair Method ####
   if (method == "best_pair") {
     parent_pairs <- data.table::CJ(Male_Parent   = male_parent_candidates$ID,
@@ -227,17 +232,17 @@ find_parentage <- function(genotypes_file, parents_file, progeny_file,
       if (verbose) base::cat("Selfing is disallowed. Pairs with identical parents are removed.\n")
     }
     if (base::nrow(parent_pairs) == 0) stop("No valid parent pairs to test.")
-    
+
     # Pre-extract parent genotype row blocks for vectorised operations
     male_parent_genos_mat   <- genos_mat[parent_pairs$Male_Parent,   , drop = FALSE]
     female_parent_genos_mat <- genos_mat[parent_pairs$Female_Parent, , drop = FALSE]
-    
+
     # Subset full genotype matrix to progeny only
     progeny_ids <- progeny_candidates$ID
     progeny_mat <- genos_mat[progeny_ids, , drop = FALSE]
     n_progeny   <- base::nrow(progeny_mat)
     n_pairs     <- base::nrow(parent_pairs)
-    
+
     # Wrapped in matrix() to handle the n_pairs = 1 edge case
     mismatch_mat <- base::matrix(
       base::vapply(base::seq_len(n_progeny), function(j) {
@@ -256,7 +261,7 @@ find_parentage <- function(genotypes_file, parents_file, progeny_file,
       }, numeric(n_pairs)),
       nrow = n_pairs, ncol = n_progeny
     )
-    
+
     # Wrapped in matrix() to handle the n_pairs = 1 edge case
     comparison_mat <- base::matrix(
       base::vapply(base::seq_len(n_progeny), function(j) {
@@ -267,11 +272,11 @@ find_parentage <- function(genotypes_file, parents_file, progeny_file,
       }, numeric(n_pairs)),
       nrow = n_pairs, ncol = n_progeny
     )
-    
+
     # Percent mismatch matrix: n_pairs x n_progeny
     pct_mismatch_mat <- (mismatch_mat / comparison_mat) * 100
     pct_mismatch_mat[base::is.nan(pct_mismatch_mat)] <- NA
-    
+
     # Pre-allocate base results data.table; tie columns added dynamically
     results_dt <- data.table::data.table(
       Progeny             = progeny_ids,
@@ -281,7 +286,7 @@ find_parentage <- function(genotypes_file, parents_file, progeny_file,
       Markers_Tested      = NA_integer_,
       Assignment_Status   = NA_character_
     )
-    
+
     # Per-progeny result extraction from pre-computed matrices
     results_list <- base::vector("list", n_progeny)
     for (j in base::seq_len(n_progeny)) {
@@ -289,43 +294,43 @@ find_parentage <- function(genotypes_file, parents_file, progeny_file,
       percent_mismatch <- pct_mismatch_mat[, j]
       comparisons      <- comparison_mat[,  j]
       min_mismatch_val <- base::min(percent_mismatch, na.rm = TRUE)
-      
-      # No markers overlap at all — flag LOW_MARKERS
+
+      # No markers overlap at all — flag LOW_MARKERS and continue
       if (base::is.infinite(min_mismatch_val)) {
         data.table::set(results_dt, j, "Markers_Tested",    0L)
         data.table::set(results_dt, j, "Assignment_Status", "LOW_MARKERS")
         next
       }
-      
+
       best_indices <- base::which(percent_mismatch == min_mismatch_val)
-      
+
       # Tie-break: prefer pair(s) with the most markers tested
       if (base::length(best_indices) > 1) {
         best_markers_per_pair <- comparisons[best_indices]
         max_markers           <- base::max(best_markers_per_pair)
         best_indices          <- best_indices[best_markers_per_pair == max_markers]
       }
-      
+
       best_pairs   <- parent_pairs[best_indices]
       best_markers <- comparisons[best_indices[1]]
       best_error   <- base::round(min_mismatch_val, 2)
       a_status     <- assign_status(best_markers, best_error)
-      
+
       if (!show_ties && base::nrow(best_pairs) > 1) {
         warning("Progeny '", prog_id, "' has ", base::nrow(best_pairs),
                 " tied best pairs. Only one is reported as show_ties=FALSE.", call. = FALSE)
       }
-      
+
       num_to_report <- base::min(base::nrow(best_pairs),
                                  if (show_ties) base::nrow(best_pairs) else 1)
-      
+
       # Always populate base columns with the top result
       data.table::set(results_dt, j, "Male_Parent",         best_pairs$Male_Parent[1])
       data.table::set(results_dt, j, "Female_Parent",       best_pairs$Female_Parent[1])
       data.table::set(results_dt, j, "Mendelian_Error_Pct", base::sprintf("%.2f", min_mismatch_val))
       data.table::set(results_dt, j, "Markers_Tested",      base::as.integer(best_markers))
       data.table::set(results_dt, j, "Assignment_Status",   a_status)
-      
+
       # If ties remain after tie-breaking and show_ties is TRUE,
       # store tie details for later column binding
       if (show_ties && num_to_report > 1) {
@@ -339,7 +344,7 @@ find_parentage <- function(genotypes_file, parents_file, progeny_file,
         results_list[[j]] <- data.table::as.data.table(tie_row)
       }
     }
-    
+
     # Merge any tie suffix columns onto the pre-allocated base table
     tie_rows <- data.table::rbindlist(
       base::Filter(Negate(base::is.null), results_list),
@@ -348,15 +353,21 @@ find_parentage <- function(genotypes_file, parents_file, progeny_file,
     )
     if (base::nrow(tie_rows) > 0) {
       final_df <- merge(results_dt, tie_rows, by = "Progeny", all.x = TRUE)
-      # Replace any empty strings with NA
-      for (col in base::names(final_df)) {
-        data.table::set(final_df, which(final_df[[col]] == ""), col, NA_character_)
+
+      char_cols <- base::names(final_df)[
+        base::vapply(final_df, base::is.character, logical(1))
+      ]
+      for (col in char_cols) {
+        empty_idx <- which(final_df[[col]] == "")
+        if (base::length(empty_idx) > 0) {
+          data.table::set(final_df, empty_idx, col, NA_character_)
+        }
       }
     } else {
       final_df <- results_dt
     }
   }
-  
+
   #### Summary ####
   if (verbose) {
     total    <- base::nrow(final_df)
@@ -369,10 +380,10 @@ find_parentage <- function(genotypes_file, parents_file, progeny_file,
     base::cat("Min markers threshold :", min_markers,     "\n")
     base::cat("Error threshold       :", error_threshold, "%\n\n")
   }
-  
+
   #### Output ####
+
   if (write_txt) {
-    output_filename <- "parentage_testing_results.txt"
     tryCatch({
       data.table::fwrite(final_df, file = output_filename, sep = "\t", quote = FALSE,
                          na = na_string)
@@ -381,7 +392,7 @@ find_parentage <- function(genotypes_file, parents_file, progeny_file,
       warning("Could not write results to file. Error: ", e$message, call. = FALSE)
     })
   }
-  
+
   #### Plot Results ####
   if (plot_results) {
     if (!requireNamespace("ggplot2", quietly = TRUE)) {
@@ -389,26 +400,26 @@ find_parentage <- function(genotypes_file, parents_file, progeny_file,
     } else {
       plot_df <- final_df[!is.na(final_df$Mendelian_Error_Pct)]
       plot_df$Mendelian_Error_Pct <- base::as.numeric(plot_df$Mendelian_Error_Pct)
-      
+
       plot_df$Plot_Status <- dplyr::case_when(
         plot_df$Assignment_Status == "PASS"        ~ "PASS",
         plot_df$Assignment_Status == "HIGH_ERROR"  ~ "HIGH_ERROR",
         plot_df$Assignment_Status == "LOW_MARKERS" ~ "LOW_MARKERS",
         TRUE                                       ~ "OTHER"
       )
-      
+
       n_total <- base::nrow(plot_df)
       n_pass  <- base::sum(plot_df$Assignment_Status == "PASS",        na.rm = TRUE)
       n_high  <- base::sum(plot_df$Assignment_Status == "HIGH_ERROR",  na.rm = TRUE)
       n_low   <- base::sum(plot_df$Assignment_Status == "LOW_MARKERS", na.rm = TRUE)
-      
+
       threshold_label <- base::paste0(
         "Error Threshold: ", error_threshold, "%  |  ",
         "Pass: ",        n_pass, "  |  ",
         "High Error: ",  n_high, "  |  ",
         "Low Markers: ", n_low
       )
-      
+
       p <- ggplot2::ggplot(
         plot_df,
         ggplot2::aes(x = Mendelian_Error_Pct, fill = Plot_Status)
@@ -421,7 +432,8 @@ find_parentage <- function(genotypes_file, parents_file, progeny_file,
           linewidth  = 1
         ) +
         ggplot2::scale_x_continuous(breaks = seq(0, 100, by = 5)) +
-        ggplot2::scale_y_continuous(breaks = seq(0, 10000, by = 5)) +
+
+        ggplot2::scale_y_continuous(breaks = scales::pretty_breaks()) +
         ggplot2::scale_fill_manual(
           values = c(
             "PASS"        = "#339900",
@@ -445,11 +457,12 @@ find_parentage <- function(genotypes_file, parents_file, progeny_file,
         ) +
         ggplot2::theme_classic(base_size = 13) +
         ggplot2::theme(legend.position = "top")
-      
+
       base::print(p)
-      
+
       if (write_txt) {
-        plot_filename <- base::sub("\\.[^.]+$", ".jpg", "parentage_testing_results.txt")
+
+        plot_filename <- paste0(tools::file_path_sans_ext(output_filename), ".jpg")
         tryCatch({
           ggplot2::ggsave(plot_filename, plot = p,
                           device = "jpeg", width = 10, height = 6, dpi = 300)
@@ -460,7 +473,7 @@ find_parentage <- function(genotypes_file, parents_file, progeny_file,
       }
     }
   }
-  
+
   if (verbose) {
     base::cat("\n--- Parentage Assignment Results ---\n")
     base::print(final_df)
diff --git a/R/validate_pedigree.R b/R/validate_pedigree.R
index a3526ce..8784914 100644
--- a/R/validate_pedigree.R
+++ b/R/validate_pedigree.R
@@ -33,6 +33,9 @@
 #'   is also TRUE, the plot is additionally saved as a .jpg file.
 #' @param na_string Character. String for missing values in the output file.
 #'   Use \code{"NA"} or \code{""} (default: \code{"NA"}).
+#' @param corrected_pedigree_filename Character. Path/name of the output file
+#'   for the corrected pedigree (default: "corrected_pedigree.txt"). Set to
+#'   NULL to suppress writing the corrected pedigree.
 #'
 #' @return A data.table (returned invisibly) with one row per trio and
 #'   the following columns:
@@ -65,10 +68,11 @@ validate_pedigree <- function(pedigree_file, genotypes_file,
                               single_parent_error_threshold = 2.0,
                               verbose = TRUE,
                               write_results = TRUE,
-                              output_filename = "__validation_report.txt",
                               plot_results = TRUE,
-                              na_string = "NA") { 
-  
+                              na_string = "NA",
+                              output_filename = "__validation_report.txt",
+                              corrected_pedigree_filename = "corrected_pedigree.txt") {
+
   #### Input validation ####
   if (trio_error_threshold < 0 || trio_error_threshold > 100)
     stop("trio_error_threshold must be between 0 and 100")
@@ -76,14 +80,14 @@ validate_pedigree <- function(pedigree_file, genotypes_file,
     stop("single_parent_error_threshold must be between 0 and 100")
   if (!na_string %in% c("NA", ""))
     stop("na_string must be either 'NA' or ''.")
-  
+
   tryCatch({
     pedigree <- data.table::fread(pedigree_file, sep = "auto", colClasses = "character")
     genos    <- data.table::fread(genotypes_file, sep = "auto")
   }, error = function(e) {
     stop("Error reading input files. Ensure paths are correct and files are TXT/TSV/CSV.")
   })
-  
+
   #### Check required columns ####
   required_ped_cols <- c("ID", "Male_Parent", "Female_Parent")
   missing_cols <- base::setdiff(required_ped_cols, base::names(pedigree))
@@ -92,11 +96,11 @@ validate_pedigree <- function(pedigree_file, genotypes_file,
          base::paste(missing_cols, collapse = ", "))
   if (!"ID" %in% base::names(genos))
     stop("Genotypes file must have an 'ID' column")
-  
+
   pedigree[, Male_Parent   := as.character(Male_Parent)]
   pedigree[, Female_Parent := as.character(Female_Parent)]
   original_pedigree <- data.table::copy(pedigree)
-  
+
   #### Read founders list ####
   if (!is.null(founders_file)) {
     founders_raw <- tryCatch({
@@ -108,7 +112,15 @@ validate_pedigree <- function(pedigree_file, genotypes_file,
   } else {
     founder_ids <- character(0)
   }
-  
+
+  #### Build genotype matrices ####
+  genos_mat <- base::as.matrix(genos, rownames = "ID")
+  genos_hom   <- data.table::copy(genos)
+  marker_cols <- base::setdiff(base::names(genos_hom), "ID")
+  for (col in marker_cols)
+    genos_hom[base::get(col) == 1, (col) := NA_integer_]
+  genos_hom_mat <- base::as.matrix(genos_hom, rownames = "ID")
+
   #### Identify trios missing from the genotype file ####
   valid_ids <- as.character(genos$ID)
   has_geno <- pedigree[ID %in% valid_ids &
@@ -123,15 +135,7 @@ validate_pedigree <- function(pedigree_file, genotypes_file,
   pedigree <- has_geno
   if (base::nrow(pedigree) == 0)
     stop("No valid trios remain after filtering for genotype availability.")
-  
-  #### Build genotype matrices ####
-  genos_mat <- base::as.matrix(genos, rownames = "ID")
-  genos_hom   <- data.table::copy(genos)
-  marker_cols <- base::setdiff(base::names(genos_hom), "ID")
-  for (col in marker_cols)
-    genos_hom[base::get(col) == 1, (col) := NA_integer_]
-  genos_hom_mat <- base::as.matrix(genos_hom, rownames = "ID")
-  
+
   #### Find best matching parent via homozygous mismatch ####
   find_best_parent <- function(prog_id, exclude_ids = base::character(0)) {
     candidates <- base::setdiff(base::rownames(genos_hom_mat),
@@ -145,11 +149,14 @@ validate_pedigree <- function(pedigree_file, genotypes_file,
       if (comparisons == 0) return(NA_real_)
       (base::sum(cand_hom != prog_hom, na.rm = TRUE) / comparisons) * 100
     })
+
+        if (base::all(base::is.na(errors)))
+      return(base::list(id = NA_character_, error_pct = NA_real_))
     best_idx <- base::which.min(errors)
     base::list(id = candidates[best_idx],
                error_pct = base::round(errors[best_idx], 2))
   }
-  
+
   #### Main trio evaluation loop ####
   results_list <- base::lapply(base::seq_len(base::nrow(pedigree)), function(i) {
     prog_id          <- pedigree$ID[i]
@@ -165,7 +172,7 @@ validate_pedigree <- function(pedigree_file, genotypes_file,
     best_male_parent_pct    <- NA_real_
     best_female_parent      <- NA_character_
     best_female_parent_pct  <- NA_real_
-    
+
     if (male_parent_id == "0" && female_parent_id == "0" &&
         prog_id %in% founder_ids) {
       status              <- "FOUNDERS"
@@ -268,7 +275,7 @@ validate_pedigree <- function(pedigree_file, genotypes_file,
         }
       }
     }
-    
+
     data.table::data.table(
       ID                              = prog_id,
       Orig_Male_Parent                = male_parent_id,
@@ -285,9 +292,9 @@ validate_pedigree <- function(pedigree_file, genotypes_file,
       Best_Female_Candidate_Error_Pct = best_female_parent_pct
     )
   })
-  
+
   final_df <- data.table::rbindlist(results_list)
-  
+
   #### Append NO_GENOTYPE_DATA rows ####
   if (base::nrow(no_geno_rows) > 0) {
     no_geno_df <- data.table::data.table(
@@ -307,30 +314,34 @@ validate_pedigree <- function(pedigree_file, genotypes_file,
     )
     final_df <- data.table::rbindlist(list(final_df, no_geno_df))
   }
-  
+
   #### Write corrected pedigree ####
-  corrected_pedigree <- data.table::copy(original_pedigree)
-  for (i in base::seq_len(base::nrow(final_df))) {
-    prog_id  <- final_df$ID[i]
-    decision <- final_df$Recommended_Correction[i]
-    row_idx  <- base::which(corrected_pedigree$ID == prog_id)
-    if (decision == "REMOVE_MALE_PARENT") {
-      data.table::set(corrected_pedigree, row_idx, "Male_Parent", "0")
-    } else if (decision == "REMOVE_FEMALE_PARENT") {
-      data.table::set(corrected_pedigree, row_idx, "Female_Parent", "0")
-    } else if (decision == "REMOVE_BOTH") {
-      data.table::set(corrected_pedigree, row_idx, "Male_Parent", "0")
-      data.table::set(corrected_pedigree, row_idx, "Female_Parent", "0")
+
+    # pass NULL to suppress writing.
+  if (!is.null(corrected_pedigree_filename)) {
+    corrected_pedigree <- data.table::copy(original_pedigree)
+    for (i in base::seq_len(base::nrow(final_df))) {
+      prog_id  <- final_df$ID[i]
+      decision <- final_df$Recommended_Correction[i]
+      row_idx  <- base::which(corrected_pedigree$ID == prog_id)
+      if (decision == "REMOVE_MALE_PARENT") {
+        data.table::set(corrected_pedigree, row_idx, "Male_Parent", "0")
+      } else if (decision == "REMOVE_FEMALE_PARENT") {
+        data.table::set(corrected_pedigree, row_idx, "Female_Parent", "0")
+      } else if (decision == "REMOVE_BOTH") {
+        data.table::set(corrected_pedigree, row_idx, "Male_Parent", "0")
+        data.table::set(corrected_pedigree, row_idx, "Female_Parent", "0")
+      }
     }
+    tryCatch({
+      data.table::fwrite(corrected_pedigree, file = corrected_pedigree_filename,
+                         sep = "\t", quote = FALSE)
+      if (verbose) base::cat("Corrected pedigree written to:", corrected_pedigree_filename, "\n")
+    }, error = function(e) {
+      warning("Could not write corrected pedigree. Error: ", e$message, call. = FALSE)
+    })
   }
-  tryCatch({
-    data.table::fwrite(corrected_pedigree, file = "corrected_pedigree.txt",
-                       sep = "\t", quote = FALSE)
-    if (verbose) base::cat("Corrected pedigree written to: corrected_pedigree.txt\n")
-  }, error = function(e) {
-    warning("Could not write corrected pedigree. Error: ", e$message, call. = FALSE)
-  })
-  
+
   #### Summary output ####
   if (verbose) {
     total_trios   <- base::nrow(final_df)
@@ -352,18 +363,18 @@ validate_pedigree <- function(pedigree_file, genotypes_file,
     base::cat("\n")
     base::print(final_df)
   }
-  
+
   #### Write results ####
   if (write_results) {
     tryCatch({
       data.table::fwrite(final_df, file = output_filename,
-                         sep = "\t", quote = FALSE, na = na_string)   # <-- na_string used here
+                         sep = "\t", quote = FALSE, na = na_string)
       if (verbose) base::cat("Results written to:", output_filename, "\n")
     }, error = function(e) {
       warning("Could not write results. Error: ", e$message, call. = FALSE)
     })
   }
-  
+
   #### Plot results ####
   if (plot_results) {
     if (!requireNamespace("ggplot2", quietly = TRUE)) {
@@ -421,7 +432,9 @@ validate_pedigree <- function(pedigree_file, genotypes_file,
         ggplot2::theme(legend.position = "top")
       print(p)
       if (write_results) {
-        plot_filename <- base::sub("\\.[^.]+$", ".jpg", output_filename)
+        # Fix #3: use tools::file_path_sans_ext() to reliably build the .jpg
+        # filename regardless of whether output_filename has an extension or not.
+        plot_filename <- paste0(tools::file_path_sans_ext(output_filename), ".jpg")
         tryCatch({
           ggplot2::ggsave(plot_filename, plot = p,
                           device = "jpeg", width = 10, height = 6, dpi = 300)
@@ -432,6 +445,6 @@ validate_pedigree <- function(pedigree_file, genotypes_file,
       }
     }
   }
-  
+
   return(base::invisible(final_df))
-}
\ No newline at end of file
+}
diff --git a/man/find_parentage.Rd b/man/find_parentage.Rd
index 44b9772..5a148af 100644
--- a/man/find_parentage.Rd
+++ b/man/find_parentage.Rd
@@ -15,6 +15,7 @@ find_parentage(
   allow_selfing = TRUE,
   verbose = TRUE,
   write_txt = TRUE,
+  output_filename = "parentage_testing_results.txt",
   na_string = "NA",
   plot_results = TRUE
 )
@@ -53,7 +54,12 @@ female parent IDs are excluded. Default is \code{TRUE}.}
 results to the console. Default is \code{TRUE}.}
 
 \item{write_txt}{Logical. If \code{TRUE}, writes results to
-\code{parentage_testing_results.txt}. Default is \code{TRUE}.}
+\code{output_filename}. The plot will be saved using the same base name
+with a .jpg extension. Default is \code{TRUE}.}
+
+\item{output_filename}{Character. Path/name of the output results file
+(default: \code{"parentage_testing_results.txt"}). The plot will be saved
+using the same base name with a .jpg extension.}
 
 \item{na_string}{Character. String for missing values in output file.
 Use \code{"NA"} or \code{""} (default: \code{"NA"}).}
diff --git a/man/validate_pedigree.Rd b/man/validate_pedigree.Rd
index 8ec975b..b4987de 100644
--- a/man/validate_pedigree.Rd
+++ b/man/validate_pedigree.Rd
@@ -13,9 +13,10 @@ validate_pedigree(
   single_parent_error_threshold = 2,
   verbose = TRUE,
   write_results = TRUE,
-  output_filename = "__validation_report.txt",
   plot_results = TRUE,
-  na_string = "NA"
+  na_string = "NA",
+  output_filename = "__validation_report.txt",
+  corrected_pedigree_filename = "corrected_pedigree.txt"
 )
 }
 \arguments{
@@ -45,16 +46,20 @@ table, and results to the console (default: TRUE).}
 \item{write_results}{Logical. If TRUE, writes the validation results to
 output_filename and saves the plot as a .jpg file (default: TRUE).}
 
-\item{output_filename}{Character. Path/name of the output results file
-(default: "__validation_report.txt"). The plot will be saved using the
-same base name with a .jpg extension.}
-
 \item{plot_results}{Logical. If TRUE, prints a histogram of Trio Mendelian
 Error percentages with a threshold line (default: TRUE). If write_results
 is also TRUE, the plot is additionally saved as a .jpg file.}
 
 \item{na_string}{Character. String for missing values in the output file.
 Use \code{"NA"} or \code{""} (default: \code{"NA"}).}
+
+\item{output_filename}{Character. Path/name of the output results file
+(default: "__validation_report.txt"). The plot will be saved using the
+same base name with a .jpg extension.}
+
+\item{corrected_pedigree_filename}{Character. Path/name of the output file
+for the corrected pedigree (default: "corrected_pedigree.txt"). Set to
+NULL to suppress writing the corrected pedigree.}
 }
 \value{
 A data.table (returned invisibly) with one row per trio and

From 506a47ff1dba98752d1e7910a251b6e5fdd66b8b Mon Sep 17 00:00:00 2001
From: "josue.chinchilla" <josue.chinchilla@IFASadmins-MacBook-Pro.local>
Date: Mon, 27 Apr 2026 15:44:13 -0400
Subject: [PATCH 08/12] added edits suggested by copilot

---
 R/check_ped.R    | 152 +++++++++++++++++++++++------------------------
 man/check_ped.Rd |  53 ++++++++---------
 2 files changed, 101 insertions(+), 104 deletions(-)

diff --git a/R/check_ped.R b/R/check_ped.R
index 0ec5cf2..26e0d8a 100644
--- a/R/check_ped.R
+++ b/R/check_ped.R
@@ -1,35 +1,38 @@
-#' Check a pedigree file for structural issues and optionally apply corrections
+#' @title Check a pedigree file for structural issues
 #'
-#' Reads a tab-separated pedigree file with columns `id`, `male_parent`, and
-#' `female_parent` and checks for five classes of issues: missing required
-#' columns, exact duplicate rows, repeated IDs with conflicting parent
-#' assignments, inconsistent parent sex roles, and pedigree cycles. The input
-#' file is never modified. A report list is always assigned to the global
-#' environment; when `correct = TRUE`, a corrected pedigree is assigned as well.
-#'
-#' When `correct = TRUE`, the function removes exact duplicates, collapses
-#' conflicting repeated IDs (replacing the inconsistent parent field with `"0"`),
-#' and appends missing parents as founder rows. Inconsistent sex roles and
-#' pedigree cycles are reported but not corrected and must be resolved manually.
+#' @description
+#' Reads a tab‑separated pedigree file with columns `id`,
+#' `male_parent`, and `female_parent` and checks for five classes of
+#' errors: missing required columns, exact duplicate rows,
+#' repeated IDs with conflicting parents, inconsistent parent
+#' sex roles, and pedigree cycles.  The input file is never
+#' modified.  A report list is always assigned to the global
+#' environment; when `correct = TRUE` a corrected pedigree is
+#' assigned as well.
 #'
 #' @param ped.file Path to the pedigree text file.
 #' @param seed Optional integer seed for reproducibility.
-#' @param verbose Logical. If `TRUE` (default), prints findings to the console.
-#' @param correct Logical. If `TRUE` (default), builds and assigns a corrected
-#'   pedigree to the global environment. If `FALSE`, issues are only reported.
-#' @param save_zip Logical. If `TRUE`, bundles report components into a `.zip`
-#'   archive in the working directory.
-#' @param save_corrected_zip Logical. If `TRUE` (requires `save_zip = TRUE` and
-#'   `correct = TRUE`), includes the corrected pedigree in the `.zip` archive.
+#' @param verbose Logical. If `TRUE` (default), prints findings to the
+#'   console.
+#' @param correct Logical. If `TRUE` (default), builds and assigns a
+#'   corrected pedigree to the global environment. If `FALSE`, issues
+#'   are only reported.
+#' @param save_zip Logical. If `TRUE`, bundles report components into a
+#'   `.zip` archive in the working directory.
+#' @param save_corrected_zip Logical. If `TRUE` (requires `save_zip =
+#'   TRUE` and `correct = TRUE`), includes the corrected pedigree in
+#'   the `.zip` archive.
 #'
-#' @return An invisible named list of data frames describing detected issues:
-#'   * `exact_duplicates`: exact duplicate rows.
-#'   * `repeated_ids_diff`: rows where the same `id` has conflicting parents.
-#'   * `inconsistent_sex_roles`: rows involving IDs used as both parents.
-#'   * `missing_parents`: rows referencing parent IDs absent from `id`.
-#'   * `dependencies`: rows whose `id` is involved in a pedigree cycle.
+#' @return An invisible named list of data frames describing detected
+#'   issues:
+#'   \item{exact_duplicates}{exact duplicate rows}
+#'   \item{repeated_ids_diff}{rows where the same `id` has conflicting parents}
+#'   \item{inconsistent_sex_roles}{rows involving IDs used as both parents}
+#'   \item{missing_parents}{rows referencing parent IDs absent from `id`}
+#'   \item{dependencies}{rows whose `id` is involved in a pedigree cycle}
 #'
-#' @importFrom dplyr %>% group_by filter ungroup distinct mutate summarize first bind_rows n_distinct n select row_number
+#' @importFrom dplyr %>% group_by filter ungroup distinct mutate
+#'   summarize first bind_rows n_distinct n select row_number
 #' @importFrom stats setNames
 #' @importFrom utils read.table write.table zip capture.output
 #' @importFrom tools file_path_sans_ext
@@ -40,14 +43,11 @@ check_ped <- function(ped.file,
                       correct            = TRUE,
                       save_zip           = FALSE,
                       save_corrected_zip = FALSE) {
-
   #### setup ####
   if (!is.null(seed)) set.seed(seed)
   data <- utils::read.table(ped.file, header = TRUE)
 
   # ── Column alias remapping ──────────────────────────────────────────────────
-  # Accept common alternative column names and silently remap them so the rest
-  # of the function always works with the canonical names.
   col_aliases <- list(
     id            = c("id", "ID", "animal", "Animal", "ind", "Ind"),
     male_parent   = c("male_parent", "sire",   "Sire",   "father", "Father", "pat", "Pat"),
@@ -73,48 +73,47 @@ check_ped <- function(ped.file,
     )
   }
   data <- data %>%
-    mutate(
+    dplyr::mutate(
       id            = as.character(id),
       male_parent   = as.character(male_parent),
       female_parent = as.character(female_parent)
     )
 
   # Add row numbers before any processing
-  data   <- data %>% mutate(row_number = row_number(), .before = id)
+  data   <- data %>% dplyr::mutate(row_number = dplyr::row_number(), .before = id)
   errors <- list()
 
   #### check 1: exact duplicates ####
   exact_duplicates <- data[
-    duplicated(data %>% select(-row_number)) |
-      duplicated(data %>% select(-row_number), fromLast = TRUE),
+    duplicated(data %>% dplyr::select(-row_number)) |
+      duplicated(data %>% dplyr::select(-row_number), fromLast = TRUE),
   ]
 
   #### check 2: repeated IDs with conflicting male_parent/female_parent ####
   repeated_ids_diff <- data %>%
-    group_by(id) %>%
-    filter(n() > 1) %>%
-    filter(n_distinct(male_parent) > 1 | n_distinct(female_parent) > 1) %>%
-    ungroup()
+    dplyr::group_by(id) %>%
+    dplyr::filter(dplyr::n() > 1) %>%
+    dplyr::filter(dplyr::n_distinct(male_parent) > 1 |
+                    dplyr::n_distinct(female_parent) > 1) %>%
+    dplyr::ungroup()
 
   #### check 3: inconsistent parent sex roles ####
   male_ids   <- unique(data$male_parent[data$male_parent   != "0"])
   female_ids <- unique(data$female_parent[data$female_parent != "0"])
   messy_ids  <- intersect(male_ids, female_ids)
   inconsistent_sex_roles <- data %>%
-    filter(male_parent %in% messy_ids | female_parent %in% messy_ids)
+    dplyr::filter(male_parent %in% messy_ids | female_parent %in% messy_ids)
 
   #### check 4: missing parents ####
   all_ids <- unique(data$id)
   ref_ids <- unique(c(data$male_parent, data$female_parent))
-  # Fix #1: trimws() strips whitespace-only strings; the combined filter then
-  # drops NA (which NA != "0" would not catch), empty strings, and "0"
-  # to prevent spurious founder rows with id = NA in the corrected pedigree.
+
+  # Trim, remove NAs and zeros
   ref_ids <- trimws(ref_ids)
   ref_ids <- ref_ids[!is.na(ref_ids) & ref_ids != "" & ref_ids != "0"]
-
   missing_ids     <- setdiff(ref_ids, all_ids)
   missing_parents <- data %>%
-    filter(male_parent %in% missing_ids | female_parent %in% missing_ids)
+    dplyr::filter(male_parent %in% missing_ids | female_parent %in% missing_ids)
 
   # Only build founder rows if there are actually missing parents to add
   if (length(missing_ids) > 0) {
@@ -173,7 +172,6 @@ check_ped <- function(ped.file,
     }
     return(all_cycles)
   }
-
   cycles    <- detect_all_cycles(data)
   cycle_ids <- character(0)
   if (length(cycles) > 0) {
@@ -185,7 +183,7 @@ check_ped <- function(ped.file,
                                 paste(ids, collapse = " -> ")))
     }
   }
-  dependencies <- data %>% filter(id %in% cycle_ids)
+  dependencies <- data %>% dplyr::filter(id %in% cycle_ids)
   if (nrow(dependencies) == 0 && length(errors) > 0) {
     dependencies <- data.frame(Dependency = unique(unlist(errors)),
                                stringsAsFactors = FALSE)
@@ -202,27 +200,27 @@ check_ped <- function(ped.file,
 
   #### build corrected pedigree (no row_number column) ####
   if (correct) {
-    corrected <- data %>% select(-row_number)
+    corrected <- data %>% dplyr::select(-row_number)
 
     # Remove exact duplicates
-    corrected <- distinct(corrected)
+    corrected <- dplyr::distinct(corrected)
 
     # Resolve conflicting IDs
     if (nrow(repeated_ids_diff) > 0) {
       corrected <- corrected %>%
-        group_by(id) %>%
-        summarize(
-          male_parent   = if (n_distinct(male_parent)   > 1) "0" else first(male_parent),
-          female_parent = if (n_distinct(female_parent) > 1) "0" else first(female_parent),
+        dplyr::group_by(id) %>%
+        dplyr::summarize(
+          male_parent   = if (dplyr::n_distinct(male_parent)   > 1) "0" else dplyr::first(male_parent),
+          female_parent = if (dplyr::n_distinct(female_parent) > 1) "0" else dplyr::first(female_parent),
           .groups = "drop"
         )
     }
 
     # Add missing founders
     if (length(missing_ids) > 0) {
-      corrected <- bind_rows(
+      corrected <- dplyr::bind_rows(
         corrected,
-        missing_founders %>% select(-row_number)
+        missing_founders %>% dplyr::select(-row_number)
       )
     }
   }
@@ -253,45 +251,42 @@ check_ped <- function(ped.file,
     }
   }
 
-  # Always save report to global environment
-  assign(report_name, input_ped_report, envir = .GlobalEnv)
-
-  # Save corrected pedigree if correct = TRUE
-  if (correct) assign(corrected_name, corrected, envir = .GlobalEnv)
+  # Section headers matching the console output labels
+  section_labels <- c(
+    exact_duplicates       = "Exact Duplicates",
+    repeated_ids_diff      = "Conflicting IDs",
+    inconsistent_sex_roles = "Inconsistent Parent Sex Roles",
+    missing_parents        = "Missing Parents (rows that reference them)",
+    dependencies           = "Cycles / Dependencies"
+  )
 
-  #### zip export ####
   if (save_zip) {
     tmp_dir <- tempfile()
     dir.create(tmp_dir)
-    # Fix #2: register cleanup so the temp directory is always removed on
-    # function exit, whether normal or due to an error.
     on.exit(unlink(tmp_dir, recursive = TRUE), add = TRUE)
-
     zip_files <- character(0)
 
-    # Section headers matching the console output labels
-    section_labels <- c(
-      exact_duplicates       = "Exact Duplicates",
-      repeated_ids_diff      = "Conflicting IDs",
-      inconsistent_sex_roles = "Inconsistent Parent Sex Roles",
-      missing_parents        = "Missing Parents (rows that reference them)",
-      dependencies           = "Cycles / Dependencies"
-    )
     for (component in names(section_labels)) {
       df        <- input_ped_report[[component]]
       file_path <- file.path(tmp_dir, paste0(file_base, "_", component, ".txt"))
       if (nrow(df) == 0) {
         writeLines(
-          c(paste0("--- ", section_labels[[component]], " ---"), "None found."),
+          c(paste0("--- ", section_labels[[component]], " ---"),
+            "None found."),
           con = file_path
         )
       } else {
         file_con <- file(file_path, open = "wt")
-        writeLines(paste0("--- ", section_labels[[component]], " ---"), con = file_con)
-        writeLines(paste(colnames(df), collapse = "\t"), con = file_con)
+        writeLines(paste0("--- ", section_labels[[component]], " ---"),
+                   con = file_con)
+        writeLines(paste(colnames(df), collapse = "\t"),
+                   con = file_con)
         close(file_con)
-        utils::write.table(df, file = file_path, sep = "\t", row.names = FALSE,
-                           quote = FALSE, append = TRUE, col.names = FALSE)
+        utils::write.table(df, file = file_path, sep = "\t",
+                           row.names = FALSE,
+                           quote = FALSE,
+                           append = TRUE,
+                           col.names = FALSE)
       }
       zip_files <- c(zip_files, file_path)
     }
@@ -299,7 +294,8 @@ check_ped <- function(ped.file,
     if (save_corrected_zip && correct) {
       corrected_path <- file.path(tmp_dir, paste0(corrected_name, ".txt"))
       utils::write.table(corrected, file = corrected_path, sep = "\t",
-                         row.names = FALSE, quote = FALSE)
+                         row.names = FALSE,
+                         quote = FALSE)
       zip_files <- c(zip_files, corrected_path)
     }
 
@@ -310,5 +306,9 @@ check_ped <- function(ped.file,
     if (verbose) cat(paste0("\nZip archive saved to: ", zip_path, "\n"))
   }
 
+  # Assign objects to the global environment
+  assign(report_name, input_ped_report, envir = .GlobalEnv)
+  if (correct) assign(corrected_name, corrected, envir = .GlobalEnv)
+
   invisible(input_ped_report)
 }
diff --git a/man/check_ped.Rd b/man/check_ped.Rd
index bbae7c3..0cf3840 100644
--- a/man/check_ped.Rd
+++ b/man/check_ped.Rd
@@ -2,7 +2,7 @@
 % Please edit documentation in R/check_ped.R
 \name{check_ped}
 \alias{check_ped}
-\title{Check a pedigree file for structural issues and optionally apply corrections}
+\title{Check a pedigree file for structural issues}
 \usage{
 check_ped(
   ped.file,
@@ -18,38 +18,35 @@ check_ped(
 
 \item{seed}{Optional integer seed for reproducibility.}
 
-\item{verbose}{Logical. If \code{TRUE} (default), prints findings to the console.}
+\item{verbose}{Logical. If \code{TRUE} (default), prints findings to the
+console.}
 
-\item{correct}{Logical. If \code{TRUE} (default), builds and assigns a corrected
-pedigree to the global environment. If \code{FALSE}, issues are only reported.}
+\item{correct}{Logical. If \code{TRUE} (default), builds and assigns a
+corrected pedigree to the global environment. If \code{FALSE}, issues
+are only reported.}
 
-\item{save_zip}{Logical. If \code{TRUE}, bundles report components into a \code{.zip}
-archive in the working directory.}
+\item{save_zip}{Logical. If \code{TRUE}, bundles report components into a
+\code{.zip} archive in the working directory.}
 
-\item{save_corrected_zip}{Logical. If \code{TRUE} (requires \code{save_zip = TRUE} and
-\code{correct = TRUE}), includes the corrected pedigree in the \code{.zip} archive.}
+\item{save_corrected_zip}{Logical. If \code{TRUE} (requires \code{save_zip = TRUE} and \code{correct = TRUE}), includes the corrected pedigree in
+the \code{.zip} archive.}
 }
 \value{
-An invisible named list of data frames describing detected issues:
-\itemize{
-\item \code{exact_duplicates}: exact duplicate rows.
-\item \code{repeated_ids_diff}: rows where the same \code{id} has conflicting parents.
-\item \code{inconsistent_sex_roles}: rows involving IDs used as both parents.
-\item \code{missing_parents}: rows referencing parent IDs absent from \code{id}.
-\item \code{dependencies}: rows whose \code{id} is involved in a pedigree cycle.
-}
+An invisible named list of data frames describing detected
+issues:
+\item{exact_duplicates}{exact duplicate rows}
+\item{repeated_ids_diff}{rows where the same \code{id} has conflicting parents}
+\item{inconsistent_sex_roles}{rows involving IDs used as both parents}
+\item{missing_parents}{rows referencing parent IDs absent from \code{id}}
+\item{dependencies}{rows whose \code{id} is involved in a pedigree cycle}
 }
 \description{
-Reads a tab-separated pedigree file with columns \code{id}, \code{male_parent}, and
-\code{female_parent} and checks for five classes of issues: missing required
-columns, exact duplicate rows, repeated IDs with conflicting parent
-assignments, inconsistent parent sex roles, and pedigree cycles. The input
-file is never modified. A report list is always assigned to the global
-environment; when \code{correct = TRUE}, a corrected pedigree is assigned as well.
-}
-\details{
-When \code{correct = TRUE}, the function removes exact duplicates, collapses
-conflicting repeated IDs (replacing the inconsistent parent field with \code{"0"}),
-and appends missing parents as founder rows. Inconsistent sex roles and
-pedigree cycles are reported but not corrected and must be resolved manually.
+Reads a tab‑separated pedigree file with columns \code{id},
+\code{male_parent}, and \code{female_parent} and checks for five classes of
+errors: missing required columns, exact duplicate rows,
+repeated IDs with conflicting parents, inconsistent parent
+sex roles, and pedigree cycles.  The input file is never
+modified.  A report list is always assigned to the global
+environment; when \code{correct = TRUE} a corrected pedigree is
+assigned as well.
 }

From 977e0702fb52e430d2147333c352dbfe97cd0101 Mon Sep 17 00:00:00 2001
From: "josue.chinchilla" <josue.chinchilla@IFASadmins-MacBook-Pro.local>
Date: Tue, 28 Apr 2026 11:15:05 -0400
Subject: [PATCH 09/12] modified default params

---
 R/check_ped.R | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/R/check_ped.R b/R/check_ped.R
index 26e0d8a..6fb77ef 100644
--- a/R/check_ped.R
+++ b/R/check_ped.R
@@ -41,8 +41,8 @@ check_ped <- function(ped.file,
                       seed               = NULL,
                       verbose            = TRUE,
                       correct            = TRUE,
-                      save_zip           = FALSE,
-                      save_corrected_zip = FALSE) {
+                      save_zip           = TRUE,
+                      save_corrected_zip = TRUE) {
   #### setup ####
   if (!is.null(seed)) set.seed(seed)
   data <- utils::read.table(ped.file, header = TRUE)

From 5db5fa29477c69d830873ca6434671cf00b60d55 Mon Sep 17 00:00:00 2001
From: "josue.chinchilla" <josue.chinchilla@IFASadmins-MacBook-Pro.local>
Date: Tue, 28 Apr 2026 13:13:50 -0400
Subject: [PATCH 10/12] Added authors fror functions and made headers concise

---
 NAMESPACE                     |   8 +-
 R/breedtools_functions.R      | 282 +++++++++++++++-------------------
 R/check_ped.R                 | 225 ++++++++++++++-------------
 R/find_parentage.R            |   2 +
 R/imputation_concordance.R    |  95 +++++-------
 R/validate_pedigree.R         |   3 +
 man/allele_freq_poly.Rd       |  49 +++---
 man/check_ped.Rd              |  78 +++++++---
 man/find_parentage.Rd         |   3 +
 man/imputation_concordance.Rd |  82 +++++-----
 man/solve_composition_poly.Rd |  61 ++++----
 man/validate_pedigree.Rd      |   3 +
 12 files changed, 433 insertions(+), 458 deletions(-)

diff --git a/NAMESPACE b/NAMESPACE
index 4e518d1..c460be1 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -62,6 +62,13 @@ importFrom(dplyr,summarise)
 importFrom(dplyr,summarize)
 importFrom(dplyr,ungroup)
 importFrom(dplyr,where)
+importFrom(ggplot2,aes)
+importFrom(ggplot2,element_text)
+importFrom(ggplot2,geom_bar)
+importFrom(ggplot2,ggplot)
+importFrom(ggplot2,labs)
+importFrom(ggplot2,theme)
+importFrom(ggplot2,theme_minimal)
 importFrom(pwalign,nucleotideSubstitutionMatrix)
 importFrom(pwalign,pairwiseAlignment)
 importFrom(readr,read_csv)
@@ -73,7 +80,6 @@ importFrom(stats,reorder)
 importFrom(stats,setNames)
 importFrom(tibble,as_tibble)
 importFrom(tools,file_path_sans_ext)
-importFrom(utils,capture.output)
 importFrom(utils,packageVersion)
 importFrom(utils,read.csv)
 importFrom(utils,read.table)
diff --git a/R/breedtools_functions.R b/R/breedtools_functions.R
index 5f21836..52a2d85 100644
--- a/R/breedtools_functions.R
+++ b/R/breedtools_functions.R
@@ -1,149 +1,129 @@
-#' Compute Allele Frequencies for Populations
+#' Compute allele frequencies for populations
 #'
-#' Computes allele frequencies for specified populations given SNP array data
+#' Computes allele frequencies for specified populations from SNP array data
+#' coded as dosage of allele B.
 #'
-#' @param geno matrix of genotypes coded as the dosage of allele B \code{{0, 1, 2, ..., ploidy}}
-#'  with individuals in rows (named) and SNPs in columns (named)
-#' @param populations list of named populations. Each population has a vector of IDs
-#'  that belong to the population. Allele frequencies will be derived from all animals
-#' @param ploidy integer indicating the ploidy level (default is 2 for diploid)
-#' @return data.frame consisting of allele_frequencies for populations (columns) for
-#'  each SNP (rows)
-#' @references Funkhouser SA, Bates RO, Ernst CW, Newcom D, Steibel JP. Estimation of genome-wide and locus-specific
-#' breed composition in pigs. Transl Anim Sci. 2017 Feb 1;1(1):36-44.
+#' @param geno Numeric matrix of genotypes coded as dosage of allele B
+#'   \code{{0, 1, 2, ..., ploidy}}, with individuals in rows (named) and
+#'   SNPs in columns (named).
+#' @param populations Named list of populations, each containing a character
+#'   vector of individual IDs belonging to that population.
+#' @param ploidy Integer. Ploidy level of the species. Default is `2`.
+#'
+#' @return A data frame of allele frequencies with SNPs as rows and populations
+#'   as columns.
+#'
+#' @author Josué Chinchilla-Vargas
+#'
+#' @references Funkhouser SA, Bates RO, Ernst CW, Newcom D, Steibel JP.
+#'   Estimation of genome-wide and locus-specific breed composition in pigs.
+#'   _Transl Anim Sci._ 2017;1(1):36–44.
 #'
 #' @examples
-#' # Example inputs
 #' geno_matrix <- matrix(
-#' c(4, 1, 4, 0, # S1
-#'   2, 2, 1, 3, # S2
-#'   0, 4, 0, 4, # S3
-#'   3, 3, 2, 2, # S4
-#'   1, 4, 2, 3),# S5
-#' nrow = 4, ncol = 5, byrow = FALSE, # individuals=rows, SNPs=cols
-#' dimnames = list(paste0("Ind", 1:4), paste0("S", 1:5))
+#'   c(4, 1, 4, 0,
+#'     2, 2, 1, 3,
+#'     0, 4, 0, 4,
+#'     3, 3, 2, 2,
+#'     1, 4, 2, 3),
+#'   nrow = 4, ncol = 5, byrow = FALSE,
+#'   dimnames = list(paste0("Ind", 1:4), paste0("S", 1:5))
 #' )
-#'
-#'pop_list <- list(
-#' PopA = c("Ind1", "Ind2"),
-#' PopB = c("Ind3", "Ind4")
+#' pop_list <- list(
+#'   PopA = c("Ind1", "Ind2"),
+#'   PopB = c("Ind3", "Ind4")
 #' )
-#'
 #' allele_freqs <- allele_freq_poly(geno = geno_matrix, populations = pop_list, ploidy = 4)
 #' print(allele_freqs)
 #'
 #' @export
 allele_freq_poly <- function(geno, populations, ploidy = 2) {
-
-  # Initialize returned df
+  # Initialize result matrix
   X <- matrix(NA, nrow = ncol(geno), ncol = length(populations))
-
-  # Subset geno into different populations
   for (i in 1:length(populations)) {
-
-    # Get name of ith item in the list (population name)
     pop_name <- names(populations[i])
-
-    # Subset geno to only include genotypes of IDs in pop
-    pop_geno <- geno[rownames(geno) %in% populations[[i]], ]
-
-    # Calculate allele frequencies
-    al_freq <- colMeans(pop_geno, na.rm = TRUE) / ploidy
-
-    # Add to X
-    X[, i] <- al_freq
+    pop_geno <- geno[base::rownames(geno) %in% populations[[i]], ]
+    al_freq  <- base::colMeans(pop_geno, na.rm = TRUE) / ploidy
+    X[, i]   <- al_freq
   }
-
-  # Label X with populations and SNPs
-  colnames(X) <- names(populations)
-  rownames(X) <- colnames(geno)
-
+  base::colnames(X) <- base::names(populations)
+  base::rownames(X) <- base::colnames(geno)
   return(X)
 }
 
 
+#' Solve breed composition for a single animal via quadratic programming
+#'
+#' Internal helper that solves the constrained OLS problem for one animal,
+#' returning breed proportion estimates and the R² of the fit.
 #'
-#' Performs whole genome breed composition prediction.
+#' @param Y Numeric vector of genotypes (named by SNP) for a single animal,
+#'   coded as dosage of allele B \code{{0, 1, 2, ..., ploidy}}.
+#' @param X Numeric matrix of allele frequencies with SNPs as rows and
+#'   breeds/populations as columns.
+#'
+#' @return A named numeric vector of breed proportions plus `R2`.
+#'
+#' @references Funkhouser SA, Bates RO, Ernst CW, Newcom D, Steibel JP.
+#'   Estimation of genome-wide and locus-specific breed composition in pigs.
+#'   _Transl Anim Sci._ 2017;1(1):36–44.
 #'
-#' @param Y numeric vector of genotypes (with names as SNPs) from a single animal.
-#'   coded as dosage of allele B \code{{0, 1, 2, ..., ploidy}}
-#' @param X numeric matrix of allele frequencies from reference animals
-#' @param p numeric indicating number of breeds represented in X
-#' @param names character names of breeds
-#' @return data.frame of breed composition estimates
 #' @import quadprog
 #' @importFrom stats cor
-#' @references Funkhouser SA, Bates RO, Ernst CW, Newcom D, Steibel JP. Estimation of genome-wide and locus-specific
-#' breed composition in pigs. Transl Anim Sci. 2017 Feb 1;1(1):36-44.
 #'
 #' @noRd
 QPsolve <- function(Y, X) {
-
-  # Remove NAs from Y and remove corresponding
-  #   SNPs from X. Ensure Y is numeric
-  Ymod <- Y[!is.na(Y)]
-  Xmod <- X[names(Ymod), ]
-
-  # Determine properties from X matrix - the number of parameters (breeds) p
-  #   and the names of those parameters.
-  p <- ncol(X)
-  names <- colnames(X)
-
-  # perfom steps needed to solve OLS by framing
-  # as a QP problem
-  # Rinv - matrix should be of dimensions px(p+1) where p is the number of variables in X
-  Rinv <- solve(chol(t(Xmod) %*% Xmod))
-
-  # C - the first column is a sum restriction (all equal to 1) and the rest of the columns an identity matrix
-  C <- cbind(rep(1, p), diag(p))
-
-  # b2 - This should be a vector of length p+1 the first element is the value of the sum (1)
-  #   the other elements are the restriction of individual coefficients (>)
-  #   so a value 0 produces positive coefficients
-  b2 <- c(1, rep(0, p))
-
-  # dd - this should be a matrix NOT a vector
-  dd <- (t(Ymod) %*% Xmod)
-
-  qp <- solve.QP(Dmat = Rinv, factorized = TRUE, dvec = dd, Amat = C, bvec = b2, meq = 1)
-  beta <- qp$solution
-  rr <- cor(Ymod, Xmod %*% beta)^2
+  Ymod  <- Y[!base::is.na(Y)]
+  Xmod  <- X[base::names(Ymod), ]
+  p     <- base::ncol(X)
+  nms   <- base::colnames(X)
+  Rinv  <- base::solve(base::chol(base::t(Xmod) %*% Xmod))
+  C     <- base::cbind(base::rep(1, p), base::diag(p))
+  b2    <- c(1, base::rep(0, p))
+  dd    <- base::t(Ymod) %*% Xmod
+  qp    <- quadprog::solve.QP(Dmat = Rinv, factorized = TRUE, dvec = dd,
+                              Amat = C, bvec = b2, meq = 1)
+  beta  <- qp$solution
+  rr    <- stats::cor(Ymod, Xmod %*% beta)^2
   result <- c(beta, rr)
-  names(result) <- c(names, "R2")
+  base::names(result) <- c(nms, "R2")
   return(result)
 }
 
 
-#' Compute Genome-Wide Breed Composition
-#'
-#' Computes genome-wide breed/ancestry composition using quadratic programming on a
-#' batch of animals.
-#'
-#' @param Y numeric matrix of genotypes (columns) from all animals (rows) in population
-#'  coded as dosage of allele B \code{{0, 1, 2, ..., ploidy}}
-#' @param X numeric matrix of allele frequencies (rows) from each reference panel (columns). Frequencies are
-#'  relative to allele B.
-#' @param ped data.frame giving pedigree information. Must be formatted "ID", "Sire", "Dam"
-#' @param groups list of IDs categorized by breed/population. If specified, output will be a list
-#'  of results categorized by breed/population.
-#' @param mia logical. Only applies if ped argument is supplied. If true, returns a data.frame
-#'  containing the inferred maternally inherited allele for each locus for each animal instead
-#'  of breed composition results.
-#' @param sire logical. Only applies if ped argument is supplied. If true, returns a data.frame
-#'  containing sire genotypes for each locus for each animal instead of breed composition results.
-#' @param dam logical. Only applies if ped argument is supplied. If true, returns a data.frame
-#'  containing dam genotypes for each locus for each animal instead of breed composition results.
-#' @param ploidy integer. The ploidy level of the species (e.g., 2 for diploid, 3 for triploid, etc.).
-#' @return A data.frame or list of data.frames (if groups is !NULL) with breed/ancestry composition
-#'  results
-#' @import quadprog
-#' @references Funkhouser SA, Bates RO, Ernst CW, Newcom D, Steibel JP. Estimation of genome-wide and locus-specific
-#' breed composition in pigs. Transl Anim Sci. 2017 Feb 1;1(1):36-44.
+#' Compute genome-wide breed composition
 #'
-#' @examples
-#' # Example inputs for solve_composition_poly (ploidy = 4)
+#' Estimates genome-wide breed/ancestry composition for a batch of animals
+#' using quadratic programming, with optional pedigree-assisted and
+#' grouped-output modes.
+#'
+#' @param Y Numeric matrix of genotypes with individuals as rows and SNPs as
+#'   columns, coded as dosage of allele B \code{{0, 1, 2, ..., ploidy}}.
+#' @param X Numeric matrix of allele frequencies with SNPs as rows and
+#'   breeds/populations as columns.
+#' @param ped Optional data frame with pedigree information formatted with
+#'   columns `ID`, `Sire`, and `Dam`. When supplied, `QPsolve_par` is used
+#'   and only animals with genotyped parents are included.
+#' @param groups Optional named list of IDs grouped by breed/population.
+#'   When supplied, results are returned as a named list partitioned by group.
+#' @param mia Logical. Only applies when `ped` is supplied. If `TRUE`, returns
+#'   the inferred maternally inherited allele per locus per animal. Default `FALSE`.
+#' @param sire Logical. Only applies when `ped` is supplied. If `TRUE`, returns
+#'   sire genotypes per locus per animal. Default `FALSE`.
+#' @param dam Logical. Only applies when `ped` is supplied. If `TRUE`, returns
+#'   dam genotypes per locus per animal. Default `FALSE`.
+#' @param ploidy Integer. Ploidy level of the species. Default is `2`.
+#'
+#' @return A data frame, or a named list of data frames when `groups` is
+#'   supplied, containing breed/ancestry composition estimates.
+#'
+#' @author Josué Chinchilla-Vargas
 #'
-#' # (This would typically be the output from allele_freq_poly)
+#' @references Funkhouser SA, Bates RO, Ernst CW, Newcom D, Steibel JP.
+#'   Estimation of genome-wide and locus-specific breed composition in pigs.
+#'   _Transl Anim Sci._ 2017;1(1):36–44.
+#'
+#' @examples
 #' allele_freqs_matrix <- matrix(
 #'   c(0.625, 0.500,
 #'     0.500, 0.500,
@@ -153,70 +133,50 @@ QPsolve <- function(Y, X) {
 #'   nrow = 5, ncol = 2, byrow = TRUE,
 #'   dimnames = list(paste0("SNP", 1:5), c("VarA", "VarB"))
 #' )
-#'
-#' # Validation Genotypes (individuals x SNPs)
 #' val_geno_matrix <- matrix(
-#'   c(2, 1, 2, 3, 4,  # Test1 dosages for SNP1-5
-#'     3, 4, 2, 3, 0), # Test2 dosages for SNP1-5
+#'   c(2, 1, 2, 3, 4,
+#'     3, 4, 2, 3, 0),
 #'   nrow = 2, ncol = 5, byrow = TRUE,
 #'   dimnames = list(paste0("Test", 1:2), paste0("SNP", 1:5))
 #' )
-#'
-#' # Calculate Breed Composition
 #' composition <- solve_composition_poly(Y = val_geno_matrix,
 #'                                       X = allele_freqs_matrix,
 #'                                       ploidy = 4)
 #' print(composition)
 #'
+#' @import quadprog
+#'
 #' @export
 solve_composition_poly <- function(Y,
                                    X,
-                                   ped = NULL,
+                                   ped    = NULL,
                                    groups = NULL,
-                                   mia = FALSE,
-                                   sire = FALSE,
-                                   dam = FALSE,
+                                   mia    = FALSE,
+                                   sire   = FALSE,
+                                   dam    = FALSE,
                                    ploidy = 2) {
+  # Transpose so Y is SNPs x animals, as required internally
+  Y <- base::t(Y)
+  # Retain only SNPs present in X
+  Y <- Y[base::rownames(Y) %in% base::rownames(X), ]
+
+  if (!base::is.null(ped)) {
+    # Pedigree-assisted mode: use QPsolve_par for animals with genotyped parents
+    mat_results     <- base::lapply(base::colnames(Y), QPsolve_par, Y, X, ped,
+                                    mia = mia, sire = sire, dam = dam)
+    mat_results_tab <- base::do.call(rbind, mat_results)
+    return(mat_results_tab)
+
+  } else if (!base::is.null(groups)) {
+    # Grouped mode: standard computation, results partitioned by group
+    Y               <- Y / ploidy
+    grouped_results <- base::lapply(groups, QPseparate, Y, X)
+    return(grouped_results)
 
-  # Functions require Y to be animals x SNPs. Transpose
-  Y <- t(Y)
-
-  # SNPs in Y should only be the ones present in X
-  Y <- Y[rownames(Y) %in% rownames(X), ]
-
-  # If ped is supplied, use QPsolve_par to compute genomic composition using
-  #   only animals who have genotyped parents (by incorporating Sire genotype).
-  if (!is.null(ped)) {
-    mat_results <- lapply(colnames(Y),
-                          QPsolve_par,
-                          Y,
-                          X,
-                          ped,
-                          mia = mia,
-                          sire = sire,
-                          dam = dam)
-
-    mat_results_tab <- do.call(rbind, mat_results)
-    return (mat_results_tab)
-
-    # Else if groups supplied - perform regular genomic computation
-    #   and list results by groups
-  } else if (!is.null(groups)) {
-
-    # When using regular genomic computation - adjust dosage based on ploidy
-    Y <- Y / ploidy #(default is 2)
-
-    grouped_results <- lapply(groups, QPseparate, Y, X)
-    return (grouped_results)
-
-    # If neither using the ped or grouping option - just perform normal, unsegregated
-    #   calculation
   } else {
-
-    # Adjust dosage based on ploidy (default is 2)
-    Y <- Y / ploidy
-
-    results <- t(apply(Y, 2, QPsolve, X))
-    return (results)
+    # Standard mode: unsegregated computation across all animals
+    Y       <- Y / ploidy
+    results <- base::t(base::apply(Y, 2, QPsolve, X))
+    return(results)
   }
 }
diff --git a/R/check_ped.R b/R/check_ped.R
index 6fb77ef..847ebe1 100644
--- a/R/check_ped.R
+++ b/R/check_ped.R
@@ -1,67 +1,74 @@
-#' @title Check a pedigree file for structural issues
+#' Check and optionally correct a pedigree file
 #'
-#' @description
-#' Reads a tab‑separated pedigree file with columns `id`,
-#' `male_parent`, and `female_parent` and checks for five classes of
-#' errors: missing required columns, exact duplicate rows,
-#' repeated IDs with conflicting parents, inconsistent parent
-#' sex roles, and pedigree cycles.  The input file is never
-#' modified.  A report list is always assigned to the global
-#' environment; when `correct = TRUE` a corrected pedigree is
-#' assigned as well.
+#' Reads a tab-separated pedigree file with columns `id`, `male_parent`, and
+#' `female_parent` (any column order), performs structural and consistency checks,
+#' reports findings, and optionally builds a partially corrected pedigree object.
+#'
+#' The input file is never modified. Findings are printed to the console when
+#' `verbose = TRUE`, a report list is assigned to the global environment, and when
+#' `correct = TRUE`, a corrected pedigree data frame is also assigned there.
+#'
+#' @section Checks performed:
+#' \describe{
+#'   \item{Missing required columns}{Stops immediately if `id`, `male_parent`, or
+#'     `female_parent` are absent.}
+#'   \item{Exact duplicate rows}{Rows identical across all three fields. Removed
+#'     when `correct = TRUE`.}
+#'   \item{Repeated IDs with conflicting parents}{Same `id` with differing parent
+#'     assignments. Collapsed to one row when `correct = TRUE`; conflicting fields
+#'     are set to `"0"`.}
+#'   \item{Inconsistent sex roles}{IDs appearing as both `male_parent` and
+#'     `female_parent`. Reported only; no automatic correction.}
+#'   \item{Missing parents}{Parent IDs not listed as individuals. Reported, and
+#'     added as founder rows (`"0"/"0"`) when `correct = TRUE`.}
+#'   \item{Cycles}{Direct or indirect ancestry loops. Reported only; must be
+#'     resolved manually.}
+#' }
+#'
+#' @section Correction behavior:
+#' When `correct = TRUE`, the function removes exact duplicates, collapses
+#' conflicting repeated IDs (replacing inconsistent parent fields with `"0"`),
+#' and appends missing parents as founders. Inconsistent sex roles and cycles are
+#' not automatically corrected. Running the function again on the corrected
+#' pedigree is recommended to reassess downstream issues.
 #'
 #' @param ped.file Path to the pedigree text file.
 #' @param seed Optional integer seed for reproducibility.
-#' @param verbose Logical. If `TRUE` (default), prints findings to the
-#'   console.
-#' @param correct Logical. If `TRUE` (default), builds and assigns a
-#'   corrected pedigree to the global environment. If `FALSE`, issues
-#'   are only reported.
-#' @param save_zip Logical. If `TRUE`, bundles report components into a
-#'   `.zip` archive in the working directory.
-#' @param save_corrected_zip Logical. If `TRUE` (requires `save_zip =
-#'   TRUE` and `correct = TRUE`), includes the corrected pedigree in
-#'   the `.zip` archive.
+#' @param verbose Logical. Print findings to the console? Default `TRUE`.
+#' @param correct Logical. Build and save a corrected pedigree? Default `TRUE`.
+#' @param save_zip Logical. Export report components as a `.zip` archive? Default `FALSE`.
+#' @param save_corrected_zip Logical. Include corrected pedigree in the `.zip`?
+#'   Only applies when `save_zip = TRUE` and `correct = TRUE`. Default `FALSE`.
 #'
-#' @return An invisible named list of data frames describing detected
-#'   issues:
-#'   \item{exact_duplicates}{exact duplicate rows}
-#'   \item{repeated_ids_diff}{rows where the same `id` has conflicting parents}
-#'   \item{inconsistent_sex_roles}{rows involving IDs used as both parents}
-#'   \item{missing_parents}{rows referencing parent IDs absent from `id`}
-#'   \item{dependencies}{rows whose `id` is involved in a pedigree cycle}
+#' @return An invisible named list of data frames:
+#' \describe{
+#'   \item{exact_duplicates}{Exact duplicate rows.}
+#'   \item{repeated_ids_diff}{Rows with the same `id` but conflicting parent assignments.}
+#'   \item{inconsistent_sex_roles}{Rows involving IDs used as both male and female parent.}
+#'   \item{missing_parents}{Rows referencing parent IDs absent from `id`.}
+#'   \item{dependencies}{Rows whose `id` is involved in a detected cycle.}
+#' }
 #'
-#' @importFrom dplyr %>% group_by filter ungroup distinct mutate
-#'   summarize first bind_rows n_distinct n select row_number
+#' @author Josué Chinchilla-Vargas
+#'
+#' @importFrom dplyr %>% group_by filter ungroup distinct mutate summarize first
+#'   bind_rows n_distinct n select row_number
 #' @importFrom stats setNames
-#' @importFrom utils read.table write.table zip capture.output
+#' @importFrom utils read.table write.table zip
 #' @importFrom tools file_path_sans_ext
 #' @export
 check_ped <- function(ped.file,
                       seed               = NULL,
                       verbose            = TRUE,
                       correct            = TRUE,
-                      save_zip           = TRUE,
-                      save_corrected_zip = TRUE) {
+                      save_zip           = FALSE,
+                      save_corrected_zip = FALSE) {
+
+  #### setup ####
   #### setup ####
   if (!is.null(seed)) set.seed(seed)
   data <- utils::read.table(ped.file, header = TRUE)
 
-  # ── Column alias remapping ──────────────────────────────────────────────────
-  col_aliases <- list(
-    id            = c("id", "ID", "animal", "Animal", "ind", "Ind"),
-    male_parent   = c("male_parent", "sire",   "Sire",   "father", "Father", "pat", "Pat"),
-    female_parent = c("female_parent", "dam",  "Dam",    "mother", "Mother", "mat", "Mat")
-  )
-  for (canonical in names(col_aliases)) {
-    if (!canonical %in% colnames(data)) {
-      match <- intersect(col_aliases[[canonical]], colnames(data))
-      if (length(match) > 0) {
-        colnames(data)[colnames(data) == match[1]] <- canonical
-      }
-    }
-  }
-
   # Validate required column names before any processing
   required_cols <- c("id", "male_parent", "female_parent")
   missing_cols  <- setdiff(required_cols, colnames(data))
@@ -72,48 +79,47 @@ check_ped <- function(ped.file,
       ".\nExpected columns: id, male_parent, female_parent."
     )
   }
+
   data <- data %>%
-    dplyr::mutate(
+    mutate(
       id            = as.character(id),
       male_parent   = as.character(male_parent),
       female_parent = as.character(female_parent)
     )
-
   # Add row numbers before any processing
-  data   <- data %>% dplyr::mutate(row_number = dplyr::row_number(), .before = id)
+  data <- data %>% mutate(row_number = row_number(), .before = id)
+  errors <- list()
+  # Add row numbers before any processing
+  data <- data %>% mutate(row_number = row_number(), .before = id)
   errors <- list()
 
   #### check 1: exact duplicates ####
   exact_duplicates <- data[
-    duplicated(data %>% dplyr::select(-row_number)) |
-      duplicated(data %>% dplyr::select(-row_number), fromLast = TRUE),
+    duplicated(data %>% select(-row_number)) |
+      duplicated(data %>% select(-row_number), fromLast = TRUE),
   ]
 
   #### check 2: repeated IDs with conflicting male_parent/female_parent ####
   repeated_ids_diff <- data %>%
-    dplyr::group_by(id) %>%
-    dplyr::filter(dplyr::n() > 1) %>%
-    dplyr::filter(dplyr::n_distinct(male_parent) > 1 |
-                    dplyr::n_distinct(female_parent) > 1) %>%
-    dplyr::ungroup()
+    group_by(id) %>%
+    filter(n() > 1) %>%
+    filter(n_distinct(male_parent) > 1 | n_distinct(female_parent) > 1) %>%
+    ungroup()
 
   #### check 3: inconsistent parent sex roles ####
   male_ids   <- unique(data$male_parent[data$male_parent   != "0"])
   female_ids <- unique(data$female_parent[data$female_parent != "0"])
   messy_ids  <- intersect(male_ids, female_ids)
   inconsistent_sex_roles <- data %>%
-    dplyr::filter(male_parent %in% messy_ids | female_parent %in% messy_ids)
+    filter(male_parent %in% messy_ids | female_parent %in% messy_ids)
 
   #### check 4: missing parents ####
-  all_ids <- unique(data$id)
-  ref_ids <- unique(c(data$male_parent, data$female_parent))
-
-  # Trim, remove NAs and zeros
-  ref_ids <- trimws(ref_ids)
-  ref_ids <- ref_ids[!is.na(ref_ids) & ref_ids != "" & ref_ids != "0"]
-  missing_ids     <- setdiff(ref_ids, all_ids)
+  all_ids     <- unique(data$id)
+  ref_ids     <- unique(c(data$male_parent, data$female_parent))
+  ref_ids     <- ref_ids[ref_ids != "0"]
+  missing_ids <- setdiff(ref_ids, all_ids)
   missing_parents <- data %>%
-    dplyr::filter(male_parent %in% missing_ids | female_parent %in% missing_ids)
+    filter(male_parent %in% missing_ids | female_parent %in% missing_ids)
 
   # Only build founder rows if there are actually missing parents to add
   if (length(missing_ids) > 0) {
@@ -141,7 +147,6 @@ check_ped <- function(ped.file,
       c(row$male_parent, row$female_parent)
     })
     names(adj_list) <- data$id
-
     dfs <- function(node, visited, rec_stack, path) {
       visited[node]   <- TRUE
       rec_stack[node] <- TRUE
@@ -160,7 +165,6 @@ check_ped <- function(ped.file,
       rec_stack[node] <- FALSE
       return(cycles)
     }
-
     visited   <- stats::setNames(rep(FALSE, length(adj_list)), names(adj_list))
     rec_stack <- stats::setNames(rep(FALSE, length(adj_list)), names(adj_list))
     all_cycles <- list()
@@ -172,6 +176,7 @@ check_ped <- function(ped.file,
     }
     return(all_cycles)
   }
+
   cycles    <- detect_all_cycles(data)
   cycle_ids <- character(0)
   if (length(cycles) > 0) {
@@ -183,7 +188,7 @@ check_ped <- function(ped.file,
                                 paste(ids, collapse = " -> ")))
     }
   }
-  dependencies <- data %>% dplyr::filter(id %in% cycle_ids)
+  dependencies <- data %>% filter(id %in% cycle_ids)
   if (nrow(dependencies) == 0 && length(errors) > 0) {
     dependencies <- data.frame(Dependency = unique(unlist(errors)),
                                stringsAsFactors = FALSE)
@@ -200,27 +205,24 @@ check_ped <- function(ped.file,
 
   #### build corrected pedigree (no row_number column) ####
   if (correct) {
-    corrected <- data %>% dplyr::select(-row_number)
-
+    corrected <- data %>% select(-row_number)
     # Remove exact duplicates
-    corrected <- dplyr::distinct(corrected)
-
+    corrected <- distinct(corrected)
     # Resolve conflicting IDs
     if (nrow(repeated_ids_diff) > 0) {
       corrected <- corrected %>%
-        dplyr::group_by(id) %>%
-        dplyr::summarize(
-          male_parent   = if (dplyr::n_distinct(male_parent)   > 1) "0" else dplyr::first(male_parent),
-          female_parent = if (dplyr::n_distinct(female_parent) > 1) "0" else dplyr::first(female_parent),
+        group_by(id) %>%
+        summarize(
+          male_parent   = if (n_distinct(male_parent)   > 1) "0" else first(male_parent),
+          female_parent = if (n_distinct(female_parent) > 1) "0" else first(female_parent),
           .groups = "drop"
         )
     }
-
     # Add missing founders
     if (length(missing_ids) > 0) {
-      corrected <- dplyr::bind_rows(
+      corrected <- bind_rows(
         corrected,
-        missing_founders %>% dplyr::select(-row_number)
+        missing_founders %>% select(-row_number)
       )
     }
   }
@@ -230,7 +232,6 @@ check_ped <- function(ped.file,
   corrected_name <- paste0(file_base, "_corrected")
   report_name    <- paste0(file_base, "_report")
   zip_name       <- paste0(file_base, "_report.zip")
-
   #### output ####
   if (verbose) {
     cat("\n=== Pedigree Quality Check Report ===\n")
@@ -245,70 +246,68 @@ check_ped <- function(ped.file,
     cat("\n--- Cycles / Dependencies ---\n")
     if (nrow(dependencies) > 0) print(dependencies) else cat("None found.\n")
     if (correct) {
-      cat(paste0("`correct = TRUE`: saving corrected pedigree as `", corrected_name, "`.\n"))
+      cat(paste0("\n`correct = TRUE`: saving corrected pedigree as `", corrected_name, "`.\n"))
     } else {
-      cat("`correct = FALSE`: no changes made to the pedigree.\n")
+      cat("\n`correct = FALSE`: no changes made to the pedigree.\n")
     }
   }
 
-  # Section headers matching the console output labels
-  section_labels <- c(
-    exact_duplicates       = "Exact Duplicates",
-    repeated_ids_diff      = "Conflicting IDs",
-    inconsistent_sex_roles = "Inconsistent Parent Sex Roles",
-    missing_parents        = "Missing Parents (rows that reference them)",
-    dependencies           = "Cycles / Dependencies"
-  )
+  # Always save report to global environment
+  assign(report_name, input_ped_report, envir = .GlobalEnv)
+  # Save corrected pedigree if correct = TRUE
+  if (correct) assign(corrected_name, corrected, envir = .GlobalEnv)
 
+  #### zip export ####
   if (save_zip) {
-    tmp_dir <- tempfile()
+    tmp_dir   <- tempfile()
     dir.create(tmp_dir)
-    on.exit(unlink(tmp_dir, recursive = TRUE), add = TRUE)
     zip_files <- character(0)
 
+    # Section headers matching the console output labels
+    section_labels <- c(
+      exact_duplicates       = "Exact Duplicates",
+      repeated_ids_diff      = "Conflicting IDs",
+      inconsistent_sex_roles = "Inconsistent Parent Sex Roles",
+      missing_parents        = "Missing Parents (rows that reference them)",
+      dependencies           = "Cycles / Dependencies"
+    )
+
     for (component in names(section_labels)) {
       df        <- input_ped_report[[component]]
       file_path <- file.path(tmp_dir, paste0(file_base, "_", component, ".txt"))
+
       if (nrow(df) == 0) {
+        # Write header + "None found." for clean empty reports
         writeLines(
-          c(paste0("--- ", section_labels[[component]], " ---"),
-            "None found."),
+          c(paste0("--- ", section_labels[[component]], " ---"), "None found."),
           con = file_path
         )
       } else {
+        # Write header, then column names, then data — avoids col.names warning
         file_con <- file(file_path, open = "wt")
-        writeLines(paste0("--- ", section_labels[[component]], " ---"),
-                   con = file_con)
-        writeLines(paste(colnames(df), collapse = "\t"),
-                   con = file_con)
+        writeLines(paste0("--- ", section_labels[[component]], " ---"), con = file_con)
+        writeLines(paste(colnames(df), collapse = "\t"), con = file_con)
         close(file_con)
-        utils::write.table(df, file = file_path, sep = "\t",
-                           row.names = FALSE,
-                           quote = FALSE,
-                           append = TRUE,
-                           col.names = FALSE)
+        utils::write.table(df, file = file_path, sep = "\t", row.names = FALSE,
+                           quote = FALSE, append = TRUE, col.names = FALSE)
       }
+
       zip_files <- c(zip_files, file_path)
     }
 
+    # Optionally include corrected pedigree in zip
     if (save_corrected_zip && correct) {
       corrected_path <- file.path(tmp_dir, paste0(corrected_name, ".txt"))
       utils::write.table(corrected, file = corrected_path, sep = "\t",
-                         row.names = FALSE,
-                         quote = FALSE)
+                         row.names = FALSE, quote = FALSE)
       zip_files <- c(zip_files, corrected_path)
     }
 
+    # Bundle all files into zip in the working directory
     zip_path <- file.path(getwd(), zip_name)
-    invisible(capture.output(
-      utils::zip(zipfile = zip_path, files = zip_files, flags = "-j")
-    ))
+    utils::zip(zipfile = zip_path, files = zip_files, flags = "-j")
     if (verbose) cat(paste0("\nZip archive saved to: ", zip_path, "\n"))
   }
 
-  # Assign objects to the global environment
-  assign(report_name, input_ped_report, envir = .GlobalEnv)
-  if (correct) assign(corrected_name, corrected, envir = .GlobalEnv)
-
   invisible(input_ped_report)
 }
diff --git a/R/find_parentage.R b/R/find_parentage.R
index 410981b..b02477c 100644
--- a/R/find_parentage.R
+++ b/R/find_parentage.R
@@ -90,6 +90,8 @@
 #' )
 #' }
 #'
+#' @author Josué Chinchilla-Vargas
+#'
 #' @importFrom data.table fread fwrite copy CJ rbindlist set data.table as.data.table
 #' @export
 find_parentage <- function(genotypes_file, parents_file, progeny_file,
diff --git a/R/imputation_concordance.R b/R/imputation_concordance.R
index 918071a..7ebbdcc 100644
--- a/R/imputation_concordance.R
+++ b/R/imputation_concordance.R
@@ -1,79 +1,60 @@
-#' Calculate Concordance between Imputed and Reference Genotypes
-#'
-#' This function calculates the concordance between imputed and reference
-#' genotypes. It assumes that samples are rows and markers are columns.
-#' Allele dosages (0, 1, 2) are recommended but other numeric formats are supported.
-#' Missing data in either dataset can be excluded from the concordance calculation
-#' using the `missing_code` argument. Specific markers can be excluded using
-#' the `snps_2_exclude` argument.
-#'
-#' @param reference_genos A data frame containing reference genotype data,
-#' with rows as samples and columns as markers. Must include a column named `ID`.
-#'
-#' @param imputed_genos A data frame containing imputed genotype data,
-#' with rows as samples and columns as markers. Must include a column named `ID`.
-#'
-#' @param missing_code Optional value specifying missing data. If provided,
-#' loci with this value in either dataset will be excluded from the concordance calculation.
-#'
-#' @param snps_2_exclude Optional vector of marker IDs to exclude from the concordance calculation.
-#'
-#' @param verbose Logical. If `TRUE`, prints summary statistics (minimum, quartiles,
-#' median, mean, maximum) of concordance percentages.
-#'
-#' @param plot Logical. If `TRUE`, produces a bar plot of concordance percentage
-#' by sample.
-#'
-#' @param print_result Logical. If `TRUE` (default), prints the concordance
-#' results data frame to the console. If `FALSE`, results are returned invisibly.
-#'
-#' @return A data frame with:
-#' \itemize{
-#'   \item \code{ID}: Sample identifiers shared between the datasets.
-#'   \item \code{Concordance}: Percentage of matching genotypes per sample.
+#' Calculate concordance between imputed and reference genotypes
+#'
+#' Compares imputed and reference genotype datasets sample by sample, returning
+#' the percentage of matching genotypes per sample. Expects samples as rows and
+#' markers as columns, with allele dosages (0, 1, 2) recommended. Other numeric
+#' formats are supported.
+#'
+#' @param reference_genos A data frame of reference genotypes (samples × markers)
+#'   with a column named `ID`.
+#' @param imputed_genos A data frame of imputed genotypes (samples × markers)
+#'   with a column named `ID`.
+#' @param missing_code Optional value indicating missing data. Loci carrying this
+#'   value in either dataset are excluded from the concordance calculation.
+#' @param snps_2_exclude Optional character vector of marker names to exclude.
+#' @param verbose Logical. Print a five-number summary of concordance? Default `FALSE`.
+#' @param plot Logical. Produce a bar plot of concordance by sample? Default `FALSE`.
+#' @param print_result Logical. Print the results data frame to the console?
+#'   Default `TRUE`. If `FALSE`, results are returned invisibly.
+#'
+#' @return A data frame with columns:
+#' \describe{
+#'   \item{ID}{Sample identifiers shared between the two datasets.}
+#'   \item{Concordance}{Percentage of matching genotypes per sample.}
 #' }
-#' If \code{print_result = FALSE}, the data frame is returned invisibly.
 #'
 #' @details
-#' The function:
-#' \enumerate{
-#'   \item Identifies common samples and markers between the datasets.
-#'   \item Optionally excludes specified SNPs.
-#'   \item Removes loci with missing data (if \code{missing_code} is provided).
-#'   \item Computes per-sample concordance as the percentage of matching genotypes.
-#' }
-#'
-#' When \code{plot = TRUE}, a bar plot showing concordance percentage per sample
-#' is generated using \pkg{ggplot2}.
-#'
-#' @import dplyr
+#' The function identifies common samples and markers between the two datasets,
+#' optionally removes specified SNPs and loci with missing data, then computes
+#' per-sample concordance as the percentage of matching genotypes over valid loci.
+#' When `plot = TRUE`, a bar plot is produced with \pkg{ggplot2}.
 #'
 #' @examples
 #' ref <- data.frame(
-#'   ID = c("S1", "S2", "S3"),
+#'   ID   = c("S1", "S2", "S3"),
 #'   SNP1 = c(0, 1, 2),
 #'   SNP2 = c(1, 1, 0),
 #'   SNP3 = c(2, 5, 1)
 #' )
-#'
-#' test <- data.frame(
-#'   ID = c("S1", "S2", "S3"),
+#' imp <- data.frame(
+#'   ID   = c("S1", "S2", "S3"),
 #'   SNP1 = c(0, 0, 2),
 #'   SNP2 = c(1, 1, 1),
 #'   SNP3 = c(2, 5, 0)
 #' )
-#'
-#' result <- imputation_concordance(
+#' imputation_concordance(
 #'   reference_genos = ref,
-#'   imputed_genos = test,
-#'   snps_2_exclude = "SNP2",
-#'   missing_code = 5,
-#'   print_result = FALSE
+#'   imputed_genos   = imp,
+#'   snps_2_exclude  = "SNP2",
+#'   missing_code    = 5,
+#'   print_result    = FALSE
 #' )
 #'
-#' result
+#' @author Josué Chinchilla-Vargas
 #'
+#' @importFrom dplyr %>% filter arrange
 #' @importFrom stats reorder
+#' @importFrom ggplot2 ggplot aes geom_bar labs theme_minimal theme element_text
 #' @export
 imputation_concordance <- function(reference_genos,
                                    imputed_genos,
diff --git a/R/validate_pedigree.R b/R/validate_pedigree.R
index 8784914..c30491f 100644
--- a/R/validate_pedigree.R
+++ b/R/validate_pedigree.R
@@ -60,6 +60,9 @@
 #'     \item{Best_Female_Candidate_Error_Pct}{Homozygous mismatch percentage for the best female candidate.}
 #'   }
 #' @export
+#'
+#' @author Josué Chinchilla-Vargas
+#'
 #' @importFrom data.table fread fwrite copy data.table set rbindlist
 validate_pedigree <- function(pedigree_file, genotypes_file,
                               founders_file = NULL,
diff --git a/man/allele_freq_poly.Rd b/man/allele_freq_poly.Rd
index 407d158..6d56f79 100644
--- a/man/allele_freq_poly.Rd
+++ b/man/allele_freq_poly.Rd
@@ -2,48 +2,51 @@
 % Please edit documentation in R/breedtools_functions.R
 \name{allele_freq_poly}
 \alias{allele_freq_poly}
-\title{Compute Allele Frequencies for Populations}
+\title{Compute allele frequencies for populations}
 \usage{
 allele_freq_poly(geno, populations, ploidy = 2)
 }
 \arguments{
-\item{geno}{matrix of genotypes coded as the dosage of allele B \code{{0, 1, 2, ..., ploidy}}
-with individuals in rows (named) and SNPs in columns (named)}
+\item{geno}{Numeric matrix of genotypes coded as dosage of allele B
+\code{{0, 1, 2, ..., ploidy}}, with individuals in rows (named) and
+SNPs in columns (named).}
 
-\item{populations}{list of named populations. Each population has a vector of IDs
-that belong to the population. Allele frequencies will be derived from all animals}
+\item{populations}{Named list of populations, each containing a character
+vector of individual IDs belonging to that population.}
 
-\item{ploidy}{integer indicating the ploidy level (default is 2 for diploid)}
+\item{ploidy}{Integer. Ploidy level of the species. Default is \code{2}.}
 }
 \value{
-data.frame consisting of allele_frequencies for populations (columns) for
-each SNP (rows)
+A data frame of allele frequencies with SNPs as rows and populations
+as columns.
 }
 \description{
-Computes allele frequencies for specified populations given SNP array data
+Computes allele frequencies for specified populations from SNP array data
+coded as dosage of allele B.
 }
 \examples{
-# Example inputs
 geno_matrix <- matrix(
-c(4, 1, 4, 0, # S1
-  2, 2, 1, 3, # S2
-  0, 4, 0, 4, # S3
-  3, 3, 2, 2, # S4
-  1, 4, 2, 3),# S5
-nrow = 4, ncol = 5, byrow = FALSE, # individuals=rows, SNPs=cols
-dimnames = list(paste0("Ind", 1:4), paste0("S", 1:5))
+  c(4, 1, 4, 0,
+    2, 2, 1, 3,
+    0, 4, 0, 4,
+    3, 3, 2, 2,
+    1, 4, 2, 3),
+  nrow = 4, ncol = 5, byrow = FALSE,
+  dimnames = list(paste0("Ind", 1:4), paste0("S", 1:5))
 )
-
 pop_list <- list(
-PopA = c("Ind1", "Ind2"),
-PopB = c("Ind3", "Ind4")
+  PopA = c("Ind1", "Ind2"),
+  PopB = c("Ind3", "Ind4")
 )
-
 allele_freqs <- allele_freq_poly(geno = geno_matrix, populations = pop_list, ploidy = 4)
 print(allele_freqs)
 
 }
 \references{
-Funkhouser SA, Bates RO, Ernst CW, Newcom D, Steibel JP. Estimation of genome-wide and locus-specific
-breed composition in pigs. Transl Anim Sci. 2017 Feb 1;1(1):36-44.
+Funkhouser SA, Bates RO, Ernst CW, Newcom D, Steibel JP.
+Estimation of genome-wide and locus-specific breed composition in pigs.
+\emph{Transl Anim Sci.} 2017;1(1):36–44.
+}
+\author{
+Josué Chinchilla-Vargas
 }
diff --git a/man/check_ped.Rd b/man/check_ped.Rd
index 0cf3840..8df46e8 100644
--- a/man/check_ped.Rd
+++ b/man/check_ped.Rd
@@ -2,7 +2,7 @@
 % Please edit documentation in R/check_ped.R
 \name{check_ped}
 \alias{check_ped}
-\title{Check a pedigree file for structural issues}
+\title{Check and optionally correct a pedigree file}
 \usage{
 check_ped(
   ped.file,
@@ -18,35 +18,63 @@ check_ped(
 
 \item{seed}{Optional integer seed for reproducibility.}
 
-\item{verbose}{Logical. If \code{TRUE} (default), prints findings to the
-console.}
+\item{verbose}{Logical. Print findings to the console? Default \code{TRUE}.}
 
-\item{correct}{Logical. If \code{TRUE} (default), builds and assigns a
-corrected pedigree to the global environment. If \code{FALSE}, issues
-are only reported.}
+\item{correct}{Logical. Build and save a corrected pedigree? Default \code{TRUE}.}
 
-\item{save_zip}{Logical. If \code{TRUE}, bundles report components into a
-\code{.zip} archive in the working directory.}
+\item{save_zip}{Logical. Export report components as a \code{.zip} archive? Default \code{FALSE}.}
 
-\item{save_corrected_zip}{Logical. If \code{TRUE} (requires \code{save_zip = TRUE} and \code{correct = TRUE}), includes the corrected pedigree in
-the \code{.zip} archive.}
+\item{save_corrected_zip}{Logical. Include corrected pedigree in the \code{.zip}?
+Only applies when \code{save_zip = TRUE} and \code{correct = TRUE}. Default \code{FALSE}.}
 }
 \value{
-An invisible named list of data frames describing detected
-issues:
-\item{exact_duplicates}{exact duplicate rows}
-\item{repeated_ids_diff}{rows where the same \code{id} has conflicting parents}
-\item{inconsistent_sex_roles}{rows involving IDs used as both parents}
-\item{missing_parents}{rows referencing parent IDs absent from \code{id}}
-\item{dependencies}{rows whose \code{id} is involved in a pedigree cycle}
+An invisible named list of data frames:
+\describe{
+\item{exact_duplicates}{Exact duplicate rows.}
+\item{repeated_ids_diff}{Rows with the same \code{id} but conflicting parent assignments.}
+\item{inconsistent_sex_roles}{Rows involving IDs used as both male and female parent.}
+\item{missing_parents}{Rows referencing parent IDs absent from \code{id}.}
+\item{dependencies}{Rows whose \code{id} is involved in a detected cycle.}
+}
 }
 \description{
-Reads a tab‑separated pedigree file with columns \code{id},
-\code{male_parent}, and \code{female_parent} and checks for five classes of
-errors: missing required columns, exact duplicate rows,
-repeated IDs with conflicting parents, inconsistent parent
-sex roles, and pedigree cycles.  The input file is never
-modified.  A report list is always assigned to the global
-environment; when \code{correct = TRUE} a corrected pedigree is
-assigned as well.
+Reads a tab-separated pedigree file with columns \code{id}, \code{male_parent}, and
+\code{female_parent} (any column order), performs structural and consistency checks,
+reports findings, and optionally builds a partially corrected pedigree object.
+}
+\details{
+The input file is never modified. Findings are printed to the console when
+\code{verbose = TRUE}, a report list is assigned to the global environment, and when
+\code{correct = TRUE}, a corrected pedigree data frame is also assigned there.
+}
+\section{Checks performed}{
+
+\describe{
+\item{Missing required columns}{Stops immediately if \code{id}, \code{male_parent}, or
+\code{female_parent} are absent.}
+\item{Exact duplicate rows}{Rows identical across all three fields. Removed
+when \code{correct = TRUE}.}
+\item{Repeated IDs with conflicting parents}{Same \code{id} with differing parent
+assignments. Collapsed to one row when \code{correct = TRUE}; conflicting fields
+are set to \code{"0"}.}
+\item{Inconsistent sex roles}{IDs appearing as both \code{male_parent} and
+\code{female_parent}. Reported only; no automatic correction.}
+\item{Missing parents}{Parent IDs not listed as individuals. Reported, and
+added as founder rows (\code{"0"/"0"}) when \code{correct = TRUE}.}
+\item{Cycles}{Direct or indirect ancestry loops. Reported only; must be
+resolved manually.}
+}
+}
+
+\section{Correction behavior}{
+
+When \code{correct = TRUE}, the function removes exact duplicates, collapses
+conflicting repeated IDs (replacing inconsistent parent fields with \code{"0"}),
+and appends missing parents as founders. Inconsistent sex roles and cycles are
+not automatically corrected. Running the function again on the corrected
+pedigree is recommended to reassess downstream issues.
+}
+
+\author{
+Josué Chinchilla-Vargas
 }
diff --git a/man/find_parentage.Rd b/man/find_parentage.Rd
index 5a148af..55b26e8 100644
--- a/man/find_parentage.Rd
+++ b/man/find_parentage.Rd
@@ -126,3 +126,6 @@ results <- find_parentage(
 }
 
 }
+\author{
+Josué Chinchilla-Vargas
+}
diff --git a/man/imputation_concordance.Rd b/man/imputation_concordance.Rd
index 22e9462..f96493f 100644
--- a/man/imputation_concordance.Rd
+++ b/man/imputation_concordance.Rd
@@ -2,7 +2,7 @@
 % Please edit documentation in R/imputation_concordance.R
 \name{imputation_concordance}
 \alias{imputation_concordance}
-\title{Calculate Concordance between Imputed and Reference Genotypes}
+\title{Calculate concordance between imputed and reference genotypes}
 \usage{
 imputation_concordance(
   reference_genos,
@@ -15,77 +15,65 @@ imputation_concordance(
 )
 }
 \arguments{
-\item{reference_genos}{A data frame containing reference genotype data,
-with rows as samples and columns as markers. Must include a column named \code{ID}.}
+\item{reference_genos}{A data frame of reference genotypes (samples × markers)
+with a column named \code{ID}.}
 
-\item{imputed_genos}{A data frame containing imputed genotype data,
-with rows as samples and columns as markers. Must include a column named \code{ID}.}
+\item{imputed_genos}{A data frame of imputed genotypes (samples × markers)
+with a column named \code{ID}.}
 
-\item{missing_code}{Optional value specifying missing data. If provided,
-loci with this value in either dataset will be excluded from the concordance calculation.}
+\item{missing_code}{Optional value indicating missing data. Loci carrying this
+value in either dataset are excluded from the concordance calculation.}
 
-\item{snps_2_exclude}{Optional vector of marker IDs to exclude from the concordance calculation.}
+\item{snps_2_exclude}{Optional character vector of marker names to exclude.}
 
-\item{verbose}{Logical. If \code{TRUE}, prints summary statistics (minimum, quartiles,
-median, mean, maximum) of concordance percentages.}
+\item{verbose}{Logical. Print a five-number summary of concordance? Default \code{FALSE}.}
 
-\item{plot}{Logical. If \code{TRUE}, produces a bar plot of concordance percentage
-by sample.}
+\item{plot}{Logical. Produce a bar plot of concordance by sample? Default \code{FALSE}.}
 
-\item{print_result}{Logical. If \code{TRUE} (default), prints the concordance
-results data frame to the console. If \code{FALSE}, results are returned invisibly.}
+\item{print_result}{Logical. Print the results data frame to the console?
+Default \code{TRUE}. If \code{FALSE}, results are returned invisibly.}
 }
 \value{
-A data frame with:
-\itemize{
-\item \code{ID}: Sample identifiers shared between the datasets.
-\item \code{Concordance}: Percentage of matching genotypes per sample.
+A data frame with columns:
+\describe{
+\item{ID}{Sample identifiers shared between the two datasets.}
+\item{Concordance}{Percentage of matching genotypes per sample.}
 }
-If \code{print_result = FALSE}, the data frame is returned invisibly.
 }
 \description{
-This function calculates the concordance between imputed and reference
-genotypes. It assumes that samples are rows and markers are columns.
-Allele dosages (0, 1, 2) are recommended but other numeric formats are supported.
-Missing data in either dataset can be excluded from the concordance calculation
-using the \code{missing_code} argument. Specific markers can be excluded using
-the \code{snps_2_exclude} argument.
+Compares imputed and reference genotype datasets sample by sample, returning
+the percentage of matching genotypes per sample. Expects samples as rows and
+markers as columns, with allele dosages (0, 1, 2) recommended. Other numeric
+formats are supported.
 }
 \details{
-The function:
-\enumerate{
-\item Identifies common samples and markers between the datasets.
-\item Optionally excludes specified SNPs.
-\item Removes loci with missing data (if \code{missing_code} is provided).
-\item Computes per-sample concordance as the percentage of matching genotypes.
-}
-
-When \code{plot = TRUE}, a bar plot showing concordance percentage per sample
-is generated using \pkg{ggplot2}.
+The function identifies common samples and markers between the two datasets,
+optionally removes specified SNPs and loci with missing data, then computes
+per-sample concordance as the percentage of matching genotypes over valid loci.
+When \code{plot = TRUE}, a bar plot is produced with \pkg{ggplot2}.
 }
 \examples{
 ref <- data.frame(
-  ID = c("S1", "S2", "S3"),
+  ID   = c("S1", "S2", "S3"),
   SNP1 = c(0, 1, 2),
   SNP2 = c(1, 1, 0),
   SNP3 = c(2, 5, 1)
 )
-
-test <- data.frame(
-  ID = c("S1", "S2", "S3"),
+imp <- data.frame(
+  ID   = c("S1", "S2", "S3"),
   SNP1 = c(0, 0, 2),
   SNP2 = c(1, 1, 1),
   SNP3 = c(2, 5, 0)
 )
-
-result <- imputation_concordance(
+imputation_concordance(
   reference_genos = ref,
-  imputed_genos = test,
-  snps_2_exclude = "SNP2",
-  missing_code = 5,
-  print_result = FALSE
+  imputed_genos   = imp,
+  snps_2_exclude  = "SNP2",
+  missing_code    = 5,
+  print_result    = FALSE
 )
 
-result
-
+}
+\author{
+Josué Chinchilla-Vargas
 }
diff --git a/man/solve_composition_poly.Rd b/man/solve_composition_poly.Rd
index bd92739..128f7ad 100644
--- a/man/solve_composition_poly.Rd
+++ b/man/solve_composition_poly.Rd
@@ -2,7 +2,7 @@
 % Please edit documentation in R/breedtools_functions.R
 \name{solve_composition_poly}
 \alias{solve_composition_poly}
-\title{Compute Genome-Wide Breed Composition}
+\title{Compute genome-wide breed composition}
 \usage{
 solve_composition_poly(
   Y,
@@ -16,41 +16,40 @@ solve_composition_poly(
 )
 }
 \arguments{
-\item{Y}{numeric matrix of genotypes (columns) from all animals (rows) in population
-coded as dosage of allele B \code{{0, 1, 2, ..., ploidy}}}
+\item{Y}{Numeric matrix of genotypes with individuals as rows and SNPs as
+columns, coded as dosage of allele B \code{{0, 1, 2, ..., ploidy}}.}
 
-\item{X}{numeric matrix of allele frequencies (rows) from each reference panel (columns). Frequencies are
-relative to allele B.}
+\item{X}{Numeric matrix of allele frequencies with SNPs as rows and
+breeds/populations as columns.}
 
-\item{ped}{data.frame giving pedigree information. Must be formatted "ID", "Sire", "Dam"}
+\item{ped}{Optional data frame with pedigree information formatted with
+columns \code{ID}, \code{Sire}, and \code{Dam}. When supplied, \code{QPsolve_par} is used
+and only animals with genotyped parents are included.}
 
-\item{groups}{list of IDs categorized by breed/population. If specified, output will be a list
-of results categorized by breed/population.}
+\item{groups}{Optional named list of IDs grouped by breed/population.
+When supplied, results are returned as a named list partitioned by group.}
 
-\item{mia}{logical. Only applies if ped argument is supplied. If true, returns a data.frame
-containing the inferred maternally inherited allele for each locus for each animal instead
-of breed composition results.}
+\item{mia}{Logical. Only applies when \code{ped} is supplied. If \code{TRUE}, returns
+the inferred maternally inherited allele per locus per animal. Default \code{FALSE}.}
 
-\item{sire}{logical. Only applies if ped argument is supplied. If true, returns a data.frame
-containing sire genotypes for each locus for each animal instead of breed composition results.}
+\item{sire}{Logical. Only applies when \code{ped} is supplied. If \code{TRUE}, returns
+sire genotypes per locus per animal. Default \code{FALSE}.}
 
-\item{dam}{logical. Only applies if ped argument is supplied. If true, returns a data.frame
-containing dam genotypes for each locus for each animal instead of breed composition results.}
+\item{dam}{Logical. Only applies when \code{ped} is supplied. If \code{TRUE}, returns
+dam genotypes per locus per animal. Default \code{FALSE}.}
 
-\item{ploidy}{integer. The ploidy level of the species (e.g., 2 for diploid, 3 for triploid, etc.).}
+\item{ploidy}{Integer. Ploidy level of the species. Default is \code{2}.}
 }
 \value{
-A data.frame or list of data.frames (if groups is !NULL) with breed/ancestry composition
-results
+A data frame, or a named list of data frames when \code{groups} is
+supplied, containing breed/ancestry composition estimates.
 }
 \description{
-Computes genome-wide breed/ancestry composition using quadratic programming on a
-batch of animals.
+Estimates genome-wide breed/ancestry composition for a batch of animals
+using quadratic programming, with optional pedigree-assisted and
+grouped-output modes.
 }
 \examples{
-# Example inputs for solve_composition_poly (ploidy = 4)
-
-# (This would typically be the output from allele_freq_poly)
 allele_freqs_matrix <- matrix(
   c(0.625, 0.500,
     0.500, 0.500,
@@ -60,16 +59,12 @@ allele_freqs_matrix <- matrix(
   nrow = 5, ncol = 2, byrow = TRUE,
   dimnames = list(paste0("SNP", 1:5), c("VarA", "VarB"))
 )
-
-# Validation Genotypes (individuals x SNPs)
 val_geno_matrix <- matrix(
-  c(2, 1, 2, 3, 4,  # Test1 dosages for SNP1-5
-    3, 4, 2, 3, 0), # Test2 dosages for SNP1-5
+  c(2, 1, 2, 3, 4,
+    3, 4, 2, 3, 0),
   nrow = 2, ncol = 5, byrow = TRUE,
   dimnames = list(paste0("Test", 1:2), paste0("SNP", 1:5))
 )
-
-# Calculate Breed Composition
 composition <- solve_composition_poly(Y = val_geno_matrix,
                                       X = allele_freqs_matrix,
                                       ploidy = 4)
@@ -77,6 +72,10 @@ print(composition)
 
 }
 \references{
-Funkhouser SA, Bates RO, Ernst CW, Newcom D, Steibel JP. Estimation of genome-wide and locus-specific
-breed composition in pigs. Transl Anim Sci. 2017 Feb 1;1(1):36-44.
+Funkhouser SA, Bates RO, Ernst CW, Newcom D, Steibel JP.
+Estimation of genome-wide and locus-specific breed composition in pigs.
+\emph{Transl Anim Sci.} 2017;1(1):36–44.
+}
+\author{
+Josué Chinchilla-Vargas
 }
diff --git a/man/validate_pedigree.Rd b/man/validate_pedigree.Rd
index b4987de..08c7329 100644
--- a/man/validate_pedigree.Rd
+++ b/man/validate_pedigree.Rd
@@ -93,3 +93,6 @@ are declared founders (both parents coded as 0) are preserved unchanged
 with no recommendations. Trios removed due to missing genotype data are
 retained in the output with a NO_GENOTYPE_DATA status.
 }
+\author{
+Josué Chinchilla-Vargas
+}

From 6b84e18653c7072657e942aea90b27253a7bbff1 Mon Sep 17 00:00:00 2001
From: "josue.chinchilla" <josue.chinchilla@IFASadmins-MacBook-Pro.local>
Date: Wed, 20 May 2026 09:53:16 -0400
Subject: [PATCH 11/12] staged comments for next push

---
 DESCRIPTION   | 2 +-
 R/check_ped.R | 8 +++-----
 2 files changed, 4 insertions(+), 6 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index 0524f2f..b71abd1 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -44,7 +44,6 @@ URL: https://github.com/Breeding-Insight/BIGr
 BugReports: https://github.com/Breeding-Insight/BIGr/issues
 Encoding: UTF-8
 Roxygen: list(markdown = TRUE)
-RoxygenNote: 7.3.3
 Depends: R (>= 4.4.0)
 biocViews:
 Imports: 
@@ -74,3 +73,4 @@ Suggests:
     polyRAD,
     testthat (>= 3.0.0)   
 RdMacros: Rdpack
+Config/roxygen2/version: 8.0.0
diff --git a/R/check_ped.R b/R/check_ped.R
index 847ebe1..a948138 100644
--- a/R/check_ped.R
+++ b/R/check_ped.R
@@ -51,8 +51,7 @@
 #'
 #' @author Josué Chinchilla-Vargas
 #'
-#' @importFrom dplyr %>% group_by filter ungroup distinct mutate summarize first
-#'   bind_rows n_distinct n select row_number
+#' @importFrom dplyr %>% group_by filter ungroup distinct mutate summarize first bind_rows n_distinct n select row_number
 #' @importFrom stats setNames
 #' @importFrom utils read.table write.table zip
 #' @importFrom tools file_path_sans_ext
@@ -60,11 +59,10 @@
 check_ped <- function(ped.file,
                       seed               = NULL,
                       verbose            = TRUE,
-                      correct            = TRUE,
-                      save_zip           = FALSE,
+                      correct            = TRUE,  #add arguments for removing/filtering conflicting ids, repeated ids and founders are always fixed, dependencies are always manual.
+                      save_zip           = FALSE, #remove all save zips, export each report and corrected_pedigree as a list
                       save_corrected_zip = FALSE) {
 
-  #### setup ####
   #### setup ####
   if (!is.null(seed)) set.seed(seed)
   data <- utils::read.table(ped.file, header = TRUE)

From 4eb79bb186b83c6a52d0e8620419501719597474 Mon Sep 17 00:00:00 2001
From: "josue.chinchilla" <josue.chinchilla@IFASadmins-MacBook-Pro.local>
Date: Wed, 20 May 2026 11:09:30 -0400
Subject: [PATCH 12/12] removed zip option and added specific correction
 arguments

---
 R/check_ped.R | 386 +++++++++++++++++++++++---------------------------
 1 file changed, 177 insertions(+), 209 deletions(-)

diff --git a/R/check_ped.R b/R/check_ped.R
index a948138..6f74732 100644
--- a/R/check_ped.R
+++ b/R/check_ped.R
@@ -1,71 +1,62 @@
-#' Check and optionally correct a pedigree file
+#' Check a pedigree file for accuracy and report and correct common errors
 #'
-#' Reads a tab-separated pedigree file with columns `id`, `male_parent`, and
-#' `female_parent` (any column order), performs structural and consistency checks,
-#' reports findings, and optionally builds a partially corrected pedigree object.
+#' Reads a 3-column pedigree file (tab-separated, columns id, male_parent,
+#' female_parent in any order) and performs quality checks, optionally
+#' correcting detected errors.
 #'
-#' The input file is never modified. Findings are printed to the console when
-#' `verbose = TRUE`, a report list is assigned to the global environment, and when
-#' `correct = TRUE`, a corrected pedigree data frame is also assigned there.
-#'
-#' @section Checks performed:
-#' \describe{
-#'   \item{Missing required columns}{Stops immediately if `id`, `male_parent`, or
-#'     `female_parent` are absent.}
-#'   \item{Exact duplicate rows}{Rows identical across all three fields. Removed
-#'     when `correct = TRUE`.}
-#'   \item{Repeated IDs with conflicting parents}{Same `id` with differing parent
-#'     assignments. Collapsed to one row when `correct = TRUE`; conflicting fields
-#'     are set to `"0"`.}
-#'   \item{Inconsistent sex roles}{IDs appearing as both `male_parent` and
-#'     `female_parent`. Reported only; no automatic correction.}
-#'   \item{Missing parents}{Parent IDs not listed as individuals. Reported, and
-#'     added as founder rows (`"0"/"0"`) when `correct = TRUE`.}
-#'   \item{Cycles}{Direct or indirect ancestry loops. Reported only; must be
-#'     resolved manually.}
-#' }
-#'
-#' @section Correction behavior:
-#' When `correct = TRUE`, the function removes exact duplicates, collapses
-#' conflicting repeated IDs (replacing inconsistent parent fields with `"0"`),
-#' and appends missing parents as founders. Inconsistent sex roles and cycles are
-#' not automatically corrected. Running the function again on the corrected
-#' pedigree is recommended to reassess downstream issues.
+#' Exact duplicate rows and missing parents are always corrected.
+#' Conflicting trios and inconsistent sex roles are corrected when their
+#' respective arguments are TRUE. Cycles and dependencies are always
+#' reported only and must be resolved manually.
 #'
 #' @param ped.file Path to the pedigree text file.
 #' @param seed Optional integer seed for reproducibility.
-#' @param verbose Logical. Print findings to the console? Default `TRUE`.
-#' @param correct Logical. Build and save a corrected pedigree? Default `TRUE`.
-#' @param save_zip Logical. Export report components as a `.zip` archive? Default `FALSE`.
-#' @param save_corrected_zip Logical. Include corrected pedigree in the `.zip`?
-#'   Only applies when `save_zip = TRUE` and `correct = TRUE`. Default `FALSE`.
+#' @param verbose Logical. If TRUE (default), prints the report to the console.
+#' @param correct_conflicting_trios Logical. If TRUE (default), sets conflicting
+#'   male_parent and female_parent to 0 and collapses to one row per ID.
+#' @param correct_inconsistent_sex_roles Logical. If TRUE (default), sets
+#'   male_parent and female_parent to 0 for rows involving IDs found as both,
+#'   then removes any resulting exact duplicates.
 #'
 #' @return An invisible named list of data frames:
 #' \describe{
-#'   \item{exact_duplicates}{Exact duplicate rows.}
-#'   \item{repeated_ids_diff}{Rows with the same `id` but conflicting parent assignments.}
-#'   \item{inconsistent_sex_roles}{Rows involving IDs used as both male and female parent.}
-#'   \item{missing_parents}{Rows referencing parent IDs absent from `id`.}
-#'   \item{dependencies}{Rows whose `id` is involved in a detected cycle.}
+#'   \item{exact_duplicates}{Exact duplicate rows found in the input.}
+#'   \item{conflicting_trios}{IDs with conflicting male_parent or female_parent assignments.}
+#'   \item{inconsistent_sex_roles}{IDs appearing as both male_parent and female_parent.}
+#'   \item{missing_parents}{Parent IDs absent from id, added as founders.}
+#'   \item{dependencies}{Cycles detected in the pedigree. Must be resolved manually.}
+#'   \item{corrected_pedigree}{Corrected pedigree table.}
 #' }
 #'
-#' @author Josué Chinchilla-Vargas
+#' @examples
+#' ped_file <- system.file("check_ped_test.txt", package = "BIGr")
+#' ped_errors <- check_ped(ped.file = ped_file, seed = 101919, verbose = FALSE)
 #'
-#' @importFrom dplyr %>% group_by filter ungroup distinct mutate summarize first bind_rows n_distinct n select row_number
+#' ped_errors$inconsistent_sex_roles
+#' ped_errors$corrected_pedigree
+#'
+#' conflicting_sex_ids <- ped_errors$inconsistent_sex_roles$id
+#' print(conflicting_sex_ids)
+#'
+#' @importFrom dplyr %>% mutate filter group_by ungroup summarize distinct bind_rows select first n n_distinct if_else row_number
 #' @importFrom stats setNames
-#' @importFrom utils read.table write.table zip
-#' @importFrom tools file_path_sans_ext
+#' @importFrom utils read.table
 #' @export
 check_ped <- function(ped.file,
-                      seed               = NULL,
-                      verbose            = TRUE,
-                      correct            = TRUE,  #add arguments for removing/filtering conflicting ids, repeated ids and founders are always fixed, dependencies are always manual.
-                      save_zip           = FALSE, #remove all save zips, export each report and corrected_pedigree as a list
-                      save_corrected_zip = FALSE) {
+                      seed                           = NULL,
+                      verbose                        = TRUE,
+                      correct_conflicting_trios      = TRUE,
+                      correct_inconsistent_sex_roles = TRUE) {
 
   #### setup ####
   if (!is.null(seed)) set.seed(seed)
-  data <- utils::read.table(ped.file, header = TRUE)
+
+  data <- utils::read.table(ped.file, header = TRUE) %>%
+    dplyr::mutate(
+      id            = as.character(id),
+      male_parent   = as.character(male_parent),
+      female_parent = as.character(female_parent)
+    )
 
   # Validate required column names before any processing
   required_cols <- c("id", "male_parent", "female_parent")
@@ -78,78 +69,120 @@ check_ped <- function(ped.file,
     )
   }
 
-  data <- data %>%
-    mutate(
-      id            = as.character(id),
-      male_parent   = as.character(male_parent),
-      female_parent = as.character(female_parent)
-    )
-  # Add row numbers before any processing
-  data <- data %>% mutate(row_number = row_number(), .before = id)
-  errors <- list()
-  # Add row numbers before any processing
-  data <- data %>% mutate(row_number = row_number(), .before = id)
-  errors <- list()
+  # Add row numbers before any processing so all reports reference original rows [1]
+  data <- data %>% dplyr::mutate(row_number = dplyr::row_number(), .before = id)
+
+  errors          <- list()
+  missing_parents <- data.frame(
+    row_number    = integer(),
+    id            = character(),
+    male_parent   = character(),
+    female_parent = character(),
+    stringsAsFactors = FALSE
+  )
 
-  #### check 1: exact duplicates ####
+  #### check 1: exact duplicates (always fixed) ####
   exact_duplicates <- data[
-    duplicated(data %>% select(-row_number)) |
-      duplicated(data %>% select(-row_number), fromLast = TRUE),
+    duplicated(data %>% dplyr::select(-row_number)) |
+      duplicated(data %>% dplyr::select(-row_number), fromLast = TRUE),
   ]
+  if (nrow(exact_duplicates) > 0) {
+    data <- data %>%
+      dplyr::select(-row_number) %>%
+      dplyr::distinct() %>%
+      dplyr::mutate(row_number = dplyr::row_number(), .before = id)
+  }
 
   #### check 2: repeated IDs with conflicting male_parent/female_parent ####
-  repeated_ids_diff <- data %>%
-    group_by(id) %>%
-    filter(n() > 1) %>%
-    filter(n_distinct(male_parent) > 1 | n_distinct(female_parent) > 1) %>%
-    ungroup()
+  repeated_ids <- data %>%
+    dplyr::group_by(id) %>%
+    dplyr::filter(dplyr::n() > 1) %>%
+    dplyr::ungroup()
 
-  #### check 3: inconsistent parent sex roles ####
-  male_ids   <- unique(data$male_parent[data$male_parent   != "0"])
-  female_ids <- unique(data$female_parent[data$female_parent != "0"])
-  messy_ids  <- intersect(male_ids, female_ids)
-  inconsistent_sex_roles <- data %>%
-    filter(male_parent %in% messy_ids | female_parent %in% messy_ids)
+  conflicting_ids <- repeated_ids %>%
+    dplyr::group_by(id) %>%
+    dplyr::filter(dplyr::n_distinct(male_parent) > 1 | dplyr::n_distinct(female_parent) > 1) %>%
+    dplyr::ungroup()
 
-  #### check 4: missing parents ####
-  all_ids     <- unique(data$id)
-  ref_ids     <- unique(c(data$male_parent, data$female_parent))
-  ref_ids     <- ref_ids[ref_ids != "0"]
-  missing_ids <- setdiff(ref_ids, all_ids)
-  missing_parents <- data %>%
-    filter(male_parent %in% missing_ids | female_parent %in% missing_ids)
+  if (correct_conflicting_trios && nrow(conflicting_ids) > 0) {
+    # Set conflicting parents to "0" -- rows become exact duplicates, summarize collapses to one [1]
+    data <- data %>%
+      dplyr::group_by(id) %>%
+      dplyr::summarize(
+        row_number    = dplyr::first(row_number),
+        male_parent   = if (dplyr::n_distinct(male_parent)   > 1) "0" else dplyr::first(male_parent),
+        female_parent = if (dplyr::n_distinct(female_parent) > 1) "0" else dplyr::first(female_parent),
+        .groups = "drop"
+      ) %>%
+      dplyr::select(row_number, id, male_parent, female_parent)
+  }
 
-  # Only build founder rows if there are actually missing parents to add
-  if (length(missing_ids) > 0) {
-    missing_founders <- data.frame(
-      row_number    = NA_integer_,
-      id            = missing_ids,
-      male_parent   = "0",
-      female_parent = "0",
-      stringsAsFactors = FALSE
-    )
-  } else {
-    missing_founders <- data.frame(
-      row_number    = integer(0),
-      id            = character(0),
-      male_parent   = character(0),
-      female_parent = character(0),
-      stringsAsFactors = FALSE
-    )
+  repeated_ids_report <- conflicting_ids
+
+  #### check 3: missing parents (always fixed) ####
+  for (i in seq_len(nrow(data))) {
+    id            <- data$id[i]
+    male_parent   <- data$male_parent[i]
+    female_parent <- data$female_parent[i]
+
+    if (male_parent != "0" && male_parent != id && !male_parent %in% data$id) {
+      missing_parents <- rbind(
+        missing_parents,
+        data.frame(row_number = data$row_number[i], id = male_parent,
+                   male_parent = "0", female_parent = "0",
+                   stringsAsFactors = FALSE)
+      )
+    }
+    if (female_parent != "0" && female_parent != id && !female_parent %in% data$id) {
+      missing_parents <- rbind(
+        missing_parents,
+        data.frame(row_number = data$row_number[i], id = female_parent,
+                   male_parent = "0", female_parent = "0",
+                   stringsAsFactors = FALSE)
+      )
+    }
+
+    if (male_parent == id || female_parent == id) {
+      errors <- append(errors, paste("Dependency: Individual", id, "cannot be its own parent"))
+    }
+  }
+
+  missing_parents <- dplyr::distinct(missing_parents)
+  if (nrow(missing_parents) > 0) {
+    data <- dplyr::bind_rows(data, missing_parents)
+  }
+
+  #### check 4: inconsistent sex roles ####
+  male_ids            <- unique(data$male_parent[data$male_parent   != "0"])
+  female_ids          <- unique(data$female_parent[data$female_parent != "0"])
+  conflicting_sex_ids <- intersect(male_ids, female_ids)
+  inconsistent_sex_roles <- data %>% dplyr::filter(id %in% conflicting_sex_ids)
+
+  if (correct_inconsistent_sex_roles && length(conflicting_sex_ids) > 0) {
+    # Zero out male_parent/female_parent wherever a conflicting ID appears,
+    # then distinct() removes any rows that became exact duplicates [1]
+    data <- data %>%
+      dplyr::mutate(
+        male_parent   = dplyr::if_else(male_parent   %in% conflicting_sex_ids, "0", male_parent),
+        female_parent = dplyr::if_else(female_parent %in% conflicting_sex_ids, "0", female_parent)
+      ) %>%
+      dplyr::distinct(id, male_parent, female_parent, .keep_all = TRUE)
   }
 
-  #### check 5: dependencies (cycles) ####
+  #### check 5: dependencies (cycles) -- reported only, never modified ####
   detect_all_cycles <- function(data) {
     adj_list <- lapply(data$id, function(x) {
       row <- data[data$id == x, ]
       c(row$male_parent, row$female_parent)
     })
     names(adj_list) <- data$id
+
     dfs <- function(node, visited, rec_stack, path) {
       visited[node]   <- TRUE
       rec_stack[node] <- TRUE
       path   <- append(path, node)
       cycles <- list()
+
       for (neighbor in adj_list[[node]]) {
         if (neighbor %in% names(adj_list)) {
           if (!visited[neighbor]) {
@@ -163,148 +196,83 @@ check_ped <- function(ped.file,
       rec_stack[node] <- FALSE
       return(cycles)
     }
+
     visited   <- stats::setNames(rep(FALSE, length(adj_list)), names(adj_list))
     rec_stack <- stats::setNames(rep(FALSE, length(adj_list)), names(adj_list))
     all_cycles <- list()
+
     for (node in names(adj_list)) {
       if (!visited[node]) {
         node_cycles <- dfs(node, visited, rec_stack, character())
-        if (length(node_cycles) > 0) all_cycles <- append(all_cycles, node_cycles)
+        if (length(node_cycles) > 0) {
+          all_cycles <- append(all_cycles, node_cycles)
+        }
       }
     }
     return(all_cycles)
   }
 
-  cycles    <- detect_all_cycles(data)
-  cycle_ids <- character(0)
+  cycles <- detect_all_cycles(data)
   if (length(cycles) > 0) {
     for (cycle_group in cycles) {
-      ids       <- unique(unlist(cycle_group))
-      cycle_ids <- unique(c(cycle_ids, ids))
+      cycle_ids <- unique(unlist(cycle_group))
       errors    <- append(errors,
                           paste("Cycle detected involving IDs:",
-                                paste(ids, collapse = " -> ")))
+                                paste(cycle_ids, collapse = " -> ")))
     }
   }
-  dependencies <- data %>% filter(id %in% cycle_ids)
-  if (nrow(dependencies) == 0 && length(errors) > 0) {
-    dependencies <- data.frame(Dependency = unique(unlist(errors)),
-                               stringsAsFactors = FALSE)
-  }
 
-  #### compile report ####
+  #### compile findings ####
   input_ped_report <- list(
     exact_duplicates       = exact_duplicates,
-    repeated_ids_diff      = repeated_ids_diff,
+    conflicting_trios      = repeated_ids_report,
     inconsistent_sex_roles = inconsistent_sex_roles,
     missing_parents        = missing_parents,
-    dependencies           = dependencies
+    dependencies           = data.frame(Dependency = unique(unlist(errors)),
+                                        stringsAsFactors = FALSE),
+    corrected_pedigree     = data %>% dplyr::select(-row_number)
   )
 
-  #### build corrected pedigree (no row_number column) ####
-  if (correct) {
-    corrected <- data %>% select(-row_number)
-    # Remove exact duplicates
-    corrected <- distinct(corrected)
-    # Resolve conflicting IDs
-    if (nrow(repeated_ids_diff) > 0) {
-      corrected <- corrected %>%
-        group_by(id) %>%
-        summarize(
-          male_parent   = if (n_distinct(male_parent)   > 1) "0" else first(male_parent),
-          female_parent = if (n_distinct(female_parent) > 1) "0" else first(female_parent),
-          .groups = "drop"
-        )
-    }
-    # Add missing founders
-    if (length(missing_ids) > 0) {
-      corrected <- bind_rows(
-        corrected,
-        missing_founders %>% select(-row_number)
-      )
-    }
-  }
-
-  #### file names ####
-  file_base      <- tools::file_path_sans_ext(basename(ped.file))
-  corrected_name <- paste0(file_base, "_corrected")
-  report_name    <- paste0(file_base, "_report")
-  zip_name       <- paste0(file_base, "_report.zip")
   #### output ####
   if (verbose) {
     cat("\n=== Pedigree Quality Check Report ===\n")
-    cat("\n--- Exact Duplicates ---\n")
-    if (nrow(exact_duplicates) > 0) print(exact_duplicates) else cat("None found.\n")
-    cat("\n--- Conflicting IDs ---\n")
-    if (nrow(repeated_ids_diff) > 0) print(repeated_ids_diff) else cat("None found.\n")
-    cat("\n--- Inconsistent Parent Sex Roles ---\n")
-    if (nrow(inconsistent_sex_roles) > 0) print(inconsistent_sex_roles) else cat("None found.\n")
-    cat("\n--- Missing Parents (rows that reference them) ---\n")
-    if (nrow(missing_parents) > 0) print(missing_parents) else cat("None found.\n")
-    cat("\n--- Cycles / Dependencies ---\n")
-    if (nrow(dependencies) > 0) print(dependencies) else cat("None found.\n")
-    if (correct) {
-      cat(paste0("\n`correct = TRUE`: saving corrected pedigree as `", corrected_name, "`.\n"))
-    } else {
-      cat("\n`correct = FALSE`: no changes made to the pedigree.\n")
-    }
-  }
-
-  # Always save report to global environment
-  assign(report_name, input_ped_report, envir = .GlobalEnv)
-  # Save corrected pedigree if correct = TRUE
-  if (correct) assign(corrected_name, corrected, envir = .GlobalEnv)
 
-  #### zip export ####
-  if (save_zip) {
-    tmp_dir   <- tempfile()
-    dir.create(tmp_dir)
-    zip_files <- character(0)
+    if (nrow(exact_duplicates) > 0) {
+      cat("\nExact duplicate trios detected (removed in corrected pedigree):\n")
+      print(exact_duplicates)
+    } else cat("\nNo exact duplicate trios found.\n")
 
-    # Section headers matching the console output labels
-    section_labels <- c(
-      exact_duplicates       = "Exact Duplicates",
-      repeated_ids_diff      = "Conflicting IDs",
-      inconsistent_sex_roles = "Inconsistent Parent Sex Roles",
-      missing_parents        = "Missing Parents (rows that reference them)",
-      dependencies           = "Cycles / Dependencies"
-    )
+    if (nrow(repeated_ids_report) > 0) {
+      cat("\nConflicting trios detected:\n")
+      print(repeated_ids_report)
+      if (correct_conflicting_trios) {
+        cat("  -> parents set to 0 and collapsed to one row in corrected pedigree.\n")
+      } else {
+        cat("  -> correct_conflicting_trios = FALSE: left as-is in corrected pedigree.\n")
+      }
+    } else cat("\nNo conflicting trios found.\n")
 
-    for (component in names(section_labels)) {
-      df        <- input_ped_report[[component]]
-      file_path <- file.path(tmp_dir, paste0(file_base, "_", component, ".txt"))
+    if (nrow(missing_parents) > 0) {
+      cat("\nParents missing as IDs found in the pedigree (added as founders in corrected pedigree):\n")
+      print(missing_parents)
+    } else cat("\nNo missing parents found.\n")
 
-      if (nrow(df) == 0) {
-        # Write header + "None found." for clean empty reports
-        writeLines(
-          c(paste0("--- ", section_labels[[component]], " ---"), "None found."),
-          con = file_path
-        )
+    if (nrow(inconsistent_sex_roles) > 0) {
+      cat("\nIDs found as both male_parent and female_parent (is selfing or hermaphrodytism possible?):\n")
+      print(inconsistent_sex_roles)
+      if (correct_inconsistent_sex_roles) {
+        cat("  -> parent fields set to 0 for conflicting IDs; exact duplicates removed in corrected pedigree.\n")
       } else {
-        # Write header, then column names, then data — avoids col.names warning
-        file_con <- file(file_path, open = "wt")
-        writeLines(paste0("--- ", section_labels[[component]], " ---"), con = file_con)
-        writeLines(paste(colnames(df), collapse = "\t"), con = file_con)
-        close(file_con)
-        utils::write.table(df, file = file_path, sep = "\t", row.names = FALSE,
-                           quote = FALSE, append = TRUE, col.names = FALSE)
+        cat("  -> correct_inconsistent_sex_roles = FALSE: left as-is in corrected pedigree.\n")
       }
+    } else cat("\nNo IDs found as both male_parent and female_parent.\n")
 
-      zip_files <- c(zip_files, file_path)
-    }
-
-    # Optionally include corrected pedigree in zip
-    if (save_corrected_zip && correct) {
-      corrected_path <- file.path(tmp_dir, paste0(corrected_name, ".txt"))
-      utils::write.table(corrected, file = corrected_path, sep = "\t",
-                         row.names = FALSE, quote = FALSE)
-      zip_files <- c(zip_files, corrected_path)
-    }
+    if (nrow(input_ped_report$dependencies) > 0) {
+      cat("\nDependencies detected (must be resolved manually):\n")
+      print(input_ped_report$dependencies)
+    } else cat("\nNo dependencies detected.\n")
 
-    # Bundle all files into zip in the working directory
-    zip_path <- file.path(getwd(), zip_name)
-    utils::zip(zipfile = zip_path, files = zip_files, flags = "-j")
-    if (verbose) cat(paste0("\nZip archive saved to: ", zip_path, "\n"))
+    cat("\nThe corrected pedigree is included in the returned list as corrected_pedigree.\n")
   }
 
   invisible(input_ped_report)