diff --git a/DESCRIPTION b/DESCRIPTION
index 13190fc..91766e4 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,6 +1,6 @@
Package: BIGapp
Title: Breeding Insight Genomics Shiny Application
-Version: 1.6.0
+Version: 1.8.0
Authors@R:
c(
person(c("Alexander", "M."), "Sandercock",
@@ -26,7 +26,7 @@ Description: This R shiny app provides a web-based user friendly way for researc
License: Apache License (== 2.0)
Encoding: UTF-8
Roxygen: list(markdown = TRUE)
-RoxygenNote: 7.3.2
+RoxygenNote: 7.3.3
Depends: R (>= 4.4.0)
biocViews:
Imports:
diff --git a/R/app_server.R b/R/app_server.R
index 50c4b7d..07b7663 100644
--- a/R/app_server.R
+++ b/R/app_server.R
@@ -10,7 +10,7 @@ app_server <- function(input, output, session) {
# Your application server logic
##Add server configurations
- options(shiny.maxRequestSize = 1000000 * 1024^2) # Set maximum upload size to 1000GB
+ options(shiny.maxRequestSize = 100000 * 1024^2) # Set maximum upload size to 100GB
#shiny.maxRequestSize = 10000 * 1024^2; # 10 GB <- This is for a future limit when using BI's server remotely
callModule(mod_DosageCall_server,
diff --git a/R/app_ui.R b/R/app_ui.R
index 55d43cb..b6605f8 100644
--- a/R/app_ui.R
+++ b/R/app_ui.R
@@ -89,8 +89,8 @@ app_ui <- function(request) {
style = "display: flex; align-items: center;", # Align text and images horizontally
div(
style = "display: flex; flex-direction: column; margin-right: 15px; text-align: right;",
- div("2025 Breeding Insight"),
- div("Funded by USDA through Cornell University")
+ div("2026 Breeding Insight"),
+ div("Funded by USDA through UF|IFAS")
),
div(
a(
@@ -99,6 +99,9 @@ app_ui <- function(request) {
),
a(
img(src = "www/cornell_seal_simple_web_b31b1b.png", height = "45px")
+ ),
+ a(
+ img(src = "www/IFAS.jpg", height = "45px")
)
)
),
diff --git a/R/mod_Filtering.R b/R/mod_Filtering.R
index b040996..fa68891 100644
--- a/R/mod_Filtering.R
+++ b/R/mod_Filtering.R
@@ -104,7 +104,7 @@ mod_Filtering_ui <- function(id){
progressBar(id = ns("pb_filter"), value = 0, status = "info", display_pct = TRUE, striped = TRUE, title = " ")
),
# A placeholder for the download button. It will be rendered in the shinyalert modal.
- uiOutput(ns("download_ui_placeholder"))
+ uiOutput(ns("download_ui_placeholder"))
)
)
)
@@ -159,8 +159,8 @@ mod_Filtering_server <- function(input, output, session, parent_session){
# expand specific box
updateBox(id = "VCF_Filtering_box", action = "toggle", session = parent_session)
})
-
-
+
+
## Advanced options popup
#Default model choices
advanced_options <- reactiveValues(
@@ -168,7 +168,7 @@ mod_Filtering_server <- function(input, output, session, parent_session){
remove_list = NULL,
remove_file = NULL
)
-
+
#List the ped file name if previously uploaded
output$uploaded_file_name <- renderText({
if (!is.null(advanced_options$remove_file)) {
@@ -177,47 +177,47 @@ mod_Filtering_server <- function(input, output, session, parent_session){
"" # Return an empty string if no file has been uploaded
}
})
-
+
#Get list of sample names from VCF file
observeEvent(input$updog_rdata, {
#### VCF sanity check
- checks <- vcf_sanity_check(input$updog_rdata$datapath,
- max_markers = 16000,
+ checks <- vcf_sanity_check(input$updog_rdata$datapath,
+ max_markers = 16000,
depth_support_fields = c("DP", "AD", "RA"))
-
+
error_if_false <- c(
"VCF_header", "VCF_columns", "unique_FORMAT", "GT",
"samples", "chrom_info", "pos_info", "VCF_compressed", "allele_counts"
)
-
+
error_if_true <- c(
- "multiallelics", "phased_GT",
+ "multiallelics",
"duplicated_samples", "duplicated_markers"
)
-
+
warning_if_false <- c("ref_alt","max_markers")
-
- checks_result <- vcf_sanity_messages(checks,
- error_if_false,
- error_if_true,
- warning_if_false = warning_if_false,
+
+ checks_result <- vcf_sanity_messages(checks,
+ error_if_false,
+ error_if_true,
+ warning_if_false = warning_if_false,
warning_if_true = NULL)
-
+
print(checks)
print(checks_result)
if(checks_result) return() # Stop the analysis if checks fail
#########
-
-
+
+
#populate preview_data
preview_vcf <- read.vcfR(input$updog_rdata$datapath, verbose = FALSE, nrows = 1)
-
+
#Get names
advanced_options$sample_list <- names(data.frame(preview_vcf@gt, check.names=FALSE)[,-1])
-
+
rm(preview_vcf)
})
-
+
#UI popup window for input
observeEvent(input$advanced_options, {
showModal(modalDialog(
@@ -266,9 +266,9 @@ mod_Filtering_server <- function(input, output, session, parent_session){
)
))
})
-
-
-
+
+
+
#Close popup window when user "saves options"
observeEvent(input$save_advanced_options, {
#Only close the window if one of the options has been selected
@@ -278,10 +278,10 @@ mod_Filtering_server <- function(input, output, session, parent_session){
advanced_options$remove_list <- input$remove_list
advanced_options$remove_file <- input$remove_file
# Save other inputs as needed
-
+
removeModal()
}
-
+
})
#vcf
@@ -391,7 +391,7 @@ mod_Filtering_server <- function(input, output, session, parent_session){
animation = TRUE
)
}
-
+
if (input$use_updog & updog_par) {
# Use Updog filtering parameters
OD_filter <- as.numeric(input$OD_filter)
@@ -444,17 +444,17 @@ mod_Filtering_server <- function(input, output, session, parent_session){
filtering_files$raw_sample_miss_df <- as.numeric(colMeans(is.na(gt_matrix))) #Sample missing values
rm(gt_matrix) #Remove gt matrix
-
+
#Remove the samples if any are manually selected from advanced options
if (!is.null(advanced_options$remove_list)) {
advanced_options$remove_file <- NULL #Prioritize manually selected samples if a file was also uploaded (add a user warning if both are uploaded in model)
-
+
vcf_temp <- subset_vcf(vcf, remove.sample.list = advanced_options$remove_list)
vcf <- vcf_temp[[1]]
removed_samples <- vcf_temp[[2]]
rm(vcf_temp)
} else if (!is.null(advanced_options$remove_file)) {
-
+
#Remove the samples
vcf_temp <- subset_vcf(vcf, remove.sample.file = advanced_options$remove_file$datapath)
vcf <- vcf_temp[[1]]
@@ -525,7 +525,7 @@ mod_Filtering_server <- function(input, output, session, parent_session){
sample_removed <- length(starting_samples) - length(final_samples)
removed_names <- setdiff(starting_samples, final_samples)
filtering_files$removed_names <- removed_names
-
+
# Define the download handler
output$download_removed_samples <- downloadHandler(
filename = function() {
@@ -537,7 +537,7 @@ mod_Filtering_server <- function(input, output, session, parent_session){
}
}
)
-
+
if (sample_removed > 0 && removed_samples == 0) {
showModal(modalDialog(
title = "Samples Filtered",
@@ -1001,7 +1001,7 @@ mod_Filtering_server <- function(input, output, session, parent_session){
writeLines(paste(capture.output(filtering_summary_info()), collapse = "\n"), file)
}
)
-
+
}
## To be copied in the UI
diff --git a/R/mod_GS.R b/R/mod_GS.R
index 7367186..dcc3b29 100644
--- a/R/mod_GS.R
+++ b/R/mod_GS.R
@@ -140,7 +140,7 @@ mod_GS_ui <- function(id) {
#' @noRd
mod_GS_server <- function(input, output, session, parent_session) {
ns <- session$ns
-
+
# Help links
observeEvent(input$goPar, {
# change to help tab
@@ -148,7 +148,7 @@ mod_GS_server <- function(input, output, session, parent_session) {
session = parent_session, inputId = "MainMenu",
selected = "help"
)
-
+
# select specific tab
updateTabsetPanel(
session = parent_session, inputId = "Genomic_Prediction_tabset",
@@ -157,14 +157,14 @@ mod_GS_server <- function(input, output, session, parent_session) {
# expand specific box
updateBox(id = "Genomic_Prediction_box", action = "toggle", session = parent_session)
})
-
+
observeEvent(input$goRes, {
# change to help tab
updatebs4TabItems(
session = parent_session, inputId = "MainMenu",
selected = "help"
)
-
+
# select specific tab
updateTabsetPanel(
session = parent_session, inputId = "Genomic_Prediction_tabset",
@@ -173,14 +173,14 @@ mod_GS_server <- function(input, output, session, parent_session) {
# expand specific box
updateBox(id = "Genomic_Prediction_box", action = "toggle", session = parent_session)
})
-
+
observeEvent(input$goCite, {
# change to help tab
updatebs4TabItems(
session = parent_session, inputId = "MainMenu",
selected = "help"
)
-
+
# select specific tab
updateTabsetPanel(
session = parent_session, inputId = "Genomic_Prediction_tabset",
@@ -189,7 +189,7 @@ mod_GS_server <- function(input, output, session, parent_session) {
# expand specific box
updateBox(id = "Genomic_Prediction_box", action = "toggle", session = parent_session)
})
-
+
# Default model choices
advanced_options_pred <- reactiveValues(
pred_model = "GBLUP",
@@ -197,14 +197,14 @@ mod_GS_server <- function(input, output, session, parent_session) {
pred_est_file = NULL,
ped_file = NULL
)
-
+
pred_outputs <- reactiveValues(
corr_output = NULL,
comb_output = NULL,
all_GEBVs = NULL,
avg_GEBVs = NULL
)
-
+
# List the ped file name if previously uploaded
output$uploaded_file_name <- renderText({
if (!is.null(advanced_options_pred$ped_file)) {
@@ -213,7 +213,7 @@ mod_GS_server <- function(input, output, session, parent_session) {
"" # Return an empty string if no file has been uploaded
}
})
-
+
output$uploaded_file_name_pred <- renderText({
if (!is.null(advanced_options_pred$pred_est_file)) {
paste("Previously uploaded file:", advanced_options_pred$pred_est_file$name)
@@ -221,9 +221,9 @@ mod_GS_server <- function(input, output, session, parent_session) {
"" # Return an empty string if no file has been uploaded
}
})
-
+
## Trait file modal window
-
+
# Default choices
trait_options <- reactiveValues(
missing_data = "NA",
@@ -231,14 +231,14 @@ mod_GS_server <- function(input, output, session, parent_session) {
sample_column = NULL,
file_type = NULL
)
-
+
# UI popup window for input
observeEvent(input$pred_trait_file, {
req(input$pred_trait_file)
# Get the column names of the csv file
info_df <- read.csv(input$pred_trait_file$datapath, header = TRUE, check.names = FALSE, nrows = 2)
info_df[, 1] <- as.character(info_df[, 1]) # Makes sure that the sample names are characters instead of numeric
-
+
# Read first 5 rows for preview
preview_data <- tryCatch(
{
@@ -248,7 +248,7 @@ mod_GS_server <- function(input, output, session, parent_session) {
NULL
}
)
-
+
showModal(modalDialog(
title = "Trait File Options",
size = "l",
@@ -301,14 +301,14 @@ mod_GS_server <- function(input, output, session, parent_session) {
actionButton(ns("save_trait_options"), "Save")
)
))
-
+
# Render the preview table
output$file_preview <- renderTable({
req(preview_data)
preview_data
})
})
-
+
output$custom_missing_msg <- renderText({
if (input$missing_data == "Custom" && nchar(input$custom_missing) == 0) {
"Please enter a custom missing value."
@@ -316,8 +316,8 @@ mod_GS_server <- function(input, output, session, parent_session) {
""
}
})
-
-
+
+
# Close popup window when user "saves options"
observeEvent(input$save_trait_options, {
trait_options$missing_data <- input$missing_data
@@ -325,7 +325,7 @@ mod_GS_server <- function(input, output, session, parent_session) {
trait_options$sample_column <- input$sample_column
# trait_options$file_type
# Save other inputs as needed
-
+
if (input$missing_data == "Custom" && nchar(input$custom_missing) == 0) {
# Validation failed: display warning and prevent modal closure
showNotification(
@@ -335,10 +335,10 @@ mod_GS_server <- function(input, output, session, parent_session) {
)
return() # Stop further execution and keep the modal open
}
-
+
removeModal() # Close the modal after saving
})
-
+
# UI popup window for input
observeEvent(input$advanced_options_pred, {
showModal(modalDialog(
@@ -387,9 +387,7 @@ mod_GS_server <- function(input, output, session, parent_session) {
)
))
})
-
-
-
+
# Close popup window when user "saves options"
observeEvent(input$save_advanced_options_pred, {
advanced_options_pred$pred_model <- input$pred_model
@@ -397,17 +395,17 @@ mod_GS_server <- function(input, output, session, parent_session) {
advanced_options_pred$pred_est_file <- input$pred_est_file
advanced_options_pred$ped_file <- input$ped_file
# Save other inputs as needed
-
+
removeModal() # Close the modal after saving
})
-
+
### Genomic Prediction
# This tab involved 3 observeEvents
# 1) to get the traits listed in the phenotype file
# 2) to input and validate the input files
# 3) to perform the genomic prediction
-
-
+
+
# 1) Get traits
observeEvent(input$save_trait_options, {
info_df2 <- read.csv(input$pred_trait_file$datapath, header = TRUE, check.names = FALSE, nrow = 0)
@@ -416,11 +414,11 @@ mod_GS_server <- function(input, output, session, parent_session) {
updateVirtualSelect("pred_fixed_info2", choices = trait_var2, session = session)
updateVirtualSelect("pred_trait_info2", choices = trait_var2, session = session)
})
-
+
observeEvent(input$pred_fixed_info2, {
updateVirtualSelect("pred_fixed_cat2", choices = input$pred_fixed_info2, session = session)
})
-
+
# 2) Error check for prediction and save input files
continue_prediction2 <- reactiveVal(NULL)
pred_inputs2 <- reactiveValues(
@@ -432,7 +430,7 @@ mod_GS_server <- function(input, output, session, parent_session) {
pred_geno_pheno = NULL,
matrix = NULL
)
-
+
pred_outputs2 <- reactiveValues(
corr_output = NULL,
box_plot = NULL,
@@ -443,7 +441,7 @@ mod_GS_server <- function(input, output, session, parent_session) {
colors = NULL,
trait_output = NULL
)
-
+
# Reactive boxes
output$shared_snps <- renderValueBox({
valueBox(
@@ -453,12 +451,12 @@ mod_GS_server <- function(input, output, session, parent_session) {
color = "info"
)
})
-
+
observeEvent(input$prediction_est_start, {
# req(pred_inputs$pheno_input, pred_inputs$geno_input)
-
+
toggleClass(id = "pred_est_ploidy", class = "borderred", condition = (is.na(input$pred_est_ploidy) | is.null(input$pred_est_ploidy)))
-
+
if (is.null(input$pred_known_file$datapath) | is.null(input$pred_trait_file$datapath)) {
shinyalert(
title = "Missing input!",
@@ -476,16 +474,16 @@ mod_GS_server <- function(input, output, session, parent_session) {
)
}
req(input$pred_known_file$datapath, input$pred_trait_file$datapath, input$pred_est_ploidy)
-
+
# Status
updateProgressBar(session = session, id = "pb_gp", value = 5, title = "Checking input files")
-
+
# Variables
ploidy <- as.numeric(input$pred_est_ploidy)
train_geno_path <- input$pred_known_file$datapath
est_geno_path <- input$pred_est_file$datapath
traits <- input$pred_trait_info2
-
+
# Phenotypic variables
if (trait_options$missing_data == "(blank)") {
pheno2 <- read.csv(input$pred_trait_file$datapath, header = TRUE, check.names = FALSE, na.strings = "")
@@ -494,11 +492,11 @@ mod_GS_server <- function(input, output, session, parent_session) {
} else {
pheno2 <- read.csv(input$pred_trait_file$datapath, header = TRUE, check.names = FALSE, na.strings = trait_options$missing_data)
}
-
+
# Make the sample ID column the first column in the dataframe
sample_col_name <- input$sample_column
pheno2 <- pheno2[, c(sample_col_name, setdiff(names(pheno2), sample_col_name))]
-
+
# Add row names and catch errors
tryCatch(
{
@@ -521,12 +519,12 @@ mod_GS_server <- function(input, output, session, parent_session) {
return(NULL) # Return NULL to prevent further processing
}
)
-
+
# Assigning the IDs based on user input for column 1
ids_pheno <- pheno2[, 1]
-
-
-
+
+
+
# Make sure at least one trait was input
if (length(traits) == 0) {
# If condition is met, show notification toast
@@ -545,23 +543,23 @@ mod_GS_server <- function(input, output, session, parent_session) {
imageUrl = "",
animation = TRUE,
)
-
-
+
+
# Stop the observeEvent gracefully
return()
}
-
-
+
+
# Getting genotype matrix
-
+
# Geno file path
file_path <- train_geno_path
-
+
# Geno.file conversion if needed
if (grepl("\\.csv$", file_path)) {
train_geno <- read.csv(train_geno_path, header = TRUE, row.names = 1, check.names = FALSE)
est_geno <- read.csv(est_geno_path, header = TRUE, row.names = 1, check.names = FALSE)
-
+
# Save number of SNPs
# pred_inputs$pred_snps <- nrow(geno)
} else if (grepl("\\.vcf$", file_path) || grepl("\\.gz$", file_path)) {
@@ -578,32 +576,32 @@ mod_GS_server <- function(input, output, session, parent_session) {
}
})
}
-
+
#### VCF sanity check
checks <- vcf_sanity_check(train_geno_path)
-
+
error_if_false <- c(
"VCF_header", "VCF_columns", "unique_FORMAT", "GT",
"samples", "VCF_compressed"
)
-
+
error_if_true <- c(
- "multiallelics", "phased_GT", "mixed_ploidies",
+ "multiallelics", "mixed_ploidies",
"duplicated_samples", "duplicated_markers"
)
-
+
warning_if_false <- c("chrom_info", "pos_info", "ref_alt","max_markers")
- checks_result <- vcf_sanity_messages(checks,
- error_if_false,
- error_if_true,
- warning_if_false = warning_if_false,
+ checks_result <- vcf_sanity_messages(checks,
+ error_if_false,
+ error_if_true,
+ warning_if_false = warning_if_false,
warning_if_true = NULL,
input_ploidy = as.numeric(input$pred_est_ploidy))
-
+
if(checks_result) return() # Stop the analysis if checks fail
#########
-
+
# Convert VCF file if submitted
train_vcf <- vcfR::read.vcfR(train_geno_path, verbose = FALSE)
if (is.null(est_geno_path) || is.na(est_geno_path)) {
@@ -611,21 +609,21 @@ mod_GS_server <- function(input, output, session, parent_session) {
} else {
## Check VCF
checks <- vcf_sanity_check(est_geno_path)
- checks_result <- vcf_sanity_messages(checks,
- error_if_false,
- error_if_true,
- warning_if_false = warning_if_false,
+ checks_result <- vcf_sanity_messages(checks,
+ error_if_false,
+ error_if_true,
+ warning_if_false = warning_if_false,
warning_if_true = NULL,
as.numeric(input$pred_est_ploidy))
-
+
if(checks_result) return() # Stop the analysis if checks fail
-
+
est_vcf <- vcfR::read.vcfR(est_geno_path, verbose = FALSE)
}
-
+
# Get number of SNPs
# pred_inputs$pred_snps <- nrow(vcf)
-
+
# Extract GT
if (is.null(est_vcf)) {
est_geno <- NULL
@@ -656,11 +654,11 @@ mod_GS_server <- function(input, output, session, parent_session) {
imageUrl = "",
animation = TRUE,
)
-
+
# Stop the analysis
return()
}
-
+
# Check that the ploidy entered is correct
if (ploidy != max(train_geno, na.rm = TRUE)) {
# If condition is met, show notification toast
@@ -681,13 +679,13 @@ mod_GS_server <- function(input, output, session, parent_session) {
imageUrl = "",
animation = TRUE
)
-
-
+
+
# Stop the observeEvent gracefully
# return()
}
-
-
+
+
# Function to convert genotype matrix according to ploidy
#convert_genotype <- function(genotype_matrix, ploidy) {
@@ -705,7 +703,7 @@ mod_GS_server <- function(input, output, session, parent_session) {
#est_geno_adj_init <- convert_genotype(est_geno, as.numeric(ploidy))
est_geno_adj_init <- est_geno
}
-
+
# Make sure the trait file and genotype file are in the same order
# Column names for geno (assuming these are the individual IDs)
colnames_geno <- colnames(train_geno)
@@ -715,7 +713,7 @@ mod_GS_server <- function(input, output, session, parent_session) {
common_ids <- intersect(colnames_geno, ids_pheno)
# Get number of id
pred_inputs2$pred_geno_pheno <- length(common_ids)
-
+
# Throw an error if there are less matching samples in the phenotype file than the genotype file
if (length(common_ids) == 0) {
# If condition is met, show notification toast
@@ -734,8 +732,8 @@ mod_GS_server <- function(input, output, session, parent_session) {
imageUrl = "",
animation = TRUE,
)
-
-
+
+
# Stop the observeEvent gracefully
return()
} else if (length(common_ids) < length(colnames_geno)) {
@@ -757,15 +755,15 @@ mod_GS_server <- function(input, output, session, parent_session) {
imageUrl = "",
animation = TRUE
)
-
-
+
+
# Stop the observeEvent gracefully
# return()
}
-
-
-
-
+
+
+
+
# Final check before performing analyses
shinyalert(
title = "Ready?",
@@ -793,15 +791,15 @@ mod_GS_server <- function(input, output, session, parent_session) {
}
}
)
-
+
# Status
updateProgressBar(session = session, id = "pb_gp", value = 40, title = "Generating Matrices")
-
+
# Create relationship matrix depending on the input VCF files
if (is.null(advanced_options_pred$pred_est_file)) {
# Subset phenotype file by common IDs
pheno2 <- pheno2[common_ids, ]
-
+
#Matrix
#Kin_mat <- A.mat(t(train_geno_adj_init), max.missing=NULL,impute.method="mean",return.imputed=FALSE)
Kin_mat <- Gmatrix(t(train_geno_adj_init),
@@ -814,7 +812,7 @@ mod_GS_server <- function(input, output, session, parent_session) {
} else{
#Subset phenotype file and training file by common IDs
pheno2 <- pheno2[common_ids, ]
-
+
# Match training and testing genotype file SNPs
common_markers <- intersect(rownames(train_geno_adj_init), rownames(est_geno_adj_init))
# first remove samples from training genotype matrix that are not in the phenotype file
@@ -833,22 +831,22 @@ mod_GS_server <- function(input, output, session, parent_session) {
missingValue = "NA")
}
-
+
# Save to reactive values
# pred_inputs2$shared_snps <- length(common_markers)
pred_inputs2$matrix <- Kin_mat
pred_inputs2$pheno_input <- pheno2
})
-
+
# 3) Analysis only proceeds once continue_prediction is converted to TRUE
observe({
req(continue_prediction2(), pred_inputs2$pheno_input, pred_inputs2$matrix)
-
+
# Stop analysis if cancel was selected
if (isFALSE(continue_prediction2())) {
return()
}
-
+
# Variables
ploidy <- as.numeric(input$pred_est_ploidy)
gmat <- pred_inputs2$matrix
@@ -872,16 +870,16 @@ mod_GS_server <- function(input, output, session, parent_session) {
cores <- 1
total_population <- ncol(gmat)
# train_size <- floor(percentage / 100 * total_population)
-
+
# Status
updateProgressBar(session = session, id = "pb_gp", value = 90, title = "Generating Results")
-
+
# initialize dataframe
results_GEBVs <- matrix(nrow = ncol(gmat), ncol = length(traits) + 1)
results_TRAITs <- matrix(nrow = ncol(gmat), ncol = length(traits) + 1)
colnames(results_TRAITs) <- c("Sample", paste0(traits)) # Set the column names to be the traits
colnames(results_GEBVs) <- c("Sample", paste0(traits)) # Set the column names to be the traits
-
+
# GBLUP for each trait
for (trait_idx in 1:length(traits)) {
traitpred <- kin.blup(
@@ -893,24 +891,24 @@ mod_GS_server <- function(input, output, session, parent_session) {
K = gmat,
n.core = cores
)
-
+
results_GEBVs[, (trait_idx + 1)] <- traitpred$g
results_TRAITs[, (trait_idx + 1)] <- traitpred$pred
}
# Organize dataframes
results_GEBVs[, 1] <- row.names(data.frame(traitpred$g))
results_TRAITs[, 1] <- row.names(data.frame(traitpred$pred))
-
+
# Label the samples that already had phenotype information
results_GEBVs <- data.frame(results_GEBVs)
results_TRAITs <- data.frame(results_TRAITs)
exists_in_df <- results_GEBVs[[1]] %in% pheno2[[1]]
results_GEBVs <- cbind(results_GEBVs[1], "w/Pheno" = exists_in_df, results_GEBVs[-1])
results_TRAITs <- cbind(results_TRAITs[1], "w/Pheno" = exists_in_df, results_TRAITs[-1])
-
+
# Status
updateProgressBar(session = session, id = "pb_gp", value = 100, title = "Finished!")
-
+
## Make output tables depending on 1 or 2 VCF/pedigree files used.
# GEBVs
if (!is.null(advanced_options_pred$pred_est_file)) {
@@ -919,7 +917,7 @@ mod_GS_server <- function(input, output, session, parent_session) {
} else {
pred_outputs2$all_GEBVs <- results_GEBVs
}
-
+
# BLUPs of genetic values
if (!is.null(advanced_options_pred$pred_est_file)) {
# Subset rows where 'w/Pheno' is FALSE and drop the 'w/Pheno' column
@@ -927,11 +925,11 @@ mod_GS_server <- function(input, output, session, parent_session) {
} else {
pred_outputs2$trait_output <- results_TRAITs
}
-
+
# End the event
continue_prediction2(NULL)
})
-
+
# Output the prediction tables
all_GEBVs <- reactive({
validate(
@@ -939,7 +937,7 @@ mod_GS_server <- function(input, output, session, parent_session) {
)
pred_outputs2$all_GEBVs
})
-
+
# GEBVs from all iterations/folds
output$pred_gebvs_table2 <- renderDT(
{
@@ -947,14 +945,14 @@ mod_GS_server <- function(input, output, session, parent_session) {
},
options = list(scrollX = TRUE, autoWidth = FALSE, pageLength = 5)
)
-
+
trait_output <- reactive({
validate(
need(!is.null(pred_outputs2$trait_output), "Upload the input files, set the parameters and click 'run analysis' to access results in this session.")
)
pred_outputs2$trait_output
})
-
+
# GEBVs from all iterations/folds
output$pred_trait_table <- renderDT(
{
@@ -962,7 +960,7 @@ mod_GS_server <- function(input, output, session, parent_session) {
},
options = list(scrollX = TRUE, autoWidth = FALSE, pageLength = 5)
)
-
+
output$download_vcft <- downloadHandler(
filename = function() {
paste0("BIGapp_Training_VCF_Example_file.vcf.gz")
@@ -972,7 +970,7 @@ mod_GS_server <- function(input, output, session, parent_session) {
file.copy(ex, file)
}
)
-
+
output$download_vcfp <- downloadHandler(
filename = function() {
paste0("BIGapp_Predict_VCF_Example_file.vcf")
@@ -982,7 +980,7 @@ mod_GS_server <- function(input, output, session, parent_session) {
file.copy(ex, file)
}
)
-
+
output$download_pheno <- downloadHandler(
filename = function() {
paste0("BIGapp_passport_Example_file.csv")
@@ -992,7 +990,7 @@ mod_GS_server <- function(input, output, session, parent_session) {
file.copy(ex, file)
}
)
-
+
# Download files for GP
output$download_pred_results_file <- downloadHandler(
filename = function() {
@@ -1002,26 +1000,26 @@ mod_GS_server <- function(input, output, session, parent_session) {
# Temporary files list
temp_dir <- tempdir()
temp_files <- c()
-
+
# Create a temporary file for data frames
ebv_file <- file.path(temp_dir, paste0("GS-EBV-predictions-", Sys.Date(), ".csv"))
write.csv(pred_outputs2$all_GEBVs, ebv_file, row.names = FALSE)
temp_files <- c(temp_files, ebv_file)
-
+
trait_file <- file.path(temp_dir, paste0("GS-Phenotype-predictions-", Sys.Date(), ".csv"))
write.csv(pred_outputs2$trait_output, trait_file, row.names = FALSE)
temp_files <- c(temp_files, trait_file)
-
+
# Zip files only if there's something to zip
if (length(temp_files) > 0) {
zip(file, files = temp_files, extras = "-j") # Using -j to junk paths
}
-
+
# Optionally clean up
file.remove(temp_files)
}
)
-
+
## Summary Info
pred_summary_info <- function() {
# Handle possible NULL values for inputs
@@ -1029,7 +1027,7 @@ mod_GS_server <- function(input, output, session, parent_session) {
est_file_name <- if (!is.null(input$pred_est_file$name)) input$pred_est_file$name else "No file selected"
passport_file_name <- if (!is.null(input$pred_trait_file$name)) input$pred_trait_file$name else "No file selected"
selected_ploidy <- if (!is.null(input$pred_est_ploidy)) as.character(input$pred_est_ploidy) else "Not selected"
-
+
# Print the summary information
cat(
"BIGapp Selection Summary\n",
@@ -1063,7 +1061,7 @@ mod_GS_server <- function(input, output, session, parent_session) {
sep = ""
)
}
-
+
# Popup for analysis summary
observeEvent(input$pred_summary, {
showModal(modalDialog(
@@ -1079,8 +1077,8 @@ mod_GS_server <- function(input, output, session, parent_session) {
)
))
})
-
-
+
+
# Download Summary Info
output$download_pred_info <- downloadHandler(
filename = function() {
diff --git a/R/mod_GSAcc.R b/R/mod_GSAcc.R
index df9ab30..da246c2 100644
--- a/R/mod_GSAcc.R
+++ b/R/mod_GSAcc.R
@@ -257,9 +257,9 @@ mod_GSAcc_server <- function(input, output, session, parent_session){
removeModal() # Close the modal after saving
})
-
+
## Trait file modal window
-
+
#Default choices
trait_options <- reactiveValues(
missing_data = "NA",
@@ -267,25 +267,25 @@ mod_GSAcc_server <- function(input, output, session, parent_session){
sample_column = NULL,
file_type = NULL
)
-
+
#UI popup window for input
observeEvent(input$trait_file, {
req(input$trait_file)
#Get the column names of the csv file
info_df <- read.csv(input$trait_file$datapath, header = TRUE, check.names = FALSE, nrows=2)
info_df[,1] <- as.character(info_df[,1]) #Makes sure that the sample names are characters instead of numeric
-
+
# Read first 5 rows for preview
preview_data <- tryCatch({
head(read.csv(input$trait_file$datapath, nrows = 5, na.strings=trait_options$missing_data),5)
}, error = function(e) {
NULL
})
-
+
showModal(modalDialog(
title = "Trait File Options",
size= "l",
-
+
selectInput(
inputId = ns('missing_data'),
label = 'Missing Data Value',
@@ -312,7 +312,7 @@ mod_GSAcc_server <- function(input, output, session, parent_session){
label = 'Sample ID Column',
choices = colnames(info_df)
),
-
+
if (!is.null(preview_data)) {
div(
h4(
@@ -332,20 +332,20 @@ mod_GSAcc_server <- function(input, output, session, parent_session){
p("Could not load file preview.")
)
},
-
+
footer = tagList(
actionButton(ns("save_trait_options"), "Save")
)
))
-
+
# Render the preview table
output$file_preview <- renderTable({
req(preview_data)
preview_data
})
-
+
})
-
+
output$custom_missing_msg <- renderText({
if (input$missing_data == "Custom" && nchar(input$custom_missing) == 0) {
"Please enter a custom missing value."
@@ -353,8 +353,8 @@ mod_GSAcc_server <- function(input, output, session, parent_session){
""
}
})
-
-
+
+
#Close popup window when user "saves options"
observeEvent(input$save_trait_options, {
trait_options$missing_data <- input$missing_data
@@ -362,7 +362,7 @@ mod_GSAcc_server <- function(input, output, session, parent_session){
trait_options$sample_column <- input$sample_column
#trait_options$file_type
# Save other inputs as needed
-
+
if (input$missing_data == "Custom" && nchar(input$custom_missing) == 0) {
# Validation failed: display warning and prevent modal closure
showNotification(
@@ -372,7 +372,7 @@ mod_GSAcc_server <- function(input, output, session, parent_session){
)
return() # Stop further execution and keep the modal open
}
-
+
removeModal() # Close the modal after saving
})
@@ -428,35 +428,35 @@ mod_GSAcc_server <- function(input, output, session, parent_session){
pred_inputs <- eventReactive(input$prediction_start,{
toggleClass(id = "pred_ploidy", class = "borderred", condition = (is.na(input$pred_ploidy) | is.null(input$pred_ploidy)))
-
+
if (advanced_options$pred_matrix != "Amatrix"){
#### VCF sanity check
checks <- vcf_sanity_check(input$pred_file$datapath)
-
+
error_if_false <- c(
"VCF_header", "VCF_columns", "unique_FORMAT", "GT",
"samples", "VCF_compressed"
)
-
+
error_if_true <- c(
- "multiallelics", "phased_GT", "mixed_ploidies",
+ "multiallelics", "mixed_ploidies",
"duplicated_samples", "duplicated_markers"
)
-
+
warning_if_false <- c("chrom_info", "pos_info", "ref_alt","max_markers")
-
- checks_result <- vcf_sanity_messages(checks,
- error_if_false,
- error_if_true,
- warning_if_false = warning_if_false,
+
+ checks_result <- vcf_sanity_messages(checks,
+ error_if_false,
+ error_if_true,
+ warning_if_false = warning_if_false,
warning_if_true = NULL,
input_ploidy = as.numeric(input$pred_ploidy))
-
+
if(checks_result) return() # Stop the analysis if checks fail
}
#########
-
-
+
+
if(is.null(advanced_options$pred_matrix)) advanced_options$pred_matrix <- "none_selected"
if (((is.null(input$pred_file$datapath) & advanced_options$pred_matrix != "Amatrix") |
(is.null(advanced_options$ped_file$datapath) & advanced_options$pred_matrix == "Amatrix")) |
@@ -489,11 +489,11 @@ mod_GSAcc_server <- function(input, output, session, parent_session){
} else {
pheno <- read.csv(input$trait_file$datapath, header = TRUE, check.names = FALSE, na.strings = trait_options$missing_data)
}
-
+
# Make the sample ID column the first column in the dataframe
sample_col_name <- input$sample_column
pheno <- pheno[, c(sample_col_name, setdiff(names(pheno), sample_col_name))]
-
+
# Add row names and catch errors
tryCatch({
row.names(pheno) <- pheno[, 1]
@@ -512,7 +512,7 @@ mod_GSAcc_server <- function(input, output, session, parent_session){
)
return(NULL) # Return NULL to prevent further processing
})
-
+
# Assigning the IDs based on user input for column 1
ids_pheno <- pheno[, 1]
@@ -810,36 +810,36 @@ mod_GSAcc_server <- function(input, output, session, parent_session){
#pred_outputs
})
-
+
#Output the prediction tables
-
+
all_GEBVs <- reactive({
validate(
need(!is.null(pred_outputs$all_GEBVs), "Upload the input files, set the parameters and click 'run analysis' to access results in this session.")
)
pred_outputs$comb_output
})
-
+
output$pred_all_table <- renderDT({all_GEBVs()}, options = list(scrollX = TRUE,autoWidth = FALSE, pageLength = 5))
-
+
comb_output <- reactive({
validate(
need(!is.null(pred_outputs$comb_output), "Upload the input files, set the parameters and click 'run analysis' to access results in this session.")
)
-
+
#Adding the mean values to df
prepare_df <- round(pred_outputs$comb_output, 4)
trait_means <- round(colMeans(prepare_df), 4)
new_row <- c("Mean", trait_means[-1])
new_df <- rbind(prepare_df, new_row)
-
+
#exporting
new_df
-
+
})
-
+
output$pred_acc_table <- renderDT({comb_output()}, options = list(scrollX = TRUE,autoWidth = FALSE, pageLength = 5))
-
+
##Plots
plots <- reactive({
@@ -859,13 +859,13 @@ mod_GSAcc_server <- function(input, output, session, parent_session){
names_to = "Trait",
values_to = "Correlation"
)
-
+
# Determine dynamic y-axis lower bound based on the entire dataset
min_val <- min(df_long$Correlation, na.rm = TRUE)
y_axis_lower_bound <- if (min_val < 0) {
# If there are negative values, start the axis slightly below the minimum
# You can adjust the floor() logic for more/less padding, or just use min_val
- floor(min_val * 10) / 10
+ floor(min_val * 10) / 10
} else {
0
}
diff --git a/R/mod_PCA.R b/R/mod_PCA.R
index 3fc1c9d..87e50e9 100644
--- a/R/mod_PCA.R
+++ b/R/mod_PCA.R
@@ -168,7 +168,7 @@ mod_PCA_server <- function(input, output, session, parent_session){
# expand specific box
updateBox(id = "PCA_box", action = "toggle", session = parent_session)
})
-
+
#Default choices
trait_options <- reactiveValues(
missing_data = "NA",
@@ -176,7 +176,7 @@ mod_PCA_server <- function(input, output, session, parent_session){
sample_column = NULL,
file_type = NULL
)
-
+
# Update dropdown menu choices based on uploaded passport file
passport_table <- reactive({
validate(
@@ -184,26 +184,26 @@ mod_PCA_server <- function(input, output, session, parent_session){
)
info_df <- read.csv(input$passport_file$datapath, header = TRUE, check.names = FALSE, na.strings=trait_options$missing_data)
info_df[,1] <- as.character(info_df[,1]) #Makes sure that the sample names are characters instead of numeric
-
+
updateSelectInput(session, "group_info", choices = colnames(info_df))
info_df
})
-
+
#PCA specific category selection
observeEvent(passport_table(), {
#updateMaterialSwitch(session, inputId = "use_cat", status = "success")
-
+
# Get selected column name
selected_col <- input$group_info
-
+
# Extract unique values from the selected column
unique_values <- unique(passport_table()[[selected_col]])
-
+
#Add category selection
updateVirtualSelect("cat_color", choices = unique_values, selected = unique_values, session = session)
-
+
})
-
+
# PCA specific category selection. Update when group_info is changed.
observeEvent(input$group_info, {
req(passport_table()) # Ensure passport_table is available
@@ -211,25 +211,25 @@ mod_PCA_server <- function(input, output, session, parent_session){
unique_values <- unique(passport_table()[[selected_col]])
updateVirtualSelect("cat_color", choices = unique_values, selected = unique_values, session = session)
})
-
+
#UI popup window for input
observeEvent(input$passport_file, {
req(input$passport_file)
#Get the column names of the csv file
info_df <- read.csv(input$passport_file$datapath, header = TRUE, check.names = FALSE, nrows=2)
info_df[,1] <- as.character(info_df[,1]) #Makes sure that the sample names are characters instead of numeric
-
+
# Read first 5 rows for preview
preview_data <- tryCatch({
head(read.csv(input$passport_file$datapath, nrows = 5, na.strings=trait_options$missing_data),5)
}, error = function(e) {
NULL
})
-
+
showModal(modalDialog(
title = "Trait File Options",
size= "l",
-
+
selectInput(
inputId = ns('missing_data'),
label = 'Missing Data Value',
@@ -256,7 +256,7 @@ mod_PCA_server <- function(input, output, session, parent_session){
label = 'Sample ID Column',
choices = colnames(info_df)
),
-
+
if (!is.null(preview_data)) {
div(
h4(
@@ -276,20 +276,20 @@ mod_PCA_server <- function(input, output, session, parent_session){
p("Could not load file preview.")
)
},
-
+
footer = tagList(
actionButton(ns("save_trait_options"), "Save")
)
))
-
+
# Render the preview table
output$file_preview <- renderTable({
req(preview_data)
preview_data
})
-
+
})
-
+
output$custom_missing_msg <- renderText({
if (input$missing_data == "Custom" && nchar(input$custom_missing) == 0) {
"Please enter a custom missing value."
@@ -297,8 +297,8 @@ mod_PCA_server <- function(input, output, session, parent_session){
""
}
})
-
-
+
+
#Close popup window when user "saves options"
observeEvent(input$save_trait_options, {
trait_options$missing_data <- input$missing_data
@@ -306,7 +306,7 @@ mod_PCA_server <- function(input, output, session, parent_session){
trait_options$sample_column <- input$sample_column
#trait_options$file_type
# Save other inputs as needed
-
+
if (input$missing_data == "Custom" && nchar(input$custom_missing) == 0) {
# Validation failed: display warning and prevent modal closure
showNotification(
@@ -316,7 +316,7 @@ mod_PCA_server <- function(input, output, session, parent_session){
)
return() # Stop further execution and keep the modal open
}
-
+
removeModal() # Close the modal after saving
})
@@ -366,29 +366,29 @@ mod_PCA_server <- function(input, output, session, parent_session){
#### VCF sanity check
checks <- vcf_sanity_check(geno)
-
+
error_if_false <- c(
"VCF_header", "VCF_columns", "unique_FORMAT", "GT",
"samples", "VCF_compressed"
)
-
+
error_if_true <- c(
- "multiallelics", "phased_GT", "mixed_ploidies",
+ "multiallelics", "mixed_ploidies",
"duplicated_samples", "duplicated_markers"
)
-
+
warning_if_false <- c("chrom_info", "pos_info", "ref_alt", "max_markers")
-
- checks_result <- vcf_sanity_messages(checks,
- error_if_false,
- error_if_true,
- warning_if_false = warning_if_false,
+
+ checks_result <- vcf_sanity_messages(checks,
+ error_if_false,
+ error_if_true,
+ warning_if_false = warning_if_false,
warning_if_true = NULL,
input_ploidy = as.numeric(ploidy))
-
+
if(checks_result) return() # Stop the analysis if checks fail
#########
-
+
#Import genotype information if in VCF format
vcf <- read.vcfR(geno, verbose = FALSE)
@@ -412,7 +412,7 @@ mod_PCA_server <- function(input, output, session, parent_session){
}
#Start analysis following a genotype data check
-
+
if (ncol(genomat) < 5){
shinyalert(
title = "Small Genotype File",
@@ -441,7 +441,7 @@ mod_PCA_server <- function(input, output, session, parent_session){
} else {
info_df <- read.csv(input$passport_file$datapath, header = TRUE, check.names = FALSE, na.strings = trait_options$missing_data)
}
-
+
# Make the sample ID column the first column in the dataframe
sample_col_name <- input$sample_column
info_df <- info_df[, c(sample_col_name, setdiff(names(info_df), sample_col_name))]
@@ -537,17 +537,17 @@ mod_PCA_server <- function(input, output, session, parent_session){
validate(
need(!is.null(pca_data$pc_df_pop), "Input Genotype file, Species ploidy, and run the analysis to access results in this section.")
)
-
+
df_plot <- pca_data$pc_df_pop
group_var_name <- input$group_info
pc_x_col <- input$pc_X
pc_y_col <- input$pc_Y
-
+
# Define default aesthetics for unselected categories
label_to_value <- c("Light Grey" = "grey80", "Grey" = "grey60", "Dark Grey" = "grey40", "Black" = "black")
default_color <- label_to_value[[input$grey_choice]]
default_shape <- 16 # Solid circle (pch value)
-
+
# --- Palette Generation (local to this reactive, named) ---
local_my_palette <- NULL # This will be a named vector: category level -> color
if (group_var_name != "" && !is.null(group_var_name) && !is.null(input$color_choice)) {
@@ -556,18 +556,18 @@ mod_PCA_server <- function(input, output, session, parent_session){
shinyalert(title = "Error", text = paste("Group variable '", group_var_name, "' not found."), type = "error")
return(NULL)
}
-
+
unique_group_vals_for_plot <- unique(as.character(df_plot[[group_var_name]]))
num_levels_for_plot <- length(unique_group_vals_for_plot)
-
+
if (num_levels_for_plot > 0) {
max_brewer_colors <- tryCatch(
RColorBrewer::brewer.pal.info[input$color_choice, "maxcolors"],
error = function(e) 8 # Default max colors if palette info fails
)
-
+
n_base_colors <- num_levels_for_plot # Number of colors needed from brewer.pal directly or via ramp
-
+
# Adjust n for brewer.pal requirements (min 3 for most)
if (num_levels_for_plot == 1) {
base_palette_brewer <- RColorBrewer::brewer.pal(3, input$color_choice)[1]
@@ -576,32 +576,32 @@ mod_PCA_server <- function(input, output, session, parent_session){
} else {
base_palette_brewer <- RColorBrewer::brewer.pal(min(n_base_colors, max_brewer_colors), input$color_choice)
}
-
+
generated_palette_values <- grDevices::colorRampPalette(base_palette_brewer)(num_levels_for_plot)
local_my_palette <- setNames(generated_palette_values, unique_group_vals_for_plot)
}
}
-
+
# --- Base ggplot object ---
plot_obj <- ggplot(df_plot, aes(x = .data[[pc_x_col]], y = .data[[pc_y_col]]))
-
+
# --- Determine if color/shape are mapped to group or static ---
map_color_to_group <- group_var_name != "" && !is.null(group_var_name) && !is.null(local_my_palette)
map_shape_to_group <- map_color_to_group && input$use_shapes
-
+
# Arguments for geom_point (static values if not mapped)
geom_point_args <- list(size = 2.5, alpha = 0.8)
-
+
if (map_color_to_group) {
# Ensure group_var_name column is factor for consistent scale behavior
df_plot[[group_var_name]] <- as.factor(df_plot[[group_var_name]])
all_levels <- levels(df_plot[[group_var_name]])
-
+
plot_obj <- plot_obj + aes(color = as.factor(.data[[group_var_name]])) # Map color aesthetic
-
+
color_values_map <- setNames(rep(default_color, length(all_levels)), all_levels)
categories_to_colorize <- input$cat_color # User's selection from UI
-
+
if (input$use_cat && length(categories_to_colorize) > 0) {
for (cat_level in categories_to_colorize) {
if (cat_level %in% names(local_my_palette)) {
@@ -618,25 +618,25 @@ mod_PCA_server <- function(input, output, session, parent_session){
}
}
# If input$use_cat is TRUE but categories_to_colorize is empty, all remain default_color.
-
+
plot_obj <- plot_obj + scale_color_manual(
- name = group_var_name,
- values = color_values_map,
+ name = group_var_name,
+ values = color_values_map,
na.value = default_color # Handles explicit NAs in the grouping column
)
} else {
geom_point_args$color <- default_color # Set static color for all points
}
-
+
if (map_shape_to_group) {
df_plot[[group_var_name]] <- as.factor(df_plot[[group_var_name]]) # Ensure it's factor
all_levels <- levels(df_plot[[group_var_name]])
-
+
plot_obj <- plot_obj + aes(shape = .data[[group_var_name]]) # Map shape aesthetic
-
+
shape_values_map <- setNames(rep(default_shape, length(all_levels)), all_levels)
categories_to_shape <- input$cat_color # User's selection from UI for shaping
-
+
# Check for shinyalert for too many shapes (based on categories selected for shaping)
if (length(categories_to_shape) > 25) {
# The shinyalert is defined in your original code, ensure its condition matches this logic
@@ -650,23 +650,23 @@ mod_PCA_server <- function(input, output, session, parent_session){
type = "error",
showConfirmButton = TRUE,
confirmButtonText = "OK",
- confirmButtonCol = "#004192",
+ confirmButtonCol = "#004192",
showCancelButton = FALSE,
animation = TRUE
)
updateMaterialSwitch(session = session, inputId = "use_shapes", value = FALSE) # Use 'session' not 'parent_session' if in module
-
+
# Cap the number of categories getting distinct shapes
categories_to_shape_for_map <- categories_to_shape[1:25]
} else {
categories_to_shape_for_map <- categories_to_shape
}
-
+
available_custom_shapes <- c(0:24) # PCH values (0-14 are symbols, 15-20 are filled symbols, 21-24 are bordered)
# Standard practice often uses 1:25, let's stick to that.
available_custom_shapes <- c(1:25)
-
-
+
+
shape_idx <- 1
for (cat_level in categories_to_shape_for_map) {
if (cat_level %in% all_levels && shape_idx <= length(available_custom_shapes)) {
@@ -676,24 +676,24 @@ mod_PCA_server <- function(input, output, session, parent_session){
}
plot_obj <- plot_obj + scale_shape_manual(
name = group_var_name, # Same name as color scale for potential legend merging
- values = shape_values_map,
+ values = shape_values_map,
na.value = default_shape # Handles explicit NAs
)
} else {
geom_point_args$shape <- default_shape # Set static shape for all points
}
-
+
# --- Add geom_point layer ---
# geom_point_args list already has size, alpha, and conditionally color/shape if static
plot_obj <- plot_obj + do.call(geom_point, geom_point_args)
-
+
# --- Labels, Guides, and Theme ---
plot_obj <- plot_obj + labs(
x = paste0(pc_x_col, " (", pca_data$variance_explained[as.numeric(substr(pc_x_col, 3, 3))], "%)"),
y = paste0(pc_y_col, " (", pca_data$variance_explained[as.numeric(substr(pc_y_col, 3, 3))], "%)")
# Legend titles are taken from scale_manual 'name' argument
)
-
+
# Configure guides (legends)
guide_options <- list()
if (map_color_to_group) {
@@ -707,7 +707,7 @@ mod_PCA_server <- function(input, output, session, parent_session){
guide_options$shape <- "none"
}
plot_obj <- plot_obj + guides(!!!guide_options)
-
+
plot_obj <- plot_obj + theme_minimal() + theme(
panel.border = element_rect(color = "black", fill = NA),
legend.text = element_text(size = 14),
@@ -716,7 +716,7 @@ mod_PCA_server <- function(input, output, session, parent_session){
legend.title = element_text(size = 16),
legend.position = "right" # Example position, adjust as needed
)
-
+
return(plot_obj)
})
@@ -916,7 +916,7 @@ mod_PCA_server <- function(input, output, session, parent_session){
ex <- system.file("iris_passport_file.csv", package = "BIGapp")
file.copy(ex, file)
})
-
+
output$download_pcs <- downloadHandler(
filename = function() {
paste0("PCs.csv")
diff --git a/R/mod_dapc.R b/R/mod_dapc.R
index 921bc39..ea3ae4f 100644
--- a/R/mod_dapc.R
+++ b/R/mod_dapc.R
@@ -205,29 +205,29 @@ mod_dapc_server <- function(input, output, session, parent_session){
#Import genotype information if in VCF format
#### VCF sanity check
checks <- vcf_sanity_check(geno)
-
+
error_if_false <- c(
"VCF_header", "VCF_columns", "unique_FORMAT", "GT",
- "samples"
+ "samples", "VCF_compressed"
)
-
+
error_if_true <- c(
- "multiallelics", "phased_GT", "mixed_ploidies",
+ "multiallelics", "mixed_ploidies",
"duplicated_samples", "duplicated_markers"
)
-
+
warning_if_false <- c("chrom_info", "pos_info", "ref_alt")
-
- checks_result <- vcf_sanity_messages(checks,
- error_if_false,
- error_if_true,
- warning_if_false = warning_if_false,
+
+ checks_result <- vcf_sanity_messages(checks,
+ error_if_false,
+ error_if_true,
+ warning_if_false = warning_if_false,
warning_if_true = NULL,
input_ploidy = as.numeric(ploidy))
-
+
if(checks_result) return() # Stop the analysis if checks fail
#########
-
+
vcf <- read.vcfR(geno)
#Get items in FORMAT column
@@ -297,29 +297,29 @@ mod_dapc_server <- function(input, output, session, parent_session){
#Import genotype information if in VCF format
#### VCF sanity check
checks <- vcf_sanity_check(geno)
-
+
error_if_false <- c(
"VCF_header", "VCF_columns", "unique_FORMAT", "GT",
"samples", "VCF_compressed"
)
-
+
error_if_true <- c(
"multiallelics", "phased_GT", "mixed_ploidies",
"duplicated_samples", "duplicated_markers"
)
-
+
warning_if_false <- c("chrom_info", "pos_info", "ref_alt","max_markers")
-
- checks_result <- vcf_sanity_messages(checks,
- error_if_false,
- error_if_true,
- warning_if_false = warning_if_false,
+
+ checks_result <- vcf_sanity_messages(checks,
+ error_if_false,
+ error_if_true,
+ warning_if_false = warning_if_false,
warning_if_true = NULL,
input_ploidy = as.numeric(ploidy))
-
+
if(checks_result) return() # Stop the analysis if checks fail
#########
-
+
vcf <- read.vcfR(geno)
#Get items in FORMAT column
@@ -405,9 +405,9 @@ mod_dapc_server <- function(input, output, session, parent_session){
posi.da = "topright",
posi.pca="bottomright",
mstree = F, # lines connecting clusters
- lwd = 1,
+ lwd = 1,
lty = 2,
- legeng = F,
+ legeng = F,
clabel = 1) # legend and label of legend clusters. clab 0 or 1
})
diff --git a/R/mod_diversity.R b/R/mod_diversity.R
index a10c089..233eef3 100644
--- a/R/mod_diversity.R
+++ b/R/mod_diversity.R
@@ -143,14 +143,14 @@ mod_diversity_server <- function(input, output, session, parent_session){
# expand specific box
updateBox(id = "Genomic_Diversity_box", action = "toggle", session = parent_session)
})
-
+
##UI text
output$dosage_text <- renderUI({
# Check if input$plot_tabs is NULL before evaluating it
if (is.null(input$diversity_plot_tabs)) {
return(NULL)
}
-
+
# Render the text only for the "Dosage Plot" tab
if (input$diversity_plot_tabs == "Dosage Plot" && !is.null(diversity_items$dosage_df)) {
div(
@@ -226,29 +226,29 @@ mod_diversity_server <- function(input, output, session, parent_session){
#Import genotype information if in VCF format
#### VCF sanity check
checks <- vcf_sanity_check(geno)
-
+
error_if_false <- c(
"VCF_header", "VCF_columns", "unique_FORMAT", "GT",
"samples", "chrom_info", "pos_info", "VCF_compressed"
)
-
+
error_if_true <- c(
- "multiallelics", "phased_GT", "mixed_ploidies",
+ "multiallelics", "mixed_ploidies",
"duplicated_samples", "duplicated_markers"
)
-
+
warning_if_false <- c("ref_alt","max_markers")
-
- checks_result <- vcf_sanity_messages(checks,
- error_if_false,
- error_if_true,
- warning_if_false = NULL,
+
+ checks_result <- vcf_sanity_messages(checks,
+ error_if_false,
+ error_if_true,
+ warning_if_false = NULL,
warning_if_true = NULL,
input_ploidy = ploidy)
-
+
if(checks_result) return() # Stop the analysis if checks fail
#########
-
+
vcf <- read.vcfR(geno, verbose = FALSE)
#Save position information
diff --git a/R/mod_dosage2vcf.R b/R/mod_dosage2vcf.R
index b168d4f..2194a1a 100644
--- a/R/mod_dosage2vcf.R
+++ b/R/mod_dosage2vcf.R
@@ -29,11 +29,11 @@ mod_dosage2vcf_ui <- function(id){
title = "Inputs", width=12, status = "info", solidHeader = TRUE, collapsible = FALSE, collapsed = FALSE,
selectInput(ns('file_type'),
label = 'Select File Format',
- choices = c("DArT MADC file","DArT Dosage Reports", "Dosage Matrix"),
+ choices = c("DArT MADC file","DArT Dosage/SNP Report", "Dosage Matrix"),
selected = "DArT MADC file"),
- conditionalPanel(condition = "input.file_type == 'DArT Dosage Reports'",
+ conditionalPanel(condition = "input.file_type == 'DArT Dosage/SNP Report'",
ns = ns,
- fileInput(ns("report_file"), "Choose DArT Dose Report File", accept = c(".csv")),
+ fileInput(ns("report_file"), "Choose DArT Dose/SNP Report File", accept = c(".csv")),
fileInput(ns("counts_file"), "Choose DArT Counts File", accept = c(".csv")),
numericInput(ns("dosage2vcf_ploidy"), "Species Ploidy", min = 1, value = NULL)
),
@@ -49,47 +49,60 @@ mod_dosage2vcf_ui <- function(id){
hr(),
radioButtons(ns("snp_type"),
label = "Select Marker Type",
- choices = list("Target"= "target", "Target + Off-Target" = "target_off"),
+ choices = list("Target"= "target", "Target + Off-Target" = "target_off", "Microhaplotypes" = "multiallelic"),
selected = "target"),
selectInput(ns('species'),
label = 'Species',
- choices = c("alfalfa","blueberry", "cranberry", "cucumber", "lettuce", "pecan", "sweetpotato", "other"),
+ choices = c("alfalfa", "blueberry", "cranberry", "cucumber", "pecan", "potato", "strawberry", "sweetpotato", "other"),
selected = "alfalfa"),
conditionalPanel(condition = "input.snp_type == 'target_off'",
ns = ns,
conditionalPanel(condition = "input.species == 'other'",
ns = ns,
fileInput(ns("botloci_file"), "Upload bottom strand probes file (.botloci)"),
+ fileInput(ns("hapDB_file"), "Upload haplotype database file (fasta) (optional)"),
+ fileInput(ns("markers_info_file"), "Upload markers information (_lut.csv from HapApp) (optional)"),
),
- fileInput(ns("hapDB_file"), "Upload haplotype database file (fasta) (optional)"),
- sliderInput(ns("cores"), "Number of CPU Cores", min = 1, max = (availableCores() - 1), value = 1, step = 1)
+ sliderInput(ns("cores"), "Number of CPU Cores", min = 1, max = max(1, availableCores() - 1), value = 1, step = 1), br(),
+ div(
+ style = "text-align: left; margin-top: 10px;",
+ actionButton(ns("advanced_options_all"),
+ label = HTML(paste(icon("cog", style = "color: #007bff;"), "Advanced Options")),
+ style = "background-color: transparent; border: none; color: #007bff; font-size: smaller; text-decoration: underline; padding: 0;"
+ )
+ )
),
conditionalPanel(condition = "input.snp_type == 'target'",
ns = ns,
+ radioButtons(ns("collapse_matches_counts"),
+ label = "Collapse Matches Counts:",
+ choices = list("Yes"= TRUE, "No" = FALSE),
+ selected = FALSE),
conditionalPanel(condition = "input.species == 'other'",
ns = ns,
radioButtons(ns("ref_alt"),
label = "Extract REF and ALT info:",
- choices = list("Yes"= "TRUE", "No" = "FALSE"),
- selected = "TRUE"),
+ choices = list("Yes"= TRUE, "No" = FALSE),
+ selected = TRUE),
conditionalPanel(condition = "input.ref_alt == 'TRUE'",
ns = ns,
fileInput(ns("botloci_file"), "Upload bottom strand probes file (.botloci)"),
+ fileInput(ns("hapDB_file"), "Upload haplotype database file (fasta) (optional)"),
+ fileInput(ns("markers_info_file"), "Upload markers information (_lut.csv from HapApp) (optional)"),
)
)
)
),
hr(),
- textInput(ns("d2v_output_name"), "Output File Name"),
+ textInput(ns("d2v_output_name"), "Output File Name", placeholder = "e.g. my_output"),
hr(),
- actionButton(ns("run_analysis"), "Convert File"),
+ actionButton(ns("run_analysis"), "Convert File", width = "100%"), br(), br(),
uiOutput(ns('mybutton')),
-
+
div(style="display:inline-block; float:right",dropdownButton(
HTML("Input files"),
- p(downloadButton(ns('download_dose'), ""), "Dose Report Example File"),
- p(downloadButton(ns('download_counts'), ""), "Counts Example File"),
- p(downloadButton(ns('download_madc'), ""), "MADC Example File"),hr(),
+ p(downloadButton(ns('download_madc_fixed'), ""), "Fixed Allele MADC Example File"),
+ p(downloadButton(ns('download_madc'), ""), "Raw MADC Example File"),hr(),
p(HTML("Parameters description:"), actionButton(ns("goPar"), icon("arrow-up-right-from-square", verify_fa = FALSE) )), hr(),
p(HTML("Results description:"), actionButton(ns("goRes"), icon("arrow-up-right-from-square", verify_fa = FALSE) )), hr(),
p(HTML("How to cite:"), actionButton(ns("goCite"), icon("arrow-up-right-from-square", verify_fa = FALSE) )), hr(),
@@ -101,12 +114,17 @@ mod_dosage2vcf_ui <- function(id){
))
)
),
- column(width = 4,
- box(title = "Status", width = 12, collapsible = TRUE, status = "info",
- progressBar(id = ns("dosage2vcf_pb"), value = 0, status = "info", display_pct = TRUE, striped = TRUE, title = " ")
+ column(width = 5,
+ box(title = "Status & Log", width = 12, collapsible = TRUE, status = "info",
+ progressBar(id = ns("dosage2vcf_pb"), value = 0, status = "info", display_pct = TRUE, striped = TRUE, title = " "),
+ hr(),
+ tags$style(HTML(paste0(
+ "#", ns("d2vcf_log"), " { max-height: 300px; overflow-y: auto; background: #1e1e1e !important; color: #d4d4d4; padding: 10px; border-radius: 4px; font-size: 12px; }"
+ ))),
+ verbatimTextOutput(ns("d2vcf_log")),
+ uiOutput(ns("download_d2vcf_log_btn"))
)
- ),
- column(width = 1),
+ )
)
)
)
@@ -120,80 +138,190 @@ mod_dosage2vcf_ui <- function(id){
#'
#' @noRd
mod_dosage2vcf_server <- function(input, output, session, parent_session){
-
+
ns <- session$ns
-
+
+ # Reactive value to accumulate log messages
+ d2vcf_log <- reactiveVal("")
+
+ # Clear log when a new analysis is triggered
+ observeEvent(input$run_analysis, {
+ d2vcf_log("")
+ }, priority = 10)
+
+ output$d2vcf_log <- renderText({
+ d2vcf_log()
+ })
+
+ output$download_d2vcf_log_btn <- renderUI({
+ req(nchar(d2vcf_log()) > 0)
+ downloadButton(ns("download_d2vcf_log"), "Download Log", style = "margin-top: 8px;")
+ })
+
+ output$download_d2vcf_log <- downloadHandler(
+ filename = function() {
+ paste0("dosage2vcf_log_", Sys.Date(), ".txt")
+ },
+ content = function(file) {
+ writeLines(d2vcf_log(), file)
+ }
+ )
+
# Help links
observeEvent(input$goPar, {
# change to help tab
updatebs4TabItems(session = parent_session, inputId = "MainMenu",
selected = "help")
-
+
# select specific tab
updateTabsetPanel(session = parent_session, inputId = "DArT_Report2VCF_tabset",
selected = "DArT_Report2VCF_par")
# expand specific box
updateBox(id = "DArT_Report2VCF_box", action = "toggle", session = parent_session)
})
-
+
observeEvent(input$goRes, {
# change to help tab
updatebs4TabItems(session = parent_session, inputId = "MainMenu",
selected = "help")
-
+
# select specific tab
updateTabsetPanel(session = parent_session, inputId = "DArT_Report2VCF_tabset",
selected = "DArT_Report2VCF_results")
# expand specific box
updateBox(id = "DArT_Report2VCF_box", action = "toggle", session = parent_session)
})
-
+
observeEvent(input$goCite, {
# change to help tab
updatebs4TabItems(session = parent_session, inputId = "MainMenu",
selected = "help")
-
+
# select specific tab
updateTabsetPanel(session = parent_session, inputId = "DArT_Report2VCF_tabset",
selected = "DArT_Report2VCF_cite")
# expand specific box
updateBox(id = "DArT_Report2VCF_box", action = "toggle", session = parent_session)
})
-
+
disable("download_d2vcf")
-
- output$download_dose <- downloadHandler(
- filename = function() {
- paste0("BIGapp_Dose_Report_Example_file.csv")
- },
- content = function(file) {
- ex <- system.file("iris_DArT_Allele_Dose_Report.csv", package = "BIGapp")
- file.copy(ex, file)
- })
-
- output$download_counts <- downloadHandler(
+
+ output$download_madc <- downloadHandler(
filename = function() {
- paste0("BIGapp_Counts_Example_file.csv")
+ paste0("BIGapp_MADC_Example_file.csv")
},
content = function(file) {
- ex <- system.file("iris_DArT_Counts.csv", package = "BIGapp")
+ ex <- system.file("iris_DArT_MADC.csv", package = "BIGapp")
file.copy(ex, file)
})
-
- output$download_madc <- downloadHandler(
+
+ output$download_madc_fixed <- downloadHandler(
filename = function() {
- paste0("BIGapp_MADC_Example_file.csv")
+ paste0("BIGapp_MADC_Fixed_Example_file.csv")
},
content = function(file) {
- ex <- system.file("iris_DArT_MADC.csv", package = "BIGapp")
- file.copy(ex, file)
+ github_path <- "https://raw.githubusercontent.com/Breeding-Insight/BIGapp-PanelHub/refs/heads/long_seq/"
+ alfalfa_madc <- paste0(github_path, "test_madcs/alfalfa_madc.csv")
+ download.file(alfalfa_madc, destfile = file, mode = "wb")
})
-
-
+
+ # Default model choices
+ advanced_options_all <- reactiveValues(
+ rm_multiallelic_SNP = TRUE,
+ multiallelic_SNP_dp_thr = 0,
+ multiallelic_SNP_sample_thr = 0,
+ alignment_score_thr = 40,
+ add_others = FALSE,
+ others_max_snps = 5,
+ others_rm_with_indels = TRUE
+ )
+
+ # Close popup window when user "saves options"
+ observeEvent(input$save_advanced_options_all, {
+ advanced_options_all$rm_multiallelic_SNP <- input$rm_multiallelic_SNP
+ advanced_options_all$multiallelic_SNP_dp_thr <- input$multiallelic_SNP_dp_thr
+ advanced_options_all$multiallelic_SNP_sample_thr <- input$multiallelic_SNP_sample_thr
+ advanced_options_all$alignment_score_thr <- input$alignment_score_thr
+ advanced_options_all$add_others <- input$add_others
+ advanced_options_all$others_max_snps <- input$others_max_snps
+ advanced_options_all$others_rm_with_indels <- as.logical(input$others_rm_with_indels)
+ # Save other inputs as needed
+
+ removeModal() # Close the modal after saving
+ })
+
+ # UI popup window for input
+ observeEvent(input$advanced_options_all, {
+ showModal(modalDialog(
+ title = "Advanced Options",
+
+ numericInput(
+ inputId = ns("alignment_score_thr"),
+ label = "Alignment Score Threshold",
+ value = advanced_options_all$alignment_score_thr,
+ min = 0
+ ), hr(),
+ # rm_multiallelic_SNP
+ selectInput(
+ inputId = ns("rm_multiallelic_SNP"),
+ label = "Remove Multiallelic SNPs",
+ choices = c("TRUE" = TRUE, "FALSE" = FALSE),
+ selected = advanced_options_all$rm_multiallelic_SNP
+ ),
+
+ # Conditionally show thresholds when rm_multiallelic_SNP is FALSE
+ conditionalPanel(
+ condition = paste0("input['", ns("rm_multiallelic_SNP"), "'] == 'FALSE'"),
+ numericInput(
+ inputId = ns("multiallelic_SNP_dp_thr"),
+ label = "Multiallelic SNP Depth Threshold",
+ value = advanced_options_all$multiallelic_SNP_dp_thr,
+ min = 0
+ ),
+ numericInput(
+ inputId = ns("multiallelic_SNP_sample_thr"),
+ label = "Multiallelic SNP Sample Threshold",
+ value = advanced_options_all$multiallelic_SNP_sample_thr,
+ min = 0
+ )
+ ), hr(),
+
+ # add_others
+ selectInput(
+ inputId = ns("add_others"),
+ label = "Add Others",
+ choices = c("TRUE" = TRUE, "FALSE" = FALSE),
+ selected = advanced_options_all$add_others
+ ),
+
+ # Conditionally show others options when add_others is TRUE
+ conditionalPanel(
+ condition = paste0("input['", ns("add_others"), "'] == 'TRUE'"),
+ numericInput(
+ inputId = ns("others_max_snps"),
+ label = "Others Max SNPs",
+ value = advanced_options_all$others_max_snps,
+ min = 0
+ ),
+ selectInput(
+ inputId = ns("others_rm_with_indels"),
+ label = "Remove Others with Indels",
+ choices = c("TRUE" = TRUE, "FALSE" = FALSE),
+ selected = advanced_options_all$others_rm_with_indels
+ )
+ ),
+
+ footer = tagList(
+ modalButton("Close"),
+ actionButton(ns("save_advanced_options_all"), "Save")
+ )
+ ))
+ })
+
vcf_out <- eventReactive(input$run_analysis,{
# Ensure the files are uploaded
# Missing input with red border and alerts
- if(input$file_type == "DArT Dosage Reports"){
+ if(input$file_type == "DArT Dosage/SNP Report"){
if (is.null(input$report_file$datapath) | is.null(input$counts_file$datapath) | input$d2v_output_name == "" | input$dosage2vcf_ploidy == "") {
shinyalert(
title = "Missing input!",
@@ -215,49 +343,84 @@ mod_dosage2vcf_server <- function(input, output, session, parent_session){
dosage_file <- input$report_file$datapath
counts_file <- input$counts_file$datapath
ploidy <- input$dosage2vcf_ploidy
-
+
# Use a temporary file path without appending .vcf
temp_base <- tempfile()
-
+
#Status
updateProgressBar(session = session, id = "dosage2vcf_pb", value = 10, title = "Converting DArT files to VCF")
-
+
# Convert to VCF using the BIGr package
cat("Running BIGr::dosage2vcf...\n")
-
+
updateProgressBar(session = session, id = "dosage2vcf_pb", value = 50, title = "Writing VCF")
-
+
dosage2vcf(
dart.report = dosage_file,
dart.counts = counts_file,
output.file = temp_base,
ploidy = as.numeric(ploidy)
)
-
+
# The output file should be temp_base.vcf
output_name <- paste0(temp_base, ".vcf")
-
+
updateProgressBar(session = session, id = "dosage2vcf_pb", value = 100, title = "Complete!")
-
+
return(output_name)
-
+
} else if(input$file_type == "DArT MADC file"){
req(input$madc_file)
# First check if the MADC file is valid (a non-fixedAlleleID MADC file)
-
- #Read only the first column for the first seven rows
- first_seven_rows <- read.csv(input$madc_file$datapath, header = FALSE, nrows = 7, colClasses = c(NA, "NULL"))
-
- #Check if all entries in the first column are either blank or "*"
- check_entries <- all(first_seven_rows[, 1] %in% c("", "*"))
-
- #Check if the MADC file has the filler rows or is processed from updated fixed allele ID pipeline
- if (check_entries) {
- #Note: This assumes that the first 7 rows are placeholder info from DArT processing
+
+ # Select species botloci
+ github_path <- "https://raw.githubusercontent.com/Breeding-Insight/BIGapp-PanelHub/refs/heads/long_seq/"
+
+ botloci <- switch(input$species,
+ "alfalfa" = paste0(github_path, "alfalfa/20201030-BI-Alfalfa_SNPs_DArTag-probe-design_f180bp.botloci"),
+ "blueberry" = paste0(github_path, "blueberry/20200819-BI-Blueberry_10K_SNPs_forDArT_3K_ref_alt.botloci"),
+ "cranberry" = paste0(github_path, "cranberry/Cranberry_unique_alignment_126MAS_3K_54BB_rmDupTags_f180bp.botloci"),
+ "cucumber" = paste0(github_path, "cucumber/Cucumber_DArT3K_10192022_f180bp.botloci"),
+ "pecan" = paste0(github_path, "pecan/Pecan_unique_alignment_top48_MAS_14K_3K_f180bp.botloci"),
+ "potato" = paste0(github_path, "potato/potato_20K_SNPset_f180bp_forDArT_3K_f180bp.botloci"),
+ "strawberry" = paste0(github_path, "strawberry/strawberry_20K_SNPset_f180bp_forDArT_3K_f180bp.botloci"),
+ "sweetpotato" = paste0(github_path, "sweetpotato/sweetpotato_20K_SNPset_f180bp_forDArT_3K_f180bp.botloci"),
+ "other" = input$botloci_file$datapath)
+
+ microhapDB <- switch(input$species,
+ "alfalfa" = paste0(github_path, "alfalfa/alfalfa_allele_db_v001.fa"),
+ "blueberry" = paste0(github_path, "blueberry/blueberry_allele_db_v001.fa"),
+ "cranberry" = paste0(github_path, "cranberry/cranberry_allele_db_v001.fa"),
+ "cucumber" = paste0(github_path, "cucumber/cucumber_allele_db_v001.fa"),
+ "pecan" = paste0(github_path, "pecan/pecan_allele_db_v001.fa"),
+ "potato" = paste0(github_path, "potato/potato_allele_db_v001.fa"),
+ "strawberry" = paste0(github_path, "strawberry/strawberry_allele_db_v001.fa"),
+ "sweetpotato" = paste0(github_path, "sweetpotato/sweetpotato_allele_db_v000_refAlt109bp.fa"),
+ "other" = input$hapDB_file$datapath)
+
+ markers_info <- switch(input$species,
+ "alfalfa" = paste0(github_path, "alfalfa/20201030-BI-Alfalfa_SNPs_DArTag-probe-design_snpID_lut.csv"),
+ "blueberry" = paste0(github_path, "blueberry/20200819-BI-Blueberry_10K_SNPs_forDArT_3K_chrID_snpID_lut.csv"),
+ "cranberry" = paste0(github_path, "cranberry/Cranberry_unique_alignment_126MAS_3K_54BB_rmDupTags_lut.csv"),
+ "cucumber" = paste0(github_path, "cucumber/20221019-BI-Cucumber_DArTag-probe-desgin_snpID_lut.csv"),
+ "pecan" = paste0(github_path, "pecan/Pecan_unique_alignment_top48_MAS_14K_3K_snpID_lut.csv"),
+ "potato" = paste0(github_path, "potato/potato_dartag_v2_3915markers_rm7dupTags_6traitMarkers_rm1dup_snpID_lut.csv"),
+ "strawberry" = paste0(github_path, "strawberry/strawberry_9k_probe_3K_snpID_lut.csv"),
+ "sweetpotato" = paste0(github_path, "sweetpotato/sweetpotato_20K_SNPset_f180bp_forDArT_3K_snpID_lut.csv"),
+ "other" = if (!is.null(input$markers_info_file)) input$markers_info_file$datapath else NULL)
+
+ # Guard optional inputs (may be NULL if not uploaded)
+ microhapDB <- if (length(microhapDB) == 0) NULL else microhapDB
+ markers_info <- if (length(markers_info) == 0) NULL else markers_info
+
+ #Now perform conversion depending on user options
+
+ # Shared setup for all MADC snp_type branches
+ if (is.null(input$madc_file$datapath) | input$d2v_output_name == "") {
shinyalert(
- title = "Raw MADC!",
- text = "This MADC file has not been processed by the updated Breeding Insight fixed allele ID pipeline.",
- size = "m",
+ title = "Missing input!",
+ text = "Upload MADC and/or define a output file name",
+ size = "s",
closeOnEsc = TRUE,
closeOnClickOutside = FALSE,
html = TRUE,
@@ -268,106 +431,101 @@ mod_dosage2vcf_server <- function(input, output, session, parent_session){
showCancelButton = FALSE,
animation = TRUE
)
-
- #Exit the analysis
- return()
}
-
- # Select species botloci
- botloci <- switch(input$species,
- "alfalfa" = "https://raw.githubusercontent.com/Breeding-Insight/BIGapp-PanelHub/refs/heads/main/alfalfa/20201030-BI-Alfalfa_SNPs_DArTag-probe-design_f180bp.botloci",
- "blueberry" = "https://raw.githubusercontent.com/Breeding-Insight/BIGapp-PanelHub/refs/heads/main/blueberry/20200819-BI-Blueberry_10K_SNPs_forDArT_3K_ref_alt.botloci",
- "cranberry" = "https://raw.githubusercontent.com/Breeding-Insight/BIGapp-PanelHub/refs/heads/main/cranberry/Cranberry_unique_alignment_126MAS_3K_54BB_rmDupTags_f180bp.botloci",
- "cucumber" = "https://raw.githubusercontent.com/Breeding-Insight/BIGapp-PanelHub/refs/heads/main/cucumber/Cucumber_DArT3K_10192022_f180bp.botloci",
- "lettuce" = "https://raw.githubusercontent.com/Breeding-Insight/BIGapp-PanelHub/refs/heads/main/lettuce/Lettuce_DArT3K_08172022_bait_f180bp.botloci",
- "pecan" = "https://raw.githubusercontent.com/Breeding-Insight/BIGapp-PanelHub/refs/heads/main/pecan/Pecan_unique_alignment_top48_MAS_14K_3K_f180bp.botloci",
- "sweetpotato" = "https://raw.githubusercontent.com/Breeding-Insight/BIGapp-PanelHub/refs/heads/main/sweetpotato/sweetpotato_20K_SNPset_f180bp_forDArT_3K_f180bp.botloci",
- "other" = input$botloci_file$datapath)
-
- #Now perform conversion depending on user options
- if(input$snp_type == "target_off"){
- if (is.null(input$madc_file$datapath) | input$d2v_output_name == "") {
+ req(input$madc_file)
+
+ # Use a temporary file path without appending .vcf
+ temp_base <- tempfile()
+
+ # Merge MADC if multiple
+ if(length(input$madc_file$datapath) > 1){
+ updateProgressBar(session = session, id = "dosage2vcf_pb", value = 15, title = "Merging MADC files")
+
+ merged_madc <- paste0(temp_base, ".csv")
+
+ run_ids <- sapply(strsplit(input$madc_file$name, "_"), "[[", 1)
+ if(length(run_ids) == 0) run_ids <- NULL
+
+ merge_MADCs(madc_list = as.list(input$madc_file$datapath), out_madc = merged_madc, run_ids = run_ids)
+ read_madc <- merged_madc
+ } else read_madc <- input$madc_file$datapath
+
+ # The output file should be temp_base.vcf
+ output_name <- paste0(temp_base, ".vcf")
+
+ updateProgressBar(session = session, id = "dosage2vcf_pb", value = 30, title = "Writing VCF")
+
+ log_lines <- character(0)
+ tryCatch(
+ withCallingHandlers(
+ {
+ if(input$snp_type == "target_off"){
+ madc2vcf_all(madc = read_madc,
+ botloci_file = botloci,
+ hap_seq_file = microhapDB,
+ markers_info = markers_info,
+ n.cores= input$cores,
+ rm_multiallelic_SNP = as.logical(advanced_options_all$rm_multiallelic_SNP),
+ multiallelic_SNP_dp_thr = advanced_options_all$multiallelic_SNP_dp_thr,
+ multiallelic_SNP_sample_thr = advanced_options_all$multiallelic_SNP_sample_thr,
+ alignment_score_thr = advanced_options_all$alignment_score_thr,
+ add_others = as.logical(advanced_options_all$add_others),
+ others_max_snps = advanced_options_all$others_max_snps,
+ others_rm_with_indels = as.logical(advanced_options_all$others_rm_with_indels),
+ out_vcf = output_name,
+ verbose = TRUE)
+ } else if(input$snp_type == "target"){
+ madc2vcf_targets(read_madc, output_name, get_REF_ALT = as.logical(input$ref_alt), botloci_file = botloci,
+ markers_info = markers_info, collapse_matches_counts = input$collapse_matches_counts)
+ } else if(input$snp_type == "multiallelic"){
+ madc2vcf_multi(
+ madc_file = read_madc,
+ botloci_file = botloci,
+ outfile = output_name,
+ markers_info = markers_info,
+ ploidy = 4L,
+ verbose = TRUE
+ )
+ }
+ },
+ message = function(m) {
+ log_lines <<- c(log_lines, conditionMessage(m))
+ invokeRestart("muffleMessage")
+ },
+ warning = function(w) {
+ log_lines <<- c(log_lines, paste0("Warning: ", conditionMessage(w), "\n"))
+ shinyalert(
+ title = "Warning",
+ text = conditionMessage(w),
+ size = "s",
+ type = "warning",
+ showConfirmButton = TRUE,
+ confirmButtonText = "OK",
+ confirmButtonCol = "#004192"
+ )
+ invokeRestart("muffleWarning")
+ }
+ ),
+ error = function(e) {
+ log_lines <<- c(log_lines, paste0("Error: ", conditionMessage(e), "\n"))
shinyalert(
- title = "Missing input!",
- text = "Upload MADC and/or define a output file name",
+ title = "Error",
+ text = conditionMessage(e),
size = "s",
- closeOnEsc = TRUE,
- closeOnClickOutside = FALSE,
- html = TRUE,
type = "error",
showConfirmButton = TRUE,
confirmButtonText = "OK",
- confirmButtonCol = "#004192",
- showCancelButton = FALSE,
- animation = TRUE
+ confirmButtonCol = "#004192"
)
}
- req(input$madc_file)
-
- # Use a temporary file path without appending .vcf
- temp_base <- tempfile()
-
- # merge MADC if multiple
- if(length(input$madc_file$datapath) >1){
- updateProgressBar(session = session, id = "dosage2vcf_pb", value = 15, title = "Merging MADC files")
-
- merged_madc <- paste0(temp_base, ".csv")
-
- run_ids <- sapply(strsplit(input$madc_file$name, "_"), "[[",1)
- if(length(run_ids) == 0) run_ids <- NULL
-
- merge_MADCs(madc_list = as.list(input$madc_file$datapath), out_madc = merged_madc,run_ids = run_ids)
- read_madc <- merged_madc
- } else read_madc <- input$madc_file$datapath
-
- # The output file should be temp_base.vcf
- output_name <- paste0(temp_base, ".vcf")
-
- updateProgressBar(session = session, id = "dosage2vcf_pb", value = 30, title = "Writing VCF")
-
- madc2vcf_all(madc = read_madc,
- botloci_file = botloci,
- hap_seq_file = input$hapDB_file$datapath,
- n.cores= input$cores,
- rm_multiallelic_SNP = TRUE,
- out_vcf = output_name,
- verbose = FALSE)
-
- updateProgressBar(session = session, id = "dosage2vcf_pb", value = 100, title = "Complete!")
-
- return(output_name)
-
- } else if(input$snp_type == "target"){
-
- # Use a temporary file path without appending .vcf
- temp_base <- tempfile()
-
- # merge MADC if multiple
- if(length(input$madc_file$datapath) >1){
- updateProgressBar(session = session, id = "dosage2vcf_pb", value = 15, title = "Merging MADC files")
-
- merged_madc <- paste0(temp_base, ".csv")
-
- run_ids <- sapply(strsplit(input$madc_file$name, "_"), "[[",1)
- if(length(run_ids) == 0) run_ids <- NULL
-
- merge_MADCs(madc_list = as.list(input$madc_file$datapath), out_madc = merged_madc,run_ids = run_ids)
- read_madc <- merged_madc
- } else read_madc <- input$madc_file$datapath
-
-
- # The output file should be temp_base.vcf
- output_name <- paste0(temp_base, ".vcf")
-
- updateProgressBar(session = session, id = "dosage2vcf_pb", value = 30, title = "Writing VCF")
- madc2vcf_targets(read_madc, output_name, get_REF_ALT = input$ref_alt, botloci_file = botloci)
-
- updateProgressBar(session = session, id = "dosage2vcf_pb", value = 100, title = "Complete!")
- return(output_name)
- }
-
+ )
+
+ d2vcf_log(paste(log_lines, collapse = ""))
+ updateProgressBar(session = session, id = "dosage2vcf_pb", value = 100, title = "Complete!")
+ return(output_name)
+
} else if (input$file_type == "Dosage Matrix"){
-
+
if (is.null(input$matrix_file$datapath) | input$d2v_output_name == "" | input$dosage2vcf_ploidy == "") {
shinyalert(
title = "Missing input!",
@@ -387,42 +545,42 @@ mod_dosage2vcf_server <- function(input, output, session, parent_session){
req(input$matrix_file, input$d2v_output_name, input$dosage2vcf_ploidy)
#Status
updateProgressBar(session = session, id = "dosage2vcf_pb", value = 10, title = "Converting matrix to VCF")
-
+
# Get the uploaded file paths
matrix_file <- input$matrix_file$datapath
ploidy <- input$dosage2vcf_ploidy
-
+
# Use a temporary file path without appending .vcf
temp_base <- tempfile()
-
+
# The output file should be temp_base.vcf
output_name <- paste0(temp_base, ".vcf")
-
+
#Status
updateProgressBar(session = session, id = "dosage2vcf_pb", value = 50, title = "Writing VCF")
-
+
# Convert to VCF using the BIGr package
gmatrix2vcf(Gmat.file = matrix_file,
ploidy = ploidy,
output.file = output_name,
dosageCount = input$dosage_counts)
-
+
#Status
updateProgressBar(session = session, id = "dosage2vcf_pb", value = 100, title = "Complete!")
-
+
return(output_name)
-
+
}
-
+
})
-
+
# Only make available the download button when analysis is finished
output$mybutton <- renderUI({
if(isTruthy(vcf_out()))
downloadButton(ns("download_d2vcf"), "Download VCF file", class = "butt")
})
-
-
+
+
##This is for the DArT files conversion to VCF
output$download_d2vcf <- downloadHandler(
filename = function() {
@@ -433,14 +591,14 @@ mod_dosage2vcf_server <- function(input, output, session, parent_session){
bgzip_compress(vcf_out(), file)
}
)
-
+
##Summary Info
d2vcf_summary_info <- function() {
#Handle possible NULL values for inputs
report_file_name <- if (!is.null(input$report_file$name)) input$report_file$name else "No file selected"
counts_file_name <- if (!is.null(input$counts_file$name)) input$counts_file$name else "No file selected"
selected_ploidy <- if (!is.null(input$dosage2vcf_ploidy)) as.character(input$dosage2vcf_ploidy) else "Not selected"
-
+
#Print the summary information
cat(
"BIGapp Dosage2VCF Summary\n",
@@ -464,7 +622,7 @@ mod_dosage2vcf_server <- function(input, output, session, parent_session){
sep = ""
)
}
-
+
# Popup for analysis summary
observeEvent(input$d2vcf_summary, {
showModal(modalDialog(
@@ -480,8 +638,8 @@ mod_dosage2vcf_server <- function(input, output, session, parent_session){
)
))
})
-
-
+
+
# Download Summary Info
output$download_d2vcf_info <- downloadHandler(
filename = function() {
diff --git a/R/mod_gwas.R b/R/mod_gwas.R
index d308a9c..b72b853 100644
--- a/R/mod_gwas.R
+++ b/R/mod_gwas.R
@@ -190,7 +190,7 @@ mod_gwas_server <- function(input, output, session, parent_session){
# expand specific box
updateBox(id = "GWAS_box", action = "toggle", session = parent_session)
})
-
+
#Default choices
trait_options <- reactiveValues(
missing_data = "NA",
@@ -198,25 +198,25 @@ mod_gwas_server <- function(input, output, session, parent_session){
sample_column = NULL,
file_type = NULL
)
-
+
#UI popup window for input
observeEvent(input$phenotype_file, {
req(input$phenotype_file)
#Get the column names of the csv file
info_df <- read.csv(input$phenotype_file$datapath, header = TRUE, check.names = FALSE, nrows=2)
info_df[,1] <- as.character(info_df[,1]) #Makes sure that the sample names are characters instead of numeric
-
+
# Read first 5 rows for preview
preview_data <- tryCatch({
head(read.csv(input$phenotype_file$datapath, nrows = 5, na.strings=trait_options$missing_data),5)
}, error = function(e) {
NULL
})
-
+
showModal(modalDialog(
title = "Trait File Options",
size= "l",
-
+
selectInput(
inputId = ns('missing_data'),
label = 'Missing Data Value',
@@ -243,7 +243,7 @@ mod_gwas_server <- function(input, output, session, parent_session){
label = 'Sample ID Column',
choices = colnames(info_df)
),
-
+
if (!is.null(preview_data)) {
div(
h4(
@@ -263,20 +263,20 @@ mod_gwas_server <- function(input, output, session, parent_session){
p("Could not load file preview.")
)
},
-
+
footer = tagList(
actionButton(ns("save_trait_options"), "Save")
)
))
-
+
# Render the preview table
output$file_preview <- renderTable({
req(preview_data)
preview_data
})
-
+
})
-
+
output$custom_missing_msg <- renderText({
if (input$missing_data == "Custom" && nchar(input$custom_missing) == 0) {
"Please enter a custom missing value."
@@ -285,7 +285,7 @@ mod_gwas_server <- function(input, output, session, parent_session){
}
})
-
+
#Close popup window when user "saves options"
observeEvent(input$save_trait_options, {
trait_options$missing_data <- input$missing_data
@@ -293,7 +293,7 @@ mod_gwas_server <- function(input, output, session, parent_session){
trait_options$sample_column <- input$sample_column
#trait_options$file_type
# Save other inputs as needed
-
+
if (input$missing_data == "Custom" && nchar(input$custom_missing) == 0) {
# Validation failed: display warning and prevent modal closure
showNotification(
@@ -303,10 +303,10 @@ mod_gwas_server <- function(input, output, session, parent_session){
)
return() # Stop further execution and keep the modal open
}
-
+
removeModal() # Close the modal after saving
})
-
+
#Call some plots to NULL so that the spinners do not show before analysis
output$bic_plot <- renderDT(NULL)
@@ -387,7 +387,7 @@ mod_gwas_server <- function(input, output, session, parent_session){
} else {
phenotype_file <- read.csv(input$phenotype_file$datapath, header = TRUE, check.names = FALSE, na.strings = trait_options$missing_data)
}
-
+
# Make the sample ID column the first column in the dataframe
sample_col_name <- input$sample_column
phenotype_file <- phenotype_file[, c(sample_col_name, setdiff(names(phenotype_file), sample_col_name))]
@@ -458,29 +458,29 @@ mod_gwas_server <- function(input, output, session, parent_session){
#Convert VCF file if submitted
#### VCF sanity check
checks <- vcf_sanity_check(input$gwas_file$datapath, max_markers = 10000)
-
+
error_if_false <- c(
"VCF_header", "VCF_columns", "unique_FORMAT", "GT",
"samples", "chrom_info", "pos_info", "VCF_compressed"
)
-
+
error_if_true <- c(
- "multiallelics", "phased_GT", "mixed_ploidies",
+ "multiallelics", "mixed_ploidies",
"duplicated_samples", "duplicated_markers"
)
-
+
warning_if_false <- c("ref_alt","max_markers")
-
- checks_result <- vcf_sanity_messages(checks,
- error_if_false,
- error_if_true,
- warning_if_false = warning_if_false,
+
+ checks_result <- vcf_sanity_messages(checks,
+ error_if_false,
+ error_if_true,
+ warning_if_false = warning_if_false,
warning_if_true = NULL,
input_ploidy = ploidy)
-
+
if(checks_result) return() # Stop the analysis if checks fail
#########
-
+
vcf <- read.vcfR(input$gwas_file$datapath, verbose = FALSE)
#Extract GT
@@ -898,8 +898,8 @@ mod_gwas_server <- function(input, output, session, parent_session){
)
})
-
-
+
+
output$download_viewpoly <- downloadHandler(
filename = function() {
paste0("BIGapp_Viewpoly_GWASpoly.RData")
@@ -909,7 +909,7 @@ mod_gwas_server <- function(input, output, session, parent_session){
save(temp, file = file)
}
)
-
+
#Download files for GWAS
output$download_gwas_file <- downloadHandler(
filename = function() {
diff --git a/README.md b/README.md
index 90199ef..76a3e6f 100644
--- a/README.md
+++ b/README.md
@@ -139,4 +139,4 @@ BIGapp development is supported by [Breeding Insight](https://www.breedinginsigh
If you use BIGapp in your research, please cite:
-Sandercock A.M., Peel M.D., Tanigut C.H., Chinchilla-Vargas J., Chen S., Sapkota M., Lin M., Zhao D., Ackerman A.J., Basnet B.R., Beil C.T., Sheehan M.J. (2025). BIGapp: A User-Friendly Genomic Tool Kit Identified Quantitative Trait Loci for Creeping Rootedness in Alfalfa (_Medicago sativa_ L.)., The Plant Genome. doi:https://doi.org/10.1002/tpg2.70067
+Sandercock A.M., Peel M.D., Taniguti C.H., Chinchilla-Vargas J., Chen S., Sapkota M., Lin M., Zhao D., Ackerman A.J., Basnet B.R., Beil C.T., Sheehan M.J. (2025). BIGapp: A User-Friendly Genomic Tool Kit Identified Quantitative Trait Loci for Creeping Rootedness in Alfalfa (_Medicago sativa_ L.)., The Plant Genome. doi:https://doi.org/10.1002/tpg2.70067
diff --git a/inst/app/www/IFAS.jpg b/inst/app/www/IFAS.jpg
new file mode 100644
index 0000000..d108a43
Binary files /dev/null and b/inst/app/www/IFAS.jpg differ
diff --git a/inst/help_files/DArT_Report2VCF_par.Rmd b/inst/help_files/DArT_Report2VCF_par.Rmd
index 6968708..5d57eb1 100644
--- a/inst/help_files/DArT_Report2VCF_par.Rmd
+++ b/inst/help_files/DArT_Report2VCF_par.Rmd
@@ -4,38 +4,183 @@ output: html_document
date: "2024-08-29"
---
-* **DArT MADC File**
+## **DArT MADC File**
The DArT MADC (Missing Allele Discovery Counts) file is a comma-separated file provided by DArT from a sequencing project. It includes the following key columns:
-* AlleleID: Typically contains the chromosome, position, reference or alternative allele designation, and the allele ID according to the haplotype database (e.g., `Chr01_000084128|Ref_0001`).
-* CloneID: Represents the chromosome and position (e.g., `Chr01_000084128`).
-* AlleleSequence: The amplicon sequence (e.g., `GAGTGTGAAGATTTGGACAAAAGAGGTTGGTTTTTACTGTTATGGCATTTATCTCCTTATAAAATTTTGTATTTTTTTTGT`).
-* Sample Columns: Named with sample IDs and containing counts of each allele per sample.
+- AlleleID: Typically contains the chromosome, position, reference or alternative allele designation, and the allele ID according to the haplotype database (e.g., `Chr01_000084128|Ref_0001`).
+- CloneID: Represents the chromosome and position (e.g., `Chr01_000084128`).
+- AlleleSequence: The amplicon sequence (e.g., `GAGTGTGAAGATTTGGACAAAAGAGGTTGGTTTTTACTGTTATGGCATTTATCTCCTTATAAAATTTTGTATTTTTTTTGT`).
+- Sample Columns: Named with sample IDs and containing counts of each allele per sample.
-**Important**: BIGapp requires fixed AlleleIDs tagged with a respective number (e.g., suffix: `_0001`). Fixed AlleleIDs are generated by matching the MADC contents to the haplotype database. If your MADC file does not contain fixed AlleleIDs, please contact Breeding Insight.
+We refer here to two different versions of this format, one named **raw MADC** and the other named **MADC with fixed AlleleIDs**.
+The first one contains the original AlleleIDs as provided by DArT, which may not be fixed (i.e., they may not have a suffix with a number).
+The second version contains fixed AlleleIDs, which can be generated by HapApp that matches the MADC contents to the haplotype database.
+HapApp not only fixes the AlleleIDs but also makes corrections such as removing adaptor sequences, adding missing REF and ALT sequences, replacing IUPAC codes,
+generating the markers information file (`_lut.csv`) with chromosome, positions, REF and ALT bases, whether the target is an indel, indel length and indel type (insertion or deletion).
+This information is critical for the conversion to VCF, especially if you want to extract target SNPs with their REF and ALT bases.
-* **Target vs. Off-Target SNPs**
+
+### **Target vs. Off-Target SNPs** vs. **Microhaplotypes**
If you provide a MADC file, you can choose between extracting:
* Only **target SNPs** – These are the SNPs for which probes were specifically designed. If your panel includes 3,000 SNPs, your output VCF file should contain exactly 3,000 target SNPs.
* Both **target** and **off-target** SNPs – In addition to target SNPs, BIGapp will identify other SNPs within the amplicon region.
+* **Microhaplotypes** – These are combinations of closely linked SNPs within the same amplicon that can be treated as a single multi-allelic marker.
+
+### **Requirements**
+
+BIGapp will run a series of checks to ensure that the necessary information is available for the type of SNPs you want to extract.
+It will inform you if the operation requested is possible or not for your dataset and files provided.
+
+Here are some of the checks that BIGapp will perform:
+
+- If MADC has the expected columns
+- Presence of columns and rows with all NA
+- Presence of IUPAC codes on AlleleSequence
+- Presence of lower case bases on AlleleSequence
+- Presence of Indels
+- If CloneID follows the format Chr_Pos
+- If all Ref Alleles have a corresponding Alt and vice-versa
+- If "Other" exists in the MADC AlleleID (e.g. `Chr01_000084128|Other_0003`), which indicates the presence of additional alleles
+beyond the target SNPs. If "Other" alleles are present, BIGapp will inform the user and direct them to the `madc2vcf_all`
+function if they want to extract these additional SNPs as well
+
+#### Targets
+
+If you decide to extract only the **target SNPs**, BIGapp function `madc2vcf_targets` will be used.
-* **Target SNPs**
+You can choose:
-If you decide to extract only the **target SNPs**, you can choose if reference (REF) and alternative (ALT) bases should also be extracted. For extracting this information from the MADC file, BIGapp compares the reference sequences with the alternative, finds the polymorphic site and notate the changing base. This requires:
+- `Collapse Matches Counts`: the MADC contains the target tags named as `Chr_Pos|Ref_0001` and `Chr_Pos|Alt_0002`,
+(or `Chr_Pos|Ref` and `Chr_Pos|Alt` if raw MADC) and also contains the `Chr_Pos|RefMatch_0003` and `Chr_Pos|AltMatch_0004` tags,
+which represent off-target SNPs and bases at the target position matching the Ref or the Alt. If you choose to collapse the counts,
+the counts of the RefMatch and AltMatch tags will be added to the counts of the Ref and Alt tags, respectively. If not,
+the RefMatch and AltMatch tags will be discarded and only the Ref and Alt tags will be kept.
+
+- `Select Marker Type`: if reference (REF) and alternative (ALT) bases should also be extracted.
+BIGapp will extract this information from the `_lut.csv` file if provided; otherwise, it will extract it from the MADC file by comparing
+ the reference sequences with the alternative, finding the polymorphic site and notating the changing base.
+
+This requires:
- Reference (`Ref_0001`) and alternative (`Alt_0002`) sequences for each tag. If one or both is missing, the tag will be discarded.
- A single polymorphism between them. If more than one is found, the tag is discarded
- - A `.botloci` file to inform which tag sequences should be converted to its reverse complement. For BI-supported marker panel **species** this file is already embedded within the app, no upload is required.
+ - A `.botloci` file to inform which tag sequences should be converted to its reverse complement. This file is generated by HapApp when you upload a MADC file with unfixed AlleleIDs. For BI-supported marker panel **species** this file is already embedded within the app, no upload is required.
+ - or
+ - A markers information file (`_lut.csv`) with chromosome, position, REF and ALT bases, indel length and indel type (insertion or deletion) for each target SNP. This file is generated by HapApp when you upload a MADC file with unfixed AlleleIDs. For BI-supported marker panel **species** this file is already embedded within the app, no upload is required.
+
+If you choose to not extract REF and ALT information, all tags will be kept, but your VCF will have missing data (`.`) in the REF and ALT fields.
+
+`madc2vcf_targets` doesn't run if:
+
+ - MADC Column names are not correct
+ - Ignores Other alleles, but informs the user if they exist and directs them to `madc2vcf_all` in case they want to extract them as well
+
+See the table for a summary of `madc2vcf_targets` requirements according to MADC content:
-If you choose to not extract REF and ALT information, all tags will be kept, but your VCF will have missing data (`.`) in the REF and ALT fields. The only process in BIGapp that will require this information is the Dosage Calling with PolyRAD.
+
+
+ |
+ check status |
+ get_REF_ALT |
+ Requires |
+
+
+ | IUPAC |
+ detected |
+ TRUE |
+ markers_info REF/ALT |
+
+
+ | detected |
+ FALSE |
+ - |
+
+
+ | not detected |
+ TRUE |
+ botloci or markers_info REF/ALT |
+
+
+ | not detected |
+ FALSE |
+ - |
+
+
+ | Indels |
+ detected |
+ TRUE |
+ markers_info REF/ALT |
+
+
+ | detected |
+ FALSE |
+ - |
+
+
+ | not detected |
+ TRUE |
+ botloci or markers_info REF/ALT |
+
+
+ | not detected |
+ FALSE |
+ - |
+
+
+ | CloneID has Chr_Pos format |
+ detected |
+ TRUE |
+ botloci or markers_info REF/ALT |
+
+
+ | detected |
+ FALSE |
+ - |
+
+
+ | not detected |
+ TRUE |
+ markers_info CHR/POS/REF/ALT or markers_info CHR/POS/ + botloci |
+
+
+ | not detected |
+ FALSE |
+ markers_info CHR/POS |
+
+
+ | FixAlleleIDs |
+ detected |
+ TRUE |
+ botloci or markers_info REF/ALT |
+
+
+ | detected |
+ FALSE |
+ - |
+
+
+ | not detected |
+ TRUE |
+ markers_info REF/ALT |
+
+
+ | not detected |
+ FALSE |
+ - |
+
+
-* **Target and off-target (all) SNPs**
+
-To identify **off-target** SNPs, BIGapp aligns each amplicon against its reference (via Bioconductor’s `Biostrings` + `pwalign`) to uncover additional polymorphisms. This procedure requires:
+#### Off-target SNPs
+
+To identify **off-target** SNPs, BIGr function `madc2vcf_all` will be used.
+`madc2vcf_all` aligns each amplicon against its reference (via Bioconductor’s `Biostrings` + `pwalign`) to uncover additional
+polymorphisms. This procedure requires:
- Reference & alternative sequences (e.g. `Ref_0001`, `Alt_0002`) for every tag
- If a FASTA haplotype database is supplied, any missing sequences in the MADC file will be retrieved automatically.
@@ -44,42 +189,127 @@ To identify **off-target** SNPs, BIGapp aligns each amplicon against its referen
- Tags with zero or multiple variants are excluded.
- `.botloci` file
- Specifies which tags must be reverse-complemented. For BI-supported species, this file is bundled within the app—no upload needed.
+ - If your MADC contains indels as target, the markers information file (`_lut.csv`) with chromosome, position, REF and ALT bases, indel length and indel type (insertion or deletion) for each target SNP. This file is generated by HapApp when you upload a MADC file with unfixed AlleleIDs. For BI-supported marker panel **species** this file is already embedded within the app, no upload is required.
+
+The function has some parameters that allow you to customize resource usage, the stringency of the alignment, and tag filtering:
+
+- `Number of CPU Cores` to specify the number of cores to use for the alignment. By default, it uses a single core.
+
+Click on `Advanced Options` to see more parameters:
-* **Specie**
+- `Alignment Score Threshold` to specify the minimum alignment score for a SNP to be called. By default, it is set to 40, which means that only tags (sequences) with an alignment score of 40 or higher will be included in the output VCF file.
+- `Remove Multiallelic SNPs` to specify if you want to remove SNPs with more than two variations (e.g. Ref A and Alts C and T). By default, it is set to TRUE, which means that multiallelic SNPs will be removed.
+If set to FALSE, multiallelic SNPs will be kept in the output VCF file but you can specify filter thresholds:
+ - `Multiallelic SNP Depth Threshold` specify the minimum depth by tag threshold for filtering low-frequency SNP alleles. Default is 0
+ - `Multiallelic SNP Sample Threshold` specify the minimum number of samples threshold for filtering low-frequency SNP alleles. Default is 0.
+- `Add Others` to specify if you want to consider SNPs in the "Other" alleles (e.g. `Chr01_000084128|Other_0003`) for the output VCF file. By default, it is set to FALSE, which means that "Other" alleles will be discarded.
+If set to TRUE, "Other" alleles will be included and you can define some filters:
+ - `Others Max SNPs` to specify the maximum number of SNPs allowed in the "Other" tags alleles. Default is 5. If an "Other" allele has more than this number of SNPs, it will be discarded.
+ - `Remove Others with Indels` to specify if you want to remove "Other" alleles with indels. By default, it is set to TRUE, which means that "Other" alleles with indels will be removed.
-Specify the species so BIGapp can pick the correct `.botloci` file. BI-supported species files live in the [BIGapp-PanelHub](https://github.com/Breeding-Insight/BIGapp-PanelHub) repo and are auto-loaded by the app.
+`madc2vcf_all` doesn't run if:
-You can also input **multiple MADC files**. BIGapp will merge the files before the conversion. If you have repeated samples IDs, a suffix with the run ID (beginning of the file name) will be added to the repeated samples.
+ - MADC Column names are not correct
+ - If it is raw MADC
+ - If it has IUPAC codes
-Conversion is powered by the `madc2vcf_targets` and `madc2vcf_all` functions in the [BIGr](https://github.com/Breeding-Insight/BIGr) R package. You can also run them on the command line to access advanced options (e.g., multiallelic SNP support or custom alignment thresholds).
+See the table for `madc2vcf_all` requirements according to MADC content:
-* **DArTag Dosage Report**
+
+
+ |
+ Check status |
+ Requires |
+
+
+ | Indels |
+ detected |
+ markers_info REF/ALT/IndelPos/IndelLength + botloci |
+
+
+ | not detected |
+ botloci |
+
+
+ | CloneID has Chr_Pos format |
+ detected |
+ botloci |
+
+
+ | not detected |
+ markers_info CHR/POS + botloci |
+
+
+ | All Ref tags have corresponding Alt |
+ detected |
+ botloci |
+
+
+ | not detected |
+ botloci + microhapdb |
+
+
-The DArT Dosage Report is a tab-separated file provided by DArT from a sequencing project. It contains genotypic information for each target marker across all samples in the sequencing project.
+
-File Structure:
+#### Microhaplotypes
-* Markers are in rows, and samples are in columns.
-* The file includes several summary metric columns before the sample dosage columns.
-* Dosage values represent the number of alternative alleles present in each sample, ranging from 0 to the species’ ploidy number.
+To identify **microhaplotypes**, BIGapp looks for multiple polymorphisms within the same amplicon. This process requires:
-Dosage Interpretation (Example: Tetraploid Species - 4x)
+ - Reference & alternative sequences (e.g. `Ref_0001`, `Alt_0002`) for every tag
+ - At least two polymorphic sites within the same amplicon
+ - Tags with fewer than two variants are excluded.
+ - `.botloci` file
+ - Specifies which tags must be reverse-complemented. For BI-supported species, this file is bundled within the app—no upload needed.
+ - If your MADC contains indels as target, the markers information file (`_lut.csv`) with chromosome, position, REF and ALT bases, indel length and indel type (insertion or deletion) for each target SNP. This file is generated by HapApp when you upload a MADC file with unfixed AlleleIDs. For BI-supported marker panel **species** this file is already embedded within the app, no upload is required.
-* 4 → Homozygous for the reference allele (AAAA)
-* 3 → Heterozygous with three reference alleles and one alternative (AAAB)
-* 2 → Heterozygous with two reference and two alternative alleles (AABB)
-* 1 → Heterozygous with one reference allele and three alternative (ABBB)
-* 0 → Homozygous for the alternative allele (BBBB)
+`madc2vcf_multi` doesn’t run if:
-This codification is commonly used as input for various software, including AGHmatrix, rrBLUP, and mappoly. However, some programs require not only dosage information but also read counts (see description below).
+ - MADC Column names are not correct
+ - If it is raw MADC
+ - If it has IUPAC codes
+ - if Ref or Alt tags are absent
-To accommodate this, BIGapp integrates both dosage and read count data, generating a VCF file—the most widely accepted and standardized format for genotypic information, ensuring compatibility with a broad range of software.
+See the table for `madc2vcf_multi` requirements according to MADC content:
-
-* **DArTag Counts File**
+
+
+ |
+ Check status |
+ Requires |
+
+
+ | Indels |
+ detected |
+ botloci |
+
+
+ | not detected |
+ botloci |
+
+
+ | CloneID has Chr_Pos format |
+ detected |
+ botloci |
+
+
+ | not detected |
+ markers_info CHR/POS + botloci |
+
+
-The DArT counts file is a tab-separated file provided by DArT from a sequencing project. It contains the read count information for the reference and alternate allele at each target marker. The marker information is in the rows and the samples are in the columns. There are several informational columns that precede the sample columns. There are two versions of this file. The “collapsed counts” version contains the target markers that includes their multiallic read counts in their total counts. The “Counts” file contains the read counts for the target markers only (excluding the multiallelic read count information). BIGapp will merge the counts and the dosage information to generate a VCF file format —the most widely accepted and standardized format for genotypic information, ensuring compatibility with a broad range of software.
-
-* **Species Ploidy**
+
+
+### **Species**
+
+Specify the species so BIGapp can pick the correct `.botloci`, `_lut.csv`, and microhaplotype database file. BI-supported species files live in the [BIGapp-PanelHub](https://github.com/Breeding-Insight/BIGapp-PanelHub) repo and are auto-loaded by the app.
+
+### **Species Ploidy**
Specifies the ploidy level of the species. The current analysis supports both diploids and autopolyploids.
+
+### **Multiple MADC files**
+
+You can also input **multiple MADC files**. BIGapp will merge the files before the conversion. If you have repeated sample IDs, a suffix with the run ID (beginning of the file name) will be added to the repeated samples.
+
+Conversion is powered by the `madc2vcf_targets`, `madc2vcf_all`, and `madc2vcf_multi` functions in the [BIGr](https://github.com/Breeding-Insight/BIGr) R package.
diff --git a/man/polyRAD2vcf.Rd b/man/polyRAD2vcf.Rd
index 2bc0ec6..2efd427 100644
--- a/man/polyRAD2vcf.Rd
+++ b/man/polyRAD2vcf.Rd
@@ -2,25 +2,78 @@
% Please edit documentation in R/DosageCall_functions.R
\name{polyRAD2vcf}
\alias{polyRAD2vcf}
-\title{Function to convert polyRAD output to VCF}
+\title{Convert polyRAD Genotypes to a VCF File}
\usage{
polyRAD2vcf(geno, model, vcf_path, hindhe.obj, ploidy, output.file, session)
}
\arguments{
-\item{geno}{ToDo}
+\item{geno}{A marker-by-sample dosage matrix (or object coercible to a
+matrix) containing polyRAD genotype estimates. Row names must match the
+variant IDs in \code{vcf_path} (before allele suffixes), and columns
+correspond to samples.}
-\item{model}{ToDo}
+\item{model}{Character string indicating the polyRAD model used to fit the
+genotypes (e.g. \code{"Kriging"}, \code{"SampleDepth"}, etc.). This value
+is recorded in the VCF header for provenance.}
-\item{vcf_path}{ToDo}
+\item{vcf_path}{Path to the template VCF file used to provide variant
+metadata (CHROM, POS, ID, REF, ALT, QUAL, FILTER, INFO, and original
+FORMAT fields).}
-\item{hindhe.obj}{ToDo}
+\item{hindhe.obj}{Object containing Hind/He values per locus (typically a
+matrix or data frame from polyRAD). Columns must correspond to alleles
+for each marker; the function computes the mean Hind/He per marker and
+adds it as an \code{INFO} field (\code{HH}) in the output VCF.}
-\item{ploidy}{ToDo}
+\item{ploidy}{Integer giving the ploidy level used for the polyRAD analysis.
+This is used to convert dosage values into genotype strings (e.g.
+\code{"0/1"}, \code{"1/1"}, \code{"0/0/1"}, etc.).}
-\item{output.file}{ToDo}
+\item{output.file}{Base name for the output VCF file (without extension).
+The function will append \code{".vcf"} to this name and write the
+resulting VCF to disk in the current working directory (unless a path
+is included).}
-\item{session}{ToDo}
+\item{session}{Optional Shiny \code{session} object. Currently included for
+compatibility with Shiny modules; not used internally in this function.}
+}
+\value{
+No value is returned. The function is called for its side effect of writing
+a VCF file to \code{paste0(output.file, ".vcf")}.
}
\description{
-Function to convert polyRAD output to VCF
+Takes genotype dosage estimates from polyRAD and appends them to an existing
+VCF, adding Hind/He summary statistics and custom FORMAT/INFO fields.
+The original variant metadata (CHROM, POS, REF, ALT, etc.) are preserved
+from a template VCF, and new polyRAD-derived fields are written for each
+sample and locus.
+}
+\details{
+The function:
+\itemize{
+\item Flips and formats polyRAD dosages using \code{\link[BIGr]{flip_dosage}}.
+\item Harmonizes marker IDs between \code{geno}, \code{hindhe.obj}, and
+the template VCF by stripping allele suffixes.
+\item Reads the template VCF with \code{\link[vcfR]{read.vcfR}} and
+updates the header to include:
+\itemize{
+\item \code{##INFO=} for Hind/He per locus.
+\item \code{##FORMAT=} for polyRAD-estimated genotypes
+(in allele-count space).
+\item \code{##FORMAT=} for the dosage count of reference
+alleles.
+\item Version tags for \code{polyRAD} and \code{BIGapp}.
+}
+\item Converts dosage values to genotype strings (\code{GT}-like,
+\code{"0/0"}, \code{"0/1"}, \code{"1/1"}, etc.) given the specified
+\code{ploidy}.
+\item Builds a new FORMAT field for each variant combining:
+\code{GTP}, \code{UD}, and the original FORMAT fields from the template.
+\item Writes the updated header and variant table to a VCF file.
+}
+
+Missing dosages are converted to a missing genotype string (e.g.
+\code{"./."} for diploids) and represented as \code{"."} in the dosage
+field. Variants are sorted by chromosome and numeric position before
+writing.
}