diff --git a/DESCRIPTION b/DESCRIPTION index 13190fc..91766e4 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: BIGapp Title: Breeding Insight Genomics Shiny Application -Version: 1.6.0 +Version: 1.8.0 Authors@R: c( person(c("Alexander", "M."), "Sandercock", @@ -26,7 +26,7 @@ Description: This R shiny app provides a web-based user friendly way for researc License: Apache License (== 2.0) Encoding: UTF-8 Roxygen: list(markdown = TRUE) -RoxygenNote: 7.3.2 +RoxygenNote: 7.3.3 Depends: R (>= 4.4.0) biocViews: Imports: diff --git a/R/app_server.R b/R/app_server.R index 50c4b7d..07b7663 100644 --- a/R/app_server.R +++ b/R/app_server.R @@ -10,7 +10,7 @@ app_server <- function(input, output, session) { # Your application server logic ##Add server configurations - options(shiny.maxRequestSize = 1000000 * 1024^2) # Set maximum upload size to 1000GB + options(shiny.maxRequestSize = 100000 * 1024^2) # Set maximum upload size to 100GB #shiny.maxRequestSize = 10000 * 1024^2; # 10 GB <- This is for a future limit when using BI's server remotely callModule(mod_DosageCall_server, diff --git a/R/app_ui.R b/R/app_ui.R index 55d43cb..b6605f8 100644 --- a/R/app_ui.R +++ b/R/app_ui.R @@ -89,8 +89,8 @@ app_ui <- function(request) { style = "display: flex; align-items: center;", # Align text and images horizontally div( style = "display: flex; flex-direction: column; margin-right: 15px; text-align: right;", - div("2025 Breeding Insight"), - div("Funded by USDA through Cornell University") + div("2026 Breeding Insight"), + div("Funded by USDA through UF|IFAS") ), div( a( @@ -99,6 +99,9 @@ app_ui <- function(request) { ), a( img(src = "www/cornell_seal_simple_web_b31b1b.png", height = "45px") + ), + a( + img(src = "www/IFAS.jpg", height = "45px") ) ) ), diff --git a/R/mod_Filtering.R b/R/mod_Filtering.R index b040996..fa68891 100644 --- a/R/mod_Filtering.R +++ b/R/mod_Filtering.R @@ -104,7 +104,7 @@ mod_Filtering_ui <- function(id){ progressBar(id = ns("pb_filter"), value = 0, status = "info", display_pct = TRUE, striped = TRUE, title = " ") ), # A placeholder for the download button. It will be rendered in the shinyalert modal. - uiOutput(ns("download_ui_placeholder")) + uiOutput(ns("download_ui_placeholder")) ) ) ) @@ -159,8 +159,8 @@ mod_Filtering_server <- function(input, output, session, parent_session){ # expand specific box updateBox(id = "VCF_Filtering_box", action = "toggle", session = parent_session) }) - - + + ## Advanced options popup #Default model choices advanced_options <- reactiveValues( @@ -168,7 +168,7 @@ mod_Filtering_server <- function(input, output, session, parent_session){ remove_list = NULL, remove_file = NULL ) - + #List the ped file name if previously uploaded output$uploaded_file_name <- renderText({ if (!is.null(advanced_options$remove_file)) { @@ -177,47 +177,47 @@ mod_Filtering_server <- function(input, output, session, parent_session){ "" # Return an empty string if no file has been uploaded } }) - + #Get list of sample names from VCF file observeEvent(input$updog_rdata, { #### VCF sanity check - checks <- vcf_sanity_check(input$updog_rdata$datapath, - max_markers = 16000, + checks <- vcf_sanity_check(input$updog_rdata$datapath, + max_markers = 16000, depth_support_fields = c("DP", "AD", "RA")) - + error_if_false <- c( "VCF_header", "VCF_columns", "unique_FORMAT", "GT", "samples", "chrom_info", "pos_info", "VCF_compressed", "allele_counts" ) - + error_if_true <- c( - "multiallelics", "phased_GT", + "multiallelics", "duplicated_samples", "duplicated_markers" ) - + warning_if_false <- c("ref_alt","max_markers") - - checks_result <- vcf_sanity_messages(checks, - error_if_false, - error_if_true, - warning_if_false = warning_if_false, + + checks_result <- vcf_sanity_messages(checks, + error_if_false, + error_if_true, + warning_if_false = warning_if_false, warning_if_true = NULL) - + print(checks) print(checks_result) if(checks_result) return() # Stop the analysis if checks fail ######### - - + + #populate preview_data preview_vcf <- read.vcfR(input$updog_rdata$datapath, verbose = FALSE, nrows = 1) - + #Get names advanced_options$sample_list <- names(data.frame(preview_vcf@gt, check.names=FALSE)[,-1]) - + rm(preview_vcf) }) - + #UI popup window for input observeEvent(input$advanced_options, { showModal(modalDialog( @@ -266,9 +266,9 @@ mod_Filtering_server <- function(input, output, session, parent_session){ ) )) }) - - - + + + #Close popup window when user "saves options" observeEvent(input$save_advanced_options, { #Only close the window if one of the options has been selected @@ -278,10 +278,10 @@ mod_Filtering_server <- function(input, output, session, parent_session){ advanced_options$remove_list <- input$remove_list advanced_options$remove_file <- input$remove_file # Save other inputs as needed - + removeModal() } - + }) #vcf @@ -391,7 +391,7 @@ mod_Filtering_server <- function(input, output, session, parent_session){ animation = TRUE ) } - + if (input$use_updog & updog_par) { # Use Updog filtering parameters OD_filter <- as.numeric(input$OD_filter) @@ -444,17 +444,17 @@ mod_Filtering_server <- function(input, output, session, parent_session){ filtering_files$raw_sample_miss_df <- as.numeric(colMeans(is.na(gt_matrix))) #Sample missing values rm(gt_matrix) #Remove gt matrix - + #Remove the samples if any are manually selected from advanced options if (!is.null(advanced_options$remove_list)) { advanced_options$remove_file <- NULL #Prioritize manually selected samples if a file was also uploaded (add a user warning if both are uploaded in model) - + vcf_temp <- subset_vcf(vcf, remove.sample.list = advanced_options$remove_list) vcf <- vcf_temp[[1]] removed_samples <- vcf_temp[[2]] rm(vcf_temp) } else if (!is.null(advanced_options$remove_file)) { - + #Remove the samples vcf_temp <- subset_vcf(vcf, remove.sample.file = advanced_options$remove_file$datapath) vcf <- vcf_temp[[1]] @@ -525,7 +525,7 @@ mod_Filtering_server <- function(input, output, session, parent_session){ sample_removed <- length(starting_samples) - length(final_samples) removed_names <- setdiff(starting_samples, final_samples) filtering_files$removed_names <- removed_names - + # Define the download handler output$download_removed_samples <- downloadHandler( filename = function() { @@ -537,7 +537,7 @@ mod_Filtering_server <- function(input, output, session, parent_session){ } } ) - + if (sample_removed > 0 && removed_samples == 0) { showModal(modalDialog( title = "Samples Filtered", @@ -1001,7 +1001,7 @@ mod_Filtering_server <- function(input, output, session, parent_session){ writeLines(paste(capture.output(filtering_summary_info()), collapse = "\n"), file) } ) - + } ## To be copied in the UI diff --git a/R/mod_GS.R b/R/mod_GS.R index 7367186..dcc3b29 100644 --- a/R/mod_GS.R +++ b/R/mod_GS.R @@ -140,7 +140,7 @@ mod_GS_ui <- function(id) { #' @noRd mod_GS_server <- function(input, output, session, parent_session) { ns <- session$ns - + # Help links observeEvent(input$goPar, { # change to help tab @@ -148,7 +148,7 @@ mod_GS_server <- function(input, output, session, parent_session) { session = parent_session, inputId = "MainMenu", selected = "help" ) - + # select specific tab updateTabsetPanel( session = parent_session, inputId = "Genomic_Prediction_tabset", @@ -157,14 +157,14 @@ mod_GS_server <- function(input, output, session, parent_session) { # expand specific box updateBox(id = "Genomic_Prediction_box", action = "toggle", session = parent_session) }) - + observeEvent(input$goRes, { # change to help tab updatebs4TabItems( session = parent_session, inputId = "MainMenu", selected = "help" ) - + # select specific tab updateTabsetPanel( session = parent_session, inputId = "Genomic_Prediction_tabset", @@ -173,14 +173,14 @@ mod_GS_server <- function(input, output, session, parent_session) { # expand specific box updateBox(id = "Genomic_Prediction_box", action = "toggle", session = parent_session) }) - + observeEvent(input$goCite, { # change to help tab updatebs4TabItems( session = parent_session, inputId = "MainMenu", selected = "help" ) - + # select specific tab updateTabsetPanel( session = parent_session, inputId = "Genomic_Prediction_tabset", @@ -189,7 +189,7 @@ mod_GS_server <- function(input, output, session, parent_session) { # expand specific box updateBox(id = "Genomic_Prediction_box", action = "toggle", session = parent_session) }) - + # Default model choices advanced_options_pred <- reactiveValues( pred_model = "GBLUP", @@ -197,14 +197,14 @@ mod_GS_server <- function(input, output, session, parent_session) { pred_est_file = NULL, ped_file = NULL ) - + pred_outputs <- reactiveValues( corr_output = NULL, comb_output = NULL, all_GEBVs = NULL, avg_GEBVs = NULL ) - + # List the ped file name if previously uploaded output$uploaded_file_name <- renderText({ if (!is.null(advanced_options_pred$ped_file)) { @@ -213,7 +213,7 @@ mod_GS_server <- function(input, output, session, parent_session) { "" # Return an empty string if no file has been uploaded } }) - + output$uploaded_file_name_pred <- renderText({ if (!is.null(advanced_options_pred$pred_est_file)) { paste("Previously uploaded file:", advanced_options_pred$pred_est_file$name) @@ -221,9 +221,9 @@ mod_GS_server <- function(input, output, session, parent_session) { "" # Return an empty string if no file has been uploaded } }) - + ## Trait file modal window - + # Default choices trait_options <- reactiveValues( missing_data = "NA", @@ -231,14 +231,14 @@ mod_GS_server <- function(input, output, session, parent_session) { sample_column = NULL, file_type = NULL ) - + # UI popup window for input observeEvent(input$pred_trait_file, { req(input$pred_trait_file) # Get the column names of the csv file info_df <- read.csv(input$pred_trait_file$datapath, header = TRUE, check.names = FALSE, nrows = 2) info_df[, 1] <- as.character(info_df[, 1]) # Makes sure that the sample names are characters instead of numeric - + # Read first 5 rows for preview preview_data <- tryCatch( { @@ -248,7 +248,7 @@ mod_GS_server <- function(input, output, session, parent_session) { NULL } ) - + showModal(modalDialog( title = "Trait File Options", size = "l", @@ -301,14 +301,14 @@ mod_GS_server <- function(input, output, session, parent_session) { actionButton(ns("save_trait_options"), "Save") ) )) - + # Render the preview table output$file_preview <- renderTable({ req(preview_data) preview_data }) }) - + output$custom_missing_msg <- renderText({ if (input$missing_data == "Custom" && nchar(input$custom_missing) == 0) { "Please enter a custom missing value." @@ -316,8 +316,8 @@ mod_GS_server <- function(input, output, session, parent_session) { "" } }) - - + + # Close popup window when user "saves options" observeEvent(input$save_trait_options, { trait_options$missing_data <- input$missing_data @@ -325,7 +325,7 @@ mod_GS_server <- function(input, output, session, parent_session) { trait_options$sample_column <- input$sample_column # trait_options$file_type # Save other inputs as needed - + if (input$missing_data == "Custom" && nchar(input$custom_missing) == 0) { # Validation failed: display warning and prevent modal closure showNotification( @@ -335,10 +335,10 @@ mod_GS_server <- function(input, output, session, parent_session) { ) return() # Stop further execution and keep the modal open } - + removeModal() # Close the modal after saving }) - + # UI popup window for input observeEvent(input$advanced_options_pred, { showModal(modalDialog( @@ -387,9 +387,7 @@ mod_GS_server <- function(input, output, session, parent_session) { ) )) }) - - - + # Close popup window when user "saves options" observeEvent(input$save_advanced_options_pred, { advanced_options_pred$pred_model <- input$pred_model @@ -397,17 +395,17 @@ mod_GS_server <- function(input, output, session, parent_session) { advanced_options_pred$pred_est_file <- input$pred_est_file advanced_options_pred$ped_file <- input$ped_file # Save other inputs as needed - + removeModal() # Close the modal after saving }) - + ### Genomic Prediction # This tab involved 3 observeEvents # 1) to get the traits listed in the phenotype file # 2) to input and validate the input files # 3) to perform the genomic prediction - - + + # 1) Get traits observeEvent(input$save_trait_options, { info_df2 <- read.csv(input$pred_trait_file$datapath, header = TRUE, check.names = FALSE, nrow = 0) @@ -416,11 +414,11 @@ mod_GS_server <- function(input, output, session, parent_session) { updateVirtualSelect("pred_fixed_info2", choices = trait_var2, session = session) updateVirtualSelect("pred_trait_info2", choices = trait_var2, session = session) }) - + observeEvent(input$pred_fixed_info2, { updateVirtualSelect("pred_fixed_cat2", choices = input$pred_fixed_info2, session = session) }) - + # 2) Error check for prediction and save input files continue_prediction2 <- reactiveVal(NULL) pred_inputs2 <- reactiveValues( @@ -432,7 +430,7 @@ mod_GS_server <- function(input, output, session, parent_session) { pred_geno_pheno = NULL, matrix = NULL ) - + pred_outputs2 <- reactiveValues( corr_output = NULL, box_plot = NULL, @@ -443,7 +441,7 @@ mod_GS_server <- function(input, output, session, parent_session) { colors = NULL, trait_output = NULL ) - + # Reactive boxes output$shared_snps <- renderValueBox({ valueBox( @@ -453,12 +451,12 @@ mod_GS_server <- function(input, output, session, parent_session) { color = "info" ) }) - + observeEvent(input$prediction_est_start, { # req(pred_inputs$pheno_input, pred_inputs$geno_input) - + toggleClass(id = "pred_est_ploidy", class = "borderred", condition = (is.na(input$pred_est_ploidy) | is.null(input$pred_est_ploidy))) - + if (is.null(input$pred_known_file$datapath) | is.null(input$pred_trait_file$datapath)) { shinyalert( title = "Missing input!", @@ -476,16 +474,16 @@ mod_GS_server <- function(input, output, session, parent_session) { ) } req(input$pred_known_file$datapath, input$pred_trait_file$datapath, input$pred_est_ploidy) - + # Status updateProgressBar(session = session, id = "pb_gp", value = 5, title = "Checking input files") - + # Variables ploidy <- as.numeric(input$pred_est_ploidy) train_geno_path <- input$pred_known_file$datapath est_geno_path <- input$pred_est_file$datapath traits <- input$pred_trait_info2 - + # Phenotypic variables if (trait_options$missing_data == "(blank)") { pheno2 <- read.csv(input$pred_trait_file$datapath, header = TRUE, check.names = FALSE, na.strings = "") @@ -494,11 +492,11 @@ mod_GS_server <- function(input, output, session, parent_session) { } else { pheno2 <- read.csv(input$pred_trait_file$datapath, header = TRUE, check.names = FALSE, na.strings = trait_options$missing_data) } - + # Make the sample ID column the first column in the dataframe sample_col_name <- input$sample_column pheno2 <- pheno2[, c(sample_col_name, setdiff(names(pheno2), sample_col_name))] - + # Add row names and catch errors tryCatch( { @@ -521,12 +519,12 @@ mod_GS_server <- function(input, output, session, parent_session) { return(NULL) # Return NULL to prevent further processing } ) - + # Assigning the IDs based on user input for column 1 ids_pheno <- pheno2[, 1] - - - + + + # Make sure at least one trait was input if (length(traits) == 0) { # If condition is met, show notification toast @@ -545,23 +543,23 @@ mod_GS_server <- function(input, output, session, parent_session) { imageUrl = "", animation = TRUE, ) - - + + # Stop the observeEvent gracefully return() } - - + + # Getting genotype matrix - + # Geno file path file_path <- train_geno_path - + # Geno.file conversion if needed if (grepl("\\.csv$", file_path)) { train_geno <- read.csv(train_geno_path, header = TRUE, row.names = 1, check.names = FALSE) est_geno <- read.csv(est_geno_path, header = TRUE, row.names = 1, check.names = FALSE) - + # Save number of SNPs # pred_inputs$pred_snps <- nrow(geno) } else if (grepl("\\.vcf$", file_path) || grepl("\\.gz$", file_path)) { @@ -578,32 +576,32 @@ mod_GS_server <- function(input, output, session, parent_session) { } }) } - + #### VCF sanity check checks <- vcf_sanity_check(train_geno_path) - + error_if_false <- c( "VCF_header", "VCF_columns", "unique_FORMAT", "GT", "samples", "VCF_compressed" ) - + error_if_true <- c( - "multiallelics", "phased_GT", "mixed_ploidies", + "multiallelics", "mixed_ploidies", "duplicated_samples", "duplicated_markers" ) - + warning_if_false <- c("chrom_info", "pos_info", "ref_alt","max_markers") - checks_result <- vcf_sanity_messages(checks, - error_if_false, - error_if_true, - warning_if_false = warning_if_false, + checks_result <- vcf_sanity_messages(checks, + error_if_false, + error_if_true, + warning_if_false = warning_if_false, warning_if_true = NULL, input_ploidy = as.numeric(input$pred_est_ploidy)) - + if(checks_result) return() # Stop the analysis if checks fail ######### - + # Convert VCF file if submitted train_vcf <- vcfR::read.vcfR(train_geno_path, verbose = FALSE) if (is.null(est_geno_path) || is.na(est_geno_path)) { @@ -611,21 +609,21 @@ mod_GS_server <- function(input, output, session, parent_session) { } else { ## Check VCF checks <- vcf_sanity_check(est_geno_path) - checks_result <- vcf_sanity_messages(checks, - error_if_false, - error_if_true, - warning_if_false = warning_if_false, + checks_result <- vcf_sanity_messages(checks, + error_if_false, + error_if_true, + warning_if_false = warning_if_false, warning_if_true = NULL, as.numeric(input$pred_est_ploidy)) - + if(checks_result) return() # Stop the analysis if checks fail - + est_vcf <- vcfR::read.vcfR(est_geno_path, verbose = FALSE) } - + # Get number of SNPs # pred_inputs$pred_snps <- nrow(vcf) - + # Extract GT if (is.null(est_vcf)) { est_geno <- NULL @@ -656,11 +654,11 @@ mod_GS_server <- function(input, output, session, parent_session) { imageUrl = "", animation = TRUE, ) - + # Stop the analysis return() } - + # Check that the ploidy entered is correct if (ploidy != max(train_geno, na.rm = TRUE)) { # If condition is met, show notification toast @@ -681,13 +679,13 @@ mod_GS_server <- function(input, output, session, parent_session) { imageUrl = "", animation = TRUE ) - - + + # Stop the observeEvent gracefully # return() } - - + + # Function to convert genotype matrix according to ploidy #convert_genotype <- function(genotype_matrix, ploidy) { @@ -705,7 +703,7 @@ mod_GS_server <- function(input, output, session, parent_session) { #est_geno_adj_init <- convert_genotype(est_geno, as.numeric(ploidy)) est_geno_adj_init <- est_geno } - + # Make sure the trait file and genotype file are in the same order # Column names for geno (assuming these are the individual IDs) colnames_geno <- colnames(train_geno) @@ -715,7 +713,7 @@ mod_GS_server <- function(input, output, session, parent_session) { common_ids <- intersect(colnames_geno, ids_pheno) # Get number of id pred_inputs2$pred_geno_pheno <- length(common_ids) - + # Throw an error if there are less matching samples in the phenotype file than the genotype file if (length(common_ids) == 0) { # If condition is met, show notification toast @@ -734,8 +732,8 @@ mod_GS_server <- function(input, output, session, parent_session) { imageUrl = "", animation = TRUE, ) - - + + # Stop the observeEvent gracefully return() } else if (length(common_ids) < length(colnames_geno)) { @@ -757,15 +755,15 @@ mod_GS_server <- function(input, output, session, parent_session) { imageUrl = "", animation = TRUE ) - - + + # Stop the observeEvent gracefully # return() } - - - - + + + + # Final check before performing analyses shinyalert( title = "Ready?", @@ -793,15 +791,15 @@ mod_GS_server <- function(input, output, session, parent_session) { } } ) - + # Status updateProgressBar(session = session, id = "pb_gp", value = 40, title = "Generating Matrices") - + # Create relationship matrix depending on the input VCF files if (is.null(advanced_options_pred$pred_est_file)) { # Subset phenotype file by common IDs pheno2 <- pheno2[common_ids, ] - + #Matrix #Kin_mat <- A.mat(t(train_geno_adj_init), max.missing=NULL,impute.method="mean",return.imputed=FALSE) Kin_mat <- Gmatrix(t(train_geno_adj_init), @@ -814,7 +812,7 @@ mod_GS_server <- function(input, output, session, parent_session) { } else{ #Subset phenotype file and training file by common IDs pheno2 <- pheno2[common_ids, ] - + # Match training and testing genotype file SNPs common_markers <- intersect(rownames(train_geno_adj_init), rownames(est_geno_adj_init)) # first remove samples from training genotype matrix that are not in the phenotype file @@ -833,22 +831,22 @@ mod_GS_server <- function(input, output, session, parent_session) { missingValue = "NA") } - + # Save to reactive values # pred_inputs2$shared_snps <- length(common_markers) pred_inputs2$matrix <- Kin_mat pred_inputs2$pheno_input <- pheno2 }) - + # 3) Analysis only proceeds once continue_prediction is converted to TRUE observe({ req(continue_prediction2(), pred_inputs2$pheno_input, pred_inputs2$matrix) - + # Stop analysis if cancel was selected if (isFALSE(continue_prediction2())) { return() } - + # Variables ploidy <- as.numeric(input$pred_est_ploidy) gmat <- pred_inputs2$matrix @@ -872,16 +870,16 @@ mod_GS_server <- function(input, output, session, parent_session) { cores <- 1 total_population <- ncol(gmat) # train_size <- floor(percentage / 100 * total_population) - + # Status updateProgressBar(session = session, id = "pb_gp", value = 90, title = "Generating Results") - + # initialize dataframe results_GEBVs <- matrix(nrow = ncol(gmat), ncol = length(traits) + 1) results_TRAITs <- matrix(nrow = ncol(gmat), ncol = length(traits) + 1) colnames(results_TRAITs) <- c("Sample", paste0(traits)) # Set the column names to be the traits colnames(results_GEBVs) <- c("Sample", paste0(traits)) # Set the column names to be the traits - + # GBLUP for each trait for (trait_idx in 1:length(traits)) { traitpred <- kin.blup( @@ -893,24 +891,24 @@ mod_GS_server <- function(input, output, session, parent_session) { K = gmat, n.core = cores ) - + results_GEBVs[, (trait_idx + 1)] <- traitpred$g results_TRAITs[, (trait_idx + 1)] <- traitpred$pred } # Organize dataframes results_GEBVs[, 1] <- row.names(data.frame(traitpred$g)) results_TRAITs[, 1] <- row.names(data.frame(traitpred$pred)) - + # Label the samples that already had phenotype information results_GEBVs <- data.frame(results_GEBVs) results_TRAITs <- data.frame(results_TRAITs) exists_in_df <- results_GEBVs[[1]] %in% pheno2[[1]] results_GEBVs <- cbind(results_GEBVs[1], "w/Pheno" = exists_in_df, results_GEBVs[-1]) results_TRAITs <- cbind(results_TRAITs[1], "w/Pheno" = exists_in_df, results_TRAITs[-1]) - + # Status updateProgressBar(session = session, id = "pb_gp", value = 100, title = "Finished!") - + ## Make output tables depending on 1 or 2 VCF/pedigree files used. # GEBVs if (!is.null(advanced_options_pred$pred_est_file)) { @@ -919,7 +917,7 @@ mod_GS_server <- function(input, output, session, parent_session) { } else { pred_outputs2$all_GEBVs <- results_GEBVs } - + # BLUPs of genetic values if (!is.null(advanced_options_pred$pred_est_file)) { # Subset rows where 'w/Pheno' is FALSE and drop the 'w/Pheno' column @@ -927,11 +925,11 @@ mod_GS_server <- function(input, output, session, parent_session) { } else { pred_outputs2$trait_output <- results_TRAITs } - + # End the event continue_prediction2(NULL) }) - + # Output the prediction tables all_GEBVs <- reactive({ validate( @@ -939,7 +937,7 @@ mod_GS_server <- function(input, output, session, parent_session) { ) pred_outputs2$all_GEBVs }) - + # GEBVs from all iterations/folds output$pred_gebvs_table2 <- renderDT( { @@ -947,14 +945,14 @@ mod_GS_server <- function(input, output, session, parent_session) { }, options = list(scrollX = TRUE, autoWidth = FALSE, pageLength = 5) ) - + trait_output <- reactive({ validate( need(!is.null(pred_outputs2$trait_output), "Upload the input files, set the parameters and click 'run analysis' to access results in this session.") ) pred_outputs2$trait_output }) - + # GEBVs from all iterations/folds output$pred_trait_table <- renderDT( { @@ -962,7 +960,7 @@ mod_GS_server <- function(input, output, session, parent_session) { }, options = list(scrollX = TRUE, autoWidth = FALSE, pageLength = 5) ) - + output$download_vcft <- downloadHandler( filename = function() { paste0("BIGapp_Training_VCF_Example_file.vcf.gz") @@ -972,7 +970,7 @@ mod_GS_server <- function(input, output, session, parent_session) { file.copy(ex, file) } ) - + output$download_vcfp <- downloadHandler( filename = function() { paste0("BIGapp_Predict_VCF_Example_file.vcf") @@ -982,7 +980,7 @@ mod_GS_server <- function(input, output, session, parent_session) { file.copy(ex, file) } ) - + output$download_pheno <- downloadHandler( filename = function() { paste0("BIGapp_passport_Example_file.csv") @@ -992,7 +990,7 @@ mod_GS_server <- function(input, output, session, parent_session) { file.copy(ex, file) } ) - + # Download files for GP output$download_pred_results_file <- downloadHandler( filename = function() { @@ -1002,26 +1000,26 @@ mod_GS_server <- function(input, output, session, parent_session) { # Temporary files list temp_dir <- tempdir() temp_files <- c() - + # Create a temporary file for data frames ebv_file <- file.path(temp_dir, paste0("GS-EBV-predictions-", Sys.Date(), ".csv")) write.csv(pred_outputs2$all_GEBVs, ebv_file, row.names = FALSE) temp_files <- c(temp_files, ebv_file) - + trait_file <- file.path(temp_dir, paste0("GS-Phenotype-predictions-", Sys.Date(), ".csv")) write.csv(pred_outputs2$trait_output, trait_file, row.names = FALSE) temp_files <- c(temp_files, trait_file) - + # Zip files only if there's something to zip if (length(temp_files) > 0) { zip(file, files = temp_files, extras = "-j") # Using -j to junk paths } - + # Optionally clean up file.remove(temp_files) } ) - + ## Summary Info pred_summary_info <- function() { # Handle possible NULL values for inputs @@ -1029,7 +1027,7 @@ mod_GS_server <- function(input, output, session, parent_session) { est_file_name <- if (!is.null(input$pred_est_file$name)) input$pred_est_file$name else "No file selected" passport_file_name <- if (!is.null(input$pred_trait_file$name)) input$pred_trait_file$name else "No file selected" selected_ploidy <- if (!is.null(input$pred_est_ploidy)) as.character(input$pred_est_ploidy) else "Not selected" - + # Print the summary information cat( "BIGapp Selection Summary\n", @@ -1063,7 +1061,7 @@ mod_GS_server <- function(input, output, session, parent_session) { sep = "" ) } - + # Popup for analysis summary observeEvent(input$pred_summary, { showModal(modalDialog( @@ -1079,8 +1077,8 @@ mod_GS_server <- function(input, output, session, parent_session) { ) )) }) - - + + # Download Summary Info output$download_pred_info <- downloadHandler( filename = function() { diff --git a/R/mod_GSAcc.R b/R/mod_GSAcc.R index df9ab30..da246c2 100644 --- a/R/mod_GSAcc.R +++ b/R/mod_GSAcc.R @@ -257,9 +257,9 @@ mod_GSAcc_server <- function(input, output, session, parent_session){ removeModal() # Close the modal after saving }) - + ## Trait file modal window - + #Default choices trait_options <- reactiveValues( missing_data = "NA", @@ -267,25 +267,25 @@ mod_GSAcc_server <- function(input, output, session, parent_session){ sample_column = NULL, file_type = NULL ) - + #UI popup window for input observeEvent(input$trait_file, { req(input$trait_file) #Get the column names of the csv file info_df <- read.csv(input$trait_file$datapath, header = TRUE, check.names = FALSE, nrows=2) info_df[,1] <- as.character(info_df[,1]) #Makes sure that the sample names are characters instead of numeric - + # Read first 5 rows for preview preview_data <- tryCatch({ head(read.csv(input$trait_file$datapath, nrows = 5, na.strings=trait_options$missing_data),5) }, error = function(e) { NULL }) - + showModal(modalDialog( title = "Trait File Options", size= "l", - + selectInput( inputId = ns('missing_data'), label = 'Missing Data Value', @@ -312,7 +312,7 @@ mod_GSAcc_server <- function(input, output, session, parent_session){ label = 'Sample ID Column', choices = colnames(info_df) ), - + if (!is.null(preview_data)) { div( h4( @@ -332,20 +332,20 @@ mod_GSAcc_server <- function(input, output, session, parent_session){ p("Could not load file preview.") ) }, - + footer = tagList( actionButton(ns("save_trait_options"), "Save") ) )) - + # Render the preview table output$file_preview <- renderTable({ req(preview_data) preview_data }) - + }) - + output$custom_missing_msg <- renderText({ if (input$missing_data == "Custom" && nchar(input$custom_missing) == 0) { "Please enter a custom missing value." @@ -353,8 +353,8 @@ mod_GSAcc_server <- function(input, output, session, parent_session){ "" } }) - - + + #Close popup window when user "saves options" observeEvent(input$save_trait_options, { trait_options$missing_data <- input$missing_data @@ -362,7 +362,7 @@ mod_GSAcc_server <- function(input, output, session, parent_session){ trait_options$sample_column <- input$sample_column #trait_options$file_type # Save other inputs as needed - + if (input$missing_data == "Custom" && nchar(input$custom_missing) == 0) { # Validation failed: display warning and prevent modal closure showNotification( @@ -372,7 +372,7 @@ mod_GSAcc_server <- function(input, output, session, parent_session){ ) return() # Stop further execution and keep the modal open } - + removeModal() # Close the modal after saving }) @@ -428,35 +428,35 @@ mod_GSAcc_server <- function(input, output, session, parent_session){ pred_inputs <- eventReactive(input$prediction_start,{ toggleClass(id = "pred_ploidy", class = "borderred", condition = (is.na(input$pred_ploidy) | is.null(input$pred_ploidy))) - + if (advanced_options$pred_matrix != "Amatrix"){ #### VCF sanity check checks <- vcf_sanity_check(input$pred_file$datapath) - + error_if_false <- c( "VCF_header", "VCF_columns", "unique_FORMAT", "GT", "samples", "VCF_compressed" ) - + error_if_true <- c( - "multiallelics", "phased_GT", "mixed_ploidies", + "multiallelics", "mixed_ploidies", "duplicated_samples", "duplicated_markers" ) - + warning_if_false <- c("chrom_info", "pos_info", "ref_alt","max_markers") - - checks_result <- vcf_sanity_messages(checks, - error_if_false, - error_if_true, - warning_if_false = warning_if_false, + + checks_result <- vcf_sanity_messages(checks, + error_if_false, + error_if_true, + warning_if_false = warning_if_false, warning_if_true = NULL, input_ploidy = as.numeric(input$pred_ploidy)) - + if(checks_result) return() # Stop the analysis if checks fail } ######### - - + + if(is.null(advanced_options$pred_matrix)) advanced_options$pred_matrix <- "none_selected" if (((is.null(input$pred_file$datapath) & advanced_options$pred_matrix != "Amatrix") | (is.null(advanced_options$ped_file$datapath) & advanced_options$pred_matrix == "Amatrix")) | @@ -489,11 +489,11 @@ mod_GSAcc_server <- function(input, output, session, parent_session){ } else { pheno <- read.csv(input$trait_file$datapath, header = TRUE, check.names = FALSE, na.strings = trait_options$missing_data) } - + # Make the sample ID column the first column in the dataframe sample_col_name <- input$sample_column pheno <- pheno[, c(sample_col_name, setdiff(names(pheno), sample_col_name))] - + # Add row names and catch errors tryCatch({ row.names(pheno) <- pheno[, 1] @@ -512,7 +512,7 @@ mod_GSAcc_server <- function(input, output, session, parent_session){ ) return(NULL) # Return NULL to prevent further processing }) - + # Assigning the IDs based on user input for column 1 ids_pheno <- pheno[, 1] @@ -810,36 +810,36 @@ mod_GSAcc_server <- function(input, output, session, parent_session){ #pred_outputs }) - + #Output the prediction tables - + all_GEBVs <- reactive({ validate( need(!is.null(pred_outputs$all_GEBVs), "Upload the input files, set the parameters and click 'run analysis' to access results in this session.") ) pred_outputs$comb_output }) - + output$pred_all_table <- renderDT({all_GEBVs()}, options = list(scrollX = TRUE,autoWidth = FALSE, pageLength = 5)) - + comb_output <- reactive({ validate( need(!is.null(pred_outputs$comb_output), "Upload the input files, set the parameters and click 'run analysis' to access results in this session.") ) - + #Adding the mean values to df prepare_df <- round(pred_outputs$comb_output, 4) trait_means <- round(colMeans(prepare_df), 4) new_row <- c("Mean", trait_means[-1]) new_df <- rbind(prepare_df, new_row) - + #exporting new_df - + }) - + output$pred_acc_table <- renderDT({comb_output()}, options = list(scrollX = TRUE,autoWidth = FALSE, pageLength = 5)) - + ##Plots plots <- reactive({ @@ -859,13 +859,13 @@ mod_GSAcc_server <- function(input, output, session, parent_session){ names_to = "Trait", values_to = "Correlation" ) - + # Determine dynamic y-axis lower bound based on the entire dataset min_val <- min(df_long$Correlation, na.rm = TRUE) y_axis_lower_bound <- if (min_val < 0) { # If there are negative values, start the axis slightly below the minimum # You can adjust the floor() logic for more/less padding, or just use min_val - floor(min_val * 10) / 10 + floor(min_val * 10) / 10 } else { 0 } diff --git a/R/mod_PCA.R b/R/mod_PCA.R index 3fc1c9d..87e50e9 100644 --- a/R/mod_PCA.R +++ b/R/mod_PCA.R @@ -168,7 +168,7 @@ mod_PCA_server <- function(input, output, session, parent_session){ # expand specific box updateBox(id = "PCA_box", action = "toggle", session = parent_session) }) - + #Default choices trait_options <- reactiveValues( missing_data = "NA", @@ -176,7 +176,7 @@ mod_PCA_server <- function(input, output, session, parent_session){ sample_column = NULL, file_type = NULL ) - + # Update dropdown menu choices based on uploaded passport file passport_table <- reactive({ validate( @@ -184,26 +184,26 @@ mod_PCA_server <- function(input, output, session, parent_session){ ) info_df <- read.csv(input$passport_file$datapath, header = TRUE, check.names = FALSE, na.strings=trait_options$missing_data) info_df[,1] <- as.character(info_df[,1]) #Makes sure that the sample names are characters instead of numeric - + updateSelectInput(session, "group_info", choices = colnames(info_df)) info_df }) - + #PCA specific category selection observeEvent(passport_table(), { #updateMaterialSwitch(session, inputId = "use_cat", status = "success") - + # Get selected column name selected_col <- input$group_info - + # Extract unique values from the selected column unique_values <- unique(passport_table()[[selected_col]]) - + #Add category selection updateVirtualSelect("cat_color", choices = unique_values, selected = unique_values, session = session) - + }) - + # PCA specific category selection. Update when group_info is changed. observeEvent(input$group_info, { req(passport_table()) # Ensure passport_table is available @@ -211,25 +211,25 @@ mod_PCA_server <- function(input, output, session, parent_session){ unique_values <- unique(passport_table()[[selected_col]]) updateVirtualSelect("cat_color", choices = unique_values, selected = unique_values, session = session) }) - + #UI popup window for input observeEvent(input$passport_file, { req(input$passport_file) #Get the column names of the csv file info_df <- read.csv(input$passport_file$datapath, header = TRUE, check.names = FALSE, nrows=2) info_df[,1] <- as.character(info_df[,1]) #Makes sure that the sample names are characters instead of numeric - + # Read first 5 rows for preview preview_data <- tryCatch({ head(read.csv(input$passport_file$datapath, nrows = 5, na.strings=trait_options$missing_data),5) }, error = function(e) { NULL }) - + showModal(modalDialog( title = "Trait File Options", size= "l", - + selectInput( inputId = ns('missing_data'), label = 'Missing Data Value', @@ -256,7 +256,7 @@ mod_PCA_server <- function(input, output, session, parent_session){ label = 'Sample ID Column', choices = colnames(info_df) ), - + if (!is.null(preview_data)) { div( h4( @@ -276,20 +276,20 @@ mod_PCA_server <- function(input, output, session, parent_session){ p("Could not load file preview.") ) }, - + footer = tagList( actionButton(ns("save_trait_options"), "Save") ) )) - + # Render the preview table output$file_preview <- renderTable({ req(preview_data) preview_data }) - + }) - + output$custom_missing_msg <- renderText({ if (input$missing_data == "Custom" && nchar(input$custom_missing) == 0) { "Please enter a custom missing value." @@ -297,8 +297,8 @@ mod_PCA_server <- function(input, output, session, parent_session){ "" } }) - - + + #Close popup window when user "saves options" observeEvent(input$save_trait_options, { trait_options$missing_data <- input$missing_data @@ -306,7 +306,7 @@ mod_PCA_server <- function(input, output, session, parent_session){ trait_options$sample_column <- input$sample_column #trait_options$file_type # Save other inputs as needed - + if (input$missing_data == "Custom" && nchar(input$custom_missing) == 0) { # Validation failed: display warning and prevent modal closure showNotification( @@ -316,7 +316,7 @@ mod_PCA_server <- function(input, output, session, parent_session){ ) return() # Stop further execution and keep the modal open } - + removeModal() # Close the modal after saving }) @@ -366,29 +366,29 @@ mod_PCA_server <- function(input, output, session, parent_session){ #### VCF sanity check checks <- vcf_sanity_check(geno) - + error_if_false <- c( "VCF_header", "VCF_columns", "unique_FORMAT", "GT", "samples", "VCF_compressed" ) - + error_if_true <- c( - "multiallelics", "phased_GT", "mixed_ploidies", + "multiallelics", "mixed_ploidies", "duplicated_samples", "duplicated_markers" ) - + warning_if_false <- c("chrom_info", "pos_info", "ref_alt", "max_markers") - - checks_result <- vcf_sanity_messages(checks, - error_if_false, - error_if_true, - warning_if_false = warning_if_false, + + checks_result <- vcf_sanity_messages(checks, + error_if_false, + error_if_true, + warning_if_false = warning_if_false, warning_if_true = NULL, input_ploidy = as.numeric(ploidy)) - + if(checks_result) return() # Stop the analysis if checks fail ######### - + #Import genotype information if in VCF format vcf <- read.vcfR(geno, verbose = FALSE) @@ -412,7 +412,7 @@ mod_PCA_server <- function(input, output, session, parent_session){ } #Start analysis following a genotype data check - + if (ncol(genomat) < 5){ shinyalert( title = "Small Genotype File", @@ -441,7 +441,7 @@ mod_PCA_server <- function(input, output, session, parent_session){ } else { info_df <- read.csv(input$passport_file$datapath, header = TRUE, check.names = FALSE, na.strings = trait_options$missing_data) } - + # Make the sample ID column the first column in the dataframe sample_col_name <- input$sample_column info_df <- info_df[, c(sample_col_name, setdiff(names(info_df), sample_col_name))] @@ -537,17 +537,17 @@ mod_PCA_server <- function(input, output, session, parent_session){ validate( need(!is.null(pca_data$pc_df_pop), "Input Genotype file, Species ploidy, and run the analysis to access results in this section.") ) - + df_plot <- pca_data$pc_df_pop group_var_name <- input$group_info pc_x_col <- input$pc_X pc_y_col <- input$pc_Y - + # Define default aesthetics for unselected categories label_to_value <- c("Light Grey" = "grey80", "Grey" = "grey60", "Dark Grey" = "grey40", "Black" = "black") default_color <- label_to_value[[input$grey_choice]] default_shape <- 16 # Solid circle (pch value) - + # --- Palette Generation (local to this reactive, named) --- local_my_palette <- NULL # This will be a named vector: category level -> color if (group_var_name != "" && !is.null(group_var_name) && !is.null(input$color_choice)) { @@ -556,18 +556,18 @@ mod_PCA_server <- function(input, output, session, parent_session){ shinyalert(title = "Error", text = paste("Group variable '", group_var_name, "' not found."), type = "error") return(NULL) } - + unique_group_vals_for_plot <- unique(as.character(df_plot[[group_var_name]])) num_levels_for_plot <- length(unique_group_vals_for_plot) - + if (num_levels_for_plot > 0) { max_brewer_colors <- tryCatch( RColorBrewer::brewer.pal.info[input$color_choice, "maxcolors"], error = function(e) 8 # Default max colors if palette info fails ) - + n_base_colors <- num_levels_for_plot # Number of colors needed from brewer.pal directly or via ramp - + # Adjust n for brewer.pal requirements (min 3 for most) if (num_levels_for_plot == 1) { base_palette_brewer <- RColorBrewer::brewer.pal(3, input$color_choice)[1] @@ -576,32 +576,32 @@ mod_PCA_server <- function(input, output, session, parent_session){ } else { base_palette_brewer <- RColorBrewer::brewer.pal(min(n_base_colors, max_brewer_colors), input$color_choice) } - + generated_palette_values <- grDevices::colorRampPalette(base_palette_brewer)(num_levels_for_plot) local_my_palette <- setNames(generated_palette_values, unique_group_vals_for_plot) } } - + # --- Base ggplot object --- plot_obj <- ggplot(df_plot, aes(x = .data[[pc_x_col]], y = .data[[pc_y_col]])) - + # --- Determine if color/shape are mapped to group or static --- map_color_to_group <- group_var_name != "" && !is.null(group_var_name) && !is.null(local_my_palette) map_shape_to_group <- map_color_to_group && input$use_shapes - + # Arguments for geom_point (static values if not mapped) geom_point_args <- list(size = 2.5, alpha = 0.8) - + if (map_color_to_group) { # Ensure group_var_name column is factor for consistent scale behavior df_plot[[group_var_name]] <- as.factor(df_plot[[group_var_name]]) all_levels <- levels(df_plot[[group_var_name]]) - + plot_obj <- plot_obj + aes(color = as.factor(.data[[group_var_name]])) # Map color aesthetic - + color_values_map <- setNames(rep(default_color, length(all_levels)), all_levels) categories_to_colorize <- input$cat_color # User's selection from UI - + if (input$use_cat && length(categories_to_colorize) > 0) { for (cat_level in categories_to_colorize) { if (cat_level %in% names(local_my_palette)) { @@ -618,25 +618,25 @@ mod_PCA_server <- function(input, output, session, parent_session){ } } # If input$use_cat is TRUE but categories_to_colorize is empty, all remain default_color. - + plot_obj <- plot_obj + scale_color_manual( - name = group_var_name, - values = color_values_map, + name = group_var_name, + values = color_values_map, na.value = default_color # Handles explicit NAs in the grouping column ) } else { geom_point_args$color <- default_color # Set static color for all points } - + if (map_shape_to_group) { df_plot[[group_var_name]] <- as.factor(df_plot[[group_var_name]]) # Ensure it's factor all_levels <- levels(df_plot[[group_var_name]]) - + plot_obj <- plot_obj + aes(shape = .data[[group_var_name]]) # Map shape aesthetic - + shape_values_map <- setNames(rep(default_shape, length(all_levels)), all_levels) categories_to_shape <- input$cat_color # User's selection from UI for shaping - + # Check for shinyalert for too many shapes (based on categories selected for shaping) if (length(categories_to_shape) > 25) { # The shinyalert is defined in your original code, ensure its condition matches this logic @@ -650,23 +650,23 @@ mod_PCA_server <- function(input, output, session, parent_session){ type = "error", showConfirmButton = TRUE, confirmButtonText = "OK", - confirmButtonCol = "#004192", + confirmButtonCol = "#004192", showCancelButton = FALSE, animation = TRUE ) updateMaterialSwitch(session = session, inputId = "use_shapes", value = FALSE) # Use 'session' not 'parent_session' if in module - + # Cap the number of categories getting distinct shapes categories_to_shape_for_map <- categories_to_shape[1:25] } else { categories_to_shape_for_map <- categories_to_shape } - + available_custom_shapes <- c(0:24) # PCH values (0-14 are symbols, 15-20 are filled symbols, 21-24 are bordered) # Standard practice often uses 1:25, let's stick to that. available_custom_shapes <- c(1:25) - - + + shape_idx <- 1 for (cat_level in categories_to_shape_for_map) { if (cat_level %in% all_levels && shape_idx <= length(available_custom_shapes)) { @@ -676,24 +676,24 @@ mod_PCA_server <- function(input, output, session, parent_session){ } plot_obj <- plot_obj + scale_shape_manual( name = group_var_name, # Same name as color scale for potential legend merging - values = shape_values_map, + values = shape_values_map, na.value = default_shape # Handles explicit NAs ) } else { geom_point_args$shape <- default_shape # Set static shape for all points } - + # --- Add geom_point layer --- # geom_point_args list already has size, alpha, and conditionally color/shape if static plot_obj <- plot_obj + do.call(geom_point, geom_point_args) - + # --- Labels, Guides, and Theme --- plot_obj <- plot_obj + labs( x = paste0(pc_x_col, " (", pca_data$variance_explained[as.numeric(substr(pc_x_col, 3, 3))], "%)"), y = paste0(pc_y_col, " (", pca_data$variance_explained[as.numeric(substr(pc_y_col, 3, 3))], "%)") # Legend titles are taken from scale_manual 'name' argument ) - + # Configure guides (legends) guide_options <- list() if (map_color_to_group) { @@ -707,7 +707,7 @@ mod_PCA_server <- function(input, output, session, parent_session){ guide_options$shape <- "none" } plot_obj <- plot_obj + guides(!!!guide_options) - + plot_obj <- plot_obj + theme_minimal() + theme( panel.border = element_rect(color = "black", fill = NA), legend.text = element_text(size = 14), @@ -716,7 +716,7 @@ mod_PCA_server <- function(input, output, session, parent_session){ legend.title = element_text(size = 16), legend.position = "right" # Example position, adjust as needed ) - + return(plot_obj) }) @@ -916,7 +916,7 @@ mod_PCA_server <- function(input, output, session, parent_session){ ex <- system.file("iris_passport_file.csv", package = "BIGapp") file.copy(ex, file) }) - + output$download_pcs <- downloadHandler( filename = function() { paste0("PCs.csv") diff --git a/R/mod_dapc.R b/R/mod_dapc.R index 921bc39..ea3ae4f 100644 --- a/R/mod_dapc.R +++ b/R/mod_dapc.R @@ -205,29 +205,29 @@ mod_dapc_server <- function(input, output, session, parent_session){ #Import genotype information if in VCF format #### VCF sanity check checks <- vcf_sanity_check(geno) - + error_if_false <- c( "VCF_header", "VCF_columns", "unique_FORMAT", "GT", - "samples" + "samples", "VCF_compressed" ) - + error_if_true <- c( - "multiallelics", "phased_GT", "mixed_ploidies", + "multiallelics", "mixed_ploidies", "duplicated_samples", "duplicated_markers" ) - + warning_if_false <- c("chrom_info", "pos_info", "ref_alt") - - checks_result <- vcf_sanity_messages(checks, - error_if_false, - error_if_true, - warning_if_false = warning_if_false, + + checks_result <- vcf_sanity_messages(checks, + error_if_false, + error_if_true, + warning_if_false = warning_if_false, warning_if_true = NULL, input_ploidy = as.numeric(ploidy)) - + if(checks_result) return() # Stop the analysis if checks fail ######### - + vcf <- read.vcfR(geno) #Get items in FORMAT column @@ -297,29 +297,29 @@ mod_dapc_server <- function(input, output, session, parent_session){ #Import genotype information if in VCF format #### VCF sanity check checks <- vcf_sanity_check(geno) - + error_if_false <- c( "VCF_header", "VCF_columns", "unique_FORMAT", "GT", "samples", "VCF_compressed" ) - + error_if_true <- c( "multiallelics", "phased_GT", "mixed_ploidies", "duplicated_samples", "duplicated_markers" ) - + warning_if_false <- c("chrom_info", "pos_info", "ref_alt","max_markers") - - checks_result <- vcf_sanity_messages(checks, - error_if_false, - error_if_true, - warning_if_false = warning_if_false, + + checks_result <- vcf_sanity_messages(checks, + error_if_false, + error_if_true, + warning_if_false = warning_if_false, warning_if_true = NULL, input_ploidy = as.numeric(ploidy)) - + if(checks_result) return() # Stop the analysis if checks fail ######### - + vcf <- read.vcfR(geno) #Get items in FORMAT column @@ -405,9 +405,9 @@ mod_dapc_server <- function(input, output, session, parent_session){ posi.da = "topright", posi.pca="bottomright", mstree = F, # lines connecting clusters - lwd = 1, + lwd = 1, lty = 2, - legeng = F, + legeng = F, clabel = 1) # legend and label of legend clusters. clab 0 or 1 }) diff --git a/R/mod_diversity.R b/R/mod_diversity.R index a10c089..233eef3 100644 --- a/R/mod_diversity.R +++ b/R/mod_diversity.R @@ -143,14 +143,14 @@ mod_diversity_server <- function(input, output, session, parent_session){ # expand specific box updateBox(id = "Genomic_Diversity_box", action = "toggle", session = parent_session) }) - + ##UI text output$dosage_text <- renderUI({ # Check if input$plot_tabs is NULL before evaluating it if (is.null(input$diversity_plot_tabs)) { return(NULL) } - + # Render the text only for the "Dosage Plot" tab if (input$diversity_plot_tabs == "Dosage Plot" && !is.null(diversity_items$dosage_df)) { div( @@ -226,29 +226,29 @@ mod_diversity_server <- function(input, output, session, parent_session){ #Import genotype information if in VCF format #### VCF sanity check checks <- vcf_sanity_check(geno) - + error_if_false <- c( "VCF_header", "VCF_columns", "unique_FORMAT", "GT", "samples", "chrom_info", "pos_info", "VCF_compressed" ) - + error_if_true <- c( - "multiallelics", "phased_GT", "mixed_ploidies", + "multiallelics", "mixed_ploidies", "duplicated_samples", "duplicated_markers" ) - + warning_if_false <- c("ref_alt","max_markers") - - checks_result <- vcf_sanity_messages(checks, - error_if_false, - error_if_true, - warning_if_false = NULL, + + checks_result <- vcf_sanity_messages(checks, + error_if_false, + error_if_true, + warning_if_false = NULL, warning_if_true = NULL, input_ploidy = ploidy) - + if(checks_result) return() # Stop the analysis if checks fail ######### - + vcf <- read.vcfR(geno, verbose = FALSE) #Save position information diff --git a/R/mod_dosage2vcf.R b/R/mod_dosage2vcf.R index b168d4f..2194a1a 100644 --- a/R/mod_dosage2vcf.R +++ b/R/mod_dosage2vcf.R @@ -29,11 +29,11 @@ mod_dosage2vcf_ui <- function(id){ title = "Inputs", width=12, status = "info", solidHeader = TRUE, collapsible = FALSE, collapsed = FALSE, selectInput(ns('file_type'), label = 'Select File Format', - choices = c("DArT MADC file","DArT Dosage Reports", "Dosage Matrix"), + choices = c("DArT MADC file","DArT Dosage/SNP Report", "Dosage Matrix"), selected = "DArT MADC file"), - conditionalPanel(condition = "input.file_type == 'DArT Dosage Reports'", + conditionalPanel(condition = "input.file_type == 'DArT Dosage/SNP Report'", ns = ns, - fileInput(ns("report_file"), "Choose DArT Dose Report File", accept = c(".csv")), + fileInput(ns("report_file"), "Choose DArT Dose/SNP Report File", accept = c(".csv")), fileInput(ns("counts_file"), "Choose DArT Counts File", accept = c(".csv")), numericInput(ns("dosage2vcf_ploidy"), "Species Ploidy", min = 1, value = NULL) ), @@ -49,47 +49,60 @@ mod_dosage2vcf_ui <- function(id){ hr(), radioButtons(ns("snp_type"), label = "Select Marker Type", - choices = list("Target"= "target", "Target + Off-Target" = "target_off"), + choices = list("Target"= "target", "Target + Off-Target" = "target_off", "Microhaplotypes" = "multiallelic"), selected = "target"), selectInput(ns('species'), label = 'Species', - choices = c("alfalfa","blueberry", "cranberry", "cucumber", "lettuce", "pecan", "sweetpotato", "other"), + choices = c("alfalfa", "blueberry", "cranberry", "cucumber", "pecan", "potato", "strawberry", "sweetpotato", "other"), selected = "alfalfa"), conditionalPanel(condition = "input.snp_type == 'target_off'", ns = ns, conditionalPanel(condition = "input.species == 'other'", ns = ns, fileInput(ns("botloci_file"), "Upload bottom strand probes file (.botloci)"), + fileInput(ns("hapDB_file"), "Upload haplotype database file (fasta) (optional)"), + fileInput(ns("markers_info_file"), "Upload markers information (_lut.csv from HapApp) (optional)"), ), - fileInput(ns("hapDB_file"), "Upload haplotype database file (fasta) (optional)"), - sliderInput(ns("cores"), "Number of CPU Cores", min = 1, max = (availableCores() - 1), value = 1, step = 1) + sliderInput(ns("cores"), "Number of CPU Cores", min = 1, max = max(1, availableCores() - 1), value = 1, step = 1), br(), + div( + style = "text-align: left; margin-top: 10px;", + actionButton(ns("advanced_options_all"), + label = HTML(paste(icon("cog", style = "color: #007bff;"), "Advanced Options")), + style = "background-color: transparent; border: none; color: #007bff; font-size: smaller; text-decoration: underline; padding: 0;" + ) + ) ), conditionalPanel(condition = "input.snp_type == 'target'", ns = ns, + radioButtons(ns("collapse_matches_counts"), + label = "Collapse Matches Counts:", + choices = list("Yes"= TRUE, "No" = FALSE), + selected = FALSE), conditionalPanel(condition = "input.species == 'other'", ns = ns, radioButtons(ns("ref_alt"), label = "Extract REF and ALT info:", - choices = list("Yes"= "TRUE", "No" = "FALSE"), - selected = "TRUE"), + choices = list("Yes"= TRUE, "No" = FALSE), + selected = TRUE), conditionalPanel(condition = "input.ref_alt == 'TRUE'", ns = ns, fileInput(ns("botloci_file"), "Upload bottom strand probes file (.botloci)"), + fileInput(ns("hapDB_file"), "Upload haplotype database file (fasta) (optional)"), + fileInput(ns("markers_info_file"), "Upload markers information (_lut.csv from HapApp) (optional)"), ) ) ) ), hr(), - textInput(ns("d2v_output_name"), "Output File Name"), + textInput(ns("d2v_output_name"), "Output File Name", placeholder = "e.g. my_output"), hr(), - actionButton(ns("run_analysis"), "Convert File"), + actionButton(ns("run_analysis"), "Convert File", width = "100%"), br(), br(), uiOutput(ns('mybutton')), - + div(style="display:inline-block; float:right",dropdownButton( HTML("Input files"), - p(downloadButton(ns('download_dose'), ""), "Dose Report Example File"), - p(downloadButton(ns('download_counts'), ""), "Counts Example File"), - p(downloadButton(ns('download_madc'), ""), "MADC Example File"),hr(), + p(downloadButton(ns('download_madc_fixed'), ""), "Fixed Allele MADC Example File"), + p(downloadButton(ns('download_madc'), ""), "Raw MADC Example File"),hr(), p(HTML("Parameters description:"), actionButton(ns("goPar"), icon("arrow-up-right-from-square", verify_fa = FALSE) )), hr(), p(HTML("Results description:"), actionButton(ns("goRes"), icon("arrow-up-right-from-square", verify_fa = FALSE) )), hr(), p(HTML("How to cite:"), actionButton(ns("goCite"), icon("arrow-up-right-from-square", verify_fa = FALSE) )), hr(), @@ -101,12 +114,17 @@ mod_dosage2vcf_ui <- function(id){ )) ) ), - column(width = 4, - box(title = "Status", width = 12, collapsible = TRUE, status = "info", - progressBar(id = ns("dosage2vcf_pb"), value = 0, status = "info", display_pct = TRUE, striped = TRUE, title = " ") + column(width = 5, + box(title = "Status & Log", width = 12, collapsible = TRUE, status = "info", + progressBar(id = ns("dosage2vcf_pb"), value = 0, status = "info", display_pct = TRUE, striped = TRUE, title = " "), + hr(), + tags$style(HTML(paste0( + "#", ns("d2vcf_log"), " { max-height: 300px; overflow-y: auto; background: #1e1e1e !important; color: #d4d4d4; padding: 10px; border-radius: 4px; font-size: 12px; }" + ))), + verbatimTextOutput(ns("d2vcf_log")), + uiOutput(ns("download_d2vcf_log_btn")) ) - ), - column(width = 1), + ) ) ) ) @@ -120,80 +138,190 @@ mod_dosage2vcf_ui <- function(id){ #' #' @noRd mod_dosage2vcf_server <- function(input, output, session, parent_session){ - + ns <- session$ns - + + # Reactive value to accumulate log messages + d2vcf_log <- reactiveVal("") + + # Clear log when a new analysis is triggered + observeEvent(input$run_analysis, { + d2vcf_log("") + }, priority = 10) + + output$d2vcf_log <- renderText({ + d2vcf_log() + }) + + output$download_d2vcf_log_btn <- renderUI({ + req(nchar(d2vcf_log()) > 0) + downloadButton(ns("download_d2vcf_log"), "Download Log", style = "margin-top: 8px;") + }) + + output$download_d2vcf_log <- downloadHandler( + filename = function() { + paste0("dosage2vcf_log_", Sys.Date(), ".txt") + }, + content = function(file) { + writeLines(d2vcf_log(), file) + } + ) + # Help links observeEvent(input$goPar, { # change to help tab updatebs4TabItems(session = parent_session, inputId = "MainMenu", selected = "help") - + # select specific tab updateTabsetPanel(session = parent_session, inputId = "DArT_Report2VCF_tabset", selected = "DArT_Report2VCF_par") # expand specific box updateBox(id = "DArT_Report2VCF_box", action = "toggle", session = parent_session) }) - + observeEvent(input$goRes, { # change to help tab updatebs4TabItems(session = parent_session, inputId = "MainMenu", selected = "help") - + # select specific tab updateTabsetPanel(session = parent_session, inputId = "DArT_Report2VCF_tabset", selected = "DArT_Report2VCF_results") # expand specific box updateBox(id = "DArT_Report2VCF_box", action = "toggle", session = parent_session) }) - + observeEvent(input$goCite, { # change to help tab updatebs4TabItems(session = parent_session, inputId = "MainMenu", selected = "help") - + # select specific tab updateTabsetPanel(session = parent_session, inputId = "DArT_Report2VCF_tabset", selected = "DArT_Report2VCF_cite") # expand specific box updateBox(id = "DArT_Report2VCF_box", action = "toggle", session = parent_session) }) - + disable("download_d2vcf") - - output$download_dose <- downloadHandler( - filename = function() { - paste0("BIGapp_Dose_Report_Example_file.csv") - }, - content = function(file) { - ex <- system.file("iris_DArT_Allele_Dose_Report.csv", package = "BIGapp") - file.copy(ex, file) - }) - - output$download_counts <- downloadHandler( + + output$download_madc <- downloadHandler( filename = function() { - paste0("BIGapp_Counts_Example_file.csv") + paste0("BIGapp_MADC_Example_file.csv") }, content = function(file) { - ex <- system.file("iris_DArT_Counts.csv", package = "BIGapp") + ex <- system.file("iris_DArT_MADC.csv", package = "BIGapp") file.copy(ex, file) }) - - output$download_madc <- downloadHandler( + + output$download_madc_fixed <- downloadHandler( filename = function() { - paste0("BIGapp_MADC_Example_file.csv") + paste0("BIGapp_MADC_Fixed_Example_file.csv") }, content = function(file) { - ex <- system.file("iris_DArT_MADC.csv", package = "BIGapp") - file.copy(ex, file) + github_path <- "https://raw.githubusercontent.com/Breeding-Insight/BIGapp-PanelHub/refs/heads/long_seq/" + alfalfa_madc <- paste0(github_path, "test_madcs/alfalfa_madc.csv") + download.file(alfalfa_madc, destfile = file, mode = "wb") }) - - + + # Default model choices + advanced_options_all <- reactiveValues( + rm_multiallelic_SNP = TRUE, + multiallelic_SNP_dp_thr = 0, + multiallelic_SNP_sample_thr = 0, + alignment_score_thr = 40, + add_others = FALSE, + others_max_snps = 5, + others_rm_with_indels = TRUE + ) + + # Close popup window when user "saves options" + observeEvent(input$save_advanced_options_all, { + advanced_options_all$rm_multiallelic_SNP <- input$rm_multiallelic_SNP + advanced_options_all$multiallelic_SNP_dp_thr <- input$multiallelic_SNP_dp_thr + advanced_options_all$multiallelic_SNP_sample_thr <- input$multiallelic_SNP_sample_thr + advanced_options_all$alignment_score_thr <- input$alignment_score_thr + advanced_options_all$add_others <- input$add_others + advanced_options_all$others_max_snps <- input$others_max_snps + advanced_options_all$others_rm_with_indels <- as.logical(input$others_rm_with_indels) + # Save other inputs as needed + + removeModal() # Close the modal after saving + }) + + # UI popup window for input + observeEvent(input$advanced_options_all, { + showModal(modalDialog( + title = "Advanced Options", + + numericInput( + inputId = ns("alignment_score_thr"), + label = "Alignment Score Threshold", + value = advanced_options_all$alignment_score_thr, + min = 0 + ), hr(), + # rm_multiallelic_SNP + selectInput( + inputId = ns("rm_multiallelic_SNP"), + label = "Remove Multiallelic SNPs", + choices = c("TRUE" = TRUE, "FALSE" = FALSE), + selected = advanced_options_all$rm_multiallelic_SNP + ), + + # Conditionally show thresholds when rm_multiallelic_SNP is FALSE + conditionalPanel( + condition = paste0("input['", ns("rm_multiallelic_SNP"), "'] == 'FALSE'"), + numericInput( + inputId = ns("multiallelic_SNP_dp_thr"), + label = "Multiallelic SNP Depth Threshold", + value = advanced_options_all$multiallelic_SNP_dp_thr, + min = 0 + ), + numericInput( + inputId = ns("multiallelic_SNP_sample_thr"), + label = "Multiallelic SNP Sample Threshold", + value = advanced_options_all$multiallelic_SNP_sample_thr, + min = 0 + ) + ), hr(), + + # add_others + selectInput( + inputId = ns("add_others"), + label = "Add Others", + choices = c("TRUE" = TRUE, "FALSE" = FALSE), + selected = advanced_options_all$add_others + ), + + # Conditionally show others options when add_others is TRUE + conditionalPanel( + condition = paste0("input['", ns("add_others"), "'] == 'TRUE'"), + numericInput( + inputId = ns("others_max_snps"), + label = "Others Max SNPs", + value = advanced_options_all$others_max_snps, + min = 0 + ), + selectInput( + inputId = ns("others_rm_with_indels"), + label = "Remove Others with Indels", + choices = c("TRUE" = TRUE, "FALSE" = FALSE), + selected = advanced_options_all$others_rm_with_indels + ) + ), + + footer = tagList( + modalButton("Close"), + actionButton(ns("save_advanced_options_all"), "Save") + ) + )) + }) + vcf_out <- eventReactive(input$run_analysis,{ # Ensure the files are uploaded # Missing input with red border and alerts - if(input$file_type == "DArT Dosage Reports"){ + if(input$file_type == "DArT Dosage/SNP Report"){ if (is.null(input$report_file$datapath) | is.null(input$counts_file$datapath) | input$d2v_output_name == "" | input$dosage2vcf_ploidy == "") { shinyalert( title = "Missing input!", @@ -215,49 +343,84 @@ mod_dosage2vcf_server <- function(input, output, session, parent_session){ dosage_file <- input$report_file$datapath counts_file <- input$counts_file$datapath ploidy <- input$dosage2vcf_ploidy - + # Use a temporary file path without appending .vcf temp_base <- tempfile() - + #Status updateProgressBar(session = session, id = "dosage2vcf_pb", value = 10, title = "Converting DArT files to VCF") - + # Convert to VCF using the BIGr package cat("Running BIGr::dosage2vcf...\n") - + updateProgressBar(session = session, id = "dosage2vcf_pb", value = 50, title = "Writing VCF") - + dosage2vcf( dart.report = dosage_file, dart.counts = counts_file, output.file = temp_base, ploidy = as.numeric(ploidy) ) - + # The output file should be temp_base.vcf output_name <- paste0(temp_base, ".vcf") - + updateProgressBar(session = session, id = "dosage2vcf_pb", value = 100, title = "Complete!") - + return(output_name) - + } else if(input$file_type == "DArT MADC file"){ req(input$madc_file) # First check if the MADC file is valid (a non-fixedAlleleID MADC file) - - #Read only the first column for the first seven rows - first_seven_rows <- read.csv(input$madc_file$datapath, header = FALSE, nrows = 7, colClasses = c(NA, "NULL")) - - #Check if all entries in the first column are either blank or "*" - check_entries <- all(first_seven_rows[, 1] %in% c("", "*")) - - #Check if the MADC file has the filler rows or is processed from updated fixed allele ID pipeline - if (check_entries) { - #Note: This assumes that the first 7 rows are placeholder info from DArT processing + + # Select species botloci + github_path <- "https://raw.githubusercontent.com/Breeding-Insight/BIGapp-PanelHub/refs/heads/long_seq/" + + botloci <- switch(input$species, + "alfalfa" = paste0(github_path, "alfalfa/20201030-BI-Alfalfa_SNPs_DArTag-probe-design_f180bp.botloci"), + "blueberry" = paste0(github_path, "blueberry/20200819-BI-Blueberry_10K_SNPs_forDArT_3K_ref_alt.botloci"), + "cranberry" = paste0(github_path, "cranberry/Cranberry_unique_alignment_126MAS_3K_54BB_rmDupTags_f180bp.botloci"), + "cucumber" = paste0(github_path, "cucumber/Cucumber_DArT3K_10192022_f180bp.botloci"), + "pecan" = paste0(github_path, "pecan/Pecan_unique_alignment_top48_MAS_14K_3K_f180bp.botloci"), + "potato" = paste0(github_path, "potato/potato_20K_SNPset_f180bp_forDArT_3K_f180bp.botloci"), + "strawberry" = paste0(github_path, "strawberry/strawberry_20K_SNPset_f180bp_forDArT_3K_f180bp.botloci"), + "sweetpotato" = paste0(github_path, "sweetpotato/sweetpotato_20K_SNPset_f180bp_forDArT_3K_f180bp.botloci"), + "other" = input$botloci_file$datapath) + + microhapDB <- switch(input$species, + "alfalfa" = paste0(github_path, "alfalfa/alfalfa_allele_db_v001.fa"), + "blueberry" = paste0(github_path, "blueberry/blueberry_allele_db_v001.fa"), + "cranberry" = paste0(github_path, "cranberry/cranberry_allele_db_v001.fa"), + "cucumber" = paste0(github_path, "cucumber/cucumber_allele_db_v001.fa"), + "pecan" = paste0(github_path, "pecan/pecan_allele_db_v001.fa"), + "potato" = paste0(github_path, "potato/potato_allele_db_v001.fa"), + "strawberry" = paste0(github_path, "strawberry/strawberry_allele_db_v001.fa"), + "sweetpotato" = paste0(github_path, "sweetpotato/sweetpotato_allele_db_v000_refAlt109bp.fa"), + "other" = input$hapDB_file$datapath) + + markers_info <- switch(input$species, + "alfalfa" = paste0(github_path, "alfalfa/20201030-BI-Alfalfa_SNPs_DArTag-probe-design_snpID_lut.csv"), + "blueberry" = paste0(github_path, "blueberry/20200819-BI-Blueberry_10K_SNPs_forDArT_3K_chrID_snpID_lut.csv"), + "cranberry" = paste0(github_path, "cranberry/Cranberry_unique_alignment_126MAS_3K_54BB_rmDupTags_lut.csv"), + "cucumber" = paste0(github_path, "cucumber/20221019-BI-Cucumber_DArTag-probe-desgin_snpID_lut.csv"), + "pecan" = paste0(github_path, "pecan/Pecan_unique_alignment_top48_MAS_14K_3K_snpID_lut.csv"), + "potato" = paste0(github_path, "potato/potato_dartag_v2_3915markers_rm7dupTags_6traitMarkers_rm1dup_snpID_lut.csv"), + "strawberry" = paste0(github_path, "strawberry/strawberry_9k_probe_3K_snpID_lut.csv"), + "sweetpotato" = paste0(github_path, "sweetpotato/sweetpotato_20K_SNPset_f180bp_forDArT_3K_snpID_lut.csv"), + "other" = if (!is.null(input$markers_info_file)) input$markers_info_file$datapath else NULL) + + # Guard optional inputs (may be NULL if not uploaded) + microhapDB <- if (length(microhapDB) == 0) NULL else microhapDB + markers_info <- if (length(markers_info) == 0) NULL else markers_info + + #Now perform conversion depending on user options + + # Shared setup for all MADC snp_type branches + if (is.null(input$madc_file$datapath) | input$d2v_output_name == "") { shinyalert( - title = "Raw MADC!", - text = "This MADC file has not been processed by the updated Breeding Insight fixed allele ID pipeline.", - size = "m", + title = "Missing input!", + text = "Upload MADC and/or define a output file name", + size = "s", closeOnEsc = TRUE, closeOnClickOutside = FALSE, html = TRUE, @@ -268,106 +431,101 @@ mod_dosage2vcf_server <- function(input, output, session, parent_session){ showCancelButton = FALSE, animation = TRUE ) - - #Exit the analysis - return() } - - # Select species botloci - botloci <- switch(input$species, - "alfalfa" = "https://raw.githubusercontent.com/Breeding-Insight/BIGapp-PanelHub/refs/heads/main/alfalfa/20201030-BI-Alfalfa_SNPs_DArTag-probe-design_f180bp.botloci", - "blueberry" = "https://raw.githubusercontent.com/Breeding-Insight/BIGapp-PanelHub/refs/heads/main/blueberry/20200819-BI-Blueberry_10K_SNPs_forDArT_3K_ref_alt.botloci", - "cranberry" = "https://raw.githubusercontent.com/Breeding-Insight/BIGapp-PanelHub/refs/heads/main/cranberry/Cranberry_unique_alignment_126MAS_3K_54BB_rmDupTags_f180bp.botloci", - "cucumber" = "https://raw.githubusercontent.com/Breeding-Insight/BIGapp-PanelHub/refs/heads/main/cucumber/Cucumber_DArT3K_10192022_f180bp.botloci", - "lettuce" = "https://raw.githubusercontent.com/Breeding-Insight/BIGapp-PanelHub/refs/heads/main/lettuce/Lettuce_DArT3K_08172022_bait_f180bp.botloci", - "pecan" = "https://raw.githubusercontent.com/Breeding-Insight/BIGapp-PanelHub/refs/heads/main/pecan/Pecan_unique_alignment_top48_MAS_14K_3K_f180bp.botloci", - "sweetpotato" = "https://raw.githubusercontent.com/Breeding-Insight/BIGapp-PanelHub/refs/heads/main/sweetpotato/sweetpotato_20K_SNPset_f180bp_forDArT_3K_f180bp.botloci", - "other" = input$botloci_file$datapath) - - #Now perform conversion depending on user options - if(input$snp_type == "target_off"){ - if (is.null(input$madc_file$datapath) | input$d2v_output_name == "") { + req(input$madc_file) + + # Use a temporary file path without appending .vcf + temp_base <- tempfile() + + # Merge MADC if multiple + if(length(input$madc_file$datapath) > 1){ + updateProgressBar(session = session, id = "dosage2vcf_pb", value = 15, title = "Merging MADC files") + + merged_madc <- paste0(temp_base, ".csv") + + run_ids <- sapply(strsplit(input$madc_file$name, "_"), "[[", 1) + if(length(run_ids) == 0) run_ids <- NULL + + merge_MADCs(madc_list = as.list(input$madc_file$datapath), out_madc = merged_madc, run_ids = run_ids) + read_madc <- merged_madc + } else read_madc <- input$madc_file$datapath + + # The output file should be temp_base.vcf + output_name <- paste0(temp_base, ".vcf") + + updateProgressBar(session = session, id = "dosage2vcf_pb", value = 30, title = "Writing VCF") + + log_lines <- character(0) + tryCatch( + withCallingHandlers( + { + if(input$snp_type == "target_off"){ + madc2vcf_all(madc = read_madc, + botloci_file = botloci, + hap_seq_file = microhapDB, + markers_info = markers_info, + n.cores= input$cores, + rm_multiallelic_SNP = as.logical(advanced_options_all$rm_multiallelic_SNP), + multiallelic_SNP_dp_thr = advanced_options_all$multiallelic_SNP_dp_thr, + multiallelic_SNP_sample_thr = advanced_options_all$multiallelic_SNP_sample_thr, + alignment_score_thr = advanced_options_all$alignment_score_thr, + add_others = as.logical(advanced_options_all$add_others), + others_max_snps = advanced_options_all$others_max_snps, + others_rm_with_indels = as.logical(advanced_options_all$others_rm_with_indels), + out_vcf = output_name, + verbose = TRUE) + } else if(input$snp_type == "target"){ + madc2vcf_targets(read_madc, output_name, get_REF_ALT = as.logical(input$ref_alt), botloci_file = botloci, + markers_info = markers_info, collapse_matches_counts = input$collapse_matches_counts) + } else if(input$snp_type == "multiallelic"){ + madc2vcf_multi( + madc_file = read_madc, + botloci_file = botloci, + outfile = output_name, + markers_info = markers_info, + ploidy = 4L, + verbose = TRUE + ) + } + }, + message = function(m) { + log_lines <<- c(log_lines, conditionMessage(m)) + invokeRestart("muffleMessage") + }, + warning = function(w) { + log_lines <<- c(log_lines, paste0("Warning: ", conditionMessage(w), "\n")) + shinyalert( + title = "Warning", + text = conditionMessage(w), + size = "s", + type = "warning", + showConfirmButton = TRUE, + confirmButtonText = "OK", + confirmButtonCol = "#004192" + ) + invokeRestart("muffleWarning") + } + ), + error = function(e) { + log_lines <<- c(log_lines, paste0("Error: ", conditionMessage(e), "\n")) shinyalert( - title = "Missing input!", - text = "Upload MADC and/or define a output file name", + title = "Error", + text = conditionMessage(e), size = "s", - closeOnEsc = TRUE, - closeOnClickOutside = FALSE, - html = TRUE, type = "error", showConfirmButton = TRUE, confirmButtonText = "OK", - confirmButtonCol = "#004192", - showCancelButton = FALSE, - animation = TRUE + confirmButtonCol = "#004192" ) } - req(input$madc_file) - - # Use a temporary file path without appending .vcf - temp_base <- tempfile() - - # merge MADC if multiple - if(length(input$madc_file$datapath) >1){ - updateProgressBar(session = session, id = "dosage2vcf_pb", value = 15, title = "Merging MADC files") - - merged_madc <- paste0(temp_base, ".csv") - - run_ids <- sapply(strsplit(input$madc_file$name, "_"), "[[",1) - if(length(run_ids) == 0) run_ids <- NULL - - merge_MADCs(madc_list = as.list(input$madc_file$datapath), out_madc = merged_madc,run_ids = run_ids) - read_madc <- merged_madc - } else read_madc <- input$madc_file$datapath - - # The output file should be temp_base.vcf - output_name <- paste0(temp_base, ".vcf") - - updateProgressBar(session = session, id = "dosage2vcf_pb", value = 30, title = "Writing VCF") - - madc2vcf_all(madc = read_madc, - botloci_file = botloci, - hap_seq_file = input$hapDB_file$datapath, - n.cores= input$cores, - rm_multiallelic_SNP = TRUE, - out_vcf = output_name, - verbose = FALSE) - - updateProgressBar(session = session, id = "dosage2vcf_pb", value = 100, title = "Complete!") - - return(output_name) - - } else if(input$snp_type == "target"){ - - # Use a temporary file path without appending .vcf - temp_base <- tempfile() - - # merge MADC if multiple - if(length(input$madc_file$datapath) >1){ - updateProgressBar(session = session, id = "dosage2vcf_pb", value = 15, title = "Merging MADC files") - - merged_madc <- paste0(temp_base, ".csv") - - run_ids <- sapply(strsplit(input$madc_file$name, "_"), "[[",1) - if(length(run_ids) == 0) run_ids <- NULL - - merge_MADCs(madc_list = as.list(input$madc_file$datapath), out_madc = merged_madc,run_ids = run_ids) - read_madc <- merged_madc - } else read_madc <- input$madc_file$datapath - - - # The output file should be temp_base.vcf - output_name <- paste0(temp_base, ".vcf") - - updateProgressBar(session = session, id = "dosage2vcf_pb", value = 30, title = "Writing VCF") - madc2vcf_targets(read_madc, output_name, get_REF_ALT = input$ref_alt, botloci_file = botloci) - - updateProgressBar(session = session, id = "dosage2vcf_pb", value = 100, title = "Complete!") - return(output_name) - } - + ) + + d2vcf_log(paste(log_lines, collapse = "")) + updateProgressBar(session = session, id = "dosage2vcf_pb", value = 100, title = "Complete!") + return(output_name) + } else if (input$file_type == "Dosage Matrix"){ - + if (is.null(input$matrix_file$datapath) | input$d2v_output_name == "" | input$dosage2vcf_ploidy == "") { shinyalert( title = "Missing input!", @@ -387,42 +545,42 @@ mod_dosage2vcf_server <- function(input, output, session, parent_session){ req(input$matrix_file, input$d2v_output_name, input$dosage2vcf_ploidy) #Status updateProgressBar(session = session, id = "dosage2vcf_pb", value = 10, title = "Converting matrix to VCF") - + # Get the uploaded file paths matrix_file <- input$matrix_file$datapath ploidy <- input$dosage2vcf_ploidy - + # Use a temporary file path without appending .vcf temp_base <- tempfile() - + # The output file should be temp_base.vcf output_name <- paste0(temp_base, ".vcf") - + #Status updateProgressBar(session = session, id = "dosage2vcf_pb", value = 50, title = "Writing VCF") - + # Convert to VCF using the BIGr package gmatrix2vcf(Gmat.file = matrix_file, ploidy = ploidy, output.file = output_name, dosageCount = input$dosage_counts) - + #Status updateProgressBar(session = session, id = "dosage2vcf_pb", value = 100, title = "Complete!") - + return(output_name) - + } - + }) - + # Only make available the download button when analysis is finished output$mybutton <- renderUI({ if(isTruthy(vcf_out())) downloadButton(ns("download_d2vcf"), "Download VCF file", class = "butt") }) - - + + ##This is for the DArT files conversion to VCF output$download_d2vcf <- downloadHandler( filename = function() { @@ -433,14 +591,14 @@ mod_dosage2vcf_server <- function(input, output, session, parent_session){ bgzip_compress(vcf_out(), file) } ) - + ##Summary Info d2vcf_summary_info <- function() { #Handle possible NULL values for inputs report_file_name <- if (!is.null(input$report_file$name)) input$report_file$name else "No file selected" counts_file_name <- if (!is.null(input$counts_file$name)) input$counts_file$name else "No file selected" selected_ploidy <- if (!is.null(input$dosage2vcf_ploidy)) as.character(input$dosage2vcf_ploidy) else "Not selected" - + #Print the summary information cat( "BIGapp Dosage2VCF Summary\n", @@ -464,7 +622,7 @@ mod_dosage2vcf_server <- function(input, output, session, parent_session){ sep = "" ) } - + # Popup for analysis summary observeEvent(input$d2vcf_summary, { showModal(modalDialog( @@ -480,8 +638,8 @@ mod_dosage2vcf_server <- function(input, output, session, parent_session){ ) )) }) - - + + # Download Summary Info output$download_d2vcf_info <- downloadHandler( filename = function() { diff --git a/R/mod_gwas.R b/R/mod_gwas.R index d308a9c..b72b853 100644 --- a/R/mod_gwas.R +++ b/R/mod_gwas.R @@ -190,7 +190,7 @@ mod_gwas_server <- function(input, output, session, parent_session){ # expand specific box updateBox(id = "GWAS_box", action = "toggle", session = parent_session) }) - + #Default choices trait_options <- reactiveValues( missing_data = "NA", @@ -198,25 +198,25 @@ mod_gwas_server <- function(input, output, session, parent_session){ sample_column = NULL, file_type = NULL ) - + #UI popup window for input observeEvent(input$phenotype_file, { req(input$phenotype_file) #Get the column names of the csv file info_df <- read.csv(input$phenotype_file$datapath, header = TRUE, check.names = FALSE, nrows=2) info_df[,1] <- as.character(info_df[,1]) #Makes sure that the sample names are characters instead of numeric - + # Read first 5 rows for preview preview_data <- tryCatch({ head(read.csv(input$phenotype_file$datapath, nrows = 5, na.strings=trait_options$missing_data),5) }, error = function(e) { NULL }) - + showModal(modalDialog( title = "Trait File Options", size= "l", - + selectInput( inputId = ns('missing_data'), label = 'Missing Data Value', @@ -243,7 +243,7 @@ mod_gwas_server <- function(input, output, session, parent_session){ label = 'Sample ID Column', choices = colnames(info_df) ), - + if (!is.null(preview_data)) { div( h4( @@ -263,20 +263,20 @@ mod_gwas_server <- function(input, output, session, parent_session){ p("Could not load file preview.") ) }, - + footer = tagList( actionButton(ns("save_trait_options"), "Save") ) )) - + # Render the preview table output$file_preview <- renderTable({ req(preview_data) preview_data }) - + }) - + output$custom_missing_msg <- renderText({ if (input$missing_data == "Custom" && nchar(input$custom_missing) == 0) { "Please enter a custom missing value." @@ -285,7 +285,7 @@ mod_gwas_server <- function(input, output, session, parent_session){ } }) - + #Close popup window when user "saves options" observeEvent(input$save_trait_options, { trait_options$missing_data <- input$missing_data @@ -293,7 +293,7 @@ mod_gwas_server <- function(input, output, session, parent_session){ trait_options$sample_column <- input$sample_column #trait_options$file_type # Save other inputs as needed - + if (input$missing_data == "Custom" && nchar(input$custom_missing) == 0) { # Validation failed: display warning and prevent modal closure showNotification( @@ -303,10 +303,10 @@ mod_gwas_server <- function(input, output, session, parent_session){ ) return() # Stop further execution and keep the modal open } - + removeModal() # Close the modal after saving }) - + #Call some plots to NULL so that the spinners do not show before analysis output$bic_plot <- renderDT(NULL) @@ -387,7 +387,7 @@ mod_gwas_server <- function(input, output, session, parent_session){ } else { phenotype_file <- read.csv(input$phenotype_file$datapath, header = TRUE, check.names = FALSE, na.strings = trait_options$missing_data) } - + # Make the sample ID column the first column in the dataframe sample_col_name <- input$sample_column phenotype_file <- phenotype_file[, c(sample_col_name, setdiff(names(phenotype_file), sample_col_name))] @@ -458,29 +458,29 @@ mod_gwas_server <- function(input, output, session, parent_session){ #Convert VCF file if submitted #### VCF sanity check checks <- vcf_sanity_check(input$gwas_file$datapath, max_markers = 10000) - + error_if_false <- c( "VCF_header", "VCF_columns", "unique_FORMAT", "GT", "samples", "chrom_info", "pos_info", "VCF_compressed" ) - + error_if_true <- c( - "multiallelics", "phased_GT", "mixed_ploidies", + "multiallelics", "mixed_ploidies", "duplicated_samples", "duplicated_markers" ) - + warning_if_false <- c("ref_alt","max_markers") - - checks_result <- vcf_sanity_messages(checks, - error_if_false, - error_if_true, - warning_if_false = warning_if_false, + + checks_result <- vcf_sanity_messages(checks, + error_if_false, + error_if_true, + warning_if_false = warning_if_false, warning_if_true = NULL, input_ploidy = ploidy) - + if(checks_result) return() # Stop the analysis if checks fail ######### - + vcf <- read.vcfR(input$gwas_file$datapath, verbose = FALSE) #Extract GT @@ -898,8 +898,8 @@ mod_gwas_server <- function(input, output, session, parent_session){ ) }) - - + + output$download_viewpoly <- downloadHandler( filename = function() { paste0("BIGapp_Viewpoly_GWASpoly.RData") @@ -909,7 +909,7 @@ mod_gwas_server <- function(input, output, session, parent_session){ save(temp, file = file) } ) - + #Download files for GWAS output$download_gwas_file <- downloadHandler( filename = function() { diff --git a/README.md b/README.md index 90199ef..76a3e6f 100644 --- a/README.md +++ b/README.md @@ -139,4 +139,4 @@ BIGapp development is supported by [Breeding Insight](https://www.breedinginsigh If you use BIGapp in your research, please cite: -Sandercock A.M., Peel M.D., Tanigut C.H., Chinchilla-Vargas J., Chen S., Sapkota M., Lin M., Zhao D., Ackerman A.J., Basnet B.R., Beil C.T., Sheehan M.J. (2025). BIGapp: A User-Friendly Genomic Tool Kit Identified Quantitative Trait Loci for Creeping Rootedness in Alfalfa (_Medicago sativa_ L.)., The Plant Genome. doi:https://doi.org/10.1002/tpg2.70067 +Sandercock A.M., Peel M.D., Taniguti C.H., Chinchilla-Vargas J., Chen S., Sapkota M., Lin M., Zhao D., Ackerman A.J., Basnet B.R., Beil C.T., Sheehan M.J. (2025). BIGapp: A User-Friendly Genomic Tool Kit Identified Quantitative Trait Loci for Creeping Rootedness in Alfalfa (_Medicago sativa_ L.)., The Plant Genome. doi:https://doi.org/10.1002/tpg2.70067 diff --git a/inst/app/www/IFAS.jpg b/inst/app/www/IFAS.jpg new file mode 100644 index 0000000..d108a43 Binary files /dev/null and b/inst/app/www/IFAS.jpg differ diff --git a/inst/help_files/DArT_Report2VCF_par.Rmd b/inst/help_files/DArT_Report2VCF_par.Rmd index 6968708..5d57eb1 100644 --- a/inst/help_files/DArT_Report2VCF_par.Rmd +++ b/inst/help_files/DArT_Report2VCF_par.Rmd @@ -4,38 +4,183 @@ output: html_document date: "2024-08-29" --- -* **DArT MADC File** +## **DArT MADC File** The DArT MADC (Missing Allele Discovery Counts) file is a comma-separated file provided by DArT from a sequencing project. It includes the following key columns: -* AlleleID: Typically contains the chromosome, position, reference or alternative allele designation, and the allele ID according to the haplotype database (e.g., `Chr01_000084128|Ref_0001`). -* CloneID: Represents the chromosome and position (e.g., `Chr01_000084128`). -* AlleleSequence: The amplicon sequence (e.g., `GAGTGTGAAGATTTGGACAAAAGAGGTTGGTTTTTACTGTTATGGCATTTATCTCCTTATAAAATTTTGTATTTTTTTTGT`). -* Sample Columns: Named with sample IDs and containing counts of each allele per sample. +- AlleleID: Typically contains the chromosome, position, reference or alternative allele designation, and the allele ID according to the haplotype database (e.g., `Chr01_000084128|Ref_0001`). +- CloneID: Represents the chromosome and position (e.g., `Chr01_000084128`). +- AlleleSequence: The amplicon sequence (e.g., `GAGTGTGAAGATTTGGACAAAAGAGGTTGGTTTTTACTGTTATGGCATTTATCTCCTTATAAAATTTTGTATTTTTTTTGT`). +- Sample Columns: Named with sample IDs and containing counts of each allele per sample. -**Important**: BIGapp requires fixed AlleleIDs tagged with a respective number (e.g., suffix: `_0001`). Fixed AlleleIDs are generated by matching the MADC contents to the haplotype database. If your MADC file does not contain fixed AlleleIDs, please contact Breeding Insight. +We refer here to two different versions of this format, one named **raw MADC** and the other named **MADC with fixed AlleleIDs**. +The first one contains the original AlleleIDs as provided by DArT, which may not be fixed (i.e., they may not have a suffix with a number). +The second version contains fixed AlleleIDs, which can be generated by HapApp that matches the MADC contents to the haplotype database. +HapApp not only fixes the AlleleIDs but also makes corrections such as removing adaptor sequences, adding missing REF and ALT sequences, replacing IUPAC codes, +generating the markers information file (`_lut.csv`) with chromosome, positions, REF and ALT bases, whether the target is an indel, indel length and indel type (insertion or deletion). +This information is critical for the conversion to VCF, especially if you want to extract target SNPs with their REF and ALT bases. -* **Target vs. Off-Target SNPs** + +### **Target vs. Off-Target SNPs** vs. **Microhaplotypes** If you provide a MADC file, you can choose between extracting: * Only **target SNPs** – These are the SNPs for which probes were specifically designed. If your panel includes 3,000 SNPs, your output VCF file should contain exactly 3,000 target SNPs. * Both **target** and **off-target** SNPs – In addition to target SNPs, BIGapp will identify other SNPs within the amplicon region. +* **Microhaplotypes** – These are combinations of closely linked SNPs within the same amplicon that can be treated as a single multi-allelic marker. + +### **Requirements** + +BIGapp will run a series of checks to ensure that the necessary information is available for the type of SNPs you want to extract. +It will inform you if the operation requested is possible or not for your dataset and files provided. + +Here are some of the checks that BIGapp will perform: + +- If MADC has the expected columns +- Presence of columns and rows with all NA +- Presence of IUPAC codes on AlleleSequence +- Presence of lower case bases on AlleleSequence +- Presence of Indels +- If CloneID follows the format Chr_Pos +- If all Ref Alleles have a corresponding Alt and vice-versa +- If "Other" exists in the MADC AlleleID (e.g. `Chr01_000084128|Other_0003`), which indicates the presence of additional alleles +beyond the target SNPs. If "Other" alleles are present, BIGapp will inform the user and direct them to the `madc2vcf_all` +function if they want to extract these additional SNPs as well + +#### Targets + +If you decide to extract only the **target SNPs**, BIGapp function `madc2vcf_targets` will be used. -* **Target SNPs** +You can choose: -If you decide to extract only the **target SNPs**, you can choose if reference (REF) and alternative (ALT) bases should also be extracted. For extracting this information from the MADC file, BIGapp compares the reference sequences with the alternative, finds the polymorphic site and notate the changing base. This requires: +- `Collapse Matches Counts`: the MADC contains the target tags named as `Chr_Pos|Ref_0001` and `Chr_Pos|Alt_0002`, +(or `Chr_Pos|Ref` and `Chr_Pos|Alt` if raw MADC) and also contains the `Chr_Pos|RefMatch_0003` and `Chr_Pos|AltMatch_0004` tags, +which represent off-target SNPs and bases at the target position matching the Ref or the Alt. If you choose to collapse the counts, +the counts of the RefMatch and AltMatch tags will be added to the counts of the Ref and Alt tags, respectively. If not, +the RefMatch and AltMatch tags will be discarded and only the Ref and Alt tags will be kept. + +- `Select Marker Type`: if reference (REF) and alternative (ALT) bases should also be extracted. +BIGapp will extract this information from the `_lut.csv` file if provided; otherwise, it will extract it from the MADC file by comparing + the reference sequences with the alternative, finding the polymorphic site and notating the changing base. + +This requires: - Reference (`Ref_0001`) and alternative (`Alt_0002`) sequences for each tag. If one or both is missing, the tag will be discarded. - A single polymorphism between them. If more than one is found, the tag is discarded - - A `.botloci` file to inform which tag sequences should be converted to its reverse complement. For BI-supported marker panel **species** this file is already embedded within the app, no upload is required. + - A `.botloci` file to inform which tag sequences should be converted to its reverse complement. This file is generated by HapApp when you upload a MADC file with unfixed AlleleIDs. For BI-supported marker panel **species** this file is already embedded within the app, no upload is required. + - or + - A markers information file (`_lut.csv`) with chromosome, position, REF and ALT bases, indel length and indel type (insertion or deletion) for each target SNP. This file is generated by HapApp when you upload a MADC file with unfixed AlleleIDs. For BI-supported marker panel **species** this file is already embedded within the app, no upload is required. + +If you choose to not extract REF and ALT information, all tags will be kept, but your VCF will have missing data (`.`) in the REF and ALT fields. + +`madc2vcf_targets` doesn't run if: + + - MADC Column names are not correct + - Ignores Other alleles, but informs the user if they exist and directs them to `madc2vcf_all` in case they want to extract them as well + +See the table for a summary of `madc2vcf_targets` requirements according to MADC content: -If you choose to not extract REF and ALT information, all tags will be kept, but your VCF will have missing data (`.`) in the REF and ALT fields. The only process in BIGapp that will require this information is the Dosage Calling with PolyRAD. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
check statusget_REF_ALTRequires
IUPACdetectedTRUEmarkers_info REF/ALT
detectedFALSE-
not detectedTRUEbotloci or markers_info REF/ALT
not detectedFALSE-
IndelsdetectedTRUEmarkers_info REF/ALT
detectedFALSE-
not detectedTRUEbotloci or markers_info REF/ALT
not detectedFALSE-
CloneID has Chr_Pos formatdetectedTRUEbotloci or markers_info REF/ALT
detectedFALSE-
not detectedTRUEmarkers_info CHR/POS/REF/ALT or markers_info CHR/POS/ + botloci
not detectedFALSEmarkers_info CHR/POS
FixAlleleIDsdetectedTRUEbotloci or markers_info REF/ALT
detectedFALSE-
not detectedTRUEmarkers_info REF/ALT
not detectedFALSE-
-* **Target and off-target (all) SNPs** +
-To identify **off-target** SNPs, BIGapp aligns each amplicon against its reference (via Bioconductor’s `Biostrings` + `pwalign`) to uncover additional polymorphisms. This procedure requires: +#### Off-target SNPs + +To identify **off-target** SNPs, BIGr function `madc2vcf_all` will be used. +`madc2vcf_all` aligns each amplicon against its reference (via Bioconductor’s `Biostrings` + `pwalign`) to uncover additional +polymorphisms. This procedure requires: - Reference & alternative sequences (e.g. `Ref_0001`, `Alt_0002`) for every tag - If a FASTA haplotype database is supplied, any missing sequences in the MADC file will be retrieved automatically. @@ -44,42 +189,127 @@ To identify **off-target** SNPs, BIGapp aligns each amplicon against its referen - Tags with zero or multiple variants are excluded. - `.botloci` file - Specifies which tags must be reverse-complemented. For BI-supported species, this file is bundled within the app—no upload needed. + - If your MADC contains indels as target, the markers information file (`_lut.csv`) with chromosome, position, REF and ALT bases, indel length and indel type (insertion or deletion) for each target SNP. This file is generated by HapApp when you upload a MADC file with unfixed AlleleIDs. For BI-supported marker panel **species** this file is already embedded within the app, no upload is required. + +The function has some parameters that allow you to customize resource usage, the stringency of the alignment, and tag filtering: + +- `Number of CPU Cores` to specify the number of cores to use for the alignment. By default, it uses a single core. + +Click on `Advanced Options` to see more parameters: -* **Specie** +- `Alignment Score Threshold` to specify the minimum alignment score for a SNP to be called. By default, it is set to 40, which means that only tags (sequences) with an alignment score of 40 or higher will be included in the output VCF file. +- `Remove Multiallelic SNPs` to specify if you want to remove SNPs with more than two variations (e.g. Ref A and Alts C and T). By default, it is set to TRUE, which means that multiallelic SNPs will be removed. +If set to FALSE, multiallelic SNPs will be kept in the output VCF file but you can specify filter thresholds: + - `Multiallelic SNP Depth Threshold` specify the minimum depth by tag threshold for filtering low-frequency SNP alleles. Default is 0 + - `Multiallelic SNP Sample Threshold` specify the minimum number of samples threshold for filtering low-frequency SNP alleles. Default is 0. +- `Add Others` to specify if you want to consider SNPs in the "Other" alleles (e.g. `Chr01_000084128|Other_0003`) for the output VCF file. By default, it is set to FALSE, which means that "Other" alleles will be discarded. +If set to TRUE, "Other" alleles will be included and you can define some filters: + - `Others Max SNPs` to specify the maximum number of SNPs allowed in the "Other" tags alleles. Default is 5. If an "Other" allele has more than this number of SNPs, it will be discarded. + - `Remove Others with Indels` to specify if you want to remove "Other" alleles with indels. By default, it is set to TRUE, which means that "Other" alleles with indels will be removed. -Specify the species so BIGapp can pick the correct `.botloci` file. BI-supported species files live in the [BIGapp-PanelHub](https://github.com/Breeding-Insight/BIGapp-PanelHub) repo and are auto-loaded by the app. +`madc2vcf_all` doesn't run if: -You can also input **multiple MADC files**. BIGapp will merge the files before the conversion. If you have repeated samples IDs, a suffix with the run ID (beginning of the file name) will be added to the repeated samples. + - MADC Column names are not correct + - If it is raw MADC + - If it has IUPAC codes -Conversion is powered by the `madc2vcf_targets` and `madc2vcf_all` functions in the [BIGr](https://github.com/Breeding-Insight/BIGr) R package. You can also run them on the command line to access advanced options (e.g., multiallelic SNP support or custom alignment thresholds). +See the table for `madc2vcf_all` requirements according to MADC content: -* **DArTag Dosage Report** + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Check statusRequires
Indelsdetectedmarkers_info REF/ALT/IndelPos/IndelLength + botloci
not detectedbotloci
CloneID has Chr_Pos formatdetectedbotloci
not detectedmarkers_info CHR/POS + botloci
All Ref tags have corresponding Altdetectedbotloci
not detectedbotloci + microhapdb
-The DArT Dosage Report is a tab-separated file provided by DArT from a sequencing project. It contains genotypic information for each target marker across all samples in the sequencing project. +
-File Structure: +#### Microhaplotypes -* Markers are in rows, and samples are in columns. -* The file includes several summary metric columns before the sample dosage columns. -* Dosage values represent the number of alternative alleles present in each sample, ranging from 0 to the species’ ploidy number. +To identify **microhaplotypes**, BIGapp looks for multiple polymorphisms within the same amplicon. This process requires: -Dosage Interpretation (Example: Tetraploid Species - 4x) + - Reference & alternative sequences (e.g. `Ref_0001`, `Alt_0002`) for every tag + - At least two polymorphic sites within the same amplicon + - Tags with fewer than two variants are excluded. + - `.botloci` file + - Specifies which tags must be reverse-complemented. For BI-supported species, this file is bundled within the app—no upload needed. + - If your MADC contains indels as target, the markers information file (`_lut.csv`) with chromosome, position, REF and ALT bases, indel length and indel type (insertion or deletion) for each target SNP. This file is generated by HapApp when you upload a MADC file with unfixed AlleleIDs. For BI-supported marker panel **species** this file is already embedded within the app, no upload is required. -* 4 → Homozygous for the reference allele (AAAA) -* 3 → Heterozygous with three reference alleles and one alternative (AAAB) -* 2 → Heterozygous with two reference and two alternative alleles (AABB) -* 1 → Heterozygous with one reference allele and three alternative (ABBB) -* 0 → Homozygous for the alternative allele (BBBB) +`madc2vcf_multi` doesn’t run if: -This codification is commonly used as input for various software, including AGHmatrix, rrBLUP, and mappoly. However, some programs require not only dosage information but also read counts (see description below). + - MADC Column names are not correct + - If it is raw MADC + - If it has IUPAC codes + - if Ref or Alt tags are absent -To accommodate this, BIGapp integrates both dosage and read count data, generating a VCF file—the most widely accepted and standardized format for genotypic information, ensuring compatibility with a broad range of software. +See the table for `madc2vcf_multi` requirements according to MADC content: - -* **DArTag Counts File** + + + + + + + + + + + + + + + + + + + + + + + + +
Check statusRequires
Indelsdetectedbotloci
not detectedbotloci
CloneID has Chr_Pos formatdetectedbotloci
not detectedmarkers_info CHR/POS + botloci
-The DArT counts file is a tab-separated file provided by DArT from a sequencing project. It contains the read count information for the reference and alternate allele at each target marker. The marker information is in the rows and the samples are in the columns. There are several informational columns that precede the sample columns. There are two versions of this file. The “collapsed counts” version contains the target markers that includes their multiallic read counts in their total counts. The “Counts” file contains the read counts for the target markers only (excluding the multiallelic read count information). BIGapp will merge the counts and the dosage information to generate a VCF file format —the most widely accepted and standardized format for genotypic information, ensuring compatibility with a broad range of software. - -* **Species Ploidy** +
+ +### **Species** + +Specify the species so BIGapp can pick the correct `.botloci`, `_lut.csv`, and microhaplotype database file. BI-supported species files live in the [BIGapp-PanelHub](https://github.com/Breeding-Insight/BIGapp-PanelHub) repo and are auto-loaded by the app. + +### **Species Ploidy** Specifies the ploidy level of the species. The current analysis supports both diploids and autopolyploids. + +### **Multiple MADC files** + +You can also input **multiple MADC files**. BIGapp will merge the files before the conversion. If you have repeated sample IDs, a suffix with the run ID (beginning of the file name) will be added to the repeated samples. + +Conversion is powered by the `madc2vcf_targets`, `madc2vcf_all`, and `madc2vcf_multi` functions in the [BIGr](https://github.com/Breeding-Insight/BIGr) R package. diff --git a/man/polyRAD2vcf.Rd b/man/polyRAD2vcf.Rd index 2bc0ec6..2efd427 100644 --- a/man/polyRAD2vcf.Rd +++ b/man/polyRAD2vcf.Rd @@ -2,25 +2,78 @@ % Please edit documentation in R/DosageCall_functions.R \name{polyRAD2vcf} \alias{polyRAD2vcf} -\title{Function to convert polyRAD output to VCF} +\title{Convert polyRAD Genotypes to a VCF File} \usage{ polyRAD2vcf(geno, model, vcf_path, hindhe.obj, ploidy, output.file, session) } \arguments{ -\item{geno}{ToDo} +\item{geno}{A marker-by-sample dosage matrix (or object coercible to a +matrix) containing polyRAD genotype estimates. Row names must match the +variant IDs in \code{vcf_path} (before allele suffixes), and columns +correspond to samples.} -\item{model}{ToDo} +\item{model}{Character string indicating the polyRAD model used to fit the +genotypes (e.g. \code{"Kriging"}, \code{"SampleDepth"}, etc.). This value +is recorded in the VCF header for provenance.} -\item{vcf_path}{ToDo} +\item{vcf_path}{Path to the template VCF file used to provide variant +metadata (CHROM, POS, ID, REF, ALT, QUAL, FILTER, INFO, and original +FORMAT fields).} -\item{hindhe.obj}{ToDo} +\item{hindhe.obj}{Object containing Hind/He values per locus (typically a +matrix or data frame from polyRAD). Columns must correspond to alleles +for each marker; the function computes the mean Hind/He per marker and +adds it as an \code{INFO} field (\code{HH}) in the output VCF.} -\item{ploidy}{ToDo} +\item{ploidy}{Integer giving the ploidy level used for the polyRAD analysis. +This is used to convert dosage values into genotype strings (e.g. +\code{"0/1"}, \code{"1/1"}, \code{"0/0/1"}, etc.).} -\item{output.file}{ToDo} +\item{output.file}{Base name for the output VCF file (without extension). +The function will append \code{".vcf"} to this name and write the +resulting VCF to disk in the current working directory (unless a path +is included).} -\item{session}{ToDo} +\item{session}{Optional Shiny \code{session} object. Currently included for +compatibility with Shiny modules; not used internally in this function.} +} +\value{ +No value is returned. The function is called for its side effect of writing +a VCF file to \code{paste0(output.file, ".vcf")}. } \description{ -Function to convert polyRAD output to VCF +Takes genotype dosage estimates from polyRAD and appends them to an existing +VCF, adding Hind/He summary statistics and custom FORMAT/INFO fields. +The original variant metadata (CHROM, POS, REF, ALT, etc.) are preserved +from a template VCF, and new polyRAD-derived fields are written for each +sample and locus. +} +\details{ +The function: +\itemize{ +\item Flips and formats polyRAD dosages using \code{\link[BIGr]{flip_dosage}}. +\item Harmonizes marker IDs between \code{geno}, \code{hindhe.obj}, and +the template VCF by stripping allele suffixes. +\item Reads the template VCF with \code{\link[vcfR]{read.vcfR}} and +updates the header to include: +\itemize{ +\item \code{##INFO=} for Hind/He per locus. +\item \code{##FORMAT=} for polyRAD-estimated genotypes +(in allele-count space). +\item \code{##FORMAT=} for the dosage count of reference +alleles. +\item Version tags for \code{polyRAD} and \code{BIGapp}. +} +\item Converts dosage values to genotype strings (\code{GT}-like, +\code{"0/0"}, \code{"0/1"}, \code{"1/1"}, etc.) given the specified +\code{ploidy}. +\item Builds a new FORMAT field for each variant combining: +\code{GTP}, \code{UD}, and the original FORMAT fields from the template. +\item Writes the updated header and variant table to a VCF file. +} + +Missing dosages are converted to a missing genotype string (e.g. +\code{"./."} for diploids) and represented as \code{"."} in the dosage +field. Variants are sorted by chromosome and numeric position before +writing. }