diff --git a/CHANGELOG.md b/CHANGELOG.md index f64400e..68bd750 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,7 +3,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). -## v0.7.0dev - [date] +## v0.1.0dev Initial release of nf-core/scflow, created with the [nf-core](https://nf-co.re/) template. diff --git a/assets/NO_FILE.tsv b/assets/NO_FILE.tsv new file mode 100644 index 0000000..e69de29 diff --git a/bin/check_inputs.r b/bin/check_inputs.r index 3196fa4..8115c6e 100755 --- a/bin/check_inputs.r +++ b/bin/check_inputs.r @@ -48,7 +48,9 @@ manifest <- read.delim(args$manifest) # check manifest paths exist check_exists <- function(filepath) { - RCurl::url.exists(filepath) | dir.exists(filepath) + RCurl::url.exists(filepath) | + dir.exists(filepath) | + any(startsWith(filepath, c("gs://", "s3://"))) } dir_exists <- purrr::pmap_lgl(manifest, ~ check_exists(as.character(..2))) diff --git a/bin/scflow_dge.r b/bin/scflow_dge.r index 1893c7a..d419394 100755 --- a/bin/scflow_dge.r +++ b/bin/scflow_dge.r @@ -146,6 +146,14 @@ required$add_argument( help = "p-value cutoff for DE [default %(default)s]" ) +required$add_argument( + "--n_label", + type = "integer", + default = 5, + metavar = "number", + help = "Number of genes to be highlighted on volcano plot" +) + required$add_argument( "--ensembl_mappings", help = "path to ensembl mappings file", @@ -179,7 +187,9 @@ args$pseudobulk <- as.logical(args$pseudobulk) args$force_run <- as.logical(args$force_run) if (tolower(args$random_effects_var) == "null") args$random_effects_var <- NULL -args$max_cores <- if (toupper(args$max_cores) == "NULL") NULL else { +args$max_cores <- if (toupper(args$max_cores) == "NULL") { + NULL +} else { as.numeric(as.character(args$max_cores)) } @@ -202,6 +212,7 @@ cli::cli_alert(sprintf( n_cores )) + library(scFlow) # ____________________________________________________________________________ @@ -221,8 +232,10 @@ if (args$pseudobulk) { sce_subset <- pseudobulk_sce( sce_subset, keep_vars = c( - args$dependent_var, args$confounding_vars, args$random_effects_var - ), + args$dependent_var, + args$confounding_vars, + args$random_effects_var + ), assay_name = "counts", celltype_var = args$celltype_var, sample_var = args$sample_var @@ -249,26 +262,49 @@ de_results <- perform_de( species = getOption("scflow_species") ) -file_name <- paste0(args$celltype, "_", - args$de_method, pb_str, "_") +file_name <- paste0( + args$celltype, "_", + args$de_method, pb_str, "_" +) for (result in names(de_results)) { if (dim(de_results[[result]])[[1]] > 0) { write.table(de_results[[result]], - file = file.path(getwd(), - paste0(file_name, result, "_DE.tsv")), - quote = FALSE, sep = "\t", col.names = TRUE, row.names = FALSE) + file = file.path( + getwd(), + paste0(file_name, result, "_DE.tsv") + ), + quote = FALSE, sep = "\t", col.names = TRUE, row.names = FALSE + ) + report_de(de_results[[result]], - report_folder_path = file.path(getwd()), - report_file = paste0(file_name, result, "_scflow_de_report")) + fc_threshold = args$fc_threshold, + pval_cutoff = args$pval_cutoff, + n_label = args$n_label, + report_folder_path = file.path(getwd()), + report_file = paste0(file_name, result, "_scflow_de_report") + ) + print("report generated") - png(file.path(getwd(), - paste0(file_name, result, "_volcano_plot.png")), - width = 247, height = 170, units = "mm", res = 600) - print(attr(de_results[[result]], "plot")) - dev.off() + p <- scFlow::volcano_plot( + dt = de_results[[result]], + fc_threshold = args$fc_threshold, + pval_cutoff = args$pval_cutoff, + n_label = args$n_label + ) + + ggplot2::ggsave( + filename = file.path( + getwd(), + paste0(file_name, result, "_volcano_plot.png") + ), + plot = p, + width = 7, height = 5, units = "in", dpi = 600 + ) + + print("Volcano plot generated") } else { print(sprintf("No DE genes found for %s", result)) - } + } } diff --git a/bin/scflow_finalize_sce.r b/bin/scflow_finalize_sce.r index 58813af..2b4e473 100755 --- a/bin/scflow_finalize_sce.r +++ b/bin/scflow_finalize_sce.r @@ -5,12 +5,11 @@ # ____________________________________________________________________________ # Initialization #### +options(mc.cores = future::availableCores()) + ## ............................................................................ ## Load packages #### library(argparse) -library(scFlow) -library(magrittr) -library(SingleCellExperiment) ## ............................................................................ ## Parse command-line arguments #### @@ -106,6 +105,13 @@ required$add_argument( metavar = "N" ) +required$add_argument( + "--max_cores", + default = NULL, + help = "override for lower cpu core usage", + metavar = "N", + required = TRUE +) ### . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .. ### Pre-process args #### @@ -117,6 +123,33 @@ args$metric_vars <- strsplit(args$metric_vars, ",")[[1]] options("scflow_reddimplot_pointsize" = args$reddimplot_pointsize) options("scflow_reddimplot_alpha" = args$reddimplot_alpha) +args$max_cores <- if (toupper(args$max_cores) == "NULL") { + NULL +} else { + as.numeric(as.character(args$max_cores)) +} + +# ____________________________________________________________________________ +# Delay Package Loading for Optional Max Cores Override + +n_cores <- future::availableCores(methods = "mc.cores") + +if (is.null(args$max_cores)) { + options(mc.cores = n_cores) +} else { + options(mc.cores = min(args$max_cores, n_cores)) +} + +cli::cli_alert(sprintf( + "Using %s cores on system with %s available cores.", + getOption("mc.cores"), + n_cores +)) + +library(scFlow) +library(magrittr) +library(SingleCellExperiment) + ## ............................................................................ ## Start #### @@ -163,26 +196,28 @@ colnames(celltypes) <- c("celltype", "n_cells") write.table( data.frame(celltypes), file = "celltypes.tsv", - row.names = FALSE, col.names = TRUE, quote = FALSE, sep = "\t") + row.names = FALSE, col.names = TRUE, quote = FALSE, sep = "\t" +) ### Save Marker Gene Plots folder_path <- file.path(getwd(), "celltype_marker_plots") dir.create(folder_path) for (group in names(sce@metadata$markers)) { - pwidth <- max(10, - length( - unique(sce@metadata$markers[[group]]$marker_plot$data$Group) - ) + pwidth <- max( + 10, + length(unique(sce@metadata$markers[[group]]$marker_plot$data$Group)) ) - pheight <- length( - unique(sce@metadata$markers[[group]]$marker_plot$data$Gene) - ) + pheight <- length(unique(sce@metadata$markers[[group]]$marker_plot$data$Gene)) + p <- sce@metadata$markers[[group]]$marker_plot + plot_file_name <- paste0(group, "_markers") + # save PNG png(file.path(folder_path, paste0(plot_file_name, ".png")), - width = pwidth * 12, height = pheight * 5, units = "mm", res = 600) + width = pwidth * 12, height = pheight * 5, units = "mm", res = 600 + ) print(p) dev.off() @@ -195,14 +230,12 @@ for (group in names(sce@metadata$markers)) { units = "mm", scale = 1 ) - } ### Save Marker Gene Tables folder_path <- file.path(getwd(), "celltype_marker_tables") dir.create(folder_path) for (group in names(sce@metadata$markers)) { - marker_test_file_name <- paste0(group, "_markers_test.tsv") top_markers_file_name <- paste0(group, "_top_markers.tsv") @@ -221,7 +254,6 @@ for (group in names(sce@metadata$markers)) { col.names = TRUE, sep = "\t" ) - } @@ -231,5 +263,3 @@ write_sce( folder_path = file.path(getwd(), "final_sce") ) -## ............................................................................ -## Clean up #### diff --git a/bin/scflow_integrate.r b/bin/scflow_integrate.r index b9879e0..4ff89e6 100755 --- a/bin/scflow_integrate.r +++ b/bin/scflow_integrate.r @@ -5,12 +5,12 @@ # ____________________________________________________________________________ # Initialization #### -options(mc.cores = future::availableCores()) +options(mc.cores = max(2, future::availableCores(methods = "mc.cores"))) ## ............................................................................ ## Load packages #### -library(argparse) library(scFlow) +library(argparse) library(parallel) ## ............................................................................ @@ -33,14 +33,14 @@ required$add_argument( required$add_argument( "--method", required = TRUE, - help = "The integration method to use", + help ="The integration method to use", metavar = "Liger" ) required$add_argument( "--unique_id_var", required = TRUE, - help = "Unique id variable", + help ="Unique id variable", metavar = "manifest" ) @@ -48,7 +48,7 @@ required$add_argument( "--take_gene_union", default = FALSE, required = TRUE, - help = "Whether to fill out raw.data matrices with union of genes", + help ="Whether to fill out raw.data matrices with union of genes across all datasets (filling in 0 for missing data)", metavar = "Boolean" ) @@ -56,7 +56,7 @@ required$add_argument( "--remove_missing", default = TRUE, required = TRUE, - help = "Remove non-expressive genes and cells", + help ="Whether to remove cells not expressing any measured genes, and genes not expressed in any cells", metavar = "Boolean" ) @@ -65,7 +65,7 @@ required$add_argument( default = 3000, type = "integer", required = TRUE, - help = "Number of genes to find for each dataset", + help ="Number of genes to find for each dataset", metavar = "N" ) @@ -73,23 +73,15 @@ required$add_argument( "--combine", default = "union", required = TRUE, - help = "How to combine variable genes across experiments", + help ="How to combine variable genes across experiments", metavar = "union,intersect" ) -required$add_argument( - "--keep_unique", - default = FALSE, - required = TRUE, - help = "Keep genes that occur only in one dataset", - metavar = "Boolean" -) - required$add_argument( "--capitalize", default = FALSE, required = TRUE, - help = "Capitalize gene names to match homologous genes(i.e. across species)", + help ="Capitalize gene names to match homologous genes(ie. across species)", metavar = "Boolean" ) @@ -97,7 +89,7 @@ required$add_argument( "--use_cols", default = TRUE, required = TRUE, - help = "Treat each column as a cell", + help ="Treat each column as a cell", metavar = "Boolean" ) @@ -106,7 +98,7 @@ required$add_argument( default = 30, type = "integer", required = TRUE, - help = "Inner dimension of factorization (number of factors)", + help ="Inner dimension of factorization (number of factors)", metavar = "N" ) @@ -115,7 +107,7 @@ required$add_argument( default = 5.0, type = "double", required = TRUE, - help = "Regularization parameter", + help ="Regularization parameter. Larger values penalize dataset-specific effects more strongly (ie. alignment should increase as lambda increases)", metavar = "N" ) @@ -124,7 +116,7 @@ required$add_argument( default = 0.0001, type = "double", required = TRUE, - help = "Convergence threshold.", + help ="Convergence threshold. Convergence occurs when |obj0-obj|/(mean(obj0,obj)) < thresh", metavar = "N" ) @@ -133,7 +125,7 @@ required$add_argument( default = 100, type = "integer", required = TRUE, - help = "Maximum number of block coordinate descent iterations to perform", + help ="Maximum number of block coordinate descent iterations to perform", metavar = "N" ) @@ -142,7 +134,7 @@ required$add_argument( default = 1, type = "integer", required = TRUE, - help = "Number of restarts to perform", + help ="Number of restarts to perform", metavar = "N" ) @@ -151,7 +143,7 @@ required$add_argument( default = 1, type = "integer", required = TRUE, - help = "Random seed to allow reproducible results", + help ="Random seed to allow reproducible results", metavar = "N" ) @@ -160,33 +152,15 @@ required$add_argument( default = 20, type = "integer", required = TRUE, - help = "Number of nearest neighbors for within-dataset knn graph", - metavar = "N" -) - -required$add_argument( - "--k2", - default = 500, - type = "integer", - required = TRUE, - help = "Horizon parameter for shared nearest factor graph", - metavar = "N" -) - -required$add_argument( - "--prune_thresh", - default = 0.2, - type = "double", - required = TRUE, - help = "Minimum allowed edge weight. Any edges below this are removed", + help ="Number of nearest neighbors for within-dataset knn graph", metavar = "N" ) required$add_argument( "--ref_dataset", - default = "", + default = '', required = TRUE, - help = "Name of dataset to use as a reference for normalization", + help ="Name of dataset to use as a reference for normalization", metavar = "ref" ) @@ -195,7 +169,7 @@ required$add_argument( default = 2, type = "integer", required = TRUE, - help = "Minimum number of cells to consider a cluster shared across datasets", + help ="Minimum number of cells to consider a cluster shared across datasets", metavar = "N" ) @@ -204,16 +178,7 @@ required$add_argument( default = 50, type = "integer", required = TRUE, - help = "Number of quantiles to use for quantile normalization", - metavar = "N" -) - -required$add_argument( - "--nstart", - default = 10, - type = "integer", - required = TRUE, - help = "Number of times to perform Louvain community detection", + help ="Number of quantiles to use for quantile normalization", metavar = "N" ) @@ -222,43 +187,18 @@ required$add_argument( default = 1, type = "double", required = TRUE, - help = "Controls the number of communities detected", + help ="Controls the number of communities detected (Higher resolution -> more communities)", metavar = "N" ) -required$add_argument( - "--dims_use", - default = "null", - required = TRUE, - help = "Indices of factors to use for shared nearest factor determination", - metavar = "Indices" -) - -required$add_argument( - "--dist_use", - default = "CR", - required = TRUE, - help = "Distance metric to use in calculating nearest neighbors", - metavar = "CR" -) - required$add_argument( "--center", default = FALSE, required = TRUE, - help = "Centers the data when scaling factors", + help ="Centers the data when scaling factors (useful for less sparse modalities like methylation data)", metavar = "Boolean" ) -required$add_argument( - "--small_clust_thresh", - default = 0, - type = "double", - required = TRUE, - help = "Extracts small clusters loading highly on single factor", - metavar = "N" -) - ### . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .. ### Pre-process args #### @@ -290,12 +230,11 @@ sce <- integrate_sce( unique_id_var = args$unique_id_var, take_gene_union = args$take_gene_union, remove.missing = args$remove_missing, - make.sparse = T, num_genes = args$num_genes, combine = args$combine, - keep_unique = args$keep_unique, capitalize = args$capitalize, use_cols = args$use_cols, + num_cores = future::availableCores(methods = "mc.cores"), k = args$k, lambda = args$lambda, thresh = args$thresh, @@ -306,24 +245,15 @@ sce <- integrate_sce( V_init = NULL, rand_seed = args$rand_seed, knn_k = args$knn_k, - k2 = args$k2, - prune_thresh = args$prune_thresh, ref_dataset = args$ref_dataset, min_cells = args$min_cells, quantiles = args$quantiles, - nstart = args$nstart, resolution = args$resolution, - dims_use = args$dims_use, - dist_use = args$dist_use, center = args$center, - small_clust_thresh = args$small_clust_thresh, - do_plot = FALSE, - id_number = NULL, - print_obj = FALSE, - print_mod = FALSE, - print_align_summary = FALSE + print_obj = FALSE ) + ## ............................................................................ ## Save Outputs #### diff --git a/bin/scflow_ipa.r b/bin/scflow_ipa.r index 0cca3a8..5e09573 100755 --- a/bin/scflow_ipa.r +++ b/bin/scflow_ipa.r @@ -12,6 +12,7 @@ options(mc.cores = parallel::detectCores()) library(argparse) library(scFlow) library(cli) +library(dplyr) ## ............................................................................ ## Parse command-line arguments #### @@ -50,9 +51,32 @@ required$add_argument( required$add_argument( "--enrichment_database", help = "name of the enrichment databases", - metavar = "GO_Biological_Process,GO_Cellular_Component,GO_Molecular_Function", + metavar = "GO_Biological_Process,Reactome,Wikipathway", required = TRUE, - default = "KEGG" + default = "GO_Biological_Process" +) + +required$add_argument( + "--species", + help = "the biological species (e.g. mouse, human)", + default = "human", + required = TRUE +) + +required$add_argument( + "--fc_threshold", + type = "double", + default = 1.1, + metavar = "number", + help = "Absolute fold-change cutoff for DE [default %(default)s]" +) + +required$add_argument( + "--pval_cutoff", + type = "double", + default = 0.05, + metavar = "number", + help = "p-value cutoff for DE [default %(default)s]" ) @@ -62,6 +86,8 @@ required$add_argument( args <- parser$parse_args() +options("scflow_species" = args$species) + args$enrichment_method <- strsplit(args$enrichment_method, ",")[[1]] args$enrichment_tool <- strsplit(args$enrichment_tool, ",")[[1]] args$enrichment_database <- strsplit(args$enrichment_database, ",")[[1]] @@ -91,23 +117,48 @@ dir.create(output_dir) dir.create(report_dir) for (gene_file in args$gene_file) { - enrichment_result <- find_impacted_pathways( - gene_file = gene_file, - enrichment_tool = args$enrichment_tool, - enrichment_method = args$enrichment_method, - enrichment_database = args$enrichment_database, - is_output = TRUE, - output_dir = output_dir - ) - report_name <- tools::file_path_sans_ext(gene_file) - report_fp <- paste0(report_name, "_scflow_ipa_report") - report_impacted_pathway( - res = enrichment_result, - report_folder_path = report_dir, - report_file = report_fp - ) - cli::cli_text(c( - "{cli::col_green(symbol$tick)} Analysis complete, output is found at: ", - "{.file {output_dir}}" - )) + dt <- read.delim(gene_file) + + dt <- dt %>% + dplyr::filter( + padj <= args$pval_cutoff, + abs(logFC) >= log2(args$fc_threshold) + ) + + if (nrow(dt) < 5) { + cli::cli_alert_danger("Gene list is very short!") + } else { + enrichment_result <- find_impacted_pathways( + gene_file = dt, + reference_file = NULL, + organism = getOption("scflow_species"), + enrichment_tool = args$enrichment_tool, + enrichment_method = args$enrichment_method, + enrichment_database = args$enrichment_database, + is_output = TRUE, + output_dir = output_dir + ) + + if (all(unlist(lapply( + enrichment_result, function(dt) { + isFALSE(dt$metadata$result) + } + )))) { + cli::cli_alert_danger("No significant pathway was found at FDR 0.05") + } else { + report_name <- tools::file_path_sans_ext(gene_file) + report_fp <- paste0(report_name, "_scflow_ipa_report") + + report_impacted_pathway( + res = enrichment_result, + report_folder_path = report_dir, + report_file = report_fp + ) + + cli::cli_text(c( + "{cli::col_green(symbol$tick)} Analysis complete, output is found at: ", + "{.file {output_dir}}" + )) + } + } } diff --git a/bin/scflow_qc.r b/bin/scflow_qc.r index 726a29a..6fff875 100755 --- a/bin/scflow_qc.r +++ b/bin/scflow_qc.r @@ -422,6 +422,11 @@ if (args$find_singlets) { ) } + +sce <- sce[ , sce$total_counts >= args$min_library_size] +sce <- sce[ , sce$total_features_by_counts >= args$min_features] + + dir.create(file.path(getwd(), "qc_report")) report_qc_sce( diff --git a/conf/gcp.config b/conf/gcp.config new file mode 100644 index 0000000..d24d1f8 --- /dev/null +++ b/conf/gcp.config @@ -0,0 +1,39 @@ +/* +======================================================================================== + nf-core/scflow Nextflow Google Cloud Platform config file +======================================================================================== + A config file for the Google Cloud Platform +---------------------------------------------------------------------------------------- +*/ + +process { + disk = '20 GB' + withLabel:process_tiny { + machineType = 'e2-standard-2' // 2 CPU 8GB RAM + //cpus = 2 + //memory = '13 GB' + } + withLabel:process_low { + machineType = 'n1-highmem-2' // 2 CPU 13GB RAM + } + withLabel:process_medium { + machineType = 'n2-standard-8' // 8 CPU 32GB RAM + } + withLabel:process_high { + machineType = 'n2-highmem-16' // 16 CPU 128GB RAM + } + withLabel:process_long { + machineType = 'n1-highmem-4' // 4 CPU 26GB RAM + } + withLabel:process_high_memory { + machineType = 'n2-highmem-16' // 16 CPU 128GB RAM + //cpus = 2 + //memory = '100 GB' + } +} + +google.lifeSciences.bootDiskSize = '100 GB' + +params { + celltype_mappings = "$baseDir/assets/NO_FILE.tsv" +} diff --git a/conf/modules.config b/conf/modules.config index 92362c1..80a4cd5 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -32,12 +32,12 @@ params { 'scflow_qc' { publish_dir = 'quality_control' publish_files = [ - 'html':'../../reports/qc', + 'html':'reports', 'qc_plot_data':'', 'qc_plots':'', 'sce':'' ] - publish_by_id = true + publish_by_id = false } 'scflow_mergeqctables' { @@ -48,7 +48,7 @@ params { 'scflow_merge' { publish_dir = 'merged' publish_files = [ - 'merged_report':'../reports', + 'merged_report':'reports', 'merge_plots':'', 'merge_summary_plots':'' ] @@ -71,7 +71,7 @@ params { 'scflow_reportintegrated' { publish_dir = 'integration' - publish_files = ['integration_report':'../reports'] + publish_files = ['integration_report':'reports'] } 'scflow_mapcelltypes' { @@ -86,9 +86,9 @@ params { publish_files = [ 'final_sce':'SCE', 'celltypes.tsv':'', - 'celltype_metrics_report':'../reports', - 'celltype_marker_plots':'../celltype_markers', - 'celltype_marker_tables':'../celltype_markers' + 'celltype_metrics_report':'reports', + 'celltype_marker_plots':'', + 'celltype_marker_tables':'' ] } @@ -96,7 +96,7 @@ params { publish_dir = 'DGE' publish_files = [ 'tsv':'', - 'html':'../../reports/DGE', + 'html':'reports', 'png':'de_plots' ] publish_by_id = true @@ -106,15 +106,15 @@ params { publish_dir = 'IPA' publish_files = [ 'ipa':'', - 'html':'../reports/IPA' + 'html':'reports' ] publish_by_id = true } 'scflow_dirichlet' { - publish_dir = 'dirichlet' + publish_dir = 'reports' publish_files = [ - 'dirichlet_report':'../reports' + 'dirichlet_report':'reports' ] } diff --git a/conf/scflow_analysis.config b/conf/scflow_analysis.config index ebba67d..3af4bdd 100644 --- a/conf/scflow_analysis.config +++ b/conf/scflow_analysis.config @@ -3,166 +3,164 @@ params { // * = multiple comma-separated variables allowed // Options: Quality-Control - qc_key_colname = 'manifest' - qc_factor_vars = 'individual' // * - qc_min_library_size = 100 - qc_max_library_size = 'adaptive' // if numeric, pass as string - qc_min_features = 100 - qc_max_features = 'adaptive' // if numeric, pass as string - qc_max_mito = 'adaptive' // if numeric, pass as string - qc_min_ribo = 0 - qc_max_ribo = 1 - qc_min_counts = 2 - qc_min_cells = 2 - qc_drop_unmapped = 'true' - qc_drop_mito = 'true' - qc_drop_ribo = 'true' - qc_nmads = 4.0 + qc_key_colname = 'manifest' + qc_factor_vars = 'individual' // * + qc_min_library_size = 100 + qc_max_library_size = 'adaptive' // if numeric, pass as string + qc_min_features = 100 + qc_max_features = 'adaptive' // if numeric, pass as string + qc_max_mito = 'adaptive' // if numeric, pass as string + qc_min_ribo = 0 + qc_max_ribo = 1 + qc_min_counts = 2 + qc_min_cells = 2 + qc_drop_unmapped = 'true' + qc_drop_mito = 'true' + qc_drop_ribo = 'false' + qc_nmads = 4.0 // Options: Ambient RNA Profiling - amb_find_cells = 'false' - amb_lower = 100 - amb_retain = 'auto' // if numeric, pass as string - amb_alpha_cutoff = 0.001 - amb_niters = 10000 - amb_expect_cells = 3000 + amb_find_cells = 'false' + amb_lower = 100 + amb_retain = 'auto' // if numeric, pass as string + amb_alpha_cutoff = 0.001 + amb_niters = 10000 + amb_expect_cells = 3000 // Options: Multiplet Identification - mult_find_singlets = 'false' - mult_singlets_method = 'doubletfinder' - mult_vars_to_regress_out = 'nCount_RNA,pc_mito' // * - mult_pca_dims = 10 - mult_var_features = 2000 - mult_doublet_rate = 0 - mult_dpk = 8 - mult_pK = 0.02 + mult_find_singlets = 'false' + mult_singlets_method = 'doubletfinder' + mult_vars_to_regress_out = 'nCount_RNA,pc_mito' // * + mult_pca_dims = 10 + mult_var_features = 2000 + mult_doublet_rate = 0 + mult_dpk = 8 + mult_pK = 0.02 // Options: Integration - integ_method = 'Liger' - integ_unique_id_var = 'manifest' - integ_take_gene_union = 'false' - integ_remove_missing = 'true' - integ_num_genes = 3000 - integ_combine = 'union' - integ_keep_unique = 'false' - integ_capitalize = 'false' - integ_use_cols = 'true' - integ_k = 30 - integ_lambda = 5.0 - integ_thresh = 0.0001 - integ_max_iters = 100 - integ_nrep = 1 - integ_rand_seed = 1 - integ_knn_k = 20 - integ_k2 = 500 - integ_prune_thresh = 0.2 - integ_ref_dataset = 'NULL' - integ_min_cells = 2 - integ_quantiles = 50 - integ_nstart = 10 - integ_resolution = 1 - integ_dims_use = 'NULL' - integ_dist_use = 'CR' - integ_center = 'false' - integ_small_clust_thresh = 0 + integ_method = 'Liger' + integ_k = 30 + integ_unique_id_var = 'manifest' + integ_take_gene_union = 'false' + integ_remove_missing = 'true' + integ_num_genes = 3000 + integ_combine = 'union' + integ_capitalize = 'false' + integ_use_cols = 'true' + integ_lambda = 5.0 + integ_thresh = 0.0001 + integ_max_iters = 100 + integ_nrep = 1 + integ_rand_seed = 1 + integ_quantiles = 50 + integ_ref_dataset = 'null' + integ_min_cells = 2 + integ_knn_k = 20 + integ_center = 'false' + integ_resolution = 1 + integ_max_cores = 'null' + // Options: Integration report - integ_categorical_covariates = 'manifest,diagnosis,sex' // * - integ_input_reduced_dim = 'UMAP' + integ_categorical_covariates = 'manifest,diagnosis,sex' // * + integ_input_reduced_dim = 'UMAP' // Options: Merge - merge_plot_vars = 'total_features_by_counts,total_counts,pc_mito,pc_ribo' - merge_facet_vars = 'NULL' // * - merge_outlier_vars = 'total_features_by_counts,total_counts' // * + merge_plot_vars = 'total_features_by_counts,total_counts,pc_mito,pc_ribo' + merge_facet_vars = 'null' // * + merge_outlier_vars = 'total_features_by_counts,total_counts' // * // Options: Dimensionality Reduction - reddim_input_reduced_dim = 'PCA,Liger' // * - reddim_reduction_methods = 'tSNE,UMAP,UMAP3D' // * - reddim_vars_to_regress_out = 'nCount_RNA,pc_mito' // * + reddim_input_reduced_dim = 'PCA,Liger' // * + reddim_reduction_methods = 'tSNE,UMAP,UMAP3D' // * + reddim_vars_to_regress_out = 'nCount_RNA,pc_mito' // * // umap - reddim_umap_pca_dims = 30 - reddim_umap_n_neighbors = 35 - reddim_umap_n_components = 2 - reddim_umap_init = 'spectral' - reddim_umap_metric = 'euclidean' - reddim_umap_n_epochs = 200 - reddim_umap_learning_rate = 1 - reddim_umap_min_dist = 0.4 - reddim_umap_spread = 0.85 - reddim_umap_set_op_mix_ratio = 1 - reddim_umap_local_connectivity = 1 - reddim_umap_repulsion_strength = 1 + reddim_umap_pca_dims = 30 + reddim_umap_n_neighbors = 35 + reddim_umap_n_components = 2 + reddim_umap_init = 'spectral' + reddim_umap_metric = 'euclidean' + reddim_umap_n_epochs = 200 + reddim_umap_learning_rate = 1 + reddim_umap_min_dist = 0.4 + reddim_umap_spread = 0.85 + reddim_umap_set_op_mix_ratio = 1 + reddim_umap_local_connectivity = 1 + reddim_umap_repulsion_strength = 1 reddim_umap_negative_sample_rate = 5 - reddim_umap_fast_sgd = 'false' + reddim_umap_fast_sgd = 'false' // tsne - reddim_tsne_dims = 2 - reddim_tsne_initial_dims = 50 - reddim_tsne_perplexity = 150 - reddim_tsne_theta = 0.5 - reddim_tsne_stop_lying_iter = 250 - reddim_tsne_mom_switch_iter = 250 - reddim_tsne_max_iter = 1000 - reddim_tsne_pca_center = 'true' - reddim_tsne_pca_scale = 'false' - reddim_tsne_normalize = 'true' - reddim_tsne_momentum = 0.5 - reddim_tsne_final_momentum = 0.8 - reddim_tsne_eta = 1000 - reddim_tsne_exaggeration_factor = 12 + reddim_tsne_dims = 2 + reddim_tsne_initial_dims = 50 + reddim_tsne_perplexity = 150 + reddim_tsne_theta = 0.5 + reddim_tsne_stop_lying_iter = 250 + reddim_tsne_mom_switch_iter = 250 + reddim_tsne_max_iter = 1000 + reddim_tsne_pca_center = 'true' + reddim_tsne_pca_scale = 'false' + reddim_tsne_normalize = 'true' + reddim_tsne_momentum = 0.5 + reddim_tsne_final_momentum = 0.8 + reddim_tsne_eta = 1000 + reddim_tsne_exaggeration_factor = 12 // Options: Clustering - clust_cluster_method = 'leiden' - clust_reduction_method = 'UMAP_Liger' - clust_res = 0.01 - clust_k = 100 - clust_louvain_iter = 1 + clust_cluster_method = 'leiden' + clust_reduction_method = 'UMAP_Liger' + clust_res = 0.01 + clust_k = 100 + clust_louvain_iter = 1 // Options: Celltype Annotation - cta_clusters_colname = 'clusters' - cta_cells_to_sample = 10000 + cta_clusters_colname = 'clusters' + cta_cells_to_sample = 10000 + // Options: Celltype Metrics Report - cta_unique_id_var = 'manifest' - cta_clusters_colname = 'clusters' - cta_celltype_var = 'cluster_celltype' - cta_facet_vars = 'manifest,diagnosis,sex' - cta_metric_vars = 'pc_mito,pc_ribo,total_counts,total_features_by_counts' - cta_top_n = 5 + cta_unique_id_var = 'manifest' + cta_clusters_colname = 'clusters' + cta_celltype_var = 'cluster_celltype' + cta_facet_vars = 'manifest,diagnosis,sex' + cta_metric_vars = 'pc_mito,pc_ribo,total_counts,total_features_by_counts' + cta_top_n = 5 // Options: Differential Gene Expression - dge_de_method = 'MASTZLM' // * - dge_mast_method = 'bayesglm' - dge_min_counts = 1 - dge_min_cells_pc = 0.1 - dge_rescale_numerics = 'true' - dge_pseudobulk = 'false' - dge_celltype_var = 'cluster_celltype' - dge_sample_var = 'manifest' - dge_dependent_var = 'diagnosis' - dge_ref_class = 'Control' - dge_confounding_vars = 'cngeneson' // * - dge_random_effects_var = 'NULL' - dge_fc_threshold = 1.1 - dge_pval_cutoff = 0.05 - dge_force_run = 'false' - dge_max_cores = 'null' + dge_de_method = 'MASTZLM' // * + dge_mast_method = 'bayesglm' + dge_min_counts = 1 + dge_min_cells_pc = 0.1 + dge_rescale_numerics = 'true' + dge_pseudobulk = 'false' + dge_celltype_var = 'cluster_celltype' + dge_sample_var = 'manifest' + dge_dependent_var = 'diagnosis' + dge_ref_class = 'Control' + dge_confounding_vars = 'cngeneson' // * + dge_random_effects_var = 'null' + dge_fc_threshold = 1.1 + dge_pval_cutoff = 0.05 + dge_n_label = 5 + dge_force_run = 'false' + dge_max_cores = 'null' // Options: Integrated Pathway Analysis - ipa_enrichment_tool = 'WebGestaltR' - ipa_enrichment_method = 'ORA' - ipa_enrichment_database = 'GO_Biological_Process' // * + ipa_enrichment_tool = 'WebGestaltR' + ipa_enrichment_method = 'ORA' + ipa_enrichment_database = 'GO_Biological_Process' // * // Options: Dirichlet Modeling - dirich_unique_id_var = 'individual' - dirich_celltype_var = 'cluster_celltype' - dirich_dependent_var = 'diagnosis' - dirich_ref_class = 'Control' - dirich_var_order = 'NULL' // * + dirich_unique_id_var = 'individual' + dirich_celltype_var = 'cluster_celltype' + dirich_dependent_var = 'diagnosis' + dirich_ref_class = 'Control' + dirich_var_order = 'null' // * // Options: Plots (Reduced Dim) - plotreddim_reduction_methods = 'UMAP_Liger' // * - reddimplot_pointsize = 0.1 - reddimplot_alpha = 0.2 + plotreddim_reduction_methods = 'UMAP_Liger' // * + reddimplot_pointsize = 0.1 + reddimplot_alpha = 0.2 // Misc - species = 'human' + species = 'human' + max_cores = 'null' } diff --git a/modules/local/get_software_versions.nf b/modules/local/get_software_versions.nf index 7c83440..76a74f5 100644 --- a/modules/local/get_software_versions.nf +++ b/modules/local/get_software_versions.nf @@ -10,10 +10,11 @@ process GET_SOFTWARE_VERSIONS { tag 'Version Info' label 'process_tiny' + errorStrategy 'ignore' //cache false output: - path 'software_versions.tsv' , emit: tsv + path "software_versions.tsv" , emit: tsv script: // This script is bundled with the pipeline, in nf-core/scflow/bin/ """ diff --git a/modules/local/process/scflow/cluster.nf b/modules/local/process/scflow/cluster.nf index 9c2ab10..4f5f42e 100644 --- a/modules/local/process/scflow/cluster.nf +++ b/modules/local/process/scflow/cluster.nf @@ -10,7 +10,7 @@ def options = initOptions(params.options) process SCFLOW_CLUSTER { tag 'MERGED' - label 'process_low' + label 'process_medium' publishDir "${params.outdir}", mode: params.publish_dir_mode, saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), publish_id:'') } diff --git a/modules/local/process/scflow/dge.nf b/modules/local/process/scflow/dge.nf index 7459e3b..80700b2 100644 --- a/modules/local/process/scflow/dge.nf +++ b/modules/local/process/scflow/dge.nf @@ -1,5 +1,5 @@ /* - * Generate 2D reduced dimension plots of gene expression + * Run differential gene expression analysis */ // Import generic module functions @@ -10,7 +10,7 @@ def options = initOptions(params.options) process SCFLOW_DGE { tag "${celltype} (${n_cells_str} cells) | ${de_method}" - label 'process_medium' + label 'process_high' errorStrategy 'ignore' publishDir "${params.outdir}", mode: params.publish_dir_mode, diff --git a/modules/local/process/scflow/finalize.nf b/modules/local/process/scflow/finalize.nf index fec2380..103fab1 100644 --- a/modules/local/process/scflow/finalize.nf +++ b/modules/local/process/scflow/finalize.nf @@ -30,6 +30,7 @@ process SCFLOW_FINALIZE { script: def software = getSoftwareName(task.process) + def ctm = celltype_mappings.simpleName != 'NO_FILE' ? "$celltype_mappings" : 'nofile' """ export MC_CORES=${task.cpus} @@ -37,7 +38,7 @@ process SCFLOW_FINALIZE { scflow_finalize_sce.r \ $options.args \ --sce_path ${sce} \ - --celltype_mappings ${celltype_mappings} + --celltype_mappings ${ctm} scflow_version=\$(Rscript -e 'cat(as.character(utils::packageVersion("scFlow")))'); echo "scFlow \${scflow_version}" > "scFlow_\${scflow_version}.version.txt" """ diff --git a/modules/local/process/scflow/integrate.nf b/modules/local/process/scflow/integrate.nf index fc564ff..ed6179f 100644 --- a/modules/local/process/scflow/integrate.nf +++ b/modules/local/process/scflow/integrate.nf @@ -10,7 +10,7 @@ def options = initOptions(params.options) process SCFLOW_INTEGRATE { tag 'MERGED' - label 'process_medium' + label 'process_high_memory' publishDir "${params.outdir}", mode: params.publish_dir_mode, saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), publish_id:'') } @@ -29,7 +29,11 @@ process SCFLOW_INTEGRATE { """ export MC_CORES=${task.cpus} - + export MKL_NUM_THREADS=1 + export NUMEXPR_NUM_THREADS=1 + export OMP_NUM_THREADS=1 + export OPENBLAS_NUM_THREADS=1 + export VECLIB_MAXIMUM_THREADS=1 scflow_integrate.r \ $options.args \ --sce_path ${sce} diff --git a/modules/local/process/scflow/ipa.nf b/modules/local/process/scflow/ipa.nf index 79c1169..42791b2 100644 --- a/modules/local/process/scflow/ipa.nf +++ b/modules/local/process/scflow/ipa.nf @@ -1,5 +1,5 @@ /* - * Integrated pathway analysis of differentially expressed genes + * Impacted pathway analysis of differentially expressed genes */ // Import generic module functions diff --git a/modules/local/process/scflow/mapcelltypes.nf b/modules/local/process/scflow/mapcelltypes.nf index fafeac1..6f236fe 100644 --- a/modules/local/process/scflow/mapcelltypes.nf +++ b/modules/local/process/scflow/mapcelltypes.nf @@ -10,7 +10,7 @@ def options = initOptions(params.options) process SCFLOW_MAPCELLTYPES { tag 'MERGED' - label 'process_low' + label 'process_high' publishDir "${params.outdir}", mode: params.publish_dir_mode, saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), publish_id:'') } diff --git a/nextflow.config b/nextflow.config index 834c279..c90fd42 100644 --- a/nextflow.config +++ b/nextflow.config @@ -13,12 +13,12 @@ manifest { description = 'Complete analysis workflow for single-cell/nuclei RNA-sequencing data.' mainScript = 'main.nf' nextflowVersion = '>=21.04.2' - version = '0.7.0dev' + version = '1.0.0' } // Container slug. Stable releases should specify release tag! // Developmental code should specify :dev -process.container = 'almurphy/scfdev:dev' +process.container = 'almurphy/scfdev:0.7.1' //workDir = "/rds/general/user/$USER/ephemeral/tmp" workDir = './work' diff --git a/nextflow_schema.json b/nextflow_schema.json index f209156..8771107 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -55,11 +55,16 @@ "description": "Input sample species.", "help_text": "Currently, \"human\" and \"mouse\" are supported." }, + "max_cores": { + "type": "string", + "default": "'null'", + "description": "Maximum CPU cores.", + "help_text": "The default value of 'null' utilizes all available CPU cores. Manually overriding this parameter can reduce the memory demands of parallelization across multiple cores." + }, "outdir": { "type": "string", "default": "./results", - "description": "Outputs directory.", - "fa_icon": "fas fa-folder-open" + "description": "Outputs directory." } }, "required": [ @@ -67,7 +72,8 @@ "input", "ensembl_mappings", "ctd_path", - "species" + "species", + "max_cores" ], "help_text": "" }, @@ -88,7 +94,7 @@ "default": "seqdate", "description": "The sample sheet variables to treat as factors.", "help_text": "All sample sheet columns with numbers which should be treated as factors should be specified here separated by commas. Examples include columns with dates, numeric sample identifiers, etc.", - "fa_icon": "fas fa-layer-group" + "fa_icon": "fas fa-quote-left" }, "qc_min_library_size": { "type": "integer", @@ -201,7 +207,7 @@ "properties": { "amb_find_cells": { "type": "string", - "default": "true", + "default": "True", "description": "Enable ambient RNA / empty droplet profiling.", "fa_icon": "fas fa-cut" }, @@ -259,39 +265,34 @@ "properties": { "mult_find_singlets": { "type": "string", - "default": "true", + "default": "True", "description": "Enable doublet/multiplet identification.", "fa_icon": "fas fa-cut" }, "mult_singlets_method": { "type": "string", "default": "doubletfinder", - "description": "Algorithm to use for doublet/multiplet identification.", - "fa_icon": "fas fa-toolbox" + "description": "Algorithm to use for doublet/multiplet identification." }, "mult_vars_to_regress_out": { "type": "string", "default": "nCount_RNA,pc_mito", - "description": "Variables to regress out for dimensionality reduction.", - "fa_icon": "fas fa-layer-group" + "description": "Variables to regress out for dimensionality reduction." }, "mult_pca_dims": { "type": "integer", "default": 10, - "description": "Number of PCA dimensions to use.", - "fa_icon": "fas fa-calculator" + "description": "Number of PCA dimensions to use." }, "mult_var_features": { "type": "integer", "default": 2000, - "description": "The top n most variable features to use.", - "fa_icon": "fas fa-calculator" + "description": "The top n most variable features to use." }, "mult_doublet_rate": { "type": "number", "description": "A fixed doublet rate.", - "help_text": "Use a fixed default rate (e.g. 0.075 to specify that 7.5% of all cells should be marked as doublets), or set to 0 to use the \"dpk\" method (recommended).", - "fa_icon": "fas fa-calculator" + "help_text": "Use a fixed default rate (e.g. 0.075 to specify that 7.5% of all cells should be marked as doublets), or set to 0 to use the \"dpk\" method (recommended)." }, "mult_dpk": { "type": "integer", @@ -299,15 +300,13 @@ "description": "Doublets per thousand cells increment.", "help_text": "The doublets per thousand cell increment specifies the expected doublet rate based on the number of cells, i.e. with a dpk of 8 (recommended by 10X), a dataset with 1000 cells is expected to contain 8 doublets per thousand cells, a dataset with 2000 cells is expected to contain 16 doublets per thousand cells, and a dataset with 10000 cells is expected to contain 80 cells per thousand cells (or 800 doublets in total). If the \"doublet_rate\" parameter is manually specified this recommended incremental behaviour is overridden.", "minimum": 0, - "maximum": 1000, - "fa_icon": "fas fa-calculator" + "maximum": 1000 }, "mult_pK": { "type": "number", "default": 0.02, "description": "Specify a pK value instead of parameter sweep.", - "help_text": "The optimal pK value used by the doubletFinder algorithm is determined following a compute-intensive parameter sweep. The parameter sweep can be overridden by manually specifying a pK value.", - "fa_icon": "fas fa-calculator" + "help_text": "The optimal pK value used by the doubletFinder algorithm is determined following a compute-intensive parameter sweep. The parameter sweep can be overridden by manually specifying a pK value." } }, "fa_icon": "fas fa-adjust", @@ -331,22 +330,19 @@ "type": "string", "default": "total_features_by_counts,total_counts,pc_mito,pc_ribo", "description": "Numeric variables for inter-sample metrics.", - "help_text": "A comma-separated list of numeric variables which differ between individual cells of each sample. The merged sample report will include plots facilitating between-sample comparisons for each of these numeric variables.", - "fa_icon": "fas fa-layer-group" + "help_text": "A comma-separated list of numeric variables which differ between individual cells of each sample. The merged sample report will include plots facilitating between-sample comparisons for each of these numeric variables." }, "merge_facet_vars": { "type": "string", "default": "NULL", "description": "Categorical variables for further sub-setting of plots", - "help_text": "A comma-separated list of categorical variables. The merged sample report will include additional plots of sample metrics subset by each of these variables (e.g. sex, diagnosis).", - "fa_icon": "fas fa-layer-group" + "help_text": "A comma-separated list of categorical variables. The merged sample report will include additional plots of sample metrics subset by each of these variables (e.g. sex, diagnosis)." }, "merge_outlier_vars": { "type": "string", "default": "total_features_by_counts,total_counts", "description": "Numeric variables for outlier identification.", - "help_text": "The merged report will include tables highlighting samples that are putative outliers for each of these numeric variables.", - "fa_icon": "fas fa-layer-group" + "help_text": "The merged report will include tables highlighting samples that are putative outliers for each of these numeric variables." } }, "fa_icon": "fas fa-object-ungroup", @@ -365,35 +361,37 @@ "integ_method": { "type": "string", "default": "Liger", - "description": "Choice of integration method.", - "fa_icon": "fas fa-toolbox" + "description": "Choice of integration method." + }, + "integ_k": { + "type": "integer", + "default": 30, + "description": "Inner dimension of factorization (n factors).", + "help_text": "See rliger::optimizeALS(). Inner dimension of factorization (number of factors). Run suggestK to determine appropriate value; a general rule of thumb is that a higher k will be needed for datasets with more sub-structure." }, "integ_unique_id_var": { "type": "string", "default": "manifest", - "description": "Unique sample identifier variable.", - "fa_icon": "fas fa-key" + "description": "Unique sample identifier variable." }, "integ_take_gene_union": { "type": "string", - "default": "false", "description": "Fill out matrices with union of genes.", "help_text": "See rliger::createLiger(). Whether to fill out raw.data matrices with union of genes across all datasets (filling in 0 for missing data) (requires make.sparse = TRUE) (default FALSE).", - "fa_icon": "fas fa-cut" + "fa_icon": "fas fa-cut", + "default": "false" }, "integ_remove_missing": { "type": "string", - "default": "true", + "default": "True", "description": "Remove non-expressing cells/genes.", - "help_text": "See rliger::createLiger(). Whether to remove cells not expressing any measured genes, and genes not expressed in any cells (if take.gene.union = TRUE, removes only genes not expressed in any dataset) (default TRUE).", - "fa_icon": "fas fa-cut" + "help_text": "See rliger::createLiger(). Whether to remove cells not expressing any measured genes, and genes not expressed in any cells (if take.gene.union = TRUE, removes only genes not expressed in any dataset) (default TRUE)." }, "integ_num_genes": { "type": "integer", "default": 3000, "description": "Number of genes to find for each dataset.", - "help_text": "See rliger::selectGenes(). Number of genes to find for each dataset. Optimises the value of var.thresh for each dataset to get this number of genes.", - "fa_icon": "fas fa-calculator" + "help_text": "See rliger::selectGenes(). Number of genes to find for each dataset. Optimises the value of var.thresh for each dataset to get this number of genes." }, "integ_combine": { "type": "string", @@ -402,194 +400,120 @@ "help_text": "See rliger::selectGenes(). Either \"union\" or \"intersection\".", "fa_icon": "fas fa-calculator" }, - "integ_keep_unique": { - "type": "string", - "default": "false", - "description": "Keep unique genes.", - "help_text": "See rliger::selectGenes().", - "fa_icon": "fas fa-cut" - }, "integ_capitalize": { "type": "string", - "default": "false", "description": "Capitalize gene names to match homologous genes.", "help_text": "See rliger::selectGenes().", - "fa_icon": "fab fa-adn" + "fa_icon": "fab fa-adn", + "default": "false" }, "integ_use_cols": { "type": "string", - "default": "true", + "default": "True", "description": "Treat each column as a cell.", - "help_text": "See rliger::removeMissingObs().", - "fa_icon": "fas fa-columns" - }, - "integ_k": { - "type": "integer", - "default": 30, - "description": "Inner dimension of factorization (n factors).", - "help_text": "See rliger::optimizeALS(). Inner dimension of factorization (number of factors). Run suggestK to determine appropriate value; a general rule of thumb is that a higher k will be needed for datasets with more sub-structure.", - "fa_icon": "fas fa-calculator" + "help_text": "See rliger::removeMissingObs()." }, "integ_lambda": { "type": "number", "default": 5, "description": "Regularization parameter.", - "help_text": "See rliger::optimizeALS(). Regularization parameter. Larger values penalize dataset-specific effects more strongly (ie. alignment should increase as lambda increases). Run suggestLambda to determine most appropriate value for balancing dataset alignment and agreement (default 5.0).", - "fa_icon": "fas fa-calculator" + "help_text": "See rliger::optimizeALS(). Regularization parameter. Larger values penalize dataset-specific effects more strongly (ie. alignment should increase as lambda increases). Run suggestLambda to determine most appropriate value for balancing dataset alignment and agreement (default 5.0)." }, "integ_thresh": { "type": "number", "default": 0.0001, "description": "Convergence threshold.", - "help_text": "See rliger::optimizeALS().", - "fa_icon": "fas fa-calculator" + "help_text": "See rliger::optimizeALS()." }, "integ_max_iters": { "type": "integer", "default": 100, "description": "Maximum number of block coordinate descent iterations.", - "help_text": "See rliger::optimizeALS().", - "fa_icon": "fas fa-less-than-equal" + "help_text": "See rliger::optimizeALS()." }, "integ_nrep": { "type": "integer", "default": 1, "description": "Number of restarts to perform.", - "help_text": "See rliger::optimizeALS().", - "fa_icon": "fas fa-calculator" + "help_text": "See rliger::optimizeALS()." }, "integ_rand_seed": { "type": "integer", "default": 1, - "description": "Random seed for reproducible results.", - "fa_icon": "fas fa-calculator" + "description": "Random seed for reproducible results." }, - "integ_knn_k": { - "type": "integer", - "default": 20, - "description": "Number of neearest neighbours for within-dataset knn graph.", - "help_text": "See rliger::quantile_norm().", - "fa_icon": "fas fa-calculator" - }, - "integ_k2": { + "integ_quantiles": { "type": "integer", - "default": 500, - "description": "Horizon parameter for shared nearest factor graph.", - "help_text": "See rliger::quantileAlignSNF(). Distances to all but the k2 nearest neighbors are set to 0 (cuts down on memory usage for very large graphs).", - "fa_icon": "fas fa-calculator" - }, - "integ_prune_thresh": { - "type": "number", - "default": 0.2, - "description": "Minimum allowed edge weight.", - "help_text": "See rliger::quantileAlignSNF().", - "fa_icon": "fas fa-greater-than-equal" + "default": 50, + "description": "Number of quantiles to use for normalization.", + "help_text": "See rliger::quantile_norm()." }, "integ_ref_dataset": { "type": "string", "default": "NULL", "description": "Name of dataset to use as a reference.", - "help_text": "See rliger::quantile_norm(). Name of dataset to use as a \"reference\" for normalization. By default, the dataset with the largest number of cells is used.", - "fa_icon": "fas fa-quote-left" + "help_text": "See rliger::quantile_norm(). Name of dataset to use as a \"reference\" for normalization. By default, the dataset with the largest number of cells is used." }, "integ_min_cells": { "type": "integer", "default": 2, "description": "Minimum number of cells to consider a cluster shared across datasets.", - "help_text": "See rliger::quantile_norm().", - "fa_icon": "fas fa-greater-than-equal" - }, - "integ_quantiles": { - "type": "integer", - "default": 50, - "description": "Number of quantiles to use for normalization.", - "help_text": "See rliger::quantile_norm().", - "fa_icon": "fas fa-calculator" - }, - "integ_nstart": { - "type": "integer", - "default": 10, - "description": "Number of times to perform Louvain community detection.", - "help_text": "See rliger::quantileAlignSNF(). Number of times to perform Louvain community detection with different random starts (default 10).", - "fa_icon": "fas fa-recycle" + "help_text": "See rliger::quantile_norm()." }, - "integ_resolution": { + "integ_knn_k": { "type": "integer", - "default": 1, - "description": "Controls the number of communities detected.", - "help_text": "See rliger::quantileAlignSNF().", - "fa_icon": "fas fa-calculator" - }, - "integ_dims_use": { - "type": "string", - "default": "NULL", - "description": "Indices of factors to use for shared nearest factor determination.", - "help_text": "See rliger::quantile_norm().", - "fa_icon": "fas fa-calculator" - }, - "integ_dist_use": { - "type": "string", - "default": "CR", - "description": "Distance metric to use in calculating nearest neighbour.", - "help_text": "See rliger::quantileAlignSNF(). Default \"CR\".", - "fa_icon": "fas fa-digital-tachograph" + "default": 20, + "description": "Number of neearest neighbours for within-dataset knn graph.", + "help_text": "See rliger::quantile_norm()." }, "integ_center": { "type": "string", "default": "false", "description": "Center the data when scaling factors.", - "help_text": "See rliger::quantile_norm().", - "fa_icon": "fas fa-compress-arrows-alt" + "help_text": "See rliger::quantile_norm()." }, - "integ_small_clust_thresh": { + "integ_resolution": { "type": "integer", - "help_text": "See rliger::quantileAlignSNF(). Extracts small clusters loading highly on single factor with fewer cells than this before regular alignment (default 0 \u2013 no small cluster extraction).", - "description": "Small cluster extraction cells threshold.", + "default": 1, + "description": "Controls the number of communities detected.", + "help_text": "See rliger::quantileAlignSNF().", "fa_icon": "fas fa-calculator" }, "integ_categorical_covariates": { "type": "string", "default": "individual,diagnosis,region,sex", "description": "Categorical variables for integration report metrics.", - "help_text": "The integration report will provide plots and integration metrics for these categorical variables.", - "fa_icon": "fas fa-layer-group" + "help_text": "The integration report will provide plots and integration metrics for these categorical variables." }, "integ_input_reduced_dim": { "type": "string", "default": "UMAP", "description": "Reduced dimension embedding for the integration report.", - "help_text": "The integration report will provide with and without integration plots using this embedding.", - "fa_icon": "fas fa-chess-board" + "help_text": "The integration report will provide with and without integration plots using this embedding." } }, "fa_icon": "far fa-object-group", "required": [ "integ_method", + "integ_k", "integ_unique_id_var", "integ_take_gene_union", "integ_remove_missing", "integ_num_genes", "integ_combine", - "integ_keep_unique", "integ_capitalize", "integ_use_cols", - "integ_k", "integ_lambda", "integ_thresh", "integ_max_iters", "integ_nrep", "integ_rand_seed", - "integ_knn_k", - "integ_k2", - "integ_prune_thresh", + "integ_quantiles", "integ_ref_dataset", "integ_min_cells", - "integ_quantiles", - "integ_nstart", - "integ_resolution", - "integ_dims_use", - "integ_dist_use", + "integ_knn_k", "integ_center", + "integ_resolution", "integ_categorical_covariates", "integ_input_reduced_dim" ] @@ -603,42 +527,36 @@ "reddim_input_reduced_dim": { "type": "string", "default": "PCA,Liger", - "description": "Input matrix for dimension reduction.", - "fa_icon": "fas fa-chess-board" + "description": "Input matrix for dimension reduction." }, "reddim_reduction_methods": { "type": "string", "default": "tSNE,UMAP,UMAP3D", "description": "Dimension reduction outputs to generate.", - "help_text": "Typically 'UMAP,UMAP3D' or 'tSNE'.", - "fa_icon": "fas fa-toolbox" + "help_text": "Typically 'UMAP,UMAP3D' or 'tSNE'." }, "reddim_vars_to_regress_out": { "type": "string", "default": "nCount_RNA,pc_mito", - "description": "Variables to regress out before dimension reduction.", - "fa_icon": "fas fa-layer-group" + "description": "Variables to regress out before dimension reduction." }, "reddim_umap_pca_dims": { "type": "integer", "default": 30, "description": "Number of PCA dimensions.", - "help_text": "See uwot::umap().", - "fa_icon": "fas fa-calculator" + "help_text": "See uwot::umap()." }, "reddim_umap_n_neighbors": { "type": "integer", "default": 35, "description": "Number of nearest neighbours to use.", - "help_text": "See uwot::umap().", - "fa_icon": "fas fa-calculator" + "help_text": "See uwot::umap()." }, "reddim_umap_n_components": { "type": "integer", "default": 2, "description": "The dimension of the space to embed into.", - "help_text": "See uwot::umap(). The dimension of the space to embed into. This defaults to 2 to provide easy visualization, but can reasonably be set to any integer value in the range 2 to 100.", - "fa_icon": "fas fa-calculator" + "help_text": "See uwot::umap(). The dimension of the space to embed into. This defaults to 2 to provide easy visualization, but can reasonably be set to any integer value in the range 2 to 100." }, "reddim_umap_init": { "type": "string", @@ -654,8 +572,7 @@ "pca", "spca", "agspectral" - ], - "fa_icon": "fas fa-calculator" + ] }, "reddim_umap_metric": { "type": "string", @@ -669,36 +586,31 @@ "hamming", "correlation", "categorical" - ], - "fa_icon": "fas fa-digital-tachograph" + ] }, "reddim_umap_n_epochs": { "type": "integer", "default": 200, "description": "Number of epochs to us during optimization of embedded coordinates.", - "help_text": "See uwot::umap().", - "fa_icon": "fas fa-calculator" + "help_text": "See uwot::umap()." }, "reddim_umap_learning_rate": { "type": "integer", "default": 1, "description": "Initial learning rate used in optimization of coordinates.", - "help_text": "See uwot::umap().", - "fa_icon": "fas fa-calculator" + "help_text": "See uwot::umap()." }, "reddim_umap_min_dist": { "type": "number", "default": 0.4, "description": "Effective minimum distance between embedded points.", - "help_text": "See uwot::umap(). Smaller values will result in a more clustered/clumped embedding where nearby points on the manifold are drawn closer together, while larger values will result on a more even dispersal of points. The value should be set relative to the spread value, which determines the scale at which embedded points will be spread out.", - "fa_icon": "fas fa-greater-than-equal" + "help_text": "See uwot::umap(). Smaller values will result in a more clustered/clumped embedding where nearby points on the manifold are drawn closer together, while larger values will result on a more even dispersal of points. The value should be set relative to the spread value, which determines the scale at which embedded points will be spread out." }, "reddim_umap_spread": { "type": "number", "default": 0.85, "description": "Effective scale of embedded points.", - "help_text": "See uwot::umap(). In combination with min_dist, this determines how clustered/clumped the embedded points are.", - "fa_icon": "fas fa-arrows-alt-h" + "help_text": "See uwot::umap(). In combination with min_dist, this determines how clustered/clumped the embedded points are." }, "reddim_umap_set_op_mix_ratio": { "type": "number", @@ -706,134 +618,117 @@ "description": "Interpolation to combine local fuzzy sets.", "help_text": "See uwot::umap(). The value of this parameter should be between 0.0 and 1.0; a value of 1.0 will use a pure fuzzy union, while 0.0 will use a pure fuzzy intersection.", "minimum": 0, - "maximum": 1, - "fa_icon": "fas fa-adjust" + "maximum": 1 }, "reddim_umap_local_connectivity": { "type": "integer", "default": 1, "description": "Local connectivity required.", - "help_text": "See uwot::umap(). The local connectivity required \u2013 i.e. the number of nearest neighbors that should be assumed to be connected at a local level. The higher this value the more connected the manifold becomes locally.", - "fa_icon": "fas fa-calculator" + "help_text": "See uwot::umap(). The local connectivity required \u2013 i.e. the number of nearest neighbors that should be assumed to be connected at a local level. The higher this value the more connected the manifold becomes locally." }, "reddim_umap_repulsion_strength": { "type": "integer", "default": 1, "description": "Weighting applied to negative samples in embedding optimization.", - "help_text": "See uwot::umap(). Weighting applied to negative samples in low dimensional embedding optimization. Values higher than one will result in greater weight being given to negative samples.", - "fa_icon": "fas fa-calculator" + "help_text": "See uwot::umap(). Weighting applied to negative samples in low dimensional embedding optimization. Values higher than one will result in greater weight being given to negative samples." }, "reddim_umap_negative_sample_rate": { "type": "integer", "default": 5, "description": "Number of negative edge samples to use per positive edge sample.", - "help_text": "See uwot::umap(). The number of negative edge/1-simplex samples to use per positive edge/1-simplex sample in optimizing the low dimensional embedding.", - "fa_icon": "fas fa-calculator" + "help_text": "See uwot::umap(). The number of negative edge/1-simplex samples to use per positive edge/1-simplex sample in optimizing the low dimensional embedding." }, "reddim_umap_fast_sgd": { "type": "string", - "default": "false", "description": "Use fast SGD.", "help_text": "See uwot::umap(). Setting this to TRUE will speed up the stochastic optimization phase, but give a potentially less accurate embedding, and which will not be exactly reproducible even with a fixed seed. For visualization, fast_sgd = TRUE will give perfectly good results. For more generic dimensionality reduction, it's safer to leave fast_sgd = FALSE.", - "fa_icon": "fas fa-skiing" + "fa_icon": "fas fa-skiing", + "default": "false" }, "reddim_tsne_dims": { "type": "integer", "default": 2, "description": "Output dimensionality.", - "help_text": "See Rtsne::Rtsne().", - "fa_icon": "fas fa-calculator" + "help_text": "See Rtsne::Rtsne()." }, "reddim_tsne_initial_dims": { "type": "integer", "default": 50, "description": "Number of dimensions retained in the initial PCA step.", - "help_text": "See Rtsne::Rtsne().", - "fa_icon": "fas fa-calculator" + "help_text": "See Rtsne::Rtsne()." }, "reddim_tsne_perplexity": { "type": "integer", "default": 150, "description": "Perplexity parameter.", - "help_text": "See Rtsne::Rtsne().", - "fa_icon": "fas fa-calculator" + "help_text": "See Rtsne::Rtsne()." }, "reddim_tsne_theta": { "type": "number", "default": 0.5, "description": "Speed/accuracy trade-off.", - "help_text": "See Rtsne::Rtsne(). Speed/accuracy trade-off (increase for less accuracy), set to 0.0 for exact TSNE (default: 0.5).", - "fa_icon": "fas fa-calculator" + "help_text": "See Rtsne::Rtsne(). Speed/accuracy trade-off (increase for less accuracy), set to 0.0 for exact TSNE (default: 0.5)." }, "reddim_tsne_stop_lying_iter": { "type": "integer", "default": 250, "description": "Iteration after which perplexities are no longer exaggerated.", - "help_text": "See Rtsne::Rtsne(). Iteration after which the perplexities are no longer exaggerated (default: 250, except when Y_init is used, then 0).", - "fa_icon": "fas fa-calculator" + "help_text": "See Rtsne::Rtsne(). Iteration after which the perplexities are no longer exaggerated (default: 250, except when Y_init is used, then 0)." }, "reddim_tsne_mom_switch_iter": { "type": "integer", "default": 250, "description": "Iteration after which the final momentum is used.", - "help_text": "See Rtsne::Rtsne(). Iteration after which the final momentum is used (default: 250, except when Y_init is used, then 0).", - "fa_icon": "fas fa-calculator" + "help_text": "See Rtsne::Rtsne(). Iteration after which the final momentum is used (default: 250, except when Y_init is used, then 0)." }, "reddim_tsne_max_iter": { "type": "integer", "default": 1000, "description": "Number of iterations.", - "help_text": "See Rtsne::Rtsne(). ", - "fa_icon": "fas fa-less-than-equal" + "help_text": "See Rtsne::Rtsne(). " }, "reddim_tsne_pca_center": { "type": "string", - "default": "true", + "default": "True", "description": "Center data before PCA.", - "help_text": "See Rtsne::Rtsne(). Should data be centered before pca is applied? (default: TRUE)", - "fa_icon": "fas fa-compress-arrows-alt" + "help_text": "See Rtsne::Rtsne(). Should data be centered before pca is applied? (default: TRUE)" }, "reddim_tsne_pca_scale": { "type": "string", - "default": "false", "description": "Scale data before PCA.", "help_text": "See Rtsne::Rtsne(). Should data be scaled before pca is applied? (default: FALSE).", - "fa_icon": "fas fa-balance-scale" + "fa_icon": "fas fa-balance-scale", + "default": "false" }, "reddim_tsne_normalize": { "type": "string", - "default": "true", + "default": "True", "description": "Normalize data before distance calculations.", - "help_text": "See Rtsne::Rtsne(). Should data be normalized internally prior to distance calculations with normalize_input? (default: TRUE)", - "fa_icon": "fas fa-balance-scale" + "help_text": "See Rtsne::Rtsne(). Should data be normalized internally prior to distance calculations with normalize_input? (default: TRUE)" }, "reddim_tsne_momentum": { "type": "number", "default": 0.5, "description": "Momentum used in the first part of optimization.", - "help_text": "See Rtsne::Rtsne(). ", - "fa_icon": "fas fa-calculator" + "help_text": "See Rtsne::Rtsne(). " }, "reddim_tsne_final_momentum": { "type": "number", "default": 0.8, "description": "Momentum used in the final part of optimization.", - "help_text": "See Rtsne::Rtsne(). ", - "fa_icon": "fas fa-calculator" + "help_text": "See Rtsne::Rtsne(). " }, "reddim_tsne_eta": { "type": "integer", "default": 1000, "description": "Learning rate.", - "help_text": "See Rtsne::Rtsne(). ", - "fa_icon": "fas fa-calculator" + "help_text": "See Rtsne::Rtsne(). " }, "reddim_tsne_exaggeration_factor": { "type": "integer", "default": 12, "description": "Exaggeration factor used in the first part of the optimization.", - "help_text": "See Rtsne::Rtsne(). Exaggeration factor used to multiply the P matrix in the first part of the optimization (default: 12.0).", - "fa_icon": "fas fa-calculator" + "help_text": "See Rtsne::Rtsne(). Exaggeration factor used to multiply the P matrix in the first part of the optimization (default: 12.0)." } }, "fa_icon": "fas fa-cubes", @@ -881,34 +776,29 @@ "type": "string", "default": "leiden", "description": "Clustering method.", - "help_text": "Specify \"leiden\" or \"louvain\".", - "fa_icon": "fas fa-toolbox" + "help_text": "Specify \"leiden\" or \"louvain\"." }, "clust_reduction_method": { "type": "string", "default": "UMAP_Liger", "description": "Reduced dimension input(s) for clustering.", - "help_text": "One or more of \"UMAP\", \"tSNE\", \"PCA\", \"LSI\".", - "fa_icon": "fas fa-chess-board" + "help_text": "One or more of \"UMAP\", \"tSNE\", \"PCA\", \"LSI\"." }, "clust_res": { "type": "number", "default": 0.001, - "description": "The resolution of clustering.", - "fa_icon": "fas fa-calculator" + "description": "The resolution of clustering." }, "clust_k": { "type": "integer", "default": 50, "description": "Integer number of nearest neighbours for clustering.", - "help_text": "Integer number of nearest neighbors to use when creating the k nearest neighbor graph for Louvain/Leiden clustering. k is related to the resolution of the clustering result, a bigger k will result in lower resolution and vice versa.", - "fa_icon": "fas fa-calculator" + "help_text": "Integer number of nearest neighbors to use when creating the k nearest neighbor graph for Louvain/Leiden clustering. k is related to the resolution of the clustering result, a bigger k will result in lower resolution and vice versa." }, "clust_louvain_iter": { "type": "integer", "default": 1, - "description": "The number of iterations for clustering.", - "fa_icon": "fas fa-recycle" + "description": "The number of iterations for clustering." } }, "fa_icon": "fas fa-braille", @@ -929,44 +819,37 @@ "cta_clusters_colname": { "type": "string", "default": "clusters", - "description": "SingleCellExperiment clusters colData variable name.", - "fa_icon": "fas fa-quote-left" + "description": "SingleCellExperiment clusters colData variable name." }, "cta_cells_to_sample": { "type": "integer", "default": 10000, - "description": "Max cells to sample.", - "fa_icon": "fas fa-calculator" + "description": "Max cells to sample." }, "cta_unique_id_var": { "type": "string", "default": "individual", - "description": "A sample metadata unique sample ID.", - "fa_icon": "fas fa-key" + "description": "A sample metadata unique sample ID." }, "cta_celltype_var": { "type": "string", "default": "cluster_celltype", - "description": "SingleCellExperiment cell-type colData variable name.", - "fa_icon": "fas fa-quote-left" + "description": "SingleCellExperiment cell-type colData variable name." }, "cta_facet_vars": { "type": "string", "default": "manifest,diagnosis,sex,capdate,prepdate,seqdate", - "description": "Cell-type metrics for categorical variables.", - "fa_icon": "fas fa-layer-group" + "description": "Cell-type metrics for categorical variables." }, "cta_metric_vars": { "type": "string", "default": "pc_mito,pc_ribo,total_counts,total_features_by_counts", - "description": "Cell-type metrics for numeric variables.", - "fa_icon": "fas fa-layer-group" + "description": "Cell-type metrics for numeric variables." }, "cta_top_n": { "type": "integer", "default": 5, - "description": "Number of top marker genes for plot/table generation.", - "fa_icon": "fas fa-calculator" + "description": "Number of top marker genes for plot/table generation." } }, "fa_icon": "fas fa-brain", @@ -989,8 +872,7 @@ "dge_de_method": { "type": "string", "default": "MASTZLM", - "description": "Differential gene expression method.", - "fa_icon": "fas fa-toolbox" + "description": "Differential gene expression method." }, "dge_mast_method": { "type": "string", @@ -1001,15 +883,13 @@ "glm", "glmer", "bayesglm" - ], - "fa_icon": "fas fa-toolbox" + ] }, "dge_min_counts": { "type": "integer", "default": 1, "description": "Expressive gene minimum counts.", - "help_text": "Only genes with at least min_counts in min_cells_pc will be tested for differential gene expression.", - "fa_icon": "fas fa-greater-than-equal" + "help_text": "Only genes with at least min_counts in min_cells_pc will be tested for differential gene expression." }, "dge_min_cells_pc": { "type": "number", @@ -1017,91 +897,86 @@ "minimum": 0, "maximum": 1, "description": "Expressive gene minimum cells fraction.", - "help_text": "Only genes with at least min_counts in min_cells_pc will be tested for differential gene expression. Default 0.1 (i.e. 10% of cells).", - "fa_icon": "fas fa-greater-than-equal" + "help_text": "Only genes with at least min_counts in min_cells_pc will be tested for differential gene expression. Default 0.1 (i.e. 10% of cells)." }, "dge_rescale_numerics": { "type": "string", - "default": "true", + "default": "True", "description": "Re-scale numeric covariates.", - "help_text": "Re-scaling and centring numeric covariates in a model can improve model performance.", - "fa_icon": "fas fa-balance-scale" + "help_text": "Re-scaling and centring numeric covariates in a model can improve model performance." }, "dge_pseudobulk": { "type": "string", - "default": "false", "description": "Pseudobulked differential gene expression.", "help_text": "Perform differential gene expression on a smaller matrix where counts are first summed across all cells within a sample (defined by dge_sample_var level).", - "fa_icon": "far fa-object-group" + "fa_icon": "far fa-object-group", + "default": "false" }, "dge_celltype_var": { "type": "string", "default": "cluster_celltype", "description": "Cell-type annotation variable name.", - "help_text": "Differential gene expression is performed separately for each cell-type of this colData variable.", - "fa_icon": "fas fa-quote-left" + "help_text": "Differential gene expression is performed separately for each cell-type of this colData variable." }, "dge_sample_var": { "type": "string", "default": "manifest", - "description": "Unique sample identifier variable.", - "fa_icon": "fas fa-key" + "description": "Unique sample identifier variable." }, "dge_dependent_var": { "type": "string", "default": "group", "description": "Dependent variable of DGE model.", - "help_text": "The dependent variable may be a categorical (e.g. diagnosis) or a numeric (e.g. histopathology score) variable.", - "fa_icon": "fas fa-quote-left" + "help_text": "The dependent variable may be a categorical (e.g. diagnosis) or a numeric (e.g. histopathology score) variable." }, "dge_ref_class": { "type": "string", "default": "Control", "help_text": "If a categorical dependent variable is specified, then the reference class of the dependent variable is specified here (e.g. 'Control').", - "description": "Reference class of categorical dependent variable.", - "fa_icon": "fas fa-quote-left" + "description": "Reference class of categorical dependent variable." }, "dge_confounding_vars": { "type": "string", "default": "cngeneson,seqdate,pc_mito", "description": "Confounding variables.", - "help_text": "A comma-separated list of confounding variables to account for in the DGE model.", - "fa_icon": "fas fa-layer-group" + "help_text": "A comma-separated list of confounding variables to account for in the DGE model." }, "dge_random_effects_var": { "type": "string", "default": "NULL", "description": "Random effect confounding variable.", - "help_text": "If specified, the term `+ (1 | x ) +`is added to the model, where x is the specified random effects variable.", - "fa_icon": "fas fa-quote-left" + "help_text": "If specified, the term `+ (1 | x ) +`is added to the model, where x is the specified random effects variable." }, "dge_fc_threshold": { "type": "number", "default": 1.1, "description": "Fold-change threshold for plotting.", - "help_text": "This absolute fold-change cut-off value is used in plots (e.g. volcano) and the DGE report.", - "fa_icon": "fas fa-calculator" + "help_text": "This absolute fold-change cut-off value is used in plots (e.g. volcano) and the DGE report." }, "dge_pval_cutoff": { "type": "number", "default": 0.05, "description": "Adjusted p-value cutoff.", - "help_text": "The adjusted p-value cutoff value is used in plots (e.g. volcano) and the DGE report.", - "fa_icon": "fas fa-less-than-equal" + "help_text": "The adjusted p-value cutoff value is used in plots (e.g. volcano) and the DGE report." + }, + "dge_n_label": { + "type": "number", + "default": 5, + "help_text": "The number of genes to label in plots (e.g. volcano) and the DGE report." }, "dge_force_run": { "type": "string", - "default": "false", "description": "Force model fit for non-full rank.", "help_text": "A non-full rank model specification will return an error; to override this to return a warning only, set to TRUE.", - "fa_icon": "fas fa-exclamation" + "fa_icon": "fas fa-exclamation", + "default": "false" }, "dge_max_cores": { "type": "string", - "default": "'null'", "description": "Maximum CPU cores.", "help_text": "The default value of 'null' utilizes all available CPU cores. As each additional CPU core increases the number of genes simultaneously fit, the RAM/memory demand increases concomitantly. Manually overriding this parameter can reduce the memory demands of parallelization across multiple cores.", - "fa_icon": "fas fa-microchip" + "fa_icon": "fas fa-microchip", + "default": "null" } }, "fa_icon": "fas fa-chart-bar", @@ -1120,6 +995,7 @@ "dge_random_effects_var", "dge_fc_threshold", "dge_pval_cutoff", + "dge_n_label", "dge_force_run", "dge_max_cores" ] @@ -1133,26 +1009,18 @@ "ipa_enrichment_tool": { "type": "string", "default": "WebGestaltR", - "description": "Pathway enrichment tool(s) to use.", - "enum": [ - "WebGestaltR", - "ROntoTools", - "enrichR" - ], - "fa_icon": "fas fa-toolbox" + "description": "Pathway enrichment tool(s) to use." }, "ipa_enrichment_method": { "type": "string", "default": "ORA", - "description": "Enrichment method.", - "fa_icon": "fas fa-layer-group" + "description": "Enrichment method." }, "ipa_enrichment_database": { "type": "string", "default": "GO_Biological_Process", "description": "Database(s) to use for enrichment.", - "help_text": "See scFlow::list_databases(). Name of the database(s) for enrichment. Examples include \"GO_Biological_Process\", \"GO_Cellular_Component\", \"GO_Molecular_Function\", \"KEGG\", \"Reactome\", \"Wikipathway\".", - "fa_icon": "fas fa-layer-group" + "help_text": "See scFlow::list_databases(). Name of the database(s) for enrichment. Examples include \"GO_Biological_Process\", \"GO_Cellular_Component\", \"GO_Molecular_Function\", \"KEGG\", \"Reactome\", \"Wikipathway\"." } }, "fa_icon": "fas fa-project-diagram", @@ -1171,33 +1039,28 @@ "dirich_unique_id_var": { "type": "string", "default": "individual", - "description": "Unique sampler identifier.", - "fa_icon": "fas fa-key" + "description": "Unique sampler identifier." }, "dirich_celltype_var": { "type": "string", "default": "cluster_celltype", - "description": "Cell-type annotation variable name.", - "fa_icon": "fas fa-quote-left" + "description": "Cell-type annotation variable name." }, "dirich_dependent_var": { "type": "string", "default": "group", - "description": "Dependent variable of Dirichlet model.", - "fa_icon": "fas fa-quote-left" + "description": "Dependent variable of Dirichlet model." }, "dirich_ref_class": { "type": "string", "default": "Control", - "description": "Reference class of categorical dependent variable.", - "fa_icon": "fas fa-quote-left" + "description": "Reference class of categorical dependent variable." }, "dirich_var_order": { "type": "string", "default": "Control,Low,High", "description": "Dependent variable classes order.", - "help_text": "For plotting and reports, the order of classes for the dependent variable can be manually specified (e.g. 'Control,Low,High').", - "fa_icon": "fas fa-layer-group" + "help_text": "For plotting and reports, the order of classes for the dependent variable can be manually specified (e.g. 'Control,Low,High')." } }, "fa_icon": "fas fa-chart-pie", @@ -1370,10 +1233,11 @@ "hidden": true }, "monochrome_logs": { - "type": "boolean", + "type": "string", "description": "Do not use coloured log outputs.", "fa_icon": "fas fa-palette", - "hidden": true + "hidden": true, + "default": "false" }, "tracedir": { "type": "string", @@ -1424,8 +1288,7 @@ "options": { "type": "string", "description": "NA", - "hidden": true, - "fa_icon": "fas fa-filter" + "hidden": true } } } diff --git a/workflows/scflow.nf b/workflows/scflow.nf index 9f27efd..d79ff84 100644 --- a/workflows/scflow.nf +++ b/workflows/scflow.nf @@ -94,32 +94,25 @@ scflow_merge_options.args = def scflow_integrate_options = modules['scflow_integrate'] scflow_integrate_options.args = "--method ${params.integ_method} \ + --k ${params.integ_k} \ --unique_id_var ${params.integ_unique_id_var} \ --take_gene_union ${params.integ_take_gene_union} \ --remove_missing ${params.integ_remove_missing} \ --num_genes ${params.integ_num_genes} \ --combine ${params.integ_combine} \ - --keep_unique ${params.integ_keep_unique} \ --capitalize ${params.integ_capitalize} \ --use_cols ${params.integ_use_cols} \ - --k ${params.integ_k} \ --lambda ${params.integ_lambda} \ --thresh ${params.integ_thresh} \ --max_iters ${params.integ_max_iters} \ --nrep ${params.integ_nrep} \ --rand_seed ${params.integ_rand_seed} \ - --knn_k ${params.integ_knn_k} \ - --k2 ${params.integ_k2} \ - --prune_thresh ${params.integ_prune_thresh} \ + --quantiles ${params.integ_quantiles} \ --ref_dataset ${params.integ_ref_dataset} \ --min_cells ${params.integ_min_cells} \ - --quantiles ${params.integ_quantiles} \ - --nstart ${params.integ_nstart} \ - --resolution ${params.integ_resolution} \ - --dims_use ${params.integ_dims_use} \ - --dist_use ${params.integ_dist_use} \ + --knn_k ${params.integ_knn_k} \ --center ${params.integ_center} \ - --small_clust_thresh ${params.integ_small_clust_thresh}" + --resolution ${params.integ_resolution}" def scflow_reducedims_options = modules['scflow_reducedims'] scflow_reducedims_options.args = @@ -188,7 +181,8 @@ scflow_finalize_options.args = --metric_vars ${params.cta_metric_vars} \ --top_n ${params.cta_top_n} \ --reddimplot_pointsize ${params.reddimplot_pointsize} \ - --reddimplot_alpha ${params.reddimplot_alpha}" + --reddimplot_alpha ${params.reddimplot_alpha} \ + --max_cores ${params.max_cores}" def scflow_dge_options = modules['scflow_dge'] scflow_dge_options.args = @@ -219,7 +213,10 @@ def scflow_ipa_options = modules['scflow_ipa'] scflow_ipa_options.args = "--enrichment_tool ${params.ipa_enrichment_tool} \ --enrichment_method ${params.ipa_enrichment_method} \ - --enrichment_database ${params.ipa_enrichment_database}" + --enrichment_database ${params.ipa_enrichment_database} \ + --pval_cutoff ${params.dge_pval_cutoff} \ + --fc_threshold ${params.dge_fc_threshold} \ + --species ${params.species}" def scflow_dirichlet_options = modules['scflow_dirichlet'] scflow_dirichlet_options.args =