Skip to content

Commit 096c836

Browse files
committed
NIDAP json files for all templates
1 parent c99e5cd commit 096c836

26 files changed

Lines changed: 4750 additions & 1 deletion

File tree

.gitignore

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ tests/testthat/output/
1111
.Rproj.user
1212

1313
inst/doc
14-
inst/extdata/*
14+
#inst/extdata/*
1515
docs
1616
*.Rds
1717
*.rds
Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
{
2+
"codeTemplate": "ExternalAnnotation_SO <- function({{{Input_Seurat_Object}}}, {{{External_Metadata_Table}}}) {\n \n## --------- ##\n## Libraries ##\n## --------- ##\n\n# Load necessary libraries\nlibrary(nidapFunctions)\nnidapLoadPackages(c(\"SCWorkflow\", \"magrittr\", \"tibble\", \"dplyr\", \"Seurat\"))\n\n## -------------------------------- ##\n## User-Defined Template Parameters ##\n## -------------------------------- ##\n\nseurat_object <- {{{Input_Seurat_Object}}}\nexternal_metadata <- {{{External_Metadata_Table}}}\nseurat_object_filename <- \"{{{Seurat_Object_Filename}}}\"\nbarcode_column = \"{{{Column_Containing_Barcodes}}}\"\nexternal_cols_to_add = {{{External_Metadata_Columns_to_Add}}}\ncol_to_viz = \"{{{Column_to_Visualize_Optional_}}}\"\n\n## -------------------------------- ##\n## Errors ##\n## -------------------------------- ##\n\n## -------------------------------- ##\n## Functions ##\n## -------------------------------- ##\n\n## --------------- ##\n## Main Code Block ##\n## --------------- ##\n\n# Load Seurat Object\ncat(sprintf(\"\\nReading Seurat Object from dataset: %s\\n\\n\", paste(seurat_object_filename, collapse = \", \")))\npath <- nidapGetPath(seurat_object, seurat_object_filename)\nso <- readRDS(path)\nprint(so)\n\n# Extract meta.data from Seurat Object\ncat(\"Extracting Metadata Table from Seurat Object\\n\\n\")\nif (\"meta.data\" %in% slotNames(so)) {\n met.df <- so@meta.data\n} else if (\"RNA\" %in% slotNames(so)) {\n met.df <- so$RNA@meta.data\n} else {\n stop(\"No recognizable meta.data found in the Seurat object.\")\n}\n\n# If no \"Barcode\" column exists, create it from rownames\nif (!(\"Barcode\" %in% colnames(met.df))) {\n met.df <- met.df %>% rownames_to_column(\"Barcode\")\n}\n\n# Check if the user-specified barcode column exists\nif (!(barcode_column %in% colnames(external_metadata))) {\n stop(paste(\"External metadata does not have the specified barcode column:\", barcode_column))\n}\n\n# Parse external columns to add\nif 
(!is.null(external_cols_to_add)) {\n cols_to_import <- trimws(unlist(strsplit(external_cols_to_add, \",\")))\n \n # Always include Barcode in the subset\n cols_to_keep <- unique(c(barcode_column, cols_to_import))\n \n # Ensure all requested columns exist in external_metadata\n missing_cols <- setdiff(cols_to_keep, colnames(external_metadata))\n if (length(missing_cols) > 0) {\n stop(paste(\"These columns were not found in external_metadata:\", paste(missing_cols, collapse = \", \")))\n }\n \n # Subset external_metadata to keep only the requested columns\n external_metadata <- external_metadata[, cols_to_keep, drop = FALSE]\n}\n\n# Merge the external metadata table with the Seurat object metadata by barcode\nmerged_metadata <- merge(met.df, external_metadata, by.x = \"Barcode\", by.y = barcode_column, all.x = TRUE)\n\n## troubleshooting, check if any NULL rows appear in merged meta\nif (any(is.na(merged_metadata$Barcode))){\n stop(\"STOP: Metadata not correctly merged. NA's produced.\")\n}\nif (nrow(met.df)!=nrow(merged_metadata)){\n stop(\"STOP: Metadata not correctly merged. Incorrect number of rows when merged.\")\n}\nif(length(setdiff(met.df$Barcode, external_metadata[[barcode_column]]))>0){\n warning(\"Warning: Not all cells from the Seurat Object are in the External Metadata table.\")\n}\nif(length(setdiff(external_metadata[[barcode_column]], met.df$Barcode))>0){\n warning(\"Warning: Some cells in the External Metadata are not found in the Seurat Object.\")\n}\n\n# Print or save the merged metadata for review\ncat(\"Metadata successfully merged. 
Here is a preview:\\n\")\nprint(head(merged_metadata))\n\n# Ensure that rownames in merged_metadata are barcodes, not numbers \nmerged_metadata=column_to_rownames(merged_metadata,'Barcode')\n\n# Update Seurat object with the new merged metadata\nso <- AddMetaData(so, merged_metadata)\n\n# If col_to_viz isn't empty, print TSNE/UMAP colored by this column.\nif(length(col_to_viz) != 0) {\n print(DimPlot(so, group.by = col_to_viz, label = FALSE, reduction = 'umap'))# + NoLegend()\n print(DimPlot(so, group.by = col_to_viz, label = FALSE, reduction = 'tsne'))# + NoLegend()\n}\n\noutput <- new.output()\noutput_fs <- output$fileSystem()\nsaveRDS(so, output_fs$get_path(\"seurat_object.rds\", 'w'))\n\nreturn(NULL)\n\n}\n\n#################################################\n## Global imports and functions included below ##\n#################################################\n\n# R.cache depends on a home folder\nSys.setenv(R_USER_CACHE_DIR = Sys.glob(file.path(R.home())));",
3+
"columns": [
4+
{
5+
"key": "Column_Containing_Barcodes",
6+
"displayName": "Column Containing Barcodes",
7+
"description": "Select the column in the external metadata table that has Barcodes you expect to correspond to those in the input Seurat Object. The selected column will be used to join the external metadata to the existing Seurat Object metadata.",
8+
"paramGroup": "Basic",
9+
"sourceDataset": "External_Metadata_Table",
10+
"defaultValue": null,
11+
"columnType": "ALL",
12+
"isMulti": null
13+
},
14+
{
15+
"key": "External_Metadata_Columns_to_Add",
16+
"displayName": "External Metadata Columns to Add",
17+
"description": "Select columns in external metadata that you want to add to your input Seurat Object's metadata.",
18+
"paramGroup": "Basic",
19+
"sourceDataset": "External_Metadata_Table",
20+
"defaultValue": null,
21+
"columnType": "ALL",
22+
"isMulti": true
23+
},
24+
{
25+
"key": "Column_to_Visualize_Optional_",
26+
"displayName": "Column to Visualize (Optional)",
27+
"description": "You may choose a single column from the External Metadata to visualize. TSNE & UMAP plots colored by this variable will be produced. You may leave this variable blank. Default is blank.",
28+
"paramGroup": "Basic",
29+
"sourceDataset": "External_Metadata_Table",
30+
"defaultValue": null,
31+
"columnType": "ALL",
32+
"isMulti": null
33+
}
34+
],
35+
"condaDependencies": [],
36+
"description": "This template will merge an external table of cell annotations into an existing Seurat Object's metadata table. The input external metadata table must have a column of barcodes (selected with the \"Column Containing Barcodes\" parameter) that match those found in the metadata already present in the input Seurat Object. The output will be a new Seurat Object with metadata that now includes the additional columns from the external table.",
37+
"externalId": "Add_External_Cell_Annotation_CCBR_scRNA_seq_",
38+
"inputDatasets": [
39+
{
40+
"key": "Input_Seurat_Object",
41+
"displayName": "Input Seurat Object",
42+
"description": "Select the input Seurat Object.",
43+
"paramGroup": null,
44+
"anchorDataset": false,
45+
"dataType": "R_TRANSFORM_INPUT",
46+
"tags": []
47+
},
48+
{
49+
"key": "External_Metadata_Table",
50+
"displayName": "External Metadata Table",
51+
"description": "Select the input external metadata table.",
52+
"paramGroup": null,
53+
"anchorDataset": false,
54+
"dataType": "R_NATIVE_DATAFRAME",
55+
"tags": []
56+
}
57+
],
58+
"vectorLanguage": "R",
59+
"codeLanguage": "R",
60+
"parameters": [
61+
{
62+
"key": "Seurat_Object_Filename",
63+
"displayName": "Seurat Object Filename",
64+
"description": "The name of the RDS file within the input Seurat Object dataset. By default, this name is \"seurat_object.rds\".",
65+
"paramType": "STRING",
66+
"paramGroup": "Advanced",
67+
"paramValues": null,
68+
"defaultValue": "seurat_object.rds",
69+
"condition": null,
70+
"content": null,
71+
"objectPropertyReference": null
72+
}
73+
],
74+
"title": "Add External Cell Annotation [CCBR] [scRNA-seq]",
75+
"templateApiVersion": "0.1.0"
76+
}
Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
{
2+
"codeTemplate": "Pseudobulk_AggregateCounts_BV6 <- function({{{Seurat_Object}}},{{{Metadata}}}) {\n\n ## --------- ##\n ## Libraries ##\n ## --------- ##\n \n library(nidapFunctions)\n nidapLoadPackages(c(\"SCWorkflow\",\"Seurat\",\"dplyr\",\"ggplot2\",\"plotly\"))\n \n ## -------------------------------- ##\n ## User-Defined Template Parameters ##\n ## -------------------------------- ##\n \n # PH- Need Better Explination of variables\n seruat_object={{{Seurat_Object}}}\n var_group={{{Pseudobulk_Variable}}} \n slot=\"{{{Slot}}}\"\n \n ## ------------------- ##\n ## Load Seurat Object ##\n ## ------------------- ##\n \n filename <- \"{{{Filename}}}\"\n fs <- seruat_object$fileSystem()\n path <- fs$get_path(filename, 'r')\n so <- readRDS(path)\n \n ##--------------- ##\n ## Error Messages ##\n ## -------------- ##\n \n # Check that input is single Seurat Object\n if (length(so) > 1){\n stop(\"Please provide an input containing a single Seurat Object\")\n }\n \n ## Rename variables in case of NIDAPisms\n # For instances where metadata variable is orig_ident and Seurat variable \n # is orig.ident\n if (\"orig_ident\" %in% var_group){\n var_group[var_group == \"orig_ident\"] <- \"orig.ident\"\n }\n \n ## Rename variables in case of NIDAPisms\n # Check if \"hash_ID\" is in var_group and replace with the actual column name in metadata\n if (\"hash_ID\" %in% var_group) {\n actual_col <- NULL\n if (\"hash.ID\" %in% colnames(so@meta.data)) {\n actual_col <- \"hash.ID\"\n } else if (\"hash_ID\" %in% colnames(so@meta.data)) {\n actual_col <- \"hash_ID\"\n } else {\n stop(\"Neither 'hash.ID' nor 'hash_ID' found in Seurat object metadata. 
\")\n }\n var_group[var_group == \"hash_ID\"] <- actual_col\n }\n\n\n # For instances where metadata variable is SCT_snn_res_X_Y and Seurat variable\n # is SCT_snn_res.X.Y (Old_SCT_res_X_Y from Recluster Template is fine)\n if (all(any(grepl(\"SCT_snn_res_(\\\\d+)_\",var_group)) & \n !grepl(\"old\",var_group))){\n var_group <- gsub(\"SCT_snn_res_(\\\\d+)_\",\"SCT_snn_res.\\\\1.\",var_group)\n } \n \n ## --------------- ##\n ## Run Fucnction ##\n ## --------------- ## \n \n pseudobulk_Table=aggregateCounts(object=so,\n var.group=var_group,\n slot=slot)\n \n \n return(pseudobulk_Table)\n}\n\n#################################################\n## Global imports and functions included below ##\n#################################################\n\naggregateCounts=function(object,\n var.group,\n slot){\n \n \n ## --------------- ##\n ## Main Code Block ##\n ## --------------- ##\n \n pseudobulk <- AverageExpression(object, \n return.seurat = FALSE,\n assay = \"SCT\",\n group.by = var.group,\n slot = slot)[[1]] %>% \n as.data.frame.matrix() \n \n pseudobulk$Gene <- rownames(pseudobulk)\n pseudobulk <- pseudobulk %>% select(\"Gene\", everything())\n rownames(pseudobulk) <- NULL\n\n # Further processing of column names\n colnames(pseudobulk) <- gsub(\"\\\\W\",\"_\",colnames(pseudobulk))\n\n # # Rename columns generated from Clustering to be more Generic\n # if(grepl(\"SCT_snn_res\",var.group)){ \n # colnames(pseudobulk)[grepl(\"\\\\d+\", colnames(pseudobulk))] <- \n # paste(\"Cluster\", \n # colnames(pseudobulk)[grepl(\"\\\\d+\", colnames(pseudobulk))], \n # sep = \"_\")\n # }\n \n # Return Table/Figure that gives statistics on \n # Number of Cells in each group/new sample\n # Distribution of cell Counts in each group/new sample\n meta <- object@meta.data[,var.group]\n \n # check that columns are all factors / categorical\n char_or_factor_cols <- sapply(meta, function(x) is.character(x) || is.factor(x))\n \n # do plots and tables if all columns are factors or characters\n 
if(all(char_or_factor_cols)){\n meta$interaction <- gsub(\"\\\\W\",\"_\",interaction(meta))\n \n df <- as.data.frame(table(pseudobulk_group = meta$interaction)) %>% filter(Freq != 0)\n # sort the table by the number of cells in each group\n df <- df[order(df$Freq, decreasing = F),]\n \n if(any(df$Freq == 1)){\n single_counts <- df$pseudobulk_group[df$Freq == 1]\n # sprintf, make custom warning message with %s as placeholder for single count groups\n warning(sprintf(\n \"Some groups have only 1 cell. It is recommended to have at least 2 cells in each group.\\nAffected groups: %s\",\n paste(single_counts, collapse = \", \")\n ))\n }\n \n p <- ggplotly(ggplot(df, aes(x = pseudobulk_group, y = Freq)) +\n geom_bar(stat = \"identity\", position = \"stack\") +\n labs(y = \"Counts\", x = \"Pseudobulk Groups\", title = \"Number of Cells in each Pseudobulk Group\") +\n theme(axis.text.x = element_text(angle = 90, hjust = 1)))\n \n print(p)\n \n } else {\n stop(\"All columns in var.group must be factors or characters\")\n }\n \n return(pseudobulk)\n}\n\n",
3+
"columns": [
4+
{
5+
"key": "Pseudobulk_Variable",
6+
"displayName": "Pseudobulk Variable",
7+
"description": "Column from Seurat Metadata that contains the categorical variable(s) to group cells and gene expression values by.",
8+
"paramGroup": null,
9+
"sourceDataset": "Metadata",
10+
"defaultValue": null,
11+
"columnType": "ALL",
12+
"isMulti": true
13+
}
14+
],
15+
"condaDependencies": [],
16+
"description": "This template is the first step in a Pseudobulk analysis of your scRNA-seq dataset. It groups cells based on chosen categorical variable(s) in the Seurat Object's Metadata and aggregates the counts of each gene in each group.\n\nThe output is a table of aggregate expression in which the rows are genes and the columns are values found in the chosen Pseudobulk variable. If you select multiple categories to aggregate by (e.g. Category1: A,B,C and Category2: D,E,F), cells will be grouped by combinations of category variables (e.g. A_D, A_E, A_F, B_D, B_E, B_F). Gene expression values are averaged across the cells in each group.\n\n",
17+
"externalId": "Aggregate_Seurat_Counts_CCBR_scRNA_seq_",
18+
"inputDatasets": [
19+
{
20+
"key": "Seurat_Object",
21+
"displayName": "Seurat Object",
22+
"description": "Seurat Object containing single-cell data.",
23+
"paramGroup": null,
24+
"anchorDataset": false,
25+
"dataType": "R_TRANSFORM_INPUT",
26+
"tags": []
27+
},
28+
{
29+
"key": "Metadata",
30+
"displayName": "Metadata",
31+
"description": "Metadata of Seurat Object, from the \"Get Metadata Table [CCBR] [scRNA-seq]\" template.",
32+
"paramGroup": null,
33+
"anchorDataset": false,
34+
"dataType": "R_NATIVE_DATAFRAME",
35+
"tags": []
36+
}
37+
],
38+
"vectorLanguage": "R",
39+
"codeLanguage": "R",
40+
"parameters": [
41+
{
42+
"key": "Slot",
43+
"displayName": "Slot",
44+
"description": "Select the specific gene expression data slot to run analysis on. The \"data\" slot is the log-normalized SCTransformed expression. The \"scale.data\" slot is the scaled and centered SCT-normalized expression (mean 0 and variance 1 for each gene). The \"counts\" slot is the raw gene expression. The default is the \"data\" slot because the intended downstream analysis is Limma-Voom.",
45+
"paramType": "SELECT",
46+
"paramGroup": null,
47+
"paramValues": [
48+
"data",
49+
"scale.data",
50+
"counts"
51+
],
52+
"defaultValue": "data",
53+
"condition": null,
54+
"content": null,
55+
"objectPropertyReference": null
56+
},
57+
{
58+
"key": "Filename",
59+
"displayName": "Filename",
60+
"description": "Name of input file connected to this template.",
61+
"paramType": "STRING",
62+
"paramGroup": null,
63+
"paramValues": null,
64+
"defaultValue": "seurat_object.rds",
65+
"condition": null,
66+
"content": null,
67+
"objectPropertyReference": null
68+
}
69+
],
70+
"title": "Aggregate Seurat Counts [CCBR] [scRNA-seq]",
71+
"templateApiVersion": "0.1.0"
72+
}

0 commit comments

Comments
 (0)