Skip to content
Open
6 changes: 6 additions & 0 deletions DIMS/AverageTechReplicates.R
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,9 @@ for (sample_nr in 1:length(repl_pattern)) {
pattern_list <- remove_from_repl_pattern(remove_neg, repl_pattern, nr_replicates)
repl_pattern_filtered <- pattern_list$pattern
save(repl_pattern_filtered, file = "negative_repl_pattern.RData")
if (is.null(remove_neg)) {
remove_neg <- "none"
}
write.table(
remove_neg,
file = "miss_infusions_negative.txt",
Expand All @@ -123,6 +126,9 @@ write.table(
pattern_list <- remove_from_repl_pattern(remove_pos, repl_pattern, nr_replicates)
repl_pattern_filtered <- pattern_list$pattern
save(repl_pattern_filtered, file = "positive_repl_pattern.RData")
if (is.null(remove_pos)) {
remove_pos <- "none"
}
write.table(
remove_pos,
file = "miss_infusions_positive.txt",
Expand Down
4 changes: 2 additions & 2 deletions DIMS/AverageTechReplicates.nf
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,8 @@ process AverageTechReplicates {
output:
path('*_repl_pattern.RData'), emit: pattern_files
path('*_avg.RData'), emit: binned_files
path('miss_infusions_negative.txt')
path('miss_infusions_positive.txt')
path('miss_infusions_negative.txt'), emit: qc_miss_neg
path('miss_infusions_positive.txt'), emit: qc_miss_pos
path('*_TICplots.pdf'), emit: tic_plots_pdf

script:
Expand Down
78 changes: 63 additions & 15 deletions DIMS/GenerateQCOutput.R
Original file line number Diff line number Diff line change
Expand Up @@ -48,14 +48,16 @@ if (z_score == 1) {
is_list <- outlist[grep("Internal standard", outlist[, "relevance"], fixed = TRUE), ]
is_codes <- rownames(is_list)

# check if there is data present for all the samples that the pipeline started with,
# if not write sample name to a log file.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Seems like this comment has been lost in this change. I think its quite useful

# check if there is data present for all the samples that the pipeline started with
sample_names_nodata <- setdiff(names(repl_pattern), names(is_list))
if (length(sample_names_nodata) == 0) {
sample_names_nodata <- "none"
}
write.table(sample_names_nodata,
file = paste(outdir, "sample_names_nodata.txt", sep = "/"),
row.names = FALSE, col.names = FALSE, quote = FALSE
)
if (!is.null(sample_names_nodata)) {
write.table(sample_names_nodata,
file = paste(outdir, "sample_names_nodata.txt", sep = "/"),
row.names = FALSE, col.names = FALSE, quote = FALSE
)
for (sample_name in sample_names_nodata) {
repl_pattern[[sample_name]] <- NULL
}
Expand Down Expand Up @@ -137,39 +139,47 @@ is_sum_selection <- c(
"13C6-Tyrosine (IS)"
)

# define threshold for acceptance of selected internal standards
threshold_is_dbs_neg <- c(15000, 200000, 130000, 18000, 50000)
threshold_is_dbs_pos <- c(150000, 3300000, 1750000, 150000, 270000)
threshold_is_dbs_sum <- c(1300000, 2500000, 500000, 1800000, 1400000)
threshold_is_pl_neg <- c(70000, 700000, 700000, 65000, 350000)
threshold_is_pl_pos <- c(1500000, 9000000, 3000000, 400000, 700000)
threshold_is_pl_sum <- c(8000000, 12500000, 2500000, 3000000, 4000000)

# add minimal intensity lines based on matrix (DBS or Plasma) and machine mode (neg, pos, sum)
if (dims_matrix == "DBS") {
add_min_intens_lines <- TRUE
hline_data_neg <-
data.frame(
int_line = c(15000, 200000, 130000, 18000, 50000),
int_line = threshold_is_dbs_neg,
HMDB_name = is_neg_selection
)
hline_data_pos <-
data.frame(
int_line = c(150000, 3300000, 1750000, 150000, 270000),
int_line = threshold_is_dbs_pos,
HMDB_name = is_pos_selection
)
hline_data_sum <-
data.frame(
int_line = c(1300000, 2500000, 500000, 1800000, 1400000),
int_line = threshold_is_dbs_sum,
HMDB_name = is_sum_selection
)
} else if (dims_matrix == "Plasma") {
add_min_intens_lines <- TRUE
hline_data_neg <-
data.frame(
int_line = c(70000, 700000, 700000, 65000, 350000),
int_line = threshold_is_pl_neg,
HMDB_name = is_neg_selection
)
hline_data_pos <-
data.frame(
int_line = c(1500000, 9000000, 3000000, 400000, 700000),
int_line = threshold_is_pl_pos,
HMDB_name = is_pos_selection
)
hline_data_sum <-
data.frame(
int_line = c(8000000, 12500000, 2500000, 3000000, 4000000),
int_line = threshold_is_pl_sum,
HMDB_name = is_sum_selection
)
} else {
Expand All @@ -180,9 +190,36 @@ if (dims_matrix == "DBS") {
plot_width <- 8 + 0.2 * sample_count
plot_height <- plot_width / 2.5

is_neg_selection <- subset(is_neg, HMDB_name %in% is_neg_selection)
is_pos_selection <- subset(is_pos, HMDB_name %in% is_pos_selection)
is_sum_selection <- subset(is_summed, HMDB_name %in% is_sum_selection)
is_neg_selection_subset <- subset(is_neg, HMDB_name %in% is_neg_selection)
is_pos_selection_subset <- subset(is_pos, HMDB_name %in% is_pos_selection)
is_sum_selection_subset <- subset(is_summed, HMDB_name %in% is_sum_selection)

# export txt file with samples with internal standard level below threshold
if (dims_matrix == "Plasma") {
is_below_threshold_neg <- find_is_below_threshold(is_neg_selection_subset, threshold_is_pl_neg, is_neg_selection, "neg")
is_below_threshold_pos <- find_is_below_threshold(is_pos_selection_subset, threshold_is_pl_pos, is_pos_selection, "pos")
is_below_threshold_sum <- find_is_below_threshold(is_sum_selection_subset, threshold_is_pl_sum, is_sum_selection, "sum")
is_below_threshold <- rbind(is_below_threshold_pos, is_below_threshold_neg, is_below_threshold_sum)
} else if (dims_matrix == "DBS") {
is_below_threshold_neg <- find_is_below_threshold(is_neg_selection_subset, threshold_is_dbs_neg, is_neg_selection, "neg")
is_below_threshold_pos <- find_is_below_threshold(is_pos_selection_subset, threshold_is_dbs_pos, is_pos_selection, "pos")
is_below_threshold_sum <- find_is_below_threshold(is_sum_selection_subset, threshold_is_dbs_sum, is_neg_selection, "sum")
is_below_threshold <- rbind(is_below_threshold_pos, is_below_threshold_neg, is_below_threshold_sum)
} else {
# generate empty table
is_below_threshold <- is_neg_selection_subset[0, ]
}

if (nrow(is_below_threshold) > 0) {
write.table(cbind(is_below_threshold, scanmode = scanmode_is),
file = "internal_standards_below_threshold.txt",
row.names = FALSE, sep = "\t")
} else {
write.table("no internal standards are below threshold",
file = "internal_standards_below_threshold.txt"
row.names = FALSE, col.names = FALSE
)
}

# bar plot either with or without minimal intensity lines
if (add_min_intens_lines) {
Expand Down Expand Up @@ -325,10 +362,12 @@ if (length(sst_colnrs) > 0) {
control_list_intensities <- sst_list[, control_col_ids]
control_list_cv <- calc_coefficient_of_variation(control_list_intensities)
sst_list_intensities <- cbind(sst_list_intensities, CV_controls = control_list_cv[, "CV_perc"])
sst_list_intensities <- as.data.frame(sst_list_intensities)
} else {
sst_list_intensities <- sst_list[, intensity_col_ids]
}
for (col_nr in seq_len(ncol(sst_list_intensities))) {
sst_list_intensities <- as.data.frame(sst_list_intensities)
sst_list_intensities[, col_nr] <- as.numeric(sst_list_intensities[, col_nr])
if (grepl("Zscore", colnames(sst_list_intensities)[col_nr])) {
sst_list_intensities[, col_nr] <- round(sst_list_intensities[, col_nr], 2)
Expand Down Expand Up @@ -356,6 +395,15 @@ xlsx_name <- paste0(outdir, "/", project, "_IS_SST.xlsx")
openxlsx::saveWorkbook(wb, xlsx_name, overwrite = TRUE)
rm(wb)

# generate text file for workflow completed mail for components with Z-score < 2
if (sum(grepl("P1001", colnames(sst_list_intensities))) > 0) {
zscore_column <- grep("_Zscore", colnames(sst_list_intensities))[1]
sst_list_intensities_qc <- sst_list_intensities[sst_list_intensities[, zscore_column] < 2, ]
sst_list_intensities_qc <- select(sst_list_intensities_qc, -c("CV_controls"))
write.table(sst_list_intensities_qc, file = paste(outdir, "sst_qc.txt", sep = "/"), row.names = FALSE, sep = "\t")
} else {
write.table("no SST sample present", file = paste(outdir, "sst_qc.txt", sep = "/"), row.names = FALSE, col.names = FALSE)
}

### MISSING M/Z CHECK
# check the outlist_identified_(negative/positive).RData files for missing m/z values and save to file
Expand Down
4 changes: 3 additions & 1 deletion DIMS/GenerateQCOutput.nf
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,9 @@ process GenerateQCOutput {
tuple path('positive_controls_warning.txt'), path('missing_mz_warning.txt'), path('sample_names_nodata.txt'), optional: true
tuple path('*_IS_SST.xlsx'), path('*_positive_control.xlsx'), optional: true
path('plots/IS_*.png'), emit: plot_files
path('Check_number_of_controls.txt'), optional: true
path('check_number_of_controls.txt'), optional: true
path('sst_qc.txt'), optional: true
path('internal_standards_below_threshold.txt'), optional: true

script:
"""
Expand Down
26 changes: 26 additions & 0 deletions DIMS/export/generate_qc_output_functions.R
Original file line number Diff line number Diff line change
Expand Up @@ -217,3 +217,29 @@ check_missing_mz <- function(mzmed_pgrp_ident, scanmode) {
}
return(results_mz_missing)
}

find_is_below_threshold <- function(is_selection_subset, thresholds, is_names, scanmode) {
#' Create a list of all internal standards with intensity below a threshold value
#'
#' @param is_selection_subset: Matrix with intensities for each internal standard in each sample
#' @param thresholds: Threshold values for a given scan mode and matrix
#' @param is_names: Array of names of internal standards for a given scan mode
#' @param scanmode: string indicating scan mode to include in output
#'
#' @return is_below_threshold: Matrix listing all samples for which internal standard intensity is below threshold

# initialize; get the headers of the matrix
is_below_threshold <- is_selection_subset[0, ]
# for every line, check if intensity is below the appropriate threshold
for (line_index in seq_len(nrow(is_selection_subset))) {
is_selected <- is_selection_subset$HMDB_name[line_index]
thresh_selected <- thresholds[which(is_names == is_selected)]
if (is_selection_subset$Intensity[line_index] < thresh_selected) {
is_below_threshold <- rbind(is_below_threshold, is_selection_subset[line_index, ])
}
}
# add information on scan mode
is_below_threshold <- cbind(is_below_threshold, scanmode = rep(scanmode, nrow(is_below_threshold)))
return(is_below_threshold)
}

20 changes: 20 additions & 0 deletions DIMS/tests/testthat/test_generate_qc_output.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
# get_is_intensities, calc_coefficient_of_variation,
# check_missing_mz
library(ggplot2)
library(reshape2)
suppressMessages(library("dplyr"))
source("../../export/generate_qc_output_functions.R")

Expand Down Expand Up @@ -201,3 +202,22 @@ testthat::test_that("Check missing mz values", {
expect_identical(check_missing_mz(test_mz_pgrp, "Test")$`1`,
c(550, 551, 552, 553, 554, 555, 556, 557, 558, 559, 560))
})

testthat::test_that("list of internal standards below threshold is correctly created", {
test_is <- read.delim(test_path("fixtures", "test_internal_standards.txt"))
# select columns
test_is_wide <- test_is[ , c("HMDB_code", "HMDB_name", "C101.1", "C102.1", "P2.1", "P3.1")]
# melt into long format
test_is_long <- reshape2::melt(test_is_wide, id.vars = c("HMDB_name","HMDB_code"))
colnames(test_is_long)[4] <- "Intensity"

test_is_names <- c("metab_1 (IS)", "metab_2 (IS)", "metab_3 (IS)", "metab_4 (IS)")
test_thresholds <- rep(300, 4)

# 8 rows have intensity below threshold
expect_equal(nrow(find_is_below_threshold(test_is_long, test_thresholds, test_is_names, "test")), 8)
# the maximum intensity in the output should be less than threshold
expect_lt(max(find_is_below_threshold(test_is_long, test_thresholds, test_is_names, "test")$Intensity), 300)
# if all values are above threshold, the result should be an empty data table
expect_equal(nrow(find_is_below_threshold(test_is_long, test_thresholds / 3, test_is_names, "test")), 0)
})
Loading