UMCUGenetics · mraves2 · Oct 3, 2025 · Dec 15, 2025 · Dec 15, 2025 · Jan 9, 2026
diff --git a/DIMS/AverageTechReplicates.R b/DIMS/AverageTechReplicates.R
@@ -112,6 +112,9 @@ for (sample_nr in 1:length(repl_pattern)) {
 pattern_list <- remove_from_repl_pattern(remove_neg, repl_pattern, nr_replicates)
 repl_pattern_filtered <- pattern_list$pattern
 save(repl_pattern_filtered, file = "negative_repl_pattern.RData")
+if (is.null(remove_neg)) {
+  remove_neg <- "none"
+}
 write.table(
   remove_neg, 
   file = "miss_infusions_negative.txt", 
@@ -123,6 +126,9 @@ write.table(
 pattern_list <- remove_from_repl_pattern(remove_pos, repl_pattern, nr_replicates)
 repl_pattern_filtered <- pattern_list$pattern
 save(repl_pattern_filtered, file = "positive_repl_pattern.RData")
+if (is.null(remove_pos)) {
+  remove_pos <- "none"
+}
 write.table(
   remove_pos, 
   file = "miss_infusions_positive.txt", 

diff --git a/DIMS/AverageTechReplicates.nf b/DIMS/AverageTechReplicates.nf
@@ -17,8 +17,8 @@ process AverageTechReplicates {
     output:
        path('*_repl_pattern.RData'), emit: pattern_files
        path('*_avg.RData'), emit: binned_files
-       path('miss_infusions_negative.txt')
-       path('miss_infusions_positive.txt')
+       path('miss_infusions_negative.txt'), emit: qc_miss_neg
+       path('miss_infusions_positive.txt'), emit: qc_miss_pos
        path('*_TICplots.pdf'), emit: tic_plots_pdf
 
     script:

diff --git a/DIMS/GenerateQCOutput.R b/DIMS/GenerateQCOutput.R
@@ -48,14 +48,16 @@ if (z_score == 1) {
 is_list <- outlist[grep("Internal standard", outlist[, "relevance"], fixed = TRUE), ]
 is_codes <- rownames(is_list)
 
-# check if there is data present for all the samples that the pipeline started with,
-# if not write sample name to a log file.
+# check if there is data present for all the samples that the pipeline started with
 sample_names_nodata <- setdiff(names(repl_pattern), names(is_list))
+if (length(sample_names_nodata) == 0) {
+  sample_names_nodata <- "none"
+}
+write.table(sample_names_nodata,
+  file = paste(outdir, "sample_names_nodata.txt", sep = "/"),
+  row.names = FALSE, col.names = FALSE, quote = FALSE
+)
 if (!is.null(sample_names_nodata)) {
-  write.table(sample_names_nodata,
-    file = paste(outdir, "sample_names_nodata.txt", sep = "/"),
-    row.names = FALSE, col.names = FALSE, quote = FALSE
-  )
   for (sample_name in sample_names_nodata) {
     repl_pattern[[sample_name]] <- NULL
   }
@@ -137,39 +139,47 @@ is_sum_selection <- c(
   "13C6-Tyrosine (IS)"
 )
 
+# define threshold for acceptance of selected internal standards
+threshold_is_dbs_neg <- c(15000, 200000, 130000, 18000, 50000)
+threshold_is_dbs_pos <- c(150000, 3300000, 1750000, 150000, 270000)
+threshold_is_dbs_sum <- c(1300000, 2500000, 500000, 1800000, 1400000)
+threshold_is_pl_neg  <- c(70000, 700000, 700000, 65000, 350000)
+threshold_is_pl_pos  <- c(1500000, 9000000, 3000000, 400000, 700000)
+threshold_is_pl_sum  <- c(8000000, 12500000, 2500000, 3000000, 4000000)
+
 # add minimal intensity lines based on matrix (DBS or Plasma) and machine mode (neg, pos, sum)
 if (dims_matrix == "DBS") {
   add_min_intens_lines <- TRUE
   hline_data_neg <-
     data.frame(
-      int_line = c(15000, 200000, 130000, 18000, 50000),
+      int_line = threshold_is_dbs_neg,
       HMDB_name = is_neg_selection
     )
   hline_data_pos <-
     data.frame(
-      int_line = c(150000, 3300000, 1750000, 150000, 270000),
+      int_line = threshold_is_dbs_pos,
       HMDB_name = is_pos_selection
     )
   hline_data_sum <-
     data.frame(
-      int_line = c(1300000, 2500000, 500000, 1800000, 1400000),
+      int_line = threshold_is_dbs_sum,
       HMDB_name = is_sum_selection
     )
 } else if (dims_matrix == "Plasma") {
   add_min_intens_lines <- TRUE
   hline_data_neg <-
     data.frame(
-      int_line = c(70000, 700000, 700000, 65000, 350000),
+      int_line = threshold_is_pl_neg,
       HMDB_name = is_neg_selection
     )
   hline_data_pos <-
     data.frame(
-      int_line = c(1500000, 9000000, 3000000, 400000, 700000),
+      int_line = threshold_is_pl_pos,
       HMDB_name = is_pos_selection
     )
   hline_data_sum <-
     data.frame(
-      int_line = c(8000000, 12500000, 2500000, 3000000, 4000000),
+      int_line = threshold_is_pl_sum,
       HMDB_name = is_sum_selection
     )
 } else {
@@ -180,9 +190,36 @@ if (dims_matrix == "DBS") {
 plot_width <- 8 + 0.2 * sample_count
 plot_height <- plot_width / 2.5
 
-is_neg_selection <- subset(is_neg, HMDB_name %in% is_neg_selection)
-is_pos_selection <- subset(is_pos, HMDB_name %in% is_pos_selection)
-is_sum_selection <- subset(is_summed, HMDB_name %in% is_sum_selection)
+is_neg_selection_subset <- subset(is_neg, HMDB_name %in% is_neg_selection)
+is_pos_selection_subset <- subset(is_pos, HMDB_name %in% is_pos_selection)
+is_sum_selection_subset <- subset(is_summed, HMDB_name %in% is_sum_selection)
+
+# export txt file with samples with internal standard level below threshold
+if (dims_matrix == "Plasma") {
+  is_below_threshold_neg <- find_is_below_threshold(is_neg_selection_subset, threshold_is_pl_neg, is_neg_selection, "neg")
+  is_below_threshold_pos <- find_is_below_threshold(is_pos_selection_subset, threshold_is_pl_pos, is_pos_selection, "pos")
+  is_below_threshold_sum <- find_is_below_threshold(is_sum_selection_subset, threshold_is_pl_sum, is_sum_selection, "sum")
+  is_below_threshold <- rbind(is_below_threshold_pos, is_below_threshold_neg, is_below_threshold_sum)
+} else if (dims_matrix == "DBS") {
+  is_below_threshold_neg <- find_is_below_threshold(is_neg_selection_subset, threshold_is_dbs_neg, is_neg_selection, "neg")
+  is_below_threshold_pos <- find_is_below_threshold(is_pos_selection_subset, threshold_is_dbs_pos, is_pos_selection, "pos")
+  is_below_threshold_sum <- find_is_below_threshold(is_sum_selection_subset, threshold_is_dbs_sum, is_neg_selection, "sum")
+  is_below_threshold <- rbind(is_below_threshold_pos, is_below_threshold_neg, is_below_threshold_sum)
+} else {
+  # generate empty table
+  is_below_threshold <- is_neg_selection_subset[0, ]
+}
+
+if (nrow(is_below_threshold) > 0) {
+  write.table(cbind(is_below_threshold, scanmode = scanmode_is), 
+	      file = "internal_standards_below_threshold.txt", 
+	      row.names = FALSE, sep = "\t")
+} else { 
+  write.table("no internal standards are below threshold",
+	      file = "internal_standards_below_threshold.txt"
+	      row.names = FALSE, col.names = FALSE
+	      )
+}
 
 # bar plot either with or without minimal intensity lines
 if (add_min_intens_lines) {
@@ -325,10 +362,12 @@ if (length(sst_colnrs) > 0) {
   control_list_intensities <- sst_list[, control_col_ids]
   control_list_cv <- calc_coefficient_of_variation(control_list_intensities)
   sst_list_intensities <- cbind(sst_list_intensities, CV_controls = control_list_cv[, "CV_perc"])
+  sst_list_intensities <- as.data.frame(sst_list_intensities)
 } else {
   sst_list_intensities <- sst_list[, intensity_col_ids]
 }
 for (col_nr in seq_len(ncol(sst_list_intensities))) {
+  sst_list_intensities <- as.data.frame(sst_list_intensities)
   sst_list_intensities[, col_nr] <- as.numeric(sst_list_intensities[, col_nr])
   if (grepl("Zscore", colnames(sst_list_intensities)[col_nr])) {
     sst_list_intensities[, col_nr] <- round(sst_list_intensities[, col_nr], 2)
@@ -356,6 +395,15 @@ xlsx_name <- paste0(outdir, "/", project, "_IS_SST.xlsx")
 openxlsx::saveWorkbook(wb, xlsx_name, overwrite = TRUE)
 rm(wb)
 
+# generate text file for workflow completed mail for components with Z-score < 2
+if (sum(grepl("P1001", colnames(sst_list_intensities))) > 0) {
+  zscore_column <- grep("_Zscore", colnames(sst_list_intensities))[1]
+  sst_list_intensities_qc <- sst_list_intensities[sst_list_intensities[, zscore_column] < 2, ]
+  sst_list_intensities_qc <- select(sst_list_intensities_qc, -c("CV_controls"))
+  write.table(sst_list_intensities_qc, file = paste(outdir, "sst_qc.txt", sep = "/"), row.names = FALSE, sep = "\t")
+} else {
+  write.table("no SST sample present", file = paste(outdir, "sst_qc.txt", sep = "/"), row.names = FALSE, col.names = FALSE)
+}
 
 ### MISSING M/Z CHECK
 # check the outlist_identified_(negative/positive).RData files for missing m/z values and save to file

diff --git a/DIMS/GenerateQCOutput.nf b/DIMS/GenerateQCOutput.nf
@@ -16,7 +16,9 @@ process GenerateQCOutput {
        tuple path('positive_controls_warning.txt'), path('missing_mz_warning.txt'), path('sample_names_nodata.txt'), optional: true
        tuple path('*_IS_SST.xlsx'), path('*_positive_control.xlsx'), optional: true
        path('plots/IS_*.png'), emit: plot_files
-       path('Check_number_of_controls.txt'), optional: true
+       path('check_number_of_controls.txt'), optional: true
+       path('sst_qc.txt'), optional: true
+       path('internal_standards_below_threshold.txt'), optional: true
 
     script:
         """

diff --git a/DIMS/export/generate_qc_output_functions.R b/DIMS/export/generate_qc_output_functions.R
@@ -217,3 +217,29 @@ check_missing_mz <- function(mzmed_pgrp_ident, scanmode) {
   }
   return(results_mz_missing)
 }
+
+find_is_below_threshold <- function(is_selection_subset, thresholds, is_names, scanmode) {
+  #' Create a list of all internal standards with intensity below a threshold value
+  #'
+  #' @param is_selection_subset: Matrix with intensities for each internal standard in each sample
+  #' @param thresholds: Threshold values for a given scan mode and matrix
+  #' @param is_names: Array of names of internal standards for a given scan mode
+  #' @param scanmode: string indicating scan mode to include in output
+  #'
+  #' @return is_below_threshold: Matrix listing all samples for which internal standard intensity is below threshold
+
+  # initialize; get the headers of the matrix
+  is_below_threshold <- is_selection_subset[0, ]
+  # for every line, check if intensity is below the appropriate threshold
+  for (line_index in seq_len(nrow(is_selection_subset))) {
+    is_selected <- is_selection_subset$HMDB_name[line_index]
+    thresh_selected <- thresholds[which(is_names == is_selected)]
+    if (is_selection_subset$Intensity[line_index] < thresh_selected) {
+      is_below_threshold <- rbind(is_below_threshold, is_selection_subset[line_index, ])
+    }
+  }
+  # add information on scan mode
+  is_below_threshold <- cbind(is_below_threshold, scanmode = rep(scanmode, nrow(is_below_threshold)))
+  return(is_below_threshold)
+}
+
diff --git a/DIMS/tests/testthat/test_generate_qc_output.R b/DIMS/tests/testthat/test_generate_qc_output.R
@@ -4,6 +4,7 @@
 #            get_is_intensities, calc_coefficient_of_variation,
 #            check_missing_mz
 library(ggplot2)
+library(reshape2)
 suppressMessages(library("dplyr"))
 source("../../export/generate_qc_output_functions.R")
 
@@ -201,3 +202,22 @@ testthat::test_that("Check missing mz values", {
   expect_identical(check_missing_mz(test_mz_pgrp, "Test")$`1`,
                    c(550, 551, 552, 553, 554, 555, 556, 557, 558, 559, 560))
 })
+
+testthat::test_that("list of internal standards below threshold is correctly created", {
+  test_is <- read.delim(test_path("fixtures", "test_internal_standards.txt"))
+  # select columns
+  test_is_wide <- test_is[ , c("HMDB_code", "HMDB_name", "C101.1", "C102.1", "P2.1", "P3.1")]
+  # melt into long format
+  test_is_long <- reshape2::melt(test_is_wide, id.vars = c("HMDB_name","HMDB_code"))
+  colnames(test_is_long)[4] <- "Intensity"
+
+  test_is_names <- c("metab_1 (IS)", "metab_2 (IS)", "metab_3 (IS)", "metab_4 (IS)")
+  test_thresholds <- rep(300, 4)
+
+  # 8 rows have intensity below threshold
+  expect_equal(nrow(find_is_below_threshold(test_is_long, test_thresholds, test_is_names, "test")), 8)
+  # the maximum intensity in the output should be less than threshold
+  expect_lt(max(find_is_below_threshold(test_is_long, test_thresholds, test_is_names, "test")$Intensity), 300)
+  # if all values are above threshold, the result should be an empty data table
+  expect_equal(nrow(find_is_below_threshold(test_is_long, test_thresholds / 3, test_is_names, "test")), 0)
+})