From 54c6e62c65532ed303152d799bf0d538724c7829 Mon Sep 17 00:00:00 2001 From: valerianilucrezia Date: Wed, 25 Mar 2026 12:13:22 +0100 Subject: [PATCH 1/2] update ctree parsing of pyclone --- modules/nf-core/ctree/templates/main_script.R | 81 +++++++++---------- 1 file changed, 38 insertions(+), 43 deletions(-) diff --git a/modules/nf-core/ctree/templates/main_script.R b/modules/nf-core/ctree/templates/main_script.R index 353751a47d7b..5ac8ac604d03 100644 --- a/modules/nf-core/ctree/templates/main_script.R +++ b/modules/nf-core/ctree/templates/main_script.R @@ -62,51 +62,46 @@ add_dummy_driver = function(input_table, variant_colname, is_driver_colname) { } initialize_ctree_obj_pyclone = function(ctree_input) { - driver_cluster = unique(ctree_input[which(ctree_input["is.driver"]==TRUE),c("cluster")]) - # the CCF table must report CCF values for each cluster and sample - # cluster | nMuts | is.driver | is.clonal | sample1 | sample2 | ... - CCF_table = ctree_input %>% - dplyr::select(sample_id, cluster, nMuts, is.driver, is.clonal, CCF) %>% - dplyr::mutate(is.driver=ifelse(is.driver=="", FALSE, TRUE)) %>% - dplyr::filter(cluster!="Tail") %>% - dplyr::group_by(cluster) %>% - dplyr::mutate(is.driver=any(is.driver)) %>% - dplyr::filter(any(CCF>0)) %>% - dplyr::ungroup() %>% - unique() %>% - tidyr::pivot_wider(names_from="sample_id", values_from="CCF", values_fill=0) - - # the driver table must contain patient and variant IDs and report clonality and driver status - # patientID | variantID | is.driver | is.clonal | cluster | sample1 | sample2 | ... - drivers_table = ctree_input %>% - dplyr::filter(cluster %in% CCF_table[["cluster"]]) %>% - dplyr::mutate(is.driver=as.logical(is.driver)) %>% - dplyr::select(patientID, sample_id, variantID, cluster, is.driver, is.clonal, CCF) %>% - dplyr::filter(is.driver==TRUE) %>% - dplyr::mutate(variantID=replace(variantID, is.na(variantID), "")) %>% - tidyr::pivot_wider(names_from="sample_id", values_from="CCF", values_fill=0) %>% - dplyr::mutate(cluster=as.character(cluster)) - - samples = unique(ctree_input[["sample_id"]]) # if multisample, this is a list - patient = unique(ctree_input[["patientID"]]) - - CCF_table = add_dummy_driver(CCF_table, variant_colname="variantID", is_driver_colname="is.driver") %>% - dplyr::mutate(cluster=as.character(cluster)) - - if (nrow(drivers_table)==0) { - drivers_table = CCF_table %>% - dplyr::filter(is.driver) %>% - dplyr::select(-nMuts) %>% - dplyr::mutate(patientID=patient) - } - - ctree_init = list("CCF_table"=CCF_table, - "drivers_table"=drivers_table, - "samples"=samples, - "patient"=patient) - return(ctree_init) + ctree_input = add_dummy_driver(ctree_input, variant_colname="variantID", is_driver_colname="is.driver") + + + # the CCF table must report CCF values for each cluster and sample + # cluster | nMuts | is.driver | is.clonal | sample1 | sample2 | ... + CCF_table = ctree_input %>% + dplyr::select(sample_id, cluster, nMuts, is.driver, is.clonal, CCF) %>% + dplyr::mutate(is.driver=replace(is.driver, is.driver=="", "FALSE")) %>% + dplyr::mutate(is.driver=as.logical(is.driver)) %>% + dplyr::filter(cluster!="Tail") %>% + dplyr::mutate(cluster=as.character(cluster)) %>% + dplyr::group_by(cluster) %>% + dplyr::mutate(is.driver=any(is.driver)) %>% + dplyr::filter(any(CCF>0)) %>% + dplyr::ungroup() %>% unique() %>% + tidyr::pivot_wider(names_from="sample_id", values_from="CCF", values_fill=0) + + # the driver table must contain patient and variant IDs and report clonality and driver status + # patientID | variantID | is.driver | is.clonal | cluster | sample1 | sample2 | ... + drivers_table = ctree_input %>% + dplyr::filter(cluster %in% CCF_table[["cluster"]]) %>% + dplyr::mutate(is.driver=as.logical(is.driver)) %>% + dplyr::mutate(cluster=as.character(cluster)) %>% + dplyr::select(patientID, sample_id, variantID, cluster, is.driver, is.clonal, CCF) %>% + dplyr::filter(is.driver==TRUE) %>% + dplyr::mutate(variantID=replace(variantID, is.na(variantID), "")) %>% + tidyr::pivot_wider(names_from="sample_id", values_from="CCF", values_fill=0) + + samples = unique(ctree_input[["sample_id"]]) # if multisample, this is a list + patient = unique(ctree_input[["patientID"]]) + + + ctree_init = list("CCF_table"=CCF_table, + "drivers_table"=drivers_table, + "samples"=samples, + "patient"=patient) + return(ctree_init) } + if ( grepl(".rds\$", tolower("$ctree_input")) ) { best_fit = readRDS("$ctree_input") do_fit = TRUE From 7fd2402845732d3c5922dabcdf46d7d11bcbd8fe Mon Sep 17 00:00:00 2001 From: valerianilucrezia Date: Wed, 25 Mar 2026 13:24:15 +0100 Subject: [PATCH 2/2] solve precommit --- modules/nf-core/ctree/templates/main_script.R | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/modules/nf-core/ctree/templates/main_script.R b/modules/nf-core/ctree/templates/main_script.R index 5ac8ac604d03..afd8123fb70d 100644 --- a/modules/nf-core/ctree/templates/main_script.R +++ b/modules/nf-core/ctree/templates/main_script.R @@ -61,10 +61,11 @@ add_dummy_driver = function(input_table, variant_colname, is_driver_colname) { return(input_table) } + initialize_ctree_obj_pyclone = function(ctree_input) { ctree_input = add_dummy_driver(ctree_input, variant_colname="variantID", is_driver_colname="is.driver") - - + + # the CCF table must report CCF values for each cluster and sample # cluster | nMuts | is.driver | is.clonal | sample1 | sample2 | ... CCF_table = ctree_input %>% @@ -78,7 +79,7 @@ initialize_ctree_obj_pyclone = function(ctree_input) { dplyr::filter(any(CCF>0)) %>% dplyr::ungroup() %>% unique() %>% tidyr::pivot_wider(names_from="sample_id", values_from="CCF", values_fill=0) - + # the driver table must contain patient and variant IDs and report clonality and driver status # patientID | variantID | is.driver | is.clonal | cluster | sample1 | sample2 | ... drivers_table = ctree_input %>% @@ -89,11 +90,11 @@ initialize_ctree_obj_pyclone = function(ctree_input) { dplyr::filter(is.driver==TRUE) %>% dplyr::mutate(variantID=replace(variantID, is.na(variantID), "")) %>% tidyr::pivot_wider(names_from="sample_id", values_from="CCF", values_fill=0) - + samples = unique(ctree_input[["sample_id"]]) # if multisample, this is a list patient = unique(ctree_input[["patientID"]]) - - + + ctree_init = list("CCF_table"=CCF_table, "drivers_table"=drivers_table, "samples"=samples,