From ba378dbd8272d1bd6a9e383b448718e18ed17ca1 Mon Sep 17 00:00:00 2001 From: Chengxin Dai <37200167+daichengxin@users.noreply.github.com> Date: Tue, 30 Dec 2025 20:44:42 +0800 Subject: [PATCH 01/22] test --- modules/local/utils/msrescore_features/main.nf | 2 +- modules/local/utils/msrescore_fine_tuning/main.nf | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/local/utils/msrescore_features/main.nf b/modules/local/utils/msrescore_features/main.nf index a1c27c11..3617788a 100644 --- a/modules/local/utils/msrescore_features/main.nf +++ b/modules/local/utils/msrescore_features/main.nf @@ -4,7 +4,7 @@ process MSRESCORE_FEATURES { container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'oras://ghcr.io/bigbio/quantms-rescoring-sif:0.0.13' : - 'ghcr.io/bigbio/quantms-rescoring:0.0.13' }" + 'ghcr.io/daichengxin/quantms-rescoring:0.0.14' }" input: tuple val(meta), path(idxml), path(mzml), path(model_weight) diff --git a/modules/local/utils/msrescore_fine_tuning/main.nf b/modules/local/utils/msrescore_fine_tuning/main.nf index 70ddd073..972e42da 100644 --- a/modules/local/utils/msrescore_fine_tuning/main.nf +++ b/modules/local/utils/msrescore_fine_tuning/main.nf @@ -4,7 +4,7 @@ process MSRESCORE_FINE_TUNING { container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'oras://ghcr.io/bigbio/quantms-rescoring-sif:0.0.13' : - 'ghcr.io/bigbio/quantms-rescoring:0.0.13' }" + 'ghcr.io/daichengxin/quantms-rescoring:0.0.14' }" input: tuple val(meta), path(idxml), path(mzml), path(ms2_model_dir) From dfdd2076228d1fa6126187eee65d7a36eab50266 Mon Sep 17 00:00:00 2001 From: Chengxin Dai <37200167+daichengxin@users.noreply.github.com> Date: Wed, 31 Dec 2025 10:45:37 +0800 Subject: [PATCH 02/22] test --- modules/local/utils/msrescore_features/main.nf | 2 +- modules/local/utils/msrescore_fine_tuning/main.nf | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/local/utils/msrescore_features/main.nf b/modules/local/utils/msrescore_features/main.nf index 3617788a..b2b38bd3 100644 --- a/modules/local/utils/msrescore_features/main.nf +++ b/modules/local/utils/msrescore_features/main.nf @@ -3,7 +3,7 @@ process MSRESCORE_FEATURES { label 'process_high' container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'oras://ghcr.io/bigbio/quantms-rescoring-sif:0.0.13' : + 'oras://ghcr.io/daichengxin/quantms-rescoring-sif:0.0.14' : 'ghcr.io/daichengxin/quantms-rescoring:0.0.14' }" input: diff --git a/modules/local/utils/msrescore_fine_tuning/main.nf b/modules/local/utils/msrescore_fine_tuning/main.nf index 972e42da..6c054643 100644 --- a/modules/local/utils/msrescore_fine_tuning/main.nf +++ b/modules/local/utils/msrescore_fine_tuning/main.nf @@ -3,7 +3,7 @@ process MSRESCORE_FINE_TUNING { label 'process_high' container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'oras://ghcr.io/bigbio/quantms-rescoring-sif:0.0.13' : + 'oras://ghcr.io/daichengxin/quantms-rescoring-sif:0.0.14' : 'ghcr.io/daichengxin/quantms-rescoring:0.0.14' }" input: From 224d6799d7cf386ee220f7c75d9e1689a6dfe11f Mon Sep 17 00:00:00 2001 From: Chengxin Dai <37200167+daichengxin@users.noreply.github.com> Date: Wed, 31 Dec 2025 17:37:36 +0800 Subject: [PATCH 03/22] Update modules.config --- conf/modules/modules.config | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/conf/modules/modules.config b/conf/modules/modules.config index 979da993..a6260b7e 100644 --- a/conf/modules/modules.config +++ b/conf/modules/modules.config @@ -215,6 +215,7 @@ process { // MS2RESCORE withName: 'MSRESCORE_FEATURES' { + beforeScript = 'export HOME=/tmp' ext.args = [ "--ms2_model ${params.ms2features_model}", "--calibration_set_size ${params.ms2features_calibration}", @@ -222,6 +223,11 @@ process { ].join(' ').trim() } + // MSRESCORE_FINE_TUNING + withName: 'MSRESCORE_FINE_TUNING' { + beforeScript = 'export HOME=/tmp' + } + withName: '.*:DDA_ID:PHOSPHO_SCORING:ID_SCORE_SWITCHER' { ext.args = [ "-new_score_orientation lower_better", From 2f3bfd860c24367a4e9af2b7f65bac88d4de3e6f Mon Sep 17 00:00:00 2001 From: Chengxin Dai <37200167+daichengxin@users.noreply.github.com> Date: Wed, 31 Dec 2025 18:40:17 +0800 Subject: [PATCH 04/22] test --- conf/modules/modules.config | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/conf/modules/modules.config b/conf/modules/modules.config index a6260b7e..fdc13b08 100644 --- a/conf/modules/modules.config +++ b/conf/modules/modules.config @@ -215,7 +215,7 @@ process { // MS2RESCORE withName: 'MSRESCORE_FEATURES' { - beforeScript = 'export HOME=/tmp' + beforeScript = 'export PEPTDEEP_HOME=/tmp' ext.args = [ "--ms2_model ${params.ms2features_model}", "--calibration_set_size ${params.ms2features_calibration}", @@ -225,7 +225,7 @@ process { // MSRESCORE_FINE_TUNING withName: 'MSRESCORE_FINE_TUNING' { - beforeScript = 'export HOME=/tmp' + beforeScript = 'export PEPTDEEP_HOME=/tmp' } withName: '.*:DDA_ID:PHOSPHO_SCORING:ID_SCORE_SWITCHER' { From 29e3073a0c58da0978288a6a19b75ab5526c4191 Mon Sep 17 00:00:00 2001 From: Chengxin Dai <37200167+daichengxin@users.noreply.github.com> Date: Wed, 31 Dec 2025 20:04:49 +0800 Subject: [PATCH 05/22] test --- modules/local/utils/download_model/main.nf | 34 +++++++++++++++++++++ modules/local/utils/download_model/meta.yml | 24 +++++++++++++++ subworkflows/local/dda_id/main.nf | 5 +-- 3 files changed, 61 insertions(+), 2 deletions(-) create mode 100644 modules/local/utils/download_model/main.nf create mode 100644 modules/local/utils/download_model/meta.yml diff --git a/modules/local/utils/download_model/main.nf b/modules/local/utils/download_model/main.nf new file mode 100644 index 00000000..02e1c958 --- /dev/null +++ b/modules/local/utils/download_model/main.nf @@ -0,0 +1,34 @@ +process DOWNLOAD_MODEL { + label 'process_single' + + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'oras://ghcr.io/daichengxin/quantms-rescoring-sif:0.0.14' : + 'ghcr.io/daichengxin/quantms-rescoring:0.0.14' }" + + output: + path "rescore_model" , emit: model_weights + path "versions.yml" , emit: versions + path "*.log" , emit: log + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + + """ + rescoring download_models \\ + --models ./ \\ + --model_dir ./rescore_model \\ + $args \\ + 2>&1 | tee download_models.log + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + quantms-rescoring: \$(rescoring --version 2>&1 | grep -Eo '[0-9]+\\.[0-9]+\\.[0-9]+') + ms2pip: \$(ms2pip --version 2>&1 | grep -Eo '[0-9]+\\.[0-9]+\\.[0-9]+') + deeplc: \$(deeplc --version 2>&1 | grep -Eo '[0-9]+\\.[0-9]+\\.[0-9]+') + MS2Rescore: \$(ms2rescore --version 2>&1 | grep -Eo '[0-9]+\\.[0-9]+\\.[0-9]+' | head -n 1) + END_VERSIONS + """ +} diff --git a/modules/local/utils/download_model/meta.yml b/modules/local/utils/download_model/meta.yml new file mode 100644 index 00000000..9eb639e5 --- /dev/null +++ b/modules/local/utils/download_model/meta.yml @@ -0,0 +1,24 @@ +name: DOWNLOAD_MODEL +description: A module to download quantms-rescoring model +keywords: + - MS2 + - rescoring + - fine tuning +tools: + - custom: + description: | + A custom module download quantms-rescoring model. + homepage: https://github.com/bigbio/quantms + documentation: https://github.com/bigbio/quantms/tree/readthedocs + +output: + - model_weights: + type: file + description: Model weights for MS2 rescoring + pattern: "*" + - version: + type: file + description: File containing software version + pattern: "versions.yml" +authors: + - "@daichengxin" diff --git a/subworkflows/local/dda_id/main.nf b/subworkflows/local/dda_id/main.nf index 182ea094..0842d75c 100644 --- a/subworkflows/local/dda_id/main.nf +++ b/subworkflows/local/dda_id/main.nf @@ -11,6 +11,7 @@ include { GET_SAMPLE } from '../../../modules/local/utils/extract_samp include { SPECTRUM_FEATURES } from '../../../modules/local/utils/spectrum_features/main' include { PSM_CLEAN } from '../../../modules/local/utils/psm_clean/main' include { MSRESCORE_FINE_TUNING} from '../../../modules/local/utils/msrescore_fine_tuning/main' +include { DOWNLOAD_MODEL } from '../../../modules/local/utils/download_model/main' include { PHOSPHO_SCORING } from '../phospho_scoring/main' // @@ -49,14 +50,14 @@ workflow DDA_ID { if (params.skip_rescoring == false) { if (params.ms2features_enable == true) { - // Only add ms2_model_dir if it's actually set and not empty // Handle cases where parameter might be empty string, null, boolean true, or whitespace // When --ms2features_model_dir is passed with no value, Nextflow may set it to boolean true if (params.ms2features_model_dir && params.ms2features_model_dir != true) { ms2_model_dir = Channel.from(file(params.ms2features_model_dir, checkIfExists: true)) } else { - ms2_model_dir = Channel.from(file("./")) + DOWNLOAD_MODEL() + ms2_model_dir = DOWNLOAD_MODEL.out.model_weights } if (params.ms2features_fine_tuning == true) { From f64e3efeeb98d8d3329461147e78eb71b9c9eec4 Mon Sep 17 00:00:00 2001 From: Chengxin Dai <37200167+daichengxin@users.noreply.github.com> Date: Wed, 31 Dec 2025 20:30:35 +0800 Subject: [PATCH 06/22] test --- modules/local/utils/download_model/main.nf | 5 ++++- subworkflows/local/dda_id/main.nf | 14 ++++++++++++-- 2 files changed, 16 insertions(+), 3 deletions(-) diff --git a/modules/local/utils/download_model/main.nf b/modules/local/utils/download_model/main.nf index 02e1c958..da4fa09c 100644 --- a/modules/local/utils/download_model/main.nf +++ b/modules/local/utils/download_model/main.nf @@ -5,6 +5,9 @@ process DOWNLOAD_MODEL { 'oras://ghcr.io/daichengxin/quantms-rescoring-sif:0.0.14' : 'ghcr.io/daichengxin/quantms-rescoring:0.0.14' }" + input: + val(model_list) + output: path "rescore_model" , emit: model_weights path "versions.yml" , emit: versions @@ -18,7 +21,7 @@ process DOWNLOAD_MODEL { """ rescoring download_models \\ - --models ./ \\ + --models ${model_list} \\ --model_dir ./rescore_model \\ $args \\ 2>&1 | tee download_models.log diff --git a/subworkflows/local/dda_id/main.nf b/subworkflows/local/dda_id/main.nf index 0842d75c..b0112b62 100644 --- a/subworkflows/local/dda_id/main.nf +++ b/subworkflows/local/dda_id/main.nf @@ -56,8 +56,18 @@ workflow DDA_ID { if (params.ms2features_model_dir && params.ms2features_model_dir != true) { ms2_model_dir = Channel.from(file(params.ms2features_model_dir, checkIfExists: true)) } else { - DOWNLOAD_MODEL() - ms2_model_dir = DOWNLOAD_MODEL.out.model_weights + if (params.ms2features_best){ + DOWNLOAD_MODEL(Channel.value('ms2pip,alphapeptdeep')) + ms2_model_dir = DOWNLOAD_MODEL.out.model_weights + } else if (params.ms2features_generators.toLowerCase().contains('ms2pip')) { + DOWNLOAD_MODEL(Channel.value('ms2pip')) + ms2_model_dir = DOWNLOAD_MODEL.out.model_weights + } else if (params.ms2features_generators.toLowerCase().contains('alphapeptdeep')) { + DOWNLOAD_MODEL(Channel.value('alphapeptdeep')) + ms2_model_dir = DOWNLOAD_MODEL.out.model_weights + } else { + ms2_model_dir = Channel.from(file("./")) + } } if (params.ms2features_fine_tuning == true) { From 25a0d6a3c915c5b0f9be778e28564ca3c04bf5fb Mon Sep 17 00:00:00 2001 From: Chengxin Dai <37200167+daichengxin@users.noreply.github.com> Date: Thu, 1 Jan 2026 12:19:32 +0800 Subject: [PATCH 07/22] test --- modules/local/utils/download_model/main.nf | 37 --------------------- modules/local/utils/download_model/meta.yml | 24 ------------- subworkflows/local/dda_id/main.nf | 14 +------- 3 files changed, 1 insertion(+), 74 deletions(-) delete mode 100644 modules/local/utils/download_model/main.nf delete mode 100644 modules/local/utils/download_model/meta.yml diff --git a/modules/local/utils/download_model/main.nf b/modules/local/utils/download_model/main.nf deleted file mode 100644 index da4fa09c..00000000 --- a/modules/local/utils/download_model/main.nf +++ /dev/null @@ -1,37 +0,0 @@ -process DOWNLOAD_MODEL { - label 'process_single' - - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'oras://ghcr.io/daichengxin/quantms-rescoring-sif:0.0.14' : - 'ghcr.io/daichengxin/quantms-rescoring:0.0.14' }" - - input: - val(model_list) - - output: - path "rescore_model" , emit: model_weights - path "versions.yml" , emit: versions - path "*.log" , emit: log - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - - """ - rescoring download_models \\ - --models ${model_list} \\ - --model_dir ./rescore_model \\ - $args \\ - 2>&1 | tee download_models.log - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - quantms-rescoring: \$(rescoring --version 2>&1 | grep -Eo '[0-9]+\\.[0-9]+\\.[0-9]+') - ms2pip: \$(ms2pip --version 2>&1 | grep -Eo '[0-9]+\\.[0-9]+\\.[0-9]+') - deeplc: \$(deeplc --version 2>&1 | grep -Eo '[0-9]+\\.[0-9]+\\.[0-9]+') - MS2Rescore: \$(ms2rescore --version 2>&1 | grep -Eo '[0-9]+\\.[0-9]+\\.[0-9]+' | head -n 1) - END_VERSIONS - """ -} diff --git a/modules/local/utils/download_model/meta.yml b/modules/local/utils/download_model/meta.yml deleted file mode 100644 index 9eb639e5..00000000 --- a/modules/local/utils/download_model/meta.yml +++ /dev/null @@ -1,24 +0,0 @@ -name: DOWNLOAD_MODEL -description: A module to download quantms-rescoring model -keywords: - - MS2 - - rescoring - - fine tuning -tools: - - custom: - description: | - A custom module download quantms-rescoring model. - homepage: https://github.com/bigbio/quantms - documentation: https://github.com/bigbio/quantms/tree/readthedocs - -output: - - model_weights: - type: file - description: Model weights for MS2 rescoring - pattern: "*" - - version: - type: file - description: File containing software version - pattern: "versions.yml" -authors: - - "@daichengxin" diff --git a/subworkflows/local/dda_id/main.nf b/subworkflows/local/dda_id/main.nf index b0112b62..8839d44c 100644 --- a/subworkflows/local/dda_id/main.nf +++ b/subworkflows/local/dda_id/main.nf @@ -11,7 +11,6 @@ include { GET_SAMPLE } from '../../../modules/local/utils/extract_samp include { SPECTRUM_FEATURES } from '../../../modules/local/utils/spectrum_features/main' include { PSM_CLEAN } from '../../../modules/local/utils/psm_clean/main' include { MSRESCORE_FINE_TUNING} from '../../../modules/local/utils/msrescore_fine_tuning/main' -include { DOWNLOAD_MODEL } from '../../../modules/local/utils/download_model/main' include { PHOSPHO_SCORING } from '../phospho_scoring/main' // @@ -56,18 +55,7 @@ workflow DDA_ID { if (params.ms2features_model_dir && params.ms2features_model_dir != true) { ms2_model_dir = Channel.from(file(params.ms2features_model_dir, checkIfExists: true)) } else { - if (params.ms2features_best){ - DOWNLOAD_MODEL(Channel.value('ms2pip,alphapeptdeep')) - ms2_model_dir = DOWNLOAD_MODEL.out.model_weights - } else if (params.ms2features_generators.toLowerCase().contains('ms2pip')) { - DOWNLOAD_MODEL(Channel.value('ms2pip')) - ms2_model_dir = DOWNLOAD_MODEL.out.model_weights - } else if (params.ms2features_generators.toLowerCase().contains('alphapeptdeep')) { - DOWNLOAD_MODEL(Channel.value('alphapeptdeep')) - ms2_model_dir = DOWNLOAD_MODEL.out.model_weights - } else { - ms2_model_dir = Channel.from(file("./")) - } + ms2_model_dir = Channel.from(file("./")) } if (params.ms2features_fine_tuning == true) { From 28e428c34374aa73e2b271e761b53743f0622631 Mon Sep 17 00:00:00 2001 From: Yasset Perez-Riverol Date: Fri, 2 Jan 2026 07:57:28 +0000 Subject: [PATCH 08/22] Update container image references in main.nf --- modules/local/utils/msrescore_features/main.nf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/local/utils/msrescore_features/main.nf b/modules/local/utils/msrescore_features/main.nf index b2b38bd3..c08f1193 100644 --- a/modules/local/utils/msrescore_features/main.nf +++ b/modules/local/utils/msrescore_features/main.nf @@ -3,8 +3,8 @@ process MSRESCORE_FEATURES { label 'process_high' container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'oras://ghcr.io/daichengxin/quantms-rescoring-sif:0.0.14' : - 'ghcr.io/daichengxin/quantms-rescoring:0.0.14' }" + 'oras://ghcr.io/bigbio/quantms-rescoring-sif:0.0.14' : + 'ghcr.io/bigbio/quantms-rescoring:0.0.14' }" input: tuple val(meta), path(idxml), path(mzml), path(model_weight) From d6922fb6a63744cd2aa694a372a104f6427f5e80 Mon Sep 17 00:00:00 2001 From: Yasset Perez-Riverol Date: Fri, 2 Jan 2026 07:58:50 +0000 Subject: [PATCH 09/22] Update container image references in main.nf --- modules/local/utils/msrescore_fine_tuning/main.nf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/local/utils/msrescore_fine_tuning/main.nf b/modules/local/utils/msrescore_fine_tuning/main.nf index 6c054643..9441d156 100644 --- a/modules/local/utils/msrescore_fine_tuning/main.nf +++ b/modules/local/utils/msrescore_fine_tuning/main.nf @@ -3,8 +3,8 @@ process MSRESCORE_FINE_TUNING { label 'process_high' container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'oras://ghcr.io/daichengxin/quantms-rescoring-sif:0.0.14' : - 'ghcr.io/daichengxin/quantms-rescoring:0.0.14' }" + 'oras://ghcr.io/bigbio/quantms-rescoring-sif:0.0.14' : + 'ghcr.io/bigbio/quantms-rescoring:0.0.14' }" input: tuple val(meta), path(idxml), path(mzml), path(ms2_model_dir) From 2051fcffe5bca23eb7679998142c1e0e75f06404 Mon Sep 17 00:00:00 2001 From: Chengxin Dai <37200167+daichengxin@users.noreply.github.com> Date: Fri, 2 Jan 2026 22:18:30 +0800 Subject: [PATCH 10/22] refine --- conf/modules/modules.config | 6 --- .../local/utils/msrescore_features/main.nf | 4 +- .../local/utils/msrescore_fine_tuning/main.nf | 22 +++++--- subworkflows/local/dda_id/main.nf | 43 ++++++++++++++-- subworkflows/local/psm_rescoring/main.nf | 50 ++++++++++++++++--- 5 files changed, 100 insertions(+), 25 deletions(-) diff --git a/conf/modules/modules.config b/conf/modules/modules.config index fdc13b08..979da993 100644 --- a/conf/modules/modules.config +++ b/conf/modules/modules.config @@ -215,7 +215,6 @@ process { // MS2RESCORE withName: 'MSRESCORE_FEATURES' { - beforeScript = 'export PEPTDEEP_HOME=/tmp' ext.args = [ "--ms2_model ${params.ms2features_model}", "--calibration_set_size ${params.ms2features_calibration}", @@ -223,11 +222,6 @@ process { ].join(' ').trim() } - // MSRESCORE_FINE_TUNING - withName: 'MSRESCORE_FINE_TUNING' { - beforeScript = 'export PEPTDEEP_HOME=/tmp' - } - withName: '.*:DDA_ID:PHOSPHO_SCORING:ID_SCORE_SWITCHER' { ext.args = [ "-new_score_orientation lower_better", diff --git a/modules/local/utils/msrescore_features/main.nf b/modules/local/utils/msrescore_features/main.nf index c08f1193..6a28d128 100644 --- a/modules/local/utils/msrescore_features/main.nf +++ b/modules/local/utils/msrescore_features/main.nf @@ -27,8 +27,10 @@ process MSRESCORE_FEATURES { // When --ms2features_model_dir is passed with no value, Nextflow may set it to boolean true if (params.ms2features_fine_tuning) { ms2_model_dir = '--ms2_model_dir ./' - } else { + } else if (params.ms2features_model_dir && params.ms2features_model_dir != true){ ms2_model_dir = "--ms2_model_dir ${model_weight}" + } else { + ms2_model_dir = "--ms2_model_dir ./" } // Determine if using ms2pip or alphapeptdeep based on ms2features_generators diff --git a/modules/local/utils/msrescore_fine_tuning/main.nf b/modules/local/utils/msrescore_fine_tuning/main.nf index 9441d156..a5763fa5 100644 --- a/modules/local/utils/msrescore_fine_tuning/main.nf +++ b/modules/local/utils/msrescore_fine_tuning/main.nf @@ -7,27 +7,33 @@ process MSRESCORE_FINE_TUNING { 'ghcr.io/bigbio/quantms-rescoring:0.0.14' }" input: - tuple val(meta), path(idxml), path(mzml), path(ms2_model_dir) + tuple val(meta), path(idxml), path(mzml), val(groupkey), path(ms2_model_dir) output: - path "retained_ms2.pth" , emit: model_weight - path "versions.yml" , emit: versions - path "*.log" , emit: log + tuple val(groupkey), path("retained_ms2.pth") , emit: model_weight + path "versions.yml" , emit: versions + path "*.log" , emit: log when: task.ext.when == null || task.ext.when script: def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.mzml_id}_ms2rescore" + def prefix = task.ext.prefix ?: "${groupkey}_fine_tuning" // Initialize tolerance variables def ms2_tolerance = null def ms2_tolerance_unit = null // ms2pip only supports Da unit, but alphapeptdeep supports both Da and ppm - ms2_tolerance = meta['fragmentmasstolerance'] - ms2_tolerance_unit = meta['fragmentmasstoleranceunit'] + ms2_tolerance = meta[0]['fragmentmasstolerance'] + ms2_tolerance_unit = meta[0]['fragmentmasstoleranceunit'] + + if (params.ms2features_model_dir && params.ms2features_model_dir != true) { + ms2_model_dir = ms2_model_dir + } else { + ms2_model_dir = "./" + } if (params.force_transfer_learning) { force_transfer_learning = "--force_transfer_learning" @@ -56,7 +62,7 @@ process MSRESCORE_FINE_TUNING { ${force_transfer_learning} \\ ${consider_modloss} \\ $args \\ - 2>&1 | tee ${idxml.baseName}_fine_tuning.log + 2>&1 | tee ${groupkey}_fine_tuning.log cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/subworkflows/local/dda_id/main.nf b/subworkflows/local/dda_id/main.nf index 8839d44c..5d7b8c87 100644 --- a/subworkflows/local/dda_id/main.nf +++ b/subworkflows/local/dda_id/main.nf @@ -55,16 +55,51 @@ workflow DDA_ID { if (params.ms2features_model_dir && params.ms2features_model_dir != true) { ms2_model_dir = Channel.from(file(params.ms2features_model_dir, checkIfExists: true)) } else { - ms2_model_dir = Channel.from(file("./")) + ms2_model_dir = Channel.from(file("pretrained_models")) } if (params.ms2features_fine_tuning == true) { if (params.ms2features_generators.toLowerCase().contains('ms2pip')) { exit(1, 'Error: Fine tuning only supports AlphaPeptdeep!') } else { - train_datasets = ch_id_files.combine(ch_file_preparation_results, by: 0).randomSample(params.fine_tuning_sample_run, 2025) - MSRESCORE_FINE_TUNING(train_datasets.collect().combine(ms2_model_dir)) - MSRESCORE_FEATURES(ch_id_files.combine(ch_file_preparation_results, by: 0).combine(MSRESCORE_FINE_TUNING.out.model_weight)) + // Split ch_id_files by search_engines + ch_id_files.combine(ch_file_preparation_results, by: 0).branch{ meta, filename, mzml_name -> + sage: filename.name.contains('sage') + return [meta, filename, mzml_name] + msgf: filename.name.contains('msgf') + return [meta, filename, mzml_name] + comet: filename.name.contains('comet') + return [meta, filename, mzml_name] + }.set{ch_id_files_branched} + + // Preparing train datasets and fine tuning MS2 model + sage_train_datasets = ch_id_files_branched.sage.randomSample(params.fine_tuning_sample_run, 2025).combine( + Channel.value("sage") + ).groupTuple(by: 3) + msgf_train_datasets = ch_id_files_branched.msgf.randomSample(params.fine_tuning_sample_run, 2025).combine( + Channel.value("msgf") + ).groupTuple(by: 3) + comet_train_datasets = ch_id_files_branched.comet.randomSample(params.fine_tuning_sample_run, 2025).combine( + Channel.value("comet") + ).groupTuple(by: 3) + sage_train_datasets.mix(msgf_train_datasets) + .mix(comet_train_datasets) + .combine(ms2_model_dir) + .set { train_datasets } + MSRESCORE_FINE_TUNING(train_datasets) + ch_software_versions = ch_software_versions.mix(MSRESCORE_FINE_TUNING.out.versions) + + sage_features_input = Channel.value("sage").combine(ch_id_files_branched.sage) + .combine(MSRESCORE_FINE_TUNING.out.model_weight, by:0) + msgf_features_input = Channel.value("msgf").combine(ch_id_files_branched.msgf) + .combine(MSRESCORE_FINE_TUNING.out.model_weight, by:0) + comet_features_input = Channel.value("comet").combine(ch_id_files_branched.comet) + .combine(MSRESCORE_FINE_TUNING.out.model_weight, by:0) + sage_features_input.mix(msgf_features_input).mix(comet_features_input) + .map { [it[1], it[2], it[3], it[4]] } + .set { ch_features_input } + + MSRESCORE_FEATURES(ch_features_input) ch_software_versions = ch_software_versions.mix(MSRESCORE_FEATURES.out.versions) ch_id_files_feats = MSRESCORE_FEATURES.out.idxml } diff --git a/subworkflows/local/psm_rescoring/main.nf b/subworkflows/local/psm_rescoring/main.nf index 09c06fc7..57f48873 100644 --- a/subworkflows/local/psm_rescoring/main.nf +++ b/subworkflows/local/psm_rescoring/main.nf @@ -30,15 +30,53 @@ workflow PSM_RESCORING { if (params.ms2features_model_dir && params.ms2features_model_dir != true) { ms2_model_dir = Channel.from(file(params.ms2features_model_dir, checkIfExists: true)) } else { - ms2_model_dir = Channel.from(file("./")) + ms2_model_dir = Channel.from(file("pretrained_models")) } if (params.ms2features_fine_tuning == true) { - train_datasets = ch_id_files.combine(ch_file_preparation_results, by: 0).randomSample(params.fine_tuning_sample_run, 2025) - MSRESCORE_FINE_TUNING(train_datasets.collect().combine(ms2_model_dir)) - MSRESCORE_FEATURES(ch_id_files.combine(ch_file_preparation_results, by: 0).combine(MSRESCORE_FINE_TUNING.out.model_weight)) - ch_software_versions = ch_software_versions.mix(MSRESCORE_FEATURES.out.versions) - ch_id_files_feats = MSRESCORE_FEATURES.out.idxml + if (params.ms2features_generators.toLowerCase().contains('ms2pip')) { + exit(1, 'Error: Fine tuning only supports AlphaPeptdeep!') + } else { + // Split ch_id_files by search_engines + ch_id_files.combine(ch_file_preparation_results, by: 0).branch{ meta, filename, mzml_name -> + sage: filename.name.contains('sage') + return [meta, filename, mzml_name] + msgf: filename.name.contains('msgf') + return [meta, filename, mzml_name] + comet: filename.name.contains('comet') + return [meta, filename, mzml_name] + }.set{ch_id_files_branched} + + // Preparing train datasets and fine tuning MS2 model + sage_train_datasets = ch_id_files_branched.sage.randomSample(params.fine_tuning_sample_run, 2025).combine( + Channel.value("sage") + ).groupTuple(by: 3) + msgf_train_datasets = ch_id_files_branched.msgf.randomSample(params.fine_tuning_sample_run, 2025).combine( + Channel.value("msgf") + ).groupTuple(by: 3) + comet_train_datasets = ch_id_files_branched.comet.randomSample(params.fine_tuning_sample_run, 2025).combine( + Channel.value("comet") + ).groupTuple(by: 3) + sage_train_datasets.mix(msgf_train_datasets) + .mix(comet_train_datasets) + .combine(ms2_model_dir) + .set { train_datasets } + MSRESCORE_FINE_TUNING(train_datasets) + ch_software_versions = ch_software_versions.mix(MSRESCORE_FINE_TUNING.out.versions) + + sage_features_input = Channel.value("sage").combine(ch_id_files_branched.sage) + .combine(MSRESCORE_FINE_TUNING.out.model_weight, by:0) + msgf_features_input = Channel.value("msgf").combine(ch_id_files_branched.msgf) + .combine(MSRESCORE_FINE_TUNING.out.model_weight, by:0) + comet_features_input = Channel.value("comet").combine(ch_id_files_branched.comet) + .combine(MSRESCORE_FINE_TUNING.out.model_weight, by:0) + sage_features_input.mix(msgf_features_input).mix(comet_features_input) + .map { [it[1], it[2], it[3], it[4]] } + .set { ch_features_input } + + MSRESCORE_FEATURES(ch_features_input) + ch_software_versions = ch_software_versions.mix(MSRESCORE_FEATURES.out.versions) + ch_id_files_feats = MSRESCORE_FEATURES.out.idxml } else{ MSRESCORE_FEATURES(ch_id_files.combine(ch_file_preparation_results, by: 0).combine(ms2_model_dir)) ch_software_versions = ch_software_versions.mix(MSRESCORE_FEATURES.out.versions) From 5da29f8c7803b71e435bc6cde86b5db1d91fe4a8 Mon Sep 17 00:00:00 2001 From: Chengxin Dai <37200167+daichengxin@users.noreply.github.com> Date: Fri, 2 Jan 2026 22:23:58 +0800 Subject: [PATCH 11/22] Update main.nf --- subworkflows/local/psm_rescoring/main.nf | 1 + 1 file changed, 1 insertion(+) diff --git a/subworkflows/local/psm_rescoring/main.nf b/subworkflows/local/psm_rescoring/main.nf index 57f48873..f70c66fc 100644 --- a/subworkflows/local/psm_rescoring/main.nf +++ b/subworkflows/local/psm_rescoring/main.nf @@ -77,6 +77,7 @@ workflow PSM_RESCORING { MSRESCORE_FEATURES(ch_features_input) ch_software_versions = ch_software_versions.mix(MSRESCORE_FEATURES.out.versions) ch_id_files_feats = MSRESCORE_FEATURES.out.idxml + } } else{ MSRESCORE_FEATURES(ch_id_files.combine(ch_file_preparation_results, by: 0).combine(ms2_model_dir)) ch_software_versions = ch_software_versions.mix(MSRESCORE_FEATURES.out.versions) From 1e8ae25d0bbb1489707f67050f09062e419a4140 Mon Sep 17 00:00:00 2001 From: Chengxin Dai <37200167+daichengxin@users.noreply.github.com> Date: Sat, 3 Jan 2026 15:36:43 +0800 Subject: [PATCH 12/22] move to id --- subworkflows/local/dda_id/main.nf | 67 ----------------- .../local/peptide_database_search/main.nf | 74 ++++++++++++++++++- subworkflows/local/psm_rescoring/main.nf | 72 +----------------- 3 files changed, 73 insertions(+), 140 deletions(-) diff --git a/subworkflows/local/dda_id/main.nf b/subworkflows/local/dda_id/main.nf index 5d7b8c87..7146a888 100644 --- a/subworkflows/local/dda_id/main.nf +++ b/subworkflows/local/dda_id/main.nf @@ -48,73 +48,6 @@ workflow DDA_ID { // if (params.skip_rescoring == false) { - if (params.ms2features_enable == true) { - // Only add ms2_model_dir if it's actually set and not empty - // Handle cases where parameter might be empty string, null, boolean true, or whitespace - // When --ms2features_model_dir is passed with no value, Nextflow may set it to boolean true - if (params.ms2features_model_dir && params.ms2features_model_dir != true) { - ms2_model_dir = Channel.from(file(params.ms2features_model_dir, checkIfExists: true)) - } else { - ms2_model_dir = Channel.from(file("pretrained_models")) - } - - if (params.ms2features_fine_tuning == true) { - if (params.ms2features_generators.toLowerCase().contains('ms2pip')) { - exit(1, 'Error: Fine tuning only supports AlphaPeptdeep!') - } else { - // Split ch_id_files by search_engines - ch_id_files.combine(ch_file_preparation_results, by: 0).branch{ meta, filename, mzml_name -> - sage: filename.name.contains('sage') - return [meta, filename, mzml_name] - msgf: filename.name.contains('msgf') - return [meta, filename, mzml_name] - comet: filename.name.contains('comet') - return [meta, filename, mzml_name] - }.set{ch_id_files_branched} - - // Preparing train datasets and fine tuning MS2 model - sage_train_datasets = ch_id_files_branched.sage.randomSample(params.fine_tuning_sample_run, 2025).combine( - Channel.value("sage") - ).groupTuple(by: 3) - msgf_train_datasets = ch_id_files_branched.msgf.randomSample(params.fine_tuning_sample_run, 2025).combine( - Channel.value("msgf") - ).groupTuple(by: 3) - comet_train_datasets = ch_id_files_branched.comet.randomSample(params.fine_tuning_sample_run, 2025).combine( - Channel.value("comet") - ).groupTuple(by: 3) - sage_train_datasets.mix(msgf_train_datasets) - .mix(comet_train_datasets) - .combine(ms2_model_dir) - .set { train_datasets } - MSRESCORE_FINE_TUNING(train_datasets) - ch_software_versions = ch_software_versions.mix(MSRESCORE_FINE_TUNING.out.versions) - - sage_features_input = Channel.value("sage").combine(ch_id_files_branched.sage) - .combine(MSRESCORE_FINE_TUNING.out.model_weight, by:0) - msgf_features_input = Channel.value("msgf").combine(ch_id_files_branched.msgf) - .combine(MSRESCORE_FINE_TUNING.out.model_weight, by:0) - comet_features_input = Channel.value("comet").combine(ch_id_files_branched.comet) - .combine(MSRESCORE_FINE_TUNING.out.model_weight, by:0) - sage_features_input.mix(msgf_features_input).mix(comet_features_input) - .map { [it[1], it[2], it[3], it[4]] } - .set { ch_features_input } - - MSRESCORE_FEATURES(ch_features_input) - ch_software_versions = ch_software_versions.mix(MSRESCORE_FEATURES.out.versions) - ch_id_files_feats = MSRESCORE_FEATURES.out.idxml - } - } else{ - MSRESCORE_FEATURES(ch_id_files.combine(ch_file_preparation_results, by: 0).combine(ms2_model_dir)) - ch_software_versions = ch_software_versions.mix(MSRESCORE_FEATURES.out.versions) - ch_id_files_feats = MSRESCORE_FEATURES.out.idxml - } - - } else { - PSM_CLEAN(ch_id_files.combine(ch_file_preparation_results, by: 0)) - ch_id_files_feats = PSM_CLEAN.out.idxml - ch_software_versions = ch_software_versions.mix(PSM_CLEAN.out.versions) - } - // Add SNR features to percolator if (params.ms2features_snr) { SPECTRUM_FEATURES(ch_id_files_feats.combine(ch_file_preparation_results, by: 0)) diff --git a/subworkflows/local/peptide_database_search/main.nf b/subworkflows/local/peptide_database_search/main.nf index 924c629c..261a0183 100644 --- a/subworkflows/local/peptide_database_search/main.nf +++ b/subworkflows/local/peptide_database_search/main.nf @@ -3,7 +3,9 @@ include { MSGF_DB_INDEXING } from '../../../modules/local/utils/msgf_db_indexing include { MSGF } from '../../../modules/local/openms/msgf/main' include { COMET } from '../../../modules/local/openms/comet/main' include { SAGE } from '../../../modules/local/openms/sage/main' - +include { PSM_CLEAN } from '../../../modules/local/utils/psm_clean/main' +include { MSRESCORE_FINE_TUNING} from '../../../modules/local/utils/msrescore_fine_tuning/main' +include { MSRESCORE_FEATURES } from '../../../modules/local/utils/msrescore_features/main' workflow PEPTIDE_DATABASE_SEARCH { take: @@ -66,7 +68,75 @@ workflow PEPTIDE_DATABASE_SEARCH { ch_id_sage = ch_id_sage.mix(SAGE.out.id_files_sage.transpose()) } + if (params.skip_rescoring != true && params.ms2features_enable == true) { + // Only add ms2_model_dir if it's actually set and not empty + // Handle cases where parameter might be empty string, null, boolean true, or whitespace + // When --ms2features_model_dir is passed with no value, Nextflow may set it to boolean true + if (params.ms2features_model_dir && params.ms2features_model_dir != true) { + ms2_model_dir = Channel.from(file(params.ms2features_model_dir, checkIfExists: true)) + } else { + ms2_model_dir = Channel.from(file("pretrained_models")) + } + + if (params.ms2features_fine_tuning == true) { + if (params.ms2features_generators.toLowerCase().contains('ms2pip')) { + exit(1, 'Error: Fine tuning only supports AlphaPeptdeep!') + } else { + + // Preparing train datasets and fine tuning MS2 model + sage_train_datasets = ch_id_sage + .combine(ch_mzmls_search, by: 0) + .randomSample(params.fine_tuning_sample_run, 2025) + .combine(Channel.value("sage")) + .groupTuple(by: 3) + + msgf_train_datasets = ch_id_msgf + .combine(ch_mzmls_search, by: 0) + .randomSample(params.fine_tuning_sample_run, 2025) + .combine(Channel.value("msgf")) + .groupTuple(by: 3) + + comet_train_datasets = ch_id_comet + .combine(ch_mzmls_search, by: 0) + .randomSample(params.fine_tuning_sample_run, 2025) + .combine(Channel.value("comet")) + .groupTuple(by: 3) + + sage_train_datasets.mix(msgf_train_datasets) + .mix(comet_train_datasets) + .combine(ms2_model_dir) + .set { train_datasets } + MSRESCORE_FINE_TUNING(train_datasets) + ch_software_versions = ch_software_versions.mix(MSRESCORE_FINE_TUNING.out.versions) + + sage_features_input = Channel.value("sage").combine(ch_id_files_branched.sage) + .combine(MSRESCORE_FINE_TUNING.out.model_weight, by:0) + msgf_features_input = Channel.value("msgf").combine(ch_id_files_branched.msgf) + .combine(MSRESCORE_FINE_TUNING.out.model_weight, by:0) + comet_features_input = Channel.value("comet").combine(ch_id_files_branched.comet) + .combine(MSRESCORE_FINE_TUNING.out.model_weight, by:0) + sage_features_input.mix(msgf_features_input).mix(comet_features_input) + .map { [it[1], it[2], it[3], it[4]] } + .set { ch_features_input } + + MSRESCORE_FEATURES(ch_features_input) + ch_software_versions = ch_software_versions.mix(MSRESCORE_FEATURES.out.versions) + ch_id_files_feats = MSRESCORE_FEATURES.out.idxml + } + } else{ + MSRESCORE_FEATURES(ch_id_files.combine(ch_file_preparation_results, by: 0).combine(ms2_model_dir)) + ch_software_versions = ch_software_versions.mix(MSRESCORE_FEATURES.out.versions) + ch_id_files_feats = MSRESCORE_FEATURES.out.idxml + } + + } else if (params.psm_clean == true) { + ch_id_files = ch_id_msgf.mix(ch_id_comet).mix(ch_id_sage) + PSM_CLEAN(ch_id_files.combine(ch_file_preparation_results, by: 0)) + ch_id_files_feats = PSM_CLEAN.out.idxml + ch_software_versions = ch_software_versions.mix(PSM_CLEAN.out.versions) + } + emit: - ch_id_files_idx = ch_id_msgf.mix(ch_id_comet).mix(ch_id_sage) + ch_id_files_idx = ch_id_files_feats versions = ch_versions } diff --git a/subworkflows/local/psm_rescoring/main.nf b/subworkflows/local/psm_rescoring/main.nf index f70c66fc..0fe25191 100644 --- a/subworkflows/local/psm_rescoring/main.nf +++ b/subworkflows/local/psm_rescoring/main.nf @@ -14,7 +14,7 @@ include { MSRESCORE_FINE_TUNING } from '../../../modules/local/utils/msrescore workflow PSM_RESCORING { take: ch_file_preparation_results - ch_id_files + ch_id_files_feats ch_expdesign main: @@ -22,76 +22,6 @@ workflow PSM_RESCORING { ch_results = Channel.empty() ch_fdridpep = Channel.empty() - if (params.ms2features_enable == true) { - - // Only add ms2_model_dir if it's actually set and not empty - // Handle cases where parameter might be empty string, null, boolean true, or whitespace - // When --ms2features_model_dir is passed with no value, Nextflow may set it to boolean true - if (params.ms2features_model_dir && params.ms2features_model_dir != true) { - ms2_model_dir = Channel.from(file(params.ms2features_model_dir, checkIfExists: true)) - } else { - ms2_model_dir = Channel.from(file("pretrained_models")) - } - - if (params.ms2features_fine_tuning == true) { - if (params.ms2features_generators.toLowerCase().contains('ms2pip')) { - exit(1, 'Error: Fine tuning only supports AlphaPeptdeep!') - } else { - // Split ch_id_files by search_engines - ch_id_files.combine(ch_file_preparation_results, by: 0).branch{ meta, filename, mzml_name -> - sage: filename.name.contains('sage') - return [meta, filename, mzml_name] - msgf: filename.name.contains('msgf') - return [meta, filename, mzml_name] - comet: filename.name.contains('comet') - return [meta, filename, mzml_name] - }.set{ch_id_files_branched} - - // Preparing train datasets and fine tuning MS2 model - sage_train_datasets = ch_id_files_branched.sage.randomSample(params.fine_tuning_sample_run, 2025).combine( - Channel.value("sage") - ).groupTuple(by: 3) - msgf_train_datasets = ch_id_files_branched.msgf.randomSample(params.fine_tuning_sample_run, 2025).combine( - Channel.value("msgf") - ).groupTuple(by: 3) - comet_train_datasets = ch_id_files_branched.comet.randomSample(params.fine_tuning_sample_run, 2025).combine( - Channel.value("comet") - ).groupTuple(by: 3) - sage_train_datasets.mix(msgf_train_datasets) - .mix(comet_train_datasets) - .combine(ms2_model_dir) - .set { train_datasets } - MSRESCORE_FINE_TUNING(train_datasets) - ch_software_versions = ch_software_versions.mix(MSRESCORE_FINE_TUNING.out.versions) - - sage_features_input = Channel.value("sage").combine(ch_id_files_branched.sage) - .combine(MSRESCORE_FINE_TUNING.out.model_weight, by:0) - msgf_features_input = Channel.value("msgf").combine(ch_id_files_branched.msgf) - .combine(MSRESCORE_FINE_TUNING.out.model_weight, by:0) - comet_features_input = Channel.value("comet").combine(ch_id_files_branched.comet) - .combine(MSRESCORE_FINE_TUNING.out.model_weight, by:0) - sage_features_input.mix(msgf_features_input).mix(comet_features_input) - .map { [it[1], it[2], it[3], it[4]] } - .set { ch_features_input } - - MSRESCORE_FEATURES(ch_features_input) - ch_software_versions = ch_software_versions.mix(MSRESCORE_FEATURES.out.versions) - ch_id_files_feats = MSRESCORE_FEATURES.out.idxml - } - } else{ - MSRESCORE_FEATURES(ch_id_files.combine(ch_file_preparation_results, by: 0).combine(ms2_model_dir)) - ch_software_versions = ch_software_versions.mix(MSRESCORE_FEATURES.out.versions) - ch_id_files_feats = MSRESCORE_FEATURES.out.idxml - } - - } else if (params.psm_clean == true) { - PSM_CLEAN(ch_id_files.combine(ch_file_preparation_results, by: 0)) - ch_id_files_feats = PSM_CLEAN.out.idxml - ch_software_versions = ch_software_versions.mix(PSM_CLEAN.out.versions) - } else { - ch_id_files_feats = ch_id_files - } - // Add SNR features to percolator if (params.ms2features_snr) { SPECTRUM_FEATURES(ch_id_files_feats.combine(ch_file_preparation_results, by: 0)) From 5ff0403f74759509730c7682d554455bb36f85ec Mon Sep 17 00:00:00 2001 From: Chengxin Dai <37200167+daichengxin@users.noreply.github.com> Date: Sat, 3 Jan 2026 15:41:30 +0800 Subject: [PATCH 13/22] Update main.nf --- subworkflows/local/peptide_database_search/main.nf | 1 + 1 file changed, 1 insertion(+) diff --git a/subworkflows/local/peptide_database_search/main.nf b/subworkflows/local/peptide_database_search/main.nf index 261a0183..14e7257e 100644 --- a/subworkflows/local/peptide_database_search/main.nf +++ b/subworkflows/local/peptide_database_search/main.nf @@ -124,6 +124,7 @@ workflow PEPTIDE_DATABASE_SEARCH { ch_id_files_feats = MSRESCORE_FEATURES.out.idxml } } else{ + ch_id_files = ch_id_msgf.mix(ch_id_comet).mix(ch_id_sage) MSRESCORE_FEATURES(ch_id_files.combine(ch_file_preparation_results, by: 0).combine(ms2_model_dir)) ch_software_versions = ch_software_versions.mix(MSRESCORE_FEATURES.out.versions) ch_id_files_feats = MSRESCORE_FEATURES.out.idxml From 627d0a8374aeebd700d41e06b95b36d750d8039d Mon Sep 17 00:00:00 2001 From: Chengxin Dai <37200167+daichengxin@users.noreply.github.com> Date: Sat, 3 Jan 2026 15:47:01 +0800 Subject: [PATCH 14/22] Update main.nf --- subworkflows/local/peptide_database_search/main.nf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/subworkflows/local/peptide_database_search/main.nf b/subworkflows/local/peptide_database_search/main.nf index 14e7257e..44842a1d 100644 --- a/subworkflows/local/peptide_database_search/main.nf +++ b/subworkflows/local/peptide_database_search/main.nf @@ -125,14 +125,14 @@ workflow PEPTIDE_DATABASE_SEARCH { } } else{ ch_id_files = ch_id_msgf.mix(ch_id_comet).mix(ch_id_sage) - MSRESCORE_FEATURES(ch_id_files.combine(ch_file_preparation_results, by: 0).combine(ms2_model_dir)) + MSRESCORE_FEATURES(ch_id_files.combine(ch_mzmls_search, by: 0).combine(ms2_model_dir)) ch_software_versions = ch_software_versions.mix(MSRESCORE_FEATURES.out.versions) ch_id_files_feats = MSRESCORE_FEATURES.out.idxml } } else if (params.psm_clean == true) { ch_id_files = ch_id_msgf.mix(ch_id_comet).mix(ch_id_sage) - PSM_CLEAN(ch_id_files.combine(ch_file_preparation_results, by: 0)) + PSM_CLEAN(ch_id_files.combine(ch_mzmls_search, by: 0)) ch_id_files_feats = PSM_CLEAN.out.idxml ch_software_versions = ch_software_versions.mix(PSM_CLEAN.out.versions) } From 511595fdf1076d78b26d1ceb5af263aeb6239efd Mon Sep 17 00:00:00 2001 From: Chengxin Dai <37200167+daichengxin@users.noreply.github.com> Date: Sat, 3 Jan 2026 16:56:57 +0800 Subject: [PATCH 15/22] refine --- subworkflows/local/dda_id/main.nf | 80 +----- .../local/peptide_database_search/main.nf | 230 +++++++++++++----- subworkflows/local/psm_rescoring/main.nf | 75 +----- 3 files changed, 178 insertions(+), 207 deletions(-) diff --git a/subworkflows/local/dda_id/main.nf b/subworkflows/local/dda_id/main.nf index 7146a888..67f49a36 100644 --- a/subworkflows/local/dda_id/main.nf +++ b/subworkflows/local/dda_id/main.nf @@ -3,14 +3,8 @@ // include { CONSENSUSID } from '../../../modules/local/openms/consensusid/main' include { PERCOLATOR } from '../../../modules/local/openms/percolator/main' -include { ID_MERGER } from '../../../modules/local/openms/id_merger/main' include { ID_RIPPER } from '../../../modules/local/openms/id_ripper/main' include { PSM_CONVERSION } from '../../../modules/local/utils/psm_conversion/main' -include { MSRESCORE_FEATURES } from '../../../modules/local/utils/msrescore_features/main' -include { GET_SAMPLE } from '../../../modules/local/utils/extract_sample/main' -include { SPECTRUM_FEATURES } from '../../../modules/local/utils/spectrum_features/main' -include { PSM_CLEAN } from '../../../modules/local/utils/psm_clean/main' -include { MSRESCORE_FINE_TUNING} from '../../../modules/local/utils/msrescore_fine_tuning/main' include { PHOSPHO_SCORING } from '../phospho_scoring/main' // @@ -47,83 +41,14 @@ workflow DDA_ID { // SUBWORKFLOW: Rescoring // if (params.skip_rescoring == false) { - - // Add SNR features to percolator - if (params.ms2features_snr) { - SPECTRUM_FEATURES(ch_id_files_feats.combine(ch_file_preparation_results, by: 0)) - ch_id_files_feats = SPECTRUM_FEATURES.out.id_files_snr - ch_software_versions = ch_software_versions.mix(SPECTRUM_FEATURES.out.versions) - } - // Rescoring for independent run, Sample or whole experiments if (params.ms2features_range == "independent_run") { PERCOLATOR(ch_id_files_feats) ch_software_versions = ch_software_versions.mix(PERCOLATOR.out.versions) ch_consensus_input = PERCOLATOR.out.id_files_perc - } else if (params.ms2features_range == "by_sample") { - // Sample map - GET_SAMPLE(ch_expdesign) - ch_software_versions = ch_software_versions.mix(GET_SAMPLE.out.versions) - - ch_expdesign_sample = GET_SAMPLE.out.ch_expdesign_sample - ch_expdesign_sample.splitCsv(header: true, sep: '\t') - .map { get_sample_map(it) }.set{ sample_map_idv } - - ch_id_files_feats.map {[it[0].mzml_id, it[0], it[1]]} - .combine(sample_map_idv, by: 0) - .map {[it[1], it[2], it[3]]} - .set{ch_id_files_feats_sample} - - // Group by search_engines and sample - ch_id_files_feats_sample.branch{ meta, filename, sample -> - sage: filename.name.contains('sage') - return [meta, filename, sample] - msgf: filename.name.contains('msgf') - return [meta, filename, sample] - comet: filename.name.contains('comet') - return [meta, filename, sample] - }.set{ch_id_files_feat_branched} - - // IDMERGER for samples group - ID_MERGER(ch_id_files_feat_branched.comet.groupTuple(by: 2) - .mix(ch_id_files_feat_branched.msgf.groupTuple(by: 2)) - .mix(ch_id_files_feat_branched.sage.groupTuple(by: 2))) - ch_software_versions = ch_software_versions.mix(ID_MERGER.out.versions) - - PERCOLATOR(ID_MERGER.out.id_merged) - ch_software_versions = ch_software_versions.mix(PERCOLATOR.out.versions) - - // Currently only ID runs on exactly one mzML file are supported in CONSENSUSID. Split idXML by runs - ID_RIPPER(PERCOLATOR.out.id_files_perc) - ch_file_preparation_results.map{[it[0].mzml_id, it[0]]}.set{meta} - ID_RIPPER.out.id_rippers.flatten().map { add_file_prefix (it)}.set{id_rippers} - meta.combine(id_rippers, by: 0) - .map{ [it[1], it[2]]} - .set{ ch_consensus_input } - ch_software_versions = ch_software_versions.mix(ID_RIPPER.out.versions) - - } else if (params.ms2features_range == "by_project"){ - ch_id_files_feats.map {[it[0].experiment_id, it[0], it[1]]}.set { ch_id_files_feats} - - // Split ch_id_files_feats by search_engines - ch_id_files_feats.branch{ experiment_id, meta, filename -> - sage: filename.name.contains('sage') - return [meta, filename, experiment_id] - msgf: filename.name.contains('msgf') - return [meta, filename, experiment_id] - comet: filename.name.contains('comet') - return [meta, filename, experiment_id] - }.set{ch_id_files_feat_branched} - - // IDMERGER for whole experiments - ID_MERGER(ch_id_files_feat_branched.comet.groupTuple(by: 2) - .mix(ch_id_files_feat_branched.msgf.groupTuple(by: 2)) - .mix(ch_id_files_feat_branched.sage.groupTuple(by: 2))) - ch_software_versions = ch_software_versions.mix(ID_MERGER.out.versions) - - PERCOLATOR(ID_MERGER.out.id_merged) + } else { + PERCOLATOR(ch_id_files_feats) ch_software_versions = ch_software_versions.mix(PERCOLATOR.out.versions) - // Currently only ID runs on exactly one mzML file are supported in CONSENSUSID. Split idXML by runs ID_RIPPER(PERCOLATOR.out.id_files_perc) ch_file_preparation_results.map{[it[0].mzml_id, it[0]]}.set{meta} @@ -132,7 +57,6 @@ workflow DDA_ID { .map{ [it[1], it[2]]} .set{ ch_consensus_input } ch_software_versions = ch_software_versions.mix(ID_RIPPER.out.versions) - } ch_rescoring_results = ch_consensus_input diff --git a/subworkflows/local/peptide_database_search/main.nf b/subworkflows/local/peptide_database_search/main.nf index 44842a1d..550eddac 100644 --- a/subworkflows/local/peptide_database_search/main.nf +++ b/subworkflows/local/peptide_database_search/main.nf @@ -6,6 +6,9 @@ include { SAGE } from '../../../modules/local/openms/sage/main' include { PSM_CLEAN } from '../../../modules/local/utils/psm_clean/main' include { MSRESCORE_FINE_TUNING} from '../../../modules/local/utils/msrescore_fine_tuning/main' include { MSRESCORE_FEATURES } from '../../../modules/local/utils/msrescore_features/main' +include { GET_SAMPLE } from '../../../modules/local/utils/extract_sample/main' +include { SPECTRUM_FEATURES } from '../../../modules/local/utils/spectrum_features/main' +include { ID_MERGER } from '../../../modules/local/openms/id_merger/main' workflow PEPTIDE_DATABASE_SEARCH { take: @@ -68,76 +71,187 @@ workflow PEPTIDE_DATABASE_SEARCH { ch_id_sage = ch_id_sage.mix(SAGE.out.id_files_sage.transpose()) } - if (params.skip_rescoring != true && params.ms2features_enable == true) { - // Only add ms2_model_dir if it's actually set and not empty - // Handle cases where parameter might be empty string, null, boolean true, or whitespace - // When --ms2features_model_dir is passed with no value, Nextflow may set it to boolean true - if (params.ms2features_model_dir && params.ms2features_model_dir != true) { - ms2_model_dir = Channel.from(file(params.ms2features_model_dir, checkIfExists: true)) + if (params.skip_rescoring != true) { + + if (params.ms2features_enable == true){ + // Only add ms2_model_dir if it's actually set and not empty + // Handle cases where parameter might be empty string, null, boolean true, or whitespace + // When --ms2features_model_dir is passed with no value, Nextflow may set it to boolean true + if (params.ms2features_model_dir && params.ms2features_model_dir != true) { + ms2_model_dir = Channel.from(file(params.ms2features_model_dir, checkIfExists: true)) + } else { + ms2_model_dir = Channel.from(file("pretrained_models")) + } + + if (params.ms2features_fine_tuning == true) { + if (params.ms2features_generators.toLowerCase().contains('ms2pip')) { + exit(1, 'Error: Fine tuning only supports AlphaPeptdeep!') + } else { + + // Preparing train datasets and fine tuning MS2 model + sage_train_datasets = ch_id_sage + .combine(ch_mzmls_search, by: 0) + .randomSample(params.fine_tuning_sample_run, 2025) + .combine(Channel.value("sage")) + .groupTuple(by: 3) + + msgf_train_datasets = ch_id_msgf + .combine(ch_mzmls_search, by: 0) + .randomSample(params.fine_tuning_sample_run, 2025) + .combine(Channel.value("msgf")) + .groupTuple(by: 3) + + comet_train_datasets = ch_id_comet + .combine(ch_mzmls_search, by: 0) + .randomSample(params.fine_tuning_sample_run, 2025) + .combine(Channel.value("comet")) + .groupTuple(by: 3) + + sage_train_datasets.mix(msgf_train_datasets) + .mix(comet_train_datasets) + .combine(ms2_model_dir) + .set { train_datasets } + MSRESCORE_FINE_TUNING(train_datasets) + ch_versions = ch_versions.mix(MSRESCORE_FINE_TUNING.out.versions) + + if (params.search_engines.contains("msgf")) { + Channel.value("msgf").combine(ch_id_msgf) + .combine(MSRESCORE_FINE_TUNING.out.model_weight, by:0) + .map { [it[1], it[2], it[3], it[4]] } + .set { msgf_features_input } + MSRESCORE_FEATURES(msgf_features_input) + ch_versions = ch_versions.mix(MSRESCORE_FEATURES.out.versions) + ch_id_files_msgf_feats = MSRESCORE_FEATURES.out.idxml + } + + if (params.search_engines.contains("sage")) { + Channel.value("sage").combine(ch_id_sage) + .combine(MSRESCORE_FINE_TUNING.out.model_weight, by:0) + .map { [it[1], it[2], it[3], it[4]] } + .set { sage_features_input } + MSRESCORE_FEATURES(sage_features_input) + ch_versions = ch_versions.mix(MSRESCORE_FEATURES.out.versions) + ch_id_files_sage_feats = MSRESCORE_FEATURES.out.idxml + } + + if (params.search_engines.contains("comet")) { + Channel.value("comet").combine(ch_id_comet) + .combine(MSRESCORE_FINE_TUNING.out.model_weight, by:0) + .map { [it[1], it[2], it[3], it[4]] } + .set { comet_features_input } + MSRESCORE_FEATURES(comet_features_input) + ch_versions = ch_versions.mix(MSRESCORE_FEATURES.out.versions) + ch_id_files_comet_feats = MSRESCORE_FEATURES.out.idxml + } + } + } else{ + if (params.search_engines.contains("msgf")) { + MSRESCORE_FEATURES(ch_id_msgf.combine(ch_mzmls_search, by: 0).combine(ms2_model_dir)) + ch_versions = ch_versions.mix(MSRESCORE_FEATURES.out.versions) + ch_id_files_msgf_feats = MSRESCORE_FEATURES.out.idxml + } + + if (params.search_engines.contains("comet")) { + MSRESCORE_FEATURES(ch_id_comet.combine(ch_mzmls_search, by: 0).combine(ms2_model_dir)) + ch_versions = ch_versions.mix(MSRESCORE_FEATURES.out.versions) + ch_id_files_comet_feats = MSRESCORE_FEATURES.out.idxml + } + + if (params.search_engines.contains("sage")) { + MSRESCORE_FEATURES(ch_id_sage.combine(ch_mzmls_search, by: 0).combine(ms2_model_dir)) + ch_versions = ch_versions.mix(MSRESCORE_FEATURES.out.versions) + ch_id_files_sage_feats = MSRESCORE_FEATURES.out.idxml + } + } + } else { - ms2_model_dir = Channel.from(file("pretrained_models")) + ch_id_files_msgf_feats = ch_id_msgf + ch_id_files_comet_feats = ch_id_comet + ch_id_files_sage_feats = ch_id_sage } - if (params.ms2features_fine_tuning == true) { - if (params.ms2features_generators.toLowerCase().contains('ms2pip')) { - exit(1, 'Error: Fine tuning only supports AlphaPeptdeep!') - } else { + // Add SNR features to percolator + if (params.ms2features_snr) { + if (params.search_engines.contains("msgf")) { + SPECTRUM_FEATURES(ch_id_files_msgf_feats.combine(ch_file_preparation_results, by: 0)) + ch_id_files_msgf_feats = SPECTRUM_FEATURES.out.id_files_snr + ch_versions = ch_versions.mix(SPECTRUM_FEATURES.out.versions) + } - // Preparing train datasets and fine tuning MS2 model - sage_train_datasets = ch_id_sage - .combine(ch_mzmls_search, by: 0) - .randomSample(params.fine_tuning_sample_run, 2025) - .combine(Channel.value("sage")) - .groupTuple(by: 3) - - msgf_train_datasets = ch_id_msgf - .combine(ch_mzmls_search, by: 0) - .randomSample(params.fine_tuning_sample_run, 2025) - .combine(Channel.value("msgf")) - .groupTuple(by: 3) - - comet_train_datasets = ch_id_comet - .combine(ch_mzmls_search, by: 0) - .randomSample(params.fine_tuning_sample_run, 2025) - .combine(Channel.value("comet")) - .groupTuple(by: 3) - - sage_train_datasets.mix(msgf_train_datasets) - .mix(comet_train_datasets) - .combine(ms2_model_dir) - .set { train_datasets } - MSRESCORE_FINE_TUNING(train_datasets) - ch_software_versions = ch_software_versions.mix(MSRESCORE_FINE_TUNING.out.versions) - - sage_features_input = Channel.value("sage").combine(ch_id_files_branched.sage) - .combine(MSRESCORE_FINE_TUNING.out.model_weight, by:0) - msgf_features_input = Channel.value("msgf").combine(ch_id_files_branched.msgf) - .combine(MSRESCORE_FINE_TUNING.out.model_weight, by:0) - comet_features_input = Channel.value("comet").combine(ch_id_files_branched.comet) - .combine(MSRESCORE_FINE_TUNING.out.model_weight, by:0) - sage_features_input.mix(msgf_features_input).mix(comet_features_input) - .map { [it[1], it[2], it[3], it[4]] } - .set { ch_features_input } - - MSRESCORE_FEATURES(ch_features_input) - ch_software_versions = ch_software_versions.mix(MSRESCORE_FEATURES.out.versions) - ch_id_files_feats = MSRESCORE_FEATURES.out.idxml + if (params.search_engines.contains("comet")) { + SPECTRUM_FEATURES(ch_id_files_comet_feats.combine(ch_file_preparation_results, by: 0)) + ch_id_files_comet_feats = SPECTRUM_FEATURES.out.id_files_snr + ch_versions = ch_versions.mix(SPECTRUM_FEATURES.out.versions) + } + + if (params.search_engines.contains("sage")) { + SPECTRUM_FEATURES(ch_id_files_sage_feats.combine(ch_file_preparation_results, by: 0)) + ch_id_files_sage_feats = SPECTRUM_FEATURES.out.id_files_snr + ch_versions = ch_versions.mix(SPECTRUM_FEATURES.out.versions) } - } else{ - ch_id_files = ch_id_msgf.mix(ch_id_comet).mix(ch_id_sage) - MSRESCORE_FEATURES(ch_id_files.combine(ch_mzmls_search, by: 0).combine(ms2_model_dir)) - ch_software_versions = ch_software_versions.mix(MSRESCORE_FEATURES.out.versions) - ch_id_files_feats = MSRESCORE_FEATURES.out.idxml } + if (params.ms2features_range == "by_sample") { + // Sample map + GET_SAMPLE(ch_expdesign) + ch_versions = ch_versions.mix(GET_SAMPLE.out.versions) + ch_expdesign_sample = GET_SAMPLE.out.ch_expdesign_sample + ch_expdesign_sample.splitCsv(header: true, sep: '\t') + .map { get_sample_map(it) }.set{ sample_map_idv } + + ch_id_files_msgf_feats.map {[it[0].mzml_id, it[0], it[1]]}.set { ch_id_files_msgf_feats } + ch_id_files_msgf_feats.combine(sample_map_idv, by: 0).map {[it[1], it[2], it[3]]}.set{ ch_id_files_msgf_feats } + + ch_id_files_comet_feats.map {[it[0].mzml_id, it[0], it[1]]}.set { ch_id_files_comet_feats } + ch_id_files_comet_feats.combine(sample_map_idv, by: 0).map {[it[1], it[2], it[3]]}.set{ ch_id_files_comet_feats } + + ch_id_files_sage_feats.map {[it[0].mzml_id, it[0], it[1]]}.set { ch_id_files_sage_feats } + ch_id_files_sage_feats.combine(sample_map_idv, by: 0).map {[it[1], it[2], it[3]]}.set{ ch_id_files_sage_feats } + + // ID_MERGER for samples group + ID_MERGER(ch_id_files_msgf_feats.groupTuple(by: 2) + .mix(ch_id_files_comet_feats.groupTuple(by: 2)) + .mix(ch_id_files_sage_feats.groupTuple(by: 2))) + ) + ch_versions = ch_versions.mix(ID_MERGER.out.versions) + ch_id_files_out = ID_MERGER.out.id_merged + + } else if (params.ms2features_range == "by_project") { + ch_id_files_msgf_feats.map {[it[0].experiment_id, it[0], it[1]]}.set { ch_id_files_msgf_feats } + ch_id_files_comet_feats.map {[it[0].experiment_id, it[0], it[1]]}.set { ch_id_files_comet_feats } + ch_id_files_sage_feats.map {[it[0].experiment_id, it[0], it[1]]}.set { ch_id_files_sage_feats } + + // ID_MERGER for whole experiments + ID_MERGER(ch_id_files_msgf_feats.groupTuple(by: 2) + .mix(ch_id_files_comet_feats.groupTuple(by: 2)) + .mix(ch_id_files_sage_feats.groupTuple(by: 2))) + ch_versions = ch_versions.mix(ID_MERGER.out.versions) + ch_id_files_out = ID_MERGER.out.id_merged + } else { + ch_id_files_out = ch_id_files_msgf_feats.mix(ch_id_files_comet_feats).mix(ch_id_files_sage_feats) + } + + } else if (params.psm_clean == true) { ch_id_files = ch_id_msgf.mix(ch_id_comet).mix(ch_id_sage) PSM_CLEAN(ch_id_files.combine(ch_mzmls_search, by: 0)) - ch_id_files_feats = PSM_CLEAN.out.idxml - ch_software_versions = ch_software_versions.mix(PSM_CLEAN.out.versions) + ch_id_files_out = PSM_CLEAN.out.idxml + ch_versions = ch_versions.mix(PSM_CLEAN.out.versions) } emit: - ch_id_files_idx = ch_id_files_feats + ch_id_files_idx = ch_id_files_out versions = ch_versions } + +// Function to get sample map +def get_sample_map(LinkedHashMap row) { + def sample_map = [:] + + filestr = row.Spectra_Filepath + file_name = file(filestr).name.take(file(filestr).name.lastIndexOf('.')) + sample = row.Sample + + return [file_name, sample] + +} diff --git a/subworkflows/local/psm_rescoring/main.nf b/subworkflows/local/psm_rescoring/main.nf index 0fe25191..86cb44ba 100644 --- a/subworkflows/local/psm_rescoring/main.nf +++ b/subworkflows/local/psm_rescoring/main.nf @@ -3,13 +3,9 @@ // include { PERCOLATOR } from '../../../modules/local/openms/percolator/main' -include { MSRESCORE_FEATURES } from '../../../modules/local/utils/msrescore_features/main' -include { GET_SAMPLE } from '../../../modules/local/utils/extract_sample/main' -include { ID_MERGER } from '../../../modules/local/openms/id_merger/main' + include { ID_RIPPER } from '../../../modules/local/openms/id_ripper/main' -include { SPECTRUM_FEATURES } from '../../../modules/local/utils/spectrum_features/main' -include { PSM_CLEAN } from '../../../modules/local/utils/psm_clean/main' -include { MSRESCORE_FINE_TUNING } from '../../../modules/local/utils/msrescore_fine_tuning/main' + workflow PSM_RESCORING { take: @@ -22,46 +18,13 @@ workflow PSM_RESCORING { ch_results = Channel.empty() ch_fdridpep = Channel.empty() - // Add SNR features to percolator - if (params.ms2features_snr) { - SPECTRUM_FEATURES(ch_id_files_feats.combine(ch_file_preparation_results, by: 0)) - ch_id_files_feats = SPECTRUM_FEATURES.out.id_files_snr - ch_software_versions = ch_software_versions.mix(SPECTRUM_FEATURES.out.versions) - } - // Rescoring for independent run, Sample or whole experiments if (params.ms2features_range == "independent_run") { PERCOLATOR(ch_id_files_feats) ch_software_versions = ch_software_versions.mix(PERCOLATOR.out.versions) ch_consensus_input = PERCOLATOR.out.id_files_perc } else if (params.ms2features_range == "by_sample") { - // Sample map - GET_SAMPLE(ch_expdesign) - ch_software_versions = ch_software_versions.mix(GET_SAMPLE.out.versions) - ch_expdesign_sample = GET_SAMPLE.out.ch_expdesign_sample - ch_expdesign_sample.splitCsv(header: true, sep: '\t') - .map { get_sample_map(it) }.set{ sample_map_idv } - - ch_id_files_feats.map {[it[0].mzml_id, it[0], it[1]]}.set { ch_id_files_feats} - ch_id_files_feats.combine(sample_map_idv, by: 0).map {[it[1], it[2], it[3]]}.set{ch_id_files_feats} - - // Group by search_engines and convert meta - ch_id_files_feats.branch{ meta, filename, sample -> - sage: filename.name.contains('sage') - return [meta, filename, sample] - msgf: filename.name.contains('msgf') - return [meta, filename, sample] - comet: filename.name.contains('comet') - return [meta, filename, sample] - }.set{ch_id_files_feat_branched} - - // ID_MERGER for samples group - ID_MERGER(ch_id_files_feat_branched.comet.groupTuple(by: 2) - .mix(ch_id_files_feat_branched.msgf.groupTuple(by: 2)) - .mix(ch_id_files_feat_branched.sage.groupTuple(by: 2))) - ch_software_versions = ch_software_versions.mix(ID_MERGER.out.versions) - - PERCOLATOR(ID_MERGER.out.id_merged) + PERCOLATOR(ch_id_files_feats) ch_software_versions = ch_software_versions.mix(PERCOLATOR.out.versions) // Currently only ID runs on exactly one mzML file are supported in CONSENSUSID. Split idXML by runs @@ -74,25 +37,7 @@ workflow PSM_RESCORING { ch_software_versions = ch_software_versions.mix(ID_RIPPER.out.versions) } else if (params.ms2features_range == "by_project"){ - ch_id_files_feats.map {[it[0].experiment_id, it[0], it[1]]}.set { ch_id_files_feats} - - // Split ch_id_files_feats by search_engines - ch_id_files_feats.branch{ experiment_id, meta, filename -> - sage: filename.name.contains('sage') - return [meta, filename, experiment_id] - msgf: filename.name.contains('msgf') - return [meta, filename, experiment_id] - comet: filename.name.contains('comet') - return [meta, filename, experiment_id] - }.set{ch_id_files_feat_branched} - - // ID_MERGER for whole experiments - ID_MERGER(ch_id_files_feat_branched.comet.groupTuple(by: 2) - .mix(ch_id_files_feat_branched.msgf.groupTuple(by: 2)) - .mix(ch_id_files_feat_branched.sage.groupTuple(by: 2))) - ch_software_versions = ch_software_versions.mix(ID_MERGER.out.versions) - - PERCOLATOR(ID_MERGER.out.id_merged) + PERCOLATOR(ch_id_files_feats) ch_software_versions = ch_software_versions.mix(PERCOLATOR.out.versions) // Currently only ID runs on exactly one mzML file are supported in CONSENSUSID. Split idXML by runs @@ -122,15 +67,3 @@ def add_file_prefix(file_path) { file_name = file(file_name).name.take(position) return [file_name, file_path] } - -// Function to get sample map -def get_sample_map(LinkedHashMap row) { - def sample_map = [:] - - filestr = row.Spectra_Filepath - file_name = file(filestr).name.take(file(filestr).name.lastIndexOf('.')) - sample = row.Sample - - return [file_name, sample] - -} From 04b428718024ed6b481ef2f2bbb23754135f8c20 Mon Sep 17 00:00:00 2001 From: Chengxin Dai <37200167+daichengxin@users.noreply.github.com> Date: Sat, 3 Jan 2026 17:03:17 +0800 Subject: [PATCH 16/22] Update main.nf --- subworkflows/local/peptide_database_search/main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/subworkflows/local/peptide_database_search/main.nf b/subworkflows/local/peptide_database_search/main.nf index 550eddac..161aec3a 100644 --- a/subworkflows/local/peptide_database_search/main.nf +++ b/subworkflows/local/peptide_database_search/main.nf @@ -211,7 +211,7 @@ workflow PEPTIDE_DATABASE_SEARCH { // ID_MERGER for samples group ID_MERGER(ch_id_files_msgf_feats.groupTuple(by: 2) .mix(ch_id_files_comet_feats.groupTuple(by: 2)) - .mix(ch_id_files_sage_feats.groupTuple(by: 2))) + .mix(ch_id_files_sage_feats.groupTuple(by: 2)) ) ch_versions = ch_versions.mix(ID_MERGER.out.versions) ch_id_files_out = ID_MERGER.out.id_merged From dcb5d3c17983c14d21b2f67b318dede13a1cc125 Mon Sep 17 00:00:00 2001 From: Chengxin Dai <37200167+daichengxin@users.noreply.github.com> Date: Sat, 3 Jan 2026 17:08:13 +0800 Subject: [PATCH 17/22] Update main.nf --- subworkflows/local/peptide_database_search/main.nf | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/subworkflows/local/peptide_database_search/main.nf b/subworkflows/local/peptide_database_search/main.nf index 161aec3a..929da803 100644 --- a/subworkflows/local/peptide_database_search/main.nf +++ b/subworkflows/local/peptide_database_search/main.nf @@ -173,19 +173,19 @@ workflow PEPTIDE_DATABASE_SEARCH { // Add SNR features to percolator if (params.ms2features_snr) { if (params.search_engines.contains("msgf")) { - SPECTRUM_FEATURES(ch_id_files_msgf_feats.combine(ch_file_preparation_results, by: 0)) + SPECTRUM_FEATURES(ch_id_files_msgf_feats.combine(ch_mzmls_search, by: 0)) ch_id_files_msgf_feats = SPECTRUM_FEATURES.out.id_files_snr ch_versions = ch_versions.mix(SPECTRUM_FEATURES.out.versions) } if (params.search_engines.contains("comet")) { - SPECTRUM_FEATURES(ch_id_files_comet_feats.combine(ch_file_preparation_results, by: 0)) + SPECTRUM_FEATURES(ch_id_files_comet_feats.combine(ch_mzmls_search, by: 0)) ch_id_files_comet_feats = SPECTRUM_FEATURES.out.id_files_snr ch_versions = ch_versions.mix(SPECTRUM_FEATURES.out.versions) } if (params.search_engines.contains("sage")) { - SPECTRUM_FEATURES(ch_id_files_sage_feats.combine(ch_file_preparation_results, by: 0)) + SPECTRUM_FEATURES(ch_id_files_sage_feats.combine(ch_mzmls_search, by: 0)) ch_id_files_sage_feats = SPECTRUM_FEATURES.out.id_files_snr ch_versions = ch_versions.mix(SPECTRUM_FEATURES.out.versions) } From 611fc2f639fd39c8fd19d0aced6b84594a0f4a0a Mon Sep 17 00:00:00 2001 From: Chengxin Dai <37200167+daichengxin@users.noreply.github.com> Date: Sat, 3 Jan 2026 17:18:55 +0800 Subject: [PATCH 18/22] Update main.nf --- subworkflows/local/peptide_database_search/main.nf | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/subworkflows/local/peptide_database_search/main.nf b/subworkflows/local/peptide_database_search/main.nf index 929da803..eea6bbcd 100644 --- a/subworkflows/local/peptide_database_search/main.nf +++ b/subworkflows/local/peptide_database_search/main.nf @@ -71,6 +71,8 @@ workflow PEPTIDE_DATABASE_SEARCH { ch_id_sage = ch_id_sage.mix(SAGE.out.id_files_sage.transpose()) } + (ch_id_files_msgf_feats, ch_id_files_comet_feats, ch_id_files_sage_feats) = [ Channel.empty(), Channel.empty(), Channel.empty() ] + if (params.skip_rescoring != true) { if (params.ms2features_enable == true){ @@ -121,7 +123,7 @@ workflow PEPTIDE_DATABASE_SEARCH { .set { msgf_features_input } MSRESCORE_FEATURES(msgf_features_input) ch_versions = ch_versions.mix(MSRESCORE_FEATURES.out.versions) - ch_id_files_msgf_feats = MSRESCORE_FEATURES.out.idxml + ch_id_files_msgf_feats = ch_id_files_msgf_feats.mix(MSRESCORE_FEATURES.out.idxml) } if (params.search_engines.contains("sage")) { @@ -131,7 +133,7 @@ workflow PEPTIDE_DATABASE_SEARCH { .set { sage_features_input } MSRESCORE_FEATURES(sage_features_input) ch_versions = ch_versions.mix(MSRESCORE_FEATURES.out.versions) - ch_id_files_sage_feats = MSRESCORE_FEATURES.out.idxml + ch_id_files_sage_feats = ch_id_files_sage_feats.mix(MSRESCORE_FEATURES.out.idxml) } if (params.search_engines.contains("comet")) { @@ -141,26 +143,26 @@ workflow PEPTIDE_DATABASE_SEARCH { .set { comet_features_input } MSRESCORE_FEATURES(comet_features_input) ch_versions = ch_versions.mix(MSRESCORE_FEATURES.out.versions) - ch_id_files_comet_feats = MSRESCORE_FEATURES.out.idxml + ch_id_files_comet_feats = ch_id_files_comet_feats.mix(MSRESCORE_FEATURES.out.idxml) } } } else{ if (params.search_engines.contains("msgf")) { MSRESCORE_FEATURES(ch_id_msgf.combine(ch_mzmls_search, by: 0).combine(ms2_model_dir)) ch_versions = ch_versions.mix(MSRESCORE_FEATURES.out.versions) - ch_id_files_msgf_feats = MSRESCORE_FEATURES.out.idxml + ch_id_files_msgf_feats = ch_id_files_msgf_feats.mix(MSRESCORE_FEATURES.out.idxml) } if (params.search_engines.contains("comet")) { MSRESCORE_FEATURES(ch_id_comet.combine(ch_mzmls_search, by: 0).combine(ms2_model_dir)) ch_versions = ch_versions.mix(MSRESCORE_FEATURES.out.versions) - ch_id_files_comet_feats = MSRESCORE_FEATURES.out.idxml + ch_id_files_comet_feats = ch_id_files_comet_feats.mix(MSRESCORE_FEATURES.out.idxml) } if (params.search_engines.contains("sage")) { MSRESCORE_FEATURES(ch_id_sage.combine(ch_mzmls_search, by: 0).combine(ms2_model_dir)) ch_versions = ch_versions.mix(MSRESCORE_FEATURES.out.versions) - ch_id_files_sage_feats = MSRESCORE_FEATURES.out.idxml + ch_id_files_sage_feats = ch_id_files_sage_feats.mix(MSRESCORE_FEATURES.out.idxml) } } From a0e8e31788fcaa8caf9acaf5f579cd8384d88e0e Mon Sep 17 00:00:00 2001 From: Chengxin Dai <37200167+daichengxin@users.noreply.github.com> Date: Sat, 3 Jan 2026 19:46:58 +0800 Subject: [PATCH 19/22] fixed --- .../local/utils/msrescore_features/main.nf | 10 +- modules/local/utils/spectrum_features/main.nf | 4 +- subworkflows/local/dda_id/main.nf | 2 +- .../local/peptide_database_search/main.nf | 116 ++++++++---------- 4 files changed, 59 insertions(+), 73 deletions(-) diff --git a/modules/local/utils/msrescore_features/main.nf b/modules/local/utils/msrescore_features/main.nf index 6a28d128..12543126 100644 --- a/modules/local/utils/msrescore_features/main.nf +++ b/modules/local/utils/msrescore_features/main.nf @@ -7,13 +7,13 @@ process MSRESCORE_FEATURES { 'ghcr.io/bigbio/quantms-rescoring:0.0.14' }" input: - tuple val(meta), path(idxml), path(mzml), path(model_weight) + tuple val(meta), path(idxml), path(mzml), path(model_weight), val(search_engine) output: - tuple val(meta), path("*ms2rescore.idXML") , emit: idxml - tuple val(meta), path("*.html" ) , optional:true, emit: html - path "versions.yml" , emit: versions - path "*.log" , emit: log + tuple val(meta), path("*ms2rescore.idXML"), val(search_engine) , emit: idxml + tuple val(meta), path("*.html" ) , optional:true, emit: html + path "versions.yml" , emit: versions + path "*.log" , emit: log when: task.ext.when == null || task.ext.when diff --git a/modules/local/utils/spectrum_features/main.nf b/modules/local/utils/spectrum_features/main.nf index 7a3c38e2..3d55840c 100644 --- a/modules/local/utils/spectrum_features/main.nf +++ b/modules/local/utils/spectrum_features/main.nf @@ -7,10 +7,10 @@ process SPECTRUM_FEATURES { 'ghcr.io/bigbio/quantms-rescoring:0.0.13' }" input: - tuple val(meta), path(id_file), path(ms_file) + tuple val(meta), path(id_file), path(ms_file), val(search_engine) output: - tuple val(meta), path("${id_file.baseName}_snr.idXML"), emit: id_files_snr + tuple val(meta), path("${id_file.baseName}_snr.idXML"), val(search_engine), emit: id_files_snr path "versions.yml", emit: versions path "*.log", emit: log diff --git a/subworkflows/local/dda_id/main.nf b/subworkflows/local/dda_id/main.nf index 67f49a36..0599e802 100644 --- a/subworkflows/local/dda_id/main.nf +++ b/subworkflows/local/dda_id/main.nf @@ -32,7 +32,7 @@ workflow DDA_ID { ch_database_wdecoy ) ch_software_versions = ch_software_versions.mix(PEPTIDE_DATABASE_SEARCH.out.versions) - ch_id_files = PEPTIDE_DATABASE_SEARCH.out.ch_id_files_idx + ch_id_files_feats = PEPTIDE_DATABASE_SEARCH.out.ch_id_files_idx ch_pmultiqc_consensus = Channel.empty() ch_pmultiqc_ids = Channel.empty() diff --git a/subworkflows/local/peptide_database_search/main.nf b/subworkflows/local/peptide_database_search/main.nf index eea6bbcd..e0663f32 100644 --- a/subworkflows/local/peptide_database_search/main.nf +++ b/subworkflows/local/peptide_database_search/main.nf @@ -116,81 +116,67 @@ workflow PEPTIDE_DATABASE_SEARCH { MSRESCORE_FINE_TUNING(train_datasets) ch_versions = ch_versions.mix(MSRESCORE_FINE_TUNING.out.versions) - if (params.search_engines.contains("msgf")) { - Channel.value("msgf").combine(ch_id_msgf) - .combine(MSRESCORE_FINE_TUNING.out.model_weight, by:0) - .map { [it[1], it[2], it[3], it[4]] } - .set { msgf_features_input } - MSRESCORE_FEATURES(msgf_features_input) - ch_versions = ch_versions.mix(MSRESCORE_FEATURES.out.versions) - ch_id_files_msgf_feats = ch_id_files_msgf_feats.mix(MSRESCORE_FEATURES.out.idxml) - } - - if (params.search_engines.contains("sage")) { - Channel.value("sage").combine(ch_id_sage) - .combine(MSRESCORE_FINE_TUNING.out.model_weight, by:0) - .map { [it[1], it[2], it[3], it[4]] } - .set { sage_features_input } - MSRESCORE_FEATURES(sage_features_input) - ch_versions = ch_versions.mix(MSRESCORE_FEATURES.out.versions) - ch_id_files_sage_feats = ch_id_files_sage_feats.mix(MSRESCORE_FEATURES.out.idxml) - } - - if (params.search_engines.contains("comet")) { - Channel.value("comet").combine(ch_id_comet) - .combine(MSRESCORE_FINE_TUNING.out.model_weight, by:0) - .map { [it[1], it[2], it[3], it[4]] } - .set { comet_features_input } - MSRESCORE_FEATURES(comet_features_input) - ch_versions = ch_versions.mix(MSRESCORE_FEATURES.out.versions) - ch_id_files_comet_feats = ch_id_files_comet_feats.mix(MSRESCORE_FEATURES.out.idxml) - } - } - } else{ - if (params.search_engines.contains("msgf")) { - MSRESCORE_FEATURES(ch_id_msgf.combine(ch_mzmls_search, by: 0).combine(ms2_model_dir)) + Channel.value("msgf").combine(ch_id_msgf.combine(ch_mzmls_search, by: 0)) + .combine(MSRESCORE_FINE_TUNING.out.model_weight, by:0) + .map { [it[1], it[2], it[3], it[4], it[0] ] } + .set { msgf_features_input } + + Channel.value("sage").combine(ch_id_sage.combine(ch_mzmls_search, by: 0)) + .combine(MSRESCORE_FINE_TUNING.out.model_weight, by:0) + .map { [it[1], it[2], it[3], it[4], it[0] ] } + .set { sage_features_input } + + Channel.value("comet").combine(ch_id_comet.combine(ch_mzmls_search, by: 0)) + .combine(MSRESCORE_FINE_TUNING.out.model_weight, by:0) + .map { [it[1], it[2], it[3], it[4], it[0] ] } + .set { comet_features_input } + + MSRESCORE_FEATURES(msgf_features_input.mix(sage_features_input).mix(comet_features_input)) ch_versions = ch_versions.mix(MSRESCORE_FEATURES.out.versions) - ch_id_files_msgf_feats = ch_id_files_msgf_feats.mix(MSRESCORE_FEATURES.out.idxml) - } + ch_id_files_feats = MSRESCORE_FEATURES.out.idxml - if (params.search_engines.contains("comet")) { - MSRESCORE_FEATURES(ch_id_comet.combine(ch_mzmls_search, by: 0).combine(ms2_model_dir)) - ch_versions = ch_versions.mix(MSRESCORE_FEATURES.out.versions) - ch_id_files_comet_feats = ch_id_files_comet_feats.mix(MSRESCORE_FEATURES.out.idxml) - } - if (params.search_engines.contains("sage")) { - MSRESCORE_FEATURES(ch_id_sage.combine(ch_mzmls_search, by: 0).combine(ms2_model_dir)) - ch_versions = ch_versions.mix(MSRESCORE_FEATURES.out.versions) - ch_id_files_sage_feats = ch_id_files_sage_feats.mix(MSRESCORE_FEATURES.out.idxml) } + } else{ + ch_id_msgf.combine(ch_mzmls_search, by: 0) + .combine(ms2_model_dir) + .combine(Channel.value("msgf")).set(ch_id_msgf) + ch_id_comet.combine(ch_mzmls_search, by: 0) + .combine(ms2_model_dir) + .combine(Channel.value("comet")).set(ch_id_comet) + ch_id_sage.combine(ch_mzmls_search, by: 0) + .combine(ms2_model_dir) + .combine(Channel.value("sage")).set(ch_id_sage) + + MSRESCORE_FEATURES(ch_id_msgf.mix(ch_id_comet).mix(ch_id_sage)) + ch_versions = ch_versions.mix(MSRESCORE_FEATURES.out.versions) + ch_id_files_feats = MSRESCORE_FEATURES.out.idxml } - } else { - ch_id_files_msgf_feats = ch_id_msgf - ch_id_files_comet_feats = ch_id_comet - ch_id_files_sage_feats = ch_id_sage - } - - // Add SNR features to percolator - if (params.ms2features_snr) { - if (params.search_engines.contains("msgf")) { - SPECTRUM_FEATURES(ch_id_files_msgf_feats.combine(ch_mzmls_search, by: 0)) - ch_id_files_msgf_feats = SPECTRUM_FEATURES.out.id_files_snr + // Add SNR features to percolator + if (params.ms2features_snr) { + SPECTRUM_FEATURES(ch_id_files_feats.combine(ch_mzmls_search, by: 0)) + ch_id_files_feats_snr = SPECTRUM_FEATURES.out.id_files_snr ch_versions = ch_versions.mix(SPECTRUM_FEATURES.out.versions) + } else { + ch_id_files_feats_snr = ch_id_files_feats } - if (params.search_engines.contains("comet")) { - SPECTRUM_FEATURES(ch_id_files_comet_feats.combine(ch_mzmls_search, by: 0)) - ch_id_files_comet_feats = SPECTRUM_FEATURES.out.id_files_snr - ch_versions = ch_versions.mix(SPECTRUM_FEATURES.out.versions) - } + ch_id_files_feats_snr + .branch { meta, file_name, engine_name -> + msgf: engine_name == "msgf" + comet: engine_name == "comet" + sage: engine_name == "sage" + } + .set {ch_id_files_feats_branch} + ch_id_files_msgf_feats = ch_id_files_feats_branch.msgf.map {it -> [it[0], it[1]]} + ch_id_files_comet_feats = ch_id_files_feats_branch.comet.map {it -> [it[0], it[1]]} + ch_id_files_sage_feats = ch_id_files_feats_branch.sage.map {it -> [it[0], it[1]]} - if (params.search_engines.contains("sage")) { - SPECTRUM_FEATURES(ch_id_files_sage_feats.combine(ch_mzmls_search, by: 0)) - ch_id_files_sage_feats = SPECTRUM_FEATURES.out.id_files_snr - ch_versions = ch_versions.mix(SPECTRUM_FEATURES.out.versions) - } + } else { + ch_id_files_msgf_feats = ch_id_msgf + ch_id_files_comet_feats = ch_id_comet + ch_id_files_sage_feats = ch_id_sage } if (params.ms2features_range == "by_sample") { From c11e0316666c2cd45c029a3f06f1c947f8332660 Mon Sep 17 00:00:00 2001 From: Chengxin Dai <37200167+daichengxin@users.noreply.github.com> Date: Sat, 3 Jan 2026 20:20:31 +0800 Subject: [PATCH 20/22] fixed --- modules/local/utils/spectrum_features/main.nf | 2 +- subworkflows/local/peptide_database_search/main.nf | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/modules/local/utils/spectrum_features/main.nf b/modules/local/utils/spectrum_features/main.nf index 3d55840c..83cc150e 100644 --- a/modules/local/utils/spectrum_features/main.nf +++ b/modules/local/utils/spectrum_features/main.nf @@ -7,7 +7,7 @@ process SPECTRUM_FEATURES { 'ghcr.io/bigbio/quantms-rescoring:0.0.13' }" input: - tuple val(meta), path(id_file), path(ms_file), val(search_engine) + tuple val(meta), path(id_file), val(search_engine), path(ms_file) output: tuple val(meta), path("${id_file.baseName}_snr.idXML"), val(search_engine), emit: id_files_snr diff --git a/subworkflows/local/peptide_database_search/main.nf b/subworkflows/local/peptide_database_search/main.nf index e0663f32..283c39dc 100644 --- a/subworkflows/local/peptide_database_search/main.nf +++ b/subworkflows/local/peptide_database_search/main.nf @@ -169,9 +169,9 @@ workflow PEPTIDE_DATABASE_SEARCH { sage: engine_name == "sage" } .set {ch_id_files_feats_branch} - ch_id_files_msgf_feats = ch_id_files_feats_branch.msgf.map {it -> [it[0], it[1]]} - ch_id_files_comet_feats = ch_id_files_feats_branch.comet.map {it -> [it[0], it[1]]} - ch_id_files_sage_feats = ch_id_files_feats_branch.sage.map {it -> [it[0], it[1]]} + ch_id_files_feats_branch.msgf.map {it -> [it[0], it[1]]}.set {ch_id_files_msgf_feats} + ch_id_files_feats_branch.comet.map {it -> [it[0], it[1]]}.set {ch_id_files_comet_feats} + ch_id_files_feats_branch.sage.map {it -> [it[0], it[1]]}.set {ch_id_files_sage_feats} } else { ch_id_files_msgf_feats = ch_id_msgf From d8ed06f961721f417db8f21b58602191baac437e Mon Sep 17 00:00:00 2001 From: Chengxin Dai <37200167+daichengxin@users.noreply.github.com> Date: Sat, 3 Jan 2026 20:55:02 +0800 Subject: [PATCH 21/22] Update main.nf --- subworkflows/local/peptide_database_search/main.nf | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/subworkflows/local/peptide_database_search/main.nf b/subworkflows/local/peptide_database_search/main.nf index 283c39dc..ede6aead 100644 --- a/subworkflows/local/peptide_database_search/main.nf +++ b/subworkflows/local/peptide_database_search/main.nf @@ -140,13 +140,13 @@ workflow PEPTIDE_DATABASE_SEARCH { } else{ ch_id_msgf.combine(ch_mzmls_search, by: 0) .combine(ms2_model_dir) - .combine(Channel.value("msgf")).set(ch_id_msgf) + .combine(Channel.value("msgf")).set{ ch_id_msgf } ch_id_comet.combine(ch_mzmls_search, by: 0) .combine(ms2_model_dir) - .combine(Channel.value("comet")).set(ch_id_comet) + .combine(Channel.value("comet")).set{ ch_id_comet } ch_id_sage.combine(ch_mzmls_search, by: 0) .combine(ms2_model_dir) - .combine(Channel.value("sage")).set(ch_id_sage) + .combine(Channel.value("sage")).set{ ch_id_sage } MSRESCORE_FEATURES(ch_id_msgf.mix(ch_id_comet).mix(ch_id_sage)) ch_versions = ch_versions.mix(MSRESCORE_FEATURES.out.versions) From 075a534790d4d3fb3553723341982dac9e67963d Mon Sep 17 00:00:00 2001 From: Chengxin Dai <37200167+daichengxin@users.noreply.github.com> Date: Sun, 4 Jan 2026 19:16:56 +0800 Subject: [PATCH 22/22] refine --- subworkflows/local/dda_id/main.nf | 3 ++- subworkflows/local/id/main.nf | 3 ++- subworkflows/local/peptide_database_search/main.nf | 7 +++++++ 3 files changed, 11 insertions(+), 2 deletions(-) diff --git a/subworkflows/local/dda_id/main.nf b/subworkflows/local/dda_id/main.nf index 0599e802..cfbb05cd 100644 --- a/subworkflows/local/dda_id/main.nf +++ b/subworkflows/local/dda_id/main.nf @@ -29,7 +29,8 @@ workflow DDA_ID { // PEPTIDE_DATABASE_SEARCH ( ch_file_preparation_results, - ch_database_wdecoy + ch_database_wdecoy, + ch_expdesign ) ch_software_versions = ch_software_versions.mix(PEPTIDE_DATABASE_SEARCH.out.versions) ch_id_files_feats = PEPTIDE_DATABASE_SEARCH.out.ch_id_files_idx diff --git a/subworkflows/local/id/main.nf b/subworkflows/local/id/main.nf index 690363e3..87acc62b 100644 --- a/subworkflows/local/id/main.nf +++ b/subworkflows/local/id/main.nf @@ -26,7 +26,8 @@ workflow ID { // PEPTIDE_DATABASE_SEARCH ( ch_file_preparation_results, - ch_database_wdecoy + ch_database_wdecoy, + ch_expdesign ) ch_software_versions = ch_software_versions.mix(PEPTIDE_DATABASE_SEARCH.out.versions) diff --git a/subworkflows/local/peptide_database_search/main.nf b/subworkflows/local/peptide_database_search/main.nf index ede6aead..ce7a17d1 100644 --- a/subworkflows/local/peptide_database_search/main.nf +++ b/subworkflows/local/peptide_database_search/main.nf @@ -14,6 +14,7 @@ workflow PEPTIDE_DATABASE_SEARCH { take: ch_mzmls_search ch_searchengine_in_db + ch_expdesign main: (ch_id_msgf, ch_id_comet, ch_id_sage, ch_versions) = [ Channel.empty(), Channel.empty(), Channel.empty(), Channel.empty() ] @@ -93,18 +94,24 @@ workflow PEPTIDE_DATABASE_SEARCH { // Preparing train datasets and fine tuning MS2 model sage_train_datasets = ch_id_sage .combine(ch_mzmls_search, by: 0) + .toSortedList() + .flatMap() .randomSample(params.fine_tuning_sample_run, 2025) .combine(Channel.value("sage")) .groupTuple(by: 3) msgf_train_datasets = ch_id_msgf .combine(ch_mzmls_search, by: 0) + .toSortedList() + .flatMap() .randomSample(params.fine_tuning_sample_run, 2025) .combine(Channel.value("msgf")) .groupTuple(by: 3) comet_train_datasets = ch_id_comet .combine(ch_mzmls_search, by: 0) + .toSortedList() + .flatMap() .randomSample(params.fine_tuning_sample_run, 2025) .combine(Channel.value("comet")) .groupTuple(by: 3)