Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 10 additions & 8 deletions modules/local/utils/msrescore_features/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -3,17 +3,17 @@ process MSRESCORE_FEATURES {
label 'process_high'

container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'oras://ghcr.io/bigbio/quantms-rescoring-sif:0.0.13' :
'ghcr.io/bigbio/quantms-rescoring:0.0.13' }"
'oras://ghcr.io/bigbio/quantms-rescoring-sif:0.0.14' :
'ghcr.io/bigbio/quantms-rescoring:0.0.14' }"

input:
tuple val(meta), path(idxml), path(mzml), path(model_weight)
tuple val(meta), path(idxml), path(mzml), path(model_weight), val(search_engine)

output:
tuple val(meta), path("*ms2rescore.idXML") , emit: idxml
tuple val(meta), path("*.html" ) , optional:true, emit: html
path "versions.yml" , emit: versions
path "*.log" , emit: log
tuple val(meta), path("*ms2rescore.idXML"), val(search_engine) , emit: idxml
tuple val(meta), path("*.html" ) , optional:true, emit: html
path "versions.yml" , emit: versions
path "*.log" , emit: log

when:
task.ext.when == null || task.ext.when
Expand All @@ -27,8 +27,10 @@ process MSRESCORE_FEATURES {
// When --ms2features_model_dir is passed with no value, Nextflow may set it to boolean true
if (params.ms2features_fine_tuning) {
ms2_model_dir = '--ms2_model_dir ./'
} else {
} else if (params.ms2features_model_dir && params.ms2features_model_dir != true){
ms2_model_dir = "--ms2_model_dir ${model_weight}"
} else {
ms2_model_dir = "--ms2_model_dir ./"
}

// Determine if using ms2pip or alphapeptdeep based on ms2features_generators
Expand Down
26 changes: 16 additions & 10 deletions modules/local/utils/msrescore_fine_tuning/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -3,31 +3,37 @@ process MSRESCORE_FINE_TUNING {
label 'process_high'

container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'oras://ghcr.io/bigbio/quantms-rescoring-sif:0.0.13' :
'ghcr.io/bigbio/quantms-rescoring:0.0.13' }"
'oras://ghcr.io/bigbio/quantms-rescoring-sif:0.0.14' :
'ghcr.io/bigbio/quantms-rescoring:0.0.14' }"

input:
tuple val(meta), path(idxml), path(mzml), path(ms2_model_dir)
tuple val(meta), path(idxml), path(mzml), val(groupkey), path(ms2_model_dir)

output:
path "retained_ms2.pth" , emit: model_weight
path "versions.yml" , emit: versions
path "*.log" , emit: log
tuple val(groupkey), path("retained_ms2.pth") , emit: model_weight
path "versions.yml" , emit: versions
path "*.log" , emit: log

when:
task.ext.when == null || task.ext.when

script:
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.mzml_id}_ms2rescore"
def prefix = task.ext.prefix ?: "${groupkey}_fine_tuning"

// Initialize tolerance variables
def ms2_tolerance = null
def ms2_tolerance_unit = null

// ms2pip only supports Da unit, but alphapeptdeep supports both Da and ppm
ms2_tolerance = meta['fragmentmasstolerance']
ms2_tolerance_unit = meta['fragmentmasstoleranceunit']
ms2_tolerance = meta[0]['fragmentmasstolerance']
ms2_tolerance_unit = meta[0]['fragmentmasstoleranceunit']

if (params.ms2features_model_dir && params.ms2features_model_dir != true) {
ms2_model_dir = ms2_model_dir
} else {
ms2_model_dir = "./"
}

if (params.force_transfer_learning) {
force_transfer_learning = "--force_transfer_learning"
Expand Down Expand Up @@ -56,7 +62,7 @@ process MSRESCORE_FINE_TUNING {
${force_transfer_learning} \\
${consider_modloss} \\
$args \\
2>&1 | tee ${idxml.baseName}_fine_tuning.log
2>&1 | tee ${groupkey}_fine_tuning.log

cat <<-END_VERSIONS > versions.yml
"${task.process}":
Expand Down
4 changes: 2 additions & 2 deletions modules/local/utils/spectrum_features/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,10 @@ process SPECTRUM_FEATURES {
'ghcr.io/bigbio/quantms-rescoring:0.0.13' }"

input:
tuple val(meta), path(id_file), path(ms_file)
tuple val(meta), path(id_file), val(search_engine), path(ms_file)

output:
tuple val(meta), path("${id_file.baseName}_snr.idXML"), emit: id_files_snr
tuple val(meta), path("${id_file.baseName}_snr.idXML"), val(search_engine), emit: id_files_snr
path "versions.yml", emit: versions
path "*.log", emit: log

Expand Down
118 changes: 5 additions & 113 deletions subworkflows/local/dda_id/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,8 @@
//
include { CONSENSUSID } from '../../../modules/local/openms/consensusid/main'
include { PERCOLATOR } from '../../../modules/local/openms/percolator/main'
include { ID_MERGER } from '../../../modules/local/openms/id_merger/main'
include { ID_RIPPER } from '../../../modules/local/openms/id_ripper/main'
include { PSM_CONVERSION } from '../../../modules/local/utils/psm_conversion/main'
include { MSRESCORE_FEATURES } from '../../../modules/local/utils/msrescore_features/main'
include { GET_SAMPLE } from '../../../modules/local/utils/extract_sample/main'
include { SPECTRUM_FEATURES } from '../../../modules/local/utils/spectrum_features/main'
include { PSM_CLEAN } from '../../../modules/local/utils/psm_clean/main'
include { MSRESCORE_FINE_TUNING} from '../../../modules/local/utils/msrescore_fine_tuning/main'
include { PHOSPHO_SCORING } from '../phospho_scoring/main'

//
Expand All @@ -35,10 +29,11 @@ workflow DDA_ID {
//
PEPTIDE_DATABASE_SEARCH (
ch_file_preparation_results,
ch_database_wdecoy
ch_database_wdecoy,
ch_expdesign
)
ch_software_versions = ch_software_versions.mix(PEPTIDE_DATABASE_SEARCH.out.versions)
ch_id_files = PEPTIDE_DATABASE_SEARCH.out.ch_id_files_idx
ch_id_files_feats = PEPTIDE_DATABASE_SEARCH.out.ch_id_files_idx

ch_pmultiqc_consensus = Channel.empty()
ch_pmultiqc_ids = Channel.empty()
Expand All @@ -47,116 +42,14 @@ workflow DDA_ID {
// SUBWORKFLOW: Rescoring
//
if (params.skip_rescoring == false) {

if (params.ms2features_enable == true) {

// Only add ms2_model_dir if it's actually set and not empty
// Handle cases where parameter might be empty string, null, boolean true, or whitespace
// When --ms2features_model_dir is passed with no value, Nextflow may set it to boolean true
if (params.ms2features_model_dir && params.ms2features_model_dir != true) {
ms2_model_dir = Channel.from(file(params.ms2features_model_dir, checkIfExists: true))
} else {
ms2_model_dir = Channel.from(file("./"))
}

if (params.ms2features_fine_tuning == true) {
if (params.ms2features_generators.toLowerCase().contains('ms2pip')) {
exit(1, 'Error: Fine tuning only supports AlphaPeptdeep!')
} else {
train_datasets = ch_id_files.combine(ch_file_preparation_results, by: 0).randomSample(params.fine_tuning_sample_run, 2025)
MSRESCORE_FINE_TUNING(train_datasets.collect().combine(ms2_model_dir))
MSRESCORE_FEATURES(ch_id_files.combine(ch_file_preparation_results, by: 0).combine(MSRESCORE_FINE_TUNING.out.model_weight))
ch_software_versions = ch_software_versions.mix(MSRESCORE_FEATURES.out.versions)
ch_id_files_feats = MSRESCORE_FEATURES.out.idxml
}
} else{
MSRESCORE_FEATURES(ch_id_files.combine(ch_file_preparation_results, by: 0).combine(ms2_model_dir))
ch_software_versions = ch_software_versions.mix(MSRESCORE_FEATURES.out.versions)
ch_id_files_feats = MSRESCORE_FEATURES.out.idxml
}

} else {
PSM_CLEAN(ch_id_files.combine(ch_file_preparation_results, by: 0))
ch_id_files_feats = PSM_CLEAN.out.idxml
ch_software_versions = ch_software_versions.mix(PSM_CLEAN.out.versions)
}

// Add SNR features to percolator
if (params.ms2features_snr) {
SPECTRUM_FEATURES(ch_id_files_feats.combine(ch_file_preparation_results, by: 0))
ch_id_files_feats = SPECTRUM_FEATURES.out.id_files_snr
ch_software_versions = ch_software_versions.mix(SPECTRUM_FEATURES.out.versions)
}

// Rescoring scope: per independent run, per sample, or across the whole project
if (params.ms2features_range == "independent_run") {
PERCOLATOR(ch_id_files_feats)
ch_software_versions = ch_software_versions.mix(PERCOLATOR.out.versions)
ch_consensus_input = PERCOLATOR.out.id_files_perc
} else if (params.ms2features_range == "by_sample") {
// Sample map
GET_SAMPLE(ch_expdesign)
ch_software_versions = ch_software_versions.mix(GET_SAMPLE.out.versions)

ch_expdesign_sample = GET_SAMPLE.out.ch_expdesign_sample
ch_expdesign_sample.splitCsv(header: true, sep: '\t')
.map { get_sample_map(it) }.set{ sample_map_idv }

ch_id_files_feats.map {[it[0].mzml_id, it[0], it[1]]}
.combine(sample_map_idv, by: 0)
.map {[it[1], it[2], it[3]]}
.set{ch_id_files_feats_sample}

// Group by search_engines and sample
ch_id_files_feats_sample.branch{ meta, filename, sample ->
sage: filename.name.contains('sage')
return [meta, filename, sample]
msgf: filename.name.contains('msgf')
return [meta, filename, sample]
comet: filename.name.contains('comet')
return [meta, filename, sample]
}.set{ch_id_files_feat_branched}

// IDMERGER for samples group
ID_MERGER(ch_id_files_feat_branched.comet.groupTuple(by: 2)
.mix(ch_id_files_feat_branched.msgf.groupTuple(by: 2))
.mix(ch_id_files_feat_branched.sage.groupTuple(by: 2)))
ch_software_versions = ch_software_versions.mix(ID_MERGER.out.versions)

PERCOLATOR(ID_MERGER.out.id_merged)
ch_software_versions = ch_software_versions.mix(PERCOLATOR.out.versions)

// CONSENSUSID currently supports only ID runs that come from exactly one mzML file, so split the idXML by run
ID_RIPPER(PERCOLATOR.out.id_files_perc)
ch_file_preparation_results.map{[it[0].mzml_id, it[0]]}.set{meta}
ID_RIPPER.out.id_rippers.flatten().map { add_file_prefix (it)}.set{id_rippers}
meta.combine(id_rippers, by: 0)
.map{ [it[1], it[2]]}
.set{ ch_consensus_input }
ch_software_versions = ch_software_versions.mix(ID_RIPPER.out.versions)

} else if (params.ms2features_range == "by_project"){
ch_id_files_feats.map {[it[0].experiment_id, it[0], it[1]]}.set { ch_id_files_feats}

// Split ch_id_files_feats by search_engines
ch_id_files_feats.branch{ experiment_id, meta, filename ->
sage: filename.name.contains('sage')
return [meta, filename, experiment_id]
msgf: filename.name.contains('msgf')
return [meta, filename, experiment_id]
comet: filename.name.contains('comet')
return [meta, filename, experiment_id]
}.set{ch_id_files_feat_branched}

// IDMERGER for whole experiments
ID_MERGER(ch_id_files_feat_branched.comet.groupTuple(by: 2)
.mix(ch_id_files_feat_branched.msgf.groupTuple(by: 2))
.mix(ch_id_files_feat_branched.sage.groupTuple(by: 2)))
ch_software_versions = ch_software_versions.mix(ID_MERGER.out.versions)

PERCOLATOR(ID_MERGER.out.id_merged)
} else {
PERCOLATOR(ch_id_files_feats)
ch_software_versions = ch_software_versions.mix(PERCOLATOR.out.versions)

// CONSENSUSID currently supports only ID runs that come from exactly one mzML file, so split the idXML by run
ID_RIPPER(PERCOLATOR.out.id_files_perc)
ch_file_preparation_results.map{[it[0].mzml_id, it[0]]}.set{meta}
Expand All @@ -165,7 +58,6 @@ workflow DDA_ID {
.map{ [it[1], it[2]]}
.set{ ch_consensus_input }
ch_software_versions = ch_software_versions.mix(ID_RIPPER.out.versions)

}

ch_rescoring_results = ch_consensus_input
Expand Down
3 changes: 2 additions & 1 deletion subworkflows/local/id/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,8 @@ workflow ID {
//
PEPTIDE_DATABASE_SEARCH (
ch_file_preparation_results,
ch_database_wdecoy
ch_database_wdecoy,
ch_expdesign
)
ch_software_versions = ch_software_versions.mix(PEPTIDE_DATABASE_SEARCH.out.versions)

Expand Down
Loading
Loading