Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions lib/BlockedFlags.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,8 @@ class BlockedFlags {
'--min-pr-mz', '--max-pr-mz', '--min-fr-mz', '--max-fr-mz',
'--met-excision', '--light-models',
'--infin-dia', '--pre-select',
// Pipeline-managed when fine-tuning: tuned model files passed as process inputs
'--tokens', '--rt-model', '--im-model', '--fr-model',
],
PRELIMINARY_ANALYSIS: [
// Pipeline-managed: set from params and SDRF calibration metadata
Expand Down Expand Up @@ -104,6 +106,11 @@ class BlockedFlags {
'--channel-run-norm', '--channel-spec-norm',
'--no-prot-inf',
],
FINE_TUNE_MODELS: [
// Pipeline-managed: tuning flags set from enable_fine_tuning, tune_fr, tune_lr params
'--tune-lib', '--tune-rt', '--tune-im', '--tune-fr', '--tune-lr',
'--tune-restrict-layers', '--tune-level',
],
]

/**
Expand Down
Binary file added mkdocs/images/social-card.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
56 changes: 56 additions & 0 deletions modules/local/diann/fine_tune_models/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
// Fine-tune DIA-NN deep-learning prediction models (RT, IM, optionally fragmentation)
// against an empirical spectral library. Emits the tuned tokenizer dictionary and
// per-model .pt files that downstream library generation consumes via
// --tokens / --rt-model / --im-model / --fr-model.
process FINE_TUNE_MODELS {
tag "fine_tune"
label 'process_medium'
label 'diann'

// NOTE(review): this pins DIA-NN v1.8.1, but the fine-tuning flags used below
// (--tune-lib / --tune-rt / --tune-im) require DIA-NN >= 2.0 per the workflow's
// version guard — confirm a diann_v2_* profile overrides this container.
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://containers.biocontainers.pro/s3/SingImgsRepo/diann/v1.8.1_cv1/diann_v1.8.1_cv1.img' :
'docker.io/biocontainers/diann:v1.8.1_cv1' }"

input:
path(tune_lib)      // empirical spectral library used as tuning target
path(fasta)         // protein database, needed so DIA-NN resolves mod declarations
path(diann_config)  // DIA-NN .cfg file; modification flags are extracted from it below

output:
path "*.dict.txt", emit: tokens                     // expanded tokenizer dictionary
path "*.rt.d0.pt", emit: rt_model                   // tuned retention-time model (always produced)
path "*.im.d0.pt", emit: im_model, optional: true   // tuned ion-mobility model (data-dependent)
path "*.fr.d0.pt", emit: fr_model, optional: true   // tuned fragmentation model (only with --tune-fr)
path "fine_tune.log", emit: log
path "versions.yml", emit: versions

when:
task.ext.when == null || task.ext.when

script:
def args = task.ext.args ?: ''
// Strip flags managed by the pipeline from extra_args to prevent silent conflicts.
// Blocked flags are defined centrally in lib/BlockedFlags.groovy — edit there, not here.
args = BlockedFlags.strip('FINE_TUNE_MODELS', args, log)

// Fragmentation-model tuning and learning rate are opt-in via pipeline params.
tune_fr = params.tune_fr ? '--tune-fr' : ''
tune_lr = params.tune_lr ? "--tune-lr ${params.tune_lr}" : ''

// Extract mod flags from diann_config.cfg so DIA-NN recognises modifications in the library
"""
mod_flags=\$(grep -oP '(--var-mod\\s+\\S+|--fixed-mod\\s+\\S+|--monitor-mod\\s+\\S+|--lib-fixed-mod\\s+\\S+|--original-mods|--channels\\s+.+)' ${diann_config} | tr '\\n' ' ')

diann --tune-lib ${tune_lib} \\
--tune-rt \\
--tune-im \\
${tune_fr} \\
${tune_lr} \\
--fasta ${fasta} \\
--threads ${task.cpus} \\
--verbose $params.debug_level \\
\${mod_flags} \\
$args \\
2>&1 | tee fine_tune.log

cat <<-END_VERSIONS > versions.yml
"${task.process}":
DIA-NN: \$(diann 2>&1 | grep "DIA-NN" | grep -oP "\\d+\\.\\d+(\\.\\w+)*(\\.[\\d]+)?")
END_VERSIONS
"""
}
53 changes: 53 additions & 0 deletions modules/local/diann/fine_tune_models/meta.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
name: fine_tune_models
description: Fine-tune DIA-NN deep learning prediction models (RT, IM, fragmentation) using an empirical spectral library.
keywords:
- DIA-NN
- fine-tuning
- deep learning
- prediction models
tools:
- DIA-NN:
description: |
DIA-NN - a universal software for data-independent acquisition (DIA) proteomics data processing by Demichev.
homepage: https://github.com/vdemichev/DiaNN
documentation: https://github.com/vdemichev/DiaNN
input:
- tune_lib:
type: file
description: Empirical spectral library to use for fine-tuning (typically from ASSEMBLE_EMPIRICAL_LIBRARY)
pattern: "*.{parquet,tsv}"
- fasta:
type: file
description: Protein sequence database (needed to resolve modification declarations)
pattern: "*.{fasta,fa}"
- diann_config:
type: file
description: DIA-NN config file with modification declarations
pattern: "*.cfg"
output:
- tokens:
type: file
description: Expanded tokenizer dictionary mapping modifications to neural network token IDs
pattern: "*.dict.txt"
- rt_model:
type: file
description: Fine-tuned retention time prediction model (distillation level 0)
pattern: "*.rt.d0.pt"
- im_model:
type: file
description: Fine-tuned ion mobility prediction model (optional, distillation level 0)
pattern: "*.im.d0.pt"
- fr_model:
type: file
description: Fine-tuned fragment ion prediction model (optional, distillation level 0)
pattern: "*.fr.d0.pt"
- log:
type: file
description: DIA-NN fine-tuning log
pattern: "fine_tune.log"
- versions:
type: file
description: File containing software version
pattern: "versions.yml"
authors:
- "@ypriverol"
10 changes: 10 additions & 0 deletions modules/local/diann/insilico_library_generation/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,9 @@ process INSILICO_LIBRARY_GENERATION {
path(fasta)
path(diann_config)
val(is_dda)
path(tuned_tokens) // optional: fine-tuned tokenizer dict (pass [] when not used)
path(tuned_rt_model) // optional: fine-tuned RT model (pass [] when not used)
path(tuned_im_model) // optional: fine-tuned IM model (pass [] when not used)

output:
path "versions.yml", emit: versions
Expand All @@ -37,6 +40,10 @@ process INSILICO_LIBRARY_GENERATION {
params.scoring_mode == 'peptidoforms' ? '--peptidoforms' : ''
diann_dda_flag = is_dda ? "--dda" : ""
diann_light_models = params.light_models ? "--light-models" : ""
// Fine-tuned model flags — only set when tuned model files are provided
tuned_tokens_flag = tuned_tokens ? "--tokens ${tuned_tokens}" : ''
tuned_rt_flag = tuned_rt_model ? "--rt-model ${tuned_rt_model}" : ''
tuned_im_flag = tuned_im_model ? "--im-model ${tuned_im_model}" : ''
infin_dia_flag = params.enable_infin_dia ? "--infin-dia" : ""
pre_select_flag = (params.enable_infin_dia && params.pre_select) ? "--pre-select $params.pre_select" : ""

Expand All @@ -61,6 +68,9 @@ process INSILICO_LIBRARY_GENERATION {
--gen-spec-lib \\
${scoring_mode} \\
${diann_light_models} \\
${tuned_tokens_flag} \\
${tuned_rt_flag} \\
${tuned_im_flag} \\
${infin_dia_flag} \\
${pre_select_flag} \\
${met_excision} \\
Expand Down
6 changes: 6 additions & 0 deletions nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,12 @@ params {
export_quant = false // add '--export-quant' for fragment-level parquet export (DIA-NN >= 2.0)
site_ms1_quant = false // add '--site-ms1-quant' for MS1 apex PTM quantification (DIA-NN >= 2.0)

// DIA-NN: Model fine-tuning (v2.0+)
enable_fine_tuning = false // Enable model fine-tuning before the main analysis
tune_n_files = 3 // Number of files to use for the tuning search (largest/best quality recommended)
tune_fr = false // Also fine-tune the fragmentation model (quality-sensitive)
tune_lr = null // Fine-tuning learning rate (default: 0.0005)

// DIA-NN: InfinDIA (experimental, v2.3.0+)
enable_infin_dia = false // Enable InfinDIA for ultra-large search spaces
pre_select = null // --pre-select N precursor limit for InfinDIA
Expand Down
24 changes: 24 additions & 0 deletions nextflow_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -500,6 +500,30 @@
"fa_icon": "fas fa-crosshairs",
"default": false
},
"enable_fine_tuning": {
"type": "boolean",
"description": "Enable DL model fine-tuning before the main analysis. Runs a tuning search on a file subset, fine-tunes RT/IM models, then runs the full pipeline with tuned models. Requires DIA-NN >= 2.0.",
"fa_icon": "fas fa-brain",
"default": false,
"help_text": "When enabled, Phase 0 runs before the main pipeline: (1) in-silico library generation with default models, (2) preliminary analysis + assembly on a subset of files (controlled by --tune_n_files), (3) fine-tuning RT/IM models on the resulting empirical library, (4) re-generation of the in-silico library with tuned models. The main pipeline then runs from preliminary analysis using the tuned library."
},
"tune_n_files": {
"type": "integer",
"description": "Number of files to use for the fine-tuning search. Use the largest/best-quality files.",
"fa_icon": "fas fa-layer-group",
"default": 3
},
"tune_fr": {
"type": "boolean",
"description": "Also fine-tune the fragmentation model (quality-sensitive — verify results vs base model).",
"fa_icon": "fas fa-flask",
"default": false
},
"tune_lr": {
"type": "number",
"description": "Fine-tuning learning rate (default in DIA-NN: 0.0005). Maps to --tune-lr.",
"fa_icon": "fas fa-sliders-h"
},
"enable_infin_dia": {
"type": "boolean",
"description": "Enable InfinDIA for ultra-large search spaces (DIA-NN >= 2.3.0). Experimental.",
Expand Down
96 changes: 90 additions & 6 deletions workflows/dia.nf
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,12 @@
//
include { DIANN_MSSTATS } from '../modules/local/diann/diann_msstats/main'
include { PRELIMINARY_ANALYSIS } from '../modules/local/diann/preliminary_analysis/main'
include { PRELIMINARY_ANALYSIS as TUNE_PRELIMINARY_ANALYSIS } from '../modules/local/diann/preliminary_analysis/main'
include { ASSEMBLE_EMPIRICAL_LIBRARY } from '../modules/local/diann/assemble_empirical_library/main'
include { ASSEMBLE_EMPIRICAL_LIBRARY as TUNE_ASSEMBLE_LIBRARY } from '../modules/local/diann/assemble_empirical_library/main'
include { INSILICO_LIBRARY_GENERATION } from '../modules/local/diann/insilico_library_generation/main'
include { INSILICO_LIBRARY_GENERATION as TUNED_LIBRARY_GENERATION } from '../modules/local/diann/insilico_library_generation/main'
include { FINE_TUNE_MODELS } from '../modules/local/diann/fine_tune_models/main'
include { INDIVIDUAL_ANALYSIS } from '../modules/local/diann/individual_analysis/main'
include { FINAL_QUANTIFICATION } from '../modules/local/diann/final_quantification/main'

Expand Down Expand Up @@ -61,6 +65,11 @@ workflow DIA {
error("${enabled.join(', ')} require DIA-NN >= 2.0. Current version: ${params.diann_version}. Use -profile diann_v2_1_0 or later")
}

// Version guard for model fine-tuning
if (params.enable_fine_tuning && VersionUtils.versionLessThan(params.diann_version, '2.0')) {
error("Model fine-tuning requires DIA-NN >= 2.0. Current version: ${params.diann_version}. Use -profile diann_v2_1_0 or later")
}

// Warn about contradictory normalization flags
if (!params.normalize && (params.channel_run_norm || params.channel_spec_norm)) {
log.warn "Both --normalize false (adds --no-norm) and channel normalization flags are set. " +
Expand Down Expand Up @@ -95,13 +104,88 @@ workflow DIA {
ch_diann_cfg_val = ch_diann_cfg

//
// MODULE: SILICOLIBRARYGENERATION
// PHASE 0 (optional): FINE-TUNE DL MODELS
//
if (params.speclib != null && params.speclib.toString() != "") {
speclib = channel.from(file(params.speclib, checkIfExists: true))
} else {
INSILICO_LIBRARY_GENERATION(ch_searchdb, ch_diann_cfg_val, ch_is_dda)
speclib = INSILICO_LIBRARY_GENERATION.out.predict_speclib
// Per DIA-NN author's recommendation (Vadim Demichev):
// 1. Run InfinDIA on a subset of files with RT/IM filtering set to Relaxed
// 2. Fine-tune models using the resulting empirical library
// 3. Then run the full pipeline from in-silico library generation with tuned models
//
// The tuned models feed into INSILICO_LIBRARY_GENERATION at the very start.
//
ch_tuned_tokens = Channel.empty()
ch_tuned_rt = Channel.empty()
ch_tuned_im = Channel.empty()

if (params.enable_fine_tuning) {
// Step 0a: Generate a tuning library via InfinDIA on a subset of files
// Use a deterministic subset (first N files, sorted by filename — not random; all files if fewer than N) for the tuning search
tuning_files = ch_file_preparation_results
.toSortedList{ a, b -> file(a[1]).getName() <=> file(b[1]).getName() }
.flatMap()
.take(params.tune_n_files)

// Run in-silico library generation first (with default models) for the tuning search
INSILICO_LIBRARY_GENERATION(ch_searchdb, ch_diann_cfg_val, ch_is_dda, [], [], [])
tune_speclib = INSILICO_LIBRARY_GENERATION.out.predict_speclib

// Run preliminary analysis on the tuning subset to produce .quant files
TUNE_PRELIMINARY_ANALYSIS(tuning_files.combine(tune_speclib), ch_diann_cfg_val)

// Assemble the tuning empirical library from the subset
tune_lib_files = tuning_files
.map { result -> result[1] }
.collect( sort: { a, b -> file(a).getName() <=> file(b).getName() } )

TUNE_ASSEMBLE_LIBRARY(
tune_lib_files,
ch_experiment_meta,
TUNE_PRELIMINARY_ANALYSIS.out.diann_quant.collect(),
tune_speclib,
ch_diann_cfg_val
)
ch_software_versions = ch_software_versions
.mix(TUNE_PRELIMINARY_ANALYSIS.out.versions)
.mix(TUNE_ASSEMBLE_LIBRARY.out.versions)

// Step 0b: Fine-tune models on the empirical library
FINE_TUNE_MODELS(
TUNE_ASSEMBLE_LIBRARY.out.empirical_library,
ch_searchdb,
ch_diann_cfg_val
)
ch_software_versions = ch_software_versions
.mix(FINE_TUNE_MODELS.out.versions)

ch_tuned_tokens = FINE_TUNE_MODELS.out.tokens
ch_tuned_rt = FINE_TUNE_MODELS.out.rt_model
ch_tuned_im = FINE_TUNE_MODELS.out.im_model

// Step 0c: Re-generate in-silico library with tuned models
TUNED_LIBRARY_GENERATION(
ch_searchdb,
ch_diann_cfg_val,
ch_is_dda,
ch_tuned_tokens,
ch_tuned_rt,
ch_tuned_im
)
ch_software_versions = ch_software_versions
.mix(TUNED_LIBRARY_GENERATION.out.versions)

speclib = TUNED_LIBRARY_GENERATION.out.predict_speclib
}

//
// MODULE: INSILICO_LIBRARY_GENERATION (standard, when not fine-tuning)
//
if (!params.enable_fine_tuning) {
if (params.speclib != null && params.speclib.toString() != "") {
speclib = channel.from(file(params.speclib, checkIfExists: true))
} else {
INSILICO_LIBRARY_GENERATION(ch_searchdb, ch_diann_cfg_val, ch_is_dda, [], [], [])
speclib = INSILICO_LIBRARY_GENERATION.out.predict_speclib
}
}

if (params.skip_preliminary_analysis) {
Expand Down
Loading