Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions lib/BlockedFlags.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,8 @@ class BlockedFlags {
'--min-pr-mz', '--max-pr-mz', '--min-fr-mz', '--max-fr-mz',
'--met-excision', '--light-models',
'--infin-dia', '--pre-select',
// Pipeline-managed when fine-tuning: tuned model files passed as process inputs
'--tokens', '--rt-model', '--im-model', '--fr-model',
],
PRELIMINARY_ANALYSIS: [
// Pipeline-managed: set from params and SDRF calibration metadata
Expand Down Expand Up @@ -104,6 +106,11 @@ class BlockedFlags {
'--channel-run-norm', '--channel-spec-norm',
'--no-prot-inf',
],
FINE_TUNE_MODELS: [
// Pipeline-managed: tuning flags set from enable_fine_tuning, tune_fr, tune_lr params
'--tune-lib', '--tune-rt', '--tune-im', '--tune-fr', '--tune-lr',
'--tune-restrict-layers', '--tune-level',
],
]

/**
Expand Down
Binary file added mkdocs/images/social-card.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
56 changes: 56 additions & 0 deletions modules/local/diann/fine_tune_models/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
// Fine-tune DIA-NN deep-learning prediction models (RT, IM, optionally fragmentation)
// against an empirical spectral library. Emits the tuned tokenizer dictionary and
// per-model .pt files that downstream library generation consumes via
// --tokens / --rt-model / --im-model / --fr-model.
process FINE_TUNE_MODELS {
tag "fine_tune"
label 'process_medium'
label 'diann'

// NOTE(review): this pins DIA-NN v1.8.1, but the fine-tuning flags used below
// (--tune-lib / --tune-rt / --tune-im) require DIA-NN >= 2.0 per the workflow's
// version guard — confirm a diann_v2_* profile overrides this container.
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://containers.biocontainers.pro/s3/SingImgsRepo/diann/v1.8.1_cv1/diann_v1.8.1_cv1.img' :
'docker.io/biocontainers/diann:v1.8.1_cv1' }"

input:
path(tune_lib)      // empirical spectral library used as tuning target
path(fasta)         // protein database, needed so DIA-NN resolves mod declarations
path(diann_config)  // DIA-NN .cfg file; modification flags are extracted from it below

output:
path "*.dict.txt", emit: tokens                     // expanded tokenizer dictionary
path "*.rt.d0.pt", emit: rt_model                   // tuned retention-time model (always produced)
path "*.im.d0.pt", emit: im_model, optional: true   // tuned ion-mobility model (data-dependent)
path "*.fr.d0.pt", emit: fr_model, optional: true   // tuned fragmentation model (only with --tune-fr)
path "fine_tune.log", emit: log
path "versions.yml", emit: versions

when:
task.ext.when == null || task.ext.when

script:
def args = task.ext.args ?: ''
// Strip flags managed by the pipeline from extra_args to prevent silent conflicts.
// Blocked flags are defined centrally in lib/BlockedFlags.groovy — edit there, not here.
args = BlockedFlags.strip('FINE_TUNE_MODELS', args, log)

// Fragmentation-model tuning and learning rate are opt-in via pipeline params.
tune_fr = params.tune_fr ? '--tune-fr' : ''
tune_lr = params.tune_lr ? "--tune-lr ${params.tune_lr}" : ''

// Extract mod flags from diann_config.cfg so DIA-NN recognises modifications in the library
"""
mod_flags=\$(grep -oP '(--var-mod\\s+\\S+|--fixed-mod\\s+\\S+|--monitor-mod\\s+\\S+|--lib-fixed-mod\\s+\\S+|--original-mods|--channels\\s+.+)' ${diann_config} | tr '\\n' ' ')

diann --tune-lib ${tune_lib} \\
--tune-rt \\
--tune-im \\
${tune_fr} \\
${tune_lr} \\
--fasta ${fasta} \\
--threads ${task.cpus} \\
--verbose $params.debug_level \\
\${mod_flags} \\
$args \\
2>&1 | tee fine_tune.log

cat <<-END_VERSIONS > versions.yml
"${task.process}":
DIA-NN: \$(diann 2>&1 | grep "DIA-NN" | grep -oP "\\d+\\.\\d+(\\.\\w+)*(\\.[\\d]+)?")
END_VERSIONS
"""
}
53 changes: 53 additions & 0 deletions modules/local/diann/fine_tune_models/meta.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
name: fine_tune_models
description: Fine-tune DIA-NN deep learning prediction models (RT, IM, fragmentation) using an empirical spectral library.
keywords:
- DIA-NN
- fine-tuning
- deep learning
- prediction models
tools:
- DIA-NN:
description: |
DIA-NN - a universal software for data-independent acquisition (DIA) proteomics data processing by Demichev.
homepage: https://github.com/vdemichev/DiaNN
documentation: https://github.com/vdemichev/DiaNN
input:
- tune_lib:
type: file
description: Empirical spectral library to use for fine-tuning (typically from ASSEMBLE_EMPIRICAL_LIBRARY)
pattern: "*.{parquet,tsv}"
- fasta:
type: file
description: Protein sequence database (needed to resolve modification declarations)
pattern: "*.{fasta,fa}"
- diann_config:
type: file
description: DIA-NN config file with modification declarations
pattern: "*.cfg"
output:
- tokens:
type: file
description: Expanded tokenizer dictionary mapping modifications to neural network token IDs
pattern: "*.dict.txt"
- rt_model:
type: file
description: Fine-tuned retention time prediction model (distillation level 0)
pattern: "*.rt.d0.pt"
- im_model:
type: file
description: Fine-tuned ion mobility prediction model (optional, distillation level 0)
pattern: "*.im.d0.pt"
- fr_model:
type: file
description: Fine-tuned fragment ion prediction model (optional, distillation level 0)
pattern: "*.fr.d0.pt"
- log:
type: file
description: DIA-NN fine-tuning log
pattern: "fine_tune.log"
- versions:
type: file
description: File containing software version
pattern: "versions.yml"
authors:
- "@ypriverol"
10 changes: 10 additions & 0 deletions modules/local/diann/insilico_library_generation/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,9 @@ process INSILICO_LIBRARY_GENERATION {
path(fasta)
path(diann_config)
val(is_dda)
path(tuned_tokens) // optional: fine-tuned tokenizer dict (pass [] when not used)
path(tuned_rt_model) // optional: fine-tuned RT model (pass [] when not used)
path(tuned_im_model) // optional: fine-tuned IM model (pass [] when not used)

output:
path "versions.yml", emit: versions
Expand All @@ -37,6 +40,10 @@ process INSILICO_LIBRARY_GENERATION {
params.scoring_mode == 'peptidoforms' ? '--peptidoforms' : ''
diann_dda_flag = is_dda ? "--dda" : ""
diann_light_models = params.light_models ? "--light-models" : ""
// Fine-tuned model flags — only set when tuned model files are provided
tuned_tokens_flag = tuned_tokens ? "--tokens ${tuned_tokens}" : ''
tuned_rt_flag = tuned_rt_model ? "--rt-model ${tuned_rt_model}" : ''
tuned_im_flag = tuned_im_model ? "--im-model ${tuned_im_model}" : ''
infin_dia_flag = params.enable_infin_dia ? "--infin-dia" : ""
pre_select_flag = (params.enable_infin_dia && params.pre_select) ? "--pre-select $params.pre_select" : ""

Expand All @@ -61,6 +68,9 @@ process INSILICO_LIBRARY_GENERATION {
--gen-spec-lib \\
${scoring_mode} \\
${diann_light_models} \\
${tuned_tokens_flag} \\
${tuned_rt_flag} \\
${tuned_im_flag} \\
${infin_dia_flag} \\
${pre_select_flag} \\
${met_excision} \\
Expand Down
6 changes: 6 additions & 0 deletions nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,12 @@ params {
export_quant = false // add '--export-quant' for fragment-level parquet export (DIA-NN >= 2.0)
site_ms1_quant = false // add '--site-ms1-quant' for MS1 apex PTM quantification (DIA-NN >= 2.0)

// DIA-NN: Model fine-tuning (v2.0+)
enable_fine_tuning = false // Enable model fine-tuning before the main analysis
tune_n_files = 3 // Number of files to use for the tuning search (largest/best quality recommended)
tune_fr = false // Also fine-tune the fragmentation model (quality-sensitive)
tune_lr = null // Fine-tuning learning rate (default: 0.0005)

// DIA-NN: InfinDIA (experimental, v2.3.0+)
enable_infin_dia = false // Enable InfinDIA for ultra-large search spaces
pre_select = null // --pre-select N precursor limit for InfinDIA
Expand Down
24 changes: 24 additions & 0 deletions nextflow_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -500,6 +500,30 @@
"fa_icon": "fas fa-crosshairs",
"default": false
},
"enable_fine_tuning": {
"type": "boolean",
"description": "Enable DL model fine-tuning before the main analysis. Runs a tuning search on a file subset, fine-tunes RT/IM models, then runs the full pipeline with tuned models. Requires DIA-NN >= 2.0.",
"fa_icon": "fas fa-brain",
"default": false,
"help_text": "When enabled, Phase 0 runs before the main pipeline: (1) in-silico library generation with default models, (2) preliminary analysis + assembly on a subset of files (controlled by --tune_n_files), (3) fine-tuning RT/IM models on the resulting empirical library, (4) re-generation of the in-silico library with tuned models. The main pipeline then runs from preliminary analysis using the tuned library."
},
"tune_n_files": {
"type": "integer",
"description": "Number of files to use for the fine-tuning search. Use the largest/best-quality files.",
"fa_icon": "fas fa-layer-group",
"default": 3
},
"tune_fr": {
"type": "boolean",
"description": "Also fine-tune the fragmentation model (quality-sensitive — verify results vs base model).",
"fa_icon": "fas fa-flask",
"default": false
},
"tune_lr": {
"type": "number",
"description": "Fine-tuning learning rate (default in DIA-NN: 0.0005). Maps to --tune-lr.",
"fa_icon": "fas fa-sliders-h"
},
"enable_infin_dia": {
"type": "boolean",
"description": "Enable InfinDIA for ultra-large search spaces (DIA-NN >= 2.3.0). Experimental.",
Expand Down
96 changes: 90 additions & 6 deletions workflows/dia.nf
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,12 @@
//
include { DIANN_MSSTATS } from '../modules/local/diann/diann_msstats/main'
include { PRELIMINARY_ANALYSIS } from '../modules/local/diann/preliminary_analysis/main'
include { PRELIMINARY_ANALYSIS as TUNE_PRELIMINARY_ANALYSIS } from '../modules/local/diann/preliminary_analysis/main'
include { ASSEMBLE_EMPIRICAL_LIBRARY } from '../modules/local/diann/assemble_empirical_library/main'
include { ASSEMBLE_EMPIRICAL_LIBRARY as TUNE_ASSEMBLE_LIBRARY } from '../modules/local/diann/assemble_empirical_library/main'
include { INSILICO_LIBRARY_GENERATION } from '../modules/local/diann/insilico_library_generation/main'
include { INSILICO_LIBRARY_GENERATION as TUNED_LIBRARY_GENERATION } from '../modules/local/diann/insilico_library_generation/main'
include { FINE_TUNE_MODELS } from '../modules/local/diann/fine_tune_models/main'
include { INDIVIDUAL_ANALYSIS } from '../modules/local/diann/individual_analysis/main'
include { FINAL_QUANTIFICATION } from '../modules/local/diann/final_quantification/main'

Expand Down Expand Up @@ -61,6 +65,11 @@ workflow DIA {
error("${enabled.join(', ')} require DIA-NN >= 2.0. Current version: ${params.diann_version}. Use -profile diann_v2_1_0 or later")
}

// Version guard for model fine-tuning
if (params.enable_fine_tuning && VersionUtils.versionLessThan(params.diann_version, '2.0')) {
error("Model fine-tuning requires DIA-NN >= 2.0. Current version: ${params.diann_version}. Use -profile diann_v2_1_0 or later")
}

// Warn about contradictory normalization flags
if (!params.normalize && (params.channel_run_norm || params.channel_spec_norm)) {
log.warn "Both --normalize false (adds --no-norm) and channel normalization flags are set. " +
Expand Down Expand Up @@ -95,13 +104,88 @@ workflow DIA {
ch_diann_cfg_val = ch_diann_cfg

//
// MODULE: SILICOLIBRARYGENERATION
// PHASE 0 (optional): FINE-TUNE DL MODELS
//
if (params.speclib != null && params.speclib.toString() != "") {
speclib = channel.from(file(params.speclib, checkIfExists: true))
} else {
INSILICO_LIBRARY_GENERATION(ch_searchdb, ch_diann_cfg_val, ch_is_dda)
speclib = INSILICO_LIBRARY_GENERATION.out.predict_speclib
// Per DIA-NN author's recommendation (Vadim Demichev):
// 1. Run InfinDIA on a subset of files with RT/IM filtering set to Relaxed
// 2. Fine-tune models using the resulting empirical library
// 3. Then run the full pipeline from in-silico library generation with tuned models
//
// The tuned models feed into INSILICO_LIBRARY_GENERATION at the very start.
//
ch_tuned_tokens = Channel.empty()
ch_tuned_rt = Channel.empty()
ch_tuned_im = Channel.empty()

if (params.enable_fine_tuning) {
// Step 0a: Generate a tuning library via InfinDIA on a subset of files
// Use a deterministic subset (first N files, sorted by filename — not random; all files if fewer than N) for the tuning search
tuning_files = ch_file_preparation_results
.toSortedList{ a, b -> file(a[1]).getName() <=> file(b[1]).getName() }
.flatMap()
.take(params.tune_n_files)

// Run in-silico library generation first (with default models) for the tuning search
INSILICO_LIBRARY_GENERATION(ch_searchdb, ch_diann_cfg_val, ch_is_dda, [], [], [])
tune_speclib = INSILICO_LIBRARY_GENERATION.out.predict_speclib

// Run preliminary analysis on the tuning subset to produce .quant files
TUNE_PRELIMINARY_ANALYSIS(tuning_files.combine(tune_speclib), ch_diann_cfg_val)

// Assemble the tuning empirical library from the subset
tune_lib_files = tuning_files
.map { result -> result[1] }
.collect( sort: { a, b -> file(a).getName() <=> file(b).getName() } )

TUNE_ASSEMBLE_LIBRARY(
tune_lib_files,
ch_experiment_meta,
TUNE_PRELIMINARY_ANALYSIS.out.diann_quant.collect(),
tune_speclib,
ch_diann_cfg_val
)
ch_software_versions = ch_software_versions
.mix(TUNE_PRELIMINARY_ANALYSIS.out.versions)
.mix(TUNE_ASSEMBLE_LIBRARY.out.versions)

// Step 0b: Fine-tune models on the empirical library
FINE_TUNE_MODELS(
TUNE_ASSEMBLE_LIBRARY.out.empirical_library,
ch_searchdb,
ch_diann_cfg_val
)
ch_software_versions = ch_software_versions
.mix(FINE_TUNE_MODELS.out.versions)

ch_tuned_tokens = FINE_TUNE_MODELS.out.tokens
ch_tuned_rt = FINE_TUNE_MODELS.out.rt_model
ch_tuned_im = FINE_TUNE_MODELS.out.im_model

// Step 0c: Re-generate in-silico library with tuned models
TUNED_LIBRARY_GENERATION(
ch_searchdb,
ch_diann_cfg_val,
ch_is_dda,
ch_tuned_tokens,
ch_tuned_rt,
ch_tuned_im
)
ch_software_versions = ch_software_versions
.mix(TUNED_LIBRARY_GENERATION.out.versions)

speclib = TUNED_LIBRARY_GENERATION.out.predict_speclib
}

//
// MODULE: INSILICO_LIBRARY_GENERATION (standard, when not fine-tuning)
//
if (!params.enable_fine_tuning) {
if (params.speclib != null && params.speclib.toString() != "") {
speclib = channel.from(file(params.speclib, checkIfExists: true))
} else {
INSILICO_LIBRARY_GENERATION(ch_searchdb, ch_diann_cfg_val, ch_is_dda, [], [], [])
speclib = INSILICO_LIBRARY_GENERATION.out.predict_speclib
}
}

if (params.skip_preliminary_analysis) {
Expand Down
Loading