From 1f31302d0b173efbc86fe6971909affb03e767c8 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 3 Dec 2025 10:47:46 +0000 Subject: [PATCH 1/7] Initial plan From d97716a7497eb327121412f36d81fe9e98fa7b26 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 3 Dec 2025 10:56:11 +0000 Subject: [PATCH 2/7] Add experimental design validation for duplicate combinations Co-authored-by: ypriverol <52113+ypriverol@users.noreply.github.com> --- bin/validate_expdesign.py | 121 ++++++++++++++++++ .../local/expdesign_validator/environment.yml | 7 + modules/local/expdesign_validator/main.nf | 27 ++++ modules/local/expdesign_validator/meta.yml | 38 ++++++ .../local/create_input_channel/main.nf | 12 +- 5 files changed, 203 insertions(+), 2 deletions(-) create mode 100755 bin/validate_expdesign.py create mode 100644 modules/local/expdesign_validator/environment.yml create mode 100644 modules/local/expdesign_validator/main.nf create mode 100644 modules/local/expdesign_validator/meta.yml diff --git a/bin/validate_expdesign.py b/bin/validate_expdesign.py new file mode 100755 index 00000000..64d32031 --- /dev/null +++ b/bin/validate_expdesign.py @@ -0,0 +1,121 @@ +#!/usr/bin/env python3 +""" +Validates OpenMS experimental design files for duplicate (Fraction_Group, Fraction, Label) combinations. + +This script checks the experimental design file generated from SDRF conversion to ensure that +each combination of (Fraction_Group, Fraction, Label) appears only once, which is a requirement +for downstream OpenMS tools like MSstatsConverter and ProteinQuantifier. +""" + +import sys +import argparse +import csv +from collections import defaultdict + + +def validate_expdesign(expdesign_file): + """ + Validate the experimental design file for duplicate combinations. 
+ + Args: + expdesign_file: Path to the OpenMS experimental design TSV file + + Returns: + bool: True if validation passes, False otherwise + """ + print(f"Validating experimental design file: {expdesign_file}") + + # Track combinations and their row numbers + combinations = defaultdict(list) + + # Read the file + try: + with open(expdesign_file, 'r') as f: + reader = csv.DictReader(f, delimiter='\t') + + # Check if required columns exist + required_cols = ['Fraction_Group', 'Fraction', 'Label'] + if not all(col in reader.fieldnames for col in required_cols): + print(f"ERROR: Missing required columns. Expected columns: {required_cols}") + print(f"Found columns: {reader.fieldnames}") + return False + + # Check each row for duplicates + for row_num, row in enumerate(reader, start=2): # Start at 2 because line 1 is header + fraction_group = row.get('Fraction_Group', '') + fraction = row.get('Fraction', '') + label = row.get('Label', '') + + # Create the combination tuple + combination = (fraction_group, fraction, label) + + # Track which rows have this combination + combinations[combination].append({ + 'row': row_num, + 'data': row + }) + + # Check for duplicates + duplicates_found = False + for combination, occurrences in combinations.items(): + if len(occurrences) > 1: + duplicates_found = True + fraction_group, fraction, label = combination + print(f"\nERROR: Duplicate (Fraction_Group={fraction_group}, Fraction={fraction}, Label={label}) combination found!") + print(f"This combination appears {len(occurrences)} times in the following rows:") + + for occurrence in occurrences: + row_num = occurrence['row'] + data = occurrence['data'] + # Get relevant columns for display + sample = data.get('Sample', 'N/A') + spectra = data.get('Spectra_Filepath', data.get('MSRun', 'N/A')) + print(f" - Row {row_num}: Sample={sample}, Spectra={spectra}") + + if duplicates_found: + print("\n" + "="*80) + print("VALIDATION FAILED: Duplicate (Fraction_Group, Fraction, Label) combinations 
detected!") + print("="*80) + print("\nPlease fix the SDRF file to ensure each (Fraction_Group, Fraction, Label) combination is unique.") + print("Common causes:") + print(" - Duplicate label assignments for the same data file") + print(" - Incorrect fraction or fraction group assignments") + print(" - Copy-paste errors in the SDRF file") + return False + else: + print(f"\n✓ Validation passed: All {len(combinations)} (Fraction_Group, Fraction, Label) combinations are unique.") + return True + + except FileNotFoundError: + print(f"ERROR: File not found: {expdesign_file}") + return False + except Exception as e: + print(f"ERROR: Failed to read or parse the file: {e}") + return False + + +def main(): + parser = argparse.ArgumentParser( + description='Validate OpenMS experimental design files for duplicate combinations', + formatter_class=argparse.RawDescriptionHelpFormatter + ) + parser.add_argument( + '--expdesign', + required=True, + help='Path to the OpenMS experimental design TSV file' + ) + + args = parser.parse_args() + + # Run validation + is_valid = validate_expdesign(args.expdesign) + + # Exit with appropriate code + if is_valid: + sys.exit(0) + else: + sys.exit(1) + + +if __name__ == '__main__': + main() diff --git a/modules/local/expdesign_validator/environment.yml b/modules/local/expdesign_validator/environment.yml new file mode 100644 index 00000000..2f89a98b --- /dev/null +++ b/modules/local/expdesign_validator/environment.yml @@ -0,0 +1,7 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +name: expdesign_validator +channels: + - conda-forge + - bioconda +dependencies: + - python=3.9 diff --git a/modules/local/expdesign_validator/main.nf b/modules/local/expdesign_validator/main.nf new file mode 100644 index 00000000..306344ee --- /dev/null +++ b/modules/local/expdesign_validator/main.nf @@ -0,0 +1,27 @@ +process EXPDESIGN_VALIDATOR { + tag "$expdesign.Name" + label
'process_tiny' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/python:3.9' : + 'biocontainers/python:3.9' }" + + input: + path expdesign + + output: + path "${expdesign}", emit: ch_validated_expdesign + path "*.log" , emit: log + path "versions.yml", emit: versions + + script: + """ + validate_expdesign.py --expdesign "${expdesign}" 2>&1 | tee validation.log + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + python: \$(python --version 2>&1 | sed 's/Python //g') + END_VERSIONS + """ +} diff --git a/modules/local/expdesign_validator/meta.yml b/modules/local/expdesign_validator/meta.yml new file mode 100644 index 00000000..3501117d --- /dev/null +++ b/modules/local/expdesign_validator/meta.yml @@ -0,0 +1,38 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +name: "expdesign_validator" +description: Validates OpenMS experimental design files for duplicate (Fraction_Group, Fraction, Label) combinations +keywords: + - validation + - experimental design + - openms + - proteomics +tools: + - "quantms-utils": + description: "Utility tools for the quantms pipeline" + homepage: "https://github.com/bigbio/quantms-utils" + documentation: "https://github.com/bigbio/quantms-utils" + +input: + - expdesign: + type: file + description: OpenMS experimental design file in TSV format + pattern: "*.tsv" + +output: + - ch_validated_expdesign: + type: file + description: Validated experimental design file + pattern: "*.tsv" + - log: + type: file + description: Validation log file + pattern: "*.log" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + +authors: + - "@bigbio" +maintainers: + - "@bigbio" diff --git a/subworkflows/local/create_input_channel/main.nf b/subworkflows/local/create_input_channel/main.nf index 
aeba504d..7c500fc7 100644 --- a/subworkflows/local/create_input_channel/main.nf +++ b/subworkflows/local/create_input_channel/main.nf @@ -3,6 +3,7 @@ // include { SDRF_PARSING } from '../../../modules/local/sdrf_parsing/main' include { PREPROCESS_EXPDESIGN } from '../../../modules/local/preprocess_expdesign' +include { EXPDESIGN_VALIDATOR } from '../../../modules/local/expdesign_validator/main' @@ -19,14 +20,21 @@ workflow CREATE_INPUT_CHANNEL { ch_versions = ch_versions.mix(SDRF_PARSING.out.versions) ch_config = SDRF_PARSING.out.ch_sdrf_config_file - ch_expdesign = SDRF_PARSING.out.ch_expdesign + // Validate the experimental design for duplicate combinations + EXPDESIGN_VALIDATOR(SDRF_PARSING.out.ch_expdesign) + ch_versions = ch_versions.mix(EXPDESIGN_VALIDATOR.out.versions) + ch_expdesign = EXPDESIGN_VALIDATOR.out.ch_validated_expdesign } else { PREPROCESS_EXPDESIGN(ch_sdrf_or_design) ch_versions = ch_versions.mix(PREPROCESS_EXPDESIGN.out.versions) ch_config = PREPROCESS_EXPDESIGN.out.ch_config - ch_expdesign = PREPROCESS_EXPDESIGN.out.ch_expdesign + + // Validate the experimental design for duplicate combinations + EXPDESIGN_VALIDATOR(PREPROCESS_EXPDESIGN.out.ch_expdesign) + ch_versions = ch_versions.mix(EXPDESIGN_VALIDATOR.out.versions) + ch_expdesign = EXPDESIGN_VALIDATOR.out.ch_validated_expdesign } def Set enzymes = [] From d5bfc6d96575c69e2fa9afaf6dab4dcd66126601 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 3 Dec 2025 10:57:50 +0000 Subject: [PATCH 3/7] Fix code review feedback: use lowercase file property and UTF-8 encoding Co-authored-by: ypriverol <52113+ypriverol@users.noreply.github.com> --- bin/validate_expdesign.py | 2 +- modules/local/expdesign_validator/main.nf | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/bin/validate_expdesign.py b/bin/validate_expdesign.py index 64d32031..8e5f3a52 100755 --- a/bin/validate_expdesign.py +++ b/bin/validate_expdesign.py @@ 
-30,7 +30,7 @@ def validate_expdesign(expdesign_file): # Read the file try: - with open(expdesign_file, 'r') as f: + with open(expdesign_file, 'r', encoding='utf-8') as f: reader = csv.DictReader(f, delimiter='\t') # Check if required columns exist diff --git a/modules/local/expdesign_validator/main.nf b/modules/local/expdesign_validator/main.nf index 306344ee..ef8be5b6 100644 --- a/modules/local/expdesign_validator/main.nf +++ b/modules/local/expdesign_validator/main.nf @@ -1,5 +1,5 @@ process EXPDESIGN_VALIDATOR { - tag "$expdesign.Name" + tag "$expdesign.name" label 'process_tiny' conda "${moduleDir}/environment.yml" From f3141641dc63866368d134f60e6344f46cf60720 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 3 Dec 2025 11:07:24 +0000 Subject: [PATCH 4/7] Integrate validation into SDRF_PARSING and PREPROCESS_EXPDESIGN processes Co-authored-by: ypriverol <52113+ypriverol@users.noreply.github.com> --- .../local/expdesign_validator/environment.yml | 7 ---- modules/local/expdesign_validator/main.nf | 27 ------------- modules/local/expdesign_validator/meta.yml | 38 ------------------- modules/local/preprocess_expdesign/main.nf | 3 ++ modules/local/sdrf_parsing/main.nf | 3 ++ .../local/create_input_channel/main.nf | 12 +----- 6 files changed, 8 insertions(+), 82 deletions(-) delete mode 100644 modules/local/expdesign_validator/environment.yml delete mode 100644 modules/local/expdesign_validator/main.nf delete mode 100644 modules/local/expdesign_validator/meta.yml diff --git a/modules/local/expdesign_validator/environment.yml b/modules/local/expdesign_validator/environment.yml deleted file mode 100644 index 2f89a98b..00000000 --- a/modules/local/expdesign_validator/environment.yml +++ /dev/null @@ -1,7 +0,0 @@ -# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json -name: expdesign_validator -channels: - - conda-forge - - bioconda 
-dependencies: - - python=3.9 diff --git a/modules/local/expdesign_validator/main.nf b/modules/local/expdesign_validator/main.nf deleted file mode 100644 index ef8be5b6..00000000 --- a/modules/local/expdesign_validator/main.nf +++ /dev/null @@ -1,27 +0,0 @@ -process EXPDESIGN_VALIDATOR { - tag "$expdesign.name" - label 'process_tiny' - - conda "${moduleDir}/environment.yml" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/python:3.9' : - 'biocontainers/python:3.9' }" - - input: - path expdesign - - output: - path "${expdesign}", emit: ch_validated_expdesign - path "*.log" , emit: log - path "versions.yml", emit: versions - - script: - """ - validate_expdesign.py --expdesign "${expdesign}" 2>&1 | tee validation.log - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - python: \$(python --version 2>&1 | sed 's/Python //g') - END_VERSIONS - """ -} diff --git a/modules/local/expdesign_validator/meta.yml b/modules/local/expdesign_validator/meta.yml deleted file mode 100644 index 3501117d..00000000 --- a/modules/local/expdesign_validator/meta.yml +++ /dev/null @@ -1,38 +0,0 @@ -# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json -name: "expdesign_validator" -description: Validates OpenMS experimental design files for duplicate (Fraction_Group, Fraction, Label) combinations -keywords: - - validation - - experimental design - - openms - - proteomics -tools: - - "quantms-utils": - description: "Utility tools for the quantms pipeline" - homepage: "https://github.com/bigbio/quantms-utils" - documentation: "https://github.com/bigbio/quantms-utils" - -input: - - expdesign: - type: file - description: OpenMS experimental design file in TSV format - pattern: "*.tsv" - -output: - - ch_validated_expdesign: - type: file - description: Validated experimental design file - pattern: "*.tsv" - - log: - type: file - 
description: Validation log file - pattern: "*.log" - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" - -authors: - - "@bigbio" -maintainers: - - "@bigbio" diff --git a/modules/local/preprocess_expdesign/main.nf b/modules/local/preprocess_expdesign/main.nf index 1880fd11..1510527b 100644 --- a/modules/local/preprocess_expdesign/main.nf +++ b/modules/local/preprocess_expdesign/main.nf @@ -27,6 +27,9 @@ process PREPROCESS_EXPDESIGN { # we edit the design here and change the endings. sed 's/.raw\\t/.mzML\\t/I' ${design} > ${design.baseName}_openms_design.tsv + # Validate the experimental design for duplicate combinations + validate_expdesign.py --expdesign ${design.baseName}_openms_design.tsv + # here we extract the filenames and fake an empty config (since the config values will be deduced from the workflow params) a=\$(grep -n '^\$' ${design} | head -n 1 | awk -F ":" '{print \$1}') sed -e ''"\${a}"',\$d' ${design} > ${design.baseName}_config.tsv diff --git a/modules/local/sdrf_parsing/main.nf b/modules/local/sdrf_parsing/main.nf index e379facd..37250cce 100644 --- a/modules/local/sdrf_parsing/main.nf +++ b/modules/local/sdrf_parsing/main.nf @@ -37,6 +37,9 @@ process SDRF_PARSING { mv openms.tsv ${sdrf.baseName}_config.tsv mv experimental_design.tsv ${sdrf.baseName}_openms_design.tsv + # Validate the experimental design for duplicate combinations + validate_expdesign.py --expdesign ${sdrf.baseName}_openms_design.tsv 2>&1 | tee -a ${sdrf.baseName}_parsing.log + cat <<-END_VERSIONS > versions.yml "${task.process}": sdrf-pipelines: \$(parse_sdrf --version 2>/dev/null | awk -F ' ' '{print \$2}') diff --git a/subworkflows/local/create_input_channel/main.nf b/subworkflows/local/create_input_channel/main.nf index 7c500fc7..aeba504d 100644 --- a/subworkflows/local/create_input_channel/main.nf +++ b/subworkflows/local/create_input_channel/main.nf @@ -3,7 +3,6 @@ // include { SDRF_PARSING } from 
'../../../modules/local/sdrf_parsing/main' include { PREPROCESS_EXPDESIGN } from '../../../modules/local/preprocess_expdesign' -include { EXPDESIGN_VALIDATOR } from '../../../modules/local/expdesign_validator/main' @@ -20,21 +19,14 @@ workflow CREATE_INPUT_CHANNEL { ch_versions = ch_versions.mix(SDRF_PARSING.out.versions) ch_config = SDRF_PARSING.out.ch_sdrf_config_file - // Validate the experimental design for duplicate combinations - EXPDESIGN_VALIDATOR(SDRF_PARSING.out.ch_expdesign) - ch_versions = ch_versions.mix(EXPDESIGN_VALIDATOR.out.versions) - ch_expdesign = EXPDESIGN_VALIDATOR.out.ch_validated_expdesign + ch_expdesign = SDRF_PARSING.out.ch_expdesign } else { PREPROCESS_EXPDESIGN(ch_sdrf_or_design) ch_versions = ch_versions.mix(PREPROCESS_EXPDESIGN.out.versions) ch_config = PREPROCESS_EXPDESIGN.out.ch_config - - // Validate the experimental design for duplicate combinations - EXPDESIGN_VALIDATOR(PREPROCESS_EXPDESIGN.out.ch_expdesign) - ch_versions = ch_versions.mix(EXPDESIGN_VALIDATOR.out.versions) - ch_expdesign = EXPDESIGN_VALIDATOR.out.ch_validated_expdesign + ch_expdesign = PREPROCESS_EXPDESIGN.out.ch_expdesign } def Set enzymes = [] From 18af9f6ef63de7f4a73a8fe4eddcb8fda67b0cbf Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 3 Dec 2025 11:09:34 +0000 Subject: [PATCH 5/7] Add consistent logging for validation in PREPROCESS_EXPDESIGN Co-authored-by: ypriverol <52113+ypriverol@users.noreply.github.com> --- modules/local/preprocess_expdesign/main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/local/preprocess_expdesign/main.nf b/modules/local/preprocess_expdesign/main.nf index 1510527b..05768f85 100644 --- a/modules/local/preprocess_expdesign/main.nf +++ b/modules/local/preprocess_expdesign/main.nf @@ -28,7 +28,7 @@ process PREPROCESS_EXPDESIGN { sed 's/.raw\\t/.mzML\\t/I' ${design} > ${design.baseName}_openms_design.tsv # Validate the experimental design for 
duplicate combinations - validate_expdesign.py --expdesign ${design.baseName}_openms_design.tsv + validate_expdesign.py --expdesign ${design.baseName}_openms_design.tsv 2>&1 | tee validation.log # here we extract the filenames and fake an empty config (since the config values will be deduced from the workflow params) a=\$(grep -n '^\$' ${design} | head -n 1 | awk -F ":" '{print \$1}') From 1fb6c651e7c8fe348fc772a652667d18485e2754 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 3 Dec 2025 11:33:49 +0000 Subject: [PATCH 6/7] Prepare integration with quantms-utils for validation command Co-authored-by: ypriverol <52113+ypriverol@users.noreply.github.com> --- QUANTMS_UTILS_INTEGRATION.md | 100 +++++++++++++++++++++ modules/local/preprocess_expdesign/main.nf | 4 +- 2 files changed, 102 insertions(+), 2 deletions(-) create mode 100644 QUANTMS_UTILS_INTEGRATION.md diff --git a/QUANTMS_UTILS_INTEGRATION.md b/QUANTMS_UTILS_INTEGRATION.md new file mode 100644 index 00000000..f5385af3 --- /dev/null +++ b/QUANTMS_UTILS_INTEGRATION.md @@ -0,0 +1,100 @@ +# quantms-utils Integration Requirements + +This document describes the validation functionality that needs to be added to the `quantms-utils` library to support the experimental design validation feature in quantms. + +## Required Command + +Add a new command `validateexpdesign` to the `quantmsutilsc` CLI tool in quantms-utils. + +### Command Signature + +```bash +quantmsutilsc validateexpdesign --expdesign +``` + +### Functionality + +The command should validate OpenMS experimental design files for duplicate `(Fraction_Group, Fraction, Label)` combinations. + +#### Input +- `--expdesign`: Path to the OpenMS experimental design TSV file + +#### Validation Logic +1. Read the TSV file with tab delimiter +2. Check for required columns: `Fraction_Group`, `Fraction`, `Label` +3. For each row, extract the combination of `(Fraction_Group, Fraction, Label)` +4. 
Track all occurrences of each combination with row numbers +5. If any combination appears more than once, report: + - The duplicate combination values + - How many times it appears + - The specific row numbers where it appears + - Sample and Spectra_Filepath/MSRun information for each duplicate row + +#### Output +- **On success**: Print validation success message and exit with code 0 +- **On failure**: Print detailed error messages showing: + - Which combinations are duplicated + - Row numbers for each duplicate + - Sample and file information for debugging + - Exit with code 1 + +#### Example Output (Success) +``` +Validating experimental design file: test_design.tsv + +✓ Validation passed: All 24 (Fraction_Group, Fraction, Label) combinations are unique. +``` + +#### Example Output (Failure) +``` +Validating experimental design file: test_design.tsv + +ERROR: Duplicate (Fraction_Group=1, Fraction=1, Label=TMT130N) combination found! +This combination appears 2 times in the following rows: + - Row 2: Sample=TMT130N_mus, Spectra=Margolis_Mouse_Neuronal_TMT_TP_F1.mzML + - Row 4: Sample=TMT130N_mus, Spectra=Margolis_Mouse_Neuronal_TMT_TP_F1.mzML + +================================================================================ +VALIDATION FAILED: Duplicate (Fraction_Group, Fraction, Label) combinations detected! +================================================================================ + +Please fix the SDRF file to ensure each (Fraction_Group, Fraction, Label) combination is unique. +Common causes: + - Duplicate label assignments for the same data file + - Incorrect fraction or fraction group assignments + - Copy-paste errors in the SDRF file +``` + +## Implementation Reference + +The reference implementation is currently in `bin/validate_expdesign.py` in this repository. The core validation logic should be extracted and adapted for quantms-utils: + +### Key Components +1. **Function**: `validate_expdesign(expdesign_file: str) -> bool` +2.
**Error handling**: File not found, missing columns, CSV parsing errors +3. **Duplicate detection**: Using combinations as dictionary keys to track occurrences +4. **Detailed reporting**: Row numbers, sample names, file paths for each duplicate + +### Suggested Location in quantms-utils +- Module: `quantmsutils/sdrf/expdesign_validator.py` +- CLI integration: `quantmsutils/commands/validateexpdesign.py` + +## Usage in quantms + +Once the command is available in quantms-utils, it will be called by: +1. **PREPROCESS_EXPDESIGN** process - validates user-provided experimental designs +2. **SDRF_PARSING** process - uses the bin/validate_expdesign.py script (since it uses sdrf-pipelines container) + +## Container Requirements + +The validation command must be available in the `quantms-utils` container: +- Container: `biocontainers/quantms-utils:0.0.25` (or later version) +- The command should be accessible via the `quantmsutilsc` entry point + +## Migration Plan + +1. Add validation function to quantms-utils library +2. Add CLI command to quantmsutilsc +3. Release new version of quantms-utils (e.g., 0.0.25) +4. Update quantms to use the new version +5.
Eventually remove bin/validate_expdesign.py from quantms (after ensuring both containers can use quantms-utils) diff --git a/modules/local/preprocess_expdesign/main.nf b/modules/local/preprocess_expdesign/main.nf index 05768f85..3b21bab3 100644 --- a/modules/local/preprocess_expdesign/main.nf +++ b/modules/local/preprocess_expdesign/main.nf @@ -28,7 +28,7 @@ process PREPROCESS_EXPDESIGN { sed 's/.raw\\t/.mzML\\t/I' ${design} > ${design.baseName}_openms_design.tsv # Validate the experimental design for duplicate combinations - validate_expdesign.py --expdesign ${design.baseName}_openms_design.tsv 2>&1 | tee validation.log + quantmsutilsc validateexpdesign --expdesign ${design.baseName}_openms_design.tsv 2>&1 | tee validation.log # here we extract the filenames and fake an empty config (since the config values will be deduced from the workflow params) a=\$(grep -n '^\$' ${design} | head -n 1 | awk -F ":" '{print \$1}') @@ -36,7 +36,7 @@ process PREPROCESS_EXPDESIGN { cat <<-END_VERSIONS > versions.yml "${task.process}": - sdrf-pipelines: \$(parse_sdrf --version 2>/dev/null | awk -F ' ' '{print \$2}') + quantms-utils: \$(pip show quantms-utils | grep "Version" | awk -F ': ' '{print \$2}') END_VERSIONS """ } From 6cedf2fb199c159245d859b87e8a37a0ede1ad70 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 3 Dec 2025 11:44:47 +0000 Subject: [PATCH 7/7] Remove QUANTMS_UTILS_INTEGRATION.md documentation file Co-authored-by: ypriverol <52113+ypriverol@users.noreply.github.com> --- QUANTMS_UTILS_INTEGRATION.md | 100 ----------------------------------- 1 file changed, 100 deletions(-) delete mode 100644 QUANTMS_UTILS_INTEGRATION.md diff --git a/QUANTMS_UTILS_INTEGRATION.md b/QUANTMS_UTILS_INTEGRATION.md deleted file mode 100644 index f5385af3..00000000 --- a/QUANTMS_UTILS_INTEGRATION.md +++ /dev/null @@ -1,100 +0,0 @@ -# quantms-utils Integration Requirements - -This document describes the validation functionality
that needs to be added to the `quantms-utils` library to support the experimental design validation feature in quantms. - -## Required Command - -Add a new command `validateexpdesign` to the `quantmsutilsc` CLI tool in quantms-utils. - -### Command Signature - -```bash -quantmsutilsc validateexpdesign --expdesign -``` - -### Functionality - -The command should validate OpenMS experimental design files for duplicate `(Fraction_Group, Fraction, Label)` combinations. - -#### Input -- `--expdesign`: Path to the OpenMS experimental design TSV file - -#### Validation Logic -1. Read the TSV file with tab delimiter -2. Check for required columns: `Fraction_Group`, `Fraction`, `Label` -3. For each row, extract the combination of `(Fraction_Group, Fraction, Label)` -4. Track all occurrences of each combination with row numbers -5. If any combination appears more than once, report: - - The duplicate combination values - - How many times it appears - - The specific row numbers where it appears - - Sample and Spectra_Filepath/MSRun information for each duplicate row - -#### Output -- **On success**: Print validation success message and exit with code 0 -- **On failure**: Print detailed error messages showing: - - Which combinations are duplicated - - Row numbers for each duplicate - - Sample and file information for debugging - - Exit with code 1 - -#### Example Output (Success) -``` -Validating experimental design file: test_design.tsv - -✓ Validation passed: All 24 (Fraction_Group, Fraction, Label) combinations are unique. -``` - -#### Example Output (Failure) -``` -Validating experimental design file: test_design.tsv - -ERROR: Duplicate (Fraction_Group=1, Fraction=1, Label=TMT130N) combination found!
-This combination appears 2 times in the following rows: - - Row 2: Sample=TMT130N_mus, Spectra=Margolis_Mouse_Neuronal_TMT_TP_F1.mzML - - Row 4: Sample=TMT130N_mus, Spectra=Margolis_Mouse_Neuronal_TMT_TP_F1.mzML - -================================================================================ -VALIDATION FAILED: Duplicate (Fraction_Group, Fraction, Label) combinations detected! -================================================================================ - -Please fix the SDRF file to ensure each (Fraction_Group, Fraction, Label) combination is unique. -Common causes: - - Duplicate label assignments for the same data file - - Incorrect fraction or fraction group assignments - - Copy-paste errors in the SDRF file -``` - -## Implementation Reference - -The reference implementation is currently in `bin/validate_expdesign.py` in this repository. The core validation logic should be extracted and adapted for quantms-utils: - -### Key Components -1. **Function**: `validate_expdesign(expdesign_file: str) -> bool` -2. **Error handling**: File not found, missing columns, CSV parsing errors -3. **Duplicate detection**: Using combinations as dictionary keys to track occurrences -4. **Detailed reporting**: Row numbers, sample names, file paths for each duplicate - -### Suggested Location in quantms-utils -- Module: `quantmsutils/sdrf/expdesign_validator.py` -- CLI integration: `quantmsutils/commands/validateexpdesign.py` - -## Usage in quantms - -Once the command is available in quantms-utils, it will be called by: -1. **PREPROCESS_EXPDESIGN** process - validates user-provided experimental designs -2. 
**SDRF_PARSING** process - uses the bin/validate_expdesign.py script (since it uses sdrf-pipelines container) - -## Container Requirements - -The validation command must be available in the `quantms-utils` container: -- Container: `biocontainers/quantms-utils:0.0.25` (or later version) -- The command should be accessible via the `quantmsutilsc` entry point - -## Migration Plan - -1. Add validation function to quantms-utils library -2. Add CLI command to quantmsutilsc -3. Release new version of quantms-utils (e.g., 0.0.25) -4. Update quantms to use the new version -5. Eventually remove bin/validate_expdesign.py from quantms (after ensuring both containers can use quantms-utils)