Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions modules/nf-core/cellranger/count/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ process CELLRANGER_COUNT {
input:
tuple val(meta), path(reads, stageAs: "fastq_???/*")
path reference
val skip_renaming
val ignore_filename_pattern
Comment on lines 7 to +11

output:
tuple val(meta), path("**/outs/**"), emit: outs
Expand Down
40 changes: 24 additions & 16 deletions modules/nf-core/cellranger/count/templates/cellranger_count.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,23 +38,31 @@ def chunk_iter(seq, size):
filename_pattern = r"([^a-zA-Z0-9])R1([^a-zA-Z0-9])"

# Move every (R1, R2) pair into `fastq_all`, either under its current file name or
# under a generated `<sample_id>_S1_L<NNN>_R{1,2}_001.fastq.gz` name.
#
# The "${...}" tokens are filled in by Nextflow before Python sees this template, so
# the flags are compared as the literal strings "true"/"false", not Python booleans.
for i, (r1, r2) in enumerate(chunk_iter(fastqs, 2), start=1):
    if "${skip_renaming}" == "true":  # nf variables are true/false, which are different from Python
        # Renaming disabled: keep the names the files already have.
        resolved_name_r1 = r1.name
        resolved_name_r2 = r2.name
    else:
        # The pairing check is only bypassed on an explicit "true"; any other rendered
        # value (including an unset one) keeps the validation active, mirroring how
        # skip_renaming is tested above.
        # double escapes are required because nextflow processes this python 'template'
        if "${ignore_filename_pattern}" != "true" and (
            re.sub(filename_pattern, r"\\1R2\\2", r1.name) != r2.name
        ):
            raise AssertionError(
                dedent(
                    f"""\
                    We expect R1 and R2 of the same sample to have the same filename except for R1/R2.
                    This has been checked by replacing "R1" with "R2" in the first filename and comparing it to the second filename.
                    If you believe this check shouldn't have failed on your filenames, please report an issue on GitHub!

                    Files involved:
                    - {r1}
                    - {r2}
                    """
                )
            )

        # `i` is the 1-based pair index and is zero-padded to three digits in the name.
        resolved_name_r1 = f"{sample_id}_S1_L{i:03d}_R1_001.fastq.gz"
        resolved_name_r2 = f"{sample_id}_S1_L{i:03d}_R2_001.fastq.gz"

    # Single rename per file, shared by both branches.
    r1.rename(fastq_all / resolved_name_r1)
    r2.rename(fastq_all / resolved_name_r2)

# fmt: off
run(
Expand Down
81 changes: 81 additions & 0 deletions modules/nf-core/cellranger/count/tests/main.nf.test
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,8 @@ nextflow_process {
]
]
input[1] = CELLRANGER_MKREF.out.reference
input[2] = false
input[3] = false
"""
}
}
Expand Down Expand Up @@ -90,6 +92,8 @@ nextflow_process {
]
]
input[1] = CELLRANGER_MKREF.out.reference
input[2] = false
input[3] = false
"""
}
}
Expand All @@ -102,5 +106,82 @@ nextflow_process {
}

}
// Negative case: with skip_renaming (input[2]) and ignore_filename_pattern (input[3])
// both false, FASTQs whose names carry no "R1"/"R2" token must make the process fail.
test("10x example file - nonstandard filename renaming fails") {
config "./nextflow.config"
tag "cellranger_rename_logic"

when {
process {
// The setup below copies the reference R1/R2 FASTQs into workDir under names that
// end in "_1"/"_2", and passes those copies as the reads.
"""
original_r1 = file(params.modules_testdata_base_path + 'genomics/homo_sapiens/10xgenomics/cellranger/5k_cmvpos_tcells/fastqs/gex_1/subsampled_5k_human_antiCMV_T_TBNK_connect_GEX_1_S1_L001_R1_001.fastq.gz', checkIfExists: true)
original_r2 = file(params.modules_testdata_base_path + 'genomics/homo_sapiens/10xgenomics/cellranger/5k_cmvpos_tcells/fastqs/gex_1/subsampled_5k_human_antiCMV_T_TBNK_connect_GEX_1_S1_L001_R2_001.fastq.gz', checkIfExists: true)

new_r1_loc = "${workDir}/fastqs/subsampled_5k_human_antiCMV_T_TBNK_connect_GEX_1.fastq.gz"
new_r2_loc = "${workDir}/fastqs/subsampled_5k_human_antiCMV_T_TBNK_connect_GEX_2.fastq.gz"

original_r1.copyTo(new_r1_loc)
original_r2.copyTo(new_r2_loc)

input[0] = [
[ id:'test_10x', single_end:false, strandedness:'auto' ], // meta map
[ file(new_r1_loc), file(new_r2_loc) ]
]
input[1] = CELLRANGER_MKREF.out.reference
input[2] = false
input[3] = false
"""
}
}

// Only the failure itself is asserted; no snapshot is taken for this case.
then { assert process.failed }
}
// Positive counterpart of the failing-rename case: the same "_1"/"_2" FASTQ copies are
// used, but ignore_filename_pattern (input[3]) is true, so the process must succeed.
test("10x example file - nonstandard filename, rename succeeds with skipped validation") {
config "./nextflow.config"
tag "cellranger_rename_logic"

when {
process {
// The setup below copies the reference R1/R2 FASTQs into workDir under names that
// end in "_1"/"_2", and passes those copies as the reads.
"""
original_r1 = file(params.modules_testdata_base_path + 'genomics/homo_sapiens/10xgenomics/cellranger/5k_cmvpos_tcells/fastqs/gex_1/subsampled_5k_human_antiCMV_T_TBNK_connect_GEX_1_S1_L001_R1_001.fastq.gz', checkIfExists: true)
original_r2 = file(params.modules_testdata_base_path + 'genomics/homo_sapiens/10xgenomics/cellranger/5k_cmvpos_tcells/fastqs/gex_1/subsampled_5k_human_antiCMV_T_TBNK_connect_GEX_1_S1_L001_R2_001.fastq.gz', checkIfExists: true)

new_r1_loc = "${workDir}/fastqs/subsampled_5k_human_antiCMV_T_TBNK_connect_GEX_1.fastq.gz"
new_r2_loc = "${workDir}/fastqs/subsampled_5k_human_antiCMV_T_TBNK_connect_GEX_2.fastq.gz"

original_r1.copyTo(new_r1_loc)
original_r2.copyTo(new_r2_loc)

input[0] = [
[ id:'test_10x', single_end:false, strandedness:'auto' ], // meta map
[ file(new_r1_loc), file(new_r2_loc) ]
]
input[1] = CELLRANGER_MKREF.out.reference
input[2] = false
input[3] = true
"""
}
}

then {
assertAll(
{ assert process.success },
{
// Snapshot the versions output plus every `outs` file except the four listed below;
// the BAM is represented by the MD5 of its SAM lines instead of a file checksum.
assert snapshot(
process.out.versions_cellranger,
process.out.outs[0][1].findAll { file(it).name !in [
'web_summary.html', // unstable checksum
'possorted_genome_bam.bam.bai', // unstable checksum despite identical content, verified with samtools idxstats
'possorted_genome_bam.bam', // unstable checksum due to header
'barcodes.tsv.gz' // empty file in test data -> would raise linting error
]} + [
"possorted_genome_bam_samlines:" + sam(process.out.outs.get(0).get(1).find { file(it).name == "possorted_genome_bam.bam" }).getSamLinesMD5() // checksum is unstable when header is included
],
).match()
},
// Files left out of the snapshot (other than the BAM) are only checked for existence.
{ assert file(process.out.outs.get(0).get(1).find { file(it).name == 'web_summary.html' }).exists() },
{ assert file(process.out.outs.get(0).get(1).find { file(it).name == 'barcodes.tsv.gz' }).exists() },
{ assert file(process.out.outs.get(0).get(1).find { file(it).name == 'possorted_genome_bam.bam.bai' }).exists() },
)
}
}
}
31 changes: 27 additions & 4 deletions modules/nf-core/cellranger/count/tests/main.nf.test.snap
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,34 @@
"possorted_genome_bam_samlines:14b70a2253bcaf40b1c6428e5b66f7a0"
]
],
"timestamp": "2026-03-13T11:50:19.775498195",
"meta": {
"nf-test": "0.9.4",
"nextflow": "25.10.4"
}
},
"timestamp": "2026-03-13T11:50:19.775498195"
},
"10x example file - nonstandard filename, rename succeeds with skipped validation": {
"content": [
[
"versions.yml:md5,0d99b1cd733a51d67540d796e6b1e1f6"
],
[
"filtered_feature_bc_matrix.h5:md5,530804f5b2fdc9d262b4b6d32bde8b7b",
"features.tsv.gz:md5,9f93621be0bede2b75596ad255607633",
"matrix.mtx.gz:md5,925a0f46932cba157c44cb94e0c06314",
"metrics_summary.csv:md5,14a544ef1204f4f9d128bc15febb94cf",
"molecule_info.h5:md5,ac30e998aae699f09978be60d548635c",
"raw_feature_bc_matrix.h5:md5,803d3ffa688dccd898c073b300c23ff9",
"features.tsv.gz:md5,9f93621be0bede2b75596ad255607633",
"matrix.mtx.gz:md5,a030e644e8b57df71142e9e26c306b5e",
"possorted_genome_bam_samlines:14b70a2253bcaf40b1c6428e5b66f7a0"
]
],
"meta": {
"nf-test": "0.9.3",
"nextflow": "25.04.8"
Comment on lines +43 to +44
},
"timestamp": "2026-05-14T14:00:50.314716225"
},
"10x example file - stub": {
"content": [
Expand Down Expand Up @@ -53,10 +76,10 @@
]
}
],
"timestamp": "2026-03-11T15:59:25.090371925",
"meta": {
"nf-test": "0.9.4",
"nextflow": "25.10.4"
}
},
"timestamp": "2026-03-11T15:59:25.090371925"
}
}
1 change: 1 addition & 0 deletions modules/nf-core/cellranger/multi/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ process CELLRANGER_MULTI {
path frna_sampleinfo , stageAs: "references/frna/*"
path ocm_barcodes , stageAs: "references/ocm/barcodes/*"
val skip_renaming
val ignore_filename_pattern

Comment on lines 26 to 30
output:
tuple val(meta), path("cellranger_multi_config.csv"), emit: config
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ def chunk_iter(seq, size):

else:
# double escapes are required because nextflow processes this python 'template'
if re.sub(filename_pattern, r"\\1R2\\2", r1.name) != r2.name:
if (re.sub(filename_pattern, r"\\1R2\\2", r1.name) != r2.name) and ("${ignore_filename_pattern}" == "false"):
raise AssertionError(
dedent(
f"""\
Expand Down
Loading
Loading