@@ -47,6 +47,7 @@ def backup_fastq(
4747 sample_sheet_fp : Path ,
4848 has_index : bool ,
4949 min_file_size : int ,
50+ allow_check_failures : bool = False ,
5051):
5152
5253 R1 = IlluminaFastq (gzip .open (forward_reads , mode = "rt" ))
@@ -58,25 +59,47 @@ def backup_fastq(
5859 illumina_fastqs = [IlluminaFastq (gzip .open (fp , mode = "rt" )) for fp in RI_fps ]
5960 r1 = illumina_fastqs [0 ]
6061
61- if not all ([ifq .check_fp_vs_content ()[0 ] for ifq in illumina_fastqs ]):
62+ fp_vs_content_results = [ifq .check_fp_vs_content ()[0 ] for ifq in illumina_fastqs ]
63+ if not all (fp_vs_content_results ):
6264 [ifq .check_fp_vs_content (verbose = True ) for ifq in illumina_fastqs ]
63- raise ValueError (
65+ message = (
6466 "The file path and header information don't match" ,
65- [str (ifq ) for ifq in illumina_fastqs if not ifq .check_fp_vs_content ()[0 ]],
67+ [
68+ str (ifq )
69+ for ifq , ok in zip (illumina_fastqs , fp_vs_content_results )
70+ if not ok
71+ ],
6672 )
67- if not all ([ifq .check_file_size (min_file_size ) for ifq in illumina_fastqs ]):
68- raise ValueError (
69- "File seems suspiciously small. Please check if you have the correct file or lower the minimum file size threshold" ,
70- [ifq .check_file_size (min_file_size ) for ifq in illumina_fastqs ],
73+ if allow_check_failures :
74+ warnings .warn (f"{ message [0 ]} : { message [1 ]} " )
75+ else :
76+ raise ValueError (* message )
77+ file_size_results = [ifq .check_file_size (min_file_size ) for ifq in illumina_fastqs ]
78+ if not all (file_size_results ):
79+ message = (
80+ "File seems suspiciously small. Please check if you have the correct file or"
81+ " lower the minimum file size threshold" ,
82+ file_size_results ,
7183 )
84+ if allow_check_failures :
85+ warnings .warn (f"{ message [0 ]} : { message [1 ]} " )
86+ else :
87+ raise ValueError (* message )
7288 if not all ([ifq .check_index_read_exists () for ifq in illumina_fastqs ]):
7389 warnings .warn (
7490 "No barcodes in headers. Were the fastq files generated properly?"
7591 )
7692
7793 # parse the info from the headers in EACH file and check they are consistent with each other
78- if not all ([fastq .is_same_run (illumina_fastqs [0 ]) for fastq in illumina_fastqs ]):
79- raise ValueError ("The files are not from the same run." )
94+ same_run_results = [
95+ fastq .is_same_run (illumina_fastqs [0 ]) for fastq in illumina_fastqs
96+ ]
97+ if not all (same_run_results ):
98+ message = "The files are not from the same run."
99+ if allow_check_failures :
100+ warnings .warn (message )
101+ else :
102+ raise ValueError (message )
80103
81104 ## Archiving steps
82105
@@ -144,13 +167,19 @@ def main(argv=None):
144167 default = DEFAULT_MIN_FILE_SIZE ,
145168 help = "Minimum file size to register in bytes" ,
146169 )
170+ parser .add_argument (
171+ "--allow-check-failures" ,
172+ action = "store_true" ,
173+ help = "Continue archiving even if validation checks fail" ,
174+ )
147175 args = parser .parse_args (argv )
148176 return backup_fastq (
149177 args .forward_reads ,
150178 args .destination_dir ,
151179 args .sample_sheet ,
152180 not args .no_index ,
153181 args .min_file_size ,
182+ args .allow_check_failures ,
154183 )
155184
156185 # maybe also ask for single or double reads
0 commit comments