From f71fa0f7381bbcfe67df1f5dae90c50eb3067e6e Mon Sep 17 00:00:00 2001
From: gp201 <praneethg2001@gmail.com>
Date: Fri, 12 Sep 2025 13:36:05 -0700
Subject: [PATCH 1/4] fix: added assertion to ensure
 retraction_detection_date_utc is only present when is_retracted is True

---
 bin/fetch_sra_metadata.py | 28 +++++++++++++++++++++++-----
 1 file changed, 23 insertions(+), 5 deletions(-)

diff --git a/bin/fetch_sra_metadata.py b/bin/fetch_sra_metadata.py
index 5aeb639..1e3a7f1 100755
--- a/bin/fetch_sra_metadata.py
+++ b/bin/fetch_sra_metadata.py
@@ -318,7 +318,7 @@ def parse_args() -> argparse.Namespace:
     parser = argparse.ArgumentParser(description='Fetch SRA metadata for a BioProject.')
     parser.add_argument('-b', '--bioproject_ids', type=str, required=True, help='BioProject ID to monitor. Multiple IDs should be separated by commas.')
     parser.add_argument('-e', '--email', type=str, required=True, help='Email address for Entrez')
-    parser.add_argument('-m', '--metadata', type=str, required=True, help='Path to old metadata file')
+    parser.add_argument('-m', '--metadata', type=str, required=False, help='Path to old metadata file')
     parser.add_argument('-t', '--trimming_config', type=str, required=False, help='Path to trimming yaml file')
     parser.add_argument('-r', '--check_retracted', action='store_true', help='Check for retracted SRA runs')
     return parser.parse_args()
@@ -347,13 +347,25 @@ def main():
     """
 
     args = parse_args()
-
+    
+    # Resolve metadata path: use provided path or build a sensible default from bioproject ids
+    if args.metadata:
+        metadata_path = args.metadata
+    else:
+        proj_tag = "_".join([bid.strip() for bid in args.bioproject_ids.split(',') if bid.strip()]) or "sra"
+        metadata_path = f"{proj_tag}_metadata.csv"
+    
     # Process BioProject IDs
     bioproject_ids = [bid.strip() for bid in args.bioproject_ids.split(',') if bid.strip()]
     search_term = " OR ".join(f"{bid}[BioProject]" for bid in bioproject_ids)
 
     new_metadata = get_new_srps(search_term, args.email)
-    prev_metadata = pd.read_csv(args.metadata)
+    # Try to read previous metadata from the resolved metadata_path; if missing use empty df
+    try:
+        prev_metadata = pd.read_csv(metadata_path)
+    except (FileNotFoundError, pd.errors.EmptyDataError):
+        print(f"Metadata file {metadata_path} not found or empty, using empty DataFrame.")
+        prev_metadata = pd.DataFrame(columns=FIELDS.keys())
 
     new_sras = new_metadata.loc[~new_metadata['Run'].isin(prev_metadata['Run'])]
     combined_metadata = prev_metadata.copy()
@@ -380,7 +392,7 @@ def main():
             save_columns.append('global_trimming')
 
         new_sras[save_columns].to_csv(
-            args.metadata.replace('.csv', '_to_process.tsv'),
+            metadata_path.replace('.csv', '_to_process.tsv'),
             index=False,
             sep='\t'
         )
@@ -389,6 +401,8 @@ def main():
         # Check for retracted runs and update metadata accordingly
         retracted_runs = check_retracted_runs(prev_metadata, new_metadata)
         combined_metadata['is_retracted'] = combined_metadata['Run'].isin(retracted_runs)
+        # ensure is_retracted is boolean
+        combined_metadata['is_retracted'] = combined_metadata['is_retracted'].astype(bool)
 
         # Ensure the retraction detection date column exists
         if 'retraction_detection_date_utc' not in combined_metadata.columns:
@@ -399,10 +413,14 @@ def main():
         mask = combined_metadata['is_retracted'] & combined_metadata['retraction_detection_date_utc'].isna()
         combined_metadata.loc[mask, 'retraction_detection_date_utc'] = now_str
 
+        # assert that all retracted runs have a detection date and no non-retracted runs have a date
+        assert combined_metadata.loc[combined_metadata['is_retracted'], 'retraction_detection_date_utc'].notna().all()
+        assert combined_metadata.loc[~combined_metadata['is_retracted'], 'retraction_detection_date_utc'].isna().all()
+
     if combined_metadata.equals(prev_metadata):
         print("No new updates found.")
     else:
-        updated_path = args.metadata.replace('.csv', '_updated.csv')
+        updated_path = metadata_path.replace('.csv', '_updated.csv')
         combined_metadata.to_csv(updated_path, index=False)
         print(f"Updated metadata saved to {updated_path}")
 

From 0ea5340f2c8afab5ec98d571eec90adc0bb29e9a Mon Sep 17 00:00:00 2001
From: gp201 <praneethg2001@gmail.com>
Date: Fri, 12 Sep 2025 13:36:23 -0700
Subject: [PATCH 2/4] lint: lint fixes

---
 conf/test.config                          | 22 +++++++++++-----------
 modules/nf-core/sratools/prefetch/main.nf |  2 +-
 workflows/flusra.nf                       | 10 +++-------
 3 files changed, 15 insertions(+), 19 deletions(-)

diff --git a/conf/test.config b/conf/test.config
index 652d716..f673b5e 100644
--- a/conf/test.config
+++ b/conf/test.config
@@ -11,22 +11,22 @@
 */
 
 params {
-    config_profile_name        = 'Test profile'
+    config_profile_name = 'Test profile'
     config_profile_description = 'Test dataset to check pipeline function'
 
     // Limit resources so that this can run on GitHub Actions
-    max_cpus   = 2
+    max_cpus = 2
     max_memory = '6.GB'
-    max_time   = '6.h'
+    max_time = '6.h'
 
     // Input data
-    bioproject                  = null
-    samples_to_process          = "${projectDir}/assets/test/input/sra/input.tsv"
-    email                       = "test@test.com"
-    metadata                    = null
+    bioproject = null
+    samples_to_process = "${projectDir}/assets/test/input/sra/input.tsv"
+    email = "test@test.com"
+    metadata = null
     // Reference genome
-    reference                  = "${projectDir}/assets/test/input/ref/reference.fasta"
-    milk_reference             = "${projectDir}/assets/test/input/ref/milk_reference.fasta"
-    milk_barcode               = "${projectDir}/assets/test/input/barcode/barcode.csv"
-    outdir                     = "${projectDir}/testing/output"
+    reference = "${projectDir}/assets/test/input/ref/reference.fasta"
+    milk_reference = "${projectDir}/assets/test/input/ref/milk_reference.fasta"
+    milk_barcode = "${projectDir}/assets/test/input/barcode/barcode.csv"
+    outdir = "${projectDir}/testing/output"
 }
diff --git a/modules/nf-core/sratools/prefetch/main.nf b/modules/nf-core/sratools/prefetch/main.nf
index 29195e5..23e8182 100644
--- a/modules/nf-core/sratools/prefetch/main.nf
+++ b/modules/nf-core/sratools/prefetch/main.nf
@@ -14,7 +14,7 @@ process SRATOOLS_PREFETCH {
     when:
     task.ext.when == null || task.ext.when
 
-    shell:
+    script:
     args = '5 1 100'
     // <num retries> <base delay in seconds> <max delay in seconds>
     template('retry_with_backoff.sh')
diff --git a/workflows/flusra.nf b/workflows/flusra.nf
index 91a9ae5..d2f3c6d 100644
--- a/workflows/flusra.nf
+++ b/workflows/flusra.nf
@@ -25,7 +25,7 @@ workflow FLUSRA {
                 by: 1,
                 failOnDuplicate: true,
             )
-            .map { sra, meta, reads ->
+            .map { _sra, meta, reads ->
                 tuple(meta, reads)
             }
             .set { reads_ch }
@@ -71,14 +71,10 @@ workflow FLUSRA {
         .set { sample_reads_input }
 
     if (!params.fetch_and_pull) {
-        sample_reads_input.samples.filter { it
-            != null }
-            | PROCESS_SRA
+        sample_reads_input.samples.filter { it != null } | PROCESS_SRA
         ch_versions = ch_versions.mix(PROCESS_SRA.out.versions)
 
-        sample_reads_input.milk.filter { it
-            != null }
-            | MILK_FREYJA
+        sample_reads_input.milk.filter { it != null } | MILK_FREYJA
         ch_versions = ch_versions.mix(MILK_FREYJA.out.versions)
     }
 

From 9d8c077890bd44b794329ba253ed702dfc655e86 Mon Sep 17 00:00:00 2001
From: gp201 <praneethg2001@gmail.com>
Date: Fri, 12 Sep 2025 13:36:36 -0700
Subject: [PATCH 3/4] chore: bumped version to 1.5.2

---
 nextflow.config | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/nextflow.config b/nextflow.config
index 1a5a689..f5e7f6b 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -165,7 +165,7 @@ manifest {
     description     = """A pipeline to assemble avian flu genomes"""
     mainScript      = 'main.nf'
     nextflowVersion = '!>=24.10.0'
-    version         = '1.5.1'
+    version         = '1.5.2'
     doi             = ''
 }
 

From 10f6cbd2193963209921d1b9bd2f902fe1afa97b Mon Sep 17 00:00:00 2001
From: gp201 <praneethg2001@gmail.com>
Date: Fri, 12 Sep 2025 13:57:14 -0700
Subject: [PATCH 4/4] fix: change script block to shell in SRATOOLS_PREFETCH
 process

---
 modules/nf-core/sratools/prefetch/main.nf | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modules/nf-core/sratools/prefetch/main.nf b/modules/nf-core/sratools/prefetch/main.nf
index 23e8182..29195e5 100644
--- a/modules/nf-core/sratools/prefetch/main.nf
+++ b/modules/nf-core/sratools/prefetch/main.nf
@@ -14,7 +14,7 @@ process SRATOOLS_PREFETCH {
     when:
     task.ext.when == null || task.ext.when
 
-    script:
+    shell:
     args = '5 1 100'
     // <num retries> <base delay in seconds> <max delay in seconds>
     template('retry_with_backoff.sh')