From 3a0eec3a502678a5aaed94ce00477dc47a6eb7ef Mon Sep 17 00:00:00 2001
From: Eleanor Lewis <elewis@bcgsc.ca>
Date: Fri, 17 Apr 2026 15:29:41 -0700
Subject: [PATCH 1/8] add seqqc section and test

---
 pori_python/ipr/content.spec.json |  96 ++++++++
 tests/test_ipr/test_upload.py     | 394 +++++++++++++++++-------------
 2 files changed, 319 insertions(+), 171 deletions(-)

diff --git a/pori_python/ipr/content.spec.json b/pori_python/ipr/content.spec.json
index 5a1793a2..711f9eb5 100644
--- a/pori_python/ipr/content.spec.json
+++ b/pori_python/ipr/content.spec.json
@@ -892,6 +892,102 @@
             "example": "POG",
             "type": "string"
         },
+        "seqQC": {
+            "type": "array",
+            "items": {
+                "type": "object",
+                "properties": {
+                    "reads": {
+                        "description": "Number of reads",
+                        "example": "2534M",
+                        "type": [
+                            "string",
+                            "null"
+                        ]
+                    },
+                    "bioQC": {
+                        "description": "Biological QC status",
+                        "example": "passed",
+                        "type": [
+                            "string",
+                            "null"
+                        ]
+                    },
+                    "labQC": {
+                        "description": "Lab QC status",
+                        "example": "passed",
+                        "type": [
+                            "string",
+                            "null"
+                        ]
+                    },
+                    "sample": {
+                        "description": "Sample identifier, e.g. Tumour DNA, Constitutional DNA",
+                        "example": "Tumour DNA",
+                        "type": [
+                            "string",
+                            "null"
+                        ]
+                    },
+                    "library": {
+                        "description": "Library identifier",
+                        "example": "LIB0001",
+                        "type": [
+                            "string",
+                            "null"
+                        ]
+                    },
+                    "coverage": {
+                        "description": "Sequencing coverage",
+                        "example": "80x",
+                        "type": [
+                            "string",
+                            "null"
+                        ]
+                    },
+                    "inputNg": {
+                        "description": "Input amount in nanograms",
+                        "example": "500",
+                        "type": [
+                            "string",
+                            "null"
+                        ]
+                    },
+                    "inputUg": {
+                        "description": "Input amount in micrograms",
+                        "example": "0.5",
+                        "type": [
+                            "string",
+                            "null"
+                        ]
+                    },
+                    "protocol": {
+                        "description": "Sequencing protocol",
+                        "example": "WGS",
+                        "type": [
+                            "string",
+                            "null"
+                        ]
+                    },
+                    "sampleName": {
+                        "description": "Full sample name",
+                        "example": "SAMPLE1-FF-1",
+                        "type": [
+                            "string",
+                            "null"
+                        ]
+                    },
+                    "duplicateReadsPerc": {
+                        "description": "Percentage of duplicate reads",
+                        "example": "12.3",
+                        "type": [
+                            "string",
+                            "null"
+                        ]
+                    }
+                }
+            }
+        },
         "smallMutations": {
             "default": [],
             "items": {
diff --git a/tests/test_ipr/test_upload.py b/tests/test_ipr/test_upload.py
index 2c6fb73c..80bdd4b5 100644
--- a/tests/test_ipr/test_upload.py
+++ b/tests/test_ipr/test_upload.py
@@ -13,100 +13,132 @@
 
 from .constants import EXCLUDE_INTEGRATION_TESTS
 
-EXCLUDE_BCGSC_TESTS = os.environ.get('EXCLUDE_BCGSC_TESTS') == '1'
-EXCLUDE_ONCOKB_TESTS = os.environ.get('EXCLUDE_ONCOKB_TESTS') == '1'
-INCLUDE_UPLOAD_TESTS = os.environ.get('INCLUDE_UPLOAD_TESTS', '0') == '1'
-DELETE_UPLOAD_TEST_REPORTS = os.environ.get('DELETE_UPLOAD_TEST_REPORTS', '1') == '1'
+EXCLUDE_BCGSC_TESTS = os.environ.get("EXCLUDE_BCGSC_TESTS") == "1"
+EXCLUDE_ONCOKB_TESTS = os.environ.get("EXCLUDE_ONCOKB_TESTS") == "1"
+INCLUDE_UPLOAD_TESTS = os.environ.get("INCLUDE_UPLOAD_TESTS", "0") == "1"
+DELETE_UPLOAD_TEST_REPORTS = os.environ.get("DELETE_UPLOAD_TEST_REPORTS", "1") == "1"
 
 
 def get_test_spec():
-    ipr_spec = {'components': {'schemas': {'genesCreate': {'properties': {}}}}}
+    ipr_spec = {"components": {"schemas": {"genesCreate": {"properties": {}}}}}
     ipr_gene_keys = IprGene.__required_keys__ | IprGene.__optional_keys__
     for key in ipr_gene_keys:
-        ipr_spec['components']['schemas']['genesCreate']['properties'][key] = ''
+        ipr_spec["components"]["schemas"]["genesCreate"]["properties"][key] = ""
     return ipr_spec
 
 
 def get_test_file(name: str) -> str:
-    return os.path.join(os.path.dirname(__file__), 'test_data', name)
+    return os.path.join(os.path.dirname(__file__), "test_data", name)
 
 
-@pytest.fixture(scope='module')
+@pytest.fixture(scope="module")
 def loaded_reports(tmp_path_factory) -> Generator:
-    json_file = tmp_path_factory.mktemp('inputs') / 'content.json'
-    async_json_file = tmp_path_factory.mktemp('inputs') / 'async_content.json'
-    patient_id = f'TEST_{str(uuid.uuid4())}'
-    async_patient_id = f'TEST_ASYNC_{str(uuid.uuid4())}'
+    json_file = tmp_path_factory.mktemp("inputs") / "content.json"
+    async_json_file = tmp_path_factory.mktemp("inputs") / "async_content.json"
+    patient_id = f"TEST_{str(uuid.uuid4())}"
+    async_patient_id = f"TEST_ASYNC_{str(uuid.uuid4())}"
     json_contents = {
-        'comparators': [
-            {'analysisRole': 'expression (disease)', 'name': '1'},
-            {'analysisRole': 'expression (primary site)', 'name': '2'},
-            {'analysisRole': 'expression (biopsy site)', 'name': '3'},
+        "comparators": [
+            {"analysisRole": "expression (disease)", "name": "1"},
+            {"analysisRole": "expression (primary site)", "name": "2"},
+            {"analysisRole": "expression (biopsy site)", "name": "3"},
             {
-                'analysisRole': 'expression (internal pancancer cohort)',
-                'name': '4',
+                "analysisRole": "expression (internal pancancer cohort)",
+                "name": "4",
             },
         ],
-        'patientId': patient_id,
-        'project': 'TEST',
-        'sampleInfo': [
+        "patientId": patient_id,
+        "project": "TEST",
+        "sampleInfo": [
             {
-                'sample': 'Constitutional',
-                'biopsySite': 'Normal tissue',
-                'sampleName': 'SAMPLE1-PB',
-                'primarySite': 'Blood-Peripheral',
-                'collectionDate': '11-11-11',
+                "sample": "Constitutional",
+                "biopsySite": "Normal tissue",
+                "sampleName": "SAMPLE1-PB",
+                "primarySite": "Blood-Peripheral",
+                "collectionDate": "11-11-11",
             },
             {
-                'sample': 'Tumour',
-                'pathoTc': '90%',
-                'biopsySite': 'hepatic',
-                'sampleName': 'SAMPLE2-FF-1',
-                'primarySite': 'Vena Cava-Hepatic',
-                'collectionDate': '12-12-12',
+                "sample": "Tumour",
+                "pathoTc": "90%",
+                "biopsySite": "hepatic",
+                "sampleName": "SAMPLE2-FF-1",
+                "primarySite": "Vena Cava-Hepatic",
+                "collectionDate": "12-12-12",
             },
         ],
-        'msi': [
+        "msi": [
             {
-                'score': 1000.0,
-                'kbCategory': 'microsatellite instability',
+                "score": 1000.0,
+                "kbCategory": "microsatellite instability",
             }
         ],
-        'hrd': {
-            'score': 9999.0,
-            'cutoff': 5,
+        "hrd": {
+            "score": 9999.0,
+            "cutoff": 5,
         },
-        'expressionVariants': json.loads(
-            pd.read_csv(get_test_file('expression.short.tab'), sep='\t').to_json(orient='records')
+        "expressionVariants": json.loads(
+            pd.read_csv(get_test_file("expression.short.tab"), sep="\t").to_json(
+                orient="records"
+            )
         ),
-        'smallMutations': json.loads(
-            pd.read_csv(get_test_file('small_mutations.short.tab'), sep='\t').to_json(
-                orient='records'
+        "smallMutations": json.loads(
+            pd.read_csv(get_test_file("small_mutations.short.tab"), sep="\t").to_json(
+                orient="records"
             )
         ),
-        'copyVariants': json.loads(
-            pd.read_csv(get_test_file('copy_variants.short.tab'), sep='\t').to_json(
-                orient='records'
+        "copyVariants": json.loads(
+            pd.read_csv(get_test_file("copy_variants.short.tab"), sep="\t").to_json(
+                orient="records"
             )
         ),
-        'structuralVariants': json.loads(
-            pd.read_csv(get_test_file('fusions.tab'), sep='\t').to_json(orient='records')
+        "structuralVariants": json.loads(
+            pd.read_csv(get_test_file("fusions.tab"), sep="\t").to_json(
+                orient="records"
+            )
         ),
-        'kbDiseaseMatch': 'colorectal cancer',
-        'cosmicSignatures': pd.read_csv(
-            get_test_file('cosmic_variants.tab'), sep='\t'
+        "kbDiseaseMatch": "colorectal cancer",
+        "cosmicSignatures": pd.read_csv(
+            get_test_file("cosmic_variants.tab"), sep="\t"
         ).signature.tolist(),
-        'hlaTypes': json.loads(
-            pd.read_csv(get_test_file('hla_variants.tab'), sep='\t').to_json(orient='records')
+        "hlaTypes": json.loads(
+            pd.read_csv(get_test_file("hla_variants.tab"), sep="\t").to_json(
+                orient="records"
+            )
         ),
-        'images': [
+        "images": [
             {
-                'key': 'cnvLoh.circos',
-                'path': 'test/testData/images/cnvLoh.png',
-                'caption': 'Test adding a caption to an image',
+                "key": "cnvLoh.circos",
+                "path": "test/testData/images/cnvLoh.png",
+                "caption": "Test adding a caption to an image",
             }
         ],
-        'config': 'test config',
+        "seqQC": [
+            {
+                "sample": "Tumour DNA",
+                "reads": "2534M",
+                "library": "LIB0001",
+                "coverage": "80x",
+                "inputNg": "500",
+                "protocol": "WGS",
+                "sampleName": "SAMPLE2-FF-1",
+                "bioQC": "passed",
+                "labQC": "passed",
+                "duplicateReadsPerc": "12.3",
+            },
+            {
+                "sample": "Constitutional DNA",
+                "reads": "1200M",
+                "library": "LIB0002",
+                "coverage": "40x",
+                "inputNg": "300",
+                "protocol": "WGS",
+                "sampleName": "SAMPLE1-PB",
+                "bioQC": "passed",
+                "labQC": "passed",
+                "duplicateReadsPerc": "8.1",
+            },
+        ],
+        "config": "test config",
     }
 
     json_file.write_text(
@@ -116,7 +148,7 @@ def loaded_reports(tmp_path_factory) -> Generator:
         )
     )
 
-    json_contents['patientId'] = async_patient_id
+    json_contents["patientId"] = async_patient_id
     async_json_file.write_text(
         json.dumps(
             json_contents,
@@ -125,46 +157,46 @@ def loaded_reports(tmp_path_factory) -> Generator:
     )
 
     argslist = [
-        'ipr',
-        '--username',
-        os.environ.get('IPR_USER', os.environ['USER']),
-        '--password',
-        os.environ['IPR_PASS'],
-        '--graphkb_username',
-        os.environ.get('GRAPHKB_USER', os.environ.get('IPR_USER', os.environ['USER'])),
-        '--graphkb_password',
-        os.environ.get('GRAPHKB_PASS', os.environ['IPR_PASS']),
-        '--ipr_url',
-        os.environ['IPR_TEST_URL'],
-        '--graphkb_url',
-        os.environ.get('GRAPHKB_URL', False),
-        '--therapeutics',
-        '--allow_partial_matches',
+        "ipr",
+        "--username",
+        os.environ.get("IPR_USER", os.environ["USER"]),
+        "--password",
+        os.environ["IPR_PASS"],
+        "--graphkb_username",
+        os.environ.get("GRAPHKB_USER", os.environ.get("IPR_USER", os.environ["USER"])),
+        "--graphkb_password",
+        os.environ.get("GRAPHKB_PASS", os.environ["IPR_PASS"]),
+        "--ipr_url",
+        os.environ["IPR_TEST_URL"],
+        "--graphkb_url",
+        os.environ.get("GRAPHKB_URL", False),
+        "--therapeutics",
+        "--allow_partial_matches",
     ]
 
     sync_argslist = argslist.copy()
-    sync_argslist.extend(['--content', str(json_file)])
-    with patch.object(sys, 'argv', sync_argslist):
-        with patch.object(IprConnection, 'get_spec', return_value=get_test_spec()):
+    sync_argslist.extend(["--content", str(json_file)])
+    with patch.object(sys, "argv", sync_argslist):
+        with patch.object(IprConnection, "get_spec", return_value=get_test_spec()):
             command_interface()
 
     async_argslist = argslist.copy()
-    async_argslist.extend(['--content', str(async_json_file), '--async_upload'])
-    with patch.object(sys, 'argv', async_argslist):
-        with patch.object(IprConnection, 'get_spec', return_value=get_test_spec()):
+    async_argslist.extend(["--content", str(async_json_file), "--async_upload"])
+    with patch.object(sys, "argv", async_argslist):
+        with patch.object(IprConnection, "get_spec", return_value=get_test_spec()):
             command_interface()
 
     ipr_conn = IprConnection(
-        username=os.environ.get('IPR_USER', os.environ['USER']),
-        password=os.environ['IPR_PASS'],
-        url=os.environ['IPR_TEST_URL'],
+        username=os.environ.get("IPR_USER", os.environ["USER"]),
+        password=os.environ["IPR_PASS"],
+        url=os.environ["IPR_TEST_URL"],
     )
-    loaded_report = ipr_conn.get(uri=f'reports?searchText={patient_id}')
-    async_loaded_report = ipr_conn.get(uri=f'reports?searchText={async_patient_id}')
+    loaded_report = ipr_conn.get(uri=f"reports?searchText={patient_id}")
+    async_loaded_report = ipr_conn.get(uri=f"reports?searchText={async_patient_id}")
 
     loaded_reports_result = {
-        'sync': (patient_id, loaded_report),
-        'async': (async_patient_id, async_loaded_report),
+        "sync": (patient_id, loaded_report),
+        "async": (async_patient_id, async_loaded_report),
     }
     yield loaded_reports_result
     if DELETE_UPLOAD_TEST_REPORTS:
@@ -173,13 +205,13 @@ def loaded_reports(tmp_path_factory) -> Generator:
 
 
 def get_section(loaded_report, section_name):
-    ident = loaded_report[1]['reports'][0]['ident']
+    ident = loaded_report[1]["reports"][0]["ident"]
     ipr_conn = IprConnection(
-        username=os.environ.get('IPR_USER', os.environ['USER']),
-        password=os.environ['IPR_PASS'],
-        url=os.environ['IPR_TEST_URL'],
+        username=os.environ.get("IPR_USER", os.environ["USER"]),
+        password=os.environ["IPR_PASS"],
+        url=os.environ["IPR_TEST_URL"],
     )
-    return ipr_conn.get(uri=f'reports/{ident}/{section_name}')
+    return ipr_conn.get(uri=f"reports/{ident}/{section_name}")
 
 
 def stringify_sorted(obj):
@@ -192,7 +224,7 @@ def stringify_sorted(obj):
         obj.sort()
         return str(obj)
     elif isinstance(obj, dict):
-        for key in ('ident', 'updatedAt', 'createdAt', 'deletedAt'):
+        for key in ("ident", "updatedAt", "createdAt", "deletedAt"):
             obj.pop(key, None)
         keys = obj.keys()
         for key in keys:
@@ -208,135 +240,145 @@ def stringify_sorted(obj):
 
 
 @pytest.mark.skipif(
-    not INCLUDE_UPLOAD_TESTS, reason='excluding tests of upload to live ipr instance'
+    not INCLUDE_UPLOAD_TESTS, reason="excluding tests of upload to live ipr instance"
+)
+@pytest.mark.skipif(
+    EXCLUDE_INTEGRATION_TESTS, reason="excluding long running integration tests"
 )
-@pytest.mark.skipif(EXCLUDE_INTEGRATION_TESTS, reason='excluding long running integration tests')
 class TestCreateReport:
     def test_patient_id_loaded_once(self, loaded_reports) -> None:
-        sync_patient_id = loaded_reports['sync'][0]
-        assert loaded_reports['sync'][1]['total'] == 1
-        assert loaded_reports['sync'][1]['reports'][0]['patientId'] == sync_patient_id
-        async_patient_id = loaded_reports['async'][0]
-        assert loaded_reports['async'][1]['total'] == 1
-        assert loaded_reports['async'][1]['reports'][0]['patientId'] == async_patient_id
+        sync_patient_id = loaded_reports["sync"][0]
+        assert loaded_reports["sync"][1]["total"] == 1
+        assert loaded_reports["sync"][1]["reports"][0]["patientId"] == sync_patient_id
+        async_patient_id = loaded_reports["async"][0]
+        assert loaded_reports["async"][1]["total"] == 1
+        assert loaded_reports["async"][1]["reports"][0]["patientId"] == async_patient_id
 
     def test_expression_variants_loaded(self, loaded_reports) -> None:
-        section = get_section(loaded_reports['sync'], 'expression-variants')
-        kbmatched = [item for item in section if item['kbMatches']]
-        assert 'PTP4A3' in [item['gene']['name'] for item in kbmatched]
-        async_section = get_section(loaded_reports['async'], 'expression-variants')
+        section = get_section(loaded_reports["sync"], "expression-variants")
+        kbmatched = [item for item in section if item["kbMatches"]]
+        assert "PTP4A3" in [item["gene"]["name"] for item in kbmatched]
+        async_section = get_section(loaded_reports["async"], "expression-variants")
         async_equals_sync = stringify_sorted(section) == stringify_sorted(async_section)
         assert async_equals_sync
 
     def test_structural_variants_loaded(self, loaded_reports) -> None:
-        section = get_section(loaded_reports['sync'], 'structural-variants')
-        kbmatched = [item for item in section if item['kbMatches']]
-        assert '(EWSR1,FLI1):fusion(e.7,e.4)' in [item['displayName'] for item in kbmatched]
-        async_section = get_section(loaded_reports['async'], 'structural-variants')
+        section = get_section(loaded_reports["sync"], "structural-variants")
+        kbmatched = [item for item in section if item["kbMatches"]]
+        assert "(EWSR1,FLI1):fusion(e.7,e.4)" in [
+            item["displayName"] for item in kbmatched
+        ]
+        async_section = get_section(loaded_reports["async"], "structural-variants")
         async_equals_sync = stringify_sorted(section) == stringify_sorted(async_section)
         assert async_equals_sync
 
     def test_small_mutations_loaded(self, loaded_reports) -> None:
-        section = get_section(loaded_reports['sync'], 'small-mutations')
-        kbmatched = [item for item in section if item['kbMatches']]
-        assert 'FGFR2:p.R421C' in [item['displayName'] for item in kbmatched]
-        assert 'CDKN2A:p.T18M' in [item['displayName'] for item in kbmatched]
-        async_section = get_section(loaded_reports['async'], 'small-mutations')
+        section = get_section(loaded_reports["sync"], "small-mutations")
+        kbmatched = [item for item in section if item["kbMatches"]]
+        assert "FGFR2:p.R421C" in [item["displayName"] for item in kbmatched]
+        assert "CDKN2A:p.T18M" in [item["displayName"] for item in kbmatched]
+        async_section = get_section(loaded_reports["async"], "small-mutations")
         async_equals_sync = stringify_sorted(section) == stringify_sorted(async_section)
         assert async_equals_sync
 
     def test_copy_variants_loaded(self, loaded_reports) -> None:
-        section = get_section(loaded_reports['sync'], 'copy-variants')
-        kbmatched = [item for item in section if item['kbMatches']]
-        assert ('ERBB2', 'amplification') in [
-            (item['gene']['name'], item['displayName']) for item in kbmatched
+        section = get_section(loaded_reports["sync"], "copy-variants")
+        kbmatched = [item for item in section if item["kbMatches"]]
+        assert ("ERBB2", "amplification") in [
+            (item["gene"]["name"], item["displayName"]) for item in kbmatched
         ]
-        async_section = get_section(loaded_reports['async'], 'copy-variants')
+        async_section = get_section(loaded_reports["async"], "copy-variants")
         async_equals_sync = stringify_sorted(section) == stringify_sorted(async_section)
         assert async_equals_sync
 
     def test_signature_variants_loaded(self, loaded_reports) -> None:
-        section = get_section(loaded_reports['sync'], 'signature-variants')
-        kbmatched = [item for item in section if item['kbMatches']]
+        section = get_section(loaded_reports["sync"], "signature-variants")
+        kbmatched = [item for item in section if item["kbMatches"]]
         # Check for COSMIC signatures
-        assert ('SBS2', 'high signature') in [
-            (item['signatureName'], item['variantTypeName']) for item in kbmatched
+        assert ("SBS2", "high signature") in [
+            (item["signatureName"], item["variantTypeName"]) for item in kbmatched
         ]
         # Check for HRD signature (score 9999 > cutoff 5, so strong signature)
-        assert ('homologous recombination deficiency', 'strong signature') in [
-            (item['signatureName'], item['variantTypeName']) for item in kbmatched
+        assert ("homologous recombination deficiency", "strong signature") in [
+            (item["signatureName"], item["variantTypeName"]) for item in kbmatched
         ]
         # Check for MSI signature
-        assert ('microsatellite instability', 'high signature') in [
-            (item['signatureName'], item['variantTypeName']) for item in kbmatched
+        assert ("microsatellite instability", "high signature") in [
+            (item["signatureName"], item["variantTypeName"]) for item in kbmatched
         ]
-        async_section = get_section(loaded_reports['async'], 'signature-variants')
+        async_section = get_section(loaded_reports["async"], "signature-variants")
         async_equals_sync = stringify_sorted(section) == stringify_sorted(async_section)
         assert async_equals_sync
 
     def test_hrd_score_in_report(self, loaded_reports) -> None:
         """Test that HRD score is present in the loaded report."""
-        report = loaded_reports['sync'][1]['reports'][0]
-        assert 'hrdScore' in report
-        assert report['hrdScore'] == 9999.0
+        report = loaded_reports["sync"][1]["reports"][0]
+        assert "hrdScore" in report
+        assert report["hrdScore"] == 9999.0
 
     def test_kb_matches_loaded(self, loaded_reports) -> None:
-        section = get_section(loaded_reports['sync'], 'kb-matches')
+        section = get_section(loaded_reports["sync"], "kb-matches")
         observed_and_matched = set(
-            [(item['kbVariant'], item['variant']['displayName']) for item in section]
+            [(item["kbVariant"], item["variant"]["displayName"]) for item in section]
         )
         for pair in [
-            ('ERBB2 amplification', 'amplification'),
-            ('FGFR2 mutation', 'FGFR2:p.R421C'),
-            ('PTP4A3 overexpression', 'increased expression'),
-            ('EWSR1 and FLI1 fusion', '(EWSR1,FLI1):fusion(e.7,e.4)'),
-            ('CDKN2A mutation', 'CDKN2A:p.T18M'),
+            ("ERBB2 amplification", "amplification"),
+            ("FGFR2 mutation", "FGFR2:p.R421C"),
+            ("PTP4A3 overexpression", "increased expression"),
+            ("EWSR1 and FLI1 fusion", "(EWSR1,FLI1):fusion(e.7,e.4)"),
+            ("CDKN2A mutation", "CDKN2A:p.T18M"),
         ]:
             assert pair in observed_and_matched
-        async_section = get_section(loaded_reports['async'], 'kb-matches')
+        async_section = get_section(loaded_reports["async"], "kb-matches")
         async_equals_sync = stringify_sorted(section) == stringify_sorted(async_section)
         assert async_equals_sync
 
     def test_therapeutic_targets_loaded(self, loaded_reports) -> None:
-        section = get_section(loaded_reports['sync'], 'therapeutic-targets')
-        therapeutic_target_genes = set([item['gene'] for item in section])
-        for gene in ['CDKN2A', 'ERBB2', 'FGFR2', 'PTP4A3']:
+        section = get_section(loaded_reports["sync"], "therapeutic-targets")
+        therapeutic_target_genes = set([item["gene"] for item in section])
+        for gene in ["CDKN2A", "ERBB2", "FGFR2", "PTP4A3"]:
             assert gene in therapeutic_target_genes
-        async_section = get_section(loaded_reports['async'], 'therapeutic-targets')
+        async_section = get_section(loaded_reports["async"], "therapeutic-targets")
         async_equals_sync = stringify_sorted(section) == stringify_sorted(async_section)
         assert async_equals_sync
 
     def test_genomic_alterations_identified_loaded(self, loaded_reports) -> None:
-        section = get_section(loaded_reports['sync'], 'summary/genomic-alterations-identified')
-        variants = set([item['geneVariant'] for item in section])
+        section = get_section(
+            loaded_reports["sync"], "summary/genomic-alterations-identified"
+        )
+        variants = set([item["geneVariant"] for item in section])
         for variant in [
-            'FGFR2:p.R421C',
-            'PTP4A3 (high_percentile)',
-            'ERBB2 (Amplification)',
-            '(EWSR1,FLI1):fusion(e.7,e.4)',
-            'CDKN2A:p.T18M',
+            "FGFR2:p.R421C",
+            "PTP4A3 (high_percentile)",
+            "ERBB2 (Amplification)",
+            "(EWSR1,FLI1):fusion(e.7,e.4)",
+            "CDKN2A:p.T18M",
         ]:
             assert variant in variants
         async_section = get_section(
-            loaded_reports['async'], 'summary/genomic-alterations-identified'
+            loaded_reports["async"], "summary/genomic-alterations-identified"
         )
         async_equals_sync = stringify_sorted(section) == stringify_sorted(async_section)
         assert async_equals_sync
 
     def test_analyst_comments_loaded(self, loaded_reports) -> None:
-        sync_section = get_section(loaded_reports['sync'], 'summary/analyst-comments')
-        assert sync_section['comments']
-        async_section = get_section(loaded_reports['async'], 'summary/analyst-comments')
-        assert async_section['comments']
-        assert sync_section['comments'] == async_section['comments']
+        sync_section = get_section(loaded_reports["sync"], "summary/analyst-comments")
+        assert sync_section["comments"]
+        async_section = get_section(loaded_reports["async"], "summary/analyst-comments")
+        assert async_section["comments"]
+        assert sync_section["comments"] == async_section["comments"]
 
     def test_sample_info_loaded(self, loaded_reports) -> None:
-        sync_section = get_section(loaded_reports['sync'], 'sample-info')
-        async_section = get_section(loaded_reports['async'], 'sample-info')
-        async_equals_sync = stringify_sorted(sync_section) == stringify_sorted(async_section)
+        sync_section = get_section(loaded_reports["sync"], "sample-info")
+        async_section = get_section(loaded_reports["async"], "sample-info")
+        async_equals_sync = stringify_sorted(sync_section) == stringify_sorted(
+            async_section
+        )
         assert async_equals_sync
 
-    def test_multivariant_multiconditionset_statements_loaded(self, loaded_reports) -> None:
+    def test_multivariant_multiconditionset_statements_loaded(
+        self, loaded_reports
+    ) -> None:
         """
         Checks that multivariant statements and multiple condition sets prepared correctly
         by this package are handled as expected by the api.
@@ -348,31 +390,41 @@ def test_multivariant_multiconditionset_statements_loaded(self, loaded_reports)
         are met.
         This is also a test of multiple condition sets since there are two variants
         in the test data that satisfy one of the conditions (the APC mutation)."""
-        section = get_section(loaded_reports['sync'], 'kb-matches/kb-matched-statements')
-        multivariant_stmts = [item for item in section if item['reference'] == 'pmid:27302369']
+        section = get_section(
+            loaded_reports["sync"], "kb-matches/kb-matched-statements"
+        )
+        multivariant_stmts = [
+            item for item in section if item["reference"] == "pmid:27302369"
+        ]
 
         # if this statement is entered more than once there may be multiple sets of records to
         # check, so to make sure the count checks work, go stmt_id by stmt_id:
-        stmt_ids = list(set([item['kbStatementId'] for item in multivariant_stmts]))
+        stmt_ids = list(set([item["kbStatementId"] for item in multivariant_stmts]))
         for stmt_id in stmt_ids:
-            stmts = [item for item in multivariant_stmts if item['kbStatementId'] == stmt_id]
+            stmts = [
+                item for item in multivariant_stmts if item["kbStatementId"] == stmt_id
+            ]
 
             # we expect three stmts, one for each condition set
             assert len(stmts) == 3
 
             # we expect each condition set to have two kb variants in it
             # we expect the two kb variants to be the same in each stmt
-            assert len(stmts[0]['kbMatches']) == 2
-            assert len(stmts[1]['kbMatches']) == 2
-            kbmatches1 = [item['kbVariant'] for item in stmts[0]['kbMatches']]
-            kbmatches2 = [item['kbVariant'] for item in stmts[1]['kbMatches']]
+            assert len(stmts[0]["kbMatches"]) == 2
+            assert len(stmts[1]["kbMatches"]) == 2
+            kbmatches1 = [item["kbVariant"] for item in stmts[0]["kbMatches"]]
+            kbmatches2 = [item["kbVariant"] for item in stmts[1]["kbMatches"]]
             kbmatches1.sort()
             kbmatches2.sort()
-            assert kbmatches1 == kbmatches2 == ['APC mutation', 'KRAS mutation']
+            assert kbmatches1 == kbmatches2 == ["APC mutation", "KRAS mutation"]
 
             # we expect the two stmts to have different observed variant sets
-            observedVariants1 = [item['variant']['ident'] for item in stmts[0]['kbMatches']]
-            observedVariants2 = [item['variant']['ident'] for item in stmts[1]['kbMatches']]
+            observedVariants1 = [
+                item["variant"]["ident"] for item in stmts[0]["kbMatches"]
+            ]
+            observedVariants2 = [
+                item["variant"]["ident"] for item in stmts[1]["kbMatches"]
+            ]
             observedVariants1.sort()
             observedVariants2.sort()
             assert observedVariants1 != observedVariants2

From 72d6805e1c51e73b627997971b0d07910dc91b39 Mon Sep 17 00:00:00 2001
From: Eleanor Lewis <elewis@bcgsc.ca>
Date: Mon, 20 Apr 2026 11:04:34 -0700
Subject: [PATCH 2/8] default seqQC to empty list in content spec; add seqQC upload test

---
 pori_python/ipr/content.spec.json |  1 +
 tests/test_ipr/test_upload.py     | 12 ++++++++++++
 2 files changed, 13 insertions(+)

diff --git a/pori_python/ipr/content.spec.json b/pori_python/ipr/content.spec.json
index 711f9eb5..dfbfb324 100644
--- a/pori_python/ipr/content.spec.json
+++ b/pori_python/ipr/content.spec.json
@@ -893,6 +893,7 @@
             "type": "string"
         },
         "seqQC": {
+            "default": [],
             "type": "array",
             "items": {
                 "type": "object",
diff --git a/tests/test_ipr/test_upload.py b/tests/test_ipr/test_upload.py
index 80bdd4b5..f8eaa246 100644
--- a/tests/test_ipr/test_upload.py
+++ b/tests/test_ipr/test_upload.py
@@ -368,6 +368,18 @@ def test_analyst_comments_loaded(self, loaded_reports) -> None:
         assert async_section["comments"]
         assert sync_section["comments"] == async_section["comments"]
 
+    def test_seqqc_loaded(self, loaded_reports) -> None:
+        """Test that seqQC data is present in the loaded report."""
+        sync_report = loaded_reports["sync"][1]["reports"][0]
+        assert "seqQC" in sync_report
+        assert len(sync_report["seqQC"]) == 2
+        samples = [item["sample"] for item in sync_report["seqQC"]]
+        assert "Tumour DNA" in samples
+        assert "Constitutional DNA" in samples
+        async_report = loaded_reports["async"][1]["reports"][0]
+        assert "seqQC" in async_report
+        assert len(async_report["seqQC"]) == 2
+
     def test_sample_info_loaded(self, loaded_reports) -> None:
         sync_section = get_section(loaded_reports["sync"], "sample-info")
         async_section = get_section(loaded_reports["async"], "sample-info")

From 4d0181bdcefe95e363cbbfde486d4b2b6f0ea0d7 Mon Sep 17 00:00:00 2001
From: Eleanor Lewis <elewis@bcgsc.ca>
Date: Mon, 20 Apr 2026 11:08:47 -0700
Subject: [PATCH 3/8] reformat test_upload.py (single quotes, joined lines)

---
 tests/test_ipr/test_upload.py | 428 ++++++++++++++++------------------
 1 file changed, 201 insertions(+), 227 deletions(-)

diff --git a/tests/test_ipr/test_upload.py b/tests/test_ipr/test_upload.py
index f8eaa246..06d60eb2 100644
--- a/tests/test_ipr/test_upload.py
+++ b/tests/test_ipr/test_upload.py
@@ -13,132 +13,126 @@
 
 from .constants import EXCLUDE_INTEGRATION_TESTS
 
-EXCLUDE_BCGSC_TESTS = os.environ.get("EXCLUDE_BCGSC_TESTS") == "1"
-EXCLUDE_ONCOKB_TESTS = os.environ.get("EXCLUDE_ONCOKB_TESTS") == "1"
-INCLUDE_UPLOAD_TESTS = os.environ.get("INCLUDE_UPLOAD_TESTS", "0") == "1"
-DELETE_UPLOAD_TEST_REPORTS = os.environ.get("DELETE_UPLOAD_TEST_REPORTS", "1") == "1"
+EXCLUDE_BCGSC_TESTS = os.environ.get('EXCLUDE_BCGSC_TESTS') == '1'
+EXCLUDE_ONCOKB_TESTS = os.environ.get('EXCLUDE_ONCOKB_TESTS') == '1'
+INCLUDE_UPLOAD_TESTS = os.environ.get('INCLUDE_UPLOAD_TESTS', '0') == '1'
+DELETE_UPLOAD_TEST_REPORTS = os.environ.get('DELETE_UPLOAD_TEST_REPORTS', '1') == '1'
 
 
 def get_test_spec():
-    ipr_spec = {"components": {"schemas": {"genesCreate": {"properties": {}}}}}
+    ipr_spec = {'components': {'schemas': {'genesCreate': {'properties': {}}}}}
     ipr_gene_keys = IprGene.__required_keys__ | IprGene.__optional_keys__
     for key in ipr_gene_keys:
-        ipr_spec["components"]["schemas"]["genesCreate"]["properties"][key] = ""
+        ipr_spec['components']['schemas']['genesCreate']['properties'][key] = ''
     return ipr_spec
 
 
 def get_test_file(name: str) -> str:
-    return os.path.join(os.path.dirname(__file__), "test_data", name)
+    return os.path.join(os.path.dirname(__file__), 'test_data', name)
 
 
-@pytest.fixture(scope="module")
+@pytest.fixture(scope='module')
 def loaded_reports(tmp_path_factory) -> Generator:
-    json_file = tmp_path_factory.mktemp("inputs") / "content.json"
-    async_json_file = tmp_path_factory.mktemp("inputs") / "async_content.json"
-    patient_id = f"TEST_{str(uuid.uuid4())}"
-    async_patient_id = f"TEST_ASYNC_{str(uuid.uuid4())}"
+    json_file = tmp_path_factory.mktemp('inputs') / 'content.json'
+    async_json_file = tmp_path_factory.mktemp('inputs') / 'async_content.json'
+    patient_id = f'TEST_{str(uuid.uuid4())}'
+    async_patient_id = f'TEST_ASYNC_{str(uuid.uuid4())}'
     json_contents = {
-        "comparators": [
-            {"analysisRole": "expression (disease)", "name": "1"},
-            {"analysisRole": "expression (primary site)", "name": "2"},
-            {"analysisRole": "expression (biopsy site)", "name": "3"},
+        'comparators': [
+            {'analysisRole': 'expression (disease)', 'name': '1'},
+            {'analysisRole': 'expression (primary site)', 'name': '2'},
+            {'analysisRole': 'expression (biopsy site)', 'name': '3'},
             {
-                "analysisRole": "expression (internal pancancer cohort)",
-                "name": "4",
+                'analysisRole': 'expression (internal pancancer cohort)',
+                'name': '4',
             },
         ],
-        "patientId": patient_id,
-        "project": "TEST",
-        "sampleInfo": [
+        'patientId': patient_id,
+        'project': 'TEST',
+        'sampleInfo': [
             {
-                "sample": "Constitutional",
-                "biopsySite": "Normal tissue",
-                "sampleName": "SAMPLE1-PB",
-                "primarySite": "Blood-Peripheral",
-                "collectionDate": "11-11-11",
+                'sample': 'Constitutional',
+                'biopsySite': 'Normal tissue',
+                'sampleName': 'SAMPLE1-PB',
+                'primarySite': 'Blood-Peripheral',
+                'collectionDate': '11-11-11',
             },
             {
-                "sample": "Tumour",
-                "pathoTc": "90%",
-                "biopsySite": "hepatic",
-                "sampleName": "SAMPLE2-FF-1",
-                "primarySite": "Vena Cava-Hepatic",
-                "collectionDate": "12-12-12",
+                'sample': 'Tumour',
+                'pathoTc': '90%',
+                'biopsySite': 'hepatic',
+                'sampleName': 'SAMPLE2-FF-1',
+                'primarySite': 'Vena Cava-Hepatic',
+                'collectionDate': '12-12-12',
             },
         ],
-        "msi": [
+        'msi': [
             {
-                "score": 1000.0,
-                "kbCategory": "microsatellite instability",
+                'score': 1000.0,
+                'kbCategory': 'microsatellite instability',
             }
         ],
-        "hrd": {
-            "score": 9999.0,
-            "cutoff": 5,
+        'hrd': {
+            'score': 9999.0,
+            'cutoff': 5,
         },
-        "expressionVariants": json.loads(
-            pd.read_csv(get_test_file("expression.short.tab"), sep="\t").to_json(
-                orient="records"
-            )
+        'expressionVariants': json.loads(
+            pd.read_csv(get_test_file('expression.short.tab'), sep='\t').to_json(orient='records')
         ),
-        "smallMutations": json.loads(
-            pd.read_csv(get_test_file("small_mutations.short.tab"), sep="\t").to_json(
-                orient="records"
+        'smallMutations': json.loads(
+            pd.read_csv(get_test_file('small_mutations.short.tab'), sep='\t').to_json(
+                orient='records'
             )
         ),
-        "copyVariants": json.loads(
-            pd.read_csv(get_test_file("copy_variants.short.tab"), sep="\t").to_json(
-                orient="records"
+        'copyVariants': json.loads(
+            pd.read_csv(get_test_file('copy_variants.short.tab'), sep='\t').to_json(
+                orient='records'
             )
         ),
-        "structuralVariants": json.loads(
-            pd.read_csv(get_test_file("fusions.tab"), sep="\t").to_json(
-                orient="records"
-            )
+        'structuralVariants': json.loads(
+            pd.read_csv(get_test_file('fusions.tab'), sep='\t').to_json(orient='records')
         ),
-        "kbDiseaseMatch": "colorectal cancer",
-        "cosmicSignatures": pd.read_csv(
-            get_test_file("cosmic_variants.tab"), sep="\t"
+        'kbDiseaseMatch': 'colorectal cancer',
+        'cosmicSignatures': pd.read_csv(
+            get_test_file('cosmic_variants.tab'), sep='\t'
         ).signature.tolist(),
-        "hlaTypes": json.loads(
-            pd.read_csv(get_test_file("hla_variants.tab"), sep="\t").to_json(
-                orient="records"
-            )
+        'hlaTypes': json.loads(
+            pd.read_csv(get_test_file('hla_variants.tab'), sep='\t').to_json(orient='records')
         ),
-        "images": [
+        'images': [
             {
-                "key": "cnvLoh.circos",
-                "path": "test/testData/images/cnvLoh.png",
-                "caption": "Test adding a caption to an image",
+                'key': 'cnvLoh.circos',
+                'path': 'test/testData/images/cnvLoh.png',
+                'caption': 'Test adding a caption to an image',
             }
         ],
-        "seqQC": [
+        'seqQC': [
             {
-                "sample": "Tumour DNA",
-                "reads": "2534M",
-                "library": "LIB0001",
-                "coverage": "80x",
-                "inputNg": "500",
-                "protocol": "WGS",
-                "sampleName": "SAMPLE2-FF-1",
-                "bioQC": "passed",
-                "labQC": "passed",
-                "duplicateReadsPerc": "12.3",
+                'sample': 'Tumour DNA',
+                'reads': '2534M',
+                'library': 'LIB0001',
+                'coverage': '80x',
+                'inputNg': '500',
+                'protocol': 'WGS',
+                'sampleName': 'SAMPLE2-FF-1',
+                'bioQC': 'passed',
+                'labQC': 'passed',
+                'duplicateReadsPerc': '12.3',
             },
             {
-                "sample": "Constitutional DNA",
-                "reads": "1200M",
-                "library": "LIB0002",
-                "coverage": "40x",
-                "inputNg": "300",
-                "protocol": "WGS",
-                "sampleName": "SAMPLE1-PB",
-                "bioQC": "passed",
-                "labQC": "passed",
-                "duplicateReadsPerc": "8.1",
+                'sample': 'Constitutional DNA',
+                'reads': '1200M',
+                'library': 'LIB0002',
+                'coverage': '40x',
+                'inputNg': '300',
+                'protocol': 'WGS',
+                'sampleName': 'SAMPLE1-PB',
+                'bioQC': 'passed',
+                'labQC': 'passed',
+                'duplicateReadsPerc': '8.1',
             },
         ],
-        "config": "test config",
+        'config': 'test config',
     }
 
     json_file.write_text(
@@ -148,7 +142,7 @@ def loaded_reports(tmp_path_factory) -> Generator:
         )
     )
 
-    json_contents["patientId"] = async_patient_id
+    json_contents['patientId'] = async_patient_id
     async_json_file.write_text(
         json.dumps(
             json_contents,
@@ -157,46 +151,46 @@ def loaded_reports(tmp_path_factory) -> Generator:
     )
 
     argslist = [
-        "ipr",
-        "--username",
-        os.environ.get("IPR_USER", os.environ["USER"]),
-        "--password",
-        os.environ["IPR_PASS"],
-        "--graphkb_username",
-        os.environ.get("GRAPHKB_USER", os.environ.get("IPR_USER", os.environ["USER"])),
-        "--graphkb_password",
-        os.environ.get("GRAPHKB_PASS", os.environ["IPR_PASS"]),
-        "--ipr_url",
-        os.environ["IPR_TEST_URL"],
-        "--graphkb_url",
-        os.environ.get("GRAPHKB_URL", False),
-        "--therapeutics",
-        "--allow_partial_matches",
+        'ipr',
+        '--username',
+        os.environ.get('IPR_USER', os.environ['USER']),
+        '--password',
+        os.environ['IPR_PASS'],
+        '--graphkb_username',
+        os.environ.get('GRAPHKB_USER', os.environ.get('IPR_USER', os.environ['USER'])),
+        '--graphkb_password',
+        os.environ.get('GRAPHKB_PASS', os.environ['IPR_PASS']),
+        '--ipr_url',
+        os.environ['IPR_TEST_URL'],
+        '--graphkb_url',
+        os.environ.get('GRAPHKB_URL', False),
+        '--therapeutics',
+        '--allow_partial_matches',
     ]
 
     sync_argslist = argslist.copy()
-    sync_argslist.extend(["--content", str(json_file)])
-    with patch.object(sys, "argv", sync_argslist):
-        with patch.object(IprConnection, "get_spec", return_value=get_test_spec()):
+    sync_argslist.extend(['--content', str(json_file)])
+    with patch.object(sys, 'argv', sync_argslist):
+        with patch.object(IprConnection, 'get_spec', return_value=get_test_spec()):
             command_interface()
 
     async_argslist = argslist.copy()
-    async_argslist.extend(["--content", str(async_json_file), "--async_upload"])
-    with patch.object(sys, "argv", async_argslist):
-        with patch.object(IprConnection, "get_spec", return_value=get_test_spec()):
+    async_argslist.extend(['--content', str(async_json_file), '--async_upload'])
+    with patch.object(sys, 'argv', async_argslist):
+        with patch.object(IprConnection, 'get_spec', return_value=get_test_spec()):
             command_interface()
 
     ipr_conn = IprConnection(
-        username=os.environ.get("IPR_USER", os.environ["USER"]),
-        password=os.environ["IPR_PASS"],
-        url=os.environ["IPR_TEST_URL"],
+        username=os.environ.get('IPR_USER', os.environ['USER']),
+        password=os.environ['IPR_PASS'],
+        url=os.environ['IPR_TEST_URL'],
     )
-    loaded_report = ipr_conn.get(uri=f"reports?searchText={patient_id}")
-    async_loaded_report = ipr_conn.get(uri=f"reports?searchText={async_patient_id}")
+    loaded_report = ipr_conn.get(uri=f'reports?searchText={patient_id}')
+    async_loaded_report = ipr_conn.get(uri=f'reports?searchText={async_patient_id}')
 
     loaded_reports_result = {
-        "sync": (patient_id, loaded_report),
-        "async": (async_patient_id, async_loaded_report),
+        'sync': (patient_id, loaded_report),
+        'async': (async_patient_id, async_loaded_report),
     }
     yield loaded_reports_result
     if DELETE_UPLOAD_TEST_REPORTS:
@@ -205,13 +199,13 @@ def loaded_reports(tmp_path_factory) -> Generator:
 
 
 def get_section(loaded_report, section_name):
-    ident = loaded_report[1]["reports"][0]["ident"]
+    ident = loaded_report[1]['reports'][0]['ident']
     ipr_conn = IprConnection(
-        username=os.environ.get("IPR_USER", os.environ["USER"]),
-        password=os.environ["IPR_PASS"],
-        url=os.environ["IPR_TEST_URL"],
+        username=os.environ.get('IPR_USER', os.environ['USER']),
+        password=os.environ['IPR_PASS'],
+        url=os.environ['IPR_TEST_URL'],
     )
-    return ipr_conn.get(uri=f"reports/{ident}/{section_name}")
+    return ipr_conn.get(uri=f'reports/{ident}/{section_name}')
 
 
 def stringify_sorted(obj):
@@ -224,7 +218,7 @@ def stringify_sorted(obj):
         obj.sort()
         return str(obj)
     elif isinstance(obj, dict):
-        for key in ("ident", "updatedAt", "createdAt", "deletedAt"):
+        for key in ('ident', 'updatedAt', 'createdAt', 'deletedAt'):
             obj.pop(key, None)
         keys = obj.keys()
         for key in keys:
@@ -240,157 +234,147 @@ def stringify_sorted(obj):
 
 
 @pytest.mark.skipif(
-    not INCLUDE_UPLOAD_TESTS, reason="excluding tests of upload to live ipr instance"
-)
-@pytest.mark.skipif(
-    EXCLUDE_INTEGRATION_TESTS, reason="excluding long running integration tests"
+    not INCLUDE_UPLOAD_TESTS, reason='excluding tests of upload to live ipr instance'
 )
+@pytest.mark.skipif(EXCLUDE_INTEGRATION_TESTS, reason='excluding long running integration tests')
 class TestCreateReport:
     def test_patient_id_loaded_once(self, loaded_reports) -> None:
-        sync_patient_id = loaded_reports["sync"][0]
-        assert loaded_reports["sync"][1]["total"] == 1
-        assert loaded_reports["sync"][1]["reports"][0]["patientId"] == sync_patient_id
-        async_patient_id = loaded_reports["async"][0]
-        assert loaded_reports["async"][1]["total"] == 1
-        assert loaded_reports["async"][1]["reports"][0]["patientId"] == async_patient_id
+        sync_patient_id = loaded_reports['sync'][0]
+        assert loaded_reports['sync'][1]['total'] == 1
+        assert loaded_reports['sync'][1]['reports'][0]['patientId'] == sync_patient_id
+        async_patient_id = loaded_reports['async'][0]
+        assert loaded_reports['async'][1]['total'] == 1
+        assert loaded_reports['async'][1]['reports'][0]['patientId'] == async_patient_id
 
     def test_expression_variants_loaded(self, loaded_reports) -> None:
-        section = get_section(loaded_reports["sync"], "expression-variants")
-        kbmatched = [item for item in section if item["kbMatches"]]
-        assert "PTP4A3" in [item["gene"]["name"] for item in kbmatched]
-        async_section = get_section(loaded_reports["async"], "expression-variants")
+        section = get_section(loaded_reports['sync'], 'expression-variants')
+        kbmatched = [item for item in section if item['kbMatches']]
+        assert 'PTP4A3' in [item['gene']['name'] for item in kbmatched]
+        async_section = get_section(loaded_reports['async'], 'expression-variants')
         async_equals_sync = stringify_sorted(section) == stringify_sorted(async_section)
         assert async_equals_sync
 
     def test_structural_variants_loaded(self, loaded_reports) -> None:
-        section = get_section(loaded_reports["sync"], "structural-variants")
-        kbmatched = [item for item in section if item["kbMatches"]]
-        assert "(EWSR1,FLI1):fusion(e.7,e.4)" in [
-            item["displayName"] for item in kbmatched
-        ]
-        async_section = get_section(loaded_reports["async"], "structural-variants")
+        section = get_section(loaded_reports['sync'], 'structural-variants')
+        kbmatched = [item for item in section if item['kbMatches']]
+        assert '(EWSR1,FLI1):fusion(e.7,e.4)' in [item['displayName'] for item in kbmatched]
+        async_section = get_section(loaded_reports['async'], 'structural-variants')
         async_equals_sync = stringify_sorted(section) == stringify_sorted(async_section)
         assert async_equals_sync
 
     def test_small_mutations_loaded(self, loaded_reports) -> None:
-        section = get_section(loaded_reports["sync"], "small-mutations")
-        kbmatched = [item for item in section if item["kbMatches"]]
-        assert "FGFR2:p.R421C" in [item["displayName"] for item in kbmatched]
-        assert "CDKN2A:p.T18M" in [item["displayName"] for item in kbmatched]
-        async_section = get_section(loaded_reports["async"], "small-mutations")
+        section = get_section(loaded_reports['sync'], 'small-mutations')
+        kbmatched = [item for item in section if item['kbMatches']]
+        assert 'FGFR2:p.R421C' in [item['displayName'] for item in kbmatched]
+        assert 'CDKN2A:p.T18M' in [item['displayName'] for item in kbmatched]
+        async_section = get_section(loaded_reports['async'], 'small-mutations')
         async_equals_sync = stringify_sorted(section) == stringify_sorted(async_section)
         assert async_equals_sync
 
     def test_copy_variants_loaded(self, loaded_reports) -> None:
-        section = get_section(loaded_reports["sync"], "copy-variants")
-        kbmatched = [item for item in section if item["kbMatches"]]
-        assert ("ERBB2", "amplification") in [
-            (item["gene"]["name"], item["displayName"]) for item in kbmatched
+        section = get_section(loaded_reports['sync'], 'copy-variants')
+        kbmatched = [item for item in section if item['kbMatches']]
+        assert ('ERBB2', 'amplification') in [
+            (item['gene']['name'], item['displayName']) for item in kbmatched
         ]
-        async_section = get_section(loaded_reports["async"], "copy-variants")
+        async_section = get_section(loaded_reports['async'], 'copy-variants')
         async_equals_sync = stringify_sorted(section) == stringify_sorted(async_section)
         assert async_equals_sync
 
     def test_signature_variants_loaded(self, loaded_reports) -> None:
-        section = get_section(loaded_reports["sync"], "signature-variants")
-        kbmatched = [item for item in section if item["kbMatches"]]
+        section = get_section(loaded_reports['sync'], 'signature-variants')
+        kbmatched = [item for item in section if item['kbMatches']]
         # Check for COSMIC signatures
-        assert ("SBS2", "high signature") in [
-            (item["signatureName"], item["variantTypeName"]) for item in kbmatched
+        assert ('SBS2', 'high signature') in [
+            (item['signatureName'], item['variantTypeName']) for item in kbmatched
         ]
         # Check for HRD signature (score 9999 > cutoff 5, so strong signature)
-        assert ("homologous recombination deficiency", "strong signature") in [
-            (item["signatureName"], item["variantTypeName"]) for item in kbmatched
+        assert ('homologous recombination deficiency', 'strong signature') in [
+            (item['signatureName'], item['variantTypeName']) for item in kbmatched
         ]
         # Check for MSI signature
-        assert ("microsatellite instability", "high signature") in [
-            (item["signatureName"], item["variantTypeName"]) for item in kbmatched
+        assert ('microsatellite instability', 'high signature') in [
+            (item['signatureName'], item['variantTypeName']) for item in kbmatched
         ]
-        async_section = get_section(loaded_reports["async"], "signature-variants")
+        async_section = get_section(loaded_reports['async'], 'signature-variants')
         async_equals_sync = stringify_sorted(section) == stringify_sorted(async_section)
         assert async_equals_sync
 
     def test_hrd_score_in_report(self, loaded_reports) -> None:
         """Test that HRD score is present in the loaded report."""
-        report = loaded_reports["sync"][1]["reports"][0]
-        assert "hrdScore" in report
-        assert report["hrdScore"] == 9999.0
+        report = loaded_reports['sync'][1]['reports'][0]
+        assert 'hrdScore' in report
+        assert report['hrdScore'] == 9999.0
 
     def test_kb_matches_loaded(self, loaded_reports) -> None:
-        section = get_section(loaded_reports["sync"], "kb-matches")
+        section = get_section(loaded_reports['sync'], 'kb-matches')
         observed_and_matched = set(
-            [(item["kbVariant"], item["variant"]["displayName"]) for item in section]
+            [(item['kbVariant'], item['variant']['displayName']) for item in section]
         )
         for pair in [
-            ("ERBB2 amplification", "amplification"),
-            ("FGFR2 mutation", "FGFR2:p.R421C"),
-            ("PTP4A3 overexpression", "increased expression"),
-            ("EWSR1 and FLI1 fusion", "(EWSR1,FLI1):fusion(e.7,e.4)"),
-            ("CDKN2A mutation", "CDKN2A:p.T18M"),
+            ('ERBB2 amplification', 'amplification'),
+            ('FGFR2 mutation', 'FGFR2:p.R421C'),
+            ('PTP4A3 overexpression', 'increased expression'),
+            ('EWSR1 and FLI1 fusion', '(EWSR1,FLI1):fusion(e.7,e.4)'),
+            ('CDKN2A mutation', 'CDKN2A:p.T18M'),
         ]:
             assert pair in observed_and_matched
-        async_section = get_section(loaded_reports["async"], "kb-matches")
+        async_section = get_section(loaded_reports['async'], 'kb-matches')
         async_equals_sync = stringify_sorted(section) == stringify_sorted(async_section)
         assert async_equals_sync
 
     def test_therapeutic_targets_loaded(self, loaded_reports) -> None:
-        section = get_section(loaded_reports["sync"], "therapeutic-targets")
-        therapeutic_target_genes = set([item["gene"] for item in section])
-        for gene in ["CDKN2A", "ERBB2", "FGFR2", "PTP4A3"]:
+        section = get_section(loaded_reports['sync'], 'therapeutic-targets')
+        therapeutic_target_genes = set([item['gene'] for item in section])
+        for gene in ['CDKN2A', 'ERBB2', 'FGFR2', 'PTP4A3']:
             assert gene in therapeutic_target_genes
-        async_section = get_section(loaded_reports["async"], "therapeutic-targets")
+        async_section = get_section(loaded_reports['async'], 'therapeutic-targets')
         async_equals_sync = stringify_sorted(section) == stringify_sorted(async_section)
         assert async_equals_sync
 
     def test_genomic_alterations_identified_loaded(self, loaded_reports) -> None:
-        section = get_section(
-            loaded_reports["sync"], "summary/genomic-alterations-identified"
-        )
-        variants = set([item["geneVariant"] for item in section])
+        section = get_section(loaded_reports['sync'], 'summary/genomic-alterations-identified')
+        variants = set([item['geneVariant'] for item in section])
         for variant in [
-            "FGFR2:p.R421C",
-            "PTP4A3 (high_percentile)",
-            "ERBB2 (Amplification)",
-            "(EWSR1,FLI1):fusion(e.7,e.4)",
-            "CDKN2A:p.T18M",
+            'FGFR2:p.R421C',
+            'PTP4A3 (high_percentile)',
+            'ERBB2 (Amplification)',
+            '(EWSR1,FLI1):fusion(e.7,e.4)',
+            'CDKN2A:p.T18M',
         ]:
             assert variant in variants
         async_section = get_section(
-            loaded_reports["async"], "summary/genomic-alterations-identified"
+            loaded_reports['async'], 'summary/genomic-alterations-identified'
         )
         async_equals_sync = stringify_sorted(section) == stringify_sorted(async_section)
         assert async_equals_sync
 
     def test_analyst_comments_loaded(self, loaded_reports) -> None:
-        sync_section = get_section(loaded_reports["sync"], "summary/analyst-comments")
-        assert sync_section["comments"]
-        async_section = get_section(loaded_reports["async"], "summary/analyst-comments")
-        assert async_section["comments"]
-        assert sync_section["comments"] == async_section["comments"]
+        sync_section = get_section(loaded_reports['sync'], 'summary/analyst-comments')
+        assert sync_section['comments']
+        async_section = get_section(loaded_reports['async'], 'summary/analyst-comments')
+        assert async_section['comments']
+        assert sync_section['comments'] == async_section['comments']
 
     def test_seqqc_loaded(self, loaded_reports) -> None:
         """Test that seqQC data is present in the loaded report."""
-        sync_report = loaded_reports["sync"][1]["reports"][0]
-        assert "seqQC" in sync_report
-        assert len(sync_report["seqQC"]) == 2
-        samples = [item["sample"] for item in sync_report["seqQC"]]
-        assert "Tumour DNA" in samples
-        assert "Constitutional DNA" in samples
-        async_report = loaded_reports["async"][1]["reports"][0]
-        assert "seqQC" in async_report
-        assert len(async_report["seqQC"]) == 2
+        sync_report = loaded_reports['sync'][1]['reports'][0]
+        assert 'seqQC' in sync_report
+        assert len(sync_report['seqQC']) == 2
+        samples = [item['sample'] for item in sync_report['seqQC']]
+        assert 'Tumour DNA' in samples
+        assert 'Constitutional DNA' in samples
+        async_report = loaded_reports['async'][1]['reports'][0]
+        assert 'seqQC' in async_report
+        assert len(async_report['seqQC']) == 2
 
     def test_sample_info_loaded(self, loaded_reports) -> None:
-        sync_section = get_section(loaded_reports["sync"], "sample-info")
-        async_section = get_section(loaded_reports["async"], "sample-info")
-        async_equals_sync = stringify_sorted(sync_section) == stringify_sorted(
-            async_section
-        )
+        sync_section = get_section(loaded_reports['sync'], 'sample-info')
+        async_section = get_section(loaded_reports['async'], 'sample-info')
+        async_equals_sync = stringify_sorted(sync_section) == stringify_sorted(async_section)
         assert async_equals_sync
 
-    def test_multivariant_multiconditionset_statements_loaded(
-        self, loaded_reports
-    ) -> None:
+    def test_multivariant_multiconditionset_statements_loaded(self, loaded_reports) -> None:
         """
         Checks that multivariant statements and multiple condition sets prepared correctly
         by this package are handled as expected by the api.
@@ -402,41 +386,31 @@ def test_multivariant_multiconditionset_statements_loaded(
         are met.
         This is also a test of multiple condition sets since there are two variants
         in the test data that satisfy one of the conditions (the APC mutation)."""
-        section = get_section(
-            loaded_reports["sync"], "kb-matches/kb-matched-statements"
-        )
-        multivariant_stmts = [
-            item for item in section if item["reference"] == "pmid:27302369"
-        ]
+        section = get_section(loaded_reports['sync'], 'kb-matches/kb-matched-statements')
+        multivariant_stmts = [item for item in section if item['reference'] == 'pmid:27302369']
 
         # if this statement is entered more than once there may be multiple sets of records to
         # check, so to make sure the count checks work, go stmt_id by stmt_id:
-        stmt_ids = list(set([item["kbStatementId"] for item in multivariant_stmts]))
+        stmt_ids = list(set([item['kbStatementId'] for item in multivariant_stmts]))
         for stmt_id in stmt_ids:
-            stmts = [
-                item for item in multivariant_stmts if item["kbStatementId"] == stmt_id
-            ]
+            stmts = [item for item in multivariant_stmts if item['kbStatementId'] == stmt_id]
 
             # we expect three stmts, one for each condition set
             assert len(stmts) == 3
 
             # we expect each condition set to have two kb variants in it
             # we expect the two kb variants to be the same in each stmt
-            assert len(stmts[0]["kbMatches"]) == 2
-            assert len(stmts[1]["kbMatches"]) == 2
-            kbmatches1 = [item["kbVariant"] for item in stmts[0]["kbMatches"]]
-            kbmatches2 = [item["kbVariant"] for item in stmts[1]["kbMatches"]]
+            assert len(stmts[0]['kbMatches']) == 2
+            assert len(stmts[1]['kbMatches']) == 2
+            kbmatches1 = [item['kbVariant'] for item in stmts[0]['kbMatches']]
+            kbmatches2 = [item['kbVariant'] for item in stmts[1]['kbMatches']]
             kbmatches1.sort()
             kbmatches2.sort()
-            assert kbmatches1 == kbmatches2 == ["APC mutation", "KRAS mutation"]
+            assert kbmatches1 == kbmatches2 == ['APC mutation', 'KRAS mutation']
 
             # we expect the two stmts to have different observed variant sets
-            observedVariants1 = [
-                item["variant"]["ident"] for item in stmts[0]["kbMatches"]
-            ]
-            observedVariants2 = [
-                item["variant"]["ident"] for item in stmts[1]["kbMatches"]
-            ]
+            observedVariants1 = [item['variant']['ident'] for item in stmts[0]['kbMatches']]
+            observedVariants2 = [item['variant']['ident'] for item in stmts[1]['kbMatches']]
             observedVariants1.sort()
             observedVariants2.sort()
             assert observedVariants1 != observedVariants2

From 1e9d46d5b1a20a432f59cc7766875f5c1c62a158 Mon Sep 17 00:00:00 2001
From: Eleanor Lewis <elewis@bcgsc.ca>
Date: Mon, 20 Apr 2026 11:42:23 -0700
Subject: [PATCH 4/8] handle existing input format

---
 pori_python/ipr/inputs.py     |  40 ++++++++++++
 pori_python/ipr/main.py       |   2 +
 tests/test_ipr/test_inputs.py | 115 ++++++++++++++++++++++++++++++++++
 3 files changed, 157 insertions(+)

diff --git a/pori_python/ipr/inputs.py b/pori_python/ipr/inputs.py
index f14fc696..6dbb9401 100644
--- a/pori_python/ipr/inputs.py
+++ b/pori_python/ipr/inputs.py
@@ -796,6 +796,46 @@ def check_null(checker, instance):
 DefaultValidatingDraft7Validator = extend_with_default(jsonschema.Draft7Validator)
 
 
+def normalize_seqqc(content: Dict) -> Dict:
+    """
+    Normalize seqQC field names from production report format to schema format.
+    
+    Maps inconsistent casing and underscores in field names to match content.spec.json requirements.
+    For example: 'Reads' -> 'reads', 'Sample_Name' -> 'sampleName', etc.
+    
+    Args:
+        content: Report content dictionary that may contain seqQC array
+        
+    Returns:
+        The content dictionary with seqQC fields normalized in-place
+    """
+    # Field name mapping from production/legacy format to schema format
+    field_mapping = {
+        'Reads': 'reads',
+        'Sample': 'sample',
+        'Library': 'library',
+        'Coverage': 'coverage',
+        'Input_ng': 'inputNg',
+        'Input_ug': 'inputUg',
+        'Protocol': 'protocol',
+        'Sample Name': 'sampleName',
+        'Duplicate_Reads_Perc': 'duplicateReadsPerc',
+    }
+    
+    if 'seqQC' in content and isinstance(content['seqQC'], list):
+        for item in content['seqQC']:
+            # Create a new dict with normalized keys
+            normalized_item = {}
+            for old_key, value in item.items():
+                # Use mapped key if it exists, otherwise keep original
+                new_key = field_mapping.get(old_key, old_key)
+                normalized_item[new_key] = value
+            # Replace the item with normalized version
+            content['seqQC'][content['seqQC'].index(item)] = normalized_item
+    
+    return content
+
+
 def validate_report_content(content: Dict, schema_file: str = SPECIFICATION) -> None:
     """
     Validate a report content input JSON object against the schema specification
diff --git a/pori_python/ipr/main.py b/pori_python/ipr/main.py
index cbb7c128..eea1987e 100644
--- a/pori_python/ipr/main.py
+++ b/pori_python/ipr/main.py
@@ -27,6 +27,7 @@
 from .inputs import (
     check_comparators,
     check_variant_links,
+    normalize_seqqc,
     preprocess_copy_variants,
     preprocess_cosmic,
     preprocess_expression_variants,
@@ -380,6 +381,7 @@ def ipr_report(
         return ipr_result
 
     # validate the JSON content follows the specification
+    normalize_seqqc(content)
     try:
         validate_report_content(content)
     except jsonschema.exceptions.ValidationError as err:
diff --git a/tests/test_ipr/test_inputs.py b/tests/test_ipr/test_inputs.py
index 4bdd6b6d..d6e12493 100644
--- a/tests/test_ipr/test_inputs.py
+++ b/tests/test_ipr/test_inputs.py
@@ -17,6 +17,7 @@
     check_comparators,
     check_variant_links,
     create_graphkb_sv_notation,
+    normalize_seqqc,
     preprocess_copy_variants,
     preprocess_cosmic,
     preprocess_expression_variants,
@@ -558,3 +559,117 @@ def test_valid_json_inputs(example_name: str):
     with open(os.path.join(DATA_DIR, 'json_examples', f'{example_name}.json'), 'r') as fh:
         content = json.load(fh)
     validate_report_content(content)
+
+
+class TestNormalizeSeqQC:
+    """Test seqQC field name normalization from production format to schema format."""
+
+    def test_normalize_seqqc_production_format(self):
+        """Test normalization of production report field names."""
+        content = {
+            'seqQC': [
+                {
+                    'Reads': '2407M',
+                    'Sample': 'Tumour DNA',
+                    'Library': 'LIB0001',
+                    'Coverage': '96X',
+                    'Input_ng': 400,
+                    'Input_ug': '',
+                    'Protocol': 'Genome Shotgun FFPE 4.2',
+                    'Sample Name': 'SAMPLE-T-01',
+                    'bioQC': 'Passed',
+                    'labQC': 'Approved',
+                    'Duplicate_Reads_Perc': 18,
+                }
+            ]
+        }
+
+        result = normalize_seqqc(content)
+
+        assert result['seqQC'][0]['reads'] == '2407M'
+        assert result['seqQC'][0]['sample'] == 'Tumour DNA'
+        assert result['seqQC'][0]['library'] == 'LIB0001'
+        assert result['seqQC'][0]['coverage'] == '96X'
+        assert result['seqQC'][0]['inputNg'] == 400
+        assert result['seqQC'][0]['inputUg'] == ''
+        assert result['seqQC'][0]['protocol'] == 'Genome Shotgun FFPE 4.2'
+        assert result['seqQC'][0]['sampleName'] == 'SAMPLE-T-01'
+        assert result['seqQC'][0]['bioQC'] == 'Passed'
+        assert result['seqQC'][0]['labQC'] == 'Approved'
+        assert result['seqQC'][0]['duplicateReadsPerc'] == 18
+        # Old keys should be gone
+        assert 'Reads' not in result['seqQC'][0]
+        assert 'Sample' not in result['seqQC'][0]
+
+    def test_normalize_seqqc_already_normalized(self):
+        """Test that already-normalized field names are preserved."""
+        content = {
+            'seqQC': [
+                {
+                    'reads': '1200M',
+                    'sample': 'Constitutional DNA',
+                    'library': 'LIB0002',
+                    'coverage': '40x',
+                    'inputNg': '300',
+                    'protocol': 'WGS',
+                    'sampleName': 'SAMPLE-N-01',
+                    'bioQC': 'passed',
+                    'labQC': 'passed',
+                    'duplicateReadsPerc': '8.1',
+                }
+            ]
+        }
+
+        result = normalize_seqqc(content)
+
+        # All normalized keys should still exist with same values
+        assert result['seqQC'][0]['reads'] == '1200M'
+        assert result['seqQC'][0]['sample'] == 'Constitutional DNA'
+        assert result['seqQC'][0]['inputNg'] == '300'
+
+    def test_normalize_seqqc_no_seqqc_field(self):
+        """Test that content without seqQC is unchanged."""
+        content = {
+            'patientId': 'TEST001',
+            'project': 'TEST',
+        }
+
+        result = normalize_seqqc(content)
+
+        assert result == content
+        assert 'seqQC' not in result
+
+    def test_normalize_seqqc_empty_seqqc(self):
+        """Test that empty seqQC array is handled."""
+        content = {'seqQC': []}
+
+        result = normalize_seqqc(content)
+
+        assert result['seqQC'] == []
+
+    def test_normalize_seqqc_multiple_items(self):
+        """Test normalization of multiple seqQC items."""
+        content = {
+            'seqQC': [
+                {
+                    'Reads': '2534M',
+                    'Sample': 'Tumour DNA',
+                    'Duplicate_Reads_Perc': 12.3,
+                },
+                {
+                    'Reads': '1200M',
+                    'Sample': 'Constitutional DNA',
+                    'Duplicate_Reads_Perc': 8.1,
+                },
+            ]
+        }
+
+        result = normalize_seqqc(content)
+
+        assert len(result['seqQC']) == 2
+        assert result['seqQC'][0]['reads'] == '2534M'
+        assert result['seqQC'][0]['sample'] == 'Tumour DNA'
+        assert result['seqQC'][0]['duplicateReadsPerc'] == 12.3
+        assert result['seqQC'][1]['reads'] == '1200M'
+        assert result['seqQC'][1]['sample'] == 'Constitutional DNA'
+        assert result['seqQC'][1]['duplicateReadsPerc'] == 8.1

From e0ee8ec7cdc8e7901cbde5e53877898f45de3bc1 Mon Sep 17 00:00:00 2001
From: Eleanor Lewis <elewis@bcgsc.ca>
Date: Fri, 8 May 2026 10:49:43 -0700
Subject: [PATCH 5/8] format with ruff

---
 pori_python/ipr/inputs.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/pori_python/ipr/inputs.py b/pori_python/ipr/inputs.py
index f76843e0..2dfac499 100644
--- a/pori_python/ipr/inputs.py
+++ b/pori_python/ipr/inputs.py
@@ -804,13 +804,13 @@ def check_null(checker, instance):
 def normalize_seqqc(content: Dict) -> Dict:
     """
     Normalize seqQC field names from production report format to schema format.
-    
+
     Maps inconsistent casing and underscores in field names to match content.spec.json requirements.
     For example: 'Reads' -> 'reads', 'Sample_Name' -> 'sampleName', etc.
-    
+
     Args:
         content: Report content dictionary that may contain seqQC array
-        
+
     Returns:
         The content dictionary with seqQC fields normalized in-place
     """
@@ -826,7 +826,7 @@ def normalize_seqqc(content: Dict) -> Dict:
         'Sample Name': 'sampleName',
         'Duplicate_Reads_Perc': 'duplicateReadsPerc',
     }
-    
+
     if 'seqQC' in content and isinstance(content['seqQC'], list):
         for item in content['seqQC']:
             # Create a new dict with normalized keys
@@ -837,7 +837,7 @@ def normalize_seqqc(content: Dict) -> Dict:
                 normalized_item[new_key] = value
             # Replace the item with normalized version
             content['seqQC'][content['seqQC'].index(item)] = normalized_item
-    
+
     return content
 
 

From dcedd6390c095baac3c7750fc89bf8b7c2e0e1c0 Mon Sep 17 00:00:00 2001
From: Eleanor Lewis <elewis@bcgsc.ca>
Date: Mon, 11 May 2026 13:01:55 -0700
Subject: [PATCH 6/8] fix issues raised in pr

---
 pori_python/ipr/content.spec.json |  6 +++++
 pori_python/ipr/inputs.py         |  8 +++---
 tests/test_ipr/test_inputs.py     | 45 +++++++++++++++++++++++++++++++
 3 files changed, 56 insertions(+), 3 deletions(-)

diff --git a/pori_python/ipr/content.spec.json b/pori_python/ipr/content.spec.json
index 830218ae..c994ba6f 100644
--- a/pori_python/ipr/content.spec.json
+++ b/pori_python/ipr/content.spec.json
@@ -971,6 +971,8 @@
                         "example": "500",
                         "type": [
                             "string",
+                            "number",
+                            "integer",
                             "null"
                         ]
                     },
@@ -979,6 +981,8 @@
                         "example": "0.5",
                         "type": [
                             "string",
+                            "number",
+                            "integer",
                             "null"
                         ]
                     },
@@ -1003,6 +1007,8 @@
                         "example": "12.3",
                         "type": [
                             "string",
+                            "number",
+                            "integer",
                             "null"
                         ]
                     }
diff --git a/pori_python/ipr/inputs.py b/pori_python/ipr/inputs.py
index 2dfac499..5c7f10fb 100644
--- a/pori_python/ipr/inputs.py
+++ b/pori_python/ipr/inputs.py
@@ -806,7 +806,7 @@ def normalize_seqqc(content: Dict) -> Dict:
     Normalize seqQC field names from production report format to schema format.
 
     Maps inconsistent casing and underscores in field names to match content.spec.json requirements.
-    For example: 'Reads' -> 'reads', 'Sample_Name' -> 'sampleName', etc.
+    For example: 'Reads' -> 'reads', 'Sample Name' -> 'sampleName', etc.
 
     Args:
         content: Report content dictionary that may contain seqQC array
@@ -828,7 +828,9 @@ def normalize_seqqc(content: Dict) -> Dict:
     }
 
     if 'seqQC' in content and isinstance(content['seqQC'], list):
-        for item in content['seqQC']:
+        for i, item in enumerate(content['seqQC']):
+            if not isinstance(item, dict):
+                continue
             # Create a new dict with normalized keys
             normalized_item = {}
             for old_key, value in item.items():
@@ -836,7 +838,7 @@ def normalize_seqqc(content: Dict) -> Dict:
                 new_key = field_mapping.get(old_key, old_key)
                 normalized_item[new_key] = value
             # Replace the item with normalized version
-            content['seqQC'][content['seqQC'].index(item)] = normalized_item
+            content['seqQC'][i] = normalized_item
 
     return content
 
diff --git a/tests/test_ipr/test_inputs.py b/tests/test_ipr/test_inputs.py
index d6e12493..f3cd6f99 100644
--- a/tests/test_ipr/test_inputs.py
+++ b/tests/test_ipr/test_inputs.py
@@ -673,3 +673,48 @@ def test_normalize_seqqc_multiple_items(self):
         assert result['seqQC'][1]['reads'] == '1200M'
         assert result['seqQC'][1]['sample'] == 'Constitutional DNA'
         assert result['seqQC'][1]['duplicateReadsPerc'] == 8.1
+
+    def test_normalize_seqqc_numeric_fields_pass_validation(self):
+        """Test that integer/float values for inputNg, inputUg, duplicateReadsPerc pass schema validation."""
+        content = {
+            'patientId': 'PATIENT001',
+            'kbDiseaseMatch': 'colorectal cancer',
+            'project': 'TEST',
+            'template': 'genomic',
+            'seqQC': [
+                {
+                    'reads': '2407M',
+                    'sample': 'Tumour DNA',
+                    'library': 'LIB0001',
+                    'inputNg': 400,
+                    'inputUg': 0.4,
+                    'duplicateReadsPerc': 18,
+                }
+            ],
+        }
+        # Should not raise
+        validate_report_content(content)
+
+    def test_normalize_seqqc_numeric_float_duplicateReadsPerc_passes_validation(self):
+        """Test that a float duplicateReadsPerc value passes schema validation after normalization."""
+        content = {
+            'patientId': 'PATIENT001',
+            'kbDiseaseMatch': 'colorectal cancer',
+            'project': 'TEST',
+            'template': 'genomic',
+            'seqQC': [
+                {
+                    'Reads': '2534M',
+                    'Sample': 'Tumour DNA',
+                    'Duplicate_Reads_Perc': 12.3,
+                    'Input_ng': 500,
+                    'Input_ug': 0.5,
+                }
+            ],
+        }
+        result = normalize_seqqc(content)
+        assert result['seqQC'][0]['duplicateReadsPerc'] == 12.3
+        assert result['seqQC'][0]['inputNg'] == 500
+        assert result['seqQC'][0]['inputUg'] == 0.5
+        # Should not raise after normalization
+        validate_report_content(result)

From 416146e0ba5b9b67d73ef89e65cf2cfad4c3153c Mon Sep 17 00:00:00 2001
From: Eleanor Lewis <elewis@bcgsc.ca>
Date: Mon, 11 May 2026 13:14:41 -0700
Subject: [PATCH 7/8] Potential fix for pull request finding

Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com>
---
 pori_python/ipr/inputs.py | 18 +++++++++++++-----
 1 file changed, 13 insertions(+), 5 deletions(-)

diff --git a/pori_python/ipr/inputs.py b/pori_python/ipr/inputs.py
index 5c7f10fb..25901fc3 100644
--- a/pori_python/ipr/inputs.py
+++ b/pori_python/ipr/inputs.py
@@ -826,17 +826,25 @@ def normalize_seqqc(content: Dict) -> Dict:
         'Sample Name': 'sampleName',
         'Duplicate_Reads_Perc': 'duplicateReadsPerc',
     }
+    normalized_keys = set(field_mapping.values())
 
     if 'seqQC' in content and isinstance(content['seqQC'], list):
         for i, item in enumerate(content['seqQC']):
             if not isinstance(item, dict):
                 continue
-            # Create a new dict with normalized keys
+            # Preserve already-normalized keys (and unrelated keys) first so
+            # legacy aliases cannot overwrite them based on insertion order.
             normalized_item = {}
-            for old_key, value in item.items():
-                # Use mapped key if it exists, otherwise keep original
-                new_key = field_mapping.get(old_key, old_key)
-                normalized_item[new_key] = value
+            for key, value in item.items():
+                if key in normalized_keys or key not in field_mapping:
+                    normalized_item[key] = value
+
+            # Add legacy aliases only when the normalized key is not already
+            # present. This makes collision handling explicit and stable.
+            for old_key, new_key in field_mapping.items():
+                if old_key in item and new_key not in normalized_item:
+                    normalized_item[new_key] = item[old_key]
+
             # Replace the item with normalized version
             content['seqQC'][i] = normalized_item
 

From e2c25efa7017b42af99c19805eee475a7a31402e Mon Sep 17 00:00:00 2001
From: Eleanor Lewis <elewis@bcgsc.ca>
Date: Mon, 11 May 2026 13:15:35 -0700
Subject: [PATCH 8/8] normalize before upload_json as well

---
 pori_python/ipr/main.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/pori_python/ipr/main.py b/pori_python/ipr/main.py
index 9d8f0568..c249b791 100644
--- a/pori_python/ipr/main.py
+++ b/pori_python/ipr/main.py
@@ -403,6 +403,10 @@ def ipr_report(
         ipr_result = ipr_conn.validate_json(content)
         return ipr_result
 
+    # seqQC normalization is only a bridging measure for legacy input, so it runs
+    # after the validate_json early return, which should receive non-normalized JSON
+    normalize_seqqc(content)
+
     if upload_json:
         if not ipr_conn:
             raise ValueError('ipr_url required to upload json')
@@ -412,7 +416,6 @@ def ipr_report(
         return ipr_result
 
     # validate the JSON content follows the specification
-    normalize_seqqc(content)
     try:
         validate_report_content(content)
     except jsonschema.exceptions.ValidationError as err: