diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml
index 607c0c6e..7804a8a0 100644
--- a/.github/workflows/pytest.yml
+++ b/.github/workflows/pytest.yml
@@ -28,15 +28,10 @@ jobs:
         run: |
           python -m pip install --upgrade pip setuptools
           pip install -e .[test]  # coverage reports need -e to capture properly
-      - name: Lint with flake8
+      - name: Check with ruff
         run: |
-          pip install flake8
-          # stop the build if there are Python syntax errors or undefined names
-          flake8 pori_python --count --select=E9,F63,F7,F82 --show-source --statistics
-      - name: Check with black
-        run: |
-          pip install black
-          black --check -S -l 100 pori_python tests
+          pip install ruff
+          ruff format --check pori_python tests
       - name: Full Tests with pytest
         run: |
           pip list
@@ -46,6 +41,7 @@ jobs:
           IPR_PASS: ${{ secrets.IPR_TEST_PASSWORD }}
           GRAPHKB_USER: ${{ secrets.GKB_TEST_USER }}
           GRAPHKB_PASS: ${{ secrets.GKB_TEST_PASS }}
+          GRAPHKB_URL: ${{ secrets.GKB_TEST_URL }}
           # SDEV-3381 - Turn off integration tests temporarily, till efficiency is increased
           # turn on integration tests for one python version only
           EXCLUDE_INTEGRATION_TESTS: ${{ matrix.python-version != '3.11' }}
diff --git a/.github/workflows/quick-pytest.yml b/.github/workflows/quick-pytest.yml
index d4f70223..aef2b56d 100644
--- a/.github/workflows/quick-pytest.yml
+++ b/.github/workflows/quick-pytest.yml
@@ -25,22 +25,19 @@ jobs:
         run: |
           python -m pip install --upgrade pip setuptools
           pip install -e .[test]  # coverage reports need -e to capture properly
-      - name: Lint with flake8
+      - name: Check with ruff
        run: |
-          pip install flake8
-          # stop the build if there are Python syntax errors or undefined names
-          flake8 pori_python --count --select=E9,F63,F7,F82 --show-source --statistics
-      - name: Check with black
-        run: |
-          pip install black
-          black --check -S -l 100 pori_python tests
+          pip install ruff
+          ruff format --check pori_python tests
       - name: Short Tests with pytest
         run: pytest --junitxml=junit/test-results-${{ matrix.python-version }}.xml --cov ipr --cov-report term --cov-report xml
         env:
           IPR_USER: ${{ secrets.IPR_TEST_USER }}
           IPR_PASS: ${{ secrets.IPR_TEST_PASSWORD }}
+          IPR_URL: ${{ secrets.IPR_TEST_URL }}
           GRAPHKB_USER: ${{ secrets.GKB_TEST_USER }}
           GRAPHKB_PASS: ${{ secrets.GKB_TEST_PASS }}
+          GRAPHKB_URL: ${{ secrets.GKB_TEST_URL }}
           EXCLUDE_INTEGRATION_TESTS: 1
           # EXCLUDE_INTEGRATION_TESTS: ${{ matrix.python-version != '3.11' }}
-        if: github.event_name != 'pull_request'
\ No newline at end of file
+        if: github.event_name != 'pull_request'
diff --git a/README.md b/README.md
index 44d015ec..192ab2a3 100644
--- a/README.md
+++ b/README.md
@@ -46,11 +46,9 @@ pip install -e .[dev]
 
 Run the tests:
 
-Export usernames, passwords, and set test options.
+Export usernames, passwords, and test options.
 
-Note that IPR tests will try to use the BCGSC production GraphKB API by default.
-If you want to test interaction with a different instance, you will need to
-set the GraphKB variables.
+IPR_URL and GRAPHKB_URL values must also be set.
 
 Set EXCLUDE vars to 1 if you don't want to run these tests.
 ONCOKB and BCGSC tests are enabled by default.
@@ -67,11 +65,12 @@ export EXCLUDE_ONCOKB_TESTS=1
 ```
 
 If you want to run tests that upload reports to a live IPR instance,
-specify the url of the IPR API you want to use and set the test var to 1.
+specify the url of the IPR API you want to use and set the test var
+INCLUDE_UPLOAD_TESTS to 1.
 These tests are disabled by default.
 The created reports are deleted by default.
 If you want to keep them,
-set DELETE_UPLOAD_TEST_REPORTS to 0 in the env.
+set DELETE_UPLOAD_TEST_REPORTS to 0.
 
 ```bash
 export IPR_TEST_URL='http://localhost:8081/api'
@@ -84,14 +83,16 @@ pytest tests
 ```
 
 ### JSON Validate and Upload to IPR
+An IPR_URL must be provided either as an environment variable or a command-line argument.
+
 If you only want to validate the json content, use
 ```bash
-ipr --password $IPR_PASS -c 'path/to/content.json' --validate_json
+ipr --password $IPR_PASS -c 'path/to/content.json' --validate_json --ipr_url $IPR_URL
 ```
 
 If you only want to upload the json directly to ipr and skip all the preprocessing, use
 ```bash
-ipr --password $IPR_PASS -c 'path/to/content.json' --upload_json
+ipr --password $IPR_PASS -c 'path/to/content.json' --upload_json --ipr_url $IPR_URL
 ```
 
 ## Documentation
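With the hardcoded BCGSC defaults gone, a missing URL now fails fast instead of silently hitting production. A minimal pre-flight check along these lines can save a confusing traceback — a sketch, not a script that ships with this repo:

```python
import os

# IPR_URL and GRAPHKB_URL no longer fall back to BCGSC production defaults,
# so verify both are exported before invoking pytest.
missing = [name for name in ('IPR_URL', 'GRAPHKB_URL') if not os.environ.get(name)]
if missing:
    raise SystemExit(f"missing required environment variables: {', '.join(missing)}")
```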
diff --git a/pori_python/graphkb/__init__.py b/pori_python/graphkb/__init__.py
index a6fdd663..acce57aa 100644
--- a/pori_python/graphkb/__init__.py
+++ b/pori_python/graphkb/__init__.py
@@ -1,2 +1 @@
-from .constants import DEFAULT_URL  # noqa: F401
 from .util import GraphKBConnection, logger  # noqa: F401
diff --git a/pori_python/graphkb/constants.py b/pori_python/graphkb/constants.py
index 4861d70c..fe22f4a0 100644
--- a/pori_python/graphkb/constants.py
+++ b/pori_python/graphkb/constants.py
@@ -4,113 +4,108 @@ from pori_python.types import CategoryBaseTermMapping
 
 DEFAULT_LIMIT = 1000
-GKB_BASE_URL = "https://graphkb-api.bcgsc.ca/api"
-GKB_STAGING_URL = "https://graphkbstaging-api.bcgsc.ca/api"
-GKB_DEV_URL = "https://graphkbdev-api.bcgsc.ca/api"
-DEFAULT_URL = GKB_BASE_URL
-PREFERRED_GENE_SOURCE = "#39:5"  # HGNC
-PREFERRED_GENE_SOURCE_NAME = "HGNC"
+PREFERRED_GENE_SOURCE_NAME = 'HGNC'
 
-BASE_RETURN_PROPERTIES = ["@rid", "@class"]
+BASE_RETURN_PROPERTIES = ['@rid', '@class']
 
 GENERIC_RETURN_PROPERTIES = [
-    "name",
-    "sourceId",
-    "sourceIdVersion",
-    "source.name",
-    "source.@rid",
-    "displayName",
-    "deprecated",
+    'name',
+    'sourceId',
+    'sourceIdVersion',
+    'source.name',
+    'source.@rid',
+    'displayName',
+    'deprecated',
 ] + BASE_RETURN_PROPERTIES
 
-GENE_RETURN_PROPERTIES = ["biotype"] + GENERIC_RETURN_PROPERTIES
+GENE_RETURN_PROPERTIES = ['biotype'] + GENERIC_RETURN_PROPERTIES
 
 VARIANT_RETURN_PROPERTIES = (
     BASE_RETURN_PROPERTIES
-    + [f"type.{p}" for p in GENERIC_RETURN_PROPERTIES]
-    + [f"reference1.{p}" for p in GENE_RETURN_PROPERTIES]
-    + [f"reference2.{p}" for p in GENE_RETURN_PROPERTIES]
-    + ["zygosity", "germline", "displayName"]
+    + [f'type.{p}' for p in GENERIC_RETURN_PROPERTIES]
+    + [f'reference1.{p}' for p in GENE_RETURN_PROPERTIES]
+    + [f'reference2.{p}' for p in GENE_RETURN_PROPERTIES]
+    + ['zygosity', 'germline', 'displayName']
 )
 
 POS_VARIANT_RETURN_PROPERTIES = VARIANT_RETURN_PROPERTIES + [
-    "break1Start",
-    "break1End",
-    "break2Start",
-    "break2End",
-    "break1Repr",
-    "break2Repr",
-    "refSeq",
-    "untemplatedSeq",
-    "untemplatedSeqSize",
-    "truncation",
-    "assembly",
+    'break1Start',
+    'break1End',
+    'break2Start',
+    'break2End',
+    'break1Repr',
+    'break2Repr',
+    'refSeq',
+    'untemplatedSeq',
+    'untemplatedSeqSize',
+    'truncation',
+    'assembly',
 ]
 
 STATEMENT_RETURN_PROPERTIES = (
     BASE_RETURN_PROPERTIES
-    + ["displayNameTemplate", "sourceId", "source.name", "source.displayName"]
-    + [f"conditions.{p}" for p in GENERIC_RETURN_PROPERTIES]
-    + [f"subject.{p}" for p in GENERIC_RETURN_PROPERTIES]
-    + [f"evidence.{p}" for p in GENERIC_RETURN_PROPERTIES]
-    + [f"relevance.{p}" for p in GENERIC_RETURN_PROPERTIES]
-    + [f"evidenceLevel.{p}" for p in GENERIC_RETURN_PROPERTIES]
-    + ["reviewStatus"]
+    + ['displayNameTemplate', 'sourceId', 'source.name', 'source.displayName']
+    + [f'conditions.{p}' for p in GENERIC_RETURN_PROPERTIES]
+    + [f'subject.{p}' for p in GENERIC_RETURN_PROPERTIES]
+    + [f'evidence.{p}' for p in GENERIC_RETURN_PROPERTIES]
+    + [f'relevance.{p}' for p in GENERIC_RETURN_PROPERTIES]
+    + [f'evidenceLevel.{p}' for p in GENERIC_RETURN_PROPERTIES]
+    + ['reviewStatus']
 )
 
-ONCOKB_SOURCE_NAME = "oncokb"
-TSO500_SOURCE_NAME = "tso500"
-ONCOGENE = "oncogenic"
-TUMOUR_SUPPRESSIVE = "tumour suppressive"
-CANCER_GENE = "cancer gene"
-FUSION_NAMES = ["structural variant", "fusion"]
+ONCOKB_SOURCE_NAME = 'oncokb'
+TSO500_SOURCE_NAME = 'tso500'
+ONCOGENE = 'oncogenic'
+TUMOUR_SUPPRESSIVE = 'tumour suppressive'
+CANCER_GENE = 'cancer gene'
+FUSION_NAMES = ['structural variant', 'fusion']
 
-GSC_PHARMACOGENOMIC_SOURCE_EXCLUDE_LIST = ["cancer genome interpreter", "civic"]
-GSC_PHARMACOGENOMIC_SOURCE_DISPLAYNAME_EXCLUDE_LIST = ["CGI", "CIViC"]
+GSC_PHARMACOGENOMIC_SOURCE_EXCLUDE_LIST = ['cancer genome interpreter', 'civic']
+GSC_PHARMACOGENOMIC_SOURCE_DISPLAYNAME_EXCLUDE_LIST = ['CGI', 'CIViC']
 
-BASE_THERAPEUTIC_TERMS = ["therapeutic efficacy", "eligibility"]
+BASE_THERAPEUTIC_TERMS = ['therapeutic efficacy', 'eligibility']
 # the order here is the order these are applied, the first category matched is returned
 RELEVANCE_BASE_TERMS: CategoryBaseTermMapping = [
-    ("therapeutic", BASE_THERAPEUTIC_TERMS),
-    ("diagnostic", ["diagnostic indicator"]),
-    ("prognostic", ["prognostic indicator"]),
-    ("pharmacogenomic", ["metabolism", "toxicity", "dosage"]),
-    ("cancer predisposition", ["pathogenic"]),
-    ("biological", ["functional effect", "tumourigenesis", "predisposing"]),
+    ('therapeutic', BASE_THERAPEUTIC_TERMS),
+    ('diagnostic', ['diagnostic indicator']),
+    ('prognostic', ['prognostic indicator']),
+    ('pharmacogenomic', ['metabolism', 'toxicity', 'dosage']),
+    ('cancer predisposition', ['pathogenic']),
+    ('biological', ['functional effect', 'tumourigenesis', 'predisposing']),
 ]
 
-FAILED_REVIEW_STATUS = "failed"
+FAILED_REVIEW_STATUS = 'failed'
 
-CHROMOSOMES_HG38 = [f"chr{i}" for i in range(1, 23)] + ["chrX", "chrY", "chrM"]
-CHROMOSOMES_HG19 = [str(i) for i in range(1, 23)] + ["x", "y", "mt"]
+CHROMOSOMES_HG38 = [f'chr{i}' for i in range(1, 23)] + ['chrX', 'chrY', 'chrM']
+CHROMOSOMES_HG19 = [str(i) for i in range(1, 23)] + ['x', 'y', 'mt']
 CHROMOSOMES = CHROMOSOMES_HG38 + CHROMOSOMES_HG19
 
-AMBIGUOUS_AA = ["x", "?", "X"]
+AMBIGUOUS_AA = ['x', '?', 'X']
 AA_3to1_MAPPING = {
-    "Ala": "A",
-    "Arg": "R",
-    "Asn": "N",
-    "Asp": "D",
-    "Asx": "B",
-    "Cys": "C",
-    "Glu": "E",
-    "Gln": "Q",
-    "Glx": "Z",
-    "Gly": "G",
-    "His": "H",
-    "Ile": "I",
-    "Leu": "L",
-    "Lys": "K",
-    "Met": "M",
-    "Phe": "F",
-    "Pro": "P",
-    "Ser": "S",
-    "Thr": "T",
-    "Trp": "W",
-    "Tyr": "Y",
-    "Val": "V",
-    "Ter": "*",
+    'Ala': 'A',
+    'Arg': 'R',
+    'Asn': 'N',
+    'Asp': 'D',
+    'Asx': 'B',
+    'Cys': 'C',
+    'Glu': 'E',
+    'Gln': 'Q',
+    'Glx': 'Z',
+    'Gly': 'G',
+    'His': 'H',
+    'Ile': 'I',
+    'Leu': 'L',
+    'Lys': 'K',
+    'Met': 'M',
+    'Phe': 'F',
+    'Pro': 'P',
+    'Ser': 'S',
+    'Thr': 'T',
+    'Trp': 'W',
+    'Tyr': 'Y',
+    'Val': 'V',
+    'Ter': '*',
 }
@@ -132,89 +127,89 @@ def __getitem__(self, key):
 
 
 INPUT_COPY_CATEGORIES = IterableNamespace(
-    AMP="amplification",
-    ANY_GAIN="copy gain",
-    ANY_LOSS="copy loss",
-    DEEP="deep deletion",
-    GAIN="low level copy gain",
-    LOSS="shallow deletion",
+    AMP='amplification',
+    ANY_GAIN='copy gain',
+    ANY_LOSS='copy loss',
+    DEEP='deep deletion',
+    GAIN='low level copy gain',
+    LOSS='shallow deletion',
 )
 
 INPUT_EXPRESSION_CATEGORIES = IterableNamespace(
-    UP="increased expression", DOWN="reduced expression"
+    UP='increased expression', DOWN='reduced expression'
 )
 
 # From: https://github.com/bcgsc/pori_graphkb_parser/blob/ae3738842a4c208ab30f58c08ae987594d632504/src/constants.ts#L33-L80
 TYPES_TO_NOTATION: Dict[str, str] = {
-    "acetylation": "ac",
-    "copy gain": "copygain",
-    "copy loss": "copyloss",
-    "deletion": "del",
-    "duplication": "dup",
-    "extension": "ext",
-    "frameshift": "fs",
-    "fusion": "fusion",
-    "indel": "delins",
-    "insertion": "ins",
-    "inversion": "inv",
-    "inverted translocation": "itrans",
-    "methylation": "me",
-    "missense mutation": "mis",
-    "mutation": "mut",
-    "nonsense mutation": ">",
-    "phosphorylation": "phos",
-    "splice-site": "spl",
-    "substitution": ">",
-    "translocation": "trans",
-    "truncating frameshift mutation": "fs",
-    "ubiquitination": "ub",
+    'acetylation': 'ac',
+    'copy gain': 'copygain',
+    'copy loss': 'copyloss',
+    'deletion': 'del',
+    'duplication': 'dup',
+    'extension': 'ext',
+    'frameshift': 'fs',
+    'fusion': 'fusion',
+    'indel': 'delins',
+    'insertion': 'ins',
+    'inversion': 'inv',
+    'inverted translocation': 'itrans',
+    'methylation': 'me',
+    'missense mutation': 'mis',
+    'mutation': 'mut',
+    'nonsense mutation': '>',
+    'phosphorylation': 'phos',
+    'splice-site': 'spl',
+    'substitution': '>',
+    'translocation': 'trans',
+    'truncating frameshift mutation': 'fs',
+    'ubiquitination': 'ub',
     # deprecated forms and aliases
-    "frameshift mutation": "fs",
-    "frameshift truncation": "fs",
-    "missense variant": "mis",
-    "truncating frameshift": "fs",
-    "missense": "mis",
-    "mutations": "mut",
-    "nonsense": ">",
+    'frameshift mutation': 'fs',
+    'frameshift truncation': 'fs',
+    'missense variant': 'mis',
+    'truncating frameshift': 'fs',
+    'missense': 'mis',
+    'mutations': 'mut',
+    'nonsense': '>',
 }
 
 # For match.type_screening() [KBDEV-1056]
-DEFAULT_NON_STRUCTURAL_VARIANT_TYPE = "mutation"
+DEFAULT_NON_STRUCTURAL_VARIANT_TYPE = 'mutation'
 STRUCTURAL_VARIANT_SIZE_THRESHOLD = 48  # bp
 STRUCTURAL_VARIANT_TYPES = [
-    "structural variant",
-    "insertion",
-    "in-frame insertion",
-    "deletion",
-    "deletion polymorphism",
-    "in-frame deletion",
-    "translocation",
-    "inverted translocation",
-    "inversion",
-    "indel",
-    "fusion",
-    "out-of-frame fusion",
-    "oncogenic fusion",
-    "in-frame fusion",
-    "disruptive fusion",
-    "duplication",
-    "internal duplication",
-    "tandem duplication",
-    "internal tandem duplication",
-    "itd",
-    "domain duplication",
-    "kinase domain duplication",
-    "copy variant",
-    "copy number variation",
-    "copy number variant",
-    "copy loss",
-    "copy number loss",
-    "shallow deletion",
-    "deep deletion",
-    "gene deletion",
-    "copy gain",
-    "copy number gain",
-    "low level copy gain",
-    "amplification",
-    "focal amplification",
-    "rearrangement",
+    'structural variant',
+    'insertion',
+    'in-frame insertion',
+    'deletion',
+    'deletion polymorphism',
+    'in-frame deletion',
+    'translocation',
+    'inverted translocation',
+    'inversion',
+    'indel',
+    'fusion',
+    'out-of-frame fusion',
+    'oncogenic fusion',
+    'in-frame fusion',
+    'disruptive fusion',
+    'duplication',
+    'internal duplication',
+    'tandem duplication',
+    'internal tandem duplication',
+    'itd',
+    'domain duplication',
+    'kinase domain duplication',
+    'copy variant',
+    'copy number variation',
+    'copy number variant',
+    'copy loss',
+    'copy number loss',
+    'shallow deletion',
+    'deep deletion',
+    'gene deletion',
+    'copy gain',
+    'copy number gain',
+    'low level copy gain',
+    'amplification',
+    'focal amplification',
+    'rearrangement',
 ]
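The ordering comment on RELEVANCE_BASE_TERMS above is load-bearing: consumers resolve a relevance to the first matching category. A simplified illustration of that contract (the real consumers, e.g. statement.categorize_relevance, first expand each base-term list into a full term set through the API):

```python
from pori_python.graphkb.constants import RELEVANCE_BASE_TERMS


def first_matching_category(relevance_name: str) -> str:
    # Walk the category/base-term pairs in declaration order; the first hit wins.
    for category, base_terms in RELEVANCE_BASE_TERMS:
        if relevance_name in base_terms:
            return category
    return ''


# 'pathogenic' is listed under 'cancer predisposition', which is checked
# before the catch-all 'biological' category, so that category is returned.
assert first_matching_category('pathogenic') == 'cancer predisposition'
```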
diff --git a/pori_python/graphkb/genes.py b/pori_python/graphkb/genes.py
index e61e3cf5..09da3ed7 100644
--- a/pori_python/graphkb/genes.py
+++ b/pori_python/graphkb/genes.py
@@ -17,7 +17,6 @@
     GSC_PHARMACOGENOMIC_SOURCE_EXCLUDE_LIST,
     ONCOGENE,
     ONCOKB_SOURCE_NAME,
-    PREFERRED_GENE_SOURCE,
     PREFERRED_GENE_SOURCE_NAME,
     RELEVANCE_BASE_TERMS,
     TSO500_SOURCE_NAME,
@@ -35,14 +34,14 @@ def _get_tumourigenesis_genes_list(
         List[Statement],
         conn.query(
             {
-                "target": "Statement",
-                "filters": {
-                    "AND": [
-                        {"source": {"target": "Source", "filters": {"name": sources}}},
-                        {"relevance": {"target": "Vocabulary", "filters": {"name": relevance}}},
+                'target': 'Statement',
+                'filters': {
+                    'AND': [
+                        {'source': {'target': 'Source', 'filters': {'name': sources}}},
+                        {'relevance': {'target': 'Vocabulary', 'filters': {'name': relevance}}},
                     ]
                 },
-                "returnProperties": [f"subject.{prop}" for prop in GENE_RETURN_PROPERTIES],
+                'returnProperties': [f'subject.{prop}' for prop in GENE_RETURN_PROPERTIES],
             },
             ignore_cache=ignore_cache,
         ),
@@ -51,9 +50,9 @@ def _get_tumourigenesis_genes_list(
 
     genes: Dict[str, Ontology] = {}
     for statement in statements:
-        if statement["subject"].get("biotype", "") == "gene":
-            record_id = statement["subject"]["@rid"]
-            genes[record_id] = statement["subject"]
+        if statement['subject'].get('biotype', '') == 'gene':
+            record_id = statement['subject']['@rid']
+            genes[record_id] = statement['subject']
 
     return [gene for gene in genes.values()]
 
@@ -101,35 +100,35 @@ def get_therapeutic_associated_genes(graphkb_conn: GraphKBConnection) -> List[On
     therapeutic_relevance = get_terms_set(graphkb_conn, BASE_THERAPEUTIC_TERMS)
     statements = graphkb_conn.query(
         {
-            "target": "Statement",
-            "filters": {"relevance": sorted(list(therapeutic_relevance))},
-            "returnProperties": ["reviewStatus"]
-            + [f"conditions.{prop}" for prop in GENE_RETURN_PROPERTIES]
+            'target': 'Statement',
+            'filters': {'relevance': sorted(list(therapeutic_relevance))},
+            'returnProperties': ['reviewStatus']
+            + [f'conditions.{prop}' for prop in GENE_RETURN_PROPERTIES]
             + [
-                f"conditions.reference{ref}.{prop}"
+                f'conditions.reference{ref}.{prop}'
                 for prop in GENE_RETURN_PROPERTIES
-                for ref in ("1", "2")
+                for ref in ('1', '2')
             ],
         }
     )
     genes: List[Ontology] = []
     for statement in statements:
         statement = cast(Statement, statement)
-        if statement["reviewStatus"] == "failed":
+        if statement['reviewStatus'] == 'failed':
             continue
-        for condition in statement["conditions"]:
-            if condition["@class"] == "Feature":
+        for condition in statement['conditions']:
+            if condition['@class'] == 'Feature':
                 genes.append(condition)
-            elif condition["@class"].endswith("Variant"):
+            elif condition['@class'].endswith('Variant'):
                 cond = cast(Variant, condition)
-                if cond["reference1"] and cond["reference1"]["@class"] == "Feature":
-                    genes.append(cond["reference1"])
-                if cond["reference2"] and cond["reference2"]["@class"] == "Feature":
-                    genes.append(cond["reference2"])
+                if cond['reference1'] and cond['reference1']['@class'] == 'Feature':
+                    genes.append(cond['reference1'])
+                if cond['reference2'] and cond['reference2']['@class'] == 'Feature':
+                    genes.append(cond['reference2'])
     unique_genes: List[Ontology] = []
     for gene in genes:
-        if not gene.get("deprecated", False):
-            if gene["@rid"] not in [g["@rid"] for g in unique_genes]:
+        if not gene.get('deprecated', False):
+            if gene['@rid'] not in [g['@rid'] for g in unique_genes]:
                 unique_genes.append(gene)
     return unique_genes
@@ -153,16 +152,16 @@ def get_genes_from_variant_types(
     variant_filters: List[Dict[str, Any]] = []
     if types:
         variant_filters.append(
-            {"type": {"target": "Vocabulary", "filters": {"name": types, "operator": "IN"}}}
+            {'type': {'target': 'Vocabulary', 'filters': {'name': types, 'operator': 'IN'}}}
         )
 
     variants = cast(
         List[Variant],
         conn.query(
             {
-                "target": "Variant",
-                "filters": variant_filters,
-                "returnProperties": ["reference1", "reference2"],
+                'target': 'Variant',
+                'filters': variant_filters,
+                'returnProperties': ['reference1', 'reference2'],
             },
             ignore_cache=ignore_cache,
         ),
@@ -170,23 +169,23 @@ def get_genes_from_variant_types(
     genes = set()
     for variant in variants:
-        genes.add(variant["reference1"])
-        if variant["reference2"]:
-            genes.add(variant["reference2"])
+        genes.add(variant['reference1'])
+        if variant['reference2']:
+            genes.add(variant['reference2'])
 
     if not genes:
         return []
 
-    gene_filters: List[Dict[str, Any]] = [{"biotype": "gene"}]
+    gene_filters: List[Dict[str, Any]] = [{'biotype': 'gene'}]
     if source_record_ids:
-        gene_filters.append({"source": source_record_ids, "operator": "IN"})
+        gene_filters.append({'source': source_record_ids, 'operator': 'IN'})
 
     result = cast(
         List[Ontology],
         conn.query(
             {
-                "target": list(genes),
-                "returnProperties": GENE_RETURN_PROPERTIES,
-                "filters": gene_filters,
+                'target': list(genes),
+                'returnProperties': GENE_RETURN_PROPERTIES,
+                'filters': gene_filters,
             },
             ignore_cache=ignore_cache,
         ),
@@ -210,10 +209,10 @@ def get_preferred_gene_source_rid(
         return preferred_source_name
     result = conn.query(
         {
-            "target": {"target": "Source", "filters": {"name": preferred_source_name}},
-            "queryType": "similarTo",
+            'target': {'target': 'Source', 'filters': {'name': preferred_source_name}},
+            'queryType': 'similarTo',
         }
-    )[0]["@rid"]
+    )[0]['@rid']
     return result
 
 
@@ -235,29 +234,29 @@ def get_preferred_gene_name(
     """
     source_rid = get_preferred_gene_source_rid(conn, source)
     if gene_name in CHROMOSOMES:
-        logger.error(f"{gene_name} assumed to be a chromosome, not gene")
-        return ""
+        logger.error(f'{gene_name} assumed to be a chromosome, not a gene')
+        return ''
     eq = get_equivalent_features(conn=conn, gene_name=gene_name)
-    genes = [m for m in eq if m.get("biotype") == "gene" and not m.get("deprecated")]
+    genes = [m for m in eq if m.get('biotype') == 'gene' and not m.get('deprecated')]
     if not genes:
-        logger.error(f"No genes found for: {gene_name}")
-        return ""
+        logger.error(f'No genes found for: {gene_name}')
+        return ''
     if source_rid:
-        source_filtered_genes = [m for m in genes if m.get("source") == source_rid]
+        source_filtered_genes = [m for m in genes if m.get('source') == source_rid]
         if not source_filtered_genes:
-            logger.error(f"No data from source {source_rid} for {gene_name}")
+            logger.error(f'No data from source {source_rid} for {gene_name}')
         else:
             genes = source_filtered_genes
 
-    gene_names = [g["displayName"] for g in genes if g]
+    gene_names = [g['displayName'] for g in genes if g]
     if len(gene_names) > 1:
         logger.error(
-            f"Multiple gene names found for: {gene_name} - using {gene_names[0]}, ignoring {gene_names[1:]}"
+            f'Multiple gene names found for: {gene_name} - using {gene_names[0]}, ignoring {gene_names[1:]}'
         )
     return gene_names[0]
 
 
-@deprecated("Use get_gene_linked_cancer_predisposition_info instead")
+@deprecated('Use get_gene_linked_cancer_predisposition_info instead')
 def get_cancer_predisposition_info(
     conn: GraphKBConnection, source: str = PREFERRED_GENE_SOURCE_NAME
 ) -> Tuple[List[str], Dict[str, str]]:
@@ -267,7 +266,7 @@ def get_cancer_predisposition_info(
 
 
 def get_gene_linked_cancer_predisposition_info(
-    conn: GraphKBConnection, source: str = PREFERRED_GENE_SOURCE
+    conn: GraphKBConnection, source: str = PREFERRED_GENE_SOURCE_NAME
 ) -> Tuple[List[str], Dict[str, Tuple[str, List[str]]]]:
     """
     Return two lists from GraphKB, one of cancer predisposition genes and one of associated variants.
@@ -275,16 +274,15 @@ def get_gene_linked_cancer_predisposition_info(
 
     GERO-272 - criteria for what counts as a "cancer predisposition" variant
 
    In short:
-    * Statement 'source' is 'CGL'
+    * Statement 'source' is 'CGL' (not related to the preferred gene source)
     * Statement 'relevance' is 'pathogenic'
     * gene is gotten from any associated 'PositionalVariant' records
 
     Example: https://graphkb.bcgsc.ca/view/Statement/155:11616
-
-
     Returns:
         genes: list of cancer predisposition genes
+            (using names from the source specified in this function's arguments)
         variants: dictionary mapping pharmacogenomic variant IDs to variant display names
     """
     genes = set()
@@ -293,51 +291,51 @@ def get_gene_linked_cancer_predisposition_info(
     variants: Dict[str, Tuple[str, List[str]]] = {}
 
     terms: dict = {term: lst for term, lst in RELEVANCE_BASE_TERMS}
-    relevance_rids = list(get_terms_set(conn, terms.get("cancer predisposition", [])))
+    relevance_rids = list(get_terms_set(conn, terms.get('cancer predisposition', [])))
     source_rid = get_preferred_gene_source_rid(conn, source)
 
     predisp_statements = [
         cast(Statement, record)
         for record in conn.query(
             {
-                "target": "Statement",
-                "filters": {
-                    "AND": [
+                'target': 'Statement',
+                'filters': {
+                    'AND': [
                         {
-                            "evidence": {
-                                "target": "Source",
-                                "filters": {"@rid": get_rid(conn, "Source", "CGL")},
+                            'evidence': {
+                                'target': 'Source',
+                                'filters': {'@rid': get_rid(conn, 'Source', 'CGL')},
                             }
                         },
                        {
-                            "relevance": {
-                                "target": "Vocabulary",
-                                "filters": {"@rid": relevance_rids},
+                            'relevance': {
+                                'target': 'Vocabulary',
+                                'filters': {'@rid': relevance_rids},
                             }
                         },
                     ]
                 },
-                "returnProperties": [
-                    "conditions.@class",
-                    "conditions.@rid",
-                    "conditions.displayName",
-                    "conditions.reference1.biotype",
-                    "conditions.reference1.displayName",
-                    "conditions.reference2.biotype",
-                    "conditions.reference2.displayName",
+                'returnProperties': [
+                    'conditions.@class',
+                    'conditions.@rid',
+                    'conditions.displayName',
+                    'conditions.reference1.biotype',
+                    'conditions.reference1.displayName',
+                    'conditions.reference2.biotype',
+                    'conditions.reference2.displayName',
                 ],
             },
             ignore_cache=False,
         )
     ]
     for record in predisp_statements:
-        for condition in record["conditions"]:
-            if condition["@class"] == "PositionalVariant":
+        for condition in record['conditions']:
+            if condition['@class'] == 'PositionalVariant':
                 assoc_gene_list: List[str] = []
-                for reference in ["reference1", "reference2"]:
-                    name = (condition.get(reference) or {}).get("displayName", "")  # type: ignore
-                    biotype = (condition.get(reference) or {}).get("biotype", "")  # type: ignore
-                    if name and biotype == "gene":
+                for reference in ['reference1', 'reference2']:
+                    name = (condition.get(reference) or {}).get('displayName', '')  # type: ignore
+                    biotype = (condition.get(reference) or {}).get('biotype', '')  # type: ignore
+                    if name and biotype == 'gene':
                         genes.add(name)
                         assoc_gene_list.append(name)
                     elif name:
@@ -348,9 +346,9 @@ def get_gene_linked_cancer_predisposition_info(
                     else:
                         non_genes.add((name, biotype))
                         logger.error(
-                            f"Non-gene cancer predisposition {biotype}: {name} for {condition['displayName']}"
+                            f'Non-gene cancer predisposition {biotype}: {name} for {condition["displayName"]}'
                         )
-                variants[condition["@rid"]] = (condition["displayName"], assoc_gene_list)
+                variants[condition['@rid']] = (condition['displayName'], assoc_gene_list)
 
     for gene, name, biotype in infer_genes:
         logger.debug(f"Found gene '{gene}' for '{name}' ({biotype})")
@@ -362,7 +360,7 @@ def get_gene_linked_cancer_predisposition_info(
     return sorted(genes), variants
 
 
-@deprecated("Use get_gene_linked_pharmacogenomic_info instead")
+@deprecated('Use get_gene_linked_pharmacogenomic_info instead')
 def get_pharmacogenomic_info(
     conn: GraphKBConnection, source: str = PREFERRED_GENE_SOURCE_NAME
 ) -> Tuple[List[str], Dict[str, str]]:
@@ -372,7 +370,7 @@ def get_pharmacogenomic_info(
 
 
 def get_gene_linked_pharmacogenomic_info(
-    conn: GraphKBConnection, source: str = PREFERRED_GENE_SOURCE
+    conn: GraphKBConnection, source: str = PREFERRED_GENE_SOURCE_NAME
 ) -> Tuple[List[str], Dict[str, Tuple[str, List[str]]]]:
     """
     Return two lists from GraphKB, one of pharmacogenomic genes and one of associated variants.
@@ -395,39 +393,39 @@ def get_gene_linked_pharmacogenomic_info(
     infer_genes = set()
     variants: Dict[str, Tuple] = {}
 
-    relevance_rids = list(get_terms_set(conn, "pharmacogenomic"))
+    relevance_rids = list(get_terms_set(conn, 'pharmacogenomic'))
     source_rid = get_preferred_gene_source_rid(conn, source)
     for record in conn.query(
         {
-            "target": "Statement",
-            "filters": [
-                {"relevance": {"target": "Vocabulary", "filters": {"@rid": relevance_rids}}}
+            'target': 'Statement',
+            'filters': [
+                {'relevance': {'target': 'Vocabulary', 'filters': {'@rid': relevance_rids}}}
             ],
-            "returnProperties": [
-                "conditions.@class",
-                "conditions.@rid",
-                "conditions.displayName",
-                "conditions.reference1.biotype",
-                "conditions.reference1.displayName",
-                "conditions.reference2.biotype",
-                "conditions.reference2.displayName",
-                "source.name",
+            'returnProperties': [
+                'conditions.@class',
+                'conditions.@rid',
+                'conditions.displayName',
+                'conditions.reference1.biotype',
+                'conditions.reference1.displayName',
+                'conditions.reference2.biotype',
+                'conditions.reference2.displayName',
+                'source.name',
             ],
        },
         ignore_cache=False,
     ):
-        if record["source"]:  # type: ignore
-            if record["source"]["name"].lower() in GSC_PHARMACOGENOMIC_SOURCE_EXCLUDE_LIST:  # type: ignore
+        if record['source']:  # type: ignore
+            if record['source']['name'].lower() in GSC_PHARMACOGENOMIC_SOURCE_EXCLUDE_LIST:  # type: ignore
                 continue
 
-        for condition in record["conditions"]:  # type: ignore
-            if condition["@class"] == "PositionalVariant":
+        for condition in record['conditions']:  # type: ignore
+            if condition['@class'] == 'PositionalVariant':
                 assoc_gene_list = []
-                for reference in ["reference1", "reference2"]:
-                    name = (condition.get(reference) or {}).get("displayName", "")
-                    biotype = (condition.get(reference) or {}).get("biotype", "")
-                    if name and biotype == "gene":
+                for reference in ['reference1', 'reference2']:
+                    name = (condition.get(reference) or {}).get('displayName', '')
+                    biotype = (condition.get(reference) or {}).get('biotype', '')
+                    if name and biotype == 'gene':
                         genes.add(name)
                         assoc_gene_list.append(name)
                     elif name:
@@ -438,9 +436,9 @@ def get_gene_linked_pharmacogenomic_info(
                     else:
                         non_genes.add((name, biotype))
                         logger.error(
-                            f"Non-gene pharmacogenomic {biotype}: {name} for {condition['displayName']}"
+                            f'Non-gene pharmacogenomic {biotype}: {name} for {condition["displayName"]}'
                        )
-                variants[condition["@rid"]] = (condition["displayName"], assoc_gene_list)
+                variants[condition['@rid']] = (condition['displayName'], assoc_gene_list)
     for gene, name, biotype in infer_genes:
         logger.debug(f"Found gene '{gene}' for '{name}' ({biotype})")
         genes.add(gene)
@@ -452,7 +450,7 @@ def get_gene_linked_pharmacogenomic_info(
 
 
 def convert_to_rid_set(records: List[Record] | List[Ontology]) -> Set[str]:
-    return {r["@rid"] for r in records}
+    return {r['@rid'] for r in records}
 
 
 def get_gene_information(
@@ -479,59 +477,59 @@
         'name': 'TERT', 'oncogene': True}]
     """
-    logger.info("fetching variant related genes list")
+    logger.info('fetching variant related genes list')
     # For query speed, only fetch the minimum needed details
     ret_props = [
-        "conditions.@rid",
-        "conditions.@class",
-        "conditions.reference1",
-        "conditions.reference2",
-        "reviewStatus",
+        'conditions.@rid',
+        'conditions.@class',
+        'conditions.reference1',
+        'conditions.reference2',
+        'reviewStatus',
    ]
-    body: Dict[str, Any] = {"target": "Statement", "returnProperties": ret_props}
+    body: Dict[str, Any] = {'target': 'Statement', 'returnProperties': ret_props}
 
     gene_names = sorted(set(gene_names))
     statements = graphkb_conn.query(body)
-    statements = [s for s in statements if s.get("reviewStatus") != FAILED_REVIEW_STATUS]
+    statements = [s for s in statements if s.get('reviewStatus') != FAILED_REVIEW_STATUS]
 
     gene_flags: Dict[str, Set[str]] = {
-        "kbStatementRelated": set(),
-        "knownFusionPartner": set(),
-        "knownSmallMutation": set(),
+        'kbStatementRelated': set(),
+        'knownFusionPartner': set(),
+        'knownSmallMutation': set(),
     }
 
     for statement in statements:
         statement = cast(Statement, statement)
-        for condition in statement["conditions"]:
+        for condition in statement['conditions']:
             # ignore types, as there can be various types of conditions
-            if condition.get("reference1"):
-                gene_flags["kbStatementRelated"].add(condition["reference1"])  # type: ignore
-            if condition.get("reference2"):
+            if condition.get('reference1'):
+                gene_flags['kbStatementRelated'].add(condition['reference1'])  # type: ignore
+            if condition.get('reference2'):
                 # Having a reference2 implies the event is a fusion
-                gene_flags["kbStatementRelated"].add(condition["reference2"])  # type: ignore
-                gene_flags["knownFusionPartner"].add(condition["reference1"])  # type: ignore
-                gene_flags["knownFusionPartner"].add(condition["reference2"])  # type: ignore
-            elif condition["@class"] == "PositionalVariant":
+                gene_flags['kbStatementRelated'].add(condition['reference2'])  # type: ignore
+                gene_flags['knownFusionPartner'].add(condition['reference1'])  # type: ignore
+                gene_flags['knownFusionPartner'].add(condition['reference2'])  # type: ignore
+            elif condition['@class'] == 'PositionalVariant':
                 # PositionalVariant without a reference2 implies a smallMutation type
-                gene_flags["knownSmallMutation"].add(condition["reference1"])  # type: ignore
+                gene_flags['knownSmallMutation'].add(condition['reference1'])  # type: ignore
 
-    logger.info("fetching oncogenes list")
-    gene_flags["oncogene"] = convert_to_rid_set(get_oncokb_oncogenes(graphkb_conn))
-    logger.info("fetching tumour supressors list")
-    gene_flags["tumourSuppressor"] = convert_to_rid_set(get_oncokb_tumour_supressors(graphkb_conn))
-    logger.info("fetching cancerGeneListMatch list")
-    gene_flags["cancerGeneListMatch"] = convert_to_rid_set(get_cancer_genes(graphkb_conn))
+    logger.info('fetching oncogenes list')
+    gene_flags['oncogene'] = convert_to_rid_set(get_oncokb_oncogenes(graphkb_conn))
+    logger.info('fetching tumour suppressors list')
+    gene_flags['tumourSuppressor'] = convert_to_rid_set(get_oncokb_tumour_supressors(graphkb_conn))
+    logger.info('fetching cancerGeneListMatch list')
+    gene_flags['cancerGeneListMatch'] = convert_to_rid_set(get_cancer_genes(graphkb_conn))
 
-    logger.info("fetching therapeutic associated genes lists")
-    gene_flags["therapeuticAssociated"] = convert_to_rid_set(
+    logger.info('fetching therapeutic associated genes lists')
+    gene_flags['therapeuticAssociated'] = convert_to_rid_set(
         get_therapeutic_associated_genes(graphkb_conn)
     )
 
-    logger.info(f"Setting gene_info flags on {len(gene_names)} genes")
+    logger.info(f'Setting gene_info flags on {len(gene_names)} genes')
     result: List[IprGene] = []
     for gene_name in gene_names:
         equivalent = convert_to_rid_set(get_equivalent_features(graphkb_conn, gene_name))
-        row: Dict[str, str | bool] = {"name": gene_name}
+        row: Dict[str, str | bool] = {'name': gene_name}
         flagged = False
         for flag in gene_flags:
             # make smaller JSON to upload since all default to false already
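The public entry point for the flag assembly above is unchanged by the quote migration; a usage sketch (connection details below are placeholders):

```python
from pori_python.graphkb import GraphKBConnection
from pori_python.graphkb.genes import get_gene_information

conn = GraphKBConnection(url='https://your-graphkb-instance/api')  # placeholder URL
conn.login(username='user', password='pass')  # placeholder credentials

# Rows only carry the flags that are true for that gene,
# e.g. [{'name': 'TERT', 'oncogene': True}] per the docstring example.
for row in get_gene_information(conn, ['TERT']):
    print(row)
```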
diff --git a/pori_python/graphkb/match.py b/pori_python/graphkb/match.py
index 0c791383..29c8cf32 100644
--- a/pori_python/graphkb/match.py
+++ b/pori_python/graphkb/match.py
@@ -46,8 +46,8 @@ def get_equivalent_features(
     gene_name: str,
     ignore_cache: bool = False,
     is_source_id: bool = False,
-    source: str = "",
-    source_id_version: str = "",
+    source: str = '',
+    source_id_version: str = '',
 ) -> List[Ontology]:
     """Match an equivalent list of features given some input feature name (or ID).
 
@@ -76,36 +76,36 @@ def get_equivalent_features(
         return cast(
             List[Ontology],
             conn.query(
-                {"target": [gene_name], "queryType": "similarTo"}, ignore_cache=ignore_cache
+                {'target': [gene_name], 'queryType': 'similarTo'}, ignore_cache=ignore_cache
             ),
         )
 
     filters: List[Dict] = []
     if source:
-        filters.append({"source": {"target": "Source", "filters": {"name": source}}})
+        filters.append({'source': {'target': 'Source', 'filters': {'name': source}}})
 
-    if gene_name.count(".") == 1 and gene_name.split(".")[-1].isnumeric():
+    if gene_name.count('.') == 1 and gene_name.split('.')[-1].isnumeric():
         # eg. ENSG00000133703.11 or NM_033360.4
         logger.debug(
-            f"Assuming {gene_name} has a .version_format - ignoring the version for equivalent features"
+            f'Assuming {gene_name} has a .version_format - ignoring the version for equivalent features'
         )
-        gene_name = gene_name.split(".")[0]
+        gene_name = gene_name.split('.')[0]
 
     if is_source_id or source_id_version:
-        filters.append({"sourceId": gene_name})
+        filters.append({'sourceId': gene_name})
         if source_id_version:
             filters.append(
-                {"OR": [{"sourceIdVersion": source_id_version}, {"sourceIdVersion": None}]}
+                {'OR': [{'sourceIdVersion': source_id_version}, {'sourceIdVersion': None}]}
             )
     elif FEATURES_CACHE and gene_name.lower() not in FEATURES_CACHE and not ignore_cache:
         return []
     else:
-        filters.append({"OR": [{"sourceId": gene_name}, {"name": gene_name}]})
+        filters.append({'OR': [{'sourceId': gene_name}, {'name': gene_name}]})
 
     return cast(
         List[Ontology],
         conn.query(
-            {"target": {"target": "Feature", "filters": filters}, "queryType": "similarTo"},
+            {'target': {'target': 'Feature', 'filters': filters}, 'queryType': 'similarTo'},
             ignore_cache=ignore_cache,
         ),
     )
@@ -118,24 +118,24 @@ def cache_missing_features(conn: GraphKBConnection) -> None:
     """
     genes = cast(
         List[Ontology],
-        conn.query({"target": "Feature", "returnProperties": ["name", "sourceId"], "neighbors": 0}),
+        conn.query({'target': 'Feature', 'returnProperties': ['name', 'sourceId'], 'neighbors': 0}),
     )
     for gene in genes:
-        if gene["name"]:
-            FEATURES_CACHE.add(gene["name"].lower())
-        if gene["sourceId"]:
-            FEATURES_CACHE.add(gene["sourceId"].lower())
+        if gene['name']:
+            FEATURES_CACHE.add(gene['name'].lower())
+        if gene['sourceId']:
+            FEATURES_CACHE.add(gene['sourceId'].lower())
 
 
 def match_category_variant(
     conn: GraphKBConnection,
     reference_name: str,
     category: str,
-    root_exclude_term: str = "",
-    gene_source: str = "",
+    root_exclude_term: str = '',
+    gene_source: str = '',
     gene_is_source_id: bool = False,
     ignore_cache: bool = False,
-    reference_class: str = "Feature",
+    reference_class: str = 'Feature',
 ) -> List[Variant]:
     """
     Returns a list of variants matching the input variant
@@ -155,7 +155,7 @@ def match_category_variant(
     """
     # disambiguate the reference to find all equivalent representations
     references: List[str] = []
-    if reference_class == "Feature":
+    if reference_class == 'Feature':
         references = convert_to_rid_list(
             get_equivalent_features(
                 conn,
@@ -167,14 +167,14 @@ def match_category_variant(
         )
         if not references:
             raise FeatureNotFoundError(
-                f"unable to find the gene ({reference_name}) or any equivalent representations"
+                f'unable to find the gene ({reference_name}) or any equivalent representations'
             )
-    if reference_class == "Signature":
+    if reference_class == 'Signature':
         references = convert_to_rid_list(
             get_equivalent_terms(
                 conn,
                 reference_name.lower(),
-                ontology_class="Signature",
+                ontology_class='Signature',
                 ignore_cache=ignore_cache,
             )
         )
@@ -185,24 +185,24 @@ def match_category_variant(
     )
 
     if not types:
-        raise ValueError(f"unable to find the term/category ({category}) or any equivalent")
+        raise ValueError(f'unable to find the term/category ({category}) or any equivalent')
 
     # find the variant list
     return cast(
         List[Variant],
         conn.query(
             {
-                "target": {
-                    "target": "CategoryVariant",
-                    "filters": [
-                        {"reference1": references, "operator": "IN"},
-                        {"type": types, "operator": "IN"},
+                'target': {
+                    'target': 'CategoryVariant',
+                    'filters': [
+                        {'reference1': references, 'operator': 'IN'},
+                        {'type': types, 'operator': 'IN'},
                     ],
                },
-                "queryType": "similarTo",
-                "edges": ["AliasOf", "DeprecatedBy", "CrossReferenceOf", "GeneralizationOf"],
-                "treeEdges": ["Infers"],
-                "returnProperties": VARIANT_RETURN_PROPERTIES,
+                'queryType': 'similarTo',
+                'edges': ['AliasOf', 'DeprecatedBy', 'CrossReferenceOf', 'GeneralizationOf'],
+                'treeEdges': ['Infers'],
+                'returnProperties': VARIANT_RETURN_PROPERTIES,
             },
             ignore_cache=ignore_cache,
         ),
@@ -228,14 +228,14 @@ def match_copy_variant(
         List of variant records from GraphKB which match the input
     """
     if category not in INPUT_COPY_CATEGORIES.values():
-        raise ValueError(f"not a valid copy variant input category ({category})")
+        raise ValueError(f'not a valid copy variant input category ({category})')
 
     result = match_category_variant(
-        conn, gene_name, category, root_exclude_term="structural variant", **kwargs
+        conn, gene_name, category, root_exclude_term='structural variant', **kwargs
     )
 
     if drop_homozygous:
-        return [row for row in result if row["zygosity"] != "homozygous"]
+        return [row for row in result if row['zygosity'] != 'homozygous']
     return result
 
 
@@ -243,10 +243,10 @@ def match_expression_variant(
     conn: GraphKBConnection, gene_name: str, category: str, **kwargs
 ) -> List[Variant]:
     if category not in INPUT_EXPRESSION_CATEGORIES.values():
-        raise ValueError(f"not a valid expression variant input category ({category})")
+        raise ValueError(f'not a valid expression variant input category ({category})')
 
     return match_category_variant(
-        conn, gene_name, category, root_exclude_term="biological", **kwargs
+        conn, gene_name, category, root_exclude_term='biological', **kwargs
     )
 
 
@@ -270,19 +270,19 @@ def positions_overlap(
     Returns:
         bool: True if the positions overlap
     """
-    if pos_record.get("@class", "") == "CytobandPosition":
+    if pos_record.get('@class', '') == 'CytobandPosition':
         raise NotImplementedError(
-            "Position comparison for cytoband coordinates is not yet implemented"
+            'Position comparison for cytoband coordinates is not yet implemented'
         )
 
-    pos = pos_record.get("pos", None)
+    pos = pos_record.get('pos', None)
     if pos is None:
         return True
 
-    start = range_start.get("pos", None)
+    start = range_start.get('pos', None)
 
     if range_end:
-        end = range_end.get("pos", None)
+        end = range_end.get('pos', None)
 
         if start is not None and pos < start:
             return False
@@ -315,15 +315,15 @@ def equivalent_types(
 
     # Convert rid to displayName if needed
     if looks_like_rid(type1):
-        type1 = conn.get_records_by_id([type1])[0]["displayName"]
+        type1 = conn.get_records_by_id([type1])[0]['displayName']
     if looks_like_rid(type2):
-        type2 = conn.get_records_by_id([type2])[0]["displayName"]
+        type2 = conn.get_records_by_id([type2])[0]['displayName']
 
     # Get type terms from observed variant
     terms1 = []
     if strict:
         try:
-            terms1.append(get_term_by_name(conn, type1)["@rid"])
+            terms1.append(get_term_by_name(conn, type1)['@rid'])
        except Exception:
            pass
    else:
@@ -375,12 +375,12 @@ def compare_positional_variants(
     # For break1, check if positions are overlaping between the variant and the reference.
     # Continue only if True.
     if not positions_overlap(
-        cast(BasicPosition, variant["break1Start"]),
-        cast(BasicPosition, reference_variant["break1Start"]),
+        cast(BasicPosition, variant['break1Start']),
+        cast(BasicPosition, reference_variant['break1Start']),
         (
             None
-            if "break1End" not in reference_variant
-            else cast(BasicPosition, reference_variant["break1End"])
+            if 'break1End' not in reference_variant
+            else cast(BasicPosition, reference_variant['break1End'])
         ),
     ):
         return False
@@ -388,16 +388,16 @@ def compare_positional_variants(
     # For break2, check if positions are overlaping between the variant and the reference.
     # Continue only if True or no break2.
     # TODO: check for variant without break2 but reference_variant with one.
-    if variant.get("break2Start"):
-        if not reference_variant.get("break2Start"):
+    if variant.get('break2Start'):
+        if not reference_variant.get('break2Start'):
             return False
         if not positions_overlap(
-            cast(BasicPosition, variant["break2Start"]),
-            cast(BasicPosition, reference_variant["break2Start"]),
+            cast(BasicPosition, variant['break2Start']),
+            cast(BasicPosition, reference_variant['break2Start']),
             (
                 None
-                if "break2End" not in reference_variant
-                else cast(BasicPosition, reference_variant["break2End"])
+                if 'break2End' not in reference_variant
+                else cast(BasicPosition, reference_variant['break2End'])
             ),
         ):
             return False
@@ -405,47 +405,47 @@ def compare_positional_variants(
     # If both variants have untemplated sequence,
     # check for size and content.
     if (
-        variant.get("untemplatedSeq", None) is not None
-        and reference_variant.get("untemplatedSeq", None) is not None
+        variant.get('untemplatedSeq', None) is not None
+        and reference_variant.get('untemplatedSeq', None) is not None
     ):
         if (
-            variant.get("untemplatedSeqSize", None) is not None
-            and reference_variant.get("untemplatedSeqSize", None) is not None
+            variant.get('untemplatedSeqSize', None) is not None
+            and reference_variant.get('untemplatedSeqSize', None) is not None
         ):
-            if variant["untemplatedSeqSize"] != reference_variant["untemplatedSeqSize"]:
+            if variant['untemplatedSeqSize'] != reference_variant['untemplatedSeqSize']:
                 return False
 
         if (
-            reference_variant["untemplatedSeq"] is not None
-            and variant["untemplatedSeq"] is not None
+            reference_variant['untemplatedSeq'] is not None
+            and variant['untemplatedSeq'] is not None
         ):
             if (
-                reference_variant["untemplatedSeq"] not in AMBIGUOUS_AA
-                and variant["untemplatedSeq"] not in AMBIGUOUS_AA
+                reference_variant['untemplatedSeq'] not in AMBIGUOUS_AA
+                and variant['untemplatedSeq'] not in AMBIGUOUS_AA
             ):
-                if reference_variant["untemplatedSeq"].lower() != variant["untemplatedSeq"].lower():
+                if reference_variant['untemplatedSeq'].lower() != variant['untemplatedSeq'].lower():
                     return False
-            elif len(variant["untemplatedSeq"]) != len(reference_variant["untemplatedSeq"]):
+            elif len(variant['untemplatedSeq']) != len(reference_variant['untemplatedSeq']):
                 return False
 
     # If both variants have a reference sequence,
     # check if they are the same.
     if (
-        variant.get("refSeq", None) is not None
-        and reference_variant.get("refSeq", None) is not None
+        variant.get('refSeq', None) is not None
+        and reference_variant.get('refSeq', None) is not None
     ):
         if (
-            reference_variant["refSeq"] not in AMBIGUOUS_AA
-            and variant["refSeq"] not in AMBIGUOUS_AA
+            reference_variant['refSeq'] not in AMBIGUOUS_AA
+            and variant['refSeq'] not in AMBIGUOUS_AA
         ):
-            if reference_variant["refSeq"].lower() != variant["refSeq"].lower():  # type: ignore
+            if reference_variant['refSeq'].lower() != variant['refSeq'].lower():  # type: ignore
                 return False
-        elif len(variant["refSeq"]) != len(reference_variant["refSeq"]):  # type: ignore
+        elif len(variant['refSeq']) != len(reference_variant['refSeq']):  # type: ignore
             return False
 
     # Equivalent types
-    if variant.get("type") and reference_variant.get("type"):
-        if not equivalent_types(conn, variant["type"], reference_variant["type"]):
+    if variant.get('type') and reference_variant.get('type'):
+        if not equivalent_types(conn, variant['type'], reference_variant['type']):
             return False
     return True
@@ -500,38 +500,38 @@ def type_screening(
 
     # Will use either hardcoded type list or an updated list from the API
     if updateStructuralTypes:
-        rids = list(get_terms_set(conn, ["structural variant"]))
+        rids = list(get_terms_set(conn, ['structural variant']))
         records = conn.get_records_by_id(rids)
-        structuralVariantTypes = [el["name"] for el in records]
+        structuralVariantTypes = [el['name'] for el in records]
 
     # Unambiguous non-structural variation type
-    if parsed["type"] not in structuralVariantTypes:
-        return parsed["type"]
+    if parsed['type'] not in structuralVariantTypes:
+        return parsed['type']
 
     # Unambiguous structural variation type
-    if parsed["type"] in ["fusion", "translocation"]:
-        return parsed["type"]
-    if parsed.get("reference2", None):
-        return parsed["type"]
-    prefix = parsed.get("prefix", "g")
-    if prefix == "y":  # Assuming all variations using cytoband coordiantes meet the size threshold
-        return parsed["type"]
+    if parsed['type'] in ['fusion', 'translocation']:
+        return parsed['type']
+    if parsed.get('reference2', None):
+        return parsed['type']
+    prefix = parsed.get('prefix', 'g')
+    if prefix == 'y':  # Assuming all variations using cytoband coordinates meet the size threshold
+        return parsed['type']
 
     # When size cannot be determined: exonic and intronic coordinates
     # e.g. "MET:e.14del" meaning "Any deletion occuring at the 14th exon"
-    if prefix in ["e", "i"]:  # Assuming they don't meet the size threshold
+    if prefix in ['e', 'i']:  # Assuming they don't meet the size threshold
         return default_type
 
     # When size is given
-    if (parsed.get("untemplatedSeqSize") or 0) >= threshold:
-        return parsed["type"]
+    if (parsed.get('untemplatedSeqSize') or 0) >= threshold:
+        return parsed['type']
 
     # When size needs to be computed from positions
-    pos_start: int = parsed.get("break1Start", {}).get("pos", 1)  # type: ignore
-    pos_end: int = parsed.get("break2Start", {}).get("pos", pos_start)  # type: ignore
-    pos_size = 3 if prefix == "p" else 1
+    pos_start: int = parsed.get('break1Start', {}).get('pos', 1)  # type: ignore
+    pos_end: int = parsed.get('break2Start', {}).get('pos', pos_start)  # type: ignore
+    pos_size = 3 if prefix == 'p' else 1
     if ((pos_end - pos_start) + 1) * pos_size >= threshold:
-        return parsed["type"]
+        return parsed['type']
 
     # Default
     return default_type
@@ -543,7 +543,7 @@ def match_positional_variant(
     reference1: Optional[str] = None,
     reference2: Optional[str] = None,
     gene_is_source_id: bool = False,
-    gene_source: str = "",
+    gene_source: str = '',
     ignore_cache: bool = False,
     updateStructuralTypes: bool = False,
 ) -> List[Variant]:
@@ -590,21 +590,21 @@ def match_positional_variant(
     # parse the representation
     parsed = conn.parse(variant_string, not (reference1 or reference2))
 
-    if "break1End" in parsed or "break2End" in parsed:  # uncertain position
+    if 'break1End' in parsed or 'break2End' in parsed:  # uncertain position
         raise NotImplementedError(
-            f"Matching does not support uncertain positions ({variant_string}) as input"
+            f'Matching does not support uncertain positions ({variant_string}) as input'
         )
     if reference2 and not reference1:
-        raise ValueError("cannot specify reference2 without reference1")
+        raise ValueError('cannot specify reference2 without reference1')
 
     # disambiguate the gene name
     if reference1:
         gene1 = reference1
-        if "reference1" in parsed:
+        if 'reference1' in parsed:
             raise ValueError(
-                "Cannot specify reference1 explicitly as well as in the variant notation"
+                'Cannot specify reference1 explicitly as well as in the variant notation'
             )
     else:
-        gene1 = parsed["reference1"]
+        gene1 = parsed['reference1']
 
     gene1_features = get_equivalent_features(
         conn, gene1, source=gene_source, is_source_id=gene_is_source_id, ignore_cache=ignore_cache
@@ -613,7 +613,7 @@ def match_positional_variant(
 
     if not features:
         raise FeatureNotFoundError(
-            f"unable to find the gene ({gene1}) or any equivalent representations"
+            f'unable to find the gene ({gene1}) or any equivalent representations'
         )
 
     secondary_features = None
@@ -621,20 +621,20 @@ def match_positional_variant(
     gene2: Optional[str] = None
     if reference2:
         gene2 = reference2
-        if "reference2" in parsed:
+        if 'reference2' in parsed:
             raise ValueError(
-                "Cannot specify reference2 explicitly as well as in the variant notation"
+                'Cannot specify reference2 explicitly as well as in the variant notation'
             )
-        elif "reference1" in parsed:
+        elif 'reference1' in parsed:
             raise ValueError(
-                "variant notation cannot contain features when explicit features are given"
+                'variant notation cannot contain features when explicit features are given'
             )
     elif (
-        "reference2" in parsed
-        and parsed.get("reference2", "?") != "?"
-        and parsed["reference2"] is not None
+        'reference2' in parsed
+        and parsed.get('reference2', '?') != '?'
+        and parsed['reference2'] is not None
     ):
-        gene2 = parsed["reference2"]
+        gene2 = parsed['reference2']
 
     if gene2:
         gene2_features = get_equivalent_features(
@@ -647,14 +647,14 @@ def match_positional_variant(
         secondary_features = convert_to_rid_list(gene2_features)
         if not secondary_features:
             raise FeatureNotFoundError(
-                f"unable to find the gene ({gene2}) or any equivalent representations"
+                f'unable to find the gene ({gene2}) or any equivalent representations'
             )
 
     # match the existing mutations (positional)
     query_filters = [
-        {"reference1": features},
-        {"reference2": secondary_features},
-        {"break1Start.@class": parsed["break1Start"]["@class"]},
+        {'reference1': features},
+        {'reference2': secondary_features},
+        {'break1Start.@class': parsed['break1Start']['@class']},
     ]
 
     filtered_similarOnly: List[Record] = []  # For post filter match use
@@ -663,7 +663,7 @@ def match_positional_variant(
     for row in cast(
         List[Record],
         conn.query(
-            {"target": "PositionalVariant", "filters": query_filters}, ignore_cache=ignore_cache
+            {'target': 'PositionalVariant', 'filters': query_filters}, ignore_cache=ignore_cache
         ),
     ):
         # TODO: Check if variant and reference_variant should be interchanged
@@ -688,11 +688,11 @@ def match_positional_variant(
     matches.extend(
         conn.query(
             {
-                "target": convert_to_rid_list(filtered_similarOnly),
-                "queryType": "similarTo",
-                "edges": ["AliasOf", "DeprecatedBy", "CrossReferenceOf", "GeneralizationOf"],
-                "treeEdges": ["Infers"],
-                "returnProperties": POS_VARIANT_RETURN_PROPERTIES,
+                'target': convert_to_rid_list(filtered_similarOnly),
+                'queryType': 'similarTo',
+                'edges': ['AliasOf', 'DeprecatedBy', 'CrossReferenceOf', 'GeneralizationOf'],
+                'treeEdges': ['Infers'],
+                'returnProperties': POS_VARIANT_RETURN_PROPERTIES,
            },
            ignore_cache=ignore_cache,
        )
@@ -705,7 +705,7 @@ def match_positional_variant(
     variant_types_details = get_equivalent_terms(
         conn,
         screened_type,
-        root_exclude_term="mutation" if secondary_features else "",
+        root_exclude_term='mutation' if secondary_features else '',
         ignore_cache=ignore_cache,
     )
 
@@ -714,18 +714,18 @@ def match_positional_variant(
     matches.extend(
         conn.query(
             {
-                "target": {
-                    "target": "CategoryVariant",
-                    "filters": [
-                        {"reference1": features},
-                        {"type": types},
-                        {"reference2": secondary_features},
+                'target': {
+                    'target': 'CategoryVariant',
+                    'filters': [
+                        {'reference1': features},
+                        {'type': types},
+                        {'reference2': secondary_features},
                     ],
                 },
-                "queryType": "similarTo",
-                "edges": ["AliasOf", "DeprecatedBy", "CrossReferenceOf"],
-                "treeEdges": ["Infers"],
-                "returnProperties": POS_VARIANT_RETURN_PROPERTIES,
+                'queryType': 'similarTo',
+                'edges': ['AliasOf', 'DeprecatedBy', 'CrossReferenceOf'],
+                'treeEdges': ['Infers'],
+                'returnProperties': POS_VARIANT_RETURN_PROPERTIES,
            },
            ignore_cache=ignore_cache,
        )
@@ -739,18 +739,18 @@ def cat_variant_query(
         matches.extend(
             conn.query(
                 {
-                    "target": {
-                        "target": "CategoryVariant",
-                        "filters": [
-                            {"reference1": cat_features},
-                            {"type": cat_types},
-                            {"reference2": cat_secondary_features},
+                    'target': {
+                        'target': 'CategoryVariant',
+                        'filters': [
+                            {'reference1': cat_features},
+                            {'type': cat_types},
+                            {'reference2': cat_secondary_features},
                         ],
                     },
-                    "queryType": "similarTo",
-                    "edges": ["AliasOf", "DeprecatedBy", "CrossReferenceOf"],
-                    "treeEdges": [],
-                    "returnProperties": VARIANT_RETURN_PROPERTIES,
+                    'queryType': 'similarTo',
+                    'edges': ['AliasOf', 'DeprecatedBy', 'CrossReferenceOf'],
+                    'treeEdges': [],
+                    'returnProperties': VARIANT_RETURN_PROPERTIES,
                 },
                 ignore_cache=ignore_cache,
             )
@@ -768,10 +768,10 @@ def cat_variant_query(
     matches.extend(
         conn.query(
             {
-                "target": convert_to_rid_list(filtered_similarAndGeneric),
-                "queryType": "descendants",
-                "edges": [],
-                "returnProperties": POS_VARIANT_RETURN_PROPERTIES,
+                'target': convert_to_rid_list(filtered_similarAndGeneric),
+                'queryType': 'descendants',
+                'edges': [],
+                'returnProperties': POS_VARIANT_RETURN_PROPERTIES,
            },
            ignore_cache=ignore_cache,
        )
@@ -779,6 +779,6 @@ def cat_variant_query(
 
     result: Dict[str, Variant] = {}
     for row in matches:
-        result[row["@rid"]] = cast(Variant, row)
+        result[row['@rid']] = cast(Variant, row)
 
     return list(result.values())
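None of the matching logic above changes behaviour; for reference, the typical call shape (placeholder connection details; KRAS:p.G12D is the usual GraphKB example notation):

```python
from pori_python.graphkb import GraphKBConnection
from pori_python.graphkb.match import match_positional_variant

conn = GraphKBConnection(url='https://your-graphkb-instance/api')  # placeholder URL
conn.login(username='user', password='pass')  # placeholder credentials

# Parses the HGVS-like string, resolves equivalent features and types, then
# matches against PositionalVariant and CategoryVariant records as above.
for variant in match_positional_variant(conn, 'KRAS:p.G12D'):
    print(variant['@rid'], variant['displayName'])
```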
diff --git a/pori_python/graphkb/statement.py b/pori_python/graphkb/statement.py
index 24246b91..3f077ee1 100644
--- a/pori_python/graphkb/statement.py
+++ b/pori_python/graphkb/statement.py
@@ -20,7 +20,7 @@ def categorize_relevance(
         term_set = get_terms_set(graphkb_conn, base_terms)
         if relevance_rid in term_set:
             return category
-    return ""
+    return ''
 
 
 def get_statements_from_variants(
@@ -38,11 +38,11 @@ def get_statements_from_variants(
     """
     statements = graphkb_conn.query(
         {
-            "target": "Statement",
-            "filters": {"conditions": convert_to_rid_list(variants), "operator": "CONTAINSANY"},
-            "returnProperties": STATEMENT_RETURN_PROPERTIES,
+            'target': 'Statement',
+            'filters': {'conditions': convert_to_rid_list(variants), 'operator': 'CONTAINSANY'},
+            'returnProperties': STATEMENT_RETURN_PROPERTIES,
        }
    )
 
    if not failed_review:
-        statements = [s for s in statements if s.get("reviewStatus") != FAILED_REVIEW_STATUS]
+        statements = [s for s in statements if s.get('reviewStatus') != FAILED_REVIEW_STATUS]
 
    return [cast(Statement, s) for s in statements]
diff --git a/pori_python/graphkb/util.py b/pori_python/graphkb/util.py
index 2ff8620c..23c28963 100644
--- a/pori_python/graphkb/util.py
+++ b/pori_python/graphkb/util.py
@@ -14,14 +14,14 @@
 
 from pori_python.types import ParsedVariant, PositionalVariant, Record
 
-from .constants import DEFAULT_LIMIT, DEFAULT_URL, TYPES_TO_NOTATION, AA_3to1_MAPPING
+from .constants import DEFAULT_LIMIT, TYPES_TO_NOTATION, AA_3to1_MAPPING
 
 QUERY_CACHE: Dict[Any, Any] = {}
 
 # name the logger after the package to make it simple to disable for packages using this one as a dependency
 # https://stackoverflow.com/questions/11029717/how-do-i-disable-log-messages-from-the-requests-library
-logger = logging.getLogger("graphkb")
+logger = logging.getLogger('graphkb')
 
 
 def convert_to_rid_list(records: Iterable[Record]) -> List[str]:
@@ -31,7 +31,7 @@ def convert_to_rid_list(records: Iterable[Record]) -> List[str]:
         if isinstance(record, str):
             result.append(record)  # assume an @rid string
         else:
-            result.append(record["@rid"])
+            result.append(record['@rid'])
     return result
 
 
@@ -41,7 +41,7 @@ class FeatureNotFoundError(Exception):
 
 def looks_like_rid(rid: str) -> bool:
     """Check if an input string looks like a GraphKB ID."""
-    if re.match(r"^#-?\d+:-?\d+$", rid):
+    if re.match(r'^#-?\d+:-?\d+$', rid):
         return True
     return False
 
@@ -50,15 +50,15 @@ def convert_aa_3to1(three_letter_notation: str) -> str:
     """Convert an Input string from 3 letter AA notation to 1 letter AA notation."""
     result = []
 
-    if ":" in three_letter_notation:
+    if ':' in three_letter_notation:
         # do not include the feature/gene in replacements
-        pos = three_letter_notation.index(":")
+        pos = three_letter_notation.index(':')
         result.append(three_letter_notation[: pos + 1])
         three_letter_notation = three_letter_notation[pos + 1 :]
 
     last_match_end = 0  # exclusive interval [ )
 
-    for match in re.finditer(r"[A-Z][a-z][a-z]", three_letter_notation):
+    for match in re.finditer(r'[A-Z][a-z][a-z]', three_letter_notation):
         # add the in-between string
         result.append(three_letter_notation[last_match_end : match.start()])
         text = three_letter_notation[match.start() : match.end()]
@@ -66,7 +66,7 @@ def convert_aa_3to1(three_letter_notation: str) -> str:
         last_match_end = match.end()
 
     result.append(three_letter_notation[last_match_end:])
-    return "".join(result)
+    return ''.join(result)
 
 
 def join_url(base_url: str, *parts) -> str:
@@ -74,9 +74,9 @@ def join_url(base_url: str, *parts) -> str:
     if not parts:
         return base_url
 
-    url = [base_url.rstrip("/")] + [part.strip("/") for part in parts]
+    url = [base_url.rstrip('/')] + [part.strip('/') for part in parts]
 
-    return "/".join(url)
+    return '/'.join(url)
 
 
 def millis_interval(start: datetime, end: datetime) -> int:
@@ -91,16 +91,16 @@ def millis_interval(start: datetime, end: datetime) -> int:
 def cache_key(request_body) -> str:
     """Create a cache key for a query request to GraphKB."""
     body = json.dumps(request_body, sort_keys=True)
-    hash_code = hashlib.md5(f"/query{body}".encode("utf-8")).hexdigest()
+    hash_code = hashlib.md5(f'/query{body}'.encode('utf-8')).hexdigest()
     return hash_code
 
 
 class GraphKBConnection:
     def __init__(
         self,
-        url: str = os.environ.get("GRAPHKB_URL", DEFAULT_URL),
-        username: str = "",
-        password: str = "",
+        url: str = os.environ.get('GRAPHKB_URL'),
+        username: str = '',
+        password: str = '',
         use_global_cache: bool = True,
     ):
         self.http = requests.Session()
@@ -111,13 +111,13 @@ def __init__(
             backoff_factor=5,
             status_forcelist=[429, 500, 502, 503, 504],
         )
-        self.http.mount("https://", HTTPAdapter(max_retries=retries))
-        self.token = ""
-        self.token_kc = ""
+        self.http.mount('https://', HTTPAdapter(max_retries=retries))
+        self.token = ''
+        self.token_kc = ''
         self.url = url
         self.username = username
         self.password = password
-        self.headers = {"Accept": "application/json", "Content-Type": "application/json"}
+        self.headers = {'Accept': 'application/json', 'Content-Type': 'application/json'}
         self.cache: Dict[Any, Any] = {} if not use_global_cache else QUERY_CACHE
         self.request_count = 0
         self.first_request: Optional[datetime] = None
@@ -125,6 +125,10 @@ def __init__(
         if username and password:
             self.login(username=username, password=password)
 
+        # URL check
+        if not self.url:
+            raise ValueError('URL to a GraphKB API instance is required')
+
     @property
     def load(self) -> Optional[float]:
         if self.first_request and self.last_request:
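The removed DEFAULT_URL fallback makes this the breaking change of the PR: constructing a connection with no url argument and no GRAPHKB_URL in the environment now raises. A sketch of the two supported configurations (the host below is a placeholder):

```python
from pori_python.graphkb import GraphKBConnection

# Option 1: export GRAPHKB_URL before the interpreter imports this package;
# the default url is captured from the environment at import time.
conn = GraphKBConnection()

# Option 2: pass the URL explicitly. With neither provided, __init__ raises
# ValueError('URL to a GraphKB API instance is required').
conn = GraphKBConnection(url='https://your-graphkb-instance/api')  # placeholder host
```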
Args: @@ -151,7 +155,7 @@ def request(self, endpoint: str, method: str = "GET", **kwargs) -> Dict: # don't want to use a read timeout if the request is not idempotent # otherwise you may wind up making unintended changes timeout = None - if endpoint in ["query", "parse"]: + if endpoint in ['query', 'parse']: timeout = (connect_timeout, read_timeout) start_time = datetime.now() @@ -170,7 +174,6 @@ def request(self, endpoint: str, method: str = "GET", **kwargs) -> Dict: if attempt > 0: time.sleep(2) # wait between retries try: - if need_refresh_login: self.refresh_login() need_refresh_login = False @@ -180,7 +183,7 @@ def request(self, endpoint: str, method: str = "GET", **kwargs) -> Dict: method, url, headers=self.headers, timeout=timeout, **kwargs ) if resp.status_code == 401 or resp.status_code == 403: - logger.debug(f"/{endpoint} - {resp.status_code} - retrying") + logger.debug(f'/{endpoint} - {resp.status_code} - retrying') # try to re-login if the token expired need_refresh_login = True continue @@ -188,14 +191,14 @@ def request(self, endpoint: str, method: str = "GET", **kwargs) -> Dict: break except (requests.exceptions.ConnectionError, OSError) as err: if attempt < len(attempts) - 1: - logger.debug(f"/{endpoint} - {str(err)} - retrying") + logger.debug(f'/{endpoint} - {str(err)} - retrying') continue raise err except Exception as err2: raise err2 timing = millis_interval(start_time, datetime.now()) - logger.debug(f"/{endpoint} - {resp.status_code} - {timing} ms") # type: ignore + logger.debug(f'/{endpoint} - {resp.status_code} - {timing} ms') # type: ignore try: resp.raise_for_status() @@ -203,7 +206,7 @@ def request(self, endpoint: str, method: str = "GET", **kwargs) -> Dict: # try to get more error details message = str(err) try: - message += " " + resp.json()["message"] + message += ' ' + resp.json()['message'] except Exception: pass @@ -213,7 +216,7 @@ def request(self, endpoint: str, method: str = "GET", **kwargs) -> Dict: def post(self, uri: str, data: Dict = {}, **kwargs) -> Dict: """Convenience method for making post requests.""" - return self.request(uri, method="POST", data=json.dumps(data), **kwargs) + return self.request(uri, method='POST', data=json.dumps(data), **kwargs) def login_demo(self) -> None: """ @@ -223,26 +226,26 @@ def login_demo(self) -> None: 2. get a second token from the GraphKB API using keyCloakToken; self.login() """ url_parts = urlsplit(self.url) - base_url = f"{url_parts.scheme}://{url_parts.netloc}" + base_url = f'{url_parts.scheme}://{url_parts.netloc}' try: resp = requests.request( - url=f"{base_url}/auth/realms/PORI/protocol/openid-connect/token", - method="POST", + url=f'{base_url}/auth/realms/PORI/protocol/openid-connect/token', + method='POST', data={ - "client_id": "GraphKB", - "grant_type": "password", - "password": self.password, - "username": self.username, + 'client_id': 'GraphKB', + 'grant_type': 'password', + 'password': self.password, + 'username': self.username, }, ) except Exception as err: - logger.debug(f"unable to fetch a token from KeyCloak: {err}") + logger.debug(f'unable to fetch a token from KeyCloak: {err}') raise err resp.raise_for_status() content = resp.json() - self.token_kc = content["access_token"] + self.token_kc = content['access_token'] def login(self, username: str, password: str, pori_demo: bool = False) -> None: self.username = username @@ -251,7 +254,8 @@ def login(self, username: str, password: str, pori_demo: bool = False) -> None: read_timeout = 61 # KBDEV-1328. Alt. 
GraphKB login for GSC's PORI online demo
-        if pori_demo or "pori-demo" in self.url:
+        if pori_demo or 'pori-demo' in self.url:
+            logger.warning('login demo')
             self.login_demo()

         # use requests package directly to avoid recursion loop on login failure
@@ -262,29 +266,29 @@ def login(self, username: str, password: str, pori_demo: bool = False) -> None:
             try:
                 self.request_count += 1
                 resp = requests.request(
-                    url=f"{self.url}/token",
-                    method="POST",
+                    url=f'{self.url}/token',
+                    method='POST',
                     headers=self.headers,
                     timeout=(connect_timeout, read_timeout),
                     data=json.dumps(
                         # KBDEV-1328. Alt. GraphKB login for GSC's PORI online demo
-                        {"keyCloakToken": self.token_kc}
+                        {'keyCloakToken': self.token_kc}
                         if self.token_kc
-                        else {"username": username, "password": password}
+                        else {'username': username, 'password': password}
                     ),
                 )
                 break
             except (requests.exceptions.ConnectionError, OSError) as err:
                 if attempt < len(attempts) - 1:
-                    logger.debug(f"/login - {str(err)} - retrying")
+                    logger.debug(f'/login - {str(err)} - retrying')
                     continue
                 raise err
             except Exception as err2:
                 raise err2

         resp.raise_for_status()
         content = resp.json()
-        self.token = content["kbToken"]
-        self.headers["Authorization"] = self.token
+        self.token = content['kbToken']
+        self.headers['Authorization'] = self.token

     def refresh_login(self) -> None:
         self.login(self.username, self.password)
@@ -306,7 +310,7 @@ def query(
         Query GraphKB
         """
         result: List[Record] = []
-        hash_code = ""
+        hash_code = ''

         if not ignore_cache and paginate:
             hash_code = cache_key(request_body)
@@ -314,8 +318,8 @@ def query(
                 return self.cache[hash_code]

         while True:
-            content = self.post("query", data={**request_body, "limit": limit, "skip": len(result)})
-            records = content["result"]
+            content = self.post('query', data={**request_body, 'limit': limit, 'skip': len(result)})
+            records = content['result']
             result.extend(records)

             if len(records) < limit or not paginate:
                 break
@@ -326,17 +330,17 @@ def query(

     def parse(self, hgvs_string: str, requireFeatures: bool = False) -> ParsedVariant:
         content = self.post(
-            "parse", data={"content": hgvs_string, "requireFeatures": requireFeatures}
+            'parse', data={'content': hgvs_string, 'requireFeatures': requireFeatures}
         )
-        return cast(ParsedVariant, content["result"])
+        return cast(ParsedVariant, content['result'])

     def get_records_by_id(self, record_ids: List[str]) -> List[Record]:
         if not record_ids:
             return []
-        result = self.query({"target": record_ids})
+        result = self.query({'target': record_ids})
         if len(record_ids) != len(result):
             raise AssertionError(
-                f"The number of Ids given ({len(record_ids)}) does not match the number of records fetched ({len(result)})"
+                f'The number of Ids given ({len(record_ids)}) does not match the number of records fetched ({len(result)})'
             )
         return result
@@ -345,9 +349,9 @@ def get_record_by_id(self, record_id: str) -> Record:
         return result[0]

     def get_source(self, name: str) -> Record:
-        source = self.query({"target": "Source", "filters": {"name": name}})
+        source = self.query({'target': 'Source', 'filters': {'name': name}})
         if len(source) != 1:
-            raise AssertionError(f"Unable to unqiuely identify source with name {name}")
+            raise AssertionError(f'Unable to uniquely identify source with name {name}')
         return source[0]

@@ -367,27 +371,27 @@ def get_rid(conn: GraphKBConnection, target: str, name: str) -> str:
         AssertionError: if the term was not found or more than 1 match was found (expected to be unique)
     """
     result = conn.query(
-        {"target": target, "filters": {"name": name}, "returnProperties": 
["@rid"]}, + {'target': target, 'filters': {'name': name}, 'returnProperties': ['@rid']}, ignore_cache=False, ) assert len(result) == 1, f"unable to find unique '{target}' ID for '{name}'" - return result[0]["@rid"] + return result[0]['@rid'] def stripParentheses(breakRepr: str) -> str: - match = re.search(r"^([a-z])\.\((.+)\)$", breakRepr) + match = re.search(r'^([a-z])\.\((.+)\)$', breakRepr) if match: - return f"{match.group(1)}.{match.group(2)}" + return f'{match.group(1)}.{match.group(2)}' return breakRepr def stripRefSeq(breakRepr: str) -> str: # 1 leading RefSeq - match = re.search(r"^([a-z])\.([A-Z]*|\?)([0-9]*[A-Z]*)$", breakRepr) + match = re.search(r'^([a-z])\.([A-Z]*|\?)([0-9]*[A-Z]*)$', breakRepr) if match: - return f"{match.group(1)}.{match.group(3)}" + return f'{match.group(1)}.{match.group(3)}' # TODO: Deal with cases like "p.?889_?890", "chr4:g.55593604_55593605delGGinsTT", ... @@ -395,27 +399,27 @@ def stripRefSeq(breakRepr: str) -> str: def stripDisplayName(displayName: str, withRef: bool = True, withRefSeq: bool = True) -> str: - match = re.search(r"^(.*)(\:)(.*)$", displayName) + match = re.search(r'^(.*)(\:)(.*)$', displayName) if match and not withRef: if withRefSeq: return match.group(3) displayName = match.group(2) + match.group(3) - match = re.search(r"^(.*\:)([a-z]\.)(.*)$", displayName) + match = re.search(r'^(.*\:)([a-z]\.)(.*)$', displayName) if match and not withRefSeq: - ref: str = match.group(1) if match.group(1) != ":" else "" + ref: str = match.group(1) if match.group(1) != ':' else '' prefix: str = match.group(2) rest: str = match.group(3) new_matches: Union[bool, object] = True # refSeq before position while new_matches: - new_matches = re.search(r"(.*)([A-Z]|\?)([0-9]+)(.*)", rest) + new_matches = re.search(r'(.*)([A-Z]|\?)([0-9]+)(.*)', rest) if new_matches: rest = new_matches.group(1) + new_matches.group(3) + new_matches.group(4) # refSeq before '>' - new_matches = re.search(r"^([0-9]*)([A-Z]*|\?)(\>)(.*)$", rest) + new_matches = re.search(r'^([0-9]*)([A-Z]*|\?)(\>)(.*)$', rest) if new_matches: rest = new_matches.group(1) + new_matches.group(3) + new_matches.group(4) @@ -442,18 +446,18 @@ def stringifyVariant( str: The string representation """ - displayName: str = variant.get("displayName") or "" # type: ignore + displayName: str = variant.get('displayName') or '' # type: ignore # If variant is a PositionalVariant (i.e. variant with a displayName) and # we already have the appropriate string representation, # then return it right away - if displayName != "" and (withRef and withRefSeq): + if displayName != '' and (withRef and withRefSeq): return displayName # If variant is a PositionalVariant (i.e. variant with a displayName) and # we DO NOT have the appropriate string representation, # then strip unwanted features, then return it right away - if displayName != "": + if displayName != '': return stripDisplayName(displayName, withRef, withRefSeq) # If variant is a ParsedVariant (i.e. 
variant without a displayName yet), @@ -464,106 +468,106 @@ def stringifyVariant( result: List[str] = [] # Extracting parsed values into individual variables - break1Repr: str = str(parsed.get("break1Repr", "")) - break2Repr: str = str(parsed.get("break2Repr", "")) - multiFeature: bool = bool(parsed.get("multiFeature")) - noFeatures: bool = bool(parsed.get("noFeatures")) - notationType: str = str(parsed.get("notationType", "")) - reference1: str = "" - if ref1 := parsed.get("reference1"): + break1Repr: str = str(parsed.get('break1Repr', '')) + break2Repr: str = str(parsed.get('break2Repr', '')) + multiFeature: bool = bool(parsed.get('multiFeature')) + noFeatures: bool = bool(parsed.get('noFeatures')) + notationType: str = str(parsed.get('notationType', '')) + reference1: str = '' + if ref1 := parsed.get('reference1'): if isinstance(ref1, str): reference1 = ref1 else: - reference1 = ref1.get("displayName", str(ref1)) - reference2: str = "" - if ref2 := parsed.get("reference2"): + reference1 = ref1.get('displayName', str(ref1)) + reference2: str = '' + if ref2 := parsed.get('reference2'): if isinstance(ref2, str): reference2 = ref2 else: - reference2 = ref2.get("displayName", str(ref2)) - refSeq: str = parsed.get("refSeq") or "" - truncation: int = parsed.get("truncation") or 0 # type: ignore - variantType: str = parsed.get("type", "") - untemplatedSeq: str = parsed.get("untemplatedSeq") or "" - untemplatedSeqSize: int = parsed.get("untemplatedSeqSize") or 0 + reference2 = ref2.get('displayName', str(ref2)) + refSeq: str = parsed.get('refSeq') or '' + truncation: int = parsed.get('truncation') or 0 # type: ignore + variantType: str = parsed.get('type', '') + untemplatedSeq: str = parsed.get('untemplatedSeq') or '' + untemplatedSeqSize: int = parsed.get('untemplatedSeqSize') or 0 # formating notationType if not notationType: - notationType = TYPES_TO_NOTATION.get(variantType, re.sub(r"\s", "-", variantType)) + notationType = TYPES_TO_NOTATION.get(variantType, re.sub(r'\s', '-', variantType)) # If multiFeature - if multiFeature or (reference2 != "" and reference1 != reference2): + if multiFeature or (reference2 != '' and reference1 != reference2): if withRef and not noFeatures: - result.append(f"({reference1}:{reference2})") + result.append(f'({reference1}:{reference2})') result.append(notationType) if withRefSeq: break1Repr_noParentheses = stripParentheses(break1Repr) break2Repr_noParentheses = stripParentheses(break2Repr) - result.append(f"({break1Repr_noParentheses},{break2Repr_noParentheses})") + result.append(f'({break1Repr_noParentheses},{break2Repr_noParentheses})') else: break1Repr_noParentheses_noRefSeq = stripRefSeq(stripParentheses(break1Repr)) break2Repr_noParentheses_noRefSeq = stripRefSeq(stripParentheses(break2Repr)) result.append( - f"({break1Repr_noParentheses_noRefSeq},{break2Repr_noParentheses_noRefSeq})" + f'({break1Repr_noParentheses_noRefSeq},{break2Repr_noParentheses_noRefSeq})' ) - if untemplatedSeq != "": + if untemplatedSeq != '': result.append(untemplatedSeq) elif untemplatedSeqSize: result.append(str(untemplatedSeqSize)) - return "".join(result) + return ''.join(result) # Continuous notation... 
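(Illustration, not part of the diff: the multi-feature branch above emits fusion-style strings, while the continuous branch below assembles a single-reference HGVS-like string. A rough sketch with invented field values; the notation-type mapping is assumed here, not read from TYPES_TO_NOTATION.)

```python
# Sketch only: invented inputs for stringifyVariant's continuous branch.
parsed = {
    'reference1': 'KRAS',         # hypothetical gene
    'break1Repr': 'p.G12',        # includes the leading reference residue 'G'
    'break2Repr': '',
    'untemplatedSeq': 'D',
    'type': 'missense mutation',  # assumed to map to the 'mis' notation type
}
# withRef=True, withRefSeq=True  -> 'KRAS:' + 'p.G12' + 'D' == 'KRAS:p.G12D'
# withRef=False                  -> the leading 'KRAS:' is omitted
# withRefSeq=False               -> stripRefSeq() drops the 'G' before the position
```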
# Reference if withRef and not noFeatures: - result.append(f"{reference1}:") + result.append(f'{reference1}:') # BreakRep if withRefSeq: result.append(break1Repr) - if break2Repr != "": - result.append(f"_{break2Repr[2:]}") + if break2Repr != '': + result.append(f'_{break2Repr[2:]}') else: result.append(stripRefSeq(break1Repr)) - if break2Repr != "": - result.append(f"_{stripRefSeq(break2Repr)[2:]}") + if break2Repr != '': + result.append(f'_{stripRefSeq(break2Repr)[2:]}') # refSeq, truncation, notationType, untemplatedSeq, untemplatedSeqSize - if any(i in notationType for i in ["ext", "fs"]) or ( - notationType == ">" and break1Repr.startswith("p.") + if any(i in notationType for i in ['ext', 'fs']) or ( + notationType == '>' and break1Repr.startswith('p.') ): result.append(untemplatedSeq) - if notationType == "mis" and break1Repr.startswith("p."): + if notationType == 'mis' and break1Repr.startswith('p.'): result.append(untemplatedSeq) - elif notationType != ">": - if notationType == "delins": + elif notationType != '>': + if notationType == 'delins': if withRefSeq: - result.append(f"del{refSeq}ins") + result.append(f'del{refSeq}ins') else: - result.append("delins") + result.append('delins') else: result.append(notationType) if truncation and truncation != 1: if truncation < 0: result.append(str(truncation)) else: - result.append(f"*{truncation}") - if any(i in notationType for i in ["dup", "del", "inv"]): + result.append(f'*{truncation}') + if any(i in notationType for i in ['dup', 'del', 'inv']): if withRefSeq: result.append(refSeq) - if any(i in notationType for i in ["ins", "delins"]): - if untemplatedSeq != "": + if any(i in notationType for i in ['ins', 'delins']): + if untemplatedSeq != '': result.append(untemplatedSeq) elif untemplatedSeqSize: result.append(str(untemplatedSeqSize)) - elif not break1Repr.startswith("p."): + elif not break1Repr.startswith('p.'): if withRefSeq: - refSeq = refSeq if refSeq != "" else "?" + refSeq = refSeq if refSeq != '' else '?' else: - refSeq = "" - untemplatedSeq = untemplatedSeq if untemplatedSeq != "" else "?" - result.append(f"{refSeq}{notationType}{untemplatedSeq}") + refSeq = '' + untemplatedSeq = untemplatedSeq if untemplatedSeq != '' else '?' 
+ result.append(f'{refSeq}{notationType}{untemplatedSeq}') # TODO: Deal with more complexes cases like 'MED12:p.(?34_?68)mut' - return "".join(result) + return ''.join(result) diff --git a/pori_python/graphkb/vocab.py b/pori_python/graphkb/vocab.py index 26033e75..e9242a7a 100644 --- a/pori_python/graphkb/vocab.py +++ b/pori_python/graphkb/vocab.py @@ -7,14 +7,14 @@ def query_by_name(ontology_class: str, base_term_name: str) -> Dict: - return {"target": ontology_class, "filters": {"name": base_term_name}} + return {'target': ontology_class, 'filters': {'name': base_term_name}} def get_equivalent_terms( conn: GraphKBConnection, base_term_name: str, - root_exclude_term: str = "", - ontology_class: str = "Vocabulary", + root_exclude_term: str = '', + ontology_class: str = 'Vocabulary', ignore_cache: bool = False, build_base_query: Callable = query_by_name, ) -> List[Ontology]: @@ -32,10 +32,10 @@ def get_equivalent_terms( List[Ontology], conn.query( { - "target": {"target": base_records, "queryType": "descendants"}, - "queryType": "similarTo", - "treeEdges": [], - "returnProperties": ["sourceId", "sourceIdVersion", "deprecated", "name", "@rid"], + 'target': {'target': base_records, 'queryType': 'descendants'}, + 'queryType': 'similarTo', + 'treeEdges': [], + 'returnProperties': ['sourceId', 'sourceIdVersion', 'deprecated', 'name', '@rid'], }, ignore_cache=ignore_cache, ), @@ -51,30 +51,30 @@ def get_equivalent_terms( convert_to_rid_list( conn.query( { - "target": {"target": root_records, "queryType": "descendants"}, - "queryType": "similarTo", - "treeEdges": [], - "returnProperties": [ - "sourceId", - "sourceIdVersion", - "deprecated", - "name", - "@rid", + 'target': {'target': root_records, 'queryType': 'descendants'}, + 'queryType': 'similarTo', + 'treeEdges': [], + 'returnProperties': [ + 'sourceId', + 'sourceIdVersion', + 'deprecated', + 'name', + '@rid', ], }, ignore_cache=ignore_cache, ) ) ) - return [term for term in base_term_parents if term["@rid"] not in exclude] + return [term for term in base_term_parents if term['@rid'] not in exclude] return base_term_parents def get_term_tree( conn: GraphKBConnection, base_term_name: str, - root_exclude_term: str = "", - ontology_class: str = "Vocabulary", + root_exclude_term: str = '', + ontology_class: str = 'Vocabulary', include_superclasses: bool = True, ignore_cache: bool = False, build_base_query: Callable = query_by_name, @@ -102,10 +102,10 @@ def get_term_tree( List[Ontology], conn.query( { - "target": {"target": base_records, "queryType": "ancestors"}, - "queryType": "similarTo", - "treeEdges": [], - "returnProperties": ["sourceId", "sourceIdVersion", "deprecated", "name", "@rid"], + 'target': {'target': base_records, 'queryType': 'ancestors'}, + 'queryType': 'similarTo', + 'treeEdges': [], + 'returnProperties': ['sourceId', 'sourceIdVersion', 'deprecated', 'name', '@rid'], }, ignore_cache=ignore_cache, ), @@ -126,7 +126,7 @@ def get_term_tree( terms = {} # merge the two lists for term in child_terms + parent_terms: - terms[term["@rid"]] = term + terms[term['@rid']] = term return list(terms.values()) @@ -134,7 +134,7 @@ def get_term_tree( def get_term_by_name( conn: GraphKBConnection, name: str, - ontology_class: str = "Vocabulary", + ontology_class: str = 'Vocabulary', ignore_cache: bool = False, **kwargs, ) -> Ontology: @@ -156,15 +156,15 @@ def get_term_by_name( """ result = conn.query( { - "target": ontology_class, - "filters": {"name": name}, - "returnProperties": [ - "sourceId", - "sourceIdVersion", - "deprecated", - "name", - 
"@rid", - "@class", + 'target': ontology_class, + 'filters': {'name': name}, + 'returnProperties': [ + 'sourceId', + 'sourceIdVersion', + 'deprecated', + 'name', + '@rid', + '@class', ], }, ignore_cache=ignore_cache, @@ -172,7 +172,7 @@ def get_term_by_name( ) if len(result) != 1: - raise AssertionError(f"unable to find term ({name}) by name") + raise AssertionError(f'unable to find term ({name}) by name') return cast(Ontology, result[0]) diff --git a/pori_python/ipr/annotate.py b/pori_python/ipr/annotate.py index 72ae7626..cd6478a3 100644 --- a/pori_python/ipr/annotate.py +++ b/pori_python/ipr/annotate.py @@ -43,16 +43,16 @@ def get_second_pass_variants( # second-pass matching all_inferred_matches: Dict[str, Variant] = {} inferred_variants = { - (s["subject"]["@rid"], s["relevance"]["name"]) + (s['subject']['@rid'], s['relevance']['name']) for s in statements - if s["subject"] and s["subject"]["@class"] in ("Feature", "Signature") + if s['subject'] and s['subject']['@class'] in ('Feature', 'Signature') } for reference1, variant_type in inferred_variants: variants = gkb_match.match_category_variant(graphkb_conn, reference1, variant_type) for variant in variants: - all_inferred_matches[variant["@rid"]] = variant + all_inferred_matches[variant['@rid']] = variant inferred_matches: List[Variant] = list(all_inferred_matches.values()) return inferred_matches @@ -70,7 +70,7 @@ def get_ipr_statements_from_variants( rows = [] statements = get_statements_from_variants(graphkb_conn, matches) - existing_statements = {s["@rid"] for s in statements} + existing_statements = {s['@rid'] for s in statements} for ipr_row in convert_statements_to_alterations( graphkb_conn, statements, disease_matches, convert_to_rid_set(matches) @@ -83,7 +83,7 @@ def get_ipr_statements_from_variants( inferred_statements = [ s for s in get_statements_from_variants(graphkb_conn, inferred_matches) - if s["@rid"] not in existing_statements # do not duplicate if non-inferred match + if s['@rid'] not in existing_statements # do not duplicate if non-inferred match ] for ipr_row in convert_statements_to_alterations( @@ -92,7 +92,7 @@ def get_ipr_statements_from_variants( disease_matches, convert_to_rid_set(inferred_matches), ): - ipr_row["kbData"]["inferred"] = True + ipr_row['kbData']['inferred'] = True rows.append(ipr_row) return rows @@ -118,35 +118,35 @@ def annotate_expression_variants( skipped = 0 alterations = [] problem_genes = set() - logger.info(f"Starting annotation of {len(variants)} expression category_variants") + logger.info(f'Starting annotation of {len(variants)} expression category_variants') iterfunc = tqdm if show_progress else iter for row in iterfunc(variants): - gene = row["gene"] - variant = row["variant"] + gene = row['gene'] + variant = row['variant'] if not variant: skipped += 1 - logger.debug(f"Skipping malformed Expression {gene}: {row}") + logger.debug(f'Skipping malformed Expression {gene}: {row}') continue try: matches = gkb_match.match_expression_variant(graphkb_conn, gene, variant) for ipr_row in get_ipr_statements_from_variants(graphkb_conn, matches, disease_matches): - ipr_row["variant"] = row["key"] - ipr_row["variantType"] = row.get("variantType", "exp") + ipr_row['variant'] = row['key'] + ipr_row['variantType'] = row.get('variantType', 'exp') alterations.append(ipr_row) except FeatureNotFoundError as err: problem_genes.add(gene) - logger.debug(f"Unrecognized gene ({gene} {variant}): {err}") + logger.debug(f'Unrecognized gene ({gene} {variant}): {err}') except ValueError as err: - 
logger.error(f"failed to match variants ({gene} {variant}): {err}") + logger.error(f'failed to match variants ({gene} {variant}): {err}') if skipped: - logger.info(f"skipped matching {skipped} expression information rows") + logger.info(f'skipped matching {skipped} expression information rows') if problem_genes: - logger.error(f"gene finding failures for expression {sorted(problem_genes)}") - logger.error(f"gene finding falure for {len(problem_genes)} expression genes") + logger.error(f'gene finding failures for expression {sorted(problem_genes)}') + logger.error(f'gene finding falure for {len(problem_genes)} expression genes') logger.info( - f"matched {len(variants)} expression variants to {len(alterations)} graphkb annotations" + f'matched {len(variants)} expression variants to {len(alterations)} graphkb annotations' ) return alterations @@ -172,11 +172,11 @@ def annotate_copy_variants( alterations = [] problem_genes = set() - logger.info(f"Starting annotation of {len(variants)} copy category_variants") + logger.info(f'Starting annotation of {len(variants)} copy category_variants') iterfunc = tqdm if show_progress else iter for row in iterfunc(variants): - gene = row["gene"] - variant = row["variant"] + gene = row['gene'] + variant = row['variant'] if variant not in REPORTED_COPY_VARIANTS: # https://www.bcgsc.ca/jira/browse/GERO-77 @@ -186,24 +186,24 @@ def annotate_copy_variants( try: matches = gkb_match.match_copy_variant(graphkb_conn, gene, variant) for ipr_row in get_ipr_statements_from_variants(graphkb_conn, matches, disease_matches): - ipr_row["variant"] = row["key"] - ipr_row["variantType"] = row.get("variantType", "cnv") + ipr_row['variant'] = row['key'] + ipr_row['variantType'] = row.get('variantType', 'cnv') alterations.append(ipr_row) except FeatureNotFoundError as err: problem_genes.add(gene) - logger.debug(f"Unrecognized gene ({gene} {variant}): {err}") + logger.debug(f'Unrecognized gene ({gene} {variant}): {err}') except ValueError as err: - logger.error(f"failed to match variants ({gene} {variant}): {err}") + logger.error(f'failed to match variants ({gene} {variant}): {err}') if skipped: logger.info( - f"skipped matching {skipped} copy number variants not in {REPORTED_COPY_VARIANTS}" + f'skipped matching {skipped} copy number variants not in {REPORTED_COPY_VARIANTS}' ) if problem_genes: - logger.error(f"gene finding failures for copy variants {sorted(problem_genes)}") - logger.error(f"gene finding failure for {len(problem_genes)} copy variant genes") + logger.error(f'gene finding failures for copy variants {sorted(problem_genes)}') + logger.error(f'gene finding failure for {len(problem_genes)} copy variant genes') logger.info( - f"matched {len(variants)} copy category variants to {len(alterations)} graphkb annotations" + f'matched {len(variants)} copy category variants to {len(alterations)} graphkb annotations' ) return alterations @@ -226,14 +226,14 @@ def annotate_positional_variants( Returns: Hashable list of kbMatches records for IPR """ - VARIANT_KEYS = ("variant", "hgvsProtein", "hgvsCds", "hgvsGenomic") + VARIANT_KEYS = ('variant', 'hgvsProtein', 'hgvsCds', 'hgvsGenomic') errors = 0 alterations: List[Hashabledict] = [] problem_genes = set() iterfunc = tqdm if show_progress else iter for row in iterfunc(variants): - if not row.get("gene") and (not row.get("gene1") or not row.get("gene2")): + if not row.get('gene') and (not row.get('gene1') or not row.get('gene2')): # 
https://www.bcgsc.ca/jira/browse/GERO-56?focusedCommentId=1234791&page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel#comment-1234791 # should not match single gene SVs continue @@ -250,15 +250,15 @@ def annotate_positional_variants( # DEVSU-1885 - fix malformed single deletion described as substitution of blank # eg. deletion described as substitution with nothing: 'chr1:g.150951027T>' if ( - variant[-1] == ">" - and "g." in variant + variant[-1] == '>' + and 'g.' in variant and variant[-2].isalpha() and variant[-3].isnumeric() ): logger.warning( - f"Assuming malformed deletion variant {variant} is {variant[:-2] + 'del'}" + f'Assuming malformed deletion variant {variant} is {variant[:-2] + "del"}' ) - variant = variant[:-2] + "del" + variant = variant[:-2] + 'del' matches = gkb_match.match_positional_variant(graphkb_conn, variant) else: raise parse_err @@ -268,42 +268,42 @@ def annotate_positional_variants( matches, disease_matches, ): - ipr_row["variant"] = row["key"] - ipr_row["variantType"] = row.get( - "variantType", "mut" if row.get("gene") else "sv" + ipr_row['variant'] = row['key'] + ipr_row['variantType'] = row.get( + 'variantType', 'mut' if row.get('gene') else 'sv' ) alterations.append(Hashabledict(ipr_row)) except FeatureNotFoundError as err: - logger.debug(f"failed to match positional variants ({variant}): {err}") + logger.debug(f'failed to match positional variants ({variant}): {err}') errors += 1 - if "gene" in row: - problem_genes.add(row["gene"]) - elif "gene1" in row and f"({row['gene1']})" in str(err): - problem_genes.add(row["gene1"]) - elif "gene2" in row and f"({row['gene2']})" in str(err): - problem_genes.add(row["gene2"]) - elif "gene1" in row and "gene2" in row: - problem_genes.add(row["gene1"]) - problem_genes.add(row["gene2"]) + if 'gene' in row: + problem_genes.add(row['gene']) + elif 'gene1' in row and f'({row["gene1"]})' in str(err): + problem_genes.add(row['gene1']) + elif 'gene2' in row and f'({row["gene2"]})' in str(err): + problem_genes.add(row['gene2']) + elif 'gene1' in row and 'gene2' in row: + problem_genes.add(row['gene1']) + problem_genes.add(row['gene2']) else: raise err except HTTPError as err: errors += 1 - logger.error(f"failed to match positional variants ({variant}): {err}") + logger.error(f'failed to match positional variants ({variant}): {err}') if problem_genes: - logger.error(f"gene finding failures for {sorted(problem_genes)}") - logger.error(f"{len(problem_genes)} gene finding failures for positional variants") + logger.error(f'gene finding failures for {sorted(problem_genes)}') + logger.error(f'{len(problem_genes)} gene finding failures for positional variants') if errors: - logger.error(f"skipped {errors} positional variants due to errors") + logger.error(f'skipped {errors} positional variants due to errors') # drop duplicates alterations = list(set(alterations)) - variant_types = ", ".join(sorted(set([alt["variantType"] for alt in alterations]))) + variant_types = ', '.join(sorted(set([alt['variantType'] for alt in alterations]))) logger.info( - f"matched {len(variants)} {variant_types} positional variants to {len(alterations)} graphkb annotations" + f'matched {len(variants)} {variant_types} positional variants to {len(alterations)} graphkb annotations' ) return alterations @@ -336,30 +336,30 @@ def annotate_signature_variants( # Matching signature variant to GKB Variants matched_variants: List[Variant] = gkb_match.match_category_variant( graphkb_conn, - variant["signatureName"], - variant["variantTypeName"], - 
reference_class="Signature", + variant['signatureName'], + variant['variantTypeName'], + reference_class='Signature', ) # KBDEV-1246 # Keep support for 'high mutation burden' until statement datafix if ( - variant["signatureName"] == TMB_SIGNATURE - and TMB_SIGNATURE != "high mutation burden" + variant['signatureName'] == TMB_SIGNATURE + and TMB_SIGNATURE != 'high mutation burden' ): matched_variants.extend( gkb_match.match_category_variant( graphkb_conn, - "high mutation burden", - variant["variantTypeName"], - reference_class="Signature", + 'high mutation burden', + variant['variantTypeName'], + reference_class='Signature', ) ) # Matching GKB Variants to GKB Statements for ipr_row in get_ipr_statements_from_variants( graphkb_conn, matched_variants, disease_matches ): - ipr_row["variant"] = variant["key"] - ipr_row["variantType"] = "sigv" + ipr_row['variant'] = variant['key'] + ipr_row['variantType'] = 'sigv' alterations.append(Hashabledict(ipr_row)) except ValueError as err: @@ -369,7 +369,7 @@ def annotate_signature_variants( alterations = list(set(alterations)) logger.info( - f"matched {len(variants)} signature category variants to {len(alterations)} graphkb annotations" + f'matched {len(variants)} signature category variants to {len(alterations)} graphkb annotations' ) return alterations @@ -401,25 +401,25 @@ def annotate_variants( gkb_matches: List[Hashabledict] = [] # MATCHING SIGNATURE CATEGORY VARIANTS - logger.info(f"annotating {len(signature_variants)} signatures") + logger.info(f'annotating {len(signature_variants)} signatures') gkb_matches.extend( annotate_signature_variants( graphkb_conn, disease_matches, signature_variants, show_progress=interactive ) ) - logger.debug(f"\tgkb_matches: {len(gkb_matches)}") + logger.debug(f'\tgkb_matches: {len(gkb_matches)}') # MATCHING SMALL MUTATIONS - logger.info(f"annotating {len(small_mutations)} small mutations") + logger.info(f'annotating {len(small_mutations)} small mutations') gkb_matches.extend( annotate_positional_variants( graphkb_conn, small_mutations, disease_matches, show_progress=interactive ) ) - logger.debug(f"\tgkb_matches: {len(gkb_matches)}") + logger.debug(f'\tgkb_matches: {len(gkb_matches)}') # MATCHING STRUCTURAL VARIANTS - logger.info(f"annotating {len(structural_variants)} structural variants") + logger.info(f'annotating {len(structural_variants)} structural variants') gkb_matches.extend( annotate_positional_variants( graphkb_conn, @@ -428,10 +428,10 @@ def annotate_variants( show_progress=interactive, ) ) - logger.debug(f"\tgkb_matches: {len(gkb_matches)}") + logger.debug(f'\tgkb_matches: {len(gkb_matches)}') # MATCHING COPY VARIANTS - logger.info(f"annotating {len(copy_variants)} copy variants") + logger.info(f'annotating {len(copy_variants)} copy variants') gkb_matches.extend( [ Hashabledict(copy_var) @@ -440,10 +440,10 @@ def annotate_variants( ) ] ) - logger.debug(f"\tgkb_matches: {len(gkb_matches)}") + logger.debug(f'\tgkb_matches: {len(gkb_matches)}') # MATCHING EXPRESSION VARIANTS - logger.info(f"annotating {len(expression_variants)} expression variants") + logger.info(f'annotating {len(expression_variants)} expression variants') gkb_matches.extend( [ Hashabledict(exp_var) @@ -455,6 +455,6 @@ def annotate_variants( ) ] ) - logger.debug(f"\tgkb_matches: {len(gkb_matches)}") + logger.debug(f'\tgkb_matches: {len(gkb_matches)}') return gkb_matches diff --git a/pori_python/ipr/connection.py b/pori_python/ipr/connection.py index f2652fdd..70eaf26c 100644 --- a/pori_python/ipr/connection.py +++ 
b/pori_python/ipr/connection.py @@ -6,7 +6,6 @@ import zlib from typing import Dict, List -from .constants import DEFAULT_URL from .util import logger IMAGE_MAX = 20 # cannot upload more than 20 images at a time @@ -17,21 +16,21 @@ def __init__( self, username: str, password: str, - url: str = os.environ.get("IPR_URL", DEFAULT_URL), + url: str = os.environ.get('IPR_URL'), ): self.token = None self.url = url self.username = username self.password = password self.headers = { - "Accept": "application/json", - "Content-Type": "application/json", - "Content-Encoding": "deflate", + 'Accept': 'application/json', + 'Content-Type': 'application/json', + 'Content-Encoding': 'deflate', } self.cache: Dict[str, List[Dict]] = {} self.request_count = 0 - def request(self, endpoint: str, method: str = "GET", **kwargs) -> Dict: + def request(self, endpoint: str, method: str = 'GET', **kwargs) -> Dict: """Request wrapper to handle adding common headers and logging Args: @@ -41,9 +40,9 @@ def request(self, endpoint: str, method: str = "GET", **kwargs) -> Dict: Returns: dict: the json response as a python dict """ - url = f"{self.url}/{endpoint}" + url = f'{self.url}/{endpoint}' self.request_count += 1 - kwargs_header = kwargs.pop("headers", None) + kwargs_header = kwargs.pop('headers', None) if kwargs_header: headers = json.loads(kwargs_header) else: @@ -57,21 +56,21 @@ def request(self, endpoint: str, method: str = "GET", **kwargs) -> Dict: # try to get more error details message = str(err) try: - message += " " + resp.json()["error"]["message"] + message += ' ' + resp.json()['error']['message'] except Exception: pass raise requests.exceptions.HTTPError(message) if resp.status_code == 204: # TODO: address this in api - return {"status_code": 204} + return {'status_code': 204} return resp.json() def post(self, uri: str, data: Dict = {}, **kwargs) -> Dict: """Convenience method for making post requests""" return self.request( uri, - method="POST", - data=zlib.compress(json.dumps(data, allow_nan=False).encode("utf-8")), + method='POST', + data=zlib.compress(json.dumps(data, allow_nan=False).encode('utf-8')), **kwargs, ) @@ -79,8 +78,8 @@ def get(self, uri: str, data: Dict = {}, **kwargs) -> Dict: """Convenience method for making get requests""" return self.request( uri, - method="GET", - data=zlib.compress(json.dumps(data, allow_nan=False).encode("utf-8")), + method='GET', + data=zlib.compress(json.dumps(data, allow_nan=False).encode('utf-8')), **kwargs, ) @@ -88,9 +87,9 @@ def delete(self, uri: str, data: Dict = {}, **kwargs) -> Dict: """Convenience method for making delete requests""" return self.request( uri, - method="DELETE", - data=zlib.compress(json.dumps(data, allow_nan=False).encode("utf-8")), - headers=json.dumps({"Accept": "*/*"}), + method='DELETE', + data=zlib.compress(json.dumps(data, allow_nan=False).encode('utf-8')), + headers=json.dumps({'Accept': '*/*'}), **kwargs, ) @@ -106,83 +105,83 @@ def upload_report( # or 'report'. jobStatus is no longer available once the report is successfully # uploaded. 
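(Aside on the constructor change above, illustrative rather than part of the diff: with the DEFAULT_URL fallback removed, url resolves to None unless IPR_URL is exported or passed explicitly, so callers now need something like the sketch below. The localhost fallback is a hypothetical local-dev value, not a package default.)

```python
import os

from pori_python.ipr.connection import IprConnection

# Sketch: assumes IPR_USER and IPR_PASS are exported; the localhost URL is a
# hypothetical fallback for local development, not a default of the package.
ipr_conn = IprConnection(
    username=os.environ['IPR_USER'],
    password=os.environ['IPR_PASS'],
    url=os.environ.get('IPR_URL', 'http://localhost:8081/api'),
)
```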
- projects = self.get("project") - project_names = [item["name"] for item in projects] + projects = self.get('project') + project_names = [item['name'] for item in projects] # if project is not exist, create one - if content["project"] not in project_names: + if content['project'] not in project_names: logger.info( - f"Project not found - attempting to create project {content['project']}" + f'Project not found - attempting to create project {content["project"]}' ) try: - self.post("project", {"name": content["project"]}) + self.post('project', {'name': content['project']}) except Exception as err: - raise Exception(f"Project creation failed due to {err}") + raise Exception(f'Project creation failed due to {err}') if ignore_extra_fields: - initial_result = self.post("reports-async?ignore_extra_fields=true", content) + initial_result = self.post('reports-async?ignore_extra_fields=true', content) else: - initial_result = self.post("reports-async", content) + initial_result = self.post('reports-async', content) - report_id = initial_result["ident"] + report_id = initial_result['ident'] def check_status_result(result): - if result.get("report", False): - return "upload complete" - if result.get("jobStatus", False) and result["jobStatus"].get("state", False): - return result["jobStatus"]["state"] + if result.get('report', False): + return 'upload complete' + if result.get('jobStatus', False) and result['jobStatus'].get('state', False): + return result['jobStatus']['state'] raise Exception( - "async report get returned with no report or jobStatus, or unexpected jobStatus type" + 'async report get returned with no report or jobStatus, or unexpected jobStatus type' ) def check_status(interval: int = 5, num_attempts: int = 5): for i in range(num_attempts): - logger.info(f"checking report loading status in {interval} seconds") + logger.info(f'checking report loading status in {interval} seconds') time.sleep(interval) - current_status = self.get(f"reports-async/{report_id}") + current_status = self.get(f'reports-async/{report_id}') check_result = check_status_result(current_status) - if check_result == "upload complete": + if check_result == 'upload complete': return current_status - if check_result == "failed": + if check_result == 'failed': raise Exception( - f"async report upload failed with reason: {current_status.get('jobStatus', {}).get('failedReason', 'Unknown')}" + f'async report upload failed with reason: {current_status.get("jobStatus", {}).get("failedReason", "Unknown")}' ) if check_result not in [ - "active", - "ready", - "waiting", - "completed", + 'active', + 'ready', + 'waiting', + 'completed', ]: - raise Exception(f"async report upload in unexpected state: {check_result}") + raise Exception(f'async report upload in unexpected state: {check_result}') return current_status current_status = check_status() check_result = check_status_result(current_status) - if check_result in ["active", "waiting"]: + if check_result in ['active', 'waiting']: current_status = check_status(interval=30) check_result = check_status_result(current_status) - if check_result in ["active", "waiting"]: + if check_result in ['active', 'waiting']: current_status = check_status(interval=60, num_attempts=mins_to_wait) check_result = check_status_result(current_status) - if check_result in ["active", "waiting"]: + if check_result in ['active', 'waiting']: raise Exception( - f"async report upload taking longer than expected: {current_status}" + f'async report upload taking longer than expected: {current_status}' ) 
return current_status else: if ignore_extra_fields: - return self.post("reports?ignore_extra_fields=true", content) + return self.post('reports?ignore_extra_fields=true', content) else: - return self.post("reports", content) + return self.post('reports', content) def set_analyst_comments(self, report_id: str, data: Dict) -> Dict: """ @@ -193,9 +192,9 @@ def set_analyst_comments(self, report_id: str, data: Dict) -> Dict: Pending: https://www.bcgsc.ca/jira/browse/DEVSU-1177 """ return self.request( - f"/reports/{report_id}/summary/analyst-comments", - method="PUT", - data=zlib.compress(json.dumps(data, allow_nan=False).encode("utf-8")), + f'/reports/{report_id}/summary/analyst-comments', + method='PUT', + data=zlib.compress(json.dumps(data, allow_nan=False).encode('utf-8')), ) def post_images(self, report_id: str, files: Dict[str, str], data: Dict[str, str] = {}) -> None: @@ -212,18 +211,18 @@ def post_images(self, report_id: str, files: Dict[str, str], data: Dict[str, str if not os.path.exists(path): raise FileNotFoundError(path) current_files[key] = path - open_files = {k: open(f, "rb") for (k, f) in current_files.items()} + open_files = {k: open(f, 'rb') for (k, f) in current_files.items()} try: resp = self.request( - f"reports/{report_id}/image", - method="POST", + f'reports/{report_id}/image', + method='POST', data=data, files=open_files, headers=json.dumps({}), ) for status in resp: - if status.get("upload") != "successful": - image_errors.add(status["key"]) + if status.get('upload') != 'successful': + image_errors.add(status['key']) finally: for handler in open_files.values(): handler.close() @@ -235,12 +234,12 @@ def get_spec(self) -> Dict: """ Get the current IPR spec, for the purposes of current report upload fields """ - return self.request("/spec.json", method="GET") + return self.request('/spec.json', method='GET') def validate_json(self, content: Dict) -> Dict: """ Validate the provided json schema """ - result = self.post("reports/schema", content) - logger.info(f"{result['message']}") + result = self.post('reports/schema', content) + logger.info(f'{result["message"]}') return result diff --git a/pori_python/ipr/constants.py b/pori_python/ipr/constants.py index 6f3958c3..35c2a547 100644 --- a/pori_python/ipr/constants.py +++ b/pori_python/ipr/constants.py @@ -1,28 +1,40 @@ -DEFAULT_URL = "https://iprstaging-api.bcgsc.ca/api" -GERMLINE_BASE_TERMS = ("pharmacogenomic", "cancer predisposition") # based on graphkb.constants -VARIANT_CLASSES = {"Variant", "CategoryVariant", "PositionalVariant", "CatalogueVariant"} +GERMLINE_BASE_TERMS = ('pharmacogenomic', 'cancer predisposition') # based on graphkb.constants +VARIANT_CLASSES = {'Variant', 'CategoryVariant', 'PositionalVariant', 'CatalogueVariant'} # all possible values for review status are: ['pending', 'not required', 'passed', 'failed', 'initial'] -FAILED_REVIEW_STATUS = "failed" +FAILED_REVIEW_STATUS = 'failed' # Signatures -COSMIC_SIGNATURE_VARIANT_TYPE = "high signature" -HLA_SIGNATURE_VARIANT_TYPE = "signature present" -TMB_SIGNATURE = "mutation burden" +COSMIC_SIGNATURE_VARIANT_TYPE = 'high signature' +HLA_SIGNATURE_VARIANT_TYPE = 'signature present' +TMB_SIGNATURE = 'mutation burden' TMB_SIGNATURE_HIGH_THRESHOLD = ( 10.0 # genomic mutations per mb - https://www.bcgsc.ca/jira/browse/GERO-296 ) -TMB_SIGNATURE_VARIANT_TYPE = "high signature" +TMB_SIGNATURE_VARIANT_TYPE = 'high signature' # Mapping micro-satellite from pipeline terms to GraphKB terms MSI_MAPPING = { - "microsatellite instability": { # MSI - "displayName": 
"microsatellite instability high signature", - "signatureName": "microsatellite instability", - "variantTypeName": "high signature", + 'microsatellite instability': { # MSI + 'displayName': 'microsatellite instability high signature', + 'signatureName': 'microsatellite instability', + 'variantTypeName': 'high signature', }, - "microsatellite stable": { # MSS - "displayName": "microsatellite stable signature present", - "signatureName": "microsatellite stable", - "variantTypeName": "signature present", + 'microsatellite stable': { # MSS + 'displayName': 'microsatellite stable signature present', + 'signatureName': 'microsatellite stable', + 'variantTypeName': 'signature present', + }, +} +# Mapping hrd from pipeline terms to GraphKB terms +HRD_MAPPING = { + 'homologous recombination deficiency strong signature': { + 'displayName': 'homologous recombination deficiency strong signature', + 'signatureName': 'homologous recombination deficiency', + 'variantTypeName': 'strong signature', + }, + 'homologous recombination deficiency moderate signature': { + 'displayName': 'homologous recombination deficiency moderate signature', + 'signatureName': 'homologous recombination deficiency', + 'variantTypeName': 'moderate signature', }, } diff --git a/pori_python/ipr/content.spec.json b/pori_python/ipr/content.spec.json index c2df68e9..a9790129 100644 --- a/pori_python/ipr/content.spec.json +++ b/pori_python/ipr/content.spec.json @@ -541,6 +541,20 @@ }, "type": "array" }, + "hrd": { + "properties": { + "kbCategory": { + "type": "string" + }, + "score": { + "type": "number" + } + }, + "required": [ + "score" + ], + "type": "object" + }, "images": { "items": { "example": { diff --git a/pori_python/ipr/inputs.py b/pori_python/ipr/inputs.py index dcff908b..01976603 100644 --- a/pori_python/ipr/inputs.py +++ b/pori_python/ipr/inputs.py @@ -9,7 +9,6 @@ import os import pandas as pd import re -from Bio.Data.IUPACData import protein_letters_3to1 from numpy import nan from typing import Any, Callable, Dict, Iterable, List, Set, Tuple, cast @@ -25,149 +24,149 @@ from .constants import ( COSMIC_SIGNATURE_VARIANT_TYPE, - DEFAULT_URL, HLA_SIGNATURE_VARIANT_TYPE, MSI_MAPPING, + HRD_MAPPING, TMB_SIGNATURE, TMB_SIGNATURE_VARIANT_TYPE, ) -from .util import hash_key, logger, pandas_falsy +from .util import hash_key, logger, pandas_falsy, protein_letters_3to1 -protein_letters_3to1.setdefault("Ter", "*") +protein_letters_3to1.setdefault('Ter', '*') -SPECIFICATION = os.path.join(os.path.dirname(__file__), "content.spec.json") +SPECIFICATION = os.path.join(os.path.dirname(__file__), 'content.spec.json') # content in the local specification should match the values in IPR_API_SPEC_JSON_URL -IPR_API_SPEC_JSON_URL = f'{os.environ.get("IPR_URL", DEFAULT_URL)}/spec.json' +IPR_API_SPEC_JSON_URL = f'{os.environ.get("IPR_URL")}/spec.json' # TODO: GERO-307 - use SPECIFICATION json to derive the variant required and optional details defined below # 'cnvState' is for display -COPY_REQ = ["gene", "kbCategory"] -COPY_KEY = ["gene"] +COPY_REQ = ['gene', 'kbCategory'] +COPY_KEY = ['gene'] COPY_OPTIONAL = [ - "cnvState", - "copyChange", - "lohState", # Loss of Heterzygosity state - informative detail to analyst - "chromosomeBand", - "chromosome", - "chr", # expect only one of chromosome or chr - "start", - "end", - "size", - "log2Cna", - "cna", - "comments", - "library", - "germline", + 'cnvState', + 'copyChange', + 'lohState', # Loss of Heterzygosity state - informative detail to analyst + 'chromosomeBand', + 'chromosome', + 'chr', # expect 
only one of chromosome or chr + 'start', + 'end', + 'size', + 'log2Cna', + 'cna', + 'comments', + 'library', + 'germline', ] -SMALL_MUT_REQ = ["gene", "proteinChange"] +SMALL_MUT_REQ = ['gene', 'proteinChange'] # alternate details in the key, can distinguish / subtype events. SMALL_MUT_KEY = SMALL_MUT_REQ + [ - "altSeq", - "chromosome", - "endPosition", - "refSeq", - "startPosition", - "transcript", + 'altSeq', + 'chromosome', + 'endPosition', + 'refSeq', + 'startPosition', + 'transcript', ] SMALL_MUT_OPTIONAL = [ - "altSeq", - "comments", - "chromosome", - "endPosition", - "germline", - "hgvsCds", - "hgvsGenomic", - "hgvsProtein", - "library", - "ncbiBuild", - "normalAltCount", - "normalDepth", - "normalRefCount", - "refSeq", - "rnaAltCount", - "rnaDepth", - "rnaRefCount", - "startPosition", - "transcript", - "tumourAltCount", - "tumourAltCopies", - "tumourDepth", - "tumourRefCount", - "tumourRefCopies", - "zygosity", + 'altSeq', + 'comments', + 'chromosome', + 'endPosition', + 'germline', + 'hgvsCds', + 'hgvsGenomic', + 'hgvsProtein', + 'library', + 'ncbiBuild', + 'normalAltCount', + 'normalDepth', + 'normalRefCount', + 'refSeq', + 'rnaAltCount', + 'rnaDepth', + 'rnaRefCount', + 'startPosition', + 'transcript', + 'tumourAltCount', + 'tumourAltCopies', + 'tumourDepth', + 'tumourRefCount', + 'tumourRefCopies', + 'zygosity', ] -EXP_REQ = ["gene", "kbCategory"] -EXP_KEY = ["gene"] +EXP_REQ = ['gene', 'kbCategory'] +EXP_KEY = ['gene'] EXP_OPTIONAL = [ - "biopsySiteFoldChange", - "biopsySitePercentile", - "biopsySiteQC", - "biopsySiteZScore", - "biopsySitekIQR", - "comments", - "diseaseFoldChange", - "diseasekIQR", - "diseasePercentile", - "diseaseQC", - "diseaseZScore", - "expressionState", - "histogramImage", - "library", - "primarySiteFoldChange", - "primarySitekIQR", - "primarySitePercentile", - "primarySiteQC", - "primarySiteZScore", - "internalPancancerFoldChange", - "internalPancancerkIQR", - "internalPancancerPercentile", - "internalPancancerQC", - "internalPancancerZScore", - "rnaReads", - "rpkm", - "tpm", + 'biopsySiteFoldChange', + 'biopsySitePercentile', + 'biopsySiteQC', + 'biopsySiteZScore', + 'biopsySitekIQR', + 'comments', + 'diseaseFoldChange', + 'diseasekIQR', + 'diseasePercentile', + 'diseaseQC', + 'diseaseZScore', + 'expressionState', + 'histogramImage', + 'library', + 'primarySiteFoldChange', + 'primarySitekIQR', + 'primarySitePercentile', + 'primarySiteQC', + 'primarySiteZScore', + 'internalPancancerFoldChange', + 'internalPancancerkIQR', + 'internalPancancerPercentile', + 'internalPancancerQC', + 'internalPancancerZScore', + 'rnaReads', + 'rpkm', + 'tpm', ] SV_REQ = [ - "eventType", - "breakpoint", - "gene1", # prev: nterm_hugo - "gene2", # prev: cterm_hugo - "exon1", # n-terminal - "exon2", # c-terminal + 'eventType', + 'breakpoint', + 'gene1', # prev: nterm_hugo + 'gene2', # prev: cterm_hugo + 'exon1', # n-terminal + 'exon2', # c-terminal ] SV_KEY = SV_REQ[:] SV_OPTIONAL = [ - "ctermTranscript", - "ntermTranscript", - "ctermGene", # combined hugo ensembl form - "ntermGene", # combined hugo ensembl form - "detectedIn", - "conventionalName", - "svg", - "svgTitle", - "name", - "frame", - "omicSupport", - "highQuality", - "comments", - "library", - "rnaAltCount", - "rnaDepth", - "tumourAltCount", - "tumourDepth", - "germline", - "mavis_product_id", + 'ctermTranscript', + 'ntermTranscript', + 'ctermGene', # combined hugo ensembl form + 'ntermGene', # combined hugo ensembl form + 'detectedIn', + 'conventionalName', + 'svg', + 'svgTitle', + 'name', + 'frame', + 'omicSupport', 
+ 'highQuality', + 'comments', + 'library', + 'rnaAltCount', + 'rnaDepth', + 'tumourAltCount', + 'tumourDepth', + 'germline', + 'mavis_product_id', ] -SIGV_REQ = ["signatureName", "variantTypeName"] -SIGV_COSMIC = ["signature"] # 1st element used as signatureName key -SIGV_HLA = ["a1", "a2", "b1", "b2", "c1", "c2"] -SIGV_OPTIONAL = ["displayName"] +SIGV_REQ = ['signatureName', 'variantTypeName'] +SIGV_COSMIC = ['signature'] # 1st element used as signatureName key +SIGV_HLA = ['a1', 'a2', 'b1', 'b2', 'c1', 'c2'] +SIGV_OPTIONAL = ['displayName'] SIGV_KEY = SIGV_REQ[:] @@ -192,7 +191,7 @@ def validate_variant_rows( Returns: the rows from the tab file as dictionaries """ - header = required + optional + ["key"] + header = required + optional + ['key'] result = [] keys = set() @@ -202,18 +201,18 @@ def validate_variant_rows( if not header_validated: for req_col in required: if req_col not in row: - raise ValueError(f"header missing required column ({req_col})") + raise ValueError(f'header missing required column ({req_col})') header_validated = True row_key = hash_key(row_to_key(row)) if row_key in keys: - raise ValueError(f"duplicate row key ({row_key}) from ({row_to_key(row)})") - row["key"] = row_key + raise ValueError(f'duplicate row key ({row_key}) from ({row_to_key(row)})') + row['key'] = row_key keys.add(row_key) for k, v in row.items(): if v is pd.NA: - row[k] = "" + row[k] = '' - result.append(cast(IprVariant, {col: row.get(col, "") for col in header})) + result.append(cast(IprVariant, {col: row.get(col, '') for col in header})) return result @@ -225,43 +224,42 @@ def preprocess_copy_variants(rows: Iterable[Dict]) -> List[IprCopyVariant]: """ # default map for display - concise names display_name_mapping = { - INPUT_COPY_CATEGORIES.DEEP: "deep deletion", - INPUT_COPY_CATEGORIES.AMP: "amplification", - INPUT_COPY_CATEGORIES.GAIN: "copy gain", - INPUT_COPY_CATEGORIES.LOSS: "copy loss", + INPUT_COPY_CATEGORIES.DEEP: 'deep deletion', + INPUT_COPY_CATEGORIES.AMP: 'amplification', + INPUT_COPY_CATEGORIES.GAIN: 'copy gain', + INPUT_COPY_CATEGORIES.LOSS: 'copy loss', } display_name_mapping.update(dict([(v, v) for v in display_name_mapping.values()])) def row_key(row: Dict) -> Tuple[str, ...]: - return tuple(["cnv"] + [row[key] for key in COPY_KEY]) + return tuple(['cnv'] + [row[key] for key in COPY_KEY]) result = validate_variant_rows(rows, COPY_REQ, COPY_OPTIONAL, row_key) ret_list = [cast(IprCopyVariant, var) for var in result] for row in ret_list: - - kb_cat = row.get("kbCategory") - kb_cat = "" if pd.isnull(kb_cat) else str(kb_cat) + kb_cat = row.get('kbCategory') + kb_cat = '' if pd.isnull(kb_cat) else str(kb_cat) if kb_cat: if kb_cat not in INPUT_COPY_CATEGORIES.values(): - raise ValueError(f"invalid copy variant kbCategory value ({kb_cat})") - if not row.get("cnvState"): # apply default short display name - row["cnvState"] = display_name_mapping[kb_cat] - row["variant"] = kb_cat - row["variantType"] = "cnv" - chrband = row.get("chromosomeBand", False) - chrom = row.pop("chromosome", False) + raise ValueError(f'invalid copy variant kbCategory value ({kb_cat})') + if not row.get('cnvState'): # apply default short display name + row['cnvState'] = display_name_mapping[kb_cat] + row['variant'] = kb_cat + row['variantType'] = 'cnv' + chrband = row.get('chromosomeBand', False) + chrom = row.pop('chromosome', False) if not chrom: - chrom = row.pop("chr", False) + chrom = row.pop('chr', False) # remove chr if it was not used for chrom - row.pop("chr", False) + row.pop('chr', False) if chrom: # 
check that chr isn't already in the chrband; # this regex from https://vrs.ga4gh.org/en/1.2/terms_and_model.html#id25 - if chrband and (re.match(r"^cen|[pq](ter|([1-9][0-9]*(\.[1-9][0-9]*)?))$", chrband)): + if chrband and (re.match(r'^cen|[pq](ter|([1-9][0-9]*(\.[1-9][0-9]*)?))$', chrband)): if isinstance(chrom, int): chrom = str(chrom) - chrom = chrom.strip("chr") - row["chromosomeBand"] = chrom + row["chromosomeBand"] + chrom = chrom.strip('chr') + row['chromosomeBand'] = chrom + row['chromosomeBand'] return ret_list @@ -274,28 +272,28 @@ def preprocess_small_mutations(rows: Iterable[Dict]) -> List[IprSmallMutationVar def row_key(row: IprSmallMutationVariant) -> Tuple[str, ...]: key_vals = [] - for kval in [row.get(key, "") for key in SMALL_MUT_KEY]: - key_vals.append(str(kval) if pd.notnull(kval) else "") - return tuple(["small mutation"] + key_vals) + for kval in [row.get(key, '') for key in SMALL_MUT_KEY]: + key_vals.append(str(kval) if pd.notnull(kval) else '') + return tuple(['small mutation'] + key_vals) result = validate_variant_rows(rows, SMALL_MUT_REQ, SMALL_MUT_OPTIONAL, row_key) if not result: return [] def pick_variant(row: IprSmallMutationVariant) -> str: - protein_change = row.get("proteinChange") + protein_change = row.get('proteinChange') if not pandas_falsy(protein_change): for longAA, shortAA in protein_letters_3to1.items(): protein_change = str(protein_change).replace(longAA, shortAA) - hgvsp = "{}:{}".format(row["gene"], protein_change) + hgvsp = '{}:{}'.format(row['gene'], protein_change) return hgvsp - for field in ["hgvsProtein", "hgvsCds", "hgvsGenomic"]: + for field in ['hgvsProtein', 'hgvsCds', 'hgvsGenomic']: if not pandas_falsy(row.get(field)): return str(row.get(field)) raise ValueError( - "Variant field cannot be empty. Must include proteinChange or one of the hgvs fields (hgvsProtein, hgvsCds, hgvsGenomic) to build the variant string" + 'Variant field cannot be empty. 
Must include proteinChange or one of the hgvs fields (hgvsProtein, hgvsCds, hgvsGenomic) to build the variant string' ) # 'location' and 'refAlt' are not currently used for matching; still optional and allowed blank @@ -304,21 +302,21 @@ def pick_variant(row: IprSmallMutationVariant) -> str: # for row in result: def convert_sm(row: IprVariant) -> IprSmallMutationVariant: ret = cast(IprSmallMutationVariant, row) - ret["variant"] = pick_variant(ret) - ret["variantType"] = "mut" + ret['variant'] = pick_variant(ret) + ret['variantType'] = 'mut' - if ret.get("startPosition") and not ret.get("endPosition"): - ret["endPosition"] = ret["startPosition"] + if ret.get('startPosition') and not ret.get('endPosition'): + ret['endPosition'] = ret['startPosition'] # default depth to alt + ref if not given - for sample_type in ("normal", "rna", "tumour"): + for sample_type in ('normal', 'rna', 'tumour'): if ( - ret.get(f"{sample_type}RefCount") - and ret.get(f"{sample_type}AltCount") - and not ret.get(f"{sample_type}Depth") + ret.get(f'{sample_type}RefCount') + and ret.get(f'{sample_type}AltCount') + and not ret.get(f'{sample_type}Depth') ): - ret[f"{sample_type}Depth"] = ( # type: ignore - ret[f"{sample_type}RefCount"] + ret[f"{sample_type}AltCount"] # type: ignore + ret[f'{sample_type}Depth'] = ( # type: ignore + ret[f'{sample_type}RefCount'] + ret[f'{sample_type}AltCount'] # type: ignore ) return ret @@ -334,65 +332,65 @@ def preprocess_expression_variants(rows: Iterable[Dict]) -> List[IprExprVariant] """ def row_key(row: Dict) -> Tuple[str, ...]: - return tuple(["expression"] + [row[key] for key in EXP_KEY]) + return tuple(['expression'] + [row[key] for key in EXP_KEY]) variants = validate_variant_rows(rows, EXP_REQ, EXP_OPTIONAL, row_key) result = [cast(IprExprVariant, var) for var in variants] float_columns = [ col for col in EXP_REQ + EXP_OPTIONAL - if col.endswith("kIQR") - or col.endswith("Percentile") - or col.endswith("FoldChange") - or col.endswith("QC") - or col.endswith("ZScore") - or col in ["tpm", "rpkm"] + if col.endswith('kIQR') + or col.endswith('Percentile') + or col.endswith('FoldChange') + or col.endswith('QC') + or col.endswith('ZScore') + or col in ['tpm', 'rpkm'] ] errors = [] for row in result: - row["variant"] = row["kbCategory"] - if not row["expressionState"] and row["kbCategory"]: - row["expressionState"] = row["kbCategory"] + row['variant'] = row['kbCategory'] + if not row['expressionState'] and row['kbCategory']: + row['expressionState'] = row['kbCategory'] - if row["variant"] and not pd.isnull(row["variant"]): - if row["variant"] not in INPUT_EXPRESSION_CATEGORIES.values(): + if row['variant'] and not pd.isnull(row['variant']): + if row['variant'] not in INPUT_EXPRESSION_CATEGORIES.values(): err_msg = f"{row['gene']} variant '{row['variant']}' not in {INPUT_EXPRESSION_CATEGORIES.values()}" errors.append(err_msg) logger.error(err_msg) - row["variantType"] = "exp" + row['variantType'] = 'exp' for col in float_columns: - if row.get(col) in ["inf", "+inf", "-inf"]: - row[col] = row[col].replace("inf", "Infinity") # type: ignore + if row.get(col) in ['inf', '+inf', '-inf']: + row[col] = row[col].replace('inf', 'Infinity') # type: ignore # check images exist - if row["histogramImage"] and not os.path.exists(row["histogramImage"]): + if row['histogramImage'] and not os.path.exists(row['histogramImage']): raise FileNotFoundError(f'missing image ({row["histogramImage"]})') if errors: - raise ValueError(f"{len(errors)} Invalid expression variants in file") + raise 
ValueError(f'{len(errors)} Invalid expression variants in file') return result def create_graphkb_sv_notation(row: IprFusionVariant) -> str: """Generate GKB/IPR fusion style notation from a structural variant.""" - gene1 = row["gene1"] or "?" - gene2 = row["gene2"] or "?" - exon1 = str(row["exon1"]) if row["exon1"] else "?" - exon2 = str(row["exon2"]) if row["exon2"] else "?" - if not row["gene1"]: + gene1 = row['gene1'] or '?' + gene2 = row['gene2'] or '?' + exon1 = str(row['exon1']) if row['exon1'] else '?' + exon2 = str(row['exon2']) if row['exon2'] else '?' + if not row['gene1']: gene1, gene2 = gene2, gene1 exon1, exon2 = exon2, exon1 - if gene1 == "?": + if gene1 == '?': raise ValueError( f'both genes cannot be blank for a structural variant {row["key"]}. At least 1 gene must be entered' ) # force exons to integer repr string - exon1 = exon1[:-2] if exon1.endswith(".0") else exon1 - exon2 = exon2[:-2] if exon2.endswith(".0") else exon2 - return f"({gene1},{gene2}):fusion(e.{exon1},e.{exon2})" + exon1 = exon1[:-2] if exon1.endswith('.0') else exon1 + exon2 = exon2[:-2] if exon2.endswith('.0') else exon2 + return f'({gene1},{gene2}):fusion(e.{exon1},e.{exon2})' def preprocess_structural_variants(rows: Iterable[Dict]) -> List[IprFusionVariant]: @@ -402,21 +400,21 @@ def preprocess_structural_variants(rows: Iterable[Dict]) -> List[IprFusionVarian """ def row_key(row: Dict) -> Tuple[str, ...]: - return tuple(["sv"] + [row[key] for key in SV_KEY]) + return tuple(['sv'] + [row[key] for key in SV_KEY]) variants = validate_variant_rows(rows, SV_REQ, SV_OPTIONAL, row_key) result = [cast(IprFusionVariant, var) for var in variants] # genes are optional for structural variants for row in result: - row["variant"] = create_graphkb_sv_notation(row) - row["variantType"] = "sv" + row['variant'] = create_graphkb_sv_notation(row) + row['variantType'] = 'sv' # check and load the svg file where applicable - if row["svg"] and not pd.isnull(row["svg"]): - if not os.path.exists(row["svg"]): - raise FileNotFoundError(row["svg"]) - with open(row["svg"], "r") as fh: - row["svg"] = fh.read() + if row['svg'] and not pd.isnull(row['svg']): + if not os.path.exists(row['svg']): + raise FileNotFoundError(row['svg']) + with open(row['svg'], 'r') as fh: + row['svg'] = fh.read() return result @@ -428,15 +426,15 @@ def preprocess_signature_variants(rows: Iterable[Dict]) -> List[IprSignatureVari """ def row_key(row: Dict) -> Tuple[str, ...]: - return tuple(["sigv"] + [row[key] for key in SIGV_KEY]) + return tuple(['sigv'] + [row[key] for key in SIGV_KEY]) variants = validate_variant_rows(rows, SIGV_REQ, SIGV_OPTIONAL, row_key) result = [cast(IprSignatureVariant, var) for var in variants] # Adding additional required properties for row in result: - row["variant"] = row["displayName"] - row["variantType"] = "sigv" + row['variant'] = row['displayName'] + row['variantType'] = 'sigv' return result @@ -448,9 +446,9 @@ def preprocess_cosmic(rows: Iterable[Dict]) -> Iterable[Dict]: """ return [ { - "displayName": f"{signature} {COSMIC_SIGNATURE_VARIANT_TYPE}", - "signatureName": signature, - "variantTypeName": COSMIC_SIGNATURE_VARIANT_TYPE, + 'displayName': f'{signature} {COSMIC_SIGNATURE_VARIANT_TYPE}', + 'signatureName': signature, + 'variantTypeName': COSMIC_SIGNATURE_VARIANT_TYPE, } for signature in rows ] @@ -465,21 +463,21 @@ def preprocess_hla(rows: Iterable[Dict]) -> Iterable[Dict]: for k, v in row.items(): if k not in SIGV_HLA: continue - hla.add(f"HLA-{v}") # 2nd level, e.g. 
'HLA-A*02:01' - hla.add(f"HLA-{v.split(':')[0]}") # 1st level, e.g. 'HLA-A*02' + hla.add(f'HLA-{v}') # 2nd level, e.g. 'HLA-A*02:01' + hla.add(f'HLA-{v.split(":")[0]}') # 1st level, e.g. 'HLA-A*02' return [ { - "displayName": f"{signature} {HLA_SIGNATURE_VARIANT_TYPE}", - "signatureName": signature, - "variantTypeName": HLA_SIGNATURE_VARIANT_TYPE, + 'displayName': f'{signature} {HLA_SIGNATURE_VARIANT_TYPE}', + 'signatureName': signature, + 'variantTypeName': HLA_SIGNATURE_VARIANT_TYPE, } for signature in hla ] def preprocess_tmb( - tmb_high: float, tmburMutationBurden: Dict = {}, genomeTmb: float | str = "" + tmb_high: float, tmburMutationBurden: Dict = {}, genomeTmb: float | str = '' ) -> Iterable[Dict]: """ Process tumour mutation burden (tmb) input(s) into preformatted signature input. @@ -493,15 +491,15 @@ def preprocess_tmb( if tmburMutationBurden: try: tmbur_tmb_val = float( - tmburMutationBurden["genomeIndelTmb"] + tmburMutationBurden["genomeSnvTmb"] + tmburMutationBurden['genomeIndelTmb'] + tmburMutationBurden['genomeSnvTmb'] ) if not genomeTmb and not isinstance(genomeTmb, float): logger.error( - "backwards compatibility: deriving genomeTmb from tmburMutationBurden genomeIndelTmb + genomeSnvTmb" + 'backwards compatibility: deriving genomeTmb from tmburMutationBurden genomeIndelTmb + genomeSnvTmb' ) tmb_val = tmbur_tmb_val except Exception as err: - logger.error(f"tmburMutationBurden parsing failure: {err}") + logger.error(f'tmburMutationBurden parsing failure: {err}') # genomeTmb # SDEV-4811 - mutation burden is now expected to be uploaded in genomeTmb as mutations/megabase @@ -512,19 +510,19 @@ def preprocess_tmb( tmb_val = float(genomeTmb) if tmburMutationBurden and tmbur_tmb_val != tmb_val: logger.warning( - f"genomeTmb given {tmb_val} does not match tmburMutationBurden TMB {tmbur_tmb_val}" + f'genomeTmb given {tmb_val} does not match tmburMutationBurden TMB {tmbur_tmb_val}' ) except TypeError as err: - logger.error(f"genomeTmb parsing failure {genomeTmb}: {err}") + logger.error(f'genomeTmb parsing failure {genomeTmb}: {err}') # comparing tmb_val to threshold # Signature CategoryVariant created only if threshold met if tmb_val >= tmb_high: return [ { - "displayName": f"{TMB_SIGNATURE} {TMB_SIGNATURE_VARIANT_TYPE}", - "signatureName": TMB_SIGNATURE, - "variantTypeName": TMB_SIGNATURE_VARIANT_TYPE, + 'displayName': f'{TMB_SIGNATURE} {TMB_SIGNATURE_VARIANT_TYPE}', + 'signatureName': TMB_SIGNATURE, + 'variantTypeName': TMB_SIGNATURE_VARIANT_TYPE, } ] return [] @@ -536,17 +534,16 @@ def preprocess_msi(msi: Any) -> Iterable[Dict]: Both msi & mss get mapped to corresponding GraphKB Signature CategoryVariants. """ if msi: - # MSI category is given from upstream (only one msi variant per library) if isinstance(msi, list): # msi is given as a list of one dict - msi_cat = msi[0].get("kbCategory", "") + msi_cat = msi[0].get('kbCategory', '') elif isinstance(msi, str): # msi is given as a string msi_cat = msi else: # msi is given as a dict; uncaught error if not. - msi_cat = msi.get("kbCategory", "") + msi_cat = msi.get('kbCategory', '') msi_variant = MSI_MAPPING.get(msi_cat, None) @@ -557,6 +554,23 @@ def preprocess_msi(msi: Any) -> Iterable[Dict]: return [] +def preprocess_hrd(hrd: Any) -> Iterable[Dict]: + """ + Process hrd input into preformatted signature input. + HRD gets mapped to corresponding GraphKB Signature CategoryVariants.
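+
+    A minimal sketch of the intended behaviour, mirroring preprocess_msi above.
+    The 'HRD high' category name here is illustrative only; real keys come from HRD_MAPPING:
+
+        preprocess_hrd({'kbCategory': 'HRD high'})
+        # -> [HRD_MAPPING['HRD high']] when the category is mapped, else []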
+ """ + if hrd: + hrd_cat = hrd.get('kbCategory', '') + + hrd_variant = HRD_MAPPING.get(hrd_cat, None) + + # Signature CategoryVariant created either for msi or mss + if hrd_variant: + return [hrd_variant] + + return [] + + def check_variant_links( small_mutations: List[IprSmallMutationVariant], expression_variants: List[IprExprVariant], @@ -580,67 +594,67 @@ def check_variant_links( missing_information_genes = set() missing_information_errors = set() - copy_variant_genes = {variant["gene"] for variant in copy_variants} - expression_variant_genes = {variant["gene"] for variant in expression_variants} + copy_variant_genes = {variant['gene'] for variant in copy_variants} + expression_variant_genes = {variant['gene'] for variant in expression_variants} genes_with_variants = set() # filter excess copy variants variant: IprCopyVariant | IprExprVariant | IprFusionVariant | IprSmallMutationVariant for variant in copy_variants: - gene = variant["gene"] + gene = variant['gene'] if not gene: - logger.error("copy_variant data cannot be applied to an empty genename") - elif variant["variant"]: + logger.error('copy_variant data cannot be applied to an empty genename') + elif variant['variant']: genes_with_variants.add(gene) if expression_variant_genes and gene not in expression_variant_genes: missing_information_genes.add(gene) missing_information_errors.add( - f"gene ({gene}) has a copy variant but is missing expression information" + f'gene ({gene}) has a copy variant but is missing expression information' ) for variant in expression_variants: - gene = variant["gene"] + gene = variant['gene'] if not gene: - logger.error("expression_variant data cannot be applied to an empty genename") - elif variant["variant"]: + logger.error('expression_variant data cannot be applied to an empty genename') + elif variant['variant']: genes_with_variants.add(gene) if copy_variant_genes and gene not in copy_variant_genes: missing_information_genes.add(gene) missing_information_errors.add( - f"gene ({gene}) has an expression variant but is missing copy number information" + f'gene ({gene}) has an expression variant but is missing copy number information' ) for variant in small_mutations: - gene = variant["gene"] + gene = variant['gene'] if not gene: - logger.error("small_mutation data cannot be applied to an empty genename") + logger.error('small_mutation data cannot be applied to an empty genename') continue if copy_variant_genes and gene not in copy_variant_genes: missing_information_genes.add(gene) missing_information_errors.add( - f"gene ({gene}) has a small mutation but is missing copy number information" + f'gene ({gene}) has a small mutation but is missing copy number information' ) if expression_variant_genes and gene not in expression_variant_genes: missing_information_genes.add(gene) missing_information_errors.add( - f"gene ({gene}) has a small mutation but is missing expression information" + f'gene ({gene}) has a small mutation but is missing expression information' ) genes_with_variants.add(gene) for variant in structural_variants: - for gene in [variant["gene1"], variant["gene2"]]: + for gene in [variant['gene1'], variant['gene2']]: if gene: # genes are optional for structural variants if gene not in copy_variant_genes: missing_information_genes.add(gene) missing_information_errors.add( - f"gene ({gene}) has a structural variant but is missing copy number information" + f'gene ({gene}) has a structural variant but is missing copy number information' ) if gene not in expression_variant_genes: 
missing_information_genes.add(gene) missing_information_errors.add( - f"gene ({gene}) has a structural variant but is missing expression information" + f'gene ({gene}) has a structural variant but is missing expression information' ) genes_with_variants.add(gene) @@ -648,7 +662,7 @@ def check_variant_links( for err_msg in sorted(missing_information_errors): logger.debug(err_msg) link_err_msg = ( - f"Missing information variant links on {len(missing_information_genes)} genes" + f'Missing information variant links on {len(missing_information_genes)} genes' ) logger.warning(link_err_msg) return genes_with_variants @@ -659,91 +673,91 @@ def check_comparators(content: Dict, expresssionVariants: List[IprExprVariant] = Given the optional content dictionary, check that based on the analyses present the correct/sufficient comparators have also been specified """ - mutation_burden = "mutationBurden" - comparator_roles = {c["analysisRole"] for c in content.get("comparators", [])} + mutation_burden = 'mutationBurden' + comparator_roles = {c['analysisRole'] for c in content.get('comparators', [])} - for image in content.get("images", []): - key = image["key"] + for image in content.get('images', []): + key = image['key'] if key.startswith(mutation_burden): - comp_type = key.split(".")[-1] - role = f"mutation burden ({comp_type})" + comp_type = key.split('.')[-1] + role = f'mutation burden ({comp_type})' if role in comparator_roles: continue - if "_sv." in key: - sv_role = f"mutation burden SV ({comp_type})" + if '_sv.' in key: + sv_role = f'mutation burden SV ({comp_type})' if sv_role in comparator_roles: continue - raise ValueError(f"missing required comparator definition ({role})") + raise ValueError(f'missing required comparator definition ({role})') if expresssionVariants: - required_comparators = {"expression (disease)"} + required_comparators = {'expression (disease)'} def all_none(row: IprExprVariant, columns: List[str]) -> bool: - return all([row.get(col) is None or row.get(col) == "" for col in columns]) + return all([row.get(col) is None or row.get(col) == '' for col in columns]) for exp in expresssionVariants: if not all_none( exp, [ - "primarySitekIQR", - "primarySitePercentile", - "primarySiteZScore", - "primarySiteFoldChange", + 'primarySitekIQR', + 'primarySitePercentile', + 'primarySiteZScore', + 'primarySiteFoldChange', ], ): - required_comparators.add("expression (primary site)") + required_comparators.add('expression (primary site)') if not all_none( exp, [ - "biopsySitekIQR", - "biopsySitePercentile", - "biopsySiteZScore", - "biopsySiteFoldChange", + 'biopsySitekIQR', + 'biopsySitePercentile', + 'biopsySiteZScore', + 'biopsySiteFoldChange', ], ): - required_comparators.add("expression (biopsy site)") + required_comparators.add('expression (biopsy site)') if not all_none( exp, [ - "internalPancancerkIQR", - "internalPancancerPercentile", - "internalPancancerZScore", - "internalPancancerFoldChange", + 'internalPancancerkIQR', + 'internalPancancerPercentile', + 'internalPancancerZScore', + 'internalPancancerFoldChange', ], ): - required_comparators.add("expression (internal pancancer cohort)") + required_comparators.add('expression (internal pancancer cohort)') if required_comparators - comparator_roles: - missing = "; ".join(sorted(list(required_comparators - comparator_roles))) - raise ValueError(f"missing required comparator definitions ({missing})") + missing = '; '.join(sorted(list(required_comparators - comparator_roles))) + raise ValueError(f'missing required comparator 
definitions ({missing})') def extend_with_default(validator_class): # https://python-jsonschema.readthedocs.io/en/latest/faq/#why-doesn-t-my-schema-s-default-property-set-the-default-on-my-instance - validate_properties = validator_class.VALIDATORS["properties"] + validate_properties = validator_class.VALIDATORS['properties'] def set_defaults(validator, properties, instance, schema): for property, subschema in properties.items(): - if "default" in subschema: - instance.setdefault(property, subschema["default"]) + if 'default' in subschema: + instance.setdefault(property, subschema['default']) for error in validate_properties(validator, properties, instance, schema): yield error def check_null(checker, instance): return ( - validator_class.TYPE_CHECKER.is_type(instance, "null") + validator_class.TYPE_CHECKER.is_type(instance, 'null') or pd.isnull(instance) - or instance == "" + or instance == '' ) - type_checker = validator_class.TYPE_CHECKER.redefine("null", check_null) + type_checker = validator_class.TYPE_CHECKER.redefine('null', check_null) return jsonschema.validators.extend( validator_class, - validators={"properties": set_defaults}, + validators={'properties': set_defaults}, type_checker=type_checker, ) @@ -758,7 +772,7 @@ def validate_report_content(content: Dict, schema_file: str = SPECIFICATION) -> Adds defaults as recommended by: https://python-jsonschema.readthedocs.io/en/latest/faq/#why-doesn-t-my-schema-s-default-property-set-the-default-on-my-instance """ - with open(schema_file, "r") as fh: + with open(schema_file, 'r') as fh: schema = json.load(fh) return DefaultValidatingDraft7Validator(schema).validate(content) diff --git a/pori_python/ipr/ipr.py b/pori_python/ipr/ipr.py index 8487dc1d..06d9efbd 100644 --- a/pori_python/ipr/ipr.py +++ b/pori_python/ipr/ipr.py @@ -35,12 +35,12 @@ def display_evidence_levels(statement: Statement) -> str: result = [] - for evidence_level in statement.get("evidenceLevel", []) or []: + for evidence_level in statement.get('evidenceLevel', []) or []: if isinstance(evidence_level, str): result.append(evidence_level) - elif "displayName" in evidence_level: - result.append(evidence_level["displayName"]) - return ";".join(sorted(result)) + elif 'displayName' in evidence_level: + result.append(evidence_level['displayName']) + return ';'.join(sorted(result)) def filter_structural_variants( @@ -52,9 +52,9 @@ Filter structural variants to remove non-high quality events unless they are matched/annotated or they involve a gene that is a known fusion partner """ - matched_svs = {match["variant"] for match in kb_matches if match["variantType"] == "sv"} + matched_svs = {match['variant'] for match in kb_matches if match['variantType'] == 'sv'} fusion_genes = { - gene["name"] for gene in gene_annotations if gene.get("knownFusionPartner", False) + gene['name'] for gene in gene_annotations if gene.get('knownFusionPartner', False) } result = [] for structural_variant in structural_variants: if any( [ - structural_variant["highQuality"], - structural_variant["key"] in matched_svs, - structural_variant["gene1"] in fusion_genes, - structural_variant["gene2"] in fusion_genes, + structural_variant['highQuality'], + structural_variant['key'] in matched_svs, + structural_variant['gene1'] in fusion_genes, + structural_variant['gene2'] in fusion_genes, ] ): result.append(structural_variant) @@ -83,22 +83,22 @@ def get_evidencelevel_mapping(graphkb_conn: GraphKBConnection) -> Dict[str,
str] """ # Get all EvidenceLevel from GraphKB # Note: not specifying any returnProperties allows for retrieving in/out_CrossReferenceOf - evidence_levels = graphkb_conn.query({"target": "EvidenceLevel"}) + evidence_levels = graphkb_conn.query({'target': 'EvidenceLevel'}) # Map EvidenceLevel RIDs to list of incoming CrossReferenceOf evidence_levels_mapping = dict( - map(lambda d: (d["@rid"], d.get("in_CrossReferenceOf", [])), evidence_levels) + map(lambda d: (d['@rid'], d.get('in_CrossReferenceOf', [])), evidence_levels) ) # Filter IPR EvidenceLevel and map each outgoing CrossReferenceOf to displayName - ipr_source_rid = graphkb_conn.get_source("ipr")["@rid"] - ipr_evidence_levels = filter(lambda d: d.get("source") == ipr_source_rid, evidence_levels) + ipr_source_rid = graphkb_conn.get_source('ipr')['@rid'] + ipr_evidence_levels = filter(lambda d: d.get('source') == ipr_source_rid, evidence_levels) cross_references_mapping: Dict[str, str] = dict() ipr_rids_to_displayname: Dict[str, str] = dict() for level in ipr_evidence_levels: - d = map(lambda i: (i, level["displayName"]), level.get("out_CrossReferenceOf", [])) # type: ignore + d = map(lambda i: (i, level['displayName']), level.get('out_CrossReferenceOf', [])) # type: ignore cross_references_mapping.update(d) - ipr_rids_to_displayname[level["@rid"]] = level["displayName"] # type: ignore + ipr_rids_to_displayname[level['@rid']] = level['displayName'] # type: ignore # Update EvidenceLevel mapping to corresponding IPR EvidenceLevel displayName def link_refs(refs) -> Tuple[str, str]: @@ -107,10 +107,10 @@ def link_refs(refs) -> Tuple[str, str]: return (refs[0], cross_references_mapping[rid]) if refs[0] in ipr_rids_to_displayname: # self-referencing IPR levels return (refs[0], ipr_rids_to_displayname[refs[0]]) - return (refs[0], "") + return (refs[0], '') evidence_levels_mapping = dict(map(link_refs, evidence_levels_mapping.items())) - evidence_levels_mapping[""] = "" + evidence_levels_mapping[''] = '' return evidence_levels_mapping # type: ignore @@ -142,11 +142,11 @@ def convert_statements_to_alterations( rows = [] ev_map = get_evidencelevel_mapping(graphkb_conn) # GERO-318 - add all IPR-A evidence equivalents to the approvedTherapy flag - approved = set([ev for (ev, ipr) in ev_map.items() if ipr == "IPR-A"]) + approved = set([ev for (ev, ipr) in ev_map.items() if ipr == 'IPR-A']) # get the recruitment status for any trial associated with a statement clinical_trials = [ - s["subject"]["@rid"] for s in statements if s["subject"]["@class"] == "ClinicalTrial" + s['subject']['@rid'] for s in statements if s['subject']['@class'] == 'ClinicalTrial' ] recruitment_statuses = {} if clinical_trials: @@ -154,76 +154,79 @@ for rid in clinical_trials: query_result = graphkb_conn.query( { - "target": {"target": "ClinicalTrial", "filters": {"@rid": rid}}, - "returnProperties": ["@rid", "recruitmentStatus"], + 'target': {'target': 'ClinicalTrial', 'filters': {'@rid': rid}}, + 'returnProperties': ['@rid', 'recruitmentStatus'], } ) if query_result: - recruitment_statuses[rid] = query_result[0]["recruitmentStatus"] # type: ignore + recruitment_statuses[rid] = query_result[0]['recruitmentStatus'] # type: ignore for statement in statements: variants = [ - cast(Variant, c) for c in statement["conditions"] if c["@class"] in VARIANT_CLASSES + cast(Variant, c) for c in statement['conditions'] if c['@class'] in VARIANT_CLASSES ] - diseases = [c for c in statement["conditions"] if c["@class"] == "Disease"] - disease_match =
len(diseases) == 1 and diseases[0]["@rid"] in disease_matches - pmid = ";".join([e["displayName"] for e in statement["evidence"]]) + diseases = [c for c in statement['conditions'] if c['@class'] == 'Disease'] + disease_match = len(diseases) == 1 and diseases[0]['@rid'] in disease_matches + reference = ';'.join([e['displayName'] for e in statement['evidence']]) + + if statement['relevance']['name'] == 'eligibility': + reference = ';'.join([e['sourceId'] for e in statement['evidence']]) ipr_section = gkb_statement.categorize_relevance( - graphkb_conn, statement["relevance"]["@rid"] + graphkb_conn, statement['relevance']['@rid'] ) approved_therapy = False - if ipr_section == "therapeutic": - for level in statement["evidenceLevel"] or []: - if level["@rid"] in approved: + if ipr_section == 'therapeutic': + for level in statement['evidenceLevel'] or []: + if level['@rid'] in approved: approved_therapy = True break - if ipr_section == "prognostic" and not disease_match: + if ipr_section == 'prognostic' and not disease_match: continue # GERO-72 / GERO-196 evidence_level_str = display_evidence_levels(statement) - evidence_levels = statement.get("evidenceLevel") or [] - ipr_evidence_levels = [ev_map[el.get("@rid", "")] for el in evidence_levels if el] - ipr_evidence_levels_str = ";".join(sorted(set([el for el in ipr_evidence_levels]))) + evidence_levels = statement.get('evidenceLevel') or [] + ipr_evidence_levels = [ev_map[el.get('@rid', '')] for el in evidence_levels if el] + ipr_evidence_levels_str = ';'.join(sorted(set([el for el in ipr_evidence_levels]))) for variant in variants: - if variant["@rid"] not in variant_matches: + if variant['@rid'] not in variant_matches: continue row = KbMatch( { - "approvedTherapy": approved_therapy or False, - "category": ipr_section or "unknown", - "context": ( - statement["subject"]["displayName"] if statement["subject"] else "" + 'approvedTherapy': approved_therapy or False, + 'category': ipr_section or 'unknown', + 'context': ( + statement['subject']['displayName'] if statement['subject'] else '' ), - "kbContextId": (statement["subject"]["@rid"] if statement["subject"] else ""), - "disease": ";".join(sorted(d.get("displayName", "") for d in diseases)), - "evidenceLevel": evidence_level_str or "", - "iprEvidenceLevel": ipr_evidence_levels_str or "", - "kbStatementId": statement["@rid"], - "kbVariant": str(variant.get("displayName", "")) or "", - "variant": str(variant.get("displayName", "")) or "", - "variantType": "", - "kbVariantId": variant["@rid"], - "matchedCancer": disease_match, - "reference": pmid, - "relevance": statement["relevance"]["displayName"], - "kbRelevanceId": statement["relevance"]["@rid"], - "externalSource": ( - str(statement["source"].get("displayName", "")) - if statement["source"] - else "" + 'kbContextId': (statement['subject']['@rid'] if statement['subject'] else ''), + 'disease': ';'.join(sorted(d.get('displayName', '') for d in diseases)), + 'evidenceLevel': evidence_level_str or '', + 'iprEvidenceLevel': ipr_evidence_levels_str or '', + 'kbStatementId': statement['@rid'], + 'kbVariant': str(variant.get('displayName', '')) or '', + 'variant': str(variant.get('displayName', '')) or '', + 'variantType': '', + 'kbVariantId': variant['@rid'], + 'matchedCancer': disease_match, + 'reference': reference, + 'relevance': statement['relevance']['displayName'], + 'kbRelevanceId': statement['relevance']['@rid'], + 'externalSource': ( + str(statement['source'].get('displayName', '')) + if statement['source'] + else '' ), - "requiredKbMatches": 
[item["@rid"] for item in variants], - "externalStatementId": statement.get("sourceId", "") or "", - "reviewStatus": statement.get("reviewStatus", "") or "", - "kbData": {}, + 'requiredKbMatches': [item['@rid'] for item in variants], + 'externalStatementId': statement.get('sourceId', '') or '', + 'reviewStatus': statement.get('reviewStatus', '') or '', + 'kbData': {}, } ) - if statement["relevance"]["name"] == "eligibility": - row["kbData"]["recruitment_status"] = recruitment_statuses.get( - row["kbContextId"], "not found" + if statement['relevance']['name'] == 'eligibility': + row['kbData']['recruitment_status'] = recruitment_statuses.get( + row['kbContextId'], 'not found' ) rows.append(row) return rows @@ -246,83 +249,99 @@ def select_expression_plots( """ selected_variants = { - (match["variantType"], match["variant"]) + (match['variantType'], match['variant']) for match in kb_matches - if match["category"] == "therapeutic" + if match['category'] == 'therapeutic' } images_by_gene: Dict[str, ImageDefinition] = {} selected_genes = set() for variant in all_variants: - if (variant["variantType"], variant["key"]) in selected_variants: - for key in ["gene", "gene1", "gene2"]: + if (variant['variantType'], variant['key']) in selected_variants: + for key in ['gene', 'gene1', 'gene2']: gene = variant.get(key) if gene: selected_genes.add(str(gene)) - gene = str(variant.get("gene", "")) - hist = str(variant.get("histogramImage", "")) + gene = str(variant.get('gene', '')) + hist = str(variant.get('histogramImage', '')) if hist: - images_by_gene[gene] = ImageDefinition({"key": f"expDensity.{gene}", "path": hist}) + images_by_gene[gene] = ImageDefinition({'key': f'expDensity.{gene}', 'path': hist}) return [images_by_gene[gene] for gene in selected_genes if gene in images_by_gene] def create_key_alterations( - kb_matches: List[Hashabledict], all_variants: Sequence[IprVariant] + kb_matches: List[Hashabledict], + all_variants: Sequence[IprVariant], + included_kb_matches: List[KbVariantMatch], ) -> Tuple[List[Dict], Dict]: """Create the list of significant variants matched by the KB. This list of matches is also used to create the variant counts. 
+ + kb_matches: the full list of matched kb objects found for the reported variants + all_variants: the full list of all reported variants, matched or unmatched + included_kb_matches: the kb matches whose kbVariantIds are allowed in the key alterations table; + this is all kb_variants if partially matched statements are allowed, or + the subset of kb_variants that are conditions for at least one + fully satisfied statement condition set, if partially matched statements + are not allowed (i.e., kb_variants that are not part of any fully satisfied + statement condition set are excluded) """ alterations = [] type_mapping = { - "mut": "smallMutations", - "cnv": "CNVs", - "sv": "SVs", - "exp": "expressionOutliers", + 'mut': 'smallMutations', + 'cnv': 'CNVs', + 'sv': 'SVs', + 'exp': 'expressionOutliers', } counts: Dict[str, Set] = {v: set() for v in type_mapping.values()} skipped_variant_types = [] + + included_kbvariant_ids = list(set([item['kbVariantId'] for item in included_kb_matches])) + for kb_match in kb_matches: - variant_type = kb_match["variantType"] - variant_key = kb_match["variant"] - if kb_match["category"] == "unknown": + if kb_match['kbVariantId'] not in included_kbvariant_ids: + continue + variant_type = kb_match['variantType'] + variant_key = kb_match['variant'] + if kb_match['category'] == 'unknown': continue if variant_type not in type_mapping.keys(): if variant_type not in skipped_variant_types: skipped_variant_types.append(variant_type) logger.warning( - f"No summary key alterations for {variant_type}. Skipping {variant_key}" + f'No summary key alterations for {variant_type}. Skipping {variant_key}' ) continue try: variant = find_variant(all_variants, variant_type, variant_key) except KeyError as err: logger.error(err) - logger.error(f"No variant match found for {variant_key}") + logger.error(f'No variant match found for {variant_key}') continue counts[type_mapping[variant_type]].add(variant_key) - if variant_type == "exp": - alterations.append(f'{variant.get("gene","")} ({variant.get("expressionState")})') - elif variant_type == "cnv": - alterations.append(f'{variant.get("gene","")} ({variant.get("cnvState")})') + if variant_type == 'exp': + alterations.append(f'{variant.get("gene", "")} ({variant.get("expressionState")})') + elif variant_type == 'cnv': + alterations.append(f'{variant.get("gene", "")} ({variant.get("cnvState")})') # only show germline if relevant - elif kb_match["category"] in GERMLINE_BASE_TERMS and variant.get("germline"): - alterations.append(f"germline {variant['variant']}") + elif kb_match['category'] in GERMLINE_BASE_TERMS and variant.get('germline'): + alterations.append(f'germline {variant["variant"]}') else: - alterations.append(variant["variant"]) + alterations.append(variant['variant']) counted_variants = set.union(*counts.values()) - counts["variantsUnknown"] = set() + counts['variantsUnknown'] = set() # count the un-matched variants for variant in all_variants: - if variant["variant"] and variant["key"] not in counted_variants: - counts["variantsUnknown"].add(variant["key"]) + if variant['variant'] and variant['key'] not in counted_variants: + counts['variantsUnknown'].add(variant['key']) return ( - [{"geneVariant": alt} for alt in set(alterations)], + [{'geneVariant': alt} for alt in set(alterations)], {k: len(v) for k, v in counts.items()}, ) @@ -347,44 +366,44 @@ def germline_kb_matches( filtered list of kb_matches """ ret_list = [] - germ_alts = [alt for alt in kb_matches if alt["category"] in GERMLINE_BASE_TERMS] + germ_alts = [alt for alt in
kb_matches if alt['category'] in GERMLINE_BASE_TERMS] somatic_alts = [alt for alt in kb_matches if alt not in germ_alts] if germ_alts: - logger.info(f"checking germline status of {GERMLINE_BASE_TERMS}") + logger.info(f'checking germline status of {GERMLINE_BASE_TERMS}') for alt in germ_alts: - var_list = [v for v in all_variants if v["key"] == alt["variant"]] - germline_var_list = [v for v in var_list if v.get("germline")] - unknown_var_list = [v for v in var_list if "germline" not in v] + var_list = [v for v in all_variants if v['key'] == alt['variant']] + germline_var_list = [v for v in var_list if v.get('germline')] + unknown_var_list = [v for v in var_list if 'germline' not in v] if germline_var_list: logger.debug( - f"germline kbStatementId:{alt['kbStatementId']}: {alt['kbVariant']} {alt['category']}" + f'germline kbStatementId:{alt["kbStatementId"]}: {alt["kbVariant"]} {alt["category"]}' ) ret_list.append(alt) elif unknown_var_list: logger.warning( - f"germline no data fail for: {alt['kbStatementId']}: {alt['kbVariant']} {alt['category']}" + f'germline no data fail for: {alt["kbStatementId"]}: {alt["kbVariant"]} {alt["category"]}' ) if not assume_somatic: logger.debug( - f"Keeping unverified match to germline kbStatementId:{alt['kbStatementId']}: {alt['kbVariant']} {alt['category']}" + f'Keeping unverified match to germline kbStatementId:{alt["kbStatementId"]}: {alt["kbVariant"]} {alt["category"]}' ) ret_list.append(alt) else: logger.debug( - f"Dropping unverified match to germline kbStatementId:{alt['kbStatementId']}: {alt['kbVariant']} {alt['category']}" + f'Dropping unverified match to germline kbStatementId:{alt["kbStatementId"]}: {alt["kbVariant"]} {alt["category"]}' ) else: logger.debug( - f"Dropping somatic match to germline kbStatementId:{alt['kbStatementId']}: {alt['kbVariant']} {alt['category']}" + f'Dropping somatic match to germline kbStatementId:{alt["kbStatementId"]}: {alt["kbVariant"]} {alt["category"]}' ) if somatic_alts: # Remove any matches to germline events for alt in somatic_alts: - var_list = [v for v in all_variants if v["key"] == alt["variant"]] - somatic_var_list = [v for v in var_list if not v.get("germline", not assume_somatic)] + var_list = [v for v in all_variants if v['key'] == alt['variant']] + somatic_var_list = [v for v in var_list if not v.get('germline', not assume_somatic)] if var_list and not somatic_var_list: logger.debug( - f"Dropping germline match to somatic statement kbStatementId:{alt['kbStatementId']}: {alt['kbVariant']} {alt['category']}" + f'Dropping germline match to somatic statement kbStatementId:{alt["kbStatementId"]}: {alt["kbVariant"]} {alt["category"]}' ) elif somatic_var_list: ret_list.append(alt) # match to somatic variant @@ -397,7 +416,7 @@ def germline_kb_matches( def multi_variant_filtering( graphkb_conn: GraphKBConnection, gkb_matches: List[KbMatch], - excludedTypes: List[str] = ["wildtype"], + excludedTypes: List[str] = ['wildtype'], ) -> List[KbMatch]: """Filters out GraphKB matches that don't match all required variants on multi-variant statements @@ -417,42 +436,42 @@ def multi_variant_filtering( filtered list of KbMatch statements """ # All matching statements & variants (GKB RIDs) - matching_statement_rids = {match["kbStatementId"] for match in gkb_matches} - matching_variant_rids = {match["kbVariantId"] for match in gkb_matches} + matching_statement_rids = {match['kbStatementId'] for match in gkb_matches} + matching_variant_rids = {match['kbVariantId'] for match in gkb_matches} # Get conditions detail on all
matching statements res = graphkb_conn.post( - uri="query", + uri='query', data={ - "target": "Statement", - "filters": { - "@rid": list(matching_statement_rids), - "operator": "IN", + 'target': 'Statement', + 'filters': { + '@rid': list(matching_statement_rids), + 'operator': 'IN', }, - "history": True, - "returnProperties": [ - "@rid", - "conditions.@rid", - "conditions.@class", - "conditions.type", + 'history': True, + 'returnProperties': [ + '@rid', + 'conditions.@rid', + 'conditions.@class', + 'conditions.type', ], }, ) - statements = res["result"] + statements = res['result'] # Get set of excluded Vocabulary RIDs for variant types excluded = {} - if len(excludedTypes) != 0 and excludedTypes[0] != "": + if len(excludedTypes) != 0 and excludedTypes[0] != '': excluded = gkb_vocab.get_terms_set(graphkb_conn, excludedTypes) # Mapping statements to their conditional variants # (discarding non-variant conditions & variant conditions from excluded types) statement_to_variants = {} for statement in statements: - statement_to_variants[statement["@rid"]] = { - el["@rid"] - for el in statement["conditions"] - if (el["@class"] in VARIANT_CLASSES and el.get("type", "") not in excluded) + statement_to_variants[statement['@rid']] = { + el['@rid'] + for el in statement['conditions'] + if (el['@class'] in VARIANT_CLASSES and el.get('type', '') not in excluded) } # Set of statements with complete matching @@ -464,7 +483,7 @@ def multi_variant_filtering( # Filtering out incomplete matches of gkb_matches return [ - match for match in gkb_matches if match["kbStatementId"] in complete_matching_statements + match for match in gkb_matches if match['kbStatementId'] in complete_matching_statements ] @@ -483,10 +502,10 @@ def get_kb_variants( for item in gkb_matches: kbv = KbVariantMatch( { - "kbVariant": item["kbVariant"], - "variant": item["variant"], - "variantType": item["variantType"], - "kbVariantId": item["kbVariantId"], + 'kbVariant': item['kbVariant'], + 'variant': item['variant'], + 'variantType': item['variantType'], + 'kbVariantId': item['kbVariantId'], } ) kbVariants[str(kbv)] = kbv @@ -509,7 +528,7 @@ def get_kb_matched_statements( kbs_keys = KbMatchedStatement.__annotations__.keys() for item in gkb_matches: stmt = copy(item) - stmt["requiredKbMatches"].sort() + stmt['requiredKbMatches'].sort() kbs = KbMatchedStatement({key: val for (key, val) in stmt.items() if key in kbs_keys}) dict_key = str(kbs) kbMatchedStatements[dict_key] = kbs @@ -568,20 +587,20 @@ def get_kb_statement_matched_conditions( kbMatchedStatementConditions = {} for kbStmt in kbMatchedStatements: - stmts = [item for item in gkb_matches if item["kbStatementId"] == kbStmt["kbStatementId"]] + stmts = [item for item in gkb_matches if item['kbStatementId'] == kbStmt['kbStatementId']] requirements = {} - for requirement in stmts[0]["requiredKbMatches"]: + for requirement in stmts[0]['requiredKbMatches']: if not requirements.get(requirement, False): # only use explicit variant/statement links reqlist = [ { - "kbVariantId": requirement, - "observedVariantKey": item["variant"], + 'kbVariantId': requirement, + 'observedVariantKey': item['variant'], } for item in gkb_matches if ( - item["kbVariantId"] == requirement - and item["kbStatementId"] == kbStmt["kbStatementId"] + item['kbVariantId'] == requirement + and item['kbStatementId'] == kbStmt['kbStatementId'] ) ] requirements[requirement] = reqlist @@ -592,18 +611,18 @@ def get_kb_statement_matched_conditions( variantConditionSets = list(product(*requirements.values())) conditionSets = [
- {"kbStatementId": kbStmt["kbStatementId"], "matchedConditions": item} + {'kbStatementId': kbStmt['kbStatementId'], 'matchedConditions': item} for item in variantConditionSets ] for conditionSet in conditionSets: matchedConditions = sorted( - conditionSet["matchedConditions"], - key=lambda x: (x["kbVariantId"], x["observedVariantKey"]), + conditionSet['matchedConditions'], + key=lambda x: (x['kbVariantId'], x['observedVariantKey']), ) kbmc = KbMatchedStatementConditionSet( { - "kbStatementId": conditionSet["kbStatementId"], - "matchedConditions": matchedConditions, + 'kbStatementId': conditionSet['kbStatementId'], + 'matchedConditions': matchedConditions, } ) key = str( @@ -622,10 +641,24 @@ def get_kb_matches_sections( kb_statement_matched_conditions = get_kb_statement_matched_conditions( gkb_matches, allow_partial_matches ) + + if not allow_partial_matches: + # remove kb_matches that are not part of any fully matched condition set + unique_kb_variant_ids = list( + set( + [ + item['kbVariantId'] + for conditionSet in kb_statement_matched_conditions + for item in conditionSet['matchedConditions'] + ] + ) + ) + kb_variants = [item for item in kb_variants if item['kbVariantId'] in unique_kb_variant_ids] + return { - "kbMatches": kb_variants, - "kbMatchedStatements": kb_matched_statements, - "kbStatementMatchedConditions": kb_statement_matched_conditions, + 'kbMatches': kb_variants, + 'kbMatchedStatements': kb_matched_statements, + 'kbStatementMatchedConditions': kb_statement_matched_conditions, } @@ -634,12 +667,12 @@ def get_kb_disease_matches( kb_disease_match: Optional[str] = None, verbose: bool = True, useSubgraphsRoute: bool = True, -) -> list[str]: +) -> list[Dict]: disease_matches = [] if not kb_disease_match: - kb_disease_match = "cancer" + kb_disease_match = 'cancer' if verbose: logger.warning(f"No disease provided; will use '{kb_disease_match}'") @@ -657,20 +690,20 @@ def get_kb_disease_matches( base_records = gkb_util.convert_to_rid_list( graphkb_conn.query( gkb_vocab.query_by_name( - "Disease", + 'Disease', kb_disease_match, ) ) ) if base_records: response = graphkb_conn.post( - "/subgraphs/Disease", + '/subgraphs/Disease', { - "subgraphType": "tree", - "base": base_records, + 'subgraphType': 'tree', + 'base': base_records, }, ) - disease_matches = list(response["result"]["g"]["nodes"].keys()) + disease_matches = list(response['result']['g']['nodes'].values()) except Exception: if verbose: @@ -681,20 +714,15 @@ def get_kb_disease_matches( # Traversal depth is limited if not useSubgraphsRoute: if verbose: - logger.info(f"Matching disease ({kb_disease_match}) to graphkb using get_term_tree()") - disease_matches = list( - { - r["@rid"] - for r in gkb_vocab.get_term_tree( - graphkb_conn, - kb_disease_match, - ontology_class="Disease", - ) - } + logger.info(f'Matching disease ({kb_disease_match}) to graphkb using get_term_tree()') + disease_matches = gkb_vocab.get_term_tree( + graphkb_conn, + kb_disease_match, + ontology_class='Disease', ) if not disease_matches: - msg = f"failed to match disease ({kb_disease_match}) to graphkb" + msg = f'failed to match disease ({kb_disease_match}) to graphkb' if verbose: logger.error(msg) raise ValueError(msg) diff --git a/pori_python/ipr/main.py b/pori_python/ipr/main.py index 63a028fe..cbb7c128 100644 --- a/pori_python/ipr/main.py +++ b/pori_python/ipr/main.py @@ -23,7 +23,7 @@ from .annotate import annotate_variants from .connection import IprConnection -from .constants import DEFAULT_URL, TMB_SIGNATURE_HIGH_THRESHOLD +from .constants import 
TMB_SIGNATURE_HIGH_THRESHOLD from .inputs import ( check_comparators, check_variant_links, @@ -32,6 +32,7 @@ preprocess_expression_variants, preprocess_hla, preprocess_msi, + preprocess_hrd, preprocess_signature_variants, preprocess_small_mutations, preprocess_structural_variants, @@ -53,19 +54,19 @@ CACHE_GENE_MINIMUM = 5000 RENAMED_GENE_PROPERTIES = { # old_name: new_name - "cancerRelated": "kbStatementRelated", - "cancerGene": "cancerGeneListMatch", + 'cancerRelated': 'kbStatementRelated', + 'cancerGene': 'cancerGeneListMatch', } def file_path(path: str) -> str: if not os.path.exists(path): - raise argparse.ArgumentTypeError(f"{repr(path)} is not a valid filename. does not exist") + raise argparse.ArgumentTypeError(f'{repr(path)} is not a valid filename. does not exist') return path def timestamp() -> str: - return datetime.datetime.now().strftime("%Y-%m-%dT%H:%M:%S") + return datetime.datetime.now().strftime('%Y-%m-%dT%H:%M:%S') def command_interface() -> None: @@ -73,92 +74,92 @@ def command_interface() -> None: Parsed arguments are used to call the ipr_report() function. """ parser = ArgumentParser(formatter_class=ArgumentDefaultsHelpFormatter) - req = parser.add_argument_group("required arguments") - (req if not os.environ.get("USER") else parser).add_argument( - "--username", - required=not os.environ.get("USER"), - default=os.environ.get("USER"), - help="username to use connecting to graphkb/ipr", + req = parser.add_argument_group('required arguments') + (req if not os.environ.get('USER') else parser).add_argument( + '--username', + required=not os.environ.get('USER'), + default=os.environ.get('USER'), + help='username to use connecting to graphkb/ipr', ) - req.add_argument("--password", required=True, help="password to use connecting to graphkb/ipr") + req.add_argument('--password', required=True, help='password to use connecting to graphkb/ipr') req.add_argument( - "-c", "--content", required=True, type=file_path, help="Report Content as JSON" + '-c', '--content', required=True, type=file_path, help='Report Content as JSON' ) - parser.add_argument("--ipr_url", default=os.environ.get("IPR_URL", DEFAULT_URL)) + parser.add_argument('--ipr_url', default=os.environ.get('IPR_URL')) parser.add_argument( - "--graphkb_username", - help="username to use connecting to graphkb if different from ipr", + '--graphkb_username', + help='username to use connecting to graphkb if different from ipr', ) parser.add_argument( - "--graphkb_password", - help="password to use connecting to graphkb if different from ipr", + '--graphkb_password', + help='password to use connecting to graphkb if different from ipr', ) - parser.add_argument("--graphkb_url", default=os.environ.get("GRAPHKB_URL", None)) - parser.add_argument("--log_level", default="info", choices=LOG_LEVELS.keys()) + parser.add_argument('--graphkb_url', default=os.environ.get('GRAPHKB_URL', None)) + parser.add_argument('--log_level', default='info', choices=LOG_LEVELS.keys()) parser.add_argument( - "--therapeutics", + '--therapeutics', default=False, - help="Generate therapeutic options", - action="store_true", + help='Generate therapeutic options', + action='store_true', ) parser.add_argument( - "--skip_comments", + '--skip_comments', default=False, - action="store_true", - help="Turn off generating the analyst comments section of the report", + action='store_true', + help='Turn off generating the analyst comments section of the report', ) parser.add_argument( - "-o", - "--output_json_path", - default=f"pori_python_report_{timestamp()}.json", 
- help="path to a JSON to output the report upload body", + '-o', + '--output_json_path', + default=f'pori_python_report_{timestamp()}.json', + help='path to a JSON to output the report upload body', ) parser.add_argument( - "-w", - "--always_write_output_json", - action="store_true", - help="Write to output_json_path on successful IPR uploads instead of just when the upload fails", + '-w', + '--always_write_output_json', + action='store_true', + help='Write to output_json_path on successful IPR uploads instead of just when the upload fails', ) parser.add_argument( - "--async_upload", + '--async_upload', default=False, - action="store_true", - help="True if reports-async ipr endpoint should be used instead of basic reports", + action='store_true', + help='True if reports-async ipr endpoint should be used instead of basic reports', ) parser.add_argument( - "--mins_to_wait", + '--mins_to_wait', default=5, - action="store", - help="is using reports-async, number of minutes to wait before throwing error", + action='store', + help='is using reports-async, number of minutes to wait before throwing error', ) parser.add_argument( - "--allow_partial_matches", + '--allow_partial_matches', default=False, - action="store_true", - help="True to include matches to multivariant statements where not all variants are present", + action='store_true', + help='True to include matches to multivariant statements where not all variants are present', ) parser.add_argument( - "--upload_json", + '--upload_json', default=False, - action="store_true", - help="True to skip all the preprocessing and just submit a json to ipr", + action='store_true', + help='True to skip all the preprocessing and just submit a json to ipr', ) parser.add_argument( - "--validate_json", + '--validate_json', default=False, - action="store_true", - help="True if only need to validate the json", + action='store_true', + help='True if only need to validate the json', ) parser.add_argument( - "--ignore_extra_fields", + '--ignore_extra_fields', default=False, - action="store_true", - help="True if ignore extra fields in json", + action='store_true', + help='True if ignore extra fields in json', ) args = parser.parse_args() - with open(args.content, "r") as fh: + with open(args.content, 'r') as fh: content = json.load(fh) ipr_report( @@ -191,38 +192,38 @@ def clean_unsupported_content(upload_content: Dict, ipr_spec: Dict = {}) -> Dict """ if ( ipr_spec - and "components" in ipr_spec.keys() - and "schemas" in ipr_spec["components"].keys() - and "genesCreate" in ipr_spec["components"]["schemas"].keys() - and "properties" in ipr_spec["components"]["schemas"]["genesCreate"].keys() + and 'components' in ipr_spec.keys() + and 'schemas' in ipr_spec['components'].keys() + and 'genesCreate' in ipr_spec['components']['schemas'].keys() + and 'properties' in ipr_spec['components']['schemas']['genesCreate'].keys() ): - genes_spec = ipr_spec["components"]["schemas"]["genesCreate"]["properties"].keys() + genes_spec = ipr_spec['components']['schemas']['genesCreate']['properties'].keys() # check what ipr report upload expects and adjust contents to match for old_name, new_name in RENAMED_GENE_PROPERTIES.items(): if old_name in genes_spec: logger.warning( - f"Legacy IPR - Renaming property {new_name} to {old_name} for compatibility to ipr_spec" + f'Legacy IPR - Renaming property {new_name} to {old_name} for compatibility to ipr_spec' ) - for gene in upload_content["genes"]: + for gene in upload_content['genes']: if new_name in gene: gene[old_name] = gene[new_name] 
gene.pop(new_name) else: outdate_properties = 0 - for gene in upload_content["genes"]: + for gene in upload_content['genes']: if old_name in gene: gene[new_name] = gene[old_name] gene.pop(old_name) outdate_properties += 1 if outdate_properties: logger.warning( - f"Renamed property {old_name} to {new_name} on {outdate_properties} genes for ipr_spec" + f'Renamed property {old_name} to {new_name} on {outdate_properties} genes for ipr_spec' ) # remove any unhandled incompatible keys removed_keys: Dict[str, int] = {} - for gene in upload_content["genes"]: + for gene in upload_content['genes']: unsupported_keys = [key for key in gene.keys() if key not in genes_spec] for key in unsupported_keys: if key in removed_keys: @@ -233,23 +234,23 @@ def clean_unsupported_content(upload_content: Dict, ipr_spec: Dict = {}) -> Dict for key, count in removed_keys.items(): logger.warning(f"IPR unsupported property '{key}' removed from {count} genes.") - drop_columns = ["variant", "variantType", "histogramImage"] + drop_columns = ['variant', 'variantType', 'histogramImage'] # DEVSU-2034 - use a 'displayName' VARIANT_LIST_KEYS = [ - "expressionVariants", - "smallMutations", - "copyVariants", - "structuralVariants", - "probeResults", - "signatureVariants", + 'expressionVariants', + 'smallMutations', + 'copyVariants', + 'structuralVariants', + 'probeResults', + 'signatureVariants', ] for variant_list_section in VARIANT_LIST_KEYS: for variant in upload_content.get(variant_list_section, []): - if not variant.get("displayName"): - variant["displayName"] = ( - variant.get("variant") or variant.get("kbCategory") or variant.get("key", "") + if not variant.get('displayName'): + variant['displayName'] = ( + variant.get('variant') or variant.get('kbCategory') or variant.get('key', '') ) - if variant_list_section == "probeResults": + if variant_list_section == 'probeResults': # currently probeResults will error if they do NOT have a 'variant' column. # smallMutations will error if they DO have a 'variant' column. continue @@ -257,29 +258,29 @@ def clean_unsupported_content(upload_content: Dict, ipr_spec: Dict = {}) -> Dict if col in variant: del variant[col] # tmburMutationBurden is a single value, not list - if upload_content.get("tmburMutationBurden"): - if not upload_content["tmburMutationBurden"].get("displayName"): - upload_content["tmburMutationBurden"]["displayName"] = upload_content[ - "tmburMutationBurden" - ].get("kbCategory", "") + if upload_content.get('tmburMutationBurden'): + if not upload_content['tmburMutationBurden'].get('displayName'): + upload_content['tmburMutationBurden']['displayName'] = upload_content[ + 'tmburMutationBurden' + ].get('kbCategory', '') # TODO: check this is still necessary - for row in upload_content["kbMatches"]: - if "kbContextId" in row: - del row["kbContextId"] - if "kbRelevanceId" in row: - del row["kbRelevanceId"] - if "requiredKbMatches" in row: - del row["requiredKbMatches"] - - for row in upload_content["kbMatchedStatements"]: - if "kbContextId" in row: - del row["kbContextId"] - if "kbRelevanceId" in row: - del row["kbRelevanceId"] + for row in upload_content['kbMatches']: + if 'kbContextId' in row: + del row['kbContextId'] + if 'kbRelevanceId' in row: + del row['kbRelevanceId'] + if 'requiredKbMatches' in row: + del row['requiredKbMatches'] + + for row in upload_content['kbMatchedStatements']: + if 'kbContextId' in row: + del row['kbContextId'] + if 'kbRelevanceId' in row: + del row['kbRelevanceId'] # Removing cosmicSignatures. 
Temporary - upload_content.pop("cosmicSignatures", None) + upload_content.pop('cosmicSignatures', None) return upload_content @@ -293,15 +294,15 @@ def ipr_report( username: str, password: str, content: Dict, - ipr_url: str = DEFAULT_URL, - log_level: str = "info", - output_json_path: str = "", + ipr_url: str = '', + log_level: str = 'info', + output_json_path: str = '', always_write_output_json: bool = False, ipr_upload: bool = True, interactive: bool = False, - graphkb_username: str = "", - graphkb_password: str = "", - graphkb_url: str = "", + graphkb_username: str = '', + graphkb_password: str = '', + graphkb_url: str = '', generate_therapeutics: bool = False, generate_comments: bool = True, match_germline: bool = False, @@ -323,7 +324,7 @@ def ipr_report( Args: username: the username for connecting to GraphKB and IPR password: the password for connecting to GraphKB and IPR - ipr_url: base URL to use in connecting to IPR + ipr_url: base URL to use in connecting to IPR (eg. https://ipr-api.bcgsc.ca/api) log_level: the logging level content: report content output_json_path: path to a JSON file to output the report upload body. @@ -352,18 +353,27 @@ def ipr_report( # set the default logging configuration logging.basicConfig( level=LOG_LEVELS[log_level], - format="%(asctime)s %(name)s %(levelname)s %(message)s", - datefmt="%m-%d-%y %H:%M:%S", + format='%(asctime)s %(name)s %(levelname)s %(message)s', + datefmt='%m-%d-%y %H:%M:%S', ) # IPR CONNECTION - ipr_conn = IprConnection(username, password, ipr_url) + ipr_url = ipr_url if ipr_url else os.environ.get('IPR_URL', '') + ipr_conn = None + if ipr_url: + ipr_conn = IprConnection(username, password, ipr_url) + else: + logger.warning('No ipr_url given') if validate_json: + if not ipr_conn: + raise ValueError('ipr_url required to validate json') ipr_result = ipr_conn.validate_json(content) return ipr_result if upload_json: + if not ipr_conn: + raise ValueError('ipr_url required to upload json') ipr_result = ipr_conn.upload_report( content, mins_to_wait, async_upload, ignore_extra_fields ) @@ -373,31 +383,32 @@ def ipr_report( try: validate_report_content(content) except jsonschema.exceptions.ValidationError as err: - logger.error("Failed schema check - report variants may be corrupted or unmatched.") - logger.error(f"Failed schema check: {err}") + logger.error('Failed schema check - report variants may be corrupted or unmatched.') + logger.error(f'Failed schema check: {err}') # INPUT VARIANTS VALIDATION & PREPROCESSING (OBSERVED BIOMARKERS) signature_variants: List[IprSignatureVariant] = preprocess_signature_variants( [ - *preprocess_cosmic(content.get("cosmicSignatures", [])), # includes dMMR - *preprocess_hla(content.get("hlaTypes", [])), + *preprocess_cosmic(content.get('cosmicSignatures', [])), # includes dMMR + *preprocess_hla(content.get('hlaTypes', [])), *preprocess_tmb( tmb_high, - content.get("tmburMutationBurden", {}), # old tmb pipeline - content.get("genomeTmb", ""), # newer tmb pipeline + content.get('tmburMutationBurden', {}), # old tmb pipeline + content.get('genomeTmb', ''), # newer tmb pipeline ), - *preprocess_msi(content.get("msi", None)), + *preprocess_msi(content.get('msi', None)), + *preprocess_hrd(content.get('hrd', None)), ] ) small_mutations: List[IprSmallMutationVariant] = preprocess_small_mutations( - content.get("smallMutations", []) + content.get('smallMutations', []) ) structural_variants: List[IprFusionVariant] = preprocess_structural_variants( - content.get("structuralVariants", []) + 
content.get('structuralVariants', []) ) - copy_variants: List[IprCopyVariant] = preprocess_copy_variants(content.get("copyVariants", [])) + copy_variants: List[IprCopyVariant] = preprocess_copy_variants(content.get('copyVariants', [])) expression_variants: List[IprExprVariant] = preprocess_expression_variants( - content.get("expressionVariants", []) + content.get('expressionVariants', []) ) # Additional checks if expression_variants: @@ -408,30 +419,29 @@ def ipr_report( ) # GKB CONNECTION - if graphkb_url: - logger.info(f"connecting to graphkb: {graphkb_url}") - graphkb_conn = GraphKBConnection(graphkb_url) - else: - graphkb_conn = GraphKBConnection() - - gkb_user = graphkb_username if graphkb_username else username - gkb_pass = graphkb_password if graphkb_password else password + graphkb_conn = GraphKBConnection(graphkb_url) if graphkb_url else GraphKBConnection() + logger.info(f'connecting to graphkb: {graphkb_conn.url}') - graphkb_conn.login(gkb_user, gkb_pass) + graphkb_conn.login( + graphkb_username if graphkb_username else username, + graphkb_password if graphkb_password else password, + ) # DISEASE # Disease term from bioapps; expected OncoTree term - kb_disease_match: str = content["kbDiseaseMatch"] + kb_disease_match: str = content['kbDiseaseMatch'] # Matching disease RIDs from GraphKB using term tree # (Will raise an uncaught error if no match) - disease_matches: list[str] = get_kb_disease_matches(graphkb_conn, kb_disease_match) + disease_match_records: list[Dict] = get_kb_disease_matches(graphkb_conn, kb_disease_match) + disease_match_rids: list[str] = [item['@rid'] for item in disease_match_records] + disease_match_names: list[str] = [item['name'] for item in disease_match_records] # GKB MATCHING (AKA ANNOTATION) gkb_matches: List[Hashabledict] = annotate_variants( graphkb_conn=graphkb_conn, interactive=interactive, - disease_matches=disease_matches, + disease_matches=disease_match_rids, # Variants, per type: signature_variants=signature_variants, small_mutations=small_mutations, @@ -458,53 +468,53 @@ def ipr_report( ] num_removed = org_len - len(gkb_matches) if num_removed: - logger.info(f"Removing {num_removed} germline events without medical matches.") + logger.info(f'Removing {num_removed} germline events without medical matches.') if custom_kb_match_filter: - logger.info(f"custom_kb_match_filter on {len(gkb_matches)} variants") + logger.info(f'custom_kb_match_filter on {len(gkb_matches)} variants') gkb_matches = [Hashabledict(match) for match in custom_kb_match_filter(gkb_matches)] - logger.info(f"\t custom_kb_match_filter left {len(gkb_matches)} variants") - - # KEY ALTERATIONS - key_alterations, variant_counts = create_key_alterations(gkb_matches, all_variants) + logger.info(f'\t custom_kb_match_filter left {len(gkb_matches)} variants') # GENE INFORMATION - logger.info("fetching gene annotations") + logger.info('fetching gene annotations') gene_information = get_gene_information(graphkb_conn, sorted(genes_with_variants)) # THERAPEUTIC OPTIONS if generate_therapeutics: - logger.info("generating therapeutic options") + logger.info('generating therapeutic options') targets = create_therapeutic_options(graphkb_conn, gkb_matches, all_variants) else: targets = [] # ANALYST COMMENTS - logger.info("generating analyst comments") + logger.info('generating analyst comments') comments_list = [] if generate_comments: graphkb_comments = auto_analyst_comments( graphkb_conn, gkb_matches, - disease_matches=set(disease_matches), + disease_matches=set(disease_match_rids),
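# comments are filtered by disease RIDs; the matched names are only needed later for the IPR variant-text lookup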
variants=all_variants, ) comments_list.append(graphkb_comments) if include_ipr_variant_text: + if not ipr_conn: + raise ValueError('ipr_url required to include ipr variant text') ipr_comments = get_ipr_analyst_comments( ipr_conn, gkb_matches, disease_name=kb_disease_match, - project_name=content["project"], - report_type=content["template"], + disease_match_names=disease_match_names, + project_name=content['project'], + report_type=content['template'], include_nonspecific_disease=include_nonspecific_disease, include_nonspecific_project=include_nonspecific_project, include_nonspecific_template=include_nonspecific_template, ) comments_list.append(ipr_comments) - comments = {"comments": "\n".join(comments_list)} + comments = {'comments': '\n'.join(comments_list)} # REFORMATTING KBMATCHES # kbMatches -> kbMatches, kbMatchedStatements & kbStatementMatchedConditions @@ -512,50 +522,64 @@ def ipr_report( gkb_matches, allow_partial_matches=allow_partial_matches ) + # KEY ALTERATIONS + key_alterations, variant_counts = create_key_alterations( + gkb_matches, all_variants, kb_matched_sections['kbMatches'] + ) + # OUTPUT CONTENT # thread safe deep-copy the original content output = json.loads(json.dumps(content)) - output.update(kb_matched_sections) output.update( { - "copyVariants": [ - trim_empty_values(c) for c in copy_variants if c["gene"] in genes_with_variants + 'copyVariants': [ + trim_empty_values(c) for c in copy_variants if c['gene'] in genes_with_variants ], - "smallMutations": [trim_empty_values(s) for s in small_mutations], - "expressionVariants": [ + 'smallMutations': [trim_empty_values(s) for s in small_mutations], + 'expressionVariants': [ trim_empty_values(e) for e in expression_variants - if e["gene"] in genes_with_variants + if e['gene'] in genes_with_variants ], - "kbDiseaseMatch": kb_disease_match, - "kbUrl": graphkb_conn.url, - "kbVersion": timestamp(), - "structuralVariants": [ + 'kbDiseaseMatch': kb_disease_match, + 'kbUrl': graphkb_conn.url, + 'kbVersion': timestamp(), + 'structuralVariants': [ trim_empty_values(s) for s in filter_structural_variants( structural_variants, gkb_matches, gene_information ) ], - "signatureVariants": [trim_empty_values(s) for s in signature_variants], - "genes": gene_information, - "genomicAlterationsIdentified": key_alterations, - "variantCounts": variant_counts, - "analystComments": comments, - "therapeuticTarget": targets, + 'signatureVariants': [trim_empty_values(s) for s in signature_variants], + 'genes': gene_information, + 'genomicAlterationsIdentified': key_alterations, + 'variantCounts': variant_counts, + 'analystComments': comments, + 'therapeuticTarget': targets, } ) - output.setdefault("images", []).extend(select_expression_plots(gkb_matches, all_variants)) + output.setdefault('images', []).extend(select_expression_plots(gkb_matches, all_variants)) + + # if input includes hrdScore field, that is ok to pass to db + # but prefer the 'hrd' field if it exists + if output.get('hrd'): + if output.get('hrd').get('score'): + output['hrdScore'] = output['hrd']['score'] + output.pop('hrd') # kbmatches have already been made - ipr_spec = ipr_conn.get_spec() - output = clean_unsupported_content(output, ipr_spec) ipr_result = {} upload_error = None # UPLOAD TO IPR + if ipr_upload: + if not ipr_conn: + raise ValueError('ipr_url required to upload report') + ipr_spec = ipr_conn.get_spec() + output = clean_unsupported_content(output, ipr_spec) try: - logger.info(f"Uploading to IPR {ipr_conn.url}") + logger.info(f'Uploading to IPR {ipr_conn.url}') 
ipr_result = ipr_conn.upload_report( output, mins_to_wait, async_upload, ignore_extra_fields ) @@ -563,16 +587,16 @@ def ipr_report( output.update(ipr_result) except Exception as err: upload_error = err - logger.error(f"ipr_conn.upload_report failed: {err}", exc_info=True) + logger.error(f'ipr_conn.upload_report failed: {err}', exc_info=True) # SAVE TO JSON FILE if always_write_output_json: - logger.info(f"Writing IPR upload json to: {output_json_path}") - with open(output_json_path, "w") as fh: + logger.info(f'Writing IPR upload json to: {output_json_path}') + with open(output_json_path, 'w') as fh: fh.write(json.dumps(output)) - logger.info(f"made {graphkb_conn.request_count} requests to graphkb") - logger.info(f"average load {int(graphkb_conn.load or 0)} req/s") + logger.info(f'made {graphkb_conn.request_count} requests to graphkb') + logger.info(f'average load {int(graphkb_conn.load or 0)} req/s') if upload_error: raise upload_error return output diff --git a/pori_python/ipr/summary.py b/pori_python/ipr/summary.py index c6d63b20..cfe60868 100644 --- a/pori_python/ipr/summary.py +++ b/pori_python/ipr/summary.py @@ -28,10 +28,10 @@ logger, ) -OTHER_DISEASES = "other disease types" -ENTREZ_GENE_URL = "https://www.ncbi.nlm.nih.gov/gene" +OTHER_DISEASES = 'other disease types' +ENTREZ_GENE_URL = 'https://www.ncbi.nlm.nih.gov/gene' # TODO: https://www.bcgsc.ca/jira/browse/DEVSU-1181 -GRAPHKB_GUI = "https://graphkb.bcgsc.ca" +GRAPHKB_GUI = 'https://graphkb.bcgsc.ca' def filter_by_record_class( @@ -45,17 +45,17 @@ def check(name: str) -> bool: else: return name in record_classes - return [rec for rec in record_list if check(rec["@class"])] + return [rec for rec in record_list if check(rec['@class'])] def natural_join(word_list: List[str]) -> str: if len(word_list) > 1: - return ", ".join(word_list[:-1]) + ", and " + word_list[-1] - return "".join(word_list) + return ', '.join(word_list[:-1]) + ', and ' + word_list[-1] + return ''.join(word_list) def get_displayname(rec: Record) -> str: - ret_val = rec.get("displayName", rec["@rid"]) + ret_val = rec.get('displayName', rec['@rid']) return str(ret_val) @@ -66,26 +66,26 @@ def natural_join_records( return natural_join(word_list) -def create_graphkb_link(record_ids: List[str], record_class: str = "Statement") -> str: +def create_graphkb_link(record_ids: List[str], record_class: str = 'Statement') -> str: """ Create a link for a set of statements to the GraphKB client """ record_ids = sorted(list(set(record_ids))) if len(record_ids) == 1: return f'{GRAPHKB_GUI}/view/{record_class}/{record_ids[0].replace("#", "")}' - complex_param = base64.b64encode(json.dumps({"target": record_ids}).encode("utf-8")) - search_params = {"complex": complex_param, "@class": record_class} - return f"{GRAPHKB_GUI}/data/table?{urlencode(search_params)}" + complex_param = base64.b64encode(json.dumps({'target': record_ids}).encode('utf-8')) + search_params = {'complex': complex_param, '@class': record_class} + return f'{GRAPHKB_GUI}/data/table?{urlencode(search_params)}' def merge_diseases( diseases: List[Ontology] | List[Record], disease_matches: Set[str] = set() ) -> str: if len(convert_to_rid_set(diseases) - disease_matches) >= 2 and all( - [d["@class"] == "Disease" for d in diseases] + [d['@class'] == 'Disease' for d in diseases] ): words = sorted( - list(set([get_displayname(s) for s in diseases if s["@rid"] in disease_matches])) + list(set([get_displayname(s) for s in diseases if s['@rid'] in disease_matches])) ) words.append(OTHER_DISEASES) return natural_join(words) 
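The collapsing behaviour of merge_diseases above is easier to see with concrete values. The following is a minimal, self-contained sketch (hypothetical records and RIDs; the helper is re-implemented here for illustration rather than imported from pori_python.ipr.summary, and the fallback branch only approximates the original):

```python
from typing import Dict, List, Set

OTHER_DISEASES = 'other disease types'


def natural_join(word_list: List[str]) -> str:
    # "a, b, and c" for two or more items; the lone item (or '') otherwise.
    if len(word_list) > 1:
        return ', '.join(word_list[:-1]) + ', and ' + word_list[-1]
    return ''.join(word_list)


def merge_diseases_sketch(diseases: List[Dict], disease_matches: Set[str]) -> str:
    # When at least two conditions fall outside the matched-disease set,
    # list only the matched names and collapse the rest into a generic
    # 'other disease types' trailing term.
    unmatched = {d['@rid'] for d in diseases} - disease_matches
    if len(unmatched) >= 2 and all(d['@class'] == 'Disease' for d in diseases):
        words = sorted({d['displayName'] for d in diseases if d['@rid'] in disease_matches})
        words.append(OTHER_DISEASES)
        return natural_join(words)
    return natural_join(sorted({d['displayName'] for d in diseases}))


diseases = [  # hypothetical GraphKB disease records
    {'@rid': '#135:1', '@class': 'Disease', 'displayName': 'colorectal cancer'},
    {'@rid': '#135:2', '@class': 'Disease', 'displayName': 'colon adenocarcinoma'},
    {'@rid': '#135:3', '@class': 'Disease', 'displayName': 'melanoma'},
    {'@rid': '#135:4', '@class': 'Disease', 'displayName': 'glioma'},
]
print(merge_diseases_sketch(diseases, {'#135:1', '#135:2'}))
# -> 'colon adenocarcinoma, colorectal cancer, and other disease types'
```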
@@ -105,54 +105,54 @@ def substitute_sentence_template( """Create the filled-in sentence template for a given template and list of substitutions which may be the result of the aggregation of 1 or more statements. """ - disease_conditions = filter_by_record_class(conditions, "Disease") + disease_conditions = filter_by_record_class(conditions, 'Disease') variant_conditions = filter_by_record_class( - conditions, "CategoryVariant", "CatalogueVariant", "PositionalVariant" + conditions, 'CategoryVariant', 'CatalogueVariant', 'PositionalVariant' ) other_conditions = filter_by_record_class( conditions, - "CategoryVariant", - "CatalogueVariant", - "PositionalVariant", - "Disease", + 'CategoryVariant', + 'CatalogueVariant', + 'PositionalVariant', + 'Disease', exclude=True, ) - result = template.replace(r"{relevance}", relevance["displayName"]) + result = template.replace(r'{relevance}', relevance['displayName']) - if r"{subject}" in template: + if r'{subject}' in template: # remove subject from the conditions replacements subjects_ids = convert_to_rid_set(subjects) disease_conditions = [ - cast(Ontology, d) for d in disease_conditions if d["@rid"] not in subjects_ids + cast(Ontology, d) for d in disease_conditions if d['@rid'] not in subjects_ids ] variant_conditions = [ - cast(Ontology, d) for d in variant_conditions if d["@rid"] not in subjects_ids + cast(Ontology, d) for d in variant_conditions if d['@rid'] not in subjects_ids ] - other_conditions = [d for d in other_conditions if d["@rid"] not in subjects_ids] + other_conditions = [d for d in other_conditions if d['@rid'] not in subjects_ids] - result = result.replace(r"{subject}", merge_diseases(subjects, disease_matches)) + result = result.replace(r'{subject}', merge_diseases(subjects, disease_matches)) - if r"{conditions:disease}" in template: + if r'{conditions:disease}' in template: result = result.replace( - r"{conditions:disease}", merge_diseases(disease_conditions, disease_matches) + r'{conditions:disease}', merge_diseases(disease_conditions, disease_matches) ) else: other_conditions.extend(disease_conditions) - if r"{conditions:variant}" in template: - result = result.replace(r"{conditions:variant}", natural_join_records(variant_conditions)) + if r'{conditions:variant}' in template: + result = result.replace(r'{conditions:variant}', natural_join_records(variant_conditions)) else: other_conditions.extend(variant_conditions) - result = result.replace(r"{conditions}", natural_join_records(other_conditions)) + result = result.replace(r'{conditions}', natural_join_records(other_conditions)) - link_url = create_graphkb_link(statement_rids) if statement_rids else "" + link_url = create_graphkb_link(statement_rids) if statement_rids else '' - if r"{evidence}" in template: - evidence_str = ", ".join(sorted(list({e["displayName"] for e in evidence}))) + if r'{evidence}' in template: + evidence_str = ', '.join(sorted(list({e['displayName'] for e in evidence}))) if link_url: evidence_str = f'<a href="{link_url}">{evidence_str}</a>' - result = result.replace(r"{evidence}", evidence_str) + result = result.replace(r'{evidence}', evidence_str) return result @@ -170,18 +170,18 @@ def generate_key(statement: Statement) -> Tuple: result = [ - cond.get("displayName", cond["@rid"]) - for cond in filter_by_record_class(statement["conditions"], "Disease", exclude=True) - if cond["@rid"] != statement["subject"]["@rid"] + cond.get('displayName', cond['@rid']) + for cond in filter_by_record_class(statement['conditions'], 'Disease', exclude=True) + if cond['@rid'] != statement['subject']['@rid'] ]
- if statement.get("subject", {}).get("@class", "Disease") != "Disease": - subject = statement["subject"] - if subject["@class"] == "Therapy": - alt = get_preferred_drug_representation(graphkb_conn, subject["@rid"]) - statement["subject"] = alt - result.append(statement["subject"]["displayName"]) - result.append(statement["relevance"]["displayName"]) - result.append(statement["displayNameTemplate"]) + if statement.get('subject', {}).get('@class', 'Disease') != 'Disease': + subject = statement['subject'] + if subject['@class'] == 'Therapy': + alt = get_preferred_drug_representation(graphkb_conn, subject['@rid']) + statement['subject'] = alt + result.append(statement['subject']['displayName']) + result.append(statement['relevance']['displayName']) + result.append(statement['displayNameTemplate']) return tuple(sorted(set(result))) for statement in statements: @@ -193,12 +193,12 @@ def generate_key(statement: Statement) -> Tuple: conditions = [] subjects = [] evidence = [] - relevance = group[0]["relevance"] - template = group[0]["displayNameTemplate"] + relevance = group[0]['relevance'] + template = group[0]['displayNameTemplate'] for statement in group: - conditions.extend(statement["conditions"]) - evidence.extend(statement["evidence"]) - subjects.append(statement["subject"]) + conditions.extend(statement['conditions']) + evidence.extend(statement['evidence']) + subjects.append(statement['subject']) sentence = substitute_sentence_template( template, @@ -211,35 +211,35 @@ def generate_key(statement: Statement) -> Tuple: for statement in group: - result[statement["@rid"]] = sentence + result[statement['@rid']] = sentence return result def display_variant(variant: IprVariant) -> str: """Short, human readable variant description string.""" - gene = variant.get("gene", "") - if not gene and "gene1" in variant and "gene2" in variant: + gene = variant.get('gene', '') + if not gene and 'gene1' in variant and 'gene2' in variant: gene = f'({variant.get("gene1", "")},{variant.get("gene2", "")})' - if variant.get("kbCategory"): + if variant.get('kbCategory'): return f'{variant.get("kbCategory")} of {gene}' # Special display of IprFusionVariant with exons - if variant.get("exon1") or variant.get("exon2"): + if variant.get('exon1') or variant.get('exon2'): return create_graphkb_sv_notation(variant) # type: ignore # Use chosen legacy 'proteinChange' or an hgvs description of lowest detail. hgvs = variant.get( - "proteinChange", - variant.get("hgvsProtein", variant.get("hgvsCds", variant.get("hgvsGenomic", ""))), + 'proteinChange', + variant.get('hgvsProtein', variant.get('hgvsCds', variant.get('hgvsGenomic', ''))), ) if gene and hgvs: - return f"{gene}:{hgvs}" - elif variant.get("variant"): - return str(variant.get("variant")) + return f'{gene}:{hgvs}' + elif variant.get('variant'): + return str(variant.get('variant')) - raise ValueError(f"Unable to form display_variant of {variant}") + raise ValueError(f'Unable to form display_variant of {variant}') def display_variants(gene_name: str, variants: List[IprVariant]) -> str: @@ -247,11 +247,11 @@ def display_variants(gene_name: str, variants: List[IprVariant]) -> str: variants_text = natural_join(result) if len(result) > 1: return ( - f"Multiple variants of the gene {gene_name} were observed in this case: {variants_text}" + f'Multiple variants of the gene {gene_name} were observed in this case: {variants_text}' ) elif result: - return f"{variants_text[0].upper()}{variants_text[1:]} was observed in this case."
- return "" + return f'{variants_text[0].upper()}{variants_text[1:]} was observed in this case.' + return '' def create_section_html( @@ -264,33 +264,33 @@ """ Generate HTML for a gene section of the comments """ - output.append(f"{content}") - return "\n".join(output) + output.append(f'{content}') + return '\n'.join(output) def section_statements_by_genes( @@ -349,16 +349,16 @@ genes: Dict[str, Set[str]] = {} for statement in statements: - for condition in statement["conditions"]: - if condition.get("biotype", "") == "gene": - gene = get_preferred_gene_name(graphkb_conn, condition["@rid"]) - genes.setdefault(gene, set()).add(statement["@rid"]) + for condition in statement['conditions']: + if condition.get('biotype', '') == 'gene': + gene = get_preferred_gene_name(graphkb_conn, condition['@rid']) + genes.setdefault(gene, set()).add(statement['@rid']) else: - for cond_ref_key in ("reference1", "reference2"): + for cond_ref_key in ('reference1', 'reference2'): cond_ref_gene = condition.get(cond_ref_key) if cond_ref_gene: gene = get_preferred_gene_name(graphkb_conn, str(cond_ref_gene)) - genes.setdefault(gene, set()).add(statement["@rid"]) + genes.setdefault(gene, set()).add(statement['@rid']) return genes @@ -372,12 +372,12 @@ def prep_single_ipr_variant_comment(variant_text): Returns: section: html-formatted string """ - cancer_type = ",".join(variant_text["cancerType"]) + cancer_type = ','.join(variant_text['cancerType']) if not cancer_type: - cancer_type = "no specific cancer types" - cancer_type = f" ({cancer_type})" - section = [f"{variant_text['text']}"] + cancer_type = 'no specific cancer types' + cancer_type = f' ({cancer_type})' + section = [f'{variant_text["text"]}'] return section @@ -385,6 +385,7 @@ def get_ipr_analyst_comments( ipr_conn: IprConnection, matches: Sequence[KbMatch] | Sequence[Hashabledict], disease_name: str, + disease_match_names: List[str], project_name: str, report_type: str, include_nonspecific_disease: bool = False, @@ -403,6 +404,7 @@ ipr_conn: connection to the ipr db matches: list of kbmatches which will be included in the report disease_name: str, eg 'colorectal cancer' + disease_match_names: list[str] of names considered to be equivalent to the disease name project_name: str, eg TEST or pog report_type: str, eg genomic or rapid include_nonspecific_disease: bool - true if variant texts that don't explicitly @@ -414,48 +416,58 @@ Returns: html-formatted string """
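The new disease_match_names argument is fed from the get_kb_disease_matches records in ipr_report above: the RIDs drive GraphKB matching while the names are forwarded to the IPR variant-text lookup. A sketch of the intended call shape, with a MagicMock standing in for a live IPR connection as in the tests below (record values are hypothetical):

```python
from unittest.mock import MagicMock

from pori_python.ipr.summary import get_ipr_analyst_comments

# Records shaped like get_kb_disease_matches output (hypothetical RIDs/names).
disease_match_records = [
    {'@rid': '#135:7', 'name': 'colorectal cancer'},
    {'@rid': '#135:8', 'name': 'colorectal adenocarcinoma'},
]

# Stand-in IPR connection; every .get() call returns an empty result set here.
ipr_conn = MagicMock(get=MagicMock(return_value=[]))

comments_html = get_ipr_analyst_comments(
    ipr_conn,
    matches=[{'kbVariant': 'ERBB2 amplification'}],
    disease_name='colorectal cancer',
    disease_match_names=[rec['name'] for rec in disease_match_records],
    project_name='TEST',
    report_type='genomic',
    include_nonspecific_disease=False,
    include_nonspecific_project=False,
    include_nonspecific_template=False,
)
```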
", - "variantName": "ERBB2 amplification", - "cancerType": [], - "template": {"name": "test3"}, - "project": {"name": "test2"}, + 'text': 'no cancerType
', + 'variantName': 'ERBB2 amplification', + 'cancerType': [], + 'template': {'name': 'test3'}, + 'project': {'name': 'test2'}, }, { - "text": "normal
", - "variantName": "ERBB2 amplification", - "cancerType": ["test1", "test"], - "template": {"name": "test3"}, - "project": {"name": "test2"}, + 'text': 'normal
', + 'variantName': 'ERBB2 amplification', + 'cancerType': ['test1', 'test'], + 'template': {'name': 'test3'}, + 'project': {'name': 'test2'}, }, { - "text": "no project
", - "variantName": "ERBB2 amplification", - "cancerType": ["test1", "test"], - "template": {"name": "test3"}, + 'text': 'no project
', + 'variantName': 'ERBB2 amplification', + 'cancerType': ['test1', 'test'], + 'template': {'name': 'test3'}, }, { - "text": "no template
", - "variantName": "ERBB2 amplification", - "cancerType": ["test1", "test"], - "project": {"name": "test2"}, + 'text': 'no template
', + 'variantName': 'ERBB2 amplification', + 'cancerType': ['test1', 'test'], + 'project': {'name': 'test2'}, }, ], [ { - "text": "normal, second variant
", - "variantName": "second variant", - "cancerType": ["test1", "test"], - "template": {"name": "test3"}, - "project": {"name": "test2"}, + 'text': 'normal, second variant
', + 'variantName': 'second variant', + 'cancerType': ['test1', 'test'], + 'template': {'name': 'test3'}, + 'project': {'name': 'test2'}, }, ], ] -no_comments_found_output = "No comments found in IPR for variants in this report" +no_comments_found_output = 'No comments found in IPR for variants in this report' class TestVariantTextFromIPR: def test_gets_fully_matched_output_when_possible(self): ipr_conn = MagicMock(get=MagicMock(side_effect=copy(mock_ipr_results))) - matches = [{"kbVariant": "ERBB2 amplification"}] + matches = [{'kbVariant': 'ERBB2 amplification'}] ipr_summary = get_ipr_analyst_comments( ipr_conn, matches=matches, - disease_name="test1", - project_name="test2", - report_type="test3", + disease_name='test1', + disease_match_names=[], + project_name='test2', + report_type='test3', include_nonspecific_project=False, include_nonspecific_disease=True, include_nonspecific_template=True, ) - summary_lines = ipr_summary.split("\n") - assert summary_lines[1] == "normal
" + summary_lines = ipr_summary.split('\n') + assert summary_lines[1] == 'normal
' assert len(summary_lines) == 3 def test_omits_nonspecific_project_matches_when_specified(self): ipr_conn = MagicMock(get=MagicMock(side_effect=copy(mock_ipr_results))) - matches = [{"kbVariant": "ERBB2 amplification"}] + matches = [{'kbVariant': 'ERBB2 amplification'}] ipr_summary = get_ipr_analyst_comments( ipr_conn, matches=matches, - disease_name="test1", - project_name="notfound", - report_type="test3", + disease_name='test1', + disease_match_names=[], + project_name='notfound', + report_type='test3', include_nonspecific_project=False, include_nonspecific_disease=True, include_nonspecific_template=True, @@ -238,13 +240,14 @@ def test_omits_nonspecific_project_matches_when_specified(self): def test_omits_nonspecific_template_matches_when_specified(self): ipr_conn = MagicMock(get=MagicMock(side_effect=copy(mock_ipr_results))) - matches = [{"kbVariant": "ERBB2 amplification"}] + matches = [{'kbVariant': 'ERBB2 amplification'}] ipr_summary = get_ipr_analyst_comments( ipr_conn, matches=matches, - disease_name="test1", - project_name="test2", - report_type="notfound", + disease_name='test1', + disease_match_names=[], + project_name='test2', + report_type='notfound', include_nonspecific_project=True, include_nonspecific_disease=True, include_nonspecific_template=False, @@ -253,13 +256,14 @@ def test_omits_nonspecific_template_matches_when_specified(self): def test_omits_nonspecific_disease_matches_when_specified(self): ipr_conn = MagicMock(get=MagicMock(side_effect=copy(mock_ipr_results))) - matches = [{"kbVariant": "ERBB2 amplification"}] + matches = [{'kbVariant': 'ERBB2 amplification'}] ipr_summary = get_ipr_analyst_comments( ipr_conn, matches=matches, - disease_name="notfound", - project_name="test2", - report_type="test3", + disease_name='notfound', + disease_match_names=[], + project_name='test2', + report_type='test3', include_nonspecific_project=True, include_nonspecific_disease=False, include_nonspecific_template=True, @@ -268,86 +272,110 @@ def test_omits_nonspecific_disease_matches_when_specified(self): def test_includes_nonspecific_project_matches_when_specified(self): ipr_conn = MagicMock(get=MagicMock(side_effect=copy(mock_ipr_results))) - matches = [{"kbVariant": "ERBB2 amplification"}] + matches = [{'kbVariant': 'ERBB2 amplification'}] ipr_summary = get_ipr_analyst_comments( ipr_conn, matches=matches, - disease_name="test1", - project_name="notfound", - report_type="test3", + disease_name='test1', + disease_match_names=[], + project_name='notfound', + report_type='test3', include_nonspecific_project=True, include_nonspecific_disease=False, include_nonspecific_template=False, ) - summary_lines = ipr_summary.split("\n") - assert summary_lines[2] == "no project
" + summary_lines = ipr_summary.split('\n') + assert summary_lines[2] == 'no project
' assert len(summary_lines) == 3 def test_includes_nonspecific_template_matches_when_specified(self): ipr_conn = MagicMock(get=MagicMock(side_effect=copy(mock_ipr_results))) - matches = [{"kbVariant": "ERBB2 amplification"}] + matches = [{'kbVariant': 'ERBB2 amplification'}] ipr_summary = get_ipr_analyst_comments( ipr_conn, matches=matches, - disease_name="test1", - project_name="test2", - report_type="notfound", + disease_name='test1', + disease_match_names=[], + project_name='test2', + report_type='notfound', include_nonspecific_project=False, include_nonspecific_disease=False, include_nonspecific_template=True, ) - summary_lines = ipr_summary.split("\n") - assert summary_lines[2] == "no template
" + summary_lines = ipr_summary.split('\n') + assert summary_lines[2] == 'no template
' assert len(summary_lines) == 3 def test_includes_nonspecific_disease_matches_when_specified(self): ipr_conn = MagicMock(get=MagicMock(side_effect=copy(mock_ipr_results))) - matches = [{"kbVariant": "ERBB2 amplification"}] + matches = [{'kbVariant': 'ERBB2 amplification'}] ipr_summary = get_ipr_analyst_comments( ipr_conn, matches=matches, - disease_name="notfound", - project_name="test2", - report_type="test3", + disease_name='notfound', + disease_match_names=[], + project_name='test2', + report_type='test3', include_nonspecific_project=False, include_nonspecific_disease=True, include_nonspecific_template=False, ) - summary_lines = ipr_summary.split("\n") - assert summary_lines[1] == "no cancerType
" + summary_lines = ipr_summary.split('\n') + assert summary_lines[1] == 'no cancerType
' + assert len(summary_lines) == 3 + + def test_includes_all_graphkb_disease_matches(self): + ipr_conn = MagicMock(get=MagicMock(side_effect=copy(mock_ipr_results))) + matches = [{'kbVariant': 'ERBB2 amplification'}] + ipr_summary = get_ipr_analyst_comments( + ipr_conn, + matches=matches, + disease_name='notfound', + disease_match_names=['TEST1'], + project_name='test2', + report_type='test3', + include_nonspecific_project=False, + include_nonspecific_disease=False, + include_nonspecific_template=False, + ) + summary_lines = ipr_summary.split('\n') + assert summary_lines[1] == 'normal
' assert len(summary_lines) == 3 def test_prepare_section_for_multiple_variants(self): ipr_conn = MagicMock(get=MagicMock(side_effect=copy(mock_ipr_results))) # NB this test relies on matches being processed in this order - matches = [{"kbVariant": "ERBB2 amplification"}, {"kbVariant": "second variant"}] + matches = [{'kbVariant': 'ERBB2 amplification'}, {'kbVariant': 'second variant'}] ipr_summary = get_ipr_analyst_comments( ipr_conn, matches=matches, - disease_name="test1", - project_name="test2", - report_type="test3", + disease_name='test1', + disease_match_names=[], + project_name='test2', + report_type='test3', include_nonspecific_project=False, include_nonspecific_disease=False, include_nonspecific_template=False, ) - summary_lines = ipr_summary.split("\n") + summary_lines = ipr_summary.split('\n') assert len(summary_lines) == 5 assert ( - "\n".join(summary_lines[1:]) - == "normal
\nnormal, second variant
" + '\n'.join(summary_lines[1:]) + == 'normal
\nnormal, second variant
' ) def test_empty_section_when_no_variant_match(self): ipr_conn = MagicMock(get=MagicMock(side_effect=[[], []])) - matches = [{"kbVariant": "notfound1"}, {"kbVariant": "notfound2"}] + matches = [{'kbVariant': 'notfound1'}, {'kbVariant': 'notfound2'}] ipr_summary = get_ipr_analyst_comments( ipr_conn, matches=matches, - disease_name="test1", - project_name="test2", - report_type="test3", + disease_name='test1', + disease_match_names=[], + project_name='test2', + report_type='test3', include_nonspecific_project=False, include_nonspecific_disease=False, include_nonspecific_template=False, diff --git a/tests/test_ipr/test_upload.py b/tests/test_ipr/test_upload.py index 95f7fd7a..79568f75 100644 --- a/tests/test_ipr/test_upload.py +++ b/tests/test_ipr/test_upload.py @@ -13,87 +13,97 @@ from .constants import EXCLUDE_INTEGRATION_TESTS -EXCLUDE_BCGSC_TESTS = os.environ.get("EXCLUDE_BCGSC_TESTS") == "1" -EXCLUDE_ONCOKB_TESTS = os.environ.get("EXCLUDE_ONCOKB_TESTS") == "1" -INCLUDE_UPLOAD_TESTS = os.environ.get("INCLUDE_UPLOAD_TESTS", "0") == "1" -DELETE_UPLOAD_TEST_REPORTS = os.environ.get("DELETE_UPLOAD_TEST_REPORTS", "1") == "1" +EXCLUDE_BCGSC_TESTS = os.environ.get('EXCLUDE_BCGSC_TESTS') == '1' +EXCLUDE_ONCOKB_TESTS = os.environ.get('EXCLUDE_ONCOKB_TESTS') == '1' +INCLUDE_UPLOAD_TESTS = os.environ.get('INCLUDE_UPLOAD_TESTS', '0') == '1' +DELETE_UPLOAD_TEST_REPORTS = os.environ.get('DELETE_UPLOAD_TEST_REPORTS', '1') == '1' def get_test_spec(): - ipr_spec = {"components": {"schemas": {"genesCreate": {"properties": {}}}}} + ipr_spec = {'components': {'schemas': {'genesCreate': {'properties': {}}}}} ipr_gene_keys = IprGene.__required_keys__ | IprGene.__optional_keys__ for key in ipr_gene_keys: - ipr_spec["components"]["schemas"]["genesCreate"]["properties"][key] = "" + ipr_spec['components']['schemas']['genesCreate']['properties'][key] = '' return ipr_spec def get_test_file(name: str) -> str: - return os.path.join(os.path.dirname(__file__), "test_data", name) + return os.path.join(os.path.dirname(__file__), 'test_data', name) -@pytest.fixture(scope="module") +@pytest.fixture(scope='module') def loaded_reports(tmp_path_factory) -> Generator: - json_file = tmp_path_factory.mktemp("inputs") / "content.json" - async_json_file = tmp_path_factory.mktemp("inputs") / "async_content.json" - patient_id = f"TEST_{str(uuid.uuid4())}" - async_patient_id = f"TEST_ASYNC_{str(uuid.uuid4())}" + json_file = tmp_path_factory.mktemp('inputs') / 'content.json' + async_json_file = tmp_path_factory.mktemp('inputs') / 'async_content.json' + patient_id = f'TEST_{str(uuid.uuid4())}' + async_patient_id = f'TEST_ASYNC_{str(uuid.uuid4())}' json_contents = { - "comparators": [ - {"analysisRole": "expression (disease)", "name": "1"}, - {"analysisRole": "expression (primary site)", "name": "2"}, - {"analysisRole": "expression (biopsy site)", "name": "3"}, + 'comparators': [ + {'analysisRole': 'expression (disease)', 'name': '1'}, + {'analysisRole': 'expression (primary site)', 'name': '2'}, + {'analysisRole': 'expression (biopsy site)', 'name': '3'}, { - "analysisRole": "expression (internal pancancer cohort)", - "name": "4", + 'analysisRole': 'expression (internal pancancer cohort)', + 'name': '4', }, ], - "patientId": patient_id, - "project": "TEST", - "sampleInfo": [ + 'patientId': patient_id, + 'project': 'TEST', + 'sampleInfo': [ { - "sample": "Constitutional", - "biopsySite": "Normal tissue", - "sampleName": "SAMPLE1-PB", - "primarySite": "Blood-Peripheral", - "collectionDate": "11-11-11", + 'sample': 'Constitutional', + 
'biopsySite': 'Normal tissue', + 'sampleName': 'SAMPLE1-PB', + 'primarySite': 'Blood-Peripheral', + 'collectionDate': '11-11-11', }, { - "sample": "Tumour", - "pathoTc": "90%", - "biopsySite": "hepatic", - "sampleName": "SAMPLE2-FF-1", - "primarySite": "Vena Cava-Hepatic", - "collectionDate": "12-12-12", + 'sample': 'Tumour', + 'pathoTc': '90%', + 'biopsySite': 'hepatic', + 'sampleName': 'SAMPLE2-FF-1', + 'primarySite': 'Vena Cava-Hepatic', + 'collectionDate': '12-12-12', }, ], - "expressionVariants": json.loads( - pd.read_csv(get_test_file("expression.short.tab"), sep="\t").to_json(orient="records") + 'msi': [ + { + 'score': 1000.0, + 'kbCategory': 'microsatellite instability', + } + ], + 'hrd': { + 'score': 9999.0, + 'kbCategory': 'homologous recombination deficiency strong signature', + }, + 'expressionVariants': json.loads( + pd.read_csv(get_test_file('expression.short.tab'), sep='\t').to_json(orient='records') ), - "smallMutations": json.loads( - pd.read_csv(get_test_file("small_mutations.short.tab"), sep="\t").to_json( - orient="records" + 'smallMutations': json.loads( + pd.read_csv(get_test_file('small_mutations.short.tab'), sep='\t').to_json( + orient='records' ) ), - "copyVariants": json.loads( - pd.read_csv(get_test_file("copy_variants.short.tab"), sep="\t").to_json( - orient="records" + 'copyVariants': json.loads( + pd.read_csv(get_test_file('copy_variants.short.tab'), sep='\t').to_json( + orient='records' ) ), - "structuralVariants": json.loads( - pd.read_csv(get_test_file("fusions.tab"), sep="\t").to_json(orient="records") + 'structuralVariants': json.loads( + pd.read_csv(get_test_file('fusions.tab'), sep='\t').to_json(orient='records') ), - "kbDiseaseMatch": "colorectal cancer", - "cosmicSignatures": pd.read_csv( - get_test_file("cosmic_variants.tab"), sep="\t" + 'kbDiseaseMatch': 'colorectal cancer', + 'cosmicSignatures': pd.read_csv( + get_test_file('cosmic_variants.tab'), sep='\t' ).signature.tolist(), - "hlaTypes": json.loads( - pd.read_csv(get_test_file("hla_variants.tab"), sep="\t").to_json(orient="records") + 'hlaTypes': json.loads( + pd.read_csv(get_test_file('hla_variants.tab'), sep='\t').to_json(orient='records') ), - "images": [ + 'images': [ { - "key": "cnvLoh.circos", - "path": "test/testData/images/cnvLoh.png", - "caption": "Test adding a caption to an image", + 'key': 'cnvLoh.circos', + 'path': 'test/testData/images/cnvLoh.png', + 'caption': 'Test adding a caption to an image', } ], } @@ -105,7 +115,7 @@ def loaded_reports(tmp_path_factory) -> Generator: ) ) - json_contents["patientId"] = async_patient_id + json_contents['patientId'] = async_patient_id async_json_file.write_text( json.dumps( json_contents, @@ -114,61 +124,61 @@ def loaded_reports(tmp_path_factory) -> Generator: ) argslist = [ - "ipr", - "--username", - os.environ.get("IPR_USER", os.environ["USER"]), - "--password", - os.environ["IPR_PASS"], - "--graphkb_username", - os.environ.get("GRAPHKB_USER", os.environ.get("IPR_USER", os.environ["USER"])), - "--graphkb_password", - os.environ.get("GRAPHKB_PASS", os.environ["IPR_PASS"]), - "--ipr_url", - os.environ["IPR_TEST_URL"], - "--graphkb_url", - os.environ.get("GRAPHKB_URL", False), - "--therapeutics", - "--allow_partial_matches", + 'ipr', + '--username', + os.environ.get('IPR_USER', os.environ['USER']), + '--password', + os.environ['IPR_PASS'], + '--graphkb_username', + os.environ.get('GRAPHKB_USER', os.environ.get('IPR_USER', os.environ['USER'])), + '--graphkb_password', + os.environ.get('GRAPHKB_PASS', os.environ['IPR_PASS']), + '--ipr_url', + 
os.environ['IPR_TEST_URL'], + '--graphkb_url', + os.environ.get('GRAPHKB_URL', False), + '--therapeutics', + '--allow_partial_matches', ] sync_argslist = argslist.copy() - sync_argslist.extend(["--content", str(json_file)]) - with patch.object(sys, "argv", sync_argslist): - with patch.object(IprConnection, "get_spec", return_value=get_test_spec()): + sync_argslist.extend(['--content', str(json_file)]) + with patch.object(sys, 'argv', sync_argslist): + with patch.object(IprConnection, 'get_spec', return_value=get_test_spec()): command_interface() async_argslist = argslist.copy() - async_argslist.extend(["--content", str(async_json_file), "--async_upload"]) - with patch.object(sys, "argv", async_argslist): - with patch.object(IprConnection, "get_spec", return_value=get_test_spec()): + async_argslist.extend(['--content', str(async_json_file), '--async_upload']) + with patch.object(sys, 'argv', async_argslist): + with patch.object(IprConnection, 'get_spec', return_value=get_test_spec()): command_interface() ipr_conn = IprConnection( - username=os.environ.get("IPR_USER", os.environ["USER"]), - password=os.environ["IPR_PASS"], - url=os.environ["IPR_TEST_URL"], + username=os.environ.get('IPR_USER', os.environ['USER']), + password=os.environ['IPR_PASS'], + url=os.environ['IPR_TEST_URL'], ) - loaded_report = ipr_conn.get(uri=f"reports?searchText={patient_id}") - async_loaded_report = ipr_conn.get(uri=f"reports?searchText={async_patient_id}") + loaded_report = ipr_conn.get(uri=f'reports?searchText={patient_id}') + async_loaded_report = ipr_conn.get(uri=f'reports?searchText={async_patient_id}') loaded_reports_result = { - "sync": (patient_id, loaded_report), - "async": (async_patient_id, async_loaded_report), + 'sync': (patient_id, loaded_report), + 'async': (async_patient_id, async_loaded_report), } yield loaded_reports_result if DELETE_UPLOAD_TEST_REPORTS: - ipr_conn.delete(uri=f"reports/{loaded_report['reports'][0]['ident']}") - ipr_conn.delete(uri=f"reports/{async_loaded_report['reports'][0]['ident']}") + ipr_conn.delete(uri=f'reports/{loaded_report["reports"][0]["ident"]}') + ipr_conn.delete(uri=f'reports/{async_loaded_report["reports"][0]["ident"]}') def get_section(loaded_report, section_name): - ident = loaded_report[1]["reports"][0]["ident"] + ident = loaded_report[1]['reports'][0]['ident'] ipr_conn = IprConnection( - username=os.environ.get("IPR_USER", os.environ["USER"]), - password=os.environ["IPR_PASS"], - url=os.environ["IPR_TEST_URL"], + username=os.environ.get('IPR_USER', os.environ['USER']), + password=os.environ['IPR_PASS'], + url=os.environ['IPR_TEST_URL'], ) - return ipr_conn.get(uri=f"reports/{ident}/{section_name}") + return ipr_conn.get(uri=f'reports/{ident}/{section_name}') def stringify_sorted(obj): @@ -181,7 +191,7 @@ def stringify_sorted(obj): obj.sort() return str(obj) elif isinstance(obj, dict): - for key in ("ident", "updatedAt", "createdAt", "deletedAt"): + for key in ('ident', 'updatedAt', 'createdAt', 'deletedAt'): obj.pop(key, None) keys = obj.keys() for key in keys: @@ -197,50 +207,50 @@ def stringify_sorted(obj): @pytest.mark.skipif( - not INCLUDE_UPLOAD_TESTS, reason="excluding tests of upload to live ipr instance" + not INCLUDE_UPLOAD_TESTS, reason='excluding tests of upload to live ipr instance' ) -@pytest.mark.skipif(EXCLUDE_INTEGRATION_TESTS, reason="excluding long running integration tests") +@pytest.mark.skipif(EXCLUDE_INTEGRATION_TESTS, reason='excluding long running integration tests') class TestCreateReport: def test_patient_id_loaded_once(self, 
loaded_reports) -> None: - sync_patient_id = loaded_reports["sync"][0] - assert loaded_reports["sync"][1]["total"] == 1 - assert loaded_reports["sync"][1]["reports"][0]["patientId"] == sync_patient_id - async_patient_id = loaded_reports["async"][0] - assert loaded_reports["async"][1]["total"] == 1 - assert loaded_reports["async"][1]["reports"][0]["patientId"] == async_patient_id + sync_patient_id = loaded_reports['sync'][0] + assert loaded_reports['sync'][1]['total'] == 1 + assert loaded_reports['sync'][1]['reports'][0]['patientId'] == sync_patient_id + async_patient_id = loaded_reports['async'][0] + assert loaded_reports['async'][1]['total'] == 1 + assert loaded_reports['async'][1]['reports'][0]['patientId'] == async_patient_id def test_expression_variants_loaded(self, loaded_reports) -> None: - section = get_section(loaded_reports["sync"], "expression-variants") - kbmatched = [item for item in section if item["kbMatches"]] - assert "PTP4A3" in [item["gene"]["name"] for item in kbmatched] - async_section = get_section(loaded_reports["async"], "expression-variants") + section = get_section(loaded_reports['sync'], 'expression-variants') + kbmatched = [item for item in section if item['kbMatches']] + assert 'PTP4A3' in [item['gene']['name'] for item in kbmatched] + async_section = get_section(loaded_reports['async'], 'expression-variants') async_equals_sync = stringify_sorted(section) == stringify_sorted(async_section) assert async_equals_sync def test_structural_variants_loaded(self, loaded_reports) -> None: - section = get_section(loaded_reports["sync"], "structural-variants") - kbmatched = [item for item in section if item["kbMatches"]] - assert "(EWSR1,FLI1):fusion(e.7,e.4)" in [item["displayName"] for item in kbmatched] - async_section = get_section(loaded_reports["async"], "structural-variants") + section = get_section(loaded_reports['sync'], 'structural-variants') + kbmatched = [item for item in section if item['kbMatches']] + assert '(EWSR1,FLI1):fusion(e.7,e.4)' in [item['displayName'] for item in kbmatched] + async_section = get_section(loaded_reports['async'], 'structural-variants') async_equals_sync = stringify_sorted(section) == stringify_sorted(async_section) assert async_equals_sync def test_small_mutations_loaded(self, loaded_reports) -> None: - section = get_section(loaded_reports["sync"], "small-mutations") - kbmatched = [item for item in section if item["kbMatches"]] - assert "FGFR2:p.R421C" in [item["displayName"] for item in kbmatched] - assert "CDKN2A:p.T18M" in [item["displayName"] for item in kbmatched] - async_section = get_section(loaded_reports["async"], "small-mutations") + section = get_section(loaded_reports['sync'], 'small-mutations') + kbmatched = [item for item in section if item['kbMatches']] + assert 'FGFR2:p.R421C' in [item['displayName'] for item in kbmatched] + assert 'CDKN2A:p.T18M' in [item['displayName'] for item in kbmatched] + async_section = get_section(loaded_reports['async'], 'small-mutations') async_equals_sync = stringify_sorted(section) == stringify_sorted(async_section) assert async_equals_sync def test_copy_variants_loaded(self, loaded_reports) -> None: - section = get_section(loaded_reports["sync"], "copy-variants") - kbmatched = [item for item in section if item["kbMatches"]] - assert ("ERBB2", "amplification") in [ - (item["gene"]["name"], item["displayName"]) for item in kbmatched + section = get_section(loaded_reports['sync'], 'copy-variants') + kbmatched = [item for item in section if item['kbMatches']] + assert ('ERBB2', 
'amplification') in [ + (item['gene']['name'], item['displayName']) for item in kbmatched ] - async_section = get_section(loaded_reports["async"], "copy-variants") + async_section = get_section(loaded_reports['async'], 'copy-variants') async_equals_sync = stringify_sorted(section) == stringify_sorted(async_section) assert async_equals_sync @@ -255,58 +265,58 @@ def test_copy_variants_loaded(self, loaded_reports) -> None: # assert compare_sections(section, async_section) def test_kb_matches_loaded(self, loaded_reports) -> None: - section = get_section(loaded_reports["sync"], "kb-matches") + section = get_section(loaded_reports['sync'], 'kb-matches') observed_and_matched = set( - [(item["kbVariant"], item["variant"]["displayName"]) for item in section] + [(item['kbVariant'], item['variant']['displayName']) for item in section] ) for pair in [ - ("ERBB2 amplification", "amplification"), - ("FGFR2 mutation", "FGFR2:p.R421C"), - ("PTP4A3 overexpression", "increased expression"), - ("EWSR1 and FLI1 fusion", "(EWSR1,FLI1):fusion(e.7,e.4)"), - ("CDKN2A mutation", "CDKN2A:p.T18M"), + ('ERBB2 amplification', 'amplification'), + ('FGFR2 mutation', 'FGFR2:p.R421C'), + ('PTP4A3 overexpression', 'increased expression'), + ('EWSR1 and FLI1 fusion', '(EWSR1,FLI1):fusion(e.7,e.4)'), + ('CDKN2A mutation', 'CDKN2A:p.T18M'), ]: assert pair in observed_and_matched - async_section = get_section(loaded_reports["async"], "kb-matches") + async_section = get_section(loaded_reports['async'], 'kb-matches') async_equals_sync = stringify_sorted(section) == stringify_sorted(async_section) assert async_equals_sync def test_therapeutic_targets_loaded(self, loaded_reports) -> None: - section = get_section(loaded_reports["sync"], "therapeutic-targets") - therapeutic_target_genes = set([item["gene"] for item in section]) - for gene in ["CDKN2A", "ERBB2", "FGFR2", "PTP4A3"]: + section = get_section(loaded_reports['sync'], 'therapeutic-targets') + therapeutic_target_genes = set([item['gene'] for item in section]) + for gene in ['CDKN2A', 'ERBB2', 'FGFR2', 'PTP4A3']: assert gene in therapeutic_target_genes - async_section = get_section(loaded_reports["async"], "therapeutic-targets") + async_section = get_section(loaded_reports['async'], 'therapeutic-targets') async_equals_sync = stringify_sorted(section) == stringify_sorted(async_section) assert async_equals_sync def test_genomic_alterations_identified_loaded(self, loaded_reports) -> None: - section = get_section(loaded_reports["sync"], "summary/genomic-alterations-identified") - variants = set([item["geneVariant"] for item in section]) + section = get_section(loaded_reports['sync'], 'summary/genomic-alterations-identified') + variants = set([item['geneVariant'] for item in section]) for variant in [ - "FGFR2:p.R421C", - "PTP4A3 (high_percentile)", - "ERBB2 (Amplification)", - "(EWSR1,FLI1):fusion(e.7,e.4)", - "CDKN2A:p.T18M", + 'FGFR2:p.R421C', + 'PTP4A3 (high_percentile)', + 'ERBB2 (Amplification)', + '(EWSR1,FLI1):fusion(e.7,e.4)', + 'CDKN2A:p.T18M', ]: assert variant in variants async_section = get_section( - loaded_reports["async"], "summary/genomic-alterations-identified" + loaded_reports['async'], 'summary/genomic-alterations-identified' ) async_equals_sync = stringify_sorted(section) == stringify_sorted(async_section) assert async_equals_sync def test_analyst_comments_loaded(self, loaded_reports) -> None: - sync_section = get_section(loaded_reports["sync"], "summary/analyst-comments") - assert sync_section["comments"] - async_section = 
get_section(loaded_reports["async"], "summary/analyst-comments") - assert async_section["comments"] - assert sync_section["comments"] == async_section["comments"] + sync_section = get_section(loaded_reports['sync'], 'summary/analyst-comments') + assert sync_section['comments'] + async_section = get_section(loaded_reports['async'], 'summary/analyst-comments') + assert async_section['comments'] + assert sync_section['comments'] == async_section['comments'] def test_sample_info_loaded(self, loaded_reports) -> None: - sync_section = get_section(loaded_reports["sync"], "sample-info") - async_section = get_section(loaded_reports["async"], "sample-info") + sync_section = get_section(loaded_reports['sync'], 'sample-info') + async_section = get_section(loaded_reports['async'], 'sample-info') async_equals_sync = stringify_sorted(sync_section) == stringify_sorted(async_section) assert async_equals_sync @@ -322,31 +332,31 @@ def test_multivariant_multiconditionset_statements_loaded(self, loaded_reports) are met. This is also a test of multiple condition sets since there are two variants in the test data that satisfy one of the conditions (the APC mutation).""" - section = get_section(loaded_reports["sync"], "kb-matches/kb-matched-statements") - multivariant_stmts = [item for item in section if item["reference"] == "pmid:27302369"] + section = get_section(loaded_reports['sync'], 'kb-matches/kb-matched-statements') + multivariant_stmts = [item for item in section if item['reference'] == 'pmid:27302369'] # if this statement is entered more than once there may be multiple sets of records to # check, so to make sure the count checks work, go stmt_id by stmt_id: - stmt_ids = list(set([item["kbStatementId"] for item in multivariant_stmts])) + stmt_ids = list(set([item['kbStatementId'] for item in multivariant_stmts])) for stmt_id in stmt_ids: - stmts = [item for item in multivariant_stmts if item["kbStatementId"] == stmt_id] + stmts = [item for item in multivariant_stmts if item['kbStatementId'] == stmt_id] - # we expect two stmts, one for each condition set - assert len(stmts) == 2 + # we expect three stmts, one for each condition set + assert len(stmts) == 3 # we expect each condition set to have two kb variants in it # we expect the two kb variants to be the same in each stmt - assert len(stmts[0]["kbMatches"]) == 2 - assert len(stmts[1]["kbMatches"]) == 2 - kbmatches1 = [item["kbVariant"] for item in stmts[0]["kbMatches"]] - kbmatches2 = [item["kbVariant"] for item in stmts[1]["kbMatches"]] + assert len(stmts[0]['kbMatches']) == 2 + assert len(stmts[1]['kbMatches']) == 2 + kbmatches1 = [item['kbVariant'] for item in stmts[0]['kbMatches']] + kbmatches2 = [item['kbVariant'] for item in stmts[1]['kbMatches']] kbmatches1.sort() kbmatches2.sort() - assert kbmatches1 == kbmatches2 == ["APC mutation", "KRAS mutation"] + assert kbmatches1 == kbmatches2 == ['APC mutation', 'KRAS mutation'] # we expect the two stmts to have different observed variant sets - observedVariants1 = [item["variant"]["ident"] for item in stmts[0]["kbMatches"]] - observedVariants2 = [item["variant"]["ident"] for item in stmts[1]["kbMatches"]] + observedVariants1 = [item['variant']['ident'] for item in stmts[0]['kbMatches']] + observedVariants2 = [item['variant']['ident'] for item in stmts[1]['kbMatches']] observedVariants1.sort() observedVariants2.sort() assert observedVariants1 != observedVariants2 diff --git a/tests/test_ipr/test_util.py b/tests/test_ipr/test_util.py index 70318818..bbae6d98 100644 --- a/tests/test_ipr/test_util.py +++ 
b/tests/test_ipr/test_util.py @@ -4,8 +4,8 @@ @pytest.mark.parametrize( - "input,output_keys", - [[{"key": 0}, ["key"]], [{"key": None}, []], [{"key": ""}, []], [{"gene1": None}, ["gene1"]]], + 'input,output_keys', + [[{'key': 0}, ['key']], [{'key': None}, []], [{'key': ''}, []], [{'gene1': None}, ['gene1']]], ) def test_trim_empty_values(input, output_keys): modified_object = trim_empty_values(input) @@ -13,17 +13,17 @@ def test_trim_empty_values(input, output_keys): @pytest.mark.parametrize( - "variant,result", + 'variant,result', [ [ - {"variantType": "exp", "gene": "GENE", "expressionState": "increased expression"}, - "increased expression", + {'variantType': 'exp', 'gene': 'GENE', 'expressionState': 'increased expression'}, + 'increased expression', ], - [{"variantType": "cnv", "gene": "GENE", "cnvState": "amplification"}, "amplification"], - [{"variantType": "other", "gene2": "GENE", "variant": "GENE:anything"}, "anything"], + [{'variantType': 'cnv', 'gene': 'GENE', 'cnvState': 'amplification'}, 'amplification'], + [{'variantType': 'other', 'gene2': 'GENE', 'variant': 'GENE:anything'}, 'anything'], ], ) def test_create_variant_name_tuple(variant, result): gene, name = create_variant_name_tuple(variant) assert name == result - assert gene == "GENE" + assert gene == 'GENE'
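For reference, the behaviour pinned down by the parametrized cases above can be exercised directly. A short sketch, assuming the helpers are imported from pori_python.ipr.util as the test module name suggests:

```python
from pori_python.ipr.util import create_variant_name_tuple, trim_empty_values

# Falsy-but-meaningful values survive, None is dropped, and gene keys
# (e.g. gene1) are kept even when None, per the parametrized cases above.
print(trim_empty_values({'key': 0}))       # {'key': 0}
print(trim_empty_values({'key': None}))    # {}
print(trim_empty_values({'gene1': None}))  # {'gene1': None}

# The (gene, name) tuple picks the state field that matches the variant type.
gene, name = create_variant_name_tuple(
    {'variantType': 'cnv', 'gene': 'GENE', 'cnvState': 'amplification'}
)
print(gene, name)  # GENE amplification
```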