From d3062b778f24590df1e076f6b9f272759820b1de Mon Sep 17 00:00:00 2001 From: Dustin Bleile Date: Wed, 12 Nov 2025 11:36:44 -0800 Subject: [PATCH 01/23] SDEV-5193 - lint - match.py - eliminate mypy type errors. --- pori_python/graphkb/match.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pori_python/graphkb/match.py b/pori_python/graphkb/match.py index 0c79138..6615ff4 100644 --- a/pori_python/graphkb/match.py +++ b/pori_python/graphkb/match.py @@ -315,15 +315,15 @@ def equivalent_types( # Convert rid to displayName if needed if looks_like_rid(type1): - type1 = conn.get_records_by_id([type1])[0]["displayName"] + type1 = str(conn.get_records_by_id([type1])[0].get("displayName", type1)) if looks_like_rid(type2): - type2 = conn.get_records_by_id([type2])[0]["displayName"] + type2 = str(conn.get_records_by_id([type2])[0].get("displayName", type2)) # Get type terms from observed variant - terms1 = [] + terms1 = set() if strict: try: - terms1.append(get_term_by_name(conn, type1)["@rid"]) + terms1.add(get_term_by_name(conn, type1)["@rid"]) except Exception: pass else: From 592d28f2c0705cabe566c5daca6a76191c506fee Mon Sep 17 00:00:00 2001 From: Dustin Bleile Date: Wed, 12 Nov 2025 12:00:13 -0800 Subject: [PATCH 02/23] SDEV-5193 - lint - inputs.py - remove mypy errors - IprCopyVariant has chromosomeBand --- pori_python/ipr/inputs.py | 22 +++++++++++----------- pori_python/types.py | 1 + 2 files changed, 12 insertions(+), 11 deletions(-) diff --git a/pori_python/ipr/inputs.py b/pori_python/ipr/inputs.py index dcff908..b78e059 100644 --- a/pori_python/ipr/inputs.py +++ b/pori_python/ipr/inputs.py @@ -248,20 +248,20 @@ def row_key(row: Dict) -> Tuple[str, ...]: row["cnvState"] = display_name_mapping[kb_cat] row["variant"] = kb_cat row["variantType"] = "cnv" - chrband = row.get("chromosomeBand", False) - chrom = row.pop("chromosome", False) - if not chrom: - chrom = row.pop("chr", False) - # remove chr if it was not used for chrom - row.pop("chr", False) - if chrom: + + chrom = "" + if "chromosome" in row: + chrom = str(row.pop("chromosome", "")) # type: ignore + elif "chr" in row: + chrom = str(row.pop("chr", "")) # type: ignore + + chrband = row.get("chromosomeBand", "") + if chrom and chrband: # check that chr isn't already in the chrband; # this regex from https://vrs.ga4gh.org/en/1.2/terms_and_model.html#id25 - if chrband and (re.match(r"^cen|[pq](ter|([1-9][0-9]*(\.[1-9][0-9]*)?))$", chrband)): - if isinstance(chrom, int): - chrom = str(chrom) + if re.match(r"^cen|[pq](ter|([1-9][0-9]*(\.[1-9][0-9]*)?))$", chrband): chrom = chrom.strip("chr") - row["chromosomeBand"] = chrom + row["chromosomeBand"] + row["chromosomeBand"] = chrom + chrband return ret_list diff --git a/pori_python/types.py b/pori_python/types.py index faec8f2..92415fc 100644 --- a/pori_python/types.py +++ b/pori_python/types.py @@ -160,6 +160,7 @@ class IprCopyVariant(IprGeneVariant): # variantType == 'cnv' kbCategory: str cnvState: str + chromosomeBand: str class IprExprVariant(IprGeneVariant): From 24dec890798f2c1a952109d135449fe51382671e Mon Sep 17 00:00:00 2001 From: Dustin Bleile Date: Wed, 12 Nov 2025 12:13:31 -0800 Subject: [PATCH 03/23] SDEV-5193 - lint - test_inputs.py - mypy type fixes - use Sequence instead of Iterable type for len property --- pori_python/ipr/inputs.py | 10 +++++----- tests/test_ipr/test_inputs.py | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/pori_python/ipr/inputs.py b/pori_python/ipr/inputs.py index b78e059..2858d56 100644 --- a/pori_python/ipr/inputs.py +++ b/pori_python/ipr/inputs.py @@ -11,7 +11,7 @@ import re from Bio.Data.IUPACData import protein_letters_3to1 from numpy import nan -from typing import Any, Callable, Dict, Iterable, List, Set, Tuple, cast +from typing import Any, Callable, Dict, Iterable, List, Sequence, Set, Tuple, cast from pori_python.graphkb.match import INPUT_COPY_CATEGORIES, INPUT_EXPRESSION_CATEGORIES from pori_python.types import ( @@ -441,7 +441,7 @@ def row_key(row: Dict) -> Tuple[str, ...]: return result -def preprocess_cosmic(rows: Iterable[Dict]) -> Iterable[Dict]: +def preprocess_cosmic(rows: Iterable[Dict]) -> Sequence[Dict]: """ Process cosmic inputs into preformatted signature inputs Note: Cosmic and dMMR already evaluated against thresholds in gsc_report @@ -456,7 +456,7 @@ def preprocess_cosmic(rows: Iterable[Dict]) -> Iterable[Dict]: ] -def preprocess_hla(rows: Iterable[Dict]) -> Iterable[Dict]: +def preprocess_hla(rows: Iterable[Dict]) -> Sequence[Dict]: """ Process hla inputs into preformatted signature inputs """ @@ -480,7 +480,7 @@ def preprocess_hla(rows: Iterable[Dict]) -> Iterable[Dict]: def preprocess_tmb( tmb_high: float, tmburMutationBurden: Dict = {}, genomeTmb: float | str = "" -) -> Iterable[Dict]: +) -> Sequence[Dict]: """ Process tumour mutation burden (tmb) input(s) into preformatted signature input. Get compared to threshold; signature CategoryVariant created only if threshold met. @@ -530,7 +530,7 @@ def preprocess_tmb( return [] -def preprocess_msi(msi: Any) -> Iterable[Dict]: +def preprocess_msi(msi: Any) -> Sequence[Dict]: """ Process micro-satellite input into preformatted signature input. Both msi & mss gets mapped to corresponding GraphKB Signature CategoryVariants. diff --git a/tests/test_ipr/test_inputs.py b/tests/test_ipr/test_inputs.py index 61e9e0f..98544bb 100644 --- a/tests/test_ipr/test_inputs.py +++ b/tests/test_ipr/test_inputs.py @@ -48,7 +48,7 @@ "HLA-C*06", } EXPECTED_TMB = {TMB_SIGNATURE} -EXPECTED_MSI = {MSI_MAPPING.get("microsatellite instability")["signatureName"]} +EXPECTED_MSI = {MSI_MAPPING.get("microsatellite instability", {})["signatureName"]} def read_data_file(filename): From e6655ecaaba441a583cd27204d202d786bed82a5 Mon Sep 17 00:00:00 2001 From: Dustin Bleile Date: Wed, 12 Nov 2025 12:22:41 -0800 Subject: [PATCH 04/23] SDEV-5193 - lint - ipr.py - excluded variable - type consistency --- pori_python/ipr/ipr.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pori_python/ipr/ipr.py b/pori_python/ipr/ipr.py index 8487dc1..966ba3b 100644 --- a/pori_python/ipr/ipr.py +++ b/pori_python/ipr/ipr.py @@ -441,7 +441,7 @@ def multi_variant_filtering( statements = res["result"] # Get set of excluded Vocabulary RIDs for variant types - excluded = {} + excluded = set() if len(excludedTypes) != 0 and excludedTypes[0] != "": excluded = gkb_vocab.get_terms_set(graphkb_conn, excludedTypes) From 5cdc11a9ec738fc8039019288d40d18edf3d4c10 Mon Sep 17 00:00:00 2001 From: Dustin Bleile Date: Wed, 12 Nov 2025 12:32:14 -0800 Subject: [PATCH 05/23] SDEV-5193 - lint - annotate.py - annotate_signature_variants - return type matches description --- pori_python/ipr/annotate.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pori_python/ipr/annotate.py b/pori_python/ipr/annotate.py index 72ae762..a95867d 100644 --- a/pori_python/ipr/annotate.py +++ b/pori_python/ipr/annotate.py @@ -328,7 +328,7 @@ def annotate_signature_variants( Returns: list of kbMatches records for IPR """ - alterations: List[Hashabledict] = [] + alterations: List[KbMatch] = [] iterfunc = tqdm if show_progress else iter for variant in iterfunc(variants): @@ -360,7 +360,7 @@ def annotate_signature_variants( ): ipr_row["variant"] = variant["key"] ipr_row["variantType"] = "sigv" - alterations.append(Hashabledict(ipr_row)) + alterations.append(KbMatch(ipr_row)) except ValueError as err: logger.error(f"failed to match signature category variant '{variant}': {err}") From f0f68fa7ab8288c73b40bd4c11c83c0c4b4e5185 Mon Sep 17 00:00:00 2001 From: Dustin Bleile Date: Wed, 12 Nov 2025 12:52:16 -0800 Subject: [PATCH 06/23] SDEV-5193 - lint - annotate_positional_variants - ignore type error from IprSmallMutationVariant gene vs IprFusionVariant gene1 and gene2 --- pori_python/ipr/annotate.py | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/pori_python/ipr/annotate.py b/pori_python/ipr/annotate.py index a95867d..d52d21b 100644 --- a/pori_python/ipr/annotate.py +++ b/pori_python/ipr/annotate.py @@ -239,10 +239,11 @@ def annotate_positional_variants( continue for var_key in VARIANT_KEYS: - variant = row.get(var_key) + variant = row.get(var_key, "") matches = [] if not variant or isnull(variant): continue + variant = str(variant) try: try: matches = gkb_match.match_positional_variant(graphkb_conn, variant) @@ -277,15 +278,15 @@ def annotate_positional_variants( except FeatureNotFoundError as err: logger.debug(f"failed to match positional variants ({variant}): {err}") errors += 1 - if "gene" in row: - problem_genes.add(row["gene"]) - elif "gene1" in row and f"({row['gene1']})" in str(err): - problem_genes.add(row["gene1"]) - elif "gene2" in row and f"({row['gene2']})" in str(err): - problem_genes.add(row["gene2"]) - elif "gene1" in row and "gene2" in row: - problem_genes.add(row["gene1"]) - problem_genes.add(row["gene2"]) + if row.get("gene"): + problem_genes.add(row["gene"]) # type: ignore + elif row.get("gene1") and f"({row['gene1']})" in str(err): # type: ignore + problem_genes.add(row["gene1"]) # type: ignore + elif row.get("gene2") and f"({row['gene2']})" in str(err): # type: ignore + problem_genes.add(row["gene2"]) # type: ignore + elif row.get("gene1") and row.get("gene2"): # type: ignore + problem_genes.add(row["gene1"]) # type: ignore + problem_genes.add(row["gene2"]) # type: ignore else: raise err except HTTPError as err: From cd67aa85413d4130e4d63627a1efa7101912e312 Mon Sep 17 00:00:00 2001 From: Dustin Bleile Date: Wed, 12 Nov 2025 13:15:27 -0800 Subject: [PATCH 07/23] SDEV-5193 - lint - mypy - annoate.py - resolve KbMatch types vs Hashabledict conflicts --- pori_python/ipr/annotate.py | 36 +++++++++++++++--------------------- pori_python/ipr/ipr.py | 6 +++--- pori_python/ipr/main.py | 10 ++++------ 3 files changed, 22 insertions(+), 30 deletions(-) diff --git a/pori_python/ipr/annotate.py b/pori_python/ipr/annotate.py index d52d21b..8c4148d 100644 --- a/pori_python/ipr/annotate.py +++ b/pori_python/ipr/annotate.py @@ -213,7 +213,7 @@ def annotate_positional_variants( variants: Sequence[IprStructuralVariant] | Sequence[Hashabledict], disease_matches: List[str], show_progress: bool = False, -) -> List[Hashabledict]: +) -> List[KbMatch]: """Annotate SNP, INDEL or fusion variant calls with GraphKB and return in IPR match format. Hashable type is required to turn lists into sets. @@ -228,7 +228,7 @@ def annotate_positional_variants( """ VARIANT_KEYS = ("variant", "hgvsProtein", "hgvsCds", "hgvsGenomic") errors = 0 - alterations: List[Hashabledict] = [] + alterations: List[KbMatch] = [] problem_genes = set() iterfunc = tqdm if show_progress else iter @@ -273,7 +273,7 @@ def annotate_positional_variants( ipr_row["variantType"] = row.get( "variantType", "mut" if row.get("gene") else "sv" ) - alterations.append(Hashabledict(ipr_row)) + alterations.append(ipr_row) except FeatureNotFoundError as err: logger.debug(f"failed to match positional variants ({variant}): {err}") @@ -361,7 +361,7 @@ def annotate_signature_variants( ): ipr_row["variant"] = variant["key"] ipr_row["variantType"] = "sigv" - alterations.append(KbMatch(ipr_row)) + alterations.append(ipr_row) except ValueError as err: logger.error(f"failed to match signature category variant '{variant}': {err}") @@ -385,7 +385,7 @@ def annotate_variants( structural_variants: Sequence[IprStructuralVariant] = [], copy_variants: List[IprCopyVariant] = [], expression_variants: List[IprExprVariant] = [], -) -> List[Hashabledict]: +) -> List[KbMatch]: """Annotating (matching to GraphKB) all observed variants, per type Args: graphkb_conn: the graphkb api connection object @@ -399,7 +399,7 @@ def annotate_variants( Returns: A list of matched Statements to GraphKB """ - gkb_matches: List[Hashabledict] = [] + gkb_matches: List[KbMatch] = [] # MATCHING SIGNATURE CATEGORY VARIANTS logger.info(f"annotating {len(signature_variants)} signatures") @@ -434,27 +434,21 @@ def annotate_variants( # MATCHING COPY VARIANTS logger.info(f"annotating {len(copy_variants)} copy variants") gkb_matches.extend( - [ - Hashabledict(copy_var) - for copy_var in annotate_copy_variants( - graphkb_conn, disease_matches, copy_variants, show_progress=interactive - ) - ] + annotate_copy_variants( + graphkb_conn, disease_matches, copy_variants, show_progress=interactive + ) ) logger.debug(f"\tgkb_matches: {len(gkb_matches)}") # MATCHING EXPRESSION VARIANTS logger.info(f"annotating {len(expression_variants)} expression variants") gkb_matches.extend( - [ - Hashabledict(exp_var) - for exp_var in annotate_expression_variants( - graphkb_conn, - disease_matches, - expression_variants, - show_progress=interactive, - ) - ] + annotate_expression_variants( + graphkb_conn, + disease_matches, + expression_variants, + show_progress=interactive, + ) ) logger.debug(f"\tgkb_matches: {len(gkb_matches)}") diff --git a/pori_python/ipr/ipr.py b/pori_python/ipr/ipr.py index 966ba3b..60b9139 100644 --- a/pori_python/ipr/ipr.py +++ b/pori_python/ipr/ipr.py @@ -266,7 +266,7 @@ def select_expression_plots( def create_key_alterations( - kb_matches: List[Hashabledict], all_variants: Sequence[IprVariant] + kb_matches: List[KbMatch], all_variants: Sequence[IprVariant] ) -> Tuple[List[Dict], Dict]: """Create the list of significant variants matched by the KB. @@ -328,10 +328,10 @@ def create_key_alterations( def germline_kb_matches( - kb_matches: List[Hashabledict], + kb_matches: List[KbMatch], all_variants: Sequence[IprVariant], assume_somatic: bool = True, -) -> List[Hashabledict]: +) -> List[KbMatch]: """Filter kb_matches for matching to germline or somatic events using the 'germline' optional property. Statements related to pharmacogenomic toxicity or cancer predisposition are only relevant if diff --git a/pori_python/ipr/main.py b/pori_python/ipr/main.py index 63a028f..d331704 100644 --- a/pori_python/ipr/main.py +++ b/pori_python/ipr/main.py @@ -12,13 +12,13 @@ from pori_python.graphkb import GraphKBConnection from pori_python.graphkb.genes import get_gene_information from pori_python.types import ( - Hashabledict, IprCopyVariant, IprExprVariant, IprFusionVariant, IprSignatureVariant, IprSmallMutationVariant, IprVariant, + KbMatch, ) from .annotate import annotate_variants @@ -428,7 +428,7 @@ def ipr_report( disease_matches: list[str] = get_kb_disease_matches(graphkb_conn, kb_disease_match) # GKB MATCHING (AKA ANNOTATION) - gkb_matches: List[Hashabledict] = annotate_variants( + gkb_matches: List[KbMatch] = annotate_variants( graphkb_conn=graphkb_conn, interactive=interactive, disease_matches=disease_matches, @@ -453,16 +453,14 @@ def ipr_report( if match_germline: # verify germline kb statements matched germline observed variants, not somatic variants org_len = len(gkb_matches) - gkb_matches = [ - Hashabledict(match) for match in germline_kb_matches(gkb_matches, all_variants) - ] + gkb_matches = germline_kb_matches(gkb_matches, all_variants) num_removed = org_len - len(gkb_matches) if num_removed: logger.info(f"Removing {num_removed} germline events without medical matches.") if custom_kb_match_filter: logger.info(f"custom_kb_match_filter on {len(gkb_matches)} variants") - gkb_matches = [Hashabledict(match) for match in custom_kb_match_filter(gkb_matches)] + gkb_matches = custom_kb_match_filter(gkb_matches) logger.info(f"\t custom_kb_match_filter left {len(gkb_matches)} variants") # KEY ALTERATIONS From 1639e830a70d040e3e2f65e412abec7e69d67bbb Mon Sep 17 00:00:00 2001 From: Dustin Bleile Date: Wed, 12 Nov 2025 13:39:08 -0800 Subject: [PATCH 08/23] SDEV-5193 - lint - mypy - ipr.py - use Cast to reduce type errors --- pori_python/ipr/ipr.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/pori_python/ipr/ipr.py b/pori_python/ipr/ipr.py index 60b9139..8515a04 100644 --- a/pori_python/ipr/ipr.py +++ b/pori_python/ipr/ipr.py @@ -8,7 +8,7 @@ import uuid from copy import copy from itertools import product -from typing import Dict, Iterable, List, Optional, Sequence, Set, Tuple, cast +from typing import Any, Dict, Iterable, List, Optional, Sequence, Set, Tuple, cast from pori_python.graphkb import GraphKBConnection from pori_python.graphkb import statement as gkb_statement @@ -510,7 +510,8 @@ def get_kb_matched_statements( for item in gkb_matches: stmt = copy(item) stmt["requiredKbMatches"].sort() - kbs = KbMatchedStatement({key: val for (key, val) in stmt.items() if key in kbs_keys}) + kbs_dict = {key: val for (key, val) in stmt.items() if key in kbs_keys} + kbs = cast(KbMatchedStatement, kbs_dict) dict_key = str(kbs) kbMatchedStatements[dict_key] = kbs return [*kbMatchedStatements.values()] @@ -569,7 +570,7 @@ def get_kb_statement_matched_conditions( for kbStmt in kbMatchedStatements: stmts = [item for item in gkb_matches if item["kbStatementId"] == kbStmt["kbStatementId"]] - requirements = {} + requirements: Dict[str, str | Any] = {} for requirement in stmts[0]["requiredKbMatches"]: if not requirements.get(requirement, False): # only use explicit variant/statement links @@ -602,7 +603,7 @@ def get_kb_statement_matched_conditions( ) kbmc = KbMatchedStatementConditionSet( { - "kbStatementId": conditionSet["kbStatementId"], + "kbStatementId": str(conditionSet.get("kbStatementId", "")), "matchedConditions": matchedConditions, } ) @@ -622,11 +623,12 @@ def get_kb_matches_sections( kb_statement_matched_conditions = get_kb_statement_matched_conditions( gkb_matches, allow_partial_matches ) - return { + ret_dict = { "kbMatches": kb_variants, "kbMatchedStatements": kb_matched_statements, "kbStatementMatchedConditions": kb_statement_matched_conditions, } + return cast(KbMatchSections, ret_dict) def get_kb_disease_matches( From 0ee36c7b6e2f68ca69a81e28e8ba3ba06a1e5d9e Mon Sep 17 00:00:00 2001 From: Dustin Bleile Date: Fri, 14 Nov 2025 12:22:21 -0800 Subject: [PATCH 09/23] SDEV-5193 - refactor typing - fixes for mypy typing checks --- pori_python/ipr/annotate.py | 30 +++++++++++++++--------------- pori_python/ipr/inputs.py | 9 ++++++--- pori_python/ipr/ipr.py | 12 ++++++------ pori_python/ipr/main.py | 4 ++-- pori_python/types.py | 13 +++++++------ 5 files changed, 36 insertions(+), 32 deletions(-) diff --git a/pori_python/ipr/annotate.py b/pori_python/ipr/annotate.py index 8c4148d..03a8ec2 100644 --- a/pori_python/ipr/annotate.py +++ b/pori_python/ipr/annotate.py @@ -103,7 +103,7 @@ def annotate_expression_variants( disease_matches: List[str], variants: List[IprExprVariant], show_progress: bool = False, -) -> List[KbMatch]: +) -> List[Hashabledict]: """Annotate expression variants with GraphKB in the IPR alterations format. Args: @@ -113,10 +113,10 @@ def annotate_expression_variants( show_progress (bool): Progressbar displayed for long runs. Returns: - list of kbMatches records for IPR + list of Hashabledict records for IPR """ skipped = 0 - alterations = [] + alterations: List[Hashabledict] = [] problem_genes = set() logger.info(f"Starting annotation of {len(variants)} expression category_variants") iterfunc = tqdm if show_progress else iter @@ -133,7 +133,7 @@ def annotate_expression_variants( for ipr_row in get_ipr_statements_from_variants(graphkb_conn, matches, disease_matches): ipr_row["variant"] = row["key"] ipr_row["variantType"] = row.get("variantType", "exp") - alterations.append(ipr_row) + alterations.append(Hashabledict(ipr_row)) except FeatureNotFoundError as err: problem_genes.add(gene) logger.debug(f"Unrecognized gene ({gene} {variant}): {err}") @@ -156,7 +156,7 @@ def annotate_copy_variants( disease_matches: List[str], variants: List[IprCopyVariant], show_progress: bool = False, -) -> List[KbMatch]: +) -> List[Hashabledict]: """Annotate allowed copy variants with GraphKB in the IPR alterations format. Args: @@ -169,7 +169,7 @@ def annotate_copy_variants( list of kbMatches records for IPR """ skipped = 0 - alterations = [] + alterations: List[Hashabledict] = [] problem_genes = set() logger.info(f"Starting annotation of {len(variants)} copy category_variants") @@ -188,7 +188,7 @@ def annotate_copy_variants( for ipr_row in get_ipr_statements_from_variants(graphkb_conn, matches, disease_matches): ipr_row["variant"] = row["key"] ipr_row["variantType"] = row.get("variantType", "cnv") - alterations.append(ipr_row) + alterations.append(Hashabledict(ipr_row)) except FeatureNotFoundError as err: problem_genes.add(gene) logger.debug(f"Unrecognized gene ({gene} {variant}): {err}") @@ -213,7 +213,7 @@ def annotate_positional_variants( variants: Sequence[IprStructuralVariant] | Sequence[Hashabledict], disease_matches: List[str], show_progress: bool = False, -) -> List[KbMatch]: +) -> Sequence[Hashabledict]: """Annotate SNP, INDEL or fusion variant calls with GraphKB and return in IPR match format. Hashable type is required to turn lists into sets. @@ -228,7 +228,7 @@ def annotate_positional_variants( """ VARIANT_KEYS = ("variant", "hgvsProtein", "hgvsCds", "hgvsGenomic") errors = 0 - alterations: List[KbMatch] = [] + alterations: List[Hashabledict] = [] problem_genes = set() iterfunc = tqdm if show_progress else iter @@ -273,7 +273,7 @@ def annotate_positional_variants( ipr_row["variantType"] = row.get( "variantType", "mut" if row.get("gene") else "sv" ) - alterations.append(ipr_row) + alterations.append(Hashabledict(ipr_row)) except FeatureNotFoundError as err: logger.debug(f"failed to match positional variants ({variant}): {err}") @@ -315,7 +315,7 @@ def annotate_signature_variants( disease_matches: List[str], variants: List[IprSignatureVariant] = [], show_progress: bool = False, -) -> List[KbMatch]: +) -> List[Hashabledict]: """Annotate Signature variants with GraphKB in the IPR alterations format. Match to corresponding GraphKB Variants, then to linked GraphKB Statements @@ -329,7 +329,7 @@ def annotate_signature_variants( Returns: list of kbMatches records for IPR """ - alterations: List[KbMatch] = [] + alterations: List[Hashabledict] = [] iterfunc = tqdm if show_progress else iter for variant in iterfunc(variants): @@ -361,7 +361,7 @@ def annotate_signature_variants( ): ipr_row["variant"] = variant["key"] ipr_row["variantType"] = "sigv" - alterations.append(ipr_row) + alterations.append(Hashabledict(ipr_row)) except ValueError as err: logger.error(f"failed to match signature category variant '{variant}': {err}") @@ -385,7 +385,7 @@ def annotate_variants( structural_variants: Sequence[IprStructuralVariant] = [], copy_variants: List[IprCopyVariant] = [], expression_variants: List[IprExprVariant] = [], -) -> List[KbMatch]: +) -> List[Hashabledict]: """Annotating (matching to GraphKB) all observed variants, per type Args: graphkb_conn: the graphkb api connection object @@ -399,7 +399,7 @@ def annotate_variants( Returns: A list of matched Statements to GraphKB """ - gkb_matches: List[KbMatch] = [] + gkb_matches: List[Hashabledict] = [] # MATCHING SIGNATURE CATEGORY VARIANTS logger.info(f"annotating {len(signature_variants)} signatures") diff --git a/pori_python/ipr/inputs.py b/pori_python/ipr/inputs.py index 2858d56..afa9714 100644 --- a/pori_python/ipr/inputs.py +++ b/pori_python/ipr/inputs.py @@ -249,13 +249,16 @@ def row_key(row: Dict) -> Tuple[str, ...]: row["variant"] = kb_cat row["variantType"] = "cnv" + # Find chromosome and remove chromosome values chrom = "" if "chromosome" in row: - chrom = str(row.pop("chromosome", "")) # type: ignore - elif "chr" in row: - chrom = str(row.pop("chr", "")) # type: ignore + chrom = str(row.pop("chromosome", "")) or chrom # type: ignore + if "chr" in row: + chrom = str(row.pop("chr", "")) or chrom # type: ignore + # Include chromosome in chromosomeBand chrband = row.get("chromosomeBand", "") + if chrom and chrband: # check that chr isn't already in the chrband; # this regex from https://vrs.ga4gh.org/en/1.2/terms_and_model.html#id25 diff --git a/pori_python/ipr/ipr.py b/pori_python/ipr/ipr.py index 8515a04..33d83df 100644 --- a/pori_python/ipr/ipr.py +++ b/pori_python/ipr/ipr.py @@ -266,7 +266,7 @@ def select_expression_plots( def create_key_alterations( - kb_matches: List[KbMatch], all_variants: Sequence[IprVariant] + kb_matches: Sequence[KbMatch] | Sequence[Hashabledict], all_variants: Sequence[IprVariant] ) -> Tuple[List[Dict], Dict]: """Create the list of significant variants matched by the KB. @@ -328,10 +328,10 @@ def create_key_alterations( def germline_kb_matches( - kb_matches: List[KbMatch], + kb_matches: List[KbMatch] | List[Hashabledict], all_variants: Sequence[IprVariant], assume_somatic: bool = True, -) -> List[KbMatch]: +) -> List[Hashabledict]: """Filter kb_matches for matching to germline or somatic events using the 'germline' optional property. Statements related to pharmacogenomic toxicity or cancer predisposition are only relevant if @@ -346,9 +346,9 @@ def germline_kb_matches( Returns: filtered list of kb_matches """ - ret_list = [] - germ_alts = [alt for alt in kb_matches if alt["category"] in GERMLINE_BASE_TERMS] - somatic_alts = [alt for alt in kb_matches if alt not in germ_alts] + ret_list: List[Hashabledict] = [] + germ_alts = [Hashabledict(alt) for alt in kb_matches if alt["category"] in GERMLINE_BASE_TERMS] + somatic_alts = [Hashabledict(alt) for alt in kb_matches if alt not in germ_alts] if germ_alts: logger.info(f"checking germline status of {GERMLINE_BASE_TERMS}") for alt in germ_alts: diff --git a/pori_python/ipr/main.py b/pori_python/ipr/main.py index d331704..58bd6ae 100644 --- a/pori_python/ipr/main.py +++ b/pori_python/ipr/main.py @@ -12,13 +12,13 @@ from pori_python.graphkb import GraphKBConnection from pori_python.graphkb.genes import get_gene_information from pori_python.types import ( + Hashabledict, IprCopyVariant, IprExprVariant, IprFusionVariant, IprSignatureVariant, IprSmallMutationVariant, IprVariant, - KbMatch, ) from .annotate import annotate_variants @@ -428,7 +428,7 @@ def ipr_report( disease_matches: list[str] = get_kb_disease_matches(graphkb_conn, kb_disease_match) # GKB MATCHING (AKA ANNOTATION) - gkb_matches: List[KbMatch] = annotate_variants( + gkb_matches: List[Hashabledict] = annotate_variants( graphkb_conn=graphkb_conn, interactive=interactive, disease_matches=disease_matches, diff --git a/pori_python/types.py b/pori_python/types.py index 92415fc..f95599a 100644 --- a/pori_python/types.py +++ b/pori_python/types.py @@ -66,6 +66,11 @@ class Statement(Record): displayNameTemplate: str +class Hashabledict(dict): + def __hash__(self): + return hash(frozenset(self)) + + class KbMatch(TypedDict): variant: str variantType: str @@ -91,7 +96,8 @@ class KbMatch(TypedDict): class KbVariantMatch(TypedDict): - variantKey: str + variant: str + # variantKey: str variantType: str kbVariant: str kbVariantId: str @@ -128,11 +134,6 @@ class KbMatchSections(TypedDict): kbMatchedStatementConditions: List[KbMatchedStatementConditionSet] -class Hashabledict(dict): - def __hash__(self): - return hash(frozenset(self)) - - class IprVariantBase(TypedDict): """Required properties of all variants for IPR.""" From b209e2396fecebfabc97835e6f92d7f20eb3cb2f Mon Sep 17 00:00:00 2001 From: Dustin Bleile Date: Fri, 12 Dec 2025 12:19:03 -0800 Subject: [PATCH 10/23] DEVSU-2797 - main.ipr_report - allow null ipr_url if ipr_upload = False --- pori_python/ipr/main.py | 31 ++++++++++++++++++++----------- 1 file changed, 20 insertions(+), 11 deletions(-) diff --git a/pori_python/ipr/main.py b/pori_python/ipr/main.py index b7cd761..77f8620 100644 --- a/pori_python/ipr/main.py +++ b/pori_python/ipr/main.py @@ -31,8 +31,8 @@ preprocess_cosmic, preprocess_expression_variants, preprocess_hla, - preprocess_msi, preprocess_hrd, + preprocess_msi, preprocess_signature_variants, preprocess_small_mutations, preprocess_structural_variants, @@ -294,7 +294,7 @@ def ipr_report( username: str, password: str, content: Dict, - ipr_url: str, + ipr_url: str = "", log_level: str = "info", output_json_path: str = "", always_write_output_json: bool = False, @@ -324,7 +324,7 @@ def ipr_report( Args: username: the username for connecting to GraphKB and IPR password: the password for connecting to GraphKB and IPR - ipr_url: base URL to use in connecting to IPR + ipr_url: base URL to use in connecting to IPR (eg. https://ipr-api.bcgsc.ca/api) log_level: the logging level content: report content output_json_path: path to a JSON file to output the report upload body. @@ -358,17 +358,24 @@ def ipr_report( ) # IPR CONNECTION - ipr_conn = IprConnection(username, password, ipr_url) + ipr_conn = None + if ipr_url: + ipr_conn = IprConnection(username, password, ipr_url) + else: + logger.warning("No ipr_url given") if validate_json: + if not ipr_conn: + raise ValueError("ipr_url required to validate_json") ipr_result = ipr_conn.validate_json(content) return ipr_result if upload_json: + if not ipr_conn: + raise ValueError("ipr_url required to validate_json") ipr_result = ipr_conn.upload_report( content, mins_to_wait, async_upload, ignore_extra_fields ) - return ipr_result # validate the JSON content follows the specification try: @@ -495,6 +502,8 @@ def ipr_report( comments_list.append(graphkb_comments) if include_ipr_variant_text: + if not ipr_conn: + raise ValueError("ipr_url required to include_ipr_variant_text") ipr_comments = get_ipr_analyst_comments( ipr_conn, gkb_matches, @@ -550,18 +559,18 @@ def ipr_report( # if input includes hrdScore field, that is ok to pass to db # but prefer the 'hrd' field if it exists - if output.get('hrd'): - if output.get('hrd').get('score'): - output['hrdScore'] = output['hrd']['score'] - output.pop('hrd') # kbmatches have already been made + if output.get("hrd"): + if output.get("hrd").get("score"): + output["hrdScore"] = output["hrd"]["score"] + output.pop("hrd") # kbmatches have already been made - ipr_spec = ipr_conn.get_spec() - output = clean_unsupported_content(output, ipr_spec) ipr_result = {} upload_error = None # UPLOAD TO IPR if ipr_upload: + ipr_spec = ipr_conn.get_spec() + output = clean_unsupported_content(output, ipr_spec) try: logger.info(f"Uploading to IPR {ipr_conn.url}") ipr_result = ipr_conn.upload_report( From e95203c8828f93ed27dec29f519ce0e147d20ecd Mon Sep 17 00:00:00 2001 From: Dustin Bleile Date: Fri, 12 Dec 2025 12:19:55 -0800 Subject: [PATCH 11/23] DEVSU-2797 - lint isort - unsorted imports --- pori_python/ipr/inputs.py | 2 +- pori_python/ipr/ipr.py | 2 +- tests/test_graphkb/test_genes.py | 2 +- tests/test_ipr/test_inputs.py | 4 ++-- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/pori_python/ipr/inputs.py b/pori_python/ipr/inputs.py index 3cfe526..4feb12f 100644 --- a/pori_python/ipr/inputs.py +++ b/pori_python/ipr/inputs.py @@ -26,8 +26,8 @@ from .constants import ( COSMIC_SIGNATURE_VARIANT_TYPE, HLA_SIGNATURE_VARIANT_TYPE, - MSI_MAPPING, HRD_MAPPING, + MSI_MAPPING, TMB_SIGNATURE, TMB_SIGNATURE_VARIANT_TYPE, ) diff --git a/pori_python/ipr/ipr.py b/pori_python/ipr/ipr.py index 3b98d9a..12d7ce6 100644 --- a/pori_python/ipr/ipr.py +++ b/pori_python/ipr/ipr.py @@ -168,7 +168,7 @@ def convert_statements_to_alterations( diseases = [c for c in statement["conditions"] if c["@class"] == "Disease"] disease_match = len(diseases) == 1 and diseases[0]["@rid"] in disease_matches reference = ";".join([e["displayName"] for e in statement["evidence"]]) - if statement['relevance']['name'] == 'eligibility': + if statement["relevance"]["name"] == "eligibility": reference = ";".join([e["sourceId"] for e in statement["evidence"]]) ipr_section = gkb_statement.categorize_relevance( diff --git a/tests/test_graphkb/test_genes.py b/tests/test_graphkb/test_genes.py index 1c2862f..f598822 100644 --- a/tests/test_graphkb/test_genes.py +++ b/tests/test_graphkb/test_genes.py @@ -7,6 +7,7 @@ from pori_python.graphkb import GraphKBConnection from pori_python.graphkb.genes import ( + PREFERRED_GENE_SOURCE_NAME, get_cancer_genes, get_cancer_predisposition_info, get_gene_information, @@ -18,7 +19,6 @@ get_pharmacogenomic_info, get_preferred_gene_name, get_therapeutic_associated_genes, - PREFERRED_GENE_SOURCE_NAME, ) from pori_python.graphkb.util import get_rid diff --git a/tests/test_ipr/test_inputs.py b/tests/test_ipr/test_inputs.py index e99d170..da9b1dd 100644 --- a/tests/test_ipr/test_inputs.py +++ b/tests/test_ipr/test_inputs.py @@ -7,8 +7,8 @@ from pori_python.graphkb.match import INPUT_COPY_CATEGORIES from pori_python.ipr.constants import ( - MSI_MAPPING, HRD_MAPPING, + MSI_MAPPING, TMB_SIGNATURE, TMB_SIGNATURE_HIGH_THRESHOLD, ) @@ -21,8 +21,8 @@ preprocess_cosmic, preprocess_expression_variants, preprocess_hla, - preprocess_msi, preprocess_hrd, + preprocess_msi, preprocess_signature_variants, preprocess_small_mutations, preprocess_structural_variants, From 0f7b2d798d351798e120e0ee24c899e6a19fb5f4 Mon Sep 17 00:00:00 2001 From: Dustin Bleile Date: Fri, 12 Dec 2025 15:41:12 -0800 Subject: [PATCH 12/23] DEVSU-2797 - improve error message logging when graphkb_conn is made with graphkb_url None --- pori_python/graphkb/util.py | 5 ++++- pori_python/ipr/main.py | 8 ++++---- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/pori_python/graphkb/util.py b/pori_python/graphkb/util.py index bbc6776..9d4e030 100644 --- a/pori_python/graphkb/util.py +++ b/pori_python/graphkb/util.py @@ -222,6 +222,8 @@ def login_demo(self) -> None: 1. get a first token from KeyCloak using username and password; self.login_demo() 2. get a second token from the GraphKB API using keyCloakToken; self.login() """ + if not self.url: + raise ValueError("no self.url set - cannot make a login demo") url_parts = urlsplit(self.url) base_url = f"{url_parts.scheme}://{url_parts.netloc}" @@ -251,7 +253,8 @@ def login(self, username: str, password: str, pori_demo: bool = False) -> None: read_timeout = 61 # KBDEV-1328. Alt. GraphKB login for GSC's PORI online demo - if pori_demo or "pori-demo" in self.url: + if self.url and (pori_demo or "pori-demo" in self.url): + logger.warning(f"login demo") self.login_demo() # use requests package directly to avoid recursion loop on login failure diff --git a/pori_python/ipr/main.py b/pori_python/ipr/main.py index 77f8620..0e0174d 100644 --- a/pori_python/ipr/main.py +++ b/pori_python/ipr/main.py @@ -417,14 +417,14 @@ def ipr_report( ) # GKB CONNECTION + gkb_user = graphkb_username if graphkb_username else username + gkb_pass = graphkb_password if graphkb_password else password if graphkb_url: logger.info(f"connecting to graphkb: {graphkb_url}") graphkb_conn = GraphKBConnection(graphkb_url) else: - graphkb_conn = GraphKBConnection() - - gkb_user = graphkb_username if graphkb_username else username - gkb_pass = graphkb_password if graphkb_password else password + # graphkb_conn = GraphKBConnection() # This will just error on trying to login + raise ValueError("graphkb_url is required") graphkb_conn.login(gkb_user, gkb_pass) From ecb9060c425bd1018e968a73780ba872c904748c Mon Sep 17 00:00:00 2001 From: Dustin Bleile Date: Mon, 15 Dec 2025 16:34:45 -0800 Subject: [PATCH 13/23] bugfix - ipr_report - upload_json immediately returns --- pori_python/ipr/main.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pori_python/ipr/main.py b/pori_python/ipr/main.py index 0e0174d..52265e1 100644 --- a/pori_python/ipr/main.py +++ b/pori_python/ipr/main.py @@ -372,10 +372,11 @@ def ipr_report( if upload_json: if not ipr_conn: - raise ValueError("ipr_url required to validate_json") + raise ValueError("ipr_url required to upload_json") ipr_result = ipr_conn.upload_report( content, mins_to_wait, async_upload, ignore_extra_fields ) + return ipr_result # validate the JSON content follows the specification try: @@ -569,6 +570,8 @@ def ipr_report( # UPLOAD TO IPR if ipr_upload: + if not ipr_conn: + raise ValueError("ipr_url required to upload_report") ipr_spec = ipr_conn.get_spec() output = clean_unsupported_content(output, ipr_spec) try: From b572f2b09c4e269bd08887767a826c5d37fd174c Mon Sep 17 00:00:00 2001 From: Dustin Bleile Date: Mon, 15 Dec 2025 16:48:49 -0800 Subject: [PATCH 14/23] DEVSU-2797 - check os.environ for IPR_URL and GRAPHKB_URL --- pori_python/ipr/main.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pori_python/ipr/main.py b/pori_python/ipr/main.py index 52265e1..7fcff9d 100644 --- a/pori_python/ipr/main.py +++ b/pori_python/ipr/main.py @@ -358,6 +358,7 @@ def ipr_report( ) # IPR CONNECTION + ipr_url = ipr_url if ipr_url else os.environ.get("IPR_URL", "") ipr_conn = None if ipr_url: ipr_conn = IprConnection(username, password, ipr_url) @@ -420,6 +421,7 @@ def ipr_report( # GKB CONNECTION gkb_user = graphkb_username if graphkb_username else username gkb_pass = graphkb_password if graphkb_password else password + graphkb_url = graphkb_url if graphkb_url else os.environ.get("GRAPHKB_URL", "") if graphkb_url: logger.info(f"connecting to graphkb: {graphkb_url}") graphkb_conn = GraphKBConnection(graphkb_url) From 9cd51c3733573d85c965952b79a42b304b609292 Mon Sep 17 00:00:00 2001 From: Dustin Bleile Date: Tue, 16 Dec 2025 11:16:14 -0800 Subject: [PATCH 15/23] lint - fix fstring --- pori_python/graphkb/util.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pori_python/graphkb/util.py b/pori_python/graphkb/util.py index 9d4e030..9792639 100644 --- a/pori_python/graphkb/util.py +++ b/pori_python/graphkb/util.py @@ -254,7 +254,7 @@ def login(self, username: str, password: str, pori_demo: bool = False) -> None: # KBDEV-1328. Alt. GraphKB login for GSC's PORI online demo if self.url and (pori_demo or "pori-demo" in self.url): - logger.warning(f"login demo") + logger.warning("login demo") self.login_demo() # use requests package directly to avoid recursion loop on login failure From e05ec5bd186d525ef7a96213994a794d89bf4171 Mon Sep 17 00:00:00 2001 From: Dustin Bleile Date: Wed, 7 Jan 2026 15:05:01 -0800 Subject: [PATCH 16/23] minor type fix - null string instead of None --- pori_python/graphkb/util.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pori_python/graphkb/util.py b/pori_python/graphkb/util.py index 9792639..4a4cbff 100644 --- a/pori_python/graphkb/util.py +++ b/pori_python/graphkb/util.py @@ -98,7 +98,7 @@ def cache_key(request_body) -> str: class GraphKBConnection: def __init__( self, - url: str = os.environ.get("GRAPHKB_URL"), + url: str = os.environ.get("GRAPHKB_URL", ""), username: str = "", password: str = "", use_global_cache: bool = True, From 57f97f924298f9af523afa5aa2d49cd7fda7cce7 Mon Sep 17 00:00:00 2001 From: Dustin Bleile Date: Wed, 7 Jan 2026 15:08:30 -0800 Subject: [PATCH 17/23] minor lint - isort inputs & quotes --- pori_python/ipr/ipr.py | 10 +++++----- pori_python/ipr/main.py | 2 +- tests/test_ipr/test_ipr.py | 28 ++++++++++++++-------------- 3 files changed, 20 insertions(+), 20 deletions(-) diff --git a/pori_python/ipr/ipr.py b/pori_python/ipr/ipr.py index fae5244..742a5a7 100644 --- a/pori_python/ipr/ipr.py +++ b/pori_python/ipr/ipr.py @@ -295,10 +295,10 @@ def create_key_alterations( counts: Dict[str, Set] = {v: set() for v in type_mapping.values()} skipped_variant_types = [] - included_kbvariant_ids = list(set([item['kbVariantId'] for item in included_kb_matches])) + included_kbvariant_ids = list(set([item["kbVariantId"] for item in included_kb_matches])) for kb_match in kb_matches: - if kb_match['kbVariantId'] not in included_kbvariant_ids: + if kb_match["kbVariantId"] not in included_kbvariant_ids: continue variant_type = kb_match["variantType"] variant_key = kb_match["variant"] @@ -646,13 +646,13 @@ def get_kb_matches_sections( unique_kb_variant_ids = list( set( [ - item['kbVariantId'] + item["kbVariantId"] for conditionSet in kb_statement_matched_conditions - for item in conditionSet['matchedConditions'] + for item in conditionSet["matchedConditions"] ] ) ) - kb_variants = [item for item in kb_variants if item['kbVariantId'] in unique_kb_variant_ids] + kb_variants = [item for item in kb_variants if item["kbVariantId"] in unique_kb_variant_ids] return { "kbMatches": kb_variants, diff --git a/pori_python/ipr/main.py b/pori_python/ipr/main.py index 81b0d79..c7e026b 100644 --- a/pori_python/ipr/main.py +++ b/pori_python/ipr/main.py @@ -525,7 +525,7 @@ def ipr_report( # KEY ALTERATIONS key_alterations, variant_counts = create_key_alterations( - gkb_matches, all_variants, kb_matched_sections['kbMatches'] + gkb_matches, all_variants, kb_matched_sections["kbMatches"] ) # OUTPUT CONTENT diff --git a/tests/test_ipr/test_ipr.py b/tests/test_ipr/test_ipr.py index 9994cf4..e7c68df 100644 --- a/tests/test_ipr/test_ipr.py +++ b/tests/test_ipr/test_ipr.py @@ -5,13 +5,13 @@ from pori_python.graphkb import vocab as gkb_vocab from pori_python.ipr.ipr import ( convert_statements_to_alterations, + create_key_alterations, germline_kb_matches, get_kb_disease_matches, get_kb_matched_statements, + get_kb_matches_sections, get_kb_statement_matched_conditions, get_kb_variants, - get_kb_matches_sections, - create_key_alterations, ) from pori_python.types import Statement @@ -497,10 +497,10 @@ def test_germline_kb_matches(self): ] ALL_VARIANTS = [ - {"variant": "var1", "key": '1', "variantType": 'mut'}, - {"variant": "var2", "key": '2', "variantType": 'mut'}, - {"variant": "var3", "key": '3', "variantType": 'mut'}, - {"variant": "var4", "key": '4', "variantType": 'mut'}, + {"variant": "var1", "key": "1", "variantType": "mut"}, + {"variant": "var2", "key": "2", "variantType": "mut"}, + {"variant": "var3", "key": "3", "variantType": "mut"}, + {"variant": "var4", "key": "4", "variantType": "mut"}, ] BASIC_GKB_MATCH = { @@ -709,8 +709,8 @@ def test_partial_matches_omitted(self): gkb_matches = create_gkb_matches(input_fields) sections = get_kb_matches_sections(gkb_matches, allow_partial_matches=False) - stmts = sections['kbMatchedStatements'] - kbcs = sections['kbStatementMatchedConditions'] + stmts = sections["kbMatchedStatements"] + kbcs = sections["kbStatementMatchedConditions"] assert len(stmts) == 2 assert len(kbcs) == 1 # X only assert kbcs[0]["kbStatementId"] == "X" @@ -796,14 +796,14 @@ def test_kbvariants_removed_from_set_when_not_part_of_full_conditionset_match(se item["kbVariant"] = "test" gkb_matches = create_gkb_matches(input_fields) sections1 = get_kb_matches_sections(gkb_matches, allow_partial_matches=False) - kbcs1 = sections1['kbStatementMatchedConditions'] - kbvars1 = sections1['kbMatches'] + kbcs1 = sections1["kbStatementMatchedConditions"] + kbvars1 = sections1["kbMatches"] assert len(kbcs1) == 1 # only fully matched condition sets included assert len(kbvars1) == 2 # therefore, kbvars associated with stmt X are pruned sections2 = get_kb_matches_sections(gkb_matches, allow_partial_matches=True) - kbcs2 = sections2['kbStatementMatchedConditions'] - kbvars2 = sections2['kbMatches'] + kbcs2 = sections2["kbStatementMatchedConditions"] + kbvars2 = sections2["kbMatches"] assert len(kbcs2) == 2 # all condition sets included assert len(kbvars2) == 3 # therefore, no pruning @@ -844,12 +844,12 @@ def test_create_key_alterations_includes_only_pruned_kbmatches(self): sections1 = get_kb_matches_sections(gkb_matches, allow_partial_matches=False) key_alts1, counts1 = create_key_alterations( - gkb_matches, ALL_VARIANTS, sections1['kbMatches'] + gkb_matches, ALL_VARIANTS, sections1["kbMatches"] ) sections2 = get_kb_matches_sections(gkb_matches, allow_partial_matches=True) key_alts2, counts2 = create_key_alterations( - gkb_matches, ALL_VARIANTS, sections2['kbMatches'] + gkb_matches, ALL_VARIANTS, sections2["kbMatches"] ) # check partial-match-only variants are not included in key alterations when From a48b090cb235211541be16c476e1402d002243ad Mon Sep 17 00:00:00 2001 From: Dustin Bleile Date: Wed, 7 Jan 2026 15:16:04 -0800 Subject: [PATCH 18/23] test_genes - remove unused import PREFERRED_GENE_SOURCE_NAME --- tests/test_graphkb/test_genes.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/test_graphkb/test_genes.py b/tests/test_graphkb/test_genes.py index d5abc65..fd97ffc 100644 --- a/tests/test_graphkb/test_genes.py +++ b/tests/test_graphkb/test_genes.py @@ -7,7 +7,6 @@ from pori_python.graphkb import GraphKBConnection from pori_python.graphkb.genes import ( - PREFERRED_GENE_SOURCE_NAME, get_cancer_genes, get_cancer_predisposition_info, get_gene_information, From 87cd9ff431fc55429dde6ed57487d588dd625874 Mon Sep 17 00:00:00 2001 From: Dustin Bleile Date: Wed, 7 Jan 2026 15:39:26 -0800 Subject: [PATCH 19/23] raise errors if no graphkb url has been set --- pori_python/graphkb/util.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/pori_python/graphkb/util.py b/pori_python/graphkb/util.py index 4a4cbff..772cfae 100644 --- a/pori_python/graphkb/util.py +++ b/pori_python/graphkb/util.py @@ -143,6 +143,8 @@ def request(self, endpoint: str, method: str = "GET", **kwargs) -> Dict: Returns: dict: the json response as a python dict """ + if not self.url: + raise ValueError("no GraphKBConnection url set - cannot make a login demo") url = join_url(self.url, endpoint) self.request_count += 1 connect_timeout = 7 @@ -223,7 +225,7 @@ def login_demo(self) -> None: 2. get a second token from the GraphKB API using keyCloakToken; self.login() """ if not self.url: - raise ValueError("no self.url set - cannot make a login demo") + raise ValueError("no GraphKBConnection url set - cannot make a login demo") url_parts = urlsplit(self.url) base_url = f"{url_parts.scheme}://{url_parts.netloc}" @@ -252,8 +254,10 @@ def login(self, username: str, password: str, pori_demo: bool = False) -> None: connect_timeout = 7 read_timeout = 61 - # KBDEV-1328. Alt. GraphKB login for GSC's PORI online demo - if self.url and (pori_demo or "pori-demo" in self.url): + if not self.url: + raise ValueError("no GraphKBConnection url set - cannot login") + elif pori_demo or "pori-demo" in self.url: + # KBDEV-1328. Alt. GraphKB login for GSC's PORI online demo logger.warning("login demo") self.login_demo() From 686511dcf89c1a9bec78419bb0624b371cca6b3f Mon Sep 17 00:00:00 2001 From: Dustin Bleile Date: Wed, 7 Jan 2026 15:51:47 -0800 Subject: [PATCH 20/23] Error message when no IPR_URL defined --- pori_python/ipr/main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pori_python/ipr/main.py b/pori_python/ipr/main.py index c7e026b..b0b0782 100644 --- a/pori_python/ipr/main.py +++ b/pori_python/ipr/main.py @@ -363,7 +363,7 @@ def ipr_report( if ipr_url: ipr_conn = IprConnection(username, password, ipr_url) else: - logger.warning("No ipr_url given") + logger.error("No ipr_url given with no IPR_URL environment variable") if validate_json: if not ipr_conn: From 2c39230a32783fd1294e1969d22dec0e62da0edc Mon Sep 17 00:00:00 2001 From: Dustin Bleile Date: Wed, 7 Jan 2026 16:11:30 -0800 Subject: [PATCH 21/23] IPR_URL - typing fix - only string --- pori_python/ipr/connection.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pori_python/ipr/connection.py b/pori_python/ipr/connection.py index dca4687..da598e3 100644 --- a/pori_python/ipr/connection.py +++ b/pori_python/ipr/connection.py @@ -16,7 +16,7 @@ def __init__( self, username: str, password: str, - url: str = os.environ.get("IPR_URL"), + url: str = os.environ.get("IPR_URL", ""), ): self.token = None self.url = url From 3682a6d7f66c8a8912d38c13e25fec33d1f6c6d0 Mon Sep 17 00:00:00 2001 From: Dustin Bleile Date: Wed, 7 Jan 2026 16:52:44 -0800 Subject: [PATCH 22/23] lint - inputs.py - fix typing warnings --- pori_python/ipr/inputs.py | 2 +- tests/test_ipr/test_inputs.py | 6 ++++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/pori_python/ipr/inputs.py b/pori_python/ipr/inputs.py index e26e7b1..c10296d 100644 --- a/pori_python/ipr/inputs.py +++ b/pori_python/ipr/inputs.py @@ -560,7 +560,7 @@ def preprocess_msi(msi: Any) -> Sequence[Dict]: return [] -def preprocess_hrd(hrd: Any) -> Iterable[Dict]: +def preprocess_hrd(hrd: Any) -> Sequence[Dict]: """ Process hrd input into preformatted signature input. HRD gets mapped to corresponding GraphKB Signature CategoryVariants. diff --git a/tests/test_ipr/test_inputs.py b/tests/test_ipr/test_inputs.py index da9b1dd..d5958a2 100644 --- a/tests/test_ipr/test_inputs.py +++ b/tests/test_ipr/test_inputs.py @@ -50,9 +50,11 @@ "HLA-C*06", } EXPECTED_TMB = {TMB_SIGNATURE} -EXPECTED_MSI = {MSI_MAPPING.get("microsatellite instability")["signatureName"]} +EXPECTED_MSI = {MSI_MAPPING.get("microsatellite instability", {}).get("signatureName", "")} EXPECTED_HRD = { - HRD_MAPPING.get("homologous recombination deficiency strong signature")["signatureName"] + HRD_MAPPING.get("homologous recombination deficiency strong signature", {}).get( + "signatureName", "" + ) } From 9255bf03c704636c91e9525efd13041112072f1a Mon Sep 17 00:00:00 2001 From: Dustin Bleile Date: Thu, 8 Jan 2026 09:05:36 -0800 Subject: [PATCH 23/23] SDEV-5193 - type fix - KbMatchSections['KbMatchedVariants'] -> KbMatchSection['kbMatches'] --- pori_python/types.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pori_python/types.py b/pori_python/types.py index f95599a..eb3c2ef 100644 --- a/pori_python/types.py +++ b/pori_python/types.py @@ -130,7 +130,7 @@ class KbMatchedStatementConditionSet(TypedDict): class KbMatchSections(TypedDict): kbMatchedStatements: List[KbMatchedStatement] - kbMatchedVariants: List[KbVariantMatch] + kbMatches: List[KbVariantMatch] kbMatchedStatementConditions: List[KbMatchedStatementConditionSet]