diff --git a/pori_python/graphkb/match.py b/pori_python/graphkb/match.py index 0c79138..6615ff4 100644 --- a/pori_python/graphkb/match.py +++ b/pori_python/graphkb/match.py @@ -315,15 +315,15 @@ def equivalent_types( # Convert rid to displayName if needed if looks_like_rid(type1): - type1 = conn.get_records_by_id([type1])[0]["displayName"] + type1 = str(conn.get_records_by_id([type1])[0].get("displayName", type1)) if looks_like_rid(type2): - type2 = conn.get_records_by_id([type2])[0]["displayName"] + type2 = str(conn.get_records_by_id([type2])[0].get("displayName", type2)) # Get type terms from observed variant - terms1 = [] + terms1 = set() if strict: try: - terms1.append(get_term_by_name(conn, type1)["@rid"]) + terms1.add(get_term_by_name(conn, type1)["@rid"]) except Exception: pass else: diff --git a/pori_python/graphkb/util.py b/pori_python/graphkb/util.py index bbc6776..772cfae 100644 --- a/pori_python/graphkb/util.py +++ b/pori_python/graphkb/util.py @@ -98,7 +98,7 @@ def cache_key(request_body) -> str: class GraphKBConnection: def __init__( self, - url: str = os.environ.get("GRAPHKB_URL"), + url: str = os.environ.get("GRAPHKB_URL", ""), username: str = "", password: str = "", use_global_cache: bool = True, @@ -143,6 +143,8 @@ def request(self, endpoint: str, method: str = "GET", **kwargs) -> Dict: Returns: dict: the json response as a python dict """ + if not self.url: + raise ValueError("no GraphKBConnection url set - cannot make a login demo") url = join_url(self.url, endpoint) self.request_count += 1 connect_timeout = 7 @@ -222,6 +224,8 @@ def login_demo(self) -> None: 1. get a first token from KeyCloak using username and password; self.login_demo() 2. get a second token from the GraphKB API using keyCloakToken; self.login() """ + if not self.url: + raise ValueError("no GraphKBConnection url set - cannot make a login demo") url_parts = urlsplit(self.url) base_url = f"{url_parts.scheme}://{url_parts.netloc}" @@ -250,8 +254,11 @@ def login(self, username: str, password: str, pori_demo: bool = False) -> None: connect_timeout = 7 read_timeout = 61 - # KBDEV-1328. Alt. GraphKB login for GSC's PORI online demo - if pori_demo or "pori-demo" in self.url: + if not self.url: + raise ValueError("no GraphKBConnection url set - cannot login") + elif pori_demo or "pori-demo" in self.url: + # KBDEV-1328. Alt. GraphKB login for GSC's PORI online demo + logger.warning("login demo") self.login_demo() # use requests package directly to avoid recursion loop on login failure diff --git a/pori_python/ipr/annotate.py b/pori_python/ipr/annotate.py index 72ae762..03a8ec2 100644 --- a/pori_python/ipr/annotate.py +++ b/pori_python/ipr/annotate.py @@ -103,7 +103,7 @@ def annotate_expression_variants( disease_matches: List[str], variants: List[IprExprVariant], show_progress: bool = False, -) -> List[KbMatch]: +) -> List[Hashabledict]: """Annotate expression variants with GraphKB in the IPR alterations format. Args: @@ -113,10 +113,10 @@ def annotate_expression_variants( show_progress (bool): Progressbar displayed for long runs. Returns: - list of kbMatches records for IPR + list of Hashabledict records for IPR """ skipped = 0 - alterations = [] + alterations: List[Hashabledict] = [] problem_genes = set() logger.info(f"Starting annotation of {len(variants)} expression category_variants") iterfunc = tqdm if show_progress else iter @@ -133,7 +133,7 @@ def annotate_expression_variants( for ipr_row in get_ipr_statements_from_variants(graphkb_conn, matches, disease_matches): ipr_row["variant"] = row["key"] ipr_row["variantType"] = row.get("variantType", "exp") - alterations.append(ipr_row) + alterations.append(Hashabledict(ipr_row)) except FeatureNotFoundError as err: problem_genes.add(gene) logger.debug(f"Unrecognized gene ({gene} {variant}): {err}") @@ -156,7 +156,7 @@ def annotate_copy_variants( disease_matches: List[str], variants: List[IprCopyVariant], show_progress: bool = False, -) -> List[KbMatch]: +) -> List[Hashabledict]: """Annotate allowed copy variants with GraphKB in the IPR alterations format. Args: @@ -169,7 +169,7 @@ def annotate_copy_variants( list of kbMatches records for IPR """ skipped = 0 - alterations = [] + alterations: List[Hashabledict] = [] problem_genes = set() logger.info(f"Starting annotation of {len(variants)} copy category_variants") @@ -188,7 +188,7 @@ def annotate_copy_variants( for ipr_row in get_ipr_statements_from_variants(graphkb_conn, matches, disease_matches): ipr_row["variant"] = row["key"] ipr_row["variantType"] = row.get("variantType", "cnv") - alterations.append(ipr_row) + alterations.append(Hashabledict(ipr_row)) except FeatureNotFoundError as err: problem_genes.add(gene) logger.debug(f"Unrecognized gene ({gene} {variant}): {err}") @@ -213,7 +213,7 @@ def annotate_positional_variants( variants: Sequence[IprStructuralVariant] | Sequence[Hashabledict], disease_matches: List[str], show_progress: bool = False, -) -> List[Hashabledict]: +) -> Sequence[Hashabledict]: """Annotate SNP, INDEL or fusion variant calls with GraphKB and return in IPR match format. Hashable type is required to turn lists into sets. @@ -239,10 +239,11 @@ def annotate_positional_variants( continue for var_key in VARIANT_KEYS: - variant = row.get(var_key) + variant = row.get(var_key, "") matches = [] if not variant or isnull(variant): continue + variant = str(variant) try: try: matches = gkb_match.match_positional_variant(graphkb_conn, variant) @@ -277,15 +278,15 @@ def annotate_positional_variants( except FeatureNotFoundError as err: logger.debug(f"failed to match positional variants ({variant}): {err}") errors += 1 - if "gene" in row: - problem_genes.add(row["gene"]) - elif "gene1" in row and f"({row['gene1']})" in str(err): - problem_genes.add(row["gene1"]) - elif "gene2" in row and f"({row['gene2']})" in str(err): - problem_genes.add(row["gene2"]) - elif "gene1" in row and "gene2" in row: - problem_genes.add(row["gene1"]) - problem_genes.add(row["gene2"]) + if row.get("gene"): + problem_genes.add(row["gene"]) # type: ignore + elif row.get("gene1") and f"({row['gene1']})" in str(err): # type: ignore + problem_genes.add(row["gene1"]) # type: ignore + elif row.get("gene2") and f"({row['gene2']})" in str(err): # type: ignore + problem_genes.add(row["gene2"]) # type: ignore + elif row.get("gene1") and row.get("gene2"): # type: ignore + problem_genes.add(row["gene1"]) # type: ignore + problem_genes.add(row["gene2"]) # type: ignore else: raise err except HTTPError as err: @@ -314,7 +315,7 @@ def annotate_signature_variants( disease_matches: List[str], variants: List[IprSignatureVariant] = [], show_progress: bool = False, -) -> List[KbMatch]: +) -> List[Hashabledict]: """Annotate Signature variants with GraphKB in the IPR alterations format. Match to corresponding GraphKB Variants, then to linked GraphKB Statements @@ -433,27 +434,21 @@ def annotate_variants( # MATCHING COPY VARIANTS logger.info(f"annotating {len(copy_variants)} copy variants") gkb_matches.extend( - [ - Hashabledict(copy_var) - for copy_var in annotate_copy_variants( - graphkb_conn, disease_matches, copy_variants, show_progress=interactive - ) - ] + annotate_copy_variants( + graphkb_conn, disease_matches, copy_variants, show_progress=interactive + ) ) logger.debug(f"\tgkb_matches: {len(gkb_matches)}") # MATCHING EXPRESSION VARIANTS logger.info(f"annotating {len(expression_variants)} expression variants") gkb_matches.extend( - [ - Hashabledict(exp_var) - for exp_var in annotate_expression_variants( - graphkb_conn, - disease_matches, - expression_variants, - show_progress=interactive, - ) - ] + annotate_expression_variants( + graphkb_conn, + disease_matches, + expression_variants, + show_progress=interactive, + ) ) logger.debug(f"\tgkb_matches: {len(gkb_matches)}") diff --git a/pori_python/ipr/connection.py b/pori_python/ipr/connection.py index dca4687..da598e3 100644 --- a/pori_python/ipr/connection.py +++ b/pori_python/ipr/connection.py @@ -16,7 +16,7 @@ def __init__( self, username: str, password: str, - url: str = os.environ.get("IPR_URL"), + url: str = os.environ.get("IPR_URL", ""), ): self.token = None self.url = url diff --git a/pori_python/ipr/inputs.py b/pori_python/ipr/inputs.py index 3cfe526..c10296d 100644 --- a/pori_python/ipr/inputs.py +++ b/pori_python/ipr/inputs.py @@ -11,7 +11,7 @@ import re from Bio.Data.IUPACData import protein_letters_3to1 from numpy import nan -from typing import Any, Callable, Dict, Iterable, List, Set, Tuple, cast +from typing import Any, Callable, Dict, Iterable, List, Sequence, Set, Tuple, cast from pori_python.graphkb.match import INPUT_COPY_CATEGORIES, INPUT_EXPRESSION_CATEGORIES from pori_python.types import ( @@ -26,8 +26,8 @@ from .constants import ( COSMIC_SIGNATURE_VARIANT_TYPE, HLA_SIGNATURE_VARIANT_TYPE, - MSI_MAPPING, HRD_MAPPING, + MSI_MAPPING, TMB_SIGNATURE, TMB_SIGNATURE_VARIANT_TYPE, ) @@ -248,20 +248,23 @@ def row_key(row: Dict) -> Tuple[str, ...]: row["cnvState"] = display_name_mapping[kb_cat] row["variant"] = kb_cat row["variantType"] = "cnv" - chrband = row.get("chromosomeBand", False) - chrom = row.pop("chromosome", False) - if not chrom: - chrom = row.pop("chr", False) - # remove chr if it was not used for chrom - row.pop("chr", False) - if chrom: + + # Find chromosome and remove chromosome values + chrom = "" + if "chromosome" in row: + chrom = str(row.pop("chromosome", "")) or chrom # type: ignore + if "chr" in row: + chrom = str(row.pop("chr", "")) or chrom # type: ignore + + # Include chromosome in chromosomeBand + chrband = row.get("chromosomeBand", "") + + if chrom and chrband: # check that chr isn't already in the chrband; # this regex from https://vrs.ga4gh.org/en/1.2/terms_and_model.html#id25 - if chrband and (re.match(r"^cen|[pq](ter|([1-9][0-9]*(\.[1-9][0-9]*)?))$", chrband)): - if isinstance(chrom, int): - chrom = str(chrom) + if re.match(r"^cen|[pq](ter|([1-9][0-9]*(\.[1-9][0-9]*)?))$", chrband): chrom = chrom.strip("chr") - row["chromosomeBand"] = chrom + row["chromosomeBand"] + row["chromosomeBand"] = chrom + chrband return ret_list @@ -441,7 +444,7 @@ def row_key(row: Dict) -> Tuple[str, ...]: return result -def preprocess_cosmic(rows: Iterable[Dict]) -> Iterable[Dict]: +def preprocess_cosmic(rows: Iterable[Dict]) -> Sequence[Dict]: """ Process cosmic inputs into preformatted signature inputs Note: Cosmic and dMMR already evaluated against thresholds in gsc_report @@ -456,7 +459,7 @@ def preprocess_cosmic(rows: Iterable[Dict]) -> Iterable[Dict]: ] -def preprocess_hla(rows: Iterable[Dict]) -> Iterable[Dict]: +def preprocess_hla(rows: Iterable[Dict]) -> Sequence[Dict]: """ Process hla inputs into preformatted signature inputs """ @@ -480,7 +483,7 @@ def preprocess_hla(rows: Iterable[Dict]) -> Iterable[Dict]: def preprocess_tmb( tmb_high: float, tmburMutationBurden: Dict = {}, genomeTmb: float | str = "" -) -> Iterable[Dict]: +) -> Sequence[Dict]: """ Process tumour mutation burden (tmb) input(s) into preformatted signature input. Get compared to threshold; signature CategoryVariant created only if threshold met. @@ -530,7 +533,7 @@ def preprocess_tmb( return [] -def preprocess_msi(msi: Any) -> Iterable[Dict]: +def preprocess_msi(msi: Any) -> Sequence[Dict]: """ Process micro-satellite input into preformatted signature input. Both msi & mss gets mapped to corresponding GraphKB Signature CategoryVariants. @@ -557,7 +560,7 @@ def preprocess_msi(msi: Any) -> Iterable[Dict]: return [] -def preprocess_hrd(hrd: Any) -> Iterable[Dict]: +def preprocess_hrd(hrd: Any) -> Sequence[Dict]: """ Process hrd input into preformatted signature input. HRD gets mapped to corresponding GraphKB Signature CategoryVariants. diff --git a/pori_python/ipr/ipr.py b/pori_python/ipr/ipr.py index d606de2..6953163 100644 --- a/pori_python/ipr/ipr.py +++ b/pori_python/ipr/ipr.py @@ -8,7 +8,7 @@ import uuid from copy import copy from itertools import product -from typing import Dict, Iterable, List, Optional, Sequence, Set, Tuple, cast +from typing import Any, Dict, Iterable, List, Optional, Sequence, Set, Tuple, cast from pori_python.graphkb import GraphKBConnection from pori_python.graphkb import statement as gkb_statement @@ -168,8 +168,7 @@ def convert_statements_to_alterations( diseases = [c for c in statement["conditions"] if c["@class"] == "Disease"] disease_match = len(diseases) == 1 and diseases[0]["@rid"] in disease_matches reference = ";".join([e["displayName"] for e in statement["evidence"]]) - - if statement['relevance']['name'] == 'eligibility': + if statement["relevance"]["name"] == "eligibility": reference = ";".join([e["sourceId"] for e in statement["evidence"]]) ipr_section = gkb_statement.categorize_relevance( @@ -269,7 +268,7 @@ def select_expression_plots( def create_key_alterations( - kb_matches: List[Hashabledict], + kb_matches: Sequence[KbMatch] | Sequence[Hashabledict], all_variants: Sequence[IprVariant], included_kb_matches: List[KbVariantMatch], ) -> Tuple[List[Dict], Dict]: @@ -296,10 +295,10 @@ def create_key_alterations( counts: Dict[str, Set] = {v: set() for v in type_mapping.values()} skipped_variant_types = [] - included_kbvariant_ids = list(set([item['kbVariantId'] for item in included_kb_matches])) + included_kbvariant_ids = list(set([item["kbVariantId"] for item in included_kb_matches])) for kb_match in kb_matches: - if kb_match['kbVariantId'] not in included_kbvariant_ids: + if kb_match["kbVariantId"] not in included_kbvariant_ids: continue variant_type = kb_match["variantType"] variant_key = kb_match["variant"] @@ -347,7 +346,7 @@ def create_key_alterations( def germline_kb_matches( - kb_matches: List[Hashabledict], + kb_matches: List[KbMatch] | List[Hashabledict], all_variants: Sequence[IprVariant], assume_somatic: bool = True, ) -> List[Hashabledict]: @@ -365,9 +364,9 @@ def germline_kb_matches( Returns: filtered list of kb_matches """ - ret_list = [] - germ_alts = [alt for alt in kb_matches if alt["category"] in GERMLINE_BASE_TERMS] - somatic_alts = [alt for alt in kb_matches if alt not in germ_alts] + ret_list: List[Hashabledict] = [] + germ_alts = [Hashabledict(alt) for alt in kb_matches if alt["category"] in GERMLINE_BASE_TERMS] + somatic_alts = [Hashabledict(alt) for alt in kb_matches if alt not in germ_alts] if germ_alts: logger.info(f"checking germline status of {GERMLINE_BASE_TERMS}") for alt in germ_alts: @@ -460,7 +459,7 @@ def multi_variant_filtering( statements = res["result"] # Get set of excluded Vocabulary RIDs for variant types - excluded = {} + excluded = set() if len(excludedTypes) != 0 and excludedTypes[0] != "": excluded = gkb_vocab.get_terms_set(graphkb_conn, excludedTypes) @@ -529,7 +528,8 @@ def get_kb_matched_statements( for item in gkb_matches: stmt = copy(item) stmt["requiredKbMatches"].sort() - kbs = KbMatchedStatement({key: val for (key, val) in stmt.items() if key in kbs_keys}) + kbs_dict = {key: val for (key, val) in stmt.items() if key in kbs_keys} + kbs = cast(KbMatchedStatement, kbs_dict) dict_key = str(kbs) kbMatchedStatements[dict_key] = kbs return [*kbMatchedStatements.values()] @@ -588,7 +588,7 @@ def get_kb_statement_matched_conditions( for kbStmt in kbMatchedStatements: stmts = [item for item in gkb_matches if item["kbStatementId"] == kbStmt["kbStatementId"]] - requirements = {} + requirements: Dict[str, str | Any] = {} for requirement in stmts[0]["requiredKbMatches"]: if not requirements.get(requirement, False): # only use explicit variant/statement links @@ -621,7 +621,7 @@ def get_kb_statement_matched_conditions( ) kbmc = KbMatchedStatementConditionSet( { - "kbStatementId": conditionSet["kbStatementId"], + "kbStatementId": str(conditionSet.get("kbStatementId", "")), "matchedConditions": matchedConditions, } ) @@ -647,19 +647,20 @@ def get_kb_matches_sections( unique_kb_variant_ids = list( set( [ - item['kbVariantId'] + item["kbVariantId"] for conditionSet in kb_statement_matched_conditions - for item in conditionSet['matchedConditions'] + for item in conditionSet["matchedConditions"] ] ) ) - kb_variants = [item for item in kb_variants if item['kbVariantId'] in unique_kb_variant_ids] + kb_variants = [item for item in kb_variants if item["kbVariantId"] in unique_kb_variant_ids] - return { + ret_dict = { "kbMatches": kb_variants, "kbMatchedStatements": kb_matched_statements, "kbStatementMatchedConditions": kb_statement_matched_conditions, } + return cast(KbMatchSections, ret_dict) def get_kb_disease_matches( diff --git a/pori_python/ipr/main.py b/pori_python/ipr/main.py index 3194779..930bad0 100644 --- a/pori_python/ipr/main.py +++ b/pori_python/ipr/main.py @@ -31,8 +31,8 @@ preprocess_cosmic, preprocess_expression_variants, preprocess_hla, - preprocess_msi, preprocess_hrd, + preprocess_msi, preprocess_signature_variants, preprocess_small_mutations, preprocess_structural_variants, @@ -294,7 +294,7 @@ def ipr_report( username: str, password: str, content: Dict, - ipr_url: str = '', + ipr_url: str = "", log_level: str = "info", output_json_path: str = "", always_write_output_json: bool = False, @@ -363,17 +363,17 @@ def ipr_report( if ipr_url: ipr_conn = IprConnection(username, password, ipr_url) else: - logger.warning("No ipr_url given") + logger.error("No ipr_url given with no IPR_URL environment variable") if validate_json: if not ipr_conn: - raise ValueError("ipr_url required to validate json") + raise ValueError("ipr_url required to validate_json") ipr_result = ipr_conn.validate_json(content) return ipr_result if upload_json: if not ipr_conn: - raise ValueError("ipr_url required to upload json") + raise ValueError("ipr_url required to upload_json") ipr_result = ipr_conn.upload_report( content, mins_to_wait, async_upload, ignore_extra_fields ) @@ -419,14 +419,15 @@ def ipr_report( ) # GKB CONNECTION + gkb_user = graphkb_username if graphkb_username else username + gkb_pass = graphkb_password if graphkb_password else password + graphkb_url = graphkb_url if graphkb_url else os.environ.get("GRAPHKB_URL", "") if graphkb_url: logger.info(f"connecting to graphkb: {graphkb_url}") graphkb_conn = GraphKBConnection(graphkb_url) else: - graphkb_conn = GraphKBConnection() - - gkb_user = graphkb_username if graphkb_username else username - gkb_pass = graphkb_password if graphkb_password else password + # graphkb_conn = GraphKBConnection() # This will just error on trying to login + raise ValueError("graphkb_url is required") graphkb_conn.login(gkb_user, gkb_pass) @@ -464,16 +465,14 @@ def ipr_report( if match_germline: # verify germline kb statements matched germline observed variants, not somatic variants org_len = len(gkb_matches) - gkb_matches = [ - Hashabledict(match) for match in germline_kb_matches(gkb_matches, all_variants) - ] + gkb_matches = germline_kb_matches(gkb_matches, all_variants) num_removed = org_len - len(gkb_matches) if num_removed: logger.info(f"Removing {num_removed} germline events without medical matches.") if custom_kb_match_filter: logger.info(f"custom_kb_match_filter on {len(gkb_matches)} variants") - gkb_matches = [Hashabledict(match) for match in custom_kb_match_filter(gkb_matches)] + gkb_matches = custom_kb_match_filter(gkb_matches) logger.info(f"\t custom_kb_match_filter left {len(gkb_matches)} variants") # GENE INFORMATION @@ -502,7 +501,7 @@ def ipr_report( if include_ipr_variant_text: if not ipr_conn: - raise ValueError("ipr_url required to to include ipr variant text") + raise ValueError("ipr_url required to include_ipr_variant_text") ipr_comments = get_ipr_analyst_comments( ipr_conn, gkb_matches, @@ -524,7 +523,7 @@ def ipr_report( # KEY ALTERATIONS key_alterations, variant_counts = create_key_alterations( - gkb_matches, all_variants, kb_matched_sections['kbMatches'] + gkb_matches, all_variants, kb_matched_sections["kbMatches"] ) # OUTPUT CONTENT @@ -563,10 +562,10 @@ def ipr_report( # if input includes hrdScore field, that is ok to pass to db # but prefer the 'hrd' field if it exists - if output.get('hrd'): - if output.get('hrd').get('score'): - output['hrdScore'] = output['hrd']['score'] - output.pop('hrd') # kbmatches have already been made + if output.get("hrd"): + if output.get("hrd").get("score"): + output["hrdScore"] = output["hrd"]["score"] + output.pop("hrd") # kbmatches have already been made ipr_result = {} upload_error = None diff --git a/pori_python/types.py b/pori_python/types.py index faec8f2..eb3c2ef 100644 --- a/pori_python/types.py +++ b/pori_python/types.py @@ -66,6 +66,11 @@ class Statement(Record): displayNameTemplate: str +class Hashabledict(dict): + def __hash__(self): + return hash(frozenset(self)) + + class KbMatch(TypedDict): variant: str variantType: str @@ -91,7 +96,8 @@ class KbMatch(TypedDict): class KbVariantMatch(TypedDict): - variantKey: str + variant: str + # variantKey: str variantType: str kbVariant: str kbVariantId: str @@ -124,15 +130,10 @@ class KbMatchedStatementConditionSet(TypedDict): class KbMatchSections(TypedDict): kbMatchedStatements: List[KbMatchedStatement] - kbMatchedVariants: List[KbVariantMatch] + kbMatches: List[KbVariantMatch] kbMatchedStatementConditions: List[KbMatchedStatementConditionSet] -class Hashabledict(dict): - def __hash__(self): - return hash(frozenset(self)) - - class IprVariantBase(TypedDict): """Required properties of all variants for IPR.""" @@ -160,6 +161,7 @@ class IprCopyVariant(IprGeneVariant): # variantType == 'cnv' kbCategory: str cnvState: str + chromosomeBand: str class IprExprVariant(IprGeneVariant): diff --git a/tests/test_ipr/test_inputs.py b/tests/test_ipr/test_inputs.py index e99d170..d5958a2 100644 --- a/tests/test_ipr/test_inputs.py +++ b/tests/test_ipr/test_inputs.py @@ -7,8 +7,8 @@ from pori_python.graphkb.match import INPUT_COPY_CATEGORIES from pori_python.ipr.constants import ( - MSI_MAPPING, HRD_MAPPING, + MSI_MAPPING, TMB_SIGNATURE, TMB_SIGNATURE_HIGH_THRESHOLD, ) @@ -21,8 +21,8 @@ preprocess_cosmic, preprocess_expression_variants, preprocess_hla, - preprocess_msi, preprocess_hrd, + preprocess_msi, preprocess_signature_variants, preprocess_small_mutations, preprocess_structural_variants, @@ -50,9 +50,11 @@ "HLA-C*06", } EXPECTED_TMB = {TMB_SIGNATURE} -EXPECTED_MSI = {MSI_MAPPING.get("microsatellite instability")["signatureName"]} +EXPECTED_MSI = {MSI_MAPPING.get("microsatellite instability", {}).get("signatureName", "")} EXPECTED_HRD = { - HRD_MAPPING.get("homologous recombination deficiency strong signature")["signatureName"] + HRD_MAPPING.get("homologous recombination deficiency strong signature", {}).get( + "signatureName", "" + ) } diff --git a/tests/test_ipr/test_ipr.py b/tests/test_ipr/test_ipr.py index 9994cf4..e7c68df 100644 --- a/tests/test_ipr/test_ipr.py +++ b/tests/test_ipr/test_ipr.py @@ -5,13 +5,13 @@ from pori_python.graphkb import vocab as gkb_vocab from pori_python.ipr.ipr import ( convert_statements_to_alterations, + create_key_alterations, germline_kb_matches, get_kb_disease_matches, get_kb_matched_statements, + get_kb_matches_sections, get_kb_statement_matched_conditions, get_kb_variants, - get_kb_matches_sections, - create_key_alterations, ) from pori_python.types import Statement @@ -497,10 +497,10 @@ def test_germline_kb_matches(self): ] ALL_VARIANTS = [ - {"variant": "var1", "key": '1', "variantType": 'mut'}, - {"variant": "var2", "key": '2', "variantType": 'mut'}, - {"variant": "var3", "key": '3', "variantType": 'mut'}, - {"variant": "var4", "key": '4', "variantType": 'mut'}, + {"variant": "var1", "key": "1", "variantType": "mut"}, + {"variant": "var2", "key": "2", "variantType": "mut"}, + {"variant": "var3", "key": "3", "variantType": "mut"}, + {"variant": "var4", "key": "4", "variantType": "mut"}, ] BASIC_GKB_MATCH = { @@ -709,8 +709,8 @@ def test_partial_matches_omitted(self): gkb_matches = create_gkb_matches(input_fields) sections = get_kb_matches_sections(gkb_matches, allow_partial_matches=False) - stmts = sections['kbMatchedStatements'] - kbcs = sections['kbStatementMatchedConditions'] + stmts = sections["kbMatchedStatements"] + kbcs = sections["kbStatementMatchedConditions"] assert len(stmts) == 2 assert len(kbcs) == 1 # X only assert kbcs[0]["kbStatementId"] == "X" @@ -796,14 +796,14 @@ def test_kbvariants_removed_from_set_when_not_part_of_full_conditionset_match(se item["kbVariant"] = "test" gkb_matches = create_gkb_matches(input_fields) sections1 = get_kb_matches_sections(gkb_matches, allow_partial_matches=False) - kbcs1 = sections1['kbStatementMatchedConditions'] - kbvars1 = sections1['kbMatches'] + kbcs1 = sections1["kbStatementMatchedConditions"] + kbvars1 = sections1["kbMatches"] assert len(kbcs1) == 1 # only fully matched condition sets included assert len(kbvars1) == 2 # therefore, kbvars associated with stmt X are pruned sections2 = get_kb_matches_sections(gkb_matches, allow_partial_matches=True) - kbcs2 = sections2['kbStatementMatchedConditions'] - kbvars2 = sections2['kbMatches'] + kbcs2 = sections2["kbStatementMatchedConditions"] + kbvars2 = sections2["kbMatches"] assert len(kbcs2) == 2 # all condition sets included assert len(kbvars2) == 3 # therefore, no pruning @@ -844,12 +844,12 @@ def test_create_key_alterations_includes_only_pruned_kbmatches(self): sections1 = get_kb_matches_sections(gkb_matches, allow_partial_matches=False) key_alts1, counts1 = create_key_alterations( - gkb_matches, ALL_VARIANTS, sections1['kbMatches'] + gkb_matches, ALL_VARIANTS, sections1["kbMatches"] ) sections2 = get_kb_matches_sections(gkb_matches, allow_partial_matches=True) key_alts2, counts2 = create_key_alterations( - gkb_matches, ALL_VARIANTS, sections2['kbMatches'] + gkb_matches, ALL_VARIANTS, sections2["kbMatches"] ) # check partial-match-only variants are not included in key alterations when