From ca8d5f096f6467f7cb5f1622e3b8d2e4ca136119 Mon Sep 17 00:00:00 2001 From: mathieulemieux Date: Mon, 11 May 2026 15:16:41 -0700 Subject: [PATCH 01/10] Update get_cancer_genes() to fit consensus definition of cancer gene --- pori_python/graphkb/genes.py | 31 ++++++++++++++++++++++++++----- tests/test_graphkb/test_genes.py | 20 ++++++++++++-------- 2 files changed, 38 insertions(+), 13 deletions(-) diff --git a/pori_python/graphkb/genes.py b/pori_python/graphkb/genes.py index 09da3ed7..f2899ede 100644 --- a/pori_python/graphkb/genes.py +++ b/pori_python/graphkb/genes.py @@ -24,11 +24,11 @@ ) from .match import get_equivalent_features from .util import get_rid, logger, looks_like_rid -from .vocab import get_terms_set +from .vocab import convert_to_rid_list, get_terms_set, query_by_name def _get_tumourigenesis_genes_list( - conn: GraphKBConnection, relevance: str, sources: List[str], ignore_cache: bool = False + conn: GraphKBConnection, relevance: str | List[str], sources: str | List[str], ignore_cache: bool = False ) -> List[Ontology]: statements = cast( List[Statement], @@ -66,7 +66,7 @@ def get_oncokb_oncogenes(conn: GraphKBConnection) -> List[Ontology]: Returns: gene (Feature) records """ - return _get_tumourigenesis_genes_list(conn, ONCOGENE, [ONCOKB_SOURCE_NAME]) + return _get_tumourigenesis_genes_list(conn, ONCOGENE, ONCOKB_SOURCE_NAME) def get_oncokb_tumour_supressors(conn: GraphKBConnection) -> List[Ontology]: @@ -78,20 +78,41 @@ def get_oncokb_tumour_supressors(conn: GraphKBConnection) -> List[Ontology]: Returns: gene (Feature) records """ - return _get_tumourigenesis_genes_list(conn, TUMOUR_SUPPRESSIVE, [ONCOKB_SOURCE_NAME]) + return _get_tumourigenesis_genes_list(conn, TUMOUR_SUPPRESSIVE, ONCOKB_SOURCE_NAME) def get_cancer_genes(conn: GraphKBConnection) -> List[Ontology]: """Get the list of cancer genes stored in GraphKB derived from OncoKB & TSO500. + Cancer genes include oncogenes, tumour supressor genes and other cancer genes. 
+ Args: conn: the graphkb connection object Returns: gene (Feature) records """ + cancer_gene_rid = convert_to_rid_list( + conn.query( + query_by_name('Vocabulary', CANCER_GENE) + ) + ) + associated_terms = conn.post( + '/subgraphs/Vocabulary', + { + 'subgraphType': 'children', + 'base': cancer_gene_rid, + }, + ) + associated_term_names = list( + map( + lambda x: x['name'], + associated_terms['result']['g']['nodes'].values(), + ), + ) + return _get_tumourigenesis_genes_list( - conn, CANCER_GENE, [ONCOKB_SOURCE_NAME, TSO500_SOURCE_NAME] + conn, associated_term_names, [ONCOKB_SOURCE_NAME, TSO500_SOURCE_NAME] ) diff --git a/tests/test_graphkb/test_genes.py b/tests/test_graphkb/test_genes.py index 90efe5d4..f3440f83 100644 --- a/tests/test_graphkb/test_genes.py +++ b/tests/test_graphkb/test_genes.py @@ -27,7 +27,7 @@ CANONICAL_ONCOGENES = ['kras', 'nras', 'alk'] CANONICAL_TS = ['cdkn2a', 'tp53'] -CANONICAL_CG = ['alb'] +CANONICAL_OTHER_CG = ['alb'] CANONICAL_FUSION_GENES = ['alk', 'ewsr1', 'fli1'] CANONICAL_STRUCTURAL_VARIANT_GENES = ['brca1', 'dpyd', 'pten'] CANNONICAL_THERAPY_GENES = ['erbb2', 'brca2', 'egfr'] @@ -119,7 +119,7 @@ def test_oncogene(conn): assert gene in names for gene in CANONICAL_TS: assert gene not in names - for gene in CANONICAL_CG: + for gene in CANONICAL_OTHER_CG: assert gene not in names @@ -131,7 +131,7 @@ def test_tumour_supressors(conn): assert gene in names for gene in CANONICAL_ONCOGENES: assert gene not in names - for gene in CANONICAL_CG: + for gene in CANONICAL_OTHER_CG: assert gene not in names @@ -142,12 +142,12 @@ def test_tumour_supressors(conn): def test_cancer_genes(conn): result = get_cancer_genes(conn) names = {row['name'] for row in result} - for gene in CANONICAL_CG: + for gene in CANONICAL_OTHER_CG: assert gene in names for gene in CANONICAL_TS: - assert gene not in names + assert gene in names for gene in CANONICAL_ONCOGENES: - assert gene not in names + assert gene in names @pytest.mark.skipif( @@ -254,7 +254,7 @@ def 
test_get_gene_information(conn): conn, CANONICAL_ONCOGENES + CANONICAL_TS - + CANONICAL_CG + + CANONICAL_OTHER_CG + CANONICAL_FUSION_GENES + CANONICAL_STRUCTURAL_VARIANT_GENES + CANNONICAL_THERAPY_GENES @@ -300,7 +300,11 @@ def test_get_gene_information(conn): f'Missed kbStatementRelated {gene}' ) - for gene in CANONICAL_CG: + for gene in ( + CANONICAL_ONCOGENES + + CANONICAL_TS + + CANONICAL_OTHER_CG + ): assert gene in [g['name'] for g in gene_info if g.get('cancerGeneListMatch')], ( f'Missed cancerGeneListMatch {gene}' ) From bca3e1d912dc3bcf5d694d0e5a100e26bfe157bf Mon Sep 17 00:00:00 2001 From: mathieulemieux Date: Mon, 11 May 2026 15:23:10 -0700 Subject: [PATCH 02/10] linting --- pori_python/graphkb/genes.py | 11 +++++------ tests/test_graphkb/test_genes.py | 6 +----- 2 files changed, 6 insertions(+), 11 deletions(-) diff --git a/pori_python/graphkb/genes.py b/pori_python/graphkb/genes.py index f2899ede..4f6de23a 100644 --- a/pori_python/graphkb/genes.py +++ b/pori_python/graphkb/genes.py @@ -28,7 +28,10 @@ def _get_tumourigenesis_genes_list( - conn: GraphKBConnection, relevance: str | List[str], sources: str | List[str], ignore_cache: bool = False + conn: GraphKBConnection, + relevance: str | List[str], + sources: str | List[str], + ignore_cache: bool = False, ) -> List[Ontology]: statements = cast( List[Statement], @@ -92,11 +95,7 @@ def get_cancer_genes(conn: GraphKBConnection) -> List[Ontology]: Returns: gene (Feature) records """ - cancer_gene_rid = convert_to_rid_list( - conn.query( - query_by_name('Vocabulary', CANCER_GENE) - ) - ) + cancer_gene_rid = convert_to_rid_list(conn.query(query_by_name('Vocabulary', CANCER_GENE))) associated_terms = conn.post( '/subgraphs/Vocabulary', { diff --git a/tests/test_graphkb/test_genes.py b/tests/test_graphkb/test_genes.py index f3440f83..5986b2f0 100644 --- a/tests/test_graphkb/test_genes.py +++ b/tests/test_graphkb/test_genes.py @@ -300,11 +300,7 @@ def test_get_gene_information(conn): f'Missed kbStatementRelated 
{gene}' ) - for gene in ( - CANONICAL_ONCOGENES - + CANONICAL_TS - + CANONICAL_OTHER_CG - ): + for gene in CANONICAL_ONCOGENES + CANONICAL_TS + CANONICAL_OTHER_CG: assert gene in [g['name'] for g in gene_info if g.get('cancerGeneListMatch')], ( f'Missed cancerGeneListMatch {gene}' ) From d6f7c9aef32e0ac6629ac2e5f1669afb3db06b41 Mon Sep 17 00:00:00 2001 From: mathieulemieux Date: Tue, 12 May 2026 08:30:27 -0700 Subject: [PATCH 03/10] Add tumourigenesis to CANCER_GENE for backward compatibility --- pori_python/graphkb/constants.py | 5 ++++- pori_python/graphkb/genes.py | 6 ++++-- pori_python/graphkb/vocab.py | 2 +- 3 files changed, 9 insertions(+), 4 deletions(-) diff --git a/pori_python/graphkb/constants.py b/pori_python/graphkb/constants.py index fe22f4a0..07f686b9 100644 --- a/pori_python/graphkb/constants.py +++ b/pori_python/graphkb/constants.py @@ -59,7 +59,10 @@ TSO500_SOURCE_NAME = 'tso500' ONCOGENE = 'oncogenic' TUMOUR_SUPPRESSIVE = 'tumour suppressive' -CANCER_GENE = 'cancer gene' +CANCER_GENE = [ + 'cancer gene', + 'tumourigenesis', +] # KBDEV-1532. 
tumourigenesis for backward compatibility FUSION_NAMES = ['structural variant', 'fusion'] GSC_PHARMACOGENOMIC_SOURCE_EXCLUDE_LIST = ['cancer genome interpreter', 'civic'] diff --git a/pori_python/graphkb/genes.py b/pori_python/graphkb/genes.py index 4f6de23a..833536fe 100644 --- a/pori_python/graphkb/genes.py +++ b/pori_python/graphkb/genes.py @@ -95,12 +95,14 @@ def get_cancer_genes(conn: GraphKBConnection) -> List[Ontology]: Returns: gene (Feature) records """ - cancer_gene_rid = convert_to_rid_list(conn.query(query_by_name('Vocabulary', CANCER_GENE))) + cancer_gene_rids = convert_to_rid_list( + conn.query(query_by_name('Vocabulary', CANCER_GENE)), + ) associated_terms = conn.post( '/subgraphs/Vocabulary', { 'subgraphType': 'children', - 'base': cancer_gene_rid, + 'base': cancer_gene_rids, }, ) associated_term_names = list( diff --git a/pori_python/graphkb/vocab.py b/pori_python/graphkb/vocab.py index e9242a7a..2beec5b1 100644 --- a/pori_python/graphkb/vocab.py +++ b/pori_python/graphkb/vocab.py @@ -6,7 +6,7 @@ from .util import convert_to_rid_list -def query_by_name(ontology_class: str, base_term_name: str) -> Dict: +def query_by_name(ontology_class: str, base_term_name: str | list[str]) -> Dict: return {'target': ontology_class, 'filters': {'name': base_term_name}} From 458f88fe042c253a2c3026c1594becc5be2ec8d8 Mon Sep 17 00:00:00 2001 From: mathieulemieux Date: Tue, 12 May 2026 09:43:17 -0700 Subject: [PATCH 04/10] Use Union in type hint instead of pipe --- pori_python/graphkb/genes.py | 6 +++--- pori_python/graphkb/vocab.py | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/pori_python/graphkb/genes.py b/pori_python/graphkb/genes.py index 833536fe..bdc4b17c 100644 --- a/pori_python/graphkb/genes.py +++ b/pori_python/graphkb/genes.py @@ -2,7 +2,7 @@ from __future__ import annotations -from typing import Any, Dict, List, Sequence, Set, Tuple, cast +from typing import Any, Dict, List, Sequence, Set, Tuple, cast, Union from typing_extensions 
import deprecated from pori_python.types import IprGene, Ontology, Record, Statement, Variant @@ -29,8 +29,8 @@ def _get_tumourigenesis_genes_list( conn: GraphKBConnection, - relevance: str | List[str], - sources: str | List[str], + relevance: Union[str, list[str]], + sources: Union[str, list[str]], ignore_cache: bool = False, ) -> List[Ontology]: statements = cast( diff --git a/pori_python/graphkb/vocab.py b/pori_python/graphkb/vocab.py index 2beec5b1..bb96e5f5 100644 --- a/pori_python/graphkb/vocab.py +++ b/pori_python/graphkb/vocab.py @@ -1,4 +1,4 @@ -from typing import Callable, Dict, Iterable, List, Set, cast +from typing import Callable, Dict, Iterable, List, Set, cast, Union from pori_python.types import Ontology @@ -6,7 +6,7 @@ from .util import convert_to_rid_list -def query_by_name(ontology_class: str, base_term_name: str | list[str]) -> Dict: +def query_by_name(ontology_class: str, base_term_name: Union[str, list[str]]) -> Dict: return {'target': ontology_class, 'filters': {'name': base_term_name}} From 9942425176f261f2c3e2ede4f61d142090c083ce Mon Sep 17 00:00:00 2001 From: mathieulemieux Date: Wed, 13 May 2026 16:20:06 -0700 Subject: [PATCH 05/10] Add get_related_records() and get_related_terms() to GraphKBConnection --- pori_python/graphkb/util.py | 58 +++++++++++++++++++++++++++++++++ tests/test_graphkb/test_util.py | 29 ++++++++++++++++- 2 files changed, 86 insertions(+), 1 deletion(-) diff --git a/pori_python/graphkb/util.py b/pori_python/graphkb/util.py index 075084ff..2508b30b 100644 --- a/pori_python/graphkb/util.py +++ b/pori_python/graphkb/util.py @@ -366,6 +366,64 @@ def version(self) -> Dict[str, str]: """ return self.request('version') + def get_related_records( + self, + base: Union[str, list[str]], + ontology: str, + subgraphType: str, + returnProperties=[], + ): + """ + Given some base node RIDs, an ontology class and a subgraph type, + leverage the subgraphs route to return the list of related nodes. 
+ + Args: + base: the base node RIDs to start the graph traversal from + ontology: the ontology class to traverse + subgraphType: the type of traversal. See options in API specs + returnProperties: additional record properties to return + + Returns: + list of related node record(s) traversed + """ + related = self.post( + uri=f'/subgraphs/{ontology}', + data={ + 'base': base if isinstance(base, list) else [base], + 'subgraphType': subgraphType, + 'returnProperties': returnProperties, + }, + ) + return related['result']['g']['nodes'] + + def get_related_terms( + self, + terms: Union[str, list[str]], + ontology: str = 'Vocabulary', + subgraphType: str = 'similar', + ) -> list[str]: + """ + Given some base term name(s), an ontology class and a subgraph type, + leverage the subgraphs route to return the list of related term name(s) + + Args: + terms: the base term name(s) to start the graph traversal from + ontology: the ontology class to traverse + subgraphType: the type of traversal + + Returns: + list of related term name(s) + """ + rids = convert_to_rid_list(self.query({'target': ontology, 'filters': {'name': terms}})) + nodes = self.get_related_records( + base=rids, + ontology=ontology, + subgraphType=subgraphType, + ) + return list( + map(lambda x: x['name'], nodes.values()), + ) + def get_rid(conn: GraphKBConnection, target: str, name: str) -> str: """ diff --git a/tests/test_graphkb/test_util.py b/tests/test_graphkb/test_util.py index e0173a0f..dbbb2c2b 100644 --- a/tests/test_graphkb/test_util.py +++ b/tests/test_graphkb/test_util.py @@ -152,7 +152,7 @@ def test_stringifyVariant_positional(self, conn, rid, createdAt, stringifiedVari assert util.stringifyVariant(variant=variant, **opt) == stringifiedVariant -class TestVersion: +class TestGraphKBConnection: def test_version(self, conn): version = conn.version assert version['db'] in [ @@ -164,3 +164,30 @@ def test_version(self, conn): assert SEMANTIC_VERSIONING_REGEX.match(version['api']) assert 
SEMANTIC_VERSIONING_REGEX.match(version['parser']) assert SEMANTIC_VERSIONING_REGEX.match(version['schema']) + + def test_get_related_records(self, conn): + base = util.convert_to_rid_list( + conn.query({'target': 'Vocabulary', 'filters': {'name': 'missense'}}) + ) + records = conn.get_related_records( + base=base, + ontology='Vocabulary', + subgraphType='similar', + returnProperties=['displayName'], + ) + assert 'missense mutation' in list(map(lambda x: x['displayName'], records.values())) + + def test_get_related_terms(self, conn): + # with defaults + vocab_terms = conn.get_related_terms( + terms='missense', + ) + assert 'missense mutation' in vocab_terms + + # overriding ontology & subgraphType defaults + disease_terms = conn.get_related_terms( + terms='all solid tumors', + ontology='Disease', + subgraphType='parents', + ) + assert 'cancer' in disease_terms From f13cf6bfb43bf8e677cae587266bc9d1fb06e21a Mon Sep 17 00:00:00 2001 From: mathieulemieux Date: Wed, 13 May 2026 16:23:34 -0700 Subject: [PATCH 06/10] Refactor get_cancer_genes() to use get_related_terms() --- pori_python/graphkb/genes.py | 29 +++++++---------------------- 1 file changed, 7 insertions(+), 22 deletions(-) diff --git a/pori_python/graphkb/genes.py b/pori_python/graphkb/genes.py index bdc4b17c..4d109818 100644 --- a/pori_python/graphkb/genes.py +++ b/pori_python/graphkb/genes.py @@ -24,9 +24,7 @@ ) from .match import get_equivalent_features from .util import get_rid, logger, looks_like_rid -from .vocab import convert_to_rid_list, get_terms_set, query_by_name - - +from .vocab import get_terms_set def _get_tumourigenesis_genes_list( conn: GraphKBConnection, relevance: Union[str, list[str]], @@ -85,8 +83,8 @@ def get_oncokb_tumour_supressors(conn: GraphKBConnection) -> List[Ontology]: def get_cancer_genes(conn: GraphKBConnection) -> List[Ontology]: - """Get the list of cancer genes stored in GraphKB derived from OncoKB & TSO500. 
- + """ + Get the list of cancer genes stored in GraphKB derived from OncoKB & TSO500. Cancer genes include oncogenes, tumour supressor genes and other cancer genes. Args: @@ -95,25 +93,12 @@ def get_cancer_genes(conn: GraphKBConnection) -> List[Ontology]: Returns: gene (Feature) records """ - cancer_gene_rids = convert_to_rid_list( - conn.query(query_by_name('Vocabulary', CANCER_GENE)), - ) - associated_terms = conn.post( - '/subgraphs/Vocabulary', - { - 'subgraphType': 'children', - 'base': cancer_gene_rids, - }, + cancer_gene_terms = conn.get_related_terms( + terms=CANCER_GENE, + subgraphType='children', ) - associated_term_names = list( - map( - lambda x: x['name'], - associated_terms['result']['g']['nodes'].values(), - ), - ) - return _get_tumourigenesis_genes_list( - conn, associated_term_names, [ONCOKB_SOURCE_NAME, TSO500_SOURCE_NAME] + conn, cancer_gene_terms, [ONCOKB_SOURCE_NAME, TSO500_SOURCE_NAME] ) From f3a7f53a1719b4ef034ac4acd58fb6d7682a62be Mon Sep 17 00:00:00 2001 From: mathieulemieux Date: Wed, 13 May 2026 16:25:19 -0700 Subject: [PATCH 07/10] Add get_cancer_gene_flags() --- pori_python/graphkb/genes.py | 106 +++++++++++++++++++++++++++++++ tests/test_graphkb/test_genes.py | 22 +++++++ 2 files changed, 128 insertions(+) diff --git a/pori_python/graphkb/genes.py b/pori_python/graphkb/genes.py index 4d109818..09b3d9f7 100644 --- a/pori_python/graphkb/genes.py +++ b/pori_python/graphkb/genes.py @@ -25,6 +25,112 @@ from .match import get_equivalent_features from .util import get_rid, logger, looks_like_rid from .vocab import get_terms_set + + +def get_cancer_gene_flags( + conn: GraphKBConnection, + flags: bool = False, + ignore_cache: bool = False, +) -> Union[List, Dict]: + """ + Return all cancer genes, optionally sorted by flags. 
+ + Flag definitions: + oncogenic: relevance 'oncogenic' from OncoKB + tumourSuppressive: relevance 'tumour suppressive' from OncoKB + cancerGene: relevance 'cancer gene' AND child terms ('oncogenic', 'tumour suppressive', 'other cancer gene'), from OncoKB AND TSO500 + + Args: + conn: the graphkb connection object + flags: if True, return a dict of gene records keyed by flag instead of a single list + + Returns (if flags=False; default): list of unique gene records + [ , , ... ] + + Returns (if flags=True): dict of flags as keys, and list of gene records as value + { + 'oncogenic': [ , , ... ], + 'tumourSuppressive': [ , , ... ], + 'cancerGene': [ , , ... ], + } + """ + # all cancer gene statements + CANCER_GENES = conn.get_related_terms( + terms=CANCER_GENE, + subgraphType='children', + ) + statements = cast( + List[Statement], + conn.query( + { + 'target': 'Statement', + 'filters': { + 'relevance': {'target': 'Vocabulary', 'filters': {'name': CANCER_GENES}} + }, + 'returnProperties': [ + 'source.name', + 'relevance.name', + *[f'subject.{prop}' for prop in GENE_RETURN_PROPERTIES], + ], + }, + ignore_cache=ignore_cache, + ), + ) + + # post-query filtering (faster) + cancerGeneStms = list( + filter( + lambda r: ( + r['subject']['@class'] == 'Feature' + and r['subject']['biotype'] == 'gene' + and r['source']['name'] in [ONCOKB_SOURCE_NAME, TSO500_SOURCE_NAME] + ), + statements, + ) + ) + oncogenicStms = list( + filter( + lambda r: ( + r['relevance']['name'] == ONCOGENE and r['source']['name'] == ONCOKB_SOURCE_NAME + ), + cancerGeneStms, + ) + ) + tumourSuppressiveStms = list( + filter( + lambda r: ( + r['relevance']['name'] == TUMOUR_SUPPRESSIVE + and r['source']['name'] == ONCOKB_SOURCE_NAME + ), + cancerGeneStms, + ) + ) + + # Returning a sorted list of unique gene records, based on iProbe requirements + # Unique by name, sorted by displayName + names = set() # for unique gene names tracking + if not flags: + return cast( + List[Record], + sorted( + [ + r['subject'] + for r in cancerGeneStms 
if r['subject']['name'] not in names and not names.add(r['subject']['name']) + ], + key=lambda gene: gene['displayName'], + ), + ) + + # Returning a Dict of flags, with list of associated gene records + # Duplicates are ok + return { + 'cancerGene': [r['subject'] for r in cancerGeneStms], + 'oncogenic': [r['subject'] for r in oncogenicStms], + 'tumourSuppressive': [r['subject'] for r in tumourSuppressiveStms], + } + + def _get_tumourigenesis_genes_list( conn: GraphKBConnection, relevance: Union[str, list[str]], diff --git a/tests/test_graphkb/test_genes.py b/tests/test_graphkb/test_genes.py index 5986b2f0..d53b4e9d 100644 --- a/tests/test_graphkb/test_genes.py +++ b/tests/test_graphkb/test_genes.py @@ -8,6 +8,7 @@ from pori_python.graphkb import GraphKBConnection from pori_python.graphkb.genes import ( get_cancer_genes, + get_cancer_gene_flags, get_cancer_predisposition_info, get_gene_information, get_gene_linked_cancer_predisposition_info, @@ -111,6 +112,27 @@ def conn(): return conn +@pytest.mark.skipif(EXCLUDE_ONCOKB_TESTS, reason='excluding tests that depend on oncokb data') +def test_cancer_gene_flags(conn): + # wo/ flags + result = get_cancer_gene_flags(conn) + for gene in [*CANONICAL_OTHER_CG, *CANONICAL_TS, *CANONICAL_ONCOGENES]: + assert gene in {row['name'] for row in result} + # w/ flags + result = get_cancer_gene_flags(conn, flags=True) + for gene in [*CANONICAL_OTHER_CG, *CANONICAL_TS, *CANONICAL_ONCOGENES]: + assert gene in {row['name'] for row in result['cancerGene']} + for gene in CANONICAL_TS: + assert gene in {row['name'] for row in result['tumourSuppressive']} + assert gene not in {row['name'] for row in result['oncogenic']} + for gene in CANONICAL_ONCOGENES: + assert gene in {row['name'] for row in result['oncogenic']} + assert gene not in {row['name'] for row in result['tumourSuppressive']} + for gene in [*CANONICAL_OTHER_CG]: + assert gene not in {row['name'] for row in result['oncogenic']} + assert gene not in {row['name'] for row in 
result['tumourSuppressive']} + + @pytest.mark.skipif(EXCLUDE_ONCOKB_TESTS, reason='excluding tests that depend on oncokb data') def test_oncogene(conn): result = get_oncokb_oncogenes(conn) From 27ec42952e29a299b729213ab66ecae2852377b9 Mon Sep 17 00:00:00 2001 From: mathieulemieux Date: Wed, 13 May 2026 16:26:28 -0700 Subject: [PATCH 08/10] Deprecate _get_tumourigenesis_genes_list(), get_oncokb_oncogenes(), get_oncokb_tumour_supressors() and get_cancer_genes() --- pori_python/graphkb/genes.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pori_python/graphkb/genes.py b/pori_python/graphkb/genes.py index 09b3d9f7..82fd9c7f 100644 --- a/pori_python/graphkb/genes.py +++ b/pori_python/graphkb/genes.py @@ -131,6 +131,7 @@ def get_cancer_gene_flags( } +@deprecated('fuctionality replaced by get_cancer_gene_flags') def _get_tumourigenesis_genes_list( conn: GraphKBConnection, relevance: Union[str, list[str]], @@ -164,6 +165,7 @@ def _get_tumourigenesis_genes_list( return [gene for gene in genes.values()] +@deprecated('fuctionality replaced by get_cancer_gene_flags') def get_oncokb_oncogenes(conn: GraphKBConnection) -> List[Ontology]: """Get the list of oncogenes stored in GraphKB derived from OncoKB. @@ -176,6 +178,7 @@ def get_oncokb_oncogenes(conn: GraphKBConnection) -> List[Ontology]: return _get_tumourigenesis_genes_list(conn, ONCOGENE, ONCOKB_SOURCE_NAME) +@deprecated('fuctionality replaced by get_cancer_gene_flags') def get_oncokb_tumour_supressors(conn: GraphKBConnection) -> List[Ontology]: """Get the list of tumour supressor genes stored in GraphKB derived from OncoKB. @@ -188,6 +191,7 @@ def get_oncokb_tumour_supressors(conn: GraphKBConnection) -> List[Ontology]: return _get_tumourigenesis_genes_list(conn, TUMOUR_SUPPRESSIVE, ONCOKB_SOURCE_NAME) +@deprecated('fuctionality replaced by get_cancer_gene_flags') def get_cancer_genes(conn: GraphKBConnection) -> List[Ontology]: """ Get the list of cancer genes stored in GraphKB derived from OncoKB & TSO500. 
From 22f5b412697f2975fdd542951ee8c6b6c2bcb2bc Mon Sep 17 00:00:00 2001 From: mathieulemieux Date: Wed, 13 May 2026 16:27:19 -0700 Subject: [PATCH 09/10] Update get_gene_information() to use get_cancer_gene_flags() --- pori_python/graphkb/genes.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/pori_python/graphkb/genes.py b/pori_python/graphkb/genes.py index 82fd9c7f..63d14a9b 100644 --- a/pori_python/graphkb/genes.py +++ b/pori_python/graphkb/genes.py @@ -630,12 +630,12 @@ def get_gene_information( # PositionalVariant without a reference2 implies a smallMutation type gene_flags['knownSmallMutation'].add(condition['reference1']) # type: ignore - logger.info('fetching oncogenes list') - gene_flags['oncogene'] = convert_to_rid_set(get_oncokb_oncogenes(graphkb_conn)) - logger.info('fetching tumour supressors list') - gene_flags['tumourSuppressor'] = convert_to_rid_set(get_oncokb_tumour_supressors(graphkb_conn)) - logger.info('fetching cancerGeneListMatch list') - gene_flags['cancerGeneListMatch'] = convert_to_rid_set(get_cancer_genes(graphkb_conn)) + # cancer gene flags + logger.info('fetching cancer genes') + cancer_gene_flags = get_cancer_gene_flags(graphkb_conn, flags=True) + gene_flags['oncogene'] = convert_to_rid_set(cancer_gene_flags['oncogenic']) + gene_flags['tumourSuppressor'] = convert_to_rid_set(cancer_gene_flags['tumourSuppressive']) + gene_flags['cancerGeneListMatch'] = convert_to_rid_set(cancer_gene_flags['cancerGene']) logger.info('fetching therapeutic associated genes lists') gene_flags['therapeuticAssociated'] = convert_to_rid_set( From 2e56ea8e36b1cbb6c1a33b68ef4113040c7207d0 Mon Sep 17 00:00:00 2001 From: mathieulemieux Date: Wed, 13 May 2026 16:29:47 -0700 Subject: [PATCH 10/10] Add equivalent gene name caching to get_gene_information() --- pori_python/graphkb/genes.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/pori_python/graphkb/genes.py b/pori_python/graphkb/genes.py index 
63d14a9b..6c62e601 100644 --- a/pori_python/graphkb/genes.py +++ b/pori_python/graphkb/genes.py @@ -644,8 +644,14 @@ def get_gene_information( logger.info(f'Setting gene_info flags on {len(gene_names)} genes') result: List[IprGene] = [] + EQUIVALENT_CACHE = {} for gene_name in gene_names: - equivalent = convert_to_rid_set(get_equivalent_features(graphkb_conn, gene_name)) + if gene_name not in EQUIVALENT_CACHE: + EQUIVALENT_CACHE[gene_name] = convert_to_rid_set( + get_equivalent_features(graphkb_conn, gene_name) + ) + equivalent = EQUIVALENT_CACHE[gene_name] + row: Dict[str, str | bool] = {'name': gene_name} flagged = False for flag in gene_flags: