Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion pori_python/graphkb/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,10 @@
# Source / relevance names that the gene helpers compare against the
# `source.name` and `relevance.name` values of GraphKB statements.
TSO500_SOURCE_NAME = 'tso500'
ONCOGENE = 'oncogenic'
TUMOUR_SUPPRESSIVE = 'tumour suppressive'
# NOTE(review): this single-string assignment is immediately overridden by the
# list assigned to the same name just below.
CANCER_GENE = 'cancer gene'
# Base relevance term name(s) for cancer genes; callers expand these to their
# child terms (get_related_terms with subgraphType='children') before querying.
CANCER_GENE = [
'cancer gene',
'tumourigenesis',
] # KBDEV-1532. tumourigenesis for backward compatibility
FUSION_NAMES = ['structural variant', 'fusion']

GSC_PHARMACOGENOMIC_SOURCE_EXCLUDE_LIST = ['cancer genome interpreter', 'civic']
Expand Down
149 changes: 136 additions & 13 deletions pori_python/graphkb/genes.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

from __future__ import annotations

from typing import Any, Dict, List, Sequence, Set, Tuple, cast
from typing import Any, Dict, List, Sequence, Set, Tuple, cast, Union
from typing_extensions import deprecated

from pori_python.types import IprGene, Ontology, Record, Statement, Variant
Expand All @@ -27,8 +27,116 @@
from .vocab import get_terms_set


def get_cancer_gene_flags(
    conn: GraphKBConnection,
    flags: bool = False,
    ignore_cache: bool = False,
) -> Union[List, Dict]:
    """
    Return all cancer genes, optionally grouped by flag.

    Flag definitions:
        oncogenic: relevance 'oncogenic' from OncoKB
        tumourSuppressive: relevance 'tumour suppressive' from OncoKB
        cancerGene: relevance 'cancer gene' AND child terms ('oncogenic',
            'tumour suppressive', 'other cancer gene'), from OncoKB AND TSO500

    Args:
        conn: the graphkb connection object
        flags: if True, return the gene records grouped by flag instead of
            a single de-duplicated list
        ignore_cache: forwarded as-is to conn.query for the statements query

    Returns (if flags=False; default): list of gene records, unique by name
        and sorted by displayName
        [ <record>, <record>, ... ]

    Returns (if flags=True): dict of flags as keys, and list of gene records
        as value (duplicates are not removed)
        {
            'cancerGene': [ <record>, <record>, ... ],
            'oncogenic': [ <record>, <record>, ... ],
            'tumourSuppressive': [ <record>, <record>, ... ],
        }
    """
    # Expand the base relevance term(s) to include all of their child terms.
    cancer_gene_terms = conn.get_related_terms(
        terms=CANCER_GENE,
        subgraphType='children',
    )

    # All statements whose relevance is one of the cancer gene terms.
    statements = cast(
        List[Statement],
        conn.query(
            {
                'target': 'Statement',
                'filters': {
                    'relevance': {
                        'target': 'Vocabulary',
                        'filters': {'name': cancer_gene_terms},
                    }
                },
                'returnProperties': [
                    'source.name',
                    'relevance.name',
                    *[f'subject.{prop}' for prop in GENE_RETURN_PROPERTIES],
                ],
            },
            ignore_cache=ignore_cache,
        ),
    )

    # Subject class, biotype and source are filtered here, on the returned
    # records, rather than being pushed into the query itself.
    cancer_gene_sources = (ONCOKB_SOURCE_NAME, TSO500_SOURCE_NAME)
    cancer_gene_stms = [
        stm
        for stm in statements
        if stm['subject']['@class'] == 'Feature'
        and stm['subject']['biotype'] == 'gene'
        and stm['source']['name'] in cancer_gene_sources
    ]

    if not flags:
        # Unique by name (the first record seen for a name wins), then sorted
        # by displayName.
        unique_genes: Dict[str, Any] = {}
        for stm in cancer_gene_stms:
            unique_genes.setdefault(stm['subject']['name'], stm['subject'])
        return cast(
            List[Record],
            sorted(unique_genes.values(), key=lambda gene: gene['displayName']),
        )

    # Dict of flags, each with its list of associated gene records.
    # Duplicates are ok here. The oncogenic / tumour suppressive flags are
    # restricted to OncoKB statements only.
    return {
        'cancerGene': [stm['subject'] for stm in cancer_gene_stms],
        'oncogenic': [
            stm['subject']
            for stm in cancer_gene_stms
            if stm['relevance']['name'] == ONCOGENE
            and stm['source']['name'] == ONCOKB_SOURCE_NAME
        ],
        'tumourSuppressive': [
            stm['subject']
            for stm in cancer_gene_stms
            if stm['relevance']['name'] == TUMOUR_SUPPRESSIVE
            and stm['source']['name'] == ONCOKB_SOURCE_NAME
        ],
    }


@deprecated('fuctionality replaced by get_cancer_gene_flags')
def _get_tumourigenesis_genes_list(
conn: GraphKBConnection, relevance: str, sources: List[str], ignore_cache: bool = False
conn: GraphKBConnection,
relevance: Union[str, list[str]],
sources: Union[str, list[str]],
ignore_cache: bool = False,
) -> List[Ontology]:
statements = cast(
List[Statement],
Expand Down Expand Up @@ -57,6 +165,7 @@ def _get_tumourigenesis_genes_list(
return [gene for gene in genes.values()]


@deprecated('fuctionality replaced by get_cancer_gene_flags')
def get_oncokb_oncogenes(conn: GraphKBConnection) -> List[Ontology]:
"""Get the list of oncogenes stored in GraphKB derived from OncoKB.

Expand All @@ -66,9 +175,10 @@ def get_oncokb_oncogenes(conn: GraphKBConnection) -> List[Ontology]:
Returns:
gene (Feature) records
"""
return _get_tumourigenesis_genes_list(conn, ONCOGENE, [ONCOKB_SOURCE_NAME])
return _get_tumourigenesis_genes_list(conn, ONCOGENE, ONCOKB_SOURCE_NAME)


@deprecated('fuctionality replaced by get_cancer_gene_flags')
def get_oncokb_tumour_supressors(conn: GraphKBConnection) -> List[Ontology]:
"""Get the list of tumour supressor genes stored in GraphKB derived from OncoKB.

Expand All @@ -78,20 +188,27 @@ def get_oncokb_tumour_supressors(conn: GraphKBConnection) -> List[Ontology]:
Returns:
gene (Feature) records
"""
return _get_tumourigenesis_genes_list(conn, TUMOUR_SUPPRESSIVE, [ONCOKB_SOURCE_NAME])
return _get_tumourigenesis_genes_list(conn, TUMOUR_SUPPRESSIVE, ONCOKB_SOURCE_NAME)


@deprecated('fuctionality replaced by get_cancer_gene_flags')
def get_cancer_genes(conn: GraphKBConnection) -> List[Ontology]:
"""Get the list of cancer genes stored in GraphKB derived from OncoKB & TSO500.
"""
Get the list of cancer genes stored in GraphKB derived from OncoKB & TSO500.
Cancer genes include oncogenes, tumour supressor genes and other cancer genes.

Args:
conn: the graphkb connection object

Returns:
gene (Feature) records
"""
cancer_gene_terms = conn.get_related_terms(
terms=CANCER_GENE,
subgraphType='children',
)
return _get_tumourigenesis_genes_list(
conn, CANCER_GENE, [ONCOKB_SOURCE_NAME, TSO500_SOURCE_NAME]
conn, cancer_gene_terms, [ONCOKB_SOURCE_NAME, TSO500_SOURCE_NAME]
)


Expand Down Expand Up @@ -513,12 +630,12 @@ def get_gene_information(
# PositionalVariant without a reference2 implies a smallMutation type
gene_flags['knownSmallMutation'].add(condition['reference1']) # type: ignore

logger.info('fetching oncogenes list')
gene_flags['oncogene'] = convert_to_rid_set(get_oncokb_oncogenes(graphkb_conn))
logger.info('fetching tumour supressors list')
gene_flags['tumourSuppressor'] = convert_to_rid_set(get_oncokb_tumour_supressors(graphkb_conn))
logger.info('fetching cancerGeneListMatch list')
gene_flags['cancerGeneListMatch'] = convert_to_rid_set(get_cancer_genes(graphkb_conn))
# cancer gene flags
logger.info('fetching cancer genes')
cancer_gene_flags = get_cancer_gene_flags(graphkb_conn, flags=True)
gene_flags['oncogene'] = convert_to_rid_set(cancer_gene_flags['oncogenic'])
gene_flags['tumourSuppressor'] = convert_to_rid_set(cancer_gene_flags['tumourSuppressive'])
gene_flags['cancerGeneListMatch'] = convert_to_rid_set(cancer_gene_flags['cancerGene'])

logger.info('fetching therapeutic associated genes lists')
gene_flags['therapeuticAssociated'] = convert_to_rid_set(
Expand All @@ -527,8 +644,14 @@ def get_gene_information(

logger.info(f'Setting gene_info flags on {len(gene_names)} genes')
result: List[IprGene] = []
EQUIVALENT_CACHE = {}
for gene_name in gene_names:
equivalent = convert_to_rid_set(get_equivalent_features(graphkb_conn, gene_name))
if gene_name not in EQUIVALENT_CACHE:
EQUIVALENT_CACHE[gene_name] = convert_to_rid_set(
get_equivalent_features(graphkb_conn, gene_name)
)
equivalent = EQUIVALENT_CACHE[gene_name]

row: Dict[str, str | bool] = {'name': gene_name}
flagged = False
for flag in gene_flags:
Expand Down
58 changes: 58 additions & 0 deletions pori_python/graphkb/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -366,6 +366,64 @@ def version(self) -> Dict[str, str]:
"""
return self.request('version')

def get_related_records(
    self,
    base: Union[str, list[str]],
    ontology: str,
    subgraphType: str,
    returnProperties: Union[list[str], None] = None,
):
    """
    Given some base node RIDs, an ontology class and a subgraph type,
    leverage the subgraphs route to return the related nodes.

    Args:
        base: the base node RID(s) to start the graph traversal from; a single
            RID is wrapped in a list before being sent
        ontology: the ontology class to traverse
        subgraphType: the type of traversal. See options in API specs
        returnProperties: additional record properties to return; when omitted
            an empty list is sent

    Returns:
        the 'nodes' member of the graph in the response
        (``response['result']['g']['nodes']``). get_related_terms treats it as
        a mapping and iterates over its values.
    """
    # A fresh list per call: a list literal used as the default would be one
    # shared object handed to every request payload.
    properties = [] if returnProperties is None else returnProperties
    base_rids = base if isinstance(base, list) else [base]

    related = self.post(
        uri=f'/subgraphs/{ontology}',
        data={
            'base': base_rids,
            'subgraphType': subgraphType,
            'returnProperties': properties,
        },
    )
    return related['result']['g']['nodes']

def get_related_terms(
    self,
    terms: Union[str, list[str]],
    ontology: str = 'Vocabulary',
    subgraphType: str = 'similar',
) -> list[str]:
    """
    Resolve term name(s) to the names of their related terms.

    The base term name(s) are first looked up in the given ontology class to
    obtain their RIDs; those RIDs are then traversed through the subgraphs
    route (via get_related_records) and the name of every returned node is
    collected.

    Args:
        terms: the base term name(s) to start the graph traversal from
        ontology: the ontology class to traverse
        subgraphType: the type of traversal

    Returns:
        list of related term name(s)
    """
    # Look up the base records by name and keep only their RIDs.
    base_query = {'target': ontology, 'filters': {'name': terms}}
    base_rids = convert_to_rid_list(self.query(base_query))

    # Traverse from those RIDs; the result is iterated as a mapping of records.
    related = self.get_related_records(
        base=base_rids,
        ontology=ontology,
        subgraphType=subgraphType,
    )
    return [node['name'] for node in related.values()]


def get_rid(conn: GraphKBConnection, target: str, name: str) -> str:
"""
Expand Down
4 changes: 2 additions & 2 deletions pori_python/graphkb/vocab.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
from typing import Callable, Dict, Iterable, List, Set, cast
from typing import Callable, Dict, Iterable, List, Set, cast, Union

from pori_python.types import Ontology

from . import GraphKBConnection
from .util import convert_to_rid_list


def query_by_name(ontology_class: str, base_term_name: str) -> Dict:
def query_by_name(ontology_class: str, base_term_name: Union[str, list[str]]) -> Dict:
return {'target': ontology_class, 'filters': {'name': base_term_name}}


Expand Down
Loading