From 2207bf0ff43291bea09b10c9139d7168bb26d8d9 Mon Sep 17 00:00:00 2001
From: Eleanor Lewis
Date: Wed, 4 Feb 2026 16:55:42 -0800
Subject: [PATCH 1/3] use gkb disease matches for variant-text
---
pori_python/ipr/ipr.py | 21 +++++++++------------
pori_python/ipr/main.py | 7 ++++++-
pori_python/ipr/summary.py | 14 +++++++++++++-
tests/test_ipr/test_ipr.py | 4 ++--
tests/test_ipr/test_summary.py | 28 ++++++++++++++++++++++++++++
5 files changed, 58 insertions(+), 16 deletions(-)
diff --git a/pori_python/ipr/ipr.py b/pori_python/ipr/ipr.py
index d606de2..204413a 100644
--- a/pori_python/ipr/ipr.py
+++ b/pori_python/ipr/ipr.py
@@ -667,7 +667,7 @@ def get_kb_disease_matches(
kb_disease_match: Optional[str] = None,
verbose: bool = True,
useSubgraphsRoute: bool = True,
-) -> list[str]:
+) -> tuple[list[str], list[str]]:
disease_matches = []
@@ -703,7 +703,7 @@ def get_kb_disease_matches(
"base": base_records,
},
)
- disease_matches = list(response["result"]["g"]["nodes"].keys())
+ disease_matches = list(response["result"]["g"]["nodes"].values())
except Exception:
if verbose:
@@ -715,15 +715,10 @@ def get_kb_disease_matches(
if not useSubgraphsRoute:
if verbose:
logger.info(f"Matching disease ({kb_disease_match}) to graphkb using get_term_tree()")
- disease_matches = list(
- {
- r["@rid"]
- for r in gkb_vocab.get_term_tree(
- graphkb_conn,
- kb_disease_match,
- ontology_class="Disease",
- )
- }
+ disease_matches = gkb_vocab.get_term_tree(
+ graphkb_conn,
+ kb_disease_match,
+ ontology_class="Disease",
)
if not disease_matches:
@@ -732,4 +727,6 @@ def get_kb_disease_matches(
logger.error(msg)
raise ValueError(msg)
- return disease_matches
+ disease_match_rids = [item['@rid'] for item in disease_matches]
+ disease_match_names = [item['name'] for item in disease_matches]
+ return (disease_match_rids, disease_match_names)
diff --git a/pori_python/ipr/main.py b/pori_python/ipr/main.py
index 3a44a6d..5215793 100644
--- a/pori_python/ipr/main.py
+++ b/pori_python/ipr/main.py
@@ -433,7 +433,11 @@ def ipr_report(
# Matching disease RIDs from GraphKB using term tree
# (Will raise uncatched error if no match)
- disease_matches: list[str] = get_kb_disease_matches(graphkb_conn, kb_disease_match)
+ disease_match_records: [list[str], list[str]] = get_kb_disease_matches(
+ graphkb_conn, kb_disease_match
+ )
+ disease_matches: list[str] = disease_match_records[0]
+ disease_match_names: list[str] = disease_match_records[1]
# GKB MATCHING (AKA ANNOTATION)
gkb_matches: List[Hashabledict] = annotate_variants(
@@ -504,6 +508,7 @@ def ipr_report(
ipr_conn,
gkb_matches,
disease_name=kb_disease_match,
+ disease_match_names=disease_match_names,
project_name=content["project"],
report_type=content["template"],
include_nonspecific_disease=include_nonspecific_disease,
diff --git a/pori_python/ipr/summary.py b/pori_python/ipr/summary.py
index c6d63b2..f698767 100644
--- a/pori_python/ipr/summary.py
+++ b/pori_python/ipr/summary.py
@@ -385,6 +385,7 @@ def get_ipr_analyst_comments(
ipr_conn: IprConnection,
matches: Sequence[KbMatch] | Sequence[Hashabledict],
disease_name: str,
+ disease_match_names: list[str],
project_name: str,
report_type: str,
include_nonspecific_disease: bool = False,
@@ -403,6 +404,7 @@ def get_ipr_analyst_comments(
ipr_conn: connection to the ipr db
matches: list of kbmatches which will be included in the report
disease_name: str, eg 'colorectal cancer'
+ disease_match_names: list[str] of names considered to be equivalent to the disease name
project_name: str, eg TEST or pog
report_type: str, eg genomic or rapid
include_nonspecific_disease: bool - true if variant texts that don't explicitly
@@ -420,6 +422,8 @@ def get_ipr_analyst_comments(
# get the list of variants to check for custom text for
match_set = list(set([item["kbVariant"] for item in matches]))
+ disease_match_set = set([disease_name.lower()] + [item.lower() for item in disease_match_names])
+
for variant in match_set:
data = {
"variantName": variant,
@@ -451,7 +455,15 @@ def get_ipr_analyst_comments(
else:
itemlist = []
- disease_matches = [item for item in itemlist if disease_name in item["cancerType"]]
+ disease_matches = [
+ item
+ for item in itemlist
+ if len(
+ set([ct.lower() for ct in item["cancerType"]]).intersection(disease_match_set)
+ )
+ > 0
+ ]
+
if disease_matches:
itemlist = disease_matches
elif include_nonspecific_disease:
diff --git a/tests/test_ipr/test_ipr.py b/tests/test_ipr/test_ipr.py
index 9994cf4..4e74291 100644
--- a/tests/test_ipr/test_ipr.py
+++ b/tests/test_ipr/test_ipr.py
@@ -217,7 +217,7 @@ def graphkb_conn():
def query_side_effect(*args, **kwargs):
if args:
# for TestGetKbDiseaseMatches
- return [{"@rid": "#123:45"}]
+ return [{"@rid": "#123:45", "name": "name_for_#123:45"}]
query_index["value"] += 1
idx = query_index["value"]
return query_return_values[idx] if idx < len(query_return_values) else []
@@ -245,7 +245,7 @@ def mock_get_source(source):
@pytest.fixture(autouse=True)
def mock_get_term_tree(monkeypatch):
- mock_func = Mock(return_value=[{"@rid": d} for d in DISEASE_RIDS])
+ mock_func = Mock(return_value=[{"@rid": d, "name": 'name_of_' + d} for d in DISEASE_RIDS])
monkeypatch.setattr(gkb_vocab, "get_term_tree", mock_func)
yield mock_func
mock_func.reset_mock()
diff --git a/tests/test_ipr/test_summary.py b/tests/test_ipr/test_summary.py
index 083683e..a17edca 100644
--- a/tests/test_ipr/test_summary.py
+++ b/tests/test_ipr/test_summary.py
@@ -210,6 +210,7 @@ def test_gets_fully_matched_output_when_possible(self):
ipr_conn,
matches=matches,
disease_name="test1",
+ disease_match_names=[],
project_name="test2",
report_type="test3",
include_nonspecific_project=False,
@@ -228,6 +229,7 @@ def test_omits_nonspecific_project_matches_when_specified(self):
ipr_conn,
matches=matches,
disease_name="test1",
+ disease_match_names=[],
project_name="notfound",
report_type="test3",
include_nonspecific_project=False,
@@ -243,6 +245,7 @@ def test_omits_nonspecific_template_matches_when_specified(self):
ipr_conn,
matches=matches,
disease_name="test1",
+ disease_match_names=[],
project_name="test2",
report_type="notfound",
include_nonspecific_project=True,
@@ -258,6 +261,7 @@ def test_omits_nonspecific_disease_matches_when_specified(self):
ipr_conn,
matches=matches,
disease_name="notfound",
+ disease_match_names=[],
project_name="test2",
report_type="test3",
include_nonspecific_project=True,
@@ -273,6 +277,7 @@ def test_includes_nonspecific_project_matches_when_specified(self):
ipr_conn,
matches=matches,
disease_name="test1",
+ disease_match_names=[],
project_name="notfound",
report_type="test3",
include_nonspecific_project=True,
@@ -290,6 +295,7 @@ def test_includes_nonspecific_template_matches_when_specified(self):
ipr_conn,
matches=matches,
disease_name="test1",
+ disease_match_names=[],
project_name="test2",
report_type="notfound",
include_nonspecific_project=False,
@@ -307,6 +313,7 @@ def test_includes_nonspecific_disease_matches_when_specified(self):
ipr_conn,
matches=matches,
disease_name="notfound",
+ disease_match_names=[],
project_name="test2",
report_type="test3",
include_nonspecific_project=False,
@@ -318,6 +325,25 @@ def test_includes_nonspecific_disease_matches_when_specified(self):
assert summary_lines[2] == "no cancerType
"
assert len(summary_lines) == 3
+ def test_includes_all_graphkb_disease_matches(self):
+ ipr_conn = MagicMock(get=MagicMock(side_effect=copy(mock_ipr_results)))
+ matches = [{"kbVariant": "ERBB2 amplification"}]
+ ipr_summary = get_ipr_analyst_comments(
+ ipr_conn,
+ matches=matches,
+ disease_name="notfound",
+ disease_match_names=["TEST1"],
+ project_name="test2",
+ report_type="test3",
+ include_nonspecific_project=False,
+ include_nonspecific_disease=False,
+ include_nonspecific_template=False,
+ )
+ summary_lines = ipr_summary.split("\n")
+ assert summary_lines[1] == "ERBB2 amplification (test1,test)
"
+ assert summary_lines[2] == "normal
"
+ assert len(summary_lines) == 3
+
def test_prepare_section_for_multiple_variants(self):
ipr_conn = MagicMock(get=MagicMock(side_effect=copy(mock_ipr_results)))
# NB this test relies on matches being processed in this order
@@ -326,6 +352,7 @@ def test_prepare_section_for_multiple_variants(self):
ipr_conn,
matches=matches,
disease_name="test1",
+ disease_match_names=[],
project_name="test2",
report_type="test3",
include_nonspecific_project=False,
@@ -346,6 +373,7 @@ def test_empty_section_when_no_variant_match(self):
ipr_conn,
matches=matches,
disease_name="test1",
+ disease_match_names=[],
project_name="test2",
report_type="test3",
include_nonspecific_project=False,
From a3715db2fbefff8daf4d68d5ff091785eaaf1cee Mon Sep 17 00:00:00 2001
From: Eleanor Lewis
Date: Wed, 4 Feb 2026 16:59:55 -0800
Subject: [PATCH 2/3] use consistent disease match component name
---
pori_python/ipr/main.py | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/pori_python/ipr/main.py b/pori_python/ipr/main.py
index 5215793..4786dd3 100644
--- a/pori_python/ipr/main.py
+++ b/pori_python/ipr/main.py
@@ -436,14 +436,14 @@ def ipr_report(
disease_match_records: [list[str], list[str]] = get_kb_disease_matches(
graphkb_conn, kb_disease_match
)
- disease_matches: list[str] = disease_match_records[0]
+ disease_match_rids: list[str] = disease_match_records[0]
disease_match_names: list[str] = disease_match_records[1]
# GKB MATCHING (AKA ANNOTATION)
gkb_matches: List[Hashabledict] = annotate_variants(
graphkb_conn=graphkb_conn,
interactive=interactive,
- disease_matches=disease_matches,
+ disease_matches=disease_match_rids,
# Variants, per type:
signature_variants=signature_variants,
small_mutations=small_mutations,
@@ -496,7 +496,7 @@ def ipr_report(
graphkb_comments = auto_analyst_comments(
graphkb_conn,
gkb_matches,
- disease_matches=set(disease_matches),
+ disease_matches=set(disease_match_rids),
variants=all_variants,
)
comments_list.append(graphkb_comments)
From d11b414d58989ece8779a50e78601a438a3a21d1 Mon Sep 17 00:00:00 2001
From: Eleanor Lewis
Date: Fri, 6 Feb 2026 08:40:52 -0800
Subject: [PATCH 3/3] typefix, extract fields in main
---
pori_python/ipr/ipr.py | 6 ++----
pori_python/ipr/main.py | 8 +++-----
2 files changed, 5 insertions(+), 9 deletions(-)
diff --git a/pori_python/ipr/ipr.py b/pori_python/ipr/ipr.py
index 204413a..303416b 100644
--- a/pori_python/ipr/ipr.py
+++ b/pori_python/ipr/ipr.py
@@ -667,7 +667,7 @@ def get_kb_disease_matches(
kb_disease_match: Optional[str] = None,
verbose: bool = True,
useSubgraphsRoute: bool = True,
-) -> tuple[list[str], list[str]]:
+) -> list[Dict]:
disease_matches = []
@@ -727,6 +727,4 @@ def get_kb_disease_matches(
logger.error(msg)
raise ValueError(msg)
- disease_match_rids = [item['@rid'] for item in disease_matches]
- disease_match_names = [item['name'] for item in disease_matches]
- return (disease_match_rids, disease_match_names)
+ return disease_matches
diff --git a/pori_python/ipr/main.py b/pori_python/ipr/main.py
index 4786dd3..35ff219 100644
--- a/pori_python/ipr/main.py
+++ b/pori_python/ipr/main.py
@@ -433,11 +433,9 @@ def ipr_report(
# Matching disease RIDs from GraphKB using term tree
# (Will raise uncatched error if no match)
- disease_match_records: [list[str], list[str]] = get_kb_disease_matches(
- graphkb_conn, kb_disease_match
- )
- disease_match_rids: list[str] = disease_match_records[0]
- disease_match_names: list[str] = disease_match_records[1]
+ disease_match_records: list[Dict] = get_kb_disease_matches(graphkb_conn, kb_disease_match)
+ disease_match_rids: list[str] = [item['@rid'] for item in disease_match_records]
+ disease_match_names: list[str] = [item['name'] for item in disease_match_records]
# GKB MATCHING (AKA ANNOTATION)
gkb_matches: List[Hashabledict] = annotate_variants(