From 2207bf0ff43291bea09b10c9139d7168bb26d8d9 Mon Sep 17 00:00:00 2001
From: Eleanor Lewis <elewis@bcgsc.ca>
Date: Wed, 4 Feb 2026 16:55:42 -0800
Subject: [PATCH 1/3] use gkb disease matches for variant-text

---
 pori_python/ipr/ipr.py         | 21 +++++++++------------
 pori_python/ipr/main.py        |  7 ++++++-
 pori_python/ipr/summary.py     | 14 +++++++++++++-
 tests/test_ipr/test_ipr.py     |  4 ++--
 tests/test_ipr/test_summary.py | 28 ++++++++++++++++++++++++++++
 5 files changed, 58 insertions(+), 16 deletions(-)

diff --git a/pori_python/ipr/ipr.py b/pori_python/ipr/ipr.py
index d606de2..204413a 100644
--- a/pori_python/ipr/ipr.py
+++ b/pori_python/ipr/ipr.py
@@ -667,7 +667,7 @@ def get_kb_disease_matches(
     kb_disease_match: Optional[str] = None,
     verbose: bool = True,
     useSubgraphsRoute: bool = True,
-) -> list[str]:
+) -> tuple[list[str], list[str]]:
 
     disease_matches = []
 
@@ -703,7 +703,7 @@ def get_kb_disease_matches(
                         "base": base_records,
                     },
                 )
-                disease_matches = list(response["result"]["g"]["nodes"].keys())
+                disease_matches = list(response["result"]["g"]["nodes"].values())
 
         except Exception:
             if verbose:
@@ -715,15 +715,10 @@ def get_kb_disease_matches(
     if not useSubgraphsRoute:
         if verbose:
             logger.info(f"Matching disease ({kb_disease_match}) to graphkb using get_term_tree()")
-        disease_matches = list(
-            {
-                r["@rid"]
-                for r in gkb_vocab.get_term_tree(
-                    graphkb_conn,
-                    kb_disease_match,
-                    ontology_class="Disease",
-                )
-            }
+        disease_matches = gkb_vocab.get_term_tree(
+            graphkb_conn,
+            kb_disease_match,
+            ontology_class="Disease",
         )
 
     if not disease_matches:
@@ -732,4 +727,6 @@ def get_kb_disease_matches(
             logger.error(msg)
         raise ValueError(msg)
 
-    return disease_matches
+    disease_match_rids = [item['@rid'] for item in disease_matches]
+    disease_match_names = [item['name'] for item in disease_matches]
+    return (disease_match_rids, disease_match_names)
diff --git a/pori_python/ipr/main.py b/pori_python/ipr/main.py
index 3a44a6d..5215793 100644
--- a/pori_python/ipr/main.py
+++ b/pori_python/ipr/main.py
@@ -433,7 +433,11 @@ def ipr_report(
 
     # Matching disease RIDs from GraphKB using term tree
     # (Will raise uncatched error if no match)
-    disease_matches: list[str] = get_kb_disease_matches(graphkb_conn, kb_disease_match)
+    disease_match_records: [list[str], list[str]] = get_kb_disease_matches(
+        graphkb_conn, kb_disease_match
+    )
+    disease_matches: list[str] = disease_match_records[0]
+    disease_match_names: list[str] = disease_match_records[1]
 
     # GKB MATCHING (AKA ANNOTATION)
     gkb_matches: List[Hashabledict] = annotate_variants(
@@ -504,6 +508,7 @@ def ipr_report(
             ipr_conn,
             gkb_matches,
             disease_name=kb_disease_match,
+            disease_match_names=disease_match_names,
             project_name=content["project"],
             report_type=content["template"],
             include_nonspecific_disease=include_nonspecific_disease,
diff --git a/pori_python/ipr/summary.py b/pori_python/ipr/summary.py
index c6d63b2..f698767 100644
--- a/pori_python/ipr/summary.py
+++ b/pori_python/ipr/summary.py
@@ -385,6 +385,7 @@ def get_ipr_analyst_comments(
     ipr_conn: IprConnection,
     matches: Sequence[KbMatch] | Sequence[Hashabledict],
     disease_name: str,
+    disease_match_names: list[str],
     project_name: str,
     report_type: str,
     include_nonspecific_disease: bool = False,
@@ -403,6 +404,7 @@ def get_ipr_analyst_comments(
         ipr_conn: connection to the ipr db
         matches: list of kbmatches which will be included in the report
         disease_name: str, eg 'colorectal cancer'
+        disease_match_names: list[str] of names considered to be equivalent to the disease name
         project_name: str, eg TEST or pog
         report_type: str, eg genomic or rapid
         include_nonspecific_disease: bool - true if variant texts that don't explicitly
@@ -420,6 +422,8 @@ def get_ipr_analyst_comments(
     # get the list of variants to check for custom text for
     match_set = list(set([item["kbVariant"] for item in matches]))
 
+    # lower-cased disease name plus the lower-cased names treated as equivalent to it
+    disease_match_set = {disease_name.lower(), *(name.lower() for name in disease_match_names)}
     for variant in match_set:
         data = {
             "variantName": variant,
@@ -451,7 +455,15 @@ def get_ipr_analyst_comments(
             else:
                 itemlist = []
 
-            disease_matches = [item for item in itemlist if disease_name in item["cancerType"]]
+            # Keep the items whose cancerType values overlap, ignoring case,
+            # with disease_match_set; isdisjoint() stops at the first shared
+            # name and avoids building an intermediate set for every item.
+            disease_matches = [
+                item
+                for item in itemlist
+                if not disease_match_set.isdisjoint(ct.lower() for ct in item["cancerType"])
+            ]
+
             if disease_matches:
                 itemlist = disease_matches
             elif include_nonspecific_disease:
diff --git a/tests/test_ipr/test_ipr.py b/tests/test_ipr/test_ipr.py
index 9994cf4..4e74291 100644
--- a/tests/test_ipr/test_ipr.py
+++ b/tests/test_ipr/test_ipr.py
@@ -217,7 +217,7 @@ def graphkb_conn():
     def query_side_effect(*args, **kwargs):
         if args:
             # for TestGetKbDiseaseMatches
-            return [{"@rid": "#123:45"}]
+            return [{"@rid": "#123:45", "name": "name_for_#123:45"}]
         query_index["value"] += 1
         idx = query_index["value"]
         return query_return_values[idx] if idx < len(query_return_values) else []
@@ -245,7 +245,7 @@ def mock_get_source(source):
 
 @pytest.fixture(autouse=True)
 def mock_get_term_tree(monkeypatch):
-    mock_func = Mock(return_value=[{"@rid": d} for d in DISEASE_RIDS])
+    mock_func = Mock(return_value=[{"@rid": d, "name": 'name_of_' + d} for d in DISEASE_RIDS])
     monkeypatch.setattr(gkb_vocab, "get_term_tree", mock_func)
     yield mock_func
     mock_func.reset_mock()
diff --git a/tests/test_ipr/test_summary.py b/tests/test_ipr/test_summary.py
index 083683e..a17edca 100644
--- a/tests/test_ipr/test_summary.py
+++ b/tests/test_ipr/test_summary.py
@@ -210,6 +210,7 @@ def test_gets_fully_matched_output_when_possible(self):
             ipr_conn,
             matches=matches,
             disease_name="test1",
+            disease_match_names=[],
             project_name="test2",
             report_type="test3",
             include_nonspecific_project=False,
@@ -228,6 +229,7 @@ def test_omits_nonspecific_project_matches_when_specified(self):
             ipr_conn,
             matches=matches,
             disease_name="test1",
+            disease_match_names=[],
             project_name="notfound",
             report_type="test3",
             include_nonspecific_project=False,
@@ -243,6 +245,7 @@ def test_omits_nonspecific_template_matches_when_specified(self):
             ipr_conn,
             matches=matches,
             disease_name="test1",
+            disease_match_names=[],
             project_name="test2",
             report_type="notfound",
             include_nonspecific_project=True,
@@ -258,6 +261,7 @@ def test_omits_nonspecific_disease_matches_when_specified(self):
             ipr_conn,
             matches=matches,
             disease_name="notfound",
+            disease_match_names=[],
             project_name="test2",
             report_type="test3",
             include_nonspecific_project=True,
@@ -273,6 +277,7 @@ def test_includes_nonspecific_project_matches_when_specified(self):
             ipr_conn,
             matches=matches,
             disease_name="test1",
+            disease_match_names=[],
             project_name="notfound",
             report_type="test3",
             include_nonspecific_project=True,
@@ -290,6 +295,7 @@ def test_includes_nonspecific_template_matches_when_specified(self):
             ipr_conn,
             matches=matches,
             disease_name="test1",
+            disease_match_names=[],
             project_name="test2",
             report_type="notfound",
             include_nonspecific_project=False,
@@ -307,6 +313,7 @@ def test_includes_nonspecific_disease_matches_when_specified(self):
             ipr_conn,
             matches=matches,
             disease_name="notfound",
+            disease_match_names=[],
             project_name="test2",
             report_type="test3",
             include_nonspecific_project=False,
@@ -318,6 +325,25 @@ def test_includes_nonspecific_disease_matches_when_specified(self):
         assert summary_lines[2] == "<p><p>no cancerType</p></p>"
         assert len(summary_lines) == 3
 
+    def test_includes_all_graphkb_disease_matches(self):
+        """Variant text is found through a disease_match_names entry, ignoring case."""
+        ipr_conn = MagicMock(get=MagicMock(side_effect=copy(mock_ipr_results)))
+        comments = get_ipr_analyst_comments(
+            ipr_conn,
+            matches=[{"kbVariant": "ERBB2 amplification"}],
+            disease_name="notfound",
+            disease_match_names=["TEST1"],
+            project_name="test2",
+            report_type="test3",
+            include_nonspecific_project=False,
+            include_nonspecific_disease=False,
+            include_nonspecific_template=False,
+        )
+        lines = comments.split("\n")
+        assert lines[1] == "<h2>ERBB2 amplification (test1,test)</h2>"
+        assert lines[2] == "<p><p>normal</p></p>"
+        assert len(lines) == 3
+
     def test_prepare_section_for_multiple_variants(self):
         ipr_conn = MagicMock(get=MagicMock(side_effect=copy(mock_ipr_results)))
         # NB this test relies on matches being processed in this order
@@ -326,6 +352,7 @@ def test_prepare_section_for_multiple_variants(self):
             ipr_conn,
             matches=matches,
             disease_name="test1",
+            disease_match_names=[],
             project_name="test2",
             report_type="test3",
             include_nonspecific_project=False,
@@ -346,6 +373,7 @@ def test_empty_section_when_no_variant_match(self):
             ipr_conn,
             matches=matches,
             disease_name="test1",
+            disease_match_names=[],
             project_name="test2",
             report_type="test3",
             include_nonspecific_project=False,

From a3715db2fbefff8daf4d68d5ff091785eaaf1cee Mon Sep 17 00:00:00 2001
From: Eleanor Lewis <elewis@bcgsc.ca>
Date: Wed, 4 Feb 2026 16:59:55 -0800
Subject: [PATCH 2/3] use consistent disease match component name

---
 pori_python/ipr/main.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/pori_python/ipr/main.py b/pori_python/ipr/main.py
index 5215793..4786dd3 100644
--- a/pori_python/ipr/main.py
+++ b/pori_python/ipr/main.py
@@ -436,14 +436,14 @@ def ipr_report(
     disease_match_records: [list[str], list[str]] = get_kb_disease_matches(
         graphkb_conn, kb_disease_match
     )
-    disease_matches: list[str] = disease_match_records[0]
+    disease_match_rids: list[str] = disease_match_records[0]
     disease_match_names: list[str] = disease_match_records[1]
 
     # GKB MATCHING (AKA ANNOTATION)
     gkb_matches: List[Hashabledict] = annotate_variants(
         graphkb_conn=graphkb_conn,
         interactive=interactive,
-        disease_matches=disease_matches,
+        disease_matches=disease_match_rids,
         # Variants, per type:
         signature_variants=signature_variants,
         small_mutations=small_mutations,
@@ -496,7 +496,7 @@ def ipr_report(
         graphkb_comments = auto_analyst_comments(
             graphkb_conn,
             gkb_matches,
-            disease_matches=set(disease_matches),
+            disease_matches=set(disease_match_rids),
             variants=all_variants,
         )
         comments_list.append(graphkb_comments)

From d11b414d58989ece8779a50e78601a438a3a21d1 Mon Sep 17 00:00:00 2001
From: Eleanor Lewis <elewis@bcgsc.ca>
Date: Fri, 6 Feb 2026 08:40:52 -0800
Subject: [PATCH 3/3] typefix, extract fields in main

---
 pori_python/ipr/ipr.py  | 6 ++----
 pori_python/ipr/main.py | 8 +++-----
 2 files changed, 5 insertions(+), 9 deletions(-)

diff --git a/pori_python/ipr/ipr.py b/pori_python/ipr/ipr.py
index 204413a..303416b 100644
--- a/pori_python/ipr/ipr.py
+++ b/pori_python/ipr/ipr.py
@@ -667,7 +667,7 @@ def get_kb_disease_matches(
     kb_disease_match: Optional[str] = None,
     verbose: bool = True,
     useSubgraphsRoute: bool = True,
-) -> tuple[list[str], list[str]]:
+) -> list[Dict]:
 
     disease_matches = []
 
@@ -727,6 +727,4 @@ def get_kb_disease_matches(
             logger.error(msg)
         raise ValueError(msg)
 
-    disease_match_rids = [item['@rid'] for item in disease_matches]
-    disease_match_names = [item['name'] for item in disease_matches]
-    return (disease_match_rids, disease_match_names)
+    return disease_matches
diff --git a/pori_python/ipr/main.py b/pori_python/ipr/main.py
index 4786dd3..35ff219 100644
--- a/pori_python/ipr/main.py
+++ b/pori_python/ipr/main.py
@@ -433,11 +433,9 @@ def ipr_report(
 
     # Matching disease RIDs from GraphKB using term tree
     # (Will raise uncatched error if no match)
-    disease_match_records: [list[str], list[str]] = get_kb_disease_matches(
-        graphkb_conn, kb_disease_match
-    )
-    disease_match_rids: list[str] = disease_match_records[0]
-    disease_match_names: list[str] = disease_match_records[1]
+    disease_match_records: list[Dict] = get_kb_disease_matches(graphkb_conn, kb_disease_match)
+    disease_match_rids: list[str] = [item['@rid'] for item in disease_match_records]
+    disease_match_names: list[str] = [item['name'] for item in disease_match_records]
 
     # GKB MATCHING (AKA ANNOTATION)
     gkb_matches: List[Hashabledict] = annotate_variants(