From 2207bf0ff43291bea09b10c9139d7168bb26d8d9 Mon Sep 17 00:00:00 2001 From: Eleanor Lewis Date: Wed, 4 Feb 2026 16:55:42 -0800 Subject: [PATCH 1/3] use gkb disease matches for variant-text --- pori_python/ipr/ipr.py | 21 +++++++++------------ pori_python/ipr/main.py | 7 ++++++- pori_python/ipr/summary.py | 14 +++++++++++++- tests/test_ipr/test_ipr.py | 4 ++-- tests/test_ipr/test_summary.py | 28 ++++++++++++++++++++++++++++ 5 files changed, 58 insertions(+), 16 deletions(-) diff --git a/pori_python/ipr/ipr.py b/pori_python/ipr/ipr.py index d606de2..204413a 100644 --- a/pori_python/ipr/ipr.py +++ b/pori_python/ipr/ipr.py @@ -667,7 +667,7 @@ def get_kb_disease_matches( kb_disease_match: Optional[str] = None, verbose: bool = True, useSubgraphsRoute: bool = True, -) -> list[str]: +) -> tuple[list[str], list[str]]: disease_matches = [] @@ -703,7 +703,7 @@ def get_kb_disease_matches( "base": base_records, }, ) - disease_matches = list(response["result"]["g"]["nodes"].keys()) + disease_matches = list(response["result"]["g"]["nodes"].values()) except Exception: if verbose: @@ -715,15 +715,10 @@ def get_kb_disease_matches( if not useSubgraphsRoute: if verbose: logger.info(f"Matching disease ({kb_disease_match}) to graphkb using get_term_tree()") - disease_matches = list( - { - r["@rid"] - for r in gkb_vocab.get_term_tree( - graphkb_conn, - kb_disease_match, - ontology_class="Disease", - ) - } + disease_matches = gkb_vocab.get_term_tree( + graphkb_conn, + kb_disease_match, + ontology_class="Disease", ) if not disease_matches: @@ -732,4 +727,6 @@ def get_kb_disease_matches( logger.error(msg) raise ValueError(msg) - return disease_matches + disease_match_rids = [item['@rid'] for item in disease_matches] + disease_match_names = [item['name'] for item in disease_matches] + return (disease_match_rids, disease_match_names) diff --git a/pori_python/ipr/main.py b/pori_python/ipr/main.py index 3a44a6d..5215793 100644 --- a/pori_python/ipr/main.py +++ b/pori_python/ipr/main.py @@ -433,7 +433,11 @@ def ipr_report( # Matching disease RIDs from GraphKB using term tree # (Will raise uncatched error if no match) - disease_matches: list[str] = get_kb_disease_matches(graphkb_conn, kb_disease_match) + disease_match_records: [list[str], list[str]] = get_kb_disease_matches( + graphkb_conn, kb_disease_match + ) + disease_matches: list[str] = disease_match_records[0] + disease_match_names: list[str] = disease_match_records[1] # GKB MATCHING (AKA ANNOTATION) gkb_matches: List[Hashabledict] = annotate_variants( @@ -504,6 +508,7 @@ def ipr_report( ipr_conn, gkb_matches, disease_name=kb_disease_match, + disease_match_names=disease_match_names, project_name=content["project"], report_type=content["template"], include_nonspecific_disease=include_nonspecific_disease, diff --git a/pori_python/ipr/summary.py b/pori_python/ipr/summary.py index c6d63b2..f698767 100644 --- a/pori_python/ipr/summary.py +++ b/pori_python/ipr/summary.py @@ -385,6 +385,7 @@ def get_ipr_analyst_comments( ipr_conn: IprConnection, matches: Sequence[KbMatch] | Sequence[Hashabledict], disease_name: str, + disease_match_names: [str], project_name: str, report_type: str, include_nonspecific_disease: bool = False, @@ -403,6 +404,7 @@ def get_ipr_analyst_comments( ipr_conn: connection to the ipr db matches: list of kbmatches which will be included in the report disease_name: str, eg 'colorectal cancer' + disease_match_names: list[str] of names considered to be equivalent to the disease name project_name: str, eg TEST or pog report_type: str, eg genomic or rapid include_nonspecific_disease: bool - true if variant texts that don't explicitly @@ -420,6 +422,8 @@ def get_ipr_analyst_comments( # get the list of variants to check for custom text for match_set = list(set([item["kbVariant"] for item in matches])) + disease_match_set = set([disease_name.lower()] + [item.lower() for item in disease_match_names]) + for variant in match_set: data = { "variantName": variant, @@ -451,7 +455,15 @@ def get_ipr_analyst_comments( else: itemlist = [] - disease_matches = [item for item in itemlist if disease_name in item["cancerType"]] + disease_matches = [ + item + for item in itemlist + if len( + set([ct.lower() for ct in item["cancerType"]]).intersection(disease_match_set) + ) + > 0 + ] + if disease_matches: itemlist = disease_matches elif include_nonspecific_disease: diff --git a/tests/test_ipr/test_ipr.py b/tests/test_ipr/test_ipr.py index 9994cf4..4e74291 100644 --- a/tests/test_ipr/test_ipr.py +++ b/tests/test_ipr/test_ipr.py @@ -217,7 +217,7 @@ def graphkb_conn(): def query_side_effect(*args, **kwargs): if args: # for TestGetKbDiseaseMatches - return [{"@rid": "#123:45"}] + return [{"@rid": "#123:45", "name": "name_for_#123:45"}] query_index["value"] += 1 idx = query_index["value"] return query_return_values[idx] if idx < len(query_return_values) else [] @@ -245,7 +245,7 @@ def mock_get_source(source): @pytest.fixture(autouse=True) def mock_get_term_tree(monkeypatch): - mock_func = Mock(return_value=[{"@rid": d} for d in DISEASE_RIDS]) + mock_func = Mock(return_value=[{"@rid": d, "name": 'name_of_' + d} for d in DISEASE_RIDS]) monkeypatch.setattr(gkb_vocab, "get_term_tree", mock_func) yield mock_func mock_func.reset_mock() diff --git a/tests/test_ipr/test_summary.py b/tests/test_ipr/test_summary.py index 083683e..a17edca 100644 --- a/tests/test_ipr/test_summary.py +++ b/tests/test_ipr/test_summary.py @@ -210,6 +210,7 @@ def test_gets_fully_matched_output_when_possible(self): ipr_conn, matches=matches, disease_name="test1", + disease_match_names=[], project_name="test2", report_type="test3", include_nonspecific_project=False, @@ -228,6 +229,7 @@ def test_omits_nonspecific_project_matches_when_specified(self): ipr_conn, matches=matches, disease_name="test1", + disease_match_names=[], project_name="notfound", report_type="test3", include_nonspecific_project=False, @@ -243,6 +245,7 @@ def test_omits_nonspecific_template_matches_when_specified(self): ipr_conn, matches=matches, disease_name="test1", + disease_match_names=[], project_name="test2", report_type="notfound", include_nonspecific_project=True, @@ -258,6 +261,7 @@ def test_omits_nonspecific_disease_matches_when_specified(self): ipr_conn, matches=matches, disease_name="notfound", + disease_match_names=[], project_name="test2", report_type="test3", include_nonspecific_project=True, @@ -273,6 +277,7 @@ def test_includes_nonspecific_project_matches_when_specified(self): ipr_conn, matches=matches, disease_name="test1", + disease_match_names=[], project_name="notfound", report_type="test3", include_nonspecific_project=True, @@ -290,6 +295,7 @@ def test_includes_nonspecific_template_matches_when_specified(self): ipr_conn, matches=matches, disease_name="test1", + disease_match_names=[], project_name="test2", report_type="notfound", include_nonspecific_project=False, @@ -307,6 +313,7 @@ def test_includes_nonspecific_disease_matches_when_specified(self): ipr_conn, matches=matches, disease_name="notfound", + disease_match_names=[], project_name="test2", report_type="test3", include_nonspecific_project=False, @@ -318,6 +325,25 @@ def test_includes_nonspecific_disease_matches_when_specified(self): assert summary_lines[2] == "

no cancerType

" assert len(summary_lines) == 3 + def test_includes_all_graphkb_disease_matches(self): + ipr_conn = MagicMock(get=MagicMock(side_effect=copy(mock_ipr_results))) + matches = [{"kbVariant": "ERBB2 amplification"}] + ipr_summary = get_ipr_analyst_comments( + ipr_conn, + matches=matches, + disease_name="notfound", + disease_match_names=["TEST1"], + project_name="test2", + report_type="test3", + include_nonspecific_project=False, + include_nonspecific_disease=False, + include_nonspecific_template=False, + ) + summary_lines = ipr_summary.split("\n") + assert summary_lines[1] == "

ERBB2 amplification (test1,test)

" + assert summary_lines[2] == "

normal

" + assert len(summary_lines) == 3 + def test_prepare_section_for_multiple_variants(self): ipr_conn = MagicMock(get=MagicMock(side_effect=copy(mock_ipr_results))) # NB this test relies on matches being processed in this order @@ -326,6 +352,7 @@ def test_prepare_section_for_multiple_variants(self): ipr_conn, matches=matches, disease_name="test1", + disease_match_names=[], project_name="test2", report_type="test3", include_nonspecific_project=False, @@ -346,6 +373,7 @@ def test_empty_section_when_no_variant_match(self): ipr_conn, matches=matches, disease_name="test1", + disease_match_names=[], project_name="test2", report_type="test3", include_nonspecific_project=False, From a3715db2fbefff8daf4d68d5ff091785eaaf1cee Mon Sep 17 00:00:00 2001 From: Eleanor Lewis Date: Wed, 4 Feb 2026 16:59:55 -0800 Subject: [PATCH 2/3] use consistent disease match component name --- pori_python/ipr/main.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pori_python/ipr/main.py b/pori_python/ipr/main.py index 5215793..4786dd3 100644 --- a/pori_python/ipr/main.py +++ b/pori_python/ipr/main.py @@ -436,14 +436,14 @@ def ipr_report( disease_match_records: [list[str], list[str]] = get_kb_disease_matches( graphkb_conn, kb_disease_match ) - disease_matches: list[str] = disease_match_records[0] + disease_match_rids: list[str] = disease_match_records[0] disease_match_names: list[str] = disease_match_records[1] # GKB MATCHING (AKA ANNOTATION) gkb_matches: List[Hashabledict] = annotate_variants( graphkb_conn=graphkb_conn, interactive=interactive, - disease_matches=disease_matches, + disease_matches=disease_match_rids, # Variants, per type: signature_variants=signature_variants, small_mutations=small_mutations, @@ -496,7 +496,7 @@ def ipr_report( graphkb_comments = auto_analyst_comments( graphkb_conn, gkb_matches, - disease_matches=set(disease_matches), + disease_matches=set(disease_match_rids), variants=all_variants, ) comments_list.append(graphkb_comments) From d11b414d58989ece8779a50e78601a438a3a21d1 Mon Sep 17 00:00:00 2001 From: Eleanor Lewis Date: Fri, 6 Feb 2026 08:40:52 -0800 Subject: [PATCH 3/3] typefix, extract fields in main --- pori_python/ipr/ipr.py | 6 ++---- pori_python/ipr/main.py | 8 +++----- 2 files changed, 5 insertions(+), 9 deletions(-) diff --git a/pori_python/ipr/ipr.py b/pori_python/ipr/ipr.py index 204413a..303416b 100644 --- a/pori_python/ipr/ipr.py +++ b/pori_python/ipr/ipr.py @@ -667,7 +667,7 @@ def get_kb_disease_matches( kb_disease_match: Optional[str] = None, verbose: bool = True, useSubgraphsRoute: bool = True, -) -> tuple[list[str], list[str]]: +) -> list[Dict]: disease_matches = [] @@ -727,6 +727,4 @@ def get_kb_disease_matches( logger.error(msg) raise ValueError(msg) - disease_match_rids = [item['@rid'] for item in disease_matches] - disease_match_names = [item['name'] for item in disease_matches] - return (disease_match_rids, disease_match_names) + return disease_matches diff --git a/pori_python/ipr/main.py b/pori_python/ipr/main.py index 4786dd3..35ff219 100644 --- a/pori_python/ipr/main.py +++ b/pori_python/ipr/main.py @@ -433,11 +433,9 @@ def ipr_report( # Matching disease RIDs from GraphKB using term tree # (Will raise uncatched error if no match) - disease_match_records: [list[str], list[str]] = get_kb_disease_matches( - graphkb_conn, kb_disease_match - ) - disease_match_rids: list[str] = disease_match_records[0] - disease_match_names: list[str] = disease_match_records[1] + disease_match_records: list[Dict] = get_kb_disease_matches(graphkb_conn, kb_disease_match) + disease_match_rids: list[str] = [item['@rid'] for item in disease_match_records] + disease_match_names: list[str] = [item['name'] for item in disease_match_records] # GKB MATCHING (AKA ANNOTATION) gkb_matches: List[Hashabledict] = annotate_variants(