From f55e36c0d945d3718fbf31721de078a10de2268e Mon Sep 17 00:00:00 2001 From: Caralyn Reisle Date: Mon, 9 Feb 2026 12:17:41 -0800 Subject: [PATCH 01/11] Ignore more venv names --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index f634e82..f001284 100644 --- a/.gitignore +++ b/.gitignore @@ -15,6 +15,7 @@ htmlcov # common virtual environment names venv* env +.venv # editors .idea From 94eba4b6c9e208cc5970e83ad23c7ca1c2dcb2b4 Mon Sep 17 00:00:00 2001 From: Caralyn Reisle Date: Mon, 9 Feb 2026 12:22:52 -0800 Subject: [PATCH 02/11] Replace custom caching with requests cache library --- pori_python/graphkb/match.py | 16 ------ pori_python/graphkb/util.py | 81 +++++++++++++++++++--------- tests/test_graphkb/test_statement.py | 2 +- 3 files changed, 56 insertions(+), 43 deletions(-) diff --git a/pori_python/graphkb/match.py b/pori_python/graphkb/match.py index 0c79138..d24bb06 100644 --- a/pori_python/graphkb/match.py +++ b/pori_python/graphkb/match.py @@ -111,22 +111,6 @@ def get_equivalent_features( ) -def cache_missing_features(conn: GraphKBConnection) -> None: - """ - Create a cache of features that exist to avoid repeatedly querying - for missing features - """ - genes = cast( - List[Ontology], - conn.query({"target": "Feature", "returnProperties": ["name", "sourceId"], "neighbors": 0}), - ) - for gene in genes: - if gene["name"]: - FEATURES_CACHE.add(gene["name"].lower()) - if gene["sourceId"]: - FEATURES_CACHE.add(gene["sourceId"].lower()) - - def match_category_variant( conn: GraphKBConnection, reference_name: str, diff --git a/pori_python/graphkb/util.py b/pori_python/graphkb/util.py index 85bfa51..84ab0a6 100644 --- a/pori_python/graphkb/util.py +++ b/pori_python/graphkb/util.py @@ -8,6 +8,7 @@ import re import time from datetime import datetime +from requests_cache import CacheMixin from typing import Any, Dict, Iterable, List, Optional, Union, cast from urllib3.util.retry import Retry from urllib.parse import urlsplit @@ 
-88,11 +89,8 @@ def millis_interval(start: datetime, end: datetime) -> int: return millis -def cache_key(request_body) -> str: - """Create a cache key for a query request to GraphKB.""" - body = json.dumps(request_body, sort_keys=True) - hash_code = hashlib.md5(f"/query{body}".encode("utf-8")).hexdigest() - return hash_code +class CustomSession(CacheMixin, requests.Session): + pass class GraphKBConnection: @@ -102,8 +100,32 @@ def __init__( username: str = "", password: str = "", use_global_cache: bool = True, + cache_name: str = "", ): - self.http = requests.Session() + """ + Docstring for __init__ + + Args: + - use_global_cache: cache requests across all requests to GKB + - cache_db: Path or connection URL to the database which stors the requests cache. see https://requests-cache.readthedocs.io/en/v0.6.4/user_guide.html#cache-name + """ + if use_global_cache: + if not cache_name: + self.http = CustomSession( + backend="memory", + cache_control=True, + allowable_methods=["GET", "POST"], + ignored_parameters=["Authorization"], + ) + else: + self.http = CustomSession( + cache_name, + cache_control=True, + allowable_methods=["GET", "POST"], + ignored_parameters=["Authorization"], + ) + else: + self.http = requests.Session() retries = Retry( total=100, connect=5, @@ -117,8 +139,10 @@ def __init__( self.url = url self.username = username self.password = password - self.headers = {"Accept": "application/json", "Content-Type": "application/json"} - self.cache: Dict[Any, Any] = {} if not use_global_cache else QUERY_CACHE + self.headers = { + "Accept": "application/json", + "Content-Type": "application/json", + } self.request_count = 0 self.first_request: Optional[datetime] = None self.last_request: Optional[datetime] = None @@ -137,7 +161,13 @@ def load(self) -> Optional[float]: return self.request_count * 1000 / msec return None - def request(self, endpoint: str, method: str = "GET", **kwargs) -> Dict: + def request( + self, + endpoint: str, + method: str = "GET", + 
headers: Optional[dict[str, str]] = None, + **kwargs, + ) -> Dict: """Request wrapper to handle adding common headers and logging. Args: @@ -158,6 +188,11 @@ def request(self, endpoint: str, method: str = "GET", **kwargs) -> Dict: if endpoint in ["query", "parse"]: timeout = (connect_timeout, read_timeout) + request_headers = {} + request_headers.update(self.headers) + if headers is not None: + request_headers.update(headers) + start_time = datetime.now() if not self.first_request: @@ -180,8 +215,8 @@ def request(self, endpoint: str, method: str = "GET", **kwargs) -> Dict: need_refresh_login = False self.request_count += 1 - resp = requests.request( - method, url, headers=self.headers, timeout=timeout, **kwargs + resp = self.http.request( + method, url, headers=request_headers, timeout=timeout, **kwargs ) if resp.status_code == 401 or resp.status_code == 403: logger.debug(f"/{endpoint} - {resp.status_code} - retrying") @@ -294,11 +329,6 @@ def login(self, username: str, password: str, pori_demo: bool = False) -> None: def refresh_login(self) -> None: self.login(self.username, self.password) - def set_cache_data(self, request_body: Dict, result: List[Record]) -> None: - """Explicitly add a query to the cache.""" - hash_code = cache_key(request_body) - self.cache[hash_code] = result - def query( self, request_body: Dict = {}, @@ -310,23 +340,22 @@ def query( """ Query GraphKB """ - result: List[Record] = [] - hash_code = "" - - if not ignore_cache and paginate: - hash_code = cache_key(request_body) - if hash_code in self.cache and not force_refresh: - return self.cache[hash_code] + headers = {} + if ignore_cache or force_refresh: + headers = {"Cache-Control": "no-cache"} + result: List[Record] = [] while True: - content = self.post("query", data={**request_body, "limit": limit, "skip": len(result)}) + content = self.post( + "query", + data={**request_body, "limit": limit, "skip": len(result)}, + headers=headers, + ) records = content["result"] result.extend(records) 
if len(records) < limit or not paginate: break - if not ignore_cache and paginate: - self.cache[hash_code] = result return result def parse(self, hgvs_string: str, requireFeatures: bool = False) -> ParsedVariant: diff --git a/tests/test_graphkb/test_statement.py b/tests/test_graphkb/test_statement.py index fcf0bef..49b15a2 100644 --- a/tests/test_graphkb/test_statement.py +++ b/tests/test_graphkb/test_statement.py @@ -41,7 +41,7 @@ def term_tree_calls(*final_values): query_mock = Mock() query_mock.side_effect = return_values - return Mock(query=query_mock, cache={}) + return Mock(query=query_mock) class TestCategorizeRelevance: From 21a224bdde59e022a8ee409c214e84c0e51b5b69 Mon Sep 17 00:00:00 2001 From: Caralyn Reisle Date: Mon, 9 Feb 2026 12:36:53 -0800 Subject: [PATCH 03/11] Add requests cache pkg to requirements --- setup.cfg | 1 + 1 file changed, 1 insertion(+) diff --git a/setup.cfg b/setup.cfg index b5d3df0..a22e99c 100644 --- a/setup.cfg +++ b/setup.cfg @@ -37,6 +37,7 @@ install_requires = requests tqdm typing_extensions>=3.7.4.2,<5 + requests-cache[sqlite] [options.extras_require] deploy = twine; wheel; m2r From c327f6fafbf80a50a5b145a839efdb477280e8bf Mon Sep 17 00:00:00 2001 From: Caralyn Reisle Date: Mon, 9 Feb 2026 12:50:30 -0800 Subject: [PATCH 04/11] Remove leftover references to old cache --- pori_python/graphkb/vocab.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/pori_python/graphkb/vocab.py b/pori_python/graphkb/vocab.py index 26033e7..3242a06 100644 --- a/pori_python/graphkb/vocab.py +++ b/pori_python/graphkb/vocab.py @@ -181,9 +181,6 @@ def get_terms_set( ) -> Set[str]: """Get a set of vocabulary rids given some base/parent term names.""" base_terms = [base_terms] if isinstance(base_terms, str) else base_terms - cache_key = tuple(sorted(base_terms)) - if graphkb_conn.cache.get(cache_key, None) and not ignore_cache: - return graphkb_conn.cache[cache_key] terms = set() for base_term in base_terms: terms.update( @@ -193,6 +190,4 @@ def 
get_terms_set( ) ) ) - if not ignore_cache: - graphkb_conn.cache[cache_key] = terms return terms From d7db900468bdf09e84902fad551298747b81f93c Mon Sep 17 00:00:00 2001 From: Caralyn Reisle Date: Mon, 9 Feb 2026 13:07:04 -0800 Subject: [PATCH 05/11] Remove no longer relevant test --- tests/test_graphkb/test_match.py | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/tests/test_graphkb/test_match.py b/tests/test_graphkb/test_match.py index d4ee567..0079814 100644 --- a/tests/test_graphkb/test_match.py +++ b/tests/test_graphkb/test_match.py @@ -563,22 +563,6 @@ def test_structural_variants(self, conn): assert type not in expected.get("does_not_matches", {}).get("type", []) -class TestCacheMissingFeatures: - def test_filling_cache(self): - mock_conn = MagicMock( - query=MagicMock( - return_value=[ - {"name": "bob", "sourceId": "alice"}, - {"name": "KRAS", "sourceId": "1234"}, - ] - ) - ) - match.cache_missing_features(mock_conn) - assert "kras" in match.FEATURES_CACHE - assert "alice" in match.FEATURES_CACHE - match.FEATURES_CACHE = None - - class TestTypeScreening: # Types as class variables default_type = DEFAULT_NON_STRUCTURAL_VARIANT_TYPE From 5f80d1ae25aeb87b81b055bff5b162e6def48d63 Mon Sep 17 00:00:00 2001 From: Caralyn Reisle Date: Wed, 11 Feb 2026 21:30:46 -0800 Subject: [PATCH 06/11] Fix missing merge conflict --- pori_python/graphkb/util.py | 48 ++++++++++--------------------------- 1 file changed, 12 insertions(+), 36 deletions(-) diff --git a/pori_python/graphkb/util.py b/pori_python/graphkb/util.py index 2c1e7e0..a000134 100644 --- a/pori_python/graphkb/util.py +++ b/pori_python/graphkb/util.py @@ -100,7 +100,7 @@ def __init__( username: str = '', password: str = '', use_global_cache: bool = True, - cache_name: str = "", + cache_name: str = '', ): """ Docstring for __init__ @@ -112,17 +112,17 @@ def __init__( if use_global_cache: if not cache_name: self.http = CustomSession( - backend="memory", + backend='memory', cache_control=True, - 
allowable_methods=["GET", "POST"], - ignored_parameters=["Authorization"], + allowable_methods=['GET', 'POST'], + ignored_parameters=['Authorization'], ) else: self.http = CustomSession( cache_name, cache_control=True, - allowable_methods=["GET", "POST"], - ignored_parameters=["Authorization"], + allowable_methods=['GET', 'POST'], + ignored_parameters=['Authorization'], ) else: self.http = requests.Session() @@ -139,15 +139,10 @@ def __init__( self.url = url self.username = username self.password = password -<<<<<<< HEAD self.headers = { - "Accept": "application/json", - "Content-Type": "application/json", + 'Accept': 'application/json', + 'Content-Type': 'application/json', } -======= - self.headers = {'Accept': 'application/json', 'Content-Type': 'application/json'} - self.cache: Dict[Any, Any] = {} if not use_global_cache else QUERY_CACHE ->>>>>>> develop self.request_count = 0 self.first_request: Optional[datetime] = None self.last_request: Optional[datetime] = None @@ -166,17 +161,13 @@ def load(self) -> Optional[float]: return self.request_count * 1000 / msec return None -<<<<<<< HEAD def request( self, endpoint: str, - method: str = "GET", + method: str = 'GET', headers: Optional[dict[str, str]] = None, **kwargs, ) -> Dict: -======= - def request(self, endpoint: str, method: str = 'GET', **kwargs) -> Dict: ->>>>>>> develop """Request wrapper to handle adding common headers and logging. 
Args: @@ -348,33 +339,18 @@ def query( """ Query GraphKB """ -<<<<<<< HEAD headers = {} if ignore_cache or force_refresh: - headers = {"Cache-Control": "no-cache"} -======= - result: List[Record] = [] - hash_code = '' - - if not ignore_cache and paginate: - hash_code = cache_key(request_body) - if hash_code in self.cache and not force_refresh: - return self.cache[hash_code] ->>>>>>> develop + headers = {'Cache-Control': 'no-cache'} result: List[Record] = [] while True: -<<<<<<< HEAD content = self.post( - "query", - data={**request_body, "limit": limit, "skip": len(result)}, + 'query', + data={**request_body, 'limit': limit, 'skip': len(result)}, headers=headers, ) - records = content["result"] -======= - content = self.post('query', data={**request_body, 'limit': limit, 'skip': len(result)}) records = content['result'] ->>>>>>> develop result.extend(records) if len(records) < limit or not paginate: break From df2d899e6380d7568570734bfaa1fe8551f1a226 Mon Sep 17 00:00:00 2001 From: Caralyn Reisle Date: Wed, 11 Feb 2026 23:10:58 -0800 Subject: [PATCH 07/11] fix: due to change in caching, the mock now needs to be re setup between calls so I split the tests into two functions --- pori_python/graphkb/util.py | 2 -- tests/test_graphkb/test_statement.py | 15 ++++++++++++--- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/pori_python/graphkb/util.py b/pori_python/graphkb/util.py index a000134..ba54a07 100644 --- a/pori_python/graphkb/util.py +++ b/pori_python/graphkb/util.py @@ -17,8 +17,6 @@ from .constants import DEFAULT_LIMIT, TYPES_TO_NOTATION, AA_3to1_MAPPING -QUERY_CACHE: Dict[Any, Any] = {} - # name the logger after the package to make it simple to disable for packages using this one as a dependency # https://stackoverflow.com/questions/11029717/how-do-i-disable-log-messages-from-the-requests-library diff --git a/tests/test_graphkb/test_statement.py b/tests/test_graphkb/test_statement.py index 0b4ccd2..00720e9 100644 --- 
a/tests/test_graphkb/test_statement.py +++ b/tests/test_graphkb/test_statement.py @@ -17,11 +17,20 @@ def conn() -> GraphKBConnection: @pytest.fixture() def graphkb_conn(): + """ + Mocks the query functionality required by the calls made for the categorize_relevance function + + categorize_relevance calls query twice for each term in the values of the category_base_terms object + + - get_terms_set ([term, term, term...]) + - get_term_tree(term) + - query(query(term)) + """ def make_rid_list(*values): return [{'@rid': v} for v in values] def term_tree_calls(*final_values): - # this function makes 2 calls to conn.query here + # this function makes 2 calls to conn.query here b/c the get_terms_set function will always call query twice sets = [['fake'], final_values] return [make_rid_list(*s) for s in sets] @@ -77,18 +86,18 @@ def test_no_match(self, graphkb_conn): category = statement.categorize_relevance(graphkb_conn, 'x') assert category == '' - def test_custom_categories(self, graphkb_conn): + def test_custom_categories_not_found(self, graphkb_conn): category = statement.categorize_relevance( graphkb_conn, 'x', [('blargh', ['some', 'blargh'])] ) assert category == '' + def test_custom_categories_match(self, graphkb_conn): category = statement.categorize_relevance( graphkb_conn, '1', [('blargh', ['some', 'blargh'])] ) assert category == 'blargh' - @pytest.mark.skipif( EXCLUDE_BCGSC_TESTS, reason='db-specific rid; requires Inferred Functional Annotation source' ) From 1107f68499787a236ec25afbddde306f111e0106 Mon Sep 17 00:00:00 2001 From: Caralyn Reisle Date: Wed, 11 Feb 2026 23:11:54 -0800 Subject: [PATCH 08/11] format with ruff --- tests/test_graphkb/test_statement.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/test_graphkb/test_statement.py b/tests/test_graphkb/test_statement.py index 00720e9..550191e 100644 --- a/tests/test_graphkb/test_statement.py +++ b/tests/test_graphkb/test_statement.py @@ -26,6 +26,7 @@ def graphkb_conn(): - get_term_tree(term) 
- query(query(term)) """ + def make_rid_list(*values): return [{'@rid': v} for v in values] @@ -98,6 +99,7 @@ def test_custom_categories_match(self, graphkb_conn): ) assert category == 'blargh' + @pytest.mark.skipif( EXCLUDE_BCGSC_TESTS, reason='db-specific rid; requires Inferred Functional Annotation source' ) From 5e60ddc5c52c9991aa369aae0748b624c7a41382 Mon Sep 17 00:00:00 2001 From: Caralyn Reisle Date: Wed, 11 Feb 2026 23:29:59 -0800 Subject: [PATCH 09/11] Add pass-through args for sessions --- pori_python/graphkb/util.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pori_python/graphkb/util.py b/pori_python/graphkb/util.py index ba54a07..8cfe79d 100644 --- a/pori_python/graphkb/util.py +++ b/pori_python/graphkb/util.py @@ -99,6 +99,7 @@ def __init__( password: str = '', use_global_cache: bool = True, cache_name: str = '', + **session_kwargs ): """ Docstring for __init__ @@ -114,6 +115,7 @@ def __init__( cache_control=True, allowable_methods=['GET', 'POST'], ignored_parameters=['Authorization'], + **session_kwargs ) else: self.http = CustomSession( @@ -121,9 +123,10 @@ def __init__( cache_control=True, allowable_methods=['GET', 'POST'], ignored_parameters=['Authorization'], + **session_kwargs ) else: - self.http = requests.Session() + self.http = requests.Session(**session_kwargs) retries = Retry( total=100, connect=5, From ae37e3bfee34cc97e9167fe187508d53e5fb07d2 Mon Sep 17 00:00:00 2001 From: Caralyn Reisle Date: Wed, 11 Feb 2026 23:31:29 -0800 Subject: [PATCH 10/11] fix arg in docstring to match arg name --- pori_python/graphkb/util.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pori_python/graphkb/util.py b/pori_python/graphkb/util.py index 8cfe79d..60ce699 100644 --- a/pori_python/graphkb/util.py +++ b/pori_python/graphkb/util.py @@ -106,7 +106,7 @@ def __init__( Args: - use_global_cache: cache requests across all requests to GKB - - cache_db: Path or connection URL to the database which stors the requests 
cache. see https://requests-cache.readthedocs.io/en/v0.6.4/user_guide.html#cache-name + - cache_name: Path or connection URL to the database which stores the requests cache. see https://requests-cache.readthedocs.io/en/v0.6.4/user_guide.html#cache-name """ if use_global_cache: if not cache_name: From 135df0390be00de7cc5ebe634a3350ca180947a1 Mon Sep 17 00:00:00 2001 From: Caralyn Reisle Date: Wed, 11 Feb 2026 23:32:54 -0800 Subject: [PATCH 11/11] format with ruff --- pori_python/graphkb/util.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pori_python/graphkb/util.py b/pori_python/graphkb/util.py index 60ce699..382dba6 100644 --- a/pori_python/graphkb/util.py +++ b/pori_python/graphkb/util.py @@ -99,7 +99,7 @@ def __init__( password: str = '', use_global_cache: bool = True, cache_name: str = '', - **session_kwargs + **session_kwargs, ): """ Docstring for __init__ @@ -115,7 +115,7 @@ def __init__( cache_control=True, allowable_methods=['GET', 'POST'], ignored_parameters=['Authorization'], - **session_kwargs + **session_kwargs, ) else: self.http = CustomSession( @@ -123,7 +123,7 @@ def __init__( cache_control=True, allowable_methods=['GET', 'POST'], ignored_parameters=['Authorization'], - **session_kwargs + **session_kwargs, ) else: self.http = requests.Session(**session_kwargs)