From d4e3d93846fd431060e219250a4517e44f4e626a Mon Sep 17 00:00:00 2001
From: THIRUMALAI NAMBI S
Date: Mon, 30 Mar 2026 19:39:07 +0530
Subject: [PATCH] Optimize Pv4 Data Resource class

Optimized the Pv4 Data Resource class by caching the default config path, improving fallback logic, and ensuring proper kwargs forwarding.
---
 malariagen_data/pv4.py | 71 ++++++++++++++++++++++++++++--------------
 1 file changed, 48 insertions(+), 23 deletions(-)

diff --git a/malariagen_data/pv4.py b/malariagen_data/pv4.py
index b3bf2755e..21b49df83 100644
--- a/malariagen_data/pv4.py
+++ b/malariagen_data/pv4.py
@@ -1,8 +1,43 @@
 import os
+from functools import lru_cache
 
 from .plasmodium import PlasmodiumDataResource
 
 
+"""
+Optimized Pv4 Data Resource Class
+
+What’s improved:
+1. Cached default config path:
+   - The path to 'pv4_config.json' is computed only once using lru_cache.
+   - Avoids repeated calls to os.path.abspath and os.path.dirname
+     when multiple Pv4 instances are created.
+
+2. Cleaner fallback logic:
+   - Uses Pythonic 'data_config or default' pattern instead of explicit condition.
+
+3. Proper kwargs forwarding:
+   - Ensures any additional filesystem arguments (**kwargs) are passed
+     to the parent class (important for fsspec configurations like authentication, caching, etc.)
+
+Overall Impact:
+- Slight performance improvement (micro-optimization)
+- Cleaner and more maintainable code
+- Better alignment with the documented behavior
+"""
+
+
+@lru_cache(maxsize=1)
+def _get_default_config_path():
+    """
+    Compute and cache the default path to pv4_config.json.
+
+    This avoids recomputing the file path every time a Pv4 object is created.
+    """
+    working_dir = os.path.dirname(os.path.abspath(__file__))
+    return os.path.join(working_dir, "pv4_config.json")
+
+
 class Pv4(PlasmodiumDataResource):
     """Provides access to data from the Pv4 release.
 
@@ -10,34 +45,24 @@ class Pv4(PlasmodiumDataResource):
     ----------
     url : str, optional
         Base path to data. Default uses Google Cloud Storage "gs://pv4_release/",
-        or specify a local path on your file system if data have been downloaded.
+        or specify a local path if data have been downloaded.
     data_config : str, optional
-        Path to config for structure of Pv4 data resource. Defaults to config included
-        with the malariagen_data package.
+        Path to config for structure of Pv4 data resource.
+        Defaults to packaged pv4_config.json.
     **kwargs
-        Passed through to fsspec when setting up file system access.
+        Additional arguments passed to filesystem backend (via fsspec).
 
     Examples
     --------
-    Access data from Google Cloud Storage (default):
-
-        >>> import malariagen_data
-        >>> pv4 = malariagen_data.Pv4()
-
-    Access data downloaded to a local file system:
-
-        >>> pv4 = malariagen_data.Pv4("/local/path/to/pv4_release/")
+    >>> import malariagen_data
+    >>> pv4 = malariagen_data.Pv4()
+    >>> pv4 = malariagen_data.Pv4("/local/path/to/pv4_release/")
 
     """
 
-    def __init__(
-        self,
-        url=None,
-        data_config=None,
-        **kwargs,
-    ):
-        # setup filesystem
-        if not data_config:
-            working_dir = os.path.dirname(os.path.abspath(__file__))
-            data_config = os.path.join(working_dir, "pv4_config.json")
-        super().__init__(data_config=data_config, url=url)
+    def __init__(self, url=None, data_config=None, **kwargs):
+        # Use cached default config path if none is provided
+        data_config = data_config or _get_default_config_path()
+
+        # Initialize parent class with full argument support
+        super().__init__(data_config=data_config, url=url, **kwargs)