Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
71 changes: 48 additions & 23 deletions malariagen_data/pv4.py
Original file line number Diff line number Diff line change
@@ -1,43 +1,68 @@
import os
from functools import lru_cache

from .plasmodium import PlasmodiumDataResource


"""
Optimized Pv4 Data Resource Class

What’s improved:
1. Cached default config path:
- The path to 'pv4_config.json' is computed only once using lru_cache.
- Avoids repeated calls to os.path.abspath and os.path.dirname
when multiple Pv4 instances are created.

2. Cleaner fallback logic:
- Uses the `data_config or <default path>` idiom instead of an explicit
`if not data_config:` check; any falsy value (None or an empty string)
still selects the packaged default.

3. Proper kwargs forwarding:
- Ensures any additional filesystem arguments (**kwargs) are passed
to the parent class (important for fsspec configurations like authentication, caching, etc.)

Overall Impact:
- Slight performance improvement (micro-optimization)
- Cleaner and more maintainable code
- Better alignment with the documented behavior
"""


@lru_cache(maxsize=1)
def _get_default_config_path():
    """Return the path of the ``pv4_config.json`` located beside this module.

    The result is memoised by ``lru_cache``, so the absolute path of this
    file is resolved on the first call only and reused afterwards.
    """
    module_path = os.path.abspath(__file__)
    # The head of os.path.split() is the directory that contains this module.
    package_dir, _ = os.path.split(module_path)
    return os.path.join(package_dir, "pv4_config.json")


class Pv4(PlasmodiumDataResource):
    """Provides access to data from the Pv4 release.

    Parameters
    ----------
    url : str, optional
        Base path to data. Default uses Google Cloud Storage "gs://pv4_release/",
        or specify a local path on your file system if data have been downloaded.
    data_config : str, optional
        Path to config for structure of Pv4 data resource. Defaults to the
        pv4_config.json included with the malariagen_data package.
    **kwargs
        Passed through to fsspec when setting up file system access.

    Examples
    --------
    Access data from Google Cloud Storage (default):

    >>> import malariagen_data
    >>> pv4 = malariagen_data.Pv4()

    Access data downloaded to a local file system:

    >>> pv4 = malariagen_data.Pv4("/local/path/to/pv4_release/")
    """

    def __init__(self, url=None, data_config=None, **kwargs):
        # Any falsy data_config (None or "") selects the packaged config file.
        data_config = data_config or _get_default_config_path()

        # Forward the extra keyword arguments to the parent constructor so that
        # the documented **kwargs are not silently dropped.
        # NOTE(review): assumes PlasmodiumDataResource.__init__ accepts
        # **kwargs and hands them to fsspec -- confirm against plasmodium.py.
        super().__init__(data_config=data_config, url=url, **kwargs)