diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..c3ef4c8 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,36 @@ +name: CI + +on: + pull_request: + push: + branches: [main] + schedule: + - cron: "0 17 * * 2" # Tuesdays at 12pm EST (17:00 UTC); 1pm during EDT + workflow_dispatch: + +jobs: + lint: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: astral-sh/setup-uv@v5 + - run: uv sync --group dev + - run: uv run ruff check src/ tests/ + - run: uv run ruff format --check src/ tests/ + + test: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: astral-sh/setup-uv@v5 + - run: uv sync --group dev + - run: uv run pytest -v -m "not integration" + + integration-test: + runs-on: ubuntu-latest + if: github.event_name != 'pull_request' + steps: + - uses: actions/checkout@v4 + - uses: astral-sh/setup-uv@v5 + - run: uv sync --group dev + - run: uv run pytest -v -m "integration and not slow" diff --git a/.gitignore b/.gitignore index b7faf40..55c6b2a 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,6 @@ +# Ignore data files. +/data + # Byte-compiled / optimized / DLL files __pycache__/ *.py[codz] @@ -14,8 +17,9 @@ dist/ downloads/ eggs/ .eggs/ -lib/ -lib64/ +# Python distribution lib directories (not web/src/lib/) +/lib/ +/lib64/ parts/ sdist/ var/ diff --git a/.python-version b/.python-version new file mode 100644 index 0000000..2c07333 --- /dev/null +++ b/.python-version @@ -0,0 +1 @@ +3.11 diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 0000000..8ecea72 --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,151 @@ +# CLAUDE.md + +This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository. + +## Project Overview + +babel-explorer is a tool for querying and exploring Babel intermediate files. 
It allows users to discover why two biological/chemical identifiers are considered identical by the Babel system, which handles cross-references between different ontology and database identifiers (e.g., MONDO, HP, UMLS, HGNC). + +## Development Setup + +This project uses **uv** for package management: + +```bash +# Install dependencies +uv sync + +# Install with dev dependencies +uv sync --group dev + +# Run the CLI +uv run babel-explorer --help +``` + +## Commands + +### Running the Application + +```bash +# Get cross-references for one or more CURIEs +uv run babel-explorer xrefs MONDO:0004979 + +# Get cross-references with expansion (recursive lookup) +uv run babel-explorer xrefs MONDO:0004979 --recurse + +# Get cross-references with labels from NodeNorm +uv run babel-explorer xrefs MONDO:0004979 --labels + +# Get ID records for CURIEs +uv run babel-explorer ids MONDO:0004979 + +# Test concordance changes with NodeNorm +uv run babel-explorer test-concord MONDO:0004979 HP:0000001 + +# Use custom Babel server or local directory +uv run babel-explorer xrefs MONDO:0004979 --local-dir data/2025nov19 --babel-url https://stars.renci.org:443/var/babel_outputs/2025nov19/ +``` + +### Development Commands + +```bash +# Run all tests (includes large file downloads) +uv run pytest -v + +# Run unit tests only (fast, no network) +uv run pytest -v -m "not integration" + +# Run integration tests without 2GB+ downloads +uv run pytest -v -m "integration and not slow" + +# Run a single test file +uv run pytest -v tests/test_nodenorm.py + +# Run linter +uv run ruff check + +# Format code +uv run ruff format +``` + +## Architecture + +### Core Components + +1. 
**BabelDownloader** (`src/babel_explorer/core/downloader.py`): + - Downloads Babel intermediate files from a remote HTTP(S) server using Python's `requests` library (streaming downloads) + - Caches files locally in configurable directory (default: `data/2025nov19/`) + - Uses `@functools.lru_cache` to avoid re-downloading + - **Important**: Requires network access but no external tools like `wget` + +2. **BabelXRefs** (`src/babel_explorer/core/babel_xrefs.py`): + - Main query engine for cross-references + - Uses DuckDB to query Parquet files (`Concord.parquet`, `Identifiers.parquet`) + - Supports recursive expansion of cross-references via a single `WITH RECURSIVE` query + - Uses ephemeral in-memory DuckDB connections (nothing written to disk) + +3. **NodeNorm** (`src/babel_explorer/core/nodenorm.py`): + - Integration with NodeNormalization API (https://nodenormalization-sri.renci.org/) + - Fetches labels, biolink types, and equivalent identifiers for CURIEs + - Uses `@functools.lru_cache` for performance + - Optional component for label enrichment + +4. **CLI** (`src/babel_explorer/cli.py`): + - Click-based command-line interface + - Three main commands: `xrefs`, `ids`, `test-concord` + +### Data Flow + +1. User provides CURIEs via CLI +2. BabelDownloader ensures required Parquet files are downloaded +3. BabelXRefs queries files using DuckDB +4. If `--labels` or `--recurse` flags are set, NodeNorm is queried for additional metadata +5. 
Results are printed to stdout + +### Key Design Patterns + +- **Lazy downloading**: Files are only downloaded when first accessed +- **LRU caching**: Heavy use of `@functools.lru_cache` to avoid redundant downloads and API calls +- **Recursive expansion**: The `--recurse` flag recursively follows all cross-references to build complete graphs +- **DuckDB for querying**: In-memory SQL queries against Parquet files for fast lookups + +## Testing + +### Test Structure + +Tests live in `tests/` and are split into fast **unit tests** (mocked, no network) and slower **integration tests** (real downloads and API calls). Pytest markers control which tests run: + +- **`@pytest.mark.integration`** — requires network access (downloads Parquet files or calls NodeNorm API) +- **`@pytest.mark.slow`** — downloads very large files (2 GB+) + +| File | Unit | Integration | Slow | Total | +|------|------|-------------|------|-------| +| `tests/test_downloader.py` | 41 | 4 | 1 | 46 | +| `tests/test_babel_xrefs.py` | 23 | 20 | 3 | 46 | +| `tests/test_nodenorm.py` | 20 | 13 | 0 | 33 | +| `tests/test_cli.py` | 24 | 0 | 0 | 24 | + +### Test Infrastructure + +- **`tests/conftest.py`** — Session-scoped fixtures that download Parquet files once and share them across all integration tests. Teardown removes the `data/test/` directory so the next run starts fresh. +- **`tests/constants.py`** — Shared constants (URLs, file paths) and `load_curies()` helper. +- **`tests/data/valid_curies.txt`** — One CURIE per line (`#` comments allowed). Integration tests are parametrized over this list — adding a new line automatically expands test coverage. + +### Key Dataclasses + +- **`Identifier`** — Frozen dataclass for a normalized NodeNorm entry (curie, label, biolink_type, taxa, description). Returned by `NodeNorm.get_identifier()` and `get_clique_identifiers()`. 
+- **`CrossReference`** — Frozen dataclass for Concord.parquet rows (filename, subj, pred, obj) +- **`LabeledCrossReference`** — Extends CrossReference with labels and biolink types from NodeNorm +- **`IdentifierRecord`** — Frozen dataclass for Identifiers.parquet rows (curie + dynamic extra fields). Returned by `BabelXRefs.get_curie_ids()`. + +## Important Notes + +- **Data directory**: The `data/` directory is gitignored and contains downloaded Parquet files and generated DuckDB databases +- **Babel versions**: The default Babel version is `2025nov19`, but this can be customized via `--local-dir` and `--babel-url` + +## File Locations + +- Source code: `src/babel_explorer/` +- Tests: `tests/` +- Test CURIEs: `tests/data/valid_curies.txt` +- Downloaded Babel files: `data/2025nov19/duckdb/*.parquet` (version directory follows `--local-dir`) +- Entry point: `src/babel_explorer/cli.py` diff --git a/FUTURE.md b/FUTURE.md new file mode 100644 index 0000000..a2bb3fb --- /dev/null +++ b/FUTURE.md @@ -0,0 +1,7 @@ +# Future Work + +## Deduplicate CLI option blocks + +`--local-dir`, `--babel-url`, and `--check-download` are copy-pasted between the +`xrefs` and `ids` commands in `cli.py`. Extract a `@common_babel_options` Click +decorator so defaults are defined in one place and can't drift. diff --git a/README.md b/README.md index 077ce44..00fff8c 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,56 @@ # Babel Explorer -Software for querying and exporting Babel intermediate files +Software for querying and exploring Babel intermediate files. + +babel-explorer allows you to discover why two biological/chemical identifiers are considered identical by the [Babel](https://github.com/TranslatorSRI/Babel) system, which handles cross-references between different ontology and database identifiers (e.g., MONDO, HP, UMLS, HGNC).
+ +## Setup + +This project uses [uv](https://docs.astral.sh/uv/) for package management: + +```bash +uv sync --group dev +``` + +## Usage + +```bash +# Get cross-references for one or more CURIEs +uv run babel-explorer xrefs MONDO:0004979 + +# Get cross-references with expansion (recursive lookup) +uv run babel-explorer xrefs MONDO:0004979 --recurse + +# Get cross-references with labels from NodeNorm +uv run babel-explorer xrefs MONDO:0004979 --labels + +# Get ID records for CURIEs +uv run babel-explorer ids MONDO:0004979 + +# Test concordance changes with NodeNorm +uv run babel-explorer test-concord MONDO:0004979 HP:0000001 +``` + +## Testing + +Tests are split into fast **unit tests** (mocked, no network) and slower **integration tests** (real file downloads and API calls), controlled by pytest markers. + +```bash +# Unit tests only — fast, no network required +uv run pytest -v -m "not integration" + +# Integration tests without 2GB+ downloads +uv run pytest -v -m "integration and not slow" + +# Full suite including large file downloads +uv run pytest -v +``` + +### Adding Test CURIEs + +Integration tests are parametrized over the CURIEs listed in `tests/data/valid_curies.txt`. 
Add a new CURIE on its own line to automatically expand test coverage: + +``` +# tests/data/valid_curies.txt +MONDO:0004979 +HP:0000001 +``` \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..34a0be3 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,35 @@ +[project] +name = "babel-explorer" +version = "0.1.0" +description = "Tool for querying and exploring Babel APIs and intermediate files" +readme = "README.md" +requires-python = ">=3.11" +dependencies = [ + "click>=8.3.1", + "duckdb>=1.4.2", + "requests>=2.32.5", + "rich>=13", + "tqdm>=4.67.0", +] + +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[dependency-groups] +dev = [ + "filelock>=3.16", + "pytest>=8.3.5", + "pytest-xdist[psutil]>=3.6", + "ruff>=0.11.0", +] + +[project.scripts] +babel-explorer = "babel_explorer.cli:cli" + +[tool.pytest.ini_options] +addopts = "-n auto" +markers = [ + "integration: tests requiring network access (deselect with '-m \"not integration\"')", + "slow: tests downloading very large files 2GB+ (deselect with '-m \"not slow\"')", +] diff --git a/src/__init__.py b/src/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/babel_explorer/__init__.py b/src/babel_explorer/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/babel_explorer/cli.py b/src/babel_explorer/cli.py new file mode 100644 index 0000000..db0d46f --- /dev/null +++ b/src/babel_explorer/cli.py @@ -0,0 +1,252 @@ +# Command line interface for babel-explorer +import click +import logging +from babel_explorer.core.downloader import BabelDownloader +from babel_explorer.core.babel_xrefs import BabelXRefs +from babel_explorer.core.nodenorm import NodeNorm +from babel_explorer.core.babel_xrefs import LabeledCrossReference +from babel_explorer.formatting import write_records, _record_to_dict, make_console, hl_curie +from rich.markup import escape + + +def format_option(f): + """Decorator adding --format 
and --json-indent options to a command.""" + f = click.option( + "--format", + "fmt", + default="console", + type=click.Choice(["console", "json", "tsv", "csv"]), + show_default=True, + help="Output format", + )(f) + f = click.option( + "--json-indent", + default=2, + show_default=True, + help="Indentation depth for JSON output", + )(f) + return f + + +def parse_duration(value: str) -> int | float: + """Parse a duration string like '3h', '30m', '1d', '7200', or 'never' → seconds.""" + units = {"s": 1, "m": 60, "h": 3600, "d": 86400} + lower = (value or "").strip().lower() + if not lower: + raise click.BadParameter( + "Invalid duration: value cannot be empty. " + "Use an integer number of seconds, optionally followed by 's', 'm', 'h', or 'd', " + "or 'never'." + ) + if lower == "never": + return float("inf") + # Value with unit suffix (e.g. '3h', '30m') + if lower[-1] in units: + try: + amount = int(lower[:-1]) + except ValueError: + raise click.BadParameter( + f"Invalid duration {value!r}: expected an integer followed by an optional unit " + "('s', 'm', 'h', or 'd'), or 'never'." + ) + if amount < 0: + raise click.BadParameter( + f"Invalid duration {value!r}: duration must be non-negative." + ) + return amount * units[lower[-1]] + # Bare integer seconds + try: + result = int(lower) + except ValueError: + raise click.BadParameter( + f"Invalid duration {value!r}: expected an integer number of seconds, optionally " + "followed by 's', 'm', 'h', or 'd', or 'never'." + ) + if result < 0: + raise click.BadParameter( + f"Invalid duration {value!r}: duration must be non-negative." 
+ ) + return result + + +@click.group() +def cli(): + """babel-explorer: query and explore Babel intermediate files.""" + pass + + +@cli.command("xrefs") +@click.argument("curies", type=str, required=True, nargs=-1) +@click.option( + "--local-dir", + type=str, + default="data/2025nov19", + help="Local location to save Babel download files to", +) +@click.option( + "--babel-url", + type=str, + default="https://stars.renci.org:443/var/babel_outputs/2025nov19/", + help="Base URL of the Babel server", +) +@click.option( + "--nodenorm-url", + type=str, + default="https://nodenormalization-sri.renci.org/", + help="NodeNorm base URL used for node normalization and label enrichment", +) +@click.option("--recurse", is_flag=True, help="Recursively query returned xrefs") +@click.option("--labels", is_flag=True, help="Include labels for CURIEs") +@click.option( + "--check-download", + type=str, + default="3h", + show_default=True, + help="How often to re-check downloads (e.g. '3h', '30m', '1d', '0', 'never'). " + "'never' disables re-checking and always uses cached files; '0' forces a re-check every time.", +) +@format_option +def xrefs( + curies: list[str], + babel_url: str, + nodenorm_url: str, + local_dir: str, + recurse: bool, + labels: bool, + check_download: str, + fmt: str, + json_indent: int, +): + """ + Fetches and prints the cross-references (xrefs) for the given CURIEs. + + \f + + :param curies: A list of CURIEs (Compact URI) for which cross-references need + to be retrieved. + :type curies: list[str] + :param babel_url: Base URL of the Babel server from which to download DuckDB files. 
+ :type babel_url: str + + :return: None + """ + logging.basicConfig(level=logging.INFO) + + freshness = parse_duration(check_download) + bxref = BabelXRefs( + BabelDownloader(babel_url, local_path=local_dir, freshness_seconds=freshness), + NodeNorm(nodenorm_url), + ) + xrefs = bxref.get_curie_xrefs(curies, recurse, label_curies=labels) + + if fmt == "console": + console = make_console() + query_set = set(curies) + for xref in xrefs: + subj_str = hl_curie(xref.subj, xref.subj in query_set) + obj_str = hl_curie(xref.obj, xref.obj in query_set) + if isinstance(xref, LabeledCrossReference): + if xref.subj_label: + subj_str += f" ({escape(xref.subj_label)})" + if xref.obj_label: + obj_str += f" ({escape(xref.obj_label)})" + console.print( + f"{subj_str} [dim]{escape(xref.pred)}[/dim] " + f"{obj_str} [dim italic]{escape(xref.filename)}[/dim italic]" + ) + else: + write_records(xrefs, fmt=fmt, indent=json_indent) + + +@cli.command("ids") +@click.argument("curies", type=str, required=True, nargs=-1) +@click.option( + "--local-dir", + type=str, + default="data/2025nov19", + help="Local location to save Babel download files to", +) +@click.option( + "--babel-url", + type=str, + default="https://stars.renci.org:443/var/babel_outputs/2025nov19/", + help="Base URL of the Babel server", +) +@click.option( + "--check-download", + type=str, + default="3h", + show_default=True, + help="How often to re-check downloads (e.g. '3h', '30m', '1d', '0', 'never'). " + "'never' disables re-checking and always uses cached files; '0' forces a re-check every time.", +) +@format_option +def ids(curies: list[str], babel_url: str, local_dir: str, check_download: str, fmt: str, json_indent: int): + """ + Fetches and prints the ID records for the given CURIEs, along with Biolink type if provided. + + \f + + :param curies: A list of CURIEs (Compact URI) for which cross-references need + to be retrieved. 
+ :type curies: list[str] + :param babel_url: Base URL of the Babel server + :type babel_url: str + + :return: None + """ + logging.basicConfig(level=logging.INFO) + + freshness = parse_duration(check_download) + bxref = BabelXRefs( + BabelDownloader(babel_url, local_path=local_dir, freshness_seconds=freshness) + ) + xrefs = bxref.get_curie_ids(curies) + + if fmt == "console": + console = make_console() + for record in xrefs: + console.print(str(record)) + else: + write_records(xrefs, fmt=fmt, indent=json_indent) + + +@cli.command("test-concord") +@click.argument("curies", type=str, required=True, nargs=-1) +@click.option( + "--nodenorm-url", + type=str, + default="https://nodenormalization-sri.renci.org/", + help="NodeNorm URL to check for concord changes", +) +@format_option +def test_concord(curies, nodenorm_url, fmt, json_indent): + """For each CURIE, print the current NodeNorm clique (all equivalent identifiers, labels, and Biolink types). + + Useful for inspecting how a potential Babel concordance change would affect NodeNorm: + run before and after a Babel rebuild to see how cliques would shift. 
+ """ + nodenorm = NodeNorm(nodenorm_url) + if fmt == "console": + console = make_console() + query_set = set(curies) + for curie in curies: + for ident in nodenorm.get_clique_identifiers(curie): + biolink = ", ".join(ident.biolink_type) + console.print( + f"{hl_curie(curie, True)} " + f"{hl_curie(ident.curie, ident.curie in query_set)} " + f"{escape(ident.label or '-')} " + f"[dim]{escape(biolink)}[/dim]" + ) + else: + rows = [ + {"query_curie": curie, **_record_to_dict(ident)} + for curie in curies + for ident in nodenorm.get_clique_identifiers(curie) + ] + write_records(rows, fmt=fmt, indent=json_indent) + + +if __name__ == "__main__": + cli() diff --git a/src/babel_explorer/core/__init__.py b/src/babel_explorer/core/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/babel_explorer/core/babel_xrefs.py b/src/babel_explorer/core/babel_xrefs.py new file mode 100644 index 0000000..0c94074 --- /dev/null +++ b/src/babel_explorer/core/babel_xrefs.py @@ -0,0 +1,236 @@ +"""Query engine for Babel cross-reference intermediate files. + +Provides access to Concord.parquet and Identifiers.parquet via DuckDB, +allowing callers to discover why two biological/chemical identifiers are +considered identical in a Babel build. 
+""" + +import dataclasses +import logging +import duckdb +import functools + +from babel_explorer.core.downloader import BabelDownloader +from babel_explorer.core.nodenorm import NodeNorm + + +@dataclasses.dataclass(frozen=True) +class CrossReference: + """A single cross-reference edge read from Concord.parquet.""" + + filename: str + subj: str + pred: str + obj: str + + @staticmethod + def from_tuple(tuple: tuple[str, str, str, str]): + """Construct from a ``(filename, subj, pred, obj)`` database row tuple.""" + return CrossReference( + filename=tuple[0], subj=tuple[1], pred=tuple[2], obj=tuple[3] + ) + + @property + def curies(self): + """The frozenset of both CURIEs in this edge (subject and object).""" + return frozenset([self.subj, self.obj]) + + def __lt__(self, other): + return (self.filename, self.subj, self.obj, self.pred) < ( + other.filename, + other.subj, + other.obj, + other.pred, + ) + + +@dataclasses.dataclass(frozen=True) +class LabeledCrossReference(CrossReference): + """A CrossReference enriched with human-readable labels and Biolink types from NodeNorm.""" + + subj_label: str + subj_biolink_type: tuple[str, ...] + obj_label: str + obj_biolink_type: tuple[str, ...] 
+ + def __str__(self): + return f"""LabeledCrossReference(subj="{self.subj}", pred="{self.pred}", obj="{self.obj}", subj_label="{self.subj_label}", subj_biolink_type="{self.subj_biolink_type}", obj_label="{self.obj_label}", obj_biolink_type="{self.obj_biolink_type}")""" + + +@dataclasses.dataclass(frozen=True) +class IdentifierRecord: + """A record from the Identifiers.parquet file.""" + + curie: str + extra_fields: tuple = () + + @staticmethod + def from_row(row: tuple, column_names: list[str]): + """Create an IdentifierRecord from a DuckDB result row and its column names.""" + curie_idx = column_names.index("curie") + extra = tuple( + (col, row[i]) for i, col in enumerate(column_names) if i != curie_idx + ) + return IdentifierRecord(curie=row[curie_idx], extra_fields=extra) + + def __str__(self): + """Return a ``key=value`` string of the CURIE and all extra fields.""" + parts = [f"curie={self.curie!r}"] + for name, value in self.extra_fields: + parts.append(f"{name}={value!r}") + return f"IdentifierRecord({', '.join(parts)})" + + +class BabelXRefs: + """Query engine for Babel cross-reference and identifier Parquet files. + + Uses DuckDB for in-memory SQL queries against Concord.parquet and + Identifiers.parquet. NodeNorm is optional and only required when + ``label_curies=True`` is passed to enrichment-aware methods. + """ + + def __init__(self, downloader: BabelDownloader, nodenorm: NodeNorm = None): + """ + :param downloader: A configured ``BabelDownloader`` that provides local paths + to the required Parquet files, downloading them on first access. + :param nodenorm: Optional ``NodeNorm`` client. Required only when callers pass + ``label_curies=True``; may be ``None`` for label-free queries. + """ + self.downloader = downloader + self.nodenorm = nodenorm + + def get_curie_ids(self, curies: list[str]) -> list[IdentifierRecord]: + """ + Search for all identifiers in the /ids/ files for a particular CURIE. + + :param curies: A list of CURIEs to search for. 
+ :return: A list of IdentifierRecords containing those CURIEs. + """ + + identifier_parquet = self.downloader.get_downloaded_file( + "duckdb/Identifiers.parquet" + ) + + # Query the Parquet files using DuckDB (in-memory; nothing is persisted). + with duckdb.connect() as db: + identifier_table = db.read_parquet(identifier_parquet) # noqa: F841 — DuckDB resolves 'identifier_table' by Python variable name in the SQL query + result = db.execute( + "SELECT * FROM identifier_table WHERE curie IN $1", [curies] + ) + column_names = [desc[0] for desc in result.description] + return [ + IdentifierRecord.from_row(row, column_names) + for row in result.fetchall() + ] + + @functools.lru_cache(maxsize=None) + def get_curie_xref(self, curie: str, label_curies: bool = False): + """Return all cross-references in Concord.parquet where *curie* is the subject or object. + + Results are LRU-cached per ``(curie, label_curies)`` pair. + + :param curie: The CURIE to look up. + :param label_curies: If ``True``, annotate each result with NodeNorm labels and + Biolink types. Requires a NodeNorm instance to have been passed to ``__init__``. + :raises ValueError: If ``label_curies=True`` but no NodeNorm instance is available. + :return: A list of ``CrossReference`` (or ``LabeledCrossReference``) objects. + """ + if label_curies and self.nodenorm is None: + raise ValueError( + "label_curies=True requires a configured NodeNorm instance (nodenorm was None)." 
+ ) + + concord_parquet = self.downloader.get_downloaded_file("duckdb/Concord.parquet") + + with duckdb.connect() as db: + concord_table = db.read_parquet(concord_parquet) # noqa: F841 — DuckDB resolves 'concord_table' by Python variable name in the SQL query + xref_tuples = db.execute( + "SELECT filename, subj, pred, obj FROM concord_table WHERE subj=$1 OR obj=$1", + [curie], + ).fetchall() + + xrefs = [CrossReference.from_tuple(rec) for rec in xref_tuples] + if label_curies: + xrefs = [self._to_labeled_xref(xref) for xref in xrefs] + return xrefs + + def _to_labeled_xref(self, xref: CrossReference) -> LabeledCrossReference: + """Convert a CrossReference to a LabeledCrossReference using NodeNorm.""" + subj_ident = self.nodenorm.get_identifier(xref.subj) + obj_ident = self.nodenorm.get_identifier(xref.obj) + return LabeledCrossReference( + subj=xref.subj, + obj=xref.obj, + filename=xref.filename, + pred=xref.pred, + subj_label=subj_ident.label, + subj_biolink_type=subj_ident.biolink_type, + obj_label=obj_ident.label, + obj_biolink_type=obj_ident.biolink_type, + ) + + def _get_curie_xrefs_recursive(self, curies: list[str], label_curies: bool = False): + """Traverse the cross-reference graph in one DuckDB WITH RECURSIVE query.""" + if label_curies and self.nodenorm is None: + raise ValueError( + "label_curies=True requires a configured NodeNorm instance (nodenorm was None)." 
+ ) + if not curies: + return [] + + concord_parquet = self.downloader.get_downloaded_file("duckdb/Concord.parquet") + + with duckdb.connect() as db: + concord_table = db.read_parquet(concord_parquet) # noqa: F841 — DuckDB resolves 'concord_table' by Python variable name in the SQL query + rows = db.execute( + """ + WITH RECURSIVE + edges(a, b) AS ( + SELECT subj, obj FROM concord_table + UNION ALL + SELECT obj, subj FROM concord_table + ), + frontier(curie) AS ( + SELECT unnest($1::VARCHAR[]) + UNION + SELECT e.b + FROM edges e + INNER JOIN frontier f ON e.a = f.curie + ) + SELECT DISTINCT c.filename, c.subj, c.pred, c.obj + FROM concord_table c + WHERE c.subj IN (SELECT curie FROM frontier) + OR c.obj IN (SELECT curie FROM frontier) + ORDER BY c.filename, c.subj, c.obj, c.pred + """, + [curies], + ).fetchall() + + xrefs = [CrossReference.from_tuple(row) for row in rows] + + if label_curies: + xrefs = [self._to_labeled_xref(xref) for xref in xrefs] + + return xrefs + + def get_curie_xrefs( + self, curies: list[str], recurse: bool = False, label_curies: bool = False + ): + """ + Search for all identifiers that are cross-referenced to the given CURIE. + + :param curies: A list of CURIEs to search for. + :param recurse: Whether to expand the cross-references (i.e. recursively follow all identifiers). + :param label_curies: Whether to annotate results with labels from NodeNorm. + :return: A list of cross-references containing those CURIEs. 
+ """ + + if recurse: + return self._get_curie_xrefs_recursive(curies, label_curies) + + xrefs = set() + for curie in curies: + logging.info(f"Searching for cross-references for {curie}") + xrefs.update(self.get_curie_xref(curie, label_curies)) + + return sorted(xrefs) diff --git a/src/babel_explorer/core/downloader.py b/src/babel_explorer/core/downloader.py new file mode 100644 index 0000000..4d2f9a2 --- /dev/null +++ b/src/babel_explorer/core/downloader.py @@ -0,0 +1,371 @@ +"""HTTP downloader for Babel Parquet files with ETag-based freshness checking.""" + +import functools +import json +import os +import tempfile +import urllib.parse +import time +import requests +from datetime import datetime, timezone +from tqdm import tqdm +import logging + + +class BabelDownloader: + """ + Class for downloading Babel cross-reference files to a local directory as needed. + """ + + def __init__( + self, + url_base, + local_path=None, + retries=10, + freshness_seconds=3 * 3600, + timeout: int = 30, + ): + """ + :param url_base: Base URL of the Babel server (must end with ``/``). + :param local_path: Directory for cached downloads. Defaults to + ``tempfile.gettempdir()`` if ``None``; created automatically if it + does not exist. + :param retries: Maximum number of download retry attempts on failure. + :param freshness_seconds: How long a local file is considered fresh without + re-checking the server. Use ``float('inf')`` to never re-check, or ``0`` + to always issue a HEAD request. Defaults to 3 hours. + :param timeout: HTTP request timeout in seconds. + """ + if not url_base.endswith("/"): + url_base += "/" + self.url_base = url_base + self.retries = retries + self.freshness_seconds = freshness_seconds + self.timeout = timeout + self.logger = logging.getLogger(BabelDownloader.__name__) + + if local_path is None: + local_path = tempfile.gettempdir() + + # Make sure the local path is an existing directory or that we can create it. 
+ if not os.path.exists(local_path): + os.makedirs(local_path, exist_ok=True) + self.local_path = local_path + elif os.path.exists(local_path) and os.path.isdir(local_path): + self.local_path = local_path + else: + raise ValueError( + f"Invalid local_path (must be an existing directory): '{local_path}'" + ) + + @functools.lru_cache(maxsize=None) + def get_output_file(self, filename): + """Return (and create) the local filesystem path for a given relative filename.""" + filepath = os.path.join(self.local_path, filename) + os.makedirs(os.path.dirname(filepath), exist_ok=True) + return filepath + + def _get_meta_path(self, local_path): + """Return the sidecar metadata file path for a given local file.""" + return local_path + ".meta" + + def _load_meta(self, local_path): + """Load sidecar metadata JSON, or return None if not found/invalid.""" + meta_path = self._get_meta_path(local_path) + if not os.path.exists(meta_path): + return None + try: + with open(meta_path, "r") as f: + return json.load(f) + except (json.JSONDecodeError, OSError): + return None + + def _save_meta(self, local_path, headers, update_last_checked=True): + """ + Write a sidecar .meta JSON file next to local_path. + + Args: + local_path: Path to the downloaded file + headers: Response headers dict (or requests.structures.CaseInsensitiveDict) + update_last_checked: If True, set last_checked to now + """ + meta = {} + if "ETag" in headers: + meta["etag"] = headers["ETag"] + if "Last-Modified" in headers: + meta["last_modified"] = headers["Last-Modified"] + if "Content-Length" in headers: + meta["content_length"] = int(headers["Content-Length"]) + if update_last_checked: + meta["last_checked"] = datetime.now(timezone.utc).isoformat() + + meta_path = self._get_meta_path(local_path) + with open(meta_path, "w") as f: + json.dump(meta, f, indent=2) + + def _is_within_freshness(self, meta, freshness_seconds): + """ + Return True if last_checked is within freshness_seconds of now. 
+ + Args: + meta: dict loaded from .meta file + freshness_seconds: Number of seconds; float('inf') means always fresh + + Returns: + bool + """ + if freshness_seconds == float("inf"): + return True + last_checked_str = meta.get("last_checked") + if not last_checked_str: + return False + try: + last_checked = datetime.fromisoformat(last_checked_str) + age = (datetime.now(timezone.utc) - last_checked).total_seconds() + return age < freshness_seconds + except (ValueError, TypeError): + return False + + def _etag_matches(self, url, meta): + """ + Do a HEAD request and check if the ETag (or Last-Modified + Content-Length) + matches the stored metadata. + + Does not write to disk — the caller is responsible for updating last_checked + when this returns True. + + Args: + url: URL to HEAD + meta: dict loaded from .meta file (may have etag, last_modified, content_length) + + Returns: + bool: True if remote matches local meta (file is still current) + """ + try: + response = requests.head(url, timeout=self.timeout) + response.raise_for_status() + except requests.RequestException as e: + self.logger.warning( + f"HEAD request failed for {url}: {e}; assuming file is current" + ) + return True + + remote_headers = response.headers + + # Primary check: ETag + local_etag = meta.get("etag") + remote_etag = remote_headers.get("ETag") + if local_etag and remote_etag: + if local_etag == remote_etag: + self.logger.info(f"ETag matches ({remote_etag}), file is current") + return True + else: + self.logger.info( + f"ETag changed: {local_etag!r} → {remote_etag!r}, re-downloading" + ) + return False + + # Fallback: Last-Modified + Content-Length + local_lm = meta.get("last_modified") + remote_lm = remote_headers.get("Last-Modified") + local_cl = meta.get("content_length") + remote_cl = remote_headers.get("Content-Length") + + if local_lm and remote_lm and local_lm == remote_lm: + if local_cl is None or remote_cl is None or int(remote_cl) == local_cl: + self.logger.info( + f"Last-Modified 
matches ({remote_lm}), file is current" + ) + return True + + self.logger.info( + "Cannot confirm file is current (no matching ETag or Last-Modified), will re-download" + ) + return False + + def _stream_download(self, response, local_path, resume_byte_pos, chunk_size): + """ + Stream download from response to file with progress bar. + + Args: + response: requests.Response object with stream=True + local_path: Local file path to write to + resume_byte_pos: Starting byte position (for resume) + chunk_size: Size of chunks to read/write + """ + # Get total size from Content-Length header (may not be present) + content_length = response.headers.get("Content-Length") + if content_length: + total_size = int(content_length) + resume_byte_pos + else: + total_size = None + + # Open file in append mode if resuming, write mode otherwise + mode = "ab" if resume_byte_pos > 0 else "wb" + + with open(local_path, mode) as f: + with tqdm( + total=total_size, + initial=resume_byte_pos, + unit="B", + unit_scale=True, + unit_divisor=1024, + desc=os.path.basename(local_path), + ) as progress_bar: + for chunk in response.iter_content(chunk_size=chunk_size): + if chunk: + f.write(chunk) + progress_bar.update(len(chunk)) + + def _download_with_retry(self, url, local_path, chunk_size): + """ + Download a file with retry logic and resume capability. 
+ + Args: + url: URL to download from + local_path: Local file path to save to + chunk_size: Size of chunks to read/write + + Returns: + requests.structures.CaseInsensitiveDict: Response headers from the final request + + Raises: + RuntimeError: If all retry attempts fail + """ + for attempt in range(1, self.retries + 1): + try: + # Check if we're resuming a partial download + resume_byte_pos = 0 + if os.path.exists(local_path): + resume_byte_pos = os.path.getsize(local_path) + + # Prepare headers for resume + headers = {} + if resume_byte_pos > 0: + headers["Range"] = f"bytes={resume_byte_pos}-" + self.logger.info(f"Resuming download from byte {resume_byte_pos}") + + # Make streaming request with timeout for connection (not total time) + with requests.get( + url, headers=headers, stream=True, timeout=self.timeout + ) as response: + # Handle different response codes + if response.status_code == 416: + # Range Not Satisfiable - file already complete + self.logger.info(f"File already complete: {local_path}") + return response.headers + elif response.status_code == 206: + # Partial Content - resume successful + self.logger.info("Resuming download (HTTP 206)") + elif response.status_code == 200: + # OK - server doesn't support resume or no Range header was sent + if resume_byte_pos > 0: + self.logger.warning( + "Server doesn't support resume, restarting from beginning" + ) + resume_byte_pos = 0 + # Remove partial file + if os.path.exists(local_path): + os.remove(local_path) + else: + response.raise_for_status() + + # Stream download with progress bar + self._stream_download( + response, local_path, resume_byte_pos, chunk_size + ) + + # Success - exit retry loop + return response.headers + + except (requests.RequestException, IOError) as e: + self.logger.warning( + f"Download attempt {attempt}/{self.retries} failed: {e}" + ) + + if attempt < self.retries: + # Calculate exponential backoff with max of 60 seconds + wait_time = min(2**attempt, 60) + 
self.logger.info(f"Retrying in {wait_time} seconds...") + time.sleep(wait_time) + else: + # All retries exhausted + raise RuntimeError( + f"Failed to download {url} after {self.retries} attempts: {e}" + ) + + @functools.lru_cache(maxsize=None) + def get_downloaded_file(self, dirpath: str, chunk_size: int = 1024 * 1024): + """ + Download a file from the Babel server to local storage with ETag-based caching. + + Three-tier freshness logic: + 1. If .meta exists and last_checked is within freshness window → return immediately + 2. If .meta exists but stale → HEAD request to compare ETag; return if unchanged + 3. If ETag changed or no .meta → full re-download + + Args: + dirpath: Relative path from url_base to the file + chunk_size: Size of chunks to download (default 1MB) + + Returns: + str: Local path to the downloaded file + """ + local_path_to_download_to = os.path.join(self.local_path, dirpath) + os.makedirs(os.path.dirname(local_path_to_download_to), exist_ok=True) + + url_to_download = urllib.parse.urljoin(self.url_base, dirpath) + + if os.path.exists(local_path_to_download_to): + meta = self._load_meta(local_path_to_download_to) + if meta is not None: + # Tier 1: within freshness window — skip all network calls + if self._is_within_freshness(meta, self.freshness_seconds): + self.logger.info( + f"File within freshness window ({self.freshness_seconds} seconds), skipping check: {local_path_to_download_to}" + ) + return local_path_to_download_to + + # Tier 2: stale but maybe unchanged — HEAD request + if self._etag_matches(url_to_download, meta): + # Update last_checked timestamp + meta["last_checked"] = datetime.now(timezone.utc).isoformat() + meta_path = self._get_meta_path(local_path_to_download_to) + with open(meta_path, "w") as f: + json.dump(meta, f, indent=2) + self.logger.info( + f"ETag matches, using existing file: {local_path_to_download_to}" + ) + return local_path_to_download_to + + # Tier 3: ETag changed — re-download + self.logger.warning( + f"Remote 
file changed, re-downloading: {local_path_to_download_to}" + ) + + self.logger.info( + f"Downloading {url_to_download} to {local_path_to_download_to}" + ) + + # Download to a sibling .tmp file, then atomically replace the final destination. + # This ensures the final file is never partially written. + tmp_path = local_path_to_download_to + ".tmp" + try: + response_headers = self._download_with_retry( + url_to_download, tmp_path, chunk_size + ) + os.replace(tmp_path, local_path_to_download_to) + except Exception: + if os.path.exists(tmp_path): + os.remove(tmp_path) + raise + + # Save sidecar metadata + if response_headers is not None: + self._save_meta(local_path_to_download_to, response_headers) + + bytes_downloaded = os.path.getsize(local_path_to_download_to) + self.logger.info( + f"Downloaded {url_to_download} to {local_path_to_download_to}: {bytes_downloaded} bytes" + ) + return local_path_to_download_to diff --git a/src/babel_explorer/core/nodenorm.py b/src/babel_explorer/core/nodenorm.py new file mode 100644 index 0000000..4f0f6d1 --- /dev/null +++ b/src/babel_explorer/core/nodenorm.py @@ -0,0 +1,130 @@ +"""NodeNorm API client for identifier normalisation and label enrichment.""" + +import dataclasses +import functools +import requests +import logging + + +@dataclasses.dataclass(frozen=True) +class Identifier: + """Normalised identifier record returned by the NodeNorm API.""" + + curie: str + label: str = "" + biolink_type: tuple[str, ...] = () + taxa: tuple[str, ...] = () + description: tuple[str, ...] 
= () + + def __lt__(self, other): + return self.curie < other.curie + + @staticmethod + def from_dict(d: dict) -> "Identifier": + def _to_tuple(val) -> tuple[str, ...]: + """Coerce a string or list to a tuple — guards against iterating string chars.""" + if not val: + return () + return (val,) if isinstance(val, str) else tuple(val) + + return Identifier( + curie=d["identifier"], + label=d.get("label", ""), + biolink_type=_to_tuple(d.get("type")), + taxa=_to_tuple(d.get("taxa")), + description=_to_tuple(d.get("description")), + ) + + +class NodeNorm: + """Client for the NodeNormalization API (https://nodenormalization-sri.renci.org/).""" + + def __init__(self, nodenorm_url: str = "", timeout: int = 30): + """ + :param nodenorm_url: Base URL of the NodeNorm service. Pass an empty string (default) + to skip all network calls and have every lookup return a bare ``Identifier``. + :param timeout: HTTP request timeout in seconds. + """ + self.nodenorm_url = nodenorm_url + self.timeout = timeout + if self.nodenorm_url and not self.nodenorm_url.endswith("/"): + self.nodenorm_url += "/" + + @functools.lru_cache(maxsize=None) + def get_identifier(self, curie: str) -> "Identifier": + """Return the ``Identifier`` for *curie* by looking it up in its NodeNorm clique. + + Searches ``equivalent_identifiers`` for an entry whose ``identifier`` field matches + *curie* exactly. Falls back to a bare ``Identifier(curie=curie)`` (empty label and + type) if NodeNorm does not recognise the CURIE or it is not listed in the clique. + + Results are LRU-cached so repeated calls for the same CURIE are free. 
+ """ + result = self.normalize_curie(curie) + logging.debug(f"Normalizing {curie} with NodeNorm to result: {result}") + if not result: + return Identifier(curie=curie) + for identifier in result.get("equivalent_identifiers", []): + if identifier["identifier"] == curie: + logging.debug(f"Found exact match for {curie}: {identifier}") + return Identifier.from_dict(identifier) + + logging.debug( + f"No exact match for {curie!r} in equivalent_identifiers; returning bare Identifier" + ) + return Identifier(curie=curie) + + @functools.lru_cache(maxsize=None) + def normalize_curie( + self, + curie: str, + conflate=True, + drug_chemical_conflate=True, + description=True, + individual_types=True, + include_taxa=True, + ): + """Call ``get_normalized_nodes`` and return the per-CURIE result dict. + + :return: The normalisation dict for *curie* (contains ``id``, ``equivalent_identifiers``, + ``type``, etc.), or ``None`` if the CURIE is not recognised by NodeNorm. + :raises requests.HTTPError: If the API returns a non-2xx status code. + """ + if not self.nodenorm_url: + return None + response = requests.get( + f"{self.nodenorm_url}get_normalized_nodes", + params={ + "curie": curie, + "conflate": conflate, + "drug_chemical_conflate": drug_chemical_conflate, + "description": description, + "individual_types": individual_types, + "include_taxa": include_taxa, + }, + timeout=self.timeout, + ) + response.raise_for_status() + result = response.json() + + try: + return result[curie] + except KeyError: + logging.debug( + f"NodeNorm response did not contain CURIE {curie!r}; returning None" + ) + return None + + @functools.lru_cache(maxsize=None) + def get_clique_identifiers(self, curie: str) -> list[Identifier]: + """Return all ``Identifier`` objects in the NodeNorm clique for *curie*. + + :return: A list of ``Identifier`` objects (one per entry in ``equivalent_identifiers``), + or an empty list if the CURIE is unknown or has no equivalents. 
+ """ + result = self.normalize_curie(curie) + if not result: + return [] + if "equivalent_identifiers" not in result: + return [] + return [Identifier.from_dict(x) for x in result["equivalent_identifiers"]] diff --git a/src/babel_explorer/formatting.py b/src/babel_explorer/formatting.py new file mode 100644 index 0000000..191f1cd --- /dev/null +++ b/src/babel_explorer/formatting.py @@ -0,0 +1,87 @@ +"""Output formatting for babel-explorer CLI commands. + +Provides: +- write_records() for machine-readable output (json, tsv, csv) +- make_console() and hl_curie() for rich console output +""" + +import csv +import dataclasses +import json +import sys +from typing import Any + +from rich.console import Console +from rich.markup import escape + + +def _record_to_dict(record) -> dict[str, Any]: + """Convert a dataclass (or plain dict) to a flat dict. + + Handles IdentifierRecord's extra_fields, which asdict() returns as a + list of [col, val] pairs rather than a nested dict. + """ + if isinstance(record, dict): + return record + d = dataclasses.asdict(record) + if "extra_fields" in d: + for col, val in d.pop("extra_fields"): + d[col] = val + return d + + +def _flatten_for_tabular(row: dict) -> dict: + """Convert list/tuple fields to pipe-joined strings for TSV/CSV output.""" + return {k: "|".join(v) if isinstance(v, (list, tuple)) else v for k, v in row.items()} + + +def make_console(file=None) -> Console: + """Create a rich Console with babel-explorer defaults. + + Auto-detects TTY and NO_COLOR; strips markup when output is piped. + highlight=False prevents rich from auto-highlighting numbers and strings. 
+ """ + return Console(file=file, highlight=False) + + +def hl_curie(curie: str, highlight: bool) -> str: + """Return rich markup for a CURIE — bold cyan if it is a query CURIE.""" + escaped = escape(curie) + return f"[bold cyan]{escaped}[/bold cyan]" if highlight else escaped + + +def write_records(records, fmt: str, indent: int = 2, file=None): + """Write an iterable of dataclass records (or dicts) in the requested format. + + :param records: Iterable of dataclass instances or plain dicts. + :param fmt: One of "json", "tsv", "csv". (Console output is handled by + make_console/hl_curie in the CLI layer.) + :param indent: JSON indentation depth (ignored for other formats). + :param file: Output file-like object; defaults to sys.stdout. + :raises ValueError: If fmt is not a recognised format. + """ + if file is None: + file = sys.stdout + records = list(records) + + if fmt == "json": + rows = [_record_to_dict(r) for r in records] + json.dump(rows, file, indent=indent, default=str) + print(file=file) # trailing newline + + elif fmt in ("tsv", "csv"): + if not records: + return + rows = [_flatten_for_tabular(_record_to_dict(r)) for r in records] + delimiter = "\t" if fmt == "tsv" else "," + writer = csv.DictWriter( + file, + fieldnames=list(rows[0].keys()), + delimiter=delimiter, + lineterminator="\n", + ) + writer.writeheader() + writer.writerows(rows) + + else: + raise ValueError(f"Unknown format: {fmt!r}") diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..588fec0 --- /dev/null +++ b/tests/__init__.py @@ -0,0 +1 @@ +# Tests for babel-explorer diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 0000000..92ecb06 --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,123 @@ +""" +Shared fixtures for babel-explorer tests. + +Session-scoped fixtures download Babel files once and share them across all test modules. +Teardown removes the test data directory so the next run starts fresh. 
+""" + +import os +import shutil + +import pytest +from filelock import FileLock + +from babel_explorer.core.downloader import BabelDownloader +from babel_explorer.core.babel_xrefs import BabelXRefs +from babel_explorer.core.nodenorm import NodeNorm + +from tests.constants import ( + BABEL_URL, + NODENORM_URL, + TEST_DATA_DIR, + CONCORD_FILE, + METADATA_FILE, + IDENTIFIERS_FILE, + load_curies, +) + + +# --------------------------------------------------------------------------- +# Session-scoped fixtures +# --------------------------------------------------------------------------- + + +@pytest.fixture(scope="session") +def valid_curies() -> list[str]: + """Load test CURIEs from tests/data/valid_curies.txt.""" + curies = load_curies() + assert len(curies) > 0, "No CURIEs found in valid_curies.txt" + return curies + + +@pytest.fixture(scope="session") +def test_data_dir(request): + """ + Provide a test data directory for the entire session. + + Creates the directory before tests, removes it after all tests complete. + When running under pytest-xdist, cleanup is skipped: worker sessions end at + unpredictable times and deleting the shared directory from one worker while + others are still reading the same files causes flaky IO errors. The files + are re-used (or re-validated) on the next run via the freshness-window logic + in BabelDownloader.get_downloaded_file. + """ + worker_id = getattr(request.config, "workerinput", {}).get("workerid", "master") + os.makedirs(TEST_DATA_DIR, exist_ok=True) + + yield TEST_DATA_DIR + + # Only clean up when running without xdist (sequential run). In a parallel + # run each worker session may finish at a different time; gw0 cleaning up + # while gw5 is still reading Concord.parquet causes spurious failures. 
+ if worker_id == "master": + if os.path.exists(TEST_DATA_DIR): + shutil.rmtree(TEST_DATA_DIR) + + +@pytest.fixture(scope="session") +def shared_downloader(test_data_dir) -> BabelDownloader: + """A BabelDownloader pointed at the test data directory.""" + return BabelDownloader(url_base=BABEL_URL, local_path=test_data_dir) + + +@pytest.fixture(scope="session") +def downloaded_concord(shared_downloader, test_data_dir) -> str: + """Download duckdb/Concord.parquet (~626 MB). Returns the local path.""" + lock_path = os.path.join(test_data_dir, "concord.lock") + with FileLock(lock_path): + return shared_downloader.get_downloaded_file(CONCORD_FILE) + + +@pytest.fixture(scope="session") +def downloaded_metadata(shared_downloader, test_data_dir) -> str: + """Download duckdb/Metadata.parquet (small). Returns the local path.""" + lock_path = os.path.join(test_data_dir, "metadata.lock") + with FileLock(lock_path): + return shared_downloader.get_downloaded_file(METADATA_FILE) + + +@pytest.fixture(scope="session") +def downloaded_parquet_files(downloaded_concord, downloaded_metadata) -> dict[str, str]: + """Dict of {relative_name: local_path} for Concord and Metadata files.""" + return { + CONCORD_FILE: downloaded_concord, + METADATA_FILE: downloaded_metadata, + } + + +@pytest.fixture(scope="session") +def downloaded_identifiers(shared_downloader, test_data_dir) -> str: + """Download duckdb/Identifiers.parquet (2 GB+). 
Returns the local path.""" + lock_path = os.path.join(test_data_dir, "identifiers.lock") + with FileLock(lock_path): + return shared_downloader.get_downloaded_file(IDENTIFIERS_FILE) + + +@pytest.fixture(scope="session") +def nodenorm() -> NodeNorm: + """A NodeNorm client pointed at the public API.""" + return NodeNorm(nodenorm_url=NODENORM_URL) + + +@pytest.fixture(scope="session") +def babel_xrefs(shared_downloader, downloaded_parquet_files) -> BabelXRefs: + """A BabelXRefs instance (no NodeNorm) with Concord + Metadata already downloaded.""" + return BabelXRefs(shared_downloader) + + +@pytest.fixture(scope="session") +def babel_xrefs_with_nodenorm( + shared_downloader, nodenorm, downloaded_parquet_files +) -> BabelXRefs: + """A BabelXRefs instance with NodeNorm, Concord + Metadata already downloaded.""" + return BabelXRefs(shared_downloader, nodenorm) diff --git a/tests/constants.py b/tests/constants.py new file mode 100644 index 0000000..01b75fa --- /dev/null +++ b/tests/constants.py @@ -0,0 +1,26 @@ +"""Shared constants for babel-explorer tests.""" + +import pathlib + +BABEL_URL = "https://stars.renci.org/var/babel_outputs/2025nov19/" +NODENORM_URL = "https://nodenormalization-sri.renci.org/" +TEST_DATA_DIR = "data/test" + +# Parquet file paths (relative to the Babel server / local data dir) +CONCORD_FILE = "duckdb/Concord.parquet" +METADATA_FILE = "duckdb/Metadata.parquet" +IDENTIFIERS_FILE = "duckdb/Identifiers.parquet" + +# Path to the valid CURIEs file +VALID_CURIES_PATH = pathlib.Path(__file__).parent / "data" / "valid_curies.txt" + + +def load_curies(path: pathlib.Path = VALID_CURIES_PATH) -> list[str]: + """Read CURIEs from a text file, skipping comments and blank lines.""" + curies = [] + with open(path) as f: + for line in f: + stripped = line.strip() + if stripped and not stripped.startswith("#"): + curies.append(stripped) + return curies diff --git a/tests/data/valid_curies.txt b/tests/data/valid_curies.txt new file mode 100644 index 0000000..89a53b3 
--- /dev/null +++ b/tests/data/valid_curies.txt @@ -0,0 +1,5 @@ +# Valid CURIEs for integration tests. +# Add new CURIEs here to expand test coverage — tests are parametrized over this list. +MONDO:0004979 +MONDO:0005044 +NCIT:C55060 diff --git a/tests/test_babel_xrefs.py b/tests/test_babel_xrefs.py new file mode 100644 index 0000000..114d269 --- /dev/null +++ b/tests/test_babel_xrefs.py @@ -0,0 +1,409 @@ +""" +Tests for BabelXRefs, CrossReference, LabeledCrossReference, and IdentifierRecord. + +Unit tests use mocks; integration tests query real Parquet files via DuckDB. +""" + +import pytest +from unittest.mock import patch, MagicMock + +from babel_explorer.core.babel_xrefs import ( + BabelXRefs, + CrossReference, + LabeledCrossReference, + IdentifierRecord, +) +from babel_explorer.core.downloader import BabelDownloader +from babel_explorer.core.nodenorm import NodeNorm + +from tests.constants import load_curies + +VALID_CURIES = load_curies() + + +# ========================================================================== +# Unit Tests — CrossReference +# ========================================================================== + + +class TestCrossReference: + """Tests for the CrossReference frozen dataclass.""" + + def test_creation(self): + xr = CrossReference( + filename="f.txt", subj="A:1", pred="skos:exactMatch", obj="B:2" + ) + assert xr.filename == "f.txt" + assert xr.subj == "A:1" + assert xr.pred == "skos:exactMatch" + assert xr.obj == "B:2" + + def test_from_tuple(self): + t = ("file.tsv", "MONDO:1", "owl:sameAs", "HP:2") + xr = CrossReference.from_tuple(t) + assert xr.filename == "file.tsv" + assert xr.subj == "MONDO:1" + assert xr.pred == "owl:sameAs" + assert xr.obj == "HP:2" + + def test_curies_property(self): + xr = CrossReference(filename="f", subj="A:1", pred="p", obj="B:2") + assert xr.curies == frozenset({"A:1", "B:2"}) + + def test_frozen_immutability(self): + xr = CrossReference(filename="f", subj="A:1", pred="p", obj="B:2") + with 
pytest.raises(AttributeError): + xr.subj = "changed" + + def test_equality(self): + a = CrossReference(filename="f", subj="A:1", pred="p", obj="B:2") + b = CrossReference(filename="f", subj="A:1", pred="p", obj="B:2") + assert a == b + + def test_hashability(self): + a = CrossReference(filename="f", subj="A:1", pred="p", obj="B:2") + b = CrossReference(filename="f", subj="A:1", pred="p", obj="B:2") + assert hash(a) == hash(b) + assert len({a, b}) == 1 + + def test_lt_ordering(self): + a = CrossReference(filename="a.tsv", subj="A:1", pred="p", obj="B:2") + b = CrossReference(filename="b.tsv", subj="A:1", pred="p", obj="B:2") + assert a < b + + def test_sorting(self): + items = [ + CrossReference(filename="c", subj="C:1", pred="p", obj="D:1"), + CrossReference(filename="a", subj="A:1", pred="p", obj="B:1"), + CrossReference(filename="b", subj="B:1", pred="p", obj="C:1"), + ] + result = sorted(items) + assert [x.filename for x in result] == ["a", "b", "c"] + + +# ========================================================================== +# Unit Tests — LabeledCrossReference +# ========================================================================== + + +class TestLabeledCrossReference: + """Tests for the LabeledCrossReference frozen dataclass.""" + + def test_creation(self): + lxr = LabeledCrossReference( + subj="A:1", + pred="p", + obj="B:2", + filename="f", + subj_label="Alpha", + subj_biolink_type=("biolink:Disease",), + obj_label="Beta", + obj_biolink_type=("biolink:Gene",), + ) + assert lxr.subj == "A:1" + assert lxr.subj_label == "Alpha" + assert lxr.obj_biolink_type == ("biolink:Gene",) + + def test_inherits_from_cross_reference(self): + lxr = LabeledCrossReference( + subj="A:1", + pred="p", + obj="B:2", + filename="f", + subj_label="", + subj_biolink_type=(), + obj_label="", + obj_biolink_type=(), + ) + assert isinstance(lxr, CrossReference) + + def test_curies_property(self): + lxr = LabeledCrossReference( + subj="A:1", + pred="p", + obj="B:2", + 
filename="f", + subj_label="", + subj_biolink_type=(), + obj_label="", + obj_biolink_type=(), + ) + assert lxr.curies == frozenset({"A:1", "B:2"}) + + def test_str(self): + lxr = LabeledCrossReference( + subj="A:1", + pred="p", + obj="B:2", + filename="f", + subj_label="Alpha", + subj_biolink_type=("biolink:Disease",), + obj_label="Beta", + obj_biolink_type=("biolink:Gene",), + ) + s = str(lxr) + assert "A:1" in s + assert "B:2" in s + assert "Alpha" in s + + +# ========================================================================== +# Unit Tests — IdentifierRecord +# ========================================================================== + + +class TestIdentifierRecord: + """Tests for the IdentifierRecord frozen dataclass.""" + + def test_creation(self): + rec = IdentifierRecord(curie="MONDO:0004979") + assert rec.curie == "MONDO:0004979" + assert rec.extra_fields == () + + def test_from_row(self): + row = ("MONDO:0004979", "Disease", "asthma") + cols = ["curie", "category", "label"] + rec = IdentifierRecord.from_row(row, cols) + assert rec.curie == "MONDO:0004979" + assert ("category", "Disease") in rec.extra_fields + assert ("label", "asthma") in rec.extra_fields + + def test_frozen(self): + rec = IdentifierRecord(curie="X:1") + with pytest.raises(AttributeError): + rec.curie = "changed" + + def test_str(self): + rec = IdentifierRecord(curie="X:1", extra_fields=(("type", "Gene"),)) + s = str(rec) + assert "X:1" in s + assert "type" in s + assert "Gene" in s + + +# ========================================================================== +# Unit Tests — BabelXRefs (mocked) +# ========================================================================== + + +class TestBabelXRefsInit: + """Tests for BabelXRefs constructor.""" + + def test_init_without_nodenorm(self, tmp_path): + dl = BabelDownloader(url_base="https://example.com/", local_path=str(tmp_path)) + bx = BabelXRefs(dl) + assert bx.downloader is dl + assert bx.nodenorm is None + + def 
test_init_with_nodenorm(self, tmp_path): + dl = BabelDownloader(url_base="https://example.com/", local_path=str(tmp_path)) + nn = NodeNorm("https://example.com/") + bx = BabelXRefs(dl, nn) + assert bx.nodenorm is nn + + +class TestBabelXRefsMocked: + """Mocked query tests — no DuckDB or Parquet files needed.""" + + def _make_bx(self, tmp_path): + dl = BabelDownloader(url_base="https://example.com/", local_path=str(tmp_path)) + return BabelXRefs(dl) + + def test_get_curie_xref_calls_downloader(self, tmp_path): + bx = self._make_bx(tmp_path) + mock_result = MagicMock() + mock_result.fetchall.return_value = [ + ("concord.tsv", "A:1", "skos:exactMatch", "B:2"), + ] + mock_db = MagicMock() + mock_db.__enter__.return_value = mock_db + mock_db.read_parquet.return_value = "table" + mock_db.execute.return_value = mock_result + + with patch.object( + bx.downloader, "get_downloaded_file", return_value="/fake/path" + ) as mock_dl: + with patch.object( + bx.downloader, "get_output_file", return_value="/fake/db" + ): + with patch( + "babel_explorer.core.babel_xrefs.duckdb.connect", + return_value=mock_db, + ): + bx.get_curie_xref.cache_clear() + result = bx.get_curie_xref("A:1") + # Downloader should be called for Concord only (Metadata unused here) + assert mock_dl.call_count == 1 + result_list = list(result) + assert len(result_list) == 1 + assert isinstance(result_list[0], CrossReference) + + def test_get_curie_xrefs_no_expand(self, tmp_path): + bx = self._make_bx(tmp_path) + xr = CrossReference(filename="f", subj="A:1", pred="p", obj="B:2") + with patch.object(bx, "get_curie_xref", return_value=[xr]): + bx.get_curie_xref.cache_clear() + result = bx.get_curie_xrefs(["A:1"], recurse=False) + assert len(result) == 1 + assert result[0] == xr + + def test_get_curie_xrefs_with_expand(self, tmp_path): + bx = self._make_bx(tmp_path) + xr1 = CrossReference(filename="f", subj="A:1", pred="p", obj="B:2") + xr2 = CrossReference(filename="f", subj="B:2", pred="p", obj="C:3") + + with 
patch.object( + bx, "_get_curie_xrefs_recursive", return_value=[xr1, xr2] + ) as mock_rec: + result = bx.get_curie_xrefs(["A:1"], recurse=True) + mock_rec.assert_called_once_with(["A:1"], False) + assert xr1 in result + assert xr2 in result + + def test_get_curie_xrefs_recursive_sql_traversal(self, tmp_path): + """_get_curie_xrefs_recursive uses SQL graph traversal, not Python recursion.""" + import duckdb as real_duckdb + + bx = self._make_bx(tmp_path) + + # Write a tiny Parquet file: graph A-B, B-C, D-E (disconnected from A-B-C) + parquet_path = str(tmp_path / "test_concord.parquet") + setup_db = real_duckdb.connect() + setup_db.execute(f""" + COPY ( + SELECT * FROM (VALUES + ('f1.tsv', 'A:1', 'skos:exactMatch', 'B:2'), + ('f1.tsv', 'B:2', 'skos:exactMatch', 'C:3'), + ('f2.tsv', 'D:4', 'skos:exactMatch', 'E:5') + ) AS t(filename, subj, pred, obj) + ) TO '{parquet_path}' (FORMAT PARQUET) + """) + setup_db.close() + + with patch.object( + bx.downloader, "get_downloaded_file", return_value=parquet_path + ): + # Starting from A:1 should reach B:2 and C:3 but not the D-E component + result = bx._get_curie_xrefs_recursive(["A:1"]) + pairs = {(xr.subj, xr.obj) for xr in result} + assert ("A:1", "B:2") in pairs + assert ("B:2", "C:3") in pairs + assert ("D:4", "E:5") not in pairs + + # Starting from D:4 should only reach E:5 + result = bx._get_curie_xrefs_recursive(["D:4"]) + pairs = {(xr.subj, xr.obj) for xr in result} + assert ("D:4", "E:5") in pairs + assert ("A:1", "B:2") not in pairs + + # Empty input returns empty list + result = bx._get_curie_xrefs_recursive([]) + assert result == [] + + def test_results_are_sorted(self, tmp_path): + bx = self._make_bx(tmp_path) + xr_b = CrossReference(filename="b", subj="B:1", pred="p", obj="C:1") + xr_a = CrossReference(filename="a", subj="A:1", pred="p", obj="B:1") + + with patch.object(bx, "get_curie_xref", return_value=[xr_b, xr_a]): + result = bx.get_curie_xrefs(["X:1"], recurse=False) + assert result == [xr_a, xr_b] + + +# 
========================================================================== +# Integration Tests — require downloaded Parquet files +# ========================================================================== + + +@pytest.mark.integration +@pytest.mark.parametrize("curie", VALID_CURIES) +def test_get_curie_xref(babel_xrefs, curie): + """get_curie_xref returns non-empty CrossReferences with the queried CURIE.""" + babel_xrefs.get_curie_xref.cache_clear() + results = list(babel_xrefs.get_curie_xref(curie)) + assert len(results) > 0, f"No cross-references found for {curie}" + for xr in results: + assert isinstance(xr, CrossReference) + assert curie in (xr.subj, xr.obj) + + +@pytest.mark.integration +@pytest.mark.parametrize("curie", VALID_CURIES) +def test_get_curie_xref_returns_known_xrefs(babel_xrefs, curie): + """At least one cross-reference is found.""" + babel_xrefs.get_curie_xref.cache_clear() + results = list(babel_xrefs.get_curie_xref(curie)) + assert len(results) >= 1 + + +@pytest.mark.integration +@pytest.mark.parametrize("curie", VALID_CURIES) +def test_get_curie_xrefs_single_no_expand(babel_xrefs, curie): + """get_curie_xrefs without expansion returns sorted, non-empty results.""" + babel_xrefs.get_curie_xref.cache_clear() + results = babel_xrefs.get_curie_xrefs([curie], recurse=False) + assert len(results) > 0 + assert results == sorted(results) + + +@pytest.mark.integration +@pytest.mark.parametrize("curie", VALID_CURIES) +def test_get_curie_xrefs_expansion_finds_more(babel_xrefs, curie): + """Expanded results are at least as many as non-expanded.""" + babel_xrefs.get_curie_xref.cache_clear() + non_expanded = babel_xrefs.get_curie_xrefs([curie], recurse=False) + babel_xrefs.get_curie_xref.cache_clear() + expanded = babel_xrefs.get_curie_xrefs([curie], recurse=True) + assert len(expanded) >= len(non_expanded) + + +@pytest.mark.integration +@pytest.mark.parametrize("curie", VALID_CURIES) +def test_get_curie_xrefs_expanded_includes_original(babel_xrefs, 
curie): + """Non-expanded results are a subset of expanded results.""" + babel_xrefs.get_curie_xref.cache_clear() + non_expanded = set(babel_xrefs.get_curie_xrefs([curie], recurse=False)) + babel_xrefs.get_curie_xref.cache_clear() + expanded = set(babel_xrefs.get_curie_xrefs([curie], recurse=True)) + assert non_expanded.issubset(expanded) + + +@pytest.mark.integration +def test_get_curie_xref_caching(babel_xrefs): + """Cached calls return the same object.""" + curie = VALID_CURIES[0] + babel_xrefs.get_curie_xref.cache_clear() + r1 = babel_xrefs.get_curie_xref(curie) + r2 = babel_xrefs.get_curie_xref(curie) + assert r1 is r2 + + +@pytest.mark.integration +@pytest.mark.parametrize("curie", VALID_CURIES) +def test_get_curie_xref_with_labels(babel_xrefs_with_nodenorm, curie): + """With labels, returns LabeledCrossReference objects.""" + babel_xrefs_with_nodenorm.get_curie_xref.cache_clear() + results = list(babel_xrefs_with_nodenorm.get_curie_xref(curie, label_curies=True)) + assert len(results) > 0 + for xr in results: + assert isinstance(xr, LabeledCrossReference) + + +@pytest.mark.integration +def test_get_curie_xref_nonexistent_curie(babel_xrefs): + """A made-up CURIE returns an empty list.""" + babel_xrefs.get_curie_xref.cache_clear() + results = list(babel_xrefs.get_curie_xref("FAKE:9999999999")) + assert results == [] + + +@pytest.mark.integration +@pytest.mark.slow +@pytest.mark.parametrize("curie", VALID_CURIES) +def test_get_curie_ids(babel_xrefs, downloaded_identifiers, curie): + """get_curie_ids returns non-empty IdentifierRecord objects.""" + results = babel_xrefs.get_curie_ids([curie]) + assert len(results) > 0 + for rec in results: + assert isinstance(rec, IdentifierRecord) + assert rec.curie == curie diff --git a/tests/test_cli.py b/tests/test_cli.py new file mode 100644 index 0000000..f4f6dbb --- /dev/null +++ b/tests/test_cli.py @@ -0,0 +1,408 @@ +""" +Tests for CLI helper functions. + +Unit tests — no network required. 
+""" + +import json + +import click +import pytest +from click.testing import CliRunner +from unittest.mock import patch, MagicMock + +from babel_explorer.cli import parse_duration, cli +from babel_explorer.core.babel_xrefs import CrossReference, IdentifierRecord +from babel_explorer.core.nodenorm import Identifier + + +# ========================================================================== +# Unit Tests — no network required +# ========================================================================== + + +class TestParseDuration: + """Tests for parse_duration().""" + + @pytest.mark.parametrize( + "value, expected", + [ + ("never", float("inf")), + ("NEVER", float("inf")), + ("3h", 10800), + ("3H", 10800), + ("30m", 1800), + ("1d", 86400), + ("7200s", 7200), + ("7200", 7200), + ("0", 0), + (" 3h ", 10800), + ], + ) + def test_valid_inputs(self, value, expected): + assert parse_duration(value) == expected + + @pytest.mark.parametrize( + "value", + [ + "", + None, + "abc", + "3.5h", + "1.5", + "3x", + "-5", + "-5h", + ], + ) + def test_invalid_inputs_raise_bad_parameter(self, value): + with pytest.raises(click.BadParameter): + parse_duration(value) + + +class TestCliCommands: + """Tests for CLI commands using CliRunner — no network required.""" + + def test_xrefs_happy_path(self): + runner = CliRunner() + mock_xref = MagicMock() + mock_xref.__str__ = lambda self: "A:1 skos:exactMatch B:2" + mock_xref.subj = "A:1" + mock_xref.obj = "B:2" + mock_xref.pred = "skos:exactMatch" + mock_xref.filename = "test.parquet" + + with ( + patch("babel_explorer.cli.BabelDownloader"), + patch("babel_explorer.cli.BabelXRefs") as mock_bx, + patch("babel_explorer.cli.NodeNorm"), + ): + mock_bx.return_value.get_curie_xrefs.return_value = [mock_xref] + result = runner.invoke(cli, ["xrefs", "MONDO:0004979"]) + + assert result.exit_code == 0 + mock_bx.return_value.get_curie_xrefs.assert_called_once_with( + ("MONDO:0004979",), False, label_curies=False + ) + + def 
test_xrefs_recurse_and_labels_flags(self): + runner = CliRunner() + mock_xref = MagicMock() + mock_xref.subj = "A:1" + mock_xref.obj = "B:2" + mock_xref.pred = "skos:exactMatch" + mock_xref.filename = "test.parquet" + + with ( + patch("babel_explorer.cli.BabelDownloader"), + patch("babel_explorer.cli.BabelXRefs") as mock_bx, + patch("babel_explorer.cli.NodeNorm"), + ): + mock_bx.return_value.get_curie_xrefs.return_value = [mock_xref] + result = runner.invoke( + cli, ["xrefs", "MONDO:0004979", "--recurse", "--labels"] + ) + + assert result.exit_code == 0 + mock_bx.return_value.get_curie_xrefs.assert_called_once_with( + ("MONDO:0004979",), True, label_curies=True + ) + + def test_xrefs_check_download_option(self): + runner = CliRunner() + + with ( + patch("babel_explorer.cli.BabelDownloader") as mock_dl, + patch("babel_explorer.cli.BabelXRefs") as mock_bx, + patch("babel_explorer.cli.NodeNorm"), + ): + mock_bx.return_value.get_curie_xrefs.return_value = [] + result = runner.invoke( + cli, ["xrefs", "MONDO:0004979", "--check-download", "1h"] + ) + + assert result.exit_code == 0 + _, kwargs = mock_dl.call_args + assert kwargs.get("freshness_seconds") == 3600 + + def test_ids_happy_path(self): + runner = CliRunner() + mock_id = MagicMock() + mock_id.__str__ = lambda self: "MONDO:0004979 record" + + with ( + patch("babel_explorer.cli.BabelDownloader"), + patch("babel_explorer.cli.BabelXRefs") as mock_bx, + ): + mock_bx.return_value.get_curie_ids.return_value = [mock_id] + result = runner.invoke(cli, ["ids", "MONDO:0004979"]) + + assert result.exit_code == 0 + mock_bx.return_value.get_curie_ids.assert_called_once_with(("MONDO:0004979",)) + + def test_test_concord_happy_path(self): + runner = CliRunner() + mock_ident = MagicMock() + mock_ident.curie = "MONDO:0004979" + mock_ident.label = "asthma" + mock_ident.biolink_type = ["biolink:Disease"] + + with patch("babel_explorer.cli.NodeNorm") as mock_nn: + mock_nn.return_value.get_clique_identifiers.return_value = [mock_ident] 
+ result = runner.invoke(cli, ["test-concord", "MONDO:0004979"]) + + assert result.exit_code == 0 + assert "asthma" in result.output + mock_nn.return_value.get_clique_identifiers.assert_called_once_with( + "MONDO:0004979" + ) + + def test_test_concord_no_label(self): + runner = CliRunner() + mock_ident = MagicMock() + mock_ident.curie = "MONDO:0004979" + mock_ident.label = None + mock_ident.biolink_type = ["biolink:Disease"] + + with patch("babel_explorer.cli.NodeNorm") as mock_nn: + mock_nn.return_value.get_clique_identifiers.return_value = [mock_ident] + result = runner.invoke(cli, ["test-concord", "MONDO:0004979"]) + + assert result.exit_code == 0 + assert "MONDO:0004979" in result.output + assert "biolink:Disease" in result.output + + def test_test_concord_unknown_curie_produces_no_output(self): + """When get_clique_identifiers returns [], no output is produced and exit code is 0.""" + runner = CliRunner() + with patch("babel_explorer.cli.NodeNorm") as mock_nn: + mock_nn.return_value.get_clique_identifiers.return_value = [] + result = runner.invoke(cli, ["test-concord", "UNKNOWN:9999"]) + assert result.exit_code == 0 + assert result.output.strip() == "" + + def test_test_concord_multiple_curies(self): + """Each CURIE is looked up independently.""" + runner = CliRunner() + mock_a = MagicMock() + mock_a.curie = "A:1" + mock_a.label = "Alpha" + mock_a.biolink_type = ["biolink:Disease"] + mock_b = MagicMock() + mock_b.curie = "B:2" + mock_b.label = "Beta" + mock_b.biolink_type = ["biolink:Gene"] + + with patch("babel_explorer.cli.NodeNorm") as mock_nn: + mock_nn.return_value.get_clique_identifiers.side_effect = [ + [mock_a], + [mock_b], + ] + result = runner.invoke(cli, ["test-concord", "A:1", "B:2"]) + + assert result.exit_code == 0 + assert mock_nn.return_value.get_clique_identifiers.call_count == 2 + assert "Alpha" in result.output + assert "Beta" in result.output + + +class TestOutputFormats: + """Tests for --format option on all commands.""" + + # Shared real 
dataclass instances (no mocking needed for formatting logic) + _xref = CrossReference(filename="Concord.parquet", subj="A:1", pred="skos:exactMatch", obj="B:2") + _id_record = IdentifierRecord(curie="A:1", extra_fields=(("type", "gene"), ("label", "Alpha"))) + _identifier = Identifier( + curie="MONDO:0004979", label="asthma", + biolink_type=("biolink:Disease",), taxa=(), description=(), + ) + + # -- console format (default) -- + + def test_xrefs_default_format_is_console(self): + """Default format is console — output contains the CURIEs as plain text (no TTY in runner).""" + runner = CliRunner() + with ( + patch("babel_explorer.cli.BabelDownloader"), + patch("babel_explorer.cli.BabelXRefs") as mock_bx, + patch("babel_explorer.cli.NodeNorm"), + ): + mock_bx.return_value.get_curie_xrefs.return_value = [self._xref] + result = runner.invoke(cli, ["xrefs", "A:1"]) + + assert result.exit_code == 0 + # Rich strips markup on non-TTY; plain CURIEs and predicate appear + assert "A:1" in result.output + assert "B:2" in result.output + assert "skos:exactMatch" in result.output + + def test_xrefs_console_shows_query_curie(self): + runner = CliRunner() + with ( + patch("babel_explorer.cli.BabelDownloader"), + patch("babel_explorer.cli.BabelXRefs") as mock_bx, + patch("babel_explorer.cli.NodeNorm"), + ): + mock_bx.return_value.get_curie_xrefs.return_value = [self._xref] + result = runner.invoke(cli, ["xrefs", "A:1", "--format", "console"]) + + assert result.exit_code == 0 + assert "A:1" in result.output + + def test_test_concord_console_format(self): + runner = CliRunner() + with patch("babel_explorer.cli.NodeNorm") as mock_nn: + mock_nn.return_value.get_clique_identifiers.return_value = [self._identifier] + result = runner.invoke(cli, ["test-concord", "MONDO:0004979", "--format", "console"]) + + assert result.exit_code == 0 + assert "MONDO:0004979" in result.output + assert "asthma" in result.output + assert "biolink:Disease" in result.output + + def 
test_test_concord_console_no_label_shows_dash(self): + """Identifiers with no label display '-' in console format.""" + runner = CliRunner() + mock_ident = MagicMock() + mock_ident.curie = "MONDO:0004979" + mock_ident.label = None + mock_ident.biolink_type = ["biolink:Disease"] + + with patch("babel_explorer.cli.NodeNorm") as mock_nn: + mock_nn.return_value.get_clique_identifiers.return_value = [mock_ident] + result = runner.invoke(cli, ["test-concord", "MONDO:0004979", "--format", "console"]) + + assert result.exit_code == 0 + assert "-" in result.output + + # -- json format -- + + def test_xrefs_format_json(self): + runner = CliRunner() + with ( + patch("babel_explorer.cli.BabelDownloader"), + patch("babel_explorer.cli.BabelXRefs") as mock_bx, + patch("babel_explorer.cli.NodeNorm"), + ): + mock_bx.return_value.get_curie_xrefs.return_value = [self._xref] + result = runner.invoke(cli, ["xrefs", "A:1", "--format", "json"]) + + assert result.exit_code == 0 + data = json.loads(result.output) + assert isinstance(data, list) + assert data[0]["subj"] == "A:1" + assert data[0]["obj"] == "B:2" + + def test_xrefs_format_tsv(self): + runner = CliRunner() + with ( + patch("babel_explorer.cli.BabelDownloader"), + patch("babel_explorer.cli.BabelXRefs") as mock_bx, + patch("babel_explorer.cli.NodeNorm"), + ): + mock_bx.return_value.get_curie_xrefs.return_value = [self._xref] + result = runner.invoke(cli, ["xrefs", "A:1", "--format", "tsv"]) + + assert result.exit_code == 0 + lines = result.output.splitlines() + assert lines[0] == "filename\tsubj\tpred\tobj" + assert "A:1" in lines[1] + + def test_xrefs_format_csv(self): + runner = CliRunner() + with ( + patch("babel_explorer.cli.BabelDownloader"), + patch("babel_explorer.cli.BabelXRefs") as mock_bx, + patch("babel_explorer.cli.NodeNorm"), + ): + mock_bx.return_value.get_curie_xrefs.return_value = [self._xref] + result = runner.invoke(cli, ["xrefs", "A:1", "--format", "csv"]) + + assert result.exit_code == 0 + lines = 
result.output.splitlines() + assert lines[0] == "filename,subj,pred,obj" + assert "A:1" in lines[1] + + # -- ids -- + + def test_ids_format_json_expands_extra_fields(self): + runner = CliRunner() + with ( + patch("babel_explorer.cli.BabelDownloader"), + patch("babel_explorer.cli.BabelXRefs") as mock_bx, + ): + mock_bx.return_value.get_curie_ids.return_value = [self._id_record] + result = runner.invoke(cli, ["ids", "A:1", "--format", "json"]) + + assert result.exit_code == 0 + data = json.loads(result.output) + assert data[0]["curie"] == "A:1" + assert data[0]["type"] == "gene" + assert data[0]["label"] == "Alpha" + assert "extra_fields" not in data[0] + + def test_ids_format_tsv_expands_extra_fields(self): + runner = CliRunner() + with ( + patch("babel_explorer.cli.BabelDownloader"), + patch("babel_explorer.cli.BabelXRefs") as mock_bx, + ): + mock_bx.return_value.get_curie_ids.return_value = [self._id_record] + result = runner.invoke(cli, ["ids", "A:1", "--format", "tsv"]) + + assert result.exit_code == 0 + lines = result.output.splitlines() + assert "type" in lines[0] + assert "label" in lines[0] + assert "gene" in lines[1] + + # -- test-concord structured formats -- + + def test_test_concord_format_json_includes_query_curie(self): + runner = CliRunner() + with patch("babel_explorer.cli.NodeNorm") as mock_nn: + mock_nn.return_value.get_clique_identifiers.return_value = [self._identifier] + result = runner.invoke(cli, ["test-concord", "MONDO:0004979", "--format", "json"]) + + assert result.exit_code == 0 + data = json.loads(result.output) + assert data[0]["query_curie"] == "MONDO:0004979" + assert data[0]["curie"] == "MONDO:0004979" + assert data[0]["label"] == "asthma" + assert data[0]["biolink_type"] == ["biolink:Disease"] + + def test_test_concord_format_tsv(self): + runner = CliRunner() + with patch("babel_explorer.cli.NodeNorm") as mock_nn: + mock_nn.return_value.get_clique_identifiers.return_value = [self._identifier] + result = runner.invoke(cli, 
["test-concord", "MONDO:0004979", "--format", "tsv"]) + + assert result.exit_code == 0 + lines = result.output.splitlines() + assert "query_curie" in lines[0] + assert "MONDO:0004979" in lines[1] + + # -- format validation -- + + def test_invalid_format_rejected_by_click(self): + runner = CliRunner() + with ( + patch("babel_explorer.cli.BabelDownloader"), + patch("babel_explorer.cli.BabelXRefs"), + patch("babel_explorer.cli.NodeNorm"), + ): + result = runner.invoke(cli, ["xrefs", "A:1", "--format", "xml"]) + + assert result.exit_code != 0 + + def test_text_format_rejected_by_click(self): + """'text' was removed; it is no longer a valid choice.""" + runner = CliRunner() + with ( + patch("babel_explorer.cli.BabelDownloader"), + patch("babel_explorer.cli.BabelXRefs"), + patch("babel_explorer.cli.NodeNorm"), + ): + result = runner.invoke(cli, ["xrefs", "A:1", "--format", "text"]) + + assert result.exit_code != 0 diff --git a/tests/test_downloader.py b/tests/test_downloader.py new file mode 100644 index 0000000..7fe8609 --- /dev/null +++ b/tests/test_downloader.py @@ -0,0 +1,670 @@ +""" +Tests for the BabelDownloader class. + +Unit tests use mocks and run without network access. +Integration tests download real files from the Babel server. 
+""" + +import json +import os +import tempfile +from datetime import datetime, timezone, timedelta + +import pytest +import requests +from unittest.mock import Mock, patch, MagicMock + +from babel_explorer.core.downloader import BabelDownloader + +from tests.constants import CONCORD_FILE + + +# ========================================================================== +# Unit Tests — no network required +# ========================================================================== + + +class TestBabelDownloaderInit: + """Tests for BabelDownloader constructor.""" + + def test_constructor_stores_url_and_path(self, tmp_path): + dl = BabelDownloader(url_base="https://example.com/", local_path=str(tmp_path)) + assert dl.url_base == "https://example.com/" + assert dl.local_path == str(tmp_path) + + def test_creates_directory_if_missing(self, tmp_path): + new_dir = str(tmp_path / "nested" / "dir") + dl = BabelDownloader(url_base="https://example.com/", local_path=new_dir) + assert os.path.isdir(new_dir) + assert dl.local_path == new_dir + + def test_custom_retries(self, tmp_path): + dl = BabelDownloader( + url_base="https://example.com/", local_path=str(tmp_path), retries=3 + ) + assert dl.retries == 3 + + def test_default_retries(self, tmp_path): + dl = BabelDownloader(url_base="https://example.com/", local_path=str(tmp_path)) + assert dl.retries == 10 + + def test_default_freshness_seconds(self, tmp_path): + dl = BabelDownloader(url_base="https://example.com/", local_path=str(tmp_path)) + assert dl.freshness_seconds == 3 * 3600 + + def test_custom_freshness_seconds(self, tmp_path): + dl = BabelDownloader( + url_base="https://example.com/", + local_path=str(tmp_path), + freshness_seconds=0, + ) + assert dl.freshness_seconds == 0 + + def test_url_base_trailing_slash_added(self, tmp_path): + """url_base without trailing slash gets one appended automatically.""" + dl = BabelDownloader( + url_base="https://example.com/path", local_path=str(tmp_path) + ) + assert dl.url_base 
== "https://example.com/path/" + + def test_url_base_with_trailing_slash_unchanged(self, tmp_path): + dl = BabelDownloader( + url_base="https://example.com/path/", local_path=str(tmp_path) + ) + assert dl.url_base == "https://example.com/path/" + + def test_invalid_path_raises_value_error(self): + """Using a file path (not a directory) should raise ValueError.""" + with tempfile.NamedTemporaryFile(delete=False) as f: + f.write(b"not a directory") + f.flush() + try: + with pytest.raises(ValueError, match="Invalid local_path"): + BabelDownloader(url_base="https://example.com/", local_path=f.name) + finally: + os.unlink(f.name) + + +class TestGetOutputFile: + """Tests for get_output_file.""" + + def test_returns_correct_path(self, tmp_path): + dl = BabelDownloader(url_base="https://example.com/", local_path=str(tmp_path)) + result = dl.get_output_file("output/duckdbs/test.duckdb") + assert result == os.path.join(str(tmp_path), "output/duckdbs/test.duckdb") + + def test_creates_parent_directories(self, tmp_path): + dl = BabelDownloader(url_base="https://example.com/", local_path=str(tmp_path)) + result = dl.get_output_file("deep/nested/dir/file.txt") + assert os.path.isdir(os.path.dirname(result)) + + def test_lru_caching(self, tmp_path): + dl = BabelDownloader(url_base="https://example.com/", local_path=str(tmp_path)) + result1 = dl.get_output_file("some/file.txt") + result2 = dl.get_output_file("some/file.txt") + assert result1 is result2 # identity check — same cached object + + +class TestSaveMeta: + """Tests for _save_meta.""" + + def _make_dl(self, tmp_path): + return BabelDownloader( + url_base="https://example.com/", local_path=str(tmp_path) + ) + + def test_writes_all_fields(self, tmp_path): + dl = self._make_dl(tmp_path) + file_path = str(tmp_path / "test.parquet") + # Create the file so the path is valid + open(file_path, "wb").close() + + headers = { + "ETag": '"abc123"', + "Last-Modified": "Wed, 03 Dec 2025 15:54:19 GMT", + "Content-Length": "12345", + } + 
dl._save_meta(file_path, headers) + + meta_path = file_path + ".meta" + assert os.path.exists(meta_path) + with open(meta_path) as f: + meta = json.load(f) + + assert meta["etag"] == '"abc123"' + assert meta["last_modified"] == "Wed, 03 Dec 2025 15:54:19 GMT" + assert meta["content_length"] == 12345 + assert "last_checked" in meta + + def test_last_checked_is_recent_utc(self, tmp_path): + dl = self._make_dl(tmp_path) + file_path = str(tmp_path / "f.parquet") + open(file_path, "wb").close() + + dl._save_meta(file_path, {"ETag": '"x"'}) + + with open(file_path + ".meta") as f: + meta = json.load(f) + + last_checked = datetime.fromisoformat(meta["last_checked"]) + age = (datetime.now(timezone.utc) - last_checked).total_seconds() + assert age < 5 # written less than 5 seconds ago + + def test_missing_headers_not_written(self, tmp_path): + """Headers not present in the response should not appear in .meta.""" + dl = self._make_dl(tmp_path) + file_path = str(tmp_path / "sparse.parquet") + open(file_path, "wb").close() + + dl._save_meta(file_path, {}) + + with open(file_path + ".meta") as f: + meta = json.load(f) + + assert "etag" not in meta + assert "last_modified" not in meta + assert "content_length" not in meta + assert "last_checked" in meta + + +class TestLoadMeta: + """Tests for _load_meta.""" + + def _make_dl(self, tmp_path): + return BabelDownloader( + url_base="https://example.com/", local_path=str(tmp_path) + ) + + def test_returns_none_if_no_meta_file(self, tmp_path): + dl = self._make_dl(tmp_path) + assert dl._load_meta(str(tmp_path / "nonexistent.parquet")) is None + + def test_returns_dict_for_valid_meta(self, tmp_path): + dl = self._make_dl(tmp_path) + file_path = str(tmp_path / "f.parquet") + open(file_path, "wb").close() + meta_data = {"etag": '"abc"', "last_checked": "2026-01-01T00:00:00+00:00"} + with open(file_path + ".meta", "w") as f: + json.dump(meta_data, f) + + result = dl._load_meta(file_path) + assert result == meta_data + + def 
test_returns_none_for_corrupt_meta(self, tmp_path): + dl = self._make_dl(tmp_path) + file_path = str(tmp_path / "corrupt.parquet") + open(file_path, "wb").close() + with open(file_path + ".meta", "w") as f: + f.write("not valid json {{{") + + assert dl._load_meta(file_path) is None + + +class TestIsWithinFreshness: + """Tests for _is_within_freshness.""" + + def _make_dl(self, tmp_path): + return BabelDownloader( + url_base="https://example.com/", local_path=str(tmp_path) + ) + + def test_returns_true_when_recent(self, tmp_path): + dl = self._make_dl(tmp_path) + recent = datetime.now(timezone.utc).isoformat() + meta = {"last_checked": recent} + assert dl._is_within_freshness(meta, 3600) is True + + def test_returns_false_when_stale(self, tmp_path): + dl = self._make_dl(tmp_path) + old = (datetime.now(timezone.utc) - timedelta(hours=5)).isoformat() + meta = {"last_checked": old} + assert dl._is_within_freshness(meta, 3600) is False + + def test_returns_false_when_missing_last_checked(self, tmp_path): + dl = self._make_dl(tmp_path) + assert dl._is_within_freshness({}, 3600) is False + + def test_returns_true_when_freshness_is_inf(self, tmp_path): + dl = self._make_dl(tmp_path) + old = (datetime.now(timezone.utc) - timedelta(days=365)).isoformat() + meta = {"last_checked": old} + assert dl._is_within_freshness(meta, float("inf")) is True + + def test_returns_false_when_freshness_is_zero(self, tmp_path): + dl = self._make_dl(tmp_path) + just_now = datetime.now(timezone.utc).isoformat() + meta = {"last_checked": just_now} + # Even with freshness=0, age >= 0 so it's not < 0 + assert dl._is_within_freshness(meta, 0) is False + + +class TestEtagMatches: + """Tests for _etag_matches.""" + + def _make_dl(self, tmp_path): + return BabelDownloader( + url_base="https://example.com/", local_path=str(tmp_path) + ) + + def test_returns_true_on_matching_etag(self, tmp_path): + dl = self._make_dl(tmp_path) + meta = {"etag": '"abc123"'} + mock_resp = Mock() + mock_resp.headers = 
{"ETag": '"abc123"'} + mock_resp.raise_for_status = Mock() + with patch( + "babel_explorer.core.downloader.requests.head", return_value=mock_resp + ): + assert dl._etag_matches("https://example.com/f.parquet", meta) is True + + def test_returns_false_on_different_etag(self, tmp_path): + dl = self._make_dl(tmp_path) + meta = {"etag": '"old"'} + mock_resp = Mock() + mock_resp.headers = {"ETag": '"new"'} + mock_resp.raise_for_status = Mock() + with patch( + "babel_explorer.core.downloader.requests.head", return_value=mock_resp + ): + assert dl._etag_matches("https://example.com/f.parquet", meta) is False + + def test_fallback_last_modified_match(self, tmp_path): + dl = self._make_dl(tmp_path) + lm = "Wed, 03 Dec 2025 15:54:19 GMT" + meta = {"last_modified": lm, "content_length": 100} + mock_resp = Mock() + mock_resp.headers = {"Last-Modified": lm, "Content-Length": "100"} + mock_resp.raise_for_status = Mock() + with patch( + "babel_explorer.core.downloader.requests.head", return_value=mock_resp + ): + assert dl._etag_matches("https://example.com/f.parquet", meta) is True + + def test_returns_true_on_request_error(self, tmp_path): + """Network errors are treated as 'assume still fresh' to avoid triggering large re-downloads.""" + dl = self._make_dl(tmp_path) + meta = {"etag": '"abc"'} + with patch( + "babel_explorer.core.downloader.requests.head", + side_effect=requests.ConnectionError("fail"), + ): + assert dl._etag_matches("https://example.com/f.parquet", meta) is True + + +class TestGetDownloadedFileTiers: + """Tests for the three-tier logic in get_downloaded_file.""" + + def _make_dl(self, tmp_path, freshness=3600): + return BabelDownloader( + url_base="https://example.com/", + local_path=str(tmp_path), + freshness_seconds=freshness, + ) + + # --- Tier 1: within freshness window --- + + def test_tier1_returns_immediately_no_http(self, tmp_path): + """File + fresh .meta → no network calls at all.""" + dl = self._make_dl(tmp_path, freshness=3600) + test_file = 
"duckdb/test.parquet" + local = tmp_path / "duckdb" / "test.parquet" + local.parent.mkdir(parents=True) + local.write_bytes(b"data") + + meta = {"etag": '"abc"', "last_checked": datetime.now(timezone.utc).isoformat()} + with open(str(local) + ".meta", "w") as f: + json.dump(meta, f) + + with patch("babel_explorer.core.downloader.requests.head") as mock_head: + with patch("babel_explorer.core.downloader.requests.get") as mock_get: + dl.get_downloaded_file.cache_clear() + result = dl.get_downloaded_file(test_file) + mock_head.assert_not_called() + mock_get.assert_not_called() + assert result == str(local) + + # --- Tier 2: stale .meta, ETag matches --- + + def test_tier2_head_check_no_redownload(self, tmp_path): + """Stale .meta + matching ETag → HEAD only, no GET.""" + dl = self._make_dl(tmp_path, freshness=0) + test_file = "duckdb/test.parquet" + local = tmp_path / "duckdb" / "test.parquet" + local.parent.mkdir(parents=True) + local.write_bytes(b"data") + + old_ts = (datetime.now(timezone.utc) - timedelta(hours=5)).isoformat() + meta = {"etag": '"abc"', "last_checked": old_ts} + with open(str(local) + ".meta", "w") as f: + json.dump(meta, f) + + mock_head_resp = Mock() + mock_head_resp.headers = {"ETag": '"abc"'} + mock_head_resp.raise_for_status = Mock() + + with patch( + "babel_explorer.core.downloader.requests.head", return_value=mock_head_resp + ): + with patch("babel_explorer.core.downloader.requests.get") as mock_get: + dl.get_downloaded_file.cache_clear() + result = dl.get_downloaded_file(test_file) + mock_get.assert_not_called() + assert result == str(local) + + def test_tier2_updates_last_checked_after_head(self, tmp_path): + """After successful HEAD match, last_checked in .meta is updated.""" + dl = self._make_dl(tmp_path, freshness=0) + test_file = "duckdb/upd.parquet" + local = tmp_path / "duckdb" / "upd.parquet" + local.parent.mkdir(parents=True) + local.write_bytes(b"data") + + old_ts = (datetime.now(timezone.utc) - timedelta(hours=5)).isoformat() + 
meta = {"etag": '"abc"', "last_checked": old_ts} + with open(str(local) + ".meta", "w") as f: + json.dump(meta, f) + + mock_head_resp = Mock() + mock_head_resp.headers = {"ETag": '"abc"'} + mock_head_resp.raise_for_status = Mock() + + with patch( + "babel_explorer.core.downloader.requests.head", return_value=mock_head_resp + ): + dl.get_downloaded_file.cache_clear() + dl.get_downloaded_file(test_file) + + with open(str(local) + ".meta") as f: + updated_meta = json.load(f) + updated_ts = datetime.fromisoformat(updated_meta["last_checked"]) + assert (datetime.now(timezone.utc) - updated_ts).total_seconds() < 5 + + # --- Tier 3: ETag changed, re-download --- + + def test_tier3_redownloads_when_etag_changed(self, tmp_path): + """Changed ETag → file deleted and re-downloaded.""" + dl = self._make_dl(tmp_path, freshness=0) + test_file = "duckdb/changed.parquet" + local = tmp_path / "duckdb" / "changed.parquet" + local.parent.mkdir(parents=True) + local.write_bytes(b"old data") + + old_ts = (datetime.now(timezone.utc) - timedelta(hours=5)).isoformat() + meta = {"etag": '"old"', "last_checked": old_ts} + with open(str(local) + ".meta", "w") as f: + json.dump(meta, f) + + mock_head_resp = Mock() + mock_head_resp.headers = {"ETag": '"new"'} + mock_head_resp.raise_for_status = Mock() + + new_content = b"new data" + + def fake_download(url, path, chunk_size): + with open(path, "wb") as f: + f.write(new_content) + return {"ETag": '"new"', "Content-Length": str(len(new_content))} + + with patch( + "babel_explorer.core.downloader.requests.head", return_value=mock_head_resp + ): + with patch.object(dl, "_download_with_retry", side_effect=fake_download): + dl.get_downloaded_file.cache_clear() + result = dl.get_downloaded_file(test_file) + + assert open(result, "rb").read() == new_content + + # --- No .meta: fresh download --- + + def test_downloads_when_no_meta(self, tmp_path): + """No file and no .meta → download happens, .meta is saved.""" + dl = self._make_dl(tmp_path) + 
test_file = "duckdb/new.parquet" + content = b"fresh download" + + def fake_download(url, path, chunk_size): + os.makedirs(os.path.dirname(path), exist_ok=True) + with open(path, "wb") as f: + f.write(content) + return {"ETag": '"fresh"', "Content-Length": str(len(content))} + + with patch.object( + dl, "_download_with_retry", side_effect=fake_download + ) as mock_dl: + dl.get_downloaded_file.cache_clear() + result = dl.get_downloaded_file(test_file) + mock_dl.assert_called_once() + + assert os.path.exists(result) + assert open(result, "rb").read() == content + # .meta should be saved + meta_path = result + ".meta" + assert os.path.exists(meta_path) + with open(meta_path) as f: + saved_meta = json.load(f) + assert saved_meta["etag"] == '"fresh"' + + def test_downloads_when_file_exists_but_no_meta(self, tmp_path): + """File exists but no .meta → treats as unknown, triggers full download flow.""" + dl = self._make_dl(tmp_path, freshness=3600) + test_file = "duckdb/nometa.parquet" + local = tmp_path / "duckdb" / "nometa.parquet" + local.parent.mkdir(parents=True) + local.write_bytes(b"old content") + # No .meta file + + new_content = b"refreshed" + + def fake_download(url, path, chunk_size): + with open(path, "wb") as f: + f.write(new_content) + return {"ETag": '"new"'} + + with patch.object( + dl, "_download_with_retry", side_effect=fake_download + ) as mock_dl: + dl.get_downloaded_file.cache_clear() + result = dl.get_downloaded_file(test_file) + mock_dl.assert_called_once() + + assert open(result, "rb").read() == new_content + + +class TestGetDownloadedFileCaching: + """Tests for get_downloaded_file LRU caching.""" + + def test_cache_returns_same_result(self, tmp_path): + dl = BabelDownloader(url_base="https://example.com/", local_path=str(tmp_path)) + content = b"cached content" + + def fake_download(url, path, chunk_size): + with open(path, "wb") as f: + f.write(content) + return {} + + with patch.object( + dl, "_download_with_retry", side_effect=fake_download + ) 
as mock_dl: + dl.get_downloaded_file.cache_clear() + r1 = dl.get_downloaded_file("cached.txt") + r2 = dl.get_downloaded_file("cached.txt") + assert r1 == r2 + mock_dl.assert_called_once() # only one actual download + + +class TestDownloadWithRetry: + """Tests for _download_with_retry.""" + + @staticmethod + def _make_response(status_code, headers=None, content=None): + m = MagicMock() + m.__enter__.return_value = m + m.status_code = status_code + m.headers = headers or {} + if content is not None: + m.iter_content = Mock(return_value=content) + return m + + def test_retries_exhausted_raises_runtime_error(self, tmp_path): + dl = BabelDownloader( + url_base="https://example.com/", local_path=str(tmp_path), retries=2 + ) + with patch( + "babel_explorer.core.downloader.requests.get", + side_effect=requests.ConnectionError("fail"), + ): + with patch("babel_explorer.core.downloader.time.sleep"): # skip waiting + with pytest.raises(RuntimeError, match="Failed to download"): + dl._download_with_retry( + "https://example.com/file", str(tmp_path / "f"), 1024 + ) + + def test_succeeds_on_second_attempt(self, tmp_path): + dl = BabelDownloader( + url_base="https://example.com/", local_path=str(tmp_path), retries=3 + ) + out_path = str(tmp_path / "retry_success.bin") + + mock_response = self._make_response(200, {"Content-Length": "5"}, [b"hello"]) + side_effects = [requests.ConnectionError("first fail"), mock_response] + + with patch( + "babel_explorer.core.downloader.requests.get", side_effect=side_effects + ): + with patch("babel_explorer.core.downloader.time.sleep"): + dl._download_with_retry("https://example.com/file", out_path, 1024) + assert os.path.exists(out_path) + + def test_resume_sends_range_header(self, tmp_path): + dl = BabelDownloader(url_base="https://example.com/", local_path=str(tmp_path)) + out_path = tmp_path / "partial.bin" + out_path.write_bytes(b"partial") # 7 bytes + + mock_response = self._make_response(206, {"Content-Length": "3"}, [b"end"]) + with 
patch( + "babel_explorer.core.downloader.requests.get", return_value=mock_response + ) as mock_get: + dl._download_with_retry("https://example.com/file", str(out_path), 1024) + _, kwargs = mock_get.call_args + assert kwargs["headers"] == {"Range": "bytes=7-"} + + def test_http_416_file_already_complete(self, tmp_path): + dl = BabelDownloader(url_base="https://example.com/", local_path=str(tmp_path)) + out_path = tmp_path / "complete.bin" + out_path.write_bytes(b"full file") + + mock_response = self._make_response(416) + with patch( + "babel_explorer.core.downloader.requests.get", return_value=mock_response + ): + dl._download_with_retry("https://example.com/file", str(out_path), 1024) + # Should return without error + assert out_path.read_bytes() == b"full file" + + def test_server_no_resume_restarts_download(self, tmp_path): + """When server responds 200 (instead of 206), partial file is removed and download restarts.""" + dl = BabelDownloader(url_base="https://example.com/", local_path=str(tmp_path)) + out_path = tmp_path / "no_resume.bin" + out_path.write_bytes(b"partial") + + mock_response = self._make_response( + 200, {"Content-Length": "12"}, [b"full content"] + ) + with patch( + "babel_explorer.core.downloader.requests.get", return_value=mock_response + ): + dl._download_with_retry("https://example.com/file", str(out_path), 1024) + assert out_path.read_bytes() == b"full content" + + def test_returns_response_headers(self, tmp_path): + """_download_with_retry should return response headers.""" + dl = BabelDownloader(url_base="https://example.com/", local_path=str(tmp_path)) + out_path = str(tmp_path / "headers.bin") + + mock_response = self._make_response( + 200, {"Content-Length": "5", "ETag": '"abc"'}, [b"hello"] + ) + with patch( + "babel_explorer.core.downloader.requests.get", return_value=mock_response + ): + headers = dl._download_with_retry( + "https://example.com/file", out_path, 1024 + ) + assert headers["ETag"] == '"abc"' + + +class 
class TestStreamDownload:
    """Unit tests for BabelDownloader._stream_download."""

    def test_writes_chunks(self, tmp_path):
        """All iter_content chunks are concatenated into the target file."""
        dl = BabelDownloader(url_base="https://example.com/", local_path=str(tmp_path))
        target = str(tmp_path / "stream.bin")

        resp = Mock()
        resp.headers = {"Content-Length": "10"}
        resp.iter_content = Mock(return_value=[b"hello", b"world"])

        dl._stream_download(resp, target, resume_byte_pos=0, chunk_size=1024)
        with open(target, "rb") as fh:
            assert fh.read() == b"helloworld"

    def test_append_mode_on_resume(self, tmp_path):
        """A nonzero resume offset appends new bytes after the existing content."""
        dl = BabelDownloader(url_base="https://example.com/", local_path=str(tmp_path))
        target = tmp_path / "append.bin"
        target.write_bytes(b"start")

        resp = Mock()
        resp.headers = {"Content-Length": "3"}
        resp.iter_content = Mock(return_value=[b"end"])

        dl._stream_download(resp, str(target), resume_byte_pos=5, chunk_size=1024)
        assert target.read_bytes() == b"startend"


# ==========================================================================
# Integration Tests — require network access
# ==========================================================================


@pytest.mark.integration
def test_download_concord_parquet(downloaded_concord):
    """Verify Concord.parquet downloads and is > 100 MB."""
    assert os.path.isfile(downloaded_concord)
    size = os.path.getsize(downloaded_concord)
    assert size > 100 * 1024 * 1024, f"Concord.parquet too small: {size} bytes"


@pytest.mark.integration
def test_download_metadata_parquet(downloaded_metadata):
    """Verify Metadata.parquet downloads and is non-empty."""
    assert os.path.isfile(downloaded_metadata)
    assert os.path.getsize(downloaded_metadata) > 0


@pytest.mark.integration
def test_download_creates_meta_file(downloaded_concord):
    """After download, a .meta sidecar file should exist."""
    meta_path = downloaded_concord + ".meta"
    assert os.path.isfile(meta_path), f"Missing .meta file: {meta_path}"
    with open(meta_path) as fh:
        sidecar = json.load(fh)
    assert "last_checked" in sidecar


@pytest.mark.integration
def test_download_caching_real_files(shared_downloader, downloaded_concord):
    """Second call returns same path and file is not re-downloaded."""
    second = shared_downloader.get_downloaded_file(CONCORD_FILE)
    assert second == downloaded_concord
    assert os.path.getmtime(downloaded_concord) == os.path.getmtime(second)


@pytest.mark.integration
@pytest.mark.slow
def test_download_identifiers_parquet(downloaded_identifiers):
    """Verify Identifiers.parquet downloads and is > 2 GB."""
    assert os.path.isfile(downloaded_identifiers)
    size = os.path.getsize(downloaded_identifiers)
    assert size > 2 * 1024 * 1024 * 1024, f"Identifiers.parquet too small: {size} bytes"
"""
Unit tests for formatting.py — no network, no mocking required.
"""

import io
import json

import pytest
from rich.console import Console

from babel_explorer.core.babel_xrefs import CrossReference, LabeledCrossReference, IdentifierRecord
from babel_explorer.core.nodenorm import Identifier
from babel_explorer.formatting import _record_to_dict, write_records, make_console, hl_curie


# ---------------------------------------------------------------------------
# Fixtures
# ---------------------------------------------------------------------------


@pytest.fixture
def xref():
    """A minimal cross-reference record."""
    return CrossReference(
        filename="Concord.parquet", subj="A:1", pred="skos:exactMatch", obj="B:2"
    )


@pytest.fixture
def labeled_xref():
    """A cross-reference carrying labels and biolink types for both ends."""
    return LabeledCrossReference(
        filename="Concord.parquet",
        subj="A:1",
        pred="skos:exactMatch",
        obj="B:2",
        subj_label="Alpha",
        subj_biolink_type=("biolink:Disease",),
        obj_label="Beta",
        obj_biolink_type=("biolink:Gene", "biolink:NamedThing"),
    )


@pytest.fixture
def id_record():
    """An identifier record carrying two extra key/value fields."""
    return IdentifierRecord(
        curie="A:1",
        extra_fields=(("type", "gene"), ("label", "Alpha")),
    )


@pytest.fixture
def identifier():
    """A fully populated NodeNorm Identifier."""
    return Identifier(
        curie="MONDO:0004979",
        label="asthma",
        biolink_type=("biolink:Disease",),
        taxa=("NCBITaxon:9606",),
        description=("A chronic inflammatory disease",),
    )


# ---------------------------------------------------------------------------
# Tests for make_console and hl_curie
# ---------------------------------------------------------------------------


class TestConsoleUtilities:
    """Tests for make_console() and hl_curie()."""

    def test_make_console_returns_console(self):
        assert isinstance(make_console(), Console)

    def test_make_console_accepts_file(self):
        buf = io.StringIO()
        term = make_console(file=buf)
        assert isinstance(term, Console)
        term.print("hello")
        assert "hello" in buf.getvalue()

    def test_hl_curie_highlighted_contains_markup(self):
        marked = hl_curie("HGNC:1100", highlight=True)
        assert "bold cyan" in marked
        assert "HGNC:1100" in marked

    def test_hl_curie_not_highlighted_is_plain(self):
        plain = hl_curie("HGNC:1100", highlight=False)
        assert plain == "HGNC:1100"
        assert "[" not in plain

    def test_hl_curie_highlighted_renders_correctly(self):
        """Markup renders to plain text on a non-TTY console."""
        buf = io.StringIO()
        Console(file=buf, highlight=False, no_color=True).print(
            hl_curie("HGNC:1100", highlight=True)
        )
        assert "HGNC:1100" in buf.getvalue()

    def test_hl_curie_highlighted_renders_with_color(self):
        """On a forced-TTY console, ANSI codes are emitted."""
        buf = io.StringIO()
        Console(file=buf, highlight=False, force_terminal=True).print(
            hl_curie("HGNC:1100", highlight=True)
        )
        rendered = buf.getvalue()
        assert "HGNC:1100" in rendered
        assert "\x1b[" in rendered  # ANSI escape present


# ---------------------------------------------------------------------------
# Tests for _record_to_dict
# ---------------------------------------------------------------------------


class TestRecordToDict:
    """Tests for the _record_to_dict() conversion helper."""

    def test_cross_reference(self, xref):
        assert _record_to_dict(xref) == {
            "filename": "Concord.parquet",
            "subj": "A:1",
            "pred": "skos:exactMatch",
            "obj": "B:2",
        }

    def test_labeled_cross_reference_has_all_eight_fields(self, labeled_xref):
        converted = _record_to_dict(labeled_xref)
        assert set(converted.keys()) == {
            "filename", "subj", "pred", "obj",
            "subj_label", "subj_biolink_type", "obj_label", "obj_biolink_type",
        }
        # dataclasses.asdict() preserves tuple types
        assert converted["subj_biolink_type"] == ("biolink:Disease",)
        assert converted["obj_biolink_type"] == ("biolink:Gene", "biolink:NamedThing")

    def test_identifier_record_extra_fields_expanded(self, id_record):
        converted = _record_to_dict(id_record)
        assert "extra_fields" not in converted
        assert converted["curie"] == "A:1"
        assert converted["type"] == "gene"
        assert converted["label"] == "Alpha"

    def test_identifier_record_no_extra_fields(self):
        assert _record_to_dict(IdentifierRecord(curie="X:1")) == {"curie": "X:1"}

    def test_plain_dict_passthrough(self):
        payload = {"a": 1, "b": "hello"}
        assert _record_to_dict(payload) is payload

    def test_identifier_dataclass(self, identifier):
        converted = _record_to_dict(identifier)
        assert converted["curie"] == "MONDO:0004979"
        assert converted["label"] == "asthma"
        # dataclasses.asdict() preserves tuple types
        assert converted["biolink_type"] == ("biolink:Disease",)
        assert converted["taxa"] == ("NCBITaxon:9606",)


# ---------------------------------------------------------------------------
# Tests for write_records
# ---------------------------------------------------------------------------


class TestWriteRecords:
    """Tests for write_records() across its output formats."""

    # -- json format --

    def test_json_is_valid_list(self, xref):
        buf = io.StringIO()
        write_records([xref], "json", file=buf)
        parsed = json.loads(buf.getvalue())
        assert isinstance(parsed, list)
        assert len(parsed) == 1
        assert parsed[0]["subj"] == "A:1"

    def test_json_empty_list(self):
        buf = io.StringIO()
        write_records([], "json", file=buf)
        assert json.loads(buf.getvalue()) == []

    def test_json_indent_controls_formatting(self, xref):
        pretty = io.StringIO()
        write_records([xref], "json", indent=2, file=pretty)

        compact = io.StringIO()
        write_records([xref], "json", indent=None, file=compact)

        # Pretty-printed output has more lines (has newlines per field)
        assert pretty.getvalue().count("\n") > compact.getvalue().count("\n")

    def test_json_tuple_fields_serialized_as_arrays(self, labeled_xref):
        # json.dump converts tuples to JSON arrays, so json.loads gives back lists
        buf = io.StringIO()
        write_records([labeled_xref], "json", file=buf)
        parsed = json.loads(buf.getvalue())
        assert isinstance(parsed[0]["subj_biolink_type"], list)
        assert parsed[0]["obj_biolink_type"] == ["biolink:Gene", "biolink:NamedThing"]

    def test_json_plain_dict(self):
        buf = io.StringIO()
        write_records([{"a": 1, "b": "x"}], "json", file=buf)
        assert json.loads(buf.getvalue()) == [{"a": 1, "b": "x"}]

    # -- tsv format --

    def test_tsv_has_header_row(self, xref):
        buf = io.StringIO()
        write_records([xref], "tsv", file=buf)
        assert buf.getvalue().splitlines()[0] == "filename\tsubj\tpred\tobj"

    def test_tsv_data_row(self, xref):
        buf = io.StringIO()
        write_records([xref], "tsv", file=buf)
        rows = buf.getvalue().splitlines()
        assert rows[1] == "Concord.parquet\tA:1\tskos:exactMatch\tB:2"

    def test_tsv_tuple_fields_pipe_joined(self, labeled_xref):
        buf = io.StringIO()
        write_records([labeled_xref], "tsv", file=buf)
        header, row = buf.getvalue().splitlines()[:2]
        # Header row
        assert "subj_biolink_type" in header
        # Data row: multi-value tuple joined with pipe
        assert "biolink:Gene|biolink:NamedThing" in row

    def test_tsv_empty_no_output(self):
        buf = io.StringIO()
        write_records([], "tsv", file=buf)
        assert buf.getvalue() == ""

    def test_tsv_identifier_record_extra_fields_expanded(self, id_record):
        buf = io.StringIO()
        write_records([id_record], "tsv", file=buf)
        header, row = buf.getvalue().splitlines()[:2]
        assert "curie" in header
        assert "type" in header
        assert "label" in header
        assert "A:1" in row

    # -- csv format --

    def test_csv_has_header_row(self, xref):
        buf = io.StringIO()
        write_records([xref], "csv", file=buf)
        assert buf.getvalue().splitlines()[0] == "filename,subj,pred,obj"

    def test_csv_data_row(self, xref):
        buf = io.StringIO()
        write_records([xref], "csv", file=buf)
        rows = buf.getvalue().splitlines()
        assert rows[1] == "Concord.parquet,A:1,skos:exactMatch,B:2"

    def test_csv_empty_no_output(self):
        buf = io.StringIO()
        write_records([], "csv", file=buf)
        assert buf.getvalue() == ""

    def test_csv_tuple_fields_pipe_joined(self, labeled_xref):
        buf = io.StringIO()
        write_records([labeled_xref], "csv", file=buf)
        assert "biolink:Gene|biolink:NamedThing" in buf.getvalue().splitlines()[1]

    # -- invalid formats (including console, which is handled at CLI layer) --

    def test_text_format_raises_value_error(self, xref):
        with pytest.raises(ValueError, match="Unknown format"):
            write_records([xref], "text", file=io.StringIO())

    def test_console_format_raises_value_error(self, xref):
        """Console format is handled by the CLI, not write_records."""
        with pytest.raises(ValueError, match="Unknown format"):
            write_records([xref], "console", file=io.StringIO())

    def test_unknown_format_raises_value_error(self, xref):
        with pytest.raises(ValueError, match="Unknown format"):
            write_records([xref], "xml", file=io.StringIO())
"""
Tests for NodeNorm and Identifier classes.

Unit tests use mocks; integration tests call the real NodeNorm API.
"""

import pytest
from unittest.mock import Mock, patch

import requests

from babel_explorer.core.nodenorm import NodeNorm, Identifier

from tests.constants import load_curies

VALID_CURIES = load_curies()


# ==========================================================================
# Unit Tests — Identifier
# ==========================================================================


class TestIdentifier:
    """Tests for the Identifier dataclass."""

    def test_creation_with_defaults(self):
        bare = Identifier(curie="MONDO:0004979")
        assert bare.curie == "MONDO:0004979"
        assert bare.label == ""
        assert bare.biolink_type == ()
        assert bare.taxa == ()
        assert bare.description == ()

    def test_full_creation(self):
        full = Identifier(
            curie="MONDO:0004979",
            label="asthma",
            biolink_type=("biolink:Disease",),
            taxa=("NCBITaxon:9606",),
            description=("A chronic respiratory disease",),
        )
        assert full.label == "asthma"
        assert full.biolink_type == ("biolink:Disease",)
        assert full.taxa == ("NCBITaxon:9606",)

    def test_from_dict_minimal(self):
        parsed = Identifier.from_dict({"identifier": "X:1"})
        assert parsed.curie == "X:1"
        assert parsed.label == ""

    def test_from_dict_full(self):
        parsed = Identifier.from_dict(
            {
                "identifier": "X:1",
                "label": "Alpha",
                "type": ["biolink:NamedThing"],
                "taxa": ["NCBITaxon:9606"],
                "description": ["Some thing"],
            }
        )
        assert parsed.curie == "X:1"
        assert parsed.label == "Alpha"
        assert parsed.biolink_type == ("biolink:NamedThing",)
        assert parsed.taxa == ("NCBITaxon:9606",)

    def test_from_dict_partial(self):
        parsed = Identifier.from_dict({"identifier": "X:1", "label": "Beta"})
        assert parsed.curie == "X:1"
        assert parsed.label == "Beta"
        assert parsed.biolink_type == ()

    def test_from_dict_type_as_string(self):
        """NodeNorm may return 'type' as a bare string for individual identifiers."""
        parsed = Identifier.from_dict({"identifier": "X:1", "type": "biolink:Disease"})
        assert parsed.biolink_type == ("biolink:Disease",), (
            "biolink_type should be a 1-tuple, not a tuple of characters"
        )

    def test_from_dict_description_as_string(self):
        """NodeNorm may return 'description' as a bare string."""
        parsed = Identifier.from_dict(
            {"identifier": "X:1", "description": "A chronic disease"}
        )
        assert parsed.description == ("A chronic disease",), (
            "description should be a 1-tuple, not a tuple of characters"
        )

    def test_from_dict_taxa_as_string(self):
        """NodeNorm may return 'taxa' as a bare string."""
        parsed = Identifier.from_dict({"identifier": "X:1", "taxa": "NCBITaxon:9606"})
        assert parsed.taxa == ("NCBITaxon:9606",), (
            "taxa should be a 1-tuple, not a tuple of characters"
        )

    def test_from_dict_all_fields_as_strings(self):
        """All three tuple fields as strings produce correct single-element tuples."""
        parsed = Identifier.from_dict(
            {
                "identifier": "X:1",
                "label": "Alpha",
                "type": "biolink:NamedThing",
                "taxa": "NCBITaxon:9606",
                "description": "Some description",
            }
        )
        assert parsed.biolink_type == ("biolink:NamedThing",)
        assert parsed.taxa == ("NCBITaxon:9606",)
        assert parsed.description == ("Some description",)

    def test_lt_ordering(self):
        assert Identifier(curie="A:1") < Identifier(curie="B:2")

    def test_sorting(self):
        shuffled = [
            Identifier(curie="C:3"),
            Identifier(curie="A:1"),
            Identifier(curie="B:2"),
        ]
        assert [x.curie for x in sorted(shuffled)] == ["A:1", "B:2", "C:3"]


# ==========================================================================
# Unit Tests — NodeNorm (mocked)
# ==========================================================================


class TestNodeNormInit:
    """Tests for NodeNorm constructor and URL normalisation."""

    def test_default_url(self):
        assert NodeNorm().nodenorm_url == ""

    def test_custom_url(self):
        nn = NodeNorm(nodenorm_url="https://custom.api/")
        assert nn.nodenorm_url == "https://custom.api/"

    def test_empty_url_normalize_curie_returns_none_without_network(self):
        """NodeNorm('') must not make any HTTP calls and must return None."""
        nn = NodeNorm("")
        nn.normalize_curie.cache_clear()
        with patch("babel_explorer.core.nodenorm.requests.get") as mock_get:
            outcome = nn.normalize_curie("MONDO:0004979")
        mock_get.assert_not_called()
        assert outcome is None


class TestNormalizeCurieMocked:
    """Unit tests for NodeNorm.normalize_curie() with mocked HTTP responses."""

    def _make_nn(self):
        # Fresh instance with a cleared cache so tests stay independent.
        nn = NodeNorm(nodenorm_url="https://example.com/")
        nn.normalize_curie.cache_clear()
        return nn

    def test_correct_api_endpoint_and_params(self):
        nn = self._make_nn()
        resp = Mock()
        resp.status_code = 200
        resp.json.return_value = {"X:1": {"id": {"identifier": "X:1"}}}
        resp.raise_for_status = Mock()

        with patch(
            "babel_explorer.core.nodenorm.requests.get", return_value=resp
        ) as mock_get:
            nn.normalize_curie("X:1")
        mock_get.assert_called_once()
        args, kwargs = mock_get.call_args
        assert args[0] == "https://example.com/get_normalized_nodes"
        assert kwargs["params"]["curie"] == "X:1"

    def test_returns_result_for_curie(self):
        nn = self._make_nn()
        expected = {"id": {"identifier": "X:1"}, "equivalent_identifiers": []}
        resp = Mock()
        resp.json.return_value = {"X:1": expected}
        resp.raise_for_status = Mock()

        with patch("babel_explorer.core.nodenorm.requests.get", return_value=resp):
            assert nn.normalize_curie("X:1") == expected

    def test_lru_caching(self):
        nn = self._make_nn()
        resp = Mock()
        resp.json.return_value = {"X:1": {"id": "X:1"}}
        resp.raise_for_status = Mock()

        with patch(
            "babel_explorer.core.nodenorm.requests.get", return_value=resp
        ) as mock_get:
            nn.normalize_curie("X:1")
            nn.normalize_curie("X:1")
        mock_get.assert_called_once()

    def test_http_error_raises(self):
        nn = self._make_nn()
        resp = Mock()
        resp.raise_for_status.side_effect = requests.HTTPError("500 Server Error")

        with patch("babel_explorer.core.nodenorm.requests.get", return_value=resp):
            with pytest.raises(requests.HTTPError):
                nn.normalize_curie("BAD:1")
class TestGetIdentifierMocked:
    """Unit tests for NodeNorm.get_identifier() with mocked normalize_curie."""

    def _make_nn(self):
        # Fresh instance with cleared caches so tests stay independent.
        nn = NodeNorm(nodenorm_url="https://example.com/")
        nn.normalize_curie.cache_clear()
        nn.get_identifier.cache_clear()
        return nn

    def test_exact_match_found(self):
        """The Identifier matching the requested CURIE is returned with its label."""
        nn = self._make_nn()
        api_result = {
            "equivalent_identifiers": [
                {"identifier": "X:1", "label": "Alpha", "type": ["biolink:Disease"]},
                {"identifier": "X:2", "label": "Beta"},
            ],
        }
        with patch.object(nn, "normalize_curie", return_value=api_result):
            ident = nn.get_identifier("X:1")
        assert ident.curie == "X:1"
        assert ident.label == "Alpha"

    def test_no_match_returns_bare_identifier(self):
        """A clique lacking the requested CURIE yields a bare Identifier."""
        nn = self._make_nn()
        api_result = {
            "equivalent_identifiers": [
                {"identifier": "X:2", "label": "Beta"},
            ],
        }
        with patch.object(nn, "normalize_curie", return_value=api_result):
            ident = nn.get_identifier("X:1")
        assert ident.curie == "X:1"
        assert ident.label == ""

    def test_falsy_result_returns_bare_identifier(self):
        """A None normalization result yields a bare Identifier, not an error."""
        nn = self._make_nn()
        with patch.object(nn, "normalize_curie", return_value=None):
            ident = nn.get_identifier("X:1")
        assert ident.curie == "X:1"
        assert ident.label == ""

    def test_caching(self):
        """Repeated lookups of the same CURIE hit normalize_curie only once."""
        nn = self._make_nn()
        api_result = {
            "equivalent_identifiers": [
                {"identifier": "X:1", "label": "Alpha"},
            ],
        }
        with patch.object(nn, "normalize_curie", return_value=api_result) as mock_norm:
            nn.get_identifier("X:1")
            nn.get_identifier("X:1")
        mock_norm.assert_called_once()


class TestGetCliqueIdentifiersMocked:
    """Unit tests for NodeNorm.get_clique_identifiers() with mocked normalize_curie."""

    def _make_nn(self):
        # Fresh instance with cleared caches so tests stay independent.
        nn = NodeNorm(nodenorm_url="https://example.com/")
        nn.normalize_curie.cache_clear()
        nn.get_clique_identifiers.cache_clear()
        return nn

    def test_success_returns_list(self):
        """A populated clique comes back as a list of Identifier instances."""
        nn = self._make_nn()
        api_result = {
            "equivalent_identifiers": [
                {"identifier": "X:1", "label": "Alpha"},
                {"identifier": "X:2", "label": "Beta"},
            ],
        }
        with patch.object(nn, "normalize_curie", return_value=api_result):
            result = nn.get_clique_identifiers("X:1")
        assert len(result) == 2
        assert all(isinstance(x, Identifier) for x in result)

    def test_missing_key_returns_empty_list(self):
        """A result without 'equivalent_identifiers' yields an empty list.

        Renamed from test_missing_key_returns_none: the old name and docstring
        claimed None, but the assertion has always checked for [].
        """
        nn = self._make_nn()
        api_result = {"id": {"identifier": "X:1"}}  # no equivalent_identifiers
        with patch.object(nn, "normalize_curie", return_value=api_result):
            assert nn.get_clique_identifiers("X:1") == []

    def test_caching(self):
        """Repeated clique lookups of the same CURIE hit normalize_curie only once."""
        nn = self._make_nn()
        api_result = {
            "equivalent_identifiers": [{"identifier": "X:1"}],
        }
        with patch.object(nn, "normalize_curie", return_value=api_result) as mock_norm:
            nn.get_clique_identifiers("X:1")
            nn.get_clique_identifiers("X:1")
        mock_norm.assert_called_once()


# ==========================================================================
# Integration Tests — require real NodeNorm API
# ==========================================================================


@pytest.mark.integration
@pytest.mark.parametrize("curie", VALID_CURIES)
def test_normalize_curie_real_api(nodenorm, curie):
    """normalize_curie returns a dict with expected keys."""
    nodenorm.normalize_curie.cache_clear()
    result = nodenorm.normalize_curie(curie)
    assert isinstance(result, dict)
    assert "id" in result
    assert "equivalent_identifiers" in result
    assert "type" in result


@pytest.mark.integration
@pytest.mark.parametrize("curie", VALID_CURIES)
def test_get_identifier_real_api(nodenorm, curie):
    """get_identifier returns an Identifier with non-empty label and biolink_type."""
    nodenorm.normalize_curie.cache_clear()
    nodenorm.get_identifier.cache_clear()
    ident = nodenorm.get_identifier(curie)
    assert isinstance(ident, Identifier)
    assert ident.curie == curie
    assert ident.label != ""


@pytest.mark.integration
@pytest.mark.parametrize("curie", VALID_CURIES)
def test_get_clique_identifiers_real_api(nodenorm, curie):
    """get_clique_identifiers returns a non-empty list of Identifiers."""
    nodenorm.normalize_curie.cache_clear()
    nodenorm.get_clique_identifiers.cache_clear()
    result = nodenorm.get_clique_identifiers(curie)
    assert result is not None
    assert len(result) > 0
    assert all(isinstance(x, Identifier) for x in result)


@pytest.mark.integration
@pytest.mark.parametrize("curie", VALID_CURIES)
def test_get_clique_identifiers_has_known_ids(nodenorm, curie):
    """At least one equivalent identifier is returned."""
    nodenorm.normalize_curie.cache_clear()
    nodenorm.get_clique_identifiers.cache_clear()
    result = nodenorm.get_clique_identifiers(curie)
    assert len(result) >= 1
nodenorm.get_clique_identifiers(curie) + assert len(result) >= 1 + + +@pytest.mark.integration +def test_normalize_curie_nonexistent(nodenorm): + """A made-up CURIE returns None.""" + nodenorm.normalize_curie.cache_clear() + result = nodenorm.normalize_curie("FAKENS:9999999999") + assert result is None diff --git a/uv.lock b/uv.lock new file mode 100644 index 0000000..7b201e4 --- /dev/null +++ b/uv.lock @@ -0,0 +1,399 @@ +version = 1 +revision = 3 +requires-python = ">=3.11" + +[[package]] +name = "babel-explorer" +version = "0.1.0" +source = { editable = "." } +dependencies = [ + { name = "click" }, + { name = "duckdb" }, + { name = "requests" }, + { name = "rich" }, + { name = "tqdm" }, +] + +[package.dev-dependencies] +dev = [ + { name = "filelock" }, + { name = "pytest" }, + { name = "pytest-xdist", extra = ["psutil"] }, + { name = "ruff" }, +] + +[package.metadata] +requires-dist = [ + { name = "click", specifier = ">=8.3.1" }, + { name = "duckdb", specifier = ">=1.4.2" }, + { name = "requests", specifier = ">=2.32.5" }, + { name = "rich", specifier = ">=13" }, + { name = "tqdm", specifier = ">=4.67.0" }, +] + +[package.metadata.requires-dev] +dev = [ + { name = "filelock", specifier = ">=3.16" }, + { name = "pytest", specifier = ">=8.3.5" }, + { name = "pytest-xdist", extras = ["psutil"], specifier = ">=3.6" }, + { name = "ruff", specifier = ">=0.11.0" }, +] + +[[package]] +name = "certifi" +version = "2026.1.4" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/e0/2d/a891ca51311197f6ad14a7ef42e2399f36cf2f9bd44752b3dc4eab60fdc5/certifi-2026.1.4.tar.gz", hash = "sha256:ac726dd470482006e014ad384921ed6438c457018f4b3d204aea4281258b2120", size = 154268, upload-time = "2026-01-04T02:42:41.825Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e6/ad/3cc14f097111b4de0040c83a525973216457bbeeb63739ef1ed275c1c021/certifi-2026.1.4-py3-none-any.whl", hash = 
"sha256:9943707519e4add1115f44c2bc244f782c0249876bf51b6599fee1ffbedd685c", size = 152900, upload-time = "2026-01-04T02:42:40.15Z" }, +] + +[[package]] +name = "charset-normalizer" +version = "3.4.4" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/13/69/33ddede1939fdd074bce5434295f38fae7136463422fe4fd3e0e89b98062/charset_normalizer-3.4.4.tar.gz", hash = "sha256:94537985111c35f28720e43603b8e7b43a6ecfb2ce1d3058bbe955b73404e21a", size = 129418, upload-time = "2025-10-14T04:42:32.879Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ed/27/c6491ff4954e58a10f69ad90aca8a1b6fe9c5d3c6f380907af3c37435b59/charset_normalizer-3.4.4-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:6e1fcf0720908f200cd21aa4e6750a48ff6ce4afe7ff5a79a90d5ed8a08296f8", size = 206988, upload-time = "2025-10-14T04:40:33.79Z" }, + { url = "https://files.pythonhosted.org/packages/94/59/2e87300fe67ab820b5428580a53cad894272dbb97f38a7a814a2a1ac1011/charset_normalizer-3.4.4-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5f819d5fe9234f9f82d75bdfa9aef3a3d72c4d24a6e57aeaebba32a704553aa0", size = 147324, upload-time = "2025-10-14T04:40:34.961Z" }, + { url = "https://files.pythonhosted.org/packages/07/fb/0cf61dc84b2b088391830f6274cb57c82e4da8bbc2efeac8c025edb88772/charset_normalizer-3.4.4-cp311-cp311-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:a59cb51917aa591b1c4e6a43c132f0cdc3c76dbad6155df4e28ee626cc77a0a3", size = 142742, upload-time = "2025-10-14T04:40:36.105Z" }, + { url = "https://files.pythonhosted.org/packages/62/8b/171935adf2312cd745d290ed93cf16cf0dfe320863ab7cbeeae1dcd6535f/charset_normalizer-3.4.4-cp311-cp311-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:8ef3c867360f88ac904fd3f5e1f902f13307af9052646963ee08ff4f131adafc", size = 160863, upload-time = "2025-10-14T04:40:37.188Z" }, + { url 
= "https://files.pythonhosted.org/packages/09/73/ad875b192bda14f2173bfc1bc9a55e009808484a4b256748d931b6948442/charset_normalizer-3.4.4-cp311-cp311-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:d9e45d7faa48ee908174d8fe84854479ef838fc6a705c9315372eacbc2f02897", size = 157837, upload-time = "2025-10-14T04:40:38.435Z" }, + { url = "https://files.pythonhosted.org/packages/6d/fc/de9cce525b2c5b94b47c70a4b4fb19f871b24995c728e957ee68ab1671ea/charset_normalizer-3.4.4-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:840c25fb618a231545cbab0564a799f101b63b9901f2569faecd6b222ac72381", size = 151550, upload-time = "2025-10-14T04:40:40.053Z" }, + { url = "https://files.pythonhosted.org/packages/55/c2/43edd615fdfba8c6f2dfbd459b25a6b3b551f24ea21981e23fb768503ce1/charset_normalizer-3.4.4-cp311-cp311-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:ca5862d5b3928c4940729dacc329aa9102900382fea192fc5e52eb69d6093815", size = 149162, upload-time = "2025-10-14T04:40:41.163Z" }, + { url = "https://files.pythonhosted.org/packages/03/86/bde4ad8b4d0e9429a4e82c1e8f5c659993a9a863ad62c7df05cf7b678d75/charset_normalizer-3.4.4-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:d9c7f57c3d666a53421049053eaacdd14bbd0a528e2186fcb2e672effd053bb0", size = 150019, upload-time = "2025-10-14T04:40:42.276Z" }, + { url = "https://files.pythonhosted.org/packages/1f/86/a151eb2af293a7e7bac3a739b81072585ce36ccfb4493039f49f1d3cae8c/charset_normalizer-3.4.4-cp311-cp311-musllinux_1_2_armv7l.whl", hash = "sha256:277e970e750505ed74c832b4bf75dac7476262ee2a013f5574dd49075879e161", size = 143310, upload-time = "2025-10-14T04:40:43.439Z" }, + { url = "https://files.pythonhosted.org/packages/b5/fe/43dae6144a7e07b87478fdfc4dbe9efd5defb0e7ec29f5f58a55aeef7bf7/charset_normalizer-3.4.4-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:31fd66405eaf47bb62e8cd575dc621c56c668f27d46a61d975a249930dd5e2a4", size = 162022, 
upload-time = "2025-10-14T04:40:44.547Z" }, + { url = "https://files.pythonhosted.org/packages/80/e6/7aab83774f5d2bca81f42ac58d04caf44f0cc2b65fc6db2b3b2e8a05f3b3/charset_normalizer-3.4.4-cp311-cp311-musllinux_1_2_riscv64.whl", hash = "sha256:0d3d8f15c07f86e9ff82319b3d9ef6f4bf907608f53fe9d92b28ea9ae3d1fd89", size = 149383, upload-time = "2025-10-14T04:40:46.018Z" }, + { url = "https://files.pythonhosted.org/packages/4f/e8/b289173b4edae05c0dde07f69f8db476a0b511eac556dfe0d6bda3c43384/charset_normalizer-3.4.4-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:9f7fcd74d410a36883701fafa2482a6af2ff5ba96b9a620e9e0721e28ead5569", size = 159098, upload-time = "2025-10-14T04:40:47.081Z" }, + { url = "https://files.pythonhosted.org/packages/d8/df/fe699727754cae3f8478493c7f45f777b17c3ef0600e28abfec8619eb49c/charset_normalizer-3.4.4-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:ebf3e58c7ec8a8bed6d66a75d7fb37b55e5015b03ceae72a8e7c74495551e224", size = 152991, upload-time = "2025-10-14T04:40:48.246Z" }, + { url = "https://files.pythonhosted.org/packages/1a/86/584869fe4ddb6ffa3bd9f491b87a01568797fb9bd8933f557dba9771beaf/charset_normalizer-3.4.4-cp311-cp311-win32.whl", hash = "sha256:eecbc200c7fd5ddb9a7f16c7decb07b566c29fa2161a16cf67b8d068bd21690a", size = 99456, upload-time = "2025-10-14T04:40:49.376Z" }, + { url = "https://files.pythonhosted.org/packages/65/f6/62fdd5feb60530f50f7e38b4f6a1d5203f4d16ff4f9f0952962c044e919a/charset_normalizer-3.4.4-cp311-cp311-win_amd64.whl", hash = "sha256:5ae497466c7901d54b639cf42d5b8c1b6a4fead55215500d2f486d34db48d016", size = 106978, upload-time = "2025-10-14T04:40:50.844Z" }, + { url = "https://files.pythonhosted.org/packages/7a/9d/0710916e6c82948b3be62d9d398cb4fcf4e97b56d6a6aeccd66c4b2f2bd5/charset_normalizer-3.4.4-cp311-cp311-win_arm64.whl", hash = "sha256:65e2befcd84bc6f37095f5961e68a6f077bf44946771354a28ad434c2cce0ae1", size = 99969, upload-time = "2025-10-14T04:40:52.272Z" }, + { url = 
"https://files.pythonhosted.org/packages/f3/85/1637cd4af66fa687396e757dec650f28025f2a2f5a5531a3208dc0ec43f2/charset_normalizer-3.4.4-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:0a98e6759f854bd25a58a73fa88833fba3b7c491169f86ce1180c948ab3fd394", size = 208425, upload-time = "2025-10-14T04:40:53.353Z" }, + { url = "https://files.pythonhosted.org/packages/9d/6a/04130023fef2a0d9c62d0bae2649b69f7b7d8d24ea5536feef50551029df/charset_normalizer-3.4.4-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b5b290ccc2a263e8d185130284f8501e3e36c5e02750fc6b6bdeb2e9e96f1e25", size = 148162, upload-time = "2025-10-14T04:40:54.558Z" }, + { url = "https://files.pythonhosted.org/packages/78/29/62328d79aa60da22c9e0b9a66539feae06ca0f5a4171ac4f7dc285b83688/charset_normalizer-3.4.4-cp312-cp312-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:74bb723680f9f7a6234dcf67aea57e708ec1fbdf5699fb91dfd6f511b0a320ef", size = 144558, upload-time = "2025-10-14T04:40:55.677Z" }, + { url = "https://files.pythonhosted.org/packages/86/bb/b32194a4bf15b88403537c2e120b817c61cd4ecffa9b6876e941c3ee38fe/charset_normalizer-3.4.4-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:f1e34719c6ed0b92f418c7c780480b26b5d9c50349e9a9af7d76bf757530350d", size = 161497, upload-time = "2025-10-14T04:40:57.217Z" }, + { url = "https://files.pythonhosted.org/packages/19/89/a54c82b253d5b9b111dc74aca196ba5ccfcca8242d0fb64146d4d3183ff1/charset_normalizer-3.4.4-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:2437418e20515acec67d86e12bf70056a33abdacb5cb1655042f6538d6b085a8", size = 159240, upload-time = "2025-10-14T04:40:58.358Z" }, + { url = 
"https://files.pythonhosted.org/packages/c0/10/d20b513afe03acc89ec33948320a5544d31f21b05368436d580dec4e234d/charset_normalizer-3.4.4-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:11d694519d7f29d6cd09f6ac70028dba10f92f6cdd059096db198c283794ac86", size = 153471, upload-time = "2025-10-14T04:40:59.468Z" }, + { url = "https://files.pythonhosted.org/packages/61/fa/fbf177b55bdd727010f9c0a3c49eefa1d10f960e5f09d1d887bf93c2e698/charset_normalizer-3.4.4-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:ac1c4a689edcc530fc9d9aa11f5774b9e2f33f9a0c6a57864e90908f5208d30a", size = 150864, upload-time = "2025-10-14T04:41:00.623Z" }, + { url = "https://files.pythonhosted.org/packages/05/12/9fbc6a4d39c0198adeebbde20b619790e9236557ca59fc40e0e3cebe6f40/charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:21d142cc6c0ec30d2efee5068ca36c128a30b0f2c53c1c07bd78cb6bc1d3be5f", size = 150647, upload-time = "2025-10-14T04:41:01.754Z" }, + { url = "https://files.pythonhosted.org/packages/ad/1f/6a9a593d52e3e8c5d2b167daf8c6b968808efb57ef4c210acb907c365bc4/charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:5dbe56a36425d26d6cfb40ce79c314a2e4dd6211d51d6d2191c00bed34f354cc", size = 145110, upload-time = "2025-10-14T04:41:03.231Z" }, + { url = "https://files.pythonhosted.org/packages/30/42/9a52c609e72471b0fc54386dc63c3781a387bb4fe61c20231a4ebcd58bdd/charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:5bfbb1b9acf3334612667b61bd3002196fe2a1eb4dd74d247e0f2a4d50ec9bbf", size = 162839, upload-time = "2025-10-14T04:41:04.715Z" }, + { url = "https://files.pythonhosted.org/packages/c4/5b/c0682bbf9f11597073052628ddd38344a3d673fda35a36773f7d19344b23/charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:d055ec1e26e441f6187acf818b73564e6e6282709e9bcb5b63f5b23068356a15", size = 150667, upload-time = "2025-10-14T04:41:05.827Z" }, 
+ { url = "https://files.pythonhosted.org/packages/e4/24/a41afeab6f990cf2daf6cb8c67419b63b48cf518e4f56022230840c9bfb2/charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:af2d8c67d8e573d6de5bc30cdb27e9b95e49115cd9baad5ddbd1a6207aaa82a9", size = 160535, upload-time = "2025-10-14T04:41:06.938Z" }, + { url = "https://files.pythonhosted.org/packages/2a/e5/6a4ce77ed243c4a50a1fecca6aaaab419628c818a49434be428fe24c9957/charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:780236ac706e66881f3b7f2f32dfe90507a09e67d1d454c762cf642e6e1586e0", size = 154816, upload-time = "2025-10-14T04:41:08.101Z" }, + { url = "https://files.pythonhosted.org/packages/a8/ef/89297262b8092b312d29cdb2517cb1237e51db8ecef2e9af5edbe7b683b1/charset_normalizer-3.4.4-cp312-cp312-win32.whl", hash = "sha256:5833d2c39d8896e4e19b689ffc198f08ea58116bee26dea51e362ecc7cd3ed26", size = 99694, upload-time = "2025-10-14T04:41:09.23Z" }, + { url = "https://files.pythonhosted.org/packages/3d/2d/1e5ed9dd3b3803994c155cd9aacb60c82c331bad84daf75bcb9c91b3295e/charset_normalizer-3.4.4-cp312-cp312-win_amd64.whl", hash = "sha256:a79cfe37875f822425b89a82333404539ae63dbdddf97f84dcbc3d339aae9525", size = 107131, upload-time = "2025-10-14T04:41:10.467Z" }, + { url = "https://files.pythonhosted.org/packages/d0/d9/0ed4c7098a861482a7b6a95603edce4c0d9db2311af23da1fb2b75ec26fc/charset_normalizer-3.4.4-cp312-cp312-win_arm64.whl", hash = "sha256:376bec83a63b8021bb5c8ea75e21c4ccb86e7e45ca4eb81146091b56599b80c3", size = 100390, upload-time = "2025-10-14T04:41:11.915Z" }, + { url = "https://files.pythonhosted.org/packages/97/45/4b3a1239bbacd321068ea6e7ac28875b03ab8bc0aa0966452db17cd36714/charset_normalizer-3.4.4-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:e1f185f86a6f3403aa2420e815904c67b2f9ebc443f045edd0de921108345794", size = 208091, upload-time = "2025-10-14T04:41:13.346Z" }, + { url = 
"https://files.pythonhosted.org/packages/7d/62/73a6d7450829655a35bb88a88fca7d736f9882a27eacdca2c6d505b57e2e/charset_normalizer-3.4.4-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6b39f987ae8ccdf0d2642338faf2abb1862340facc796048b604ef14919e55ed", size = 147936, upload-time = "2025-10-14T04:41:14.461Z" }, + { url = "https://files.pythonhosted.org/packages/89/c5/adb8c8b3d6625bef6d88b251bbb0d95f8205831b987631ab0c8bb5d937c2/charset_normalizer-3.4.4-cp313-cp313-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:3162d5d8ce1bb98dd51af660f2121c55d0fa541b46dff7bb9b9f86ea1d87de72", size = 144180, upload-time = "2025-10-14T04:41:15.588Z" }, + { url = "https://files.pythonhosted.org/packages/91/ed/9706e4070682d1cc219050b6048bfd293ccf67b3d4f5a4f39207453d4b99/charset_normalizer-3.4.4-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:81d5eb2a312700f4ecaa977a8235b634ce853200e828fbadf3a9c50bab278328", size = 161346, upload-time = "2025-10-14T04:41:16.738Z" }, + { url = "https://files.pythonhosted.org/packages/d5/0d/031f0d95e4972901a2f6f09ef055751805ff541511dc1252ba3ca1f80cf5/charset_normalizer-3.4.4-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:5bd2293095d766545ec1a8f612559f6b40abc0eb18bb2f5d1171872d34036ede", size = 158874, upload-time = "2025-10-14T04:41:17.923Z" }, + { url = "https://files.pythonhosted.org/packages/f5/83/6ab5883f57c9c801ce5e5677242328aa45592be8a00644310a008d04f922/charset_normalizer-3.4.4-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a8a8b89589086a25749f471e6a900d3f662d1d3b6e2e59dcecf787b1cc3a1894", size = 153076, upload-time = "2025-10-14T04:41:19.106Z" }, + { url = 
"https://files.pythonhosted.org/packages/75/1e/5ff781ddf5260e387d6419959ee89ef13878229732732ee73cdae01800f2/charset_normalizer-3.4.4-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:bc7637e2f80d8530ee4a78e878bce464f70087ce73cf7c1caf142416923b98f1", size = 150601, upload-time = "2025-10-14T04:41:20.245Z" }, + { url = "https://files.pythonhosted.org/packages/d7/57/71be810965493d3510a6ca79b90c19e48696fb1ff964da319334b12677f0/charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:f8bf04158c6b607d747e93949aa60618b61312fe647a6369f88ce2ff16043490", size = 150376, upload-time = "2025-10-14T04:41:21.398Z" }, + { url = "https://files.pythonhosted.org/packages/e5/d5/c3d057a78c181d007014feb7e9f2e65905a6c4ef182c0ddf0de2924edd65/charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:554af85e960429cf30784dd47447d5125aaa3b99a6f0683589dbd27e2f45da44", size = 144825, upload-time = "2025-10-14T04:41:22.583Z" }, + { url = "https://files.pythonhosted.org/packages/e6/8c/d0406294828d4976f275ffbe66f00266c4b3136b7506941d87c00cab5272/charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:74018750915ee7ad843a774364e13a3db91682f26142baddf775342c3f5b1133", size = 162583, upload-time = "2025-10-14T04:41:23.754Z" }, + { url = "https://files.pythonhosted.org/packages/d7/24/e2aa1f18c8f15c4c0e932d9287b8609dd30ad56dbe41d926bd846e22fb8d/charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:c0463276121fdee9c49b98908b3a89c39be45d86d1dbaa22957e38f6321d4ce3", size = 150366, upload-time = "2025-10-14T04:41:25.27Z" }, + { url = "https://files.pythonhosted.org/packages/e4/5b/1e6160c7739aad1e2df054300cc618b06bf784a7a164b0f238360721ab86/charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:362d61fd13843997c1c446760ef36f240cf81d3ebf74ac62652aebaf7838561e", size = 160300, upload-time = "2025-10-14T04:41:26.725Z" }, + { url = 
"https://files.pythonhosted.org/packages/7a/10/f882167cd207fbdd743e55534d5d9620e095089d176d55cb22d5322f2afd/charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:9a26f18905b8dd5d685d6d07b0cdf98a79f3c7a918906af7cc143ea2e164c8bc", size = 154465, upload-time = "2025-10-14T04:41:28.322Z" }, + { url = "https://files.pythonhosted.org/packages/89/66/c7a9e1b7429be72123441bfdbaf2bc13faab3f90b933f664db506dea5915/charset_normalizer-3.4.4-cp313-cp313-win32.whl", hash = "sha256:9b35f4c90079ff2e2edc5b26c0c77925e5d2d255c42c74fdb70fb49b172726ac", size = 99404, upload-time = "2025-10-14T04:41:29.95Z" }, + { url = "https://files.pythonhosted.org/packages/c4/26/b9924fa27db384bdcd97ab83b4f0a8058d96ad9626ead570674d5e737d90/charset_normalizer-3.4.4-cp313-cp313-win_amd64.whl", hash = "sha256:b435cba5f4f750aa6c0a0d92c541fb79f69a387c91e61f1795227e4ed9cece14", size = 107092, upload-time = "2025-10-14T04:41:31.188Z" }, + { url = "https://files.pythonhosted.org/packages/af/8f/3ed4bfa0c0c72a7ca17f0380cd9e4dd842b09f664e780c13cff1dcf2ef1b/charset_normalizer-3.4.4-cp313-cp313-win_arm64.whl", hash = "sha256:542d2cee80be6f80247095cc36c418f7bddd14f4a6de45af91dfad36d817bba2", size = 100408, upload-time = "2025-10-14T04:41:32.624Z" }, + { url = "https://files.pythonhosted.org/packages/2a/35/7051599bd493e62411d6ede36fd5af83a38f37c4767b92884df7301db25d/charset_normalizer-3.4.4-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:da3326d9e65ef63a817ecbcc0df6e94463713b754fe293eaa03da99befb9a5bd", size = 207746, upload-time = "2025-10-14T04:41:33.773Z" }, + { url = "https://files.pythonhosted.org/packages/10/9a/97c8d48ef10d6cd4fcead2415523221624bf58bcf68a802721a6bc807c8f/charset_normalizer-3.4.4-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:8af65f14dc14a79b924524b1e7fffe304517b2bff5a58bf64f30b98bbc5079eb", size = 147889, upload-time = "2025-10-14T04:41:34.897Z" }, + { url = 
"https://files.pythonhosted.org/packages/10/bf/979224a919a1b606c82bd2c5fa49b5c6d5727aa47b4312bb27b1734f53cd/charset_normalizer-3.4.4-cp314-cp314-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:74664978bb272435107de04e36db5a9735e78232b85b77d45cfb38f758efd33e", size = 143641, upload-time = "2025-10-14T04:41:36.116Z" }, + { url = "https://files.pythonhosted.org/packages/ba/33/0ad65587441fc730dc7bd90e9716b30b4702dc7b617e6ba4997dc8651495/charset_normalizer-3.4.4-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:752944c7ffbfdd10c074dc58ec2d5a8a4cd9493b314d367c14d24c17684ddd14", size = 160779, upload-time = "2025-10-14T04:41:37.229Z" }, + { url = "https://files.pythonhosted.org/packages/67/ed/331d6b249259ee71ddea93f6f2f0a56cfebd46938bde6fcc6f7b9a3d0e09/charset_normalizer-3.4.4-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:d1f13550535ad8cff21b8d757a3257963e951d96e20ec82ab44bc64aeb62a191", size = 159035, upload-time = "2025-10-14T04:41:38.368Z" }, + { url = "https://files.pythonhosted.org/packages/67/ff/f6b948ca32e4f2a4576aa129d8bed61f2e0543bf9f5f2b7fc3758ed005c9/charset_normalizer-3.4.4-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ecaae4149d99b1c9e7b88bb03e3221956f68fd6d50be2ef061b2381b61d20838", size = 152542, upload-time = "2025-10-14T04:41:39.862Z" }, + { url = "https://files.pythonhosted.org/packages/16/85/276033dcbcc369eb176594de22728541a925b2632f9716428c851b149e83/charset_normalizer-3.4.4-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:cb6254dc36b47a990e59e1068afacdcd02958bdcce30bb50cc1700a8b9d624a6", size = 149524, upload-time = "2025-10-14T04:41:41.319Z" }, + { url = "https://files.pythonhosted.org/packages/9e/f2/6a2a1f722b6aba37050e626530a46a68f74e63683947a8acff92569f979a/charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_aarch64.whl", hash = 
"sha256:c8ae8a0f02f57a6e61203a31428fa1d677cbe50c93622b4149d5c0f319c1d19e", size = 150395, upload-time = "2025-10-14T04:41:42.539Z" }, + { url = "https://files.pythonhosted.org/packages/60/bb/2186cb2f2bbaea6338cad15ce23a67f9b0672929744381e28b0592676824/charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_armv7l.whl", hash = "sha256:47cc91b2f4dd2833fddaedd2893006b0106129d4b94fdb6af1f4ce5a9965577c", size = 143680, upload-time = "2025-10-14T04:41:43.661Z" }, + { url = "https://files.pythonhosted.org/packages/7d/a5/bf6f13b772fbb2a90360eb620d52ed8f796f3c5caee8398c3b2eb7b1c60d/charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:82004af6c302b5d3ab2cfc4cc5f29db16123b1a8417f2e25f9066f91d4411090", size = 162045, upload-time = "2025-10-14T04:41:44.821Z" }, + { url = "https://files.pythonhosted.org/packages/df/c5/d1be898bf0dc3ef9030c3825e5d3b83f2c528d207d246cbabe245966808d/charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:2b7d8f6c26245217bd2ad053761201e9f9680f8ce52f0fcd8d0755aeae5b2152", size = 149687, upload-time = "2025-10-14T04:41:46.442Z" }, + { url = "https://files.pythonhosted.org/packages/a5/42/90c1f7b9341eef50c8a1cb3f098ac43b0508413f33affd762855f67a410e/charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_s390x.whl", hash = "sha256:799a7a5e4fb2d5898c60b640fd4981d6a25f1c11790935a44ce38c54e985f828", size = 160014, upload-time = "2025-10-14T04:41:47.631Z" }, + { url = "https://files.pythonhosted.org/packages/76/be/4d3ee471e8145d12795ab655ece37baed0929462a86e72372fd25859047c/charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:99ae2cffebb06e6c22bdc25801d7b30f503cc87dbd283479e7b606f70aff57ec", size = 154044, upload-time = "2025-10-14T04:41:48.81Z" }, + { url = "https://files.pythonhosted.org/packages/b0/6f/8f7af07237c34a1defe7defc565a9bc1807762f672c0fde711a4b22bf9c0/charset_normalizer-3.4.4-cp314-cp314-win32.whl", hash = "sha256:f9d332f8c2a2fcbffe1378594431458ddbef721c1769d78e2cbc06280d8155f9", 
size = 99940, upload-time = "2025-10-14T04:41:49.946Z" }, + { url = "https://files.pythonhosted.org/packages/4b/51/8ade005e5ca5b0d80fb4aff72a3775b325bdc3d27408c8113811a7cbe640/charset_normalizer-3.4.4-cp314-cp314-win_amd64.whl", hash = "sha256:8a6562c3700cce886c5be75ade4a5db4214fda19fede41d9792d100288d8f94c", size = 107104, upload-time = "2025-10-14T04:41:51.051Z" }, + { url = "https://files.pythonhosted.org/packages/da/5f/6b8f83a55bb8278772c5ae54a577f3099025f9ade59d0136ac24a0df4bde/charset_normalizer-3.4.4-cp314-cp314-win_arm64.whl", hash = "sha256:de00632ca48df9daf77a2c65a484531649261ec9f25489917f09e455cb09ddb2", size = 100743, upload-time = "2025-10-14T04:41:52.122Z" }, + { url = "https://files.pythonhosted.org/packages/0a/4c/925909008ed5a988ccbb72dcc897407e5d6d3bd72410d69e051fc0c14647/charset_normalizer-3.4.4-py3-none-any.whl", hash = "sha256:7a32c560861a02ff789ad905a2fe94e3f840803362c84fecf1851cb4cf3dc37f", size = 53402, upload-time = "2025-10-14T04:42:31.76Z" }, +] + +[[package]] +name = "click" +version = "8.3.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "colorama", marker = "sys_platform == 'win32'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/3d/fa/656b739db8587d7b5dfa22e22ed02566950fbfbcdc20311993483657a5c0/click-8.3.1.tar.gz", hash = "sha256:12ff4785d337a1bb490bb7e9c2b1ee5da3112e94a8622f26a6c77f5d2fc6842a", size = 295065, upload-time = "2025-11-15T20:45:42.706Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/98/78/01c019cdb5d6498122777c1a43056ebb3ebfeef2076d9d026bfe15583b2b/click-8.3.1-py3-none-any.whl", hash = "sha256:981153a64e25f12d547d3426c367a4857371575ee7ad18df2a6183ab0545b2a6", size = 108274, upload-time = "2025-11-15T20:45:41.139Z" }, +] + +[[package]] +name = "colorama" +version = "0.4.6" +source = { registry = "https://pypi.org/simple" } +sdist = { url = 
"https://files.pythonhosted.org/packages/d8/53/6f443c9a4a8358a93a6792e2acffb9d9d5cb0a5cfd8802644b7b1c9a02e4/colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44", size = 27697, upload-time = "2022-10-25T02:36:22.414Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d1/d6/3965ed04c63042e047cb6a3e6ed1a63a35087b6a609aa3a15ed8ac56c221/colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6", size = 25335, upload-time = "2022-10-25T02:36:20.889Z" }, +] + +[[package]] +name = "duckdb" +version = "1.4.4" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/36/9d/ab66a06e416d71b7bdcb9904cdf8d4db3379ef632bb8e9495646702d9718/duckdb-1.4.4.tar.gz", hash = "sha256:8bba52fd2acb67668a4615ee17ee51814124223de836d9e2fdcbc4c9021b3d3c", size = 18419763, upload-time = "2026-01-26T11:50:37.68Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/04/68/19233412033a2bc5a144a3f531f64e3548d4487251e3f16b56c31411a06f/duckdb-1.4.4-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:5ba684f498d4e924c7e8f30dd157da8da34c8479746c5011b6c0e037e9c60ad2", size = 28883816, upload-time = "2026-01-26T11:49:01.009Z" }, + { url = "https://files.pythonhosted.org/packages/b3/3e/cec70e546c298ab76d80b990109e111068d82cca67942c42328eaa7d6fdb/duckdb-1.4.4-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:5536eb952a8aa6ae56469362e344d4e6403cc945a80bc8c5c2ebdd85d85eb64b", size = 15339662, upload-time = "2026-01-26T11:49:04.058Z" }, + { url = "https://files.pythonhosted.org/packages/d3/f0/cf4241a040ec4f571859a738007ec773b642fbc27df4cbcf34b0c32ea559/duckdb-1.4.4-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:47dd4162da6a2be59a0aef640eb08d6360df1cf83c317dcc127836daaf3b7f7c", size = 13670044, upload-time = "2026-01-26T11:49:06.627Z" }, + { url = 
"https://files.pythonhosted.org/packages/11/64/de2bb4ec1e35ec9ebf6090a95b930fc56934a0ad6f34a24c5972a14a77ef/duckdb-1.4.4-cp311-cp311-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6cb357cfa3403910e79e2eb46c8e445bb1ee2fd62e9e9588c6b999df4256abc1", size = 18409951, upload-time = "2026-01-26T11:49:09.808Z" }, + { url = "https://files.pythonhosted.org/packages/79/a2/ac0f5ee16df890d141304bcd48733516b7202c0de34cd3555634d6eb4551/duckdb-1.4.4-cp311-cp311-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4c25d5b0febda02b7944e94fdae95aecf952797afc8cb920f677b46a7c251955", size = 20411739, upload-time = "2026-01-26T11:49:12.652Z" }, + { url = "https://files.pythonhosted.org/packages/37/a2/9a3402edeedaecf72de05fe9ff7f0303d701b8dfc136aea4a4be1a5f7eee/duckdb-1.4.4-cp311-cp311-win_amd64.whl", hash = "sha256:6703dd1bb650025b3771552333d305d62ddd7ff182de121483d4e042ea6e2e00", size = 12256972, upload-time = "2026-01-26T11:49:15.468Z" }, + { url = "https://files.pythonhosted.org/packages/f6/e6/052ea6dcdf35b259fd182eff3efd8d75a071de4010c9807556098df137b9/duckdb-1.4.4-cp311-cp311-win_arm64.whl", hash = "sha256:bf138201f56e5d6fc276a25138341b3523e2f84733613fc43f02c54465619a95", size = 13006696, upload-time = "2026-01-26T11:49:18.054Z" }, + { url = "https://files.pythonhosted.org/packages/58/33/beadaa69f8458afe466126f2c5ee48c4759cc9d5d784f8703d44e0b52c3c/duckdb-1.4.4-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:ddcfd9c6ff234da603a1edd5fd8ae6107f4d042f74951b65f91bc5e2643856b3", size = 28896535, upload-time = "2026-01-26T11:49:21.232Z" }, + { url = "https://files.pythonhosted.org/packages/76/66/82413f386df10467affc87f65bac095b7c88dbd9c767584164d5f4dc4cb8/duckdb-1.4.4-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:6792ca647216bd5c4ff16396e4591cfa9b4a72e5ad7cdd312cec6d67e8431a7c", size = 15349716, upload-time = "2026-01-26T11:49:23.989Z" }, + { url = 
"https://files.pythonhosted.org/packages/5d/8c/c13d396fd4e9bf970916dc5b4fea410c1b10fe531069aea65f1dcf849a71/duckdb-1.4.4-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:1f8d55843cc940e36261689054f7dfb6ce35b1f5b0953b0d355b6adb654b0d52", size = 13672403, upload-time = "2026-01-26T11:49:26.741Z" }, + { url = "https://files.pythonhosted.org/packages/db/77/2446a0b44226bb95217748d911c7ca66a66ca10f6481d5178d9370819631/duckdb-1.4.4-cp312-cp312-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c65d15c440c31e06baaebfd2c06d71ce877e132779d309f1edf0a85d23c07e92", size = 18419001, upload-time = "2026-01-26T11:49:29.353Z" }, + { url = "https://files.pythonhosted.org/packages/2e/a3/97715bba30040572fb15d02c26f36be988d48bc00501e7ac02b1d65ef9d0/duckdb-1.4.4-cp312-cp312-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b297eff642503fd435a9de5a9cb7db4eccb6f61d61a55b30d2636023f149855f", size = 20437385, upload-time = "2026-01-26T11:49:32.302Z" }, + { url = "https://files.pythonhosted.org/packages/8b/0a/18b9167adf528cbe3867ef8a84a5f19f37bedccb606a8a9e59cfea1880c8/duckdb-1.4.4-cp312-cp312-win_amd64.whl", hash = "sha256:d525de5f282b03aa8be6db86b1abffdceae5f1055113a03d5b50cd2fb8cf2ef8", size = 12267343, upload-time = "2026-01-26T11:49:34.985Z" }, + { url = "https://files.pythonhosted.org/packages/f8/15/37af97f5717818f3d82d57414299c293b321ac83e048c0a90bb8b6a09072/duckdb-1.4.4-cp312-cp312-win_arm64.whl", hash = "sha256:50f2eb173c573811b44aba51176da7a4e5c487113982be6a6a1c37337ec5fa57", size = 13007490, upload-time = "2026-01-26T11:49:37.413Z" }, + { url = "https://files.pythonhosted.org/packages/7f/fe/64810fee20030f2bf96ce28b527060564864ce5b934b50888eda2cbf99dd/duckdb-1.4.4-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:337f8b24e89bc2e12dadcfe87b4eb1c00fd920f68ab07bc9b70960d6523b8bc3", size = 28899349, upload-time = "2026-01-26T11:49:40.294Z" }, + { url = 
"https://files.pythonhosted.org/packages/9c/9b/3c7c5e48456b69365d952ac201666053de2700f5b0144a699a4dc6854507/duckdb-1.4.4-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:0509b39ea7af8cff0198a99d206dca753c62844adab54e545984c2e2c1381616", size = 15350691, upload-time = "2026-01-26T11:49:43.242Z" }, + { url = "https://files.pythonhosted.org/packages/a6/7b/64e68a7b857ed0340045501535a0da99ea5d9d5ea3708fec0afb8663eb27/duckdb-1.4.4-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:fb94de6d023de9d79b7edc1ae07ee1d0b4f5fa8a9dcec799650b5befdf7aafec", size = 13672311, upload-time = "2026-01-26T11:49:46.069Z" }, + { url = "https://files.pythonhosted.org/packages/09/5b/3e7aa490841784d223de61beb2ae64e82331501bf5a415dc87a0e27b4663/duckdb-1.4.4-cp313-cp313-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0d636ceda422e7babd5e2f7275f6a0d1a3405e6a01873f00d38b72118d30c10b", size = 18422740, upload-time = "2026-01-26T11:49:49.034Z" }, + { url = "https://files.pythonhosted.org/packages/53/32/256df3dbaa198c58539ad94f9a41e98c2c8ff23f126b8f5f52c7dcd0a738/duckdb-1.4.4-cp313-cp313-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:7df7351328ffb812a4a289732f500d621e7de9942a3a2c9b6d4afcf4c0e72526", size = 20435578, upload-time = "2026-01-26T11:49:51.946Z" }, + { url = "https://files.pythonhosted.org/packages/a4/f0/620323fd87062ea43e527a2d5ed9e55b525e0847c17d3b307094ddab98a2/duckdb-1.4.4-cp313-cp313-win_amd64.whl", hash = "sha256:6fb1225a9ea5877421481d59a6c556a9532c32c16c7ae6ca8d127e2b878c9389", size = 12268083, upload-time = "2026-01-26T11:49:54.615Z" }, + { url = "https://files.pythonhosted.org/packages/e5/07/a397fdb7c95388ba9c055b9a3d38dfee92093f4427bc6946cf9543b1d216/duckdb-1.4.4-cp313-cp313-win_arm64.whl", hash = "sha256:f28a18cc790217e5b347bb91b2cab27aafc557c58d3d8382e04b4fe55d0c3f66", size = 13006123, upload-time = "2026-01-26T11:49:57.092Z" }, + { url = 
"https://files.pythonhosted.org/packages/97/a6/f19e2864e651b0bd8e4db2b0c455e7e0d71e0d4cd2cd9cc052f518e43eb3/duckdb-1.4.4-cp314-cp314-macosx_10_15_universal2.whl", hash = "sha256:25874f8b1355e96178079e37312c3ba6d61a2354f51319dae860cf21335c3a20", size = 28909554, upload-time = "2026-01-26T11:50:00.107Z" }, + { url = "https://files.pythonhosted.org/packages/0e/93/8a24e932c67414fd2c45bed83218e62b73348996bf859eda020c224774b2/duckdb-1.4.4-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:452c5b5d6c349dc5d1154eb2062ee547296fcbd0c20e9df1ed00b5e1809089da", size = 15353804, upload-time = "2026-01-26T11:50:03.382Z" }, + { url = "https://files.pythonhosted.org/packages/62/13/e5378ff5bb1d4397655d840b34b642b1b23cdd82ae19599e62dc4b9461c9/duckdb-1.4.4-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:8e5c2d8a0452df55e092959c0bfc8ab8897ac3ea0f754cb3b0ab3e165cd79aff", size = 13676157, upload-time = "2026-01-26T11:50:06.232Z" }, + { url = "https://files.pythonhosted.org/packages/2d/94/24364da564b27aeebe44481f15bd0197a0b535ec93f188a6b1b98c22f082/duckdb-1.4.4-cp314-cp314-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1af6e76fe8bd24875dc56dd8e38300d64dc708cd2e772f67b9fbc635cc3066a3", size = 18426882, upload-time = "2026-01-26T11:50:08.97Z" }, + { url = "https://files.pythonhosted.org/packages/26/0a/6ae31b2914b4dc34243279b2301554bcbc5f1a09ccc82600486c49ab71d1/duckdb-1.4.4-cp314-cp314-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d0440f59e0cd9936a9ebfcf7a13312eda480c79214ffed3878d75947fc3b7d6d", size = 20435641, upload-time = "2026-01-26T11:50:12.188Z" }, + { url = "https://files.pythonhosted.org/packages/d2/b1/fd5c37c53d45efe979f67e9bd49aaceef640147bb18f0699a19edd1874d6/duckdb-1.4.4-cp314-cp314-win_amd64.whl", hash = "sha256:59c8d76016dde854beab844935b1ec31de358d4053e792988108e995b18c08e7", size = 12762360, upload-time = "2026-01-26T11:50:14.76Z" }, + { url = 
"https://files.pythonhosted.org/packages/dd/2d/13e6024e613679d8a489dd922f199ef4b1d08a456a58eadd96dc2f05171f/duckdb-1.4.4-cp314-cp314-win_arm64.whl", hash = "sha256:53cd6423136ab44383ec9955aefe7599b3fb3dd1fe006161e6396d8167e0e0d4", size = 13458633, upload-time = "2026-01-26T11:50:17.657Z" }, +] + +[[package]] +name = "execnet" +version = "2.1.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/bf/89/780e11f9588d9e7128a3f87788354c7946a9cbb1401ad38a48c4db9a4f07/execnet-2.1.2.tar.gz", hash = "sha256:63d83bfdd9a23e35b9c6a3261412324f964c2ec8dcd8d3c6916ee9373e0befcd", size = 166622, upload-time = "2025-11-12T09:56:37.75Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ab/84/02fc1827e8cdded4aa65baef11296a9bbe595c474f0d6d758af082d849fd/execnet-2.1.2-py3-none-any.whl", hash = "sha256:67fba928dd5a544b783f6056f449e5e3931a5c378b128bc18501f7ea79e296ec", size = 40708, upload-time = "2025-11-12T09:56:36.333Z" }, +] + +[[package]] +name = "filelock" +version = "3.25.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/77/18/a1fd2231c679dcb9726204645721b12498aeac28e1ad0601038f94b42556/filelock-3.25.0.tar.gz", hash = "sha256:8f00faf3abf9dc730a1ffe9c354ae5c04e079ab7d3a683b7c32da5dd05f26af3", size = 40158, upload-time = "2026-03-01T15:08:45.916Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f9/0b/de6f54d4a8bedfe8645c41497f3c18d749f0bd3218170c667bf4b81d0cdd/filelock-3.25.0-py3-none-any.whl", hash = "sha256:5ccf8069f7948f494968fc0713c10e5c182a9c9d9eef3a636307a20c2490f047", size = 26427, upload-time = "2026-03-01T15:08:44.593Z" }, +] + +[[package]] +name = "idna" +version = "3.11" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/6f/6d/0703ccc57f3a7233505399edb88de3cbd678da106337b9fcde432b65ed60/idna-3.11.tar.gz", hash = 
"sha256:795dafcc9c04ed0c1fb032c2aa73654d8e8c5023a7df64a53f39190ada629902", size = 194582, upload-time = "2025-10-12T14:55:20.501Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/0e/61/66938bbb5fc52dbdf84594873d5b51fb1f7c7794e9c0f5bd885f30bc507b/idna-3.11-py3-none-any.whl", hash = "sha256:771a87f49d9defaf64091e6e6fe9c18d4833f140bd19464795bc32d966ca37ea", size = 71008, upload-time = "2025-10-12T14:55:18.883Z" }, +] + +[[package]] +name = "iniconfig" +version = "2.3.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/72/34/14ca021ce8e5dfedc35312d08ba8bf51fdd999c576889fc2c24cb97f4f10/iniconfig-2.3.0.tar.gz", hash = "sha256:c76315c77db068650d49c5b56314774a7804df16fee4402c1f19d6d15d8c4730", size = 20503, upload-time = "2025-10-18T21:55:43.219Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/cb/b1/3846dd7f199d53cb17f49cba7e651e9ce294d8497c8c150530ed11865bb8/iniconfig-2.3.0-py3-none-any.whl", hash = "sha256:f631c04d2c48c52b84d0d0549c99ff3859c98df65b3101406327ecc7d53fbf12", size = 7484, upload-time = "2025-10-18T21:55:41.639Z" }, +] + +[[package]] +name = "markdown-it-py" +version = "4.0.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "mdurl" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/5b/f5/4ec618ed16cc4f8fb3b701563655a69816155e79e24a17b651541804721d/markdown_it_py-4.0.0.tar.gz", hash = "sha256:cb0a2b4aa34f932c007117b194e945bd74e0ec24133ceb5bac59009cda1cb9f3", size = 73070, upload-time = "2025-08-11T12:57:52.854Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/94/54/e7d793b573f298e1c9013b8c4dade17d481164aa517d1d7148619c2cedbf/markdown_it_py-4.0.0-py3-none-any.whl", hash = "sha256:87327c59b172c5011896038353a81343b6754500a08cd7a4973bb48c6d578147", size = 87321, upload-time = "2025-08-11T12:57:51.923Z" }, +] + +[[package]] +name = "mdurl" +version = "0.1.2" +source = { registry = "https://pypi.org/simple" } 
+sdist = { url = "https://files.pythonhosted.org/packages/d6/54/cfe61301667036ec958cb99bd3efefba235e65cdeb9c84d24a8293ba1d90/mdurl-0.1.2.tar.gz", hash = "sha256:bb413d29f5eea38f31dd4754dd7377d4465116fb207585f97bf925588687c1ba", size = 8729, upload-time = "2022-08-14T12:40:10.846Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b3/38/89ba8ad64ae25be8de66a6d463314cf1eb366222074cfda9ee839c56a4b4/mdurl-0.1.2-py3-none-any.whl", hash = "sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8", size = 9979, upload-time = "2022-08-14T12:40:09.779Z" }, +] + +[[package]] +name = "packaging" +version = "26.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/65/ee/299d360cdc32edc7d2cf530f3accf79c4fca01e96ffc950d8a52213bd8e4/packaging-26.0.tar.gz", hash = "sha256:00243ae351a257117b6a241061796684b084ed1c516a08c48a3f7e147a9d80b4", size = 143416, upload-time = "2026-01-21T20:50:39.064Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b7/b9/c538f279a4e237a006a2c98387d081e9eb060d203d8ed34467cc0f0b9b53/packaging-26.0-py3-none-any.whl", hash = "sha256:b36f1fef9334a5588b4166f8bcd26a14e521f2b55e6b9de3aaa80d3ff7a37529", size = 74366, upload-time = "2026-01-21T20:50:37.788Z" }, +] + +[[package]] +name = "pluggy" +version = "1.6.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/f9/e2/3e91f31a7d2b083fe6ef3fa267035b518369d9511ffab804f839851d2779/pluggy-1.6.0.tar.gz", hash = "sha256:7dcc130b76258d33b90f61b658791dede3486c3e6bfb003ee5c9bfb396dd22f3", size = 69412, upload-time = "2025-05-15T12:30:07.975Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/54/20/4d324d65cc6d9205fabedc306948156824eb9f0ee1633355a8f7ec5c66bf/pluggy-1.6.0-py3-none-any.whl", hash = "sha256:e920276dd6813095e9377c0bc5566d94c932c33b27a3e3945d8389c374dd4746", size = 20538, upload-time = "2025-05-15T12:30:06.134Z" }, +] + +[[package]] 
+name = "psutil" +version = "7.2.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/aa/c6/d1ddf4abb55e93cebc4f2ed8b5d6dbad109ecb8d63748dd2b20ab5e57ebe/psutil-7.2.2.tar.gz", hash = "sha256:0746f5f8d406af344fd547f1c8daa5f5c33dbc293bb8d6a16d80b4bb88f59372", size = 493740, upload-time = "2026-01-28T18:14:54.428Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/51/08/510cbdb69c25a96f4ae523f733cdc963ae654904e8db864c07585ef99875/psutil-7.2.2-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:2edccc433cbfa046b980b0df0171cd25bcaeb3a68fe9022db0979e7aa74a826b", size = 130595, upload-time = "2026-01-28T18:14:57.293Z" }, + { url = "https://files.pythonhosted.org/packages/d6/f5/97baea3fe7a5a9af7436301f85490905379b1c6f2dd51fe3ecf24b4c5fbf/psutil-7.2.2-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:e78c8603dcd9a04c7364f1a3e670cea95d51ee865e4efb3556a3a63adef958ea", size = 131082, upload-time = "2026-01-28T18:14:59.732Z" }, + { url = "https://files.pythonhosted.org/packages/37/d6/246513fbf9fa174af531f28412297dd05241d97a75911ac8febefa1a53c6/psutil-7.2.2-cp313-cp313t-manylinux2010_x86_64.manylinux_2_12_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:1a571f2330c966c62aeda00dd24620425d4b0cc86881c89861fbc04549e5dc63", size = 181476, upload-time = "2026-01-28T18:15:01.884Z" }, + { url = "https://files.pythonhosted.org/packages/b8/b5/9182c9af3836cca61696dabe4fd1304e17bc56cb62f17439e1154f225dd3/psutil-7.2.2-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:917e891983ca3c1887b4ef36447b1e0873e70c933afc831c6b6da078ba474312", size = 184062, upload-time = "2026-01-28T18:15:04.436Z" }, + { url = "https://files.pythonhosted.org/packages/16/ba/0756dca669f5a9300d0cbcbfae9a4c30e446dfc7440ffe43ded5724bfd93/psutil-7.2.2-cp313-cp313t-win_amd64.whl", hash = "sha256:ab486563df44c17f5173621c7b198955bd6b613fb87c71c161f827d3fb149a9b", size = 139893, upload-time = 
"2026-01-28T18:15:06.378Z" }, + { url = "https://files.pythonhosted.org/packages/1c/61/8fa0e26f33623b49949346de05ec1ddaad02ed8ba64af45f40a147dbfa97/psutil-7.2.2-cp313-cp313t-win_arm64.whl", hash = "sha256:ae0aefdd8796a7737eccea863f80f81e468a1e4cf14d926bd9b6f5f2d5f90ca9", size = 135589, upload-time = "2026-01-28T18:15:08.03Z" }, + { url = "https://files.pythonhosted.org/packages/81/69/ef179ab5ca24f32acc1dac0c247fd6a13b501fd5534dbae0e05a1c48b66d/psutil-7.2.2-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:eed63d3b4d62449571547b60578c5b2c4bcccc5387148db46e0c2313dad0ee00", size = 130664, upload-time = "2026-01-28T18:15:09.469Z" }, + { url = "https://files.pythonhosted.org/packages/7b/64/665248b557a236d3fa9efc378d60d95ef56dd0a490c2cd37dafc7660d4a9/psutil-7.2.2-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:7b6d09433a10592ce39b13d7be5a54fbac1d1228ed29abc880fb23df7cb694c9", size = 131087, upload-time = "2026-01-28T18:15:11.724Z" }, + { url = "https://files.pythonhosted.org/packages/d5/2e/e6782744700d6759ebce3043dcfa661fb61e2fb752b91cdeae9af12c2178/psutil-7.2.2-cp314-cp314t-manylinux2010_x86_64.manylinux_2_12_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:1fa4ecf83bcdf6e6c8f4449aff98eefb5d0604bf88cb883d7da3d8d2d909546a", size = 182383, upload-time = "2026-01-28T18:15:13.445Z" }, + { url = "https://files.pythonhosted.org/packages/57/49/0a41cefd10cb7505cdc04dab3eacf24c0c2cb158a998b8c7b1d27ee2c1f5/psutil-7.2.2-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e452c464a02e7dc7822a05d25db4cde564444a67e58539a00f929c51eddda0cf", size = 185210, upload-time = "2026-01-28T18:15:16.002Z" }, + { url = "https://files.pythonhosted.org/packages/dd/2c/ff9bfb544f283ba5f83ba725a3c5fec6d6b10b8f27ac1dc641c473dc390d/psutil-7.2.2-cp314-cp314t-win_amd64.whl", hash = "sha256:c7663d4e37f13e884d13994247449e9f8f574bc4655d509c3b95e9ec9e2b9dc1", size = 141228, upload-time = "2026-01-28T18:15:18.385Z" }, + { url = 
"https://files.pythonhosted.org/packages/f2/fc/f8d9c31db14fcec13748d373e668bc3bed94d9077dbc17fb0eebc073233c/psutil-7.2.2-cp314-cp314t-win_arm64.whl", hash = "sha256:11fe5a4f613759764e79c65cf11ebdf26e33d6dd34336f8a337aa2996d71c841", size = 136284, upload-time = "2026-01-28T18:15:19.912Z" }, + { url = "https://files.pythonhosted.org/packages/e7/36/5ee6e05c9bd427237b11b3937ad82bb8ad2752d72c6969314590dd0c2f6e/psutil-7.2.2-cp36-abi3-macosx_10_9_x86_64.whl", hash = "sha256:ed0cace939114f62738d808fdcecd4c869222507e266e574799e9c0faa17d486", size = 129090, upload-time = "2026-01-28T18:15:22.168Z" }, + { url = "https://files.pythonhosted.org/packages/80/c4/f5af4c1ca8c1eeb2e92ccca14ce8effdeec651d5ab6053c589b074eda6e1/psutil-7.2.2-cp36-abi3-macosx_11_0_arm64.whl", hash = "sha256:1a7b04c10f32cc88ab39cbf606e117fd74721c831c98a27dc04578deb0c16979", size = 129859, upload-time = "2026-01-28T18:15:23.795Z" }, + { url = "https://files.pythonhosted.org/packages/b5/70/5d8df3b09e25bce090399cf48e452d25c935ab72dad19406c77f4e828045/psutil-7.2.2-cp36-abi3-manylinux2010_x86_64.manylinux_2_12_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:076a2d2f923fd4821644f5ba89f059523da90dc9014e85f8e45a5774ca5bc6f9", size = 155560, upload-time = "2026-01-28T18:15:25.976Z" }, + { url = "https://files.pythonhosted.org/packages/63/65/37648c0c158dc222aba51c089eb3bdfa238e621674dc42d48706e639204f/psutil-7.2.2-cp36-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b0726cecd84f9474419d67252add4ac0cd9811b04d61123054b9fb6f57df6e9e", size = 156997, upload-time = "2026-01-28T18:15:27.794Z" }, + { url = "https://files.pythonhosted.org/packages/8e/13/125093eadae863ce03c6ffdbae9929430d116a246ef69866dad94da3bfbc/psutil-7.2.2-cp36-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:fd04ef36b4a6d599bbdb225dd1d3f51e00105f6d48a28f006da7f9822f2606d8", size = 148972, upload-time = "2026-01-28T18:15:29.342Z" }, + { url = 
"https://files.pythonhosted.org/packages/04/78/0acd37ca84ce3ddffaa92ef0f571e073faa6d8ff1f0559ab1272188ea2be/psutil-7.2.2-cp36-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:b58fabe35e80b264a4e3bb23e6b96f9e45a3df7fb7eed419ac0e5947c61e47cc", size = 148266, upload-time = "2026-01-28T18:15:31.597Z" }, + { url = "https://files.pythonhosted.org/packages/b4/90/e2159492b5426be0c1fef7acba807a03511f97c5f86b3caeda6ad92351a7/psutil-7.2.2-cp37-abi3-win_amd64.whl", hash = "sha256:eb7e81434c8d223ec4a219b5fc1c47d0417b12be7ea866e24fb5ad6e84b3d988", size = 137737, upload-time = "2026-01-28T18:15:33.849Z" }, + { url = "https://files.pythonhosted.org/packages/8c/c7/7bb2e321574b10df20cbde462a94e2b71d05f9bbda251ef27d104668306a/psutil-7.2.2-cp37-abi3-win_arm64.whl", hash = "sha256:8c233660f575a5a89e6d4cb65d9f938126312bca76d8fe087b947b3a1aaac9ee", size = 134617, upload-time = "2026-01-28T18:15:36.514Z" }, +] + +[[package]] +name = "pygments" +version = "2.19.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/b0/77/a5b8c569bf593b0140bde72ea885a803b82086995367bf2037de0159d924/pygments-2.19.2.tar.gz", hash = "sha256:636cb2477cec7f8952536970bc533bc43743542f70392ae026374600add5b887", size = 4968631, upload-time = "2025-06-21T13:39:12.283Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c7/21/705964c7812476f378728bdf590ca4b771ec72385c533964653c68e86bdc/pygments-2.19.2-py3-none-any.whl", hash = "sha256:86540386c03d588bb81d44bc3928634ff26449851e99741617ecb9037ee5ec0b", size = 1225217, upload-time = "2025-06-21T13:39:07.939Z" }, +] + +[[package]] +name = "pytest" +version = "9.0.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "colorama", marker = "sys_platform == 'win32'" }, + { name = "iniconfig" }, + { name = "packaging" }, + { name = "pluggy" }, + { name = "pygments" }, +] +sdist = { url = 
"https://files.pythonhosted.org/packages/d1/db/7ef3487e0fb0049ddb5ce41d3a49c235bf9ad299b6a25d5780a89f19230f/pytest-9.0.2.tar.gz", hash = "sha256:75186651a92bd89611d1d9fc20f0b4345fd827c41ccd5c299a868a05d70edf11", size = 1568901, upload-time = "2025-12-06T21:30:51.014Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/3b/ab/b3226f0bd7cdcf710fbede2b3548584366da3b19b5021e74f5bde2a8fa3f/pytest-9.0.2-py3-none-any.whl", hash = "sha256:711ffd45bf766d5264d487b917733b453d917afd2b0ad65223959f59089f875b", size = 374801, upload-time = "2025-12-06T21:30:49.154Z" }, +] + +[[package]] +name = "pytest-xdist" +version = "3.8.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "execnet" }, + { name = "pytest" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/78/b4/439b179d1ff526791eb921115fca8e44e596a13efeda518b9d845a619450/pytest_xdist-3.8.0.tar.gz", hash = "sha256:7e578125ec9bc6050861aa93f2d59f1d8d085595d6551c2c90b6f4fad8d3a9f1", size = 88069, upload-time = "2025-07-01T13:30:59.346Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ca/31/d4e37e9e550c2b92a9cbc2e4d0b7420a27224968580b5a447f420847c975/pytest_xdist-3.8.0-py3-none-any.whl", hash = "sha256:202ca578cfeb7370784a8c33d6d05bc6e13b4f25b5053c30a152269fd10f0b88", size = 46396, upload-time = "2025-07-01T13:30:56.632Z" }, +] + +[package.optional-dependencies] +psutil = [ + { name = "psutil" }, +] + +[[package]] +name = "requests" +version = "2.32.5" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "certifi" }, + { name = "charset-normalizer" }, + { name = "idna" }, + { name = "urllib3" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/c9/74/b3ff8e6c8446842c3f5c837e9c3dfcfe2018ea6ecef224c710c85ef728f4/requests-2.32.5.tar.gz", hash = "sha256:dbba0bac56e100853db0ea71b82b4dfd5fe2bf6d3754a8893c3af500cec7d7cf", size = 134517, upload-time = "2025-08-18T20:46:02.573Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/1e/db/4254e3eabe8020b458f1a747140d32277ec7a271daf1d235b70dc0b4e6e3/requests-2.32.5-py3-none-any.whl", hash = "sha256:2462f94637a34fd532264295e186976db0f5d453d1cdd31473c85a6a161affb6", size = 64738, upload-time = "2025-08-18T20:46:00.542Z" }, +] + +[[package]] +name = "rich" +version = "14.3.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "markdown-it-py" }, + { name = "pygments" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/b3/c6/f3b320c27991c46f43ee9d856302c70dc2d0fb2dba4842ff739d5f46b393/rich-14.3.3.tar.gz", hash = "sha256:b8daa0b9e4eef54dd8cf7c86c03713f53241884e814f4e2f5fb342fe520f639b", size = 230582, upload-time = "2026-02-19T17:23:12.474Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/14/25/b208c5683343959b670dc001595f2f3737e051da617f66c31f7c4fa93abc/rich-14.3.3-py3-none-any.whl", hash = "sha256:793431c1f8619afa7d3b52b2cdec859562b950ea0d4b6b505397612db8d5362d", size = 310458, upload-time = "2026-02-19T17:23:13.732Z" }, +] + +[[package]] +name = "ruff" +version = "0.15.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/06/04/eab13a954e763b0606f460443fcbf6bb5a0faf06890ea3754ff16523dce5/ruff-0.15.2.tar.gz", hash = "sha256:14b965afee0969e68bb871eba625343b8673375f457af4abe98553e8bbb98342", size = 4558148, upload-time = "2026-02-19T22:32:20.271Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/2f/70/3a4dc6d09b13cb3e695f28307e5d889b2e1a66b7af9c5e257e796695b0e6/ruff-0.15.2-py3-none-linux_armv6l.whl", hash = "sha256:120691a6fdae2f16d65435648160f5b81a9625288f75544dc40637436b5d3c0d", size = 10430565, upload-time = "2026-02-19T22:32:41.824Z" }, + { url = "https://files.pythonhosted.org/packages/71/0b/bb8457b56185ece1305c666dc895832946d24055be90692381c31d57466d/ruff-0.15.2-py3-none-macosx_10_12_x86_64.whl", hash = 
"sha256:a89056d831256099658b6bba4037ac6dd06f49d194199215befe2bb10457ea5e", size = 10820354, upload-time = "2026-02-19T22:32:07.366Z" }, + { url = "https://files.pythonhosted.org/packages/2d/c1/e0532d7f9c9e0b14c46f61b14afd563298b8b83f337b6789ddd987e46121/ruff-0.15.2-py3-none-macosx_11_0_arm64.whl", hash = "sha256:e36dee3a64be0ebd23c86ffa3aa3fd3ac9a712ff295e192243f814a830b6bd87", size = 10170767, upload-time = "2026-02-19T22:32:13.188Z" }, + { url = "https://files.pythonhosted.org/packages/47/e8/da1aa341d3af017a21c7a62fb5ec31d4e7ad0a93ab80e3a508316efbcb23/ruff-0.15.2-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a9fb47b6d9764677f8c0a193c0943ce9a05d6763523f132325af8a858eadc2b9", size = 10529591, upload-time = "2026-02-19T22:32:02.547Z" }, + { url = "https://files.pythonhosted.org/packages/93/74/184fbf38e9f3510231fbc5e437e808f0b48c42d1df9434b208821efcd8d6/ruff-0.15.2-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:f376990f9d0d6442ea9014b19621d8f2aaf2b8e39fdbfc79220b7f0c596c9b80", size = 10260771, upload-time = "2026-02-19T22:32:36.938Z" }, + { url = "https://files.pythonhosted.org/packages/05/ac/605c20b8e059a0bc4b42360414baa4892ff278cec1c91fff4be0dceedefd/ruff-0.15.2-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2dcc987551952d73cbf5c88d9fdee815618d497e4df86cd4c4824cc59d5dd75f", size = 11045791, upload-time = "2026-02-19T22:32:31.642Z" }, + { url = "https://files.pythonhosted.org/packages/fd/52/db6e419908f45a894924d410ac77d64bdd98ff86901d833364251bd08e22/ruff-0.15.2-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:42a47fd785cbe8c01b9ff45031af875d101b040ad8f4de7bbb716487c74c9a77", size = 11879271, upload-time = "2026-02-19T22:32:29.305Z" }, + { url = "https://files.pythonhosted.org/packages/3e/d8/7992b18f2008bdc9231d0f10b16df7dda964dbf639e2b8b4c1b4e91b83af/ruff-0.15.2-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = 
"sha256:cbe9f49354866e575b4c6943856989f966421870e85cd2ac94dccb0a9dcb2fea", size = 11303707, upload-time = "2026-02-19T22:32:22.492Z" }, + { url = "https://files.pythonhosted.org/packages/d7/02/849b46184bcfdd4b64cde61752cc9a146c54759ed036edd11857e9b8443b/ruff-0.15.2-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b7a672c82b5f9887576087d97be5ce439f04bbaf548ee987b92d3a7dede41d3a", size = 11149151, upload-time = "2026-02-19T22:32:44.234Z" }, + { url = "https://files.pythonhosted.org/packages/70/04/f5284e388bab60d1d3b99614a5a9aeb03e0f333847e2429bebd2aaa1feec/ruff-0.15.2-py3-none-manylinux_2_31_riscv64.whl", hash = "sha256:72ecc64f46f7019e2bcc3cdc05d4a7da958b629a5ab7033195e11a438403d956", size = 11091132, upload-time = "2026-02-19T22:32:24.691Z" }, + { url = "https://files.pythonhosted.org/packages/fa/ae/88d844a21110e14d92cf73d57363fab59b727ebeabe78009b9ccb23500af/ruff-0.15.2-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:8dcf243b15b561c655c1ef2f2b0050e5d50db37fe90115507f6ff37d865dc8b4", size = 10504717, upload-time = "2026-02-19T22:32:26.75Z" }, + { url = "https://files.pythonhosted.org/packages/64/27/867076a6ada7f2b9c8292884ab44d08fd2ba71bd2b5364d4136f3cd537e1/ruff-0.15.2-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:dab6941c862c05739774677c6273166d2510d254dac0695c0e3f5efa1b5585de", size = 10263122, upload-time = "2026-02-19T22:32:10.036Z" }, + { url = "https://files.pythonhosted.org/packages/e7/ef/faf9321d550f8ebf0c6373696e70d1758e20ccdc3951ad7af00c0956be7c/ruff-0.15.2-py3-none-musllinux_1_2_i686.whl", hash = "sha256:1b9164f57fc36058e9a6806eb92af185b0697c9fe4c7c52caa431c6554521e5c", size = 10735295, upload-time = "2026-02-19T22:32:39.227Z" }, + { url = "https://files.pythonhosted.org/packages/2f/55/e8089fec62e050ba84d71b70e7834b97709ca9b7aba10c1a0b196e493f97/ruff-0.15.2-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:80d24fcae24d42659db7e335b9e1531697a7102c19185b8dc4a028b952865fd8", size = 11241641, upload-time = 
"2026-02-19T22:32:34.617Z" }, + { url = "https://files.pythonhosted.org/packages/23/01/1c30526460f4d23222d0fabd5888868262fd0e2b71a00570ca26483cd993/ruff-0.15.2-py3-none-win32.whl", hash = "sha256:fd5ff9e5f519a7e1bd99cbe8daa324010a74f5e2ebc97c6242c08f26f3714f6f", size = 10507885, upload-time = "2026-02-19T22:32:15.635Z" }, + { url = "https://files.pythonhosted.org/packages/5c/10/3d18e3bbdf8fc50bbb4ac3cc45970aa5a9753c5cb51bf9ed9a3cd8b79fa3/ruff-0.15.2-py3-none-win_amd64.whl", hash = "sha256:d20014e3dfa400f3ff84830dfb5755ece2de45ab62ecea4af6b7262d0fb4f7c5", size = 11623725, upload-time = "2026-02-19T22:32:04.947Z" }, + { url = "https://files.pythonhosted.org/packages/6d/78/097c0798b1dab9f8affe73da9642bb4500e098cb27fd8dc9724816ac747b/ruff-0.15.2-py3-none-win_arm64.whl", hash = "sha256:cabddc5822acdc8f7b5527b36ceac55cc51eec7b1946e60181de8fe83ca8876e", size = 10941649, upload-time = "2026-02-19T22:32:18.108Z" }, +] + +[[package]] +name = "tqdm" +version = "4.67.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "colorama", marker = "sys_platform == 'win32'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/09/a9/6ba95a270c6f1fbcd8dac228323f2777d886cb206987444e4bce66338dd4/tqdm-4.67.3.tar.gz", hash = "sha256:7d825f03f89244ef73f1d4ce193cb1774a8179fd96f31d7e1dcde62092b960bb", size = 169598, upload-time = "2026-02-03T17:35:53.048Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/16/e1/3079a9ff9b8e11b846c6ac5c8b5bfb7ff225eee721825310c91b3b50304f/tqdm-4.67.3-py3-none-any.whl", hash = "sha256:ee1e4c0e59148062281c49d80b25b67771a127c85fc9676d3be5f243206826bf", size = 78374, upload-time = "2026-02-03T17:35:50.982Z" }, +] + +[[package]] +name = "urllib3" +version = "2.6.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/c7/24/5f1b3bdffd70275f6661c76461e25f024d5a38a46f04aaca912426a2b1d3/urllib3-2.6.3.tar.gz", hash = 
"sha256:1b62b6884944a57dbe321509ab94fd4d3b307075e0c2eae991ac71ee15ad38ed", size = 435556, upload-time = "2026-01-07T16:24:43.925Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/39/08/aaaad47bc4e9dc8c725e68f9d04865dbcb2052843ff09c97b08904852d84/urllib3-2.6.3-py3-none-any.whl", hash = "sha256:bf272323e553dfb2e87d9bfd225ca7b0f467b919d7bbd355436d3fd37cb0acd4", size = 131584, upload-time = "2026-01-07T16:24:42.685Z" }, +]