diff --git a/pyproject.toml b/pyproject.toml
index acfc280..34af193 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -144,6 +144,7 @@ ignore = [
 "tests/**/*.py" = ["S101", "T201"] # use of assert
 "**/__init__.py" = ["D104"]
 
+
 [tool.ruff.lint.mccabe]
 # Flag errors (`C901`) whenever the complexity level exceeds 15.
 max-complexity = 15
diff --git a/src/cdm_data_loader_utils/parsers/gene_association_file.py b/src/cdm_data_loader_utils/parsers/gene_association_file.py
index 548de56..cd81647 100644
--- a/src/cdm_data_loader_utils/parsers/gene_association_file.py
+++ b/src/cdm_data_loader_utils/parsers/gene_association_file.py
@@ -273,7 +273,7 @@ def run(
         if register:
             register_table(spark, output_path, table_name=table_name, permanent=permanent)
-    except Exception as e:
+    except Exception:
         logger.exception("Pipeline failed")
         sys.exit(1)
     finally:
diff --git a/src/cdm_data_loader_utils/parsers/shared_identifiers.py b/src/cdm_data_loader_utils/parsers/shared_identifiers.py
new file mode 100644
index 0000000..33b865f
--- /dev/null
+++ b/src/cdm_data_loader_utils/parsers/shared_identifiers.py
@@ -0,0 +1,16 @@
+from cdm_data_loader_utils.parsers.xml_utils import get_text
+
+
+def parse_identifiers_generic(entry, xpath, prefix, ns):
+    """Collect identifier records from every element matching `xpath`.
+
+    Each node with non-empty text yields a record like
+    {"identifier": f"{prefix}:{text}", "source": prefix, "description": f"{prefix} accession"}.
+    """
+    result = []
+    for node in entry.findall(xpath, ns):
+        text = get_text(node)
+        if not text:
+            continue
+        result.append({"identifier": f"{prefix}:{text}", "source": prefix, "description": f"{prefix} accession"})
+    return result
diff --git a/src/cdm_data_loader_utils/parsers/uniprot.py b/src/cdm_data_loader_utils/parsers/uniprot.py
index fa6d6a4..ca4516e 100644
--- a/src/cdm_data_loader_utils/parsers/uniprot.py
+++ b/src/cdm_data_loader_utils/parsers/uniprot.py
@@ -1,28 +1,37 @@
 """
-UniProt XML Delta Lake Ingestion Pipeline.
+UniProt XML Delta Lake Ingestion Pipeline
 =========================================
 
 This script parses UniProt XML (.xml.gz) file and ingests the data into structured Delta Lake tables.
 
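+Each run writes or updates these Delta tables: entities, identifiers, names,
+proteins, associations, cross_references, and publications.
+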
Typical usage: -------------- +Use it in Berdle as: python3 src/parsers/uniprot.py \ --xml-url "https://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/taxonomic_divisions/uniprot_sprot_archaea.xml.gz" \ --output-dir "./output" \ --namespace "uniprot_db" \ --batch-size 5000 + +python -m cdm_data_loader_utils.parsers.uniprot \ + --xml-url "https://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/taxonomic_divisions/uniprot_sprot_archaea.xml.gz" \ + --output-dir "tests/data/uniprot_archaea" \ + --namespace "uniprot_db" \ + --batch-size 5000 + + Arguments: ---------- ---xml-url: URL to the UniProt XML .gz file +--xml-url: URL to the UniProt XML .gz file --output-dir: Output directory for Delta tables and logs (default: './output') --namespace: Delta Lake database name (default: 'uniprot_db') ---target-date: Process entries modified/updated since specific date +--target-date: Process entries modified/updated since specific date --batch-size: Number of UniProt entries to process per write batch (default: 5000) Functionality: -------------- -- Downloads the XML file if not present locally +- Downloads the XML file if not present locally - Parses UniProt entries in a memory-efficient streaming fashion - Maps parsed data into standardized CDM tables - Writes all tables as Delta Lake tables, supporting incremental import @@ -38,6 +47,7 @@ import datetime import gzip import json +import logging import os import uuid import xml.etree.ElementTree as ET @@ -46,49 +56,61 @@ import requests from delta import configure_spark_with_delta_pip from pyspark.sql import SparkSession +from pyspark.sql.functions import col, split from pyspark.sql.types import ArrayType, StringType, StructField, StructType -## XML namespace mapping for UniProt entries (used for all XPath queries) -NS = {"u": "https://uniprot.org/uniprot"} +from cdm_data_loader_utils.parsers.shared_identifiers import parse_identifiers_generic +from cdm_data_loader_utils.parsers.xml_utils import clean_dict, find_all_text, get_attr, get_text, parse_db_references +# --------------------------------------------------------------------- +# Logging +# --------------------------------------------------------------------- +logger = logging.getLogger(__name__) +logging.basicConfig( + level=logging.INFO, + format="%(asctime)s | %(levelname)s | %(name)s | %(message)s", +) -def load_existing_identifiers(spark, output_dir, namespace): - """ - Load the existing 'identifiers' Delta table and build a mapping from UniProt accession to CDM entity ID. - This function enables consistent mapping of accessions to CDM IDs across multiple imports, supporting upsert and idempotent workflows. 
- Returns: - dict: {accession: entity_id} - """ - access_to_cdm_id = {} - id_path = os.path.abspath(os.path.join(output_dir, f"{namespace}_identifiers_delta")) - if os.path.exists(id_path): - try: - # Read identifier and entity_id columns from the Delta table - df = spark.read.format("delta").load(id_path).select("identifier", "entity_id") - for row in df.collect(): - # Identifier field: UniProt:Pxxxxx, extract the actual accession part after the colon - accession = row["identifier"].split(":", 1)[1] - access_to_cdm_id[accession] = row["entity_id"] - except Exception as e: - print(f"Couldn't load identifiers table: {e}") - else: - print(f"No previous identifiers delta at {id_path}.") - return access_to_cdm_id +# --------------------------------------------------------------------- +# XML namespace mapping for UniProt entries (used for all XPath queries) +# --------------------------------------------------------------------- +NS = {"ns": "https://uniprot.org/uniprot"} -def generate_cdm_id() -> str: - """ - Generate a CDM entity_id directly from UniProt accession, using 'CDM:' prefix - Ensures that each accession is mapped to stable and unique CDM entity ID, making it easy to join across different tables by accession. - """ - return f"CDM:{uuid.uuid4()}" +# --------------------------------------------------------------------- +# Stable ID namespace (UUIDv5) +# --------------------------------------------------------------------- +CDM_UUID_NAMESPACE = uuid.UUID("2d3f6e2a-4d7b-4a8c-9c5a-0e0f7b7d9b3a") -def build_datasource_record(xml_url): - """ - Build a provenance record for the UniProt datasource without version extraction. - """ +# --------------------------------------------------------------------- +# CURIE prefixes +# --------------------------------------------------------------------- +PREFIX_TRANSLATION: dict[str, str] = { + "UniProtKB": "UniProt", + "UniProtKB/Swiss-Prot": "UniProt", + "UniProtKB/TrEMBL": "UniProt", + "UniParc": "UniParc", + "RefSeq": "RefSeq", + "EMBL": "EMBL", + "PDB": "PDB", + "ChEBI": "ChEBI", + "Rhea": "Rhea", + "NCBI Taxonomy": "NCBITaxon", + "GeneID": "NCBIGene", + "Ensembl": "Ensembl", + "GO": "GO", +} + + +# ================================ HELPERS ================================= +def delta_table_path(output_dir: str, namespace: str, table: str) -> str: + return os.path.abspath(os.path.join(output_dir, namespace, table)) + + +def build_datasource_record(xml_url: str) -> dict: + """Build a provenance record for the UniProt datasource.""" return { "name": "UniProt import", "source": "UniProt", @@ -98,162 +120,323 @@ def build_datasource_record(xml_url): } -def parse_identifiers(entry, cdm_id): - """ - Extract all accession numbers in the UniProt entry and format them into a CDM identifier structure. 
- """ - return [ - { - "entity_id": cdm_id, - "identifier": f"UniProt:{acc.text}", - "source": "UniProt", - "description": "UniProt accession", - } - for acc in entry.findall("u:accession", NS) - ] +def save_datasource_record(xml_url: str, output_dir: str) -> dict: + """Generate and save the datasource provenance record as a JSON file.""" + datasource = build_datasource_record(xml_url) + + os.makedirs(output_dir, exist_ok=True) + output_path = os.path.join(output_dir, "datasource.json") + + with open(output_path, "w", encoding="utf-8") as f: + json.dump(datasource, f, indent=2) + + logger.info("Saved datasource record to %s", output_path) + return datasource + + +def download_file( + url: str, + output_path: str, + chunk_size: int = 1024 * 1024, + overwrite: bool = False, +) -> None: + """Download URL -> output_path (streaming)""" + if os.path.exists(output_path) and not overwrite: + logger.info("File already exists, skip download: %s", output_path) + return + + tmp_path = output_path + ".part" + if os.path.exists(tmp_path): + try: + os.remove(tmp_path) + except Exception: + pass + + try: + logger.info("Downloading %s -> %s", url, output_path) + with requests.get(url, stream=True, timeout=120) as r: + r.raise_for_status() + with open(tmp_path, "wb") as f: + for chunk in r.iter_content(chunk_size=chunk_size): + if chunk: + f.write(chunk) + os.replace(tmp_path, output_path) + logger.info("Download complete: %s", output_path) + except Exception: + logger.exception("Failed to download %s", url) + try: + if os.path.exists(tmp_path): + os.remove(tmp_path) + except Exception: + logger.exception("Failed to remove partial download: %s", tmp_path) + raise + + +def prepare_local_xml(xml_url: str, output_dir: str, overwrite: bool = False) -> str: + os.makedirs(output_dir, exist_ok=True) + local_path = os.path.join(output_dir, os.path.basename(xml_url)) + download_file(xml_url, local_path, overwrite=overwrite) + return local_path + + +def stream_uniprot_xml(filepath: str): + """Stream gzipped UniProt XML entries.""" + logger.info("Streaming UniProt XML from: %s", filepath) + with gzip.open(filepath, "rb") as f: + for _, elem in ET.iterparse(f, events=("end",)): + if elem.tag.endswith("entry"): + yield elem + elem.clear() + + +def get_spark_session(namespace: str) -> SparkSession: + """Initialize SparkSession with Delta Lake support, and ensure the target database exists.""" + builder = ( + SparkSession.builder.appName("UniProtDeltaIngestion") + .config("spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtension") + .config( + "spark.sql.catalog.spark_catalog", + "org.apache.spark.sql.delta.catalog.DeltaCatalog", + ) + .config("spark.databricks.delta.schema.autoMerge.enabled", "true") + ) + spark = configure_spark_with_delta_pip(builder).getOrCreate() + spark.sql(f"CREATE DATABASE IF NOT EXISTS {namespace}") + return spark + + +def normalize_prefix(db_type: str) -> str: + """Map UniProt dbReference @type to a normalized CURIE prefix.""" + return PREFIX_TRANSLATION.get(db_type, db_type.replace(" ", "")) + +def make_curie(db_type: str, db_id: str) -> str: + """Create CURIE with normalized prefix.""" + return f"{normalize_prefix(db_type)}:{db_id}" -def parse_names(entry, cdm_id): + +# ================================ STABLE ID ================================= +def stable_cdm_id_from_uniprot_accession(accession: str, prefix: str = "cdm_prot_") -> str: + u = uuid.uuid5(CDM_UUID_NAMESPACE, f"UniProt:{accession}") + return f"{prefix}{u}" + + +def load_existing_maps( + spark: SparkSession, + output_dir: 
str, + namespace: str, +) -> tuple[dict[str, str], dict[str, str]]: """ - Extract all protein names from a UniProt element, including - - Top-level elements (generic names) - - and blocks within (full and short names). + Returns: + accession_to_entity_id: accession -> entity_id (from identifiers) + entity_id_to_created: entity_id -> created (from entities) """ - names = [] + accession_to_entity_id: dict[str, str] = {} + entity_id_to_created: dict[str, str] = {} - # Extract all top-level tags - for name_element in entry.findall("u:name", NS): - if name_element.text: - names.append( - { - "entity_id": cdm_id, - "name": name_element.text, - "description": "UniProt protein name", - "source": "UniProt", - } + id_path = os.path.join(output_dir, namespace, "identifiers") + ent_path = os.path.join(output_dir, namespace, "entities") + + if os.path.exists(id_path): + try: + df = ( + spark.read.format("delta") + .load(id_path) + .filter(col("identifier").startswith("UniProt:")) + .select( + split(col("identifier"), ":").getItem(1).alias("accession"), + col("entity_id"), + ) ) + for row in df.toLocalIterator(): + acc = row["accession"] + eid = row["entity_id"] + if acc and eid: + accession_to_entity_id[acc] = eid + logger.info( + "Loaded %d accession->entity_id from %s", + len(accession_to_entity_id), + id_path, + ) + except Exception: + logger.exception("Couldn't load identifiers from %s", id_path) - # Extract recommended and alternative names from block - protein = entry.find("u:protein", NS) - if protein is not None: - for name_type in ["recommended", "alternative"]: - # Directly use findall for simplicity (recommendedName returns single-element list) - name_blocks = protein.findall(f"u:{name_type}Name", NS) - for name in name_blocks: - for name_length in ["full", "short"]: - name_string = name.find(f"u:{name_length}Name", NS) - if name_string is None or not name_string.text: - continue + if os.path.exists(ent_path): + try: + df = spark.read.format("delta").load(ent_path).select("entity_id", "created") + for row in df.toLocalIterator(): + if row["entity_id"] and row["created"]: + entity_id_to_created[row["entity_id"]] = row["created"] + logger.info( + "Loaded %d entity_id->created from %s", + len(entity_id_to_created), + ent_path, + ) + except Exception: + logger.exception("Couldn't load entities from %s", ent_path) + + return accession_to_entity_id, entity_id_to_created + + +# ================================ PARSERS ================================= +def parse_identifiers(entry, cdm_id: str) -> list[dict]: + out = parse_identifiers_generic(entry=entry, xpath="ns:accession", prefix="UniProt", ns=NS) + for row in out: + row["entity_id"] = cdm_id + row.setdefault("source", "UniProt") + row.setdefault("description", "UniProt accession") + return out - names.append( - { - "entity_id": cdm_id, - "name": name_string.text, - "description": f"UniProt {name_type} {name_length} name", - "source": "UniProt", - } - ) + +def _make_name_record(cdm_id: str, name_text: str, description: str) -> dict: + return { + "entity_id": cdm_id, + "name": name_text, + "description": description, + "source": "UniProt", + } + + +def parse_names(entry, cdm_id: str) -> list[dict]: + names: list[dict] = [] + + for txt in find_all_text(entry, "ns:name", NS): + names.append(_make_name_record(cdm_id, txt, "UniProt entry name")) + + protein = entry.find("ns:protein", NS) + if protein is not None: + for tag_name, logical_type in [ + ("recommendedName", "recommended"), + ("alternativeName", "alternative"), + ]: + for name_block in 
protein.findall(f"ns:{tag_name}", NS): + for xml_tag, length_label in [ + ("fullName", "full"), + ("shortName", "short"), + ]: + elem = name_block.find(f"ns:{xml_tag}", NS) + text = get_text(elem) + if text: + names.append( + _make_name_record( + cdm_id, + text, + f"UniProt {logical_type} {length_label} name", + ) + ) return names -def parse_protein_info(entry, cdm_id): - """ - Extract protein-level metadata from a UniProt XML element. - """ - protein_info = {} - ec_numbers = [] +def parse_protein_info(entry, cdm_id: str) -> dict | None: + protein_info: dict = {} - # Extract EC numbers from and in - protein = entry.find("u:protein", NS) + protein = entry.find("ns:protein", NS) if protein is not None: - # Find EC numbers in recommendedName - rec = protein.find("u:recommendedName", NS) - if rec is not None: - for ec in rec.findall("u:ecNumber", NS): - if ec.text: - ec_numbers.append(ec.text) - - # Find EC numbers in all alternativeNames - for alt in protein.findall("u:alternativeName", NS): - for ec in alt.findall("u:ecNumber", NS): - if ec.text: - ec_numbers.append(ec.text) + ec_paths = ["ns:recommendedName/ns:ecNumber", "ns:alternativeName/ns:ecNumber"] + ec_numbers: list[str] = [] + for path in ec_paths: + ec_numbers.extend(find_all_text(protein, path, NS)) if ec_numbers: - protein_info["ec_numbers"] = ec_numbers + protein_info["ec_numbers"] = ";".join(ec_numbers) - # Extract protein existence evidence type - protein_existence = entry.find("u:proteinExistence", NS) + protein_existence = entry.find("ns:proteinExistence", NS) if protein_existence is not None: protein_info["protein_id"] = cdm_id - protein_info["evidence_for_existence"] = protein_existence.get("type") - - # Extract sequence and sequence-related attributes - seq_elem = entry.find("u:sequence", NS) - if seq_elem is not None and seq_elem.text: - protein_info["length"] = seq_elem.get("length") - protein_info["mass"] = seq_elem.get("mass") - protein_info["checksum"] = seq_elem.get("checksum") - protein_info["modified"] = seq_elem.get("modified") - protein_info["sequence_version"] = seq_elem.get("version") - protein_info["sequence"] = seq_elem.text.strip() - - # Capture the entry's modified/updated date for tracking - entry_modified = entry.attrib.get("modified") or entry.attrib.get("updated") + protein_info["evidence_for_existence"] = get_attr(protein_existence, "type") + + seq_elem = entry.find("ns:sequence", NS) + if seq_elem is not None: + protein_info.update( + clean_dict( + { + "length": get_attr(seq_elem, "length"), + "mass": get_attr(seq_elem, "mass"), + "checksum": get_attr(seq_elem, "checksum"), + "modified": get_attr(seq_elem, "modified"), + "sequence_version": get_attr(seq_elem, "version"), + "sequence": get_text(seq_elem), + } + ) + ) + + entry_modified = get_attr(entry, "modified") or get_attr(entry, "updated") if entry_modified: protein_info["entry_modified"] = entry_modified - # Return the dictionary if any protein info was extracted return protein_info if protein_info else None -def parse_evidence_map(entry): - """ - Parse all elements from a UniProt XML entry and build a mapping - from evidence key to metadata (type, supporting objects, publications). 
- """ - evidence_map = {} +def parse_evidence_map(entry) -> dict[str, dict]: + evidence_map: dict[str, dict] = {} - # Loop through every element in the entry - for evidence in entry.findall("u:evidence", NS): - key = evidence.get("key") # Unique evidence key (string) - evidence_type = evidence.get("type") # Evidence code/type (e.g., ECO:0000255) + for ev in entry.findall("ns:evidence", NS): + key = get_attr(ev, "key") + if not key: + continue - supporting_objects = [] - publications = [] + evidence_type = get_attr(ev, "type") + pubs: list[str] = [] + others: list[str] = [] - # Check if this evidence has a element with children - source = evidence.find("u:source", NS) + source = ev.find("ns:source", NS) if source is not None: - for dbref in source.findall("u:dbReference", NS): - db_type = dbref.get("type") - db_id = dbref.get("id") - # Add publication references as PubMed or DOI; others as supporting objects - if db_type == "PubMed": - publications.append(f"PMID:{db_id}") - elif db_type == "DOI": - publications.append(f"DOI:{db_id}") + raw_pubs, raw_others = parse_db_references(source, NS) + + normalized_pubs: list[str] = [] + for p in raw_pubs: + up = p.upper() + if up.startswith("PUBMED:"): + _, acc = p.split(":", 1) + normalized_pubs.append(f"PMID:{acc}") else: - supporting_objects.append(f"{db_type}:{db_id}") + normalized_pubs.append(p) - # Store evidence metadata, omitting empty lists for cleanliness - evidence_map[key] = { - "evidence_type": evidence_type, - "supporting_objects": supporting_objects if supporting_objects else None, - "publications": publications if publications else None, - } + pubs = normalized_pubs + others = raw_others + + evidence_map[key] = clean_dict( + { + "evidence_type": evidence_type, + "publications": pubs or None, + "supporting_objects": others or None, + } + ) return evidence_map -def parse_reaction_association(reaction, cdm_id, evidence_map): - associations = [] - for dbref in reaction.findall("u:dbReference", NS): +def _make_association( + cdm_id: str, + obj: str, + predicate: str | None = None, + evidence_key: str | None = None, + evidence_map: dict | None = None, +) -> dict: + assoc = { + "subject": cdm_id, + "object": obj, + "predicate": predicate, + "evidence_type": None, + "supporting_objects": None, + "publications": None, + } + if evidence_key and evidence_map and evidence_key in evidence_map: + assoc.update(evidence_map[evidence_key]) + return clean_dict(assoc) + + +def parse_reaction_association(reaction, cdm_id: str, evidence_map: dict[str, dict]) -> list[dict]: + associations: list[dict] = [] + for dbref in reaction.findall("ns:dbReference", NS): db_type = dbref.get("type") db_id = dbref.get("id") + if not db_type or not db_id: + continue + assoc = { "subject": cdm_id, "predicate": "catalyzes", - "object": f"{db_type}:{db_id}", + "object": make_curie(db_type, db_id), "evidence_type": None, "supporting_objects": None, "publications": None, @@ -261,124 +444,127 @@ def parse_reaction_association(reaction, cdm_id, evidence_map): evidence_key = reaction.get("evidence") if evidence_key and evidence_key in evidence_map: assoc.update(evidence_map[evidence_key]) - associations.append(assoc) + associations.append(clean_dict(assoc)) return associations -def parse_cofactor_association(cofactor, cdm_id): - associations = [] - for dbref in cofactor.findall("u:dbReference", NS): +def parse_cofactor_association(cofactor, cdm_id: str) -> list[dict]: + associations: list[dict] = [] + for dbref in cofactor.findall("ns:dbReference", NS): db_type = 
dbref.get("type") db_id = dbref.get("id") - assoc = { - "subject": cdm_id, - "predicate": "requires_cofactor", - "object": f"{db_type}:{db_id}", - "evidence_type": None, - "supporting_objects": None, - "publications": None, - } - associations.append(assoc) + if not db_type or not db_id: + continue + associations.append( + clean_dict( + { + "subject": cdm_id, + "predicate": "requires_cofactor", + "object": make_curie(db_type, db_id), + "evidence_type": None, + "supporting_objects": None, + "publications": None, + } + ) + ) return associations -def parse_associations(entry, cdm_id, evidence_map): +def parse_associations(entry, cdm_id: str, evidence_map: dict[str, dict]) -> list[dict]: """ - Parse all relevant associations from a UniProt XML entry for the CDM model. - Only include fields that are not None for each association. + Only keep: + - taxonomy association + - catalytic activity / cofactor associations """ - associations = [] - - def clean(d): - """Remove None-value keys from a dict.""" - return {k: v for k, v in d.items() if v is not None} + associations: list[dict] = [] # Taxonomy association - organism = entry.find("u:organism", NS) + organism = entry.find("ns:organism", NS) if organism is not None: - taxon_ref = organism.find('u:dbReference[@type="NCBI Taxonomy"]', NS) + taxon_ref = organism.find('ns:dbReference[@type="NCBI Taxonomy"]', NS) if taxon_ref is not None: - associations.append( - clean( - { - "subject": cdm_id, - "object": f"NCBITaxon:{taxon_ref.get('id')}", - "predicate": None, - "evidence_type": None, - "supporting_objects": None, - "publications": None, - } - ) - ) - - # Database cross-references with evidence - for dbref in entry.findall("u:dbReference", NS): - db_type = dbref.get("type") - db_id = dbref.get("id") - association = { - "subject": cdm_id, - "object": f"{db_type}:{db_id}", - "predicate": None, - "evidence_type": None, - "supporting_objects": None, - "publications": None, - } - evidence_key = dbref.get("evidence") - if evidence_key and evidence_key in evidence_map: - association.update(evidence_map[evidence_key]) - associations.append(clean(association)) + tax_id = taxon_ref.get("id") + if tax_id: + associations.append(_make_association(cdm_id, f"NCBITaxon:{tax_id}", predicate="in_taxon")) - # Catalytic/cofactor - for comment in entry.findall("u:comment", NS): + # Catalytic activity / cofactor + for comment in entry.findall("ns:comment", NS): comment_type = comment.get("type") if comment_type == "catalytic activity": - # extract catalytic associations - for reaction in comment.findall("u:reaction", NS): - for assoc in parse_reaction_association(reaction, cdm_id, evidence_map): - associations.append(clean(assoc)) + for reaction in comment.findall("ns:reaction", NS): + associations.extend(parse_reaction_association(reaction, cdm_id, evidence_map)) elif comment_type == "cofactor": - # extract cofactor associations - for cofactor in comment.findall("u:cofactor", NS): - for assoc in parse_cofactor_association(cofactor, cdm_id): - associations.append(clean(assoc)) + for cofactor in comment.findall("ns:cofactor", NS): + associations.extend(parse_cofactor_association(cofactor, cdm_id)) + return associations -def parse_publications(entry): - """ - Extract all publication references from a UniProt XML - Returns a list of standardized publication IDs (PMID and DOI). 
- """ - publications = [] - - # Iterate through all blocks in the entry - for reference in entry.findall("u:reference", NS): - citation = reference.find("u:citation", NS) - if citation is not None: - # Each may have multiple elements (e.g., PubMed, DOI) - for dbref in citation.findall("u:dbReference", NS): - db_type = dbref.get("type") - db_id = dbref.get("id") - # Standardize format for known publication types - if db_type == "PubMed": - publications.append(f"PMID:{db_id}") - elif db_type == "DOI": - publications.append(f"DOI:{db_id}") - - return publications - - -def parse_uniprot_entry(entry, cdm_id, current_timestamp, datasource_name="UniProt import", prev_created=None): - if prev_created: - entity_created = prev_created - entity_updated = current_timestamp - else: - entity_created = current_timestamp - entity_updated = current_timestamp +def parse_cross_references(entry, cdm_id: str) -> list[dict]: + """Generic -> cross_references table.""" + rows: list[dict] = [] + + for dbref in entry.findall("ns:dbReference", NS): + db_type = dbref.get("type") + db_id = dbref.get("id") + if not db_type or not db_id: + continue + + xref_type = normalize_prefix(db_type) + + if ":" in db_id: + xref = db_id + else: + xref = f"{xref_type}:{db_id}" + + rows.append( + clean_dict( + { + "entity_id": cdm_id, + "xref_type": xref_type, + "xref_value": db_id, + "xref": xref, + } + ) + ) + + return rows + + +def parse_publications(entry) -> list[str]: + publications: list[str] = [] + for reference in entry.findall("ns:reference", NS): + citation = reference.find("ns:citation", NS) + if citation is None: + continue + + raw_pubs, _ = parse_db_references(citation, NS) + for p in raw_pubs: + up = p.upper() + if up.startswith("PUBMED:"): + _, acc = p.split(":", 1) + publications.append(f"PMID:{acc}") + elif up.startswith("DOI:"): + _, acc = p.split(":", 1) + publications.append(f"DOI:{acc}") + + return list(dict.fromkeys(publications)) + + +def parse_uniprot_entry( + entry, + cdm_id: str, + current_timestamp: str, + datasource_name: str = "UniProt import", + prev_created: str | None = None, +) -> dict: + entity_created = prev_created or current_timestamp + entity_updated = current_timestamp uniprot_created = entry.attrib.get("created") uniprot_modified = entry.attrib.get("modified") or entry.attrib.get("updated") uniprot_version = entry.attrib.get("version") + entity = { "entity_id": cdm_id, "entity_type": "protein", @@ -389,65 +575,21 @@ def parse_uniprot_entry(entry, cdm_id, current_timestamp, datasource_name="UniPr "uniprot_created": uniprot_created, "uniprot_modified": uniprot_modified, } + evidence_map = parse_evidence_map(entry) + return { "entity": entity, "identifiers": parse_identifiers(entry, cdm_id), "names": parse_names(entry, cdm_id), "protein": parse_protein_info(entry, cdm_id), "associations": parse_associations(entry, cdm_id, evidence_map), + "cross_references": parse_cross_references(entry, cdm_id), "publications": parse_publications(entry), } -def download_file(url, output_path, chunk_size=8192, overwrite=False) -> None: - """ - Download a file from a given URL to a local output path. 
- """ - # Skip download if file already exists and not overwriting - if os.path.exists(output_path) and not overwrite: - print(f"File '{output_path}' already exists.") - return - - # Stream download to avoid high memory usage - try: - with requests.get(url, stream=True, timeout=60) as response: - response.raise_for_status() - with open(output_path, "wb") as f: - for chunk in response.iter_content(chunk_size=chunk_size): - if chunk: - f.write(chunk) - print(f"Downloaded '{url}' to '{output_path}'") - except Exception as e: - print(f"Failed to download '{url}': {e}") - - if os.path.exists(output_path): - os.remove(output_path) # Remove incomplete file - raise - - -def stream_uniprot_xml(filepath): - """ - Stream and parse UniProt XML entries from a local gzipped file. - Yields each element as soon as it is parsed to avoid loading the entire XML into memory. - """ - # Open the gzipped XML file for reading in binary mode - with gzip.open(filepath, "rb") as f: - # Use iterparse to process XML incrementally, triggering on element end events - context = ET.iterparse(f, events=("end",)) - for _event, element in context: - # Check tag name, ignoring namespace - if element.tag.endswith("entry"): - yield element - element.clear() - - -## ================================ SCHEMA ================================= -""" -Defines the Spark schema for all major CDM tables derived from UniProt XML. -Each schema is tailored for protein entities, identifiers, protein details, names, associations, and linked publications. -""" - +# ================================ SCHEMA ================================= schema_entities = StructType( [ StructField("entity_id", StringType(), False), @@ -505,6 +647,15 @@ def stream_uniprot_xml(filepath): ] ) +schema_cross_references = StructType( + [ + StructField("entity_id", StringType(), False), + StructField("xref_type", StringType(), True), + StructField("xref_value", StringType(), True), + StructField("xref", StringType(), True), + ] +) + schema_publications = StructType( [ StructField("entity_id", StringType(), False), @@ -513,126 +664,74 @@ def stream_uniprot_xml(filepath): ) -def save_batches_to_delta(spark, tables, output_dir, namespace) -> None: - """ - Persist batches of parsed records for each CDM table into Delta Lake format. - - - Each table is saved into a Delta directory named '{namespace}_{table}_delta' in the output folder. - - If the Delta directory exists, append new records. Otherwise, overwrite it. - - Registers the table in the Spark SQL for downstream query. - """ - for table, (records, schema) in tables.items(): - if not records: - continue # Skip all empty tables - - delta_dir = os.path.abspath(os.path.join(output_dir, f"{namespace}_{table}_delta")) - # Use "append" mode if the Delta directory already exists, otherwise "overwrite" - mode = "append" if os.path.exists(delta_dir) else "overwrite" - - print( - f"[DEBUG] Registering table: {namespace}.{table} at {delta_dir} with mode={mode}, record count: {len(records)}" - ) - - try: - df = spark.createDataFrame(records, schema) - df.write.format("delta").mode(mode).option("overwriteSchema", "true").save(delta_dir) - spark.sql(f""" - CREATE TABLE IF NOT EXISTS {namespace}.{table} - USING DELTA - LOCATION '{delta_dir}' - """) - except Exception as e: - print(f"Failed to save {table} to Delta: {e}") - - -def prepare_local_xml(xml_url, output_dir): - """ - Download the remote UniProt XML (.xml.gz) file to the specified local output directory, - unless the file already exists locally. 
Returns the full local file path. - """ - # Ensure output directory exists - os.makedirs(output_dir, exist_ok=True) - local_xml_path = os.path.join(output_dir, os.path.basename(xml_url)) - # Download only if file does not exist - download_file(xml_url, local_xml_path) - return local_xml_path - - -def save_datasource_record(xml_url, output_dir): - """ - Generate and save the datasource provenance record as a JSON file in the output directory. - """ - datasource = build_datasource_record(xml_url) - os.makedirs(output_dir, exist_ok=True) # Ensure output directory exists - output_path = os.path.join(output_dir, "datasource.json") - with open(output_path, "w") as f: - json.dump(datasource, f, indent=4) - return datasource - - -def get_spark_session(namespace): - """ - Initialize SparkSession with Delta Lake support, and ensure the target database exists. - """ - # Build SparkSession with Delta extensions enabled - builder = ( - SparkSession.builder.appName("DeltaIngestion") - .config("spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtension") - .config( - "spark.sql.catalog.spark_catalog", - "org.apache.spark.sql.delta.catalog.DeltaCatalog", - ) - ) - spark = configure_spark_with_delta_pip(builder).getOrCreate() - # Ensure the target namespace (database) exists +# ================================ DELTA WRITE ================================= +def ensure_tables_registered(spark: SparkSession, output_dir: str, namespace: str, table_names: list[str]) -> None: spark.sql(f"CREATE DATABASE IF NOT EXISTS {namespace}") - return spark - - -def load_existing_entity(spark, output_dir, namespace): - """ - Load the existing entities_delta Delta table and build a mapping of entity_id to created timestamp. - This mapping is used to support upserts and idempotent writes. - """ - old_created_dict = {} - entities_table_path = os.path.abspath(os.path.join(output_dir, f"{namespace}_entities_delta")) - if os.path.exists(entities_table_path): - try: - # Read only the required columns for efficiency - old_df = spark.read.format("delta").load(entities_table_path).select("entity_id", "created") - for row in old_df.collect(): - old_created_dict[row["entity_id"]] = row["created"] - print(f"Loaded {len(old_created_dict)} existing entity_id records for upsert.") - except Exception as e: - print(f"Couldn't load previous entities delta table: {e}") - else: - print(f"No previous entities delta at {entities_table_path}.") - return old_created_dict + for tbl in table_names: + # delta_dir = os.path.abspath(os.path.join(output_dir, namespace, tbl)) + delta_dir = delta_table_path(output_dir, namespace, tbl) + spark.sql( + f""" + CREATE TABLE IF NOT EXISTS {namespace}.{tbl} + USING DELTA + LOCATION '{delta_dir}' + """ + ) -def parse_entries(local_xml_path, target_date, batch_size, spark, tables, output_dir, namespace, current_timestamp): - """ - Parse UniProt XML entries, write to Delta Lake in batches - Return (processed_entry_count, skipped_entry_count). 
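+    Stream-parse UniProt XML entries from `local_xml_path`, appending parsed
+    rows to `tables` and flushing each table to Delta every `batch_size`
+    entries.
+
+    Returns (processed_entry_count, skipped_entry_count).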
+def save_batches_to_delta( + spark: SparkSession, + tables: dict[str, tuple[list, StructType]], + output_dir: str, + namespace: str, + mode: str = "append", +) -> None: + for table_name, (records, schema) in tables.items(): + if not records: + continue - """ + # delta_dir = os.path.abspath(os.path.join(output_dir, namespace, table_name)) + delta_dir = delta_table_path(output_dir, namespace, table_name) + df = spark.createDataFrame(records, schema) + writer = df.write.format("delta").mode(mode) + + if mode == "append": + writer = writer.option("mergeSchema", "true") + if mode == "overwrite": + writer = writer.option("overwriteSchema", "true") + + writer.save(delta_dir) + + +## =============================== MAIN PARSING LOOP ================================= +def parse_entries( + local_xml_path: str, + target_date: str | None, + batch_size: int, + spark: SparkSession, + tables: dict[str, tuple[list, StructType]], + output_dir: str, + namespace: str, + current_timestamp: str, + accession_to_entity_id: dict[str, str], + entity_id_to_created: dict[str, str], + mode: str, +) -> tuple[int, int]: target_date_dt = None - - # Convert target_date string to datetime for comparison if provided if target_date: try: target_date_dt = datetime.datetime.strptime(target_date, "%Y-%m-%d") + logger.info("Target date filter enabled: >= %s", target_date) except Exception: - print(f"Invalid target date is {target_date}") + logger.warning("Invalid target date provided: %s (ignored)", target_date) + target_date_dt = None entry_count, skipped = 0, 0 - # Iterate over each element in the XML file for entry_elem in stream_uniprot_xml(local_xml_path): try: - # Get the modification date of the entry mod_date = entry_elem.attrib.get("modified") or entry_elem.attrib.get("updated") - # If target_date is set, skip entries older than target_date + if target_date_dt and mod_date: try: entry_date_dt = datetime.datetime.strptime(mod_date[:10], "%Y-%m-%d") @@ -643,110 +742,197 @@ def parse_entries(local_xml_path, target_date, batch_size, spark, tables, output skipped += 1 continue - # Extract main accession (skip entry if not present) - main_accession_elem = entry_elem.find("u:accession", NS) - if main_accession_elem is None or main_accession_elem.text is None: + main_accession_elem = entry_elem.find("ns:accession", NS) + if main_accession_elem is None or not main_accession_elem.text: skipped += 1 continue - # Generate a unique CDM ID (UUID) for this entry - cdm_id = generate_cdm_id() + accession = main_accession_elem.text.strip() + + cdm_id = accession_to_entity_id.get(accession) or stable_cdm_id_from_uniprot_accession(accession) + prev_created = entity_id_to_created.get(cdm_id) + + record = parse_uniprot_entry(entry_elem, cdm_id, current_timestamp, prev_created=prev_created) - # Parse all sub-objects: entity, identifiers, names, protein, associations, publications - record = parse_uniprot_entry(entry_elem, cdm_id, current_timestamp) tables["entities"][0].append(record["entity"]) tables["identifiers"][0].extend(record["identifiers"]) tables["names"][0].extend(record["names"]) if record["protein"]: tables["proteins"][0].append(record["protein"]) + tables["associations"][0].extend(record["associations"]) - tables["publications"][0].extend( - {"entity_id": record["entity"]["entity_id"], "publication": pub} for pub in record["publications"] - ) + tables["cross_references"][0].extend(record["cross_references"]) + + for pub in record["publications"]: + tables["publications"][0].append( + { + "entity_id": cdm_id, + "publication": 
pub, + } + ) entry_count += 1 - # Write batch to Delta and clear lists every batch_size entries + if entry_count % batch_size == 0: - save_batches_to_delta(spark, tables, output_dir, namespace) + save_batches_to_delta(spark, tables, output_dir, namespace, mode=mode) for v in tables.values(): v[0].clear() - print(f"{entry_count} entries processed and saved") - except Exception as e: - # If any error occurs in parsing this entry, skip it and count - print(f"Error parsing entry: {e}") + logger.info("Processed and saved %d entries...", entry_count) + + except Exception: + logger.exception("Error parsing UniProt entry, skipping") skipped += 1 - continue - # write remaining records - save_batches_to_delta(spark, tables, output_dir, namespace) + save_batches_to_delta(spark, tables, output_dir, namespace, mode=mode) return entry_count, skipped -def ingest_uniprot(xml_url, output_dir, namespace, target_date=None, batch_size=5000) -> None: - # Generate the timestamp for the current run +def ingest_uniprot( + xml_url: str, + output_dir: str, + namespace: str, + target_date: str | None = None, + batch_size: int = 5000, + mode: str = "append", + overwrite_download: bool = False, +) -> None: current_timestamp = datetime.datetime.now(datetime.UTC).isoformat() - # Prepare local XML - local_xml_path = prepare_local_xml(xml_url, output_dir) - - # Save data source meta information + local_xml_path = prepare_local_xml(xml_url, output_dir, overwrite=overwrite_download) save_datasource_record(xml_url, output_dir) - # Get Spark and the existing CDM entity_id spark = get_spark_session(namespace) + if mode == "append": + accession_to_entity_id, entity_id_to_created = load_existing_maps(spark, output_dir, namespace) + else: + accession_to_entity_id, entity_id_to_created = {}, {} - # Define the table structure (batch storage) - entities, identifiers, names, proteins, associations, publications = ( - [], - [], - [], - [], - [], - [], - ) - tables = { + # accession_to_entity_id, entity_id_to_created = load_existing_maps(spark, output_dir, namespace) + + entities: list[dict] = [] + identifiers: list[dict] = [] + names: list[dict] = [] + proteins: list[dict] = [] + associations: list[dict] = [] + cross_references: list[dict] = [] + publications: list[dict] = [] + + tables: dict[str, tuple[list, StructType]] = { "entities": (entities, schema_entities), "identifiers": (identifiers, schema_identifiers), "names": (names, schema_names), "proteins": (proteins, schema_proteins), "associations": (associations, schema_associations), + "cross_references": (cross_references, schema_cross_references), "publications": (publications, schema_publications), } - # Main cycle processing, transfer to current timestamp + ensure_tables_registered( + spark, + output_dir, + namespace, + [ + "entities", + "identifiers", + "names", + "proteins", + "associations", + "cross_references", + "publications", + ], + ) + + logger.info( + "Starting UniProt ingestion: xml=%s | namespace=%s | mode=%s | batch_size=%d", + xml_url, + namespace, + mode, + batch_size, + ) + entry_count, skipped = parse_entries( - local_xml_path, target_date, batch_size, spark, tables, output_dir, namespace, current_timestamp + local_xml_path=local_xml_path, + target_date=target_date, + batch_size=batch_size, + spark=spark, + tables=tables, + output_dir=output_dir, + namespace=namespace, + current_timestamp=current_timestamp, + accession_to_entity_id=accession_to_entity_id, + entity_id_to_created=entity_id_to_created, + mode=mode, ) - print(f"All entries processed 
({entry_count}), skipped {skipped}, writing complete tables.") - spark.sql(f"SHOW TABLES IN {namespace}").show() - spark.sql(f"SELECT COUNT(*) FROM {namespace}.entities").show() - # make sql test in entity table - spark.sql(f"SELECT * FROM {namespace}.entities LIMIT 10").show(truncate=False) + logger.info("Completed parsing UniProt XML. processed=%d skipped=%d", entry_count, skipped) - spark.stop() + logger.info("Verifying Delta tables in namespace `%s`", namespace) + spark.sql(f"SHOW TABLES IN {namespace}").show(truncate=False) - print(f"All Delta tables are created and registered in Spark SQL under `{namespace}`.") + for tbl in [ + "entities", + "identifiers", + "names", + "proteins", + "associations", + "cross_references", + "publications", + ]: + logger.info("Verifying table: %s.%s", namespace, tbl) + spark.sql(f"SELECT COUNT(*) AS row_count FROM {namespace}.{tbl}").show(truncate=False) + spark.sql(f"SELECT * FROM {namespace}.{tbl} LIMIT 5").show(truncate=False) + + spark.stop() + logger.info("Done") +# ================================ CLI ================================= @click.command() @click.option("--xml-url", required=True, help="URL to UniProt XML (.xml.gz)") -@click.option("--output-dir", default="output", help="Output directory for Delta tables") -@click.option("--namespace", default="uniprot_db", help="Delta Lake database name") +@click.option( + "--output-dir", + default="output", + show_default=True, + help="Output directory for Delta tables", +) +@click.option( + "--namespace", + default="uniprot_db", + show_default=True, + help="Delta Lake database name", +) @click.option( "--target-date", default=None, help="Only process entries modified/updated since this date (YYYY-MM-DD)", ) -@click.option("--batch-size", default=5000, help="Batch size for writing Delta tables") -def main(xml_url, output_dir, namespace, target_date, batch_size) -> None: +@click.option( + "--batch-size", + default=5000, + show_default=True, + help="Batch size for writing Delta tables", +) +@click.option( + "--mode", + type=click.Choice(["append", "overwrite"]), + default="append", + show_default=True, +) +@click.option( + "--overwrite-download", + is_flag=True, + help="Force re-download XML even if file exists", +) +def main(xml_url, output_dir, namespace, target_date, batch_size, mode, overwrite_download): ingest_uniprot( xml_url=xml_url, output_dir=output_dir, namespace=namespace, target_date=target_date, batch_size=int(batch_size), + mode=mode, + overwrite_download=overwrite_download, ) diff --git a/src/cdm_data_loader_utils/parsers/uniref.py b/src/cdm_data_loader_utils/parsers/uniref.py index da3327c..6f24bb3 100644 --- a/src/cdm_data_loader_utils/parsers/uniref.py +++ b/src/cdm_data_loader_utils/parsers/uniref.py @@ -1,5 +1,5 @@ """ -UniRef XML Cluster ETL Pipeline. +UniRef XML Cluster ETL Pipeline This script downloads a UniRef100 XML file, parses cluster and member information, and writes the extracted data into Delta Lake tables for downstream analysis. @@ -22,7 +22,12 @@ --output-dir cdm-data-loader-utils/output/uniref100_clusters \ --batch-size 1000 -**Parameters:** +python3 uniref.py \ + --ftp-url https://ftp.uniprot.org/pub/databases/uniprot/uniref/uniref100/uniref100.xml.gz \ + --output-dir output_uniref \ + --batch-size 1000 + +Parameters: - --ftp-url: UniProt FTP URL to the UniRef100 gzipped XML file. - --output-dir: Output directory where Delta tables will be written. - --batch-size: Number of UniRef entries to process. 
@@ -30,118 +35,213 @@
 """
 
 import gzip
+import logging
 import os
 import uuid
 import xml.etree.ElementTree as ET
 from datetime import datetime
-from urllib.request import URLError, urlretrieve
-
+from datetime import timezone
+from pathlib import Path
+from urllib.error import URLError
+from urllib.request import urlretrieve
 import click
 from delta import configure_spark_with_delta_pip
 from pyspark.sql import SparkSession
 from pyspark.sql.types import StringType, StructField, StructType
+
+from cdm_data_loader_utils.parsers.xml_utils import get_text, parse_properties
+
+
+logger = logging.getLogger(__name__)
+
+
+UNIREF_NS = {"ns": "http://uniprot.org/uniref"}
+DATA_SOURCE = "UniRef 100/90/50"
+
+
+PREFIX_TRANSLATION = {
+    "UniProtKB ID": "UniProt",
+    "UniProtKB accession": "UniProt",
+    "UniParc ID": "UniParc",
+    "UniRef90 ID": "UniRef90",
+    "UniRef50 ID": "UniRef50",
+    "UniRef100 ID": "UniRef100",
+}
+
+
+def generate_dbxref(db: str, acc: str) -> str:
+    """Generate a database reference that uses BioRegistry prefixes."""
+    # Fall back to the raw database name when no prefix mapping exists.
+    return f"{PREFIX_TRANSLATION.get(db, db)}:{acc}"
+
+
+def cdm_entity_id(value: str, prefix: str = "CDM:") -> str:
+    """
+    Deterministic UUIDv5-based CDM id generator.
+
+    value must be non-empty.
+    """
+    if not value:
+        raise ValueError("Value must be a non-empty string")
+
+    return f"{prefix}{uuid.uuid5(uuid.NAMESPACE_OID, value)}"
+
+
+def get_timestamps(
+    uniref_id: str,
+    existing_created: dict[str, str],
+    now: datetime | None = None,
+) -> tuple[str, str]:
+    """
+    Return (updated_time, created_time) for a given UniRef cluster ID.
+    - All timestamps are UTC ISO8601 with timezone (e.g., 2026-01-05T12:34:56+00:00)
+    - uniref_id must be non-empty (schema invariant)
+    """
+    if not uniref_id:
+        raise ValueError("get_timestamps: uniref_id must be a non-empty string")
 
-# Generate a unique CDM entity_id based on accession
-def cdm_entity_id(accession) -> str | None:
-    if not accession:
-        return None
-    uuid_part = uuid.uuid5(uuid.NAMESPACE_OID, accession)
-    return f"CDM:{uuid_part}"
+    now_dt = now or datetime.now(timezone.utc)
+    updated_time = now_dt.isoformat(timespec="seconds")
+    created_time = existing_created.get(uniref_id) or updated_time
 
+    return updated_time, created_time
 
-# Download a file from the specified URL to the local path if it does not already exist
-def download_file(url, local_path) -> None:
+
+def download_file(url: str, local_path: str, overwrite: bool = False) -> str:
     """
-    If the file is already present at local, the function does nothing.
-    If the download fails, any partially downloaded file will be removed.
+    Download URL -> local_path.
+    - Atomic: downloads to .part then os.replace
+    - Idempotent: skips if exists unless overwrite=True
+    Returns the final local_path.
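+
+    Example (illustrative; mirrors how main() calls this function):
+        download_file(
+            "https://ftp.uniprot.org/pub/databases/uniprot/uniref/uniref100/uniref100.xml.gz",
+            "/tmp/uniref100.xml.gz",
+        )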
""" - if not os.path.exists(local_path): - print(f"Downloading from URL link: {url}") + dst = Path(local_path) + dst.parent.mkdir(parents=True, exist_ok=True) + + if dst.exists() and not overwrite: + logger.info("File already exists, skip download: %s", dst) + return str(dst) + + tmp = dst.with_suffix(dst.suffix + ".part") + + try: + if tmp.exists(): + tmp.unlink() + except Exception: + logger.exception("Failed to remove partial download: %s", tmp) + + logger.info("Downloading %s -> %s", url, dst) + try: + urlretrieve(url, str(tmp)) + os.replace(tmp, dst) + logger.info("Download complete: %s", dst) + return str(dst) + except Exception: + logger.exception("Failed to download %s", url) try: - urlretrieve(url, local_path) - print("Download completed!") - except Exception as e: - print(f"Failed to download {url}: {e}") - if os.path.exists(local_path): - os.remove(local_path) - raise - else: - print(f"File already exists: {local_path}") + if tmp.exists(): + tmp.unlink() + except Exception: + logger.exception("Failed to cleanup tmp file: %s", tmp) + raise -# Load mapping from data_source_entity_id to created timestamp from Delta table -def load_existing_created(spark, entity_table): - existing_created = {} +def load_existing_created(spark: SparkSession, entity_table: str | None) -> dict[str, str]: + """ + Load mapping data_source_entity_id -> created timestamp from the Entity Delta table. + Returns an empty dict if the table does not exist. + """ + existing_created: dict[str, str] = {} if not entity_table: - print("Entity table path not specified.") + logger.warning("Entity table path not specified.") return existing_created try: df = spark.read.format("delta").load(entity_table).select("data_source_entity_id", "created") existing_created = {row["data_source_entity_id"]: row["created"] for row in df.collect()} - print(f"Loaded {len(existing_created)} existing created timestamps.") + logger.info(f"Loaded {len(existing_created)} existing created timestamps from {entity_table}.") except Exception as e: - print(f"No existing Delta table found at {entity_table}. Starting fresh. ({e.__class__.__name__})") + logger.warning(f"No existing Delta table found at {entity_table}. Starting fresh. ({e.__class__.__name__})") return existing_created ##### -------------- List utility function --------------- ##### - - -# Helper function to extract basic cluster info from XML entry element -def extract_cluster(elem, ns): - cluster_id = f"CDM:{uuid.uuid4()}" +def extract_cluster( + elem: ET.Element, + ns: dict[str, str], + uniref_id: str, +) -> tuple[str, str]: + """ + Extract a deterministic CDM cluster_id and the UniRef cluster name. + """ + cluster_id = cdm_entity_id(value=uniref_id) or f"CDM:{uuid.uuid4()}" name_elem = elem.find("ns:name", ns) - name = name_elem.text if name_elem is not None else "UNKNOWN" - return cluster_id, name + name = get_text(elem=name_elem, default="UNKNOWN") or "UNKNOWN" + return cluster_id, name -# Returns tuple of (updated_time, created_time) -def get_timestamps(uniref_id, existing_created, now=None): - now_dt = now or datetime.now() - formatted_now = now_dt.strftime("%Y-%m-%dT%H:%M:%S") - created = existing_created.get(uniref_id) - created_time = (created.split(".")[0] if "." in created else created) if created else formatted_now - return formatted_now, created_time +def get_accession_and_seed(dbref: ET.Element | None, ns: dict[str, str]) -> tuple[str | None, bool]: + """ + Extract UniProtKB accession and is_seed status from a dbReference element. 
+ """ -# Extract UniProtKB accession and is_seed status from a dbReference element -def get_accession_and_seed(dbref, ns): if dbref is None: return None, False - prop_elems = dbref.findall("ns:property", ns) - props = {} - for prop in prop_elems: - t = prop.attrib["type"] - v = prop.attrib["value"] - props[t] = v + props = parse_properties(dbref, ns) + + raw_acc = props.get("UniProtKB accession") + if isinstance(raw_acc, list): + accession = raw_acc[0] if raw_acc else None + else: + accession = raw_acc # string or None + + raw_seed = props.get("isSeed") + if isinstance(raw_seed, list): + is_seed = bool(raw_seed) and raw_seed[0].lower() == "true" + else: + is_seed = raw_seed is not None and raw_seed.lower() == "true" - acc = props.get("UniProtKB accession") or dbref.attrib.get("id") - is_seed = props.get("isSeed", "false").lower() == "true" - return acc, is_seed + return accession, is_seed -# Add both representative and other cluster members into cluster_member_data list -def add_cluster_members(cluster_id, repr_db, elem, cluster_member_data, ns) -> None: - dbrefs = [] +def add_cluster_members( + cluster_id: str, + repr_db: ET.Element | None, + elem: ET.Element, + cluster_member_rows: list[tuple[str, str, str, str, str]], + ns: dict[str, str], +) -> None: + """Populate cluster_member_rows with representative, member records.""" + dbrefs: list[tuple[ET.Element, bool]] = [] if repr_db is not None: dbrefs.append((repr_db, True)) for mem in elem.findall("ns:member/ns:dbReference", ns): dbrefs.append((mem, False)) for dbref, is_representative in dbrefs: - acc, is_seed = get_accession_and_seed(dbref, ns) - if acc: - member_entity_id = cdm_entity_id(acc) - cluster_member_data.append( - (cluster_id, member_entity_id, str(is_representative).lower(), str(is_seed).lower(), "1.0") + accession, is_seed = get_accession_and_seed(dbref, ns) + if not accession: + continue + + member_entity_id = cdm_entity_id(accession) + if not member_entity_id: + continue + + cluster_member_rows.append( + ( + cluster_id, + member_entity_id, + str(is_representative).lower(), + str(is_seed).lower(), + "1.0", # score placeholder ) + ) -# Extract cross-references (UniRef90/50/UniParc) from a dbReference element def extract_cross_refs(dbref, cross_reference_data, ns) -> None: if dbref is None: return @@ -153,83 +253,95 @@ def extract_cross_refs(dbref, cross_reference_data, ns) -> None: cross_reference_data.append((entity_id, i, props[i])) -##### -------------- Parse Uniref XML --------------- ##### +def parse_uniref_entry( + elem: ET.Element, existing_created: dict[str, str], ns: dict[str, str] +) -> dict[str, list[tuple]]: + """ + Parse a single UniRef element into CDM-friendly row tuples. + """ + cluster_rows: list[tuple[str, str, str, str | None, str]] = [] + entity_rows: list[tuple[str, str, str, str, str, str]] = [] + member_rows: list[tuple[str, str, str, str, str]] = [] + xref_rows: list[tuple[str, str, str]] = [] + + uniref_id = elem.attrib.get("id") or "" + + cluster_id, name = extract_cluster(elem, ns, uniref_id) + updated_time, created_time = get_timestamps(uniref_id, existing_created) + + # Cluster table + cluster_rows.append( + ( + cluster_id, + name, + "protein", + None, + DATA_SOURCE, + ) + ) + # Entity table + entity_rows.append( + ( + cluster_id, + uniref_id, + "Cluster", + DATA_SOURCE, + updated_time, + created_time, + ) + ) -def parse_uniref_xml(local_gz, batch_size, existing_created): - """ - Parse UniRef XML (gzipped) and extract cluster, entity, cluster member, UniProtKB member, and cross-reference info. 
+ # Cross references from representative and members + repr_db = elem.find("ns:representativeMember/ns:dbReference", ns) + extract_cross_refs(repr_db, xref_rows, ns) + + for mem in elem.findall("ns:member/ns:dbReference", ns): + extract_cross_refs(mem, xref_rows, ns) - Args: - local_gz (str): Local gzipped UniRef XML path. - batch_size (int): Maximum number of entries to parse. - existing_created (dict): Mapping from UniRef cluster ID to 'created' timestamp for idempotent imports. + # Cluster members (representative + members) + add_cluster_members(cluster_id, repr_db, elem, member_rows, ns) - Returns: - dict: Dictionary with lists for each CDM table + return { + "cluster_data": cluster_rows, + "entity_data": entity_rows, + "cluster_member_data": member_rows, + "cross_reference_data": xref_rows, + } + + +##### -------------- Parse Uniref XML --------------- ##### +def parse_uniref_xml(local_gz: str, batch_size: int, existing_created: dict[str, str]) -> dict[str, list[tuple]]: + """ + Stream-parse UniRef XML (gzipped) and extract CDM-like row tuples. """ - ns = {"ns": "http://uniprot.org/uniref"} # Namespace for XML parsing + ns = UNIREF_NS entry_count = 0 - # Initialize lists to collect parsed rows for different tables - cluster_data = [] - entity_data = [] - cluster_member_data = [] - cross_reference_data = [] + cluster_data: list[tuple] = [] + entity_data: list[tuple] = [] + cluster_member_data: list[tuple] = [] + cross_reference_data: list[tuple] = [] with gzip.open(local_gz, "rb") as f: - # Stream parse the XML to avoid memory issues with big files context = ET.iterparse(f, events=("end",)) for _, elem in context: - if elem.tag.endswith("entry"): - # Cluster basic info - cluster_id, name = extract_cluster(elem, ns) - - # Get UniRef cluster id and timestamps - uniref_id = elem.attrib.get("id") - updated_time, created_time = get_timestamps(uniref_id, existing_created) - - # Populate Cluster and Entity table data - cluster_data.append( - ( - cluster_id, # cluster_id - name, # cluster name - "protein", # entity_type (fixed value) - None, # description (not present) - "UniRef 100", # protocol_id - ) - ) - - entity_data.append( - ( - cluster_id, # entity_id (matches cluster_id) - uniref_id, # data_source_entity_id (UniRef100_xxx) - "Cluster", # entity_type - "UniRef 100", # data_source - updated_time, # updated - created_time, # created - ) - ) - - # Extract UniProtKB member attributes and cross-references - repr_db = elem.find("ns:representativeMember/ns:dbReference", ns) - extract_cross_refs(repr_db, cross_reference_data, ns) - - for mem in elem.findall("ns:member/ns:dbReference", ns): - extract_cross_refs(mem, cross_reference_data, ns) - - # ClusterMember table (representative + members) - add_cluster_members(cluster_id, repr_db, elem, cluster_member_data, ns) - - # Batch size limit - entry_count += 1 - if entry_count >= batch_size: - break - - # Release element to save memory - elem.clear() - - print(f"Parsed {entry_count} clusters") + if not elem.tag.endswith("entry"): + continue + + parsed = parse_uniref_entry(elem, existing_created, ns) + cluster_data.extend(parsed["cluster_data"]) + entity_data.extend(parsed["entity_data"]) + cluster_member_data.extend(parsed["cluster_member_data"]) + cross_reference_data.extend(parsed["cross_reference_data"]) + + entry_count += 1 + if entry_count >= batch_size: + break + + elem.clear() + + logger.info(f"Parsed {entry_count} clusters") return { "cluster_data": cluster_data, "entity_data": entity_data, @@ -238,10 +350,8 @@ def 
parse_uniref_xml(local_gz, batch_size, existing_created): } -##### -------------- Save dalta table and print the preview --------------- ##### - - -def save_delta_tables(spark, output_dir, data_dict) -> None: +##### -------------- Save delta table and print the preview --------------- ##### +def save_delta_tables(spark, output_dir, data_dict): # Cluster cluster_schema = StructType( [ @@ -255,7 +365,7 @@ def save_delta_tables(spark, output_dir, data_dict) -> None: cluster_df = spark.createDataFrame(data_dict["cluster_data"], cluster_schema) cluster_df.write.format("delta").mode("overwrite").save(os.path.join(output_dir, "Cluster")) - print(f"Cluster Delta table written to: {os.path.join(output_dir, 'Cluster')}") + logger.info(f"Cluster Delta table written to: {os.path.join(output_dir, 'Cluster')}") # Entity entity_schema = StructType( @@ -272,7 +382,7 @@ def save_delta_tables(spark, output_dir, data_dict) -> None: entity_df = spark.createDataFrame(data_dict["entity_data"], entity_schema) entity_table_path = os.path.join(output_dir, "Entity") entity_df.write.format("delta").mode("overwrite").save(entity_table_path) - print(f"Entity Delta table written to: {entity_table_path}") + logger.info(f"Entity Delta table written to: {entity_table_path}") # ClusterMember cluster_member_schema = StructType( @@ -288,7 +398,7 @@ def save_delta_tables(spark, output_dir, data_dict) -> None: cluster_member_df = spark.createDataFrame(data_dict["cluster_member_data"], cluster_member_schema) cluster_member_path = os.path.join(output_dir, "ClusterMember") cluster_member_df.write.format("delta").mode("overwrite").save(cluster_member_path) - print(f"ClusterMember Delta table written to: {cluster_member_path}") + logger.info(f"ClusterMember Delta table written to: {cluster_member_path}") # CrossReference cross_reference_schema = StructType( @@ -302,22 +412,22 @@ def save_delta_tables(spark, output_dir, data_dict) -> None: cross_reference_df = spark.createDataFrame(data_dict["cross_reference_data"], cross_reference_schema) cross_reference_path = os.path.join(output_dir, "CrossReference") cross_reference_df.write.format("delta").mode("overwrite").save(cross_reference_path) - print(f"CrossReference Delta table written to: {cross_reference_path}") + logger.info(f"CrossReference Delta table written to: {cross_reference_path}") # Previews - print("Sample Clusters:") + logger.info("Sample Clusters:") cluster_df.createOrReplaceTempView("Cluster") spark.sql("SELECT * FROM Cluster LIMIT 20").show(truncate=False) - print("Sample Entities:") + logger.info("Sample Entities:") entity_df.createOrReplaceTempView("Entity") spark.sql("SELECT * FROM Entity LIMIT 20").show(truncate=False) - print("Sample ClusterMembers:") + logger.info("Sample ClusterMembers:") cluster_member_df.createOrReplaceTempView("ClusterMember") spark.sql("SELECT * FROM ClusterMember LIMIT 20").show(truncate=False) - print("Sample CrossReferences:") + logger.info("Sample CrossReferences:") cross_reference_df.createOrReplaceTempView("CrossReference") spark.sql("SELECT * FROM CrossReference LIMIT 20").show(truncate=False) @@ -327,17 +437,27 @@ def build_spark_session(): builder = ( SparkSession.builder.appName("UniRef Cluster Extractor") .config("spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtension") - .config("spark.sql.catalog.spark_catalog", "org.apache.spark.sql.delta.catalog.DeltaCatalog") + .config( + "spark.sql.catalog.spark_catalog", + "org.apache.spark.sql.delta.catalog.DeltaCatalog", + ) ) return 
configure_spark_with_delta_pip(builder).getOrCreate()


-# Click command-line interface for parameter parsing
 @click.command()
 @click.option("--ftp-url", required=True, help="FTP URL to UniRef100 XML file")
 @click.option("--output-dir", required=True, help="Output directory for Delta table")
 @click.option("--batch-size", default=1000, help="Number of UniRef entries to parse (limit)")
-def main(ftp_url, output_dir, batch_size) -> None:
+def main(ftp_url, output_dir, batch_size):
+    # set up logging in CLI context
+    logging.basicConfig(
+        level=logging.INFO,
+        format="%(asctime)s [%(levelname)s] (%(name)s:%(lineno)d) %(message)s",
+    )
+
+    logger.info("Starting UniRef100/90/50 Import Pipeline")
+
     # Set local path for downloaded gzipped XML file
     local_gz = os.path.join("/tmp", os.path.basename(ftp_url))

@@ -345,23 +465,31 @@ def main(ftp_url, output_dir, batch_size) -> None:
     try:
         download_file(ftp_url, local_gz)
     except URLError as e:
-        print(f"Error! Cannot download file: {e.reason}")
+        logger.error(f"Cannot download file: {e.reason}")
         return

     # Start Spark session with Delta Lake support
+    logger.info("Building Spark session...")
     spark = build_spark_session()

     # Load existing entity creation timestamps
-    entity_table_path = os.path.join(output_dir, "Entity")
-    existing_created = load_existing_created(spark, entity_table_path)
+    try:
+        entity_table_path = os.path.join(output_dir, "Entity")
+        existing_created = load_existing_created(spark, entity_table_path)
+
+        # Parse the UniRef XML and extract all CDM table data
+        logger.info("Parsing UniRef XML...")
+        data_dict = parse_uniref_xml(local_gz, batch_size, existing_created)

-    # Parse the UniRef XML and extract all CDM table data
-    data_dict = parse_uniref_xml(local_gz, batch_size, existing_created)
+        # Write parsed data to Delta tables in output directory
+        logger.info("Saving Delta tables...")
+        save_delta_tables(spark, output_dir, data_dict)

-    # Write parsed data to Delta tables in output directory
-    save_delta_tables(spark, output_dir, data_dict)
+        logger.info("UniRef100/90/50 Import Pipeline completed successfully.")

-    spark.stop()
+    finally:
+        spark.stop()
+        logger.info("Spark session stopped.")


 if __name__ == "__main__":
diff --git a/src/cdm_data_loader_utils/parsers/xml_utils.py b/src/cdm_data_loader_utils/parsers/xml_utils.py
new file mode 100644
index 0000000..d916799
--- /dev/null
+++ b/src/cdm_data_loader_utils/parsers/xml_utils.py
@@ -0,0 +1,124 @@
+"""
+Shared XML helper utilities used by UniProt and UniRef parsers.
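For readers skimming the new module before the function bodies below: the two basic accessors are deliberately tolerant of missing nodes and stray whitespace, which is what lets the parsers drop their inline None checks. A tiny sketch of that contract, mirroring tests/parsers/test_xml_utils.py:

import xml.etree.ElementTree as ET

from cdm_data_loader_utils.parsers.xml_utils import get_attr, get_text

elem = ET.fromstring('<name id=" 123 ">  hello  </name>')
assert get_text(elem) == "hello"                 # text is stripped
assert get_text(None, default="n/a") == "n/a"    # missing node, no crash
assert get_attr(elem, "id") == "123"             # attribute values are stripped too
assert get_attr(elem, "missing") is None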
+
+This module centralizes common operations:
+- Safe text extraction
+- Safe attribute extraction
+- Property parsing
+- Evidence / dbReference parsing
+- Cleaning dictionaries
+- Deduplicating lists
+"""
+
+import xml.etree.ElementTree as ET
+from typing import Any
+
+# ============================================================
+# Basic Safe Accessors
+# ============================================================
+
+
+def get_text(elem: ET.Element | None, default: str | None = None) -> str | None:
+    """Return the stripped text of elem, or default if the node or its text is missing/empty."""
+    if elem is None:
+        return default
+    if elem.text is None:
+        return default
+    text = elem.text.strip()
+    return text if text else default
+
+
+def get_attr(elem: ET.Element | None, name: str, default: str | None = None) -> str | None:
+    """Return the stripped value of attribute `name`, or default if elem or the attribute is missing."""
+    if elem is None:
+        return default
+    val = elem.get(name)
+    return val.strip() if isinstance(val, str) else default
+
+
+# ============================================================
+# List / Node Finders
+# ============================================================
+
+
+def find_one(elem: ET.Element, xpath: str, ns: dict[str, str]) -> ET.Element | None:
+    """Return first element matching xpath or None."""
+    results = elem.findall(xpath, ns)
+    return results[0] if results else None
+
+
+def find_all_text(elem: ET.Element, xpath: str, ns: dict[str, str]) -> list[str]:
+    """Return list of text values from xpath matches (deduped)."""
+    texts = []
+    for node in elem.findall(xpath, ns):
+        txt = get_text(node)
+        if txt:
+            texts.append(txt)
+    return list(dict.fromkeys(texts))  # preserve order, dedupe
+
+
+def safe_list(x) -> list[Any]:
+    """Normalize a value to a list: None → [], a list → itself, a scalar → [x]."""
+    if x is None:
+        return []
+    if isinstance(x, list):
+        return x
+    return [x]
+
+
+# ============================================================
+# dbReference / property parsing (shared by UniProt + UniRef)
+# ============================================================
+
+
+def parse_properties(dbref: ET.Element | None, ns: dict[str, str]) -> dict[str, list[str]]:
+    """
+    Extract key/value pairs from <property> child elements.
+    """
+    if dbref is None:
+        return {}
+    props = {}
+    for prop in dbref.findall("ns:property", ns):
+        ptype = prop.attrib.get("type")
+        pval = prop.attrib.get("value")
+        if ptype and pval:
+            if ptype not in props:
+                props[ptype] = []
+            props[ptype].append(pval)
+    return props
+
+
+def parse_db_references(elem: ET.Element, ns: dict[str, str], pub_types=("PubMed", "DOI")):
+    """
+    Generic dbReference parser:
+    - Identify publication IDs (PubMed, DOI)
+    - Identify other cross-references (dbType:dbId)
+    """
+    publications = []
+    others = []
+
+    for dbref in elem.findall("ns:dbReference", ns):
+        db_type = dbref.get("type")
+        db_id = dbref.get("id")
+
+        if not db_type or not db_id:
+            continue
+
+        if db_type in pub_types:
+            publications.append(f"{db_type.upper()}:{db_id}")
+        else:
+            others.append(f"{db_type}:{db_id}")
+
+    return publications, others
+
+
+# ============================================================
+# Dict Cleaning
+# ============================================================
+
+
+def clean_dict(d: dict[str, Any]) -> dict[str, Any]:
+    """
+    Remove keys whose value is None, an empty list, or an empty dict.
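The two helpers above carry most of the shared parsing weight. A compact sketch with the same expectations the new test file pins down (the citation element and namespace URI here are illustrative):

import xml.etree.ElementTree as ET

from cdm_data_loader_utils.parsers.xml_utils import clean_dict, parse_db_references

ns = {"ns": "https://uniprot.org/uniprot"}
citation = ET.fromstring(
    '<citation xmlns="https://uniprot.org/uniprot">'
    '<dbReference type="PubMed" id="12345"/>'
    '<dbReference type="DOI" id="10.1000/xyz"/>'
    '<dbReference type="PDB" id="1ABC"/>'
    "</citation>"
)
pubs, others = parse_db_references(citation, ns)
assert pubs == ["PUBMED:12345", "DOI:10.1000/xyz"]   # publication types, upper-cased prefix
assert others == ["PDB:1ABC"]                        # everything else stays dbType:dbId

assert clean_dict({"a": 1, "b": None, "c": [], "d": {}}) == {"a": 1}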
+    """
+    return {k: v for k, v in d.items() if v not in (None, [], {})}
diff --git a/tests/parsers/test_shared_identifiers.py b/tests/parsers/test_shared_identifiers.py
new file mode 100644
index 0000000..b76e9af
--- /dev/null
+++ b/tests/parsers/test_shared_identifiers.py
@@ -0,0 +1,34 @@
+import xml.etree.ElementTree as ET
+
+from cdm_data_loader_utils.parsers.shared_identifiers import parse_identifiers_generic
+
+
+def test_parse_identifiers_generic_basic() -> None:
+    # <entry xmlns="dummy">
+    #   <accession>P12345</accession>
+    #   <accession>Q99999</accession>
+    # </entry>
+    ns = {"ns": "dummy"}
+    entry = ET.Element("entry")
+
+    a1 = ET.SubElement(entry, "accession")
+    a1.text = "P12345"
+    a2 = ET.SubElement(entry, "accession")
+    a2.text = "Q99999"
+
+    # Add namespace prefix to match xpath
+    a1.tag = "{dummy}accession"
+    a2.tag = "{dummy}accession"
+
+    rows = parse_identifiers_generic(
+        entry=entry,
+        xpath="ns:accession",
+        prefix="UniProt",
+        ns=ns,
+    )
+
+    assert len(rows) == 2
+    assert rows[0]["identifier"] == "UniProt:P12345"
+    assert rows[1]["identifier"] == "UniProt:Q99999"
+    assert rows[0]["source"] == "UniProt"
+    assert rows[0]["description"] == "UniProt accession"
diff --git a/tests/parsers/test_uniprot.py b/tests/parsers/test_uniprot.py
index 105fb76..86ffca3 100644
--- a/tests/parsers/test_uniprot.py
+++ b/tests/parsers/test_uniprot.py
@@ -1,4 +1,4 @@
-"""Tests for the UniProt parser.
+"""
 
 This file uses pytest to provide parameterized and functional tests for all major
 UniProt parsing utility functions, ensuring correct parsing and transformation of
@@ -16,756 +16,729 @@
 - parse_uniprot_entry: Full record parsing, all fields together
 
 How to run in the terminal:
-    PYTHONPATH=src pytest tests/test_uniprot.py
+    pytest tests/parsers/test_uniprot.py
 """
 
 import datetime
-import re
+import json
 import xml.etree.ElementTree as ET
-from typing import Any
+from pathlib import Path
 
 import pytest
 
 from cdm_data_loader_utils.parsers.uniprot import (
     build_datasource_record,
-    generate_cdm_id,
     parse_associations,
+    parse_cross_references,
     parse_evidence_map,
     parse_identifiers,
     parse_names,
     parse_protein_info,
-    parse_publications,
-    parse_uniprot_entry,
+    save_datasource_record,
 )
 
-# Regular expression to validate UUID format
-UUID_PATTERN = re.compile(r"^[a-f0-9]{8}-[a-f0-9]{4}-[1-5][a-f0-9]{3}-[89ab][a-f0-9]{3}-[a-f0-9]{12}$", re.IGNORECASE)
+NS_URI = "https://uniprot.org/uniprot"
+
 
+@pytest.fixture(
+    params=[
+        "https://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/complete/uniprot_sprot.xml.gz",
+        "http://example.org/uniprot_test.xml.gz",
+    ]
+)
+def xml_url(request):
+    return request.param
 
-@pytest.mark.parametrize("n", range(5))
-def test_generate_cdm_id_format(n: int) -> None:
-    uuid = generate_cdm_id()
-    assert uuid.startswith("CDM:")
-    uuid_str = uuid[4:]
-    assert UUID_PATTERN.match(uuid_str), f"{uuid_str} is not a valid UUID"
 
+def test_build_datasource_record(xml_url):
+    record = build_datasource_record(xml_url)
 
-## build_datasource_record ##
-def test_build_datasource_record() -> None:
-    url = "https://example.com/uniprot.xml.gz"
-    record = build_datasource_record(url)
+    # ---- basic structure ----
     assert isinstance(record, dict)
-    assert set(record.keys()) == {"name", "source", "url", "accessed", "version"}
+
+    # ---- fixed fields ----
     assert record["name"] == "UniProt import"
     assert record["source"] == "UniProt"
-    assert record["url"] == url
-
-    # check accessed
-    accessed_dt = datetime.datetime.fromisoformat(record["accessed"])
-    now = datetime.datetime.now(datetime.UTC)
-    delta = abs((now - 
accessed_dt).total_seconds()) - assert delta < 10 + assert record["url"] == xml_url assert record["version"] == 115 + # ---- accessed field ---- + accessed = record.get("accessed") + assert accessed is not None -@pytest.mark.parametrize("bad_url", [None, ""]) -def test_build_datasource_record_bad(bad_url: str | None) -> None: - record = build_datasource_record(bad_url) - assert record["url"] == bad_url + parsed = datetime.datetime.fromisoformat(accessed) + assert parsed.tzinfo is not None + assert parsed.tzinfo == datetime.UTC -## parse_identifiers function test ## -@pytest.mark.parametrize( - ("xml_str", "cdm_id", "expected"), - [ - ### multiple accessions, expect two dict, every dic use the same cdm_id - ### identifier according to number - ( - """ - - Q9V2L2 - G8ZFP4 - - """, - "CDM:001", - [ - { - "entity_id": "CDM:001", - "identifier": "UniProt:Q9V2L2", - "source": "UniProt", - "description": "UniProt accession", - }, - { - "entity_id": "CDM:001", - "identifier": "UniProt:G8ZFP4", - "source": "UniProt", - "description": "UniProt accession", - }, - ], - ), - ### Use single accession - ( - """ - - X00001 - - """, - "CDM:002", - [ - { - "entity_id": "CDM:002", - "identifier": "UniProt:X00001", - "source": "UniProt", - "description": "UniProt accession", - } - ], - ), - ### No accession - ( - """ - - - """, - "CDM:003", - [], - ), - ], -) -def test_parse_identifiers(xml_str: str, cdm_id: str, expected: list[dict[str, str]]) -> None: +def test_save_datasource_record(tmp_path: Path, xml_url): + """ + save_datasource_record should: + - create output directory if missing + - write datasource.json + - return the same content that is written to disk """ - This approach ensures that parse_identifiers correctly parses and structures identifier data. + output_dir = tmp_path / "output" - The parsed Element object and the provided CDM_id are passed to the parse_identifiers funtion. - The function is expected to extract all relevant identifier information from the XML and return list of dict. + # ---- call function ---- + result = save_datasource_record(xml_url, str(output_dir)) - The test compares the result output with the predefined expected result using an assert statement. + # ---- return value sanity ---- + assert isinstance(result, dict) + assert result["url"] == xml_url + assert result["source"] == "UniProt" + assert result["name"] == "UniProt import" + assert "accessed" in result + assert "version" in result - """ - entry = ET.fromstring(xml_str) - result = parse_identifiers(entry, cdm_id) - assert result == expected + # ---- file existence ---- + output_file = output_dir / "datasource.json" + assert output_file.exists() + assert output_file.is_file() + # ---- file content correctness ---- + with open(output_file, encoding="utf-8") as f: + on_disk = json.load(f) -""" - This parameterized pytest function tests the correctness of the parse_names function for various UniProt XML entry scenarios. 
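Written out linearly, the smallest case from the parametrized table below looks like this (a sketch asserting only the four fields the test checks):

import xml.etree.ElementTree as ET

from cdm_data_loader_utils.parsers.uniprot import parse_names

NS_URI = "https://uniprot.org/uniprot"
entry = ET.Element(f"{{{NS_URI}}}entry")
name = ET.SubElement(entry, f"{{{NS_URI}}}name")
name.text = "ProteinA"

(row,) = parse_names(entry, "cdm_1")
assert row["entity_id"] == "cdm_1"
assert row["name"] == "ProteinA"
assert row["description"] == "UniProt entry name"
assert row["source"] == "UniProt"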
+ assert on_disk == result - XML string representing a UniProt entry with different protein names: - top-level - recommended names, - alternative names, - combinations, - no names - cdm_id: CDM entry ID +def make_entry(names=None, protein_names=None): + entry = ET.Element(f"{{{NS_URI}}}entry") - Output: - A list of name records with their metadata + # + for n in names or []: + e = ET.SubElement(entry, f"{{{NS_URI}}}name") + e.text = n -""" + # block + if protein_names: + protein = ET.SubElement(entry, f"{{{NS_URI}}}protein") + + for tag, logical in [ + ("recommendedName", "recommended"), + ("alternativeName", "alternative"), + ]: + if logical not in protein_names: + continue + + block = ET.SubElement(protein, f"{{{NS_URI}}}{tag}") + for xml_tag in ["fullName", "shortName"]: + val = protein_names[logical].get(xml_tag.replace("Name", "")) + if val: + e = ET.SubElement(block, f"{{{NS_URI}}}{xml_tag}") + e.text = val + + return entry -## parse_names function test ## @pytest.mark.parametrize( - ("xml_str", "cdm_id", "expected"), + "entry_kwargs, cdm_id, expected", [ - # Only top-level + # Only ( - """ - MainProteinName - """, - "CDM:001", - [ - { - "entity_id": "CDM:001", - "name": "MainProteinName", - "description": "UniProt protein name", - "source": "UniProt", - } - ], - ), - # RecommendedName (fullName and shortName) - ( - """ - - - RecFullName - RecShort - - - """, - "CDM:002", - [ - { - "entity_id": "CDM:002", - "name": "RecFullName", - "description": "UniProt recommended full name", - "source": "UniProt", - }, - { - "entity_id": "CDM:002", - "name": "RecShort", - "description": "UniProt recommended short name", - "source": "UniProt", - }, - ], + {"names": ["ProteinA"]}, + "cdm_1", + { + ("ProteinA", "UniProt entry name"), + }, ), - # AlternativeName (fullName and shortName) + # entry name + recommended full name ( - """ - - - AltFullName1 - AltShort1 - - - AltFullName2 - - - """, - "CDM:003", - [ - { - "entity_id": "CDM:003", - "name": "AltFullName1", - "description": "UniProt alternative full name", - "source": "UniProt", - }, - { - "entity_id": "CDM:003", - "name": "AltShort1", - "description": "UniProt alternative short name", - "source": "UniProt", - }, - { - "entity_id": "CDM:003", - "name": "AltFullName2", - "description": "UniProt alternative full name", - "source": "UniProt", + { + "names": ["ProteinB"], + "protein_names": { + "recommended": {"full": "Rec Full B", "short": None}, }, - ], + }, + "cdm_2", + { + ("ProteinB", "UniProt entry name"), + ("Rec Full B", "UniProt recommended full name"), + }, ), - # Mixed: top-level and + # everything ( - """ - TopLevel - - - MixedFull - - - """, - "CDM:004", - [ - { - "entity_id": "CDM:004", - "name": "TopLevel", - "description": "UniProt protein name", - "source": "UniProt", - }, - { - "entity_id": "CDM:004", - "name": "MixedFull", - "description": "UniProt recommended full name", - "source": "UniProt", + { + "names": ["ProteinC"], + "protein_names": { + "recommended": {"full": "Rec Full C", "short": "Rec Short C"}, + "alternative": {"full": "Alt Full C", "short": "Alt Short C"}, }, - ], - ), - # No names at all - ( - """ - """, - "CDM:005", - [], + }, + "cdm_3", + { + ("ProteinC", "UniProt entry name"), + ("Rec Full C", "UniProt recommended full name"), + ("Rec Short C", "UniProt recommended short name"), + ("Alt Full C", "UniProt alternative full name"), + ("Alt Short C", "UniProt alternative short name"), + }, ), ], ) -def test_parse_names(xml_str: str, cdm_id: str, expected: list[dict[str, str]]) -> None: - entry = 
ET.fromstring(xml_str) - result = parse_names(entry, cdm_id) - assert result == expected +def test_parse_names_parametrized(entry_kwargs, cdm_id, expected): + entry = make_entry(**entry_kwargs) + rows = parse_names(entry, cdm_id) -""" - - This test ensures parse_protein_info works correctly for different combinations of data - Including cases with no protein info, sequence only, existence only or EC numbers - - This approach thoroughly validates that parse_protein_info can accurately extract, combine and structure metadata field. + # ---- row count ---- + assert len(rows) == len(expected) - Include: - EC Number, - existence evidence, - sequence + # ---- content ---- + observed = {(r["name"], r["description"]) for r in rows} + assert observed == expected -""" + # ---- entity_id and source ---- + for r in rows: + assert r["entity_id"] == cdm_id + assert r["source"] == "UniProt" -## parse_protein_info function test ## @pytest.mark.parametrize( - ("xml_str", "cdm_id", "expected"), + "build_entry, cdm_id, expected", [ - # There are multiple ecNumbers under the recommend names - ( - """ - - - 1.2.3.4 - 5.6.7.8 - - - """, - "CDM:001", - {"ec_numbers": ["1.2.3.4", "5.6.7.8"]}, - ), - # alternativeName has EC Number + # -------------------------------------------------- + # Empty entry -> None + # -------------------------------------------------- ( - """ - - - 3.3.3.3 - - - """, - "CDM:002", - {"ec_numbers": ["3.3.3.3"]}, + lambda: ET.Element(f"{{{NS_URI}}}entry"), + "cdm_1", + None, ), - # If have both proteinExistence evidence and existence + # -------------------------------------------------- + # Only EC numbers + # -------------------------------------------------- ( - """ - - """, - "CDM:003", + lambda: ( + lambda entry: ( + ET.SubElement( + ET.SubElement( + ET.SubElement(entry, f"{{{NS_URI}}}protein"), + f"{{{NS_URI}}}recommendedName", + ), + f"{{{NS_URI}}}ecNumber", + ).__setattr__("text", "1.1.1.1"), + entry, + )[1] + )(ET.Element(f"{{{NS_URI}}}entry")), + "cdm_2", { - "protein_id": "CDM:003", - "evidence_for_existence": "evidence at protein level", + "ec_numbers": "1.1.1.1", }, ), - # Sequence only + # -------------------------------------------------- + # Only sequence + entry modified + # -------------------------------------------------- ( - """ - - MAGNLSKVAAVSGVAAAVLGK - - """, - "CDM:004", + lambda: ( + lambda entry: ( + entry.set("modified", "2024-01-01"), + ET.SubElement( + entry, + f"{{{NS_URI}}}sequence", + { + "length": "100", + "mass": "12345", + "checksum": "ABC", + "version": "2", + }, + ).__setattr__("text", "MKTIIALSY"), + entry, + )[2] + )(ET.Element(f"{{{NS_URI}}}entry")), + "cdm_3", { - "length": "357", + "length": "100", "mass": "12345", - "checksum": "ABCD", - "modified": "2024-05-21", + "checksum": "ABC", "sequence_version": "2", - "sequence": "MAGNLSKVAAVSGVAAAVLGK", + "sequence": "MKTIIALSY", + "entry_modified": "2024-01-01", }, ), - # Combine with three elements: proteinExistence, sequence and ecNumbers + # -------------------------------------------------- + # Everything + # -------------------------------------------------- ( - """ - - - 3.3.3.3 - - - 8.8.8.8 - - - - - MKTLLTGAAT - - """, - "CDM:005", + lambda: ( + lambda entry: ( + entry.set("modified", "2024-02-02"), + # protein + EC + ET.SubElement( + ET.SubElement( + ET.SubElement(entry, f"{{{NS_URI}}}protein"), + f"{{{NS_URI}}}recommendedName", + ), + f"{{{NS_URI}}}ecNumber", + ).__setattr__("text", "3.5.4.4"), + # proteinExistence + ET.SubElement( + entry, + f"{{{NS_URI}}}proteinExistence", + {"type": 
"evidence at protein level"}, + ), + # sequence + ET.SubElement( + entry, + f"{{{NS_URI}}}sequence", + { + "length": "250", + "mass": "99999", + "checksum": "XYZ", + "modified": "2023-12-01", + "version": "1", + }, + ).__setattr__("text", "MADEUPSEQUENCE"), + entry, + )[4] + )(ET.Element(f"{{{NS_URI}}}entry")), + "cdm_4", { - "ec_numbers": ["3.3.3.3", "8.8.8.8"], - "protein_id": "CDM:005", - "evidence_for_existence": "evidence at transcript level", - "length": "10", - "mass": "1000", + "ec_numbers": "3.5.4.4", + "protein_id": "cdm_4", + "evidence_for_existence": "evidence at protein level", + "length": "250", + "mass": "99999", "checksum": "XYZ", - "modified": "2021-12-01", + "modified": "2023-12-01", "sequence_version": "1", - "sequence": "MKTLLTGAAT", + "sequence": "MADEUPSEQUENCE", + "entry_modified": "2024-02-02", }, ), - # return None - ("""""", "CDM:006", None), ], ) -def test_parse_protein_info(xml_str: str, cdm_id: str, expected: dict[str, Any]) -> None: - entry = ET.fromstring(xml_str) - result = parse_protein_info(entry, cdm_id) - assert result == expected - +def test_parse_protein_info(build_entry, cdm_id, expected): + entry = build_entry() -""" - - This parameterized pytest function verifies the behavior of the parse_evidence_map function - for different UniProt XML entry structures involving evidence elements. - - xml_str: Simulates a UniProt entry with various and sub-structures, - including cases with multiple evidence elements, missing sources, or no evidence at all. - - expected: A dictionary mapping evidence keys to their extracted details—such as evidence type, - supporting objects, and publication references. - - Ensure parse_evidence_map: - Accurately extract evidence keys and types - Correctly classify supporting objects and publication references - Handle entries with absent sources or evidence elements - Represent all relevant evidence metadata in the required structure + result = parse_protein_info(entry, cdm_id) -""" + if expected is None: + assert result is None + else: + assert isinstance(result, dict) + assert result == expected -## parse_evidence_map function test ## @pytest.mark.parametrize( - ("xml_str", "expected"), + "build_xml, expected", [ - # Single evidence,include PubMed and supporting object + # -------------------------------------------------- + # No evidence elements + # -------------------------------------------------- ( - """ - - - - - - - """, - { - "1": { - "evidence_type": "ECO:0000255", - "supporting_objects": ["Ensembl:ENSG00001"], - "publications": ["PMID:123456"], - } - }, + lambda: ET.Element(f"{{{NS_URI}}}entry"), + {}, ), - # multiple evidences + # -------------------------------------------------- + # Evidence without key + # -------------------------------------------------- ( - """ - - - - - - - - - - - """, - { - "E1": { - "evidence_type": "ECO:0000313", - "supporting_objects": None, - "publications": ["PMID:654321"], - }, - "E2": { - "evidence_type": "ECO:0000250", - "supporting_objects": ["PDB:2N7Q"], - "publications": None, - }, - }, + lambda: ( + lambda entry: ( + ET.SubElement(entry, f"{{{NS_URI}}}evidence", {"type": "ECO:0000269"}), + entry, + )[1] + )(ET.Element(f"{{{NS_URI}}}entry")), + {}, ), - # no source + # -------------------------------------------------- + # Evidence with key, no source + # -------------------------------------------------- ( - """ - - """, + lambda: ( + lambda entry: ( + ET.SubElement( + entry, + f"{{{NS_URI}}}evidence", + {"key": "1", "type": "ECO:0000313"}, + ), + entry, + )[1] + 
)(ET.Element(f"{{{NS_URI}}}entry")), { - "X1": { - "evidence_type": "ECO:9999999", - "supporting_objects": None, - "publications": None, + "1": { + "evidence_type": "ECO:0000313", } }, ), - # no evidence - ( - """ - """, - {}, - ), - # one evidence with multiple supporting objects + # -------------------------------------------------- + # Evidence with PUBMED with other refs + # -------------------------------------------------- ( - """ - - - - - - - """, + lambda: ( + lambda entry: ( + lambda ev: ( + ET.SubElement( + ET.SubElement(ev, f"{{{NS_URI}}}source"), + f"{{{NS_URI}}}dbReference", + {"type": "PubMed", "id": "12345"}, + ), + ET.SubElement( + ET.SubElement(ev, f"{{{NS_URI}}}source"), + f"{{{NS_URI}}}dbReference", + {"type": "GO", "id": "GO:0008150"}, + ), + entry, + )[2] + )( + ET.SubElement( + entry, + f"{{{NS_URI}}}evidence", + {"key": "E2", "type": "ECO:0000269"}, + ) + ) + )(ET.Element(f"{{{NS_URI}}}entry")), { - "K1": { + "E2": { "evidence_type": "ECO:0000269", - "supporting_objects": ["Ensembl:ENS1", "RefSeq:RS123"], - "publications": None, + "publications": ["PMID:12345"], } }, ), ], ) -def test_parse_evidence_map(xml_str: str, expected: dict[str, Any]) -> None: - entry = ET.fromstring(xml_str) +def test_parse_evidence_map_parametrized(build_xml, expected): + entry = build_xml() result = parse_evidence_map(entry) - assert result == expected - -""" - - xml_strings: models a UniProt entry with different types of possible associations - cdm_id: uniquely identifies the protein being parsed - evidence_map: supplies external evidence metadata for associations - expected: list of association dictionaries - - Arg: - The function correctly links proteins to organism taxonomy. - Cross-references are properly included, evidence metadata is correctly merged. - Associations derived from catalytic activity and cofactor comments are correctly generated. - All combinations and edge cases are handled robustly. 
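The evidence map produced by parse_evidence_map is joined into association rows via the evidence key on the XML element. The catalytic-activity case below, rebuilt as a sketch from an XML literal instead of nested SubElement calls (same inputs, same expected row):

import xml.etree.ElementTree as ET

from cdm_data_loader_utils.parsers.uniprot import parse_associations

NS_URI = "https://uniprot.org/uniprot"
entry = ET.fromstring(
    f'<entry xmlns="{NS_URI}">'
    '<comment type="catalytic activity">'
    '<reaction evidence="E1">'
    '<dbReference type="Rhea" id="RHEA:12345"/>'
    "</reaction>"
    "</comment>"
    "</entry>"
)
evidence_map = {"E1": {"evidence_type": "ECO:0000269", "publications": ["PMID:12345"]}}

rows = parse_associations(entry, "cdm_2", evidence_map)
assert rows == [
    {
        "subject": "cdm_2",
        "predicate": "catalyzes",
        "object": "Rhea:RHEA:12345",
        "evidence_type": "ECO:0000269",
        "publications": ["PMID:12345"],
    }
]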
- -""" + assert isinstance(result, dict) + assert result == expected -## parse_associations function test ## @pytest.mark.parametrize( - ("xml_str", "cdm_id", "evidence_map", "expected"), + "build_xml, cdm_id, evidence_map, expected", [ - # organism association(NCBI Taxonomy dbReference) + # -------------------------------------------------- + # Taxonomy association only + # -------------------------------------------------- ( - """ - - - - """, - "CDM:1", + lambda: ( + lambda entry: ( + ET.SubElement( + ET.SubElement(entry, f"{{{NS_URI}}}organism"), + f"{{{NS_URI}}}dbReference", + {"type": "NCBI Taxonomy", "id": "1234"}, + ), + entry, + )[1] + )(ET.Element(f"{{{NS_URI}}}entry")), + "cdm_1", {}, - [{"subject": "CDM:1", "object": "NCBITaxon:9606"}], - ), - # dbReference with evidence key - ( - """ - - """, - "CDM:2", - { - "E1": { - "evidence_type": "ECO:0000250", - "supporting_objects": ["Ensembl:ENS1"], - "publications": ["PMID:1234"], - } - }, [ { - "subject": "CDM:2", - "object": "PDB:2N7Q", - "evidence_type": "ECO:0000250", - "supporting_objects": ["Ensembl:ENS1"], - "publications": ["PMID:1234"], + "subject": "cdm_1", + "object": "NCBITaxon:1234", + "predicate": "in_taxon", } ], ), - # comment catalytic activity (reaction) with evidence key + # -------------------------------------------------- + # Catalytic activity with evidence + # -------------------------------------------------- ( - """ - - - - - - """, - "CDM:3", + lambda: ( + lambda entry: ( + lambda comment: ( + lambda reaction: ( + ET.SubElement( + reaction, + f"{{{NS_URI}}}dbReference", + {"type": "Rhea", "id": "RHEA:12345"}, + ), + entry, + )[1] + )( + ET.SubElement( + comment, + f"{{{NS_URI}}}reaction", + {"evidence": "E1"}, + ) + ) + )( + ET.SubElement( + entry, + f"{{{NS_URI}}}comment", + {"type": "catalytic activity"}, + ) + ) + )(ET.Element(f"{{{NS_URI}}}entry")), + "cdm_2", { - "E2": { - "evidence_type": "ECO:0000313", - "publications": ["PMID:2222"], + "E1": { + "evidence_type": "ECO:0000269", + "publications": ["PMID:12345"], } }, [ { - "subject": "CDM:3", + "subject": "cdm_2", "predicate": "catalyzes", - "object": "Rhea:12345", - "evidence_type": "ECO:0000313", - "publications": ["PMID:2222"], + "object": "Rhea:RHEA:12345", + "evidence_type": "ECO:0000269", + "publications": ["PMID:12345"], } ], ), - # Comment cofactor without evidence + # -------------------------------------------------- + # Cofactor association + # -------------------------------------------------- ( - """ - - - - - - """, - "CDM:4", + lambda: ( + lambda entry: ( + lambda comment: ( + ET.SubElement( + ET.SubElement( + comment, + f"{{{NS_URI}}}cofactor", + ), + f"{{{NS_URI}}}dbReference", + {"type": "ChEBI", "id": "CHEBI:15377"}, + ), + entry, + )[1] + )( + ET.SubElement( + entry, + f"{{{NS_URI}}}comment", + {"type": "cofactor"}, + ) + ) + )(ET.Element(f"{{{NS_URI}}}entry")), + "cdm_3", {}, [ { - "subject": "CDM:4", + "subject": "cdm_3", "predicate": "requires_cofactor", "object": "ChEBI:CHEBI:15377", } ], ), - # Several relevant relationship(with organism and dbReference) - ( - """ - - - - - """, - "CDM:5", - {}, - [ - {"subject": "CDM:5", "object": "NCBITaxon:562"}, - {"subject": "CDM:5", "object": "RefSeq:NP_414543"}, - ], - ), - # if it is empty entry, return to [] - ("""""", "CDM:6", {}, []), ], ) -def test_parse_associations( - xml_str: str, cdm_id: str, evidence_map: dict[str, Any], expected: list[dict[str, str]] -) -> None: - entry = ET.fromstring(xml_str) - result = parse_associations(entry, cdm_id, evidence_map) - assert result == 
expected - +def test_parse_associations_parametrized(build_xml, cdm_id, evidence_map, expected): + entry = build_xml() -""" - - xml_str: Uniprot entry include , , - Refer: PubMed, DOI, GeneBank, DDBJ, EMBL - - Output: List of publication identifier - - Arg: - Extract publication of references - Recognize and format database types ( with prefixing “PMID:”, “DOI:”) - Handle entries with multiple or mixed publication types - Return an empty list if no publication data. + result = parse_associations(entry, cdm_id, evidence_map) -""" + assert isinstance(result, list) + assert result == expected -## parse_publications function test ## @pytest.mark.parametrize( - ("xml_str", "expected"), + "build_xml, cdm_id, expected", [ - # Single PubMed + # -------------------------------------------------- + # No dbReference + # -------------------------------------------------- ( - """ - - - - - - """, - ["PMID:12345"], + lambda: ET.Element(f"{{{NS_URI}}}entry"), + "cdm_1", + [], ), - # Multiple types include (PubMed, DOI, GenBank) + # -------------------------------------------------- + # dbReference with CURIE id + # -------------------------------------------------- ( - """ - - - - - - - - """, - ["PMID:55555", "DOI:10.1000/j.jmb.2020.01.001"], + lambda: ( + lambda entry: ( + ET.SubElement( + entry, + f"{{{NS_URI}}}dbReference", + {"type": "GO", "id": "GO:0008150"}, + ), + entry, + )[1] + )(ET.Element(f"{{{NS_URI}}}entry")), + "cdm_2", + [ + { + "entity_id": "cdm_2", + "xref_type": "GO", + "xref_value": "GO:0008150", + "xref": "GO:0008150", + } + ], ), - # Multiple references + # -------------------------------------------------- + # dbReference without CURIE (prefix) + # -------------------------------------------------- ( - """ - - - - - - - - - - - """, - ["DOI:10.1000/jmb.123456", "PMID:98765"], + lambda: ( + lambda entry: ( + ET.SubElement( + entry, + f"{{{NS_URI}}}dbReference", + {"type": "CDD", "id": "cd04253"}, + ), + entry, + )[1] + )(ET.Element(f"{{{NS_URI}}}entry")), + "cdm_3", + [ + { + "entity_id": "cdm_3", + "xref_type": "CDD", + "xref_value": "cd04253", + "xref": "CDD:cd04253", + } + ], + ), + # -------------------------------------------------- + # Mixed dbReferences + # -------------------------------------------------- + ( + lambda: ( + lambda entry: ( + ET.SubElement( + entry, + f"{{{NS_URI}}}dbReference", + {"type": "GO", "id": "GO:0003674"}, + ), + ET.SubElement( + entry, + f"{{{NS_URI}}}dbReference", + {"type": "PDB", "id": "1ABC"}, + ), + entry, + )[2] + )(ET.Element(f"{{{NS_URI}}}entry")), + "cdm_4", + [ + { + "entity_id": "cdm_4", + "xref_type": "GO", + "xref_value": "GO:0003674", + "xref": "GO:0003674", + }, + { + "entity_id": "cdm_4", + "xref_type": "PDB", + "xref_value": "1ABC", + "xref": "PDB:1ABC", + }, + ], ), - # dbReference: DDBJ and EMBL + # -------------------------------------------------- + # Missing type or id + # -------------------------------------------------- ( - """ - - - - - - - """, + lambda: ( + lambda entry: ( + ET.SubElement( + entry, + f"{{{NS_URI}}}dbReference", + {"type": "GO"}, # missing id + ), + ET.SubElement( + entry, + f"{{{NS_URI}}}dbReference", + {"id": "123"}, # missing type + ), + entry, + )[2] + )(ET.Element(f"{{{NS_URI}}}entry")), + "cdm_5", [], ), - # no publication - ("""""", []), ], ) -def test_parse_publications(xml_str: str, expected: list[str]) -> None: - entry = ET.fromstring(xml_str) - result = parse_publications(entry) +def test_parse_cross_references_parametrized(build_xml, cdm_id, expected): + entry = build_xml() + + result = 
parse_cross_references(entry, cdm_id) + + assert isinstance(result, list) assert result == expected -## parse_uniprot_entry function test ## @pytest.mark.parametrize( - ("xml_str", "datasource_name", "prev_created"), + "build_xml, cdm_id, expected", [ + # -------------------------------------------------- + # No accession + # -------------------------------------------------- ( - """ - - P12345 - ProteinX - - - ProteinX Full Name - - - - - - - - - - - - """, - "UniProt import", - None, + lambda: ET.Element(f"{{{NS_URI}}}entry"), + "cdm_1", + [], + ), + # -------------------------------------------------- + # Single accession + # -------------------------------------------------- + ( + lambda: ( + lambda entry: ( + ET.SubElement(entry, f"{{{NS_URI}}}accession").__setattr__("text", "P12345"), + entry, + )[1] + )(ET.Element(f"{{{NS_URI}}}entry")), + "cdm_2", + [ + { + "entity_id": "cdm_2", + "identifier": "UniProt:P12345", + "source": "UniProt", + "description": "UniProt accession", + } + ], + ), + # -------------------------------------------------- + # Multiple accessions + # -------------------------------------------------- + ( + lambda: ( + lambda entry: ( + ET.SubElement(entry, f"{{{NS_URI}}}accession").__setattr__("text", "Q11111"), + ET.SubElement(entry, f"{{{NS_URI}}}accession").__setattr__("text", "Q22222"), + entry, + )[2] + )(ET.Element(f"{{{NS_URI}}}entry")), + "cdm_3", + [ + { + "entity_id": "cdm_3", + "identifier": "UniProt:Q11111", + "source": "UniProt", + "description": "UniProt accession", + }, + { + "entity_id": "cdm_3", + "identifier": "UniProt:Q22222", + "source": "UniProt", + "description": "UniProt accession", + }, + ], + ), + # -------------------------------------------------- + # parse_identifiers_generic already sets source/description → setdefault + # -------------------------------------------------- + ( + lambda: ( + lambda entry: ( + ET.SubElement(entry, f"{{{NS_URI}}}accession").__setattr__("text", "A0A000"), + entry, + )[1] + )(ET.Element(f"{{{NS_URI}}}entry")), + "cdm_4", + [ + { + "entity_id": "cdm_4", + "identifier": "UniProt:A0A000", + "source": "UniProt", # remains + "description": "UniProt accession", # remains + } + ], ), ], ) -def test_parse_uniprot_entry(xml_str: str, datasource_name: str, prev_created: None) -> None: - entry = ET.fromstring(xml_str) - cdm_id = generate_cdm_id() - - current_timestamp = "2024-07-17T13:00:00Z" - - record = parse_uniprot_entry(entry, cdm_id, current_timestamp, datasource_name, prev_created) - - entity = record["entity"] - assert entity["entity_type"] == "protein" - assert entity["data_source"] == datasource_name - assert entity["version"] == "3" - assert entity["uniprot_created"] == "2020-01-01" - assert entity["uniprot_modified"] == "2021-01-01" - assert entity["entity_id"].startswith("CDM:") - - # identifiers/names/associations/publications - assert isinstance(record["identifiers"], list) - assert isinstance(record["names"], list) - assert isinstance(record["associations"], list) - assert isinstance(record["publications"], list) +def test_parse_identifiers_parametrized(build_xml, cdm_id, expected): + entry = build_xml() + + result = parse_identifiers(entry, cdm_id) + + assert isinstance(result, list) + assert result == expected diff --git a/tests/parsers/test_uniref.py b/tests/parsers/test_uniref.py index 630949c..9ca5360 100644 --- a/tests/parsers/test_uniref.py +++ b/tests/parsers/test_uniref.py @@ -1,284 +1,318 @@ -"""Tests for the UniRef importer.""" +import os +import sys -import datetime as dt -import textwrap 
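Condensed from the parametrized cases below, the idempotency contract of get_timestamps (the explicit now argument is assumed to be optional and to default to the current time, since parse_uniref_entry calls the function with only two arguments):

from datetime import datetime, timezone

from cdm_data_loader_utils.parsers.uniref import get_timestamps

now = datetime(2025, 1, 1, tzinfo=timezone.utc)

# Re-import: the stored 'created' stamp is carried forward, only 'updated' moves.
updated, created = get_timestamps(
    "UniRef100_A", {"UniRef100_A": "2024-01-01T00:00:00+00:00"}, now
)
assert updated != created

# First import: both stamps coincide.
updated, created = get_timestamps("UniRef100_B", {}, now)
assert updated == created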
-import xml.etree.ElementTree as ET +sys.path.insert(0, os.path.abspath(os.path.dirname(__file__))) +import gzip +import tempfile +import xml.etree.ElementTree as ET +from datetime import datetime, timezone import pytest from cdm_data_loader_utils.parsers.uniref import ( - add_cluster_members, cdm_entity_id, + get_timestamps, extract_cluster, - extract_cross_refs, get_accession_and_seed, - get_timestamps, + add_cluster_members, + extract_cross_refs, + parse_uniref_xml, ) +NS = {"ns": "http://uniprot.org/uniref"} + +# --------------------------------------------------------- +# cdm_entity_id +# --------------------------------------------------------- @pytest.mark.parametrize( - ("accession", "expected_prefix"), - [("A0B0123456", "CDM:"), ("P01234", "CDM:"), ("", None), (None, None)], + "value, should_raise", + [ + ("A0A009HJL9", False), + ("UniRef100_A0A009HJL9", False), + ("", True), + (None, True), + ], ) -def test_cdm_entity_id(accession: str | None, expected_prefix: str | None) -> None: - """Ensure that CDM entities start with the appropriate prefix.""" - result = cdm_entity_id(accession) - if expected_prefix is None: - assert result is None +def test_cdm_entity_id(value, should_raise): + if should_raise: + with pytest.raises(ValueError): + cdm_entity_id(value) else: - assert result.startswith(expected_prefix) + out = cdm_entity_id(value) + assert isinstance(out, str) + assert out.startswith("CDM:") +# --------------------------------------------------------- +# get_timestamps +# --------------------------------------------------------- @pytest.mark.parametrize( - ("xml_str", "expected_name"), + "uniref_id, existing, now, expect_created_same_as_updated", [ ( - "" - "TestName", - "TestName", + "UniRef100_A", + {"UniRef100_A": "2024-01-01T00:00:00+00:00"}, + datetime(2025, 1, 1, 0, 0, 0, tzinfo=timezone.utc), + False, ), ( - "", - "UNKNOWN", + "UniRef100_B", + {}, + datetime(2025, 1, 1, 0, 0, 0, tzinfo=timezone.utc), + True, + ), + ( + "UniRef100_C", + {}, + None, + True, ), ], ) -def test_extract_cluster(xml_str: str, expected_name: str) -> None: - """Test cluster extraction from XML.""" - ns = {"ns": "http://uniprot.org/uniref"} - elem = ET.fromstring(xml_str) - cluster_id, name = extract_cluster(elem, ns) - assert cluster_id.startswith("CDM:") - assert isinstance(cluster_id, str) - assert name == expected_name +def test_get_timestamps(uniref_id, existing, now, expect_created_same_as_updated): + updated, created = get_timestamps(uniref_id, existing, now) + + assert isinstance(updated, str) + assert isinstance(created, str) + assert updated.endswith("+00:00") + if expect_created_same_as_updated: + assert updated == created + else: + assert updated != created + +@pytest.mark.parametrize("bad_id", ["", None]) +def test_get_timestamps_rejects_empty_uniref_id(bad_id): + with pytest.raises(ValueError): + get_timestamps(bad_id, {}, None) + + +# --------------------------------------------------------- +# add_cluster_members +# --------------------------------------------------------- @pytest.mark.parametrize( - ("uniref_id", "existing_created", "now", "expected"), + "repr_xml, member_xmls, expected_count", [ - # Has existing_created ( - "UniRef100_A", - {"UniRef100_A": "2024-01-01T00:00:00"}, - dt.datetime(2025, 1, 1, 0, 0, 0, tzinfo=dt.UTC), - ("2025-01-01T00:00:00", "2024-01-01T00:00:00"), + """ + + + + + """, + [ + """ + + + + """, + """ + + + + """, + ], + 3, ), - # There is no existing_created ( - "UniRef100_B", - {"UniRef100_A": "2024-01-01T00:00:00"}, - dt.datetime(2025, 1, 1, 0, 0, 0, 
tzinfo=dt.UTC), - ("2025-01-01T00:00:00", "2025-01-01T00:00:00"), + None, + [ + """ + + + + """, + ], + 1, ), - # There is no existing_created,also not provide "now" + (None, [], 0), + ], +) +def test_add_cluster_members(repr_xml, member_xmls, expected_count): + cluster_id = "CDM_CLUSTER" + repr_db = ET.fromstring(repr_xml) if repr_xml else None + + entry = ET.Element("{http://uniprot.org/uniref}entry") + for m in member_xmls: + mem = ET.SubElement(entry, "{http://uniprot.org/uniref}member") + mem.append(ET.fromstring(m)) + + rows = [] + add_cluster_members(cluster_id, repr_db, entry, rows, NS) + + assert len(rows) == expected_count + for r in rows: + assert r[0] == cluster_id + assert r[1].startswith("CDM:") + assert r[4] == "1.0" + + +# --------------------------------------------------------- +# extract_cluster +# --------------------------------------------------------- +@pytest.mark.parametrize( + "xml_str, uniref_id, expected_name", + [ ( - "UniRef100_C", - {}, - None, # The system automatically use the current time - None, # Only assert that the return is a string and they are equal + "Test Cluster Name", + "UniRef100_A", + "Test Cluster Name", + ), + ( + "", + "UniRef100_B", + "UNKNOWN", ), ], ) -def test_get_timestamps(uniref_id: str, existing_created: str, now: dt.datetime, expected: tuple[str] | None) -> None: - """Test timestamps.""" - result = get_timestamps(uniref_id, existing_created, now) - if expected is not None: - assert result == expected - else: - formatted_now, created_time = result - assert formatted_now == created_time - assert isinstance(formatted_now, str) - assert len(formatted_now) == 19 # "YYYY-MM-DDTHH:MM:SS" ---> 19 bites +def test_extract_cluster(xml_str, uniref_id, expected_name): + elem = ET.fromstring(xml_str) + + cluster_id, name = extract_cluster(elem, NS, uniref_id) + + # ---- cluster_id checks ---- + assert isinstance(cluster_id, str) + assert cluster_id.startswith("CDM:") + + # ---- name checks ---- + assert name == expected_name @pytest.mark.parametrize( - ("xml_str", "expected_acc", "expected_is_seed"), + "xml_str, expected_acc, expected_is_seed", [ - # Have accession and isSeed + # accession + isSeed=true ( """ - - - - - """, + + + + + """, "A0A009HJL9", True, ), - # Only accession, no isSeed + # accession only ( """ - - - - """, + + + + """, "A0A241V597", False, ), - # No accession, only id + # no accession ( """ - - """, - "ID_ONLY", + + + + """, + None, + False, + ), + # dbref is None + ( + None, + None, False, ), - # None - (None, None, False), ], ) -def test_get_accession_and_seed(xml_str: str | None, expected_acc: str | None, expected_is_seed: bool) -> None: - """Test parsing of UniRef entries for accession and seed status.""" - ns = {"ns": "http://uniprot.org/uniref"} +def test_get_accession_and_seed(xml_str, expected_acc, expected_is_seed): dbref = ET.fromstring(xml_str) if xml_str else None - acc, is_seed = get_accession_and_seed(dbref, ns) - assert acc == expected_acc - assert is_seed == expected_is_seed + acc, is_seed = get_accession_and_seed(dbref, NS) -def make_entry_with_members(member_xmls: list[str], ns_uri: str = "http://uniprot.org/uniref") -> ET.Element: - """ - Receives a list of xml strings from dbReference, generates an element with child nodes. 
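A linear restatement of what the extract_cross_refs cases just below pin down: only the UniRef90/UniRef50/UniParc properties become (entity_id, type, value) rows; the UniProtKB accession feeds the derived entity id, and other properties such as protein names appear to be ignored (a sketch, not part of the patch):

import xml.etree.ElementTree as ET

from cdm_data_loader_utils.parsers.uniref import extract_cross_refs

NS = {"ns": "http://uniprot.org/uniref"}
dbref = ET.Element("{http://uniprot.org/uniref}dbReference", id="UniProtKB:A0A1")
for ptype, pval in [
    ("UniProtKB accession", "A0A1"),
    ("UniRef90 ID", "UniRef90_X"),
    ("protein name", "ignored"),
]:
    ET.SubElement(dbref, "{http://uniprot.org/uniref}property", type=ptype, value=pval)

rows: list[tuple[str, str, str]] = []
extract_cross_refs(dbref, rows, NS)
assert [(t, v) for _, t, v in rows] == [("UniRef90 ID", "UniRef90_X")]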
- """ - entry_elem = ET.Element(f"{{{ns_uri}}}entry") - for dbref_xml in member_xmls: - dbref_elem = ET.fromstring(dbref_xml) - member_elem = ET.SubElement(entry_elem, f"{{{ns_uri}}}member") - member_elem.append(dbref_elem) - return entry_elem + assert acc == expected_acc + assert is_seed == expected_is_seed +# --------------------------------------------------------- +# extract_cross_refs +# --------------------------------------------------------- @pytest.mark.parametrize( - ("repr_xml", "member_xmls", "expected"), + "props, expected", [ - pytest.param( - # representative member, two members - textwrap.dedent(""" - - - - - """), - [ - textwrap.dedent(""" - - - - """), - textwrap.dedent(""" - - - - - """), - ], + ( [ - ("CLUSTER_X", "CDM:", "true", "true", "1.0"), - ("CLUSTER_X", "CDM:", "false", "false", "1.0"), - ("CLUSTER_X", "CDM:", "false", "true", "1.0"), + ("UniProtKB accession", "A0A1"), + ("UniRef90 ID", "UniRef90_X"), + ("UniParc ID", "UPI0001"), ], - id="with-representative-and-members", + { + ("UniRef90 ID", "UniRef90_X"), + ("UniParc ID", "UPI0001"), + }, ), - pytest.param( - # Only memebers, no representative member - None, + ( [ - textwrap.dedent(""" - - - - """) + ("UniProtKB accession", "A0A2"), ], - [("CLUSTER_X", "CDM:", "false", "false", "1.0")], - id="members-only", - ), - pytest.param( - # No members, no representative member - None, - [], - [], - id="no-members", + set(), ), ], ) -def test_add_cluster_members(repr_xml: str | None, member_xmls: list[str], expected: list[tuple[str, ...]]) -> None: - """Test add_cluster_members with various representative/member combinations.""" - ns = {"ns": "http://uniprot.org/uniref"} - cluster_id = "CLUSTER_X" +def test_extract_cross_refs(props, expected): + dbref = ET.Element("{http://uniprot.org/uniref}dbReference", id="UniProtKB:A0A1") - # Structure (representative members) dbReference if it exists - repr_db = ET.fromstring(repr_xml) if repr_xml else None + for k, v in props: + ET.SubElement( + dbref, + "{http://uniprot.org/uniref}property", + type=k, + value=v, + ) - # Structure nodes, and add - elem = make_entry_with_members(member_xmls) + rows = [] + extract_cross_refs(dbref, rows, NS) - # Calling the function under test - cluster_member_data = [] - add_cluster_members(cluster_id, repr_db, elem, cluster_member_data, ns) + got = {(t, v) for _, t, v in rows} + assert got == expected - assert len(cluster_member_data) == len(expected) - for i, (clu_id, cdm_prefix, is_repr, is_seed, score) in enumerate(expected): - out = cluster_member_data[i] - assert out[0] == clu_id, f"Wrong cluster_id at idx {i}: {out[0]}" - assert out[1].startswith(cdm_prefix), f"Wrong entity_id at idx {i}: {out[1]}" - assert out[2] == is_repr, f"Wrong is_representative at idx {i}: {out[2]}" - assert out[3] == is_seed, f"Wrong is_seed at idx {i}: {out[3]}" - assert out[4] == score, f"Wrong score at idx {i}: {out[4]}" + for entity_id, _, _ in rows: + assert entity_id is not None + assert isinstance(entity_id, str) -XREF_TYPES = ["UniRef90 ID", "UniRef50 ID", "UniParc ID"] +# --------------------------------------------------------- +# parse_uniref_xml +# --------------------------------------------------------- +@pytest.mark.parametrize("batch_size", [1, 2]) +def test_parse_uniref_xml_batch(batch_size): + xml = """ + + + A + + + + + + + + B + + + + + + + + """.strip() -@pytest.mark.parametrize( - ("dbref_props", "expected_xrefs"), - [ - ( - # all cross-ref fields present - [ - ("UniRef90 ID", "UniRef90_N8Q6C0"), - ("UniRef50 ID", "UniRef50_A0A7Z7LP76"), - 
("UniParc ID", "UPI00044F6C4F"), - ("protein name", "foo"), - ], - [ - ("UniRef90 ID", "UniRef90_N8Q6C0"), - ("UniRef50 ID", "UniRef50_A0A7Z7LP76"), - ("UniParc ID", "UPI00044F6C4F"), - ], - ), - ( - # partial cross-ref - [ - ("UniRef90 ID", "UniRef90_ABC"), - ("protein name", "bar"), - ], - [ - ("UniRef90 ID", "UniRef90_ABC"), - ], - ), - ( - # No cross-ref - [ - ("protein name", "baz"), - ], - [], - ), - ], -) -def test_extract_cross_refs_param(dbref_props: list[tuple[str, str]], expected_xrefs: list[tuple[str, str]]) -> None: - """ - Test that extract_cross_refs correctly extracts all UniRef cross-reference fields. - """ - dbref = ET.Element("{http://uniprot.org/uniref}dbReference", type="UniProtKB ID", id="TEST_ID") - - for t, v in dbref_props: - ET.SubElement(dbref, "{http://uniprot.org/uniref}property", type=t, value=v) - - ns = {"ns": "http://uniprot.org/uniref"} - cross_reference_data = [] - extract_cross_refs(dbref, cross_reference_data, ns) - - entity_id = cdm_entity_id("TEST_ID") - expected = {(entity_id, typ, val) for typ, val in expected_xrefs} - got = set(cross_reference_data) - assert got == expected + with tempfile.TemporaryDirectory() as tmpdir: + gz_path = f"{tmpdir}/uniref_test.xml.gz" + with gzip.open(gz_path, "wb") as gz: + gz.write(xml.encode("utf-8")) + + result = parse_uniref_xml(gz_path, batch_size, {}) + + assert len(result["cluster_data"]) == batch_size + assert len(result["entity_data"]) == batch_size + assert len(result["cluster_member_data"]) == batch_size + assert len(result["cross_reference_data"]) in (0, batch_size) diff --git a/tests/parsers/test_xml_utils.py b/tests/parsers/test_xml_utils.py new file mode 100644 index 0000000..fc6e3ba --- /dev/null +++ b/tests/parsers/test_xml_utils.py @@ -0,0 +1,49 @@ +import xml.etree.ElementTree as ET + +from cdm_data_loader_utils.parsers.xml_utils import ( + clean_dict, + get_attr, + get_text, + parse_db_references, +) + + +def test_get_text_and_get_attr_basic() -> None: + elem = ET.Element("tag", attrib={"id": "123"}) + elem.text = " hello " + + assert get_text(elem) == "hello" + assert get_text(None) is None + assert get_attr(elem, "id") == "123" + assert get_attr(elem, "missing") is None + + +def test_parse_db_references_pub_and_others() -> None: + ns = {"ns": "dummy"} + source = ET.Element("source") + db1 = ET.SubElement(source, "dbReference", attrib={"type": "PubMed", "id": "12345"}) + db2 = ET.SubElement(source, "dbReference", attrib={"type": "DOI", "id": "10.1000/xyz"}) + db3 = ET.SubElement(source, "dbReference", attrib={"type": "PDB", "id": "1ABC"}) + + db1.tag = "{dummy}dbReference" + db2.tag = "{dummy}dbReference" + db3.tag = "{dummy}dbReference" + + pubs, others = parse_db_references(source, ns) + + assert "PUBMED:12345" in pubs + assert "DOI:10.1000/xyz" in pubs + assert "PDB:1ABC" in others + + +def test_clean_dict_removes_nones_and_empty() -> None: + """Test that clean_dict removes None and empty values.""" + d = { + "a": 1, + "b": None, + "c": [], + "d": {}, + "e": "ok", + } + cleaned = clean_dict(d) + assert cleaned == {"a": 1, "e": "ok"} diff --git a/uv.lock b/uv.lock index ad945bf..3414a4e 100644 --- a/uv.lock +++ b/uv.lock @@ -281,7 +281,7 @@ wheels = [ [[package]] name = "berdl-notebook-python-base" version = "0.1.0" -source = { git = "https://github.com/BERDataLakehouse/spark_notebook_base.git#e6ff234b274a634c0a3e7cf7fdb08528bbe8a5ab" } +source = { git = "https://github.com/BERDataLakehouse/spark_notebook_base.git?rev=0.0.0-dec11#e6ff234b274a634c0a3e7cf7fdb08528bbe8a5ab" } dependencies = [ { name 
= "attrs" }, { name = "boto3" }, @@ -316,7 +316,7 @@ dependencies = [ [[package]] name = "berdl-notebook-utils" version = "0.0.1" -source = { git = "https://github.com/BERDataLakehouse/spark_notebook.git?subdirectory=notebook_utils#ce40d0035f3edb8a7725b0d57dfcfdfbc7d37f55" } +source = { git = "https://github.com/BERDataLakehouse/spark_notebook.git?subdirectory=notebook_utils#e3de07fe2fef2aa9097abe543cbdbc1bd4d42289" } dependencies = [ { name = "berdl-notebook-python-base" }, ] @@ -756,20 +756,20 @@ wheels = [ [[package]] name = "curies" -version = "0.12.5" +version = "0.12.7" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "pydantic" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/a9/4c/fc5d51c21b99f802948a8b3079565806239c76e7b2f1f6702a603fe282f7/curies-0.12.5.tar.gz", hash = "sha256:57e4853045f8029c2564fbf2290221ff7a529034405076d1e82b7a8727b33dfc", size = 282912, upload-time = "2025-11-25T12:47:24.825Z" } +sdist = { url = "https://files.pythonhosted.org/packages/ef/fc/8f73cbde9b2034e4b4f8524b4c5b7ce2a68d052ede8a486c0bc806c1f54d/curies-0.12.7.tar.gz", hash = "sha256:b51f422f6f8b93b35b583195222563327a00629d0ef8e889dc14606e31950e4f", size = 283292, upload-time = "2025-12-22T15:48:33.554Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/8c/dd/29000adb47118edbf865a6e366fba294dcdacdf34322cedb23b8e7d30ae0/curies-0.12.5-py3-none-any.whl", hash = "sha256:e7fbb63cb49aeb389d46db64dae02f1563741084e033c2075cd1e163fdb1ead8", size = 69711, upload-time = "2025-11-25T12:47:23.058Z" }, + { url = "https://files.pythonhosted.org/packages/c6/65/c6118987bc902a1a5941d2028c49d91c2db55d5bec148b46d155a125543b/curies-0.12.7-py3-none-any.whl", hash = "sha256:9038d6afd6311328b072db51488af1ce162cb26c1a3cc497d2d00871ddb824a9", size = 70042, upload-time = "2025-12-22T15:48:32.508Z" }, ] [[package]] name = "dask" -version = "2025.11.0" +version = "2025.12.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "click" }, @@ -780,9 +780,9 @@ dependencies = [ { name = "pyyaml" }, { name = "toolz" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/db/33/eacaa72731f7fc64868caaf2d35060d50049eff889bd217263e68f76472f/dask-2025.11.0.tar.gz", hash = "sha256:23d59e624b80ee05b7cc8df858682cca58262c4c3b197ccf61da0f6543c8f7c3", size = 10984781, upload-time = "2025-11-06T16:56:51.535Z" } +sdist = { url = "https://files.pythonhosted.org/packages/49/ae/92fca08ff8fe3e8413842564dd55ee30c9cd9e07629e1bf4d347b005a5bf/dask-2025.12.0.tar.gz", hash = "sha256:8d478f2aabd025e2453cf733ad64559de90cf328c20209e4574e9543707c3e1b", size = 10995316, upload-time = "2025-12-12T14:59:10.885Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/1d/54/a46920229d12c3a6e9f0081d1bdaeffad23c1826353ace95714faee926e5/dask-2025.11.0-py3-none-any.whl", hash = "sha256:08c35a8146c05c93b34f83cf651009129c42ee71762da7ca452fb7308641c2b8", size = 1477108, upload-time = "2025-11-06T16:56:44.892Z" }, + { url = "https://files.pythonhosted.org/packages/6f/3a/2121294941227c548d4b5f897a8a1b5f4c44a58f5437f239e6b86511d78e/dask-2025.12.0-py3-none-any.whl", hash = "sha256:4213ce9c5d51d6d89337cff69de35d902aa0bf6abdb8a25c942a4d0281f3a598", size = 1481293, upload-time = "2025-12-12T14:58:59.32Z" }, ] [package.optional-dependencies] @@ -814,19 +814,19 @@ dependencies = [ [[package]] name = "debugpy" -version = "1.8.18" +version = "1.8.19" source = { registry = "https://pypi.org/simple" } -sdist = { url = 
"https://files.pythonhosted.org/packages/62/1a/7cb5531840d7ba5d9329644109e62adee41f2f0083d9f8a4039f01de58cf/debugpy-1.8.18.tar.gz", hash = "sha256:02551b1b84a91faadd2db9bc4948873f2398190c95b3cc6f97dc706f43e8c433", size = 1644467, upload-time = "2025-12-10T19:48:07.236Z" } +sdist = { url = "https://files.pythonhosted.org/packages/73/75/9e12d4d42349b817cd545b89247696c67917aab907012ae5b64bbfea3199/debugpy-1.8.19.tar.gz", hash = "sha256:eea7e5987445ab0b5ed258093722d5ecb8bb72217c5c9b1e21f64efe23ddebdb", size = 1644590, upload-time = "2025-12-15T21:53:28.044Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/fe/3f/45af037e91e308274a092eb6a86282865fb1f11148cdb7616e811aae33d7/debugpy-1.8.18-cp313-cp313-macosx_15_0_universal2.whl", hash = "sha256:75d14dd04b617ee38e46786394ec0dd5e1ac5e3d10ffb034fd6c7b72111174c2", size = 2538826, upload-time = "2025-12-10T19:48:29.434Z" }, - { url = "https://files.pythonhosted.org/packages/cc/f4/2de6bf624de05134d1bbe0a8750d484363cd212c3ade3d04f5c77d47d0ce/debugpy-1.8.18-cp313-cp313-manylinux_2_34_x86_64.whl", hash = "sha256:1b224887af5121fa702f9f542968170d104e3f9cac827d85fdefe89702dc235c", size = 4292542, upload-time = "2025-12-10T19:48:30.836Z" }, - { url = "https://files.pythonhosted.org/packages/93/54/89de7ef84d5ac39fc64a773feaedd902536cc5295814cd22d19c6d9dea35/debugpy-1.8.18-cp313-cp313-win32.whl", hash = "sha256:636a5445a3336e4aba323a3545ca2bb373b04b0bc14084a4eb20c989db44429f", size = 5280460, upload-time = "2025-12-10T19:48:32.696Z" }, - { url = "https://files.pythonhosted.org/packages/4f/59/651329e618406229edbef6508a5aa05e43cd027f042740c5b27e46854b23/debugpy-1.8.18-cp313-cp313-win_amd64.whl", hash = "sha256:6da217ac8c1152d698b9809484d50c75bef9cc02fd6886a893a6df81ec952ff8", size = 5322399, upload-time = "2025-12-10T19:48:35.057Z" }, - { url = "https://files.pythonhosted.org/packages/36/59/5e8bf46a66ca9dfcd0ce4f35c07085aeb60d99bf5c52135973a4e197ed41/debugpy-1.8.18-cp314-cp314-macosx_15_0_universal2.whl", hash = "sha256:be7f622d250fe3429571e84572eb771023f1da22c754f28d2c60a10d74a4cc1b", size = 2537336, upload-time = "2025-12-10T19:48:36.463Z" }, - { url = "https://files.pythonhosted.org/packages/a1/5a/3b37cc266a69da83a4febaa4267bb2062d4bec5287036e2f23d9a30a788c/debugpy-1.8.18-cp314-cp314-manylinux_2_34_x86_64.whl", hash = "sha256:df8bf7cd78019d5d155213bf5a1818b36403d0c3758d669e76827d4db026b840", size = 4268696, upload-time = "2025-12-10T19:48:37.855Z" }, - { url = "https://files.pythonhosted.org/packages/de/4b/1e13586444440e5754b70055449b70afa187aaa167fa4c20c0c05d9c3b80/debugpy-1.8.18-cp314-cp314-win32.whl", hash = "sha256:32dd56d50fe15c47d0f930a7f0b9d3e5eb8ed04770bc6c313fba6d226f87e1e8", size = 5280624, upload-time = "2025-12-10T19:48:39.28Z" }, - { url = "https://files.pythonhosted.org/packages/7a/21/f8c12baa16212859269dc4c3e4b413778ec1154d332896d3c4cca96ac660/debugpy-1.8.18-cp314-cp314-win_amd64.whl", hash = "sha256:714b61d753cfe3ed5e7bf0aad131506d750e271726ac86e3e265fd7eeebbe765", size = 5321982, upload-time = "2025-12-10T19:48:41.086Z" }, - { url = "https://files.pythonhosted.org/packages/dc/0d/bf7ac329c132436c57124202b5b5ccd6366e5d8e75eeb184cf078c826e8d/debugpy-1.8.18-py2.py3-none-any.whl", hash = "sha256:ab8cf0abe0fe2dfe1f7e65abc04b1db8740f9be80c1274acb625855c5c3ece6e", size = 5286576, upload-time = "2025-12-10T19:48:56.071Z" }, + { url = "https://files.pythonhosted.org/packages/71/3d/388035a31a59c26f1ecc8d86af607d0c42e20ef80074147cd07b180c4349/debugpy-1.8.19-cp313-cp313-macosx_15_0_universal2.whl", hash = 
"sha256:91e35db2672a0abaf325f4868fcac9c1674a0d9ad9bb8a8c849c03a5ebba3e6d", size = 2538859, upload-time = "2025-12-15T21:53:50.478Z" }, + { url = "https://files.pythonhosted.org/packages/4a/19/c93a0772d0962294f083dbdb113af1a7427bb632d36e5314297068f55db7/debugpy-1.8.19-cp313-cp313-manylinux_2_34_x86_64.whl", hash = "sha256:85016a73ab84dea1c1f1dcd88ec692993bcbe4532d1b49ecb5f3c688ae50c606", size = 4292575, upload-time = "2025-12-15T21:53:51.821Z" }, + { url = "https://files.pythonhosted.org/packages/5c/56/09e48ab796b0a77e3d7dc250f95251832b8bf6838c9632f6100c98bdf426/debugpy-1.8.19-cp313-cp313-win32.whl", hash = "sha256:b605f17e89ba0ecee994391194285fada89cee111cfcd29d6f2ee11cbdc40976", size = 5286209, upload-time = "2025-12-15T21:53:53.602Z" }, + { url = "https://files.pythonhosted.org/packages/fb/4e/931480b9552c7d0feebe40c73725dd7703dcc578ba9efc14fe0e6d31cfd1/debugpy-1.8.19-cp313-cp313-win_amd64.whl", hash = "sha256:c30639998a9f9cd9699b4b621942c0179a6527f083c72351f95c6ab1728d5b73", size = 5328206, upload-time = "2025-12-15T21:53:55.433Z" }, + { url = "https://files.pythonhosted.org/packages/f6/b9/cbec520c3a00508327476c7fce26fbafef98f412707e511eb9d19a2ef467/debugpy-1.8.19-cp314-cp314-macosx_15_0_universal2.whl", hash = "sha256:1e8c4d1bd230067bf1bbcdbd6032e5a57068638eb28b9153d008ecde288152af", size = 2537372, upload-time = "2025-12-15T21:53:57.318Z" }, + { url = "https://files.pythonhosted.org/packages/88/5e/cf4e4dc712a141e10d58405c58c8268554aec3c35c09cdcda7535ff13f76/debugpy-1.8.19-cp314-cp314-manylinux_2_34_x86_64.whl", hash = "sha256:d40c016c1f538dbf1762936e3aeb43a89b965069d9f60f9e39d35d9d25e6b809", size = 4268729, upload-time = "2025-12-15T21:53:58.712Z" }, + { url = "https://files.pythonhosted.org/packages/82/a3/c91a087ab21f1047db328c1d3eb5d1ff0e52de9e74f9f6f6fa14cdd93d58/debugpy-1.8.19-cp314-cp314-win32.whl", hash = "sha256:0601708223fe1cd0e27c6cce67a899d92c7d68e73690211e6788a4b0e1903f5b", size = 5286388, upload-time = "2025-12-15T21:54:00.687Z" }, + { url = "https://files.pythonhosted.org/packages/17/b8/bfdc30b6e94f1eff09f2dc9cc1f9cd1c6cde3d996bcbd36ce2d9a4956e99/debugpy-1.8.19-cp314-cp314-win_amd64.whl", hash = "sha256:8e19a725f5d486f20e53a1dde2ab8bb2c9607c40c00a42ab646def962b41125f", size = 5327741, upload-time = "2025-12-15T21:54:02.148Z" }, + { url = "https://files.pythonhosted.org/packages/25/3e/e27078370414ef35fafad2c06d182110073daaeb5d3bf734b0b1eeefe452/debugpy-1.8.19-py2.py3-none-any.whl", hash = "sha256:360ffd231a780abbc414ba0f005dad409e71c78637efe8f2bd75837132a41d38", size = 5292321, upload-time = "2025-12-15T21:54:16.024Z" }, ] [[package]] @@ -883,7 +883,7 @@ wheels = [ [[package]] name = "distributed" -version = "2025.11.0" +version = "2025.12.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "click" }, @@ -902,9 +902,9 @@ dependencies = [ { name = "urllib3" }, { name = "zict" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/c5/be/085a60b627c1f2b795827ce41d4cc1e11e74ffdadcb5235ee5fb620f7929/distributed-2025.11.0.tar.gz", hash = "sha256:372c2f0c2faa890fc42188349969ba468161a9b356df49c4ca7d9a8d551a7ace", size = 2119140, upload-time = "2025-11-06T16:57:32.391Z" } +sdist = { url = "https://files.pythonhosted.org/packages/4f/f7/25e4ed891f4b347a7c0e6ad6106b564938ddd6f1832aa03f1505d0949cb4/distributed-2025.12.0.tar.gz", hash = "sha256:b1e58f1b3d733885335817562ee1723379f23733e4ef3546f141080d9cb01a74", size = 2102841, upload-time = "2025-12-12T14:58:57.74Z" } wheels = [ - { url = 
"https://files.pythonhosted.org/packages/46/ec/da78855318971c2be94d0283a41de6941a6b9f16146fb00babc74903ae01/distributed-2025.11.0-py3-none-any.whl", hash = "sha256:1794ff25b19ba347ccce563fb1dd5898c3bb30f500b15f8c20ad373f6281b30f", size = 1009248, upload-time = "2025-11-06T16:57:28.714Z" }, + { url = "https://files.pythonhosted.org/packages/87/45/ca760deab4de448e6c0e3860fc187bcc49216eabda379f6ce68065158843/distributed-2025.12.0-py3-none-any.whl", hash = "sha256:35d18449002ea191e97f7e04a33e16f90c2243486be52d4d0f991072ea06b48a", size = 1008379, upload-time = "2025-12-12T14:58:54.195Z" }, ] [[package]] @@ -927,11 +927,11 @@ wheels = [ [[package]] name = "docutils" -version = "0.22.3" +version = "0.22.4" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/d9/02/111134bfeb6e6c7ac4c74594e39a59f6c0195dc4846afbeac3cba60f1927/docutils-0.22.3.tar.gz", hash = "sha256:21486ae730e4ca9f622677b1412b879af1791efcfba517e4c6f60be543fc8cdd", size = 2290153, upload-time = "2025-11-06T02:35:55.655Z" } +sdist = { url = "https://files.pythonhosted.org/packages/ae/b6/03bb70946330e88ffec97aefd3ea75ba575cb2e762061e0e62a213befee8/docutils-0.22.4.tar.gz", hash = "sha256:4db53b1fde9abecbb74d91230d32ab626d94f6badfc575d6db9194a49df29968", size = 2291750, upload-time = "2025-12-18T19:00:26.443Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/11/a8/c6a4b901d17399c77cd81fb001ce8961e9f5e04d3daf27e8925cb012e163/docutils-0.22.3-py3-none-any.whl", hash = "sha256:bd772e4aca73aff037958d44f2be5229ded4c09927fcf8690c577b66234d6ceb", size = 633032, upload-time = "2025-11-06T02:35:52.391Z" }, + { url = "https://files.pythonhosted.org/packages/02/10/5da547df7a391dcde17f59520a231527b8571e6f46fc8efb02ccb370ab12/docutils-0.22.4-py3-none-any.whl", hash = "sha256:d0013f540772d1420576855455d050a2180186c91c15779301ac2ccb3eeb68de", size = 633196, upload-time = "2025-12-18T19:00:18.077Z" }, ] [[package]] @@ -993,11 +993,11 @@ wheels = [ [[package]] name = "filelock" -version = "3.20.0" +version = "3.20.1" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/58/46/0028a82567109b5ef6e4d2a1f04a583fb513e6cf9527fcdd09afd817deeb/filelock-3.20.0.tar.gz", hash = "sha256:711e943b4ec6be42e1d4e6690b48dc175c822967466bb31c0c293f34334c13f4", size = 18922, upload-time = "2025-10-08T18:03:50.056Z" } +sdist = { url = "https://files.pythonhosted.org/packages/a7/23/ce7a1126827cedeb958fc043d61745754464eb56c5937c35bbf2b8e26f34/filelock-3.20.1.tar.gz", hash = "sha256:b8360948b351b80f420878d8516519a2204b07aefcdcfd24912a5d33127f188c", size = 19476, upload-time = "2025-12-15T23:54:28.027Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/76/91/7216b27286936c16f5b4d0c530087e4a54eead683e6b0b73dd0c64844af6/filelock-3.20.0-py3-none-any.whl", hash = "sha256:339b4732ffda5cd79b13f4e2711a31b0365ce445d95d243bb996273d072546a2", size = 16054, upload-time = "2025-10-08T18:03:48.35Z" }, + { url = "https://files.pythonhosted.org/packages/e3/7f/a1a97644e39e7316d850784c642093c99df1290a460df4ede27659056834/filelock-3.20.1-py3-none-any.whl", hash = "sha256:15d9e9a67306188a44baa72f569d2bfd803076269365fdea0934385da4dc361a", size = 16666, upload-time = "2025-12-15T23:54:26.874Z" }, ] [[package]] @@ -1338,14 +1338,14 @@ wheels = [ [[package]] name = "importlib-metadata" -version = "8.7.0" +version = "8.7.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "zipp" }, ] -sdist = { url = 
"https://files.pythonhosted.org/packages/76/66/650a33bd90f786193e4de4b3ad86ea60b53c89b669a5c7be931fac31cdb0/importlib_metadata-8.7.0.tar.gz", hash = "sha256:d13b81ad223b890aa16c5471f2ac3056cf76c5f10f82d6f9292f0b415f389000", size = 56641, upload-time = "2025-04-27T15:29:01.736Z" } +sdist = { url = "https://files.pythonhosted.org/packages/f3/49/3b30cad09e7771a4982d9975a8cbf64f00d4a1ececb53297f1d9a7be1b10/importlib_metadata-8.7.1.tar.gz", hash = "sha256:49fef1ae6440c182052f407c8d34a68f72efc36db9ca90dc0113398f2fdde8bb", size = 57107, upload-time = "2025-12-21T10:00:19.278Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/20/b0/36bd937216ec521246249be3bf9855081de4c5e06a0c9b4219dbeda50373/importlib_metadata-8.7.0-py3-none-any.whl", hash = "sha256:e5dd1551894c77868a30651cef00984d50e1002d06942a7101d34870c5f02afd", size = 27656, upload-time = "2025-04-27T15:29:00.214Z" }, + { url = "https://files.pythonhosted.org/packages/fa/5e/f8e9a1d23b9c20a551a8a02ea3637b4642e22c2626e3a13a9a29cdea99eb/importlib_metadata-8.7.1-py3-none-any.whl", hash = "sha256:5a1f80bf1daa489495071efbb095d75a634cf28a8bc299581244063b53176151", size = 27865, upload-time = "2025-12-21T10:00:18.329Z" }, ] [[package]] @@ -1849,7 +1849,7 @@ wheels = [ [[package]] name = "jupyterlab" -version = "4.5.0" +version = "4.5.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "async-lru" }, @@ -1866,9 +1866,9 @@ dependencies = [ { name = "tornado" }, { name = "traitlets" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/df/e5/4fa382a796a6d8e2cd867816b64f1ff27f906e43a7a83ad9eb389e448cd8/jupyterlab-4.5.0.tar.gz", hash = "sha256:aec33d6d8f1225b495ee2cf20f0514f45e6df8e360bdd7ac9bace0b7ac5177ea", size = 23989880, upload-time = "2025-11-18T13:19:00.365Z" } +sdist = { url = "https://files.pythonhosted.org/packages/09/21/413d142686a4e8f4268d985becbdb4daf060524726248e73be4773786987/jupyterlab-4.5.1.tar.gz", hash = "sha256:09da1ddfbd9eec18b5101dbb8515612aa1e47443321fb99503725a88e93d20d9", size = 23992251, upload-time = "2025-12-15T16:58:59.361Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/6c/1e/5a4d5498eba382fee667ed797cf64ae5d1b13b04356df62f067f48bb0f61/jupyterlab-4.5.0-py3-none-any.whl", hash = "sha256:88e157c75c1afff64c7dc4b801ec471450b922a4eae4305211ddd40da8201c8a", size = 12380641, upload-time = "2025-11-18T13:18:56.252Z" }, + { url = "https://files.pythonhosted.org/packages/af/c3/acced767eecc11a70c65c45295db5396c4f0c1937874937d5a76d7b177b6/jupyterlab-4.5.1-py3-none-any.whl", hash = "sha256:31b059de96de0754ff1f2ce6279774b6aab8c34d7082e9752db58207c99bd514", size = 12384821, upload-time = "2025-12-15T16:58:55.563Z" }, ] [[package]] @@ -2045,7 +2045,7 @@ wheels = [ [[package]] name = "langsmith" -version = "0.4.59" +version = "0.5.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "httpx" }, @@ -2057,9 +2057,9 @@ dependencies = [ { name = "uuid-utils" }, { name = "zstandard" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/61/71/d61524c3205bde7ec90423d997cf1a228d8adf2811110ec91ed40c8e8a34/langsmith-0.4.59.tar.gz", hash = "sha256:6b143214c2303dafb29ab12dcd05ac50bdfc60dac01c6e0450e50cee1d2415e0", size = 992784, upload-time = "2025-12-11T02:40:52.231Z" } +sdist = { url = "https://files.pythonhosted.org/packages/d8/4b/d448307e8557e36b20008d0d1cd0a58233c38d90bf978e1d093be0ca4cb2/langsmith-0.5.0.tar.gz", hash = "sha256:5cadf1ddd30e838cf61679f4a776aaef638d4b02ffbceba9f73283caebd39e1b", size = 869272, upload-time = "2025-12-16T17:35:38.78Z" } 
wheels = [ - { url = "https://files.pythonhosted.org/packages/63/54/4577ef9424debea2fa08af338489d593276520d2e2f8950575d292be612c/langsmith-0.4.59-py3-none-any.whl", hash = "sha256:97c26399286441a7b7b06b912e2801420fbbf3a049787e609d49dc975ab10bc5", size = 413051, upload-time = "2025-12-11T02:40:50.523Z" }, + { url = "https://files.pythonhosted.org/packages/ee/8a/d9bc95607846bc82fbe0b98d2592ffb5e036c97a362735ae926e3d519df7/langsmith-0.5.0-py3-none-any.whl", hash = "sha256:a83750cb3dccb33148d4ffe005e3e03080fad13e01671efbb74c9a68813bfef8", size = 273711, upload-time = "2025-12-16T17:35:37.165Z" }, ] [[package]] @@ -2202,14 +2202,14 @@ wheels = [ [[package]] name = "marshmallow" -version = "3.26.1" +version = "3.26.2" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "packaging" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/ab/5e/5e53d26b42ab75491cda89b871dab9e97c840bf12c63ec58a1919710cd06/marshmallow-3.26.1.tar.gz", hash = "sha256:e6d8affb6cb61d39d26402096dc0aee12d5a26d490a121f118d2e81dc0719dc6", size = 221825, upload-time = "2025-02-03T15:32:25.093Z" } +sdist = { url = "https://files.pythonhosted.org/packages/55/79/de6c16cc902f4fc372236926b0ce2ab7845268dcc30fb2fbb7f71b418631/marshmallow-3.26.2.tar.gz", hash = "sha256:bbe2adb5a03e6e3571b573f42527c6fe926e17467833660bebd11593ab8dfd57", size = 222095, upload-time = "2025-12-22T06:53:53.309Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/34/75/51952c7b2d3873b44a0028b1bd26a25078c18f92f256608e8d1dc61b39fd/marshmallow-3.26.1-py3-none-any.whl", hash = "sha256:3350409f20a70a7e4e11a27661187b77cdcaeb20abca41c1454fe33636bea09c", size = 50878, upload-time = "2025-02-03T15:32:22.295Z" }, + { url = "https://files.pythonhosted.org/packages/be/2f/5108cb3ee4ba6501748c4908b908e55f42a5b66245b4cfe0c99326e1ef6e/marshmallow-3.26.2-py3-none-any.whl", hash = "sha256:013fa8a3c4c276c24d26d84ce934dc964e2aa794345a0f8c7e5a7191482c8a73", size = 50964, upload-time = "2025-12-22T06:53:51.801Z" }, ] [[package]] @@ -2226,7 +2226,7 @@ wheels = [ [[package]] name = "mcp" -version = "1.23.3" +version = "1.25.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "anyio" }, @@ -2244,9 +2244,9 @@ dependencies = [ { name = "typing-inspection" }, { name = "uvicorn", marker = "sys_platform != 'emscripten'" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/a7/a4/d06a303f45997e266f2c228081abe299bbcba216cb806128e2e49095d25f/mcp-1.23.3.tar.gz", hash = "sha256:b3b0da2cc949950ce1259c7bfc1b081905a51916fcd7c8182125b85e70825201", size = 600697, upload-time = "2025-12-09T16:04:37.351Z" } +sdist = { url = "https://files.pythonhosted.org/packages/d5/2d/649d80a0ecf6a1f82632ca44bec21c0461a9d9fc8934d38cb5b319f2db5e/mcp-1.25.0.tar.gz", hash = "sha256:56310361ebf0364e2d438e5b45f7668cbb124e158bb358333cd06e49e83a6802", size = 605387, upload-time = "2025-12-19T10:19:56.985Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/32/c6/13c1a26b47b3f3a3b480783001ada4268917c9f42d78a079c336da2e75e5/mcp-1.23.3-py3-none-any.whl", hash = "sha256:32768af4b46a1b4f7df34e2bfdf5c6011e7b63d7f1b0e321d0fdef4cd6082031", size = 231570, upload-time = "2025-12-09T16:04:35.56Z" }, + { url = "https://files.pythonhosted.org/packages/e2/fc/6dc7659c2ae5ddf280477011f4213a74f806862856b796ef08f028e664bf/mcp-1.25.0-py3-none-any.whl", hash = "sha256:b37c38144a666add0862614cc79ec276e97d72aa8ca26d622818d4e278b9721a", size = 233076, upload-time = "2025-12-19T10:19:55.416Z" }, ] [[package]] @@ -2332,7 +2332,7 @@ wheels = [ 
[[package]] name = "mkdocs-material" -version = "9.7.0" +version = "9.7.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "babel" }, @@ -2347,9 +2347,9 @@ dependencies = [ { name = "pymdown-extensions" }, { name = "requests" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/9c/3b/111b84cd6ff28d9e955b5f799ef217a17bc1684ac346af333e6100e413cb/mkdocs_material-9.7.0.tar.gz", hash = "sha256:602b359844e906ee402b7ed9640340cf8a474420d02d8891451733b6b02314ec", size = 4094546, upload-time = "2025-11-11T08:49:09.73Z" } +sdist = { url = "https://files.pythonhosted.org/packages/27/e2/2ffc356cd72f1473d07c7719d82a8f2cbd261666828614ecb95b12169f41/mkdocs_material-9.7.1.tar.gz", hash = "sha256:89601b8f2c3e6c6ee0a918cc3566cb201d40bf37c3cd3c2067e26fadb8cce2b8", size = 4094392, upload-time = "2025-12-18T09:49:00.308Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/04/87/eefe8d5e764f4cf50ed91b943f8e8f96b5efd65489d8303b7a36e2e79834/mkdocs_material-9.7.0-py3-none-any.whl", hash = "sha256:da2866ea53601125ff5baa8aa06404c6e07af3c5ce3d5de95e3b52b80b442887", size = 9283770, upload-time = "2025-11-11T08:49:06.26Z" }, + { url = "https://files.pythonhosted.org/packages/3e/32/ed071cb721aca8c227718cffcf7bd539620e9799bbf2619e90c757bfd030/mkdocs_material-9.7.1-py3-none-any.whl", hash = "sha256:3f6100937d7d731f87f1e3e3b021c97f7239666b9ba1151ab476cabb96c60d5c", size = 9297166, upload-time = "2025-12-18T09:48:56.664Z" }, ] [[package]] @@ -2505,7 +2505,7 @@ wheels = [ [[package]] name = "nbclient" -version = "0.10.2" +version = "0.10.3" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "jupyter-client" }, @@ -2513,9 +2513,9 @@ dependencies = [ { name = "nbformat" }, { name = "traitlets" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/87/66/7ffd18d58eae90d5721f9f39212327695b749e23ad44b3881744eaf4d9e8/nbclient-0.10.2.tar.gz", hash = "sha256:90b7fc6b810630db87a6d0c2250b1f0ab4cf4d3c27a299b0cde78a4ed3fd9193", size = 62424, upload-time = "2024-12-19T10:32:27.164Z" } +sdist = { url = "https://files.pythonhosted.org/packages/8d/f3/1f6cf2ede4b026bc5f0b424cb41adf22f9c804e90a4dbd4fdb42291a35d5/nbclient-0.10.3.tar.gz", hash = "sha256:0baf171ee246e3bb2391da0635e719f27dc77d99aef59e0b04dcb935ee04c575", size = 62564, upload-time = "2025-12-19T15:50:09.331Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/34/6d/e7fa07f03a4a7b221d94b4d586edb754a9b0dc3c9e2c93353e9fa4e0d117/nbclient-0.10.2-py3-none-any.whl", hash = "sha256:4ffee11e788b4a27fabeb7955547e4318a5298f34342a4bfd01f2e1faaeadc3d", size = 25434, upload-time = "2024-12-19T10:32:24.139Z" }, + { url = "https://files.pythonhosted.org/packages/b2/77/0c73678f5260501a271fd7342bee5d639440f2e9e07d590f1100a056d87c/nbclient-0.10.3-py3-none-any.whl", hash = "sha256:39e9bd403504dd2484dd0fd25235bb6a683ce8cd9873356e40d880696adc9e35", size = 25473, upload-time = "2025-12-19T15:50:07.671Z" }, ] [[package]] @@ -2581,54 +2581,52 @@ wheels = [ [[package]] name = "numpy" -version = "2.3.5" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/76/65/21b3bc86aac7b8f2862db1e808f1ea22b028e30a225a34a5ede9bf8678f2/numpy-2.3.5.tar.gz", hash = "sha256:784db1dcdab56bf0517743e746dfb0f885fc68d948aba86eeec2cba234bdf1c0", size = 20584950, upload-time = "2025-11-16T22:52:42.067Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/db/69/9cde09f36da4b5a505341180a3f2e6fadc352fd4d2b7096ce9778db83f1a/numpy-2.3.5-cp313-cp313-macosx_10_13_x86_64.whl", 
hash = "sha256:d0f23b44f57077c1ede8c5f26b30f706498b4862d3ff0a7298b8411dd2f043ff", size = 16728251, upload-time = "2025-11-16T22:50:19.013Z" }, - { url = "https://files.pythonhosted.org/packages/79/fb/f505c95ceddd7027347b067689db71ca80bd5ecc926f913f1a23e65cf09b/numpy-2.3.5-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:aa5bc7c5d59d831d9773d1170acac7893ce3a5e130540605770ade83280e7188", size = 12254652, upload-time = "2025-11-16T22:50:21.487Z" }, - { url = "https://files.pythonhosted.org/packages/78/da/8c7738060ca9c31b30e9301ee0cf6c5ffdbf889d9593285a1cead337f9a5/numpy-2.3.5-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:ccc933afd4d20aad3c00bcef049cb40049f7f196e0397f1109dba6fed63267b0", size = 5083172, upload-time = "2025-11-16T22:50:24.562Z" }, - { url = "https://files.pythonhosted.org/packages/a4/b4/ee5bb2537fb9430fd2ef30a616c3672b991a4129bb1c7dcc42aa0abbe5d7/numpy-2.3.5-cp313-cp313-macosx_14_0_x86_64.whl", hash = "sha256:afaffc4393205524af9dfa400fa250143a6c3bc646c08c9f5e25a9f4b4d6a903", size = 6622990, upload-time = "2025-11-16T22:50:26.47Z" }, - { url = "https://files.pythonhosted.org/packages/95/03/dc0723a013c7d7c19de5ef29e932c3081df1c14ba582b8b86b5de9db7f0f/numpy-2.3.5-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:9c75442b2209b8470d6d5d8b1c25714270686f14c749028d2199c54e29f20b4d", size = 14248902, upload-time = "2025-11-16T22:50:28.861Z" }, - { url = "https://files.pythonhosted.org/packages/f5/10/ca162f45a102738958dcec8023062dad0cbc17d1ab99d68c4e4a6c45fb2b/numpy-2.3.5-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:11e06aa0af8c0f05104d56450d6093ee639e15f24ecf62d417329d06e522e017", size = 16597430, upload-time = "2025-11-16T22:50:31.56Z" }, - { url = "https://files.pythonhosted.org/packages/2a/51/c1e29be863588db58175175f057286900b4b3327a1351e706d5e0f8dd679/numpy-2.3.5-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:ed89927b86296067b4f81f108a2271d8926467a8868e554eaf370fc27fa3ccaf", size = 16024551, upload-time = "2025-11-16T22:50:34.242Z" }, - { url = "https://files.pythonhosted.org/packages/83/68/8236589d4dbb87253d28259d04d9b814ec0ecce7cb1c7fed29729f4c3a78/numpy-2.3.5-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:51c55fe3451421f3a6ef9a9c1439e82101c57a2c9eab9feb196a62b1a10b58ce", size = 18533275, upload-time = "2025-11-16T22:50:37.651Z" }, - { url = "https://files.pythonhosted.org/packages/40/56/2932d75b6f13465239e3b7b7e511be27f1b8161ca2510854f0b6e521c395/numpy-2.3.5-cp313-cp313-win32.whl", hash = "sha256:1978155dd49972084bd6ef388d66ab70f0c323ddee6f693d539376498720fb7e", size = 6277637, upload-time = "2025-11-16T22:50:40.11Z" }, - { url = "https://files.pythonhosted.org/packages/0c/88/e2eaa6cffb115b85ed7c7c87775cb8bcf0816816bc98ca8dbfa2ee33fe6e/numpy-2.3.5-cp313-cp313-win_amd64.whl", hash = "sha256:00dc4e846108a382c5869e77c6ed514394bdeb3403461d25a829711041217d5b", size = 12779090, upload-time = "2025-11-16T22:50:42.503Z" }, - { url = "https://files.pythonhosted.org/packages/8f/88/3f41e13a44ebd4034ee17baa384acac29ba6a4fcc2aca95f6f08ca0447d1/numpy-2.3.5-cp313-cp313-win_arm64.whl", hash = "sha256:0472f11f6ec23a74a906a00b48a4dcf3849209696dff7c189714511268d103ae", size = 10194710, upload-time = "2025-11-16T22:50:44.971Z" }, - { url = "https://files.pythonhosted.org/packages/13/cb/71744144e13389d577f867f745b7df2d8489463654a918eea2eeb166dfc9/numpy-2.3.5-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:414802f3b97f3c1eef41e530aaba3b3c1620649871d8cb38c6eaff034c2e16bd", size = 16827292, upload-time = 
"2025-11-16T22:50:47.715Z" }, - { url = "https://files.pythonhosted.org/packages/71/80/ba9dc6f2a4398e7f42b708a7fdc841bb638d353be255655498edbf9a15a8/numpy-2.3.5-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:5ee6609ac3604fa7780e30a03e5e241a7956f8e2fcfe547d51e3afa5247ac47f", size = 12378897, upload-time = "2025-11-16T22:50:51.327Z" }, - { url = "https://files.pythonhosted.org/packages/2e/6d/db2151b9f64264bcceccd51741aa39b50150de9b602d98ecfe7e0c4bff39/numpy-2.3.5-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:86d835afea1eaa143012a2d7a3f45a3adce2d7adc8b4961f0b362214d800846a", size = 5207391, upload-time = "2025-11-16T22:50:54.542Z" }, - { url = "https://files.pythonhosted.org/packages/80/ae/429bacace5ccad48a14c4ae5332f6aa8ab9f69524193511d60ccdfdc65fa/numpy-2.3.5-cp313-cp313t-macosx_14_0_x86_64.whl", hash = "sha256:30bc11310e8153ca664b14c5f1b73e94bd0503681fcf136a163de856f3a50139", size = 6721275, upload-time = "2025-11-16T22:50:56.794Z" }, - { url = "https://files.pythonhosted.org/packages/74/5b/1919abf32d8722646a38cd527bc3771eb229a32724ee6ba340ead9b92249/numpy-2.3.5-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1062fde1dcf469571705945b0f221b73928f34a20c904ffb45db101907c3454e", size = 14306855, upload-time = "2025-11-16T22:50:59.208Z" }, - { url = "https://files.pythonhosted.org/packages/a5/87/6831980559434973bebc30cd9c1f21e541a0f2b0c280d43d3afd909b66d0/numpy-2.3.5-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ce581db493ea1a96c0556360ede6607496e8bf9b3a8efa66e06477267bc831e9", size = 16657359, upload-time = "2025-11-16T22:51:01.991Z" }, - { url = "https://files.pythonhosted.org/packages/dd/91/c797f544491ee99fd00495f12ebb7802c440c1915811d72ac5b4479a3356/numpy-2.3.5-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:cc8920d2ec5fa99875b670bb86ddeb21e295cb07aa331810d9e486e0b969d946", size = 16093374, upload-time = "2025-11-16T22:51:05.291Z" }, - { url = "https://files.pythonhosted.org/packages/74/a6/54da03253afcbe7a72785ec4da9c69fb7a17710141ff9ac5fcb2e32dbe64/numpy-2.3.5-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:9ee2197ef8c4f0dfe405d835f3b6a14f5fee7782b5de51ba06fb65fc9b36e9f1", size = 18594587, upload-time = "2025-11-16T22:51:08.585Z" }, - { url = "https://files.pythonhosted.org/packages/80/e9/aff53abbdd41b0ecca94285f325aff42357c6b5abc482a3fcb4994290b18/numpy-2.3.5-cp313-cp313t-win32.whl", hash = "sha256:70b37199913c1bd300ff6e2693316c6f869c7ee16378faf10e4f5e3275b299c3", size = 6405940, upload-time = "2025-11-16T22:51:11.541Z" }, - { url = "https://files.pythonhosted.org/packages/d5/81/50613fec9d4de5480de18d4f8ef59ad7e344d497edbef3cfd80f24f98461/numpy-2.3.5-cp313-cp313t-win_amd64.whl", hash = "sha256:b501b5fa195cc9e24fe102f21ec0a44dffc231d2af79950b451e0d99cea02234", size = 12920341, upload-time = "2025-11-16T22:51:14.312Z" }, - { url = "https://files.pythonhosted.org/packages/bb/ab/08fd63b9a74303947f34f0bd7c5903b9c5532c2d287bead5bdf4c556c486/numpy-2.3.5-cp313-cp313t-win_arm64.whl", hash = "sha256:a80afd79f45f3c4a7d341f13acbe058d1ca8ac017c165d3fa0d3de6bc1a079d7", size = 10262507, upload-time = "2025-11-16T22:51:16.846Z" }, - { url = "https://files.pythonhosted.org/packages/ba/97/1a914559c19e32d6b2e233cf9a6a114e67c856d35b1d6babca571a3e880f/numpy-2.3.5-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:bf06bc2af43fa8d32d30fae16ad965663e966b1a3202ed407b84c989c3221e82", size = 16735706, upload-time = "2025-11-16T22:51:19.558Z" }, - { url = 
"https://files.pythonhosted.org/packages/57/d4/51233b1c1b13ecd796311216ae417796b88b0616cfd8a33ae4536330748a/numpy-2.3.5-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:052e8c42e0c49d2575621c158934920524f6c5da05a1d3b9bab5d8e259e045f0", size = 12264507, upload-time = "2025-11-16T22:51:22.492Z" }, - { url = "https://files.pythonhosted.org/packages/45/98/2fe46c5c2675b8306d0b4a3ec3494273e93e1226a490f766e84298576956/numpy-2.3.5-cp314-cp314-macosx_14_0_arm64.whl", hash = "sha256:1ed1ec893cff7040a02c8aa1c8611b94d395590d553f6b53629a4461dc7f7b63", size = 5093049, upload-time = "2025-11-16T22:51:25.171Z" }, - { url = "https://files.pythonhosted.org/packages/ce/0e/0698378989bb0ac5f1660c81c78ab1fe5476c1a521ca9ee9d0710ce54099/numpy-2.3.5-cp314-cp314-macosx_14_0_x86_64.whl", hash = "sha256:2dcd0808a421a482a080f89859a18beb0b3d1e905b81e617a188bd80422d62e9", size = 6626603, upload-time = "2025-11-16T22:51:27Z" }, - { url = "https://files.pythonhosted.org/packages/5e/a6/9ca0eecc489640615642a6cbc0ca9e10df70df38c4d43f5a928ff18d8827/numpy-2.3.5-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:727fd05b57df37dc0bcf1a27767a3d9a78cbbc92822445f32cc3436ba797337b", size = 14262696, upload-time = "2025-11-16T22:51:29.402Z" }, - { url = "https://files.pythonhosted.org/packages/c8/f6/07ec185b90ec9d7217a00eeeed7383b73d7e709dae2a9a021b051542a708/numpy-2.3.5-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:fffe29a1ef00883599d1dc2c51aa2e5d80afe49523c261a74933df395c15c520", size = 16597350, upload-time = "2025-11-16T22:51:32.167Z" }, - { url = "https://files.pythonhosted.org/packages/75/37/164071d1dde6a1a84c9b8e5b414fa127981bad47adf3a6b7e23917e52190/numpy-2.3.5-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:8f7f0e05112916223d3f438f293abf0727e1181b5983f413dfa2fefc4098245c", size = 16040190, upload-time = "2025-11-16T22:51:35.403Z" }, - { url = "https://files.pythonhosted.org/packages/08/3c/f18b82a406b04859eb026d204e4e1773eb41c5be58410f41ffa511d114ae/numpy-2.3.5-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:2e2eb32ddb9ccb817d620ac1d8dae7c3f641c1e5f55f531a33e8ab97960a75b8", size = 18536749, upload-time = "2025-11-16T22:51:39.698Z" }, - { url = "https://files.pythonhosted.org/packages/40/79/f82f572bf44cf0023a2fe8588768e23e1592585020d638999f15158609e1/numpy-2.3.5-cp314-cp314-win32.whl", hash = "sha256:66f85ce62c70b843bab1fb14a05d5737741e74e28c7b8b5a064de10142fad248", size = 6335432, upload-time = "2025-11-16T22:51:42.476Z" }, - { url = "https://files.pythonhosted.org/packages/a3/2e/235b4d96619931192c91660805e5e49242389742a7a82c27665021db690c/numpy-2.3.5-cp314-cp314-win_amd64.whl", hash = "sha256:e6a0bc88393d65807d751a614207b7129a310ca4fe76a74e5c7da5fa5671417e", size = 12919388, upload-time = "2025-11-16T22:51:45.275Z" }, - { url = "https://files.pythonhosted.org/packages/07/2b/29fd75ce45d22a39c61aad74f3d718e7ab67ccf839ca8b60866054eb15f8/numpy-2.3.5-cp314-cp314-win_arm64.whl", hash = "sha256:aeffcab3d4b43712bb7a60b65f6044d444e75e563ff6180af8f98dd4b905dfd2", size = 10476651, upload-time = "2025-11-16T22:51:47.749Z" }, - { url = "https://files.pythonhosted.org/packages/17/e1/f6a721234ebd4d87084cfa68d081bcba2f5cfe1974f7de4e0e8b9b2a2ba1/numpy-2.3.5-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:17531366a2e3a9e30762c000f2c43a9aaa05728712e25c11ce1dbe700c53ad41", size = 16834503, upload-time = "2025-11-16T22:51:50.443Z" }, - { url = 
"https://files.pythonhosted.org/packages/5c/1c/baf7ffdc3af9c356e1c135e57ab7cf8d247931b9554f55c467efe2c69eff/numpy-2.3.5-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:d21644de1b609825ede2f48be98dfde4656aefc713654eeee280e37cadc4e0ad", size = 12381612, upload-time = "2025-11-16T22:51:53.609Z" }, - { url = "https://files.pythonhosted.org/packages/74/91/f7f0295151407ddc9ba34e699013c32c3c91944f9b35fcf9281163dc1468/numpy-2.3.5-cp314-cp314t-macosx_14_0_arm64.whl", hash = "sha256:c804e3a5aba5460c73955c955bdbd5c08c354954e9270a2c1565f62e866bdc39", size = 5210042, upload-time = "2025-11-16T22:51:56.213Z" }, - { url = "https://files.pythonhosted.org/packages/2e/3b/78aebf345104ec50dd50a4d06ddeb46a9ff5261c33bcc58b1c4f12f85ec2/numpy-2.3.5-cp314-cp314t-macosx_14_0_x86_64.whl", hash = "sha256:cc0a57f895b96ec78969c34f682c602bf8da1a0270b09bc65673df2e7638ec20", size = 6724502, upload-time = "2025-11-16T22:51:58.584Z" }, - { url = "https://files.pythonhosted.org/packages/02/c6/7c34b528740512e57ef1b7c8337ab0b4f0bddf34c723b8996c675bc2bc91/numpy-2.3.5-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:900218e456384ea676e24ea6a0417f030a3b07306d29d7ad843957b40a9d8d52", size = 14308962, upload-time = "2025-11-16T22:52:01.698Z" }, - { url = "https://files.pythonhosted.org/packages/80/35/09d433c5262bc32d725bafc619e095b6a6651caf94027a03da624146f655/numpy-2.3.5-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:09a1bea522b25109bf8e6f3027bd810f7c1085c64a0c7ce050c1676ad0ba010b", size = 16655054, upload-time = "2025-11-16T22:52:04.267Z" }, - { url = "https://files.pythonhosted.org/packages/7a/ab/6a7b259703c09a88804fa2430b43d6457b692378f6b74b356155283566ac/numpy-2.3.5-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:04822c00b5fd0323c8166d66c701dc31b7fbd252c100acd708c48f763968d6a3", size = 16091613, upload-time = "2025-11-16T22:52:08.651Z" }, - { url = "https://files.pythonhosted.org/packages/c2/88/330da2071e8771e60d1038166ff9d73f29da37b01ec3eb43cb1427464e10/numpy-2.3.5-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:d6889ec4ec662a1a37eb4b4fb26b6100841804dac55bd9df579e326cdc146227", size = 18591147, upload-time = "2025-11-16T22:52:11.453Z" }, - { url = "https://files.pythonhosted.org/packages/51/41/851c4b4082402d9ea860c3626db5d5df47164a712cb23b54be028b184c1c/numpy-2.3.5-cp314-cp314t-win32.whl", hash = "sha256:93eebbcf1aafdf7e2ddd44c2923e2672e1010bddc014138b229e49725b4d6be5", size = 6479806, upload-time = "2025-11-16T22:52:14.641Z" }, - { url = "https://files.pythonhosted.org/packages/90/30/d48bde1dfd93332fa557cff1972fbc039e055a52021fbef4c2c4b1eefd17/numpy-2.3.5-cp314-cp314t-win_amd64.whl", hash = "sha256:c8a9958e88b65c3b27e22ca2a076311636850b612d6bbfb76e8d156aacde2aaf", size = 13105760, upload-time = "2025-11-16T22:52:17.975Z" }, - { url = "https://files.pythonhosted.org/packages/2d/fd/4b5eb0b3e888d86aee4d198c23acec7d214baaf17ea93c1adec94c9518b9/numpy-2.3.5-cp314-cp314t-win_arm64.whl", hash = "sha256:6203fdf9f3dc5bdaed7319ad8698e685c7a3be10819f41d32a0723e611733b42", size = 10545459, upload-time = "2025-11-16T22:52:20.55Z" }, +version = "2.4.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/a4/7a/6a3d14e205d292b738db449d0de649b373a59edb0d0b4493821d0a3e8718/numpy-2.4.0.tar.gz", hash = "sha256:6e504f7b16118198f138ef31ba24d985b124c2c469fe8467007cf30fd992f934", size = 20685720, upload-time = "2025-12-20T16:18:19.023Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/a7/0d/853fd96372eda07c824d24adf02e8bc92bb3731b43a9b2a39161c3667cc4/numpy-2.4.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:a152d86a3ae00ba5f47b3acf3b827509fd0b6cb7d3259665e63dafbad22a75ea", size = 16649088, upload-time = "2025-12-20T16:16:31.421Z" }, + { url = "https://files.pythonhosted.org/packages/e3/37/cc636f1f2a9f585434e20a3e6e63422f70bfe4f7f6698e941db52ea1ac9a/numpy-2.4.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:39b19251dec4de8ff8496cd0806cbe27bf0684f765abb1f4809554de93785f2d", size = 12364065, upload-time = "2025-12-20T16:16:33.491Z" }, + { url = "https://files.pythonhosted.org/packages/ed/69/0b78f37ca3690969beee54103ce5f6021709134e8020767e93ba691a72f1/numpy-2.4.0-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:009bd0ea12d3c784b6639a8457537016ce5172109e585338e11334f6a7bb88ee", size = 5192640, upload-time = "2025-12-20T16:16:35.636Z" }, + { url = "https://files.pythonhosted.org/packages/1d/2a/08569f8252abf590294dbb09a430543ec8f8cc710383abfb3e75cc73aeda/numpy-2.4.0-cp313-cp313-macosx_14_0_x86_64.whl", hash = "sha256:5fe44e277225fd3dff6882d86d3d447205d43532c3627313d17e754fb3905a0e", size = 6541556, upload-time = "2025-12-20T16:16:37.276Z" }, + { url = "https://files.pythonhosted.org/packages/93/e9/a949885a4e177493d61519377952186b6cbfdf1d6002764c664ba28349b5/numpy-2.4.0-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f935c4493eda9069851058fa0d9e39dbf6286be690066509305e52912714dbb2", size = 14396562, upload-time = "2025-12-20T16:16:38.953Z" }, + { url = "https://files.pythonhosted.org/packages/99/98/9d4ad53b0e9ef901c2ef1d550d2136f5ac42d3fd2988390a6def32e23e48/numpy-2.4.0-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:8cfa5f29a695cb7438965e6c3e8d06e0416060cf0d709c1b1c1653a939bf5c2a", size = 16351719, upload-time = "2025-12-20T16:16:41.503Z" }, + { url = "https://files.pythonhosted.org/packages/28/de/5f3711a38341d6e8dd619f6353251a0cdd07f3d6d101a8fd46f4ef87f895/numpy-2.4.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:ba0cb30acd3ef11c94dc27fbfba68940652492bc107075e7ffe23057f9425681", size = 16176053, upload-time = "2025-12-20T16:16:44.552Z" }, + { url = "https://files.pythonhosted.org/packages/2a/5b/2a3753dc43916501b4183532e7ace862e13211042bceafa253afb5c71272/numpy-2.4.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:60e8c196cd82cbbd4f130b5290007e13e6de3eca79f0d4d38014769d96a7c475", size = 18277859, upload-time = "2025-12-20T16:16:47.174Z" }, + { url = "https://files.pythonhosted.org/packages/2c/c5/a18bcdd07a941db3076ef489d036ab16d2bfc2eae0cf27e5a26e29189434/numpy-2.4.0-cp313-cp313-win32.whl", hash = "sha256:5f48cb3e88fbc294dc90e215d86fbaf1c852c63dbdb6c3a3e63f45c4b57f7344", size = 5953849, upload-time = "2025-12-20T16:16:49.554Z" }, + { url = "https://files.pythonhosted.org/packages/4f/f1/719010ff8061da6e8a26e1980cf090412d4f5f8060b31f0c45d77dd67a01/numpy-2.4.0-cp313-cp313-win_amd64.whl", hash = "sha256:a899699294f28f7be8992853c0c60741f16ff199205e2e6cdca155762cbaa59d", size = 12302840, upload-time = "2025-12-20T16:16:51.227Z" }, + { url = "https://files.pythonhosted.org/packages/f5/5a/b3d259083ed8b4d335270c76966cb6cf14a5d1b69e1a608994ac57a659e6/numpy-2.4.0-cp313-cp313-win_arm64.whl", hash = "sha256:9198f447e1dc5647d07c9a6bbe2063cc0132728cc7175b39dbc796da5b54920d", size = 10308509, upload-time = "2025-12-20T16:16:53.313Z" }, + { url = 
"https://files.pythonhosted.org/packages/31/01/95edcffd1bb6c0633df4e808130545c4f07383ab629ac7e316fb44fff677/numpy-2.4.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:74623f2ab5cc3f7c886add4f735d1031a1d2be4a4ae63c0546cfd74e7a31ddf6", size = 12491815, upload-time = "2025-12-20T16:16:55.496Z" }, + { url = "https://files.pythonhosted.org/packages/59/ea/5644b8baa92cc1c7163b4b4458c8679852733fa74ca49c942cfa82ded4e0/numpy-2.4.0-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:0804a8e4ab070d1d35496e65ffd3cf8114c136a2b81f61dfab0de4b218aacfd5", size = 5320321, upload-time = "2025-12-20T16:16:57.468Z" }, + { url = "https://files.pythonhosted.org/packages/26/4e/e10938106d70bc21319bd6a86ae726da37edc802ce35a3a71ecdf1fdfe7f/numpy-2.4.0-cp313-cp313t-macosx_14_0_x86_64.whl", hash = "sha256:02a2038eb27f9443a8b266a66911e926566b5a6ffd1a689b588f7f35b81e7dc3", size = 6641635, upload-time = "2025-12-20T16:16:59.379Z" }, + { url = "https://files.pythonhosted.org/packages/b3/8d/a8828e3eaf5c0b4ab116924df82f24ce3416fa38d0674d8f708ddc6c8aac/numpy-2.4.0-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1889b3a3f47a7b5bee16bc25a2145bd7cb91897f815ce3499db64c7458b6d91d", size = 14456053, upload-time = "2025-12-20T16:17:01.768Z" }, + { url = "https://files.pythonhosted.org/packages/68/a1/17d97609d87d4520aa5ae2dcfb32305654550ac6a35effb946d303e594ce/numpy-2.4.0-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:85eef4cb5625c47ee6425c58a3502555e10f45ee973da878ac8248ad58c136f3", size = 16401702, upload-time = "2025-12-20T16:17:04.235Z" }, + { url = "https://files.pythonhosted.org/packages/18/32/0f13c1b2d22bea1118356b8b963195446f3af124ed7a5adfa8fdecb1b6ca/numpy-2.4.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:6dc8b7e2f4eb184b37655195f421836cfae6f58197b67e3ffc501f1333d993fa", size = 16242493, upload-time = "2025-12-20T16:17:06.856Z" }, + { url = "https://files.pythonhosted.org/packages/ae/23/48f21e3d309fbc137c068a1475358cbd3a901b3987dcfc97a029ab3068e2/numpy-2.4.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:44aba2f0cafd287871a495fb3163408b0bd25bbce135c6f621534a07f4f7875c", size = 18324222, upload-time = "2025-12-20T16:17:09.392Z" }, + { url = "https://files.pythonhosted.org/packages/ac/52/41f3d71296a3dcaa4f456aaa3c6fc8e745b43d0552b6bde56571bb4b4a0f/numpy-2.4.0-cp313-cp313t-win32.whl", hash = "sha256:20c115517513831860c573996e395707aa9fb691eb179200125c250e895fcd93", size = 6076216, upload-time = "2025-12-20T16:17:11.437Z" }, + { url = "https://files.pythonhosted.org/packages/35/ff/46fbfe60ab0710d2a2b16995f708750307d30eccbb4c38371ea9e986866e/numpy-2.4.0-cp313-cp313t-win_amd64.whl", hash = "sha256:b48e35f4ab6f6a7597c46e301126ceba4c44cd3280e3750f85db48b082624fa4", size = 12444263, upload-time = "2025-12-20T16:17:13.182Z" }, + { url = "https://files.pythonhosted.org/packages/a3/e3/9189ab319c01d2ed556c932ccf55064c5d75bb5850d1df7a482ce0badead/numpy-2.4.0-cp313-cp313t-win_arm64.whl", hash = "sha256:4d1cfce39e511069b11e67cd0bd78ceff31443b7c9e5c04db73c7a19f572967c", size = 10378265, upload-time = "2025-12-20T16:17:15.211Z" }, + { url = "https://files.pythonhosted.org/packages/ab/ed/52eac27de39d5e5a6c9aadabe672bc06f55e24a3d9010cd1183948055d76/numpy-2.4.0-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:c95eb6db2884917d86cde0b4d4cf31adf485c8ec36bf8696dd66fa70de96f36b", size = 16647476, upload-time = "2025-12-20T16:17:17.671Z" }, + { url = 
"https://files.pythonhosted.org/packages/77/c0/990ce1b7fcd4e09aeaa574e2a0a839589e4b08b2ca68070f1acb1fea6736/numpy-2.4.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:65167da969cd1ec3a1df31cb221ca3a19a8aaa25370ecb17d428415e93c1935e", size = 12374563, upload-time = "2025-12-20T16:17:20.216Z" }, + { url = "https://files.pythonhosted.org/packages/37/7c/8c5e389c6ae8f5fd2277a988600d79e9625db3fff011a2d87ac80b881a4c/numpy-2.4.0-cp314-cp314-macosx_14_0_arm64.whl", hash = "sha256:3de19cfecd1465d0dcf8a5b5ea8b3155b42ed0b639dba4b71e323d74f2a3be5e", size = 5203107, upload-time = "2025-12-20T16:17:22.47Z" }, + { url = "https://files.pythonhosted.org/packages/e6/94/ca5b3bd6a8a70a5eec9a0b8dd7f980c1eff4b8a54970a9a7fef248ef564f/numpy-2.4.0-cp314-cp314-macosx_14_0_x86_64.whl", hash = "sha256:6c05483c3136ac4c91b4e81903cb53a8707d316f488124d0398499a4f8e8ef51", size = 6538067, upload-time = "2025-12-20T16:17:24.001Z" }, + { url = "https://files.pythonhosted.org/packages/79/43/993eb7bb5be6761dde2b3a3a594d689cec83398e3f58f4758010f3b85727/numpy-2.4.0-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:36667db4d6c1cea79c8930ab72fadfb4060feb4bfe724141cd4bd064d2e5f8ce", size = 14411926, upload-time = "2025-12-20T16:17:25.822Z" }, + { url = "https://files.pythonhosted.org/packages/03/75/d4c43b61de473912496317a854dac54f1efec3eeb158438da6884b70bb90/numpy-2.4.0-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9a818668b674047fd88c4cddada7ab8f1c298812783e8328e956b78dc4807f9f", size = 16354295, upload-time = "2025-12-20T16:17:28.308Z" }, + { url = "https://files.pythonhosted.org/packages/b8/0a/b54615b47ee8736a6461a4bb6749128dd3435c5a759d5663f11f0e9af4ac/numpy-2.4.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:1ee32359fb7543b7b7bd0b2f46294db27e29e7bbdf70541e81b190836cd83ded", size = 16190242, upload-time = "2025-12-20T16:17:30.993Z" }, + { url = "https://files.pythonhosted.org/packages/98/ce/ea207769aacad6246525ec6c6bbd66a2bf56c72443dc10e2f90feed29290/numpy-2.4.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:e493962256a38f58283de033d8af176c5c91c084ea30f15834f7545451c42059", size = 18280875, upload-time = "2025-12-20T16:17:33.327Z" }, + { url = "https://files.pythonhosted.org/packages/17/ef/ec409437aa962ea372ed601c519a2b141701683ff028f894b7466f0ab42b/numpy-2.4.0-cp314-cp314-win32.whl", hash = "sha256:6bbaebf0d11567fa8926215ae731e1d58e6ec28a8a25235b8a47405d301332db", size = 6002530, upload-time = "2025-12-20T16:17:35.729Z" }, + { url = "https://files.pythonhosted.org/packages/5f/4a/5cb94c787a3ed1ac65e1271b968686521169a7b3ec0b6544bb3ca32960b0/numpy-2.4.0-cp314-cp314-win_amd64.whl", hash = "sha256:3d857f55e7fdf7c38ab96c4558c95b97d1c685be6b05c249f5fdafcbd6f9899e", size = 12435890, upload-time = "2025-12-20T16:17:37.599Z" }, + { url = "https://files.pythonhosted.org/packages/48/a0/04b89db963af9de1104975e2544f30de89adbf75b9e75f7dd2599be12c79/numpy-2.4.0-cp314-cp314-win_arm64.whl", hash = "sha256:bb50ce5fb202a26fd5404620e7ef820ad1ab3558b444cb0b55beb7ef66cd2d63", size = 10591892, upload-time = "2025-12-20T16:17:39.649Z" }, + { url = "https://files.pythonhosted.org/packages/53/e5/d74b5ccf6712c06c7a545025a6a71bfa03bdc7e0568b405b0d655232fd92/numpy-2.4.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:355354388cba60f2132df297e2d53053d4063f79077b67b481d21276d61fc4df", size = 12494312, upload-time = "2025-12-20T16:17:41.714Z" }, + { url = 
"https://files.pythonhosted.org/packages/c2/08/3ca9cc2ddf54dfee7ae9a6479c071092a228c68aef08252aa08dac2af002/numpy-2.4.0-cp314-cp314t-macosx_14_0_arm64.whl", hash = "sha256:1d8f9fde5f6dc1b6fc34df8162f3b3079365468703fee7f31d4e0cc8c63baed9", size = 5322862, upload-time = "2025-12-20T16:17:44.145Z" }, + { url = "https://files.pythonhosted.org/packages/87/74/0bb63a68394c0c1e52670cfff2e309afa41edbe11b3327d9af29e4383f34/numpy-2.4.0-cp314-cp314t-macosx_14_0_x86_64.whl", hash = "sha256:e0434aa22c821f44eeb4c650b81c7fbdd8c0122c6c4b5a576a76d5a35625ecd9", size = 6644986, upload-time = "2025-12-20T16:17:46.203Z" }, + { url = "https://files.pythonhosted.org/packages/06/8f/9264d9bdbcf8236af2823623fe2f3981d740fc3461e2787e231d97c38c28/numpy-2.4.0-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:40483b2f2d3ba7aad426443767ff5632ec3156ef09742b96913787d13c336471", size = 14457958, upload-time = "2025-12-20T16:17:48.017Z" }, + { url = "https://files.pythonhosted.org/packages/8c/d9/f9a69ae564bbc7236a35aa883319364ef5fd41f72aa320cc1cbe66148fe2/numpy-2.4.0-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d9e6a7664ddd9746e20b7325351fe1a8408d0a2bf9c63b5e898290ddc8f09544", size = 16398394, upload-time = "2025-12-20T16:17:50.409Z" }, + { url = "https://files.pythonhosted.org/packages/34/c7/39241501408dde7f885d241a98caba5421061a2c6d2b2197ac5e3aa842d8/numpy-2.4.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:ecb0019d44f4cdb50b676c5d0cb4b1eae8e15d1ed3d3e6639f986fc92b2ec52c", size = 16241044, upload-time = "2025-12-20T16:17:52.661Z" }, + { url = "https://files.pythonhosted.org/packages/7c/95/cae7effd90e065a95e59fe710eeee05d7328ed169776dfdd9f789e032125/numpy-2.4.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:d0ffd9e2e4441c96a9c91ec1783285d80bf835b677853fc2770a89d50c1e48ac", size = 18321772, upload-time = "2025-12-20T16:17:54.947Z" }, + { url = "https://files.pythonhosted.org/packages/96/df/3c6c279accd2bfb968a76298e5b276310bd55d243df4fa8ac5816d79347d/numpy-2.4.0-cp314-cp314t-win32.whl", hash = "sha256:77f0d13fa87036d7553bf81f0e1fe3ce68d14c9976c9851744e4d3e91127e95f", size = 6148320, upload-time = "2025-12-20T16:17:57.249Z" }, + { url = "https://files.pythonhosted.org/packages/92/8d/f23033cce252e7a75cae853d17f582e86534c46404dea1c8ee094a9d6d84/numpy-2.4.0-cp314-cp314t-win_amd64.whl", hash = "sha256:b1f5b45829ac1848893f0ddf5cb326110604d6df96cdc255b0bf9edd154104d4", size = 12623460, upload-time = "2025-12-20T16:17:58.963Z" }, + { url = "https://files.pythonhosted.org/packages/a4/4f/1f8475907d1a7c4ef9020edf7f39ea2422ec896849245f00688e4b268a71/numpy-2.4.0-cp314-cp314t-win_arm64.whl", hash = "sha256:23a3e9d1a6f360267e8fbb38ba5db355a6a7e9be71d7fce7ab3125e88bb646c8", size = 10661799, upload-time = "2025-12-20T16:18:01.078Z" }, ] [[package]] @@ -2646,7 +2644,7 @@ wheels = [ [[package]] name = "openai" -version = "2.9.0" +version = "2.14.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "anyio" }, @@ -2658,9 +2656,9 @@ dependencies = [ { name = "tqdm" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/09/48/516290f38745cc1e72856f50e8afed4a7f9ac396a5a18f39e892ab89dfc2/openai-2.9.0.tar.gz", hash = "sha256:b52ec65727fc8f1eed2fbc86c8eac0998900c7ef63aa2eb5c24b69717c56fa5f", size = 608202, upload-time = "2025-12-04T18:15:09.01Z" } +sdist = { url = "https://files.pythonhosted.org/packages/d8/b1/12fe1c196bea326261718eb037307c1c1fe1dedc2d2d4de777df822e6238/openai-2.14.0.tar.gz", hash = 
"sha256:419357bedde9402d23bf8f2ee372fca1985a73348debba94bddff06f19459952", size = 626938, upload-time = "2025-12-19T03:28:45.742Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/59/fd/ae2da789cd923dd033c99b8d544071a827c92046b150db01cfa5cea5b3fd/openai-2.9.0-py3-none-any.whl", hash = "sha256:0d168a490fbb45630ad508a6f3022013c155a68fd708069b6a1a01a5e8f0ffad", size = 1030836, upload-time = "2025-12-04T18:15:07.063Z" }, + { url = "https://files.pythonhosted.org/packages/27/4b/7c1a00c2c3fbd004253937f7520f692a9650767aa73894d7a34f0d65d3f4/openai-2.14.0-py3-none-any.whl", hash = "sha256:7ea40aca4ffc4c4a776e77679021b47eec1160e341f42ae086ba949c9dcc9183", size = 1067558, upload-time = "2025-12-19T03:28:43.727Z" }, ] [[package]] @@ -3249,24 +3247,24 @@ crypto = [ [[package]] name = "pymdown-extensions" -version = "10.18" +version = "10.19.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "markdown" }, { name = "pyyaml" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/d4/95/e4fa281e3f13b3d9c4aaebb21ef44879840325fa418276dd921209a5e9f9/pymdown_extensions-10.18.tar.gz", hash = "sha256:20252abe6367354b24191431617a072ee6be9f68c5afcc74ea5573508a61f9e5", size = 847697, upload-time = "2025-12-07T17:22:12.857Z" } +sdist = { url = "https://files.pythonhosted.org/packages/72/2d/9f30cee56d4d6d222430d401e85b0a6a1ae229819362f5786943d1a8c03b/pymdown_extensions-10.19.1.tar.gz", hash = "sha256:4969c691009a389fb1f9712dd8e7bd70dcc418d15a0faf70acb5117d022f7de8", size = 847839, upload-time = "2025-12-14T17:25:24.42Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/46/a4/aa2bada4a2fd648f40f19affa55d2c01dc7ff5ea9cffd3dfdeb6114951db/pymdown_extensions-10.18-py3-none-any.whl", hash = "sha256:090bca72be43f7d3186374e23c782899dbef9dc153ef24c59dcd3c346f9ffcae", size = 266703, upload-time = "2025-12-07T17:22:11.22Z" }, + { url = "https://files.pythonhosted.org/packages/fb/35/b763e8fbcd51968329b9adc52d188fc97859f85f2ee15fe9f379987d99c5/pymdown_extensions-10.19.1-py3-none-any.whl", hash = "sha256:e8698a66055b1dc0dca2a7f2c9d0ea6f5faa7834a9c432e3535ab96c0c4e509b", size = 266693, upload-time = "2025-12-14T17:25:22.999Z" }, ] [[package]] name = "pyparsing" -version = "3.2.5" +version = "3.3.1" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/f2/a5/181488fc2b9d093e3972d2a472855aae8a03f000592dbfce716a512b3359/pyparsing-3.2.5.tar.gz", hash = "sha256:2df8d5b7b2802ef88e8d016a2eb9c7aeaa923529cd251ed0fe4608275d4105b6", size = 1099274, upload-time = "2025-09-21T04:11:06.277Z" } +sdist = { url = "https://files.pythonhosted.org/packages/33/c1/1d9de9aeaa1b89b0186e5fe23294ff6517fce1bc69149185577cd31016b2/pyparsing-3.3.1.tar.gz", hash = "sha256:47fad0f17ac1e2cad3de3b458570fbc9b03560aa029ed5e16ee5554da9a2251c", size = 1550512, upload-time = "2025-12-23T03:14:04.391Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/10/5e/1aa9a93198c6b64513c9d7752de7422c06402de6600a8767da1524f9570b/pyparsing-3.2.5-py3-none-any.whl", hash = "sha256:e38a4f02064cf41fe6593d328d0512495ad1f3d8a91c4f73fc401b3079a59a5e", size = 113890, upload-time = "2025-09-21T04:11:04.117Z" }, + { url = "https://files.pythonhosted.org/packages/8b/40/2614036cdd416452f5bf98ec037f38a1afb17f327cb8e6b652d4729e0af8/pyparsing-3.3.1-py3-none-any.whl", hash = "sha256:023b5e7e5520ad96642e2c6db4cb683d3970bd640cdf7115049a6e9c3682df82", size = 121793, upload-time = "2025-12-23T03:14:02.103Z" }, ] [[package]] @@ -3408,11 +3406,11 @@ wheels = [ [[package]] 
name = "python-multipart" -version = "0.0.20" +version = "0.0.21" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/f3/87/f44d7c9f274c7ee665a29b885ec97089ec5dc034c7f3fafa03da9e39a09e/python_multipart-0.0.20.tar.gz", hash = "sha256:8dd0cab45b8e23064ae09147625994d090fa46f5b0d1e13af944c331a7fa9d13", size = 37158, upload-time = "2024-12-16T19:45:46.972Z" } +sdist = { url = "https://files.pythonhosted.org/packages/78/96/804520d0850c7db98e5ccb70282e29208723f0964e88ffd9d0da2f52ea09/python_multipart-0.0.21.tar.gz", hash = "sha256:7137ebd4d3bbf70ea1622998f902b97a29434a9e8dc40eb203bbcf7c2a2cba92", size = 37196, upload-time = "2025-12-17T09:24:22.446Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/45/58/38b5afbc1a800eeea951b9285d3912613f2603bdf897a4ab0f4bd7f405fc/python_multipart-0.0.20-py3-none-any.whl", hash = "sha256:8a62d3a8335e06589fe01f2a3e178cdcc632f3fbe0d492ad9ee0ec35aab1f104", size = 24546, upload-time = "2024-12-16T19:45:44.423Z" }, + { url = "https://files.pythonhosted.org/packages/aa/76/03af049af4dcee5d27442f71b6924f01f3efb5d2bd34f23fcd563f2cc5f5/python_multipart-0.0.21-py3-none-any.whl", hash = "sha256:cf7a6713e01c87aa35387f4774e812c4361150938d20d232800f75ffcf266090", size = 24541, upload-time = "2025-12-17T09:24:21.153Z" }, ] [[package]] @@ -3725,11 +3723,11 @@ wheels = [ [[package]] name = "roman-numerals" -version = "3.1.0" +version = "4.1.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/57/5b/1bcda2c6a8acec5b310dd70f732400827b96f05d815834f0f112b91b3539/roman_numerals-3.1.0.tar.gz", hash = "sha256:384e36fc1e8d4bd361bdb3672841faae7a345b3f708aae9895d074c878332551", size = 9069, upload-time = "2025-03-12T00:41:08.837Z" } +sdist = { url = "https://files.pythonhosted.org/packages/ae/f9/41dc953bbeb056c17d5f7a519f50fdf010bd0553be2d630bc69d1e022703/roman_numerals-4.1.0.tar.gz", hash = "sha256:1af8b147eb1405d5839e78aeb93131690495fe9da5c91856cb33ad55a7f1e5b2", size = 9077, upload-time = "2025-12-17T18:25:34.381Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/82/1d/7356f115a0e5faf8dc59894a3e9fc8b1821ab949163458b0072db0a12a68/roman_numerals-3.1.0-py3-none-any.whl", hash = "sha256:842ae5fd12912d62720c9aad8cab706e8c692556d01a38443e051ee6cc158d90", size = 7709, upload-time = "2025-03-12T00:41:07.626Z" }, + { url = "https://files.pythonhosted.org/packages/04/54/6f679c435d28e0a568d8e8a7c0a93a09010818634c3c3907fc98d8983770/roman_numerals-4.1.0-py3-none-any.whl", hash = "sha256:647ba99caddc2cc1e55a51e4360689115551bf4476d90e8162cf8c345fe233c7", size = 7676, upload-time = "2025-12-17T18:25:33.098Z" }, ] [[package]] @@ -3800,28 +3798,28 @@ wheels = [ [[package]] name = "ruff" -version = "0.14.8" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/ed/d9/f7a0c4b3a2bf2556cd5d99b05372c29980249ef71e8e32669ba77428c82c/ruff-0.14.8.tar.gz", hash = "sha256:774ed0dd87d6ce925e3b8496feb3a00ac564bea52b9feb551ecd17e0a23d1eed", size = 5765385, upload-time = "2025-12-04T15:06:17.669Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/48/b8/9537b52010134b1d2b72870cc3f92d5fb759394094741b09ceccae183fbe/ruff-0.14.8-py3-none-linux_armv6l.whl", hash = "sha256:ec071e9c82eca417f6111fd39f7043acb53cd3fde9b1f95bbed745962e345afb", size = 13441540, upload-time = "2025-12-04T15:06:14.896Z" }, - { url = 
"https://files.pythonhosted.org/packages/24/00/99031684efb025829713682012b6dd37279b1f695ed1b01725f85fd94b38/ruff-0.14.8-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:8cdb162a7159f4ca36ce980a18c43d8f036966e7f73f866ac8f493b75e0c27e9", size = 13669384, upload-time = "2025-12-04T15:06:51.809Z" }, - { url = "https://files.pythonhosted.org/packages/72/64/3eb5949169fc19c50c04f28ece2c189d3b6edd57e5b533649dae6ca484fe/ruff-0.14.8-py3-none-macosx_11_0_arm64.whl", hash = "sha256:2e2fcbefe91f9fad0916850edf0854530c15bd1926b6b779de47e9ab619ea38f", size = 12806917, upload-time = "2025-12-04T15:06:08.925Z" }, - { url = "https://files.pythonhosted.org/packages/c4/08/5250babb0b1b11910f470370ec0cbc67470231f7cdc033cee57d4976f941/ruff-0.14.8-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a9d70721066a296f45786ec31916dc287b44040f553da21564de0ab4d45a869b", size = 13256112, upload-time = "2025-12-04T15:06:23.498Z" }, - { url = "https://files.pythonhosted.org/packages/78/4c/6c588e97a8e8c2d4b522c31a579e1df2b4d003eddfbe23d1f262b1a431ff/ruff-0.14.8-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:2c87e09b3cd9d126fc67a9ecd3b5b1d3ded2b9c7fce3f16e315346b9d05cfb52", size = 13227559, upload-time = "2025-12-04T15:06:33.432Z" }, - { url = "https://files.pythonhosted.org/packages/23/ce/5f78cea13eda8eceac71b5f6fa6e9223df9b87bb2c1891c166d1f0dce9f1/ruff-0.14.8-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1d62cb310c4fbcb9ee4ac023fe17f984ae1e12b8a4a02e3d21489f9a2a5f730c", size = 13896379, upload-time = "2025-12-04T15:06:02.687Z" }, - { url = "https://files.pythonhosted.org/packages/cf/79/13de4517c4dadce9218a20035b21212a4c180e009507731f0d3b3f5df85a/ruff-0.14.8-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:1af35c2d62633d4da0521178e8a2641c636d2a7153da0bac1b30cfd4ccd91344", size = 15372786, upload-time = "2025-12-04T15:06:29.828Z" }, - { url = "https://files.pythonhosted.org/packages/00/06/33df72b3bb42be8a1c3815fd4fae83fa2945fc725a25d87ba3e42d1cc108/ruff-0.14.8-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:25add4575ffecc53d60eed3f24b1e934493631b48ebbc6ebaf9d8517924aca4b", size = 14990029, upload-time = "2025-12-04T15:06:36.812Z" }, - { url = "https://files.pythonhosted.org/packages/64/61/0f34927bd90925880394de0e081ce1afab66d7b3525336f5771dcf0cb46c/ruff-0.14.8-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4c943d847b7f02f7db4201a0600ea7d244d8a404fbb639b439e987edcf2baf9a", size = 14407037, upload-time = "2025-12-04T15:06:39.979Z" }, - { url = "https://files.pythonhosted.org/packages/96/bc/058fe0aefc0fbf0d19614cb6d1a3e2c048f7dc77ca64957f33b12cfdc5ef/ruff-0.14.8-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cb6e8bf7b4f627548daa1b69283dac5a296bfe9ce856703b03130732e20ddfe2", size = 14102390, upload-time = "2025-12-04T15:06:46.372Z" }, - { url = "https://files.pythonhosted.org/packages/af/a4/e4f77b02b804546f4c17e8b37a524c27012dd6ff05855d2243b49a7d3cb9/ruff-0.14.8-py3-none-manylinux_2_31_riscv64.whl", hash = "sha256:7aaf2974f378e6b01d1e257c6948207aec6a9b5ba53fab23d0182efb887a0e4a", size = 14230793, upload-time = "2025-12-04T15:06:20.497Z" }, - { url = "https://files.pythonhosted.org/packages/3f/52/bb8c02373f79552e8d087cedaffad76b8892033d2876c2498a2582f09dcf/ruff-0.14.8-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:e5758ca513c43ad8a4ef13f0f081f80f08008f410790f3611a21a92421ab045b", size = 13160039, upload-time = "2025-12-04T15:06:49.06Z" }, - { url = 
"https://files.pythonhosted.org/packages/1f/ad/b69d6962e477842e25c0b11622548df746290cc6d76f9e0f4ed7456c2c31/ruff-0.14.8-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:f74f7ba163b6e85a8d81a590363bf71618847e5078d90827749bfda1d88c9cdf", size = 13205158, upload-time = "2025-12-04T15:06:54.574Z" }, - { url = "https://files.pythonhosted.org/packages/06/63/54f23da1315c0b3dfc1bc03fbc34e10378918a20c0b0f086418734e57e74/ruff-0.14.8-py3-none-musllinux_1_2_i686.whl", hash = "sha256:eed28f6fafcc9591994c42254f5a5c5ca40e69a30721d2ab18bb0bb3baac3ab6", size = 13469550, upload-time = "2025-12-04T15:05:59.209Z" }, - { url = "https://files.pythonhosted.org/packages/70/7d/a4d7b1961e4903bc37fffb7ddcfaa7beb250f67d97cfd1ee1d5cddb1ec90/ruff-0.14.8-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:21d48fa744c9d1cb8d71eb0a740c4dd02751a5de9db9a730a8ef75ca34cf138e", size = 14211332, upload-time = "2025-12-04T15:06:06.027Z" }, - { url = "https://files.pythonhosted.org/packages/5d/93/2a5063341fa17054e5c86582136e9895db773e3c2ffb770dde50a09f35f0/ruff-0.14.8-py3-none-win32.whl", hash = "sha256:15f04cb45c051159baebb0f0037f404f1dc2f15a927418f29730f411a79bc4e7", size = 13151890, upload-time = "2025-12-04T15:06:11.668Z" }, - { url = "https://files.pythonhosted.org/packages/02/1c/65c61a0859c0add13a3e1cbb6024b42de587456a43006ca2d4fd3d1618fe/ruff-0.14.8-py3-none-win_amd64.whl", hash = "sha256:9eeb0b24242b5bbff3011409a739929f497f3fb5fe3b5698aba5e77e8c833097", size = 14537826, upload-time = "2025-12-04T15:06:26.409Z" }, - { url = "https://files.pythonhosted.org/packages/6d/63/8b41cea3afd7f58eb64ac9251668ee0073789a3bc9ac6f816c8c6fef986d/ruff-0.14.8-py3-none-win_arm64.whl", hash = "sha256:965a582c93c63fe715fd3e3f8aa37c4b776777203d8e1d8aa3cc0c14424a4b99", size = 13634522, upload-time = "2025-12-04T15:06:43.212Z" }, +version = "0.14.10" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/57/08/52232a877978dd8f9cf2aeddce3e611b40a63287dfca29b6b8da791f5e8d/ruff-0.14.10.tar.gz", hash = "sha256:9a2e830f075d1a42cd28420d7809ace390832a490ed0966fe373ba288e77aaf4", size = 5859763, upload-time = "2025-12-18T19:28:57.98Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/60/01/933704d69f3f05ee16ef11406b78881733c186fe14b6a46b05cfcaf6d3b2/ruff-0.14.10-py3-none-linux_armv6l.whl", hash = "sha256:7a3ce585f2ade3e1f29ec1b92df13e3da262178df8c8bdf876f48fa0e8316c49", size = 13527080, upload-time = "2025-12-18T19:29:25.642Z" }, + { url = "https://files.pythonhosted.org/packages/df/58/a0349197a7dfa603ffb7f5b0470391efa79ddc327c1e29c4851e85b09cc5/ruff-0.14.10-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:674f9be9372907f7257c51f1d4fc902cb7cf014b9980152b802794317941f08f", size = 13797320, upload-time = "2025-12-18T19:29:02.571Z" }, + { url = "https://files.pythonhosted.org/packages/7b/82/36be59f00a6082e38c23536df4e71cdbc6af8d7c707eade97fcad5c98235/ruff-0.14.10-py3-none-macosx_11_0_arm64.whl", hash = "sha256:d85713d522348837ef9df8efca33ccb8bd6fcfc86a2cde3ccb4bc9d28a18003d", size = 12918434, upload-time = "2025-12-18T19:28:51.202Z" }, + { url = "https://files.pythonhosted.org/packages/a6/00/45c62a7f7e34da92a25804f813ebe05c88aa9e0c25e5cb5a7d23dd7450e3/ruff-0.14.10-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6987ebe0501ae4f4308d7d24e2d0fe3d7a98430f5adfd0f1fead050a740a3a77", size = 13371961, upload-time = "2025-12-18T19:29:04.991Z" }, + { url = 
"https://files.pythonhosted.org/packages/40/31/a5906d60f0405f7e57045a70f2d57084a93ca7425f22e1d66904769d1628/ruff-0.14.10-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:16a01dfb7b9e4eee556fbfd5392806b1b8550c9b4a9f6acd3dbe6812b193c70a", size = 13275629, upload-time = "2025-12-18T19:29:21.381Z" }, + { url = "https://files.pythonhosted.org/packages/3e/60/61c0087df21894cf9d928dc04bcd4fb10e8b2e8dca7b1a276ba2155b2002/ruff-0.14.10-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7165d31a925b7a294465fa81be8c12a0e9b60fb02bf177e79067c867e71f8b1f", size = 14029234, upload-time = "2025-12-18T19:29:00.132Z" }, + { url = "https://files.pythonhosted.org/packages/44/84/77d911bee3b92348b6e5dab5a0c898d87084ea03ac5dc708f46d88407def/ruff-0.14.10-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:c561695675b972effb0c0a45db233f2c816ff3da8dcfbe7dfc7eed625f218935", size = 15449890, upload-time = "2025-12-18T19:28:53.573Z" }, + { url = "https://files.pythonhosted.org/packages/e9/36/480206eaefa24a7ec321582dda580443a8f0671fdbf6b1c80e9c3e93a16a/ruff-0.14.10-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4bb98fcbbc61725968893682fd4df8966a34611239c9fd07a1f6a07e7103d08e", size = 15123172, upload-time = "2025-12-18T19:29:23.453Z" }, + { url = "https://files.pythonhosted.org/packages/5c/38/68e414156015ba80cef5473d57919d27dfb62ec804b96180bafdeaf0e090/ruff-0.14.10-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f24b47993a9d8cb858429e97bdf8544c78029f09b520af615c1d261bf827001d", size = 14460260, upload-time = "2025-12-18T19:29:27.808Z" }, + { url = "https://files.pythonhosted.org/packages/b3/19/9e050c0dca8aba824d67cc0db69fb459c28d8cd3f6855b1405b3f29cc91d/ruff-0.14.10-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:59aabd2e2c4fd614d2862e7939c34a532c04f1084476d6833dddef4afab87e9f", size = 14229978, upload-time = "2025-12-18T19:29:11.32Z" }, + { url = "https://files.pythonhosted.org/packages/51/eb/e8dd1dd6e05b9e695aa9dd420f4577debdd0f87a5ff2fedda33c09e9be8c/ruff-0.14.10-py3-none-manylinux_2_31_riscv64.whl", hash = "sha256:213db2b2e44be8625002dbea33bb9c60c66ea2c07c084a00d55732689d697a7f", size = 14338036, upload-time = "2025-12-18T19:29:09.184Z" }, + { url = "https://files.pythonhosted.org/packages/6a/12/f3e3a505db7c19303b70af370d137795fcfec136d670d5de5391e295c134/ruff-0.14.10-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:b914c40ab64865a17a9a5b67911d14df72346a634527240039eb3bd650e5979d", size = 13264051, upload-time = "2025-12-18T19:29:13.431Z" }, + { url = "https://files.pythonhosted.org/packages/08/64/8c3a47eaccfef8ac20e0484e68e0772013eb85802f8a9f7603ca751eb166/ruff-0.14.10-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:1484983559f026788e3a5c07c81ef7d1e97c1c78ed03041a18f75df104c45405", size = 13283998, upload-time = "2025-12-18T19:29:06.994Z" }, + { url = "https://files.pythonhosted.org/packages/12/84/534a5506f4074e5cc0529e5cd96cfc01bb480e460c7edf5af70d2bcae55e/ruff-0.14.10-py3-none-musllinux_1_2_i686.whl", hash = "sha256:c70427132db492d25f982fffc8d6c7535cc2fd2c83fc8888f05caaa248521e60", size = 13601891, upload-time = "2025-12-18T19:28:55.811Z" }, + { url = "https://files.pythonhosted.org/packages/0d/1e/14c916087d8598917dbad9b2921d340f7884824ad6e9c55de948a93b106d/ruff-0.14.10-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:5bcf45b681e9f1ee6445d317ce1fa9d6cba9a6049542d1c3d5b5958986be8830", size = 14336660, upload-time = "2025-12-18T19:29:16.531Z" }, + { url = 
"https://files.pythonhosted.org/packages/f2/1c/d7b67ab43f30013b47c12b42d1acd354c195351a3f7a1d67f59e54227ede/ruff-0.14.10-py3-none-win32.whl", hash = "sha256:104c49fc7ab73f3f3a758039adea978869a918f31b73280db175b43a2d9b51d6", size = 13196187, upload-time = "2025-12-18T19:29:19.006Z" }, + { url = "https://files.pythonhosted.org/packages/fb/9c/896c862e13886fae2af961bef3e6312db9ebc6adc2b156fe95e615dee8c1/ruff-0.14.10-py3-none-win_amd64.whl", hash = "sha256:466297bd73638c6bdf06485683e812db1c00c7ac96d4ddd0294a338c62fdc154", size = 14661283, upload-time = "2025-12-18T19:29:30.16Z" }, + { url = "https://files.pythonhosted.org/packages/74/31/b0e29d572670dca3674eeee78e418f20bdf97fa8aa9ea71380885e175ca0/ruff-0.14.10-py3-none-win_arm64.whl", hash = "sha256:e51d046cf6dda98a4633b8a8a771451107413b0f07183b2bef03f075599e44e6", size = 13729839, upload-time = "2025-12-18T19:28:48.636Z" }, ] [[package]] @@ -3925,11 +3923,11 @@ wheels = [ [[package]] name = "soupsieve" -version = "2.8" +version = "2.8.1" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/6d/e6/21ccce3262dd4889aa3332e5a119a3491a95e8f60939870a3a035aabac0d/soupsieve-2.8.tar.gz", hash = "sha256:e2dd4a40a628cb5f28f6d4b0db8800b8f581b65bb380b97de22ba5ca8d72572f", size = 103472, upload-time = "2025-08-27T15:39:51.78Z" } +sdist = { url = "https://files.pythonhosted.org/packages/89/23/adf3796d740536d63a6fbda113d07e60c734b6ed5d3058d1e47fc0495e47/soupsieve-2.8.1.tar.gz", hash = "sha256:4cf733bc50fa805f5df4b8ef4740fc0e0fa6218cf3006269afd3f9d6d80fd350", size = 117856, upload-time = "2025-12-18T13:50:34.655Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/14/a0/bb38d3b76b8cae341dad93a2dd83ab7462e6dbcdd84d43f54ee60a8dc167/soupsieve-2.8-py3-none-any.whl", hash = "sha256:0cc76456a30e20f5d7f2e14a98a4ae2ee4e5abdc7c5ea0aafe795f344bc7984c", size = 36679, upload-time = "2025-08-27T15:39:50.179Z" }, + { url = "https://files.pythonhosted.org/packages/48/f3/b67d6ea49ca9154453b6d70b34ea22f3996b9fa55da105a79d8732227adc/soupsieve-2.8.1-py3-none-any.whl", hash = "sha256:a11fe2a6f3d76ab3cf2de04eb339c1be5b506a8a47f2ceb6d139803177f85434", size = 36710, upload-time = "2025-12-18T13:50:33.267Z" }, ] [[package]] @@ -4085,14 +4083,15 @@ wheels = [ [[package]] name = "sse-starlette" -version = "3.0.3" +version = "3.0.4" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "anyio" }, + { name = "starlette" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/db/3c/fa6517610dc641262b77cc7bf994ecd17465812c1b0585fe33e11be758ab/sse_starlette-3.0.3.tar.gz", hash = "sha256:88cfb08747e16200ea990c8ca876b03910a23b547ab3bd764c0d8eb81019b971", size = 21943, upload-time = "2025-10-30T18:44:20.117Z" } +sdist = { url = "https://files.pythonhosted.org/packages/17/8b/54651ad49bce99a50fd61a7f19c2b6a79fbb072e693101fbb1194c362054/sse_starlette-3.0.4.tar.gz", hash = "sha256:5e34286862e96ead0eb70f5ddd0bd21ab1f6473a8f44419dd267f431611383dd", size = 22576, upload-time = "2025-12-14T16:22:52.493Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/23/a0/984525d19ca5c8a6c33911a0c164b11490dd0f90ff7fd689f704f84e9a11/sse_starlette-3.0.3-py3-none-any.whl", hash = "sha256:af5bf5a6f3933df1d9c7f8539633dc8444ca6a97ab2e2a7cd3b6e431ac03a431", size = 11765, upload-time = "2025-10-30T18:44:18.834Z" }, + { url = "https://files.pythonhosted.org/packages/71/22/8ab1066358601163e1ac732837adba3672f703818f693e179b24e0d3b65c/sse_starlette-3.0.4-py3-none-any.whl", hash = 
"sha256:32c80ef0d04506ced4b0b6ab8fe300925edc37d26f666afb1874c754895f5dc3", size = 11764, upload-time = "2025-12-14T16:22:51.453Z" }, ] [[package]] @@ -4222,21 +4221,21 @@ wheels = [ [[package]] name = "tornado" -version = "6.5.3" +version = "6.5.4" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/7f/2e/3d22d478f27cb4b41edd4db7f10cd7846d0a28ea443342de3dba97035166/tornado-6.5.3.tar.gz", hash = "sha256:16abdeb0211796ffc73765bc0a20119712d68afeeaf93d1a3f2edf6b3aee8d5a", size = 513348, upload-time = "2025-12-11T04:16:42.225Z" } +sdist = { url = "https://files.pythonhosted.org/packages/37/1d/0a336abf618272d53f62ebe274f712e213f5a03c0b2339575430b8362ef2/tornado-6.5.4.tar.gz", hash = "sha256:a22fa9047405d03260b483980635f0b041989d8bcc9a313f8fe18b411d84b1d7", size = 513632, upload-time = "2025-12-15T19:21:03.836Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/d3/e9/bf22f66e1d5d112c0617974b5ce86666683b32c09b355dfcd59f8d5c8ef6/tornado-6.5.3-cp39-abi3-macosx_10_9_universal2.whl", hash = "sha256:2dd7d7e8d3e4635447a8afd4987951e3d4e8d1fb9ad1908c54c4002aabab0520", size = 443860, upload-time = "2025-12-11T04:16:26.638Z" }, - { url = "https://files.pythonhosted.org/packages/ca/9c/594b631f0b8dc5977080c7093d1e96f1377c10552577d2c31bb0208c9362/tornado-6.5.3-cp39-abi3-macosx_10_9_x86_64.whl", hash = "sha256:5977a396f83496657779f59a48c38096ef01edfe4f42f1c0634b791dde8165d0", size = 442118, upload-time = "2025-12-11T04:16:28.32Z" }, - { url = "https://files.pythonhosted.org/packages/78/f6/685b869f5b5b9d9547571be838c6106172082751696355b60fc32a4988ed/tornado-6.5.3-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f72ac800be2ac73ddc1504f7aa21069a4137e8d70c387172c063d363d04f2208", size = 445700, upload-time = "2025-12-11T04:16:29.64Z" }, - { url = "https://files.pythonhosted.org/packages/91/4c/f0d19edf24912b7f21ae5e941f7798d132ad4d9b71441c1e70917a297265/tornado-6.5.3-cp39-abi3-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c43c4fc4f5419c6561cfb8b884a8f6db7b142787d47821e1a0e1296253458265", size = 445041, upload-time = "2025-12-11T04:16:30.799Z" }, - { url = "https://files.pythonhosted.org/packages/eb/2b/e02da94f4a4aef2bb3b923c838ef284a77548a5f06bac2a8682b36b4eead/tornado-6.5.3-cp39-abi3-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:de8b3fed4b3afb65d542d7702ac8767b567e240f6a43020be8eaef59328f117b", size = 445270, upload-time = "2025-12-11T04:16:32.316Z" }, - { url = "https://files.pythonhosted.org/packages/58/e2/7a7535d23133443552719dba526dacbb7415f980157da9f14950ddb88ad6/tornado-6.5.3-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:dbc4b4c32245b952566e17a20d5c1648fbed0e16aec3fc7e19f3974b36e0e47c", size = 445957, upload-time = "2025-12-11T04:16:33.913Z" }, - { url = "https://files.pythonhosted.org/packages/a0/1f/9ff92eca81ff17a86286ec440dcd5eab0400326eb81761aa9a4eecb1ffb9/tornado-6.5.3-cp39-abi3-musllinux_1_2_i686.whl", hash = "sha256:db238e8a174b4bfd0d0238b8cfcff1c14aebb4e2fcdafbf0ea5da3b81caceb4c", size = 445371, upload-time = "2025-12-11T04:16:35.093Z" }, - { url = "https://files.pythonhosted.org/packages/70/b1/1d03ae4526a393b0b839472a844397337f03c7f3a1e6b5c82241f0e18281/tornado-6.5.3-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:892595c100cd9b53a768cbfc109dfc55dec884afe2de5290611a566078d9692d", size = 445348, upload-time = "2025-12-11T04:16:36.679Z" }, - { url = 
"https://files.pythonhosted.org/packages/4b/7d/7c181feadc8941f418d0d26c3790ee34ffa4bd0a294bc5201d44ebd19c1e/tornado-6.5.3-cp39-abi3-win32.whl", hash = "sha256:88141456525fe291e47bbe1ba3ffb7982549329f09b4299a56813923af2bd197", size = 446433, upload-time = "2025-12-11T04:16:38.332Z" }, - { url = "https://files.pythonhosted.org/packages/34/98/4f7f938606e21d0baea8c6c39a7c8e95bdf8e50b0595b1bb6f0de2af7a6e/tornado-6.5.3-cp39-abi3-win_amd64.whl", hash = "sha256:ba4b513d221cc7f795a532c1e296f36bcf6a60e54b15efd3f092889458c69af1", size = 446842, upload-time = "2025-12-11T04:16:39.867Z" }, - { url = "https://files.pythonhosted.org/packages/7a/27/0e3fca4c4edf33fb6ee079e784c63961cd816971a45e5e4cacebe794158d/tornado-6.5.3-cp39-abi3-win_arm64.whl", hash = "sha256:278c54d262911365075dd45e0b6314308c74badd6ff9a54490e7daccdd5ed0ea", size = 445863, upload-time = "2025-12-11T04:16:41.099Z" }, + { url = "https://files.pythonhosted.org/packages/ab/a9/e94a9d5224107d7ce3cc1fab8d5dc97f5ea351ccc6322ee4fb661da94e35/tornado-6.5.4-cp39-abi3-macosx_10_9_universal2.whl", hash = "sha256:d6241c1a16b1c9e4cc28148b1cda97dd1c6cb4fb7068ac1bedc610768dff0ba9", size = 443909, upload-time = "2025-12-15T19:20:48.382Z" }, + { url = "https://files.pythonhosted.org/packages/db/7e/f7b8d8c4453f305a51f80dbb49014257bb7d28ccb4bbb8dd328ea995ecad/tornado-6.5.4-cp39-abi3-macosx_10_9_x86_64.whl", hash = "sha256:2d50f63dda1d2cac3ae1fa23d254e16b5e38153758470e9956cbc3d813d40843", size = 442163, upload-time = "2025-12-15T19:20:49.791Z" }, + { url = "https://files.pythonhosted.org/packages/ba/b5/206f82d51e1bfa940ba366a8d2f83904b15942c45a78dd978b599870ab44/tornado-6.5.4-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d1cf66105dc6acb5af613c054955b8137e34a03698aa53272dbda4afe252be17", size = 445746, upload-time = "2025-12-15T19:20:51.491Z" }, + { url = "https://files.pythonhosted.org/packages/8e/9d/1a3338e0bd30ada6ad4356c13a0a6c35fbc859063fa7eddb309183364ac1/tornado-6.5.4-cp39-abi3-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:50ff0a58b0dc97939d29da29cd624da010e7f804746621c78d14b80238669335", size = 445083, upload-time = "2025-12-15T19:20:52.778Z" }, + { url = "https://files.pythonhosted.org/packages/50/d4/e51d52047e7eb9a582da59f32125d17c0482d065afd5d3bc435ff2120dc5/tornado-6.5.4-cp39-abi3-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e5fb5e04efa54cf0baabdd10061eb4148e0be137166146fff835745f59ab9f7f", size = 445315, upload-time = "2025-12-15T19:20:53.996Z" }, + { url = "https://files.pythonhosted.org/packages/27/07/2273972f69ca63dbc139694a3fc4684edec3ea3f9efabf77ed32483b875c/tornado-6.5.4-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:9c86b1643b33a4cd415f8d0fe53045f913bf07b4a3ef646b735a6a86047dda84", size = 446003, upload-time = "2025-12-15T19:20:56.101Z" }, + { url = "https://files.pythonhosted.org/packages/d1/83/41c52e47502bf7260044413b6770d1a48dda2f0246f95ee1384a3cd9c44a/tornado-6.5.4-cp39-abi3-musllinux_1_2_i686.whl", hash = "sha256:6eb82872335a53dd063a4f10917b3efd28270b56a33db69009606a0312660a6f", size = 445412, upload-time = "2025-12-15T19:20:57.398Z" }, + { url = "https://files.pythonhosted.org/packages/10/c7/bc96917f06cbee182d44735d4ecde9c432e25b84f4c2086143013e7b9e52/tornado-6.5.4-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:6076d5dda368c9328ff41ab5d9dd3608e695e8225d1cd0fd1e006f05da3635a8", size = 445392, upload-time = "2025-12-15T19:20:58.692Z" }, + { url = 
"https://files.pythonhosted.org/packages/0c/1a/d7592328d037d36f2d2462f4bc1fbb383eec9278bc786c1b111cbbd44cfa/tornado-6.5.4-cp39-abi3-win32.whl", hash = "sha256:1768110f2411d5cd281bac0a090f707223ce77fd110424361092859e089b38d1", size = 446481, upload-time = "2025-12-15T19:21:00.008Z" }, + { url = "https://files.pythonhosted.org/packages/d6/6d/c69be695a0a64fd37a97db12355a035a6d90f79067a3cf936ec2b1dc38cd/tornado-6.5.4-cp39-abi3-win_amd64.whl", hash = "sha256:fa07d31e0cd85c60713f2b995da613588aa03e1303d75705dca6af8babc18ddc", size = 446886, upload-time = "2025-12-15T19:21:01.287Z" }, + { url = "https://files.pythonhosted.org/packages/50/49/8dc3fd90902f70084bd2cd059d576ddb4f8bb44c2c7c0e33a11422acb17e/tornado-6.5.4-cp39-abi3-win_arm64.whl", hash = "sha256:053e6e16701eb6cbe641f308f4c1a9541f91b6261991160391bfc342e8a551a1", size = 445910, upload-time = "2025-12-15T19:21:02.571Z" }, ] [[package]] @@ -4262,15 +4261,15 @@ wheels = [ [[package]] name = "typer-slim" -version = "0.20.0" +version = "0.20.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "click" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/8e/45/81b94a52caed434b94da65729c03ad0fb7665fab0f7db9ee54c94e541403/typer_slim-0.20.0.tar.gz", hash = "sha256:9fc6607b3c6c20f5c33ea9590cbeb17848667c51feee27d9e314a579ab07d1a3", size = 106561, upload-time = "2025-10-20T17:03:46.642Z" } +sdist = { url = "https://files.pythonhosted.org/packages/3f/3d/6a4ec47010e8de34dade20c8e7bce90502b173f62a6b41619523a3fcf562/typer_slim-0.20.1.tar.gz", hash = "sha256:bb9e4f7e6dc31551c8a201383df322b81b0ce37239a5ead302598a2ebb6f7c9c", size = 106113, upload-time = "2025-12-19T16:48:54.206Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/5e/dd/5cbf31f402f1cc0ab087c94d4669cfa55bd1e818688b910631e131d74e75/typer_slim-0.20.0-py3-none-any.whl", hash = "sha256:f42a9b7571a12b97dddf364745d29f12221865acef7a2680065f9bb29c7dc89d", size = 47087, upload-time = "2025-10-20T17:03:44.546Z" }, + { url = "https://files.pythonhosted.org/packages/d8/f9/a273c8b57c69ac1b90509ebda204972265fdc978fbbecc25980786f8c038/typer_slim-0.20.1-py3-none-any.whl", hash = "sha256:8e89c5dbaffe87a4f86f4c7a9e2f7059b5b68c66f558f298969d42ce34f10122", size = 47440, upload-time = "2025-12-19T16:48:52.678Z" }, ] [[package]] @@ -4309,11 +4308,11 @@ wheels = [ [[package]] name = "tzdata" -version = "2025.2" +version = "2025.3" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/95/32/1a225d6164441be760d75c2c42e2780dc0873fe382da3e98a2e1e48361e5/tzdata-2025.2.tar.gz", hash = "sha256:b60a638fcc0daffadf82fe0f57e53d06bdec2f36c4df66280ae79bce6bd6f2b9", size = 196380, upload-time = "2025-03-23T13:54:43.652Z" } +sdist = { url = "https://files.pythonhosted.org/packages/5e/a7/c202b344c5ca7daf398f3b8a477eeb205cf3b6f32e7ec3a6bac0629ca975/tzdata-2025.3.tar.gz", hash = "sha256:de39c2ca5dc7b0344f2eba86f49d614019d29f060fc4ebc8a417896a620b56a7", size = 196772, upload-time = "2025-12-13T17:45:35.667Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/5c/23/c7abc0ca0a1526a0774eca151daeb8de62ec457e77262b66b359c3c7679e/tzdata-2025.2-py2.py3-none-any.whl", hash = "sha256:1a403fada01ff9221ca8044d701868fa132215d84beb92242d9acd2147f667a8", size = 347839, upload-time = "2025-03-23T13:54:41.845Z" }, + { url = "https://files.pythonhosted.org/packages/c7/b0/003792df09decd6849a5e39c28b513c06e84436a54440380862b5aeff25d/tzdata-2025.3-py2.py3-none-any.whl", hash = 
"sha256:06a47e5700f3081aab02b2e513160914ff0694bce9947d6b76ebd6bf57cfc5d1", size = 348521, upload-time = "2025-12-13T17:45:33.889Z" }, ] [[package]] @@ -4358,15 +4357,15 @@ wheels = [ [[package]] name = "uvicorn" -version = "0.38.0" +version = "0.40.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "click" }, { name = "h11" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/cb/ce/f06b84e2697fef4688ca63bdb2fdf113ca0a3be33f94488f2cadb690b0cf/uvicorn-0.38.0.tar.gz", hash = "sha256:fd97093bdd120a2609fc0d3afe931d4d4ad688b6e75f0f929fde1bc36fe0e91d", size = 80605, upload-time = "2025-10-18T13:46:44.63Z" } +sdist = { url = "https://files.pythonhosted.org/packages/c3/d1/8f3c683c9561a4e6689dd3b1d345c815f10f86acd044ee1fb9a4dcd0b8c5/uvicorn-0.40.0.tar.gz", hash = "sha256:839676675e87e73694518b5574fd0f24c9d97b46bea16df7b8c05ea1a51071ea", size = 81761, upload-time = "2025-12-21T14:16:22.45Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/ee/d9/d88e73ca598f4f6ff671fb5fde8a32925c2e08a637303a1d12883c7305fa/uvicorn-0.38.0-py3-none-any.whl", hash = "sha256:48c0afd214ceb59340075b4a052ea1ee91c16fbc2a9b1469cca0e54566977b02", size = 68109, upload-time = "2025-10-18T13:46:42.958Z" }, + { url = "https://files.pythonhosted.org/packages/3d/d8/2083a1daa7439a66f3a48589a57d576aa117726762618f6bb09fe3798796/uvicorn-0.40.0-py3-none-any.whl", hash = "sha256:c6c8f55bc8bf13eb6fa9ff87ad62308bbbc33d0b67f84293151efe87e0d5f2ee", size = 68502, upload-time = "2025-12-21T14:16:21.041Z" }, ] [[package]]