diff --git a/src/dpmcore/server/routers/structure.py b/src/dpmcore/server/routers/structure.py index 0973c5f..a8755b5 100644 --- a/src/dpmcore/server/routers/structure.py +++ b/src/dpmcore/server/routers/structure.py @@ -36,6 +36,7 @@ class References(str, enum.Enum): NONE = "none" ALL = "all" + CHILDREN = "children" class ArtefactType(str, enum.Enum): @@ -51,7 +52,6 @@ class ArtefactType(str, enum.Enum): ORGANISATION = "organisation" PROPERTY = "property" RELEASE = "release" - STRUCTURE = "structure" TABLE = "table" TABLEGROUP = "tablegroup" VARIABLE = "variable" @@ -214,7 +214,8 @@ def _handler( ) -> Any: type_value = artefact_type.value - # "structure" is a wildcard — not backed by a handler + # Safety net for future enum additions without a registered + # handler — every current ArtefactType value is registered. if type_value not in ARTEFACT_HANDLERS: valid = ", ".join(sorted(ARTEFACT_HANDLERS.keys())) return Response( @@ -468,3 +469,588 @@ def _owner_ids_from_acronyms( .all() ) return [o.org_id for o in orgs] + + +# ------------------------------------------------------------------ # +# Table handler +# ------------------------------------------------------------------ # + + +@register_artefact("table") +def handle_table( + *, + session: Session, + params: StructureParams, + detail: str, + references: str, + offset: int, + limit: int, +) -> Any: + """Handle ``/structure/table/...`` requests.""" + from dpmcore.services.structure import StructureService + + svc = StructureService(session) + + results, total = svc.query_tables( + params=params, + detail=detail, + offset=offset, + limit=limit, + ) + + if not results: + return Response(status_code=204) + + data: Dict[str, Any] = {"tables": results} + if references == "all": + acronyms: List[str] = list( + { + owner + for r in results + if isinstance((owner := r.get("owner")), str) + } + ) + if acronyms: + data["organisations"] = svc.get_release_organisations( + _owner_ids_from_acronyms(svc, acronyms), + ) + + 
return envelope( + data, + total_count=total, + offset=offset, + limit=limit, + ) + + +# ------------------------------------------------------------------ # +# Operator handler +# ------------------------------------------------------------------ # + + +@register_artefact("operator") +def handle_operator( + *, + session: Session, + params: StructureParams, + detail: str, + references: str, + offset: int, + limit: int, +) -> Any: + """Handle ``/structure/operator/...`` requests. + + Operators are flat, unversioned, and unowned. The ``{owner}`` URL + segment must be ``*`` (concrete owners 204); release is ignored. + Arguments are always inlined at ``detail=full``. ``references`` + is accepted but a no-op (no related artefacts to add — operators + have no owner). + """ + from dpmcore.services.structure import StructureService + + svc = StructureService(session) + results, total = svc.query_operators( + params=params, + detail=detail, + offset=offset, + limit=limit, + ) + + if not results: + return Response(status_code=204) + + return envelope( + {"operators": results}, + total_count=total, + offset=offset, + limit=limit, + ) + + +# ------------------------------------------------------------------ # +# Operation handler +# ------------------------------------------------------------------ # + + +@register_artefact("operation") +def handle_operation( + *, + session: Session, + params: StructureParams, + detail: str, + references: str, + offset: int, + limit: int, +) -> Any: + """Handle ``/structure/operation/...`` requests. + + Each Operation carries a nested ``versions`` array — and at + ``detail=full`` (default) each version carries its node tree, + operand references, and reference locations inline. The + ``{release}`` URL segment filters the inner ``versions`` array; + Operations with no matching version are dropped. 
``references`` + is accepted but only ``all`` (which adds organisations) has an + effect — ``children`` is a no-op because the nested payload is + already present by default. + """ + from dpmcore.services.structure import StructureService + + svc = StructureService(session) + results, total = svc.query_operations( + params=params, + detail=detail, + offset=offset, + limit=limit, + ) + + if not results: + return Response(status_code=204) + + data: Dict[str, Any] = {"operations": results} + if references == "all": + acronyms: List[str] = list( + { + owner + for r in results + if isinstance((owner := r.get("owner")), str) + } + ) + if acronyms: + data["organisations"] = svc.get_release_organisations( + _owner_ids_from_acronyms(svc, acronyms), + ) + + return envelope( + data, + total_count=total, + offset=offset, + limit=limit, + ) + + +# ------------------------------------------------------------------ # +# DataType handler +# ------------------------------------------------------------------ # + + +@register_artefact("datatype") +def handle_datatype( + *, + session: Session, + params: StructureParams, + detail: str, + references: str, + offset: int, + limit: int, +) -> Any: + """Handle ``/structure/datatype/...`` requests. + + DataTypes are flat, unversioned, and unowned but hierarchical. + The ``{owner}`` URL segment must be ``*`` (concrete owners 204); + the release segment is ignored. ``references=children`` expands + ``childDataTypes`` stubs; ``references=all`` does the same (no + organisations enrichment — DataTypes have no owner). 
+ """ + from dpmcore.services.structure import StructureService + + svc = StructureService(session) + results, total = svc.query_datatypes( + params=params, + detail=detail, + references=references, + offset=offset, + limit=limit, + ) + + if not results: + return Response(status_code=204) + + return envelope( + {"dataTypes": results}, + total_count=total, + offset=offset, + limit=limit, + ) + + +# ------------------------------------------------------------------ # +# Organisation handler +# ------------------------------------------------------------------ # + + +@register_artefact("organisation") +def handle_organisation( + *, + session: Session, + params: StructureParams, + detail: str, + references: str, + offset: int, + limit: int, +) -> Any: + """Handle ``/structure/organisation/...`` requests. + + Organisations are leaves and not release-versioned. ``references`` + is accepted but a no-op: the response is already the organisation + list, so neither ``children`` nor ``all`` adds anything. + """ + from dpmcore.services.structure import StructureService + + svc = StructureService(session) + results, total = svc.query_organisations( + params=params, + detail=detail, + offset=offset, + limit=limit, + ) + + if not results: + return Response(status_code=204) + + return envelope( + {"organisations": results}, + total_count=total, + offset=offset, + limit=limit, + ) + + +# ------------------------------------------------------------------ # +# Context handler +# ------------------------------------------------------------------ # + + +@register_artefact("context") +def handle_context( + *, + session: Session, + params: StructureParams, + detail: str, + references: str, + offset: int, + limit: int, +) -> Any: + """Handle ``/structure/context/...`` requests. + + Contexts are leaves: ``references=children`` is silently a no-op. + ``references=all`` enriches the response with owner organisations. 
+ """ + from dpmcore.services.structure import StructureService + + svc = StructureService(session) + results, total = svc.query_contexts( + params=params, + detail=detail, + offset=offset, + limit=limit, + ) + + if not results: + return Response(status_code=204) + + data: Dict[str, Any] = {"contexts": results} + if references == "all": + acronyms: List[str] = list( + { + owner + for r in results + if isinstance((owner := r.get("owner")), str) + } + ) + if acronyms: + data["organisations"] = svc.get_release_organisations( + _owner_ids_from_acronyms(svc, acronyms), + ) + + return envelope( + data, + total_count=total, + offset=offset, + limit=limit, + ) + + +# ------------------------------------------------------------------ # +# Property handler +# ------------------------------------------------------------------ # + + +@register_artefact("property") +def handle_property( + *, + session: Session, + params: StructureParams, + detail: str, + references: str, + offset: int, + limit: int, +) -> Any: + """Handle ``/structure/property/...`` requests. + + Properties are leaves: ``references=children`` is silently a no-op. + ``references=all`` enriches the response with owner organisations. 
+ """ + from dpmcore.services.structure import StructureService + + svc = StructureService(session) + results, total = svc.query_properties( + params=params, + detail=detail, + offset=offset, + limit=limit, + ) + + if not results: + return Response(status_code=204) + + data: Dict[str, Any] = {"properties": results} + if references == "all": + acronyms: List[str] = list( + { + owner + for r in results + if isinstance((owner := r.get("owner")), str) + } + ) + if acronyms: + data["organisations"] = svc.get_release_organisations( + _owner_ids_from_acronyms(svc, acronyms), + ) + + return envelope( + data, + total_count=total, + offset=offset, + limit=limit, + ) + + +# ------------------------------------------------------------------ # +# Variable handler +# ------------------------------------------------------------------ # + + +@register_artefact("variable") +def handle_variable( + *, + session: Session, + params: StructureParams, + detail: str, + references: str, + offset: int, + limit: int, +) -> Any: + """Handle ``/structure/variable/...`` requests. + + Variables are leaves: ``references=children`` is silently a no-op. + ``references=all`` enriches the response with owner organisations. 
+ """ + from dpmcore.services.structure import StructureService + + svc = StructureService(session) + results, total = svc.query_variables( + params=params, + detail=detail, + offset=offset, + limit=limit, + ) + + if not results: + return Response(status_code=204) + + data: Dict[str, Any] = {"variables": results} + if references == "all": + acronyms: List[str] = list( + { + owner + for r in results + if isinstance((owner := r.get("owner")), str) + } + ) + if acronyms: + data["organisations"] = svc.get_release_organisations( + _owner_ids_from_acronyms(svc, acronyms), + ) + + return envelope( + data, + total_count=total, + offset=offset, + limit=limit, + ) + + +# ------------------------------------------------------------------ # +# Framework handler +# ------------------------------------------------------------------ # + + +@register_artefact("framework") +def handle_framework( + *, + session: Session, + params: StructureParams, + detail: str, + references: str, + offset: int, + limit: int, +) -> Any: + """Handle ``/structure/framework/...`` requests. + + Frameworks are not release-versioned; the release path segment + affects only the embedded ``modules`` children. ``references= + children`` embeds the ModuleVersions active at the requested + release; ``references=all`` adds the owner organisations. 
+ """ + from dpmcore.services.structure import StructureService + + svc = StructureService(session) + + results, total = svc.query_frameworks( + params=params, + detail=detail, + references=references, + offset=offset, + limit=limit, + ) + + if not results: + return Response(status_code=204) + + data: Dict[str, Any] = {"frameworks": results} + if references == "all": + acronyms: List[str] = list( + { + owner + for r in results + if isinstance((owner := r.get("owner")), str) + } + ) + if acronyms: + data["organisations"] = svc.get_release_organisations( + _owner_ids_from_acronyms(svc, acronyms), + ) + + return envelope( + data, + total_count=total, + offset=offset, + limit=limit, + ) + + +# ------------------------------------------------------------------ # +# TableGroup handler +# ------------------------------------------------------------------ # + + +@register_artefact("tablegroup") +def handle_tablegroup( + *, + session: Session, + params: StructureParams, + detail: str, + references: str, + offset: int, + limit: int, +) -> Any: + """Handle ``/structure/tablegroup/...`` requests. + + ``references=children`` embeds each TableGroup's tables (full + shape, batch-loaded) and its direct child TableGroups (stubs). + ``references=all`` does both and additionally adds the owner + organisations. 
+ """ + from dpmcore.services.structure import StructureService + + svc = StructureService(session) + results, total = svc.query_tablegroups( + params=params, + detail=detail, + references=references, + offset=offset, + limit=limit, + ) + + if not results: + return Response(status_code=204) + + data: Dict[str, Any] = {"tableGroups": results} + if references == "all": + acronyms: List[str] = list( + { + owner + for r in results + if isinstance((owner := r.get("owner")), str) + } + ) + if acronyms: + data["organisations"] = svc.get_release_organisations( + _owner_ids_from_acronyms(svc, acronyms), + ) + + return envelope( + data, + total_count=total, + offset=offset, + limit=limit, + ) + + +# ------------------------------------------------------------------ # +# Module handler +# ------------------------------------------------------------------ # + + +@register_artefact("module") +def handle_module( + *, + session: Session, + params: StructureParams, + detail: str, + references: str, + offset: int, + limit: int, +) -> Any: + """Handle ``/structure/module/...`` requests. + + ``references=children`` embeds each module version's ordered tables. + ``references=all`` does the same and additionally enriches the + response with the owner organisations (mirroring the table / + category handlers). 
+ """ + from dpmcore.services.structure import StructureService + + svc = StructureService(session) + + results, total = svc.query_modules( + params=params, + detail=detail, + references=references, + offset=offset, + limit=limit, + ) + + if not results: + return Response(status_code=204) + + data: Dict[str, Any] = {"modules": results} + if references == "all": + acronyms: List[str] = list( + { + owner + for r in results + if isinstance((owner := r.get("owner")), str) + } + ) + if acronyms: + data["organisations"] = svc.get_release_organisations( + _owner_ids_from_acronyms(svc, acronyms), + ) + + return envelope( + data, + total_count=total, + offset=offset, + limit=limit, + ) diff --git a/src/dpmcore/services/structure.py b/src/dpmcore/services/structure.py index a0be318..162080f 100644 --- a/src/dpmcore/services/structure.py +++ b/src/dpmcore/services/structure.py @@ -3,12 +3,67 @@ from __future__ import annotations from collections import defaultdict -from typing import TYPE_CHECKING, Any, Dict, List, Optional, Sequence, Tuple - -from sqlalchemy import or_ - -from dpmcore.orm.glossary import Category, Item, ItemCategory -from dpmcore.orm.infrastructure import Concept, Organisation, Release +from typing import ( + TYPE_CHECKING, + Any, + Dict, + List, + Optional, + Sequence, + Tuple, + cast, +) + +from sqlalchemy import func, or_ +from sqlalchemy.orm import joinedload + +from dpmcore.orm.glossary import ( + Category, + Context, + ContextComposition, + Item, + ItemCategory, + Property, + PropertyCategory, + SubCategory, + SubCategoryItem, + SubCategoryVersion, +) +from dpmcore.orm.infrastructure import ( + Concept, + DataType, + Organisation, + Release, +) +from dpmcore.orm.operations import ( + OperandReference, + OperandReferenceLocation, + Operation, + OperationNode, + OperationVersion, + Operator, + OperatorArgument, +) +from dpmcore.orm.packaging import ( + Framework, + Module, + ModuleParameters, + ModuleVersion, + ModuleVersionComposition, +) +from 
dpmcore.orm.release_sort_order import compute_sort_order +from dpmcore.orm.rendering import ( + Cell, + Header, + HeaderVersion, + Table, + TableGroup, + TableGroupComposition, + TableVersion, + TableVersionCell, + TableVersionHeader, +) +from dpmcore.orm.variables import CompoundKey, Variable, VariableVersion from dpmcore.server.params import StructureParams if TYPE_CHECKING: @@ -107,6 +162,7 @@ class StructureService: def __init__(self, session: "Session") -> None: # noqa: D107 self.session = session self._releases_cache: Optional[List[Release]] = None + self._sort_orders_cache: Optional[Dict[int, Optional[int]]] = None self._owner_cache: Dict[int, Optional[str]] = {} # ------------------------------------------------------------------ # @@ -114,13 +170,86 @@ def __init__(self, session: "Session") -> None: # noqa: D107 # ------------------------------------------------------------------ # def _get_all_releases(self) -> List[Release]: - """Return all releases sorted by date ascending (cached).""" + """Return all releases ordered by semver-parsed sort order. + + Releases are ordered ascending by ``compute_sort_order(code)`` + (NOT the opaque ``release_id`` FK, which is non-monotonic from + DPM 4.2.1 onwards — e.g. ``4.2.1`` is ``ReleaseID 1010000003`` — + nor by ``date``). This keeps the version walks in + ``_compute_*_versions`` / ``_version_at_release`` monotonic and + places a chronological backport (e.g. ``4.0.1`` published after + ``4.2.1``) inside its own lineage. + + Releases whose ``code`` is unparseable (``sort_order`` is + ``None`` — e.g. a ``3.5-draft`` pre-release) cannot be placed in + semver space, so they sort *first* and are skipped by the + version walks. ``sort_order`` is computed from the same query + that loads the releases, so this adds no extra round-trip. 
+ """ if self._releases_cache is None: - self._releases_cache = ( - self.session.query(Release).order_by(Release.date.asc()).all() + releases = self.session.query(Release).all() + self._sort_orders_cache = { + r.release_id: compute_sort_order(r.code) for r in releases + } + sort_orders = self._sort_orders_cache + releases.sort( + key=lambda r: ( + sort_orders[r.release_id] is not None, + sort_orders[r.release_id] or 0, + ) ) + self._releases_cache = releases return self._releases_cache + def _release_sort_orders(self) -> Dict[int, Optional[int]]: + """Cached ``{release_id: sort_order}`` map (semver-parsed).""" + self._get_all_releases() # populates _sort_orders_cache + return self._sort_orders_cache or {} + + def _sort_order(self, release_id: int) -> Optional[int]: + """Semver sort order of a release_id, or ``None`` if unrankable.""" + return self._release_sort_orders().get(release_id) + + def _window_alive( + self, + start_release_id: int, + end_release_id: Optional[int], + target_release_id: int, + ) -> bool: + """Whether a ``[start, end]`` release window covers the target. + + Comparisons use the semver-parsed sort order rather than the + opaque ``release_id`` FK. The end bound is **inclusive** — the + convention the category/context virtual-versioning walks have + always used (this intentionally differs from + ``filter_by_release``'s exclusive end). Returns ``False`` for + any release whose code is unrankable. + """ + target_so = self._sort_order(target_release_id) + start_so = self._sort_order(start_release_id) + if target_so is None or start_so is None or start_so > target_so: + return False + if end_release_id is None: + return True + end_so = self._sort_order(end_release_id) + return end_so is None or end_so >= target_so + + def _lookup_in_windows( + self, + windows: List[Tuple[int, Optional[int], Any]], + target_release_id: int, + ) -> Any: + """Pick the value alive at *target_release_id* from windows. 
+ + A window ``(start, end, value)`` is alive per the inclusive + ``_window_alive`` convention. Returns ``None`` when no window + covers the release. + """ + for start, end, value in windows: + if self._window_alive(start, end, target_release_id): + return value + return None + def query_releases( self, *, @@ -314,19 +443,18 @@ def _compute_category_versions( prev_fingerprint = None for rel in releases: - if ( - category.created_release is not None - and rel.release_id < category.created_release + if self._sort_order(rel.release_id) is None: + continue + if category.created_release is not None and not self._window_alive( + category.created_release, None, rel.release_id ): continue alive_ics = [ ic for ic in ics - if ic.start_release_id <= rel.release_id - and ( - ic.end_release_id is None - or ic.end_release_id >= rel.release_id + if self._window_alive( + ic.start_release_id, ic.end_release_id, rel.release_id ) ] @@ -365,18 +493,27 @@ def _compute_category_versions( return versions - @staticmethod def _version_at_release( + self, versions: List[Tuple[Release, Dict[str, Any]]], target_release: Release, ) -> Optional[Dict[str, Any]]: """Find the version active at *target_release*. - Returns the last version whose release_id <= target's. + Returns the last version whose semver sort order does not + exceed the target's. ``versions`` is produced in ascending + sort order, so the walk stops at the first version that + overshoots. 
""" + target_so = self._sort_order(target_release.release_id) active: Optional[Dict[str, Any]] = None for rel, version_dict in versions: - if rel.release_id <= target_release.release_id: + rel_so = self._sort_order(rel.release_id) + if ( + target_so is not None + and rel_so is not None + and (rel_so <= target_so) + ): active = version_dict else: break @@ -496,3 +633,3189 @@ def query_categories( total = len(all_entries) paginated = all_entries[offset : offset + limit] return paginated, total + + # ------------------------------------------------------------------ # + # Tables + # ------------------------------------------------------------------ # + + def query_tables( + self, + *, + params: StructureParams, + detail: str = "full", + offset: int = 0, + limit: int = 100, + ) -> Tuple[List[Dict[str, Any]], int]: + """Query tables with SDMX-style filtering. + + One entry per matching ``TableVersion`` (so the same table can + appear multiple times when ``release=*``). For each entry, + headers, cells, and the variables referenced by cells are + populated; enumerated variables include their valid items. + """ + from dpmcore.dpm_xl.utils.filters import filter_by_release + + # Resolve target release (None only for release=*) + target_release: Optional[Release] = None + if not params.wants_all_releases: + target_release = self._resolve_release(params) + if target_release is None: + return [], 0 + + q = ( + self.session.query(TableVersion) + .join(Table, Table.table_id == TableVersion.table_id) + .options(joinedload(TableVersion.table)) + ) + + # Owner filtering via Concept join chain (same pattern as + # query_categories — keeps the join predicates uniform). 
+ owners = None if params.is_owner_wildcard else params.owners + if owners: + q = ( + q.join(Concept, Table.row_guid == Concept.concept_guid) + .join( + Organisation, + Concept.owner_id == Organisation.org_id, + ) + .filter(Organisation.acronym.in_(owners)) + ) + + # ID filter: TableVersion.code, with numeric ids tried against + # Table.table_id (matches the category handler's convention). + if not params.is_id_wildcard: + numeric_ids = [int(v) for v in params.ids if v.isdigit()] + if numeric_ids: + q = q.filter( + or_( + TableVersion.code.in_(params.ids), + Table.table_id.in_(numeric_ids), + ) + ) + else: + q = q.filter(TableVersion.code.in_(params.ids)) + + if target_release is not None: + q = filter_by_release( + q, + start_col=TableVersion.start_release_id, + end_col=TableVersion.end_release_id, + release_id=target_release.release_id, + ) + + q = q.order_by(TableVersion.table_id, TableVersion.start_release_id) + table_versions: List[TableVersion] = q.all() + + if not table_versions: + return [], 0 + + total = len(table_versions) + paginated = table_versions[offset : offset + limit] + + results: List[Dict[str, Any]] = [ + self._build_table_entry( + tv, + detail=detail, + target_release=target_release, + ) + for tv in paginated + ] + return results, total + + # -- table entry assembly ------------------------------------------ + + def _resolve_release_code( + self, + release_id: Optional[int], + ) -> Optional[str]: + """Return the release code for a release_id (cached lookup).""" + if release_id is None: + return None + for r in self._get_all_releases(): + if r.release_id == release_id: + return r.code + return None + + def _build_table_entry( + self, + tv: TableVersion, + *, + detail: str, + target_release: Optional[Release], + ) -> Dict[str, Any]: + """Single-table convenience wrapper over the batch builder.""" + return self._build_table_entries_batch( + [tv], detail=detail, target_release=target_release + )[tv.table_vid] + + # -- batched table loading 
----------------------------------------- + + def _batch_load_headers( + self, + table_vids: List[int], + ) -> Dict[int, List[Tuple[TableVersionHeader, Header, HeaderVersion]]]: + """``{table_vid: [(TableVersionHeader, Header, HeaderVersion), ...]}``. + + One SQL statement for the whole input set. + """ + if not table_vids: + return {} + rows = ( + self.session.query(TableVersionHeader, Header, HeaderVersion) + .join(Header, Header.header_id == TableVersionHeader.header_id) + .join( + HeaderVersion, + HeaderVersion.header_vid == TableVersionHeader.header_vid, + ) + .filter(TableVersionHeader.table_vid.in_(table_vids)) + .all() + ) + out: Dict[ + int, List[Tuple[TableVersionHeader, Header, HeaderVersion]] + ] = defaultdict(list) + for tvh, h, hv in rows: + out[tvh.table_vid].append((tvh, h, hv)) + return dict(out) + + def _batch_load_cells( + self, + table_vids: List[int], + ) -> Dict[int, List[Tuple[TableVersionCell, Cell]]]: + """``{table_vid: [(TableVersionCell, Cell), ...]}``. + + One SQL statement for the whole input set. + """ + if not table_vids: + return {} + rows = ( + self.session.query(TableVersionCell, Cell) + .join(Cell, Cell.cell_id == TableVersionCell.cell_id) + .filter(TableVersionCell.table_vid.in_(table_vids)) + .all() + ) + out: Dict[int, List[Tuple[TableVersionCell, Cell]]] = defaultdict(list) + for tvc, c in rows: + out[tvc.table_vid].append((tvc, c)) + return dict(out) + + def _build_table_entries_batch( # noqa: C901 — pipeline orchestrator + self, + table_versions: List[TableVersion], + *, + detail: str, + target_release: Optional[Release], + ) -> Dict[int, Dict[str, Any]]: + """Build response dicts for many TableVersions in fixed queries. + + Loads headers, cells, variable versions, property names, and + subcategory enumerations in bulk regardless of how many tables + are supplied. 
When ``target_release`` is None (release=*), each + TableVersion's enumeration window uses its own + ``start_release_id`` — calls to + :meth:`_load_subcategory_enumerations` are then grouped per + distinct effective release_id. + """ + if not table_versions: + return {} + + # Per-table effective release id (None target ⇒ per-tv start). + effective_release_by_vid: Dict[int, Optional[int]] = { + tv.table_vid: ( + target_release.release_id + if target_release is not None + else tv.start_release_id + ) + for tv in table_versions + } + + # allstubs short-circuit — no header/cell/variable loading. + if detail == "allstubs": + return { + tv.table_vid: _table_stub_to_dict( + tv, + owner_acronym=self._get_owner_acronym( + tv.table.owner_id if tv.table else None + ), + release_code=( + target_release.code + if target_release is not None + else self._resolve_release_code(tv.start_release_id) + ), + ) + for tv in table_versions + } + + table_vids = [tv.table_vid for tv in table_versions] + headers_by_vid = self._batch_load_headers(table_vids) + cells_by_vid = self._batch_load_cells(table_vids) + + # Collect property_ids across all tables (TableVersion + headers). + property_ids: set[int] = set() + for tv in table_versions: + if tv.property_id is not None: + property_ids.add(tv.property_id) + for header_rows in headers_by_vid.values(): + for _tvh, _h, hv in header_rows: + if hv.property_id is not None: + property_ids.add(hv.property_id) + + # Collect variable_vids (key + cell) across all tables. + all_variable_vids: set[int] = set() + for header_rows in headers_by_vid.values(): + for _tvh, _h, hv in header_rows: + if hv.key_variable_vid: + all_variable_vids.add(hv.key_variable_vid) + for cell_rows in cells_by_vid.values(): + for tvc, _c in cell_rows: + if tvc.variable_vid: + all_variable_vids.add(tvc.variable_vid) + + # Bulk-load VariableVersion rows once. 
+ vv_by_vid: Dict[int, VariableVersion] = {} + if all_variable_vids: + for vv in ( + self.session.query(VariableVersion) + .filter(VariableVersion.variable_vid.in_(all_variable_vids)) + .all() + ): + vv_by_vid[vv.variable_vid] = vv + # Variables expose their property reference too — fold + # those property_ids into the bulk name lookup. + for vv in vv_by_vid.values(): + if vv.property_id is not None: + property_ids.add(vv.property_id) + + property_names = self._bulk_load_property_names(property_ids) + + # Per-table {variable_vid: {subcat_vid}} mapping. + subcat_vids_per_table: Dict[int, Dict[int, set[int]]] = { + tv.table_vid: _collect_subcategory_vids_per_variable( + headers_by_vid.get(tv.table_vid, []), + cells_by_vid.get(tv.table_vid, []), + ) + for tv in table_versions + } + + # Group subcat lookups by effective release id (often a single + # group when target_release is set; potentially several with + # release=*). + subcat_vids_by_release: Dict[Optional[int], set[int]] = defaultdict( + set + ) + for table_vid, per_var in subcat_vids_per_table.items(): + rid = effective_release_by_vid[table_vid] + for svid_set in per_var.values(): + subcat_vids_by_release[rid].update(svid_set) + subcat_enums_by_release: Dict[ + Optional[int], Dict[int, Dict[str, Any]] + ] = { + rid: self._load_subcategory_enumerations(svids, release_id=rid) + for rid, svids in subcat_vids_by_release.items() + } + + # Assemble per-table dicts using only the pre-loaded data. 
+ result: Dict[int, Dict[str, Any]] = {} + for tv in table_versions: + header_rows = headers_by_vid.get(tv.table_vid, []) + cell_rows = cells_by_vid.get(tv.table_vid, []) + owner_acronym = self._get_owner_acronym( + tv.table.owner_id if tv.table else None + ) + release_code = ( + target_release.code + if target_release is not None + else self._resolve_release_code(tv.start_release_id) + ) + + header_dicts = [ + _header_version_to_dict(tvh, h, hv, property_names) + for tvh, h, hv in header_rows + ] + cell_dicts = [_cell_to_dict(tvc, c) for tvc, c in cell_rows] + + key_vids = { + hv.key_variable_vid + for _tvh, _h, hv in header_rows + if hv.key_variable_vid + } + cell_vids = { + tvc.variable_vid for tvc, _ in cell_rows if tvc.variable_vid + } + + subcat_vids_by_variable = subcat_vids_per_table[tv.table_vid] + subcat_enums = subcat_enums_by_release.get( + effective_release_by_vid[tv.table_vid], {} + ) + + variables_block = _assemble_variable_blocks( + key_vids | cell_vids, + subcat_vids_by_variable=subcat_vids_by_variable, + subcat_enums=subcat_enums, + vv_by_vid=vv_by_vid, + property_names=property_names, + ) + key_variables = [ + v for v in variables_block if v["versionId"] in key_vids + ] + fact_variables = [ + v for v in variables_block if v["versionId"] not in key_vids + ] + + result[tv.table_vid] = _table_to_dict( + tv, + owner_acronym=owner_acronym, + release_code=release_code, + property_names=property_names, + headers=header_dicts, + cells=cell_dicts, + key_variables=key_variables, + fact_variables=fact_variables, + ) + return result + + def _bulk_load_property_names( + self, + property_ids: set[int], + ) -> Dict[int, Optional[str]]: + """Resolve ``{property_id: Item.name}`` for a set of property ids.""" + if not property_ids: + return {} + rows = ( + self.session.query(Item.item_id, Item.name) + .join(Property, Property.property_id == Item.item_id) + .filter(Item.item_id.in_(property_ids)) + .all() + ) + return {r[0]: r[1] for r in rows} + + def 
_load_subcategory_enumerations( + self, + subcategory_vids: set[int], + *, + release_id: Optional[int], + ) -> Dict[int, Dict[str, Any]]: + """Load enumeration payloads for a set of SubCategoryVersion ids. + + Returns ``{subcategory_vid: enumeration_dict}``. Each + enumeration dict carries the subcategory's parent + :class:`Category` identity plus the items defined by the + :class:`SubCategoryItem` rows of that version, enriched with + ``code``/``signature`` from the :class:`ItemCategory` rows + valid at *release_id*. Items lacking an ItemCategory entry at + the release are dropped (they have no code at that release). + """ + if not subcategory_vids: + return {} + + from dpmcore.dpm_xl.utils.filters import filter_by_release + + # SubCategoryVersion → SubCategory → parent Category. + info_rows = ( + self.session.query(SubCategoryVersion, SubCategory, Category) + .join( + SubCategory, + SubCategory.subcategory_id + == SubCategoryVersion.subcategory_id, + ) + .join(Category, Category.category_id == SubCategory.category_id) + .filter(SubCategoryVersion.subcategory_vid.in_(subcategory_vids)) + .all() + ) + subcat_info: Dict[ + int, Tuple[SubCategoryVersion, SubCategory, Category] + ] = {sv.subcategory_vid: (sv, sc, cat) for sv, sc, cat in info_rows} + + # SubCategoryItem rows + Item names, ordered. + item_rows = ( + self.session.query(SubCategoryItem, Item) + .join(Item, Item.item_id == SubCategoryItem.item_id) + .filter(SubCategoryItem.subcategory_vid.in_(subcategory_vids)) + .order_by(SubCategoryItem.subcategory_vid, SubCategoryItem.order) + .all() + ) + items_by_subcat: Dict[int, List[Tuple[SubCategoryItem, Item]]] = ( + defaultdict(list) + ) + for si, item in item_rows: + items_by_subcat[si.subcategory_vid].append((si, item)) + + # ItemCategory at release window — gives code/signature per + # (item_id, parent_category_id). 
+ parent_cat_ids = { + cat.category_id for (_sv, _sc, cat) in subcat_info.values() + } + item_codes: Dict[Tuple[int, int], ItemCategory] = {} + if parent_cat_ids: + ic_q = self.session.query(ItemCategory).filter( + ItemCategory.category_id.in_(parent_cat_ids) + ) + ic_q = filter_by_release( + ic_q, + start_col=ItemCategory.start_release_id, + end_col=ItemCategory.end_release_id, + release_id=release_id, + active_only_fallback=True, + ) + for ic in ic_q.all(): + item_codes[(ic.item_id, ic.category_id)] = ic + + result: Dict[int, Dict[str, Any]] = {} + for svid, (_sv, sc, cat) in subcat_info.items(): + items_payload: List[Dict[str, Any]] = [] + for si, item in items_by_subcat.get(svid, []): + ic = item_codes.get((item.item_id, cat.category_id)) + if ic is None: + # Item has no ItemCategory in the parent category + # at this release — skip; no code to surface. + continue + items_payload.append( + { + "itemId": item.item_id, + "name": item.name, + "code": ic.code, + "signature": ic.signature, + "isDefaultItem": ic.is_default_item, + "subcategoryLabel": si.label, + "order": si.order, + } + ) + result[svid] = { + "subcategoryVersionId": svid, + "subcategoryCode": sc.code, + "subcategoryName": sc.name, + "categoryId": cat.category_id, + "categoryCode": cat.code, + "items": items_payload, + } + return result + + def _build_variable_blocks( + self, + variable_vids: set[int], + *, + subcat_vids_by_variable: Dict[int, set[int]], + subcat_enums: Dict[int, Dict[str, Any]], + ) -> List[Dict[str, Any]]: + """Loader-fronted variant of :func:`_assemble_variable_blocks`. + + Issues the VariableVersion + property-name queries itself. + Kept for callers that don't already have those rows preloaded. 
def query_tablegroups(  # noqa: C901 — pipeline orchestrator
    self,
    *,
    params: StructureParams,
    detail: str = "full",
    references: str = "none",
    offset: int = 0,
    limit: int = 100,
) -> Tuple[List[Dict[str, Any]], int]:
    """Query table groups with SDMX-style filtering.

    TableGroup itself is release-versioned (start/end on the row);
    ``TableGroupComposition`` is also release-versioned, so the
    set of contained tables can change across releases. With
    ``references=children`` (or ``all``) and a ``detail`` other than
    ``allstubs`` the response carries:

    - ``tables`` — full table entries, ordered by composition order,
      filtered to the effective release;
    - ``childTableGroups`` — stub objects for direct child groups
      alive at the same release.

    Hierarchy IDs (``parentTableGroupId``, ``childTableGroupIds``)
    appear in the default response too.

    The ``{id}`` segment matches ``TableGroup.code``; values made up
    only of decimal digits additionally match
    ``TableGroup.table_group_id``.

    Returns ``(entries, total)``. ``total`` counts every matching
    group before ``offset``/``limit`` are applied; ``([], 0)`` is
    returned when the release or the owners do not resolve, or when
    nothing matches.
    """
    from dpmcore.dpm_xl.utils.filters import filter_by_release

    target_release: Optional[Release] = None
    if not params.wants_all_releases:
        target_release = self._resolve_release(params)
        if target_release is None:
            return [], 0

    q = self.session.query(TableGroup)

    owners = None if params.is_owner_wildcard else params.owners
    if owners:
        org_ids = [
            org_id
            for (org_id,) in self.session.query(Organisation.org_id)
            .filter(Organisation.acronym.in_(owners))
            .all()
        ]
        if not org_ids:
            return [], 0
        q = q.filter(TableGroup.owner_id.in_(org_ids))

    if not params.is_id_wildcard:
        # ``str.isdecimal`` rather than ``str.isdigit``: the latter
        # also accepts characters such as "²" that ``int()`` rejects,
        # which turned a malformed URL id into an unhandled ValueError.
        numeric_ids = [int(v) for v in params.ids if v.isdecimal()]
        if numeric_ids:
            q = q.filter(
                or_(
                    TableGroup.code.in_(params.ids),
                    TableGroup.table_group_id.in_(numeric_ids),
                )
            )
        else:
            q = q.filter(TableGroup.code.in_(params.ids))

    if target_release is not None:
        q = filter_by_release(
            q,
            start_col=TableGroup.start_release_id,
            end_col=TableGroup.end_release_id,
            release_id=target_release.release_id,
        )

    q = q.order_by(TableGroup.table_group_id, TableGroup.start_release_id)
    groups: List[TableGroup] = q.all()
    if not groups:
        return [], 0

    # Pagination happens in Python on the fully materialised match
    # list, so ``total`` is simply its length.
    total = len(groups)
    paginated = groups[offset : offset + limit]

    # Hierarchy ID listings (always loaded — one extra query).
    group_ids = [g.table_group_id for g in paginated]
    child_id_map = self._bulk_load_tablegroup_child_ids(
        group_ids, target_release=target_release
    )

    # Children expansion (tables + child group stubs) only on
    # references=children or all, and never for stub output.
    include_children = references in ("children", "all")
    tables_by_group: Dict[int, List[Dict[str, Any]]] = {}
    child_stubs_by_group: Dict[int, List[Dict[str, Any]]] = {}
    if include_children and detail != "allstubs":
        tables_by_group, child_stubs_by_group = (
            self._load_tablegroup_children(
                paginated, target_release=target_release
            )
        )

    results: List[Dict[str, Any]] = []
    for g in paginated:
        owner_acronym = self._get_owner_acronym(g.owner_id)
        # With release=* each group is labelled with the release it
        # starts in; otherwise with the requested release.
        release_code = (
            target_release.code
            if target_release is not None
            else self._resolve_release_code(g.start_release_id)
        )
        child_ids = child_id_map.get(g.table_group_id, [])
        if detail == "allstubs":
            entry = _tablegroup_stub_to_dict(
                g,
                owner_acronym=owner_acronym,
                release_code=release_code,
            )
        else:
            entry = _tablegroup_to_dict(
                g,
                owner_acronym=owner_acronym,
                release_code=release_code,
                child_table_group_ids=child_ids,
            )
            if include_children:
                entry["tables"] = tables_by_group.get(g.table_group_id, [])
                entry["childTableGroups"] = child_stubs_by_group.get(
                    g.table_group_id, []
                )
        results.append(entry)

    return results, total
+ """ + from dpmcore.dpm_xl.utils.filters import filter_by_release + + if not parent_ids: + return {} + q = ( + self.session.query( + TableGroup.parent_table_group_id, TableGroup.table_group_id + ) + .filter(TableGroup.parent_table_group_id.in_(parent_ids)) + .order_by( + TableGroup.parent_table_group_id, TableGroup.table_group_id + ) + ) + if target_release is not None: + q = filter_by_release( + q, + start_col=TableGroup.start_release_id, + end_col=TableGroup.end_release_id, + release_id=target_release.release_id, + ) + out: Dict[int, List[int]] = defaultdict(list) + for parent_id, child_id in q.all(): + out[parent_id].append(child_id) + return dict(out) + + def _load_tablegroup_children( # noqa: C901 — pipeline + self, + groups: List[TableGroup], + *, + target_release: Optional[Release], + ) -> Tuple[ + Dict[int, List[Dict[str, Any]]], + Dict[int, List[Dict[str, Any]]], + ]: + """Per-group tables and direct child-group stubs. + + Returns ``(tables_by_group, child_stubs_by_group)``. + + Compositions and the child-group stubs are filtered by the + effective release per group (target_release for literal/~/+, + the group's own start_release_id for release=*). For release=* + we bucket groups by their effective release and run one set + of queries per bucket — query budget stays bounded. + """ + from dpmcore.dpm_xl.utils.filters import filter_by_release + + if not groups: + return {}, {} + + # Bucket groups by effective release. + groups_by_effective_release: Dict[int, List[TableGroup]] = defaultdict( + list + ) + for g in groups: + rid = ( + target_release.release_id + if target_release is not None + else g.start_release_id + ) + if rid is not None: + groups_by_effective_release[rid].append(g) + + # Build a quick lookup of release_id → Release for the batch + # table builder. 
+ release_by_id = {r.release_id: r for r in self._get_all_releases()} + + tables_by_group: Dict[int, List[Dict[str, Any]]] = {} + child_stubs_by_group: Dict[int, List[Dict[str, Any]]] = {} + + for rid, bucket in groups_by_effective_release.items(): + bucket_ids = [g.table_group_id for g in bucket] + + # Compositions at this release. + comp_q = self.session.query(TableGroupComposition).filter( + TableGroupComposition.table_group_id.in_(bucket_ids) + ) + comp_q = filter_by_release( + comp_q, + start_col=TableGroupComposition.start_release_id, + end_col=TableGroupComposition.end_release_id, + release_id=rid, + ) + comps = comp_q.all() + + comps_by_group: Dict[int, List[TableGroupComposition]] = ( + defaultdict(list) + ) + table_ids: set[int] = set() + for cc in comps: + comps_by_group[cc.table_group_id].append(cc) + if cc.table_id is not None: + table_ids.add(cc.table_id) + + tv_by_table_id: Dict[int, TableVersion] = {} + if table_ids: + tv_q = ( + self.session.query(TableVersion) + .options(joinedload(TableVersion.table)) + .filter(TableVersion.table_id.in_(table_ids)) + ) + tv_q = filter_by_release( + tv_q, + start_col=TableVersion.start_release_id, + end_col=TableVersion.end_release_id, + release_id=rid, + ) + for tv in tv_q.all(): + tv_by_table_id[tv.table_id] = tv + + entries_by_tvid: Dict[int, Dict[str, Any]] = {} + if tv_by_table_id: + target_for_batch = release_by_id.get(rid) + entries_by_tvid = self._build_table_entries_batch( + list(tv_by_table_id.values()), + detail="full", + target_release=target_for_batch, + ) + + for g in bucket: + ordered = sorted( + comps_by_group.get(g.table_group_id, []), + key=lambda c: c.order if c.order is not None else 0, + ) + ordered_entries: List[Dict[str, Any]] = [] + for cc in ordered: + if cc.table_id is None: + continue + tv = tv_by_table_id.get(cc.table_id) + if tv is None: + continue + entry = entries_by_tvid.get(tv.table_vid) + if entry is not None: + ordered_entries.append(entry) + tables_by_group[g.table_group_id] = 
def query_modules(  # noqa: C901 — pipeline orchestrator
    self,
    *,
    params: StructureParams,
    detail: str = "full",
    references: str = "none",
    offset: int = 0,
    limit: int = 100,
) -> Tuple[List[Dict[str, Any]], int]:
    """Query modules with SDMX-style filtering.

    One entry per matching :class:`ModuleVersion`. When
    ``references == "children"`` or ``"all"`` (and ``detail`` is not
    ``allstubs``), each entry includes a ``tables`` array (full table
    shape, batch-loaded).

    The ``{id}`` segment matches ``ModuleVersion.code``; values made up
    only of decimal digits additionally match ``Module.module_id``.

    Returns ``(entries, total)``. ``total`` counts every matching
    module version before ``offset``/``limit`` are applied;
    ``([], 0)`` is returned when the release does not resolve or
    nothing matches.
    """
    from dpmcore.dpm_xl.utils.filters import filter_by_release

    target_release: Optional[Release] = None
    if not params.wants_all_releases:
        target_release = self._resolve_release(params)
        if target_release is None:
            return [], 0

    q = (
        self.session.query(ModuleVersion)
        .join(Module, Module.module_id == ModuleVersion.module_id)
        .options(joinedload(ModuleVersion.module))
    )

    # Owner filter — Module carries owner_id directly; we still join
    # Concept→Organisation to translate acronym → id (cleaner than
    # querying Organisation up front).
    # NOTE(review): the filter resolves the owner through
    # ``Module.row_guid`` → Concept, while the ``owner`` surfaced in
    # each entry comes from ``Module.owner_id``. Confirm ``row_guid``
    # is always populated for modules, otherwise owner-filtered
    # requests silently drop rows.
    owners = None if params.is_owner_wildcard else params.owners
    if owners:
        q = (
            q.join(Concept, Module.row_guid == Concept.concept_guid)
            .join(
                Organisation,
                Concept.owner_id == Organisation.org_id,
            )
            .filter(Organisation.acronym.in_(owners))
        )

    # ID filter on ModuleVersion.code (with numeric fallback to
    # Module.module_id — matches the table handler's convention).
    if not params.is_id_wildcard:
        # ``str.isdecimal`` rather than ``str.isdigit``: the latter
        # also accepts characters such as "²" that ``int()`` rejects,
        # which turned a malformed URL id into an unhandled ValueError.
        numeric_ids = [int(v) for v in params.ids if v.isdecimal()]
        if numeric_ids:
            q = q.filter(
                or_(
                    ModuleVersion.code.in_(params.ids),
                    Module.module_id.in_(numeric_ids),
                )
            )
        else:
            q = q.filter(ModuleVersion.code.in_(params.ids))

    if target_release is not None:
        q = filter_by_release(
            q,
            start_col=ModuleVersion.start_release_id,
            end_col=ModuleVersion.end_release_id,
            release_id=target_release.release_id,
        )

    q = q.order_by(ModuleVersion.module_id, ModuleVersion.start_release_id)
    module_versions: List[ModuleVersion] = q.all()

    if not module_versions:
        return [], 0

    # Pagination happens in Python on the fully materialised match
    # list, so ``total`` is simply its length.
    total = len(module_versions)
    paginated = module_versions[offset : offset + limit]

    # Bulk loads shared by every detail level.
    module_vids = [mv.module_vid for mv in paginated]
    parameter_vids_by_module = self._bulk_load_module_parameters(
        module_vids
    )
    framework_ids = {
        mv.module.framework_id
        for mv in paginated
        if mv.module and mv.module.framework_id is not None
    }
    framework_refs = self._bulk_load_framework_refs(framework_ids)

    include_children = references in ("children", "all")
    tables_by_module: Dict[int, List[Dict[str, Any]]] = {}
    if include_children and detail != "allstubs":
        tables_by_module = self._load_module_children(
            paginated, target_release=target_release, detail=detail
        )

    results: List[Dict[str, Any]] = []
    for mv in paginated:
        module = mv.module
        owner_acronym = (
            self._get_owner_acronym(module.owner_id) if module else None
        )
        # With release=* each version is labelled with the release it
        # starts in; otherwise with the requested release.
        release_code = (
            target_release.code
            if target_release is not None
            else self._resolve_release_code(mv.start_release_id)
        )
        framework_ref = (
            framework_refs.get(module.framework_id)
            if module and module.framework_id is not None
            else None
        )

        if detail == "allstubs":
            entry = _module_stub_to_dict(
                mv,
                owner_acronym=owner_acronym,
                release_code=release_code,
            )
        else:
            entry = _module_version_to_dict(
                mv,
                owner_acronym=owner_acronym,
                release_code=release_code,
                framework_ref=framework_ref,
                parameter_variable_vids=parameter_vids_by_module.get(
                    mv.module_vid, []
                ),
            )
            if include_children:
                entry["tables"] = tables_by_module.get(mv.module_vid, [])
        results.append(entry)

    return results, total
def query_operators(
    self,
    *,
    params: StructureParams,
    detail: str = "full",
    offset: int = 0,
    limit: int = 100,
) -> Tuple[List[Dict[str, Any]], int]:
    """Query operators.

    Operators are flat, unversioned, and unowned. The ``{owner}``
    URL segment must be ``*`` (concrete owners 204); the release
    segment is ignored. The ``{id}`` segment matches
    ``Operator.name`` or, for values made up only of decimal digits,
    numeric ``operator_id`` — symbols are intentionally not supported
    as URL ids because operator symbols often clash with URL syntax
    (``+``, ``*``, ``/``).

    Each operator carries its ``OperatorArgument`` list inline at
    ``detail=full`` (the argument set is part of the operator's
    definition; a handful of entries per operator).

    Returns ``(entries, total)``. ``total`` is the number of matching
    operators before ``offset``/``limit`` are applied.
    """
    # Operators have no owner; a concrete owner filter cannot match.
    if not params.is_owner_wildcard:
        return [], 0

    q = self.session.query(Operator)

    if not params.is_id_wildcard:
        # ``str.isdecimal`` rather than ``str.isdigit``: the latter
        # also accepts characters such as "²" that ``int()`` rejects,
        # which turned a malformed URL id into an unhandled ValueError.
        numeric_ids = [int(v) for v in params.ids if v.isdecimal()]
        if numeric_ids:
            q = q.filter(
                or_(
                    Operator.name.in_(params.ids),
                    Operator.operator_id.in_(numeric_ids),
                )
            )
        else:
            q = q.filter(Operator.name.in_(params.ids))

    # Count first so an empty match skips the row query entirely.
    total = q.with_entities(func.count(Operator.operator_id)).scalar() or 0
    if total == 0:
        return [], 0

    q = q.order_by(Operator.operator_id)
    rows = q.offset(offset).limit(limit).all()

    if detail == "allstubs":
        return [_operator_stub_to_dict(op) for op in rows], total

    # One extra query loads the arguments of every operator on the page.
    operator_ids = [op.operator_id for op in rows]
    args_by_operator = self._bulk_load_operator_arguments(operator_ids)

    results = [
        _operator_to_dict(
            op,
            arguments=args_by_operator.get(op.operator_id, []),
        )
        for op in rows
    ]
    return results, total
self, + *, + params: StructureParams, + detail: str = "full", + offset: int = 0, + limit: int = 100, + ) -> Tuple[List[Dict[str, Any]], int]: + """Query operations with nested versions/nodes/references. + + Each Operation carries a ``versions`` array. The ``{release}`` + URL segment filters which versions appear inside — Operations + with no version active at the release are dropped from the + result set entirely. + + At ``detail=full`` (default) every version carries the full + node tree, each node carries its operand references, and each + reference carries its physical locations (table/row/column/sheet). + ``detail=allstubs`` returns just the Operation identifiers plus + the list of operationVersionIds. + """ + from dpmcore.dpm_xl.utils.filters import filter_by_release + + target_release: Optional[Release] = None + if not params.wants_all_releases: + target_release = self._resolve_release(params) + if target_release is None: + return [], 0 + + q = self.session.query(Operation) + + # Owner filter via direct owner_id (same lesson as contexts — + # row_guid → Concept can be NULL in real data). + owners = None if params.is_owner_wildcard else params.owners + if owners: + org_ids = [ + org_id + for (org_id,) in self.session.query(Organisation.org_id) + .filter(Organisation.acronym.in_(owners)) + .all() + ] + if not org_ids: + return [], 0 + q = q.filter(Operation.owner_id.in_(org_ids)) + + if not params.is_id_wildcard: + numeric_ids = [int(v) for v in params.ids if v.isdigit()] + if numeric_ids: + q = q.filter( + or_( + Operation.code.in_(params.ids), + Operation.operation_id.in_(numeric_ids), + ) + ) + else: + q = q.filter(Operation.code.in_(params.ids)) + + if target_release is not None: + # EXISTS subquery: include Operation only if it has at + # least one OperationVersion active at the target release. 
+ version_subq = self.session.query( + OperationVersion.operation_id + ).filter(OperationVersion.operation_id == Operation.operation_id) + version_subq = filter_by_release( + version_subq, + start_col=OperationVersion.start_release_id, + end_col=OperationVersion.end_release_id, + release_id=target_release.release_id, + ) + q = q.filter(version_subq.exists()) + + total = ( + q.with_entities(func.count(Operation.operation_id)).scalar() or 0 + ) + if total == 0: + return [], 0 + + q = q.order_by(Operation.operation_id) + operations: List[Operation] = q.offset(offset).limit(limit).all() + if not operations: + return [], total + + operation_ids = [op.operation_id for op in operations] + + # Bulk-load OperationVersions for the paginated Operations, + # filtered by the requested release. + versions_by_op = self._bulk_load_operation_versions( + operation_ids, target_release=target_release + ) + + # For full detail: bulk-load the rest of the tree. + nodes_by_vid: Dict[int, List[OperationNode]] = {} + refs_by_node: Dict[int, List[OperandReference]] = {} + locs_by_ref: Dict[int, List[OperandReferenceLocation]] = {} + if detail != "allstubs": + all_vids = [ + v.operation_vid for vs in versions_by_op.values() for v in vs + ] + nodes_by_vid = self._bulk_load_operation_nodes(all_vids) + all_node_ids = [ + n.node_id for ns in nodes_by_vid.values() for n in ns + ] + refs_by_node = self._bulk_load_operand_references(all_node_ids) + all_ref_ids = [ + r.operand_reference_id + for rs in refs_by_node.values() + for r in rs + ] + locs_by_ref = self._bulk_load_reference_locations(all_ref_ids) + + results: List[Dict[str, Any]] = [] + for op in operations: + owner_acronym = self._get_owner_acronym(op.owner_id) + op_versions = versions_by_op.get(op.operation_id, []) + if not op_versions: + # release filter dropped every version — skip the row. 
def _bulk_load_operation_versions(
    self,
    operation_ids: List[int],
    *,
    target_release: Optional[Release],
) -> Dict[int, List[OperationVersion]]:
    """Group the versions of the given operations by ``operation_id``.

    With a ``target_release`` the rows are narrowed by
    ``filter_by_release`` on the version's start/end release columns;
    with ``None`` every version of the listed operations is returned.
    Each list is ordered by ``start_release_id``. Operations left
    without any version have no key in the result.
    """
    from dpmcore.dpm_xl.utils.filters import filter_by_release

    versions_by_operation: Dict[int, List[OperationVersion]] = {}
    if not operation_ids:
        return versions_by_operation

    version_query = self.session.query(OperationVersion).filter(
        OperationVersion.operation_id.in_(operation_ids)
    )
    if target_release is not None:
        version_query = filter_by_release(
            version_query,
            start_col=OperationVersion.start_release_id,
            end_col=OperationVersion.end_release_id,
            release_id=target_release.release_id,
        )
    version_query = version_query.order_by(
        OperationVersion.operation_id, OperationVersion.start_release_id
    )

    for version in version_query.all():
        owner_key = cast(int, version.operation_id)
        versions_by_operation.setdefault(owner_key, []).append(version)
    return versions_by_operation
in one query.""" + if not operation_vids: + return {} + rows = ( + self.session.query(OperationNode) + .filter(OperationNode.operation_vid.in_(operation_vids)) + .order_by(OperationNode.operation_vid, OperationNode.node_id) + .all() + ) + out: Dict[int, List[OperationNode]] = defaultdict(list) + for n in rows: + out[cast(int, n.operation_vid)].append(n) + return dict(out) + + def _bulk_load_operand_references( + self, + node_ids: List[int], + ) -> Dict[int, List[OperandReference]]: + """``{node_id: [OperandReference, ...]}`` in one query.""" + if not node_ids: + return {} + rows = ( + self.session.query(OperandReference) + .filter(OperandReference.node_id.in_(node_ids)) + .order_by( + OperandReference.node_id, + OperandReference.operand_reference_id, + ) + .all() + ) + out: Dict[int, List[OperandReference]] = defaultdict(list) + for r in rows: + out[cast(int, r.node_id)].append(r) + return dict(out) + + def _bulk_load_reference_locations( + self, + reference_ids: List[int], + ) -> Dict[int, List[OperandReferenceLocation]]: + """``{operand_reference_id: [OperandReferenceLocation, ...]}``. + + Schema declares ``operand_reference_id`` as the location's + primary key, so the list is typically 0 or 1 entry — we still + surface it as a list to mirror the ORM relationship. 
def query_datatypes(
    self,
    *,
    params: StructureParams,
    detail: str = "full",
    references: str = "none",
    offset: int = 0,
    limit: int = 100,
) -> Tuple[List[Dict[str, Any]], int]:
    """Query data types.

    DataTypes are flat, unversioned, and unowned, but they form a
    hierarchy via ``parent_data_type_id``. Default response carries
    ``parentDataTypeId`` + ``childDataTypeIds``;
    ``references=children`` (or ``all``) adds expanded
    ``childDataTypes`` stubs unless ``detail`` is ``allstubs``.

    - ``{owner}`` must be ``*`` — DataTypes have no owner, so a
      concrete owner deliberately returns 204.
    - ``{release}`` is ignored — DataTypes are not versioned.
    - ``{id}`` matches ``DataType.code``; values made up only of
      decimal digits additionally match ``DataType.data_type_id``.

    Returns ``(entries, total)``. ``total`` is the number of matching
    data types before ``offset``/``limit`` are applied.
    """
    # DataTypes have no owner; a concrete owner filter cannot match.
    if not params.is_owner_wildcard:
        return [], 0

    q = self.session.query(DataType)

    if not params.is_id_wildcard:
        # ``str.isdecimal`` rather than ``str.isdigit``: the latter
        # also accepts characters such as "²" that ``int()`` rejects,
        # which turned a malformed URL id into an unhandled ValueError.
        numeric_ids = [int(v) for v in params.ids if v.isdecimal()]
        if numeric_ids:
            q = q.filter(
                or_(
                    DataType.code.in_(params.ids),
                    DataType.data_type_id.in_(numeric_ids),
                )
            )
        else:
            q = q.filter(DataType.code.in_(params.ids))

    # Count first so an empty match skips the row query entirely.
    total = (
        q.with_entities(func.count(DataType.data_type_id)).scalar() or 0
    )
    if total == 0:
        return [], 0

    q = q.order_by(DataType.data_type_id)
    rows = q.offset(offset).limit(limit).all()

    # Child id listings are loaded for every request; the expanded
    # child stubs only when the caller asked for them.
    parent_ids = [r.data_type_id for r in rows]
    child_id_map = self._bulk_load_datatype_child_ids(parent_ids)

    include_children = references in ("children", "all")
    child_expansions: Dict[int, List[Dict[str, Any]]] = {}
    if include_children and detail != "allstubs":
        child_expansions = self._bulk_load_datatype_child_expansions(
            parent_ids
        )

    results: List[Dict[str, Any]] = []
    for dt in rows:
        if detail == "allstubs":
            entry = _datatype_stub_to_dict(dt)
        else:
            entry = _datatype_to_dict(
                dt,
                child_data_type_ids=child_id_map.get(dt.data_type_id, []),
            )
            if include_children:
                entry["childDataTypes"] = child_expansions.get(
                    dt.data_type_id, []
                )
        results.append(entry)
    return results, total
"""``{parent_data_type_id: [child_stub, ...]}`` in one query.""" + if not parent_ids: + return {} + rows = ( + self.session.query(DataType) + .filter(DataType.parent_data_type_id.in_(parent_ids)) + .order_by(DataType.parent_data_type_id, DataType.data_type_id) + .all() + ) + out: Dict[int, List[Dict[str, Any]]] = defaultdict(list) + for dt in rows: + out[cast(int, dt.parent_data_type_id)].append( + _datatype_stub_to_dict(dt) + ) + return dict(out) + + # ------------------------------------------------------------------ # + # Organisations + # ------------------------------------------------------------------ # + + def query_organisations( + self, + *, + params: StructureParams, + detail: str = "full", + offset: int = 0, + limit: int = 100, + ) -> Tuple[List[Dict[str, Any]], int]: + """Query organisations. + + Organisations are not release-versioned; the release path + segment is accepted but ignored for selection. Both the + ``{owner}`` and ``{id}`` URL segments filter on + ``Organisation.acronym`` (or numeric ``org_id``) — for + organisations they target the same column. 
def query_contexts(  # noqa: C901 — pipeline orchestrator
    self,
    *,
    params: StructureParams,
    detail: str = "full",
    offset: int = 0,
    limit: int = 100,
) -> Tuple[List[Dict[str, Any]], int]:
    """Query contexts with SDMX-style filtering + virtual versioning.

    :class:`ContextComposition` is **not** release-versioned —
    the same (property, item) pairs apply to a context across
    the whole timeline. The codes of those properties/items,
    however, ARE release-versioned via their ItemCategory rows.
    We surface a new "virtual version" of a context every time
    the set of (propertyCode, itemCode) pairs changes between
    consecutive releases.

    The URL ``id`` segment matches ``Context.context_id``
    (contexts have no human-readable code field); ids that are not
    made up only of decimal digits are ignored, and a request with no
    usable id returns an empty result.

    Returns ``(entries, total)``. ``total`` counts the virtual-version
    entries produced for all matching contexts before
    ``offset``/``limit`` are applied.
    """
    target_release: Optional[Release] = None
    if not params.wants_all_releases:
        target_release = self._resolve_release(params)
        if target_release is None:
            return [], 0

    # Context query. Real-data note: Context rows often have
    # ``row_guid`` NULL while ``owner_id`` is populated — so we
    # filter on the direct column rather than the Concept join
    # used by other endpoints.
    q = self.session.query(Context)

    owners = None if params.is_owner_wildcard else params.owners
    if owners:
        org_ids = [
            org_id
            for (org_id,) in self.session.query(Organisation.org_id)
            .filter(Organisation.acronym.in_(owners))
            .all()
        ]
        if not org_ids:
            return [], 0
        q = q.filter(Context.owner_id.in_(org_ids))

    if not params.is_id_wildcard:
        # ``str.isdecimal`` rather than ``str.isdigit``: the latter
        # also accepts characters such as "²" that ``int()`` rejects,
        # which turned a malformed URL id into an unhandled ValueError.
        numeric_ids = [int(v) for v in params.ids if v.isdecimal()]
        if not numeric_ids:
            # Contexts have no code; non-numeric ids match nothing.
            return [], 0
        q = q.filter(Context.context_id.in_(numeric_ids))

    q = q.order_by(Context.context_id)
    contexts: List[Context] = q.all()
    if not contexts:
        return [], 0

    # Bulk-load compositions for all matched contexts.
    context_ids = [c.context_id for c in contexts]
    comp_rows = (
        self.session.query(ContextComposition)
        .filter(ContextComposition.context_id.in_(context_ids))
        .all()
    )
    comps_by_context: Dict[int, List[ContextComposition]] = defaultdict(
        list
    )
    all_property_ids: set[int] = set()
    all_item_ids: set[int] = set()
    for cc in comp_rows:
        comps_by_context[cc.context_id].append(cc)
        all_property_ids.add(cc.property_id)
        if cc.item_id is not None:
            all_item_ids.add(cc.item_id)

    # Bulk-load the three pieces of release-versioned lookup data.
    property_code_windows = self._bulk_load_property_code_windows(
        all_property_ids
    )
    property_category_windows = self._bulk_load_property_category_windows(
        all_property_ids
    )
    item_code_windows = self._bulk_load_item_code_windows(all_item_ids)

    releases = self._get_all_releases()

    # Versions are computed for every matched context before paging,
    # because one context can contribute zero, one or several entries.
    all_entries: List[Dict[str, Any]] = []
    for context in contexts:
        compositions = comps_by_context.get(context.context_id, [])
        versions = self._compute_context_versions(
            context,
            compositions,
            releases,
            property_code_windows,
            property_category_windows,
            item_code_windows,
            detail=detail,
        )
        if not versions:
            continue
        if params.wants_all_releases:
            # release=* — surface every virtual version.
            all_entries.extend(v_dict for _, v_dict in versions)
        elif target_release is not None:
            # Single release — keep only the version picked for it.
            v = self._version_at_release(versions, target_release)
            if v is not None:
                all_entries.append(v)

    total = len(all_entries)
    paginated = all_entries[offset : offset + limit]
    return paginated, total
+ """ + if not property_ids: + return {} + rows = ( + self.session.query( + ItemCategory.item_id, + ItemCategory.start_release_id, + ItemCategory.end_release_id, + ItemCategory.code, + ) + .join(Item, Item.item_id == ItemCategory.item_id) + .filter( + ItemCategory.item_id.in_(property_ids), + Item.is_property.is_(True), + ) + .all() + ) + out: Dict[int, List[Tuple[int, Optional[int], str]]] = defaultdict( + list + ) + for pid, start, end, code in rows: + if code is not None: + out[pid].append((start, end, code)) + return dict(out) + + def _bulk_load_property_category_windows( + self, + property_ids: set[int], + ) -> Dict[int, List[Tuple[int, Optional[int], int]]]: + """Map ``property_id`` to its category windows. + + Returns ``{property_id: [(start, end, category_id)]}``. + """ + if not property_ids: + return {} + rows = ( + self.session.query( + PropertyCategory.property_id, + PropertyCategory.start_release_id, + PropertyCategory.end_release_id, + PropertyCategory.category_id, + ) + .filter(PropertyCategory.property_id.in_(property_ids)) + .all() + ) + out: Dict[int, List[Tuple[int, Optional[int], int]]] = defaultdict( + list + ) + for pid, start, end, cat_id in rows: + if cat_id is not None: + out[pid].append((start, end, cat_id)) + return dict(out) + + def _bulk_load_item_code_windows( + self, + item_ids: set[int], + ) -> Dict[Tuple[int, int], List[Tuple[int, Optional[int], str]]]: + """``{(item_id, category_id): [(start, end, code), ...]}``. + + An item can have ItemCategory rows in several Categories; + keying by (item_id, category_id) keeps the lookup unambiguous + when the caller knows which Category the item should be looked + up in. 
+ """ + if not item_ids: + return {} + rows = ( + self.session.query( + ItemCategory.item_id, + ItemCategory.category_id, + ItemCategory.start_release_id, + ItemCategory.end_release_id, + ItemCategory.code, + ) + .filter(ItemCategory.item_id.in_(item_ids)) + .all() + ) + out: Dict[Tuple[int, int], List[Tuple[int, Optional[int], str]]] = ( + defaultdict(list) + ) + for item_id, cat_id, start, end, code in rows: + if code is not None and cat_id is not None: + out[(item_id, cat_id)].append((start, end, code)) + return dict(out) + + def _compute_context_versions( # noqa: C901 — orchestrator + self, + context: Context, + compositions: List[ContextComposition], + releases: List[Release], + property_code_windows: Dict[int, List[Tuple[int, Optional[int], str]]], + property_category_windows: Dict[ + int, List[Tuple[int, Optional[int], int]] + ], + item_code_windows: Dict[ + Tuple[int, int], List[Tuple[int, Optional[int], str]] + ], + *, + detail: str, + ) -> List[Tuple[Release, Dict[str, Any]]]: + """Emit one virtual version per fingerprint change.""" + owner_acronym = self._get_owner_acronym(context.owner_id) + versions: List[Tuple[Release, Dict[str, Any]]] = [] + prev_fingerprint: Optional[Tuple[Tuple[str, Optional[str]], ...]] = ( + None + ) + + for rel in releases: + pairs: List[Tuple[str, Optional[str]]] = [] + for cc in compositions: + prop_code = self._lookup_in_windows( + property_code_windows.get(cc.property_id, []), + rel.release_id, + ) + if prop_code is None: + continue + item_code: Optional[str] = None + if cc.item_id is not None: + cat_id = self._lookup_in_windows( + property_category_windows.get(cc.property_id, []), + rel.release_id, + ) + if cat_id is not None: + item_code = self._lookup_in_windows( + item_code_windows.get((cc.item_id, cat_id), []), + rel.release_id, + ) + pairs.append((prop_code, item_code)) + pairs.sort() + fingerprint = tuple(pairs) + + # Skip empty / unchanged fingerprints. 
+ if not fingerprint or fingerprint == prev_fingerprint: + continue + + version_dict = _context_to_dict( + context, + owner_acronym=owner_acronym, + release_code=rel.code, + start_release_id=rel.release_id, + compositions=pairs, + detail=detail, + ) + versions.append((rel, version_dict)) + prev_fingerprint = fingerprint + + return versions + + # ------------------------------------------------------------------ # + # Properties + # ------------------------------------------------------------------ # + + def query_properties( # noqa: C901 — pipeline orchestrator + self, + *, + params: StructureParams, + detail: str = "full", + offset: int = 0, + limit: int = 100, + ) -> Tuple[List[Dict[str, Any]], int]: + """Query properties with SDMX-style filtering. + + Properties don't have a dedicated *PropertyVersion* table; their + per-release identity lives in :class:`ItemCategory` rows whose + ``item_id`` matches a Property (and the parent + :class:`Category` is the meta-category that registers + properties, typically ``_PR``). One result entry per matching + ItemCategory row. + """ + from dpmcore.dpm_xl.utils.filters import filter_by_release + + target_release: Optional[Release] = None + if not params.wants_all_releases: + target_release = self._resolve_release(params) + if target_release is None: + return [], 0 + + # Anchor on ItemCategory + Property to get one row per + # property-version pair (a property's "version" is its + # ItemCategory row at the release). 
+ q = ( + self.session.query( + ItemCategory, Item, Property, Category, DataType + ) + .join(Item, Item.item_id == ItemCategory.item_id) + .join(Property, Property.property_id == Item.item_id) + .join(Category, Category.category_id == ItemCategory.category_id) + .outerjoin( + DataType, DataType.data_type_id == Property.data_type_id + ) + .filter(Item.is_property.is_(True)) + ) + + owners = None if params.is_owner_wildcard else params.owners + if owners: + q = ( + q.join(Concept, Item.row_guid == Concept.concept_guid) + .join( + Organisation, + Concept.owner_id == Organisation.org_id, + ) + .filter(Organisation.acronym.in_(owners)) + ) + + if not params.is_id_wildcard: + numeric_ids = [int(v) for v in params.ids if v.isdigit()] + if numeric_ids: + q = q.filter( + or_( + ItemCategory.code.in_(params.ids), + Property.property_id.in_(numeric_ids), + ) + ) + else: + q = q.filter(ItemCategory.code.in_(params.ids)) + + if target_release is not None: + q = filter_by_release( + q, + start_col=ItemCategory.start_release_id, + end_col=ItemCategory.end_release_id, + release_id=target_release.release_id, + ) + + total = q.with_entities(func.count()).scalar() or 0 + if total == 0: + return [], 0 + + q = q.order_by(Property.property_id, ItemCategory.start_release_id) + paginated = q.offset(offset).limit(limit).all() + if not paginated: + return [], total + + if detail == "allstubs": + return [ + _property_stub_to_dict( + ic, + item=item, + owner_acronym=self._get_owner_acronym(item.owner_id), + release_code=( + target_release.code + if target_release is not None + else self._resolve_release_code(ic.start_release_id) + ), + ) + for ic, item, _prop, _cat, _dt in paginated + ], total + + # Bulk-load enumeration links grouped by effective release. 
+ # The effective release is keyed per ItemCategory row (the + # target when pinned, else the row's own start release) — NOT + # per property_id, which would collide across a property's + # versions at release=* and apply one version's window to all. + property_ids_by_release: Dict[Optional[int], set[int]] = defaultdict( + set + ) + for ic, _item, prop, _cat, _dt in paginated: + effective = ( + target_release.release_id + if target_release is not None + else ic.start_release_id + ) + property_ids_by_release[effective].add(prop.property_id) + enums_by_release: Dict[Optional[int], Dict[int, Dict[str, Any]]] = { + rid: self._load_property_enumerations(pids, release_id=rid) + for rid, pids in property_ids_by_release.items() + } + + results: List[Dict[str, Any]] = [] + for ic, item, prop, cat, dt in paginated: + owner_acronym = self._get_owner_acronym(item.owner_id) + release_code = ( + target_release.code + if target_release is not None + else self._resolve_release_code(ic.start_release_id) + ) + effective = ( + target_release.release_id + if target_release is not None + else ic.start_release_id + ) + enumeration = enums_by_release.get(effective, {}).get( + prop.property_id + ) + results.append( + _property_to_dict( + ic, + item=item, + prop=prop, + defining_category=cat, + data_type=dt, + owner_acronym=owner_acronym, + release_code=release_code, + enumeration=enumeration, + ) + ) + return results, total + + def _load_property_enumerations( + self, + property_ids: set[int], + *, + release_id: Optional[int], + ) -> Dict[int, Dict[str, Any]]: + """Resolve enumeration payloads keyed by property_id. + + A property is "enumerated" when its :class:`PropertyCategory` + link, active at *release_id*, points at an enumerated Category. + The enumeration members are that Category's + :class:`ItemCategory` rows valid at the same release. Each + member carries ``code`` + ``signature`` from its + ItemCategory entry. 
+ """ + if not property_ids: + return {} + + from dpmcore.dpm_xl.utils.filters import filter_by_release + + pc_q = ( + self.session.query(PropertyCategory, Category) + .join( + Category, + Category.category_id == PropertyCategory.category_id, + ) + .filter( + PropertyCategory.property_id.in_(property_ids), + Category.is_enumerated.is_(True), + ) + ) + pc_q = filter_by_release( + pc_q, + start_col=PropertyCategory.start_release_id, + end_col=PropertyCategory.end_release_id, + release_id=release_id, + active_only_fallback=True, + ) + # Deterministic pick when a property links to multiple + # enumerated categories: lowest category_id wins. + enum_category_by_property: Dict[int, Category] = {} + for pc, cat in pc_q.all(): + existing = enum_category_by_property.get(pc.property_id) + if existing is None or cat.category_id < existing.category_id: + enum_category_by_property[pc.property_id] = cat + if not enum_category_by_property: + return {} + + category_ids = { + cat.category_id for cat in enum_category_by_property.values() + } + ic_q = ( + self.session.query(ItemCategory, Item) + .join(Item, Item.item_id == ItemCategory.item_id) + .filter(ItemCategory.category_id.in_(category_ids)) + ) + ic_q = filter_by_release( + ic_q, + start_col=ItemCategory.start_release_id, + end_col=ItemCategory.end_release_id, + release_id=release_id, + active_only_fallback=True, + ) + items_by_category: Dict[int, List[Tuple[ItemCategory, Item]]] = ( + defaultdict(list) + ) + for ic, item in ic_q.all(): + items_by_category[ic.category_id].append((ic, item)) + + result: Dict[int, Dict[str, Any]] = {} + for property_id, cat in enum_category_by_property.items(): + result[property_id] = { + "categoryId": cat.category_id, + "categoryCode": cat.code, + "categoryName": cat.name, + "items": [ + { + "itemId": item.item_id, + "name": item.name, + "code": ic.code, + "signature": ic.signature, + "isDefaultItem": ic.is_default_item, + } + for ic, item in items_by_category.get(cat.category_id, []) + ], + } + 
return result + + # ------------------------------------------------------------------ # + # Variables + # ------------------------------------------------------------------ # + + def query_variables( # noqa: C901 — pipeline orchestrator + self, + *, + params: StructureParams, + detail: str = "full", + offset: int = 0, + limit: int = 100, + ) -> Tuple[List[Dict[str, Any]], int]: + """Query variables with SDMX-style filtering. + + One entry per matching :class:`VariableVersion`. Each entry + carries the variable's intrinsic enumeration (from + ``VariableVersion.subcategory_vid``) when present — not the + derived enumeration that the ``/structure/table`` endpoint + computes from header subcategories. Variables are leaves; + ``references=children`` is silently a no-op. + """ + from dpmcore.dpm_xl.utils.filters import filter_by_release + + target_release: Optional[Release] = None + if not params.wants_all_releases: + target_release = self._resolve_release(params) + if target_release is None: + return [], 0 + + q = ( + self.session.query(VariableVersion) + .join( + Variable, + Variable.variable_id == VariableVersion.variable_id, + ) + .options(joinedload(VariableVersion.variable)) + ) + + owners = None if params.is_owner_wildcard else params.owners + if owners: + q = ( + q.join(Concept, Variable.row_guid == Concept.concept_guid) + .join( + Organisation, + Concept.owner_id == Organisation.org_id, + ) + .filter(Organisation.acronym.in_(owners)) + ) + + if not params.is_id_wildcard: + numeric_ids = [int(v) for v in params.ids if v.isdigit()] + if numeric_ids: + q = q.filter( + or_( + VariableVersion.code.in_(params.ids), + Variable.variable_id.in_(numeric_ids), + ) + ) + else: + q = q.filter(VariableVersion.code.in_(params.ids)) + + if target_release is not None: + q = filter_by_release( + q, + start_col=VariableVersion.start_release_id, + end_col=VariableVersion.end_release_id, + release_id=target_release.release_id, + ) + + # Count first, paginate at the DB. 
Variables can be on the order + # of 100k rows; pulling them all just to slice in Python would + # be a 3-second tax per request. ``with_entities(func.count(...))`` + # avoids SQLAlchemy's default subquery-wrapping count. + total = ( + q.with_entities(func.count(VariableVersion.variable_vid)).scalar() + or 0 + ) + if total == 0: + return [], 0 + q = q.order_by( + VariableVersion.variable_id, VariableVersion.start_release_id + ) + paginated: List[VariableVersion] = q.offset(offset).limit(limit).all() + + if not paginated: + return [], total + + # Effective release per row drives enumeration windowing. + effective_release_by_vid: Dict[int, Optional[int]] = { + vv.variable_vid: ( + target_release.release_id + if target_release is not None + else vv.start_release_id + ) + for vv in paginated + } + + if detail == "allstubs": + return [ + _variable_standalone_stub_to_dict( + vv, + owner_acronym=self._get_owner_acronym( + vv.variable.owner_id if vv.variable else None + ), + release_code=( + target_release.code + if target_release is not None + else self._resolve_release_code(vv.start_release_id) + ), + var_type=vv.variable.type if vv.variable else None, + ) + for vv in paginated + ], total + + property_ids = {vv.property_id for vv in paginated if vv.property_id} + property_names = self._bulk_load_property_names(property_ids) + + key_ids = {vv.key_id for vv in paginated if vv.key_id} + key_signatures = self._bulk_load_key_signatures(key_ids) + + # Subcategory enumeration loads grouped by effective release. 
+ subcat_vids_by_release: Dict[Optional[int], set[int]] = defaultdict( + set + ) + for vv in paginated: + if vv.subcategory_vid: + subcat_vids_by_release[ + effective_release_by_vid[vv.variable_vid] + ].add(vv.subcategory_vid) + subcat_enums_by_release: Dict[ + Optional[int], Dict[int, Dict[str, Any]] + ] = { + rid: self._load_subcategory_enumerations(svids, release_id=rid) + for rid, svids in subcat_vids_by_release.items() + } + + results: List[Dict[str, Any]] = [] + for vv in paginated: + owner_acronym = self._get_owner_acronym( + vv.variable.owner_id if vv.variable else None + ) + release_code = ( + target_release.code + if target_release is not None + else self._resolve_release_code(vv.start_release_id) + ) + property_ref: Optional[Dict[str, Any]] = None + if vv.property_id is not None: + property_ref = { + "id": vv.property_id, + "name": property_names.get(vv.property_id), + } + enumeration: Optional[Dict[str, Any]] = None + if vv.subcategory_vid: + rid = effective_release_by_vid[vv.variable_vid] + enumeration = subcat_enums_by_release.get(rid, {}).get( + vv.subcategory_vid + ) + results.append( + _variable_standalone_to_dict( + vv, + owner_acronym=owner_acronym, + release_code=release_code, + var_type=vv.variable.type if vv.variable else None, + property_ref=property_ref, + key_signature=( + key_signatures.get(vv.key_id) + if vv.key_id is not None + else None + ), + enumeration=enumeration, + ) + ) + return results, total + + def _bulk_load_key_signatures( + self, + key_ids: set[int], + ) -> Dict[int, Optional[str]]: + """``{key_id: CompoundKey.signature}`` in one query.""" + if not key_ids: + return {} + rows = ( + self.session.query(CompoundKey.key_id, CompoundKey.signature) + .filter(CompoundKey.key_id.in_(key_ids)) + .all() + ) + return dict(row._tuple() for row in rows) + + # ------------------------------------------------------------------ # + # Frameworks + # ------------------------------------------------------------------ # + + def 
query_frameworks( # noqa: C901 — pipeline orchestrator + self, + *, + params: StructureParams, + detail: str = "full", + references: str = "none", + offset: int = 0, + limit: int = 100, + ) -> Tuple[List[Dict[str, Any]], int]: + """Query frameworks with SDMX-style filtering. + + Framework itself isn't release-versioned, so the ``release`` + path segment doesn't constrain the framework set — it's used + only to filter the ``modules`` children when + ``references=children`` (or ``all``). + """ + # Resolve release only for child filtering. An unresolvable + # literal release means children come back empty without + # erroring the framework selection. + target_release: Optional[Release] = None + if not params.wants_all_releases: + target_release = self._resolve_release(params) + child_release_resolvable = ( + params.wants_all_releases or target_release is not None + ) + + q = self.session.query(Framework) + + owners = None if params.is_owner_wildcard else params.owners + if owners: + q = ( + q.join(Concept, Framework.row_guid == Concept.concept_guid) + .join( + Organisation, + Concept.owner_id == Organisation.org_id, + ) + .filter(Organisation.acronym.in_(owners)) + ) + + if not params.is_id_wildcard: + numeric_ids = [int(v) for v in params.ids if v.isdigit()] + if numeric_ids: + q = q.filter( + or_( + Framework.code.in_(params.ids), + Framework.framework_id.in_(numeric_ids), + ) + ) + else: + q = q.filter(Framework.code.in_(params.ids)) + + q = q.order_by(Framework.framework_id) + frameworks: List[Framework] = q.all() + + if not frameworks: + return [], 0 + + total = len(frameworks) + paginated = frameworks[offset : offset + limit] + + include_children = references in ("children", "all") + modules_by_framework: Dict[int, List[Dict[str, Any]]] = {} + if ( + include_children + and detail != "allstubs" + and child_release_resolvable + ): + modules_by_framework = self._load_framework_children( + paginated, target_release=target_release + ) + + results: List[Dict[str, 
Any]] = [] + for fw in paginated: + owner_acronym = self._get_owner_acronym(fw.owner_id) + if detail == "allstubs": + entry = _framework_stub_to_dict( + fw, owner_acronym=owner_acronym + ) + else: + entry = _framework_to_dict(fw, owner_acronym=owner_acronym) + if include_children: + entry["modules"] = modules_by_framework.get( + fw.framework_id, [] + ) + results.append(entry) + + return results, total + + def _load_framework_children( + self, + frameworks: List[Framework], + *, + target_release: Optional[Release], + ) -> Dict[int, List[Dict[str, Any]]]: + """Bulk-load ModuleVersions for the given frameworks. + + Returns ``{framework_id: [module_dict, ...]}``. Modules are + sorted by module_id then start_release_id within each + framework. Each module dict has the same shape that + ``/structure/module`` returns by default (no nested tables). + """ + from dpmcore.dpm_xl.utils.filters import filter_by_release + + if not frameworks: + return {} + + framework_ids = [fw.framework_id for fw in frameworks] + + q = ( + self.session.query(ModuleVersion) + .join(Module, Module.module_id == ModuleVersion.module_id) + .options(joinedload(ModuleVersion.module)) + .filter(Module.framework_id.in_(framework_ids)) + ) + if target_release is not None: + q = filter_by_release( + q, + start_col=ModuleVersion.start_release_id, + end_col=ModuleVersion.end_release_id, + release_id=target_release.release_id, + ) + + q = q.order_by( + Module.framework_id, + ModuleVersion.module_id, + ModuleVersion.start_release_id, + ) + module_versions: List[ModuleVersion] = q.all() + if not module_versions: + return {} + + module_vids = [mv.module_vid for mv in module_versions] + parameter_vids_by_module = self._bulk_load_module_parameters( + module_vids + ) + # Frameworks are already in hand; build framework refs locally + # without an extra query. 
+ framework_refs = { + fw.framework_id: { + "id": fw.framework_id, + "code": fw.code, + "name": fw.name, + } + for fw in frameworks + } + + result: Dict[int, List[Dict[str, Any]]] = defaultdict(list) + for mv in module_versions: + module = mv.module + if module is None or module.framework_id is None: + continue + owner_acronym = self._get_owner_acronym(module.owner_id) + release_code = ( + target_release.code + if target_release is not None + else self._resolve_release_code(mv.start_release_id) + ) + entry = _module_version_to_dict( + mv, + owner_acronym=owner_acronym, + release_code=release_code, + framework_ref=framework_refs.get(module.framework_id), + parameter_variable_vids=parameter_vids_by_module.get( + mv.module_vid, [] + ), + ) + result[module.framework_id].append(entry) + return dict(result) + + def _bulk_load_framework_refs( + self, + framework_ids: set[int], + ) -> Dict[int, Dict[str, Any]]: + """``{framework_id: {"id", "code", "name"}}`` in one query.""" + if not framework_ids: + return {} + rows = ( + self.session.query( + Framework.framework_id, Framework.code, Framework.name + ) + .filter(Framework.framework_id.in_(framework_ids)) + .all() + ) + return { + fid: {"id": fid, "code": code, "name": name} + for fid, code, name in rows + } + + def _bulk_load_module_parameters( + self, + module_vids: List[int], + ) -> Dict[int, List[int]]: + """``{module_vid: [variable_vid, ...]}`` in one query.""" + if not module_vids: + return {} + rows = ( + self.session.query( + ModuleParameters.module_vid, ModuleParameters.variable_vid + ) + .filter(ModuleParameters.module_vid.in_(module_vids)) + .order_by( + ModuleParameters.module_vid, ModuleParameters.variable_vid + ) + .all() + ) + out: Dict[int, List[int]] = defaultdict(list) + for mvid, vvid in rows: + out[mvid].append(vvid) + return dict(out) + + +# ------------------------------------------------------------------ # +# Table dict shape helpers +# 
------------------------------------------------------------------ # + + +def _assemble_variable_blocks( + variable_vids: set[int], + *, + subcat_vids_by_variable: Dict[int, set[int]], + subcat_enums: Dict[int, Dict[str, Any]], + vv_by_vid: Dict[int, VariableVersion], + property_names: Dict[int, Optional[str]], +) -> List[Dict[str, Any]]: + """Build deduplicated variable dicts from preloaded VariableVersions. + + Each dict carries a property reference and at most one enumeration. + A variable reachable from multiple subcategories gets the lowest-id + match deterministically. Variables in *variable_vids* that aren't + present in *vv_by_vid* are silently skipped (the caller didn't + preload them). + """ + results: List[Dict[str, Any]] = [] + for vid in sorted(variable_vids): + vv = vv_by_vid.get(vid) + if vv is None: + continue + property_dict: Optional[Dict[str, Any]] = None + if vv.property_id is not None: + property_dict = { + "id": vv.property_id, + "name": property_names.get(vv.property_id), + } + applicable_svids = subcat_vids_by_variable.get(vid, set()) + enumeration: Optional[Dict[str, Any]] = next( + ( + subcat_enums[svid] + for svid in sorted(applicable_svids) + if svid in subcat_enums + ), + None, + ) + results.append( + _variable_version_to_dict(vv, property_dict, enumeration) + ) + return results + + +def _collect_subcategory_vids_per_variable( + header_rows: List[Tuple[TableVersionHeader, Header, HeaderVersion]], + cell_rows: List[Tuple[TableVersionCell, Cell]], +) -> Dict[int, set[int]]: + """Map variable_vids to the subcategory_vids of their related headers. + + Related headers are: + - the HeaderVersion that names the variable as its key_variable_vid; + - any HeaderVersion of a header (column/row/sheet) that bounds a + cell whose variable_vid is the one in question. 
+ """ + header_version_by_id: Dict[int, HeaderVersion] = { + h.header_id: hv for _tvh, h, hv in header_rows + } + out: Dict[int, set[int]] = defaultdict(set) + + for _tvh, _h, hv in header_rows: + if hv.key_variable_vid and hv.subcategory_vid: + out[hv.key_variable_vid].add(hv.subcategory_vid) + + for tvc, cell in cell_rows: + if not tvc.variable_vid: + continue + for hid in (cell.column_id, cell.row_id, cell.sheet_id): + if hid is None: + continue + matched = header_version_by_id.get(hid) + if matched and matched.subcategory_vid: + out[tvc.variable_vid].add(matched.subcategory_vid) + + return out + + +def _table_stub_to_dict( + tv: TableVersion, + *, + owner_acronym: Optional[str], + release_code: Optional[str], +) -> Dict[str, Any]: + """``detail=allstubs`` row — identifiers only.""" + return { + "id": tv.table_id, + "tableVersionId": tv.table_vid, + "code": tv.code, + "name": tv.name, + "owner": owner_acronym, + "release": release_code, + "startReleaseId": tv.start_release_id, + "endReleaseId": tv.end_release_id, + } + + +def _table_to_dict( + tv: TableVersion, + *, + owner_acronym: Optional[str], + release_code: Optional[str], + property_names: Dict[int, Optional[str]], + headers: List[Dict[str, Any]], + cells: List[Dict[str, Any]], + key_variables: List[Dict[str, Any]], + fact_variables: List[Dict[str, Any]], +) -> Dict[str, Any]: + """Full ``detail=full`` table-version row.""" + table = tv.table + property_ref: Optional[Dict[str, Any]] = None + if tv.property_id is not None: + property_ref = { + "id": tv.property_id, + "name": property_names.get(tv.property_id), + } + context_ref: Optional[Dict[str, Any]] = ( + {"id": tv.context_id} if tv.context_id is not None else None + ) + + return { + "id": tv.table_id, + "tableVersionId": tv.table_vid, + "code": tv.code, + "name": tv.name, + "description": tv.description, + "owner": owner_acronym, + "release": release_code, + "startReleaseId": tv.start_release_id, + "endReleaseId": tv.end_release_id, + "isAbstract": 
table.is_abstract if table else None, + "hasOpenColumns": table.has_open_columns if table else None, + "hasOpenRows": table.has_open_rows if table else None, + "hasOpenSheets": table.has_open_sheets if table else None, + "isNormalised": table.is_normalised if table else None, + "isFlat": table.is_flat if table else None, + "property": property_ref, + "context": context_ref, + "headers": headers, + "cells": cells, + "keyVariables": key_variables, + "factVariables": fact_variables, + } + + +def _header_version_to_dict( + tvh: TableVersionHeader, + header: Header, + header_version: HeaderVersion, + property_names: Dict[int, Optional[str]], +) -> Dict[str, Any]: + """One header row in the ``headers`` array of a table entry.""" + property_ref: Optional[Dict[str, Any]] = None + if header_version.property_id is not None: + property_ref = { + "id": header_version.property_id, + "name": property_names.get(header_version.property_id), + } + context_ref: Optional[Dict[str, Any]] = ( + {"id": header_version.context_id} + if header_version.context_id is not None + else None + ) + + return { + "id": header.header_id, + "headerVersionId": header_version.header_vid, + "direction": header.direction, + "isKey": header.is_key, + "isAttribute": header.is_attribute, + "code": header_version.code, + "label": header_version.label, + "order": tvh.order, + "parentHeaderId": tvh.parent_header_id, + "parentFirst": tvh.parent_first, + "isAbstractInVersion": tvh.is_abstract, + "isUniqueInVersion": tvh.is_unique, + "startReleaseId": header_version.start_release_id, + "endReleaseId": header_version.end_release_id, + "property": property_ref, + "context": context_ref, + "subcategoryVersionId": header_version.subcategory_vid, + "keyVariableVersionId": header_version.key_variable_vid, + } + + +def _cell_to_dict( + tvc: TableVersionCell, + cell: Cell, +) -> Dict[str, Any]: + """One cell row in the ``cells`` array of a table entry.""" + return { + "cellId": cell.cell_id, + "cellCode": tvc.cell_code, 
+ "columnHeaderId": cell.column_id, + "rowHeaderId": cell.row_id, + "sheetHeaderId": cell.sheet_id, + "isNullable": tvc.is_nullable, + "isExcluded": tvc.is_excluded, + "isVoid": tvc.is_void, + "sign": tvc.sign, + "variableVersionId": tvc.variable_vid, + } + + +def _variable_version_to_dict( + vv: VariableVersion, + property_ref: Optional[Dict[str, Any]], + enumeration: Optional[Dict[str, Any]], +) -> Dict[str, Any]: + """Build one variable dict for a table entry. + + ``enumeration`` is the single applicable subcategory dict, or + None when no related header carries a subcategory. Goes into the + table's ``keyVariables`` or ``factVariables`` array depending on + the caller's classification. + """ + return { + "id": vv.variable_id, + "versionId": vv.variable_vid, + "code": vv.code, + "name": vv.name, + "isMultiValued": vv.is_multi_valued, + "startReleaseId": vv.start_release_id, + "endReleaseId": vv.end_release_id, + "property": property_ref, + "isEnumerated": enumeration is not None, + "enumeration": enumeration, + } + + +# ------------------------------------------------------------------ # +# Context dict shape helpers +# ------------------------------------------------------------------ # + + +def _context_to_dict( + context: Context, + *, + owner_acronym: Optional[str], + release_code: Optional[str], + start_release_id: int, + compositions: List[Tuple[str, Optional[str]]], + detail: str, +) -> Dict[str, Any]: + """One context-version row. + + ``compositions`` is the sorted list of (propertyCode, itemCode) + pairs alive at this virtual version's start release. ``detail= + allstubs`` strips the compositions. 
+ """ + if detail == "allstubs": + return { + "id": context.context_id, + "owner": owner_acronym, + "release": release_code, + "startReleaseId": start_release_id, + } + return { + "id": context.context_id, + "signature": context.signature, + "owner": owner_acronym, + "release": release_code, + "startReleaseId": start_release_id, + "compositions": [ + {"propertyCode": pc, "itemCode": ic} for pc, ic in compositions + ], + } + + +# ------------------------------------------------------------------ # +# Property dict shape helpers +# ------------------------------------------------------------------ # + + +def _property_stub_to_dict( + ic: ItemCategory, + *, + item: Item, + owner_acronym: Optional[str], + release_code: Optional[str], +) -> Dict[str, Any]: + """``detail=allstubs`` row for a property — identifiers only.""" + return { + "id": item.item_id, + "code": ic.code, + "signature": ic.signature, + "label": item.name, + "owner": owner_acronym, + "release": release_code, + "startReleaseId": ic.start_release_id, + "endReleaseId": ic.end_release_id, + } + + +def _property_to_dict( + ic: ItemCategory, + *, + item: Item, + prop: Property, + defining_category: Category, + data_type: Optional[DataType], + owner_acronym: Optional[str], + release_code: Optional[str], + enumeration: Optional[Dict[str, Any]], +) -> Dict[str, Any]: + """Full property-version row (one entry per ItemCategory row). + + The property's release-scoped identity (``code``, ``signature``, + ``startReleaseId``, ``endReleaseId``) comes from the + ``ItemCategory`` row. ``label`` is from the parent ``Item``; + structural flags + data type from ``Property``; + ``definingCategory`` is the ItemCategory's parent Category (the + meta-category that registers the property, typically ``_PR``); + ``enumeration`` (when present) reflects the property's + ``PropertyCategory`` link at the release. 
+ """ + data_type_ref: Optional[Dict[str, Any]] = None + if data_type is not None: + data_type_ref = { + "id": data_type.data_type_id, + "code": data_type.code, + "name": data_type.name, + } + return { + "id": item.item_id, + "code": ic.code, + "signature": ic.signature, + "label": item.name, + "description": item.description, + "owner": owner_acronym, + "release": release_code, + "startReleaseId": ic.start_release_id, + "endReleaseId": ic.end_release_id, + "isComposite": prop.is_composite, + "isMetric": prop.is_metric, + "valueLength": prop.value_length, + "periodType": prop.period_type, + "dataType": data_type_ref, + "definingCategory": { + "id": defining_category.category_id, + "code": defining_category.code, + "name": defining_category.name, + }, + "isEnumerated": enumeration is not None, + "enumeration": enumeration, + } + + +# ------------------------------------------------------------------ # +# Variable dict shape helpers (standalone /structure/variable endpoint) +# ------------------------------------------------------------------ # + + +def _variable_standalone_stub_to_dict( + vv: VariableVersion, + *, + owner_acronym: Optional[str], + release_code: Optional[str], + var_type: Optional[str], +) -> Dict[str, Any]: + """``detail=allstubs`` row — identifiers only.""" + return { + "id": vv.variable_id, + "versionId": vv.variable_vid, + "code": vv.code, + "name": vv.name, + "type": var_type, + "owner": owner_acronym, + "release": release_code, + "startReleaseId": vv.start_release_id, + "endReleaseId": vv.end_release_id, + } + + +def _variable_standalone_to_dict( + vv: VariableVersion, + *, + owner_acronym: Optional[str], + release_code: Optional[str], + var_type: Optional[str], + property_ref: Optional[Dict[str, Any]], + key_signature: Optional[str], + enumeration: Optional[Dict[str, Any]], +) -> Dict[str, Any]: + """Full standalone variable-version row. 
+ + Enumeration comes from ``VariableVersion.subcategory_vid`` (the + variable's intrinsic enumeration domain), not from any table + context. Differs from :func:`_variable_version_to_dict` which is + used inside table entries. + """ + return { + "id": vv.variable_id, + "versionId": vv.variable_vid, + "code": vv.code, + "name": vv.name, + "type": var_type, + "owner": owner_acronym, + "release": release_code, + "startReleaseId": vv.start_release_id, + "endReleaseId": vv.end_release_id, + "isMultiValued": vv.is_multi_valued, + "property": property_ref, + "subcategoryVersionId": vv.subcategory_vid, + "contextId": vv.context_id, + "keyId": vv.key_id, + "keySignature": key_signature, + "isEnumerated": enumeration is not None, + "enumeration": enumeration, + } + + +# ------------------------------------------------------------------ # +# Framework dict shape helpers +# ------------------------------------------------------------------ # + + +def _framework_stub_to_dict( + fw: Framework, + *, + owner_acronym: Optional[str], +) -> Dict[str, Any]: + """``detail=allstubs`` row — identifiers only.""" + return { + "id": fw.framework_id, + "code": fw.code, + "name": fw.name, + "owner": owner_acronym, + } + + +def _framework_to_dict( + fw: Framework, + *, + owner_acronym: Optional[str], +) -> Dict[str, Any]: + """Full ``detail=full`` framework row (no children). + + Caller adds ``"modules"`` after this when ``references=children``. 
+ """ + return { + "id": fw.framework_id, + "code": fw.code, + "name": fw.name, + "description": fw.description, + "owner": owner_acronym, + } + + +# ------------------------------------------------------------------ # +# Operator dict shape helpers +# ------------------------------------------------------------------ # + + +def _operator_stub_to_dict(op: Operator) -> Dict[str, Any]: + """``detail=allstubs`` row — identifiers only.""" + return { + "id": op.operator_id, + "name": op.name, + "symbol": op.symbol, + } + + +def _operator_to_dict( + op: Operator, + *, + arguments: List[Dict[str, Any]], +) -> Dict[str, Any]: + """Full operator row with inline ``arguments``.""" + return { + "id": op.operator_id, + "name": op.name, + "symbol": op.symbol, + "type": op.type, + "arguments": arguments, + } + + +# ------------------------------------------------------------------ # +# Operation dict shape helpers +# ------------------------------------------------------------------ # + + +def _operation_stub_to_dict( + op: Operation, + *, + owner_acronym: Optional[str], + version_ids: List[int], +) -> Dict[str, Any]: + """``detail=allstubs`` row — identifiers + version VID list only.""" + return { + "id": op.operation_id, + "code": op.code, + "type": op.type, + "owner": owner_acronym, + "operationVersionIds": version_ids, + } + + +def _operation_to_dict( + op: Operation, + *, + owner_acronym: Optional[str], + versions: List[Dict[str, Any]], +) -> Dict[str, Any]: + """Full Operation row with nested ``versions``.""" + return { + "id": op.operation_id, + "code": op.code, + "type": op.type, + "source": op.source, + "owner": owner_acronym, + "groupOperationId": op.group_operation_id, + "versions": versions, + } + + +def _operation_version_to_dict( + v: OperationVersion, + *, + release_code: Optional[str], + nodes: List[Dict[str, Any]], +) -> Dict[str, Any]: + """One OperationVersion entry inside an Operation's ``versions``.""" + return { + "operationVersionId": v.operation_vid, + 
"release": release_code, + "startReleaseId": v.start_release_id, + "endReleaseId": v.end_release_id, + "expression": v.expression, + "description": v.description, + "endorsement": v.endorsement, + "isVariantApproved": v.is_variant_approved, + "preconditionOperationVid": v.precondition_operation_vid, + "severityOperationVid": v.severity_operation_vid, + "nodes": nodes, + } + + +def _operation_node_to_dict( + n: OperationNode, + *, + references: List[Dict[str, Any]], +) -> Dict[str, Any]: + """One OperationNode entry inside a version's ``nodes``. + + Nodes are returned as a flat list keyed by ``nodeId`` / + ``parentNodeId`` — clients can reconstruct the tree by walking + parent links. Listing them flat keeps the response stable + regardless of tree shape. + """ + return { + "nodeId": n.node_id, + "parentNodeId": n.parent_node_id, + "operatorId": n.operator_id, + "argumentId": n.argument_id, + "absoluteTolerance": n.absolute_tolerance, + "relativeTolerance": n.relative_tolerance, + "fallbackValue": n.fallback_value, + "useIntervalArithmetics": n.use_interval_arithmetics, + "operandType": n.operand_type, + "isLeaf": n.is_leaf, + "scalar": n.scalar, + "references": references, + } + + +def _operand_reference_to_dict( + r: OperandReference, + *, + locations: List[Dict[str, Any]], +) -> Dict[str, Any]: + """One OperandReference entry inside a node's ``references``.""" + return { + "operandReferenceId": r.operand_reference_id, + "x": r.x, + "y": r.y, + "z": r.z, + "operandReference": r.operand_reference, + "itemId": r.item_id, + "propertyId": r.property_id, + "variableId": r.variable_id, + "subcategoryId": r.subcategory_id, + "locations": locations, + } + + +def _operand_reference_location_to_dict( + loc: OperandReferenceLocation, +) -> Dict[str, Any]: + """One OperandReferenceLocation entry inside a reference.""" + return { + "cellId": loc.cell_id, + "table": loc.table, + "row": loc.row, + "column": loc.column, + "sheet": loc.sheet, + } + + +# 
------------------------------------------------------------------ # +# DataType dict shape helpers +# ------------------------------------------------------------------ # + + +def _datatype_stub_to_dict(dt: DataType) -> Dict[str, Any]: + """Compact DataType representation (also used for child expansion).""" + return { + "id": dt.data_type_id, + "code": dt.code, + "name": dt.name, + "isActive": dt.is_active, + } + + +def _datatype_to_dict( + dt: DataType, + *, + child_data_type_ids: List[int], +) -> Dict[str, Any]: + """Full ``detail=full`` data-type row. + + Caller appends ``childDataTypes`` after this when + ``references=children``. + """ + return { + "id": dt.data_type_id, + "code": dt.code, + "name": dt.name, + "isActive": dt.is_active, + "parentDataTypeId": dt.parent_data_type_id, + "childDataTypeIds": child_data_type_ids, + } + + +# ------------------------------------------------------------------ # +# TableGroup dict shape helpers +# ------------------------------------------------------------------ # + + +def _tablegroup_stub_to_dict( + g: TableGroup, + *, + owner_acronym: Optional[str], + release_code: Optional[str], +) -> Dict[str, Any]: + """``detail=allstubs`` row — identifiers only.""" + return { + "id": g.table_group_id, + "code": g.code, + "name": g.name, + "owner": owner_acronym, + "release": release_code, + "startReleaseId": g.start_release_id, + "endReleaseId": g.end_release_id, + } + + +def _tablegroup_to_dict( + g: TableGroup, + *, + owner_acronym: Optional[str], + release_code: Optional[str], + child_table_group_ids: List[int], +) -> Dict[str, Any]: + """Full ``detail=full`` table-group row (no expanded children). + + Caller appends ``tables`` and ``childTableGroups`` after this when + ``references=children``. ``childTableGroupIds`` is always present + and gives a cheap pointer to the hierarchy without requiring + children expansion. 
+ """ + return { + "id": g.table_group_id, + "code": g.code, + "name": g.name, + "description": g.description, + "type": g.type, + "owner": owner_acronym, + "release": release_code, + "startReleaseId": g.start_release_id, + "endReleaseId": g.end_release_id, + "parentTableGroupId": g.parent_table_group_id, + "childTableGroupIds": child_table_group_ids, + } + + +# ------------------------------------------------------------------ # +# Module dict shape helpers +# ------------------------------------------------------------------ # + + +def _module_stub_to_dict( + mv: ModuleVersion, + *, + owner_acronym: Optional[str], + release_code: Optional[str], +) -> Dict[str, Any]: + """``detail=allstubs`` row — identifiers only.""" + return { + "id": mv.module_id, + "moduleVersionId": mv.module_vid, + "code": mv.code, + "name": mv.name, + "owner": owner_acronym, + "release": release_code, + "startReleaseId": mv.start_release_id, + "endReleaseId": mv.end_release_id, + } + + +def _module_version_to_dict( + mv: ModuleVersion, + *, + owner_acronym: Optional[str], + release_code: Optional[str], + framework_ref: Optional[Dict[str, Any]], + parameter_variable_vids: List[int], +) -> Dict[str, Any]: + """Full ``detail=full`` module-version row. + + Caller adds ``"tables"`` after this when ``references=children``. 
+ """ + return { + "id": mv.module_id, + "moduleVersionId": mv.module_vid, + "code": mv.code, + "name": mv.name, + "description": mv.description, + "versionNumber": mv.version_number, + "owner": owner_acronym, + "release": release_code, + "startReleaseId": mv.start_release_id, + "endReleaseId": mv.end_release_id, + "fromReferenceDate": ( + mv.from_reference_date.isoformat() + if mv.from_reference_date is not None + else None + ), + "toReferenceDate": ( + mv.to_reference_date.isoformat() + if mv.to_reference_date is not None + else None + ), + "isReported": mv.is_reported, + "isCalculated": mv.is_calculated, + "isDocumentModule": ( + mv.module.is_document_module if mv.module is not None else None + ), + "framework": framework_ref, + "globalKeyId": mv.global_key_id, + "parameterVariableVersionIds": parameter_variable_vids, + } diff --git a/tests/unit/server/test_structure_context.py b/tests/unit/server/test_structure_context.py new file mode 100644 index 0000000..d7e2282 --- /dev/null +++ b/tests/unit/server/test_structure_context.py @@ -0,0 +1,530 @@ +"""Integration tests for /api/v1/structure/context endpoints.""" + +from __future__ import annotations + +from datetime import date + +import pytest +from sqlalchemy import create_engine, event +from sqlalchemy.orm import Session +from sqlalchemy.pool import StaticPool + +import dpmcore.orm # noqa: F401 — ensure all models are loaded +from dpmcore.orm.base import Base +from dpmcore.orm.glossary import ( + Category, + Context, + ContextComposition, + Item, + ItemCategory, + Property, + PropertyCategory, +) +from dpmcore.orm.infrastructure import ( + Concept, + DataType, + Organisation, + Release, +) +from dpmcore.server.app import create_app + +# ------------------------------------------------------------------ # +# Seed model +# ------------------------------------------------------------------ # +# +# Three releases (4.0, 4.1, 4.2). 
Two contexts: +# +# - context 100: composes property 51 (Accounting Standard) with +# item 700 (IFRS) AND property 52 (Geographical Area) with +# item 701 (EU). +# +# Property 51 has its `_PR` code "AS" at 4.0/4.1 and is recoded +# to "ACC" at 4.2 → first version transition. +# Item 700's code in ACC_STD is "IFRS" at 4.0 only and +# "IFRS_NEW" at 4.1 onwards → second version transition. +# So context 100 has 3 virtual versions across 3 releases. +# +# - context 200: composes only property 53 with no item (item_id +# null) → fingerprint is one (propertyCode, None) pair. No +# code changes for property 53 → exactly one virtual version. +# ------------------------------------------------------------------ # + + +@pytest.fixture +def engine(): + eng = create_engine( + "sqlite:///:memory:", + connect_args={"check_same_thread": False}, + poolclass=StaticPool, + ) + Base.metadata.create_all(eng) + return eng + + +@pytest.fixture +def seeded_engine(engine): + s = Session(bind=engine) + + s.add( + Organisation( + org_id=1, name="European Banking Authority", acronym="EBA" + ) + ) + s.flush() + + for guid in [ + "c-rel-1", + "c-rel-2", + "c-rel-3", + "c-prop-51", + "c-prop-52", + "c-prop-53", + "c-ctx-100", + "c-ctx-200", + ]: + s.add(Concept(concept_guid=guid, owner_id=1)) + s.flush() + + s.add_all( + [ + Release( + release_id=1, + code="4.0", + date=date(2024, 1, 1), + status="Final", + row_guid="c-rel-1", + owner_id=1, + ), + Release( + release_id=2, + code="4.1", + date=date(2024, 6, 1), + status="Final", + row_guid="c-rel-2", + owner_id=1, + ), + Release( + release_id=3, + code="4.2", + date=date(2024, 12, 1), + status="Final", + row_guid="c-rel-3", + owner_id=1, + ), + ] + ) + s.flush() + + s.add( + DataType(data_type_id=1, code="String", name="String", is_active=True) + ) + s.flush() + + # Meta-category for properties + two enumerated domains. 
+ s.add_all( + [ + Category( + category_id=1, + code="_PR", + name="Property", + is_enumerated=False, + is_active=True, + is_external_ref_data=False, + created_release=1, + owner_id=1, + ), + Category( + category_id=60, + code="ACC_STD", + name="Accounting standard", + is_enumerated=True, + is_active=True, + is_external_ref_data=False, + created_release=1, + owner_id=1, + ), + Category( + category_id=61, + code="GEO", + name="Geographical area", + is_enumerated=True, + is_active=True, + is_external_ref_data=False, + created_release=1, + owner_id=1, + ), + ] + ) + s.flush() + + # Property items (subtype of Item). + s.add_all( + [ + Item( + item_id=51, + name="Accounting Standard", + is_property=True, + is_active=True, + row_guid="c-prop-51", + owner_id=1, + ), + Item( + item_id=52, + name="Geographical Area", + is_property=True, + is_active=True, + row_guid="c-prop-52", + owner_id=1, + ), + Item( + item_id=53, + name="Reporting Frequency", + is_property=True, + is_active=True, + row_guid="c-prop-53", + owner_id=1, + ), + # Items for the domain categories. + Item(item_id=700, name="IFRS", is_property=False, is_active=True), + Item(item_id=701, name="EU", is_property=False, is_active=True), + ] + ) + s.flush() + s.add_all( + [ + Property( + property_id=51, + is_composite=False, + is_metric=False, + data_type_id=1, + owner_id=1, + ), + Property( + property_id=52, + is_composite=False, + is_metric=False, + data_type_id=1, + owner_id=1, + ), + Property( + property_id=53, + is_composite=False, + is_metric=False, + data_type_id=1, + owner_id=1, + ), + ] + ) + s.flush() + + # Property codes in _PR, with property 51 recoded at 4.2. 
+ s.add_all( + [ + ItemCategory( + item_id=51, + start_release_id=1, + category_id=1, + code="AS", + signature="_PR(AS)", + is_default_item=False, + end_release_id=2, # alive 4.0 and 4.1 + ), + ItemCategory( + item_id=51, + start_release_id=3, + category_id=1, + code="ACC", + signature="_PR(ACC)", + is_default_item=False, + end_release_id=None, # alive 4.2+ + ), + ItemCategory( + item_id=52, + start_release_id=1, + category_id=1, + code="GA", + signature="_PR(GA)", + is_default_item=False, + end_release_id=None, + ), + ItemCategory( + item_id=53, + start_release_id=1, + category_id=1, + code="RF", + signature="_PR(RF)", + is_default_item=False, + end_release_id=None, + ), + # Item 700 (IFRS) in ACC_STD — recoded at 4.1. + ItemCategory( + item_id=700, + start_release_id=1, + category_id=60, + code="IFRS", + signature="ACC_STD(IFRS)", + is_default_item=False, + end_release_id=1, # alive 4.0 only + ), + ItemCategory( + item_id=700, + start_release_id=2, + category_id=60, + code="IFRS_NEW", + signature="ACC_STD(IFRS_NEW)", + is_default_item=False, + end_release_id=None, # alive 4.1+ + ), + # Item 701 (EU) in GEO — stable. + ItemCategory( + item_id=701, + start_release_id=1, + category_id=61, + code="EU", + signature="GEO(EU)", + is_default_item=False, + end_release_id=None, + ), + ] + ) + s.flush() + + # PropertyCategory — link property→enumerated category at the release. + s.add_all( + [ + PropertyCategory( + property_id=51, + start_release_id=1, + category_id=60, + end_release_id=None, + ), + PropertyCategory( + property_id=52, + start_release_id=1, + category_id=61, + end_release_id=None, + ), + # Property 53 has no PropertyCategory → its item lookups + # would fail (we test that this still emits a version with + # itemCode=None). + ] + ) + s.flush() + + # Two contexts. 
+ s.add_all( + [ + Context( + context_id=100, + signature="(AS, IFRS)(GA, EU)", + row_guid="c-ctx-100", + owner_id=1, + ), + Context( + context_id=200, + signature="(RF, *)", + row_guid="c-ctx-200", + owner_id=1, + ), + ] + ) + s.flush() + + s.add_all( + [ + ContextComposition(context_id=100, property_id=51, item_id=700), + ContextComposition(context_id=100, property_id=52, item_id=701), + ContextComposition(context_id=200, property_id=53, item_id=None), + ] + ) + s.commit() + s.close() + return engine + + +@pytest.fixture +def client(seeded_engine): + from starlette.testclient import TestClient + + app = create_app("sqlite:///:memory:", engine=seeded_engine) + return TestClient(app) + + +@pytest.fixture +def empty_client(engine): + from starlette.testclient import TestClient + + app = create_app("sqlite:///:memory:", engine=engine) + return TestClient(app) + + +def _count_queries(engine): + class _Counter: + def __init__(self): + self.count = 0 + + def __enter__(self): + def _l(c, cur, stmt, p, ctx, m): + self.count += 1 + + self._l = _l + event.listen(engine, "before_cursor_execute", _l) + return self + + def __exit__(self, *exc): + event.remove(engine, "before_cursor_execute", self._l) + return False + + return _Counter() + + +def _pairs(context_dict): + return { + (c["propertyCode"], c["itemCode"]) + for c in context_dict["compositions"] + } + + +# ------------------------------------------------------------------ # +# Tests — single release +# ------------------------------------------------------------------ # + + +class TestSingleContextAtRelease: + def test_context_100_at_4_0(self, client): + resp = client.get("/api/v1/structure/context/EBA/100/4.0") + assert resp.status_code == 200 + ctx = resp.json()["data"]["contexts"][0] + assert ctx["id"] == 100 + assert ctx["owner"] == "EBA" + assert ctx["release"] == "4.0" + # AS+IFRS, GA+EU + assert _pairs(ctx) == {("AS", "IFRS"), ("GA", "EU")} + + def test_context_100_at_4_1_item_recoded(self, client): + resp = 
client.get("/api/v1/structure/context/EBA/100/4.1") + ctx = resp.json()["data"]["contexts"][0] + # IFRS → IFRS_NEW. + assert _pairs(ctx) == {("AS", "IFRS_NEW"), ("GA", "EU")} + + def test_context_100_at_4_2_property_recoded(self, client): + resp = client.get("/api/v1/structure/context/EBA/100/4.2") + ctx = resp.json()["data"]["contexts"][0] + # AS → ACC; item still IFRS_NEW. + assert _pairs(ctx) == {("ACC", "IFRS_NEW"), ("GA", "EU")} + + def test_context_200_pair_with_null_item(self, client): + resp = client.get("/api/v1/structure/context/EBA/200/4.0") + ctx = resp.json()["data"]["contexts"][0] + # Property 53 (RF) has no PropertyCategory → no item code + # resolvable. Pair surfaces as (propertyCode, None). + assert _pairs(ctx) == {("RF", None)} + + def test_nonexistent_id_204(self, client): + resp = client.get("/api/v1/structure/context/EBA/9999/4.0") + assert resp.status_code == 204 + + +# ------------------------------------------------------------------ # +# Tests — virtual versioning +# ------------------------------------------------------------------ # + + +class TestVirtualVersioning: + def test_context_100_has_three_virtual_versions(self, client): + resp = client.get("/api/v1/structure/context/EBA/100/*") + ctxs = resp.json()["data"]["contexts"] + assert len(ctxs) == 3 + # Versions emitted in chronological order of release. + releases = [c["release"] for c in ctxs] + assert releases == ["4.0", "4.1", "4.2"] + assert _pairs(ctxs[0]) == {("AS", "IFRS"), ("GA", "EU")} + assert _pairs(ctxs[1]) == {("AS", "IFRS_NEW"), ("GA", "EU")} + assert _pairs(ctxs[2]) == {("ACC", "IFRS_NEW"), ("GA", "EU")} + + def test_context_200_has_one_virtual_version(self, client): + resp = client.get("/api/v1/structure/context/EBA/200/*") + ctxs = resp.json()["data"]["contexts"] + # No code change in property 53 → one version only. 
+ assert len(ctxs) == 1 + assert ctxs[0]["release"] == "4.0" + assert _pairs(ctxs[0]) == {("RF", None)} + + def test_latest_returns_newest_virtual_version(self, client): + resp = client.get("/api/v1/structure/context/EBA/100/~") + ctx = resp.json()["data"]["contexts"][0] + assert ctx["release"] == "4.2" + assert _pairs(ctx) == {("ACC", "IFRS_NEW"), ("GA", "EU")} + + def test_all_contexts_wildcard(self, client): + resp = client.get("/api/v1/structure/context/EBA/*/*") + total = resp.json()["meta"]["totalCount"] + # 3 versions for context 100 + 1 for context 200 = 4. + assert total == 4 + + +# ------------------------------------------------------------------ # +# Tests — detail / references / empty +# ------------------------------------------------------------------ # + + +class TestAllstubs: + def test_strips_compositions(self, client): + resp = client.get( + "/api/v1/structure/context/EBA/100/4.0?detail=allstubs" + ) + ctx = resp.json()["data"]["contexts"][0] + for key in ("compositions", "signature"): + assert key not in ctx + for key in ("id", "owner", "release", "startReleaseId"): + assert key in ctx + + +class TestReferences: + def test_all_includes_organisations(self, client): + resp = client.get( + "/api/v1/structure/context/EBA/100/4.0?references=all" + ) + assert "organisations" in resp.json()["data"] + + def test_children_is_silent_noop(self, client): + resp = client.get( + "/api/v1/structure/context/EBA/100/4.0?references=children" + ) + ctx = resp.json()["data"]["contexts"][0] + for key in ("tables", "modules", "children"): + assert key not in ctx + + +class TestNonNumericIdRejected: + def test_alpha_id_204(self, client): + # Contexts have no code field — alpha ids match nothing. 
+ resp = client.get("/api/v1/structure/context/EBA/foo/4.0") + assert resp.status_code == 204 + + +class TestEmptyDatabase: + def test_empty_returns_204(self, empty_client): + resp = empty_client.get("/api/v1/structure/context/EBA/100/4.0") + assert resp.status_code == 204 + + +# ------------------------------------------------------------------ # +# Performance — query budget +# ------------------------------------------------------------------ # + + +class TestQueryBudget: + def test_query_count_bounded(self, client, seeded_engine): + with _count_queries(seeded_engine) as counter: + resp = client.get("/api/v1/structure/context/EBA/*/*") + assert resp.status_code == 200 + assert resp.json()["meta"]["totalCount"] == 4 + # Budget: release resolution + context query + + # ContextComposition + 3 bulk loaders + owner lookup + + # _get_all_releases (already cached after first hit). Total + # ≤9 in practice; cap at 12 for headroom. + assert counter.count <= 12, ( + f"context path issued {counter.count} queries — " + f"likely an N+1 regression." 
+ ) diff --git a/tests/unit/server/test_structure_datatype.py b/tests/unit/server/test_structure_datatype.py new file mode 100644 index 0000000..b633dce --- /dev/null +++ b/tests/unit/server/test_structure_datatype.py @@ -0,0 +1,241 @@ +"""Integration tests for /api/v1/structure/datatype endpoints.""" + +from __future__ import annotations + +import pytest +from sqlalchemy import create_engine, event +from sqlalchemy.orm import Session +from sqlalchemy.pool import StaticPool + +import dpmcore.orm # noqa: F401 — ensure all models are loaded +from dpmcore.orm.base import Base +from dpmcore.orm.infrastructure import DataType +from dpmcore.server.app import create_app + +# ------------------------------------------------------------------ # +# Seed model +# ------------------------------------------------------------------ # +# +# A small hierarchy: +# +# Decimal (id=1) +# ├── Integer (id=3) +# └── Boolean (id=4) +# String (id=2) +# Inactive (id=5, is_active=False) +# ------------------------------------------------------------------ # + + +@pytest.fixture +def engine(): + eng = create_engine( + "sqlite:///:memory:", + connect_args={"check_same_thread": False}, + poolclass=StaticPool, + ) + Base.metadata.create_all(eng) + return eng + + +@pytest.fixture +def seeded_engine(engine): + s = Session(bind=engine) + s.add_all( + [ + DataType( + data_type_id=1, code="Decimal", name="Decimal", is_active=True + ), + DataType( + data_type_id=2, code="String", name="String", is_active=True + ), + DataType( + data_type_id=3, + code="Integer", + name="Integer", + parent_data_type_id=1, + is_active=True, + ), + DataType( + data_type_id=4, + code="Boolean", + name="Boolean", + parent_data_type_id=1, + is_active=True, + ), + DataType( + data_type_id=5, + code="LegacyType", + name="Legacy Type", + is_active=False, + ), + ] + ) + s.commit() + s.close() + return engine + + +@pytest.fixture +def client(seeded_engine): + from starlette.testclient import TestClient + + app = 
create_app("sqlite:///:memory:", engine=seeded_engine) + return TestClient(app) + + +@pytest.fixture +def empty_client(engine): + from starlette.testclient import TestClient + + app = create_app("sqlite:///:memory:", engine=engine) + return TestClient(app) + + +def _count_queries(engine): + class _Counter: + def __init__(self): + self.count = 0 + + def __enter__(self): + def _l(c, cur, stmt, p, ctx, m): + self.count += 1 + + self._l = _l + event.listen(engine, "before_cursor_execute", _l) + return self + + def __exit__(self, *exc): + event.remove(engine, "before_cursor_execute", self._l) + return False + + return _Counter() + + +# ------------------------------------------------------------------ # +# Tests — default response +# ------------------------------------------------------------------ # + + +class TestDefaultResponse: + def test_wildcard_lists_all(self, client): + resp = client.get("/api/v1/structure/datatype/*/*/*") + assert resp.status_code == 200 + codes = {d["code"] for d in resp.json()["data"]["dataTypes"]} + assert codes == { + "Decimal", + "String", + "Integer", + "Boolean", + "LegacyType", + } + + def test_single_by_code(self, client): + resp = client.get("/api/v1/structure/datatype/*/Decimal/*") + dts = resp.json()["data"]["dataTypes"] + assert len(dts) == 1 + dt = dts[0] + assert dt["code"] == "Decimal" + assert dt["parentDataTypeId"] is None + assert sorted(dt["childDataTypeIds"]) == [3, 4] + # No children expansion unless requested. 
+ assert "childDataTypes" not in dt + + def test_child_carries_parent_id(self, client): + resp = client.get("/api/v1/structure/datatype/*/Integer/*") + dt = resp.json()["data"]["dataTypes"][0] + assert dt["parentDataTypeId"] == 1 + assert dt["childDataTypeIds"] == [] + + def test_single_by_numeric_id(self, client): + resp = client.get("/api/v1/structure/datatype/*/2/*") + dts = resp.json()["data"]["dataTypes"] + assert len(dts) == 1 + assert dts[0]["code"] == "String" + + def test_inactive_still_listed(self, client): + resp = client.get("/api/v1/structure/datatype/*/LegacyType/*") + dt = resp.json()["data"]["dataTypes"][0] + assert dt["isActive"] is False + + +class TestOwnerRejection: + def test_concrete_owner_204(self, client): + """DataTypes have no owner — a concrete owner returns 204.""" + for owner in ("EBA", "ECB", "anything"): + resp = client.get(f"/api/v1/structure/datatype/{owner}/*/*") + assert resp.status_code == 204 + + def test_release_segment_ignored(self, client): + for path in ( + "/api/v1/structure/datatype/*/Decimal/4.0", + "/api/v1/structure/datatype/*/Decimal/4.1", + "/api/v1/structure/datatype/*/Decimal/999.0", + ): + resp = client.get(path) + assert resp.status_code == 200 + assert resp.json()["data"]["dataTypes"][0]["code"] == "Decimal" + + +class TestChildrenExpansion: + def test_children_expanded(self, client): + resp = client.get( + "/api/v1/structure/datatype/*/Decimal/*?references=children" + ) + dt = resp.json()["data"]["dataTypes"][0] + assert "childDataTypes" in dt + codes = {c["code"] for c in dt["childDataTypes"]} + assert codes == {"Integer", "Boolean"} + # Stubs carry id, code, name, isActive. 
+ for c in dt["childDataTypes"]: + assert set(c.keys()) == {"id", "code", "name", "isActive"} + + def test_leaf_has_empty_children(self, client): + resp = client.get( + "/api/v1/structure/datatype/*/Integer/*?references=children" + ) + dt = resp.json()["data"]["dataTypes"][0] + assert dt["childDataTypes"] == [] + + +class TestAllstubs: + def test_strips_subtrees(self, client): + resp = client.get( + "/api/v1/structure/datatype/*/Decimal/*?detail=allstubs" + ) + dt = resp.json()["data"]["dataTypes"][0] + for key in ( + "parentDataTypeId", + "childDataTypeIds", + "childDataTypes", + ): + assert key not in dt + for key in ("id", "code", "name", "isActive"): + assert key in dt + + +class TestEmptyDatabase: + def test_empty_returns_204(self, empty_client): + resp = empty_client.get("/api/v1/structure/datatype/*/*/*") + assert resp.status_code == 204 + + +class TestQueryBudget: + def test_minimal_query_count(self, client, seeded_engine): + with _count_queries(seeded_engine) as counter: + resp = client.get("/api/v1/structure/datatype/*/*/*") + assert resp.status_code == 200 + # count + main + child IDs = 3; cap at 5. + assert counter.count <= 5, ( + f"default datatype path issued {counter.count} queries." + ) + + def test_children_path_minimal(self, client, seeded_engine): + with _count_queries(seeded_engine) as counter: + resp = client.get( + "/api/v1/structure/datatype/*/*/*?references=children" + ) + assert resp.status_code == 200 + # +1 query for child expansions; cap at 6. + assert counter.count <= 6, ( + f"children datatype path issued {counter.count} queries." 
+ ) diff --git a/tests/unit/server/test_structure_framework.py b/tests/unit/server/test_structure_framework.py new file mode 100644 index 0000000..3fe8aaf --- /dev/null +++ b/tests/unit/server/test_structure_framework.py @@ -0,0 +1,422 @@ +"""Integration tests for /api/v1/structure/framework endpoints.""" + +from __future__ import annotations + +from datetime import date + +import pytest +from sqlalchemy import create_engine, event +from sqlalchemy.orm import Session +from sqlalchemy.pool import StaticPool + +import dpmcore.orm # noqa: F401 — ensure all models are loaded +from dpmcore.orm.base import Base +from dpmcore.orm.infrastructure import ( + Concept, + Organisation, + Release, +) +from dpmcore.orm.packaging import ( + Framework, + Module, + ModuleParameters, + ModuleVersion, +) +from dpmcore.orm.variables import Variable, VariableVersion +from dpmcore.server.app import create_app + +# ------------------------------------------------------------------ # +# Seed model +# ------------------------------------------------------------------ # +# +# Two frameworks (FINREP, COREP), each with two modules. FINREP modules +# have ModuleVersions at both releases 4.0 and 4.1; COREP modules only +# at 4.0. Used to exercise the release-filters-children semantics. 
# ------------------------------------------------------------------ #


@pytest.fixture
def engine():
    """Yield an in-memory SQLite engine with the full ORM schema created.

    ``StaticPool`` together with ``check_same_thread=False`` makes every
    session share one connection, so the data seeded by a fixture is the
    data the application under test reads. The engine is disposed at
    teardown so connections do not accumulate across tests.
    """
    eng = create_engine(
        "sqlite:///:memory:",
        connect_args={"check_same_thread": False},
        poolclass=StaticPool,
    )
    Base.metadata.create_all(eng)
    yield eng
    eng.dispose()


@pytest.fixture
def seeded_engine(engine):
    """Populate *engine* with the framework seed model and return it.

    Seeds one organisation (EBA), two releases (4.0 and 4.1), two
    frameworks (FINREP, COREP) with two modules each, one ModuleVersion
    per module, and a single parameter variable bound to FINREP9.
    Rows are flushed group by group, parents before the rows that
    reference them.
    """
    s = Session(bind=engine)
    # try/finally: the session is closed even when a seed step raises,
    # so a failing seed cannot leave the shared connection in use.
    try:
        s.add(
            Organisation(
                org_id=1, name="European Banking Authority", acronym="EBA"
            )
        )
        s.flush()

        for guid in [
            "c-rel-1",
            "c-rel-2",
            "c-fw-finrep",
            "c-fw-corep",
            "c-mod-a",
            "c-mod-b",
            "c-mod-c",
            "c-mod-d",
        ]:
            s.add(Concept(concept_guid=guid, owner_id=1))
        s.flush()

        s.add_all(
            [
                Release(
                    release_id=1,
                    code="4.0",
                    date=date(2024, 1, 1),
                    status="Final",
                    is_current=False,
                    row_guid="c-rel-1",
                    owner_id=1,
                ),
                Release(
                    release_id=2,
                    code="4.1",
                    date=date(2024, 6, 1),
                    status="Final",
                    is_current=True,
                    row_guid="c-rel-2",
                    owner_id=1,
                ),
            ]
        )
        s.flush()

        s.add_all(
            [
                Framework(
                    framework_id=10,
                    code="FINREP",
                    name="Financial Reporting",
                    description="FINREP framework",
                    row_guid="c-fw-finrep",
                    owner_id=1,
                ),
                Framework(
                    framework_id=20,
                    code="COREP",
                    name="Common Reporting",
                    description="COREP framework",
                    row_guid="c-fw-corep",
                    owner_id=1,
                ),
            ]
        )
        s.flush()

        # FINREP modules: FINREP9, FINREP9_RST.
        s.add_all(
            [
                Module(
                    module_id=100,
                    framework_id=10,
                    row_guid="c-mod-a",
                    is_document_module=False,
                    owner_id=1,
                ),
                Module(
                    module_id=101,
                    framework_id=10,
                    row_guid="c-mod-b",
                    is_document_module=False,
                    owner_id=1,
                ),
                # COREP modules.
                Module(
                    module_id=200,
                    framework_id=20,
                    row_guid="c-mod-c",
                    is_document_module=False,
                    owner_id=1,
                ),
                Module(
                    module_id=201,
                    framework_id=20,
                    row_guid="c-mod-d",
                    is_document_module=False,
                    owner_id=1,
                ),
            ]
        )
        s.flush()

        s.add_all(
            [
                # FINREP9 — active at both releases.
                ModuleVersion(
                    module_vid=10000,
                    module_id=100,
                    code="FINREP9",
                    name="FINREP 9",
                    version_number="9.0",
                    start_release_id=1,
                    end_release_id=None,
                ),
                ModuleVersion(
                    module_vid=10001,
                    module_id=101,
                    code="FINREP9_RST",
                    name="FINREP 9 — restricted",
                    version_number="9.0",
                    start_release_id=1,
                    end_release_id=None,
                ),
                # COREP modules — only at release 4.0 (end_release_id=2 →
                # excluded from 4.1 by the semver-aware filter).
                ModuleVersion(
                    module_vid=20000,
                    module_id=200,
                    code="COREP_OF",
                    name="COREP Own Funds",
                    version_number="3.0",
                    start_release_id=1,
                    end_release_id=2,
                ),
                ModuleVersion(
                    module_vid=20001,
                    module_id=201,
                    code="COREP_LR",
                    name="COREP Leverage",
                    version_number="3.0",
                    start_release_id=1,
                    end_release_id=2,
                ),
            ]
        )
        s.flush()

        # Parameter variable on FINREP9.
        s.add(Variable(variable_id=89, type="k", owner_id=1))
        s.flush()
        s.add(
            VariableVersion(
                variable_vid=7777,
                variable_id=89,
                code="kv01",
                name="Row key",
                is_multi_valued=False,
                start_release_id=1,
                end_release_id=None,
            )
        )
        s.flush()
        s.add(ModuleParameters(module_vid=10000, variable_vid=7777))
        s.commit()
    finally:
        s.close()
    return engine


@pytest.fixture
def client(seeded_engine):
    """Return a ``TestClient`` for an app bound to the seeded engine."""
    from starlette.testclient import TestClient

    app = create_app("sqlite:///:memory:", engine=seeded_engine)
    return TestClient(app)


@pytest.fixture
def empty_client(engine):
    """Return a ``TestClient`` for an app bound to the schema-only engine."""
    from starlette.testclient import TestClient

    app = create_app("sqlite:///:memory:", engine=engine)
    return TestClient(app)


def _count_queries(engine):
    """Context manager counting SQL statements issued on *engine*.

    The returned object exposes the running total as ``count``. The
    ``before_cursor_execute`` listener is attached on entry and removed
    on exit, so only statements executed inside the ``with`` block are
    counted.
    """

    class _Counter:
        def __init__(self) -> None:
            self.count = 0

        def __enter__(self) -> "_Counter":
            def listener(
                conn, cursor, statement, params, context, executemany
            ):
                self.count += 1

            # Keep a reference so __exit__ removes this exact callable.
            self._listener = listener
            event.listen(engine, "before_cursor_execute", listener)
            return self

        def __exit__(self, *exc):
            event.remove(engine, "before_cursor_execute", self._listener)
            # Returning False lets exceptions from the block propagate.
            return False

    return _Counter()


# 
------------------------------------------------------------------ # +# Tests — default (no children) +# ------------------------------------------------------------------ # + + +class TestDefaultResponse: + def test_single_framework_by_code(self, client): + resp = client.get("/api/v1/structure/framework/EBA/FINREP/*") + assert resp.status_code == 200 + body = resp.json() + fws = body["data"]["frameworks"] + assert len(fws) == 1 + fw = fws[0] + assert fw["code"] == "FINREP" + assert fw["name"] == "Financial Reporting" + assert fw["owner"] == "EBA" + # No children unless requested. + assert "modules" not in fw + + def test_release_ignored_for_selection(self, client): + """Frameworks aren't versioned — every release literal returns + the same set, including unknown release codes. + """ + for path in ( + "/api/v1/structure/framework/EBA/FINREP/4.0", + "/api/v1/structure/framework/EBA/FINREP/4.1", + "/api/v1/structure/framework/EBA/FINREP/999.0", + ): + resp = client.get(path) + assert resp.status_code == 200 + assert resp.json()["data"]["frameworks"][0]["code"] == "FINREP" + + def test_wildcard_lists_all(self, client): + resp = client.get("/api/v1/structure/framework/EBA/*/*") + assert resp.status_code == 200 + codes = {f["code"] for f in resp.json()["data"]["frameworks"]} + assert codes == {"FINREP", "COREP"} + + def test_unknown_code_204(self, client): + resp = client.get("/api/v1/structure/framework/EBA/NOPE/*") + assert resp.status_code == 204 + + def test_unknown_owner_204(self, client): + resp = client.get("/api/v1/structure/framework/UNKNOWN/FINREP/*") + assert resp.status_code == 204 + + +class TestAllstubs: + def test_strips_subtrees(self, client): + resp = client.get( + "/api/v1/structure/framework/EBA/FINREP/*?detail=allstubs" + ) + f = resp.json()["data"]["frameworks"][0] + for key in ("description", "modules"): + assert key not in f + for key in ("id", "code", "name", "owner"): + assert key in f + + +class TestEmptyDatabase: + def 
test_empty_returns_204(self, empty_client): + resp = empty_client.get("/api/v1/structure/framework/EBA/FINREP/*") + assert resp.status_code == 204 + + +# ------------------------------------------------------------------ # +# Tests — children +# ------------------------------------------------------------------ # + + +class TestChildren: + def test_children_at_4_0_returns_all_finrep_modules(self, client): + resp = client.get( + "/api/v1/structure/framework/EBA/FINREP/4.0?references=children" + ) + f = resp.json()["data"]["frameworks"][0] + assert "modules" in f + codes = {m["code"] for m in f["modules"]} + assert codes == {"FINREP9", "FINREP9_RST"} + # Module shape mirrors /structure/module default — includes + # framework ref, parameterVariableVersionIds, etc. + finrep9 = next(m for m in f["modules"] if m["code"] == "FINREP9") + assert finrep9["framework"]["code"] == "FINREP" + assert finrep9["parameterVariableVersionIds"] == [7777] + # No grandchildren (modules don't carry tables here). + assert "tables" not in finrep9 + + def test_release_filters_children_only(self, client): + """COREP modules end at release 4.1 → no children at 4.1.""" + resp = client.get( + "/api/v1/structure/framework/EBA/COREP/4.1?references=children" + ) + f = resp.json()["data"]["frameworks"][0] + # Framework still present. + assert f["code"] == "COREP" + # Children empty since both COREP modules ended at release 2. + assert f["modules"] == [] + + def test_unknown_release_yields_empty_children(self, client): + """Unknown literal release → framework still returns, modules + empty. 
+ """ + resp = client.get( + "/api/v1/structure/framework/EBA/FINREP/999.0?references=children" + ) + f = resp.json()["data"]["frameworks"][0] + assert f["code"] == "FINREP" + assert f["modules"] == [] + + def test_release_wildcard_returns_all_module_versions(self, client): + resp = client.get( + "/api/v1/structure/framework/EBA/FINREP/*?references=children" + ) + f = resp.json()["data"]["frameworks"][0] + assert {m["code"] for m in f["modules"]} == { + "FINREP9", + "FINREP9_RST", + } + + def test_references_all_includes_organisations(self, client): + resp = client.get( + "/api/v1/structure/framework/EBA/FINREP/4.0?references=all" + ) + body = resp.json() + assert "organisations" in body["data"] + assert "modules" in body["data"]["frameworks"][0] + + +# ------------------------------------------------------------------ # +# Performance — query budget +# ------------------------------------------------------------------ # + + +class TestQueryBudget: + def test_default_path_minimal_queries(self, client, seeded_engine): + with _count_queries(seeded_engine) as counter: + resp = client.get("/api/v1/structure/framework/EBA/*/*") + assert resp.status_code == 200 + # Framework query + one owner lookup ≈ 2–3 (no release work + # since wants_all_releases=True). + assert counter.count <= 5, ( + f"default framework path issued {counter.count} queries." + ) + + def test_children_path_bounded(self, client, seeded_engine): + with _count_queries(seeded_engine) as counter: + resp = client.get( + "/api/v1/structure/framework/EBA/*/4.0?references=children" + ) + assert resp.status_code == 200 + body = resp.json() + # 4 module versions across 2 frameworks at 4.0. + total_modules = sum( + len(f["modules"]) for f in body["data"]["frameworks"] + ) + assert total_modules == 4 + # Budget breakdown: + # 3 release-resolution queries (filter_by_release internals); + # 1 framework query, 1 ModuleVersion children query, + # 1 ModuleParameters bulk, 1 owner lookup. 
+ # ≤10 leaves headroom for incidental changes while still + # flagging N+1 regressions. + assert counter.count <= 10, ( + f"children path issued {counter.count} queries — " + f"likely an N+1 regression." + ) diff --git a/tests/unit/server/test_structure_module.py b/tests/unit/server/test_structure_module.py new file mode 100644 index 0000000..1595612 --- /dev/null +++ b/tests/unit/server/test_structure_module.py @@ -0,0 +1,699 @@ +"""Integration tests for /api/v1/structure/module endpoints.""" + +from __future__ import annotations + +from datetime import date + +import pytest +from sqlalchemy import create_engine, event +from sqlalchemy.orm import Session +from sqlalchemy.pool import StaticPool + +import dpmcore.orm # noqa: F401 — ensure all models are loaded +from dpmcore.orm.base import Base +from dpmcore.orm.glossary import ( + Category, + Item, + ItemCategory, + Property, + SubCategory, + SubCategoryItem, + SubCategoryVersion, +) +from dpmcore.orm.infrastructure import ( + Concept, + DataType, + Organisation, + Release, +) +from dpmcore.orm.packaging import ( + Framework, + Module, + ModuleParameters, + ModuleVersion, + ModuleVersionComposition, +) +from dpmcore.orm.rendering import ( + Cell, + Header, + HeaderVersion, + Table, + TableVersion, + TableVersionCell, + TableVersionHeader, +) +from dpmcore.orm.variables import Variable, VariableVersion +from dpmcore.server.app import create_app + +# ------------------------------------------------------------------ # +# Seed model +# ------------------------------------------------------------------ # +# +# Two modules in one framework, each with 3 tables. Module FINREP9 has +# parameter variables and full table structure on its first table +# (headers, cells, key + fact variables, ASSET_TYPE subcategory +# enumeration). The remaining tables are skeletal but real — enough +# for query-budget assertions to scale meaningfully. 
+# ------------------------------------------------------------------ # + + +@pytest.fixture +def engine(): + eng = create_engine( + "sqlite:///:memory:", + connect_args={"check_same_thread": False}, + poolclass=StaticPool, + ) + Base.metadata.create_all(eng) + return eng + + +@pytest.fixture +def seeded_engine(engine): + session = Session(bind=engine) + + session.add( + Organisation( + org_id=1, name="European Banking Authority", acronym="EBA" + ) + ) + session.flush() + + for guid in [ + "c-rel-1", + "c-rel-2", + "c-fw-1", + "c-mod-1", + "c-mod-2", + "c-table-1", + "c-table-2", + "c-table-3", + "c-table-4", + "c-table-5", + "c-table-6", + "c-header-c", + "c-header-r", + "c-cell-1", + ]: + session.add(Concept(concept_guid=guid, owner_id=1)) + session.flush() + + session.add_all( + [ + Release( + release_id=1, + code="4.0", + date=date(2024, 1, 1), + status="Final", + is_current=False, + row_guid="c-rel-1", + owner_id=1, + ), + Release( + release_id=2, + code="4.1", + date=date(2024, 6, 1), + status="Final", + is_current=True, + row_guid="c-rel-2", + owner_id=1, + ), + ] + ) + session.flush() + + session.add( + DataType(data_type_id=1, code="String", name="String", is_active=True) + ) + session.flush() + + # Framework owning both modules. + session.add( + Framework( + framework_id=10, + code="FINREP", + name="FINREP", + description="Financial reporting", + row_guid="c-fw-1", + owner_id=1, + ) + ) + session.flush() + + # Two modules. 
+ session.add_all( + [ + Module( + module_id=100, + framework_id=10, + row_guid="c-mod-1", + is_document_module=False, + owner_id=1, + ), + Module( + module_id=101, + framework_id=10, + row_guid="c-mod-2", + is_document_module=False, + owner_id=1, + ), + ] + ) + session.flush() + + session.add_all( + [ + ModuleVersion( + module_vid=10000, + module_id=100, + code="FINREP9", + name="FINREP 9", + description="Financial Reporting v9", + version_number="9.0", + start_release_id=1, + end_release_id=None, + from_reference_date=date(2024, 1, 1), + to_reference_date=None, + is_reported=True, + is_calculated=False, + ), + ModuleVersion( + module_vid=20000, + module_id=101, + code="COREP", + name="COREP", + description="Common Reporting", + version_number="3.0", + start_release_id=1, + end_release_id=None, + ), + ] + ) + session.flush() + + # 3 tables per module. + table_specs = [ + (1, "F_01.01", "c-table-1"), + (2, "F_02.00", "c-table-2"), + (3, "F_03.00", "c-table-3"), + (4, "C_01.00", "c-table-4"), + (5, "C_02.00", "c-table-5"), + (6, "C_03.00", "c-table-6"), + ] + for tid, _code, guid in table_specs: + session.add( + Table( + table_id=tid, + is_abstract=False, + has_open_columns=False, + has_open_rows=False, + has_open_sheets=False, + is_normalised=False, + is_flat=False, + row_guid=guid, + owner_id=1, + ) + ) + session.flush() + for tid, code, _ in table_specs: + session.add( + TableVersion( + table_vid=tid * 1000, + code=code, + name=f"Table {code}", + description="", + table_id=tid, + start_release_id=1, + end_release_id=None, + ) + ) + session.flush() + + # ModuleVersionComposition — 3 tables per module in order. 
+ session.add_all( + [ + ModuleVersionComposition( + module_vid=10000, table_id=1, table_vid=1000, order=1 + ), + ModuleVersionComposition( + module_vid=10000, table_id=2, table_vid=2000, order=2 + ), + ModuleVersionComposition( + module_vid=10000, table_id=3, table_vid=3000, order=3 + ), + ModuleVersionComposition( + module_vid=20000, table_id=4, table_vid=4000, order=1 + ), + ModuleVersionComposition( + module_vid=20000, table_id=5, table_vid=5000, order=2 + ), + ModuleVersionComposition( + module_vid=20000, table_id=6, table_vid=6000, order=3 + ), + ] + ) + session.flush() + + # Full structure for table_vid=1000 (F_01.01) only. + session.add_all( + [ + Header( + header_id=11, + table_id=1, + direction="x", + is_key=False, + is_attribute=False, + row_guid="c-header-c", + owner_id=1, + ), + Header( + header_id=22, + table_id=1, + direction="y", + is_key=True, + is_attribute=False, + row_guid="c-header-r", + owner_id=1, + ), + ] + ) + session.flush() + + # Key variable referenced by the row header. + session.add(Variable(variable_id=89, type="k", owner_id=1)) + session.flush() + session.add( + VariableVersion( + variable_vid=7777, + variable_id=89, + code="kv01", + name="Row key", + is_multi_valued=False, + start_release_id=1, + end_release_id=None, + ) + ) + session.flush() + + # Enumerated category + subcategory the row header references. 
+ session.add( + Category( + category_id=60, + code="ASSET_TYPE", + name="Asset type", + description="", + is_enumerated=True, + is_active=True, + is_external_ref_data=False, + created_release=1, + owner_id=1, + ) + ) + session.flush() + session.add( + SubCategory( + subcategory_id=400, + category_id=60, + code="AT_SUB", + name="Asset type subset", + description="", + owner_id=1, + ) + ) + session.flush() + session.add( + SubCategoryVersion( + subcategory_vid=4441, + subcategory_id=400, + start_release_id=1, + end_release_id=None, + ) + ) + session.flush() + + session.add_all( + [ + HeaderVersion( + header_vid=111, + header_id=11, + code="0010", + label="Carrying amount", + start_release_id=1, + end_release_id=None, + ), + HeaderVersion( + header_vid=222, + header_id=22, + code="010", + label="Loans", + key_variable_vid=7777, + subcategory_vid=4441, + start_release_id=1, + end_release_id=None, + ), + ] + ) + session.flush() + + session.add_all( + [ + TableVersionHeader( + table_vid=1000, header_id=11, header_vid=111, order=0 + ), + TableVersionHeader( + table_vid=1000, header_id=22, header_vid=222, order=1 + ), + ] + ) + session.flush() + + # Items + ItemCategory in the parent category. 
+ session.add_all( + [ + Item(item_id=700, name="Loan", is_property=False, is_active=True), + Item(item_id=701, name="Bond", is_property=False, is_active=True), + Item( + item_id=51, + name="Asset property", + is_property=True, + is_active=True, + ), + ] + ) + session.flush() + session.add_all( + [ + ItemCategory( + item_id=700, + start_release_id=1, + category_id=60, + code="LOAN", + is_default_item=False, + signature="ASSET_TYPE(LOAN)", + end_release_id=None, + ), + ItemCategory( + item_id=701, + start_release_id=1, + category_id=60, + code="BOND", + is_default_item=False, + signature="ASSET_TYPE(BOND)", + end_release_id=None, + ), + ] + ) + session.flush() + session.add_all( + [ + SubCategoryItem(item_id=700, subcategory_vid=4441, order=1), + SubCategoryItem(item_id=701, subcategory_vid=4441, order=2), + ] + ) + session.flush() + + session.add( + Property( + property_id=51, + is_composite=False, + is_metric=False, + data_type_id=1, + owner_id=1, + ) + ) + session.flush() + + # Fact variable held by the cell. + session.add(Variable(variable_id=88, type="d", owner_id=1)) + session.flush() + session.add( + VariableVersion( + variable_vid=8888, + variable_id=88, + property_id=51, + code="ei001", + name="Asset value", + is_multi_valued=False, + start_release_id=1, + end_release_id=None, + ) + ) + session.flush() + + session.add( + Cell( + cell_id=9001, + table_id=1, + column_id=11, + row_id=22, + row_guid="c-cell-1", + owner_id=1, + ) + ) + session.flush() + session.add( + TableVersionCell( + table_vid=1000, + cell_id=9001, + cell_code="{r010,c0010}", + is_nullable=False, + is_excluded=False, + is_void=False, + sign=None, + variable_vid=8888, + ) + ) + session.flush() + + # ModuleParameters — bind two variable versions as parameters of + # FINREP9. 
+ session.add_all( + [ + ModuleParameters(module_vid=10000, variable_vid=7777), + ModuleParameters(module_vid=10000, variable_vid=8888), + ] + ) + session.commit() + session.close() + return engine + + +@pytest.fixture +def client(seeded_engine): + from starlette.testclient import TestClient + + app = create_app("sqlite:///:memory:", engine=seeded_engine) + return TestClient(app) + + +@pytest.fixture +def empty_client(engine): + from starlette.testclient import TestClient + + app = create_app("sqlite:///:memory:", engine=engine) + return TestClient(app) + + +def _count_queries(engine): + """Context manager counting SQL statements issued on *engine*.""" + + class _Counter: + def __init__(self) -> None: + self.count = 0 + + def __enter__(self) -> "_Counter": + def listener( + conn, cursor, statement, params, context, executemany + ): + self.count += 1 + + self._listener = listener + event.listen(engine, "before_cursor_execute", listener) + return self + + def __exit__(self, *exc): + event.remove(engine, "before_cursor_execute", self._listener) + return False + + return _Counter() + + +# ------------------------------------------------------------------ # +# Tests — default response (no children) +# ------------------------------------------------------------------ # + + +class TestSingleModule: + def test_at_literal_release(self, client): + resp = client.get("/api/v1/structure/module/EBA/FINREP9/4.0") + assert resp.status_code == 200 + body = resp.json() + assert "modules" in body["data"] + modules = body["data"]["modules"] + assert len(modules) == 1 + m = modules[0] + assert m["code"] == "FINREP9" + assert m["owner"] == "EBA" + assert m["release"] == "4.0" + assert m["moduleVersionId"] == 10000 + # No children by default. + assert "tables" not in m + # parameterVariableVersionIds always present. + assert sorted(m["parameterVariableVersionIds"]) == [7777, 8888] + # framework reference populated. 
+ assert m["framework"]["code"] == "FINREP" + assert m["versionNumber"] == "9.0" + assert m["isDocumentModule"] is False + + def test_latest_returns_module(self, client): + resp = client.get("/api/v1/structure/module/EBA/FINREP9/~") + assert resp.status_code == 200 + assert resp.json()["data"]["modules"][0]["moduleVersionId"] == 10000 + + def test_latest_stable(self, client): + resp = client.get("/api/v1/structure/module/EBA/FINREP9/+") + assert resp.status_code == 200 + assert resp.json()["data"]["modules"][0]["release"] == "4.1" + + def test_nonexistent_code(self, client): + resp = client.get("/api/v1/structure/module/EBA/NOPE/4.0") + assert resp.status_code == 204 + + def test_nonexistent_owner(self, client): + resp = client.get("/api/v1/structure/module/UNKNOWN/FINREP9/4.0") + assert resp.status_code == 204 + + +class TestAllModules: + def test_wildcard_returns_both(self, client): + resp = client.get("/api/v1/structure/module/EBA/*/4.0") + assert resp.status_code == 200 + codes = {m["code"] for m in resp.json()["data"]["modules"]} + assert codes == {"FINREP9", "COREP"} + + def test_release_wildcard(self, client): + resp = client.get("/api/v1/structure/module/EBA/*/*") + assert resp.status_code == 200 + # Both modules have one ModuleVersion each → 2 entries total. 
+ assert resp.json()["meta"]["totalCount"] == 2 + + +class TestAllstubs: + def test_strips_subtrees(self, client): + resp = client.get( + "/api/v1/structure/module/EBA/FINREP9/4.0?detail=allstubs" + ) + m = resp.json()["data"]["modules"][0] + for key in ( + "tables", + "parameterVariableVersionIds", + "framework", + "versionNumber", + ): + assert key not in m + for key in ("id", "moduleVersionId", "code", "owner", "release"): + assert key in m + + +class TestEmptyDatabase: + def test_empty_returns_204(self, empty_client): + resp = empty_client.get("/api/v1/structure/module/EBA/FINREP9/4.0") + assert resp.status_code == 204 + + +# ------------------------------------------------------------------ # +# Tests — children +# ------------------------------------------------------------------ # + + +class TestChildren: + def test_children_listed_in_composition_order(self, client): + resp = client.get( + "/api/v1/structure/module/EBA/FINREP9/4.0?references=children" + ) + m = resp.json()["data"]["modules"][0] + assert "tables" in m + # ModuleVersionComposition.order: F_01.01, F_02.00, F_03.00. + assert [t["code"] for t in m["tables"]] == [ + "F_01.01", + "F_02.00", + "F_03.00", + ] + + def test_children_carry_full_table_shape(self, client): + resp = client.get( + "/api/v1/structure/module/EBA/FINREP9/4.0?references=children" + ) + t = resp.json()["data"]["modules"][0]["tables"][0] # F_01.01 + assert len(t["headers"]) == 2 + assert len(t["cells"]) == 1 + assert len(t["keyVariables"]) == 1 + assert len(t["factVariables"]) == 1 + # The fact variable inherits the row header's subcategory. 
+ fact = t["factVariables"][0] + assert fact["isEnumerated"] is True + enum = fact["enumeration"] + assert enum["subcategoryCode"] == "AT_SUB" + assert {i["code"] for i in enum["items"]} == {"LOAN", "BOND"} + + def test_references_all_includes_organisations(self, client): + resp = client.get( + "/api/v1/structure/module/EBA/FINREP9/4.0?references=all" + ) + body = resp.json() + assert "organisations" in body["data"] + # Children also present under references=all. + assert "tables" in body["data"]["modules"][0] + + +# ------------------------------------------------------------------ # +# Performance — query budget +# ------------------------------------------------------------------ # + + +class TestQueryBudget: + """Guard against N+1 regressions in the children path. + + With 2 modules × 3 tables seeded, the children path must issue a + bounded number of SQL statements independent of N×M. + """ + + def test_children_query_count_is_bounded(self, client, seeded_engine): + with _count_queries(seeded_engine) as counter: + resp = client.get( + "/api/v1/structure/module/EBA/*/4.0?references=children" + ) + assert resp.status_code == 200 + body = resp.json() + assert body["meta"]["totalCount"] == 2 + # Sanity: 6 tables total exposed across the two modules. + total_tables = sum(len(m["tables"]) for m in body["data"]["modules"]) + assert total_tables == 6 + # Budget breakdown for the children path (current ≈ 18): + # 3 release-resolution queries (filter_by_release internals, + # fired up to twice — once for the module query, once for + # subcategory ItemCategory windowing); + # 1 ModuleVersion main, 1 ModuleParameters, + # 1 Framework refs, 1 ModuleVersionComposition, + # 1 TableVersion (Table joinedload), 1 headers, 1 cells, + # 1 VariableVersion, 1 property names, + # 3 SubCategory enumeration loads (SCV+SC+Cat / SCI+Item / + # ItemCategory), 1 Organisation lookup. 
+ # ≤22 leaves headroom for incidental changes while still + # flagging the N+1 regressions this test exists to catch: + # if the budget scaled with N modules × M tables it would + # quickly exceed 100. + assert counter.count <= 22, ( + f"children path issued {counter.count} queries — " + f"likely an N+1 regression." + ) + + def test_default_path_minimal_queries(self, client, seeded_engine): + with _count_queries(seeded_engine) as counter: + resp = client.get("/api/v1/structure/module/EBA/*/4.0") + assert resp.status_code == 200 + # Without children: 3 release-resolution + module query + + # parameters + framework refs + one owner lookup ≈ 7. + assert counter.count <= 9, ( + f"default module path issued {counter.count} queries." + ) diff --git a/tests/unit/server/test_structure_operation.py b/tests/unit/server/test_structure_operation.py new file mode 100644 index 0000000..3743ade --- /dev/null +++ b/tests/unit/server/test_structure_operation.py @@ -0,0 +1,444 @@ +"""Integration tests for /api/v1/structure/operation endpoints.""" + +from __future__ import annotations + +from datetime import date + +import pytest +from sqlalchemy import create_engine, event +from sqlalchemy.orm import Session +from sqlalchemy.pool import StaticPool + +import dpmcore.orm # noqa: F401 — ensure all models are loaded +from dpmcore.orm.base import Base +from dpmcore.orm.infrastructure import Concept, Organisation, Release +from dpmcore.orm.operations import ( + OperandReference, + OperandReferenceLocation, + Operation, + OperationNode, + OperationVersion, + Operator, +) +from dpmcore.orm.rendering import Cell, Table +from dpmcore.server.app import create_app + +# ------------------------------------------------------------------ # +# Seed model +# ------------------------------------------------------------------ # +# +# Two releases (4.0, 4.1). 
Two Operations: +# +# - V_001 (id=1): two versions +# - version A (vid=100, start=1, end=2) → 4.0 only +# · 2 nodes (root + child) +# · child node has 1 OperandReference with 1 location +# - version B (vid=101, start=2, end=None) → 4.1+ +# · 1 node, no references +# +# - V_002 (id=2): one version at 4.0 only (vid=200, start=1, end=2) +# → at 4.1 this operation should disappear from the result set. +# ------------------------------------------------------------------ # + + +@pytest.fixture +def engine(): + eng = create_engine( + "sqlite:///:memory:", + connect_args={"check_same_thread": False}, + poolclass=StaticPool, + ) + Base.metadata.create_all(eng) + return eng + + +@pytest.fixture +def seeded_engine(engine): + s = Session(bind=engine) + + s.add( + Organisation( + org_id=1, name="European Banking Authority", acronym="EBA" + ) + ) + s.flush() + + for guid in ["c-rel-1", "c-rel-2", "c-tbl-1", "c-cell-1"]: + s.add(Concept(concept_guid=guid, owner_id=1)) + s.flush() + + s.add_all( + [ + Release( + release_id=1, + code="4.0", + date=date(2024, 1, 1), + status="Final", + row_guid="c-rel-1", + owner_id=1, + ), + Release( + release_id=2, + code="4.1", + date=date(2024, 6, 1), + status="Final", + row_guid="c-rel-2", + owner_id=1, + ), + ] + ) + s.flush() + + # A Table + Cell so OperandReferenceLocation has a real FK target. + s.add( + Table( + table_id=1, + is_abstract=False, + has_open_columns=False, + has_open_rows=False, + has_open_sheets=False, + is_normalised=False, + is_flat=False, + row_guid="c-tbl-1", + owner_id=1, + ) + ) + s.flush() + s.add(Cell(cell_id=9001, table_id=1, row_guid="c-cell-1", owner_id=1)) + s.flush() + + # An Operator (root node references it). + s.add(Operator(operator_id=1, name="Equals", symbol="=", type="comp")) + s.flush() + + # Operations. 
+ s.add_all( + [ + Operation( + operation_id=1, + code="V_001", + type="validation", + source="EBA", + owner_id=1, + ), + Operation( + operation_id=2, + code="V_002", + type="validation", + source="EBA", + owner_id=1, + ), + ] + ) + s.flush() + + # V_001 versions. + s.add_all( + [ + OperationVersion( + operation_vid=100, + operation_id=1, + expression="a = b", + description="V_001 at 4.0", + endorsement="adopted", + is_variant_approved=True, + start_release_id=1, + end_release_id=2, + ), + OperationVersion( + operation_vid=101, + operation_id=1, + expression="a == b", + description="V_001 at 4.1", + endorsement="adopted", + is_variant_approved=True, + start_release_id=2, + end_release_id=None, + ), + ] + ) + s.flush() + + # V_002 has only one version at 4.0 (gone at 4.1). + s.add( + OperationVersion( + operation_vid=200, + operation_id=2, + expression="x > 0", + description="V_002 at 4.0", + endorsement="adopted", + is_variant_approved=True, + start_release_id=1, + end_release_id=2, + ) + ) + s.flush() + + # Nodes for V_001 version A (vid=100): root + child. + s.add_all( + [ + OperationNode( + node_id=10, + operation_vid=100, + parent_node_id=None, + operator_id=1, + is_leaf=False, + operand_type=None, + ), + OperationNode( + node_id=11, + operation_vid=100, + parent_node_id=10, + operator_id=None, + is_leaf=True, + operand_type="datapoint", + scalar="42", + ), + ] + ) + s.flush() + # Node for V_001 version B (vid=101): single leaf. + s.add( + OperationNode( + node_id=12, + operation_vid=101, + parent_node_id=None, + is_leaf=True, + operand_type="datapoint", + scalar="0", + ) + ) + s.flush() + # Node for V_002 (vid=200). + s.add( + OperationNode( + node_id=20, + operation_vid=200, + parent_node_id=None, + is_leaf=True, + scalar="x", + ) + ) + s.flush() + + # OperandReference on V_001 version A child node (id=11). 
+ s.add( + OperandReference( + operand_reference_id=300, + node_id=11, + x=0, + y=10, + z=None, + operand_reference="ref(F_01.01, r0010, c0010)", + ) + ) + s.flush() + + # Location for that reference. + s.add( + OperandReferenceLocation( + operand_reference_id=300, + cell_id=9001, + table="F_01.01", + row="r0010", + column="c0010", + sheet=None, + ) + ) + s.commit() + s.close() + return engine + + +@pytest.fixture +def client(seeded_engine): + from starlette.testclient import TestClient + + app = create_app("sqlite:///:memory:", engine=seeded_engine) + return TestClient(app) + + +@pytest.fixture +def empty_client(engine): + from starlette.testclient import TestClient + + app = create_app("sqlite:///:memory:", engine=engine) + return TestClient(app) + + +def _count_queries(engine): + class _Counter: + def __init__(self): + self.count = 0 + + def __enter__(self): + def _l(c, cur, stmt, p, ctx, m): + self.count += 1 + + self._l = _l + event.listen(engine, "before_cursor_execute", _l) + return self + + def __exit__(self, *exc): + event.remove(engine, "before_cursor_execute", self._l) + return False + + return _Counter() + + +# ------------------------------------------------------------------ # +# Tests — single release +# ------------------------------------------------------------------ # + + +class TestSingleOperationAtRelease: + def test_v001_at_4_0(self, client): + resp = client.get("/api/v1/structure/operation/EBA/V_001/4.0") + assert resp.status_code == 200 + ops = resp.json()["data"]["operations"] + assert len(ops) == 1 + op = ops[0] + assert op["code"] == "V_001" + assert op["type"] == "validation" + assert op["source"] == "EBA" + # Only the version active at 4.0. 
+ assert [v["operationVersionId"] for v in op["versions"]] == [100] + v = op["versions"][0] + assert v["release"] == "4.0" + assert v["expression"] == "a = b" + + def test_v001_at_4_1_returns_only_newer_version(self, client): + resp = client.get("/api/v1/structure/operation/EBA/V_001/4.1") + ops = resp.json()["data"]["operations"] + v = ops[0]["versions"][0] + assert v["operationVersionId"] == 101 + assert v["expression"] == "a == b" + + def test_v002_at_4_1_is_filtered_out(self, client): + """V_002 only has a version at 4.0; at 4.1 the whole Operation + is dropped from the result set. + """ + resp = client.get("/api/v1/structure/operation/EBA/V_002/4.1") + assert resp.status_code == 204 + + def test_nonexistent_code_204(self, client): + resp = client.get("/api/v1/structure/operation/EBA/V_999/4.0") + assert resp.status_code == 204 + + +class TestNestedPayload: + def test_version_a_nodes_present(self, client): + resp = client.get("/api/v1/structure/operation/EBA/V_001/4.0") + v = resp.json()["data"]["operations"][0]["versions"][0] + nodes = v["nodes"] + # Two nodes — flat list with parent links. + assert len(nodes) == 2 + root = next(n for n in nodes if n["parentNodeId"] is None) + child = next(n for n in nodes if n["parentNodeId"] == root["nodeId"]) + assert root["operatorId"] == 1 + assert root["isLeaf"] is False + assert child["isLeaf"] is True + assert child["scalar"] == "42" + + def test_version_a_references_and_locations(self, client): + resp = client.get("/api/v1/structure/operation/EBA/V_001/4.0") + v = resp.json()["data"]["operations"][0]["versions"][0] + child = next(n for n in v["nodes"] if n["isLeaf"]) + assert len(child["references"]) == 1 + ref = child["references"][0] + assert ref["operandReferenceId"] == 300 + assert ref["operandReference"] == "ref(F_01.01, r0010, c0010)" + assert ref["x"] == 0 + assert ref["y"] == 10 + # Location: pointed to cell 9001. 
+ assert len(ref["locations"]) == 1 + loc = ref["locations"][0] + assert loc["cellId"] == 9001 + assert loc["table"] == "F_01.01" + assert loc["row"] == "r0010" + assert loc["column"] == "c0010" + assert loc["sheet"] is None + + def test_node_without_references(self, client): + """Root node of V_001/4.0 has no references; V_001/4.1's leaf + also has none. + """ + resp = client.get("/api/v1/structure/operation/EBA/V_001/4.1") + v = resp.json()["data"]["operations"][0]["versions"][0] + leaf = v["nodes"][0] + assert leaf["references"] == [] + + +class TestAllVersions: + def test_release_wildcard_returns_both_v001_versions(self, client): + resp = client.get("/api/v1/structure/operation/EBA/V_001/*") + op = resp.json()["data"]["operations"][0] + vids = sorted(v["operationVersionId"] for v in op["versions"]) + assert vids == [100, 101] + + def test_release_wildcard_returns_all_operations(self, client): + resp = client.get("/api/v1/structure/operation/EBA/*/*") + codes = {op["code"] for op in resp.json()["data"]["operations"]} + assert codes == {"V_001", "V_002"} + + +class TestAllstubs: + def test_strips_nested_tree(self, client): + resp = client.get( + "/api/v1/structure/operation/EBA/V_001/4.0?detail=allstubs" + ) + op = resp.json()["data"]["operations"][0] + for key in ("versions", "source", "groupOperationId"): + assert key not in op + assert op["operationVersionIds"] == [100] + for key in ("id", "code", "type", "owner"): + assert key in op + + +class TestReferences: + def test_all_adds_organisations(self, client): + resp = client.get( + "/api/v1/structure/operation/EBA/V_001/4.0?references=all" + ) + body = resp.json() + assert "organisations" in body["data"] + + def test_children_noop_default_already_has_payload(self, client): + """references=children doesn't change the default response — + the nested tree is already there. 
+ """ + bare = client.get("/api/v1/structure/operation/EBA/V_001/4.0").json()[ + "data" + ]["operations"][0] + with_children = client.get( + "/api/v1/structure/operation/EBA/V_001/4.0?references=children" + ).json()["data"]["operations"][0] + assert bare == with_children + + +class TestEmptyDatabase: + def test_empty_returns_204(self, empty_client): + resp = empty_client.get("/api/v1/structure/operation/EBA/V_001/4.0") + assert resp.status_code == 204 + + +# ------------------------------------------------------------------ # +# Performance — query budget +# ------------------------------------------------------------------ # + + +class TestQueryBudget: + def test_full_payload_bounded(self, client, seeded_engine): + with _count_queries(seeded_engine) as counter: + resp = client.get("/api/v1/structure/operation/EBA/*/4.0") + assert resp.status_code == 200 + body = resp.json() + # 2 operations at 4.0 (V_001 and V_002). + assert len(body["data"]["operations"]) == 2 + # Budget: release resolution (3) + EXISTS-aware count + main + # paginated query + operation versions + nodes + references + + # locations + owner lookup. ≤15 with headroom. + assert counter.count <= 15, ( + f"operation path issued {counter.count} queries — " + f"likely an N+1 regression." 
+ ) diff --git a/tests/unit/server/test_structure_operator.py b/tests/unit/server/test_structure_operator.py new file mode 100644 index 0000000..640daf9 --- /dev/null +++ b/tests/unit/server/test_structure_operator.py @@ -0,0 +1,214 @@ +"""Integration tests for /api/v1/structure/operator endpoints.""" + +from __future__ import annotations + +import pytest +from sqlalchemy import create_engine, event +from sqlalchemy.orm import Session +from sqlalchemy.pool import StaticPool + +import dpmcore.orm # noqa: F401 — ensure all models are loaded +from dpmcore.orm.base import Base +from dpmcore.orm.operations import Operator, OperatorArgument +from dpmcore.server.app import create_app + + +@pytest.fixture +def engine(): + eng = create_engine( + "sqlite:///:memory:", + connect_args={"check_same_thread": False}, + poolclass=StaticPool, + ) + Base.metadata.create_all(eng) + return eng + + +@pytest.fixture +def seeded_engine(engine): + s = Session(bind=engine) + s.add_all( + [ + Operator( + operator_id=1, name="Equals", symbol="=", type="comparison" + ), + Operator( + operator_id=2, name="Plus", symbol="+", type="arithmetic" + ), + Operator(operator_id=3, name="And", symbol="AND", type="logical"), + ] + ) + s.flush() + s.add_all( + [ + OperatorArgument( + argument_id=10, + operator_id=1, + order=0, + is_mandatory=True, + name="left", + ), + OperatorArgument( + argument_id=11, + operator_id=1, + order=1, + is_mandatory=True, + name="right", + ), + OperatorArgument( + argument_id=20, + operator_id=2, + order=0, + is_mandatory=True, + name="lhs", + ), + OperatorArgument( + argument_id=21, + operator_id=2, + order=1, + is_mandatory=True, + name="rhs", + ), + OperatorArgument( + argument_id=30, + operator_id=3, + order=0, + is_mandatory=True, + name="a", + ), + OperatorArgument( + argument_id=31, + operator_id=3, + order=1, + is_mandatory=True, + name="b", + ), + ] + ) + s.commit() + s.close() + return engine + + +@pytest.fixture +def client(seeded_engine): + from 
starlette.testclient import TestClient + + app = create_app("sqlite:///:memory:", engine=seeded_engine) + return TestClient(app) + + +@pytest.fixture +def empty_client(engine): + from starlette.testclient import TestClient + + app = create_app("sqlite:///:memory:", engine=engine) + return TestClient(app) + + +def _count_queries(engine): + class _Counter: + def __init__(self): + self.count = 0 + + def __enter__(self): + def _l(c, cur, stmt, p, ctx, m): + self.count += 1 + + self._l = _l + event.listen(engine, "before_cursor_execute", _l) + return self + + def __exit__(self, *exc): + event.remove(engine, "before_cursor_execute", self._l) + return False + + return _Counter() + + +# ------------------------------------------------------------------ # +# Tests +# ------------------------------------------------------------------ # + + +class TestDefaultResponse: + def test_all_operators_wildcards(self, client): + resp = client.get("/api/v1/structure/operator/*/*/*") + assert resp.status_code == 200 + names = {o["name"] for o in resp.json()["data"]["operators"]} + assert names == {"Equals", "Plus", "And"} + + def test_single_by_name(self, client): + resp = client.get("/api/v1/structure/operator/*/Equals/*") + ops = resp.json()["data"]["operators"] + assert len(ops) == 1 + op = ops[0] + assert op["name"] == "Equals" + assert op["symbol"] == "=" + assert op["type"] == "comparison" + # Arguments inline in order. 
+ assert [a["name"] for a in op["arguments"]] == ["left", "right"] + for a in op["arguments"]: + assert a["isMandatory"] is True + + def test_single_by_numeric_id(self, client): + resp = client.get("/api/v1/structure/operator/*/3/*") + op = resp.json()["data"]["operators"][0] + assert op["name"] == "And" + + def test_comma_separated_names(self, client): + resp = client.get("/api/v1/structure/operator/*/Plus,And/*") + names = {o["name"] for o in resp.json()["data"]["operators"]} + assert names == {"Plus", "And"} + + def test_release_segment_ignored(self, client): + for path in ( + "/api/v1/structure/operator/*/Plus/4.0", + "/api/v1/structure/operator/*/Plus/4.1", + "/api/v1/structure/operator/*/Plus/999.0", + ): + resp = client.get(path) + assert resp.status_code == 200 + assert resp.json()["data"]["operators"][0]["name"] == "Plus" + + +class TestOwnerRejection: + def test_concrete_owner_204(self, client): + """Operators have no owner — concrete owners return 204.""" + for owner in ("EBA", "ECB", "anything"): + resp = client.get(f"/api/v1/structure/operator/{owner}/*/*") + assert resp.status_code == 204 + + +class TestAllstubs: + def test_strips_arguments_and_type(self, client): + resp = client.get( + "/api/v1/structure/operator/*/Equals/*?detail=allstubs" + ) + op = resp.json()["data"]["operators"][0] + for key in ("type", "arguments"): + assert key not in op + assert set(op.keys()) == {"id", "name", "symbol"} + + +class TestEmptyDatabase: + def test_empty_returns_204(self, empty_client): + resp = empty_client.get("/api/v1/structure/operator/*/*/*") + assert resp.status_code == 204 + + +class TestNonexistent: + def test_unknown_name_204(self, client): + resp = client.get("/api/v1/structure/operator/*/NOPE/*") + assert resp.status_code == 204 + + +class TestQueryBudget: + def test_minimal_query_count(self, client, seeded_engine): + with _count_queries(seeded_engine) as counter: + resp = client.get("/api/v1/structure/operator/*/*/*") + assert resp.status_code == 200 
+ # count + main + arguments bulk = 3; cap at 5. + assert counter.count <= 5, ( + f"operator path issued {counter.count} queries." + ) diff --git a/tests/unit/server/test_structure_organisation.py b/tests/unit/server/test_structure_organisation.py new file mode 100644 index 0000000..07b995d --- /dev/null +++ b/tests/unit/server/test_structure_organisation.py @@ -0,0 +1,205 @@ +"""Integration tests for /api/v1/structure/organisation endpoints.""" + +from __future__ import annotations + +import pytest +from sqlalchemy import create_engine, event +from sqlalchemy.orm import Session +from sqlalchemy.pool import StaticPool + +import dpmcore.orm # noqa: F401 — ensure all models are loaded +from dpmcore.orm.base import Base +from dpmcore.orm.infrastructure import Organisation +from dpmcore.server.app import create_app + + +@pytest.fixture +def engine(): + eng = create_engine( + "sqlite:///:memory:", + connect_args={"check_same_thread": False}, + poolclass=StaticPool, + ) + Base.metadata.create_all(eng) + return eng + + +@pytest.fixture +def seeded_engine(engine): + """Three organisations: EBA, ECB, BIS.""" + s = Session(bind=engine) + s.add_all( + [ + Organisation( + org_id=1, + name="European Banking Authority", + acronym="EBA", + id_prefix=101, + ), + Organisation( + org_id=2, + name="European Central Bank", + acronym="ECB", + id_prefix=102, + ), + Organisation( + org_id=3, + name="Bank for International Settlements", + acronym="BIS", + id_prefix=103, + ), + ] + ) + s.commit() + s.close() + return engine + + +@pytest.fixture +def client(seeded_engine): + from starlette.testclient import TestClient + + app = create_app("sqlite:///:memory:", engine=seeded_engine) + return TestClient(app) + + +@pytest.fixture +def empty_client(engine): + from starlette.testclient import TestClient + + app = create_app("sqlite:///:memory:", engine=engine) + return TestClient(app) + + +def _count_queries(engine): + class _Counter: + def __init__(self): + self.count = 0 + + def 
__enter__(self): + def _l(c, cur, stmt, p, ctx, m): + self.count += 1 + + self._l = _l + event.listen(engine, "before_cursor_execute", _l) + return self + + def __exit__(self, *exc): + event.remove(engine, "before_cursor_execute", self._l) + return False + + return _Counter() + + +# ------------------------------------------------------------------ # +# Tests +# ------------------------------------------------------------------ # + + +class TestDefaultResponse: + def test_all_orgs_wildcards(self, client): + resp = client.get("/api/v1/structure/organisation/*/*/*") + assert resp.status_code == 200 + orgs = resp.json()["data"]["organisations"] + acronyms = {o["acronym"] for o in orgs} + assert acronyms == {"EBA", "ECB", "BIS"} + + def test_fields_present(self, client): + resp = client.get("/api/v1/structure/organisation/*/EBA/*") + o = resp.json()["data"]["organisations"][0] + for key in ("id", "name", "acronym", "idPrefix"): + assert key in o + assert o["acronym"] == "EBA" + assert o["idPrefix"] == 101 + + +class TestFiltering: + def test_owner_segment_filters_by_acronym(self, client): + resp = client.get("/api/v1/structure/organisation/EBA/*/*") + orgs = resp.json()["data"]["organisations"] + assert len(orgs) == 1 + assert orgs[0]["acronym"] == "EBA" + + def test_id_segment_filters_by_acronym(self, client): + resp = client.get("/api/v1/structure/organisation/*/BIS/*") + orgs = resp.json()["data"]["organisations"] + assert {o["acronym"] for o in orgs} == {"BIS"} + + def test_id_segment_filters_by_numeric_id(self, client): + resp = client.get("/api/v1/structure/organisation/*/2/*") + orgs = resp.json()["data"]["organisations"] + assert len(orgs) == 1 + assert orgs[0]["id"] == 2 + assert orgs[0]["acronym"] == "ECB" + + def test_release_segment_ignored(self, client): + """Organisations aren't versioned — every release literal works.""" + for path in ( + "/api/v1/structure/organisation/*/EBA/4.0", + "/api/v1/structure/organisation/*/EBA/4.1", + 
"/api/v1/structure/organisation/*/EBA/999.0", + ): + resp = client.get(path) + assert resp.status_code == 200 + assert resp.json()["data"]["organisations"][0]["acronym"] == "EBA" + + def test_owner_and_id_combined(self, client): + # Owner=EBA AND id=EBA → matches EBA only. + resp = client.get("/api/v1/structure/organisation/EBA/EBA/*") + orgs = resp.json()["data"]["organisations"] + assert {o["acronym"] for o in orgs} == {"EBA"} + + def test_owner_and_id_disagree_204(self, client): + # Owner=EBA AND id=BIS → empty intersection. + resp = client.get("/api/v1/structure/organisation/EBA/BIS/*") + assert resp.status_code == 204 + + def test_comma_separated_ids(self, client): + resp = client.get("/api/v1/structure/organisation/*/EBA,BIS/*") + acronyms = {o["acronym"] for o in resp.json()["data"]["organisations"]} + assert acronyms == {"EBA", "BIS"} + + +class TestAllstubs: + def test_strips_extra_fields(self, client): + resp = client.get( + "/api/v1/structure/organisation/*/EBA/*?detail=allstubs" + ) + o = resp.json()["data"]["organisations"][0] + assert set(o.keys()) == {"id", "acronym"} + + +class TestReferences: + def test_all_noop(self, client): + """references=all is a no-op for organisations themselves.""" + resp = client.get( + "/api/v1/structure/organisation/*/EBA/*?references=all" + ) + body = resp.json() + # The data wrapper has only the organisations key — no nested + # 'organisations' enrichment (since we're already that list). 
+ assert set(body["data"].keys()) == {"organisations"} + + def test_children_noop(self, client): + resp = client.get( + "/api/v1/structure/organisation/*/EBA/*?references=children" + ) + body = resp.json() + assert set(body["data"].keys()) == {"organisations"} + + +class TestEmptyDatabase: + def test_empty_returns_204(self, empty_client): + resp = empty_client.get("/api/v1/structure/organisation/*/*/*") + assert resp.status_code == 204 + + +class TestQueryBudget: + def test_minimal_query_count(self, client, seeded_engine): + with _count_queries(seeded_engine) as counter: + resp = client.get("/api/v1/structure/organisation/*/*/*") + assert resp.status_code == 200 + # count + main query = 2; cap at 4 for headroom. + assert counter.count <= 4, ( + f"organisation path issued {counter.count} queries." + ) diff --git a/tests/unit/server/test_structure_property.py b/tests/unit/server/test_structure_property.py new file mode 100644 index 0000000..b47c160 --- /dev/null +++ b/tests/unit/server/test_structure_property.py @@ -0,0 +1,484 @@ +"""Integration tests for /api/v1/structure/property endpoints.""" + +from __future__ import annotations + +from datetime import date + +import pytest +from sqlalchemy import create_engine, event +from sqlalchemy.orm import Session +from sqlalchemy.pool import StaticPool + +import dpmcore.orm # noqa: F401 — ensure all models are loaded +from dpmcore.orm.base import Base +from dpmcore.orm.glossary import ( + Category, + Item, + ItemCategory, + Property, + PropertyCategory, +) +from dpmcore.orm.infrastructure import ( + Concept, + DataType, + Organisation, + Release, +) +from dpmcore.server.app import create_app + +# ------------------------------------------------------------------ # +# Seed model +# ------------------------------------------------------------------ # +# +# Releases 4.0 (Final) and 4.1 (Final). 
+# +# Meta-category `_PR` registers properties; the property's code, +# signature, and release window come from its `_PR` ItemCategory row. +# +# Three properties, all owned by EBA: +# - 51 "BAS" (Basis) — DataType=String, no PropertyCategory link +# (not enumerated). Has two ItemCategory rows across releases: +# "BAS" at 4.0, "BAS2" at 4.1 (re-coded between releases) — +# exercises the per-version code change. +# - 52 "ASSET" (Asset type) — DataType=Enumeration, PropertyCategory +# links to enumerated Category ASSET_TYPE. Items LOAN (alive +# at 4.0 only) and BOND (alive throughout). +# - 53 "PARTY" — DataType=String, no enumeration. +# ------------------------------------------------------------------ # + + +@pytest.fixture +def engine(): + eng = create_engine( + "sqlite:///:memory:", + connect_args={"check_same_thread": False}, + poolclass=StaticPool, + ) + Base.metadata.create_all(eng) + return eng + + +@pytest.fixture +def seeded_engine(engine): + s = Session(bind=engine) + + s.add( + Organisation( + org_id=1, name="European Banking Authority", acronym="EBA" + ) + ) + s.flush() + + for guid in [ + "c-rel-1", + "c-rel-2", + "c-prop-51", + "c-prop-52", + "c-prop-53", + ]: + s.add(Concept(concept_guid=guid, owner_id=1)) + s.flush() + + s.add_all( + [ + Release( + release_id=1, + code="4.0", + date=date(2024, 1, 1), + status="Final", + is_current=False, + row_guid="c-rel-1", + owner_id=1, + ), + Release( + release_id=2, + code="4.1", + date=date(2024, 6, 1), + status="Final", + is_current=True, + row_guid="c-rel-2", + owner_id=1, + ), + ] + ) + s.flush() + + s.add_all( + [ + DataType( + data_type_id=1, code="String", name="String", is_active=True + ), + DataType( + data_type_id=2, + code="Enumeration", + name="Enumeration", + is_active=True, + ), + ] + ) + s.flush() + + # Meta-category and enumerated category. 
+ s.add_all( + [ + Category( + category_id=1, + code="_PR", + name="Property", + description="", + is_enumerated=False, + is_active=True, + is_external_ref_data=False, + created_release=1, + owner_id=1, + ), + Category( + category_id=60, + code="ASSET_TYPE", + name="Asset type", + description="", + is_enumerated=True, + is_active=True, + is_external_ref_data=False, + created_release=1, + owner_id=1, + ), + ] + ) + s.flush() + + # Property items (subtype of Item). + s.add_all( + [ + Item( + item_id=51, + name="Basis", + description="Accounting basis", + is_property=True, + is_active=True, + row_guid="c-prop-51", + owner_id=1, + ), + Item( + item_id=52, + name="Asset type", + description="Type of asset", + is_property=True, + is_active=True, + row_guid="c-prop-52", + owner_id=1, + ), + Item( + item_id=53, + name="Party type", + description="Type of counterparty", + is_property=True, + is_active=True, + row_guid="c-prop-53", + owner_id=1, + ), + # Enumeration members for property 52. + Item(item_id=700, name="Loan", is_property=False, is_active=True), + Item(item_id=701, name="Bond", is_property=False, is_active=True), + ] + ) + s.flush() + + s.add_all( + [ + Property( + property_id=51, + is_composite=False, + is_metric=False, + data_type_id=1, + owner_id=1, + ), + Property( + property_id=52, + is_composite=False, + is_metric=False, + data_type_id=2, + owner_id=1, + ), + Property( + property_id=53, + is_composite=False, + is_metric=False, + data_type_id=1, + owner_id=1, + ), + ] + ) + s.flush() + + # ItemCategory rows — each property in the `_PR` meta-category. + # Property 51 re-coded between releases. 
+ s.add_all( + [ + ItemCategory( + item_id=51, + start_release_id=1, + category_id=1, + code="BAS", + is_default_item=False, + signature="_PR(BAS)", + end_release_id=2, + ), + ItemCategory( + item_id=51, + start_release_id=2, + category_id=1, + code="BAS2", + is_default_item=False, + signature="_PR(BAS2)", + end_release_id=None, + ), + ItemCategory( + item_id=52, + start_release_id=1, + category_id=1, + code="ASSET", + is_default_item=False, + signature="_PR(ASSET)", + end_release_id=None, + ), + ItemCategory( + item_id=53, + start_release_id=1, + category_id=1, + code="PARTY", + is_default_item=False, + signature="_PR(PARTY)", + end_release_id=None, + ), + # Enumeration members in ASSET_TYPE. + ItemCategory( + item_id=700, + start_release_id=1, + category_id=60, + code="LOAN", + is_default_item=False, + signature="ASSET_TYPE(LOAN)", + end_release_id=2, + ), + ItemCategory( + item_id=701, + start_release_id=1, + category_id=60, + code="BOND", + is_default_item=False, + signature="ASSET_TYPE(BOND)", + end_release_id=None, + ), + ] + ) + s.flush() + + # PropertyCategory — only property 52 is enumerated. 
+ s.add( + PropertyCategory( + property_id=52, + start_release_id=1, + category_id=60, + end_release_id=None, + ) + ) + s.commit() + s.close() + return engine + + +@pytest.fixture +def client(seeded_engine): + from starlette.testclient import TestClient + + app = create_app("sqlite:///:memory:", engine=seeded_engine) + return TestClient(app) + + +@pytest.fixture +def empty_client(engine): + from starlette.testclient import TestClient + + app = create_app("sqlite:///:memory:", engine=engine) + return TestClient(app) + + +def _count_queries(engine): + class _Counter: + def __init__(self): + self.count = 0 + + def __enter__(self): + def _l(c, cur, stmt, p, ctx, m): + self.count += 1 + + self._l = _l + event.listen(engine, "before_cursor_execute", _l) + return self + + def __exit__(self, *exc): + event.remove(engine, "before_cursor_execute", self._l) + return False + + return _Counter() + + +# ------------------------------------------------------------------ # +# Tests — default response +# ------------------------------------------------------------------ # + + +class TestSingleProperty: + def test_at_literal_release(self, client): + resp = client.get("/api/v1/structure/property/EBA/BAS/4.0") + assert resp.status_code == 200 + body = resp.json() + props = body["data"]["properties"] + assert len(props) == 1 + p = props[0] + assert p["code"] == "BAS" + assert p["signature"] == "_PR(BAS)" + assert p["label"] == "Basis" + assert p["release"] == "4.0" + assert p["dataType"]["code"] == "String" + assert p["definingCategory"]["code"] == "_PR" + assert p["isEnumerated"] is False + + def test_code_change_across_releases(self, client): + # BAS at 4.0, BAS2 at 4.1. + r0 = client.get("/api/v1/structure/property/EBA/BAS/4.0") + r1 = client.get("/api/v1/structure/property/EBA/BAS2/4.1") + assert r0.json()["data"]["properties"][0]["code"] == "BAS" + assert r1.json()["data"]["properties"][0]["code"] == "BAS2" + # The id (item_id=51) is the same across versions. 
+ assert ( + r0.json()["data"]["properties"][0]["id"] + == r1.json()["data"]["properties"][0]["id"] + == 51 + ) + + def test_old_code_gone_at_later_release(self, client): + # BAS was alive at 4.0 only; at 4.1 it's BAS2. + resp = client.get("/api/v1/structure/property/EBA/BAS/4.1") + assert resp.status_code == 204 + + def test_latest_returns_newer_code(self, client): + resp = client.get("/api/v1/structure/property/EBA/BAS2/~") + p = resp.json()["data"]["properties"][0] + assert p["code"] == "BAS2" + assert p["release"] == "4.1" + + def test_nonexistent_code_204(self, client): + resp = client.get("/api/v1/structure/property/EBA/NOPE/4.0") + assert resp.status_code == 204 + + +class TestEnumeration: + def test_enumerated_property_carries_enumeration(self, client): + resp = client.get("/api/v1/structure/property/EBA/ASSET/4.0") + p = resp.json()["data"]["properties"][0] + assert p["isEnumerated"] is True + enum = p["enumeration"] + assert enum["categoryCode"] == "ASSET_TYPE" + codes = {i["code"] for i in enum["items"]} + # LOAN ends at 4.1 → present at 4.0. BOND alive throughout. 
+ assert codes == {"LOAN", "BOND"} + + def test_enumeration_items_shrink_at_4_1(self, client): + resp = client.get("/api/v1/structure/property/EBA/ASSET/4.1") + p = resp.json()["data"]["properties"][0] + codes = {i["code"] for i in p["enumeration"]["items"]} + assert codes == {"BOND"} + + def test_non_enumerated_property(self, client): + resp = client.get("/api/v1/structure/property/EBA/PARTY/4.0") + p = resp.json()["data"]["properties"][0] + assert p["isEnumerated"] is False + assert p["enumeration"] is None + + +class TestAllProperties: + def test_wildcard_id_lists_all(self, client): + resp = client.get("/api/v1/structure/property/EBA/*/4.0") + codes = {p["code"] for p in resp.json()["data"]["properties"]} + assert codes == {"BAS", "ASSET", "PARTY"} + + def test_release_wildcard_gives_both_BAS_versions(self, client): + resp = client.get("/api/v1/structure/property/EBA/BAS,BAS2/*") + codes = {p["code"] for p in resp.json()["data"]["properties"]} + assert codes == {"BAS", "BAS2"} + + +class TestAllstubs: + def test_strips_subtrees(self, client): + resp = client.get( + "/api/v1/structure/property/EBA/BAS/4.0?detail=allstubs" + ) + p = resp.json()["data"]["properties"][0] + for key in ( + "dataType", + "definingCategory", + "isEnumerated", + "enumeration", + "isComposite", + "isMetric", + ): + assert key not in p + for key in ( + "id", + "code", + "signature", + "label", + "owner", + "release", + ): + assert key in p + + +class TestReferences: + def test_all_includes_organisations(self, client): + resp = client.get( + "/api/v1/structure/property/EBA/BAS/4.0?references=all" + ) + assert "organisations" in resp.json()["data"] + + def test_children_is_silent_noop(self, client): + resp = client.get( + "/api/v1/structure/property/EBA/BAS/4.0?references=children" + ) + p = resp.json()["data"]["properties"][0] + for key in ("tables", "modules", "children"): + assert key not in p + + +class TestEmptyDatabase: + def test_empty_returns_204(self, empty_client): + resp = 
empty_client.get("/api/v1/structure/property/EBA/BAS/4.0") + assert resp.status_code == 204 + + +# ------------------------------------------------------------------ # +# Performance — query budget +# ------------------------------------------------------------------ # + + +class TestQueryBudget: + def test_query_count_bounded(self, client, seeded_engine): + with _count_queries(seeded_engine) as counter: + resp = client.get("/api/v1/structure/property/EBA/*/4.0") + assert resp.status_code == 200 + body = resp.json() + assert len(body["data"]["properties"]) == 3 + # Budget breakdown: + # 3 release-resolution queries (filter_by_release internals); + # 1 count, 1 main paginated query; + # 2 enumeration loads (PropertyCategory+Category, then + # ItemCategory+Item — only when at least one property is + # enumerated); + # 1 owner lookup. + # ≤12 leaves headroom; budget is independent of property count. + assert counter.count <= 12, ( + f"property path issued {counter.count} queries — " + f"likely an N+1 regression." 
+ ) diff --git a/tests/unit/server/test_structure_table.py b/tests/unit/server/test_structure_table.py new file mode 100644 index 0000000..d05697c --- /dev/null +++ b/tests/unit/server/test_structure_table.py @@ -0,0 +1,703 @@ +"""Integration tests for /api/v1/structure/table endpoints.""" + +from __future__ import annotations + +from datetime import date + +import pytest +from sqlalchemy import create_engine +from sqlalchemy.orm import Session +from sqlalchemy.pool import StaticPool + +import dpmcore.orm # noqa: F401 — ensure all models are loaded +from dpmcore.orm.base import Base +from dpmcore.orm.glossary import ( + Category, + Item, + ItemCategory, + Property, + SubCategory, + SubCategoryItem, + SubCategoryVersion, +) +from dpmcore.orm.infrastructure import ( + Concept, + DataType, + Organisation, + Release, +) +from dpmcore.orm.rendering import ( + Cell, + Header, + HeaderVersion, + Table, + TableVersion, + TableVersionCell, + TableVersionHeader, +) +from dpmcore.orm.variables import Variable, VariableVersion +from dpmcore.server.app import create_app + +# ------------------------------------------------------------------ # +# Seed model +# ------------------------------------------------------------------ # +# +# Releases: 3.3 (Final), 3.4 (Final), 3.5 (Draft). +# +# A single Table (F_01.01) has two TableVersions: +# - tvid=1000: active 3.3 only (start=1, end=2) +# - tvid=2000: active 3.4+ (start=2, end=None) +# +# Headers: a column header (header_id=11) and a row header +# (header_id=22). The row header carries: +# - key_variable_vid=7777 (a key variable) +# - subcategory_vid=4441 (the row's allowed-values restriction) +# Both the key variable AND the fact variable in cells of that row +# inherit the row header's subcategory as their enumeration. +# +# Subcategory AT_SUB lives under category ASSET_TYPE and lists items +# LOAN, BOND, DEPOSIT via SubCategoryItem rows. 
The release-aware +# filter on each item's parent ItemCategory then yields: +# - LOAN valid at 3.3 only (start=1, end=2) +# - BOND valid at all releases (start=1, end=None) +# - DEPOSIT valid from 3.4 onward (start=2, end=None) +# ------------------------------------------------------------------ # + + +@pytest.fixture +def engine(): + eng = create_engine( + "sqlite:///:memory:", + connect_args={"check_same_thread": False}, + poolclass=StaticPool, + ) + Base.metadata.create_all(eng) + return eng + + +@pytest.fixture +def seeded_engine(engine): + """Engine with the table fixture model pre-seeded.""" + session = Session(bind=engine) + + # Owner organisation + concepts. + session.add( + Organisation( + org_id=1, name="European Banking Authority", acronym="EBA" + ) + ) + session.flush() + + for guid in [ + "c-rel-1", + "c-rel-2", + "c-rel-3", + "c-table-1", + "c-header-c", + "c-header-r", + "c-cell-1", + "c-var-1", + ]: + session.add(Concept(concept_guid=guid, owner_id=1)) + session.flush() + + # Releases. + session.add_all( + [ + Release( + release_id=1, + code="3.3", + date=date(2024, 1, 1), + description="Release 3.3", + status="Final", + is_current=False, + row_guid="c-rel-1", + owner_id=1, + ), + Release( + release_id=2, + code="3.4", + date=date(2024, 6, 1), + description="Release 3.4", + status="Final", + is_current=True, + row_guid="c-rel-2", + owner_id=1, + ), + Release( + release_id=3, + code="3.5", + date=date(2024, 12, 1), + description="Release 3.5 draft", + status="Draft", + is_current=False, + row_guid="c-rel-3", + owner_id=1, + ), + ] + ) + session.flush() + + # DataType (required for Property FK target). + session.add( + DataType(data_type_id=1, code="String", name="String", is_active=True) + ) + session.flush() + + # Table + two TableVersions (release windows differ). 
+ session.add( + Table( + table_id=100, + is_abstract=False, + has_open_columns=False, + has_open_rows=False, + has_open_sheets=False, + is_normalised=False, + is_flat=False, + row_guid="c-table-1", + owner_id=1, + ) + ) + session.flush() + session.add_all( + [ + TableVersion( + table_vid=1000, + code="F_01.01", + name="Balance sheet (v1)", + description="Assets — first version", + table_id=100, + start_release_id=1, + end_release_id=2, + ), + TableVersion( + table_vid=2000, + code="F_01.01", + name="Balance sheet (v2)", + description="Assets — second version", + table_id=100, + start_release_id=2, + end_release_id=None, + ), + ] + ) + session.flush() + + # Headers (one column, one row). + session.add_all( + [ + Header( + header_id=11, + table_id=100, + direction="x", + is_key=False, + is_attribute=False, + row_guid="c-header-c", + owner_id=1, + ), + Header( + header_id=22, + table_id=100, + direction="y", + is_key=True, + is_attribute=False, + row_guid="c-header-r", + owner_id=1, + ), + ] + ) + session.flush() + + # Key variable used by the row header. Created early so the + # HeaderVersion below can FK into it. + session.add(Variable(variable_id=89, type="k", owner_id=1)) + session.flush() + session.add( + VariableVersion( + variable_vid=7777, + variable_id=89, + property_id=None, + code="kv01", + name="Row key", + is_multi_valued=False, + start_release_id=1, + end_release_id=None, + ) + ) + session.flush() + + # Enumerated parent category (must exist before the SubCategory + # FK below can resolve). + session.add( + Category( + category_id=60, + code="ASSET_TYPE", + name="Asset type", + description="Domain of assets", + is_enumerated=True, + is_active=True, + is_external_ref_data=False, + created_release=1, + owner_id=1, + ) + ) + session.flush() + + # SubCategory under ASSET_TYPE + a SubCategoryVersion that the + # row header will reference. 
The version spans the whole timeline; + # which items are visible at a given release is then governed by + # each item's ItemCategory window (see below). + session.add( + SubCategory( + subcategory_id=400, + category_id=60, + code="AT_SUB", + name="Asset type subset", + description="Allowed asset types for the row dimension", + owner_id=1, + ) + ) + session.flush() + session.add( + SubCategoryVersion( + subcategory_vid=4441, + subcategory_id=400, + start_release_id=1, + end_release_id=None, + ) + ) + session.flush() + + # HeaderVersions — row header carries key_variable_vid + the + # subcategory that defines the row dimension's enumeration. + session.add_all( + [ + HeaderVersion( + header_vid=111, + header_id=11, + code="0010", + label="Carrying amount", + start_release_id=1, + end_release_id=None, + ), + HeaderVersion( + header_vid=222, + header_id=22, + code="010", + label="Loans", + key_variable_vid=7777, + subcategory_vid=4441, + start_release_id=1, + end_release_id=None, + ), + ] + ) + session.flush() + + # Bind headers to both table versions. + session.add_all( + [ + TableVersionHeader( + table_vid=1000, header_id=11, header_vid=111, order=0 + ), + TableVersionHeader( + table_vid=1000, header_id=22, header_vid=222, order=1 + ), + TableVersionHeader( + table_vid=2000, header_id=11, header_vid=111, order=0 + ), + TableVersionHeader( + table_vid=2000, header_id=22, header_vid=222, order=1 + ), + ] + ) + session.flush() + + # Items + their (release-versioned) presence in ASSET_TYPE. + session.add_all( + [ + Item(item_id=700, name="Loan", is_property=False, is_active=True), + Item(item_id=701, name="Bond", is_property=False, is_active=True), + Item( + item_id=702, + name="Deposit", + is_property=False, + is_active=True, + ), + # The Property is itself an Item (subtype) — needs an Item row. 
+ Item( + item_id=51, + name="Asset property", + is_property=True, + is_active=True, + ), + ] + ) + session.flush() + session.add_all( + [ + ItemCategory( + item_id=700, + start_release_id=1, + category_id=60, + code="LOAN", + is_default_item=False, + signature="ASSET_TYPE(LOAN)", + end_release_id=2, # alive at 3.3 only + ), + ItemCategory( + item_id=701, + start_release_id=1, + category_id=60, + code="BOND", + is_default_item=False, + signature="ASSET_TYPE(BOND)", + end_release_id=None, + ), + ItemCategory( + item_id=702, + start_release_id=2, + category_id=60, + code="DEPOSIT", + is_default_item=True, + signature="ASSET_TYPE(DEPOSIT)", + end_release_id=None, + ), + ] + ) + session.flush() + + # Items 700/701/702 become the SubCategoryVersion's members. + session.add_all( + [ + SubCategoryItem(item_id=700, subcategory_vid=4441, order=1), + SubCategoryItem(item_id=701, subcategory_vid=4441, order=2), + SubCategoryItem(item_id=702, subcategory_vid=4441, order=3), + ] + ) + session.flush() + + # Property kept for variable.property reference (no longer the + # enumeration source). + session.add( + Property( + property_id=51, + is_composite=False, + is_metric=False, + data_type_id=1, + owner_id=1, + ) + ) + session.flush() + + # Fact variables: one per TableVersion to exercise dedup and + # version-aware enumeration windows. + session.add(Variable(variable_id=88, type="d", owner_id=1)) + session.flush() + session.add_all( + [ + VariableVersion( + variable_vid=8888, + variable_id=88, + property_id=51, + code="ei001", + name="Asset value (v1)", + is_multi_valued=False, + start_release_id=1, + end_release_id=2, + ), + VariableVersion( + variable_vid=9999, + variable_id=88, + property_id=51, + code="ei002", + name="Asset value (v2)", + is_multi_valued=False, + start_release_id=2, + end_release_id=None, + ), + ] + ) + session.flush() + + # One cell at (col=11, row=22), bound to each TableVersion with + # its own variable_vid. 
+ session.add( + Cell( + cell_id=9001, + table_id=100, + column_id=11, + row_id=22, + sheet_id=None, + row_guid="c-cell-1", + owner_id=1, + ) + ) + session.flush() + session.add_all( + [ + TableVersionCell( + table_vid=1000, + cell_id=9001, + cell_code="{r010,c0010}", + is_nullable=False, + is_excluded=False, + is_void=False, + sign=None, + variable_vid=8888, + ), + TableVersionCell( + table_vid=2000, + cell_id=9001, + cell_code="{r010,c0010}", + is_nullable=False, + is_excluded=False, + is_void=False, + sign=None, + variable_vid=9999, + ), + ] + ) + session.commit() + session.close() + return engine + + +@pytest.fixture +def client(seeded_engine): + from starlette.testclient import TestClient + + app = create_app("sqlite:///:memory:", engine=seeded_engine) + return TestClient(app) + + +@pytest.fixture +def empty_client(engine): + from starlette.testclient import TestClient + + app = create_app("sqlite:///:memory:", engine=engine) + return TestClient(app) + + +# ------------------------------------------------------------------ # +# Tests +# ------------------------------------------------------------------ # + + +class TestSingleTable: + def test_at_literal_release(self, client): + resp = client.get("/api/v1/structure/table/EBA/F_01.01/3.3") + assert resp.status_code == 200 + body = resp.json() + assert "tables" in body["data"] + tables = body["data"]["tables"] + assert len(tables) == 1 + t = tables[0] + assert t["code"] == "F_01.01" + assert t["owner"] == "EBA" + assert t["release"] == "3.3" + assert t["tableVersionId"] == 1000 + + def test_latest_returns_v2(self, client): + resp = client.get("/api/v1/structure/table/EBA/F_01.01/~") + assert resp.status_code == 200 + tables = resp.json()["data"]["tables"] + assert len(tables) == 1 + assert tables[0]["tableVersionId"] == 2000 + + def test_latest_stable_returns_v2(self, client): + # 3.4 is the latest with status=Final; 3.5-draft is Draft. 
+ resp = client.get("/api/v1/structure/table/EBA/F_01.01/+") + assert resp.status_code == 200 + tables = resp.json()["data"]["tables"] + assert len(tables) == 1 + assert tables[0]["tableVersionId"] == 2000 + assert tables[0]["release"] == "3.4" + + def test_nonexistent_code_returns_204(self, client): + resp = client.get("/api/v1/structure/table/EBA/F_99.99/3.4") + assert resp.status_code == 204 + + def test_nonexistent_owner_returns_204(self, client): + resp = client.get("/api/v1/structure/table/UNKNOWN/F_01.01/3.4") + assert resp.status_code == 204 + + def test_unknown_release_returns_204(self, client): + resp = client.get("/api/v1/structure/table/EBA/F_01.01/9.9") + assert resp.status_code == 204 + + +class TestAllReleases: + def test_release_wildcard_returns_both_versions(self, client): + resp = client.get("/api/v1/structure/table/EBA/F_01.01/*") + assert resp.status_code == 200 + body = resp.json() + tables = body["data"]["tables"] + assert len(tables) == 2 + vids = sorted(t["tableVersionId"] for t in tables) + assert vids == [1000, 2000] + # Each entry exposes its own start release. 
+ rels = sorted(t["release"] for t in tables) + assert rels == ["3.3", "3.4"] + assert body["meta"]["totalCount"] == 2 + + def test_all_owners_wildcard_id(self, client): + resp = client.get("/api/v1/structure/table/*/*/*") + assert resp.status_code == 200 + assert resp.json()["meta"]["totalCount"] == 2 + + +class TestHeadersAndCells: + def test_headers_present_with_full_structural_set(self, client): + resp = client.get("/api/v1/structure/table/EBA/F_01.01/3.4") + t = resp.json()["data"]["tables"][0] + headers = t["headers"] + assert len(headers) == 2 + col = next(h for h in headers if h["direction"] == "x") + assert col["code"] == "0010" + assert col["label"] == "Carrying amount" + assert col["isKey"] is False + # locked-in shape — every header must expose these keys + for key in ( + "headerVersionId", + "property", + "context", + "subcategoryVersionId", + "keyVariableVersionId", + "startReleaseId", + "endReleaseId", + ): + assert key in col + + def test_cells_reference_variable_version(self, client): + resp = client.get("/api/v1/structure/table/EBA/F_01.01/3.3") + t = resp.json()["data"]["tables"][0] + assert len(t["cells"]) == 1 + cell = t["cells"][0] + assert cell["cellCode"] == "{r010,c0010}" + assert cell["columnHeaderId"] == 11 + assert cell["rowHeaderId"] == 22 + assert cell["variableVersionId"] == 8888 + + def test_v2_cell_points_to_v2_variable(self, client): + resp = client.get("/api/v1/structure/table/EBA/F_01.01/3.4") + t = resp.json()["data"]["tables"][0] + assert t["cells"][0]["variableVersionId"] == 9999 + + +class TestFactVariableEnumeration: + def test_fact_variable_is_enumerated(self, client): + resp = client.get("/api/v1/structure/table/EBA/F_01.01/3.3") + t = resp.json()["data"]["tables"][0] + assert len(t["factVariables"]) == 1 + v = t["factVariables"][0] + assert v["versionId"] == 8888 + assert v["isEnumerated"] is True + # The enumeration is sourced from the row header's subcategory. 
+ enum = v["enumeration"] + assert enum["subcategoryVersionId"] == 4441 + assert enum["subcategoryCode"] == "AT_SUB" + assert enum["categoryCode"] == "ASSET_TYPE" + + def test_enumeration_items_at_3_3(self, client): + resp = client.get("/api/v1/structure/table/EBA/F_01.01/3.3") + t = resp.json()["data"]["tables"][0] + codes = { + i["code"] for i in t["factVariables"][0]["enumeration"]["items"] + } + # LOAN ends at 3.4 → still valid at 3.3. DEPOSIT not yet alive. + assert codes == {"LOAN", "BOND"} + + def test_enumeration_items_at_3_4(self, client): + resp = client.get("/api/v1/structure/table/EBA/F_01.01/3.4") + t = resp.json()["data"]["tables"][0] + codes = { + i["code"] for i in t["factVariables"][0]["enumeration"]["items"] + } + # LOAN gone (end=2), DEPOSIT now alive. + assert codes == {"BOND", "DEPOSIT"} + + def test_enumeration_items_carry_signature(self, client): + resp = client.get("/api/v1/structure/table/EBA/F_01.01/3.4") + t = resp.json()["data"]["tables"][0] + items = t["factVariables"][0]["enumeration"]["items"] + for it in items: + assert "signature" in it + assert it["signature"].startswith("ASSET_TYPE(") + + def test_release_wildcard_uses_per_version_window(self, client): + """Each TableVersion in the response carries the enumeration + active at its own start_release (not a single shared window). 
+ """ + resp = client.get("/api/v1/structure/table/EBA/F_01.01/*") + tables = resp.json()["data"]["tables"] + by_vid = {t["tableVersionId"]: t for t in tables} + v1_codes = { + i["code"] + for i in by_vid[1000]["factVariables"][0]["enumeration"]["items"] + } + v2_codes = { + i["code"] + for i in by_vid[2000]["factVariables"][0]["enumeration"]["items"] + } + assert v1_codes == {"LOAN", "BOND"} + assert v2_codes == {"BOND", "DEPOSIT"} + + +class TestKeyVariables: + def test_key_variable_present_and_separated(self, client): + resp = client.get("/api/v1/structure/table/EBA/F_01.01/3.4") + t = resp.json()["data"]["tables"][0] + # Row header (header_id=22) carries key_variable_vid=7777 → + # that variable must land in keyVariables, not factVariables. + assert len(t["keyVariables"]) == 1 + k = t["keyVariables"][0] + assert k["versionId"] == 7777 + assert k["code"] == "kv01" + # The key variable also inherits the row header's subcategory + # enumeration — same source as the fact variable in cells of + # this row. + assert k["isEnumerated"] is True + assert k["enumeration"]["subcategoryVersionId"] == 4441 + # And the fact bucket holds the cell-referenced variable only. 
+ assert {v["versionId"] for v in t["factVariables"]} == {9999} + + def test_header_links_back_to_key_variable(self, client): + resp = client.get("/api/v1/structure/table/EBA/F_01.01/3.4") + t = resp.json()["data"]["tables"][0] + row_header = next(h for h in t["headers"] if h["direction"] == "y") + assert row_header["keyVariableVersionId"] == 7777 + assert row_header["subcategoryVersionId"] == 4441 + + +class TestDetailParameter: + def test_allstubs_strips_subtrees(self, client): + resp = client.get( + "/api/v1/structure/table/EBA/F_01.01/3.4?detail=allstubs" + ) + t = resp.json()["data"]["tables"][0] + for key in ("headers", "cells", "keyVariables", "factVariables"): + assert key not in t + # but identifiers are still present + for key in ("id", "tableVersionId", "code", "owner", "release"): + assert key in t + + +class TestReferencesAll: + def test_includes_organisations(self, client): + resp = client.get( + "/api/v1/structure/table/EBA/F_01.01/3.4?references=all" + ) + body = resp.json() + assert "organisations" in body["data"] + assert body["data"]["organisations"][0]["acronym"] == "EBA" + + def test_default_omits_organisations(self, client): + resp = client.get("/api/v1/structure/table/EBA/F_01.01/3.4") + assert "organisations" not in resp.json()["data"] + + +class TestEmptyDatabase: + def test_empty_returns_204(self, empty_client): + resp = empty_client.get("/api/v1/structure/table/EBA/F_01.01/3.4") + assert resp.status_code == 204 diff --git a/tests/unit/server/test_structure_tablegroup.py b/tests/unit/server/test_structure_tablegroup.py new file mode 100644 index 0000000..6842a0d --- /dev/null +++ b/tests/unit/server/test_structure_tablegroup.py @@ -0,0 +1,398 @@ +"""Integration tests for /api/v1/structure/tablegroup endpoints.""" + +from __future__ import annotations + +from datetime import date + +import pytest +from sqlalchemy import create_engine, event +from sqlalchemy.orm import Session +from sqlalchemy.pool import StaticPool + +import dpmcore.orm # 
noqa: F401 — ensure all models are loaded +from dpmcore.orm.base import Base +from dpmcore.orm.infrastructure import Concept, Organisation, Release +from dpmcore.orm.rendering import ( + Table, + TableGroup, + TableGroupComposition, + TableVersion, +) +from dpmcore.server.app import create_app + +# ------------------------------------------------------------------ # +# Seed model +# ------------------------------------------------------------------ # +# +# Two releases (4.0, 4.1). Hierarchy: +# +# ROOT_BS (id=100, no parent) +# ├── BS_ASSETS (id=200, parent=100) +# └── BS_LIABS (id=201, parent=100) +# +# Compositions: +# ROOT_BS contains tables 1 + 2 at 4.0 only (TableGroupComposition +# end_release_id=2 → gone at 4.1). +# BS_ASSETS contains table 1 at both releases. +# BS_LIABS contains table 2 at both releases. +# ------------------------------------------------------------------ # + + +@pytest.fixture +def engine(): + eng = create_engine( + "sqlite:///:memory:", + connect_args={"check_same_thread": False}, + poolclass=StaticPool, + ) + Base.metadata.create_all(eng) + return eng + + +@pytest.fixture +def seeded_engine(engine): + s = Session(bind=engine) + + s.add( + Organisation( + org_id=1, name="European Banking Authority", acronym="EBA" + ) + ) + s.flush() + + for guid in [ + "c-rel-1", + "c-rel-2", + "c-tbl-1", + "c-tbl-2", + ]: + s.add(Concept(concept_guid=guid, owner_id=1)) + s.flush() + + s.add_all( + [ + Release( + release_id=1, + code="4.0", + date=date(2024, 1, 1), + status="Final", + row_guid="c-rel-1", + owner_id=1, + ), + Release( + release_id=2, + code="4.1", + date=date(2024, 6, 1), + status="Final", + row_guid="c-rel-2", + owner_id=1, + ), + ] + ) + s.flush() + + # Hierarchy: ROOT_BS with two children. 
+ s.add( + TableGroup( + table_group_id=100, + code="ROOT_BS", + name="Balance Sheet — root", + description="Top-level balance sheet group", + type="navigation", + owner_id=1, + parent_table_group_id=None, + start_release_id=1, + end_release_id=None, + ) + ) + s.flush() + s.add_all( + [ + TableGroup( + table_group_id=200, + code="BS_ASSETS", + name="Balance Sheet — assets", + type="navigation", + owner_id=1, + parent_table_group_id=100, + start_release_id=1, + end_release_id=None, + ), + TableGroup( + table_group_id=201, + code="BS_LIABS", + name="Balance Sheet — liabilities", + type="navigation", + owner_id=1, + parent_table_group_id=100, + start_release_id=1, + end_release_id=None, + ), + ] + ) + s.flush() + + # Tables + TableVersions. + s.add_all( + [ + Table( + table_id=1, + is_abstract=False, + has_open_columns=False, + has_open_rows=False, + has_open_sheets=False, + is_normalised=False, + is_flat=False, + row_guid="c-tbl-1", + owner_id=1, + ), + Table( + table_id=2, + is_abstract=False, + has_open_columns=False, + has_open_rows=False, + has_open_sheets=False, + is_normalised=False, + is_flat=False, + row_guid="c-tbl-2", + owner_id=1, + ), + ] + ) + s.flush() + s.add_all( + [ + TableVersion( + table_vid=1000, + code="F_01.01", + name="Assets", + table_id=1, + start_release_id=1, + end_release_id=None, + ), + TableVersion( + table_vid=2000, + code="F_02.00", + name="Liabilities", + table_id=2, + start_release_id=1, + end_release_id=None, + ), + ] + ) + s.flush() + + # Compositions: ROOT_BS contains both tables but only at 4.0 + # (end_release_id=2 → gone at 4.1). + s.add_all( + [ + TableGroupComposition( + table_group_id=100, + table_id=1, + order=1, + start_release_id=1, + end_release_id=2, + ), + TableGroupComposition( + table_group_id=100, + table_id=2, + order=2, + start_release_id=1, + end_release_id=2, + ), + # Child groups carry their own tables across both releases. 
+ TableGroupComposition( + table_group_id=200, + table_id=1, + order=1, + start_release_id=1, + end_release_id=None, + ), + TableGroupComposition( + table_group_id=201, + table_id=2, + order=1, + start_release_id=1, + end_release_id=None, + ), + ] + ) + s.commit() + s.close() + return engine + + +@pytest.fixture +def client(seeded_engine): + from starlette.testclient import TestClient + + app = create_app("sqlite:///:memory:", engine=seeded_engine) + return TestClient(app) + + +@pytest.fixture +def empty_client(engine): + from starlette.testclient import TestClient + + app = create_app("sqlite:///:memory:", engine=engine) + return TestClient(app) + + +def _count_queries(engine): + class _Counter: + def __init__(self): + self.count = 0 + + def __enter__(self): + def _l(c, cur, stmt, p, ctx, m): + self.count += 1 + + self._l = _l + event.listen(engine, "before_cursor_execute", _l) + return self + + def __exit__(self, *exc): + event.remove(engine, "before_cursor_execute", self._l) + return False + + return _Counter() + + +# ------------------------------------------------------------------ # +# Tests — default response +# ------------------------------------------------------------------ # + + +class TestDefaultResponse: + def test_root_group_at_4_0(self, client): + resp = client.get("/api/v1/structure/tablegroup/EBA/ROOT_BS/4.0") + assert resp.status_code == 200 + body = resp.json() + groups = body["data"]["tableGroups"] + assert len(groups) == 1 + g = groups[0] + assert g["code"] == "ROOT_BS" + assert g["parentTableGroupId"] is None + assert sorted(g["childTableGroupIds"]) == [200, 201] + # No children expansion unless requested. 
+ assert "tables" not in g + assert "childTableGroups" not in g + + def test_child_group_has_parent(self, client): + resp = client.get("/api/v1/structure/tablegroup/EBA/BS_ASSETS/4.0") + g = resp.json()["data"]["tableGroups"][0] + assert g["parentTableGroupId"] == 100 + assert g["childTableGroupIds"] == [] + + def test_wildcard_lists_all(self, client): + resp = client.get("/api/v1/structure/tablegroup/EBA/*/4.0") + codes = {g["code"] for g in resp.json()["data"]["tableGroups"]} + assert codes == {"ROOT_BS", "BS_ASSETS", "BS_LIABS"} + + def test_nonexistent_code_204(self, client): + resp = client.get("/api/v1/structure/tablegroup/EBA/NOPE/4.0") + assert resp.status_code == 204 + + +class TestChildrenExpansion: + def test_root_at_4_0_has_tables_and_child_stubs(self, client): + resp = client.get( + "/api/v1/structure/tablegroup/EBA/ROOT_BS/4.0?references=children" + ) + g = resp.json()["data"]["tableGroups"][0] + # Tables in composition order. + assert [t["code"] for t in g["tables"]] == ["F_01.01", "F_02.00"] + # Tables carry the full table shape (headers/cells/variables). + for t in g["tables"]: + for key in ("headers", "cells", "keyVariables", "factVariables"): + assert key in t + # Child group stubs. + child_codes = {c["code"] for c in g["childTableGroups"]} + assert child_codes == {"BS_ASSETS", "BS_LIABS"} + + def test_root_at_4_1_loses_its_direct_tables(self, client): + """ROOT_BS compositions end at release 4.1 → empty tables[], + but the child group stubs (still alive) remain. 
+ """ + resp = client.get( + "/api/v1/structure/tablegroup/EBA/ROOT_BS/4.1?references=children" + ) + g = resp.json()["data"]["tableGroups"][0] + assert g["tables"] == [] + child_codes = {c["code"] for c in g["childTableGroups"]} + assert child_codes == {"BS_ASSETS", "BS_LIABS"} + + def test_child_group_with_no_subgroups(self, client): + resp = client.get( + "/api/v1/structure/tablegroup/EBA/BS_ASSETS/4.0?references=children" + ) + g = resp.json()["data"]["tableGroups"][0] + assert [t["code"] for t in g["tables"]] == ["F_01.01"] + assert g["childTableGroups"] == [] + + def test_references_all_includes_organisations(self, client): + resp = client.get( + "/api/v1/structure/tablegroup/EBA/ROOT_BS/4.0?references=all" + ) + body = resp.json() + assert "organisations" in body["data"] + # Children also expanded. + assert "tables" in body["data"]["tableGroups"][0] + + +class TestAllstubs: + def test_strips_subtrees(self, client): + resp = client.get( + "/api/v1/structure/tablegroup/EBA/ROOT_BS/4.0?detail=allstubs" + ) + g = resp.json()["data"]["tableGroups"][0] + for key in ( + "tables", + "childTableGroups", + "childTableGroupIds", + "parentTableGroupId", + "description", + "type", + ): + assert key not in g + for key in ("id", "code", "name", "owner", "release"): + assert key in g + + +class TestEmptyDatabase: + def test_empty_returns_204(self, empty_client): + resp = empty_client.get("/api/v1/structure/tablegroup/EBA/ROOT_BS/4.0") + assert resp.status_code == 204 + + +# ------------------------------------------------------------------ # +# Performance — query budget +# ------------------------------------------------------------------ # + + +class TestQueryBudget: + def test_default_path_minimal(self, client, seeded_engine): + with _count_queries(seeded_engine) as counter: + resp = client.get("/api/v1/structure/tablegroup/EBA/*/4.0") + assert resp.status_code == 200 + # Budget: 3 release resolution + tablegroup query + + # child-id lookup + owner lookup ≈ 6–7. 
Cap at 10. + assert counter.count <= 10, ( + f"default tablegroup path issued {counter.count} queries." + ) + + def test_children_path_bounded(self, client, seeded_engine): + with _count_queries(seeded_engine) as counter: + resp = client.get( + "/api/v1/structure/tablegroup/EBA/*/4.0?references=children" + ) + assert resp.status_code == 200 + body = resp.json() + assert len(body["data"]["tableGroups"]) == 3 + # Children path adds composition + TableVersion + the batch + # table builder's headers/cells/variables/property/subcat + # loads + child-group stubs. Bounded — typically ~17. + assert counter.count <= 22, ( + f"children path issued {counter.count} queries — " + f"likely an N+1 regression." + ) diff --git a/tests/unit/server/test_structure_variable.py b/tests/unit/server/test_structure_variable.py new file mode 100644 index 0000000..20f2455 --- /dev/null +++ b/tests/unit/server/test_structure_variable.py @@ -0,0 +1,487 @@ +"""Integration tests for /api/v1/structure/variable endpoints.""" + +from __future__ import annotations + +from datetime import date + +import pytest +from sqlalchemy import create_engine, event +from sqlalchemy.orm import Session +from sqlalchemy.pool import StaticPool + +import dpmcore.orm # noqa: F401 — ensure all models are loaded +from dpmcore.orm.base import Base +from dpmcore.orm.glossary import ( + Category, + Item, + ItemCategory, + Property, + SubCategory, + SubCategoryItem, + SubCategoryVersion, +) +from dpmcore.orm.infrastructure import ( + Concept, + DataType, + Organisation, + Release, +) +from dpmcore.orm.variables import ( + CompoundKey, + Variable, + VariableVersion, +) +from dpmcore.server.app import create_app + +# ------------------------------------------------------------------ # +# Seed model +# ------------------------------------------------------------------ # +# +# Three Variables: +# - 88 (datapoint, "ei001") with subcategory_vid → AT_SUB → ASSET_TYPE. +# Enumeration items: LOAN (alive 4.0 only), BOND (alive all). 
+# - 89 (key, "kv01") with no subcategory_vid → no enumeration. +# - 90 (datapoint, "ei002") with compound key. +# +# All across two releases (4.0 / 4.1). Variable 88 changes its +# VariableVersion between 4.0 and 4.1 to exercise the release filter. +# ------------------------------------------------------------------ # + + +@pytest.fixture +def engine(): + eng = create_engine( + "sqlite:///:memory:", + connect_args={"check_same_thread": False}, + poolclass=StaticPool, + ) + Base.metadata.create_all(eng) + return eng + + +@pytest.fixture +def seeded_engine(engine): + s = Session(bind=engine) + + s.add( + Organisation( + org_id=1, name="European Banking Authority", acronym="EBA" + ) + ) + s.flush() + + for guid in [ + "c-rel-1", + "c-rel-2", + "c-var-88", + "c-var-89", + "c-var-90", + ]: + s.add(Concept(concept_guid=guid, owner_id=1)) + s.flush() + + s.add_all( + [ + Release( + release_id=1, + code="4.0", + date=date(2024, 1, 1), + status="Final", + is_current=False, + row_guid="c-rel-1", + owner_id=1, + ), + Release( + release_id=2, + code="4.1", + date=date(2024, 6, 1), + status="Final", + is_current=True, + row_guid="c-rel-2", + owner_id=1, + ), + ] + ) + s.flush() + + s.add( + DataType(data_type_id=1, code="String", name="String", is_active=True) + ) + s.flush() + + # ASSET_TYPE category + AT_SUB subcategory (release-scoped). + s.add( + Category( + category_id=60, + code="ASSET_TYPE", + name="Asset type", + is_enumerated=True, + is_active=True, + is_external_ref_data=False, + created_release=1, + owner_id=1, + ) + ) + s.flush() + s.add( + SubCategory( + subcategory_id=400, + category_id=60, + code="AT_SUB", + name="Asset type subset", + owner_id=1, + ) + ) + s.flush() + s.add( + SubCategoryVersion( + subcategory_vid=4441, + subcategory_id=400, + start_release_id=1, + end_release_id=None, + ) + ) + s.flush() + + # Items + ItemCategory (LOAN ends at 4.1, BOND alive throughout). 
+ s.add_all( + [ + Item(item_id=700, name="Loan", is_property=False, is_active=True), + Item(item_id=701, name="Bond", is_property=False, is_active=True), + Item( + item_id=51, + name="Asset property", + is_property=True, + is_active=True, + ), + ] + ) + s.flush() + s.add_all( + [ + ItemCategory( + item_id=700, + start_release_id=1, + category_id=60, + code="LOAN", + is_default_item=False, + signature="ASSET_TYPE(LOAN)", + end_release_id=2, # gone from 4.1 + ), + ItemCategory( + item_id=701, + start_release_id=1, + category_id=60, + code="BOND", + is_default_item=False, + signature="ASSET_TYPE(BOND)", + end_release_id=None, + ), + ] + ) + s.flush() + s.add_all( + [ + SubCategoryItem(item_id=700, subcategory_vid=4441, order=1), + SubCategoryItem(item_id=701, subcategory_vid=4441, order=2), + ] + ) + s.flush() + s.add( + Property( + property_id=51, + is_composite=False, + is_metric=False, + data_type_id=1, + owner_id=1, + ) + ) + s.flush() + + # CompoundKey for variable 90. + s.add( + CompoundKey( + key_id=999, signature="K(EI002)", row_guid=None, owner_id=1 + ) + ) + s.flush() + + # Variables. + s.add_all( + [ + Variable( + variable_id=88, + type="datapoint", + row_guid="c-var-88", + owner_id=1, + ), + Variable( + variable_id=89, + type="key", + row_guid="c-var-89", + owner_id=1, + ), + Variable( + variable_id=90, + type="datapoint", + row_guid="c-var-90", + owner_id=1, + ), + ] + ) + s.flush() + + # VariableVersions. Variable 88 has two versions to exercise the + # release filter. 
+ s.add_all( + [ + VariableVersion( + variable_vid=8880, + variable_id=88, + property_id=51, + subcategory_vid=4441, + code="ei001", + name="Asset value (v1)", + is_multi_valued=False, + start_release_id=1, + end_release_id=2, + ), + VariableVersion( + variable_vid=8881, + variable_id=88, + property_id=51, + subcategory_vid=4441, + code="ei001", + name="Asset value (v2)", + is_multi_valued=False, + start_release_id=2, + end_release_id=None, + ), + VariableVersion( + variable_vid=8990, + variable_id=89, + property_id=None, + subcategory_vid=None, + code="kv01", + name="Row key", + is_multi_valued=False, + start_release_id=1, + end_release_id=None, + ), + VariableVersion( + variable_vid=9000, + variable_id=90, + property_id=51, + subcategory_vid=None, + key_id=999, + code="ei002", + name="Other datapoint", + is_multi_valued=False, + start_release_id=1, + end_release_id=None, + ), + ] + ) + s.commit() + s.close() + return engine + + +@pytest.fixture +def client(seeded_engine): + from starlette.testclient import TestClient + + app = create_app("sqlite:///:memory:", engine=seeded_engine) + return TestClient(app) + + +@pytest.fixture +def empty_client(engine): + from starlette.testclient import TestClient + + app = create_app("sqlite:///:memory:", engine=engine) + return TestClient(app) + + +def _count_queries(engine): + class _Counter: + def __init__(self): + self.count = 0 + + def __enter__(self): + def _l(c, cur, stmt, p, ctx, m): + self.count += 1 + + self._l = _l + event.listen(engine, "before_cursor_execute", _l) + return self + + def __exit__(self, *exc): + event.remove(engine, "before_cursor_execute", self._l) + return False + + return _Counter() + + +# ------------------------------------------------------------------ # +# Tests — default response +# ------------------------------------------------------------------ # + + +class TestSingleVariable: + def test_at_literal_release(self, client): + resp = client.get("/api/v1/structure/variable/EBA/ei001/4.0") + 
assert resp.status_code == 200 + body = resp.json() + vs = body["data"]["variables"] + assert len(vs) == 1 + v = vs[0] + assert v["versionId"] == 8880 + assert v["code"] == "ei001" + assert v["type"] == "datapoint" + assert v["owner"] == "EBA" + assert v["release"] == "4.0" + + def test_latest_returns_v2(self, client): + resp = client.get("/api/v1/structure/variable/EBA/ei001/~") + v = resp.json()["data"]["variables"][0] + assert v["versionId"] == 8881 + + def test_nonexistent_code_204(self, client): + resp = client.get("/api/v1/structure/variable/EBA/missing/4.0") + assert resp.status_code == 204 + + def test_unknown_owner_204(self, client): + resp = client.get("/api/v1/structure/variable/UNKNOWN/ei001/4.0") + assert resp.status_code == 204 + + +class TestAllVariables: + def test_wildcards_list_all(self, client): + resp = client.get("/api/v1/structure/variable/EBA/*/4.0") + codes = {v["code"] for v in resp.json()["data"]["variables"]} + assert codes == {"ei001", "kv01", "ei002"} + + def test_release_wildcard_returns_all_versions(self, client): + resp = client.get("/api/v1/structure/variable/EBA/ei001/*") + vids = sorted(v["versionId"] for v in resp.json()["data"]["variables"]) + # Two VariableVersions of variable 88. + assert vids == [8880, 8881] + + +class TestEnumeration: + def test_enumeration_present_for_datapoint(self, client): + resp = client.get("/api/v1/structure/variable/EBA/ei001/4.0") + v = resp.json()["data"]["variables"][0] + assert v["isEnumerated"] is True + assert v["enumeration"]["subcategoryCode"] == "AT_SUB" + codes = {i["code"] for i in v["enumeration"]["items"]} + # LOAN alive at 4.0; BOND alive throughout. + assert codes == {"LOAN", "BOND"} + + def test_enumeration_items_shrink_at_4_1(self, client): + resp = client.get("/api/v1/structure/variable/EBA/ei001/4.1") + v = resp.json()["data"]["variables"][0] + codes = {i["code"] for i in v["enumeration"]["items"]} + # LOAN ends at 4.1 → only BOND remains valid. 
+ assert codes == {"BOND"} + + def test_no_enumeration_when_subcategory_vid_null(self, client): + resp = client.get("/api/v1/structure/variable/EBA/kv01/4.0") + v = resp.json()["data"]["variables"][0] + assert v["isEnumerated"] is False + assert v["enumeration"] is None + assert v["subcategoryVersionId"] is None + + +class TestPropertyAndKey: + def test_property_reference_present(self, client): + resp = client.get("/api/v1/structure/variable/EBA/ei001/4.0") + v = resp.json()["data"]["variables"][0] + assert v["property"]["id"] == 51 + assert v["property"]["name"] == "Asset property" + + def test_property_null_when_absent(self, client): + resp = client.get("/api/v1/structure/variable/EBA/kv01/4.0") + v = resp.json()["data"]["variables"][0] + assert v["property"] is None + + def test_key_signature_resolved(self, client): + resp = client.get("/api/v1/structure/variable/EBA/ei002/4.0") + v = resp.json()["data"]["variables"][0] + assert v["keyId"] == 999 + assert v["keySignature"] == "K(EI002)" + + def test_key_signature_null_when_absent(self, client): + resp = client.get("/api/v1/structure/variable/EBA/kv01/4.0") + v = resp.json()["data"]["variables"][0] + assert v["keyId"] is None + assert v["keySignature"] is None + + +class TestAllstubs: + def test_strips_subtrees(self, client): + resp = client.get( + "/api/v1/structure/variable/EBA/ei001/4.0?detail=allstubs" + ) + v = resp.json()["data"]["variables"][0] + for key in ( + "property", + "enumeration", + "isEnumerated", + "subcategoryVersionId", + "keyId", + "keySignature", + ): + assert key not in v + for key in ("id", "versionId", "code", "type", "owner", "release"): + assert key in v + + +class TestReferences: + def test_all_includes_organisations(self, client): + resp = client.get( + "/api/v1/structure/variable/EBA/ei001/4.0?references=all" + ) + body = resp.json() + assert "organisations" in body["data"] + + def test_children_is_silent_noop(self, client): + resp = client.get( + 
"/api/v1/structure/variable/EBA/ei001/4.0?references=children" + ) + v = resp.json()["data"]["variables"][0] + for key in ("tables", "modules", "children"): + assert key not in v + + +class TestEmptyDatabase: + def test_empty_returns_204(self, empty_client): + resp = empty_client.get("/api/v1/structure/variable/EBA/ei001/4.0") + assert resp.status_code == 204 + + +# ------------------------------------------------------------------ # +# Performance — query budget +# ------------------------------------------------------------------ # + + +class TestQueryBudget: + def test_query_count_bounded(self, client, seeded_engine): + with _count_queries(seeded_engine) as counter: + resp = client.get("/api/v1/structure/variable/EBA/*/4.0") + assert resp.status_code == 200 + body = resp.json() + # All three variables match at 4.0. + assert len(body["data"]["variables"]) == 3 + # Budget breakdown: + # 3 release-resolution queries (filter_by_release internals); + # 1 variable main, 1 property names, 1 key signatures, + # 3 subcategory enumeration sub-queries + # (SCV+SC+Cat / SCI+Item / ItemCategory), + # 1 owner lookup. + # ≤14 leaves headroom for incidental changes. + assert counter.count <= 14, ( + f"variable path issued {counter.count} queries — " + f"likely an N+1 regression." + ) diff --git a/tests/unit/server/test_structure_versioning_regression.py b/tests/unit/server/test_structure_versioning_regression.py new file mode 100644 index 0000000..828f3d6 --- /dev/null +++ b/tests/unit/server/test_structure_versioning_regression.py @@ -0,0 +1,422 @@ +"""Regression tests for release-aware versioning in the structure API. + +Covers two correctness fixes: + +* Release ordering must use the semver-parsed ``sort_order`` of + ``Release.code``, NOT the opaque ``release_id`` FK (non-monotonic + from DPM 4.2.1) nor ``date``. A chronological backport published + with a high id / late date must still sort inside its semver + lineage. 
(``_get_all_releases`` / ``_window_alive`` / + ``_version_at_release``.) +* At ``release=*`` a property's enumeration window is keyed per + ItemCategory *version*, not per ``property_id`` — so two versions of + the same property each surface the enumeration valid at their own + release. (``query_properties``.) +""" + +from __future__ import annotations + +from datetime import date + +import pytest +from sqlalchemy import create_engine +from sqlalchemy.orm import Session +from sqlalchemy.pool import StaticPool + +import dpmcore.orm # noqa: F401 — ensure all models are loaded +from dpmcore.orm.base import Base +from dpmcore.orm.glossary import ( + Category, + Item, + ItemCategory, + Property, + PropertyCategory, +) +from dpmcore.orm.infrastructure import DataType, Organisation, Release +from dpmcore.server.app import create_app + + +@pytest.fixture +def engine(): + eng = create_engine( + "sqlite:///:memory:", + connect_args={"check_same_thread": False}, + poolclass=StaticPool, + ) + Base.metadata.create_all(eng) + return eng + + +# ------------------------------------------------------------------ # +# Fixture 1 — non-monotonic release ids (backport scenario) +# ------------------------------------------------------------------ # +# +# Three releases whose release_id AND date order disagree with semver: +# code 4.0 -> release_id 100, date 2024-01 +# code 4.2 -> release_id 101, date 2024-06 +# code 4.1 -> release_id 200, date 2024-09 (a backport: highest id +# and latest date, but +# semver-wise between). +# +# Category MC (created at 4.0) with two items: +# A: alive from 4.0 onward. +# B: added at 4.1 (release_id 200) onward. +# +# Correct (sort_order) walk => versions {A} @4.0, {A,B} @4.1, and the +# version active at 4.2 is {A,B}. A release_id/date walk would break +# early and report {A} at 4.2. 
+# ------------------------------------------------------------------ #
+
+
+@pytest.fixture
+def backport_engine(engine):
+    """Seed ``engine`` with the non-monotonic release-id scenario.
+
+    Inserts one organisation (EBA), three releases whose ``release_id``
+    and ``date`` order disagree with the semver order of their codes
+    (4.0 -> 100, 4.2 -> 101, 4.1 -> 200), one enumerated category
+    ``MC`` and two member items: ``A`` from release 100 onward and
+    ``B`` from release 200 onward.
+
+    Returns the same engine so dependent fixtures can build an app on
+    top of the seeded database.
+    """
+    # Use the session as a context manager so it is closed even when
+    # a flush/commit fails while seeding (the previous explicit
+    # ``s.close()`` only ran on the success path).
+    with Session(bind=engine) as s:
+        s.add(
+            Organisation(
+                org_id=1, name="European Banking Authority", acronym="EBA"
+            )
+        )
+        s.flush()
+
+        s.add_all(
+            [
+                Release(
+                    release_id=100,
+                    code="4.0",
+                    date=date(2024, 1, 1),
+                    status="Final",
+                    is_current=False,
+                ),
+                Release(
+                    release_id=101,
+                    code="4.2",
+                    date=date(2024, 6, 1),
+                    status="Final",
+                    is_current=True,
+                ),
+                Release(
+                    release_id=200,
+                    code="4.1",
+                    date=date(2024, 9, 1),
+                    status="Final",
+                    is_current=False,
+                ),
+            ]
+        )
+        s.flush()
+
+        s.add(
+            Category(
+                category_id=1,
+                code="MC",
+                name="Main Category",
+                description="",
+                is_enumerated=True,
+                is_active=True,
+                is_external_ref_data=False,
+                ref_data_source=None,
+                row_guid=None,
+                created_release=100,
+                owner_id=1,
+            )
+        )
+        s.flush()
+
+        s.add_all(
+            [
+                Item(
+                    item_id=10,
+                    name="Item Alpha",
+                    description="",
+                    is_property=False,
+                    is_active=True,
+                ),
+                Item(
+                    item_id=11,
+                    name="Item Beta",
+                    description="",
+                    is_property=False,
+                    is_active=True,
+                ),
+            ]
+        )
+        s.flush()
+
+        s.add_all(
+            [
+                ItemCategory(
+                    item_id=10,
+                    start_release_id=100,  # 4.0 onward
+                    category_id=1,
+                    code="A",
+                    is_default_item=False,
+                    signature="MC(A)",
+                    end_release_id=None,
+                ),
+                ItemCategory(
+                    item_id=11,
+                    start_release_id=200,  # 4.1 (backport id) onward
+                    category_id=1,
+                    code="B",
+                    is_default_item=False,
+                    signature="MC(B)",
+                    end_release_id=None,
+                ),
+            ]
+        )
+        s.commit()
+    return engine
+
+
+@pytest.fixture
+def backport_client(backport_engine):
+    """Return a Starlette ``TestClient`` bound to the backport data."""
+    from starlette.testclient import TestClient
+
+    app = create_app("sqlite:///:memory:", engine=backport_engine)
+    return TestClient(app)
+
+
+class TestSortOrderNotReleaseId:
+    """Category versions must follow semver order of ``Release.code``."""
+
+    def test_versions_listed_in_semver_order(self, backport_client):
+        resp = backport_client.get("/api/v1/structure/category/*/1/*")
+        assert resp.status_code == 200
+        cats = resp.json()["data"]["categories"]
+        releases = [c["release"] for c in cats]
+        # Semver order, not release_id (100,101,200) order.
+        assert releases == ["4.0", "4.1"]
+
+    def test_item_added_by_backport_visible_at_later_release(
+        self, backport_client
+    ):
+        """B (added at 4.1) must be present at 4.2.
+
+        A release_id walk breaks at the 4.1 backport (release_id 200 >
+        4.2's release_id 101) and would omit B here.
+        """
+        resp = backport_client.get("/api/v1/structure/category/*/1/4.2")
+        assert resp.status_code == 200
+        cat = resp.json()["data"]["categories"][0]
+        item_ids = {i["id"] for i in cat["items"]}
+        assert item_ids == {10, 11}
+
+    def test_earliest_release_excludes_later_item(self, backport_client):
+        resp = backport_client.get("/api/v1/structure/category/*/1/4.0")
+        assert resp.status_code == 200
+        cat = resp.json()["data"]["categories"][0]
+        item_ids = {i["id"] for i in cat["items"]}
+        assert item_ids == {10}
+
+    def test_latest_resolves_to_highest_semver(self, backport_client):
+        resp = backport_client.get("/api/v1/structure/category/*/1/~")
+        assert resp.status_code == 200
+        cat = resp.json()["data"]["categories"][0]
+        # Latest semver is 4.2; active version there was created at 4.1.
+        assert cat["release"] == "4.1"
+        assert {i["id"] for i in cat["items"]} == {10, 11}
+
+
+# ------------------------------------------------------------------ #
+# Fixture 2 — enumerated property with two versions
+# ------------------------------------------------------------------ #
+#
+# Property 100 has two `_PR` ItemCategory versions (ENUMP @4.0,
+# ENUMP2 @4.1) and links to enumerated category ECAT. ECAT's members
+# change across releases:
+#   XC: alive 4.0 only   YC: alive throughout   ZC: 4.1 onward
+# so the enumeration is {XC,YC} @4.0 and {YC,ZC} @4.1. At release=*
+# each property version must carry its own release's enumeration.
+# ------------------------------------------------------------------ #
+
+
+@pytest.fixture
+def enum_versions_engine(engine):
+    """Seed ``engine`` with an enumerated property that has two versions.
+
+    Inserts one organisation (EBA), releases 4.0 (id 1) and 4.1 (id 2),
+    the ``Enumeration`` data type, the ``_PR`` and ``ECAT`` categories,
+    property 100 with two ``_PR`` ItemCategory versions (``ENUMP`` and
+    ``ENUMP2``), three ``ECAT`` members (``XC``, ``YC``, ``ZC``) with
+    different release windows, and the PropertyCategory link from
+    property 100 to ``ECAT``.
+
+    Returns the same engine so dependent fixtures can build an app on
+    top of the seeded database.
+    """
+    # Use the session as a context manager so it is closed even when
+    # a flush/commit fails while seeding (the previous explicit
+    # ``s.close()`` only ran on the success path).
+    with Session(bind=engine) as s:
+        s.add(
+            Organisation(
+                org_id=1, name="European Banking Authority", acronym="EBA"
+            )
+        )
+        s.flush()
+
+        s.add_all(
+            [
+                Release(
+                    release_id=1,
+                    code="4.0",
+                    date=date(2024, 1, 1),
+                    status="Final",
+                    is_current=False,
+                ),
+                Release(
+                    release_id=2,
+                    code="4.1",
+                    date=date(2024, 6, 1),
+                    status="Final",
+                    is_current=True,
+                ),
+            ]
+        )
+        s.flush()
+
+        s.add(
+            DataType(
+                data_type_id=2,
+                code="Enumeration",
+                name="Enumeration",
+                is_active=True,
+            )
+        )
+        s.flush()
+
+        s.add_all(
+            [
+                Category(
+                    category_id=1,
+                    code="_PR",
+                    name="Property",
+                    description="",
+                    is_enumerated=False,
+                    is_active=True,
+                    is_external_ref_data=False,
+                    created_release=1,
+                    owner_id=1,
+                ),
+                Category(
+                    category_id=10,
+                    code="ECAT",
+                    name="Enum Category",
+                    description="",
+                    is_enumerated=True,
+                    is_active=True,
+                    is_external_ref_data=False,
+                    created_release=1,
+                    owner_id=1,
+                ),
+            ]
+        )
+        s.flush()
+
+        s.add_all(
+            [
+                Item(
+                    item_id=100,
+                    name="Enumerated Property",
+                    description="",
+                    is_property=True,
+                    is_active=True,
+                    owner_id=1,
+                ),
+                Item(
+                    item_id=101,
+                    name="X",
+                    is_property=False,
+                    is_active=True,
+                ),
+                Item(
+                    item_id=102,
+                    name="Y",
+                    is_property=False,
+                    is_active=True,
+                ),
+                Item(
+                    item_id=103,
+                    name="Z",
+                    is_property=False,
+                    is_active=True,
+                ),
+            ]
+        )
+        s.flush()
+
+        s.add(
+            Property(
+                property_id=100,
+                is_composite=False,
+                is_metric=False,
+                data_type_id=2,
+                owner_id=1,
+            )
+        )
+        s.flush()
+
+        s.add_all(
+            [
+                # Two `_PR` versions of the property.
+                ItemCategory(
+                    item_id=100,
+                    start_release_id=1,
+                    category_id=1,
+                    code="ENUMP",
+                    is_default_item=False,
+                    signature="_PR(ENUMP)",
+                    end_release_id=2,
+                ),
+                ItemCategory(
+                    item_id=100,
+                    start_release_id=2,
+                    category_id=1,
+                    code="ENUMP2",
+                    is_default_item=False,
+                    signature="_PR(ENUMP2)",
+                    end_release_id=None,
+                ),
+                # Enumeration members of ECAT (exclusive end semantics).
+                ItemCategory(
+                    item_id=101,
+                    start_release_id=1,
+                    category_id=10,
+                    code="XC",
+                    is_default_item=False,
+                    signature="ECAT(XC)",
+                    end_release_id=2,  # alive 4.0 only
+                ),
+                ItemCategory(
+                    item_id=102,
+                    start_release_id=1,
+                    category_id=10,
+                    code="YC",
+                    is_default_item=False,
+                    signature="ECAT(YC)",
+                    end_release_id=None,  # alive throughout
+                ),
+                ItemCategory(
+                    item_id=103,
+                    start_release_id=2,
+                    category_id=10,
+                    code="ZC",
+                    is_default_item=False,
+                    signature="ECAT(ZC)",
+                    end_release_id=None,  # 4.1 onward
+                ),
+            ]
+        )
+        s.flush()
+
+        s.add(
+            PropertyCategory(
+                property_id=100,
+                start_release_id=1,
+                category_id=10,
+                end_release_id=None,
+            )
+        )
+        s.commit()
+    return engine
+
+
+@pytest.fixture
+def enum_versions_client(enum_versions_engine):
+    """Return a Starlette ``TestClient`` bound to the two-version data."""
+    from starlette.testclient import TestClient
+
+    app = create_app("sqlite:///:memory:", engine=enum_versions_engine)
+    return TestClient(app)
+
+
+class TestPropertyEnumerationPerVersion:
+    """At ``release=*`` each property version keeps its own enumeration."""
+
+    def test_each_version_carries_its_own_enumeration(
+        self, enum_versions_client
+    ):
+        resp = enum_versions_client.get("/api/v1/structure/property/*/100/*")
+        assert resp.status_code == 200
+        props = resp.json()["data"]["properties"]
+        by_release = {p["release"]: p for p in props}
+        assert set(by_release) == {"4.0", "4.1"}
+
+        codes_40 = {
+            i["code"] for i in by_release["4.0"]["enumeration"]["items"]
+        }
+        codes_41 = {
+            i["code"] for i in by_release["4.1"]["enumeration"]["items"]
+        }
+
+        # Per-version windows — NOT both pinned to the last version's
+        # release (which would make 4.0 wrongly show {YC, ZC}).
+        assert codes_40 == {"XC", "YC"}
+        assert codes_41 == {"YC", "ZC"}