diff --git a/datajunction-server/datajunction_server/database/column.py b/datajunction-server/datajunction_server/database/column.py index 56887c9b7..f34ade845 100644 --- a/datajunction-server/datajunction_server/database/column.py +++ b/datajunction-server/datajunction_server/database/column.py @@ -3,7 +3,6 @@ from typing import TYPE_CHECKING, List, Optional, Tuple from sqlalchemy import BigInteger, ForeignKey, Index, Integer, String -from sqlalchemy.dialects.postgresql import JSONB from sqlalchemy.orm import Mapped, mapped_column, relationship from datajunction_server.database.attributetype import ColumnAttribute @@ -11,6 +10,7 @@ from datajunction_server.models.attribute import ColumnAttributes from datajunction_server.models.base import labelize from datajunction_server.models.column import ColumnTypeDecorator +from datajunction_server.models.unit import Unit, UnitTypeDecorator from datajunction_server.sql.parsing.types import ColumnType if TYPE_CHECKING: @@ -46,9 +46,13 @@ class Column(Base): # type: ignore type: Mapped[Optional[ColumnType]] = mapped_column(ColumnTypeDecorator) description: Mapped[Optional[str]] = mapped_column() - # Structured unit metadata. JSONB to accommodate atomic and compound shapes. - # Validated at the model layer via datajunction_server.models.unit.Unit. - unit: Mapped[Optional[dict]] = mapped_column(JSONB, nullable=True) + # Structured unit metadata. The TypeDecorator validates JSONB into a + # typed `Unit` (AtomicUnit | CompoundUnit) on read and canonicalizes + # Unit-or-dict input back to JSONB on write. + unit: Mapped[Optional[Unit]] = mapped_column( + UnitTypeDecorator, + nullable=True, + ) dimension_id: Mapped[Optional[int]] = mapped_column( ForeignKey("node.id", ondelete="SET NULL", name="fk_column_dimension_id_node"), diff --git a/datajunction-server/datajunction_server/database/node.py b/datajunction-server/datajunction_server/database/node.py index ac83f4dab..635cc01f3 100644 --- a/datajunction-server/datajunction_server/database/node.py +++ b/datajunction-server/datajunction_server/database/node.py @@ -67,12 +67,19 @@ from datajunction_server.models.node import ( DEFAULT_DRAFT_VERSION, BuildCriteria, + MetricUnit, NodeCursor, NodeMode, NodeStatus, ) from datajunction_server.models.node_type import NodeType from datajunction_server.models.partition import PartitionType +from datajunction_server.models.unit import ( + AtomicUnit, + CompoundUnit, + _get_unit_adapter, + structured_to_legacy_unit, +) from datajunction_server.naming import amenable_name from datajunction_server.typing import UTCDatetime from datajunction_server.utils import SEPARATOR, execute_with_retry @@ -174,6 +181,45 @@ def _build_search_score( return relevance * branch_boost * popularity +def _resolve_metric_unit_for_spec( + col_unit: "AtomicUnit | CompoundUnit | dict | None", + legacy_from_md: "MetricUnit | None", +) -> "Tuple[MetricUnit | None, AtomicUnit | CompoundUnit | None]": + """ + Decide which of (legacy enum, structured Unit) to populate on a MetricSpec + when round-tripping a metric back from the DB. + + Rules (preserves authoring intent on round-trip): + - No structured `column.unit` → emit only the legacy field (whatever was + in metric_metadata.unit); structured stays None. + - Structured `column.unit` is legacy-expressible (USD, percentage, time + codes, BYTE) → keep the legacy field as authoritative so `unit: dollar` + round-trips as `unit: dollar`, not the structured shape. + - Structured `column.unit` is NOT legacy-expressible (EUR, compound, + count-with-code, other data sizes) → populate structured, null the + legacy so nothing tries to dual-emit. + + Accepts `col_unit` as either a Unit Pydantic instance (the typed shape + returned by `UnitTypeDecorator`) or a plain dict (in-memory test paths + or untyped writes); normalizes via the adapter so the rest of the + function works on the Unit instance. + + Returns `(legacy_for_spec, structured_for_spec)`: + - `legacy_for_spec` populates `MetricSpec.unit_enum`. + - `structured_for_spec` populates `MetricSpec.unit_structured`. + """ + if col_unit is None: + return legacy_from_md, None + normalized: "AtomicUnit | CompoundUnit" + if isinstance(col_unit, dict): + normalized = _get_unit_adapter().validate_python(col_unit) + else: + normalized = col_unit + if structured_to_legacy_unit(normalized) is None: + return None, normalized + return legacy_from_md, None + + class NodeRelationship(Base): """ Join table for self-referential many-to-many relationships between nodes. @@ -545,6 +591,26 @@ async def to_spec(self, session: AsyncSession) -> NodeSpec: # Metric-specific if self.type == NodeType.METRIC: + col_unit = ( + self.current.columns[0].unit + if self.current.columns and self.current.columns[0].unit + else None + ) + # `MetricUnit.UNKNOWN` is the DB-column default for any metric + # that never had a unit authored. Treat it as "no unit" so it + # doesn't leak as `unit: unknown` on YAML re-export. + legacy_from_md = ( + self.current.metric_metadata.unit + if self.current.metric_metadata + and self.current.metric_metadata.unit + and self.current.metric_metadata.unit != MetricUnit.UNKNOWN + else None + ) + legacy_spec, structured_spec = _resolve_metric_unit_for_spec( + col_unit, + legacy_from_md, + ) + extra_kwargs.update( required_dimensions=sorted( col.name for col in self.current.required_dimensions @@ -552,12 +618,8 @@ async def to_spec(self, session: AsyncSession) -> NodeSpec: direction=self.current.metric_metadata.direction if self.current.metric_metadata else None, - unit_enum=( - self.current.metric_metadata.unit - if self.current.metric_metadata - and self.current.metric_metadata.unit - else None - ), + unit_enum=legacy_spec, + unit_structured=structured_spec, significant_digits=self.current.metric_metadata.significant_digits if self.current.metric_metadata else None, diff --git a/datajunction-server/datajunction_server/internal/deployment/orchestrator.py b/datajunction-server/datajunction_server/internal/deployment/orchestrator.py index 453d8e692..2b6e94759 100644 --- a/datajunction-server/datajunction_server/internal/deployment/orchestrator.py +++ b/datajunction-server/datajunction_server/internal/deployment/orchestrator.py @@ -81,10 +81,18 @@ from datajunction_server.models.node import ( DEFAULT_DRAFT_VERSION, DEFAULT_PUBLISHED_VERSION, + MetricUnit, NodeMode, NodeStatus, NodeType, ) +from datajunction_server.models.unit import ( + AtomicUnit, + CompoundUnit, + _get_unit_adapter, + legacy_unit_to_structured, + structured_to_legacy_unit, +) from datajunction_server.utils import ( SEPARATOR, Version, @@ -3516,17 +3524,30 @@ async def _create_node_revision( if result.spec.node_type == NodeType.METRIC: metric_spec = cast(MetricSpec, result.spec) + output_col: Column | None = None if new_revision.columns: # pragma: no branch - new_revision.columns[0].display_name = new_revision.display_name + col: Column = new_revision.columns[0] + col.display_name = new_revision.display_name + # Bridge legacy metric_metadata.unit and structured + # columns[0].unit so users on either input shape end up + # with both fields populated where expressible. See + # `_resolve_metric_unit` for the precedence rules. + col.unit = self._resolve_metric_unit(metric_spec, col.unit) + output_col = col + + legacy_unit = self._derive_legacy_unit_for_storage( + metric_spec, + output_col, + ) if ( - metric_spec.unit_enum + legacy_unit or metric_spec.direction or metric_spec.significant_digits or metric_spec.max_decimal_exponent or metric_spec.min_decimal_exponent ): new_revision.metric_metadata = MetricMetadata( - unit=metric_spec.unit_enum, + unit=legacy_unit, direction=metric_spec.direction, significant_digits=metric_spec.significant_digits, max_decimal_exponent=metric_spec.max_decimal_exponent, @@ -3546,6 +3567,103 @@ async def _create_node_revision( new_revision.required_dimensions = required_dimensions return new_revision + def _resolve_metric_unit( + self, + metric_spec: MetricSpec, + column_unit: "AtomicUnit | CompoundUnit | dict | None", + ) -> "AtomicUnit | CompoundUnit | None": + """ + Compute the canonical structured unit for a metric's output column, + reconciling the three possible input surfaces. Pure: returns the + value the caller should assign to `column.unit`; no mutation. + + Input surfaces (highest priority first): + 1. `metric_spec.unit_structured` — top-level structured `unit:` + at the metric spec level. Authored shape for the new model. + 2. `column_unit` — structured value set via the explicit + `columns[].unit` form. Supported for uniformity with + non-metric nodes, but unusual on metrics (where the column + name is auto-derived). + 3. `metric_spec.unit_enum` — legacy flat-string `unit: dollar` + form. Translated via the legacy → structured table. + + Conflict handling: if (1) is set together with (2) or (3) and the + values disagree, a warning is logged naming the node. + + Accepts dict for `column_unit` (in-memory test paths assign dicts + directly) and normalizes to Unit internally; returns Unit or None. + """ + legacy = metric_spec.unit_enum + spec_structured: AtomicUnit | CompoundUnit | None = metric_spec.unit_structured + # column.unit may arrive as Unit (typed via the SQLAlchemy decorator) + # or as a raw dict (test fixtures, fresh in-memory writes); normalize + # to a Unit for direct equality comparisons. + if isinstance(column_unit, dict): + column_structured = _get_unit_adapter().validate_python(column_unit) + else: + column_structured = column_unit + + # (1) Metric-level structured input wins absolutely. + if spec_structured is not None: + if column_structured is not None and column_structured != spec_structured: + logger.warning( + "Metric %s sets a structured unit at both the metric " + "spec level (%r) and on columns[].unit (%r); the " + "metric-level value wins. Remove one to silence this.", + metric_spec.rendered_name, + spec_structured, + column_structured, + ) + if legacy is not None and legacy != MetricUnit.UNKNOWN: + logger.warning( + "Metric %s sets both a structured unit (%r) and the " + "legacy unit_enum (%s); structured value wins.", + metric_spec.rendered_name, + spec_structured, + legacy.name, + ) + return spec_structured + + # (2) columns[].unit fallback. + if column_structured is not None: + if legacy is not None and legacy != MetricUnit.UNKNOWN: + logger.warning( + "Metric %s sets both metric_metadata.unit (%s) and " + "columns[].unit (%r); structured value wins.", + metric_spec.rendered_name, + legacy.name, + column_structured, + ) + return column_structured + + # (3) Legacy translation. + return legacy_unit_to_structured(legacy) + + def _derive_legacy_unit_for_storage( + self, + metric_spec: MetricSpec, + output_col: Column | None, + ) -> MetricUnit | None: + """ + Compute the value to write to the legacy `metricmetadata.unit` DB + column given the canonical structured `output_col.unit`. Dual-writing + the legacy column keeps it in sync with `column.unit` so: + - API consumers reading `metric_metadata.unit` keep seeing values. + - A code rollback to a release that reads only the legacy column + still finds the right data for everything the enum can express. + + Returns None when the structured value has no legacy equivalent + (non-USD currencies, compound units, data sizes, count with code) — + the caller writes NULL to `metricmetadata.unit` in that case. + """ + structured = output_col.unit if output_col is not None else None + if structured is not None: + return structured_to_legacy_unit(structured) + # No structured unit — fall back to whatever the legacy spec field + # had (typically None at this point because the caller will have + # already assigned the resolved column.unit from _resolve_metric_unit). + return metric_spec.unit_enum + def _create_column_from_spec( self, col: ColumnSpec, @@ -3558,7 +3676,7 @@ def _create_column_from_spec( display_name=col.display_name, description=col.description, order=order, - unit=col.unit.model_dump() if col.unit is not None else None, + unit=col.unit, attributes=[ ColumnAttribute( attribute_type=self.registry.attributes.get(attr), diff --git a/datajunction-server/datajunction_server/internal/namespaces.py b/datajunction-server/datajunction_server/internal/namespaces.py index 791968406..2e59eeadd 100644 --- a/datajunction-server/datajunction_server/internal/namespaces.py +++ b/datajunction-server/datajunction_server/internal/namespaces.py @@ -1445,7 +1445,14 @@ def _has_column_customizations(col: dict) -> bool: "attributes", [], ) - return has_custom_display or has_attributes or has_description or has_partition + has_unit = bool(col.get("unit")) + return ( + has_custom_display + or has_attributes + or has_description + or has_partition + or has_unit + ) def _merge_columns_preserving_comments(existing_list, new_list, is_cube=False): @@ -1737,6 +1744,10 @@ def _node_spec_to_yaml_dict(node_spec, include_all_columns=False) -> dict: # Filter columns to only include meaningful customizations # Special case for cubes: ALWAYS only export columns with partitions + # Metric specs exclude `columns` from model_dump (`columns: ... exclude=True`), + # so metric YAML never carries per-column entries — the metric's unit is + # emitted at the spec top level via the `unit:` field. No suppression + # needed here for metrics. # For other nodes: respect include_all_columns flag to preserve comments if "columns" in data and data["columns"] is not None: is_cube = data.get("node_type") == "cube" diff --git a/datajunction-server/datajunction_server/internal/nodes.py b/datajunction-server/datajunction_server/internal/nodes.py index c8fd3e2ff..d7361eac3 100644 --- a/datajunction-server/datajunction_server/internal/nodes.py +++ b/datajunction-server/datajunction_server/internal/nodes.py @@ -583,6 +583,26 @@ async def create_node_revision( if node_revision.type == NodeType.METRIC: if node_revision.columns: node_revision.columns[0].display_name = node_revision.display_name + # Bridge legacy metric_metadata.unit onto columns[0].unit so this + # endpoint produces the same DB state as the deployment + # orchestrator. Without this, metrics created here have no + # column.unit while the same node copied via branch fast-path + # (which routes through the orchestrator) does — causing spec + # drift that surfaces as cube column inheritance mismatches. + if ( + node_revision.metric_metadata + and node_revision.metric_metadata.unit + and node_revision.columns[0].unit is None + ): + from datajunction_server.models.unit import ( + legacy_unit_to_structured, + ) + + structured = legacy_unit_to_structured( + node_revision.metric_metadata.unit, + ) + if structured is not None: + node_revision.columns[0].unit = structured node_revision.catalog_id = catalog_id return node_revision @@ -634,6 +654,7 @@ async def create_cube_node_revision( if referenced_node.type == NodeType.METRIC else col.display_name, type=col.type, + unit=col.unit, attributes=[ ColumnAttribute(attribute_type_id=attr.attribute_type_id) for attr in col.attributes diff --git a/datajunction-server/datajunction_server/models/deployment.py b/datajunction-server/datajunction_server/models/deployment.py index 990188744..3a40a9eb4 100644 --- a/datajunction-server/datajunction_server/models/deployment.py +++ b/datajunction-server/datajunction_server/models/deployment.py @@ -4,10 +4,11 @@ Field, PrivateAttr, ConfigDict, + TypeAdapter, model_validator, ) -from typing import Annotated, Any, Literal, Optional, Union +from typing import Annotated, Any, ClassVar, Literal, Optional, Union from datajunction_server.models.partition import Granularity, PartitionType from datajunction_server.errors import ( DJInvalidDeploymentConfig, @@ -27,7 +28,11 @@ NodeStatus, NodeType, ) -from datajunction_server.models.unit import Unit +from datajunction_server.models.unit import ( + Unit, + legacy_unit_to_structured, + unit_to_dict, +) from datajunction_server.utils import SEPARATOR @@ -425,7 +430,18 @@ def __eq__(self, other: Any) -> bool: class MetricSpec(NodeSpec): """ - Specification for a metric node + Specification for a metric node. + + The `unit` input field accepts either of two shapes: + - **Legacy flat string** (`unit: dollar`) — translated via the + `MetricUnit` enum. Bounded to legacy values. + - **Structured dict** (`unit: {kind: currency, code: USD}` or + `unit: {numerator: ..., denominator: ...}`) — the same shape as + `ColumnSpec.unit`, but authored at the metric level so users don't + need to know the metric's output column name. + + Internally these are stored separately (`unit_enum`, `unit_structured`) + and reconciled at deploy time onto the metric's single output column. """ node_type: Literal[NodeType.METRIC] = NodeType.METRIC @@ -436,29 +452,70 @@ class MetricSpec(NodeSpec): required_dimensions: list[str] | None = None # Field(default_factory=list) direction: MetricDirection | None = None unit_enum: MetricUnit | None = Field(default=None, exclude=True) + # Structured unit form at the metric level — peer of `unit_enum`. + # Only one of `unit_enum` / `unit_structured` is set per spec (the + # __init__ dispatches by input shape). + unit_structured: Unit | None = Field(default=None, exclude=True) significant_digits: int | None = None min_decimal_exponent: int | None = None max_decimal_exponent: int | None = None + # Class-level adapter used by __init__ to eagerly validate structured + # unit input. `ClassVar` keeps Pydantic from treating it as a field. + _unit_adapter: ClassVar[TypeAdapter] = TypeAdapter(Unit) + def __init__(self, **data: Any): unit = data.pop("unit", None) - if unit: + # Empty string and empty dict both mean "no unit authored" — match + # the historical `if unit:` permissive gate so existing YAML files + # with `unit: ""` or `unit: {}` (e.g. template residue) keep parsing. + if unit is None or unit == "" or unit == {}: + pass + elif isinstance(unit, MetricUnit): + data["unit_enum"] = unit + elif isinstance(unit, str): try: - if isinstance(unit, MetricUnit): - data["unit_enum"] = unit - else: - data["unit_enum"] = MetricUnit[ # pragma: no cover - unit.strip().upper() - ] - except KeyError: # pragma: no cover + data["unit_enum"] = MetricUnit[unit.strip().upper()] + except KeyError: raise DJInvalidInputException(f"Invalid metric unit: {unit}") + elif isinstance(unit, dict): + # Validate eagerly so users get a clean DJInvalidInputException + # (with the offending dict echoed) instead of a noisy Pydantic + # ValidationError that leaks internal field names like + # `unit_structured.atomic.kind`. + try: + MetricSpec._unit_adapter.validate_python(unit) + except Exception as exc: + raise DJInvalidInputException( + f"Invalid metric unit {unit!r}: {exc}", + ) from exc + data["unit_structured"] = unit + else: + raise DJInvalidInputException( + f"Metric unit must be a string or a structured dict; " + f"got {type(unit).__name__}", + ) super().__init__(**data) @property - def unit(self) -> str | None: - """Return lowercased unit name for JSON serialization.""" - if self.unit_enum is None: # pragma: no cover + def unit(self) -> str | dict | None: + """ + Return the canonical metric unit value for serialization. + + Returns: + - `None` if no unit is set. + - A structured dict if the metric was authored with a structured + unit at the spec level. + - The legacy lowercase enum name (e.g. `"dollar"`) otherwise. + + Output consumers that need a structured value regardless of input + shape should read `column.unit` on the metric's output column. + """ + if self.unit_structured is not None: + # Canonical dict shape (JSON-friendly, no None values). + return unit_to_dict(self.unit_structured) + if self.unit_enum is None or self.unit_enum == MetricUnit.UNKNOWN: return None return self.unit_enum.value.name.lower() @@ -467,6 +524,22 @@ def model_dump(self, **kwargs): # pragma: no cover base["unit"] = self.unit return base + def _canonical_unit(self) -> "Unit | None": + """ + Reduce both legacy and structured inputs to the same canonical Unit + instance for equality comparisons. Returns None when the metric has + no unit (or only the UNKNOWN sentinel). Two specs that author the + same conceptual unit via different input shapes (`unit: dollar` vs + `unit: {kind: currency, code: USD}`) produce equal frozen Unit + instances — so __eq__ doesn't falsely report drift between YAML and + DB-roundtripped specs. + """ + if self.unit_structured is not None: + return self.unit_structured + if self.unit_enum is None or self.unit_enum == MetricUnit.UNKNOWN: + return None + return legacy_unit_to_structured(self.unit_enum) + def __eq__(self, other: Any) -> bool: if not isinstance(other, MetricSpec): return False @@ -475,7 +548,7 @@ def __eq__(self, other: Any) -> bool: and self.query_ast.compare(other.query_ast) and (self.required_dimensions or []) == (other.required_dimensions or []) and eq_or_fallback(self.direction, other.direction, MetricDirection.NEUTRAL) - and eq_or_fallback(self.unit, other.unit, MetricUnit.UNKNOWN.value.name) + and self._canonical_unit() == other._canonical_unit() and self.significant_digits == other.significant_digits and self.min_decimal_exponent == other.min_decimal_exponent and self.max_decimal_exponent == other.max_decimal_exponent diff --git a/datajunction-server/datajunction_server/models/metric.py b/datajunction-server/datajunction_server/models/metric.py index 1fd640f49..ec690966a 100644 --- a/datajunction-server/datajunction_server/models/metric.py +++ b/datajunction-server/datajunction_server/models/metric.py @@ -16,6 +16,7 @@ ) from datajunction_server.models.query import ColumnMetadata, V3ColumnMetadata from datajunction_server.models.sql import ScanEstimate, TranspiledSQL +from datajunction_server.models.unit import unit_to_dict from datajunction_server.sql.decompose import MetricComponentExtractor from datajunction_server.sql.parsing.backends.antlr4 import ast, parse from datajunction_server.transpilation import transpile_sql @@ -42,6 +43,11 @@ class Metric(BaseModel): dimensions: List[DimensionAttributeOutput] metric_metadata: Optional[MetricMetadataOutput] = None + # Structured metric-level unit, derived from the metric's output column. + # `metric_metadata.unit` remains the legacy flat-string field for + # back-compat; `unit` is the structured shape and is the source of truth + # going forward. `None` when no unit is set, regardless of input shape. + unit: Optional[Dict] = None required_dimensions: List[str] incompatible_druid_functions: List[str] @@ -88,6 +94,9 @@ async def parse_node( expression=str(query_ast.select.projection[0]), dimensions=dims, metric_metadata=node.current.metric_metadata, + unit=unit_to_dict( + node.current.columns[0].unit if node.current.columns else None, + ), required_dimensions=[dim.name for dim in node.current.required_dimensions], incompatible_druid_functions=incompatible_druid_functions, measures=measures, diff --git a/datajunction-server/datajunction_server/models/node.py b/datajunction-server/datajunction_server/models/node.py index f59ee4aea..a7c288c9d 100644 --- a/datajunction-server/datajunction_server/models/node.py +++ b/datajunction-server/datajunction_server/models/node.py @@ -32,6 +32,11 @@ from datajunction_server.models.materialization import MaterializationConfigOutput from datajunction_server.models.node_type import NodeNameOutput, NodeType from datajunction_server.models.partition import PartitionOutput +from datajunction_server.models.unit import ( + AtomicUnit, + CompoundUnit, + unit_to_dict, +) from datajunction_server.models.tag import TagMinimum, TagOutput from datajunction_server.models.user import UserNameOnly from datajunction_server.sql.parsing.types import ColumnType @@ -793,6 +798,15 @@ class ColumnOutput(BaseModel): def extract_type(cls, raw): return str(raw) + @field_validator("unit", mode="before") + def _canonicalize_unit( + cls, + raw: "AtomicUnit | CompoundUnit | dict | None", + ) -> dict | None: + """Accept either a typed Unit (from the SQLAlchemy decorator) or a + raw dict; emit the canonical JSON-friendly dict shape on output.""" + return unit_to_dict(raw) + class SourceColumnOutput(BaseModel): """ diff --git a/datajunction-server/datajunction_server/models/unit.py b/datajunction-server/datajunction_server/models/unit.py index e625867b9..b4b259cbe 100644 --- a/datajunction-server/datajunction_server/models/unit.py +++ b/datajunction-server/datajunction_server/models/unit.py @@ -10,9 +10,16 @@ import re from enum import Enum -from typing import Annotated, Any, Union +from typing import TYPE_CHECKING, Annotated, Any, Union -from pydantic import BaseModel, Discriminator, Tag, model_validator +from pydantic import BaseModel, ConfigDict, Discriminator, Tag, model_validator +from sqlalchemy import TypeDecorator +from sqlalchemy.dialects.postgresql import JSONB + +if TYPE_CHECKING: + from pydantic import TypeAdapter + + from datajunction_server.models.node import MetricUnit class UnitKind(str, Enum): @@ -94,6 +101,10 @@ class AtomicUnit(BaseModel): instance (currency code, time scale, count label, etc.). """ + # Frozen so shared singletons (e.g., the legacy translation table) are + # safe to hand out without defensive copies. + model_config = ConfigDict(frozen=True) + kind: UnitKind code: str | None = None @@ -174,6 +185,8 @@ class CompoundUnit(BaseModel): (CTR = clicks/impressions, QPS = queries/second, throughput = bytes/second). """ + model_config = ConfigDict(frozen=True) + numerator: AtomicUnit denominator: AtomicUnit @@ -202,3 +215,195 @@ def _unit_discriminator(value: Any) -> str: ], Discriminator(_unit_discriminator), ] + + +def unit_to_dict(unit: "AtomicUnit | CompoundUnit | dict | None") -> dict | None: + """ + Canonical JSON-friendly dict form of a Unit. Used by every storage and + comparison path so the on-disk shape is stable regardless of input: + + - `mode="json"` so UnitKind enum members render as plain strings. + - `exclude_none=True` so `code: None` doesn't appear on dimensionless + kinds (unitless / percentage / proportion / count-without-code), + keeping shapes consistent with the legacy translation table. + + Accepts either a Pydantic model or an already-dict input — the dict + path is a no-op pass-through after a defensive copy. + """ + if unit is None: + return None + if isinstance(unit, BaseModel): + return unit.model_dump(mode="json", exclude_none=True) + # Already a dict; normalize by stripping any None values one level deep + # so a hand-rolled dict matches the canonical shape. + if isinstance(unit, dict): + return _strip_none(unit) + raise TypeError( # pragma: no cover + f"unit_to_dict expected Unit | dict | None, got {type(unit).__name__}", + ) + + +def _strip_none(value: Any) -> Any: + """Drop None-valued keys recursively from a dict; pass through otherwise.""" + if isinstance(value, dict): + return {k: _strip_none(v) for k, v in value.items() if v is not None} + return value + + +# Type adapter used by the SQLAlchemy TypeDecorator to validate JSONB rows +# into Unit instances on read. Built once at module load. +_UNIT_ADAPTER: "TypeAdapter[Unit]" = None # type: ignore[assignment] + + +def _get_unit_adapter(): + """Lazy accessor — defer TypeAdapter construction until first use so + importing `unit.py` stays cheap and avoids any circular-import risk.""" + global _UNIT_ADAPTER + if _UNIT_ADAPTER is None: + from pydantic import TypeAdapter + + _UNIT_ADAPTER = TypeAdapter(Unit) + return _UNIT_ADAPTER + + +class UnitTypeDecorator(TypeDecorator): + """ + SQLAlchemy TypeDecorator that bridges the structured ``Unit`` pydantic + model and the underlying JSONB storage for ``column.unit``. + + On write: accepts a ``Unit`` instance or a raw dict and normalizes to + the canonical JSONB shape via ``unit_to_dict``. + + On read: validates the JSONB dict into a ``Unit`` (``AtomicUnit`` or + ``CompoundUnit``) so callers get typed access — ``.kind``, ``.code``, + ``.label()`` — without stringly-typed dict lookups. + + Mirrors the existing ``ColumnTypeDecorator`` pattern. Cheaper than the + SQL-type variant because the decoded value is small and Pydantic 2's + Rust-backed validator handles it in microseconds. + """ + + impl = JSONB + cache_ok = True + + def process_bind_param(self, value, dialect): + return unit_to_dict(value) + + def process_result_value(self, value, dialect): + if value is None: + return None + return _get_unit_adapter().validate_python(value) + + +# ------------------------------------------------------------------------- +# Legacy <-> structured translation. +# +# The legacy `MetricUnit` enum (datajunction_server.models.node.MetricUnit) +# was a flat one-per-(kind, denomination) enumeration applied only to metric +# nodes. The new structured `Unit` lives on every column. These functions +# translate between the two so that: +# - existing YAML / API input using the legacy `unit: ` keeps +# parsing into the canonical `column.unit` storage. +# - the legacy `metricmetadata.unit` DB column can be dual-written from +# `column.unit` to keep API consumers reading the legacy field happy. +# - the legacy `metric_metadata.unit` API field can be derived from +# `column.unit` for downstream consumers (the reverse function lands here +# so it lives next to its inverse). +# +# Translation is intentionally lossy in the reverse direction: structured +# values the legacy enum can't represent (non-USD currencies, compound +# units, data sizes, count-with-code) map to None. The legacy column simply +# isn't populated for those. +# ------------------------------------------------------------------------- + +# Keyed by MetricUnit.name (not the enum member itself) so this module can +# avoid importing node.py at module load. Callers translate to/from the enum +# at the call site. Values are frozen Unit instances — safe to hand out +# without defensive copies. +_LEGACY_NAME_TO_STRUCTURED: "dict[str, AtomicUnit | None]" = { + "UNKNOWN": None, + "UNITLESS": AtomicUnit(kind=UnitKind.UNITLESS), + "PERCENTAGE": AtomicUnit(kind=UnitKind.PERCENTAGE), + "PROPORTION": AtomicUnit(kind=UnitKind.PROPORTION), + "DOLLAR": AtomicUnit(kind=UnitKind.CURRENCY, code="USD"), + "MILLISECOND": AtomicUnit(kind=UnitKind.TIME, code="ms"), + "SECOND": AtomicUnit(kind=UnitKind.TIME, code="s"), + "MINUTE": AtomicUnit(kind=UnitKind.TIME, code="min"), + "HOUR": AtomicUnit(kind=UnitKind.TIME, code="h"), + "DAY": AtomicUnit(kind=UnitKind.TIME, code="d"), + "WEEK": AtomicUnit(kind=UnitKind.TIME, code="wk"), + "MONTH": AtomicUnit(kind=UnitKind.TIME, code="mo"), + "YEAR": AtomicUnit(kind=UnitKind.TIME, code="yr"), + "BYTE": AtomicUnit(kind=UnitKind.DATA_SIZE, code="B"), + # BIT has no entry in DATA_SIZE_CODES (which uses byte-based units like + # B, KB, MB, ... and their binary cousins KiB, MiB). Bits are atypical + # in BI / data-platform metrics; if a user appears, add "b" to + # DATA_SIZE_CODES and {"BIT": AtomicUnit(kind=DATA_SIZE, code="b")} here. +} + + +def legacy_unit_to_structured( + legacy: "MetricUnit | None", +) -> "AtomicUnit | None": + """ + Translate a legacy `MetricUnit` enum value into a structured `Unit`. + + Returns None for `MetricUnit.UNKNOWN` and for `None`, since both mean + "no unit set." Returns `AtomicUnit(kind=unitless)` for + `MetricUnit.UNITLESS`, preserving the distinction between "explicitly + no unit" and "not set." + """ + if legacy is None: + return None + return _LEGACY_NAME_TO_STRUCTURED.get(legacy.name) + + +def structured_to_legacy_unit( + unit: "AtomicUnit | CompoundUnit | dict | None", +) -> "MetricUnit | None": + """ + Translate a structured `Unit` (Pydantic model or already-dict) back to + the legacy `MetricUnit` enum value when expressible. Returns None when + the structured value has no legacy equivalent (non-USD currencies, + compound units, data sizes other than BYTE, count with code, etc.) — + callers should treat that as "don't populate the legacy column." + + `MetricUnit` is imported locally to keep `models/unit.py` free of a + module-top dependency on `models/node.py`, where `MetricUnit` lives. + """ + # Local import: `models/node.py` imports `unit_to_dict` from this module + # at module-load, so a top-level reverse import would cycle. + from datajunction_server.models.node import MetricUnit + + if unit is None: + return None + # Normalize dict input to a Unit instance for typed attribute access. + # A malformed dict (e.g., an unknown time code that slipped through a + # direct DB write) has no legacy equivalent — treat it as unmapped. + if isinstance(unit, dict): + try: + unit = _get_unit_adapter().validate_python(unit) + except Exception: + return None + if isinstance(unit, CompoundUnit): + return None + assert isinstance(unit, AtomicUnit) # narrowed by isinstance above + adapter + kind = unit.kind + code = unit.code + if kind == UnitKind.UNITLESS: + return MetricUnit.UNITLESS + if kind == UnitKind.PERCENTAGE: + return MetricUnit.PERCENTAGE + if kind == UnitKind.PROPORTION: + return MetricUnit.PROPORTION + if kind == UnitKind.CURRENCY: + return MetricUnit.DOLLAR if code == "USD" else None + if kind in (UnitKind.TIME, UnitKind.DATA_SIZE): + # Reverse of _LEGACY_NAME_TO_STRUCTURED — direct equality on + # frozen Unit instances. + for legacy_name, structured in _LEGACY_NAME_TO_STRUCTURED.items(): + if structured == unit: + return MetricUnit[legacy_name] + return None + # count — free-form code, no legacy equivalent. + return None diff --git a/datajunction-server/tests/api/namespaces_test.py b/datajunction-server/tests/api/namespaces_test.py index 2c311eb05..2022692cc 100644 --- a/datajunction-server/tests/api/namespaces_test.py +++ b/datajunction-server/tests/api/namespaces_test.py @@ -854,6 +854,7 @@ async def test_export_namespaces_deployment(client_with_roads: AsyncClient): "display_name": "Num Repair Orders", "name": "${prefix}num_repair_orders", "type": "bigint", + "unit": {"kind": "currency", "code": "USD"}, }, { "attributes": [], diff --git a/datajunction-server/tests/api/nodes_test.py b/datajunction-server/tests/api/nodes_test.py index 24975bd3d..8f912b30a 100644 --- a/datajunction-server/tests/api/nodes_test.py +++ b/datajunction-server/tests/api/nodes_test.py @@ -2959,6 +2959,37 @@ async def test_update_metric_node(self, client_with_roads: AsyncClient): data = response.json() assert data["required_dimensions"] == ["repair_order_id"] + @pytest.mark.asyncio + async def test_metric_unknown_unit_does_not_set_column_unit( + self, + client_with_roads: AsyncClient, + ): + """ + MetricUnit.UNKNOWN maps to None in legacy_unit_to_structured, so setting + unit: "unknown" via metric_metadata must not write anything to column.unit. + Covers the structured-is-None branch in internal/nodes.py (line 604). + """ + response = await client_with_roads.post( + "/nodes/metric/", + json={ + "name": "default.test_unknown_unit_metric", + "description": "Test metric with unknown unit", + "query": "SELECT count(repair_order_id) FROM default.repair_orders", + "mode": "published", + "metric_metadata": { + "unit": "unknown", + }, + }, + ) + assert response.status_code in (200, 201) + node = response.json() + col = next( + c + for c in node["columns"] + if c["name"] == "default_DOT_test_unknown_unit_metric" + ) + assert col.get("unit") is None + @pytest.mark.asyncio async def test_create_dimension_node_fails( self, diff --git a/datajunction-server/tests/internal/deployment/orchestration_test.py b/datajunction-server/tests/internal/deployment/orchestration_test.py index 626a7f796..b362182af 100644 --- a/datajunction-server/tests/internal/deployment/orchestration_test.py +++ b/datajunction-server/tests/internal/deployment/orchestration_test.py @@ -44,7 +44,7 @@ DimensionJoinLinkSpec, DimensionReferenceLinkSpec, ) -from datajunction_server.models.node import NodeStatus +from datajunction_server.models.node import MetricUnit, NodeStatus from datajunction_server.database.namespace import NodeNamespace from datajunction_server.database.user import OAuthProvider, User from datajunction_server.database.tag import Tag @@ -2673,3 +2673,234 @@ async def test_auto_register_sources_uses_sqlalchemy_for_system_catalog( assert {"id", "name"} <= col_names # The query service must NOT have been called. mock_query_client.get_columns_for_tables_batch.assert_not_called() + + +# ------------------------------------------------------------------------- +# Legacy/structured unit bridging in the orchestrator metric-deploy path. +# ------------------------------------------------------------------------- + + +class TestReconcileMetricUnit: + """ + Tests for `_resolve_metric_unit` and `_derive_legacy_unit_for_storage`. + These bridge the legacy `MetricSpec.unit_enum` (from YAML `unit: dollar` + or API `metric_metadata.unit`) and the structured `column.unit` so users + on either input shape end up with consistent storage. + + Called as unbound methods with a MagicMock for self — the helpers don't + touch self state, they just operate on the spec and column. + """ + + @staticmethod + def _make_spec(unit: MetricUnit | dict | None = None) -> MetricSpec: + kwargs: dict = { + "name": "m", + "namespace": "default", + "query": "SELECT 1", + } + if unit is not None: + kwargs["unit"] = unit + return MetricSpec(**kwargs) + + @staticmethod + def _resolve(spec: MetricSpec, column_unit: dict | None) -> dict | None: + """Resolve and convert to canonical dict so tests can assert against + plain dict literals. The resolver itself returns Unit; we coerce to + dict here for assertion readability.""" + from datajunction_server.models.unit import unit_to_dict + + return unit_to_dict( + DeploymentOrchestrator._resolve_metric_unit( + MagicMock(), + spec, + column_unit, + ), + ) + + def test_legacy_only_resolves_to_translated_dict(self): + spec = self._make_spec(unit=MetricUnit.DOLLAR) + assert self._resolve(spec, None) == {"kind": "currency", "code": "USD"} + + def test_structured_only_resolves_to_column_unit(self): + spec = self._make_spec() # no legacy unit + assert self._resolve(spec, {"kind": "percentage"}) == {"kind": "percentage"} + + def test_both_set_structured_wins_and_warns(self, caplog): + import logging + + spec = self._make_spec(unit=MetricUnit.DOLLAR) + with caplog.at_level( + logging.WARNING, + logger="datajunction_server.internal.deployment.orchestrator", + ): + resolved = self._resolve(spec, {"kind": "currency", "code": "EUR"}) + assert resolved == {"kind": "currency", "code": "EUR"} + assert any( + "sets both metric_metadata.unit" in rec.message for rec in caplog.records + ) + + def test_neither_set_resolves_to_none(self): + spec = self._make_spec() + assert self._resolve(spec, None) is None + + def test_unknown_legacy_resolves_to_none(self): + # MetricUnit.UNKNOWN translates to None — should not produce any unit. + spec = self._make_spec(unit=MetricUnit.UNKNOWN) + assert self._resolve(spec, None) is None + + def test_unitless_legacy_preserves_distinction(self): + # UNITLESS → {kind: unitless} (explicitly no unit), not None. + spec = self._make_spec(unit=MetricUnit.UNITLESS) + assert self._resolve(spec, None) == {"kind": "unitless"} + + def test_metric_level_structured_overrides_legacy(self): + spec = self._make_spec(unit={"kind": "currency", "code": "EUR"}) + spec.unit_enum = MetricUnit.DOLLAR # ignored when structured is set + assert self._resolve(spec, None) == {"kind": "currency", "code": "EUR"} + + def test_metric_level_structured_overrides_column_unit_with_warning(self, caplog): + import logging + + spec = self._make_spec(unit={"kind": "currency", "code": "EUR"}) + with caplog.at_level( + logging.WARNING, + logger="datajunction_server.internal.deployment.orchestrator", + ): + resolved = self._resolve(spec, {"kind": "percentage"}) + assert resolved == {"kind": "currency", "code": "EUR"} + assert any("metric-level value wins" in rec.message for rec in caplog.records) + + def test_metric_level_structured_matching_column_unit_no_warning(self, caplog): + # When metric-level structured equals columns[].unit, no warning fires. + import logging + + same = {"kind": "currency", "code": "USD"} + spec = self._make_spec(unit=dict(same)) + with caplog.at_level( + logging.WARNING, + logger="datajunction_server.internal.deployment.orchestrator", + ): + resolved = self._resolve(spec, dict(same)) + assert resolved == same + assert not any( + "metric-level value wins" in rec.message for rec in caplog.records + ) + + def test_metric_level_structured_warns_when_combined_with_legacy(self, caplog): + import logging + + spec = self._make_spec(unit=MetricUnit.DOLLAR) + # Override the unit_structured field directly to simulate "both + # fields set" — production code wouldn't normally set both at once, + # but the resolver must handle it gracefully. + from datajunction_server.models.unit import AtomicUnit, UnitKind + + spec.unit_structured = AtomicUnit(kind=UnitKind.CURRENCY, code="EUR") + with caplog.at_level( + logging.WARNING, + logger="datajunction_server.internal.deployment.orchestrator", + ): + resolved = self._resolve(spec, None) + assert resolved == {"kind": "currency", "code": "EUR"} + assert any( + "structured value wins" in rec.message and "legacy" in rec.message + for rec in caplog.records + ) + + +class TestDeriveLegacyUnitForStorage: + """ + Tests for `_derive_legacy_unit_for_storage`, the reverse half of the + bridge: reads the canonical `column.unit` and returns the value to + dual-write to `metricmetadata.unit`. Returns None when the structured + value has no legacy equivalent. + """ + + @staticmethod + def _spec(unit: MetricUnit | None = None) -> MetricSpec: + kwargs: dict = {"name": "m", "namespace": "default", "query": "SELECT 1"} + if unit is not None: + kwargs["unit"] = unit + return MetricSpec(**kwargs) + + @staticmethod + def _col(unit: dict | None) -> Column: + return Column(name="value", type=None, order=0, unit=unit) + + def test_structured_with_legacy_equivalent_returns_enum(self): + col = self._col({"kind": "currency", "code": "USD"}) + result = DeploymentOrchestrator._derive_legacy_unit_for_storage( + MagicMock(), + self._spec(), + col, + ) + assert result == MetricUnit.DOLLAR + + def test_non_usd_currency_returns_none(self): + col = self._col({"kind": "currency", "code": "EUR"}) + result = DeploymentOrchestrator._derive_legacy_unit_for_storage( + MagicMock(), + self._spec(), + col, + ) + assert result is None + + def test_compound_returns_none(self): + col = self._col( + { + "numerator": {"kind": "count"}, + "denominator": {"kind": "time", "code": "s"}, + }, + ) + result = DeploymentOrchestrator._derive_legacy_unit_for_storage( + MagicMock(), + self._spec(), + col, + ) + assert result is None + + def test_data_size_returns_none(self): + col = self._col({"kind": "data_size", "code": "MB"}) + result = DeploymentOrchestrator._derive_legacy_unit_for_storage( + MagicMock(), + self._spec(), + col, + ) + assert result is None + + def test_count_with_code_returns_none(self): + col = self._col({"kind": "count", "code": "clicks"}) + result = DeploymentOrchestrator._derive_legacy_unit_for_storage( + MagicMock(), + self._spec(), + col, + ) + assert result is None + + def test_no_structured_falls_back_to_spec_legacy(self): + # When column.unit is None, fall back to whatever the legacy spec + # field held (covers the rare case where _reconcile didn't run). + col = self._col(None) + result = DeploymentOrchestrator._derive_legacy_unit_for_storage( + MagicMock(), + self._spec(unit=MetricUnit.HOUR), + col, + ) + assert result == MetricUnit.HOUR + + def test_no_structured_no_legacy_returns_none(self): + col = self._col(None) + result = DeploymentOrchestrator._derive_legacy_unit_for_storage( + MagicMock(), + self._spec(), + col, + ) + assert result is None + + def test_no_column_returns_spec_legacy(self): + result = DeploymentOrchestrator._derive_legacy_unit_for_storage( + MagicMock(), + self._spec(unit=MetricUnit.SECOND), + None, + ) + assert result == MetricUnit.SECOND diff --git a/datajunction-server/tests/internal/legacy_unit_compat_test.py b/datajunction-server/tests/internal/legacy_unit_compat_test.py new file mode 100644 index 000000000..1b488e0cd --- /dev/null +++ b/datajunction-server/tests/internal/legacy_unit_compat_test.py @@ -0,0 +1,431 @@ +""" +End-to-end compatibility test for the legacy flat-string `unit:` form +(e.g. `unit: dollar`) — the shape every metric YAML used before the unit +lift to `column.unit`. Verifies: + + 1. The YAML parses through `MetricSpec` without errors. + 2. The orchestrator's reconcile + derive helpers populate both the + canonical `column.unit` (structured) and the legacy `metricmetadata.unit` + (enum) consistently. + 3. The reverse direction — reading from DB state back into a spec — + reproduces the original legacy `unit: dollar` shape (not the structured + dict), preserving the user's authoring intent for round-trip stability. + 4. `node_spec_to_yaml` emits `unit: dollar` (legacy string), not a + structured dict, when the value is legacy-expressible. +""" + +from unittest.mock import MagicMock + +import pytest +import yaml + +from datajunction_server.database.column import Column +from datajunction_server.internal.deployment.orchestrator import ( + DeploymentOrchestrator, +) +from datajunction_server.internal.namespaces import node_spec_to_yaml +from datajunction_server.models.deployment import MetricSpec +from datajunction_server.models.node import MetricUnit +from datajunction_server.models.unit import ( + AtomicUnit, + UnitKind, + legacy_unit_to_structured, + structured_to_legacy_unit, + unit_to_dict, +) + + +# A representative legacy metric YAML — flat-string `unit:` at the metric +# spec top level, no `columns:` block, no structured unit. This is the +# shape every existing metric YAML uses. +LEGACY_METRIC_YAML = """\ +name: legacy_compat.total_revenue +node_type: metric +display_name: Total Revenue (Legacy) +description: Sum of all transaction amounts (authored with legacy unit field). +direction: higher_is_better +unit: dollar +significant_digits: 2 +query: SELECT SUM(amount) FROM legacy_compat.transactions +""" + + +def _make_col(unit: dict | None = None) -> Column: + """Minimal Column constructed without a session (matches existing helper tests).""" + return Column(name="value", type=None, order=0, unit=unit) + + +class TestLegacyYamlEndToEnd: + """ + End-to-end: legacy YAML → MetricSpec → orchestrator helpers → DB state + → MetricSpec (round-trip via to_spec equivalent) → YAML export. + """ + + @pytest.fixture + def spec(self) -> MetricSpec: + return MetricSpec(**yaml.safe_load(LEGACY_METRIC_YAML)) + + def test_parses_into_unit_enum_not_structured(self, spec: MetricSpec): + """Legacy YAML routes to unit_enum; structured field remains None.""" + assert spec.unit_enum == MetricUnit.DOLLAR + assert spec.unit_structured is None + # `spec.unit` returns the legacy string form for back-compat output. + assert spec.unit == "dollar" + + def test_other_metric_fields_preserved(self, spec: MetricSpec): + """Non-unit metric fields still parse correctly alongside the legacy unit.""" + assert spec.rendered_name == "legacy_compat.total_revenue" + assert spec.direction is not None + assert spec.direction.value == "higher_is_better" + assert spec.significant_digits == 2 + + def test_orchestrator_resolves_legacy_to_structured(self, spec: MetricSpec): + """The resolver translates the legacy DOLLAR into the structured form.""" + resolved = DeploymentOrchestrator._resolve_metric_unit( + MagicMock(), + spec, + None, + ) + assert resolved == AtomicUnit(kind=UnitKind.CURRENCY, code="USD") + + def test_orchestrator_derives_legacy_for_storage(self, spec: MetricSpec): + """After resolution, the derive step returns MetricUnit.DOLLAR so the + legacy DB column is dual-written. This is what protects API consumers + reading `metric_metadata.unit`.""" + resolved = DeploymentOrchestrator._resolve_metric_unit( + MagicMock(), + spec, + None, + ) + col = _make_col(unit=resolved) + legacy_enum = DeploymentOrchestrator._derive_legacy_unit_for_storage( + MagicMock(), + spec, + col, + ) + assert legacy_enum == MetricUnit.DOLLAR + + def test_translation_round_trips(self, spec: MetricSpec): + """Legacy → structured → legacy is the identity for every value in the + migration table — guarantees both halves of the bridge agree.""" + structured = legacy_unit_to_structured(spec.unit_enum) + assert structured == AtomicUnit(kind=UnitKind.CURRENCY, code="USD") + legacy = structured_to_legacy_unit(structured) + assert legacy == MetricUnit.DOLLAR + assert legacy == spec.unit_enum + + def test_yaml_export_emits_legacy_string(self, spec: MetricSpec): + """node_spec_to_yaml emits `unit: dollar` (string), not a structured + dict — preserves the author's original YAML shape on re-export when + the value is legacy-expressible.""" + output = node_spec_to_yaml(spec) + assert "unit: dollar" in output + # No structured form leaks into the export. + assert "kind:" not in output + assert "code:" not in output + + def test_yaml_export_does_not_introduce_columns_block(self, spec: MetricSpec): + """A legacy metric YAML has no `columns:` block. After all the unit + plumbing, re-export must NOT introduce one — that would be a churn + diff on every legacy metric file.""" + output = node_spec_to_yaml(spec) + assert "columns:" not in output + + +class TestAllLegacyUnitValuesRoundTrip: + """ + Every legacy `MetricUnit` value that exists in production data + (per the per-revision count we measured) must successfully: + (a) parse from the legacy YAML form, + (b) translate forward into a structured dict, and + (c) translate back via the reverse table. + + BIT and BYTE are intentionally absent from the translation table (0 rows + in production) — they map to None on forward translation, which is + documented behavior. + """ + + @pytest.mark.parametrize( + "legacy_str", + [ + "unitless", + "percentage", + "proportion", + "dollar", + "millisecond", + "second", + "minute", + "hour", + "day", + "week", + "month", + "year", + ], + ) + def test_legacy_value_round_trips(self, legacy_str: str): + spec = MetricSpec( + name="legacy_compat.m", + query="SELECT 1", + unit=legacy_str, + ) + assert spec.unit_enum == MetricUnit[legacy_str.upper()] + assert spec.unit_structured is None + # Forward translation produces a structured value. + structured = legacy_unit_to_structured(spec.unit_enum) + assert structured is not None + # Reverse translation returns the same enum member. + assert structured_to_legacy_unit(structured) == MetricUnit[legacy_str.upper()] + + def test_unknown_legacy_value_rejected(self): + """Typos in legacy YAML still raise a clear error, not a silent pass.""" + from datajunction_server.errors import DJInvalidInputException + + with pytest.raises(DJInvalidInputException, match="Invalid metric unit"): + MetricSpec( + name="legacy_compat.m", + query="SELECT 1", + unit="dollars", # plural typo + ) + + def test_resolve_metric_unit_for_spec_no_column_unit(self): + """Helper: no structured column unit → emit only legacy enum.""" + from datajunction_server.database.node import _resolve_metric_unit_for_spec + + legacy, structured = _resolve_metric_unit_for_spec( + col_unit=None, + legacy_from_md=MetricUnit.DOLLAR, + ) + assert legacy == MetricUnit.DOLLAR + assert structured is None + + def test_resolve_metric_unit_for_spec_legacy_expressible_keeps_legacy(self): + """Helper: structured is legacy-expressible → keep legacy field, structured stays None. + + Preserves round-trip stability: an author who wrote `unit: dollar` sees + `unit: dollar` back, not `unit: {kind: currency, code: USD}`. + """ + from datajunction_server.database.node import _resolve_metric_unit_for_spec + + legacy, structured = _resolve_metric_unit_for_spec( + col_unit={"kind": "currency", "code": "USD"}, + legacy_from_md=MetricUnit.DOLLAR, + ) + assert legacy == MetricUnit.DOLLAR + assert structured is None + + @pytest.mark.parametrize( + "col_unit", + [ + {"kind": "currency", "code": "EUR"}, + {"kind": "data_size", "code": "MB"}, + {"kind": "count", "code": "clicks"}, + { + "numerator": {"kind": "count"}, + "denominator": {"kind": "time", "code": "s"}, + }, + ], + ) + def test_resolve_metric_unit_for_spec_structured_only(self, col_unit): + """Helper: structured NOT legacy-expressible → populate structured, null legacy. + + Covers the round-trip path for EUR / compound / data_size / + count-with-code, which have no MetricUnit enum equivalent. Without + this branch, a metric authored with `unit: {kind: currency, code: EUR}` + would lose its unit on re-export (since the legacy field would be + null and the structured wouldn't be populated). + """ + from datajunction_server.database.node import _resolve_metric_unit_for_spec + + legacy, structured = _resolve_metric_unit_for_spec( + col_unit=col_unit, + legacy_from_md=None, + ) + assert legacy is None + # Helper returns a Unit instance; compare via canonical dict. + assert unit_to_dict(structured) == col_unit + + def test_uppercase_legacy_input_accepted(self): # noqa: ANN201 + """Legacy parser accepts case-insensitive input (existing behavior).""" + spec = MetricSpec( + name="legacy_compat.m", + query="SELECT 1", + unit="DOLLAR", + ) + assert spec.unit_enum == MetricUnit.DOLLAR + + +class TestUnknownDoesNotLeak: + """ + Code-review fix #1: `MetricUnit.UNKNOWN` is the DB-column default for + any metric that never had a unit authored. It must not leak as + `unit: unknown` in YAML re-exports or any output surface. + """ + + def test_metric_spec_unit_property_treats_unknown_as_none(self): + spec = MetricSpec( + name="m", + query="SELECT 1", + unit=MetricUnit.UNKNOWN, + ) + # unit_enum is UNKNOWN internally, but the public `unit` view + # returns None — UNKNOWN is the legacy "no unit" sentinel. + assert spec.unit_enum == MetricUnit.UNKNOWN + assert spec.unit is None + + def test_yaml_export_does_not_emit_unknown(self): + """Round-trip: a metric with the UNKNOWN sentinel must not produce + `unit: unknown` in YAML.""" + from datajunction_server.internal.namespaces import node_spec_to_yaml + + spec = MetricSpec( + name="m", + query="SELECT 1", + unit=MetricUnit.UNKNOWN, + ) + output = node_spec_to_yaml(spec) + assert "unit:" not in output + assert "unknown" not in output + + +class TestEqIgnoresInputShape: + """ + Code-review fix #2: __eq__ must compare units by canonical form so + `unit: dollar` (legacy) and `unit: {kind: currency, code: USD}` + (structured) are equal for the same conceptual unit — preventing + spurious drift detection on redeploy / git-sync. + """ + + def test_legacy_and_structured_usd_compare_equal(self): + a = MetricSpec(name="m", query="SELECT 1", unit="dollar") + b = MetricSpec( + name="m", + query="SELECT 1", + unit={"kind": "currency", "code": "USD"}, + ) + assert a == b + + def test_legacy_percentage_equals_structured_percentage(self): + a = MetricSpec(name="m", query="SELECT 1", unit="percentage") + b = MetricSpec(name="m", query="SELECT 1", unit={"kind": "percentage"}) + assert a == b + + def test_different_currencies_not_equal(self): + a = MetricSpec(name="m", query="SELECT 1", unit="dollar") + b = MetricSpec( + name="m", + query="SELECT 1", + unit={"kind": "currency", "code": "EUR"}, + ) + assert a != b + + def test_no_unit_and_unknown_unit_compare_equal(self): + """UNKNOWN is the legacy "no unit" sentinel; treat it as equivalent + to no unit set on the other side.""" + a = MetricSpec(name="m", query="SELECT 1") + b = MetricSpec(name="m", query="SELECT 1", unit=MetricUnit.UNKNOWN) + assert a == b + + +class TestUnitToDictCanonicalShape: + """ + Code-review fix #3: unit_to_dict produces a stable JSON-friendly dict + that drops None-valued keys, so storage is byte-identical regardless + of whether the input came via the structured (model_dump) path or the + legacy translation table. + """ + + def test_unitless_atomic_drops_code_key(self): + from datajunction_server.models.unit import ( + AtomicUnit, + UnitKind, + unit_to_dict, + ) + + u = AtomicUnit(kind=UnitKind.UNITLESS) + # model_dump() default would emit {'kind': UnitKind.UNITLESS, 'code': None}. + # unit_to_dict yields a JSON-friendly, none-stripped form matching + # what the legacy translation table stores. + assert unit_to_dict(u) == {"kind": "unitless"} + + def test_currency_with_code(self): + from datajunction_server.models.unit import ( + AtomicUnit, + UnitKind, + unit_to_dict, + ) + + assert unit_to_dict( + AtomicUnit(kind=UnitKind.CURRENCY, code="USD"), + ) == {"kind": "currency", "code": "USD"} + + def test_dict_input_passes_through_with_none_stripped(self): + from datajunction_server.models.unit import unit_to_dict + + assert unit_to_dict({"kind": "unitless", "code": None}) == {"kind": "unitless"} + assert unit_to_dict( + {"kind": "currency", "code": "USD"}, + ) == {"kind": "currency", "code": "USD"} + + def test_none_returns_none(self): + from datajunction_server.models.unit import unit_to_dict + + assert unit_to_dict(None) is None + + def test_storage_paths_produce_identical_shape(self): + """The legacy-translation path and the structured-input path must + produce the same canonical value for a conceptually identical unit. + Comparing via unit_to_dict normalizes both Unit and dict inputs.""" + from datajunction_server.models.unit import ( + AtomicUnit, + UnitKind, + legacy_unit_to_structured, + unit_to_dict, + ) + + legacy = legacy_unit_to_structured(MetricUnit.UNITLESS) + structured = AtomicUnit(kind=UnitKind.UNITLESS) + # Frozen Unit equality directly: + assert legacy == structured + # And the JSONB-canonical dict shape matches too: + assert unit_to_dict(legacy) == unit_to_dict(structured) + + +class TestBadDictErrorUx: + """ + Code-review fix #4: a malformed structured `unit` dict must surface as + a clean `DJInvalidInputException` echoing the offending input, not a + raw Pydantic ValidationError that leaks internal field names. + """ + + @pytest.mark.parametrize( + "bad_unit", + [ + {"kind": "bogus"}, # bad kind + {"kind": "time", "code": "seconds"}, # bad time code + {"kind": "currency", "code": "usd"}, # lowercase currency + {"foo": "bar"}, # no kind / numerator + ], + ) + def test_bad_dict_raises_djinvalid_input(self, bad_unit): + from datajunction_server.errors import DJInvalidInputException + + with pytest.raises(DJInvalidInputException) as exc_info: + MetricSpec(name="m", query="SELECT 1", unit=bad_unit) + # Message echoes the offending dict so users know what they sent. + assert "Invalid metric unit" in str(exc_info.value) + # Internal field name must not leak. + assert "unit_structured" not in str(exc_info.value) + + +class TestFalsyInputGate: + """ + Code-review fix #5: empty string / empty dict must be tolerated as + "no unit authored", matching the historical `if unit:` permissive gate. + """ + + @pytest.mark.parametrize("falsy", [None, "", {}]) + def test_falsy_unit_treated_as_unset(self, falsy): + spec = MetricSpec(name="m", query="SELECT 1", unit=falsy) + assert spec.unit_enum is None + assert spec.unit_structured is None + assert spec.unit is None diff --git a/datajunction-server/tests/internal/namespaces_test.py b/datajunction-server/tests/internal/namespaces_test.py index 97322fe6b..caa319de7 100644 --- a/datajunction-server/tests/internal/namespaces_test.py +++ b/datajunction-server/tests/internal/namespaces_test.py @@ -389,6 +389,141 @@ def test_tags_preserve_order_on_merge(self): tag_lines = [line for line in result.splitlines() if " - " in line] assert tag_lines == [" - ratio_metric", " - core"] + def test_metric_with_legacy_string_unit_round_trips(self): + """Metric authored with legacy `unit: dollar` emits `unit: dollar` on export.""" + spec = MetricSpec( + name="ns.metrics.revenue", + node_type=NodeType.METRIC, + unit="dollar", + query="SELECT SUM(amount) FROM ns.transforms.t", + ) + output = node_spec_to_yaml(spec) + assert "unit: dollar" in output + # Not a structured dict in the legacy case. + assert "kind:" not in output + + def test_metric_with_structured_unit_emits_at_spec_level(self): + """Structured `unit:` at the metric spec level emits as a nested dict.""" + spec = MetricSpec( + name="ns.metrics.revenue_eur", + node_type=NodeType.METRIC, + unit={"kind": "currency", "code": "EUR"}, + query="SELECT SUM(amount_eur) FROM ns.transforms.t", + ) + output = node_spec_to_yaml(spec) + # Emitted at spec level, not on a columns block. + assert "unit:" in output + assert "kind: currency" in output + assert "code: EUR" in output + + def test_metric_with_compound_unit_emits_nested(self): + spec = MetricSpec( + name="ns.metrics.qps", + node_type=NodeType.METRIC, + unit={ + "numerator": {"kind": "count"}, + "denominator": {"kind": "time", "code": "s"}, + }, + query="SELECT 1", + ) + output = node_spec_to_yaml(spec) + assert "numerator:" in output + assert "denominator:" in output + assert "kind: time" in output + assert "code: s" in output + + def test_metric_export_suppresses_per_column_unit(self): + """A metric's columns[].unit must NOT appear in YAML export — the metric + emits its unit at the spec top level instead.""" + spec = MetricSpec( + name="ns.metrics.revenue", + node_type=NodeType.METRIC, + unit={"kind": "currency", "code": "USD"}, + query="SELECT SUM(amount) FROM ns.transforms.t", + columns=[ + { + "name": "revenue", + "unit": {"kind": "currency", "code": "USD"}, + }, + ], + ) + output = node_spec_to_yaml(spec) + # Spec-level unit is present... + assert "kind: currency" in output + # ...but columns block (if present) does not contain a duplicate unit + if "columns:" in output: + # Find columns block and inspect its body + columns_idx = output.find("columns:") + columns_block = output[columns_idx:] + # The metric-level `unit:` line lives before `columns:` — so any + # unit-related text after `columns:` would be a per-column emit. + assert "unit:" not in columns_block + + def test_column_with_unit_is_exported(self): + """A column whose only customization is a unit must still be exported. + + `_has_column_customizations` recognizes `unit` alongside display_name, + description, attributes, and partition; without that, a unit-only + column would be treated as "unmodified" and silently dropped. + """ + spec = TransformSpec( + name="ns.transforms.t", + node_type=NodeType.TRANSFORM, + query="SELECT revenue FROM ns.source.s", + columns=[ + { + "name": "revenue", + "unit": {"kind": "currency", "code": "USD"}, + }, + ], + ) + lines = node_spec_to_yaml(spec).splitlines() + assert "columns:" in lines + assert " - name: revenue" in lines + # Unit appears as a nested mapping; check key + values are present + unit_idx = next(i for i, line in enumerate(lines) if line.strip() == "unit:") + # The following lines should be the kind/code mapping + assert "kind: currency" in lines[unit_idx + 1] + assert "code: USD" in lines[unit_idx + 2] + + def test_column_without_unit_is_not_exported_with_noise(self): + """A column with no customizations (including no unit) is omitted from YAML. + + The new `unit` field must not introduce `unit: null` lines on every + column when no unit is set. + """ + spec = TransformSpec( + name="ns.transforms.t", + node_type=NodeType.TRANSFORM, + query="SELECT id FROM ns.source.s", + columns=[{"name": "id"}], # no unit, no customizations + ) + output = node_spec_to_yaml(spec) + assert "unit:" not in output + assert "columns:" not in output # column itself filtered out + + def test_compound_unit_round_trips(self): + """Compound units serialize cleanly and the structure is preserved.""" + spec = TransformSpec( + name="ns.transforms.t", + node_type=NodeType.TRANSFORM, + query="SELECT qps FROM ns.source.s", + columns=[ + { + "name": "qps", + "unit": { + "numerator": {"kind": "count"}, + "denominator": {"kind": "time", "code": "s"}, + }, + }, + ], + ) + output = node_spec_to_yaml(spec) + assert "numerator:" in output + assert "denominator:" in output + assert "kind: time" in output + assert "code: s" in output + def test_column_attributes_are_sorted(self): """column attributes are sorted alphabetically regardless of input order""" spec = TransformSpec( diff --git a/datajunction-server/tests/models/deployment_test.py b/datajunction-server/tests/models/deployment_test.py index 7c20c8260..d593c3900 100644 --- a/datajunction-server/tests/models/deployment_test.py +++ b/datajunction-server/tests/models/deployment_test.py @@ -73,6 +73,79 @@ def test_metric_spec(): assert not metric_spec.__eq__(other_metric_spec) +def test_metric_spec_accepts_legacy_string_unit(): + """Legacy `unit: dollar` (string) parses into unit_enum, leaves unit_structured None.""" + spec = MetricSpec(name="m", query="SELECT 1", unit="dollar") + assert spec.unit_enum == MetricUnit.DOLLAR + assert spec.unit_structured is None + assert spec.unit == "dollar" + + +def test_metric_spec_accepts_metric_unit_enum_directly(): + spec = MetricSpec(name="m", query="SELECT 1", unit=MetricUnit.SECOND) + assert spec.unit_enum == MetricUnit.SECOND + assert spec.unit_structured is None + assert spec.unit == "second" + + +def test_metric_spec_accepts_structured_unit_dict(): + """Structured `unit: {kind: ..., code: ...}` parses into unit_structured.""" + from datajunction_server.models.unit import AtomicUnit, UnitKind + + spec = MetricSpec( + name="m", + query="SELECT 1", + unit={"kind": "currency", "code": "USD"}, + ) + assert spec.unit_enum is None + assert isinstance(spec.unit_structured, AtomicUnit) + assert spec.unit_structured.kind == UnitKind.CURRENCY + assert spec.unit_structured.code == "USD" + assert spec.unit == {"kind": "currency", "code": "USD"} + + +def test_metric_spec_accepts_compound_unit_dict(): + """Compound units round-trip through MetricSpec.""" + from datajunction_server.models.unit import CompoundUnit + + spec = MetricSpec( + name="m", + query="SELECT 1", + unit={ + "numerator": {"kind": "count", "code": "clicks"}, + "denominator": {"kind": "count", "code": "impressions"}, + }, + ) + assert isinstance(spec.unit_structured, CompoundUnit) + assert spec.unit_structured.numerator.code == "clicks" + assert spec.unit_structured.denominator.code == "impressions" + assert spec.unit == { + "numerator": {"kind": "count", "code": "clicks"}, + "denominator": {"kind": "count", "code": "impressions"}, + } + + +def test_metric_spec_rejects_unknown_legacy_string(): + from datajunction_server.errors import DJInvalidInputException + + with pytest.raises(DJInvalidInputException, match="Invalid metric unit"): + MetricSpec(name="m", query="SELECT 1", unit="dollars") + + +def test_metric_spec_rejects_non_string_non_dict_unit(): + from datajunction_server.errors import DJInvalidInputException + + with pytest.raises(DJInvalidInputException, match="must be a string or"): + MetricSpec(name="m", query="SELECT 1", unit=42) + + +def test_metric_spec_no_unit_set(): + spec = MetricSpec(name="m", query="SELECT 1") + assert spec.unit_enum is None + assert spec.unit_structured is None + assert spec.unit is None + + def test_reference_link_spec(): link_spec = DimensionReferenceLinkSpec( role="test_role", diff --git a/datajunction-server/tests/models/unit_test.py b/datajunction-server/tests/models/unit_test.py index 14b73010f..223dfaef2 100644 --- a/datajunction-server/tests/models/unit_test.py +++ b/datajunction-server/tests/models/unit_test.py @@ -6,11 +6,15 @@ from pydantic import TypeAdapter, ValidationError from datajunction_server.models.deployment import ColumnSpec +from datajunction_server.models.node import MetricUnit from datajunction_server.models.unit import ( AtomicUnit, CompoundUnit, Unit, UnitKind, + legacy_unit_to_structured, + structured_to_legacy_unit, + unit_to_dict, ) _unit_adapter: TypeAdapter[Unit] = TypeAdapter(Unit) @@ -206,3 +210,219 @@ def test_column_spec_round_trip_via_dump(self) -> None: assert dumped["unit"] == {"kind": "currency", "code": "USD"} roundtripped = ColumnSpec.model_validate(dumped) assert roundtripped == spec + + +class TestUnitTypeDecorator: + """ + The SQLAlchemy TypeDecorator converts between JSONB dicts and Unit + pydantic instances at the ORM boundary. Read paths return typed + `Unit`; write paths accept either `Unit` or `dict` and canonicalize. + """ + + def test_process_bind_param_accepts_unit_model(self) -> None: + from datajunction_server.models.unit import ( + AtomicUnit, + UnitKind, + UnitTypeDecorator, + ) + + dec = UnitTypeDecorator() + u = AtomicUnit(kind=UnitKind.CURRENCY, code="USD") + assert dec.process_bind_param(u, None) == { + "kind": "currency", + "code": "USD", + } + + def test_process_bind_param_accepts_raw_dict(self) -> None: + from datajunction_server.models.unit import UnitTypeDecorator + + dec = UnitTypeDecorator() + assert dec.process_bind_param( + {"kind": "unitless", "code": None}, + None, + ) == {"kind": "unitless"} + + def test_process_bind_param_none_passthrough(self) -> None: + from datajunction_server.models.unit import UnitTypeDecorator + + dec = UnitTypeDecorator() + assert dec.process_bind_param(None, None) is None + + def test_process_result_value_atomic(self) -> None: + from datajunction_server.models.unit import ( + AtomicUnit, + UnitKind, + UnitTypeDecorator, + ) + + dec = UnitTypeDecorator() + result = dec.process_result_value( + {"kind": "currency", "code": "USD"}, + None, + ) + assert isinstance(result, AtomicUnit) + assert result.kind == UnitKind.CURRENCY + assert result.code == "USD" + + def test_process_result_value_compound(self) -> None: + from datajunction_server.models.unit import ( + CompoundUnit, + UnitTypeDecorator, + ) + + dec = UnitTypeDecorator() + result = dec.process_result_value( + { + "numerator": {"kind": "count"}, + "denominator": {"kind": "time", "code": "s"}, + }, + None, + ) + assert isinstance(result, CompoundUnit) + assert result.numerator.code is None + assert result.denominator.code == "s" + + def test_process_result_value_none_passthrough(self) -> None: + from datajunction_server.models.unit import UnitTypeDecorator + + dec = UnitTypeDecorator() + assert dec.process_result_value(None, None) is None + + def test_round_trip_idempotent(self) -> None: + """write(read(write(x))) == write(x) — canonical shape is stable.""" + from datajunction_server.models.unit import ( + AtomicUnit, + UnitKind, + UnitTypeDecorator, + ) + + dec = UnitTypeDecorator() + original = AtomicUnit(kind=UnitKind.UNITLESS) + wire1 = dec.process_bind_param(original, None) + unit_back = dec.process_result_value(wire1, None) + wire2 = dec.process_bind_param(unit_back, None) + assert wire1 == wire2 == {"kind": "unitless"} + + +class TestLegacyUnitTranslation: + """ + Coverage for `legacy_unit_to_structured` and `structured_to_legacy_unit`. + """ + + @pytest.mark.parametrize( + ("legacy", "structured"), + [ + (MetricUnit.UNKNOWN, None), + (MetricUnit.UNITLESS, {"kind": "unitless"}), + (MetricUnit.PERCENTAGE, {"kind": "percentage"}), + (MetricUnit.PROPORTION, {"kind": "proportion"}), + (MetricUnit.DOLLAR, {"kind": "currency", "code": "USD"}), + (MetricUnit.MILLISECOND, {"kind": "time", "code": "ms"}), + (MetricUnit.SECOND, {"kind": "time", "code": "s"}), + (MetricUnit.MINUTE, {"kind": "time", "code": "min"}), + (MetricUnit.HOUR, {"kind": "time", "code": "h"}), + (MetricUnit.DAY, {"kind": "time", "code": "d"}), + (MetricUnit.WEEK, {"kind": "time", "code": "wk"}), + (MetricUnit.MONTH, {"kind": "time", "code": "mo"}), + (MetricUnit.YEAR, {"kind": "time", "code": "yr"}), + ], + ) + def test_forward_translation_matches_table( + self, + legacy: MetricUnit, + structured: dict | None, + ) -> None: + # Returns a Unit instance (or None); compare via canonical dict. + assert unit_to_dict(legacy_unit_to_structured(legacy)) == structured + + def test_forward_translation_handles_none(self) -> None: + assert legacy_unit_to_structured(None) is None + + def test_forward_translation_byte_maps_to_data_size(self) -> None: + # BYTE has a clean structured equivalent under DATA_SIZE. + assert unit_to_dict(legacy_unit_to_structured(MetricUnit.BYTE)) == { + "kind": "data_size", + "code": "B", + } + + def test_forward_translation_bit_unmapped(self) -> None: + # BIT has no entry in DATA_SIZE_CODES (bytes-only) so it maps to + # None until/unless someone needs it. + assert legacy_unit_to_structured(MetricUnit.BIT) is None + + @pytest.mark.parametrize( + ("structured", "expected"), + [ + ({"kind": "unitless"}, MetricUnit.UNITLESS), + ({"kind": "percentage"}, MetricUnit.PERCENTAGE), + ({"kind": "proportion"}, MetricUnit.PROPORTION), + ({"kind": "currency", "code": "USD"}, MetricUnit.DOLLAR), + ({"kind": "time", "code": "ms"}, MetricUnit.MILLISECOND), + ({"kind": "time", "code": "s"}, MetricUnit.SECOND), + ({"kind": "time", "code": "min"}, MetricUnit.MINUTE), + ({"kind": "time", "code": "h"}, MetricUnit.HOUR), + ({"kind": "time", "code": "d"}, MetricUnit.DAY), + ({"kind": "time", "code": "wk"}, MetricUnit.WEEK), + ({"kind": "time", "code": "mo"}, MetricUnit.MONTH), + ({"kind": "time", "code": "yr"}, MetricUnit.YEAR), + ({"kind": "data_size", "code": "B"}, MetricUnit.BYTE), + ], + ) + def test_reverse_translation_matches_table( + self, + structured: dict, + expected: MetricUnit, + ) -> None: + assert structured_to_legacy_unit(structured) == expected + + @pytest.mark.parametrize( + "structured", + [ + None, + {"kind": "currency", "code": "EUR"}, # non-USD + {"kind": "currency", "code": None}, # currency with no code + {"kind": "data_size", "code": "MB"}, # only BYTE has legacy form + {"kind": "count", "code": "clicks"}, # no legacy equivalent + {"kind": "count"}, # no legacy equivalent + { + "numerator": {"kind": "count"}, + "denominator": {"kind": "time", "code": "s"}, + }, # compound + # time with a code that exists in the new vocabulary but not + # in any legacy enum member (today there is no such code, but + # this guards against future additions to TIME_CODES that + # aren't reflected in _LEGACY_NAME_TO_STRUCTURED). + {"kind": "time", "code": "fortnight"}, + ], + ) + def test_reverse_translation_returns_none_for_inexpressible( + self, + structured: dict | None, + ) -> None: + assert structured_to_legacy_unit(structured) is None + + @pytest.mark.parametrize( + "legacy", + [ + MetricUnit.UNITLESS, + MetricUnit.PERCENTAGE, + MetricUnit.PROPORTION, + MetricUnit.DOLLAR, + MetricUnit.MILLISECOND, + MetricUnit.SECOND, + MetricUnit.MINUTE, + MetricUnit.HOUR, + MetricUnit.DAY, + MetricUnit.WEEK, + MetricUnit.MONTH, + MetricUnit.YEAR, + MetricUnit.BYTE, + ], + ) + def test_round_trip_legacy_to_structured_to_legacy( + self, + legacy: MetricUnit, + ) -> None: + structured = legacy_unit_to_structured(legacy) + assert structured is not None + assert structured_to_legacy_unit(structured) == legacy