diff --git a/docs/schema/PermissibleValueDerivation.md b/docs/schema/PermissibleValueDerivation.md index 5a725450..420adb3f 100644 --- a/docs/schema/PermissibleValueDerivation.md +++ b/docs/schema/PermissibleValueDerivation.md @@ -152,7 +152,7 @@ URI: [linkmlmap:PermissibleValueDerivation](https://w3id.org/linkml/transformer/ | --- | --- | --- | --- | | [name](name.md) | 1
[String](String.md) | Target permissible value text | direct | | [expr](expr.md) | 0..1
[String](String.md) | | direct | -| [populated_from](populated_from.md) | 0..1
[String](String.md) | Source permissible value that maps to this target permissible value | direct | +| [populated_from](populated_from.md) | *
[String](String.md) | Source permissible value(s) that map to this target permissible value | direct | | [sources](sources.md) | *
[String](String.md) | Deprecated | direct | | [hide](hide.md) | 0..1
[Boolean](Boolean.md) | | direct | | [copy_directives](copy_directives.md) | *
[CopyDirective](CopyDirective.md) | | [ElementDerivation](ElementDerivation.md) | @@ -257,7 +257,9 @@ attributes: range: string populated_from: name: populated_from - description: Source permissible value that maps to this target permissible value. + description: Source permissible value(s) that map to this target permissible value. + Accepts a single value or a list; scalar input is normalized to a one-element + list at load time. from_schema: https://w3id.org/linkml/transformer domain_of: - ClassDerivation @@ -265,11 +267,13 @@ attributes: - EnumDerivation - PermissibleValueDerivation range: string + multivalued: true sources: name: sources description: Deprecated. Use populated_from instead. - deprecated: Deprecated. Use populated_from instead. See https://github.com/linkml/linkml-map/issues/193 - for planned list support in populated_from. Will be removed in a future version. + deprecated: Deprecated. Use populated_from instead, which now accepts a list. + See https://github.com/linkml/linkml-map/issues/193. Will be removed in a future + version. from_schema: https://w3id.org/linkml/transformer domain_of: - ClassDerivation @@ -327,7 +331,9 @@ attributes: range: string populated_from: name: populated_from - description: Source permissible value that maps to this target permissible value. + description: Source permissible value(s) that map to this target permissible value. + Accepts a single value or a list; scalar input is normalized to a one-element + list at load time. from_schema: https://w3id.org/linkml/transformer owner: PermissibleValueDerivation domain_of: @@ -336,11 +342,13 @@ attributes: - EnumDerivation - PermissibleValueDerivation range: string + multivalued: true sources: name: sources description: Deprecated. Use populated_from instead. - deprecated: Deprecated. Use populated_from instead. See https://github.com/linkml/linkml-map/issues/193 - for planned list support in populated_from. Will be removed in a future version. + deprecated: Deprecated. Use populated_from instead, which now accepts a list. + See https://github.com/linkml/linkml-map/issues/193. Will be removed in a future + version. from_schema: https://w3id.org/linkml/transformer owner: PermissibleValueDerivation domain_of: diff --git a/src/linkml_map/cli/cli.py b/src/linkml_map/cli/cli.py index b118c99a..0ded2c53 100644 --- a/src/linkml_map/cli/cli.py +++ b/src/linkml_map/cli/cli.py @@ -236,27 +236,27 @@ def _pre_flight_validate( ) -> None: """Surface static spec checks before performing a CLI action. - Runs deprecation checks and reference resolution against the loaded - specification, printing all findings to stderr. **Does not gate** — - even error-severity findings are informational only; the runtime - remains the authoritative arbiter of whether a transformation can - actually execute. Users who want fail-fast behavior should run - ``validate-spec --strict`` separately. - - Validates the merged ``tr.specification`` (after multi-file loading - is complete) so cross-file issues surface where users would see - them via ``validate-spec --merge``. Reuses ``tr.source_schemaview`` - and ``tr.target_schemaview`` when set, avoiding a duplicate load - of large or remote schemas. + Replays pre-normalize scan messages captured at spec-load time and runs + reference resolution, printing all findings to stderr. **Does not gate** — + findings are informational only; the runtime remains the authoritative + arbiter of whether a transformation can actually execute. Users who want + fail-fast behavior should run ``validate-spec --strict`` separately. + + Reading scan messages from ``tr.spec_messages`` rather than re-scanning + the post-migration spec means deprecations whose source fields were + cleared by normalization (e.g., ``object_derivations``, PV ``sources``) + are still surfaced here. Reuses ``tr.source_schemaview`` and + ``tr.target_schemaview`` when set, avoiding a duplicate load of large + or remote schemas. """ - from linkml_map.validator import check_deprecated_fields, validate_spec_semantics + from linkml_map.validator import validate_spec_semantics if tr.specification is None: return spec_dict = tr.specification.model_dump(exclude_none=True) - messages = list(check_deprecated_fields(spec_dict)) + messages = list(tr.spec_messages) messages.extend( validate_spec_semantics( spec_dict, diff --git a/src/linkml_map/compiler/templates/markdown.j2 b/src/linkml_map/compiler/templates/markdown.j2 index c3be8d6e..3dc50cac 100644 --- a/src/linkml_map/compiler/templates/markdown.j2 +++ b/src/linkml_map/compiler/templates/markdown.j2 @@ -20,6 +20,6 @@ | Target | Source | Info | | ------ | ------ | ---- | {%- for pvd in ed.permissible_value_derivations.values() %} -| {{ pvd.name }} | {{ pvd.populated_from }} | . | +| {{ pvd.name }} | {{ (pvd.populated_from or []) | join(", ") }} | . | {%- endfor -%} {% endfor %} diff --git a/src/linkml_map/datamodel/transformer_model.py b/src/linkml_map/datamodel/transformer_model.py index db1ee3aa..7ddc2b49 100644 --- a/src/linkml_map/datamodel/transformer_model.py +++ b/src/linkml_map/datamodel/transformer_model.py @@ -500,14 +500,13 @@ class PermissibleValueDerivation(ElementDerivation): expr: Optional[str] = Field(default=None, json_schema_extra = { "linkml_meta": {'domain_of': ['SlotDerivation', 'EnumDerivation', 'PermissibleValueDerivation']} }) - populated_from: Optional[str] = Field(default=None, description="""Source permissible value that maps to this target permissible value.""", json_schema_extra = { "linkml_meta": {'domain_of': ['ClassDerivation', + populated_from: Optional[list[str]] = Field(default_factory=list, description="""Source permissible value(s) that map to this target permissible value. Accepts a single value or a list; scalar input is normalized to a one-element list at load time.""", json_schema_extra = { "linkml_meta": {'domain_of': ['ClassDerivation', 'SlotDerivation', 'EnumDerivation', 'PermissibleValueDerivation']} }) - sources: Optional[list[str]] = Field(default_factory=list, description="""Deprecated. Use populated_from instead.""", json_schema_extra = { "linkml_meta": {'deprecated': 'Deprecated. Use populated_from instead. See ' - 'https://github.com/linkml/linkml-map/issues/193 for planned ' - 'list support in populated_from. Will be removed in a future ' - 'version.', + sources: Optional[list[str]] = Field(default_factory=list, description="""Deprecated. Use populated_from instead.""", json_schema_extra = { "linkml_meta": {'deprecated': 'Deprecated. Use populated_from instead, which now accepts a ' + 'list. See https://github.com/linkml/linkml-map/issues/193. ' + 'Will be removed in a future version.', 'domain_of': ['ClassDerivation', 'SlotDerivation', 'EnumDerivation', diff --git a/src/linkml_map/datamodel/transformer_model.yaml b/src/linkml_map/datamodel/transformer_model.yaml index 948816af..c012bf8e 100644 --- a/src/linkml_map/datamodel/transformer_model.yaml +++ b/src/linkml_map/datamodel/transformer_model.yaml @@ -486,15 +486,18 @@ classes: range: string populated_from: range: string + multivalued: true description: >- - Source permissible value that maps to this target permissible value. + Source permissible value(s) that map to this target permissible value. + Accepts a single value or a list; scalar input is normalized to a + one-element list at load time. sources: range: string multivalued: true deprecated: >- - Deprecated. Use populated_from instead. - See https://github.com/linkml/linkml-map/issues/193 for planned - list support in populated_from. Will be removed in a future version. + Deprecated. Use populated_from instead, which now accepts a list. + See https://github.com/linkml/linkml-map/issues/193. Will be removed + in a future version. description: >- Deprecated. Use populated_from instead. hide: diff --git a/src/linkml_map/inference/inference.py b/src/linkml_map/inference/inference.py index b8a0e984..4e27cba6 100644 --- a/src/linkml_map/inference/inference.py +++ b/src/linkml_map/inference/inference.py @@ -1,33 +1,11 @@ """Infer missing values in a specification.""" -import warnings - from linkml_runtime import SchemaView from linkml_map.datamodel.transformer_model import TransformationSpecification from linkml_map.utils.fk_utils import resolve_fk_path -def _warn_deprecated_fields(specification: TransformationSpecification) -> None: - """Emit ``DeprecationWarning`` for any deprecated field usage. - - Thin shim over ``validator._check_deprecated_fields`` that re-emits - deprecation-category messages as ``DeprecationWarning`` so runtime - callers (and any code with Python ``warnings`` filters configured) - keep getting the same signal they always did. - - The same underlying check is consumed by static validation paths - (``validate-spec`` and pre-flight from other CLI commands) via - ``ValidationMessage`` records — see ``validator._check_deprecated_fields``. - """ - from linkml_map.validator import check_deprecated_fields - - spec_dict = specification.model_dump(exclude_none=True) - for msg in check_deprecated_fields(spec_dict): - if msg.category == "deprecated": - warnings.warn(msg.message, DeprecationWarning, stacklevel=3) - - def induce_missing_values(specification: TransformationSpecification, source_schemaview: SchemaView) -> None: """ Infer missing values in a specification. @@ -41,7 +19,6 @@ def induce_missing_values(specification: TransformationSpecification, source_sch for cd in specification.class_derivations: if not cd.populated_from: cd.populated_from = cd.name - _warn_deprecated_fields(specification) for cd in specification.class_derivations: for sd in cd.slot_derivations.values(): diff --git a/src/linkml_map/inference/inverter.py b/src/linkml_map/inference/inverter.py index e7bfad49..d39196f5 100644 --- a/src/linkml_map/inference/inverter.py +++ b/src/linkml_map/inference/inverter.py @@ -93,9 +93,18 @@ def invert_enum_derivation(self, ed: EnumDerivation, spec: TransformationSpecifi msg = "TODO: invert enum derivation with expression" raise NonInvertibleSpecificationError(msg) for pv_deriv in ed.permissible_value_derivations.values(): + sources = pv_deriv.populated_from or [pv_deriv.name] + if len(sources) > 1: + msg = ( + f"Cannot invert PermissibleValueDerivation '{pv_deriv.name}': " + f"populated_from has multiple source values {sources}, " + f"which is not a one-to-one mapping." + ) + raise NonInvertibleSpecificationError(msg) + inverted_name = sources[0] inverted_pv_deriv = PermissibleValueDerivation( - name=pv_deriv.populated_from if pv_deriv.populated_from else pv_deriv.name, - populated_from=pv_deriv.name, + name=inverted_name, + populated_from=[pv_deriv.name], ) inverted_ed.permissible_value_derivations[inverted_pv_deriv.name] = inverted_pv_deriv return inverted_ed diff --git a/src/linkml_map/inference/schema_mapper.py b/src/linkml_map/inference/schema_mapper.py index 1686109b..fe0c822f 100644 --- a/src/linkml_map/inference/schema_mapper.py +++ b/src/linkml_map/inference/schema_mapper.py @@ -311,16 +311,13 @@ def _derive_enum(self, enum_derivation: EnumDerivation) -> EnumDefinition: target_enum.attributes = {} target_enum.slot_usage = {} for pv_derivation in enum_derivation.permissible_value_derivations.values(): - if pv_derivation.populated_from: - pv = PermissibleValue(text=pv_derivation.populated_from) - target_enum.permissible_values[pv.text] = pv - elif pv_derivation.sources: - for source in pv_derivation.sources: - pv = PermissibleValue(text=source) - target_enum.permissible_values[pv.text] = pv - else: + sources = pv_derivation.populated_from or pv_derivation.sources + if not sources: msg = f"Missing populated_from or sources for {pv_derivation}" raise ValueError(msg) + for source in sources: + pv = PermissibleValue(text=source) + target_enum.permissible_values[pv.text] = pv if enum_derivation.mirror_source: for pv in source_enum.permissible_values.values(): if pv.text not in target_enum.permissible_values: diff --git a/src/linkml_map/transformer/errors.py b/src/linkml_map/transformer/errors.py index c4209171..18051622 100644 --- a/src/linkml_map/transformer/errors.py +++ b/src/linkml_map/transformer/errors.py @@ -6,6 +6,18 @@ from typing import Any +class SpecificationError(ValueError): + """The transformation specification is malformed or internally inconsistent. + + Raised during spec loading when the pre-normalize scan detects a + user-intent error that the runtime cannot disambiguate (e.g., setting + both ``populated_from`` and ``sources`` on the same derivation, or + setting both ``object_derivations`` and ``class_derivations`` on a + slot). Subclasses ``ValueError`` for backward compatibility with + callers that already catch ``ValueError`` from ``_normalize_spec_dict``. + """ + + @dataclass class TransformationError(Exception): """A row-level error during data transformation. diff --git a/src/linkml_map/transformer/object_transformer.py b/src/linkml_map/transformer/object_transformer.py index 665ecc6a..0a227def 100644 --- a/src/linkml_map/transformer/object_transformer.py +++ b/src/linkml_map/transformer/object_transformer.py @@ -1164,9 +1164,16 @@ def transform_enum(self, source_value: str, enum_names: list[str], source_obj: A if v is not None: return v for pv_deriv in enum_deriv.permissible_value_derivations.values(): - if source_value == pv_deriv.populated_from: - return pv_deriv.name - if source_value in pv_deriv.sources: + if pv_deriv.sources: + from linkml_map.transformer.errors import SpecificationError + + msg = ( + f"PermissibleValueDerivation '{pv_deriv.name}' has 'sources' set; " + "this should have been migrated to 'populated_from' during spec load. " + "Did the spec bypass Transformer._normalize_spec_dict?" + ) + raise SpecificationError(msg) + if pv_deriv.populated_from and source_value in pv_deriv.populated_from: return pv_deriv.name if enum_deriv.mirror_source: return str(source_value) diff --git a/src/linkml_map/transformer/transformer.py b/src/linkml_map/transformer/transformer.py index 307d0227..e502fe35 100644 --- a/src/linkml_map/transformer/transformer.py +++ b/src/linkml_map/transformer/transformer.py @@ -3,6 +3,7 @@ import logging import warnings from abc import ABC +from collections.abc import Iterator from copy import deepcopy from dataclasses import dataclass, field from pathlib import Path @@ -31,6 +32,24 @@ logger = logging.getLogger(__name__) +def _iter_values_or_list(container: Any) -> Iterator[dict[str, Any]]: + """Yield each dict in a derivations section (dict-keyed or list form). + + Doesn't expand compact-key list items — used both pre-SHAPE (where + compact-key items are still present and the caller is responsible for + expanding them via ``_expand_compact_keys`` if it needs to descend + into them) and post-SHAPE (where everything is already canonical). + """ + if isinstance(container, dict): + for v in container.values(): + if isinstance(v, dict): + yield v + elif isinstance(container, list): + for item in container: + if isinstance(item, dict): + yield item + + OBJECT_TYPE = dict[str, Any] | BaseModel | YAMLRoot """An object can be a plain python dict, a pydantic object, or a linkml YAMLRoot""" @@ -93,6 +112,17 @@ class Transformer(ABC): _curie_converter: Converter = None + spec_messages: list[Any] = field(default_factory=list) + """Scan messages captured at spec-load time. + + Populated by the three load methods (``load_transformer_specification``, + ``load_transformer_specifications``, ``create_transformer_specification``) + with the ``ValidationMessage`` list returned by ``_normalize_spec_dict``. + Pre-flight validation reads these instead of re-scanning the post-migration + spec, so deprecations whose source fields were cleared by normalization + (e.g., ``object_derivations``, PV ``sources``) are still surfaced. + """ + def map_object(self, obj: OBJECT_TYPE, source_type: str | None = None, **kwargs: Any) -> OBJECT_TYPE: """ Transform source object into an instance of the target class. @@ -135,7 +165,7 @@ def load_transformer_specification(self, path: str | Path) -> None: """ with open(path) as f: obj = yaml.safe_load(f) - self._normalize_spec_dict(obj) + self.spec_messages = self._normalize_spec_dict(obj) self.specification = TransformationSpecification(**obj) def load_transformer_specifications(self, paths: tuple[str | Path, ...]) -> None: @@ -151,12 +181,18 @@ def load_transformer_specifications(self, paths: tuple[str | Path, ...]) -> None from linkml_map.utils.spec_merge import load_and_merge_specs obj = load_and_merge_specs(paths) - self._normalize_spec_dict(obj) + self.spec_messages = self._normalize_spec_dict(obj) self.specification = TransformationSpecification(**obj) @classmethod def normalize_transform_spec(cls, obj: dict[str, Any], normalizer: ReferenceValidator) -> dict: - """Recursively normalize class_derivations and flatten object_derivations.""" + """Shape-canonicalize class_derivations recursively. + + Pure shape work — no field migrations. ``object_derivations`` + flattening and ``populated_from`` inheritance happen in the MIGRATE + phase of :meth:`_normalize_spec_dict` so the SCAN phase between + them sees the user's original field values. + """ obj = normalizer.normalize(obj) class_derivations = obj.get("class_derivations", []) @@ -167,12 +203,11 @@ def normalize_transform_spec(cls, obj: dict[str, Any], normalizer: ReferenceVali for class_spec in cd_iter: if not isinstance(class_spec, dict): continue - parent_source = class_spec.get("populated_from") slot_derivations = class_spec.get("slot_derivations", {}) for slot_name, slot_spec in slot_derivations.items(): if slot_spec.get("value") is not None and slot_spec.get("range") is None: slot_spec["range"] = "string" - cls._normalize_slot_class_derivations(slot_name, slot_spec, normalizer, parent_source) + cls._normalize_slot_class_derivations(slot_name, slot_spec, normalizer) return obj @classmethod @@ -181,52 +216,22 @@ def _normalize_slot_class_derivations( slot_name: str, slot_spec: dict[str, Any], normalizer: ReferenceValidator, - parent_populated_from: str | None = None, ) -> None: - """Flatten object_derivations and normalize class_derivations on a slot. - - Four steps, applied recursively to nested slots: - 1. Flatten ``object_derivations`` into ``class_derivations`` (with - deprecation warning; error if both are present). - 2. Expand compact-key entries (``- Condition: {...}`` → ``{name: Condition, ...}``). - 3. Inherit ``populated_from`` from the parent class derivation when absent. - 4. Run the normalizer on each class derivation entry so that nested - dict-keyed ``slot_derivations`` get ``name`` injected. + """Shape-canonicalize the ``class_derivations`` on a slot, recursively. + + Two steps, applied recursively to nested slots: + + 1. Expand compact-key entries (``- Condition: {...}`` → + ``{name: Condition, ...}``). + 2. Run the normalizer on each class derivation entry so dict-keyed + ``slot_derivations`` get ``name`` injected and other shape + canonicalization happens. + + Pure shape — no field semantics. ``object_derivations`` flattening + and ``populated_from`` inheritance live in the MIGRATE phase of + :meth:`_normalize_spec_dict`, not here, so the SCAN phase sees the + user's original field values. """ - # Step 1: flatten object_derivations → class_derivations - object_derivations = slot_spec.get("object_derivations", []) - if object_derivations: - if slot_spec.get("class_derivations"): - msg = ( - f"SlotDerivation '{slot_name}' has both 'object_derivations' and " - f"'class_derivations'. Remove 'object_derivations' and use " - f"'class_derivations' only." - ) - raise ValueError(msg) - - warnings.warn( - f"SlotDerivation '{slot_name}' uses 'object_derivations', which is " - f"deprecated. Use list-based class_derivations instead. " - f"'object_derivations' will be removed in a future version. " - f"See https://github.com/linkml/linkml-map/issues/112", - DeprecationWarning, - stacklevel=4, - ) - - flattened: list[dict[str, Any]] = [] - for od in object_derivations: - od_cd = od.get("class_derivations", {}) - if isinstance(od_cd, dict): - for name, body in od_cd.items(): - entry = body if isinstance(body, dict) else {} - entry.setdefault("name", name) - flattened.append(entry) - elif isinstance(od_cd, list): - flattened.extend(od_cd) - slot_spec["class_derivations"] = flattened - del slot_spec["object_derivations"] - - # Steps 2-4: expand compact keys, inherit populated_from, normalize, and recurse slot_cd = slot_spec.get("class_derivations") if not isinstance(slot_cd, list): return @@ -236,33 +241,276 @@ def _normalize_slot_class_derivations( for cd_entry in slot_cd: if not isinstance(cd_entry, dict): continue - if not cd_entry.get("populated_from") and parent_populated_from: - cd_entry["populated_from"] = parent_populated_from normalized = normalizer.normalize(cd_entry) cd_entry.clear() cd_entry.update(normalized) - child_source = cd_entry.get("populated_from") for nested_name, nested_sd in cd_entry.get("slot_derivations", {}).items(): if isinstance(nested_sd, dict): - cls._normalize_slot_class_derivations(nested_name, nested_sd, normalizer, child_source) + cls._normalize_slot_class_derivations(nested_name, nested_sd, normalizer) @classmethod - def _normalize_spec_dict(cls, obj: dict[str, Any]) -> None: - """Normalize a raw specification dict in place. - - Bundles _preprocess_class_derivations, ReferenceValidator normalization, - and nested ObjectDerivation fixup into a single entry point. Mutates - ``obj`` by replacing its contents with the normalized result. + def _normalize_spec_dict(cls, obj: dict[str, Any], *, silent: bool = False) -> list[Any]: + """Normalize a raw specification dict in place. Return scan messages. + + Three clean phases, ordered so the SCAN sees a canonical shape with + the user's original field values: + + 1. **SHAPE** — pure structural canonicalization, no field semantics. + Expands compact-key list forms everywhere they can appear + (top-level class/enum derivations, nested class_derivations in + slots, OD-inner class_derivations, PV-level lists), then runs + ``ReferenceValidator`` for dict↔list canonicalization and name + injection. + 2. **SCAN** — :func:`linkml_map.validator.check_deprecated_fields` + walks the canonical shape, returning ``ValidationMessage`` + records for deprecated-field usage and ambiguous combinations. + Unless ``silent``, deprecation warnings fire as Python + ``DeprecationWarning`` and conflict errors raise + :class:`~linkml_map.transformer.errors.SpecificationError`. + 3. **MIGRATE** — all field semantics in one pass: + ``object_derivations`` flatten, ``populated_from`` inheritance + from parent class, PV scalar → list, PV explicit-``None`` strip, + PV ``sources`` → ``populated_from`` (clearing ``sources``). + + After this method, ``sources`` no longer appears on any + ``PermissibleValueDerivation`` — the runtime can rely on + ``populated_from`` as the single source of truth. :param obj: Raw specification dict (e.g. from YAML or user code). + :param silent: When ``True``, neither emit warnings nor raise on + errors — return them in the message list instead. Used by the + validator path so it can surface findings as + ``ValidationMessage`` records. + :returns: A list of :class:`ValidationMessage` records from the scan. """ - cls._preprocess_class_derivations(obj) + from linkml_map.transformer.errors import SpecificationError + from linkml_map.validator import check_deprecated_fields + + # SHAPE + cls._shape_normalize(obj) + + # SCAN + messages = check_deprecated_fields(obj) + + if not silent: + errors = [m for m in messages if m.severity == "error"] + if errors: + raise SpecificationError("; ".join(m.message for m in errors)) + for m in messages: + if m.category == "deprecated": + warnings.warn(m.message, DeprecationWarning, stacklevel=3) + + # MIGRATE + cls._flatten_object_derivations(obj) + cls._inherit_populated_from(obj) + cls._coerce_pv_populated_from_to_list(obj) + cls._migrate_pv_sources_to_populated_from(obj) + + return messages + + @classmethod + def _shape_normalize(cls, obj: dict[str, Any]) -> None: + """Canonicalize the structural shape of every derivation section. + + Expands compact-key list forms in every derivation-section location + (including OD-inner class_derivations and PV-level lists, which + ``ReferenceValidator`` doesn't canonicalize correctly), then runs + ``ReferenceValidator`` for dict↔list canonicalization and name + injection. No field semantics are mutated. + """ + cls._pre_shape_expand_compact_keys(obj) normalizer = ReferenceValidator(package_schemaview("linkml_map.datamodel.transformer_model")) normalizer.expand_all = True normalized = cls.normalize_transform_spec(obj, normalizer) obj.clear() obj.update(normalized) + @classmethod + def _pre_shape_expand_compact_keys(cls, obj: dict[str, Any]) -> None: + """Recursively expand compact-key list forms before ReferenceValidator. + + Compact-key list form (``[{Name: {body}}]``) is a linkml-map convention, + not a documented LinkML collection form. ``ReferenceValidator`` + therefore doesn't canonicalize it consistently — list-typed fields + (top-level class_derivations) are left as-is, and dict-typed fields + (permissible_value_derivations) get mangled to ``{None: ...}``. This + pre-pass converts compact-key items to explicit-name form everywhere + the linkml-map schema accepts a derivation section, so RV sees only + LinkML-canonical input. + + See https://github.com/linkml/linkml/issues/3529 for the upstream + behavior. The local pre-expansion makes us independent of how (or + whether) that gets resolved. + """ + cls._preprocess_class_derivations(obj) + cls._preprocess_enum_derivations(obj) + for cd in _iter_values_or_list(obj.get("class_derivations")): + cls._expand_compact_keys_in_class_deriv(cd) + + @classmethod + def _expand_compact_keys_in_class_deriv(cls, cd: dict[str, Any]) -> None: + """Expand compact-key list forms in a class derivation, recursively.""" + for sd in _iter_values_or_list(cd.get("slot_derivations")): + # Slot's own nested class_derivations (list-with-compact-keys form) + nested_cds = sd.get("class_derivations") + if isinstance(nested_cds, list): + cls._expand_compact_keys(nested_cds) + for ncd in _iter_values_or_list(nested_cds): + cls._expand_compact_keys_in_class_deriv(ncd) + # OD-inner class_derivations (deprecated form; flattened in MIGRATE) + ods = sd.get("object_derivations") + if isinstance(ods, list): + for od in ods: + if not isinstance(od, dict): + continue + od_cds = od.get("class_derivations") + if isinstance(od_cds, list): + cls._expand_compact_keys(od_cds) + for ncd in _iter_values_or_list(od_cds): + cls._expand_compact_keys_in_class_deriv(ncd) + + @staticmethod + def _preprocess_enum_derivations(obj: dict[str, Any]) -> None: + """Pre-process top-level enum_derivations and their PV sections. + + Handles two compact-key cases ReferenceValidator doesn't: + list-form enum_derivations with ``{Name: {...}}`` items, and + list-form permissible_value_derivations with the same shape (which + RV otherwise mangles into ``{None: ...}``). + """ + eds = obj.get("enum_derivations") + if isinstance(eds, dict): + for k, v in eds.items(): + if v is None: + eds[k] = {} + elif isinstance(eds, list): + Transformer._expand_compact_keys(eds) + for ed in _iter_values_or_list(obj.get("enum_derivations")): + pvs = ed.get("permissible_value_derivations") + if isinstance(pvs, list): + Transformer._expand_compact_keys(pvs) + + @classmethod + def _flatten_object_derivations(cls, obj: dict[str, Any]) -> None: + """Flatten ``object_derivations`` into ``class_derivations`` on every slot. + + Conflicting specs (both ``object_derivations`` and ``class_derivations`` + set) are caught upstream by the SCAN phase as ``severity="error"``, + so this method assumes a non-conflicting input. + """ + for cd in _iter_values_or_list(obj.get("class_derivations")): + cls._flatten_ods_in_class_deriv(cd) + + @classmethod + def _flatten_ods_in_class_deriv(cls, cd: dict[str, Any]) -> None: + """Recursively walk a class derivation, flattening OD on each slot.""" + for sd in _iter_values_or_list(cd.get("slot_derivations")): + ods = sd.get("object_derivations") + if ods: + flattened: list[dict[str, Any]] = [] + for od in ods: + if not isinstance(od, dict): + continue + od_cd = od.get("class_derivations", {}) + if isinstance(od_cd, dict): + for name, body in od_cd.items(): + entry = body if isinstance(body, dict) else {} + entry.setdefault("name", name) + flattened.append(entry) + elif isinstance(od_cd, list): + flattened.extend(od_cd) + sd["class_derivations"] = flattened + del sd["object_derivations"] + for ncd in _iter_values_or_list(sd.get("class_derivations")): + cls._flatten_ods_in_class_deriv(ncd) + + @classmethod + def _inherit_populated_from(cls, obj: dict[str, Any]) -> None: + """Propagate ``populated_from`` from parent class down to nested CDs. + + Walks each class_derivation's slot_derivations.class_derivations; if + a nested CD doesn't set ``populated_from``, it inherits from the + outer class's value. Recurses through arbitrarily deep nesting. + """ + for cd in _iter_values_or_list(obj.get("class_derivations")): + cls._inherit_pf_in_slots(cd.get("slot_derivations"), cd.get("populated_from")) + + @classmethod + def _inherit_pf_in_slots(cls, slots: Any, parent_pf: str | None) -> None: + """Recursively inherit populated_from into nested class_derivations.""" + for sd in _iter_values_or_list(slots): + for ncd in _iter_values_or_list(sd.get("class_derivations")): + if not ncd.get("populated_from") and parent_pf: + ncd["populated_from"] = parent_pf + cls._inherit_pf_in_slots(ncd.get("slot_derivations"), ncd.get("populated_from")) + + @staticmethod + def _iter_pv_derivations(obj: dict[str, Any]) -> Iterator[dict[str, Any]]: + """Yield each PermissibleValueDerivation dict in ``obj`` for in-place mutation.""" + eds = obj.get("enum_derivations") + if isinstance(eds, dict): + ed_iter: Any = eds.values() + elif isinstance(eds, list): + ed_iter = eds + else: + return + for ed in ed_iter: + if not isinstance(ed, dict): + continue + pvds = ed.get("permissible_value_derivations") + if isinstance(pvds, dict): + pvd_iter: Any = pvds.values() + elif isinstance(pvds, list): + pvd_iter = pvds + else: + continue + for pvd in pvd_iter: + if isinstance(pvd, dict): + yield pvd + + @classmethod + def _coerce_pv_populated_from_to_list(cls, obj: dict[str, Any]) -> None: + """Coerce ``populated_from`` to a list on each PV deriv. + + Input shapes handled: + + * Scalar string: wrapped to a one-element list (user convenience that + pydantic would otherwise reject for a multivalued field). + * Explicit ``None`` or a list whose every element is ``None`` + (``populated_from:`` with no YAML value, possibly already wrapped to + ``[None]`` by ``ReferenceValidator``): the key is removed so pydantic + uses the ``default_factory=list`` default — treats "explicitly set to + nothing" as "unset". Empty strings and other falsy-but-not-None + values are kept (a user may legitimately map to the empty-string PV). + * List: left as-is. + """ + for pvd in cls._iter_pv_derivations(obj): + if "populated_from" not in pvd: + continue + pf = pvd["populated_from"] + if pf is None or (isinstance(pf, list) and all(x is None for x in pf)): + del pvd["populated_from"] + elif isinstance(pf, str): + pvd["populated_from"] = [pf] + + @classmethod + def _migrate_pv_sources_to_populated_from(cls, obj: dict[str, Any]) -> None: + """Move deprecated ``sources`` into ``populated_from`` on PV derivs. + + Applied after the pre-normalize scan has already detected and reported + any ``sources`` usage and any ``sources`` + ``populated_from`` conflicts. + For each PV deriv with ``sources`` set, copies into ``populated_from`` + (if not already set) and clears the ``sources`` key. A scalar string + is wrapped to a one-element list rather than exploded into characters + (defends against ``sources: "light_red"`` typos). Post-condition: no + PV has ``sources`` set. The runtime can therefore rely on + ``populated_from`` as the single source of truth and ignore ``sources``. + """ + for pvd in cls._iter_pv_derivations(obj): + srcs = pvd.pop("sources", None) + if srcs and not pvd.get("populated_from"): + pvd["populated_from"] = [srcs] if isinstance(srcs, str) else list(srcs) + @staticmethod def _expand_compact_keys(items: list[dict[str, Any]]) -> None: """Expand YAML compact-key dicts in a list in place. @@ -305,7 +553,7 @@ def create_transformer_specification(self, obj: dict[str, Any]) -> None: :param path: :return: """ - self._normalize_spec_dict(obj) + self.spec_messages = self._normalize_spec_dict(obj) self.specification = TransformationSpecification(**obj) def _apply_source_schema_patches(self) -> None: diff --git a/src/linkml_map/validator.py b/src/linkml_map/validator.py index d559da4a..2a1f7c45 100644 --- a/src/linkml_map/validator.py +++ b/src/linkml_map/validator.py @@ -143,25 +143,43 @@ def normalize_spec_dict(obj: dict[str, Any]) -> dict[str, Any]: Applies the same full normalization pipeline that the transformer engine uses (compact-key expansion, dict→list conversion via ReferenceValidator, - nested ObjectDerivation fixup), plus YAML type coercion for fields that - are commonly mis-parsed. + nested ObjectDerivation fixup, deprecated-field migration), plus YAML + type coercion for fields that are commonly mis-parsed. If normalization itself fails (e.g. due to malformed data), the original dict is returned with only YAML type coercion applied so that JSON Schema validation can still report structural errors. + Use :func:`_normalize_and_collect_messages` instead when you also need + the pre-normalize scan's findings (deprecation warnings, conflict errors). + :param obj: Raw YAML-loaded dict. :returns: A new dict suitable for JSON Schema validation. """ + obj, _ = _normalize_and_collect_messages(obj) + return obj + + +def _normalize_and_collect_messages( + obj: dict[str, Any], +) -> tuple[dict[str, Any], list[ValidationMessage]]: + """Normalize and return both the dict and the pre-normalize scan messages. + + Internal helper used by :func:`validate_spec`. Calls the transformer's + normalization in silent mode so scan findings are returned as + ``ValidationMessage`` records rather than emitted as Python warnings or + raised as ``SpecificationError``. + """ obj = dict(obj) + messages: list[ValidationMessage] = [] try: - Transformer._normalize_spec_dict(obj) + messages = Transformer._normalize_spec_dict(obj, silent=True) or [] except Exception: logger.debug("Normalization failed; falling back to raw dict", exc_info=True) for field in _COERCE_FIELDS: if field in obj: obj[field] = _coerce_yaml_types(obj[field]) - return obj + return obj, messages def _validate_structural( @@ -324,9 +342,9 @@ def _resolve_schema_path( def _iter_derivation_dicts(raw: Any) -> list[dict[str, Any]]: """Normalize a derivations section (dict or list) to a list of dicts. - After ``normalize_spec_dict``, derivation sections may be either a list of - dicts (with ``name`` keys) or a dict keyed by name. This helper - normalizes to a consistent list-of-dicts form for iteration. + Assumes the SHAPE phase of ``Transformer._normalize_spec_dict`` has + already canonicalized compact-key list items, so callers only need to + handle dict-keyed and explicit-name list forms here. """ if isinstance(raw, list): return [item for item in raw if isinstance(item, dict)] @@ -341,25 +359,40 @@ def _iter_derivation_dicts(raw: Any) -> list[dict[str, Any]]: def check_deprecated_fields(data: dict[str, Any]) -> list[ValidationMessage]: - """Check a normalized spec dict for use of deprecated fields. + """Scan a spec dict for deprecated-field usage and ambiguous combinations. + + Runs in the SCAN phase of ``Transformer._normalize_spec_dict`` — after + SHAPE (which runs ``ReferenceValidator.normalize()`` and the local + compact-key pre-expansion) and before MIGRATE (which flattens + ``object_derivations``, inherits ``populated_from``, and rewrites PV + ``sources``). So the dict the scan sees is structurally canonical + (dict-keyed or explicit-name list, no compact-key items) but the + deprecated field values are still as the user wrote them. - Currently flags two deprecations: + Flags: * ``sources`` on ``ClassDerivation`` / ``SlotDerivation`` / ``EnumDerivation`` / ``PermissibleValueDerivation`` — replaced by - ``populated_from``. + ``populated_from``. Severity: warning, category: deprecated. * ``derived_from`` on ``SlotDerivation`` — ignored by the runtime - and removable. - - Findings are collapsed to one message per (deprecation, derivation - type) pair to keep output readable on large specs. - - Other deprecations (``object_derivations`` flattening) are surfaced - during normalization in ``Transformer._normalize_slot_class_derivations`` - and do not need to be re-emitted here. - - :param data: A **normalized** spec dict (post ``normalize_spec_dict``). - :returns: A list of warning messages with ``category="deprecated"``. + and removable. Severity: warning, category: deprecated. + * ``object_derivations`` on ``SlotDerivation`` — flattened into + ``class_derivations`` at load time. Severity: warning, category: + deprecated. + * ``populated_from`` **and** ``sources`` both set on the same + ``PermissibleValueDerivation`` — ambiguous, the user almost + certainly didn't mean both. Severity: error. + * ``object_derivations`` **and** ``class_derivations`` both set on + the same ``SlotDerivation`` — ambiguous. Severity: error. + + ``sources`` deprecation findings are collapsed to one message per + (deprecation, derivation type) pair to keep output readable on + large specs; per-entry messages are emitted for the other categories. + + :param data: A spec dict post-SHAPE, pre-MIGRATE. Derivation sections + are dict-keyed or explicit-name list (no compact-key items). + :returns: A list of validation messages — warnings for deprecations, + errors for ambiguous combinations. """ messages: list[ValidationMessage] = [] sources_counts: dict[str, list[str]] = { @@ -369,6 +402,7 @@ def check_deprecated_fields(data: dict[str, Any]) -> list[ValidationMessage]: "PermissibleValueDerivation": [], } derived_from_names: list[str] = [] + object_derivation_names: list[str] = [] for cd in _iter_derivation_dicts(data.get("class_derivations")): cd_name = cd.get("name", "") @@ -380,6 +414,20 @@ def check_deprecated_fields(data: dict[str, Any]) -> list[ValidationMessage]: sources_counts["SlotDerivation"].append(sd_name) if sd.get("derived_from"): derived_from_names.append(sd_name) + if sd.get("object_derivations"): + object_derivation_names.append(sd_name) + if sd.get("class_derivations"): + messages.append( + ValidationMessage( + severity="error", + path=f"$.class_derivations[{cd_name}].slot_derivations[{sd_name}]", + message=( + f"SlotDerivation '{sd_name}' sets both 'object_derivations' " + f"and 'class_derivations'. Remove 'object_derivations' and " + f"use 'class_derivations' only." + ), + ) + ) for ed in _iter_derivation_dicts(data.get("enum_derivations")): ed_name = ed.get("name", "") @@ -389,6 +437,19 @@ def check_deprecated_fields(data: dict[str, Any]) -> list[ValidationMessage]: pvd_name = pvd.get("name", "") if pvd.get("sources"): sources_counts["PermissibleValueDerivation"].append(pvd_name) + if pvd.get("populated_from"): + messages.append( + ValidationMessage( + severity="error", + path=(f"$.enum_derivations[{ed_name}].permissible_value_derivations[{pvd_name}]"), + message=( + f"PermissibleValueDerivation '{pvd_name}' sets both " + f"'populated_from' and 'sources'. These are alternative " + f"spellings of the same field; set only 'populated_from' " + f"(which now accepts a list)." + ), + ) + ) for deriv_type, names in sources_counts.items(): if names: @@ -407,6 +468,24 @@ def check_deprecated_fields(data: dict[str, Any]) -> list[ValidationMessage]: ) ) + if object_derivation_names: + preview = ", ".join(object_derivation_names[:5]) + suffix = f" (and {len(object_derivation_names) - 5} more)" if len(object_derivation_names) > 5 else "" + messages.append( + ValidationMessage( + severity="warning", + category="deprecated", + path="$.SlotDerivation", + message=( + f"{len(object_derivation_names)} SlotDerivation(s) use 'object_derivations', " + f"which is deprecated and flattened into 'class_derivations' at load time: " + f"{preview}{suffix}. Use list-based 'class_derivations' instead. " + f"'object_derivations' will be removed in a future version. " + f"See https://github.com/linkml/linkml-map/issues/112" + ), + ) + ) + if derived_from_names: preview = ", ".join(derived_from_names[:5]) suffix = f" (and {len(derived_from_names) - 5} more)" if len(derived_from_names) > 5 else "" @@ -722,10 +801,10 @@ def validate_spec( >>> validate_spec(data) [] """ - normalized = normalize_spec_dict(data) + normalized, scan_messages = _normalize_and_collect_messages(data) messages = _validate_structural(normalized, schema_path=schema_path) if not messages: - messages.extend(check_deprecated_fields(normalized)) + messages.extend(scan_messages) messages.extend( validate_spec_semantics( normalized, diff --git a/tests/test_deprecations.py b/tests/test_deprecations.py index 8d609909..fd4aedc5 100644 --- a/tests/test_deprecations.py +++ b/tests/test_deprecations.py @@ -43,7 +43,7 @@ def test_derived_from_emits_deprecation_warning(): - """Using derived_from on a SlotDerivation emits a DeprecationWarning.""" + """Using derived_from on a SlotDerivation emits a DeprecationWarning at load time.""" tr = ObjectTransformer() tr.source_schemaview = SchemaView(SOURCE_SCHEMA) tr.target_schemaview = SchemaView(TARGET_SCHEMA) @@ -60,17 +60,14 @@ def test_derived_from_emits_deprecation_warning(): }, }, } - tr.create_transformer_specification(spec) - with warnings.catch_warnings(record=True) as caught: warnings.simplefilter("always", DeprecationWarning) - # Trigger induce_missing_values via derived_specification - _ = tr.derived_specification + tr.create_transformer_specification(spec) deprecation_warnings = [w for w in caught if issubclass(w.category, DeprecationWarning)] - assert len(deprecation_warnings) == 1 - assert "derived_from" in str(deprecation_warnings[0].message) - assert "full_name" in str(deprecation_warnings[0].message) + derived_from_warnings = [w for w in deprecation_warnings if "derived_from" in str(w.message)] + assert len(derived_from_warnings) == 1 + assert "full_name" in str(derived_from_warnings[0].message) def test_derived_from_does_not_affect_transformation(): @@ -105,7 +102,7 @@ def test_derived_from_does_not_affect_transformation(): def test_sources_on_pv_emits_deprecation_warning(): - """Using sources on a PermissibleValueDerivation emits a DeprecationWarning.""" + """Using sources on a PermissibleValueDerivation emits a DeprecationWarning at load time.""" tr = ObjectTransformer() tr.source_schemaview = SchemaView(SOURCE_SCHEMA) tr.target_schemaview = SchemaView(TARGET_SCHEMA) @@ -128,11 +125,9 @@ def test_sources_on_pv_emits_deprecation_warning(): }, }, } - tr.create_transformer_specification(spec) - with warnings.catch_warnings(record=True) as caught: warnings.simplefilter("always", DeprecationWarning) - _ = tr.derived_specification + tr.create_transformer_specification(spec) deprecation_warnings = [w for w in caught if issubclass(w.category, DeprecationWarning)] sources_warnings = [w for w in deprecation_warnings if "sources" in str(w.message)] @@ -141,7 +136,7 @@ def test_sources_on_pv_emits_deprecation_warning(): def test_sources_on_slot_emits_deprecation_warning(): - """Using sources on a SlotDerivation emits a DeprecationWarning.""" + """Using sources on a SlotDerivation emits a DeprecationWarning at load time.""" tr = ObjectTransformer() tr.source_schemaview = SchemaView(SOURCE_SCHEMA) tr.target_schemaview = SchemaView(TARGET_SCHEMA) @@ -157,11 +152,9 @@ def test_sources_on_slot_emits_deprecation_warning(): }, }, } - tr.create_transformer_specification(spec) - with warnings.catch_warnings(record=True) as caught: warnings.simplefilter("always", DeprecationWarning) - _ = tr.derived_specification + tr.create_transformer_specification(spec) deprecation_warnings = [w for w in caught if issubclass(w.category, DeprecationWarning)] sources_warnings = [w for w in deprecation_warnings if "sources" in str(w.message)] @@ -246,11 +239,9 @@ def test_no_warning_without_deprecated_fields(): }, }, } - tr.create_transformer_specification(spec) - with warnings.catch_warnings(record=True) as caught: warnings.simplefilter("always", DeprecationWarning) - _ = tr.derived_specification + tr.create_transformer_specification(spec) deprecation_warnings = [w for w in caught if issubclass(w.category, DeprecationWarning)] assert len(deprecation_warnings) == 0 diff --git a/tests/test_transformer/test_multi_enum.py b/tests/test_transformer/test_multi_enum.py index 6e6703a1..9da6e7cb 100644 --- a/tests/test_transformer/test_multi_enum.py +++ b/tests/test_transformer/test_multi_enum.py @@ -291,6 +291,54 @@ def test_null_color_stays_none(): assert result["color"] is None +def test_pv_populated_from_list_form_maps_multiple_sources(): + """populated_from as a list maps any listed source value to the target PV.""" + tr = ObjectTransformer() + tr.source_schemaview = SchemaView(SOURCE_SCHEMA) + tr.target_schemaview = SchemaView(TARGET_SCHEMA) + spec = copy.deepcopy(TRANSFORM_SPEC) + # Replace SimplePrimary's deprecated sources with list-form populated_from. + spec["enum_derivations"]["SimplePrimary"]["permissible_value_derivations"] = { + "red": {"populated_from": ["light_red", "dark_red"]}, + "green": {"populated_from": ["light_green", "dark_green"]}, + "blue": {"populated_from": ["light_blue", "dark_blue"]}, + } + tr.create_transformer_specification(spec) + + for src, expected in [("light_red", "red"), ("dark_red", "red"), ("dark_blue", "blue")]: + result = tr.map_object({"id": "l1", "color": src}, source_type="Light") + assert result["color"] == expected + + +def test_pv_populated_from_scalar_form_still_works(): + """Scalar populated_from is wrapped to a one-element list at load time.""" + tr = ObjectTransformer() + tr.source_schemaview = SchemaView(SOURCE_SCHEMA) + tr.target_schemaview = SchemaView(TARGET_SCHEMA) + spec = copy.deepcopy(TRANSFORM_SPEC) + spec["enum_derivations"]["MissingnessTarget"]["permissible_value_derivations"] = { + "na": {"populated_from": "not_available"}, + "oth": {"populated_from": "other"}, + } + tr.create_transformer_specification(spec) + # Pydantic representation is always a list after normalization. + pvds = tr.specification.enum_derivations["MissingnessTarget"].permissible_value_derivations + assert pvds["na"].populated_from == ["not_available"] + + result = tr.map_object({"id": "l1", "color": "not_available"}, source_type="Light") + assert result["color"] == "na" + + +def test_pv_sources_only_is_migrated_to_populated_from(): + """sources-only PV derivs are migrated so the runtime sees only populated_from.""" + tr = _make_transformer() + # SimplePrimary in TRANSFORM_SPEC uses sources: [...] — confirm migration cleared sources. + pvds = tr.specification.enum_derivations["SimplePrimary"].permissible_value_derivations + assert pvds["red"].populated_from == ["light_red", "dark_red"] + # sources is cleared after migration — the runtime relies on populated_from alone. + assert not pvds["red"].sources + + def test_explicit_range_any_with_any_of(): """Slots with explicit range: Any plus any_of enum ranges are mapped correctly.""" schema = SOURCE_SCHEMA.replace( diff --git a/tests/test_upstream_canaries.py b/tests/test_upstream_canaries.py new file mode 100644 index 00000000..c15f1931 --- /dev/null +++ b/tests/test_upstream_canaries.py @@ -0,0 +1,49 @@ +"""Canary tests for upstream linkml/linkml-runtime bugs we work around. + +Each test asserts the **correct** post-fix behavior and is marked +``xfail(strict=True)``. While the bug persists upstream, the test +xfails. When the upstream fix lands, the test starts passing and +``strict=True`` flips XPASS into a hard failure — a loud signal to +remove the local workaround. +""" + +import pytest + + +@pytest.mark.xfail( + strict=True, + reason=( + "linkml/linkml#3529: ReferenceValidator.normalize() mangles compact-key " + "list input to inlined-dict fields as {None: ...}. The proposed upstream " + "fix hoists unambiguous single-key items (and errors on degenerate cases). " + "Our test case is unambiguous so it will hoist. When this passes, remove " + "the PV-level branch from Transformer._pre_shape_expand_compact_keys." + ), +) +def test_reference_validator_normalizes_compact_key_pv_list(): + """Canary: compact-key list for inlined-dict PV field normalizes correctly. + + See: https://github.com/linkml/linkml/issues/3529 + """ + from linkml_runtime.processing.referencevalidator import ReferenceValidator + from linkml_runtime.utils.introspection import package_schemaview + + normalizer = ReferenceValidator(package_schemaview("linkml_map.datamodel.transformer_model")) + normalizer.expand_all = True + + obj = { + "enum_derivations": { + "Target": { + "populated_from": "Source", + "permissible_value_derivations": [ + {"red": {"sources": ["a"]}}, + ], + }, + }, + } + result = normalizer.normalize(obj) + pvs = result["enum_derivations"]["Target"]["permissible_value_derivations"] + + # Expected (post-fix): compact-key name is hoisted as the dict key. + assert None not in pvs, f"upstream still produces None-keyed entries: {pvs}" + assert "red" in pvs, f"upstream did not hoist compact-key name: {pvs}" diff --git a/tests/test_validator.py b/tests/test_validator.py index 21358dcd..de17d1c8 100644 --- a/tests/test_validator.py +++ b/tests/test_validator.py @@ -174,6 +174,127 @@ def test_validate_detects_bad_slot_derivation_type(): assert len(_errors(msgs)) > 0 +# --------------------------------------------------------------------------- +# PermissibleValueDerivation populated_from / sources interaction +# --------------------------------------------------------------------------- + + +def test_pv_populated_from_list_validates(): + """List-form populated_from on a PV deriv is structurally valid.""" + spec = { + "enum_derivations": { + "Target": { + "populated_from": "Source", + "permissible_value_derivations": { + "red": {"populated_from": ["light_red", "dark_red"]}, + }, + }, + }, + } + msgs = validate_spec(spec) + assert _errors(msgs) == [] + + +def test_pv_populated_from_scalar_validates(): + """Scalar populated_from on a PV deriv is accepted (wrapped to list).""" + spec = { + "enum_derivations": { + "Target": { + "populated_from": "Source", + "permissible_value_derivations": { + "red": {"populated_from": "light_red"}, + }, + }, + }, + } + msgs = validate_spec(spec) + assert _errors(msgs) == [] + + +def test_pv_sources_only_is_deprecation_not_conflict(): + """Sources-only is a deprecation warning, not a conflict error.""" + spec = { + "enum_derivations": { + "Target": { + "populated_from": "Source", + "permissible_value_derivations": { + "red": {"sources": ["light_red", "dark_red"]}, + }, + }, + }, + } + msgs = validate_spec(spec) + assert _errors(msgs) == [] + warnings = _warnings(msgs) + assert any("sources" in m.message and "PermissibleValueDerivation" in m.path for m in warnings) + + +def test_pv_sources_and_populated_from_both_set_errors(): + """Setting both sources and populated_from on a PV deriv is an error.""" + spec = { + "enum_derivations": { + "Target": { + "populated_from": "Source", + "permissible_value_derivations": { + "red": { + "populated_from": ["light_red"], + "sources": ["dark_red"], + }, + }, + }, + }, + } + msgs = validate_spec(spec) + errors = _errors(msgs) + assert any("both 'populated_from' and 'sources'" in m.message for m in errors) + assert any("'red'" in m.message for m in errors) + + +def test_pv_sources_in_compact_key_list_form_detected(): + """Compact-key list-form PV derivs (`[{name: {sources: [...]}}]`) are scanned. + + The SHAPE phase expands compact-key list items before the SCAN runs, so + the deprecation warning fires end-to-end through ``validate_spec``. + """ + spec = { + "enum_derivations": { + "Target": { + "populated_from": "Source", + "permissible_value_derivations": [ + {"red": {"sources": ["light_red", "dark_red"]}}, + ], + }, + }, + } + msgs = validate_spec(spec) + assert _errors(msgs) == [] + warnings = _warnings(msgs) + assert any("sources" in m.message and "PermissibleValueDerivation" in m.path for m in warnings), ( + f"Expected deprecation warning for compact-key list PV; got {warnings}" + ) + + +def test_pv_populated_from_explicit_none_is_stripped(): + """`populated_from: null` (explicit YAML None) is dropped during normalize.""" + raw = { + "enum_derivations": { + "Target": { + "populated_from": "Source", + "permissible_value_derivations": { + "red": {"populated_from": None}, + }, + }, + }, + } + normalized = normalize_spec_dict(raw) + # After normalize, the PV's populated_from key has been removed; pydantic + # will fill in the default factory ([]) when the model is built. + eds = normalized["enum_derivations"] + pvds = eds["Target"]["permissible_value_derivations"] + pvd = pvds["red"] if isinstance(pvds, dict) else next(iter(pvds)) + assert "populated_from" not in pvd + + # --------------------------------------------------------------------------- # File-level validation against real trans-specs (structural only) # ---------------------------------------------------------------------------