From 6783ec810d759cf2c14709bf7ce7c4f0af80204b Mon Sep 17 00:00:00 2001 From: amc-corey-cox <69321580+amc-corey-cox@users.noreply.github.com> Date: Mon, 18 May 2026 07:31:52 -0500 Subject: [PATCH 1/8] Allow list-form populated_from on PermissibleValueDerivation Extends populated_from on PermissibleValueDerivation to accept a list of source values, completing the deprecation of `sources` started in #194 (issue #193). Scope is limited to PV derivations; class/slot/enum list support is separate work. - Schema: populated_from is multivalued; scalar input is wrapped at load. - Runtime: deprecated `sources` is migrated into `populated_from` so the enum-mapping loop has a single field to consult. `sources` is left in place after migration so the validator's deprecation warning still fires. - Validator: setting both `populated_from` and `sources` on the same PV now produces severity=error (validate-spec gates on it). - Architecture: the migration runs only at runtime entry points, so the validator sees un-migrated data and can distinguish "user wrote both" from "we migrated sources". - Updated schema_mapper, inverter, and the markdown template to handle populated_from as a list. Inverter raises NonInvertibleSpecificationError when a PV deriv has multiple source values. --- src/linkml_map/compiler/templates/markdown.j2 | 2 +- src/linkml_map/datamodel/transformer_model.py | 9 +-- .../datamodel/transformer_model.yaml | 11 ++- src/linkml_map/inference/inverter.py | 13 +++- src/linkml_map/inference/schema_mapper.py | 13 ++-- .../transformer/object_transformer.py | 4 +- src/linkml_map/transformer/transformer.py | 62 +++++++++++++++ src/linkml_map/validator.py | 36 +++++++++ tests/test_transformer/test_multi_enum.py | 48 ++++++++++++ tests/test_validator.py | 76 +++++++++++++++++++ 10 files changed, 251 insertions(+), 23 deletions(-) diff --git a/src/linkml_map/compiler/templates/markdown.j2 b/src/linkml_map/compiler/templates/markdown.j2 index c3be8d6e..344a6dfa 100644 --- a/src/linkml_map/compiler/templates/markdown.j2 +++ b/src/linkml_map/compiler/templates/markdown.j2 @@ -20,6 +20,6 @@ | Target | Source | Info | | ------ | ------ | ---- | {%- for pvd in ed.permissible_value_derivations.values() %} -| {{ pvd.name }} | {{ pvd.populated_from }} | . | +| {{ pvd.name }} | {{ pvd.populated_from | join(", ") }} | . | {%- endfor -%} {% endfor %} diff --git a/src/linkml_map/datamodel/transformer_model.py b/src/linkml_map/datamodel/transformer_model.py index db1ee3aa..7ddc2b49 100644 --- a/src/linkml_map/datamodel/transformer_model.py +++ b/src/linkml_map/datamodel/transformer_model.py @@ -500,14 +500,13 @@ class PermissibleValueDerivation(ElementDerivation): expr: Optional[str] = Field(default=None, json_schema_extra = { "linkml_meta": {'domain_of': ['SlotDerivation', 'EnumDerivation', 'PermissibleValueDerivation']} }) - populated_from: Optional[str] = Field(default=None, description="""Source permissible value that maps to this target permissible value.""", json_schema_extra = { "linkml_meta": {'domain_of': ['ClassDerivation', + populated_from: Optional[list[str]] = Field(default_factory=list, description="""Source permissible value(s) that map to this target permissible value. Accepts a single value or a list; scalar input is normalized to a one-element list at load time.""", json_schema_extra = { "linkml_meta": {'domain_of': ['ClassDerivation', 'SlotDerivation', 'EnumDerivation', 'PermissibleValueDerivation']} }) - sources: Optional[list[str]] = Field(default_factory=list, description="""Deprecated. Use populated_from instead.""", json_schema_extra = { "linkml_meta": {'deprecated': 'Deprecated. Use populated_from instead. See ' - 'https://github.com/linkml/linkml-map/issues/193 for planned ' - 'list support in populated_from. Will be removed in a future ' - 'version.', + sources: Optional[list[str]] = Field(default_factory=list, description="""Deprecated. Use populated_from instead.""", json_schema_extra = { "linkml_meta": {'deprecated': 'Deprecated. Use populated_from instead, which now accepts a ' + 'list. See https://github.com/linkml/linkml-map/issues/193. ' + 'Will be removed in a future version.', 'domain_of': ['ClassDerivation', 'SlotDerivation', 'EnumDerivation', diff --git a/src/linkml_map/datamodel/transformer_model.yaml b/src/linkml_map/datamodel/transformer_model.yaml index 948816af..c012bf8e 100644 --- a/src/linkml_map/datamodel/transformer_model.yaml +++ b/src/linkml_map/datamodel/transformer_model.yaml @@ -486,15 +486,18 @@ classes: range: string populated_from: range: string + multivalued: true description: >- - Source permissible value that maps to this target permissible value. + Source permissible value(s) that map to this target permissible value. + Accepts a single value or a list; scalar input is normalized to a + one-element list at load time. sources: range: string multivalued: true deprecated: >- - Deprecated. Use populated_from instead. - See https://github.com/linkml/linkml-map/issues/193 for planned - list support in populated_from. Will be removed in a future version. + Deprecated. Use populated_from instead, which now accepts a list. + See https://github.com/linkml/linkml-map/issues/193. Will be removed + in a future version. description: >- Deprecated. Use populated_from instead. hide: diff --git a/src/linkml_map/inference/inverter.py b/src/linkml_map/inference/inverter.py index e7bfad49..d39196f5 100644 --- a/src/linkml_map/inference/inverter.py +++ b/src/linkml_map/inference/inverter.py @@ -93,9 +93,18 @@ def invert_enum_derivation(self, ed: EnumDerivation, spec: TransformationSpecifi msg = "TODO: invert enum derivation with expression" raise NonInvertibleSpecificationError(msg) for pv_deriv in ed.permissible_value_derivations.values(): + sources = pv_deriv.populated_from or [pv_deriv.name] + if len(sources) > 1: + msg = ( + f"Cannot invert PermissibleValueDerivation '{pv_deriv.name}': " + f"populated_from has multiple source values {sources}, " + f"which is not a one-to-one mapping." + ) + raise NonInvertibleSpecificationError(msg) + inverted_name = sources[0] inverted_pv_deriv = PermissibleValueDerivation( - name=pv_deriv.populated_from if pv_deriv.populated_from else pv_deriv.name, - populated_from=pv_deriv.name, + name=inverted_name, + populated_from=[pv_deriv.name], ) inverted_ed.permissible_value_derivations[inverted_pv_deriv.name] = inverted_pv_deriv return inverted_ed diff --git a/src/linkml_map/inference/schema_mapper.py b/src/linkml_map/inference/schema_mapper.py index 1686109b..fe0c822f 100644 --- a/src/linkml_map/inference/schema_mapper.py +++ b/src/linkml_map/inference/schema_mapper.py @@ -311,16 +311,13 @@ def _derive_enum(self, enum_derivation: EnumDerivation) -> EnumDefinition: target_enum.attributes = {} target_enum.slot_usage = {} for pv_derivation in enum_derivation.permissible_value_derivations.values(): - if pv_derivation.populated_from: - pv = PermissibleValue(text=pv_derivation.populated_from) - target_enum.permissible_values[pv.text] = pv - elif pv_derivation.sources: - for source in pv_derivation.sources: - pv = PermissibleValue(text=source) - target_enum.permissible_values[pv.text] = pv - else: + sources = pv_derivation.populated_from or pv_derivation.sources + if not sources: msg = f"Missing populated_from or sources for {pv_derivation}" raise ValueError(msg) + for source in sources: + pv = PermissibleValue(text=source) + target_enum.permissible_values[pv.text] = pv if enum_derivation.mirror_source: for pv in source_enum.permissible_values.values(): if pv.text not in target_enum.permissible_values: diff --git a/src/linkml_map/transformer/object_transformer.py b/src/linkml_map/transformer/object_transformer.py index 665ecc6a..68fba55f 100644 --- a/src/linkml_map/transformer/object_transformer.py +++ b/src/linkml_map/transformer/object_transformer.py @@ -1164,9 +1164,7 @@ def transform_enum(self, source_value: str, enum_names: list[str], source_obj: A if v is not None: return v for pv_deriv in enum_deriv.permissible_value_derivations.values(): - if source_value == pv_deriv.populated_from: - return pv_deriv.name - if source_value in pv_deriv.sources: + if pv_deriv.populated_from and source_value in pv_deriv.populated_from: return pv_deriv.name if enum_deriv.mirror_source: return str(source_value) diff --git a/src/linkml_map/transformer/transformer.py b/src/linkml_map/transformer/transformer.py index 307d0227..861a4b45 100644 --- a/src/linkml_map/transformer/transformer.py +++ b/src/linkml_map/transformer/transformer.py @@ -3,6 +3,7 @@ import logging import warnings from abc import ABC +from collections.abc import Iterator from copy import deepcopy from dataclasses import dataclass, field from pathlib import Path @@ -136,6 +137,7 @@ def load_transformer_specification(self, path: str | Path) -> None: with open(path) as f: obj = yaml.safe_load(f) self._normalize_spec_dict(obj) + self._migrate_pv_sources_to_populated_from(obj) self.specification = TransformationSpecification(**obj) def load_transformer_specifications(self, paths: tuple[str | Path, ...]) -> None: @@ -152,6 +154,7 @@ def load_transformer_specifications(self, paths: tuple[str | Path, ...]) -> None obj = load_and_merge_specs(paths) self._normalize_spec_dict(obj) + self._migrate_pv_sources_to_populated_from(obj) self.specification = TransformationSpecification(**obj) @classmethod @@ -254,6 +257,11 @@ def _normalize_spec_dict(cls, obj: dict[str, Any]) -> None: and nested ObjectDerivation fixup into a single entry point. Mutates ``obj`` by replacing its contents with the normalized result. + Structural normalization only. The runtime additionally migrates + deprecated PV ``sources`` into ``populated_from`` via + :meth:`_migrate_pv_sources_to_populated_from`; that step is kept + separate so the validator can still distinguish user intent. + :param obj: Raw specification dict (e.g. from YAML or user code). """ cls._preprocess_class_derivations(obj) @@ -262,6 +270,59 @@ def _normalize_spec_dict(cls, obj: dict[str, Any]) -> None: normalized = cls.normalize_transform_spec(obj, normalizer) obj.clear() obj.update(normalized) + cls._coerce_pv_populated_from_to_list(obj) + + @staticmethod + def _iter_pv_derivations(obj: dict[str, Any]) -> Iterator[dict[str, Any]]: + """Yield each PermissibleValueDerivation dict in ``obj`` for in-place mutation.""" + eds = obj.get("enum_derivations") + if isinstance(eds, dict): + ed_iter: Any = eds.values() + elif isinstance(eds, list): + ed_iter = eds + else: + return + for ed in ed_iter: + if not isinstance(ed, dict): + continue + pvds = ed.get("permissible_value_derivations") + if isinstance(pvds, dict): + pvd_iter: Any = pvds.values() + elif isinstance(pvds, list): + pvd_iter = pvds + else: + continue + for pvd in pvd_iter: + if isinstance(pvd, dict): + yield pvd + + @classmethod + def _coerce_pv_populated_from_to_list(cls, obj: dict[str, Any]) -> None: + """Wrap scalar ``populated_from`` to a one-element list on each PV deriv. + + ``PermissibleValueDerivation.populated_from`` is declared multivalued; + scalar input is a user convenience that pydantic would otherwise reject. + """ + for pvd in cls._iter_pv_derivations(obj): + pf = pvd.get("populated_from") + if isinstance(pf, str): + pvd["populated_from"] = [pf] + + @classmethod + def _migrate_pv_sources_to_populated_from(cls, obj: dict[str, Any]) -> None: + """Copy deprecated ``sources`` into ``populated_from`` on PV derivs. + + Only applied when ``populated_from`` is empty/unset; ``sources`` is + left in place so the validator's deprecation warning still fires. + Entries with **both** fields populated are not modified; the validator + surfaces those as ``severity="error"``. Runtime-only — the validator + operates on the un-migrated dict so it can distinguish user intent. + """ + for pvd in cls._iter_pv_derivations(obj): + pf = pvd.get("populated_from") + srcs = pvd.get("sources") + if not pf and srcs: + pvd["populated_from"] = list(srcs) @staticmethod def _expand_compact_keys(items: list[dict[str, Any]]) -> None: @@ -306,6 +367,7 @@ def create_transformer_specification(self, obj: dict[str, Any]) -> None: :return: """ self._normalize_spec_dict(obj) + self._migrate_pv_sources_to_populated_from(obj) self.specification = TransformationSpecification(**obj) def _apply_source_schema_patches(self) -> None: diff --git a/src/linkml_map/validator.py b/src/linkml_map/validator.py index d559da4a..563cdb30 100644 --- a/src/linkml_map/validator.py +++ b/src/linkml_map/validator.py @@ -340,6 +340,41 @@ def _iter_derivation_dicts(raw: Any) -> list[dict[str, Any]]: return [] +def check_pv_field_conflicts(data: dict[str, Any]) -> list[ValidationMessage]: + """Flag PermissibleValueDerivations that set both ``populated_from`` and ``sources``. + + The two fields are alternative ways to express the same mapping (sources is + the deprecated spelling). Setting both is ambiguous: ``populated_from`` is + the canonical winner at runtime, but the user almost certainly didn't mean + to write both. Emitted as ``severity="error"`` so ``validate-spec`` gates + on it. Operates on the un-migrated normalized dict; runtime entry points + migrate ``sources`` → ``populated_from`` *after* normalization, so this + check correctly distinguishes "user wrote both" from "we migrated sources". + + :param data: A normalized spec dict (post ``normalize_spec_dict``, + pre runtime migration). + :returns: A list of error messages (empty if no conflicts). + """ + messages: list[ValidationMessage] = [] + for ed in _iter_derivation_dicts(data.get("enum_derivations")): + ed_name = ed.get("name", "") + for pvd in _iter_derivation_dicts(ed.get("permissible_value_derivations")): + if pvd.get("populated_from") and pvd.get("sources"): + pvd_name = pvd.get("name", "") + messages.append( + ValidationMessage( + severity="error", + path=f"$.enum_derivations[{ed_name}].permissible_value_derivations[{pvd_name}]", + message=( + f"PermissibleValueDerivation '{pvd_name}' sets both 'populated_from' " + f"and 'sources'. These are alternative spellings of the same field; " + f"set only 'populated_from' (which now accepts a list)." + ), + ) + ) + return messages + + def check_deprecated_fields(data: dict[str, Any]) -> list[ValidationMessage]: """Check a normalized spec dict for use of deprecated fields. @@ -725,6 +760,7 @@ def validate_spec( normalized = normalize_spec_dict(data) messages = _validate_structural(normalized, schema_path=schema_path) if not messages: + messages.extend(check_pv_field_conflicts(normalized)) messages.extend(check_deprecated_fields(normalized)) messages.extend( validate_spec_semantics( diff --git a/tests/test_transformer/test_multi_enum.py b/tests/test_transformer/test_multi_enum.py index 6e6703a1..d289461f 100644 --- a/tests/test_transformer/test_multi_enum.py +++ b/tests/test_transformer/test_multi_enum.py @@ -291,6 +291,54 @@ def test_null_color_stays_none(): assert result["color"] is None +def test_pv_populated_from_list_form_maps_multiple_sources(): + """populated_from as a list maps any listed source value to the target PV.""" + tr = ObjectTransformer() + tr.source_schemaview = SchemaView(SOURCE_SCHEMA) + tr.target_schemaview = SchemaView(TARGET_SCHEMA) + spec = copy.deepcopy(TRANSFORM_SPEC) + # Replace SimplePrimary's deprecated sources with list-form populated_from. + spec["enum_derivations"]["SimplePrimary"]["permissible_value_derivations"] = { + "red": {"populated_from": ["light_red", "dark_red"]}, + "green": {"populated_from": ["light_green", "dark_green"]}, + "blue": {"populated_from": ["light_blue", "dark_blue"]}, + } + tr.create_transformer_specification(spec) + + for src, expected in [("light_red", "red"), ("dark_red", "red"), ("dark_blue", "blue")]: + result = tr.map_object({"id": "l1", "color": src}, source_type="Light") + assert result["color"] == expected + + +def test_pv_populated_from_scalar_form_still_works(): + """Scalar populated_from is wrapped to a one-element list at load time.""" + tr = ObjectTransformer() + tr.source_schemaview = SchemaView(SOURCE_SCHEMA) + tr.target_schemaview = SchemaView(TARGET_SCHEMA) + spec = copy.deepcopy(TRANSFORM_SPEC) + spec["enum_derivations"]["MissingnessTarget"]["permissible_value_derivations"] = { + "na": {"populated_from": "not_available"}, + "oth": {"populated_from": "other"}, + } + tr.create_transformer_specification(spec) + # Pydantic representation is always a list after normalization. + pvds = tr.specification.enum_derivations["MissingnessTarget"].permissible_value_derivations + assert pvds["na"].populated_from == ["not_available"] + + result = tr.map_object({"id": "l1", "color": "not_available"}, source_type="Light") + assert result["color"] == "na" + + +def test_pv_sources_only_is_migrated_to_populated_from(): + """sources-only PV derivs are migrated so the runtime uses populated_from.""" + tr = _make_transformer() + # SimplePrimary in TRANSFORM_SPEC uses sources: [...] — confirm migration. + pvds = tr.specification.enum_derivations["SimplePrimary"].permissible_value_derivations + assert pvds["red"].populated_from == ["light_red", "dark_red"] + # sources is preserved so the validator's deprecation warning still fires. + assert pvds["red"].sources == ["light_red", "dark_red"] + + def test_explicit_range_any_with_any_of(): """Slots with explicit range: Any plus any_of enum ranges are mapped correctly.""" schema = SOURCE_SCHEMA.replace( diff --git a/tests/test_validator.py b/tests/test_validator.py index 21358dcd..4402bee5 100644 --- a/tests/test_validator.py +++ b/tests/test_validator.py @@ -174,6 +174,82 @@ def test_validate_detects_bad_slot_derivation_type(): assert len(_errors(msgs)) > 0 +# --------------------------------------------------------------------------- +# PermissibleValueDerivation populated_from / sources interaction +# --------------------------------------------------------------------------- + + +def test_pv_populated_from_list_validates(): + """List-form populated_from on a PV deriv is structurally valid.""" + spec = { + "enum_derivations": { + "Target": { + "populated_from": "Source", + "permissible_value_derivations": { + "red": {"populated_from": ["light_red", "dark_red"]}, + }, + }, + }, + } + msgs = validate_spec(spec) + assert _errors(msgs) == [] + + +def test_pv_populated_from_scalar_validates(): + """Scalar populated_from on a PV deriv is accepted (wrapped to list).""" + spec = { + "enum_derivations": { + "Target": { + "populated_from": "Source", + "permissible_value_derivations": { + "red": {"populated_from": "light_red"}, + }, + }, + }, + } + msgs = validate_spec(spec) + assert _errors(msgs) == [] + + +def test_pv_sources_only_is_deprecation_not_conflict(): + """Sources-only is a deprecation warning, not a conflict error.""" + spec = { + "enum_derivations": { + "Target": { + "populated_from": "Source", + "permissible_value_derivations": { + "red": {"sources": ["light_red", "dark_red"]}, + }, + }, + }, + } + msgs = validate_spec(spec) + assert _errors(msgs) == [] + warnings = _warnings(msgs) + assert any("sources" in m.message and "PermissibleValueDerivation" in m.path for m in warnings) + + +def test_pv_sources_and_populated_from_both_set_errors(): + """Setting both sources and populated_from on a PV deriv is an error.""" + spec = { + "enum_derivations": { + "Target": { + "populated_from": "Source", + "permissible_value_derivations": { + "red": { + "populated_from": ["light_red"], + "sources": ["dark_red"], + }, + }, + }, + }, + } + msgs = validate_spec(spec) + errors = _errors(msgs) + assert any("both 'populated_from' and 'sources'" in m.message for m in errors) + assert any("'red'" in m.message for m in errors) + + # --------------------------------------------------------------------------- # File-level validation against real trans-specs (structural only) # --------------------------------------------------------------------------- From 631702979d6a6fc018842da171b3c67e0e679ae3 Mon Sep 17 00:00:00 2001 From: amc-corey-cox <69321580+amc-corey-cox@users.noreply.github.com> Date: Mon, 18 May 2026 09:32:36 -0500 Subject: [PATCH 2/8] Move deprecation scanning to pre-normalize; clear sources after migration All deprecation scanning now runs in one pre-normalize pass inside _normalize_spec_dict, so every spec-loading entry point gets it. PV migration clears sources; runtime asserts the invariant. _warn_deprecated_fields shim removed; pre-flight reads scan messages from the Transformer. object_derivations deprecation gains validate-spec surface. Also regenerated docs/schema/PermissibleValueDerivation.md (CI fix). --- docs/schema/PermissibleValueDerivation.md | 22 ++- src/linkml_map/cli/cli.py | 28 ++-- src/linkml_map/inference/inference.py | 23 --- src/linkml_map/transformer/errors.py | 12 ++ .../transformer/object_transformer.py | 5 + src/linkml_map/transformer/transformer.py | 128 +++++++++------ src/linkml_map/validator.py | 153 +++++++++++------- tests/test_deprecations.py | 29 ++-- tests/test_transformer/test_multi_enum.py | 8 +- 9 files changed, 238 insertions(+), 170 deletions(-) diff --git a/docs/schema/PermissibleValueDerivation.md b/docs/schema/PermissibleValueDerivation.md index 5a725450..420adb3f 100644 --- a/docs/schema/PermissibleValueDerivation.md +++ b/docs/schema/PermissibleValueDerivation.md @@ -152,7 +152,7 @@ URI: [linkmlmap:PermissibleValueDerivation](https://w3id.org/linkml/transformer/ | --- | --- | --- | --- | | [name](name.md) | 1
[String](String.md) | Target permissible value text | direct | | [expr](expr.md) | 0..1
[String](String.md) | | direct | -| [populated_from](populated_from.md) | 0..1
[String](String.md) | Source permissible value that maps to this target permissible value | direct | +| [populated_from](populated_from.md) | *
[String](String.md) | Source permissible value(s) that map to this target permissible value | direct | | [sources](sources.md) | *
[String](String.md) | Deprecated | direct | | [hide](hide.md) | 0..1
[Boolean](Boolean.md) | | direct | | [copy_directives](copy_directives.md) | *
[CopyDirective](CopyDirective.md) | | [ElementDerivation](ElementDerivation.md) | @@ -257,7 +257,9 @@ attributes: range: string populated_from: name: populated_from - description: Source permissible value that maps to this target permissible value. + description: Source permissible value(s) that map to this target permissible value. + Accepts a single value or a list; scalar input is normalized to a one-element + list at load time. from_schema: https://w3id.org/linkml/transformer domain_of: - ClassDerivation @@ -265,11 +267,13 @@ attributes: - EnumDerivation - PermissibleValueDerivation range: string + multivalued: true sources: name: sources description: Deprecated. Use populated_from instead. - deprecated: Deprecated. Use populated_from instead. See https://github.com/linkml/linkml-map/issues/193 - for planned list support in populated_from. Will be removed in a future version. + deprecated: Deprecated. Use populated_from instead, which now accepts a list. + See https://github.com/linkml/linkml-map/issues/193. Will be removed in a future + version. from_schema: https://w3id.org/linkml/transformer domain_of: - ClassDerivation @@ -327,7 +331,9 @@ attributes: range: string populated_from: name: populated_from - description: Source permissible value that maps to this target permissible value. + description: Source permissible value(s) that map to this target permissible value. + Accepts a single value or a list; scalar input is normalized to a one-element + list at load time. from_schema: https://w3id.org/linkml/transformer owner: PermissibleValueDerivation domain_of: @@ -336,11 +342,13 @@ attributes: - EnumDerivation - PermissibleValueDerivation range: string + multivalued: true sources: name: sources description: Deprecated. Use populated_from instead. - deprecated: Deprecated. Use populated_from instead. See https://github.com/linkml/linkml-map/issues/193 - for planned list support in populated_from. Will be removed in a future version. + deprecated: Deprecated. Use populated_from instead, which now accepts a list. + See https://github.com/linkml/linkml-map/issues/193. Will be removed in a future + version. from_schema: https://w3id.org/linkml/transformer owner: PermissibleValueDerivation domain_of: diff --git a/src/linkml_map/cli/cli.py b/src/linkml_map/cli/cli.py index b118c99a..0ded2c53 100644 --- a/src/linkml_map/cli/cli.py +++ b/src/linkml_map/cli/cli.py @@ -236,27 +236,27 @@ def _pre_flight_validate( ) -> None: """Surface static spec checks before performing a CLI action. - Runs deprecation checks and reference resolution against the loaded - specification, printing all findings to stderr. **Does not gate** — - even error-severity findings are informational only; the runtime - remains the authoritative arbiter of whether a transformation can - actually execute. Users who want fail-fast behavior should run - ``validate-spec --strict`` separately. - - Validates the merged ``tr.specification`` (after multi-file loading - is complete) so cross-file issues surface where users would see - them via ``validate-spec --merge``. Reuses ``tr.source_schemaview`` - and ``tr.target_schemaview`` when set, avoiding a duplicate load - of large or remote schemas. + Replays pre-normalize scan messages captured at spec-load time and runs + reference resolution, printing all findings to stderr. **Does not gate** — + findings are informational only; the runtime remains the authoritative + arbiter of whether a transformation can actually execute. Users who want + fail-fast behavior should run ``validate-spec --strict`` separately. + + Reading scan messages from ``tr.spec_messages`` rather than re-scanning + the post-migration spec means deprecations whose source fields were + cleared by normalization (e.g., ``object_derivations``, PV ``sources``) + are still surfaced here. Reuses ``tr.source_schemaview`` and + ``tr.target_schemaview`` when set, avoiding a duplicate load of large + or remote schemas. """ - from linkml_map.validator import check_deprecated_fields, validate_spec_semantics + from linkml_map.validator import validate_spec_semantics if tr.specification is None: return spec_dict = tr.specification.model_dump(exclude_none=True) - messages = list(check_deprecated_fields(spec_dict)) + messages = list(tr.spec_messages) messages.extend( validate_spec_semantics( spec_dict, diff --git a/src/linkml_map/inference/inference.py b/src/linkml_map/inference/inference.py index b8a0e984..4e27cba6 100644 --- a/src/linkml_map/inference/inference.py +++ b/src/linkml_map/inference/inference.py @@ -1,33 +1,11 @@ """Infer missing values in a specification.""" -import warnings - from linkml_runtime import SchemaView from linkml_map.datamodel.transformer_model import TransformationSpecification from linkml_map.utils.fk_utils import resolve_fk_path -def _warn_deprecated_fields(specification: TransformationSpecification) -> None: - """Emit ``DeprecationWarning`` for any deprecated field usage. - - Thin shim over ``validator._check_deprecated_fields`` that re-emits - deprecation-category messages as ``DeprecationWarning`` so runtime - callers (and any code with Python ``warnings`` filters configured) - keep getting the same signal they always did. - - The same underlying check is consumed by static validation paths - (``validate-spec`` and pre-flight from other CLI commands) via - ``ValidationMessage`` records — see ``validator._check_deprecated_fields``. - """ - from linkml_map.validator import check_deprecated_fields - - spec_dict = specification.model_dump(exclude_none=True) - for msg in check_deprecated_fields(spec_dict): - if msg.category == "deprecated": - warnings.warn(msg.message, DeprecationWarning, stacklevel=3) - - def induce_missing_values(specification: TransformationSpecification, source_schemaview: SchemaView) -> None: """ Infer missing values in a specification. @@ -41,7 +19,6 @@ def induce_missing_values(specification: TransformationSpecification, source_sch for cd in specification.class_derivations: if not cd.populated_from: cd.populated_from = cd.name - _warn_deprecated_fields(specification) for cd in specification.class_derivations: for sd in cd.slot_derivations.values(): diff --git a/src/linkml_map/transformer/errors.py b/src/linkml_map/transformer/errors.py index c4209171..18051622 100644 --- a/src/linkml_map/transformer/errors.py +++ b/src/linkml_map/transformer/errors.py @@ -6,6 +6,18 @@ from typing import Any +class SpecificationError(ValueError): + """The transformation specification is malformed or internally inconsistent. + + Raised during spec loading when the pre-normalize scan detects a + user-intent error that the runtime cannot disambiguate (e.g., setting + both ``populated_from`` and ``sources`` on the same derivation, or + setting both ``object_derivations`` and ``class_derivations`` on a + slot). Subclasses ``ValueError`` for backward compatibility with + callers that already catch ``ValueError`` from ``_normalize_spec_dict``. + """ + + @dataclass class TransformationError(Exception): """A row-level error during data transformation. diff --git a/src/linkml_map/transformer/object_transformer.py b/src/linkml_map/transformer/object_transformer.py index 68fba55f..af240b3a 100644 --- a/src/linkml_map/transformer/object_transformer.py +++ b/src/linkml_map/transformer/object_transformer.py @@ -1164,6 +1164,11 @@ def transform_enum(self, source_value: str, enum_names: list[str], source_obj: A if v is not None: return v for pv_deriv in enum_deriv.permissible_value_derivations.values(): + assert not pv_deriv.sources, ( + f"PermissibleValueDerivation '{pv_deriv.name}' has 'sources' set; " + "this should have been migrated to 'populated_from' during spec load. " + "Did the spec bypass Transformer._normalize_spec_dict?" + ) if pv_deriv.populated_from and source_value in pv_deriv.populated_from: return pv_deriv.name if enum_deriv.mirror_source: diff --git a/src/linkml_map/transformer/transformer.py b/src/linkml_map/transformer/transformer.py index 861a4b45..c9cd203d 100644 --- a/src/linkml_map/transformer/transformer.py +++ b/src/linkml_map/transformer/transformer.py @@ -94,6 +94,17 @@ class Transformer(ABC): _curie_converter: Converter = None + spec_messages: list[Any] = field(default_factory=list) + """Scan messages captured at spec-load time. + + Populated by the three load methods (``load_transformer_specification``, + ``load_transformer_specifications``, ``create_transformer_specification``) + with the ``ValidationMessage`` list returned by ``_normalize_spec_dict``. + Pre-flight validation reads these instead of re-scanning the post-migration + spec, so deprecations whose source fields were cleared by normalization + (e.g., ``object_derivations``, PV ``sources``) are still surfaced. + """ + def map_object(self, obj: OBJECT_TYPE, source_type: str | None = None, **kwargs: Any) -> OBJECT_TYPE: """ Transform source object into an instance of the target class. @@ -136,8 +147,7 @@ def load_transformer_specification(self, path: str | Path) -> None: """ with open(path) as f: obj = yaml.safe_load(f) - self._normalize_spec_dict(obj) - self._migrate_pv_sources_to_populated_from(obj) + self.spec_messages = self._normalize_spec_dict(obj) self.specification = TransformationSpecification(**obj) def load_transformer_specifications(self, paths: tuple[str | Path, ...]) -> None: @@ -153,8 +163,7 @@ def load_transformer_specifications(self, paths: tuple[str | Path, ...]) -> None from linkml_map.utils.spec_merge import load_and_merge_specs obj = load_and_merge_specs(paths) - self._normalize_spec_dict(obj) - self._migrate_pv_sources_to_populated_from(obj) + self.spec_messages = self._normalize_spec_dict(obj) self.specification = TransformationSpecification(**obj) @classmethod @@ -189,33 +198,20 @@ def _normalize_slot_class_derivations( """Flatten object_derivations and normalize class_derivations on a slot. Four steps, applied recursively to nested slots: - 1. Flatten ``object_derivations`` into ``class_derivations`` (with - deprecation warning; error if both are present). + 1. Flatten ``object_derivations`` into ``class_derivations``. 2. Expand compact-key entries (``- Condition: {...}`` → ``{name: Condition, ...}``). 3. Inherit ``populated_from`` from the parent class derivation when absent. 4. Run the normalizer on each class derivation entry so that nested dict-keyed ``slot_derivations`` get ``name`` injected. + + The deprecation warning for ``object_derivations`` and the error for + ``object_derivations`` + ``class_derivations`` both-set are emitted + upstream by :func:`linkml_map.validator.check_deprecated_fields`, + which runs before this method as part of :meth:`_normalize_spec_dict`. """ - # Step 1: flatten object_derivations → class_derivations + # Step 1: flatten object_derivations → class_derivations (silent — scan emitted) object_derivations = slot_spec.get("object_derivations", []) if object_derivations: - if slot_spec.get("class_derivations"): - msg = ( - f"SlotDerivation '{slot_name}' has both 'object_derivations' and " - f"'class_derivations'. Remove 'object_derivations' and use " - f"'class_derivations' only." - ) - raise ValueError(msg) - - warnings.warn( - f"SlotDerivation '{slot_name}' uses 'object_derivations', which is " - f"deprecated. Use list-based class_derivations instead. " - f"'object_derivations' will be removed in a future version. " - f"See https://github.com/linkml/linkml-map/issues/112", - DeprecationWarning, - stacklevel=4, - ) - flattened: list[dict[str, Any]] = [] for od in object_derivations: od_cd = od.get("class_derivations", {}) @@ -250,27 +246,66 @@ def _normalize_slot_class_derivations( cls._normalize_slot_class_derivations(nested_name, nested_sd, normalizer, child_source) @classmethod - def _normalize_spec_dict(cls, obj: dict[str, Any]) -> None: - """Normalize a raw specification dict in place. - - Bundles _preprocess_class_derivations, ReferenceValidator normalization, - and nested ObjectDerivation fixup into a single entry point. Mutates - ``obj`` by replacing its contents with the normalized result. - - Structural normalization only. The runtime additionally migrates - deprecated PV ``sources`` into ``populated_from`` via - :meth:`_migrate_pv_sources_to_populated_from`; that step is kept - separate so the validator can still distinguish user intent. + def _normalize_spec_dict(cls, obj: dict[str, Any], *, silent: bool = False) -> list[Any]: + """Normalize a raw specification dict in place. Return scan messages. + + Pipeline: + + 1. **Pre-normalize shape**: ``_preprocess_class_derivations`` expands + top-level compact-key forms so the scan sees a consistent shape. + 2. **Scan**: ``validator.check_deprecated_fields`` walks the dict and + returns ``ValidationMessage`` records for deprecated-field usage + and ambiguous combinations (errors). + 3. **Emit/raise** (unless ``silent``): deprecation warnings fire as + Python ``DeprecationWarning``; errors raise + :class:`~linkml_map.transformer.errors.SpecificationError`. + 4. **Structural normalize**: ``ReferenceValidator`` expands nested + compact keys, converts dict↔list as needed, flattens + ``object_derivations`` into ``class_derivations``. + 5. **Field migrations**: PV ``populated_from`` scalar → list, deprecated + PV ``sources`` → ``populated_from`` (with ``sources`` cleared). + + After this method, ``sources`` no longer appears on any + ``PermissibleValueDerivation`` — the runtime can rely on + ``populated_from`` as the single source of truth. :param obj: Raw specification dict (e.g. from YAML or user code). + :param silent: When ``True``, neither emit warnings nor raise on + errors — return them in the message list instead. Used by the + validator path so it can surface findings as + ``ValidationMessage`` records. + :returns: A list of :class:`ValidationMessage` records from the scan. """ + from linkml_map.transformer.errors import SpecificationError + from linkml_map.validator import check_deprecated_fields + + # Phase 1: pre-normalize shape (top-level compact keys) cls._preprocess_class_derivations(obj) + + # Phase 2: scan for deprecations and conflicts + messages = check_deprecated_fields(obj) + + # Phase 3: emit warnings + raise on errors (unless silent) + if not silent: + errors = [m for m in messages if m.severity == "error"] + if errors: + raise SpecificationError("; ".join(m.message for m in errors)) + for m in messages: + if m.category == "deprecated": + warnings.warn(m.message, DeprecationWarning, stacklevel=3) + + # Phase 4: structural normalize (flattens object_derivations silently) normalizer = ReferenceValidator(package_schemaview("linkml_map.datamodel.transformer_model")) normalizer.expand_all = True normalized = cls.normalize_transform_spec(obj, normalizer) obj.clear() obj.update(normalized) + + # Phase 5: field migrations (post-condition: no PV has `sources`) cls._coerce_pv_populated_from_to_list(obj) + cls._migrate_pv_sources_to_populated_from(obj) + + return messages @staticmethod def _iter_pv_derivations(obj: dict[str, Any]) -> Iterator[dict[str, Any]]: @@ -310,18 +345,18 @@ def _coerce_pv_populated_from_to_list(cls, obj: dict[str, Any]) -> None: @classmethod def _migrate_pv_sources_to_populated_from(cls, obj: dict[str, Any]) -> None: - """Copy deprecated ``sources`` into ``populated_from`` on PV derivs. - - Only applied when ``populated_from`` is empty/unset; ``sources`` is - left in place so the validator's deprecation warning still fires. - Entries with **both** fields populated are not modified; the validator - surfaces those as ``severity="error"``. Runtime-only — the validator - operates on the un-migrated dict so it can distinguish user intent. + """Move deprecated ``sources`` into ``populated_from`` on PV derivs. + + Applied after the pre-normalize scan has already detected and reported + any ``sources`` usage and any ``sources`` + ``populated_from`` conflicts. + For each PV deriv with ``sources`` set, copies into ``populated_from`` + (if not already set) and clears the ``sources`` key. Post-condition: + no PV has ``sources`` set. The runtime can therefore rely on + ``populated_from`` as the single source of truth and ignore ``sources``. """ for pvd in cls._iter_pv_derivations(obj): - pf = pvd.get("populated_from") - srcs = pvd.get("sources") - if not pf and srcs: + srcs = pvd.pop("sources", None) + if srcs and not pvd.get("populated_from"): pvd["populated_from"] = list(srcs) @staticmethod @@ -366,8 +401,7 @@ def create_transformer_specification(self, obj: dict[str, Any]) -> None: :param path: :return: """ - self._normalize_spec_dict(obj) - self._migrate_pv_sources_to_populated_from(obj) + self.spec_messages = self._normalize_spec_dict(obj) self.specification = TransformationSpecification(**obj) def _apply_source_schema_patches(self) -> None: diff --git a/src/linkml_map/validator.py b/src/linkml_map/validator.py index 563cdb30..5f1eb765 100644 --- a/src/linkml_map/validator.py +++ b/src/linkml_map/validator.py @@ -143,25 +143,43 @@ def normalize_spec_dict(obj: dict[str, Any]) -> dict[str, Any]: Applies the same full normalization pipeline that the transformer engine uses (compact-key expansion, dict→list conversion via ReferenceValidator, - nested ObjectDerivation fixup), plus YAML type coercion for fields that - are commonly mis-parsed. + nested ObjectDerivation fixup, deprecated-field migration), plus YAML + type coercion for fields that are commonly mis-parsed. If normalization itself fails (e.g. due to malformed data), the original dict is returned with only YAML type coercion applied so that JSON Schema validation can still report structural errors. + Use :func:`_normalize_and_collect_messages` instead when you also need + the pre-normalize scan's findings (deprecation warnings, conflict errors). + :param obj: Raw YAML-loaded dict. :returns: A new dict suitable for JSON Schema validation. """ + obj, _ = _normalize_and_collect_messages(obj) + return obj + + +def _normalize_and_collect_messages( + obj: dict[str, Any], +) -> tuple[dict[str, Any], list[ValidationMessage]]: + """Normalize and return both the dict and the pre-normalize scan messages. + + Internal helper used by :func:`validate_spec`. Calls the transformer's + normalization in silent mode so scan findings are returned as + ``ValidationMessage`` records rather than emitted as Python warnings or + raised as ``SpecificationError``. + """ obj = dict(obj) + messages: list[ValidationMessage] = [] try: - Transformer._normalize_spec_dict(obj) + messages = Transformer._normalize_spec_dict(obj, silent=True) or [] except Exception: logger.debug("Normalization failed; falling back to raw dict", exc_info=True) for field in _COERCE_FIELDS: if field in obj: obj[field] = _coerce_yaml_types(obj[field]) - return obj + return obj, messages def _validate_structural( @@ -340,61 +358,39 @@ def _iter_derivation_dicts(raw: Any) -> list[dict[str, Any]]: return [] -def check_pv_field_conflicts(data: dict[str, Any]) -> list[ValidationMessage]: - """Flag PermissibleValueDerivations that set both ``populated_from`` and ``sources``. - - The two fields are alternative ways to express the same mapping (sources is - the deprecated spelling). Setting both is ambiguous: ``populated_from`` is - the canonical winner at runtime, but the user almost certainly didn't mean - to write both. Emitted as ``severity="error"`` so ``validate-spec`` gates - on it. Operates on the un-migrated normalized dict; runtime entry points - migrate ``sources`` → ``populated_from`` *after* normalization, so this - check correctly distinguishes "user wrote both" from "we migrated sources". - - :param data: A normalized spec dict (post ``normalize_spec_dict``, - pre runtime migration). - :returns: A list of error messages (empty if no conflicts). - """ - messages: list[ValidationMessage] = [] - for ed in _iter_derivation_dicts(data.get("enum_derivations")): - ed_name = ed.get("name", "") - for pvd in _iter_derivation_dicts(ed.get("permissible_value_derivations")): - if pvd.get("populated_from") and pvd.get("sources"): - pvd_name = pvd.get("name", "") - messages.append( - ValidationMessage( - severity="error", - path=f"$.enum_derivations[{ed_name}].permissible_value_derivations[{pvd_name}]", - message=( - f"PermissibleValueDerivation '{pvd_name}' sets both 'populated_from' " - f"and 'sources'. These are alternative spellings of the same field; " - f"set only 'populated_from' (which now accepts a list)." - ), - ) - ) - return messages - - def check_deprecated_fields(data: dict[str, Any]) -> list[ValidationMessage]: - """Check a normalized spec dict for use of deprecated fields. + """Scan a spec dict for deprecated-field usage and ambiguous combinations. - Currently flags two deprecations: + Runs **pre-migration** — i.e., on a structurally-normalized dict whose + deprecated fields are still as the user wrote them. ``Transformer._normalize_spec_dict`` + calls this between structural normalization and field migration, so all + deprecations are detectable here regardless of whether the migration step + later removes them. + + Flags: * ``sources`` on ``ClassDerivation`` / ``SlotDerivation`` / ``EnumDerivation`` / ``PermissibleValueDerivation`` — replaced by - ``populated_from``. + ``populated_from``. Severity: warning, category: deprecated. * ``derived_from`` on ``SlotDerivation`` — ignored by the runtime - and removable. - - Findings are collapsed to one message per (deprecation, derivation - type) pair to keep output readable on large specs. - - Other deprecations (``object_derivations`` flattening) are surfaced - during normalization in ``Transformer._normalize_slot_class_derivations`` - and do not need to be re-emitted here. - - :param data: A **normalized** spec dict (post ``normalize_spec_dict``). - :returns: A list of warning messages with ``category="deprecated"``. + and removable. Severity: warning, category: deprecated. + * ``object_derivations`` on ``SlotDerivation`` — flattened into + ``class_derivations`` at load time. Severity: warning, category: + deprecated. + * ``populated_from`` **and** ``sources`` both set on the same + ``PermissibleValueDerivation`` — ambiguous, the user almost + certainly didn't mean both. Severity: error. + * ``object_derivations`` **and** ``class_derivations`` both set on + the same ``SlotDerivation`` — ambiguous. Severity: error. + + ``sources`` deprecation findings are collapsed to one message per + (deprecation, derivation type) pair to keep output readable on + large specs; per-entry messages are emitted for the other categories. + + :param data: A structurally-normalized spec dict (post + ``normalize_spec_dict``, pre field migration). + :returns: A list of validation messages — warnings for deprecations, + errors for ambiguous combinations. """ messages: list[ValidationMessage] = [] sources_counts: dict[str, list[str]] = { @@ -404,6 +400,7 @@ def check_deprecated_fields(data: dict[str, Any]) -> list[ValidationMessage]: "PermissibleValueDerivation": [], } derived_from_names: list[str] = [] + object_derivation_names: list[str] = [] for cd in _iter_derivation_dicts(data.get("class_derivations")): cd_name = cd.get("name", "") @@ -415,6 +412,20 @@ def check_deprecated_fields(data: dict[str, Any]) -> list[ValidationMessage]: sources_counts["SlotDerivation"].append(sd_name) if sd.get("derived_from"): derived_from_names.append(sd_name) + if sd.get("object_derivations"): + object_derivation_names.append(sd_name) + if sd.get("class_derivations"): + messages.append( + ValidationMessage( + severity="error", + path=f"$.class_derivations[{cd_name}].slot_derivations[{sd_name}]", + message=( + f"SlotDerivation '{sd_name}' sets both 'object_derivations' " + f"and 'class_derivations'. Remove 'object_derivations' and " + f"use 'class_derivations' only." + ), + ) + ) for ed in _iter_derivation_dicts(data.get("enum_derivations")): ed_name = ed.get("name", "") @@ -424,6 +435,19 @@ def check_deprecated_fields(data: dict[str, Any]) -> list[ValidationMessage]: pvd_name = pvd.get("name", "") if pvd.get("sources"): sources_counts["PermissibleValueDerivation"].append(pvd_name) + if pvd.get("populated_from"): + messages.append( + ValidationMessage( + severity="error", + path=(f"$.enum_derivations[{ed_name}].permissible_value_derivations[{pvd_name}]"), + message=( + f"PermissibleValueDerivation '{pvd_name}' sets both " + f"'populated_from' and 'sources'. These are alternative " + f"spellings of the same field; set only 'populated_from' " + f"(which now accepts a list)." + ), + ) + ) for deriv_type, names in sources_counts.items(): if names: @@ -442,6 +466,24 @@ def check_deprecated_fields(data: dict[str, Any]) -> list[ValidationMessage]: ) ) + if object_derivation_names: + preview = ", ".join(object_derivation_names[:5]) + suffix = f" (and {len(object_derivation_names) - 5} more)" if len(object_derivation_names) > 5 else "" + messages.append( + ValidationMessage( + severity="warning", + category="deprecated", + path="$.SlotDerivation", + message=( + f"{len(object_derivation_names)} SlotDerivation(s) use 'object_derivations', " + f"which is deprecated and flattened into 'class_derivations' at load time: " + f"{preview}{suffix}. Use list-based 'class_derivations' instead. " + f"'object_derivations' will be removed in a future version. " + f"See https://github.com/linkml/linkml-map/issues/112" + ), + ) + ) + if derived_from_names: preview = ", ".join(derived_from_names[:5]) suffix = f" (and {len(derived_from_names) - 5} more)" if len(derived_from_names) > 5 else "" @@ -757,11 +799,10 @@ def validate_spec( >>> validate_spec(data) [] """ - normalized = normalize_spec_dict(data) + normalized, scan_messages = _normalize_and_collect_messages(data) messages = _validate_structural(normalized, schema_path=schema_path) if not messages: - messages.extend(check_pv_field_conflicts(normalized)) - messages.extend(check_deprecated_fields(normalized)) + messages.extend(scan_messages) messages.extend( validate_spec_semantics( normalized, diff --git a/tests/test_deprecations.py b/tests/test_deprecations.py index 8d609909..fd4aedc5 100644 --- a/tests/test_deprecations.py +++ b/tests/test_deprecations.py @@ -43,7 +43,7 @@ def test_derived_from_emits_deprecation_warning(): - """Using derived_from on a SlotDerivation emits a DeprecationWarning.""" + """Using derived_from on a SlotDerivation emits a DeprecationWarning at load time.""" tr = ObjectTransformer() tr.source_schemaview = SchemaView(SOURCE_SCHEMA) tr.target_schemaview = SchemaView(TARGET_SCHEMA) @@ -60,17 +60,14 @@ def test_derived_from_emits_deprecation_warning(): }, }, } - tr.create_transformer_specification(spec) - with warnings.catch_warnings(record=True) as caught: warnings.simplefilter("always", DeprecationWarning) - # Trigger induce_missing_values via derived_specification - _ = tr.derived_specification + tr.create_transformer_specification(spec) deprecation_warnings = [w for w in caught if issubclass(w.category, DeprecationWarning)] - assert len(deprecation_warnings) == 1 - assert "derived_from" in str(deprecation_warnings[0].message) - assert "full_name" in str(deprecation_warnings[0].message) + derived_from_warnings = [w for w in deprecation_warnings if "derived_from" in str(w.message)] + assert len(derived_from_warnings) == 1 + assert "full_name" in str(derived_from_warnings[0].message) def test_derived_from_does_not_affect_transformation(): @@ -105,7 +102,7 @@ def test_derived_from_does_not_affect_transformation(): def test_sources_on_pv_emits_deprecation_warning(): - """Using sources on a PermissibleValueDerivation emits a DeprecationWarning.""" + """Using sources on a PermissibleValueDerivation emits a DeprecationWarning at load time.""" tr = ObjectTransformer() tr.source_schemaview = SchemaView(SOURCE_SCHEMA) tr.target_schemaview = SchemaView(TARGET_SCHEMA) @@ -128,11 +125,9 @@ def test_sources_on_pv_emits_deprecation_warning(): }, }, } - tr.create_transformer_specification(spec) - with warnings.catch_warnings(record=True) as caught: warnings.simplefilter("always", DeprecationWarning) - _ = tr.derived_specification + tr.create_transformer_specification(spec) deprecation_warnings = [w for w in caught if issubclass(w.category, DeprecationWarning)] sources_warnings = [w for w in deprecation_warnings if "sources" in str(w.message)] @@ -141,7 +136,7 @@ def test_sources_on_pv_emits_deprecation_warning(): def test_sources_on_slot_emits_deprecation_warning(): - """Using sources on a SlotDerivation emits a DeprecationWarning.""" + """Using sources on a SlotDerivation emits a DeprecationWarning at load time.""" tr = ObjectTransformer() tr.source_schemaview = SchemaView(SOURCE_SCHEMA) tr.target_schemaview = SchemaView(TARGET_SCHEMA) @@ -157,11 +152,9 @@ def test_sources_on_slot_emits_deprecation_warning(): }, }, } - tr.create_transformer_specification(spec) - with warnings.catch_warnings(record=True) as caught: warnings.simplefilter("always", DeprecationWarning) - _ = tr.derived_specification + tr.create_transformer_specification(spec) deprecation_warnings = [w for w in caught if issubclass(w.category, DeprecationWarning)] sources_warnings = [w for w in deprecation_warnings if "sources" in str(w.message)] @@ -246,11 +239,9 @@ def test_no_warning_without_deprecated_fields(): }, }, } - tr.create_transformer_specification(spec) - with warnings.catch_warnings(record=True) as caught: warnings.simplefilter("always", DeprecationWarning) - _ = tr.derived_specification + tr.create_transformer_specification(spec) deprecation_warnings = [w for w in caught if issubclass(w.category, DeprecationWarning)] assert len(deprecation_warnings) == 0 diff --git a/tests/test_transformer/test_multi_enum.py b/tests/test_transformer/test_multi_enum.py index d289461f..9da6e7cb 100644 --- a/tests/test_transformer/test_multi_enum.py +++ b/tests/test_transformer/test_multi_enum.py @@ -330,13 +330,13 @@ def test_pv_populated_from_scalar_form_still_works(): def test_pv_sources_only_is_migrated_to_populated_from(): - """sources-only PV derivs are migrated so the runtime uses populated_from.""" + """sources-only PV derivs are migrated so the runtime sees only populated_from.""" tr = _make_transformer() - # SimplePrimary in TRANSFORM_SPEC uses sources: [...] — confirm migration. + # SimplePrimary in TRANSFORM_SPEC uses sources: [...] — confirm migration cleared sources. pvds = tr.specification.enum_derivations["SimplePrimary"].permissible_value_derivations assert pvds["red"].populated_from == ["light_red", "dark_red"] - # sources is preserved so the validator's deprecation warning still fires. - assert pvds["red"].sources == ["light_red", "dark_red"] + # sources is cleared after migration — the runtime relies on populated_from alone. + assert not pvds["red"].sources def test_explicit_range_any_with_any_of(): From 3532d7b04960288ab521b1147e5c6bb1799ca98f Mon Sep 17 00:00:00 2001 From: amc-corey-cox <69321580+amc-corey-cox@users.noreply.github.com> Date: Mon, 18 May 2026 10:49:23 -0500 Subject: [PATCH 3/8] Detect compact-key list PV derivs; strip explicit-None populated_from - _iter_derivation_dicts now recognizes compact-key list form so the deprecation scan catches `permissible_value_derivations: [{name: {...}}]`. - _coerce_pv_populated_from_to_list strips explicit-null populated_from (also handles [None] post-ReferenceValidator) so pydantic uses the default-factory. - markdown.j2 defends with `or []` so a programmatic None doesn't crash the template. --- src/linkml_map/compiler/templates/markdown.j2 | 2 +- src/linkml_map/transformer/transformer.py | 21 ++++++-- src/linkml_map/validator.py | 49 ++++++++++++++----- tests/test_validator.py | 47 ++++++++++++++++++ 4 files changed, 100 insertions(+), 19 deletions(-) diff --git a/src/linkml_map/compiler/templates/markdown.j2 b/src/linkml_map/compiler/templates/markdown.j2 index 344a6dfa..3dc50cac 100644 --- a/src/linkml_map/compiler/templates/markdown.j2 +++ b/src/linkml_map/compiler/templates/markdown.j2 @@ -20,6 +20,6 @@ | Target | Source | Info | | ------ | ------ | ---- | {%- for pvd in ed.permissible_value_derivations.values() %} -| {{ pvd.name }} | {{ pvd.populated_from | join(", ") }} | . | +| {{ pvd.name }} | {{ (pvd.populated_from or []) | join(", ") }} | . | {%- endfor -%} {% endfor %} diff --git a/src/linkml_map/transformer/transformer.py b/src/linkml_map/transformer/transformer.py index c9cd203d..5f3b9a3f 100644 --- a/src/linkml_map/transformer/transformer.py +++ b/src/linkml_map/transformer/transformer.py @@ -333,14 +333,25 @@ def _iter_pv_derivations(obj: dict[str, Any]) -> Iterator[dict[str, Any]]: @classmethod def _coerce_pv_populated_from_to_list(cls, obj: dict[str, Any]) -> None: - """Wrap scalar ``populated_from`` to a one-element list on each PV deriv. + """Coerce ``populated_from`` to a list on each PV deriv. - ``PermissibleValueDerivation.populated_from`` is declared multivalued; - scalar input is a user convenience that pydantic would otherwise reject. + Input shapes handled: + + * Scalar string: wrapped to a one-element list (user convenience that + pydantic would otherwise reject for a multivalued field). + * Explicit ``None`` or ``[None]`` (``populated_from:`` with no YAML + value, possibly already wrapped to a list by ``ReferenceValidator``): + the key is removed so pydantic uses the ``default_factory=list`` + default — treats "explicitly set to nothing" as "unset". + * List: left as-is. """ for pvd in cls._iter_pv_derivations(obj): - pf = pvd.get("populated_from") - if isinstance(pf, str): + if "populated_from" not in pvd: + continue + pf = pvd["populated_from"] + if pf is None or (isinstance(pf, list) and not any(pf)): + del pvd["populated_from"] + elif isinstance(pf, str): pvd["populated_from"] = [pf] @classmethod diff --git a/src/linkml_map/validator.py b/src/linkml_map/validator.py index 5f1eb765..fd552224 100644 --- a/src/linkml_map/validator.py +++ b/src/linkml_map/validator.py @@ -340,16 +340,36 @@ def _resolve_schema_path( def _iter_derivation_dicts(raw: Any) -> list[dict[str, Any]]: - """Normalize a derivations section (dict or list) to a list of dicts. + """Normalize a derivations section (dict, list, or compact-key list) to dicts. - After ``normalize_spec_dict``, derivation sections may be either a list of - dicts (with ``name`` keys) or a dict keyed by name. This helper - normalizes to a consistent list-of-dicts form for iteration. + Derivation sections in user input can take three shapes: + + * Dict keyed by name: ``{"red": {"populated_from": "x"}}`` + * Explicit-name list: ``[{"name": "red", "populated_from": "x"}]`` + * Compact-key list: ``[{"red": {"populated_from": "x"}}]`` + + All three are normalized to a list of dicts with ``name`` injected, so + callers can scan fields uniformly without caring about user shape. """ if isinstance(raw, list): - return [item for item in raw if isinstance(item, dict)] - if isinstance(raw, dict): result: list[dict[str, Any]] = [] + for item in raw: + if not isinstance(item, dict): + continue + # Compact-key form: a single-key dict whose value is another dict, + # where the key isn't the literal "name" attribute. Hoist the key + # to a `name` field on the inner dict (mirrors Transformer._expand_compact_keys). + if len(item) == 1: + key, val = next(iter(item.items())) + if key != "name" and isinstance(val, dict): + d = dict(val) + d.setdefault("name", key) + result.append(d) + continue + result.append(item) + return result + if isinstance(raw, dict): + result = [] for name, body in raw.items(): d = dict(body) if isinstance(body, dict) else {} d.setdefault("name", name) @@ -361,11 +381,13 @@ def _iter_derivation_dicts(raw: Any) -> list[dict[str, Any]]: def check_deprecated_fields(data: dict[str, Any]) -> list[ValidationMessage]: """Scan a spec dict for deprecated-field usage and ambiguous combinations. - Runs **pre-migration** — i.e., on a structurally-normalized dict whose - deprecated fields are still as the user wrote them. ``Transformer._normalize_spec_dict`` - calls this between structural normalization and field migration, so all - deprecations are detectable here regardless of whether the migration step - later removes them. + Runs **pre-migration** — i.e., before ``_normalize_slot_class_derivations`` + flattens ``object_derivations`` and before the PV ``sources`` → ``populated_from`` + migration. Called by ``Transformer._normalize_spec_dict`` after top-level + compact-key preprocessing but before ``ReferenceValidator.normalize()``; + :func:`_iter_derivation_dicts` handles the remaining shape variations + (dict-keyed, explicit-list, and compact-key list forms) so deprecations + are detected regardless of how the user wrote them. Flags: @@ -387,8 +409,9 @@ def check_deprecated_fields(data: dict[str, Any]) -> list[ValidationMessage]: (deprecation, derivation type) pair to keep output readable on large specs; per-entry messages are emitted for the other categories. - :param data: A structurally-normalized spec dict (post - ``normalize_spec_dict``, pre field migration). + :param data: A spec dict after top-level compact-key preprocessing but + before field migration. Inner derivation shapes are handled by + :func:`_iter_derivation_dicts`. :returns: A list of validation messages — warnings for deprecations, errors for ambiguous combinations. """ diff --git a/tests/test_validator.py b/tests/test_validator.py index 4402bee5..ee705629 100644 --- a/tests/test_validator.py +++ b/tests/test_validator.py @@ -250,6 +250,53 @@ def test_pv_sources_and_populated_from_both_set_errors(): assert any("'red'" in m.message for m in errors) +def test_pv_sources_in_compact_key_list_form_detected(): + """Compact-key list-form PV derivs (`[{name: {sources: [...]}}]`) are scanned. + + Targets the scan helper directly because the compact-key list form for PVs + isn't currently round-trippable through ``ReferenceValidator.normalize()`` + — a separate concern from whether the deprecation scan catches it. + """ + from linkml_map.validator import check_deprecated_fields + + obj = { + "enum_derivations": { + "Target": { + "populated_from": "Source", + "permissible_value_derivations": [ + {"red": {"sources": ["light_red", "dark_red"]}}, + ], + }, + }, + } + msgs = check_deprecated_fields(obj) + warnings = _warnings(msgs) + assert any("sources" in m.message and "PermissibleValueDerivation" in m.path for m in warnings), ( + f"Expected deprecation warning for compact-key list PV; got {warnings}" + ) + + +def test_pv_populated_from_explicit_none_is_stripped(): + """`populated_from: null` (explicit YAML None) is dropped during normalize.""" + raw = { + "enum_derivations": { + "Target": { + "populated_from": "Source", + "permissible_value_derivations": { + "red": {"populated_from": None}, + }, + }, + }, + } + normalized = normalize_spec_dict(raw) + # After normalize, the PV's populated_from key has been removed; pydantic + # will fill in the default factory ([]) when the model is built. + eds = normalized["enum_derivations"] + pvds = eds["Target"]["permissible_value_derivations"] + pvd = pvds["red"] if isinstance(pvds, dict) else next(iter(pvds)) + assert "populated_from" not in pvd + + # --------------------------------------------------------------------------- # File-level validation against real trans-specs (structural only) # --------------------------------------------------------------------------- From 67d3ef6754359328fe30022b1ae5dda4654ed4d6 Mon Sep 17 00:00:00 2001 From: amc-corey-cox <69321580+amc-corey-cox@users.noreply.github.com> Date: Mon, 18 May 2026 15:04:50 -0500 Subject: [PATCH 4/8] Split shape canonicalization from field migration in _normalize_spec_dict MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three phases now: SHAPE (recursive compact-key + dict↔list everywhere), SCAN (on canonical shape), MIGRATE (object_derivations flatten, populated_from inheritance, PV scalar→list, PV None-strip, PV sources→populated_from). Field semantics no longer entangled with structural normalization, so _iter_derivation_dicts can assume canonical shape. --- src/linkml_map/transformer/transformer.py | 250 ++++++++++++++++------ src/linkml_map/validator.py | 32 +-- tests/test_validator.py | 12 +- 3 files changed, 200 insertions(+), 94 deletions(-) diff --git a/src/linkml_map/transformer/transformer.py b/src/linkml_map/transformer/transformer.py index 5f3b9a3f..8ed562d3 100644 --- a/src/linkml_map/transformer/transformer.py +++ b/src/linkml_map/transformer/transformer.py @@ -32,6 +32,23 @@ logger = logging.getLogger(__name__) +def _iter_values_or_list(container: Any) -> Iterator[dict[str, Any]]: + """Yield each dict in a derivations section (dict-keyed or list form). + + Caller assumes shape has been canonicalized (i.e., the SHAPE phase of + ``Transformer._normalize_spec_dict`` has run), so compact-key list + items are already expanded. + """ + if isinstance(container, dict): + for v in container.values(): + if isinstance(v, dict): + yield v + elif isinstance(container, list): + for item in container: + if isinstance(item, dict): + yield item + + OBJECT_TYPE = dict[str, Any] | BaseModel | YAMLRoot """An object can be a plain python dict, a pydantic object, or a linkml YAMLRoot""" @@ -168,7 +185,13 @@ def load_transformer_specifications(self, paths: tuple[str | Path, ...]) -> None @classmethod def normalize_transform_spec(cls, obj: dict[str, Any], normalizer: ReferenceValidator) -> dict: - """Recursively normalize class_derivations and flatten object_derivations.""" + """Shape-canonicalize class_derivations recursively. + + Pure shape work — no field migrations. ``object_derivations`` + flattening and ``populated_from`` inheritance happen in the MIGRATE + phase of :meth:`_normalize_spec_dict` so the SCAN phase between + them sees the user's original field values. + """ obj = normalizer.normalize(obj) class_derivations = obj.get("class_derivations", []) @@ -179,12 +202,11 @@ def normalize_transform_spec(cls, obj: dict[str, Any], normalizer: ReferenceVali for class_spec in cd_iter: if not isinstance(class_spec, dict): continue - parent_source = class_spec.get("populated_from") slot_derivations = class_spec.get("slot_derivations", {}) for slot_name, slot_spec in slot_derivations.items(): if slot_spec.get("value") is not None and slot_spec.get("range") is None: slot_spec["range"] = "string" - cls._normalize_slot_class_derivations(slot_name, slot_spec, normalizer, parent_source) + cls._normalize_slot_class_derivations(slot_name, slot_spec, normalizer) return obj @classmethod @@ -193,39 +215,22 @@ def _normalize_slot_class_derivations( slot_name: str, slot_spec: dict[str, Any], normalizer: ReferenceValidator, - parent_populated_from: str | None = None, ) -> None: - """Flatten object_derivations and normalize class_derivations on a slot. - - Four steps, applied recursively to nested slots: - 1. Flatten ``object_derivations`` into ``class_derivations``. - 2. Expand compact-key entries (``- Condition: {...}`` → ``{name: Condition, ...}``). - 3. Inherit ``populated_from`` from the parent class derivation when absent. - 4. Run the normalizer on each class derivation entry so that nested - dict-keyed ``slot_derivations`` get ``name`` injected. - - The deprecation warning for ``object_derivations`` and the error for - ``object_derivations`` + ``class_derivations`` both-set are emitted - upstream by :func:`linkml_map.validator.check_deprecated_fields`, - which runs before this method as part of :meth:`_normalize_spec_dict`. + """Shape-canonicalize the ``class_derivations`` on a slot, recursively. + + Two steps, applied recursively to nested slots: + + 1. Expand compact-key entries (``- Condition: {...}`` → + ``{name: Condition, ...}``). + 2. Run the normalizer on each class derivation entry so dict-keyed + ``slot_derivations`` get ``name`` injected and other shape + canonicalization happens. + + Pure shape — no field semantics. ``object_derivations`` flattening + and ``populated_from`` inheritance live in the MIGRATE phase of + :meth:`_normalize_spec_dict`, not here, so the SCAN phase sees the + user's original field values. """ - # Step 1: flatten object_derivations → class_derivations (silent — scan emitted) - object_derivations = slot_spec.get("object_derivations", []) - if object_derivations: - flattened: list[dict[str, Any]] = [] - for od in object_derivations: - od_cd = od.get("class_derivations", {}) - if isinstance(od_cd, dict): - for name, body in od_cd.items(): - entry = body if isinstance(body, dict) else {} - entry.setdefault("name", name) - flattened.append(entry) - elif isinstance(od_cd, list): - flattened.extend(od_cd) - slot_spec["class_derivations"] = flattened - del slot_spec["object_derivations"] - - # Steps 2-4: expand compact keys, inherit populated_from, normalize, and recurse slot_cd = slot_spec.get("class_derivations") if not isinstance(slot_cd, list): return @@ -235,35 +240,36 @@ def _normalize_slot_class_derivations( for cd_entry in slot_cd: if not isinstance(cd_entry, dict): continue - if not cd_entry.get("populated_from") and parent_populated_from: - cd_entry["populated_from"] = parent_populated_from normalized = normalizer.normalize(cd_entry) cd_entry.clear() cd_entry.update(normalized) - child_source = cd_entry.get("populated_from") for nested_name, nested_sd in cd_entry.get("slot_derivations", {}).items(): if isinstance(nested_sd, dict): - cls._normalize_slot_class_derivations(nested_name, nested_sd, normalizer, child_source) + cls._normalize_slot_class_derivations(nested_name, nested_sd, normalizer) @classmethod def _normalize_spec_dict(cls, obj: dict[str, Any], *, silent: bool = False) -> list[Any]: """Normalize a raw specification dict in place. Return scan messages. - Pipeline: - - 1. **Pre-normalize shape**: ``_preprocess_class_derivations`` expands - top-level compact-key forms so the scan sees a consistent shape. - 2. **Scan**: ``validator.check_deprecated_fields`` walks the dict and - returns ``ValidationMessage`` records for deprecated-field usage - and ambiguous combinations (errors). - 3. **Emit/raise** (unless ``silent``): deprecation warnings fire as - Python ``DeprecationWarning``; errors raise + Three clean phases, ordered so the SCAN sees a canonical shape with + the user's original field values: + + 1. **SHAPE** — pure structural canonicalization, no field semantics. + Expands compact-key list forms everywhere they can appear + (top-level class/enum derivations, nested class_derivations in + slots, OD-inner class_derivations, PV-level lists), then runs + ``ReferenceValidator`` for dict↔list canonicalization and name + injection. + 2. **SCAN** — :func:`linkml_map.validator.check_deprecated_fields` + walks the canonical shape, returning ``ValidationMessage`` + records for deprecated-field usage and ambiguous combinations. + Unless ``silent``, deprecation warnings fire as Python + ``DeprecationWarning`` and conflict errors raise :class:`~linkml_map.transformer.errors.SpecificationError`. - 4. **Structural normalize**: ``ReferenceValidator`` expands nested - compact keys, converts dict↔list as needed, flattens - ``object_derivations`` into ``class_derivations``. - 5. **Field migrations**: PV ``populated_from`` scalar → list, deprecated - PV ``sources`` → ``populated_from`` (with ``sources`` cleared). + 3. **MIGRATE** — all field semantics in one pass: + ``object_derivations`` flatten, ``populated_from`` inheritance + from parent class, PV scalar → list, PV explicit-``None`` strip, + PV ``sources`` → ``populated_from`` (clearing ``sources``). After this method, ``sources`` no longer appears on any ``PermissibleValueDerivation`` — the runtime can rely on @@ -279,13 +285,12 @@ def _normalize_spec_dict(cls, obj: dict[str, Any], *, silent: bool = False) -> l from linkml_map.transformer.errors import SpecificationError from linkml_map.validator import check_deprecated_fields - # Phase 1: pre-normalize shape (top-level compact keys) - cls._preprocess_class_derivations(obj) + # SHAPE + cls._shape_normalize(obj) - # Phase 2: scan for deprecations and conflicts + # SCAN messages = check_deprecated_fields(obj) - # Phase 3: emit warnings + raise on errors (unless silent) if not silent: errors = [m for m in messages if m.severity == "error"] if errors: @@ -294,18 +299,141 @@ def _normalize_spec_dict(cls, obj: dict[str, Any], *, silent: bool = False) -> l if m.category == "deprecated": warnings.warn(m.message, DeprecationWarning, stacklevel=3) - # Phase 4: structural normalize (flattens object_derivations silently) + # MIGRATE + cls._flatten_object_derivations(obj) + cls._inherit_populated_from(obj) + cls._coerce_pv_populated_from_to_list(obj) + cls._migrate_pv_sources_to_populated_from(obj) + + return messages + + @classmethod + def _shape_normalize(cls, obj: dict[str, Any]) -> None: + """Canonicalize the structural shape of every derivation section. + + Expands compact-key list forms in every derivation-section location + (including OD-inner class_derivations and PV-level lists, which + ``ReferenceValidator`` doesn't canonicalize correctly), then runs + ``ReferenceValidator`` for dict↔list canonicalization and name + injection. No field semantics are mutated. + """ + cls._pre_shape_expand_compact_keys(obj) normalizer = ReferenceValidator(package_schemaview("linkml_map.datamodel.transformer_model")) normalizer.expand_all = True normalized = cls.normalize_transform_spec(obj, normalizer) obj.clear() obj.update(normalized) - # Phase 5: field migrations (post-condition: no PV has `sources`) - cls._coerce_pv_populated_from_to_list(obj) - cls._migrate_pv_sources_to_populated_from(obj) + @classmethod + def _pre_shape_expand_compact_keys(cls, obj: dict[str, Any]) -> None: + """Recursively expand compact-key list forms before ReferenceValidator. - return messages + Covers spots ``ReferenceValidator`` doesn't normalize: + nested class_derivations in slot_derivations, OD-inner + class_derivations, and PV-level lists. Without this pass RV mangles + compact-key PV lists into ``{None: {: {...}}}``. + """ + cls._preprocess_class_derivations(obj) + cls._preprocess_enum_derivations(obj) + for cd in _iter_values_or_list(obj.get("class_derivations")): + cls._expand_compact_keys_in_class_deriv(cd) + + @classmethod + def _expand_compact_keys_in_class_deriv(cls, cd: dict[str, Any]) -> None: + """Expand compact-key list forms in a class derivation, recursively.""" + for sd in _iter_values_or_list(cd.get("slot_derivations")): + # Slot's own nested class_derivations (list-with-compact-keys form) + nested_cds = sd.get("class_derivations") + if isinstance(nested_cds, list): + cls._expand_compact_keys(nested_cds) + for ncd in _iter_values_or_list(nested_cds): + cls._expand_compact_keys_in_class_deriv(ncd) + # OD-inner class_derivations (deprecated form; flattened in MIGRATE) + ods = sd.get("object_derivations") + if isinstance(ods, list): + for od in ods: + if not isinstance(od, dict): + continue + od_cds = od.get("class_derivations") + if isinstance(od_cds, list): + cls._expand_compact_keys(od_cds) + for ncd in _iter_values_or_list(od_cds): + cls._expand_compact_keys_in_class_deriv(ncd) + + @staticmethod + def _preprocess_enum_derivations(obj: dict[str, Any]) -> None: + """Pre-process top-level enum_derivations and their PV sections. + + Handles two compact-key cases ReferenceValidator doesn't: + list-form enum_derivations with ``{Name: {...}}`` items, and + list-form permissible_value_derivations with the same shape (which + RV otherwise mangles into ``{None: ...}``). + """ + eds = obj.get("enum_derivations") + if isinstance(eds, dict): + for k, v in eds.items(): + if v is None: + eds[k] = {} + elif isinstance(eds, list): + Transformer._expand_compact_keys(eds) + for ed in _iter_values_or_list(obj.get("enum_derivations")): + pvs = ed.get("permissible_value_derivations") + if isinstance(pvs, list): + Transformer._expand_compact_keys(pvs) + + @classmethod + def _flatten_object_derivations(cls, obj: dict[str, Any]) -> None: + """Flatten ``object_derivations`` into ``class_derivations`` on every slot. + + Conflicting specs (both ``object_derivations`` and ``class_derivations`` + set) are caught upstream by the SCAN phase as ``severity="error"``, + so this method assumes a non-conflicting input. + """ + for cd in _iter_values_or_list(obj.get("class_derivations")): + cls._flatten_ods_in_class_deriv(cd) + + @classmethod + def _flatten_ods_in_class_deriv(cls, cd: dict[str, Any]) -> None: + """Recursively walk a class derivation, flattening OD on each slot.""" + for sd in _iter_values_or_list(cd.get("slot_derivations")): + ods = sd.get("object_derivations") + if ods: + flattened: list[dict[str, Any]] = [] + for od in ods: + if not isinstance(od, dict): + continue + od_cd = od.get("class_derivations", {}) + if isinstance(od_cd, dict): + for name, body in od_cd.items(): + entry = body if isinstance(body, dict) else {} + entry.setdefault("name", name) + flattened.append(entry) + elif isinstance(od_cd, list): + flattened.extend(od_cd) + sd["class_derivations"] = flattened + del sd["object_derivations"] + for ncd in _iter_values_or_list(sd.get("class_derivations")): + cls._flatten_ods_in_class_deriv(ncd) + + @classmethod + def _inherit_populated_from(cls, obj: dict[str, Any]) -> None: + """Propagate ``populated_from`` from parent class down to nested CDs. + + Walks each class_derivation's slot_derivations.class_derivations; if + a nested CD doesn't set ``populated_from``, it inherits from the + outer class's value. Recurses through arbitrarily deep nesting. + """ + for cd in _iter_values_or_list(obj.get("class_derivations")): + cls._inherit_pf_in_slots(cd.get("slot_derivations"), cd.get("populated_from")) + + @classmethod + def _inherit_pf_in_slots(cls, slots: Any, parent_pf: str | None) -> None: + """Recursively inherit populated_from into nested class_derivations.""" + for sd in _iter_values_or_list(slots): + for ncd in _iter_values_or_list(sd.get("class_derivations")): + if not ncd.get("populated_from") and parent_pf: + ncd["populated_from"] = parent_pf + cls._inherit_pf_in_slots(ncd.get("slot_derivations"), ncd.get("populated_from")) @staticmethod def _iter_pv_derivations(obj: dict[str, Any]) -> Iterator[dict[str, Any]]: diff --git a/src/linkml_map/validator.py b/src/linkml_map/validator.py index fd552224..2c88e935 100644 --- a/src/linkml_map/validator.py +++ b/src/linkml_map/validator.py @@ -340,36 +340,16 @@ def _resolve_schema_path( def _iter_derivation_dicts(raw: Any) -> list[dict[str, Any]]: - """Normalize a derivations section (dict, list, or compact-key list) to dicts. + """Normalize a derivations section (dict or list) to a list of dicts. - Derivation sections in user input can take three shapes: - - * Dict keyed by name: ``{"red": {"populated_from": "x"}}`` - * Explicit-name list: ``[{"name": "red", "populated_from": "x"}]`` - * Compact-key list: ``[{"red": {"populated_from": "x"}}]`` - - All three are normalized to a list of dicts with ``name`` injected, so - callers can scan fields uniformly without caring about user shape. + Assumes the SHAPE phase of ``Transformer._normalize_spec_dict`` has + already canonicalized compact-key list items, so callers only need to + handle dict-keyed and explicit-name list forms here. """ if isinstance(raw, list): - result: list[dict[str, Any]] = [] - for item in raw: - if not isinstance(item, dict): - continue - # Compact-key form: a single-key dict whose value is another dict, - # where the key isn't the literal "name" attribute. Hoist the key - # to a `name` field on the inner dict (mirrors Transformer._expand_compact_keys). - if len(item) == 1: - key, val = next(iter(item.items())) - if key != "name" and isinstance(val, dict): - d = dict(val) - d.setdefault("name", key) - result.append(d) - continue - result.append(item) - return result + return [item for item in raw if isinstance(item, dict)] if isinstance(raw, dict): - result = [] + result: list[dict[str, Any]] = [] for name, body in raw.items(): d = dict(body) if isinstance(body, dict) else {} d.setdefault("name", name) diff --git a/tests/test_validator.py b/tests/test_validator.py index ee705629..de17d1c8 100644 --- a/tests/test_validator.py +++ b/tests/test_validator.py @@ -253,13 +253,10 @@ def test_pv_sources_and_populated_from_both_set_errors(): def test_pv_sources_in_compact_key_list_form_detected(): """Compact-key list-form PV derivs (`[{name: {sources: [...]}}]`) are scanned. - Targets the scan helper directly because the compact-key list form for PVs - isn't currently round-trippable through ``ReferenceValidator.normalize()`` - — a separate concern from whether the deprecation scan catches it. + The SHAPE phase expands compact-key list items before the SCAN runs, so + the deprecation warning fires end-to-end through ``validate_spec``. """ - from linkml_map.validator import check_deprecated_fields - - obj = { + spec = { "enum_derivations": { "Target": { "populated_from": "Source", @@ -269,7 +266,8 @@ def test_pv_sources_in_compact_key_list_form_detected(): }, }, } - msgs = check_deprecated_fields(obj) + msgs = validate_spec(spec) + assert _errors(msgs) == [] warnings = _warnings(msgs) assert any("sources" in m.message and "PermissibleValueDerivation" in m.path for m in warnings), ( f"Expected deprecation warning for compact-key list PV; got {warnings}" From 92e958e089920bbd54a39181fe444578e055ad10 Mon Sep 17 00:00:00 2001 From: amc-corey-cox <69321580+amc-corey-cox@users.noreply.github.com> Date: Mon, 18 May 2026 15:48:01 -0500 Subject: [PATCH 5/8] Add canary test for upstream linkml/linkml#3529 xfail(strict=True) on the correct post-fix behavior. Fails loudly with XPASS when upstream lands the ReferenceValidator fix, signaling that the PV-level branch in _pre_shape_expand_compact_keys can be removed. --- tests/test_upstream_canaries.py | 47 +++++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) create mode 100644 tests/test_upstream_canaries.py diff --git a/tests/test_upstream_canaries.py b/tests/test_upstream_canaries.py new file mode 100644 index 00000000..7d210253 --- /dev/null +++ b/tests/test_upstream_canaries.py @@ -0,0 +1,47 @@ +"""Canary tests for upstream linkml/linkml-runtime bugs we work around. + +Each test asserts the **correct** post-fix behavior and is marked +``xfail(strict=True)``. While the bug persists upstream, the test +xfails. When the upstream fix lands, the test starts passing and +``strict=True`` flips XPASS into a hard failure — a loud signal to +remove the local workaround. +""" + +import pytest + + +@pytest.mark.xfail( + strict=True, + reason=( + "linkml/linkml#3529: ReferenceValidator.normalize() mangles compact-key " + "list input to inlined-dict fields as {None: ...}. When this passes, " + "remove the PV-level branch from Transformer._pre_shape_expand_compact_keys." + ), +) +def test_reference_validator_normalizes_compact_key_pv_list(): + """Canary: compact-key list for inlined-dict PV field normalizes correctly. + + See: https://github.com/linkml/linkml/issues/3529 + """ + from linkml_runtime.processing.referencevalidator import ReferenceValidator + from linkml_runtime.utils.introspection import package_schemaview + + normalizer = ReferenceValidator(package_schemaview("linkml_map.datamodel.transformer_model")) + normalizer.expand_all = True + + obj = { + "enum_derivations": { + "Target": { + "populated_from": "Source", + "permissible_value_derivations": [ + {"red": {"sources": ["a"]}}, + ], + }, + }, + } + result = normalizer.normalize(obj) + pvs = result["enum_derivations"]["Target"]["permissible_value_derivations"] + + # Expected (post-fix): compact-key name is hoisted as the dict key. + assert None not in pvs, f"upstream still produces None-keyed entries: {pvs}" + assert "red" in pvs, f"upstream did not hoist compact-key name: {pvs}" From caca07e7cfdf3b52aae20e9e9f60fbbf1efc7ad8 Mon Sep 17 00:00:00 2001 From: amc-corey-cox <69321580+amc-corey-cox@users.noreply.github.com> Date: Tue, 19 May 2026 09:48:58 -0500 Subject: [PATCH 6/8] Drop upstream canary; document why local pre-expansion stands alone MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Removed tests/test_upstream_canaries.py — its xfail(strict=True) only flips on the "accept and normalize" path; if upstream picks "reject loudly" instead, the test errors out and never signals usefully. Updated _pre_shape_expand_compact_keys docstring to explain compact-key list is a linkml-map convention, not LinkML-canonical, and that the pre-pass makes us independent of any upstream resolution. --- src/linkml_map/transformer/transformer.py | 16 ++++++-- tests/test_upstream_canaries.py | 47 ----------------------- 2 files changed, 12 insertions(+), 51 deletions(-) delete mode 100644 tests/test_upstream_canaries.py diff --git a/src/linkml_map/transformer/transformer.py b/src/linkml_map/transformer/transformer.py index 8ed562d3..11ba124a 100644 --- a/src/linkml_map/transformer/transformer.py +++ b/src/linkml_map/transformer/transformer.py @@ -328,10 +328,18 @@ def _shape_normalize(cls, obj: dict[str, Any]) -> None: def _pre_shape_expand_compact_keys(cls, obj: dict[str, Any]) -> None: """Recursively expand compact-key list forms before ReferenceValidator. - Covers spots ``ReferenceValidator`` doesn't normalize: - nested class_derivations in slot_derivations, OD-inner - class_derivations, and PV-level lists. Without this pass RV mangles - compact-key PV lists into ``{None: {: {...}}}``. + Compact-key list form (``[{Name: {body}}]``) is a linkml-map convention, + not a documented LinkML collection form. ``ReferenceValidator`` + therefore doesn't canonicalize it consistently — list-typed fields + (top-level class_derivations) are left as-is, and dict-typed fields + (permissible_value_derivations) get mangled to ``{None: ...}``. This + pre-pass converts compact-key items to explicit-name form everywhere + the linkml-map schema accepts a derivation section, so RV sees only + LinkML-canonical input. + + See https://github.com/linkml/linkml/issues/3529 for the upstream + behavior. The local pre-expansion makes us independent of how (or + whether) that gets resolved. """ cls._preprocess_class_derivations(obj) cls._preprocess_enum_derivations(obj) diff --git a/tests/test_upstream_canaries.py b/tests/test_upstream_canaries.py deleted file mode 100644 index 7d210253..00000000 --- a/tests/test_upstream_canaries.py +++ /dev/null @@ -1,47 +0,0 @@ -"""Canary tests for upstream linkml/linkml-runtime bugs we work around. - -Each test asserts the **correct** post-fix behavior and is marked -``xfail(strict=True)``. While the bug persists upstream, the test -xfails. When the upstream fix lands, the test starts passing and -``strict=True`` flips XPASS into a hard failure — a loud signal to -remove the local workaround. -""" - -import pytest - - -@pytest.mark.xfail( - strict=True, - reason=( - "linkml/linkml#3529: ReferenceValidator.normalize() mangles compact-key " - "list input to inlined-dict fields as {None: ...}. When this passes, " - "remove the PV-level branch from Transformer._pre_shape_expand_compact_keys." - ), -) -def test_reference_validator_normalizes_compact_key_pv_list(): - """Canary: compact-key list for inlined-dict PV field normalizes correctly. - - See: https://github.com/linkml/linkml/issues/3529 - """ - from linkml_runtime.processing.referencevalidator import ReferenceValidator - from linkml_runtime.utils.introspection import package_schemaview - - normalizer = ReferenceValidator(package_schemaview("linkml_map.datamodel.transformer_model")) - normalizer.expand_all = True - - obj = { - "enum_derivations": { - "Target": { - "populated_from": "Source", - "permissible_value_derivations": [ - {"red": {"sources": ["a"]}}, - ], - }, - }, - } - result = normalizer.normalize(obj) - pvs = result["enum_derivations"]["Target"]["permissible_value_derivations"] - - # Expected (post-fix): compact-key name is hoisted as the dict key. - assert None not in pvs, f"upstream still produces None-keyed entries: {pvs}" - assert "red" in pvs, f"upstream did not hoist compact-key name: {pvs}" From fc639a63db15c5d36c31bbe80b0717238d9f1fd8 Mon Sep 17 00:00:00 2001 From: amc-corey-cox <69321580+amc-corey-cox@users.noreply.github.com> Date: Tue, 19 May 2026 10:06:28 -0500 Subject: [PATCH 7/8] Restore upstream canary for linkml/linkml#3529 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Upstream's proposed fix hoists unambiguous compact-key list items as the pk (errors on genuinely ambiguous cases). Our test case is unambiguous — key is not a slot of PermissibleValueDerivation, value is a dict — so when the fix lands, RV will produce {'red': {...}} and the xfail(strict=True) flips to a clean signal to remove the workaround. --- tests/test_upstream_canaries.py | 49 +++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) create mode 100644 tests/test_upstream_canaries.py diff --git a/tests/test_upstream_canaries.py b/tests/test_upstream_canaries.py new file mode 100644 index 00000000..c15f1931 --- /dev/null +++ b/tests/test_upstream_canaries.py @@ -0,0 +1,49 @@ +"""Canary tests for upstream linkml/linkml-runtime bugs we work around. + +Each test asserts the **correct** post-fix behavior and is marked +``xfail(strict=True)``. While the bug persists upstream, the test +xfails. When the upstream fix lands, the test starts passing and +``strict=True`` flips XPASS into a hard failure — a loud signal to +remove the local workaround. +""" + +import pytest + + +@pytest.mark.xfail( + strict=True, + reason=( + "linkml/linkml#3529: ReferenceValidator.normalize() mangles compact-key " + "list input to inlined-dict fields as {None: ...}. The proposed upstream " + "fix hoists unambiguous single-key items (and errors on degenerate cases). " + "Our test case is unambiguous so it will hoist. When this passes, remove " + "the PV-level branch from Transformer._pre_shape_expand_compact_keys." + ), +) +def test_reference_validator_normalizes_compact_key_pv_list(): + """Canary: compact-key list for inlined-dict PV field normalizes correctly. + + See: https://github.com/linkml/linkml/issues/3529 + """ + from linkml_runtime.processing.referencevalidator import ReferenceValidator + from linkml_runtime.utils.introspection import package_schemaview + + normalizer = ReferenceValidator(package_schemaview("linkml_map.datamodel.transformer_model")) + normalizer.expand_all = True + + obj = { + "enum_derivations": { + "Target": { + "populated_from": "Source", + "permissible_value_derivations": [ + {"red": {"sources": ["a"]}}, + ], + }, + }, + } + result = normalizer.normalize(obj) + pvs = result["enum_derivations"]["Target"]["permissible_value_derivations"] + + # Expected (post-fix): compact-key name is hoisted as the dict key. + assert None not in pvs, f"upstream still produces None-keyed entries: {pvs}" + assert "red" in pvs, f"upstream did not hoist compact-key name: {pvs}" From 255d8f35b77f29bba0dddd3c34b0c84323ead793 Mon Sep 17 00:00:00 2001 From: amc-corey-cox <69321580+amc-corey-cox@users.noreply.github.com> Date: Tue, 19 May 2026 16:09:00 -0500 Subject: [PATCH 8/8] Address Copilot review feedback MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - check_deprecated_fields docstring: reflect actual SHAPE→SCAN→MIGRATE ordering (scan now runs after RV, not before). - _coerce_pv_populated_from_to_list: only strip explicit None / all-None lists; empty strings stay (legitimate PV value). - _migrate_pv_sources_to_populated_from: wrap scalar string instead of exploding into characters. - transform_enum: replace `assert not pv_deriv.sources` with explicit SpecificationError raise — invariant check stays enforced under `-O`. - _iter_values_or_list docstring: relax to match actual use (pre- and post-SHAPE callers). --- .../transformer/object_transformer.py | 14 ++++++---- src/linkml_map/transformer/transformer.py | 27 +++++++++++-------- src/linkml_map/validator.py | 19 +++++++------ 3 files changed, 34 insertions(+), 26 deletions(-) diff --git a/src/linkml_map/transformer/object_transformer.py b/src/linkml_map/transformer/object_transformer.py index af240b3a..0a227def 100644 --- a/src/linkml_map/transformer/object_transformer.py +++ b/src/linkml_map/transformer/object_transformer.py @@ -1164,11 +1164,15 @@ def transform_enum(self, source_value: str, enum_names: list[str], source_obj: A if v is not None: return v for pv_deriv in enum_deriv.permissible_value_derivations.values(): - assert not pv_deriv.sources, ( - f"PermissibleValueDerivation '{pv_deriv.name}' has 'sources' set; " - "this should have been migrated to 'populated_from' during spec load. " - "Did the spec bypass Transformer._normalize_spec_dict?" - ) + if pv_deriv.sources: + from linkml_map.transformer.errors import SpecificationError + + msg = ( + f"PermissibleValueDerivation '{pv_deriv.name}' has 'sources' set; " + "this should have been migrated to 'populated_from' during spec load. " + "Did the spec bypass Transformer._normalize_spec_dict?" + ) + raise SpecificationError(msg) if pv_deriv.populated_from and source_value in pv_deriv.populated_from: return pv_deriv.name if enum_deriv.mirror_source: diff --git a/src/linkml_map/transformer/transformer.py b/src/linkml_map/transformer/transformer.py index 11ba124a..e502fe35 100644 --- a/src/linkml_map/transformer/transformer.py +++ b/src/linkml_map/transformer/transformer.py @@ -35,9 +35,10 @@ def _iter_values_or_list(container: Any) -> Iterator[dict[str, Any]]: """Yield each dict in a derivations section (dict-keyed or list form). - Caller assumes shape has been canonicalized (i.e., the SHAPE phase of - ``Transformer._normalize_spec_dict`` has run), so compact-key list - items are already expanded. + Doesn't expand compact-key list items — used both pre-SHAPE (where + compact-key items are still present and the caller is responsible for + expanding them via ``_expand_compact_keys`` if it needs to descend + into them) and post-SHAPE (where everything is already canonical). """ if isinstance(container, dict): for v in container.values(): @@ -475,17 +476,19 @@ def _coerce_pv_populated_from_to_list(cls, obj: dict[str, Any]) -> None: * Scalar string: wrapped to a one-element list (user convenience that pydantic would otherwise reject for a multivalued field). - * Explicit ``None`` or ``[None]`` (``populated_from:`` with no YAML - value, possibly already wrapped to a list by ``ReferenceValidator``): - the key is removed so pydantic uses the ``default_factory=list`` - default — treats "explicitly set to nothing" as "unset". + * Explicit ``None`` or a list whose every element is ``None`` + (``populated_from:`` with no YAML value, possibly already wrapped to + ``[None]`` by ``ReferenceValidator``): the key is removed so pydantic + uses the ``default_factory=list`` default — treats "explicitly set to + nothing" as "unset". Empty strings and other falsy-but-not-None + values are kept (a user may legitimately map to the empty-string PV). * List: left as-is. """ for pvd in cls._iter_pv_derivations(obj): if "populated_from" not in pvd: continue pf = pvd["populated_from"] - if pf is None or (isinstance(pf, list) and not any(pf)): + if pf is None or (isinstance(pf, list) and all(x is None for x in pf)): del pvd["populated_from"] elif isinstance(pf, str): pvd["populated_from"] = [pf] @@ -497,14 +500,16 @@ def _migrate_pv_sources_to_populated_from(cls, obj: dict[str, Any]) -> None: Applied after the pre-normalize scan has already detected and reported any ``sources`` usage and any ``sources`` + ``populated_from`` conflicts. For each PV deriv with ``sources`` set, copies into ``populated_from`` - (if not already set) and clears the ``sources`` key. Post-condition: - no PV has ``sources`` set. The runtime can therefore rely on + (if not already set) and clears the ``sources`` key. A scalar string + is wrapped to a one-element list rather than exploded into characters + (defends against ``sources: "light_red"`` typos). Post-condition: no + PV has ``sources`` set. The runtime can therefore rely on ``populated_from`` as the single source of truth and ignore ``sources``. """ for pvd in cls._iter_pv_derivations(obj): srcs = pvd.pop("sources", None) if srcs and not pvd.get("populated_from"): - pvd["populated_from"] = list(srcs) + pvd["populated_from"] = [srcs] if isinstance(srcs, str) else list(srcs) @staticmethod def _expand_compact_keys(items: list[dict[str, Any]]) -> None: diff --git a/src/linkml_map/validator.py b/src/linkml_map/validator.py index 2c88e935..2a1f7c45 100644 --- a/src/linkml_map/validator.py +++ b/src/linkml_map/validator.py @@ -361,13 +361,13 @@ def _iter_derivation_dicts(raw: Any) -> list[dict[str, Any]]: def check_deprecated_fields(data: dict[str, Any]) -> list[ValidationMessage]: """Scan a spec dict for deprecated-field usage and ambiguous combinations. - Runs **pre-migration** — i.e., before ``_normalize_slot_class_derivations`` - flattens ``object_derivations`` and before the PV ``sources`` → ``populated_from`` - migration. Called by ``Transformer._normalize_spec_dict`` after top-level - compact-key preprocessing but before ``ReferenceValidator.normalize()``; - :func:`_iter_derivation_dicts` handles the remaining shape variations - (dict-keyed, explicit-list, and compact-key list forms) so deprecations - are detected regardless of how the user wrote them. + Runs in the SCAN phase of ``Transformer._normalize_spec_dict`` — after + SHAPE (which runs ``ReferenceValidator.normalize()`` and the local + compact-key pre-expansion) and before MIGRATE (which flattens + ``object_derivations``, inherits ``populated_from``, and rewrites PV + ``sources``). So the dict the scan sees is structurally canonical + (dict-keyed or explicit-name list, no compact-key items) but the + deprecated field values are still as the user wrote them. Flags: @@ -389,9 +389,8 @@ def check_deprecated_fields(data: dict[str, Any]) -> list[ValidationMessage]: (deprecation, derivation type) pair to keep output readable on large specs; per-entry messages are emitted for the other categories. - :param data: A spec dict after top-level compact-key preprocessing but - before field migration. Inner derivation shapes are handled by - :func:`_iter_derivation_dicts`. + :param data: A spec dict post-SHAPE, pre-MIGRATE. Derivation sections + are dict-keyed or explicit-name list (no compact-key items). :returns: A list of validation messages — warnings for deprecations, errors for ambiguous combinations. """