diff --git a/docs/schema/PermissibleValueDerivation.md b/docs/schema/PermissibleValueDerivation.md
index 5a725450..420adb3f 100644
--- a/docs/schema/PermissibleValueDerivation.md
+++ b/docs/schema/PermissibleValueDerivation.md
@@ -152,7 +152,7 @@ URI: [linkmlmap:PermissibleValueDerivation](https://w3id.org/linkml/transformer/
| --- | --- | --- | --- |
| [name](name.md) | 1
[String](String.md) | Target permissible value text | direct |
| [expr](expr.md) | 0..1
[String](String.md) | | direct |
-| [populated_from](populated_from.md) | 0..1
[String](String.md) | Source permissible value that maps to this target permissible value | direct |
+| [populated_from](populated_from.md) | *
[String](String.md) | Source permissible value(s) that map to this target permissible value | direct |
| [sources](sources.md) | *
[String](String.md) | Deprecated | direct |
| [hide](hide.md) | 0..1
[Boolean](Boolean.md) | | direct |
| [copy_directives](copy_directives.md) | *
[CopyDirective](CopyDirective.md) | | [ElementDerivation](ElementDerivation.md) |
@@ -257,7 +257,9 @@ attributes:
range: string
populated_from:
name: populated_from
- description: Source permissible value that maps to this target permissible value.
+ description: Source permissible value(s) that map to this target permissible value.
+ Accepts a single value or a list; scalar input is normalized to a one-element
+ list at load time.
from_schema: https://w3id.org/linkml/transformer
domain_of:
- ClassDerivation
@@ -265,11 +267,13 @@ attributes:
- EnumDerivation
- PermissibleValueDerivation
range: string
+ multivalued: true
sources:
name: sources
description: Deprecated. Use populated_from instead.
- deprecated: Deprecated. Use populated_from instead. See https://github.com/linkml/linkml-map/issues/193
- for planned list support in populated_from. Will be removed in a future version.
+ deprecated: Deprecated. Use populated_from instead, which now accepts a list.
+ See https://github.com/linkml/linkml-map/issues/193. Will be removed in a future
+ version.
from_schema: https://w3id.org/linkml/transformer
domain_of:
- ClassDerivation
@@ -327,7 +331,9 @@ attributes:
range: string
populated_from:
name: populated_from
- description: Source permissible value that maps to this target permissible value.
+ description: Source permissible value(s) that map to this target permissible value.
+ Accepts a single value or a list; scalar input is normalized to a one-element
+ list at load time.
from_schema: https://w3id.org/linkml/transformer
owner: PermissibleValueDerivation
domain_of:
@@ -336,11 +342,13 @@ attributes:
- EnumDerivation
- PermissibleValueDerivation
range: string
+ multivalued: true
sources:
name: sources
description: Deprecated. Use populated_from instead.
- deprecated: Deprecated. Use populated_from instead. See https://github.com/linkml/linkml-map/issues/193
- for planned list support in populated_from. Will be removed in a future version.
+ deprecated: Deprecated. Use populated_from instead, which now accepts a list.
+ See https://github.com/linkml/linkml-map/issues/193. Will be removed in a future
+ version.
from_schema: https://w3id.org/linkml/transformer
owner: PermissibleValueDerivation
domain_of:
diff --git a/src/linkml_map/cli/cli.py b/src/linkml_map/cli/cli.py
index b118c99a..0ded2c53 100644
--- a/src/linkml_map/cli/cli.py
+++ b/src/linkml_map/cli/cli.py
@@ -236,27 +236,27 @@ def _pre_flight_validate(
) -> None:
"""Surface static spec checks before performing a CLI action.
- Runs deprecation checks and reference resolution against the loaded
- specification, printing all findings to stderr. **Does not gate** —
- even error-severity findings are informational only; the runtime
- remains the authoritative arbiter of whether a transformation can
- actually execute. Users who want fail-fast behavior should run
- ``validate-spec --strict`` separately.
-
- Validates the merged ``tr.specification`` (after multi-file loading
- is complete) so cross-file issues surface where users would see
- them via ``validate-spec --merge``. Reuses ``tr.source_schemaview``
- and ``tr.target_schemaview`` when set, avoiding a duplicate load
- of large or remote schemas.
+ Replays pre-normalize scan messages captured at spec-load time and runs
+ reference resolution, printing all findings to stderr. **Does not gate** —
+ findings are informational only; the runtime remains the authoritative
+ arbiter of whether a transformation can actually execute. Users who want
+ fail-fast behavior should run ``validate-spec --strict`` separately.
+
+ Reading scan messages from ``tr.spec_messages`` rather than re-scanning
+ the post-migration spec means deprecations whose source fields were
+ cleared by normalization (e.g., ``object_derivations``, PV ``sources``)
+ are still surfaced here. Reuses ``tr.source_schemaview`` and
+ ``tr.target_schemaview`` when set, avoiding a duplicate load of large
+ or remote schemas.
"""
- from linkml_map.validator import check_deprecated_fields, validate_spec_semantics
+ from linkml_map.validator import validate_spec_semantics
if tr.specification is None:
return
spec_dict = tr.specification.model_dump(exclude_none=True)
- messages = list(check_deprecated_fields(spec_dict))
+ messages = list(tr.spec_messages)
messages.extend(
validate_spec_semantics(
spec_dict,
diff --git a/src/linkml_map/compiler/templates/markdown.j2 b/src/linkml_map/compiler/templates/markdown.j2
index c3be8d6e..3dc50cac 100644
--- a/src/linkml_map/compiler/templates/markdown.j2
+++ b/src/linkml_map/compiler/templates/markdown.j2
@@ -20,6 +20,6 @@
| Target | Source | Info |
| ------ | ------ | ---- |
{%- for pvd in ed.permissible_value_derivations.values() %}
-| {{ pvd.name }} | {{ pvd.populated_from }} | . |
+| {{ pvd.name }} | {{ (pvd.populated_from or []) | join(", ") }} | . |
{%- endfor -%}
{% endfor %}
diff --git a/src/linkml_map/datamodel/transformer_model.py b/src/linkml_map/datamodel/transformer_model.py
index db1ee3aa..7ddc2b49 100644
--- a/src/linkml_map/datamodel/transformer_model.py
+++ b/src/linkml_map/datamodel/transformer_model.py
@@ -500,14 +500,13 @@ class PermissibleValueDerivation(ElementDerivation):
expr: Optional[str] = Field(default=None, json_schema_extra = { "linkml_meta": {'domain_of': ['SlotDerivation',
'EnumDerivation',
'PermissibleValueDerivation']} })
- populated_from: Optional[str] = Field(default=None, description="""Source permissible value that maps to this target permissible value.""", json_schema_extra = { "linkml_meta": {'domain_of': ['ClassDerivation',
+ populated_from: Optional[list[str]] = Field(default_factory=list, description="""Source permissible value(s) that map to this target permissible value. Accepts a single value or a list; scalar input is normalized to a one-element list at load time.""", json_schema_extra = { "linkml_meta": {'domain_of': ['ClassDerivation',
'SlotDerivation',
'EnumDerivation',
'PermissibleValueDerivation']} })
- sources: Optional[list[str]] = Field(default_factory=list, description="""Deprecated. Use populated_from instead.""", json_schema_extra = { "linkml_meta": {'deprecated': 'Deprecated. Use populated_from instead. See '
- 'https://github.com/linkml/linkml-map/issues/193 for planned '
- 'list support in populated_from. Will be removed in a future '
- 'version.',
+ sources: Optional[list[str]] = Field(default_factory=list, description="""Deprecated. Use populated_from instead.""", json_schema_extra = { "linkml_meta": {'deprecated': 'Deprecated. Use populated_from instead, which now accepts a '
+ 'list. See https://github.com/linkml/linkml-map/issues/193. '
+ 'Will be removed in a future version.',
'domain_of': ['ClassDerivation',
'SlotDerivation',
'EnumDerivation',
diff --git a/src/linkml_map/datamodel/transformer_model.yaml b/src/linkml_map/datamodel/transformer_model.yaml
index 948816af..c012bf8e 100644
--- a/src/linkml_map/datamodel/transformer_model.yaml
+++ b/src/linkml_map/datamodel/transformer_model.yaml
@@ -486,15 +486,18 @@ classes:
range: string
populated_from:
range: string
+ multivalued: true
description: >-
- Source permissible value that maps to this target permissible value.
+ Source permissible value(s) that map to this target permissible value.
+ Accepts a single value or a list; scalar input is normalized to a
+ one-element list at load time.
sources:
range: string
multivalued: true
deprecated: >-
- Deprecated. Use populated_from instead.
- See https://github.com/linkml/linkml-map/issues/193 for planned
- list support in populated_from. Will be removed in a future version.
+ Deprecated. Use populated_from instead, which now accepts a list.
+ See https://github.com/linkml/linkml-map/issues/193. Will be removed
+ in a future version.
description: >-
Deprecated. Use populated_from instead.
hide:
diff --git a/src/linkml_map/inference/inference.py b/src/linkml_map/inference/inference.py
index b8a0e984..4e27cba6 100644
--- a/src/linkml_map/inference/inference.py
+++ b/src/linkml_map/inference/inference.py
@@ -1,33 +1,11 @@
"""Infer missing values in a specification."""
-import warnings
-
from linkml_runtime import SchemaView
from linkml_map.datamodel.transformer_model import TransformationSpecification
from linkml_map.utils.fk_utils import resolve_fk_path
-def _warn_deprecated_fields(specification: TransformationSpecification) -> None:
- """Emit ``DeprecationWarning`` for any deprecated field usage.
-
- Thin shim over ``validator._check_deprecated_fields`` that re-emits
- deprecation-category messages as ``DeprecationWarning`` so runtime
- callers (and any code with Python ``warnings`` filters configured)
- keep getting the same signal they always did.
-
- The same underlying check is consumed by static validation paths
- (``validate-spec`` and pre-flight from other CLI commands) via
- ``ValidationMessage`` records — see ``validator._check_deprecated_fields``.
- """
- from linkml_map.validator import check_deprecated_fields
-
- spec_dict = specification.model_dump(exclude_none=True)
- for msg in check_deprecated_fields(spec_dict):
- if msg.category == "deprecated":
- warnings.warn(msg.message, DeprecationWarning, stacklevel=3)
-
-
def induce_missing_values(specification: TransformationSpecification, source_schemaview: SchemaView) -> None:
"""
Infer missing values in a specification.
@@ -41,7 +19,6 @@ def induce_missing_values(specification: TransformationSpecification, source_sch
for cd in specification.class_derivations:
if not cd.populated_from:
cd.populated_from = cd.name
- _warn_deprecated_fields(specification)
for cd in specification.class_derivations:
for sd in cd.slot_derivations.values():
diff --git a/src/linkml_map/inference/inverter.py b/src/linkml_map/inference/inverter.py
index e7bfad49..d39196f5 100644
--- a/src/linkml_map/inference/inverter.py
+++ b/src/linkml_map/inference/inverter.py
@@ -93,9 +93,18 @@ def invert_enum_derivation(self, ed: EnumDerivation, spec: TransformationSpecifi
msg = "TODO: invert enum derivation with expression"
raise NonInvertibleSpecificationError(msg)
for pv_deriv in ed.permissible_value_derivations.values():
+ sources = pv_deriv.populated_from or [pv_deriv.name]
+ if len(sources) > 1:
+ msg = (
+ f"Cannot invert PermissibleValueDerivation '{pv_deriv.name}': "
+ f"populated_from has multiple source values {sources}, "
+ f"which is not a one-to-one mapping."
+ )
+ raise NonInvertibleSpecificationError(msg)
+ inverted_name = sources[0]
inverted_pv_deriv = PermissibleValueDerivation(
- name=pv_deriv.populated_from if pv_deriv.populated_from else pv_deriv.name,
- populated_from=pv_deriv.name,
+ name=inverted_name,
+ populated_from=[pv_deriv.name],
)
inverted_ed.permissible_value_derivations[inverted_pv_deriv.name] = inverted_pv_deriv
return inverted_ed
diff --git a/src/linkml_map/inference/schema_mapper.py b/src/linkml_map/inference/schema_mapper.py
index 1686109b..fe0c822f 100644
--- a/src/linkml_map/inference/schema_mapper.py
+++ b/src/linkml_map/inference/schema_mapper.py
@@ -311,16 +311,13 @@ def _derive_enum(self, enum_derivation: EnumDerivation) -> EnumDefinition:
target_enum.attributes = {}
target_enum.slot_usage = {}
for pv_derivation in enum_derivation.permissible_value_derivations.values():
- if pv_derivation.populated_from:
- pv = PermissibleValue(text=pv_derivation.populated_from)
- target_enum.permissible_values[pv.text] = pv
- elif pv_derivation.sources:
- for source in pv_derivation.sources:
- pv = PermissibleValue(text=source)
- target_enum.permissible_values[pv.text] = pv
- else:
+ sources = pv_derivation.populated_from or pv_derivation.sources
+ if not sources:
msg = f"Missing populated_from or sources for {pv_derivation}"
raise ValueError(msg)
+ for source in sources:
+ pv = PermissibleValue(text=source)
+ target_enum.permissible_values[pv.text] = pv
if enum_derivation.mirror_source:
for pv in source_enum.permissible_values.values():
if pv.text not in target_enum.permissible_values:
diff --git a/src/linkml_map/transformer/errors.py b/src/linkml_map/transformer/errors.py
index c4209171..18051622 100644
--- a/src/linkml_map/transformer/errors.py
+++ b/src/linkml_map/transformer/errors.py
@@ -6,6 +6,18 @@
from typing import Any
+class SpecificationError(ValueError):
+ """The transformation specification is malformed or internally inconsistent.
+
+ Raised during spec loading when the pre-normalize scan detects a
+ user-intent error that the runtime cannot disambiguate (e.g., setting
+ both ``populated_from`` and ``sources`` on the same derivation, or
+ setting both ``object_derivations`` and ``class_derivations`` on a
+ slot). Subclasses ``ValueError`` for backward compatibility with
+ callers that already catch ``ValueError`` from ``_normalize_spec_dict``.
+ """
+
+
@dataclass
class TransformationError(Exception):
"""A row-level error during data transformation.
diff --git a/src/linkml_map/transformer/object_transformer.py b/src/linkml_map/transformer/object_transformer.py
index 665ecc6a..0a227def 100644
--- a/src/linkml_map/transformer/object_transformer.py
+++ b/src/linkml_map/transformer/object_transformer.py
@@ -1164,9 +1164,16 @@ def transform_enum(self, source_value: str, enum_names: list[str], source_obj: A
if v is not None:
return v
for pv_deriv in enum_deriv.permissible_value_derivations.values():
- if source_value == pv_deriv.populated_from:
- return pv_deriv.name
- if source_value in pv_deriv.sources:
+ if pv_deriv.sources:
+ from linkml_map.transformer.errors import SpecificationError
+
+ msg = (
+ f"PermissibleValueDerivation '{pv_deriv.name}' has 'sources' set; "
+ "this should have been migrated to 'populated_from' during spec load. "
+ "Did the spec bypass Transformer._normalize_spec_dict?"
+ )
+ raise SpecificationError(msg)
+ if pv_deriv.populated_from and source_value in pv_deriv.populated_from:
return pv_deriv.name
if enum_deriv.mirror_source:
return str(source_value)
diff --git a/src/linkml_map/transformer/transformer.py b/src/linkml_map/transformer/transformer.py
index 307d0227..e502fe35 100644
--- a/src/linkml_map/transformer/transformer.py
+++ b/src/linkml_map/transformer/transformer.py
@@ -3,6 +3,7 @@
import logging
import warnings
from abc import ABC
+from collections.abc import Iterator
from copy import deepcopy
from dataclasses import dataclass, field
from pathlib import Path
@@ -31,6 +32,24 @@
logger = logging.getLogger(__name__)
+def _iter_values_or_list(container: Any) -> Iterator[dict[str, Any]]:
+ """Yield each dict in a derivations section (dict-keyed or list form).
+
+ Doesn't expand compact-key list items — used both pre-SHAPE (where
+ compact-key items are still present and the caller is responsible for
+ expanding them via ``_expand_compact_keys`` if it needs to descend
+ into them) and post-SHAPE (where everything is already canonical).
+ """
+ if isinstance(container, dict):
+ for v in container.values():
+ if isinstance(v, dict):
+ yield v
+ elif isinstance(container, list):
+ for item in container:
+ if isinstance(item, dict):
+ yield item
+
+
OBJECT_TYPE = dict[str, Any] | BaseModel | YAMLRoot
"""An object can be a plain python dict, a pydantic object, or a linkml YAMLRoot"""
@@ -93,6 +112,17 @@ class Transformer(ABC):
_curie_converter: Converter = None
+ spec_messages: list[Any] = field(default_factory=list)
+ """Scan messages captured at spec-load time.
+
+ Populated by the three load methods (``load_transformer_specification``,
+ ``load_transformer_specifications``, ``create_transformer_specification``)
+ with the ``ValidationMessage`` list returned by ``_normalize_spec_dict``.
+ Pre-flight validation reads these instead of re-scanning the post-migration
+ spec, so deprecations whose source fields were cleared by normalization
+ (e.g., ``object_derivations``, PV ``sources``) are still surfaced.
+ """
+
def map_object(self, obj: OBJECT_TYPE, source_type: str | None = None, **kwargs: Any) -> OBJECT_TYPE:
"""
Transform source object into an instance of the target class.
@@ -135,7 +165,7 @@ def load_transformer_specification(self, path: str | Path) -> None:
"""
with open(path) as f:
obj = yaml.safe_load(f)
- self._normalize_spec_dict(obj)
+ self.spec_messages = self._normalize_spec_dict(obj)
self.specification = TransformationSpecification(**obj)
def load_transformer_specifications(self, paths: tuple[str | Path, ...]) -> None:
@@ -151,12 +181,18 @@ def load_transformer_specifications(self, paths: tuple[str | Path, ...]) -> None
from linkml_map.utils.spec_merge import load_and_merge_specs
obj = load_and_merge_specs(paths)
- self._normalize_spec_dict(obj)
+ self.spec_messages = self._normalize_spec_dict(obj)
self.specification = TransformationSpecification(**obj)
@classmethod
def normalize_transform_spec(cls, obj: dict[str, Any], normalizer: ReferenceValidator) -> dict:
- """Recursively normalize class_derivations and flatten object_derivations."""
+ """Shape-canonicalize class_derivations recursively.
+
+ Pure shape work — no field migrations. ``object_derivations``
+ flattening and ``populated_from`` inheritance happen in the MIGRATE
+ phase of :meth:`_normalize_spec_dict` so the SCAN phase between
+ them sees the user's original field values.
+ """
obj = normalizer.normalize(obj)
class_derivations = obj.get("class_derivations", [])
@@ -167,12 +203,11 @@ def normalize_transform_spec(cls, obj: dict[str, Any], normalizer: ReferenceVali
for class_spec in cd_iter:
if not isinstance(class_spec, dict):
continue
- parent_source = class_spec.get("populated_from")
slot_derivations = class_spec.get("slot_derivations", {})
for slot_name, slot_spec in slot_derivations.items():
if slot_spec.get("value") is not None and slot_spec.get("range") is None:
slot_spec["range"] = "string"
- cls._normalize_slot_class_derivations(slot_name, slot_spec, normalizer, parent_source)
+ cls._normalize_slot_class_derivations(slot_name, slot_spec, normalizer)
return obj
@classmethod
@@ -181,52 +216,22 @@ def _normalize_slot_class_derivations(
slot_name: str,
slot_spec: dict[str, Any],
normalizer: ReferenceValidator,
- parent_populated_from: str | None = None,
) -> None:
- """Flatten object_derivations and normalize class_derivations on a slot.
-
- Four steps, applied recursively to nested slots:
- 1. Flatten ``object_derivations`` into ``class_derivations`` (with
- deprecation warning; error if both are present).
- 2. Expand compact-key entries (``- Condition: {...}`` → ``{name: Condition, ...}``).
- 3. Inherit ``populated_from`` from the parent class derivation when absent.
- 4. Run the normalizer on each class derivation entry so that nested
- dict-keyed ``slot_derivations`` get ``name`` injected.
+ """Shape-canonicalize the ``class_derivations`` on a slot, recursively.
+
+ Two steps, applied recursively to nested slots:
+
+ 1. Expand compact-key entries (``- Condition: {...}`` →
+ ``{name: Condition, ...}``).
+ 2. Run the normalizer on each class derivation entry so dict-keyed
+ ``slot_derivations`` get ``name`` injected and other shape
+ canonicalization happens.
+
+ Pure shape — no field semantics. ``object_derivations`` flattening
+ and ``populated_from`` inheritance live in the MIGRATE phase of
+ :meth:`_normalize_spec_dict`, not here, so the SCAN phase sees the
+ user's original field values.
"""
- # Step 1: flatten object_derivations → class_derivations
- object_derivations = slot_spec.get("object_derivations", [])
- if object_derivations:
- if slot_spec.get("class_derivations"):
- msg = (
- f"SlotDerivation '{slot_name}' has both 'object_derivations' and "
- f"'class_derivations'. Remove 'object_derivations' and use "
- f"'class_derivations' only."
- )
- raise ValueError(msg)
-
- warnings.warn(
- f"SlotDerivation '{slot_name}' uses 'object_derivations', which is "
- f"deprecated. Use list-based class_derivations instead. "
- f"'object_derivations' will be removed in a future version. "
- f"See https://github.com/linkml/linkml-map/issues/112",
- DeprecationWarning,
- stacklevel=4,
- )
-
- flattened: list[dict[str, Any]] = []
- for od in object_derivations:
- od_cd = od.get("class_derivations", {})
- if isinstance(od_cd, dict):
- for name, body in od_cd.items():
- entry = body if isinstance(body, dict) else {}
- entry.setdefault("name", name)
- flattened.append(entry)
- elif isinstance(od_cd, list):
- flattened.extend(od_cd)
- slot_spec["class_derivations"] = flattened
- del slot_spec["object_derivations"]
-
- # Steps 2-4: expand compact keys, inherit populated_from, normalize, and recurse
slot_cd = slot_spec.get("class_derivations")
if not isinstance(slot_cd, list):
return
@@ -236,33 +241,276 @@ def _normalize_slot_class_derivations(
for cd_entry in slot_cd:
if not isinstance(cd_entry, dict):
continue
- if not cd_entry.get("populated_from") and parent_populated_from:
- cd_entry["populated_from"] = parent_populated_from
normalized = normalizer.normalize(cd_entry)
cd_entry.clear()
cd_entry.update(normalized)
- child_source = cd_entry.get("populated_from")
for nested_name, nested_sd in cd_entry.get("slot_derivations", {}).items():
if isinstance(nested_sd, dict):
- cls._normalize_slot_class_derivations(nested_name, nested_sd, normalizer, child_source)
+ cls._normalize_slot_class_derivations(nested_name, nested_sd, normalizer)
@classmethod
- def _normalize_spec_dict(cls, obj: dict[str, Any]) -> None:
- """Normalize a raw specification dict in place.
-
- Bundles _preprocess_class_derivations, ReferenceValidator normalization,
- and nested ObjectDerivation fixup into a single entry point. Mutates
- ``obj`` by replacing its contents with the normalized result.
+ def _normalize_spec_dict(cls, obj: dict[str, Any], *, silent: bool = False) -> list[Any]:
+ """Normalize a raw specification dict in place. Return scan messages.
+
+ Three clean phases, ordered so the SCAN sees a canonical shape with
+ the user's original field values:
+
+ 1. **SHAPE** — pure structural canonicalization, no field semantics.
+ Expands compact-key list forms everywhere they can appear
+ (top-level class/enum derivations, nested class_derivations in
+ slots, OD-inner class_derivations, PV-level lists), then runs
+ ``ReferenceValidator`` for dict↔list canonicalization and name
+ injection.
+ 2. **SCAN** — :func:`linkml_map.validator.check_deprecated_fields`
+ walks the canonical shape, returning ``ValidationMessage``
+ records for deprecated-field usage and ambiguous combinations.
+ Unless ``silent``, deprecation warnings fire as Python
+ ``DeprecationWarning`` and conflict errors raise
+ :class:`~linkml_map.transformer.errors.SpecificationError`.
+ 3. **MIGRATE** — all field semantics in one pass:
+ ``object_derivations`` flatten, ``populated_from`` inheritance
+ from parent class, PV scalar → list, PV explicit-``None`` strip,
+ PV ``sources`` → ``populated_from`` (clearing ``sources``).
+
+ After this method, ``sources`` no longer appears on any
+ ``PermissibleValueDerivation`` — the runtime can rely on
+ ``populated_from`` as the single source of truth.
:param obj: Raw specification dict (e.g. from YAML or user code).
+ :param silent: When ``True``, neither emit warnings nor raise on
+ errors — return them in the message list instead. Used by the
+ validator path so it can surface findings as
+ ``ValidationMessage`` records.
+ :returns: A list of :class:`ValidationMessage` records from the scan.
"""
- cls._preprocess_class_derivations(obj)
+ from linkml_map.transformer.errors import SpecificationError
+ from linkml_map.validator import check_deprecated_fields
+
+ # SHAPE
+ cls._shape_normalize(obj)
+
+ # SCAN
+ messages = check_deprecated_fields(obj)
+
+ if not silent:
+ errors = [m for m in messages if m.severity == "error"]
+ if errors:
+ raise SpecificationError("; ".join(m.message for m in errors))
+ for m in messages:
+ if m.category == "deprecated":
+ warnings.warn(m.message, DeprecationWarning, stacklevel=3)
+
+ # MIGRATE
+ cls._flatten_object_derivations(obj)
+ cls._inherit_populated_from(obj)
+ cls._coerce_pv_populated_from_to_list(obj)
+ cls._migrate_pv_sources_to_populated_from(obj)
+
+ return messages
+
+ @classmethod
+ def _shape_normalize(cls, obj: dict[str, Any]) -> None:
+ """Canonicalize the structural shape of every derivation section.
+
+ Expands compact-key list forms in every derivation-section location
+ (including OD-inner class_derivations and PV-level lists, which
+ ``ReferenceValidator`` doesn't canonicalize correctly), then runs
+ ``ReferenceValidator`` for dict↔list canonicalization and name
+ injection. No field semantics are mutated.
+ """
+ cls._pre_shape_expand_compact_keys(obj)
normalizer = ReferenceValidator(package_schemaview("linkml_map.datamodel.transformer_model"))
normalizer.expand_all = True
normalized = cls.normalize_transform_spec(obj, normalizer)
obj.clear()
obj.update(normalized)
+ @classmethod
+ def _pre_shape_expand_compact_keys(cls, obj: dict[str, Any]) -> None:
+ """Recursively expand compact-key list forms before ReferenceValidator.
+
+ Compact-key list form (``[{Name: {body}}]``) is a linkml-map convention,
+ not a documented LinkML collection form. ``ReferenceValidator``
+ therefore doesn't canonicalize it consistently — list-typed fields
+ (top-level class_derivations) are left as-is, and dict-typed fields
+ (permissible_value_derivations) get mangled to ``{None: ...}``. This
+ pre-pass converts compact-key items to explicit-name form everywhere
+ the linkml-map schema accepts a derivation section, so RV sees only
+ LinkML-canonical input.
+
+ See https://github.com/linkml/linkml/issues/3529 for the upstream
+ behavior. The local pre-expansion makes us independent of how (or
+ whether) that gets resolved.
+ """
+ cls._preprocess_class_derivations(obj)
+ cls._preprocess_enum_derivations(obj)
+ for cd in _iter_values_or_list(obj.get("class_derivations")):
+ cls._expand_compact_keys_in_class_deriv(cd)
+
+ @classmethod
+ def _expand_compact_keys_in_class_deriv(cls, cd: dict[str, Any]) -> None:
+ """Expand compact-key list forms in a class derivation, recursively."""
+ for sd in _iter_values_or_list(cd.get("slot_derivations")):
+ # Slot's own nested class_derivations (list-with-compact-keys form)
+ nested_cds = sd.get("class_derivations")
+ if isinstance(nested_cds, list):
+ cls._expand_compact_keys(nested_cds)
+ for ncd in _iter_values_or_list(nested_cds):
+ cls._expand_compact_keys_in_class_deriv(ncd)
+ # OD-inner class_derivations (deprecated form; flattened in MIGRATE)
+ ods = sd.get("object_derivations")
+ if isinstance(ods, list):
+ for od in ods:
+ if not isinstance(od, dict):
+ continue
+ od_cds = od.get("class_derivations")
+ if isinstance(od_cds, list):
+ cls._expand_compact_keys(od_cds)
+ for ncd in _iter_values_or_list(od_cds):
+ cls._expand_compact_keys_in_class_deriv(ncd)
+
+ @staticmethod
+ def _preprocess_enum_derivations(obj: dict[str, Any]) -> None:
+ """Pre-process top-level enum_derivations and their PV sections.
+
+ Handles two compact-key cases ReferenceValidator doesn't:
+ list-form enum_derivations with ``{Name: {...}}`` items, and
+ list-form permissible_value_derivations with the same shape (which
+ RV otherwise mangles into ``{None: ...}``).
+ """
+ eds = obj.get("enum_derivations")
+ if isinstance(eds, dict):
+ for k, v in eds.items():
+ if v is None:
+ eds[k] = {}
+ elif isinstance(eds, list):
+ Transformer._expand_compact_keys(eds)
+ for ed in _iter_values_or_list(obj.get("enum_derivations")):
+ pvs = ed.get("permissible_value_derivations")
+ if isinstance(pvs, list):
+ Transformer._expand_compact_keys(pvs)
+
+ @classmethod
+ def _flatten_object_derivations(cls, obj: dict[str, Any]) -> None:
+ """Flatten ``object_derivations`` into ``class_derivations`` on every slot.
+
+ Conflicting specs (both ``object_derivations`` and ``class_derivations``
+ set) are caught upstream by the SCAN phase as ``severity="error"``,
+ so this method assumes a non-conflicting input.
+ """
+ for cd in _iter_values_or_list(obj.get("class_derivations")):
+ cls._flatten_ods_in_class_deriv(cd)
+
+ @classmethod
+ def _flatten_ods_in_class_deriv(cls, cd: dict[str, Any]) -> None:
+ """Recursively walk a class derivation, flattening OD on each slot."""
+ for sd in _iter_values_or_list(cd.get("slot_derivations")):
+ ods = sd.get("object_derivations")
+ if ods:
+ flattened: list[dict[str, Any]] = []
+ for od in ods:
+ if not isinstance(od, dict):
+ continue
+ od_cd = od.get("class_derivations", {})
+ if isinstance(od_cd, dict):
+ for name, body in od_cd.items():
+ entry = body if isinstance(body, dict) else {}
+ entry.setdefault("name", name)
+ flattened.append(entry)
+ elif isinstance(od_cd, list):
+ flattened.extend(od_cd)
+ sd["class_derivations"] = flattened
+ del sd["object_derivations"]
+ for ncd in _iter_values_or_list(sd.get("class_derivations")):
+ cls._flatten_ods_in_class_deriv(ncd)
+
+ @classmethod
+ def _inherit_populated_from(cls, obj: dict[str, Any]) -> None:
+ """Propagate ``populated_from`` from parent class down to nested CDs.
+
+ Walks each class_derivation's slot_derivations.class_derivations; if
+ a nested CD doesn't set ``populated_from``, it inherits from the
+ outer class's value. Recurses through arbitrarily deep nesting.
+ """
+ for cd in _iter_values_or_list(obj.get("class_derivations")):
+ cls._inherit_pf_in_slots(cd.get("slot_derivations"), cd.get("populated_from"))
+
+ @classmethod
+ def _inherit_pf_in_slots(cls, slots: Any, parent_pf: str | None) -> None:
+ """Recursively inherit populated_from into nested class_derivations."""
+ for sd in _iter_values_or_list(slots):
+ for ncd in _iter_values_or_list(sd.get("class_derivations")):
+ if not ncd.get("populated_from") and parent_pf:
+ ncd["populated_from"] = parent_pf
+ cls._inherit_pf_in_slots(ncd.get("slot_derivations"), ncd.get("populated_from"))
+
+ @staticmethod
+ def _iter_pv_derivations(obj: dict[str, Any]) -> Iterator[dict[str, Any]]:
+ """Yield each PermissibleValueDerivation dict in ``obj`` for in-place mutation."""
+ eds = obj.get("enum_derivations")
+ if isinstance(eds, dict):
+ ed_iter: Any = eds.values()
+ elif isinstance(eds, list):
+ ed_iter = eds
+ else:
+ return
+ for ed in ed_iter:
+ if not isinstance(ed, dict):
+ continue
+ pvds = ed.get("permissible_value_derivations")
+ if isinstance(pvds, dict):
+ pvd_iter: Any = pvds.values()
+ elif isinstance(pvds, list):
+ pvd_iter = pvds
+ else:
+ continue
+ for pvd in pvd_iter:
+ if isinstance(pvd, dict):
+ yield pvd
+
+ @classmethod
+ def _coerce_pv_populated_from_to_list(cls, obj: dict[str, Any]) -> None:
+ """Coerce ``populated_from`` to a list on each PV deriv.
+
+ Input shapes handled:
+
+ * Scalar string: wrapped to a one-element list (user convenience that
+ pydantic would otherwise reject for a multivalued field).
+ * Explicit ``None`` or a list whose every element is ``None``
+ (``populated_from:`` with no YAML value, possibly already wrapped to
+ ``[None]`` by ``ReferenceValidator``): the key is removed so pydantic
+ uses the ``default_factory=list`` default — treats "explicitly set to
+ nothing" as "unset". Empty strings and other falsy-but-not-None
+ values are kept (a user may legitimately map to the empty-string PV).
+ * List: left as-is.
+ """
+ for pvd in cls._iter_pv_derivations(obj):
+ if "populated_from" not in pvd:
+ continue
+ pf = pvd["populated_from"]
+ if pf is None or (isinstance(pf, list) and all(x is None for x in pf)):
+ del pvd["populated_from"]
+ elif isinstance(pf, str):
+ pvd["populated_from"] = [pf]
+
+ @classmethod
+ def _migrate_pv_sources_to_populated_from(cls, obj: dict[str, Any]) -> None:
+ """Move deprecated ``sources`` into ``populated_from`` on PV derivs.
+
+ Applied after the pre-normalize scan has already detected and reported
+ any ``sources`` usage and any ``sources`` + ``populated_from`` conflicts.
+ For each PV deriv with ``sources`` set, copies into ``populated_from``
+ (if not already set) and clears the ``sources`` key. A scalar string
+ is wrapped to a one-element list rather than exploded into characters
+ (defends against ``sources: "light_red"`` typos). Post-condition: no
+ PV has ``sources`` set. The runtime can therefore rely on
+ ``populated_from`` as the single source of truth and ignore ``sources``.
+ """
+ for pvd in cls._iter_pv_derivations(obj):
+ srcs = pvd.pop("sources", None)
+ if srcs and not pvd.get("populated_from"):
+ pvd["populated_from"] = [srcs] if isinstance(srcs, str) else list(srcs)
+
@staticmethod
def _expand_compact_keys(items: list[dict[str, Any]]) -> None:
"""Expand YAML compact-key dicts in a list in place.
@@ -305,7 +553,7 @@ def create_transformer_specification(self, obj: dict[str, Any]) -> None:
:param path:
:return:
"""
- self._normalize_spec_dict(obj)
+ self.spec_messages = self._normalize_spec_dict(obj)
self.specification = TransformationSpecification(**obj)
def _apply_source_schema_patches(self) -> None:
diff --git a/src/linkml_map/validator.py b/src/linkml_map/validator.py
index d559da4a..2a1f7c45 100644
--- a/src/linkml_map/validator.py
+++ b/src/linkml_map/validator.py
@@ -143,25 +143,43 @@ def normalize_spec_dict(obj: dict[str, Any]) -> dict[str, Any]:
Applies the same full normalization pipeline that the transformer engine
uses (compact-key expansion, dict→list conversion via ReferenceValidator,
- nested ObjectDerivation fixup), plus YAML type coercion for fields that
- are commonly mis-parsed.
+ nested ObjectDerivation fixup, deprecated-field migration), plus YAML
+ type coercion for fields that are commonly mis-parsed.
If normalization itself fails (e.g. due to malformed data), the original
dict is returned with only YAML type coercion applied so that JSON Schema
validation can still report structural errors.
+ Use :func:`_normalize_and_collect_messages` instead when you also need
+ the pre-normalize scan's findings (deprecation warnings, conflict errors).
+
:param obj: Raw YAML-loaded dict.
:returns: A new dict suitable for JSON Schema validation.
"""
+ obj, _ = _normalize_and_collect_messages(obj)
+ return obj
+
+
+def _normalize_and_collect_messages(
+ obj: dict[str, Any],
+) -> tuple[dict[str, Any], list[ValidationMessage]]:
+ """Normalize and return both the dict and the pre-normalize scan messages.
+
+ Internal helper used by :func:`validate_spec`. Calls the transformer's
+ normalization in silent mode so scan findings are returned as
+ ``ValidationMessage`` records rather than emitted as Python warnings or
+ raised as ``SpecificationError``.
+ """
obj = dict(obj)
+ messages: list[ValidationMessage] = []
try:
- Transformer._normalize_spec_dict(obj)
+ messages = Transformer._normalize_spec_dict(obj, silent=True) or []
except Exception:
logger.debug("Normalization failed; falling back to raw dict", exc_info=True)
for field in _COERCE_FIELDS:
if field in obj:
obj[field] = _coerce_yaml_types(obj[field])
- return obj
+ return obj, messages
def _validate_structural(
@@ -324,9 +342,9 @@ def _resolve_schema_path(
def _iter_derivation_dicts(raw: Any) -> list[dict[str, Any]]:
"""Normalize a derivations section (dict or list) to a list of dicts.
- After ``normalize_spec_dict``, derivation sections may be either a list of
- dicts (with ``name`` keys) or a dict keyed by name. This helper
- normalizes to a consistent list-of-dicts form for iteration.
+ Assumes the SHAPE phase of ``Transformer._normalize_spec_dict`` has
+ already canonicalized compact-key list items, so callers only need to
+ handle dict-keyed and explicit-name list forms here.
"""
if isinstance(raw, list):
return [item for item in raw if isinstance(item, dict)]
@@ -341,25 +359,40 @@ def _iter_derivation_dicts(raw: Any) -> list[dict[str, Any]]:
def check_deprecated_fields(data: dict[str, Any]) -> list[ValidationMessage]:
- """Check a normalized spec dict for use of deprecated fields.
+ """Scan a spec dict for deprecated-field usage and ambiguous combinations.
+
+ Runs in the SCAN phase of ``Transformer._normalize_spec_dict`` — after
+ SHAPE (which runs ``ReferenceValidator.normalize()`` and the local
+ compact-key pre-expansion) and before MIGRATE (which flattens
+ ``object_derivations``, inherits ``populated_from``, and rewrites PV
+ ``sources``). So the dict the scan sees is structurally canonical
+ (dict-keyed or explicit-name list, no compact-key items) but the
+ deprecated field values are still as the user wrote them.
- Currently flags two deprecations:
+ Flags:
* ``sources`` on ``ClassDerivation`` / ``SlotDerivation`` /
``EnumDerivation`` / ``PermissibleValueDerivation`` — replaced by
- ``populated_from``.
+ ``populated_from``. Severity: warning, category: deprecated.
* ``derived_from`` on ``SlotDerivation`` — ignored by the runtime
- and removable.
-
- Findings are collapsed to one message per (deprecation, derivation
- type) pair to keep output readable on large specs.
-
- Other deprecations (``object_derivations`` flattening) are surfaced
- during normalization in ``Transformer._normalize_slot_class_derivations``
- and do not need to be re-emitted here.
-
- :param data: A **normalized** spec dict (post ``normalize_spec_dict``).
- :returns: A list of warning messages with ``category="deprecated"``.
+ and removable. Severity: warning, category: deprecated.
+ * ``object_derivations`` on ``SlotDerivation`` — flattened into
+ ``class_derivations`` at load time. Severity: warning, category:
+ deprecated.
+ * ``populated_from`` **and** ``sources`` both set on the same
+ ``PermissibleValueDerivation`` — ambiguous, the user almost
+ certainly didn't mean both. Severity: error.
+ * ``object_derivations`` **and** ``class_derivations`` both set on
+ the same ``SlotDerivation`` — ambiguous. Severity: error.
+
+ ``sources`` deprecation findings are collapsed to one message per
+ (deprecation, derivation type) pair to keep output readable on
+ large specs; per-entry messages are emitted for the other categories.
+
+ :param data: A spec dict post-SHAPE, pre-MIGRATE. Derivation sections
+ are dict-keyed or explicit-name list (no compact-key items).
+ :returns: A list of validation messages — warnings for deprecations,
+ errors for ambiguous combinations.
"""
messages: list[ValidationMessage] = []
sources_counts: dict[str, list[str]] = {
@@ -369,6 +402,7 @@ def check_deprecated_fields(data: dict[str, Any]) -> list[ValidationMessage]:
"PermissibleValueDerivation": [],
}
derived_from_names: list[str] = []
+ object_derivation_names: list[str] = []
for cd in _iter_derivation_dicts(data.get("class_derivations")):
cd_name = cd.get("name", "")
@@ -380,6 +414,20 @@ def check_deprecated_fields(data: dict[str, Any]) -> list[ValidationMessage]:
sources_counts["SlotDerivation"].append(sd_name)
if sd.get("derived_from"):
derived_from_names.append(sd_name)
+ if sd.get("object_derivations"):
+ object_derivation_names.append(sd_name)
+ if sd.get("class_derivations"):
+ messages.append(
+ ValidationMessage(
+ severity="error",
+ path=f"$.class_derivations[{cd_name}].slot_derivations[{sd_name}]",
+ message=(
+ f"SlotDerivation '{sd_name}' sets both 'object_derivations' "
+ f"and 'class_derivations'. Remove 'object_derivations' and "
+ f"use 'class_derivations' only."
+ ),
+ )
+ )
for ed in _iter_derivation_dicts(data.get("enum_derivations")):
ed_name = ed.get("name", "")
@@ -389,6 +437,19 @@ def check_deprecated_fields(data: dict[str, Any]) -> list[ValidationMessage]:
pvd_name = pvd.get("name", "")
if pvd.get("sources"):
sources_counts["PermissibleValueDerivation"].append(pvd_name)
+ if pvd.get("populated_from"):
+ messages.append(
+ ValidationMessage(
+ severity="error",
+ path=(f"$.enum_derivations[{ed_name}].permissible_value_derivations[{pvd_name}]"),
+ message=(
+ f"PermissibleValueDerivation '{pvd_name}' sets both "
+ f"'populated_from' and 'sources'. These are alternative "
+ f"spellings of the same field; set only 'populated_from' "
+ f"(which now accepts a list)."
+ ),
+ )
+ )
for deriv_type, names in sources_counts.items():
if names:
@@ -407,6 +468,24 @@ def check_deprecated_fields(data: dict[str, Any]) -> list[ValidationMessage]:
)
)
+ if object_derivation_names:
+ preview = ", ".join(object_derivation_names[:5])
+ suffix = f" (and {len(object_derivation_names) - 5} more)" if len(object_derivation_names) > 5 else ""
+ messages.append(
+ ValidationMessage(
+ severity="warning",
+ category="deprecated",
+ path="$.SlotDerivation",
+ message=(
+ f"{len(object_derivation_names)} SlotDerivation(s) use 'object_derivations', "
+ f"which is deprecated and flattened into 'class_derivations' at load time: "
+ f"{preview}{suffix}. Use list-based 'class_derivations' instead. "
+ f"'object_derivations' will be removed in a future version. "
+ f"See https://github.com/linkml/linkml-map/issues/112"
+ ),
+ )
+ )
+
if derived_from_names:
preview = ", ".join(derived_from_names[:5])
suffix = f" (and {len(derived_from_names) - 5} more)" if len(derived_from_names) > 5 else ""
@@ -722,10 +801,10 @@ def validate_spec(
>>> validate_spec(data)
[]
"""
- normalized = normalize_spec_dict(data)
+ normalized, scan_messages = _normalize_and_collect_messages(data)
messages = _validate_structural(normalized, schema_path=schema_path)
if not messages:
- messages.extend(check_deprecated_fields(normalized))
+ messages.extend(scan_messages)
messages.extend(
validate_spec_semantics(
normalized,
diff --git a/tests/test_deprecations.py b/tests/test_deprecations.py
index 8d609909..fd4aedc5 100644
--- a/tests/test_deprecations.py
+++ b/tests/test_deprecations.py
@@ -43,7 +43,7 @@
def test_derived_from_emits_deprecation_warning():
- """Using derived_from on a SlotDerivation emits a DeprecationWarning."""
+ """Using derived_from on a SlotDerivation emits a DeprecationWarning at load time."""
tr = ObjectTransformer()
tr.source_schemaview = SchemaView(SOURCE_SCHEMA)
tr.target_schemaview = SchemaView(TARGET_SCHEMA)
@@ -60,17 +60,14 @@ def test_derived_from_emits_deprecation_warning():
},
},
}
- tr.create_transformer_specification(spec)
-
with warnings.catch_warnings(record=True) as caught:
warnings.simplefilter("always", DeprecationWarning)
- # Trigger induce_missing_values via derived_specification
- _ = tr.derived_specification
+ tr.create_transformer_specification(spec)
deprecation_warnings = [w for w in caught if issubclass(w.category, DeprecationWarning)]
- assert len(deprecation_warnings) == 1
- assert "derived_from" in str(deprecation_warnings[0].message)
- assert "full_name" in str(deprecation_warnings[0].message)
+ derived_from_warnings = [w for w in deprecation_warnings if "derived_from" in str(w.message)]
+ assert len(derived_from_warnings) == 1
+ assert "full_name" in str(derived_from_warnings[0].message)
def test_derived_from_does_not_affect_transformation():
@@ -105,7 +102,7 @@ def test_derived_from_does_not_affect_transformation():
def test_sources_on_pv_emits_deprecation_warning():
- """Using sources on a PermissibleValueDerivation emits a DeprecationWarning."""
+ """Using sources on a PermissibleValueDerivation emits a DeprecationWarning at load time."""
tr = ObjectTransformer()
tr.source_schemaview = SchemaView(SOURCE_SCHEMA)
tr.target_schemaview = SchemaView(TARGET_SCHEMA)
@@ -128,11 +125,9 @@ def test_sources_on_pv_emits_deprecation_warning():
},
},
}
- tr.create_transformer_specification(spec)
-
with warnings.catch_warnings(record=True) as caught:
warnings.simplefilter("always", DeprecationWarning)
- _ = tr.derived_specification
+ tr.create_transformer_specification(spec)
deprecation_warnings = [w for w in caught if issubclass(w.category, DeprecationWarning)]
sources_warnings = [w for w in deprecation_warnings if "sources" in str(w.message)]
@@ -141,7 +136,7 @@ def test_sources_on_pv_emits_deprecation_warning():
def test_sources_on_slot_emits_deprecation_warning():
- """Using sources on a SlotDerivation emits a DeprecationWarning."""
+ """Using sources on a SlotDerivation emits a DeprecationWarning at load time."""
tr = ObjectTransformer()
tr.source_schemaview = SchemaView(SOURCE_SCHEMA)
tr.target_schemaview = SchemaView(TARGET_SCHEMA)
@@ -157,11 +152,9 @@ def test_sources_on_slot_emits_deprecation_warning():
},
},
}
- tr.create_transformer_specification(spec)
-
with warnings.catch_warnings(record=True) as caught:
warnings.simplefilter("always", DeprecationWarning)
- _ = tr.derived_specification
+ tr.create_transformer_specification(spec)
deprecation_warnings = [w for w in caught if issubclass(w.category, DeprecationWarning)]
sources_warnings = [w for w in deprecation_warnings if "sources" in str(w.message)]
@@ -246,11 +239,9 @@ def test_no_warning_without_deprecated_fields():
},
},
}
- tr.create_transformer_specification(spec)
-
with warnings.catch_warnings(record=True) as caught:
warnings.simplefilter("always", DeprecationWarning)
- _ = tr.derived_specification
+ tr.create_transformer_specification(spec)
deprecation_warnings = [w for w in caught if issubclass(w.category, DeprecationWarning)]
assert len(deprecation_warnings) == 0
diff --git a/tests/test_transformer/test_multi_enum.py b/tests/test_transformer/test_multi_enum.py
index 6e6703a1..9da6e7cb 100644
--- a/tests/test_transformer/test_multi_enum.py
+++ b/tests/test_transformer/test_multi_enum.py
@@ -291,6 +291,54 @@ def test_null_color_stays_none():
assert result["color"] is None
+def test_pv_populated_from_list_form_maps_multiple_sources():
+ """populated_from as a list maps any listed source value to the target PV."""
+ tr = ObjectTransformer()
+ tr.source_schemaview = SchemaView(SOURCE_SCHEMA)
+ tr.target_schemaview = SchemaView(TARGET_SCHEMA)
+ spec = copy.deepcopy(TRANSFORM_SPEC)
+ # Replace SimplePrimary's deprecated sources with list-form populated_from.
+ spec["enum_derivations"]["SimplePrimary"]["permissible_value_derivations"] = {
+ "red": {"populated_from": ["light_red", "dark_red"]},
+ "green": {"populated_from": ["light_green", "dark_green"]},
+ "blue": {"populated_from": ["light_blue", "dark_blue"]},
+ }
+ tr.create_transformer_specification(spec)
+
+ for src, expected in [("light_red", "red"), ("dark_red", "red"), ("dark_blue", "blue")]:
+ result = tr.map_object({"id": "l1", "color": src}, source_type="Light")
+ assert result["color"] == expected
+
+
+def test_pv_populated_from_scalar_form_still_works():
+ """Scalar populated_from is wrapped to a one-element list at load time."""
+ tr = ObjectTransformer()
+ tr.source_schemaview = SchemaView(SOURCE_SCHEMA)
+ tr.target_schemaview = SchemaView(TARGET_SCHEMA)
+ spec = copy.deepcopy(TRANSFORM_SPEC)
+ spec["enum_derivations"]["MissingnessTarget"]["permissible_value_derivations"] = {
+ "na": {"populated_from": "not_available"},
+ "oth": {"populated_from": "other"},
+ }
+ tr.create_transformer_specification(spec)
+ # Pydantic representation is always a list after normalization.
+ pvds = tr.specification.enum_derivations["MissingnessTarget"].permissible_value_derivations
+ assert pvds["na"].populated_from == ["not_available"]
+
+ result = tr.map_object({"id": "l1", "color": "not_available"}, source_type="Light")
+ assert result["color"] == "na"
+
+
+def test_pv_sources_only_is_migrated_to_populated_from():
+ """sources-only PV derivs are migrated so the runtime sees only populated_from."""
+ tr = _make_transformer()
+ # SimplePrimary in TRANSFORM_SPEC uses sources: [...] — confirm migration cleared sources.
+ pvds = tr.specification.enum_derivations["SimplePrimary"].permissible_value_derivations
+ assert pvds["red"].populated_from == ["light_red", "dark_red"]
+ # sources is cleared after migration — the runtime relies on populated_from alone.
+ assert not pvds["red"].sources
+
+
def test_explicit_range_any_with_any_of():
"""Slots with explicit range: Any plus any_of enum ranges are mapped correctly."""
schema = SOURCE_SCHEMA.replace(
diff --git a/tests/test_upstream_canaries.py b/tests/test_upstream_canaries.py
new file mode 100644
index 00000000..c15f1931
--- /dev/null
+++ b/tests/test_upstream_canaries.py
@@ -0,0 +1,49 @@
+"""Canary tests for upstream linkml/linkml-runtime bugs we work around.
+
+Each test asserts the **correct** post-fix behavior and is marked
+``xfail(strict=True)``. While the bug persists upstream, the test
+xfails. When the upstream fix lands, the test starts passing and
+``strict=True`` flips XPASS into a hard failure — a loud signal to
+remove the local workaround.
+"""
+
+import pytest
+
+
+@pytest.mark.xfail(
+ strict=True,
+ reason=(
+ "linkml/linkml#3529: ReferenceValidator.normalize() mangles compact-key "
+ "list input to inlined-dict fields as {None: ...}. The proposed upstream "
+ "fix hoists unambiguous single-key items (and errors on degenerate cases). "
+ "Our test case is unambiguous so it will hoist. When this passes, remove "
+ "the PV-level branch from Transformer._pre_shape_expand_compact_keys."
+ ),
+)
+def test_reference_validator_normalizes_compact_key_pv_list():
+ """Canary: compact-key list for inlined-dict PV field normalizes correctly.
+
+ See: https://github.com/linkml/linkml/issues/3529
+ """
+ from linkml_runtime.processing.referencevalidator import ReferenceValidator
+ from linkml_runtime.utils.introspection import package_schemaview
+
+ normalizer = ReferenceValidator(package_schemaview("linkml_map.datamodel.transformer_model"))
+ normalizer.expand_all = True
+
+ obj = {
+ "enum_derivations": {
+ "Target": {
+ "populated_from": "Source",
+ "permissible_value_derivations": [
+ {"red": {"sources": ["a"]}},
+ ],
+ },
+ },
+ }
+ result = normalizer.normalize(obj)
+ pvs = result["enum_derivations"]["Target"]["permissible_value_derivations"]
+
+ # Expected (post-fix): compact-key name is hoisted as the dict key.
+ assert None not in pvs, f"upstream still produces None-keyed entries: {pvs}"
+ assert "red" in pvs, f"upstream did not hoist compact-key name: {pvs}"
diff --git a/tests/test_validator.py b/tests/test_validator.py
index 21358dcd..de17d1c8 100644
--- a/tests/test_validator.py
+++ b/tests/test_validator.py
@@ -174,6 +174,127 @@ def test_validate_detects_bad_slot_derivation_type():
assert len(_errors(msgs)) > 0
+# ---------------------------------------------------------------------------
+# PermissibleValueDerivation populated_from / sources interaction
+# ---------------------------------------------------------------------------
+
+
+def test_pv_populated_from_list_validates():
+ """List-form populated_from on a PV deriv is structurally valid."""
+ spec = {
+ "enum_derivations": {
+ "Target": {
+ "populated_from": "Source",
+ "permissible_value_derivations": {
+ "red": {"populated_from": ["light_red", "dark_red"]},
+ },
+ },
+ },
+ }
+ msgs = validate_spec(spec)
+ assert _errors(msgs) == []
+
+
+def test_pv_populated_from_scalar_validates():
+ """Scalar populated_from on a PV deriv is accepted (wrapped to list)."""
+ spec = {
+ "enum_derivations": {
+ "Target": {
+ "populated_from": "Source",
+ "permissible_value_derivations": {
+ "red": {"populated_from": "light_red"},
+ },
+ },
+ },
+ }
+ msgs = validate_spec(spec)
+ assert _errors(msgs) == []
+
+
+def test_pv_sources_only_is_deprecation_not_conflict():
+ """Sources-only is a deprecation warning, not a conflict error."""
+ spec = {
+ "enum_derivations": {
+ "Target": {
+ "populated_from": "Source",
+ "permissible_value_derivations": {
+ "red": {"sources": ["light_red", "dark_red"]},
+ },
+ },
+ },
+ }
+ msgs = validate_spec(spec)
+ assert _errors(msgs) == []
+ warnings = _warnings(msgs)
+ assert any("sources" in m.message and "PermissibleValueDerivation" in m.path for m in warnings)
+
+
+def test_pv_sources_and_populated_from_both_set_errors():
+ """Setting both sources and populated_from on a PV deriv is an error."""
+ spec = {
+ "enum_derivations": {
+ "Target": {
+ "populated_from": "Source",
+ "permissible_value_derivations": {
+ "red": {
+ "populated_from": ["light_red"],
+ "sources": ["dark_red"],
+ },
+ },
+ },
+ },
+ }
+ msgs = validate_spec(spec)
+ errors = _errors(msgs)
+ assert any("both 'populated_from' and 'sources'" in m.message for m in errors)
+ assert any("'red'" in m.message for m in errors)
+
+
+def test_pv_sources_in_compact_key_list_form_detected():
+ """Compact-key list-form PV derivs (`[{name: {sources: [...]}}]`) are scanned.
+
+ The SHAPE phase expands compact-key list items before the SCAN runs, so
+ the deprecation warning fires end-to-end through ``validate_spec``.
+ """
+ spec = {
+ "enum_derivations": {
+ "Target": {
+ "populated_from": "Source",
+ "permissible_value_derivations": [
+ {"red": {"sources": ["light_red", "dark_red"]}},
+ ],
+ },
+ },
+ }
+ msgs = validate_spec(spec)
+ assert _errors(msgs) == []
+ warnings = _warnings(msgs)
+ assert any("sources" in m.message and "PermissibleValueDerivation" in m.path for m in warnings), (
+ f"Expected deprecation warning for compact-key list PV; got {warnings}"
+ )
+
+
+def test_pv_populated_from_explicit_none_is_stripped():
+ """`populated_from: null` (explicit YAML None) is dropped during normalize."""
+ raw = {
+ "enum_derivations": {
+ "Target": {
+ "populated_from": "Source",
+ "permissible_value_derivations": {
+ "red": {"populated_from": None},
+ },
+ },
+ },
+ }
+ normalized = normalize_spec_dict(raw)
+ # After normalize, the PV's populated_from key has been removed; pydantic
+ # will fill in the default factory ([]) when the model is built.
+ eds = normalized["enum_derivations"]
+ pvds = eds["Target"]["permissible_value_derivations"]
+ pvd = pvds["red"] if isinstance(pvds, dict) else next(iter(pvds))
+ assert "populated_from" not in pvd
+
+
# ---------------------------------------------------------------------------
# File-level validation against real trans-specs (structural only)
# ---------------------------------------------------------------------------