linkml · amc-corey-cox · May 12, 2026 · May 12, 2026 · May 12, 2026
diff --git a/src/linkml_map/cli/cli.py b/src/linkml_map/cli/cli.py
@@ -86,6 +86,16 @@ def main(verbose: int, quiet: bool) -> None:
     show_default=True,
     help="Allow unrestricted eval of python expressions.",
 )
+@click.option(
+    "--strict/--no-strict",
+    default=False,
+    show_default=True,
+    help=(
+        "Raise an error when an expression references a name that is not a slot "
+        "on the source class. Recommended for real data transforms; the default "
+        "warns and returns None for backward compatibility."
+    ),
+)
 @click.option(
     "--output-format",
     "-f",

diff --git a/src/linkml_map/transformer/object_transformer.py b/src/linkml_map/transformer/object_transformer.py
@@ -6,7 +6,7 @@
 import logging
 from collections.abc import Iterator, Mapping
 from contextlib import contextmanager
-from dataclasses import dataclass
+from dataclasses import dataclass, field
 from typing import Any
 
 import yaml
@@ -126,6 +126,26 @@ def __init__(  # noqa: PLR0913
         self.class_deriv: ClassDerivation | None = class_deriv
         if bindings:
             self.bindings.update(bindings)
+        self._schema_slots: set[str] = self._collect_schema_slots()
+
+    def _collect_schema_slots(self) -> set[str]:
+        """Slot names declared on the source class.
+
+        Used by ``__getitem__`` to distinguish a schema-declared slot
+        absent from the current row (legitimate SQL null) from a name
+        that does not exist in the schema (typo / stale reference).
+
+        Explicit-join aliases are not added here — they are already
+        handled earlier in ``__getitem__`` via ``_join_specs``. Slots on
+        join-target classes are intentionally excluded so that a bare
+        reference like ``{score}`` (instead of ``{Reading.score}``)
+        surfaces as a typo in strict mode for explicit-join specs.
+        Implicit-join column resolution is a separate concern handled
+        by the runtime merge in ``_derive_nested_objects`` (#217).
+        """
+        if self.source_type and self.source_type in self.sv.all_classes():
+            return {s.name for s in self.sv.class_induced_slots(self.source_type)}
+        return set()
 
     @classmethod
     def from_context(cls, transformer: ObjectTransformer, context: DerivationContext) -> Bindings:
@@ -203,8 +223,16 @@ def __getitem__(self, name: Any) -> Any:
             elif name == self.source_type and name not in self.source_obj:
                 # Self-reference: {Reading.score} when source_type is Reading (non-merge context)
                 self.bindings[name] = DynObj(**self.source_obj)
-            else:
+            elif name in self.source_obj:
                 _ = self.get_ctxt_obj_and_dict({name: self.source_obj[name]})
+            elif name in self._schema_slots:
+                # Schema-declared slot absent from this row → SQL null.
+                # Distinct from a name that is not in the schema at all,
+                # which falls through and raises KeyError so simpleeval
+                # surfaces it as NameNotDefined (handled in strict mode).
+                return None
+            else:
+                raise KeyError(name)
 
         return self.bindings.get(name)
 
@@ -243,6 +271,18 @@ class ObjectTransformer(Transformer):
     object_index: ObjectIndex = None
     lookup_index: Any = None  # Optional[LookupIndex] — lazy import to avoid hard duckdb dep
 
+    _warned_unbound_names: set[str] = field(default_factory=set, repr=False)
+    """Names already warned about in non-strict mode.
+
+    Shared across all expression evaluations on this transformer
+    instance — each unique unbound reference logs once, not once per
+    row. The set is never cleared automatically, so a transformer
+    reused across multiple logical runs will not re-warn for the same
+    typo. This is the intended behavior: if a spec has a typo, one
+    warning per typo across the lifetime of the transformer is enough.
+    Construct a fresh ``ObjectTransformer`` if you want a clean slate.
+    """
+
     def index(self, source_obj: Any, target: str | None = None) -> None:
         """
         Create an index over a container object.
@@ -510,7 +550,13 @@ def _eval_expr(
             (e.g. ``slot`` for referencing previously derived target slots).
         """
         try:
-            return eval_expr_with_mapping(expr, bindings, functions=functions)
+            return eval_expr_with_mapping(
+                expr,
+                bindings,
+                functions=functions,
+                strict=self.strict,
+                warned_unbound=self._warned_unbound_names,
+            )
         except (InvalidExpression, TypeError, ValueError):
             if not self.unrestricted_eval:
                 raise

diff --git a/src/linkml_map/transformer/transformer.py b/src/linkml_map/transformer/transformer.py
@@ -79,6 +79,18 @@ class Transformer(ABC):
     unrestricted_eval: bool = field(default=False)
     """Set to True to allow arbitrary evals as part of transformation."""
 
+    strict: bool = field(default=False)
+    """Raise on expression references that do not resolve to a schema slot.
+
+    When ``False`` (the default), unresolved names emit a warning and the
+    expression evaluator returns ``None`` (preserving SQL-style null
+    propagation for legitimate empty values but losing the signal for
+    typos). When ``True``, unresolved names raise
+    :class:`~linkml_map.transformer.errors.TransformationError`, which
+    surfaces typos, stale references, and wrong-table accesses that
+    would otherwise produce silent nulls in the output.
+    """
+
     _curie_converter: Converter = None
 
     def map_object(self, obj: OBJECT_TYPE, source_type: str | None = None, **kwargs: Any) -> OBJECT_TYPE:

diff --git a/src/linkml_map/utils/eval_utils.py b/src/linkml_map/utils/eval_utils.py
@@ -410,7 +410,18 @@ class LinkMLEvaluator(EvalWithCompoundTypes):
     - Numeric-string coercion for comparison operators
     """
 
-    def __init__(self, **kwargs: Any) -> None:  # noqa: ANN401
+    def __init__(
+        self,
+        *,
+        strict: bool = False,
+        warned_unbound: set[str] | None = None,
+        **kwargs: Any,  # noqa: ANN401
+    ) -> None:
+        self.strict: bool = strict
+        # Shared across rows when the caller passes a set in. Per-evaluator
+        # default dedupes within a single expression eval (mostly a no-op
+        # since a given name appears once per expr, but cheap and safe).
+        self._warned_unbound: set[str] = warned_unbound if warned_unbound is not None else set()
         super().__init__(**kwargs)
         for op_type in (ast.Eq, ast.NotEq, ast.Lt, ast.LtE, ast.Gt, ast.GtE):
             self.operators[op_type] = _coercing(self.operators[op_type])
@@ -436,14 +447,39 @@ def __init__(self, **kwargs: Any) -> None:  # noqa: ANN401
 
     def _eval_name(self, node: ast.Name) -> Any:  # noqa: ANN401
         """
-        Override name resolution to return None for unbound variables.
-
-        The default simpleeval behavior raises ``NameNotDefined``.
-        In LinkML expressions, unbound variables resolve to None.
+        Override name resolution to surface unbound variables in strict mode.
+
+        The default simpleeval behavior raises ``NameNotDefined`` when a
+        name has no binding. With ``strict=True``, this is re-raised as
+        a built-in :class:`NameError` so callers in the transformer can
+        wrap it with derivation context (see
+        ``ObjectTransformer._slot_error_context``). With ``strict=False``
+        it emits a warning and returns ``None`` (preserving SQL-style
+        null propagation for backward compatibility, while losing the
+        signal for typos and stale references).
+
+        ``NameError`` is deliberately chosen over ``TransformationError``
+        here: ``_slot_error_context`` only enriches non-``TransformationError``
+        exceptions with class/slot/row context. ``NameError`` is also
+        outside the ``(InvalidExpression, TypeError, ValueError)`` catch
+        list in ``ObjectTransformer._eval_expr``, so it propagates
+        cleanly past the ``unrestricted_eval`` fallback rather than
+        being swallowed.
+
+        Note: names that resolve to ``None`` (e.g. a schema-declared slot
+        absent from the current row) never reach this path — they
+        succeed via ``super()._eval_name`` and propagate ``None`` as
+        legitimate SQL null.
         """
         try:
             return super()._eval_name(node)
         except NameNotDefined:
+            msg = f"Expression references '{node.id}' which is not a slot on the source class. Typo or stale reference?"
+            if self.strict:
+                raise NameError(msg) from None
+            if node.id not in self._warned_unbound:
+                self._warned_unbound.add(node.id)
+                logger.warning("%s Returning None (run in strict mode to surface as an error).", msg)
             return None
 
     def _eval_set(self, node: ast.Set) -> Any:  # noqa: ANN401
@@ -503,15 +539,29 @@ def _distributed_getattr(obj: Any, attr: str) -> Any:  # noqa: ANN401
     return getattr(obj, attr)
 
 
-def _make_evaluator(names: Any, functions: Any = None) -> LinkMLEvaluator:  # noqa: ANN401
+def _make_evaluator(
+    names: Any,  # noqa: ANN401
+    functions: Any = None,  # noqa: ANN401
+    *,
+    strict: bool = False,
+    warned_unbound: set[str] | None = None,
+) -> LinkMLEvaluator:
     """Create a configured LinkMLEvaluator instance."""
-    return LinkMLEvaluator(names=names, functions=functions or FUNCTIONS)
+    return LinkMLEvaluator(
+        names=names,
+        functions=functions or FUNCTIONS,
+        strict=strict,
+        warned_unbound=warned_unbound,
+    )
 
 
 def eval_expr_with_mapping(
     expr: str,
     mapping: Mapping,
     functions: dict[str, Any] | None = None,
+    *,
+    strict: bool = False,
+    warned_unbound: set[str] | None = None,
 ) -> Any:  # noqa: ANN401
     """
     Evaluate a given expression, with restricted syntax.
@@ -523,11 +573,18 @@ def eval_expr_with_mapping(
     :param mapping: Variable bindings for the expression.
     :param functions: Additional functions to make available. Merged with
         built-in functions; caller-provided names take precedence.
+    :param strict: If ``True``, unbound names raise
+        :class:`~linkml_map.transformer.errors.TransformationError`
+        instead of warning and returning ``None``.
+    :param warned_unbound: Optional set used to dedupe non-strict
+        warnings across multiple ``eval_expr_with_mapping`` calls. Pass
+        the same set across all evaluations in a single transform run
+        so each unique unbound name is logged once, not once per row.
     """
     if expr == "None":
         return None
     merged = {**FUNCTIONS, **functions} if functions else None
-    evaluator = _make_evaluator(names=mapping, functions=merged)
+    evaluator = _make_evaluator(names=mapping, functions=merged, strict=strict, warned_unbound=warned_unbound)
     return evaluator.eval(expr)
 
 

diff --git a/tests/test_cli/test_cli.py b/tests/test_cli/test_cli.py
@@ -164,6 +164,84 @@ def test_map_data(runner: CliRunner) -> None:
     assert tr_data["agents"][0]["label"] == "fred bloggs"
 
 
+def _write_typo_spec_fixture(tmp_path: Path) -> tuple[Path, Path, Path]:
+    """Write a minimal schema + spec (with a typo'd expr) + input row to tmp_path.
+
+    Returns (schema_path, spec_path, input_path).
+    """
+    schema_path = tmp_path / "schema.yaml"
+    schema_path.write_text(
+        "id: https://example.org/typo-source\n"
+        "name: typo_source\n"
+        "prefixes:\n"
+        "  linkml: https://w3id.org/linkml/\n"
+        "imports:\n"
+        "  - linkml:types\n"
+        "default_range: string\n"
+        "classes:\n"
+        "  Person:\n"
+        "    attributes:\n"
+        "      id: {identifier: true}\n"
+        "      score: {range: integer}\n"
+    )
+    spec_path = tmp_path / "spec.yaml"
+    spec_path.write_text(
+        "source_schema: typo_source\n"
+        "class_derivations:\n"
+        "  Agent:\n"
+        "    populated_from: Person\n"
+        "    slot_derivations:\n"
+        "      name:\n"
+        '        expr: "{scroe}"\n'  # codespell:ignore
+    )
+    input_path = tmp_path / "input.yaml"
+    input_path.write_text("id: p1\nscore: 5\n")
+    return schema_path, spec_path, input_path
+
+
+def test_map_data_strict_flag_surfaces_typo(runner: CliRunner, tmp_path: Path) -> None:
+    """`--strict` makes an expression typo fail the command instead of silent null."""
+    schema_path, spec_path, input_path = _write_typo_spec_fixture(tmp_path)
+    result = runner.invoke(
+        main,
+        [
+            "map-data",
+            "--strict",
+            "-T",
+            str(spec_path),
+            "-s",
+            str(schema_path),
+            "--source-type",
+            "Person",
+            str(input_path),
+        ],
+    )
+    assert result.exit_code != 0
+    assert "scroe" in (result.stderr + str(result.exception))  # codespell:ignore
+
+
+def test_map_data_no_strict_default_succeeds(runner: CliRunner, tmp_path: Path) -> None:
+    """Default (non-strict) preserves backward-compat: exits 0, produces empty/null output."""
+    schema_path, spec_path, input_path = _write_typo_spec_fixture(tmp_path)
+    result = runner.invoke(
+        main,
+        [
+            "map-data",
+            "-T",
+            str(spec_path),
+            "-s",
+            str(schema_path),
+            "--source-type",
+            "Person",
+            str(input_path),
+        ],
+    )
+    assert result.exit_code == 0
+    # Null values are omitted by the YAML dumper, so {name: None} round-trips to {}.
+    out = yaml.safe_load(result.stdout) or {}
+    assert out.get("name") is None
+
+
 def test_map_data_norm_denorm(runner: CliRunner) -> None:
     cmd = [
         "map-data",