From d8d766378db5c0fae5a32e1c7068564d8870ceb8 Mon Sep 17 00:00:00 2001 From: amc-corey-cox <69321580+amc-corey-cox@users.noreply.github.com> Date: Tue, 12 May 2026 12:51:36 -0500 Subject: [PATCH 1/2] Add strict mode for expression evaluation (#213) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit By default, expression references to names that aren't slots on the source class silently resolve to None, masking typos and stale references. This adds a `strict` flag on the `Transformer` dataclass that surfaces such names as `TransformationError` instead. Non-strict mode keeps the silent-null behavior but logs a warning the first time each unique unbound name is seen. The `Bindings` class now pre-computes the source class's slot set so "slot declared but absent from this row" (legitimate SQL null) is distinguishable from "name not in schema at all" (typo). Both behaviors are preserved in non-strict mode; only the typo case escalates in strict mode. Explicit-join target class slots are intentionally not added to the slot set — those are accessed via the join alias (`{Reading.score}`), not bare references, so a bare `{score}` on a source class without that slot is a typo. Implicit-join column resolution is deferred to the follow-up runtime work for #217. Exposed on the CLI as `map-data --strict/--no-strict` (default off). --- src/linkml_map/cli/cli.py | 10 ++ .../transformer/object_transformer.py | 44 +++++- src/linkml_map/transformer/transformer.py | 12 ++ src/linkml_map/utils/eval_utils.py | 65 +++++++- tests/test_cli/test_cli.py | 78 ++++++++++ .../test_transformer/test_expr_strict_mode.py | 141 ++++++++++++++++++ tests/test_utils/test_eval_utils.py | 39 +++++ 7 files changed, 378 insertions(+), 11 deletions(-) create mode 100644 tests/test_transformer/test_expr_strict_mode.py diff --git a/src/linkml_map/cli/cli.py b/src/linkml_map/cli/cli.py index 45d49b7d..b118c99a 100644 --- a/src/linkml_map/cli/cli.py +++ b/src/linkml_map/cli/cli.py @@ -86,6 +86,16 @@ def main(verbose: int, quiet: bool) -> None: show_default=True, help="Allow unrestricted eval of python expressions.", ) +@click.option( + "--strict/--no-strict", + default=False, + show_default=True, + help=( + "Raise an error when an expression references a name that is not a slot " + "on the source class. Recommended for real data transforms; the default " + "warns and returns None for backward compatibility." + ), +) @click.option( "--output-format", "-f", diff --git a/src/linkml_map/transformer/object_transformer.py b/src/linkml_map/transformer/object_transformer.py index 46c1e9fb..56e8bd8b 100644 --- a/src/linkml_map/transformer/object_transformer.py +++ b/src/linkml_map/transformer/object_transformer.py @@ -6,7 +6,7 @@ import logging from collections.abc import Iterator, Mapping from contextlib import contextmanager -from dataclasses import dataclass +from dataclasses import dataclass, field from typing import Any import yaml @@ -126,6 +126,26 @@ def __init__( # noqa: PLR0913 self.class_deriv: ClassDerivation | None = class_deriv if bindings: self.bindings.update(bindings) + self._schema_slots: set[str] = self._collect_schema_slots() + + def _collect_schema_slots(self) -> set[str]: + """Slot names declared on the source class. + + Used by ``__getitem__`` to distinguish a schema-declared slot + absent from the current row (legitimate SQL null) from a name + that does not exist in the schema (typo / stale reference). + + Explicit-join aliases are not added here — they are already + handled earlier in ``__getitem__`` via ``_join_specs``. Slots on + join-target classes are intentionally excluded so that a bare + reference like ``{score}`` (instead of ``{Reading.score}``) + surfaces as a typo in strict mode for explicit-join specs. + Implicit-join column resolution is a separate concern handled + by the runtime merge in ``_derive_nested_objects`` (#217). + """ + if self.source_type and self.source_type in self.sv.all_classes(): + return {s.name for s in self.sv.class_induced_slots(self.source_type)} + return set() @classmethod def from_context(cls, transformer: ObjectTransformer, context: DerivationContext) -> Bindings: @@ -203,8 +223,16 @@ def __getitem__(self, name: Any) -> Any: elif name == self.source_type and name not in self.source_obj: # Self-reference: {Reading.score} when source_type is Reading (non-merge context) self.bindings[name] = DynObj(**self.source_obj) - else: + elif name in self.source_obj: _ = self.get_ctxt_obj_and_dict({name: self.source_obj[name]}) + elif name in self._schema_slots: + # Schema-declared slot absent from this row → SQL null. + # Distinct from a name that is not in the schema at all, + # which falls through and raises KeyError so simpleeval + # surfaces it as NameNotDefined (handled in strict mode). + return None + else: + raise KeyError(name) return self.bindings.get(name) @@ -243,6 +271,10 @@ class ObjectTransformer(Transformer): object_index: ObjectIndex = None lookup_index: Any = None # Optional[LookupIndex] — lazy import to avoid hard duckdb dep + _warned_unbound_names: set[str] = field(default_factory=set, repr=False) + """Names already warned about in non-strict mode. Shared across rows so + each unique unbound reference logs once per transform run, not once per row.""" + def index(self, source_obj: Any, target: str | None = None) -> None: """ Create an index over a container object. @@ -510,7 +542,13 @@ def _eval_expr( (e.g. ``slot`` for referencing previously derived target slots). """ try: - return eval_expr_with_mapping(expr, bindings, functions=functions) + return eval_expr_with_mapping( + expr, + bindings, + functions=functions, + strict=self.strict, + warned_unbound=self._warned_unbound_names, + ) except (InvalidExpression, TypeError, ValueError): if not self.unrestricted_eval: raise diff --git a/src/linkml_map/transformer/transformer.py b/src/linkml_map/transformer/transformer.py index c2eeea92..307d0227 100644 --- a/src/linkml_map/transformer/transformer.py +++ b/src/linkml_map/transformer/transformer.py @@ -79,6 +79,18 @@ class Transformer(ABC): unrestricted_eval: bool = field(default=False) """Set to True to allow arbitrary evals as part of transformation.""" + strict: bool = field(default=False) + """Raise on expression references that do not resolve to a schema slot. + + When ``False`` (the default), unresolved names emit a warning and the + expression evaluator returns ``None`` (preserving SQL-style null + propagation for legitimate empty values but losing the signal for + typos). When ``True``, unresolved names raise + :class:`~linkml_map.transformer.errors.TransformationError`, which + surfaces typos, stale references, and wrong-table accesses that + would otherwise produce silent nulls in the output. + """ + _curie_converter: Converter = None def map_object(self, obj: OBJECT_TYPE, source_type: str | None = None, **kwargs: Any) -> OBJECT_TYPE: diff --git a/src/linkml_map/utils/eval_utils.py b/src/linkml_map/utils/eval_utils.py index f138260c..1efea0a3 100644 --- a/src/linkml_map/utils/eval_utils.py +++ b/src/linkml_map/utils/eval_utils.py @@ -26,6 +26,8 @@ from simpleeval import EvalWithCompoundTypes, NameNotDefined +from linkml_map.transformer.errors import TransformationError + logger = logging.getLogger(__name__) @@ -410,7 +412,18 @@ class LinkMLEvaluator(EvalWithCompoundTypes): - Numeric-string coercion for comparison operators """ - def __init__(self, **kwargs: Any) -> None: # noqa: ANN401 + def __init__( + self, + *, + strict: bool = False, + warned_unbound: set[str] | None = None, + **kwargs: Any, # noqa: ANN401 + ) -> None: + self.strict: bool = strict + # Shared across rows when the caller passes a set in. Per-evaluator + # default dedupes within a single expression eval (mostly a no-op + # since a given name appears once per expr, but cheap and safe). + self._warned_unbound: set[str] = warned_unbound if warned_unbound is not None else set() super().__init__(**kwargs) for op_type in (ast.Eq, ast.NotEq, ast.Lt, ast.LtE, ast.Gt, ast.GtE): self.operators[op_type] = _coercing(self.operators[op_type]) @@ -436,14 +449,29 @@ def __init__(self, **kwargs: Any) -> None: # noqa: ANN401 def _eval_name(self, node: ast.Name) -> Any: # noqa: ANN401 """ - Override name resolution to return None for unbound variables. - - The default simpleeval behavior raises ``NameNotDefined``. - In LinkML expressions, unbound variables resolve to None. + Override name resolution to surface unbound variables in strict mode. + + The default simpleeval behavior raises ``NameNotDefined`` when a + name has no binding. With ``strict=True``, that surfaces as a + :class:`TransformationError`; with ``strict=False`` it emits a + warning and returns ``None`` (preserving SQL-style null + propagation for backward compatibility, while losing the signal + for typos and stale references). + + Note: names that resolve to ``None`` (e.g. a schema-declared slot + absent from the current row) never reach this path — they + succeed via ``super()._eval_name`` and propagate ``None`` as + legitimate SQL null. """ try: return super()._eval_name(node) except NameNotDefined: + msg = f"Expression references '{node.id}' which is not a slot on the source class. Typo or stale reference?" + if self.strict: + raise TransformationError(msg) from None + if node.id not in self._warned_unbound: + self._warned_unbound.add(node.id) + logger.warning("%s Returning None (run in strict mode to surface as an error).", msg) return None def _eval_set(self, node: ast.Set) -> Any: # noqa: ANN401 @@ -503,15 +531,29 @@ def _distributed_getattr(obj: Any, attr: str) -> Any: # noqa: ANN401 return getattr(obj, attr) -def _make_evaluator(names: Any, functions: Any = None) -> LinkMLEvaluator: # noqa: ANN401 +def _make_evaluator( + names: Any, # noqa: ANN401 + functions: Any = None, # noqa: ANN401 + *, + strict: bool = False, + warned_unbound: set[str] | None = None, +) -> LinkMLEvaluator: """Create a configured LinkMLEvaluator instance.""" - return LinkMLEvaluator(names=names, functions=functions or FUNCTIONS) + return LinkMLEvaluator( + names=names, + functions=functions or FUNCTIONS, + strict=strict, + warned_unbound=warned_unbound, + ) def eval_expr_with_mapping( expr: str, mapping: Mapping, functions: dict[str, Any] | None = None, + *, + strict: bool = False, + warned_unbound: set[str] | None = None, ) -> Any: # noqa: ANN401 """ Evaluate a given expression, with restricted syntax. @@ -523,11 +565,18 @@ def eval_expr_with_mapping( :param mapping: Variable bindings for the expression. :param functions: Additional functions to make available. Merged with built-in functions; caller-provided names take precedence. + :param strict: If ``True``, unbound names raise + :class:`~linkml_map.transformer.errors.TransformationError` + instead of warning and returning ``None``. + :param warned_unbound: Optional set used to dedupe non-strict + warnings across multiple ``eval_expr_with_mapping`` calls. Pass + the same set across all evaluations in a single transform run + so each unique unbound name is logged once, not once per row. """ if expr == "None": return None merged = {**FUNCTIONS, **functions} if functions else None - evaluator = _make_evaluator(names=mapping, functions=merged) + evaluator = _make_evaluator(names=mapping, functions=merged, strict=strict, warned_unbound=warned_unbound) return evaluator.eval(expr) diff --git a/tests/test_cli/test_cli.py b/tests/test_cli/test_cli.py index d9126d6b..eff9e208 100644 --- a/tests/test_cli/test_cli.py +++ b/tests/test_cli/test_cli.py @@ -164,6 +164,84 @@ def test_map_data(runner: CliRunner) -> None: assert tr_data["agents"][0]["label"] == "fred bloggs" +def _write_typo_spec_fixture(tmp_path: Path) -> tuple[Path, Path, Path]: + """Write a minimal schema + spec (with a typo'd expr) + input row to tmp_path. + + Returns (schema_path, spec_path, input_path). + """ + schema_path = tmp_path / "schema.yaml" + schema_path.write_text( + "id: https://example.org/typo-source\n" + "name: typo_source\n" + "prefixes:\n" + " linkml: https://w3id.org/linkml/\n" + "imports:\n" + " - linkml:types\n" + "default_range: string\n" + "classes:\n" + " Person:\n" + " attributes:\n" + " id: {identifier: true}\n" + " score: {range: integer}\n" + ) + spec_path = tmp_path / "spec.yaml" + spec_path.write_text( + "source_schema: typo_source\n" + "class_derivations:\n" + " Agent:\n" + " populated_from: Person\n" + " slot_derivations:\n" + " name:\n" + ' expr: "{scroe}"\n' + ) + input_path = tmp_path / "input.yaml" + input_path.write_text("id: p1\nscore: 5\n") + return schema_path, spec_path, input_path + + +def test_map_data_strict_flag_surfaces_typo(runner: CliRunner, tmp_path: Path) -> None: + """`--strict` makes an expression typo fail the command instead of silent null.""" + schema_path, spec_path, input_path = _write_typo_spec_fixture(tmp_path) + result = runner.invoke( + main, + [ + "map-data", + "--strict", + "-T", + str(spec_path), + "-s", + str(schema_path), + "--source-type", + "Person", + str(input_path), + ], + ) + assert result.exit_code != 0 + assert "scroe" in (result.stderr + str(result.exception)) + + +def test_map_data_no_strict_default_succeeds(runner: CliRunner, tmp_path: Path) -> None: + """Default (non-strict) preserves backward-compat: exits 0, produces empty/null output.""" + schema_path, spec_path, input_path = _write_typo_spec_fixture(tmp_path) + result = runner.invoke( + main, + [ + "map-data", + "-T", + str(spec_path), + "-s", + str(schema_path), + "--source-type", + "Person", + str(input_path), + ], + ) + assert result.exit_code == 0 + # Null values are omitted by the YAML dumper, so {name: None} round-trips to {}. + out = yaml.safe_load(result.stdout) or {} + assert out.get("name") is None + + def test_map_data_norm_denorm(runner: CliRunner) -> None: cmd = [ "map-data", diff --git a/tests/test_transformer/test_expr_strict_mode.py b/tests/test_transformer/test_expr_strict_mode.py new file mode 100644 index 00000000..99af41ac --- /dev/null +++ b/tests/test_transformer/test_expr_strict_mode.py @@ -0,0 +1,141 @@ +"""Tests for strict-mode expression evaluation (issue #213). + +In non-strict mode the evaluator preserves SQL-style null propagation but +emits a warning when an expression references a name that is not a slot +on the source class (or any explicit join target). In strict mode the +same condition raises ``TransformationError``. + +A schema-declared slot that is absent from the current row must still +resolve to ``None`` in both modes — that case is a real SQL null, not +a typo. +""" + +import logging + +import pytest +import yaml +from linkml.utils.schema_builder import SchemaBuilder +from linkml_runtime import SchemaView + +from linkml_map.transformer.errors import TransformationError +from linkml_map.transformer.object_transformer import ObjectTransformer + + +def _build_schemas() -> tuple: + """Source has Person(id, age, score); target has Agent(name).""" + sb_source = SchemaBuilder() + sb_source.add_slot("id", range="string") + sb_source.add_slot("age", range="integer") + sb_source.add_slot("score", range="integer") + sb_source.add_class("Person", slots=["id", "age", "score"]) + sb_source.add_defaults() + + sb_target = SchemaBuilder() + sb_target.add_slot("name", range="string") + sb_target.add_class("Agent", slots=["name"]) + sb_target.add_defaults() + + return sb_source.schema, sb_target.schema + + +def _make_transformer(transform_spec: dict, *, strict: bool) -> ObjectTransformer: + """Build a transformer with the given strict flag and inline spec.""" + source_schema, target_schema = _build_schemas() + transformer = ObjectTransformer(strict=strict) + transformer.source_schemaview = SchemaView(source_schema) + transformer.target_schemaview = SchemaView(target_schema) + transformer.create_transformer_specification(transform_spec) + return transformer + + +TYPO_SPEC = yaml.safe_load(""" +class_derivations: + Agent: + populated_from: Person + slot_derivations: + name: + expr: "{scroe}" +""") + + +VALID_SPEC = yaml.safe_load(""" +class_derivations: + Agent: + populated_from: Person + slot_derivations: + name: + expr: "str({score})" +""") + + +def test_typo_strict_raises() -> None: + """Strict mode raises TransformationError when an expression references a non-slot.""" + transformer = _make_transformer(TYPO_SPEC, strict=True) + with pytest.raises(TransformationError, match="scroe"): + transformer.map_object({"id": "p1", "age": 30, "score": 5}, source_type="Person") + + +def test_typo_non_strict_warns_and_returns_none(caplog: pytest.LogCaptureFixture) -> None: + """Non-strict mode keeps the existing return-None behavior but logs a warning.""" + transformer = _make_transformer(TYPO_SPEC, strict=False) + with caplog.at_level(logging.WARNING, logger="linkml_map.utils.eval_utils"): + result = transformer.map_object({"id": "p1", "age": 30, "score": 5}, source_type="Person") + assert result == {"name": None} + assert any("scroe" in rec.message for rec in caplog.records) + + +@pytest.mark.parametrize("strict", [True, False]) +def test_schema_slot_with_null_value_resolves_to_none(strict: bool) -> None: + """Slot is in the schema, value is None on this row → expression resolves to None.""" + transformer = _make_transformer(VALID_SPEC, strict=strict) + result = transformer.map_object({"id": "p1", "age": 30, "score": None}, source_type="Person") + assert result == {"name": None} + + +@pytest.mark.parametrize("strict", [True, False]) +def test_schema_slot_absent_from_row_resolves_to_none(strict: bool) -> None: + """Slot is declared on the source class but missing from this row → None (SQL null). + + This is the key distinction the fix introduces: a schema-declared + slot absent from the row is a legitimate null, while a name that + is not a slot at all is a typo. Both must behave correctly in + strict mode. + """ + transformer = _make_transformer(VALID_SPEC, strict=strict) + result = transformer.map_object({"id": "p1", "age": 30}, source_type="Person") + assert result == {"name": None} + + +def test_valid_slot_resolves_in_strict_mode() -> None: + """A valid bare-name reference works in strict mode.""" + transformer = _make_transformer(VALID_SPEC, strict=True) + result = transformer.map_object({"id": "p1", "age": 30, "score": 5}, source_type="Person") + assert result == {"name": "5"} + + +def test_default_strict_is_false() -> None: + """Default behavior is non-strict to preserve backward compatibility.""" + transformer = ObjectTransformer() + assert transformer.strict is False + + +def test_non_strict_warning_dedupes_across_rows(caplog: pytest.LogCaptureFixture) -> None: + """Each unique unbound name should warn once per transform run, not once per row. + + Without dedup, a typo in a spec used against a 10M-row dataset + produces 10M log lines for one underlying problem. + """ + transformer = _make_transformer(TYPO_SPEC, strict=False) + rows = [ + {"id": "p1", "age": 30, "score": 5}, + {"id": "p2", "age": 31, "score": 6}, + {"id": "p3", "age": 32, "score": 7}, + ] + with caplog.at_level(logging.WARNING, logger="linkml_map.utils.eval_utils"): + for row in rows: + transformer.map_object(row, source_type="Person") + + scroe_warnings = [rec for rec in caplog.records if "scroe" in rec.message] + assert len(scroe_warnings) == 1, ( + f"Expected one warning for the 'scroe' typo across {len(rows)} rows, got {len(scroe_warnings)}" + ) diff --git a/tests/test_utils/test_eval_utils.py b/tests/test_utils/test_eval_utils.py index e0fc7b69..079e592e 100644 --- a/tests/test_utils/test_eval_utils.py +++ b/tests/test_utils/test_eval_utils.py @@ -641,6 +641,45 @@ def test_eval_expr_with_mapping_none_literal() -> None: assert eval_expr_with_mapping("None", {}) is None +# ---- Strict mode for unbound names (issue #213) ---- + + +def test_eval_unbound_name_default_returns_none() -> None: + """Non-strict (default) returns None for unbound names, preserving SQL-null semantics.""" + from linkml_map.utils.eval_utils import eval_expr_with_mapping + + assert eval_expr_with_mapping("{scroe}", {"score": 5}) is None + + +def test_eval_unbound_name_default_logs_warning(caplog: pytest.LogCaptureFixture) -> None: + """Non-strict emits a warning so the unbound reference is not entirely silent.""" + import logging + + from linkml_map.utils.eval_utils import eval_expr_with_mapping + + with caplog.at_level(logging.WARNING, logger="linkml_map.utils.eval_utils"): + result = eval_expr_with_mapping("{scroe}", {"score": 5}) + + assert result is None + assert any("scroe" in rec.message for rec in caplog.records) + + +def test_eval_unbound_name_strict_raises() -> None: + """Strict surfaces unbound names as TransformationError with the offending name.""" + from linkml_map.transformer.errors import TransformationError + from linkml_map.utils.eval_utils import eval_expr_with_mapping + + with pytest.raises(TransformationError, match="scroe"): + eval_expr_with_mapping("{scroe}", {"score": 5}, strict=True) + + +def test_eval_bound_name_with_none_value_returns_none_in_strict() -> None: + """A name that is bound to None resolves to None even in strict mode (real SQL null).""" + from linkml_map.utils.eval_utils import eval_expr_with_mapping + + assert eval_expr_with_mapping("{score}", {"score": None}, strict=True) is None + + # ---- Distribution edge cases ---- From efb03340243e519389b8dbc1ab2905339438e972 Mon Sep 17 00:00:00 2001 From: amc-corey-cox <69321580+amc-corey-cox@users.noreply.github.com> Date: Tue, 12 May 2026 14:16:28 -0500 Subject: [PATCH 2/2] Address PR review feedback - _eval_name raises NameError (not TransformationError) in strict mode so ObjectTransformer._slot_error_context can enrich it with class / slot / source-row context. NameError is also outside the (InvalidExpression, TypeError, ValueError) catch list in _eval_expr, so it propagates past the unrestricted_eval fallback rather than being swallowed. - Update _warned_unbound_names docstring to match actual lifetime (per transformer instance, not per "run"), and document that this is intentional rather than a bug. - Drop "(or any explicit join target)" from the test module docstring; joined-class slots are intentionally excluded from bare-name resolution. - Inline codespell:ignore directives on lines using the deliberate 'scroe' typo (kept inline rather than added to repo-wide ignore-words-list). --- .../transformer/object_transformer.py | 12 +++++- src/linkml_map/utils/eval_utils.py | 24 ++++++++---- tests/test_cli/test_cli.py | 4 +- .../test_transformer/test_expr_strict_mode.py | 39 +++++++++++-------- tests/test_utils/test_eval_utils.py | 18 +++++---- 5 files changed, 62 insertions(+), 35 deletions(-) diff --git a/src/linkml_map/transformer/object_transformer.py b/src/linkml_map/transformer/object_transformer.py index 56e8bd8b..665ecc6a 100644 --- a/src/linkml_map/transformer/object_transformer.py +++ b/src/linkml_map/transformer/object_transformer.py @@ -272,8 +272,16 @@ class ObjectTransformer(Transformer): lookup_index: Any = None # Optional[LookupIndex] — lazy import to avoid hard duckdb dep _warned_unbound_names: set[str] = field(default_factory=set, repr=False) - """Names already warned about in non-strict mode. Shared across rows so - each unique unbound reference logs once per transform run, not once per row.""" + """Names already warned about in non-strict mode. + + Shared across all expression evaluations on this transformer + instance — each unique unbound reference logs once, not once per + row. The set is never cleared automatically, so a transformer + reused across multiple logical runs will not re-warn for the same + typo. This is the intended behavior: if a spec has a typo, one + warning per typo across the lifetime of the transformer is enough. + Construct a fresh ``ObjectTransformer`` if you want a clean slate. + """ def index(self, source_obj: Any, target: str | None = None) -> None: """ diff --git a/src/linkml_map/utils/eval_utils.py b/src/linkml_map/utils/eval_utils.py index 1efea0a3..52c60139 100644 --- a/src/linkml_map/utils/eval_utils.py +++ b/src/linkml_map/utils/eval_utils.py @@ -26,8 +26,6 @@ from simpleeval import EvalWithCompoundTypes, NameNotDefined -from linkml_map.transformer.errors import TransformationError - logger = logging.getLogger(__name__) @@ -452,11 +450,21 @@ def _eval_name(self, node: ast.Name) -> Any: # noqa: ANN401 Override name resolution to surface unbound variables in strict mode. The default simpleeval behavior raises ``NameNotDefined`` when a - name has no binding. With ``strict=True``, that surfaces as a - :class:`TransformationError`; with ``strict=False`` it emits a - warning and returns ``None`` (preserving SQL-style null - propagation for backward compatibility, while losing the signal - for typos and stale references). + name has no binding. With ``strict=True``, this is re-raised as + a built-in :class:`NameError` so callers in the transformer can + wrap it with derivation context (see + ``ObjectTransformer._slot_error_context``). With ``strict=False`` + it emits a warning and returns ``None`` (preserving SQL-style + null propagation for backward compatibility, while losing the + signal for typos and stale references). + + ``NameError`` is deliberately chosen over ``TransformationError`` + here: ``_slot_error_context`` only enriches non-``TransformationError`` + exceptions with class/slot/row context. ``NameError`` is also + outside the ``(InvalidExpression, TypeError, ValueError)`` catch + list in ``ObjectTransformer._eval_expr``, so it propagates + cleanly past the ``unrestricted_eval`` fallback rather than + being swallowed. Note: names that resolve to ``None`` (e.g. a schema-declared slot absent from the current row) never reach this path — they @@ -468,7 +476,7 @@ def _eval_name(self, node: ast.Name) -> Any: # noqa: ANN401 except NameNotDefined: msg = f"Expression references '{node.id}' which is not a slot on the source class. Typo or stale reference?" if self.strict: - raise TransformationError(msg) from None + raise NameError(msg) from None if node.id not in self._warned_unbound: self._warned_unbound.add(node.id) logger.warning("%s Returning None (run in strict mode to surface as an error).", msg) diff --git a/tests/test_cli/test_cli.py b/tests/test_cli/test_cli.py index eff9e208..879dc0bb 100644 --- a/tests/test_cli/test_cli.py +++ b/tests/test_cli/test_cli.py @@ -192,7 +192,7 @@ def _write_typo_spec_fixture(tmp_path: Path) -> tuple[Path, Path, Path]: " populated_from: Person\n" " slot_derivations:\n" " name:\n" - ' expr: "{scroe}"\n' + ' expr: "{scroe}"\n' # codespell:ignore ) input_path = tmp_path / "input.yaml" input_path.write_text("id: p1\nscore: 5\n") @@ -217,7 +217,7 @@ def test_map_data_strict_flag_surfaces_typo(runner: CliRunner, tmp_path: Path) - ], ) assert result.exit_code != 0 - assert "scroe" in (result.stderr + str(result.exception)) + assert "scroe" in (result.stderr + str(result.exception)) # codespell:ignore def test_map_data_no_strict_default_succeeds(runner: CliRunner, tmp_path: Path) -> None: diff --git a/tests/test_transformer/test_expr_strict_mode.py b/tests/test_transformer/test_expr_strict_mode.py index 99af41ac..c8d08cc2 100644 --- a/tests/test_transformer/test_expr_strict_mode.py +++ b/tests/test_transformer/test_expr_strict_mode.py @@ -2,8 +2,8 @@ In non-strict mode the evaluator preserves SQL-style null propagation but emits a warning when an expression references a name that is not a slot -on the source class (or any explicit join target). In strict mode the -same condition raises ``TransformationError``. +on the source class. In strict mode the same condition raises +``TransformationError``. A schema-declared slot that is absent from the current row must still resolve to ``None`` in both modes — that case is a real SQL null, not @@ -48,14 +48,14 @@ def _make_transformer(transform_spec: dict, *, strict: bool) -> ObjectTransforme return transformer -TYPO_SPEC = yaml.safe_load(""" -class_derivations: - Agent: - populated_from: Person - slot_derivations: - name: - expr: "{scroe}" -""") +TYPO_SPEC = yaml.safe_load( + "class_derivations:\n" + " Agent:\n" + " populated_from: Person\n" + " slot_derivations:\n" + " name:\n" + ' expr: "{scroe}"\n' # codespell:ignore +) VALID_SPEC = yaml.safe_load(""" @@ -69,10 +69,17 @@ def _make_transformer(transform_spec: dict, *, strict: bool) -> ObjectTransforme def test_typo_strict_raises() -> None: - """Strict mode raises TransformationError when an expression references a non-slot.""" + """Strict mode raises TransformationError when an expression references a non-slot. + + The raw ``NameError`` from ``_eval_name`` is wrapped by + ``_slot_error_context`` so the surfaced exception carries class / + slot / source-row context for diagnostics. + """ transformer = _make_transformer(TYPO_SPEC, strict=True) - with pytest.raises(TransformationError, match="scroe"): + with pytest.raises(TransformationError, match="scroe") as exc_info: # codespell:ignore transformer.map_object({"id": "p1", "age": 30, "score": 5}, source_type="Person") + assert exc_info.value.class_derivation_name == "Agent" + assert exc_info.value.slot_derivation_name == "name" def test_typo_non_strict_warns_and_returns_none(caplog: pytest.LogCaptureFixture) -> None: @@ -81,7 +88,7 @@ def test_typo_non_strict_warns_and_returns_none(caplog: pytest.LogCaptureFixture with caplog.at_level(logging.WARNING, logger="linkml_map.utils.eval_utils"): result = transformer.map_object({"id": "p1", "age": 30, "score": 5}, source_type="Person") assert result == {"name": None} - assert any("scroe" in rec.message for rec in caplog.records) + assert any("scroe" in rec.message for rec in caplog.records) # codespell:ignore @pytest.mark.parametrize("strict", [True, False]) @@ -135,7 +142,7 @@ def test_non_strict_warning_dedupes_across_rows(caplog: pytest.LogCaptureFixture for row in rows: transformer.map_object(row, source_type="Person") - scroe_warnings = [rec for rec in caplog.records if "scroe" in rec.message] - assert len(scroe_warnings) == 1, ( - f"Expected one warning for the 'scroe' typo across {len(rows)} rows, got {len(scroe_warnings)}" + typo_warnings = [rec for rec in caplog.records if "scroe" in rec.message] # codespell:ignore + assert len(typo_warnings) == 1, ( + f"Expected one warning for the typo across {len(rows)} rows, got {len(typo_warnings)}" ) diff --git a/tests/test_utils/test_eval_utils.py b/tests/test_utils/test_eval_utils.py index 079e592e..7ae53dcb 100644 --- a/tests/test_utils/test_eval_utils.py +++ b/tests/test_utils/test_eval_utils.py @@ -648,7 +648,7 @@ def test_eval_unbound_name_default_returns_none() -> None: """Non-strict (default) returns None for unbound names, preserving SQL-null semantics.""" from linkml_map.utils.eval_utils import eval_expr_with_mapping - assert eval_expr_with_mapping("{scroe}", {"score": 5}) is None + assert eval_expr_with_mapping("{scroe}", {"score": 5}) is None # codespell:ignore def test_eval_unbound_name_default_logs_warning(caplog: pytest.LogCaptureFixture) -> None: @@ -658,19 +658,23 @@ def test_eval_unbound_name_default_logs_warning(caplog: pytest.LogCaptureFixture from linkml_map.utils.eval_utils import eval_expr_with_mapping with caplog.at_level(logging.WARNING, logger="linkml_map.utils.eval_utils"): - result = eval_expr_with_mapping("{scroe}", {"score": 5}) + result = eval_expr_with_mapping("{scroe}", {"score": 5}) # codespell:ignore assert result is None - assert any("scroe" in rec.message for rec in caplog.records) + assert any("scroe" in rec.message for rec in caplog.records) # codespell:ignore def test_eval_unbound_name_strict_raises() -> None: - """Strict surfaces unbound names as TransformationError with the offending name.""" - from linkml_map.transformer.errors import TransformationError + """Strict surfaces unbound names as NameError with the offending name. + + The transformer wraps this as ``TransformationError`` with derivation + context via ``_slot_error_context``; at the bare-evaluator level it + is the built-in ``NameError``. + """ from linkml_map.utils.eval_utils import eval_expr_with_mapping - with pytest.raises(TransformationError, match="scroe"): - eval_expr_with_mapping("{scroe}", {"score": 5}, strict=True) + with pytest.raises(NameError, match="scroe"): # codespell:ignore + eval_expr_with_mapping("{scroe}", {"score": 5}, strict=True) # codespell:ignore def test_eval_bound_name_with_none_value_returns_none_in_strict() -> None: