Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions src/linkml_map/cli/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,16 @@ def main(verbose: int, quiet: bool) -> None:
show_default=True,
help="Allow unrestricted eval of python expressions.",
)
@click.option(
"--strict/--no-strict",
default=False,
show_default=True,
help=(
"Raise an error when an expression references a name that is not a slot "
"on the source class. Recommended for real data transforms; the default "
"warns and returns None for backward compatibility."
),
)
@click.option(
"--output-format",
"-f",
Expand Down
52 changes: 49 additions & 3 deletions src/linkml_map/transformer/object_transformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import logging
from collections.abc import Iterator, Mapping
from contextlib import contextmanager
from dataclasses import dataclass
from dataclasses import dataclass, field
from typing import Any

import yaml
Expand Down Expand Up @@ -126,6 +126,26 @@ def __init__( # noqa: PLR0913
self.class_deriv: ClassDerivation | None = class_deriv
if bindings:
self.bindings.update(bindings)
self._schema_slots: set[str] = self._collect_schema_slots()

def _collect_schema_slots(self) -> set[str]:
"""Slot names declared on the source class.

Used by ``__getitem__`` to distinguish a schema-declared slot
absent from the current row (legitimate SQL null) from a name
that does not exist in the schema (typo / stale reference).

Explicit-join aliases are not added here — they are already
handled earlier in ``__getitem__`` via ``_join_specs``. Slots on
join-target classes are intentionally excluded so that a bare
reference like ``{score}`` (instead of ``{Reading.score}``)
surfaces as a typo in strict mode for explicit-join specs.
Implicit-join column resolution is a separate concern handled
by the runtime merge in ``_derive_nested_objects`` (#217).
"""
if self.source_type and self.source_type in self.sv.all_classes():
return {s.name for s in self.sv.class_induced_slots(self.source_type)}
return set()

@classmethod
def from_context(cls, transformer: ObjectTransformer, context: DerivationContext) -> Bindings:
Expand Down Expand Up @@ -203,8 +223,16 @@ def __getitem__(self, name: Any) -> Any:
elif name == self.source_type and name not in self.source_obj:
# Self-reference: {Reading.score} when source_type is Reading (non-merge context)
self.bindings[name] = DynObj(**self.source_obj)
else:
elif name in self.source_obj:
_ = self.get_ctxt_obj_and_dict({name: self.source_obj[name]})
elif name in self._schema_slots:
# Schema-declared slot absent from this row → SQL null.
# Distinct from a name that is not in the schema at all,
# which falls through and raises KeyError so simpleeval
# surfaces it as NameNotDefined (handled in strict mode).
return None
else:
raise KeyError(name)

return self.bindings.get(name)

Expand Down Expand Up @@ -243,6 +271,18 @@ class ObjectTransformer(Transformer):
object_index: ObjectIndex = None
lookup_index: Any = None # Optional[LookupIndex] — lazy import to avoid hard duckdb dep

_warned_unbound_names: set[str] = field(default_factory=set, repr=False)
"""Names already warned about in non-strict mode.

Shared across all expression evaluations on this transformer
instance — each unique unbound reference logs once, not once per
row. The set is never cleared automatically, so a transformer
reused across multiple logical runs will not re-warn for the same
typo. This is the intended behavior: if a spec has a typo, one
warning per typo across the lifetime of the transformer is enough.
Construct a fresh ``ObjectTransformer`` if you want a clean slate.
"""

def index(self, source_obj: Any, target: str | None = None) -> None:
"""
Create an index over a container object.
Expand Down Expand Up @@ -510,7 +550,13 @@ def _eval_expr(
(e.g. ``slot`` for referencing previously derived target slots).
"""
try:
return eval_expr_with_mapping(expr, bindings, functions=functions)
return eval_expr_with_mapping(
expr,
bindings,
functions=functions,
strict=self.strict,
warned_unbound=self._warned_unbound_names,
)
except (InvalidExpression, TypeError, ValueError):
if not self.unrestricted_eval:
raise
Expand Down
12 changes: 12 additions & 0 deletions src/linkml_map/transformer/transformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,18 @@ class Transformer(ABC):
unrestricted_eval: bool = field(default=False)
"""Set to True to allow arbitrary evals as part of transformation."""

strict: bool = field(default=False)
"""Raise on expression references that do not resolve to a schema slot.

When ``False`` (the default), unresolved names emit a warning and the
expression evaluator returns ``None`` (preserving SQL-style null
propagation for legitimate empty values but losing the signal for
typos). When ``True``, unresolved names raise
:class:`~linkml_map.transformer.errors.TransformationError`, which
surfaces typos, stale references, and wrong-table accesses that
would otherwise produce silent nulls in the output.
"""

_curie_converter: Converter = None

def map_object(self, obj: OBJECT_TYPE, source_type: str | None = None, **kwargs: Any) -> OBJECT_TYPE:
Expand Down
73 changes: 65 additions & 8 deletions src/linkml_map/utils/eval_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -410,7 +410,18 @@ class LinkMLEvaluator(EvalWithCompoundTypes):
- Numeric-string coercion for comparison operators
"""

def __init__(self, **kwargs: Any) -> None: # noqa: ANN401
def __init__(
self,
*,
strict: bool = False,
warned_unbound: set[str] | None = None,
**kwargs: Any, # noqa: ANN401
) -> None:
self.strict: bool = strict
# Shared across rows when the caller passes a set in. Per-evaluator
# default dedupes within a single expression eval (mostly a no-op
# since a given name appears once per expr, but cheap and safe).
self._warned_unbound: set[str] = warned_unbound if warned_unbound is not None else set()
super().__init__(**kwargs)
for op_type in (ast.Eq, ast.NotEq, ast.Lt, ast.LtE, ast.Gt, ast.GtE):
self.operators[op_type] = _coercing(self.operators[op_type])
Expand All @@ -436,14 +447,39 @@ def __init__(self, **kwargs: Any) -> None: # noqa: ANN401

def _eval_name(self, node: ast.Name) -> Any: # noqa: ANN401
"""
Override name resolution to return None for unbound variables.

The default simpleeval behavior raises ``NameNotDefined``.
In LinkML expressions, unbound variables resolve to None.
Override name resolution to surface unbound variables in strict mode.

The default simpleeval behavior raises ``NameNotDefined`` when a
name has no binding. With ``strict=True``, this is re-raised as
a built-in :class:`NameError` so callers in the transformer can
wrap it with derivation context (see
``ObjectTransformer._slot_error_context``). With ``strict=False``
it emits a warning and returns ``None`` (preserving SQL-style
null propagation for backward compatibility, while losing the
signal for typos and stale references).

``NameError`` is deliberately chosen over ``TransformationError``
here: ``_slot_error_context`` only enriches non-``TransformationError``
exceptions with class/slot/row context. ``NameError`` is also
outside the ``(InvalidExpression, TypeError, ValueError)`` catch
list in ``ObjectTransformer._eval_expr``, so it propagates
cleanly past the ``unrestricted_eval`` fallback rather than
being swallowed.

Note: names that resolve to ``None`` (e.g. a schema-declared slot
absent from the current row) never reach this path — they
succeed via ``super()._eval_name`` and propagate ``None`` as
legitimate SQL null.
"""
try:
return super()._eval_name(node)
except NameNotDefined:
msg = f"Expression references '{node.id}' which is not a slot on the source class. Typo or stale reference?"
if self.strict:
raise NameError(msg) from None
if node.id not in self._warned_unbound:
self._warned_unbound.add(node.id)
logger.warning("%s Returning None (run in strict mode to surface as an error).", msg)
Comment thread
amc-corey-cox marked this conversation as resolved.
return None

def _eval_set(self, node: ast.Set) -> Any: # noqa: ANN401
Expand Down Expand Up @@ -503,15 +539,29 @@ def _distributed_getattr(obj: Any, attr: str) -> Any: # noqa: ANN401
return getattr(obj, attr)


def _make_evaluator(names: Any, functions: Any = None) -> LinkMLEvaluator: # noqa: ANN401
def _make_evaluator(
names: Any, # noqa: ANN401
functions: Any = None, # noqa: ANN401
*,
strict: bool = False,
warned_unbound: set[str] | None = None,
) -> LinkMLEvaluator:
"""Create a configured LinkMLEvaluator instance."""
return LinkMLEvaluator(names=names, functions=functions or FUNCTIONS)
return LinkMLEvaluator(
names=names,
functions=functions or FUNCTIONS,
strict=strict,
warned_unbound=warned_unbound,
)


def eval_expr_with_mapping(
expr: str,
mapping: Mapping,
functions: dict[str, Any] | None = None,
*,
strict: bool = False,
warned_unbound: set[str] | None = None,
) -> Any: # noqa: ANN401
"""
Evaluate a given expression, with restricted syntax.
Expand All @@ -523,11 +573,18 @@ def eval_expr_with_mapping(
:param mapping: Variable bindings for the expression.
:param functions: Additional functions to make available. Merged with
built-in functions; caller-provided names take precedence.
:param strict: If ``True``, unbound names raise
:class:`~linkml_map.transformer.errors.TransformationError`
instead of warning and returning ``None``.
:param warned_unbound: Optional set used to dedupe non-strict
warnings across multiple ``eval_expr_with_mapping`` calls. Pass
the same set across all evaluations in a single transform run
so each unique unbound name is logged once, not once per row.
"""
if expr == "None":
return None
merged = {**FUNCTIONS, **functions} if functions else None
evaluator = _make_evaluator(names=mapping, functions=merged)
evaluator = _make_evaluator(names=mapping, functions=merged, strict=strict, warned_unbound=warned_unbound)
return evaluator.eval(expr)


Expand Down
78 changes: 78 additions & 0 deletions tests/test_cli/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,84 @@ def test_map_data(runner: CliRunner) -> None:
assert tr_data["agents"][0]["label"] == "fred bloggs"


def _write_typo_spec_fixture(tmp_path: Path) -> tuple[Path, Path, Path]:
"""Write a minimal schema + spec (with a typo'd expr) + input row to tmp_path.

Returns (schema_path, spec_path, input_path).
"""
schema_path = tmp_path / "schema.yaml"
schema_path.write_text(
"id: https://example.org/typo-source\n"
"name: typo_source\n"
"prefixes:\n"
" linkml: https://w3id.org/linkml/\n"
"imports:\n"
" - linkml:types\n"
"default_range: string\n"
"classes:\n"
" Person:\n"
" attributes:\n"
" id: {identifier: true}\n"
" score: {range: integer}\n"
)
spec_path = tmp_path / "spec.yaml"
spec_path.write_text(
"source_schema: typo_source\n"
"class_derivations:\n"
" Agent:\n"
" populated_from: Person\n"
" slot_derivations:\n"
" name:\n"
' expr: "{scroe}"\n' # codespell:ignore
)
input_path = tmp_path / "input.yaml"
input_path.write_text("id: p1\nscore: 5\n")
return schema_path, spec_path, input_path


def test_map_data_strict_flag_surfaces_typo(runner: CliRunner, tmp_path: Path) -> None:
"""`--strict` makes an expression typo fail the command instead of silent null."""
schema_path, spec_path, input_path = _write_typo_spec_fixture(tmp_path)
result = runner.invoke(
main,
[
"map-data",
"--strict",
"-T",
str(spec_path),
"-s",
str(schema_path),
"--source-type",
"Person",
str(input_path),
],
)
assert result.exit_code != 0
assert "scroe" in (result.stderr + str(result.exception)) # codespell:ignore


def test_map_data_no_strict_default_succeeds(runner: CliRunner, tmp_path: Path) -> None:
"""Default (non-strict) preserves backward-compat: exits 0, produces empty/null output."""
schema_path, spec_path, input_path = _write_typo_spec_fixture(tmp_path)
result = runner.invoke(
main,
[
"map-data",
"-T",
str(spec_path),
"-s",
str(schema_path),
"--source-type",
"Person",
str(input_path),
],
)
assert result.exit_code == 0
# Null values are omitted by the YAML dumper, so {name: None} round-trips to {}.
out = yaml.safe_load(result.stdout) or {}
assert out.get("name") is None


def test_map_data_norm_denorm(runner: CliRunner) -> None:
cmd = [
"map-data",
Expand Down
Loading
Loading