From 00c752ef8b63ff70c0db462478c710e6ce12cddc Mon Sep 17 00:00:00 2001
From: amc-corey-cox <69321580+amc-corey-cox@users.noreply.github.com>
Date: Wed, 4 Mar 2026 07:57:09 -0600
Subject: [PATCH 1/6] Add cross-table lookup support for join-based
 transformations

Implement spec-driven cross-table lookups using DuckDB, enabling
slot derivations to reference columns from secondary tables via
`{table.column}` syntax in expressions. This supports biomedical
data harmonization use cases (e.g., pulling demographics into
measurement observations from different source tables).

- Extend AliasedClass with source_key, lookup_key, join_on fields
- Create DuckDB-backed LookupIndex for fast keyed row lookups
- Fix _eval_set to accept {obj.attr} null-propagation syntax
- Wire cross-table resolution into Bindings via join_specs
- Add get_path() to DataLoader for file path resolution
- Create transform_spec() engine for spec-driven processing
- Add 18 tests (unit + integration) covering the full stack

Closes #134

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 src/linkml_map/datamodel/transformer_model.py |   5 +-
 .../datamodel/transformer_model.yaml          |   8 +-
 src/linkml_map/loaders/data_loaders.py        |  14 +
 src/linkml_map/transformer/engine.py          |  80 ++++
 .../transformer/object_transformer.py         |  26 +-
 src/linkml_map/utils/eval_utils.py            |   5 +-
 src/linkml_map/utils/lookup_index.py          |  89 +++++
 .../test_cross_table_lookup.py                | 347 ++++++++++++++++++
 tests/test_utils/test_eval_utils.py           |  32 ++
 tests/test_utils/test_lookup_index.py         |  84 +++++
 10 files changed, 685 insertions(+), 5 deletions(-)
 create mode 100644 src/linkml_map/transformer/engine.py
 create mode 100644 src/linkml_map/utils/lookup_index.py
 create mode 100644 tests/test_transformer/test_cross_table_lookup.py
 create mode 100644 tests/test_utils/test_lookup_index.py

diff --git a/src/linkml_map/datamodel/transformer_model.py b/src/linkml_map/datamodel/transformer_model.py
index 9c50f72..0f20561 100644
--- a/src/linkml_map/datamodel/transformer_model.py
+++ b/src/linkml_map/datamodel/transformer_model.py
@@ -263,7 +263,7 @@ class ClassDerivation(ElementDerivation):
                        'EnumDerivation',
                        'PermissibleValueDerivation']} })
     joins: Optional[Dict[str, AliasedClass]] = Field(default_factory=dict, description="""Additional classes to be joined to derive instances of the target class""", json_schema_extra = { "linkml_meta": {'alias': 'joins',
-         'comments': ['not yet implemented'],
+         'comments': ['supports cross-table lookups via source_key/lookup_key or on shorthand'],
          'domain_of': ['ClassDerivation']} })
     slot_derivations: Optional[Dict[str, SlotDerivation]] = Field(default_factory=dict, json_schema_extra = { "linkml_meta": {'alias': 'slot_derivations',
          'domain_of': ['TransformationSpecification', 'ClassDerivation']} })
@@ -341,6 +341,9 @@ class AliasedClass(ConfiguredBaseModel):
 
     alias: str = Field(default=..., description="""name of the class to be aliased""", json_schema_extra = { "linkml_meta": {'alias': 'alias', 'domain_of': ['AliasedClass']} })
     class_named: Optional[str] = Field(default=None, description="""local alias for the class""", json_schema_extra = { "linkml_meta": {'alias': 'class_named', 'domain_of': ['AliasedClass']} })
+    source_key: Optional[str] = Field(default=None, description="""column in the primary (populated_from) table used as the join key""", json_schema_extra = { "linkml_meta": {'alias': 'source_key', 'domain_of': ['AliasedClass']} })
+    lookup_key: Optional[str] = Field(default=None, description="""column in the secondary (joined) table used as the join key""", json_schema_extra = { "linkml_meta": {'alias': 'lookup_key', 'domain_of': ['AliasedClass']} })
+    join_on: Optional[str] = Field(default=None, description="""shorthand for source_key and lookup_key when both share the same column name""", json_schema_extra = { "linkml_meta": {'alias': 'join_on', 'domain_of': ['AliasedClass']} })
 
 
 class SlotDerivation(ElementDerivation):
diff --git a/src/linkml_map/datamodel/transformer_model.yaml b/src/linkml_map/datamodel/transformer_model.yaml
index ab51e3f..3b3813f 100644
--- a/src/linkml_map/datamodel/transformer_model.yaml
+++ b/src/linkml_map/datamodel/transformer_model.yaml
@@ -185,7 +185,7 @@ classes:
         inlined: true
         description: Additional classes to be joined to derive instances of the target class
         comments:
-          - not yet implemented
+          - supports cross-table lookups via source_key/lookup_key or on shorthand
       slot_derivations:
         range: SlotDerivation
         multivalued: true
@@ -220,6 +220,12 @@ classes:
         description: name of the class to be aliased
       class_named:
         description: local alias for the class
+      source_key:
+        description: column in the primary (populated_from) table used as the join key
+      lookup_key:
+        description: column in the secondary (joined) table used as the join key
+      join_on:
+        description: shorthand for source_key and lookup_key when both share the same column name
 
   SlotDerivation:
     is_a: ElementDerivation
diff --git a/src/linkml_map/loaders/data_loaders.py b/src/linkml_map/loaders/data_loaders.py
index 9c956fd..d4aa094 100644
--- a/src/linkml_map/loaders/data_loaders.py
+++ b/src/linkml_map/loaders/data_loaders.py
@@ -236,6 +236,20 @@ def _find_file(self, identifier: str) -> Optional[Path]:
 
         return None
 
+    def get_path(self, identifier: str) -> Path:
+        """
+        Return the resolved file path for *identifier*.
+
+        :param identifier: Logical table/file name (without extension).
+        :returns: Path to the matching data file, as located by ``_find_file``.
+        :raises FileNotFoundError: If no matching file is found.
+        """
+        path = self._find_file(identifier)
+        if path is None:
+            msg = f"No data file found for identifier {identifier!r} under {self.base_path}"
+            raise FileNotFoundError(msg)
+        return path
+
     def __contains__(self, identifier: str) -> bool:
         """Check if a data file exists for the given identifier."""
         if self.is_single_file:
diff --git a/src/linkml_map/transformer/engine.py b/src/linkml_map/transformer/engine.py
new file mode 100644
index 0000000..f748083
--- /dev/null
+++ b/src/linkml_map/transformer/engine.py
@@ -0,0 +1,80 @@
+"""Spec-driven processing engine with cross-table lookup support."""
+
+from __future__ import annotations
+
+import logging
+from typing import TYPE_CHECKING, Any
+
+from linkml_map.utils.lookup_index import LookupIndex
+
+if TYPE_CHECKING:
+    from collections.abc import Iterator
+
+    from linkml_map.loaders.data_loaders import DataLoader
+    from linkml_map.transformer.object_transformer import ObjectTransformer
+
+logger = logging.getLogger(__name__)
+
+
+def transform_spec(
+    transformer: ObjectTransformer,
+    data_loader: DataLoader,
+    source_type: str | None = None,
+) -> Iterator[dict[str, Any]]:
+    """
+    Iterate class_derivation blocks and stream transformed rows.
+
+    For each block whose ``populated_from`` (falling back to its ``name``) is
+    found in *data_loader*, this function:
+
+    1. Registers any ``joins`` as secondary tables in a :class:`LookupIndex`.
+    2. Streams primary-table rows through :meth:`ObjectTransformer.map_object`.
+    3. Drops secondary tables when the block is done.
+
+    :param transformer: A configured :class:`ObjectTransformer`.
+    :param data_loader: Loader that can resolve table names to file paths.
+    :param source_type: Optional explicit source type override.
+    :returns: Iterator of transformed row dicts.
+    :raises ValueError: If a join sets neither ``join_on`` nor ``lookup_key``.
+    """
+    spec = transformer.derived_specification
+    if spec is None:
+        return
+
+    if transformer.lookup_index is None:  # create the index lazily on first use
+        transformer.lookup_index = LookupIndex()
+
+    for class_deriv in spec.class_derivations:
+        table_name = class_deriv.populated_from or class_deriv.name
+        if table_name not in data_loader:
+            logger.debug("Skipping class_derivation %s: no data found", class_deriv.name)
+            continue
+
+        joined_tables: list[str] = []
+        try:
+            # Register secondary tables; the joins key doubles as the file identifier
+            if class_deriv.joins:
+                for join_name, join_spec in class_deriv.joins.items():
+                    lookup_key = join_spec.lookup_key or join_spec.join_on
+                    if not lookup_key:
+                        msg = (
+                            f"Join {join_name!r} must specify 'join_on' or "
+                            f"'lookup_key'"
+                        )
+                        raise ValueError(msg)
+                    join_path = data_loader.get_path(join_name)
+                    transformer.lookup_index.register_table(
+                        join_name, join_path, lookup_key
+                    )
+                    joined_tables.append(join_name)  # tracked so ``finally`` can drop it
+
+            # Stream primary table rows
+            for row in data_loader[table_name]:
+                yield transformer.map_object(
+                    row,
+                    source_type=source_type or table_name,
+                    class_derivation=class_deriv,
+                )
+        finally:  # runs on completion, on error, and when the generator is closed early
+            for jt in joined_tables:
+                transformer.lookup_index.drop(jt)
diff --git a/src/linkml_map/transformer/object_transformer.py b/src/linkml_map/transformer/object_transformer.py
index fb67cdb..c990570 100644
--- a/src/linkml_map/transformer/object_transformer.py
+++ b/src/linkml_map/transformer/object_transformer.py
@@ -14,6 +14,7 @@
 from pydantic import BaseModel
 
 from linkml_map.datamodel.transformer_model import (
+    AliasedClass,
     ClassDerivation,
     CollectionType,
     PivotDirectionType,
@@ -46,6 +47,7 @@ def __init__(  # noqa: PLR0913
         source_type: str,
         sv: SchemaView,
         bindings: dict,
+        join_specs: Optional[dict[str, AliasedClass]] = None,
     ) -> None:
         self.object_transformer: ObjectTransformer = object_transformer
         self.source_obj: OBJECT_TYPE = source_obj
@@ -53,6 +55,7 @@ def __init__(  # noqa: PLR0913
         self.source_type: str = source_type
         self.sv: SchemaView = sv
         self.bindings: dict = {}
+        self.join_specs: dict[str, AliasedClass] = join_specs or {}
         if bindings:
             self.bindings.update(bindings)
 
@@ -105,10 +108,29 @@ def __iter__(self) -> Iterator:
 
     def __getitem__(self, name: Any) -> Any:
         if name not in self.bindings:
-            _ = self.get_ctxt_obj_and_dict({name: self.source_obj[name]})
+            if name in self.join_specs and self.object_transformer.lookup_index is not None:
+                self.bindings[name] = self._resolve_join(name)
+            else:
+                _ = self.get_ctxt_obj_and_dict({name: self.source_obj[name]})
 
         return self.bindings.get(name)
 
+    def _resolve_join(self, table_name: str) -> DynObj | None:
+        """Resolve a cross-table lookup, returning a DynObj or None."""
+        spec = self.join_specs[table_name]
+        source_key = spec.source_key or spec.join_on
+        lookup_key = spec.lookup_key or spec.join_on  # join_on stands in for either key
+        if not source_key or not lookup_key:
+            msg = f"Join spec for {table_name!r} must specify 'join_on' or both 'source_key' and 'lookup_key'"
+            raise ValueError(msg)
+        key_val = self.source_obj.get(source_key)
+        if key_val is None:  # primary row has no join key -> nothing to look up
+            return None
+        row = self.object_transformer.lookup_index.lookup_row(table_name, lookup_key, key_val)
+        if row is None:  # no matching secondary row; callers see None for the whole table
+            return None
+        return DynObj(**row)
+
     def __setitem__(self, name: Any, value: Any) -> None:
         del name, value
         msg = f"__setitem__ not allowed on class {self.__class__.__name__}"
@@ -124,6 +146,7 @@ class ObjectTransformer(Transformer):
     """
 
     object_index: ObjectIndex = None
+    lookup_index: Any = None  # Optional[LookupIndex] — lazy import to avoid hard duckdb dep
 
     def index(self, source_obj: Any, target: Optional[str] = None) -> None:
         """
@@ -264,6 +287,7 @@ def map_object(
                         source_type=source_type,
                         sv=sv,
                         bindings={"NULL": None},
+                        join_specs=class_deriv.joins if class_deriv.joins else None,
                     )
 
                 try:
diff --git a/src/linkml_map/utils/eval_utils.py b/src/linkml_map/utils/eval_utils.py
index 3af263b..3124816 100644
--- a/src/linkml_map/utils/eval_utils.py
+++ b/src/linkml_map/utils/eval_utils.py
@@ -146,12 +146,13 @@ def _eval_set(self, node: ast.Set) -> Any:  # noqa: ANN401
             msg = "The {} must enclose a single variable"
             raise ValueError(msg)
         e = node.elts[0]
-        if not isinstance(e, ast.Name):
+        if not isinstance(e, (ast.Name, ast.Attribute)):
             msg = "The {} must enclose a variable"
             raise TypeError(msg)
         v = self._eval(e)
         if v is None:
-            msg = f"{e.id} is not set"
+            label = ast.dump(e) if isinstance(e, ast.Attribute) else e.id
+            msg = f"{label} is not set"
             raise UnsetValueError(msg)
         return v
 
diff --git a/src/linkml_map/utils/lookup_index.py b/src/linkml_map/utils/lookup_index.py
new file mode 100644
index 0000000..c379a2d
--- /dev/null
+++ b/src/linkml_map/utils/lookup_index.py
@@ -0,0 +1,89 @@
+"""DuckDB-backed cross-table lookup index for join resolution."""
+
+from __future__ import annotations
+
+import re
+from pathlib import Path
+from typing import Any
+
+import duckdb
+
+_IDENTIFIER_RE = re.compile(r"^[a-zA-Z_][a-zA-Z0-9_]*$")
+
+
+def _validate_identifier(name: str) -> None:
+    """Raise ``ValueError`` unless *name* is a plain ASCII SQL identifier."""
+    if not _IDENTIFIER_RE.fullmatch(name):  # match() would let "$" accept a trailing "\n"
+        msg = f"Invalid identifier: {name!r}"
+        raise ValueError(msg)
+
+
+class LookupIndex:
+    """
+    In-memory DuckDB index for cross-table lookups.
+
+    Each registered table is loaded from a CSV/TSV file via ``read_csv_auto``
+    (every column as VARCHAR) and indexed on a key column for row lookups.
+    """
+
+    def __init__(self) -> None:
+        """Initialize an empty lookup index with an in-memory DuckDB connection."""
+        self._conn = duckdb.connect(":memory:")
+        self._tables: dict[str, str] = {}  # table_name -> key_column
+
+    def register_table(self, name: str, file_path: Path | str, key_column: str) -> None:
+        """
+        Load a CSV/TSV file into DuckDB and create an index on *key_column*.
+
+        :param name: Logical table name (must be a valid identifier).
+        :param file_path: Path to a CSV or TSV file.
+        :param key_column: Column to index for lookups.
+        """
+        _validate_identifier(name)
+        _validate_identifier(key_column)
+        quoted_path = str(Path(file_path)).replace("'", "''")  # double any ' for the SQL literal
+        self._conn.execute(
+            f"CREATE OR REPLACE TABLE {name} AS "  # noqa: S608
+            f"SELECT * FROM read_csv_auto('{quoted_path}', all_varchar=true)"
+        )
+        self._conn.execute(
+            f"CREATE INDEX IF NOT EXISTS idx_{name}_{key_column} ON {name} ({key_column})"
+        )
+        self._tables[name] = key_column
+
+    def lookup_row(
+        self, table: str, key_col: str, key_val: Any  # noqa: ANN401
+    ) -> dict[str, Any] | None:
+        """
+        Return the first row matching *key_val* on *key_col*, or ``None``.
+
+        :param table: Previously registered table name.
+        :param key_col: Column to match on.
+        :param key_val: Value to look up; compared as ``str(key_val)``.
+        :returns: Row as a dict, or None if not found.
+        """
+        _validate_identifier(table)
+        _validate_identifier(key_col)
+        result = self._conn.execute(
+            f"SELECT * FROM {table} WHERE {key_col} = $1 LIMIT 1",  # noqa: S608
+            [str(key_val)],
+        ).fetchone()
+        if result is None:
+            return None
+        columns = [desc[0] for desc in self._conn.description]
+        return dict(zip(columns, result))  # no strict=: that keyword needs Python 3.10+
+
+    def drop(self, table: str) -> None:
+        """Drop a registered table, releasing memory."""
+        _validate_identifier(table)
+        self._conn.execute(f"DROP TABLE IF EXISTS {table}")
+        self._tables.pop(table, None)
+
+    def is_registered(self, table: str) -> bool:
+        """Check whether *table* has been registered."""
+        return table in self._tables
+
+    def close(self) -> None:
+        """Close the DuckDB connection."""
+        self._conn.close()
+        self._tables.clear()
diff --git a/tests/test_transformer/test_cross_table_lookup.py b/tests/test_transformer/test_cross_table_lookup.py
new file mode 100644
index 0000000..f2eabb1
--- /dev/null
+++ b/tests/test_transformer/test_cross_table_lookup.py
@@ -0,0 +1,347 @@
+"""Integration tests for cross-table join lookups (Issue #134).
+
+These tests exercise the full stack: DataLoader → LookupIndex → Bindings →
+ObjectTransformer → engine.transform_spec.  Temporary TSV files serve as
+primary and secondary tables.
+"""
+
+# ruff: noqa: ANN401, PLR2004
+
+import textwrap
+
+import pytest
+import yaml
+from linkml_runtime import SchemaView
+
+from linkml_map.loaders.data_loaders import DataLoader
+from linkml_map.transformer.engine import transform_spec
+from linkml_map.transformer.object_transformer import ObjectTransformer
+
+
+# ---- fixtures ----
+
+SOURCE_SCHEMA_YAML = textwrap.dedent("""\
+    id: https://example.org/cross-table-source
+    name: cross_table_source
+    prefixes:
+      linkml: https://w3id.org/linkml/
+    imports:
+      - linkml:types
+    default_range: string
+    classes:
+      lab_results:
+        attributes:
+          sample_id:
+            identifier: true
+          participant_id: {}
+          analyte: {}
+          result_value: {}
+      demographics:
+        attributes:
+          participant_id:
+            identifier: true
+          age_at_exam: {}
+          sex: {}
+      site_info:
+        attributes:
+          site_code:
+            identifier: true
+          site_name: {}
+""")
+
+TARGET_SCHEMA_YAML = textwrap.dedent("""\
+    id: https://example.org/cross-table-target
+    name: cross_table_target
+    prefixes:
+      linkml: https://w3id.org/linkml/
+    imports:
+      - linkml:types
+    default_range: string
+    classes:
+      MeasurementObservation:
+        attributes:
+          sample_id:
+            identifier: true
+          analyte_value: {}
+          age_at_observation: {}
+          participant_sex: {}
+          site_name: {}
+""")
+
+
+@pytest.fixture()
+def data_dir(tmp_path):
+    """Write TSV data files and return the directory path."""
+    lab = tmp_path / "lab_results.tsv"
+    lab.write_text(
+        "sample_id\tparticipant_id\tanalyte\tresult_value\n"
+        "S001\tP001\tglucose\t5.5\n"
+        "S002\tP002\tcholesterol\t200\n"
+        "S003\tP999\tglucose\t6.1\n"  # P999 has no demographics row
+    )
+    demo = tmp_path / "demographics.tsv"
+    demo.write_text(
+        "participant_id\tage_at_exam\tsex\n"
+        "P001\t30\tF\n"
+        "P002\t45\tM\n"
+    )
+    site = tmp_path / "site_info.tsv"
+    site.write_text(
+        "site_code\tsite_name\n"
+        "SITE_A\tBoston Medical\n"
+    )
+    return tmp_path
+
+
+@pytest.fixture()
+def source_sv():
+    return SchemaView(SOURCE_SCHEMA_YAML)
+
+
+@pytest.fixture()
+def target_sv():
+    return SchemaView(TARGET_SCHEMA_YAML)
+
+
+def _make_transformer(source_sv, target_sv, spec_yaml):
+    """Build an ObjectTransformer from inline YAML strings."""
+    tr = ObjectTransformer(unrestricted_eval=False)
+    tr.source_schemaview = source_sv
+    tr.target_schemaview = target_sv
+    tr.create_transformer_specification(yaml.safe_load(spec_yaml))
+    return tr
+
+
+# ---- tests ----
+
+
+def test_cross_table_on_shorthand(data_dir, source_sv, target_sv):
+    """Cross-table lookup using the `join_on` shorthand (same column name in both tables)."""
+    spec = textwrap.dedent("""\
+        class_derivations:
+          MeasurementObservation:
+            populated_from: lab_results
+            joins:
+              demographics:
+                join_on: participant_id
+            slot_derivations:
+              sample_id:
+                populated_from: sample_id
+              analyte_value:
+                populated_from: result_value
+              age_at_observation:
+                expr: "{demographics.age_at_exam}"
+              participant_sex:
+                expr: "{demographics.sex}"
+    """)
+    tr = _make_transformer(source_sv, target_sv, spec)
+    loader = DataLoader(data_dir)
+    results = list(transform_spec(tr, loader))
+
+    assert len(results) == 3
+
+    # S001 → P001 → age 30, sex F
+    r0 = results[0]
+    assert r0["sample_id"] == "S001"
+    assert str(r0["analyte_value"]) == "5.5"
+    assert r0["age_at_observation"] == "30"
+    assert r0["participant_sex"] == "F"
+
+    # S002 → P002 → age 45, sex M
+    r1 = results[1]
+    assert r1["sample_id"] == "S002"
+    assert r1["age_at_observation"] == "45"
+    assert r1["participant_sex"] == "M"
+
+
+def test_cross_table_explicit_keys(data_dir, source_sv, target_sv):
+    """Cross-table lookup with explicit source_key and lookup_key."""
+    spec = textwrap.dedent("""\
+        class_derivations:
+          MeasurementObservation:
+            populated_from: lab_results
+            joins:
+              demographics:
+                source_key: participant_id
+                lookup_key: participant_id
+            slot_derivations:
+              sample_id:
+                populated_from: sample_id
+              age_at_observation:
+                expr: "{demographics.age_at_exam}"
+    """)
+    tr = _make_transformer(source_sv, target_sv, spec)
+    loader = DataLoader(data_dir)
+    results = list(transform_spec(tr, loader))
+
+    assert results[0]["age_at_observation"] == "30"
+    assert results[1]["age_at_observation"] == "45"
+
+
+def test_null_propagation_no_match(data_dir, source_sv, target_sv):
+    """When the lookup table has no matching row, {table.col} propagates None."""
+    spec = textwrap.dedent("""\
+        class_derivations:
+          MeasurementObservation:
+            populated_from: lab_results
+            joins:
+              demographics:
+                join_on: participant_id
+            slot_derivations:
+              sample_id:
+                populated_from: sample_id
+              age_at_observation:
+                expr: "{demographics.age_at_exam}"
+    """)
+    tr = _make_transformer(source_sv, target_sv, spec)
+    loader = DataLoader(data_dir)
+    results = list(transform_spec(tr, loader))
+
+    # S003 → P999 → no demographics row → None
+    r2 = results[2]
+    assert r2["sample_id"] == "S003"
+    assert r2.get("age_at_observation") is None
+
+
+def test_expression_with_joined_column(data_dir, source_sv, target_sv):
+    """Expressions can combine joined columns with arithmetic."""
+    # Override target schema to use integer range
+    target_yaml = textwrap.dedent("""\
+        id: https://example.org/cross-table-target
+        name: cross_table_target
+        prefixes:
+          linkml: https://w3id.org/linkml/
+        imports:
+          - linkml:types
+        default_range: string
+        classes:
+          MeasurementObservation:
+            attributes:
+              sample_id:
+                identifier: true
+              age_at_observation:
+                range: integer
+    """)
+    t_sv = SchemaView(target_yaml)
+
+    spec = textwrap.dedent("""\
+        class_derivations:
+          MeasurementObservation:
+            populated_from: lab_results
+            joins:
+              demographics:
+                join_on: participant_id
+            slot_derivations:
+              sample_id:
+                populated_from: sample_id
+              age_at_observation:
+                expr: "int({demographics.age_at_exam}) * 365"
+    """)
+    tr = _make_transformer(source_sv, t_sv, spec)
+    loader = DataLoader(data_dir)
+    results = list(transform_spec(tr, loader))
+
+    assert results[0]["age_at_observation"] == 30 * 365
+    assert results[1]["age_at_observation"] == 45 * 365
+    # P999 → null propagation through int() would raise, but {..} catches it first
+    assert results[2].get("age_at_observation") is None
+
+
+def test_multiple_joined_tables(data_dir, source_sv, target_sv, tmp_path):
+    """Multiple secondary tables can be joined in a single class_derivation."""
+    # Add a site_code column to lab_results
+    lab = tmp_path / "lab_results.tsv"
+    lab.write_text(
+        "sample_id\tparticipant_id\tanalyte\tresult_value\tsite_code\n"
+        "S001\tP001\tglucose\t5.5\tSITE_A\n"
+    )
+    # Copy demographics and site_info to tmp_path (already in data_dir fixture)
+    (tmp_path / "demographics.tsv").write_text(
+        "participant_id\tage_at_exam\tsex\n"
+        "P001\t30\tF\n"
+    )
+    (tmp_path / "site_info.tsv").write_text(
+        "site_code\tsite_name\n"
+        "SITE_A\tBoston Medical\n"
+    )
+
+    # Extend source schema to include site_code on lab_results
+    src_yaml = textwrap.dedent("""\
+        id: https://example.org/cross-table-source
+        name: cross_table_source
+        prefixes:
+          linkml: https://w3id.org/linkml/
+        imports:
+          - linkml:types
+        default_range: string
+        classes:
+          lab_results:
+            attributes:
+              sample_id:
+                identifier: true
+              participant_id: {}
+              analyte: {}
+              result_value: {}
+              site_code: {}
+          demographics:
+            attributes:
+              participant_id:
+                identifier: true
+              age_at_exam: {}
+              sex: {}
+          site_info:
+            attributes:
+              site_code:
+                identifier: true
+              site_name: {}
+    """)
+    s_sv = SchemaView(src_yaml)
+
+    spec = textwrap.dedent("""\
+        class_derivations:
+          MeasurementObservation:
+            populated_from: lab_results
+            joins:
+              demographics:
+                join_on: participant_id
+              site_info:
+                source_key: site_code
+                lookup_key: site_code
+            slot_derivations:
+              sample_id:
+                populated_from: sample_id
+              age_at_observation:
+                expr: "{demographics.age_at_exam}"
+              participant_sex:
+                expr: "{demographics.sex}"
+              site_name:
+                expr: "{site_info.site_name}"
+    """)
+    tr = _make_transformer(s_sv, target_sv, spec)
+    loader = DataLoader(tmp_path)
+    results = list(transform_spec(tr, loader))
+
+    assert len(results) == 1
+    assert results[0]["age_at_observation"] == "30"
+    assert results[0]["participant_sex"] == "F"
+    assert results[0]["site_name"] == "Boston Medical"
+
+
+def test_join_spec_missing_key_raises(source_sv, target_sv, data_dir):
+    """A join spec with neither `join_on` nor source_key/lookup_key raises ValueError."""
+    spec = textwrap.dedent("""\
+        class_derivations:
+          MeasurementObservation:
+            populated_from: lab_results
+            joins:
+              demographics: {}
+            slot_derivations:
+              sample_id:
+                populated_from: sample_id
+              age_at_observation:
+                expr: "{demographics.age_at_exam}"
+    """)
+    tr = _make_transformer(source_sv, target_sv, spec)
+    loader = DataLoader(data_dir)
+    with pytest.raises(ValueError, match="must specify"):
+        list(transform_spec(tr, loader))
diff --git a/tests/test_utils/test_eval_utils.py b/tests/test_utils/test_eval_utils.py
index cf69e95..4df1ad0 100644
--- a/tests/test_utils/test_eval_utils.py
+++ b/tests/test_utils/test_eval_utils.py
@@ -206,6 +206,38 @@ def test_list_variable_concatenation() -> None:
     assert eval_expr("{x} + {y}", x=["a", "b"], y=["c", "d"]) == ["a", "b", "c", "d"]
 
 
+# ---- Curly-braced attribute access (cross-table syntax) ----
+
+
+def test_curly_attribute_access() -> None:
+    """{obj.attr} resolves attribute access with null propagation."""
+    from linkml_map.utils.dynamic_object import DynObj
+
+    demo = DynObj(age=30, name="Alice")
+    assert eval_expr("{demo.age} * 365", demo=demo) == 30 * 365
+
+
+def test_curly_attribute_null_propagation_none_obj() -> None:
+    """{obj.attr} propagates None when the object itself is None."""
+    assert eval_expr("{demo.age} * 365", demo=None) is None
+
+
+def test_curly_attribute_null_propagation_missing_attr() -> None:
+    """{obj.attr} propagates None when the attribute is missing."""
+    from linkml_map.utils.dynamic_object import DynObj
+
+    demo = DynObj(name="Alice")  # no 'age' attribute
+    assert eval_expr("{demo.age} * 365", demo=demo) is None
+
+
+def test_curly_attribute_in_string_concat() -> None:
+    """{obj.attr} works in string concatenation expressions."""
+    from linkml_map.utils.dynamic_object import DynObj
+
+    demo = DynObj(prefix="Dr")
+    assert eval_expr("{demo.prefix} + '. Smith'", demo=demo) == "Dr. Smith"
+
+
 # ---- Functions ----
 
 
diff --git a/tests/test_utils/test_lookup_index.py b/tests/test_utils/test_lookup_index.py
new file mode 100644
index 0000000..31d24de
--- /dev/null
+++ b/tests/test_utils/test_lookup_index.py
@@ -0,0 +1,84 @@
+"""Tests for the DuckDB-backed LookupIndex."""
+
+# ruff: noqa: ANN401
+
+import pytest
+
+from linkml_map.utils.lookup_index import LookupIndex
+
+
+@pytest.fixture()
+def tmp_tsv(tmp_path):
+    """Create a simple TSV file and return its path."""
+    tsv = tmp_path / "demo.tsv"
+    tsv.write_text("id\tname\tage\nP001\tAlice\t30\nP002\tBob\t25\n")
+    return tsv
+
+
+@pytest.fixture()
+def index():
+    """Create a LookupIndex and close it after the test."""
+    idx = LookupIndex()
+    yield idx
+    idx.close()
+
+
+def test_register_and_lookup(index, tmp_tsv):
+    """Register a table and look up a row by key."""
+    index.register_table("demo", tmp_tsv, "id")
+    row = index.lookup_row("demo", "id", "P001")
+    assert row is not None
+    assert row["name"] == "Alice"
+    assert row["age"] == "30"
+
+
+def test_lookup_missing_row(index, tmp_tsv):
+    """Looking up a nonexistent key returns None."""
+    index.register_table("demo", tmp_tsv, "id")
+    assert index.lookup_row("demo", "id", "MISSING") is None
+
+
+def test_is_registered(index, tmp_tsv):
+    """is_registered reflects table state."""
+    assert not index.is_registered("demo")
+    index.register_table("demo", tmp_tsv, "id")
+    assert index.is_registered("demo")
+
+
+def test_drop(index, tmp_tsv):
+    """Dropping a table removes it from the index."""
+    index.register_table("demo", tmp_tsv, "id")
+    index.drop("demo")
+    assert not index.is_registered("demo")
+
+
+def test_drop_nonexistent(index):
+    """Dropping a table that was never registered does not raise."""
+    index.drop("nonexistent")
+
+
+def test_csv_format(index, tmp_path):
+    """CSV files are also handled by read_csv_auto."""
+    csv = tmp_path / "data.csv"
+    csv.write_text("id,value\nX1,100\nX2,200\n")
+    index.register_table("data", csv, "id")
+    row = index.lookup_row("data", "id", "X2")
+    assert row is not None
+    assert row["value"] == "200"
+
+
+def test_invalid_identifier(index):
+    """SQL-injection-style identifiers are rejected."""
+    with pytest.raises(ValueError, match="Invalid identifier"):
+        index.register_table("drop table;--", "/dev/null", "id")
+
+
+def test_all_varchar_coercion(index, tmp_path):
+    """Numeric-looking values are stored as VARCHAR due to all_varchar=true."""
+    tsv = tmp_path / "nums.tsv"
+    tsv.write_text("id\tcount\n1\t42\n2\t99\n")
+    index.register_table("nums", tsv, "id")
+    row = index.lookup_row("nums", "id", "1")
+    assert row is not None
+    assert row["count"] == "42"
+    assert isinstance(row["count"], str)

From f7223a5ff92ddf02da65ef88979660911c060280 Mon Sep 17 00:00:00 2001
From: amc-corey-cox <69321580+amc-corey-cox@users.noreply.github.com>
Date: Wed, 4 Mar 2026 08:12:18 -0600
Subject: [PATCH 2/6] Fix Python 3.9 compatibility: use Optional[] instead of X
 | None

The `DynObj | None` union syntax requires Python 3.10+. CI tests
against Python 3.9, so use `Optional[DynObj]` instead.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 src/linkml_map/transformer/object_transformer.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/linkml_map/transformer/object_transformer.py b/src/linkml_map/transformer/object_transformer.py
index c990570..a38ba61 100644
--- a/src/linkml_map/transformer/object_transformer.py
+++ b/src/linkml_map/transformer/object_transformer.py
@@ -115,7 +115,7 @@ def __getitem__(self, name: Any) -> Any:
 
         return self.bindings.get(name)
 
-    def _resolve_join(self, table_name: str) -> DynObj | None:
+    def _resolve_join(self, table_name: str) -> Optional[DynObj]:
         """Resolve a cross-table lookup, returning a DynObj or None."""
         spec = self.join_specs[table_name]
         source_key = spec.source_key or spec.join_on

From 48fb889a846882dc134902593745e5b3da08abd3 Mon Sep 17 00:00:00 2001
From: amc-corey-cox <69321580+amc-corey-cox@users.noreply.github.com>
Date: Wed, 4 Mar 2026 08:14:56 -0600
Subject: [PATCH 3/6] Align transformer_model.py with gen-pydantic output

The CI regenerates the Pydantic model from YAML and commits any diff.
Align the hand-edited file with gen-pydantic output to avoid spurious
CI push failures (only difference was comment line wrapping).

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 src/linkml_map/datamodel/transformer_model.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/linkml_map/datamodel/transformer_model.py b/src/linkml_map/datamodel/transformer_model.py
index 0f20561..d0eb912 100644
--- a/src/linkml_map/datamodel/transformer_model.py
+++ b/src/linkml_map/datamodel/transformer_model.py
@@ -263,7 +263,8 @@ class ClassDerivation(ElementDerivation):
                        'EnumDerivation',
                        'PermissibleValueDerivation']} })
     joins: Optional[Dict[str, AliasedClass]] = Field(default_factory=dict, description="""Additional classes to be joined to derive instances of the target class""", json_schema_extra = { "linkml_meta": {'alias': 'joins',
-         'comments': ['supports cross-table lookups via source_key/lookup_key or on shorthand'],
+         'comments': ['supports cross-table lookups via source_key/lookup_key or on '
+                      'shorthand'],
          'domain_of': ['ClassDerivation']} })
     slot_derivations: Optional[Dict[str, SlotDerivation]] = Field(default_factory=dict, json_schema_extra = { "linkml_meta": {'alias': 'slot_derivations',
          'domain_of': ['TransformationSpecification', 'ClassDerivation']} })

From 18a34062f6072ea87b76d9645014808e87f73fe5 Mon Sep 17 00:00:00 2001
From: amc-corey-cox <69321580+amc-corey-cox@users.noreply.github.com>
Date: Wed, 4 Mar 2026 08:16:56 -0600
Subject: [PATCH 4/6] Fix Python 3.9 compat: remove zip(strict=True)

The strict parameter for zip() was added in Python 3.10.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 src/linkml_map/utils/lookup_index.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/linkml_map/utils/lookup_index.py b/src/linkml_map/utils/lookup_index.py
index c379a2d..51dd9aa 100644
--- a/src/linkml_map/utils/lookup_index.py
+++ b/src/linkml_map/utils/lookup_index.py
@@ -71,7 +71,7 @@ def lookup_row(
         if result is None:
             return None
         columns = [desc[0] for desc in self._conn.description]
-        return dict(zip(columns, result, strict=True))
+        return dict(zip(columns, result))
 
     def drop(self, table: str) -> None:
         """Drop a registered table, releasing memory."""

From d6f50c5ba37a7fcd2f1be2873ff606316608ee44 Mon Sep 17 00:00:00 2001
From: amc-corey-cox <69321580+amc-corey-cox@users.noreply.github.com>
Date: Fri, 6 Mar 2026 15:36:28 -0600
Subject: [PATCH 5/6] Address PR review feedback

- Fix 'on shorthand' references to 'join_on' in comments and error messages
- Use parameter binding for file_path in DuckDB read_csv_auto call
- Add # noqa: S608 to validated dynamic SQL statements
- Raise clear ValueError when join is configured but lookup_index is None
- Validate both source_key and lookup_key in engine join registration
- Resolve path in data_loader.get_path() to match docstring guarantee

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 src/linkml_map/datamodel/transformer_model.py    | 4 ++--
 src/linkml_map/datamodel/transformer_model.yaml  | 2 +-
 src/linkml_map/loaders/data_loaders.py           | 2 +-
 src/linkml_map/transformer/engine.py             | 7 ++++---
 src/linkml_map/transformer/object_transformer.py | 7 +++++--
 src/linkml_map/utils/lookup_index.py             | 7 ++++---
 6 files changed, 17 insertions(+), 12 deletions(-)

diff --git a/src/linkml_map/datamodel/transformer_model.py b/src/linkml_map/datamodel/transformer_model.py
index d0eb912..d5a761e 100644
--- a/src/linkml_map/datamodel/transformer_model.py
+++ b/src/linkml_map/datamodel/transformer_model.py
@@ -263,8 +263,8 @@ class ClassDerivation(ElementDerivation):
                        'EnumDerivation',
                        'PermissibleValueDerivation']} })
     joins: Optional[Dict[str, AliasedClass]] = Field(default_factory=dict, description="""Additional classes to be joined to derive instances of the target class""", json_schema_extra = { "linkml_meta": {'alias': 'joins',
-         'comments': ['supports cross-table lookups via source_key/lookup_key or on '
-                      'shorthand'],
+         'comments': ['supports cross-table lookups via source_key/lookup_key or the '
+                      'join_on field'],
          'domain_of': ['ClassDerivation']} })
     slot_derivations: Optional[Dict[str, SlotDerivation]] = Field(default_factory=dict, json_schema_extra = { "linkml_meta": {'alias': 'slot_derivations',
          'domain_of': ['TransformationSpecification', 'ClassDerivation']} })
diff --git a/src/linkml_map/datamodel/transformer_model.yaml b/src/linkml_map/datamodel/transformer_model.yaml
index 3b3813f..2f71fd8 100644
--- a/src/linkml_map/datamodel/transformer_model.yaml
+++ b/src/linkml_map/datamodel/transformer_model.yaml
@@ -185,7 +185,7 @@ classes:
         inlined: true
         description: Additional classes to be joined to derive instances of the target class
         comments:
-          - supports cross-table lookups via source_key/lookup_key or on shorthand
+          - supports cross-table lookups via source_key/lookup_key or the join_on field
       slot_derivations:
         range: SlotDerivation
         multivalued: true
diff --git a/src/linkml_map/loaders/data_loaders.py b/src/linkml_map/loaders/data_loaders.py
index d4aa094..a939c10 100644
--- a/src/linkml_map/loaders/data_loaders.py
+++ b/src/linkml_map/loaders/data_loaders.py
@@ -248,7 +248,7 @@ def get_path(self, identifier: str) -> Path:
         if path is None:
             msg = f"No data file found for identifier {identifier!r} under {self.base_path}"
             raise FileNotFoundError(msg)
-        return path
+        return path.resolve()
 
     def __contains__(self, identifier: str) -> bool:
         """Check if a data file exists for the given identifier."""
diff --git a/src/linkml_map/transformer/engine.py b/src/linkml_map/transformer/engine.py
index f748083..1759b59 100644
--- a/src/linkml_map/transformer/engine.py
+++ b/src/linkml_map/transformer/engine.py
@@ -56,10 +56,11 @@ def transform_spec(
             if class_deriv.joins:
                 for join_name, join_spec in class_deriv.joins.items():
                     lookup_key = join_spec.lookup_key or join_spec.join_on
-                    if not lookup_key:
+                    source_key = join_spec.source_key or join_spec.join_on
+                    if not lookup_key or not source_key:
                         msg = (
-                            f"Join {join_name!r} must specify 'join_on' or "
-                            f"'lookup_key'"
+                            f"Join {join_name!r} must specify 'join_on' or both "
+                            f"'source_key' and 'lookup_key'"
                         )
                         raise ValueError(msg)
                     join_path = data_loader.get_path(join_name)
diff --git a/src/linkml_map/transformer/object_transformer.py b/src/linkml_map/transformer/object_transformer.py
index a38ba61..19bcd8a 100644
--- a/src/linkml_map/transformer/object_transformer.py
+++ b/src/linkml_map/transformer/object_transformer.py
@@ -108,7 +108,10 @@ def __iter__(self) -> Iterator:
 
     def __getitem__(self, name: Any) -> Any:
         if name not in self.bindings:
-            if name in self.join_specs and self.object_transformer.lookup_index is not None:
+            if name in self.join_specs:
+                if self.object_transformer.lookup_index is None:
+                    msg = f"Join configured for {name!r} but lookup_index has not been initialized"
+                    raise ValueError(msg)
                 self.bindings[name] = self._resolve_join(name)
             else:
                 _ = self.get_ctxt_obj_and_dict({name: self.source_obj[name]})
@@ -121,7 +124,7 @@ def _resolve_join(self, table_name: str) -> Optional[DynObj]:
         source_key = spec.source_key or spec.join_on
         lookup_key = spec.lookup_key or spec.join_on
         if not source_key or not lookup_key:
-            msg = f"Join spec for {table_name!r} must specify 'on' or both 'source_key' and 'lookup_key'"
+            msg = f"Join spec for {table_name!r} must specify 'join_on' or both 'source_key' and 'lookup_key'"
             raise ValueError(msg)
         key_val = self.source_obj.get(source_key)
         if key_val is None:
diff --git a/src/linkml_map/utils/lookup_index.py b/src/linkml_map/utils/lookup_index.py
index 51dd9aa..dd2889b 100644
--- a/src/linkml_map/utils/lookup_index.py
+++ b/src/linkml_map/utils/lookup_index.py
@@ -44,10 +44,11 @@ def register_table(self, name: str, file_path: Path | str, key_column: str) -> N
         file_path = Path(file_path)
         self._conn.execute(
             f"CREATE OR REPLACE TABLE {name} AS "  # noqa: S608
-            f"SELECT * FROM read_csv_auto('{file_path}', all_varchar=true)"
+            "SELECT * FROM read_csv_auto(?, all_varchar=true)",
+            [str(file_path)]
         )
         self._conn.execute(
-            f"CREATE INDEX IF NOT EXISTS idx_{name}_{key_column} ON {name} ({key_column})"
+            f"CREATE INDEX IF NOT EXISTS idx_{name}_{key_column} ON {name} ({key_column})"  # noqa: S608
         )
         self._tables[name] = key_column
 
@@ -76,7 +77,7 @@ def lookup_row(
     def drop(self, table: str) -> None:
         """Drop a registered table, releasing memory."""
         _validate_identifier(table)
-        self._conn.execute(f"DROP TABLE IF EXISTS {table}")
+        self._conn.execute(f"DROP TABLE IF EXISTS {table}")  # noqa: S608
         self._tables.pop(table, None)
 
     def is_registered(self, table: str) -> bool:

From 685b1889eda835646cba06b5879a5c611f218171 Mon Sep 17 00:00:00 2001
From: Mark Andrew Miller <MAM@lbl.gov>
Date: Mon, 9 Mar 2026 14:29:11 -0400
Subject: [PATCH 6/6] Add edge-case tests for cross-table lookup

---
 .../test_engine_edge_cases.py                 | 245 ++++++++++++++++++
 .../test_lookup_index_edge_cases.py           | 121 +++++++++
 2 files changed, 366 insertions(+)
 create mode 100644 tests/test_transformer/test_engine_edge_cases.py
 create mode 100644 tests/test_utils/test_lookup_index_edge_cases.py

diff --git a/tests/test_transformer/test_engine_edge_cases.py b/tests/test_transformer/test_engine_edge_cases.py
new file mode 100644
index 0000000..265a79e
--- /dev/null
+++ b/tests/test_transformer/test_engine_edge_cases.py
@@ -0,0 +1,245 @@
+"""Edge-case tests for the transform_spec engine (supplements test_cross_table_lookup.py).
+
+Covers:
+- Engine with no-joins class_derivation (regression safety, incl. a missing data file)
+- Empty joined table (headers only)
+- Mixed derivations: one with joins, one without
+
+See: https://github.com/linkml/linkml-map/pull/136
+"""
+
+# ruff: noqa: ANN401, PLR2004
+
+import textwrap
+
+import yaml
+from linkml_runtime import SchemaView
+
+from linkml_map.loaders.data_loaders import DataLoader
+from linkml_map.transformer.engine import transform_spec
+from linkml_map.transformer.object_transformer import ObjectTransformer
+
+
+# ---- shared schemas ----
+
+SOURCE_SCHEMA_YAML = textwrap.dedent("""\
+    id: https://example.org/engine-test-source
+    name: engine_test_source
+    prefixes:
+      linkml: https://w3id.org/linkml/
+    imports:
+      - linkml:types
+    default_range: string
+    classes:
+      samples:
+        attributes:
+          sample_id:
+            identifier: true
+          name: {}
+          site_code: {}
+      sites:
+        attributes:
+          site_code:
+            identifier: true
+          site_name: {}
+""")
+
+TARGET_SCHEMA_YAML = textwrap.dedent("""\
+    id: https://example.org/engine-test-target
+    name: engine_test_target
+    prefixes:
+      linkml: https://w3id.org/linkml/
+    imports:
+      - linkml:types
+    default_range: string
+    classes:
+      FlatSample:
+        attributes:
+          sample_id:
+            identifier: true
+          name: {}
+          site_name: {}
+""")
+
+
+def _make_transformer(source_sv, target_sv, spec_yaml):
+    """Build an ObjectTransformer from inline YAML strings."""
+    tr = ObjectTransformer(unrestricted_eval=False)
+    tr.source_schemaview = source_sv
+    tr.target_schemaview = target_sv
+    tr.create_transformer_specification(yaml.safe_load(spec_yaml))
+    return tr
+
+
+# ---- no-joins regression ----
+
+
+def test_engine_no_joins(tmp_path):
+    """transform_spec works for a class_derivation with no joins block.
+
+    This is a regression test ensuring the join machinery doesn't break
+    the common case where joins are not used.
+    """
+    (tmp_path / "samples.tsv").write_text(
+        "sample_id\tname\tsite_code\n"
+        "S001\tAlpha\tSITE_A\n"
+        "S002\tBeta\tSITE_B\n"
+    )
+
+    spec = textwrap.dedent("""\
+        class_derivations:
+          FlatSample:
+            populated_from: samples
+            slot_derivations:
+              sample_id:
+                populated_from: sample_id
+              name:
+                populated_from: name
+    """)
+    source_sv = SchemaView(SOURCE_SCHEMA_YAML)
+    target_sv = SchemaView(TARGET_SCHEMA_YAML)
+    tr = _make_transformer(source_sv, target_sv, spec)
+    loader = DataLoader(tmp_path)
+
+    results = list(transform_spec(tr, loader))
+
+    assert len(results) == 2
+    assert results[0]["sample_id"] == "S001"
+    assert results[0]["name"] == "Alpha"
+    assert results[1]["sample_id"] == "S002"
+    assert results[1]["name"] == "Beta"
+
+
+def test_engine_no_joins_no_data(tmp_path):
+    """transform_spec gracefully yields nothing when the data file doesn't exist."""
+    spec = textwrap.dedent("""\
+        class_derivations:
+          FlatSample:
+            populated_from: samples
+            slot_derivations:
+              sample_id:
+                populated_from: sample_id
+    """)
+    source_sv = SchemaView(SOURCE_SCHEMA_YAML)
+    target_sv = SchemaView(TARGET_SCHEMA_YAML)
+    tr = _make_transformer(source_sv, target_sv, spec)
+    loader = DataLoader(tmp_path)  # no files in tmp_path
+
+    results = list(transform_spec(tr, loader))
+    assert results == []
+
+
+# ---- empty joined table ----
+
+
+def test_join_with_empty_secondary_table(tmp_path):
+    """When a joined table has headers but no data rows, lookups return None."""
+    (tmp_path / "samples.tsv").write_text(
+        "sample_id\tname\tsite_code\n"
+        "S001\tAlpha\tSITE_A\n"
+    )
+    # sites.tsv has headers only — no data rows
+    (tmp_path / "sites.tsv").write_text("site_code\tsite_name\n")
+
+    spec = textwrap.dedent("""\
+        class_derivations:
+          FlatSample:
+            populated_from: samples
+            joins:
+              sites:
+                join_on: site_code
+            slot_derivations:
+              sample_id:
+                populated_from: sample_id
+              name:
+                populated_from: name
+              site_name:
+                expr: "{sites.site_name}"
+    """)
+    source_sv = SchemaView(SOURCE_SCHEMA_YAML)
+    target_sv = SchemaView(TARGET_SCHEMA_YAML)
+    tr = _make_transformer(source_sv, target_sv, spec)
+    loader = DataLoader(tmp_path)
+
+    results = list(transform_spec(tr, loader))
+
+    assert len(results) == 1
+    assert results[0]["sample_id"] == "S001"
+    assert results[0]["name"] == "Alpha"
+    # No matching row in empty sites table → None via null propagation
+    assert results[0].get("site_name") is None
+
+
+# ---- mixed: one derivation with joins, one without ----
+
+
+def test_mixed_derivations_with_and_without_joins(tmp_path):
+    """Multiple class_derivations can coexist: some with joins, some without."""
+    (tmp_path / "samples.tsv").write_text(
+        "sample_id\tname\tsite_code\n"
+        "S001\tAlpha\tSITE_A\n"
+    )
+    (tmp_path / "sites.tsv").write_text(
+        "site_code\tsite_name\n"
+        "SITE_A\tBoston Medical\n"
+    )
+
+    # Two target classes: one uses joins, one doesn't
+    target_yaml = textwrap.dedent("""\
+        id: https://example.org/engine-test-target
+        name: engine_test_target
+        prefixes:
+          linkml: https://w3id.org/linkml/
+        imports:
+          - linkml:types
+        default_range: string
+        classes:
+          FlatSample:
+            attributes:
+              sample_id:
+                identifier: true
+              name: {}
+              site_name: {}
+          SimpleSample:
+            attributes:
+              sample_id:
+                identifier: true
+              name: {}
+    """)
+
+    spec = textwrap.dedent("""\
+        class_derivations:
+          FlatSample:
+            populated_from: samples
+            joins:
+              sites:
+                join_on: site_code
+            slot_derivations:
+              sample_id:
+                populated_from: sample_id
+              name:
+                populated_from: name
+              site_name:
+                expr: "{sites.site_name}"
+          SimpleSample:
+            populated_from: samples
+            slot_derivations:
+              sample_id:
+                populated_from: sample_id
+              name:
+                populated_from: name
+    """)
+    source_sv = SchemaView(SOURCE_SCHEMA_YAML)
+    target_sv = SchemaView(target_yaml)
+    tr = _make_transformer(source_sv, target_sv, spec)
+    loader = DataLoader(tmp_path)
+
+    results = list(transform_spec(tr, loader))
+
+    # Should get results from both derivations
+    assert len(results) == 2
+    # First: FlatSample with join
+    assert results[0]["site_name"] == "Boston Medical"
+    # Second: SimpleSample without join
+    assert results[1]["sample_id"] == "S001"
+    assert results[1]["name"] == "Alpha"
diff --git a/tests/test_utils/test_lookup_index_edge_cases.py b/tests/test_utils/test_lookup_index_edge_cases.py
new file mode 100644
index 0000000..40ae4a7
--- /dev/null
+++ b/tests/test_utils/test_lookup_index_edge_cases.py
@@ -0,0 +1,121 @@
+"""Edge-case tests for LookupIndex (supplements test_lookup_index.py).
+
+Covers:
+- Duplicate key behavior (LIMIT 1 first-match semantics)
+- Empty tables (headers only, zero data rows)
+- Lifecycle after close() (registry cleared, register_table raises, double close is safe)
+
+See: https://github.com/linkml/linkml-map/pull/136
+"""
+
+import duckdb
+import pytest
+
+from linkml_map.utils.lookup_index import LookupIndex
+
+
+@pytest.fixture()
+def index():
+    """Create a LookupIndex and close it after the test."""
+    idx = LookupIndex()
+    yield idx
+    idx.close()
+
+
+# ---- Duplicate key behavior ----
+
+
+def test_duplicate_keys_returns_a_row(index, tmp_path):
+    """When multiple rows share the same key, lookup_row returns one of them.
+
+    The current implementation uses ``LIMIT 1`` without an ``ORDER BY``, so
+    which duplicate comes back depends on DuckDB's storage order (in practice
+    the file order produced by ``read_csv_auto``); the API does NOT guarantee
+    it. This test documents the behavior without asserting which duplicate wins.
+    """
+    tsv = tmp_path / "dupes.tsv"
+    tsv.write_text(
+        "participant_id\tname\tage\n"
+        "P001\tAlice\t30\n"
+        "P001\tAlice-v2\t31\n"
+        "P002\tBob\t25\n"
+    )
+    index.register_table("dupes", tsv, "participant_id")
+    row = index.lookup_row("dupes", "participant_id", "P001")
+
+    # A row IS returned (not None)
+    assert row is not None
+    assert row["participant_id"] == "P001"
+    # The name is one of the two duplicate rows
+    assert row["name"] in {"Alice", "Alice-v2"}
+
+
+def test_duplicate_keys_unique_rows_unaffected(index, tmp_path):
+    """Rows with unique keys are unaffected by the presence of duplicates elsewhere."""
+    tsv = tmp_path / "dupes.tsv"
+    tsv.write_text(
+        "id\tvalue\n"
+        "A\t1\n"
+        "A\t2\n"
+        "B\t3\n"
+    )
+    index.register_table("dupes", tsv, "id")
+    row = index.lookup_row("dupes", "id", "B")
+    assert row is not None
+    assert row["value"] == "3"
+
+
+# ---- Empty tables ----
+
+
+def test_empty_table_headers_only(index, tmp_path):
+    """A table with column headers but zero data rows can be registered and queried."""
+    tsv = tmp_path / "empty.tsv"
+    tsv.write_text("id\tname\tage\n")
+    index.register_table("empty", tsv, "id")
+
+    assert index.is_registered("empty")
+    assert index.lookup_row("empty", "id", "anything") is None
+
+
+def test_empty_table_then_drop(index, tmp_path):
+    """An empty table can be dropped without error."""
+    tsv = tmp_path / "empty.tsv"
+    tsv.write_text("id\tvalue\n")
+    index.register_table("empty", tsv, "id")
+    index.drop("empty")
+    assert not index.is_registered("empty")
+
+
+# ---- Lifecycle after close() ----
+
+
+def test_close_clears_tables(index, tmp_path):
+    """After close(), is_registered returns False for all tables."""
+    tsv = tmp_path / "data.tsv"
+    tsv.write_text("id\tval\nA\t1\n")
+    index.register_table("data", tsv, "id")
+    assert index.is_registered("data")
+
+    index.close()
+    assert not index.is_registered("data")
+
+
+def test_operations_after_close_raise(tmp_path):
+    """Calling register_table() after close() raises a DuckDB error."""
+    idx = LookupIndex()
+    idx.close()
+
+    tsv = tmp_path / "data.tsv"
+    tsv.write_text("id\tval\nA\t1\n")
+
+    with pytest.raises((duckdb.ConnectionException, duckdb.InvalidInputException)):
+        idx.register_table("data", tsv, "id")
+
+
+def test_double_close_is_safe():
+    """Calling close() twice does not raise."""
+    idx = LookupIndex()
+    idx.close()
+    # Second close should not raise
+    idx.close()