Skip to content

Commit 86f23f2

Browse files
fix(#1454): refresh ~lineage on every @Schema decoration
Stale rows in the ~lineage table caused spurious "different lineages" errors during populate() on FK-inherited primary keys.

The root cause: when a table was already declared, @Schema decoration skipped Table.declare() entirely (schemas.py:303), so _populate_lineage never ran. Rows from an earlier DataJoint version or a partial declare would persist across upgrades and never get rewritten.

Fix:

1. Add Table._refresh_lineage(context) — parses the current definition via the existing declare() machinery to obtain primary_key and fk_attribute_map, then calls _populate_lineage() (which does delete-then-insert idempotently). Errors are logged and swallowed — a stale row is preferable to a failed schema activation.
2. schemas.py:_decorate_table now calls _refresh_lineage when the table is already declared (and create_tables=True). Production deployments (create_tables=False) keep their previous zero-write behavior; they can use schema.rebuild_lineage() manually if needed.
3. Improve the error message in condition.assert_join_compatibility: when one side's lineage is None, surface a tailored hint pointing at rebuild_lineage() instead of the generic "different lineages" message. The original message stands when both lineages are present but differ (the legitimate semantic-mismatch case).

Tests added in tests/integration/test_semantic_matching.py:

- test_redecorate_overwrites_stale_lineage: inject a bogus lineage row, re-decorate, assert it's overwritten with the correct value.
- test_redecorate_restores_missing_lineage: delete a table's rows, re-decorate, assert they're recreated.
- test_missing_lineage_error_points_to_rebuild: force a None lineage and assert the join error mentions rebuild_lineage().

Slated for DataJoint 2.3.
1 parent 6077074 commit 86f23f2

4 files changed

Lines changed: 126 additions & 0 deletions

File tree

src/datajoint/condition.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -268,6 +268,20 @@ def assert_join_compatibility(
268268
lineage2 = expr2.heading[name].lineage
269269
# Semantic match requires both lineages to be non-None and equal
270270
if lineage1 is None or lineage2 is None or lineage1 != lineage2:
271+
if lineage1 is None or lineage2 is None:
272+
# Missing lineage usually means stale ~lineage rows that survived
273+
# an upgrade or a partial declare. Decoration in 2.3+ refreshes
274+
# lineage automatically, so this typically indicates a schema
275+
# that has not been re-decorated since the upgrade.
276+
raise DataJointError(
277+
f"Cannot join on attribute `{name}`: lineage missing on "
278+
f"one side ({lineage1} vs {lineage2}). This usually "
279+
f"indicates a stale `~lineage` entry from an older "
280+
f"DataJoint version or an incomplete declare. Run "
281+
f"`schema.rebuild_lineage()` to recompute lineage from "
282+
f"current FK definitions. If the lineages are genuinely "
283+
f"different, use `.proj()` to rename one of the attributes."
284+
)
271285
raise DataJointError(
272286
f"Cannot join on attribute `{name}`: "
273287
f"different lineages ({lineage1} vs {lineage2}). "

src/datajoint/schemas.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -303,6 +303,11 @@ def _decorate_table(self, table_class: type, context: dict[str, Any], assert_dec
303303
if not is_declared and not assert_declared and create_tables:
304304
instance.declare(context)
305305
self.connection.dependencies.clear()
306+
elif is_declared and create_tables:
307+
# Table already exists — declare() didn't run, so refresh ~lineage
308+
# idempotently to overwrite any stale rows from older DataJoint
309+
# versions or partial declares. See #1454.
310+
instance._refresh_lineage(context)
306311
is_declared = is_declared or instance.is_declared
307312

308313
# add table definition to the doc string

src/datajoint/table.py

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -262,6 +262,43 @@ def _populate_lineage(self, primary_key, fk_attribute_map):
262262
if entries:
263263
insert_lineages(self.connection, self.database, entries)
264264

265+
def _refresh_lineage(self, context=None):
    """
    Rewrite this table's ``~lineage`` rows from its current definition.

    Schema decoration calls this for a table that is already declared (and
    only when table creation is enabled). In that case the table's own
    ``declare()`` is skipped, so stale rows left by an earlier DataJoint
    version or a partial declare would otherwise never be replaced.

    The definition is parsed with the module-level ``declare`` helper solely
    to obtain ``primary_key`` and ``fk_attribute_map``; both are handed to
    ``_populate_lineage``, which writes the rows. This method issues no DDL
    of its own.

    Any exception raised while parsing or writing (e.g. missing write
    permission on ``~lineage``) is logged as a warning and suppressed: a
    stale row is preferable to a failed import.

    ``context`` is passed through to ``declare`` unchanged.
    """
    try:
        parsed = declare(
            self.full_table_name,
            self.definition,
            context,
            self.connection.adapter,
            config=self.connection._config,
        )
        # declare() yields a 6-tuple; only the key and FK map matter here.
        _, _, primary_key, fk_attribute_map, _, _ = parsed
        self._populate_lineage(primary_key, fk_attribute_map)
    except Exception as err:  # noqa: BLE001 — deliberate best-effort, see docstring
        message = (
            f"Could not refresh lineage for {self.full_table_name}: {err}. "
            "If you encounter `different lineages` errors, run "
            "`schema.rebuild_lineage()` to rebuild from current FK definitions."
        )
        logger.warning(message)
301+
265302
def alter(self, prompt=True, context=None):
266303
"""
267304
Alter the table definition from self.definition

tests/integration/test_semantic_matching.py

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -340,3 +340,73 @@ def test_rebuild_lineage_populates_table(self, schema_semantic):
340340
# Check that lineages were populated for Student table
341341
lineages = get_table_lineages(schema_semantic.connection, schema_semantic.database, "student")
342342
assert "student_id" in lineages
343+
344+
345+
class TestLineageRefreshOnDecoration:
    """
    Tests for #1454: @schema decoration refreshes stale ~lineage entries.

    Every test here mutates the ``~lineage`` rows of the schema supplied by
    the ``schema_semantic`` fixture, so each one restores them with
    ``schema.rebuild_lineage()`` in a ``finally`` block. That keeps a failing
    assertion (or a regression in the refresh itself) from leaving corrupted
    lineage behind for other tests that may share the fixture.
    """

    def test_redecorate_overwrites_stale_lineage(self, schema_semantic):
        """
        Manually corrupt a ~lineage row, then re-decorate via @schema — the row
        should be overwritten with the correct lineage derived from current FKs.
        """
        from datajoint.lineage import get_lineage, delete_table_lineages, insert_lineages

        connection = schema_semantic.connection
        database = schema_semantic.database

        # Sanity: enrollment.student_id currently traces to Student
        correct_lineage = get_lineage(connection, database, "enrollment", "student_id")
        assert correct_lineage is not None and "student" in correct_lineage.lower()

        try:
            # Corrupt: replace with a stale value pointing somewhere bogus
            delete_table_lineages(connection, database, "enrollment")
            insert_lineages(
                connection,
                database,
                [("enrollment", "student_id", "stale_schema.stale_table.stale_attr")],
            )
            stale = get_lineage(connection, database, "enrollment", "student_id")
            assert stale == "stale_schema.stale_table.stale_attr"

            # Re-decorate via @schema — should refresh lineage
            schema_semantic(Enrollment)

            refreshed = get_lineage(connection, database, "enrollment", "student_id")
            assert refreshed == correct_lineage, f"Expected lineage to be refreshed to {correct_lineage!r}, got {refreshed!r}"
        finally:
            # Do not rely on the code under test to undo the corruption.
            schema_semantic.rebuild_lineage()

    def test_redecorate_restores_missing_lineage(self, schema_semantic):
        """
        Delete a table's ~lineage rows entirely, then re-decorate — rows are recreated.
        """
        from datajoint.lineage import get_lineage, delete_table_lineages

        connection = schema_semantic.connection
        database = schema_semantic.database

        try:
            delete_table_lineages(connection, database, "trial")
            assert get_lineage(connection, database, "trial", "session_id") is None

            schema_semantic(Trial)

            refreshed = get_lineage(connection, database, "trial", "session_id")
            assert refreshed is not None and "session" in refreshed.lower()
        finally:
            # Restore the deleted rows even if re-decoration did not.
            schema_semantic.rebuild_lineage()

    def test_missing_lineage_error_points_to_rebuild(self, schema_semantic):
        """
        When a join fails because one side has None lineage, the error must
        point the user at `schema.rebuild_lineage()`.
        """
        from datajoint.lineage import delete_table_lineages
        from datajoint.heading import Heading

        # Wipe enrollment.student_id lineage by deleting the row, then force the
        # class-level heading to reload from DB so it reflects the missing row.
        delete_table_lineages(schema_semantic.connection, schema_semantic.database, "enrollment")
        old_heading = Enrollment._heading
        Enrollment._heading = Heading(table_info=old_heading.table_info)
        try:
            assert Enrollment().heading["student_id"].lineage is None

            with pytest.raises(DataJointError) as exc_info:
                Student() * Enrollment()
            error_text = str(exc_info.value)
            assert "rebuild_lineage" in error_text, f"Error must mention rebuild_lineage(); got: {exc_info.value}"
            assert "stale" in error_text.lower() or "missing" in error_text.lower()
        finally:
            # Restore lineage so subsequent tests see clean state
            schema_semantic.rebuild_lineage()
            Enrollment._heading = Heading(table_info=old_heading.table_info)

0 commit comments

Comments
 (0)