Skip to content

Commit edbfa90

Browse files
fix(codegen): lineage table CHECK refuses self-references (#78)
Closes #42. `verisimdb_lineage_graph` had no constraint preventing `(source_entity, source_table) == (target_entity, target_table)`. Any row inserted with matching source/target tuples is a self-loop and falsifies the README's "DAG" framing at the structural level. Add a CHECK constraint, `CHECK (source_entity <> target_entity OR source_table <> target_table)`, i.e. at least one component of the (entity, table) pair must differ. An edge between two entities that share an id but live in different tables (a legitimate cross-table derivation) still passes. Multi-hop cycle prevention is a runtime concern (no number of row-level CHECKs can catch multi-edge chains); it is kept as a separate follow-up (V-L2-I2). Test `test_lineage_table_has_self_reference_check` asserts that the exact CHECK clause appears in the emitted DDL with lineage enabled. `cargo clippy --all-targets -- -D warnings` is clean; 35 unit tests pass. Co-authored-by: Claude Opus 4.7 <noreply@anthropic.com>
1 parent 0c9b766 commit edbfa90

1 file changed

Lines changed: 29 additions & 1 deletion

File tree

src/codegen/overlay.rs

Lines changed: 29 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -139,6 +139,10 @@ fn generate_provenance_table() -> String {
139139
/// Together, these edges form a DAG that can be traversed to answer
140140
/// "where did this data come from?" and "what is affected if this changes?"
141141
fn generate_lineage_table() -> String {
142+
// The CHECK constraint refuses edges whose source and target are the
143+
// same (entity, table) pair — i.e. self-loops, which would falsify
144+
// the README's "DAG" claim at the structural level. Closes #42.
145+
// (Multi-hop cycle prevention is a runtime concern tracked separately.)
142146
"-- Lineage: data derivation DAG\n\
143147
CREATE TABLE IF NOT EXISTS verisimdb_lineage_graph (\n\
144148
\x20 edge_id TEXT PRIMARY KEY,\n\
@@ -148,7 +152,8 @@ fn generate_lineage_table() -> String {
148152
\x20 target_table TEXT NOT NULL,\n\
149153
\x20 derivation_type TEXT NOT NULL, -- copy, transform, aggregate, join, filter\n\
150154
\x20 description TEXT,\n\
151-
\x20 created_at TEXT NOT NULL -- ISO 8601\n\
155+
\x20 created_at TEXT NOT NULL, -- ISO 8601\n\
156+
\x20 CHECK (source_entity <> target_entity OR source_table <> target_table)\n\
152157
);\n\
153158
CREATE INDEX IF NOT EXISTS idx_lineage_source ON verisimdb_lineage_graph(source_entity);\n\
154159
CREATE INDEX IF NOT EXISTS idx_lineage_target ON verisimdb_lineage_graph(target_entity);\n\n"
@@ -280,6 +285,29 @@ mod tests {
280285
assert!(ddl.contains("verisimdb_simulation_branches"));
281286
}
282287

288+
/// Lineage edges must refuse self-loops at the storage layer
289+
/// (closes #42). The DAG claim in the README would be unenforced
290+
/// without this check.
291+
#[test]
292+
fn test_lineage_table_has_self_reference_check() {
293+
let schema = test_schema();
294+
// Only `enable_lineage` is set; every other OctadConfig flag is explicitly false.
let octad = OctadConfig {
295+
enable_provenance: false,
296+
enable_lineage: true,
297+
enable_temporal: false,
298+
enable_access_control: false,
299+
enable_constraints: false,
300+
enable_simulation: false,
301+
};
302+
let ddl = generate_sidecar_schema(&schema, &octad);
303+
// Sanity check first: the lineage table name must appear in the generated DDL at all.
assert!(ddl.contains("verisimdb_lineage_graph"));
304+
// The exact CHECK clause must be present in the emitted DDL.
// This is a plain substring match, so any change to the clause's spelling or
// spacing in the generator has to be mirrored in the literal below.
305+
assert!(
306+
ddl.contains("CHECK (source_entity <> target_entity OR source_table <> target_table)"),
307+
"lineage table is missing the self-reference CHECK constraint"
308+
);
309+
}
310+
283311
#[test]
284312
fn test_generate_minimal_dimensions() {
285313
let schema = test_schema();

0 commit comments

Comments
 (0)