diff --git a/.gitignore b/.gitignore index 5f98457..d060955 100644 --- a/.gitignore +++ b/.gitignore @@ -97,3 +97,4 @@ docs/api/ *_output.txt POST.md site/ +core.* diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index a49294e..9ad3a29 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -5,17 +5,17 @@ Contributions are always welcome and appreciated. ### How to Contribute -Please check the [issue tracker](https://github.com/habedi/template-zig-project/issues) to see if there is an issue you +Please check the [issue tracker](https://github.com/CogitatorTech/zodd/issues) to see if there is an issue you would like to work on or if it has already been resolved. #### Reporting Bugs -1. Open an issue on the [issue tracker](https://github.com/habedi/template-zig-project/issues). +1. Open an issue on the [issue tracker](https://github.com/CogitatorTech/zodd/issues). 2. Include information such as steps to reproduce the observed behavior and relevant logs or screenshots. #### Suggesting Features -1. Open an issue on the [issue tracker](https://github.com/habedi/template-zig-project/issues). +1. Open an issue on the [issue tracker](https://github.com/CogitatorTech/zodd/issues). 2. Provide details about the feature, its purpose, and potential implementation ideas. ### Submitting Pull Requests diff --git a/README.md b/README.md index 4d1f6a3..12fb015 100644 --- a/README.md +++ b/README.md @@ -23,11 +23,14 @@ Zodd is a small [Datalog](https://en.wikipedia.org/wiki/Datalog) engine written ### What is Datalog? -Datalog is a declarative logic programming language that is used in deductive databases. -It is a subset of [Prolog](https://en.wikipedia.org/wiki/Prolog) programming language and allows you to define things like facts and rules -and then query those facts and rules to derive new information. +Datalog is a declarative logic programming language for deductive databases. 
+In contrast to SQL, which needs explicit joins and subqueries, Datalog lets you express recursive relationships naturally. +Instead of defining a schema and queries in a relational database, +you define a set of facts (base data) and rules (logical implications), and a Datalog engine automatically computes all derivable conclusions +iteratively. -Below is a simple Datalog code-snippet that defines a graph and computes the transitive closure of that graph: +Below is a Datalog program that defines a directed graph and computes its transitive closure. +The [Simple Example](#simple-example) section shows how to implement this using Zodd in Zig. ```prolog % Facts: a graph (with four nodes and three edges) @@ -36,21 +39,43 @@ edge(2, 3). edge(3, 4). % Rule: transitive closure of the graph -% A transitive closure of a graph is a relation (a set of nodes) that contains all pairs -% of nodes that are reachable from each other. +% The transitive closure is the set of all node pairs (X, Y) where node Y is +% reachable from node X through one or more directed edges. reachable(X, Y) :- edge(X, Y). reachable(X, Z) :- reachable(X, Y), edge(Y, Z). -``` -Example applications of Datalog include: +% Query: find all pairs (X, Y) where node Y is reachable from node X +?- reachable(X, Y). + +%% Output: +% X = 1, Y = 2 +% X = 1, Y = 3 +% X = 1, Y = 4 +% X = 2, Y = 3 +% X = 2, Y = 4 +% X = 3, Y = 4 +``` -- Knowledge graphs and semantic reasoning -- Program analysis (like static analysis of code) -- Access control and authorization policies +Datalog is used in many application domains, especially when recursive querying over structured data is needed.
+For example: + +- Security and access control + - Role-based authorization with hierarchical permission inheritance and explicit denials + - Network reachability analysis through routing policies and firewall rules + - Taint analysis to trace untrusted data through program flows and detect vulnerabilities +- Data governance and compliance + - Data lineage tracking through ETL pipelines for GDPR and CCPA compliance + - PII propagation analysis with anonymization checkpoints +- Healthcare and life sciences + - Medical ontology reasoning with type hierarchies and property inheritance + - Drug-disease relationship inference and side effect prediction +- Software engineering + - Dependency resolution with transitive closure and cycle detection + - Points-to analysis and other static analyses over program representations ### Why Zodd? -- Written in pure Zig with a simple API and no external dependencies +- Written in pure Zig with a simple API - Supports a subset of relational algebra with sorted, deduplicated relations - Supports fast incremental rule computation - Supports multi-way joins and anti-join operations @@ -95,7 +120,7 @@ pub fn build(b: *std.Build) void { } ``` -#### A Simple Example +#### Simple Example Finally, you can `@import("zodd")` and start using it in your Zig project. 
@@ -107,11 +132,13 @@ pub fn main() !void { var gpa = std.heap.GeneralPurposeAllocator(.{}){}; defer _ = gpa.deinit(); const allocator = gpa.allocator(); + var ctx = try zodd.ExecutionContext.initWithThreads(allocator, 4); + defer ctx.deinit(); const Edge = struct { u32, u32 }; // Create base relation: edges in a graph - var edges = try zodd.Relation(Edge).fromSlice(allocator, &[_]Edge{ + var edges = try zodd.Relation(Edge).fromSlice(&ctx, &[_]Edge{ .{ 1, 2 }, .{ 2, 3 }, .{ 3, 4 }, @@ -119,11 +146,11 @@ pub fn main() !void { defer edges.deinit(); // Create variable for reachability (transitive closure) - var reachable = zodd.Variable(Edge).init(allocator); + var reachable = zodd.Variable(Edge).init(&ctx); defer reachable.deinit(); // Initialize with base edges - try reachable.insertSlice(edges.elements); + try reachable.insertSlice(&ctx, edges.elements); // Fixed-point iteration: reachable(X,Z) :- reachable(X,Y), edge(Y,Z) while (try reachable.changed()) { @@ -139,7 +166,7 @@ pub fn main() !void { } if (new_tuples.items.len > 0) { - const rel = try zodd.Relation(Edge).fromSlice(allocator, new_tuples.items); + const rel = try zodd.Relation(Edge).fromSlice(&ctx, new_tuples.items); try reachable.insert(rel); } } @@ -159,8 +186,7 @@ pub fn main() !void { You can find the API documentation for the latest release of Zodd [here](https://CogitatorTech.github.io/zodd/#zodd.lib). Alternatively, you can use the `make docs` command to generate the documentation for the current version of Zodd. -This will generate HTML documentation in the `docs/api` directory, which you can serve locally with `make serve-docs` -and view in a web browser. +This will generate HTML documentation in the `docs/api` directory, which you can serve locally with `make docs-serve` and view in a web browser. ### Examples @@ -180,5 +206,5 @@ Zodd is licensed under the MIT License (see [LICENSE](LICENSE)). * The logo is from [SVG Repo](https://www.svgrepo.com/svg/469003/gravity) with some modifications. 
* This project uses the [Minish](https://github.com/CogitatorTech/minish) framework for property-based testing and - the [Ordered](https://github.com/CogitatorTech/minish) Zig library. + the [Ordered](https://github.com/CogitatorTech/ordered) library for B-tree indices. * Zodd is inspired and modeled after the [Datafrog](https://github.com/frankmcsherry/blog/blob/master/posts/2018-05-19.md) Datalog engine for Rust. diff --git a/ROADMAP.md b/ROADMAP.md index 6efac67..07b3a6f 100644 --- a/ROADMAP.md +++ b/ROADMAP.md @@ -16,19 +16,18 @@ This document outlines the features implemented in Zodd and the future goals for - [x] `Leaper` - Treefrog Leapjoin interface - [x] `ExtendWith` - propose values from a relation - [x] `FilterAnti` - negation (filter out matching tuples) -- [x] `ExtendAnti` - set difference (propose non-matching values) +- [x] `ExtendAnti` - anti-join (filter to keep non-matching values) ### Extra Features -- [x] Stratified negation +- [x] Negation primitives (anti-join and anti-extend) - [x] Aggregations - [x] Recursion limits - [x] Persistence - [x] Secondary indices - [x] Incremental maintenance -- [ ] Parallel execution +- [x] Parallel execution - [ ] CLI interface -- [ ] WASM support - [ ] Streaming input - [ ] Rule DSL - [ ] Query planner diff --git a/build.zig.zon b/build.zig.zon index 3162d71..04dfa27 100644 --- a/build.zig.zon +++ b/build.zig.zon @@ -1,6 +1,6 @@ .{ .name = .zodd, - .version = "0.1.0-alpha.1", + .version = "0.1.0-alpha.2", .fingerprint = 0x2d03181bdd24914c, // Changing this has security and trust implications. 
.minimum_zig_version = "0.15.2", .dependencies = .{ diff --git a/examples/README.md b/examples/README.md index 2b80987..9a453db 100644 --- a/examples/README.md +++ b/examples/README.md @@ -2,11 +2,14 @@ #### List of Examples -| # | File | Description | -|---|--------------------------------------------------------|--------------------------------------------------------------------------| -| 1 | [e1_transitive_closure.zig](e1_transitive_closure.zig) | Computes the transitive closure of a directed graph using Datalog rules. | -| 2 | [e2_same_generation.zig](e2_same_generation.zig) | Finds all pairs of nodes at the same depth in a hierarchy. | -| 3 | [e3_points_to_analysis.zig](e3_points_to_analysis.zig) | Performs points-to analysis for a program. | +| # | File | Description | +|---|--------------------------------------------------------------|----------------------------------------------------------------------------| +| 1 | [e1_network_reachability.zig](e1_network_reachability.zig) | Network zone reachability through routing and firewall rule analysis. | +| 2 | [e2_knowledge_graph.zig](e2_knowledge_graph.zig) | Medical ontology reasoning with type hierarchy and drug-disease inference. | +| 3 | [e3_data_lineage.zig](e3_data_lineage.zig) | Data lineage tracking for GDPR compliance with PII propagation. | +| 4 | [e4_rbac_authorization.zig](e4_rbac_authorization.zig) | RBAC authorization with role hierarchy, joins, and denial filtering. | +| 5 | [e5_taint_analysis.zig](e5_taint_analysis.zig) | Security taint analysis using leapfrog trie join for taint propagation. | +| 6 | [e6_dependency_resolution.zig](e6_dependency_resolution.zig) | Package dependency resolution with aggregation and reverse-dep index. 
| #### Running Examples @@ -19,7 +22,10 @@ zig build run-{FILE_NAME_WITHOUT_EXTENSION} For example: ```sh -zig build run-e1_transitive_closure -zig build run-e2_same_generation -zig build run-e3_points_to_analysis +zig build run-e1_network_reachability +zig build run-e2_knowledge_graph +zig build run-e3_data_lineage +zig build run-e4_rbac_authorization +zig build run-e5_taint_analysis +zig build run-e6_dependency_resolution ``` diff --git a/examples/e1_network_reachability.zig b/examples/e1_network_reachability.zig new file mode 100644 index 0000000..da644ba --- /dev/null +++ b/examples/e1_network_reachability.zig @@ -0,0 +1,206 @@ +const std = @import("std"); +const zodd = @import("zodd"); + +// Network Reachability Analysis +// +// Determines which network zones can communicate through routing policies and +// firewall rules. This is a common task in enterprise security auditing to identify +// unintended exposure paths, for example verifying that the internet cannot +// reach the database tier, or that PCI zones are properly isolated. +// +// Datalog rules: +// reachable(A, B) :- link(A, B). +// reachable(A, C) :- reachable(A, B), link(B, C). +// allowed(A, B) :- reachable(A, B), NOT blocked(A, B). +// exposure(Z) :- allowed(internet, Z).
+ +pub fn main() !void { + var gpa = std.heap.GeneralPurposeAllocator(.{}){}; + defer _ = gpa.deinit(); + const allocator = gpa.allocator(); + var ctx = zodd.ExecutionContext.init(allocator); + + std.debug.print("Zodd Datalog Engine - Network Reachability Analysis\n", .{}); + std.debug.print("===================================================\n\n", .{}); + + // Network topology: + // + // internet(1) --> dmz(2) --> app_tier(3) --> db_tier(4) + // | | + // v v + // monitoring(5) <------------- logging(6) + // | + // v + // pci_zone(7) + // + // Zone IDs: + // internet=1, dmz=2, app_tier=3, db_tier=4, + // monitoring=5, logging=6, pci_zone=7 + + const Pair = struct { u32, u32 }; + + // Network links (directional routing rules) + const link_data = [_]Pair{ + .{ 1, 2 }, // internet -> dmz + .{ 2, 3 }, // dmz -> app_tier + .{ 3, 4 }, // app_tier -> db_tier + .{ 2, 5 }, // dmz -> monitoring + .{ 4, 6 }, // db_tier -> logging + .{ 6, 5 }, // logging -> monitoring + .{ 5, 7 }, // monitoring -> pci_zone + }; + + // Firewall deny rules: blocked(src_zone, dst_zone) + const blocked_data = [_]Pair{ + .{ 1, 3 }, // block internet -> app_tier (must go through dmz) + .{ 1, 4 }, // block internet -> db_tier + .{ 1, 7 }, // block internet -> pci_zone + .{ 2, 4 }, // block dmz -> db_tier (must go through app_tier) + .{ 2, 7 }, // block dmz -> pci_zone + .{ 3, 7 }, // block app_tier -> pci_zone + }; + + const zoneName = struct { + fn get(id: u32) []const u8 { + return switch (id) { + 1 => "internet", + 2 => "dmz", + 3 => "app_tier", + 4 => "db_tier", + 5 => "monitoring", + 6 => "logging", + 7 => "pci_zone", + else => "unknown", + }; + } + }.get; + + std.debug.print("Network links:\n", .{}); + for (link_data) |l| { + std.debug.print(" {s} -> {s}\n", .{ zoneName(l[0]), zoneName(l[1]) }); + } + + std.debug.print("\nFirewall deny rules:\n", .{}); + for (blocked_data) |b| { + std.debug.print(" DENY {s} -> {s}\n", .{ zoneName(b[0]), zoneName(b[1]) }); + } + + // -- Build relations -- + + 
var links = try zodd.Relation(Pair).fromSlice(&ctx, &link_data); + defer links.deinit(); + + var blocked = try zodd.Relation(Pair).fromSlice(&ctx, &blocked_data); + defer blocked.deinit(); + + // -- Step 1: Compute reachable zones (transitive routing) -- + // reachable(A, B) :- link(A, B). + // reachable(A, C) :- reachable(A, B), link(B, C). + + var reachable = zodd.Variable(Pair).init(&ctx); + defer reachable.deinit(); + + try reachable.insertSlice(&ctx, links.elements); + + std.debug.print("\nComputing transitive reachability...\n", .{}); + + const PairList = std.ArrayListUnmanaged(Pair); + var iteration: usize = 0; + while (try reachable.changed()) : (iteration += 1) { + var results = PairList{}; + defer results.deinit(allocator); + + for (reachable.recent.elements) |r| { + for (links.elements) |l| { + if (l[0] == r[1]) { + try results.append(allocator, .{ r[0], l[1] }); + } + } + } + + if (results.items.len > 0) { + const rel = try zodd.Relation(Pair).fromSlice(&ctx, results.items); + try reachable.insert(rel); + } + + if (iteration > 50) break; + } + + var reach_result = try reachable.complete(); + defer reach_result.deinit(); + + std.debug.print("\nAll reachable zone pairs (via routing):\n", .{}); + for (reach_result.elements) |r| { + std.debug.print(" {s} -> {s}\n", .{ zoneName(r[0]), zoneName(r[1]) }); + } + + // -- Step 2: Apply firewall rules (anti-join) -- + // allowed(A, B) :- reachable(A, B), NOT blocked(A, B). 
+ + std.debug.print("\nApplying firewall rules...\n", .{}); + + var allowed = PairList{}; + defer allowed.deinit(allocator); + + for (reach_result.elements) |r| { + var is_blocked = false; + for (blocked.elements) |b| { + if (b[0] == r[0] and b[1] == r[1]) { + is_blocked = true; + break; + } + } + if (is_blocked) { + std.debug.print(" BLOCKED: {s} -> {s}\n", .{ zoneName(r[0]), zoneName(r[1]) }); + } else { + try allowed.append(allocator, r); + } + } + + std.debug.print("\nAllowed communication paths:\n", .{}); + for (allowed.items) |a| { + std.debug.print(" {s} -> {s}\n", .{ zoneName(a[0]), zoneName(a[1]) }); + } + + // -- Step 3: Identify internet-exposed zones -- + // exposure(Z) :- allowed(internet, Z). + + std.debug.print("\nInternet-exposed zones:\n", .{}); + var exposure_count: usize = 0; + for (allowed.items) |a| { + if (a[0] == 1) { // internet + std.debug.print(" {s} is reachable from the internet\n", .{zoneName(a[1])}); + exposure_count += 1; + } + } + if (exposure_count == 0) { + std.debug.print(" (none)\n", .{}); + } + + // -- Step 4: Security audit summary -- + + std.debug.print("\nSecurity audit:\n", .{}); + const critical_zones = [_]u32{ 4, 7 }; // db_tier, pci_zone + const critical_names = [_][]const u8{ "db_tier", "pci_zone" }; + + for (critical_zones, 0..) 
|zone, idx| { + var exposed = false; + for (allowed.items) |a| { + if (a[0] == 1 and a[1] == zone) { + exposed = true; + break; + } + } + if (exposed) { + std.debug.print(" WARNING: {s} is exposed to the internet!\n", .{critical_names[idx]}); + } else { + std.debug.print(" OK: {s} is not reachable from the internet\n", .{critical_names[idx]}); + } + } + + std.debug.print("\nTotal: {} reachable pairs, {} allowed after firewall, {} internet-exposed\n", .{ + reach_result.len(), + allowed.items.len, + exposure_count, + }); +} diff --git a/examples/e1_transitive_closure.zig b/examples/e1_transitive_closure.zig deleted file mode 100644 index 32d375a..0000000 --- a/examples/e1_transitive_closure.zig +++ /dev/null @@ -1,78 +0,0 @@ -const std = @import("std"); -const zodd = @import("zodd"); - -pub fn main() !void { - var gpa = std.heap.GeneralPurposeAllocator(.{}){}; - defer _ = gpa.deinit(); - const allocator = gpa.allocator(); - - std.debug.print("Zodd Datalog Engine - Transitive Closure Example\n", .{}); - std.debug.print("================================================\n\n", .{}); - - const Edge = struct { u32, u32 }; - const edges_data = [_]Edge{ - .{ 1, 2 }, - .{ 2, 3 }, - .{ 3, 4 }, - .{ 1, 3 }, - .{ 4, 5 }, - }; - - std.debug.print("Input edges:\n", .{}); - for (edges_data) |e| { - std.debug.print(" {} -> {}\n", .{ e[0], e[1] }); - } - std.debug.print("\n", .{}); - - var edges = try zodd.Relation(Edge).fromSlice(allocator, &edges_data); - defer edges.deinit(); - - var reachable = zodd.Variable(Edge).init(allocator); - defer reachable.deinit(); - - try reachable.insertSlice(edges.elements); - - std.debug.print("Computing transitive closure...\n", .{}); - - const ResultList = std.ArrayListUnmanaged(Edge); - var iteration: usize = 0; - while (try reachable.changed()) : (iteration += 1) { - std.debug.print(" Iteration {}: {} recent tuples\n", .{ iteration, reachable.recent.len() }); - - var results = ResultList{}; - defer results.deinit(allocator); - - for 
(reachable.recent.elements) |r| { - const x = r[0]; - const y = r[1]; - - for (edges.elements) |e| { - if (e[0] == y) { - try results.append(allocator, .{ x, e[1] }); - } - } - } - - if (results.items.len > 0) { - const rel = try zodd.Relation(Edge).fromSlice(allocator, results.items); - try reachable.insert(rel); - } - - if (iteration > 100) { - std.debug.print(" (reached iteration limit)\n", .{}); - break; - } - } - - std.debug.print("\n", .{}); - - var result = try reachable.complete(); - defer result.deinit(); - - std.debug.print("Reachability (transitive closure):\n", .{}); - for (result.elements) |r| { - std.debug.print(" {} can reach {}\n", .{ r[0], r[1] }); - } - - std.debug.print("\nTotal: {} reachable pairs\n", .{result.len()}); -} diff --git a/examples/e2_knowledge_graph.zig b/examples/e2_knowledge_graph.zig new file mode 100644 index 0000000..d955d8a --- /dev/null +++ b/examples/e2_knowledge_graph.zig @@ -0,0 +1,287 @@ +const std = @import("std"); +const zodd = @import("zodd"); + +// Knowledge Graph Reasoning (Medical Ontology) +// +// Infers new biomedical facts from a medical ontology through type hierarchy +// and property inheritance. This is a common pattern in healthcare, pharma, +// and biotech for drug repurposing, adverse effect prediction, and clinical +// decision support. +// +// Datalog rules: +// is_a(X, Z) :- is_a(X, Y), is_a(Y, Z). +// has_symptom(D, S) :- is_a(D, D2), has_symptom(D2, S). +// treats(Drug, D) :- targets(Drug, P), associated_with(P, D). +// side_effect(Drug, S):- treats(Drug, D), has_symptom(D, S). 
+ +pub fn main() !void { + var gpa = std.heap.GeneralPurposeAllocator(.{}){}; + defer _ = gpa.deinit(); + const allocator = gpa.allocator(); + var ctx = zodd.ExecutionContext.init(allocator); + + std.debug.print("Zodd Datalog Engine - Knowledge Graph Reasoning\n", .{}); + std.debug.print("================================================\n\n", .{}); + + // Ontology IDs: + // Diseases: cardiovascular=10, heart_disease=11, arrhythmia=12, + // hypertension=13, respiratory=20, asthma=21 + // Symptoms: chest_pain=30, shortness_of_breath=31, fatigue=32, + // irregular_heartbeat=33, wheezing=34, high_bp=35 + // Proteins: ace_enzyme=40, beta_receptor=41, calcium_channel=42 + // Drugs: lisinopril=50, metoprolol=51, amlodipine=52 + + const Pair = struct { u32, u32 }; + + // Type hierarchy: is_a(subtype, supertype) + const is_a_data = [_]Pair{ + .{ 11, 10 }, // heart_disease is_a cardiovascular + .{ 12, 11 }, // arrhythmia is_a heart_disease + .{ 13, 10 }, // hypertension is_a cardiovascular + .{ 21, 20 }, // asthma is_a respiratory + }; + + // Direct symptom associations: has_symptom(disease, symptom) + const symptom_data = [_]Pair{ + .{ 10, 32 }, // cardiovascular -> fatigue + .{ 11, 30 }, // heart_disease -> chest_pain + .{ 11, 31 }, // heart_disease -> shortness_of_breath + .{ 12, 33 }, // arrhythmia -> irregular_heartbeat + .{ 13, 35 }, // hypertension -> high_bp + .{ 20, 31 }, // respiratory -> shortness_of_breath + .{ 21, 34 }, // asthma -> wheezing + }; + + // Drug-protein targeting: targets(drug, protein) + const targets_data = [_]Pair{ + .{ 50, 40 }, // lisinopril targets ace_enzyme + .{ 51, 41 }, // metoprolol targets beta_receptor + .{ 52, 42 }, // amlodipine targets calcium_channel + }; + + // Protein-disease associations: associated_with(protein, disease) + const assoc_data = [_]Pair{ + .{ 40, 13 }, // ace_enzyme associated_with hypertension + .{ 40, 11 }, // ace_enzyme associated_with heart_disease + .{ 41, 12 }, // beta_receptor associated_with arrhythmia + .{ 
41, 11 }, // beta_receptor associated_with heart_disease + .{ 42, 13 }, // calcium_channel associated_with hypertension + .{ 42, 12 }, // calcium_channel associated_with arrhythmia + }; + + const entityName = struct { + fn get(id: u32) []const u8 { + return switch (id) { + 10 => "cardiovascular", + 11 => "heart_disease", + 12 => "arrhythmia", + 13 => "hypertension", + 20 => "respiratory", + 21 => "asthma", + 30 => "chest_pain", + 31 => "shortness_of_breath", + 32 => "fatigue", + 33 => "irregular_heartbeat", + 34 => "wheezing", + 35 => "high_bp", + 40 => "ace_enzyme", + 41 => "beta_receptor", + 42 => "calcium_channel", + 50 => "lisinopril", + 51 => "metoprolol", + 52 => "amlodipine", + else => "unknown", + }; + } + }.get; + + std.debug.print("Type hierarchy:\n", .{}); + for (is_a_data) |r| { + std.debug.print(" {s} is_a {s}\n", .{ entityName(r[0]), entityName(r[1]) }); + } + + std.debug.print("\nDirect symptom associations:\n", .{}); + for (symptom_data) |s| { + std.debug.print(" {s} has_symptom {s}\n", .{ entityName(s[0]), entityName(s[1]) }); + } + + std.debug.print("\nDrug targets:\n", .{}); + for (targets_data) |t| { + std.debug.print(" {s} targets {s}\n", .{ entityName(t[0]), entityName(t[1]) }); + } + + std.debug.print("\nProtein-disease associations:\n", .{}); + for (assoc_data) |a| { + std.debug.print(" {s} associated_with {s}\n", .{ entityName(a[0]), entityName(a[1]) }); + } + + // -- Build relations -- + + var is_a_rel = try zodd.Relation(Pair).fromSlice(&ctx, &is_a_data); + defer is_a_rel.deinit(); + + var symptom_rel = try zodd.Relation(Pair).fromSlice(&ctx, &symptom_data); + defer symptom_rel.deinit(); + + var targets_rel = try zodd.Relation(Pair).fromSlice(&ctx, &targets_data); + defer targets_rel.deinit(); + + var assoc_rel = try zodd.Relation(Pair).fromSlice(&ctx, &assoc_data); + defer assoc_rel.deinit(); + + // -- Step 1: Compute transitive type hierarchy -- + // is_a(X, Z) :- is_a(X, Y), is_a(Y, Z). 
+ + var is_a = zodd.Variable(Pair).init(&ctx); + defer is_a.deinit(); + try is_a.insertSlice(&ctx, is_a_rel.elements); + + std.debug.print("\nComputing transitive type hierarchy...\n", .{}); + + const PairList = std.ArrayListUnmanaged(Pair); + var iter: usize = 0; + while (try is_a.changed()) : (iter += 1) { + var results = PairList{}; + defer results.deinit(allocator); + + for (is_a.recent.elements) |r| { + for (is_a_rel.elements) |base| { + if (base[0] == r[1]) { + try results.append(allocator, .{ r[0], base[1] }); + } + } + } + + if (results.items.len > 0) { + const rel = try zodd.Relation(Pair).fromSlice(&ctx, results.items); + try is_a.insert(rel); + } + if (iter > 50) break; + } + + var is_a_result = try is_a.complete(); + defer is_a_result.deinit(); + + std.debug.print("\nFull type hierarchy (including inferred):\n", .{}); + for (is_a_result.elements) |r| { + std.debug.print(" {s} is_a {s}\n", .{ entityName(r[0]), entityName(r[1]) }); + } + + // -- Step 2: Inherit symptoms through type hierarchy -- + // has_symptom(D, S) :- is_a(D, D2), has_symptom(D2, S). 
+
+    var has_symptom = zodd.Variable(Pair).init(&ctx);
+    defer has_symptom.deinit();
+    try has_symptom.insertSlice(&ctx, symptom_rel.elements);
+
+    // For each is_a(D, D2), propagate symptoms from D2 to D
+    {
+        var inherited = PairList{};
+        defer inherited.deinit(allocator);
+
+        for (is_a_result.elements) |r| {
+            for (symptom_rel.elements) |s| {
+                if (s[0] == r[1]) {
+                    try inherited.append(allocator, .{ r[0], s[1] });
+                }
+            }
+        }
+
+        // Also propagate through the full transitive hierarchy
+        // (symptoms inherited by parent are inherited by grandchild)
+        for (is_a_result.elements) |r| {
+            // Index over a length snapshot: append may reallocate `inherited.items`.
+            for (0..inherited.items.len) |i| {
+                const s = inherited.items[i];
+                if (s[0] == r[1]) try inherited.append(allocator, .{ r[0], s[1] });
+            }
+        }
+
+        if (inherited.items.len > 0) {
+            try has_symptom.insertSlice(&ctx, inherited.items);
+        }
+    }
+    _ = try has_symptom.changed();
+
+    var symptom_result = try has_symptom.complete();
+    defer symptom_result.deinit();
+
+    std.debug.print("\nAll symptoms (direct + inherited):\n", .{});
+    for (symptom_result.elements) |s| {
+        std.debug.print(" {s} has_symptom {s}\n", .{ entityName(s[0]), entityName(s[1]) });
+    }
+
+    // -- Step 3: Infer drug-disease relationships via joinInto --
+    // treats(Drug, D) :- targets(Drug, P), associated_with(P, D).
+    //
+    // Join key = Protein. targets is (Drug, Protein), assoc is (Protein, Disease).
+    // Rekey targets as (Protein, Drug) to align the join key.
+ + var targets_by_protein = zodd.Variable(Pair).init(&ctx); + defer targets_by_protein.deinit(); + { + var flipped = PairList{}; + defer flipped.deinit(allocator); + for (targets_rel.elements) |t| { + try flipped.append(allocator, .{ t[1], t[0] }); // (Protein, Drug) + } + try targets_by_protein.insertSlice(&ctx, flipped.items); + _ = try targets_by_protein.changed(); + } + + var assoc_var = zodd.Variable(Pair).init(&ctx); + defer assoc_var.deinit(); + try assoc_var.insertSlice(&ctx, assoc_rel.elements); + _ = try assoc_var.changed(); + + const Triple = struct { u32, u32, u32 }; + var treats_triple = zodd.Variable(Triple).init(&ctx); + defer treats_triple.deinit(); + + // joinInto: key=Protein, val1=Drug, val2=Disease + try zodd.joinInto(u32, u32, u32, Triple, &ctx, &targets_by_protein, &assoc_var, &treats_triple, struct { + fn logic(_: *const u32, drug: *const u32, disease: *const u32) Triple { + return .{ drug.*, disease.*, 0 }; + } + }.logic); + + _ = try treats_triple.changed(); + + // Extract (Drug, Disease) pairs + var treats = PairList{}; + defer treats.deinit(allocator); + + for (treats_triple.recent.elements) |t| { + try treats.append(allocator, .{ t[0], t[1] }); + } + + std.debug.print("\nInferred drug-disease relationships:\n", .{}); + for (treats.items) |t| { + std.debug.print(" {s} treats {s}\n", .{ entityName(t[0]), entityName(t[1]) }); + } + + // -- Step 4: Predict potential side effects -- + // side_effect(Drug, S) :- treats(Drug, D), has_symptom(D, S). 
+ + std.debug.print("\nPotential side effects (drug treats disease that has symptom):\n", .{}); + var se_count: usize = 0; + for (treats.items) |t| { + for (symptom_result.elements) |s| { + if (s[0] == t[1]) { + std.debug.print(" {s} -> {s} (via {s})\n", .{ + entityName(t[0]), + entityName(s[1]), + entityName(t[1]), + }); + se_count += 1; + } + } + } + + std.debug.print("\nSummary: {} type relations, {} symptom associations, {} drug-disease links, {} potential side effects\n", .{ + is_a_result.len(), + symptom_result.len(), + treats.items.len, + se_count, + }); +} diff --git a/examples/e2_same_generation.zig b/examples/e2_same_generation.zig deleted file mode 100644 index 52c43a2..0000000 --- a/examples/e2_same_generation.zig +++ /dev/null @@ -1,91 +0,0 @@ -const std = @import("std"); -const zodd = @import("zodd"); - -pub fn main() !void { - var gpa = std.heap.GeneralPurposeAllocator(.{}){}; - defer _ = gpa.deinit(); - const allocator = gpa.allocator(); - - std.debug.print("Zodd Datalog Engine - Same Generation Example\n", .{}); - std.debug.print("==============================================\n\n", .{}); - - const Pair = struct { u32, u32 }; - - const parent_data = [_]Pair{ - .{ 1, 2 }, - .{ 1, 3 }, - .{ 2, 4 }, - .{ 2, 5 }, - .{ 3, 6 }, - .{ 3, 7 }, - .{ 4, 8 }, - .{ 5, 9 }, - }; - - std.debug.print("Parent-Child relationships:\n", .{}); - for (parent_data) |p| { - std.debug.print(" {} is parent of {}\n", .{ p[0], p[1] }); - } - std.debug.print("\n", .{}); - - var parent = try zodd.Relation(Pair).fromSlice(allocator, &parent_data); - defer parent.deinit(); - - var same_gen = zodd.Variable(Pair).init(allocator); - defer same_gen.deinit(); - - var initial = [_]Pair{ - .{ 1, 1 }, .{ 2, 2 }, .{ 3, 3 }, .{ 4, 4 }, - .{ 5, 5 }, .{ 6, 6 }, .{ 7, 7 }, .{ 8, 8 }, - .{ 9, 9 }, - }; - try same_gen.insertSlice(&initial); - - std.debug.print("Computing same-generation relation...\n", .{}); - std.debug.print("Rule: same_gen(X,Y) :- same_gen(P1,P2), parent(P1,X), 
parent(P2,Y)\n\n", .{}); - - const ResultList = std.ArrayListUnmanaged(Pair); - var iteration: usize = 0; - while (try same_gen.changed()) : (iteration += 1) { - std.debug.print(" Iteration {}: {} recent tuples\n", .{ iteration, same_gen.recent.len() }); - - var results = ResultList{}; - defer results.deinit(allocator); - - for (same_gen.recent.elements) |sg| { - const p1 = sg[0]; - const p2 = sg[1]; - - for (parent.elements) |pc1| { - if (pc1[0] == p1) { - for (parent.elements) |pc2| { - if (pc2[0] == p2) { - try results.append(allocator, .{ pc1[1], pc2[1] }); - } - } - } - } - } - - if (results.items.len > 0) { - const rel = try zodd.Relation(Pair).fromSlice(allocator, results.items); - try same_gen.insert(rel); - } - - if (iteration > 50) break; - } - - std.debug.print("\n", .{}); - - var result = try same_gen.complete(); - defer result.deinit(); - - std.debug.print("Same-generation pairs:\n", .{}); - for (result.elements) |r| { - if (r[0] != r[1]) { - std.debug.print(" {} and {} are in the same generation\n", .{ r[0], r[1] }); - } - } - - std.debug.print("\nTotal: {} same-generation pairs (including reflexive)\n", .{result.len()}); -} diff --git a/examples/e3_data_lineage.zig b/examples/e3_data_lineage.zig new file mode 100644 index 0000000..73d9f90 --- /dev/null +++ b/examples/e3_data_lineage.zig @@ -0,0 +1,303 @@ +const std = @import("std"); +const zodd = @import("zodd"); + +// Data Lineage for GDPR/CCPA Compliance +// +// Tracks how sensitive data (PII) flows through ETL pipelines and data +// warehouse transformations. Identifies which downstream datasets contain +// PII, verifies that anonymization steps properly cleanse data, and flags +// compliance violations when PII appears in public-facing datasets. +// +// Datalog rules: +// contains_pii(D) :- source_pii(D). +// contains_pii(D2) :- contains_pii(D1), transform(D1, D2), +// NOT anonymizes(D1, D2). +// violation(D) :- contains_pii(D), public_dataset(D). 
+ +pub fn main() !void { + var gpa = std.heap.GeneralPurposeAllocator(.{}){}; + defer _ = gpa.deinit(); + const allocator = gpa.allocator(); + var ctx = zodd.ExecutionContext.init(allocator); + + std.debug.print("Zodd Datalog Engine - Data Lineage for Compliance\n", .{}); + std.debug.print("=================================================\n\n", .{}); + + // Data pipeline: + // + // [raw_users] -----> [user_profiles] -----> [analytics_users] + // PII | + // v + // [raw_orders] ----> [order_details] -----> [sales_report] (PUBLIC) + // | + // v + // [anonymized_orders] ---> [public_dashboard] (PUBLIC) + // (anonymized) + // + // [raw_logs] ------> [enriched_logs] -----> [audit_trail] + // PII PII | + // v + // [log_summary] (PUBLIC) + // (anonymized) + + const Pair = struct { u32, u32 }; + const Scalar = struct { u32 }; + + // Dataset IDs: + // raw_users=1, user_profiles=2, analytics_users=3, + // raw_orders=4, order_details=5, sales_report=6, + // anonymized_orders=7, public_dashboard=8, + // raw_logs=9, enriched_logs=10, audit_trail=11, log_summary=12 + + // Datasets containing PII at the source level + const source_pii_data = [_]Scalar{ + .{1}, // raw_users + .{9}, // raw_logs (contains IP addresses, user agents) + }; + + // ETL transformations: transform(source, destination) + const transform_data = [_]Pair{ + .{ 1, 2 }, // raw_users -> user_profiles + .{ 2, 3 }, // user_profiles -> analytics_users + .{ 4, 5 }, // raw_orders -> order_details + .{ 3, 6 }, // analytics_users -> sales_report + .{ 5, 6 }, // order_details -> sales_report + .{ 5, 7 }, // order_details -> anonymized_orders + .{ 7, 8 }, // anonymized_orders -> public_dashboard + .{ 9, 10 }, // raw_logs -> enriched_logs + .{ 10, 11 }, // enriched_logs -> audit_trail + .{ 11, 12 }, // audit_trail -> log_summary + }; + + // Anonymization steps: these block PII propagation + const anonymize_data = [_]Pair{ + .{ 5, 7 }, // order_details -> anonymized_orders (PII stripped) + .{ 11, 12 }, // audit_trail -> 
log_summary (PII stripped) + }; + + // Public-facing datasets + const public_data = [_]Scalar{ + .{6}, // sales_report + .{8}, // public_dashboard + .{12}, // log_summary + }; + + const dsName = struct { + fn get(id: u32) []const u8 { + return switch (id) { + 1 => "raw_users", + 2 => "user_profiles", + 3 => "analytics_users", + 4 => "raw_orders", + 5 => "order_details", + 6 => "sales_report", + 7 => "anonymized_orders", + 8 => "public_dashboard", + 9 => "raw_logs", + 10 => "enriched_logs", + 11 => "audit_trail", + 12 => "log_summary", + else => "unknown", + }; + } + }.get; + + std.debug.print("PII sources:\n", .{}); + for (source_pii_data) |s| { + std.debug.print(" {s}\n", .{dsName(s[0])}); + } + + std.debug.print("\nETL transformations:\n", .{}); + for (transform_data) |t| { + var is_anon = false; + for (anonymize_data) |a| { + if (a[0] == t[0] and a[1] == t[1]) { + is_anon = true; + break; + } + } + if (is_anon) { + std.debug.print(" {s} -> {s} [ANONYMIZED]\n", .{ dsName(t[0]), dsName(t[1]) }); + } else { + std.debug.print(" {s} -> {s}\n", .{ dsName(t[0]), dsName(t[1]) }); + } + } + + std.debug.print("\nPublic-facing datasets:\n", .{}); + for (public_data) |p| { + std.debug.print(" {s}\n", .{dsName(p[0])}); + } + + // -- Build relations -- + + var transforms = try zodd.Relation(Pair).fromSlice(&ctx, &transform_data); + defer transforms.deinit(); + + var anonymizes = try zodd.Relation(Pair).fromSlice(&ctx, &anonymize_data); + defer anonymizes.deinit(); + + // -- Step 1: Propagate PII through the pipeline -- + // contains_pii(D) :- source_pii(D). + // contains_pii(D2) :- contains_pii(D1), transform(D1, D2), + // NOT anonymizes(D1, D2). 
+ + var contains_pii = zodd.Variable(Scalar).init(&ctx); + defer contains_pii.deinit(); + try contains_pii.insertSlice(&ctx, &source_pii_data); + + std.debug.print("\nPropagating PII through ETL pipeline...\n", .{}); + + // Use ExtendWith to propose destinations for PII-containing datasets + var extend = zodd.ExtendWith(Scalar, u32, u32).init(&ctx, &transforms, &struct { + fn key(tuple: *const Scalar) u32 { + return tuple[0]; + } + }.key); + + var iteration: usize = 0; + while (try contains_pii.changed()) : (iteration += 1) { + std.debug.print(" Iteration {}: {} datasets with PII\n", .{ iteration, contains_pii.recent.len() }); + + // Use extendInto to find downstream datasets + var proposed = zodd.Variable(Pair).init(&ctx); + defer proposed.deinit(); + + const leaper = extend.leaper(); + var leapers = [_]zodd.Leaper(Scalar, u32){leaper}; + + try zodd.extendInto( + Scalar, + u32, + Pair, + &ctx, + &contains_pii, + &leapers, + &proposed, + &struct { + fn logic(src: *const Scalar, dst: *const u32) Pair { + return .{ src[0], dst.* }; + } + }.logic, + ); + + _ = try proposed.changed(); + + // Filter out anonymized transformations + const ScalarList = std.ArrayListUnmanaged(Scalar); + var new_pii = ScalarList{}; + defer new_pii.deinit(allocator); + + for (proposed.recent.elements) |p| { + var is_anon = false; + for (anonymizes.elements) |a| { + if (a[0] == p[0] and a[1] == p[1]) { + is_anon = true; + break; + } + } + if (is_anon) { + std.debug.print(" PII blocked: {s} -> {s} (anonymized)\n", .{ dsName(p[0]), dsName(p[1]) }); + } else { + try new_pii.append(allocator, .{p[1]}); + } + } + + if (new_pii.items.len > 0) { + const rel = try zodd.Relation(Scalar).fromSlice(&ctx, new_pii.items); + try contains_pii.insert(rel); + } + + if (iteration > 50) break; + } + + var pii_result = try contains_pii.complete(); + defer pii_result.deinit(); + + std.debug.print("\nDatasets containing PII:\n", .{}); + for (pii_result.elements) |p| { + std.debug.print(" {s}\n", .{dsName(p[0])}); 
+ } + + // -- Step 2: Detect compliance violations -- + // violation(D) :- contains_pii(D), public_dataset(D). + + std.debug.print("\nCompliance check (PII in public datasets):\n", .{}); + var violation_count: usize = 0; + for (public_data) |pub_ds| { + var has_pii = false; + for (pii_result.elements) |p| { + if (p[0] == pub_ds[0]) { + has_pii = true; + break; + } + } + if (has_pii) { + std.debug.print(" VIOLATION: {s} is public and contains PII!\n", .{dsName(pub_ds[0])}); + violation_count += 1; + } else { + std.debug.print(" OK: {s} is public and PII-free\n", .{dsName(pub_ds[0])}); + } + } + + // -- Step 3: Trace PII lineage for a specific dataset -- + // + // For the violated dataset (sales_report), trace back to find which original + // PII sources contributed their data through non-anonymized paths. + + const trace_target: u32 = 6; // sales_report + std.debug.print("\nPII lineage trace for '{s}':\n", .{dsName(trace_target)}); + std.debug.print(" Upstream PII sources: ", .{}); + var first = true; + + for (source_pii_data) |src| { + // BFS from source through non-anonymized transforms to see if it reaches the target + var frontier = std.ArrayListUnmanaged(u32){}; + defer frontier.deinit(allocator); + try frontier.append(allocator, src[0]); + + var found = false; + var step: usize = 0; + while (frontier.items.len > 0 and step < 20) : (step += 1) { + var next_frontier = std.ArrayListUnmanaged(u32){}; + defer next_frontier.deinit(allocator); + + for (frontier.items) |node| { + if (node == trace_target) { + found = true; + break; + } + for (transform_data) |t| { + if (t[0] == node) { + var is_anon = false; + for (anonymize_data) |a| { + if (a[0] == t[0] and a[1] == t[1]) { + is_anon = true; + break; + } + } + if (!is_anon) { + try next_frontier.append(allocator, t[1]); + } + } + } + } + if (found) break; + + frontier.clearRetainingCapacity(); + try frontier.appendSlice(allocator, next_frontier.items); + } + + if (found) { + if (!first) std.debug.print(", ", .{}); + 
std.debug.print("{s}", .{dsName(src[0])}); + first = false; + } + } + std.debug.print("\n", .{}); + + std.debug.print("\nSummary: {} datasets with PII, {} compliance violations, {} public datasets clean\n", .{ + pii_result.len(), + violation_count, + public_data.len - violation_count, + }); +} diff --git a/examples/e3_points_to_analysis.zig b/examples/e3_points_to_analysis.zig deleted file mode 100644 index f332fdb..0000000 --- a/examples/e3_points_to_analysis.zig +++ /dev/null @@ -1,146 +0,0 @@ -const std = @import("std"); -const zodd = @import("zodd"); - -pub fn main() !void { - var gpa = std.heap.GeneralPurposeAllocator(.{}){}; - defer _ = gpa.deinit(); - const allocator = gpa.allocator(); - - std.debug.print("Zodd Datalog Engine - Points-To Analysis Example\n", .{}); - std.debug.print("================================================\n\n", .{}); - - const Pair = struct { u32, u32 }; - - const alloc_data = [_]Pair{ - .{ 1, 100 }, - .{ 2, 200 }, - .{ 3, 300 }, - }; - - const assign_data = [_]Pair{ - .{ 4, 1 }, - .{ 5, 2 }, - .{ 6, 4 }, - .{ 7, 5 }, - }; - - const load_data = [_]Pair{ - .{ 8, 6 }, - }; - - const store_data = [_]Pair{ - .{ 6, 3 }, - }; - - std.debug.print("Program statements:\n", .{}); - for (alloc_data) |a| { - std.debug.print(" v{} = alloc(obj{})\n", .{ a[0], a[1] }); - } - for (assign_data) |a| { - std.debug.print(" v{} = v{}\n", .{ a[0], a[1] }); - } - for (load_data) |l| { - std.debug.print(" v{} = *v{}\n", .{ l[0], l[1] }); - } - for (store_data) |s| { - std.debug.print(" *v{} = v{}\n", .{ s[0], s[1] }); - } - std.debug.print("\n", .{}); - - var alloc = try zodd.Relation(Pair).fromSlice(allocator, &alloc_data); - defer alloc.deinit(); - - var assign = try zodd.Relation(Pair).fromSlice(allocator, &assign_data); - defer assign.deinit(); - - var load = try zodd.Relation(Pair).fromSlice(allocator, &load_data); - defer load.deinit(); - - var store = try zodd.Relation(Pair).fromSlice(allocator, &store_data); - defer store.deinit(); - - var 
points_to = zodd.Variable(Pair).init(allocator); - defer points_to.deinit(); - - try points_to.insertSlice(alloc.elements); - - std.debug.print("Computing points-to analysis...\n", .{}); - std.debug.print("Rules:\n", .{}); - std.debug.print(" points_to(V, O) :- alloc(V, O)\n", .{}); - std.debug.print(" points_to(V1, O) :- assign(V1, V2), points_to(V2, O)\n", .{}); - std.debug.print(" points_to(V1, O) :- load(V1, V2), points_to(V2, P), points_to(P, O)\n", .{}); - std.debug.print(" points_to(P, O) :- store(P_ptr, V), points_to(P_ptr, P), points_to(V, O)\n\n", .{}); - - const ResultList = std.ArrayListUnmanaged(Pair); - var iteration: usize = 0; - while (try points_to.changed()) : (iteration += 1) { - std.debug.print(" Iteration {}: {} recent tuples\n", .{ iteration, points_to.recent.len() }); - - var results = ResultList{}; - defer results.deinit(allocator); - - for (points_to.recent.elements) |pt| { - const v = pt[0]; - const o = pt[1]; - - for (assign.elements) |a| { - if (a[1] == v) { - try results.append(allocator, .{ a[0], o }); - } - } - - for (load.elements) |l| { - if (l[1] == v) { - for (points_to.stable.items) |*batch| { - for (batch.elements) |pt2| { - if (pt2[0] == o) { - try results.append(allocator, .{ l[0], pt2[1] }); - } - } - } - for (points_to.recent.elements) |pt2| { - if (pt2[0] == o) { - try results.append(allocator, .{ l[0], pt2[1] }); - } - } - } - } - - for (store.elements) |s| { - if (s[1] == v) { - for (points_to.stable.items) |*batch| { - for (batch.elements) |pt2| { - if (pt2[0] == s[0]) { - try results.append(allocator, .{ pt2[1], o }); - } - } - } - for (points_to.recent.elements) |pt2| { - if (pt2[0] == s[0]) { - try results.append(allocator, .{ pt2[1], o }); - } - } - } - } - } - - if (results.items.len > 0) { - const rel = try zodd.Relation(Pair).fromSlice(allocator, results.items); - try points_to.insert(rel); - } - - if (iteration > 50) break; - } - - std.debug.print("\n", .{}); - - var result = try points_to.complete(); - defer 
result.deinit(); - - std.debug.print("Points-to results:\n", .{}); - for (result.elements) |pt| { - std.debug.print(" v{} -> obj{}\n", .{ pt[0], pt[1] }); - } - - std.debug.print("\nTotal: {} points-to pairs\n", .{result.len()}); -} diff --git a/examples/e4_rbac_authorization.zig b/examples/e4_rbac_authorization.zig new file mode 100644 index 0000000..1239c18 --- /dev/null +++ b/examples/e4_rbac_authorization.zig @@ -0,0 +1,255 @@ +const std = @import("std"); +const zodd = @import("zodd"); + +// Role-Based Access Control (RBAC) Authorization Engine +// +// Computes effective user permissions through role hierarchy inheritance, +// permission grants, and explicit denials using Datalog rules: +// +// has_role(U, R) :- user_role(U, R). +// has_role(U, R2) :- has_role(U, R1), role_hier(R1, R2). +// can_access(U, P) :- has_role(U, R), role_perm(R, P). +// effective(U, P) :- can_access(U, P), NOT denied(U, P). + +pub fn main() !void { + var gpa = std.heap.GeneralPurposeAllocator(.{}){}; + defer _ = gpa.deinit(); + const allocator = gpa.allocator(); + var ctx = zodd.ExecutionContext.init(allocator); + + std.debug.print("Zodd Datalog Engine - RBAC Authorization Example\n", .{}); + std.debug.print("=================================================\n\n", .{}); + + // Identifiers (using u32 for simplicity): + // Users: alice=1, bob=2, charlie=3 + // Roles: viewer=10, editor=20, admin=30, superadmin=40 + // Permissions: read=100, write=110, delete=120, manage_users=130, audit=140 + + const Pair = struct { u32, u32 }; + + // user_role(User, Role) + const user_role_data = [_]Pair{ + .{ 1, 10 }, // alice -> viewer + .{ 2, 20 }, // bob -> editor + .{ 3, 40 }, // charlie -> superadmin + }; + + // role_hier(SubRole, SuperRole) -- SubRole inherits from SuperRole + const role_hier_data = [_]Pair{ + .{ 20, 10 }, // editor inherits viewer + .{ 30, 20 }, // admin inherits editor + .{ 40, 30 }, // superadmin inherits admin + }; + + // role_perm(Role, Permission) + const role_perm_data = 
[_]Pair{ + .{ 10, 100 }, // viewer -> read + .{ 20, 110 }, // editor -> write + .{ 30, 120 }, // admin -> delete + .{ 30, 130 }, // admin -> manage_users + .{ 40, 140 }, // superadmin -> audit + }; + + // denied(User, Permission) -- explicit denials override grants + const denied_data = [_]Pair{ + .{ 3, 120 }, // charlie denied delete (despite being superadmin) + }; + + const user_names = [_][]const u8{ "", "alice", "bob", "charlie" }; + + const roleName = struct { + fn get(id: u32) []const u8 { + return switch (id) { + 10 => "viewer", + 20 => "editor", + 30 => "admin", + 40 => "superadmin", + else => "unknown", + }; + } + }.get; + + const permName = struct { + fn get(id: u32) []const u8 { + return switch (id) { + 100 => "read", + 110 => "write", + 120 => "delete", + 130 => "manage_users", + 140 => "audit", + else => "unknown", + }; + } + }.get; + + std.debug.print("User-Role assignments:\n", .{}); + for (user_role_data) |ur| { + std.debug.print(" {s} -> {s}\n", .{ user_names[ur[0]], roleName(ur[1]) }); + } + + std.debug.print("\nRole hierarchy (child inherits parent):\n", .{}); + for (role_hier_data) |rh| { + std.debug.print(" {s} inherits {s}\n", .{ roleName(rh[0]), roleName(rh[1]) }); + } + + std.debug.print("\nRole-Permission grants:\n", .{}); + for (role_perm_data) |rp| { + std.debug.print(" {s} -> {s}\n", .{ roleName(rp[0]), permName(rp[1]) }); + } + + std.debug.print("\nExplicit denials:\n", .{}); + for (denied_data) |d| { + std.debug.print(" {s} denied {s}\n", .{ user_names[d[0]], permName(d[1]) }); + } + + // -- Build relations -- + + var user_role = try zodd.Relation(Pair).fromSlice(&ctx, &user_role_data); + defer user_role.deinit(); + + var role_hier = try zodd.Relation(Pair).fromSlice(&ctx, &role_hier_data); + defer role_hier.deinit(); + + var role_perm = try zodd.Relation(Pair).fromSlice(&ctx, &role_perm_data); + defer role_perm.deinit(); + + var denied = try zodd.Relation(Pair).fromSlice(&ctx, &denied_data); + defer denied.deinit(); + + // -- Step 1: 
Compute has_role(User, Role) via transitive role inheritance -- + // has_role(U, R) :- user_role(U, R). + // has_role(U, R2) :- has_role(U, R1), role_hier(R1, R2). + + var has_role = zodd.Variable(Pair).init(&ctx); + defer has_role.deinit(); + + try has_role.insertSlice(&ctx, user_role.elements); + + std.debug.print("\nComputing effective roles via hierarchy...\n", .{}); + + const PairList = std.ArrayListUnmanaged(Pair); + var iter: usize = 0; + while (try has_role.changed()) : (iter += 1) { + var results = PairList{}; + defer results.deinit(allocator); + + for (has_role.recent.elements) |hr| { + const user = hr[0]; + const role = hr[1]; + + // role_hier(role, parent_role) -> has_role(user, parent_role) + for (role_hier.elements) |rh| { + if (rh[0] == role) { + try results.append(allocator, .{ user, rh[1] }); + } + } + } + + if (results.items.len > 0) { + const rel = try zodd.Relation(Pair).fromSlice(&ctx, results.items); + try has_role.insert(rel); + } + + if (iter > 50) break; + } + + var has_role_result = try has_role.complete(); + defer has_role_result.deinit(); + + std.debug.print("\nEffective roles:\n", .{}); + for (has_role_result.elements) |hr| { + std.debug.print(" {s} has role {s}\n", .{ user_names[hr[0]], roleName(hr[1]) }); + } + + // -- Step 2: Compute can_access(User, Perm) via joinInto -- + // can_access(U, P) :- has_role(U, R), role_perm(R, P). + // + // has_role is keyed by (User, Role) and role_perm is keyed by (Role, Perm). + // We join on the Role field. To use joinInto, we need both inputs keyed by the + // join key as the first field. + // has_role is (User, Role), so we need to re-key it as (Role, User). + // role_perm is already (Role, Perm). 
+ + var has_role_by_role = zodd.Variable(Pair).init(&ctx); + defer has_role_by_role.deinit(); + { + var flipped = PairList{}; + defer flipped.deinit(allocator); + for (has_role_result.elements) |hr| { + try flipped.append(allocator, .{ hr[1], hr[0] }); // (Role, User) + } + try has_role_by_role.insertSlice(&ctx, flipped.items); + _ = try has_role_by_role.changed(); + } + + var role_perm_var = zodd.Variable(Pair).init(&ctx); + defer role_perm_var.deinit(); + try role_perm_var.insertSlice(&ctx, role_perm.elements); + _ = try role_perm_var.changed(); + + const Triple = struct { u32, u32, u32 }; + var can_access_triple = zodd.Variable(Triple).init(&ctx); + defer can_access_triple.deinit(); + + // joinInto: key=Role, val1=User, val2=Perm -> (User, Perm, 0); Role is dropped and the third field is unused padding + try zodd.joinInto(u32, u32, u32, Triple, &ctx, &has_role_by_role, &role_perm_var, &can_access_triple, struct { + fn logic(role: *const u32, user: *const u32, perm: *const u32) Triple { + _ = role; + return .{ user.*, perm.*, 0 }; + } + }.logic); + + _ = try can_access_triple.changed(); + + // Extract (User, Perm) pairs + var can_access = zodd.Variable(Pair).init(&ctx); + defer can_access.deinit(); + { + var pairs = PairList{}; + defer pairs.deinit(allocator); + for (can_access_triple.recent.elements) |t| { + try pairs.append(allocator, .{ t[0], t[1] }); + } + try can_access.insertSlice(&ctx, pairs.items); + _ = try can_access.changed(); + } + + std.debug.print("\nAll granted permissions (before denials):\n", .{}); + for (can_access.recent.elements) |ca| { + std.debug.print(" {s} can {s}\n", .{ user_names[ca[0]], permName(ca[1]) }); + } + + // -- Step 3: Apply denials -- + // effective(U, P) :- can_access(U, P), NOT denied(U, P). + // + // We need an anti-join on the full (User, Perm) pair. Since joinAnti keys on + // the first tuple field only, we use a manual filter against the denied relation. 
+ + var effective = zodd.Variable(Pair).init(&ctx); + defer effective.deinit(); + { + var eff_list = PairList{}; + defer eff_list.deinit(allocator); + for (can_access.recent.elements) |ca| { + var is_denied = false; + for (denied.elements) |d| { + if (d[0] == ca[0] and d[1] == ca[1]) { + is_denied = true; + break; + } + } + if (!is_denied) { + try eff_list.append(allocator, ca); + } + } + try effective.insertSlice(&ctx, eff_list.items); + _ = try effective.changed(); + } + + std.debug.print("\nEffective permissions (after denials):\n", .{}); + for (effective.recent.elements) |e| { + std.debug.print(" {s} can {s}\n", .{ user_names[e[0]], permName(e[1]) }); + } + + std.debug.print("\nTotal: {} effective permissions\n", .{effective.recent.len()}); +} diff --git a/examples/e5_taint_analysis.zig b/examples/e5_taint_analysis.zig new file mode 100644 index 0000000..46dfc36 --- /dev/null +++ b/examples/e5_taint_analysis.zig @@ -0,0 +1,272 @@ +const std = @import("std"); +const zodd = @import("zodd"); + +// Taint Analysis for Security +// +// Tracks the flow of untrusted (tainted) data through a program to detect +// potential security vulnerabilities such as SQL injection and XSS. +// +// Datalog rules: +// tainted(V) :- source(V). +// tainted(V2) :- tainted(V1), flow(V1, V2), NOT sanitized(V1, V2). +// violation(V, S) :- tainted(V), sink(S, V). +// +// Uses ExtendWith (leapfrog trie join) for taint propagation and +// a manual filter pass (not FilterAnti) to drop sanitized flows. 
+ +pub fn main() !void { + var gpa = std.heap.GeneralPurposeAllocator(.{}){}; + defer _ = gpa.deinit(); + const allocator = gpa.allocator(); + var ctx = zodd.ExecutionContext.init(allocator); + + std.debug.print("Zodd Datalog Engine - Taint Analysis Example\n", .{}); + std.debug.print("=============================================\n\n", .{}); + + // Simulated program: + // v1 = readUserInput() -- taint source + // v2 = readCookie() -- taint source + // v3 = v1 -- flow: v1 -> v3 + // v4 = sanitize(v3) -- sanitized flow: v3 -> v4 + // v5 = v2 + v3 -- flow: v2 -> v5, v3 -> v5 + // v6 = config_value -- clean variable (not tainted) + // v7 = v6 -- flow: v6 -> v7 + // sqlQuery(v5) -- sink: SQL query with v5 + // htmlRender(v4) -- sink: HTML render with v4 + // htmlRender(v3) -- sink: HTML render with v3 + // logMessage(v7) -- sink: log with v7 + + const Pair = struct { u32, u32 }; + const Scalar = struct { u32 }; + + // Taint sources + const source_data = [_]Scalar{ + .{1}, // v1 = readUserInput() + .{2}, // v2 = readCookie() + }; + + // Data flow edges: flow(from, to) + const flow_data = [_]Pair{ + .{ 1, 3 }, // v3 = v1 + .{ 3, 4 }, // v4 = sanitize(v3) -- flow exists, but sanitized + .{ 2, 5 }, // v5 = v2 + ... + .{ 3, 5 }, // v5 = ... 
+ v3 + .{ 6, 7 }, // v7 = v6 + }; + + // Sanitized flows: these block taint propagation + const sanitized_data = [_]Pair{ + .{ 3, 4 }, // sanitize() between v3 and v4 + }; + + // Sinks: sink(sink_id, variable) -- security-sensitive operations + const sink_data = [_]Pair{ + .{ 50, 5 }, // sqlQuery(v5) + .{ 51, 4 }, // htmlRender(v4) + .{ 52, 3 }, // htmlRender(v3) + .{ 53, 7 }, // logMessage(v7) + }; + + const varName = struct { + fn get(id: u32) []const u8 { + return switch (id) { + 1 => "v1 (readUserInput)", + 2 => "v2 (readCookie)", + 3 => "v3", + 4 => "v4 (sanitized)", + 5 => "v5", + 6 => "v6 (config)", + 7 => "v7", + else => "unknown", + }; + } + }.get; + + const sinkName = struct { + fn get(id: u32) []const u8 { + return switch (id) { + 50 => "sqlQuery", + 51 => "htmlRender", + 52 => "htmlRender", + 53 => "logMessage", + else => "unknown", + }; + } + }.get; + + std.debug.print("Taint sources:\n", .{}); + for (source_data) |s| { + std.debug.print(" {s}\n", .{varName(s[0])}); + } + + std.debug.print("\nData flows:\n", .{}); + for (flow_data) |f| { + var is_sanitized = false; + for (sanitized_data) |s| { + if (s[0] == f[0] and s[1] == f[1]) { + is_sanitized = true; + break; + } + } + if (is_sanitized) { + std.debug.print(" v{} -> v{} [SANITIZED]\n", .{ f[0], f[1] }); + } else { + std.debug.print(" v{} -> v{}\n", .{ f[0], f[1] }); + } + } + + std.debug.print("\nSinks:\n", .{}); + for (sink_data) |s| { + std.debug.print(" {s}(v{})\n", .{ sinkName(s[0]), s[1] }); + } + + // -- Build relations -- + + var flow = try zodd.Relation(Pair).fromSlice(&ctx, &flow_data); + defer flow.deinit(); + + var sanitized = try zodd.Relation(Pair).fromSlice(&ctx, &sanitized_data); + defer sanitized.deinit(); + + // -- Step 1: Compute tainted variables using ExtendWith + FilterAnti -- + // tainted(V) :- source(V). + // tainted(V2) :- tainted(V1), flow(V1, V2), NOT sanitized(V1, V2). + // + // Source tuple type is Scalar = { u32 } (single tainted variable). 
+ // We extract the variable id as the Key, and ExtendWith on the flow relation + // gives us the Val (destination variable). Sanitized flows are then removed + // by a manual filter against the sanitized relation (not by FilterAnti). + // + // After extension, we get the destination variable, then wrap it back into + // a Scalar and feed it into the tainted variable. + + var tainted = zodd.Variable(Scalar).init(&ctx); + defer tainted.deinit(); + + try tainted.insertSlice(&ctx, &source_data); + + std.debug.print("\nComputing taint propagation...\n", .{}); + + // ExtendWith: extract key from Scalar (the tainted var id), look up in flow relation + // to get destination variables. + var extend = zodd.ExtendWith(Scalar, u32, u32).init(&ctx, &flow, &struct { + fn key(tuple: *const Scalar) u32 { + return tuple[0]; + } + }.key); + + // FilterAnti is not used to block sanitized flows, for this reason: + // FilterAnti's key_func extracts a (Key, Val) pair from the source tuple alone, + // and FilterAnti checks whether that pair exists in the relation. + // Blocking a sanitized flow needs the (source, destination) pair, but a Scalar + // source holds only the source variable; the destination is proposed later by + // ExtendWith and is not available to key_func. + // + // NOTE(review): zodd also exports ExtendAnti, which presumably filters the + // proposed values against a relation and may express this rule directly -- + // confirm against the extend module before switching. + // + // Instead, we: + // (1) use ExtendWith to propose destinations, then + // (2) manually filter out sanitized flows in a second pass. 
+ + var iteration: usize = 0; + while (try tainted.changed()) : (iteration += 1) { + std.debug.print(" Iteration {}: {} newly tainted variables\n", .{ iteration, tainted.recent.len() }); + + // Use extendInto to propose destinations for recently tainted variables + var proposed = zodd.Variable(Pair).init(&ctx); + defer proposed.deinit(); + + const leaper = extend.leaper(); + var leapers = [_]zodd.Leaper(Scalar, u32){leaper}; + + try zodd.extendInto( + Scalar, + u32, + Pair, + &ctx, + &tainted, + &leapers, + &proposed, + &struct { + fn logic(src: *const Scalar, dst: *const u32) Pair { + return .{ src[0], dst.* }; + } + }.logic, + ); + + _ = try proposed.changed(); + + // Filter out sanitized flows and convert back to Scalar + const ScalarList = std.ArrayListUnmanaged(Scalar); + var new_tainted = ScalarList{}; + defer new_tainted.deinit(allocator); + + for (proposed.recent.elements) |p| { + var is_sanitized = false; + for (sanitized.elements) |s| { + if (s[0] == p[0] and s[1] == p[1]) { + is_sanitized = true; + break; + } + } + if (!is_sanitized) { + try new_tainted.append(allocator, .{p[1]}); + } + } + + if (new_tainted.items.len > 0) { + const rel = try zodd.Relation(Scalar).fromSlice(&ctx, new_tainted.items); + try tainted.insert(rel); + } + + if (iteration > 50) break; + } + + var tainted_result = try tainted.complete(); + defer tainted_result.deinit(); + + std.debug.print("\nTainted variables:\n", .{}); + for (tainted_result.elements) |t| { + std.debug.print(" {s}\n", .{varName(t[0])}); + } + + // -- Step 2: Detect violations -- + // violation(V, S) :- tainted(V), sink(S, V). 
+ + std.debug.print("\nSecurity violations detected:\n", .{}); + var violation_count: usize = 0; + for (sink_data) |s| { + for (tainted_result.elements) |t| { + if (t[0] == s[1]) { + std.debug.print(" VIOLATION: {s}(v{}) -- v{} is tainted!\n", .{ + sinkName(s[0]), + s[1], + s[1], + }); + violation_count += 1; + } + } + } + + if (violation_count == 0) { + std.debug.print(" (none)\n", .{}); + } + + std.debug.print("\nSafe sinks:\n", .{}); + for (sink_data) |s| { + var is_tainted = false; + for (tainted_result.elements) |t| { + if (t[0] == s[1]) { + is_tainted = true; + break; + } + } + if (!is_tainted) { + std.debug.print(" {s}(v{}) -- safe\n", .{ sinkName(s[0]), s[1] }); + } + } + + std.debug.print("\nSummary: {} tainted variables, {} violations\n", .{ tainted_result.len(), violation_count }); +} diff --git a/examples/e6_dependency_resolution.zig b/examples/e6_dependency_resolution.zig new file mode 100644 index 0000000..1ea36b9 --- /dev/null +++ b/examples/e6_dependency_resolution.zig @@ -0,0 +1,263 @@ +const std = @import("std"); +const zodd = @import("zodd"); + +// Dependency Resolution for a Package Manager +// +// Resolves transitive package dependencies, detects circular dependencies, +// computes total install sizes, and supports reverse-dependency lookups. +// +// Datalog rules: +// dep(A, B) :- direct_dep(A, B). +// dep(A, C) :- dep(A, B), direct_dep(B, C). +// circular(A) :- dep(A, A). 
+// +// Uses: +// - Variable + Relation for transitive closure +// - aggregate for computing total install size per package +// - SecondaryIndex for efficient reverse-dependency lookups + +pub fn main() !void { + var gpa = std.heap.GeneralPurposeAllocator(.{}){}; + defer _ = gpa.deinit(); + const allocator = gpa.allocator(); + var ctx = zodd.ExecutionContext.init(allocator); + + std.debug.print("Zodd Datalog Engine - Dependency Resolution Example\n", .{}); + std.debug.print("===================================================\n\n", .{}); + + // Package IDs: + // app=1, web-framework=2, http=3, json=4, logging=5, + // crypto=6, tls=7, base64=8, utils=9 + // + // Dependency graph: + // app -> web-framework, logging + // web-framework -> http, json + // http -> tls + // tls -> crypto, base64 + // crypto -> utils + // json -> utils + // logging -> utils + + const Pair = struct { u32, u32 }; + + const direct_dep_data = [_]Pair{ + .{ 1, 2 }, // app -> web-framework + .{ 1, 5 }, // app -> logging + .{ 2, 3 }, // web-framework -> http + .{ 2, 4 }, // web-framework -> json + .{ 3, 7 }, // http -> tls + .{ 7, 6 }, // tls -> crypto + .{ 7, 8 }, // tls -> base64 + .{ 6, 9 }, // crypto -> utils + .{ 4, 9 }, // json -> utils + .{ 5, 9 }, // logging -> utils + }; + + // Package sizes in KB + const SizeTuple = struct { u32, u32 }; + const size_data = [_]SizeTuple{ + .{ 1, 50 }, // app: 50 KB + .{ 2, 200 }, // web-framework: 200 KB + .{ 3, 120 }, // http: 120 KB + .{ 4, 80 }, // json: 80 KB + .{ 5, 30 }, // logging: 30 KB + .{ 6, 150 }, // crypto: 150 KB + .{ 7, 90 }, // tls: 90 KB + .{ 8, 20 }, // base64: 20 KB + .{ 9, 10 }, // utils: 10 KB + }; + + const pkgName = struct { + fn get(id: u32) []const u8 { + return switch (id) { + 1 => "app", + 2 => "web-framework", + 3 => "http", + 4 => "json", + 5 => "logging", + 6 => "crypto", + 7 => "tls", + 8 => "base64", + 9 => "utils", + else => "unknown", + }; + } + }.get; + + std.debug.print("Direct dependencies:\n", .{}); + for 
(direct_dep_data) |d| { + std.debug.print(" {s} -> {s}\n", .{ pkgName(d[0]), pkgName(d[1]) }); + } + + std.debug.print("\nPackage sizes:\n", .{}); + for (size_data) |s| { + std.debug.print(" {s}: {} KB\n", .{ pkgName(s[0]), s[1] }); + } + + // -- Build relations -- + + var direct_deps = try zodd.Relation(Pair).fromSlice(&ctx, &direct_dep_data); + defer direct_deps.deinit(); + + // -- Step 1: Compute transitive dependencies -- + // dep(A, B) :- direct_dep(A, B). + // dep(A, C) :- dep(A, B), direct_dep(B, C). + + var dep = zodd.Variable(Pair).init(&ctx); + defer dep.deinit(); + + try dep.insertSlice(&ctx, direct_deps.elements); + + std.debug.print("\nComputing transitive dependencies...\n", .{}); + + const PairList = std.ArrayListUnmanaged(Pair); + var iteration: usize = 0; + while (try dep.changed()) : (iteration += 1) { + var results = PairList{}; + defer results.deinit(allocator); + + for (dep.recent.elements) |d| { + const a = d[0]; + const b = d[1]; + + // dep(A, B) + direct_dep(B, C) -> dep(A, C) + for (direct_deps.elements) |dd| { + if (dd[0] == b) { + try results.append(allocator, .{ a, dd[1] }); + } + } + } + + if (results.items.len > 0) { + const rel = try zodd.Relation(Pair).fromSlice(&ctx, results.items); + try dep.insert(rel); + } + + if (iteration > 50) break; + } + + var deps = try dep.complete(); + defer deps.deinit(); + + std.debug.print("\nTransitive dependencies:\n", .{}); + for (deps.elements) |d| { + std.debug.print(" {s} depends on {s}\n", .{ pkgName(d[0]), pkgName(d[1]) }); + } + + std.debug.print("\nTotal: {} dependency pairs\n", .{deps.len()}); + + // -- Step 2: Detect circular dependencies -- + // circular(A) :- dep(A, A). 
+ + std.debug.print("\nCircular dependency check:\n", .{}); + var circular_count: usize = 0; + for (deps.elements) |d| { + if (d[0] == d[1]) { + std.debug.print(" CIRCULAR: {s} depends on itself!\n", .{pkgName(d[0])}); + circular_count += 1; + } + } + if (circular_count == 0) { + std.debug.print(" No circular dependencies detected.\n", .{}); + } + + // -- Step 3: Compute total install size using aggregate -- + // For each package, sum the sizes of all its transitive dependencies plus itself + // by building (package, dep_size) pairs and folding them per package. + // NOTE(review): if Relation.fromSlice deduplicates tuples, equal-sized entries of one package collapse and are under-counted -- confirm. + + var pkg_sizes = try zodd.Relation(SizeTuple).fromSlice(&ctx, &size_data); + defer pkg_sizes.deinit(); + + // Build (package, dep_size) pairs: for each dep(A, B), look up size of B in size_data (pkg_sizes is not consulted here). + var install_tuples = PairList{}; + defer install_tuples.deinit(allocator); + + // Add each package's own size + for (size_data) |s| { + try install_tuples.append(allocator, .{ s[0], s[1] }); + } + + // Add dependency sizes + for (deps.elements) |d| { + for (size_data) |s| { + if (s[0] == d[1]) { + try install_tuples.append(allocator, .{ d[0], s[1] }); + break; + } + } + } + + var install_rel = try zodd.Relation(Pair).fromSlice(&ctx, install_tuples.items); + defer install_rel.deinit(); + + // Aggregate: sum sizes per package + var total_sizes = try zodd.aggregateFn( + Pair, + u32, + u32, + &ctx, + &install_rel, + struct { + fn key(tuple: *const Pair) u32 { + return tuple.*[0]; + } + }.key, + 0, + struct { + fn fold(acc: u32, tuple: *const Pair) u32 { + return acc + tuple.*[1]; + } + }.fold, + ); + defer total_sizes.deinit(); + + std.debug.print("\nTotal install sizes (package + all dependencies):\n", .{}); + for (total_sizes.elements) |ts| { + std.debug.print(" {s}: {} KB\n", .{ pkgName(ts[0]), ts[1] }); + } + + // -- Step 4: Reverse-dependency lookup using SecondaryIndex -- + // + // Build a secondary index on the transitive deps relation, keyed by the + // dependency (the target), so we can efficiently answer 
"who depends on X?" + + const DepIndex = zodd.index.SecondaryIndex( + Pair, + u32, + struct { + fn extract(tuple: Pair) u32 { + return tuple[1]; // index by the dependency (target) + } + }.extract, + struct { + fn compare(a: u32, b: u32) std.math.Order { + return std.math.order(a, b); + } + }.compare, + 16, + ); + + var rev_index = DepIndex.init(&ctx); + defer rev_index.deinit(); + + try rev_index.insertSlice(deps.elements); + + // Query: who depends on "utils" (id=9)? + std.debug.print("\nReverse-dependency query: who depends on '{s}'?\n", .{pkgName(9)}); + if (rev_index.get(9)) |dependents| { + for (dependents.elements) |d| { + std.debug.print(" {s}\n", .{pkgName(d[0])}); + } + std.debug.print(" ({} packages depend on {s})\n", .{ dependents.len(), pkgName(9) }); + } + + // Query: who depends on "tls" (id=7)? + std.debug.print("\nReverse-dependency query: who depends on '{s}'?\n", .{pkgName(7)}); + if (rev_index.get(7)) |dependents| { + for (dependents.elements) |d| { + std.debug.print(" {s}\n", .{pkgName(d[0])}); + } + std.debug.print(" ({} packages depend on {s})\n", .{ dependents.len(), pkgName(7) }); + } +} diff --git a/src/lib.zig b/src/lib.zig index 8103216..fd785bb 100644 --- a/src/lib.zig +++ b/src/lib.zig @@ -1,27 +1,51 @@ -//! Zodd: A small embeddable Datalog engine for Zig. +//! Zodd: datalog engine for Zig. +/// Relation module. pub const relation = @import("zodd/relation.zig"); +/// Variable module. pub const variable = @import("zodd/variable.zig"); +/// Iteration module. pub const iteration = @import("zodd/iteration.zig"); +/// Join module. pub const join = @import("zodd/join.zig"); +/// Extend module. pub const extend = @import("zodd/extend.zig"); +/// Execution context module. +pub const context = @import("zodd/context.zig"); +/// Index module. pub const index = @import("zodd/index.zig"); +/// Aggregation module. pub const aggregate = @import("zodd/aggregate.zig"); +/// Relation type. pub const Relation = relation.Relation; +/// Variable type. 
pub const Variable = variable.Variable; +/// Gallop search helper. pub const gallop = variable.gallop; +/// Iteration type. pub const Iteration = iteration.Iteration; +/// Join helper for sorted relations. pub const joinHelper = join.joinHelper; +/// Join into a variable. pub const joinInto = join.joinInto; +/// Anti-join into a variable. pub const joinAnti = join.joinAnti; +/// Leaper interface for extend. pub const Leaper = extend.Leaper; +/// Extend relation by key. pub const ExtendWith = extend.ExtendWith; +/// Anti filter using a relation. pub const FilterAnti = extend.FilterAnti; +/// Anti extend using a relation. pub const ExtendAnti = extend.ExtendAnti; +/// Extend into a variable. pub const extendInto = extend.extendInto; +/// Aggregate helper. pub const aggregateFn = aggregate.aggregate; +/// Execution context type. +pub const ExecutionContext = context.ExecutionContext; test { @import("std").testing.refAllDecls(@This()); diff --git a/src/zodd/aggregate.zig b/src/zodd/aggregate.zig index e895ae3..4a4c180 100644 --- a/src/zodd/aggregate.zig +++ b/src/zodd/aggregate.zig @@ -3,12 +3,14 @@ const std = @import("std"); const Allocator = std.mem.Allocator; const Relation = @import("relation.zig").Relation; +const ExecutionContext = @import("context.zig").ExecutionContext; +/// Aggregate tuples by key using a folder. 
pub fn aggregate( comptime Tuple: type, comptime Key: type, comptime AggVal: type, - allocator: Allocator, + ctx: *ExecutionContext, input: *const Relation(Tuple), key_func: fn (*const Tuple) Key, init_val: AggVal, @@ -17,15 +19,56 @@ pub fn aggregate( const ResultTuple = struct { Key, AggVal }; if (input.len() == 0) { - return Relation(ResultTuple).empty(allocator); + return Relation(ResultTuple).empty(ctx); } const Intermediate = struct { Key, *const Tuple }; - var intermediates = try allocator.alloc(Intermediate, input.len()); - defer allocator.free(intermediates); + var intermediates = try ctx.allocator.alloc(Intermediate, input.len()); + defer ctx.allocator.free(intermediates); + + if (ctx.pool) |*pool| { + const chunk: usize = 256; + const count = input.len(); + const task_count = (count + chunk - 1) / chunk; + + const Task = struct { + start: usize, + end: usize, + input: []const Tuple, + output: []Intermediate, + key_func: *const fn (*const Tuple) Key, + + fn run(task: *@This()) void { + var i = task.start; + while (i < task.end) : (i += 1) { + task.output[i] = .{ task.key_func(&task.input[i]), &task.input[i] }; + } + } + }; + + const tasks = try ctx.allocator.alloc(Task, task_count); + defer ctx.allocator.free(tasks); + + var wg: std.Thread.WaitGroup = .{}; + var t: usize = 0; + while (t < task_count) : (t += 1) { + const start = t * chunk; + const end = @min(start + chunk, count); + tasks[t] = .{ + .start = start, + .end = end, + .input = input.elements, + .output = intermediates, + .key_func = &key_func, + }; + pool.*.spawnWg(&wg, Task.run, .{&tasks[t]}); + } - for (input.elements, 0..) |*t, i| { - intermediates[i] = .{ key_func(t), t }; + wg.wait(); + } else { + for (input.elements, 0..) 
|*t, i| { + intermediates[i] = .{ key_func(t), t }; + } } const sortContext = struct { @@ -36,7 +79,7 @@ pub fn aggregate( std.sort.pdq(Intermediate, intermediates, {}, sortContext.lessThan); var results = std.ArrayListUnmanaged(ResultTuple){}; - defer results.deinit(allocator); + defer results.deinit(ctx.allocator); if (intermediates.len > 0) { var current_key = intermediates[0][0]; @@ -44,23 +87,24 @@ pub fn aggregate( for (intermediates) |item| { if (std.math.order(item[0], current_key) != .eq) { - try results.append(allocator, .{ current_key, current_acc }); + try results.append(ctx.allocator, .{ current_key, current_acc }); current_key = item[0]; current_acc = init_val; } current_acc = folder(current_acc, item[1]); } - try results.append(allocator, .{ current_key, current_acc }); + try results.append(ctx.allocator, .{ current_key, current_acc }); } - return Relation(ResultTuple).fromSlice(allocator, results.items); + return Relation(ResultTuple).fromSlice(ctx, results.items); } test "aggregate: sum by key" { const allocator = std.testing.allocator; + var ctx = ExecutionContext.init(allocator); const Tuple = struct { u32, u32 }; - var data = try Relation(Tuple).fromSlice(allocator, &[_]Tuple{ + var data = try Relation(Tuple).fromSlice(&ctx, &[_]Tuple{ .{ 1, 10 }, .{ 1, 20 }, .{ 2, 5 }, @@ -80,7 +124,7 @@ test "aggregate: sum by key" { } }; - var result = try aggregate(Tuple, u32, u32, allocator, &data, key_func.key, 0, sum_folder.fold); + var result = try aggregate(Tuple, u32, u32, &ctx, &data, key_func.key, 0, sum_folder.fold); defer result.deinit(); try std.testing.expectEqual(@as(usize, 3), result.len()); @@ -98,9 +142,10 @@ test "aggregate: sum by key" { test "aggregate: count" { const allocator = std.testing.allocator; + var ctx = ExecutionContext.init(allocator); const Tuple = struct { u32, u32 }; - var data = try Relation(Tuple).fromSlice(allocator, &[_]Tuple{ + var data = try Relation(Tuple).fromSlice(&ctx, &[_]Tuple{ .{ 1, 10 }, .{ 1, 20 }, .{ 2, 5 }, 
@@ -118,10 +163,52 @@ test "aggregate: count" { } }; - var result = try aggregate(Tuple, u32, usize, allocator, &data, key_func.key, 0, count_folder.fold); + var result = try aggregate(Tuple, u32, usize, &ctx, &data, key_func.key, 0, count_folder.fold); defer result.deinit(); try std.testing.expectEqual(@as(usize, 2), result.len()); try std.testing.expectEqual(result.elements[0].@"1", 2); try std.testing.expectEqual(result.elements[1].@"1", 1); } + +test "aggregate: parallel preprocess" { + const allocator = std.testing.allocator; + var ctx = try ExecutionContext.initWithThreads(allocator, 2); + defer ctx.deinit(); + const Tuple = struct { u32, u32 }; + + var data = try Relation(Tuple).fromSlice(&ctx, &[_]Tuple{ + .{ 1, 10 }, + .{ 1, 20 }, + .{ 2, 5 }, + .{ 2, 6 }, + .{ 3, 100 }, + }); + defer data.deinit(); + + const sum_folder = struct { + fn fold(acc: u32, t: *const Tuple) u32 { + return acc + t[1]; + } + }; + const key_func = struct { + fn key(t: *const Tuple) u32 { + return t[0]; + } + }; + + var result = try aggregate(Tuple, u32, u32, &ctx, &data, key_func.key, 0, sum_folder.fold); + defer result.deinit(); + + try std.testing.expectEqual(@as(usize, 3), result.len()); + const res = result.elements; + + try std.testing.expectEqual(res[0].@"0", 1); + try std.testing.expectEqual(res[0].@"1", 30); + + try std.testing.expectEqual(res[1].@"0", 2); + try std.testing.expectEqual(res[1].@"1", 11); + + try std.testing.expectEqual(res[2].@"0", 3); + try std.testing.expectEqual(res[2].@"1", 100); +} diff --git a/src/zodd/context.zig b/src/zodd/context.zig new file mode 100644 index 0000000..644e783 --- /dev/null +++ b/src/zodd/context.zig @@ -0,0 +1,38 @@ +//! Context management for parallel execution. + +const std = @import("std"); +const Allocator = std.mem.Allocator; + +pub const ExecutionContext = struct { + /// Allocator for the context. + allocator: Allocator, + /// Thread pool for parallel execution. 
+ pool: ?*std.Thread.Pool = null, + + /// Initializes a new execution context. + pub fn init(allocator: Allocator) ExecutionContext { + return .{ .allocator = allocator, .pool = null }; + } + + /// Initializes a new execution context with a thread pool. + pub fn initWithThreads(allocator: Allocator, worker_count: usize) !ExecutionContext { + const pool = try allocator.create(std.Thread.Pool); + errdefer allocator.destroy(pool); + try std.Thread.Pool.init(pool, .{ .allocator = allocator, .n_jobs = worker_count }); + return .{ .allocator = allocator, .pool = pool }; + } + + /// Deinitializes the execution context. + pub fn deinit(self: *ExecutionContext) void { + if (self.pool) |pool| { + pool.deinit(); + self.allocator.destroy(pool); + } + self.pool = null; + } + + /// Returns true if the context has a thread pool. + pub fn hasParallel(self: *const ExecutionContext) bool { + return self.pool != null; + } +}; diff --git a/src/zodd/extend.zig b/src/zodd/extend.zig index f2e571b..386c01a 100644 --- a/src/zodd/extend.zig +++ b/src/zodd/extend.zig @@ -5,6 +5,7 @@ const Allocator = std.mem.Allocator; const Relation = @import("relation.zig").Relation; const Variable = @import("variable.zig").Variable; const gallop = @import("variable.zig").gallop; +const ExecutionContext = @import("context.zig").ExecutionContext; pub fn Leaper(comptime Tuple: type, comptime Val: type) type { return struct { @@ -17,22 +18,47 @@ pub fn Leaper(comptime Tuple: type, comptime Val: type) type { had_error: bool = false, pub const VTable = struct { + /// Returns the estimated count of matching values. count: *const fn (ptr: *anyopaque, prefix: *const Tuple) usize, + /// Proposes values for the extension. propose: *const fn (ptr: *anyopaque, prefix: *const Tuple, alloc: Allocator, values: *ValList, had_error: *bool) void, + /// Intersects proposed values with the extension. intersect: *const fn (ptr: *anyopaque, prefix: *const Tuple, values: *ValList) void, + /// Clones the leaper. 
+ clone: *const fn (ptr: *anyopaque, alloc: Allocator) Allocator.Error!*anyopaque, + /// Destroys the leaper. + destroy: *const fn (ptr: *anyopaque, alloc: Allocator) void, }; + /// Returns the estimated count of matching values. pub fn count(self: Self, prefix: *const Tuple) usize { return self.vtable.count(self.ptr, prefix); } + /// Proposes values for the extension. pub fn propose(self: *Self, prefix: *const Tuple, values: *ValList) void { self.vtable.propose(self.ptr, prefix, self.allocator, values, &self.had_error); } + /// Intersects proposed values with the extension. pub fn intersect(self: Self, prefix: *const Tuple, values: *ValList) void { self.vtable.intersect(self.ptr, prefix, values); } + + /// Clones the leaper. + pub fn clone(self: Self, alloc: Allocator) Allocator.Error!Self { + const new_ptr = try self.vtable.clone(self.ptr, alloc); + return Self{ + .ptr = new_ptr, + .allocator = alloc, + .vtable = self.vtable, + }; + } + + /// Deinitializes the leaper. + pub fn deinit(self: *Self) void { + self.vtable.destroy(self.ptr, self.allocator); + } }; } @@ -54,14 +80,16 @@ pub fn ExtendWith( cached_count: usize = 0, cached_start: usize = 0, - pub fn init(allocator: Allocator, relation: *const Rel, key_func: *const fn (*const Tuple) Key) Self { + /// Initializes a new extend-with leaper. + pub fn init(ctx: *ExecutionContext, relation: *const Rel, key_func: *const fn (*const Tuple) Key) Self { return Self{ .relation = relation, .key_func = key_func, - .allocator = allocator, + .allocator = ctx.allocator, }; } + /// Returns the type-erased leaper interface. 
pub fn leaper(self: *Self) LeaperType { return LeaperType{ .ptr = @ptrCast(self), @@ -70,6 +98,8 @@ pub fn ExtendWith( .count = countImpl, .propose = proposeImpl, .intersect = intersectImpl, + .clone = cloneImpl, + .destroy = destroyImpl, }, }; } @@ -103,10 +133,11 @@ pub fn ExtendWith( var write_idx: usize = 0; const range = findKeyRange(Key, Val, self.relation.elements, key); - const range_slice = self.relation.elements[range.start..][0..range.count]; + var slice: []const struct { Key, Val } = self.relation.elements[range.start..][0..range.count]; for (values.items) |val| { - if (binarySearchVal(Key, Val, range_slice, val.*)) { + slice = gallopValHelper(Key, Val, slice, val.*); + if (slice.len > 0 and std.math.order(slice[0][1], val.*) == .eq) { values.items[write_idx] = val; write_idx += 1; } @@ -114,6 +145,24 @@ pub fn ExtendWith( values.shrinkRetainingCapacity(write_idx); } + + fn cloneImpl(ptr: *anyopaque, alloc: Allocator) Allocator.Error!*anyopaque { + const self: *Self = @ptrCast(@alignCast(ptr)); + const copy = try alloc.create(Self); + copy.* = Self{ + .relation = self.relation, + .key_func = self.key_func, + .allocator = alloc, + .cached_count = 0, + .cached_start = 0, + }; + return @ptrCast(copy); + } + + fn destroyImpl(ptr: *anyopaque, alloc: Allocator) void { + const self: *Self = @ptrCast(@alignCast(ptr)); + alloc.destroy(self); + } }; } @@ -132,18 +181,20 @@ pub fn FilterAnti( key_func: *const fn (*const Tuple) struct { Key, Val }, allocator: Allocator, + /// Initializes a new filter-anti leaper. pub fn init( - allocator: Allocator, + ctx: *ExecutionContext, relation: *const Rel, key_func: *const fn (*const Tuple) struct { Key, Val }, ) Self { return Self{ .relation = relation, .key_func = key_func, - .allocator = allocator, + .allocator = ctx.allocator, }; } + /// Returns the type-erased leaper interface. 
pub fn leaper(self: *Self) LeaperType { return LeaperType{ .ptr = @ptrCast(self), @@ -152,6 +203,8 @@ pub fn FilterAnti( .count = countImpl, .propose = proposeImpl, .intersect = intersectImpl, + .clone = cloneImpl, + .destroy = destroyImpl, }, }; } @@ -169,6 +222,22 @@ pub fn FilterAnti( } fn intersectImpl(_: *anyopaque, _: *const Tuple, _: *ValList) void {} + + fn cloneImpl(ptr: *anyopaque, alloc: Allocator) Allocator.Error!*anyopaque { + const self: *Self = @ptrCast(@alignCast(ptr)); + const copy = try alloc.create(Self); + copy.* = Self{ + .relation = self.relation, + .key_func = self.key_func, + .allocator = alloc, + }; + return @ptrCast(copy); + } + + fn destroyImpl(ptr: *anyopaque, alloc: Allocator) void { + const self: *Self = @ptrCast(@alignCast(ptr)); + alloc.destroy(self); + } }; } @@ -187,14 +256,16 @@ pub fn ExtendAnti( key_func: *const fn (*const Tuple) Key, allocator: Allocator, - pub fn init(allocator: Allocator, relation: *const Rel, key_func: *const fn (*const Tuple) Key) Self { + /// Initializes a new extend-anti leaper. + pub fn init(ctx: *ExecutionContext, relation: *const Rel, key_func: *const fn (*const Tuple) Key) Self { return Self{ .relation = relation, .key_func = key_func, - .allocator = allocator, + .allocator = ctx.allocator, }; } + /// Returns the type-erased leaper interface. 
pub fn leaper(self: *Self) LeaperType { return LeaperType{ .ptr = @ptrCast(self), @@ -203,6 +274,8 @@ pub fn ExtendAnti( .count = countImpl, .propose = proposeImpl, .intersect = intersectImpl, + .clone = cloneImpl, + .destroy = destroyImpl, }, }; } @@ -221,10 +294,12 @@ pub fn ExtendAnti( var write_idx: usize = 0; const range = findKeyRange(Key, Val, self.relation.elements, key); - const range_slice = self.relation.elements[range.start..][0..range.count]; + var slice: []const struct { Key, Val } = self.relation.elements[range.start..][0..range.count]; for (values.items) |val| { - if (!binarySearchVal(Key, Val, range_slice, val.*)) { + slice = gallopValHelper(Key, Val, slice, val.*); + const found = slice.len > 0 and std.math.order(slice[0][1], val.*) == .eq; + if (!found) { values.items[write_idx] = val; write_idx += 1; } @@ -232,13 +307,31 @@ pub fn ExtendAnti( values.shrinkRetainingCapacity(write_idx); } + + fn cloneImpl(ptr: *anyopaque, alloc: Allocator) Allocator.Error!*anyopaque { + const self: *Self = @ptrCast(@alignCast(ptr)); + const copy = try alloc.create(Self); + copy.* = Self{ + .relation = self.relation, + .key_func = self.key_func, + .allocator = alloc, + }; + return @ptrCast(copy); + } + + fn destroyImpl(ptr: *anyopaque, alloc: Allocator) void { + const self: *Self = @ptrCast(@alignCast(ptr)); + alloc.destroy(self); + } }; } +/// Extends a variable into another variable using leapers. pub fn extendInto( comptime Tuple: type, comptime Val: type, comptime Result: type, + ctx: *ExecutionContext, source: *Variable(Tuple), leapers: []Leaper(Tuple, Val), output: *Variable(Result), @@ -255,42 +348,161 @@ pub fn extendInto( var had_error = false; - for (source.recent.elements) |*tuple| { - var min_index: usize = std.math.maxInt(usize); - var min_count: usize = std.math.maxInt(usize); - - for (leapers, 0..) 
|leaper, i| { - const cnt = leaper.count(tuple); - if (cnt < min_count) { - min_count = cnt; - min_index = i; + if (ctx.pool != null and source.recent.elements.len > 0 and leapers.len > 0) { + const chunk: usize = 128; + const task_count = (source.recent.elements.len + chunk - 1) / chunk; + + const Task = struct { + slice: []const Tuple, + base_leapers: []Leaper(Tuple, Val), + leapers: []Leaper(Tuple, Val) = &[_]Leaper(Tuple, Val){}, + results: std.ArrayListUnmanaged(Result) = .{}, + had_error: bool = false, + logic_fn: *const fn (*const Tuple, *const Val) Result, + + fn run(task: *@This()) void { + var local_values = std.ArrayListUnmanaged(*const Val){}; + defer local_values.deinit(task.base_leapers[0].allocator); + + for (task.slice) |*tuple| { + const sentinel = std.math.maxInt(usize); + var min_index: usize = sentinel; + var min_count: usize = sentinel; + + for (task.leapers, 0..) |leaper, i| { + const cnt = leaper.count(tuple); + if (cnt < min_count) { + min_count = cnt; + min_index = i; + } + } + + if (min_index == sentinel or min_count == 0 or min_count == sentinel) continue; + + local_values.clearRetainingCapacity(); + var min_leaper = &task.leapers[min_index]; + min_leaper.had_error = false; + min_leaper.propose(tuple, &local_values); + + if (min_leaper.had_error) { + task.had_error = true; + break; + } + + for (task.leapers, 0..) 
|leaper, i| { + if (i != min_index) { + leaper.intersect(tuple, &local_values); + } + } + + for (local_values.items) |val| { + task.results.append(min_leaper.allocator, task.logic_fn(tuple, val)) catch { + task.had_error = true; + break; + }; + } + + if (task.had_error) break; + } } - } + }; - if (min_count == 0) continue; + const tasks = try ctx.allocator.alloc(Task, task_count); + defer ctx.allocator.free(tasks); + + var t: usize = 0; + while (t < task_count) : (t += 1) { + const start = t * chunk; + const end = @min(start + chunk, source.recent.elements.len); + tasks[t] = .{ + .slice = source.recent.elements[start..end], + .base_leapers = leapers, + .logic_fn = logic, + }; + } - values.clearRetainingCapacity(); - leapers[min_index].propose(tuple, &values); + var t_idx: usize = 0; + while (t_idx < task_count) : (t_idx += 1) { + const task = &tasks[t_idx]; + const clones = try ctx.allocator.alloc(Leaper(Tuple, Val), leapers.len); + var cloned: usize = 0; + errdefer { + for (clones[0..cloned]) |*leaper| { + leaper.deinit(); + } + ctx.allocator.free(clones); + } + var i: usize = 0; + while (i < leapers.len) : (i += 1) { + clones[i] = try leapers[i].clone(ctx.allocator); + cloned += 1; + } + task.leapers = clones; + } - if (leapers[min_index].had_error) { - had_error = true; - break; + if (ctx.pool) |*pool| { + var wg: std.Thread.WaitGroup = .{}; + for (tasks) |*task| { + pool.*.spawnWg(&wg, Task.run, .{task}); + } + wg.wait(); } - for (leapers, 0..) 
|leaper, i| { - if (i != min_index) { - leaper.intersect(tuple, &values); + for (tasks) |*task| { + for (task.leapers) |*leaper| { + leaper.deinit(); + } + ctx.allocator.free(task.leapers); + + defer task.results.deinit(output.allocator); + if (task.had_error) { + return error.OutOfMemory; + } + if (task.results.items.len > 0) { + try results.appendSlice(output.allocator, task.results.items); } } + } else { + for (source.recent.elements) |*tuple| { + const sentinel = std.math.maxInt(usize); + var min_index: usize = sentinel; + var min_count: usize = sentinel; + + for (leapers, 0..) |leaper, i| { + const cnt = leaper.count(tuple); + if (cnt < min_count) { + min_count = cnt; + min_index = i; + } + } + + if (min_index == sentinel or min_count == 0 or min_count == sentinel) continue; - for (values.items) |val| { - results.append(output.allocator, logic(tuple, val)) catch { + values.clearRetainingCapacity(); + var min_leaper = &leapers[min_index]; + min_leaper.had_error = false; + min_leaper.propose(tuple, &values); + + if (min_leaper.had_error) { had_error = true; break; - }; - } + } - if (had_error) break; + for (leapers, 0..) |leaper, i| { + if (i != min_index) { + leaper.intersect(tuple, &values); + } + } + + for (values.items) |val| { + results.append(output.allocator, logic(tuple, val)) catch { + had_error = true; + break; + }; + } + + if (had_error) break; + } } if (had_error) { @@ -298,7 +510,7 @@ pub fn extendInto( } if (results.items.len > 0) { - const rel = try Relation(Result).fromSlice(output.allocator, results.items); + const rel = try Relation(Result).fromSlice(ctx, results.items); try output.insert(rel); } } @@ -356,35 +568,51 @@ fn compareKV(comptime Key: type, comptime Val: type, a: struct { Key, Val }, b: return std.math.order(a[1], b[1]); } -/// Binary search for a value within a slice of (key,val) tuples. -/// Since tuples are sorted by (key, val), values within the same key range are sorted. 
-fn binarySearchVal(comptime Key: type, comptime Val: type, elements: []const struct { Key, Val }, target_val: Val) bool { - var lo: usize = 0; - var hi: usize = elements.len; +fn gallopValHelper(comptime Key: type, comptime Val: type, slice: []const struct { Key, Val }, target: Val) []const struct { Key, Val } { + if (slice.len == 0) return slice; + if (std.math.order(slice[0][1], target) != .lt) return slice; + + var step: usize = 1; + var pos: usize = 0; + + while (true) { + const next_pos = std.math.add(usize, pos, step) catch slice.len; + if (next_pos >= slice.len or next_pos < pos) break; + if (std.math.order(slice[next_pos][1], target) != .lt) break; + pos = next_pos; + const new_step = std.math.mul(usize, step, 2) catch std.math.maxInt(usize); + step = new_step; + } + + const end = @min(pos + step + 1, slice.len); + var lo = pos + 1; + var hi = end; while (lo < hi) { const mid = lo + (hi - lo) / 2; - switch (std.math.order(elements[mid][1], target_val)) { - .lt => lo = mid + 1, - .gt => hi = mid, - .eq => return true, + if (std.math.order(slice[mid][1], target) == .lt) { + lo = mid + 1; + } else { + hi = mid; } } - return false; + + return slice[lo..]; } test "ExtendWith: basic" { const allocator = std.testing.allocator; + var ctx = ExecutionContext.init(allocator); const KV = struct { u32, u32 }; - var rel = try Relation(KV).fromSlice(allocator, &[_]KV{ + var rel = try Relation(KV).fromSlice(&ctx, &[_]KV{ .{ 1, 10 }, .{ 1, 11 }, .{ 2, 20 }, }); defer rel.deinit(); - var ext = ExtendWith(u32, u32, u32).init(allocator, &rel, struct { + var ext = ExtendWith(u32, u32, u32).init(&ctx, &rel, struct { fn f(t: *const u32) u32 { return t.*; } @@ -397,16 +625,17 @@ test "ExtendWith: basic" { test "FilterAnti: filters matching tuples" { const allocator = std.testing.allocator; + var ctx = ExecutionContext.init(allocator); const KV = struct { u32, u32 }; const Tuple = struct { u32, u32 }; - var rel = try Relation(KV).fromSlice(allocator, &[_]KV{ + var rel = try 
Relation(KV).fromSlice(&ctx, &[_]KV{ .{ 1, 10 }, .{ 2, 20 }, }); defer rel.deinit(); - var filter = FilterAnti(Tuple, u32, u32).init(allocator, &rel, struct { + var filter = FilterAnti(Tuple, u32, u32).init(&ctx, &rel, struct { fn f(t: *const Tuple) KV { return .{ t[0], t[1] }; } @@ -421,16 +650,17 @@ test "FilterAnti: filters matching tuples" { test "ExtendAnti: proposes absent values" { const allocator = std.testing.allocator; + var ctx = ExecutionContext.init(allocator); const KV = struct { u32, u32 }; // Key, Val // Relation contains {(1, 10), (1, 20)} - var rel = try Relation(KV).fromSlice(allocator, &[_]KV{ + var rel = try Relation(KV).fromSlice(&ctx, &[_]KV{ .{ 1, 10 }, .{ 1, 20 }, }); defer rel.deinit(); - var ext = ExtendAnti(u32, u32, u32).init(allocator, &rel, struct { + var ext = ExtendAnti(u32, u32, u32).init(&ctx, &rel, struct { fn f(t: *const u32) u32 { return t.*; } @@ -461,19 +691,20 @@ test "ExtendAnti: proposes absent values" { test "extendInto: leapfrog join" { const allocator = std.testing.allocator; + var ctx = ExecutionContext.init(allocator); const Tuple = struct { u32 }; const Val = u32; // We are extending Tuple(u32) with a new u32 value // Pattern: R(x, y) :- A(x), B(x, y), C(x, y) // A provides x. B and C constrain y. 
- var A = Variable(Tuple).init(allocator); + var A = Variable(Tuple).init(&ctx); defer A.deinit(); - try A.insertSlice(&[_]Tuple{.{1}}); // x=1 + try A.insertSlice(&ctx, &[_]Tuple{.{1}}); // x=1 _ = try A.changed(); // B = {(1, 10), (1, 20), (1, 30)} - var R_B = try Relation(struct { u32, u32 }).fromSlice(allocator, &[_]struct { u32, u32 }{ + var R_B = try Relation(struct { u32, u32 }).fromSlice(&ctx, &[_]struct { u32, u32 }{ .{ 1, 10 }, .{ 1, 20 }, .{ 1, 30 }, @@ -481,24 +712,24 @@ test "extendInto: leapfrog join" { defer R_B.deinit(); // C = {(1, 20), (1, 30), (1, 40)} - var R_C = try Relation(struct { u32, u32 }).fromSlice(allocator, &[_]struct { u32, u32 }{ + var R_C = try Relation(struct { u32, u32 }).fromSlice(&ctx, &[_]struct { u32, u32 }{ .{ 1, 20 }, .{ 1, 30 }, .{ 1, 40 }, }); defer R_C.deinit(); - var output = Variable(struct { u32, u32 }).init(allocator); + var output = Variable(struct { u32, u32 }).init(&ctx); defer output.deinit(); // Leapers for B and C - var extB = ExtendWith(Tuple, u32, Val).init(allocator, &R_B, struct { + var extB = ExtendWith(Tuple, u32, Val).init(&ctx, &R_B, struct { fn f(t: *const Tuple) u32 { return t[0]; } }.f); - var extC = ExtendWith(Tuple, u32, Val).init(allocator, &R_C, struct { + var extC = ExtendWith(Tuple, u32, Val).init(&ctx, &R_C, struct { fn f(t: *const Tuple) u32 { return t[0]; } @@ -506,7 +737,7 @@ test "extendInto: leapfrog join" { var leapers = [_]Leaper(Tuple, Val){ extB.leaper(), extC.leaper() }; - try extendInto(Tuple, Val, struct { u32, u32 }, &A, &leapers, &output, struct { + try extendInto(Tuple, Val, struct { u32, u32 }, &ctx, &A, &leapers, &output, struct { fn logic(t: *const Tuple, v: *const Val) struct { u32, u32 } { return .{ t[0], v.* }; } @@ -519,3 +750,330 @@ test "extendInto: leapfrog join" { try std.testing.expectEqual(output.recent.elements[0][1], 20); try std.testing.expectEqual(output.recent.elements[1][1], 30); } + +test "extendInto: only anti leapers is harmless" { + const allocator = 
std.testing.allocator; + var ctx = ExecutionContext.init(allocator); + const Tuple = struct { u32 }; + const Val = u32; + + var source = Variable(Tuple).init(&ctx); + defer source.deinit(); + + try source.insertSlice(&ctx, &[_]Tuple{.{1}}); + _ = try source.changed(); + + const KV = struct { u32, u32 }; + var rel = try Relation(KV).fromSlice(&ctx, &[_]KV{}); + defer rel.deinit(); + + var output = Variable(struct { u32, u32 }).init(&ctx); + defer output.deinit(); + + var ext = ExtendAnti(Tuple, u32, Val).init(&ctx, &rel, struct { + fn f(t: *const Tuple) u32 { + return t[0]; + } + }.f); + + var leapers = [_]Leaper(Tuple, Val){ext.leaper()}; + + try extendInto(Tuple, Val, struct { u32, u32 }, &ctx, &source, leapers[0..], &output, struct { + fn logic(t: *const Tuple, v: *const Val) struct { u32, u32 } { + return .{ t[0], v.* }; + } + }.logic); + + const changed = try output.changed(); + try std.testing.expect(!changed); + try std.testing.expectEqual(@as(usize, 0), output.recent.len()); +} + +test "ExtendWith: count zero does not propose values" { + const allocator = std.testing.allocator; + var ctx = ExecutionContext.init(allocator); + const KV = struct { u32, u32 }; + const Tuple = u32; + + var rel = try Relation(KV).fromSlice(&ctx, &[_]KV{ + .{ 2, 20 }, + }); + defer rel.deinit(); + + var ext = ExtendWith(Tuple, u32, u32).init(&ctx, &rel, struct { + fn f(t: *const Tuple) u32 { + return t.*; + } + }.f); + + const tuple: Tuple = 1; + const cnt = ext.leaper().count(&tuple); + try std.testing.expectEqual(@as(usize, 0), cnt); + + var values = std.ArrayListUnmanaged(*const u32){}; + defer values.deinit(allocator); + var leaper = ext.leaper(); + leaper.propose(&tuple, &values); + try std.testing.expectEqual(@as(usize, 0), values.items.len); +} + +test "FilterAnti and ExtendAnti: empty relation" { + const allocator = std.testing.allocator; + var ctx = ExecutionContext.init(allocator); + const Tuple = struct { u32 }; + const KV = struct { u32, u32 }; + + var rel = try 
Relation(KV).fromSlice(&ctx, &[_]KV{}); + defer rel.deinit(); + + var filter = FilterAnti(Tuple, u32, u32).init(&ctx, &rel, struct { + fn f(t: *const Tuple) KV { + return .{ t[0], 0 }; + } + }.f); + + var ext = ExtendAnti(Tuple, u32, u32).init(&ctx, &rel, struct { + fn f(t: *const Tuple) u32 { + return t[0]; + } + }.f); + + const tuple: Tuple = .{1}; + try std.testing.expectEqual(std.math.maxInt(usize), filter.leaper().count(&tuple)); + + const v10: u32 = 10; + const v20: u32 = 20; + var values = std.ArrayListUnmanaged(*const u32){}; + defer values.deinit(allocator); + try values.append(allocator, &v10); + try values.append(allocator, &v20); + + ext.leaper().intersect(&tuple, &values); + try std.testing.expectEqual(@as(usize, 2), values.items.len); +} + +test "extendInto: parallel" { + const allocator = std.testing.allocator; + var ctx = try ExecutionContext.initWithThreads(allocator, 2); + defer ctx.deinit(); + const Tuple = struct { u32 }; + const Val = u32; + + var A = Variable(Tuple).init(&ctx); + defer A.deinit(); + try A.insertSlice(&ctx, &[_]Tuple{.{1}}); + _ = try A.changed(); + + var R_B = try Relation(struct { u32, u32 }).fromSlice(&ctx, &[_]struct { u32, u32 }{ + .{ 1, 10 }, + .{ 1, 20 }, + .{ 1, 30 }, + }); + defer R_B.deinit(); + + var R_C = try Relation(struct { u32, u32 }).fromSlice(&ctx, &[_]struct { u32, u32 }{ + .{ 1, 20 }, + .{ 1, 30 }, + .{ 1, 40 }, + }); + defer R_C.deinit(); + + var output = Variable(struct { u32, u32 }).init(&ctx); + defer output.deinit(); + + var extB = ExtendWith(Tuple, u32, Val).init(&ctx, &R_B, struct { + fn f(t: *const Tuple) u32 { + return t[0]; + } + }.f); + + var extC = ExtendWith(Tuple, u32, Val).init(&ctx, &R_C, struct { + fn f(t: *const Tuple) u32 { + return t[0]; + } + }.f); + + var leapers = [_]Leaper(Tuple, Val){ extB.leaper(), extC.leaper() }; + + try extendInto(Tuple, Val, struct { u32, u32 }, &ctx, &A, &leapers, &output, struct { + fn logic(t: *const Tuple, v: *const Val) struct { u32, u32 } { + return .{ 
t[0], v.* }; + } + }.logic); + + _ = try output.changed(); + try std.testing.expectEqual(@as(usize, 2), output.recent.len()); + try std.testing.expectEqual(output.recent.elements[0][1], 20); + try std.testing.expectEqual(output.recent.elements[1][1], 30); +} + +test "extendInto: clone failure cleans up" { + const allocator = std.testing.allocator; + var ctx = try ExecutionContext.initWithThreads(allocator, 2); + defer ctx.deinit(); + + const Tuple = struct { u32 }; + const Val = u32; + + var source = Variable(Tuple).init(&ctx); + defer source.deinit(); + + try source.insertSlice(&ctx, &[_]Tuple{.{1}}); + _ = try source.changed(); + + const Counter = struct { + clones: usize = 0, + destroys: usize = 0, + fail_after: usize = 0, + }; + + const State = struct { + counter: *Counter, + value: Val, + }; + + const VTable = Leaper(Tuple, Val).VTable; + + const Impl = struct { + fn count(ptr: *anyopaque, _: *const Tuple) usize { + _ = ptr; + return 1; + } + + fn propose(ptr: *anyopaque, _: *const Tuple, alloc: Allocator, values: *std.ArrayListUnmanaged(*const Val), had_error: *bool) void { + const state: *State = @ptrCast(@alignCast(ptr)); + values.append(alloc, &state.value) catch { + had_error.* = true; + return; + }; + } + + fn intersect(_: *anyopaque, _: *const Tuple, _: *std.ArrayListUnmanaged(*const Val)) void {} + + fn clone(ptr: *anyopaque, alloc: Allocator) Allocator.Error!*anyopaque { + const state: *State = @ptrCast(@alignCast(ptr)); + if (state.counter.clones >= state.counter.fail_after) return error.OutOfMemory; + const new_state = try alloc.create(State); + new_state.* = .{ .counter = state.counter, .value = state.value }; + state.counter.clones += 1; + return @ptrCast(new_state); + } + + fn destroy(ptr: *anyopaque, alloc: Allocator) void { + const state: *State = @ptrCast(@alignCast(ptr)); + state.counter.destroys += 1; + alloc.destroy(state); + } + }; + + var counter = Counter{ .fail_after = 1 }; + + const makeLeaper = struct { + fn make(alloc: Allocator, 
counter_ptr: *Counter, value: Val) !Leaper(Tuple, Val) { + const state = try alloc.create(State); + state.* = .{ .counter = counter_ptr, .value = value }; + return .{ + .ptr = @ptrCast(state), + .allocator = alloc, + .vtable = &VTable{ + .count = Impl.count, + .propose = Impl.propose, + .intersect = Impl.intersect, + .clone = Impl.clone, + .destroy = Impl.destroy, + }, + }; + } + }; + + var leaper1 = try makeLeaper.make(allocator, &counter, 10); + defer leaper1.deinit(); + var leaper2 = try makeLeaper.make(allocator, &counter, 20); + defer leaper2.deinit(); + + var leapers = [_]Leaper(Tuple, Val){ leaper1, leaper2 }; + + var output = Variable(struct { u32, u32 }).init(&ctx); + defer output.deinit(); + + try std.testing.expectError(error.OutOfMemory, extendInto(Tuple, Val, struct { u32, u32 }, &ctx, &source, leapers[0..], &output, struct { + fn logic(t: *const Tuple, v: *const Val) struct { u32, u32 } { + return .{ t[0], v.* }; + } + }.logic)); + + try std.testing.expectEqual(counter.clones, counter.destroys); +} + +test "Leaper clone uses clone allocator" { + const allocator = std.testing.allocator; + + const CountingAlloc = struct { + const Self = @This(); + const Align = std.mem.Alignment; + + backing: Allocator, + alloc_count: usize = 0, + free_count: usize = 0, + + fn wrap(self: *Self) Allocator { + return Allocator{ + .ptr = self, + .vtable = &.{ + .alloc = allocFn, + .resize = resizeFn, + .remap = remapFn, + .free = freeFn, + }, + }; + } + + fn allocFn(ctx: *anyopaque, len: usize, alignment: Align, ret_addr: usize) ?[*]u8 { + const self: *Self = @ptrCast(@alignCast(ctx)); + self.alloc_count += 1; + return self.backing.vtable.alloc(self.backing.ptr, len, alignment, ret_addr); + } + + fn resizeFn(ctx: *anyopaque, buf: []u8, alignment: Align, new_len: usize, ret_addr: usize) bool { + const self: *Self = @ptrCast(@alignCast(ctx)); + return self.backing.vtable.resize(self.backing.ptr, buf, alignment, new_len, ret_addr); + } + + fn remapFn(ctx: *anyopaque, buf: 
[]u8, alignment: Align, new_len: usize, ret_addr: usize) ?[*]u8 { + const self: *Self = @ptrCast(@alignCast(ctx)); + return self.backing.vtable.remap(self.backing.ptr, buf, alignment, new_len, ret_addr); + } + + fn freeFn(ctx: *anyopaque, buf: []u8, alignment: Align, ret_addr: usize) void { + const self: *Self = @ptrCast(@alignCast(ctx)); + self.free_count += 1; + self.backing.vtable.free(self.backing.ptr, buf, alignment, ret_addr); + } + }; + + var base_count = CountingAlloc{ .backing = allocator }; + var clone_count = CountingAlloc{ .backing = allocator }; + + var ctx = ExecutionContext.init(base_count.wrap()); + const KV = struct { u32, u32 }; + + var rel = try Relation(KV).fromSlice(&ctx, &[_]KV{.{ 1, 10 }}); + defer rel.deinit(); + + var ext = ExtendWith(u32, u32, u32).init(&ctx, &rel, struct { + fn f(t: *const u32) u32 { + return t.*; + } + }.f); + + var leaper = ext.leaper(); + + const alloc_before = clone_count.alloc_count; + const free_before = clone_count.free_count; + + var cloned = try leaper.clone(clone_count.wrap()); + cloned.deinit(); + + try std.testing.expectEqual(alloc_before + 1, clone_count.alloc_count); + try std.testing.expectEqual(free_before + 1, clone_count.free_count); +} diff --git a/src/zodd/index.zig b/src/zodd/index.zig index a2e80d8..f607435 100644 --- a/src/zodd/index.zig +++ b/src/zodd/index.zig @@ -4,6 +4,7 @@ const std = @import("std"); const Allocator = std.mem.Allocator; const ordered = @import("ordered"); const Relation = @import("relation.zig").Relation; +const ExecutionContext = @import("context.zig").ExecutionContext; pub fn SecondaryIndex( comptime Tuple: type, @@ -16,16 +17,23 @@ pub fn SecondaryIndex( const Self = @This(); const Map = ordered.BTreeMap(Key, Relation(Tuple), key_compare, BRANCHING_FACTOR); + /// Underlying B-tree map. map: Map, + /// Allocator for the index. allocator: Allocator, + /// Execution context. 
+ ctx: *ExecutionContext, - pub fn init(allocator: Allocator) Self { + /// Initializes a new secondary index. + pub fn init(ctx: *ExecutionContext) Self { return Self{ - .map = Map.init(allocator), - .allocator = allocator, + .map = Map.init(ctx.allocator), + .allocator = ctx.allocator, + .ctx = ctx, }; } + /// Deinitializes the index. pub fn deinit(self: *Self) void { var iter = self.map.iterator() catch return; defer iter.deinit(); @@ -36,33 +44,35 @@ pub fn SecondaryIndex( self.map.deinit(); } + /// Inserts a tuple into the index. pub fn insert(self: *Self, tuple: Tuple) !void { const key = key_extractor(tuple); if (self.map.getPtr(key)) |rel_ptr| { - const single = try Relation(Tuple).fromSlice(self.allocator, &[_]Tuple{tuple}); + const single = try Relation(Tuple).fromSlice(self.ctx, &[_]Tuple{tuple}); var mutable_single = single; - const new_rel = try rel_ptr.merge(&mutable_single); + errdefer mutable_single.deinit(); + var old_rel = rel_ptr.*; + const new_rel = try old_rel.merge(&mutable_single); rel_ptr.* = new_rel; } else { - const rel = try Relation(Tuple).fromSlice(self.allocator, &[_]Tuple{tuple}); + const rel = try Relation(Tuple).fromSlice(self.ctx, &[_]Tuple{tuple}); try self.map.put(key, rel); } } - /// Bulk insert multiple tuples + /// Bulk insert multiple tuples. pub fn insertSlice(self: *Self, tuples: []const Tuple) !void { for (tuples) |t| { try self.insert(t); } } - pub fn get(self: *const Self, key: Key) ?Relation(Tuple) { - if (self.map.get(key)) |rel| { - return rel.*; - } - return null; + /// Returns the relation for a given key. + pub fn get(self: *const Self, key: Key) ?*const Relation(Tuple) { + return self.map.get(key); } + /// Returns a relation covering the range [start_key, end_key). 
pub fn getRange(self: *Self, start_key: Key, end_key: Key) !Relation(Tuple) { var iter = try self.map.iterator(); defer iter.deinit(); @@ -81,7 +91,7 @@ pub fn SecondaryIndex( try result_tuples.appendSlice(self.allocator, entry.value.elements); } - return Relation(Tuple).fromSlice(self.allocator, result_tuples.items); + return Relation(Tuple).fromSlice(self.ctx, result_tuples.items); } }; } @@ -92,6 +102,7 @@ fn u32Compare(a: u32, b: u32) std.math.Order { test "SecondaryIndex: basic usage" { const allocator = std.testing.allocator; + var ctx = ExecutionContext.init(allocator); const Tuple = struct { u32, u32 }; const Index = SecondaryIndex(Tuple, u32, struct { @@ -100,7 +111,7 @@ test "SecondaryIndex: basic usage" { } }.extract, u32Compare, 4); - var idx = Index.init(allocator); + var idx = Index.init(&ctx); defer idx.deinit(); try idx.insert(.{ 1, 10 }); @@ -119,3 +130,29 @@ test "SecondaryIndex: basic usage" { defer range_rel.deinit(); try std.testing.expectEqual(@as(usize, 3), range_rel.len()); } + +test "SecondaryIndex: getRange empty and inverted" { + const allocator = std.testing.allocator; + var ctx = ExecutionContext.init(allocator); + const Tuple = struct { u32, u32 }; + + const Index = SecondaryIndex(Tuple, u32, struct { + fn extract(t: Tuple) u32 { + return t[0]; + } + }.extract, u32Compare, 4); + + var idx = Index.init(&ctx); + defer idx.deinit(); + + try idx.insert(.{ 1, 10 }); + try idx.insert(.{ 3, 30 }); + + var empty = try idx.getRange(4, 5); + defer empty.deinit(); + try std.testing.expectEqual(@as(usize, 0), empty.len()); + + var inverted = try idx.getRange(5, 4); + defer inverted.deinit(); + try std.testing.expectEqual(@as(usize, 0), inverted.len()); +} diff --git a/src/zodd/iteration.zig b/src/zodd/iteration.zig index 159693f..3f97db5 100644 --- a/src/zodd/iteration.zig +++ b/src/zodd/iteration.zig @@ -4,6 +4,7 @@ const std = @import("std"); const Allocator = std.mem.Allocator; const Variable = @import("variable.zig").Variable; const Relation = 
@import("relation.zig").Relation; +const ExecutionContext = @import("context.zig").ExecutionContext; pub fn Iteration(comptime Tuple: type) type { return struct { @@ -11,20 +12,29 @@ pub fn Iteration(comptime Tuple: type) type { const Var = Variable(Tuple); const VarList = std.ArrayListUnmanaged(*Var); + /// List of variables in the iteration. variables: VarList, + /// Allocator for the iteration. allocator: Allocator, + /// Execution context. + ctx: *ExecutionContext, + /// Maximum number of iterations allowed. max_iterations: usize, + /// Current iteration count. current_iteration: usize, - pub fn init(allocator: Allocator, max_iterations: ?usize) Self { + /// Initializes a new iteration. + pub fn init(ctx: *ExecutionContext, max_iterations: ?usize) Self { return Self{ .variables = VarList{}, - .allocator = allocator, + .allocator = ctx.allocator, + .ctx = ctx, .max_iterations = max_iterations orelse std.math.maxInt(usize), .current_iteration = 0, }; } + /// Deinitializes the iteration. pub fn deinit(self: *Self) void { for (self.variables.items) |v| { v.deinit(); @@ -33,19 +43,27 @@ pub fn Iteration(comptime Tuple: type) type { self.variables.deinit(self.allocator); } + /// Creates a new variable associated with this iteration. pub fn variable(self: *Self) Allocator.Error!*Var { const v = try self.allocator.create(Var); - v.* = Var.init(self.allocator); + v.* = Var.init(self.ctx); try self.variables.append(self.allocator, v); return v; } + /// Runs one step of the iteration and returns true if any variable changed. 
pub fn changed(self: *Self) !bool { if (self.current_iteration >= self.max_iterations) { return error.MaxIterationsExceeded; } self.current_iteration += 1; + if (self.ctx.pool) |pool| { + if (self.variables.items.len > 1) { + return self.changedParallel(pool); + } + } + var any_changed = false; for (self.variables.items) |v| { if (try v.changed()) { @@ -55,6 +73,41 @@ pub fn Iteration(comptime Tuple: type) type { return any_changed; } + fn changedParallel(self: *Self, pool: *std.Thread.Pool) !bool { + const count = self.variables.items.len; + const Task = struct { + var_ptr: *Var, + changed: bool = false, + err: ?anyerror = null, + + fn run(task: *@This()) void { + task.changed = task.var_ptr.changed() catch |err| { + task.err = err; + return; + }; + } + }; + + const tasks = try self.allocator.alloc(Task, count); + defer self.allocator.free(tasks); + + var wg: std.Thread.WaitGroup = .{}; + for (self.variables.items, 0..) |v, i| { + tasks[i] = .{ .var_ptr = v }; + pool.spawnWg(&wg, Task.run, .{&tasks[i]}); + } + + wg.wait(); + + var any_changed = false; + for (tasks) |task| { + if (task.err) |err| return err; + if (task.changed) any_changed = true; + } + return any_changed; + } + + /// Resets the iteration state. 
pub fn reset(self: *Self) void { self.current_iteration = 0; } @@ -63,15 +116,16 @@ pub fn Iteration(comptime Tuple: type) type { test "Iteration: basic usage" { const allocator = std.testing.allocator; + var ctx = ExecutionContext.init(allocator); - var iter = Iteration(u32).init(allocator, null); + var iter = Iteration(u32).init(&ctx, null); defer iter.deinit(); const v1 = try iter.variable(); const v2 = try iter.variable(); - try v1.insertSlice(&[_]u32{ 1, 2, 3 }); - try v2.insertSlice(&[_]u32{ 4, 5 }); + try v1.insertSlice(&ctx, &[_]u32{ 1, 2, 3 }); + try v2.insertSlice(&ctx, &[_]u32{ 4, 5 }); const changed1 = try iter.changed(); try std.testing.expect(changed1); @@ -82,12 +136,13 @@ test "Iteration: basic usage" { test "Iteration: recursion limit" { const allocator = std.testing.allocator; + var ctx = ExecutionContext.init(allocator); - var iter = Iteration(u32).init(allocator, 1); + var iter = Iteration(u32).init(&ctx, 1); defer iter.deinit(); const v = try iter.variable(); - try v.insertSlice(&[_]u32{1}); + try v.insertSlice(&ctx, &[_]u32{1}); _ = try iter.changed(); @@ -96,11 +151,12 @@ test "Iteration: recursion limit" { test "Iteration: reset" { const allocator = std.testing.allocator; - var iter = Iteration(u32).init(allocator, 10); + var ctx = ExecutionContext.init(allocator); + var iter = Iteration(u32).init(&ctx, 10); defer iter.deinit(); const v = try iter.variable(); - try v.insertSlice(&[_]u32{1}); + try v.insertSlice(&ctx, &[_]u32{1}); // Run some iterations _ = try iter.changed(); @@ -113,3 +169,44 @@ test "Iteration: reset" { _ = try iter.changed(); try std.testing.expectEqual(@as(usize, 1), iter.current_iteration); } + +test "Iteration: reset without new data" { + const allocator = std.testing.allocator; + var ctx = ExecutionContext.init(allocator); + + var iter = Iteration(u32).init(&ctx, 10); + defer iter.deinit(); + + const v = try iter.variable(); + try v.insertSlice(&ctx, &[_]u32{1}); + + _ = try iter.changed(); + const changed2 = try 
iter.changed(); + try std.testing.expect(!changed2); + + iter.reset(); + + const changed3 = try iter.changed(); + try std.testing.expect(!changed3); +} + +test "Iteration: parallel changed" { + const allocator = std.testing.allocator; + var ctx = try ExecutionContext.initWithThreads(allocator, 2); + defer ctx.deinit(); + + var iter = Iteration(u32).init(&ctx, null); + defer iter.deinit(); + + const v1 = try iter.variable(); + const v2 = try iter.variable(); + + try v1.insertSlice(&ctx, &[_]u32{ 1, 2, 3 }); + try v2.insertSlice(&ctx, &[_]u32{ 4, 5 }); + + const changed1 = try iter.changed(); + try std.testing.expect(changed1); + + const changed2 = try iter.changed(); + try std.testing.expect(!changed2); +} diff --git a/src/zodd/join.zig b/src/zodd/join.zig index a032dfb..de4f5b5 100644 --- a/src/zodd/join.zig +++ b/src/zodd/join.zig @@ -5,7 +5,9 @@ const Allocator = std.mem.Allocator; const Relation = @import("relation.zig").Relation; const Variable = @import("variable.zig").Variable; const gallop = @import("variable.zig").gallop; +const ExecutionContext = @import("context.zig").ExecutionContext; +/// Helper function for joining two sorted relations. 
pub fn joinHelper( comptime Key: type, comptime Val1: type, @@ -66,9 +68,13 @@ fn gallopKey(comptime Key: type, comptime Val: type, slice: []const struct { Key var step: usize = 1; var pos: usize = 0; - while (pos + step < slice.len and std.math.order(slice[pos + step][0], target_key) == .lt) { - pos += step; - step *= 2; + while (true) { + const next_pos = std.math.add(usize, pos, step) catch slice.len; + if (next_pos >= slice.len or next_pos < pos) break; + if (std.math.order(slice[next_pos][0], target_key) != .lt) break; + pos = next_pos; + const new_step = std.math.mul(usize, step, 2) catch std.math.maxInt(usize); + step = new_step; } const end = @min(pos + step + 1, slice.len); @@ -87,11 +93,13 @@ fn gallopKey(comptime Key: type, comptime Val: type, slice: []const struct { Key return slice[lo..]; } +/// Joins two variables and inserts the result into an output variable. pub fn joinInto( comptime Key: type, comptime Val1: type, comptime Val2: type, comptime Result: type, + ctx: *ExecutionContext, input1: *Variable(struct { Key, Val1 }), input2: *Variable(struct { Key, Val2 }), output: *Variable(Result), @@ -115,33 +123,101 @@ pub fn joinInto( }; var had_error = false; - const ctx = Context{ .results = &results, .alloc = output.allocator, .logic = &logic, .had_error = &had_error }; + const cb_ctx = Context{ .results = &results, .alloc = output.allocator, .logic = &logic, .had_error = &had_error }; + + if (ctx.pool != null and (input1.stable.items.len + input2.stable.items.len) > 0) { + const Task = struct { + left: *const Relation(struct { Key, Val1 }), + right: *const Relation(struct { Key, Val2 }), + results: std.ArrayListUnmanaged(Result) = .{}, + alloc: Allocator, + had_error: bool = false, + + fn run(task: *@This()) void { + const TaskContext = struct { + results: *std.ArrayListUnmanaged(Result), + alloc: Allocator, + logic: *const fn (*const Key, *const Val1, *const Val2) Result, + had_error: *bool, + + fn callback(self: @This(), key: *const Key, v1: 
*const Val1, v2: *const Val2) void { + self.results.append(self.alloc, self.logic(key, v1, v2)) catch { + self.had_error.* = true; + }; + } + }; - for (input2.stable.items) |*batch2| { - joinHelper(Key, Val1, Val2, &input1.recent, batch2, ctx, Context.callback); - } + const ctx_local = TaskContext{ + .results = &task.results, + .alloc = task.alloc, + .logic = &logic, + .had_error = &task.had_error, + }; + + joinHelper(Key, Val1, Val2, task.left, task.right, ctx_local, TaskContext.callback); + } + }; + + const task_count = input1.stable.items.len + input2.stable.items.len; + const tasks = try ctx.allocator.alloc(Task, task_count); + defer ctx.allocator.free(tasks); + + var idx: usize = 0; + for (input2.stable.items) |*batch2| { + tasks[idx] = .{ .left = &input1.recent, .right = batch2, .alloc = output.allocator }; + idx += 1; + } + for (input1.stable.items) |*batch1| { + tasks[idx] = .{ .left = batch1, .right = &input2.recent, .alloc = output.allocator }; + idx += 1; + } + + if (ctx.pool) |*pool| { + var wg: std.Thread.WaitGroup = .{}; + for (tasks) |*task| { + pool.*.spawnWg(&wg, Task.run, .{task}); + } + wg.wait(); + } + + for (tasks) |*task| { + defer task.results.deinit(output.allocator); + if (task.had_error) { + return error.OutOfMemory; + } + if (task.results.items.len > 0) { + try results.appendSlice(output.allocator, task.results.items); + } + } + } else { + for (input2.stable.items) |*batch2| { + joinHelper(Key, Val1, Val2, &input1.recent, batch2, cb_ctx, Context.callback); + } - for (input1.stable.items) |*batch1| { - joinHelper(Key, Val1, Val2, batch1, &input2.recent, ctx, Context.callback); + for (input1.stable.items) |*batch1| { + joinHelper(Key, Val1, Val2, batch1, &input2.recent, cb_ctx, Context.callback); + } } - joinHelper(Key, Val1, Val2, &input1.recent, &input2.recent, ctx, Context.callback); + joinHelper(Key, Val1, Val2, &input1.recent, &input2.recent, cb_ctx, Context.callback); if (had_error) { return error.OutOfMemory; } if (results.items.len 
> 0) { - const rel = try Relation(Result).fromSlice(output.allocator, results.items); + const rel = try Relation(Result).fromSlice(ctx, results.items); try output.insert(rel); } } +/// Performs an anti-join between a variable and a filter variable. pub fn joinAnti( comptime Key: type, comptime Val: type, comptime FilterVal: type, comptime Result: type, + ctx: *ExecutionContext, input: *Variable(struct { Key, Val }), filter: *Variable(struct { Key, FilterVal }), output: *Variable(Result), @@ -151,28 +227,111 @@ pub fn joinAnti( var results = ResultList{}; defer results.deinit(output.allocator); - for (input.recent.elements) |*tuple| { - const key = tuple[0]; - var found = false; + if (ctx.pool != null and input.recent.elements.len > 0) { + const Task = struct { + slice: []const struct { Key, Val }, + filter: *const Variable(struct { Key, FilterVal }), + results: std.ArrayListUnmanaged(Result) = .{}, + alloc: Allocator, + logic: *const fn (*const Key, *const Val) Result, + had_error: bool = false, + + fn run(task: *@This()) void { + for (task.slice) |tuple| { + const key = tuple[0]; + var found = false; + + { + const slice = gallopKey(Key, FilterVal, task.filter.recent.elements, key); + if (slice.len > 0 and countMatchingKeys(Key, FilterVal, slice, key) > 0) { + found = true; + } + } - if (countMatchingKeys(Key, FilterVal, filter.recent.elements, key) > 0) { - found = true; - } else { - for (filter.stable.items) |*batch| { - if (countMatchingKeys(Key, FilterVal, batch.elements, key) > 0) { - found = true; - break; + if (!found) { + for (task.filter.stable.items) |*batch| { + const slice = gallopKey(Key, FilterVal, batch.elements, key); + if (slice.len > 0 and countMatchingKeys(Key, FilterVal, slice, key) > 0) { + found = true; + break; + } + } + } + + if (!found) { + task.results.append(task.alloc, task.logic(&key, &tuple[1])) catch { + task.had_error = true; + return; + }; + } } } + }; + + const chunk: usize = 128; + const task_count = (input.recent.elements.len + 
chunk - 1) / chunk; + const tasks = try ctx.allocator.alloc(Task, task_count); + defer ctx.allocator.free(tasks); + + var i: usize = 0; + while (i < task_count) : (i += 1) { + const start = i * chunk; + const end = @min(start + chunk, input.recent.elements.len); + tasks[i] = .{ + .slice = input.recent.elements[start..end], + .filter = filter, + .alloc = output.allocator, + .logic = &logic, + }; + } + + if (ctx.pool) |*pool| { + var wg: std.Thread.WaitGroup = .{}; + for (tasks) |*task| { + pool.*.spawnWg(&wg, Task.run, .{task}); + } + wg.wait(); + } + + for (tasks) |*task| { + defer task.results.deinit(output.allocator); + if (task.had_error) { + return error.OutOfMemory; + } + if (task.results.items.len > 0) { + try results.appendSlice(output.allocator, task.results.items); + } } + } else { + for (input.recent.elements) |tuple| { + const key = tuple[0]; + var found = false; + + { + const slice = gallopKey(Key, FilterVal, filter.recent.elements, key); + if (slice.len > 0 and countMatchingKeys(Key, FilterVal, slice, key) > 0) { + found = true; + } + } + + if (!found) { + for (filter.stable.items) |*batch| { + const slice = gallopKey(Key, FilterVal, batch.elements, key); + if (slice.len > 0 and countMatchingKeys(Key, FilterVal, slice, key) > 0) { + found = true; + break; + } + } + } - if (!found) { - try results.append(output.allocator, logic(&key, &tuple[1])); + if (!found) { + try results.append(output.allocator, logic(&key, &tuple[1])); + } } } if (results.items.len > 0) { - const rel = try Relation(Result).fromSlice(output.allocator, results.items); + const rel = try Relation(Result).fromSlice(ctx, results.items); try output.insert(rel); } } @@ -182,15 +341,16 @@ test "joinHelper: basic" { const Tuple2 = struct { u32, u32 }; const allocator = std.testing.allocator; + var ctx = ExecutionContext.init(allocator); - var input1 = try Relation(Tuple1).fromSlice(allocator, &[_]Tuple1{ + var input1 = try Relation(Tuple1).fromSlice(&ctx, &[_]Tuple1{ .{ 1, 10 }, .{ 2, 20 }, 
.{ 3, 30 }, }); defer input1.deinit(); - var input2 = try Relation(Tuple2).fromSlice(allocator, &[_]Tuple2{ + var input2 = try Relation(Tuple2).fromSlice(&ctx, &[_]Tuple2{ .{ 2, 200 }, .{ 3, 300 }, .{ 3, 301 }, @@ -218,24 +378,25 @@ test "joinHelper: basic" { test "joinInto: variable join" { const allocator = std.testing.allocator; + var ctx = ExecutionContext.init(allocator); const Tuple = struct { u32, u32 }; - var v1 = Variable(Tuple).init(allocator); + var v1 = Variable(Tuple).init(&ctx); defer v1.deinit(); - var v2 = Variable(Tuple).init(allocator); + var v2 = Variable(Tuple).init(&ctx); defer v2.deinit(); - var output = Variable(struct { u32, u32, u32 }).init(allocator); + var output = Variable(struct { u32, u32, u32 }).init(&ctx); defer output.deinit(); - try v1.insertSlice(&[_]Tuple{ .{ 1, 10 }, .{ 2, 20 } }); - try v2.insertSlice(&[_]Tuple{ .{ 2, 200 }, .{ 3, 300 } }); + try v1.insertSlice(&ctx, &[_]Tuple{ .{ 1, 10 }, .{ 2, 20 } }); + try v2.insertSlice(&ctx, &[_]Tuple{ .{ 2, 200 }, .{ 3, 300 } }); _ = try v1.changed(); _ = try v2.changed(); - try joinInto(u32, u32, u32, struct { u32, u32, u32 }, &v1, &v2, &output, struct { + try joinInto(u32, u32, u32, struct { u32, u32, u32 }, &ctx, &v1, &v2, &output, struct { fn logic(key: *const u32, v1_val: *const u32, v2_val: *const u32) struct { u32, u32, u32 } { return .{ key.*, v1_val.*, v2_val.* }; } @@ -247,24 +408,25 @@ test "joinInto: variable join" { test "joinAnti: simple negation" { const allocator = std.testing.allocator; + var ctx = ExecutionContext.init(allocator); const Tuple = struct { u32, u32 }; - var input = Variable(Tuple).init(allocator); + var input = Variable(Tuple).init(&ctx); defer input.deinit(); - var filter = Variable(Tuple).init(allocator); + var filter = Variable(Tuple).init(&ctx); defer filter.deinit(); - var output = Variable(Tuple).init(allocator); + var output = Variable(Tuple).init(&ctx); defer output.deinit(); - try input.insertSlice(&[_]Tuple{ .{ 1, 10 }, .{ 2, 20 }, .{ 3, 30 } }); 
- try filter.insertSlice(&[_]Tuple{.{ 2, 200 }}); + try input.insertSlice(&ctx, &[_]Tuple{ .{ 1, 10 }, .{ 2, 20 }, .{ 3, 30 } }); + try filter.insertSlice(&ctx, &[_]Tuple{.{ 2, 200 }}); _ = try input.changed(); _ = try filter.changed(); - try joinAnti(u32, u32, u32, Tuple, &input, &filter, &output, struct { + try joinAnti(u32, u32, u32, Tuple, &ctx, &input, &filter, &output, struct { fn logic(key: *const u32, val: *const u32) Tuple { return .{ key.*, val.* }; } @@ -276,3 +438,108 @@ test "joinAnti: simple negation" { try std.testing.expectEqual(output.recent.elements[0][0], 1); try std.testing.expectEqual(output.recent.elements[1][0], 3); } + +test "joinHelper: multiplicative matches" { + const allocator = std.testing.allocator; + var ctx = ExecutionContext.init(allocator); + const T1 = struct { u32, u32 }; + const T2 = struct { u32, u32 }; + + var input1 = try Relation(T1).fromSlice(&ctx, &[_]T1{ + .{ 1, 10 }, + .{ 1, 11 }, + .{ 2, 20 }, + }); + defer input1.deinit(); + + var input2 = try Relation(T2).fromSlice(&ctx, &[_]T2{ + .{ 1, 100 }, + .{ 1, 101 }, + .{ 2, 200 }, + }); + defer input2.deinit(); + + const ResultList = std.ArrayListUnmanaged(struct { u32, u32, u32 }); + const Context = struct { + results: *ResultList, + alloc: Allocator, + + fn callback(self: @This(), key: *const u32, v1: *const u32, v2: *const u32) void { + self.results.append(self.alloc, .{ key.*, v1.*, v2.* }) catch {}; + } + }; + + var results = ResultList{}; + defer results.deinit(allocator); + + joinHelper(u32, u32, u32, &input1, &input2, Context{ .results = &results, .alloc = allocator }, Context.callback); + + try std.testing.expectEqual(@as(usize, 5), results.items.len); +} + +test "joinInto: stable batches only" { + const allocator = std.testing.allocator; + var ctx = ExecutionContext.init(allocator); + const Tuple = struct { u32, u32 }; + + var v1 = Variable(Tuple).init(&ctx); + defer v1.deinit(); + + var v2 = Variable(Tuple).init(&ctx); + defer v2.deinit(); + + var output = 
Variable(struct { u32, u32, u32 }).init(&ctx); + defer output.deinit(); + + try v1.insertSlice(&ctx, &[_]Tuple{.{ 1, 10 }}); + _ = try v1.changed(); + _ = try v1.changed(); + + try v2.insertSlice(&ctx, &[_]Tuple{ .{ 1, 100 }, .{ 2, 200 } }); + _ = try v2.changed(); + _ = try v2.changed(); + + try joinInto(u32, u32, u32, struct { u32, u32, u32 }, &ctx, &v1, &v2, &output, struct { + fn logic(key: *const u32, v1_val: *const u32, v2_val: *const u32) struct { u32, u32, u32 } { + return .{ key.*, v1_val.*, v2_val.* }; + } + }.logic); + + const changed = try output.changed(); + try std.testing.expect(!changed); + try std.testing.expectEqual(@as(usize, 0), output.recent.len()); +} + +test "joinAnti: parallel" { + const allocator = std.testing.allocator; + var ctx = try ExecutionContext.initWithThreads(allocator, 2); + defer ctx.deinit(); + const Tuple = struct { u32, u32 }; + + var input = Variable(Tuple).init(&ctx); + defer input.deinit(); + + var filter = Variable(Tuple).init(&ctx); + defer filter.deinit(); + + var output = Variable(Tuple).init(&ctx); + defer output.deinit(); + + try input.insertSlice(&ctx, &[_]Tuple{ .{ 1, 10 }, .{ 2, 20 }, .{ 3, 30 }, .{ 4, 40 } }); + try filter.insertSlice(&ctx, &[_]Tuple{ .{ 2, 200 }, .{ 4, 400 } }); + + _ = try input.changed(); + _ = try filter.changed(); + + try joinAnti(u32, u32, u32, Tuple, &ctx, &input, &filter, &output, struct { + fn logic(key: *const u32, val: *const u32) Tuple { + return .{ key.*, val.* }; + } + }.logic); + + _ = try output.changed(); + + try std.testing.expectEqual(@as(usize, 2), output.recent.len()); + try std.testing.expectEqual(@as(u32, 1), output.recent.elements[0][0]); + try std.testing.expectEqual(@as(u32, 3), output.recent.elements[1][0]); +} diff --git a/src/zodd/relation.zig b/src/zodd/relation.zig index 81238f3..27c4e5d 100644 --- a/src/zodd/relation.zig +++ b/src/zodd/relation.zig @@ -1,53 +1,128 @@ -//! Core Relation data structure: a sorted list of unique tuples. +//! 
Core relation data structure: sorted list of unique tuples. const std = @import("std"); const mem = std.mem; const sort = std.sort; const Allocator = mem.Allocator; +const ExecutionContext = @import("context.zig").ExecutionContext; pub fn Relation(comptime Tuple: type) type { return struct { const Self = @This(); + /// Elements of the relation. elements: []Tuple, + /// Allocator for the relation. allocator: Allocator, + /// Execution context. + ctx: *ExecutionContext, - pub fn fromSlice(allocator: Allocator, input: []const Tuple) Allocator.Error!Self { + /// Creates a relation from a slice of tuples. + pub fn fromSlice(ctx: *ExecutionContext, input: []const Tuple) Allocator.Error!Self { if (input.len == 0) { return Self{ .elements = &[_]Tuple{}, - .allocator = allocator, + .allocator = ctx.allocator, + .ctx = ctx, }; } - const elements = try allocator.alloc(Tuple, input.len); - @memcpy(elements, input); + const elements = try ctx.allocator.alloc(Tuple, input.len); + if (ctx.pool) |pool| { + const chunk: usize = 1024; + const task_count = (input.len + chunk - 1) / chunk; + const Task = struct { + start: usize, + end: usize, + input: []const Tuple, + output: []Tuple, + + fn run(task: *@This()) void { + const size = task.end - task.start; + if (size == 0) return; + @memcpy(task.output[task.start..task.end], task.input[task.start..task.end]); + } + }; + + const tasks = try ctx.allocator.alloc(Task, task_count); + defer ctx.allocator.free(tasks); + + var wg: std.Thread.WaitGroup = .{}; + var t: usize = 0; + while (t < task_count) : (t += 1) { + const start = t * chunk; + const end = @min(start + chunk, input.len); + tasks[t] = .{ .start = start, .end = end, .input = input, .output = elements }; + pool.spawnWg(&wg, Task.run, .{&tasks[t]}); + } + + if (task_count > 0) { + wg.wait(); + } + } else { + @memcpy(elements, input); + } + + if (ctx.pool) |pool| { + const chunk: usize = 2048; + const task_count = (input.len + chunk - 1) / chunk; + if (task_count > 1) { + const 
Task = struct { + start: usize, + end: usize, + data: []Tuple, + + fn run(task: *@This()) void { + std.sort.pdq(Tuple, task.data[task.start..task.end], {}, lessThan); + } + }; + + const tasks = try ctx.allocator.alloc(Task, task_count); + defer ctx.allocator.free(tasks); + + var wg: std.Thread.WaitGroup = .{}; + var t2: usize = 0; + while (t2 < task_count) : (t2 += 1) { + const start = t2 * chunk; + const end = @min(start + chunk, input.len); + tasks[t2] = .{ .start = start, .end = end, .data = elements }; + pool.spawnWg(&wg, Task.run, .{&tasks[t2]}); + } + + wg.wait(); + } + } sort.pdq(Tuple, elements, {}, lessThan); const unique_len = deduplicate(elements); if (unique_len < elements.len) { - const shrunk = allocator.realloc(elements, unique_len) catch elements[0..unique_len]; + const shrunk = ctx.allocator.realloc(elements, unique_len) catch elements[0..unique_len]; return Self{ .elements = shrunk, - .allocator = allocator, + .allocator = ctx.allocator, + .ctx = ctx, }; } return Self{ .elements = elements, - .allocator = allocator, + .allocator = ctx.allocator, + .ctx = ctx, }; } - pub fn empty(allocator: Allocator) Self { + /// Creates an empty relation. + pub fn empty(ctx: *ExecutionContext) Self { return Self{ .elements = &[_]Tuple{}, - .allocator = allocator, + .allocator = ctx.allocator, + .ctx = ctx, }; } + /// Deinitializes the relation. pub fn deinit(self: *Self) void { if (self.elements.len > 0) { self.allocator.free(self.elements); @@ -55,14 +130,17 @@ pub fn Relation(comptime Tuple: type) type { self.elements = &[_]Tuple{}; } + /// Returns the number of elements in the relation. pub fn len(self: Self) usize { return self.elements.len; } + /// Returns true if the relation is empty. pub fn isEmpty(self: Self) bool { return self.elements.len == 0; } + /// Merges this relation with another relation. 
pub fn merge(self: *Self, other: *Self) Allocator.Error!Self { if (self.elements.len == 0) { const result = other.*; @@ -79,27 +157,65 @@ pub fn Relation(comptime Tuple: type) type { const total_len = self.elements.len + other.elements.len; const merged = try self.allocator.alloc(Tuple, total_len); + errdefer self.allocator.free(merged); + + var i: usize = 0; + var j: usize = 0; + var k: usize = 0; + + const elems1 = self.elements; + const elems2 = other.elements; + + while (i < elems1.len and j < elems2.len) { + const ord = compareTuples(elems1[i], elems2[j]); + switch (ord) { + .lt => { + merged[k] = elems1[i]; + i += 1; + k += 1; + }, + .gt => { + merged[k] = elems2[j]; + j += 1; + k += 1; + }, + .eq => { + merged[k] = elems1[i]; + i += 1; + j += 1; + k += 1; + }, + } + } - @memcpy(merged[0..self.elements.len], self.elements); - @memcpy(merged[self.elements.len..], other.elements); + if (i < elems1.len) { + const rem = elems1.len - i; + @memcpy(merged[k..][0..rem], elems1[i..]); + k += rem; + } - sort.pdq(Tuple, merged, {}, lessThan); - const unique_len = deduplicate(merged); + if (j < elems2.len) { + const rem = elems2.len - j; + @memcpy(merged[k..][0..rem], elems2[j..]); + k += rem; + } self.deinit(); other.deinit(); - if (unique_len < merged.len) { - const shrunk = self.allocator.realloc(merged, unique_len) catch merged[0..unique_len]; + if (k < merged.len) { + const shrunk = self.allocator.realloc(merged, k) catch merged[0..k]; return Self{ .elements = shrunk, .allocator = self.allocator, + .ctx = self.ctx, }; } return Self{ .elements = merged, .allocator = self.allocator, + .ctx = self.ctx, }; } @@ -107,18 +223,33 @@ pub fn Relation(comptime Tuple: type) type { return compareTuples(a, b) == .lt; } + fn orderField(comptime T: type, a: T, b: T) std.math.Order { + const field_info = @typeInfo(T); + if (field_info == .pointer) { + return std.math.order(@intFromPtr(a), @intFromPtr(b)); + } else { + return std.math.order(a, b); + } + } + + /// Compares two 
tuples. pub fn compareTuples(a: Tuple, b: Tuple) std.math.Order { const info = @typeInfo(Tuple); if (info == .@"struct" and info.@"struct".is_tuple) { inline for (0..info.@"struct".fields.len) |i| { const a_field = a[i]; const b_field = b[i]; - const order = std.math.order(a_field, b_field); + const order = orderField(@TypeOf(a_field), a_field, b_field); if (order != .eq) return order; } return .eq; } else { - return std.math.order(a, b); + const tuple_info = @typeInfo(Tuple); + if (tuple_info == .pointer) { + return std.math.order(@intFromPtr(a), @intFromPtr(b)); + } else { + return std.math.order(a, b); + } } } @@ -134,15 +265,106 @@ pub fn Relation(comptime Tuple: type) type { } return write_idx; } + + fn isSerializableType(comptime T: type) bool { + return switch (@typeInfo(T)) { + .int, .float, .bool, .@"enum" => true, + .array => |info| isSerializableType(info.child), + .@"struct" => |info| blk: { + inline for (info.fields) |field| { + if (!isSerializableType(field.type)) break :blk false; + } + break :blk true; + }, + else => false, + }; + } + + fn writeValue(comptime T: type, writer: anytype, value: T) !void { + return switch (@typeInfo(T)) { + .int => try writer.writeInt(T, value, .little), + .bool => try writer.writeInt(u8, if (value) 1 else 0, .little), + .float => blk: { + const IntType = std.meta.Int(.unsigned, @bitSizeOf(T)); + const bits: IntType = @as(IntType, @bitCast(value)); + try writer.writeInt(IntType, bits, .little); + break :blk; + }, + .@"enum" => blk: { + const info = @typeInfo(T).@"enum"; + const Tag = info.tag_type; + try writer.writeInt(Tag, @intFromEnum(value), .little); + break :blk; + }, + .array => |info| blk: { + for (value) |elem| { + try writeValue(info.child, writer, elem); + } + break :blk; + }, + .@"struct" => |info| blk: { + inline for (info.fields) |field| { + try writeValue(field.type, writer, @field(value, field.name)); + } + break :blk; + }, + else => return error.UnsupportedType, + }; + } + + fn readValue(comptime T: 
type, reader: anytype) !T { + return switch (@typeInfo(T)) { + .int => try reader.readInt(T, .little), + .bool => (try reader.readInt(u8, .little)) != 0, + .float => blk: { + const IntType = std.meta.Int(.unsigned, @bitSizeOf(T)); + const bits = try reader.readInt(IntType, .little); + break :blk @as(T, @bitCast(bits)); + }, + .@"enum" => blk: { + const info = @typeInfo(T).@"enum"; + const Tag = info.tag_type; + const bits = try reader.readInt(Tag, .little); + break :blk @as(T, @enumFromInt(bits)); + }, + .array => |info| blk: { + var result: T = undefined; + var i: usize = 0; + while (i < result.len) : (i += 1) { + result[i] = try readValue(info.child, reader); + } + break :blk result; + }, + .@"struct" => |info| blk: { + var result: T = undefined; + inline for (info.fields) |field| { + @field(result, field.name) = try readValue(field.type, reader); + } + break :blk result; + }, + else => return error.UnsupportedType, + }; + } + + /// Saves the relation to a writer. pub fn save(self: Self, writer: anytype) !void { + if (!isSerializableType(Tuple)) return error.UnsupportedType; try writer.writeAll("ZODDREL"); try writer.writeInt(u8, 1, .little); try writer.writeInt(u64, self.elements.len, .little); - const bytes = std.mem.sliceAsBytes(self.elements); - try writer.writeAll(bytes); + for (self.elements) |elem| { + try writeValue(Tuple, writer, elem); + } + } + + /// Loads a relation from a reader. + pub fn load(ctx: *ExecutionContext, reader: anytype) !Self { + return loadWithLimit(ctx, reader, std.math.maxInt(usize)); } - pub fn load(allocator: Allocator, reader: anytype) !Self { + /// Loads a relation from a reader with a limit on the number of elements. 
+ pub fn loadWithLimit(ctx: *ExecutionContext, reader: anytype, max_len: usize) !Self { + if (!isSerializableType(Tuple)) return error.UnsupportedType; const magic = try reader.readBytesNoEof(7); if (!std.mem.eql(u8, &magic, "ZODDREL")) { return error.InvalidFormat; @@ -152,20 +374,39 @@ pub fn Relation(comptime Tuple: type) type { return error.UnsupportedVersion; } - const length = try reader.readInt(u64, .little); + const length_u64 = try reader.readInt(u64, .little); + const length = std.math.cast(usize, length_u64) orelse return error.InvalidFormat; if (length == 0) { - return Self.empty(allocator); + return Self.empty(ctx); + } + if (length > max_len) { + return error.TooLarge; } - const elements = try allocator.alloc(Tuple, length); - errdefer allocator.free(elements); + const elements = try ctx.allocator.alloc(Tuple, length); + errdefer ctx.allocator.free(elements); - const bytes = std.mem.sliceAsBytes(elements); - try reader.readNoEof(bytes); + var i: usize = 0; + while (i < length) : (i += 1) { + elements[i] = try readValue(Tuple, reader); + } + + sort.pdq(Tuple, elements, {}, lessThan); + const unique_len = deduplicate(elements); + + if (unique_len < elements.len) { + const shrunk = ctx.allocator.realloc(elements, unique_len) catch elements[0..unique_len]; + return Self{ + .elements = shrunk, + .allocator = ctx.allocator, + .ctx = ctx, + }; + } return Self{ .elements = elements, - .allocator = allocator, + .allocator = ctx.allocator, + .ctx = ctx, }; } }; @@ -173,7 +414,8 @@ pub fn Relation(comptime Tuple: type) type { test "Relation: empty" { const allocator = std.testing.allocator; - var rel = Relation(u32).empty(allocator); + var ctx = ExecutionContext.init(allocator); + var rel = Relation(u32).empty(&ctx); defer rel.deinit(); try std.testing.expect(rel.isEmpty()); @@ -182,9 +424,10 @@ test "Relation: empty" { test "Relation: persistence" { const allocator = std.testing.allocator; + var ctx = ExecutionContext.init(allocator); const Tuple = struct { 
u32, u32 }; - var original = try Relation(Tuple).fromSlice(allocator, &[_]Tuple{ + var original = try Relation(Tuple).fromSlice(&ctx, &[_]Tuple{ .{ 1, 10 }, .{ 2, 20 }, .{ 3, 30 }, @@ -197,7 +440,7 @@ test "Relation: persistence" { try original.save(buffer.writer(allocator)); var fbs = std.io.fixedBufferStream(buffer.items); - var loaded = try Relation(Tuple).load(allocator, fbs.reader()); + var loaded = try Relation(Tuple).load(&ctx, fbs.reader()); defer loaded.deinit(); try std.testing.expectEqual(original.len(), loaded.len()); @@ -206,9 +449,10 @@ test "Relation: persistence" { test "Relation: fromSlice sorts and deduplicates" { const allocator = std.testing.allocator; + var ctx = ExecutionContext.init(allocator); const input = [_]u32{ 5, 3, 3, 1, 5, 2, 1 }; - var rel = try Relation(u32).fromSlice(allocator, &input); + var rel = try Relation(u32).fromSlice(&ctx, &input); defer rel.deinit(); try std.testing.expectEqual(@as(usize, 4), rel.len()); @@ -217,6 +461,7 @@ test "Relation: fromSlice sorts and deduplicates" { test "Relation: tuple type" { const allocator = std.testing.allocator; + var ctx = ExecutionContext.init(allocator); const Tuple = struct { u32, u32 }; const input = [_]Tuple{ .{ 2, 1 }, @@ -225,7 +470,7 @@ test "Relation: tuple type" { .{ 1, 1 }, }; - var rel = try Relation(Tuple).fromSlice(allocator, &input); + var rel = try Relation(Tuple).fromSlice(&ctx, &input); defer rel.deinit(); try std.testing.expectEqual(@as(usize, 3), rel.len()); @@ -236,9 +481,10 @@ test "Relation: tuple type" { test "Relation: merge" { const allocator = std.testing.allocator; + var ctx = ExecutionContext.init(allocator); - var rel1 = try Relation(u32).fromSlice(allocator, &[_]u32{ 1, 3, 5 }); - var rel2 = try Relation(u32).fromSlice(allocator, &[_]u32{ 2, 3, 4 }); + var rel1 = try Relation(u32).fromSlice(&ctx, &[_]u32{ 1, 3, 5 }); + var rel2 = try Relation(u32).fromSlice(&ctx, &[_]u32{ 2, 3, 4 }); var merged = try rel1.merge(&rel2); defer merged.deinit(); @@ -246,3 
+492,126 @@ test "Relation: merge" { try std.testing.expectEqual(@as(usize, 5), merged.len()); try std.testing.expectEqualSlices(u32, &[_]u32{ 1, 2, 3, 4, 5 }, merged.elements); } + +test "Relation: load normalizes order" { + const allocator = std.testing.allocator; + var ctx = ExecutionContext.init(allocator); + const Tuple = struct { u32, u32 }; + + var buffer = std.ArrayListUnmanaged(u8){}; + defer buffer.deinit(allocator); + + var writer = buffer.writer(allocator); + try writer.writeAll("ZODDREL"); + try writer.writeInt(u8, 1, .little); + const raw = [_]Tuple{ + .{ 2, 20 }, + .{ 1, 10 }, + .{ 2, 20 }, + }; + try writer.writeInt(u64, raw.len, .little); + for (raw) |tuple| { + try writer.writeInt(u32, tuple[0], .little); + try writer.writeInt(u32, tuple[1], .little); + } + + var reader = std.io.fixedBufferStream(buffer.items); + var rel = try Relation(Tuple).load(&ctx, reader.reader()); + defer rel.deinit(); + + try std.testing.expectEqual(@as(usize, 2), rel.len()); + try std.testing.expectEqual(Tuple{ 1, 10 }, rel.elements[0]); + try std.testing.expectEqual(Tuple{ 2, 20 }, rel.elements[1]); +} + +test "Relation: loadWithLimit zero length with zero limit" { + const allocator = std.testing.allocator; + var ctx = ExecutionContext.init(allocator); + + var buffer = std.ArrayListUnmanaged(u8){}; + defer buffer.deinit(allocator); + + var writer = buffer.writer(allocator); + try writer.writeAll("ZODDREL"); + try writer.writeInt(u8, 1, .little); + try writer.writeInt(u64, 0, .little); + + var reader = std.io.fixedBufferStream(buffer.items); + var rel = try Relation(u32).loadWithLimit(&ctx, reader.reader(), 0); + defer rel.deinit(); + + try std.testing.expectEqual(@as(usize, 0), rel.len()); +} + +test "Relation: scalar save and load" { + const allocator = std.testing.allocator; + var ctx = ExecutionContext.init(allocator); + + var original = try Relation(u32).fromSlice(&ctx, &[_]u32{ 3, 1, 2, 2 }); + defer original.deinit(); + + var buffer = std.ArrayListUnmanaged(u8){}; 
+ defer buffer.deinit(allocator); + + try original.save(buffer.writer(allocator)); + + var fbs = std.io.fixedBufferStream(buffer.items); + var loaded = try Relation(u32).load(&ctx, fbs.reader()); + defer loaded.deinit(); + + try std.testing.expectEqual(original.len(), loaded.len()); + try std.testing.expectEqualSlices(u32, original.elements, loaded.elements); +} + +test "Relation: fromSlice parallel copy" { + const allocator = std.testing.allocator; + var ctx = try ExecutionContext.initWithThreads(allocator, 2); + defer ctx.deinit(); + + const input = [_]u32{ 5, 3, 3, 1, 5, 2, 1 }; + + var rel = try Relation(u32).fromSlice(&ctx, &input); + defer rel.deinit(); + + try std.testing.expectEqual(@as(usize, 4), rel.len()); + try std.testing.expectEqualSlices(u32, &[_]u32{ 1, 2, 3, 5 }, rel.elements); +} + +test "Relation: merge parallel copy" { + const allocator = std.testing.allocator; + var ctx = try ExecutionContext.initWithThreads(allocator, 2); + defer ctx.deinit(); + + var rel1 = try Relation(u32).fromSlice(&ctx, &[_]u32{ 1, 3, 5 }); + var rel2 = try Relation(u32).fromSlice(&ctx, &[_]u32{ 2, 3, 4 }); + + var merged = try rel1.merge(&rel2); + defer merged.deinit(); + + try std.testing.expectEqual(@as(usize, 5), merged.len()); + try std.testing.expectEqualSlices(u32, &[_]u32{ 1, 2, 3, 4, 5 }, merged.elements); +} + +test "Relation: save/load unsupported type" { + const allocator = std.testing.allocator; + var ctx = ExecutionContext.init(allocator); + const Bad = struct { *u8 }; + + var rel = Relation(Bad).empty(&ctx); + defer rel.deinit(); + + var buffer = std.ArrayListUnmanaged(u8){}; + defer buffer.deinit(allocator); + + try std.testing.expectError(error.UnsupportedType, rel.save(buffer.writer(allocator))); + + var header: [16]u8 = undefined; + var fbs = std.io.fixedBufferStream(&header); + try fbs.writer().writeAll("ZODDREL"); + try fbs.writer().writeInt(u8, 1, .little); + try fbs.writer().writeInt(u64, 0, .little); + const used = fbs.pos; + + var reader_fbs = 
std.io.fixedBufferStream(header[0..used]); + try std.testing.expectError(error.UnsupportedType, Relation(Bad).load(&ctx, reader_fbs.reader())); +} diff --git a/src/zodd/variable.zig b/src/zodd/variable.zig index f0bc669..b8e6b10 100644 --- a/src/zodd/variable.zig +++ b/src/zodd/variable.zig @@ -1,8 +1,9 @@ -//! A Datalog variable representing a relation that evolves during iteration. +//! Datalog variable representing a relation that evolves during iteration. const std = @import("std"); const Allocator = std.mem.Allocator; const Relation = @import("relation.zig").Relation; +const ExecutionContext = @import("context.zig").ExecutionContext; pub fn Variable(comptime Tuple: type) type { return struct { @@ -10,20 +11,29 @@ pub fn Variable(comptime Tuple: type) type { const Rel = Relation(Tuple); const RelList = std.ArrayListUnmanaged(Rel); + /// Stable batches of the variable. stable: RelList, + /// Recent batch of the variable. recent: Rel, + /// Batches to be added to the variable. to_add: RelList, + /// Allocator for the variable. allocator: Allocator, + /// Execution context. + ctx: *ExecutionContext, - pub fn init(allocator: Allocator) Self { + /// Initializes a new variable. + pub fn init(ctx: *ExecutionContext) Self { return Self{ .stable = RelList{}, - .recent = Rel.empty(allocator), + .recent = Rel.empty(ctx), .to_add = RelList{}, - .allocator = allocator, + .allocator = ctx.allocator, + .ctx = ctx, }; } + /// Deinitializes the variable. pub fn deinit(self: *Self) void { for (self.stable.items) |*batch| { batch.deinit(); @@ -38,19 +48,22 @@ pub fn Variable(comptime Tuple: type) type { self.to_add.deinit(self.allocator); } + /// Inserts a relation into the variable. 
pub fn insert(self: *Self, relation: Rel) Allocator.Error!void { try self.to_add.append(self.allocator, relation); } - pub fn insertSlice(self: *Self, tuples: []const Tuple) Allocator.Error!void { - const rel = try Rel.fromSlice(self.allocator, tuples); + /// Inserts a slice of tuples into the variable. + pub fn insertSlice(self: *Self, ctx: *ExecutionContext, tuples: []const Tuple) Allocator.Error!void { + const rel = try Rel.fromSlice(ctx, tuples); try self.insert(rel); } + /// Processes pending updates and returns true if the variable has changed. pub fn changed(self: *Self) Allocator.Error!bool { if (!self.recent.isEmpty()) { var recent = self.recent; - self.recent = Rel.empty(self.allocator); + self.recent = Rel.empty(self.ctx); while (self.stable.items.len > 0) { const last = &self.stable.items[self.stable.items.len - 1]; @@ -103,7 +116,7 @@ pub fn Variable(comptime Tuple: type) type { if (write_idx < target.elements.len) { if (write_idx == 0) { target.deinit(); - target.* = Rel.empty(self.allocator); + target.* = Rel.empty(self.ctx); } else { target.elements = self.allocator.realloc( target.elements, @@ -113,6 +126,7 @@ pub fn Variable(comptime Tuple: type) type { } } + /// Returns the total number of elements in the variable. pub fn totalLen(self: Self) usize { var count: usize = self.recent.len(); for (self.stable.items) |batch| { @@ -124,9 +138,24 @@ pub fn Variable(comptime Tuple: type) type { return count; } + /// Completes the variable and returns the final relation. 
pub fn complete(self: *Self) Allocator.Error!Rel { + if (!self.recent.isEmpty()) { + try self.stable.append(self.allocator, self.recent); + self.recent = Rel.empty(self.ctx); + } + + if (self.to_add.items.len > 0) { + var to_add = self.to_add.pop().?; + while (self.to_add.items.len > 0) { + var more = self.to_add.pop().?; + to_add = try to_add.merge(&more); + } + try self.stable.append(self.allocator, to_add); + } + if (self.stable.items.len == 0) { - return Rel.empty(self.allocator); + return Rel.empty(self.ctx); } var result = self.stable.pop().?; @@ -140,6 +169,7 @@ pub fn Variable(comptime Tuple: type) type { }; } +/// Gallop search for a value in a sorted slice. pub fn gallop(comptime T: type, slice: []const T, target: T) []const T { const Rel = Relation(T); @@ -149,9 +179,13 @@ pub fn gallop(comptime T: type, slice: []const T, target: T) []const T { var step: usize = 1; var pos: usize = 0; - while (pos + step < slice.len and Rel.compareTuples(slice[pos + step], target) == .lt) { - pos += step; - step *= 2; + while (true) { + const next_pos = std.math.add(usize, pos, step) catch slice.len; + if (next_pos >= slice.len or next_pos < pos) break; + if (Rel.compareTuples(slice[next_pos], target) != .lt) break; + pos = next_pos; + const new_step = std.math.mul(usize, step, 2) catch std.math.maxInt(usize); + step = new_step; } const end = @min(pos + step + 1, slice.len); @@ -172,11 +206,12 @@ pub fn gallop(comptime T: type, slice: []const T, target: T) []const T { test "Variable: basic lifecycle" { const allocator = std.testing.allocator; + var ctx = ExecutionContext.init(allocator); - var v = Variable(u32).init(allocator); + var v = Variable(u32).init(&ctx); defer v.deinit(); - try v.insertSlice(&[_]u32{ 1, 2, 3 }); + try v.insertSlice(&ctx, &[_]u32{ 1, 2, 3 }); const changed1 = try v.changed(); try std.testing.expect(changed1); @@ -190,14 +225,15 @@ test "Variable: basic lifecycle" { test "Variable: deduplication across rounds" { const allocator = 
std.testing.allocator; + var ctx = ExecutionContext.init(allocator); - var v = Variable(u32).init(allocator); + var v = Variable(u32).init(&ctx); defer v.deinit(); - try v.insertSlice(&[_]u32{ 1, 2, 3 }); + try v.insertSlice(&ctx, &[_]u32{ 1, 2, 3 }); _ = try v.changed(); - try v.insertSlice(&[_]u32{ 2, 3, 4, 5 }); + try v.insertSlice(&ctx, &[_]u32{ 2, 3, 4, 5 }); const changed = try v.changed(); try std.testing.expect(changed); @@ -206,14 +242,15 @@ test "Variable: deduplication across rounds" { test "Variable: complete" { const allocator = std.testing.allocator; + var ctx = ExecutionContext.init(allocator); - var v = Variable(u32).init(allocator); + var v = Variable(u32).init(&ctx); - try v.insertSlice(&[_]u32{ 1, 2, 3 }); + try v.insertSlice(&ctx, &[_]u32{ 1, 2, 3 }); _ = try v.changed(); _ = try v.changed(); - try v.insertSlice(&[_]u32{ 4, 5 }); + try v.insertSlice(&ctx, &[_]u32{ 4, 5 }); _ = try v.changed(); _ = try v.changed(); @@ -226,14 +263,15 @@ test "Variable: complete" { test "Variable: totalLen" { const allocator = std.testing.allocator; - var v = Variable(u32).init(allocator); + var ctx = ExecutionContext.init(allocator); + var v = Variable(u32).init(&ctx); defer v.deinit(); // Init: 0 try std.testing.expectEqual(@as(usize, 0), v.totalLen()); // Insert to_add: 3 items - try v.insertSlice(&[_]u32{ 1, 2, 3 }); + try v.insertSlice(&ctx, &[_]u32{ 1, 2, 3 }); try std.testing.expectEqual(@as(usize, 3), v.totalLen()); // Changed: recent=3, stable=0, to_add=0 (moved to recent) @@ -246,7 +284,7 @@ test "Variable: totalLen" { try std.testing.expectEqual(@as(usize, 3), v.totalLen()); // Add more - try v.insertSlice(&[_]u32{4}); + try v.insertSlice(&ctx, &[_]u32{4}); try std.testing.expectEqual(@as(usize, 4), v.totalLen()); } @@ -263,3 +301,41 @@ test "gallop: basic" { const result3 = gallop(u32, &slice, 20); try std.testing.expectEqual(@as(usize, 0), result3.len); } + +test "Variable: changed filters against stable batches" { + const allocator = 
std.testing.allocator; + var ctx = ExecutionContext.init(allocator); + + var v = Variable(u32).init(&ctx); + defer v.deinit(); + + try v.insertSlice(&ctx, &[_]u32{ 1, 2, 3, 4, 5, 6, 7, 8 }); + _ = try v.changed(); + _ = try v.changed(); + + try v.insertSlice(&ctx, &[_]u32{ 2, 4, 6, 8, 9 }); + const changed = try v.changed(); + + try std.testing.expect(changed); + try std.testing.expectEqual(@as(usize, 1), v.recent.len()); + try std.testing.expectEqual(@as(u32, 9), v.recent.elements[0]); +} + +test "Variable: changed with recent and to_add" { + const allocator = std.testing.allocator; + var ctx = ExecutionContext.init(allocator); + + var v = Variable(u32).init(&ctx); + defer v.deinit(); + + try v.insertSlice(&ctx, &[_]u32{ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 }); + _ = try v.changed(); + + try v.insertSlice(&ctx, &[_]u32{ 3, 5, 11, 12 }); + const changed = try v.changed(); + + try std.testing.expect(changed); + try std.testing.expectEqual(@as(usize, 2), v.recent.len()); + try std.testing.expectEqual(@as(u32, 11), v.recent.elements[0]); + try std.testing.expectEqual(@as(u32, 12), v.recent.elements[1]); +} diff --git a/tests/incremental_tests.zig b/tests/incremental_tests.zig index d1761a6..b9ea6eb 100644 --- a/tests/incremental_tests.zig +++ b/tests/incremental_tests.zig @@ -4,32 +4,33 @@ const zodd = @import("zodd"); test "incremental maintenance: monotonic updates" { const allocator = testing.allocator; + var ctx = zodd.ExecutionContext.init(allocator); const Tuple = struct { u32, u32 }; - var iter = zodd.Iteration(Tuple).init(allocator, 100); + var iter = zodd.Iteration(Tuple).init(&ctx, 100); defer iter.deinit(); const B = try iter.variable(); const A = try iter.variable(); - try B.insertSlice(&[_]Tuple{.{ 1, 2 }}); + try B.insertSlice(&ctx, &[_]Tuple{.{ 1, 2 }}); while (try iter.changed()) { if (B.recent.len() > 0) { - const rel = try zodd.Relation(Tuple).fromSlice(allocator, B.recent.elements); + const rel = try zodd.Relation(Tuple).fromSlice(&ctx, B.recent.elements); 
try A.insert(rel); } } try testing.expectEqual(@as(usize, 1), A.totalLen()); - try B.insertSlice(&[_]Tuple{.{ 2, 3 }}); + try B.insertSlice(&ctx, &[_]Tuple{.{ 2, 3 }}); iter.reset(); while (try iter.changed()) { if (B.recent.len() > 0) { - const rel = try zodd.Relation(Tuple).fromSlice(allocator, B.recent.elements); + const rel = try zodd.Relation(Tuple).fromSlice(&ctx, B.recent.elements); try A.insert(rel); } } @@ -43,3 +44,157 @@ test "incremental maintenance: monotonic updates" { try testing.expectEqual(final_res.elements[0][0], 1); try testing.expectEqual(final_res.elements[1][0], 2); } + +test "incremental maintenance: join with new data after reset" { + const allocator = testing.allocator; + var ctx = zodd.ExecutionContext.init(allocator); + const KV = struct { u32, u32 }; + const Out = struct { u32, u32, u32 }; + + var iter = zodd.Iteration(KV).init(&ctx, 100); + defer iter.deinit(); + + const edges = try iter.variable(); + const labels = try iter.variable(); + var joined = zodd.Variable(Out).init(&ctx); + defer joined.deinit(); + + // Round 1: edges={1->2}, labels={1->100} + try edges.insertSlice(&ctx, &[_]KV{.{ 1, 2 }}); + try labels.insertSlice(&ctx, &[_]KV{.{ 1, 100 }}); + + while (try iter.changed()) { + try zodd.joinInto(u32, u32, u32, Out, &ctx, edges, labels, &joined, struct { + fn logic(key: *const u32, edge_val: *const u32, label_val: *const u32) Out { + return .{ key.*, edge_val.*, label_val.* }; + } + }.logic); + } + + try testing.expectEqual(@as(usize, 1), joined.totalLen()); + + // Round 2: add edge 2->3 and label 2->200 + try edges.insertSlice(&ctx, &[_]KV{.{ 2, 3 }}); + try labels.insertSlice(&ctx, &[_]KV{.{ 2, 200 }}); + iter.reset(); + + while (try iter.changed()) { + try zodd.joinInto(u32, u32, u32, Out, &ctx, edges, labels, &joined, struct { + fn logic(key: *const u32, edge_val: *const u32, label_val: *const u32) Out { + return .{ key.*, edge_val.*, label_val.* }; + } + }.logic); + } + + // Should have picked up the new join result + try 
testing.expect(joined.totalLen() >= 2); +} + +test "incremental maintenance: transitive closure re-convergence" { + const allocator = testing.allocator; + var ctx = zodd.ExecutionContext.init(allocator); + const Edge = struct { u32, u32 }; + const EdgeList = std.ArrayListUnmanaged(Edge); + + // Phase 1: edges 1->2, 2->3 + var edges = try zodd.Relation(Edge).fromSlice(&ctx, &[_]Edge{ + .{ 1, 2 }, + .{ 2, 3 }, + }); + defer edges.deinit(); + + var reachable = zodd.Variable(Edge).init(&ctx); + defer reachable.deinit(); + + try reachable.insertSlice(&ctx, edges.elements); + + var iters: usize = 0; + while (try reachable.changed()) { + var new = EdgeList{}; + defer new.deinit(allocator); + + for (reachable.recent.elements) |r| { + for (edges.elements) |e| { + if (e[0] == r[1]) try new.append(allocator, .{ r[0], e[1] }); + } + } + if (new.items.len > 0) { + try reachable.insert(try zodd.Relation(Edge).fromSlice(&ctx, new.items)); + } + iters += 1; + if (iters > 10) break; + } + + // 1->2, 1->3, 2->3 = 3 pairs + try testing.expectEqual(@as(usize, 3), reachable.totalLen()); + + // Phase 2: add edge 3->4 + var edges2 = try zodd.Relation(Edge).fromSlice(&ctx, &[_]Edge{ + .{ 1, 2 }, + .{ 2, 3 }, + .{ 3, 4 }, + }); + defer edges2.deinit(); + + try reachable.insertSlice(&ctx, &[_]Edge{.{ 3, 4 }}); + + iters = 0; + while (try reachable.changed()) { + var new = EdgeList{}; + defer new.deinit(allocator); + + for (reachable.recent.elements) |r| { + // Forward join: recent × edges + for (edges2.elements) |e| { + if (e[0] == r[1]) try new.append(allocator, .{ r[0], e[1] }); + } + // Backward join: stable × recent (to catch paths that can now reach through new edges) + for (reachable.stable.items) |*stable_rel| { + for (stable_rel.elements) |old| { + if (old[1] == r[0]) try new.append(allocator, .{ old[0], r[1] }); + } + } + } + if (new.items.len > 0) { + try reachable.insert(try zodd.Relation(Edge).fromSlice(&ctx, new.items)); + } + iters += 1; + if (iters > 10) break; + } + + // 
1->2,1->3,1->4, 2->3,2->4, 3->4 = 6 pairs + try testing.expectEqual(@as(usize, 6), reachable.totalLen()); +} + +test "incremental maintenance: iteration reset with multiple variables" { + const allocator = testing.allocator; + var ctx = zodd.ExecutionContext.init(allocator); + + var iter = zodd.Iteration(u32).init(&ctx, 50); + defer iter.deinit(); + + const v1 = try iter.variable(); + const v2 = try iter.variable(); + + try v1.insertSlice(&ctx, &[_]u32{ 10, 20 }); + try v2.insertSlice(&ctx, &[_]u32{ 30, 40 }); + + // Converge + while (try iter.changed()) {} + + try testing.expectEqual(@as(usize, 2), v1.totalLen()); + try testing.expectEqual(@as(usize, 2), v2.totalLen()); + + // Reset and add more data + iter.reset(); + try v1.insertSlice(&ctx, &[_]u32{ 50, 60 }); + + const changed = try iter.changed(); + try testing.expect(changed); + + // Run to completion + while (try iter.changed()) {} + + try testing.expectEqual(@as(usize, 4), v1.totalLen()); + try testing.expectEqual(@as(usize, 2), v2.totalLen()); +} diff --git a/tests/integration_tests.zig b/tests/integration_tests.zig index a6d663a..84d6738 100644 --- a/tests/integration_tests.zig +++ b/tests/integration_tests.zig @@ -4,20 +4,21 @@ const zodd = @import("zodd"); test "transitive closure: linear chain" { const allocator = testing.allocator; + var ctx = zodd.ExecutionContext.init(allocator); const Edge = struct { u32, u32 }; const EdgeList = std.ArrayListUnmanaged(Edge); - var edges = try zodd.Relation(Edge).fromSlice(allocator, &[_]Edge{ + var edges = try zodd.Relation(Edge).fromSlice(&ctx, &[_]Edge{ .{ 1, 2 }, .{ 2, 3 }, .{ 3, 4 }, }); defer edges.deinit(); - var reachable = zodd.Variable(Edge).init(allocator); + var reachable = zodd.Variable(Edge).init(&ctx); defer reachable.deinit(); - try reachable.insertSlice(edges.elements); + try reachable.insertSlice(&ctx, edges.elements); var iters: usize = 0; while (try reachable.changed()) : (iters += 1) { @@ -33,7 +34,7 @@ test "transitive closure: linear chain" { 
} if (results.items.len > 0) { - try reachable.insert(try zodd.Relation(Edge).fromSlice(allocator, results.items)); + try reachable.insert(try zodd.Relation(Edge).fromSlice(&ctx, results.items)); } if (iters > 10) break; @@ -47,10 +48,11 @@ test "transitive closure: linear chain" { test "transitive closure: diamond graph" { const allocator = testing.allocator; + var ctx = zodd.ExecutionContext.init(allocator); const Edge = struct { u32, u32 }; const EdgeList = std.ArrayListUnmanaged(Edge); - var edges = try zodd.Relation(Edge).fromSlice(allocator, &[_]Edge{ + var edges = try zodd.Relation(Edge).fromSlice(&ctx, &[_]Edge{ .{ 1, 2 }, .{ 1, 3 }, .{ 2, 4 }, @@ -58,10 +60,10 @@ test "transitive closure: diamond graph" { }); defer edges.deinit(); - var reachable = zodd.Variable(Edge).init(allocator); + var reachable = zodd.Variable(Edge).init(&ctx); defer reachable.deinit(); - try reachable.insertSlice(edges.elements); + try reachable.insertSlice(&ctx, edges.elements); var iters: usize = 0; while (try reachable.changed()) : (iters += 1) { @@ -77,7 +79,7 @@ test "transitive closure: diamond graph" { } if (results.items.len > 0) { - try reachable.insert(try zodd.Relation(Edge).fromSlice(allocator, results.items)); + try reachable.insert(try zodd.Relation(Edge).fromSlice(&ctx, results.items)); } if (iters > 10) break; @@ -91,20 +93,21 @@ test "transitive closure: diamond graph" { test "transitive closure: cycle detection" { const allocator = testing.allocator; + var ctx = zodd.ExecutionContext.init(allocator); const Edge = struct { u32, u32 }; const EdgeList = std.ArrayListUnmanaged(Edge); - var edges = try zodd.Relation(Edge).fromSlice(allocator, &[_]Edge{ + var edges = try zodd.Relation(Edge).fromSlice(&ctx, &[_]Edge{ .{ 1, 2 }, .{ 2, 3 }, .{ 3, 1 }, }); defer edges.deinit(); - var reachable = zodd.Variable(Edge).init(allocator); + var reachable = zodd.Variable(Edge).init(&ctx); defer reachable.deinit(); - try reachable.insertSlice(edges.elements); + try 
reachable.insertSlice(&ctx, edges.elements); var iters: usize = 0; while (try reachable.changed()) : (iters += 1) { @@ -120,7 +123,7 @@ test "transitive closure: cycle detection" { } if (results.items.len > 0) { - try reachable.insert(try zodd.Relation(Edge).fromSlice(allocator, results.items)); + try reachable.insert(try zodd.Relation(Edge).fromSlice(&ctx, results.items)); } if (iters > 20) break; @@ -134,10 +137,11 @@ test "transitive closure: cycle detection" { test "same generation: parent-child hierarchy" { const allocator = testing.allocator; + var ctx = zodd.ExecutionContext.init(allocator); const Pair = struct { u32, u32 }; const PairList = std.ArrayListUnmanaged(Pair); - var parent_child = try zodd.Relation(Pair).fromSlice(allocator, &[_]Pair{ + var parent_child = try zodd.Relation(Pair).fromSlice(&ctx, &[_]Pair{ .{ 1, 2 }, .{ 1, 3 }, .{ 2, 4 }, @@ -145,10 +149,10 @@ test "same generation: parent-child hierarchy" { }); defer parent_child.deinit(); - var same_gen = zodd.Variable(Pair).init(allocator); + var same_gen = zodd.Variable(Pair).init(&ctx); defer same_gen.deinit(); - try same_gen.insertSlice(&[_]Pair{ .{ 1, 1 }, .{ 2, 2 }, .{ 3, 3 }, .{ 4, 4 }, .{ 5, 5 } }); + try same_gen.insertSlice(&ctx, &[_]Pair{ .{ 1, 1 }, .{ 2, 2 }, .{ 3, 3 }, .{ 4, 4 }, .{ 5, 5 } }); var iters: usize = 0; while (try same_gen.changed()) : (iters += 1) { @@ -171,7 +175,7 @@ test "same generation: parent-child hierarchy" { } if (results.items.len > 0) { - try same_gen.insert(try zodd.Relation(Pair).fromSlice(allocator, results.items)); + try same_gen.insert(try zodd.Relation(Pair).fromSlice(&ctx, results.items)); } if (iters > 10) break; @@ -182,3 +186,359 @@ test "same generation: parent-child hierarchy" { try testing.expectEqual(@as(usize, 9), result.len()); } + +test "aggregate: group sum integration" { + const allocator = testing.allocator; + var ctx = zodd.ExecutionContext.init(allocator); + const Tuple = struct { u32, u32 }; + + var rel = try 
zodd.Relation(Tuple).fromSlice(&ctx, &[_]Tuple{ + .{ 1, 10 }, + .{ 1, 20 }, + .{ 2, 5 }, + }); + defer rel.deinit(); + + const key_func = struct { + fn key(t: *const Tuple) u32 { + return t[0]; + } + }; + const folder = struct { + fn fold(acc: u32, t: *const Tuple) u32 { + return acc + t[1]; + } + }; + + var result = try zodd.aggregate.aggregate(Tuple, u32, u32, &ctx, &rel, key_func.key, 0, folder.fold); + defer result.deinit(); + + try testing.expectEqual(@as(usize, 2), result.len()); + try testing.expectEqual(@as(u32, 1), result.elements[0][0]); + try testing.expectEqual(@as(u32, 30), result.elements[0][1]); + try testing.expectEqual(@as(u32, 2), result.elements[1][0]); + try testing.expectEqual(@as(u32, 5), result.elements[1][1]); +} + +test "joinInto: incremental updates integration" { + const allocator = testing.allocator; + var ctx = zodd.ExecutionContext.init(allocator); + const Tuple = struct { u32, u32 }; + const Out = struct { u32, u32, u32 }; + + var v1 = zodd.Variable(Tuple).init(&ctx); + defer v1.deinit(); + + var v2 = zodd.Variable(Tuple).init(&ctx); + defer v2.deinit(); + + var out = zodd.Variable(Out).init(&ctx); + defer out.deinit(); + + try v1.insertSlice(&ctx, &[_]Tuple{.{ 1, 10 }}); + try v2.insertSlice(&ctx, &[_]Tuple{ .{ 1, 100 }, .{ 2, 200 } }); + + _ = try v1.changed(); + _ = try v2.changed(); + + try zodd.joinInto(u32, u32, u32, Out, &ctx, &v1, &v2, &out, struct { + fn logic(key: *const u32, v1_val: *const u32, v2_val: *const u32) Out { + return .{ key.*, v1_val.*, v2_val.* }; + } + }.logic); + + _ = try out.changed(); + try testing.expectEqual(@as(usize, 1), out.recent.len()); + + _ = try v1.changed(); + _ = try v2.changed(); + _ = try out.changed(); + + try v2.insertSlice(&ctx, &[_]Tuple{.{ 1, 101 }}); + _ = try v2.changed(); + + try zodd.joinInto(u32, u32, u32, Out, &ctx, &v1, &v2, &out, struct { + fn logic(key: *const u32, v1_val: *const u32, v2_val: *const u32) Out { + return .{ key.*, v1_val.*, v2_val.* }; + } + }.logic); + + _ = try 
out.changed(); + try testing.expectEqual(@as(usize, 1), out.recent.len()); + try testing.expectEqual(@as(u32, 101), out.recent.elements[0][2]); +} + +test "extendInto: extend and anti integration" { + const allocator = testing.allocator; + var ctx = zodd.ExecutionContext.init(allocator); + const Tuple = struct { u32 }; + const Val = u32; + const Out = struct { u32, u32 }; + + var source = zodd.Variable(Tuple).init(&ctx); + defer source.deinit(); + + try source.insertSlice(&ctx, &[_]Tuple{ .{1}, .{2} }); + _ = try source.changed(); + + var allow = try zodd.Relation(struct { u32, u32 }).fromSlice(&ctx, &[_]struct { u32, u32 }{ + .{ 1, 10 }, + .{ 1, 20 }, + .{ 2, 30 }, + }); + defer allow.deinit(); + + var block = try zodd.Relation(struct { u32, u32 }).fromSlice(&ctx, &[_]struct { u32, u32 }{ + .{ 1, 10 }, + }); + defer block.deinit(); + + var output = zodd.Variable(Out).init(&ctx); + defer output.deinit(); + + var ext_allow = zodd.ExtendWith(Tuple, u32, Val).init(&ctx, &allow, struct { + fn f(t: *const Tuple) u32 { + return t[0]; + } + }.f); + + var ext_block = zodd.ExtendAnti(Tuple, u32, Val).init(&ctx, &block, struct { + fn f(t: *const Tuple) u32 { + return t[0]; + } + }.f); + + var leapers = [_]zodd.Leaper(Tuple, Val){ ext_allow.leaper(), ext_block.leaper() }; + + try zodd.extendInto(Tuple, Val, Out, &ctx, &source, &leapers, &output, struct { + fn logic(t: *const Tuple, v: *const Val) Out { + return .{ t[0], v.* }; + } + }.logic); + + _ = try output.changed(); + try testing.expectEqual(@as(usize, 2), output.recent.len()); + try testing.expectEqual(Out{ 1, 20 }, output.recent.elements[0]); + try testing.expectEqual(Out{ 2, 30 }, output.recent.elements[1]); +} + +test "SecondaryIndex: getRange randomized integration" { + const allocator = testing.allocator; + var ctx = zodd.ExecutionContext.init(allocator); + const Tuple = struct { u32, u32 }; + + const Index = zodd.index.SecondaryIndex(Tuple, u32, struct { + fn extract(t: Tuple) u32 { + return t[0]; + } + 
}.extract, struct { + fn cmp(a: u32, b: u32) std.math.Order { + return std.math.order(a, b); + } + }.cmp, 4); + + var idx = Index.init(&ctx); + defer idx.deinit(); + + var all = std.ArrayListUnmanaged(Tuple){}; + defer all.deinit(allocator); + + var prng = std.Random.DefaultPrng.init(0x5a5a5a5a); + const rand = prng.random(); + + var i: usize = 0; + while (i < 50) : (i += 1) { + const k = rand.intRangeAtMost(u32, 0, 20); + const v = rand.intRangeAtMost(u32, 0, 1000); + const t = Tuple{ k, v }; + try idx.insert(t); + try all.append(allocator, t); + } + + var r: usize = 0; + while (r < 10) : (r += 1) { + const a = rand.intRangeAtMost(u32, 0, 20); + const b = rand.intRangeAtMost(u32, 0, 20); + const start = @min(a, b); + const end = @max(a, b); + + var expected_list = std.ArrayListUnmanaged(Tuple){}; + defer expected_list.deinit(allocator); + + for (all.items) |t| { + if (t[0] >= start and t[0] <= end) { + try expected_list.append(allocator, t); + } + } + + var expected = try zodd.Relation(Tuple).fromSlice(&ctx, expected_list.items); + defer expected.deinit(); + + var got = try idx.getRange(start, end); + defer got.deinit(); + + try testing.expectEqualSlices(Tuple, expected.elements, got.elements); + } +} + +test "integration: FilterAnti in extendInto" { + const allocator = testing.allocator; + var ctx = zodd.ExecutionContext.init(allocator); + const Tuple = struct { u32 }; + const Val = u32; + const Out = struct { u32, u32 }; + + var source = zodd.Variable(Tuple).init(&ctx); + defer source.deinit(); + + try source.insertSlice(&ctx, &[_]Tuple{ .{1}, .{2}, .{3} }); + _ = try source.changed(); + + var rel = try zodd.Relation(struct { u32, u32 }).fromSlice(&ctx, &[_]struct { u32, u32 }{ + .{ 1, 10 }, + .{ 2, 20 }, + .{ 3, 30 }, + }); + defer rel.deinit(); + + var filter_rel = try zodd.Relation(struct { u32, u32 }).fromSlice(&ctx, &[_]struct { u32, u32 }{ + .{ 2, 999 }, + }); + defer filter_rel.deinit(); + + var output = zodd.Variable(Out).init(&ctx); + defer 
output.deinit(); + + var ext = zodd.ExtendWith(Tuple, u32, Val).init(&ctx, &rel, struct { + fn f(t: *const Tuple) u32 { + return t[0]; + } + }.f); + + var anti = zodd.FilterAnti(Tuple, u32, u32).init(&ctx, &filter_rel, struct { + fn f(t: *const Tuple) struct { u32, u32 } { + return .{ t[0], 999 }; + } + }.f); + + var leapers = [_]zodd.Leaper(Tuple, Val){ ext.leaper(), anti.leaper() }; + + try zodd.extendInto(Tuple, Val, Out, &ctx, &source, &leapers, &output, struct { + fn logic(t: *const Tuple, v: *const Val) Out { + return .{ t[0], v.* }; + } + }.logic); + + _ = try output.changed(); + + try testing.expectEqual(@as(usize, 2), output.recent.len()); + try testing.expectEqual(Out{ 1, 10 }, output.recent.elements[0]); + try testing.expectEqual(Out{ 3, 30 }, output.recent.elements[1]); +} + +test "integration: multi-way intersection" { + const allocator = testing.allocator; + var ctx = zodd.ExecutionContext.init(allocator); + const Tuple = struct { u32 }; + const Val = u32; + const Out = struct { u32, u32 }; + + var source = zodd.Variable(Tuple).init(&ctx); + defer source.deinit(); + + try source.insertSlice(&ctx, &[_]Tuple{ .{1}, .{2}, .{3}, .{4} }); + _ = try source.changed(); + + var r1 = try zodd.Relation(struct { u32, u32 }).fromSlice(&ctx, &[_]struct { u32, u32 }{ + .{ 1, 100 }, .{ 2, 200 }, .{ 3, 300 }, .{ 4, 400 }, + }); + defer r1.deinit(); + + var r2 = try zodd.Relation(struct { u32, u32 }).fromSlice(&ctx, &[_]struct { u32, u32 }{ + .{ 1, 100 }, .{ 2, 200 }, .{ 4, 999 }, + }); + defer r2.deinit(); + + var r3 = try zodd.Relation(struct { u32, u32 }).fromSlice(&ctx, &[_]struct { u32, u32 }{ + .{ 2, 200 }, .{ 3, 300 }, + }); + defer r3.deinit(); + + var output = zodd.Variable(Out).init(&ctx); + defer output.deinit(); + + const KeyFunc = struct { + fn f(t: *const Tuple) u32 { + return t[0]; + } + }; + + var ext1 = zodd.ExtendWith(Tuple, u32, Val).init(&ctx, &r1, KeyFunc.f); + var ext2 = zodd.ExtendWith(Tuple, u32, Val).init(&ctx, &r2, KeyFunc.f); + var ext3 = 
zodd.ExtendWith(Tuple, u32, Val).init(&ctx, &r3, KeyFunc.f); + + var leapers = [_]zodd.Leaper(Tuple, Val){ ext1.leaper(), ext2.leaper(), ext3.leaper() }; + + try zodd.extendInto(Tuple, Val, Out, &ctx, &source, &leapers, &output, struct { + fn logic(t: *const Tuple, v: *const Val) Out { + return .{ t[0], v.* }; + } + }.logic); + + _ = try output.changed(); + + try testing.expectEqual(@as(usize, 1), output.recent.len()); + try testing.expectEqual(Out{ 2, 200 }, output.recent.elements[0]); +} + +test "integration: persistence round-trip" { + const allocator = testing.allocator; + var ctx = zodd.ExecutionContext.init(allocator); + const Tuple = struct { u32, u32 }; + + var original = try zodd.Relation(Tuple).fromSlice(&ctx, &[_]Tuple{ + .{ 1, 10 }, .{ 2, 20 }, .{ 3, 30 }, + }); + defer original.deinit(); + + var buffer = std.ArrayListUnmanaged(u8){}; + defer buffer.deinit(allocator); + + try original.save(buffer.writer(allocator)); + + var fbs = std.io.fixedBufferStream(buffer.items); + var loaded = try zodd.Relation(Tuple).load(&ctx, fbs.reader()); + defer loaded.deinit(); + + try testing.expectEqual(original.len(), loaded.len()); + try testing.expectEqualSlices(Tuple, original.elements, loaded.elements); +} + +test "integration: empty-input transitive closure" { + const allocator = testing.allocator; + var ctx = zodd.ExecutionContext.init(allocator); + const Edge = struct { u32, u32 }; + + var edges = zodd.Relation(Edge).empty(&ctx); + defer edges.deinit(); + + var reachable = zodd.Variable(Edge).init(&ctx); + defer reachable.deinit(); + + try reachable.insert(edges); + + try reachable.insertSlice(&ctx, &[_]Edge{}); + + while (try reachable.changed()) { + _ = struct { + fn fail(_: *const Edge, _: *const u32, _: *const u32) Edge { + unreachable; + } + }.fail(undefined, undefined, undefined); + } + + var res = try reachable.complete(); + defer res.deinit(); + + try testing.expectEqual(@as(usize, 0), res.len()); +} diff --git a/tests/property_tests.zig 
b/tests/property_tests.zig index 116f7ae..7627662 100644 --- a/tests/property_tests.zig +++ b/tests/property_tests.zig @@ -10,7 +10,8 @@ test "property: relation always sorted after fromSlice" { gen.list(u32, gen.intRange(u32, 0, 1000), 0, 50), struct { fn prop(data: []const u32) !void { - var rel = try zodd.Relation(u32).fromSlice(testing.allocator, data); + var ctx = zodd.ExecutionContext.init(testing.allocator); + var rel = try zodd.Relation(u32).fromSlice(&ctx, data); defer rel.deinit(); if (rel.elements.len > 1) { @@ -30,7 +31,8 @@ test "property: relation always deduplicated after fromSlice" { gen.list(u32, gen.intRange(u32, 0, 50), 0, 30), struct { fn prop(data: []const u32) !void { - var rel = try zodd.Relation(u32).fromSlice(testing.allocator, data); + var ctx = zodd.ExecutionContext.init(testing.allocator); + var rel = try zodd.Relation(u32).fromSlice(&ctx, data); defer rel.deinit(); if (rel.elements.len > 1) { @@ -58,13 +60,14 @@ test "property: relation merge is commutative" { two_lists_gen, struct { fn prop(lists: TwoLists) !void { - var rel1a = try zodd.Relation(u32).fromSlice(testing.allocator, lists[0]); - var rel2a = try zodd.Relation(u32).fromSlice(testing.allocator, lists[1]); + var ctx = zodd.ExecutionContext.init(testing.allocator); + var rel1a = try zodd.Relation(u32).fromSlice(&ctx, lists[0]); + var rel2a = try zodd.Relation(u32).fromSlice(&ctx, lists[1]); var merged_ab = try rel1a.merge(&rel2a); defer merged_ab.deinit(); - var rel1b = try zodd.Relation(u32).fromSlice(testing.allocator, lists[0]); - var rel2b = try zodd.Relation(u32).fromSlice(testing.allocator, lists[1]); + var rel1b = try zodd.Relation(u32).fromSlice(&ctx, lists[0]); + var rel2b = try zodd.Relation(u32).fromSlice(&ctx, lists[1]); var merged_ba = try rel2b.merge(&rel1b); defer merged_ba.deinit(); @@ -81,10 +84,11 @@ test "property: variable deduplicates across rounds" { gen.list(u32, gen.intRange(u32, 0, 50), 1, 30), struct { fn prop(data: []const u32) !void { - var v = 
zodd.Variable(u32).init(testing.allocator); + var ctx = zodd.ExecutionContext.init(testing.allocator); + var v = zodd.Variable(u32).init(&ctx); defer v.deinit(); - try v.insertSlice(data); + try v.insertSlice(&ctx, data); while (try v.changed()) {} var result = try v.complete(); @@ -107,9 +111,10 @@ test "property: variable totalLen matches complete().len" { gen.list(u32, gen.intRange(u32, 0, 100), 1, 30), struct { fn prop(data: []const u32) !void { - var v = zodd.Variable(u32).init(testing.allocator); + var ctx = zodd.ExecutionContext.init(testing.allocator); + var v = zodd.Variable(u32).init(&ctx); - try v.insertSlice(data); + try v.insertSlice(&ctx, data); while (try v.changed()) {} const total_before = v.totalLen(); @@ -138,13 +143,14 @@ test "property: transitive closure reaches expected nodes" { edges_gen, struct { fn prop(edges: []const Edge) !void { - var edges_rel = try zodd.Relation(Edge).fromSlice(testing.allocator, edges); + var ctx = zodd.ExecutionContext.init(testing.allocator); + var edges_rel = try zodd.Relation(Edge).fromSlice(&ctx, edges); defer edges_rel.deinit(); - var reachable = zodd.Variable(Edge).init(testing.allocator); + var reachable = zodd.Variable(Edge).init(&ctx); defer reachable.deinit(); - try reachable.insertSlice(edges_rel.elements); + try reachable.insertSlice(&ctx, edges_rel.elements); var iters: usize = 0; const EdgeList = std.ArrayListUnmanaged(Edge); @@ -161,7 +167,7 @@ test "property: transitive closure reaches expected nodes" { } if (results.items.len > 0) { - try reachable.insert(try zodd.Relation(Edge).fromSlice(testing.allocator, results.items)); + try reachable.insert(try zodd.Relation(Edge).fromSlice(&ctx, results.items)); } if (iters > 20) break; @@ -176,3 +182,544 @@ test "property: transitive closure reaches expected nodes" { .{ .num_runs = 30, .seed = 0xabcdef01 }, ); } + +test "property: relation merge is idempotent" { + try minish.check( + testing.allocator, + gen.list(u32, gen.intRange(u32, 0, 100), 0, 30), + 
struct { + fn prop(data: []const u32) !void { + var ctx = zodd.ExecutionContext.init(testing.allocator); + var rel1 = try zodd.Relation(u32).fromSlice(&ctx, data); + defer rel1.deinit(); + + var rel2 = try zodd.Relation(u32).fromSlice(&ctx, data); + var merged = try rel2.merge(&rel1); + defer merged.deinit(); + + var expected = try zodd.Relation(u32).fromSlice(&ctx, data); + defer expected.deinit(); + + try testing.expectEqualSlices(u32, expected.elements, merged.elements); + } + }.prop, + .{ .num_runs = 50, .seed = 0x11223344 }, + ); +} + +test "property: gallop returns suffix at target" { + const Pair = struct { []const u32, u32 }; + const gen_pair = gen.tuple2( + []const u32, + u32, + gen.list(u32, gen.intRange(u32, 0, 200), 0, 40), + gen.intRange(u32, 0, 200), + ); + + try minish.check( + testing.allocator, + gen_pair, + struct { + fn prop(input: Pair) !void { + var ctx = zodd.ExecutionContext.init(testing.allocator); + var rel = try zodd.Relation(u32).fromSlice(&ctx, input[0]); + defer rel.deinit(); + + const target = input[1]; + const slice = zodd.gallop(u32, rel.elements, target); + + if (rel.elements.len == 0) { + try testing.expectEqual(@as(usize, 0), slice.len); + return; + } + + if (slice.len > 0) { + try testing.expect(slice[0] >= target); + } + + for (slice) |v| { + try testing.expect(v >= target); + } + } + }.prop, + .{ .num_runs = 50, .seed = 0x55667788 }, + ); +} + +test "property: relation merge is associative" { + const ThreeLists = struct { []const u32, []const u32, []const u32 }; + const lists_gen = gen.tuple3( + []const u32, + []const u32, + []const u32, + gen.list(u32, gen.intRange(u32, 0, 50), 0, 15), + gen.list(u32, gen.intRange(u32, 0, 50), 0, 15), + gen.list(u32, gen.intRange(u32, 0, 50), 0, 15), + ); + + try minish.check( + testing.allocator, + lists_gen, + struct { + fn prop(lists: ThreeLists) !void { + var ctx = zodd.ExecutionContext.init(testing.allocator); + var a1 = try zodd.Relation(u32).fromSlice(&ctx, lists[0]); + var b1 = try 
zodd.Relation(u32).fromSlice(&ctx, lists[1]); + var c1 = try zodd.Relation(u32).fromSlice(&ctx, lists[2]); + + var ab = try a1.merge(&b1); + var ab_c = try ab.merge(&c1); + defer ab_c.deinit(); + + var a2 = try zodd.Relation(u32).fromSlice(&ctx, lists[0]); + var b2 = try zodd.Relation(u32).fromSlice(&ctx, lists[1]); + var c2 = try zodd.Relation(u32).fromSlice(&ctx, lists[2]); + + var bc = try b2.merge(&c2); + var a_bc = try a2.merge(&bc); + defer a_bc.deinit(); + + try testing.expectEqualSlices(u32, ab_c.elements, a_bc.elements); + } + }.prop, + .{ .num_runs = 30, .seed = 0x8899aabb }, + ); +} + +test "property: joinHelper matches naive join" { + const Tuple = struct { u32, u32 }; + const Pair = struct { []const Tuple, []const Tuple }; + const pair_gen = gen.tuple2( + []const Tuple, + []const Tuple, + gen.list(Tuple, gen.tuple2(u32, u32, gen.intRange(u32, 0, 20), gen.intRange(u32, 0, 20)), 0, 10), + gen.list(Tuple, gen.tuple2(u32, u32, gen.intRange(u32, 0, 20), gen.intRange(u32, 0, 20)), 0, 10), + ); + + try minish.check( + testing.allocator, + pair_gen, + struct { + fn prop(p: Pair) !void { + var ctx = zodd.ExecutionContext.init(testing.allocator); + var rel1 = try zodd.Relation(Tuple).fromSlice(&ctx, p[0]); + defer rel1.deinit(); + + var rel2 = try zodd.Relation(Tuple).fromSlice(&ctx, p[1]); + defer rel2.deinit(); + + const Result = struct { u32, u32, u32 }; + var expected_list = std.ArrayListUnmanaged(Result){}; + defer expected_list.deinit(testing.allocator); + + for (rel1.elements) |t1| { + for (rel2.elements) |t2| { + if (t1[0] == t2[0]) { + try expected_list.append(testing.allocator, .{ t1[0], t1[1], t2[1] }); + } + } + } + + var expected = try zodd.Relation(Result).fromSlice(&ctx, expected_list.items); + defer expected.deinit(); + + const ResultList = std.ArrayListUnmanaged(Result); + const Context = struct { + results: *ResultList, + alloc: std.mem.Allocator, + + fn callback(self: @This(), key: *const u32, v1: *const u32, v2: *const u32) void { + 
self.results.append(self.alloc, .{ key.*, v1.*, v2.* }) catch {}; + } + }; + + var got_list = ResultList{}; + defer got_list.deinit(testing.allocator); + + zodd.joinHelper(u32, u32, u32, &rel1, &rel2, Context{ .results = &got_list, .alloc = testing.allocator }, Context.callback); + + var got = try zodd.Relation(Result).fromSlice(&ctx, got_list.items); + defer got.deinit(); + + try testing.expectEqualSlices(Result, expected.elements, got.elements); + } + }.prop, + .{ .num_runs = 30, .seed = 0x99aabbcc }, + ); +} + +test "property: joinAnti matches naive filter" { + const Tuple = struct { u32, u32 }; + const Pair = struct { []const Tuple, []const Tuple }; + const pair_gen = gen.tuple2( + []const Tuple, + []const Tuple, + gen.list(Tuple, gen.tuple2(u32, u32, gen.intRange(u32, 0, 20), gen.intRange(u32, 0, 20)), 0, 12), + gen.list(Tuple, gen.tuple2(u32, u32, gen.intRange(u32, 0, 20), gen.intRange(u32, 0, 20)), 0, 12), + ); + + try minish.check( + testing.allocator, + pair_gen, + struct { + fn prop(p: Pair) !void { + var ctx = zodd.ExecutionContext.init(testing.allocator); + var input = zodd.Variable(Tuple).init(&ctx); + defer input.deinit(); + + var filter = zodd.Variable(Tuple).init(&ctx); + defer filter.deinit(); + + var output = zodd.Variable(Tuple).init(&ctx); + defer output.deinit(); + + try input.insertSlice(&ctx, p[0]); + try filter.insertSlice(&ctx, p[1]); + + _ = try input.changed(); + _ = try filter.changed(); + + try zodd.joinAnti(u32, u32, u32, Tuple, &ctx, &input, &filter, &output, struct { + fn logic(key: *const u32, val: *const u32) Tuple { + return .{ key.*, val.* }; + } + }.logic); + + _ = try output.changed(); + + var expected_list = std.ArrayListUnmanaged(Tuple){}; + defer expected_list.deinit(testing.allocator); + + for (input.recent.elements) |t| { + var found = false; + for (filter.recent.elements) |f| { + if (f[0] == t[0]) { + found = true; + break; + } + } + if (!found) { + try expected_list.append(testing.allocator, t); + } + } + + var expected 
= try zodd.Relation(Tuple).fromSlice(&ctx, expected_list.items); + defer expected.deinit(); + + var got = try zodd.Relation(Tuple).fromSlice(&ctx, output.recent.elements); + defer got.deinit(); + + try testing.expectEqualSlices(Tuple, expected.elements, got.elements); + } + }.prop, + .{ .num_runs = 30, .seed = 0xa1b2c3d4 }, + ); +} + +test "property: extendInto matches naive extend" { + const Tuple = u32; + const KV = struct { u32, u32 }; + const Pair = struct { []const u32, []const KV }; + const pair_gen = gen.tuple2( + []const u32, + []const KV, + gen.list(u32, gen.intRange(u32, 0, 10), 0, 10), + gen.list(KV, gen.tuple2(u32, u32, gen.intRange(u32, 0, 10), gen.intRange(u32, 0, 10)), 0, 15), + ); + + try minish.check( + testing.allocator, + pair_gen, + struct { + fn prop(p: Pair) !void { + var ctx = zodd.ExecutionContext.init(testing.allocator); + var source = zodd.Variable(Tuple).init(&ctx); + defer source.deinit(); + + var rel = try zodd.Relation(KV).fromSlice(&ctx, p[1]); + defer rel.deinit(); + + var output = zodd.Variable(KV).init(&ctx); + defer output.deinit(); + + try source.insertSlice(&ctx, p[0]); + _ = try source.changed(); + + var ext = zodd.ExtendWith(Tuple, u32, u32).init(&ctx, &rel, struct { + fn f(t: *const Tuple) u32 { + return t.*; + } + }.f); + + var leapers = [_]zodd.Leaper(Tuple, u32){ext.leaper()}; + + try zodd.extendInto(Tuple, u32, KV, &ctx, &source, &leapers, &output, struct { + fn logic(t: *const Tuple, v: *const u32) KV { + return .{ t.*, v.* }; + } + }.logic); + + _ = try output.changed(); + + var expected_list = std.ArrayListUnmanaged(KV){}; + defer expected_list.deinit(testing.allocator); + + for (source.recent.elements) |t| { + for (rel.elements) |kv| { + if (kv[0] == t) { + try expected_list.append(testing.allocator, kv); + } + } + } + + var expected = try zodd.Relation(KV).fromSlice(&ctx, expected_list.items); + defer expected.deinit(); + + var got = try zodd.Relation(KV).fromSlice(&ctx, output.recent.elements); + defer got.deinit(); 
+ + try testing.expectEqualSlices(KV, expected.elements, got.elements); + } + }.prop, + .{ .num_runs = 30, .seed = 0xb2c3d4e5 }, + ); +} + +test "property: SecondaryIndex get matches naive filter" { + const Tuple = struct { u32, u32 }; + const List = []const Tuple; + + const list_gen = gen.list( + Tuple, + gen.tuple2(u32, u32, gen.intRange(u32, 0, 10), gen.intRange(u32, 0, 50)), + 0, + 20, + ); + + try minish.check( + testing.allocator, + list_gen, + struct { + fn prop(data: List) !void { + var ctx = zodd.ExecutionContext.init(testing.allocator); + const Index = zodd.index.SecondaryIndex(Tuple, u32, struct { + fn extract(t: Tuple) u32 { + return t[0]; + } + }.extract, struct { + fn cmp(a: u32, b: u32) std.math.Order { + return std.math.order(a, b); + } + }.cmp, 4); + + var idx = Index.init(&ctx); + defer idx.deinit(); + + for (data) |t| { + try idx.insert(t); + } + + var rel = try zodd.Relation(Tuple).fromSlice(&ctx, data); + defer rel.deinit(); + + var i: usize = 0; + while (i < rel.elements.len) : (i += 1) { + const key = rel.elements[i][0]; + + var expected_list = std.ArrayListUnmanaged(Tuple){}; + defer expected_list.deinit(testing.allocator); + + for (data) |t| { + if (t[0] == key) { + try expected_list.append(testing.allocator, t); + } + } + + var expected = try zodd.Relation(Tuple).fromSlice(&ctx, expected_list.items); + defer expected.deinit(); + + const got_ptr = idx.get(key).?; + try testing.expectEqualSlices(Tuple, expected.elements, got_ptr.elements); + } + } + }.prop, + .{ .num_runs = 30, .seed = 0xc1d2e3f4 }, + ); +} + +test "property: aggregate matches naive sum" { + const Tuple = struct { u32, u32 }; + const List = []const Tuple; + + const list_gen = gen.list( + Tuple, + gen.tuple2(u32, u32, gen.intRange(u32, 0, 10), gen.intRange(u32, 0, 50)), + 0, + 20, + ); + + try minish.check( + testing.allocator, + list_gen, + struct { + fn prop(data: List) !void { + var ctx = zodd.ExecutionContext.init(testing.allocator); + var rel = try 
zodd.Relation(Tuple).fromSlice(&ctx, data); + defer rel.deinit(); + + const key_func = struct { + fn key(t: *const Tuple) u32 { + return t[0]; + } + }; + const folder = struct { + fn fold(acc: u32, t: *const Tuple) u32 { + return acc + t[1]; + } + }; + + var result = try zodd.aggregate.aggregate(Tuple, u32, u32, &ctx, &rel, key_func.key, 0, folder.fold); + defer result.deinit(); + + var map = std.AutoHashMap(u32, u32).init(testing.allocator); + defer map.deinit(); + + for (rel.elements) |t| { + const entry = try map.getOrPut(t[0]); + if (!entry.found_existing) { + entry.value_ptr.* = 0; + } + entry.value_ptr.* += t[1]; + } + + var expected_list = std.ArrayListUnmanaged(struct { u32, u32 }){}; + defer expected_list.deinit(testing.allocator); + + var it = map.iterator(); + while (it.next()) |entry| { + try expected_list.append(testing.allocator, .{ entry.key_ptr.*, entry.value_ptr.* }); + } + + var expected = try zodd.Relation(struct { u32, u32 }).fromSlice(&ctx, expected_list.items); + defer expected.deinit(); + + try testing.expectEqualSlices(struct { u32, u32 }, expected.elements, result.elements); + } + }.prop, + .{ .num_runs = 30, .seed = 0xd4e5f6a7 }, + ); +} + +test "property: persistence round-trip" { + const Tuple = struct { u32, u32 }; + const List = []const Tuple; + + const list_gen = gen.list( + Tuple, + gen.tuple2(u32, u32, gen.intRange(u32, 0, 100), gen.intRange(u32, 0, 100)), + 0, + 50, + ); + + try minish.check( + testing.allocator, + list_gen, + struct { + fn prop(data: List) !void { + var ctx = zodd.ExecutionContext.init(testing.allocator); + + var original = try zodd.Relation(Tuple).fromSlice(&ctx, data); + defer original.deinit(); + + var buffer = std.ArrayListUnmanaged(u8){}; + defer buffer.deinit(testing.allocator); + + try original.save(buffer.writer(testing.allocator)); + + var fbs = std.io.fixedBufferStream(buffer.items); + var loaded = try zodd.Relation(Tuple).load(&ctx, fbs.reader()); + defer loaded.deinit(); + + try 
testing.expectEqual(original.len(), loaded.len()); + try testing.expectEqualSlices(Tuple, original.elements, loaded.elements); + } + }.prop, + .{ .num_runs = 30, .seed = 0xf00dcafe }, + ); +} + +test "property: aggregate count matches naive count" { + const Tuple = struct { u32, u32 }; + const List = []const Tuple; + + const list_gen = gen.list( + Tuple, + gen.tuple2(u32, u32, gen.intRange(u32, 0, 10), gen.intRange(u32, 0, 50)), + 0, + 50, + ); + + try minish.check( + testing.allocator, + list_gen, + struct { + fn prop(data: List) !void { + var ctx = zodd.ExecutionContext.init(testing.allocator); + var rel = try zodd.Relation(Tuple).fromSlice(&ctx, data); + defer rel.deinit(); + + const key_func = struct { + fn f(t: *const Tuple) u32 { + return t[0]; + } + }.f; + + var result = try zodd.aggregateFn( + Tuple, + u32, + u32, + &ctx, + &rel, + key_func, + 0, + struct { + fn count(acc: u32, _: *const Tuple) u32 { + return acc + 1; + } + }.count, + ); + defer result.deinit(); + + var map = std.AutoHashMap(u32, u32).init(testing.allocator); + defer map.deinit(); + + for (rel.elements) |t| { + const g = try map.getOrPut(t[0]); + if (!g.found_existing) g.value_ptr.* = 0; + g.value_ptr.* += 1; + } + + var expected_list = std.ArrayListUnmanaged(struct { u32, u32 }){}; + defer expected_list.deinit(testing.allocator); + + var it = map.iterator(); + while (it.next()) |entry| { + try expected_list.append(testing.allocator, .{ entry.key_ptr.*, entry.value_ptr.* }); + } + + const sort = struct { + fn lessThan(_: void, a: struct { u32, u32 }, b: struct { u32, u32 }) bool { + return a[0] < b[0]; + } + }; + std.sort.block(struct { u32, u32 }, expected_list.items, {}, sort.lessThan); + + var expected = try zodd.Relation(struct { u32, u32 }).fromSlice(&ctx, expected_list.items); + defer expected.deinit(); + + try testing.expectEqualSlices(struct { u32, u32 }, expected.elements, result.elements); + } + }.prop, + .{ .num_runs = 30, .seed = 0xbeefbabe }, + ); +} diff --git 
a/tests/regression_tests.zig b/tests/regression_tests.zig index 89f8c9f..6362bbd 100644 --- a/tests/regression_tests.zig +++ b/tests/regression_tests.zig @@ -4,11 +4,12 @@ const zodd = @import("zodd"); test "regression: totalLen includes to_add batches" { const allocator = testing.allocator; + var ctx = zodd.ExecutionContext.init(allocator); - var v = zodd.Variable(u32).init(allocator); + var v = zodd.Variable(u32).init(&ctx); defer v.deinit(); - try v.insertSlice(&[_]u32{ 1, 2, 3 }); + try v.insertSlice(&ctx, &[_]u32{ 1, 2, 3 }); try testing.expectEqual(@as(usize, 3), v.totalLen()); @@ -16,21 +17,22 @@ test "regression: totalLen includes to_add batches" { try testing.expectEqual(@as(usize, 3), v.totalLen()); - try v.insertSlice(&[_]u32{ 4, 5 }); + try v.insertSlice(&ctx, &[_]u32{ 4, 5 }); try testing.expectEqual(@as(usize, 5), v.totalLen()); } test "regression: Iteration cleanup handles variables" { const allocator = testing.allocator; + var ctx = zodd.ExecutionContext.init(allocator); - var iter = zodd.Iteration(u32).init(allocator, null); + var iter = zodd.Iteration(u32).init(&ctx, null); const v1 = try iter.variable(); const v2 = try iter.variable(); - try v1.insertSlice(&[_]u32{ 1, 2, 3 }); - try v2.insertSlice(&[_]u32{ 4, 5 }); + try v1.insertSlice(&ctx, &[_]u32{ 1, 2, 3 }); + try v2.insertSlice(&ctx, &[_]u32{ 4, 5 }); _ = try iter.changed(); @@ -39,9 +41,10 @@ test "regression: Iteration cleanup handles variables" { test "regression: intersection correctness with sorted values" { const allocator = testing.allocator; + var ctx = zodd.ExecutionContext.init(allocator); const KV = struct { u32, u32 }; - var rel = try zodd.Relation(KV).fromSlice(allocator, &[_]KV{ + var rel = try zodd.Relation(KV).fromSlice(&ctx, &[_]KV{ .{ 1, 10 }, .{ 1, 20 }, .{ 1, 30 }, @@ -50,7 +53,7 @@ test "regression: intersection correctness with sorted values" { }); defer rel.deinit(); - var ext = zodd.ExtendWith(u32, u32, u32).init(allocator, &rel, struct { + var ext = 
zodd.ExtendWith(u32, u32, u32).init(&ctx, &rel, struct { fn f(t: *const u32) u32 { return t.*; } @@ -71,20 +74,21 @@ test "regression: intersection correctness with sorted values" { test "regression: variable deduplication across multiple rounds" { const allocator = testing.allocator; + var ctx = zodd.ExecutionContext.init(allocator); - var v = zodd.Variable(u32).init(allocator); + var v = zodd.Variable(u32).init(&ctx); defer v.deinit(); - try v.insertSlice(&[_]u32{ 1, 2, 3 }); + try v.insertSlice(&ctx, &[_]u32{ 1, 2, 3 }); _ = try v.changed(); - try v.insertSlice(&[_]u32{ 2, 3, 4, 5 }); + try v.insertSlice(&ctx, &[_]u32{ 2, 3, 4, 5 }); const changed1 = try v.changed(); try testing.expect(changed1); try testing.expectEqual(@as(usize, 2), v.recent.len()); - try v.insertSlice(&[_]u32{ 1, 2, 3, 4, 5 }); + try v.insertSlice(&ctx, &[_]u32{ 1, 2, 3, 4, 5 }); const changed2 = try v.changed(); try testing.expect(!changed2); @@ -95,3 +99,502 @@ test "regression: variable deduplication across multiple rounds" { defer result.deinit(); try testing.expectEqual(@as(usize, 5), result.len()); } + +test "regression: extendInto error detection with allocation failure simulation" { + const allocator = testing.allocator; + var ctx = zodd.ExecutionContext.init(allocator); + const Tuple = struct { u32 }; + const Val = u32; + + var source = zodd.Variable(Tuple).init(&ctx); + defer source.deinit(); + + try source.insertSlice(&ctx, &[_]Tuple{.{1}}); + _ = try source.changed(); + + var R_B = try zodd.Relation(struct { u32, u32 }).fromSlice(&ctx, &[_]struct { u32, u32 }{ + .{ 1, 10 }, + .{ 1, 20 }, + }); + defer R_B.deinit(); + + var output = zodd.Variable(struct { u32, u32 }).init(&ctx); + defer output.deinit(); + + var extB = zodd.ExtendWith(Tuple, u32, Val).init(&ctx, &R_B, struct { + fn f(t: *const Tuple) u32 { + return t[0]; + } + }.f); + + var leapers = [_]zodd.Leaper(Tuple, Val){extB.leaper()}; + + try zodd.extendInto(Tuple, Val, struct { u32, u32 }, &ctx, &source, &leapers, &output, 
struct { + fn logic(t: *const Tuple, v: *const Val) struct { u32, u32 } { + return .{ t[0], v.* }; + } + }.logic); + + _ = try output.changed(); + try testing.expectEqual(@as(usize, 2), output.recent.len()); +} + +test "regression: SecondaryIndex get returns pointer not copy" { + const allocator = testing.allocator; + var ctx = zodd.ExecutionContext.init(allocator); + const Tuple = struct { u32, u32 }; + + const Index = zodd.index.SecondaryIndex(Tuple, u32, struct { + fn extract(t: Tuple) u32 { + return t[1]; + } + }.extract, struct { + fn cmp(a: u32, b: u32) std.math.Order { + return std.math.order(a, b); + } + }.cmp, 4); + + var idx = Index.init(&ctx); + defer idx.deinit(); + + try idx.insert(.{ 1, 10 }); + try idx.insert(.{ 2, 10 }); + + const rel_ptr = idx.get(10).?; + try testing.expectEqual(@as(usize, 2), rel_ptr.len()); + try testing.expectEqual(@as(u32, 1), rel_ptr.elements[0][0]); + try testing.expectEqual(@as(u32, 2), rel_ptr.elements[1][0]); +} + +test "regression: Variable complete includes recent and to_add data" { + const allocator = testing.allocator; + var ctx = zodd.ExecutionContext.init(allocator); + + var v = zodd.Variable(u32).init(&ctx); + defer v.deinit(); + + try v.insertSlice(&ctx, &[_]u32{ 1, 2, 3 }); + + var result = try v.complete(); + defer result.deinit(); + + try testing.expectEqual(@as(usize, 3), result.len()); + try testing.expectEqual(@as(u32, 1), result.elements[0]); + try testing.expectEqual(@as(u32, 2), result.elements[1]); + try testing.expectEqual(@as(u32, 3), result.elements[2]); +} + +test "regression: Variable complete with recent data not yet stable" { + const allocator = testing.allocator; + var ctx = zodd.ExecutionContext.init(allocator); + + var v = zodd.Variable(u32).init(&ctx); + defer v.deinit(); + + try v.insertSlice(&ctx, &[_]u32{ 1, 2 }); + _ = try v.changed(); + + try v.insertSlice(&ctx, &[_]u32{ 3, 4 }); + _ = try v.changed(); + + var result = try v.complete(); + defer result.deinit(); + + try 
testing.expectEqual(@as(usize, 4), result.len()); +} + +test "regression: gallop with large step values" { + const allocator = testing.allocator; + var ctx = zodd.ExecutionContext.init(allocator); + + const size = 1000; + const data = try allocator.alloc(u32, size); + defer allocator.free(data); + + for (data, 0..) |*elem, i| { + elem.* = @intCast(i * 2); + } + + var rel = try zodd.Relation(u32).fromSlice(&ctx, data); + defer rel.deinit(); + + const target: u32 = 1500; + const result_slice = zodd.gallop(u32, rel.elements, target); + + try testing.expect(result_slice.len > 0); + if (result_slice.len > 0) { + try testing.expect(result_slice[0] >= target); + } +} + +test "regression: Relation save and load with tuples" { + const allocator = testing.allocator; + var ctx = zodd.ExecutionContext.init(allocator); + const Tuple = struct { u32, u32 }; + + var original = try zodd.Relation(Tuple).fromSlice(&ctx, &[_]Tuple{ + .{ 2, 20 }, + .{ 1, 10 }, + .{ 3, 30 }, + }); + defer original.deinit(); + + var buffer = std.ArrayListUnmanaged(u8){}; + defer buffer.deinit(allocator); + + try original.save(buffer.writer(allocator)); + + var fbs = std.io.fixedBufferStream(buffer.items); + var loaded = try zodd.Relation(Tuple).load(&ctx, fbs.reader()); + defer loaded.deinit(); + + try testing.expectEqual(original.len(), loaded.len()); + try testing.expectEqualSlices(Tuple, original.elements, loaded.elements); +} + +test "regression: extendInto with only ExtendAnti should not call propose" { + const allocator = testing.allocator; + var ctx = zodd.ExecutionContext.init(allocator); + const Tuple = struct { u32 }; + const Val = u32; + + var source = zodd.Variable(Tuple).init(&ctx); + defer source.deinit(); + + try source.insertSlice(&ctx, &[_]Tuple{.{1}}); + _ = try source.changed(); + + const KV = struct { u32, u32 }; + var rel = try zodd.Relation(KV).fromSlice(&ctx, &[_]KV{ + .{ 2, 100 }, + }); + defer rel.deinit(); + + var output = zodd.Variable(struct { u32, u32 }).init(&ctx); + defer 
output.deinit(); + + var ext = zodd.ExtendAnti(Tuple, u32, Val).init(&ctx, &rel, struct { + fn f(t: *const Tuple) u32 { + return t[0]; + } + }.f); + + var leapers = [_]zodd.Leaper(Tuple, Val){ext.leaper()}; + + try zodd.extendInto(Tuple, Val, struct { u32, u32 }, &ctx, &source, leapers[0..], &output, struct { + fn logic(t: *const Tuple, v: *const Val) struct { u32, u32 } { + return .{ t[0], v.* }; + } + }.logic); + + const changed = try output.changed(); + try testing.expect(!changed); +} + +test "regression: SecondaryIndex does not leak memory on repeated inserts" { + const allocator = testing.allocator; + var ctx = zodd.ExecutionContext.init(allocator); + const Tuple = struct { u32, u32 }; + + const Index = zodd.index.SecondaryIndex(Tuple, u32, struct { + fn extract(t: Tuple) u32 { + return t[0]; + } + }.extract, struct { + fn cmp(a: u32, b: u32) std.math.Order { + return std.math.order(a, b); + } + }.cmp, 4); + + var idx = Index.init(&ctx); + defer idx.deinit(); + + try idx.insert(.{ 1, 100 }); + try idx.insert(.{ 1, 200 }); + try idx.insert(.{ 1, 300 }); + + const rel = idx.get(1).?; + try testing.expectEqual(@as(usize, 3), rel.len()); +} + +test "regression: joinAnti searches full filter" { + const allocator = testing.allocator; + var ctx = zodd.ExecutionContext.init(allocator); + const Tuple = struct { u32, u32 }; + + var input = zodd.Variable(Tuple).init(&ctx); + defer input.deinit(); + + var filter = zodd.Variable(Tuple).init(&ctx); + defer filter.deinit(); + + var output = zodd.Variable(Tuple).init(&ctx); + defer output.deinit(); + + try input.insertSlice(&ctx, &[_]Tuple{ .{ 1, 10 }, .{ 2, 20 }, .{ 3, 30 } }); + try filter.insertSlice(&ctx, &[_]Tuple{ .{ 1, 100 }, .{ 3, 300 } }); + + _ = try input.changed(); + _ = try filter.changed(); + + try zodd.joinAnti(u32, u32, u32, Tuple, &ctx, &input, &filter, &output, struct { + fn logic(key: *const u32, val: *const u32) Tuple { + return .{ key.*, val.* }; + } + }.logic); + + _ = try output.changed(); + try 
testing.expectEqual(@as(usize, 1), output.recent.len()); + try testing.expectEqual(@as(u32, 2), output.recent.elements[0][0]); +} + +test "regression: Relation loadWithLimit rejects large length" { + const allocator = testing.allocator; + var ctx = zodd.ExecutionContext.init(allocator); + const Tuple = struct { u32, u32 }; + + var buffer = std.ArrayListUnmanaged(u8){}; + defer buffer.deinit(allocator); + + var writer = buffer.writer(allocator); + try writer.writeAll("ZODDREL"); + try writer.writeInt(u8, 1, .little); + try writer.writeInt(u64, 2, .little); + + const t1 = Tuple{ 1, 10 }; + const t2 = Tuple{ 2, 20 }; + const arr1 = [_]Tuple{t1}; + const arr2 = [_]Tuple{t2}; + try writer.writeAll(std.mem.sliceAsBytes(&arr1)); + try writer.writeAll(std.mem.sliceAsBytes(&arr2)); + + var reader = std.io.fixedBufferStream(buffer.items); + try testing.expectError(error.TooLarge, zodd.Relation(Tuple).loadWithLimit(&ctx, reader.reader(), 1)); +} + +test "regression: extendInto resets leaper error" { + const allocator = testing.allocator; + var ctx = zodd.ExecutionContext.init(allocator); + const Tuple = struct { u32 }; + const Val = u32; + + var source = zodd.Variable(Tuple).init(&ctx); + defer source.deinit(); + + try source.insertSlice(&ctx, &[_]Tuple{.{1}}); + _ = try source.changed(); + + var rel = try zodd.Relation(struct { u32, u32 }).fromSlice(&ctx, &[_]struct { u32, u32 }{ + .{ 1, 10 }, + .{ 1, 20 }, + }); + defer rel.deinit(); + + var output = zodd.Variable(struct { u32, u32 }).init(&ctx); + defer output.deinit(); + + var ext = zodd.ExtendWith(Tuple, u32, Val).init(&ctx, &rel, struct { + fn f(t: *const Tuple) u32 { + return t[0]; + } + }.f); + + var leapers = [_]zodd.Leaper(Tuple, Val){ext.leaper()}; + leapers[0].had_error = true; + + try zodd.extendInto(Tuple, Val, struct { u32, u32 }, &ctx, &source, &leapers, &output, struct { + fn logic(t: *const Tuple, v: *const Val) struct { u32, u32 } { + return .{ t[0], v.* }; + } + }.logic); + + _ = try output.changed(); + 
try testing.expectEqual(@as(usize, 2), output.recent.len()); +} + +test "regression: loadWithLimit rejects invalid magic" { + const allocator = testing.allocator; + var ctx = zodd.ExecutionContext.init(allocator); + const Tuple = struct { u32, u32 }; + + var buffer = std.ArrayListUnmanaged(u8){}; + defer buffer.deinit(allocator); + + var writer = buffer.writer(allocator); + try writer.writeAll("BADMAGC"); + try writer.writeInt(u8, 1, .little); + try writer.writeInt(u64, 1, .little); + + const t1 = Tuple{ 1, 10 }; + const arr1 = [_]Tuple{t1}; + try writer.writeAll(std.mem.sliceAsBytes(&arr1)); + + var reader = std.io.fixedBufferStream(buffer.items); + try testing.expectError(error.InvalidFormat, zodd.Relation(Tuple).loadWithLimit(&ctx, reader.reader(), 10)); +} + +test "regression: loadWithLimit rejects unsupported version" { + const allocator = testing.allocator; + var ctx = zodd.ExecutionContext.init(allocator); + const Tuple = struct { u32, u32 }; + + var buffer = std.ArrayListUnmanaged(u8){}; + defer buffer.deinit(allocator); + + var writer = buffer.writer(allocator); + try writer.writeAll("ZODDREL"); + try writer.writeInt(u8, 2, .little); + try writer.writeInt(u64, 1, .little); + + const t1 = Tuple{ 1, 10 }; + const arr1 = [_]Tuple{t1}; + try writer.writeAll(std.mem.sliceAsBytes(&arr1)); + + var reader = std.io.fixedBufferStream(buffer.items); + try testing.expectError(error.UnsupportedVersion, zodd.Relation(Tuple).loadWithLimit(&ctx, reader.reader(), 10)); +} + +test "regression: joinAnti checks multiple stable batches" { + const allocator = testing.allocator; + var ctx = zodd.ExecutionContext.init(allocator); + const Tuple = struct { u32, u32 }; + + var input = zodd.Variable(Tuple).init(&ctx); + defer input.deinit(); + + var filter = zodd.Variable(Tuple).init(&ctx); + defer filter.deinit(); + + var output = zodd.Variable(Tuple).init(&ctx); + defer output.deinit(); + + try input.insertSlice(&ctx, &[_]Tuple{ .{ 1, 10 }, .{ 2, 20 }, .{ 3, 30 } }); + _ = try 
input.changed(); + + try filter.insertSlice(&ctx, &[_]Tuple{.{ 1, 100 }}); + _ = try filter.changed(); + _ = try filter.changed(); + + try filter.insertSlice(&ctx, &[_]Tuple{.{ 3, 300 }}); + _ = try filter.changed(); + + try zodd.joinAnti(u32, u32, u32, Tuple, &ctx, &input, &filter, &output, struct { + fn logic(key: *const u32, val: *const u32) Tuple { + return .{ key.*, val.* }; + } + }.logic); + + _ = try output.changed(); + try testing.expectEqual(@as(usize, 1), output.recent.len()); + try testing.expectEqual(@as(u32, 2), output.recent.elements[0][0]); +} + +test "regression: complete on empty Variable returns empty relation" { + const allocator = testing.allocator; + var ctx = zodd.ExecutionContext.init(allocator); + + var v = zodd.Variable(u32).init(&ctx); + defer v.deinit(); + + var res = try v.complete(); + defer res.deinit(); + + try testing.expectEqual(@as(usize, 0), res.len()); +} + +test "regression: joinInto with empty input produces empty output" { + const allocator = testing.allocator; + var ctx = zodd.ExecutionContext.init(allocator); + const KV = struct { u32, u32 }; + const Out = struct { u32, u32, u32 }; + + var v1 = zodd.Variable(KV).init(&ctx); + defer v1.deinit(); + + var v2 = zodd.Variable(KV).init(&ctx); + defer v2.deinit(); + + var out = zodd.Variable(Out).init(&ctx); + defer out.deinit(); + + try v1.insertSlice(&ctx, &[_]KV{.{ 1, 10 }}); + _ = try v1.changed(); + + _ = try v2.changed(); + + try zodd.joinInto(u32, u32, u32, Out, &ctx, &v1, &v2, &out, struct { + fn logic(k: *const u32, v1_val: *const u32, v2_val: *const u32) Out { + return .{ k.*, v1_val.*, v2_val.* }; + } + }.logic); + + _ = try out.changed(); + try testing.expectEqual(@as(usize, 0), out.recent.len()); +} + +test "regression: joinAnti with empty filter keeps all inputs" { + const allocator = testing.allocator; + var ctx = zodd.ExecutionContext.init(allocator); + const Tuple = struct { u32, u32 }; + + var input = zodd.Variable(Tuple).init(&ctx); + defer input.deinit(); + + 
var filter = zodd.Variable(Tuple).init(&ctx); + defer filter.deinit(); + + var output = zodd.Variable(Tuple).init(&ctx); + defer output.deinit(); + + try input.insertSlice(&ctx, &[_]Tuple{ .{ 1, 10 }, .{ 2, 20 } }); + _ = try input.changed(); + + _ = try filter.changed(); + + try zodd.joinAnti(u32, u32, u32, Tuple, &ctx, &input, &filter, &output, struct { + fn logic(key: *const u32, val: *const u32) Tuple { + return .{ key.*, val.* }; + } + }.logic); + + _ = try output.changed(); + try testing.expectEqual(@as(usize, 2), output.recent.len()); +} + +test "regression: aggregate with unique keys" { + const allocator = testing.allocator; + var ctx = zodd.ExecutionContext.init(allocator); + const Tuple = struct { u32, u32 }; + + var rel = try zodd.Relation(Tuple).fromSlice(&ctx, &[_]Tuple{ + .{ 1, 10 }, + .{ 2, 20 }, + .{ 3, 30 }, + }); + defer rel.deinit(); + + var result = try zodd.aggregateFn( + Tuple, + u32, + u32, + &ctx, + &rel, + struct { + fn key(t: *const Tuple) u32 { + return t[0]; + } + }.key, + 0, + struct { + fn sum(acc: u32, t: *const Tuple) u32 { + return acc + t[1]; + } + }.sum, + ); + defer result.deinit(); + + try testing.expectEqual(@as(usize, 3), result.len()); + try testing.expectEqual(result.elements[0][1], 10); + try testing.expectEqual(result.elements[1][1], 20); + try testing.expectEqual(result.elements[2][1], 30); +}