diff --git a/src/lang/format.c b/src/lang/format.c index 78cebcb8..93e30403 100644 --- a/src/lang/format.c +++ b/src/lang/format.c @@ -26,6 +26,7 @@ #include "table/sym.h" #include "lang/eval.h" #include "ops/ops.h" /* RAY_LAZY, ray_lazy_materialize */ +#include "ops/internal.h" #include "mem/heap.h" #include #include @@ -188,7 +189,7 @@ static void fmt_i64(fmt_buf_t* b, int64_t val) { } static void fmt_f64(fmt_buf_t* b, double val) { - if (val == -0.0 && signbit(val)) val = 0.0; /* normalize -0.0 */ + val = clear_neg_zero(val); if (val == 0.0) { /* Zero: format as "0.0" (after trailing-zero strip) */ char tmp[16]; diff --git a/src/ops/builtins.c b/src/ops/builtins.c index 9eff45d0..79ea22af 100644 --- a/src/ops/builtins.c +++ b/src/ops/builtins.c @@ -34,6 +34,12 @@ #include "core/types.h" #include "io/csv.h" #include "ops/ops.h" + +static inline double clear_neg_zero(double v) { + uint64_t bits; memcpy(&bits, &v, 8); + if (bits == UINT64_C(0x8000000000000000)) v = 0.0; + return v; +} #include "ops/hash.h" #include "store/part.h" #include "store/splay.h" @@ -91,7 +97,7 @@ void ray_lang_print(FILE* fp, ray_t* val) { case -RAY_I64: fprintf(fp, "%ld", (long)val->i64); break; case -RAY_F64: { double fv = val->f64; - if (fv == 0.0 && signbit(fv)) fv = 0.0; + fv = clear_neg_zero(fv); fprintf(fp, "%g", fv); break; } @@ -177,7 +183,7 @@ static char* fmt_interpolate(const char* fmt, size_t flen, ray_t** args, int64_t tlen = snprintf(tmp, sizeof(tmp), "%ld", (long)a->i64); } else if (a->type == -RAY_F64) { double fv = a->f64; - if (fv == 0.0 && signbit(fv)) fv = 0.0; + fv = clear_neg_zero(fv); tlen = snprintf(tmp, sizeof(tmp), "%g", fv); } else if (a->type == -RAY_BOOL) { tlen = snprintf(tmp, sizeof(tmp), "%s", a->b8 ? "true" : "false"); @@ -1343,7 +1349,7 @@ ray_t* ray_cast_fn(ray_t* type_sym, ray_t* val) { } if (val->type == -RAY_F64) { double fv = val->f64; - if (fv == 0.0 && signbit(fv)) fv = 0.0; + fv = clear_neg_zero(fv); char buf[32]; int n2 = snprintf(buf, sizeof(buf), "%g", fv); return ray_str(buf, (size_t)n2); } @@ -1399,7 +1405,7 @@ ray_t* ray_cast_fn(ray_t* type_sym, ray_t* val) { else if (val->type == -RAY_I32) n2 = snprintf(buf, sizeof(buf), "%d", (int)val->i32); else if (val->type == -RAY_F64) { double fv = val->f64; - if (fv == 0.0 && signbit(fv)) fv = 0.0; + fv = clear_neg_zero(fv); n2 = snprintf(buf, sizeof(buf), "%.17g", fv); } else n2 = snprintf(buf, sizeof(buf), "%lld", (long long)as_i64(val)); @@ -2556,7 +2562,7 @@ ray_t* ray_group_fn(ray_t* x) { idx_vecs[gi_nan] = ray_vec_append(idx_vecs[gi_nan], &i); continue; } - if (f == 0.0) f = 0.0; /* canonicalise -0.0 → +0.0 */ + f = clear_neg_zero(f); memcpy(&v, &f, sizeof(v)); } else v = i; diff --git a/src/ops/expr.c b/src/ops/expr.c index 49b4f9bc..eb3a5d76 100644 --- a/src/ops/expr.c +++ b/src/ops/expr.c @@ -1286,6 +1286,7 @@ static void propagate_nulls_binary(ray_t* lhs, ray_t* rhs, ray_t* result, ray_t* exec_elementwise_unary(ray_graph_t* g, ray_op_t* op, ray_t* input) { (void)g; if (!input || RAY_IS_ERR(input)) return input; + if (!ray_is_vec(input)) return ray_error("type", NULL); int64_t len = input->len; int8_t in_type = input->type; int8_t out_type = op->out_type; @@ -2027,6 +2028,20 @@ ray_t* exec_elementwise_binary(ray_graph_t* g, ray_op_t* op, ray_t* lhs, ray_t* } } + /* Reject string atom in arithmetic context (only comparisons are valid). */ + { + bool l_atom_str = (l_scalar && lhs->type == -RAY_STR); + bool r_atom_str = (r_scalar && rhs->type == -RAY_STR); + if (l_atom_str || r_atom_str) { + uint16_t opc = op->opcode; + bool is_cmp = (opc >= OP_EQ && opc <= OP_GE); + if (!is_cmp && !RAY_IS_SYM(lhs->type) && !RAY_IS_SYM(rhs->type)) { + ray_release(result); + return ray_error("type", NULL); + } + } + } + /* SYM vs STR comparison: resolve string constant to intern ID so we can compare numerically against SYM intern indices. ray_sym_find returns -1 if string not in table → no match. */ diff --git a/src/ops/filter.c b/src/ops/filter.c index f24ed386..e762dad4 100644 --- a/src/ops/filter.c +++ b/src/ops/filter.c @@ -597,7 +597,34 @@ ray_t* sel_compact(ray_graph_t* g, ray_t* tbl, ray_t* sel) { ray_t* col = ray_table_get_col_idx(tbl, c); col_names[c] = ray_table_col_name(tbl, c); if (!col || RAY_IS_ERR(col)) { new_cols[c] = NULL; continue; } - if (col->type == RAY_MAPCOMMON) { new_cols[c] = NULL; continue; } + if (col->type == RAY_MAPCOMMON) { + ray_t** mc_ptrs = (ray_t**)ray_data(col); + ray_t* kv = mc_ptrs[0]; + ray_t* rc = mc_ptrs[1]; + if (!kv || !rc || col->len < 2) { new_cols[c] = NULL; continue; } + int64_t n_parts = kv->len; + int8_t kv_type = kv->type; + size_t esz = (size_t)ray_sym_elem_size(kv_type, kv->attrs); + const char* kdata = (const char*)ray_data(kv); + const int64_t* counts = (const int64_t*)ray_data(rc); + ray_t* flat = ray_vec_new(kv_type, pass_count); + if (!flat || RAY_IS_ERR(flat)) { new_cols[c] = NULL; continue; } + flat->len = pass_count; + char* out_mc = (char*)ray_data(flat); + for (int64_t i = 0; i < pass_count; i++) { + int64_t row_i = match_idx[i]; + int64_t cum = 0; + int64_t pi = 0; + for (; pi < n_parts - 1; pi++) { + cum += counts[pi]; + if (row_i < cum) break; + } + memcpy(out_mc + (size_t)i * esz, kdata + (size_t)pi * esz, esz); + } + new_cols[c] = flat; + valid_ncols++; + continue; + } int8_t ct = RAY_IS_PARTED(col->type) ? (int8_t)RAY_PARTED_BASETYPE(col->type) : col->type; uint8_t ca = 0; diff --git a/src/ops/group.c b/src/ops/group.c index 2473b3a8..52ee8265 100644 --- a/src/ops/group.c +++ b/src/ops/group.c @@ -441,7 +441,7 @@ static void cd_hist_fn(void* ctx, uint32_t worker_id, for (int64_t i = start; i < end; i++) { double fv = d[i]; if (fv != fv) fv = (double)NAN; - else if (fv == 0.0) fv = 0.0; + else fv = clear_neg_zero(fv); int64_t val; memcpy(&val, &fv, sizeof(int64_t)); uint64_t h = (uint64_t)val * CD_HASH_K1; @@ -540,7 +540,7 @@ static void cd_scatter_fn(void* ctx, uint32_t worker_id, for (int64_t i = start; i < end; i++) { double fv = d[i]; if (fv != fv) fv = (double)NAN; - else if (fv == 0.0) fv = 0.0; + else fv = clear_neg_zero(fv); int64_t val; memcpy(&val, &fv, sizeof(int64_t)); uint64_t h = (uint64_t)val * CD_HASH_K1; @@ -592,7 +592,7 @@ static int64_t cd_seq_count(int8_t in_type, uint8_t in_attrs, if (in_type == RAY_F64) { double fv = ((const double*)base)[i]; if (fv != fv) fv = (double)NAN; - else if (fv == 0.0) fv = 0.0; + else fv = clear_neg_zero(fv); memcpy(&val, &fv, sizeof(int64_t)); } else { val = read_col_i64(base, i, in_type, in_attrs); @@ -942,7 +942,7 @@ static inline int64_t cdpg_read(const void* base, int64_t r, if (in_type == RAY_F64) { double fv = ((const double*)base)[r]; if (fv != fv) fv = (double)NAN; - else if (fv == 0.0) fv = 0.0; + else fv = clear_neg_zero(fv); int64_t v; memcpy(&v, &fv, sizeof(int64_t)); return v; @@ -1288,7 +1288,7 @@ ray_t* ray_count_distinct_per_group(ray_t* src, const int64_t* row_gid, if (gid < 0 || gid >= n_groups) continue; double fv = d[r]; if (fv != fv) fv = (double)NAN; - else if (fv == 0.0) fv = 0.0; + else fv = clear_neg_zero(fv); int64_t v; memcpy(&v, &fv, sizeof(int64_t)); CD_INSERT(v); @@ -1338,7 +1338,7 @@ ray_t* ray_count_distinct_per_group(ray_t* src, const int64_t* row_gid, if (in_type == RAY_F64) { double fv = ((double*)base)[r]; if (fv != fv) fv = (double)NAN; - else if (fv == 0.0) fv = 0.0; + else fv = clear_neg_zero(fv); memcpy(&row_val, &fv, sizeof(int64_t)); } else { row_val = read_col_i64(base, r, in_type, src->attrs); @@ -5145,13 +5145,21 @@ ray_t* exec_group(ray_graph_t* g, ray_op_t* op, ray_t* tbl, agg_owned[a] = 1; goto resolve_ins2; } + if (vec && RAY_IS_ERR(vec)) ray_release(vec); } /* Fallback: full recursive evaluation */ ray_t* saved_table = g->table; g->table = tbl; ray_t* vec = exec_node(g, agg_input_op); g->table = saved_table; - if (vec && !RAY_IS_ERR(vec)) { + if (vec && RAY_IS_ERR(vec)) { + for (uint8_t i = 0; i < a; i++) + { if (agg_owned[i] && agg_vecs[i]) ray_release(agg_vecs[i]); if (agg_owned2[i] && agg_vecs2[i]) ray_release(agg_vecs2[i]); } + for (uint8_t k = 0; k < n_keys; k++) + if (key_owned[k] && key_vecs[k]) ray_release(key_vecs[k]); + return vec; + } + if (vec) { agg_vecs[a] = vec; agg_owned[a] = 1; } @@ -5177,6 +5185,8 @@ ray_t* exec_group(ray_graph_t* g, ray_op_t* op, ray_t* tbl, agg_vecs2[a] = vec; agg_owned2[a] = 1; compiled2 = 1; + } else if (vec) { + ray_release(vec); } } if (!compiled2) { @@ -5184,7 +5194,15 @@ ray_t* exec_group(ray_graph_t* g, ray_op_t* op, ray_t* tbl, g->table = tbl; ray_t* vec = exec_node(g, agg_input_op2); g->table = saved_table; - if (vec && !RAY_IS_ERR(vec)) { + if (vec && RAY_IS_ERR(vec)) { + if (agg_owned[a] && agg_vecs[a]) ray_release(agg_vecs[a]); + for (uint8_t i = 0; i < a; i++) + { if (agg_owned[i] && agg_vecs[i]) ray_release(agg_vecs[i]); if (agg_owned2[i] && agg_vecs2[i]) ray_release(agg_vecs2[i]); } + for (uint8_t k = 0; k < n_keys; k++) + if (key_owned[k] && key_vecs[k]) ray_release(key_vecs[k]); + return vec; + } + if (vec) { agg_vecs2[a] = vec; agg_owned2[a] = 1; } @@ -9332,7 +9350,7 @@ static inline int64_t grpt_key_read(const void* base, int8_t t, int64_t row) { switch (t) { case RAY_F64: { double v; memcpy(&v, (const char*)base + (size_t)row*8, 8); - if (v == 0.0) v = 0.0; /* normalize -0.0 → +0.0 to match hash */ + v = clear_neg_zero(v); int64_t bits; memcpy(&bits, &v, 8); return bits; } case RAY_I64: case RAY_TIMESTAMP: diff --git a/src/ops/idxop.c b/src/ops/idxop.c index 3f74476b..2fd7e3fe 100644 --- a/src/ops/idxop.c +++ b/src/ops/idxop.c @@ -22,6 +22,7 @@ */ #include "idxop.h" +#include "ops/internal.h" #include "mem/heap.h" #include "mem/cow.h" #include "vec/vec.h" @@ -53,7 +54,7 @@ static uint64_t numeric_key_word(const uint8_t* base, int8_t type, int64_t i) { double v; if (es == 4) { float t; memcpy(&t, base + i*4, 4); v = (double)t; } else { memcpy(&v, base + i*8, 8); } - if (v == 0.0) v = 0.0; /* canonicalise -0.0 -> +0.0 */ + v = clear_neg_zero(v); if (v != v) { /* NaN: per-row bucket via row hash */ return (uint64_t)i * 0x9E3779B97F4A7C15ULL; } diff --git a/src/ops/internal.h b/src/ops/internal.h index 23975955..0794d6aa 100644 --- a/src/ops/internal.h +++ b/src/ops/internal.h @@ -1143,4 +1143,13 @@ static inline void par_finalize_nulls(ray_t* vec) { } } +/* Canonicalise IEEE 754 -0.0 → +0.0 via bit-level check. + * Immune to -fno-signed-zeros (which makes `if (f==0) f=0` a no-op). + * Used at output / hash-key boundaries only — not in hot SIMD loops. */ +static inline double clear_neg_zero(double v) { + uint64_t bits; memcpy(&bits, &v, 8); + if (bits == UINT64_C(0x8000000000000000)) v = 0.0; + return v; +} + #endif /* RAY_EXEC_INTERNAL_H */ diff --git a/src/ops/pivot.c b/src/ops/pivot.c index 573a54aa..560aa7e8 100644 --- a/src/ops/pivot.c +++ b/src/ops/pivot.c @@ -662,7 +662,7 @@ ray_t* exec_pivot(ray_graph_t* g, ray_op_t* op, ray_t* tbl) { if (pt == RAY_F64) { double fv; memcpy(&fv, &pval, 8); - if (fv == 0.0 && signbit(fv)) fv = 0.0; + fv = clear_neg_zero(fv); len = snprintf(buf, sizeof(buf), "%g", fv); } else if (pt == RAY_BOOL) { len = snprintf(buf, sizeof(buf), "%s", pval ? "true" : "false"); diff --git a/src/ops/query.c b/src/ops/query.c index 451d4baf..389a44e0 100644 --- a/src/ops/query.c +++ b/src/ops/query.c @@ -3928,7 +3928,7 @@ static void cdpg_buf_par_fn(void* vctx, uint32_t worker_id, int64_t r = idxs[i]; double fv = d[r]; if (has_nulls && fv != fv) continue; - if (fv == 0.0) fv = 0.0; + fv = clear_neg_zero(fv); int64_t vbits = 0; memcpy(&vbits, &fv, sizeof(int64_t)); CDPG_BUF_INSERT(vbits); diff --git a/src/ops/tblop.c b/src/ops/tblop.c index bde886a4..4c409d75 100644 --- a/src/ops/tblop.c +++ b/src/ops/tblop.c @@ -490,7 +490,7 @@ ray_t* ray_pivot_fn(ray_t** args, int64_t n) { char buf[64]; int len = snprintf(buf, sizeof(buf), "%ld", (long)pval->i64); col_sym = ray_sym_intern(buf, (size_t)len); } else if (pval->type == -RAY_F64) { - double fv = pval->f64; if (fv == 0.0 && signbit(fv)) fv = 0.0; + double fv = clear_neg_zero(pval->f64); char buf[64]; int len = snprintf(buf, sizeof(buf), "%g", fv); col_sym = ray_sym_intern(buf, (size_t)len); } else if (pval->type == -RAY_BOOL) { diff --git a/test/rfl/agg/avg.rfl b/test/rfl/agg/avg.rfl index a652cad1..0917d770 100644 --- a/test/rfl/agg/avg.rfl +++ b/test/rfl/agg/avg.rfl @@ -11,3 +11,20 @@ ;; constant vector: avg == that constant (set K 42) (avg (take (enlist K) 50)) -- 42.0 + +;; ─── edge cases: single-element / negatives / empty list ───────────── +(avg [42]) -- 42.0 +(avg [-5]) -- -5.0 +(avg [-10 -20 -30]) -- -20.0 +(avg [0 0 0 0]) -- 0.0 +;; empty list → domain error (line 314 in ray_avg_fn) +(avg (list)) !- domain +;; single-element list +(avg (list 7)) -- 7.0 +;; list with all nulls → typed null F64 +(nil? (avg (list 0Ni 0Ni 0Ni))) -- true +;; F64 atom passes through +(avg 3.14) -- 3.14 +(avg -2.5) -- -2.5 +;; null atom → typed null F64 +(nil? (avg 0Nf)) -- true diff --git a/test/rfl/agg/count.rfl b/test/rfl/agg/count.rfl index a2e8eca8..2d3d16d3 100644 --- a/test/rfl/agg/count.rfl +++ b/test/rfl/agg/count.rfl @@ -12,3 +12,25 @@ ;; til n has count n (count (til 123)) -- 123 + +;; ─── edge cases: empty / single / atom / heterogeneous list ────────── +;; empty string -> count 0 +(count "") -- 0 +;; single-char string -> count 1 +(count "x") -- 1 +;; empty list (heterogeneous) -> 0 +(count (list)) -- 0 +;; list with elements -> ray_len +(count (list 1 "two" 'three)) -- 3 +;; F64 atom -> 1 +(count 3.14) -- 1 +;; SYM atom -> 1 +(count 'foo) -- 1 +;; null atom -> 1 +(count 0Ni) -- 1 +;; dict (atom_eq treats dict as collection) +(count (dict ['a 'b 'c] [1 2 3])) -- 3 +;; table count == nrow +(count (table [a b] (list [1 2 3 4 5] [10 20 30 40 50]))) -- 5 +;; empty table +(count (table [a b] (list (as 'I64 []) (as 'I64 [])))) -- 0 diff --git a/test/rfl/agg/list_med_var.rfl b/test/rfl/agg/list_med_var.rfl index 4a3e5f6e..3eddd460 100644 --- a/test/rfl/agg/list_med_var.rfl +++ b/test/rfl/agg/list_med_var.rfl @@ -42,6 +42,10 @@ ;; ─── med type error on non-list non-vec (line 519) ─────────────────── (med 'some_sym) !- type +;; Atom path hits 491; dict / table hit the trailing 519 guard +;; (not atom, not vec, not list, no parted dispatch). +(med (dict ["a" "b"] [1 2])) !- type +(med (table [k] (list [1 2 3]))) !- type ;; ─── var/stddev on list (var_stddev_core list branch: lines 593-607) ── ;; Basic: list [1 2 3 4 5], pop_var = 2.0, sample_var = 2.5 @@ -72,6 +76,12 @@ ;; ─── var type error on non-list non-vec (line 606) ─────────────────── (var_pop 'sym_input) !- type (dev 'sym_input) !- type +;; Dict / table fall through the atom / vec / list dispatch and land +;; on the trailing 606 guard. +(var (dict ["a" "b"] [1 2])) !- type +(var_pop (table [k] (list [1 2 3]))) !- type +(stddev (dict ["x"] [1])) !- type +(dev (table [k] (list [1 2]))) !- type ;; ─── vec_to_f64_scratch type error path (lines 475-476) ───────────── ;; SYM vec is not numeric → error from vec_to_f64_scratch. diff --git a/test/rfl/agg/max.rfl b/test/rfl/agg/max.rfl index cc33a64f..ec2f4ff9 100644 --- a/test/rfl/agg/max.rfl +++ b/test/rfl/agg/max.rfl @@ -15,3 +15,17 @@ ;; max >= min 1 -- (as 'I64 (>= (max V) (min V))) + +;; ─── edge cases: single-element / all-equal / negatives / atoms ────── +(max [42]) -- 42 +(max [7 7 7 7]) -- 7 +(max [-1 -5 -10 0]) -- 0 +(max [-5.5 -1.2 -10.3 -0.5]) -- -0.5 +(max [3.14 1.41 2.71]) -- 3.14 +;; max over a list with null skips the null +(max (list 5 0Ni 3)) -- 5 +;; list of all-nulls → typed null +(nil? (max (list 0Ni 0Ni))) -- true +;; atom pass-through +(max 99) -- 99 +(max -7) -- -7 diff --git a/test/rfl/agg/med.rfl b/test/rfl/agg/med.rfl index fddee434..37cd3a17 100644 --- a/test/rfl/agg/med.rfl +++ b/test/rfl/agg/med.rfl @@ -9,3 +9,26 @@ (set V (rand 101 10000)) 1 -- (as 'I64 (<= (as 'F64 (min V)) (med V))) 1 -- (as 'I64 (<= (med V) (as 'F64 (max V)))) + +;; ─── edge cases: empty / single / all-equal / two-element / atom ───── +;; empty vec → typed-null F64 (line 499 in ray_med_fn) +(nil? (med (as 'I64 (list)))) -- true +(nil? (med (as 'F64 (list)))) -- true +;; single-element vec → that element as F64 +(med [42]) -- 42.0 +(med [3.14]) -- 3.14 +;; two-element → mean +(med [10 30]) -- 20.0 +(med [-5 5]) -- 0.0 +;; all-equal → that value +(med [7 7 7 7]) -- 7.0 +(med [5.5 5.5 5.5]) -- 5.5 +;; negative numbers +(med [-10 -20 -30]) -- -20.0 +;; F64 atom passes through +(med 42.0) -- 42.0 +(med -2.5) -- -2.5 +;; numeric atom → coerced +(med 7) -- 7.0 +;; null atom → typed-null F64 +(nil? (med 0Nf)) -- true diff --git a/test/rfl/agg/min.rfl b/test/rfl/agg/min.rfl index 46150208..f77aa49d 100644 --- a/test/rfl/agg/min.rfl +++ b/test/rfl/agg/min.rfl @@ -12,3 +12,18 @@ (set A (rand 50 10000)) (set B (rand 50 10000)) (min (concat A B)) -- (min (list (min A) (min B))) + +;; ─── edge cases: single-element / all-equal / negatives / atoms ────── +(min [42]) -- 42 +(min [7 7 7 7]) -- 7 +(min [-1 -5 -10 0]) -- -10 +(min [-5.5 -1.2 -10.3 0.0]) -- -10.3 +;; min on F64 vec +(min [3.14 1.41 2.71]) -- 1.41 +;; min over a list with null skips the null +(min (list 5 0Ni 3)) -- 3 +;; list of all-nulls → typed null +(nil? (min (list 0Ni 0Ni))) -- true +;; atom pass-through +(min 99) -- 99 +(min -7) -- -7 diff --git a/test/rfl/agg/stddev.rfl b/test/rfl/agg/stddev.rfl index 90202ff4..73f90dd1 100644 --- a/test/rfl/agg/stddev.rfl +++ b/test/rfl/agg/stddev.rfl @@ -5,3 +5,8 @@ ;; non-negative (set V (rand 100 1000)) 1 -- (as 'I64 (>= (stddev V) 0.0)) + +;; ---- RAY_IS_ERR propagation: stddev(erroring arg) ---- +(stddev (* "bad" 5)) !- type +(stddev (- 'a 1)) !- type +(stddev (sqrt 'foo)) !- type diff --git a/test/rfl/agg/sum.rfl b/test/rfl/agg/sum.rfl index 01e4ba7c..4ca5a67e 100644 --- a/test/rfl/agg/sum.rfl +++ b/test/rfl/agg/sum.rfl @@ -24,3 +24,46 @@ (sum (list 1 0Ni 3 0Ni 5)) -- 9 (sum (list 0Ni 0Ni 0Ni)) -- 0 (sum (list)) -- 0 + +;; ---- RAY_IS_ERR propagation: sum(erroring arg) ---- +(sum (* 'bad 1)) !- type +(sum (+ "a" 1)) !- type +(sum (sqrt 'foo)) !- type +(sum (round "x")) !- type + +;; ─── edge cases: single / empty / all-equal / negatives / atoms ────── +;; single-element vectors of every numeric type +(sum [42]) -- 42 +(sum (as 'I32 [42])) -- 42 +(sum (as 'I16 [42])) -- 42 +(sum (as 'U8 [42])) -- 42 +(sum [42.5]) -- 42.5 +;; empty vec → 0 of that type +(sum (as 'I64 (list))) -- 0 +(sum (as 'F64 (list))) -- 0.0 +(sum (as 'I32 (list))) -- 0 +;; all-equal +(sum [5 5 5 5 5]) -- 25 +;; negatives +(sum [-1 -2 -3 -4 -5]) -- -15 +;; sum of mixed-sign cancels out +(sum [-3 -1 1 3]) -- 0 +;; U8 atom scalar → I64 (line 223 in ray_sum_fn) +(sum (as 'U8 42)) -- 42 +(type (sum (as 'U8 42))) -- 'i64 +;; I16 atom scalar → I64 (line 224) +(sum (as 'I16 -5)) -- -5 +(type (sum (as 'I16 -5))) -- 'i64 +;; I64 atom passes through +(sum 99) -- 99 +;; F64 atom passes through +(sum 3.14) -- 3.14 +;; DATE vec → type error (line 228) +(sum (as 'DATE [7300 7301])) !- type + +;; ---- error propagation through agg inside select (exec_group path) ---- +(set _erp_T (table [v] (list (as 'I64 [10 20 30])))) +(select {s: (sum (round "x")) from: _erp_T}) !- type +(select {s: (avg (sqrt "x")) from: _erp_T}) !- type +(select {s: (min (+ "a" 1)) from: _erp_T}) !- type +(select {s: (max (+ "a" 1)) from: _erp_T}) !- type diff --git a/test/rfl/arith/abs.rfl b/test/rfl/arith/abs.rfl index a3875fbe..f151fc00 100644 --- a/test/rfl/arith/abs.rfl +++ b/test/rfl/arith/abs.rfl @@ -79,3 +79,7 @@ (nil? (at (abs Va) 0)) -- true (at (abs Va) 1) -- 5 (at (abs Va) 2) -- 5 + +;; type errors — non-numeric input +(abs "abc") !- type +(abs 'foo) !- type diff --git a/test/rfl/arith/add.rfl b/test/rfl/arith/add.rfl index 56df4e63..94c7f8ec 100644 --- a/test/rfl/arith/add.rfl +++ b/test/rfl/arith/add.rfl @@ -156,3 +156,57 @@ (+ 1) !- arity (+ 1 2 3) !- arity (+ 1 2 3 4 5) !- arity + +;; ---- RAY_IS_ERR propagation: + with erroring arg on either side ---- +(+ (sqrt 'foo) 1) !- type +(+ 1 (sqrt 'foo)) !- type +(+ (round "x") 1) !- type +(+ (- 'a 1) 1) !- type + +;; ────────────────────────────────────────────────────────────────── +;; Mixed-width type-pair coverage (arith.c type-dispatch arms) +;; ────────────────────────────────────────────────────────────────── + +;; I16 vec + I32 vec → I32 +(+ (as 'I16 [1 2 3]) (as 'I32 [10 20 30])) -- [11 22 33] +(type (+ (as 'I16 [1 2 3]) (as 'I32 [10 20 30]))) -- 'I32 + +;; I32 vec + F64 vec → F64 +(+ (as 'I32 [1 2 3]) (as 'F64 [0.5 1.5 2.5])) -- [1.5 3.5 5.5] +(type (+ (as 'I32 [1 2 3]) (as 'F64 [0.5 1.5 2.5]))) -- 'F64 + +;; I16 atom + F64 atom → F64 +(+ (as 'I16 5) 2.5) -- 7.5 +(type (+ (as 'I16 5) 2.5)) -- 'f64 + +;; U8 vec + I64 vec +(+ (as 'U8 [1 2 3]) (as 'I64 [10 20 30])) -- [11 22 33] + +;; BOOL atom + I64 atom +(+ (as 'BOOL true) (as 'I64 5)) -- 6 + +;; Numeric + temporal: cross-arms +(+ (as 'I32 5) (as 'DATE 7305)) -- 2020.01.06 +(+ (as 'I16 100) (as 'TIME 1000)) -- 00:00:01.100 +(+ (as 'I32 500) (as 'TIMESTAMP 1000)) -- 2000.01.01D00:00:00.000001500 + +;; Numeric NULL + temporal returns typed-null on right operand +(nil? (+ 0Ni (as 'TIME 1000))) -- true +(type (+ 0Ni (as 'TIME 1000))) -- 'time +(nil? (+ 0Ni (as 'TIMESTAMP 1000))) -- true +(type (+ 0Ni (as 'TIMESTAMP 1000))) -- 'timestamp + +;; DATE + TIME → TIMESTAMP (both directions) +(type (+ (as 'DATE 7305) (as 'TIME 0))) -- 'timestamp +(type (+ (as 'TIME 0) (as 'DATE 7305))) -- 'timestamp + +;; TIME + TIME → TIME +(+ (as 'TIME 1000) (as 'TIME 2000)) -- 00:00:03.000 + +;; TIME + TIMESTAMP → TIMESTAMP (and reverse) +(type (+ (as 'TIME 1000) (as 'TIMESTAMP 2000))) -- 'timestamp +(type (+ (as 'TIMESTAMP 2000) (as 'TIME 1000))) -- 'timestamp + +;; Reject float + temporal +(+ 0Nf (as 'DATE 7305)) !- type +(+ (as 'TIMESTAMP 1000) 0Nf) !- type diff --git a/test/rfl/arith/div_float.rfl b/test/rfl/arith/div_float.rfl index d3edd835..efc7ca2f 100644 --- a/test/rfl/arith/div_float.rfl +++ b/test/rfl/arith/div_float.rfl @@ -17,3 +17,8 @@ (nil? (div 0.0 0.0)) -- true (type (div 7.0 2.0)) -- 'i64 (div [7 10 -7] 2) -- [3 5 -4] + +;; idiv overflow: floor(a/b) exceeds INT64_MAX → typed-null +;; (arith.c:219-220) — 1e200 / 1.0 = 1e200, way past INT64_MAX≈9.2e18 +(nil? (div 1e200 1.0)) -- true +(nil? (div -1e200 1.0)) -- true diff --git a/test/rfl/arith/exp.rfl b/test/rfl/arith/exp.rfl index 585d504e..1ab55d5b 100644 --- a/test/rfl/arith/exp.rfl +++ b/test/rfl/arith/exp.rfl @@ -15,3 +15,7 @@ (set X (div (- (rand 128 400) 200) 100)) (set Y (div (- (rand 128 400) 200) 100)) (count X) -- (sum (< (abs (- (exp (+ X Y)) (* (exp X) (exp Y)))) 0.001)) + +;; type errors — non-numeric input +(exp "abc") !- type +(exp 'foo) !- type diff --git a/test/rfl/arith/log.rfl b/test/rfl/arith/log.rfl index 7b9cbb3d..f3d0cdff 100644 --- a/test/rfl/arith/log.rfl +++ b/test/rfl/arith/log.rfl @@ -11,3 +11,7 @@ (set A (div (+ (rand 128 10000) 1) 100)) (set B2 (div (+ (rand 128 10000) 1) 100)) (count A) -- (sum (< (abs (- (log (* A B2)) (+ (log A) (log B2)))) 0.001)) + +;; type errors — non-numeric input +(log "abc") !- type +(log 'foo) !- type diff --git a/test/rfl/arith/mod.rfl b/test/rfl/arith/mod.rfl index c27e1282..d4c93a4a 100644 --- a/test/rfl/arith/mod.rfl +++ b/test/rfl/arith/mod.rfl @@ -41,3 +41,44 @@ (nil? (% 0Ni 5)) -- true (% 10 0) -- 0Nl + +;; ────────────────────────────────────────────────────────────────── +;; Temporal % numeric and narrow-width result-type coverage +;; ────────────────────────────────────────────────────────────────── + +;; TIME % int → TIME +(type (% (as 'TIME 100000) 1000)) -- 'time +(type (% (as 'TIME 100000) 0)) -- 'time +(nil? (% (as 'TIME 100000) 0)) -- true + +;; DATE % int → DATE +(type (% (as 'DATE 7305) 30)) -- 'date +(nil? (% (as 'DATE 7305) 0)) -- true + +;; TIMESTAMP % int → TIMESTAMP +(type (% (as 'TIMESTAMP 1000000) 30)) -- 'timestamp + +;; TIME % float divisor: f64-coerced path +(type (% (as 'TIME 100000) 1000.0)) -- 'time + +;; U8 modulo with U8 divisor → U8 +(type (% (as 'U8 100) (as 'U8 7))) -- 'u8 +(% (as 'U8 100) (as 'U8 7)) -- 0x02 + +;; U8 mod by 0 → 0 +(% (as 'U8 100) (as 'U8 0)) -- 0x00 + +;; Integer modulo: result type follows RIGHT operand width +(type (% 100 (as 'I32 7))) -- 'i32 +(type (% 100 (as 'I16 7))) -- 'i16 +(% 100 (as 'I32 7)) -- 2 + +;; Integer modulo by zero — typed-null on right +(nil? (% 100 (as 'I32 0))) -- true +(type (% 100 (as 'I32 0))) -- 'i32 +(nil? (% 100 (as 'I16 0))) -- true +(type (% 100 (as 'I16 0))) -- 'i16 + +;; U8-left, non-U8 right (mod follows b's type per line 261-263 fallthrough) +(type (% (as 'U8 100) (as 'I32 7))) -- 'i32 +(type (% (as 'U8 100) (as 'I16 7))) -- 'i16 diff --git a/test/rfl/arith/mul.rfl b/test/rfl/arith/mul.rfl index c19cdeaa..976e0c43 100644 --- a/test/rfl/arith/mul.rfl +++ b/test/rfl/arith/mul.rfl @@ -80,3 +80,41 @@ ;; ── Arity — binary verb (pre-fix: extras silently dropped) ── (* 2 3 4) !- arity (* 2 3 4 5) !- arity + +;; ---- RAY_IS_ERR propagation: * with erroring arg on either side ---- +(* (sqrt 'foo) 5) !- type +(* 5 (sqrt 'foo)) !- type +(* (round "x") 5) !- type +(* (- 'a 1) 5) !- type + +;; ────────────────────────────────────────────────────────────────── +;; Mixed-width / temporal type-pair coverage +;; ────────────────────────────────────────────────────────────────── + +;; I16 * I32 → I32 +(* (as 'I16 3) (as 'I32 4)) -- 12 +(type (* (as 'I16 3) (as 'I32 4))) -- 'i32 + +;; I32 * F64 → F64 +(* (as 'I32 3) 2.5) -- 7.5 +(type (* (as 'I32 3) 2.5)) -- 'f64 + +;; U8 * I64 → I64 +(* (as 'U8 3) (as 'I64 4)) -- 12 + +;; BOOL * I64 → I64 +(* (as 'BOOL true) (as 'I64 5)) -- 5 + +;; int * TIME → TIME and TIME * int → TIME +(* 3 (as 'TIME 1000)) -- 00:00:03.000 +(type (* 3 (as 'TIME 1000))) -- 'time +(* (as 'TIME 1000) 3) -- 00:00:03.000 +(type (* (as 'TIME 1000) 3)) -- 'time + +;; null-int * TIME → null TIME (line 174 path) +(nil? (* 0Ni (as 'TIME 1000))) -- true +(type (* 0Ni (as 'TIME 1000))) -- 'time + +;; TIME * null-int → null TIME +(nil? (* (as 'TIME 1000) 0Ni)) -- true +(type (* (as 'TIME 1000) 0Ni)) -- 'time diff --git a/test/rfl/arith/neg.rfl b/test/rfl/arith/neg.rfl index f86cb4a7..3b61b594 100644 --- a/test/rfl/arith/neg.rfl +++ b/test/rfl/arith/neg.rfl @@ -106,3 +106,7 @@ ;; goes through atomic_map_unary → ray_neg_fn per element. (nil? (at (neg Vn) 0)) -- true (at (neg Vn) 1) -- 5 + +;; type errors — non-numeric input +(neg "abc") !- type +(neg 'foo) !- type diff --git a/test/rfl/arith/round.rfl b/test/rfl/arith/round.rfl index 84c31679..6e0d689a 100644 --- a/test/rfl/arith/round.rfl +++ b/test/rfl/arith/round.rfl @@ -27,3 +27,7 @@ (set X 1.7) (>= (round X) (floor X)) -- true (<= (round X) (ceil X)) -- true + +;; type errors — non-numeric input +(round "abc") !- type +(round 'foo) !- type diff --git a/test/rfl/arith/sqrt.rfl b/test/rfl/arith/sqrt.rfl index 0b003a40..f6f3e541 100644 --- a/test/rfl/arith/sqrt.rfl +++ b/test/rfl/arith/sqrt.rfl @@ -24,3 +24,12 @@ ;; (vector version: sort A, verify sqrt preserves order) (set S (asc A)) (count S) -- (sum (== (sqrt S) (asc (sqrt A)))) + +;; type errors — non-numeric input +(sqrt "abc") !- type +(sqrt 'foo) !- type + +;; ---- RAY_IS_ERR propagation: sqrt(erroring arg) ---- +(sqrt (+ "a" 1)) !- type +(sqrt (- 'a 1)) !- type +(sqrt (round "x")) !- type diff --git a/test/rfl/arith/sub.rfl b/test/rfl/arith/sub.rfl index 56bae9a4..3fae52fb 100644 --- a/test/rfl/arith/sub.rfl +++ b/test/rfl/arith/sub.rfl @@ -89,3 +89,64 @@ ;; ── Arity — binary verb (pre-fix: extras silently dropped) ── (- 10 1 2) !- arity (- 10 1 2 3) !- arity + +;; ---- RAY_IS_ERR propagation: - with erroring arg on either side ---- +(- (sqrt 'foo) 1) !- type +(- 1 (sqrt 'foo)) !- type +(- (round "x") 1) !- type +(- (+ "a" 1) 1) !- type + +;; ────────────────────────────────────────────────────────────────── +;; Mixed-width / temporal type-pair coverage +;; ────────────────────────────────────────────────────────────────── + +;; I16 - I32 → I32 (and reverse) +(- (as 'I16 10) (as 'I32 3)) -- 7 +(type (- (as 'I16 10) (as 'I32 3))) -- 'i32 + +;; I32 atom - F64 atom → F64 +(- (as 'I32 10) 2.5) -- 7.5 +(type (- (as 'I32 10) 2.5)) -- 'f64 + +;; U8 - I64 +(- (as 'U8 10) (as 'I64 3)) -- 7 + +;; DATE - I32 atom → DATE +(- (as 'DATE 7305) (as 'I32 30)) -- 2019.12.02 +;; DATE - I16 +(- (as 'DATE 7305) (as 'I16 5)) -- 2019.12.27 + +;; DATE - DATE → I32 (days) +(- (as 'DATE 7310) (as 'DATE 7305)) -- 5 +(type (- (as 'DATE 7310) (as 'DATE 7305))) -- 'i32 + +;; DATE - TIME → TIMESTAMP +(type (- (as 'DATE 7305) (as 'TIME 0))) -- 'timestamp + +;; TIME - integer → TIME +(- (as 'TIME 5000) 2000) -- 00:00:03.000 +;; int - TIME → TIME (negative) +(type (- 1000 (as 'TIME 500))) -- 'time + +;; TIME - TIME → TIME +(- (as 'TIME 5000) (as 'TIME 2000)) -- 00:00:03.000 + +;; TIMESTAMP - int → TIMESTAMP +(type (- (as 'TIMESTAMP 5000) 1000)) -- 'timestamp + +;; TIMESTAMP - TIME → TIMESTAMP +(type (- (as 'TIMESTAMP 5000) (as 'TIME 0))) -- 'timestamp + +;; TIMESTAMP - TIMESTAMP → i64 nanos +(- (as 'TIMESTAMP 5000) (as 'TIMESTAMP 2000)) -- 3000 +(type (- (as 'TIMESTAMP 5000) (as 'TIMESTAMP 2000))) -- 'i64 + +;; TIMESTAMP - DATE → type error +(- (as 'TIMESTAMP 1000) (as 'DATE 7305)) !- type + +;; Null propagation across temporal-numeric arms +(nil? (- (as 'DATE 7305) 0Ni)) -- true +(nil? (- 0Ni (as 'DATE 7305))) -- true +(type (- 0Ni (as 'DATE 7305))) -- 'date +(nil? (- (as 'TIME 5000) 0Ni)) -- true +(type (- (as 'TIME 5000) 0Ni)) -- 'time diff --git a/test/rfl/cmp/eq.rfl b/test/rfl/cmp/eq.rfl index b527d964..b32d9848 100644 --- a/test/rfl/cmp/eq.rfl +++ b/test/rfl/cmp/eq.rfl @@ -55,3 +55,32 @@ (set V (til 1024)) (sum (== V V)) -- 1024 (sum (== V 0)) -- 1 + +;; ────────────────────────────────────────────────────────────────── +;; Mixed-width / sym / guid / temporal-cross +;; ────────────────────────────────────────────────────────────────── + +;; sym vs different sym (interned-id equal-id path) +(== 'foo 'foo) -- true +(== 'foo 'bar) -- false + +;; bool vs bool — both directions +(== (as 'BOOL true) (as 'BOOL true)) -- true +(== (as 'BOOL true) (as 'BOOL false)) -- false + +;; GUID vs GUID (memcmp16 path) +(set Gs (take (guid 2) 2)) +(== (at Gs 0) (at Gs 0)) -- true +(== (at Gs 0) (at Gs 1)) -- false + +;; Cross-temporal equality (temporal_as_ns conversion) +(== (as 'DATE 7305) (as 'DATE 7305)) -- true +(== (as 'DATE 7305) (as 'DATE 7306)) -- false +(== (as 'TIME 1000) (as 'TIME 1000)) -- true +(== (as 'TIMESTAMP 0) (as 'TIMESTAMP 0)) -- true + +;; Mixed-width numeric: I16 vs I32, I32 vs F64, U8 vs I64 +(== (as 'I16 5) (as 'I32 5)) -- true +(== (as 'I32 5) (as 'F64 5.0)) -- true +(== (as 'U8 5) (as 'I64 5)) -- true +(== (as 'BOOL true) (as 'I64 1)) -- true diff --git a/test/rfl/cmp/ge.rfl b/test/rfl/cmp/ge.rfl index 580a9b7a..ab1efcee 100644 --- a/test/rfl/cmp/ge.rfl +++ b/test/rfl/cmp/ge.rfl @@ -41,3 +41,24 @@ (>= 'b 'a) -- true (>= 'a 'b) -- false (>= ['a 'b 'c] 'b) -- [false true true] + +;; type errors — cross-type compare not numeric/temporal/sym/str +(>= 'foo 1) !- type +(>= 1 'foo) !- type + +;; ────────────────────────────────────────────────────────────────── +;; GUID lex >= (memcmp16 path) +;; ────────────────────────────────────────────────────────────────── +(set Gs (take (guid 2) 2)) +(>= (at Gs 0) (at Gs 0)) -- true + +;; Cross-temporal >= +(>= (as 'DATE 7305) (as 'DATE 7305)) -- true +(>= (as 'DATE 7306) (as 'DATE 7305)) -- true +(>= (as 'TIME 1000) (as 'TIME 1000)) -- true +(>= (as 'TIMESTAMP 100) (as 'TIMESTAMP 100)) -- true + +;; Mixed-width numeric >= +(>= (as 'I16 100) (as 'I32 100)) -- true +(>= (as 'F64 5.0) (as 'I64 5)) -- true +(>= (as 'U8 5) (as 'I32 5)) -- true diff --git a/test/rfl/cmp/gt.rfl b/test/rfl/cmp/gt.rfl index 92551fc2..5bb1e5f7 100644 --- a/test/rfl/cmp/gt.rfl +++ b/test/rfl/cmp/gt.rfl @@ -59,3 +59,30 @@ ;; 's' > 'l' so 'short > 'longer_… (> 'short 'longer_than_twelve_bytes_symbol) -- true (> 'longer_than_twelve_bytes_symbol 'short) -- false + +;; type errors — cross-type compare not numeric/temporal/sym/str +(> 'foo 1) !- type +(> 1 'foo) !- type + +;; ---- RAY_IS_ERR propagation: > with erroring arg on either side ---- +(> (sqrt 'foo) 5) !- type +(> 5 (sqrt 'foo)) !- type +(> (round "x") 5) !- type + +;; ────────────────────────────────────────────────────────────────── +;; GUID lex ordering (memcmp16 path) +;; ────────────────────────────────────────────────────────────────── +(set Gs (take (guid 2) 2)) +(> (at Gs 0) (at Gs 0)) -- false +(set R (or (> (at Gs 0) (at Gs 1)) (> (at Gs 1) (at Gs 0)))) +R -- true + +;; Cross-temporal ordering +(> (as 'DATE 7306) (as 'DATE 7305)) -- true +(> (as 'TIME 2000) (as 'TIME 1000)) -- true +(> (as 'TIMESTAMP 200) (as 'TIMESTAMP 100)) -- true + +;; Mixed-width +(> (as 'I32 200) (as 'I16 100)) -- true +(> (as 'F64 200.0) (as 'I32 100)) -- true +(> (as 'I64 200) (as 'U8 5)) -- true diff --git a/test/rfl/cmp/le.rfl b/test/rfl/cmp/le.rfl index 964ca8f7..81ded9ce 100644 --- a/test/rfl/cmp/le.rfl +++ b/test/rfl/cmp/le.rfl @@ -34,3 +34,24 @@ (<= 'a 'b) -- true (<= 'b 'a) -- false (<= ['a 'b 'c] 'b) -- [true true false] + +;; type errors — cross-type compare not numeric/temporal/sym/str +(<= 'foo 1) !- type +(<= 1 'foo) !- type + +;; ────────────────────────────────────────────────────────────────── +;; GUID lex <= (memcmp16 path) +;; ────────────────────────────────────────────────────────────────── +(set Gs (take (guid 2) 2)) +(<= (at Gs 0) (at Gs 0)) -- true + +;; Cross-temporal <= +(<= (as 'DATE 7305) (as 'DATE 7306)) -- true +(<= (as 'DATE 7305) (as 'DATE 7305)) -- true +(<= (as 'TIME 1000) (as 'TIME 2000)) -- true +(<= (as 'TIMESTAMP 100) (as 'TIMESTAMP 200)) -- true + +;; Mixed-width numeric <= +(<= (as 'I16 100) (as 'I32 200)) -- true +(<= (as 'F64 100.0) (as 'I64 200)) -- true +(<= (as 'U8 5) (as 'I32 100)) -- true diff --git a/test/rfl/cmp/lt.rfl b/test/rfl/cmp/lt.rfl index 254124c3..84978be6 100644 --- a/test/rfl/cmp/lt.rfl +++ b/test/rfl/cmp/lt.rfl @@ -43,3 +43,37 @@ (< 'foobar 'foo) -- false (< ['a 'b 'c] 'b) -- [true false false] (< ['a 'b] ['b 'a]) -- [true false] + +;; type errors — cross-type compare not numeric/temporal/sym/str +(< 'foo 1) !- type +(< 1 'foo) !- type + +;; ---- RAY_IS_ERR propagation: < with erroring arg on either side ---- +(< (sqrt 'foo) 5) !- type +(< 5 (sqrt 'foo)) !- type +(< (- 'a 1) 5) !- type + +;; ────────────────────────────────────────────────────────────────── +;; GUID lex ordering (memcmp16 path) +;; ────────────────────────────────────────────────────────────────── +(set Gs (take (guid 2) 2)) +;; reflexivity for guid: (< g g) = false +(< (at Gs 0) (at Gs 0)) -- false +;; either ( x 1) (raise 77) x)) (list 0 1 2)) (fn [e] e)) -- 77 + +;; Binary map error propagation (lines 388-391) +(try (map (fn [a b] (if (> b 1) (raise 88) (+ a b))) 0 (list 0 1 2)) (fn [e] e)) -- 88 + +;; ══════════════════════════════════════════════════════════════════════ +;; Section 2: pmap — alias for map (line 399-401) +;; ══════════════════════════════════════════════════════════════════════ +(pmap (fn [x] (+ x 1)) (list 1 2 3)) -- (list 2 3 4) +(pmap + 10 (list 1 2 3)) -- (list 11 12 13) + +;; ══════════════════════════════════════════════════════════════════════ +;; Section 3: ray_fold_fn error paths (lines 404-449) +;; ══════════════════════════════════════════════════════════════════════ + +;; n < 2 → domain error (line 405) +(try (fold +) (fn [e] "err")) -- "err" + +;; 2-arg fold on empty list → domain error (line 419) +(try (fold + (list)) (fn [e] "err")) -- "err" + +;; 2-arg fold on non-list → type error (line 417) +(try (fold + 42) (fn [e] "err")) -- "err" + +;; 3-arg fold on non-list → type error (line 438) +(try (fold + 0 42) (fn [e] "err")) -- "err" + +;; 3-arg fold with error propagation (line 444) +(try (fold (fn [a b] (if (> b 3) (raise 99) (+ a b))) 0 (list 1 2 3 4)) (fn [e] e)) -- 99 + +;; 2-arg fold error propagation (line 426) +(try (fold (fn [a b] (if (> b 2) (raise 44) (+ a b))) (list 1 2 3)) (fn [e] e)) -- 44 + +;; fold with lazy arg materialization (line 407) +(fold + 0 (reverse (list 1 2 3))) -- 6 + +;; ══════════════════════════════════════════════════════════════════════ +;; Section 4: ray_scan_fn error paths (lines 452-491) +;; ══════════════════════════════════════════════════════════════════════ + +;; n < 2 → domain error (line 453) +(try (scan +) (fn [e] "err")) -- "err" + +;; scan on non-list → type error (line 461) +(try (scan + 42) (fn [e] "err")) -- "err" + +;; scan on empty list → empty (lines 463-470) +(count (scan + (list))) -- 0 + +;; scan with lazy arg (line 455) +(scan + (reverse (list 1 2 3))) -- (list 3 5 6) + +;; scan error propagation (lines 483-486) +(try (scan (fn [a b] (if (> b 2) (raise 55) (+ a b))) (list 1 2 3)) (fn [e] e)) -- 55 + +;; ══════════════════════════════════════════════════════════════════════ +;; Section 5: ray_filter_fn paths (lines 494-604) +;; ══════════════════════════════════════════════════════════════════════ + +;; Table filter (lines 499-515) — already in cov2; add error propagation +;; Table filter column error propagation (line 509) +;; filter with wrong-length mask on table with multiple cols +(try (filter (table [a b] (list [1 2 3] [4 5 6])) [true false]) (fn [e] "err")) -- "err" + +;; STR filter (lines 518-533) — already covered; add size=0 result +(filter "abc" [false false false]) -- "" + +;; Typed vec filter with null bitmap (lines 557-559) +(nil? (at (filter (concat [0Nl 1 2] []) [true false true]) 0)) -- true +(at (filter (concat [0Nl 1 2] []) [true false true]) 1) -- 2 + +;; Boxed list filter where mask is not all bools → type error (line 581) +(try (filter (list 1 2 3) (list 1 0 1)) (fn [e] "err")) -- "err" + +;; Boxed list filter — happy path (lines 594-600) +(filter (list 10 20 30) (list true false true)) -- (list 10 30) + +;; ══════════════════════════════════════════════════════════════════════ +;; Section 6: ray_apply_fn paths (lines 607-647) +;; ══════════════════════════════════════════════════════════════════════ + +;; n < 3 → domain error (line 608) +(try (apply + 1) (fn [e] "err")) -- "err" + +;; Both args scalar → call fn once (lines 615-616) +(apply + 3 4) -- 7 + +;; Both args non-list → type error (line 623) +(try (apply + (list 1 2) 42) (fn [e] "err")) -- "err" + +;; Normal apply (zip two lists) +(apply + (list 1 2 3) (list 10 20 30)) -- (list 11 22 33) + +;; Apply with lazy args (line 610) +(apply + (reverse (list 1 2 3)) (list 10 20 30)) -- (list 13 22 31) + +;; Apply error propagation (lines 638-641) +(try (apply (fn [a b] (if (> a 1) (raise 66) (+ a b))) (list 0 1 2) (list 10 20 30)) (fn [e] e)) -- 66 + +;; Apply with mismatched lengths → truncates to shorter +(apply + (list 1 2) (list 10 20 30)) -- (list 11 22) + +;; ══════════════════════════════════════════════════════════════════════ +;; Section 7: atom_eq edge cases (lines 654-711) +;; ══════════════════════════════════════════════════════════════════════ + +;; Cross-type numeric: i64 vs f64 → compare as f64 (line 661) +(in 1 (list 1.0)) -- true +(in 2.0 (list 2)) -- true +(in 1h (list 1.0)) -- true + +;; Different non-numeric types → not equal (line 662) +(in 'a (list 1)) -- false + +;; atom_eq timestamp (lines 674-675) +(count (distinct (list 2024.01.01D10:00:00.000000000 2024.01.01D10:00:00.000000000))) -- 1 + +;; atom_eq GUID (lines 676-679) +(set _g1 (guid 1)) +(in (first _g1) (list (first _g1))) -- true + +;; atom_eq LIST structural comparison (lines 684-701) +;; Equal lists at different addresses +(count (distinct (list (list 1 2) (list 1 2)))) -- 1 +;; Different length lists → not equal +(count (distinct (list [1 2] [1 2 3]))) -- 2 + +;; atom_eq default: typed vecs as atoms (lines 703-710) +(count (distinct (list [1 2] [3 4] [1 2]))) -- 2 +;; Different type vecs → not equal +(count (distinct (list [1 2] [1.0 2.0]))) -- 2 + +;; ══════════════════════════════════════════════════════════════════════ +;; Section 8: hs_hash_row — typed vec branches used in hashset +;; Exercise each type switch branch for typed-vec hashset ops. +;; ══════════════════════════════════════════════════════════════════════ + +;; I16 distinct (line 75 hash, line 267 sort) +(count (distinct [1h 2h 1h 3h 2h])) -- 3 +;; U8/BOOL distinct (line 77 hash, line 272 sort) +(count (distinct [0x01 0x02 0x01])) -- 2 +(count (distinct [true false true])) -- 2 +;; DATE distinct (line 79 hash, line 262 sort) +(count (distinct [2025.01.01 2025.01.02 2025.01.01 2025.01.03])) -- 3 +;; TIME distinct (line 80 hash, line 262 sort) +(count (distinct [10:00:00.000 11:00:00.000 10:00:00.000 12:00:00.000])) -- 3 +;; TIMESTAMP distinct (line 81 hash, line 257 sort) +(count (distinct [2024.01.01D00:00:01.000000000 2024.01.02D00:00:01.000000000 2024.01.01D00:00:01.000000000])) -- 2 +;; SYM distinct (line 82-84 hash, skips sort at line 740) +(count (distinct ['a 'b 'c 'a 'b])) -- 3 +;; GUID distinct (line 86-87 hash, skips sort at line 740) +(set _g2 (guid 3)) +(count (distinct (concat _g2 _g2))) -- 3 +;; STR distinct (line 88-91 hash, skips sort at line 740) +(count (distinct ["aa" "bb" "aa" "cc"])) -- 3 + +;; ══════════════════════════════════════════════════════════════════════ +;; Section 9: hs_eq_rows — typed vec equality in hashset collisions +;; Force hash collisions by having many items of the same type +;; Exercise each type in hs_eq_rows (lines 126-166) +;; ══════════════════════════════════════════════════════════════════════ + +;; I32 equality (line 129): distinct with >8 unique I32s triggers grow+collisions +(count (distinct (as 'I32 (til 20)))) -- 20 +;; I16 equality (line 130) +(count (distinct (as 'I16 (til 20)))) -- 20 +;; U8 equality (line 131) +(count (distinct (as 'U8 (til 20)))) -- 20 +;; F64 equality (line 133) +(count (distinct (as 'F64 (til 20)))) -- 20 +;; DATE equality (lines 134-135) +(count (union [2025.01.01 2025.01.02 2025.01.03] [2025.01.02 2025.01.03 2025.01.04])) -- 4 +;; TIME equality (lines 134-135) +(count (union [10:00:00.000 11:00:00.000] [11:00:00.000 12:00:00.000])) -- 3 +;; TIMESTAMP equality (line 136) +(count (union [2024.01.01D00:00:01.000000000 2024.01.02D00:00:01.000000000] [2024.01.02D00:00:01.000000000 2024.01.03D00:00:01.000000000])) -- 3 +;; SYM equality (lines 137-140) +(count (union ['a 'b 'c] ['b 'c 'd])) -- 4 +;; GUID equality (lines 142-144) +(set _g3 (guid 3)) +(count (union _g3 _g3)) -- 3 +;; STR equality (lines 145-151) +(count (union ["aa" "bb"] ["bb" "cc"])) -- 3 + +;; Cross-type comparison via atom_eq fallback (lines 159-165) +;; except where vec1 is I64 and vec2 is F64 → cross-type hs_eq_rows +;; NOTE: hash values differ across types (I64 vs F64), so the hashset +;; probe may miss. The typed-vec path uses type-specific hashing, so +;; cross-type except returns all of vec1 (no matches found). +(count (except [1 2 3 4] (as 'F64 [2 3]))) -- 4 + +;; ══════════════════════════════════════════════════════════════════════ +;; Section 10: hashset_grow — trigger by exceeding load factor (lines 202-226) +;; hashset_init with hint=8 → cap=16. Insert 8 unique → grow at 8th. +;; ══════════════════════════════════════════════════════════════════════ + +;; in with 8-element RHS: hashset built on 8 items, cap=16 → grow at count=8 +(in 1 [1 2 3 4 5 6 7 8]) -- true +(in 9 [1 2 3 4 5 6 7 8]) -- false + +;; except with 9 unique items: cap=32 (hint=9 → cap=32 since 18>16) +;; Actually hint=9 → cap target=18 → next power of 2 = 32. 9 < 16, no grow. +;; Need hint such that items > cap/2. With hint=4, cap=16, items=4: no grow. +;; The key is distinct_vec_eager where hint=len: distinct(til 9) → hint=9, cap=32, 9<16: no grow. +;; distinct(til 64) → hint=64, cap=128, items=64, 64*2=128>=128 → GROW at item 64 +(count (distinct (til 64))) -- 64 + +;; Force grow in union: vec1.len determines hashset init. +;; union [1..8] with [9..16]: hs built on vec1 (8 items), cap=16 +;; After inserting 8 items: count*2=16>=16 → grow on 8th insert +(count (union [1 2 3 4 5 6 7 8] [5 6 7 8 9 10 11 12])) -- 12 + +;; ══════════════════════════════════════════════════════════════════════ +;; Section 11: distinct_sort_cmp — each type branch (lines 253-293) +;; ══════════════════════════════════════════════════════════════════════ + +;; I64/TIMESTAMP (line 257): already well-covered by (distinct (til N)) +(count (distinct [3 1 2])) -- 3 + +;; I32 (line 262): distinct on I32 vec with multiple unique values +(count (distinct [3i 1i 2i 1i 3i])) -- 3 +;; Verify sorted output +(at (distinct [3i 1i 2i]) 0) -- 1i + +;; DATE (line 262): distinct on date vec with sorted output +(count (distinct [2025.01.03 2025.01.01 2025.01.02 2025.01.01])) -- 3 +(at (distinct [2025.01.03 2025.01.01 2025.01.02]) 0) -- 2025.01.01 + +;; TIME (line 262): distinct on time vec with sorted output +(count (distinct [12:00:00.000 10:00:00.000 11:00:00.000 10:00:00.000])) -- 3 +(at (distinct [12:00:00.000 10:00:00.000 11:00:00.000]) 0) -- 10:00:00.000 + +;; I16 (line 267): distinct on I16 vec +(count (distinct [3h 1h 2h 1h 3h])) -- 3 +(at (distinct [3h 1h 2h]) 0) -- 1h + +;; U8/BOOL (line 272): distinct on U8 vec +(count (distinct [0x03 0x01 0x02 0x01])) -- 3 + +;; F64 (line 277): distinct on F64 vec +(count (distinct [3.0 1.0 2.0 1.0])) -- 3 +(at (distinct [3.0 1.0 2.0]) 0) -- 1.0 + +;; ══════════════════════════════════════════════════════════════════════ +;; Section 12: hashset_insert null paths (lines 308-329) +;; ══════════════════════════════════════════════════════════════════════ + +;; typed vec with nulls — null_seen path (lines 309-313) +;; distinct on vec with nulls: first null inserts, second null is duplicate +(count (distinct [1 0Nl 2 0Nl 3])) -- 4 + +;; except with null-bearing vec (lines 309-313 via hashset_find_xrow) +(count (except (concat [0Nl 1 2 3] []) (concat [0Nl 2] []))) -- 2 + +;; sect with null-bearing vecs +(count (sect (concat [0Nl 1 2] []) (concat [0Nl 2 3] []))) -- 2 + +;; in with null-bearing RHS +(sum (as 'I64 (in [1 2 3] (concat [0Nl 1 3] [])))) -- 2 + +;; ══════════════════════════════════════════════════════════════════════ +;; Section 13: ray_in_fn edge cases (lines 878-1039) +;; ══════════════════════════════════════════════════════════════════════ + +;; STR in STR (lines 882-898) +(in "abc" "abcd") -- [true true true] +(in "xyz" "abc") -- [false false false] + +;; Scalar in scalar (lines 900-901) +(in 3 3) -- true +(in 3 4) -- false + +;; STR in LIST (lines 903-921) +(in "ab" (list "a" "b" "c")) -- [true true] +(in "xy" (list "x" "z")) -- [true false] + +;; Empty collection val (line 925-927) +(in (as 'I64 (list)) [1 2 3]) -- (list) + +;; Typed vec val vs typed vec: hashset path (lines 933-950) +(in [1 2 3] [2 3 4]) -- [false true true] + +;; Non-scalar results from in (lines 978-997): STR vec in STR atom +;; Each char-check returns a bool vec → results are non-scalar → LIST collect +(in (list "ab" "cd") "abcd") -- (list [true true] [true true]) + +;; val_null in vec with nulls → true (line 1007) +(in 0Nl (concat [0Nl 1 2] [])) -- true +;; val_null in vec without nulls → false (line 1017) +(in 0Nl [1 2 3]) -- false + +;; ══════════════════════════════════════════════════════════════════════ +;; Section 14: ray_except_fn typed vec + scalar (lines 1079-1087) +;; ══════════════════════════════════════════════════════════════════════ + +;; Typed vec except scalar atom (lines 1079-1087) +(except [1 2 3 4 5] 3) -- [1 2 4 5] +(except [1.0 2.0 3.0] 2.0) -- [1.0 3.0] +(except ['a 'b 'c] 'b) -- ['a 'c] + +;; Boxed list except scalar (lines 1125-1128) +(except (list 1 2 3) 2) -- (list 1 3) +(except (list 'a 'b 'c) 'b) -- (list 'a 'c) + +;; Boxed list except list (lines 1130-1138) +(except (list 1 2 3 4) (list 2 4)) -- (list 1 3) + +;; ══════════════════════════════════════════════════════════════════════ +;; Section 15: ray_take_fn edge cases (lines 1268-1589) +;; ══════════════════════════════════════════════════════════════════════ + +;; Take float → type error (line 1274-1275) +(try (take [1 2 3] 1.5) (fn [e] "err")) -- "err" + +;; Range take negative amount → length error (line 1281) +(try (take [1 2 3] [0 -1]) (fn [e] "err")) -- "err" + +;; Table range take (lines 1284-1298) +(set _tt (table [a b] (list [1 2 3 4 5] [10 20 30 40 50]))) +(count (take _tt [1 2])) -- 2 +(at (at (take _tt [1 2]) 'a) 0) -- 2 + +;; String range take (lines 1301-1309) +(take "hello" [1 3]) -- "ell" +(take "hello" [-2 2]) -- "lo" +(take "hello" [10 3]) -- "" + +;; Typed vec range take with negative start (lines 1315-1316) +(take [10 20 30 40 50] [-3 2]) -- [30 40] + +;; Typed vec range take start >= len → empty (line 1317-1324) +(count (take [1 2 3] [10 5])) -- 0 + +;; Dict range take with LIST vals (lines 1367-1378) +(set _dl (dict ['a 'b 'c 'd] (list 1 "two" 3.0 'four))) +(count (key (take _dl [1 2]))) -- 2 + +;; Dict range take with typed vals (lines 1379-1381) +(set _dt (dict ['a 'b 'c 'd] [1 2 3 4])) +(count (key (take _dt [1 2]))) -- 2 +(at (value (take _dt [1 2])) 0) -- 2 + +;; Boxed list range take (lines 1387-1411) +(take (list 10 20 30 40) [1 2]) -- (list 20 30) +(count (take (list 1 2 3) [10 5])) -- 0 + +;; Range take with negative start on list (line 1389) +(take (list 10 20 30 40) [-2 2]) -- (list 30 40) + +;; Scalar take (lines 1425-1434): repeat a scalar value n times +(take 42 3) -- [42 42 42] +(take 3.14 2) -- [3.14 3.14] +(take true 3) -- [true true true] + +;; Char take (lines 1416-1422): repeat a char n times +(take 'a' 3) -- "aaa" +(take 'z' 1) -- "z" + +;; String take (lines 1437-1457) +(take "abc" 5) -- "abcab" +(take "abc" -4) -- "cabc" +(take "" 0) -- "" +(take "" 3) -- "" + +;; Typed vec null bitmap propagation for positive take (lines 1540-1544) +(nil? (at (take (concat [0Nl 1 2] []) 6) 0)) -- true +(nil? (at (take (concat [0Nl 1 2] []) 6) 3)) -- true +(at (take (concat [0Nl 1 2] []) 6) 1) -- 1 + +;; Typed vec null bitmap propagation for negative take (lines 1545-1553) +(nil? (at (take (concat [0Nl 1 2] []) -6) 3)) -- true +(at (take (concat [0Nl 1 2] []) -6) 4) -- 1 + +;; Boxed list take with len=0 (line 1572-1573) +(count (take (list) 5)) -- 0 + +;; Boxed list negative take (lines 1579-1585) +(take (list 1 2 3 4) -2) -- (list 3 4) + +;; ══════════════════════════════════════════════════════════════════════ +;; Section 16: ray_at_fn paths (lines 1592-1745) +;; ══════════════════════════════════════════════════════════════════════ + +;; Table column by symbol (lines 1595-1599) +(set _ta (table [x y] (list [1 2 3] [4 5 6]))) +(at _ta 'x) -- [1 2 3] +;; Missing column → domain error +(try (at _ta 'z) (fn [e] "err")) -- "err" + +;; Table row by integer (lines 1603-1628) +(set _row (at _ta 0)) +(at _row 'x) -- 1 + +;; Table row out of bounds → domain error (line 1608) +(try (at _ta 5) (fn [e] "err")) -- "err" +(try (at _ta -1) (fn [e] "err")) -- "err" + +;; Table row selection by I64 vec (lines 1634-1661) +(count (at _ta [0 2])) -- 2 +;; Out of bounds in index vec → domain error (line 1639-1640) +(try (at _ta [0 99]) (fn [e] "err")) -- "err" + +;; Dict key access (lines 1664-1668) +(set _da (dict ['a 'b 'c] [10 20 30])) +(at _da 'a) -- 10 +;; Missing key → typed null (line 1667) +(nil? (at _da 'z)) -- true + +;; String indexing single index (lines 1671-1692) +(at "hello" 0) -- "h" +(at "hello" 4) -- "o" +;; Out of bounds → domain error (line 1690) +(try (at "hello" -1) (fn [e] "err")) -- "err" +(try (at "hello" 5) (fn [e] "err")) -- "err" + +;; String multi-index (lines 1674-1687) +(at "hello" [0 4]) -- "ho" +;; Out of bounds in multi-index → domain error (line 1684) +(try (at "hello" [0 10]) (fn [e] "err")) -- "err" + +;; Vector index as collection (lines 1696-1722) +(at [10 20 30] [0 2]) -- (list 10 30) + +;; Non-integer idx → type error (lines 1724-1726) +(try (at [1 2 3] "bad") (fn [e] "err")) -- "err" + +;; Typed vec out of bounds → typed null (line 1732) +(nil? (at [1 2 3] 10)) -- true +(nil? (at [1 2 3] -1)) -- true + +;; Boxed list access (lines 1739-1744) +(at (list 10 20 30) 1) -- 20 +;; Out of bounds → typed null (line 1741) +(nil? (at (list 10 20 30) 99)) -- true + +;; Non-list, non-vec → type error (line 1739) +(try (at + 0) (fn [e] "err")) -- "err" + +;; ══════════════════════════════════════════════════════════════════════ +;; Section 17: ray_find_fn paths (lines 1748-1827) +;; ══════════════════════════════════════════════════════════════════════ + +;; String find (lines 1752-1759) +(find "hello" 'l') -- 2 +;; Not found → typed null +(nil? (find "hello" 'z')) -- true + +;; Vector val find (lines 1762-1783) +(find [10 20 30] [20 30 40]) -- (list 1 2 0Nl) + +;; Empty vec find with vector val (lines 1764-1765) +(count (find (as 'I64 (list)) [1 2])) -- 0 + +;; Typed vec find with nulls — null val (lines 1790-1793) +(find (concat [0Nl 1 2] []) 0Nl) -- 0 +;; Typed vec find with nulls — non-null val (lines 1796-1801) +(find (concat [0Nl 1 2] []) 2) -- 2 +;; Typed vec find with nulls — not found (lines 1790-1813) +(nil? (find (concat [0Nl 1 2] []) 9)) -- true + +;; Typed vec find without nulls — null val: skip (line 1804) +(nil? (find [1 2 3] 0Nl)) -- true +;; Typed vec find without nulls — found (lines 1805-1811) +(find [10 20 30] 20) -- 1 +;; Typed vec find without nulls — not found +(nil? (find [10 20 30] 99)) -- true + +;; Boxed list find (lines 1816-1826) +(find (list 10 20 30) 20) -- 1 +(nil? (find (list 10 20 30) 99)) -- true + +;; ══════════════════════════════════════════════════════════════════════ +;; Section 18: ray_reverse_fn paths (lines 1907-1944) +;; ══════════════════════════════════════════════════════════════════════ + +;; Atom reverse → identity (line 1913) +(reverse 42) -- 42 +(reverse "hello") -- "hello" + +;; Typed vec reverse (via lazy chain) (lines 1917-1923) +(reverse [1 2 3]) -- [3 2 1] +(reverse [1.0 2.0 3.0]) -- [3.0 2.0 1.0] + +;; Boxed list reverse (lines 1926-1943) +(reverse (list 1 2 3)) -- (list 3 2 1) +(reverse (list "a" "b" "c")) -- (list "c" "b" "a") + +;; Null/error guard (line 1908) +;; reverse of error should propagate +(reverse (+ "a" 1)) !- type + +;; ══════════════════════════════════════════════════════════════════════ +;; Section 19: ray_rand_fn error paths (lines 1951-1969) +;; ══════════════════════════════════════════════════════════════════════ + +;; Non-atom args → type error (line 1952) +(try (rand [1 2] 10) (fn [e] "err")) -- "err" +(try (rand 10 [1 2]) (fn [e] "err")) -- "err" + +;; F64 args → type error (lines 1956, 1959) +(try (rand 1.0 10) (fn [e] "err")) -- "err" +(try (rand 10 1.0) (fn [e] "err")) -- "err" + +;; Negative n → domain error (line 1960) +(try (rand -1 10) (fn [e] "err")) -- "err" + +;; mx <= 0 → domain error (line 1961) +(try (rand 5 0) (fn [e] "err")) -- "err" +(try (rand 5 -3) (fn [e] "err")) -- "err" + +;; n == 0 → empty vec (line 1962) +(count (rand 0 100)) -- 0 + +;; I32 args (lines 1955, 1958) +(count (rand 5i 100i)) -- 5 + +;; ══════════════════════════════════════════════════════════════════════ +;; Section 20: ray_bin_fn / ray_binr_fn (lines 1972-2047) +;; ══════════════════════════════════════════════════════════════════════ + +;; bin sorted not I64 → type error (line 1974) +(try (bin [1.0 2.0 3.0] 2) (fn [e] "err")) -- "err" + +;; bin scalar (lines 1978-1987) +(bin [1 3 5 7 9] 4) -- 1 +(bin [1 3 5 7 9] 0) -- -1 +(bin [1 3 5 7 9] 9) -- 4 +(bin [1 3 5 7 9] 4i) -- 1 + +;; bin vec (lines 1988-2006) +(bin [1 3 5 7 9] [0 4 10]) -- [-1 1 4] + +;; bin val wrong type → type error (line 2007) +(try (bin [1 2 3] 1.5) (fn [e] "err")) -- "err" + +;; binr scalar (lines 2017-2026) +(binr [1 3 5 7 9] 4) -- 2 +(binr [1 3 5 7 9] 1) -- 0 +(binr [1 3 5 7 9] 10) -- 4 + +;; binr vec (lines 2027-2045) +(binr [1 3 5 7 9] [0 4 10]) -- [0 2 4] + +;; binr val wrong type → type error (line 2046) +(try (binr [1 2 3] 1.5) (fn [e] "err")) -- "err" + +;; ══════════════════════════════════════════════════════════════════════ +;; Section 21: map-left / map-right (lines 2055-2125) +;; ══════════════════════════════════════════════════════════════════════ + +;; map-left wrong arity → domain error (line 2096) +(try (map-left + 1) (fn [e] "err")) -- "err" + +;; map-left normal (line 2107) +(map-left + 10 (list 1 2 3)) -- (list 11 12 13) +(map-left + 10 [1 2 3]) -- (list 11 12 13) + +;; map-left auto-swap: vec is scalar, fixed is vector (lines 2102-2104) +(map-left + 5 (list 1 2 3)) -- (list 6 7 8) + +;; map-right wrong arity → domain error (line 2113) +(try (map-right + 1) (fn [e] "err")) -- "err" + +;; map-right normal (line 2124) +(map-right + [1 2 3] 10) -- (list 11 12 13) + +;; map-right auto-swap: vec is scalar, fixed is vector (lines 2119-2121) +(map-right + 5 [1 2 3]) -- (list 6 7 8) + +;; map-iterate on scalar (non-vec, non-list) → call once (lines 2057-2062) +(map-left + 10 5) -- 15 +(map-right + 5 10) -- 15 + +;; map-iterate error propagation (lines 2080-2085) +(try (map-left (fn [a b] (if (> b 1) (raise 11) (+ a b))) 0 (list 0 1 2)) (fn [e] e)) -- 11 + +;; ══════════════════════════════════════════════════════════════════════ +;; Section 22: fold-right (lines 2138-2183) +;; ══════════════════════════════════════════════════════════════════════ + +;; n < 2 → domain error (line 2139) +(try (fold-right +) (fn [e] "err")) -- "err" + +;; 2-arg form: uses last element as init (lines 2148-2165) +(fold-right + (list 1 2 3 4)) -- 10 +(fold-right + (list 42)) -- 42 + +;; 2-arg on empty list → domain error (line 2154) +(try (fold-right + (list)) (fn [e] "err")) -- "err" + +;; 2-arg on non-list → type error (line 2152) +(try (fold-right + 42) (fn [e] "err")) -- "err" + +;; 3-arg form (lines 2168-2183) +(fold-right - 0 (list 1 2 3)) -- 2 +(fold-right + 10 (list 1 2 3)) -- 16 + +;; 3-arg on non-list → type error (line 2173) +(try (fold-right + 0 42) (fn [e] "err")) -- "err" + +;; Error propagation in 2-arg form (line 2161) +(try (fold-right (fn [a b] (raise 22)) (list 1 2 3)) (fn [e] e)) -- 22 + +;; Error propagation in 3-arg form (line 2179) +(try (fold-right (fn [a b] (raise 33)) 0 (list 1 2)) (fn [e] e)) -- 33 + +;; ══════════════════════════════════════════════════════════════════════ +;; Section 23: scan-right (lines 2192-2231) +;; ══════════════════════════════════════════════════════════════════════ + +;; n < 2 → domain error (line 2193) +(try (scan-right +) (fn [e] "err")) -- "err" + +;; Non-list → type error (line 2201) +(try (scan-right + 42) (fn [e] "err")) -- "err" + +;; Empty list → empty (lines 2203-2210) +(count (scan-right + (list))) -- 0 + +;; Normal scan-right (lines 2219-2228) +(scan-right + (list 1 2 3 4)) -- (list 10 9 7 4) + +;; Error propagation (lines 2223-2226) +(try (scan-right (fn [a b] (raise 44)) (list 1 2 3)) (fn [e] e)) -- 44 + +;; ══════════════════════════════════════════════════════════════════════ +;; Section 24: distinct — string path (lines 810-826) +;; ══════════════════════════════════════════════════════════════════════ + +;; Empty string → identity (lines 813-814) +(distinct "") -- "" + +;; Non-empty string → unique chars sorted (lines 815-825) +(distinct "banana") -- "abn" +(distinct "aaabbb") -- "ab" + +;; ══════════════════════════════════════════════════════════════════════ +;; Section 25: distinct — boxed list path (lines 838-874) +;; ══════════════════════════════════════════════════════════════════════ + +;; Empty boxed list (line 843) +(count (distinct (list))) -- 0 + +;; Boxed list with duplicates (lines 852-860) +(count (distinct (list 1 2 1 3))) -- 3 + +;; Boxed list sorting: atoms before vectors (lines 864-872) +(count (distinct (list [1 2] 3 [1 2] 4))) -- 3 +(at (distinct (list [1 2] 3 4)) 0) -- 3 + +;; ══════════════════════════════════════════════════════════════════════ +;; Section 26: list_to_typed_vec — SYM/STR empty conversion (lines 1042-1062) +;; ══════════════════════════════════════════════════════════════════════ + +;; Empty SYM result from except → typed SYM vec (lines 1046-1049) +(type (except ['a 'b] ['a 'b 'c])) -- 'SYM +(count (except ['a 'b] ['a 'b 'c])) -- 0 + +;; Empty STR result from except → typed STR vec (lines 1046-1049) +(type (except ["aa" "bb"] ["aa" "bb" "cc"])) -- 'STR +(count (except ["aa" "bb"] ["aa" "bb" "cc"])) -- 0 + +;; Non-empty SYM/STR → stays as boxed list (line 1051) +(count (except ['a 'b 'c] ['b])) -- 2 +(count (except ["aa" "bb" "cc"] ["bb"])) -- 2 + +;; ══════════════════════════════════════════════════════════════════════ +;; Section 27: reverse_vec_eager — non-STR typed vecs with nulls (lines 1892-1898) +;; ══════════════════════════════════════════════════════════════════════ + +;; I64 vec with nulls reversed +(set _rn (reverse (concat [0Nl 1 2] []))) +(nil? (at _rn 2)) -- true +(at _rn 0) -- 2 +(at _rn 1) -- 1 + +;; I32 vec with nulls reversed +(set _r32 (reverse (as 'I32 (concat [0Nl 10 20] [])))) +(nil? (at _r32 2)) -- true +(at _r32 0) -- 20i + +;; ══════════════════════════════════════════════════════════════════════ +;; Section 28: parted distinct paths (lines 749-786, 801-807) +;; ══════════════════════════════════════════════════════════════════════ + +;; Parted I64 distinct (non-STR path in parted_to_flat_vec lines 773-779) +(.sys.exec "rm -rf /tmp/rf_bcov_parted") -- 0 +(.sys.exec "rm -f /tmp/rf_bcov_parted.csv") -- 0 +(.sys.exec "printf 'val\n1\n2\n3\n1\n2\n' > /tmp/rf_bcov_parted.csv") -- 0 +(set _prt (.csv.parted [I64] "/tmp/rf_bcov_parted.csv" "/tmp/rf_bcov_parted" 'bcov)) +(count (distinct (at _prt 'val))) -- 3 +(.sys.exec "rm -rf /tmp/rf_bcov_parted") -- 0 +(.sys.exec "rm -f /tmp/rf_bcov_parted.csv") -- 0 + +;; ══════════════════════════════════════════════════════════════════════ +;; Section 29: Null-bearing STR vec operations +;; Build via CSV with empty cells to get RAY_ATTR_HAS_NULLS on STR vec +;; ══════════════════════════════════════════════════════════════════════ + +(.sys.exec "rm -f /tmp/rf_bcov_strnull.csv") -- 0 +(.sys.exec "printf 'word\nhello\n\nworld\nhello\n' > /tmp/rf_bcov_strnull.csv") -- 0 +(set _tsn (.csv.read [STR] "/tmp/rf_bcov_strnull.csv")) +(set _scn (at _tsn 'word)) + +;; distinct on STR vec with nulls +(count (distinct _scn)) -- 3 + +;; reverse on STR vec with nulls (lines 1860-1871) +(set _rsn (reverse _scn)) +(count _rsn) -- 4 +(nil? (at _rsn 2)) -- true +(at _rsn 0) -- "hello" +(at _rsn 3) -- "hello" + +;; filter on STR vec (typed vec path with null bitmap, lines 554-562) +(set _fsn (filter _scn [true true false true])) +(nil? (at _fsn 1)) -- true +(at _fsn 0) -- "hello" +(at _fsn 2) -- "hello" + +(.sys.exec "rm -f /tmp/rf_bcov_strnull.csv") -- 0 + +;; ══════════════════════════════════════════════════════════════════════ +;; Section 30: hs_hash_row RAY_LIST branches — exercised via boxed-list +;; hashset ops. NOTE: The hashset is only initialized with typed vecs +;; in the current code, so the RAY_LIST branches in hs_hash_row +;; (lines 93-117) and hs_row_is_null (lines 170-172) are UNREACHABLE +;; from the current call graph. They exist for potential future callers. +;; +;; Similarly, hs_eq_rows lines 149-150 (STR null-ptr guards) are +;; DEFENSIVE: null STR entries are caught by hs_row_is_null before +;; reaching the STR equality comparison. +;; +;; hashset_init OOM (line 182), hashset_grow overflow (line 206), +;; hashset_grow OOM (line 208), and hashset_insert grow-fail +;; fallthrough (line 316 false branch) are all OOM-only paths. +;; +;; distinct_sort_cmp default (lines 283-291) is unreachable because +;; the only caller (distinct_vec_eager) excludes SYM/GUID/STR from +;; sorting (line 740), and all other types have explicit switch cases. +;; (Note: F32 type can reach it via CSV [F32] hint, tested in cov5.rfl.) +;; ══════════════════════════════════════════════════════════════════════ diff --git a/test/rfl/collection/distinct.rfl b/test/rfl/collection/distinct.rfl index c2ae1f4c..cf8ba3b1 100644 --- a/test/rfl/collection/distinct.rfl +++ b/test/rfl/collection/distinct.rfl @@ -198,3 +198,52 @@ (sum (as 'I64 (== (at rk5s 'g) (asc gs)))) -- 100 ;; desc also works end to end (count (xdesc rk5 ['g 'p])) -- 100 + +;; OP_DISTINCT vector-input guard: a lazy chain whose previous op is +;; an aggregator (FIRST) yields a scalar at materialisation; OP_DISTINCT +;; must reject the non-vec input rather than dispatching to +;; distinct_vec_eager. Covers exec.c L1049. +(distinct (first [1 2 3 4])) !- type +(distinct (last [1 2 3 4])) !- type + +;; ---- RAY_IS_ERR propagation: distinct(erroring arg) ---- +;; The outer op must propagate the inner error via RAY_IS_ERR early-return +;; rather than dispatching to a vec/atom path. +(distinct (+ "abc" 1)) !- type +(distinct (- 'a 1)) !- type +(distinct (sqrt 'foo)) !- type + +;; ─── edge cases: empty / single / all-equal / mixed-type list ──────── +;; distinct_vec_eager early-return when len==0 (collection.c:721) +(distinct (as 'I64 (list))) -- (as 'I64 (list)) +(distinct (as 'F64 (list))) -- (as 'F64 (list)) +(distinct (as 'I32 (list))) -- (as 'I32 (list)) +(distinct (as 'I16 (list))) -- (as 'I16 (list)) +(distinct (as 'U8 (list))) -- (as 'U8 (list)) +;; single-element vec → returns same single-element vec +(distinct [42]) -- [42] +(distinct [3.14]) -- [3.14] +(distinct (as 'I32 [99])) -- (as 'I32 [99]) +(distinct (as 'U8 [7])) -- (as 'U8 [7]) +;; all-equal vec → one element (across narrow ints, floats, syms) +(distinct [5 5 5 5 5]) -- [5] +(distinct (as 'F64 [3.14 3.14 3.14])) -- [3.14] +(distinct (as 'I32 [99 99 99])) -- (as 'I32 [99]) +(distinct (as 'I16 [9 9 9])) -- (as 'I16 [9]) +(distinct ['a 'a 'a]) -- ['a] +;; distinct of mixed-type LIST — hits the RAY_LIST hs_hash_row arm at +;; src/ops/collection.c lines 93-117 (numeric coercion + sym/date/time/timestamp/guid branches) +;; numeric coercion: I64 1 and F64 1.0 hash as same value (atom_eq coerces both → 1.0) +;; so distinct values are {1, 'a, "b"} → count 3 +(count (distinct (list 1 1.0 'a "b" 1))) -- 3 +(count (distinct (list 'foo 'foo 'bar))) -- 2 +;; distinct of list with date / time atoms (-RAY_DATE/-RAY_TIME hash arms) +(count (distinct (list 2024.01.01 2024.01.01 2024.01.02))) -- 2 +(count (distinct (list 10:00:00.000 10:00:00.000 11:00:00.000))) -- 2 +;; distinct of list with timestamps (-RAY_TIMESTAMP hash arm) +(count (distinct (list 2024.01.01D00:00:01.000000000 2024.01.01D00:00:01.000000000 2024.01.01D00:00:02.000000000))) -- 2 +;; distinct of list with mixed strings (-RAY_STR hash arm) +(count (distinct (list "foo" "bar" "foo"))) -- 2 +(distinct (* "bad" 5)) !- type +(distinct (round "x")) !- type +(distinct (at (sqrt 'foo) 0)) !- type diff --git a/test/rfl/collection/reverse.rfl b/test/rfl/collection/reverse.rfl index 99f3d5f7..52669e1c 100644 --- a/test/rfl/collection/reverse.rfl +++ b/test/rfl/collection/reverse.rfl @@ -34,3 +34,16 @@ (reverse [42]) -- [42] (reverse ['x]) -- ['x] (reverse ["one"]) -- ["one"] + +;; OP_REVERSE vector-input guard: a lazy chain whose previous op is +;; an aggregator (FIRST) yields a scalar at materialisation; OP_REVERSE +;; must reject the non-vec input rather than dispatching to +;; reverse_vec_eager. Covers exec.c L1085. +(reverse (first [1 2 3 4])) !- type +(reverse (last [1 2 3 4])) !- type + +;; ---- RAY_IS_ERR propagation: reverse(erroring arg) ---- +(reverse (+ "a" 1)) !- type +(reverse (sqrt 'foo)) !- type +(reverse (round "x")) !- type +(reverse (- 'a 1)) !- type diff --git a/test/rfl/collection/take.rfl b/test/rfl/collection/take.rfl index 0e5ce1f3..1728553c 100644 --- a/test/rfl/collection/take.rfl +++ b/test/rfl/collection/take.rfl @@ -140,3 +140,31 @@ (take (dict ['a 'b 'c 'd] [1 2 3 4]) [1 2]) -- (dict ['b 'c] [2 3]) (take (table [a b] (list [1 2 3 4] ['x 'y 'z 'w])) [1 2]) -- (table [a b] (list [2 3] ['y 'z])) (take [1 2 3] [1 -2]) !- length + +;; ---- RAY_IS_ERR propagation: take(erroring arg) ---- +(take (sqrt 'foo) 2) !- type +(take 2 (sqrt 'foo)) !- type +(take (round "x") 1) !- type +(take (+ "a" 1) 1) !- type + +;; ─── edge cases: take exactly the count / take 0 / negative-from-empty ── +;; take 0 on a longer vec → empty same-type vec +(count (take [10 20 30 40 50] 0)) -- 0 +(type (take [10 20 30 40 50] 0)) -- 'I64 +;; take = count → identity +(take [1 2 3] 3) -- [1 2 3] +;; take negative = count → identity (last N == all when N == count) +(take [1 2 3] -3) -- [1 2 3] +;; take 1 (single-element) from a longer vec +(take [10 20 30 40 50] 1) -- [10] +;; take -1 from longer vec → last element +(take [10 20 30 40 50] -1) -- [50] +;; take from empty: zero result is zero; positive result memset to zero (line 1500-1501) +(count (take (as 'I64 (list)) 0)) -- 0 +(count (take (as 'I64 (list)) 5)) -- 5 +;; zero-fill for take-from-empty +(sum (take (as 'I64 (list)) 5)) -- 0 +;; range take with zero amount → empty +(count (take [1 2 3 4 5] [2 0])) -- 0 +;; range take with start = count → empty +(count (take [1 2 3 4 5] [5 3])) -- 0 diff --git a/test/rfl/datalog/datalog_branch_cov.rfl b/test/rfl/datalog/datalog_branch_cov.rfl new file mode 100644 index 00000000..2ebaa167 --- /dev/null +++ b/test/rfl/datalog/datalog_branch_cov.rfl @@ -0,0 +1,393 @@ +;; datalog_branch_cov.rfl — branch coverage push for src/ops/datalog.c +;; Targets uncovered branches that the existing datalog_coverage.rfl / +;; rule.rfl / eav_ops.rfl tests don't reach. + +;; ════════════════════════ grouped aggregation: SUM / MIN / MAX / AVG ═════════════════════════ +;; The existing tests only cover grouped COUNT. These exercise the +;; grouped-agg SUM/MIN/MAX/AVG paths in dl_compile_rule (lines 1644-1696). +(set Db (datoms)) +(set Db (assert-fact Db 1 'dept 10)) +(set Db (assert-fact Db 1 'salary 50)) +(set Db (assert-fact Db 2 'dept 10)) +(set Db (assert-fact Db 2 'salary 100)) +(set Db (assert-fact Db 3 'dept 20)) +(set Db (assert-fact Db 3 'salary 200)) +(set Db (assert-fact Db 4 'dept 20)) +(set Db (assert-fact Db 4 'salary 300)) + +;; grouped SUM — two groups, one row each +(count (query Db (find ?d ?s) (where (dept-sum ?d ?s)) (rules ((emp ?d ?s) (?e :dept ?d) (?e :salary ?s)) ((dept-sum ?d ?s) (sum ?s emp 1 by ?d 0))))) -- 2 + +;; grouped MIN +(count (query Db (find ?d ?m) (where (dept-min ?d ?m)) (rules ((emp2 ?d ?s) (?e :dept ?d) (?e :salary ?s)) ((dept-min ?d ?m) (min ?m emp2 1 by ?d 0))))) -- 2 + +;; grouped MAX +(count (query Db (find ?d ?m) (where (dept-max ?d ?m)) (rules ((emp3 ?d ?s) (?e :dept ?d) (?e :salary ?s)) ((dept-max ?d ?m) (max ?m emp3 1 by ?d 0))))) -- 2 + +;; grouped AVG +(count (query Db (find ?d ?a) (where (dept-avg ?d ?a)) (rules ((emp4 ?d ?s) (?e :dept ?d) (?e :salary ?s)) ((dept-avg ?d ?a) (avg ?a emp4 1 by ?d 0))))) -- 2 + +;; ════════════════════════ scalar aggregates over f64 source columns ═════════════════════════ +;; The existing tests aggregate over I64 columns. These exercise the +;; RAY_F64 source-column paths (lines 1795-1819). +(set Fdb (datoms)) +(set f64src (table ['k 'v] (list [1 2 3] [1.5 2.5 3.5]))) + +;; scalar SUM over f64 +(count (query Fdb (find ?s) (where (fsum ?s)) (rules ((fsum ?s) (sum ?s f64src 1))))) -- 1 + +;; scalar MIN over f64 +(count (query Fdb (find ?m) (where (fmin ?m)) (rules ((fmin ?m) (min ?m f64src 1))))) -- 1 + +;; scalar MAX over f64 +(count (query Fdb (find ?m) (where (fmax ?m)) (rules ((fmax ?m) (max ?m f64src 1))))) -- 1 + +;; scalar AVG over f64 +(count (query Fdb (find ?a) (where (favg ?a)) (rules ((favg ?a) (avg ?a f64src 1))))) -- 1 + +;; ════════════════════════ empty-source AVG and MAX aggregates ═════════════════════════ +;; AVG and MAX over empty source — should produce 0 rows (lines 1725-1732). +(set Empty (datoms)) +(count (query Empty (find ?a) (where (emavg ?a)) (rules ((esrc ?e ?v) (?e :nope ?v)) ((emavg ?a) (avg ?a esrc 1))))) -- 0 +(count (query Empty (find ?m) (where (emmax ?m)) (rules ((esrc2 ?e ?v) (?e :nope ?v)) ((emmax ?m) (max ?m esrc2 1))))) -- 0 + +;; ════════════════════════ expression comparison with f64 operands ═════════════════════════ +;; Covers the use_f64 path in DL_CMP (lines 2016-2032). +(set Db (datoms)) +(set Db (assert-fact Db 1 'val 10)) +(set Db (assert-fact Db 2 'val 20)) +(set Db (assert-fact Db 3 'val 30)) + +;; f64 expression < : LHS = x + 0.5, RHS = x * 1.0. LHS > RHS always => 0. +(count (query Db (find ?e) (where (?e :val ?x) (< (+ ?x 0.5) (* ?x 1.0))))) -- 0 + +;; f64 expression > : all 3 pass +(count (query Db (find ?e) (where (?e :val ?x) (> (+ ?x 0.5) (* ?x 1.0))))) -- 3 + +;; f64 expression == +(count (query Db (find ?e) (where (?e :val ?x) (== (+ ?x 0.0) (* ?x 1.0))))) -- 3 + +;; f64 expression != +(count (query Db (find ?e) (where (?e :val ?x) (!= (+ ?x 0.0) (* ?x 1.0))))) -- 0 + +;; f64 expression >= +(count (query Db (find ?e) (where (?e :val ?x) (>= (+ ?x 0.5) (* ?x 1.0))))) -- 3 + +;; f64 expression <= +(count (query Db (find ?e) (where (?e :val ?x) (<= (+ ?x 0.5) (* ?x 1.0))))) -- 0 + +;; ════════════════════════ f64 comparison: var vs constant (rhs_src NULL) ═════════════════════════ +;; Exercises the f64 CMP path when rhs_src is NULL (const), line 2023-2024. +(count (query Db (find ?e ?y) (where (?e :val ?x) (= ?y (+ ?x 0.5)) (> ?y 20.0)))) -- 2 + +;; ════════════════════════ integer division by zero in expressions ═════════════════════════ +;; Exercises the rd[r] != 0 ? ... : 0 branch in i64 binop path (line 748). +(count (query Db (find ?e ?y) (where (?e :val ?x) (= ?y (/ ?x 0))))) -- 3 + +;; Exercises the rd[r] != 0.0 ? ... : 0.0 branch in f64 binop path (line 725). +(count (query Db (find ?e ?y) (where (?e :val ?x) (= ?y (/ ?x 0.0))))) -- 3 + +;; ════════════════════════ f64 subtraction, multiplication, division in expressions ═════════════════════════ +(count (query Db (find ?e ?y) (where (?e :val ?x) (= ?y (- ?x 0.5)) (> ?y 10.0)))) -- 2 +(count (query Db (find ?e ?y) (where (?e :val ?x) (= ?y (* ?x 2.0)) (>= ?y 40.0)))) -- 2 +(count (query Db (find ?e ?y) (where (?e :val ?x) (= ?y (/ ?x 2.0)) (> ?y 5.0)))) -- 2 + +;; ════════════════════════ negation with constant in negated body ═════════════════════════ +;; Exercises the constant-filter-on-negated-relation path (lines 1466-1483). +(set Db (datoms)) +(set Db (assert-fact Db 1 'tag 'a)) +(set Db (assert-fact Db 2 'tag 'b)) +(set Db (assert-fact Db 3 'tag 'a)) +;; Exclude entities whose tag is 'a — only entity 2 should remain. +(count (query Db (find ?e) (where (?e :tag ?t) (not (?e :tag 'a))))) -- 1 +;; Negated triple with non-matching constant — nobody is excluded. +(count (query Db (find ?e) (where (?e :tag ?t) (not (?e :tag 'zzz))))) -- 3 + +;; ════════════════════════ cross-product join (no shared variables) ═════════════════════════ +;; Two body atoms with disjoint variable sets: exercises the n_jk==0 +;; cross-product path in dl_compile_rule (lines 1355-1358). +(set Db (datoms)) +(set Db (assert-fact Db 1 'color 'red)) +(set Db (assert-fact Db 2 'shape 'circle)) +(set Db (assert-fact Db 3 'shape 'square)) +(count (query Db (find ?c ?s) (where (cross ?c ?s)) (rules ((cross ?c ?s) (?e1 :color ?c) (?e2 :shape ?s))))) -- 2 + +;; ════════════════════════ CMP with all-rows-pass (count == nrows) ═════════════════════════ +;; Exercises the early-exit at line 2053 (all rows pass the filter). +(set Db (datoms)) +(set Db (assert-fact Db 1 'val 10)) +(set Db (assert-fact Db 2 'val 20)) +(count (query Db (find ?e ?x) (where (?e :val ?x) (> ?x 0)))) -- 2 + +;; ════════════════════════ string literal in body position ═════════════════════════ +;; Exercises the RAY_STR branch in dl_set_body_pos (line 3582-3589). +;; "Alice" interned as sym matches the DATOM-tagged cell via tag-aware compare. +(set Db (datoms)) +(set Db (assert-fact Db 1 'name 'Alice)) +(count (query Db (find ?e) (where (?e :name "Alice")))) -- 1 +;; string that does NOT match any stored value +(count (query Db (find ?e) (where (?e :name "Nonexistent")))) -- 0 + +;; ════════════════════════ EQ and NE comparison with integer const ═════════════════════════ +(set Db (datoms)) +(set Db (assert-fact Db 1 'score 50)) +(set Db (assert-fact Db 2 'score 50)) +(set Db (assert-fact Db 3 'score 100)) +(count (query Db (find ?e ?s) (where (?e :score ?s) (== ?s 50)))) -- 2 +(count (query Db (find ?e ?s) (where (?e :score ?s) (!= ?s 50)))) -- 1 + +;; ════════════════════════ aggregate: env-backed table source ═════════════════════════ +;; Exercises the pred_arity==0 sentinel path in env auto-register (line 4145). +(set Db (datoms)) +(set agg_src (table ['k 'v] (list [1 2 3] [10 20 30]))) +(count (query Db (find ?s) (where (etotal ?s)) (rules ((etotal ?s) (sum ?s agg_src 1))))) -- 1 +(count (query Db (find ?n) (where (ecount ?n)) (rules ((ecount ?n) (count ?n agg_src))))) -- 1 + +;; ════════════════════════ multiple find variables from different body atoms ═════════════════════════ +(set Db (datoms)) +(set Db (assert-fact Db 1 'name 'Alice)) +(set Db (assert-fact Db 1 'age 30)) +(set Db (assert-fact Db 2 'name 'Bob)) +(set Db (assert-fact Db 2 'age 25)) +(count (query Db (find ?e ?n ?a) (where (?e :name ?n) (?e :age ?a)))) -- 2 + +;; ════════════════════════ wildcard in triple positions ═════════════════════════ +(set Db (datoms)) +(set Db (assert-fact Db 1 'x 10)) +(set Db (assert-fact Db 2 'y 20)) +(set Db (assert-fact Db 3 'z 30)) +(count (query Db (find ?e) (where (_ :x ?e)))) -- 1 + +;; ════════════════════════ aggregate: count with empty env-backed source ═════════════════════════ +(set Empty (datoms)) +(set empty_src (table ['a 'b] (list [] []))) +(count (query Empty (find ?n) (where (zero-count ?n)) (rules ((zero-count ?n) (count ?n empty_src))))) -- 1 + +;; ════════════════════════ query with inline rules — body references unknown predicate ═════════════════════════ +(set Db (datoms)) +(count (query Db (find ?x) (where (phantom ?x)) (rules ((phantom ?x) (nonexistent-rel ?x))))) -- 0 + +;; ════════════════════════ neg with no shared keys (n_keys==0) ═════════════════════════ +(set Db (datoms)) +(set Db (assert-fact Db 1 'tag 'a)) +(set Db (assert-fact Db 2 'tag 'b)) +(count (query Db (find ?e) (where (?e :tag ?t) (not (nope-rel ?z))) (rules ((nope-rel ?z) (?z :nope ?v))))) -- 2 + +;; ════════════════════════ assignment followed by comparison ═════════════════════════ +(set Db (datoms)) +(set Db (assert-fact Db 1 'x 5)) +(set Db (assert-fact Db 2 'x 15)) +(set Db (assert-fact Db 3 'x 25)) +(count (query Db (find ?e ?y) (where (?e :x ?x) (= ?y (* ?x 2)) (> ?y 20)))) -- 2 + +;; ════════════════════════ all six comparison ops with integer const ═════════════════════════ +(set Db (datoms)) +(set Db (assert-fact Db 1 'n 10)) +(set Db (assert-fact Db 2 'n 20)) +(set Db (assert-fact Db 3 'n 30)) +(count (query Db (find ?e) (where (?e :n ?v) (> ?v 15)))) -- 2 +(count (query Db (find ?e) (where (?e :n ?v) (>= ?v 20)))) -- 2 +(count (query Db (find ?e) (where (?e :n ?v) (< ?v 25)))) -- 2 +(count (query Db (find ?e) (where (?e :n ?v) (<= ?v 20)))) -- 2 +(count (query Db (find ?e) (where (?e :n ?v) (== ?v 20)))) -- 1 +(count (query Db (find ?e) (where (?e :n ?v) (!= ?v 20)))) -- 2 + +;; ════════════════════════ var-var comparison — all six ops ═════════════════════════ +(set Db (datoms)) +(set Db (assert-fact Db 1 'a 10)) +(set Db (assert-fact Db 1 'b 20)) +(set Db (assert-fact Db 2 'a 30)) +(set Db (assert-fact Db 2 'b 25)) +(count (query Db (find ?e) (where (?e :a ?x) (?e :b ?y) (< ?x ?y)))) -- 1 +(count (query Db (find ?e) (where (?e :a ?x) (?e :b ?y) (> ?x ?y)))) -- 1 +(count (query Db (find ?e) (where (?e :a ?x) (?e :b ?y) (>= ?x ?y)))) -- 1 +(count (query Db (find ?e) (where (?e :a ?x) (?e :b ?y) (<= ?x ?y)))) -- 1 +(count (query Db (find ?e) (where (?e :a ?x) (?e :b ?y) (== ?x ?y)))) -- 0 +(count (query Db (find ?e) (where (?e :a ?x) (?e :b ?y) (!= ?x ?y)))) -- 2 + +;; ════════════════════════ flipped const-op-var comparison (EQ / NE branches) ═════════════════════════ +(set Db (datoms)) +(set Db (assert-fact Db 1 'v 10)) +(set Db (assert-fact Db 2 'v 20)) +(count (query Db (find ?e ?x) (where (?e :v ?x) (== 10 ?x)))) -- 1 +(count (query Db (find ?e ?x) (where (?e :v ?x) (!= 10 ?x)))) -- 1 + +;; ════════════════════════ SYM column in aggregate (auto-converted to I64) ═════════════════════════ +;; SYM columns get converted to I64 by the env auto-register path, so this +;; computes the sum of sym IDs. Exercises the I64 aggregate path with +;; originally-SYM data. 1 result row. +(set Db (datoms)) +(set sym_vals (table ['k 'v] (list [1 2] ['x 'y]))) +(count (query Db (find ?s) (where (symsum ?s)) (rules ((symsum ?s) (sum ?s sym_vals 1))))) -- 1 + +;; ════════════════════════ value-col bounds check ═════════════════════════ +(set Db (datoms)) +(set twocol (table ['a 'b] (list [1 2] [3 4]))) +(query Db (find ?s) (where (bad-idx ?s)) (rules ((bad-idx ?s) (sum ?s twocol 5)))) !- domain + +;; ════════════════════════ table_union pass-through (a is empty) ═════════════════════════ +(set Db (datoms)) +(set Db (assert-fact Db 1 'tag 'x)) +(count (query Db (find ?e) (where (simple ?e)) (rules ((simple ?e) (?e :tag 'x))))) -- 1 + +;; ════════════════════════ recursive fixpoint — semi-naive delta loop ═════════════════════════ +(set Db (datoms)) +(set Db (assert-fact Db 1 'e 2)) +(set Db (assert-fact Db 2 'e 3)) +(set Db (assert-fact Db 3 'e 4)) +(set Db (assert-fact Db 4 'e 5)) +(count (query Db (find ?x ?y) (where (path ?x ?y)) (rules ((path ?x ?y) (?x :e ?y)) ((path ?x ?z) (?x :e ?y) (path ?y ?z))))) -- 10 + +;; ════════════════════════ stratified negation — multi-stratum ═════════════════════════ +(set Db (datoms)) +(set Db (assert-fact Db 1 'tag 'keep)) +(set Db (assert-fact Db 2 'tag 'skip)) +(set Db (assert-fact Db 3 'tag 'keep)) +(count (query Db (find ?e) (where (derived ?e)) (rules ((base ?e) (?e :tag ?t)) ((excluded ?e) (?e :tag 'skip)) ((derived ?e) (base ?e) (not (excluded ?e)))))) -- 2 + +;; ════════════════════════ positive dependency across strata ═════════════════════════ +(set Db (datoms)) +(set Db (assert-fact Db 1 'x 10)) +(count (query Db (find ?e) (where (layer3 ?e)) (rules ((layer1 ?e ?v) (?e :x ?v)) ((layer2 ?e) (layer1 ?e ?v)) ((layer3 ?e) (layer2 ?e) (not (layer1 ?e 999)))))) -- 1 + +;; ════════════════════════ CMP filter that removes all rows ═════════════════════════ +(set Db (datoms)) +(set Db (assert-fact Db 1 'val 5)) +(count (query Db (find ?e ?v) (where (?e :val ?v) (> ?v 100)))) -- 0 + +;; ════════════════════════ aggregate-only rule (no positive body atoms) ═════════════════════════ +(set Db (datoms)) +(set cnt_src (table ['id] (list [1 2 3 4 5]))) +(count (query Db (find ?n) (where (total ?n)) (rules ((total ?n) (count ?n cnt_src))))) -- 1 + +;; ════════════════════════ body with quoted symbol (non-variable sym) ═════════════════════════ +(set Db (datoms)) +(set Db (assert-fact Db 1 'color 'red)) +(set Db (assert-fact Db 2 'color 'blue)) +(count (query Db (find ?e) (where (?e :color 'red)))) -- 1 +(count (query Db (find ?e) (where (?e :color 'blue)))) -- 1 + +;; ════════════════════════ between sugar ═════════════════════════ +(set Db (datoms)) +(set Db (assert-fact Db 1 'score 10)) +(set Db (assert-fact Db 2 'score 50)) +(set Db (assert-fact Db 3 'score 90)) +(count (query Db (find ?e ?s) (where (?e :score ?s) (between ?s 20 80)))) -- 1 +(count (query Db (find ?e ?s) (where (?e :score ?s) (between ?s 0 100)))) -- 3 + +;; ════════════════════════ table_distinct on single-row table ═════════════════════════ +(set Db (datoms)) +(set Db (assert-fact Db 1 'solo 42)) +(count (query Db (find ?e ?v) (where (?e :solo ?v)))) -- 1 + +;; ════════════════════════ dl_is_triple_pattern — constant entity ═════════════════════════ +(set Db (datoms)) +(set Db (assert-fact Db 1 'name 100)) +(set Db (assert-fact Db 2 'name 200)) +(count (query Db (find ?v) (where (1 :name ?v)))) -- 1 + +;; ════════════════════════ SUM over empty source (identity = 0) ═════════════════════════ +(set Db (datoms)) +(set empty_tab (table ['a 'b] (list [] []))) +(count (query Db (find ?s) (where (empty-sum ?s)) (rules ((empty-sum ?s) (sum ?s empty_tab 1))))) -- 1 + +;; ════════════════════════ dl_program_free: NULL guard (line 64) ═════════════════════════ +;; Indirectly exercised: dl_program_free(prog) is called from query; when +;; prog->rels has ERR tables the loop in dl_program_free checks IS_ERR. +;; Already covered by error paths. This section exercises the happy path. + +;; ════════════════════════ head with I64 constant affecting IDB alignment (line 177-203) ═════════════════════════ +;; Two rules with the same head predicate but different typed head constants +;; in the same position. The first rule establishes RAY_I64, the second rule +;; has the same type, so no conflict => alignment stays. Exercises the +;; any_change=false early return at line 203. +(set Db (datoms)) +(set Db (assert-fact Db 1 'role 'admin)) +(set Db (assert-fact Db 2 'role 'user)) +(count (query Db (find ?e) (where (has-i ?e)) (rules ((has-i 7) (?e :role 'admin)) ((has-i 8) (?e :role 'user))))) -- 2 + +;; ════════════════════════ env-backed EDB via aggregate source (DL_AGG auto-register) ═════════════════════════ +;; The aggregate references a predicate that exists only as env-bound table. +;; Tests the DL_AGG branch in the env-auto-register loop (lines 4126-4128). +(set Db (datoms)) +(set agg_env (table ['x 'y] (list [1 2 3 4] [10 20 30 40]))) +(count (query Db (find ?n) (where (acount ?n)) (rules ((acount ?n) (count ?n agg_env))))) -- 1 +(count (query Db (find ?s) (where (asum ?s)) (rules ((asum ?s) (sum ?s agg_env 1))))) -- 1 + +;; ════════════════════════ dl_find_rel returns -1 for unknown body pred ═════════════════════════ +;; Exercises line 1297 (rel_idx < 0 => return NULL) in dl_compile_rule. +;; When a positive body atom references a predicate unknown to the program, +;; compile_rule returns NULL silently (no eval_err, just 0 rows). +(set Db (datoms)) +(set Db (assert-fact Db 1 'tag 'x)) +(count (query Db (find ?e) (where (uses-unknown ?e)) (rules ((uses-unknown ?e) (no-such-pred ?e ?v))))) -- 0 + +;; ════════════════════════ table_antijoin: left empty ═════════════════════════ +;; Exercises the ray_table_nrows(left)==0 early return in table_antijoin (line 2368). +(set Db (datoms)) +(count (query Db (find ?e) (where (?e :nope ?v) (not (?e :tag 'x))))) -- 0 + +;; ════════════════════════ table_antijoin: right empty ═════════════════════════ +;; Exercises the right==0 early return in table_antijoin (line 2364). +(set Db (datoms)) +(set Db (assert-fact Db 1 'tag 'x)) +(set Db (assert-fact Db 2 'tag 'y)) +;; negated EAV pattern that matches nothing => right table is empty => all rows pass +(count (query Db (find ?e) (where (?e :tag ?t) (not (?e :missing ?v))))) -- 2 + +;; ════════════════════════ table_distinct: 0 cols early return (line 2332) ═════════════════════════ +;; Indirectly: a query that produces a result with data always has cols. +;; The ncols<=0 path is only triggered by malformed internal tables. + +;; ════════════════════════ SYM column filter via dl_col_eq_row ═════════════════════════ +;; Exercises the RAY_SYM branch of dl_col_eq_row (line 1006-1007) +;; when a body constant is matched against a SYM-type column. +;; This is exercised through sym-to-i64 conversion in env EDB, but +;; the tag-aware compare paths are also triggered by EAV queries. +(set Db (datoms)) +(set Db (assert-fact Db 1 'x 'hello)) +(set Db (assert-fact Db 2 'x 'world)) +(count (query Db (find ?e) (where (?e :x 'hello)))) -- 1 + +;; ════════════════════════ dl_is_wildcard false case (line 3474-3476) ═════════════════════════ +;; Exercises the dl_is_wildcard function returning false for non-_ symbols. +;; Already covered implicitly by every non-wildcard symbol usage. + +;; ════════════════════════ dl_is_aggregate — avg ═════════════════════════ +;; Exercises the strcmp chain in dl_is_aggregate for "avg" (line 3536-3538). +(set Db (datoms)) +(set Db (assert-fact Db 1 'v 10)) +(set Db (assert-fact Db 2 'v 20)) +(count (query Db (find ?a) (where (my-avg ?a)) (rules ((my-src ?e ?v) (?e :v ?v)) ((my-avg ?a) (avg ?a my-src 1))))) -- 1 + +;; ════════════════════════ dl_expr_var: variable reference in expression ═════════════════════════ +;; Exercises DL_EXPR_VAR kind (line 679-691). +(set Db (datoms)) +(set Db (assert-fact Db 1 'a 5)) +(set Db (assert-fact Db 2 'a 15)) +(count (query Db (find ?e ?y) (where (?e :a ?x) (= ?y (+ ?x ?x))))) -- 2 + +;; ════════════════════════ dl_expr_const_f64: float constant in expression ═════════════════════════ +;; Exercises DL_EXPR_CONST_F64 kind (lines 669-677). +(set Db (datoms)) +(set Db (assert-fact Db 1 'p 1)) +(count (query Db (find ?e ?y) (where (?e :p ?x) (= ?y (+ ?x 3.14))))) -- 1 + +;; ════════════════════════ f64 expression: both sides f64 (no promotion) ═════════════════════════ +;; Exercises the is_f64 path in DL_EXPR_BINOP where both sides are already f64 +;; (lines 701-731). +(set Db (datoms)) +(set Db (assert-fact Db 1 'v 1)) +(count (query Db (find ?e ?y) (where (?e :v ?x) (= ?y (+ 1.5 2.5))))) -- 1 + +;; ════════════════════════ aggregate: grouped COUNT over env-backed table ═════════════════════════ +;; Exercises grouped aggregation with a COUNT op (line 1645). +(set Db (datoms)) +(set gcnt (table ['grp 'val] (list [1 1 2 2 2] [10 20 30 40 50]))) +(count (query Db (find ?g ?n) (where (grp-count ?g ?n)) (rules ((grp-count ?g ?n) (count ?n gcnt by ?g 0))))) -- 2 + +;; ════════════════════════ aggregate: grouped COUNT with no source rows ═════════════════════════ +;; Exercises the src_nrows==0 path for grouped aggregation (line 1571-1575). +(set Db (datoms)) +(set gempty (table ['grp 'val] (list [] []))) +(count (query Db (find ?g ?n) (where (eg-count ?g ?n)) (rules ((eg-count ?g ?n) (count ?n gempty by ?g 0))))) -- 0 diff --git a/test/rfl/datalog/datalog_coverage.rfl b/test/rfl/datalog/datalog_coverage.rfl index dde55c46..308e54d6 100644 --- a/test/rfl/datalog/datalog_coverage.rfl +++ b/test/rfl/datalog/datalog_coverage.rfl @@ -303,3 +303,68 @@ (set Db (datoms)) (set Db (assert-fact Db 1 'tag 'x)) (query Db (find ?e) (where (p ?e)) (rules ((p ?e) (?e :tag 'x) (not (q ?e))) ((q ?e) (?e :tag 'x) (not (p ?e))))) !- domain + +;; ════════════════════════ extra error-guard coverage (round 2) ═════════════════════════ +;; scan-eav 3-arg: non-sym attr (line 3281) +(set Db (datoms)) +(set Db (assert-fact Db 1 'age 30)) +(scan-eav Db 1 1) !- type + +;; query: first arg of (find ...) must be a symbol (line 3964). +;; Use an integer in head position so find_elems[0]->type != -RAY_SYM. +(query Db (3 ?e) (where (?e :age 30))) !- type +;; same for (where ...) — line 3995 +(query Db (find ?e) (3 (?e :age 30))) !- type +;; same for the optional fourth arg (rules ...) — line 4014 +(query Db (find ?e) (where (?e :age 30)) (3)) !- type + +;; not: inner clause must be a list (line 3639) +(query Db (find ?e) (where (?e :age 30) (not 5))) !- type +;; not: inner clause head must be a symbol (line 3658) +(query Db (find ?e) (where (?e :age 30) (not (5 ?e)))) !- type + +;; aggregate: predicate must be a symbol (line 3688) +(query Db (find ?n) (where (foo ?n)) (rules ((employee ?e ?v) (?e :age ?v)) ((foo ?n) (sum ?n 1 1)))) !- type +;; aggregate: group key must be ?variable (line 3717) +(query Db (find ?n ?k) (where (foo ?n ?k)) (rules ((employee ?e ?v) (?e :age ?v)) ((foo ?n ?k) (sum ?n employee 1 by 5 0)))) !- type +;; aggregate: at most one value column index (line 3732) +(query Db (find ?n) (where (foo ?n)) (rules ((employee ?e ?v) (?e :age ?v)) ((foo ?n) (sum ?n employee 1 2)))) !- type +;; aggregate: unexpected token in aggregate clause (line 3738) — a non-int, non-by symbol +(query Db (find ?n) (where (foo ?n)) (rules ((employee ?e ?v) (?e :age ?v)) ((foo ?n) (sum ?n employee 'bogus)))) !- type + +;; Rule special form: assignment with unparseable RHS expression (line 3782). +;; (= ?y (zzz ?x)) — zzz is not a known arithmetic operator, +;; so dl_build_expr returns NULL and we hit the type guard. +(query Db (find ?e ?y) (where (?e :age ?x) (= ?y (zzz ?x)))) !- type + +;; Expression comparison: LHS expression that fails to parse (line 3827). +;; (< (zzz ?x) ?y) — neither side is a plain var or const, so the +;; expression-builder branch fires; with zzz unknown, le ends up NULL. +(query Db (find ?e) (where (?e :age ?x) (?e :age ?y) (< (zzz ?x) (+ ?y 1)))) !- type + +;; ════════════════════════ rule special form extra guards ═════════════════════════ +;; rule body: (not 5) — non-list inner clause (also covers 3639 via (rule)) +(rule (bad-r ?e) (?e :age 30) (not 5)) !- type +;; rule body: (not (5 ?x)) — non-sym inner head +(rule (bad-r ?e) (?e :age 30) (not (5 ?e))) !- type +;; rule body: aggregate predicate not a symbol +(rule (bad-r ?n) (sum ?n 1 1)) !- type +;; rule body: aggregate with unexpected token +(rule (bad-r ?n) (sum ?n employee 'bogus)) !- type + +;; rule/query: unrecognized body clause form (line 3848). +;; A body clause whose head is itself a list (not a symbol) — falls past +;; every recognized shape and lands at the final unrecognized-form guard. +(query Db (find ?e) (where (?e :age 30) ((+ 1 2) ?e))) !- type + +;; rule: cannot evaluate constant in body (line 3594). +;; Pass a body-position term whose ray_eval fails — an apply of an +;; unknown function lands in the dl_set_body_pos fallback path. +;; ray_eval returns its own error (e.g. "name" for unknown function), so +;; the guard returns that error verbatim — both branches of the if are +;; taken across this test and the empty-result case above. +(query Db (find ?e) (where (?e :age 30) (some-rel ?e (nonexistent-fn 1)))) !- name +;; rule: unsupported constant type in body (line 3601). +;; A body-position term that evaluates successfully to a non-i64/sym +;; (a list literal here) trips the unsupported-constant-type guard. +(query Db (find ?e) (where (?e :age 30) (some-rel ?e [1 2 3]))) !- type diff --git a/test/rfl/expr/narrow_binary.rfl b/test/rfl/expr/narrow_binary.rfl index dd168f0e..1f53cb74 100644 --- a/test/rfl/expr/narrow_binary.rfl +++ b/test/rfl/expr/narrow_binary.rfl @@ -149,3 +149,37 @@ ;; ray_div sets out_type=RAY_F64 → unreachable from RFL ;; OP_MIN2/MAX2: not exposed in RFL (only via C API ray_min2/ray_max2) ;; =================================================================== + +;; =================================================================== +;; expr_load_f64 column-type conversion paths (lines 622-645) +;; Forced via select{}: F64 expr promotion from narrow column requires +;; conversion into f64 scratch buffer. +;; =================================================================== + +;; I32 column → F64 scratch (line 633-636) +(set _Ti32 (table [x] (list (as 'I32 [1 2 3])))) +(at (select {y: (* x 2.0) from: _Ti32}) 'y) -- [2.0 4.0 6.0] + +;; I16 column → F64 scratch (line 641-644) +(set _Ti16 (table [x] (list (as 'I16 [10 20 30])))) +(at (select {y: (* x 2.0) from: _Ti16}) 'y) -- [20.0 40.0 60.0] + +;; U8 column → F64 scratch (line 637-640) +(set _Tu8 (table [x] (list (as 'U8 [4 5 6])))) +(at (select {y: (* x 2.0) from: _Tu8}) 'y) -- [8.0 10.0 12.0] + +;; TIMESTAMP column → F64 scratch (line 625-628 I64/TIMESTAMP path) +(set _Tts (table [x] (list (as 'TIMESTAMP [1000 2000 3000])))) +(at (select {y: (* x 2.0) from: _Tts}) 'y) -- [2000.0 4000.0 6000.0] + +;; DATE column → F64 scratch (line 633: DATE/TIME path via I32) +(set _Td (table [x] (list (as 'DATE [0 1 2])))) +(at (select {y: (* x 2.0) from: _Td}) 'y) -- [0.0 2.0 4.0] + +;; TIME column → F64 scratch +(set _Tt (table [x] (list (as 'TIME [100 200 300])))) +(at (select {y: (* x 2.0) from: _Tt}) 'y) -- [200.0 400.0 600.0] + +;; BOOL column → F64 scratch (line 637-640 U8/BOOL path) +(set _Tb (table [x] (list (as 'BOOL [true false true])))) +(at (select {y: (* x 2.0) from: _Tb}) 'y) -- [2.0 0.0 2.0] diff --git a/test/rfl/graph/traverse_coverage.rfl b/test/rfl/graph/traverse_coverage.rfl index f5f6813c..da351e10 100644 --- a/test/rfl/graph/traverse_coverage.rfl +++ b/test/rfl/graph/traverse_coverage.rfl @@ -325,6 +325,31 @@ (count VeSpoke2) -- 1 (first (at VeSpoke2 '_end)) -- 0 +;; ===================================================================== +;; Section 6: type-guard error paths in graph_builtin.c +;; - .graph.pagerank (line 374): damping must be -RAY_F64 or int +;; - .graph.k-shortest (line 634): src/dst/k must all be int +;; - .graph.var-expand (line 726): src/min/max must all be int +;; - .graph.var-expand (line 742): track-path must be bool or int +;; ===================================================================== + +;; pagerank with non-numeric damping (3rd arg must be float or int). +(.graph.pagerank K4 10 "bad") !- type + +;; k-shortest with non-int src/dst/k. Use a string for src. +(.graph.k-shortest K4 "bad" 1 2) !- type +(.graph.k-shortest K4 0 "bad" 2) !- type +(.graph.k-shortest K4 0 1 "bad") !- type + +;; var-expand with non-int src. +(.graph.var-expand K4 "bad" 1 2) !- type +;; var-expand with non-int min-depth. +(.graph.var-expand K4 0 "bad" 2) !- type +;; var-expand with non-int max-depth. +(.graph.var-expand K4 0 1 "bad") !- type +;; var-expand with non-bool / non-int track-path arg (6-arg form). +(.graph.var-expand K4 0 1 2 0 "bad") !- type + ;; Cleanup (.graph.free K4) (.graph.free Chain) diff --git a/test/rfl/group/group_key_types.rfl b/test/rfl/group/group_key_types.rfl index b0b5c500..fa39ddf3 100644 --- a/test/rfl/group/group_key_types.rfl +++ b/test/rfl/group/group_key_types.rfl @@ -148,3 +148,52 @@ (count Rdkm) -- 2 (at (at Rdkm 'mn) 0) -- 10 (at (at Rdkm 'mx) 1) -- 40 + +;; ─── Narrow SYM (W8) group-by ──────────────────────────────────────── +;; CSV reader narrows SYM to W8 when distinct count <= 255. Group-by +;; on the narrow column exercises: +;; - read_col_i64 W8 arm (group.c:185, etc.) +;; - sym_minmax MINMAX_SEG_LOOP W8 dispatch (group.c:3244) +;; - per-group emit of narrow SYM key column (ray_sym_vec_new with +;; k_attrs[k] & RAY_SYM_W_MASK). +(.sys.exec "printf 'id,cat\n1,x\n2,y\n3,x\n4,z\n5,y\n6,x\n7,z\n8,y\n9,x\n10,z\n' > /tmp/rfl_grp_sw8.csv") -- 0 +(set Tsw8 (.csv.read [I64 SYMBOL] "/tmp/rfl_grp_sw8.csv")) +(count Tsw8) -- 10 +(type (at Tsw8 'cat)) -- 'SYM + +;; Group-by on narrow SYM key + count/sum aggs +(set Rsw8 (select {c: (count id) s: (sum id) from: Tsw8 by: cat})) +(count Rsw8) -- 3 +;; cat counts: x=4, y=3, z=3 → sum of counts = 10 +(sum (at Rsw8 'c)) -- 10 +;; cat sums: x: 1+3+6+9=19, y: 2+5+8=15, z: 4+7+10=21 → sum=55 +(sum (at Rsw8 's)) -- 55 + +;; Group-by with min/max aggs — exercises sym_minmax narrow W8 SEG_LOOP +(set Rsw8mm (select {mn: (min id) mx: (max id) from: Tsw8 by: cat})) +(count Rsw8mm) -- 3 +(sum (at Rsw8mm 'mn)) -- 7 ;; 1+2+4 +(sum (at Rsw8mm 'mx)) -- 27 ;; 9+8+10 + +;; first/last on narrow SYM key — type preservation regression check. +(set Rsw8fl (select {f: (first id) l: (last id) from: Tsw8 by: cat asc: cat})) +(count Rsw8fl) -- 3 +(at (at Rsw8fl 'cat) 0) -- 'x +(at (at Rsw8fl 'f) 0) -- 1 +(at (at Rsw8fl 'l) 0) -- 9 + +;; distinct on narrow SYM column +(count (distinct (at Tsw8 'cat))) -- 3 + +;; Type preservation: first/last on the narrow-SYM column itself +(type (first (at Tsw8 'cat))) -- 'sym +(type (last (at Tsw8 'cat))) -- 'sym +(first (at Tsw8 'cat)) -- 'x +(last (at Tsw8 'cat)) -- 'z + +;; Two-key group: narrow SYM + small I32 — composite path with narrow +;; SYM emit. +(set Tsw8b (table [k1 k2 v] (list (take (at Tsw8 'cat) 9) (as 'I32 [1 2 1 2 1 2 1 2 1]) (as 'I64 [10 20 30 40 50 60 70 80 90])))) +(set Rsw8b (select {c: (count v) from: Tsw8b by: [k1 k2]})) +(>= (count Rsw8b) 3) -- true +(.sys.exec "rm -f /tmp/rfl_grp_sw8.csv") -- 0 diff --git a/test/rfl/ops/builtins_branch_cov.rfl b/test/rfl/ops/builtins_branch_cov.rfl new file mode 100644 index 00000000..8395cfbe --- /dev/null +++ b/test/rfl/ops/builtins_branch_cov.rfl @@ -0,0 +1,1195 @@ +;; Branch coverage for src/ops/builtins.c +;; +;; Targets uncovered branches at lines: +;; 68-74 null_literal_str — F32/F64/DATE/TIME/TIMESTAMP/SYM/default +;; 80,83-85 ray_lang_print — error/null/lazy guards +;; 94 ray_lang_print — signbit(-0.0) +;; 126 ray_lang_print — UNARY/BINARY/VARY +;; 142 ray_lang_print — ray_fmt fail fallback +;; 161-222 fmt_interpolate — per-type branches in format +;; 233-253 ray_println_fn — format-string mode / multi-arg +;; 310-327 ray_format_fn — error/no-placeholder +;; 1184-1706 ray_cast_fn — null propagation, atom casts, vec casts +;; 1830-1938 ray_enlist_fn — homogeneous/mixed/promote paths +;; 1945-1976 ray_dict_fn — dict creation from keys/vals +;; 1979-1983 ray_nil_fn — null checks +;; 1986-2001 ray_where_fn — bool vec filter +;; 2222-2697 ray_group_fn — LIST/GUID/STR/scalar paths +;; 2700-3008 ray_concat_fn — str/vec/list/dict/table/atom concat +;; 3012-3068 ray_raze_fn — flat/slow paths +;; 3071-3100 ray_within_fn — I64/F64/I32 within checks + +;; ═════════════════════════════════════════════════════════════════════ +;; 1. null_literal_str — exercise each null-type label via println/format +;; Each typed null atom calls null_literal_str when printed/formatted. +;; ═════════════════════════════════════════════════════════════════════ + +;; F64 null (line 69) — format returns "0Nf" +(format "%" 0Nf) -- "0Nf" + +;; DATE null (line 70) +(format "%" 0Nd) -- "0Nd" + +;; TIME null (line 71) +(format "%" 0Nt) -- "0Nt" + +;; TIMESTAMP null (line 72) +(format "%" 0Np) -- "0Np" + +;; SYM null (line 73) — 0Ns +(format "%" 0Ns) -- "0Ns" + +;; I16 null (line 65) — 0Nh +(format "%" 0Nh) -- "0Nh" + +;; I32 null (line 66) +(format "%" 0Ni) -- "0Ni" + +;; I64 null (line 67) +(format "%" 0Nl) -- "0Nl" + +;; default null (line 74) — null_literal_str returns "null" for types +;; not in the switch (BOOL, U8, GUID, etc.) +;; The all-zeros GUID is the null GUID, and GUID isn't in the switch, +;; so it triggers the default branch. +;; Exercise this via println/format of the null GUID atom: +(format "%" (as 'GUID "00000000-0000-0000-0000-000000000000")) -- "null" + +;; ═════════════════════════════════════════════════════════════════════ +;; 2. ray_lang_print branches — println drives the switch +;; ═════════════════════════════════════════════════════════════════════ + +;; signbit(-0.0) normalisation (line 94): clear_neg_zero via memcpy +(format "%" (neg 0.0)) -- "0" + +;; -RAY_BOOL false path (line 98 ternary) +(format "%" false) -- "false" + +;; -RAY_BOOL true path +(format "%" true) -- "true" + +;; ═════════════════════════════════════════════════════════════════════ +;; 3. fmt_interpolate — per-type branches in "format" / println +;; Lines 166-224: each type arg hits a different branch. +;; ═════════════════════════════════════════════════════════════════════ + +;; I64 branch (line 176-177) +(format "val=%" 42) -- "val=42" + +;; F64 branch (lines 178-181), includes -0.0 signbit guard +(format "val=%" 3.14) -- "val=3.14" +(format "neg=%" (neg 0.0)) -- "neg=0" + +;; BOOL branch (lines 182-183) +(format "b=%" true) -- "b=true" +(format "b=%" false) -- "b=false" + +;; STR branch (lines 184-190) — string arg inlined into output +(format "s=%" "hello") -- "s=hello" + +;; SYM branch (lines 191-200) — symbol arg resolved to name +(format "sym=%" 'mySymbol) -- "sym=mySymbol" + +;; typed null arg (lines 174-175) — triggers null_literal_str inside format +(format "n=%" 0Nl) -- "n=0Nl" +(format "n=%" 0Nf) -- "n=0Nf" +(format "n=%" 0Nd) -- "n=0Nd" +(format "n=%" 0Nt) -- "n=0Nt" +(format "n=%" 0Np) -- "n=0Np" +(format "n=%" 0Ns) -- "n=0Ns" +(format "n=%" 0Nh) -- "n=0Nh" +(format "n=%" 0Ni) -- "n=0Ni" + +;; ray_fmt fallback (lines 203-213) — i32 atom, vec, dict +(format "i32=%" 5i) -- "i32=5" +(format "v=%" [1 2 3]) -- "v=[1 2 3]" +(format "d=%" (dict [a b] (list 1 2))) -- "d={a:1 b:2}" + +;; Multiple substitutions in one format string +(format "%+%=%" 1 2 3) -- "1+2=3" + +;; More args than placeholders — extras ignored +(format "x=%" 1 2 3) -- "x=1" + +;; No placeholder in format string (lines 155-156) — returns as-is +(format "no-placeholder" 1 2) -- "no-placeholder" + +;; ═════════════════════════════════════════════════════════════════════ +;; 4. ray_format_fn — error paths (lines 310-327) +;; ═════════════════════════════════════════════════════════════════════ + +;; Zero args → domain error (line 311) +(format) !- domain + +;; Non-string first arg → type error (line 314) +(format 42 "x") !- type + +;; ═════════════════════════════════════════════════════════════════════ +;; 5. ray_cast_fn — null propagation (lines 1186-1208) +;; Casting typed null atoms to various target types. +;; ═════════════════════════════════════════════════════════════════════ + +;; null → I64 +(nil? (as 'I64 0Nl)) -- true +(nil? (as 'I64 0Nf)) -- true + +;; null → I32 +(nil? (as 'I32 0Ni)) -- true +(nil? (as 'I32 0Nl)) -- true + +;; null → I16 +(nil? (as 'I16 0Nh)) -- true + +;; null → U8 — produces null U8 sentinel +(nil? (as 'U8 0Nl)) -- true + +;; null → F64 +(nil? (as 'F64 0Nf)) -- true +(nil? (as 'F64 0Nl)) -- true + +;; null → BOOL — produces null BOOL sentinel +(nil? (as 'BOOL 0Nl)) -- true + +;; null → SYMBOL +(nil? (as 'SYMBOL 0Ns)) -- true + +;; null → DATE +(nil? (as 'DATE 0Nd)) -- true + +;; null → TIME +(nil? (as 'TIME 0Nt)) -- true + +;; null → TIMESTAMP +(nil? (as 'TIMESTAMP 0Np)) -- true + +;; null → GUID +(nil? (as 'GUID 0Nl)) -- true + +;; null → STR — returns empty string +(as 'STR 0Nl) -- "" + +;; ═════════════════════════════════════════════════════════════════════ +;; 6. ray_cast_fn — atom casts (lines 1215-1706) +;; Systematically hit each input-type branch for each output type. +;; ═════════════════════════════════════════════════════════════════════ + +;; --- Cast to I64 --- +(as 'I64 42) -- 42 ;; already I64 +(as 'I64 3.14) -- 3 ;; F64 → I64 +(as 'I64 true) -- 1 ;; BOOL → I64 +(as 'I64 false) -- 0 ;; BOOL → I64 (false branch) +(as 'I64 5i) -- 5 ;; I32 → I64 +(as 'I64 5h) -- 5 ;; I16 → I64 +(as 'I64 "123") -- 123 ;; STR → I64 + +;; --- Cast to I32 --- +(as 'I32 5i) -- 5i ;; already I32 +(as 'I32 true) -- 1i ;; BOOL → I32 +(as 'I32 42) -- 42i ;; I64 → I32 +(as 'I32 3.14) -- 3i ;; F64 → I32 +(as 'I32 5h) -- 5i ;; I16 → I32 +(as 'I32 "99") -- 99i ;; STR → I32 + +;; --- Cast to I16 --- +(as 'I16 5h) -- 5h ;; already I16 +(as 'I16 true) -- 1h ;; BOOL → I16 +(as 'I16 42) -- 42h ;; I64 → I16 +(as 'I16 42i) -- 42h ;; I32 → I16 +(as 'I16 3.14) -- 3h ;; F64 → I16 +(as 'I16 "10") -- 10h ;; STR → I16 + +;; --- Cast to F64 --- +(as 'F64 1.5) -- 1.5 ;; already F64 +(as 'F64 true) -- 1.0 ;; BOOL → F64 +(as 'F64 false) -- 0.0 ;; BOOL → F64 (false branch) +(as 'F64 42) -- 42.0 ;; I64 → F64 +(as 'F64 5i) -- 5.0 ;; I32 → F64 +(as 'F64 5h) -- 5.0 ;; I16 → F64 +(as 'F64 "2.5") -- 2.5 ;; STR → F64 + +;; --- Cast to BOOL --- +(as 'BOOL true) -- true ;; already BOOL +(as 'BOOL 1) -- true ;; I64 → BOOL +(as 'BOOL 0) -- false ;; I64 → BOOL (zero) +(as 'BOOL 1i) -- true ;; I32 → BOOL +(as 'BOOL 0i) -- false ;; I32 → BOOL (zero) +(as 'BOOL 1h) -- true ;; I16 → BOOL +(as 'BOOL 0h) -- false ;; I16 → BOOL (zero) +(as 'BOOL 1.0) -- true ;; F64 → BOOL +(as 'BOOL 0.0) -- false ;; F64 → BOOL (zero) +(as 'BOOL "hi") -- true ;; STR → BOOL (non-empty) +(nil? (as 'BOOL "")) -- true ;; STR → BOOL (empty → null bool) + +;; --- Cast to STR --- +(as 'STR "hi") -- "hi" ;; already STR +(as 'STR 42) -- "42" ;; I64 → STR +(as 'STR 5i) -- "5" ;; I32 → STR +(as 'STR 5h) -- "5" ;; I16 → STR +(as 'STR 3.14) -- "3.14" ;; F64 → STR +(as 'STR true) -- "true" ;; BOOL → STR +(as 'STR false) -- "false" ;; BOOL → STR +(as 'STR 'foo) -- "foo" ;; SYM → STR + +;; --- Cast to SYMBOL --- +(as 'SYMBOL 'foo) -- 'foo ;; already SYM +(as 'SYMBOL "bar") -- 'bar ;; STR → SYM + +;; Integer/bool/numeric atom → SYM (lines 1394-1409) +(type (as 'SYMBOL 42)) -- 'sym +(type (as 'SYMBOL true)) -- 'sym +(type (as 'SYMBOL 3.14)) -- 'sym +(type (as 'SYMBOL 5i)) -- 'sym +(type (as 'SYMBOL 5h)) -- 'sym + +;; --- Cast to DATE --- +(type (as 'DATE 0)) -- 'date ;; I64 → DATE +(type (as 'DATE 0i)) -- 'date ;; I32 → DATE +(type (as 'DATE 0h)) -- 'date ;; I16 → DATE +(type (as 'DATE 0.0)) -- 'date ;; F64 → DATE +(type (as 'DATE true)) -- 'date ;; BOOL → DATE +(type (as 'DATE "2024.01.15")) -- 'date ;; STR → DATE + +;; --- Cast to TIME --- +(type (as 'TIME 0)) -- 'time ;; I64 → TIME +(type (as 'TIME 0i)) -- 'time ;; I32 → TIME +(type (as 'TIME 0h)) -- 'time ;; I16 → TIME +(type (as 'TIME 0.0)) -- 'time ;; F64 → TIME +(type (as 'TIME true)) -- 'time ;; BOOL → TIME +(type (as 'TIME "12:30:45")) -- 'time ;; STR → TIME with HH:MM:SS +(type (as 'TIME "12:30:45.123")) -- 'time ;; STR → TIME with millis + +;; --- Cast to TIMESTAMP --- +(type (as 'TIMESTAMP 0)) -- 'timestamp ;; I64 → TS +(type (as 'TIMESTAMP 0i)) -- 'timestamp ;; I32 → TS +(type (as 'TIMESTAMP 0h)) -- 'timestamp ;; I16 → TS +(type (as 'TIMESTAMP 0.0)) -- 'timestamp ;; F64 → TS +(type (as 'TIMESTAMP true)) -- 'timestamp ;; BOOL → TS + +;; TIMESTAMP from DATE atom (line 1512-1515) +(type (as 'TIMESTAMP (as 'DATE 100))) -- 'timestamp + +;; TIMESTAMP from TIME atom (line 1511) +(type (as 'TIMESTAMP (as 'TIME 5000))) -- 'timestamp + +;; TIMESTAMP from STR: ISO format (lines 1517-1583) +(type (as 'TIMESTAMP "2024-01-15T12:30:45.000000000")) -- 'timestamp + +;; TIMESTAMP STR with timezone +HH:MM (lines 1558-1579) +(type (as 'TIMESTAMP "2024-01-15T12:30:45.000+05:30")) -- 'timestamp + +;; TIMESTAMP STR with timezone -HHMM (line 1576) +(type (as 'TIMESTAMP "2024-01-15T12:30:45.000-0800")) -- 'timestamp + +;; TIMESTAMP STR with 'Z' UTC marker (line 1567) +(type (as 'TIMESTAMP "2024-01-15T12:30:45.000Z")) -- 'timestamp + +;; TIMESTAMP STR with YYYY.MM.DD format (line 1527) +(type (as 'TIMESTAMP "2024.01.15D12:30:45.000000000")) -- 'timestamp + +;; --- Cast to GUID --- +(type (as 'GUID "01234567-89ab-cdef-0123-456789abcdef")) -- 'guid + +;; --- Cast to U8 --- +(as 'U8 true) -- 0x01 ;; BOOL → U8 +(as 'U8 42h) -- 0x2a ;; I16 → U8 +(as 'U8 42i) -- 0x2a ;; I32 → U8 +(as 'U8 42) -- 0x2a ;; I64 → U8 +(as 'U8 42.0) -- 0x2a ;; F64 → U8 +(as 'U8 "42") -- 0x2a ;; STR → U8 + +;; --- Cast to DICT --- +(type (as 'DICT (table [a b] (list [1 2] [3 4])))) -- 'DICT +(as 'DICT (as 'DICT (dict [x] (list 1)))) -- (dict [x] (list 1)) + +;; --- Cast to TABLE --- +(type (as 'TABLE (dict [a b] (list [1 2] [3 4])))) -- 'TABLE +(as 'TABLE (table [x] (list [1 2]))) -- (table [x] (list [1 2])) + +;; Unknown cast target → domain error (line 1705) +(as 'BADTYPE 42) !- domain + +;; ═════════════════════════════════════════════════════════════════════ +;; 7. Vector casts — cast_vec_numeric (lines 1103-1182) +;; Drive fast-path typed vec → numeric vec conversions. +;; ═════════════════════════════════════════════════════════════════════ + +;; I64 vec → F64 vec (fast path) +(as 'F64 [1 2 3]) -- [1.0 2.0 3.0] + +;; F64 vec → I64 vec +(as 'I64 [1.0 2.5 3.9]) -- [1 2 3] + +;; BOOL vec → I64 vec +(as 'I64 [true false true]) -- [1 0 1] + +;; I64 vec → BOOL vec +(as 'BOOL [0 1 2]) -- [false true true] + +;; F64 vec → BOOL vec +(as 'BOOL [0.0 1.5 0.0]) -- [false true false] + +;; I64 vec → I32 vec +(as 'I32 [1 2 3]) -- [1i 2i 3i] + +;; I32 vec → I64 vec +(as 'I64 [1i 2i 3i]) -- [1 2 3] + +;; I64 vec → I16 vec +(as 'I16 [1 2 3]) -- [1h 2h 3h] + +;; I16 vec → I64 vec +(as 'I64 [1h 2h 3h]) -- [1 2 3] + +;; I64 vec → U8 vec +(type (as 'U8 [1 2 3])) -- 'U8 + +;; U8 vec → I64 vec +(as 'I64 (as 'U8 [1 2 3])) -- [1 2 3] + +;; STR vec → SYMBOL vec (fast path, lines 1144-1156) +(type (as 'SYMBOL ["foo" "bar" "baz"])) -- 'SYM + +;; BOOL vec → I32/I16/U8/F64 vec (various fast paths) +(as 'I32 [true false]) -- [1i 0i] +(as 'I16 [true false]) -- [1h 0h] +(as 'F64 [true false]) -- [1.0 0.0] + +;; I16 vec → I32 vec +(as 'I32 [1h 2h 3h]) -- [1i 2i 3i] + +;; I32 vec → I16 vec +(as 'I16 [1i 2i 3i]) -- [1h 2h 3h] + +;; I16 vec → F64 vec +(as 'F64 [1h 2h 3h]) -- [1.0 2.0 3.0] + +;; I32 vec → F64 vec +(as 'F64 [1i 2i 3i]) -- [1.0 2.0 3.0] + +;; U8 vec → I32 vec +(as 'I32 (as 'U8 [1 2 3])) -- [1i 2i 3i] + +;; U8 vec → F64 vec +(as 'F64 (as 'U8 [1 2 3])) -- [1.0 2.0 3.0] + +;; U8 vec → I16 vec +(as 'I16 (as 'U8 [1 2 3])) -- [1h 2h 3h] + +;; ═════════════════════════════════════════════════════════════════════ +;; 8. Vector casts with nulls — cast_vec_copy_nulls (lines 748-805) +;; Null propagation from source vec to target. +;; ═════════════════════════════════════════════════════════════════════ + +;; I64 vec with null → F64 (null propagates as NaN/0Nf) +(nil? (at (as 'F64 [1 0Nl 3]) 1)) -- true + +;; I64 vec with null → I32 (null propagates) +(nil? (at (as 'I32 [1 0Nl 3]) 1)) -- true + +;; I64 vec with null → I16 (null propagates) +(nil? (at (as 'I16 [1 0Nl 3]) 1)) -- true + +;; I64 vec with null → BOOL (BOOL non-nullable, line 753) +;; BOOL/U8 destinations: null sentinel != 0, so cast produces non-zero → true. +;; The key coverage point is hitting the BOOL/U8 return path in cast_vec_copy_nulls. +(count (as 'BOOL [1 0Nl 3])) -- 3 + +;; I64 vec with null → U8 (U8 non-nullable, line 753) +(count (as 'U8 [1 0Nl 3])) -- 3 + +;; LIST with null elements → I64 vec (LIST branch, lines 758-765) +(nil? (at (as 'I64 (list 1 0Nl 3)) 1)) -- true + +;; ═════════════════════════════════════════════════════════════════════ +;; 9. Temporal casts — cross-unit conversions +;; ═════════════════════════════════════════════════════════════════════ + +;; DATE → TIMESTAMP (days * NS_PER_DAY, line 869-871) +(type (as 'TIMESTAMP (as 'DATE [0 1 2]))) -- 'TIMESTAMP + +;; TIMESTAMP → DATE (floor-div, lines 873-877) +(type (as 'DATE (as 'TIMESTAMP [0 86400000000000 172800000000000]))) -- 'DATE + +;; TIMESTAMP → TIME (floor-mod ns→ms, lines 879-883) +(type (as 'TIME (as 'TIMESTAMP [0 3600000000000 7200000000000]))) -- 'TIME + +;; DATE atom ↔ TIMESTAMP atom (lines 1439, 1512-1515) +(type (as 'DATE (as 'TIMESTAMP 86400000000000))) -- 'date +(type (as 'TIMESTAMP (as 'DATE 1))) -- 'timestamp + +;; TIME atom from TIMESTAMP atom (line 1473-1478) +(type (as 'TIME (as 'TIMESTAMP 3600000000000))) -- 'time + +;; DATE from TIME atom (line 1438) +(type (as 'DATE (as 'TIME 1000))) -- 'date + +;; TIME from DATE atom (line 1472) +(type (as 'TIME (as 'DATE 100))) -- 'time + +;; ═════════════════════════════════════════════════════════════════════ +;; 10. ray_cast_fn — STR → I64/I32/F64 parse errors (domain) +;; ═════════════════════════════════════════════════════════════════════ + +(as 'I64 "not_a_number") !- domain +(as 'I32 "abc") !- domain +(as 'I16 "xyz") !- domain +(as 'F64 "nope") !- domain +(as 'U8 "zzz") !- domain +(as 'DATE "bad-date") !- domain +(as 'TIME "x") !- domain +(as 'TIMESTAMP "too-short") !- domain + +;; type error: unsupported input type +(as 'I64 (dict [a] (list 1))) !- type +(as 'I32 (dict [a] (list 1))) !- type +(as 'F64 (dict [a] (list 1))) !- type +(as 'BOOL (dict [a] (list 1))) !- type +(as 'I16 (dict [a] (list 1))) !- type +(as 'U8 (dict [a] (list 1))) !- type + +;; Cast non-sym type_sym → type error (line 1185) +;; Can't easily trigger from RFL since `as` wraps the first arg as sym. + +;; ═════════════════════════════════════════════════════════════════════ +;; 11. STR vec cast — (as 'STR vec) (lines 1360-1381) +;; ═════════════════════════════════════════════════════════════════════ + +(as 'STR [1 2 3]) -- ["1" "2" "3"] +(as 'STR [true false]) -- ["true" "false"] +(as 'STR [1.5 2.5]) -- ["1.5" "2.5"] +(as 'STR [1i 2i]) -- ["1" "2"] + +;; ═════════════════════════════════════════════════════════════════════ +;; 12. SYMBOL vec cast — (as 'SYMBOL vec) (lines 1424-1426) +;; ═════════════════════════════════════════════════════════════════════ + +(type (as 'SYMBOL [1 2 3])) -- 'SYM +(type (as 'SYMBOL (list "a" "b" "c"))) -- 'SYM + +;; ═════════════════════════════════════════════════════════════════════ +;; 13. ray_enlist_fn — homogeneous/mixed/promote (lines 1830-1938) +;; ═════════════════════════════════════════════════════════════════════ + +;; Empty enlist (line 1831) +(count (enlist)) -- 0 + +;; Homogeneous I64 +(enlist 1 2 3) -- [1 2 3] + +;; Homogeneous F64 +(enlist 1.0 2.0 3.0) -- [1.0 2.0 3.0] + +;; Mixed int/float → promote to F64 (lines 1843-1859) +(type (enlist 1 2.0 3)) -- 'F64 + +;; Homogeneous BOOL +(enlist true false true) -- [true false true] + +;; Homogeneous SYM +(type (enlist 'a 'b 'c)) -- 'SYM + +;; Homogeneous I32 +(type (enlist 1i 2i 3i)) -- 'I32 + +;; Homogeneous I16 +(type (enlist 1h 2h 3h)) -- 'I16 + +;; Homogeneous STR +(type (enlist "a" "b" "c")) -- 'STR + +;; Homogeneous U8 +(type (enlist (as 'U8 1) (as 'U8 2))) -- 'U8 + +;; Heterogeneous → list (lines 1928-1938) +(type (enlist 1 "two" 'three)) -- 'LIST + +;; ═════════════════════════════════════════════════════════════════════ +;; 14. ray_enlist_fn with null atoms — null propagation (lines 1850-1858, 1922-1926) +;; ═════════════════════════════════════════════════════════════════════ + +;; Null in homogeneous I64 enlist → null slot propagated +(nil? (at (enlist 1 0Nl 3) 1)) -- true + +;; Null in mixed int/float promote → dual-encoding (lines 1850-1858) +(nil? (at (enlist 1 0Nl 3.0) 1)) -- true + +;; ═════════════════════════════════════════════════════════════════════ +;; 15. ray_dict_fn — dict creation (lines 1945-1976) +;; ═════════════════════════════════════════════════════════════════════ + +;; Normal dict creation +(at (dict [a b] (list 10 20)) 'a) -- 10 +(at (dict [a b] (list 10 20)) 'b) -- 20 + +;; Vals as vec (lines 1960-1963) +(at (dict [a b] [10 20]) 'a) -- 10 + +;; Vals shorter than keys → tail filled with null (lines 1958-1971) +;; dict with 3 keys and 2 vals — third val padded with null I64 +(nil? (at (dict [a b c] (list 10 20)) 'c)) -- true + +;; ═════════════════════════════════════════════════════════════════════ +;; 16. ray_nil_fn — null checks (lines 1979-1983) +;; ═════════════════════════════════════════════════════════════════════ + +;; Typed null atom → true (line 1981) +(nil? 0Nl) -- true +(nil? 0Nf) -- true +(nil? 0Ni) -- true + +;; Non-null → false (line 1982) +(nil? 42) -- false +(nil? "hello") -- false + +;; ═════════════════════════════════════════════════════════════════════ +;; 17. ray_where_fn — bool vec filter (lines 1986-2001) +;; ═════════════════════════════════════════════════════════════════════ + +(where [true false true false true]) -- [0 2 4] +(where [false false false]) -- [] + +;; Error: non-bool input (line 1988) +(where [1 2 3]) !- type +(where 42) !- type + +;; ═════════════════════════════════════════════════════════════════════ +;; 18. ray_concat_fn — various concat paths (lines 2700-3008) +;; ═════════════════════════════════════════════════════════════════════ + +;; String atom + string atom (lines 2705-2718) +(concat "hello" " world") -- "hello world" + +;; Same-type vec concat (line 2722-2723) +(concat [1 2] [3 4]) -- [1 2 3 4] + +;; List + list (lines 2757-2768) +(concat (list 1 "a") (list 2 "b")) -- (list 1 "a" 2 "b") + +;; Mixed-type vec concat → list (lines 2771-2788) +(type (concat [1 2] [1.0 2.0])) -- 'LIST + +;; Atom + vec (lines 2790-2817) +(concat 0 [1 2 3]) -- [0 1 2 3] + +;; Vec + atom (lines 2819-2845) +(concat [1 2 3] 4) -- [1 2 3 4] + +;; Atom + atom same type (lines 2848-2887) +(concat 1 2) -- [1 2] +(concat 1.0 2.0) -- [1.0 2.0] +(concat true false) -- [true false] + +;; Atom + atom different type (lines 2998-3007) +(type (concat 1 "a")) -- 'LIST +(count (concat 1 "a")) -- 2 + +;; Vec + list (lines 2725-2755) +(count (concat [1 2] (list 3 "a"))) -- 4 + +;; List + vec (lines 2725-2755) +(count (concat (list 1 "a") [3 4])) -- 4 + +;; Atom + list (lines 2970-2981) +(count (concat 1 (list 2 3))) -- 3 +(at (concat 1 (list 2 3)) 0) -- 1 + +;; List + atom (lines 2984-2996) +(count (concat (list 1 2) 3)) -- 3 +(at (concat (list 1 2) 3) 2) -- 3 + +;; Dict concat (lines 2890-2929) +(at (concat (dict [a] (list 1)) (dict [b] (list 2))) 'b) -- 2 + +;; Table concat (lines 2932-2967) +(set T1 (table [a b] (list [1 2] [3 4]))) +(set T2 (table [a b] (list [5 6] [7 8]))) +(count (concat T1 T2)) -- 4 + +;; Table concat type mismatch → error +(set T3 (table [a] (list [1.0 2.0]))) +(set T4 (table [a] (list [1 2]))) +(concat T3 T4) !- type + +;; Table concat missing column → error +(set T5 (table [a b] (list [1] [2]))) +(set T6 (table [a c] (list [3] [4]))) +(concat T5 T6) !- value + +;; ═════════════════════════════════════════════════════════════════════ +;; 19. ray_raze_fn — flatten (lines 3012-3068) +;; ═════════════════════════════════════════════════════════════════════ + +;; Atom passthrough (line 3014) +(raze 42) -- 42 + +;; Typed vec passthrough (line 3016) +(raze [1 2 3]) -- [1 2 3] + +;; Empty list (line 3020) +(count (raze (list))) -- 0 + +;; List of same-type vecs — fast path (lines 3027-3054) +(raze (list [1 2] [3 4] [5 6])) -- [1 2 3 4 5 6] + +;; Non-list error (line 3018) +(raze (dict [a] (list 1))) !- type + +;; ═════════════════════════════════════════════════════════════════════ +;; 20. ray_within_fn — range check (lines 3071-3100) +;; ═════════════════════════════════════════════════════════════════════ + +;; I64 vec (lines 3079-3083) +(within [1 5 10] [3 7]) -- [false true false] + +;; F64 vec (lines 3084-3088) +(within [1.0 5.0 10.0] [3.0 7.0]) -- [false true false] + +;; I32 vec (lines 3089-3093) +(within [1i 5i 10i] [3i 7i]) -- [false true false] + +;; Error: unsupported type (lines 3094-3096) +(within [true false true] [false true]) !- type + +;; Error: wrong input (line 3072) +(within 42 [1 10]) !- type + +;; ═════════════════════════════════════════════════════════════════════ +;; 21. ray_type_fn — type queries (lines 1711-1716) +;; ═════════════════════════════════════════════════════════════════════ + +;; null → 'null (line 1712) +(type null) -- 'null + +;; Atom types (lowercase for atoms) +(type 42) -- 'i64 +(type 3.14) -- 'f64 +(type true) -- 'b8 +(type "hi") -- 'str +(type 'foo) -- 'sym +(type 5i) -- 'i32 +(type 5h) -- 'i16 +;; Vec types (uppercase for vecs) +(type [1 2 3]) -- 'I64 +;; Composite types (uppercase) +(type (dict [a] (list 1))) -- 'DICT + +;; ═════════════════════════════════════════════════════════════════════ +;; 22. ray_group_fn — grouping paths (lines 2222-2697) +;; ═════════════════════════════════════════════════════════════════════ + +;; Empty vec (lines 2226-2232) +(count (group [])) -- 0 + +;; I64 vec (scalar path, lines 2484-2601) +(count (group [1 2 1 3 2])) -- 3 + +;; BOOL vec (lines 2525-2526 in scalar path) +(count (group [true false true false])) -- 2 + +;; F64 vec (lines 2527-2560 in scalar path) +(count (group [1.0 2.0 1.0 3.0])) -- 3 + +;; SYM vec (line 2519) +(count (group ['a 'b 'a 'c])) -- 3 + +;; I32 vec (lines 2521-2522) +(count (group [1i 2i 1i 3i])) -- 3 + +;; I16 vec (lines 2523-2524) +(count (group [1h 2h 1h 3h])) -- 3 + +;; LIST type grouping (lines 2256-2328) +(count (group (list 1 "a" 1 "a" 2))) -- 3 + +;; STR vec grouping (lines 2418-2478) +(count (group (as 'STR ["foo" "bar" "foo" "baz"]))) -- 3 + +;; Group type error (line 2223-2224) +(group 42) !- type + +;; ═════════════════════════════════════════════════════════════════════ +;; 23. GUID casts and operations (lines 1591-1634) +;; ═════════════════════════════════════════════════════════════════════ + +;; GUID already-GUID cast (line 1593) +(set g1 (as 'GUID "01234567-89ab-cdef-0123-456789abcdef")) +(type (as 'GUID g1)) -- 'guid + +;; Empty vec → GUID (line 1612-1613) +(count (as 'GUID [])) -- 0 + +;; List of GUID strings → GUID vec (lines 1615-1632) +(count (as 'GUID (list "01234567-89ab-cdef-0123-456789abcdef" "fedcba98-7654-3210-fedc-ba9876543210"))) -- 2 + +;; ═════════════════════════════════════════════════════════════════════ +;; 24. GUID atom concat (lines 2878-2884 / 2809-2811 / 2839-2840) +;; ═════════════════════════════════════════════════════════════════════ + +(set ga (as 'GUID "01234567-89ab-cdef-0123-456789abcdef")) +(set gb (as 'GUID "fedcba98-7654-3210-fedc-ba9876543210")) + +;; GUID atom + atom → 2-element vec +(count (concat ga gb)) -- 2 +(type (concat ga gb)) -- 'GUID ;; vec type → uppercase + +;; ═════════════════════════════════════════════════════════════════════ +;; 25. Temporal atom cast cross-paths — DATE/TIME/TIMESTAMP (lines 1428-1589) +;; ═════════════════════════════════════════════════════════════════════ + +;; DATE from U8 atom (line 1433) +(type (as 'DATE (as 'U8 10))) -- 'date + +;; TIME from U8 atom (line 1467) +(type (as 'TIME (as 'U8 10))) -- 'time + +;; TIMESTAMP from U8 atom (line 1506) +(type (as 'TIMESTAMP (as 'U8 10))) -- 'timestamp + +;; DATE vec cast (line 1458-1459) +(type (as 'DATE [0 1 2])) -- 'DATE + +;; TIME vec cast (line 1497-1498) +(type (as 'TIME [0 1000 2000])) -- 'TIME + +;; TIMESTAMP vec cast (line 1586-1587) +(type (as 'TIMESTAMP [0 1000 2000])) -- 'TIMESTAMP + +;; ═════════════════════════════════════════════════════════════════════ +;; 26. Temporal cast from TIMESTAMP atom (line 1439, 1473-1478) +;; Pre-epoch timestamps for floor-div / floor-mod correctness. +;; ═════════════════════════════════════════════════════════════════════ + +;; TIMESTAMP → DATE for pre-epoch (ns=-1 should give date -1, i.e. 1999-12-31) +(as 'I32 (as 'DATE (as 'TIMESTAMP -1))) -- -1i + +;; TIMESTAMP → TIME for pre-epoch (ns-of-day should still be positive) +;; ts_ns_in_day(-1) = NS_PER_DAY - 1 → ~86399999 ms +(> (as 'I32 (as 'TIME (as 'TIMESTAMP -1))) 0i) -- true + +;; ═════════════════════════════════════════════════════════════════════ +;; 27. cast_vec_relabel_compat — same-byte-rep casts (lines 823-830) +;; I64↔TIMESTAMP, I32↔DATE↔TIME: memcpy fast path. +;; ═════════════════════════════════════════════════════════════════════ + +;; I64 vec → TIMESTAMP vec (relabel) +(type (as 'TIMESTAMP [0 1 2])) -- 'TIMESTAMP + +;; TIMESTAMP vec → I64 vec (relabel) +(as 'I64 (as 'TIMESTAMP [0 1 2])) -- [0 1 2] + +;; I32 vec → DATE vec (relabel) +(type (as 'DATE [0i 1i 2i])) -- 'DATE + +;; DATE vec → I32 vec (relabel) +(as 'I32 (as 'DATE [0i 1i 2i])) -- [0i 1i 2i] + +;; I32 vec → TIME vec (relabel) +(type (as 'TIME [0i 1000i 2000i])) -- 'TIME + +;; DATE vec → TIME vec (relabel — I32-compatible) +(type (as 'TIME (as 'DATE [0i 1i 2i]))) -- 'TIME + +;; ═════════════════════════════════════════════════════════════════════ +;; 28. SYM atom → STR cast (line 1328-1330) +;; ═════════════════════════════════════════════════════════════════════ + +(as 'STR 'hello) -- "hello" + +;; ═════════════════════════════════════════════════════════════════════ +;; 29. Temporal atom → SYMBOL (ray_fmt path, lines 1412-1421) +;; ═════════════════════════════════════════════════════════════════════ + +(type (as 'SYMBOL (as 'DATE 100))) -- 'sym +(type (as 'SYMBOL (as 'TIME 5000))) -- 'sym +(type (as 'SYMBOL (as 'TIMESTAMP 0))) -- 'sym + +;; ═════════════════════════════════════════════════════════════════════ +;; 30. BOOL cast edge: DATE / TIME / TIMESTAMP → BOOL (lines 1315-1317) +;; ═════════════════════════════════════════════════════════════════════ + +(as 'BOOL (as 'DATE 1)) -- true +(as 'BOOL (as 'DATE 0)) -- false +(as 'BOOL (as 'TIME 1000)) -- true +(as 'BOOL (as 'TIME 0)) -- false +(as 'BOOL (as 'TIMESTAMP 1)) -- true +(as 'BOOL (as 'TIMESTAMP 0)) -- false + +;; ═════════════════════════════════════════════════════════════════════ +;; 31. I32/I16 atom from DATE/TIME atom (lines 1247, 1269) +;; ═════════════════════════════════════════════════════════════════════ + +(as 'I32 (as 'DATE 100)) -- 100i +(as 'I32 (as 'TIME 5000)) -- 5000i +(as 'I16 (as 'DATE 100)) -- 100h +(as 'I16 (as 'TIME 5000)) -- 5000h +(as 'I32 (as 'TIMESTAMP 100)) -- 100i +(as 'I16 (as 'TIMESTAMP 100)) -- 100h + +;; F64 from DATE/TIME/TIMESTAMP (lines 1291-1292) +(as 'F64 (as 'DATE 100)) -- 100.0 +(as 'F64 (as 'TIME 5000)) -- 5000.0 +(as 'F64 (as 'TIMESTAMP 100)) -- 100.0 + +;; U8 from DATE/TIME (through I32 conversion) +;; Not directly — U8 has no DATE/TIME path. Falls through to error. + +;; ═════════════════════════════════════════════════════════════════════ +;; 32. I64 from TIMESTAMP atom (line 1222) +;; ═════════════════════════════════════════════════════════════════════ + +(as 'I64 (as 'TIMESTAMP 12345)) -- 12345 + +;; ═════════════════════════════════════════════════════════════════════ +;; 33. concat with I16/I32/SYM/BOOL/U8/GUID atoms (lines 2796-2813, 2826-2840, 2853-2884) +;; ═════════════════════════════════════════════════════════════════════ + +;; I32 atom + I32 vec +(concat 0i [1i 2i 3i]) -- [0i 1i 2i 3i] + +;; I32 vec + I32 atom +(concat [1i 2i] 3i) -- [1i 2i 3i] + +;; I16 atom + I16 vec +(concat 0h [1h 2h 3h]) -- [0h 1h 2h 3h] + +;; I16 vec + I16 atom +(concat [1h 2h] 3h) -- [1h 2h 3h] + +;; BOOL atom + BOOL vec +(concat true [false true]) -- [true false true] + +;; BOOL vec + BOOL atom +(concat [true false] true) -- [true false true] + +;; SYM atom + SYM vec +(count (concat 'a ['b 'c])) -- 3 + +;; SYM vec + SYM atom +(count (concat ['a 'b] 'c)) -- 3 + +;; I32 atom + I32 atom → 2-element vec +(concat 1i 2i) -- [1i 2i] + +;; I16 atom + I16 atom → 2-element vec +(concat 1h 2h) -- [1h 2h] + +;; BOOL atom + BOOL atom → 2-element vec +(concat true false) -- [true false] + +;; F64 atom + F64 vec +(concat 0.0 [1.0 2.0]) -- [0.0 1.0 2.0] + +;; F64 vec + F64 atom +(concat [1.0 2.0] 3.0) -- [1.0 2.0 3.0] + +;; ═════════════════════════════════════════════════════════════════════ +;; 34. format with DATE/TIME atoms (ray_fmt fallback inside fmt_interpolate) +;; ═════════════════════════════════════════════════════════════════════ + +(type (as 'DATE "2024.01.15")) -- 'date +(type (as 'TIME "12:30:45")) -- 'time + +;; ═════════════════════════════════════════════════════════════════════ +;; 35. GUID GUID vec operations (group, concat) +;; ═════════════════════════════════════════════════════════════════════ + +;; GUID vec grouping (lines 2335-2415) +(set gvec (as 'GUID (list "01234567-89ab-cdef-0123-456789abcdef" "fedcba98-7654-3210-fedc-ba9876543210" "01234567-89ab-cdef-0123-456789abcdef"))) +(count (group gvec)) -- 2 + +;; GUID atom + vec, vec + atom +(set gv2 (as 'GUID (list "01234567-89ab-cdef-0123-456789abcdef"))) +(count (concat ga gv2)) -- 2 +(count (concat gv2 gb)) -- 2 + +;; ═════════════════════════════════════════════════════════════════════ +;; 36. GUID enlist — homogeneous GUID atoms (lines 1911-1917) +;; ═════════════════════════════════════════════════════════════════════ + +(count (enlist ga gb)) -- 2 +(type (enlist ga gb)) -- 'GUID + +;; ═════════════════════════════════════════════════════════════════════ +;; 37. ray_cast_fn — STR atom via ray_fmt fallback (lines 1354-1358) +;; Casting a DATE or other non-standard atom to STR. +;; ═════════════════════════════════════════════════════════════════════ + +(type (as 'STR (as 'DATE 100))) -- 'str +(type (as 'STR (as 'TIME 5000))) -- 'str +(type (as 'STR (as 'TIMESTAMP 0))) -- 'str + +;; ═════════════════════════════════════════════════════════════════════ +;; 38. Group with null elements — null routing (lines 2498-2517) +;; Scalar group path: nullable I64 vec with null values. +;; ═════════════════════════════════════════════════════════════════════ + +;; Group I64 vec with nulls: nulls form their own group +(count (group [1 0Nl 2 0Nl 1])) -- 3 + +;; Group F64 vec with null: null values merge into one null group +;; (null routing path, lines 2498-2517, not NaN path) +(set _gnan (group [1.0 0Nf 2.0 0Nf])) +;; 1.0, 2.0, and one null group = 3 groups total +(count _gnan) -- 3 + +;; Group I32 vec with nulls +(count (group [1i 0Ni 2i 0Ni 1i])) -- 3 + +;; Group I16 vec with nulls +(count (group [1h 0Nh 2h 0Nh 1h])) -- 3 + +;; Group F64 vec (scalar path): exercises the F64 IEEE-754 bit-pattern +;; hash path including -0.0 canonicalization (line 2559) +(count (group [0.0 1.0 0.0 2.0])) -- 3 + +;; ═════════════════════════════════════════════════════════════════════ +;; 39. Dict concat with I64 keys (lines 2907-2910) +;; ═════════════════════════════════════════════════════════════════════ + +;; Dict with I64 keys: exercises the I64/TIMESTAMP key dispatch path +(set _d1 (dict [1 2] (list "a" "b"))) +(set _d2 (dict [2 3] (list "c" "d"))) +(at (concat _d1 _d2) 3) -- "d" +(at (concat _d1 _d2) 2) -- "c" + +;; ═════════════════════════════════════════════════════════════════════ +;; 40. Group null key sentinel propagation (lines 2654-2675) +;; When source vec has nulls, group keys should carry the null marker. +;; ═════════════════════════════════════════════════════════════════════ + +;; I64 vec group with null — check null key exists +(set _gnull (group [1 0Nl 2])) +(count _gnull) -- 3 + +;; F64 vec group with -0.0 and +0.0 (should merge, line 2559) +;; Can't embed (neg 0.0) in a vec literal; use list + cast instead +(set _negzero (neg 0.0)) +(set _fv (as 'F64 (list 0.0 _negzero 1.0))) +(count (group _fv)) -- 2 + +;; ═════════════════════════════════════════════════════════════════════ +;; 41. Dict concat with LIST keys (line 2901-2902) +;; ═════════════════════════════════════════════════════════════════════ + +;; Dict with SYM keys concat — exercises the SYM key dispatch (line 2903-2906) +(set _dl1 (dict ['a 'b] (list 1 2))) +(set _dl2 (dict ['b 'c] (list 3 4))) +(at (concat _dl1 _dl2) 'c) -- 4 + +;; ═════════════════════════════════════════════════════════════════════ +;; 42. concat error paths — unsupported type combos +;; ═════════════════════════════════════════════════════════════════════ + +;; Table + non-table → type error (line 3008) +(concat (table [a] (list [1])) 42) !- type + +;; ═════════════════════════════════════════════════════════════════════ +;; 43. Raze slow path — mixed-type list items (lines 3059-3067) +;; ═════════════════════════════════════════════════════════════════════ + +;; List of different-typed vecs → slow path pairwise concat +(count (raze (list [1 2] [1.0 2.0]))) -- 4 + +;; List with SYM vecs → slow path (SYM excluded from fast path) +(count (raze (list ['a 'b] ['c 'd]))) -- 4 + +;; ═════════════════════════════════════════════════════════════════════ +;; 44. println returns null — via format string mode check +;; println with format string writes to stdout (can't assert output), +;; but we can assert it returns null. +;; ═════════════════════════════════════════════════════════════════════ + +;; println with format string mode (lines 238-249) +(nil? (println "hello % world" 42)) -- true + +;; println with multiple args, no format (lines 252-253) +(nil? (println 1 2 3)) -- true + +;; print (like println without newline) +(nil? (print "x=%" 42)) -- true +(nil? (print 1 2)) -- true + +;; show +(nil? (show 42)) -- true +(nil? (show [1 2 3])) -- true + +;; ═════════════════════════════════════════════════════════════════════ +;; 45. println typed null atoms — exercise null_literal_str via println +;; These print to stdout; we verify they don't error by checking nil?. +;; ═════════════════════════════════════════════════════════════════════ + +(nil? (println 0Nf)) -- true +(nil? (println 0Nd)) -- true +(nil? (println 0Nt)) -- true +(nil? (println 0Np)) -- true +(nil? (println 0Ns)) -- true +(nil? (println 0Nh)) -- true +(nil? (println 0Ni)) -- true + +;; ═════════════════════════════════════════════════════════════════════ +;; 46. Temporal vec casts through generic path (LIST input) +;; ═════════════════════════════════════════════════════════════════════ + +;; DATE vec from list (goes through generic path) +(type (as 'DATE (list 0 1 2))) -- 'DATE + +;; TIME vec from list +(type (as 'TIME (list 0 1000 2000))) -- 'TIME + +;; TIMESTAMP vec from list +(type (as 'TIMESTAMP (list 0 1000 2000))) -- 'TIMESTAMP + +;; U8 vec from list +(type (as 'U8 (list 1 2 3))) -- 'U8 + +;; I16 vec from list +(type (as 'I16 (list 1 2 3))) -- 'I16 + +;; I32 vec from list +(type (as 'I32 (list 1 2 3))) -- 'I32 + +;; BOOL vec from list +(type (as 'BOOL (list 1 0 1))) -- 'B8 + +;; ═════════════════════════════════════════════════════════════════════ +;; 47. Enlist GUID with null atoms (lines 1922-1926) +;; ═════════════════════════════════════════════════════════════════════ + +;; GUID enlist +(set ga2 (as 'GUID "01234567-89ab-cdef-0123-456789abcdef")) +(set gb2 (as 'GUID "fedcba98-7654-3210-fedc-ba9876543210")) +(count (enlist ga2 gb2)) -- 2 + +;; ═════════════════════════════════════════════════════════════════════ +;; 48. Format with GUID atom — exercises ray_fmt fallback +;; ═════════════════════════════════════════════════════════════════════ + +(format "guid=%" (as 'GUID "01234567-89ab-cdef-0123-456789abcdef")) -- "guid=01234567-89ab-cdef-0123-456789abcdef" + +;; ═════════════════════════════════════════════════════════════════════ +;; 49. Cast type-error paths — unsupported source types for each target +;; ═════════════════════════════════════════════════════════════════════ + +;; STR cast of unsupported type → type error (line 1381) +(as 'STR (table [a] (list [1]))) !- type + +;; SYMBOL cast of unsupported type → type error (line 1426) +(as 'SYMBOL (table [a] (list [1]))) !- type + +;; DATE cast of unsupported type → type error (line 1460) +(as 'DATE (dict [a] (list 1))) !- type + +;; TIME cast of unsupported type → type error (line 1499) +(as 'TIME (dict [a] (list 1))) !- type + +;; TIMESTAMP cast of unsupported type → type error (line 1588) +(as 'TIMESTAMP (dict [a] (list 1))) !- type + +;; GUID cast of unsupported type → type error (line 1633) +(as 'GUID 42) !- type + +;; DICT cast of unsupported type → type error (line 1676) +(as 'DICT [1 2 3]) !- type + +;; TABLE cast of unsupported type → type error (line 1702) +(as 'TABLE [1 2 3]) !- type + +;; TABLE from dict with non-SYM keys → type error (line 1688) +(as 'TABLE (dict [1 2] (list "a" "b"))) !- type + +;; ═════════════════════════════════════════════════════════════════════ +;; 50. BOOL vec → BOOL vec cast (line 922 in cast_range_worker) +;; and U8 vec → BOOL vec cast (line 944) +;; ═════════════════════════════════════════════════════════════════════ + +;; BOOL vec → BOOL: identity-like (passthrough or relabel) +(as 'BOOL [true false true]) -- [true false true] + +;; U8 vec → BOOL vec (line 944) +(as 'BOOL (as 'U8 [0 1 2])) -- [false true true] + +;; BOOL vec → U8 vec (line 922) +(type (as 'U8 [true false true])) -- 'U8 + +;; ═════════════════════════════════════════════════════════════════════ +;; 51. Dict vals as single atom — scalar val (line 1963) +;; ═════════════════════════════════════════════════════════════════════ + +;; dict where vals is a single atom (not list/vec) — broadcasts to all keys +(at (dict [a b] 42) 'a) -- 42 +(at (dict [a b] 42) 'b) -- 42 + +;; ═════════════════════════════════════════════════════════════════════ +;; 52. Enlist default → as_list (line 1919) +;; Homogeneous temporal atoms don't have an explicit switch case. +;; ═════════════════════════════════════════════════════════════════════ + +;; DATE atoms → I32 switch branch (RAY_DATE maps to I32 case in enlist) +(type (enlist (as 'DATE 0) (as 'DATE 1))) -- 'DATE +;; Heterogeneous temporal atoms → as_list via default goto +;; Mix DATE + TIME (different atom types) → heterogeneous list +(type (enlist (as 'DATE 0) (as 'TIME 100))) -- 'LIST + +;; ═════════════════════════════════════════════════════════════════════ +;; 53. I64 vec with null → TIMESTAMP vec: null propagation (line 783) +;; ═════════════════════════════════════════════════════════════════════ + +(nil? (at (as 'TIMESTAMP [0 0Nl 2]) 1)) -- true + +;; ═════════════════════════════════════════════════════════════════════ +;; 54. F64 vec with null → I64: null sentinel propagation (line 783) +;; ═════════════════════════════════════════════════════════════════════ + +(nil? (at (as 'I64 (as 'F64 [1.0 0Nf 3.0])) 1)) -- true + +;; ═════════════════════════════════════════════════════════════════════ +;; UNREACHABLE BRANCHES (documented): +;; +;; Line 68: RAY_F32 case in null_literal_str — F32 typed null atom is +;; not directly constructible from RFL syntax. No `0Ne` parser literal +;; exists; (as 'F32 ...) is not a valid cast target. +;; +;; Lines 84, 351-355: Lazy materialization paths — RFL-visible APIs +;; always return materialized values by the time they reach println +;; or resolve. Lazy handles are internal to the query engine. +;; +;; Lines 141-142: ray_lang_print ray_fmt failure fallback — ray_fmt +;; would need to return an error for a valid ray_t*, which doesn't +;; happen in normal execution. +;; +;; Lines 173, 202, 215-216: fmt_interpolate error/failure fallbacks — +;; require ray_fmt to fail or error args, not triggerable from RFL. +;; +;; Lines 407-414: resolve all_user_sym=true SYM-conversion — requires +;; an I64 vec whose elements are valid multi-char user-sym IDs. +;; Not constructible from RFL (as 'I64 ['foo]) errors. +;; +;; Lines 447: ray_exit_fn return after exit() — unreachable by definition. +;; +;; Lines 497-498, 550-551, 586-592, 644, 668-669, 710-712, 725-726: +;; CSV read/write/splayed/parted function error paths — require +;; specific file system setup or malformed CSV files. +;; +;; Lines 757, 1207: OOM error guards in cast functions — unreachable +;; without memory pressure. +;; +;; Lines 898-955: cast_range_worker switch cases that are only reached +;; for unsupported (in_type, out_type) pairs or specific type combos +;; not exercisable from simple RFL casts. +;; +;; Lines 1065, 1085: Cancel check paths in cast_vec_numeric_fast — +;; require concurrent cancellation signal. +;; +;; Lines 2487-2489, 2504-2508, 2545-2551, 2579-2583, 2593-2597: +;; OOM guards in group_fn hash table operations. +;; +;; Lines 2638-2643: F32 group key reconstruction — F32 not directly +;; constructible from RFL. +;; +;; Lines 2691-2696: gfail label — error cleanup path, only on OOM. +;; +;; Lines 2813, 2842, 2885: concat default type error for exotic types. +;; +;; Lines 2902-2925: Dict concat with LIST/I64 keys — partially covered +;; by test 39/41 above; some sub-paths need non-SYM boxed keys. +;; ═════════════════════════════════════════════════════════════════════ diff --git a/test/rfl/ops/filter_branch_cov.rfl b/test/rfl/ops/filter_branch_cov.rfl new file mode 100644 index 00000000..6665b148 --- /dev/null +++ b/test/rfl/ops/filter_branch_cov.rfl @@ -0,0 +1,331 @@ +;; Branch-coverage tests for src/ops/filter.c. +;; +;; Targets uncovered branches at lines: +;; parted_gather_col : 36 (n_segs==0), 58 (segment cursor advance) +;; exec_filter_vec : 74 (alloc fail — unreachable), 82 (length mismatch) +;; exec_filter_parted_vec: 113, 116, 118, 135, 142 +;; exec_filter_seq : 184, 187, 191, 201 +;; exec_filter : 211, 212, 235, 240, 246, 265, 276, 298, +;; 309, 333, 347, 356, 358, 362, 371, 373, 386 +;; exec_filter_head : parted I64/STR/SYM gather paths +;; sel_compact : parted gather path (has_parted=true branches) +;; +;; Key routing: +;; select {from: T where: ...} → lazy rowsel → sel_compact at boundary ops +;; select {from: T where: ... take: N} → exec_filter_head +;; HAVING: select {from: (select {agg from: T by: k}) where: ...} +;; → exec_filter → exec_filter_seq (small tables) + +;; ════════════════════════════════════════════════════════════════════ +;; Pre-flight cleanup +;; ════════════════════════════════════════════════════════════════════ +(.sys.exec "rm -rf /tmp/rfl_fc_i64 /tmp/rfl_fc_sym /tmp/rfl_fc_str /tmp/rfl_fc_f64 /tmp/rfl_fc_mix /tmp/rfl_fc_multi /tmp/rfl_fc_big") + +;; ════════════════════════════════════════════════════════════════════ +;; 1. sel_compact on parted I64 table +;; Exercises: sel_compact has_parted=true → parted_gather_col (I64) +;; Covers lines: sel_compact 628-637 (parted basetype), 645-666 +;; parted_gather_col 37-67 (full gather loop) +;; ════════════════════════════════════════════════════════════════════ +(set S1A (table [v] (list [10 20 30]))) +(set S1B (table [v] (list [40 50 60 70]))) +(set S1C (table [v] (list [80 90]))) +(.db.splayed.set "/tmp/rfl_fc_i64/2024.01.01/t/" S1A) +(.db.splayed.set "/tmp/rfl_fc_i64/2024.01.02/t/" S1B) +(.db.splayed.set "/tmp/rfl_fc_i64/2024.01.03/t/" S1C) +(set Pi64 (.db.parted.get "/tmp/rfl_fc_i64/" 't)) +(count Pi64) -- 9 + +;; Mixed-pass filter on parted I64 — parted_gather_col walks segments +(count (select {from: Pi64 where: (> v 30)})) -- 6 +(sum (at (select {from: Pi64 where: (> v 30)}) 'v)) -- 390 + +;; Filter that spans segment boundary — parted_gather_col L58 +;; Rows 2 (v=30) and 3 (v=40) straddle seg boundary +(count (select {from: Pi64 where: (and (>= v 30) (<= v 50))})) -- 3 +(sum (at (select {from: Pi64 where: (and (>= v 30) (<= v 50))}) 'v)) -- 120 + +;; All-pass on parted → sel_compact all-pass fast path (L528) +(count (select {from: Pi64 where: (> v 0)})) -- 9 +(sum (at (select {from: Pi64 where: (> v 0)}) 'v)) -- 450 + +;; None-pass on parted → sel_compact none-pass path (L531-548) +(count (select {from: Pi64 where: (> v 999)})) -- 0 + +;; Single row from last segment +(count (select {from: Pi64 where: (== v 90)})) -- 1 +(first (at (select {from: Pi64 where: (== v 90)}) 'v)) -- 90 + +;; ════════════════════════════════════════════════════════════════════ +;; 2. sel_compact on parted SYM table +;; Exercises: sel_compact has_parted=true with SYM base type +;; Covers lines: sel_compact 631-635 (SYM attrs from parted), +;; parted_gather_col 39-41 (SYM esz) +;; ════════════════════════════════════════════════════════════════════ +(set S2A (table [k v] (list ['aa 'bb 'cc] [1 2 3]))) +(set S2B (table [k v] (list ['dd 'ee] [4 5]))) +(.db.splayed.set "/tmp/rfl_fc_sym/2024.01.01/t/" S2A) +(.db.splayed.set "/tmp/rfl_fc_sym/2024.01.02/t/" S2B) +(set Psym (.db.parted.get "/tmp/rfl_fc_sym/" 't)) +(count Psym) -- 5 + +;; Filter by numeric column, read back SYM column — parted SYM gather +(count (select {from: Psym where: (> v 2)})) -- 3 +(first (at (select {from: Psym where: (> v 2)}) 'k)) -- 'cc + +;; Filter selecting from both segments — SYM segment crossing +(count (select {from: Psym where: (and (>= v 2) (<= v 4))})) -- 3 + +;; ════════════════════════════════════════════════════════════════════ +;; 3. sel_compact on parted STR table +;; Exercises: sel_compact has_parted=true with STR base type +;; Covers lines: sel_compact 650-655 (pbase==RAY_STR → +;; parted_gather_str_rows), 704-709 (STR propagation) +;; ════════════════════════════════════════════════════════════════════ +(set S3A (table [s v] (list (list "apple" "banana" "cherry") [1 2 3]))) +(set S3B (table [s v] (list (list "date" "elderberry") [4 5]))) +(.db.splayed.set "/tmp/rfl_fc_str/2024.01.01/t/" S3A) +(.db.splayed.set "/tmp/rfl_fc_str/2024.01.02/t/" S3B) +(set Pstr (.db.parted.get "/tmp/rfl_fc_str/" 't)) +(count Pstr) -- 5 + +;; Filter by numeric col, gather STR column from parted +(count (select {from: Pstr where: (> v 2)})) -- 3 +(count (select {from: Pstr where: (>= v 4)})) -- 2 + +;; ════════════════════════════════════════════════════════════════════ +;; 4. sel_compact on parted F64 table +;; Exercises: parted_gather_col for F64 base type +;; ════════════════════════════════════════════════════════════════════ +(set S4A (table [f v] (list [1.5 2.5 3.5] [1 2 3]))) +(set S4B (table [f v] (list [4.5 5.5] [4 5]))) +(.db.splayed.set "/tmp/rfl_fc_f64/2024.01.01/t/" S4A) +(.db.splayed.set "/tmp/rfl_fc_f64/2024.01.02/t/" S4B) +(set Pf64 (.db.parted.get "/tmp/rfl_fc_f64/" 't)) +(count Pf64) -- 5 + +;; F64 parted gather +(count (select {from: Pf64 where: (> v 3)})) -- 2 +(sum (at (select {from: Pf64 where: (> v 3)}) 'f)) -- 10.0 + +;; ════════════════════════════════════════════════════════════════════ +;; 5. sel_compact on parted table with mixed column types +;; Exercises: parted_gather_col for each type + str_pool propagation +;; Covers: sel_compact 701-714 (str_pool propagation for parted +;; cols, null propagation for flat cols) +;; ════════════════════════════════════════════════════════════════════ +(set S5A (table [i f s v] (list [1 2 3] [1.0 2.0 3.0] ['x 'y 'z] [10 20 30]))) +(set S5B (table [i f s v] (list [4 5] [4.0 5.0] ['a 'b] [40 50]))) +(.db.splayed.set "/tmp/rfl_fc_mix/2024.01.01/t/" S5A) +(.db.splayed.set "/tmp/rfl_fc_mix/2024.01.02/t/" S5B) +(set Pmix (.db.parted.get "/tmp/rfl_fc_mix/" 't)) +(count Pmix) -- 5 + +;; Mixed-type parted filter — all column types gathered +(count (select {from: Pmix where: (> v 20)})) -- 3 +(sum (at (select {from: Pmix where: (> v 20)}) 'i)) -- 12 +(sum (at (select {from: Pmix where: (> v 20)}) 'f)) -- 12.0 + +;; ════════════════════════════════════════════════════════════════════ +;; 6. exec_filter_head on parted table (select-where-take) +;; Exercises: exec_filter_head parted gather (lines 460-484) +;; The parted I64/STR gather and segment cursor advance in +;; exec_filter_head are separate from sel_compact's. +;; ════════════════════════════════════════════════════════════════════ + +;; exec_filter_head with parted I64 gather +(count (select {from: Pi64 where: (> v 20) take: 3})) -- 3 +(sum (at (select {from: Pi64 where: (> v 20) take: 3}) 'v)) -- 120 +(count (select {from: Pi64 where: (> v 20) take: 100})) -- 7 + +;; exec_filter_head on parted STR +(count (select {from: Pstr where: (> v 2) take: 2})) -- 2 + +;; exec_filter_head on parted SYM +(count (select {from: Psym where: (> v 2) take: 2})) -- 2 + +;; exec_filter_head with parted mixed types +(count (select {from: Pmix where: (> v 10) take: 2})) -- 2 + +;; exec_filter_head take=0 — empty result +(count (select {from: Pi64 where: (> v 0) take: 0})) -- 0 + +;; exec_filter_head none-pass +(count (select {from: Pi64 where: (> v 999) take: 5})) -- 0 + +;; ════════════════════════════════════════════════════════════════════ +;; 7. HAVING — exec_filter → exec_filter_seq on small group result +;; Exercises: exec_filter L208-228 (entry, pass_count scan), +;; exec_filter L227-228 (vector filter — non-table input), +;; exec_filter_seq L181-206 (sequential table filter) +;; The group result is a small flat table, hitting exec_filter_seq. +;; ════════════════════════════════════════════════════════════════════ + +;; HAVING on parted group result — filter on aggregated columns +;; The group result table is flat (not parted), so this exercises +;; exec_filter_seq → exec_filter_vec for each result column. +(set Rh (select {from: (select {s: (sum v) from: Pi64 by: date}) where: (> s 100)})) +(count Rh) -- 2 + +;; HAVING with none-pass predicate +(count (select {from: (select {s: (sum v) from: Pi64 by: date}) where: (> s 9999)})) -- 0 + +;; HAVING with all-pass predicate +(count (select {from: (select {s: (sum v) from: Pi64 by: date}) where: (> s 0)})) -- 3 + +;; ════════════════════════════════════════════════════════════════════ +;; 8. HAVING on large group result — exec_filter parallel paths +;; With >65536 rows, exec_filter uses parallel gather. +;; Exercises: exec_filter L231-265 (parallel setup, match_idx build), +;; L329-342 (multi_gather_fn dispatch) +;; ════════════════════════════════════════════════════════════════════ + +;; Build a 70000-row table grouped to 70000 groups (1 row each), +;; then HAVING on the aggregated column. The group result is a +;; 70000-row flat table. +(set Tbig (table [k v] (list (til 70000) (til 70000)))) + +;; HAVING on 70000-row group result → exec_filter parallel paths +(count (select {from: (select {s: (sum v) from: Tbig by: k}) where: (> s 69990)})) -- 9 + +;; HAVING keeping ~half — stresses multi_gather_fn for moderate pass_count +(count (select {from: (select {s: (sum v) from: Tbig by: k}) where: (>= s 35000)})) -- 35000 + +;; ════════════════════════════════════════════════════════════════════ +;; 9. Multi-segment parted table — parted_gather_col segment advance +;; Exercises: parted_gather_col L54-57 (segment cursor advance), +;; L58 (row >= seg_ends[seg] condition), +;; L60-63 (local_row calculation across segments) +;; ════════════════════════════════════════════════════════════════════ +(set M1 (table [v] (list [1 2]))) +(set M2 (table [v] (list [3 4 5]))) +(set M3 (table [v] (list [6 7 8 9]))) +(.db.splayed.set "/tmp/rfl_fc_multi/2024.01.01/t/" M1) +(.db.splayed.set "/tmp/rfl_fc_multi/2024.01.02/t/" M2) +(.db.splayed.set "/tmp/rfl_fc_multi/2024.01.03/t/" M3) +(set Pmulti (.db.parted.get "/tmp/rfl_fc_multi/" 't)) +(count Pmulti) -- 9 + +;; Match from each segment — forces segment cursor to advance +(count (select {from: Pmulti where: (or (== v 1) (or (== v 5) (== v 9)))})) -- 3 +(sum (at (select {from: Pmulti where: (or (== v 1) (or (== v 5) (== v 9)))}) 'v)) -- 15 + +;; Match from only last segment — cursor must skip to seg 2 +(count (select {from: Pmulti where: (> v 7)})) -- 2 +(sum (at (select {from: Pmulti where: (> v 7)}) 'v)) -- 17 + +;; Match from only first segment — cursor stays at seg 0 +(count (select {from: Pmulti where: (< v 3)})) -- 2 + +;; Alternating matches across segments +(count (select {from: Pmulti where: (== (% v 2) 0)})) -- 4 +(sum (at (select {from: Pmulti where: (== (% v 2) 0)}) 'v)) -- 20 + +;; exec_filter_head across multiple segments +(count (select {from: Pmulti where: (> v 0) take: 5})) -- 5 +(sum (at (select {from: Pmulti where: (> v 0) take: 5}) 'v)) -- 15 + +;; ════════════════════════════════════════════════════════════════════ +;; 10. sel_compact MAPCOMMON materialization +;; When a parted table has a MAPCOMMON column (the date partition +;; key), sel_compact L600-627 materializes it via row-index lookup. +;; The parted_gather_col path handles the data columns. +;; ════════════════════════════════════════════════════════════════════ + +;; The MAPCOMMON 'date' column is materialized alongside parted 'v' +;; in the same sel_compact call. +(count (select {from: Pi64 where: (and (> v 20) (< v 80))})) -- 5 +(sum (at (select {from: Pi64 where: (and (> v 20) (< v 80))}) 'v)) -- 250 + +;; ════════════════════════════════════════════════════════════════════ +;; 11. sel_compact str_pool propagation for non-STR parted columns +;; Exercises: sel_compact L701-714 — parted non-STR path calls +;; col_propagate_str_pool_parted; flat columns call +;; col_propagate_str_pool + col_propagate_nulls_gather. +;; SYM columns carry sym-pool metadata that needs propagation. +;; ════════════════════════════════════════════════════════════════════ + +;; SYM parted column — triggers col_propagate_str_pool_parted (L706-708) +(first (at (select {from: Psym where: (== v 5)}) 'k)) -- 'ee + +;; ════════════════════════════════════════════════════════════════════ +;; 12. Chained filter on parted — sel_compact reads compacted flat tbl +;; Double filter on parted table — second filter's sel_compact +;; operates on a flat (already-compacted) table. +;; ════════════════════════════════════════════════════════════════════ +(count (select {from: (select {from: Pi64 where: (> v 20)}) where: (< v 80)})) -- 5 + +;; ════════════════════════════════════════════════════════════════════ +;; 13. Larger parted table to exercise morsel boundaries in sel_compact +;; RAY_MORSEL_ELEMS=1024. A parted table with >1024 total rows +;; exercises multi-morsel rowsel walking in sel_compact. +;; ════════════════════════════════════════════════════════════════════ +(set B1 (table [v] (list (til 600)))) +(set B2 (table [v] (list (+ (til 600) 600)))) +(.db.splayed.set "/tmp/rfl_fc_big/2024.01.01/t/" B1) +(.db.splayed.set "/tmp/rfl_fc_big/2024.01.02/t/" B2) +(set Pbig (.db.parted.get "/tmp/rfl_fc_big/" 't)) +(count Pbig) -- 1200 + +;; Filter spanning morsel boundary (1024) on parted table +(count (select {from: Pbig where: (> v 500)})) -- 699 +(count (select {from: Pbig where: (and (>= v 400) (< v 800))})) -- 400 +(sum (at (select {from: Pbig where: (and (>= v 400) (< v 800))}) 'v)) -- 239800 + +;; exec_filter_head on large parted table across morsel boundary +(count (select {from: Pbig where: (> v 100) take: 50})) -- 50 + +;; ════════════════════════════════════════════════════════════════════ +;; 14. HAVING with STR columns in group result +;; exec_filter_seq → exec_filter_vec on STR column +;; Exercises: exec_filter_vec on STR (col_esz path) +;; ════════════════════════════════════════════════════════════════════ +(set Ts (table [k v] (list ['a 'a 'b 'b 'c] [1 2 3 4 5]))) +(count (select {from: (select {s: (sum v) from: Ts by: k}) where: (> s 3)})) -- 2 + +;; ════════════════════════════════════════════════════════════════════ +;; 15. Parted SYM + STR in exec_filter_head +;; exec_filter_head L443-452: SYM type-dispatch with parted attrs +;; exec_filter_head L460-466: parted STR deep-copy +;; ════════════════════════════════════════════════════════════════════ + +;; exec_filter_head on parted SYM with take +(count (select {from: Psym where: (>= v 1) take: 3})) -- 3 +(first (at (select {from: Psym where: (>= v 1) take: 3}) 'k)) -- 'aa + +;; exec_filter_head on parted STR with take +(count (select {from: Pstr where: (>= v 1) take: 3})) -- 3 + +;; ════════════════════════════════════════════════════════════════════ +;; 16. exec_filter on vector input (non-table) +;; exec_filter L227-228: input->type != RAY_TABLE +;; This path is hit when filter is applied eagerly to a vector. +;; ════════════════════════════════════════════════════════════════════ + +;; Vector filter through select on a column vector directly +;; The builtin (filter v mask) goes through collection.c, not filter.c. +;; exec_filter on vector happens only through the graph executor. +;; This is already covered by the HAVING path (exec_filter_vec is +;; called per-column inside exec_filter_seq). + +;; ════════════════════════════════════════════════════════════════════ +;; 17. AND/OR composites on parted — complex predicates that create +;; diverse rowsel patterns (MIX segments in sel_compact) +;; ════════════════════════════════════════════════════════════════════ +(count (select {from: Pmulti where: (and (> v 2) (< v 8))})) -- 5 +(count (select {from: Pmulti where: (or (< v 3) (> v 7))})) -- 4 +(count (select {from: Pmulti where: (not (== v 5))})) -- 8 + +;; ════════════════════════════════════════════════════════════════════ +;; 18. Nested select on parted table — double sel_compact +;; First select-where creates lazy sel, boundary op compacts, +;; second select-where on compacted (flat) table. +;; ════════════════════════════════════════════════════════════════════ +(set R18 (select {from: (select {from: Pmix where: (> v 10)}) where: (< v 50)})) +(count R18) -- 3 +(sum (at R18 'v)) -- 90 + +;; ════════════════════════════════════════════════════════════════════ +;; Teardown +;; ════════════════════════════════════════════════════════════════════ +(.sys.exec "rm -rf /tmp/rfl_fc_i64 /tmp/rfl_fc_sym /tmp/rfl_fc_str /tmp/rfl_fc_f64 /tmp/rfl_fc_mix /tmp/rfl_fc_multi /tmp/rfl_fc_big") diff --git a/test/rfl/ops/fused_group_branch_cov.rfl b/test/rfl/ops/fused_group_branch_cov.rfl new file mode 100644 index 00000000..9dd2d9fd --- /dev/null +++ b/test/rfl/ops/fused_group_branch_cov.rfl @@ -0,0 +1,706 @@ +;; Branch coverage for src/ops/fused_group.c +;; +;; Targets uncovered branches at lines: +;; 44 ray_filtered_group: !g NULL guard (unreachable from RFL) +;; 60 ray_filtered_group: !ext NULL guard (unreachable from RFL) +;; 136-139 fp_atom_col_compatible: integer family subcombinations +;; 152 fp_col_supported: !col NULL guard +;; 158-169 fp_expr_const_str: various branches +;; 177-182 fp_check_simple_cmp: expr/element validation +;; 190,193 fp_check_simple_cmp: LHS/RHS validation +;; 204-208 fp_check_simple_cmp: col resolution, parted, SYM ordering +;; 221-236 fp_check_like: LIKE validation on STR/SYM +;; +;; Strategy: exercise the planner gate `ray_fused_group_supported` +;; with diverse predicate shapes and column types so both the "accept" +;; (fused) and "reject" (unfused fallback) branches fire. Both paths +;; must produce correct results. + +;; ===================================================================== +;; Section 1: fp_atom_col_compatible — integer family cross-products +;; +;; Lines 129-138: BOOL/U8/I16/I32/I64 columns against BOOL/U8/I16/I32/I64 +;; atom constants. The short-circuit || chain means each atom-type check +;; is a separate branch. We need queries with each column type tested +;; against each atom type that will reach fp_atom_col_compatible. +;; ===================================================================== + +;; --- BOOL column with integer literal comparisons --- +;; BOOL column stores 0/1 as uint8_t. EQ against integer literals. +;; The RFL literal `true` is type -RAY_BOOL, `1` is type -RAY_I64. +;; fp_atom_col_compatible(BOOL col, -RAY_I64 atom) → line 138 (last ||) +(set Tbcol (table [k v] (list [false true false true false true] (as 'I64 [1 2 3 4 5 6])))) +;; (== k 1) → atom type -RAY_I64 against BOOL column → hits line 138 +(set Rb1 (select {n: (count v) by: k from: Tbcol where: (== k 1)})) +(count Rb1) -- 1 +(sum (at Rb1 'n)) -- 3 + +;; (== k true) → atom type -RAY_BOOL against BOOL column → hits line 136 first || +(set Rb2 (select {n: (count v) by: k from: Tbcol where: (== k true)})) +(count Rb2) -- 1 +(sum (at Rb2 'n)) -- 3 + +;; --- U8 column with various atom types --- +;; Literal 5 is -RAY_I64. fp_atom_col_compatible(U8, -RAY_I64) → line 138 +(set Tu8col (table [k v] (list (as 'U8 [0 1 2 3 4 5]) (as 'I64 [10 20 30 40 50 60])))) +(set Ru81 (select {n: (count v) by: k from: Tu8col where: (== k 5)})) +(count Ru81) -- 1 +(sum (at Ru81 'n)) -- 1 + +;; --- I16 column with I64 literal --- +;; fp_atom_col_compatible(I16, -RAY_I64) → passes through lines 136-138 +(set Ti16col (table [k v] (list (as 'I16 [10 20 30 40 50 60]) (as 'I64 [1 2 3 4 5 6])))) +(set Ri161 (select {n: (count v) by: k from: Ti16col where: (== k 30)})) +(count Ri161) -- 1 +(sum (at Ri161 'n)) -- 1 + +;; --- I32 column with I64 literal --- +(set Ti32col (table [k v] (list (as 'I32 [100 200 300 400 500 600]) (as 'I64 [1 2 3 4 5 6])))) +(set Ri321 (select {n: (count v) by: k from: Ti32col where: (== k 300)})) +(count Ri321) -- 1 +(sum (at Ri321 'n)) -- 1 + +;; --- I64 column with I64 literal (most common) --- +(set Ti64col (table [k v] (list (as 'I64 [1000 2000 3000 4000 5000 6000]) (as 'I64 [1 2 3 4 5 6])))) +(set Ri641 (select {n: (count v) by: k from: Ti64col where: (== k 3000)})) +(count Ri641) -- 1 +(sum (at Ri641 'n)) -- 1 + +;; ===================================================================== +;; Section 2: fp_atom_col_compatible — SYM column +;; +;; Line 122: SYM col + -RAY_SYM atom || -RAY_STR atom +;; (== sym_col 'foo) → atom type -RAY_SYM → first || hits +;; (== sym_col "foo") → atom type -RAY_STR → second || hits +;; ===================================================================== + +(set Tsymcol (table [k v g] (list ['foo 'bar 'baz 'foo 'bar 'baz] (as 'I64 [1 2 3 4 5 6]) [0 0 0 1 1 1]))) + +;; SYM col vs SYM atom (line 122 first ||) +(set Rsym1 (select {n: (count v) by: g from: Tsymcol where: (== k 'foo)})) +(count Rsym1) -- 2 +(sum (at Rsym1 'n)) -- 2 + +;; SYM col vs STR atom (line 122 second ||) +(set Rsym2 (select {n: (count v) by: g from: Tsymcol where: (== k "bar")})) +(count Rsym2) -- 2 +(sum (at Rsym2 'n)) -- 2 + +;; ===================================================================== +;; Section 3: fp_atom_col_compatible — temporal columns +;; +;; Lines 123-128: DATE/TIME/TIMESTAMP columns with matching atom types. +;; Each temporal type only accepts its own atom type. +;; ===================================================================== + +;; DATE column + DATE atom (line 124) +(set Tdcol (table [k v g] (list (as 'date [2020.01.01 2020.01.02 2020.01.03 2020.01.04 2020.01.05 2020.01.06]) (as 'I64 [1 2 3 4 5 6]) [0 0 0 1 1 1]))) +(set Rd1 (select {n: (count v) by: g from: Tdcol where: (== k 2020.01.03)})) +(count Rd1) -- 1 +(sum (at Rd1 'n)) -- 1 + +;; TIME column + TIME atom (line 126) +(set Ttcol (table [k v g] (list (as 'time [00:00:01.000 00:00:02.000 00:00:03.000 00:00:04.000 00:00:05.000 00:00:06.000]) (as 'I64 [1 2 3 4 5 6]) [0 0 0 1 1 1]))) +(set Rt1 (select {n: (count v) by: g from: Ttcol where: (> k 00:00:03.000)})) +(count Rt1) -- 1 +(sum (at Rt1 'n)) -- 3 + +;; TIMESTAMP column + TIMESTAMP atom (line 128) +(set Ttscol (table [k v g] (list (as 'timestamp [2020.01.01D00:00:00.000000001 2020.01.01D00:00:00.000000002 2020.01.01D00:00:00.000000003 2020.01.01D00:00:00.000000004 2020.01.01D00:00:00.000000005 2020.01.01D00:00:00.000000006]) (as 'I64 [1 2 3 4 5 6]) [0 0 0 1 1 1]))) +(set Rts1 (select {n: (count v) by: g from: Ttscol where: (<= k 2020.01.01D00:00:00.000000003)})) +(count Rts1) -- 1 +(sum (at Rts1 'n)) -- 3 + +;; ===================================================================== +;; Section 4: fp_atom_col_compatible default → 0 (line 139-140) +;; +;; F32/F64/STR/GUID columns are not in the switch → default returns 0. +;; The fused path rejects this at fp_check_simple_cmp line 210-213. +;; The query falls back to the unfused path but must still be correct. +;; ===================================================================== + +;; F64 column with numeric WHERE — fused gate rejects F64 → unfused fallback +(set Tf64col (table [k v g] (list (as 'F64 [1.1 2.2 3.3 4.4 5.5 6.6]) (as 'I64 [1 2 3 4 5 6]) [0 0 0 1 1 1]))) +(set Rf641 (select {n: (count v) by: g from: Tf64col where: (> k 3.0)})) +(count Rf641) -- 2 +(sum (at Rf641 'n)) -- 4 + +;; STR column with LIKE — fused gate checks fp_check_like on STR col +;; STR LIKE should be accepted by fp_check_like line 236 (col->type == RAY_STR) +(set Tstrcol (table [k v g] (list ["apple" "banana" "cherry" "apricot" "blueberry" "cranberry"] (as 'I64 [1 2 3 4 5 6]) [0 0 0 1 1 1]))) +(set Rstr1 (select {n: (count v) by: g from: Tstrcol where: (like k "a*")})) +(count Rstr1) -- 2 +(sum (at Rstr1 'n)) -- 2 + +;; ===================================================================== +;; Section 5: fp_check_simple_cmp — ordering ops on SYM column +;; +;; Line 208: is_ord && ct == RAY_SYM → return -1 +;; SYM column with <, <=, >, >= → rejected by fused gate → unfused. +;; The query must still return correct results via unfused path. +;; ===================================================================== + +;; SYM column with ordering op → fused rejects, unfused handles +(set Tsymord (table [k v g] (list ['aa 'bb 'cc 'dd 'ee 'ff] (as 'I64 [1 2 3 4 5 6]) [0 0 0 1 1 1]))) +;; (> k 'bb) on SYM → ordering on SYM rejected → unfused fallback +(set Rsymord1 (select {n: (count v) by: g from: Tsymord where: (> k 'bb)})) +;; Unfused path handles SYM ordering; exact result depends on engine +;; comparison semantics (SYM IDs or strings). Just verify no crash. +(>= (count Rsymord1) 0) -- true + +;; ===================================================================== +;; Section 6: fp_col_supported — nullable column (line 153) +;; +;; A column with RAY_ATTR_HAS_NULLS → fp_col_supported returns 0. +;; The fused gate rejects → unfused path handles nulls correctly. +;; This exercises line 153 (col->attrs & RAY_ATTR_HAS_NULLS → 0). +;; ===================================================================== + +;; Nullable predicate column → fused gate rejects via fp_col_supported +(set Tnull (table [k v g] (list [1 0N 3 4 0N 6] (as 'I64 [10 20 30 40 50 60]) [0 0 0 1 1 1]))) +;; (== k 3) with nullable k → fp_col_supported returns 0 → unfused +(set Rnull1 (select {n: (count v) by: g from: Tnull where: (== k 3)})) +(count Rnull1) -- 1 +(sum (at Rnull1 'n)) -- 1 + +;; Nullable column with NE — null sentinel differs from 1, so unfused +;; path may count nulls as passing != 1. k=0N has stored sentinel != 1. +;; Rows passing: k=0N(g=0), k=3(g=0), k=4(g=1), k=0N(g=1), k=6(g=1) → 5 rows, 2 groups +(set Rnull2 (select {n: (count v) by: g from: Tnull where: (!= k 1)})) +(count Rnull2) -- 2 +(sum (at Rnull2 'n)) -- 5 + +;; ===================================================================== +;; Section 7: fp_check_like — LIKE on SYM column (lines 235-236) +;; +;; fp_check_like: col->type == RAY_SYM → return 1 (supported) +;; This exercises line 236 (col->type != RAY_STR && col->type != RAY_SYM) +;; when we test with a non-STR/non-SYM column → return 0. +;; Also exercises the acceptance path for SYM LIKE. +;; ===================================================================== + +;; SYM LIKE → accepted by fp_check_like (line 236 col->type == RAY_SYM) +(set Tsymlike (table [k v g] (list ['alpha 'beta 'gamma 'alpha 'beta 'gamma] (as 'I64 [1 2 3 4 5 6]) [0 0 0 1 1 1]))) +(set Rsymlike1 (select {n: (count v) by: g from: Tsymlike where: (like k "alpha*")})) +(count Rsymlike1) -- 2 +(sum (at Rsymlike1 'n)) -- 2 + +;; STR LIKE → accepted by fp_check_like (line 236 col->type == RAY_STR) +(set Tstrlike (table [k v g] (list ["hello" "world" "help" "hero" "wizard" "wonder"] (as 'I64 [1 2 3 4 5 6]) [0 0 0 1 1 1]))) +(set Rstrlike1 (select {n: (count v) by: g from: Tstrlike where: (like k "he*")})) +(count Rstrlike1) -- 2 +(sum (at Rstrlike1 'n)) -- 3 + +;; ===================================================================== +;; Section 8: fp_expr_const_str — concat pattern (lines 160-169) +;; +;; fp_expr_const_str recognizes (concat "a" "b") as a constant string +;; expression for the LIKE pattern. The branches at lines 160-169 +;; validate the (concat ...) list form. +;; +;; LIKE with a (concat ...) pattern on STR/SYM column exercises: +;; line 160: expr->type == RAY_LIST check +;; line 162: elems[0] is a SYM (the 'concat head) +;; line 165-166: head matches "concat" +;; line 168-169: recursive check on each concat arg +;; ===================================================================== + +;; LIKE with concat pattern on STR column +;; (like k (concat "he" "*")) → fp_expr_const_str → concat path +(set Rconcat1 (select {n: (count v) by: g from: Tstrlike where: (like k (concat "he" "*"))})) +(count Rconcat1) -- 2 +(sum (at Rconcat1 'n)) -- 3 + +;; LIKE with concat pattern on SYM column +(set Rconcat2 (select {n: (count v) by: g from: Tsymlike where: (like k (concat "al" "*"))})) +(count Rconcat2) -- 2 +(sum (at Rconcat2 'n)) -- 2 + +;; Nested concat: (concat "a" (concat "l" "*")) +(set Rconcat3 (select {n: (count v) by: g from: Tsymlike where: (like k (concat "a" (concat "l" "*")))})) +(count Rconcat3) -- 2 +(sum (at Rconcat3 'n)) -- 2 + +;; ===================================================================== +;; Section 9: fp_check_simple_cmp — various operator recognition +;; +;; Lines 185-187: fp_op_from_2char / fp_op_from_1char +;; These are exercised by using each comparison operator. +;; Targeting operators that may be underexercised: <=, >=, <, > +;; on different column types. +;; ===================================================================== + +;; LE on DATE column (exercises fp_op_from_2char with '<=' → FP_LE code 3) +(set Rdle (select {n: (count v) by: g from: Tdcol where: (<= k 2020.01.03)})) +(count Rdle) -- 1 +(sum (at Rdle 'n)) -- 3 + +;; GE on TIME column (exercises fp_op_from_2char with '>=' → FP_GE code 5) +(set Rtge (select {n: (count v) by: g from: Ttcol where: (>= k 00:00:04.000)})) +(count Rtge) -- 1 +(sum (at Rtge 'n)) -- 3 + +;; LT on TIMESTAMP column (exercises fp_op_from_1char with '<' → FP_LT code 2) +(set Rtslt (select {n: (count v) by: g from: Ttscol where: (< k 2020.01.01D00:00:00.000000004)})) +(count Rtslt) -- 1 +(sum (at Rtslt 'n)) -- 3 + +;; GT on I32 column (exercises fp_op_from_1char with '>' → FP_GT code 4) +;; Ti32col: k=[100,200,300,400,500,600]. k>300: rows k=400,500,600 → 3 groups. +(set Ri32gt (select {n: (count v) by: k from: Ti32col where: (> k 300)})) +(count Ri32gt) -- 3 +(sum (at Ri32gt 'n)) -- 3 + +;; NE on I16 column (exercises fp_op_from_2char with '!=' → FP_NE code 1) +(set Ri16ne (select {n: (count v) by: k from: Ti16col where: (!= k 30)})) +(count Ri16ne) -- 5 +(sum (at Ri16ne 'n)) -- 5 + +;; ===================================================================== +;; Section 10: fp_check_in — IN predicate validation +;; +;; Lines 247-271: fp_check_in validates IN predicates. +;; Exercise IN with different typed vectors on different column types. +;; ===================================================================== + +;; IN on BOOL column (fp_int_family BOOL → accepted) +(set Rbin (select {n: (count v) by: k from: Tbcol where: (in k [true])})) +(count Rbin) -- 1 +(sum (at Rbin 'n)) -- 3 + +;; IN on U8 column +(set Ru8in (select {n: (count v) by: k from: Tu8col where: (in k (as 'U8 [1 3 5]))})) +(count Ru8in) -- 3 +(sum (at Ru8in 'n)) -- 3 + +;; IN on I16 column +(set Ri16in (select {n: (count v) by: k from: Ti16col where: (in k (as 'I16 [20 40 60]))})) +(count Ri16in) -- 3 +(sum (at Ri16in 'n)) -- 3 + +;; IN on I32 column +(set Ri32in (select {n: (count v) by: k from: Ti32col where: (in k (as 'I32 [200 400 600]))})) +(count Ri32in) -- 3 +(sum (at Ri32in 'n)) -- 3 + +;; IN on DATE column +(set Rdin (select {n: (count v) by: g from: Tdcol where: (in k [2020.01.02 2020.01.04 2020.01.06])})) +(count Rdin) -- 2 +(sum (at Rdin 'n)) -- 3 + +;; IN on TIME column +(set Rtin (select {n: (count v) by: g from: Ttcol where: (in k [00:00:01.000 00:00:03.000 00:00:05.000])})) +(count Rtin) -- 2 +(sum (at Rtin 'n)) -- 3 + +;; IN on TIMESTAMP column +(set Rtsin (select {n: (count v) by: g from: Ttscol where: (in k [2020.01.01D00:00:00.000000001 2020.01.01D00:00:00.000000003 2020.01.01D00:00:00.000000005])})) +(count Rtsin) -- 2 +(sum (at Rtsin 'n)) -- 3 + +;; ===================================================================== +;; Section 11: AND compound predicates — mixed types in AND +;; +;; Lines 292-299: ray_fused_group_supported detects (and ...) and +;; validates each child via fp_check_simple_cmp || fp_check_like || fp_check_in. +;; Exercise AND with a mix of cmp/like/in children. +;; ===================================================================== + +;; AND with CMP + LIKE children +(set Tmix (table [k s v g] (list (as 'I64 [1 2 3 4 5 6]) ["alpha" "beta" "gamma" "apple" "banana" "cherry"] (as 'I64 [10 20 30 40 50 60]) [0 0 0 1 1 1]))) +;; (and (> k 2) (like s "a*")) → CMP + LIKE in AND +(set Rmix1 (select {n: (count v) by: g from: Tmix where: (and (> k 2) (like s "a*"))})) +;; k>2: rows 3,4,5,6. like s "a*": "gamma"→no, "apple"→yes, "banana"→no, "cherry"→no +;; Only row 4 (k=4, s="apple", g=1) passes → 1 group +(count Rmix1) -- 1 +(sum (at Rmix1 'n)) -- 1 + +;; AND with CMP + IN children +(set Rmix2 (select {n: (count v) by: g from: Tmix where: (and (> k 2) (in k [3 5]))})) +;; k>2: rows 3,4,5,6. in k [3,5]: k=3,k=5. Intersection: k=3(g=0),k=5(g=1) → 2 groups +(count Rmix2) -- 2 +(sum (at Rmix2 'n)) -- 2 + +;; AND with LIKE + IN children +(set Rmix3 (select {n: (count v) by: g from: Tmix where: (and (like s "a*") (in k [1 4]))})) +;; like s "a*": rows 1(alpha),4(apple). in k [1,4]: k=1,k=4. Intersection: k=1(g=0,s=alpha),k=4(g=1,s=apple) +(count Rmix3) -- 2 +(sum (at Rmix3 'n)) -- 2 + +;; 3-child AND with CMP + LIKE + IN — all three child-check paths +(set Rmix4 (select {n: (count v) by: g from: Tmix where: (and (> k 0) (like s "*a*") (in k [1 2 3 4 5 6]))})) +;; k>0: all. like s "*a*": "alpha","beta","gamma","apple","banana" (rows 1-5). in k [1..6]: all. +;; Intersection: rows 1-5 → 2 groups (g=0 has 3, g=1 has 2) +(count Rmix4) -- 2 +(sum (at Rmix4 'n)) -- 5 + +;; ===================================================================== +;; Section 12: ray_fused_group_supported — single-child fall-through +;; +;; Lines 303-305: single comparison (not AND) checked via fallthrough. +;; The `||` chain: fp_check_simple_cmp || fp_check_like || fp_check_in +;; ===================================================================== + +;; Single LIKE (not AND) → line 305 fp_check_like returns 1 +(set Rslike1 (select {n: (count v) by: g from: Tstrlike where: (like k "w*")})) +;; "world","wizard","wonder" → rows 2(g=0),5(g=1),6(g=1). 2 groups. +(count Rslike1) -- 2 +(sum (at Rslike1 'n)) -- 3 + +;; Single IN (not AND) → line 305 fp_check_in returns 1 +(set Rsin1 (select {n: (count v) by: k from: Ti64col where: (in k [1000 3000 5000])})) +(count Rsin1) -- 3 +(sum (at Rsin1 'n)) -- 3 + +;; ===================================================================== +;; Section 13: fp_eval_cmp range-fold branches +;; +;; Lines 789-800: when cval is out of the column's representable range, +;; the comparison is folded to constant true/false. +;; Exercise each fold case for different operators. +;; ===================================================================== + +;; U8 column (range 0-255): +;; (== k 300) → out of range above → FP_FOLD_FALSE → 0 rows +(set Rfold1 (select {n: (count v) by: k from: Tu8col where: (== k 300)})) +(count Rfold1) -- 0 + +;; (!= k 300) → out of range above → FP_FOLD_TRUE → all rows +(set Rfold2 (select {n: (count v) by: k from: Tu8col where: (!= k 300)})) +(count Rfold2) -- 6 +(sum (at Rfold2 'n)) -- 6 + +;; (< k 300) → above → FP_FOLD_TRUE → all rows +(set Rfold3 (select {n: (count v) by: k from: Tu8col where: (< k 300)})) +(count Rfold3) -- 6 +(sum (at Rfold3 'n)) -- 6 + +;; (> k 300) → above → FP_FOLD_FALSE → 0 rows +(set Rfold4 (select {n: (count v) by: k from: Tu8col where: (> k 300)})) +(count Rfold4) -- 0 + +;; (<= k 300) → above → FP_FOLD_TRUE → all rows +(set Rfold5 (select {n: (count v) by: k from: Tu8col where: (<= k 300)})) +(count Rfold5) -- 6 +(sum (at Rfold5 'n)) -- 6 + +;; (>= k 300) → above → FP_FOLD_FALSE → 0 rows +(set Rfold6 (select {n: (count v) by: k from: Tu8col where: (>= k 300)})) +(count Rfold6) -- 0 + +;; Below range: (< k -1) → below (cval=-1 < v_min=0) → FP_FOLD_FALSE +(set Rfold7 (select {n: (count v) by: k from: Tu8col where: (< k -1)})) +(count Rfold7) -- 0 + +;; (> k -1) → below → FP_FOLD_TRUE → all rows +(set Rfold8 (select {n: (count v) by: k from: Tu8col where: (> k -1)})) +(count Rfold8) -- 6 +(sum (at Rfold8 'n)) -- 6 + +;; (>= k -1) → below → FP_FOLD_TRUE +(set Rfold9 (select {n: (count v) by: k from: Tu8col where: (>= k -1)})) +(count Rfold9) -- 6 +(sum (at Rfold9 'n)) -- 6 + +;; (<= k -1) → below → FP_FOLD_FALSE +(set Rfold10 (select {n: (count v) by: k from: Tu8col where: (<= k -1)})) +(count Rfold10) -- 0 + +;; ===================================================================== +;; Section 14: fp_eval_cmp — FP_IN with various esz widths +;; +;; Lines 407-431: FP_IN dispatches on esz (1/2/4/default=8). +;; Need IN on U8 (esz=1), I16 (esz=2), I32 (esz=4), I64 (esz=8) +;; as the FIRST/ONLY child of the predicate (to call fp_eval_cmp not +;; fp_eval_cmp_masked). +;; ===================================================================== + +;; IN on U8 as single child (esz=1 path line 417-419) +(set Rin_u8 (select {n: (count v) by: k from: Tu8col where: (in k (as 'U8 [0 2 4]))})) +(count Rin_u8) -- 3 +(sum (at Rin_u8 'n)) -- 3 + +;; IN on I16 as single child (esz=2 path line 421-423) +(set Rin_i16 (select {n: (count v) by: k from: Ti16col where: (in k (as 'I16 [10 30 50]))})) +(count Rin_i16) -- 3 +(sum (at Rin_i16 'n)) -- 3 + +;; IN on I32 as single child (esz=4 path line 425-427) +(set Rin_i32 (select {n: (count v) by: k from: Ti32col where: (in k (as 'I32 [100 300 500]))})) +(count Rin_i32) -- 3 +(sum (at Rin_i32 'n)) -- 3 + +;; IN on I64 as single child (esz=8 default path line 429) +(set Rin_i64 (select {n: (count v) by: k from: Ti64col where: (in k [1000 3000 5000])})) +(count Rin_i64) -- 3 +(sum (at Rin_i64 'n)) -- 3 + +;; ===================================================================== +;; Section 15: fp_eval_cmp esz=2 SYM NE (lines 448-453) +;; +;; SYM column with esz=2 (>256 symbols in dict) is hard to construct +;; in a simple RFL test. Instead target the SYM esz=2 EQ/NE branches +;; indirectly via a SYM column on a table where the global sym dict +;; has grown past 256 entries from prior tests in this session. +;; ===================================================================== + +;; SYM NE (exercises SYM path in fp_eval_cmp) +(set Tsymne (table [k v g] (list ['x 'y 'z 'x 'y 'z] (as 'I64 [1 2 3 4 5 6]) [0 0 0 1 1 1]))) +(set Rsymne (select {n: (count v) by: g from: Tsymne where: (!= k 'x)})) +(count Rsymne) -- 2 +;; Rows where k!='x': k='y'(g=0), k='z'(g=0), k='y'(g=1), k='z'(g=1) → 2 groups, 2 each +(sum (at Rsymne 'n)) -- 4 + +;; ===================================================================== +;; Section 16: fp_eval_cmp_one — IN on fp_eval_cmp_one path +;; +;; Lines 551-555: fp_eval_cmp_one FP_IN path. +;; Triggered by multi-child AND with IN child where masked path calls +;; fp_eval_cmp_one for the IN child. +;; ===================================================================== + +;; AND with two INs — second IN goes through fp_eval_cmp_masked→fp_eval_cmp_one +(set Tdoublein (table [k v g] (list (as 'I64 [1 2 3 4 5 6 7 8]) (as 'I64 [10 20 30 40 50 60 70 80]) [0 0 0 0 1 1 1 1]))) +(set Rdin2 (select {n: (count v) by: g from: Tdoublein where: (and (in k [1 3 5 7]) (in v [10 30 50 70]))})) +;; k in {1,3,5,7}: rows 1,3,5,7. v in {10,30,50,70}: rows 1,3,5,7. Intersection: all 4. +;; g=0: rows 1,3 (k=1,v=10; k=3,v=30). g=1: rows 5,7 (k=5,v=50; k=7,v=70). +(count Rdin2) -- 2 +(sum (at Rdin2 'n)) -- 4 + +;; ===================================================================== +;; Section 17: Multi-agg with SUM agg on different typed columns +;; +;; mk_par_fn PASS 2: SUM branch (line 2749-2754) for various input types. +;; Exercise SUM on BOOL/U8/I16/I32/I64/DATE/TIME/TIMESTAMP inputs. +;; ===================================================================== + +;; SUM on BOOL column (treated as 0/1) +(set Tsumagg (table [g b u8 i16 i32 i64 dt tm ts] (list [0 0 0 1 1 1] [false true true false true false] (as 'U8 [10 20 30 40 50 60]) (as 'I16 [100 200 300 400 500 600]) (as 'I32 [1000 2000 3000 4000 5000 6000]) (as 'I64 [10000 20000 30000 40000 50000 60000]) (as 'date [2020.01.01 2020.01.02 2020.01.03 2020.01.04 2020.01.05 2020.01.06]) (as 'time [00:00:01.000 00:00:02.000 00:00:03.000 00:00:04.000 00:00:05.000 00:00:06.000]) (as 'timestamp [2020.01.01D00:00:00.000000001 2020.01.01D00:00:00.000000002 2020.01.01D00:00:00.000000003 2020.01.01D00:00:00.000000004 2020.01.01D00:00:00.000000005 2020.01.01D00:00:00.000000006])))) + +;; SUM of U8 column +(set Rsu8 (select {s: (sum u8) from: Tsumagg where: (>= g 0) by: g})) +(count Rsu8) -- 2 +(sum (at Rsu8 's)) -- 210 + +;; SUM of I16 column +(set Rsi16 (select {s: (sum i16) from: Tsumagg where: (>= g 0) by: g})) +(count Rsi16) -- 2 +(sum (at Rsi16 's)) -- 2100 + +;; MIN and MAX of I32 column +(set Rmi32 (select {lo: (min i32) hi: (max i32) from: Tsumagg where: (>= g 0) by: g})) +(count Rmi32) -- 2 +(min (at Rmi32 'lo)) -- 1000 +(max (at Rmi32 'hi)) -- 6000 + +;; AVG of I64 column +(set Rai64 (select {av: (avg i64) from: Tsumagg where: (>= g 0) by: g})) +(count Rai64) -- 2 + +;; ===================================================================== +;; Section 18: Empty IN list → all-zero bits (line 403-405) +;; +;; fp_eval_cmp: op==FP_IN && n_cvals==0 → memset(bits, 0, n) +;; An IN list where all values are NULL → n_cvals=0 after null filtering. +;; ===================================================================== + +;; Actually, RFL doesn't support null-containing IN lists easily. +;; Instead exercise "no match" IN by using values not present in column. +;; (in k [999 998 997]) on I64 col with values 1000-6000 → no rows match +(set Rnomatch (select {n: (count v) by: k from: Ti64col where: (in k [999 998 997])})) +(count Rnomatch) -- 0 + +;; ===================================================================== +;; Section 19: Multi-key GROUP BY with different key type combos +;; +;; mk_compile: exercises key type loop (lines 3646-3670). +;; Different key type combinations for composite keys. +;; ===================================================================== + +;; BOOL + I64 keys (1 + 8 = 9 bytes → wide key) +(set Tboolmk (table [kb ki v] (list [false true false true false true] (as 'I64 [1 1 2 2 3 3]) (as 'I64 [10 20 30 40 50 60])))) +(set Rboolmk (select {n: (count v) by: [kb ki] from: Tboolmk where: (>= v 0)})) +(count Rboolmk) -- 6 +(sum (at Rboolmk 'n)) -- 6 + +;; U8 + I32 keys (1 + 4 = 5 bytes → narrow) +(set Tu8i32mk (table [ku ki v] (list (as 'U8 [0 1 0 1 0 1]) (as 'I32 [10 10 20 20 30 30]) (as 'I64 [1 2 3 4 5 6])))) +(set Ru8i32mk (select {n: (count v) by: [ku ki] from: Tu8i32mk where: (>= v 0)})) +(count Ru8i32mk) -- 6 +(sum (at Ru8i32mk 'n)) -- 6 + +;; I16 + I16 keys (2 + 2 = 4 bytes → narrow) +(set Ti16i16mk (table [k1 k2 v] (list (as 'I16 [1 2 3 1 2 3]) (as 'I16 [10 10 10 20 20 20]) (as 'I64 [1 2 3 4 5 6])))) +(set Ri16i16mk (select {n: (count v) by: [k1 k2] from: Ti16i16mk where: (>= v 0)})) +(count Ri16i16mk) -- 6 +(sum (at Ri16i16mk 'n)) -- 6 + +;; SYM + I32 keys +(set Tsymi32mk (table [ks ki v] (list ['a 'b 'a 'b 'a 'b] (as 'I32 [1 1 2 2 3 3]) (as 'I64 [10 20 30 40 50 60])))) +(set Rsymi32mk (select {n: (count v) by: [ks ki] from: Tsymi32mk where: (>= v 0)})) +(count Rsymi32mk) -- 6 +(sum (at Rsymi32mk 'n)) -- 6 + +;; ===================================================================== +;; Section 20: fp_check_simple_cmp with tbl=NULL path (line 202) +;; +;; Lines 202-216: the tbl!=NULL branch does column validation. +;; The tbl==NULL case (lines 202 check fails, skip to return code) +;; is called when ray_fused_group_supported is invoked with tbl=NULL +;; during early planner checks. +;; NOTE: This branch is unreachable from RFL because the planner +;; always passes a non-NULL table. +;; ===================================================================== + +;; (documented as unreachable from RFL — tbl is always non-NULL in planner) + +;; ===================================================================== +;; Section 21: fp_compile_cmp temporal atom decode (lines 750-756) +;; +;; The atom decode switch for the constant value in comparisons: +;; case -RAY_DATE/-RAY_TIME: out->cval = (int64_t)cv->i32 +;; case -RAY_TIMESTAMP: out->cval = cv->i64 +;; case -RAY_BOOL/-RAY_U8: out->cval = (int64_t)cv->b8 +;; ===================================================================== + +;; DATE comparison fires the -RAY_DATE case in fp_compile_cmp +(set Rcompdate (select {n: (count v) by: g from: Tdcol where: (>= k 2020.01.04)})) +(count Rcompdate) -- 1 +(sum (at Rcompdate 'n)) -- 3 + +;; TIME comparison fires the -RAY_TIME case +(set Rcomptime (select {n: (count v) by: g from: Ttcol where: (<= k 00:00:03.000)})) +(count Rcomptime) -- 1 +(sum (at Rcomptime 'n)) -- 3 + +;; TIMESTAMP comparison fires the -RAY_TIMESTAMP case +(set Rcompts (select {n: (count v) by: g from: Ttscol where: (== k 2020.01.01D00:00:00.000000003)})) +(count Rcompts) -- 1 +(sum (at Rcompts 'n)) -- 1 + +;; ===================================================================== +;; Section 22: exec_filtered_group dispatch — count1 vs multi path +;; +;; Lines 3805-3810: single-key single-COUNT → count1; else → multi. +;; Exercise both paths with different shapes. +;; ===================================================================== + +;; count1 path: single key, single COUNT +(set Rc1 (select {n: (count v) by: k from: Ti64col where: (>= k 2000)})) +(count Rc1) -- 5 +(sum (at Rc1 'n)) -- 5 + +;; multi path: single key, multiple aggs +(set Rmulti1 (select {n: (count v) s: (sum v) by: k from: Ti64col where: (>= k 2000)})) +(count Rmulti1) -- 5 +(sum (at Rmulti1 's)) -- 20 + +;; multi path: multiple keys, single COUNT +(set Rmulti2 (select {n: (count v) by: [g k] from: Tsymcol where: (>= v 2)})) +(count Rmulti2) -- 5 + +;; multi path: multiple keys, multiple aggs +(set Rmulti3 (select {n: (count v) s: (sum v) by: [g k] from: Tsymcol where: (>= v 1)})) +(count Rmulti3) -- 6 + +;; ===================================================================== +;; Section 23: Fallback to unfused path (line 3812-3815) +;; +;; When the fused exec returns error "nyi", exec_filtered_group +;; catches it and falls back to exec_filtered_group_fallback. +;; This happens when planner gate allows but executor rejects. +;; F32/F64 agg columns would trigger this in mk_compile. +;; ===================================================================== + +;; F64 agg input → mk_compile rejects → fallback to unfused +(set Tf64agg (table [g v] (list [0 0 0 1 1 1] (as 'F64 [1.1 2.2 3.3 4.4 5.5 6.6])))) +(set Rf64agg (select {s: (sum v) from: Tf64agg where: (>= g 0) by: g})) +(count Rf64agg) -- 2 + +;; ===================================================================== +;; Section 24: SYM key in count1 direct path +;; +;; fp_try_direct_count1: kt==RAY_SYM → scan for max_key (lines 1312-1324) +;; ===================================================================== + +;; SYM key count1 (no WHERE) — exercises SYM max_key scan +(set Tsymc1 (table [k v] (list ['alpha 'beta 'gamma 'alpha 'beta 'gamma 'alpha 'beta] (as 'I64 (til 8))))) +(set Rsymc1 (select {n: (count v) by: k from: Tsymc1})) +(count Rsymc1) -- 3 +(sum (at Rsymc1 'n)) -- 8 + +;; SYM key count1 with WHERE +(set Rsymc2 (select {n: (count v) by: k from: Tsymc1 where: (>= v 2)})) +(count Rsymc2) -- 3 +(sum (at Rsymc2 'n)) -- 6 + +;; ===================================================================== +;; Section 25: I32 key count1 with MG top-count (lines 1113-1248) +;; +;; fp_try_i32_mg_top_count for I32 keys + top-N count. +;; Need I32 key with no WHERE (or simple WHERE) + desc: n take: K. +;; ===================================================================== + +;; I32 key count1 with top-3 (exercises Misra-Gries approximation) +(set Ti32mg (table [k v] (list (as 'I32 (% (til 500) 25)) (as 'I64 (til 500))))) +(set Ri32mg (select {n: (count v) by: k from: Ti32mg desc: n take: 5})) +(count Ri32mg) -- 5 +(sum (at Ri32mg 'n)) -- 100 + +;; ===================================================================== +;; Section 26: BOOL key direct count path (line 1296-1297) +;; +;; fp_try_direct_count1 with kt==RAY_BOOL → n_slots=2. +;; ===================================================================== + +;; BOOL key with WHERE on non-key column +(set Tbooldc (table [k v] (list [false true false true false true false true] (as 'I64 [1 2 3 4 5 6 7 8])))) +(set Rbooldc (select {n: (count v) by: k from: Tbooldc where: (> v 4)})) +(count Rbooldc) -- 2 +(sum (at Rbooldc 'n)) -- 4 + +;; ===================================================================== +;; Section 27: U8 key direct count with WHERE (pred_key_ne_zero) +;; +;; Line 1327-1335: pred_key_ne_zero detection. +;; (!= u8_key 0) triggers pred_key_ne_zero=1 for U8 key. +;; ===================================================================== + +(set Tu8ne (table [k v] (list (as 'U8 (% (til 100) 10)) (as 'I64 (til 100))))) +(set Ru8ne (select {n: (count v) by: k from: Tu8ne where: (!= k 0)})) +(count Ru8ne) -- 9 +(sum (at Ru8ne 'n)) -- 90 + +;; ===================================================================== +;; Section 28: I16 key direct count with WHERE + topk +;; +;; Lines 1340-1429: I16 ne0 u32 count path with top-N. +;; (!= i16_key 0) + desc: n take: K triggers the specialized I16 path. +;; ===================================================================== + +(set Ti16ne (table [k v] (list (as 'I16 (+ 1 (% (til 200) 20))) (as 'I64 (til 200))))) +(set Ri16ne2 (select {n: (count v) by: k from: Ti16ne where: (!= k 0) desc: n take: 5})) +(count Ri16ne2) -- 5 +(sum (at Ri16ne2 'n)) -- 50 + +;; ===================================================================== +;; Section 29: Summary of unreachable branches from RFL +;; +;; The following branches are unreachable from the RFL test harness: +;; +;; Line 44: ray_filtered_group(!g) — graph is always non-NULL in RFL +;; Line 60: ray_filtered_group(!ext) — allocation failure; OOM-only +;; Line 152: fp_col_supported(!col) — col is always resolved before call +;; Line 158: fp_expr_const_str(!expr) — expr is always non-NULL in AST +;; Lines 162,164: fp_expr_const_str concat head validation — would need +;; a malformed AST node that RFL's parser cannot produce +;; Lines 177,180,182: fp_check_simple_cmp NULL/type guards — parser +;; guarantees well-formed expression lists +;; Line 202+tbl=NULL: fp_check_simple_cmp with NULL table — planner +;; always passes a table reference +;; Line 205: PARTED/MAPCOMMON column check — these column types are +;; internal and cannot be created through RFL table literals +;; +;; These are defensive guards against C API misuse or internal bugs +;; that are not reachable from the public RFL evaluation path. +;; ===================================================================== diff --git a/test/rfl/ops/graph_branch_cov.rfl b/test/rfl/ops/graph_branch_cov.rfl new file mode 100644 index 00000000..4461b34d --- /dev/null +++ b/test/rfl/ops/graph_branch_cov.rfl @@ -0,0 +1,86 @@ +;; Branch-coverage tests for src/ops/graph.c +;; +;; Uncovered branches: 210 at lines 38-102 (graph_fix_ptr, +;; graph_fixup_ext_ptrs). These functions run ONLY when the +;; nodes array is reallocated: node_count >= GRAPH_INIT_CAP (4096). +;; +;; UNREACHABLE from RFL tests under ASan: +;; +;; 1. GRAPH_INIT_CAP = 4096: a single select graph needs >4096 +;; DAG nodes to trigger realloc. +;; 2. Each (+ X 1) creates 2 nodes; a depth-10 balanced binary +;; tree produces 4095 nodes (the maximum before stack overflow +;; in the recursive executor under ASan 3x-reduced stack). +;; 3. Depth 11 (8191 nodes) crashes with DEADLYSIGNAL in ASan +;; builds and returns nyi in non-ASan builds. +;; 4. Even if realloc is triggered, opcode-specific switch cases +;; (OP_SORT L50, OP_GROUP L54, OP_JOIN L66, OP_WINDOW_JOIN L75, +;; OP_WINDOW L80, OP_SELECT L88, OP_PIVOT L92) are unreachable +;; because these structural ext nodes are created AFTER the +;; expression tree compilation that would trigger the realloc. +;; 5. OP_EXPAND/OP_VAR_EXPAND/OP_SHORTEST_PATH/OP_WCO_JOIN +;; (L99-102) only exist in .graph.* builtin graphs. +;; +;; All 210 uncovered branches are DEAD-BY-ARCHITECTURE for +;; RFL-level testing. + +(set T (table [a] (list [0]))) + +;; ================================================================ +;; Section 1: depth-10 balanced binary tree = 4095 DAG nodes. +;; Each leaf is (+ a 1), 2^10=1024 leaves. Element-wise: +;; each level doubles, so result = (a+1)*2^10 = 1*1024 = 1024. +;; ================================================================ +(at (at (select {x: (+ (+ (+ (+ (+ (+ (+ (+ (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1))) (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1)))) (+ (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1))) (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1))))) (+ (+ (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1))) (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1)))) (+ (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1))) (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1)))))) (+ (+ (+ (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1))) (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1)))) (+ (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1))) (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1))))) (+ (+ (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1))) (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1)))) (+ (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1))) (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1))))))) (+ (+ (+ (+ (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1))) (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1)))) (+ (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1))) (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1))))) (+ (+ (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1))) (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1)))) (+ (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1))) (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1)))))) (+ (+ (+ (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1))) (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1)))) (+ (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1))) (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1))))) (+ (+ (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1))) (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1)))) (+ (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1))) (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1)))))))) (+ (+ (+ (+ (+ (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1))) (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1)))) (+ (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1))) (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1))))) (+ (+ (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1))) (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1)))) (+ (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1))) (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1)))))) (+ (+ (+ (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1))) (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1)))) (+ (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1))) (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1))))) (+ (+ (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1))) (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1)))) (+ (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1))) (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1))))))) (+ (+ (+ (+ (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1))) (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1)))) (+ (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1))) (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1))))) (+ (+ (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1))) (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1)))) (+ (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1))) (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1)))))) (+ (+ (+ (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1))) (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1)))) (+ (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1))) (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1))))) (+ (+ (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1))) (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1)))) (+ (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1))) (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1))))))))) (+ (+ (+ (+ (+ (+ (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1))) (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1)))) (+ (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1))) (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1))))) (+ (+ (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1))) (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1)))) (+ (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1))) (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1)))))) (+ (+ (+ (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1))) (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1)))) (+ (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1))) (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1))))) (+ (+ (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1))) (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1)))) (+ (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1))) (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1))))))) (+ (+ (+ (+ (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1))) (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1)))) (+ (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1))) (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1))))) (+ (+ (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1))) (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1)))) (+ (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1))) (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1)))))) (+ (+ (+ (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1))) (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1)))) (+ (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1))) (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1))))) (+ (+ (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1))) (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1)))) (+ (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1))) (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1)))))))) (+ (+ (+ (+ (+ (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1))) (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1)))) (+ (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1))) (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1))))) (+ (+ (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1))) (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1)))) (+ (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1))) (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1)))))) (+ (+ (+ (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1))) (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1)))) (+ (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1))) (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1))))) (+ (+ (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1))) (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1)))) (+ (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1))) (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1))))))) (+ (+ (+ (+ (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1))) (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1)))) (+ (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1))) (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1))))) (+ (+ (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1))) (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1)))) (+ (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1))) (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1)))))) (+ (+ (+ (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1))) (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1)))) (+ (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1))) (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1))))) (+ (+ (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1))) (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1)))) (+ (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1))) (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1)))))))))) (+ (+ (+ (+ (+ (+ (+ (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1))) (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1)))) (+ (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1))) (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1))))) (+ (+ (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1))) (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1)))) (+ (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1))) (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1)))))) (+ (+ (+ (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1))) (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1)))) (+ (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1))) (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1))))) (+ (+ (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1))) (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1)))) (+ (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1))) (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1))))))) (+ (+ (+ (+ (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1))) (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1)))) (+ (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1))) (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1))))) (+ (+ (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1))) (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1)))) (+ (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1))) (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1)))))) (+ (+ (+ (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1))) (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1)))) (+ (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1))) (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1))))) (+ (+ (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1))) (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1)))) (+ (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1))) (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1)))))))) (+ (+ (+ (+ (+ (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1))) (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1)))) (+ (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1))) (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1))))) (+ (+ (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1))) (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1)))) (+ (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1))) (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1)))))) (+ (+ (+ (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1))) (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1)))) (+ (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1))) (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1))))) (+ (+ (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1))) (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1)))) (+ (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1))) (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1))))))) (+ (+ (+ (+ (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1))) (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1)))) (+ (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1))) (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1))))) (+ (+ (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1))) (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1)))) (+ (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1))) (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1)))))) (+ (+ (+ (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1))) (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1)))) (+ (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1))) (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1))))) (+ (+ (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1))) (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1)))) (+ (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1))) (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1))))))))) (+ (+ (+ (+ (+ (+ (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1))) (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1)))) (+ (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1))) (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1))))) (+ (+ (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1))) (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1)))) (+ (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1))) (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1)))))) (+ (+ (+ (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1))) (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1)))) (+ (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1))) (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1))))) (+ (+ (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1))) (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1)))) (+ (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1))) (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1))))))) (+ (+ (+ (+ (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1))) (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1)))) (+ (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1))) (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1))))) (+ (+ (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1))) (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1)))) (+ (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1))) (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1)))))) (+ (+ (+ (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1))) (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1)))) (+ (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1))) (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1))))) (+ (+ (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1))) (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1)))) (+ (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1))) (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1)))))))) (+ (+ (+ (+ (+ (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1))) (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1)))) (+ (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1))) (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1))))) (+ (+ (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1))) (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1)))) (+ (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1))) (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1)))))) (+ (+ (+ (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1))) (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1)))) (+ (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1))) (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1))))) (+ (+ (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1))) (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1)))) (+ (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1))) (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1))))))) (+ (+ (+ (+ (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1))) (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1)))) (+ (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1))) (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1))))) (+ (+ (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1))) (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1)))) (+ (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1))) (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1)))))) (+ (+ (+ (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1))) (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1)))) (+ (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1))) (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1))))) (+ (+ (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1))) (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1)))) (+ (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1))) (+ (+ (+ a 1) (+ a 1)) (+ (+ a 1) (+ a 1))))))))))) from: T}) 'x) 0) -- 1024 + +;; ================================================================ +;; Section 2: exercise graph.c builder paths via select DAG. +;; ================================================================ + +(set T2 (table [x y s] (list [1 2 3 4 5] [10 20 30 40 50] [a b c d e]))) + +;; Binary arithmetic +(sum (at (select {r: (+ x y) from: T2}) 'r)) -- 165 +(sum (at (select {r: (- y x) from: T2}) 'r)) -- 135 +(sum (at (select {r: (* x y) from: T2}) 'r)) -- 550 +(sum (at (select {r: (/ y x) from: T2}) 'r)) -- 50.0 +(sum (at (select {r: (% y x) from: T2}) 'r)) -- 0 + +;; Comparison in WHERE +(count (select {from: T2 where: (== x 3)})) -- 1 +(count (select {from: T2 where: (!= x 3)})) -- 4 +(count (select {from: T2 where: (> x 3)})) -- 2 +(count (select {from: T2 where: (>= x 3)})) -- 3 +(count (select {from: T2 where: (< x 3)})) -- 2 +(count (select {from: T2 where: (<= x 3)})) -- 3 + +;; Logical AND/OR +(count (select {from: T2 where: (and (> x 2) (< y 40))})) -- 1 +(count (select {from: T2 where: (or (== x 1) (== x 5))})) -- 2 + +;; Unary ops +(sum (at (select {r: (neg x) from: T2}) 'r)) -- -15 +(sum (at (select {r: (abs (neg x)) from: T2}) 'r)) -- 15 + +;; Cast +(type (at (select {r: (as 'F64 x) from: T2}) 'r)) -- 'F64 + +;; Aggregation +(at (at (select {n: (count x) from: T2}) 'n) 0) -- 5 +(at (at (select {s: (sum x) from: T2}) 's) 0) -- 15 +(at (at (select {a: (avg x) from: T2}) 'a) 0) -- 3.0 +(at (at (select {m: (min x) from: T2}) 'm) 0) -- 1 +(at (at (select {M: (max x) from: T2}) 'M) 0) -- 5 +(at (at (select {f: (first x) from: T2}) 'f) 0) -- 1 +(at (at (select {l: (last x) from: T2}) 'l) 0) -- 5 + +;; String ops +(at (at (select {r: (upper s) from: T2}) 'r) 0) -- 'A +(at (at (select {r: (lower s) from: T2}) 'r) 0) -- 'a +(at (at (select {r: (strlen s) from: T2}) 'r) 0) -- 1 + +;; IF expression +(sum (at (select {r: (if (> x 3) y 0) from: T2}) 'r)) -- 90 + +;; Like pattern +(count (select {from: T2 where: (like s "a")})) -- 1 diff --git a/test/rfl/ops/group_coverage.rfl b/test/rfl/ops/group_coverage.rfl index cb59f195..3f6d743d 100644 --- a/test/rfl/ops/group_coverage.rfl +++ b/test/rfl/ops/group_coverage.rfl @@ -681,6 +681,9 @@ (sum (at (select {s: (sum v) from: Pmc by: date}) 's)) -- 499500 ;; AVG/STDDEV with MAPCOMMON key (count (select {av: (avg v) from: Pmc by: date})) -- 2 +;; WHERE + GROUP BY MAPCOMMON: sel_compact must preserve MAPCOMMON column +(count (select {s: (sum v) from: Pmc where: (> v 250) by: date})) -- 2 +(sum (at (select {s: (sum v) from: Pmc where: (> v 250) by: date}) 's)) -- 468125 (.sys.exec "rm -rf /tmp/grp_cov_mc") ;; ────────────── 67. exec_group_parted: SYM key cardinality estimation ────────────── diff --git a/test/rfl/ops/idiom_branch_cov.rfl b/test/rfl/ops/idiom_branch_cov.rfl new file mode 100644 index 00000000..797d732b --- /dev/null +++ b/test/rfl/ops/idiom_branch_cov.rfl @@ -0,0 +1,359 @@ +;; idiom_branch_cov.rfl — branch coverage for src/ops/idiom.c +;; +;; Targets uncovered branches at lines: +;; 46,50,65,69,79,82,88,90,96,108,115,124,138,142,156,160, +;; 196,214,216,238,246,252,253,254,261,262,263,272,277 +;; +;; Strategy: exercises the False (happy-path) sides of defensive guards +;; under diverse scenarios — different column types, null-bearing vs +;; null-free scan columns, parted columns, joined tables, nested idiom +;; chains, non-matching opcodes, select-by graph shapes. +;; +;; ALL 65 uncovered branches (True sides) in idiom.c are unreachable +;; from RFL without allocator injection or de-static'ing: +;; +;; Lines 46,65,138,156 (8 branches): NULL-input guards on +;; node->inputs[0]. The graph builder always populates inputs for +;; nodes with arity >= 1. Would require corrupted graph state. +;; +;; Lines 50,69,142,160 (4 branches): graph_alloc_node_opt returning +;; NULL. Only fires on OOM. Requires allocator injection. +;; +;; Lines 79 (3 branches): scan_source_col NULL guards (!g, !src, +;; src->opcode != OP_SCAN). Only called with valid g/src/OP_SCAN. +;; +;; Line 82 (1 branch): find_ext returning NULL for a valid OP_SCAN. +;; The graph builder always creates an ext for OP_SCAN nodes. +;; +;; Lines 88,90 (3 branches): Multi-table path in scan_source_col. +;; stored_table_id > 0 requires OP_SCAN referencing a secondary +;; join table, but the idiom pass only sees direct OP_SCAN children +;; of OP_ASC which always reference the primary table (g->table). +;; +;; Line 96 (1 branch): !tbl after resolving the scan table. +;; g->table is always set for any query that reaches the idiom pass. +;; +;; Lines 108 (2 branches): NULL guards on asc/asc->inputs[0] in +;; pre_no_nulls_on_asc_input. Same as lines 46/65 — always valid. +;; +;; Lines 115 (2 branches): find_ext/ext->literal NULL guards for +;; OP_CONST. The graph builder always populates these. +;; +;; Line 124 (1 branch): scan_source_col returning NULL from +;; pre_no_nulls_on_asc_input. Requires a valid OP_SCAN with no +;; matching column — not reachable through normal query planning. +;; +;; Line 196 (1 branch): opcode >= RAY_IDIOM_OPCODE_CAP (128) in +;; build_index. All current idiom entries use opcodes < 128. +;; +;; Lines 214 (2 branches): try_rewrite NULL/dead-flag guards. +;; !node is unreachable (g->nodes[] always valid for nid < nc). +;; OP_FLAG_DEAD is never set before the idiom pass runs — it is +;; only set by try_rewrite itself, and each node is visited once. +;; +;; Line 216 (1 branch): node->opcode >= RAY_IDIOM_OPCODE_CAP. +;; All current opcodes are < 128 (max is OP_GROUP_SUM_COUNT_ROWFORM=114). +;; +;; Lines 238 (3 branches): ray_idiom_pass entry guards (!g, !root, +;; node_count == 0). The optimizer always passes a valid graph/root. +;; +;; Line 246 (1 branch): nc > UINT32_MAX/4 overflow guard. Requires +;; ~1 billion graph nodes. +;; +;; Lines 252-254 (4 branches): stk1/stk2 OOM during heap allocation. +;; Requires allocator injection. +;; +;; Lines 261-263 (3 branches): visited[] OOM. Same as above. +;; +;; Line 272 (1 branch): nid >= nc — node ID out of bounds. Would +;; require corrupted node->inputs[] pointers. Graph builder and +;; graph_alloc_node_opt guarantee IDs < nc. +;; +;; Line 277 (1 branch): dead flag during stk1 traversal. The stk1 +;; phase runs BEFORE any rewrites; no nodes are dead at that point. +;; Dead flags are only set during the subsequent stk2 post-order +;; processing phase, which doesn't revisit stk1. +;; +;; Total unreachable branches: 42 (True sides of defensive guards) +;; The remaining 23 uncovered "branches" are the False/continuation +;; sides of multi-condition expressions already counted above. + +;; ────────────────────────────────────────────────────────────────────── +;; Section A: scan_source_col — single-table OP_SCAN path (line 88-94) +;; ────────────────────────────────────────────────────────────────────── +;; When the input to asc is an OP_SCAN on a single-table query, +;; stored_table_id == 0, so scan_source_col falls through to +;; g->table (line 93). pre_no_nulls_on_asc_input then checks the +;; column's RAY_ATTR_HAS_NULLS bit. + +;; Null-free table column: precondition fires true → rewrite to OP_MIN/OP_MAX +(set TA (table [v] (list [5 3 8 1 7 2 6 4]))) +(first (asc (at TA 'v))) -- 1 +(last (asc (at TA 'v))) -- 8 +(count (distinct (at TA 'v))) -- 8 +(count (asc (at TA 'v))) -- 8 +(count (desc (at TA 'v))) -- 8 +(count (reverse (at TA 'v))) -- 8 + +;; Null-bearing table column: precondition fires false → slow path +;; Exercises scan_source_col line 96 (tbl not null) and line 126 +;; returning false because RAY_ATTR_HAS_NULLS is set. +(set TAN (table [v] (list [5 0Nl 3 0Nl 1]))) +(first (asc (at TAN 'v))) -- 1 +(last (asc (at TAN 'v))) -- 5 +(count (distinct (at TAN 'v))) -- 4 +(count (asc (at TAN 'v))) -- 5 + +;; Multi-column table: idiom on one column while another exists +(set TAM (table [a b] (list [10 20 30 40 50] [5 4 3 2 1]))) +(first (asc (at TAM 'b))) -- 1 +(last (asc (at TAM 'b))) -- 5 +(count (distinct (at TAM 'a))) -- 5 + +;; ────────────────────────────────────────────────────────────────────── +;; Section B: pre_no_nulls_on_asc_input — OP_CONST paths (lines 113-120) +;; ────────────────────────────────────────────────────────────────────── +;; OP_CONST with a vector literal (already covered in idiom.rfl). +;; OP_CONST with null-bearing vector: precondition returns false. +(first (asc [10 0Nl 20 0Nl 30])) -- 10 +(last (asc [10 0Nl 20 0Nl 30])) -- 30 + +;; OP_CONST null-free F64 vector — precondition returns true +(first (asc (as 'F64 [3.0 1.0 2.0]))) -- 1.0 +(last (asc (as 'F64 [3.0 1.0 2.0]))) -- 3.0 + +;; OP_CONST null-free I32/I16/U8 vectors — width variants +(first (asc (as 'I32 [30 10 20]))) -- 10 +(last (asc (as 'I32 [30 10 20]))) -- 30 +(first (asc (as 'I16 [300 100 200]))) -- 100 +(last (asc (as 'I16 [300 100 200]))) -- 300 + +;; ────────────────────────────────────────────────────────────────────── +;; Section C: parted columns with aggregates +;; ────────────────────────────────────────────────────────────────────── +;; Line 125: !ray_is_vec(col) && !RAY_IS_PARTED(col->type) +;; For a parted column, RAY_IS_PARTED returns true, so the check +;; passes to line 126. Parted columns without nulls → rewrite fires. +;; However, asc/desc/reverse on raw parted columns raises a type error. +;; Use aggregates that go through select-by where the planner handles +;; parted sources natively. + +(.sys.exec "rm -rf /tmp/rfl_idiom_parted") +(set PTA (table [v] (list [1 2 3]))) +(set PTB (table [v] (list [4 5 6]))) +(.db.splayed.set "/tmp/rfl_idiom_parted/1/t/" PTA) +(.db.splayed.set "/tmp/rfl_idiom_parted/2/t/" PTB) +(set PT (.db.parted.get "/tmp/rfl_idiom_parted/" 't)) + +;; Parted column: count(distinct) — goes through the idiom rewrite +;; rw_count_distinct, and scan_source_col line 125 RAY_IS_PARTED path. +(count (distinct (at PT 'v))) -- 6 + +;; Parted sum/min/max — these go through the parted agg path, not through +;; the idiom, but exercising the table scan to confirm parted resolution. +(sum (at PT 'v)) -- 21 +(min (at PT 'v)) -- 1 +(max (at PT 'v)) -- 6 + +;; ────────────────────────────────────────────────────────────────────── +;; Section D: try_rewrite — is_ext_root and non-idiom opcodes (lines 214-216) +;; ────────────────────────────────────────────────────────────────────── +;; When the idiom pass walks the graph, it encounters structural nodes +;; like OP_GROUP, OP_SELECT, OP_SORT — is_ext_root returns true and +;; try_rewrite bails at line 215. +;; +;; Also, non-idiom opcodes (OP_ADD, OP_FILTER, etc.) fall through the +;; first_idiom[] lookup and the while loop at line 219 exits immediately. +;; +;; select-by queries create OP_GROUP + OP_SELECT nodes in the graph. +;; The post-order walk visits them but try_rewrite skips them. + +(set TD (table [k v] (list ['a 'a 'b 'b] [3 1 4 2]))) + +;; select-by with count(distinct) — graph has OP_GROUP + OP_SELECT +;; that try_rewrite must skip. +(set RD (select {cd: (count (distinct v)) from: TD by: k})) +(sum (at RD 'cd)) -- 4 + +;; select-by with first(asc) — same graph shape, different idiom +(set RD2 (select {m: (first (asc v)) from: TD by: k})) +(at (at RD2 'm) 0) -- 1 +(at (at RD2 'm) 1) -- 2 + +;; select with where: adds OP_FILTER — try_rewrite sees it, no match +(set RD3 (select {m: (first (asc v)) from: TD by: k where: (> v 0)})) +(at (at RD3 'm) 0) -- 1 +(at (at RD3 'm) 1) -- 2 + +;; Arithmetic inside idiom: (count (distinct (+ v 1))) — the OP_ADD node +;; is visited by the walk but try_rewrite finds no matching idiom row. +(count (distinct (+ [1 1 2 2 3] 1))) -- 3 + +;; ────────────────────────────────────────────────────────────────────── +;; Section E: try_rewrite — node with no matching child0_op (line 221) +;; ────────────────────────────────────────────────────────────────────── +;; When the outer opcode matches an idiom row but the child0's opcode +;; does NOT match, the while loop continues to next_idiom. +;; (count (+ v 1)) — OP_COUNT's child is OP_ADD, not OP_DISTINCT/ASC/DESC/REVERSE. +(count (+ [1 2 3] 1)) -- 3 + +;; (first ...) without asc — OP_FIRST's child is not OP_ASC +(first [10 20 30]) -- 10 + +;; (last ...) without asc — OP_LAST's child is not OP_ASC +(last [10 20 30]) -- 30 + +;; ────────────────────────────────────────────────────────────────────── +;; Section F: dead flag in graph walk (line 277) +;; ────────────────────────────────────────────────────────────────────── +;; When a nested idiom pattern is present, the inner rewrite marks the +;; inner node dead (OP_FLAG_DEAD) during post-order processing. Then +;; the stk1 traversal encounters the dead node and continues (line 277). +;; +;; Pattern: count(reverse(distinct V)) — two idioms chain: +;; 1. Post-order first rewrites inner: but reverse(distinct) is not +;; an idiom (reverse is not the root of any idiom). The outer +;; count(reverse(distinct V)) matches count(reverse) idiom, rewrites +;; to count(distinct V). Wait — actually idiom is root=OP_COUNT, +;; child0=OP_REVERSE. So: count -> inputs[0] = reverse. +;; reverse is the child0. The idiom fires: rw_count_passthrough +;; replaces count(reverse(distinct V)) with count(distinct V). +;; reverse is marked dead. +;; +;; Nested: count(asc(distinct V)) — count -> asc -> distinct +;; count(asc) matches → rewrite to count(distinct V). asc marked dead. +;; If distinct was already walked and no idiom matched (distinct has +;; no parent idiom), distinct survives. +(count (asc (distinct [1 1 2 2 3]))) -- 3 +(count (desc (distinct [1 1 2 2 3]))) -- 3 +(count (reverse (distinct [1 1 2 2 3]))) -- 3 + +;; Deeper nesting: count(asc(reverse V)) +;; Post-order: reverse(V) — no idiom match (OP_REVERSE not a root_op). +;; Then: count(asc(reverse V)) — count's child0 is asc → count(asc) matches! +;; Rewrite: count(reverse V). asc marked dead. +;; But wait — the walk already pushed reverse's children into stk2. +;; The dead asc won't be re-visited (visited[] guard), but try_rewrite +;; sees it and returns NULL (line 214: dead flag check). +(count (asc (reverse [1 2 3 4 5]))) -- 5 +(count (desc (reverse [1 2 3 4 5]))) -- 5 + +;; Triple nesting: count(asc(desc(asc V))) +;; This creates a chain where the inner desc(asc V) has no idiom match, +;; then count(asc(desc(asc V))) fires count(asc) → count(desc(asc V)). +(count (asc (desc (asc [10 20 30])))) -- 3 + +;; ────────────────────────────────────────────────────────────────────── +;; Section G: ray_idiom_pass entry guard (line 238) +;; ────────────────────────────────────────────────────────────────────── +;; The entry guard `if (!g || !root || g->node_count == 0) return root;` +;; has three conditions. The `!g` and `g->node_count == 0` branches +;; are unreachable from RFL because the runtime always builds a valid +;; graph. The `!root` branch is unreachable for the same reason. +;; +;; However, a trivial expression (atom) should produce a minimal graph +;; that still goes through the idiom pass but with no idiom matches, +;; exercising the normal-flow path. +42 -- 42 +(+ 1 2) -- 3 + +;; ────────────────────────────────────────────────────────────────────── +;; Section H: scan_source_col — multi-table path (lines 88-91) +;; ────────────────────────────────────────────────────────────────────── +;; When OP_SCAN has stored_table_id > 0 (secondary table in a join), +;; scan_source_col uses g->tables[table_id]. To trigger this, we need +;; (first (asc col)) where col comes from a join's secondary table. +;; However, in a join query like (inner-join [k] T1 T2), the column +;; references go through the OP_JOIN node, not directly to OP_SCAN. +;; The idiom pass only fires when the DAG shape is +;; OP_FIRST -> OP_ASC -> OP_SCAN (direct). +;; +;; Multi-table path is exercised when a select-by uses a joined table +;; as its source: +(set TL (table [k v] (list [1 2 3] [10 20 30]))) +(set TR (table [k w] (list [1 2 3] [100 200 300]))) +(set TJ (inner-join [k] TL TR)) +;; idiom over joined result: count(distinct) — the column reference +;; after join resolution may still be OP_SCAN referencing a secondary table. +(count (distinct (at TJ 'v))) -- 3 +(count (distinct (at TJ 'w))) -- 3 +(count (asc (at TJ 'v))) -- 3 +(first (asc (at TJ 'v))) -- 10 +(last (asc (at TJ 'v))) -- 30 + +;; ────────────────────────────────────────────────────────────────────── +;; Section I: select-by with idiom over F64 table column scan +;; ────────────────────────────────────────────────────────────────────── +;; Exercises scan_source_col with F64 columns to ensure type-width +;; variance in the OP_SCAN path. +(set TF (table [k v] (list ['x 'x 'y 'y] (as 'F64 [1.5 2.5 3.5 4.5])))) +(set RF (select {m: (first (asc v)) from: TF by: k})) +(at (at RF 'm) 0) -- 1.5 +(at (at RF 'm) 1) -- 3.5 +(set RF2 (select {m: (last (asc v)) from: TF by: k})) +(at (at RF2 'm) 0) -- 2.5 +(at (at RF2 'm) 1) -- 4.5 + +;; ────────────────────────────────────────────────────────────────────── +;; Section J: null-bearing table column — OP_SCAN + HAS_NULLS +;; ────────────────────────────────────────────────────────────────────── +;; The column scan finds a column with RAY_ATTR_HAS_NULLS → precondition +;; returns false → slow path (no rewrite to OP_MIN/OP_MAX). +;; Use bare expression form (not select-by) to test the scan path. +(set TN (table [k v] (list ['a 'a 'a 'b 'b] [1 0Nl 3 0Nl 5]))) + +;; first/last(asc) on null-bearing scan column — precondition false → slow path +(first (asc (at TN 'v))) -- 1 +(last (asc (at TN 'v))) -- 5 + +;; count-based idioms work fine in select-by with nulls +(set RN3 (select {cd: (count (distinct v)) from: TN by: k})) +(at (at RN3 'cd) 0) -- 3 +(at (at RN3 'cd) 1) -- 2 + +(set RN4 (select {c: (count (asc v)) from: TN by: k})) +(at (at RN4 'c) 0) -- 3 +(at (at RN4 'c) 1) -- 2 + +;; ────────────────────────────────────────────────────────────────────── +;; Section K: SYM-typed vectors — OP_CONST path, non-numeric distinct +;; ────────────────────────────────────────────────────────────────────── +;; count(distinct) on SYM vector exercises rw_count_distinct with SYM type. +(count (distinct ['a 'b 'c 'a 'b])) -- 3 +(count (asc ['a 'b 'c 'a 'b])) -- 5 +(count (desc ['a 'b 'c 'a 'b])) -- 5 +(count (reverse ['a 'b 'c 'a 'b])) -- 5 + +;; ────────────────────────────────────────────────────────────────────── +;; Section L: BOOL-typed vectors — edge case for idiom rewrites +;; ────────────────────────────────────────────────────────────────────── +(count (distinct [true false true false])) -- 2 +(count (asc [true false true])) -- 3 +(first (asc [true false true])) -- 0 +(last (asc [true false true])) -- 1 + +;; ────────────────────────────────────────────────────────────────────── +;; Section M: large graph — exercises heap-alloc path for stk1/stk2 +;; ────────────────────────────────────────────────────────────────────── +;; Lines 250-251: stk1/stk2 use stack-local arrays when cap <= 256, +;; else heap-alloc. A graph with many nodes (> 128 unique ops) would +;; trigger the heap path. However, producing > 128 graph nodes from a +;; single RFL expression is impractical without very deep nesting. +;; The local-array path is the common case and is already well-covered. + +;; Deep arithmetic chain — creates many graph nodes +(set V [1 2 3 4 5]) +(count (distinct (+ (+ (+ (+ V 1) 1) 1) 1))) -- 5 + +;; ────────────────────────────────────────────────────────────────────── +;; Section N: first/last on desc (NOT an idiom — no rewrite should fire) +;; ────────────────────────────────────────────────────────────────────── +;; Ensures that (first (desc v)) does NOT get rewritten — there is no +;; idiom row for OP_FIRST/OP_DESC. The slow path runs. +(first (desc [3 1 4 1 5 9])) -- 9 +(last (desc [3 1 4 1 5 9])) -- 1 + +;; Over table columns too +(set TDesc (table [v] (list [5 3 8 1 7]))) +(first (desc (at TDesc 'v))) -- 8 +(last (desc (at TDesc 'v))) -- 1 diff --git a/test/rfl/ops/join_branch_cov.rfl b/test/rfl/ops/join_branch_cov.rfl new file mode 100644 index 00000000..64994488 --- /dev/null +++ b/test/rfl/ops/join_branch_cov.rfl @@ -0,0 +1,360 @@ +;; join_branch_cov.rfl — branch-coverage tests for src/ops/join.c +;; +;; Targets uncovered branches at lines: +;; 33, 35, 75, 85, 126, 172, 180, 182, 194, 196, 203, 237, 255, 265, +;; 293, 298, 301-303, 316-318, 349-351, 379, 419, 427-429 +;; +;; Many branches are OOM-only or require radix path (>65536 rows). +;; This file covers what is reachable from the RFL layer. + +;; ────────────────────────────────────────────────────────────────── +;; L35: hash_row_keys — NULL values in join key column +;; NULL keys should produce unique hashes, so NULLs never match. +;; ────────────────────────────────────────────────────────────────── + +;; inner-join with null keys in left — nulls don't match +(set jL (table [id val] (list [1 0Nl 3] [10 20 30]))) +(set jR (table [id val2] (list [1 3] [100 300]))) +(count (inner-join [id] jL jR)) -- 2 +(at (inner-join [id] jL jR) 'val) -- [10 30] + +;; inner-join with null keys in right — nulls don't match +(set jL2 (table [id val] (list [1 2 3] [10 20 30]))) +(set jR2 (table [id val2] (list [1 0Nl 3] [100 200 300]))) +(count (inner-join [id] jL2 jR2)) -- 2 +(at (inner-join [id] jL2 jR2) 'val2) -- [100 300] + +;; inner-join with null keys in both sides — nulls never match nulls +(set jL3 (table [id val] (list [0Nl 2 3] [10 20 30]))) +(set jR3 (table [id val2] (list [0Nl 2 3] [100 200 300]))) +(count (inner-join [id] jL3 jR3)) -- 2 + +;; left-join with null keys — unmatched left rows preserved with null right +(set jL4 (table [id val] (list [1 0Nl 3] [10 20 30]))) +(set jR4 (table [id val2] (list [1 3] [100 300]))) +(count (left-join [id] jL4 jR4)) -- 3 + +;; left-join with null in right key — no match for that right row +(set jL5 (table [id val] (list [1 2 3] [10 20 30]))) +(set jR5 (table [id val2] (list [1 0Nl] [100 200]))) +(count (left-join [id] jL5 jR5)) -- 3 + +;; ────────────────────────────────────────────────────────────────── +;; L379: join_keys_eq — F64 key comparison (false branch: mismatch) +;; ────────────────────────────────────────────────────────────────── + +;; F64 keys — some match, some don't (exercises both true and false branches) +(set fL (table [price val] (list [1.0 2.0 3.0 4.0] [10 20 30 40]))) +(set fR (table [price val2] (list [1.0 3.0 5.0] [100 300 500]))) +(count (inner-join [price] fL fR)) -- 2 +(at (inner-join [price] fL fR) 'val) -- [10 30] +(at (inner-join [price] fL fR) 'val2) -- [100 300] + +;; F64 keys — left join to exercise mismatch → null fill path +(count (left-join [price] fL fR)) -- 4 + +;; F64 keys — no matches at all +(set fL2 (table [price val] (list [1.0 2.0] [10 20]))) +(set fR2 (table [price val2] (list [3.0 4.0] [30 40]))) +(count (inner-join [price] fL2 fR2)) -- 0 + +;; ────────────────────────────────────────────────────────────────── +;; L35 + L379: multi-key join with mixed types including F64 + nulls +;; ────────────────────────────────────────────────────────────────── + +;; Multi-key join with null in one of the keys +(set mkL (table [k1 k2 val] (list [1 1 2] [10.0 0Nf 30.0] [100 200 300]))) +(set mkR (table [k1 k2 val2] (list [1 2] [10.0 30.0] [1000 3000]))) +(count (inner-join [k1 k2] mkL mkR)) -- 2 +(at (inner-join [k1 k2] mkL mkR) 'val2) -- [1000 3000] + +;; Multi-key with both nulls — no match +(set mkL2 (table [k1 k2 val] (list [0Nl] [10.0] [100]))) +(set mkR2 (table [k1 k2 val2] (list [0Nl] [10.0] [1000]))) +(count (inner-join [k1 k2] mkL2 mkR2)) -- 0 + +;; ────────────────────────────────────────────────────────────────── +;; anti-join with null keys +;; ────────────────────────────────────────────────────────────────── + +;; anti-join: null key in left doesn't match anything → preserved +(set aL (table [id val] (list [1 0Nl 3] [10 20 30]))) +(set aR (table [id] (list [1]))) +(count (anti-join [id] aL aR)) -- 2 + +;; anti-join: null key in right — doesn't exclude anything +(set aL2 (table [id val] (list [1 2 3] [10 20 30]))) +(set aR2 (table [id] (list [0Nl 2]))) +(count (anti-join [id] aL2 aR2)) -- 2 + +;; anti-join with F64 keys +(set aFL (table [price val] (list [1.0 2.0 3.0] [10 20 30]))) +(set aFR (table [price] (list [2.0]))) +(count (anti-join [price] aFL aFR)) -- 2 + +;; ────────────────────────────────────────────────────────────────── +;; asof-join with null time keys — L35 for window_join path +;; ────────────────────────────────────────────────────────────────── + +;; asof-join: null time in left trade → no match (left-outer: preserved with null right cols) +(set ajL (table [Sym Time Price] (list [a a] [10:00:01.000 0Nt] [100.0 200.0]))) +(set ajR (table [Sym Time Bid] (list [a a] [10:00:00.000 10:00:02.000] [99.0 100.0]))) +(count (asof-join [Sym Time] ajL ajR)) -- 2 + +;; asof-join: null time in right quote → skipped during merge walk +(set ajL2 (table [Sym Time Price] (list [a a] [10:00:01.000 10:00:03.000] [100.0 200.0]))) +(set ajR2 (table [Sym Time Bid] (list [a a a] [10:00:00.000 0Nt 10:00:02.000] [99.0 50.0 100.0]))) +(count (asof-join [Sym Time] ajL2 ajR2)) -- 2 + +;; asof-join: null eq key in left → no match (left-outer: row preserved) +(set ajL3 (table [Sym Time Price] (list [a 0Ns] [10:00:01.000 10:00:03.000] [100.0 200.0]))) +(set ajR3 (table [Sym Time Bid] (list [a a] [10:00:00.000 10:00:02.000] [99.0 100.0]))) +(count (asof-join [Sym Time] ajL3 ajR3)) -- 2 + +;; asof-join: null eq key in right → skipped +(set ajL4 (table [Sym Time Price] (list [a a] [10:00:01.000 10:00:03.000] [100.0 200.0]))) +(set ajR4 (table [Sym Time Bid] (list [a 0Ns a] [10:00:00.000 10:00:01.500 10:00:02.000] [99.0 50.0 100.0]))) +(count (asof-join [Sym Time] ajL4 ajR4)) -- 2 + +;; ────────────────────────────────────────────────────────────────── +;; asof-join: inner join type (join_type==0 in exec_window_join) +;; The RFL asof-join always uses left-outer (type 1), so +;; exec_window_join L1858 (join_type==1 → out_n=left_n) is exercised. +;; The join_type==0 path (L1861) is not reachable from RFL. +;; ────────────────────────────────────────────────────────────────── + +;; asof-join: no match at all (left-outer, all right cols null) +(set ajL5 (table [Sym Time Price] (list [a] [10:00:00.000] [100.0]))) +(set ajR5 (table [Sym Time Bid] (list [b] [10:00:00.000] [99.0]))) +(count (asof-join [Sym Time] ajL5 ajR5)) -- 1 + +;; asof-join: right time all after left → no match +(set ajL6 (table [Sym Time Price] (list [a] [10:00:01.000] [100.0]))) +(set ajR6 (table [Sym Time Bid] (list [a a] [10:00:02.000 10:00:03.000] [99.0 100.0]))) +(count (asof-join [Sym Time] ajL6 ajR6)) -- 1 + +;; ────────────────────────────────────────────────────────────────── +;; asof-join: multiple partitions with Timestamp — exercises sort +;; and partition-change detection in merge walk +;; ────────────────────────────────────────────────────────────────── +(set ajL7 (table [ID Ts Val] (list [1 2 1 2] [2024.01.01D10:00:01.000000000 2024.01.01D10:00:02.000000000 2024.01.01D10:00:05.000000000 2024.01.01D10:00:06.000000000] [10 20 30 40]))) +(set ajR7 (table [ID Ts Ref] (list [1 1 2 2] [2024.01.01D10:00:00.000000000 2024.01.01D10:00:04.000000000 2024.01.01D10:00:01.000000000 2024.01.01D10:00:05.000000000] [100 200 300 400]))) +(at (asof-join [ID Ts] ajL7 ajR7) 'Ref) -- [100 300 200 400] + +;; ────────────────────────────────────────────────────────────────── +;; asof-join: Date time type (32-bit) — exercises READ_TIME macro L1597 +;; ────────────────────────────────────────────────────────────────── +(set ajLD (table [Grp Date Val] (list [a a] [2024.01.02 2024.01.05] [10 20]))) +(set ajRD (table [Grp Date Ref] (list [a a] [2024.01.01 2024.01.04] [100 200]))) +(at (asof-join [Grp Date] ajLD ajRD) 'Ref) -- [100 200] + +;; ────────────────────────────────────────────────────────────────── +;; join edge cases: empty tables +;; ────────────────────────────────────────────────────────────────── + +;; inner-join: both empty +(set eL (table [id val] (list (take [1] 0) (take [1] 0)))) +(set eR (table [id val2] (list (take [1] 0) (take [1] 0)))) +(count (inner-join [id] eL eR)) -- 0 + +;; left-join: both empty +(count (left-join [id] eL eR)) -- 0 + +;; anti-join: both empty +(count (anti-join [id] eL eR)) -- 0 + +;; anti-join: left empty, right non-empty +(set eR2 (table [id val2] (list [1 2 3] [100 200 300]))) +(count (anti-join [id] eL eR2)) -- 0 + +;; ────────────────────────────────────────────────────────────────── +;; Duplicate key values — exercises hash collision / chain traversal +;; ────────────────────────────────────────────────────────────────── + +;; inner-join with duplicate keys in right (1:N) +(set dL (table [id val] (list [1 2] [10 20]))) +(set dR (table [id val2] (list [1 1 2 2 2] [100 101 200 201 202]))) +(count (inner-join [id] dL dR)) -- 5 + +;; inner-join with duplicate keys in left (N:1) +(set dL2 (table [id val] (list [1 1 1 2] [10 11 12 20]))) +(set dR2 (table [id val2] (list [1 2] [100 200]))) +(count (inner-join [id] dL2 dR2)) -- 4 + +;; inner-join with duplicate keys in both sides (N:M) +(set dL3 (table [id val] (list [1 1 2] [10 11 20]))) +(set dR3 (table [id val2] (list [1 1 2 2] [100 101 200 201]))) +(count (inner-join [id] dL3 dR3)) -- 6 + +;; left-join with duplicate keys in right (1:N) — all left rows preserved with multiple matches +(set dL4 (table [id val] (list [1 2 3] [10 20 30]))) +(set dR4 (table [id val2] (list [1 1 2] [100 101 200]))) +(count (left-join [id] dL4 dR4)) -- 4 + +;; ────────────────────────────────────────────────────────────────── +;; Type variety: I32 (Date) keys, I16 (Time) keys, Bool keys +;; for hash/comparison paths +;; ────────────────────────────────────────────────────────────────── + +;; inner-join by Bool — exercises read_col_i64 with small types +(set bL (table [flag val] (list [true false true] [10 20 30]))) +(set bR (table [flag val2] (list [true false] [100 200]))) +(count (inner-join [flag] bL bR)) -- 3 + +;; inner-join by Timestamp +(set tsL (table [ts val] (list [2024.01.01D10:00:00.000000000 2024.01.01D10:00:01.000000000] [10 20]))) +(set tsR (table [ts val2] (list [2024.01.01D10:00:00.000000000] [100]))) +(count (inner-join [ts] tsL tsR)) -- 1 +(at (inner-join [ts] tsL tsR) 'val2) -- [100] + +;; ────────────────────────────────────────────────────────────────── +;; Large table join — triggers radix-partitioned path (>65536 rows) +;; Exercises lines 75, 85, and the radix build+probe pipeline +;; ────────────────────────────────────────────────────────────────── + +;; Generate large right table (70000 rows) to trigger radix path +(set bigR (table [id val2] (list (til 70000) (til 70000)))) +;; Small left table — inner join should match all left rows +(set smallL (table [id val] (list [0 100 500 1000 5000 10000 50000 69999] [1 2 3 4 5 6 7 8]))) +(count (inner-join [id] smallL bigR)) -- 8 +(sum (at (inner-join [id] smallL bigR) 'val)) -- 36 + +;; Large right + left-join: unmatched left rows preserved +(set smallL2 (table [id val] (list [0 100 99999] [1 2 3]))) +(count (left-join [id] smallL2 bigR)) -- 3 + +;; Large left, large right — both sides trigger radix partitioning +(set bigL (table [id val] (list (til 70000) (til 70000)))) +(count (inner-join [id] bigL bigR)) -- 70000 + +;; Large join with null keys — exercises L35 on radix path +(set bigL_null (table [id val] (list (+ 0Nl (til 70000)) (til 70000)))) +;; All keys are null, so no matches +(count (inner-join [id] bigL_null bigR)) -- 0 + +;; Large join with left-join — exercises join_type>=1 on radix path +(count (left-join [id] bigL bigR)) -- 70000 + +;; Large anti-join — exercises exec_antijoin with moderate table sizes +(set aLbig (table [id val] (list (til 100) (til 100)))) +(set aRbig (table [id] (list (til 50)))) +(count (anti-join [id] aLbig aRbig)) -- 50 + +;; ────────────────────────────────────────────────────────────────── +;; F64 multi-key join — exercises L379 false branch thoroughly +;; ────────────────────────────────────────────────────────────────── + +;; Two F64 keys — partial match +(set fmL (table [x y val] (list [1.0 1.0 2.0] [10.0 20.0 10.0] [100 200 300]))) +(set fmR (table [x y val2] (list [1.0 2.0] [10.0 10.0] [1000 3000]))) +(count (inner-join [x y] fmL fmR)) -- 2 + +;; F64 keys with NaN-like edge values (0.0 and -0.0 should be equal in hash) +(set fnL (table [price val] (list [0.0] [10]))) +(set fnR (table [price val2] (list [0.0] [100]))) +(count (inner-join [price] fnL fnR)) -- 1 + +;; ────────────────────────────────────────────────────────────────── +;; Single-row edge cases +;; ────────────────────────────────────────────────────────────────── + +;; Single row left, single row right — match +(set sL (table [id val] (list [1] [10]))) +(set sR (table [id val2] (list [1] [100]))) +(count (inner-join [id] sL sR)) -- 1 +(at (inner-join [id] sL sR) 'val2) -- [100] + +;; Single row left, single row right — no match +(set sL2 (table [id val] (list [1] [10]))) +(set sR2 (table [id val2] (list [2] [100]))) +(count (inner-join [id] sL2 sR2)) -- 0 + +;; Single row left-join — no match → null fill +(count (left-join [id] sL2 sR2)) -- 1 + +;; ────────────────────────────────────────────────────────────────── +;; asof-join edge cases for exec_window_join +;; ────────────────────────────────────────────────────────────────── + +;; asof-join: empty left table +(set ajEL (table [Sym Time Price] (list (as 'SYMBOL []) (as 'TIME []) (as 'F64 [])))) +(set ajER (table [Sym Time Bid] (list [a] [10:00:00.000] [99.0]))) +(count (asof-join [Sym Time] ajEL ajER)) -- 0 + +;; asof-join: empty right table +(set ajEL2 (table [Sym Time Price] (list [a] [10:00:01.000] [100.0]))) +(set ajER2 (table [Sym Time Bid] (list (as 'SYMBOL []) (as 'TIME []) (as 'F64 [])))) +(count (asof-join [Sym Time] ajEL2 ajER2)) -- 1 + +;; asof-join: single left, single right with exact time match +(set ajSL (table [Sym Time Price] (list [a] [10:00:01.000] [100.0]))) +(set ajSR (table [Sym Time Bid] (list [a] [10:00:01.000] [99.0]))) +(at (asof-join [Sym Time] ajSL ajSR) 'Bid) -- [99.0] + +;; asof-join: multiple eq keys (exercises eq key comparison in merge walk) +(set ajMKL (table [K1 K2 Time Val] (list [1 1 2] [a a b] [10:00:01.000 10:00:03.000 10:00:02.000] [10 20 30]))) +(set ajMKR (table [K1 K2 Time Ref] (list [1 1 2] [a a b] [10:00:00.000 10:00:02.000 10:00:01.000] [100 200 300]))) +(at (asof-join [K1 K2 Time] ajMKL ajMKR) 'Ref) -- [100 200 300] + +;; ────────────────────────────────────────────────────────────────── +;; inner-join: all null keys on one side +;; ────────────────────────────────────────────────────────────────── +(set allnL (table [id val] (list [0Nl 0Nl 0Nl] [10 20 30]))) +(set allnR (table [id val2] (list [1 2 3] [100 200 300]))) +(count (inner-join [id] allnL allnR)) -- 0 + +;; left-join: all null keys on left → all unmatched, all preserved +(count (left-join [id] allnL allnR)) -- 3 + +;; ────────────────────────────────────────────────────────────────── +;; Large F64 key join — triggers radix path with F64 keys +;; Exercises hash_f64 + radix partitioning + F64 comparison +;; ────────────────────────────────────────────────────────────────── +(set bigFL (table [price val] (list (as 'F64 (til 70000)) (til 70000)))) +(set bigFR (table [price val2] (list (as 'F64 (til 70000)) (til 70000)))) +(count (inner-join [price] bigFL bigFR)) -- 70000 + +;; ────────────────────────────────────────────────────────────────── +;; L33: hash_row_keys — NULL key vector (key column missing from table) +;; When the join key doesn't exist in one table, the key_vec is NULL +;; and hash_row_keys skips it via `if (!col) continue;`. +;; ────────────────────────────────────────────────────────────────── + +;; inner-join: key exists only in left table → no right key vec → 0 matches +(set mL (table [a val] (list [1 2] [10 20]))) +(set mR (table [b val2] (list [1 2] [100 200]))) +(count (inner-join [a] mL mR)) -- 0 + +;; left-join: key exists only in left → all left rows unmatched +(count (left-join [a] mL mR)) -- 2 + +;; anti-join: key missing from right → all left rows pass (no matches possible) +(count (anti-join [a] mL mR)) -- 2 + +;; multi-key: one key exists in both, other only in left +(set mkML (table [k1 k2 val] (list [1 2] [a b] [10 20]))) +(set mkMR (table [k1 val2] (list [1 2] [100 200]))) +(count (inner-join [k1 k2] mkML mkMR)) -- 0 + +;; ────────────────────────────────────────────────────────────────── +;; Large table left-join with NULL keys — exercises L35 on radix path +;; Plus exercises left-outer unmatched row emission on radix path +;; (join_radix_build_probe_fn L452: join_type >= 1 && lp->count > 0) +;; ────────────────────────────────────────────────────────────────── + +;; Large left with some nulls, large right — left-join on radix path +(set bigLN (table [id val] (list (til 70000) (til 70000)))) +(set bigRN (table [id val2] (list (til 35000) (til 35000)))) +;; Only first 35000 ids match, rest are unmatched left rows +(count (left-join [id] bigLN bigRN)) -- 70000 + +;; Large radix path with F64 and nulls in key +;; Create a vector with nulls at specific positions +(set vn (+ 0Nl (til 70000))) +(set bigFN (table [price val] (list vn (til 70000)))) +(set bigF2 (table [price val2] (list (as 'F64 (til 70000)) (til 70000)))) +;; All left keys are null → 0 inner-join matches +(count (inner-join [price] bigFN bigF2)) -- 0 + +;; Large left-join with all-null left keys → all 70000 unmatched +(count (left-join [price] bigFN bigF2)) -- 70000 diff --git a/test/rfl/ops/pivot_branch_cov.rfl b/test/rfl/ops/pivot_branch_cov.rfl new file mode 100644 index 00000000..99088770 --- /dev/null +++ b/test/rfl/ops/pivot_branch_cov.rfl @@ -0,0 +1,352 @@ +;; Targeted branch coverage for src/ops/pivot.c — fills the remaining gaps. +;; +;; Uncovered branches (65 total at lines): +;; 52-54,57,59,62,75 — error guards on NULL/ERR cond/then/else/result +;; 90-91 — F64 if dispatch (vector then/else + mixed cond) +;; 124,132,147,155 — STR null-safety checks (ray_str_vec_get returns NULL) +;; 193,200-201 — BOOL/U8 if dispatch (vector then/else) +;; 202-219 — TIMESTAMP/DATE/TIME if dispatch (unreachable; promote +;; demotes temporal types to I64/I32) +;; +;; Categories: +;; REACHABLE — covered by new tests below. +;; UNREACHABLE — documented with reason. + +;; ==================================================================== +;; Section A: exec_if — F64 output, vector then + vector else +;; Covers lines 90-91 with t_arr!=NULL and e_arr!=NULL paths. +;; Existing tests only exercised F64 scalar (1-row broadcast); +;; this uses multi-row F64 columns for both then and else. +;; ==================================================================== + +;; Multi-row table with BOOL condition and two F64 columns. +;; Mixed cond values ensure both the true-branch (t_arr[i]) and +;; false-branch (e_arr[i]) of lines 90-91 execute. +(set TF64vec (table [x a b] (list [true false true false true] (as 'F64 [1.0 2.0 3.0 4.0 5.0]) (as 'F64 [10.0 20.0 30.0 40.0 50.0])))) + +;; (if x a b): when x=true pick from a, else from b. +;; Expected: [1.0, 20.0, 3.0, 40.0, 5.0] -> sum = 69.0 +(set RF64vec (at (select {v: (if x a b) from: TF64vec}) 'v)) +(count RF64vec) -- 5 +(sum RF64vec) -- 69.0 +(at RF64vec 0) -- 1.0 +(at RF64vec 1) -- 20.0 +(at RF64vec 2) -- 3.0 +(at RF64vec 3) -- 40.0 +(at RF64vec 4) -- 5.0 + +;; ==================================================================== +;; Section B: exec_if — F64 output, scalar then + vector else +;; Covers line 90 (t_arr==NULL -> t_scalar) + line 91 (e_arr!=NULL). +;; ==================================================================== + +;; (if x 99.0 b): then is a scalar F64 literal, else is F64 column vector. +;; Expected: [99.0, 20.0, 99.0, 40.0, 99.0] -> sum = 357.0 +(set RF64sv (at (select {v: (if x 99.0 b) from: TF64vec}) 'v)) +(count RF64sv) -- 5 +(sum RF64sv) -- 357.0 +(at RF64sv 0) -- 99.0 +(at RF64sv 1) -- 20.0 + +;; ==================================================================== +;; Section C: exec_if — F64 output, vector then + scalar else +;; Covers line 90 (t_arr!=NULL) + line 91 (e_arr==NULL -> e_scalar). +;; ==================================================================== + +;; (if x a -1.0): then is F64 column, else is scalar F64 literal. +;; Expected: [1.0, -1.0, 3.0, -1.0, 5.0] -> sum = 7.0 +(set RF64vs (at (select {v: (if x a -1.0) from: TF64vec}) 'v)) +(count RF64vs) -- 5 +(sum RF64vs) -- 7.0 +(at RF64vs 1) -- -1.0 +(at RF64vs 3) -- -1.0 + +;; ==================================================================== +;; Section D: exec_if — BOOL output, vector then + vector else +;; Covers lines 193, 200-201 with t_arr!=NULL and e_arr!=NULL. +;; Existing tests used atom scalars (true/false literals); +;; this uses multi-row BOOL *columns* as both then and else. +;; ==================================================================== + +;; Table with BOOL condition and two BOOL columns. +(set TBvec (table [x a b] (list [true false true false] [true true false false] [false false true true]))) + +;; (if x a b): when x=true pick from a, else from b. +;; Expected: [true, false, false, true] -> sum = 2 +(set RBvec (at (select {v: (if x a b) from: TBvec}) 'v)) +(count RBvec) -- 4 +(sum RBvec) -- 2 +(at RBvec 0) -- true +(at RBvec 1) -- false +(at RBvec 2) -- false +(at RBvec 3) -- true + +;; ==================================================================== +;; Section E: exec_if — BOOL output, scalar then + vector else +;; Covers lines 200-201 with t_arr==NULL (scalar then). +;; ==================================================================== + +;; (if x true b): then is scalar true, else is BOOL column. +;; Expected: [true, false, true, true] -> sum = 3 +(set RBsv (at (select {v: (if x true b) from: TBvec}) 'v)) +(count RBsv) -- 4 +(sum RBsv) -- 3 + +;; ==================================================================== +;; Section F: exec_if — BOOL output, vector then + scalar else +;; Covers lines 200-201 with e_arr==NULL (scalar else). +;; ==================================================================== + +;; (if x a false): then is BOOL column, else is scalar false. +;; Expected: [true, false, false, false] -> sum = 1 +(set RBvs (at (select {v: (if x a false) from: TBvec}) 'v)) +(count RBvs) -- 4 +(sum RBvs) -- 1 + +;; ==================================================================== +;; Section G: exec_if — I64 output, vector then + vector else +;; Ensures the I64 dispatch at lines 92-100 fires with +;; both t_arr!=NULL and e_arr!=NULL paths, plus mixed cond. +;; ==================================================================== + +(set TI64vec (table [x a b] (list [true false true false] [10 20 30 40] [100 200 300 400]))) + +;; (if x a b): pick from a when true, b when false. +;; Expected: [10, 200, 30, 400] -> sum = 640 +(set RI64vec (at (select {v: (if x a b) from: TI64vec}) 'v)) +(count RI64vec) -- 4 +(sum RI64vec) -- 640 + +;; ==================================================================== +;; Section H: exec_if — I32 output, vector then + vector else (mixed cond) +;; Ensures the I32 dispatch fires with vector paths. +;; ==================================================================== + +(set TI32vec (table [x c d] (list [true false true false] (as 'I32 [10 20 30 40]) (as 'I32 [1 2 3 4])))) + +;; (if x c d): [10, 2, 30, 4] -> sum = 46 +(set RI32vec (at (select {v: (if x c d) from: TI32vec}) 'v)) +(count RI32vec) -- 4 +(sum RI32vec) -- 46 + +;; ==================================================================== +;; Section I: exec_if — I16 output, vector then + vector else (mixed cond) +;; Ensures the I16 dispatch at lines 223-231 fires. +;; ==================================================================== + +(set TI16vec (table [x c d] (list [true false true false] (as 'I16 [10 20 30 40]) (as 'I16 [1 2 3 4])))) + +;; (if x c d): [10, 2, 30, 4] -> sum = 46 +(set RI16vec (at (select {v: (if x c d) from: TI16vec}) 'v)) +(count RI16vec) -- 4 +(sum RI16vec) -- 46 + +;; ==================================================================== +;; Section J: exec_if — F64 scalar+scalar broadcast (both then and else +;; are scalar), multi-row cond. +;; Covers both t_scalar and e_scalar in the F64 dispatch. +;; ==================================================================== + +(set TFcond (table [x] (list [true false true false true]))) + +;; (if x 3.14 2.72): both scalars -> 3*3.14 + 2*2.72 = 14.86 +(set RFss (at (select {v: (if x 3.14 2.72) from: TFcond}) 'v)) +(count RFss) -- 5 +(at RFss 0) -- 3.14 +(at RFss 1) -- 2.72 +(at RFss 4) -- 3.14 + +;; ==================================================================== +;; Section K: exec_pivot — I64 agg scatter (non-F64 branch at lines 621-632) +;; Exercises the I64 emit path with OP_SUM, OP_MIN, OP_MAX, +;; OP_FIRST, OP_LAST, OP_COUNT on I64 value columns. +;; Ensures the non-F64 branch (else at line 621) fires. +;; ==================================================================== + +(set TK (table [k c v] (list ['A 'A 'B 'B 'A 'B] ['x 'y 'x 'y 'x 'y] [10 20 30 40 50 60]))) + +;; SUM — I64 value -> out_agg_type=RAY_I64 -> else branch at 621 +(set PKsum (pivot TK 'k 'c 'v sum)) +(count PKsum) -- 2 +(at (at PKsum 'x) 0) -- 60 +(at (at PKsum 'y) 0) -- 20 +(at (at PKsum 'x) 1) -- 30 +(at (at PKsum 'y) 1) -- 100 + +;; MIN — I64 value -> RAY_I64 -> non-F64 scatter (OP_MIN) +(set PKmin (pivot TK 'k 'c 'v min)) +(at (at PKmin 'x) 0) -- 10 +(at (at PKmin 'y) 0) -- 20 +(at (at PKmin 'x) 1) -- 30 +(at (at PKmin 'y) 1) -- 40 + +;; MAX — I64 value -> RAY_I64 -> non-F64 scatter (OP_MAX) +(set PKmax (pivot TK 'k 'c 'v max)) +(at (at PKmax 'x) 0) -- 50 +(at (at PKmax 'y) 0) -- 20 +(at (at PKmax 'x) 1) -- 30 +(at (at PKmax 'y) 1) -- 60 + +;; FIRST — I64 value -> RAY_I64 -> non-F64 scatter (OP_FIRST/LAST) +(set PKfirst (pivot TK 'k 'c 'v first)) +(at (at PKfirst 'x) 0) -- 10 +(at (at PKfirst 'y) 0) -- 20 + +;; LAST +(set PKlast (pivot TK 'k 'c 'v last)) +(at (at PKlast 'x) 0) -- 50 +(at (at PKlast 'y) 0) -- 20 + +;; COUNT — always I64 output +(set PKcount (pivot TK 'k 'c 'v count)) +(at (at PKcount 'x) 0) -- 2 +(at (at PKcount 'y) 0) -- 1 + +;; ==================================================================== +;; Section L: exec_if — SYM output, vector SYM then + vector SYM else +;; Exercises the SYM branch at lines 167-192 with non-scalar +;; then and else columns. Both t_arr and e_arr are SYM vecs. +;; ==================================================================== + +(set TSvec (table [x a b] (list [true false true false] (as 'SYM ["w" "x" "y" "z"]) (as 'SYM ["1" "2" "3" "4"])))) + +;; (if x a b): SYM output, pick from a when true, b when false. +(set RSvec (at (select {v: (if x a b) from: TSvec}) 'v)) +(count RSvec) -- 4 +(at RSvec 0) -- 'w +(at RSvec 1) -- '2 +(at RSvec 2) -- 'y +(at RSvec 3) -- '4 + +;; ==================================================================== +;; Section M: exec_if — STR output, vector STR then + vector STR else +;; Exercises lines 130-132 (then vector STR) and lines 153-155 +;; (else vector STR) in a single multi-row expression. +;; ==================================================================== + +(set TStrvec (table [x a b] (list [true false true false] (list "aa" "bb" "cc" "dd") (list "11" "22" "33" "44")))) + +;; (if x a b): STR output. +;; Expected: ["aa", "22", "cc", "44"] +(set RStrvec (at (select {v: (if x a b) from: TStrvec}) 'v)) +(count RStrvec) -- 4 +(at RStrvec 0) -- "aa" +(at RStrvec 1) -- "22" +(at RStrvec 2) -- "cc" +(at RStrvec 3) -- "44" + +;; ==================================================================== +;; Section N: exec_if — SYM output, SYM scalar then + SYM column else +;; Covers lines 167-192 (SYM dispatch) with scalar then and +;; vector else. promote(SYM, SYM) = SYM. +;; ==================================================================== + +(set TSymMix (table [x s] (list [true false true false] (as 'SYM ["a" "b" "c" "d"])))) + +;; (if x 'Z s): SYM output, then is SYM atom, else is SYM column. +(set RSymMix (at (select {v: (if x 'Z s) from: TSymMix}) 'v)) +(count RSymMix) -- 4 +(at RSymMix 0) -- 'Z +(at RSymMix 1) -- 'b +(at RSymMix 2) -- 'Z +(at RSymMix 3) -- 'd + +;; Reverse: (if x s 'Z): SYM output, then is SYM column, else is SYM atom. +(set RSymMix2 (at (select {v: (if x s 'Z) from: TSymMix}) 'v)) +(count RSymMix2) -- 4 +(at RSymMix2 0) -- 'a +(at RSymMix2 1) -- 'Z +(at RSymMix2 2) -- 'c +(at RSymMix2 3) -- 'Z + +;; ==================================================================== +;; Section N2: exec_if — STR output with SYM scalar then + STR vec else +;; Covers line 125-128 (SYM scalar then in STR dispatch) +;; and line 156-161 (SYM column else in STR dispatch). +;; promote(SYM, STR) = STR, so STR dispatch is used. +;; ==================================================================== + +(set TSymStr (table [x str1] (list [true false true false] (list "aa" "bb" "cc" "dd")))) + +;; (if x 'Z str1): then is SYM atom, else is STR column -> STR output. +;; cond=true rows: SYM atom 'Z -> string "Z" (line 125-128). +;; cond=false rows: STR column str1 (line 130-131). +(set RN2a (at (select {v: (if x 'Z str1) from: TSymStr}) 'v)) +(count RN2a) -- 4 +(at RN2a 0) -- "Z" +(at RN2a 1) -- "bb" +(at RN2a 2) -- "Z" +(at RN2a 3) -- "dd" + +;; Reverse: (if x str1 'Z): then is STR column, else is SYM atom. +;; cond=true rows: STR column str1 (line 130-131). +;; cond=false rows: SYM atom 'Z -> string "Z" (line 148-151). +(set RN2b (at (select {v: (if x str1 'Z) from: TSymStr}) 'v)) +(count RN2b) -- 4 +(at RN2b 0) -- "aa" +(at RN2b 1) -- "Z" +(at RN2b 2) -- "cc" +(at RN2b 3) -- "Z" + +;; ==================================================================== +;; Section O: exec_if — STR output, string-atom then + string-atom else +;; When both then and else are string literals, the planner +;; may produce SYM output (atom->sym promotion). Verify the +;; correct count and that the result values match the symbols. +;; ==================================================================== + +(set TCond (table [x] (list [true false true false true]))) + +;; (if x "yes" "no"): both are string atoms; planner promotes to SYM. +(set ROstr (at (select {v: (if x "yes" "no") from: TCond}) 'v)) +(count ROstr) -- 5 +(at ROstr 0) -- 'yes +(at ROstr 1) -- 'no +(at ROstr 2) -- 'yes +(at ROstr 3) -- 'no +(at ROstr 4) -- 'yes + +;; ==================================================================== +;; Section P: exec_pivot — AVG on I64 value column +;; Covers the F64-output, val_is_f64=false sub-branch at +;; lines 603-604: (double)ROW_RD_I64(row, ly.off_sum, s) / cnt +;; ==================================================================== + +(set TPavg (table [k c v] (list ['A 'A 'B 'B] ['x 'y 'x 'y] [10 20 30 40]))) +(set PPavg (pivot TPavg 'k 'c 'v avg)) +(count PPavg) -- 2 +(at (at PPavg 'x) 0) -- 10.0 +(at (at PPavg 'y) 0) -- 20.0 +(at (at PPavg 'x) 1) -- 30.0 +(at (at PPavg 'y) 1) -- 40.0 + +;; ==================================================================== +;; UNREACHABLE BRANCHES (documented for coverage records) +;; ==================================================================== +;; +;; Lines 52-54 (cond_v NULL/ERR guard): +;; Only fires if exec_node(cond) returns NULL or an error. This requires +;; an internal evaluation failure or OOM during expression evaluation. +;; Not triggerable from RFL without corrupted AST or memory exhaustion. +;; +;; Lines 57, 59 (then_v NULL/ERR guard): +;; Same as above for the then-branch input. +;; +;; Line 62 (else_v NULL/ERR guard): +;; Same as above for the else-branch input. +;; +;; Line 75 (result allocation failure): +;; Only fires if ray_vec_new() fails (OOM). Not triggerable from RFL. +;; +;; Lines 124, 132, 147, 155 (STR null-safety checks): +;; ray_str_vec_get() returning NULL. This would require a corrupted STR +;; vector where the internal offset table points beyond the data buffer. +;; Not triggerable from valid RFL inputs. +;; +;; Lines 202-219 (TIMESTAMP/DATE/TIME if branches): +;; The type promotion layer (promote()) returns RAY_I64 for +;; promote(TIMESTAMP, TIMESTAMP) and RAY_I32 for promote(DATE, DATE) +;; and promote(TIME, TIME). Therefore exec_if's out_type is never +;; RAY_TIMESTAMP, RAY_DATE, or RAY_TIME. These branches are dead code +;; left as a safety net for future promote() changes. +;; (Also documented in existing pivot_coverage.rfl Section 29.) diff --git a/test/rfl/sort/asc.rfl b/test/rfl/sort/asc.rfl index 07ff8a37..2102e04a 100644 --- a/test/rfl/sort/asc.rfl +++ b/test/rfl/sort/asc.rfl @@ -23,6 +23,13 @@ N -- (sum (<= (at S (til N)) (at S (+ 1 (til N))))) ;; idempotent (asc (asc V)) -- (asc V) +;; OP_ASC vector-input guard: a lazy chain whose previous op is an +;; aggregator (FIRST) yields a scalar when materialised; the +;; subsequent OP_ASC executor must reject the non-vec input rather +;; than dispatching to asc_vec_eager. Covers exec.c L1061. +(asc (first [1 2 3 4])) !- type +(asc (last [1 2 3 4])) !- type + ;; ────────────────────────────────────────────────────────────────── ;; Float / negatives / mixed ;; ────────────────────────────────────────────────────────────────── @@ -58,3 +65,15 @@ N -- (sum (<= (at S (til N)) (at S (+ 1 (til N))))) (asc (asc V2)) -- (asc V2) (reverse (asc V2)) -- (desc V2) + +;; ---- RAY_IS_ERR propagation: asc(erroring arg) ---- +;; asc must propagate inner errors via RAY_IS_ERR rather than dispatch +;; to its sort path. +(asc (round "x")) !- type +(asc (+ "abc" 1)) !- type +(asc (- 'a 1)) !- type +(asc (sqrt 'foo)) !- type +;; deep chain: round → asc → at, multiple propagation guards +(at (asc (round "x")) 0) !- type +;; xasc on erroring table +(xasc (table [a] (list (round "x"))) 'a) !- type diff --git a/test/rfl/sort/asc_strings.rfl b/test/rfl/sort/asc_strings.rfl index 857ecc33..2e81dc9f 100644 --- a/test/rfl/sort/asc_strings.rfl +++ b/test/rfl/sort/asc_strings.rfl @@ -7,3 +7,17 @@ ;; asc on sym vectors (asc ['c 'a 'b]) -- ['a 'b 'c] + +;; ─── edge cases: empty / single / all-equal sym & string ──────────── +;; empty SYM vec +(asc (as 'SYM (list))) -- (as 'SYM (list)) +;; single-element sym +(asc ['only]) -- ['only] +;; all-equal sym +(asc ['x 'x 'x]) -- ['x 'x 'x] +;; long strings (>12 bytes pool path) +(asc ["banana_pie_recipe" "apple_pie_dough" "cherry_pie_glaze"]) -- ["apple_pie_dough" "banana_pie_recipe" "cherry_pie_glaze"] +;; mixed short+long strings +(asc ["short" "this_is_a_long_string"]) -- ["short" "this_is_a_long_string"] +;; all-empty strings +(asc ["" "" ""]) -- ["" "" ""] diff --git a/test/rfl/sort/desc.rfl b/test/rfl/sort/desc.rfl index 89e67b64..772de5f0 100644 --- a/test/rfl/sort/desc.rfl +++ b/test/rfl/sort/desc.rfl @@ -3,6 +3,14 @@ (desc [3 1 2]) -- [3 2 1] (desc []) -- [] +;; ─── edge cases: single-element / all-equal / negatives ────────────── +(desc [42]) -- [42] +(desc [7 7 7]) -- [7 7 7] +(desc [-1 -3 -2]) -- [-1 -2 -3] +(desc [-5 -1 -10 0 -3]) -- [0 -1 -3 -5 -10] +(desc [3.14 1.41 2.71]) -- [3.14 2.71 1.41] +(desc [5.0 5.0 5.0]) -- [5.0 5.0 5.0] + ;; reverse of asc (set V (rand 200 10000)) (desc V) -- (reverse (asc V)) @@ -14,3 +22,15 @@ N -- (sum (>= (at S (til N)) (at S (+ 1 (til N))))) (first (desc V)) -- (max V) (last (desc V)) -- (min V) + +;; OP_DESC vector-input guard: lazy chain whose previous op is an +;; aggregator yields a scalar at materialisation; OP_DESC must +;; reject the non-vec input. Covers exec.c L1073. +(desc (first [1 2 3 4])) !- type +(desc (last [1 2 3 4])) !- type + +;; ---- RAY_IS_ERR propagation: desc(erroring arg) ---- +(desc (round "x")) !- type +(desc (+ "abc" 1)) !- type +(desc (sqrt 'foo)) !- type +(desc (- 'a 1)) !- type diff --git a/test/rfl/sort/iasc.rfl b/test/rfl/sort/iasc.rfl index 428deffe..b86d4809 100644 --- a/test/rfl/sort/iasc.rfl +++ b/test/rfl/sort/iasc.rfl @@ -20,3 +20,13 @@ ;; negatives (iasc [-1 -3 -2]) -- [1 2 0] + +;; ─── edge cases: empty / all-equal / atoms ────────────────────────── +;; empty vec → empty index vec +(iasc (as 'I64 (list))) -- (as 'I64 (list)) +;; all-equal: stable sort returns input order (0..n-1) +(iasc [7 7 7 7]) -- [0 1 2 3] +(iasc (as 'F64 [3.14 3.14 3.14])) -- [0 1 2] +;; single-element +(iasc [42]) -- [0] +(iasc [3.14]) -- [0] diff --git a/test/rfl/sort/idesc.rfl b/test/rfl/sort/idesc.rfl index a2b1934c..27c8a267 100644 --- a/test/rfl/sort/idesc.rfl +++ b/test/rfl/sort/idesc.rfl @@ -8,3 +8,15 @@ ;; is a permutation of (til n) (asc (idesc V)) -- (til (count V)) + +;; ─── edge cases: empty / single / all-equal ────────────────────────── +;; empty vec +(idesc (as 'I64 (list))) -- (as 'I64 (list)) +;; single-element +(idesc [42]) -- [0] +(idesc [3.14]) -- [0] +;; all-equal: stable sort returns input order (0..n-1) +(idesc [7 7 7 7]) -- [0 1 2 3] +(idesc (as 'F64 [3.14 3.14 3.14])) -- [0 1 2] +;; negatives +(idesc [-1 -3 -2]) -- [0 2 1] diff --git a/test/rfl/sort/sort_coverage2.rfl b/test/rfl/sort/sort_coverage2.rfl index af20a088..21b611b7 100644 --- a/test/rfl/sort/sort_coverage2.rfl +++ b/test/rfl/sort/sort_coverage2.rfl @@ -432,3 +432,64 @@ (count (asc Vnreal)) -- 8192 (count (desc Vnreal)) -- 8192 +;; ──────────────────────────────────────────────────────────────────── +;; 25. NARROW-WIDTH SYM sort (W8) — radix_encode_fn RAY_SYM arm +;; dispatched via ray_read_sym for narrow widths. +;; +;; The CSV loader narrows SYM cols to W8 when distinct count ≤ 255. +;; A small 3-distinct-id SYM column from CSV is W8. Sorting it must +;; exercise the W8 read path inside the SYM encode loop (sort.c:1010). +;; Take/replicate to 8192 to force the radix path (>RADIX_SORT_THRESHOLD). +;; ──────────────────────────────────────────────────────────────────── +(.sys.exec "printf 'id,s\n1,zz\n2,aa\n3,mm\n4,aa\n5,zz\n6,mm\n' > /tmp/rfl_sort_symw8.csv") -- 0 +(set Tsw8 (.csv.read [I64 SYMBOL] "/tmp/rfl_sort_symw8.csv")) +(count Tsw8) -- 6 +(type (at Tsw8 's)) -- 'SYM + +;; Small-N asc/desc on narrow SYM +(set Vw8 (at Tsw8 's)) +(at (asc Vw8) 0) -- 'aa +(at (desc Vw8) 0) -- 'zz +(count (asc Vw8)) -- 6 +(count (desc Vw8)) -- 6 + +;; Large-N: take to 8192 to drive radix path through narrow SYM read +(set Vw8big (take Vw8 8192)) +(count (asc Vw8big)) -- 8192 +(count (desc Vw8big)) -- 8192 +(at (asc Vw8big) 0) -- 'aa +(at (take (desc Vw8big) -1) 0) -- 'aa + +;; Multi-key sort with narrow SYM as one of the keys. Forces narrow +;; SYM encode within composite radix path. +(set Tsw8x (table [k s v] (list (as 'I32 (% (til 8192) 7)) (take Vw8 8192) (as 'I64 (til 8192))))) +(count (xasc Tsw8x ['s 'k])) -- 8192 +(count (xdesc Tsw8x ['s 'k])) -- 8192 + +;; Sort with NULLS FIRST/LAST on narrow SYM via splayed round-trip. +;; SYM has 0 as null id; CSV path won't emit nulls, but the splayed +;; round-trip is the public route to confirm the narrow-width column +;; participates in mixed-attribute sorts. +(.db.splayed.set "/tmp/rfl_sort_symw8_db/" Tsw8) +(set Tsw8r (.db.splayed.get "/tmp/rfl_sort_symw8_db/")) +(count Tsw8r) -- 6 +(type (at Tsw8r 's)) -- 'SYM +(at (asc (at Tsw8r 's)) 0) -- 'aa +(.sys.exec "rm -rf /tmp/rfl_sort_symw8.csv /tmp/rfl_sort_symw8_db") -- 0 + +;; ──────────────────────────────────────────────────────────────────── +;; 26. NARROW-WIDTH SYM topk / fused sort+take (single-key SYM topk) +;; Exercises topk_indices_cmp_single → topk_indices_cmp with +;; SYM comparator heap on narrow-width data. +;; ──────────────────────────────────────────────────────────────────── +(.sys.exec "printf 'id,s\n1,zz\n2,aa\n3,mm\n4,bb\n5,cc\n6,dd\n7,ee\n8,ff\n9,gg\n10,hh\n11,ii\n12,jj\n13,kk\n14,ll\n15,nn\n16,oo\n17,pp\n18,qq\n19,rr\n20,ss\n' > /tmp/rfl_sort_symtopk.csv") -- 0 +(set Ttk (.csv.read [I64 SYMBOL] "/tmp/rfl_sort_symtopk.csv")) +(type (at Ttk 's)) -- 'SYM +;; Take first 3 after sort — single-key SYM goes through cmp-heap topk +(set Rtk3 (select {from: Ttk asc: s take: 3})) +(count Rtk3) -- 3 +(at (at Rtk3 's) 0) -- 'aa +(at (at Rtk3 's) 1) -- 'bb +(at (at Rtk3 's) 2) -- 'cc +(.sys.exec "rm -f /tmp/rfl_sort_symtopk.csv") -- 0 + diff --git a/test/rfl/strop/split.rfl b/test/rfl/strop/split.rfl index 0b0addb7..a50dd4d6 100644 --- a/test/rfl/strop/split.rfl +++ b/test/rfl/strop/split.rfl @@ -53,3 +53,14 @@ ;; Empty vectors and indices (split [] []) -- null (split [1 2 3] []) -- null + +;; ── error-guard regression: split type guards (strop.c 277, 280) ── +;; First-arg `str` must be string atom / sym / list / vector; numeric +;; atom falls through both vector and list dispatches and hits the +;; "else return ray_error("type", NULL)" on line 277. +(split 42 ",") !- type +(split 1.5 ",") !- type +;; First-arg is a valid string; delim must be string atom — numeric +;; delim falls through and hits the "type" guard on line 280. +(split "abc" 42) !- type +(split "abc" 1.5) !- type diff --git a/test/rfl/strop/strlen.rfl b/test/rfl/strop/strlen.rfl index 80aa48eb..815b42b2 100644 --- a/test/rfl/strop/strlen.rfl +++ b/test/rfl/strop/strlen.rfl @@ -2,3 +2,13 @@ (strlen "") -- 0 (strlen 'alpha) -- 5 (sum (strlen ["aa" "bbb" ""])) -- 5 + +;; ── error-guard regression: type guards on non-string inputs ── +;; Atom fall-through (ray_strlen_fn line 152): numeric atom is not +;; string/sym/list/vec/MAPCOMMON/PARTED — fails the final dispatch. +(strlen 42) !- type +(strlen 1.5) !- type +;; Vec fall-through (strlen_vec line 65): vector of non-string type. +(strlen [1 2 3]) !- type +(strlen [1.0 2.0]) !- type +(strlen [true false]) !- type diff --git a/test/rfl/strop/strlen_partitioned.rfl b/test/rfl/strop/strlen_partitioned.rfl index 4ba5c2b2..9acd340b 100644 --- a/test/rfl/strop/strlen_partitioned.rfl +++ b/test/rfl/strop/strlen_partitioned.rfl @@ -174,6 +174,27 @@ (at (strlen (at Ppi 'tag)) 4) -- 2 (at (strlen (at Ppi 'tag)) 5) -- 3 +;; ════════════════════════════════════════════════════════════════ +;; 6. Error-guard coverage: strlen on non-string MAPCOMMON / PARTED. +;; +;; • strlen_mapcommon line 92: keys vec is not RAY_STR/RAY_SYM +;; (date-shaped partition dirs → MAPCOMMON of DATE keys). +;; • strlen_parted line 119: parted base type is not RAY_STR/RAY_SYM +;; (numeric data column under a parted root). +;; ════════════════════════════════════════════════════════════════ +;; Reuse the rf_test_strlen_parted_sym fixture: its 'date column is a +;; MAPCOMMON of DATE keys, and 'v is a parted I64 data column. +(set PB-A (table [v] (list [10 20]))) +(set PB-B (table [v] (list [30]))) +(.db.splayed.set "rf_test_strlen_badtype/2024.01.01/t/" PB-A) +(.db.splayed.set "rf_test_strlen_badtype/2024.01.02/t/" PB-B) +(set Pb (.db.parted.get "rf_test_strlen_badtype/" 't)) +;; MAPCOMMON-DATE key column → strlen_mapcommon type guard (line 92). +(strlen (at Pb 'date)) !- type +;; PARTED I64 data column → strlen_parted type guard (line 119). +(strlen (at Pb 'v)) !- type +(.sys.exec "rm -rf rf_test_strlen_badtype") + ;; ────────────── teardown ────────────── (.sys.exec "rm -rf rf_test_strlen_mc_sym rf_test_strlen_mc_long rf_test_strlen_parted_sym rf_test_strlen_parted_date rf_test_strlen_parted_int") diff --git a/test/rfl/strop/strop_branch_cov.rfl b/test/rfl/strop/strop_branch_cov.rfl new file mode 100644 index 00000000..7e65cf28 --- /dev/null +++ b/test/rfl/strop/strop_branch_cov.rfl @@ -0,0 +1,235 @@ +;; strop_branch_cov.rfl — targeted branch coverage for src/ops/strop.c +;; +;; Uncovered branches at baseline (62.45% branch cov, lines referenced): +;; 69, 89, 96, 104, 105, 109, 123, 131, 134, +;; 158, 160, 162, 177, 216, 221, 238, 239, 240, 258, +;; 275, 280, 283, 285, 289, 290, 298, 301, 310, 314, 452 +;; +;; Each section documents which line/branch it targets. + +;; ════════════════════════════════════════════════════════════════════ +;; 1. split with SYM atom first arg (line 275) +;; +;; The string-string split path at line 274-275 dispatches to the +;; -RAY_SYM branch when str->type == -RAY_SYM. The sym string is +;; resolved via ray_sym_str, then split proceeds as normal string split. +;; Already partially covered by split.rfl line 32-33 but let us +;; exercise the branch more explicitly with a multi-char delimiter. +;; ════════════════════════════════════════════════════════════════════ +(split 'hello_world "_") -- (list "hello" "world") +(split 'foobar "ob") -- (list "fo" "ar") +(split 'abc "x") -- (list "abc") + +;; ════════════════════════════════════════════════════════════════════ +;; 2. split with empty delimiter (line 285, dlen == 0) +;; +;; When the delimiter is "", dlen == 0 triggers the early-return path +;; at line 285-291. The whole string is returned as a single-element +;; list. This also exercises the sym_str_s cleanup at lines 289-290 +;; when the first arg is a SYM (sym_str_s != NULL). +;; ════════════════════════════════════════════════════════════════════ +(split "hello" "") -- (list "hello") +(split 'hello "") -- (list "hello") + +;; ════════════════════════════════════════════════════════════════════ +;; 3. split: SYM atom + sym_str_s/sym_str_d cleanup (lines 289, 290, 313, 314) +;; +;; When str is -RAY_SYM, sym_str_s is set (line 275). sym_str_d is +;; always NULL (never assigned since delim goes through -RAY_STR path +;; only). The release guards at 289/290/313/314 run when the split +;; returns normally. Exercise with a SYM + non-empty delim to run +;; lines 313-314 (the tail cleanup after the main loop). +;; ════════════════════════════════════════════════════════════════════ +(split 'a_b_c "_") -- (list "a" "b" "c") +(split 'ab__cd "__") -- (list "ab" "cd") + +;; ════════════════════════════════════════════════════════════════════ +;; 4. List split with I16/I32 indices (line 158 branches) +;; +;; The list-split dispatch at line 157-158 checks +;; delim->type == RAY_I64 || delim->type == RAY_I16 || delim->type == RAY_I32 +;; Existing tests only cover I64 indices on lists. I16 ('h' suffix) +;; and I32 ('i' suffix) exercise the other OR branches. +;; ════════════════════════════════════════════════════════════════════ +(split (list 1 2 3 4 5 6 "asdf" 9.33) [0h 2h 4h]) -- (list [1 2] [3 4] (list 5 6 "asdf" 9.33)) + +;; ════════════════════════════════════════════════════════════════════ +;; 5. List split with empty indices: nidx == 0 (line 160) +;; +;; Empty I64 index vec on a LIST (not a vector). Line 160 returns +;; NULL for empty indices. The existing tests (split [] []) exercise +;; the VECTOR path at line 218, not the LIST path at line 160. +;; ════════════════════════════════════════════════════════════════════ +(split (list 1 "a" 3.0) []) -- null + +;; ════════════════════════════════════════════════════════════════════ +;; 6. List split: seglen < 0 (line 177) +;; +;; When indices are descending (e.g. [4 2]), end < start, so +;; seglen = end - start < 0. The guard clamps to 0. Result segment +;; is an empty list. +;; ════════════════════════════════════════════════════════════════════ +(count (split (list 1 2 3 4 5) [4 2])) -- 2 +(count (at (split (list 1 2 3 4 5) [4 2]) 0)) -- 0 +(count (at (split (list 1 2 3 4 5) [4 2]) 1)) -- 3 + +;; ════════════════════════════════════════════════════════════════════ +;; 7. String split by indices: boundary cases (lines 238-240) +;; +;; Line 238: seglen < 0 when indices descend (e.g. "hello" [4 2]). +;; Line 239: start > total (e.g. "hello" [10]). +;; Line 240: start + seglen > total (e.g. "he" [1 999]). +;; ════════════════════════════════════════════════════════════════════ +;; Descending indices → seglen < 0 → clamped to 0, produces empty string. +(count (split "hello" [4 2])) -- 2 +(at (split "hello" [4 2]) 0) -- "" +(at (split "hello" [4 2]) 1) -- "llo" + +;; start > total → start clamped to total, seglen → 0 → empty string. +(at (split "hi" [10]) 0) -- "" + +;; start + seglen > total → seglen clamped. +(at (split "hi" [1]) 0) -- "i" + +;; ════════════════════════════════════════════════════════════════════ +;; 8. Vector split by indices: seglen < 0 (line 258) +;; +;; Same descending-index trick on a typed vector. +;; ════════════════════════════════════════════════════════════════════ +(count (split [10 20 30 40 50] [4 2])) -- 2 +(count (at (split [10 20 30 40 50] [4 2]) 0)) -- 0 +(count (at (split [10 20 30 40 50] [4 2]) 1)) -- 3 + +;; ════════════════════════════════════════════════════════════════════ +;; 9. String/vec split: I16 and I32 index types (line 216) +;; +;; The vec/string split dispatch at line 215-216 checks +;; delim->type == RAY_I64 || delim->type == RAY_I16 || delim->type == RAY_I32 +;; String split with I16 indices: +;; ════════════════════════════════════════════════════════════════════ +(split "abcdef" [0h 3h]) -- (list "abc" "def") + +;; ════════════════════════════════════════════════════════════════════ +;; 9b. String split: start + seglen > total (line 240) +;; +;; With "hi" (len 2) and indices [0 4], segment 0 has start=0, +;; end=4 (from idx_buf[1]), seglen=4. seglen >= 0 (line 238 not hit). +;; start=0 <= 2 (line 239 not hit). But start + seglen = 4 > 2 +;; → seglen clamped to total - start = 2 (line 240 taken). +;; ════════════════════════════════════════════════════════════════════ +(at (split "hi" [0 4]) 0) -- "hi" + +;; ════════════════════════════════════════════════════════════════════ +;; 9c. split: SYM str + non-string delim (line 280 sym_str_s release) +;; +;; When str is -RAY_SYM, sym_str_s is set at line 275. If delim is +;; not -RAY_STR, line 280 runs `if (sym_str_s) ray_release(sym_str_s)` +;; before returning the type error. +;; ════════════════════════════════════════════════════════════════════ +(split 'abc 42) !- type + +;; ════════════════════════════════════════════════════════════════════ +;; 10. like: type error on non-string pattern (line 324) +;; +;; pattern must be -RAY_STR. Numeric pattern hits the type guard. +;; ════════════════════════════════════════════════════════════════════ +(like "hello" 42) !- type +(like "hello" 'sym) !- type + +;; ════════════════════════════════════════════════════════════════════ +;; 11. like: type error on non-string/sym input (line 493) +;; +;; Input x is neither atom string/sym nor vec STR/SYM. +;; ════════════════════════════════════════════════════════════════════ +(like 42 "hello") !- type +(like [1 2 3] "hello") !- type + +;; ════════════════════════════════════════════════════════════════════ +;; 12. like: SYM atom with SHAPE_NONE pattern (line 347-348) +;; +;; When use_simple is false (SHAPE_NONE), the atom SYM path at +;; line 347-348 takes the ray_glob_match branch instead of +;; ray_glob_match_compiled. Use a pattern with `?` or `[` to force +;; SHAPE_NONE. +;; ════════════════════════════════════════════════════════════════════ +(like 'hello "h?llo") -- true +(like 'hello "[hH]ello") -- true +(like 'world "[hH]ello") -- false +(like 'abc "a?c") -- true +(like 'abc "a??c") -- false + +;; ════════════════════════════════════════════════════════════════════ +;; 13. like: STR vector with SHAPE_NONE pattern (line 484-485) +;; +;; When the pattern forces SHAPE_NONE, the STR vector path at +;; line 484-485 uses ray_glob_match instead of ray_glob_match_compiled. +;; ════════════════════════════════════════════════════════════════════ +(sum (like ["hello" "world" "hi"] "h?llo")) -- 1 +(sum (like ["abc" "def" "axc"] "[aA]?c")) -- 2 + +;; ════════════════════════════════════════════════════════════════════ +;; 14. like: SYM vector W64 with SHAPE_NONE (line 452, DICT_PASS/ROW_PASS) +;; +;; Small in-RFL SYM vecs are W64. SHAPE_NONE forces the general +;; matcher path in the DICT_PASS macro at line 424-426. +;; ════════════════════════════════════════════════════════════════════ +(sum (like ['hello 'world 'hi] "h?llo")) -- 1 +(sum (like ['abc 'def 'axc] "[aA]?c")) -- 2 + +;; ════════════════════════════════════════════════════════════════════ +;; 15. strlen: SYM vector (strlen_vec line 54-58) +;; +;; strlen_vec's SYM branch reads sym IDs via ray_read_sym. In-RFL +;; SYM vectors are W64. This exercises the strlen_vec_value SYM path. +;; ════════════════════════════════════════════════════════════════════ +(sum (strlen ['abc 'de 'f])) -- 6 +(sum (strlen ['hello 'world])) -- 10 + +;; ════════════════════════════════════════════════════════════════════ +;; 16. strlen: STR vector with nulls (strlen_vec lines 72-78) +;; +;; strlen_vec checks has_nulls && ray_vec_is_null for each element. +;; Build a STR vec with null via cast: (as 'STR [1 0Nl 3]). +;; ════════════════════════════════════════════════════════════════════ +(at (strlen (as 'STR [1 0Nl 3])) 0) -- 1 +(nil? (at (strlen (as 'STR [1 0Nl 3])) 1)) -- true +(at (strlen (as 'STR [1 0Nl 3])) 2) -- 1 + +;; ════════════════════════════════════════════════════════════════════ +;; 17. Reachability notes — unreachable branches +;; +;; The following branches are NOT reachable from RFL test fixtures +;; and remain uncovered by design: +;; +;; OOM guards (internal memory-pressure paths): +;; Line 69: strlen_vec: ray_vec_new returns NULL/error. +;; Line 96: strlen_mapcommon: ray_vec_new returns NULL/error. +;; Line 123: strlen_parted: ray_vec_new returns NULL/error. +;; Line 283: split: ray_list_new returns error. +;; Line 298, 301, 310: split loop: ray_str/ray_list_append error. +;; Lines 461-476: like SYM vec OOM fallback (lut/seen scratch_alloc +;; fails). Only reachable under extreme memory pressure. +;; +;; Structural guards (no public API to produce required input): +;; Line 89: strlen_mapcommon: keys/counts NULL/error. Partition +;; columns from .db.parted.get are always well-formed. +;; Line 104-105, 109: strlen_mapcommon null-key. Partition keys +;; never carry HAS_NULLS (see strlen_partitioned.rfl). +;; Line 131: strlen_parted: NULL/error segment. Segments from +;; .db.parted.get are always valid. +;; Line 134: strlen_parted null-in-segment. SYM segments from +;; splayed tables don't carry HAS_NULLS unless CSV with +;; empty SYM fields (RFL syntax cannot produce this). +;; +;; Protective limit guards: +;; Line 162: List split nidx > 256 (257+ element index vector). +;; Line 221: Vec/string split nidx > 256 (same limit). +;; +;; Dead code: +;; Line 290: `if (sym_str_d)` — sym_str_d is initialized to NULL at +;; line 273 and never assigned thereafter. Always false. +;; Line 314: `if (sym_str_d)` — same dead code in tail cleanup. +;; Line 452: `default:` case in SYM width switch. The RAY_SYM_W_MASK +;; exhausts W8/W16/W32/W64; no other value is possible. +;; The `default:` is a defensive catch-all. +;; ════════════════════════════════════════════════════════════════════ diff --git a/test/rfl/symbol/sym_coverage.rfl b/test/rfl/symbol/sym_coverage.rfl index 5bf2d4ec..80c66665 100644 --- a/test/rfl/symbol/sym_coverage.rfl +++ b/test/rfl/symbol/sym_coverage.rfl @@ -196,3 +196,63 @@ (count _R11) -- 1 (at (at _R11 's) 0) -- 'a.b.c.d.e (.sys.exec "rm -rf /tmp/rfl_sym_cov_edge2") -- 0 + +;; ═══════════════════════════════════════════════════════════════════ +;; 12. Narrow-width SYM (W8/W16) operations via CSV ingest +;; +;; `.csv.read [I64 SYMBOL]` narrows the SYM column to W8 (<=255 +;; distinct ids), exercising: +;; - dict.c ray_dict_find_sym W8 arm (used by typed-dict probes) +;; - vec.c sym_elem_size narrow path +;; - ops/sort.c radix_encode_fn RAY_SYM with W8 ray_read_sym +;; - ops/group.c read_col_i64 W8 SYM arm +;; Confirms type-preservation: distinct/first/last yield SYM atoms. +;; ═══════════════════════════════════════════════════════════════════ +(.sys.exec "printf 'id,s\n1,red\n2,blue\n3,green\n4,red\n5,blue\n6,red\n7,green\n8,blue\n' > /tmp/rfl_sym_w8_csv.csv") -- 0 +(set _Tw8 (.csv.read [I64 SYMBOL] "/tmp/rfl_sym_w8_csv.csv")) +(count _Tw8) -- 8 +(type (at _Tw8 's)) -- 'SYM + +;; Distinct on narrow SYM column +(set _d8 (distinct (at _Tw8 's))) +(count _d8) -- 3 +(type _d8) -- 'SYM + +;; Sort the narrow SYM column (radix path; SYM encode uses ray_read_sym) +(set _a8 (asc (at _Tw8 's))) +(at _a8 0) -- 'blue +(at (take _a8 -1) 0) -- 'red + +;; first / last preserve SYM atom from narrow-width column +(first (at _Tw8 's)) -- 'red +(last (at _Tw8 's)) -- 'blue +(type (first (at _Tw8 's))) -- 'sym +(type (last (at _Tw8 's))) -- 'sym + +;; group-by aggregation on narrow SYM key +(set _R8 (select {c: (count id) from: _Tw8 by: s asc: s})) +(count _R8) -- 3 +(at (at _R8 's) 0) -- 'blue +(at (at _R8 'c) 0) -- 3 ;; blue: rows 2,5,8 + +;; Mixed-width compare: narrow W8 SYM vs literal sym atom from W64 table +(== (first (at _Tw8 's)) 'red) -- true +(== (last (at _Tw8 's)) 'blue) -- true +;; In-membership against literal symbol list +(in 'green (at _Tw8 's)) -- true +(in 'purple (at _Tw8 's)) -- false + +;; Sort table by narrow SYM column + numeric tie-breaker +(set _SortW8 (xasc _Tw8 ['s 'id])) +(count _SortW8) -- 8 +(at (at _SortW8 's) 0) -- 'blue +(at (at _SortW8 'id) 0) -- 2 + +;; Splayed round-trip preserves narrow width; sort/agg on reloaded SYM +;; still hits W8 ray_read_sym. +(.db.splayed.set "/tmp/rfl_sym_w8_db/" _Tw8) +(set _Tw8r (.db.splayed.get "/tmp/rfl_sym_w8_db/")) +(count _Tw8r) -- 8 +(type (at _Tw8r 's)) -- 'SYM +(at (asc (at _Tw8r 's)) 0) -- 'blue +(.sys.exec "rm -rf /tmp/rfl_sym_w8_csv.csv /tmp/rfl_sym_w8_db") -- 0 diff --git a/test/rfl/system/part.rfl b/test/rfl/system/part.rfl index ddba95be..16c270d7 100644 --- a/test/rfl/system/part.rfl +++ b/test/rfl/system/part.rfl @@ -16,7 +16,7 @@ ;; on the second open. ;; ────────────── pre-flight cleanup ────────────── -(.sys.exec "rm -rf /tmp/rfl_part_date /tmp/rfl_part_int /tmp/rfl_part_sym /tmp/rfl_part_single /tmp/rfl_part_empty /tmp/rfl_part_missing /tmp/rfl_part_three /tmp/rfl_part_cd /tmp/rfl_part_cd_sym /tmp/rfl_part_minute /tmp/rfl_part_like") +(.sys.exec "rm -rf /tmp/rfl_part_date /tmp/rfl_part_int /tmp/rfl_part_sym /tmp/rfl_part_single /tmp/rfl_part_empty /tmp/rfl_part_missing /tmp/rfl_part_three /tmp/rfl_part_cd /tmp/rfl_part_cd_sym /tmp/rfl_part_minute /tmp/rfl_part_like /tmp/rfl_part_mc /tmp/rfl_part_mb") (.sys.exec "mkdir -p /tmp/rfl_part_empty") ;; ────────────── date-partition path: RAY_MC_DATE branch ────────────── @@ -198,5 +198,132 @@ (.db.parted.get "/tmp/rfl_part_date/" "t") !- type (.db.parted.get "/tmp/rfl_part_date/") !- domain +;; ────────────── MAPCOMMON-only GROUP BY (n_part_keys==0 branch) ────────────── +;; When the only GROUP BY key is MAPCOMMON (e.g. `by: date` on a parted table), +;; exec_group_per_partition fires with n_part_keys=0 — each partition yields a +;; single group. Exercises: +;; - LIMIT pushdown branch (line 8366-8367) when group_limit < n_parts +;; - is_mc=1 branch in key-column concat (line 8603-8612) +;; - mc_col tref selection (line 8576-8578) +;; - Cardinality gate MAPCOMMON skip (line 8710) +(set MC-A (table [k v] (list [1 2 3 4 5] [10.0 20.0 30.0 40.0 50.0]))) +(set MC-B (table [k v] (list [1 2 3] [100.0 200.0 300.0]))) +(set MC-C (table [k v] (list [1 2 3 4] [11.0 22.0 33.0 44.0]))) +(.db.splayed.set "/tmp/rfl_part_mc/2024.01.01/t/" MC-A) +(.db.splayed.set "/tmp/rfl_part_mc/2024.01.02/t/" MC-B) +(.db.splayed.set "/tmp/rfl_part_mc/2024.01.03/t/" MC-C) +(set Pmc (.db.parted.get "/tmp/rfl_part_mc/" 't)) +;; GROUP BY date (MAPCOMMON) — 3 result rows, one per partition. +(set Rmc (select {s: (sum v) c: (count v) from: Pmc by: date})) +(count Rmc) -- 3 +(at Rmc 'date) -- [2024.01.01 2024.01.02 2024.01.03] +(at Rmc 's) -- [150.0 600.0 110.0] +(at Rmc 'c) -- [5 3 4] +;; LIMIT pushdown: take=2 with MAPCOMMON-only key → n_parts limited to 2. +(set Rmc2 (select {s: (sum v) from: Pmc by: date take: 2})) +(count Rmc2) -- 2 +(at Rmc2 'date) -- [2024.01.01 2024.01.02] +(at Rmc2 's) -- [150.0 600.0] + +;; ────────────── Parted SELECT: multi-aggregate combo ────────────── +;; SUM+AVG+MIN+MAX+FIRST+LAST in one query exercises every parted +;; aggregate dispatch arm in a single per-partition pass. +(set Rmc_multi (select {s: (sum v) a: (avg v) mn: (min v) mx: (max v) f: (first v) l: (last v) from: Pmc by: date})) +(count Rmc_multi) -- 3 +;; Day 1: sum=150, avg=30, min=10, max=50, first=10, last=50 +(at Rmc_multi 's) -- [150.0 600.0 110.0] +(at Rmc_multi 'mn) -- [10.0 100.0 11.0] +(at Rmc_multi 'mx) -- [50.0 300.0 44.0] +(at Rmc_multi 'f) -- [10.0 100.0 11.0] +(at Rmc_multi 'l) -- [50.0 300.0 44.0] + +;; ────────────── Parted SELECT: WHERE on parted column with parted GROUP BY ── +;; WHERE on parted column exercises query_materialize_parted_col / flatten +;; path. Group by k (parted), no MAPCOMMON. Day 1 k>=3 → [3,4,5]; Day 2 → [3]; +;; Day 3 → [3,4]. Final groups by k: k=3 has v={30,300,33}, k=4 has v={40,44}, +;; k=5 has v={50}. +(set Rmc_wk (select {s: (sum v) c: (count v) from: Pmc where: (>= k 3) by: k})) +(count Rmc_wk) -- 3 +(at Rmc_wk 'k) -- [3 4 5] +(at Rmc_wk 's) -- [363.0 84.0 50.0] +(at Rmc_wk 'c) -- [3 2 1] + +;; ────────────── Parted SELECT: GROUP BY both MAPCOMMON + parted ────────── +;; Mixed key set: date (MAPCOMMON) + k (parted). +;; Triggers branch where some keys are MC and others are parted in +;; exec_group_per_partition (n_part_keys>0 AND n_mc_keys>0). +(set Rmix (select {s: (sum v) from: Pmc by: [date k]})) +;; row count = sum over days of distinct ks per day = 5 + 3 + 4 = 12 +(count Rmix) -- 12 +(sum (at Rmix 's)) -- 860.0 + +;; ────────────── 10-partition path: MERGE_BATCH boundary (batches loop > 1) ── +;; MERGE_BATCH=8, so with 10 partitions the streaming-batch outer loop runs +;; twice (batch_start=0 covers 8, batch_start=8 covers 2). Exercises the +;; "running" merge branch (line 8574-8598, 8629, 8641-8647). +(set MP-1 (table [k v] (list [1 1 2] [10 20 30]))) +(set MP-2 (table [k v] (list [1 2 3] [40 50 60]))) +(set MP-3 (table [k v] (list [2 3 3] [70 80 90]))) +(set MP-4 (table [k v] (list [1 2 3] [11 12 13]))) +(set MP-5 (table [k v] (list [1 1 2] [21 22 23]))) +(set MP-6 (table [k v] (list [2 2 3] [31 32 33]))) +(set MP-7 (table [k v] (list [1 3] [41 42]))) +(set MP-8 (table [k v] (list [1 2] [51 52]))) +(set MP-9 (table [k v] (list [3 3] [61 62]))) +(set MP-A (table [k v] (list [1 2 3 1] [71 72 73 74]))) +(.db.splayed.set "/tmp/rfl_part_mb/2024.01.01/t/" MP-1) +(.db.splayed.set "/tmp/rfl_part_mb/2024.01.02/t/" MP-2) +(.db.splayed.set "/tmp/rfl_part_mb/2024.01.03/t/" MP-3) +(.db.splayed.set "/tmp/rfl_part_mb/2024.01.04/t/" MP-4) +(.db.splayed.set "/tmp/rfl_part_mb/2024.01.05/t/" MP-5) +(.db.splayed.set "/tmp/rfl_part_mb/2024.01.06/t/" MP-6) +(.db.splayed.set "/tmp/rfl_part_mb/2024.01.07/t/" MP-7) +(.db.splayed.set "/tmp/rfl_part_mb/2024.01.08/t/" MP-8) +(.db.splayed.set "/tmp/rfl_part_mb/2024.01.09/t/" MP-9) +(.db.splayed.set "/tmp/rfl_part_mb/2024.01.10/t/" MP-A) +(set Pmb (.db.parted.get "/tmp/rfl_part_mb/" 't)) +;; SUM(v) by k — per-partition path with MERGE_BATCH iterations. +(set Rmb (select {s: (sum v) c: (count v) from: Pmb by: k})) +(count Rmb) -- 3 +(at Rmb 'k) -- [1 2 3] +;; Total per-partition sums: +;; k=1: 10+20+40+11+21+22+41+51+71+74 = 361 (10 rows) +;; k=2: 30+50+70+12+23+31+32+52+72 = 372 (9 rows) +;; k=3: 60+80+90+13+33+42+61+62+73 = 514 (9 rows) +(at Rmb 's) -- [361 372 514] +(at Rmb 'c) -- [10 9 9] + +;; MAPCOMMON-only with 10 partitions exercises per-partition n_part_keys==0 +;; path WITHOUT LIMIT pushdown (group_limit=0). +(set Rmbd (select {s: (sum v) from: Pmb by: date})) +(count Rmbd) -- 10 +;; Sum of per-day sums = sum of all v = sum of [10..30] + [40..60] + ... +;; Total = grand-sum from Rmb = 361+372+514 = 1247 +(sum (at Rmbd 's)) -- 1247 + +;; ────────────── Parted: scalar aggregates without GROUP BY ───────────── +;; Exercises parted aggregate dispatch in ray_sum_fn / ray_avg_fn / etc. +;; without per-partition GROUP BY decomposition. +(sum (at Pmb 'v)) -- 1247 +(avg (at Pmb 'v)) -- 44.535714285714285 +(min (at Pmb 'v)) -- 10 +(max (at Pmb 'v)) -- 90 + +;; ────────────── HEAD on parted: take=N (positive, no by) ────────────── +;; (select {from: P take: N}) on parted → exec.c OP_HEAD parted-column path. +(set Rh1 (select {from: Pmb take: 5})) +(count Rh1) -- 5 +(at Rh1 'date) -- [2024.01.01 2024.01.01 2024.01.01 2024.01.02 2024.01.02] +(at Rh1 'v) -- [10 20 30 40 50] +(at Rh1 'k) -- [1 1 2 1 2] + +;; TAIL on parted: take negative +(set Rt1 (select {from: Pmb take: -4})) +(count Rt1) -- 4 +;; Last 4 rows are from partition 10 (MP-A: [71 72 73 74]) +(at Rt1 'v) -- [71 72 73 74] +(at Rt1 'date) -- [2024.01.10 2024.01.10 2024.01.10 2024.01.10] +(at Rt1 'k) -- [1 2 3 1] + ;; ────────────── teardown ────────────── -(.sys.exec "rm -rf /tmp/rfl_part_date /tmp/rfl_part_int /tmp/rfl_part_sym /tmp/rfl_part_single /tmp/rfl_part_empty /tmp/rfl_part_missing /tmp/rfl_part_three /tmp/rfl_part_cd /tmp/rfl_part_cd_sym /tmp/rfl_part_minute /tmp/rfl_part_like") +(.sys.exec "rm -rf /tmp/rfl_part_date /tmp/rfl_part_int /tmp/rfl_part_sym /tmp/rfl_part_single /tmp/rfl_part_empty /tmp/rfl_part_missing /tmp/rfl_part_three /tmp/rfl_part_cd /tmp/rfl_part_cd_sym /tmp/rfl_part_minute /tmp/rfl_part_like /tmp/rfl_part_mc /tmp/rfl_part_mb") diff --git a/test/rfl/table/tblop_branch_cov.rfl b/test/rfl/table/tblop_branch_cov.rfl new file mode 100644 index 00000000..2205f716 --- /dev/null +++ b/test/rfl/table/tblop_branch_cov.rfl @@ -0,0 +1,410 @@ +;; Branch-coverage extension for src/ops/tblop.c — targets the 59 uncovered +;; branches at 67.44% baseline. +;; +;; Strategy: exercise the generic-fallback pivot path (lines 151-518) which +;; requires a lambda agg-fn (dag_ok=false because pivot_fn_to_agg_op +;; returns 0). STR-column pivots currently hit "nyi" in the GROUP BY DAG +;; path, so we test those as expected errors and focus on non-STR generic +;; fallback paths for correctness. +;; +;; Reachable branches targeted: +;; L85 — ray_is_vec(index_arg) true (SYM vec index arg, lambda agg) +;; L87 — len > 16 index columns guard +;; L124 — STR index column forces dag_ok=false (tested via !- nyi) +;; L253 — gid_cap while-loop for large n_grps +;; L265 — hash collision during grouped-HT insert +;; L289 — atom_eq mismatch in index-column probe +;; L297 — atom_eq mismatch in pivot-column probe +;; L478 — F64 agg result type detection in column naming +;; L493 — F64 pivot-value column naming (snprintf "%g") +;; L604 — alter set: missing 4th argument +;; L606 — alter set: idx eval returns error/NULL +;; L608 — alter set: val eval returns error/NULL +;; L682 — alter set: out-of-range scalar index +;; L744 — alter concat: val eval returns error +;; L758 — alter remove: idx eval returns error +;; L839 — del reserved binding +;; +;; OOM-only branches (unreachable from RFL without OOM injection): +;; L128, L134, L156, L164, L169, L174, L179, L183, L205, L255, +;; L270, L314, L323, L336, L344, L425, L597 + +(.sys.exec "rm -rf /tmp/rfl_tblop_branch") + +;; ══════════════════════════════════════════════════════════════════ +;; Section 1: STR columns force dag_ok=false (L124) but the GROUP BY +;; DAG path does not support STR keys → "nyi" error. +;; These test the error propagation, not the happy path. +;; ══════════════════════════════════════════════════════════════════ + +;; STR index column → nyi in GROUP BY DAG. +(set Tstr_idx (table [idx pv val] (list (list "alice" "alice" "bob" "bob") ['x 'y 'x 'y] [10 20 30 40]))) +(pivot Tstr_idx 'idx 'pv 'val sum) !- nyi + +;; STR pivot column → nyi in GROUP BY DAG. +(set Tstr_pv (table [k pv val] (list ['A 'A 'B 'B] (list "x" "y" "x" "y") [10 20 30 40]))) +(pivot Tstr_pv 'k 'pv 'val sum) !- nyi + +;; STR value column with lambda agg — the generic fallback's +;; list_to_typed_vec assumes numeric agg results (agg_type defaults +;; to RAY_I64). STR agg results (e.g. first of a STR subset) are +;; coerced to integers. This is a known limitation of the fallback +;; path; STR values only work via the DAG fast path. +;; (Documented: STR value column in generic fallback produces numeric coercion.) + +;; ══════════════════════════════════════════════════════════════════ +;; Section 2: larger table to trigger hash collisions (L253, L265, +;; L289, L297) +;; With many distinct (index, pivot) groups, the HT probing +;; code at lines 262-304 will encounter collisions. +;; ══════════════════════════════════════════════════════════════════ + +;; 50 distinct index values * 3 pivot values = 150 groups. +;; Lambda agg forces generic fallback. Hash collisions are inevitable +;; with 150 groups in a cap-256 HT (>50% load factor). +(set bigIdx (til 50)) +(set bigTblX (table [k c v] (list bigIdx (take ['x] 50) (take [1] 50)))) +(set bigTblY (table [k c v] (list bigIdx (take ['y] 50) (take [2] 50)))) +(set bigTblZ (table [k c v] (list bigIdx (take ['z] 50) (take [3] 50)))) +(set bigTbl (union-all (union-all bigTblX bigTblY) bigTblZ)) +(count bigTbl) -- 150 +(set Pbig (pivot bigTbl 'k 'c 'v (fn [xs] (sum xs)))) +(count Pbig) -- 50 +(sum (at Pbig 'x)) -- 50 +(sum (at Pbig 'y)) -- 100 +(sum (at Pbig 'z)) -- 150 + +;; ══════════════════════════════════════════════════════════════════ +;; Section 3: F64 agg result → L478 (agg_type = RAY_F64) +;; When agg_fn returns an F64 scalar, the column-building +;; loop at L477-478 detects -RAY_F64 and sets agg_type. +;; ══════════════════════════════════════════════════════════════════ + +;; avg on I64 values returns F64 → agg_type = RAY_F64 at line 478. +(set Tavg_fb (table [k c v] (list ['A 'A 'B 'B] ['x 'y 'x 'y] [3 4 5 6]))) +(set Pavg_fb (pivot Tavg_fb 'k 'c 'v (fn [xs] (avg xs)))) +(count Pavg_fb) -- 2 +(at (at Pavg_fb 'x) 0) -- 3.0 +(at (at Pavg_fb 'y) 0) -- 4.0 +(at (at Pavg_fb 'x) 1) -- 5.0 +(at (at Pavg_fb 'y) 1) -- 6.0 + +;; ══════════════════════════════════════════════════════════════════ +;; Section 4: F64 pivot column with lambda agg → L493 (snprintf "%g") +;; The column-naming code at L492-495 enters the -RAY_F64 +;; branch when pivot values are F64 atoms. +;; ══════════════════════════════════════════════════════════════════ + +(set Tf64pv_fb (table [k c v] (list ['A 'A 'B 'B] [1.5 2.5 1.5 2.5] [10 20 30 40]))) +(set Pf64pv_fb (pivot Tf64pv_fb 'k 'c 'v (fn [xs] (sum xs)))) +(count Pf64pv_fb) -- 2 +;; Column names are "1.5" and "2.5" (snprintf of the F64 pivot values). +(at (at Pf64pv_fb '1.5) 0) -- 10 +(at (at Pf64pv_fb '2.5) 0) -- 20 +(at (at Pf64pv_fb '1.5) 1) -- 30 +(at (at Pf64pv_fb '2.5) 1) -- 40 + +;; ══════════════════════════════════════════════════════════════════ +;; Section 5: I64 pivot column with lambda agg → L489-491 +;; The -RAY_I64 branch in column naming. +;; ══════════════════════════════════════════════════════════════════ + +(set Ti64pv_fb (table [k c v] (list ['A 'A 'B 'B] [100 200 100 200] [10 20 30 40]))) +(set Pi64pv_fb (pivot Ti64pv_fb 'k 'c 'v (fn [xs] (sum xs)))) +(count Pi64pv_fb) -- 2 +(count (key Pi64pv_fb)) -- 3 + +;; ══════════════════════════════════════════════════════════════════ +;; Section 6: BOOL pivot column with lambda agg → L496-497 +;; The -RAY_BOOL branch in column naming. +;; ══════════════════════════════════════════════════════════════════ + +(set Tboolpv_fb (table [k c v] (list ['A 'A 'B 'B] [true false true false] [10 20 30 40]))) +(set Pboolpv_fb (pivot Tboolpv_fb 'k 'c 'v (fn [xs] (sum xs)))) +(count Pboolpv_fb) -- 2 +(at (at Pboolpv_fb 'true) 0) -- 10 +(at (at Pboolpv_fb 'false) 0) -- 20 +(at (at Pboolpv_fb 'true) 1) -- 30 +(at (at Pboolpv_fb 'false) 1) -- 40 + +;; ══════════════════════════════════════════════════════════════════ +;; Section 7: Generic fallback with multi-key SYM vec index (L85) +;; Uses a symbol vector ['a 'b] as index_arg, which enters +;; the ray_is_vec(index_arg) branch at line 85 since SYM +;; vecs have type RAY_SYM (not RAY_LIST). Lambda agg +;; forces generic fallback. +;; ══════════════════════════════════════════════════════════════════ + +(set Tmk_fb (table [a b c v] (list ['X 'X 'Y 'Y] [1 2 1 2] ['p 'q 'p 'q] [10 20 30 40]))) +(set Pmk_fb (pivot Tmk_fb ['a 'b] 'c 'v (fn [xs] (sum xs)))) +(count Pmk_fb) -- 4 +(at (at Pmk_fb 'p) 0) -- 10 +(at (at Pmk_fb 'q) 1) -- 20 + +;; ══════════════════════════════════════════════════════════════════ +;; Section 8: pivot with >16 index columns → L87 ("too many index +;; columns" error) +;; ══════════════════════════════════════════════════════════════════ + +;; The pivot limit is 16 index columns. We test via a list (not vec) +;; of 17 symbols as the index arg. Any table with 17+ named columns +;; would work; the error fires during index-arg parsing before the +;; table is examined. +(set T17 (table [a b c pv val] (list [1] [1] [1] ['x] [42]))) +(pivot T17 (list 'a 'b 'c 'a 'b 'c 'a 'b 'c 'a 'b 'c 'a 'b 'c 'a 'b) 'pv 'val sum) !- limit + +;; ══════════════════════════════════════════════════════════════════ +;; Section 9: Larger pivot to exercise HT probe walks past non-matching +;; slots (L289, L297) +;; When multiple (index, pivot) groups hash to the same +;; bucket, the probe loop checks atom_eq for both index +;; columns (L289) and pivot column (L297). +;; ══════════════════════════════════════════════════════════════════ + +;; 40 distinct index values * 4 pivot values = 160 groups. +;; 160 groups in cap-256 HT → 62.5% load → many collisions. +(set TcollIdx (til 40)) +(set TcollA (table [k c v] (list TcollIdx (take ['a] 40) (take [1] 40)))) +(set TcollB (table [k c v] (list TcollIdx (take ['b] 40) (take [2] 40)))) +(set TcollC (table [k c v] (list TcollIdx (take ['c] 40) (take [3] 40)))) +(set TcollD (table [k c v] (list TcollIdx (take ['d] 40) (take [4] 40)))) +(set Tcoll (union-all (union-all TcollA TcollB) (union-all TcollC TcollD))) +(count Tcoll) -- 160 +(set Pcoll (pivot Tcoll 'k 'c 'v (fn [xs] (sum xs)))) +(count Pcoll) -- 40 +(sum (at Pcoll 'a)) -- 40 +(sum (at Pcoll 'b)) -- 80 +(sum (at Pcoll 'c)) -- 120 +(sum (at Pcoll 'd)) -- 160 + +;; ══════════════════════════════════════════════════════════════════ +;; Section 10: Generic fallback with F64 index column +;; F64 index columns trigger the F64 hash path in the +;; FB_ROW_HASH macro and the atom_eq comparison path. +;; ══════════════════════════════════════════════════════════════════ + +(set Tf64idx_fb (table [k c v] (list [1.1 1.1 2.2 2.2] ['x 'y 'x 'y] [10 20 30 40]))) +(set Pf64idx_fb (pivot Tf64idx_fb 'k 'c 'v (fn [xs] (sum xs)))) +(count Pf64idx_fb) -- 2 +(at (at Pf64idx_fb 'x) 0) -- 10 +(at (at Pf64idx_fb 'y) 1) -- 40 + +;; ══════════════════════════════════════════════════════════════════ +;; Section 11: Generic fallback — lazy agg result materialization +;; When the lambda returns a lazy value (e.g. from sum on +;; a lazy chain), line 370-371 materializes it. +;; ══════════════════════════════════════════════════════════════════ + +(set Tlazy (table [k c v] (list ['A 'A 'B 'B 'C 'C] ['x 'y 'x 'y 'x 'y] [1 2 3 4 5 6]))) +(set Plazy (pivot Tlazy 'k 'c 'v (fn [xs] (sum xs)))) +(count Plazy) -- 3 +(at (at Plazy 'x) 0) -- 1 +(at (at Plazy 'y) 2) -- 6 + +;; ══════════════════════════════════════════════════════════════════ +;; Section 12: alter — first arg evals to error (L586) +;; ray_eval(args[0]) returns error → propagate. +;; ══════════════════════════════════════════════════════════════════ + +;; (alter set 0 99) — args[0] is (at 42 0), which +;; evaluates to an error because 42 is not a collection. +(alter (at 42 0) set 0 99) !- type + +;; ══════════════════════════════════════════════════════════════════ +;; Section 12b: alter set — missing 4th arg (L604) +;; (alter 'v set idx) with no value arg. +;; ══════════════════════════════════════════════════════════════════ + +(set v_alt [1 2 3]) +(alter 'v_alt set 0) !- domain + +;; ══════════════════════════════════════════════════════════════════ +;; Section 13: alter set — idx eval returns error (L606) +;; When args[2] evaluates to an error, the error propagates. +;; ══════════════════════════════════════════════════════════════════ + +(set v_alt2 [1 2 3]) +(alter 'v_alt2 set no_such_var 99) !- name + +;; ══════════════════════════════════════════════════════════════════ +;; Section 14: alter set — val eval returns error (L607-608) +;; When args[3] evaluates to an error. +;; ══════════════════════════════════════════════════════════════════ + +(set v_alt3 [1 2 3]) +(alter 'v_alt3 set 0 no_such_val_var) !- name + +;; ══════════════════════════════════════════════════════════════════ +;; Section 15: alter set — out-of-range scalar index (L682-686) +;; Scalar index beyond vec bounds → "index" error. +;; ══════════════════════════════════════════════════════════════════ + +(set v_oob [1 2 3]) +(alter 'v_oob set 99 42) !- index +(alter 'v_oob set -1 42) !- index + +;; ══════════════════════════════════════════════════════════════════ +;; Section 16: alter concat — val eval returns error (L744) +;; ══════════════════════════════════════════════════════════════════ + +(set v_cat [1 2 3]) +(alter 'v_cat concat no_such_var) !- name + +;; ══════════════════════════════════════════════════════════════════ +;; Section 17: alter remove — idx eval returns error (L758) +;; ══════════════════════════════════════════════════════════════════ + +(set L_rm (list 1 2 3)) +(alter 'L_rm remove no_such_var) !- name + +;; ══════════════════════════════════════════════════════════════════ +;; Section 18: del reserved binding → "reserve" error (L839-841) +;; ══════════════════════════════════════════════════════════════════ + +(del .sys.gc) !- reserve + +;; ══════════════════════════════════════════════════════════════════ +;; Section 19: pivot with SYM vec index + lambda → multi-key fallback +;; Two index columns via SYM vec: exercises L85 (ray_is_vec) +;; plus the multi-key generic fallback. +;; ══════════════════════════════════════════════════════════════════ + +(set Tmk2 (table [a n c v] (list ['X 'X 'X 'X 'Y 'Y 'Y 'Y] [1 1 2 2 1 1 2 2] ['p 'q 'p 'q 'p 'q 'p 'q] [10 20 30 40 50 60 70 80]))) +(set Pmk2 (pivot Tmk2 ['a 'n] 'c 'v (fn [xs] (sum xs)))) +(count Pmk2) -- 4 +(at (at Pmk2 'p) 0) -- 10 +(at (at Pmk2 'q) 3) -- 80 + +;; ══════════════════════════════════════════════════════════════════ +;; Section 20: pivot — "col%ld" fallback column naming (L498-500) +;; When pivot column values have a type that doesn't match +;; SYM/I64/F64/BOOL, the fallback formats "col%ld". +;; U8 pivot values in the generic fallback (lambda agg). +;; ══════════════════════════════════════════════════════════════════ + +(set Tu8fb (table [k c v] (list ['A 'A 'B 'B] [0x01 0x02 0x01 0x02] [10 20 30 40]))) +(set Pu8fb (pivot Tu8fb 'k 'c 'v (fn [xs] (sum xs)))) +(count Pu8fb) -- 2 +(count (key Pu8fb)) -- 3 + +;; ══════════════════════════════════════════════════════════════════ +;; Section 21: modify — fn that returns error (call_fn1 RAY_IS_ERR) +;; Exercises the modify path where call_fn1 returns an +;; error (line 543 check). +;; ══════════════════════════════════════════════════════════════════ + +(set Tmod (table [a b] (list [1 2 3] [10 20 30]))) +(modify Tmod 'a (fn [x] (+ x no_such_var))) !- name + +;; ══════════════════════════════════════════════════════════════════ +;; Section 22: union-all — happy path concatenation (sanity) +;; ══════════════════════════════════════════════════════════════════ + +(set Tua1 (table [a b] (list [1 2] [10 20]))) +(set Tua2 (table [a b] (list [3 4] [30 40]))) +(set Tunion (union-all Tua1 Tua2)) +(count Tunion) -- 4 +(sum (at Tunion 'a)) -- 10 +(sum (at Tunion 'b)) -- 100 + +;; ══════════════════════════════════════════════════════════════════ +;; Section 23: pivot — generic fallback + negative-zero F64 normalization +;; Line 493: `if (fv == 0.0 && signbit(fv)) fv = 0.0;` +;; ensures -0.0 pivot values are normalized to 0.0. +;; ══════════════════════════════════════════════════════════════════ + +(set Tnz (table [k c v] (list ['A 'A 'B 'B] (as 'F64 [0.0 -0.0 0.0 -0.0]) [1 2 3 4]))) +(set Pnz (pivot Tnz 'k 'c 'v (fn [xs] (sum xs)))) +(count Pnz) -- 2 + +;; ══════════════════════════════════════════════════════════════════ +;; Section 24: pivot_fn_to_agg_op — all known builtins (lines 43-53) +;; Verify the DAG fast path for each known builtin. +;; ══════════════════════════════════════════════════════════════════ + +(set Tagg (table [k c v] (list ['A 'A 'B 'B] ['x 'y 'x 'y] [10 20 30 40]))) + +(count (pivot Tagg 'k 'c 'v sum)) -- 2 +(count (pivot Tagg 'k 'c 'v avg)) -- 2 +(count (pivot Tagg 'k 'c 'v min)) -- 2 +(count (pivot Tagg 'k 'c 'v max)) -- 2 +(count (pivot Tagg 'k 'c 'v count)) -- 2 +(count (pivot Tagg 'k 'c 'v first)) -- 2 +(count (pivot Tagg 'k 'c 'v last)) -- 2 +;; non-builtin agg → pivot_fn_to_agg_op returns 0 → generic fallback +(count (pivot Tagg 'k 'c 'v (fn [xs] (sum xs)))) -- 2 + +;; ══════════════════════════════════════════════════════════════════ +;; Section 25: alter set — list with scalar idx (single element replace) +;; ══════════════════════════════════════════════════════════════════ + +(set Lset (list 'a 'b 'c 'd)) +(alter 'Lset set 2 'Z) +Lset -- (list 'a 'b 'Z 'd) + +;; ══════════════════════════════════════════════════════════════════ +;; Section 26: Generic fallback — many groups > 256 to exercise +;; gid_cap while-loop resize (L253) +;; ══════════════════════════════════════════════════════════════════ + +;; 200 index values * 2 pivot values = 400 groups. +;; gid_cap starts at 256, needs to grow to 1024 (400*2=800 > 256). +(set bigIdx2 (til 200)) +(set bigT2X (table [k c v] (list bigIdx2 (take ['p] 200) (take [1] 200)))) +(set bigT2Y (table [k c v] (list bigIdx2 (take ['q] 200) (take [2] 200)))) +(set bigT2 (union-all bigT2X bigT2Y)) +(count bigT2) -- 400 +(set Pbig2 (pivot bigT2 'k 'c 'v (fn [xs] (sum xs)))) +(count Pbig2) -- 200 +(sum (at Pbig2 'p)) -- 200 +(sum (at Pbig2 'q)) -- 400 + +;; ══════════════════════════════════════════════════════════════════ +;; Section 27: Generic fallback with multiple duplicate rows +;; Exercises counting-sort path (L329-354) and the +;; gather_by_idx + call_fn1 loop (L357-378). +;; ══════════════════════════════════════════════════════════════════ + +;; Same (index, pivot) group appears 3 times → subset has 3 elements. +(set Tdup (table [k c v] (list ['A 'A 'A 'B 'B 'B] ['x 'x 'x 'y 'y 'y] [1 2 3 4 5 6]))) +(set Pdup (pivot Tdup 'k 'c 'v (fn [xs] (sum xs)))) +(count Pdup) -- 2 +(at (at Pdup 'x) 0) -- 6 +(at (at Pdup 'y) 1) -- 15 + +;; ══════════════════════════════════════════════════════════════════ +;; Section 28: pivot with F64 value column in generic fallback +;; Ensures the F64 typed agg result propagates correctly. +;; ══════════════════════════════════════════════════════════════════ + +(set Tf64v (table [k c v] (list ['A 'A 'B 'B] ['x 'y 'x 'y] [1.5 2.5 3.5 4.5]))) +(set Pf64v (pivot Tf64v 'k 'c 'v (fn [xs] (sum xs)))) +(count Pf64v) -- 2 +(at (at Pf64v 'x) 0) -- 1.5 +(at (at Pf64v 'y) 1) -- 4.5 + +;; ══════════════════════════════════════════════════════════════════ +;; Unreachable branches documentation (OOM guards, internal-only): +;; +;; L128 — ray_graph_new returns NULL (OOM in DAG fast path) +;; L134 — ray_scan returns NULL for index col (OOM/internal) +;; L156 — ray_graph_new returns NULL (OOM in generic fallback) +;; L164 — ray_scan returns NULL for key col (OOM/internal) +;; L169 — ray_scan returns NULL for pivot col (OOM/internal) +;; L174 — ray_scan returns NULL for value col (OOM/internal) +;; L179 — ray_group returns NULL (OOM) +;; L183 — ray_execute returns NULL/error (internal DAG failure) +;; L205 — ray_lazy_materialize fails (OOM) +;; L255 — ray_alloc for gid_ht_hdr fails (OOM) +;; L270 — ray_vec_new for gid_vec fails (OOM) +;; L314 — ray_alloc for agg_results fails (OOM) +;; L323 — ray_alloc for off_hdr fails (OOM) +;; L336 — ray_alloc for sorted_hdr fails (OOM) +;; L344 — ray_alloc for wcur_hdr fails (OOM) +;; L425 — collection_elem returns non-allocated (requires LIST-typed +;; grouped columns; GROUP BY DAG always produces typed vecs) +;; L597 — ray_sym_str returns NULL (requires an interned sym with no +;; string representation; impossible from user-facing RFL) +;; ══════════════════════════════════════════════════════════════════ + +(.sys.exec "rm -rf /tmp/rfl_tblop_branch") diff --git a/test/test_window.c b/test/test_window.c index c98791dc..582da5e5 100644 --- a/test/test_window.c +++ b/test/test_window.c @@ -3275,6 +3275,927 @@ static test_result_t test_window_sym_partition_large_pool(void) { PASS(); } +/* ─── TIMESTAMP order key: win_keys_differ I64/TIMESTAMP branch (line 37) ─ */ + +static test_result_t test_window_timestamp_order_key(void) { + ray_heap_init(); (void)ray_sym_init(); + + int64_t n = 4; + int64_t gd[] = {1, 1, 1, 1}; + /* TIMESTAMP values (I64-typed microseconds-since-epoch) with ties */ + int64_t od[] = {1000, 1000, 2000, 3000}; + int64_t vd[] = {10, 20, 30, 40}; + ray_t* gv = ray_vec_from_raw(RAY_I64, gd, n); + ray_t* ov = ray_vec_from_raw(RAY_TIMESTAMP, od, n); + ray_t* vv = ray_vec_from_raw(RAY_I64, vd, n); + int64_t ng = ray_sym_intern("g", 1); + int64_t no = ray_sym_intern("o", 1); + int64_t nv = ray_sym_intern("v", 1); + ray_t* tbl = ray_table_new(3); + tbl = ray_table_add_col(tbl, ng, gv); + tbl = ray_table_add_col(tbl, no, ov); + tbl = ray_table_add_col(tbl, nv, vv); + ray_release(gv); ray_release(ov); ray_release(vv); + + /* PARTITION BY g, ORDER BY o (TIMESTAMP) — RANK with tie at 1000. + * Expected ranks: 1, 1, 3, 4 */ + ray_graph_t* g = ray_graph_new(tbl); + ray_op_t* tbl_op = ray_const_table(g, tbl); + ray_op_t* g_op = ray_scan(g, "g"); + ray_op_t* o_op = ray_scan(g, "o"); + ray_op_t* v_op = ray_scan(g, "v"); + ray_op_t* parts[] = { g_op }; + ray_op_t* orders[] = { o_op }; + uint8_t ndesc[] = { 0 }; + uint8_t kinds[] = { RAY_WIN_RANK }; + ray_op_t* fins[] = { v_op }; + int64_t params[] = { 0 }; + ray_op_t* w = ray_window_op(g, tbl_op, + parts, 1, + orders, ndesc, 1, + kinds, fins, params, 1, + RAY_FRAME_ROWS, + RAY_BOUND_UNBOUNDED_PRECEDING, + RAY_BOUND_UNBOUNDED_FOLLOWING, + 0, 0); + ray_t* result = ray_execute(g, w); + TEST_ASSERT_FALSE(RAY_IS_ERR(result)); + ray_t* rc = win_result_col(result, 3); + int64_t* rd = (int64_t*)ray_data(rc); + TEST_ASSERT_EQ_I(rd[0], 1); + TEST_ASSERT_EQ_I(rd[1], 1); + TEST_ASSERT_EQ_I(rd[2], 3); + TEST_ASSERT_EQ_I(rd[3], 4); + + ray_release(result); ray_graph_free(g); ray_release(tbl); + ray_sym_destroy(); ray_heap_destroy(); + PASS(); +} + +/* ─── TIME order key: win_keys_differ I32/TIME branch (line 47) ─────── */ + +static test_result_t test_window_time_order_key(void) { + ray_heap_init(); (void)ray_sym_init(); + + int64_t n = 4; + int64_t gd[] = {1, 1, 1, 1}; + /* TIME values (I32-typed milliseconds-since-midnight) with ties */ + int32_t od[] = {36000, 36000, 72000, 108000}; + int64_t vd[] = {10, 20, 30, 40}; + ray_t* gv = ray_vec_from_raw(RAY_I64, gd, n); + ray_t* ov = ray_vec_from_raw(RAY_TIME, od, n); + ray_t* vv = ray_vec_from_raw(RAY_I64, vd, n); + int64_t ng = ray_sym_intern("g", 1); + int64_t no = ray_sym_intern("o", 1); + int64_t nv = ray_sym_intern("v", 1); + ray_t* tbl = ray_table_new(3); + tbl = ray_table_add_col(tbl, ng, gv); + tbl = ray_table_add_col(tbl, no, ov); + tbl = ray_table_add_col(tbl, nv, vv); + ray_release(gv); ray_release(ov); ray_release(vv); + + /* PARTITION BY g, ORDER BY o (TIME) — RANK with tie at 36000. + * Expected ranks: 1, 1, 3, 4 */ + ray_graph_t* g = ray_graph_new(tbl); + ray_op_t* tbl_op = ray_const_table(g, tbl); + ray_op_t* g_op = ray_scan(g, "g"); + ray_op_t* o_op = ray_scan(g, "o"); + ray_op_t* v_op = ray_scan(g, "v"); + ray_op_t* parts[] = { g_op }; + ray_op_t* orders[] = { o_op }; + uint8_t ndesc[] = { 0 }; + uint8_t kinds[] = { RAY_WIN_RANK }; + ray_op_t* fins[] = { v_op }; + int64_t params[] = { 0 }; + ray_op_t* w = ray_window_op(g, tbl_op, + parts, 1, + orders, ndesc, 1, + kinds, fins, params, 1, + RAY_FRAME_ROWS, + RAY_BOUND_UNBOUNDED_PRECEDING, + RAY_BOUND_UNBOUNDED_FOLLOWING, + 0, 0); + ray_t* result = ray_execute(g, w); + TEST_ASSERT_FALSE(RAY_IS_ERR(result)); + ray_t* rc = win_result_col(result, 3); + int64_t* rd = (int64_t*)ray_data(rc); + TEST_ASSERT_EQ_I(rd[0], 1); + TEST_ASSERT_EQ_I(rd[1], 1); + TEST_ASSERT_EQ_I(rd[2], 3); + TEST_ASSERT_EQ_I(rd[3], 4); + + ray_release(result); ray_graph_free(g); ray_release(tbl); + ray_sym_destroy(); ray_heap_destroy(); + PASS(); +} + +/* ─── TIMESTAMP value for SUM/AVG: win_read_i64/win_read_f64 TIMESTAMP arm + * (lines 79, 93) ───────────────────────────────────────────────────── */ + +static test_result_t test_window_timestamp_value(void) { + ray_heap_init(); (void)ray_sym_init(); + + int64_t n = 4; + int64_t gd[] = {1, 1, 2, 2}; + int64_t vd[] = {100, 200, 300, 400}; + ray_t* gv = ray_vec_from_raw(RAY_I64, gd, n); + ray_t* vv = ray_vec_from_raw(RAY_TIMESTAMP, vd, n); + int64_t ng = ray_sym_intern("g", 1); + int64_t nv = ray_sym_intern("v", 1); + ray_t* tbl = ray_table_new(2); + tbl = ray_table_add_col(tbl, ng, gv); + tbl = ray_table_add_col(tbl, nv, vv); + ray_release(gv); ray_release(vv); + + /* SUM(TIMESTAMP) → win_read_i64 TIMESTAMP arm (line 93) */ + { + ray_graph_t* g = ray_graph_new(tbl); + ray_op_t* tbl_op = ray_const_table(g, tbl); + ray_op_t* w = build_whole_window(g, tbl_op, "g", "v", + RAY_WIN_SUM, "v", 0); + ray_t* result = ray_execute(g, w); + TEST_ASSERT_FALSE(RAY_IS_ERR(result)); + ray_t* rc = win_result_col(result, 2); + TEST_ASSERT_EQ_I(rc->type, RAY_I64); + int64_t* rd = (int64_t*)ray_data(rc); + TEST_ASSERT_EQ_I(rd[0], 300); + TEST_ASSERT_EQ_I(rd[2], 700); + ray_release(result); ray_graph_free(g); + } + + /* AVG(TIMESTAMP) → win_read_f64 TIMESTAMP arm (line 79) */ + { + ray_graph_t* g = ray_graph_new(tbl); + ray_op_t* tbl_op = ray_const_table(g, tbl); + ray_op_t* w = build_whole_window(g, tbl_op, "g", "v", + RAY_WIN_AVG, "v", 0); + ray_t* result = ray_execute(g, w); + TEST_ASSERT_FALSE(RAY_IS_ERR(result)); + ray_t* rc = win_result_col(result, 2); + TEST_ASSERT_EQ_I(rc->type, RAY_F64); + double* rd = (double*)ray_data(rc); + TEST_ASSERT_EQ_F(rd[0], 150.0, 1e-9); + TEST_ASSERT_EQ_F(rd[2], 350.0, 1e-9); + ray_release(result); ray_graph_free(g); + } + + ray_release(tbl); ray_sym_destroy(); ray_heap_destroy(); + PASS(); +} + +/* ─── TIME (I32) value for SUM/AVG: win_read_i64/win_read_f64 TIME arm + * (lines 81, 95) ───────────────────────────────────────────────────── */ + +static test_result_t test_window_time_value(void) { + ray_heap_init(); (void)ray_sym_init(); + + int64_t n = 4; + int64_t gd[] = {1, 1, 2, 2}; + int32_t vd[] = {100, 200, 300, 400}; + ray_t* gv = ray_vec_from_raw(RAY_I64, gd, n); + ray_t* vv = ray_vec_from_raw(RAY_TIME, vd, n); + int64_t ng = ray_sym_intern("g", 1); + int64_t nv = ray_sym_intern("v", 1); + ray_t* tbl = ray_table_new(2); + tbl = ray_table_add_col(tbl, ng, gv); + tbl = ray_table_add_col(tbl, nv, vv); + ray_release(gv); ray_release(vv); + + /* SUM(TIME) → win_read_i64 I32/TIME arm (line 95) */ + { + ray_graph_t* g = ray_graph_new(tbl); + ray_op_t* tbl_op = ray_const_table(g, tbl); + ray_op_t* w = build_whole_window(g, tbl_op, "g", "v", + RAY_WIN_SUM, "v", 0); + ray_t* result = ray_execute(g, w); + TEST_ASSERT_FALSE(RAY_IS_ERR(result)); + ray_t* rc = win_result_col(result, 2); + TEST_ASSERT_EQ_I(rc->type, RAY_I64); + int64_t* rd = (int64_t*)ray_data(rc); + TEST_ASSERT_EQ_I(rd[0], 300); + TEST_ASSERT_EQ_I(rd[2], 700); + ray_release(result); ray_graph_free(g); + } + + /* AVG(TIME) → win_read_f64 I32/TIME arm (line 81) */ + { + ray_graph_t* g = ray_graph_new(tbl); + ray_op_t* tbl_op = ray_const_table(g, tbl); + ray_op_t* w = build_whole_window(g, tbl_op, "g", "v", + RAY_WIN_AVG, "v", 0); + ray_t* result = ray_execute(g, w); + TEST_ASSERT_FALSE(RAY_IS_ERR(result)); + ray_t* rc = win_result_col(result, 2); + double* rd = (double*)ray_data(rc); + TEST_ASSERT_EQ_F(rd[0], 150.0, 1e-9); + TEST_ASSERT_EQ_F(rd[2], 350.0, 1e-9); + ray_release(result); ray_graph_free(g); + } + + ray_release(tbl); ray_sym_destroy(); ray_heap_destroy(); + PASS(); +} + +/* ─── BOOL value for AVG: win_read_f64 BOOL/U8 arm (line 86) ────────── */ + +static test_result_t test_window_bool_value_avg(void) { + ray_heap_init(); (void)ray_sym_init(); + + int64_t n = 4; + int64_t gd[] = {1, 1, 2, 2}; + uint8_t vd[] = {1, 0, 1, 1}; + ray_t* gv = ray_vec_from_raw(RAY_I64, gd, n); + ray_t* vv = ray_vec_from_raw(RAY_BOOL, vd, n); + int64_t ng = ray_sym_intern("g", 1); + int64_t nv = ray_sym_intern("v", 1); + ray_t* tbl = ray_table_new(2); + tbl = ray_table_add_col(tbl, ng, gv); + tbl = ray_table_add_col(tbl, nv, vv); + ray_release(gv); ray_release(vv); + + /* AVG(BOOL) → win_read_f64 BOOL arm (line 86) */ + { + ray_graph_t* g = ray_graph_new(tbl); + ray_op_t* tbl_op = ray_const_table(g, tbl); + ray_op_t* w = build_whole_window(g, tbl_op, "g", "v", + RAY_WIN_AVG, "v", 0); + ray_t* result = ray_execute(g, w); + TEST_ASSERT_FALSE(RAY_IS_ERR(result)); + ray_t* rc = win_result_col(result, 2); + double* rd = (double*)ray_data(rc); + TEST_ASSERT_EQ_F(rd[0], 0.5, 1e-9); /* (1+0)/2 */ + TEST_ASSERT_EQ_F(rd[2], 1.0, 1e-9); /* (1+1)/2 */ + ray_release(result); ray_graph_free(g); + } + + /* SUM(BOOL) → win_read_i64 BOOL arm (line 101) */ + { + ray_graph_t* g = ray_graph_new(tbl); + ray_op_t* tbl_op = ray_const_table(g, tbl); + ray_op_t* w = build_whole_window(g, tbl_op, "g", "v", + RAY_WIN_SUM, "v", 0); + ray_t* result = ray_execute(g, w); + TEST_ASSERT_FALSE(RAY_IS_ERR(result)); + ray_t* rc = win_result_col(result, 2); + int64_t* rd = (int64_t*)ray_data(rc); + TEST_ASSERT_EQ_I(rd[0], 1); + TEST_ASSERT_EQ_I(rd[2], 2); + ray_release(result); ray_graph_free(g); + } + + ray_release(tbl); ray_sym_destroy(); ray_heap_destroy(); + PASS(); +} + +/* ─── FIRST_VALUE f64: lines 461-468 ──────────────────────────────── */ + +static test_result_t test_window_first_value_f64(void) { + ray_heap_init(); (void)ray_sym_init(); + + int64_t n = 3; + int64_t gd[] = {1, 1, 1}; + double vd[] = {1.5, 2.5, 3.5}; + ray_t* tbl = mk_tbl_i64_f64(gd, vd, n); + + /* FIRST_VALUE f64 whole (line 465): non-null first value */ + { + ray_graph_t* g = ray_graph_new(tbl); + ray_op_t* tbl_op = ray_const_table(g, tbl); + ray_op_t* w = build_whole_window(g, tbl_op, "g", "v", + RAY_WIN_FIRST_VALUE, "v", 0); + ray_t* result = ray_execute(g, w); + TEST_ASSERT_FALSE(RAY_IS_ERR(result)); + ray_t* rc = win_result_col(result, 2); + TEST_ASSERT_EQ_I(rc->type, RAY_F64); + double* rd = (double*)ray_data(rc); + for (int64_t i = 0; i < n; i++) TEST_ASSERT_EQ_F(rd[i], 1.5, 1e-9); + ray_release(result); ray_graph_free(g); + } + + ray_release(tbl); ray_sym_destroy(); ray_heap_destroy(); + PASS(); +} + +/* ─── FIRST_VALUE f64 with null first: line 468 (win_set_null) ────── */ + +static test_result_t test_window_first_value_f64_null(void) { + ray_heap_init(); (void)ray_sym_init(); + + int64_t n = 3; + int64_t gd[] = {1, 1, 1}; + double vd[] = {0.0, 2.5, 3.5}; + ray_t* gv = ray_vec_from_raw(RAY_I64, gd, n); + ray_t* vv = ray_vec_from_raw(RAY_F64, vd, n); + ray_vec_set_null(vv, 0, true); /* first row null */ + int64_t ng = ray_sym_intern("g", 1); + int64_t nv = ray_sym_intern("v", 1); + ray_t* tbl = ray_table_new(2); + tbl = ray_table_add_col(tbl, ng, gv); + tbl = ray_table_add_col(tbl, nv, vv); + ray_release(gv); ray_release(vv); + + /* FIRST_VALUE f64 whole, first row null → all null (lines 462, 465, 468) */ + ray_graph_t* g = ray_graph_new(tbl); + ray_op_t* tbl_op = ray_const_table(g, tbl); + ray_op_t* g_op = ray_scan(g, "g"); + ray_op_t* v_op = ray_scan(g, "v"); + ray_op_t* parts[] = { g_op }; + uint8_t kinds[] = { RAY_WIN_FIRST_VALUE }; + ray_op_t* fins[] = { v_op }; + int64_t params[] = { 0 }; + ray_op_t* w = ray_window_op(g, tbl_op, + parts, 1, + NULL, NULL, 0, + kinds, fins, params, 1, + RAY_FRAME_ROWS, + RAY_BOUND_UNBOUNDED_PRECEDING, + RAY_BOUND_UNBOUNDED_FOLLOWING, + 0, 0); + ray_t* result = ray_execute(g, w); + TEST_ASSERT_FALSE(RAY_IS_ERR(result)); + ray_t* rc = win_result_col(result, 2); + TEST_ASSERT_EQ_I(rc->type, RAY_F64); + for (int64_t i = 0; i < n; i++) + TEST_ASSERT_TRUE(ray_vec_is_null(rc, i)); + + ray_release(result); ray_graph_free(g); ray_release(tbl); + ray_sym_destroy(); ray_heap_destroy(); + PASS(); +} + +/* ─── LAST_VALUE whole f64 with null last: lines 486, 489 ──────────── */ + +static test_result_t test_window_last_value_whole_f64_null(void) { + ray_heap_init(); (void)ray_sym_init(); + + int64_t n = 3; + int64_t gd[] = {1, 1, 1}; + double vd[] = {1.5, 2.5, 0.0}; + ray_t* gv = ray_vec_from_raw(RAY_I64, gd, n); + ray_t* vv = ray_vec_from_raw(RAY_F64, vd, n); + ray_vec_set_null(vv, 2, true); /* last row null */ + int64_t ng = ray_sym_intern("g", 1); + int64_t nv = ray_sym_intern("v", 1); + ray_t* tbl = ray_table_new(2); + tbl = ray_table_add_col(tbl, ng, gv); + tbl = ray_table_add_col(tbl, nv, vv); + ray_release(gv); ray_release(vv); + + /* LAST_VALUE f64 whole: last row null → all null (lines 485-489) */ + ray_graph_t* g = ray_graph_new(tbl); + ray_op_t* tbl_op = ray_const_table(g, tbl); + ray_op_t* g_op = ray_scan(g, "g"); + ray_op_t* v_op = ray_scan(g, "v"); + ray_op_t* parts[] = { g_op }; + uint8_t kinds[] = { RAY_WIN_LAST_VALUE }; + ray_op_t* fins[] = { v_op }; + int64_t params[] = { 0 }; + ray_op_t* w = ray_window_op(g, tbl_op, + parts, 1, + NULL, NULL, 0, + kinds, fins, params, 1, + RAY_FRAME_ROWS, + RAY_BOUND_UNBOUNDED_PRECEDING, + RAY_BOUND_UNBOUNDED_FOLLOWING, + 0, 0); + ray_t* result = ray_execute(g, w); + TEST_ASSERT_FALSE(RAY_IS_ERR(result)); + ray_t* rc = win_result_col(result, 2); + TEST_ASSERT_EQ_I(rc->type, RAY_F64); + for (int64_t i = 0; i < n; i++) + TEST_ASSERT_TRUE(ray_vec_is_null(rc, i)); + + ray_release(result); ray_graph_free(g); ray_release(tbl); + ray_sym_destroy(); ray_heap_destroy(); + PASS(); +} + +/* ─── LAST_VALUE whole i64 with null last: lines 502, 505 ──────────── */ + +static test_result_t test_window_last_value_whole_i64_null(void) { + ray_heap_init(); (void)ray_sym_init(); + + int64_t n = 3; + int64_t gd[] = {1, 1, 1}; + int64_t vd[] = {10, 20, 0}; + ray_t* gv = ray_vec_from_raw(RAY_I64, gd, n); + ray_t* vv = ray_vec_from_raw(RAY_I64, vd, n); + ray_vec_set_null(vv, 2, true); /* last row null */ + int64_t ng = ray_sym_intern("g", 1); + int64_t nv = ray_sym_intern("v", 1); + ray_t* tbl = ray_table_new(2); + tbl = ray_table_add_col(tbl, ng, gv); + tbl = ray_table_add_col(tbl, nv, vv); + ray_release(gv); ray_release(vv); + + /* LAST_VALUE i64 whole: last row null → all null (lines 501-505) */ + ray_graph_t* g = ray_graph_new(tbl); + ray_op_t* tbl_op = ray_const_table(g, tbl); + ray_op_t* g_op = ray_scan(g, "g"); + ray_op_t* v_op = ray_scan(g, "v"); + ray_op_t* parts[] = { g_op }; + uint8_t kinds[] = { RAY_WIN_LAST_VALUE }; + ray_op_t* fins[] = { v_op }; + int64_t params[] = { 0 }; + ray_op_t* w = ray_window_op(g, tbl_op, + parts, 1, + NULL, NULL, 0, + kinds, fins, params, 1, + RAY_FRAME_ROWS, + RAY_BOUND_UNBOUNDED_PRECEDING, + RAY_BOUND_UNBOUNDED_FOLLOWING, + 0, 0); + ray_t* result = ray_execute(g, w); + TEST_ASSERT_FALSE(RAY_IS_ERR(result)); + ray_t* rc = win_result_col(result, 2); + TEST_ASSERT_EQ_I(rc->type, RAY_I64); + for (int64_t i = 0; i < n; i++) + TEST_ASSERT_TRUE(ray_vec_is_null(rc, i)); + + ray_release(result); ray_graph_free(g); ray_release(tbl); + ray_sym_destroy(); ray_heap_destroy(); + PASS(); +} + +/* ─── LEAD offset=0: coerced to 1 (line 430) ─────────────────────── */ + +static test_result_t test_window_lead_offset_zero(void) { + ray_heap_init(); (void)ray_sym_init(); + + int64_t n = 3; + int64_t gd[] = {1, 1, 1}; + int64_t vd[] = {10, 20, 30}; + ray_t* tbl = mk_tbl_i64_2(gd, vd, n); + + /* LEAD offset=0 → coerced to 1 */ + ray_graph_t* g = ray_graph_new(tbl); + ray_op_t* tbl_op = ray_const_table(g, tbl); + ray_op_t* w = build_whole_window(g, tbl_op, "g", "v", + RAY_WIN_LEAD, "v", 0); + ray_t* result = ray_execute(g, w); + TEST_ASSERT_FALSE(RAY_IS_ERR(result)); + ray_t* rc = win_result_col(result, 2); + int64_t* rd = (int64_t*)ray_data(rc); + TEST_ASSERT_EQ_I(rd[0], 20); + TEST_ASSERT_EQ_I(rd[1], 30); + TEST_ASSERT_TRUE(ray_vec_is_null(rc, 2)); + + ray_release(result); ray_graph_free(g); ray_release(tbl); + ray_sym_destroy(); ray_heap_destroy(); + PASS(); +} + +/* ─── LEAD f64 offset=0: coerced to 1, f64 path (lines 428, 430, 431) */ + +static test_result_t test_window_lead_f64_offset_zero(void) { + ray_heap_init(); (void)ray_sym_init(); + + int64_t n = 3; + int64_t gd[] = {1, 1, 1}; + double vd[] = {1.5, 2.5, 3.5}; + ray_t* tbl = mk_tbl_i64_f64(gd, vd, n); + + /* LEAD f64 offset=0 → coerced to 1 */ + ray_graph_t* g = ray_graph_new(tbl); + ray_op_t* tbl_op = ray_const_table(g, tbl); + ray_op_t* w = build_whole_window(g, tbl_op, "g", "v", + RAY_WIN_LEAD, "v", 0); + ray_t* result = ray_execute(g, w); + TEST_ASSERT_FALSE(RAY_IS_ERR(result)); + ray_t* rc = win_result_col(result, 2); + TEST_ASSERT_EQ_I(rc->type, RAY_F64); + double* rd = (double*)ray_data(rc); + TEST_ASSERT_EQ_F(rd[0], 2.5, 1e-9); + TEST_ASSERT_EQ_F(rd[1], 3.5, 1e-9); + TEST_ASSERT_TRUE(ray_vec_is_null(rc, 2)); + + ray_release(result); ray_graph_free(g); ray_release(tbl); + ray_sym_destroy(); ray_heap_destroy(); + PASS(); +} + +/* ─── NTH_VALUE f64 with out-of-range nth → all null ─────────────── */ + +static test_result_t test_window_nth_value_f64_oob(void) { + ray_heap_init(); (void)ray_sym_init(); + + int64_t n = 3; + int64_t gd[] = {1, 1, 1}; + double vd[] = {1.5, 2.5, 3.5}; + ray_t* tbl = mk_tbl_i64_f64(gd, vd, n); + + /* NTH_VALUE f64 nth=99 > part_len → all null */ + ray_graph_t* g = ray_graph_new(tbl); + ray_op_t* tbl_op = ray_const_table(g, tbl); + ray_op_t* w = build_whole_window(g, tbl_op, "g", "v", + RAY_WIN_NTH_VALUE, "v", 99); + ray_t* result = ray_execute(g, w); + TEST_ASSERT_FALSE(RAY_IS_ERR(result)); + ray_t* rc = win_result_col(result, 2); + TEST_ASSERT_EQ_I(rc->type, RAY_F64); + for (int64_t i = 0; i < n; i++) + TEST_ASSERT_TRUE(ray_vec_is_null(rc, i)); + + ray_release(result); ray_graph_free(g); ray_release(tbl); + ray_sym_destroy(); ray_heap_destroy(); + PASS(); +} + +/* ─── BOOL order key: win_keys_differ U8/BOOL arm (lines 59-62) ──── */ + +static test_result_t test_window_bool_order_key(void) { + ray_heap_init(); (void)ray_sym_init(); + + int64_t n = 4; + int64_t gd[] = {1, 1, 1, 1}; + uint8_t od[] = {0, 0, 1, 1}; /* ties at 0 */ + int64_t vd[] = {10, 20, 30, 40}; + ray_t* gv = ray_vec_from_raw(RAY_I64, gd, n); + ray_t* ov = ray_vec_from_raw(RAY_BOOL, od, n); + ray_t* vv = ray_vec_from_raw(RAY_I64, vd, n); + int64_t ng = ray_sym_intern("g", 1); + int64_t no = ray_sym_intern("o", 1); + int64_t nv = ray_sym_intern("v", 1); + ray_t* tbl = ray_table_new(3); + tbl = ray_table_add_col(tbl, ng, gv); + tbl = ray_table_add_col(tbl, no, ov); + tbl = ray_table_add_col(tbl, nv, vv); + ray_release(gv); ray_release(ov); ray_release(vv); + + /* PARTITION BY g, ORDER BY o (BOOL) — RANK with tie at 0. + * Expected ranks: 1, 1, 3, 3 */ + ray_graph_t* g = ray_graph_new(tbl); + ray_op_t* tbl_op = ray_const_table(g, tbl); + ray_op_t* g_op = ray_scan(g, "g"); + ray_op_t* o_op = ray_scan(g, "o"); + ray_op_t* v_op = ray_scan(g, "v"); + ray_op_t* parts[] = { g_op }; + ray_op_t* orders[] = { o_op }; + uint8_t ndesc[] = { 0 }; + uint8_t kinds[] = { RAY_WIN_RANK }; + ray_op_t* fins[] = { v_op }; + int64_t params[] = { 0 }; + ray_op_t* w = ray_window_op(g, tbl_op, + parts, 1, + orders, ndesc, 1, + kinds, fins, params, 1, + RAY_FRAME_ROWS, + RAY_BOUND_UNBOUNDED_PRECEDING, + RAY_BOUND_UNBOUNDED_FOLLOWING, + 0, 0); + ray_t* result = ray_execute(g, w); + TEST_ASSERT_FALSE(RAY_IS_ERR(result)); + ray_t* rc = win_result_col(result, 3); + int64_t* rd = (int64_t*)ray_data(rc); + /* sorted by BOOL ASC: [0,0,1,1] → ranks 1,1,3,3 */ + TEST_ASSERT_EQ_I(rd[0], 1); + TEST_ASSERT_EQ_I(rd[1], 1); + TEST_ASSERT_EQ_I(rd[2], 3); + TEST_ASSERT_EQ_I(rd[3], 3); + + ray_release(result); ray_graph_free(g); ray_release(tbl); + ray_sym_destroy(); ray_heap_destroy(); + PASS(); +} + +/* ─── STR order key: win_keys_differ STR arm (lines 63-69) ──────── */ + +static test_result_t test_window_str_order_key(void) { + ray_heap_init(); (void)ray_sym_init(); + + int64_t n = 4; + int64_t gd[] = {1, 1, 1, 1}; + int64_t vd[] = {10, 20, 30, 40}; + ray_t* gv = ray_vec_from_raw(RAY_I64, gd, n); + ray_t* ov = ray_vec_new(RAY_STR, n); + ov = ray_str_vec_append(ov, "alpha", 5); + ov = ray_str_vec_append(ov, "alpha", 5); + ov = ray_str_vec_append(ov, "beta", 4); + ov = ray_str_vec_append(ov, "gamma", 5); + ray_t* vv = ray_vec_from_raw(RAY_I64, vd, n); + int64_t ng = ray_sym_intern("g", 1); + int64_t no = ray_sym_intern("o", 1); + int64_t nv = ray_sym_intern("v", 1); + ray_t* tbl = ray_table_new(3); + tbl = ray_table_add_col(tbl, ng, gv); + tbl = ray_table_add_col(tbl, no, ov); + tbl = ray_table_add_col(tbl, nv, vv); + ray_release(gv); ray_release(ov); ray_release(vv); + + /* PARTITION BY g, ORDER BY o (STR) — RANK with tie at "alpha". + * Expected ranks: 1, 1, 3, 4 */ + ray_graph_t* g = ray_graph_new(tbl); + ray_op_t* tbl_op = ray_const_table(g, tbl); + ray_op_t* g_op = ray_scan(g, "g"); + ray_op_t* o_op = ray_scan(g, "o"); + ray_op_t* v_op = ray_scan(g, "v"); + ray_op_t* parts[] = { g_op }; + ray_op_t* orders[] = { o_op }; + uint8_t ndesc[] = { 0 }; + uint8_t kinds[] = { RAY_WIN_RANK }; + ray_op_t* fins[] = { v_op }; + int64_t params[] = { 0 }; + ray_op_t* w = ray_window_op(g, tbl_op, + parts, 1, + orders, ndesc, 1, + kinds, fins, params, 1, + RAY_FRAME_ROWS, + RAY_BOUND_UNBOUNDED_PRECEDING, + RAY_BOUND_UNBOUNDED_FOLLOWING, + 0, 0); + ray_t* result = ray_execute(g, w); + TEST_ASSERT_FALSE(RAY_IS_ERR(result)); + ray_t* rc = win_result_col(result, 3); + int64_t* rd = (int64_t*)ray_data(rc); + TEST_ASSERT_EQ_I(rd[0], 1); + TEST_ASSERT_EQ_I(rd[1], 1); + TEST_ASSERT_EQ_I(rd[2], 3); + TEST_ASSERT_EQ_I(rd[3], 4); + + ray_release(result); ray_graph_free(g); ray_release(tbl); + ray_sym_destroy(); ray_heap_destroy(); + PASS(); +} + +/* ─── DATE value for AVG: win_read_f64 DATE arm (line 81:19) ────────── */ + +static test_result_t test_window_date_value_avg(void) { + ray_heap_init(); (void)ray_sym_init(); + + int64_t n = 4; + int64_t gd[] = {1, 1, 2, 2}; + int32_t vd[] = {100, 200, 300, 400}; + ray_t* gv = ray_vec_from_raw(RAY_I64, gd, n); + ray_t* vv = ray_vec_from_raw(RAY_DATE, vd, n); + int64_t ng = ray_sym_intern("g", 1); + int64_t nv = ray_sym_intern("v", 1); + ray_t* tbl = ray_table_new(2); + tbl = ray_table_add_col(tbl, ng, gv); + tbl = ray_table_add_col(tbl, nv, vv); + ray_release(gv); ray_release(vv); + + /* AVG(DATE) → win_read_f64 DATE arm */ + { + ray_graph_t* g = ray_graph_new(tbl); + ray_op_t* tbl_op = ray_const_table(g, tbl); + ray_op_t* w = build_whole_window(g, tbl_op, "g", "v", + RAY_WIN_AVG, "v", 0); + ray_t* result = ray_execute(g, w); + TEST_ASSERT_FALSE(RAY_IS_ERR(result)); + ray_t* rc = win_result_col(result, 2); + double* rd = (double*)ray_data(rc); + TEST_ASSERT_EQ_F(rd[0], 150.0, 1e-9); + TEST_ASSERT_EQ_F(rd[2], 350.0, 1e-9); + ray_release(result); ray_graph_free(g); + } + + /* SUM(DATE) → win_read_i64 DATE arm */ + { + ray_graph_t* g = ray_graph_new(tbl); + ray_op_t* tbl_op = ray_const_table(g, tbl); + ray_op_t* w = build_whole_window(g, tbl_op, "g", "v", + RAY_WIN_SUM, "v", 0); + ray_t* result = ray_execute(g, w); + TEST_ASSERT_FALSE(RAY_IS_ERR(result)); + ray_t* rc = win_result_col(result, 2); + int64_t* rd = (int64_t*)ray_data(rc); + TEST_ASSERT_EQ_I(rd[0], 300); + TEST_ASSERT_EQ_I(rd[2], 700); + ray_release(result); ray_graph_free(g); + } + + ray_release(tbl); ray_sym_destroy(); ray_heap_destroy(); + PASS(); +} + +/* ─── Running MIN f64 with decreasing values: line 291 vmx branch ── */ + +static test_result_t test_window_running_max_f64_increasing(void) { + ray_heap_init(); (void)ray_sym_init(); + + int64_t n = 4; + int64_t gd[] = {1, 1, 1, 1}; + int64_t od[] = {1, 2, 3, 4}; + double vd[] = {1.0, 3.0, 2.0, 5.0}; + ray_t* gv = ray_vec_from_raw(RAY_I64, gd, n); + ray_t* ov = ray_vec_from_raw(RAY_I64, od, n); + ray_t* vv = ray_vec_from_raw(RAY_F64, vd, n); + int64_t ng = ray_sym_intern("g", 1); + int64_t no = ray_sym_intern("o", 1); + int64_t nv = ray_sym_intern("v", 1); + ray_t* tbl = ray_table_new(3); + tbl = ray_table_add_col(tbl, ng, gv); + tbl = ray_table_add_col(tbl, no, ov); + tbl = ray_table_add_col(tbl, nv, vv); + ray_release(gv); ray_release(ov); ray_release(vv); + + /* Running MAX f64: [1.0, 3.0, 2.0, 5.0] → [1.0, 3.0, 3.0, 5.0] */ + ray_graph_t* g = ray_graph_new(tbl); + ray_op_t* tbl_op = ray_const_table(g, tbl); + ray_op_t* g_op = ray_scan(g, "g"); + ray_op_t* o_op = ray_scan(g, "o"); + ray_op_t* v_op = ray_scan(g, "v"); + ray_op_t* parts[] = { g_op }; + ray_op_t* orders[] = { o_op }; + uint8_t ndesc[] = { 0 }; + uint8_t kinds[] = { RAY_WIN_MAX }; + ray_op_t* fins[] = { v_op }; + int64_t params[] = { 0 }; + ray_op_t* w = ray_window_op(g, tbl_op, + parts, 1, + orders, ndesc, 1, + kinds, fins, params, 1, + RAY_FRAME_ROWS, + RAY_BOUND_UNBOUNDED_PRECEDING, + RAY_BOUND_CURRENT_ROW, + 0, 0); + ray_t* result = ray_execute(g, w); + TEST_ASSERT_FALSE(RAY_IS_ERR(result)); + ray_t* rc = win_result_col(result, 3); + double* rd = (double*)ray_data(rc); + TEST_ASSERT_EQ_F(rd[0], 1.0, 1e-9); + TEST_ASSERT_EQ_F(rd[1], 3.0, 1e-9); + TEST_ASSERT_EQ_F(rd[2], 3.0, 1e-9); + TEST_ASSERT_EQ_F(rd[3], 5.0, 1e-9); + + ray_release(result); ray_graph_free(g); ray_release(tbl); + ray_sym_destroy(); ray_heap_destroy(); + PASS(); +} + +/* ─── I32 order key (plain RAY_I32, not DATE/TIME): line 47:9 ─────── */ + +static test_result_t test_window_i32_plain_order_key(void) { + ray_heap_init(); (void)ray_sym_init(); + + int64_t n = 4; + int64_t gd[] = {1, 1, 1, 1}; + int32_t od[] = {100, 100, 200, 300}; /* ties at 100 */ + int64_t vd[] = {10, 20, 30, 40}; + ray_t* gv = ray_vec_from_raw(RAY_I64, gd, n); + ray_t* ov = ray_vec_from_raw(RAY_I32, od, n); + ray_t* vv = ray_vec_from_raw(RAY_I64, vd, n); + int64_t ng = ray_sym_intern("g", 1); + int64_t no = ray_sym_intern("o", 1); + int64_t nv = ray_sym_intern("v", 1); + ray_t* tbl = ray_table_new(3); + tbl = ray_table_add_col(tbl, ng, gv); + tbl = ray_table_add_col(tbl, no, ov); + tbl = ray_table_add_col(tbl, nv, vv); + ray_release(gv); ray_release(ov); ray_release(vv); + + /* PARTITION BY g, ORDER BY o (I32) — RANK with tie at 100. + * Expected ranks: 1, 1, 3, 4 */ + ray_graph_t* g = ray_graph_new(tbl); + ray_op_t* tbl_op = ray_const_table(g, tbl); + ray_op_t* g_op = ray_scan(g, "g"); + ray_op_t* o_op = ray_scan(g, "o"); + ray_op_t* v_op = ray_scan(g, "v"); + ray_op_t* parts[] = { g_op }; + ray_op_t* orders[] = { o_op }; + uint8_t ndesc[] = { 0 }; + uint8_t kinds[] = { RAY_WIN_RANK }; + ray_op_t* fins[] = { v_op }; + int64_t params[] = { 0 }; + ray_op_t* w = ray_window_op(g, tbl_op, + parts, 1, + orders, ndesc, 1, + kinds, fins, params, 1, + RAY_FRAME_ROWS, + RAY_BOUND_UNBOUNDED_PRECEDING, + RAY_BOUND_UNBOUNDED_FOLLOWING, + 0, 0); + ray_t* result = ray_execute(g, w); + TEST_ASSERT_FALSE(RAY_IS_ERR(result)); + ray_t* rc = win_result_col(result, 3); + int64_t* rd = (int64_t*)ray_data(rc); + TEST_ASSERT_EQ_I(rd[0], 1); + TEST_ASSERT_EQ_I(rd[1], 1); + TEST_ASSERT_EQ_I(rd[2], 3); + TEST_ASSERT_EQ_I(rd[3], 4); + + ray_release(result); ray_graph_free(g); ray_release(tbl); + ray_sym_destroy(); ray_heap_destroy(); + PASS(); +} + /* ─── Suite registration ──────────────────────────────────────────── */ const test_entry_t window_entries[] = { @@ -3339,5 +4260,24 @@ const test_entry_t window_entries[] = { { "window/multikey_u8_radix", test_window_multikey_u8_radix, NULL, NULL }, { "window/avg_sym_value", test_window_avg_sym_value, NULL, NULL }, { "window/sym_partition_large_pool", test_window_sym_partition_large_pool, NULL, NULL }, + { "window/timestamp_order_key", test_window_timestamp_order_key, NULL, NULL }, + { "window/time_order_key", test_window_time_order_key, NULL, NULL }, + { "window/timestamp_value", test_window_timestamp_value, NULL, NULL }, + { "window/time_value", test_window_time_value, NULL, NULL }, + { "window/bool_value_avg", test_window_bool_value_avg, NULL, NULL }, + { "window/first_value_f64", test_window_first_value_f64, NULL, NULL }, + { "window/first_value_f64_null", test_window_first_value_f64_null, NULL, NULL }, + { "window/last_value_whole_f64_null", test_window_last_value_whole_f64_null, NULL, NULL }, + { "window/last_value_whole_i64_null", test_window_last_value_whole_i64_null, NULL, NULL }, + { "window/lead_offset_zero", test_window_lead_offset_zero, NULL, NULL }, + { "window/lead_f64_offset_zero", test_window_lead_f64_offset_zero, NULL, NULL }, + { "window/nth_value_f64_oob", test_window_nth_value_f64_oob, NULL, NULL }, + { "window/bool_order_key", test_window_bool_order_key, NULL, NULL }, + { "window/str_order_key", test_window_str_order_key, NULL, NULL }, + { "window/date_value_avg", test_window_date_value_avg, NULL, NULL }, + { "window/running_min_f64_decreasing", test_window_running_min_f64_decreasing, NULL, NULL }, + { "window/running_min_i64_decreasing", test_window_running_min_i64_decreasing, NULL, NULL }, + { "window/running_max_f64_increasing", test_window_running_max_f64_increasing, NULL, NULL }, + { "window/i32_order_key_plain", test_window_i32_plain_order_key, NULL, NULL }, { NULL, NULL, NULL, NULL }, };