diff --git a/README.md b/README.md index f9230b0..e6a1bcf 100644 --- a/README.md +++ b/README.md @@ -16,6 +16,7 @@ It is not a SQL database or persistence layer. ColQL is for data you already wan - Object predicates plus tuple-style `where(column, operator, value)` - Explicit equality indexes and sorted numeric indexes for hot predicates - Unique indexes for stable ID lookups and duplicate-key protection +- Public `query.explain()` diagnostics for planner visibility without executing queries - JS Array migration helpers such as `fromRows`, `firstWhere`, `countWhere`, and `exists` - Mutable tables with `updateMany` and `deleteMany` - Runtime validation with structured `ColQLError` codes @@ -60,6 +61,10 @@ const activeAdults = users .select(["id", "age", "score"]) .toArray(); +console.log( + users.where({ status: "active", age: { gte: 18 } }).select(["id"]).explain(), +); + const result = users.updateMany( { status: "passive", age: { lt: 18 } }, { status: "archived" }, @@ -96,6 +101,7 @@ npm run test:large For benchmark scripts and interpretation notes, see [Performance and Benchmarks](./docs/doc/13-performance-and-benchmarks.md). For JS Array comparisons, run `npm run benchmark:array-comparison`; results are local guidance, not universal promises or CI requirements. +For a scenario-style local workload, run `npm run benchmark:session-analytics`. 
## When To Use ColQL @@ -112,14 +118,26 @@ Use ColQL when: Avoid ColQL when: - you need durable storage, transactions, joins, or SQL +- data must be shared across pods, workers, processes, or machines +- writes dominate the workload and broad indexes are frequently dirtied - row indexes must be stable external identifiers - a small/simple JavaScript array is already clear and fast enough - every query requires arbitrary sorting or grouping - you need concurrent writers or multi-process coordination - you want automatic indexes, compound indexes, or query planning across tables +- you want analytical SQL over files or large columnar datasets, where DuckDB may be a better fit Row indexes are physical positions and can change after deletes. Use an explicit `id` column for stable identity. +## Decision Guide + +| Tool | Good fit | Not a good fit | +|---|---|---| +| ColQL | Process-local TypeScript data that benefits from compact in-memory columns, explicit indexes, validation, and inspectable query plans | Persistence, SQL, joins, transactions, shared state, or distributed coordination | +| JavaScript arrays | Small or simple datasets, ad hoc transforms, or write-heavy logic where object arrays are already clear and fast enough | Memory-sensitive data, repeated projections, structured indexed lookups, or runtime schema validation | +| SQLite | Durable embedded relational storage with SQL, transactions, and indexes | Pure process-local ephemeral caches where a database file and SQL layer are unnecessary | +| DuckDB | Analytical SQL, file-based analytics, large columnar datasets, and ad hoc aggregations | TypeScript-first mutable process-local tables with explicit in-memory indexes | + ## Examples - [Basic usage](./examples/basic.ts) @@ -159,6 +177,7 @@ users.where({ status: "active", age: { gte: 18 } }).toArray(); users.where("age", ">=", 18).select(["id"]).toArray(); users.whereIn("status", ["active", "passive"]); users.filter((row) => row.score > 90); +users.where({ 
status: "active" }).explain(); users.count(); users.avg("age"); @@ -175,9 +194,11 @@ users.findBy("id", 123); const buffer = users.serialize(); const restored = table.deserialize(buffer); +restored.createIndex("status"); ``` `filter(fn)` is intentionally a full-scan escape hatch. Prefer structured predicates when you want index planning. +`query.explain()` returns structured diagnostics without executing the query, scanning rows, materializing rows, calling `onQuery`, or rebuilding dirty indexes. ## Error Handling @@ -209,11 +230,12 @@ npm run benchmark:optimizer npm run benchmark:serialization npm run benchmark:delete npm run benchmark:array-comparison +npm run benchmark:session-analytics ``` ## Status -ColQL v0.3.x aims to keep the public API reasonably stable, but breaking changes may still happen before 1.0.0. +ColQL v0.4.x introduces public query diagnostics and continues moving toward API stabilization, but breaking changes may still happen before 1.0.0. The API is not fully frozen. ## Limitations diff --git a/benchmarks/session-analytics.mjs b/benchmarks/session-analytics.mjs new file mode 100644 index 0000000..6d8ed32 --- /dev/null +++ b/benchmarks/session-analytics.mjs @@ -0,0 +1,276 @@ +import os from "node:os"; +import { column, fromRows, table } from "../dist/index.mjs"; + +const DEFAULT_ROWS = 25_000; +const NOW = 1_725_000_000; +const STALE_ACTIVE_CUTOFF = NOW - 7 * 86_400; +const OLD_INACTIVE_CUTOFF = NOW - 21 * 86_400; + +const rows = process.env.ROWS + ? 
Number.parseInt(process.env.ROWS, 10) + : DEFAULT_ROWS; +const jsonOutput = process.argv.includes("--json"); + +if (!Number.isInteger(rows) || rows < 1) { + throw new Error(`Invalid ROWS value: ${String(process.env.ROWS)}`); +} + +const schema = { + id: column.uint32(), + userId: column.uint32(), + segment: column.dictionary(["free", "pro", "enterprise"]), + status: column.dictionary(["active", "expired", "inactive"]), + startedAt: column.uint32(), + durationMs: column.uint32(), + country: column.dictionary(["US", "TR", "DE", "GB"]), +}; + +function createRows(rowCount) { + return Array.from({ length: rowCount }, (_unused, id) => ({ + id, + userId: 10_000 + (id % 3_000), + segment: id % 19 === 0 ? "enterprise" : id % 4 === 0 ? "pro" : "free", + status: id % 29 === 0 ? "inactive" : id % 7 === 0 ? "expired" : "active", + startedAt: NOW - ((id * 137) % (30 * 86_400)), + durationMs: 30_000 + ((id * 9_973) % 9_000_000), + country: id % 11 === 0 ? "TR" : id % 7 === 0 ? "DE" : id % 5 === 0 ? "GB" : "US", + })); +} + +function createIndexedTable(sourceRows) { + return fromRows(schema, sourceRows) + .createIndex("status") + .createIndex("segment") + .createIndex("country") + .createSortedIndex("startedAt") + .createSortedIndex("durationMs"); +} + +function time(fn) { + const start = performance.now(); + const result = fn(); + return { duration: performance.now() - start, result }; +} + +function scanType(query) { + return query.explain().scanType; +} + +function pushResult(results, phase, rowCount, operation, resultCount, scan, duration) { + results.push({ + phase, + rows: rowCount, + operation, + resultCount, + scanType: scan, + ms: duration, + }); +} + +function assertCount(label, actual, expected) { + if (actual !== expected) { + throw new Error(`${label} sanity check failed: expected ${expected}, received ${actual}.`); + } +} + +function updateOracle(sourceRows, predicate, patch) { + let affectedRows = 0; + for (const row of sourceRows) { + if (!predicate(row)) { + 
continue; + } + Object.assign(row, patch); + affectedRows += 1; + } + return affectedRows; +} + +function deleteOracle(sourceRows, predicate) { + let affectedRows = 0; + for (let index = sourceRows.length - 1; index >= 0; index -= 1) { + if (!predicate(sourceRows[index])) { + continue; + } + sourceRows.splice(index, 1); + affectedRows += 1; + } + return affectedRows; +} + +function queryCount(query) { + return query.count(); +} + +function queryArrayLength(query) { + return query.toArray().length; +} + +function runSessionAnalyticsBenchmark(rowCount) { + const results = []; + const dataset = time(() => createRows(rowCount)); + const sourceRows = dataset.result; + const oracleRows = sourceRows.map((row) => ({ ...row })); + pushResult(results, "dataset", rowCount, "generate deterministic sessions", sourceRows.length, "n/a", dataset.duration); + + const setup = time(() => createIndexedTable(sourceRows)); + const sessions = setup.result; + pushResult(results, "setup", rowCount, "insert rows and create explicit indexes", sessions.rowCount, "n/a", setup.duration); + + const activeQuery = sessions.where("status", "=", "inactive"); + const activeExpected = oracleRows.filter((row) => row.status === "inactive").length; + const activeScan = scanType(activeQuery); + const active = time(() => queryCount(activeQuery)); + assertCount("equality index query", active.result, activeExpected); + pushResult(results, "query", rowCount, "equality index: status = inactive", active.result, activeScan, active.duration); + + const windowStart = NOW - 6 * 60 * 60; + const windowQuery = sessions.where({ startedAt: { gte: windowStart, lte: NOW } }); + const windowExpected = oracleRows.filter((row) => row.startedAt >= windowStart && row.startedAt <= NOW).length; + const windowScan = scanType(windowQuery); + const window = time(() => queryCount(windowQuery)); + assertCount("sorted range query", window.result, windowExpected); + pushResult(results, "query", rowCount, "sorted range: startedAt last 
6h", window.result, windowScan, window.duration); + + const projectionQuery = sessions + .where({ country: "TR", status: "active" }) + .select(["id", "userId", "startedAt", "durationMs"]) + .limit(100); + const projectionExpected = oracleRows.filter((row) => row.country === "TR" && row.status === "active").slice(0, 100).length; + const projectionScan = scanType(projectionQuery); + const projection = time(() => queryArrayLength(projectionQuery)); + assertCount("projection + limit query", projection.result, projectionExpected); + pushResult(results, "query", rowCount, "projection + limit: active TR sessions", projection.result, projectionScan, projection.duration); + + const combinedQuery = sessions.where({ segment: "enterprise", startedAt: { gte: NOW - 3 * 86_400 } }); + const combinedExpected = oracleRows.filter((row) => row.segment === "enterprise" && row.startedAt >= NOW - 3 * 86_400).length; + const combinedScan = scanType(combinedQuery); + const combined = time(() => queryCount(combinedQuery)); + assertCount("combined equality + range query", combined.result, combinedExpected); + pushResult(results, "query", rowCount, "combined: enterprise sessions in last 3d", combined.result, combinedScan, combined.duration); + + const callbackQuery = sessions.filter((row) => row.status === "active" && row.durationMs > 8_000_000); + const callbackExpected = oracleRows.filter((row) => row.status === "active" && row.durationMs > 8_000_000).length; + const callbackScan = scanType(callbackQuery); + const callback = time(() => queryCount(callbackQuery)); + assertCount("callback full scan query", callback.result, callbackExpected); + pushResult(results, "query", rowCount, "callback filter: active slow sessions", callback.result, callbackScan, callback.duration); + + const updateExpected = updateOracle( + oracleRows, + (row) => row.status === "active" && row.segment === "enterprise" && row.startedAt < STALE_ACTIVE_CUTOFF, + { status: "expired" }, + ); + const update = time(() => + 
sessions.updateMany( + { status: "active", segment: "enterprise", startedAt: { lt: STALE_ACTIVE_CUTOFF } }, + { status: "expired" }, + ), + ); + assertCount("expire mutation", update.result.affectedRows, updateExpected); + pushResult(results, "mutation", rowCount, "update: expire stale enterprise sessions", update.result.affectedRows, "n/a", update.duration); + + const firstDirtyQuery = sessions.where("status", "=", "inactive"); + const firstDirtyExpected = oracleRows.filter((row) => row.status === "inactive").length; + const firstDirtyScan = scanType(firstDirtyQuery); + const firstDirty = time(() => queryCount(firstDirtyQuery)); + assertCount("first post-update dirty index query", firstDirty.result, firstDirtyExpected); + pushResult(results, "post-mutation", rowCount, "first query after dirty equality index", firstDirty.result, firstDirtyScan, firstDirty.duration); + + const secondDirtyQuery = sessions.where("status", "=", "inactive"); + const secondDirtyScan = scanType(secondDirtyQuery); + const secondDirty = time(() => queryCount(secondDirtyQuery)); + assertCount("second post-update equality query", secondDirty.result, firstDirtyExpected); + pushResult(results, "post-mutation", rowCount, "second identical equality query", secondDirty.result, secondDirtyScan, secondDirty.duration); + + const deleteExpected = deleteOracle( + oracleRows, + (row) => row.status === "inactive" && row.startedAt < OLD_INACTIVE_CUTOFF, + ); + const deletion = time(() => + sessions.deleteMany({ + status: "inactive", + startedAt: { lt: OLD_INACTIVE_CUTOFF }, + }), + ); + assertCount("delete mutation", deletion.result.affectedRows, deleteExpected); + pushResult(results, "mutation", rowCount, "delete: old inactive sessions", deletion.result.affectedRows, "n/a", deletion.duration); + + const firstRangeAfterDeleteQuery = sessions.where("startedAt", "<", OLD_INACTIVE_CUTOFF); + const firstRangeAfterDeleteExpected = oracleRows.filter((row) => row.startedAt < OLD_INACTIVE_CUTOFF).length; + const 
firstRangeAfterDeleteScan = scanType(firstRangeAfterDeleteQuery); + const firstRangeAfterDelete = time(() => queryCount(firstRangeAfterDeleteQuery)); + assertCount("first post-delete dirty sorted query", firstRangeAfterDelete.result, firstRangeAfterDeleteExpected); + pushResult(results, "post-mutation", rowCount, "first query after dirty sorted index", firstRangeAfterDelete.result, firstRangeAfterDeleteScan, firstRangeAfterDelete.duration); + + const secondRangeAfterDeleteQuery = sessions.where("startedAt", "<", OLD_INACTIVE_CUTOFF); + const secondRangeAfterDeleteScan = scanType(secondRangeAfterDeleteQuery); + const secondRangeAfterDelete = time(() => queryCount(secondRangeAfterDeleteQuery)); + assertCount("second post-delete sorted query", secondRangeAfterDelete.result, firstRangeAfterDeleteExpected); + pushResult(results, "post-mutation", rowCount, "second identical sorted query", secondRangeAfterDelete.result, secondRangeAfterDeleteScan, secondRangeAfterDelete.duration); + + const serialization = time(() => sessions.serialize()); + pushResult(results, "lifecycle", sessions.rowCount, "serialize table data", serialization.result.byteLength, "n/a", serialization.duration); + + const restore = time(() => table.deserialize(serialization.result)); + const restored = restore.result; + assertCount("restore row count", restored.rowCount, oracleRows.length); + pushResult(results, "lifecycle", restored.rowCount, "restore table data", restored.rowCount, "n/a", restore.duration); + + const recreateIndexes = time(() => { + restored + .createIndex("status") + .createIndex("segment") + .createIndex("country") + .createSortedIndex("startedAt") + .createSortedIndex("durationMs"); + return restored.indexes().length + restored.sortedIndexes().length; + }); + assertCount("recreate index count", recreateIndexes.result, 5); + pushResult(results, "lifecycle", restored.rowCount, "recreate explicit indexes", recreateIndexes.result, "n/a", recreateIndexes.duration); + + const 
restoredQuery = restored.where("status", "=", "inactive"); + const restoredExpected = oracleRows.filter((row) => row.status === "inactive").length; + const restoredScan = scanType(restoredQuery); + const restoredRun = time(() => queryCount(restoredQuery)); + assertCount("restored indexed query", restoredRun.result, restoredExpected); + pushResult(results, "lifecycle", restored.rowCount, "indexed query after restore + reindex", restoredRun.result, restoredScan, restoredRun.duration); + + return results; +} + +function formatMs(value) { + return value.toFixed(3); +} + +function printHuman(results) { + console.log("ColQL session analytics benchmark"); + console.log(`Node ${process.version} on ${process.platform} ${process.arch}`); + console.log(`CPU: ${os.cpus()[0]?.model ?? "unknown"} (${os.cpus().length} logical cores)`); + console.log("Caveats: local machine only; timings vary with runtime, CPU, memory pressure, data shape, and selectivity."); + console.log("Tip: run with `ROWS=100000 npm run benchmark:session-analytics` or add `-- --json` for JSON output.\n"); + + console.log("Phase Rows Operation Result Count Scan Type Time (ms)"); + console.log("--------------------------------------------------------------------------------------------------------"); + for (const result of results) { + console.log( + `${result.phase.padEnd(13)} ${String(result.rows).padStart(9)} ${result.operation.padEnd(45)} ${String(result.resultCount).padStart(12)} ${result.scanType.padEnd(9)} ${formatMs(result.ms).padStart(9)}`, + ); + } +} + +const results = runSessionAnalyticsBenchmark(rows); + +if (jsonOutput) { + console.log(JSON.stringify({ + env: { + node: process.version, + platform: process.platform, + arch: process.arch, + cpu: os.cpus()[0]?.model, + }, + rows, + results, + }, null, 2)); +} else { + printHuman(results); +} diff --git a/docs/doc/00-overview.md b/docs/doc/00-overview.md index 1122910..7e134ac 100644 --- a/docs/doc/00-overview.md +++ b/docs/doc/00-overview.md @@ -2,7 
+2,7 @@ ColQL is an in-memory columnar query engine for TypeScript. It is designed for applications that already have data in process and want compact storage, lazy filtering, projections, simple aggregations, explicit indexes, safe mutations, and binary serialization without bringing in a database server or runtime dependencies. -ColQL is not a SQL database, ORM, persistence layer, or transactional system. It does not parse SQL, join tables, enforce uniqueness, or provide concurrency control. Its core tradeoff is narrower scope in exchange for predictable in-memory behavior. +ColQL is not a SQL database, ORM, persistence layer, distributed system, or transactional system. It does not parse SQL, join tables, coordinate across processes, or provide durable storage. Its core tradeoff is narrower scope in exchange for predictable in-memory behavior. ## Core Idea @@ -40,9 +40,24 @@ Use ColQL when: Avoid ColQL when: - you need durable storage, transactions, joins, or SQL +- data must be shared across pods, processes, workers, or machines +- writes dominate the workload and frequently dirty broad indexes +- a small/simple JavaScript array is already clear and fast enough - row indexes must be stable external identifiers - every query requires arbitrary sorting or grouping - you need concurrent writers or multi-process coordination +- you want analytical SQL over files or large columnar datasets, where DuckDB may be a better fit + +## Decision Guide + +ColQL sits between plain JavaScript arrays and embedded analytical databases: + +| Tool | Good fit | Not a good fit | +|---|---|---| +| ColQL | Process-local TypeScript data with compact in-memory storage, explicit indexes, runtime validation, and inspectable query plans | Persistence, SQL, joins, transactions, shared state, or distributed coordination | +| JavaScript arrays | Small/simple datasets, ad hoc transforms, and write-heavy object logic | Memory-sensitive tables, repeated projections, indexed lookups, or runtime 
schema validation | +| SQLite | Durable embedded relational storage with SQL and transactions | Ephemeral process-local caches where a database file and SQL layer are unnecessary | +| DuckDB | Analytical SQL, file-based analytics, and large columnar datasets | Mutable TypeScript-first in-memory tables with explicit indexes | ## Major Capabilities @@ -54,12 +69,14 @@ Avoid ColQL when: - Aggregations such as `count`, `avg`, `top`, and `bottom` - Equality indexes for numeric and dictionary columns - Sorted indexes for numeric range queries +- Public `query.explain()` diagnostics for planner visibility - Physical deletes and row updates - Predicate-based update/delete with snapshot semantics - Runtime validation and structured `ColQLError` failures - Binary serialization and deserialization Indexes are derived performance structures. Query results must be the same whether ColQL uses an index or a full scan. +`query.explain()` helps inspect planner choices without executing the query, scanning rows, materializing rows, calling `onQuery`, or rebuilding dirty indexes. ## Quick Example diff --git a/docs/doc/10-error-handling.md b/docs/doc/10-error-handling.md index 4b32f4e..35f7e6a 100644 --- a/docs/doc/10-error-handling.md +++ b/docs/doc/10-error-handling.md @@ -70,6 +70,8 @@ Serialization errors: - `COLQL_INVALID_SERIALIZED_DATA` +Query explain reason codes are not `ColQLError` codes. `query.explain()` returns `reasonCode` values in its `QueryExplainPlan` for diagnostics such as missing indexes, broad candidate sets, callback full scans, and dirty indexes that would rebuild during execution. + ## Examples Unknown column: diff --git a/docs/doc/11-serialization.md b/docs/doc/11-serialization.md index c50ec7d..490a583 100644 --- a/docs/doc/11-serialization.md +++ b/docs/doc/11-serialization.md @@ -39,6 +39,32 @@ restored.createUniqueIndex("id"); `restored.indexes()`, `restored.sortedIndexes()`, and `restored.uniqueIndexes()` are empty until indexes are recreated. 
+## Index Lifecycle After Restore + +Restored tables are correct before indexes are recreated, but indexed performance and unique-index helpers require explicit index creation: + +```ts +const restored = table.deserialize(buffer); + +console.log(restored.where("status", "=", "active").explain()); +// scanType: "full" +// reasonCode: "NO_INDEX_FOR_COLUMN" + +restored.createIndex("status"); +console.log(restored.where("status", "=", "active").explain()); +// scanType: "index" +``` + +Dirty indexes are different from missing indexes. After updates or deletes, existing indexes may be marked dirty. Actual query execution rebuilds a dirty index lazily before using it, so stale index results are not returned. `query.explain()` reports that state without rebuilding: + +```ts +users.updateMany({ status: "active" }, { status: "expired" }); + +console.log(users.where("status", "=", "expired").explain()); +// indexState: "dirty" +// reasonCode: "INDEX_DIRTY_WOULD_REBUILD_ON_EXECUTION" +``` + ## After Mutations and Deletes Serialization writes the current physical state: diff --git a/docs/doc/13-performance-and-benchmarks.md b/docs/doc/13-performance-and-benchmarks.md index 5ec6ebb..585a70d 100644 --- a/docs/doc/13-performance-and-benchmarks.md +++ b/docs/doc/13-performance-and-benchmarks.md @@ -4,6 +4,8 @@ Benchmarks are local tools for understanding tradeoffs on your machine. They are Benchmark numbers are not CI requirements. Use them as local diagnostics and rerun them on the hardware, Node version, data shape, and workload that matter to your application. +ColQL also keeps API-like confidence scenarios under `tests/scenarios/`. Those tests cover realistic read, projection, mutation, dirty-index, serialization, and oracle-parity flows. They are correctness coverage, not performance benchmarks. 
+ Build first: ```sh @@ -21,6 +23,7 @@ npm run benchmark:optimizer npm run benchmark:serialization npm run benchmark:delete npm run benchmark:array-comparison +npm run benchmark:session-analytics ``` Most benchmark scripts accept larger scenarios with: @@ -40,6 +43,53 @@ COLQL_BENCH_LARGE=1 npm run benchmark:indexed - `benchmark:delete`: physical delete, update, dirty index rebuild, and memory phases. - `benchmark:physical-delete`: focused physical-delete behavior. - `benchmark:array-comparison`: JS object arrays versus ColQL scan, equality, sorted, and unique-index paths across common workloads. +- `benchmark:session-analytics`: scenario-style process-local session analytics workload with query, mutation, dirty-index, and serialization phases. + +## Session Analytics Benchmark + +`benchmark:session-analytics` is a scenario-oriented benchmark. It is meant to show how ColQL behaves across a realistic process-local session analytics workload, not to make a universal speed claim. + +It reports: + +- dataset generation +- table setup and explicit index creation +- equality-index queries +- sorted-range queries +- projection plus limit +- combined equality and range predicates +- callback `filter(fn)` full-scan queries +- update and delete mutations +- first and second queries after dirty indexes +- serialize, restore, recreate-index, and indexed-query lifecycle + +Run it with the default 25,000-row dataset: + +```sh +npm run benchmark:session-analytics +``` + +Scale rows with `ROWS`: + +```sh +ROWS=100000 npm run benchmark:session-analytics +``` + +For JSON output: + +```sh +npm run benchmark:session-analytics -- --json +``` + +Output columns: + +- `Phase`: benchmark phase such as dataset, setup, query, mutation, post-mutation, or lifecycle. +- `Rows`: rows in the table or dataset at that phase. +- `Operation`: workload label. +- `Result Count`: sanity-check value such as matching rows, affected rows, serialized bytes, or recreated index count. 
+- `Scan Type`: `index`, `full`, or `n/a`, taken from `query.explain()` before timing. +- `Time (ms)`: local elapsed time for the operation. `query.explain()` is not included in query timings. + +The benchmark validates result counts against a plain JavaScript-array oracle and throws if they differ. It does not assert timing thresholds. ## JS Array Comparison Benchmark diff --git a/docs/doc/15-limitations-and-design-decisions.md b/docs/doc/15-limitations-and-design-decisions.md index e446686..26cc3c7 100644 --- a/docs/doc/15-limitations-and-design-decisions.md +++ b/docs/doc/15-limitations-and-design-decisions.md @@ -2,7 +2,7 @@ ColQL intentionally keeps a narrow, explicit feature set. -ColQL aims to keep the public API reasonably stable, but breaking changes may still happen before 1.0.0. +ColQL aims to keep the public API reasonably stable, and v0.4.x adds a public diagnostics API with `query.explain()`. Breaking changes may still happen before 1.0.0; the API is not fully frozen. ## Not Included @@ -18,6 +18,7 @@ ColQL aims to keep the public API reasonably stable, but breaking changes may st - durable storage - serialized indexes - compound unique indexes +- distributed or multi-process coordination ## Why These Limits Exist @@ -32,6 +33,17 @@ ColQL optimizes for: Adding SQL, joins, transactional semantics, or automatic indexing would make the engine broader and less predictable. +## When Not To Use ColQL + +ColQL is intentionally process-local and in-memory. 
Avoid it when: + +- multiple pods, workers, processes, or machines need shared mutable state +- data must survive process restarts without an external persistence layer +- transactional semantics, rollback, isolation, or write-ahead durability are required +- writes dominate the workload and frequently dirty large indexes +- a small JavaScript array is already simple, clear, and fast enough +- analytical SQL over files or large columnar datasets is the main requirement, where DuckDB may be a better fit + ## Row Indexes Are Not Stable IDs Row indexes are internal positions, not stable external identifiers. Inserts, updates, and deletes may change row positions; physical deletes shift row indexes after the deleted row. Use an explicit ID column for stable identity: @@ -49,7 +61,7 @@ Equality and sorted indexes are optional derived structures and are not serializ Unique indexes are also derived and not serialized, but they are integrity constraints as well as lookup structures. Recreate them after deserialization when uniqueness enforcement or by-key helpers are needed. -Dirty indexes are rebuilt before use or explicitly by the user. This avoids complex incremental row-position maintenance, especially around physical deletes. +Dirty indexes are rebuilt before actual query execution or explicitly by the user. `query.explain()` reports dirty index state without rebuilding, so diagnostics do not hide the first-query rebuild cost. 
## Mutation Semantics Are Safety-Oriented diff --git a/docs/doc/16-api-reference.md b/docs/doc/16-api-reference.md index 3c914ee..6f33742 100644 --- a/docs/doc/16-api-reference.md +++ b/docs/doc/16-api-reference.md @@ -10,6 +10,8 @@ import type { MutationResult, ObjectWherePredicate, Operator, + QueryExplainPlan, + QueryExplainReasonCode, QueryHook, QueryInfo, RowForSchema, @@ -157,6 +159,7 @@ query.filter(callback); query.select(columns); query.limit(n); query.offset(n); +query.explain(); query.first(); query.toArray(); @@ -191,6 +194,83 @@ type ObjectWherePredicate = { type RowPredicate = (row: RowForSchema) => boolean; ``` +## Query Diagnostics + +```ts +const plan = users + .where({ status: "active", age: { gte: 18 } }) + .select(["id"]) + .explain(); +``` + +`query.explain()` returns structured diagnostics for a query. It does not execute the query. It does not scan rows, materialize rows, call `onQuery`, or rebuild dirty indexes. + +Example output: + +```ts +{ + scanType: "index", + indexesUsed: ["equality:status"], + predicates: 2, + predicateOrder: ["status =", "age >="], + projectionPushdown: true, + candidateRows: 42, + indexState: "fresh" +} +``` + +Types: + +```ts +type QueryExplainReasonCode = + | "NO_PREDICATES" + | "NO_INDEX_FOR_COLUMN" + | "RANGE_QUERY_WITHOUT_SORTED_INDEX" + | "INDEX_CANDIDATE_SET_TOO_LARGE" + | "CALLBACK_PREDICATE_REQUIRES_FULL_SCAN" + | "INDEX_DIRTY_WOULD_REBUILD_ON_EXECUTION" + | "UNSUPPORTED_INDEX_OPERATOR"; + +type QueryExplainPlan = { + scanType: "index" | "full"; + indexesUsed: readonly string[]; + predicates: number; + predicateOrder: readonly string[]; + projectionPushdown: boolean; + candidateRows?: number; + indexState?: "fresh" | "dirty"; + reasonCode?: QueryExplainReasonCode; + reason?: string; +}; +``` + +Fields: + +- `scanType`: whether execution is expected to use an index or full scan. +- `indexesUsed`: selected index labels such as `equality:status` or `sorted:startedAt`. 
+- `predicates`: total number of predicates in the query (structured predicates plus callback predicates). +- `predicateOrder`: structured predicate evaluation order after planner ordering. +- `projectionPushdown`: `true` when `select(...)` limits materialized columns. +- `candidateRows`: concrete indexed candidate count when it can be computed without scanning, materializing, or rebuilding. +- `indexState`: `fresh` or `dirty` for selected indexes. +- `reasonCode`: stable reason code for full scans or dirty-index diagnostics. +- `reason`: human-readable explanation; prefer `reasonCode` for programmatic handling. + +Dirty indexes are reported without being rebuilt: + +```ts +users.updateMany({ status: "active" }, { status: "expired" }); + +console.log(users.where("status", "=", "expired").explain()); +// { +// scanType: "index", +// indexesUsed: ["equality:status"], +// indexState: "dirty", +// reasonCode: "INDEX_DIRTY_WOULD_REBUILD_ON_EXECUTION", +// ... +// } +``` + ## Aggregations ```ts @@ -291,7 +371,7 @@ users.getIndexedCandidatePlan(filters); users.getIndexDebugPlan(filters); ``` -Queries expose `__debugPlan()` for planner diagnostics. It is useful in tests and debugging, but application code should not depend on it as a stable planning contract. +Use `query.explain()` for public query diagnostics. Queries still expose `__debugPlan()` for internal tests and low-level debugging, but application code should not depend on it as a stable planning contract. 
## Errors diff --git a/examples/session-analytics.ts b/examples/session-analytics.ts new file mode 100644 index 0000000..6ef0212 --- /dev/null +++ b/examples/session-analytics.ts @@ -0,0 +1,131 @@ +import { column, table } from "@colql/colql"; + +const SESSION_COUNT = 15_000; +const NOW = 1_725_000_000; +const STALE_ACTIVE_CUTOFF = NOW - 86_400; +const OLD_INACTIVE_CUTOFF = NOW - 14 * 86_400; + +const sessions = table({ + id: column.uint32(), + userId: column.uint32(), + segment: column.dictionary(["free", "pro", "enterprise"] as const), + status: column.dictionary(["active", "expired", "inactive"] as const), + startedAt: column.uint32(), + durationMs: column.uint32(), + country: column.dictionary(["US", "TR", "DE", "GB"] as const), + device: column.dictionary(["desktop", "mobile", "tablet"] as const), +}); + +type Session = (typeof sessions)["toArray"] extends () => Array<infer Row> + ? Row + : never; + +function generateSessions(count: number): Session[] { + return Array.from({ length: count }, (_unused, id) => ({ + id, + userId: 10_000 + (id % 3_000), + segment: + id % 19 === 0 ? "enterprise" : id % 4 === 0 ? "pro" : "free", + status: + id % 29 === 0 ? "inactive" : id % 7 === 0 ? "expired" : "active", + startedAt: NOW - ((id * 137) % (30 * 86_400)), + durationMs: 30_000 + ((id * 9_973) % 9_000_000), + country: id % 11 === 0 ? "TR" : id % 7 === 0 ? "DE" : id % 5 === 0 ? "GB" : "US", + device: id % 6 === 0 ? "tablet" : id % 2 === 0 ? 
"mobile" : "desktop", + })); +} + +sessions + .insertMany(generateSessions(SESSION_COUNT)) + .createIndex("status") + .createIndex("segment") + .createIndex("country") + .createSortedIndex("startedAt") + .createSortedIndex("durationMs"); + +function printQuery(label: string, query: { explain(): unknown; toArray(): T[] }): T[] { + console.log(`\n${label}`); + console.log("explain", query.explain()); + const rows = query.toArray(); + console.log("rows", rows.length); + console.log("sample", rows.slice(0, 3)); + return rows; +} + +console.log("session analytics table", { + rows: sessions.rowCount, + equalityIndexes: sessions.indexes(), + sortedIndexes: sessions.sortedIndexes(), +}); + +const activeEnterprise = sessions + .where({ status: "active", segment: "enterprise" }) + .select(["id", "userId", "country", "device", "durationMs"]) + .limit(10); +printQuery("GET /sessions?status=active&segment=enterprise&limit=10", activeEnterprise); + +const recentWindow = sessions + .where({ startedAt: { gte: NOW - 6 * 60 * 60, lte: NOW } }) + .select(["id", "userId", "startedAt", "status"]) + .limit(10); +printQuery("GET /sessions?from=last_6h&to=now&limit=10", recentWindow); + +const countryDevice = sessions + .where({ country: "TR", device: "mobile", status: "active" }) + .select(["id", "userId", "segment", "startedAt"]) + .limit(10); +printQuery("GET /sessions?country=TR&device=mobile&status=active", countryDevice); + +const slowActiveSessions = sessions + .where("status", "=", "active") + .select(["id", "userId", "durationMs", "country", "device"]); +console.log("\nGET /sessions/slow?status=active&limit=5"); +console.log("explain", slowActiveSessions.explain()); +console.log("top", slowActiveSessions.top(5, "durationMs")); + +const activePro = sessions.where({ status: "active", segment: "pro" }); +console.log("\nGET /sessions/summary?status=active&segment=pro"); +console.log("explain", activePro.explain()); +console.log({ + count: activePro.count(), + averageDurationMs: 
Math.round(activePro.avg("durationMs") ?? 0), + totalDurationMs: activePro.sum("durationMs"), +}); + +console.log("\nPATCH /sessions/expire"); +const expiredBefore = sessions.where("status", "=", "expired").count(); +const expirePlan = sessions + .where({ status: "active", startedAt: { lt: STALE_ACTIVE_CUTOFF } }) + .explain(); +console.log("explain", expirePlan); +const expireResult = sessions.updateMany( + { status: "active", startedAt: { lt: STALE_ACTIVE_CUTOFF } }, + { status: "expired" }, +); +console.log("result", { + ...expireResult, + expiredBefore, + expiredAfter: expiredBefore + expireResult.affectedRows, +}); + +const expiredAfterMutation = sessions + .where("status", "=", "expired") + .select(["id", "userId", "startedAt", "durationMs"]) + .limit(10); +printQuery("GET /sessions?status=expired after mutation", expiredAfterMutation); + +console.log("\nDELETE /sessions?status=inactive&startedAt(); private readonly sortedIndexesByColumn = new Map(); @@ -392,6 +421,70 @@ export class IndexManager { }; } + explainPlan( + filters: readonly IndexFilter[], + rowCount: number, + readNumericValue: (rowIndex: number, columnName: string) => number, + ): IndexExplainPlan { + if (filters.length === 0) { + return { + mode: "scan", + rowCount, + threshold: DEFAULT_INDEX_SELECTIVITY_THRESHOLD, + reasonCode: "NO_PREDICATES", + }; + } + + const dirty = this.dirtyCandidateEstimate(filters); + if (dirty !== undefined) { + return { + mode: "index", + source: dirty.source, + column: dirty.column, + operator: dirty.operator, + rowCount, + threshold: DEFAULT_INDEX_SELECTIVITY_THRESHOLD, + indexState: "dirty", + reasonCode: "INDEX_DIRTY_WOULD_REBUILD_ON_EXECUTION", + }; + } + + const best = this.bestCandidateEstimate(filters, rowCount, readNumericValue); + if (best === undefined) { + return { + mode: "scan", + rowCount, + threshold: DEFAULT_INDEX_SELECTIVITY_THRESHOLD, + reasonCode: this.reasonCodeForNoCandidate(filters), + }; + } + + const threshold = rowCount * 
DEFAULT_INDEX_SELECTIVITY_THRESHOLD; + if (best.candidateCount > threshold) { + return { + mode: "scan", + source: best.source, + column: best.column, + operator: best.operator, + candidateCount: best.candidateCount, + rowCount, + threshold: DEFAULT_INDEX_SELECTIVITY_THRESHOLD, + reasonCode: "INDEX_CANDIDATE_SET_TOO_LARGE", + }; + } + + return { + mode: "index", + source: best.source, + column: best.column, + operator: best.operator, + candidateCount: best.candidateCount, + rowCount, + threshold: DEFAULT_INDEX_SELECTIVITY_THRESHOLD, + indexState: "fresh", + }; + } + private bestCandidateEstimate( filters: readonly IndexFilter[], rowCount: number, @@ -414,6 +507,71 @@ export class IndexManager { return best; } + private dirtyCandidateEstimate( + filters: readonly IndexFilter[], + ): DirtyCandidateEstimate | undefined { + for (const filter of filters) { + if ( + (filter.operator === "=" || filter.operator === "in") && + this.equalityDirty && + this.indexesByColumn.has(filter.columnName) + ) { + return { + source: "equality", + column: filter.columnName, + operator: filter.operator, + }; + } + + if (this.isRangeOperator(filter.operator)) { + const index = this.sortedIndexesByColumn.get(filter.columnName); + if (index !== undefined && index.isDirty()) { + return { + source: "sorted", + column: filter.columnName, + operator: filter.operator, + }; + } + } + } + + return undefined; + } + + private reasonCodeForNoCandidate( + filters: readonly IndexFilter[], + ): QueryExplainReasonCode { + if ( + filters.some( + (filter) => filter.operator === "!=" || filter.operator === "not in", + ) + ) { + return "UNSUPPORTED_INDEX_OPERATOR"; + } + + if ( + filters.some( + (filter) => + this.isRangeOperator(filter.operator) && + !this.sortedIndexesByColumn.has(filter.columnName), + ) + ) { + return "RANGE_QUERY_WITHOUT_SORTED_INDEX"; + } + + if ( + filters.some( + (filter) => + (filter.operator === "=" || filter.operator === "in") && + !this.indexesByColumn.has(filter.columnName), + ) + 
) { + return "NO_INDEX_FOR_COLUMN"; + } + + return "NO_INDEX_FOR_COLUMN"; + } + private equalityEstimate(filter: IndexFilter): EqualityCandidateEstimate | undefined { const index = this.indexesByColumn.get(filter.columnName); if (index === undefined || (filter.operator !== "=" && filter.operator !== "in")) { diff --git a/src/indexing/sorted-index.ts b/src/indexing/sorted-index.ts index cac84b2..baee8a8 100644 --- a/src/indexing/sorted-index.ts +++ b/src/indexing/sorted-index.ts @@ -23,6 +23,10 @@ export class SortedIndex { this.dirty = true; } + isDirty(): boolean { + return this.dirty; + } + ensureFresh(rowCount: number, readValue: (rowIndex: number) => number): void { if (!this.dirty && this.rowIdsSortedByValue.length === rowCount) { return; diff --git a/src/query.ts b/src/query.ts index 901ce21..95b496d 100644 --- a/src/query.ts +++ b/src/query.ts @@ -1,6 +1,7 @@ import { BinaryHeap, type HeapItem } from "./heap"; import type { Table } from "./table"; -import type { ColumnValue, Filter, MutationResult, NumericColumnKey, ObjectWherePredicate, Operator, RowForSchema, RowPredicate, Schema, SelectedRow } from "./types"; +import type { ColumnValue, Filter, MutationResult, NumericColumnKey, ObjectWherePredicate, Operator, QueryExplainPlan, QueryExplainReasonCode, RowForSchema, RowPredicate, Schema, SelectedRow } from "./types"; +import type { IndexExplainPlan } from "./indexing/index-manager"; import { ColQLError } from "./errors"; import { assertColumnExists, assertNonNegativeInteger, assertPositiveInteger } from "./validation"; @@ -15,6 +16,10 @@ type MutationSource = { deleteRows(rowIndexes: readonly number[]): MutationResult; }; +type ExplainPlanSource = { + getIndexExplainPlan(filters: readonly InternalFilter[]): IndexExplainPlan; +}; + export class Query implements Iterable { private readonly filters: readonly InternalFilter[]; private readonly plannedFilters: readonly InternalFilter[]; @@ -464,7 +469,45 @@ export class Query implements Iterable return this; } + 
explain(): QueryExplainPlan { + const predicates = this.filters.length + this.rowPredicates.length; + const predicateOrder = this.plannedFilters.map((filter) => + `${filter.columnName} ${filter.operator}`, + ); + + if (this.rowPredicates.length > 0) { + return { + scanType: "full", + indexesUsed: [], + predicates, + predicateOrder, + projectionPushdown: this.selectedColumns !== undefined, + reasonCode: "CALLBACK_PREDICATE_REQUIRES_FULL_SCAN", + reason: this.reasonFor("CALLBACK_PREDICATE_REQUIRES_FULL_SCAN"), + }; + } + + const plan = (this.source as unknown as ExplainPlanSource).getIndexExplainPlan(this.filters); + const reasonCode = plan.reasonCode; + return { + scanType: plan.mode === "index" ? "index" : "full", + indexesUsed: + plan.mode === "index" ? [`${plan.source}:${plan.column}`] : [], + predicates, + predicateOrder, + projectionPushdown: this.selectedColumns !== undefined, + ...(plan.candidateCount !== undefined + ? { candidateRows: plan.candidateCount } + : {}), + ...(plan.mode === "index" ? { indexState: plan.indexState } : {}), + ...(reasonCode !== undefined ? { reasonCode } : {}), + ...(reasonCode !== undefined ? { reason: this.reasonFor(reasonCode) } : {}), + }; + } + __debugPlan(): ReturnType["getIndexDebugPlan"]> { + // Internal diagnostics retained for existing tests/debugging. Application + // code should use the public explain() contract instead. 
if (this.rowPredicates.length > 0) { return this.source.getIndexDebugPlan([]); } @@ -615,6 +658,25 @@ export class Query implements Iterable return this.rowPredicates.length === 0 && this.source.getIndexDebugPlan(this.filters).mode === "index"; } + private reasonFor(reasonCode: QueryExplainReasonCode): string { + switch (reasonCode) { + case "NO_PREDICATES": + return "Query has no structured predicates, so ColQL will scan rows in order."; + case "NO_INDEX_FOR_COLUMN": + return "No usable equality index exists for the indexed predicate column."; + case "RANGE_QUERY_WITHOUT_SORTED_INDEX": + return "Range predicates require a sorted index to avoid a full scan."; + case "INDEX_CANDIDATE_SET_TOO_LARGE": + return "The best index candidate set is too large, so a scan is expected to be cheaper."; + case "CALLBACK_PREDICATE_REQUIRES_FULL_SCAN": + return "Callback predicates are not index-aware and require a full scan."; + case "INDEX_DIRTY_WOULD_REBUILD_ON_EXECUTION": + return "The selected index is dirty; executing the query would lazily rebuild it before use."; + case "UNSUPPORTED_INDEX_OPERATOR": + return "The predicate operator is not supported by equality or sorted indexes."; + } + } + private whereObject(predicate: ObjectWherePredicate): Query { this.assertObjectPredicate(predicate); diff --git a/src/table.ts b/src/table.ts index 915acc9..f2efb2f 100644 --- a/src/table.ts +++ b/src/table.ts @@ -8,6 +8,7 @@ import { IndexManager, type IndexCandidatePlan, type IndexDebugPlan, + type IndexExplainPlan, type IndexFilter, } from "./indexing/index-manager"; import type { EqualityIndexStats } from "./indexing/equality-index"; @@ -43,7 +44,9 @@ import type { } from "./types"; const DEFAULT_CAPACITY = 1024; -const SERIALIZATION_VERSION = "@colql/colql@0.3.0"; +// Binary serialization format version. This is intentionally separate from +// the package release version so patch/minor releases can preserve the wire format. 
+const SERIALIZATION_VERSION = "@colql/colql@0.4.0"; const SERIALIZATION_MAGIC = "COLQL003"; const MAGIC_BYTES = 8; const HEADER_LENGTH_BYTES = 4; @@ -1029,6 +1032,17 @@ export class Table { ); } + private getIndexExplainPlan( + filters: readonly IndexFilter[], + ): IndexExplainPlan { + return this.indexManager.explainPlan( + filters, + this.currentRowCount, + (rowIndex, name) => + this.getNumericValue(rowIndex, name as NumericColumnKey), + ); + } + static deserialize(input: ArrayBuffer | Uint8Array): Table { if (!(input instanceof ArrayBuffer) && !(input instanceof Uint8Array)) { throw new ColQLError( diff --git a/src/types.ts b/src/types.ts index 1218fd8..400ed5e 100644 --- a/src/types.ts +++ b/src/types.ts @@ -76,6 +76,27 @@ export type TableOptions = { readonly onQuery?: QueryHook; }; +export type QueryExplainReasonCode = + | "NO_PREDICATES" + | "NO_INDEX_FOR_COLUMN" + | "RANGE_QUERY_WITHOUT_SORTED_INDEX" + | "INDEX_CANDIDATE_SET_TOO_LARGE" + | "CALLBACK_PREDICATE_REQUIRES_FULL_SCAN" + | "INDEX_DIRTY_WOULD_REBUILD_ON_EXECUTION" + | "UNSUPPORTED_INDEX_OPERATOR"; + +export type QueryExplainPlan = { + readonly scanType: "index" | "full"; + readonly indexesUsed: readonly string[]; + readonly predicates: number; + readonly predicateOrder: readonly string[]; + readonly projectionPushdown: boolean; + readonly candidateRows?: number; + readonly indexState?: "fresh" | "dirty"; + readonly reasonCode?: QueryExplainReasonCode; + readonly reason?: string; +}; + export type SelectedRow< TSchema extends Schema, Keys extends readonly (keyof TSchema)[], diff --git a/tests/query-explain.test.ts b/tests/query-explain.test.ts new file mode 100644 index 0000000..a977adb --- /dev/null +++ b/tests/query-explain.test.ts @@ -0,0 +1,245 @@ +import { describe, expect, it } from "vitest"; +import { column, table, type QueryExplainPlan } from "../src"; + +function usersFixture(count = 100) { + const users = table({ + id: column.uint32(), + age: column.uint8(), + status: 
column.dictionary(["active", "passive"] as const), + active: column.boolean(), + }); + + for (let id = 0; id < count; id += 1) { + users.insert({ + id, + age: id % 100, + status: id < 70 ? "active" : "passive", + active: id % 2 === 0, + }); + } + + return users; +} + +describe("query explain", () => { + it("does not execute, materialize, scan, notify, or rebuild dirty indexes", () => { + const events: unknown[] = []; + const users = table( + { + id: column.uint32(), + age: column.uint8(), + status: column.dictionary(["active", "passive"] as const), + }, + { onQuery: (info) => events.push(info) }, + ); + + users + .insertMany([ + { id: 1, age: 10, status: "active" }, + { id: 2, age: 20, status: "passive" }, + { id: 3, age: 30, status: "active" }, + ]) + .createIndex("id") + .createSortedIndex("age"); + users.deleteMany({ id: 2 }); + users.resetScanCounter(); + users.resetMaterializationCounter(); + events.length = 0; + + const equalityStatsBefore = users.indexStats(); + const sortedStatsBefore = users.sortedIndexStats(); + const explain = users.where("id", "=", 3).select(["id"]).explain(); + + expect(explain).toEqual( + expect.objectContaining({ + scanType: "index", + indexesUsed: ["equality:id"], + indexState: "dirty", + reasonCode: "INDEX_DIRTY_WOULD_REBUILD_ON_EXECUTION", + projectionPushdown: true, + }), + ); + expect(explain).not.toHaveProperty("candidateRows"); + expect(users.scannedRowCount).toBe(0); + expect(users.materializedRowCount).toBe(0); + expect(events).toEqual([]); + expect(users.indexStats()).toEqual(equalityStatsBefore); + expect(users.sortedIndexStats()).toEqual(sortedStatsBefore); + }); + + it("reports a selective equality index plan", () => { + const users = usersFixture(); + users.createIndex("id"); + + const explain: QueryExplainPlan = users.where("id", "=", 42).explain(); + + expect(explain).toEqual({ + scanType: "index", + indexesUsed: ["equality:id"], + predicates: 1, + predicateOrder: ["id ="], + projectionPushdown: false, + 
candidateRows: 1, + indexState: "fresh", + }); + }); + + it("reports a selective sorted range index plan", () => { + const users = usersFixture(); + users.createSortedIndex("age"); + + expect(users.where("age", ">=", 95).explain()).toEqual( + expect.objectContaining({ + scanType: "index", + indexesUsed: ["sorted:age"], + candidateRows: 5, + indexState: "fresh", + }), + ); + }); + + it("reports predicate ordering and projection pushdown", () => { + const users = usersFixture(); + users.createIndex("status").createSortedIndex("age"); + + expect( + users + .where("age", ">=", 25) + .where("status", "=", "passive") + .select(["id", "age"]) + .explain(), + ).toEqual( + expect.objectContaining({ + predicates: 2, + predicateOrder: ["status =", "age >="], + projectionPushdown: true, + }), + ); + }); + + it("reports callback predicates as full scans", () => { + const users = usersFixture(); + users.createIndex("id"); + + expect( + users + .where("id", "=", 42) + .filter((row) => row.active) + .explain(), + ).toEqual( + expect.objectContaining({ + scanType: "full", + indexesUsed: [], + predicates: 2, + predicateOrder: ["id ="], + reasonCode: "CALLBACK_PREDICATE_REQUIRES_FULL_SCAN", + }), + ); + }); + + it("reports no predicates as a full scan", () => { + expect(usersFixture().query().explain()).toEqual( + expect.objectContaining({ + scanType: "full", + indexesUsed: [], + predicates: 0, + predicateOrder: [], + reasonCode: "NO_PREDICATES", + }), + ); + }); + + it("reports missing equality indexes", () => { + expect(usersFixture().where("status", "=", "active").explain()).toEqual( + expect.objectContaining({ + scanType: "full", + reasonCode: "NO_INDEX_FOR_COLUMN", + }), + ); + }); + + it("reports missing sorted indexes for range predicates", () => { + expect(usersFixture().where("age", ">=", 90).explain()).toEqual( + expect.objectContaining({ + scanType: "full", + reasonCode: "RANGE_QUERY_WITHOUT_SORTED_INDEX", + }), + ); + }); + + it("reports unsupported indexed operators", () 
=> { + const users = usersFixture(); + users.createIndex("id"); + + expect(users.where("id", "!=", 42).explain()).toEqual( + expect.objectContaining({ + scanType: "full", + reasonCode: "UNSUPPORTED_INDEX_OPERATOR", + }), + ); + }); + + it("reports broad index candidates as full scans", () => { + const users = usersFixture(); + users.createIndex("status"); + + expect(users.where("status", "=", "active").explain()).toEqual( + expect.objectContaining({ + scanType: "full", + candidateRows: 70, + reasonCode: "INDEX_CANDIDATE_SET_TOO_LARGE", + }), + ); + }); + + it("reports dirty sorted indexes as index plans without candidate rows", () => { + const users = usersFixture(); + users.createSortedIndex("age"); + users.insert({ id: 101, age: 99, status: "passive", active: true }); + + const explain = users.where("age", ">=", 95).explain(); + + expect(explain).toEqual( + expect.objectContaining({ + scanType: "index", + indexesUsed: ["sorted:age"], + indexState: "dirty", + reasonCode: "INDEX_DIRTY_WOULD_REBUILD_ON_EXECUTION", + }), + ); + expect(explain).not.toHaveProperty("candidateRows"); + expect(users.sortedIndexStats()[0]?.dirty).toBe(true); + }); + + it("matches dirty equality index execution behavior after lazy rebuild", () => { + const users = usersFixture(); + users.createIndex("id"); + users.updateMany({ status: "passive" }, { active: true }); + + const explain = users.where("id", "=", 42).explain(); + + expect(explain).toEqual( + expect.objectContaining({ + scanType: "index", + indexesUsed: ["equality:id"], + indexState: "dirty", + reasonCode: "INDEX_DIRTY_WOULD_REBUILD_ON_EXECUTION", + }), + ); + expect(explain).not.toHaveProperty("candidateRows"); + + users.resetScanCounter(); + expect(users.where("id", "=", 42).toArray()).toEqual([ + { id: 42, age: 42, status: "active", active: true }, + ]); + expect(users.scannedRowCount).toBe(1); + expect(users.where("id", "=", 42).explain()).toEqual( + expect.objectContaining({ + scanType: "index", + indexesUsed: 
["equality:id"], + candidateRows: 1, + indexState: "fresh", + }), + ); + }); +}); diff --git a/tests/scenarios/consistency/dirty-index-lifecycle.scenario.test.ts b/tests/scenarios/consistency/dirty-index-lifecycle.scenario.test.ts new file mode 100644 index 0000000..c8b1700 --- /dev/null +++ b/tests/scenarios/consistency/dirty-index-lifecycle.scenario.test.ts @@ -0,0 +1,37 @@ +import { describe, it } from "vitest"; +import { buildProductCatalogFixture } from "../helpers/api-fixtures"; +import { expectRowsEqual } from "../helpers/assertions"; +import { expectDirtyIndex, expectFreshIndex } from "../helpers/explain"; +import { updateOracle } from "../helpers/oracle"; + +describe("consistency dirty index lifecycle endpoint scenarios", () => { + it("PATCH /products dirty indexes explain before execution and become fresh after requery", () => { + const { products, oracle } = buildProductCatalogFixture(); + + updateOracle( + oracle, + (row) => row.category === "games" && row.price >= 20_000, + { status: "inactive" }, + ); + products.updateMany( + { category: "games", price: { gte: 20_000 } }, + { status: "inactive" }, + ); + + const statusQuery = products.where("status", "=", "inactive"); + expectDirtyIndex(statusQuery, "equality:status"); + expectRowsEqual( + statusQuery.toArray(), + oracle.filter((row) => row.status === "inactive"), + ); + expectFreshIndex(products.where("status", "=", "inactive"), "equality:status"); + + const priceQuery = products.where("price", ">=", 49_000); + expectDirtyIndex(priceQuery, "sorted:price"); + expectRowsEqual( + priceQuery.toArray(), + oracle.filter((row) => row.price >= 49_000), + ); + expectFreshIndex(products.where("price", ">=", 49_000), "sorted:price"); + }); +}); diff --git a/tests/scenarios/consistency/row-index-instability.scenario.test.ts b/tests/scenarios/consistency/row-index-instability.scenario.test.ts new file mode 100644 index 0000000..ec130f6 --- /dev/null +++ 
b/tests/scenarios/consistency/row-index-instability.scenario.test.ts @@ -0,0 +1,17 @@ +import { describe, expect, it } from "vitest"; +import { buildUserAnalyticsFixture } from "../helpers/api-fixtures"; + +describe("consistency row index instability endpoint scenarios", () => { + it("DELETE /users/:rowIndex demonstrates rowIndex is not a stable external ID", () => { + const { users } = buildUserAnalyticsFixture(50); + + const firstRow = users.get(0); + const secondRow = users.get(1); + users.delete(0); + + expect(firstRow.id).not.toBe(users.get(0).id); + expect(users.get(0).id).toBe(secondRow.id); + expect(users.findBy("id", firstRow.id)).toBeUndefined(); + expect(users.findBy("id", secondRow.id)).toEqual(secondRow); + }); +}); diff --git a/tests/scenarios/consistency/update-delete-parity.scenario.test.ts b/tests/scenarios/consistency/update-delete-parity.scenario.test.ts new file mode 100644 index 0000000..195cf3c --- /dev/null +++ b/tests/scenarios/consistency/update-delete-parity.scenario.test.ts @@ -0,0 +1,54 @@ +import { describe, expect, it } from "vitest"; +import { ColQLError } from "../../../src"; +import { buildUserAnalyticsFixture } from "../helpers/api-fixtures"; +import { expectMutationResult, expectRowsEqual } from "../helpers/assertions"; +import { deleteFromOracle, updateOracle } from "../helpers/oracle"; + +describe("consistency update/delete parity endpoint scenarios", () => { + it("PATCH then DELETE /users maintains parity through repeated mutation/query sequence", () => { + const { users, oracle } = buildUserAnalyticsFixture(25_000); + + const updated = updateOracle( + oracle, + (row) => row.segment === "enterprise" && row.score >= 7_500, + { status: "suspended" }, + ); + expectMutationResult( + users.updateMany({ segment: "enterprise", score: { gte: 7_500 } }, { status: "suspended" }), + updated, + ); + + const deleted = deleteFromOracle( + oracle, + (row) => row.status === "inactive" && row.lastSeen < 1_700_030_000, + ); + 
expectMutationResult( + users.deleteMany({ status: "inactive", lastSeen: { lt: 1_700_030_000 } }), + deleted, + ); + + expectRowsEqual( + users.where({ status: "suspended", age: { gte: 40 } }).toArray(), + oracle.filter((row) => row.status === "suspended" && row.age >= 40), + ); + expectRowsEqual( + users.where("segment", "=", "enterprise").limit(100).toArray(), + oracle.filter((row) => row.segment === "enterprise").slice(0, 100), + ); + }); + + it("POST /users bulk insert with duplicate id fails all-or-nothing", () => { + const { users, oracle } = buildUserAnalyticsFixture(); + const before = users.toArray(); + + expect(() => + users.insertMany([ + { id: 99_001, status: "active", segment: "pro", age: 33, score: 100, lastSeen: 1_700_001_000 }, + { id: 42, status: "inactive", segment: "free", age: 22, score: 200, lastSeen: 1_700_002_000 }, + ]), + ).toThrow(ColQLError); + + expectRowsEqual(users.toArray(), before); + expectRowsEqual(users.toArray(), oracle); + }); +}); diff --git a/tests/scenarios/event-logs/dirty-index-requery.scenario.test.ts b/tests/scenarios/event-logs/dirty-index-requery.scenario.test.ts new file mode 100644 index 0000000..84601f1 --- /dev/null +++ b/tests/scenarios/event-logs/dirty-index-requery.scenario.test.ts @@ -0,0 +1,43 @@ +import { describe, it } from "vitest"; +import { buildEventLogsFixture } from "../helpers/api-fixtures"; +import { expectMutationResult, expectRowsEqual } from "../helpers/assertions"; +import { expectDirtyIndex, expectFreshIndex } from "../helpers/explain"; +import { deleteFromOracle, updateOracle } from "../helpers/oracle"; + +describe("event log dirty index requery endpoint scenarios", () => { + it("DELETE /events?severity=debug dirties indexes, then requery rebuilds and matches oracle", () => { + const { events, oracle } = buildEventLogsFixture(); + + const deleted = deleteFromOracle(oracle, (row) => row.severity === "debug"); + expectMutationResult(events.deleteMany({ severity: "debug" }), deleted); + + const 
query = events.where("timestamp", ">=", 1_710_145_000); + expectDirtyIndex(query, "sorted:timestamp"); + expectRowsEqual( + query.toArray(), + oracle.filter((row) => row.timestamp >= 1_710_145_000), + ); + expectFreshIndex(events.where("timestamp", ">=", 1_710_145_000), "sorted:timestamp"); + }); + + it("PATCH /events?service=worker marks slow worker events as warnings and keeps equality queries correct", () => { + const { events, oracle } = buildEventLogsFixture(); + + const updated = updateOracle( + oracle, + (row) => row.service === "worker" && row.durationMs >= 1_500, + { severity: "warn" }, + ); + expectMutationResult( + events.updateMany({ service: "worker", durationMs: { gte: 1_500 } }, { severity: "warn" }), + updated, + ); + + const query = events.where("severity", "=", "warn"); + expectDirtyIndex(query, "equality:severity"); + expectRowsEqual( + query.toArray(), + oracle.filter((row) => row.severity === "warn"), + ); + }); +}); diff --git a/tests/scenarios/event-logs/service-severity.scenario.test.ts b/tests/scenarios/event-logs/service-severity.scenario.test.ts new file mode 100644 index 0000000..50f7709 --- /dev/null +++ b/tests/scenarios/event-logs/service-severity.scenario.test.ts @@ -0,0 +1,18 @@ +import { describe, it } from "vitest"; +import { buildEventLogsFixture } from "../helpers/api-fixtures"; +import { expectRowsEqual } from "../helpers/assertions"; +import { expectFreshIndex } from "../helpers/explain"; + +describe("event log service and severity endpoint scenarios", () => { + it("GET /events?service=billing&severity=error uses equality index and matches oracle", () => { + const { events, oracle } = buildEventLogsFixture(); + + const query = events.where({ service: "billing", severity: "error" }); + const expected = oracle.filter( + (row) => row.service === "billing" && row.severity === "error", + ); + + expectFreshIndex(query, "equality:severity"); + expectRowsEqual(query.toArray(), expected); + }); +}); diff --git 
a/tests/scenarios/event-logs/time-range.scenario.test.ts b/tests/scenarios/event-logs/time-range.scenario.test.ts new file mode 100644 index 0000000..c67d0bf --- /dev/null +++ b/tests/scenarios/event-logs/time-range.scenario.test.ts @@ -0,0 +1,20 @@ +import { describe, it } from "vitest"; +import { buildEventLogsFixture } from "../helpers/api-fixtures"; +import { expectRowsEqual } from "../helpers/assertions"; +import { expectUsesIndex } from "../helpers/explain"; + +describe("event log time range endpoint scenarios", () => { + it("GET /events?from=&to= uses sorted timestamp index and matches oracle", () => { + const { events, oracle } = buildEventLogsFixture(); + const from = 1_710_120_000; + const to = 1_710_126_000; + + const query = events.where({ timestamp: { gte: from, lte: to } }); + const expected = oracle.filter( + (row) => row.timestamp >= from && row.timestamp <= to, + ); + + expectUsesIndex(query, "sorted:timestamp"); + expectRowsEqual(query.toArray(), expected); + }); +}); diff --git a/tests/scenarios/event-logs/top-bottom.scenario.test.ts b/tests/scenarios/event-logs/top-bottom.scenario.test.ts new file mode 100644 index 0000000..dc66aca --- /dev/null +++ b/tests/scenarios/event-logs/top-bottom.scenario.test.ts @@ -0,0 +1,32 @@ +import { describe, expect, it } from "vitest"; +import { buildEventLogsFixture } from "../helpers/api-fixtures"; +import { expectRowsEqual } from "../helpers/assertions"; +import { bottomBy, topBy } from "../helpers/oracle"; + +describe("event log top and bottom endpoint scenarios", () => { + it("GET /events/top?severity=error returns slowest error events by duration", () => { + const { events, oracle } = buildEventLogsFixture(); + + const filtered = oracle.filter((row) => row.severity === "error"); + expectRowsEqual( + events.where("severity", "=", "error").top(10, "durationMs"), + topBy(filtered, "durationMs", 10), + ); + }); + + it("GET /events/bottom?service=api returns fastest api events by duration", () => { + const { 
events, oracle } = buildEventLogsFixture(); + + const filtered = oracle.filter((row) => row.service === "api"); + const expected = bottomBy(filtered, "durationMs", 10); + const actual = events.where("service", "=", "api").bottom(10, "durationMs"); + const maxExpectedDuration = expected[expected.length - 1]?.durationMs ?? 0; + + expect(actual).toHaveLength(10); + expect(actual.every((row) => row.service === "api")).toBe(true); + expect(actual.map((row) => row.durationMs)).toEqual( + [...actual.map((row) => row.durationMs)].sort((left, right) => left - right), + ); + expect(actual.every((row) => row.durationMs <= maxExpectedDuration)).toBe(true); + }); +}); diff --git a/tests/scenarios/helpers/api-fixtures.ts b/tests/scenarios/helpers/api-fixtures.ts new file mode 100644 index 0000000..af2f7f4 --- /dev/null +++ b/tests/scenarios/helpers/api-fixtures.ts @@ -0,0 +1,61 @@ +import { fromRows } from "../../../src"; +import { + eventLogSchema, + makeEventLogs, + makeProducts, + makeSessions, + makeUsers, + productCatalogSchema, + sessionAnalyticsSchema, + userAnalyticsSchema, +} from "./datasets"; + +export function buildUserAnalyticsFixture(count = 5_000) { + const rows = makeUsers(count); + const users = fromRows(userAnalyticsSchema, rows) + .createUniqueIndex("id") + .createIndex("status") + .createIndex("segment") + .createSortedIndex("age") + .createSortedIndex("lastSeen"); + + return { users, oracle: rows.map((row) => ({ ...row })) }; +} + +export function buildEventLogsFixture(count = 5_000) { + const rows = makeEventLogs(count); + const events = fromRows(eventLogSchema, rows) + .createUniqueIndex("id") + .createIndex("severity") + .createIndex("service") + .createSortedIndex("timestamp") + .createSortedIndex("durationMs"); + + return { events, oracle: rows.map((row) => ({ ...row })) }; +} + +export function buildProductCatalogFixture(count = 4_000) { + const rows = makeProducts(count); + const products = fromRows(productCatalogSchema, rows) + 
.createUniqueIndex("id") + .createIndex("category") + .createIndex("status") + .createSortedIndex("price") + .createSortedIndex("rating") + .createSortedIndex("stock"); + + return { products, oracle: rows.map((row) => ({ ...row })) }; +} + +export function buildSessionAnalyticsFixture(count = 5_000) { + const rows = makeSessions(count); + const sessions = fromRows(sessionAnalyticsSchema, rows) + .createUniqueIndex("id") + .createIndex("status") + .createIndex("segment") + .createIndex("country") + .createSortedIndex("startedAt") + .createSortedIndex("durationMs"); + + return { sessions, oracle: rows.map((row) => ({ ...row })) }; +} diff --git a/tests/scenarios/helpers/assertions.ts b/tests/scenarios/helpers/assertions.ts new file mode 100644 index 0000000..4a0f382 --- /dev/null +++ b/tests/scenarios/helpers/assertions.ts @@ -0,0 +1,25 @@ +import { expect } from "vitest"; + +export function expectRowsEqual( + actual: readonly TRow[], + expected: readonly TRow[], +): void { + expect(actual).toEqual(expected); +} + +export function expectProjectedRows( + actual: readonly TRow[], + expected: readonly TRow[], +): void { + expect(actual).toEqual(expected); + for (const row of actual) { + expect(Object.keys(row).sort()).toEqual(Object.keys(expected[0] ?? 
row).sort()); + } +} + +export function expectMutationResult( + actual: { readonly affectedRows: number }, + expectedAffectedRows: number, +): void { + expect(actual).toEqual({ affectedRows: expectedAffectedRows }); +} diff --git a/tests/scenarios/helpers/datasets.ts b/tests/scenarios/helpers/datasets.ts new file mode 100644 index 0000000..39f2220 --- /dev/null +++ b/tests/scenarios/helpers/datasets.ts @@ -0,0 +1,86 @@ +import { column, type RowForSchema } from "../../../src"; + +export const userAnalyticsSchema = { + id: column.uint32(), + status: column.dictionary(["active", "inactive", "suspended"] as const), + segment: column.dictionary(["free", "pro", "enterprise"] as const), + age: column.uint8(), + score: column.uint32(), + lastSeen: column.uint32(), +}; + +export const eventLogSchema = { + id: column.uint32(), + timestamp: column.uint32(), + severity: column.dictionary(["debug", "info", "warn", "error"] as const), + service: column.dictionary(["api", "worker", "billing", "search"] as const), + durationMs: column.uint32(), +}; + +export const productCatalogSchema = { + id: column.uint32(), + category: column.dictionary(["books", "games", "tools", "apparel"] as const), + status: column.dictionary(["active", "inactive", "discontinued"] as const), + price: column.uint32(), + rating: column.uint8(), + stock: column.uint16(), +}; + +export const sessionAnalyticsSchema = { + id: column.uint32(), + userId: column.uint32(), + segment: column.dictionary(["free", "pro", "enterprise"] as const), + status: column.dictionary(["active", "expired", "revoked"] as const), + startedAt: column.uint32(), + durationMs: column.uint32(), + country: column.dictionary(["US", "TR", "DE", "GB"] as const), +}; + +export type UserAnalyticsRow = RowForSchema<typeof userAnalyticsSchema>; +export type EventLogRow = RowForSchema<typeof eventLogSchema>; +export type ProductCatalogRow = RowForSchema<typeof productCatalogSchema>; +export type SessionAnalyticsRow = RowForSchema<typeof sessionAnalyticsSchema>; + +export function makeUsers(count = 5_000): UserAnalyticsRow[] { + return Array.from({ length:
count }, (_unused, id) => ({ + id, + status: id % 11 === 0 ? "suspended" : id % 5 === 0 ? "inactive" : "active", + segment: id % 13 === 0 ? "enterprise" : id % 3 === 0 ? "pro" : "free", + age: 18 + ((id * 7) % 55), + score: (id * 37) % 10_000, + lastSeen: 1_700_000_000 + ((id * 97) % 120_000), + })); +} + +export function makeEventLogs(count = 5_000): EventLogRow[] { + return Array.from({ length: count }, (_unused, id) => ({ + id, + timestamp: 1_710_000_000 + id * 30, + severity: id % 17 === 0 ? "error" : id % 7 === 0 ? "warn" : id % 3 === 0 ? "debug" : "info", + service: id % 5 === 0 ? "billing" : id % 4 === 0 ? "search" : id % 2 === 0 ? "worker" : "api", + durationMs: 20 + ((id * 23) % 2_000), + })); +} + +export function makeProducts(count = 4_000): ProductCatalogRow[] { + return Array.from({ length: count }, (_unused, id) => ({ + id, + category: id % 7 === 0 ? "tools" : id % 5 === 0 ? "games" : id % 3 === 0 ? "apparel" : "books", + status: id % 19 === 0 ? "discontinued" : id % 6 === 0 ? "inactive" : "active", + price: 500 + ((id * 41) % 50_000), + rating: 1 + ((id * 3) % 5), + stock: (id * 11) % 500, + })); +} + +export function makeSessions(count = 5_000): SessionAnalyticsRow[] { + return Array.from({ length: count }, (_unused, id) => ({ + id, + userId: 1_000 + (id % 1_250), + segment: id % 17 === 0 ? "enterprise" : id % 4 === 0 ? "pro" : "free", + status: id % 23 === 0 ? "revoked" : id % 6 === 0 ? "expired" : "active", + startedAt: 1_720_000_000 + ((id * 45) % 200_000), + durationMs: 60_000 + ((id * 1_337) % 7_200_000), + country: id % 11 === 0 ? "TR" : id % 7 === 0 ? "DE" : id % 5 === 0 ? 
"GB" : "US", + })); +} diff --git a/tests/scenarios/helpers/explain.ts b/tests/scenarios/helpers/explain.ts new file mode 100644 index 0000000..33290c1 --- /dev/null +++ b/tests/scenarios/helpers/explain.ts @@ -0,0 +1,75 @@ +import { expect } from "vitest"; +import type { QueryExplainReasonCode } from "../../../src"; + +type Explainable = { + explain(): { + readonly scanType: "index" | "full"; + readonly indexesUsed: readonly string[]; + readonly indexState?: "fresh" | "dirty"; + readonly reasonCode?: QueryExplainReasonCode; + readonly projectionPushdown: boolean; + readonly candidateRows?: number; + }; +}; + +export function expectUsesIndex( + query: Explainable, + indexName: string, +): void { + expect(query.explain()).toEqual( + expect.objectContaining({ + scanType: "index", + indexesUsed: expect.arrayContaining([indexName]), + indexState: expect.any(String), + }), + ); +} + +export function expectFreshIndex( + query: Explainable, + indexName: string, +): void { + expect(query.explain()).toEqual( + expect.objectContaining({ + scanType: "index", + indexesUsed: expect.arrayContaining([indexName]), + indexState: "fresh", + }), + ); +} + +export function expectDirtyIndex( + query: Explainable, + indexName: string, +): void { + const explain = query.explain(); + expect(explain).toEqual( + expect.objectContaining({ + scanType: "index", + indexesUsed: expect.arrayContaining([indexName]), + indexState: "dirty", + reasonCode: "INDEX_DIRTY_WOULD_REBUILD_ON_EXECUTION", + }), + ); + expect(explain).not.toHaveProperty("candidateRows"); +} + +export function expectFullScanReason( + query: Explainable, + reasonCode: QueryExplainReasonCode, +): void { + expect(query.explain()).toEqual( + expect.objectContaining({ + scanType: "full", + reasonCode, + }), + ); +} + +export function expectProjectionPushdown(query: Explainable): void { + expect(query.explain()).toEqual( + expect.objectContaining({ + projectionPushdown: true, + }), + ); +} diff --git 
a/tests/scenarios/helpers/oracle.ts b/tests/scenarios/helpers/oracle.ts new file mode 100644 index 0000000..c38f115 --- /dev/null +++ b/tests/scenarios/helpers/oracle.ts @@ -0,0 +1,77 @@ +export function projectRows<TRow, Key extends keyof TRow>( + rows: readonly TRow[], + keys: readonly Key[], +): Pick<TRow, Key>[] { + return rows.map((row) => { + const projected: Partial<Pick<TRow, Key>> = {}; + for (const key of keys) { + projected[key] = row[key]; + } + return projected as Pick<TRow, Key>; + }); +} + +export function updateOracle<TRow>( + rows: TRow[], + predicate: (row: TRow) => boolean, + patch: Partial<TRow>, +): number { + let affectedRows = 0; + for (const row of rows) { + if (!predicate(row)) { + continue; + } + Object.assign(row as object, patch); + affectedRows += 1; + } + return affectedRows; +} + +export function deleteFromOracle<TRow>( + rows: TRow[], + predicate: (row: TRow) => boolean, +): number { + let affectedRows = 0; + for (let index = rows.length - 1; index >= 0; index -= 1) { + if (!predicate(rows[index])) { + continue; + } + rows.splice(index, 1); + affectedRows += 1; + } + return affectedRows; +} + +export function topBy<TRow>( + rows: readonly TRow[], + column: keyof TRow, + count: number, +): TRow[] { + return [...rows] + .sort((left, right) => Number(right[column]) - Number(left[column])) + .slice(0, count); +} + +export function bottomBy<TRow>( + rows: readonly TRow[], + column: keyof TRow, + count: number, +): TRow[] { + return [...rows] + .sort((left, right) => Number(left[column]) - Number(right[column])) + .slice(0, count); +} + +export function sumBy<TRow>( + rows: readonly TRow[], + column: keyof TRow, +): number { + return rows.reduce((total, row) => total + Number(row[column]), 0); +} + +export function avgBy<TRow>( + rows: readonly TRow[], + column: keyof TRow, +): number | undefined { + return rows.length === 0 ?
undefined : sumBy(rows, column) / rows.length; +} diff --git a/tests/scenarios/product-search/catalog-filters.scenario.test.ts b/tests/scenarios/product-search/catalog-filters.scenario.test.ts new file mode 100644 index 0000000..6c47283 --- /dev/null +++ b/tests/scenarios/product-search/catalog-filters.scenario.test.ts @@ -0,0 +1,26 @@ +import { describe, it } from "vitest"; +import { buildProductCatalogFixture } from "../helpers/api-fixtures"; +import { expectProjectedRows } from "../helpers/assertions"; +import { expectProjectionPushdown, expectUsesIndex } from "../helpers/explain"; +import { projectRows } from "../helpers/oracle"; + +describe("product search catalog filter endpoint scenarios", () => { + it("GET /products?category=tools&status=active uses equality index and projects results", () => { + const { products, oracle } = buildProductCatalogFixture(); + + const query = products + .where({ category: "tools", status: "active" }) + .select(["id", "price", "stock"]) + .limit(50); + const expected = projectRows( + oracle + .filter((row) => row.category === "tools" && row.status === "active") + .slice(0, 50), + ["id", "price", "stock"], + ); + + expectUsesIndex(query, "equality:category"); + expectProjectionPushdown(query); + expectProjectedRows(query.toArray(), expected); + }); +}); diff --git a/tests/scenarios/product-search/price-rating-ranges.scenario.test.ts b/tests/scenarios/product-search/price-rating-ranges.scenario.test.ts new file mode 100644 index 0000000..fd1a46e --- /dev/null +++ b/tests/scenarios/product-search/price-rating-ranges.scenario.test.ts @@ -0,0 +1,28 @@ +import { describe, it } from "vitest"; +import { buildProductCatalogFixture } from "../helpers/api-fixtures"; +import { expectRowsEqual } from "../helpers/assertions"; +import { expectUsesIndex } from "../helpers/explain"; + +describe("product search price and rating range endpoint scenarios", () => { + it("GET /products?priceMin=&priceMax=&rating>= uses sorted price index and matches 
oracle", () => { + const { products, oracle } = buildProductCatalogFixture(); + const minPrice = 10_000; + const maxPrice = 12_500; + + const query = products.where({ + price: { gte: minPrice, lte: maxPrice }, + rating: { gte: 4 }, + status: "active", + }); + const expected = oracle.filter( + (row) => + row.price >= minPrice && + row.price <= maxPrice && + row.rating >= 4 && + row.status === "active", + ); + + expectUsesIndex(query, "sorted:price"); + expectRowsEqual(query.toArray(), expected); + }); +}); diff --git a/tests/scenarios/product-search/stock-mutations.scenario.test.ts b/tests/scenarios/product-search/stock-mutations.scenario.test.ts new file mode 100644 index 0000000..c11a781 --- /dev/null +++ b/tests/scenarios/product-search/stock-mutations.scenario.test.ts @@ -0,0 +1,42 @@ +import { describe, it } from "vitest"; +import { buildProductCatalogFixture } from "../helpers/api-fixtures"; +import { expectMutationResult, expectRowsEqual } from "../helpers/assertions"; +import { expectDirtyIndex } from "../helpers/explain"; +import { deleteFromOracle, updateOracle } from "../helpers/oracle"; + +describe("product search stock mutation endpoint scenarios", () => { + it("PATCH /products/restock updates low-stock active products and preserves range query parity", () => { + const { products, oracle } = buildProductCatalogFixture(); + + const updated = updateOracle( + oracle, + (row) => row.status === "active" && row.stock < 5, + { stock: 50 }, + ); + expectMutationResult( + products.updateMany({ status: "active", stock: { lt: 5 } }, { stock: 50 }), + updated, + ); + + const query = products.where("stock", "<", 5); + expectDirtyIndex(query, "sorted:stock"); + expectRowsEqual( + query.toArray(), + oracle.filter((row) => row.stock < 5), + ); + }); + + it("DELETE /products?status=discontinued removes inactive catalog rows and keeps active search correct", () => { + const { products, oracle } = buildProductCatalogFixture(); + + const deleted = deleteFromOracle(oracle, 
(row) => row.status === "discontinued"); + expectMutationResult(products.deleteMany({ status: "discontinued" }), deleted); + + const query = products.where({ category: "books", status: "active" }); + expectDirtyIndex(query, "equality:category"); + expectRowsEqual( + query.toArray(), + oracle.filter((row) => row.category === "books" && row.status === "active"), + ); + }); +}); diff --git a/tests/scenarios/serialization/mutation-after-restore.scenario.test.ts b/tests/scenarios/serialization/mutation-after-restore.scenario.test.ts new file mode 100644 index 0000000..4edc699 --- /dev/null +++ b/tests/scenarios/serialization/mutation-after-restore.scenario.test.ts @@ -0,0 +1,32 @@ +import { describe, expect, it } from "vitest"; +import { table } from "../../../src"; +import { buildUserAnalyticsFixture } from "../helpers/api-fixtures"; +import { expectMutationResult, expectRowsEqual } from "../helpers/assertions"; +import { expectDirtyIndex } from "../helpers/explain"; +import { updateOracle } from "../helpers/oracle"; + +describe("serialization mutation after restore endpoint scenarios", () => { + it("PATCH /users after restore and reindex mutates correctly and preserves find-by-id", () => { + const { users, oracle } = buildUserAnalyticsFixture(); + const restored = table.deserialize(users.serialize()) as unknown as typeof users; + restored.createIndex("segment").createSortedIndex("age").createUniqueIndex("id"); + + const updated = updateOracle( + oracle, + (row) => row.segment === "free" && row.age < 25, + { segment: "pro" }, + ); + expectMutationResult( + restored.updateMany({ segment: "free", age: { lt: 25 } }, { segment: "pro" }), + updated, + ); + + const query = restored.where("segment", "=", "pro"); + expectDirtyIndex(query, "equality:segment"); + expectRowsEqual( + query.toArray(), + oracle.filter((row) => row.segment === "pro"), + ); + expect(restored.findBy("id", 42)).toEqual(oracle.find((row) => row.id === 42)); + }); +}); diff --git 
a/tests/scenarios/serialization/restore-and-reindex.scenario.test.ts b/tests/scenarios/serialization/restore-and-reindex.scenario.test.ts new file mode 100644 index 0000000..fae1a9f --- /dev/null +++ b/tests/scenarios/serialization/restore-and-reindex.scenario.test.ts @@ -0,0 +1,27 @@ +import { describe, it } from "vitest"; +import { table } from "../../../src"; +import { buildUserAnalyticsFixture } from "../helpers/api-fixtures"; +import { expectRowsEqual } from "../helpers/assertions"; +import { expectFreshIndex, expectFullScanReason } from "../helpers/explain"; + +describe("serialization restore and reindex endpoint scenarios", () => { + it("GET /users transitions from full scan to index usage after explicit reindex", () => { + const { users, oracle } = buildUserAnalyticsFixture(); + const restored = table.deserialize(users.serialize()) as unknown as typeof users; + + const before = restored.where("lastSeen", ">=", 1_700_110_000); + expectFullScanReason(before, "RANGE_QUERY_WITHOUT_SORTED_INDEX"); + expectRowsEqual( + before.toArray(), + oracle.filter((row) => row.lastSeen >= 1_700_110_000), + ); + + restored.createIndex("status").createSortedIndex("lastSeen").createUniqueIndex("id"); + const after = restored.where("lastSeen", ">=", 1_700_110_000); + expectFreshIndex(after, "sorted:lastSeen"); + expectRowsEqual( + after.toArray(), + oracle.filter((row) => row.lastSeen >= 1_700_110_000), + ); + }); +}); diff --git a/tests/scenarios/serialization/restore-without-indexes.scenario.test.ts b/tests/scenarios/serialization/restore-without-indexes.scenario.test.ts new file mode 100644 index 0000000..c513302 --- /dev/null +++ b/tests/scenarios/serialization/restore-without-indexes.scenario.test.ts @@ -0,0 +1,23 @@ +import { describe, expect, it } from "vitest"; +import { table } from "../../../src"; +import { buildUserAnalyticsFixture } from "../helpers/api-fixtures"; +import { expectRowsEqual } from "../helpers/assertions"; +import { expectFullScanReason } from 
"../helpers/explain"; + +describe("serialization restore without indexes endpoint scenarios", () => { + it("GET /users after restore is correct before reindex but explains no implicit indexes", () => { + const { users, oracle } = buildUserAnalyticsFixture(); + const restored = table.deserialize(users.serialize()) as unknown as typeof users; + + expect(restored.indexes()).toEqual([]); + expect(restored.sortedIndexes()).toEqual([]); + expect(restored.uniqueIndexes()).toEqual([]); + + const query = restored.where("status", "=", "suspended"); + expectFullScanReason(query, "NO_INDEX_FOR_COLUMN"); + expectRowsEqual( + query.toArray(), + oracle.filter((row) => row.status === "suspended"), + ); + }); +}); diff --git a/tests/scenarios/session-analytics/active-sessions.scenario.test.ts b/tests/scenarios/session-analytics/active-sessions.scenario.test.ts new file mode 100644 index 0000000..06aab22 --- /dev/null +++ b/tests/scenarios/session-analytics/active-sessions.scenario.test.ts @@ -0,0 +1,21 @@ +import { describe, expect, it } from "vitest"; +import { buildSessionAnalyticsFixture } from "../helpers/api-fixtures"; +import { expectRowsEqual } from "../helpers/assertions"; +import { expectUsesIndex } from "../helpers/explain"; + +describe("session analytics active session endpoint scenarios", () => { + it("GET /sessions?status=active&country=TR uses equality index and matches oracle", () => { + const { sessions, oracle } = buildSessionAnalyticsFixture(); + + const query = sessions.where({ status: "active", country: "TR" }); + const expected = oracle.filter( + (row) => row.status === "active" && row.country === "TR", + ); + + expectUsesIndex(query, "equality:country"); + expectRowsEqual(query.toArray(), expected); + expect(sessions.countWhere({ status: "active" })).toBe( + oracle.filter((row) => row.status === "active").length, + ); + }); +}); diff --git a/tests/scenarios/session-analytics/expire-sessions.scenario.test.ts 
b/tests/scenarios/session-analytics/expire-sessions.scenario.test.ts new file mode 100644 index 0000000..5bcdbb0 --- /dev/null +++ b/tests/scenarios/session-analytics/expire-sessions.scenario.test.ts @@ -0,0 +1,32 @@ +import { describe, expect, it } from "vitest"; +import { buildSessionAnalyticsFixture } from "../helpers/api-fixtures"; +import { expectMutationResult, expectRowsEqual } from "../helpers/assertions"; +import { expectDirtyIndex } from "../helpers/explain"; +import { updateOracle } from "../helpers/oracle"; + +describe("session analytics expire endpoint scenarios", () => { + it("PATCH /sessions/expire marks stale sessions inactive and keeps indexed queries correct", () => { + const { sessions, oracle } = buildSessionAnalyticsFixture(); + const cutoff = 1_720_060_000; + + const expired = updateOracle( + oracle, + (row) => row.status === "active" && row.startedAt < cutoff, + { status: "expired" }, + ); + expectMutationResult( + sessions.updateMany({ status: "active", startedAt: { lt: cutoff } }, { status: "expired" }), + expired, + ); + + const query = sessions.where("status", "=", "expired"); + expectDirtyIndex(query, "equality:status"); + expectRowsEqual( + query.toArray(), + oracle.filter((row) => row.status === "expired"), + ); + expect(sessions.avg("durationMs")).toBe( + oracle.reduce((total, row) => total + row.durationMs, 0) / oracle.length, + ); + }); +}); diff --git a/tests/scenarios/session-analytics/segment-analytics.scenario.test.ts b/tests/scenarios/session-analytics/segment-analytics.scenario.test.ts new file mode 100644 index 0000000..527f2bc --- /dev/null +++ b/tests/scenarios/session-analytics/segment-analytics.scenario.test.ts @@ -0,0 +1,21 @@ +import { describe, expect, it } from "vitest"; +import { buildSessionAnalyticsFixture } from "../helpers/api-fixtures"; +import { expectRowsEqual } from "../helpers/assertions"; +import { expectUsesIndex } from "../helpers/explain"; +import { avgBy, sumBy } from "../helpers/oracle"; + 
+describe("session analytics segment endpoint scenarios", () => { + it("GET /sessions/segments?segment=enterprise&status=active reports aggregate usage and parity", () => { + const { sessions, oracle } = buildSessionAnalyticsFixture(); + + const query = sessions.where({ segment: "enterprise", status: "active" }); + const expected = oracle.filter( + (row) => row.segment === "enterprise" && row.status === "active", + ); + + expectUsesIndex(query, "equality:segment"); + expectRowsEqual(query.toArray(), expected); + expect(query.sum("durationMs")).toBe(sumBy(expected, "durationMs")); + expect(query.avg("durationMs")).toBe(avgBy(expected, "durationMs")); + }); +}); diff --git a/tests/scenarios/user-analytics/delete-users.scenario.test.ts b/tests/scenarios/user-analytics/delete-users.scenario.test.ts new file mode 100644 index 0000000..58f410a --- /dev/null +++ b/tests/scenarios/user-analytics/delete-users.scenario.test.ts @@ -0,0 +1,29 @@ +import { describe, it } from "vitest"; +import { buildUserAnalyticsFixture } from "../helpers/api-fixtures"; +import { expectMutationResult, expectRowsEqual } from "../helpers/assertions"; +import { expectDirtyIndex } from "../helpers/explain"; +import { deleteFromOracle } from "../helpers/oracle"; + +describe("user analytics delete endpoint scenarios", () => { + it("DELETE /users?status=suspended&lastSeen<cutoff", () => { + const { users, oracle } = buildUserAnalyticsFixture(); + const cutoff = 1_700_040_000; + + const affectedRows = deleteFromOracle( + oracle, + (row) => row.status === "suspended" && row.lastSeen < cutoff, + ); + const result = users.deleteMany({ + status: "suspended", + lastSeen: { lt: cutoff }, + }); + + expectMutationResult(result, affectedRows); + const query = users.where("lastSeen", "<", cutoff); + expectDirtyIndex(query, "sorted:lastSeen"); + expectRowsEqual( + query.toArray(), + oracle.filter((row) => row.lastSeen < cutoff), + ); + }); +}); diff --git a/tests/scenarios/user-analytics/list-users.scenario.test.ts
b/tests/scenarios/user-analytics/list-users.scenario.test.ts new file mode 100644 index 0000000..344dee2 --- /dev/null +++ b/tests/scenarios/user-analytics/list-users.scenario.test.ts @@ -0,0 +1,18 @@ +import { describe, it } from "vitest"; +import { buildUserAnalyticsFixture } from "../helpers/api-fixtures"; +import { expectRowsEqual } from "../helpers/assertions"; +import { expectFreshIndex } from "../helpers/explain"; + +describe("user analytics list endpoint scenarios", () => { + it("GET /users?status=suspended&segment=enterprise uses equality index and matches oracle", () => { + const { users, oracle } = buildUserAnalyticsFixture(); + + const query = users.where({ status: "suspended", segment: "enterprise" }); + const expected = oracle.filter( + (row) => row.status === "suspended" && row.segment === "enterprise", + ); + + expectFreshIndex(query, "equality:segment"); + expectRowsEqual(query.toArray(), expected); + }); +}); diff --git a/tests/scenarios/user-analytics/projection-pagination.scenario.test.ts b/tests/scenarios/user-analytics/projection-pagination.scenario.test.ts new file mode 100644 index 0000000..c2eb6b9 --- /dev/null +++ b/tests/scenarios/user-analytics/projection-pagination.scenario.test.ts @@ -0,0 +1,27 @@ +import { describe, it } from "vitest"; +import { buildUserAnalyticsFixture } from "../helpers/api-fixtures"; +import { expectProjectedRows } from "../helpers/assertions"; +import { expectProjectionPushdown, expectUsesIndex } from "../helpers/explain"; +import { projectRows } from "../helpers/oracle"; + +describe("user analytics projection and pagination endpoint scenarios", () => { + it("GET /users?segment=enterprise&age>=45&limit=25&offset=5 projects only requested fields", () => { + const { users, oracle } = buildUserAnalyticsFixture(); + + const query = users + .where({ segment: "enterprise", age: { gte: 45 } }) + .select(["id", "age", "score"]) + .offset(5) + .limit(25); + const expected = projectRows( + oracle + .filter((row) => 
row.segment === "enterprise" && row.age >= 45) + .slice(5, 30), + ["id", "age", "score"], + ); + + expectUsesIndex(query, "equality:segment"); + expectProjectionPushdown(query); + expectProjectedRows(query.toArray(), expected); + }); +}); diff --git a/tests/scenarios/user-analytics/update-users.scenario.test.ts b/tests/scenarios/user-analytics/update-users.scenario.test.ts new file mode 100644 index 0000000..45425d5 --- /dev/null +++ b/tests/scenarios/user-analytics/update-users.scenario.test.ts @@ -0,0 +1,29 @@ +import { describe, it } from "vitest"; +import { buildUserAnalyticsFixture } from "../helpers/api-fixtures"; +import { expectMutationResult, expectRowsEqual } from "../helpers/assertions"; +import { expectDirtyIndex } from "../helpers/explain"; +import { updateOracle } from "../helpers/oracle"; + +describe("user analytics update endpoint scenarios", () => { + it("PATCH /users?status=inactive&age>=60 updates a subset and keeps indexed queries correct", () => { + const { users, oracle } = buildUserAnalyticsFixture(); + + const affectedRows = updateOracle( + oracle, + (row) => row.status === "inactive" && row.age >= 60, + { status: "active", score: 9_999 }, + ); + const result = users.updateMany( + { status: "inactive", age: { gte: 60 } }, + { status: "active", score: 9_999 }, + ); + + expectMutationResult(result, affectedRows); + const query = users.where("status", "=", "active"); + expectDirtyIndex(query, "equality:status"); + expectRowsEqual( + query.toArray(), + oracle.filter((row) => row.status === "active"), + ); + }); +}); diff --git a/tests/type-inference.test-d.ts b/tests/type-inference.test-d.ts index 3158766..4fbd244 100644 --- a/tests/type-inference.test-d.ts +++ b/tests/type-inference.test-d.ts @@ -1,5 +1,5 @@ import { column, fromRows, table } from "../src"; -import type { MutationResult, QueryInfo, UniqueIndexStats } from "../src"; +import type { MutationResult, QueryExplainPlan, QueryExplainReasonCode, QueryInfo, UniqueIndexStats } from 
"../src"; const users = table({ id: column.uint32(), @@ -39,6 +39,11 @@ users.where({ status: { eq: "passive", in: ["active"] }, is_active: { eq: false, users.where("age", ">", 18).where({ status: "active" }).select(["id"]); users.filter((row) => row.age > 18).where({ status: "active" }).toArray(); users.where({ status: "active" }).filter((row) => row.is_active).select(["id"]); +const explainPlan: QueryExplainPlan = users.where("id", "=", 1).select(["id"]).explain(); +const explainReasonCode: QueryExplainReasonCode | undefined = explainPlan.reasonCode; +const explainScanType: "index" | "full" = explainPlan.scanType; +void explainReasonCode; +void explainScanType; table(users.getSchema(), { onQuery(info: QueryInfo) { const duration: number = info.duration;