From 6c8ce99325202ad24e0c9afeec5e6082d650b4f5 Mon Sep 17 00:00:00 2001 From: emremy Date: Sat, 9 May 2026 02:22:04 +0300 Subject: [PATCH] feat: add v0.5.0 trust and stability hardening Add the v0.5.0 hardening release focused on correctness, API stability, serialization safety, and performance predictability. Highlights: - add type-test gate with dedicated tsconfig and CI wiring - implement column-scoped index invalidation for updates - avoid dirtying unrelated equality/sorted/unique indexes on non-indexed updates - keep deletes broadly dirtying row-position-sensitive indexes - harden serialization/deserialization validation - introduce numeric wire-format version independent from package version - reject malformed serialized metadata, corrupted payloads, and serialized indexes - wrap restore-time reconstruction failures as COLQL_INVALID_SERIALIZED_DATA - add deterministic mutation/index/serialization fuzzer - extend QueryInfo and explain diagnostics additively - mark unstable diagnostic surfaces as internal - add CodSpeed benchmark cases for no-rebuild vs lazy-rebuild paths - update README/docs with v0.5 behavior, serialization policy, type gate, and release checklist --- .github/workflows/ci.yml | 3 + README.md | 4 +- benchmarks/codspeed/mutation.bench.ts | 11 + docs/doc/01-installation.md | 2 + docs/doc/06-indexing.md | 6 +- docs/doc/07-sorted-indexes.md | 2 +- docs/doc/08-mutations.md | 6 +- docs/doc/11-serialization.md | 14 +- docs/doc/12-memory-model.md | 2 +- docs/doc/13-performance-and-benchmarks.md | 30 ++ docs/doc/14-typescript-type-safety.md | 8 +- .../15-limitations-and-design-decisions.md | 2 +- docs/doc/16-api-reference.md | 14 +- package-lock.json | 4 +- package.json | 4 +- src/indexing/index-manager.ts | 54 ++- src/query.ts | 83 +++- src/table.ts | 403 ++++++++++++++++-- src/types.ts | 11 + .../column-scoped-index-invalidation.test.ts | 140 ++++++ tests/deterministic-mutation-fuzzer.test.ts | 279 ++++++++++++ tests/on-query.test.ts | 48 ++- 
tests/query-explain.test.ts | 11 +- .../dirty-index-lifecycle.scenario.test.ts | 5 +- tests/serialization-validation.test.ts | 215 +++++++++- tests/type-inference.test-d.ts | 10 + tsconfig.type-tests.json | 7 + 27 files changed, 1276 insertions(+), 102 deletions(-) create mode 100644 tests/column-scoped-index-invalidation.test.ts create mode 100644 tests/deterministic-mutation-fuzzer.test.ts create mode 100644 tsconfig.type-tests.json diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 4588953..ef9bc57 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -28,5 +28,8 @@ jobs: - name: Build run: npm run build + - name: Type tests + run: npm run test:types + - name: Test run: npm test diff --git a/README.md b/README.md index b5b9cae..e1966e3 100644 --- a/README.md +++ b/README.md @@ -222,7 +222,9 @@ try { ```sh npm install +npm run check npm test +npm run test:types npm run build npm run bench:codspeed npm run benchmark:memory @@ -238,7 +240,7 @@ npm run benchmark:session-analytics ## Status -ColQL v0.4.x introduces public query diagnostics and continues moving toward API stabilization, but breaking changes may still happen before 1.0.0. The API is not fully frozen. +ColQL v0.5.x focuses on trust and stability hardening: narrower index invalidation after updates, stricter snapshot deserialization, a type-test gate for public TypeScript behavior, and clearer diagnostics. Breaking changes may still happen before 1.0.0, but the project is moving toward API stabilization. 
## Limitations diff --git a/benchmarks/codspeed/mutation.bench.ts b/benchmarks/codspeed/mutation.bench.ts index eb27efd..af525fe 100644 --- a/benchmarks/codspeed/mutation.bench.ts +++ b/benchmarks/codspeed/mutation.bench.ts @@ -8,6 +8,7 @@ import { let lazyRebuildTenantId = 62; let batchScore = 25; +let unrelatedDuration = 30_000; describe("mutation", () => { bench("mutation/updateBy/single/10k", () => { @@ -46,4 +47,14 @@ describe("mutation", () => { mediumSessions.indexed.updateBy("id", 7_501, { tenantId: lazyRebuildTenantId }); mediumSessions.indexed.where("tenantId", "=", lazyRebuildTenantId).count(); }); + + bench("index/no-rebuild/after-unindexed-column-mutation/10k", () => { + unrelatedDuration = unrelatedDuration === 30_000 ? 45_000 : 30_000; + mediumSessions.indexed.updateBy("id", 7_502, { durationMs: unrelatedDuration }); + mediumSessions.indexed.where("tenantId", "=", dashboardTenantId).count(); + }); + + bench("index/requery/after-lazy-rebuild/10k", () => { + mediumSessions.indexed.where("tenantId", "=", lazyRebuildTenantId).count(); + }); }); diff --git a/docs/doc/01-installation.md b/docs/doc/01-installation.md index 41e6282..ff52921 100644 --- a/docs/doc/01-installation.md +++ b/docs/doc/01-installation.md @@ -55,7 +55,9 @@ When contributing locally: ```sh npm install +npm run check npm test +npm run test:types npm run build ``` diff --git a/docs/doc/06-indexing.md b/docs/doc/06-indexing.md index 97209e4..8caaf00 100644 --- a/docs/doc/06-indexing.md +++ b/docs/doc/06-indexing.md @@ -62,7 +62,11 @@ users.where("status", "in", ["active", "passive"]).count(); ## Dirty and Lazy Rebuilds -Inserts, deletes, and updates can change internal row positions or indexed values. Row positions are not stable IDs and should not be used as external identifiers. If stable identity is required, define and index an ID column. ColQL marks existing indexes dirty after nonzero mutations. When an indexed query requires a dirty index, ColQL rebuilds it before use. 
The first indexed query after a mutation may be slower than later queries. +Inserts, deletes, and updates can change internal row positions or indexed values. Row positions are not stable IDs and should not be used as external identifiers. If stable identity is required, define and index an ID column. + +Updates dirty only indexes whose indexed columns changed. Updating an unrelated column does not dirty equality or sorted indexes for other columns. Deletes still dirty equality and sorted indexes broadly because physical row positions shift. Inserts update clean equality indexes incrementally and mark sorted indexes dirty. + +When an indexed query requires a dirty index, ColQL rebuilds it before use. The first indexed query after a relevant indexed-column mutation may be slower than later queries. You can rebuild explicitly: diff --git a/docs/doc/07-sorted-indexes.md b/docs/doc/07-sorted-indexes.md index e86fa7e..e46e6c4 100644 --- a/docs/doc/07-sorted-indexes.md +++ b/docs/doc/07-sorted-indexes.md @@ -53,7 +53,7 @@ Candidate row positions are returned in scan order so query output preserves log ## Dirty and Lazy Rebuilds -Sorted indexes are marked dirty after inserts, deletes, and updates. When an indexed query requires a dirty sorted index, ColQL rebuilds it before use. Dirty sorted indexes are not used to return stale results. You can also rebuild eagerly with: +Sorted indexes are marked dirty after inserts, deletes, and updates to that sorted column. Updates to unrelated columns do not dirty the sorted index. Deletes still dirty sorted indexes because physical row positions shift. When an indexed query requires a dirty sorted index, ColQL rebuilds it before use. Dirty sorted indexes are not used to return stale results. 
You can also rebuild eagerly with: ```ts users.rebuildSortedIndex("age"); diff --git a/docs/doc/08-mutations.md b/docs/doc/08-mutations.md index c4ce6ef..0feaf99 100644 --- a/docs/doc/08-mutations.md +++ b/docs/doc/08-mutations.md @@ -117,10 +117,10 @@ ColQL applies mutation safety rules internally: - unique-index violations are checked before writing and keep bulk updates all-or-nothing - predicate deletes delete matched row indexes from highest to lowest - no-match predicate update/delete returns `{ affectedRows: 0 }` -- nonzero update/delete mutations mark existing indexes dirty -- incremental index maintenance is not attempted +- nonzero updates dirty only indexes for columns changed by the patch +- deletes dirty equality and sorted indexes broadly because row positions shift -Dirty indexes are rebuilt before an indexed query uses them, so index dirtiness affects rebuild cost, not query correctness. +Dirty indexes are rebuilt before an indexed query uses them, so index dirtiness affects rebuild cost, not query correctness. Updating an unindexed column should not make later indexed reads pay a lazy rebuild. Unique indexes are stricter than equality and sorted indexes. They enforce uniqueness for indexed columns and reject duplicate-producing inserts or updates with `COLQL_DUPLICATE_KEY`. Deletes free unique keys for reuse. 
diff --git a/docs/doc/11-serialization.md b/docs/doc/11-serialization.md index 490a583..7cbaea2 100644 --- a/docs/doc/11-serialization.md +++ b/docs/doc/11-serialization.md @@ -1,6 +1,6 @@ # Serialization -ColQL can serialize a table to an `ArrayBuffer`: +ColQL can serialize a process-local table snapshot to an `ArrayBuffer`: ```ts const buffer = users.serialize(); @@ -9,7 +9,7 @@ const restored = table.deserialize(buffer); ## What Is Serialized -Serialization stores: +Snapshot serialization stores: - schema metadata - row count @@ -18,7 +18,7 @@ Serialization stores: - dictionary column codes and dictionary values - boolean bit storage -Serialization does not materialize row objects. +Serialization does not materialize row objects. It is not durable storage, a database file format, or a cross-process coordination mechanism. ## What Is Not Serialized @@ -79,6 +79,12 @@ console.log(restored.toArray()); ## Validation -Deserialization validates the input buffer shape, magic header, version, metadata, and column payload sizes. Invalid input throws `ColQLError` with `COLQL_INVALID_SERIALIZED_DATA`. +Deserialization validates the input buffer shape, magic header, wire-format version, metadata, column names, row-count/capacity relationship, payload offsets, alignment, payload lengths, dictionary values, and dictionary codes. Invalid input throws `ColQLError` with `COLQL_INVALID_SERIALIZED_DATA`. + +## Wire Format Policy + +The serialized wire-format version is independent from the npm package version. Patch and minor releases should preserve the current wire format when possible, but ColQL is still pre-1.0 and unsupported snapshot versions fail loudly with `COLQL_INVALID_SERIALIZED_DATA`. Snapshots that carry the earlier string version tag (`@colql/colql@0.4.0`) are rejected by v0.5.0 with `COLQL_INVALID_SERIALIZED_DATA`, so v0.4.x snapshots must be regenerated rather than restored. + +Indexes are never trusted from serialized input. If future metadata contains serialized index state, current ColQL versions reject it rather than loading stale derived row positions. 
See [Error Handling](./10-error-handling.md) and [Indexing](./06-indexing.md). diff --git a/docs/doc/12-memory-model.md b/docs/doc/12-memory-model.md index 24aadb3..c6ba8b1 100644 --- a/docs/doc/12-memory-model.md +++ b/docs/doc/12-memory-model.md @@ -91,7 +91,7 @@ For very broad changes, expect temporary row-index snapshot memory. - Use indexes for selective hot queries. - Avoid indexes for columns with low selectivity unless queries prove useful. - Drop indexes to recover derived-memory overhead. -- Expect the first indexed query after mutation to include lazy rebuild cost if the needed index is dirty. +- Expect the first indexed query after a relevant indexed-column mutation to include lazy rebuild cost if the needed index is dirty. Updates to unrelated columns should not dirty unrelated indexes. - Avoid `toArray()` for huge result sets when counting or streaming is enough. - Remember that `heapUsed` alone can under-report typed-array storage; inspect `arrayBuffers` too. diff --git a/docs/doc/13-performance-and-benchmarks.md b/docs/doc/13-performance-and-benchmarks.md index 74156c3..b03aefd 100644 --- a/docs/doc/13-performance-and-benchmarks.md +++ b/docs/doc/13-performance-and-benchmarks.md @@ -58,6 +58,8 @@ These benchmarks live in `benchmarks/codspeed/` and intentionally use smaller de CodSpeed covers representative equality-index, `in`, sorted-range, compound filter, projection pushdown, larger filtered materialization, aggregation, scan fallback, mutation, lazy index rebuild, serialization, and backend-style dashboard query scenarios. +For v0.5.x, CodSpeed also tracks the difference between updates that touch indexed columns and updates that touch unrelated columns. Updating an unrelated column should not force a later indexed query to pay lazy rebuild cost. + Treat CodSpeed results as PR-level regression signals, not absolute production throughput claims. 
The existing manual benchmarks remain the source for larger local runs, memory analysis, 1M-row comparisons, and workload-specific investigation. Setup is excluded from measured hot paths where doing so keeps the benchmark valid. Some destructive mutation benchmarks cannot safely reuse a table after each iteration, so they include fresh table setup by design; those benchmarks include `setup-inclusive` in their names. High-RME benchmarks should be treated carefully and improved before they are used as release claims. @@ -154,6 +156,34 @@ tracked total = heapUsed + arrayBuffers Use tracked total when comparing ColQL storage with object arrays. +## Release Benchmark Checklist + +Before a release, run the correctness gates first: + +```sh +npm run build +npm run test:types +npm test +npm run bench:codspeed +``` + +Then run the local/manual benchmark suite as release evidence, not hard pass/fail thresholds: + +```sh +npm run benchmark:memory +npm run benchmark:query +npm run benchmark:indexed +npm run benchmark:range +npm run benchmark:optimizer +npm run benchmark:serialization +npm run benchmark:delete +npm run benchmark:physical-delete +npm run benchmark:array-comparison +ROWS=100000 npm run benchmark:session-analytics +``` + +Use `COLQL_BENCH_LARGE=1` for indexed and range benchmarks when checking larger local datasets. Memory-sensitive release notes should include `heapUsed`, `rss`, `external`, `arrayBuffers`, and tracked total when those metrics are available. 
+ In a local stabilization run on 2026-04-29, `benchmark:memory` reported for 100,000 rows: | Storage | heapUsed | arrayBuffers | tracked total | diff --git a/docs/doc/14-typescript-type-safety.md b/docs/doc/14-typescript-type-safety.md index 6c77b34..06b8135 100644 --- a/docs/doc/14-typescript-type-safety.md +++ b/docs/doc/14-typescript-type-safety.md @@ -158,4 +158,10 @@ Structured helper predicates use `where(...)`; callback predicates use `filter(f ## Type Tests -The repository includes `tests/type-inference.test-d.ts` with `@ts-expect-error` examples. These are useful references for the intended type surface. +The repository includes `tests/type-inference.test-d.ts` with positive inference checks and `@ts-expect-error` examples. These are part of the release gate: + +```sh +npm run test:types +``` + +This protects the public TypeScript surface for predicates, projections, mutations, unique indexes, `query.explain()`, and `onQuery`. diff --git a/docs/doc/15-limitations-and-design-decisions.md b/docs/doc/15-limitations-and-design-decisions.md index 26cc3c7..b1b7ef2 100644 --- a/docs/doc/15-limitations-and-design-decisions.md +++ b/docs/doc/15-limitations-and-design-decisions.md @@ -2,7 +2,7 @@ ColQL intentionally keeps a narrow, explicit feature set. -ColQL aims to keep the public API reasonably stable, and v0.4.x adds a public diagnostics API with `query.explain()`. Breaking changes may still happen before 1.0.0; the API is not fully frozen. +ColQL aims to keep the public API reasonably stable, and v0.5.x continues hardening public diagnostics, serialization validation, and type/API gates. Breaking changes may still happen before 1.0.0; the API is not fully frozen. 
## Not Included diff --git a/docs/doc/16-api-reference.md b/docs/doc/16-api-reference.md index 6f33742..55b9b79 100644 --- a/docs/doc/16-api-reference.md +++ b/docs/doc/16-api-reference.md @@ -40,8 +40,18 @@ const restored = table.deserialize(buffer); ```ts type QueryInfo = { duration: number; + durationMs?: number; rowsScanned: number; indexUsed: boolean; + scanType?: "index" | "full"; + selectedIndex?: string; + reasonCode?: QueryExplainReasonCode; + candidateRows?: number; + materializedRows?: number; + resultCount?: number; + projectionPushdown?: boolean; + dirtyIndexRebuildPaid?: boolean; + dirtyIndexReason?: "equality" | "sorted" | "unique"; }; type QueryHook = (info: QueryInfo) => void; @@ -234,6 +244,7 @@ type QueryExplainReasonCode = type QueryExplainPlan = { scanType: "index" | "full"; indexesUsed: readonly string[]; + selectedIndex?: string; predicates: number; predicateOrder: readonly string[]; projectionPushdown: boolean; @@ -248,6 +259,7 @@ Fields: - `scanType`: whether execution is expected to use an index or full scan. - `indexesUsed`: selected index labels such as `equality:status` or `sorted:startedAt`. +- `selectedIndex`: the selected index label when an index plan is expected. - `predicates`: structured predicates plus callback predicates. - `predicateOrder`: structured predicate evaluation order after planner ordering. - `projectionPushdown`: `true` when `select(...)` limits materialized columns. @@ -371,7 +383,7 @@ users.getIndexedCandidatePlan(filters); users.getIndexDebugPlan(filters); ``` -Use `query.explain()` for public query diagnostics. Queries still expose `__debugPlan()` for internal tests and low-level debugging, but application code should not depend on it as a stable planning contract. +Use `query.explain()` for stable public query diagnostics. The scan/materialization counters and typed reads are advanced diagnostics. 
`getIndexedCandidatePlan()`, `getIndexDebugPlan()`, and query `__debugPlan()` are unstable internal diagnostics retained for tests and low-level debugging; application code should not depend on them as stable planning contracts. ## Errors diff --git a/package-lock.json b/package-lock.json index 9da97be..bc66d03 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,12 +1,12 @@ { "name": "@colql/colql", - "version": "0.4.0", + "version": "0.5.0", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "@colql/colql", - "version": "0.4.0", + "version": "0.5.0", "license": "MIT", "devDependencies": { "@codspeed/vitest-plugin": "^5.4.0", diff --git a/package.json b/package.json index 73e153a..4508572 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "@colql/colql", - "version": "0.4.0", + "version": "0.5.0", "description": "Memory-efficient in-memory columnar query engine for TypeScript", "main": "dist/index.js", "module": "dist/index.mjs", @@ -12,6 +12,8 @@ "build": "tsup src/index.ts --format cjs,esm --dts", "dev": "tsup src/index.ts --watch", "test": "vitest run", + "test:types": "tsc --noEmit -p tsconfig.type-tests.json", + "check": "npm run build && npm run test:types && npm test", "bench:codspeed": "vitest bench --run --config vitest.bench.config.mts", "benchmark:memory": "node --expose-gc benchmarks/memory.mjs", "benchmark:query": "node benchmarks/query.mjs", diff --git a/src/indexing/index-manager.ts b/src/indexing/index-manager.ts index df95853..2b9977d 100644 --- a/src/indexing/index-manager.ts +++ b/src/indexing/index-manager.ts @@ -96,7 +96,7 @@ export class IndexManager { private readonly indexesByColumn = new Map(); private readonly sortedIndexesByColumn = new Map(); private readonly uniqueIndexesByColumn = new Map(); - private equalityDirty = false; + private readonly dirtyEqualityColumns = new Set(); create( columnName: string, @@ -121,6 +121,7 @@ export class IndexManager { if 
(!this.indexesByColumn.delete(columnName)) { throw new ColQLError("COLQL_INDEX_NOT_FOUND", `Index not found for column "${columnName}".`); } + this.dirtyEqualityColumns.delete(columnName); } has(columnName: string): boolean { @@ -144,8 +145,9 @@ export class IndexManager { throw new ColQLError("COLQL_INDEX_NOT_FOUND", `Index not found for column "${columnName}".`); } - if (this.equalityDirty) { - this.rebuildEqualityIndexes(rowCount, readComparableValue); + if (this.dirtyEqualityColumns.has(columnName)) { + this.indexesByColumn.set(columnName, this.buildEqualityIndex(columnName, rowCount, readComparableValue)); + this.dirtyEqualityColumns.delete(columnName); return; } @@ -162,7 +164,7 @@ export class IndexManager { } addRow(columnName: string, value: number | boolean, rowIndex: number): void { - if (this.equalityDirty) { + if (this.dirtyEqualityColumns.has(columnName)) { return; } @@ -323,14 +325,36 @@ export class IndexManager { } } - markPerformanceDirty(): void { - if (this.indexesByColumn.size > 0) { - this.equalityDirty = true; + markSortedColumnsDirty(columns: readonly string[]): void { + for (const column of columns) { + this.sortedIndexesByColumn.get(column)?.markDirty(); + } + } + + markEqualityDirty(): void { + for (const column of this.indexesByColumn.keys()) { + this.dirtyEqualityColumns.add(column); + } + } + + markEqualityColumnsDirty(columns: readonly string[]): void { + for (const column of columns) { + if (this.indexesByColumn.has(column)) { + this.dirtyEqualityColumns.add(column); + } } + } + markPerformanceDirty(): void { + this.markEqualityDirty(); this.markSortedDirty(); } + markPerformanceColumnsDirty(columns: readonly string[]): void { + this.markEqualityColumnsDirty(columns); + this.markSortedColumnsDirty(columns); + } + markUniqueDirty(columns?: readonly string[]): void { const indexes = columns === undefined ? 
[...this.uniqueIndexesByColumn.values()] @@ -513,7 +537,7 @@ export class IndexManager { for (const filter of filters) { if ( (filter.operator === "=" || filter.operator === "in") && - this.equalityDirty && + this.dirtyEqualityColumns.has(filter.columnName) && this.indexesByColumn.has(filter.columnName) ) { return { @@ -630,12 +654,18 @@ export class IndexManager { rowCount: number, readComparableValue?: (rowIndex: number, columnName: string) => number | boolean, ): void { - if (!this.equalityDirty || readComparableValue === undefined) { + if (this.dirtyEqualityColumns.size === 0 || readComparableValue === undefined) { return; } - this.rebuildEqualityIndexes(rowCount, readComparableValue); - this.equalityDirty = false; + for (const column of [...this.dirtyEqualityColumns]) { + if (!this.indexesByColumn.has(column)) { + this.dirtyEqualityColumns.delete(column); + continue; + } + this.indexesByColumn.set(column, this.buildEqualityIndex(column, rowCount, readComparableValue)); + this.dirtyEqualityColumns.delete(column); + } } private rebuildEqualityIndexes( @@ -648,7 +678,7 @@ export class IndexManager { this.indexesByColumn.set(column, this.buildEqualityIndex(column, rowCount, readComparableValue)); } - this.equalityDirty = false; + this.dirtyEqualityColumns.clear(); } private rebuildUniqueIfDirty( diff --git a/src/query.ts b/src/query.ts index 95b496d..dd32e19 100644 --- a/src/query.ts +++ b/src/query.ts @@ -137,7 +137,7 @@ export class Query implements Iterable toArray(): TResult[] { if (this.source.hasQueryHook()) { - return this.runTerminal(() => this.collectArray()); + return this.runTerminal(() => this.collectArray(), (result) => result.length); } return this.collectArray(); @@ -151,7 +151,10 @@ export class Query implements Iterable first(): TResult | undefined { if (this.source.hasQueryHook()) { - return this.runTerminal(() => this.firstUninstrumented()); + return this.runTerminal( + () => this.firstUninstrumented(), + (result) => result === undefined ? 
0 : 1, + ); } return this.firstUninstrumented(); @@ -216,7 +219,7 @@ export class Query implements Iterable count(): number { if (this.source.hasQueryHook()) { - return this.runTerminal(() => this.countUninstrumented()); + return this.runTerminal(() => this.countUninstrumented(), (result) => result); } return this.countUninstrumented(); @@ -416,7 +419,10 @@ export class Query implements Iterable assertPositiveInteger(n, "top"); this.assertNumericColumn(columnName); if (this.source.hasQueryHook()) { - return this.runTerminal(() => this.topOrBottom(n, columnName, "top")); + return this.runTerminal( + () => this.topOrBottom(n, columnName, "top"), + (result) => result.length, + ); } return this.topOrBottom(n, columnName, "top"); @@ -426,7 +432,10 @@ export class Query implements Iterable assertPositiveInteger(n, "bottom"); this.assertNumericColumn(columnName); if (this.source.hasQueryHook()) { - return this.runTerminal(() => this.topOrBottom(n, columnName, "bottom")); + return this.runTerminal( + () => this.topOrBottom(n, columnName, "bottom"), + (result) => result.length, + ); } return this.topOrBottom(n, columnName, "bottom"); @@ -434,7 +443,10 @@ export class Query implements Iterable update(partialRow: Partial>): MutationResult { if (this.source.hasQueryHook()) { - return this.runTerminal(() => this.updateUninstrumented(partialRow)); + return this.runTerminal( + () => this.updateUninstrumented(partialRow), + (result) => result.affectedRows, + ); } return this.updateUninstrumented(partialRow); @@ -442,7 +454,10 @@ export class Query implements Iterable delete(): MutationResult { if (this.source.hasQueryHook()) { - return this.runTerminal(() => this.deleteUninstrumented()); + return this.runTerminal( + () => this.deleteUninstrumented(), + (result) => result.affectedRows, + ); } return this.deleteUninstrumented(); @@ -493,6 +508,9 @@ export class Query implements Iterable scanType: plan.mode === "index" ? "index" : "full", indexesUsed: plan.mode === "index" ? 
[`${plan.source}:${plan.column}`] : [], + ...(plan.mode === "index" + ? { selectedIndex: `${plan.source}:${plan.column}` } + : {}), predicates, predicateOrder, projectionPushdown: this.selectedColumns !== undefined, @@ -505,6 +523,10 @@ export class Query implements Iterable }; } + /** + * @internal Unstable diagnostic surface retained for tests and low-level debugging. + * Application code should use explain(). + */ __debugPlan(): ReturnType["getIndexDebugPlan"]> { // Internal diagnostics retained for existing tests/debugging. Application // code should use the public explain() contract instead. @@ -641,21 +663,58 @@ export class Query implements Iterable return (this.source as unknown as MutationSource).deleteRows(this.snapshotMatchingRowIndexes()); } - private runTerminal(operation: () => T): T { + private runTerminal( + operation: () => T, + resultCount?: (result: T) => number | undefined, + ): T { const startScannedRows = this.source.scannedRowCount; + const startMaterializedRows = this.source.materializedRowCount; const start = Date.now(); - const indexUsed = this.usesIndexPlan(); + const plan = this.executionExplainPlan(); + const selectedIndex = + plan?.mode === "index" ? `${plan.source}:${plan.column}` : undefined; + const indexUsed = plan?.mode === "index"; + const dirtyIndexRebuildPaid = + plan?.mode === "index" && + plan.indexState === "dirty" && + plan.reasonCode === "INDEX_DIRTY_WOULD_REBUILD_ON_EXECUTION"; const result = operation(); + const duration = Date.now() - start; + const materializedRows = + this.source.materializedRowCount - startMaterializedRows; + const terminalResultCount = resultCount?.(result); this.source.notifyQuery({ - duration: Date.now() - start, + duration, + durationMs: duration, rowsScanned: this.source.scannedRowCount - startScannedRows, indexUsed, + scanType: indexUsed ? "index" : "full", + ...(selectedIndex !== undefined ? { selectedIndex } : {}), + ...(plan?.reasonCode !== undefined ? 
{ reasonCode: plan.reasonCode } : {}), + ...(plan?.candidateCount !== undefined + ? { candidateRows: plan.candidateCount } + : {}), + materializedRows, + ...(terminalResultCount !== undefined + ? { resultCount: terminalResultCount } + : {}), + projectionPushdown: this.selectedColumns !== undefined, + dirtyIndexRebuildPaid, + ...(dirtyIndexRebuildPaid && plan?.mode === "index" + ? { dirtyIndexReason: plan.source } + : {}), }); return result; } - private usesIndexPlan(): boolean { - return this.rowPredicates.length === 0 && this.source.getIndexDebugPlan(this.filters).mode === "index"; + private executionExplainPlan(): IndexExplainPlan | undefined { + if (this.rowPredicates.length > 0) { + return undefined; + } + + return (this.source as unknown as ExplainPlanSource).getIndexExplainPlan( + this.filters, + ); } private reasonFor(reasonCode: QueryExplainReasonCode): string { diff --git a/src/table.ts b/src/table.ts index f2efb2f..1c9d92a 100644 --- a/src/table.ts +++ b/src/table.ts @@ -16,6 +16,7 @@ import type { SortedIndexStats } from "./indexing/sorted-index"; import type { UniqueIndexStats } from "./indexing/unique-index"; import { assertColumnExists, + assertDictionaryValues, assertNonNegativeInteger, assertOperator, assertOperatorAllowed, @@ -46,7 +47,7 @@ import type { const DEFAULT_CAPACITY = 1024; // Binary serialization format version. This is intentionally separate from // the package release version so patch/minor releases can preserve the wire format. 
-const SERIALIZATION_VERSION = "@colql/colql@0.4.0"; +const SERIALIZATION_VERSION = 1; const SERIALIZATION_MAGIC = "COLQL003"; const MAGIC_BYTES = 8; const HEADER_LENGTH_BYTES = 4; @@ -63,7 +64,7 @@ type SerializedColumnMeta = { }; type SerializedTableMeta = { - readonly version: typeof SERIALIZATION_VERSION; + readonly version: number; readonly rowCount: number; readonly capacity: number; readonly columns: readonly SerializedColumnMeta[]; @@ -1008,6 +1009,10 @@ export class Table { return new Query(this); } + /** + * @internal Unstable diagnostic surface retained for tests and low-level debugging. + * Application code should use query.explain(). + */ getIndexedCandidatePlan( filters: readonly IndexFilter[], ): IndexCandidatePlan | undefined { @@ -1021,6 +1026,10 @@ export class Table { ); } + /** + * @internal Unstable diagnostic surface retained for tests and low-level debugging. + * Application code should use query.explain(). + */ getIndexDebugPlan(filters: readonly IndexFilter[]): IndexDebugPlan { return this.indexManager.debugPlan( filters, @@ -1085,6 +1094,7 @@ export class Table { const meta = Table.parseSerializedMeta( new TextDecoder().decode(bytes.subarray(headerStart, headerEnd)), ); + Table.validateSerializedMeta(meta, bytes.byteLength, headerEnd); if (meta.version !== SERIALIZATION_VERSION) { throw new ColQLError( "COLQL_INVALID_SERIALIZED_DATA", @@ -1107,12 +1117,26 @@ export class Table { columnMeta.byteOffset, columnMeta.byteOffset + columnMeta.byteLength, ); - const { definition, storage } = Table.restoreColumn( - columnMeta, - meta.capacity, - meta.rowCount, - view, - ); + let restoredColumn: { definition: ColumnDefinition; storage: ColumnStorage }; + try { + restoredColumn = Table.restoreColumn( + columnMeta, + meta.capacity, + meta.rowCount, + view, + ); + } catch (error) { + if (error instanceof ColQLError && error.code === "COLQL_INVALID_SERIALIZED_DATA") { + throw error; + } + + throw new ColQLError( + "COLQL_INVALID_SERIALIZED_DATA", + 
`Invalid serialized ColQL data: failed to restore column "${columnMeta.name}".`, + error, + ); + } + const { definition, storage } = restoredColumn; schemaEntries.push([columnMeta.name, definition]); storageEntries.push([columnMeta.name, storage]); } @@ -1369,10 +1393,11 @@ export class Table { private markIndexesAfterUpdate( values: readonly [keyof TSchema, ColumnValue][], ): void { - this.indexManager.markPerformanceDirty(); - const uniqueColumns = values - .map(([key]) => String(key)) - .filter((columnName) => this.indexManager.hasUnique(columnName)); + const changedColumns = values.map(([key]) => String(key)); + this.indexManager.markPerformanceColumnsDirty(changedColumns); + const uniqueColumns = changedColumns.filter((columnName) => + this.indexManager.hasUnique(columnName), + ); if (uniqueColumns.length > 0) { this.indexManager.markUniqueDirty(uniqueColumns); } @@ -1642,21 +1667,15 @@ export class Table { private static parseSerializedMeta(json: string): SerializedTableMeta { try { - const parsed = JSON.parse(json) as SerializedTableMeta; - if ( - typeof parsed !== "object" || - parsed === null || - !Number.isInteger(parsed.rowCount) || - !Number.isInteger(parsed.capacity) || - !Array.isArray(parsed.columns) - ) { + const parsed = JSON.parse(json) as unknown; + if (typeof parsed !== "object" || parsed === null || Array.isArray(parsed)) { throw new ColQLError( "COLQL_INVALID_SERIALIZED_DATA", "Invalid serialized ColQL data: missing schema metadata.", ); } - return parsed; + return parsed as SerializedTableMeta; } catch (error) { if (error instanceof ColQLError) { throw error; @@ -1668,6 +1687,305 @@ export class Table { } } + private static validateSerializedMeta( + meta: SerializedTableMeta, + totalBytes: number, + headerEnd: number, + ): void { + const record = meta as unknown as Record; + if ("indexes" in record && record.indexes !== undefined) { + throw new ColQLError( + "COLQL_INVALID_SERIALIZED_DATA", + "Invalid serialized ColQL data: serialized indexes 
are not supported.", + ); + } + + if (typeof record.version !== "number" || !Number.isInteger(record.version)) { + throw new ColQLError( + "COLQL_INVALID_SERIALIZED_DATA", + `Unsupported ColQL serialization version "${String(record.version)}".`, + ); + } + + if (!Number.isInteger(record.rowCount) || (record.rowCount as number) < 0) { + throw new ColQLError( + "COLQL_INVALID_SERIALIZED_DATA", + "Invalid serialized ColQL data: rowCount must be a non-negative integer.", + ); + } + + if (!Number.isInteger(record.capacity) || (record.capacity as number) < 1) { + throw new ColQLError( + "COLQL_INVALID_SERIALIZED_DATA", + "Invalid serialized ColQL data: capacity must be a positive integer.", + ); + } + + if ((record.rowCount as number) > (record.capacity as number)) { + throw new ColQLError( + "COLQL_INVALID_SERIALIZED_DATA", + "Invalid serialized ColQL data: rowCount exceeds capacity.", + ); + } + + if (!Array.isArray(record.columns) || record.columns.length === 0) { + throw new ColQLError( + "COLQL_INVALID_SERIALIZED_DATA", + "Invalid serialized ColQL data: expected at least one column.", + ); + } + + const seenNames = new Set(); + for (const columnMeta of record.columns) { + if (typeof columnMeta !== "object" || columnMeta === null || Array.isArray(columnMeta)) { + throw new ColQLError( + "COLQL_INVALID_SERIALIZED_DATA", + "Invalid serialized ColQL data: invalid column metadata.", + ); + } + + const name = (columnMeta as Partial).name; + if (typeof name !== "string" || name.length === 0) { + throw new ColQLError( + "COLQL_INVALID_SERIALIZED_DATA", + "Invalid serialized ColQL data: column name must be a non-empty string.", + ); + } + + if (seenNames.has(name)) { + throw new ColQLError( + "COLQL_INVALID_SERIALIZED_DATA", + `Invalid serialized ColQL data: duplicate column "${name}".`, + ); + } + seenNames.add(name); + } + + seenNames.clear(); + const ranges: { start: number; end: number; name: string }[] = []; + for (const columnMeta of record.columns) { + 
Table.validateSerializedColumnMeta( + columnMeta, + record.capacity as number, + totalBytes, + headerEnd, + seenNames, + ranges, + ); + } + + ranges.sort((left, right) => left.start - right.start); + for (let index = 1; index < ranges.length; index += 1) { + if (ranges[index].start < ranges[index - 1].end) { + throw new ColQLError( + "COLQL_INVALID_SERIALIZED_DATA", + `Invalid serialized ColQL data: column "${ranges[index].name}" overlaps column "${ranges[index - 1].name}".`, + ); + } + } + } + + private static validateSerializedColumnMeta( + columnMeta: unknown, + capacity: number, + totalBytes: number, + headerEnd: number, + seenNames: Set, + ranges: { start: number; end: number; name: string }[], + ): void { + if (typeof columnMeta !== "object" || columnMeta === null || Array.isArray(columnMeta)) { + throw new ColQLError( + "COLQL_INVALID_SERIALIZED_DATA", + "Invalid serialized ColQL data: invalid column metadata.", + ); + } + + const meta = columnMeta as Partial; + if (typeof meta.name !== "string" || meta.name.length === 0) { + throw new ColQLError( + "COLQL_INVALID_SERIALIZED_DATA", + "Invalid serialized ColQL data: column name must be a non-empty string.", + ); + } + + if (seenNames.has(meta.name)) { + throw new ColQLError( + "COLQL_INVALID_SERIALIZED_DATA", + `Invalid serialized ColQL data: duplicate column "${meta.name}".`, + ); + } + seenNames.add(meta.name); + + if (!Table.isNonNegativeInteger(meta.byteLength)) { + throw new ColQLError( + "COLQL_INVALID_SERIALIZED_DATA", + `Invalid serialized ColQL data: column "${meta.name}" byteLength must be a non-negative integer.`, + ); + } + + if (!Table.isNonNegativeInteger(meta.byteOffset)) { + throw new ColQLError( + "COLQL_INVALID_SERIALIZED_DATA", + `Invalid serialized ColQL data: column "${meta.name}" byteOffset must be a non-negative integer.`, + ); + } + + if (!Table.isPositiveInteger(meta.alignment)) { + throw new ColQLError( + "COLQL_INVALID_SERIALIZED_DATA", + `Invalid serialized ColQL data: column 
"${meta.name}" alignment must be a positive integer.`, + ); + } + + const expected = Table.expectedSerializedColumn(meta, capacity); + if (meta.alignment !== expected.alignment) { + throw new ColQLError( + "COLQL_INVALID_SERIALIZED_DATA", + `Invalid serialized ColQL data: column "${meta.name}" alignment is invalid.`, + ); + } + + if (meta.byteLength !== expected.byteLength) { + throw new ColQLError( + "COLQL_INVALID_SERIALIZED_DATA", + `Invalid serialized ColQL data: column "${meta.name}" byte length is invalid.`, + ); + } + + if (meta.byteOffset < headerEnd || meta.byteOffset % meta.alignment !== 0) { + throw new ColQLError( + "COLQL_INVALID_SERIALIZED_DATA", + `Invalid serialized ColQL data: column "${meta.name}" byte offset is invalid.`, + ); + } + + const byteEnd = meta.byteOffset + meta.byteLength; + if (byteEnd > totalBytes || byteEnd < meta.byteOffset) { + throw new ColQLError( + "COLQL_INVALID_SERIALIZED_DATA", + `Invalid serialized ColQL data: column "${meta.name}" exceeds input size.`, + ); + } + + ranges.push({ start: meta.byteOffset, end: byteEnd, name: meta.name }); + } + + private static expectedSerializedColumn( + meta: Partial, + capacity: number, + ): { byteLength: number; alignment: number } { + if (meta.kind === "numeric") { + if (typeof meta.type !== "string") { + throw new ColQLError( + "COLQL_INVALID_SERIALIZED_DATA", + `Invalid serialized ColQL data: invalid numeric column "${String(meta.name)}" type "${String(meta.type)}".`, + ); + } + + const bytesPerElement = Table.bytesPerNumericElement(meta.type); + return { + byteLength: capacity * bytesPerElement, + alignment: Table.alignmentForSerializedNumericType(meta.type), + }; + } + + if (meta.kind === "dictionary") { + if (!Array.isArray(meta.values) || meta.values.length === 0) { + throw new ColQLError( + "COLQL_INVALID_SERIALIZED_DATA", + `Invalid serialized ColQL data: invalid dictionary column "${String(meta.name)}" values.`, + ); + } + + try { + assertDictionaryValues(meta.values, 
String(meta.name)); + } catch (error) { + if (error instanceof ColQLError) { + throw new ColQLError( + "COLQL_INVALID_SERIALIZED_DATA", + error.message, + error.details, + ); + } + throw error; + } + const bytesPerElement = Table.bytesPerDictionaryCode(meta.values.length); + return { + byteLength: capacity * bytesPerElement, + alignment: bytesPerElement, + }; + } + + if (meta.kind === "boolean") { + return { + byteLength: Math.ceil(capacity / 8), + alignment: 1, + }; + } + + throw new ColQLError( + "COLQL_INVALID_SERIALIZED_DATA", + `Invalid serialized ColQL data: unknown column kind "${String(meta.kind)}".`, + ); + } + + private static isNonNegativeInteger(value: unknown): value is number { + return Number.isInteger(value) && (value as number) >= 0; + } + + private static isPositiveInteger(value: unknown): value is number { + return Number.isInteger(value) && (value as number) > 0; + } + + private static bytesPerNumericElement(type: string): number { + switch (type) { + case "int16": + case "uint16": + return 2; + case "int32": + case "uint32": + case "float32": + return 4; + case "float64": + return 8; + case "uint8": + return 1; + default: + throw new ColQLError( + "COLQL_INVALID_SERIALIZED_DATA", + `Invalid serialized ColQL data: invalid numeric column type "${String(type)}".`, + ); + } + } + + private static alignmentForSerializedNumericType(type: string): number { + switch (type) { + case "int16": + case "uint16": + return 2; + case "int32": + case "uint32": + case "float32": + return 4; + case "float64": + return 8; + default: + return 1; + } + } + + private static bytesPerDictionaryCode(size: number): number { + if (size <= 255) { + return 1; + } + + if (size <= 65_535) { + return 2; + } + + return 4; + } + private static restoreColumn( meta: SerializedColumnMeta, capacity: number, @@ -1803,39 +2121,46 @@ export class Table { const values = meta.values as unknown as readonly [string, ...string[]]; const definition = column.dictionary(values); if 
(values.length <= 255) { + const codes = new Uint8Array(bytes.buffer, bytes.byteOffset, capacity); + Table.validateDictionaryCodes(meta.name, values.length, codes, rowCount); return { definition, - storage: new DictionaryColumnStorage( - values, - capacity, - new Uint8Array(bytes.buffer, bytes.byteOffset, capacity), - rowCount, - ), + storage: new DictionaryColumnStorage(values, capacity, codes, rowCount), }; } if (values.length <= 65_535) { + const codes = new Uint16Array(bytes.buffer, bytes.byteOffset, capacity); + Table.validateDictionaryCodes(meta.name, values.length, codes, rowCount); return { definition, - storage: new DictionaryColumnStorage( - values, - capacity, - new Uint16Array(bytes.buffer, bytes.byteOffset, capacity), - rowCount, - ), + storage: new DictionaryColumnStorage(values, capacity, codes, rowCount), }; } + const codes = new Uint32Array(bytes.buffer, bytes.byteOffset, capacity); + Table.validateDictionaryCodes(meta.name, values.length, codes, rowCount); return { definition, - storage: new DictionaryColumnStorage( - values, - capacity, - new Uint32Array(bytes.buffer, bytes.byteOffset, capacity), - rowCount, - ), + storage: new DictionaryColumnStorage(values, capacity, codes, rowCount), }; } + + private static validateDictionaryCodes( + columnName: string, + valueCount: number, + codes: Uint8Array | Uint16Array | Uint32Array, + rowCount: number, + ): void { + for (let rowIndex = 0; rowIndex < rowCount; rowIndex += 1) { + if (codes[rowIndex] >= valueCount) { + throw new ColQLError( + "COLQL_INVALID_SERIALIZED_DATA", + `Invalid serialized ColQL data: dictionary column "${columnName}" contains invalid code ${codes[rowIndex]}.`, + ); + } + } + } } export function table( diff --git a/src/types.ts b/src/types.ts index 400ed5e..5aa4aa3 100644 --- a/src/types.ts +++ b/src/types.ts @@ -66,8 +66,18 @@ export type RowPredicate = ( export type QueryInfo = { readonly duration: number; + readonly durationMs?: number; readonly rowsScanned: number; readonly 
indexUsed: boolean; + readonly scanType?: "index" | "full"; + readonly selectedIndex?: string; + readonly reasonCode?: QueryExplainReasonCode; + readonly candidateRows?: number; + readonly materializedRows?: number; + readonly resultCount?: number; + readonly projectionPushdown?: boolean; + readonly dirtyIndexRebuildPaid?: boolean; + readonly dirtyIndexReason?: "equality" | "sorted" | "unique"; }; export type QueryHook = (info: QueryInfo) => void; @@ -88,6 +98,7 @@ export type QueryExplainReasonCode = export type QueryExplainPlan = { readonly scanType: "index" | "full"; readonly indexesUsed: readonly string[]; + readonly selectedIndex?: string; readonly predicates: number; readonly predicateOrder: readonly string[]; readonly projectionPushdown: boolean; diff --git a/tests/column-scoped-index-invalidation.test.ts b/tests/column-scoped-index-invalidation.test.ts new file mode 100644 index 0000000..d35457d --- /dev/null +++ b/tests/column-scoped-index-invalidation.test.ts @@ -0,0 +1,140 @@ +import { describe, expect, it } from "vitest"; +import { ColQLError, column, table } from "../src"; + +function usersTable() { + const users = table({ + id: column.uint32(), + age: column.uint8(), + score: column.uint32(), + status: column.dictionary(["active", "passive", "archived"] as const), + active: column.boolean(), + }); + + users.insertMany( + Array.from({ length: 80 }, (_unused, id) => ({ + id, + age: (id * 7) % 100, + score: id * 10, + status: id % 3 === 0 ? "active" : id % 3 === 1 ? 
"passive" : "archived", + active: id % 2 === 0, + })), + ); + + return users + .createUniqueIndex("id") + .createIndex("id") + .createIndex("status") + .createSortedIndex("age"); +} + +describe("column-scoped index invalidation", () => { + it("does not dirty equality or sorted indexes when updating an unrelated column", () => { + const users = usersTable(); + + expect(users.where("status", "=", "active").explain()).toEqual( + expect.objectContaining({ indexState: "fresh" }), + ); + expect(users.where("age", ">=", 95).explain()).toEqual( + expect.objectContaining({ indexState: "fresh" }), + ); + + expect(users.updateMany({ status: "active" }, { score: 999 })).toEqual({ + affectedRows: 27, + }); + + expect(users.where("status", "=", "active").explain()).toEqual( + expect.objectContaining({ indexState: "fresh" }), + ); + expect(users.where("age", ">=", 95).explain()).toEqual( + expect.objectContaining({ indexState: "fresh" }), + ); + }); + + it("dirties only equality indexes for updated equality-indexed columns", () => { + const users = usersTable(); + + users.updateMany({ status: "active" }, { status: "passive" }); + + expect(users.where("status", "=", "passive").explain()).toEqual( + expect.objectContaining({ + selectedIndex: "equality:status", + indexState: "dirty", + reasonCode: "INDEX_DIRTY_WOULD_REBUILD_ON_EXECUTION", + }), + ); + expect(users.where("age", ">=", 95).explain()).toEqual( + expect.objectContaining({ selectedIndex: "sorted:age", indexState: "fresh" }), + ); + expect(users.where("status", "=", "passive").toArray()).toEqual( + users.toArray().filter((row) => row.status === "passive"), + ); + expect(users.where("status", "=", "passive").explain()).toEqual( + expect.objectContaining({ reasonCode: "INDEX_CANDIDATE_SET_TOO_LARGE" }), + ); + }); + + it("dirties only sorted indexes for updated sorted-indexed columns", () => { + const users = usersTable(); + + users.updateMany({ status: "passive" }, { age: 99 }); + + expect(users.where("age", ">=", 
95).explain()).toEqual( + expect.objectContaining({ + selectedIndex: "sorted:age", + indexState: "dirty", + reasonCode: "INDEX_DIRTY_WOULD_REBUILD_ON_EXECUTION", + }), + ); + expect(users.where("status", "=", "active").explain()).toEqual( + expect.objectContaining({ selectedIndex: "equality:status", indexState: "fresh" }), + ); + expect(users.where("age", ">=", 95).toArray()).toEqual( + users.toArray().filter((row) => row.age >= 95), + ); + expect(users.where("age", ">=", 95).explain()).toEqual( + expect.objectContaining({ indexState: "fresh" }), + ); + }); + + it("does not dirty indexes for no-match or failed updates", () => { + const users = usersTable(); + const before = users.toArray(); + + expect(users.updateMany({ id: 999_999 }, { status: "archived" })).toEqual({ + affectedRows: 0, + }); + expect(users.where("status", "=", "active").explain()).toEqual( + expect.objectContaining({ indexState: "fresh" }), + ); + expect(users.where("age", ">=", 95).explain()).toEqual( + expect.objectContaining({ indexState: "fresh" }), + ); + + expect(() => users.updateMany({ status: "active" }, { age: 300 })).toThrow( + ColQLError, + ); + expect(users.toArray()).toEqual(before); + expect(users.where("status", "=", "active").explain()).toEqual( + expect.objectContaining({ indexState: "fresh" }), + ); + expect(users.where("age", ">=", 95).explain()).toEqual( + expect.objectContaining({ indexState: "fresh" }), + ); + }); + + it("keeps delete behavior broad because row positions shift", () => { + const users = usersTable(); + + users.deleteMany({ status: "archived", age: { lt: 50 } }); + + expect(users.where("status", "=", "active").explain()).toEqual( + expect.objectContaining({ indexState: "dirty" }), + ); + expect(users.where("age", ">=", 95).explain()).toEqual( + expect.objectContaining({ indexState: "dirty" }), + ); + expect(users.findBy("id", 10)).toEqual( + users.toArray().find((row) => row.id === 10), + ); + }); +}); diff --git a/tests/deterministic-mutation-fuzzer.test.ts 
b/tests/deterministic-mutation-fuzzer.test.ts new file mode 100644 index 0000000..87f0943 --- /dev/null +++ b/tests/deterministic-mutation-fuzzer.test.ts @@ -0,0 +1,279 @@ +import { describe, expect, it } from "vitest"; +import { column, table, type RowForSchema } from "../src"; + +const schema = { + id: column.uint32(), + tenantId: column.uint16(), + age: column.uint8(), + score: column.uint32(), + createdAt: column.uint32(), + status: column.dictionary(["active", "trial", "paused", "churned"] as const), + category: column.dictionary(["free", "pro", "team", "enterprise"] as const), + active: column.boolean(), +}; + +type Row = RowForSchema; + +function rng(seed: number): () => number { + let state = seed >>> 0; + return () => { + state = (state * 1_664_525 + 1_013_904_223) >>> 0; + return state / 0x1_0000_0000; + }; +} + +function integer(random: () => number, maxExclusive: number): number { + return Math.floor(random() * maxExclusive); +} + +function pick( + random: () => number, + values: Values, +): Values[number] { + return values[integer(random, values.length)]; +} + +function rowForId(id: number, random: () => number): Row { + const statuses: readonly Row["status"][] = ["active", "trial", "paused", "churned"]; + const categories: readonly Row["category"][] = ["free", "pro", "team", "enterprise"]; + return { + id, + tenantId: 1 + integer(random, 32), + age: integer(random, 100), + score: integer(random, 10_000), + createdAt: 1_700_000_000 + integer(random, 1_000_000), + status: pick(random, statuses), + category: pick(random, categories), + active: integer(random, 2) === 0, + }; +} + +function createIndexedTable(rows: readonly Row[]) { + return table(schema) + .insertMany(rows) + .createUniqueIndex("id") + .createIndex("tenantId") + .createIndex("status") + .createIndex("category") + .createSortedIndex("age") + .createSortedIndex("createdAt"); +} + +function reindex(restored: ReturnType) { + return restored + .createUniqueIndex("id") + 
.createIndex("tenantId") + .createIndex("status") + .createIndex("category") + .createSortedIndex("age") + .createSortedIndex("createdAt"); +} + +type PredicateCase = { + readonly label: string; + readonly apply: (users: ReturnType) => ReturnType["query"]; + readonly test: (row: Row) => boolean; +}; + +function predicateCase(random: () => number): PredicateCase { + const status = pick(random, ["active", "trial", "paused", "churned"] as const); + const category = pick(random, ["free", "pro", "team", "enterprise"] as const); + const tenantId = 1 + integer(random, 32); + const age = integer(random, 100); + const createdAt = 1_700_000_000 + integer(random, 1_000_000); + + switch (integer(random, 6)) { + case 0: + return { + label: `status=${status}`, + apply: (users) => users.where("status", "=", status), + test: (row) => row.status === status, + }; + case 1: + return { + label: `category=${category}`, + apply: (users) => users.where("category", "=", category), + test: (row) => row.category === category, + }; + case 2: + return { + label: `tenantId=${tenantId}`, + apply: (users) => users.where("tenantId", "=", tenantId), + test: (row) => row.tenantId === tenantId, + }; + case 3: + return { + label: `age>=${age}`, + apply: (users) => users.where("age", ">=", age), + test: (row) => row.age >= age, + }; + case 4: + return { + label: `createdAt<${createdAt}`, + apply: (users) => users.where("createdAt", "<", createdAt), + test: (row) => row.createdAt < createdAt, + }; + default: + return { + label: "active=true", + apply: (users) => users.where("active", "=", true), + test: (row) => row.active, + }; + } +} + +function patch(random: () => number): Partial { + switch (integer(random, 5)) { + case 0: + return { score: integer(random, 10_000) }; + case 1: + return { age: integer(random, 100) }; + case 2: + return { status: pick(random, ["active", "trial", "paused", "churned"] as const) }; + case 3: + return { category: pick(random, ["free", "pro", "team", "enterprise"] as 
const) }; + default: + return { active: integer(random, 2) === 0 }; + } +} + +function assertParity( + users: ReturnType, + oracle: readonly Row[], + random: () => number, + context: string, +): void { + expect(users.toArray(), context).toEqual(oracle); + + for (let index = 0; index < 3; index += 1) { + const predicate = predicateCase(random); + expect(predicate.apply(users).toArray(), `${context}:${predicate.label}`).toEqual( + oracle.filter(predicate.test), + ); + } + + const target = oracle[integer(random, Math.max(oracle.length, 1))]; + if (target !== undefined) { + expect(users.findBy("id", target.id), `${context}:findBy`).toEqual(target); + } +} + +describe("deterministic mutation/index/serialization fuzzer", () => { + it("matches a JS array oracle across long deterministic sequences", () => { + for (const seed of [0xC01C, 0xD00D, 0x5EED, 0xBEEF, 0xFACE]) { + const random = rng(seed); + let nextId = 0; + const initialRows = Array.from({ length: 160 }, () => rowForId(nextId++, random)); + let oracle = initialRows.map((row) => ({ ...row })); + let users = createIndexedTable(oracle); + const history: string[] = []; + + const record = (entry: string): void => { + history.push(entry); + if (history.length > 40) { + history.shift(); + } + }; + + for (let step = 0; step < 260; step += 1) { + try { + switch (integer(random, 9)) { + case 0: { + const row = rowForId(nextId++, random); + users.insert(row); + oracle.push(row); + record(`insert:${row.id}`); + break; + } + case 1: { + const rows = Array.from({ length: 1 + integer(random, 3) }, () => + rowForId(nextId++, random), + ); + users.insertMany(rows); + oracle.push(...rows); + record(`insertMany:${rows.map((row) => row.id).join(",")}`); + break; + } + case 2: { + if (oracle.length === 0) break; + const rowIndex = integer(random, oracle.length); + const values = patch(random); + users.update(rowIndex, values); + oracle[rowIndex] = { ...oracle[rowIndex], ...values }; + 
record(`update:${rowIndex}:${JSON.stringify(values)}`); + break; + } + case 3: { + const predicate = predicateCase(random); + const values = patch(random); + const affected = oracle.filter(predicate.test).length; + expect(predicate.apply(users).update(values)).toEqual({ affectedRows: affected }); + oracle = oracle.map((row) => predicate.test(row) ? { ...row, ...values } : row); + record(`predicateUpdate:${predicate.label}:${JSON.stringify(values)}`); + break; + } + case 4: { + if (oracle.length === 0) break; + const rowIndex = integer(random, oracle.length); + users.delete(rowIndex); + oracle.splice(rowIndex, 1); + record(`delete:${rowIndex}`); + break; + } + case 5: { + const predicate = predicateCase(random); + const limit = 1 + integer(random, 8); + const offset = integer(random, 4); + const targetIds = oracle + .filter(predicate.test) + .slice(offset, offset + limit) + .map((row) => row.id); + expect(predicate.apply(users).offset(offset).limit(limit).delete()).toEqual({ + affectedRows: targetIds.length, + }); + oracle = oracle.filter((row) => !targetIds.includes(row.id)); + record(`predicateDelete:${predicate.label}:${offset}:${limit}`); + break; + } + case 6: { + if (oracle.length === 0) break; + const target = oracle[integer(random, oracle.length)]; + const values = patch(random); + expect(users.updateBy("id", target.id, values)).toEqual({ affectedRows: 1 }); + Object.assign(target, values); + record(`updateBy:${target.id}:${JSON.stringify(values)}`); + break; + } + case 7: { + if (oracle.length === 0) break; + const target = oracle[integer(random, oracle.length)]; + expect(users.deleteBy("id", target.id)).toEqual({ affectedRows: 1 }); + oracle = oracle.filter((row) => row.id !== target.id); + record(`deleteBy:${target.id}`); + break; + } + default: { + const restored = table.deserialize(users.serialize()); + expect(restored.indexes()).toEqual([]); + expect(restored.sortedIndexes()).toEqual([]); + expect(restored.uniqueIndexes()).toEqual([]); + users = 
reindex(restored) as ReturnType; + record("serialize:restore:reindex"); + break; + } + } + + if (step % 10 === 0) { + assertParity(users, oracle, random, `seed=${seed}:step=${step}`); + } + } catch (error) { + throw new Error( + `Fuzzer failed seed=${seed} step=${step} history=${history.join(" | ")} cause=${error instanceof Error ? error.message : String(error)}`, + ); + } + } + + assertParity(users, oracle, random, `seed=${seed}:final`); + } + }); +}); diff --git a/tests/on-query.test.ts b/tests/on-query.test.ts index 1a19508..a2141e0 100644 --- a/tests/on-query.test.ts +++ b/tests/on-query.test.ts @@ -30,12 +30,56 @@ describe("onQuery", () => { expect(users.where("id", "=", 4).count()).toBe(1); expect(events).toHaveLength(1); - expect(events[0]).toEqual(expect.objectContaining({ rowsScanned: 1, indexUsed: true })); + expect(events[0]).toEqual(expect.objectContaining({ + durationMs: events[0].duration, + rowsScanned: 1, + indexUsed: true, + scanType: "index", + selectedIndex: "equality:id", + resultCount: 1, + projectionPushdown: false, + dirtyIndexRebuildPaid: false, + })); expect(events[0].duration).toBeGreaterThanOrEqual(0); users.where("status", "=", "active").filter((row) => row.id < 4).toArray(); expect(events).toHaveLength(2); - expect(events[1]).toEqual(expect.objectContaining({ rowsScanned: users.rowCount, indexUsed: false })); + expect(events[1]).toEqual(expect.objectContaining({ + rowsScanned: users.rowCount, + indexUsed: false, + scanType: "full", + resultCount: 2, + })); + }); + + it("reports dirty index rebuilds paid by terminal execution", () => { + const events: QueryInfo[] = []; + const users = table(schema, { onQuery: (info) => events.push(info) }); + seed(users); + users.createIndex("status"); + + users.updateMany({ status: "active" }, { status: "passive" }); + events.length = 0; + users.where("status", "=", "passive").count(); + + expect(events).toHaveLength(1); + expect(events[0]).toEqual(expect.objectContaining({ + indexUsed: true, + 
selectedIndex: "equality:status", + reasonCode: "INDEX_DIRTY_WOULD_REBUILD_ON_EXECUTION", + dirtyIndexRebuildPaid: true, + dirtyIndexReason: "equality", + })); + }); + + it("does not invent result counts for numeric aggregations", () => { + const events: QueryInfo[] = []; + const users = table(schema, { onQuery: (info) => events.push(info) }); + seed(users); + + expect(users.avg("age")).toBe(4.5); + expect(events).toHaveLength(1); + expect(events[0]).not.toHaveProperty("resultCount"); }); it("does not instrument non-terminal query construction or streams", () => { diff --git a/tests/query-explain.test.ts b/tests/query-explain.test.ts index a977adb..897b189 100644 --- a/tests/query-explain.test.ts +++ b/tests/query-explain.test.ts @@ -76,6 +76,7 @@ describe("query explain", () => { expect(explain).toEqual({ scanType: "index", indexesUsed: ["equality:id"], + selectedIndex: "equality:id", predicates: 1, predicateOrder: ["id ="], projectionPushdown: false, @@ -214,9 +215,9 @@ describe("query explain", () => { it("matches dirty equality index execution behavior after lazy rebuild", () => { const users = usersFixture(); users.createIndex("id"); - users.updateMany({ status: "passive" }, { active: true }); + users.update(42, { id: 142 }); - const explain = users.where("id", "=", 42).explain(); + const explain = users.where("id", "=", 142).explain(); expect(explain).toEqual( expect.objectContaining({ @@ -229,11 +230,11 @@ describe("query explain", () => { expect(explain).not.toHaveProperty("candidateRows"); users.resetScanCounter(); - expect(users.where("id", "=", 42).toArray()).toEqual([ - { id: 42, age: 42, status: "active", active: true }, + expect(users.where("id", "=", 142).toArray()).toEqual([ + { id: 142, age: 42, status: "active", active: true }, ]); expect(users.scannedRowCount).toBe(1); - expect(users.where("id", "=", 42).explain()).toEqual( + expect(users.where("id", "=", 142).explain()).toEqual( expect.objectContaining({ scanType: "index", indexesUsed: 
["equality:id"], diff --git a/tests/scenarios/consistency/dirty-index-lifecycle.scenario.test.ts b/tests/scenarios/consistency/dirty-index-lifecycle.scenario.test.ts index c8b1700..d1246fd 100644 --- a/tests/scenarios/consistency/dirty-index-lifecycle.scenario.test.ts +++ b/tests/scenarios/consistency/dirty-index-lifecycle.scenario.test.ts @@ -5,7 +5,7 @@ import { expectDirtyIndex, expectFreshIndex } from "../helpers/explain"; import { updateOracle } from "../helpers/oracle"; describe("consistency dirty index lifecycle endpoint scenarios", () => { - it("PATCH /products dirty indexes explain before execution and become fresh after requery", () => { + it("PATCH /products dirties only indexes for changed columns", () => { const { products, oracle } = buildProductCatalogFixture(); updateOracle( @@ -27,11 +27,10 @@ describe("consistency dirty index lifecycle endpoint scenarios", () => { expectFreshIndex(products.where("status", "=", "inactive"), "equality:status"); const priceQuery = products.where("price", ">=", 49_000); - expectDirtyIndex(priceQuery, "sorted:price"); + expectFreshIndex(priceQuery, "sorted:price"); expectRowsEqual( priceQuery.toArray(), oracle.filter((row) => row.price >= 49_000), ); - expectFreshIndex(products.where("price", ">=", 49_000), "sorted:price"); }); }); diff --git a/tests/serialization-validation.test.ts b/tests/serialization-validation.test.ts index 550791a..fdb985c 100644 --- a/tests/serialization-validation.test.ts +++ b/tests/serialization-validation.test.ts @@ -1,6 +1,14 @@ import { describe, expect, it } from "vitest"; import { ColQLError, column, table } from "../src"; +type SerializedMeta = { + version: unknown; + rowCount: unknown; + capacity: unknown; + columns: Array>; + indexes?: unknown; +}; + function expectCode(fn: () => unknown, code: string, message: RegExp): void { expect(fn).toThrow(ColQLError); try { @@ -11,6 +19,52 @@ function expectCode(fn: () => unknown, code: string, message: RegExp): void { } } +function 
serializedFixture() { + const users = table({ + id: column.uint32(), + age: column.uint8(), + status: column.dictionary(["active", "passive", "archived"] as const), + active: column.boolean(), + }); + users.insertMany([ + { id: 1, age: 10, status: "active", active: true }, + { id: 2, age: 20, status: "passive", active: false }, + { id: 3, age: 30, status: "archived", active: true }, + ]); + users.updateMany({ id: 2 }, { status: "active" }); + + return users.serialize(); +} + +function readMeta(buffer: ArrayBuffer): { + bytes: Uint8Array; + headerLength: number; + headerStart: number; + meta: SerializedMeta; +} { + const bytes = new Uint8Array(buffer.slice(0)); + const headerLength = new DataView(bytes.buffer).getUint32(8, true); + const headerStart = 12; + const meta = JSON.parse( + new TextDecoder().decode(bytes.subarray(headerStart, headerStart + headerLength)), + ) as SerializedMeta; + return { bytes, headerLength, headerStart, meta }; +} + +function withPatchedMeta( + buffer: ArrayBuffer, + patch: (meta: SerializedMeta) => void, +): ArrayBuffer { + const { bytes, headerStart, meta } = readMeta(buffer); + patch(meta); + const encoded = new TextEncoder().encode(JSON.stringify(meta)); + const patched = new Uint8Array(Math.max(bytes.byteLength, headerStart + encoded.byteLength)); + patched.set(bytes); + new DataView(patched.buffer).setUint32(8, encoded.byteLength, true); + patched.set(encoded, headerStart); + return patched.buffer; +} + describe("serialization validation", () => { it("rejects invalid input shape and corrupted magic", () => { expectCode(() => table.deserialize({} as ArrayBuffer), "COLQL_INVALID_SERIALIZED_DATA", /expected ArrayBuffer or Uint8Array/); @@ -25,18 +79,155 @@ describe("serialization validation", () => { it("rejects unsupported versions and truncated payloads", () => { const users = table({ id: column.uint32() }); const buffer = users.serialize(); - const bytes = new Uint8Array(buffer.slice(0)); - const headerLength = new 
DataView(bytes.buffer).getUint32(8, true); - const headerStart = 12; - const meta = JSON.parse(new TextDecoder().decode(bytes.subarray(headerStart, headerStart + headerLength))) as { version: string }; - meta.version = "@colql/colql@0.0.1"; - const encoded = new TextEncoder().encode(JSON.stringify(meta)); - const patched = new Uint8Array(headerStart + encoded.byteLength); - patched.set(bytes.subarray(0, headerStart)); - new DataView(patched.buffer).setUint32(8, encoded.byteLength, true); - patched.set(encoded, headerStart); - - expectCode(() => table.deserialize(patched.buffer), "COLQL_INVALID_SERIALIZED_DATA", /Unsupported ColQL serialization version/); + const patched = withPatchedMeta(buffer, (meta) => { + meta.version = "@colql/colql@0.0.1"; + }); + + expectCode(() => table.deserialize(patched), "COLQL_INVALID_SERIALIZED_DATA", /Unsupported ColQL serialization version/); expectCode(() => table.deserialize(buffer.slice(0, buffer.byteLength - 1)), "COLQL_INVALID_SERIALIZED_DATA", /exceeds input size/); }); + + it("rejects malformed table metadata", () => { + const buffer = serializedFixture(); + + expectCode( + () => table.deserialize(withPatchedMeta(buffer, (meta) => { meta.rowCount = (meta.capacity as number) + 1; })), + "COLQL_INVALID_SERIALIZED_DATA", + /rowCount exceeds capacity/, + ); + expectCode( + () => table.deserialize(withPatchedMeta(buffer, (meta) => { meta.capacity = -1; })), + "COLQL_INVALID_SERIALIZED_DATA", + /capacity must be a positive integer/, + ); + expectCode( + () => table.deserialize(withPatchedMeta(buffer, (meta) => { meta.columns = []; })), + "COLQL_INVALID_SERIALIZED_DATA", + /expected at least one column/, + ); + expectCode( + () => table.deserialize(withPatchedMeta(buffer, (meta) => { meta.indexes = [{ column: "id" }]; })), + "COLQL_INVALID_SERIALIZED_DATA", + /serialized indexes are not supported/, + ); + }); + + it("rejects malformed column metadata", () => { + const buffer = serializedFixture(); + + expectCode( + () => 
table.deserialize(withPatchedMeta(buffer, (meta) => { + meta.columns.push({ ...meta.columns[0] }); + })), + "COLQL_INVALID_SERIALIZED_DATA", + /duplicate column/, + ); + expectCode( + () => table.deserialize(withPatchedMeta(buffer, (meta) => { + meta.columns[0].kind = "vector"; + })), + "COLQL_INVALID_SERIALIZED_DATA", + /unknown column kind/, + ); + expectCode( + () => table.deserialize(withPatchedMeta(buffer, (meta) => { + meta.columns[0].type = "uint64"; + })), + "COLQL_INVALID_SERIALIZED_DATA", + /invalid numeric column type/, + ); + expectCode( + () => table.deserialize(withPatchedMeta(buffer, (meta) => { + meta.columns[0].byteLength = -1; + })), + "COLQL_INVALID_SERIALIZED_DATA", + /byteLength must be a non-negative integer/, + ); + expectCode( + () => table.deserialize(withPatchedMeta(buffer, (meta) => { + meta.columns[0].byteOffset = 13; + })), + "COLQL_INVALID_SERIALIZED_DATA", + /byte offset is invalid/, + ); + expectCode( + () => table.deserialize(withPatchedMeta(buffer, (meta) => { + meta.columns[1].byteOffset = meta.columns[0].byteOffset; + })), + "COLQL_INVALID_SERIALIZED_DATA", + /overlaps column/, + ); + }); + + it("rejects corrupted dictionary metadata and payload codes", () => { + const buffer = serializedFixture(); + const { bytes, meta } = readMeta(buffer); + const statusMeta = meta.columns.find((columnMeta) => columnMeta.name === "status"); + if (statusMeta === undefined) { + throw new Error("Expected status column metadata"); + } + + expectCode( + () => table.deserialize(withPatchedMeta(buffer, (patchedMeta) => { + const patchedStatus = patchedMeta.columns.find((columnMeta) => columnMeta.name === "status"); + if (patchedStatus !== undefined) { + patchedStatus.values = ["active", "active"]; + } + })), + "COLQL_INVALID_SERIALIZED_DATA", + /Duplicate dictionary value/, + ); + + bytes[statusMeta.byteOffset as number] = 99; + expectCode( + () => table.deserialize(bytes.buffer), + "COLQL_INVALID_SERIALIZED_DATA", + /contains invalid code/, + ); + 
}); + + for (const value of [Number.NaN, Number.POSITIVE_INFINITY, Number.NEGATIVE_INFINITY]) { + it(`maps corrupted numeric payload value ${String(value)} to serialized-data errors`, () => { + const metrics = table({ score: column.float64() }); + metrics.insert({ score: 1.5 }); + const { bytes, meta } = readMeta(metrics.serialize()); + const scoreMeta = meta.columns.find((columnMeta) => columnMeta.name === "score"); + if (scoreMeta === undefined) { + throw new Error("Expected score column metadata"); + } + + new DataView(bytes.buffer).setFloat64(scoreMeta.byteOffset as number, value, true); + expectCode( + () => table.deserialize(bytes.buffer), + "COLQL_INVALID_SERIALIZED_DATA", + /failed to restore column "score"/, + ); + }); + } + + it("restores mutated snapshots without indexes and can be reindexed", () => { + const buffer = serializedFixture(); + const restored = table.deserialize(buffer); + + expect(restored.indexes()).toEqual([]); + expect(restored.sortedIndexes()).toEqual([]); + expect(restored.uniqueIndexes()).toEqual([]); + expect(restored.where("status", "=", "active").explain()).toEqual( + expect.objectContaining({ + scanType: "full", + reasonCode: "NO_INDEX_FOR_COLUMN", + }), + ); + + restored.createIndex("status").createSortedIndex("age").createUniqueIndex("id"); + expect(restored.where("status", "=", "archived").explain()).toEqual( + expect.objectContaining({ scanType: "index", indexState: "fresh" }), + ); + expect(restored.findBy("id", 1)).toEqual({ + id: 1, + age: 10, + status: "active", + active: true, + }); + }); }); diff --git a/tests/type-inference.test-d.ts b/tests/type-inference.test-d.ts index 4fbd244..d85d15f 100644 --- a/tests/type-inference.test-d.ts +++ b/tests/type-inference.test-d.ts @@ -47,11 +47,21 @@ void explainScanType; table(users.getSchema(), { onQuery(info: QueryInfo) { const duration: number = info.duration; + const durationMs: number | undefined = info.durationMs; const rowsScanned: number = info.rowsScanned; const indexUsed: 
boolean = info.indexUsed; + const scanType: "index" | "full" | undefined = info.scanType; + const selectedIndex: string | undefined = info.selectedIndex; + const resultCount: number | undefined = info.resultCount; + const materializedRows: number | undefined = info.materializedRows; void duration; + void durationMs; void rowsScanned; void indexUsed; + void scanType; + void selectedIndex; + void resultCount; + void materializedRows; }, }); users.createIndex("id"); diff --git a/tsconfig.type-tests.json b/tsconfig.type-tests.json new file mode 100644 index 0000000..02ba07f --- /dev/null +++ b/tsconfig.type-tests.json @@ -0,0 +1,7 @@ +{ + "extends": "./tsconfig.json", + "compilerOptions": { + "noEmit": true + }, + "include": ["src/**/*.ts", "tests/type-inference.test-d.ts"] +}