From bf83f9df96932f41d6c070e89d9aceeeb131d50f Mon Sep 17 00:00:00 2001 From: Matt Wonlaw Date: Mon, 4 May 2026 13:13:08 -0400 Subject: [PATCH] chore: gather more stat4 samples stat4 gathers sample values and sample cardinalities for each index. This helps the sqlite planner choose better plans. Problems can still arise though. One such case exists for terabugs: the issueLabel -> issue join. `SELECT * FROM issueLabel WHERE label_id = xx` If `xx` is not present in stat4, sqlite falls back to an average. The average is roughly the number of rows not covered by a sample divided by the number of distinct values not covered by a sample. If there is a heavy skew, this average can be way off. In terabugs we see it being 10x off in some cases. This PR gathers more stats to get better averages for the case where a sample is missing. --- deps/defines.gypi | 1 + deps/download.sh | 1 + docs/compilation.md | 1 + test/51.stat4-samples.js | 37 +++++++++++++++++++++++++++++++++++++ 4 files changed, 40 insertions(+) create mode 100644 test/51.stat4-samples.js diff --git a/deps/defines.gypi b/deps/defines.gypi index f4f3215..74bc09d 100644 --- a/deps/defines.gypi +++ b/deps/defines.gypi @@ -35,6 +35,7 @@ 'SQLITE_OMIT_SHARED_CACHE', 'SQLITE_OMIT_TCL_VARIABLE', 'SQLITE_SOUNDEX', + 'SQLITE_STAT4_SAMPLES=128', 'SQLITE_THREADSAFE=2', 'SQLITE_TRACE_SIZE_LIMIT=32', 'SQLITE_USE_URI=1', diff --git a/deps/download.sh b/deps/download.sh index 9d9ec15..aa962e9 100755 --- a/deps/download.sh +++ b/deps/download.sh @@ -55,6 +55,7 @@ SQLITE_OMIT_PROGRESS_CALLBACK SQLITE_OMIT_SHARED_CACHE SQLITE_OMIT_TCL_VARIABLE SQLITE_SOUNDEX +SQLITE_STAT4_SAMPLES=128 SQLITE_THREADSAFE=2 SQLITE_TRACE_SIZE_LIMIT=32 SQLITE_USE_URI=1 diff --git a/docs/compilation.md b/docs/compilation.md index 98f2f60..9b53092 100644 --- a/docs/compilation.md +++ b/docs/compilation.md @@ -80,6 +80,7 @@ SQLITE_OMIT_PROGRESS_CALLBACK SQLITE_OMIT_SHARED_CACHE SQLITE_OMIT_TCL_VARIABLE SQLITE_SOUNDEX +SQLITE_STAT4_SAMPLES=128 SQLITE_THREADSAFE=2 SQLITE_TRACE_SIZE_LIMIT=32 
SQLITE_USE_URI=1 diff --git a/test/51.stat4-samples.js b/test/51.stat4-samples.js new file mode 100644 index 0000000..b9f5b38 --- /dev/null +++ b/test/51.stat4-samples.js @@ -0,0 +1,37 @@ +'use strict'; +const Database = require('../.'); + +describe('SQLITE_STAT4_SAMPLES', function () { + beforeEach(function () { + this.db = new Database(util.next()); + }); + afterEach(function () { + this.db.close(); + }); + + it('is compiled with STAT4_SAMPLES=128', function () { + const options = this.db.pragma('compile_options').map(r => r.compile_options); + expect(options).to.include('STAT4_SAMPLES=128'); + }); + + it('ANALYZE collects more than the default 24 samples per index', function () { + this.db.prepare('CREATE TABLE t (x INTEGER)').run(); + this.db.prepare('CREATE INDEX t_x ON t (x)').run(); + + const rowCount = 5000; + const insert = this.db.prepare('INSERT INTO t (x) VALUES (?)'); + const insertMany = this.db.transaction(() => { + for (let i = 0; i < rowCount; i++) insert.run(i); + }); + insertMany(); + + this.db.exec('ANALYZE'); + + const samples = this.db + .prepare("SELECT COUNT(*) AS n FROM sqlite_stat4 WHERE idx = 't_x'") + .get().n; + + expect(samples).to.be.above(24); + expect(samples).to.be.at.most(128); + }); +});