From b34f446e9d8b15915c01fafe5436dbfd89e5e49a Mon Sep 17 00:00:00 2001 From: Day Matchullis Date: Mon, 27 Oct 2025 13:52:35 -0600 Subject: [PATCH 1/4] still probably some issues with this but it's mostly working --- .gitignore | 1 + src/defines.ts | 3 + src/parser.ts | 81 +++++++++++++++++++ src/tokenizer.ts | 9 +++ test/identifier/multiple-statement.spec.ts | 16 ++-- test/index.spec.ts | 94 +++++++++++++++++++++- test/parser/bigquery.spec.ts | 6 +- 7 files changed, 198 insertions(+), 12 deletions(-) diff --git a/.gitignore b/.gitignore index 1002a47..e1b2ce3 100644 --- a/.gitignore +++ b/.gitignore @@ -3,3 +3,4 @@ yarn.lock lib/ node_modules/ webpack/ +mise.toml diff --git a/src/defines.ts b/src/defines.ts index 02eec84..f71f8fc 100644 --- a/src/defines.ts +++ b/src/defines.ts @@ -71,6 +71,9 @@ export type StatementType = | 'ALTER_FUNCTION' | 'ALTER_INDEX' | 'ALTER_PROCEDURE' + | 'BEGIN_TRANSACTION' + | 'COMMIT' + | 'ROLLBACK' | 'ANON_BLOCK' | 'UNKNOWN'; diff --git a/src/parser.ts b/src/parser.ts index e44507c..3b72389 100644 --- a/src/parser.ts +++ b/src/parser.ts @@ -87,6 +87,9 @@ export const EXECUTION_TYPES: Record = { ALTER_FUNCTION: 'MODIFICATION', ALTER_INDEX: 'MODIFICATION', ALTER_PROCEDURE: 'MODIFICATION', + BEGIN_TRANSACTION: 'MODIFICATION', + COMMIT: 'MODIFICATION', + ROLLBACK: 'MODIFICATION', UNKNOWN: 'UNKNOWN', ANON_BLOCK: 'ANON_BLOCK', }; @@ -342,7 +345,16 @@ function createStatementParserByToken( if (['bigquery', 'oracle'].includes(options.dialect) && nextToken.value !== 'TRANSACTION') { return createBlockStatementParser(options); } + return createBeginTransactionStatementParser(options); + case 'START': + if (nextToken.value === 'TRANSACTION') { + return createBeginTransactionStatementParser(options); + } break; + case 'COMMIT': + return createCommitStatementParser(options); + case 'ROLLBACK': + return createRollbackStatementParser(options); case 'DECLARE': if (options.dialect === 'oracle') { return createBlockStatementParser(options); @@ -709,6 +721,75 @@ function createShowStatementParser(options: ParseOptions) { return stateMachineStatementParser(statement, steps, options); } +function createBeginTransactionStatementParser(options: ParseOptions) { + const statement = createInitialStatement(); + + const steps: Step[] = [ + { + preCanGoToNext: () => false, + validation: { + acceptTokens: [ + { type: 'keyword', value: 'BEGIN' }, + { type: 'keyword', value: 'START' } + ], + }, + add: (token) => { + statement.type = 'BEGIN_TRANSACTION'; + if (statement.start < 0) { + statement.start = token.start; + } + }, + postCanGoToNext: () => true, + } + ]; + + return stateMachineStatementParser(statement, steps, options); +} + +function createCommitStatementParser(options: ParseOptions) { + const statement = createInitialStatement(); + + const steps: Step[] = [ + { + preCanGoToNext: () => false, + validation: { + acceptTokens: [{ type: 'keyword', value: 'COMMIT' }], + }, + add: (token) => { + statement.type = 'COMMIT'; + if (statement.start < 0) { + statement.start = token.start; + } + }, + postCanGoToNext: () => true, + }, + ]; + + return stateMachineStatementParser(statement, steps, options); +} + +function createRollbackStatementParser(options: ParseOptions) { + const statement = createInitialStatement(); + + const steps: Step[] = [ + { + preCanGoToNext: () => false, + validation: { + acceptTokens: [{ type: 'keyword', value: 'ROLLBACK' }], + }, + add: (token) => { + statement.type = 'ROLLBACK'; + if (statement.start < 0) { + statement.start = token.start; + } + }, + postCanGoToNext: () => true, + }, + ]; + + return stateMachineStatementParser(statement, steps, options); +} + function createUnknownStatementParser(options: ParseOptions) { const statement = createInitialStatement(); diff --git a/src/tokenizer.ts b/src/tokenizer.ts index fce2841..a21fb49 100644 --- a/src/tokenizer.ts +++ b/src/tokenizer.ts @@ -26,6 +26,15 @@ const KEYWORDS = [ 'AS', 'MATERIALIZED', 'BEGIN', + 'START', + 'COMMIT', + 'ROLLBACK', + 'TRANSACTION', + 'TRAN', + 'WORK', + 'DEFERRED', + 'IMMEDIATE', + 'EXCLUSIVE', 'DECLARE', 'CASE', 'LOOP', diff --git a/test/identifier/multiple-statement.spec.ts b/test/identifier/multiple-statement.spec.ts index 4e2c003..5765c89 100644 --- a/test/identifier/multiple-statement.spec.ts +++ b/test/identifier/multiple-statement.spec.ts @@ -394,8 +394,8 @@ describe('identifier', () => { start: 0, end: 17, text: statements[0], - type: 'UNKNOWN', - executionType: 'UNKNOWN', + type: 'BEGIN_TRANSACTION', + executionType: 'MODIFICATION', parameters: [], tables: [], }, @@ -412,8 +412,8 @@ describe('identifier', () => { start: 29, end: 35, text: statements[2], - type: 'UNKNOWN', - executionType: 'UNKNOWN', + type: 'COMMIT', + executionType: 'MODIFICATION', parameters: [], tables: [], }, @@ -432,8 +432,8 @@ describe('identifier', () => { start: 0, end: 17 + offset, text: statements[0], - type: 'UNKNOWN', - executionType: 'UNKNOWN', + type: 'BEGIN_TRANSACTION', + executionType: 'MODIFICATION', parameters: [], tables: [], }, @@ -450,8 +450,8 @@ describe('identifier', () => { start: 29 + offset, end: 35 + offset, text: statements[2], - type: 'UNKNOWN', - executionType: 'UNKNOWN', + type: 'COMMIT', + executionType: 'MODIFICATION', parameters: [], tables: [], }, diff --git a/test/index.spec.ts b/test/index.spec.ts index bf8be98..5923aa2 100644 --- a/test/index.spec.ts +++ b/test/index.spec.ts @@ -3,6 +3,12 @@ import { expect } from 'chai'; import { ParamTypes } from '../src/defines'; describe('identify', () => { + it.only('test', () => { + const result = identify("SET search_path = 'cycle_a,cycle_a.data,cycle_a.macros';", { dialect: 'generic', strict: false }) + console.log("RESULT: ", result) + expect(true) + }) + it('should throw error for invalid dialect', () => { expect(() => identify('SELECT * FROM foo', { dialect: 'invalid' as Dialect })).to.throw( 'Unknown dialect. Allowed values: mssql, sqlite, mysql, oracle, psql, bigquery, generic', @@ -109,7 +115,7 @@ describe('getExecutionType', () => { expect(getExecutionType('SELECT')).to.equal('LISTING'); }); - ['UPDATE', 'DELETE', 'INSERT', 'TRUNCATE'].forEach((type) => { + ['UPDATE', 'DELETE', 'INSERT', 'TRUNCATE', 'BEGIN_TRANSACTION', 'COMMIT', 'ROLLBACK'].forEach((type) => { it(`should return MODIFICATION for ${type}`, () => { expect(getExecutionType(type)).to.equal('MODIFICATION'); }); @@ -159,3 +165,89 @@ describe('Regression tests', () => { }); }); }); + +describe('Transaction statements', () => { + it('should identify BEGIN TRANSACTION', () => { + expect(identify('BEGIN TRANSACTION', { strict: false })).to.eql([ + { + start: 0, + end: 16, + text: 'BEGIN TRANSACTION', + type: 'BEGIN_TRANSACTION', + executionType: 'MODIFICATION', + parameters: [], + tables: [], + }, + ]); + }); + + it('should identify BEGIN without TRANSACTION keyword', () => { + expect(identify('BEGIN;', { strict: false })).to.eql([ + { + start: 0, + end: 5, + text: 'BEGIN;', + type: 'BEGIN_TRANSACTION', + executionType: 'MODIFICATION', + parameters: [], + tables: [], + }, + ]); + }); + + it('should identify START TRANSACTION', () => { + expect(identify('START TRANSACTION', { strict: false })).to.eql([ + { + start: 0, + end: 16, + text: 'START TRANSACTION', + type: 'BEGIN_TRANSACTION', + executionType: 'MODIFICATION', + parameters: [], + tables: [], + }, + ]); + }); + + it('should identify COMMIT', () => { + expect(identify('COMMIT', { strict: false })).to.eql([ + { + start: 0, + end: 5, + text: 'COMMIT', + type: 'COMMIT', + executionType: 'MODIFICATION', + parameters: [], + tables: [], + }, + ]); + }); + + it('should identify ROLLBACK', () => { + expect(identify('ROLLBACK', { strict: false })).to.eql([ + { + start: 0, + end: 7, + text: 'ROLLBACK', + type: 'ROLLBACK', + executionType: 'MODIFICATION', + parameters: [], + tables: [], + }, + ]); + }); + + it('should still identify BEGIN as ANON_BLOCK for oracle/bigquery when not followed by TRANSACTION', () => { + expect(identify('BEGIN select 1; END;', { dialect: 'oracle' })).to.eql([ + { + start: 0, + end: 19, + text: 'BEGIN select 1; END;', + type: 'ANON_BLOCK', + executionType: 'ANON_BLOCK', + parameters: [], + tables: [], + }, + ]); + }); +}); diff --git a/test/parser/bigquery.spec.ts b/test/parser/bigquery.spec.ts index d1af5a5..525a170 100644 --- a/test/parser/bigquery.spec.ts +++ b/test/parser/bigquery.spec.ts @@ -69,11 +69,11 @@ describe('Parser for bigquery', () => { expect(result.body[1].type).to.eql('SELECT'); }); - it('parses BEGIN TRANSACTION as UNKNOWN', () => { + it('parses BEGIN TRANSACTION as transaction statement', () => { const result = parse(`BEGIN TRANSACTION; SELECT 1; COMMIT;`, false, 'bigquery'); expect(result.body.length).to.eql(3); - expect(result.body[0].type).to.eql('UNKNOWN'); + expect(result.body[0].type).to.eql('BEGIN_TRANSACTION'); expect(result.body[1].type).to.eql('SELECT'); - expect(result.body[2].type).to.eql('UNKNOWN'); + expect(result.body[2].type).to.eql('COMMIT'); }); }); From 78f6164538c5c610caeafbf444df7567bd76bbab Mon Sep 17 00:00:00 2001 From: Day Matchullis Date: Wed, 12 Nov 2025 23:20:54 -0700 Subject: [PATCH 2/4] fix linting --- src/parser.ts | 4 ++-- test/index.spec.ts | 18 +++++++----------- 2 files changed, 9 insertions(+), 13 deletions(-) diff --git a/src/parser.ts b/src/parser.ts index 3b72389..17727f0 100644 --- a/src/parser.ts +++ b/src/parser.ts @@ -730,7 +730,7 @@ function createBeginTransactionStatementParser(options: ParseOptions) { validation: { acceptTokens: [ { type: 'keyword', value: 'BEGIN' }, - { type: 'keyword', value: 'START' } + { type: 'keyword', value: 'START' }, ], }, add: (token) => { @@ -740,7 +740,7 @@ function createBeginTransactionStatementParser(options: ParseOptions) { } }, postCanGoToNext: () => true, - } + }, ]; return stateMachineStatementParser(statement, steps, options); diff --git a/test/index.spec.ts b/test/index.spec.ts index 5923aa2..f57935f 100644 --- a/test/index.spec.ts +++ b/test/index.spec.ts @@ -3,12 +3,6 @@ import { expect } from 'chai'; import { ParamTypes } from '../src/defines'; describe('identify', () => { - it.only('test', () => { - const result = identify("SET search_path = 'cycle_a,cycle_a.data,cycle_a.macros';", { dialect: 'generic', strict: false }) - console.log("RESULT: ", result) - expect(true) - }) - it('should throw error for invalid dialect', () => { expect(() => identify('SELECT * FROM foo', { dialect: 'invalid' as Dialect })).to.throw( 'Unknown dialect. Allowed values: mssql, sqlite, mysql, oracle, psql, bigquery, generic', @@ -115,11 +109,13 @@ describe('getExecutionType', () => { expect(getExecutionType('SELECT')).to.equal('LISTING'); }); - ['UPDATE', 'DELETE', 'INSERT', 'TRUNCATE', 'BEGIN_TRANSACTION', 'COMMIT', 'ROLLBACK'].forEach((type) => { - it(`should return MODIFICATION for ${type}`, () => { - expect(getExecutionType(type)).to.equal('MODIFICATION'); - }); - }); + ['UPDATE', 'DELETE', 'INSERT', 'TRUNCATE', 'BEGIN_TRANSACTION', 'COMMIT', 'ROLLBACK'].forEach( + (type) => { + it(`should return MODIFICATION for ${type}`, () => { + expect(getExecutionType(type)).to.equal('MODIFICATION'); + }); + }, + ); ['CREATE', 'DROP', 'ALTER'].forEach((action) => { ['DATABASE', 'SCHEMA', 'TABLE', 'VIEW', 'FUNCTION', 'TRIGGER'].forEach((type) => { From 775679ee47e66defe5490ae7a7ab72d8c131b3db Mon Sep 17 00:00:00 2001 From: Day Matchullis Date: Thu, 20 Nov 2025 22:15:56 -0700 Subject: [PATCH 3/4] change execution type for transaction statements and add a test case for START REPLICA --- src/defines.ts | 8 ++++- src/parser.ts | 6 ++-- test/identifier/multiple-statement.spec.ts | 8 ++--- test/index.spec.ts | 34 ++++++++++++++-------- 4 files changed, 36 insertions(+), 20 deletions(-) diff --git a/src/defines.ts b/src/defines.ts index f71f8fc..f26ecda 100644 --- a/src/defines.ts +++ b/src/defines.ts @@ -77,7 +77,13 @@ export type StatementType = | 'ANON_BLOCK' | 'UNKNOWN'; -export type ExecutionType = 'LISTING' | 'MODIFICATION' | 'INFORMATION' | 'ANON_BLOCK' | 'UNKNOWN'; +export type ExecutionType = + | 'LISTING' + | 'MODIFICATION' + | 'INFORMATION' + | 'ANON_BLOCK' + | 'TRANSACTION' + | 'UNKNOWN'; export interface ParamTypes { positional?: boolean; diff --git a/src/parser.ts b/src/parser.ts index 17727f0..b185904 100644 --- a/src/parser.ts +++ b/src/parser.ts @@ -87,9 +87,9 @@ export const EXECUTION_TYPES: Record = { ALTER_FUNCTION: 'MODIFICATION', ALTER_INDEX: 'MODIFICATION', ALTER_PROCEDURE: 'MODIFICATION', - BEGIN_TRANSACTION: 'MODIFICATION', - COMMIT: 'MODIFICATION', - ROLLBACK: 'MODIFICATION', + BEGIN_TRANSACTION: 'TRANSACTION', + COMMIT: 'TRANSACTION', + ROLLBACK: 'TRANSACTION', UNKNOWN: 'UNKNOWN', ANON_BLOCK: 'ANON_BLOCK', }; diff --git a/test/identifier/multiple-statement.spec.ts b/test/identifier/multiple-statement.spec.ts index 5765c89..a55b620 100644 --- a/test/identifier/multiple-statement.spec.ts +++ b/test/identifier/multiple-statement.spec.ts @@ -395,7 +395,7 @@ describe('identifier', () => { end: 17, text: statements[0], type: 'BEGIN_TRANSACTION', - executionType: 'MODIFICATION', + executionType: 'TRANSACTION', parameters: [], tables: [], }, @@ -413,7 +413,7 @@ describe('identifier', () => { end: 35, text: statements[2], type: 'COMMIT', - executionType: 'MODIFICATION', + executionType: 'TRANSACTION', parameters: [], tables: [], }, @@ -433,7 +433,7 @@ describe('identifier', () => { end: 17 + offset, text: statements[0], type: 'BEGIN_TRANSACTION', - executionType: 'MODIFICATION', + executionType: 'TRANSACTION', parameters: [], tables: [], }, @@ -451,7 +451,7 @@ describe('identifier', () => { end: 35 + offset, text: statements[2], type: 'COMMIT', - executionType: 'MODIFICATION', + executionType: 'TRANSACTION', parameters: [], tables: [], }, diff --git a/test/index.spec.ts b/test/index.spec.ts index f57935f..668624c 100644 --- a/test/index.spec.ts +++ b/test/index.spec.ts @@ -109,13 +109,17 @@ describe('getExecutionType', () => { expect(getExecutionType('SELECT')).to.equal('LISTING'); }); - ['UPDATE', 'DELETE', 'INSERT', 'TRUNCATE', 'BEGIN_TRANSACTION', 'COMMIT', 'ROLLBACK'].forEach( - (type) => { - it(`should return MODIFICATION for ${type}`, () => { - expect(getExecutionType(type)).to.equal('MODIFICATION'); - }); - }, - ); + ['UPDATE', 'DELETE', 'INSERT', 'TRUNCATE'].forEach((type) => { + it(`should return MODIFICATION for ${type}`, () => { + expect(getExecutionType(type)).to.equal('MODIFICATION'); + }); + }); + + ['BEGIN_TRANSACTION', 'COMMIT', 'ROLLBACK'].forEach((type) => { + it(`should return TRANSACTION for ${type}`, () => { + expect(getExecutionType(type)).to.equal('TRANSACTION'); + }); + }); ['CREATE', 'DROP', 'ALTER'].forEach((action) => { ['DATABASE', 'SCHEMA', 'TABLE', 'VIEW', 'FUNCTION', 'TRIGGER'].forEach((type) => { @@ -170,7 +174,7 @@ describe('Transaction statements', () => { end: 16, text: 'BEGIN TRANSACTION', type: 'BEGIN_TRANSACTION', - executionType: 'MODIFICATION', + executionType: 'TRANSACTION', parameters: [], tables: [], }, @@ -184,7 +188,7 @@ describe('Transaction statements', () => { end: 5, text: 'BEGIN;', type: 'BEGIN_TRANSACTION', - executionType: 'MODIFICATION', + executionType: 'TRANSACTION', parameters: [], tables: [], }, @@ -198,7 +202,7 @@ describe('Transaction statements', () => { end: 16, text: 'START TRANSACTION', type: 'BEGIN_TRANSACTION', - executionType: 'MODIFICATION', + executionType: 'TRANSACTION', parameters: [], tables: [], }, @@ -212,7 +216,7 @@ describe('Transaction statements', () => { end: 5, text: 'COMMIT', type: 'COMMIT', - executionType: 'MODIFICATION', + executionType: 'TRANSACTION', parameters: [], tables: [], }, @@ -226,7 +230,7 @@ describe('Transaction statements', () => { end: 7, text: 'ROLLBACK', type: 'ROLLBACK', - executionType: 'MODIFICATION', + executionType: 'TRANSACTION', parameters: [], tables: [], }, @@ -246,4 +250,10 @@ describe('Transaction statements', () => { }, ]); }); + + it('should not identify START REPLICA as a transaction', () => { + expect(() => identify('START REPLICA;', { dialect: 'mysql' })).to.throw( + `Invalid statement parser "START"`, + ); + }); }); From fd06187c7e02fbf52fdde924fe9684426334ab25 Mon Sep 17 00:00:00 2001 From: Day Matchullis Date: Mon, 24 Nov 2025 22:38:37 -0700 Subject: [PATCH 4/4] added more test cases --- test/index.spec.ts | 142 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 142 insertions(+) diff --git a/test/index.spec.ts b/test/index.spec.ts index 668624c..557cd86 100644 --- a/test/index.spec.ts +++ b/test/index.spec.ts @@ -256,4 +256,146 @@ describe('Transaction statements', () => { `Invalid statement parser "START"`, ); }); + + it('Should identify ANSI-ish / generic transaction start syntaxes', () => { + expect(identify('START TRANSACTION;', { dialect: 'generic' })).to.eql([ + { + start: 0, + end: 17, + text: 'START TRANSACTION;', + type: 'BEGIN_TRANSACTION', + executionType: 'TRANSACTION', + parameters: [], + tables: [], + }, + ]); + + expect(identify('BEGIN;', { dialect: 'generic' })).to.eql([ + { + start: 0, + end: 5, + text: 'BEGIN;', + type: 'BEGIN_TRANSACTION', + executionType: 'TRANSACTION', + parameters: [], + tables: [], + }, + ]); + }); + + it('Should identify MySQL/MariaDB style transaction start syntaxes', () => { + expect(identify('START TRANSACTION;', { dialect: 'mysql' })).to.eql([ + { + start: 0, + end: 17, + text: 'START TRANSACTION;', + type: 'BEGIN_TRANSACTION', + executionType: 'TRANSACTION', + parameters: [], + tables: [], + }, + ]); + + expect(identify('BEGIN;', { dialect: 'mysql' })).to.eql([ + { + start: 0, + end: 5, + text: 'BEGIN;', + type: 'BEGIN_TRANSACTION', + executionType: 'TRANSACTION', + parameters: [], + tables: [], + }, + ]); + + expect(identify('BEGIN WORK;', { dialect: 'mysql' })).to.eql([ + { + start: 0, + end: 10, + text: 'BEGIN WORK;', + type: 'BEGIN_TRANSACTION', + executionType: 'TRANSACTION', + parameters: [], + tables: [], + }, + ]); + + expect(identify('START TRANSACTION READ ONLY;', { dialect: 'mysql' })).to.eql([ + { + start: 0, + end: 27, + text: 'START TRANSACTION READ ONLY;', + type: 'BEGIN_TRANSACTION', + executionType: 'TRANSACTION', + parameters: [], + tables: [], + }, + ]); + + expect( + identify('START TRANSACTION ISOLATION LEVEL SERIALIZABLE;', { dialect: 'mysql' }), + ).to.eql([ + { + start: 0, + end: 46, + text: 'START TRANSACTION ISOLATION LEVEL SERIALIZABLE;', + type: 'BEGIN_TRANSACTION', + executionType: 'TRANSACTION', + parameters: [], + tables: [], + }, + ]); + }); + + it('Should identify Postgres style transaction start syntaxes', () => { + expect(identify('BEGIN;', { dialect: 'psql' })).to.eql([ + { + start: 0, + end: 5, + text: 'BEGIN;', + type: 'BEGIN_TRANSACTION', + executionType: 'TRANSACTION', + parameters: [], + tables: [], + }, + ]); + + expect(identify('START TRANSACTION;', { dialect: 'psql' })).to.eql([ + { + start: 0, + end: 17, + text: 'START TRANSACTION;', + type: 'BEGIN_TRANSACTION', + executionType: 'TRANSACTION', + parameters: [], + tables: [], + }, + ]); + + expect(identify('BEGIN TRANSACTION;', { dialect: 'psql' })).to.eql([ + { + start: 0, + end: 17, + text: 'BEGIN TRANSACTION;', + type: 'BEGIN_TRANSACTION', + executionType: 'TRANSACTION', + parameters: [], + tables: [], + }, + ]); + + expect( + identify('BEGIN TRANSACTION ISOLATION LEVEL REPEATABLE READ;', { dialect: 'psql' }), + ).to.eql([ + { + start: 0, + end: 49, + text: 'BEGIN TRANSACTION ISOLATION LEVEL REPEATABLE READ;', + type: 'BEGIN_TRANSACTION', + executionType: 'TRANSACTION', + parameters: [], + tables: [], + }, + ]); + }); });