import { ColumnReference, Dialect, Token } from './defines';

// States for skipping MSSQL's TOP clause: SELECT TOP n [PERCENT] [WITH TIES]
// The tokenizer emits digits as individual single-character 'unknown' tokens,
// so CONSUMING_NUM keeps consuming until a non-digit token appears.
const enum TopState {
  NONE = 0, // Not in a TOP clause
  EXPECTING_VALUE = 1, // Seen TOP, expecting a number or '('
  CONSUMING_NUM = 2, // Inside a bare numeric value (e.g., consuming '1','0' for TOP 10)
  INSIDE_PARENS = 3, // Inside TOP(...), waiting for closing ')'
  AFTER_VALUE = 4, // Consumed the TOP value, may see PERCENT / WITH TIES
  AFTER_PERCENT = 5, // Seen PERCENT, may still see WITH TIES
  EXPECTING_TIES = 6, // Seen WITH, expecting TIES
}

// States for skipping PostgreSQL's DISTINCT ON (...) clause:
// SELECT DISTINCT ON (expr [, ...]) col1, col2 ...
const enum DistinctOnState {
  NONE = 0, // Not in a DISTINCT ON clause
  EXPECTING_ON = 1, // Seen DISTINCT, expecting ON (or not — plain DISTINCT is valid too)
  EXPECTING_OPEN_PAREN = 2, // Seen ON, expecting '('
  INSIDE_PARENS = 3, // Inside ON(...), waiting for closing ')'
}

/**
 * Incremental parser for the select list of a SELECT statement.
 *
 * Feed it the tokens that follow the SELECT keyword, one at a time, through
 * `processToken`. It returns a `ColumnReference` each time a plain column
 * (optionally `table.` / `schema.table.` qualified, optionally aliased) is
 * completed, and `null` otherwise. Any select-list item that contains
 * parentheses (function calls, sub-queries, parenthesized expressions) is
 * skipped. Identical references are reported only once per parser instance.
 */
export class ColumnParser {
  // Completed dotted segments of the item being built (e.g. ['public', 'users']).
  private parts: string[] = [];
  // The segment currently being accumulated.
  private currentPart = '';
  private alias?: string;
  // True right after an AS keyword, until the alias token is seen.
  private waitingForAlias = false;
  // Nesting depth of '(' seen in the select list. Deliberately not cleared by resetState().
  private parensDepth = 0;
  // True when the current item contains parentheses and must not be reported.
  private skipCurrent = false;
  // True once a top-level clause keyword ended the select list.
  private finished = false;
  // Identity strings of the references already returned (for de-duplication).
  private existing: Set<string> = new Set();

  // State for skipping MSSQL TOP clause
  private topState: TopState = TopState.NONE;
  private topParensDepth = 0;

  // State for skipping PostgreSQL DISTINCT ON (...) clause
  private distinctOnState: DistinctOnState = DistinctOnState.NONE;
  private distinctOnParensDepth = 0;

  constructor(private dialect: Dialect) {}

  // Keywords that terminate the select list when seen outside parentheses.
  private STOP_KEYWORDS: Set<string> = new Set([
    'FROM',
    'WHERE',
    'GROUP',
    'ORDER',
    'HAVING',
    'LIMIT',
    'OFFSET',
    'UNION',
    'INTERSECT',
    'EXCEPT',
  ]);

  /** Returns true once the select list has ended; no further tokens are needed. */
  shouldStop(): boolean {
    return this.finished;
  }

  /** Clears the per-item state so the next select-list item starts fresh. */
  resetState(): void {
    this.parts = [];
    this.currentPart = '';
    this.alias = undefined;
    this.waitingForAlias = false;
    this.skipCurrent = false;
  }

  /**
   * Handles MSSQL TOP clause skipping. Returns true if the token was consumed
   * by the TOP state machine (i.e., should not be processed as a column token).
   */
  private processTopToken(token: Token): boolean {
    const upper = token.value.toUpperCase();

    switch (this.topState) {
      case TopState.EXPECTING_VALUE:
        if (token.value === '(') {
          this.topParensDepth = 1;
          this.topState = TopState.INSIDE_PARENS;
        } else {
          // Bare value — the tokenizer emits digits as individual characters,
          // so we enter CONSUMING_NUM to eat all remaining digit tokens
          this.topState = TopState.CONSUMING_NUM;
        }
        return true;

      case TopState.CONSUMING_NUM:
        // Keep consuming digit characters; stop when we see a non-digit
        if (/^\d+$/.test(token.value)) {
          return true;
        }
        // Non-digit token — the bare number is fully consumed. Switch to
        // AFTER_VALUE and re-dispatch so that state handles this same token.
        this.topState = TopState.AFTER_VALUE;
        return this.processTopToken(token);

      case TopState.INSIDE_PARENS:
        if (token.value === '(') {
          this.topParensDepth++;
        } else if (token.value === ')') {
          this.topParensDepth--;
          if (this.topParensDepth === 0) {
            this.topState = TopState.AFTER_VALUE;
          }
        }
        return true;

      case TopState.AFTER_VALUE:
        if (upper === 'PERCENT') {
          this.topState = TopState.AFTER_PERCENT;
          return true;
        } else if (upper === 'WITH') {
          this.topState = TopState.EXPECTING_TIES;
          return true;
        }
        // Not a TOP modifier -- done skipping, let normal parsing handle this token
        this.topState = TopState.NONE;
        return false;

      case TopState.AFTER_PERCENT:
        if (upper === 'WITH') {
          this.topState = TopState.EXPECTING_TIES;
          return true;
        }
        // Done skipping
        this.topState = TopState.NONE;
        return false;

      case TopState.EXPECTING_TIES:
        if (upper === 'TIES') {
          this.topState = TopState.NONE;
          return true;
        }
        // 'WITH' was not followed by 'TIES' -- done skipping, process this token normally
        this.topState = TopState.NONE;
        return false;

      default:
        return false;
    }
  }

  /**
   * Handles PostgreSQL DISTINCT ON (...) clause skipping. Returns true if the
   * token was consumed by the state machine (i.e., should not be processed as
   * a column token).
   */
  private processDistinctOnToken(token: Token): boolean {
    const upper = token.value.toUpperCase();

    switch (this.distinctOnState) {
      case DistinctOnState.EXPECTING_ON:
        if (upper === 'ON') {
          this.distinctOnState = DistinctOnState.EXPECTING_OPEN_PAREN;
          return true;
        }
        // Not ON — this is a plain DISTINCT (already skipped), let normal parsing handle this token
        this.distinctOnState = DistinctOnState.NONE;
        return false;

      case DistinctOnState.EXPECTING_OPEN_PAREN:
        if (token.value === '(') {
          this.distinctOnParensDepth = 1;
          this.distinctOnState = DistinctOnState.INSIDE_PARENS;
          return true;
        }
        // No opening paren — unexpected, bail out
        this.distinctOnState = DistinctOnState.NONE;
        return false;

      case DistinctOnState.INSIDE_PARENS:
        if (token.value === '(') {
          this.distinctOnParensDepth++;
        } else if (token.value === ')') {
          this.distinctOnParensDepth--;
          if (this.distinctOnParensDepth === 0) {
            this.distinctOnState = DistinctOnState.NONE;
          }
        }
        return true;

      default:
        return false;
    }
  }

  /**
   * Consumes one token of the select list.
   *
   * @param token the current token
   * @param prevToken the token immediately before `token`; its type is
   *   compared against 'whitespace' to detect an implicit alias
   * @param prevNonWhitespaceToken the closest preceding non-whitespace token;
   *   used to detect `.`-qualified names
   * @returns the column reference completed by this token (on a top-level ','
   *   or a stop keyword), or null when nothing new was completed
   */
  processToken(
    token: Token,
    prevToken?: Token,
    prevNonWhitespaceToken?: Token,
  ): ColumnReference | null {
    // Skip MSSQL TOP clause tokens
    if (this.topState !== TopState.NONE) {
      if (this.processTopToken(token)) {
        return null;
      }
    }

    // Skip PostgreSQL DISTINCT ON (...) clause tokens
    if (this.distinctOnState !== DistinctOnState.NONE) {
      if (this.processDistinctOnToken(token)) {
        return null;
      }
    }

    const upper = token.value.toUpperCase();
    const isComment = token.type === 'comment-inline' || token.type === 'comment-block';

    // A clause keyword only ends the select list at the top level. Inside
    // parentheses it belongs to a nested expression, e.g. EXTRACT(YEAR FROM d)
    // or a scalar sub-query, and must not stop column parsing.
    if (this.parensDepth <= 0 && this.STOP_KEYWORDS.has(upper)) {
      this.finished = true;
      return this.finalizeReference();
    } else if (upper === 'DISTINCT') {
      // Skip distinct keyword; for psql, also watch for DISTINCT ON (...)
      if (this.dialect === 'psql') {
        this.distinctOnState = DistinctOnState.EXPECTING_ON;
      }
    } else if (this.dialect === 'mssql' && upper === 'TOP' && this.topState === TopState.NONE) {
      // Enter TOP-skipping mode for MSSQL dialect
      this.topState = TopState.EXPECTING_VALUE;
    } else if (token.value === '(') {
      // Any item containing a top-level '(' is an expression, not a plain column.
      if (this.parensDepth === 0) {
        this.skipCurrent = true;
      }
      this.parensDepth++;
    } else if (token.value === ')') {
      this.parensDepth--;
    } else if (token.type === 'keyword' && upper === 'AS') {
      this.waitingForAlias = true;
    } else if (this.waitingForAlias && !isComment) {
      this.alias = token.value;
      this.waitingForAlias = false;
    } else if (token.value === ',' && this.parensDepth === 0) {
      return this.finalizeReference();
    } else if (token.value === '.' && this.parensDepth === 0) {
      // Separator, keep building but don't add to parts
    } else if (!isComment && this.parensDepth === 0 && !this.waitingForAlias) {
      if (prevNonWhitespaceToken?.value === '.' && !!this.currentPart) {
        // Next segment of a dotted name: close the previous segment.
        this.parts.push(this.currentPart);
        this.currentPart = token.value;
      } else {
        const hasName = this.parts.length > 0 || !!this.currentPart;
        const isImplicitAlias =
          hasName &&
          prevNonWhitespaceToken?.value !== '.' &&
          prevNonWhitespaceToken?.value !== ',' &&
          prevToken?.type === 'whitespace' &&
          this.maybeIdent(token);
        if (isImplicitAlias) {
          // `col alias` without AS; only the first candidate is kept.
          if (!this.alias) {
            this.alias = token.value;
          }
        } else {
          this.currentPart += token.value;
        }
      }
    }

    return null;
  }

  /**
   * Returns the pending reference when the statement ended before any stop
   * keyword was seen (e.g. `SELECT a`). Returns null if parsing had already
   * finished or nothing reportable is pending.
   */
  flush(): ColumnReference | null {
    if (!this.finished) {
      return this.finalizeReference();
    }
    return null;
  }

  /** Builds the pending reference, resets per-item state, and de-duplicates. */
  private finalizeReference(): ColumnReference | null {
    const ref = this.buildReference();
    this.resetState();
    if (ref && !this.exists(ref)) {
      this.addRef(ref);
      return ref;
    }
    return null;
  }

  /**
   * Builds a reference from the segments collected so far without modifying
   * parser state, so calling it repeatedly yields the same result.
   *
   * 1 segment -> name; 2 -> table.name; 3 -> schema.table.name; more than 3
   * segments are joined with '.' into `name`.
   *
   * @returns the reference, or null when nothing was collected or the current
   *   item is being skipped
   */
  buildReference(): ColumnReference | null {
    if (this.skipCurrent) {
      return null;
    }

    // Work on a copy: pushing into this.parts would corrupt a later call.
    const segments = this.currentPart ? [...this.parts, this.currentPart] : [...this.parts];
    if (segments.length === 0) {
      return null;
    }

    let col: ColumnReference;

    if (segments.length === 1) {
      const name = segments[0];
      col = {
        name,
        isWildcard: name === '*',
      };
    } else if (segments.length === 2) {
      const [table, name] = segments;
      col = {
        name,
        table,
        isWildcard: name === '*',
      };
    } else if (segments.length === 3) {
      const [schema, table, name] = segments;
      col = {
        name,
        table,
        schema,
        isWildcard: name === '*',
      };
    } else {
      col = {
        name: segments.join('.'),
        isWildcard: false,
      };
    }

    if (this.alias) {
      col.alias = this.alias;
    }

    return col;
  }

  /** Returns true when an identical reference was already reported. */
  exists(other: ColumnReference): boolean {
    return this.existing.has(this.getIdentString(other));
  }

  /** Records a reference as reported. */
  addRef(col: ColumnReference): void {
    this.existing.add(this.getIdentString(col));
  }

  /** Identity key used for de-duplication: `schema.table.name:alias`. */
  getIdentString(col: ColumnReference): string {
    return `${col.schema ?? 'none'}.${col.table ?? 'none'}.${col.name ?? 'none'}:${
      col.alias ?? 'none'
    }`;
  }

  /**
   * Heuristic: could this token be an identifier (and thus an implicit alias)?
   * True for non-string tokens that start with a letter, an underscore or a
   * dialect-specific identifier quote.
   */
  private maybeIdent(token: Token): boolean {
    // charAt yields '' for an empty value, which matches nothing below.
    const ch = token.value.charAt(0);
    const startChars = this.dialect === 'mssql' ? ['"', '['] : ['"', '`'];
    return token.type !== 'string' && (startChars.includes(ch) || /[a-zA-Z_]/.test(ch));
  }
}
statement.parameters.sort() : statement.parameters, tables: statement.tables || [], + columns: statement.columns || [], }; return result; }); diff --git a/src/parser.ts b/src/parser.ts index b185904..c180c78 100644 --- a/src/parser.ts +++ b/src/parser.ts @@ -11,10 +11,13 @@ import type { ConcreteStatement, ParamTypes, } from './defines'; +import { ColumnParser } from './column-parser'; +import { TableParser } from './table-parser'; interface StatementParser { addToken: (token: Token, nextToken: Token) => void; getStatement: () => Statement; + flush: () => void; } /** @@ -102,10 +105,6 @@ const statementsWithEnds = [ 'UNKNOWN', ]; -// keywords that come directly before a table name. -// v1 - keeping it very simple. -const PRE_TABLE_KEYWORDS = /^from$|^join$|^into$/i; - const blockOpeners: Record = { generic: ['BEGIN', 'CASE'], psql: ['BEGIN', 'CASE', 'LOOP', 'IF'], @@ -120,6 +119,7 @@ interface ParseOptions { isStrict: boolean; dialect: Dialect; identifyTables: boolean; + identifyColumns: boolean; } function createInitialStatement(): Statement { @@ -128,6 +128,7 @@ function createInitialStatement(): Statement { end: 0, parameters: [], tables: [], + columns: [], }; } @@ -148,6 +149,7 @@ export function parse( isStrict = true, dialect: Dialect = 'generic', identifyTables = false, + identifyColumns = false, paramTypes?: ParamTypes, ): ParseResult { const topLevelState = initState({ input }); @@ -211,6 +213,7 @@ export function parse( executionType: 'UNKNOWN', parameters: [], tables: [], + columns: [], }); cteState.isCte = false; cteState.asSeen = false; @@ -252,6 +255,7 @@ export function parse( isStrict, dialect, identifyTables, + identifyColumns, }); if (cteState.isCte) { statementParser.getStatement().start = cteState.state.start; @@ -274,6 +278,7 @@ export function parse( const statement = statementParser.getStatement(); if (statement.endStatement) { + statementParser.flush(); statement.end = token.end; topLevelStatement.body.push(statement as ConcreteStatement); 
statementParser = null; @@ -283,6 +288,7 @@ export function parse( // last statement without ending key if (statementParser) { + statementParser.flush(); const statement = statementParser.getStatement(); if (!statement.endStatement) { statement.end = topLevelStatement.end; @@ -812,7 +818,7 @@ function createUnknownStatementParser(options: ParseOptions) { function stateMachineStatementParser( statement: Statement, steps: Step[], - { isStrict, dialect, identifyTables }: ParseOptions, + { isStrict, dialect, identifyTables, identifyColumns }: ParseOptions, ): StatementParser { let currentStepIndex = 0; let prevToken: Token | undefined; @@ -823,6 +829,9 @@ function stateMachineStatementParser( let openBlocks = 0; + const columnParser = new ColumnParser(dialect); + const tableParser = new TableParser(); + /* eslint arrow-body-style: 0, no-extra-parens: 0 */ const isValidToken = (step: Step, token: Token) => { if (!step.validation) { @@ -851,6 +860,21 @@ function stateMachineStatementParser( return statement; }, + flush() { + if (identifyTables) { + const table = tableParser.flush(); + if (table) { + statement.tables.push(table); + } + } + if (identifyColumns) { + const column = columnParser.flush(); + if (column) { + statement.columns.push(column); + } + } + }, + addToken(token: Token, nextToken: Token) { /* eslint no-param-reassign: 0 */ if (statement.endStatement) { @@ -912,15 +936,17 @@ function stateMachineStatementParser( } } - if ( - identifyTables && - PRE_TABLE_KEYWORDS.exec(token.value) && - !statement.isCte && - statement.type?.match(/SELECT|INSERT/) - ) { - const tableValue = nextToken.value; - if (!statement.tables.includes(tableValue)) { - statement.tables.push(tableValue); + if (identifyTables && !statement.isCte && statement.type?.match(/SELECT|INSERT/)) { + const table = tableParser.processToken(token, nextToken); + if (table) { + statement.tables.push(table); + } + } + + if (identifyColumns && statement.type === 'SELECT' && !columnParser.shouldStop()) { 
import { TableReference, Token } from './defines';

/**
 * Incremental parser that extracts table references from a statement.
 *
 * Feed it every non-whitespace token together with the token that follows it
 * through `processToken`. A table name is collected after FROM, JOIN or INTO
 * (and after a ',' that directly follows a completed reference), optionally
 * dotted as `schema.name` / `database.schema.name`, optionally followed by an
 * alias with or without AS. Identical references are reported only once per
 * parser instance.
 */
export class TableParser {
  // Dotted segments of the table name being collected.
  private parts: string[] = [];
  private alias?: string;
  // Identity strings of the references already returned (for de-duplication).
  private existing: Set<string> = new Set();
  // True while collecting the segments of a table name.
  private parsing = false;
  // True after a complete name when the next token may be an alias (or AS).
  private waitingForAlias = false;
  // True when the reference just finalized is followed by ',' (another table follows).
  private maybeCommaSep = false;
  // Parenthesis nesting seen while `parsing`; content inside is ignored.
  private parensDepth = 0;

  // keywords that come directly before a table name.
  // v1 - keeping it very simple.
  private PRE_TABLE_KEYWORDS: Set<string> = new Set(['FROM', 'JOIN', 'INTO']);

  // Tokens that indicate "no alias follows" when we're in the pending state.
  // If we see one of these after a table name, we finalize without an alias.
  private NON_ALIAS_KEYWORDS: Set<string> = new Set([
    'ON',
    'WHERE',
    'SET',
    'VALUES',
    'GROUP',
    'ORDER',
    'HAVING',
    'LIMIT',
    'OFFSET',
    'UNION',
    'INTERSECT',
    'EXCEPT',
    'LEFT',
    'RIGHT',
    'INNER',
    'CROSS',
    'FULL',
    'OUTER',
    'NATURAL',
    'FROM',
    'JOIN',
    'INTO',
    // Clause keywords that can directly follow a table name and would
    // otherwise be mistaken for an implicit alias:
    'SELECT', // INSERT INTO t SELECT ...
    'DEFAULT', // INSERT INTO t DEFAULT VALUES
    'USING', // JOIN t USING (...)
    'WITH', // MSSQL table hints: FROM t WITH (NOLOCK)
    'WINDOW',
    'FETCH',
    'FOR', // FROM t FOR UPDATE
  ]);

  /** Clears all per-reference state. */
  resetState(): void {
    this.parts = [];
    this.alias = undefined;
    this.parsing = false;
    this.waitingForAlias = false;
    this.maybeCommaSep = false;
    this.parensDepth = 0;
  }

  /**
   * Consumes one token.
   *
   * @param token the current token
   * @param nextToken the token that follows `token`; its value and type are
   *   read to decide whether the current name is complete
   * @returns the table reference completed by this token, or null
   */
  processToken(token: Token, nextToken: Token): TableReference | null {
    const upper = token.value.toUpperCase();

    // `FROM a, b`: the comma after a finished reference starts another table.
    if (this.maybeCommaSep && token.value === ',') {
      this.parsing = true;
      this.maybeCommaSep = false;
      return null;
    }

    // Waiting for the alias token (after AS or implicit)
    if (this.waitingForAlias) {
      if (upper === 'AS') {
        return null;
      }
      this.alias = token.value;
      return this.finalizeBefore(nextToken);
    }

    // Actively collecting table name parts
    if (this.parsing) {
      const val = token.value;
      if (val === '(') {
        this.parensDepth++;
      } else if (val === ')') {
        this.parensDepth--;
      } else if (this.parensDepth === 0) {
        if (val !== '.') {
          this.parts.push(val);
        }
        // The name is complete once neither this token nor the next is a '.'.
        if (val !== '.' && nextToken.value !== '.') {
          const nextUpper = nextToken.value.toUpperCase();
          if (
            this.NON_ALIAS_KEYWORDS.has(nextUpper) ||
            nextToken.type === 'semicolon' ||
            nextToken.value === ',' ||
            nextToken.value === '(' ||
            nextToken.value === ')'
          ) {
            return this.finalizeBefore(nextToken);
          }
          // Anything else may be an alias (or AS); decide on the next token.
          this.parsing = false;
          this.waitingForAlias = true;
          return null;
        }
      }
    } else if (this.PRE_TABLE_KEYWORDS.has(upper)) {
      this.parsing = true;
    }

    return null;
  }

  /**
   * Flush any pending table reference that hasn't been finalized yet.
   * Called when the statement ends (semicolon or end of input).
   */
  flush(): TableReference | null {
    if (this.waitingForAlias || this.parsing) {
      return this.finalizeReference();
    }
    return null;
  }

  /**
   * Finalizes the pending reference and, when a ',' follows, remembers that
   * another table may come next.
   */
  private finalizeBefore(nextToken: Token): TableReference | null {
    const ref = this.finalizeReference();
    // Must be set after finalizeReference(), which resets all state.
    if (nextToken.value === ',') {
      this.maybeCommaSep = true;
    }
    return ref;
  }

  /** Builds the pending reference, resets state, and de-duplicates. */
  private finalizeReference(): TableReference | null {
    const ref = this.buildReference();
    this.resetState();
    if (ref && !this.exists(ref)) {
      this.addRef(ref);
      return ref;
    }
    return null;
  }

  /**
   * Builds a reference from the segments collected so far.
   *
   * 1 segment -> name; 2 -> schema.name; 3 -> database.schema.name; more than
   * 3 segments are joined with '.' into `name`.
   *
   * @returns the reference, or null when no segment was collected
   */
  buildReference(): TableReference | null {
    if (this.parts.length <= 0) {
      return null;
    }

    let table: TableReference;

    if (this.parts.length === 1) {
      const [name] = this.parts;
      table = { name };
    } else if (this.parts.length === 2) {
      const [schema, name] = this.parts;
      table = { name, schema };
    } else if (this.parts.length === 3) {
      const [database, schema, name] = this.parts;
      table = { name, schema, database };
    } else {
      table = { name: this.parts.join('.') };
    }

    if (this.alias) {
      table.alias = this.alias;
    }

    return table;
  }

  /** Returns true when an identical reference was already reported. */
  exists(other: TableReference): boolean {
    return this.existing.has(this.getIdentString(other));
  }

  /** Records a reference as reported. */
  addRef(table: TableReference): void {
    this.existing.add(this.getIdentString(table));
  }

  /** Identity key used for de-duplication: `database.schema.name:alias`. */
  getIdentString(table: TableReference): string {
    return `${table.database ?? 'none'}.${table.schema ?? 'none'}.${table.name ?? 'none'}:${
      table.alias ?? 'none'
    }`;
  }
}
it('should identify multiple columns', () => { + const actual = identify('SELECT column_1, column_2 FROM Persons', { + identifyColumns: true, + }); + expect(actual[0].columns).to.eql([ + { name: 'column_1', isWildcard: false }, + { name: 'column_2', isWildcard: false }, + ]); + }); + + it('should identify column with alias using AS', () => { + const actual = identify('SELECT column_2 AS hello FROM Persons', { identifyColumns: true }); + expect(actual[0].columns).to.eql([{ name: 'column_2', alias: 'hello', isWildcard: false }]); + }); + + it('should identify column with implicit alias (no AS)', () => { + const actual = identify('SELECT column_1 col1 FROM Persons', { identifyColumns: true }); + expect(actual[0].columns).to.eql([{ name: 'column_1', alias: 'col1', isWildcard: false }]); + }); + + it('should identify multiple columns with aliases', () => { + const actual = identify('SELECT id AS user_id, name AS username FROM users', { + identifyColumns: true, + }); + expect(actual[0].columns).to.eql([ + { name: 'id', alias: 'user_id', isWildcard: false }, + { name: 'name', alias: 'username', isWildcard: false }, + ]); + }); + + it('should handle DISTINCT keyword', () => { + const actual = identify('SELECT DISTINCT column_1 FROM Persons', { identifyColumns: true }); + expect(actual[0].columns).to.eql([{ name: 'column_1', isWildcard: false }]); + }); + + it('should handle DISTINCT with multiple columns', () => { + const actual = identify('SELECT DISTINCT column_1, column_2 FROM Persons', { + identifyColumns: true, + }); + expect(actual[0].columns).to.eql([ + { name: 'column_1', isWildcard: false }, + { name: 'column_2', isWildcard: false }, + ]); + }); + + it('should handle DISTINCT with wildcard', () => { + const actual = identify('SELECT DISTINCT * FROM users', { identifyColumns: true }); + expect(actual[0].columns).to.eql([{ name: '*', isWildcard: true }]); + }); + + it('should handle DISTINCT with qualified columns', () => { + const actual = identify('SELECT DISTINCT 
users.id, users.name FROM users', { + identifyColumns: true, + }); + expect(actual[0].columns).to.eql([ + { name: 'id', table: 'users', isWildcard: false }, + { name: 'name', table: 'users', isWildcard: false }, + ]); + }); + + it('should handle DISTINCT with alias', () => { + const actual = identify('SELECT DISTINCT column_1 AS col FROM users', { + identifyColumns: true, + }); + expect(actual[0].columns).to.eql([{ name: 'column_1', alias: 'col', isWildcard: false }]); + }); + + it('should handle DISTINCT with qualified wildcard', () => { + const actual = identify('SELECT DISTINCT users.* FROM users', { identifyColumns: true }); + expect(actual[0].columns).to.eql([{ name: '*', table: 'users', isWildcard: true }]); + }); + }); + + describe('MSSQL TOP clause', () => { + it('should skip TOP with integer', () => { + const actual = identify('SELECT TOP 10 name, id FROM users', { + identifyColumns: true, + dialect: 'mssql', + }); + expect(actual[0].columns).to.eql([ + { name: 'name', isWildcard: false }, + { name: 'id', isWildcard: false }, + ]); + }); + + it('should skip TOP with parenthesized integer', () => { + const actual = identify('SELECT TOP (10) name, id FROM users', { + identifyColumns: true, + dialect: 'mssql', + }); + expect(actual[0].columns).to.eql([ + { name: 'name', isWildcard: false }, + { name: 'id', isWildcard: false }, + ]); + }); + + it('should skip TOP with PERCENT', () => { + const actual = identify('SELECT TOP 10 PERCENT name, id FROM users', { + identifyColumns: true, + dialect: 'mssql', + }); + expect(actual[0].columns).to.eql([ + { name: 'name', isWildcard: false }, + { name: 'id', isWildcard: false }, + ]); + }); + + it('should skip TOP with parenthesized PERCENT', () => { + const actual = identify('SELECT TOP (10) PERCENT name, id FROM users', { + identifyColumns: true, + dialect: 'mssql', + }); + expect(actual[0].columns).to.eql([ + { name: 'name', isWildcard: false }, + { name: 'id', isWildcard: false }, + ]); + }); + + it('should skip TOP 
with WITH TIES', () => { + const actual = identify('SELECT TOP 10 WITH TIES name, id FROM users', { + identifyColumns: true, + dialect: 'mssql', + }); + expect(actual[0].columns).to.eql([ + { name: 'name', isWildcard: false }, + { name: 'id', isWildcard: false }, + ]); + }); + + it('should skip TOP with parenthesized WITH TIES', () => { + const actual = identify('SELECT TOP (10) WITH TIES name, id FROM users', { + identifyColumns: true, + dialect: 'mssql', + }); + expect(actual[0].columns).to.eql([ + { name: 'name', isWildcard: false }, + { name: 'id', isWildcard: false }, + ]); + }); + + it('should skip TOP with PERCENT and WITH TIES', () => { + const actual = identify('SELECT TOP 10 PERCENT WITH TIES name, id FROM users', { + identifyColumns: true, + dialect: 'mssql', + }); + expect(actual[0].columns).to.eql([ + { name: 'name', isWildcard: false }, + { name: 'id', isWildcard: false }, + ]); + }); + + it('should skip TOP with parenthesized PERCENT and WITH TIES', () => { + const actual = identify('SELECT TOP (10) PERCENT WITH TIES name, id FROM users', { + identifyColumns: true, + dialect: 'mssql', + }); + expect(actual[0].columns).to.eql([ + { name: 'name', isWildcard: false }, + { name: 'id', isWildcard: false }, + ]); + }); + + it('should skip TOP with parenthesized expression', () => { + const actual = identify('SELECT TOP (@n) name, id FROM users', { + identifyColumns: true, + dialect: 'mssql', + }); + expect(actual[0].columns).to.eql([ + { name: 'name', isWildcard: false }, + { name: 'id', isWildcard: false }, + ]); + }); + + it('should handle DISTINCT with TOP', () => { + const actual = identify('SELECT DISTINCT TOP 10 name FROM users', { + identifyColumns: true, + dialect: 'mssql', + }); + expect(actual[0].columns).to.eql([{ name: 'name', isWildcard: false }]); + }); + + it('should handle TOP with wildcard', () => { + const actual = identify('SELECT TOP 10 * FROM users', { + identifyColumns: true, + dialect: 'mssql', + }); + 
expect(actual[0].columns).to.eql([{ name: '*', isWildcard: true }]); + }); + + it('should handle TOP with qualified columns', () => { + const actual = identify('SELECT TOP 5 u.name, u.id FROM users u', { + identifyColumns: true, + dialect: 'mssql', + }); + expect(actual[0].columns).to.eql([ + { name: 'name', table: 'u', isWildcard: false }, + { name: 'id', table: 'u', isWildcard: false }, + ]); + }); + + it('should handle TOP with column alias', () => { + const actual = identify('SELECT TOP 10 name AS n, id FROM users', { + identifyColumns: true, + dialect: 'mssql', + }); + expect(actual[0].columns).to.eql([ + { name: 'name', alias: 'n', isWildcard: false }, + { name: 'id', isWildcard: false }, + ]); + }); + }); + + describe('table-qualified columns', () => { + it('should identify table.column', () => { + const actual = identify('SELECT users.name FROM users', { identifyColumns: true }); + expect(actual[0].columns).to.eql([{ name: 'name', table: 'users', isWildcard: false }]); + }); + + it('should identify table.*', () => { + const actual = identify('SELECT users.* FROM users', { identifyColumns: true }); + expect(actual[0].columns).to.eql([{ name: '*', table: 'users', isWildcard: true }]); + }); + + it('should identify multiple table-qualified columns', () => { + const actual = identify('SELECT users.name, orders.id FROM users JOIN orders', { + identifyColumns: true, + }); + expect(actual[0].columns).to.eql([ + { name: 'name', table: 'users', isWildcard: false }, + { name: 'id', table: 'orders', isWildcard: false }, + ]); + }); + + it('should identify multiple wildcards from different tables', () => { + const actual = identify('SELECT users.*, orders.* FROM users JOIN orders', { + identifyColumns: true, + }); + expect(actual[0].columns).to.eql([ + { name: '*', table: 'users', isWildcard: true }, + { name: '*', table: 'orders', isWildcard: true }, + ]); + }); + + it('should identify table-qualified column with alias', () => { + const actual = identify('SELECT 
users.name AS username FROM users', { + identifyColumns: true, + }); + expect(actual[0].columns).to.eql([ + { name: 'name', table: 'users', alias: 'username', isWildcard: false }, + ]); + }); + }); + + describe('schema-qualified columns', () => { + it('should identify schema.table.column', () => { + const actual = identify('SELECT public.users.name FROM public.users', { + identifyColumns: true, + }); + expect(actual[0].columns).to.eql([ + { name: 'name', schema: 'public', table: 'users', isWildcard: false }, + ]); + }); + + it('should identify schema.table.*', () => { + const actual = identify('SELECT public.users.* FROM public.users', { + identifyColumns: true, + }); + expect(actual[0].columns).to.eql([ + { name: '*', schema: 'public', table: 'users', isWildcard: true }, + ]); + }); + + it('should identify multiple schema-qualified columns', () => { + const actual = identify( + 'SELECT public.users.name, dbo.orders.id FROM public.users JOIN dbo.orders', + { identifyColumns: true }, + ); + expect(actual[0].columns).to.eql([ + { name: 'name', schema: 'public', table: 'users', isWildcard: false }, + { name: 'id', schema: 'dbo', table: 'orders', isWildcard: false }, + ]); + }); + + it('should identify schema.table.column with alias', () => { + const actual = identify('SELECT public.users.name AS username FROM public.users', { + identifyColumns: true, + }); + expect(actual[0].columns).to.eql([ + { name: 'name', schema: 'public', table: 'users', alias: 'username', isWildcard: false }, + ]); + }); + }); + + describe('wildcard edge cases', () => { + it('should identify wildcard mixed with regular column before it', () => { + const actual = identify('SELECT id, * FROM users', { identifyColumns: true }); + expect(actual[0].columns).to.eql([ + { name: 'id', isWildcard: false }, + { name: '*', isWildcard: true }, + ]); + }); + + it('should identify wildcard mixed with regular column after it', () => { + const actual = identify('SELECT *, id FROM users', { identifyColumns: 
true }); + expect(actual[0].columns).to.eql([ + { name: '*', isWildcard: true }, + { name: 'id', isWildcard: false }, + ]); + }); + + it('should identify wildcard between columns', () => { + const actual = identify('SELECT id, *, name FROM users', { identifyColumns: true }); + expect(actual[0].columns).to.eql([ + { name: 'id', isWildcard: false }, + { name: '*', isWildcard: true }, + { name: 'name', isWildcard: false }, + ]); + }); + + it('should identify unqualified and qualified wildcards together', () => { + const actual = identify('SELECT *, users.* FROM users', { identifyColumns: true }); + expect(actual[0].columns).to.eql([ + { name: '*', isWildcard: true }, + { name: '*', table: 'users', isWildcard: true }, + ]); + }); + + it('should identify multiple qualified wildcards', () => { + const actual = identify('SELECT users.*, orders.*, products.* FROM users', { + identifyColumns: true, + }); + expect(actual[0].columns).to.eql([ + { name: '*', table: 'users', isWildcard: true }, + { name: '*', table: 'orders', isWildcard: true }, + { name: '*', table: 'products', isWildcard: true }, + ]); + }); + + it('should identify schema-qualified wildcards mixed with unqualified', () => { + const actual = identify('SELECT *, public.users.* FROM users', { identifyColumns: true }); + expect(actual[0].columns).to.eql([ + { name: '*', isWildcard: true }, + { name: '*', schema: 'public', table: 'users', isWildcard: true }, + ]); + }); + }); + + describe('function calls', () => { + // Functions with parentheses are skipped in simple mode. + // Only actual column references and wildcards are captured. 
+ + it('should skip COUNT(*) as expression', () => { + const actual = identify('SELECT COUNT(*) FROM users', { identifyColumns: true }); + expect(actual[0].columns).to.eql([]); + }); + + it('should skip function with column argument', () => { + const actual = identify('SELECT SUM(price) FROM orders', { identifyColumns: true }); + expect(actual[0].columns).to.eql([]); + }); + + it('should skip multiple functions', () => { + const actual = identify('SELECT COUNT(*), SUM(price) FROM orders', { + identifyColumns: true, + }); + expect(actual[0].columns).to.eql([]); + }); + + it('should skip function with alias', () => { + const actual = identify('SELECT COUNT(*) AS total FROM users', { identifyColumns: true }); + expect(actual[0].columns).to.eql([]); + }); + + it('should skip UPPER function with alias', () => { + const actual = identify('SELECT UPPER(name) AS upper_name FROM users', { + identifyColumns: true, + }); + expect(actual[0].columns).to.eql([]); + }); + + it('should identify columns but skip functions when mixed', () => { + const actual = identify('SELECT id, name, COUNT(*) AS total FROM users', { + identifyColumns: true, + }); + expect(actual[0].columns).to.eql([ + { name: 'id', isWildcard: false }, + { name: 'name', isWildcard: false }, + ]); + }); + + it('should skip nested functions', () => { + const actual = identify('SELECT UPPER(LOWER(name)) FROM users', { identifyColumns: true }); + expect(actual[0].columns).to.eql([]); + }); + + it('should skip function with multiple arguments', () => { + const actual = identify('SELECT COALESCE(col1, col2, col3) FROM users', { + identifyColumns: true, + }); + expect(actual[0].columns).to.eql([]); + }); + + it('should skip function with qualified column argument', () => { + const actual = identify('SELECT COUNT(users.id) FROM users', { identifyColumns: true }); + expect(actual[0].columns).to.eql([]); + }); + + it('should skip function with schema-qualified column argument', () => { + const actual = identify('SELECT 
SUM(public.orders.amount) FROM public.orders', { + identifyColumns: true, + }); + expect(actual[0].columns).to.eql([]); + }); + + it('should skip string concatenation function', () => { + const actual = identify('SELECT CONCAT(first_name, last_name) FROM users', { + identifyColumns: true, + }); + expect(actual[0].columns).to.eql([]); + }); + + it('should skip aggregate with DISTINCT inside parentheses', () => { + const actual = identify('SELECT COUNT(DISTINCT user_id) FROM orders', { + identifyColumns: true, + }); + expect(actual[0].columns).to.eql([]); + }); + + it('should skip multiple nested function calls', () => { + const actual = identify('SELECT ROUND(AVG(price), 2) FROM products', { + identifyColumns: true, + }); + expect(actual[0].columns).to.eql([]); + }); + + it('should skip triply nested functions', () => { + const actual = identify("SELECT COALESCE(UPPER(TRIM(name)), 'UNKNOWN') FROM users", { + identifyColumns: true, + }); + expect(actual[0].columns).to.eql([]); + }); + }); + + describe('parentheses without functions', () => { + it('should skip parenthesized expression', () => { + const actual = identify('SELECT (price * 1.1) FROM products', { identifyColumns: true }); + expect(actual[0].columns).to.eql([]); + }); + + it('should skip parenthesized column reference', () => { + const actual = identify('SELECT (id) FROM users', { identifyColumns: true }); + expect(actual[0].columns).to.eql([]); + }); + + it('should handle regular columns mixed with parenthesized expressions', () => { + const actual = identify('SELECT id, (price * 1.1), name FROM products', { + identifyColumns: true, + }); + expect(actual[0].columns).to.eql([ + { name: 'id', isWildcard: false }, + { name: 'name', isWildcard: false }, + ]); + }); + }); + + describe('queries with different clauses', () => { + it('should stop parsing at FROM clause', () => { + const actual = identify('SELECT column_1 FROM Persons WHERE id = 1', { + identifyColumns: true, + }); + 
expect(actual[0].columns).to.eql([{ name: 'column_1', isWildcard: false }]); + }); + + it('should stop parsing at WHERE clause (no FROM)', () => { + const actual = identify('SELECT column_1 WHERE 1=1', { identifyColumns: true }); + expect(actual[0].columns).to.eql([{ name: 'column_1', isWildcard: false }]); + }); + + it('should stop parsing at GROUP BY', () => { + const actual = identify('SELECT column_1, COUNT(*) FROM users GROUP BY column_1', { + identifyColumns: true, + }); + expect(actual[0].columns).to.eql([{ name: 'column_1', isWildcard: false }]); + }); + + it('should stop parsing at ORDER BY', () => { + const actual = identify('SELECT column_1 FROM users ORDER BY column_1', { + identifyColumns: true, + }); + expect(actual[0].columns).to.eql([{ name: 'column_1', isWildcard: false }]); + }); + + it('should stop parsing at HAVING', () => { + const actual = identify('SELECT COUNT(*) FROM users HAVING COUNT(*) > 10', { + identifyColumns: true, + }); + expect(actual[0].columns).to.eql([]); + }); + + it('should stop parsing at LIMIT', () => { + const actual = identify('SELECT column_1 FROM users LIMIT 10', { identifyColumns: true }); + expect(actual[0].columns).to.eql([{ name: 'column_1', isWildcard: false }]); + }); + + it('should stop parsing at UNION', () => { + const actual = identify('SELECT column_1 FROM users UNION SELECT column_2 FROM orders', { + identifyColumns: true, + }); + expect(actual[0].columns).to.eql([{ name: 'column_1', isWildcard: false }]); + }); + + it('should stop parsing at UNION ALL', () => { + const actual = identify( + 'SELECT column_1 FROM users UNION ALL SELECT column_2 FROM orders', + { + identifyColumns: true, + }, + ); + expect(actual[0].columns).to.eql([{ name: 'column_1', isWildcard: false }]); + }); + + it('should handle multiple columns before UNION', () => { + const actual = identify( + 'SELECT id, name, email FROM users UNION SELECT id, title, author FROM posts', + { + identifyColumns: true, + }, + ); + 
expect(actual[0].columns).to.eql([ + { name: 'id', isWildcard: false }, + { name: 'name', isWildcard: false }, + { name: 'email', isWildcard: false }, + ]); + }); + + it('should stop parsing with no FROM clause before WHERE', () => { + const actual = identify('SELECT 1, 2, 3 WHERE 1=1', { identifyColumns: true }); + expect(actual[0].columns).to.eql([ + { name: '1', isWildcard: false }, + { name: '2', isWildcard: false }, + { name: '3', isWildcard: false }, + ]); + }); + }); + + describe('edge cases', () => { + it('should handle query with quoted identifier', () => { + const actual = identify('SELECT "column name" FROM users', { identifyColumns: true }); + expect(actual[0].columns).to.eql([{ name: '"column name"', isWildcard: false }]); + }); + + it('should handle query with backtick quoted identifier', () => { + const actual = identify('SELECT `column name` FROM users', { identifyColumns: true }); + expect(actual[0].columns).to.eql([{ name: '`column name`', isWildcard: false }]); + }); + + it('should handle inline comments in column list', () => { + const actual = identify('SELECT column_1, /* comment */ column_2 FROM users', { + identifyColumns: true, + }); + expect(actual[0].columns).to.eql([ + { name: 'column_1', isWildcard: false }, + { name: 'column_2', isWildcard: false }, + ]); + }); + + it('should handle line comments in column list', () => { + const actual = identify('SELECT column_1, -- comment\ncolumn_2 FROM users', { + identifyColumns: true, + }); + expect(actual[0].columns).to.eql([ + { name: 'column_1', isWildcard: false }, + { name: 'column_2', isWildcard: false }, + ]); + }); + + describe('quoted identifiers with special characters', () => { + it('should handle quoted identifier with dots inside', () => { + const actual = identify('SELECT "column.with.dots" FROM users', { + identifyColumns: true, + }); + expect(actual[0].columns).to.eql([{ name: '"column.with.dots"', isWildcard: false }]); + }); + + it('should handle backtick identifier with dots 
inside', () => { + const actual = identify('SELECT `column.with.dots` FROM users', { + identifyColumns: true, + }); + expect(actual[0].columns).to.eql([{ name: '`column.with.dots`', isWildcard: false }]); + }); + + it('should handle mixed quoted and unquoted columns', () => { + const actual = identify('SELECT "first name", last_name, "middle name" FROM users', { + identifyColumns: true, + }); + expect(actual[0].columns).to.eql([ + { name: '"first name"', isWildcard: false }, + { name: 'last_name', isWildcard: false }, + { name: '"middle name"', isWildcard: false }, + ]); + }); + + it('should handle quoted identifier with alias', () => { + const actual = identify('SELECT "column name" AS col FROM users', { + identifyColumns: true, + }); + expect(actual[0].columns).to.eql([ + { name: '"column name"', alias: 'col', isWildcard: false }, + ]); + }); + + it('should handle qualified quoted identifier', () => { + const actual = identify('SELECT users."column name" FROM users', { + identifyColumns: true, + }); + expect(actual[0].columns).to.eql([ + { name: '"column name"', table: 'users', isWildcard: false }, + ]); + }); + }); + + describe('duplicate column handling', () => { + it('should deduplicate identical unqualified columns', () => { + const actual = identify('SELECT column_1, column_1 FROM users', { + identifyColumns: true, + }); + expect(actual[0].columns).to.eql([{ name: 'column_1', isWildcard: false }]); + }); + + it('should deduplicate identical qualified columns', () => { + const actual = identify('SELECT users.id, users.id FROM users', { + identifyColumns: true, + }); + expect(actual[0].columns).to.eql([{ name: 'id', table: 'users', isWildcard: false }]); + }); + + it('should keep columns with different aliases', () => { + const actual = identify('SELECT column_1 AS first, column_1 AS second FROM users', { + identifyColumns: true, + }); + expect(actual[0].columns).to.eql([ + { name: 'column_1', alias: 'first', isWildcard: false }, + { name: 'column_1', alias: 
'second', isWildcard: false }, + ]); + }); + + it('should keep same column name from different tables', () => { + const actual = identify('SELECT users.id, orders.id FROM users JOIN orders', { + identifyColumns: true, + }); + expect(actual[0].columns).to.eql([ + { name: 'id', table: 'users', isWildcard: false }, + { name: 'id', table: 'orders', isWildcard: false }, + ]); + }); + + it('should deduplicate wildcard', () => { + const actual = identify('SELECT *, * FROM users', { identifyColumns: true }); + expect(actual[0].columns).to.eql([{ name: '*', isWildcard: true }]); + }); + + it('should deduplicate qualified wildcard', () => { + const actual = identify('SELECT users.*, users.* FROM users', { identifyColumns: true }); + expect(actual[0].columns).to.eql([{ name: '*', table: 'users', isWildcard: true }]); + }); + + it('should not deduplicate columns with one qualified and one unqualified', () => { + const actual = identify('SELECT id, users.id FROM users', { identifyColumns: true }); + expect(actual[0].columns).to.eql([ + { name: 'id', isWildcard: false }, + { name: 'id', table: 'users', isWildcard: false }, + ]); + }); + }); + }); + + describe('combined with identifyTables', () => { + it('should identify both tables and columns', () => { + const actual = identify('SELECT id, name FROM users', { + identifyTables: true, + identifyColumns: true, + }); + expect(actual[0].tables).to.eql([{ name: 'users' }]); + expect(actual[0].columns).to.eql([ + { name: 'id', isWildcard: false }, + { name: 'name', isWildcard: false }, + ]); + }); + + it('should identify both with JOIN', () => { + const actual = identify('SELECT users.id, orders.total FROM users JOIN orders', { + identifyTables: true, + identifyColumns: true, + }); + expect(actual[0].tables).to.eql([{ name: 'users' }, { name: 'orders' }]); + expect(actual[0].columns).to.eql([ + { name: 'id', table: 'users', isWildcard: false }, + { name: 'total', table: 'orders', isWildcard: false }, + ]); + }); + }); + + 
describe('alias variations', () => { + it('should identify qualified column with implicit alias', () => { + const actual = identify('SELECT users.name username FROM users', { identifyColumns: true }); + expect(actual[0].columns).to.eql([ + { name: 'name', table: 'users', alias: 'username', isWildcard: false }, + ]); + }); + + it('should identify schema-qualified column with implicit alias', () => { + const actual = identify('SELECT public.users.name username FROM users', { + identifyColumns: true, + }); + expect(actual[0].columns).to.eql([ + { name: 'name', schema: 'public', table: 'users', alias: 'username', isWildcard: false }, + ]); + }); + + it('should identify multiple columns with same name but different aliases', () => { + const actual = identify('SELECT id AS user_id, id AS order_id FROM users', { + identifyColumns: true, + }); + expect(actual[0].columns).to.eql([ + { name: 'id', alias: 'user_id', isWildcard: false }, + { name: 'id', alias: 'order_id', isWildcard: false }, + ]); + }); + + it('should handle reserved word as quoted alias', () => { + const actual = identify('SELECT column_1 AS "select" FROM users', { + identifyColumns: true, + }); + expect(actual[0].columns).to.eql([ + { name: 'column_1', alias: '"select"', isWildcard: false }, + ]); + }); + + it('should handle alias with special characters', () => { + const actual = identify('SELECT id AS "user-id" FROM users', { identifyColumns: true }); + expect(actual[0].columns).to.eql([{ name: 'id', alias: '"user-id"', isWildcard: false }]); + }); + + it('should handle backtick alias', () => { + const actual = identify('SELECT id AS `user id` FROM users', { identifyColumns: true }); + expect(actual[0].columns).to.eql([{ name: 'id', alias: '`user id`', isWildcard: false }]); + }); + + it('should handle mixed explicit and implicit aliases', () => { + const actual = identify('SELECT id AS user_id, name username, email FROM users', { + identifyColumns: true, + }); + expect(actual[0].columns).to.eql([ + { 
name: 'id', alias: 'user_id', isWildcard: false }, + { name: 'name', alias: 'username', isWildcard: false }, + { name: 'email', isWildcard: false }, + ]); + }); + }); + + describe('whitespace and formatting', () => { + it('should handle extra spaces around commas', () => { + const actual = identify('SELECT id , name , email FROM users', { + identifyColumns: true, + }); + expect(actual[0].columns).to.eql([ + { name: 'id', isWildcard: false }, + { name: 'name', isWildcard: false }, + { name: 'email', isWildcard: false }, + ]); + }); + + it('should handle newlines between columns', () => { + const actual = identify('SELECT\nid,\nname,\nemail\nFROM users', { identifyColumns: true }); + expect(actual[0].columns).to.eql([ + { name: 'id', isWildcard: false }, + { name: 'name', isWildcard: false }, + { name: 'email', isWildcard: false }, + ]); + }); + + it('should handle tabs between columns', () => { + const actual = identify('SELECT\tid,\tname\tFROM users', { identifyColumns: true }); + expect(actual[0].columns).to.eql([ + { name: 'id', isWildcard: false }, + { name: 'name', isWildcard: false }, + ]); + }); + + it('should handle mixed whitespace', () => { + const actual = identify('SELECT id,\n\t name FROM users', { identifyColumns: true }); + expect(actual[0].columns).to.eql([ + { name: 'id', isWildcard: false }, + { name: 'name', isWildcard: false }, + ]); + }); + + it('should handle no whitespace around dots in qualified columns', () => { + const actual = identify('SELECT users.id,orders.total FROM users', { + identifyColumns: true, + }); + expect(actual[0].columns).to.eql([ + { name: 'id', table: 'users', isWildcard: false }, + { name: 'total', table: 'orders', isWildcard: false }, + ]); + }); + + it('should handle excessive whitespace in qualified columns', () => { + const actual = identify('SELECT users . id , orders . 
total FROM users', { + identifyColumns: true, + }); + expect(actual[0].columns).to.eql([ + { name: 'id', table: 'users', isWildcard: false }, + { name: 'total', table: 'orders', isWildcard: false }, + ]); + }); + }); + + describe('complex mixed scenarios', () => { + it('should handle columns, wildcards, and functions mixed together', () => { + const actual = identify('SELECT id, users.*, COUNT(*), name FROM users', { + identifyColumns: true, + }); + expect(actual[0].columns).to.eql([ + { name: 'id', isWildcard: false }, + { name: '*', table: 'users', isWildcard: true }, + { name: 'name', isWildcard: false }, + ]); + }); + + it('should handle multiple qualified wildcards with regular columns', () => { + const actual = identify('SELECT users.*, orders.id, orders.total, products.* FROM users', { + identifyColumns: true, + }); + expect(actual[0].columns).to.eql([ + { name: '*', table: 'users', isWildcard: true }, + { name: 'id', table: 'orders', isWildcard: false }, + { name: 'total', table: 'orders', isWildcard: false }, + { name: '*', table: 'products', isWildcard: true }, + ]); + }); + + it('should handle all qualification levels in one query', () => { + const actual = identify('SELECT id, users.name, public.orders.total, * FROM users', { + identifyColumns: true, + }); + expect(actual[0].columns).to.eql([ + { name: 'id', isWildcard: false }, + { name: 'name', table: 'users', isWildcard: false }, + { name: 'total', schema: 'public', table: 'orders', isWildcard: false }, + { name: '*', isWildcard: true }, + ]); + }); + + it('should handle columns with functions interspersed', () => { + const actual = identify( + 'SELECT id, COUNT(*), name, SUM(price), email, MAX(created_at) FROM users', + { identifyColumns: true }, + ); + expect(actual[0].columns).to.eql([ + { name: 'id', isWildcard: false }, + { name: 'name', isWildcard: false }, + { name: 'email', isWildcard: false }, + ]); + }); + + it('should handle schema-qualified columns with functions', () => { + const actual 
= identify( + 'SELECT public.users.id, COUNT(*), dbo.orders.total, SUM(amount) FROM users', + { identifyColumns: true }, + ); + expect(actual[0].columns).to.eql([ + { name: 'id', schema: 'public', table: 'users', isWildcard: false }, + { name: 'total', schema: 'dbo', table: 'orders', isWildcard: false }, + ]); + }); + + it('should handle DISTINCT with mixed column types and functions', () => { + const actual = identify('SELECT DISTINCT id, users.name, COUNT(*), * FROM users', { + identifyColumns: true, + }); + expect(actual[0].columns).to.eql([ + { name: 'id', isWildcard: false }, + { name: 'name', table: 'users', isWildcard: false }, + { name: '*', isWildcard: true }, + ]); + }); + + it('should handle all features combined: DISTINCT, qualified, wildcards, aliases, functions', () => { + const actual = identify( + 'SELECT DISTINCT id AS user_id, users.*, public.orders.total AS total, COUNT(*), name FROM users', + { identifyColumns: true }, + ); + expect(actual[0].columns).to.eql([ + { name: 'id', alias: 'user_id', isWildcard: false }, + { name: '*', table: 'users', isWildcard: true }, + { name: 'total', schema: 'public', table: 'orders', alias: 'total', isWildcard: false }, + { name: 'name', isWildcard: false }, + ]); + }); + }); + + describe('long and unusual column names', () => { + it('should handle very long column name', () => { + const longName = 'a'.repeat(100); + const actual = identify(`SELECT ${longName} FROM users`, { identifyColumns: true }); + expect(actual[0].columns).to.eql([{ name: longName, isWildcard: false }]); + }); + + it('should handle very long alias', () => { + const longAlias = 'b'.repeat(100); + const actual = identify(`SELECT id AS ${longAlias} FROM users`, { identifyColumns: true }); + expect(actual[0].columns).to.eql([{ name: 'id', alias: longAlias, isWildcard: false }]); + }); + + it('should handle column name with underscores', () => { + const actual = identify('SELECT _col_name_, __private__, column_name_123 FROM users', { + 
identifyColumns: true, + }); + expect(actual[0].columns).to.eql([ + { name: '_col_name_', isWildcard: false }, + { name: '__private__', isWildcard: false }, + { name: 'column_name_123', isWildcard: false }, + ]); + }); + + it('should handle column name with numbers', () => { + const actual = identify('SELECT col1, col2, col123, column1name FROM users', { + identifyColumns: true, + }); + expect(actual[0].columns).to.eql([ + { name: 'col1', isWildcard: false }, + { name: 'col2', isWildcard: false }, + { name: 'col123', isWildcard: false }, + { name: 'column1name', isWildcard: false }, + ]); + }); + }); + + describe('non-SELECT statements', () => { + it('should not identify columns for INSERT', () => { + const actual = identify('INSERT INTO users (id, name) VALUES (1, "test")', { + identifyColumns: true, + }); + expect(actual[0].columns).to.eql([]); + }); + + it('should not identify columns for UPDATE', () => { + const actual = identify('UPDATE users SET name = "test"', { identifyColumns: true }); + expect(actual[0].columns).to.eql([]); + }); + + it('should not identify columns for DELETE', () => { + const actual = identify('DELETE FROM users', { identifyColumns: true }); + expect(actual[0].columns).to.eql([]); + }); + }); + }); +}); diff --git a/test/identifier/edge-cases-misidentified-references.spec.ts b/test/identifier/edge-cases-misidentified-references.spec.ts new file mode 100644 index 0000000..3ba3ba9 --- /dev/null +++ b/test/identifier/edge-cases-misidentified-references.spec.ts @@ -0,0 +1,44 @@ +import { expect } from 'chai'; + +import { identify } from '../../src'; + +describe('edge cases — misidentified references', () => { + describe('column parser', () => { + // Valid ANSI SQL — arithmetic expressions in SELECT are standard + it('should not treat arithmetic operator as alias', () => { + const actual = identify('SELECT a + b FROM t', { identifyColumns: true }); + // Actual: [{name:'a', alias:'+'}] — the + operator is misidentified as an alias + const 
columns = actual[0].columns; + const hasPlus = columns.some((col: { alias?: string }) => col.alias === '+'); + expect(hasPlus).to.equal(false); + }); + + // Valid MSSQL — TOP is an MSSQL-specific clause (SQL Server) + it('should not misidentify MSSQL TOP as a column', () => { + const actual = identify('SELECT TOP 10 name, id FROM users', { + identifyColumns: true, + dialect: 'mssql', + }); + expect(actual[0].columns).to.eql([ + { name: 'name', isWildcard: false }, + { name: 'id', isWildcard: false }, + ]); + }); + }); + + describe('table parser', () => { + // Valid ANSI SQL — derived table / subquery in FROM is standard SQL + it('should not produce garbage from subquery in FROM', () => { + const actual = identify('SELECT * FROM (SELECT id FROM users) AS subquery', { + identifyTables: true, + }); + // Actual: [{name:'(', alias:'SELECT'}, {name:'users'}] + // The '(' is misidentified as a table name, 'SELECT' as its alias + const tables = actual[0].tables; + tables.forEach((t: { name: string }) => { + expect(t.name).to.not.equal('('); + expect(t.name).to.not.equal('SELECT'); + }); + }); + }); +}); diff --git a/test/identifier/edge-cases-missed-references.spec.ts b/test/identifier/edge-cases-missed-references.spec.ts new file mode 100644 index 0000000..d2dd14e --- /dev/null +++ b/test/identifier/edge-cases-missed-references.spec.ts @@ -0,0 +1,147 @@ +import { expect } from 'chai'; + +import { identify } from '../../src'; + +describe('edge cases — missed references', () => { + describe('column parser', () => { + // Not strict ANSI SQL, but PostgreSQL, MySQL, SQL Server and SQLite accept SELECT without FROM (e.g. 
SELECT 1+1) + it('should not lose last column in SELECT without FROM (multiple columns)', () => { + const actual = identify('SELECT a, b, c', { identifyColumns: true }); + // Actual: [{name:'a'}, {name:'b'}] — last column 'c' is lost (no flush at end of input) + expect(actual[0].columns).to.eql([ + { name: 'a', isWildcard: false }, + { name: 'b', isWildcard: false }, + { name: 'c', isWildcard: false }, + ]); + }); + + // Dialect extension (not strict ANSI SQL) — single column SELECT without FROM + it('should not lose single column in SELECT without FROM', () => { + const actual = identify('SELECT a', { identifyColumns: true }); + // Actual: [] — the only column is never flushed + expect(actual[0].columns).to.eql([{ name: 'a', isWildcard: false }]); + }); + + // Valid ANSI SQL — CASE expressions are standard SQL-92+ + it('should identify id column alongside CASE expression', () => { + const actual = identify( + "SELECT id, CASE WHEN status = 1 THEN 'active' ELSE 'inactive' END AS status_text FROM users", + { identifyColumns: true }, + ); + const columns = actual[0].columns; + expect(columns[0]).to.eql({ name: 'id', isWildcard: false }); + }); + + // Valid MSSQL — TOP is an MSSQL-specific clause + it('should not lose columns after MSSQL TOP clause', () => { + const actual = identify('SELECT TOP 10 name, id FROM users', { + identifyColumns: true, + dialect: 'mssql', + }); + // Actual: [{name:'TOP0', alias:'1'}, {name:'id'}] — 'name' is lost + const colNames = actual[0].columns.map((col: { name: string }) => col.name); + expect(colNames).to.include('name'); + expect(colNames).to.include('id'); + }); + + // Valid PostgreSQL — DISTINCT ON is a PostgreSQL extension to the SQL standard + it('should not lose columns after PostgreSQL DISTINCT ON', () => { + const actual = identify('SELECT DISTINCT ON (id) name, email FROM users', { + identifyColumns: true, + dialect: 'psql', + }); + // Actual: [{name:'email'}] — 'name' is lost (ON is absorbed into the skipped parenthesized expression) + const colNames = actual[0].columns.map((col: 
{ name: string }) => col.name); + expect(colNames).to.include('name'); + expect(colNames).to.include('email'); + }); + + // Valid ANSI SQL — string literals in SELECT list are standard + it('should not lose columns after string literal', () => { + const actual = identify("SELECT 'hello' AS greeting, id FROM users", { + identifyColumns: true, + }); + const colNames = actual[0].columns.map((col: { name: string }) => col.name); + expect(colNames).to.include('id'); + }); + }); + + describe('table parser', () => { + // Valid ANSI SQL — comma-separated tables (implicit cross join) are SQL-89 syntax + it('should find second table in comma-separated list', () => { + const actual = identify('SELECT * FROM a, b', { identifyTables: true }); + // Actual: [{name:'a'}] — 'b' is missed (no PRE_TABLE_KEYWORD after comma) + expect(actual[0].tables).to.eql([{ name: 'a' }, { name: 'b' }]); + }); + + // Valid ANSI SQL — multiple comma-separated tables + it('should find all three comma-separated tables', () => { + const actual = identify('SELECT * FROM a, b, c', { identifyTables: true }); + // Actual: [{name:'a'}] — 'b' and 'c' are missed + expect(actual[0].tables).to.eql([{ name: 'a' }, { name: 'b' }, { name: 'c' }]); + }); + + // Valid ANSI SQL — comma-separated tables with aliases + it('should find comma-separated tables with aliases', () => { + const actual = identify('SELECT * FROM users u, orders o', { identifyTables: true }); + // Actual: [{name:'users', alias:'u'}] — 'orders' is missed + expect(actual[0].tables).to.eql([ + { name: 'users', alias: 'u' }, + { name: 'orders', alias: 'o' }, + ]); + }); + + // The commented-out tests below cover features we don't necessarily need for v1; they can be enabled in the future + // // Valid ANSI SQL — CTEs (WITH clause) are standard SQL:1999+ + // it('should find table referenced from CTE', () => { + // const actual = identify('WITH cte AS (SELECT id FROM users) SELECT * FROM cte', { + // identifyTables: true, + // }); + // // Actual: [] — 'cte' not found (WITH not 
handled, FROM inside parens is skipped) + // const tableNames = actual[0].tables.map((t: { name: string }) => t.name); + // expect(tableNames).to.include('cte'); + // }); + + // // Valid ANSI SQL — UPDATE with table identification + // it('should find table in basic UPDATE statement', () => { + // const actual = identify('UPDATE users SET name = 1', { identifyTables: true }); + // // Actual: [] — UPDATE not in PRE_TABLE_KEYWORDS, so the table is never found + // const tableNames = actual[0].tables.map((t: { name: string }) => t.name); + // expect(tableNames).to.include('users'); + // }); + + // // Valid ANSI SQL — DELETE with table identification + // it('should find table in basic DELETE statement', () => { + // const actual = identify('DELETE FROM orders WHERE id = 1', { identifyTables: true }); + // // Actual: [] — even though FROM is a PRE_TABLE_KEYWORD, the table is not found + // // (likely a flush issue — DELETE FROM orders ends without a NON_ALIAS_KEYWORD) + // const tableNames = actual[0].tables.map((t: { name: string }) => t.name); + // expect(tableNames).to.include('orders'); + // }); + + // // Valid PostgreSQL — UPDATE ... FROM is PostgreSQL-specific + // it('should find both tables in UPDATE ... FROM (PostgreSQL)', () => { + // const actual = identify( + // 'UPDATE target SET col = source.col FROM source WHERE target.id = source.id', + // { identifyTables: true, dialect: 'psql' }, + // ); + // // Actual: [] — neither table found (UPDATE not in PRE_TABLE_KEYWORDS, + // // and the parser state prevents FROM from triggering after SET) + // const tableNames = actual[0].tables.map((t: { name: string }) => t.name); + // expect(tableNames).to.include('target'); + // expect(tableNames).to.include('source'); + // }); + + // // Valid PostgreSQL — DELETE ... USING is PostgreSQL-specific + // it('should find USING table in DELETE ... 
USING (PostgreSQL)', () => { + // const actual = identify('DELETE FROM orders USING users WHERE orders.user_id = users.id', { + // identifyTables: true, + // dialect: 'psql', + // }); + // // Actual: [] — 'orders' not found (flush issue), 'users' not found (USING not in PRE_TABLE_KEYWORDS) + // const tableNames = actual[0].tables.map((t: { name: string }) => t.name); + // expect(tableNames).to.include('orders'); + // expect(tableNames).to.include('users'); + // }); + }); +}); diff --git a/test/identifier/inner-statements.spec.ts b/test/identifier/inner-statements.spec.ts index cbbc98b..8fbcce0 100644 --- a/test/identifier/inner-statements.spec.ts +++ b/test/identifier/inner-statements.spec.ts @@ -17,6 +17,7 @@ describe('identifier', () => { executionType: 'MODIFICATION', parameters: [], tables: [], + columns: [], }, ]; @@ -36,6 +37,7 @@ describe('identifier', () => { executionType: 'MODIFICATION', parameters: [], tables: [], + columns: [], }, ]; @@ -57,6 +59,7 @@ describe('identifier', () => { executionType: 'MODIFICATION', parameters: [], tables: [], + columns: [], }, ]; @@ -79,6 +82,7 @@ describe('identifier', () => { executionType: 'MODIFICATION', parameters: [], tables: [], + columns: [], }, ]; diff --git a/test/identifier/multiple-statement.spec.ts b/test/identifier/multiple-statement.spec.ts index a55b620..76fa52f 100644 --- a/test/identifier/multiple-statement.spec.ts +++ b/test/identifier/multiple-statement.spec.ts @@ -17,6 +17,7 @@ describe('identifier', () => { executionType: 'MODIFICATION', parameters: [], tables: [], + columns: [], }, { end: 76, @@ -26,6 +27,7 @@ describe('identifier', () => { executionType: 'LISTING', parameters: [], tables: [], + columns: [], }, ]; @@ -47,6 +49,7 @@ describe('identifier', () => { executionType: 'MODIFICATION', parameters: [], tables: [], + columns: [], }, { start: 74, @@ -56,6 +59,7 @@ describe('identifier', () => { executionType: 'LISTING', parameters: [], tables: [], + columns: [], }, ]; @@ -80,6 +84,7 @@ 
describe('identifier', () => { executionType: 'LISTING', parameters: [], tables: [], + columns: [], }, { start: 35, @@ -89,6 +94,7 @@ describe('identifier', () => { executionType: 'LISTING', parameters: [], tables: [], + columns: [], }, ]; @@ -112,6 +118,7 @@ describe('identifier', () => { executionType: 'LISTING', parameters: [], tables: [], + columns: [], }, { start: 20, @@ -121,6 +128,7 @@ describe('identifier', () => { executionType: 'LISTING', parameters: [], tables: [], + columns: [], }, { start: 50, @@ -130,6 +138,7 @@ describe('identifier', () => { executionType: 'LISTING', parameters: [], tables: [], + columns: [], }, ]; @@ -174,6 +183,7 @@ describe('identifier', () => { start: 11, text: 'DECLARE\n PK_NAME VARCHAR(200);\n\n BEGIN\n EXECUTE IMMEDIATE (\'CREATE SEQUENCE "untitled_table8_seq"\');\n\n SELECT\n cols.column_name INTO PK_NAME\n FROM\n all_constraints cons,\n all_cons_columns cols\n WHERE\n cons.constraint_type = \'P\'\n AND cons.constraint_name = cols.constraint_name\n AND cons.owner = cols.owner\n AND cols.table_name = \'untitled_table8\';\n\n execute immediate (\n \'create or replace trigger "untitled_table8_autoinc_trg" BEFORE INSERT on "untitled_table8" for each row declare checking number := 1; begin if (:new."\' || PK_NAME || \'" is null) then while checking >= 1 loop select "untitled_table8_seq".nextval into :new."\' || PK_NAME || \'" from dual; select count("\' || PK_NAME || \'") into checking from "untitled_table8" where "\' || PK_NAME || \'" = :new."\' || PK_NAME || \'"; end loop; end if; end;\'\n );\n\n END;', type: 'ANON_BLOCK', + columns: [], }, ]; expect(actual).to.eql(expected); @@ -222,6 +232,7 @@ describe('identifier', () => { start: 11, text: 'create table\n "untitled_table8" (\n "id" integer not null primary key,\n "created_at" varchar(255) not null\n );', type: 'CREATE_TABLE', + columns: [], }, { end: 1212, @@ -231,6 +242,7 @@ describe('identifier', () => { start: 180, text: 'DECLARE\n PK_NAME VARCHAR(200);\n\n BEGIN\n EXECUTE 
IMMEDIATE (\'CREATE SEQUENCE "untitled_table8_seq"\');\n\n SELECT\n cols.column_name INTO PK_NAME\n FROM\n all_constraints cons,\n all_cons_columns cols\n WHERE\n cons.constraint_type = \'P\'\n AND cons.constraint_name = cols.constraint_name\n AND cons.owner = cols.owner\n AND cols.table_name = \'untitled_table8\';\n\n execute immediate (\n \'create or replace trigger "untitled_table8_autoinc_trg" BEFORE INSERT on "untitled_table8" for each row declare checking number := 1; begin if (:new."\' || PK_NAME || \'" is null) then while checking >= 1 loop select "untitled_table8_seq".nextval into :new."\' || PK_NAME || \'" from dual; select count("\' || PK_NAME || \'") into checking from "untitled_table8" where "\' || PK_NAME || \'" = :new."\' || PK_NAME || \'"; end loop; end if; end;\'\n );\n\n END;', type: 'ANON_BLOCK', + columns: [], }, ]; expect(actual).to.eql(expected); @@ -261,6 +273,7 @@ describe('identifier', () => { executionType: 'MODIFICATION', parameters: [], tables: [], + columns: [], }, { start: 79, @@ -270,6 +283,7 @@ describe('identifier', () => { executionType: 'LISTING', parameters: [], tables: [], + columns: [], }, { start: 250, @@ -279,6 +293,7 @@ describe('identifier', () => { executionType: 'LISTING', parameters: [], tables: [], + columns: [], }, ]; @@ -302,6 +317,7 @@ describe('identifier', () => { executionType: 'UNKNOWN', parameters: [], tables: [], + columns: [], }, { start: 54, @@ -311,6 +327,7 @@ describe('identifier', () => { executionType: 'LISTING', parameters: [], tables: [], + columns: [], }, ]; @@ -335,6 +352,7 @@ describe('identifier', () => { executionType: 'UNKNOWN', parameters: [], tables: [], + columns: [], }, { start: 6, @@ -344,6 +362,7 @@ describe('identifier', () => { executionType: 'LISTING', parameters: [], tables: [], + columns: [], }, ]; @@ -367,6 +386,7 @@ describe('identifier', () => { executionType: 'UNKNOWN', parameters: [], tables: [], + columns: [], }, { start: 24, @@ -376,6 +396,7 @@ describe('identifier', () => { 
executionType: 'LISTING', parameters: [], tables: [], + columns: [], }, ]; @@ -398,6 +419,7 @@ describe('identifier', () => { executionType: 'TRANSACTION', parameters: [], tables: [], + columns: [], }, { start: 19, @@ -407,6 +429,7 @@ describe('identifier', () => { executionType: 'LISTING', parameters: [], tables: [], + columns: [], }, { start: 29, @@ -416,6 +439,7 @@ describe('identifier', () => { executionType: 'TRANSACTION', parameters: [], tables: [], + columns: [], }, ]; expect(actual).to.eql(expected); @@ -436,6 +460,7 @@ describe('identifier', () => { executionType: 'TRANSACTION', parameters: [], tables: [], + columns: [], }, { start: 19 + offset, @@ -445,6 +470,7 @@ describe('identifier', () => { executionType: 'LISTING', parameters: [], tables: [], + columns: [], }, { start: 29 + offset, @@ -454,6 +480,7 @@ describe('identifier', () => { executionType: 'TRANSACTION', parameters: [], tables: [], + columns: [], }, ]; expect(actual).to.eql(expected); diff --git a/test/identifier/single-statement.spec.ts b/test/identifier/single-statement.spec.ts index f86c1d8..cd25e45 100644 --- a/test/identifier/single-statement.spec.ts +++ b/test/identifier/single-statement.spec.ts @@ -15,6 +15,7 @@ describe('identifier', () => { executionType: 'LISTING', parameters: [], tables: [], + columns: [], }, ]; @@ -32,6 +33,7 @@ describe('identifier', () => { executionType: 'LISTING', parameters: [], tables: [], + columns: [], }, ]; @@ -49,6 +51,7 @@ describe('identifier', () => { executionType: 'LISTING', parameters: [], tables: [], + columns: [], }, ]; @@ -66,6 +69,7 @@ describe('identifier', () => { executionType: 'LISTING', parameters: [], tables: [], + columns: [], }, ]; @@ -86,6 +90,7 @@ describe('identifier', () => { executionType: 'MODIFICATION', parameters: [], tables: [], + columns: [], }, ]; @@ -111,6 +116,7 @@ describe('identifier', () => { executionType: 'MODIFICATION', parameters: [], tables: [], + columns: [], }, ]; @@ -129,6 +135,7 @@ describe('identifier', () => { 
executionType: 'MODIFICATION', parameters: [], tables: [], + columns: [], }, ]; @@ -149,6 +156,7 @@ describe('identifier', () => { executionType: 'MODIFICATION', parameters: [], tables: [], + columns: [], }, ]; @@ -179,6 +187,7 @@ describe('identifier', () => { executionType: 'MODIFICATION', parameters: [], tables: [], + columns: [], }, ]; @@ -216,6 +225,7 @@ describe('identifier', () => { executionType: 'MODIFICATION', parameters: [], tables: [], + columns: [], }, ]; @@ -248,6 +258,7 @@ describe('identifier', () => { executionType: 'MODIFICATION', parameters: [], tables: [], + columns: [], }, ]; @@ -270,6 +281,7 @@ describe('identifier', () => { executionType: 'MODIFICATION', parameters: [], tables: [], + columns: [], }, ]; @@ -294,6 +306,7 @@ describe('identifier', () => { executionType: 'MODIFICATION', parameters: [], tables: [], + columns: [], }, ]; expect(actual).to.eql(expected); @@ -320,6 +333,7 @@ describe('identifier', () => { executionType: 'MODIFICATION', parameters: [], tables: [], + columns: [], }, ]; expect(actual).to.eql(expected); @@ -355,6 +369,7 @@ describe('identifier', () => { executionType: 'MODIFICATION', parameters: [], tables: [], + columns: [], }, ]; expect(actual).to.eql(expected); @@ -373,6 +388,7 @@ describe('identifier', () => { executionType: 'MODIFICATION', parameters: [], tables: [], + columns: [], }, ]; expect(actual).to.eql(expected); @@ -403,6 +419,7 @@ describe('identifier', () => { executionType: 'MODIFICATION', parameters: [], tables: [], + columns: [], }, ]; expect(actual).to.eql(expected); @@ -431,6 +448,7 @@ describe('identifier', () => { executionType: 'MODIFICATION', parameters: [], tables: [], // FIXME: should return mydataset.customers + columns: [], }, ]; expect(actual).to.eql(expected); @@ -457,6 +475,7 @@ describe('identifier', () => { executionType: 'MODIFICATION', parameters: [], tables: [], + columns: [], }, ]; expect(actual).to.eql(expected); @@ -493,6 +512,7 @@ describe('identifier', () => { executionType: 
'MODIFICATION', parameters: [], tables: [], + columns: [], }, ]; expect(actual).to.eql(expected); @@ -522,6 +542,7 @@ describe('identifier', () => { executionType: 'MODIFICATION', parameters: [], tables: [], + columns: [], }, ]; expect(actual).to.eql(expected); @@ -597,6 +618,7 @@ describe('identifier', () => { executionType: 'LISTING', parameters: [], tables: [], + columns: [], }, ]; @@ -645,6 +667,7 @@ describe('identifier', () => { executionType: 'MODIFICATION', parameters: [], tables: [], + columns: [], }, ]; expect(actual).to.eql(expected); @@ -696,6 +719,7 @@ describe('identifier', () => { executionType: 'MODIFICATION', parameters: [], tables: [], + columns: [], }, ]; expect(actual).to.eql(expected); @@ -721,6 +745,7 @@ describe('identifier', () => { executionType: 'MODIFICATION', parameters: [], tables: [], + columns: [], }, ]; @@ -740,6 +765,7 @@ describe('identifier', () => { executionType: 'MODIFICATION', parameters: [], tables: [], + columns: [], }, ]; expect(actual).to.eql(expected); @@ -759,6 +785,7 @@ describe('identifier', () => { executionType: 'MODIFICATION', parameters: [], tables: [], + columns: [], }, ]; expect(actual).to.eql(expected); @@ -793,6 +820,7 @@ describe('identifier', () => { executionType: 'MODIFICATION', parameters: [], tables: [], + columns: [], }, ]; expect(actual).to.eql(expected); @@ -812,6 +840,7 @@ describe('identifier', () => { executionType: 'MODIFICATION', parameters: [], tables: [], + columns: [], }, ]; expect(actual).to.eql(expected); @@ -829,6 +858,7 @@ describe('identifier', () => { executionType: 'MODIFICATION', parameters: [], tables: [], + columns: [], }, ]; expect(actual).to.eql(expected); @@ -848,6 +878,7 @@ describe('identifier', () => { executionType: 'MODIFICATION', parameters: [], tables: [], + columns: [], }, ]; expect(actual).to.eql(expected); @@ -869,6 +900,7 @@ describe('identifier', () => { executionType: 'MODIFICATION', parameters: [], tables: [], + columns: [], }, ]; expect(actual).to.eql(expected); @@ 
-891,6 +923,7 @@ describe('identifier', () => { executionType: 'MODIFICATION', parameters: [], tables: [], + columns: [], }, ]; @@ -916,6 +949,7 @@ describe('identifier', () => { executionType: 'MODIFICATION', parameters: [], tables: [], + columns: [], }, ]; @@ -933,6 +967,7 @@ describe('identifier', () => { executionType: 'MODIFICATION', parameters: [], tables: [], + columns: [], }, ]; @@ -950,6 +985,7 @@ describe('identifier', () => { executionType: 'MODIFICATION', parameters: [], tables: [], + columns: [], }, ]; @@ -967,6 +1003,7 @@ describe('identifier', () => { executionType: 'MODIFICATION', parameters: [], tables: [], + columns: [], }, ]; expect(actual).to.eql(expected); @@ -984,6 +1021,7 @@ describe('identifier', () => { executionType: 'MODIFICATION', parameters: [], tables: [], + columns: [], }, ]; expect(actual).to.eql(expected); @@ -1001,6 +1039,7 @@ describe('identifier', () => { executionType: 'MODIFICATION', parameters: [], tables: [], + columns: [], }, ]; expect(actual).to.eql(expected); @@ -1017,6 +1056,7 @@ describe('identifier', () => { executionType: 'MODIFICATION', parameters: [], tables: [], + columns: [], }, ]; @@ -1034,6 +1074,7 @@ describe('identifier', () => { executionType: 'MODIFICATION', parameters: [], tables: [], + columns: [], }, ]; @@ -1051,6 +1092,7 @@ describe('identifier', () => { executionType: 'MODIFICATION', parameters: [], tables: [], + columns: [], }, ]; @@ -1070,6 +1112,7 @@ describe('identifier', () => { executionType: 'MODIFICATION', parameters: [], tables: [], + columns: [], }, ]; expect(actual).to.eql(expected); @@ -1086,6 +1129,7 @@ describe('identifier', () => { executionType: 'MODIFICATION', parameters: [], tables: [], + columns: [], }, ]; @@ -1113,6 +1157,7 @@ describe('identifier', () => { executionType: 'MODIFICATION', parameters: [], tables: [], + columns: [], }, ]; @@ -1151,6 +1196,7 @@ describe('identifier', () => { executionType: 'LISTING', parameters: [], tables: [], + columns: [], }, ]; @@ -1173,6 +1219,7 @@ 
describe('identifier', () => { executionType: 'LISTING', parameters: [], tables: [], + columns: [], }, ]; @@ -1195,6 +1242,7 @@ describe('identifier', () => { executionType: 'LISTING', parameters: [], tables: [], + columns: [], }, ]; @@ -1215,6 +1263,7 @@ describe('identifier', () => { executionType: 'LISTING', parameters: [], tables: [], + columns: [], }, ]; @@ -1236,6 +1285,7 @@ describe('identifier', () => { executionType: 'LISTING', parameters: [], tables: [], + columns: [], }, ]; @@ -1259,6 +1309,7 @@ describe('identifier', () => { executionType: 'LISTING', parameters: [], tables: [], + columns: [], }, ]; @@ -1284,6 +1335,7 @@ describe('identifier', () => { executionType: 'UNKNOWN', parameters: [], tables: [], + columns: [], }, ]; @@ -1307,6 +1359,7 @@ describe('identifier', () => { executionType: 'LISTING', parameters: [], tables: [], + columns: [], }, ]; @@ -1329,6 +1382,7 @@ describe('identifier', () => { executionType: 'LISTING', parameters: [], tables: [], // FIXME: should return 'table'? 
+ columns: [], }, ]; @@ -1368,6 +1422,7 @@ describe('identifier', () => { executionType: 'LISTING', parameters: [], tables: [], + columns: [], }, ]; @@ -1397,6 +1452,7 @@ describe('identifier', () => { executionType: 'LISTING', parameters: [], tables: [], + columns: [], }, ]; @@ -1418,6 +1474,7 @@ describe('identifier', () => { executionType: 'LISTING', parameters: ['$1', '$2'], tables: [], + columns: [], }, ]; @@ -1438,6 +1495,7 @@ describe('identifier', () => { executionType: 'LISTING', parameters: ['$1', '$2', '$3', '$4'], tables: [], + columns: [], }, ]; @@ -1458,6 +1516,7 @@ describe('identifier', () => { executionType: 'LISTING', parameters: [':one', ':two'], tables: [], + columns: [], }, ]; @@ -1478,6 +1537,7 @@ describe('identifier', () => { executionType: 'LISTING', parameters: [':one', ':two', ':three'], tables: [], + columns: [], }, ]; @@ -1498,6 +1558,7 @@ describe('identifier', () => { executionType: 'LISTING', parameters: ['?', '?', '?'], tables: [], + columns: [], }, ]; @@ -1518,6 +1579,7 @@ describe('identifier', () => { executionType: 'UNKNOWN', parameters: [], tables: [], + columns: [], }, ]; @@ -1543,6 +1605,7 @@ describe('identifier', () => { executionType: 'MODIFICATION', parameters: [], tables: [], + columns: [], }, ]; @@ -1561,6 +1624,7 @@ describe('identifier', () => { executionType: 'UNKNOWN', parameters: [], tables: [], + columns: [], }, ]; diff --git a/test/index.spec.ts b/test/index.spec.ts index 557cd86..861a2c5 100644 --- a/test/index.spec.ts +++ b/test/index.spec.ts @@ -19,6 +19,7 @@ describe('identify', () => { executionType: 'LISTING', parameters: ['$1', '$2'], tables: [], + columns: [], }, ]); }); @@ -42,6 +43,7 @@ describe('identify', () => { executionType: 'LISTING', parameters: ['?', '$1', ':fizzz', ':"buzz buzz"', '{fooo}'], tables: [], + columns: [], }, ]); }); @@ -62,6 +64,7 @@ describe('identify', () => { executionType: 'LISTING', parameters: [], tables: [], + columns: [], }, ]); }); @@ -83,6 +86,7 @@ describe('identify', () 
=> { executionType: 'LISTING', parameters: ['$1'], tables: [], + columns: [], }, ]); }); @@ -98,10 +102,230 @@ describe('identify', () => { type: 'SELECT', executionType: 'LISTING', parameters: [], - tables: ['foo', 'bar'], + tables: [{ name: 'foo' }, { name: 'bar' }], + columns: [], }, ]); }); + + it('should identify tables and schema', () => { + expect( + identify('SELECT * FROM public.foo JOIN public.bar ON foo.id = bar.id', { + identifyTables: true, + }), + ).to.eql([ + { + start: 0, + end: 58, + text: 'SELECT * FROM public.foo JOIN public.bar ON foo.id = bar.id', + type: 'SELECT', + executionType: 'LISTING', + parameters: [], + tables: [ + { name: 'foo', schema: 'public' }, + { name: 'bar', schema: 'public' }, + ], + columns: [], + }, + ]); + }); + + describe('Table identification with qualified names', () => { + it('should identify single-part table names', () => { + const result = identify('SELECT * FROM users', { identifyTables: true }); + expect(result[0].tables).to.eql([{ name: 'users' }]); + }); + + it('should identify two-part qualified names (schema.table)', () => { + const result = identify('SELECT * FROM public.users', { identifyTables: true }); + expect(result[0].tables).to.eql([{ name: 'users', schema: 'public' }]); + }); + + it('should identify three-part qualified names (database.schema.table)', () => { + const result = identify('SELECT * FROM mydb.public.users', { identifyTables: true }); + expect(result[0].tables).to.eql([{ name: 'users', schema: 'public', database: 'mydb' }]); + }); + + it('should handle mixed qualification levels in JOINs', () => { + const result = identify( + 'SELECT * FROM users JOIN public.orders ON users.id = orders.user_id', + { identifyTables: true }, + ); + expect(result[0].tables).to.eql([{ name: 'users' }, { name: 'orders', schema: 'public' }]); + }); + + it('should identify multiple three-part qualified names', () => { + const result = identify('SELECT * FROM db1.schema1.table1 JOIN db2.schema2.table2', { + 
identifyTables: true, + }); + expect(result[0].tables).to.eql([ + { name: 'table1', schema: 'schema1', database: 'db1' }, + { name: 'table2', schema: 'schema2', database: 'db2' }, + ]); + }); + + it('should identify qualified table names in INSERT statements', () => { + const result = identify('INSERT INTO public.users (id, name) VALUES (1, "test")', { + identifyTables: true, + }); + expect(result[0].tables).to.eql([{ name: 'users', schema: 'public' }]); + }); + + it('should handle multiple JOINs with different qualification levels', () => { + const result = identify( + 'SELECT * FROM users u JOIN public.orders o ON u.id = o.user_id JOIN db.schema.products p ON o.product_id = p.id', + { identifyTables: true }, + ); + expect(result[0].tables).to.eql([ + { name: 'users', alias: 'u' }, + { name: 'orders', schema: 'public', alias: 'o' }, + { name: 'products', schema: 'schema', database: 'db', alias: 'p' }, + ]); + }); + + it('should not duplicate table references without aliases', () => { + const result = identify('SELECT * FROM users JOIN users ON users.id = users.manager_id', { + identifyTables: true, + }); + expect(result[0].tables).to.eql([{ name: 'users' }]); + }); + + it('should treat same table with different aliases as separate entries', () => { + const result = identify('SELECT * FROM users u1 JOIN users u2 ON u1.id = u2.manager_id', { + identifyTables: true, + }); + expect(result[0].tables).to.eql([ + { name: 'users', alias: 'u1' }, + { name: 'users', alias: 'u2' }, + ]); + }); + + it('should identify tables with LEFT JOIN', () => { + const result = identify( + 'SELECT * FROM public.customers LEFT JOIN orders ON customers.id = orders.customer_id', + { identifyTables: true }, + ); + expect(result[0].tables).to.eql([ + { name: 'customers', schema: 'public' }, + { name: 'orders' }, + ]); + }); + + it('should identify tables with RIGHT JOIN', () => { + const result = identify( + 'SELECT * FROM orders RIGHT JOIN db.schema.products ON orders.product_id = 
products.id', + { identifyTables: true }, + ); + expect(result[0].tables).to.eql([ + { name: 'orders' }, + { name: 'products', schema: 'schema', database: 'db' }, + ]); + }); + + it('should identify tables with INNER JOIN', () => { + const result = identify( + 'SELECT * FROM users INNER JOIN public.profiles ON users.id = profiles.user_id', + { identifyTables: true }, + ); + expect(result[0].tables).to.eql([{ name: 'users' }, { name: 'profiles', schema: 'public' }]); + }); + + it('should identify INSERT INTO with three-part qualified name', () => { + const result = identify('INSERT INTO mydb.dbo.employees (name, age) VALUES ("John", 30)', { + identifyTables: true, + }); + expect(result[0].tables).to.eql([{ name: 'employees', schema: 'dbo', database: 'mydb' }]); + }); + + it('should handle complex query with multiple qualification levels', () => { + const result = identify( + 'SELECT * FROM users JOIN public.orders ON users.id = orders.user_id JOIN db.schema.products ON orders.product_id = products.id', + { identifyTables: true }, + ); + expect(result[0].tables).to.eql([ + { name: 'users' }, + { name: 'orders', schema: 'public' }, + { name: 'products', schema: 'schema', database: 'db' }, + ]); + }); + }); + + describe('Table alias identification', () => { + it('should identify explicit AS alias', () => { + const result = identify('SELECT * FROM users AS u', { identifyTables: true }); + expect(result[0].tables).to.eql([{ name: 'users', alias: 'u' }]); + }); + + it('should identify implicit alias', () => { + const result = identify('SELECT * FROM users u', { identifyTables: true }); + expect(result[0].tables).to.eql([{ name: 'users', alias: 'u' }]); + }); + + it('should identify explicit alias on schema-qualified table', () => { + const result = identify('SELECT * FROM public.users AS u', { identifyTables: true }); + expect(result[0].tables).to.eql([{ name: 'users', schema: 'public', alias: 'u' }]); + }); + + it('should identify implicit alias on schema-qualified 
table', () => { + const result = identify('SELECT * FROM public.users u', { identifyTables: true }); + expect(result[0].tables).to.eql([{ name: 'users', schema: 'public', alias: 'u' }]); + }); + + it('should identify alias on three-part qualified table', () => { + const result = identify('SELECT * FROM mydb.public.users u', { identifyTables: true }); + expect(result[0].tables).to.eql([ + { name: 'users', schema: 'public', database: 'mydb', alias: 'u' }, + ]); + }); + + it('should identify explicit alias on three-part qualified table', () => { + const result = identify('SELECT * FROM mydb.public.users AS u', { identifyTables: true }); + expect(result[0].tables).to.eql([ + { name: 'users', schema: 'public', database: 'mydb', alias: 'u' }, + ]); + }); + + it('should not treat WHERE as an alias', () => { + const result = identify('SELECT * FROM users WHERE id = 1', { identifyTables: true }); + expect(result[0].tables).to.eql([{ name: 'users' }]); + }); + + it('should not treat ON as an alias', () => { + const result = identify('SELECT * FROM users JOIN orders ON users.id = orders.user_id', { + identifyTables: true, + }); + expect(result[0].tables).to.eql([{ name: 'users' }, { name: 'orders' }]); + }); + + it('should not treat JOIN keywords as an alias', () => { + const result = identify('SELECT * FROM users LEFT JOIN orders ON users.id = orders.user_id', { + identifyTables: true, + }); + expect(result[0].tables).to.eql([{ name: 'users' }, { name: 'orders' }]); + }); + + it('should handle mixed explicit and implicit aliases', () => { + const result = identify('SELECT * FROM users AS u JOIN public.orders o ON u.id = o.user_id', { + identifyTables: true, + }); + expect(result[0].tables).to.eql([ + { name: 'users', alias: 'u' }, + { name: 'orders', schema: 'public', alias: 'o' }, + ]); + }); + + it('should handle alias followed by WHERE clause', () => { + const result = identify('SELECT * FROM users u WHERE u.id = 1', { identifyTables: true }); + 
expect(result[0].tables).to.eql([{ name: 'users', alias: 'u' }]); + }); + + it('should not capture alias for INSERT INTO', () => { + const result = identify('INSERT INTO users (name) VALUES ("test")', { + identifyTables: true, + }); + expect(result[0].tables).to.eql([{ name: 'users' }]); + }); + }); }); describe('getExecutionType', () => { @@ -177,6 +401,7 @@ describe('Transaction statements', () => { executionType: 'TRANSACTION', parameters: [], tables: [], + columns: [], }, ]); }); @@ -191,6 +416,7 @@ describe('Transaction statements', () => { executionType: 'TRANSACTION', parameters: [], tables: [], + columns: [], }, ]); }); @@ -205,6 +431,7 @@ describe('Transaction statements', () => { executionType: 'TRANSACTION', parameters: [], tables: [], + columns: [], }, ]); }); @@ -219,6 +446,7 @@ describe('Transaction statements', () => { executionType: 'TRANSACTION', parameters: [], tables: [], + columns: [], }, ]); }); @@ -233,6 +461,7 @@ describe('Transaction statements', () => { executionType: 'TRANSACTION', parameters: [], tables: [], + columns: [], }, ]); }); @@ -247,6 +476,7 @@ describe('Transaction statements', () => { executionType: 'ANON_BLOCK', parameters: [], tables: [], + columns: [], }, ]); }); @@ -267,6 +497,7 @@ describe('Transaction statements', () => { executionType: 'TRANSACTION', parameters: [], tables: [], + columns: [], }, ]); @@ -279,6 +510,7 @@ describe('Transaction statements', () => { executionType: 'TRANSACTION', parameters: [], tables: [], + columns: [], }, ]); }); @@ -293,6 +525,7 @@ describe('Transaction statements', () => { executionType: 'TRANSACTION', parameters: [], tables: [], + columns: [], }, ]); @@ -305,6 +538,7 @@ describe('Transaction statements', () => { executionType: 'TRANSACTION', parameters: [], tables: [], + columns: [], }, ]); @@ -317,6 +551,7 @@ describe('Transaction statements', () => { executionType: 'TRANSACTION', parameters: [], tables: [], + columns: [], }, ]); @@ -329,6 +564,7 @@ describe('Transaction statements', () 
=> { executionType: 'TRANSACTION', parameters: [], tables: [], + columns: [], }, ]); @@ -343,6 +579,7 @@ describe('Transaction statements', () => { executionType: 'TRANSACTION', parameters: [], tables: [], + columns: [], }, ]); }); @@ -357,6 +594,7 @@ describe('Transaction statements', () => { executionType: 'TRANSACTION', parameters: [], tables: [], + columns: [], }, ]); @@ -369,6 +607,7 @@ describe('Transaction statements', () => { executionType: 'TRANSACTION', parameters: [], tables: [], + columns: [], }, ]); @@ -381,6 +620,7 @@ describe('Transaction statements', () => { executionType: 'TRANSACTION', parameters: [], tables: [], + columns: [], }, ]); @@ -395,6 +635,7 @@ describe('Transaction statements', () => { executionType: 'TRANSACTION', parameters: [], tables: [], + columns: [], }, ]); }); diff --git a/test/parser/multiple-statements.spec.ts b/test/parser/multiple-statements.spec.ts index af638db..57a751b 100644 --- a/test/parser/multiple-statements.spec.ts +++ b/test/parser/multiple-statements.spec.ts @@ -25,6 +25,7 @@ describe('parser', () => { endStatement: ';', parameters: [], tables: [], + columns: [], }, { start: 56, @@ -33,6 +34,7 @@ describe('parser', () => { executionType: 'LISTING', parameters: [], tables: [], + columns: [], }, ], tokens: [ @@ -96,6 +98,7 @@ describe('parser', () => { endStatement: ';', parameters: [], tables: [], + columns: [], }, { start: 74, @@ -104,6 +107,7 @@ describe('parser', () => { executionType: 'LISTING', parameters: [], tables: [], + columns: [], }, ], tokens: [ diff --git a/test/parser/single-statements.spec.ts b/test/parser/single-statements.spec.ts index 4fa5b0b..37a4208 100644 --- a/test/parser/single-statements.spec.ts +++ b/test/parser/single-statements.spec.ts @@ -24,6 +24,7 @@ describe('parser', () => { end: 14, parameters: [], tables: [], + columns: [], type: 'UNKNOWN', executionType: 'UNKNOWN', }, @@ -45,6 +46,7 @@ describe('parser', () => { end: 19, parameters: [], tables: [], + columns: [], type: 'UNKNOWN', 
executionType: 'UNKNOWN', }, @@ -76,6 +78,7 @@ describe('parser', () => { executionType: 'LISTING', parameters: [], tables: [], + columns: [], }, ], tokens: [ @@ -114,6 +117,7 @@ describe('parser', () => { executionType: 'LISTING', parameters: [], tables: [], + columns: [], }, ], tokens: [ @@ -153,6 +157,7 @@ describe('parser', () => { endStatement: ';', parameters: [], tables: [], + columns: [], }, ], tokens: [ @@ -218,6 +223,7 @@ describe('parser', () => { endStatement: ';', parameters: [], tables: [], + columns: [], }, ], tokens: [ @@ -283,6 +289,7 @@ describe('parser', () => { endStatement: ';', parameters: [], tables: [], + columns: [], }, ], tokens: [ @@ -340,6 +347,7 @@ describe('parser', () => { endStatement: ';', parameters: [], tables: [], + columns: [], }, ], tokens: [ @@ -403,6 +411,7 @@ describe('parser', () => { endStatement: ';', parameters: [], tables: [], + columns: [], }, ], tokens: [ @@ -460,6 +469,7 @@ describe('parser', () => { endStatement: ';', parameters: [], tables: [], + columns: [], }, ], tokens: [ @@ -522,6 +532,7 @@ describe('parser', () => { endStatement: ';', parameters: [], tables: [], + columns: [], }, ], tokens: [ @@ -567,6 +578,7 @@ describe('parser', () => { endStatement: ';', parameters: [], tables: [], + columns: [], }, ], tokens: [ @@ -612,6 +624,7 @@ describe('parser', () => { endStatement: ';', parameters: [], tables: [], + columns: [], }, ], tokens: [ @@ -657,6 +670,7 @@ describe('parser', () => { endStatement: ';', parameters: [], tables: [], + columns: [], }, ], tokens: [ @@ -730,6 +744,7 @@ describe('parser', () => { true, 'psql', false, + false, defaultParamTypesFor('psql'), ); actual.tokens = aggregateUnknownTokens(actual.tokens); @@ -763,6 +778,7 @@ describe('parser', () => { true, 'psql', false, + false, defaultParamTypesFor('psql'), ); actual.tokens = aggregateUnknownTokens(actual.tokens); @@ -808,6 +824,7 @@ describe('parser', () => { true, 'mssql', false, + false, defaultParamTypesFor('mssql'), ); actual.tokens = 
aggregateUnknownTokens(actual.tokens); @@ -879,6 +896,7 @@ describe('parser', () => { true, 'mssql', false, + false, defaultParamTypesFor('mssql'), ); actual.tokens = aggregateUnknownTokens(actual.tokens);