Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -209,7 +209,6 @@ We provide **JSON Schema** for `.sql-formatter.json` configuration file, enablin
- [Using the schema in VSCode](https://code.visualstudio.com/docs/languages/json#_mapping-in-the-user-settings)
- [Using the schema in Zed](https://zed.dev/docs/languages/json#schema-specification-via-settings)


### Usage as ESLint plugin

- Inside `eslint-plugin-sql` by using the rule [eslint-plugin-sql#format](https://github.com/gajus/eslint-plugin-sql#format).
Expand Down
1 change: 1 addition & 0 deletions src/allDialects.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
export { bigquery } from './languages/bigquery/bigquery.formatter.js';
export { clickhouse } from './languages/clickhouse/clickhouse.formatter.js';
export { db2 } from './languages/db2/db2.formatter.js';
export { db2i } from './languages/db2i/db2i.formatter.js';
export { duckdb } from './languages/duckdb/duckdb.formatter.js';
Expand Down
1 change: 1 addition & 0 deletions src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ export { ConfigError } from './validateConfig.js';

// When adding a new dialect, be sure to add it to the list of exports below.
export { bigquery } from './languages/bigquery/bigquery.formatter.js';
export { clickhouse } from './languages/clickhouse/clickhouse.formatter.js';
export { db2 } from './languages/db2/db2.formatter.js';
export { db2i } from './languages/db2i/db2i.formatter.js';
export { duckdb } from './languages/duckdb/duckdb.formatter.js';
Expand Down
364 changes: 364 additions & 0 deletions src/languages/clickhouse/clickhouse.formatter.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,364 @@
import { DialectOptions } from '../../dialect.js';
import { expandPhrases } from '../../expandPhrases.js';
import { EOF_TOKEN, Token, TokenType } from '../../lexer/token.js';
import { functions } from './clickhouse.functions.js';
import { dataTypes, keywords } from './clickhouse.keywords.js';

// Phrases that introduce a SELECT; handed to the tokenizer as `reservedSelect`.
// The bracketed `[DISTINCT]` is presumably an optional part expanded by
// `expandPhrases` — confirm against that helper.
const reservedSelect = expandPhrases([
'SELECT [DISTINCT]',
// https://clickhouse.com/docs/sql-reference/statements/alter/view
'MODIFY QUERY SELECT [DISTINCT]',
]);

// Clause-starting phrases that are NOT part of either one-line clause list.
// They are merged with `standardOnelineClauses` and `tabularOnelineClauses`
// into the tokenizer's `reservedClauses` option, but are not passed to
// `formatOptions.onelineClauses`.
// Phrase syntax: `{A | B}` and `[A | B]` are presumably required/optional
// alternatives expanded by `expandPhrases` — confirm against that helper.
const reservedClauses = expandPhrases([
'SET',
// https://clickhouse.com/docs/sql-reference/statements/select
'WITH',
'FROM',
'SAMPLE',
'PREWHERE',
'WHERE',
'GROUP BY',
'HAVING',
'QUALIFY',
'ORDER BY',
'LIMIT', // NOTE(review): OFFSET is not listed as a separate clause here, although the ClickHouse docs describe `LIMIT m OFFSET n` and `OFFSET ... FETCH` — confirm the omission is intended
'SETTINGS',
'INTO OUTFILE',
'FORMAT',
// https://clickhouse.com/docs/sql-reference/window-functions
'WINDOW',
'PARTITION BY',
// https://clickhouse.com/docs/sql-reference/statements/insert-into
'INSERT INTO',
'VALUES',
// https://clickhouse.com/docs/sql-reference/statements/create/view#refreshable-materialized-view
'DEPENDS ON',
// https://clickhouse.com/docs/sql-reference/statements/move
'MOVE {USER | ROLE | QUOTA | SETTINGS PROFILE | ROW POLICY}',
// https://clickhouse.com/docs/sql-reference/statements/grant
'GRANT',
// https://clickhouse.com/docs/sql-reference/statements/revoke
'REVOKE',
// https://clickhouse.com/docs/sql-reference/statements/check-grant
'CHECK GRANT',
// https://clickhouse.com/docs/sql-reference/statements/set-role
'SET [DEFAULT] ROLE [NONE | ALL | ALL EXCEPT]',
// https://clickhouse.com/docs/sql-reference/statements/optimize
'DEDUPLICATE BY',
// https://clickhouse.com/docs/sql-reference/statements/alter/statistics
'MODIFY STATISTICS',
// Used for ALTER INDEX ... TYPE and ALTER STATISTICS ... TYPE
'TYPE',
// https://clickhouse.com/docs/sql-reference/statements/alter
'ALTER USER [IF EXISTS]',
'ALTER [ROW] POLICY [IF EXISTS]',
// https://clickhouse.com/docs/sql-reference/statements/drop
// NOTE(review): the PROFILE alternative also appears in a DROP phrase of
// `tabularOnelineClauses`, so `DROP PROFILE` seems to be listed in both
// groups — confirm which formatting is intended.
'DROP {USER | ROLE | QUOTA | PROFILE | SETTINGS PROFILE | ROW POLICY | POLICY} [IF EXISTS]',
]);

// One-line clauses that are passed to the tokenizer (as reserved clauses) and
// to `formatOptions.onelineClauses`, but NOT to
// `formatOptions.tabularOnelineClauses`.
const standardOnelineClauses = expandPhrases([
// https://clickhouse.com/docs/sql-reference/statements/create
'CREATE [OR REPLACE] [TEMPORARY] TABLE [IF NOT EXISTS]',
]);
// One-line clauses that are passed to the tokenizer (as reserved clauses) and
// to both `formatOptions.onelineClauses` and
// `formatOptions.tabularOnelineClauses`.
// Phrase syntax: `{A | B}` and `[A | B]` are presumably required/optional
// alternatives expanded by `expandPhrases` — confirm against that helper.
// The duplicate-looking entries flagged below rely on that reading.
const tabularOnelineClauses = expandPhrases([
'ALL EXCEPT',
'ON CLUSTER',
// https://clickhouse.com/docs/sql-reference/statements/update
'UPDATE',
// https://clickhouse.com/docs/sql-reference/statements/system
'SYSTEM RELOAD {DICTIONARIES | DICTIONARY | FUNCTIONS | FUNCTION | ASYNCHRONOUS METRICS}',
'SYSTEM DROP {DNS CACHE | MARK CACHE | ICEBERG METADATA CACHE | TEXT INDEX DICTIONARY CACHE | TEXT INDEX HEADER CACHE | TEXT INDEX POSTINGS CACHE | REPLICA | DATABASE REPLICA | UNCOMPRESSED CACHE | COMPILED EXPRESSION CACHE | QUERY CONDITION CACHE | QUERY CACHE | FORMAT SCHEMA CACHE | FILESYSTEM CACHE}',
'SYSTEM FLUSH LOGS',
'SYSTEM RELOAD {CONFIG | USERS}',
'SYSTEM SHUTDOWN',
'SYSTEM KILL',
'SYSTEM FLUSH DISTRIBUTED',
'SYSTEM START DISTRIBUTED SENDS',
'SYSTEM {STOP | START} {LISTEN | MERGES | TTL MERGES | MOVES | FETCHES | REPLICATED SENDS | REPLICATION QUEUES | PULLING REPLICATION LOG}',
'SYSTEM {SYNC | RESTART | RESTORE} REPLICA',
'SYSTEM {SYNC | RESTORE} DATABASE REPLICA',
'SYSTEM RESTART REPLICAS',
'SYSTEM UNFREEZE',
'SYSTEM WAIT LOADING PARTS',
'SYSTEM {LOAD | UNLOAD} PRIMARY KEY',
'SYSTEM {STOP | START} [REPLICATED] VIEW',
'SYSTEM {STOP | START} VIEWS',
'SYSTEM {REFRESH | CANCEL | WAIT} VIEW',
'WITH NAME',
// https://clickhouse.com/docs/sql-reference/statements/show
'SHOW [CREATE] {TABLE | TEMPORARY TABLE | DICTIONARY | VIEW | DATABASE}',
'SHOW DATABASES [[NOT] {LIKE | ILIKE}]',
'SHOW [FULL] [TEMPORARY] TABLES [FROM | IN]',
'SHOW [EXTENDED] [FULL] COLUMNS {FROM | IN}',
// https://clickhouse.com/docs/sql-reference/statements/attach
'ATTACH {TABLE | DICTIONARY | DATABASE} [IF NOT EXISTS]',
// https://clickhouse.com/docs/sql-reference/statements/detach
'DETACH {TABLE | DICTIONARY | DATABASE} [IF EXISTS]',
'PERMANENTLY',
'SYNC',
// https://clickhouse.com/docs/sql-reference/statements/drop
// NOTE(review): the PROFILE alternative also appears in the DROP phrase of
// `reservedClauses` — confirm which list should own `DROP PROFILE`.
'DROP {DICTIONARY | DATABASE | PROFILE | VIEW | FUNCTION | NAMED COLLECTION} [IF EXISTS]',
'DROP [TEMPORARY] TABLE [IF EXISTS] [IF EMPTY]',
// https://clickhouse.com/docs/sql-reference/statements/alter/table#rename
'RENAME TO',
// https://clickhouse.com/docs/sql-reference/statements/exists
'EXISTS [TEMPORARY] {TABLE | DICTIONARY | DATABASE}',
// https://clickhouse.com/docs/sql-reference/statements/kill
'KILL QUERY',
// https://clickhouse.com/docs/sql-reference/statements/optimize
'OPTIMIZE TABLE',
// https://clickhouse.com/docs/sql-reference/statements/rename
'RENAME {TABLE | DICTIONARY | DATABASE}',
// https://clickhouse.com/docs/sql-reference/statements/exchange
'EXCHANGE {TABLES | DICTIONARIES}',
// https://clickhouse.com/docs/sql-reference/statements/truncate
'TRUNCATE TABLE [IF EXISTS]',
// https://clickhouse.com/docs/sql-reference/statements/execute_as
'EXECUTE AS',
// https://clickhouse.com/docs/sql-reference/statements/use
'USE',
'TO',
// https://clickhouse.com/docs/sql-reference/statements/undrop
'UNDROP TABLE',
// https://clickhouse.com/docs/sql-reference/statements/create
'CREATE {DATABASE | NAMED COLLECTION} [IF NOT EXISTS]',
'CREATE [OR REPLACE] {VIEW | DICTIONARY} [IF NOT EXISTS]',
'CREATE MATERIALIZED VIEW [IF NOT EXISTS]',
'CREATE FUNCTION',
'CREATE {USER | ROLE | QUOTA | SETTINGS PROFILE} [IF NOT EXISTS | OR REPLACE]',
'CREATE [ROW] POLICY [IF NOT EXISTS | OR REPLACE]',
// https://clickhouse.com/docs/sql-reference/statements/create/table#replace-table
'REPLACE [TEMPORARY] TABLE [IF NOT EXISTS]',
// https://clickhouse.com/docs/sql-reference/statements/alter
'ALTER {ROLE | QUOTA | SETTINGS PROFILE} [IF EXISTS]',
'ALTER [TEMPORARY] TABLE',
'ALTER NAMED COLLECTION [IF EXISTS]',
// https://clickhouse.com/docs/sql-reference/statements/alter/user
'GRANTEES',
'NOT IDENTIFIED',
'RESET AUTHENTICATION METHODS TO NEW',
'{IDENTIFIED | ADD IDENTIFIED} [WITH | BY]',
'[ADD | DROP] HOST {LOCAL | NAME | REGEXP | IP | LIKE}',
'VALID UNTIL',
'DROP [ALL] {PROFILES | SETTINGS}',
'{ADD | MODIFY} SETTINGS',
'ADD PROFILES',
// https://clickhouse.com/docs/sql-reference/statements/alter/apply-deleted-mask
'APPLY DELETED MASK',
// NOTE(review): appears to be produced again by
// '[DELETE | REWRITE PARTS] IN PARTITION' further down — confirm.
'IN PARTITION',
// https://clickhouse.com/docs/sql-reference/statements/alter/column
'{ADD | DROP | RENAME | CLEAR | COMMENT | MODIFY | ALTER | MATERIALIZE} COLUMN',
// https://clickhouse.com/docs/sql-reference/statements/alter/partition
'{DETACH | DROP | ATTACH | FETCH | MOVE} {PART | PARTITION}',
'DROP DETACHED {PART | PARTITION}',
'{FORGET | REPLACE} PARTITION',
// NOTE(review): appears to duplicate the CLEAR alternative of the
// '{ADD | DROP | ...} COLUMN' phrase above — confirm.
'CLEAR COLUMN',
'{FREEZE | UNFREEZE} [PARTITION]',
// NOTE(review): appears to duplicate the CLEAR alternative of
// '{ADD | MATERIALIZE | CLEAR} INDEX [IF NOT EXISTS]' below — confirm.
'CLEAR INDEX',
'TO {DISK | VOLUME}',
'[DELETE | REWRITE PARTS] IN PARTITION',
// https://clickhouse.com/docs/sql-reference/statements/alter/setting
'{MODIFY | RESET} SETTING',
// https://clickhouse.com/docs/sql-reference/statements/alter/delete
'DELETE WHERE',
// https://clickhouse.com/docs/sql-reference/statements/alter/order-by
'MODIFY ORDER BY',
// https://clickhouse.com/docs/sql-reference/statements/alter/sample-by
'{MODIFY | REMOVE} SAMPLE BY',
// https://clickhouse.com/docs/sql-reference/statements/alter/skipping-index
'{ADD | MATERIALIZE | CLEAR} INDEX [IF NOT EXISTS]',
'DROP INDEX [IF EXISTS]',
'GRANULARITY',
'AFTER',
'FIRST',

// https://clickhouse.com/docs/sql-reference/statements/alter/constraint
'ADD CONSTRAINT [IF NOT EXISTS]',
'DROP CONSTRAINT [IF EXISTS]',
// https://clickhouse.com/docs/sql-reference/statements/alter/ttl
'MODIFY TTL',
'REMOVE TTL',
// https://clickhouse.com/docs/sql-reference/statements/alter/statistics
'ADD STATISTICS [IF NOT EXISTS]',
'{DROP | CLEAR} STATISTICS [IF EXISTS]',
'MATERIALIZE STATISTICS [ALL | IF EXISTS]',
// https://clickhouse.com/docs/sql-reference/statements/alter/quota
'KEYED BY',
'NOT KEYED',
'FOR [RANDOMIZED] INTERVAL',
// https://clickhouse.com/docs/sql-reference/statements/alter/row-policy
'AS {PERMISSIVE | RESTRICTIVE}',
'FOR SELECT',
// https://clickhouse.com/docs/sql-reference/statements/alter/projection
'ADD PROJECTION [IF NOT EXISTS]',
'{DROP | MATERIALIZE | CLEAR} PROJECTION [IF EXISTS]',
// https://clickhouse.com/docs/sql-reference/statements/create/view#refreshable-materialized-view
'REFRESH {EVERY | AFTER}',
'RANDOMIZE FOR',
'APPEND',
'APPEND TO',
// https://clickhouse.com/docs/sql-reference/statements/delete
'DELETE FROM',
// https://clickhouse.com/docs/sql-reference/statements/explain
'EXPLAIN [AST | SYNTAX | QUERY TREE | PLAN | PIPELINE | ESTIMATE | TABLE OVERRIDE]',
// https://clickhouse.com/docs/sql-reference/statements/grant
'GRANT ON CLUSTER',
'GRANT CURRENT GRANTS',
'WITH GRANT OPTION',
// https://clickhouse.com/docs/sql-reference/statements/revoke
'REVOKE ON CLUSTER',
'ADMIN OPTION FOR',
// https://clickhouse.com/docs/sql-reference/statements/check-table
'CHECK TABLE',
'PARTITION ID',
// https://clickhouse.com/docs/sql-reference/statements/describe-table
'{DESC | DESCRIBE} TABLE',
]);

// Phrases that combine whole queries; handed to the tokenizer as
// `reservedSetOperations`.
const reservedSetOperations = expandPhrases([
// https://clickhouse.com/docs/sql-reference/statements/select/union
'UNION [ALL | DISTINCT]',
// https://clickhouse.com/docs/sql-reference/statements/parallel_with
'PARALLEL WITH',
]);

// JOIN phrases; handed to the tokenizer as `reservedJoins`. A single pattern
// covers the optional GLOBAL prefix, the join type and the join strictness.
const reservedJoins = expandPhrases([
// https://clickhouse.com/docs/sql-reference/statements/select/join
'[GLOBAL] [INNER|LEFT|RIGHT|FULL|CROSS] [OUTER|SEMI|ANTI|ANY|ALL|ASOF] JOIN',
]);

// Multi-word phrases handed to the tokenizer as `reservedKeywordPhrases`
// (presumably so each is treated as one keyword token — confirm against the
// tokenizer).
const reservedKeywordPhrases = expandPhrases([
'{ROWS | RANGE} BETWEEN',
'ALTER MATERIALIZE STATISTICS',
]);

/**
 * Dialect definition for ClickHouse.
 *
 * `tokenizerOptions` describes how source text is split into tokens and
 * `formatOptions` tells the formatter which clauses stay on one line.
 *
 * Syntax reference: https://clickhouse.com/docs/sql-reference/syntax
 */
export const clickhouse: DialectOptions = {
  name: 'clickhouse',
  tokenizerOptions: {
    reservedSelect,
    // The tokenizer receives every clause phrase, including both one-line groups.
    reservedClauses: [...reservedClauses, ...standardOnelineClauses, ...tabularOnelineClauses],
    reservedSetOperations,
    reservedJoins,
    reservedKeywordPhrases,

    reservedKeywords: keywords,
    reservedDataTypes: dataTypes,
    reservedFunctionNames: functions,
    extraParens: ['[]'],
    lineCommentTypes: ['#', '--'],
    nestedBlockComments: false,
    underscoresInNumbers: true,
    stringTypes: ['$$', "''-qq-bs"],
    identTypes: ['""-qq-bs', '``'],
    paramTypes: {
      // Query parameters written as `{name:Type}`, see
      // https://clickhouse.com/docs/sql-reference/syntax#defining-and-using-query-parameters
      custom: [
        {
          regex: String.raw`\{\s*[^:]+:[^}]+\}`,
          // The parameter key is the trimmed text between `{` and the first `:`.
          // If that shape is not found, the whole matched text is used as the key.
          key: v => {
            const found = v.match(/\{([^:]+):/);
            if (found === null) {
              return v;
            }
            return found[1].trim();
          },
        },
      ],
    },
    operators: [
      // Arithmetic
      '%', // modulo

      // Ternary
      '?',
      ':',

      // Lambda creation
      '->',
    ],
    postProcess,
  },
  formatOptions: {
    onelineClauses: [...standardOnelineClauses, ...tabularOnelineClauses],
    tabularOnelineClauses,
  },
};

/**
 * Re-classifies tokens whose role depends on their neighbours.
 *
 * Every adjustment changes ONLY the token's `type`. `text` (the uppercase
 * canonical form) and `raw` (the text as written in the source) are never
 * modified: other code looks tokens up by `text`, and `raw` is what the
 * formatter needs to reproduce the original casing. Compare with
 * src/languages/mariadb/likeMariaDb.ts.
 *
 * 1. IN / ANY arrive as RESERVED_FUNCTION_NAME and become RESERVED_KEYWORD
 *    when they are used as operators:
 *      - they are not followed by `(`, or
 *      - IN follows an expression-ending token: `foo IN (1, 2, 3)`, or
 *      - ANY follows an operator: `foo = ANY (1, 2, 3)`.
 *    Otherwise they stay function names, e.g. `SELECT any(column) FROM tbl`.
 * 2. SELECT inside a privilege list (`GRANT SELECT, INSERT ON ...`) becomes
 *    RESERVED_KEYWORD so that it does not start a new SELECT block.
 * 3. SET directly followed by an opening paren becomes
 *    RESERVED_FUNCTION_NAME so that `set(100)` is formatted as a call.
 *
 * NOTE(review): ClickHouse documents `in` under "Functions for Implementing
 * the IN Operator", which suggests the function form is an implementation
 * detail, and `any()` is documented as an aggregate function. The
 * function-form handling of IN / ANY may therefore be unnecessary.
 *
 * NOTE(review): IN and ANY (and others such as AND, OR, NOT, CAST, DATE) are
 * listed in both clickhouse.keywords.ts and clickhouse.functions.ts. The
 * tokenizer detects function names before keywords, which is why they arrive
 * here as function names. Listing each name in only one place (preferably
 * the keywords list, like other dialects) would remove the ambiguity. AND
 * and OR are hard-coded in the tokenizer, so listing them has no effect.
 *
 * @param tokens - Tokens produced by the tokenizer.
 * @returns A new array of the same length; tokens that need no change are
 *   returned as the same objects.
 */
function postProcess(tokens: Token[]): Token[] {
  return tokens.map((token, i) => {
    const nextToken = tokens[i + 1] || EOF_TOKEN;
    const prevToken = tokens[i - 1] || EOF_TOKEN;

    // 1. IN / ANY used as operators rather than as function calls.
    if (
      token.type === TokenType.RESERVED_FUNCTION_NAME &&
      (token.text === 'IN' || token.text === 'ANY')
    ) {
      // A function call must be followed by `(`.
      const isFollowedByParen = nextToken.text === '(';

      // `foo IN (...)`: the previous token ends an expression.
      const isInAfterExpression =
        token.text === 'IN' &&
        (prevToken.type === TokenType.IDENTIFIER ||
          prevToken.type === TokenType.QUOTED_IDENTIFIER ||
          prevToken.type === TokenType.NUMBER ||
          prevToken.type === TokenType.STRING ||
          prevToken.type === TokenType.CLOSE_PAREN ||
          prevToken.type === TokenType.ASTERISK);

      // `foo = ANY (...)`: the previous token is an operator.
      const isAnyAfterOperator = token.text === 'ANY' && prevToken.type === TokenType.OPERATOR;

      if (!isFollowedByParen || isInAfterExpression || isAnyAfterOperator) {
        // Only the type changes; `raw` keeps the casing the user wrote.
        return { ...token, type: TokenType.RESERVED_KEYWORD };
      }
      // Otherwise the token stays a RESERVED_FUNCTION_NAME.
    }

    // 2. If we have queries like
    //   > GRANT SELECT, INSERT ON db.table TO john
    //   > GRANT SELECT(a, b), SELECT(c) ON db.table TO john
    // we want to format them as
    //   > GRANT
    //   >   SELECT,
    //   >   INSERT ON db.table
    //   > TO john
    //   > GRANT
    //   >   SELECT(a, b),
    //   >   SELECT(c) ON db.table
    //   > TO john
    // To do this we need to convert the SELECT keyword to a RESERVED_KEYWORD.
    if (
      token.type === TokenType.RESERVED_SELECT &&
      (nextToken.type === TokenType.COMMA ||
        prevToken.type === TokenType.RESERVED_CLAUSE ||
        prevToken.type === TokenType.COMMA)
    ) {
      return { ...token, type: TokenType.RESERVED_KEYWORD };
    }

    // 3. We should format `set(100)` as-is rather than `SET (100)`.
    // NOTE(review): prefer a shared helper such as `isToken.SET(token)` for
    // this check; helpers may be extended as long as they are not
    // ClickHouse-specific.
    if (
      token.type === TokenType.RESERVED_CLAUSE &&
      token.text === 'SET' &&
      nextToken.type === TokenType.OPEN_PAREN
    ) {
      // `text` stays 'SET' so that name-based lookups keep working.
      return { ...token, type: TokenType.RESERVED_FUNCTION_NAME };
    }

    return token;
  });
}
Loading