diff --git a/src/core/completions.js b/src/core/completions.js index 0fa2204..6e22bb4 100644 --- a/src/core/completions.js +++ b/src/core/completions.js @@ -6,8 +6,8 @@ // path — never a query per keystroke. `assembleReferenceData` falls back to the // built-in tokenizer sets when the server didn't supply them. -import { SQL_KEYWORDS, SQL_FUNCS } from './sql-highlight.js'; -import { quoteIdent } from './format.js'; +import { SQL_KEYWORDS, SQL_FUNCS, tokenize } from './sql-highlight.js'; +import { quoteIdent, unquoteIdent } from './format.js'; const BUILTIN_KEYWORDS = [...SQL_KEYWORDS]; const BUILTIN_FUNCS = [...SQL_FUNCS]; @@ -116,28 +116,72 @@ export function buildCompletions(ref, schema) { * Returns {word, from, to, qualified, parent, afterFormat}. */ export function completionContext(value, pos) { - let s = pos; - while (s > 0 && /[A-Za-z0-9_]/.test(value[s - 1])) s--; - const word = value.slice(s, pos); + // The word being typed is either inside an OPEN backtick-quoted identifier (a + // `non-bare-name… still being typed) or a bare [A-Za-z0-9_] run. We use the SQL + // tokenizer to find an open backtick so a backtick inside a string/comment (or + // an escaped `\`` in a closed name) can't fool us. `from` is where an accepted + // candidate replaces to — the opening backtick in the quoted case, so accepting + // never doubles the backtick. + const open = openBacktickStart(value, pos); + let from; + let word; + if (open >= 0) { + from = open; + word = value.slice(from + 1, pos); // partial name, unquoted + } else { + from = pos; + while (from > 0 && /[A-Za-z0-9_]/.test(value[from - 1])) from--; + word = value.slice(from, pos); + } // Inside a FORMAT clause? (the identifier just before the word is `FORMAT`) → // complete output-format names instead of the general candidate set. - let b = s; + let b = from; while (b > 0 && /\s/.test(value[b - 1])) b--; let pf = b; while (pf > 0 && /[A-Za-z0-9_]/.test(value[pf - 1])) pf--; const afterFormat = value.slice(pf, b).toUpperCase() === 'FORMAT'; + // Qualified? An identifier (bare OR backtick-quoted) then a dot immediately + // before the word. parent is the unquoted table name, matched against item.parent. let qualified = false; let parent = null; - if (value[s - 1] === '.') { - let p = s - 1; - while (p > 0 && /[A-Za-z0-9_]/.test(value[p - 1])) p--; - const name = value.slice(p, s - 1); + if (value[from - 1] === '.') { // Only qualified when a real identifier precedes the dot. A bare '.' after a - // non-identifier (`.col`, `).c`, `count().c`) would otherwise yield parent='' - // and an empty dropdown — fall back to normal completion instead (#4 review). + // non-identifier (`.col`, `).c`, `count().c`) yields '' → normal completion. + const name = identBefore(value, from - 1); if (name) { qualified = true; parent = name; } } - return { word, from: s, to: pos, qualified, parent, afterFormat }; + return { word, from, to: pos, qualified, parent, afterFormat }; +} + +// The start index of an OPEN (still-being-typed) backtick-quoted identifier that +// contains the caret, or -1. Uses the tokenizer so a backtick inside a string or +// comment isn't mistaken for an identifier quote, and an escaped `\`` inside a +// closed name doesn't desync a naive parity count. +function openBacktickStart(value, pos) { + let off = 0; + for (const [type, text] of tokenize(value)) { + const end = off + text.length; + if (off < pos && pos <= end && type === 'ident' && text[0] === '`') { + // open = the caret is inside the run before any closing backtick (an + // unterminated `name… token, or the caret sits before its trailing `). + const closedAtEnd = pos === end && text.length > 1 && text[text.length - 1] === '`'; + if (!closedAtEnd) return off; + } + off = end; + } + return -1; +} + +// The identifier ending at index `end` (exclusive): a backtick-quoted run +// (returned unquoted/unescaped) or a bare [A-Za-z0-9_] run. '' if neither. +function identBefore(value, end) { + if (value[end - 1] === '`') { + const open = value.lastIndexOf('`', end - 2); + if (open >= 0) return unquoteIdent(value.slice(open, end)); + } + let p = end; + while (p > 0 && /[A-Za-z0-9_]/.test(value[p - 1])) p--; + return value.slice(p, end); } /** diff --git a/src/core/format.js b/src/core/format.js index 16eb8de..a8fb60a 100644 --- a/src/core/format.js +++ b/src/core/format.js @@ -75,6 +75,15 @@ export function qualifyIdent(...parts) { return parts.filter((p) => p != null && p !== '').map(quoteIdent).join('.'); } +/** + * Inverse of `quoteIdent` for a single part: strip the surrounding backticks and + * unescape `` \` `` / `\\` when `part` is backtick-quoted; bare names pass through. + */ +export function unquoteIdent(part) { + const s = String(part); + return s[0] === '`' ? s.slice(1, -1).replace(/\\(.)/g, '$1') : s; +} + /** * Terminate `sql` so a programmatic full-replace (Format / Insert DDL) leaves the * caret on empty space rather than at the end of the last token. The editor's diff --git a/src/core/schema-graph.js b/src/core/schema-graph.js index 68b3a23..efdc958 100644 --- a/src/core/schema-graph.js +++ b/src/core/schema-graph.js @@ -10,6 +10,8 @@ // engine_full for Distributed/Buffer/Merge. All best-effort: a miss yields a node // with no edge, never a throw. +import { unquoteIdent } from './format.js'; + /** Map a ClickHouse engine name to a node kind. */ export function objectKind(engine) { const e = String(engine || ''); @@ -31,12 +33,27 @@ export function parseAstTables(astText) { return out; } -/** The explicit `TO db.table` target of a materialized view, or null. */ +// One ClickHouse identifier part: a backtick-quoted run (with `\`` / `\\` escapes) +// or a bare identifier. Used to parse names out of create_table_query, where CH +// backtick-quotes non-bare names (e.g. TO target_all.`agg.out.parquet`). +const IDENT_PART = '(?:`(?:[^`\\\\]|\\\\.)*`|[A-Za-z_][A-Za-z0-9_]*)'; +const TO_RE = new RegExp('\\sTO\\s+(' + IDENT_PART + ')(?:\\.(' + IDENT_PART + '))?'); + +/** + * The explicit `TO [db.]table` target of a materialized view as `{ db?, table }` + * (raw, backticks stripped — so it matches the row ids), or null for an implicit + * (`.inner.*`) MV. Handles backtick-quoted, dotted names. + */ export function parseMvTarget(createTableQuery) { const s = String(createTableQuery || ''); - const head = s.split(/\sAS\s+SELECT/i)[0]; // only look before the SELECT body - const m = /\sTO\s+([A-Za-z_][\w]*(?:\.[A-Za-z_][\w]*)?)/.exec(head); - return m ? m[1] : null; + // The optional TO clause sits between the view name and the column list / AS + // SELECT, so only scan up to the first '(' (and before any AS SELECT). This + // keeps a stray " TO " inside a column comment or the SELECT body from being + // mistaken for the target (which would also suppress the real .inner edge). + const head = s.split(/\sAS\s+SELECT/i)[0].split('(')[0]; + const m = TO_RE.exec(head); + if (!m) return null; + return m[2] ? { db: unquoteIdent(m[1]), table: unquoteIdent(m[2]) } : { table: unquoteIdent(m[1]) }; } /** A dictionary's source as `{ db, table }` (ClickHouse source) or `{ external }`. */ @@ -71,11 +88,6 @@ export function parseEngineRef(engine, engineFull) { return null; } -// A *reference* whose name MIGHT already be `db.table` (MV target / EXPLAIN AST -// source) — the dot heuristic decides. Ambiguous for table names that themselves -// contain dots, but those paths only ever *match against* known ids, so a miss -// drops an edge rather than mis-linking. -const qualify = (db, name) => (name && name.includes('.') ? name : db + '.' + name); // A reference whose db is always supplied separately (dependencies_*, engine // args, table-focus center) — join unconditionally so a dotted table name // (`…snappy.parquet`) keeps its db prefix instead of being mistaken for an @@ -146,17 +158,21 @@ export function buildSchemaGraph(rows, focus) { node(dep, byId.has(dep) ? nodes.get(dep).kind : 'table'); addEdge(id, dep, 'feeds'); } - // fallback: EXPLAIN AST sources of a view/MV → source → this object. Only real - // (in-scope) objects count, so CTE/alias names from the AST are dropped. + // fallback: EXPLAIN AST sources of a view/MV → source → this object. EXPLAIN + // AST prints names unquoted, qualified-or-bare — so resolve against the known + // ids both ways (as-is, then db-qualified). A name that matches no real object + // (a CTE/alias) is dropped; a CTE that shadows a real same-db table will still + // resolve to that table (we can't tell them apart from the name alone). if ((kind === 'mv' || kind === 'view') && Array.isArray(t.astTables)) { for (const src of t.astTables) { - const sid = qualify(t.database, src); - if (byId.has(sid)) addEdge(sid, id, kind === 'mv' ? 'feeds' : 'reads'); + const qid = joinId(t.database, src); + const sid = byId.has(src) ? src : (byId.has(qid) ? qid : null); + if (sid) addEdge(sid, id, kind === 'mv' ? 'feeds' : 'reads'); } } if (kind === 'mv') { const target = parseMvTarget(t.create_table_query); - const targetId = target ? qualify(t.database, target) : innerByUuid.get(String(t.uuid || '')); + const targetId = target ? joinId(target.db || t.database, target.table) : innerByUuid.get(String(t.uuid || '')); if (targetId) { node(targetId, byId.has(targetId) ? nodes.get(targetId).kind : 'table'); addEdge(id, targetId, 'writes'); } } else if (kind === 'distributed' || kind === 'buffer' || kind === 'merge') { const ref = parseEngineRef(t.engine, t.engine_full); diff --git a/tests/unit/completions.test.js b/tests/unit/completions.test.js index 0933b59..678991b 100644 --- a/tests/unit/completions.test.js +++ b/tests/unit/completions.test.js @@ -130,6 +130,37 @@ describe('completionContext', () => { expect(completionContext('.col', 4)).toMatchObject({ word: 'col', qualified: false, parent: null }); expect(completionContext('count().c', 9)).toMatchObject({ word: 'c', qualified: false, parent: null }); }); + it('treats an open backtick as the word start (from = the backtick, word unquoted)', () => { + // SELECT * FROM `part (caret at end) → word 'part', replace range starts at the backtick + const v = 'SELECT * FROM `part'; + expect(completionContext(v, v.length)).toMatchObject({ word: 'part', from: 14, qualified: false }); + }); + it('does not mistake a CLOSED backtick run for an open one', () => { + const v = 'SELECT * FROM `my tbl` WHERE x'; + expect(completionContext(v, v.length)).toMatchObject({ word: 'x', qualified: false }); // even # of backticks → bare + }); + it('qualifies a column under a backtick-quoted table (parent unquoted)', () => { + const v = 'SELECT `od` FROM x'; // not qualified — no dot + expect(completionContext(v, 10)).toMatchObject({ word: 'od', qualified: false }); + const q = '`weird.tbl`.col'; + expect(completionContext(q, q.length)).toMatchObject({ word: 'col', qualified: true, parent: 'weird.tbl' }); + }); + it('handles an open backtick column under a backtick parent', () => { + const v = '`weird.tbl`.`od'; + expect(completionContext(v, v.length)).toMatchObject({ word: 'od', qualified: true, parent: 'weird.tbl', from: 12 }); + }); + it('ignores a backtick inside a string literal (no false open-backtick mode)', () => { + const v = "SELECT 'a`b' FROM cl"; + expect(completionContext(v, v.length)).toMatchObject({ word: 'cl', from: 18, qualified: false }); + }); + it('ignores a backtick inside a line comment', () => { + const v = 'SELECT 1 -- use `id\nFROM ev'; + expect(completionContext(v, v.length)).toMatchObject({ word: 'ev', qualified: false }); + }); + it('a closed backtick identifier earlier in the query does not flip the mode', () => { + const v = 'SELECT `a b` , co'; + expect(completionContext(v, v.length)).toMatchObject({ word: 'co', qualified: false }); + }); }); describe('rankCompletions', () => { diff --git a/tests/unit/schema-graph.test.js b/tests/unit/schema-graph.test.js index c83d396..347f126 100644 --- a/tests/unit/schema-graph.test.js +++ b/tests/unit/schema-graph.test.js @@ -38,12 +38,24 @@ describe('parseAstTables', () => { }); describe('parseMvTarget', () => { - it('reads the explicit TO target before the SELECT body', () => { - expect(parseMvTarget('CREATE MATERIALIZED VIEW lin.events_mv TO lin.events_daily (`day` Date) AS SELECT toDate(ts) AS day FROM lin.events GROUP BY day')).toBe('lin.events_daily'); + it('reads the explicit TO target before the SELECT body as {db, table}', () => { + expect(parseMvTarget('CREATE MATERIALIZED VIEW lin.events_mv TO lin.events_daily (`day` Date) AS SELECT toDate(ts) AS day FROM lin.events GROUP BY day')).toEqual({ db: 'lin', table: 'events_daily' }); + }); + it('strips backticks from a dotted/backticked TO target (real CH create query)', () => { + // CH backtick-quotes non-bare names in create_table_query. + expect(parseMvTarget('CREATE MATERIALIZED VIEW target_all.`mv-1` TO target_all.`agg.out.parquet` (`id` UInt64) AS SELECT id, count() AS c FROM target_all.`part-0.snappy.parquet` GROUP BY id')) + .toEqual({ db: 'target_all', table: 'agg.out.parquet' }); + }); + it('returns a single {table} when the TO target is unqualified', () => { + expect(parseMvTarget('CREATE MATERIALIZED VIEW lin.mv TO `weird.dst` AS SELECT 1')).toEqual({ table: 'weird.dst' }); }); it('returns null for an implicit MV (ENGINE = …, no TO)', () => { expect(parseMvTarget('CREATE MATERIALIZED VIEW lin.events_mv2 (`day` Date) ENGINE = SummingMergeTree ORDER BY day AS SELECT toDate(ts) AS day FROM lin.events GROUP BY day')).toBeNull(); }); + it('does not treat a stray " TO " in the column list/comment as a target', () => { + // implicit MV — the only TO is inside a column COMMENT, after the '(' → ignored + expect(parseMvTarget("CREATE MATERIALIZED VIEW lin.mv (`x` String COMMENT 'route TO sink') ENGINE = SummingMergeTree ORDER BY x AS SELECT x FROM s")).toBeNull(); + }); }); describe('parseDictSource', () => { @@ -195,6 +207,32 @@ describe('buildSchemaGraph', () => { expect(() => buildSchemaGraph(rows, { kind: 'db', db: 'lin' })).not.toThrow(); }); + it('links an MV to a backticked/dotted TO target and a dotted AST source (CH 26.5 fixtures)', () => { + // Captured from Docker CH 26.5.1: create_table_query backtick-quotes the TO + // target; EXPLAIN AST prints the source unquoted (here UNqualified + dotted). + const rows = { tables: [ + T('target_all', 'part-0.snappy.parquet', 'MergeTree'), + T('target_all', 'agg.out.parquet', 'SummingMergeTree'), + T('target_all', 'mv-1', 'MaterializedView', { + astTables: ['part-0.snappy.parquet'], // unqualified, dotted (default-db ref) + create_table_query: 'CREATE MATERIALIZED VIEW target_all.`mv-1` TO target_all.`agg.out.parquet` (`id` UInt64) AS SELECT id, count() AS c FROM target_all.`part-0.snappy.parquet` GROUP BY id', + }), + ], dictionaries: [] }; + const g = buildSchemaGraph(rows, { kind: 'db', db: 'target_all' }); + const E = eset(g); + expect(E.has('target_all.part-0.snappy.parquet>target_all.mv-1:feeds')).toBe(true); // AST source resolved + expect(E.has('target_all.mv-1>target_all.agg.out.parquet:writes')).toBe(true); // backticked TO target + expect(g.nodes.some((n) => n.id === 'target_all.target_all')).toBe(false); // no phantom from the old bug + }); + + it('resolves a fully-qualified dotted AST source (CH prints db.table unquoted)', () => { + const rows = { tables: [ + T('target_all', 'part-0.snappy.parquet', 'MergeTree'), + T('target_all', 'v', 'View', { astTables: ['target_all.part-0.snappy.parquet'] }), + ], dictionaries: [] }; + expect(eset(buildSchemaGraph(rows, { kind: 'db', db: 'target_all' })).has('target_all.part-0.snappy.parquet>target_all.v:reads')).toBe(true); + }); + it('table-focus on a dotted-name table keeps the center + its 1-hop (not empty)', () => { const rows = { tables: [ T('target_all', 'part-0.snappy.parquet', 'MergeTree', { dependencies_database: ['target_all'], dependencies_table: ['v_over_parquet'] }),