Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
70 changes: 57 additions & 13 deletions src/core/completions.js
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@
// path — never a query per keystroke. `assembleReferenceData` falls back to the
// built-in tokenizer sets when the server didn't supply them.

import { SQL_KEYWORDS, SQL_FUNCS } from './sql-highlight.js';
import { quoteIdent } from './format.js';
import { SQL_KEYWORDS, SQL_FUNCS, tokenize } from './sql-highlight.js';
import { quoteIdent, unquoteIdent } from './format.js';

const BUILTIN_KEYWORDS = [...SQL_KEYWORDS];
const BUILTIN_FUNCS = [...SQL_FUNCS];
Expand Down Expand Up @@ -116,28 +116,72 @@ export function buildCompletions(ref, schema) {
* Returns {word, from, to, qualified, parent, afterFormat}.
*/
export function completionContext(value, pos) {
let s = pos;
while (s > 0 && /[A-Za-z0-9_]/.test(value[s - 1])) s--;
const word = value.slice(s, pos);
// The word being typed is either inside an OPEN backtick-quoted identifier (a
// `non-bare-name… still being typed) or a bare [A-Za-z0-9_] run. We use the SQL
// tokenizer to find an open backtick so a backtick inside a string/comment (or
// an escaped `\`` in a closed name) can't fool us. `from` is where an accepted
// candidate replaces to — the opening backtick in the quoted case, so accepting
// never doubles the backtick.
const open = openBacktickStart(value, pos);
let from;
let word;
if (open >= 0) {
from = open;
word = value.slice(from + 1, pos); // partial name, unquoted
} else {
from = pos;
while (from > 0 && /[A-Za-z0-9_]/.test(value[from - 1])) from--;
word = value.slice(from, pos);
}
// Inside a FORMAT clause? (the identifier just before the word is `FORMAT`) →
// complete output-format names instead of the general candidate set.
let b = s;
let b = from;
while (b > 0 && /\s/.test(value[b - 1])) b--;
let pf = b;
while (pf > 0 && /[A-Za-z0-9_]/.test(value[pf - 1])) pf--;
const afterFormat = value.slice(pf, b).toUpperCase() === 'FORMAT';
// Qualified? An identifier (bare OR backtick-quoted) then a dot immediately
// before the word. parent is the unquoted table name, matched against item.parent.
let qualified = false;
let parent = null;
if (value[s - 1] === '.') {
let p = s - 1;
while (p > 0 && /[A-Za-z0-9_]/.test(value[p - 1])) p--;
const name = value.slice(p, s - 1);
if (value[from - 1] === '.') {
// Only qualified when a real identifier precedes the dot. A bare '.' after a
// non-identifier (`.col`, `).c`, `count().c`) would otherwise yield parent=''
// and an empty dropdown — fall back to normal completion instead (#4 review).
// non-identifier (`.col`, `).c`, `count().c`) yields '' → normal completion.
const name = identBefore(value, from - 1);
if (name) { qualified = true; parent = name; }
}
return { word, from: s, to: pos, qualified, parent, afterFormat };
return { word, from, to: pos, qualified, parent, afterFormat };
}

// The start index of an OPEN (still-being-typed) backtick-quoted identifier that
// contains the caret, or -1. Uses the tokenizer so a backtick inside a string or
// comment isn't mistaken for an identifier quote, and an escaped `\`` inside a
// closed name doesn't desync a naive parity count.
function openBacktickStart(value, pos) {
let off = 0;
for (const [type, text] of tokenize(value)) {
const end = off + text.length;
if (off < pos && pos <= end && type === 'ident' && text[0] === '`') {
// open = the caret is inside the run before any closing backtick (an
// unterminated `name… token, or the caret sits before its trailing `).
const closedAtEnd = pos === end && text.length > 1 && text[text.length - 1] === '`';
if (!closedAtEnd) return off;
}
off = end;
}
return -1;
}

// The identifier ending at index `end` (exclusive): a backtick-quoted run
// (returned unquoted/unescaped) or a bare [A-Za-z0-9_] run. '' if neither.
function identBefore(value, end) {
if (value[end - 1] === '`') {
const open = value.lastIndexOf('`', end - 2);
if (open >= 0) return unquoteIdent(value.slice(open, end));
}
let p = end;
while (p > 0 && /[A-Za-z0-9_]/.test(value[p - 1])) p--;
return value.slice(p, end);
}

/**
Expand Down
9 changes: 9 additions & 0 deletions src/core/format.js
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,15 @@ export function qualifyIdent(...parts) {
return parts.filter((p) => p != null && p !== '').map(quoteIdent).join('.');
}

/**
* Inverse of `quoteIdent` for a single part: strip the surrounding backticks and
* unescape `` \` `` / `\\` when `part` is backtick-quoted; bare names pass through.
*/
export function unquoteIdent(part) {
const s = String(part);
return s[0] === '`' ? s.slice(1, -1).replace(/\\(.)/g, '$1') : s;
}

/**
* Terminate `sql` so a programmatic full-replace (Format / Insert DDL) leaves the
* caret on empty space rather than at the end of the last token. The editor's
Expand Down
44 changes: 30 additions & 14 deletions src/core/schema-graph.js
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
// engine_full for Distributed/Buffer/Merge. All best-effort: a miss yields a node
// with no edge, never a throw.

import { unquoteIdent } from './format.js';

/** Map a ClickHouse engine name to a node kind. */
export function objectKind(engine) {
const e = String(engine || '');
Expand All @@ -31,12 +33,27 @@ export function parseAstTables(astText) {
return out;
}

/** The explicit `TO db.table` target of a materialized view, or null. */
// One ClickHouse identifier part: a backtick-quoted run (with `\`` / `\\` escapes)
// or a bare identifier. Used to parse names out of create_table_query, where CH
// backtick-quotes non-bare names (e.g. TO target_all.`agg.out.parquet`).
const IDENT_PART = '(?:`(?:[^`\\\\]|\\\\.)*`|[A-Za-z_][A-Za-z0-9_]*)';
const TO_RE = new RegExp('\\sTO\\s+(' + IDENT_PART + ')(?:\\.(' + IDENT_PART + '))?');

/**
* The explicit `TO [db.]table` target of a materialized view as `{ db?, table }`
* (raw, backticks stripped — so it matches the row ids), or null for an implicit
* (`.inner.*`) MV. Handles backtick-quoted, dotted names.
*/
export function parseMvTarget(createTableQuery) {
const s = String(createTableQuery || '');
const head = s.split(/\sAS\s+SELECT/i)[0]; // only look before the SELECT body
const m = /\sTO\s+([A-Za-z_][\w]*(?:\.[A-Za-z_][\w]*)?)/.exec(head);
return m ? m[1] : null;
// The optional TO clause sits between the view name and the column list / AS
// SELECT, so only scan up to the first '(' (and before any AS SELECT). This
// keeps a stray " TO " inside a column comment or the SELECT body from being
// mistaken for the target (which would also suppress the real .inner edge).
const head = s.split(/\sAS\s+SELECT/i)[0].split('(')[0];
const m = TO_RE.exec(head);
if (!m) return null;
return m[2] ? { db: unquoteIdent(m[1]), table: unquoteIdent(m[2]) } : { table: unquoteIdent(m[1]) };
}

/** A dictionary's source as `{ db, table }` (ClickHouse source) or `{ external }`. */
Expand Down Expand Up @@ -71,11 +88,6 @@ export function parseEngineRef(engine, engineFull) {
return null;
}

// A *reference* whose name MIGHT already be `db.table` (MV target / EXPLAIN AST
// source) — the dot heuristic decides. Ambiguous for table names that themselves
// contain dots, but those paths only ever *match against* known ids, so a miss
// drops an edge rather than mis-linking.
const qualify = (db, name) => (name && name.includes('.') ? name : db + '.' + name);
// A reference whose db is always supplied separately (dependencies_*, engine
// args, table-focus center) — join unconditionally so a dotted table name
// (`…snappy.parquet`) keeps its db prefix instead of being mistaken for an
Expand Down Expand Up @@ -146,17 +158,21 @@ export function buildSchemaGraph(rows, focus) {
node(dep, byId.has(dep) ? nodes.get(dep).kind : 'table');
addEdge(id, dep, 'feeds');
}
// fallback: EXPLAIN AST sources of a view/MV → source → this object. Only real
// (in-scope) objects count, so CTE/alias names from the AST are dropped.
// fallback: EXPLAIN AST sources of a view/MV → source → this object. EXPLAIN
// AST prints names unquoted, qualified-or-bare — so resolve against the known
// ids both ways (as-is, then db-qualified). A name that matches no real object
// (a CTE/alias) is dropped; a CTE that shadows a real same-db table will still
// resolve to that table (we can't tell them apart from the name alone).
if ((kind === 'mv' || kind === 'view') && Array.isArray(t.astTables)) {
for (const src of t.astTables) {
const sid = qualify(t.database, src);
if (byId.has(sid)) addEdge(sid, id, kind === 'mv' ? 'feeds' : 'reads');
const qid = joinId(t.database, src);
const sid = byId.has(src) ? src : (byId.has(qid) ? qid : null);
if (sid) addEdge(sid, id, kind === 'mv' ? 'feeds' : 'reads');
}
}
if (kind === 'mv') {
const target = parseMvTarget(t.create_table_query);
const targetId = target ? qualify(t.database, target) : innerByUuid.get(String(t.uuid || ''));
const targetId = target ? joinId(target.db || t.database, target.table) : innerByUuid.get(String(t.uuid || ''));
if (targetId) { node(targetId, byId.has(targetId) ? nodes.get(targetId).kind : 'table'); addEdge(id, targetId, 'writes'); }
} else if (kind === 'distributed' || kind === 'buffer' || kind === 'merge') {
const ref = parseEngineRef(t.engine, t.engine_full);
Expand Down
31 changes: 31 additions & 0 deletions tests/unit/completions.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,37 @@ describe('completionContext', () => {
expect(completionContext('.col', 4)).toMatchObject({ word: 'col', qualified: false, parent: null });
expect(completionContext('count().c', 9)).toMatchObject({ word: 'c', qualified: false, parent: null });
});
it('treats an open backtick as the word start (from = the backtick, word unquoted)', () => {
// SELECT * FROM `part (caret at end) → word 'part', replace range starts at the backtick
const v = 'SELECT * FROM `part';
expect(completionContext(v, v.length)).toMatchObject({ word: 'part', from: 14, qualified: false });
});
it('does not mistake a CLOSED backtick run for an open one', () => {
const v = 'SELECT * FROM `my tbl` WHERE x';
expect(completionContext(v, v.length)).toMatchObject({ word: 'x', qualified: false }); // even # of backticks → bare
});
it('qualifies a column under a backtick-quoted table (parent unquoted)', () => {
const v = 'SELECT `od` FROM x'; // not qualified — no dot
expect(completionContext(v, 10)).toMatchObject({ word: 'od', qualified: false });
const q = '`weird.tbl`.col';
expect(completionContext(q, q.length)).toMatchObject({ word: 'col', qualified: true, parent: 'weird.tbl' });
});
it('handles an open backtick column under a backtick parent', () => {
const v = '`weird.tbl`.`od';
expect(completionContext(v, v.length)).toMatchObject({ word: 'od', qualified: true, parent: 'weird.tbl', from: 12 });
});
it('ignores a backtick inside a string literal (no false open-backtick mode)', () => {
const v = "SELECT 'a`b' FROM cl";
expect(completionContext(v, v.length)).toMatchObject({ word: 'cl', from: 18, qualified: false });
});
it('ignores a backtick inside a line comment', () => {
const v = 'SELECT 1 -- use `id\nFROM ev';
expect(completionContext(v, v.length)).toMatchObject({ word: 'ev', qualified: false });
});
it('a closed backtick identifier earlier in the query does not flip the mode', () => {
const v = 'SELECT `a b` , co';
expect(completionContext(v, v.length)).toMatchObject({ word: 'co', qualified: false });
});
});

describe('rankCompletions', () => {
Expand Down
42 changes: 40 additions & 2 deletions tests/unit/schema-graph.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -38,12 +38,24 @@ describe('parseAstTables', () => {
});

describe('parseMvTarget', () => {
it('reads the explicit TO target before the SELECT body', () => {
expect(parseMvTarget('CREATE MATERIALIZED VIEW lin.events_mv TO lin.events_daily (`day` Date) AS SELECT toDate(ts) AS day FROM lin.events GROUP BY day')).toBe('lin.events_daily');
it('reads the explicit TO target before the SELECT body as {db, table}', () => {
expect(parseMvTarget('CREATE MATERIALIZED VIEW lin.events_mv TO lin.events_daily (`day` Date) AS SELECT toDate(ts) AS day FROM lin.events GROUP BY day')).toEqual({ db: 'lin', table: 'events_daily' });
});
it('strips backticks from a dotted/backticked TO target (real CH create query)', () => {
// CH backtick-quotes non-bare names in create_table_query.
expect(parseMvTarget('CREATE MATERIALIZED VIEW target_all.`mv-1` TO target_all.`agg.out.parquet` (`id` UInt64) AS SELECT id, count() AS c FROM target_all.`part-0.snappy.parquet` GROUP BY id'))
.toEqual({ db: 'target_all', table: 'agg.out.parquet' });
});
it('returns a single {table} when the TO target is unqualified', () => {
expect(parseMvTarget('CREATE MATERIALIZED VIEW lin.mv TO `weird.dst` AS SELECT 1')).toEqual({ table: 'weird.dst' });
});
it('returns null for an implicit MV (ENGINE = …, no TO)', () => {
expect(parseMvTarget('CREATE MATERIALIZED VIEW lin.events_mv2 (`day` Date) ENGINE = SummingMergeTree ORDER BY day AS SELECT toDate(ts) AS day FROM lin.events GROUP BY day')).toBeNull();
});
it('does not treat a stray " TO " in the column list/comment as a target', () => {
// implicit MV — the only TO is inside a column COMMENT, after the '(' → ignored
expect(parseMvTarget("CREATE MATERIALIZED VIEW lin.mv (`x` String COMMENT 'route TO sink') ENGINE = SummingMergeTree ORDER BY x AS SELECT x FROM s")).toBeNull();
});
});

describe('parseDictSource', () => {
Expand Down Expand Up @@ -195,6 +207,32 @@ describe('buildSchemaGraph', () => {
expect(() => buildSchemaGraph(rows, { kind: 'db', db: 'lin' })).not.toThrow();
});

it('links an MV to a backticked/dotted TO target and a dotted AST source (CH 26.5 fixtures)', () => {
// Captured from Docker CH 26.5.1: create_table_query backtick-quotes the TO
// target; EXPLAIN AST prints the source unquoted (here UNqualified + dotted).
const rows = { tables: [
T('target_all', 'part-0.snappy.parquet', 'MergeTree'),
T('target_all', 'agg.out.parquet', 'SummingMergeTree'),
T('target_all', 'mv-1', 'MaterializedView', {
astTables: ['part-0.snappy.parquet'], // unqualified, dotted (default-db ref)
create_table_query: 'CREATE MATERIALIZED VIEW target_all.`mv-1` TO target_all.`agg.out.parquet` (`id` UInt64) AS SELECT id, count() AS c FROM target_all.`part-0.snappy.parquet` GROUP BY id',
}),
], dictionaries: [] };
const g = buildSchemaGraph(rows, { kind: 'db', db: 'target_all' });
const E = eset(g);
expect(E.has('target_all.part-0.snappy.parquet>target_all.mv-1:feeds')).toBe(true); // AST source resolved
expect(E.has('target_all.mv-1>target_all.agg.out.parquet:writes')).toBe(true); // backticked TO target
expect(g.nodes.some((n) => n.id === 'target_all.target_all')).toBe(false); // no phantom from the old bug
});

it('resolves a fully-qualified dotted AST source (CH prints db.table unquoted)', () => {
const rows = { tables: [
T('target_all', 'part-0.snappy.parquet', 'MergeTree'),
T('target_all', 'v', 'View', { astTables: ['target_all.part-0.snappy.parquet'] }),
], dictionaries: [] };
expect(eset(buildSchemaGraph(rows, { kind: 'db', db: 'target_all' })).has('target_all.part-0.snappy.parquet>target_all.v:reads')).toBe(true);
});

it('table-focus on a dotted-name table keeps the center + its 1-hop (not empty)', () => {
const rows = { tables: [
T('target_all', 'part-0.snappy.parquet', 'MergeTree', { dependencies_database: ['target_all'], dependencies_table: ['v_over_parquet'] }),
Expand Down
Loading