diff --git a/packages/dbt-tools/src/dbt-cli.ts b/packages/dbt-tools/src/dbt-cli.ts index be9b92a97..d2974c5e3 100644 --- a/packages/dbt-tools/src/dbt-cli.ts +++ b/packages/dbt-tools/src/dbt-cli.ts @@ -51,6 +51,20 @@ function getDbt(): ResolvedDbt { return resolvedDbt } +/** Shape of an execFile rejection — carries stdout/stderr alongside message. */ +interface ExecFileError extends Error { + stdout?: string | Buffer + stderr?: string | Buffer + code?: number | string + signal?: string +} + +/** Coerce an unknown rejection into something the catch blocks can read safely. */ +function toExecFileError(e: unknown): ExecFileError { + if (e instanceof Error) return e as ExecFileError + return new Error(String(e)) as ExecFileError +} + function run(args: string[]): Promise<{ stdout: string; stderr: string }> { const dbt = getDbt() const env = buildDbtEnv(dbt) @@ -58,8 +72,18 @@ function run(args: string[]): Promise<{ stdout: string; stderr: string }> { return new Promise((resolve, reject) => { execFile(dbt.path, args, { timeout: 120_000, maxBuffer: 10 * 1024 * 1024, env, cwd }, (err, stdout, stderr) => { - if (err) reject(err) - else resolve({ stdout, stderr }) + if (err) { + // Node's execFile passes stdout/stderr as separate callback arguments, + // not as properties on the error. Attach them here so callers can + // surface the real dbt failure text instead of Node's generic + // "Command failed: ..." message. + const execErr = err as ExecFileError + execErr.stdout = stdout + execErr.stderr = stderr + reject(execErr) + } else { + resolve({ stdout, stderr }) + } }) }) } @@ -221,66 +245,86 @@ export async function execDbtShow(sql: string, limit?: number) { const args = ["show", "--inline", sql, "--output", "json", "--log-format", "json"] if (limit !== undefined) args.push("--limit", String(limit)) - let lines: Record[] + // Capture the run() errors so we can bubble the real dbt failure up if all + // parse tiers fail; the generic "Could not parse" alone misleads callers + // into treating structural project errors as transient. + let primaryRunError: ExecFileError | undefined + let lines: Record[] = [] try { const { stdout } = await run(args) lines = parseJsonLines(stdout) - } catch { - lines = [] + } catch (e) { + primaryRunError = toExecFileError(e) + // Deliberately do NOT feed crashed-run stdout into `lines` for the + // heuristic tiers below. Crash logs can contain incidental arrays that + // `looksLikeRowData` would happily return as "rows" (silent wrong data). + // The crashed stdout is still consulted by extractDbtError below for the + // structured `level: "error"` event. } - // --- Tier 1: known field paths --- - const previewLine = - lines.find((l: any) => l.data?.preview) ?? - lines.find((l: any) => l.data?.rows) ?? - lines.find((l: any) => l.result?.preview) ?? - lines.find((l: any) => l.result?.rows) - - const sqlLine = - lines.find((l: any) => l.data?.sql) ?? - lines.find((l: any) => l.data?.compiled_sql) ?? - lines.find((l: any) => l.result?.sql) - - if (previewLine) { - const preview = - (previewLine as any).data?.preview ?? - (previewLine as any).data?.rows ?? - (previewLine as any).result?.preview ?? - (previewLine as any).result?.rows - - // Guard JSON.parse — fall through to Tier 2 on malformed strings - let rows: Record[] - if (typeof preview === "string") { - const parsed = safeJsonParse(preview) - if (Array.isArray(parsed)) { - rows = parsed + // Skip the success-only tiers when the primary run failed — see comment + // above. We still try Tier 3 (a separate plain-text run) because that can + // recover from JSON-mode-specific failures. + if (!primaryRunError) { + // --- Tier 1: known field paths --- + const previewLine = + lines.find((l: any) => l.data?.preview) ?? + lines.find((l: any) => l.data?.rows) ?? + lines.find((l: any) => l.result?.preview) ?? + lines.find((l: any) => l.result?.rows) + + const sqlLine = + lines.find((l: any) => l.data?.sql) ?? + lines.find((l: any) => l.data?.compiled_sql) ?? + lines.find((l: any) => l.result?.sql) + + if (previewLine) { + const preview = + (previewLine as any).data?.preview ?? + (previewLine as any).data?.rows ?? + (previewLine as any).result?.preview ?? + (previewLine as any).result?.rows + + // The previewLine match upstream only checked for truthiness, so a + // future dbt version emitting `data.preview = {}` or `= 42` would + // flow into `rows` unchecked and the downstream `data: rows` field + // would crash callers that do `.map` / `.length`. Guard explicitly + // for the three shapes we accept (parsed JSON array, native array, + // malformed) and emit an empty result for anything else. + let rows: Record[] + if (typeof preview === "string") { + const parsed = safeJsonParse(preview) + rows = Array.isArray(parsed) ? parsed : [] + } else if (Array.isArray(preview)) { + rows = preview } else { - rows = [] // Malformed — will fall through below + rows = [] } - } else { - rows = preview - } - - // Return the result — even if empty. An empty preview means the query returned - // zero rows, which is a valid result. Do NOT fall through to Tier 2, which could - // match spurious log metadata as row data. - const columnNames = rows.length > 0 && rows[0] ? Object.keys(rows[0]) : [] - const compiledSql = (sqlLine as any)?.data?.sql ?? (sqlLine as any)?.data?.compiled_sql ?? (sqlLine as any)?.result?.sql ?? sql - return { columnNames, columnTypes: columnNames.map(() => "string"), data: rows, rawSql: sql, compiledSql } - } - // --- Tier 2: heuristic deep scan --- - for (const line of lines) { - const found = deepFind(line, (val) => looksLikeRowData(val)) - if (found) { - const rows: Record[] = typeof found === "string" ? JSON.parse(found as string) : (found as Record[]) + // Return the result — even if empty. An empty preview means the query returned + // zero rows, which is a valid result. Do NOT fall through to Tier 2, which could + // match spurious log metadata as row data. const columnNames = rows.length > 0 && rows[0] ? Object.keys(rows[0]) : [] - const compiledSql = (deepFind(line, (val) => looksLikeSql(val)) as string) ?? sql + const compiledSql = (sqlLine as any)?.data?.sql ?? (sqlLine as any)?.data?.compiled_sql ?? (sqlLine as any)?.result?.sql ?? sql return { columnNames, columnTypes: columnNames.map(() => "string"), data: rows, rawSql: sql, compiledSql } } + + // --- Tier 2: heuristic deep scan --- + for (const line of lines) { + const found = deepFind(line, (val) => looksLikeRowData(val)) + if (found) { + const rows: Record[] = typeof found === "string" ? JSON.parse(found as string) : (found as Record[]) + const columnNames = rows.length > 0 && rows[0] ? Object.keys(rows[0]) : [] + const compiledSql = (deepFind(line, (val) => looksLikeSql(val)) as string) ?? sql + return { columnNames, columnTypes: columnNames.map(() => "string"), data: rows, rawSql: sql, compiledSql } + } + } } // --- Tier 3: plain text fallback (ASCII table) --- + // Tried unconditionally — even if JSON-mode crashed, the plain-text mode + // sometimes succeeds and gives us a usable table. + let plainRunError: ExecFileError | undefined try { const plainArgs = ["show", "--inline", sql] if (limit !== undefined) plainArgs.push("--limit", String(limit)) @@ -295,8 +339,44 @@ export async function execDbtShow(sql: string, limit?: number) { compiledSql: sql, } } - } catch { - // Plain text dbt show also failed — fall through to error below + } catch (e) { + plainRunError = toExecFileError(e) + } + + // Two distinct failure modes; don't conflate them: + // + // (a) JSON-mode `dbt show` actually crashed → surface the real dbt error. + // This is the original motivation for the PR. + // + // (b) JSON-mode succeeded (exit 0) but emitted output we can't decode, + // AND the plain-text retry then failed for some other reason. The + // `dbt show` command itself didn't fail; our parser did. Throwing + // "dbt show failed: " here would misattribute a + // parser regression as a dbt execution failure. + if (primaryRunError) { + const errorLogLines = parseJsonLines(primaryRunError.stdout?.toString() ?? "") + const realError = extractDbtError(errorLogLines, primaryRunError, plainRunError) + if (realError) { + // Avoid doubling the "failed:" prefix when dbt's own category prefix + // is already in the message (e.g. "Database Error: ...", + // "Compilation Error: ..."). + const hasDbtCategoryPrefix = /^(Compilation|Database|Runtime|Parsing|Validation|Dependency)\s+Error\b/.test( + realError, + ) + throw new Error(hasDbtCategoryPrefix ? realError : `dbt show failed: ${realError}`) + } + } + + if (plainRunError) { + // Both branches stay SQL-safe: extractDbtError already strips ANSI and + // redacts via fallbackExitMessage; the fallback here uses the same helper + // explicitly so this code path can't regress to a raw err.message even if + // extractDbtError is refactored. + const fallback = + extractDbtError([], undefined, plainRunError) ?? + fallbackExitMessage(undefined, plainRunError) ?? + "unknown error" + throw new Error(`Could not parse dbt show JSON output, and plain-text fallback failed: ${fallback}`) } throw new Error( @@ -305,52 +385,170 @@ export async function execDbtShow(sql: string, limit?: number) { ) } +/** + * Pick the best human-readable error from a failed `dbt show` invocation. + * + * Preference order: + * 1. The LAST structured `level: "error"` event in the JSON log. dbt often + * emits a generic header (e.g. "Encountered an error:") before the + * actionable message; we want the actionable one. + * 2. Stderr from the JSON-mode run. + * 3. Stderr from the plain-text-mode run. + * 4. A concise "dbt exited with status N" fallback. We deliberately do NOT + * surface `err.message` directly when it's an execFile rejection — Node + * embeds the full command line (including the inline SQL) in that + * message, which would leak the user's query into logs and UI. + * + * Returns undefined if neither run rejected — caller falls back to the generic + * "Could not parse" message, which is correct when dbt exited 0 but emitted + * something we can't decode. + * + * ANSI escape codes are stripped from the returned message so logs and UI + * bubbles stay clean (dbt may colour-code stderr and structured events). + */ +interface DbtLogLine { + info?: { level?: string; msg?: string } + level?: string + msg?: string +} + +function extractDbtError( + lines: Record[], + primary?: ExecFileError, + plain?: ExecFileError, +): string | undefined { + if (!primary && !plain) return undefined + + const errorMessages = lines + .map((l) => { + const line = l as DbtLogLine + const isError = line.info?.level === "error" || line.level === "error" + if (!isError) return undefined + return line.info?.msg ?? line.msg + }) + .filter((m): m is string => typeof m === "string" && m.trim().length > 0) + const structuredMsg = errorMessages.at(-1) + + const primaryStderr = primary?.stderr?.toString().trim() + const plainStderr = plain?.stderr?.toString().trim() + + const chosen = + (structuredMsg && structuredMsg.length > 0 ? structuredMsg : undefined) ?? + (primaryStderr && primaryStderr.length > 0 ? primaryStderr : undefined) ?? + (plainStderr && plainStderr.length > 0 ? plainStderr : undefined) ?? + fallbackExitMessage(primary, plain) + + return chosen ? stripAnsi(chosen) : undefined +} + +/** + * Build a concise exit-status message that does NOT leak the dbt command line. + * + * Node's `execFile` rejection has `err.message` = `"Command failed: + * show --inline '' ..."` whenever the spawned process actually + * ran — exit-non-zero AND timeout/signal kills both produce this message, + * embedding the user's full query (potentially with secrets, PII, multi-KB + * literals) into any log/UI surface that displays the error. + * + * Spawn-time failures (ENOENT etc.) have a different message shape that does + * NOT embed args, so they're safe to surface directly. + */ +function fallbackExitMessage(primary?: ExecFileError, plain?: ExecFileError): string | undefined { + const err = primary ?? plain + if (!err) return undefined + + const looksLikeCommandFailed = typeof err.message === "string" && err.message.startsWith("Command failed:") + if (!looksLikeCommandFailed) return err.message + + // The process ran; redact the embedded command line. + if (typeof err.code === "number") return `dbt exited with status ${err.code}` + if (err.signal) return `dbt killed by signal ${err.signal}` + if (typeof err.code === "string") return `dbt failed: ${err.code}` + return "dbt failed (no exit code reported)" +} + +/** + * Build a user-facing error message from a failed `dbt ` invocation. + * + * Used by every execDbt* function so all three share the same error UX: + * - structured `level: "error"` event from JSON logs > primary stderr > + * plain-text stderr > redacted exit-status fallback + * - ANSI escapes stripped + * - inline SQL / command-line redacted from any err.message-derived path + * - no doubled prefix when dbt's own category prefix is already present + */ +function bubbleDbtError(label: string, primary?: ExecFileError, plain?: ExecFileError): string { + const errorLogLines = primary?.stdout ? parseJsonLines(primary.stdout.toString()) : [] + const real = extractDbtError(errorLogLines, primary, plain) + if (real) { + const hasDbtCategoryPrefix = /^(Compilation|Database|Runtime|Parsing|Validation|Dependency)\s+Error\b/.test( + real, + ) + return hasDbtCategoryPrefix ? real : `${label}: ${real}` + } + return `${label}: ${fallbackExitMessage(primary, plain) ?? "unknown error"}` +} + /** * Compile a model via `dbt compile --select ` and return compiled SQL. */ export async function execDbtCompile(model: string): Promise<{ sql: string }> { const args = ["compile", "--select", model, "--output", "json", "--log-format", "json"] - let lines: Record[] + let lines: Record[] = [] + let primaryRunError: ExecFileError | undefined try { const { stdout } = await run(args) lines = parseJsonLines(stdout) - } catch { - lines = [] + } catch (e) { + primaryRunError = toExecFileError(e) } - // --- Tier 1: known field paths --- - const sql = findCompiledSql(lines) - if (sql) return { sql } - - // --- Tier 2: heuristic deep scan --- - for (const line of lines) { - const found = deepFind(line, (val) => looksLikeSql(val)) - if (found) return { sql: found as string } + // Skip success-only tiers when the primary run failed (same anti-spurious- + // data reasoning as execDbtShow). + if (!primaryRunError) { + // --- Tier 1: known field paths --- + const sql = findCompiledSql(lines) + if (sql) return { sql } + + // --- Tier 2: heuristic deep scan --- + for (const line of lines) { + const found = deepFind(line, (val) => looksLikeSql(val)) + if (found) return { sql: found as string } + } } - // --- Tier 3: read compiled SQL from manifest.json (more reliable than stdout) --- - // dbt compile writes compiled_code to target/manifest.json even when stdout is logs. - // We run compile without JSON flags so it writes to manifest, then read the artifact. + // --- Manifest fallback --- + // dbt compile writes compiled_code to target/manifest.json even when stdout + // is just logs. Re-run plain (no JSON flags) so the artifact is fresh, then + // read it back. We tolerate a failure here (a prior successful compile may + // have left a usable manifest) but capture the error for later bubbling. + let manifestRunError: ExecFileError | undefined try { await run(["compile", "--select", model]) - } catch { - // Compile may fail (e.g., dbt not found, project errors) — continue to manifest check - // since a prior successful compile may have left a usable manifest + } catch (e) { + manifestRunError = toExecFileError(e) } const fromManifest = readCompiledFromManifest(model) if (fromManifest) return { sql: fromManifest } - // Last resort: return stdout (may contain logs mixed with SQL) + // --- Last resort: plain compile, return raw stdout --- + let plainRunError: ExecFileError | undefined try { const { stdout: plainOut } = await run(["compile", "--select", model]) return { sql: plainOut.trim() } } catch (e) { - throw new Error( - `Could not compile model '${model}' in any format (JSON, heuristic, or manifest). ` + - `Last error: ${e instanceof Error ? e.message : String(e)}`, - ) + plainRunError = toExecFileError(e) + } + + // If dbt actually failed at any tier, surface the real dbt error via the + // shared helper so the message is SQL-safe, ANSI-stripped, and consistent + // with execDbtShow's error UX. + if (primaryRunError || plainRunError || manifestRunError) { + throw new Error(bubbleDbtError("dbt compile failed", primaryRunError, plainRunError ?? manifestRunError)) } + + throw new Error(`Could not compile model '${model}' in any format (JSON, heuristic, or manifest).`) } /** @@ -362,34 +560,45 @@ export async function execDbtCompileInline( ): Promise<{ sql: string }> { const args = ["compile", "--inline", sql, "--output", "json", "--log-format", "json"] - let lines: Record[] + let lines: Record[] = [] + let primaryRunError: ExecFileError | undefined try { const { stdout } = await run(args) lines = parseJsonLines(stdout) - } catch { - lines = [] + } catch (e) { + primaryRunError = toExecFileError(e) } - // --- Tier 1: known field paths --- - const compiled = findCompiledSql(lines) - if (compiled) return { sql: compiled } + // Skip success-only tiers when the primary run failed. + if (!primaryRunError) { + // --- Tier 1: known field paths --- + const compiled = findCompiledSql(lines) + if (compiled) return { sql: compiled } - // --- Tier 2: heuristic deep scan --- - for (const line of lines) { - const found = deepFind(line, (val) => looksLikeSql(val)) - if (found) return { sql: found as string } + // --- Tier 2: heuristic deep scan --- + for (const line of lines) { + const found = deepFind(line, (val) => looksLikeSql(val)) + if (found) return { sql: found as string } + } } // --- Tier 3: plain text fallback --- + let plainRunError: ExecFileError | undefined try { const { stdout: plainOut } = await run(["compile", "--inline", sql]) return { sql: plainOut.trim() } } catch (e) { - throw new Error( - `Could not compile inline SQL in any format (JSON, heuristic, or plain text). ` + - `Last error: ${e instanceof Error ? e.message : String(e)}`, - ) + plainRunError = toExecFileError(e) + } + + // bubbleDbtError redacts inline SQL from any err.message fallback — critical + // here because we're spawning `dbt compile --inline ` and Node's + // rejection message embeds the full command line (with the SQL) verbatim. + if (primaryRunError || plainRunError) { + throw new Error(bubbleDbtError("dbt compile inline failed", primaryRunError, plainRunError)) } + + throw new Error("Could not compile inline SQL in any format (JSON, heuristic, or plain text).") } /** Shared: extract compiled SQL from known dbt JSON output formats. */ diff --git a/packages/dbt-tools/test/dbt-cli.test.ts b/packages/dbt-tools/test/dbt-cli.test.ts index e8c7c18eb..b9c81b290 100644 --- a/packages/dbt-tools/test/dbt-cli.test.ts +++ b/packages/dbt-tools/test/dbt-cli.test.ts @@ -149,6 +149,305 @@ describe("execDbtShow", () => { await expect(execDbtShow("SELECT 1")).rejects.toThrow("Could not parse dbt show output in any format") }) + + // --- Bubble real dbt error instead of generic "Could not parse" --- + // + // IMPORTANT — faithful execFile wiring: + // Node's `execFile` passes `stdout`/`stderr` as the 2nd/3rd callback args + // on error, NOT as properties on the error object. The tests below + // intentionally do NOT pre-attach `err.stdout` / `err.stderr` — that + // would let the production code "work" through a mock-only quirk. The + // `run()` wrapper in dbt-cli.ts is responsible for moving the callback + // args onto the rejected error; if it stops doing that, these tests + // must fail. + + test("surfaces real dbt stderr when run fails", async () => { + mockExecFile.mockImplementation((_cmd: string, _args: string[], _opts: any, cb: Function) => { + const err: any = new Error("Command failed: dbt show --inline ...") + err.code = 1 + cb( + err, + "", + "Runtime Error: Failed to read package: No dbt_project.yml found at expected path dbt_packages/dbt_utils/dbt_project.yml", + ) + }) + + // The error already starts with "Runtime Error:" (a dbt category prefix), + // so we intentionally do NOT add a "dbt show failed:" prefix in front. + await expect(execDbtShow("SELECT 1")).rejects.toThrow(/^Runtime Error: Failed to read package/) + }) + + test("prefers structured error event in JSON log over raw stderr", async () => { + const errorLog = JSON.stringify({ + info: { + level: "error", + msg: "Compilation Error: Model 'foo' depends on a node named 'bar' which was not found", + }, + }) + mockExecFile.mockImplementation((_cmd: string, _args: string[], _opts: any, cb: Function) => { + const err: any = new Error("Command failed") + err.code = 1 + cb(err, errorLog, "exit status 1") + }) + + await expect(execDbtShow("SELECT 1")).rejects.toThrow(/Compilation Error.*Model 'foo'/) + }) + + test("recognises top-level { level: 'error' } event shape (no nested info)", async () => { + // extractDbtError handles both `l.info?.level === 'error'` and the + // top-level `l.level === 'error'` shape. This test exercises the latter. + const errorLog = JSON.stringify({ + level: "error", + msg: "Database Error: connection to server lost", + }) + mockExecFile.mockImplementation((_cmd: string, _args: string[], _opts: any, cb: Function) => { + const err: any = new Error("Command failed") + err.code = 1 + cb(err, errorLog, "") + }) + + await expect(execDbtShow("SELECT 1")).rejects.toThrow(/connection to server lost/) + }) + + test("does not surface generic 'Could not parse' when dbt actually crashed", async () => { + mockExecFile.mockImplementation((_cmd: string, _args: string[], _opts: any, cb: Function) => { + const err: any = new Error("Command failed") + err.code = 2 + cb(err, "", "Database Error: connection refused") + }) + + // Positive assertion: the real dbt error must be surfaced. The earlier + // negation-only check would have passed even if some unrelated error + // were thrown; the positive form below is sufficient on its own. + await expect(execDbtShow("SELECT 1")).rejects.toThrow(/Database Error.*connection refused/) + }) + + test("falls back to error message when stderr is empty", async () => { + mockExecFile.mockImplementation((_cmd: string, _args: string[], _opts: any, cb: Function) => { + const err: any = new Error("spawn ENOENT") + err.code = "ENOENT" + cb(err, "", "") + }) + + await expect(execDbtShow("SELECT 1")).rejects.toThrow(/spawn ENOENT|dbt show failed/) + }) + + test("does NOT return rows from a crashed run's stdout (Tier 1/2 skip on error)", async () => { + // If `run()` rejects, the JSON log lines from the crashed stdout MUST + // NOT be fed into Tier 1/Tier 2 heuristics — a crash log can contain + // incidental arrays that `looksLikeRowData` would happily return as + // "rows" (silent wrong data, worse than the original misleading error). + let callCount = 0 + mockExecFile.mockImplementation((_cmd: string, _args: string[], _opts: any, cb: Function) => { + callCount++ + if (callCount === 1) { + // JSON-mode run crashes, but stdout has an incidental array of objects + // nested in a log line. Pre-fix Tier 2 would happily match it. + const err: any = new Error("Command failed") + err.code = 1 + const trapLine = JSON.stringify({ + payload: { incidental_metadata: [{ unrelated: "log_entry" }, { unrelated: "another" }] }, + }) + cb(err, trapLine, "Database Error: connection refused") + } else { + // Plain-text retry also fails so the real error is what bubbles up. + const err: any = new Error("Command failed") + err.code = 1 + cb(err, "", "Database Error: connection refused") + } + }) + + await expect(execDbtShow("SELECT 1")).rejects.toThrow(/Database Error.*connection refused/) + await expect(execDbtShow("SELECT 1")).rejects.not.toThrow(/unrelated/) + }) + + test("Tier 3 recovers: JSON-mode crashes but plain-text succeeds → returns the table", async () => { + // Regression guard: even when the JSON-mode `dbt show --output json` run + // rejects, we still attempt the plain-text retry and return the parsed + // table if it succeeds. Throwing the JSON-mode error here would lose a + // valid recovery path. + let callCount = 0 + mockExecFile.mockImplementation((_cmd: string, _args: string[], _opts: any, cb: Function) => { + callCount++ + if (callCount === 1) { + const err: any = new Error("Command failed") + err.code = 1 + cb(err, "", "some json-mode-specific failure") + } else { + cb(null, ["| id | name |", "| -- | ----- |", "| 1 | Alice |"].join("\n"), "") + } + }) + + const result = await execDbtShow("SELECT id, name FROM users") + expect(result.columnNames).toEqual(["id", "name"]) + expect(result.data).toEqual([{ id: "1", name: "Alice" }]) + }) + + test("does NOT attribute parser failure to 'dbt show failed' when primary succeeded", async () => { + // Regression: when the JSON-mode primary run exits 0 with output we can't + // decode AND the plain-text retry fails for a different reason, the error + // must distinguish the two cases. Throwing "dbt show failed: " misattributes a parser regression as a dbt execution failure. + let callCount = 0 + mockExecFile.mockImplementation((_cmd: string, _args: string[], _opts: any, cb: Function) => { + callCount++ + if (callCount === 1) { + // Primary JSON-mode run succeeds — exit 0, just an unrecognised shape. + cb(null, JSON.stringify({ info: { msg: "done" }, unknown_shape: true }), "") + } else { + // Plain-text retry fails for an unrelated reason. + const err: any = new Error("Command failed") + err.code = 1 + cb(err, "", "some plain-mode-specific failure") + } + }) + + // Capture the rejection once — the mock is stateful (callCount), so + // calling execDbtShow twice would re-enter the mock from a different + // count and exercise a different branch. + const caught = (await execDbtShow("SELECT 1").catch((e) => e)) as Error + expect(caught.message).toMatch(/Could not parse dbt show JSON output, and plain-text fallback failed/) + // The error must NOT carry the "dbt show failed:" prefix — dbt show + // itself succeeded; only the parser + retry failed. + expect(caught.message).not.toMatch(/dbt show failed:/) + }) + + test("picks the LAST level:'error' event when dbt emits multiple", async () => { + // dbt often emits a generic header first ("Encountered an error:") and the + // actionable error second. We want the actionable one. + const errorLog = [ + JSON.stringify({ info: { level: "error", msg: "Encountered an error:" } }), + JSON.stringify({ info: { level: "info", msg: "Some info between" } }), + JSON.stringify({ + info: { level: "error", msg: "Compilation Error in model 'orders': Undefined macro 'foo'" }, + }), + ].join("\n") + mockExecFile.mockImplementation((_cmd: string, _args: string[], _opts: any, cb: Function) => { + const err: any = new Error("Command failed") + err.code = 1 + cb(err, errorLog, "") + }) + + await expect(execDbtShow("SELECT 1")).rejects.toThrow(/Undefined macro 'foo'/) + await expect(execDbtShow("SELECT 1")).rejects.not.toThrow(/Encountered an error:$/) + }) + + test("strips ANSI escape codes from surfaced errors", async () => { + const ansiRed = "\u001b[31m" + const ansiReset = "\u001b[0m" + const errorLog = JSON.stringify({ + info: { level: "error", msg: `${ansiRed}Database Error${ansiReset}: connection refused` }, + }) + mockExecFile.mockImplementation((_cmd: string, _args: string[], _opts: any, cb: Function) => { + const err: any = new Error("Command failed") + err.code = 1 + cb(err, errorLog, "") + }) + + const caught = (await execDbtShow("SELECT 1").catch((e) => e)) as Error + expect(caught.message).toContain("Database Error: connection refused") + expect(caught.message).not.toContain("\u001b[") + }) + + test("does NOT embed full SQL into the surfaced error (no Command-failed leak)", async () => { + // Node's execFile rejection has err.message = "Command failed: + // show --inline '' ...". When no structured event or stderr + // is available, the fallback must surface only the exit status, not the + // full message with embedded SQL. + const sensitiveSql = "SELECT 'PII_TOKEN_abc123' AS secret FROM users WHERE ssn = '999-00-0000'" + mockExecFile.mockImplementation((_cmd: string, _args: string[], _opts: any, cb: Function) => { + // Realistic Node message embeds the args (and so the SQL). + const err: any = new Error( + `Command failed: /usr/local/bin/dbt show --inline ${sensitiveSql} --output json --log-format json`, + ) + err.code = 2 + cb(err, "", "") + }) + + const caught = (await execDbtShow(sensitiveSql).catch((e) => e)) as Error + expect(caught.message).not.toContain("PII_TOKEN_abc123") + expect(caught.message).not.toContain("999-00-0000") + expect(caught.message).toMatch(/dbt show failed: dbt exited with status 2/) + }) + + test("does NOT leak SQL when dbt is killed by signal / timeout", async () => { + // Timeout/signal kills also produce a `Command failed: ` + // message from Node. The redaction must catch this case too, not just + // numeric exit codes. + const sensitiveSql = "SELECT 'leak_canary_xyz' FROM secrets" + mockExecFile.mockImplementation((_cmd: string, _args: string[], _opts: any, cb: Function) => { + const err: any = new Error( + `Command failed: /usr/local/bin/dbt show --inline ${sensitiveSql} --output json --log-format json`, + ) + err.killed = true + err.signal = "SIGTERM" + cb(err, "", "") + }) + + const caught = (await execDbtShow(sensitiveSql).catch((e) => e)) as Error + expect(caught.message).not.toContain("leak_canary_xyz") + expect(caught.message).toMatch(/dbt show failed: dbt killed by signal SIGTERM/) + }) + + test("does NOT double the 'failed:' prefix when dbt's own category prefix is present", async () => { + // dbt's own messages already start with "Database Error:", + // "Compilation Error:", etc. Adding "dbt show failed: " in front yields + // "dbt show failed: Database Error: ..." which is redundant. + mockExecFile.mockImplementation((_cmd: string, _args: string[], _opts: any, cb: Function) => { + const err: any = new Error("Command failed") + err.code = 1 + cb(err, "", "Compilation Error in model 'foo'") + }) + + const caught = (await execDbtShow("SELECT 1").catch((e) => e)) as Error + expect(caught.message).toBe("Compilation Error in model 'foo'") + expect(caught.message).not.toMatch(/dbt show failed: (Compilation|Database|Runtime|Parsing|Validation|Dependency)\s+Error/) + }) + + test("handles Buffer-typed stdout/stderr from execFile", async () => { + // ExecFileError.stdout/stderr is typed `string | Buffer` because Node's + // execFile delivers Buffers when called with encoding: "buffer". Our + // production calls don't set that, but the type widening means callers + // could. Make sure .toString() / parseJsonLines / extractDbtError all + // handle Buffers correctly. + const errorLog = JSON.stringify({ + info: { level: "error", msg: "Database Error: connection refused" }, + }) + mockExecFile.mockImplementation((_cmd: string, _args: string[], _opts: any, cb: Function) => { + const err: any = new Error("Command failed") + err.code = 1 + cb(err, Buffer.from(errorLog), Buffer.from("exit status 1")) + }) + + await expect(execDbtShow("SELECT 1")).rejects.toThrow(/Database Error: connection refused/) + }) + + test("does NOT return malformed data when data.preview is a truthy non-array", async () => { + // Regression for #944: the previewLine match only checks truthiness, so a + // future dbt version emitting `data.preview = {}` would flow into `rows` + // and the downstream `data: rows` field would crash callers that do + // `.map` / `.length`. Treat unexpected shapes as empty rows instead. + const jsonLines = [JSON.stringify({ data: { preview: {} } })].join("\n") + mockExecFile.mockImplementation((_cmd: string, _args: string[], _opts: any, cb: Function) => { + cb(null, jsonLines, "") + }) + + const result = await execDbtShow("SELECT 1") + expect(Array.isArray(result.data)).toBe(true) + expect(result.data).toEqual([]) + expect(result.columnNames).toEqual([]) + }) + + test("also treats numeric data.preview as empty (defence-in-depth)", async () => { + const jsonLines = [JSON.stringify({ data: { preview: 42 } })].join("\n") + mockExecFile.mockImplementation((_cmd: string, _args: string[], _opts: any, cb: Function) => { + cb(null, jsonLines, "") + }) + + const result = await execDbtShow("SELECT 1") + expect(Array.isArray(result.data)).toBe(true) + expect(result.data).toEqual([]) + }) }) // --------------------------------------------------------------------------- @@ -241,6 +540,45 @@ describe("execDbtCompile", () => { const result = await execDbtCompile("my_model") expect(result.sql).toBe("SELECT * FROM final_model") }) + + // --- Real dbt error bubbling (#943) --- + + test("surfaces real dbt stderr when compile fails", async () => { + // Pre-fix: catch { lines = [] } swallowed the real error and the final + // throw embedded Node's generic "Command failed: ..." message. + mockExecFile.mockImplementation((_cmd: string, _args: string[], _opts: any, cb: Function) => { + const err: any = new Error("Command failed: dbt compile --select foo ...") + err.code = 1 + cb(err, "", "Compilation Error: Model 'foo' depends on a node named 'bar' which was not found") + }) + + await expect(execDbtCompile("foo")).rejects.toThrow(/Compilation Error.*Model 'foo'.*depends on a node named 'bar'/) + }) + + test("prefers structured JSON error event over raw stderr", async () => { + const errorLog = JSON.stringify({ + info: { level: "error", msg: "Database Error: relation does not exist" }, + }) + mockExecFile.mockImplementation((_cmd: string, _args: string[], _opts: any, cb: Function) => { + const err: any = new Error("Command failed") + err.code = 1 + cb(err, errorLog, "exit status 1") + }) + + await expect(execDbtCompile("foo")).rejects.toThrow(/Database Error: relation does not exist/) + }) + + test("does not double the 'dbt compile failed:' prefix on dbt category errors", async () => { + mockExecFile.mockImplementation((_cmd: string, _args: string[], _opts: any, cb: Function) => { + const err: any = new Error("Command failed") + err.code = 1 + cb(err, "", "Compilation Error: model not found") + }) + + const caught = (await execDbtCompile("foo").catch((e) => e)) as Error + expect(caught.message).toBe("Compilation Error: model not found") + expect(caught.message).not.toMatch(/dbt compile failed: Compilation Error/) + }) }) // --------------------------------------------------------------------------- @@ -261,6 +599,53 @@ describe("execDbtCompileInline", () => { const result = await execDbtCompileInline("SELECT * FROM {{ ref('customers') }}") expect(result.sql).toBe("SELECT id, name FROM raw.customers") }) + + // --- Real dbt error bubbling (#943) + SQL redaction (#945) --- + + test("surfaces real dbt error when inline compile fails", async () => { + mockExecFile.mockImplementation((_cmd: string, _args: string[], _opts: any, cb: Function) => { + const err: any = new Error("Command failed: dbt compile --inline 'SELECT 1' ...") + err.code = 1 + cb(err, "", "Compilation Error: Undefined macro 'unknown_macro'") + }) + + await expect(execDbtCompileInline("SELECT 1")).rejects.toThrow(/Compilation Error.*Undefined macro/) + }) + + test("does NOT embed inline SQL into the surfaced error (no Command-failed leak)", async () => { + // Regression for #945: pre-fix the throw used `e.message` directly, + // which is Node's "Command failed: compile --inline '' …" format — leaking the user's full query into logs and UI. + const sensitiveSql = "SELECT 'PII_TOKEN_compile_xyz' AS secret FROM users WHERE ssn = '111-22-3333'" + mockExecFile.mockImplementation((_cmd: string, _args: string[], _opts: any, cb: Function) => { + const err: any = new Error( + `Command failed: /usr/local/bin/dbt compile --inline ${sensitiveSql} --output json --log-format json`, + ) + err.code = 2 + cb(err, "", "") + }) + + const caught = (await execDbtCompileInline(sensitiveSql).catch((e) => e)) as Error + expect(caught.message).not.toContain("PII_TOKEN_compile_xyz") + expect(caught.message).not.toContain("111-22-3333") + expect(caught.message).toMatch(/dbt compile inline failed: dbt exited with status 2/) + }) + + test("does NOT leak SQL when inline compile is killed by signal / timeout", async () => { + const sensitiveSql = "SELECT 'compile_leak_canary' FROM secrets" + mockExecFile.mockImplementation((_cmd: string, _args: string[], _opts: any, cb: Function) => { + const err: any = new Error( + `Command failed: /usr/local/bin/dbt compile --inline ${sensitiveSql} --output json --log-format json`, + ) + err.killed = true + err.signal = "SIGTERM" + cb(err, "", "") + }) + + const caught = (await execDbtCompileInline(sensitiveSql).catch((e) => e)) as Error + expect(caught.message).not.toContain("compile_leak_canary") + expect(caught.message).toMatch(/dbt compile inline failed: dbt killed by signal SIGTERM/) + }) }) // ---------------------------------------------------------------------------