From c6e1de3d59bee623e7929e4a7bd0c378a4ad4566 Mon Sep 17 00:00:00 2001 From: Abigail Atheryon Date: Fri, 8 May 2026 07:34:07 +1000 Subject: [PATCH 1/2] fix(gemini-adapter): detect new ~/.gemini/oauth_creds.json auth path gemini-cli >=0.30 stores OAuth credentials at ~/.gemini/oauth_creds.json instead of the legacy ~/.config/gemini/ directory. The benchmark adapter's availability check now succeeds for users on recent gemini-cli releases who have authenticated via interactive login. Both paths are accepted so users on older versions still work. --- test/helpers/providers/gemini.ts | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/test/helpers/providers/gemini.ts b/test/helpers/providers/gemini.ts index 4395470397..5e7abba13a 100644 --- a/test/helpers/providers/gemini.ts +++ b/test/helpers/providers/gemini.ts @@ -22,8 +22,10 @@ export class GeminiAdapter implements ProviderAdapter { if (res.status !== 0) { return { ok: false, reason: 'gemini CLI not found on PATH. Install per https://github.com/google-gemini/gemini-cli' }; } - const cfgDir = path.join(os.homedir(), '.config', 'gemini'); - const hasCfg = fs.existsSync(cfgDir); + const legacyCfgDir = path.join(os.homedir(), '.config', 'gemini'); + const newCfgDir = path.join(os.homedir(), '.gemini'); + const newOauth = path.join(newCfgDir, 'oauth_creds.json'); + const hasCfg = fs.existsSync(legacyCfgDir) || fs.existsSync(newOauth); const hasKey = !!process.env.GOOGLE_API_KEY; if (!hasCfg && !hasKey) { return { ok: false, reason: 'No Gemini auth found. Log in via `gemini login` or export GOOGLE_API_KEY.' }; From 4b8bc2984111a4d522290ea0bfab6c3cf272faa0 Mon Sep 17 00:00:00 2001 From: Abigail Atheryon Date: Fri, 8 May 2026 08:51:00 +1000 Subject: [PATCH 2/2] feat(gemini-adapter): prefer HTTP API when GEMINI_API_KEY is set MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The OAuth tier hits hard daily quotas on gemini-2.5-pro after a handful of calls, and the CLI's stream-json output reports 0/0 tokens for OAuth runs (so cost numbers were always $0.0000). Switch to the Generative Language HTTP API directly when GEMINI_API_KEY (or GOOGLE_API_KEY) is set — paid quotas, full usage data including reasoning ("thoughts") tokens which are folded into output for accurate cost estimation. CLI fallback retained for free-OAuth users with no API key, so /benchmark still works out of the box on a fresh install. Auth precedence: 1. GEMINI_API_KEY → HTTP path (preferred) 2. GOOGLE_API_KEY → HTTP path 3. ~/.gemini/oauth_creds.json or ~/.config/gemini/ → CLI path --- test/helpers/providers/gemini.ts | 138 +++++++++++++++++++++++++------ 1 file changed, 111 insertions(+), 27 deletions(-) diff --git a/test/helpers/providers/gemini.ts b/test/helpers/providers/gemini.ts index 5e7abba13a..feb6c4596f 100644 --- a/test/helpers/providers/gemini.ts +++ b/test/helpers/providers/gemini.ts @@ -6,37 +6,132 @@ import * as path from 'path'; import * as os from 'os'; /** - * Gemini adapter — wraps the `gemini` CLI. + * Gemini adapter — prefers the Generative Language HTTP API when an API key + * is set, falls back to the `gemini` CLI for OAuth-only users. * - * Gemini CLI auth comes from either ~/.config/gemini/ or GOOGLE_API_KEY. Output - * format is NDJSON with `message`/`tool_use`/`result` events when `--output-format - * stream-json` is requested. This adapter uses a single-response form for simplicity - * in benchmarks; richer streaming lives in gemini-session-runner.ts. + * Why two paths: the OAuth tier hits hard daily quotas on gemini-2.5-pro + * after a handful of calls, and the CLI's stream-json output reports 0/0 + * tokens for OAuth runs. The HTTP path with GEMINI_API_KEY bypasses both — + * paid quotas and full usage data. CLI fallback keeps personal-tier free-OAuth + * users working out of the box. + * + * Auth precedence: + * 1. GEMINI_API_KEY → HTTP path (preferred) + * 2. GOOGLE_API_KEY → HTTP path + * 3. ~/.gemini/oauth_creds.json or ~/.config/gemini/ → CLI path */ +const GENLANG_BASE = 'https://generativelanguage.googleapis.com/v1beta'; +const DEFAULT_MODEL = 'gemini-2.5-pro'; + +interface GenLangResponse { + candidates?: Array<{ content?: { parts?: Array<{ text?: string }> } }>; + usageMetadata?: { + promptTokenCount?: number; + candidatesTokenCount?: number; + thoughtsTokenCount?: number; + cachedContentTokenCount?: number; + }; + modelVersion?: string; + error?: { code?: number; message?: string; status?: string }; +} + export class GeminiAdapter implements ProviderAdapter { readonly name = 'gemini'; readonly family = 'gemini' as const; async available(): Promise { + if (this.apiKey()) return { ok: true }; + const res = spawnSync('sh', ['-c', 'command -v gemini'], { timeout: 2000 }); if (res.status !== 0) { - return { ok: false, reason: 'gemini CLI not found on PATH. Install per https://github.com/google-gemini/gemini-cli' }; + return { ok: false, reason: 'gemini CLI not found on PATH. Install per https://github.com/google-gemini/gemini-cli, or export GEMINI_API_KEY.' }; } const legacyCfgDir = path.join(os.homedir(), '.config', 'gemini'); - const newCfgDir = path.join(os.homedir(), '.gemini'); - const newOauth = path.join(newCfgDir, 'oauth_creds.json'); - const hasCfg = fs.existsSync(legacyCfgDir) || fs.existsSync(newOauth); - const hasKey = !!process.env.GOOGLE_API_KEY; - if (!hasCfg && !hasKey) { - return { ok: false, reason: 'No Gemini auth found. Log in via `gemini login` or export GOOGLE_API_KEY.' }; + const newOauth = path.join(os.homedir(), '.gemini', 'oauth_creds.json'); + if (!fs.existsSync(legacyCfgDir) && !fs.existsSync(newOauth)) { + return { ok: false, reason: 'No Gemini auth found. Log in via `gemini` interactive session, or export GEMINI_API_KEY.' }; } return { ok: true }; } async run(opts: RunOpts): Promise { + return this.apiKey() ? this.runHttp(opts) : this.runCli(opts); + } + + estimateCost(tokens: { input: number; output: number; cached?: number }, model?: string): number { + return estimateCostUsd(tokens, model ?? DEFAULT_MODEL); + } + + private apiKey(): string | undefined { + return process.env.GEMINI_API_KEY ?? process.env.GOOGLE_API_KEY; + } + + private async runHttp(opts: RunOpts): Promise { + const start = Date.now(); + const key = this.apiKey()!; + const model = opts.model ?? DEFAULT_MODEL; + const controller = new AbortController(); + const timer = setTimeout(() => controller.abort(), opts.timeoutMs); + + try { + const res = await fetch(`${GENLANG_BASE}/models/${model}:generateContent?key=${encodeURIComponent(key)}`, { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ contents: [{ parts: [{ text: opts.prompt }] }] }), + signal: controller.signal, + }); + + const durationMs = Date.now() - start; + const bodyText = await res.text(); + + if (!res.ok) { + const reason = bodyText.slice(0, 400) || `${res.status} ${res.statusText}`; + if (res.status === 401 || res.status === 403) { + return this.emptyResult(durationMs, { code: 'auth', reason }, model); + } + if (res.status === 429) { + return this.emptyResult(durationMs, { code: 'rate_limit', reason }, model); + } + return this.emptyResult(durationMs, { code: 'unknown', reason }, model); + } + + const data = JSON.parse(bodyText) as GenLangResponse; + const output = (data.candidates?.[0]?.content?.parts ?? []) + .map((p) => p.text ?? '') + .join(''); + + const promptTokens = data.usageMetadata?.promptTokenCount ?? 0; + const candidates = data.usageMetadata?.candidatesTokenCount ?? 0; + // Reasoning ("thoughts") tokens are billed as output by Google. Fold them in + // so cost estimation matches the real bill. + const thoughts = data.usageMetadata?.thoughtsTokenCount ?? 0; + const cached = data.usageMetadata?.cachedContentTokenCount; + + return { + output, + tokens: { + input: promptTokens, + output: candidates + thoughts, + ...(cached !== undefined ? { cached } : {}), + }, + durationMs, + toolCalls: 0, + modelUsed: data.modelVersion ?? model, + }; + } catch (err: unknown) { + const durationMs = Date.now() - start; + const e = err as { name?: string; message?: string }; + if (e.name === 'AbortError') { + return this.emptyResult(durationMs, { code: 'timeout', reason: `exceeded ${opts.timeoutMs}ms` }, model); + } + return this.emptyResult(durationMs, { code: 'unknown', reason: (e.message ?? 'unknown').slice(0, 400) }, model); + } finally { + clearTimeout(timer); + } + } + + private async runCli(opts: RunOpts): Promise { const start = Date.now(); - // Default to --yolo (non-interactive) and stream-json output so we can parse - // tokens + tool calls. Callers can override via extraArgs. const args = ['-p', opts.prompt, '--output-format', 'stream-json', '--yolo']; if (opts.model) args.push('--model', opts.model); if (opts.extraArgs) args.push(...opts.extraArgs); @@ -54,7 +149,7 @@ export class GeminiAdapter implements ProviderAdapter { tokens: parsed.tokens, durationMs: Date.now() - start, toolCalls: parsed.toolCalls, - modelUsed: parsed.modelUsed || opts.model || 'gemini-2.5-pro', + modelUsed: parsed.modelUsed || opts.model || DEFAULT_MODEL, }; } catch (err: unknown) { const durationMs = Date.now() - start; @@ -73,17 +168,6 @@ export class GeminiAdapter implements ProviderAdapter { } } - estimateCost(tokens: { input: number; output: number; cached?: number }, model?: string): number { - return estimateCostUsd(tokens, model ?? 'gemini-2.5-pro'); - } - - /** - * Parse gemini NDJSON stream events: - * init → session id (discarded here) - * message { delta: true, text } → concat to output - * tool_use { name } → increment toolCalls - * result { usage: { input_token_count, output_token_count } } → tokens - */ private parseStreamJson(raw: string): { output: string; tokens: { input: number; output: number }; toolCalls: number; modelUsed?: string } { let output = ''; let input = 0; @@ -118,7 +202,7 @@ export class GeminiAdapter implements ProviderAdapter { tokens: { input: 0, output: 0 }, durationMs, toolCalls: 0, - modelUsed: model ?? 'gemini-2.5-pro', + modelUsed: model ?? DEFAULT_MODEL, error, }; }