From c6e1de3d59bee623e7929e4a7bd0c378a4ad4566 Mon Sep 17 00:00:00 2001
From: Abigail Atheryon <abi@atheryon.ai>
Date: Fri, 8 May 2026 07:34:07 +1000
Subject: [PATCH 1/2] fix(gemini-adapter): detect new
 ~/.gemini/oauth_creds.json auth path

gemini-cli >=0.30 stores OAuth credentials at ~/.gemini/oauth_creds.json
instead of the legacy ~/.config/gemini/ directory. The benchmark adapter's
availability check now succeeds for users on recent gemini-cli releases
who have authenticated via interactive login.

Both paths are accepted so users on older versions still work.
---
 test/helpers/providers/gemini.ts | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/test/helpers/providers/gemini.ts b/test/helpers/providers/gemini.ts
index 4395470397..5e7abba13a 100644
--- a/test/helpers/providers/gemini.ts
+++ b/test/helpers/providers/gemini.ts
@@ -22,8 +22,10 @@ export class GeminiAdapter implements ProviderAdapter {
     if (res.status !== 0) {
       return { ok: false, reason: 'gemini CLI not found on PATH. Install per https://github.com/google-gemini/gemini-cli' };
     }
-    const cfgDir = path.join(os.homedir(), '.config', 'gemini');
-    const hasCfg = fs.existsSync(cfgDir);
+    const legacyCfgDir = path.join(os.homedir(), '.config', 'gemini');
+    const newCfgDir = path.join(os.homedir(), '.gemini');
+    const newOauth = path.join(newCfgDir, 'oauth_creds.json');
+    const hasCfg = fs.existsSync(legacyCfgDir) || fs.existsSync(newOauth);
     const hasKey = !!process.env.GOOGLE_API_KEY;
     if (!hasCfg && !hasKey) {
       return { ok: false, reason: 'No Gemini auth found. Log in via `gemini login` or export GOOGLE_API_KEY.' };

From 4b8bc2984111a4d522290ea0bfab6c3cf272faa0 Mon Sep 17 00:00:00 2001
From: Abigail Atheryon <abi@atheryon.ai>
Date: Fri, 8 May 2026 08:51:00 +1000
Subject: [PATCH 2/2] feat(gemini-adapter): prefer HTTP API when GEMINI_API_KEY
 is set
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The OAuth tier hits hard daily quotas on gemini-2.5-pro after a handful
of calls, and the CLI's stream-json output reports 0/0 tokens for
OAuth runs (so cost numbers were always $0.0000). When GEMINI_API_KEY
(or GOOGLE_API_KEY) is set, call the Generative Language HTTP API
directly instead: it runs against paid quotas and returns full usage
data, including reasoning ("thoughts") tokens, which are folded into
the output token count for accurate cost estimation.

CLI fallback retained for free-OAuth users with no API key, so /benchmark
still works out of the box on a fresh install.

Auth precedence:
  1. GEMINI_API_KEY  → HTTP path (preferred)
  2. GOOGLE_API_KEY  → HTTP path
  3. ~/.gemini/oauth_creds.json or ~/.config/gemini/  → CLI path
---
 test/helpers/providers/gemini.ts | 138 +++++++++++++++++++++++++------
 1 file changed, 111 insertions(+), 27 deletions(-)

diff --git a/test/helpers/providers/gemini.ts b/test/helpers/providers/gemini.ts
index 5e7abba13a..feb6c4596f 100644
--- a/test/helpers/providers/gemini.ts
+++ b/test/helpers/providers/gemini.ts
@@ -6,37 +6,132 @@ import * as path from 'path';
 import * as os from 'os';
 
 /**
- * Gemini adapter — wraps the `gemini` CLI.
+ * Gemini adapter — prefers the Generative Language HTTP API when an API key
+ * is set, falls back to the `gemini` CLI for OAuth-only users.
  *
- * Gemini CLI auth comes from either ~/.config/gemini/ or GOOGLE_API_KEY. Output
- * format is NDJSON with `message`/`tool_use`/`result` events when `--output-format
- * stream-json` is requested. This adapter uses a single-response form for simplicity
- * in benchmarks; richer streaming lives in gemini-session-runner.ts.
+ * Why two paths: the OAuth tier hits hard daily quotas on gemini-2.5-pro
+ * after a handful of calls, and the CLI's stream-json output reports 0/0
+ * tokens for OAuth runs. The HTTP path with GEMINI_API_KEY bypasses both —
+ * paid quotas and full usage data. CLI fallback keeps personal-tier free-OAuth
+ * users working out of the box.
+ *
+ * Auth precedence:
+ *   1. GEMINI_API_KEY  → HTTP path (preferred)
+ *   2. GOOGLE_API_KEY  → HTTP path
+ *   3. ~/.gemini/oauth_creds.json or ~/.config/gemini/  → CLI path
  */
+const GENLANG_BASE = 'https://generativelanguage.googleapis.com/v1beta'; // base URL the generateContent request is built on
+const DEFAULT_MODEL = 'gemini-2.5-pro'; // used whenever the caller does not pass a model
+
+interface GenLangResponse { // the subset of the generateContent JSON reply this adapter reads
+  candidates?: Array<{ content?: { parts?: Array<{ text?: string }> } }>; // only the first candidate's text parts are used
+  usageMetadata?: {
+    promptTokenCount?: number; // reported as input tokens
+    candidatesTokenCount?: number; // reported as output tokens
+    thoughtsTokenCount?: number; // reasoning tokens; added to the output token count
+    cachedContentTokenCount?: number; // reported as cached tokens when present
+  };
+  modelVersion?: string; // preferred over the requested model name in results
+  error?: { code?: number; message?: string; status?: string }; // NOTE(review): declared but not read in the code shown here
+}
+
 export class GeminiAdapter implements ProviderAdapter {
   readonly name = 'gemini';
   readonly family = 'gemini' as const;
 
   async available(): Promise<AvailabilityCheck> {
+    if (this.apiKey()) return { ok: true }; // with an API key, neither the CLI binary nor OAuth files are checked
+
     const res = spawnSync('sh', ['-c', 'command -v gemini'], { timeout: 2000 });
     if (res.status !== 0) {
-      return { ok: false, reason: 'gemini CLI not found on PATH. Install per https://github.com/google-gemini/gemini-cli' };
+      return { ok: false, reason: 'gemini CLI not found on PATH. Install per https://github.com/google-gemini/gemini-cli, or export GEMINI_API_KEY.' };
     }
     const legacyCfgDir = path.join(os.homedir(), '.config', 'gemini');
-    const newCfgDir = path.join(os.homedir(), '.gemini');
-    const newOauth = path.join(newCfgDir, 'oauth_creds.json');
-    const hasCfg = fs.existsSync(legacyCfgDir) || fs.existsSync(newOauth);
-    const hasKey = !!process.env.GOOGLE_API_KEY;
-    if (!hasCfg && !hasKey) {
-      return { ok: false, reason: 'No Gemini auth found. Log in via `gemini login` or export GOOGLE_API_KEY.' };
+    const newOauth = path.join(os.homedir(), '.gemini', 'oauth_creds.json');
+    if (!fs.existsSync(legacyCfgDir) && !fs.existsSync(newOauth)) {
+      return { ok: false, reason: 'No Gemini auth found. Log in via `gemini` interactive session, or export GEMINI_API_KEY.' };
     }
     return { ok: true };
   }
 
   async run(opts: RunOpts): Promise<RunResult> {
+    return this.apiKey() ? this.runHttp(opts) : this.runCli(opts); // API key set → HTTP API; otherwise the CLI
+  }
+
+  estimateCost(tokens: { input: number; output: number; cached?: number }, model?: string): number {
+    return estimateCostUsd(tokens, model ?? DEFAULT_MODEL); // prices against DEFAULT_MODEL when no model is given
+  }
+
+  private apiKey(): string | undefined { // GEMINI_API_KEY wins over GOOGLE_API_KEY; falsy result means "no key"
+    return process.env.GEMINI_API_KEY || process.env.GOOGLE_API_KEY; // `||`, not `??`: an empty GEMINI_API_KEY must fall through
+  }
+
+  /** Runs the prompt through the Generative Language `generateContent` endpoint; HTTP errors, timeouts and thrown failures become error results. */
+  private async runHttp(opts: RunOpts): Promise<RunResult> {
+    const start = Date.now();
+    const key = this.apiKey()!;
+    const model = opts.model ?? DEFAULT_MODEL;
+    const controller = new AbortController();
+    const timer = setTimeout(() => controller.abort(), opts.timeoutMs);
+
+    try {
+      // Key goes in the `x-goog-api-key` header, not a `?key=` query param, so the secret never lands in logged URLs.
+      const res = await fetch(`${GENLANG_BASE}/models/${model}:generateContent`, {
+        method: 'POST',
+        headers: { 'Content-Type': 'application/json', 'x-goog-api-key': key },
+        body: JSON.stringify({ contents: [{ parts: [{ text: opts.prompt }] }] }),
+        signal: controller.signal,
+      });
+      // Stop the clock only after the body has been read so the transfer is included in the duration.
+      const bodyText = await res.text();
+      const durationMs = Date.now() - start;
+
+      if (!res.ok) {
+        const reason = bodyText.slice(0, 400) || `${res.status} ${res.statusText}`;
+        if (res.status === 401 || res.status === 403) {
+          return this.emptyResult(durationMs, { code: 'auth', reason }, model);
+        }
+        if (res.status === 429) {
+          return this.emptyResult(durationMs, { code: 'rate_limit', reason }, model);
+        }
+        return this.emptyResult(durationMs, { code: 'unknown', reason }, model);
+      }
+
+      const data = JSON.parse(bodyText) as GenLangResponse;
+      const output = (data.candidates?.[0]?.content?.parts ?? []).map((p) => p.text ?? '').join('');
+
+      const promptTokens = data.usageMetadata?.promptTokenCount ?? 0;
+      const candidates = data.usageMetadata?.candidatesTokenCount ?? 0;
+      // Reasoning ("thoughts") tokens are billed as output by Google. Fold them in
+      // so cost estimation matches the real bill.
+      const thoughts = data.usageMetadata?.thoughtsTokenCount ?? 0;
+      const cached = data.usageMetadata?.cachedContentTokenCount;
+
+      return {
+        output,
+        tokens: {
+          input: promptTokens,
+          output: candidates + thoughts,
+          ...(cached !== undefined ? { cached } : {}),
+        },
+        durationMs,
+        toolCalls: 0,
+        modelUsed: data.modelVersion ?? model,
+      };
+    } catch (err: unknown) {
+      const durationMs = Date.now() - start;
+      const e = err as { name?: string; message?: string };
+      if (e.name === 'AbortError') {
+        return this.emptyResult(durationMs, { code: 'timeout', reason: `exceeded ${opts.timeoutMs}ms` }, model);
+      }
+      return this.emptyResult(durationMs, { code: 'unknown', reason: (e.message ?? 'unknown').slice(0, 400) }, model);
+    } finally {
+      clearTimeout(timer);
+    }
+  }
+
+  private async runCli(opts: RunOpts): Promise<RunResult> {
     const start = Date.now();
-    // Default to --yolo (non-interactive) and stream-json output so we can parse
-    // tokens + tool calls. Callers can override via extraArgs.
     const args = ['-p', opts.prompt, '--output-format', 'stream-json', '--yolo'];
     if (opts.model) args.push('--model', opts.model);
     if (opts.extraArgs) args.push(...opts.extraArgs);
@@ -54,7 +149,7 @@ export class GeminiAdapter implements ProviderAdapter {
         tokens: parsed.tokens,
         durationMs: Date.now() - start,
         toolCalls: parsed.toolCalls,
-        modelUsed: parsed.modelUsed || opts.model || 'gemini-2.5-pro',
+        modelUsed: parsed.modelUsed || opts.model || DEFAULT_MODEL,
       };
     } catch (err: unknown) {
       const durationMs = Date.now() - start;
@@ -73,17 +168,6 @@ export class GeminiAdapter implements ProviderAdapter {
     }
   }
 
-  estimateCost(tokens: { input: number; output: number; cached?: number }, model?: string): number {
-    return estimateCostUsd(tokens, model ?? 'gemini-2.5-pro');
-  }
-
-  /**
-   * Parse gemini NDJSON stream events:
-   *   init  → session id (discarded here)
-   *   message { delta: true, text } → concat to output
-   *   tool_use { name } → increment toolCalls
-   *   result { usage: { input_token_count, output_token_count } } → tokens
-   */
   private parseStreamJson(raw: string): { output: string; tokens: { input: number; output: number }; toolCalls: number; modelUsed?: string } {
     let output = '';
     let input = 0;
@@ -118,7 +202,7 @@ export class GeminiAdapter implements ProviderAdapter {
       tokens: { input: 0, output: 0 },
       durationMs,
       toolCalls: 0,
-      modelUsed: model ?? 'gemini-2.5-pro',
+      modelUsed: model ?? DEFAULT_MODEL,
       error,
     };
   }