From e1d3591c29b4e8f36e6c470dbdcef05dff263ea7 Mon Sep 17 00:00:00 2001
From: Szymon Janikowski <szymon.janikowski@itlibrium.com>
Date: Sat, 9 May 2026 12:15:11 +0200
Subject: [PATCH] feat(skill): bundle OAuth scripts into nasde-benchmark-runner
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Users who installed nasde via `pip install nasde-toolkit` previously hit
broken paths: the skill referenced `scripts/export_oauth_token.sh` but
end users have no repo checkout. After `nasde install-skills` the scripts
now ship at `~/.claude/skills/nasde-benchmark-runner/scripts/`.

- Copy six OAuth scripts (3 agents x .sh+.ps1) into the skill dir.
  hatch force-include picks them up automatically because the entire
  skill dir is bundled to nasde_toolkit/_bundled_skills/...
- Repo `scripts/` stays as the public-facing copy — existing external
  links continue to resolve.
- Drift guard: tests/test_skills_installer.py asserts byte-identical
  copies between scripts/ and the skill bundle, with an actionable
  error message pointing at the fix.
- SKILL.md auth tables: paths now reference <SKILL_SCRIPTS> placeholder
  with explicit absolute-path resolution per OS scope.

E2E verified locally on macOS: fresh wheel build + uv tool install +
nasde install-skills + source from bundled location = working
CLAUDE_CODE_OAUTH_TOKEN. 159/159 tests green.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 .../skills/nasde-benchmark-runner/SKILL.md    | 50 +++++++++------
 .../scripts/export_codex_oauth_token.ps1      | 61 +++++++++++++++++++
 .../scripts/export_codex_oauth_token.sh       | 57 +++++++++++++++++
 .../scripts/export_gemini_oauth_token.ps1     | 59 ++++++++++++++++++
 .../scripts/export_gemini_oauth_token.sh      | 52 ++++++++++++++++
 .../scripts/export_oauth_token.ps1            | 40 ++++++++++++
 .../scripts/export_oauth_token.sh             | 46 ++++++++++++++
 tests/test_skills_installer.py                | 30 +++++++++
 8 files changed, 377 insertions(+), 18 deletions(-)
 create mode 100644 .claude/skills/nasde-benchmark-runner/scripts/export_codex_oauth_token.ps1
 create mode 100755 .claude/skills/nasde-benchmark-runner/scripts/export_codex_oauth_token.sh
 create mode 100644 .claude/skills/nasde-benchmark-runner/scripts/export_gemini_oauth_token.ps1
 create mode 100755 .claude/skills/nasde-benchmark-runner/scripts/export_gemini_oauth_token.sh
 create mode 100644 .claude/skills/nasde-benchmark-runner/scripts/export_oauth_token.ps1
 create mode 100755 .claude/skills/nasde-benchmark-runner/scripts/export_oauth_token.sh
diff --git a/.claude/skills/nasde-benchmark-runner/SKILL.md b/.claude/skills/nasde-benchmark-runner/SKILL.md
index cd3bc4c..b030283 100644
--- a/.claude/skills/nasde-benchmark-runner/SKILL.md
+++ b/.claude/skills/nasde-benchmark-runner/SKILL.md
@@ -28,6 +28,16 @@ Before running any benchmark, set up authentication tokens for the agents you pl
 
 Then detect their OS and pick the matching script row from the table below. On Windows, also ask whether they're in **PowerShell** or **WSL** (cmd.exe is not directly supported — see "Windows: cmd.exe" below).
 
+### Where the auth scripts live
+
+The OAuth scripts ship inside this skill. After `nasde install-skills` they are at:
+
+- **User scope** (default): `~/.claude/skills/nasde-benchmark-runner/scripts/` (macOS/Linux/WSL) or `$env:USERPROFILE\.claude\skills\nasde-benchmark-runner\scripts\` (Windows PowerShell)
+- **Project scope**: `<project>/.claude/skills/nasde-benchmark-runner/scripts/` (if installed with `nasde install-skills --scope project`)
+- **Editable nasde checkout** (devs only): `<repo>/scripts/` — the public-facing copy of the same files, kept byte-identical to the skill bundle by a test in `tests/test_skills_installer.py`
+
+Below, `<SKILL_SCRIPTS>` is shorthand for whichever absolute path applies. Resolve it once, then substitute it in every command. Verify the path with `ls <SKILL_SCRIPTS>` before telling the user to source anything — if the directory is missing, they need to run `nasde install-skills` first.
+
 ### Step 2 — Run the right script per agent × OS
 
 Priority order: **Claude → Codex → Gemini.** Claude is required even for non-Claude variants when `[evaluation] backend = "claude"` (default), because the assessment evaluator spawns `claude` CLI as a subprocess.
@@ -36,10 +46,10 @@ Priority order: **Claude → Codex → Gemini.** Claude is required even for non
 
 | OS / shell | OAuth (subscription) | API key |
 |---|---|---|
-| macOS | `source scripts/export_oauth_token.sh` (reads Keychain entry "Claude Code-credentials") | `export ANTHROPIC_API_KEY=sk-ant-...` |
-| Linux | `source scripts/export_oauth_token.sh` (reads `~/.claude/.credentials.json`) | `export ANTHROPIC_API_KEY=sk-ant-...` |
-| Windows PowerShell | `. .\scripts\export_oauth_token.ps1` (reads `%USERPROFILE%\.claude\.credentials.json`) | `$env:ANTHROPIC_API_KEY = 'sk-ant-...'` |
-| Windows WSL (Ubuntu) | `source scripts/export_oauth_token.sh` (Linux path) | `export ANTHROPIC_API_KEY=sk-ant-...` |
+| macOS | `source <SKILL_SCRIPTS>/export_oauth_token.sh` (reads Keychain entry "Claude Code-credentials") | `export ANTHROPIC_API_KEY=sk-ant-...` |
+| Linux | `source <SKILL_SCRIPTS>/export_oauth_token.sh` (reads `~/.claude/.credentials.json`) | `export ANTHROPIC_API_KEY=sk-ant-...` |
+| Windows PowerShell | `. <SKILL_SCRIPTS>\export_oauth_token.ps1` (reads `%USERPROFILE%\.claude\.credentials.json`) | `$env:ANTHROPIC_API_KEY = 'sk-ant-...'` |
+| Windows WSL (Ubuntu) | `source <SKILL_SCRIPTS>/export_oauth_token.sh` (Linux path; resolve `<SKILL_SCRIPTS>` from your WSL home, not the Windows host's) | `export ANTHROPIC_API_KEY=sk-ant-...` |
 
 Prerequisite for OAuth: `claude` CLI installed and `claude` ran once to log in.
 
@@ -49,10 +59,10 @@ The script exports `CLAUDE_CODE_OAUTH_TOKEN`. This is required for both Claude v
 
 | OS / shell | OAuth (ChatGPT subscription) | API key |
 |---|---|---|
-| macOS | `codex login` once, then `source scripts/export_codex_oauth_token.sh` | `export CODEX_API_KEY=sk-proj-...` (or `OPENAI_API_KEY`) |
-| Linux | `codex login` once, then `source scripts/export_codex_oauth_token.sh` | `export CODEX_API_KEY=sk-proj-...` |
-| Windows PowerShell | `codex login` once, then `. .\scripts\export_codex_oauth_token.ps1` | `$env:CODEX_API_KEY = 'sk-proj-...'` |
-| Windows WSL (Ubuntu) | `codex login` once, then `source scripts/export_codex_oauth_token.sh` | `export CODEX_API_KEY=sk-proj-...` |
+| macOS | `codex login` once, then `source <SKILL_SCRIPTS>/export_codex_oauth_token.sh` | `export CODEX_API_KEY=sk-proj-...` (or `OPENAI_API_KEY`) |
+| Linux | `codex login` once, then `source <SKILL_SCRIPTS>/export_codex_oauth_token.sh` | `export CODEX_API_KEY=sk-proj-...` |
+| Windows PowerShell | `codex login` once, then `. <SKILL_SCRIPTS>\export_codex_oauth_token.ps1` | `$env:CODEX_API_KEY = 'sk-proj-...'` |
+| Windows WSL (Ubuntu) | `codex login` once, then `source <SKILL_SCRIPTS>/export_codex_oauth_token.sh` | `export CODEX_API_KEY=sk-proj-...` |
 
 The OAuth scripts only **validate** `~/.codex/auth.json` (or `%USERPROFILE%\.codex\auth.json`) — Harbor injects the file into the sandbox automatically. API key always takes priority over OAuth when both are present.
 
@@ -60,27 +70,31 @@ The OAuth scripts only **validate** `~/.codex/auth.json` (or `%USERPROFILE%\.cod
 
 | OS / shell | OAuth (Google account) | API key |
 |---|---|---|
-| macOS | `gemini login` once, then `source scripts/export_gemini_oauth_token.sh` | `export GEMINI_API_KEY=...` |
-| Linux | `gemini login` once, then `source scripts/export_gemini_oauth_token.sh` | `export GEMINI_API_KEY=...` |
-| Windows PowerShell | `gemini login` once, then `. .\scripts\export_gemini_oauth_token.ps1` | `$env:GEMINI_API_KEY = '...'` |
-| Windows WSL (Ubuntu) | `gemini login` once, then `source scripts/export_gemini_oauth_token.sh` | `export GEMINI_API_KEY=...` |
+| macOS | `gemini login` once, then `source <SKILL_SCRIPTS>/export_gemini_oauth_token.sh` | `export GEMINI_API_KEY=...` |
+| Linux | `gemini login` once, then `source <SKILL_SCRIPTS>/export_gemini_oauth_token.sh` | `export GEMINI_API_KEY=...` |
+| Windows PowerShell | `gemini login` once, then `. <SKILL_SCRIPTS>\export_gemini_oauth_token.ps1` | `$env:GEMINI_API_KEY = '...'` |
+| Windows WSL (Ubuntu) | `gemini login` once, then `source <SKILL_SCRIPTS>/export_gemini_oauth_token.sh` | `export GEMINI_API_KEY=...` |
 
 The OAuth scripts export `GEMINI_OAUTH_CREDS` (the raw JSON) — `ConfigurableGemini` reads that env var and injects credentials into the sandbox. API key always takes priority over OAuth.
 
 ### Combined setup for cross-agent runs
 
+Resolve `<SKILL_SCRIPTS>` first, then run all three.
+
 **macOS / Linux / Windows WSL:**
 ```bash
-source scripts/export_oauth_token.sh         # Claude (subscription)
-source scripts/export_codex_oauth_token.sh   # Codex (subscription) — or: export CODEX_API_KEY=...
-source scripts/export_gemini_oauth_token.sh  # Gemini (Google account) — or: export GEMINI_API_KEY=...
+SKILL_SCRIPTS="$HOME/.claude/skills/nasde-benchmark-runner/scripts"   # adjust if --scope project
+source "$SKILL_SCRIPTS/export_oauth_token.sh"         # Claude (subscription)
+source "$SKILL_SCRIPTS/export_codex_oauth_token.sh"   # Codex (subscription) — or: export CODEX_API_KEY=...
+source "$SKILL_SCRIPTS/export_gemini_oauth_token.sh"  # Gemini (Google account) — or: export GEMINI_API_KEY=...
 ```
 
 **Windows PowerShell:**
 ```powershell
-. .\scripts\export_oauth_token.ps1
-. .\scripts\export_codex_oauth_token.ps1
-. .\scripts\export_gemini_oauth_token.ps1
+$SkillScripts = "$env:USERPROFILE\.claude\skills\nasde-benchmark-runner\scripts"
+. "$SkillScripts\export_oauth_token.ps1"
+. "$SkillScripts\export_codex_oauth_token.ps1"
+. "$SkillScripts\export_gemini_oauth_token.ps1"
 ```
 
 ### Windows: cmd.exe
diff --git a/.claude/skills/nasde-benchmark-runner/scripts/export_codex_oauth_token.ps1 b/.claude/skills/nasde-benchmark-runner/scripts/export_codex_oauth_token.ps1
new file mode 100644
index 0000000..589cd45
--- /dev/null
+++ b/.claude/skills/nasde-benchmark-runner/scripts/export_codex_oauth_token.ps1
@@ -0,0 +1,61 @@
+# Validate Codex OAuth auth.json for ChatGPT subscription-based authentication.
+#
+# Windows-equivalent of scripts/export_codex_oauth_token.sh.
+#
+# Dot-source this script before running nasde to verify your ChatGPT
+# subscription credentials:
+#
+#     . .\scripts\export_codex_oauth_token.ps1
+#     nasde run --variant codex-vanilla -C my-benchmark
+#
+# Prerequisites: run `codex login` to authenticate via ChatGPT.
+# Reads %USERPROFILE%\.codex\auth.json. NASDE injects this file into the
+# sandbox automatically — this script only validates it.
+
+$ErrorActionPreference = 'Stop'
+
+$authPath = Join-Path $env:USERPROFILE '.codex\auth.json'
+
+if (-not (Test-Path $authPath)) {
+    Write-Error "$authPath not found. Run 'codex login' to authenticate via ChatGPT subscription."
+    return
+}
+
+try {
+    $auth = Get-Content $authPath -Raw | ConvertFrom-Json
+} catch {
+    Write-Error "Failed to parse $authPath : $_"
+    return
+}
+
+if ($auth.auth_mode -ne 'chatgpt') {
+    Write-Error "auth_mode is '$($auth.auth_mode)', expected 'chatgpt'. Run 'codex login' to authenticate via ChatGPT subscription."
+    return
+}
+
+$accessToken = $auth.tokens.access_token
+if ([string]::IsNullOrEmpty($accessToken)) {
+    Write-Error "access_token is empty in $authPath. Run 'codex login' to re-authenticate."
+    return
+}
+
+# Decode JWT payload (middle segment) to read exp claim. Pad base64url to
+# multiple of 4 with '=' before decoding.
+try {
+    $payloadSegment = $accessToken.Split('.')[1]
+    $padded = $payloadSegment.Replace('-', '+').Replace('_', '/')
+    switch ($padded.Length % 4) { 2 { $padded += '==' } 3 { $padded += '=' } }
+    $payloadJson = [Text.Encoding]::UTF8.GetString([Convert]::FromBase64String($padded))
+    $exp = ($payloadJson | ConvertFrom-Json).exp
+} catch {
+    $exp = 0
+}
+
+$now = [int][double]::Parse((Get-Date -UFormat %s))
+if ($exp -gt 0 -and $now -gt $exp) {
+    Write-Warning "access_token expired. Run 'codex login' to refresh."
+    Write-Warning "Proceeding anyway -- Codex CLI may auto-refresh via refresh_token."
+}
+
+$preview = $accessToken.Substring(0, [Math]::Min(20, $accessToken.Length))
+Write-Host "OK Codex OAuth validated (auth_mode=chatgpt, token=$preview...)" -ForegroundColor Green
diff --git a/.claude/skills/nasde-benchmark-runner/scripts/export_codex_oauth_token.sh b/.claude/skills/nasde-benchmark-runner/scripts/export_codex_oauth_token.sh
new file mode 100755
index 0000000..723b6b9
--- /dev/null
+++ b/.claude/skills/nasde-benchmark-runner/scripts/export_codex_oauth_token.sh
@@ -0,0 +1,57 @@
+#!/bin/bash
+
+# Validate Codex OAuth auth.json for ChatGPT subscription-based authentication.
+#
+# Source this script before running nasde to verify that your ChatGPT
+# subscription credentials are valid:
+#
+#   source scripts/export_codex_oauth_token.sh
+#   uv run nasde run --variant codex-vanilla -C my-benchmark
+#
+# Prerequisites: run `codex login` to authenticate via ChatGPT.
+# The token is read from ~/.codex/auth.json.
+
+# NOTE: Do NOT use `set -e` here — this script is sourced into the user's
+# shell, so errexit would persist and kill the terminal on any later non-zero
+# exit code. Each command already has its own `|| { ... }` error handling.
+
+_codex_auth_path="$HOME/.codex/auth.json"
+
+if [ ! -f "$_codex_auth_path" ]; then
+    echo "ERROR: $_codex_auth_path not found." >&2
+    echo "Run 'codex login' to authenticate via ChatGPT subscription." >&2
+    return 1 2>/dev/null || exit 1
+fi
+
+_auth_mode="$(python3 -c "import json; print(json.load(open('$_codex_auth_path')).get('auth_mode', ''))")" || {
+    echo "ERROR: Failed to parse $_codex_auth_path." >&2
+    return 1 2>/dev/null || exit 1
+}
+
+if [ "$_auth_mode" != "chatgpt" ]; then
+    echo "ERROR: auth_mode is '$_auth_mode', expected 'chatgpt'." >&2
+    echo "Run 'codex login' to authenticate via ChatGPT subscription." >&2
+    return 1 2>/dev/null || exit 1
+fi
+
+_access_token="$(python3 -c "import json; print(json.load(open('$_codex_auth_path')).get('tokens',{}).get('access_token',''))")" || {
+    echo "ERROR: Failed to extract access_token from $_codex_auth_path." >&2
+    return 1 2>/dev/null || exit 1
+}
+
+if [ -z "$_access_token" ]; then
+    echo "ERROR: access_token is empty in $_codex_auth_path." >&2
+    echo "Run 'codex login' to re-authenticate." >&2
+    return 1 2>/dev/null || exit 1
+fi
+
+_exp="$(echo "$_access_token" | cut -d. -f2 | base64 -d 2>/dev/null | python3 -c "import sys,json; print(json.load(sys.stdin).get('exp',0))" 2>/dev/null)" || _exp=0
+_now="$(python3 -c "import time; print(int(time.time()))")"
+if [ "$_exp" -gt 0 ] && [ "$_now" -gt "$_exp" ]; then
+    echo "WARNING: access_token expired. Run 'codex login' to refresh." >&2
+    echo "Proceeding anyway -- Codex CLI may auto-refresh via refresh_token." >&2
+fi
+
+echo "Codex OAuth validated (auth_mode=chatgpt, token=${_access_token:0:20}...)"
+
+unset _codex_auth_path _auth_mode _access_token _exp _now
diff --git a/.claude/skills/nasde-benchmark-runner/scripts/export_gemini_oauth_token.ps1 b/.claude/skills/nasde-benchmark-runner/scripts/export_gemini_oauth_token.ps1
new file mode 100644
index 0000000..b8a247c
--- /dev/null
+++ b/.claude/skills/nasde-benchmark-runner/scripts/export_gemini_oauth_token.ps1
@@ -0,0 +1,59 @@
+# Export Gemini CLI OAuth credentials for subscription-based authentication.
+#
+# Windows-equivalent of scripts/export_gemini_oauth_token.sh.
+#
+# Dot-source this script before running nasde to authenticate via your
+# Google account instead of GEMINI_API_KEY:
+#
+#     . .\scripts\export_gemini_oauth_token.ps1
+#     nasde run --variant gemini-vanilla -C my-benchmark
+#
+# Prerequisites: run `gemini login` to authenticate via Google.
+# Reads %USERPROFILE%\.gemini\oauth_creds.json and exports the raw JSON as
+# $env:GEMINI_OAUTH_CREDS. ConfigurableGemini injects this into the sandbox.
+
+$ErrorActionPreference = 'Stop'
+
+$credPath = Join-Path $env:USERPROFILE '.gemini\oauth_creds.json'
+
+if (-not (Test-Path $credPath)) {
+    Write-Error "$credPath not found. Run 'gemini login' to authenticate via Google."
+    return
+}
+
+try {
+    $rawJson = Get-Content $credPath -Raw
+    $parsed = $rawJson | ConvertFrom-Json
+} catch {
+    Write-Error "$credPath does not contain valid JSON credentials: $_"
+    return
+}
+
+if (-not $parsed) {
+    Write-Error "$credPath contains empty credentials."
+    return
+}
+
+$accessToken = $parsed.access_token
+if (-not [string]::IsNullOrEmpty($accessToken)) {
+    try {
+        $payloadSegment = $accessToken.Split('.')[1]
+        $padded = $payloadSegment.Replace('-', '+').Replace('_', '/')
+        switch ($padded.Length % 4) { 2 { $padded += '==' } 3 { $padded += '=' } }
+        $payloadJson = [Text.Encoding]::UTF8.GetString([Convert]::FromBase64String($padded))
+        $exp = ($payloadJson | ConvertFrom-Json).exp
+    } catch {
+        $exp = 0
+    }
+
+    $now = [int][double]::Parse((Get-Date -UFormat %s))
+    if ($exp -gt 0 -and $now -gt $exp) {
+        Write-Warning "access_token appears expired. Run 'gemini login' to refresh."
+        Write-Warning "Proceeding anyway -- Gemini CLI may auto-refresh via refresh_token."
+    }
+}
+
+$env:GEMINI_OAUTH_CREDS = $rawJson
+
+$preview = $rawJson.Substring(0, [Math]::Min(20, $rawJson.Length))
+Write-Host "OK GEMINI_OAUTH_CREDS exported ($preview...)" -ForegroundColor Green
diff --git a/.claude/skills/nasde-benchmark-runner/scripts/export_gemini_oauth_token.sh b/.claude/skills/nasde-benchmark-runner/scripts/export_gemini_oauth_token.sh
new file mode 100755
index 0000000..19a0217
--- /dev/null
+++ b/.claude/skills/nasde-benchmark-runner/scripts/export_gemini_oauth_token.sh
@@ -0,0 +1,52 @@
+#!/bin/bash
+
+# Export Gemini CLI OAuth credentials for subscription-based authentication.
+#
+# Source this script before running nasde to authenticate via your
+# Google/Gemini subscription instead of GEMINI_API_KEY:
+#
+#   source scripts/export_gemini_oauth_token.sh
+#   uv run nasde run --variant gemini-vanilla -C my-benchmark
+#
+# Prerequisites: run `gemini login` to authenticate via Google.
+# The token is read from ~/.gemini/oauth_creds.json.
+
+# NOTE: Do NOT use `set -e` here — this script is sourced into the user's
+# shell, so errexit would persist and kill the terminal on any later non-zero
+# exit code. Each command already has its own `|| { ... }` error handling.
+
+_gemini_creds_path="$HOME/.gemini/oauth_creds.json"
+
+if [ ! -f "$_gemini_creds_path" ]; then
+    echo "ERROR: $_gemini_creds_path not found." >&2
+    echo "Run 'gemini login' to authenticate via Google." >&2
+    return 1 2>/dev/null || exit 1
+fi
+
+_gemini_creds="$(cat "$_gemini_creds_path")" || {
+    echo "ERROR: Failed to read $_gemini_creds_path." >&2
+    return 1 2>/dev/null || exit 1
+}
+
+python3 -c "import json; d=json.loads('''$_gemini_creds'''); assert d, 'empty credentials'" 2>/dev/null || {
+    echo "ERROR: $_gemini_creds_path does not contain valid JSON credentials." >&2
+    return 1 2>/dev/null || exit 1
+}
+
+_access_token="$(python3 -c "import json; print(json.load(open('$_gemini_creds_path')).get('access_token',''))")" || _access_token=""
+
+if [ -n "$_access_token" ]; then
+    _exp="$(echo "$_access_token" | cut -d. -f2 | base64 -d 2>/dev/null | python3 -c "import sys,json; print(json.load(sys.stdin).get('exp',0))" 2>/dev/null)" || _exp=0
+    _now="$(python3 -c "import time; print(int(time.time()))")"
+    if [ "$_exp" -gt 0 ] && [ "$_now" -gt "$_exp" ]; then
+        echo "WARNING: access_token appears expired. Run 'gemini login' to refresh." >&2
+        echo "Proceeding anyway -- Gemini CLI may auto-refresh via refresh_token." >&2
+    fi
+fi
+
+GEMINI_OAUTH_CREDS="$_gemini_creds"
+export GEMINI_OAUTH_CREDS
+
+echo "GEMINI_OAUTH_CREDS exported (${_gemini_creds:0:20}...)"
+
+unset _gemini_creds_path _gemini_creds _access_token _exp _now
diff --git a/.claude/skills/nasde-benchmark-runner/scripts/export_oauth_token.ps1 b/.claude/skills/nasde-benchmark-runner/scripts/export_oauth_token.ps1
new file mode 100644
index 0000000..79b81d9
--- /dev/null
+++ b/.claude/skills/nasde-benchmark-runner/scripts/export_oauth_token.ps1
@@ -0,0 +1,40 @@
+# Extract Claude Code OAuth token from %USERPROFILE%\.claude\.credentials.json
+# and export it as $env:CLAUDE_CODE_OAUTH_TOKEN.
+#
+# Windows-equivalent of scripts/export_oauth_token.sh (which reads the macOS
+# Keychain). On Windows, Claude Code stores credentials as plain JSON.
+#
+# Dot-source this script before running nasde so the env var persists in the
+# current PowerShell session:
+#
+#     . .\scripts\export_oauth_token.ps1
+#     nasde run --variant baseline -C my-benchmark
+#
+# Running without dot-source (.\scripts\export_oauth_token.ps1) sets the var
+# only inside the script's child scope and it disappears on return.
+
+$ErrorActionPreference = 'Stop'
+
+$credPath = Join-Path $env:USERPROFILE '.claude\.credentials.json'
+
+if (-not (Test-Path $credPath)) {
+    Write-Error "Could not find '$credPath'. Run 'claude' CLI and log in first."
+    return
+}
+
+try {
+    $token = (Get-Content $credPath -Raw | ConvertFrom-Json).claudeAiOauth.accessToken
+} catch {
+    Write-Error "Failed to parse OAuth token from '$credPath': $_"
+    return
+}
+
+if ([string]::IsNullOrEmpty($token)) {
+    Write-Error "claudeAiOauth.accessToken is empty in '$credPath'."
+    return
+}
+
+$env:CLAUDE_CODE_OAUTH_TOKEN = $token
+
+$preview = $token.Substring(0, [Math]::Min(20, $token.Length))
+Write-Host "OK CLAUDE_CODE_OAUTH_TOKEN exported ($preview...)" -ForegroundColor Green
diff --git a/.claude/skills/nasde-benchmark-runner/scripts/export_oauth_token.sh b/.claude/skills/nasde-benchmark-runner/scripts/export_oauth_token.sh
new file mode 100755
index 0000000..ddfbecb
--- /dev/null
+++ b/.claude/skills/nasde-benchmark-runner/scripts/export_oauth_token.sh
@@ -0,0 +1,46 @@
+#!/bin/bash
+
+# Extract Claude Code OAuth token and export it as CLAUDE_CODE_OAUTH_TOKEN.
+#
+# Source this script before running nasde to authenticate via your
+# Claude Pro/Max subscription instead of ANTHROPIC_API_KEY:
+#
+#   source scripts/export_oauth_token.sh
+#   nasde run --variant baseline -C my-benchmark
+#
+# Storage backend depends on OS:
+#   - macOS: Keychain entry "Claude Code-credentials" (written by `claude` CLI)
+#   - Linux: plain JSON at ~/.claude/.credentials.json
+# Windows users: see scripts/export_oauth_token.ps1 (PowerShell).
+
+# NOTE: Do NOT use `set -e` here -- this script is sourced into the user's
+# shell, so errexit would persist and kill the terminal on any later non-zero
+# exit code. Each command already has its own `|| { ... }` error handling.
+
+_raw_creds=""
+
+if command -v security >/dev/null 2>&1; then
+    _raw_creds="$(security find-generic-password -s "Claude Code-credentials" -w 2>/dev/null)"
+fi
+
+if [ -z "$_raw_creds" ] && [ -f "$HOME/.claude/.credentials.json" ]; then
+    _raw_creds="$(cat "$HOME/.claude/.credentials.json")"
+fi
+
+if [ -z "$_raw_creds" ]; then
+    echo "ERROR: Could not read Claude Code credentials." >&2
+    echo "  - macOS: keychain entry 'Claude Code-credentials' missing" >&2
+    echo "  - Linux: ~/.claude/.credentials.json missing" >&2
+    echo "Run 'claude' CLI and log in first." >&2
+    return 1 2>/dev/null || exit 1
+fi
+
+CLAUDE_CODE_OAUTH_TOKEN="$(echo "$_raw_creds" | python3 -c "import sys,json; print(json.load(sys.stdin)['claudeAiOauth']['accessToken'])")" || {
+    echo "ERROR: Failed to parse OAuth token from Claude credentials." >&2
+    return 1 2>/dev/null || exit 1
+}
+export CLAUDE_CODE_OAUTH_TOKEN
+
+unset _raw_creds
+
+echo "✓ CLAUDE_CODE_OAUTH_TOKEN exported (${CLAUDE_CODE_OAUTH_TOKEN:0:20}...)"
diff --git a/tests/test_skills_installer.py b/tests/test_skills_installer.py
index 0a4f423..f773b74 100644
--- a/tests/test_skills_installer.py
+++ b/tests/test_skills_installer.py
@@ -7,6 +7,18 @@
 
 from nasde_toolkit.skills_installer import install_bundled_skills
 
+REPO_ROOT = Path(__file__).resolve().parents[1]  # tests/ -> repository root
+REPO_SCRIPTS = REPO_ROOT / "scripts"  # public-facing copies of the auth scripts
+SKILL_SCRIPTS = REPO_ROOT / ".claude" / "skills" / "nasde-benchmark-runner" / "scripts"  # skill-bundled copies
+BUNDLED_AUTH_SCRIPTS = (  # one bash + one PowerShell helper per agent (Claude, Codex, Gemini)
+    "export_oauth_token.sh",
+    "export_oauth_token.ps1",
+    "export_codex_oauth_token.sh",
+    "export_codex_oauth_token.ps1",
+    "export_gemini_oauth_token.sh",
+    "export_gemini_oauth_token.ps1",
+)
+
 
 def test_install_bundled_skills_copies_all_nasde_benchmark_skills(tmp_path: Path) -> None:
     target = tmp_path / "skills"
@@ -43,3 +55,21 @@ def test_install_bundled_skills_rejects_unknown_scope(tmp_path: Path) -> None:
 
     with pytest.raises(typer.BadParameter):
         install_bundled_skills(console=Console(), scope="nope", target_dir=None, force=False)
+
+
+@pytest.mark.parametrize("name", BUNDLED_AUTH_SCRIPTS)
+def test_runner_skill_bundles_auth_script(tmp_path: Path, name: str) -> None:
+    target = tmp_path / "skills"
+    install_bundled_skills(console=Console(), scope="user", target_dir=target, force=False)
+    assert (target / "nasde-benchmark-runner" / "scripts" / name).is_file()
+
+
+@pytest.mark.parametrize("name", BUNDLED_AUTH_SCRIPTS)
+def test_skill_scripts_match_repo_scripts(name: str) -> None:
+    repo_copy = REPO_SCRIPTS / name
+    skill_copy = SKILL_SCRIPTS / name
+    assert repo_copy.read_bytes() == skill_copy.read_bytes(), (
+        f"{name} drifted between scripts/ and .claude/skills/nasde-benchmark-runner/scripts/. "
+        "Update both copies — repo scripts/ is the public-facing copy and skill scripts/ ships "
+        "in the wheel via hatch force-include. Tip: cp scripts/* .claude/skills/nasde-benchmark-runner/scripts/"
+    )