promptfoo · MrFlounder · Mar 18, 2026 · Mar 18, 2026 · Mar 18, 2026
diff --git a/.ralph/PROMPT.md b/.ralph/PROMPT.md
@@ -94,7 +94,7 @@ layout:
     - name: server
       command: pnpm dev
     - name: main
-      command: claude --dangerously-skip-permissions
+      command: codex --full-auto
 
 env_sync:
   files:

diff --git a/.release-please-manifest.json b/.release-please-manifest.json
@@ -1,3 +1,3 @@
 {
-  ".": "0.12.0"
+  ".": "0.13.0"
 }
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -2,7 +2,11 @@
 
 ## Unreleased
 
-## [0.12.0] - 2026-02-20
+### Changed
+
+- Codex is now the default agent when `agent:` is omitted, `crab court` uses a Codex judge, and the top-level docs/examples reflect Codex-first defaults
+
+## [0.13.0] - 2026-03-11
 
 ### Added
 

diff --git a/README.md b/README.md
@@ -7,7 +7,7 @@
   <      >
 ```
 
-A lightning-fast tmux-based workspace manager for multi-repo development. Agent-agnostic — works with both [Claude Code](https://claude.ai/code) and [Codex CLI](https://github.com/openai/codex). Manage multiple projects, start full dev environments in seconds.
+A lightning-fast tmux-based workspace manager for multi-repo development, built around [Codex CLI](https://github.com/openai/codex). Manage multiple projects, start full dev environments in seconds.
 
 ## Quick Start
 
@@ -260,9 +260,9 @@ crab court 3230                     # Judge + 2 reviewers
 ```
 
 **Court Review** uses the judge pattern:
-- **Judge (Claude)**: Orchestrates, verifies findings, delivers verdict
-- **Reviewer A (Claude teammate)**: Independent code review
-- **Reviewer B (Codex)**: Independent code review
+- **Judge (Codex)**: Orchestrates, verifies findings, delivers verdict
+- **Reviewer A (teammate pass)**: Independent code review
+- **Reviewer B (Codex)**: Secondary review pass
 
 The judge traces every finding to actual code, resolves disagreements, and produces a verdict with zero false positives.
 
@@ -286,14 +286,14 @@ crab session delete "feature-x"   # Delete a session
 
 ### Agent Sync (`crab agent`)
 
-Sync user-level configurations (MCP servers, custom agents/skills) between Claude Code and Codex CLI. Useful when switching a project's agent or maintaining parity across both.
+Sync user-level Codex configuration, MCP servers, and skills.
 
 ```bash
 crab agent status                              # Audit what's configured on each side
-crab agent sync mcp --from claude              # Preview MCP server sync (dry run)
-crab agent sync mcp --from claude --apply      # Sync MCP servers Claude → Codex
-crab agent sync agents --from claude           # Preview agent → skill rewrites (dry run)
-crab agent sync agents --from claude --apply   # Rewrite Claude agents as Codex skills (LLM-assisted)
+crab agent sync mcp                            # Preview MCP server sync (dry run)
+crab agent sync mcp --apply                    # Apply MCP server sync
+crab agent sync agents                         # Preview skill sync (dry run)
+crab agent sync agents --apply                 # Apply skill sync
 crab agent sync all --apply                    # Sync everything both directions
 ```
 
@@ -407,7 +407,7 @@ Per-project config (`~/.crabcode/projects/<alias>.yaml`):
 
 ```yaml
 session_name: pf
-agent: claude                # or "codex" — defaults to claude if omitted
+agent: codex                 # defaults to codex if omitted
 workspace_base: ~/Dev/my-project-workspaces
 main_repo: ~/Dev/my-project
 
@@ -430,7 +430,7 @@ layout:
     - name: server
       command: pnpm dev
     - name: main
-      command: claude --dangerously-skip-permissions  # or: codex --full-auto
+      command: ""            # defaults to codex --full-auto; set a command here to override
 
 # Optional: persistent storage across resets
 shared_volume:
@@ -457,18 +457,17 @@ See `examples/` for more configuration examples.
 - `OPENAI_API_KEY` or `ANTHROPIC_API_KEY` (for `crab pf`)
 - Slack bot token (for `crab pf serve`)
 
-**For AI agents (pick one or both):**
-- [Claude Code](https://claude.ai/code): `npm install -g @anthropic-ai/claude-code`
+**For AI agents:**
 - [Codex CLI](https://github.com/openai/codex): `npm install -g @openai/codex`
 
 **For PR reviews (`crab review`, `crab court`):**
 - [gh](https://cli.github.com/)
-- Claude Code and/or Codex CLI (court review uses both)
+- Codex CLI
 
 ```bash
 # macOS
 brew install tmux yq zip gh
-npm install -g @anthropic-ai/claude-code  # and/or @openai/codex
+npm install -g @openai/codex
 ```
 
 ## Installation
@@ -506,7 +505,7 @@ git pull origin main
 ┌─────────────────────────┬─────────────────────────┐
 │      terminal           │                         │
 │      (shell)            │        main             │
-├─────────────────────────┤   (claude/codex/editor)  │
+├─────────────────────────┤   (codex/editor)         │
 │      server             │                         │
 │      (pnpm dev)         │                         │
 └─────────────────────────┴─────────────────────────┘
@@ -573,9 +572,9 @@ The restore agent walks through each phase — installing tools, restoring confi
 6. **Edit config for your project:**
    ```bash
    # Set your layout commands in ~/.crabcode/projects/<alias>.yaml
-   # - agent: claude or codex (defaults to claude)
+   # - agent: codex (the default when omitted)
    # - server pane: your dev server (e.g., pnpm dev)
-   # - main pane: your agent command (e.g., claude --dangerously-skip-permissions, codex --full-auto)
+   # - main pane: optional override (defaults to codex --full-auto)
    ```
 
 7. **Start working:**

diff --git a/docs/index.html b/docs/index.html
@@ -1599,7 +1599,7 @@ <h3>Multi-Agent Code Review Tribunal</h3>
         <div class="terminal-preview">
           <div><span class="prompt">$</span> <span class="command">crab court 3230</span></div>
           <div class="output">⚖️  Assembling review tribunal...</div>
-          <div class="output">✓ Reviewer A (Claude): analyzing architecture impact</div>
+          <div class="output">✓ Reviewer A (Teammate): analyzing architecture impact</div>
           <div class="output">✓ Reviewer B (Codex): analyzing implementation correctness</div>
           <div class="output">✓ Judge: cross-referencing findings against source...</div>
           <div class="output">✓ Verdict delivered. 0 false positives. <span class="command">Justice served. 🦀</span></div>
@@ -1609,7 +1609,7 @@ <h3>Multi-Agent Code Review Tribunal</h3>
       <div class="bento-card medium">
         <div class="icon">🎫</div>
         <h3>Ticket-Driven Development</h3>
-        <p>Connect workspaces directly to Linear tickets for automatic context injection. Branch naming, ticket metadata, and Claude context — all provisioned from a single identifier. Reduces ticket-to-first-commit latency by 76%.</p>
+        <p>Connect workspaces directly to Linear tickets for automatic context injection. Branch naming, ticket metadata, and agent context are all provisioned from a single identifier. Reduces ticket-to-first-commit latency by 76%.</p>
       </div>
 
       <div class="bento-card half">
@@ -1708,7 +1708,7 @@ <h2 class="section-title">Optimized Developer Experience</h2>
         <div class="tmux-pane main">
           <span class="pane-label">main</span>
           <div class="pane-content">
-            <span style="color: var(--crab-shell);">claude</span> <span class="active">></span> <br>
+            <span style="color: var(--crab-shell);">codex</span> <span class="active">></span> <br>
             <br>
             AI pair programming<br>
             session operationalized. 🦀
@@ -1996,7 +1996,7 @@ <h3>Extensibility & Customization</h3>
         </div>
         <div class="command-item">
           <code>crab session resume &lt;name&gt;</code>
-          <p>Restore named Claude session with full context continuity</p>
+          <p>Restore named agent session with full context continuity</p>
         </div>
       </div>
     </div>
@@ -2473,7 +2473,7 @@ <h3>Extensibility & Customization</h3>
         lines: [
           { type: 'typed', text: '$ crab review 3230' },
           { type: 'output', text: '📋 Fetching PR context...' },
-          { type: 'output', text: '🤖 Claude reviewing...' },
+          { type: 'output', text: '🤖 Codex reviewing...' },
           { type: 'output', text: '✓ Review saved!' }
         ]
       },
@@ -2490,7 +2490,7 @@ <h3>Extensibility & Customization</h3>
           { type: 'typed', text: '$ crab ticket ENG-123' },
           { type: 'output', text: '🎫 Fetching ticket context...' },
           { type: 'output', text: '→ Workspace 4 provisioned' },
-          { type: 'output', text: '✓ Claude has ticket context' }
+          { type: 'output', text: '✓ Codex has ticket context' }
         ]
       },
       'crab projects': {

diff --git a/docs/llms.txt b/docs/llms.txt
@@ -82,7 +82,7 @@ crab review https://github.com/...  # Full URL
 crab court 3230                     # Judge + 2 reviewers
 ```
 
-Court review uses the judge pattern: a Judge (Claude) orchestrates two independent reviewers (Claude teammate + Codex), verifies every finding against actual code, resolves disagreements, and delivers a verdict with zero false positives.
+Court review uses the judge pattern: by default, a Codex judge orchestrates two independent review passes, verifies every finding against actual code, resolves disagreements, and delivers a verdict with zero false positives.
 
 ```bash
 crab review ls              # List review sessions
@@ -151,7 +151,7 @@ crab alias rm rr             # Remove alias
 
 ### Session Management
 
-Track and resume named Claude conversations:
+Track and resume named agent conversations:
 
 ```bash
 crab session ls              # List sessions
@@ -190,6 +190,7 @@ Per-project config (`~/.crabcode/projects/<alias>.yaml`):
 
 ```yaml
 session_name: pf
+agent: codex
 workspace_base: ~/Dev/my-project-workspaces
 main_repo: ~/Dev/my-project
 
@@ -213,7 +214,7 @@ layout:
     - name: server
       command: pnpm dev
     - name: main
-      command: claude
+      command: ""   # defaults to codex --full-auto
 
 # Persistent storage across workspace resets
 shared_volume:
@@ -243,7 +244,7 @@ slack:
 ┌─────────────────────────┬─────────────────────────┐
 │      terminal           │                         │
 │      (shell)            │        main             │
-├─────────────────────────┤   (claude/editor)       │
+├─────────────────────────┤   (codex/editor)        │
 │      server             │                         │
 │      (pnpm dev)         │                         │
 └─────────────────────────┴─────────────────────────┘
@@ -259,9 +260,7 @@ Keybindings (prefix: `Ctrl+a`):
 
 **Core:** bash, tmux, git, [yq](https://github.com/mikefarah/yq), zip
 
-**For PR reviews:** [gh](https://cli.github.com/) (GitHub CLI), [Claude Code](https://claude.ai/code) (`claude` CLI)
-
-**Optional:** [Codex CLI](https://github.com/openai/codex) (for court review with Codex reviewer)
+**For PR reviews:** [gh](https://cli.github.com/) (GitHub CLI), [Codex CLI](https://github.com/openai/codex) (`codex` CLI)
 
 ## Links
 

diff --git a/docs/promptfoo-plugin.md b/docs/promptfoo-plugin.md
@@ -91,7 +91,7 @@ promptfoo eval
 |------|-------------|
 | `--file`, `-f` | Input file path |
 | `--output`, `-o` | Output directory (default: current dir) |
-| `--provider` | LLM provider (default: `openai:gpt-4o`) |
+| `--provider` | LLM provider (default: `openai:gpt-5`) |
 | `--verbose`, `-v` | Show detailed agent output |
 | `--max-turns` | Max agent iterations (default: 30) |
 

diff --git a/examples/nodejs-monorepo.yaml b/examples/nodejs-monorepo.yaml
@@ -23,7 +23,7 @@ layout:
     - name: server
       command: pnpm dev
     - name: main
-      command: claude --dangerously-skip-permissions
+      command: codex --full-auto
 
 env_sync:
   files:

diff --git a/examples/promptfoo-cloud.yaml b/examples/promptfoo-cloud.yaml
@@ -23,7 +23,7 @@ layout:
     - name: server
       command: pnpm dev
     - name: main
-      command: claude --dangerously-skip-permissions --chrome
+      command: codex --full-auto
 
 env_sync:
   files:

diff --git a/examples/python-project.yaml b/examples/python-project.yaml
@@ -23,7 +23,7 @@ layout:
     - name: server
       command: uvicorn app.main:app --reload --port $API_PORT
     - name: main
-      command: claude --dangerously-skip-permissions
+      command: codex --full-auto
 
 env_sync:
   files:

diff --git a/plugins/promptfoo/src/agent/providers.ts b/plugins/promptfoo/src/agent/providers.ts
@@ -46,7 +46,7 @@ export class OpenAIProvider implements LLMProvider {
 
   constructor(options: { apiKey?: string; model?: string; baseUrl?: string; reasoningEffort?: string }) {
     this.apiKey = options.apiKey || process.env.OPENAI_API_KEY || '';
-    this.model = options.model || 'gpt-4o';
+    this.model = options.model || 'gpt-5';
     this.baseUrl = options.baseUrl || 'https://api.openai.com/v1';
     this.reasoningEffort = options.reasoningEffort;
 

diff --git a/plugins/promptfoo/src/serve.ts b/plugins/promptfoo/src/serve.ts
@@ -469,7 +469,7 @@ Usage:
 
 Options:
   --setup              One-time configuration
-  --provider <str>     LLM provider (default: from config or openai:gpt-4o)
+  --provider <str>     LLM provider (default: from config or openai:gpt-5)
   --interval <ms>      Poll interval in ms (default: 5000)
   --verbose, -v        Show detailed output
   --help, -h           Show this help