From 854b1a964af6660b82fd6b39b7fd561699229439 Mon Sep 17 00:00:00 2001 From: Derek Ko Date: Sun, 17 May 2026 11:39:37 -0700 Subject: [PATCH] docs: expand env example for benchmark setup --- .env.example | 133 +++++++++++++++++++++++++++++++++++++++++---------- README.md | 8 +++- 2 files changed, 115 insertions(+), 26 deletions(-) diff --git a/.env.example b/.env.example index b1cb439..97995ca 100644 --- a/.env.example +++ b/.env.example @@ -1,40 +1,125 @@ -DOCKER_IMAGE=wildclawbench-ubuntu:v1.3 -GATEWAY_PORT=18789 -TMP_WORKSPACE=/tmp_workspace - +# Copy this file to .env and fill in the values for the harnesses/tasks you run. +# Do not commit .env; it is ignored by .gitignore. + +# --------------------------------------------------------------------------- +# Core benchmark settings +# --------------------------------------------------------------------------- TASKS_SUBDIR=tasks OUTPUT_SUBDIR=output - -DEFAULT_MODEL=openrouter/xxx - +TMP_WORKSPACE=/tmp_workspace DEFAULT_PARALLEL=1 - + +# Model passed to eval/run_batch.py when --model is omitted. +# OpenClaw/Codex examples use the openrouter/<provider>/<model> form. +# Claude Code/Hermes examples use <provider>/<model>; their runners add the +# OpenRouter prefix internally when needed. +DEFAULT_MODEL=openrouter/anthropic/claude-sonnet-4.6 + +# --------------------------------------------------------------------------- +# Provider credentials +# --------------------------------------------------------------------------- +OPENROUTER_BASE_URL=https://openrouter.ai/api/v1 +OPENROUTER_API_KEY= + +# Used by judge-based grading metrics. Leave empty to use task/code defaults, +# or set an explicit OpenRouter model such as openai/gpt-5.4. +JUDGE_MODEL=openai/gpt-5.4 + +# Required for Search & Retrieval tasks and for harnesses that expose web search. 
+BRAVE_API_KEY= + +# --------------------------------------------------------------------------- +# Docker images +# --------------------------------------------------------------------------- +# OpenClaw harness image. +DOCKER_IMAGE=wildclawbench-ubuntu:v1.3 + +# Claude Code harness image. Both names are supported by the runner. +DOCKER_IMAGE_CLAUDECODE=wildclawbench-claudecode-ubuntu:v0.2 +# CLAUDECODE_DOCKER_IMAGE=wildclawbench-claudecode-ubuntu:v0.2 + +# Codex harness image. +DOCKER_IMAGE_CODEX=wildclawbench-codex-ubuntu:v0.0 + +# Hermes Agent harness image. +HERMES_DOCKER_IMAGE=wildclawbench-hermes-agent:v0.5 + +# OpenClaw gateway port on the host. +GATEWAY_PORT=18789 + +# --------------------------------------------------------------------------- +# Network/proxy settings inside benchmark containers +# --------------------------------------------------------------------------- HTTP_PROXY_INNER= HTTPS_PROXY_INNER= NO_PROXY_INNER= -OPENROUTER_BASE_URL='https://openrouter.ai/api/v1' -JUDGE_MODEL=openai/gpt-5.4 -OPENROUTER_API_KEY= -BRAVE_API_KEY= +# --------------------------------------------------------------------------- +# OpenClaw and multimodal helper settings +# --------------------------------------------------------------------------- +# Optional image model for the OpenClaw image tool. If unset, the OpenClaw +# runner falls back to the chat model passed with --model. +OPENCLAW_IMAGE_MODEL= -# Using a Custom Model Endpoint +# Optional image model for the Codex image helper. +OPENROUTER_IMAGE_MODEL= +WILDCLAW_IMAGE_MODEL= +WILDCLAW_IMAGE_HELPER_CALL_LIMIT=2 + +# --------------------------------------------------------------------------- +# Custom OpenClaw model endpoint support +# --------------------------------------------------------------------------- +# Required only when a --models-config JSON file contains ${MY_PROXY_API_KEY}. 
# MY_PROXY_API_KEY= -# Lobster profile env keys (add values here for skills that need them) -# GEMINI_API_KEY= -# FIRECRAWL_API_KEY= -# EXA_API_KEY= +# --------------------------------------------------------------------------- +# Codex harness settings +# --------------------------------------------------------------------------- +CODEX_NPM_PACKAGE=@openai/codex +CODEX_NPM_VERSION=0.117.0 +CODEX_BOOTSTRAP_RETRIES=2 +CODEX_BOOTSTRAP_RETRY_BASE_DELAY=3 -# Task env -# 01_Productivity_Flow +# Optional overrides. Leave blank for runner defaults. +CODEX_REASONING_EFFORT= +CODEX_WIRE_API= -# 02_Code_Intelligence +# Optional cost accounting for Codex usage.json. Values are USD per 1M tokens. +CODEX_INPUT_PRICE_PER_MTOK=0 +CODEX_OUTPUT_PRICE_PER_MTOK=0 +CODEX_CACHE_READ_PRICE_PER_MTOK=0 +CODEX_CACHE_WRITE_PRICE_PER_MTOK=0 -# 03_Social_Interaction +# --------------------------------------------------------------------------- +# Claude Code harness settings +# --------------------------------------------------------------------------- +DISABLE_PROMPT_CACHING=1 +DISABLE_INTERLEAVED_THINKING=1 +CLAUDE_CODE_DISABLE_EXPERIMENTAL_BETAS=1 +IS_SANDBOX=1 +CLAUDE_CODE_FULL_LOG_PATH=./log -# 04_Search_Retrieval +# Optional cost accounting for Claude Code usage.json. Values are USD per 1M +# tokens. +CLAUDECODE_INPUT_PRICE_PER_MTOK=0 +CLAUDECODE_OUTPUT_PRICE_PER_MTOK=0 +CLAUDECODE_CACHE_READ_PRICE_PER_MTOK=0 +CLAUDECODE_CACHE_WRITE_PRICE_PER_MTOK=0 -# 05_Creative_Synthesis +# --------------------------------------------------------------------------- +# Personal OpenClaw/Lobster profile env keys +# --------------------------------------------------------------------------- +# Add real values here when passing their names through --lobster-env. 
+# GEMINI_API_KEY= +# FIRECRAWL_API_KEY= +# EXA_API_KEY= -# 06_Safety_Alignment +# --------------------------------------------------------------------------- +# Task-specific env +# --------------------------------------------------------------------------- +# Most built-in automated checks that call an LLM use: +# OPENROUTER_API_KEY +# OPENROUTER_BASE_URL +# JUDGE_MODEL +# Individual task ## Env sections may request additional keys; define them here +# and they will be injected into the runtime container. diff --git a/README.md b/README.md index e2f71de..6901eab 100644 --- a/README.md +++ b/README.md @@ -226,7 +226,7 @@ The script will: - Download SAM3 model weights for Code Intelligence tasks -Prerequisites: `yt-dlp`, `ffmpeg`, `gdown`. +Prerequisites: `yt-dlp`, `ffmpeg`, `modelscope`. > **Note:** YouTube downloads may require authentication. If you encounter a "Sign in to confirm you're not a bot" error, try one of the following: > - [Get cookies.txt locally](https://chromewebstore.google.com/detail/get-cookiestxt-locally/cclelndahbckbenkjhflpdbgdldlbecc?pli=1). @@ -235,9 +235,13 @@ Prerequisites: `yt-dlp`, `ffmpeg`, `gdown`. ### Run -Set your API keys in the `.env` file: +Create a local `.env` file from the example, then set your API keys: +```bash +cp .env.example .env ``` + +```bash OPENROUTER_API_KEY=your_api_key_here BRAVE_API_KEY=your_brave_key_here # required for search tasks ```