Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 46 additions & 0 deletions CONFIGURATION.md
Original file line number Diff line number Diff line change
Expand Up @@ -247,3 +247,49 @@ When a request arrives, the proxy selects a model chain using the following orde
3. **Scenario routing** — fall back to the scenario chain (`default`, `background`, `think`, `complex`, `long_context`, `fast`).

> **Trust model:** any client whose requests flow through the proxy can select from the configured `model_overrides` set without additional authentication. If you run the proxy as a shared service, treat `model_overrides` as a privileged allowlist.

### Streaming Scenario Routing

`enable_streaming_scenario_routing` controls whether streaming requests are evaluated by the full scenario router or routed directly to the `fast` scenario.

> **Note for Claude Code `/review-code`, `/ultracode`, and multi-agent workflows**
>
> If you use Claude Code workflows that dispatch many subagents or produce many parallel tool calls, enable streaming scenario routing:
>
> ```json
> {
> "enable_streaming_scenario_routing": true
> }
> ```
>
> Without this option, streaming requests are routed through the `fast` scenario even when the request is actually tool-heavy. This can route complex Claude Code workloads, such as `/review-code` with many `Agent` tool calls, to a fast model that may not handle parallel tool-call orchestration reliably.
>
> When enabled, streaming requests are evaluated by the same scenario router as non-streaming requests, allowing large or tool-heavy workloads to use `complex` or `long_context` models instead of always using the `fast` model.

Recommended setup for Claude Code review workflows:

```json
{
"enable_streaming_scenario_routing": true,
"models": {
"fast": {
"provider": "opencode-go",
"model_id": "deepseek-v4-flash",
"max_tokens": 4096
},
"complex": {
"provider": "opencode-go",
"model_id": "minimax-m3",
"max_tokens": 8192
},
"long_context": {
"provider": "opencode-go",
"model_id": "minimax-m3",
"max_tokens": 16384,
"context_threshold": 80000
}
}
}
```

Use the `fast` scenario for short/simple requests. Use `complex` or `long_context` for code review, multi-agent dispatch, large diffs, many tools, or long-context Claude Code sessions.
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ OpenCode Go gives you access to powerful open coding models for **$5/month** (th
- **Transparent Proxy** — Claude Code sends Anthropic-format requests, proxy transforms to OpenAI/Responses/Gemini format and back
- **Dual Provider Support** — Route models through OpenCode Go or OpenCode Zen based on your needs
- **Model Routing** — Automatically routes to different models based on context (default, thinking, long context, background)
- **Streaming Scenario Routing** — Configurable routing for streaming requests; enables proper scenario selection for Claude Code multi-agent and review workflows (see [CONFIGURATION.md](CONFIGURATION.md#streaming-scenario-routing))
- **Fallback Chains** — If a model fails, automatically tries the next one in your configured chain
- **Circuit Breaker** — Tracks model health and skips failing models to avoid latency spikes
- **Real-time Streaming** — Full SSE streaming with live format transformation
Expand Down
6 changes: 4 additions & 2 deletions configs/config.example.json
Original file line number Diff line number Diff line change
Expand Up @@ -185,15 +185,17 @@
"opencode_go": {
"base_url": "https://opencode.ai/zen/go/v1/chat/completions",
"anthropic_base_url": "https://opencode.ai/zen/go/v1/messages",
"timeout_ms": 300000
"timeout_ms": 300000,
"streaming_timeout_ms": 600000
},

"opencode_zen": {
"base_url": "https://opencode.ai/zen/v1/chat/completions",
"anthropic_base_url": "https://opencode.ai/zen/v1/messages",
"responses_base_url": "https://opencode.ai/zen/v1/responses",
"gemini_base_url": "https://opencode.ai/zen/v1/models",
"timeout_ms": 300000
"timeout_ms": 300000,
"streaming_timeout_ms": 600000
},

"logging": {
Expand Down
65 changes: 60 additions & 5 deletions internal/client/opencode.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,18 @@ const (
ProviderOpenCodeZen = "opencode-zen"
)

// APIError is the typed error produced when an upstream provider answers
// with an HTTP error status. Use errors.As to recover it from an error
// chain, then branch on StatusCode (for example, treating 4xx as
// non-retryable and 5xx as retryable).
type APIError struct {
	StatusCode int    // HTTP status code of the upstream response
	Body       string // response body text as read from the upstream
}

// Error implements the error interface, rendering the status code followed
// by the response body.
func (e *APIError) Error() string {
	const layout = "API error %d: %s"
	return fmt.Sprintf(layout, e.StatusCode, e.Body)
}

// OpenCodeClient handles communication with OpenCode Go and Zen APIs.
type OpenCodeClient struct {
atomic *config.AtomicConfig
Expand Down Expand Up @@ -86,6 +98,48 @@ func (c *OpenCodeClient) StreamIdleTimeout(modelConfig config.ModelConfig) time.
return time.Duration(ms) * time.Millisecond
}

// RequestTimeout reports how long a single non-streaming attempt against the
// model's provider may take.
//
// The value comes from the provider's configured TimeoutMs (the Zen section
// when IsZen(model) is true, the Go section otherwise). A nil client, a
// client without a config holder, or a non-positive configured value all
// yield the built-in five-minute fallback.
func (c *OpenCodeClient) RequestTimeout(model config.ModelConfig) time.Duration {
	const fallback = 5 * time.Minute

	if c == nil || c.atomic == nil {
		return fallback
	}

	snapshot := c.atomic.Get()

	var ms int
	switch {
	case IsZen(model):
		ms = snapshot.OpenCodeZen.TimeoutMs
	default:
		ms = snapshot.OpenCodeGo.TimeoutMs
	}

	if ms <= 0 {
		return fallback
	}
	return time.Duration(ms) * time.Millisecond
}

// StreamingTimeout reports how long a single streaming attempt against the
// model's provider may take.
//
// Resolution order within the selected provider section (Zen when
// IsZen(model) is true, Go otherwise):
//  1. StreamingTimeoutMs, when positive;
//  2. TimeoutMs, when positive;
//  3. the built-in five-minute fallback.
//
// A nil client or a client without a config holder also yields the fallback.
func (c *OpenCodeClient) StreamingTimeout(model config.ModelConfig) time.Duration {
	const fallback = 5 * time.Minute

	if c == nil || c.atomic == nil {
		return fallback
	}

	snapshot := c.atomic.Get()

	// Pick the effective millisecond value: the streaming-specific setting
	// wins, the general request timeout is the second choice.
	var ms int
	if IsZen(model) {
		if ms = snapshot.OpenCodeZen.StreamingTimeoutMs; ms <= 0 {
			ms = snapshot.OpenCodeZen.TimeoutMs
		}
	} else {
		if ms = snapshot.OpenCodeGo.StreamingTimeoutMs; ms <= 0 {
			ms = snapshot.OpenCodeGo.TimeoutMs
		}
	}

	if ms <= 0 {
		return fallback
	}
	return time.Duration(ms) * time.Millisecond
}

// IsAnthropicModel returns true if the model requires the Anthropic endpoint.
// Most Go provider models use the Chat Completions transform path for broader
// compatibility (tool format, message roles, etc.). Exceptions are models whose
Expand All @@ -95,7 +149,8 @@ func (c *OpenCodeClient) StreamIdleTimeout(modelConfig config.ModelConfig) time.
// Only Zen models use the raw Anthropic endpoint via ClassifyEndpoint.
func IsAnthropicModel(modelID string) bool {
switch modelID {
case "qwen3.7-max": // OpenCode Go backend doesn't support oa-compat for this model
case "minimax-m2.5", "minimax-m2.7", "minimax-m3",
"qwen3.5-plus", "qwen3.6-plus", "qwen3.7-plus", "qwen3.7-max":
return true
default:
return false
Expand Down Expand Up @@ -247,7 +302,7 @@ func (c *OpenCodeClient) ChatCompletion(
if resp.StatusCode >= http.StatusBadRequest {
bodyBytes, _ := io.ReadAll(resp.Body)
_ = resp.Body.Close()
return nil, fmt.Errorf("API error %d: %s", resp.StatusCode, string(bodyBytes))
return nil, &APIError{StatusCode: resp.StatusCode, Body: string(bodyBytes)}
}

return resp, nil
Expand Down Expand Up @@ -338,7 +393,7 @@ func (c *OpenCodeClient) SendAnthropicRequest(
if resp.StatusCode >= http.StatusBadRequest {
bodyBytes, _ := io.ReadAll(resp.Body)
_ = resp.Body.Close()
return nil, fmt.Errorf("API error %d: %s", resp.StatusCode, string(bodyBytes))
return nil, &APIError{StatusCode: resp.StatusCode, Body: string(bodyBytes)}
}

return resp, nil
Expand Down Expand Up @@ -374,7 +429,7 @@ func (c *OpenCodeClient) ResponsesCompletion(
if resp.StatusCode >= http.StatusBadRequest {
bodyBytes, _ := io.ReadAll(resp.Body)
_ = resp.Body.Close()
return nil, fmt.Errorf("API error %d: %s", resp.StatusCode, string(bodyBytes))
return nil, &APIError{StatusCode: resp.StatusCode, Body: string(bodyBytes)}
}

return resp, nil
Expand Down Expand Up @@ -455,7 +510,7 @@ func (c *OpenCodeClient) GeminiCompletion(
if resp.StatusCode >= http.StatusBadRequest {
bodyBytes, _ := io.ReadAll(resp.Body)
_ = resp.Body.Close()
return nil, fmt.Errorf("API error %d: %s", resp.StatusCode, string(bodyBytes))
return nil, &APIError{StatusCode: resp.StatusCode, Body: string(bodyBytes)}
}

return resp, nil
Expand Down
157 changes: 145 additions & 12 deletions internal/client/opencode_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,19 +14,19 @@ func TestIsAnthropicModelOnlyRoutesNativeAnthropicModels(t *testing.T) {
want bool
}{
{
name: "minimax m2.5 uses openai endpoint on Go provider",
name: "minimax m2.5 uses anthropic endpoint on Go provider",
modelID: "minimax-m2.5",
want: false,
want: true,
},
{
name: "minimax m2.7 uses openai endpoint on Go provider",
name: "minimax m2.7 uses anthropic endpoint on Go provider",
modelID: "minimax-m2.7",
want: false,
want: true,
},
{
name: "minimax m3 uses openai endpoint on Go provider",
name: "minimax m3 uses anthropic endpoint on Go provider",
modelID: "minimax-m3",
want: false,
want: true,
},
{
name: "deepseek pro uses openai endpoint",
Expand Down Expand Up @@ -64,19 +64,19 @@ func TestIsAnthropicModelOnlyRoutesNativeAnthropicModels(t *testing.T) {
want: false,
},
{
name: "qwen3.5-plus uses openai endpoint on Go provider",
name: "qwen3.5-plus uses anthropic endpoint on Go provider",
modelID: "qwen3.5-plus",
want: false,
want: true,
},
{
name: "qwen3.6-plus uses openai endpoint on Go provider",
name: "qwen3.6-plus uses anthropic endpoint on Go provider",
modelID: "qwen3.6-plus",
want: false,
want: true,
},
{
name: "qwen3.7-plus uses openai endpoint on Go provider",
name: "qwen3.7-plus uses anthropic endpoint on Go provider",
modelID: "qwen3.7-plus",
want: false,
want: true,
},
{
name: "qwen3.7-max uses anthropic endpoint (no oa-compat support)",
Expand Down Expand Up @@ -511,3 +511,136 @@ func TestStreamIdleTimeout(t *testing.T) {
})
}
}

// TestRequestTimeout_UsesConfiguredTimeout checks that a positive Go-provider
// TimeoutMs is converted from milliseconds and returned as-is.
func TestRequestTimeout_UsesConfiguredTimeout(t *testing.T) {
	settings := &config.Config{
		OpenCodeGo: config.OpenCodeGoConfig{TimeoutMs: 120000},
	}
	client := NewOpenCodeClient(config.NewAtomicConfig(settings, ""))

	const want = 120 * time.Second
	got := client.RequestTimeout(config.ModelConfig{
		Provider: ProviderOpenCodeGo,
		ModelID:  "kimi-k2.6",
	})
	if got != want {
		t.Errorf("RequestTimeout = %v, want 120s", got)
	}
}

// TestRequestTimeout_FallsBackToDefault checks that a zero TimeoutMs yields
// the five-minute default.
func TestRequestTimeout_FallsBackToDefault(t *testing.T) {
	settings := &config.Config{
		OpenCodeGo: config.OpenCodeGoConfig{TimeoutMs: 0},
	}
	client := NewOpenCodeClient(config.NewAtomicConfig(settings, ""))

	const want = 5 * time.Minute
	got := client.RequestTimeout(config.ModelConfig{
		Provider: ProviderOpenCodeGo,
		ModelID:  "kimi-k2.6",
	})
	if got != want {
		t.Errorf("RequestTimeout = %v, want 5m", got)
	}
}

// TestRequestTimeout_ZenProvider checks that a Zen-provider model reads its
// timeout from the Zen configuration section.
func TestRequestTimeout_ZenProvider(t *testing.T) {
	settings := &config.Config{
		OpenCodeZen: config.OpenCodeZenConfig{TimeoutMs: 60000},
	}
	client := NewOpenCodeClient(config.NewAtomicConfig(settings, ""))

	const want = 60 * time.Second
	got := client.RequestTimeout(config.ModelConfig{
		Provider: ProviderOpenCodeZen,
		ModelID:  "claude-sonnet-4.5",
	})
	if got != want {
		t.Errorf("RequestTimeout = %v, want 60s", got)
	}
}

// TestStreamingTimeout_UsesStreamingTimeoutMs checks that a positive
// StreamingTimeoutMs takes precedence over TimeoutMs for the Go provider.
func TestStreamingTimeout_UsesStreamingTimeoutMs(t *testing.T) {
	settings := &config.Config{
		OpenCodeGo: config.OpenCodeGoConfig{
			TimeoutMs:          300000,
			StreamingTimeoutMs: 600000,
		},
	}
	client := NewOpenCodeClient(config.NewAtomicConfig(settings, ""))

	const want = 600 * time.Second
	got := client.StreamingTimeout(config.ModelConfig{
		Provider: ProviderOpenCodeGo,
		ModelID:  "kimi-k2.6",
	})
	if got != want {
		t.Errorf("StreamingTimeout = %v, want 600s", got)
	}
}

// TestStreamingTimeout_FallsBackToTimeoutMs checks that a zero
// StreamingTimeoutMs makes the streaming timeout fall back to TimeoutMs.
func TestStreamingTimeout_FallsBackToTimeoutMs(t *testing.T) {
	settings := &config.Config{
		OpenCodeGo: config.OpenCodeGoConfig{
			TimeoutMs:          300000,
			StreamingTimeoutMs: 0,
		},
	}
	client := NewOpenCodeClient(config.NewAtomicConfig(settings, ""))

	const want = 300 * time.Second
	got := client.StreamingTimeout(config.ModelConfig{
		Provider: ProviderOpenCodeGo,
		ModelID:  "kimi-k2.6",
	})
	if got != want {
		t.Errorf("StreamingTimeout = %v, want 300s (fallback to timeout_ms)", got)
	}
}

// TestStreamingTimeout_FallsBackToDefault checks that when both timeout
// settings are zero the five-minute default is returned.
func TestStreamingTimeout_FallsBackToDefault(t *testing.T) {
	settings := &config.Config{
		OpenCodeGo: config.OpenCodeGoConfig{
			TimeoutMs:          0,
			StreamingTimeoutMs: 0,
		},
	}
	client := NewOpenCodeClient(config.NewAtomicConfig(settings, ""))

	const want = 5 * time.Minute
	got := client.StreamingTimeout(config.ModelConfig{
		Provider: ProviderOpenCodeGo,
		ModelID:  "kimi-k2.6",
	})
	if got != want {
		t.Errorf("StreamingTimeout = %v, want 5m", got)
	}
}

// TestStreamingTimeout_ZenProvider checks that a Zen-provider model reads its
// streaming timeout from the Zen configuration section.
func TestStreamingTimeout_ZenProvider(t *testing.T) {
	settings := &config.Config{
		OpenCodeZen: config.OpenCodeZenConfig{
			TimeoutMs:          300000,
			StreamingTimeoutMs: 600000,
		},
	}
	client := NewOpenCodeClient(config.NewAtomicConfig(settings, ""))

	const want = 600 * time.Second
	got := client.StreamingTimeout(config.ModelConfig{
		Provider: ProviderOpenCodeZen,
		ModelID:  "claude-sonnet-4.5",
	})
	if got != want {
		t.Errorf("StreamingTimeout = %v, want 600s", got)
	}
}

// TestStreamingTimeout_SmallConfiguredValue checks that a small positive
// StreamingTimeoutMs is honored verbatim rather than being replaced by
// TimeoutMs or the default.
func TestStreamingTimeout_SmallConfiguredValue(t *testing.T) {
	settings := &config.Config{
		OpenCodeGo: config.OpenCodeGoConfig{
			TimeoutMs:          300000,
			StreamingTimeoutMs: 100,
		},
	}
	client := NewOpenCodeClient(config.NewAtomicConfig(settings, ""))

	const want = 100 * time.Millisecond
	got := client.StreamingTimeout(config.ModelConfig{
		Provider: ProviderOpenCodeGo,
		ModelID:  "kimi-k2.6",
	})
	if got != want {
		t.Errorf("StreamingTimeout = %v, want 100ms", got)
	}
}
Loading
Loading