Skip to content

Commit c691a29

Browse files
author
1bcMax
committed
fix: route reasoning keywords only in user prompt, replace o3 with DeepSeek
- Fix bug where system prompt reasoning keywords triggered REASONING tier - Replace expensive o3 ($10/M) with DeepSeek Reasoner ($0.42/M) for ~10x savings - Add tier & reasoning to debug logging for easier troubleshooting - Add test case for system prompt with reasoning keywords - Fix formatting for CI
1 parent 32aa00c commit c691a29

8 files changed

Lines changed: 79 additions & 26 deletions

File tree

README.md

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ One wallet, 30+ models, zero API keys.
2222
"What is 2+2?" → DeepSeek $0.27/M saved 99%
2323
"Summarize this article" → GPT-4o-mini $0.60/M saved 99%
2424
"Build a React component" → Claude Sonnet $15.00/M best balance
25-
"Prove this theorem" → o3 $10.00/M reasoning
25+
"Prove this theorem" → DeepSeek-R $0.42/M reasoning
2626
"Run 50 parallel searches"→ Kimi K2.5 $2.40/M agentic swarm
2727
```
2828

@@ -117,12 +117,12 @@ Weighted sum → sigmoid confidence calibration → tier selection.
117117

118118
### Tier → Model Mapping
119119

120-
| Tier | Primary Model | Cost/M | Savings vs Opus |
121-
| --------- | --------------- | ------ | --------------- |
122-
| SIMPLE | deepseek-chat | $0.27 | **99.6%** |
123-
| MEDIUM | gpt-4o-mini | $0.60 | **99.2%** |
124-
| COMPLEX | claude-sonnet-4 | $15.00 | **80%** |
125-
| REASONING | o3 | $10.00 | **87%** |
120+
| Tier | Primary Model | Cost/M | Savings vs Opus |
121+
| --------- | ----------------- | ------ | --------------- |
122+
| SIMPLE | gemini-2.5-flash | $0.60 | **99.2%** |
123+
| MEDIUM | deepseek-chat | $0.42 | **99.4%** |
124+
| COMPLEX | claude-opus-4 | $75.00 | baseline |
125+
| REASONING | deepseek-reasoner | $0.42 | **99.4%** |
126126

127127
Special rule: 2+ reasoning markers → REASONING at 0.97 confidence.
128128

@@ -365,12 +365,12 @@ const decision = route("Prove sqrt(2) is irrational", undefined, 4096, {
365365

366366
console.log(decision);
367367
// {
368-
// model: "openai/o3",
368+
// model: "deepseek/deepseek-reasoner",
369369
// tier: "REASONING",
370370
// confidence: 0.97,
371371
// method: "rules",
372-
// savings: 0.87,
373-
// costEstimate: 0.041,
372+
// savings: 0.994,
373+
// costEstimate: 0.002,
374374
// }
375375
```
376376

@@ -471,9 +471,9 @@ openclaw logs --follow
471471
You should see model selection for each request:
472472

473473
```
474-
[plugins] google/gemini-2.5-flash $0.0012 (saved 99%)
475-
[plugins] deepseek/deepseek-chat $0.0003 (saved 99%)
476-
[plugins] anthropic/claude-sonnet-4 $0.0450 (saved 80%)
474+
[plugins] [SIMPLE] google/gemini-2.5-flash $0.0012 (saved 99%)
475+
[plugins] [MEDIUM] deepseek/deepseek-chat $0.0003 (saved 99%)
476+
[plugins] [REASONING] deepseek/deepseek-reasoner $0.0005 (saved 99%)
477477
```
478478

479479
---

package-lock.json

Lines changed: 2 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"name": "@blockrun/clawrouter",
3-
"version": "0.3.28",
3+
"version": "0.3.29",
44
"description": "Smart LLM router — save 78% on inference costs. 30+ models, one wallet, x402 micropayments.",
55
"type": "module",
66
"main": "dist/index.js",

src/index.ts

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -124,7 +124,10 @@ function injectAuthProfile(logger: { info: (msg: string) => void }): void {
124124

125125
// Load or create auth-profiles.json with correct OpenClaw format
126126
// Format: { version: 1, profiles: { "provider:profileId": { type, provider, key } } }
127-
let store: { version: number; profiles: Record<string, unknown> } = { version: 1, profiles: {} };
127+
let store: { version: number; profiles: Record<string, unknown> } = {
128+
version: 1,
129+
profiles: {},
130+
};
128131
if (existsSync(authPath)) {
129132
try {
130133
const existing = JSON.parse(readFileSync(authPath, "utf-8"));
@@ -219,7 +222,9 @@ async function startProxyInBackground(api: OpenClawPluginApi): Promise<void> {
219222
onRouted: (decision) => {
220223
const cost = decision.costEstimate.toFixed(4);
221224
const saved = (decision.savings * 100).toFixed(0);
222-
api.logger.info(`${decision.model} $${cost} (saved ${saved}%)`);
225+
api.logger.info(
226+
`[${decision.tier}] ${decision.model} $${cost} (saved ${saved}%) | ${decision.reasoning}`,
227+
);
223228
},
224229
onLowBalance: (info) => {
225230
api.logger.warn(`[!] Low balance: ${info.balanceUSD}. Fund wallet: ${info.walletAddress}`);

src/proxy.ts

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -317,11 +317,16 @@ async function proxyRequest(
317317
}
318318

319319
// Normalize model name for comparison (trim whitespace, lowercase)
320-
const normalizedModel = typeof parsed.model === "string" ? parsed.model.trim().toLowerCase() : "";
321-
const isAutoModel = normalizedModel === AUTO_MODEL.toLowerCase() || normalizedModel === AUTO_MODEL_SHORT.toLowerCase();
320+
const normalizedModel =
321+
typeof parsed.model === "string" ? parsed.model.trim().toLowerCase() : "";
322+
const isAutoModel =
323+
normalizedModel === AUTO_MODEL.toLowerCase() ||
324+
normalizedModel === AUTO_MODEL_SHORT.toLowerCase();
322325

323326
// Debug: log received model name
324-
console.log(`[ClawRouter] Received model: "${parsed.model}" -> normalized: "${normalizedModel}", isAuto: ${isAutoModel}`);
327+
console.log(
328+
`[ClawRouter] Received model: "${parsed.model}" -> normalized: "${normalizedModel}", isAuto: ${isAutoModel}`,
329+
);
325330

326331
if (isAutoModel) {
327332
// Extract prompt from messages

src/router/config.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -187,8 +187,8 @@ export const DEFAULT_ROUTING_CONFIG: RoutingConfig = {
187187
fallback: ["anthropic/claude-sonnet-4", "openai/gpt-4o"],
188188
},
189189
REASONING: {
190-
primary: "openai/o3",
191-
fallback: ["google/gemini-2.5-pro", "anthropic/claude-sonnet-4"],
190+
primary: "deepseek/deepseek-reasoner",
191+
fallback: ["moonshot/kimi-k2.5", "google/gemini-2.5-pro"],
192192
},
193193
},
194194

src/router/rules.ts

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,8 @@ export function classifyByRules(
8080
config: ScoringConfig,
8181
): ScoringResult {
8282
const text = `${systemPrompt ?? ""} ${prompt}`.toLowerCase();
83+
// User prompt only — used for reasoning markers (system prompt shouldn't influence complexity)
84+
const userText = prompt.toLowerCase();
8385

8486
// Score all 14 dimensions
8587
const dimensions: DimensionScore[] = [
@@ -93,8 +95,9 @@ export function classifyByRules(
9395
{ low: 1, high: 2 },
9496
{ none: 0, low: 0.5, high: 1.0 },
9597
),
98+
// Reasoning markers use USER prompt only — system prompt "step by step" shouldn't trigger reasoning
9699
scoreKeywordMatch(
97-
text,
100+
userText,
98101
config.reasoningKeywords,
99102
"reasoningMarkers",
100103
"reasoning",
@@ -190,8 +193,11 @@ export function classifyByRules(
190193
weightedScore += d.score * w;
191194
}
192195

193-
// Count reasoning markers for override
194-
const reasoningMatches = config.reasoningKeywords.filter((kw) => text.includes(kw.toLowerCase()));
196+
// Count reasoning markers for override — only check USER prompt, not system prompt
197+
// This prevents system prompts with "step by step" from triggering REASONING for simple queries
198+
const reasoningMatches = config.reasoningKeywords.filter((kw) =>
199+
userText.includes(kw.toLowerCase()),
200+
);
195201

196202
// Direct reasoning override: 2+ reasoning markers = high confidence REASONING
197203
if (reasoningMatches.length >= 2) {

test/e2e.ts

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,43 @@ const config = DEFAULT_ROUTING_CONFIG;
7575
);
7676
}
7777

78+
// System prompt with reasoning keywords should NOT trigger REASONING for simple queries
79+
// This was a bug: if client's system prompt had "step by step" or "logically", ALL queries became REASONING
80+
{
81+
console.log("\nSystem prompt with reasoning keywords (should NOT affect simple queries):");
82+
const systemPrompt = "Think step by step and reason logically about the user's question.";
83+
84+
const r1 = classifyByRules("What is 2+2?", systemPrompt, 10, config.scoring);
85+
assert(
86+
r1.tier === "SIMPLE",
87+
`"2+2" with reasoning system prompt → ${r1.tier} (should be SIMPLE)`,
88+
);
89+
90+
const r2 = classifyByRules("Hello", systemPrompt, 5, config.scoring);
91+
assert(
92+
r2.tier === "SIMPLE",
93+
`"Hello" with reasoning system prompt → ${r2.tier} (should be SIMPLE)`,
94+
);
95+
96+
const r3 = classifyByRules("What is the capital of France?", systemPrompt, 12, config.scoring);
97+
assert(
98+
r3.tier === "SIMPLE",
99+
`"Capital of France" with reasoning system prompt → ${r3.tier} (should be SIMPLE)`,
100+
);
101+
102+
// But if USER explicitly asks for step-by-step, it SHOULD trigger REASONING
103+
const r4 = classifyByRules(
104+
"Prove step by step that sqrt(2) is irrational",
105+
systemPrompt,
106+
50,
107+
config.scoring,
108+
);
109+
assert(
110+
r4.tier === "REASONING",
111+
`User asks for step-by-step proof → ${r4.tier} (should be REASONING)`,
112+
);
113+
}
114+
78115
// Medium queries (may be ambiguous — that's ok, LLM classifier handles them)
79116
{
80117
console.log("\nMedium/Ambiguous queries:");

0 commit comments

Comments
 (0)