Skip to content

Commit 6043c83

Browse files
geelenlinuxdevel
authored and committed
fix(router): increase inference validation token budget (NVIDIA#432)
1 parent abfea11 commit 6043c83

File tree

2 files changed

+6
-6
lines changed

2 files changed

+6
-6
lines changed

crates/openshell-router/src/backend.rs

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -167,7 +167,7 @@ fn validation_probe(route: &ResolvedRoute) -> Result<ValidationProbe, Validation
167167
path: "/v1/chat/completions",
168168
protocol: "openai_chat_completions",
169169
body: bytes::Bytes::from_static(
170-
br#"{"messages":[{"role":"user","content":"ping"}],"max_tokens":1}"#,
170+
br#"{"messages":[{"role":"user","content":"ping"}],"max_tokens":32}"#,
171171
),
172172
});
173173
}
@@ -181,7 +181,7 @@ fn validation_probe(route: &ResolvedRoute) -> Result<ValidationProbe, Validation
181181
path: "/v1/messages",
182182
protocol: "anthropic_messages",
183183
body: bytes::Bytes::from_static(
184-
br#"{"messages":[{"role":"user","content":"ping"}],"max_tokens":1}"#,
184+
br#"{"messages":[{"role":"user","content":"ping"}],"max_tokens":32}"#,
185185
),
186186
});
187187
}
@@ -194,7 +194,7 @@ fn validation_probe(route: &ResolvedRoute) -> Result<ValidationProbe, Validation
194194
return Ok(ValidationProbe {
195195
path: "/v1/responses",
196196
protocol: "openai_responses",
197-
body: bytes::Bytes::from_static(br#"{"input":"ping","max_output_tokens":1}"#),
197+
body: bytes::Bytes::from_static(br#"{"input":"ping","max_output_tokens":32}"#),
198198
});
199199
}
200200

@@ -206,7 +206,7 @@ fn validation_probe(route: &ResolvedRoute) -> Result<ValidationProbe, Validation
206206
return Ok(ValidationProbe {
207207
path: "/v1/completions",
208208
protocol: "openai_completions",
209-
body: bytes::Bytes::from_static(br#"{"prompt":"ping","max_tokens":1}"#),
209+
body: bytes::Bytes::from_static(br#"{"prompt":"ping","max_tokens":32}"#),
210210
});
211211
}
212212

@@ -434,7 +434,7 @@ mod tests {
434434
.and(header("anthropic-version", "2023-06-01"))
435435
.and(body_partial_json(serde_json::json!({
436436
"model": "test-model",
437-
"max_tokens": 1,
437+
"max_tokens": 32,
438438
})))
439439
.respond_with(
440440
ResponseTemplate::new(200).set_body_json(serde_json::json!({"id": "msg_1"})),

crates/openshell-server/src/inference.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -854,7 +854,7 @@ mod tests {
854854
.and(header("content-type", "application/json"))
855855
.and(body_partial_json(serde_json::json!({
856856
"model": "gpt-4o-mini",
857-
"max_tokens": 1,
857+
"max_tokens": 32,
858858
})))
859859
.respond_with(ResponseTemplate::new(200).set_body_json(serde_json::json!({
860860
"id": "chatcmpl-123",

0 commit comments

Comments (0)