@@ -31,6 +31,11 @@ struct ValidationProbe {
3131 path : & ' static str ,
3232 protocol : & ' static str ,
3333 body : bytes:: Bytes ,
34+ /// Alternate body to try when the primary probe fails with HTTP 400.
35+ /// Used for OpenAI chat completions where newer models require
36+ /// `max_completion_tokens` while legacy/self-hosted backends only
37+ /// accept `max_tokens`.
38+ fallback_body : Option < bytes:: Bytes > ,
3439}
3540
3641/// Response from a proxied HTTP request to a backend (fully buffered).
@@ -163,12 +168,17 @@ fn validation_probe(route: &ResolvedRoute) -> Result<ValidationProbe, Validation
163168 . iter ( )
164169 . any ( |protocol| protocol == "openai_chat_completions" )
165170 {
171+ // Use max_completion_tokens (modern OpenAI parameter, required by GPT-5+)
172+ // with max_tokens as fallback for legacy/self-hosted backends.
166173 return Ok ( ValidationProbe {
167174 path : "/v1/chat/completions" ,
168175 protocol : "openai_chat_completions" ,
169176 body : bytes:: Bytes :: from_static (
170- br#"{"messages":[{"role":"user","content":"ping"}],"max_tokens ":32}"# ,
177+ br#"{"messages":[{"role":"user","content":"ping"}],"max_completion_tokens ":32}"# ,
171178 ) ,
179+ fallback_body : Some ( bytes:: Bytes :: from_static (
180+ br#"{"messages":[{"role":"user","content":"ping"}],"max_tokens":32}"# ,
181+ ) ) ,
172182 } ) ;
173183 }
174184
@@ -183,6 +193,7 @@ fn validation_probe(route: &ResolvedRoute) -> Result<ValidationProbe, Validation
183193 body : bytes:: Bytes :: from_static (
184194 br#"{"messages":[{"role":"user","content":"ping"}],"max_tokens":32}"# ,
185195 ) ,
196+ fallback_body : None ,
186197 } ) ;
187198 }
188199
@@ -195,6 +206,7 @@ fn validation_probe(route: &ResolvedRoute) -> Result<ValidationProbe, Validation
195206 path : "/v1/responses" ,
196207 protocol : "openai_responses" ,
197208 body : bytes:: Bytes :: from_static ( br#"{"input":"ping","max_output_tokens":32}"# ) ,
209+ fallback_body : None ,
198210 } ) ;
199211 }
200212
@@ -207,6 +219,7 @@ fn validation_probe(route: &ResolvedRoute) -> Result<ValidationProbe, Validation
207219 path : "/v1/completions" ,
208220 protocol : "openai_completions" ,
209221 body : bytes:: Bytes :: from_static ( br#"{"prompt":"ping","max_tokens":32}"# ) ,
222+ fallback_body : None ,
210223 } ) ;
211224 }
212225
@@ -233,7 +246,47 @@ pub async fn verify_backend_endpoint(
233246 } ) ;
234247 }
235248
236- let response = send_backend_request ( client, route, "POST" , probe. path , headers, probe. body )
249+ let result = try_validation_request (
250+ client,
251+ route,
252+ probe. path ,
253+ probe. protocol ,
254+ headers. clone ( ) ,
255+ probe. body ,
256+ )
257+ . await ;
258+
259+ // If the primary probe failed with a request-shape error (HTTP 400) and
260+ // there is a fallback body, retry with the alternate token parameter.
261+ // This handles the split between `max_completion_tokens` (GPT-5+) and
262+ // `max_tokens` (legacy/self-hosted backends).
263+ if let ( Err ( err) , Some ( fallback_body) ) = ( & result, probe. fallback_body ) {
264+ if err. kind == ValidationFailureKind :: RequestShape {
265+ return try_validation_request (
266+ client,
267+ route,
268+ probe. path ,
269+ probe. protocol ,
270+ headers,
271+ fallback_body,
272+ )
273+ . await ;
274+ }
275+ }
276+
277+ result
278+ }
279+
280+ /// Send a single validation request and classify the response.
281+ async fn try_validation_request (
282+ client : & reqwest:: Client ,
283+ route : & ResolvedRoute ,
284+ path : & str ,
285+ protocol : & str ,
286+ headers : Vec < ( String , String ) > ,
287+ body : bytes:: Bytes ,
288+ ) -> Result < ValidatedEndpoint , ValidationFailure > {
289+ let response = send_backend_request ( client, route, "POST" , path, headers, body)
237290 . await
238291 . map_err ( |err| match err {
239292 RouterError :: UpstreamUnavailable ( details) => ValidationFailure {
@@ -253,12 +306,12 @@ pub async fn verify_backend_endpoint(
253306 details,
254307 } ,
255308 } ) ?;
256- let url = build_backend_url ( & route. endpoint , probe . path ) ;
309+ let url = build_backend_url ( & route. endpoint , path) ;
257310
258311 if response. status ( ) . is_success ( ) {
259312 return Ok ( ValidatedEndpoint {
260313 url,
261- protocol : probe . protocol . to_string ( ) ,
314+ protocol : protocol. to_string ( ) ,
262315 } ) ;
263316 }
264317
@@ -376,7 +429,7 @@ fn build_backend_url(endpoint: &str, path: &str) -> String {
376429
377430#[ cfg( test) ]
378431mod tests {
379- use super :: { build_backend_url, verify_backend_endpoint} ;
432+ use super :: { ValidationFailureKind , build_backend_url, verify_backend_endpoint} ;
380433 use crate :: config:: ResolvedRoute ;
381434 use openshell_core:: inference:: AuthHeader ;
382435 use wiremock:: matchers:: { body_partial_json, header, method, path} ;
@@ -463,4 +516,102 @@ mod tests {
463516 assert_eq ! ( validated. protocol, "openai_chat_completions" ) ;
464517 assert_eq ! ( validated. url, "mock://test-backend/v1/chat/completions" ) ;
465518 }
519+
520+ /// GPT-5+ models reject `max_tokens` — the primary probe uses
521+ /// `max_completion_tokens` so validation should succeed directly.
522+ #[ tokio:: test]
523+ async fn verify_openai_chat_uses_max_completion_tokens ( ) {
524+ let mock_server = MockServer :: start ( ) . await ;
525+ let route = test_route (
526+ & mock_server. uri ( ) ,
527+ & [ "openai_chat_completions" ] ,
528+ AuthHeader :: Bearer ,
529+ ) ;
530+
531+ Mock :: given ( method ( "POST" ) )
532+ . and ( path ( "/v1/chat/completions" ) )
533+ . and ( body_partial_json ( serde_json:: json!( {
534+ "max_completion_tokens" : 32 ,
535+ } ) ) )
536+ . respond_with (
537+ ResponseTemplate :: new ( 200 ) . set_body_json ( serde_json:: json!( { "id" : "chatcmpl-1" } ) ) ,
538+ )
539+ . mount ( & mock_server)
540+ . await ;
541+
542+ let client = reqwest:: Client :: builder ( ) . build ( ) . unwrap ( ) ;
543+ let validated = verify_backend_endpoint ( & client, & route) . await . unwrap ( ) ;
544+
545+ assert_eq ! ( validated. protocol, "openai_chat_completions" ) ;
546+ }
547+
548+ /// Legacy/self-hosted backends that reject `max_completion_tokens`
549+ /// should succeed on the fallback probe using `max_tokens`.
550+ #[ tokio:: test]
551+ async fn verify_openai_chat_falls_back_to_max_tokens ( ) {
552+ let mock_server = MockServer :: start ( ) . await ;
553+ let route = test_route (
554+ & mock_server. uri ( ) ,
555+ & [ "openai_chat_completions" ] ,
556+ AuthHeader :: Bearer ,
557+ ) ;
558+
559+ // Reject the primary probe (max_completion_tokens) with 400.
560+ Mock :: given ( method ( "POST" ) )
561+ . and ( path ( "/v1/chat/completions" ) )
562+ . and ( body_partial_json ( serde_json:: json!( {
563+ "max_completion_tokens" : 32 ,
564+ } ) ) )
565+ . respond_with ( ResponseTemplate :: new ( 400 ) . set_body_string (
566+ r#"{"error":{"message":"Unsupported parameter: 'max_completion_tokens'"}}"# ,
567+ ) )
568+ . expect ( 1 )
569+ . mount ( & mock_server)
570+ . await ;
571+
572+ // Accept the fallback probe (max_tokens).
573+ Mock :: given ( method ( "POST" ) )
574+ . and ( path ( "/v1/chat/completions" ) )
575+ . and ( body_partial_json ( serde_json:: json!( {
576+ "max_tokens" : 32 ,
577+ } ) ) )
578+ . respond_with (
579+ ResponseTemplate :: new ( 200 ) . set_body_json ( serde_json:: json!( { "id" : "chatcmpl-2" } ) ) ,
580+ )
581+ . expect ( 1 )
582+ . mount ( & mock_server)
583+ . await ;
584+
585+ let client = reqwest:: Client :: builder ( ) . build ( ) . unwrap ( ) ;
586+ let validated = verify_backend_endpoint ( & client, & route) . await . unwrap ( ) ;
587+
588+ assert_eq ! ( validated. protocol, "openai_chat_completions" ) ;
589+ }
590+
591+ /// Non-chat-completions probes (e.g. anthropic_messages) should not
592+ /// have a fallback — a 400 remains a hard failure.
593+ #[ tokio:: test]
594+ async fn verify_non_chat_completions_no_fallback ( ) {
595+ let mock_server = MockServer :: start ( ) . await ;
596+ let route = test_route (
597+ & mock_server. uri ( ) ,
598+ & [ "anthropic_messages" ] ,
599+ AuthHeader :: Custom ( "x-api-key" ) ,
600+ ) ;
601+
602+ Mock :: given ( method ( "POST" ) )
603+ . and ( path ( "/v1/messages" ) )
604+ . respond_with ( ResponseTemplate :: new ( 400 ) . set_body_string ( "bad request" ) )
605+ . mount ( & mock_server)
606+ . await ;
607+
608+ let client = reqwest:: Client :: builder ( ) . build ( ) . unwrap ( ) ;
609+ let result = verify_backend_endpoint ( & client, & route) . await ;
610+
611+ assert ! ( result. is_err( ) ) ;
612+ assert_eq ! (
613+ result. unwrap_err( ) . kind,
614+ ValidationFailureKind :: RequestShape
615+ ) ;
616+ }
466617}
0 commit comments