From 9db6be357234816bbc71087b52aba3e62b8287cf Mon Sep 17 00:00:00 2001 From: localai-bot Date: Thu, 5 Mar 2026 17:12:43 +0000 Subject: [PATCH 1/2] feat: Add WebSocket endpoint support for OpenAI Responses API - Add WebSocket route handling for /v1/responses and /responses endpoints - Add WebSocket message types to schema (ORWebSocketClientMessage, ORWebSocketServerEvent, etc.) - Add connection-local cache types for response storage - Implement initial WebSocket infrastructure (handler to be added in next commit) --- IMPLEMENTATION_PLAN.md | 79 +++++++++++++++++++++++++++++++ core/http/routes/openresponses.go | 12 +++++ core/schema/openresponses.go | 70 +++++++++++++++++++++++++++ 3 files changed, 161 insertions(+) create mode 100644 IMPLEMENTATION_PLAN.md diff --git a/IMPLEMENTATION_PLAN.md b/IMPLEMENTATION_PLAN.md new file mode 100644 index 000000000000..a1c84d3585fc --- /dev/null +++ b/IMPLEMENTATION_PLAN.md @@ -0,0 +1,79 @@ +# WebSocket Mode Implementation Plan for OpenAI Responses API + +## Overview +Implement WebSocket support for LocalAI's OpenAI API-compatible Responses endpoint, enabling persistent WebSocket connections for long-running, tool-call-heavy agentic workflows. + +## Technical Requirements + +### 1. WebSocket Endpoint +- **Endpoint**: `ws://<host>:<port>/v1/responses` +- **Upgrade**: HTTP upgrade from GET /v1/responses when `Upgrade: websocket` header is present + +### 2. Message Types (Client → Server) + +#### response.create (Initial Turn) +```json +{ + "type": "response.create", + "model": "gpt-4o", + "store": false, + "input": [...], + "tools": [] +} +``` + +#### response.create with Continuation (Subsequent Turns) +```json +{ + "type": "response.create", + "model": "gpt-4o", + "store": false, + "previous_response_id": "resp_123", + "input": [...], + "tools": [] +} +``` + +### 3. Response Events (Server → Client) + +1. **response.created** - Response object created +2. **response.progress** - Incremental output +3. 
**response.function_call_arguments.delta** - Streaming function arguments +4. **response.function_call_arguments.done** - Function call complete +5. **response.done** - Final response + +### 4. Connection Management +- Track active connections with 60-minute timeout +- Connection-local cache for responses (when store=false) +- One in-flight response at a time per connection + +### 5. Error Handling +- `previous_response_not_found` (400) +- `websocket_connection_limit_reached` (400) + +## Implementation Steps + +### Step 1: Add WebSocket Schema Types +- Add WebSocket message types to `core/schema/openresponses.go` +- Add connection-related types + +### Step 2: Add WebSocket Route +- Modify `core/http/routes/openresponses.go` to handle WebSocket upgrade +- Add GET /v1/responses WebSocket endpoint + +### Step 3: Create WebSocket Handler +- Create `core/http/endpoints/openresponses/websocket.go` +- Implement connection handling +- Implement message parsing +- Implement event streaming + +### Step 4: Add Connection Store +- Implement connection management in store +- Add 60-minute timeout +- Add connection-local cache + +## Files to Modify/Create +1. `core/schema/openresponses.go` - Add WebSocket types +2. `core/http/routes/openresponses.go` - Add WebSocket route +3. `core/http/endpoints/openresponses/websocket.go` - New WebSocket handler (create) +4. 
`core/http/endpoints/openresponses/store.go` - Add connection management diff --git a/core/http/routes/openresponses.go b/core/http/routes/openresponses.go index 19cadbbae677..d1a5fa4f18b2 100644 --- a/core/http/routes/openresponses.go +++ b/core/http/routes/openresponses.go @@ -43,6 +43,18 @@ func RegisterOpenResponsesRoutes(app *echo.Echo, cancelResponseHandler := openresponses.CancelResponseEndpoint() app.POST("/v1/responses/:id/cancel", cancelResponseHandler, middleware.TraceMiddleware(application)) app.POST("/responses/:id/cancel", cancelResponseHandler, middleware.TraceMiddleware(application)) + + // WebSocket endpoint for OpenAI Responses API WebSocket Mode + websocketHandler := openresponses.WebSocketEndpoint( + application.ModelConfigLoader(), + application.ModelLoader(), + application.TemplatesEvaluator(), + application.ApplicationConfig(), + ) + + // WebSocket at /v1/responses (GET method for upgrade) + app.GET("/v1/responses", websocketHandler, middleware.TraceMiddleware(application)) + app.GET("/responses", websocketHandler, middleware.TraceMiddleware(application)) } // setOpenResponsesRequestContext sets up the context and cancel function for Open Responses requests diff --git a/core/schema/openresponses.go b/core/schema/openresponses.go index b5a81f413362..ddcb11b0ccf6 100644 --- a/core/schema/openresponses.go +++ b/core/schema/openresponses.go @@ -1,6 +1,7 @@ package schema import ( + "time" "context" ) @@ -309,3 +310,72 @@ func ORContentPartWithLogprobs(text string, logprobs *Logprobs) ORContentPart { Logprobs: orLogprobs, // REQUIRED - must always be present as array (empty if none) } } + +// WebSocket message types for Open Responses API WebSocket Mode +// https://developers.openai.com/api/docs/guides/websocket-mode + +// ORWebSocketMessage represents a WebSocket message (client -> server or server -> client) +type ORWebSocketMessage struct { + Type string `json:"type"` // response.create, response.created, response.progress, etc. 
+} + +// ORWebSocketClientMessage represents a client message to the WebSocket endpoint +type ORWebSocketClientMessage struct { + Type string `json:"type"` // "response.create" + Model string `json:"model,omitempty"` + Input interface{} `json:"input,omitempty"` + Tools []ORFunctionTool `json:"tools,omitempty"` + ToolChoice interface{} `json:"tool_choice,omitempty"` + MaxOutputTokens *int `json:"max_output_tokens,omitempty"` + Temperature *float64 `json:"temperature,omitempty"` + TopP *float64 `json:"top_p,omitempty"` + Truncation string `json:"truncation,omitempty"` + Instructions string `json:"instructions,omitempty"` + Reasoning *ORReasoningParam `json:"reasoning,omitempty"` + Metadata map[string]string `json:"metadata,omitempty"` + PreviousResponseID string `json:"previous_response_id,omitempty"` + Store *bool `json:"store,omitempty"` + TextFormat interface{} `json:"text_format,omitempty"` + ServiceTier string `json:"service_tier,omitempty"` + AllowedTools []string `json:"allowed_tools,omitempty"` + ParallelToolCalls *bool `json:"parallel_tool_calls,omitempty"` + PresencePenalty *float64 `json:"presence_penalty,omitempty"` + FrequencyPenalty *float64 `json:"frequency_penalty,omitempty"` + TopLogprobs *int `json:"top_logprobs,omitempty"` + MaxToolCalls *int `json:"max_tool_calls,omitempty"` + Generate *bool `json:"generate,omitempty"` // If false, just warm up and return response_id +} + +// ORWebSocketServerEvent represents a server event to the WebSocket +type ORWebSocketServerEvent struct { + Type string `json:"type"` // response.created, response.progress, etc. 
+ ResponseID string `json:"response_id,omitempty"` + Response *ORResponseResource `json:"response,omitempty"` + OutputIndex *int `json:"output_index,omitempty"` + Output []ORItemField `json:"output,omitempty"` + ItemID string `json:"item_id,omitempty"` + Item *ORItemField `json:"item,omitempty"` + ContentIndex *int `json:"content_index,omitempty"` + Delta *string `json:"delta,omitempty"` + Text *string `json:"text,omitempty"` + CallID string `json:"call_id,omitempty"` + Arguments *string `json:"arguments,omitempty"` + Error *ORError `json:"error,omitempty"` +} + +// ORWebSocketError represents a WebSocket error event +type ORWebSocketError struct { + Type string `json:"type"` // error + Code string `json:"code,omitempty"` // previous_response_not_found, websocket_connection_limit_reached, etc. + Message string `json:"message"` + Param string `json:"param,omitempty"` +} + +// ConnectionLocalCacheEntry represents a cached response in connection-local storage +type ConnectionLocalCacheEntry struct { + ResponseID string + Response *ORResponseResource + Input *ORWebSocketClientMessage + CachedAt time.Time + ExpiresAt *time.Time +} From 00c224320178a86f74e8856d0d8b3adf1221c954 Mon Sep 17 00:00:00 2001 From: localai-bot Date: Fri, 6 Mar 2026 12:41:14 +0000 Subject: [PATCH 2/2] feat(cli): add configurable backend image fallback tags via CLI options - Add three new CLI flags: --backend-images-release-tag, --backend-images-branch-tag, --backend-dev-suffix - Add corresponding fields to SystemState for passing configuration - Add WithBackendImagesReleaseTag, WithBackendImagesBranchTag, WithBackendDevSuffix options - Modify getFallbackTagValues to use SystemState instead of environment variables - Pass CLI options through to SystemState in run.go Signed-off-by: localai-bot Signed-off-by: localai-bot --- core/cli/run.go | 6 ++++++ core/gallery/backends.go | 41 ++++++++++++++++++---------------------- pkg/system/state.go | 23 ++++++++++++++++++++++ 3 files changed, 47 
insertions(+), 23 deletions(-) diff --git a/core/cli/run.go b/core/cli/run.go index 7410c8def070..2ecad7672ff7 100644 --- a/core/cli/run.go +++ b/core/cli/run.go @@ -34,6 +34,9 @@ type RunCMD struct { BackendGalleries string `env:"LOCALAI_BACKEND_GALLERIES,BACKEND_GALLERIES" help:"JSON list of backend galleries" group:"backends" default:"${backends}"` Galleries string `env:"LOCALAI_GALLERIES,GALLERIES" help:"JSON list of galleries" group:"models" default:"${galleries}"` AutoloadGalleries bool `env:"LOCALAI_AUTOLOAD_GALLERIES,AUTOLOAD_GALLERIES" group:"models" default:"true"` + BackendImagesReleaseTag string `env:"LOCALAI_BACKEND_IMAGES_RELEASE_TAG,BACKEND_IMAGES_RELEASE_TAG" help:"Fallback release tag for backend images" group:"backends" default:"latest"` + BackendImagesBranchTag string `env:"LOCALAI_BACKEND_IMAGES_BRANCH_TAG,BACKEND_IMAGES_BRANCH_TAG" help:"Fallback branch tag for backend images" group:"backends" default:"master"` + BackendDevSuffix string `env:"LOCALAI_BACKEND_DEV_SUFFIX,BACKEND_DEV_SUFFIX" help:"Development suffix for backend images" group:"backends" default:"development"` AutoloadBackendGalleries bool `env:"LOCALAI_AUTOLOAD_BACKEND_GALLERIES,AUTOLOAD_BACKEND_GALLERIES" group:"backends" default:"true"` PreloadModels string `env:"LOCALAI_PRELOAD_MODELS,PRELOAD_MODELS" help:"A List of models to apply in JSON at start" group:"models"` Models []string `env:"LOCALAI_MODELS,MODELS" help:"A List of model configuration URLs to load" group:"models"` @@ -102,6 +105,9 @@ func (r *RunCMD) Run(ctx *cliContext.Context) error { system.WithBackendSystemPath(r.BackendsSystemPath), system.WithModelPath(r.ModelsPath), system.WithBackendPath(r.BackendsPath), + system.WithBackendImagesReleaseTag(r.BackendImagesReleaseTag), + system.WithBackendImagesBranchTag(r.BackendImagesBranchTag), + system.WithBackendDevSuffix(r.BackendDevSuffix), ) if err != nil { return err diff --git a/core/gallery/backends.go b/core/gallery/backends.go index 833408cfcdf5..2fab810ffc9b 100644 
--- a/core/gallery/backends.go +++ b/core/gallery/backends.go @@ -7,7 +7,6 @@ import ( "encoding/json" "errors" "fmt" - "os" "path/filepath" "strings" "time" @@ -25,33 +24,29 @@ const ( runFile = "run.sh" ) -// Environment variables for configurable fallback URI patterns +// Default fallback tag values const ( - // Default fallback tag values defaultLatestTag = "latest" defaultMasterTag = "master" defaultDevSuffix = "development" - - // Environment variable names - envLatestTag = "LOCALAI_BACKEND_IMAGES_RELEASE_TAG" - envMasterTag = "LOCALAI_BACKEND_IMAGES_BRANCH_TAG" - envDevSuffix = "LOCALAI_BACKEND_DEV_SUFFIX" ) -// getFallbackTagValues returns the configurable fallback tag values from environment variables -func getFallbackTagValues() (latestTag, masterTag, devSuffix string) { - latestTag = os.Getenv(envLatestTag) - masterTag = os.Getenv(envMasterTag) - devSuffix = os.Getenv(envDevSuffix) - - // Use defaults if environment variables are not set - if latestTag == "" { +// getFallbackTagValues returns the configurable fallback tag values from SystemState +func getFallbackTagValues(systemState *system.SystemState) (latestTag, masterTag, devSuffix string) { + // Use SystemState fields if set, otherwise use defaults + if systemState.BackendImagesReleaseTag != "" { + latestTag = systemState.BackendImagesReleaseTag + } else { latestTag = defaultLatestTag } - if masterTag == "" { + if systemState.BackendImagesBranchTag != "" { + masterTag = systemState.BackendImagesBranchTag + } else { masterTag = defaultMasterTag } - if devSuffix == "" { + if systemState.BackendDevSuffix != "" { + devSuffix = systemState.BackendDevSuffix + } else { devSuffix = defaultDevSuffix } @@ -172,8 +167,8 @@ func InstallBackendFromGallery(ctx context.Context, galleries []config.Gallery, } func InstallBackend(ctx context.Context, systemState *system.SystemState, modelLoader *model.ModelLoader, config *GalleryBackend, downloadStatus func(string, string, string, float64)) error { - // Get 
configurable fallback tag values from environment variables - latestTag, masterTag, devSuffix := getFallbackTagValues() + // Get configurable fallback tag values from SystemState + latestTag, masterTag, devSuffix := getFallbackTagValues(systemState) // Create base path if it doesn't exist err := os.MkdirAll(systemState.Backend.BackendsPath, 0750) @@ -225,7 +220,7 @@ func InstallBackend(ctx context.Context, systemState *system.SystemState, modelL } // Try fallback: replace latestTag + "-" with masterTag + "-" in the URI - fallbackURI := strings.Replace(string(config.URI), latestTag + "-", masterTag + "-", 1) + fallbackURI := strings.Replace(string(config.URI), latestTag+"-", masterTag+"-", 1) if fallbackURI != string(config.URI) { xlog.Debug("Trying fallback URI", "original", config.URI, "fallback", fallbackURI) if err := downloader.URI(fallbackURI).DownloadFileWithContext(ctx, backendPath, "", 1, 1, downloadStatus); err == nil { @@ -234,7 +229,7 @@ func InstallBackend(ctx context.Context, systemState *system.SystemState, modelL } else { // Try another fallback: add "-" + devSuffix suffix to the backend name // For example: master-gpu-nvidia-cuda-13-ace-step -> master-gpu-nvidia-cuda-13-ace-step-development - if !strings.Contains(fallbackURI, "-" + devSuffix) { + if !strings.Contains(fallbackURI, "-"+devSuffix) { // Extract backend name from URI and add -development parts := strings.Split(fallbackURI, "-") if len(parts) >= 2 { @@ -441,7 +436,7 @@ func ListSystemBackends(systemState *system.SystemState) (SystemBackends, error) metaMap[dir] = metadata - // Concrete backend entry + // Concrete-backend entry if _, err := os.Stat(run); err == nil { backends[dir] = SystemBackend{ Name: dir, diff --git a/pkg/system/state.go b/pkg/system/state.go index 6e8d2a335495..2d9afcf0470d 100644 --- a/pkg/system/state.go +++ b/pkg/system/state.go @@ -21,6 +21,11 @@ type SystemState struct { VRAM uint64 systemCapabilities string + + // Backend image fallback tag configuration + 
BackendImagesReleaseTag string + BackendImagesBranchTag string + BackendDevSuffix string } type SystemStateOptions func(*SystemState) @@ -43,6 +48,24 @@ func WithModelPath(path string) SystemStateOptions { } } +func WithBackendImagesReleaseTag(tag string) SystemStateOptions { + return func(s *SystemState) { + s.BackendImagesReleaseTag = tag + } +} + +func WithBackendImagesBranchTag(tag string) SystemStateOptions { + return func(s *SystemState) { + s.BackendImagesBranchTag = tag + } +} + +func WithBackendDevSuffix(suffix string) SystemStateOptions { + return func(s *SystemState) { + s.BackendDevSuffix = suffix + } +} + func GetSystemState(opts ...SystemStateOptions) (*SystemState, error) { state := &SystemState{} for _, opt := range opts {