From 023abe2e40d3afe9463bf4a253e2727050b25ce2 Mon Sep 17 00:00:00 2001 From: Kyle D Date: Fri, 27 Feb 2026 04:25:58 +0000 Subject: [PATCH] feat: add Avian as a cloud LLM inference provider Add Avian (https://avian.io) as a Go backend that proxies requests to the Avian OpenAI-compatible API at https://api.avian.io/v1. Backend implementation: - Go gRPC backend at backend/go/avian/ following the huggingface backend pattern - Supports chat completions with structured messages and streaming (SSE) - Authentication via AVIAN_API_KEY environment variable - Configurable base URL via AVIAN_API_BASE environment variable Gallery models: - deepseek/deepseek-v3.2: 164K context, $0.26/$0.38 per 1M tokens - moonshotai/kimi-k2.5: 131K context, $0.45/$2.20 per 1M tokens - z-ai/glm-5: 131K context, $0.30/$2.55 per 1M tokens - minimax/minimax-m2.5: 1M context, $0.30/$1.10 per 1M tokens Build infrastructure: - Backend definition in Makefile (golang backend) - CI workflow entries for Linux (amd64/arm64) and macOS (metal) - Backend index.yaml entries with OCI image references Signed-off-by: Kyle D --- .github/workflows/backend.yml | 18 +++ Makefile | 6 +- backend/go/avian/Makefile | 12 ++ backend/go/avian/avian.go | 290 ++++++++++++++++++++++++++++++++++ backend/go/avian/main.go | 22 +++ backend/go/avian/package.sh | 12 ++ backend/go/avian/run.sh | 6 + backend/index.yaml | 40 +++++ gallery/avian.yaml | 18 +++ gallery/index.yaml | 106 +++++++++++++ 10 files changed, 528 insertions(+), 2 deletions(-) create mode 100644 backend/go/avian/Makefile create mode 100644 backend/go/avian/avian.go create mode 100644 backend/go/avian/main.go create mode 100644 backend/go/avian/package.sh create mode 100644 backend/go/avian/run.sh create mode 100644 gallery/avian.yaml diff --git a/.github/workflows/backend.yml b/.github/workflows/backend.yml index 2bc4c259368b..95569e0be7cf 100644 --- a/.github/workflows/backend.yml +++ b/.github/workflows/backend.yml @@ -1795,6 +1795,20 @@ jobs: dockerfile: 
"./backend/Dockerfile.golang" context: "./" ubuntu-version: '2404' + # avian + - build-type: '' + cuda-major-version: "" + cuda-minor-version: "" + platforms: 'linux/amd64,linux/arm64' + tag-latest: 'auto' + tag-suffix: '-avian' + runs-on: 'ubuntu-latest' + base-image: "ubuntu:24.04" + skip-drivers: 'false' + backend: "avian" + dockerfile: "./backend/Dockerfile.golang" + context: "./" + ubuntu-version: '2404' # rfdetr - build-type: '' cuda-major-version: "" @@ -2089,6 +2103,10 @@ jobs: tag-suffix: "-metal-darwin-arm64-huggingface" build-type: "metal" lang: "go" + - backend: "avian" + tag-suffix: "-metal-darwin-arm64-avian" + build-type: "metal" + lang: "go" with: backend: ${{ matrix.backend }} build-type: ${{ matrix.build-type }} diff --git a/Makefile b/Makefile index 54c21088afa0..ef42ff75af40 100644 --- a/Makefile +++ b/Makefile @@ -1,5 +1,5 @@ # Disable parallel execution for backend builds -.NOTPARALLEL: backends/diffusers backends/llama-cpp backends/outetts backends/piper backends/stablediffusion-ggml backends/whisper backends/faster-whisper backends/silero-vad backends/local-store backends/huggingface backends/rfdetr backends/kitten-tts backends/kokoro backends/chatterbox backends/llama-cpp-darwin backends/neutts build-darwin-python-backend build-darwin-go-backend backends/mlx backends/diffuser-darwin backends/mlx-vlm backends/mlx-audio backends/stablediffusion-ggml-darwin backends/vllm backends/vllm-omni backends/moonshine backends/pocket-tts backends/qwen-tts backends/faster-qwen3-tts backends/qwen-asr backends/nemo backends/voxcpm backends/whisperx backends/ace-step backends/voxtral +.NOTPARALLEL: backends/diffusers backends/llama-cpp backends/outetts backends/piper backends/stablediffusion-ggml backends/whisper backends/faster-whisper backends/silero-vad backends/local-store backends/huggingface backends/avian backends/rfdetr backends/kitten-tts backends/kokoro backends/chatterbox backends/llama-cpp-darwin backends/neutts build-darwin-python-backend 
build-darwin-go-backend backends/mlx backends/diffuser-darwin backends/mlx-vlm backends/mlx-audio backends/stablediffusion-ggml-darwin backends/vllm backends/vllm-omni backends/moonshine backends/pocket-tts backends/qwen-tts backends/faster-qwen3-tts backends/qwen-asr backends/nemo backends/voxcpm backends/whisperx backends/ace-step backends/voxtral GOCMD=go GOTEST=$(GOCMD) test @@ -466,6 +466,7 @@ BACKEND_LLAMA_CPP = llama-cpp|llama-cpp|.|false|false BACKEND_PIPER = piper|golang|.|false|true BACKEND_LOCAL_STORE = local-store|golang|.|false|true BACKEND_HUGGINGFACE = huggingface|golang|.|false|true +BACKEND_AVIAN = avian|golang|.|false|true BACKEND_SILERO_VAD = silero-vad|golang|.|false|true BACKEND_STABLEDIFFUSION_GGML = stablediffusion-ggml|golang|.|--progress=plain|true BACKEND_WHISPER = whisper|golang|.|false|true @@ -521,6 +522,7 @@ $(eval $(call generate-docker-build-target,$(BACKEND_LLAMA_CPP))) $(eval $(call generate-docker-build-target,$(BACKEND_PIPER))) $(eval $(call generate-docker-build-target,$(BACKEND_LOCAL_STORE))) $(eval $(call generate-docker-build-target,$(BACKEND_HUGGINGFACE))) +$(eval $(call generate-docker-build-target,$(BACKEND_AVIAN))) $(eval $(call generate-docker-build-target,$(BACKEND_SILERO_VAD))) $(eval $(call generate-docker-build-target,$(BACKEND_STABLEDIFFUSION_GGML))) $(eval $(call generate-docker-build-target,$(BACKEND_WHISPER))) @@ -553,7 +555,7 @@ $(eval $(call generate-docker-build-target,$(BACKEND_ACE_STEP))) docker-save-%: backend-images docker save local-ai-backend:$* -o backend-images/$*.tar -docker-build-backends: docker-build-llama-cpp docker-build-rerankers docker-build-vllm docker-build-vllm-omni docker-build-transformers docker-build-outetts docker-build-diffusers docker-build-kokoro docker-build-faster-whisper docker-build-coqui docker-build-chatterbox docker-build-vibevoice docker-build-moonshine docker-build-pocket-tts docker-build-qwen-tts docker-build-faster-qwen3-tts docker-build-qwen-asr docker-build-nemo 
docker-build-voxcpm docker-build-whisperx docker-build-ace-step docker-build-voxtral
+docker-build-backends: docker-build-llama-cpp docker-build-rerankers docker-build-vllm docker-build-vllm-omni docker-build-transformers docker-build-outetts docker-build-diffusers docker-build-kokoro docker-build-faster-whisper docker-build-coqui docker-build-chatterbox docker-build-vibevoice docker-build-moonshine docker-build-pocket-tts docker-build-qwen-tts docker-build-faster-qwen3-tts docker-build-qwen-asr docker-build-nemo docker-build-voxcpm docker-build-whisperx docker-build-ace-step docker-build-voxtral docker-build-avian
 
 ########################################################
 ### Mock Backend for E2E Tests
diff --git a/backend/go/avian/Makefile b/backend/go/avian/Makefile
new file mode 100644
index 000000000000..a4b356b4129f
--- /dev/null
+++ b/backend/go/avian/Makefile
@@ -0,0 +1,12 @@
+GOCMD=go
+
+avian:
+	CGO_ENABLED=0 $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o avian ./
+
+package:
+	bash package.sh
+
+build: avian package
+
+clean:
+	rm -f avian
diff --git a/backend/go/avian/avian.go b/backend/go/avian/avian.go
new file mode 100644
index 000000000000..2fc69d4df7e2
--- /dev/null
+++ b/backend/go/avian/avian.go
@@ -0,0 +1,290 @@
+package main
+
+import (
+	"bufio"
+	"bytes"
+	"encoding/json"
+	"fmt"
+	"io"
+	"net/http"
+	"os"
+	"strings"
+
+	"github.com/mudler/LocalAI/pkg/grpc/base"
+	pb "github.com/mudler/LocalAI/pkg/grpc/proto"
+)
+
+const (
+	// defaultBaseURL is the endpoint used when AVIAN_API_BASE is unset.
+	defaultBaseURL = "https://api.avian.io/v1"
+
+	// maxSSELineBytes bounds a single SSE line; longer lines abort the stream.
+	maxSSELineBytes = 1 << 20
+
+	// maxErrorBodyBytes bounds how much of a failed streaming response is logged.
+	maxErrorBodyBytes = 64 << 10
+)
+
+// Avian forwards chat completion requests to the Avian HTTP API.
+// It embeds base.SingleThread and keeps the settings resolved by Load.
+type Avian struct {
+	base.SingleThread
+
+	apiKey  string
+	baseURL string
+	model   string
+}
+
+// chatMessage represents an OpenAI-compatible chat message.
+type chatMessage struct {
+	Role       string `json:"role"`
+	Content    string `json:"content"`
+	Name       string `json:"name,omitempty"`
+	ToolCallID string `json:"tool_call_id,omitempty"`
+}
+
+// chatRequest represents an OpenAI-compatible chat completion request.
+type chatRequest struct {
+	Model       string        `json:"model"`
+	Messages    []chatMessage `json:"messages"`
+	MaxTokens   int           `json:"max_tokens,omitempty"`
+	Temperature float32       `json:"temperature,omitempty"`
+	TopP        float32       `json:"top_p,omitempty"`
+	Stream      bool          `json:"stream"`
+	Stop        []string      `json:"stop,omitempty"`
+}
+
+// chatChoice represents a single choice in a chat completion response.
+type chatChoice struct {
+	Index        int         `json:"index"`
+	Message      chatMessage `json:"message"`
+	FinishReason string      `json:"finish_reason"`
+}
+
+// chatUsage represents token usage in a chat completion response.
+type chatUsage struct {
+	PromptTokens     int `json:"prompt_tokens"`
+	CompletionTokens int `json:"completion_tokens"`
+	TotalTokens      int `json:"total_tokens"`
+}
+
+// chatResponse represents an OpenAI-compatible chat completion response.
+type chatResponse struct {
+	Choices []chatChoice `json:"choices"`
+	Usage   chatUsage    `json:"usage"`
+}
+
+// streamDelta represents the delta in a streaming response chunk.
+type streamDelta struct {
+	Role    string `json:"role,omitempty"`
+	Content string `json:"content,omitempty"`
+}
+
+// streamChoice represents a choice in a streaming response chunk.
+type streamChoice struct {
+	Index        int         `json:"index"`
+	Delta        streamDelta `json:"delta"`
+	FinishReason *string     `json:"finish_reason"`
+}
+
+// streamChunk represents a single chunk in a streaming response.
+type streamChunk struct {
+	Choices []streamChoice `json:"choices"`
+	Usage   *chatUsage     `json:"usage,omitempty"`
+}
+
+// Load reads credentials and endpoint from the environment and records the
+// model name. It fails when AVIAN_API_KEY or the model name is missing.
+func (a *Avian) Load(opts *pb.ModelOptions) error {
+	a.apiKey = os.Getenv("AVIAN_API_KEY")
+	if a.apiKey == "" {
+		return fmt.Errorf("AVIAN_API_KEY environment variable is required")
+	}
+
+	// Drop trailing slashes so joining with "/chat/completions" never yields "//".
+	a.baseURL = strings.TrimRight(os.Getenv("AVIAN_API_BASE"), "/")
+	if a.baseURL == "" {
+		a.baseURL = defaultBaseURL
+	}
+
+	a.model = opts.Model
+	if a.model == "" {
+		return fmt.Errorf("model name is required")
+	}
+
+	return nil
+}
+
+// buildMessages converts the prediction options into chat messages: the
+// structured messages when present, otherwise the prompt as one user message.
+func (a *Avian) buildMessages(opts *pb.PredictOptions) []chatMessage {
+	// If structured messages are provided (from chat completions), use them directly
+	if len(opts.Messages) > 0 {
+		messages := make([]chatMessage, len(opts.Messages))
+		for i, msg := range opts.Messages {
+			messages[i] = chatMessage{
+				Role:       msg.Role,
+				Content:    msg.Content,
+				Name:       msg.Name,
+				ToolCallID: msg.ToolCallId,
+			}
+		}
+		return messages
+	}
+
+	// Fall back to using the prompt as a single user message
+	return []chatMessage{
+		{Role: "user", Content: opts.Prompt},
+	}
+}
+
+// newChatRequest builds the authenticated POST to /chat/completions.
+// Sampling options are only sent when set to a positive value; stream selects
+// between a single JSON reply and a server-sent event stream.
+func (a *Avian) newChatRequest(opts *pb.PredictOptions, stream bool) (*http.Request, error) {
+	reqBody := chatRequest{
+		Model:    a.model,
+		Messages: a.buildMessages(opts),
+		Stream:   stream,
+	}
+
+	// Only positive values are copied; zero-valued fields are dropped by omitempty.
+	if opts.Tokens > 0 {
+		reqBody.MaxTokens = int(opts.Tokens)
+	}
+	if opts.Temperature > 0 {
+		reqBody.Temperature = opts.Temperature
+	}
+	if opts.TopP > 0 {
+		reqBody.TopP = opts.TopP
+	}
+	if len(opts.StopPrompts) > 0 {
+		reqBody.Stop = opts.StopPrompts
+	}
+
+	jsonBody, err := json.Marshal(reqBody)
+	if err != nil {
+		return nil, fmt.Errorf("failed to marshal request: %w", err)
+	}
+
+	req, err := http.NewRequest(http.MethodPost, a.baseURL+"/chat/completions", bytes.NewReader(jsonBody))
+	if err != nil {
+		return nil, fmt.Errorf("failed to create request: %w", err)
+	}
+
+	req.Header.Set("Content-Type", "application/json")
+	req.Header.Set("Authorization", "Bearer "+a.apiKey)
+	if stream {
+		req.Header.Set("Accept", "text/event-stream")
+	}
+
+	return req, nil
+}
+
+// Predict sends a non-streaming chat completion and returns the content of the
+// first choice. Non-200 responses and empty choice lists are returned as errors.
+func (a *Avian) Predict(opts *pb.PredictOptions) (string, error) {
+	req, err := a.newChatRequest(opts, false)
+	if err != nil {
+		return "", err
+	}
+
+	resp, err := http.DefaultClient.Do(req)
+	if err != nil {
+		return "", fmt.Errorf("request failed: %w", err)
+	}
+	defer resp.Body.Close()
+
+	// Read the body before checking the status so it can be quoted in the error.
+	body, err := io.ReadAll(resp.Body)
+	if err != nil {
+		return "", fmt.Errorf("failed to read response: %w", err)
+	}
+
+	if resp.StatusCode != http.StatusOK {
+		return "", fmt.Errorf("API returned status %d: %s", resp.StatusCode, string(body))
+	}
+
+	var chatResp chatResponse
+	if err := json.Unmarshal(body, &chatResp); err != nil {
+		return "", fmt.Errorf("failed to parse response: %w", err)
+	}
+
+	if len(chatResp.Choices) == 0 {
+		return "", fmt.Errorf("no choices in response")
+	}
+
+	// Only the first choice is returned; any others are ignored.
+	return chatResp.Choices[0].Message.Content, nil
+}
+
+// PredictStream starts a streaming chat completion in a goroutine and sends each
+// content delta to results. results is always closed, either here when the
+// request cannot be built or by the goroutine when the stream ends. Failures
+// after the request is built are logged to stderr, not returned.
+func (a *Avian) PredictStream(opts *pb.PredictOptions, results chan string) error {
+	req, err := a.newChatRequest(opts, true)
+	if err != nil {
+		close(results)
+		return err
+	}
+
+	go func() {
+		defer close(results)
+
+		resp, err := http.DefaultClient.Do(req)
+		if err != nil {
+			fmt.Fprintf(os.Stderr, "avian: stream request failed: %v\n", err)
+			return
+		}
+		defer resp.Body.Close()
+
+		if resp.StatusCode != http.StatusOK {
+			// Cap the logged body so a large error page is not read in full.
+			body, _ := io.ReadAll(io.LimitReader(resp.Body, maxErrorBodyBytes))
+			fmt.Fprintf(os.Stderr, "avian: API returned status %d: %s\n", resp.StatusCode, string(body))
+			return
+		}
+
+		if err := forwardSSE(resp.Body, results); err != nil {
+			fmt.Fprintf(os.Stderr, "avian: stream read error: %v\n", err)
+		}
+	}()
+
+	return nil
+}
+
+// forwardSSE reads server-sent events from r and sends every non-empty content
+// delta to results. It returns nil at end of stream or on the "[DONE]" sentinel,
+// and the scanner error otherwise (including lines over maxSSELineBytes).
+func forwardSSE(r io.Reader, results chan<- string) error {
+	scanner := bufio.NewScanner(r)
+	// Start at 64 KiB and let the scanner grow the buffer up to maxSSELineBytes.
+	scanner.Buffer(make([]byte, 0, 64*1024), maxSSELineBytes)
+
+	for scanner.Scan() {
+		line := strings.TrimSpace(scanner.Text())
+		// Only "data" fields carry chunks; the space after the colon is optional.
+		if !strings.HasPrefix(line, "data:") {
+			continue
+		}
+
+		data := strings.TrimSpace(strings.TrimPrefix(line, "data:"))
+		if data == "[DONE]" {
+			return nil
+		}
+
+		var chunk streamChunk
+		if err := json.Unmarshal([]byte(data), &chunk); err != nil {
+			// Skip payloads that are not valid chunk JSON.
+			continue
+		}
+
+		// Role-only and empty deltas carry no text to forward.
+		if len(chunk.Choices) > 0 && chunk.Choices[0].Delta.Content != "" {
+			results <- chunk.Choices[0].Delta.Content
+		}
+	}
+
+	return scanner.Err()
+}
diff --git a/backend/go/avian/main.go b/backend/go/avian/main.go
new file mode 100644
index 000000000000..84ccc65bf1b4
--- /dev/null
+++ b/backend/go/avian/main.go
@@ -0,0 +1,22 @@
+package main
+
+// Avian backend - proxies requests to the Avian API (https://api.avian.io/v1)
+// Avian provides an OpenAI-compatible API for LLM inference.
+
+import (
+	"flag"
+
+	grpc "github.com/mudler/LocalAI/pkg/grpc"
+)
+
+var (
+	addr = flag.String("addr", "localhost:50051", "the address to connect to")
+)
+
+func main() {
+	flag.Parse()
+
+	if err := grpc.StartServer(*addr, &Avian{}); err != nil {
+		panic(err)
+	}
+}
diff --git a/backend/go/avian/package.sh b/backend/go/avian/package.sh
new file mode 100644
index 000000000000..4f4d570549cb
--- /dev/null
+++ b/backend/go/avian/package.sh
@@ -0,0 +1,12 @@
+#!/bin/bash
+
+# Copy the avian binary and its run.sh launcher into ./package
+# This script is used in the final stage of the Dockerfile
+
+set -e
+
+CURDIR=$(dirname "$(realpath "$0")")
+
+mkdir -p "$CURDIR/package"
+cp -avf "$CURDIR/avian" "$CURDIR/package/"
+cp -rfv "$CURDIR/run.sh" "$CURDIR/package/"
diff --git a/backend/go/avian/run.sh b/backend/go/avian/run.sh
new file mode 100644
index 000000000000..9eb98a0d69fe
--- /dev/null
+++ b/backend/go/avian/run.sh
@@ -0,0 +1,6 @@
+#!/bin/bash
+set -ex
+
+CURDIR=$(dirname "$(realpath "$0")")
+
+exec "$CURDIR/avian" "$@"
diff --git a/backend/index.yaml b/backend/index.yaml
index e518170ca680..0d092246da48 100644
--- a/backend/index.yaml
+++ b/backend/index.yaml
@@ -697,6 +697,25 @@
     - LLM
     - huggingface
   license: MIT
+- &avian
+  name: "avian"
+  uri: "quay.io/go-skynet/local-ai-backends:latest-avian"
+  mirrors:
+    - localai/localai-backends:latest-avian
+  icon: https://avian.io/assets/avian-logo.svg
+  urls:
+    - https://avian.io
+    - https://api.avian.io/v1
+  description: |
+    Avian is a cloud LLM inference provider with an OpenAI-compatible API.
+    Supports models like DeepSeek-V3.2, Kimi-K2.5, GLM-5, and MiniMax-M2.5.
+    Requires AVIAN_API_KEY environment variable.
+  tags:
+    - LLM
+    - cloud
+    - API
+    - avian
+  license: proprietary
 - &kitten-tts
   name: "kitten-tts"
   uri: "quay.io/go-skynet/local-ai-backends:latest-kitten-tts"
@@ -2703,3 +2722,24 @@
   uri: "quay.io/go-skynet/local-ai-backends:master-metal-darwin-arm64-voxtral"
   mirrors:
     - localai/localai-backends:master-metal-darwin-arm64-voxtral
+## avian
+- !!merge <<: *avian
+  name: "cpu-avian"
+  uri: "quay.io/go-skynet/local-ai-backends:latest-avian"
+  mirrors:
+    - localai/localai-backends:latest-avian
+- !!merge <<: *avian
+  name: "cpu-avian-development"
+  uri: "quay.io/go-skynet/local-ai-backends:master-avian"
+  mirrors:
+    - localai/localai-backends:master-avian
+- !!merge <<: *avian
+  name: "metal-avian"
+  uri: "quay.io/go-skynet/local-ai-backends:latest-metal-darwin-arm64-avian"
+  mirrors:
+    - localai/localai-backends:latest-metal-darwin-arm64-avian
+- !!merge <<: *avian
+  name: "metal-avian-development"
+  uri: "quay.io/go-skynet/local-ai-backends:master-metal-darwin-arm64-avian"
+  mirrors:
+    - localai/localai-backends:master-metal-darwin-arm64-avian
diff --git a/gallery/avian.yaml b/gallery/avian.yaml
new file mode 100644
index 000000000000..af5e115a8036
--- /dev/null
+++ b/gallery/avian.yaml
@@ -0,0 +1,18 @@
+---
+name: "avian"
+
+config_file: |
+  backend: avian
+  parameters:
+    max_tokens: 8192
+  known_usecases:
+    - FLAG_CHAT
+    - FLAG_COMPLETION
+  function:
+    disable_no_action: true
+    grammar:
+      disable: true
+      parallel_calls: true
+      expect_strings_after_json: true
+  template:
+    use_tokenizer_template: true
diff --git a/gallery/index.yaml b/gallery/index.yaml
index 25eac7020fe6..63396bd18db2 100644
--- a/gallery/index.yaml
+++ b/gallery/index.yaml
@@ -17492,3 +17492,109 @@
       pipeline_type: LTX2ImageToVideoPipeline
     options:
       - torch_dtype:bf16
+## Avian (remote API provider)
+- &avian-deepseek-v3
+  name: "avian-deepseek-v3.2"
+  url: "github:mudler/LocalAI/gallery/avian.yaml@master"
+  urls:
+    - https://avian.io
+    - https://api.avian.io/v1
+  icon: https://avian.io/assets/avian-logo.svg
+  license: proprietary
+  description: |
+    DeepSeek-V3.2 hosted on Avian. 164K context, 65K output tokens.
+    Pricing: $0.26/$0.38 per 1M tokens (input/output).
+    Requires AVIAN_API_KEY environment variable.
+  tags:
+    - llm
+    - api
+    - cloud
+    - avian
+    - deepseek
+    - chat
+  overrides:
+    parameters:
+      model: deepseek/deepseek-v3.2
+    context_size: 164000
+    known_usecases:
+      - FLAG_CHAT
+      - FLAG_COMPLETION
+- &avian-kimi-k2
+  name: "avian-kimi-k2.5"
+  url: "github:mudler/LocalAI/gallery/avian.yaml@master"
+  urls:
+    - https://avian.io
+    - https://api.avian.io/v1
+  icon: https://avian.io/assets/avian-logo.svg
+  license: proprietary
+  description: |
+    Moonshot AI Kimi-K2.5 hosted on Avian. 131K context, 8K output tokens.
+    Pricing: $0.45/$2.20 per 1M tokens (input/output).
+    Requires AVIAN_API_KEY environment variable.
+  tags:
+    - llm
+    - api
+    - cloud
+    - avian
+    - kimi
+    - chat
+  overrides:
+    parameters:
+      model: moonshotai/kimi-k2.5
+    context_size: 131000
+    known_usecases:
+      - FLAG_CHAT
+      - FLAG_COMPLETION
+- &avian-glm-5
+  name: "avian-glm-5"
+  url: "github:mudler/LocalAI/gallery/avian.yaml@master"
+  urls:
+    - https://avian.io
+    - https://api.avian.io/v1
+  icon: https://avian.io/assets/avian-logo.svg
+  license: proprietary
+  description: |
+    Z-AI GLM-5 hosted on Avian. 131K context, 16K output tokens.
+    Pricing: $0.30/$2.55 per 1M tokens (input/output).
+    Requires AVIAN_API_KEY environment variable.
+  tags:
+    - llm
+    - api
+    - cloud
+    - avian
+    - glm
+    - chat
+  overrides:
+    parameters:
+      model: z-ai/glm-5
+    context_size: 131000
+    known_usecases:
+      - FLAG_CHAT
+      - FLAG_COMPLETION
+- &avian-minimax-m2
+  name: "avian-minimax-m2.5"
+  url: "github:mudler/LocalAI/gallery/avian.yaml@master"
+  urls:
+    - https://avian.io
+    - https://api.avian.io/v1
+  icon: https://avian.io/assets/avian-logo.svg
+  license: proprietary
+  description: |
+    MiniMax M2.5 hosted on Avian. 1M context, 1M output tokens.
+    Pricing: $0.30/$1.10 per 1M tokens (input/output).
+    Requires AVIAN_API_KEY environment variable.
+  tags:
+    - llm
+    - api
+    - cloud
+    - avian
+    - minimax
+    - chat
+    - long-context
+  overrides:
+    parameters:
+      model: minimax/minimax-m2.5
+    context_size: 1000000
+    known_usecases:
+      - FLAG_CHAT
+      - FLAG_COMPLETION