From ac4e329eb5ff422d83466bfc1be3068056397a98 Mon Sep 17 00:00:00 2001
From: Neil Ruaro <neil.ruaro@camb.ai>
Date: Tue, 3 Mar 2026 15:40:16 +0800
Subject: [PATCH 1/2] feat: add CAMB AI API compatibility layer

Add CAMB AI-compatible API endpoints to LocalAI, enabling apps using the
CAMB AI SDK/API to use LocalAI as a drop-in local replacement. Follows
the existing ElevenLabs integration pattern (schema structs, endpoint
handlers, route registration). Includes e2e tests covering all endpoints
via the mock backend.
---
 core/http/app.go                              |   1 +
 .../http/endpoints/cambai/audio_separation.go |  17 +
 .../http/endpoints/cambai/sound_generation.go |  92 ++++++
 core/http/endpoints/cambai/transcription.go   |  85 +++++
 core/http/endpoints/cambai/translation.go     | 191 ++++++++++++
 core/http/endpoints/cambai/tts.go             | 130 ++++++++
 core/http/endpoints/cambai/voice.go           |  93 ++++++
 core/http/routes/cambai.go                    |  72 +++++
 core/schema/cambai.go                         | 290 ++++++++++++++++++
 tests/e2e/cambai_test.go                      | 275 +++++++++++++++++
 tests/e2e/e2e_suite_test.go                   |   7 +
 11 files changed, 1253 insertions(+)
 create mode 100644 core/http/endpoints/cambai/audio_separation.go
 create mode 100644 core/http/endpoints/cambai/sound_generation.go
 create mode 100644 core/http/endpoints/cambai/transcription.go
 create mode 100644 core/http/endpoints/cambai/translation.go
 create mode 100644 core/http/endpoints/cambai/tts.go
 create mode 100644 core/http/endpoints/cambai/voice.go
 create mode 100644 core/http/routes/cambai.go
 create mode 100644 core/schema/cambai.go
 create mode 100644 tests/e2e/cambai_test.go

diff --git a/core/http/app.go b/core/http/app.go
index 437d524135f0..e5a985e5789f 100644
--- a/core/http/app.go
+++ b/core/http/app.go
@@ -215,6 +215,7 @@ func API(application *application.Application) (*echo.Echo, error) {
 	requestExtractor := httpMiddleware.NewRequestExtractor(application.ModelConfigLoader(), application.ModelLoader(), application.ApplicationConfig())
 
 	routes.RegisterElevenLabsRoutes(e, requestExtractor, application.ModelConfigLoader(), application.ModelLoader(), application.ApplicationConfig())
+	routes.RegisterCambAIRoutes(e, requestExtractor, application.ModelConfigLoader(), application.ModelLoader(), application.ApplicationConfig())
 
 	// Create opcache for tracking UI operations (used by both UI and LocalAI routes)
 	var opcache *services.OpCache
diff --git a/core/http/endpoints/cambai/audio_separation.go b/core/http/endpoints/cambai/audio_separation.go
new file mode 100644
index 000000000000..b51bef607418
--- /dev/null
+++ b/core/http/endpoints/cambai/audio_separation.go
@@ -0,0 +1,17 @@
+package cambai
+
+import (
+	"net/http"
+
+	"github.com/labstack/echo/v4"
+	"github.com/mudler/LocalAI/core/schema"
+)
+
+// AudioSeparationEndpoint is a stub handler: every request is answered with a 501 JSON error.
+func AudioSeparationEndpoint() echo.HandlerFunc {
+	const detail = "Audio separation is not currently supported. No backend available."
+	return func(c echo.Context) error {
+		payload := schema.CambAIErrorResponse{Detail: detail}
+		return c.JSON(http.StatusNotImplemented, payload)
+	}
+}
diff --git a/core/http/endpoints/cambai/sound_generation.go b/core/http/endpoints/cambai/sound_generation.go
new file mode 100644
index 000000000000..650fd5894ddd
--- /dev/null
+++ b/core/http/endpoints/cambai/sound_generation.go
@@ -0,0 +1,92 @@
+package cambai
+
+import (
+	"net/http"
+	"path/filepath"
+
+	"github.com/google/uuid"
+	"github.com/labstack/echo/v4"
+	"github.com/mudler/LocalAI/core/backend"
+	"github.com/mudler/LocalAI/core/config"
+	"github.com/mudler/LocalAI/core/http/middleware"
+	"github.com/mudler/LocalAI/core/schema"
+	"github.com/mudler/LocalAI/pkg/audio"
+	"github.com/mudler/LocalAI/pkg/model"
+	"github.com/mudler/xlog"
+)
+
+// SoundGenerationEndpoint handles CAMB AI text-to-sound (POST /apis/text-to-sound) and replies with the generated audio file.
+func SoundGenerationEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) echo.HandlerFunc {
+	return func(c echo.Context) error {
+		input, ok := c.Get(middleware.CONTEXT_LOCALS_KEY_LOCALAI_REQUEST).(*schema.CambAITextToSoundRequest)
+		if !ok {
+			return echo.ErrBadRequest
+		}
+
+		cfg, ok := c.Get(middleware.CONTEXT_LOCALS_KEY_MODEL_CONFIG).(*config.ModelConfig)
+		if !ok || cfg == nil {
+			return echo.ErrBadRequest
+		}
+
+		xlog.Debug("CAMB AI text-to-sound request received", "model", input.Model)
+		// Only the prompt and the optional duration are forwarded; every other argument is passed as nil or "".
+		filePath, _, err := backend.SoundGeneration(
+			input.Prompt, input.Duration, nil, nil,
+			nil, nil,
+			nil, "", "", nil, "",
+			"", "",
+			nil,
+			ml, appConfig, *cfg)
+		if err != nil {
+			return err
+		}
+
+		filePath, contentType := audio.NormalizeAudioFile(filePath)
+
+		taskID := uuid.New().String()
+
+		// The task ID is freshly generated and only echoed in headers; the audio itself is the response body.
+		c.Response().Header().Set("X-Task-ID", taskID)
+		c.Response().Header().Set("X-Task-Status", "SUCCESS")
+		if contentType != "" {
+			c.Response().Header().Set("Content-Type", contentType)
+		}
+		return c.Attachment(filePath, filepath.Base(filePath))
+	}
+}
+
+// SoundGenerationAsyncEndpoint runs sound generation inline and replies with a CAMB AI task envelope instead of audio.
+func SoundGenerationAsyncEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) echo.HandlerFunc {
+	return func(c echo.Context) error {
+		input, ok := c.Get(middleware.CONTEXT_LOCALS_KEY_LOCALAI_REQUEST).(*schema.CambAITextToSoundRequest)
+		if !ok {
+			return echo.ErrBadRequest
+		}
+
+		cfg, ok := c.Get(middleware.CONTEXT_LOCALS_KEY_MODEL_CONFIG).(*config.ModelConfig)
+		if !ok || cfg == nil {
+			return echo.ErrBadRequest
+		}
+
+		xlog.Debug("CAMB AI text-to-sound async request received", "model", input.Model)
+
+		_, _, err := backend.SoundGeneration(
+			input.Prompt, input.Duration, nil, nil,
+			nil, nil,
+			nil, "", "", nil, "",
+			"", "",
+			nil,
+			ml, appConfig, *cfg)
+		if err != nil {
+			return err
+		}
+		// NOTE(review): the generated file path is discarded above, so this task ID cannot be resolved to audio — confirm intended.
+		taskID := uuid.New().String()
+
+		return c.JSON(http.StatusOK, schema.CambAITaskResponse{
+			TaskID: taskID,
+			Status: "SUCCESS",
+			RunID:  taskID,
+		})
+	}
+}
diff --git a/core/http/endpoints/cambai/transcription.go b/core/http/endpoints/cambai/transcription.go
new file mode 100644
index 000000000000..2b633f44485a
--- /dev/null
+++ b/core/http/endpoints/cambai/transcription.go
@@ -0,0 +1,85 @@
+package cambai
+
+import (
+	"io"
+	"net/http"
+	"os"
+	"path"
+	"path/filepath"
+
+	"github.com/google/uuid"
+	"github.com/labstack/echo/v4"
+	"github.com/mudler/LocalAI/core/backend"
+	"github.com/mudler/LocalAI/core/config"
+	"github.com/mudler/LocalAI/core/http/middleware"
+	"github.com/mudler/LocalAI/core/schema"
+	"github.com/mudler/LocalAI/pkg/model"
+	"github.com/mudler/xlog"
+)
+
+// TranscriptionEndpoint handles CAMB AI transcription (POST /apis/transcribe).
+// The upload is spooled to a temp dir, transcribed inline, and answered in CAMB AI's async task format.
+func TranscriptionEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) echo.HandlerFunc {
+	return func(c echo.Context) error {
+		cfg, ok := c.Get(middleware.CONTEXT_LOCALS_KEY_MODEL_CONFIG).(*config.ModelConfig)
+		if !ok || cfg == nil {
+			return echo.ErrBadRequest
+		}
+
+		input, _ := c.Get(middleware.CONTEXT_LOCALS_KEY_LOCALAI_REQUEST).(*schema.CambAITranscriptionRequest) // optional: without it there is simply no language hint
+
+		language := ""
+		if input != nil && input.LanguageID > 0 {
+			language = schema.CambAILanguageCodeFromID(input.LanguageID)
+		}
+
+		file, err := c.FormFile("file")
+		if err != nil {
+			return c.JSON(http.StatusBadRequest, schema.CambAIErrorResponse{
+				Detail: "Audio file is required. Upload as multipart form field 'file'.",
+			})
+		}
+
+		f, err := file.Open()
+		if err != nil {
+			return err
+		}
+		defer f.Close()
+
+		dir, err := os.MkdirTemp("", "cambai-transcribe")
+		if err != nil {
+			return err
+		}
+		defer os.RemoveAll(dir)
+
+		dst := filepath.Join(dir, path.Base(file.Filename))
+		dstFile, err := os.Create(dst)
+		if err != nil {
+			return err
+		}
+
+		_, err = io.Copy(dstFile, f)
+		dstFile.Close() // close before inspecting the copy result so a failed copy cannot leak the descriptor
+		if err != nil {
+			xlog.Debug("Audio file copying error", "filename", file.Filename, "dst", dst, "error", err)
+			return err
+		}
+
+		xlog.Debug("CAMB AI transcription request", "file", dst, "language", language)
+		tr, err := backend.ModelTranscription(dst, language, false, false, "", ml, *cfg, appConfig)
+		if err != nil {
+			return err
+		}
+
+		taskID := uuid.New().String()
+
+		return c.JSON(http.StatusOK, schema.CambAITaskStatusResponse{
+			Status: "SUCCESS",
+			RunID:  taskID,
+			Output: schema.CambAITranscriptionResponse{
+				Text:     tr.Text,
+				Language: language,
+			},
+		})
+	}
+}
diff --git a/core/http/endpoints/cambai/translation.go b/core/http/endpoints/cambai/translation.go
new file mode 100644
index 000000000000..d62b41fca078
--- /dev/null
+++ b/core/http/endpoints/cambai/translation.go
@@ -0,0 +1,191 @@
+package cambai
+
+import (
+	"context"
+	"fmt"
+	"net/http"
+	"strings"
+
+	"github.com/google/uuid"
+	"github.com/labstack/echo/v4"
+	"github.com/mudler/LocalAI/core/backend"
+	"github.com/mudler/LocalAI/core/config"
+	"github.com/mudler/LocalAI/core/http/middleware"
+	"github.com/mudler/LocalAI/core/schema"
+	"github.com/mudler/LocalAI/pkg/model"
+	"github.com/mudler/xlog"
+)
+
+// buildTranslationPrompt renders the LLM instruction that asks for text to be
+// translated from sourceLang to targetLang, followed by the text itself.
+func buildTranslationPrompt(text, sourceLang, targetLang string) string {
+	const tmpl = "Translate the following text from %s to %s. Output ONLY the translation, nothing else.\n\n%s"
+	return fmt.Sprintf(tmpl, sourceLang, targetLang, text)
+}
+
+// TranslationEndpoint handles CAMB AI translation (POST /apis/translate).
+// Each entry of "texts" is translated by one LLM inference call, in order; the first failure aborts the request.
+func TranslationEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) echo.HandlerFunc {
+	return func(c echo.Context) error {
+		input, ok := c.Get(middleware.CONTEXT_LOCALS_KEY_LOCALAI_REQUEST).(*schema.CambAITranslationRequest)
+		if !ok {
+			return echo.ErrBadRequest
+		}
+
+		cfg, ok := c.Get(middleware.CONTEXT_LOCALS_KEY_MODEL_CONFIG).(*config.ModelConfig)
+		if !ok || cfg == nil {
+			return echo.ErrBadRequest
+		}
+
+		xlog.Debug("CAMB AI translation request received", "model", input.Model)
+
+		sourceLang := schema.CambAILanguageCodeFromID(input.SourceLanguageID)
+		targetLang := schema.CambAILanguageCodeFromID(input.TargetLanguageID)
+		// Pre-sized and non-nil, so an empty "texts" list serializes as [] rather than null.
+		translations := make([]string, 0, len(input.Texts))
+		for _, text := range input.Texts {
+			prompt := buildTranslationPrompt(text, sourceLang, targetLang)
+
+			fn, err := backend.ModelInference(
+				c.Request().Context(), prompt, nil, nil, nil, nil,
+				ml, cfg, cl, appConfig, nil, "", "", nil, nil, nil,
+			)
+			if err != nil {
+				return err
+			}
+
+			resp, err := fn()
+			if err != nil {
+				return err
+			}
+
+			translations = append(translations, strings.TrimSpace(resp.Response))
+		}
+
+		taskID := uuid.New().String()
+
+		return c.JSON(http.StatusOK, schema.CambAITaskStatusResponse{
+			Status: "SUCCESS",
+			RunID:  taskID,
+			Output: schema.CambAITranslationResponse{
+				Translation: translations,
+				SourceLang:  input.SourceLanguageID,
+				TargetLang:  input.TargetLanguageID,
+			},
+		})
+	}
+}
+
+// TranslationStreamEndpoint handles CAMB AI streaming translation (POST /apis/translation/stream), writing tokens as plain text.
+func TranslationStreamEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) echo.HandlerFunc {
+	return func(c echo.Context) error {
+		input, ok := c.Get(middleware.CONTEXT_LOCALS_KEY_LOCALAI_REQUEST).(*schema.CambAITranslationStreamRequest)
+		if !ok {
+			return echo.ErrBadRequest
+		}
+
+		cfg, ok := c.Get(middleware.CONTEXT_LOCALS_KEY_MODEL_CONFIG).(*config.ModelConfig)
+		if !ok || cfg == nil {
+			return echo.ErrBadRequest
+		}
+		xlog.Debug("CAMB AI translation stream request received", "model", input.Model)
+
+		sourceLang := schema.CambAILanguageCodeFromID(input.SourceLanguageID)
+		targetLang := schema.CambAILanguageCodeFromID(input.TargetLanguageID)
+		prompt := buildTranslationPrompt(input.Text, sourceLang, targetLang)
+
+		c.Response().Header().Set("Content-Type", "text/plain; charset=utf-8")
+		c.Response().Header().Set("Transfer-Encoding", "chunked")
+		c.Response().Header().Set("Cache-Control", "no-cache")
+		c.Response().Header().Set("Connection", "keep-alive")
+
+		// Bind inference to the request: a client disconnect or a failed write cancels generation.
+		ctx, cancel := context.WithCancel(c.Request().Context())
+		defer cancel()
+		fn, err := backend.ModelInference(
+			ctx, prompt, nil, nil, nil, nil,
+			ml, cfg, cl, appConfig,
+			func(token string, _ backend.TokenUsage) bool {
+				if _, writeErr := c.Response().Write([]byte(token)); writeErr != nil {
+					cancel()
+					return false
+				}
+				c.Response().Flush()
+				return true
+			},
+			"", "", nil, nil, nil,
+		)
+		if err != nil {
+			return err
+		}
+		_, err = fn() // runs the inference; tokens are delivered through the callback above
+		return err
+	}
+}
+
+// TranslatedTTSEndpoint handles CAMB AI translated TTS (POST /apis/translated-tts).
+// It translates the text with the request's model config, then synthesizes the translation with a TTS model.
+func TranslatedTTSEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) echo.HandlerFunc {
+	return func(c echo.Context) error {
+		input, ok := c.Get(middleware.CONTEXT_LOCALS_KEY_LOCALAI_REQUEST).(*schema.CambAITranslatedTTSRequest)
+		if !ok {
+			return echo.ErrBadRequest
+		}
+
+		cfg, ok := c.Get(middleware.CONTEXT_LOCALS_KEY_MODEL_CONFIG).(*config.ModelConfig)
+		if !ok || cfg == nil {
+			return echo.ErrBadRequest
+		}
+
+		xlog.Debug("CAMB AI translated TTS request received", "model", input.Model)
+
+		sourceLang := schema.CambAILanguageCodeFromID(input.SourceLanguageID)
+		targetLang := schema.CambAILanguageCodeFromID(input.TargetLanguageID)
+		prompt := buildTranslationPrompt(input.Text, sourceLang, targetLang)
+
+		// Step 1: Translate
+		fn, err := backend.ModelInference(
+			c.Request().Context(), prompt, nil, nil, nil, nil,
+			ml, cfg, cl, appConfig, nil, "", "", nil, nil, nil,
+		)
+		if err != nil {
+			return err
+		}
+
+		resp, err := fn()
+		if err != nil {
+			return err
+		}
+
+		translatedText := strings.TrimSpace(resp.Response)
+
+		// Step 2: TTS on translated text
+		// The first config returned by the TTS usecase filter is used; the request's own config drives translation only.
+		ttsConfigs := cl.GetModelConfigsByFilter(config.BuildUsecaseFilterFn(config.FLAG_TTS))
+		if len(ttsConfigs) == 0 {
+			return c.JSON(http.StatusServiceUnavailable, schema.CambAIErrorResponse{
+				Detail: "No TTS model configured. Configure a TTS model to use translated TTS.",
+			})
+		}
+		ttsCfg := ttsConfigs[0]
+
+		voice := fmt.Sprintf("%d", input.VoiceID)
+		language := targetLang
+
+		filePath, _, err := backend.ModelTTS(translatedText, voice, language, ml, appConfig, ttsCfg)
+		if err != nil {
+			return err
+		}
+
+		taskID := uuid.New().String()
+		// NOTE(review): audio_path is the path returned by ModelTTS — presumably server-local; confirm clients can fetch it.
+		return c.JSON(http.StatusOK, schema.CambAITaskStatusResponse{
+			Status: "SUCCESS",
+			RunID:  taskID,
+			Output: map[string]string{
+				"translation": translatedText,
+				"audio_path":  filePath,
+			},
+		})
+	}
+}
diff --git a/core/http/endpoints/cambai/tts.go b/core/http/endpoints/cambai/tts.go
new file mode 100644
index 000000000000..bc4786ddf90d
--- /dev/null
+++ b/core/http/endpoints/cambai/tts.go
@@ -0,0 +1,130 @@
+package cambai
+
+import (
+	"fmt"
+	"net/http"
+	"path/filepath"
+	"sync"
+
+	"github.com/google/uuid"
+	"github.com/labstack/echo/v4"
+	"github.com/mudler/LocalAI/core/backend"
+	"github.com/mudler/LocalAI/core/config"
+	"github.com/mudler/LocalAI/core/http/middleware"
+	"github.com/mudler/LocalAI/core/schema"
+	"github.com/mudler/LocalAI/pkg/audio"
+	"github.com/mudler/LocalAI/pkg/model"
+	"github.com/mudler/xlog"
+)
+
+// ttsTaskResults maps an async TTS task ID to the value TTSEndpoint stored for it (the generated audio path).
+var ttsTaskResults sync.Map
+
+// TTSStreamEndpoint handles CAMB AI streaming TTS (POST /apis/tts-stream), falling back to a whole-file reply if streaming fails before any audio is sent.
+func TTSStreamEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) echo.HandlerFunc {
+	return func(c echo.Context) error {
+		input, ok := c.Get(middleware.CONTEXT_LOCALS_KEY_LOCALAI_REQUEST).(*schema.CambAITTSStreamRequest)
+		if !ok || input.SpeechModel == "" || input.Text == "" {
+			return echo.ErrBadRequest
+		}
+
+		cfg, ok := c.Get(middleware.CONTEXT_LOCALS_KEY_MODEL_CONFIG).(*config.ModelConfig)
+		if !ok || cfg == nil {
+			return echo.ErrBadRequest
+		}
+		xlog.Debug("CAMB AI TTS stream request received", "model", input.SpeechModel)
+
+		voice := fmt.Sprintf("%d", input.VoiceID)
+		language := input.Language
+
+		c.Response().Header().Set("Content-Type", "audio/wav")
+		c.Response().Header().Set("Transfer-Encoding", "chunked")
+		c.Response().Header().Set("Cache-Control", "no-cache")
+		c.Response().Header().Set("Connection", "keep-alive")
+
+		err := backend.ModelTTSStream(input.Text, voice, language, ml, appConfig, *cfg, func(audioChunk []byte) error {
+			if _, writeErr := c.Response().Write(audioChunk); writeErr != nil {
+				return writeErr
+			}
+			c.Response().Flush()
+			return nil
+		})
+		if err != nil {
+			if c.Response().Committed { // audio already reached the client: a second body would corrupt the response
+				return err
+			}
+			// Fallback to non-streaming TTS
+			xlog.Debug("Streaming TTS not supported, falling back to non-streaming", "error", err)
+			filePath, _, ttsErr := backend.ModelTTS(input.Text, voice, language, ml, appConfig, *cfg)
+			if ttsErr != nil {
+				return ttsErr
+			}
+			filePath, contentType := audio.NormalizeAudioFile(filePath)
+			if contentType != "" {
+				c.Response().Header().Set("Content-Type", contentType)
+			}
+			return c.Attachment(filePath, filepath.Base(filePath))
+		}
+		return nil
+	}
+}
+
+// TTSEndpoint handles CAMB AI async TTS (POST /apis/tts): it synthesizes inline and records the audio path under a new task ID.
+func TTSEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) echo.HandlerFunc {
+	return func(c echo.Context) error {
+		input, ok := c.Get(middleware.CONTEXT_LOCALS_KEY_LOCALAI_REQUEST).(*schema.CambAITTSRequest)
+		if !ok {
+			return echo.ErrBadRequest
+		}
+
+		cfg, ok := c.Get(middleware.CONTEXT_LOCALS_KEY_MODEL_CONFIG).(*config.ModelConfig)
+		if !ok || cfg == nil {
+			return echo.ErrBadRequest
+		}
+
+		xlog.Debug("CAMB AI TTS request received", "model", input.Model)
+
+		voice := fmt.Sprintf("%d", input.VoiceID)
+		language := schema.CambAILanguageCodeFromID(input.LanguageID) // yields "lang-<id>" for unmapped IDs, including an omitted (zero) language
+
+		filePath, _, err := backend.ModelTTS(input.Text, voice, language, ml, appConfig, *cfg)
+		if err != nil {
+			return err
+		}
+		// NOTE(review): entries are never removed from ttsTaskResults in the code shown — confirm eviction is handled or not needed.
+		taskID := uuid.New().String()
+		ttsTaskResults.Store(taskID, filePath)
+
+		return c.JSON(http.StatusOK, schema.CambAITaskResponse{
+			TaskID: taskID,
+			Status: "SUCCESS",
+			RunID:  taskID,
+		})
+	}
+}
+
+// TTSTaskStatusEndpoint serves the audio recorded for an async TTS task (GET /apis/tts/:task_id).
+func TTSTaskStatusEndpoint() echo.HandlerFunc {
+	return func(c echo.Context) error {
+		stored, found := ttsTaskResults.Load(c.Param("task_id"))
+		if !found {
+			return c.JSON(http.StatusNotFound, schema.CambAIErrorResponse{
+				Detail: "Task not found",
+			})
+		}
+
+		// The map holds untyped values; anything that is not a string path is reported as a server error.
+		audioPath, isPath := stored.(string)
+		if !isPath {
+			return c.JSON(http.StatusInternalServerError, schema.CambAIErrorResponse{
+				Detail: "Invalid task result",
+			})
+		}
+
+		audioPath, mime := audio.NormalizeAudioFile(audioPath)
+		if mime != "" {
+			c.Response().Header().Set("Content-Type", mime)
+		}
+		return c.Attachment(audioPath, filepath.Base(audioPath))
+	}
+}
diff --git a/core/http/endpoints/cambai/voice.go b/core/http/endpoints/cambai/voice.go
new file mode 100644
index 000000000000..577dbc2f73d4
--- /dev/null
+++ b/core/http/endpoints/cambai/voice.go
@@ -0,0 +1,93 @@
+package cambai
+
+import (
+	"fmt"
+	"io"
+	"net/http"
+	"os"
+	"path/filepath"
+
+	"github.com/labstack/echo/v4"
+	"github.com/mudler/LocalAI/core/config"
+	"github.com/mudler/LocalAI/core/schema"
+	"github.com/mudler/LocalAI/pkg/model"
+	"github.com/mudler/xlog"
+)
+
+// ListVoicesEndpoint handles CAMB AI list voices (GET /apis/list-voices).
+// Every TTS-capable model config becomes one voice whose ID is its 1-based position in the filtered list.
+func ListVoicesEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) echo.HandlerFunc {
+	return func(c echo.Context) error {
+		ttsModels := cl.GetModelConfigsByFilter(config.BuildUsecaseFilterFn(config.FLAG_TTS))
+
+		listed := make([]schema.CambAIVoice, 0)
+		for idx, m := range ttsModels {
+			label := m.Name
+			if m.Voice != "" {
+				label = fmt.Sprintf("%s (%s)", m.Name, m.Voice)
+			}
+			listed = append(listed, schema.CambAIVoice{
+				VoiceID: idx + 1,
+				Name:    label,
+			})
+		}
+
+		payload := schema.CambAIListVoicesResponse{Voices: listed}
+		return c.JSON(http.StatusOK, payload)
+	}
+}
+
+// CreateCustomVoiceEndpoint handles CAMB AI custom voice creation (POST /apis/create-custom-voice).
+// Accepts an audio file upload and saves it as <model path>/voices/<voice_name><ext> for voice cloning.
+func CreateCustomVoiceEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) echo.HandlerFunc {
+	return func(c echo.Context) error {
+		voiceName := c.FormValue("voice_name")
+		if voiceName == "" {
+			return c.JSON(http.StatusBadRequest, schema.CambAIErrorResponse{
+				Detail: "voice_name is required",
+			})
+		}
+
+		file, err := c.FormFile("file")
+		if err != nil {
+			return c.JSON(http.StatusBadRequest, schema.CambAIErrorResponse{
+				Detail: "Audio file is required. Upload as multipart form field 'file'.",
+			})
+		}
+
+		f, err := file.Open()
+		if err != nil {
+			return err
+		}
+		defer f.Close()
+
+		// Save audio file to models directory for voice cloning
+		voiceDir := filepath.Join(ml.ModelPath, "voices")
+		if err := os.MkdirAll(voiceDir, 0750); err != nil {
+			return err
+		}
+
+		ext := filepath.Ext(file.Filename)
+		if ext == "" {
+			ext = ".wav"
+		}
+		dstPath := filepath.Join(voiceDir, voiceName+ext)
+		// voice_name is client-supplied: refuse any value ("../x", "a/b", ...) that resolves outside voiceDir.
+		if filepath.Dir(dstPath) != voiceDir {
+			return c.JSON(http.StatusBadRequest, schema.CambAIErrorResponse{Detail: "voice_name must be a plain name without path separators"})
+		}
+
+		dst, err := os.Create(dstPath)
+		if err != nil {
+			return err
+		}
+		defer dst.Close()
+
+		if _, err := io.Copy(dst, f); err != nil {
+			return err
+		}
+
+		xlog.Info("Custom voice audio saved", "name", voiceName, "path", dstPath)
+		return c.JSON(http.StatusOK, schema.CambAIVoice{VoiceID: 0, Name: voiceName})
+	}
+}
diff --git a/core/http/routes/cambai.go b/core/http/routes/cambai.go
new file mode 100644
index 000000000000..73246849f3b5
--- /dev/null
+++ b/core/http/routes/cambai.go
@@ -0,0 +1,72 @@
+package routes
+
+import (
+	"github.com/labstack/echo/v4"
+	"github.com/mudler/LocalAI/core/config"
+	"github.com/mudler/LocalAI/core/http/endpoints/cambai"
+	"github.com/mudler/LocalAI/core/http/middleware"
+	"github.com/mudler/LocalAI/core/schema"
+	"github.com/mudler/LocalAI/pkg/model"
+)
+
+// RegisterCambAIRoutes registers the CAMB AI-compatible /apis/* routes on app; each model-backed POST route gets a usecase-filtered default-model middleware and a SetModelAndConfig middleware built with its request type.
+func RegisterCambAIRoutes(app *echo.Echo,
+	re *middleware.RequestExtractor,
+	cl *config.ModelConfigLoader,
+	ml *model.ModelLoader,
+	appConfig *config.ApplicationConfig) {
+	// TTS streaming (POST /apis/tts-stream)
+	app.POST("/apis/tts-stream",
+		cambai.TTSStreamEndpoint(cl, ml, appConfig),
+		re.BuildFilteredFirstAvailableDefaultModel(config.BuildUsecaseFilterFn(config.FLAG_TTS)),
+		re.SetModelAndConfig(func() schema.LocalAIRequest { return new(schema.CambAITTSStreamRequest) }))
+
+	// TTS async (POST /apis/tts)
+	app.POST("/apis/tts",
+		cambai.TTSEndpoint(cl, ml, appConfig),
+		re.BuildFilteredFirstAvailableDefaultModel(config.BuildUsecaseFilterFn(config.FLAG_TTS)),
+		re.SetModelAndConfig(func() schema.LocalAIRequest { return new(schema.CambAITTSRequest) }))
+
+	// TTS task status (GET /apis/tts/:task_id)
+	app.GET("/apis/tts/:task_id", cambai.TTSTaskStatusEndpoint())
+
+	// Translated TTS (POST /apis/translated-tts)
+	app.POST("/apis/translated-tts",
+		cambai.TranslatedTTSEndpoint(cl, ml, appConfig),
+		re.BuildFilteredFirstAvailableDefaultModel(config.BuildUsecaseFilterFn(config.FLAG_CHAT)),
+		re.SetModelAndConfig(func() schema.LocalAIRequest { return new(schema.CambAITranslatedTTSRequest) }))
+
+	// Translation (POST /apis/translate)
+	app.POST("/apis/translate",
+		cambai.TranslationEndpoint(cl, ml, appConfig),
+		re.BuildFilteredFirstAvailableDefaultModel(config.BuildUsecaseFilterFn(config.FLAG_CHAT)),
+		re.SetModelAndConfig(func() schema.LocalAIRequest { return new(schema.CambAITranslationRequest) }))
+
+	// Translation streaming (POST /apis/translation/stream)
+	app.POST("/apis/translation/stream",
+		cambai.TranslationStreamEndpoint(cl, ml, appConfig),
+		re.BuildFilteredFirstAvailableDefaultModel(config.BuildUsecaseFilterFn(config.FLAG_CHAT)),
+		re.SetModelAndConfig(func() schema.LocalAIRequest { return new(schema.CambAITranslationStreamRequest) }))
+
+	// Transcription (POST /apis/transcribe)
+	app.POST("/apis/transcribe",
+		cambai.TranscriptionEndpoint(cl, ml, appConfig),
+		re.BuildFilteredFirstAvailableDefaultModel(config.BuildUsecaseFilterFn(config.FLAG_TRANSCRIPT)),
+		re.SetModelAndConfig(func() schema.LocalAIRequest { return new(schema.CambAITranscriptionRequest) }))
+
+	// Text-to-sound (POST /apis/text-to-sound)
+	app.POST("/apis/text-to-sound",
+		cambai.SoundGenerationEndpoint(cl, ml, appConfig),
+		re.BuildFilteredFirstAvailableDefaultModel(config.BuildUsecaseFilterFn(config.FLAG_SOUND_GENERATION)),
+		re.SetModelAndConfig(func() schema.LocalAIRequest { return new(schema.CambAITextToSoundRequest) }))
+
+	// List voices (GET /apis/list-voices)
+	app.GET("/apis/list-voices", cambai.ListVoicesEndpoint(cl, ml, appConfig))
+
+	// Create custom voice (POST /apis/create-custom-voice)
+	app.POST("/apis/create-custom-voice",
+		cambai.CreateCustomVoiceEndpoint(cl, ml, appConfig))
+
+	// Audio separation stub (POST /apis/audio-separation)
+	app.POST("/apis/audio-separation", cambai.AudioSeparationEndpoint())
+}
diff --git a/core/schema/cambai.go b/core/schema/cambai.go
new file mode 100644
index 000000000000..09380ba36455
--- /dev/null
+++ b/core/schema/cambai.go
@@ -0,0 +1,290 @@
+package schema
+
+import "fmt"
+
+// CambAITTSStreamRequest is the body of the CAMB AI streaming TTS request (POST /apis/tts-stream).
+type CambAITTSStreamRequest struct {
+	Text                string                     `json:"text"`
+	VoiceID             int                        `json:"voice_id"`
+	Language            string                     `json:"language"`
+	SpeechModel         string                     `json:"speech_model"`
+	OutputConfiguration *CambAIOutputConfiguration `json:"output_configuration,omitempty"`
+	InferenceOptions    *CambAITTSInferenceOptions `json:"inference_options,omitempty"`
+}
+
+type CambAIOutputConfiguration struct {
+	Format     string `json:"format,omitempty"`
+	SampleRate int    `json:"sample_rate,omitempty"`
+}
+
+type CambAITTSInferenceOptions struct {
+	Speed       *float32 `json:"speed,omitempty"`
+	Pitch       *float32 `json:"pitch,omitempty"`
+	Temperature *float32 `json:"temperature,omitempty"`
+}
+
+func (r *CambAITTSStreamRequest) ModelName(s *string) string {
+	if s != nil {
+		r.SpeechModel = *s // a non-nil s overrides the stored speech model before it is returned
+	}
+	return r.SpeechModel
+}
+
+// CambAITTSRequest is the body of the CAMB AI async TTS request (POST /apis/tts); "language" carries a numeric language ID.
+type CambAITTSRequest struct {
+	Text       string `json:"text"`
+	VoiceID    int    `json:"voice_id"`
+	LanguageID int    `json:"language"`
+	Model      string `json:"model,omitempty"`
+}
+
+func (r *CambAITTSRequest) ModelName(s *string) string {
+	if s != nil {
+		r.Model = *s // a non-nil s overrides the stored model name before it is returned
+	}
+	return r.Model
+}
+
+// CambAITranslatedTTSRequest is the body of the CAMB AI translated TTS request (POST /apis/translated-tts); languages are numeric IDs.
+type CambAITranslatedTTSRequest struct {
+	Text             string `json:"text"`
+	VoiceID          int    `json:"voice_id"`
+	SourceLanguageID int    `json:"source_language"`
+	TargetLanguageID int    `json:"target_language"`
+	Model            string `json:"model,omitempty"`
+}
+
+func (r *CambAITranslatedTTSRequest) ModelName(s *string) string {
+	if s != nil {
+		r.Model = *s // a non-nil s overrides the stored model name before it is returned
+	}
+	return r.Model
+}
+
+// CambAITranslationRequest is the body of the CAMB AI batch translation request (POST /apis/translate); languages are numeric IDs.
+type CambAITranslationRequest struct {
+	Texts            []string `json:"texts"`
+	SourceLanguageID int      `json:"source_language"`
+	TargetLanguageID int      `json:"target_language"`
+	Model            string   `json:"model,omitempty"`
+}
+
+func (r *CambAITranslationRequest) ModelName(s *string) string {
+	if s != nil {
+		r.Model = *s // a non-nil s overrides the stored model name before it is returned
+	}
+	return r.Model
+}
+
+// CambAITranslationStreamRequest is the body of the CAMB AI streaming translation request (POST /apis/translation/stream).
+type CambAITranslationStreamRequest struct {
+	Text             string `json:"text"`
+	SourceLanguageID int    `json:"source_language"`
+	TargetLanguageID int    `json:"target_language"`
+	Model            string `json:"model,omitempty"`
+}
+
+func (r *CambAITranslationStreamRequest) ModelName(s *string) string {
+	if s != nil {
+		r.Model = *s // a non-nil s overrides the stored model name before it is returned
+	}
+	return r.Model
+}
+
+// CambAITranscriptionRequest holds the non-file parameters of the CAMB AI transcription request (POST /apis/transcribe).
+type CambAITranscriptionRequest struct {
+	LanguageID int    `json:"language,omitempty"`
+	MediaURL   string `json:"media_url,omitempty"` // NOTE(review): not read by TranscriptionEndpoint in this patch — confirm whether URL input is planned
+	Model      string `json:"model,omitempty"`
+}
+
+func (r *CambAITranscriptionRequest) ModelName(s *string) string {
+	if s != nil {
+		r.Model = *s // a non-nil s overrides the stored model name before it is returned
+	}
+	return r.Model
+}
+
+// CambAITextToSoundRequest is the body of the CAMB AI text-to-sound request (POST /apis/text-to-sound); Duration is optional.
+type CambAITextToSoundRequest struct {
+	Prompt   string   `json:"prompt"`
+	Duration *float32 `json:"duration,omitempty"`
+	Model    string   `json:"model,omitempty"`
+}
+
+func (r *CambAITextToSoundRequest) ModelName(s *string) string {
+	if s != nil {
+		r.Model = *s // a non-nil s overrides the stored model name before it is returned
+	}
+	return r.Model
+}
+
+// CambAICreateCustomVoiceRequest models POST /apis/create-custom-voice. NOTE(review): CreateCustomVoiceEndpoint reads voice_name via FormValue instead — confirm this type is still needed.
+type CambAICreateCustomVoiceRequest struct {
+	VoiceName string `json:"voice_name"`
+	Model     string `json:"model,omitempty"`
+}
+
+func (r *CambAICreateCustomVoiceRequest) ModelName(s *string) string {
+	if s != nil {
+		r.Model = *s // a non-nil s overrides the stored model name before it is returned
+	}
+	return r.Model
+}
+
+// Response types returned by the CAMB AI handlers. NOTE(review): JSON field names presumably mirror CAMB AI's schema — confirm against its API reference.
+
+type CambAITaskResponse struct {
+	TaskID string `json:"task_id"`
+	Status string `json:"status"`
+	RunID  string `json:"run_id,omitempty"`
+}
+
+type CambAITaskStatusResponse struct {
+	Status string `json:"status"`
+	RunID  string `json:"run_id,omitempty"`
+	Output any    `json:"output,omitempty"` // endpoint-specific payload, e.g. CambAITranslationResponse or CambAITranscriptionResponse
+}
+
+type CambAIVoice struct {
+	VoiceID int    `json:"voice_id"`
+	Name    string `json:"voice_name"`
+	Gender  string `json:"gender,omitempty"`
+	Age     string `json:"age,omitempty"`
+}
+
+type CambAIListVoicesResponse struct {
+	Voices []CambAIVoice `json:"voices"`
+}
+
+type CambAIErrorResponse struct {
+	Detail string `json:"detail"`
+}
+
+type CambAITranslationResponse struct {
+	Translation []string `json:"translation"`
+	SourceLang  int      `json:"source_language"`
+	TargetLang  int      `json:"target_language"`
+}
+
+type CambAITranscriptionResponse struct {
+	Text     string `json:"text"`
+	Language string `json:"language,omitempty"`
+}
+
+// CambAILanguageIDToCode maps CAMB AI integer language IDs to short language codes (two-letter codes plus "pt-br").
+// This is a subset covering the most common languages. NOTE(review): verify the IDs against CAMB AI's published language table.
+var CambAILanguageIDToCode = map[int]string{
+	1:   "en",
+	2:   "ko",
+	3:   "nl",
+	4:   "tr",
+	5:   "uk",
+	6:   "pl",
+	7:   "ta",
+	8:   "vi",
+	9:   "sv",
+	10:  "id",
+	11:  "ms",
+	12:  "ja",
+	13:  "zh",
+	14:  "bn",
+	15:  "th",
+	16:  "tl",
+	17:  "he",
+	18:  "pt-br",
+	19:  "pt",
+	20:  "ru",
+	21:  "ca",
+	22:  "te",
+	23:  "ml",
+	24:  "kn",
+	25:  "gu",
+	26:  "mr",
+	27:  "hi",
+	28:  "da",
+	29:  "fi",
+	30:  "no",
+	31:  "hu",
+	32:  "sk",
+	33:  "cs",
+	34:  "el",
+	35:  "ro",
+	36:  "bg",
+	37:  "sr",
+	38:  "hr",
+	39:  "sl",
+	40:  "mk",
+	41:  "et",
+	42:  "lt",
+	43:  "lv",
+	44:  "sw",
+	45:  "ar",
+	46:  "ur",
+	47:  "fa",
+	48:  "af",
+	49:  "my",
+	50:  "bs",
+	51:  "si",
+	52:  "ne",
+	53:  "km",
+	54:  "es",
+	55:  "cy",
+	56:  "is",
+	57:  "pa",
+	58:  "as",
+	59:  "ga",
+	60:  "am",
+	61:  "az",
+	62:  "uz",
+	63:  "ka",
+	64:  "sq",
+	65:  "mn",
+	66:  "la",
+	67:  "gl",
+	68:  "eu",
+	69:  "it",
+	70:  "de",
+	71:  "nn",
+	72:  "lo",
+	73:  "yo",
+	74:  "ig",
+	75:  "ha",
+	76:  "fr",
+	77:  "zu",
+	78:  "xh",
+	79:  "so",
+	80:  "mt",
+	81:  "eo",
+	82:  "jw", // NOTE(review): "jw" looks like the legacy code for Javanese ("jv" today) — confirm what backends expect
+	83:  "su",
+	84:  "ps",
+	85:  "sd",
+	86:  "mg",
+	87:  "hy",
+	88:  "lb",
+	89:  "be",
+	90:  "tt",
+	91:  "tg",
+	92:  "ky",
+	93:  "tk",
+	94:  "ha", // NOTE(review): "ha" is also the value for ID 75 — confirm which ID is correct
+	95:  "sn",
+	96:  "ln",
+	97:  "rw",
+	98:  "ny",
+	99:  "ts",
+	100: "tn",
+	101: "st",
+	102: "ss",
+	103: "nd",
+	104: "ve",
+}
+
+// CambAILanguageCodeFromID returns the code mapped to a CAMB AI language ID, or the placeholder "lang-<id>" when the ID is not in CambAILanguageIDToCode.
+func CambAILanguageCodeFromID(id int) string {
+	if code, ok := CambAILanguageIDToCode[id]; ok {
+		return code
+	}
+	return fmt.Sprintf("lang-%d", id)
+}
diff --git a/tests/e2e/cambai_test.go b/tests/e2e/cambai_test.go
new file mode 100644
index 000000000000..7bb30e5a703c
--- /dev/null
+++ b/tests/e2e/cambai_test.go
@@ -0,0 +1,275 @@
+package e2e_test
+
+import (
+	"encoding/json"
+	"fmt"
+	"io"
+	"net/http"
+	"strings"
+	"time"
+
+	"github.com/mudler/LocalAI/core/schema"
+	. "github.com/onsi/ginkgo/v2"
+	. "github.com/onsi/gomega"
+)
+
+// cambaiURL returns the local API base URL (127.0.0.1:apiPort) for CAMB AI routes; no /v1 prefix.
+func cambaiURL() string {
+	return fmt.Sprintf("http://127.0.0.1:%d", apiPort)
+}
+
+var _ = Describe("CAMB AI API Compatibility Tests", Label("CambAI"), func() {
+	var httpClient *http.Client
+
+	BeforeEach(func() {
+		httpClient = &http.Client{Timeout: 30 * time.Second}
+	})
+
+	Describe("TTS Streaming API", func() {
+		It("should stream audio from /apis/tts-stream", func() {
+			body := `{
+				"text": "Hello world from CAMB AI streaming",
+				"voice_id": 1,
+				"language": "en",
+				"speech_model": "mock-model"
+			}`
+			req, err := http.NewRequest("POST", cambaiURL()+"/apis/tts-stream", strings.NewReader(body))
+			Expect(err).ToNot(HaveOccurred())
+			req.Header.Set("Content-Type", "application/json")
+
+			resp, err := httpClient.Do(req)
+			Expect(err).ToNot(HaveOccurred())
+			defer resp.Body.Close()
+
+			Expect(resp.StatusCode).To(Equal(200))
+			Expect(resp.Header.Get("Content-Type")).To(HavePrefix("audio/"))
+
+			data, err := io.ReadAll(resp.Body)
+			Expect(err).ToNot(HaveOccurred())
+			Expect(len(data)).To(BeNumerically(">", 0), "TTS stream response body should be non-empty")
+		})
+
+		It("should return 400 for empty request", func() {
+			body := `{}`
+			req, err := http.NewRequest("POST", cambaiURL()+"/apis/tts-stream", strings.NewReader(body))
+			Expect(err).ToNot(HaveOccurred())
+			req.Header.Set("Content-Type", "application/json")
+
+			resp, err := httpClient.Do(req)
+			Expect(err).ToNot(HaveOccurred())
+			defer resp.Body.Close()
+
+			// Text is empty, so any error status (>= 400) is accepted; the exact code is not pinned
+			Expect(resp.StatusCode).To(BeNumerically(">=", 400))
+		})
+	})
+
+	Describe("TTS Async API", func() {
+		It("should return a task response from /apis/tts", func() {
+			body := `{
+				"text": "Hello from async TTS",
+				"voice_id": 1,
+				"language": 1
+			}`
+			req, err := http.NewRequest("POST", cambaiURL()+"/apis/tts", strings.NewReader(body))
+			Expect(err).ToNot(HaveOccurred())
+			req.Header.Set("Content-Type", "application/json")
+
+			resp, err := httpClient.Do(req)
+			Expect(err).ToNot(HaveOccurred())
+			defer resp.Body.Close()
+
+			Expect(resp.StatusCode).To(Equal(200))
+
+			var taskResp schema.CambAITaskResponse
+			err = json.NewDecoder(resp.Body).Decode(&taskResp)
+			Expect(err).ToNot(HaveOccurred())
+			Expect(taskResp.TaskID).ToNot(BeEmpty())
+			Expect(taskResp.Status).To(Equal("SUCCESS"))
+		})
+
+		It("should return audio when polling task status", func() {
+			// Create a TTS task first so there is a task ID to poll
+			body := `{
+				"text": "Task polling test",
+				"voice_id": 1,
+				"language": 1
+			}`
+			req, err := http.NewRequest("POST", cambaiURL()+"/apis/tts", strings.NewReader(body))
+			Expect(err).ToNot(HaveOccurred())
+			req.Header.Set("Content-Type", "application/json")
+
+			resp, err := httpClient.Do(req)
+			Expect(err).ToNot(HaveOccurred())
+			defer resp.Body.Close()
+			Expect(resp.StatusCode).To(Equal(200))
+
+			var taskResp schema.CambAITaskResponse
+			err = json.NewDecoder(resp.Body).Decode(&taskResp)
+			Expect(err).ToNot(HaveOccurred())
+
+			// Fetch the result for the task ID returned above
+			pollReq, err := http.NewRequest("GET", cambaiURL()+"/apis/tts/"+taskResp.TaskID, nil)
+			Expect(err).ToNot(HaveOccurred())
+
+			pollResp, err := httpClient.Do(pollReq)
+			Expect(err).ToNot(HaveOccurred())
+			defer pollResp.Body.Close()
+
+			Expect(pollResp.StatusCode).To(Equal(200))
+			Expect(pollResp.Header.Get("Content-Type")).To(HavePrefix("audio/"))
+
+			data, err := io.ReadAll(pollResp.Body)
+			Expect(err).ToNot(HaveOccurred())
+			Expect(len(data)).To(BeNumerically(">", 0))
+		})
+
+		It("should return 404 for unknown task ID", func() {
+			req, err := http.NewRequest("GET", cambaiURL()+"/apis/tts/nonexistent-task-id", nil)
+			Expect(err).ToNot(HaveOccurred())
+
+			resp, err := httpClient.Do(req)
+			Expect(err).ToNot(HaveOccurred())
+			defer resp.Body.Close()
+
+			Expect(resp.StatusCode).To(Equal(404))
+		})
+	})
+
+	Describe("Translation API", func() {
+		It("should translate text via /apis/translate", func() {
+			body := `{
+				"texts": ["Hello"],
+				"source_language": 1,
+				"target_language": 54
+			}`
+			req, err := http.NewRequest("POST", cambaiURL()+"/apis/translate", strings.NewReader(body))
+			Expect(err).ToNot(HaveOccurred())
+			req.Header.Set("Content-Type", "application/json")
+
+			resp, err := httpClient.Do(req)
+			Expect(err).ToNot(HaveOccurred())
+			defer resp.Body.Close()
+
+			Expect(resp.StatusCode).To(Equal(200))
+
+			var result schema.CambAITaskStatusResponse
+			err = json.NewDecoder(resp.Body).Decode(&result)
+			Expect(err).ToNot(HaveOccurred())
+			Expect(result.Status).To(Equal("SUCCESS"))
+			Expect(result.Output).ToNot(BeNil())
+		})
+
+		It("should stream translation via /apis/translation/stream", func() {
+			body := `{
+				"text": "Hello world",
+				"source_language": 1,
+				"target_language": 54
+			}`
+			req, err := http.NewRequest("POST", cambaiURL()+"/apis/translation/stream", strings.NewReader(body))
+			Expect(err).ToNot(HaveOccurred())
+			req.Header.Set("Content-Type", "application/json")
+
+			resp, err := httpClient.Do(req)
+			Expect(err).ToNot(HaveOccurred())
+			defer resp.Body.Close()
+
+			Expect(resp.StatusCode).To(Equal(200))
+
+			data, err := io.ReadAll(resp.Body)
+			Expect(err).ToNot(HaveOccurred())
+			Expect(len(data)).To(BeNumerically(">", 0), "Stream should return some text")
+		})
+	})
+
+	Describe("Sound Generation API", func() {
+		It("should generate sound via /apis/text-to-sound", func() {
+			body := `{
+				"prompt": "rain falling on a tin roof"
+			}`
+			req, err := http.NewRequest("POST", cambaiURL()+"/apis/text-to-sound", strings.NewReader(body))
+			Expect(err).ToNot(HaveOccurred())
+			req.Header.Set("Content-Type", "application/json")
+
+			resp, err := httpClient.Do(req)
+			Expect(err).ToNot(HaveOccurred())
+			defer resp.Body.Close()
+
+			Expect(resp.StatusCode).To(Equal(200))
+			Expect(resp.Header.Get("Content-Type")).To(HavePrefix("audio/"))
+
+			data, err := io.ReadAll(resp.Body)
+			Expect(err).ToNot(HaveOccurred())
+			Expect(len(data)).To(BeNumerically(">", 0))
+		})
+	})
+
+	Describe("Voice Management API", func() {
+		It("should list voices via /apis/list-voices", func() {
+			req, err := http.NewRequest("GET", cambaiURL()+"/apis/list-voices", nil)
+			Expect(err).ToNot(HaveOccurred())
+
+			resp, err := httpClient.Do(req)
+			Expect(err).ToNot(HaveOccurred())
+			defer resp.Body.Close()
+
+			Expect(resp.StatusCode).To(Equal(200))
+
+			var result schema.CambAIListVoicesResponse
+			err = json.NewDecoder(resp.Body).Decode(&result)
+			Expect(err).ToNot(HaveOccurred())
+			// voices list may be empty if no TTS models are flagged, but the endpoint should work
+			Expect(result.Voices).ToNot(BeNil())
+		})
+	})
+
+	Describe("Audio Separation API (stub)", func() {
+		It("should return 501 Not Implemented", func() {
+			req, err := http.NewRequest("POST", cambaiURL()+"/apis/audio-separation", strings.NewReader(`{}`))
+			Expect(err).ToNot(HaveOccurred())
+			req.Header.Set("Content-Type", "application/json")
+
+			resp, err := httpClient.Do(req)
+			Expect(err).ToNot(HaveOccurred())
+			defer resp.Body.Close()
+
+			Expect(resp.StatusCode).To(Equal(501))
+
+			var result schema.CambAIErrorResponse
+			err = json.NewDecoder(resp.Body).Decode(&result)
+			Expect(err).ToNot(HaveOccurred())
+			Expect(result.Detail).To(ContainSubstring("not currently supported"))
+		})
+	})
+
+	Describe("Transcription API", func() {
+		It("should reject request without audio file", func() {
+			req, err := http.NewRequest("POST", cambaiURL()+"/apis/transcribe", strings.NewReader(`{}`))
+			Expect(err).ToNot(HaveOccurred())
+			req.Header.Set("Content-Type", "application/json")
+
+			resp, err := httpClient.Do(req)
+			Expect(err).ToNot(HaveOccurred())
+			defer resp.Body.Close()
+
+			// No file was uploaded, so any error status (>= 400) is accepted
+			Expect(resp.StatusCode).To(BeNumerically(">=", 400))
+		})
+	})
+
+	Describe("Language ID Mapping", func() {
+		It("should map known language IDs correctly", func() {
+			Expect(schema.CambAILanguageCodeFromID(1)).To(Equal("en"))
+			Expect(schema.CambAILanguageCodeFromID(54)).To(Equal("es"))
+			Expect(schema.CambAILanguageCodeFromID(76)).To(Equal("fr"))
+			Expect(schema.CambAILanguageCodeFromID(70)).To(Equal("de"))
+			Expect(schema.CambAILanguageCodeFromID(12)).To(Equal("ja"))
+			Expect(schema.CambAILanguageCodeFromID(13)).To(Equal("zh"))
+		})
+
+		It("should return fallback for unknown language IDs", func() {
+			result := schema.CambAILanguageCodeFromID(9999)
+			Expect(result).To(Equal("lang-9999"))
+		})
+	})
+})
diff --git a/tests/e2e/e2e_suite_test.go b/tests/e2e/e2e_suite_test.go
index 66d9d6cd7ffb..d2a93b8cb3b8 100644
--- a/tests/e2e/e2e_suite_test.go
+++ b/tests/e2e/e2e_suite_test.go
@@ -93,6 +93,13 @@ var _ = BeforeSuite(func() {
 		"parameters": map[string]interface{}{
 			"model": "mock-model.bin",
 		},
+		"known_usecases": []string{
+			"FLAG_CHAT",
+			"FLAG_COMPLETION",
+			"FLAG_TTS",
+			"FLAG_TRANSCRIPT",
+			"FLAG_SOUND_GENERATION",
+		},
 	}
 	configPath = filepath.Join(modelsPath, "mock-model.yaml")
 	configYAML, err := yaml.Marshal(modelConfig)

From 733d573660687ec0a8bebed7f0346543791acb01 Mon Sep 17 00:00:00 2001
From: Neil Ruaro <neil.ruaro@camb.ai>
Date: Tue, 3 Mar 2026 16:23:28 +0800
Subject: [PATCH 2/2] fix: align CAMB AI endpoint responses with SDK
 expectations

Tested against the real camb-sdk Python package. Fixes:
- list-voices returns flat array (SDK expects list, not wrapped object)
- text-to-sound returns task_id JSON (SDK expects OrchestratorPipelineCallResult)
- translated-tts returns task_id JSON (SDK expects CreateTranslatedTtsOut)
- translation/stream returns JSON (SDK parses response as JSON)
- transcribe accepts media_url form field without requiring file upload
---
 .../http/endpoints/cambai/sound_generation.go | 42 ---------
 core/http/endpoints/cambai/transcription.go   | 91 ++++++++++++-------
 core/http/endpoints/cambai/translation.go     | 37 +++-----
 core/http/endpoints/cambai/voice.go           | 11 +--
 core/schema/cambai.go                         | 12 ++-
 tests/e2e/cambai_test.go                      | 19 ++--
 6 files changed, 96 insertions(+), 116 deletions(-)

diff --git a/core/http/endpoints/cambai/sound_generation.go b/core/http/endpoints/cambai/sound_generation.go
index 650fd5894ddd..124484c77db9 100644
--- a/core/http/endpoints/cambai/sound_generation.go
+++ b/core/http/endpoints/cambai/sound_generation.go
@@ -2,7 +2,6 @@ package cambai
 
 import (
 	"net/http"
-	"path/filepath"
 
 	"github.com/google/uuid"
 	"github.com/labstack/echo/v4"
@@ -10,7 +9,6 @@ import (
 	"github.com/mudler/LocalAI/core/config"
 	"github.com/mudler/LocalAI/core/http/middleware"
 	"github.com/mudler/LocalAI/core/schema"
-	"github.com/mudler/LocalAI/pkg/audio"
 	"github.com/mudler/LocalAI/pkg/model"
 	"github.com/mudler/xlog"
 )
@@ -30,46 +28,6 @@ func SoundGenerationEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader
 
 		xlog.Debug("CAMB AI text-to-sound request received", "model", input.Model)
 
-		filePath, _, err := backend.SoundGeneration(
-			input.Prompt, input.Duration, nil, nil,
-			nil, nil,
-			nil, "", "", nil, "",
-			"", "",
-			nil,
-			ml, appConfig, *cfg)
-		if err != nil {
-			return err
-		}
-
-		filePath, contentType := audio.NormalizeAudioFile(filePath)
-
-		taskID := uuid.New().String()
-
-		// Return audio file directly with task metadata headers
-		c.Response().Header().Set("X-Task-ID", taskID)
-		c.Response().Header().Set("X-Task-Status", "SUCCESS")
-		if contentType != "" {
-			c.Response().Header().Set("Content-Type", contentType)
-		}
-		return c.Attachment(filePath, filepath.Base(filePath))
-	}
-}
-
-// SoundGenerationAsyncEndpoint returns results in CAMB AI async task format.
-func SoundGenerationAsyncEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) echo.HandlerFunc {
-	return func(c echo.Context) error {
-		input, ok := c.Get(middleware.CONTEXT_LOCALS_KEY_LOCALAI_REQUEST).(*schema.CambAITextToSoundRequest)
-		if !ok {
-			return echo.ErrBadRequest
-		}
-
-		cfg, ok := c.Get(middleware.CONTEXT_LOCALS_KEY_MODEL_CONFIG).(*config.ModelConfig)
-		if !ok || cfg == nil {
-			return echo.ErrBadRequest
-		}
-
-		xlog.Debug("CAMB AI text-to-sound async request received", "model", input.Model)
-
 		_, _, err := backend.SoundGeneration(
 			input.Prompt, input.Duration, nil, nil,
 			nil, nil,
diff --git a/core/http/endpoints/cambai/transcription.go b/core/http/endpoints/cambai/transcription.go
index 2b633f44485a..12c5c4e5d3f9 100644
--- a/core/http/endpoints/cambai/transcription.go
+++ b/core/http/endpoints/cambai/transcription.go
@@ -6,6 +6,7 @@ import (
 	"os"
 	"path"
 	"path/filepath"
+	"sync"
 
 	"github.com/google/uuid"
 	"github.com/labstack/echo/v4"
@@ -17,8 +18,11 @@ import (
 	"github.com/mudler/xlog"
 )
 
+var transcriptionTaskResults = sync.Map{}
+
 // TranscriptionEndpoint handles CAMB AI transcription (POST /apis/transcribe).
-// Runs synchronously but returns results in CAMB AI's async task format.
+// The SDK sends multipart form with optional file upload and/or media_url.
+// Returns {"task_id": "..."} matching OrchestratorPipelineCallResult.
 func TranscriptionEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) echo.HandlerFunc {
 	return func(c echo.Context) error {
 		cfg, ok := c.Get(middleware.CONTEXT_LOCALS_KEY_MODEL_CONFIG).(*config.ModelConfig)
@@ -32,54 +36,79 @@ func TranscriptionEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader,
 		if input != nil && input.LanguageID > 0 {
 			language = schema.CambAILanguageCodeFromID(input.LanguageID)
 		}
-
-		file, err := c.FormFile("file")
-		if err != nil {
-			return c.JSON(http.StatusBadRequest, schema.CambAIErrorResponse{
-				Detail: "Audio file is required. Upload as multipart form field 'file'.",
-			})
+		// SDK sends language as multipart form field too
+		if language == "" {
+			if langField := c.FormValue("language"); langField != "" {
+				language = langField
+			}
 		}
 
-		f, err := file.Open()
-		if err != nil {
-			return err
-		}
-		defer f.Close()
+		// Try file upload first (field "file" or "media_file")
+		var audioPath string
+		for _, fieldName := range []string{"file", "media_file"} {
+			file, err := c.FormFile(fieldName)
+			if err != nil {
+				continue
+			}
 
-		dir, err := os.MkdirTemp("", "cambai-transcribe")
-		if err != nil {
-			return err
+			f, err := file.Open()
+			if err != nil {
+				return err
+			}
+			defer f.Close()
+
+			dir, err := os.MkdirTemp("", "cambai-transcribe")
+			if err != nil {
+				return err
+			}
+			defer os.RemoveAll(dir)
+
+			dst := filepath.Join(dir, path.Base(file.Filename))
+			dstFile, err := os.Create(dst)
+			if err != nil {
+				return err
+			}
+
+			if _, err := io.Copy(dstFile, f); err != nil {
+				dstFile.Close()
+				return err
+			}
+			dstFile.Close()
+			audioPath = dst
+			break
 		}
-		defer os.RemoveAll(dir)
 
-		dst := filepath.Join(dir, path.Base(file.Filename))
-		dstFile, err := os.Create(dst)
-		if err != nil {
-			return err
+		// Fall back to media_url form field
+		if audioPath == "" {
+			mediaURL := c.FormValue("media_url")
+			if mediaURL == "" {
+				mediaURL = c.FormValue("audio_url")
+			}
+			if mediaURL != "" {
+				audioPath = mediaURL
+			}
 		}
 
-		if _, err := io.Copy(dstFile, f); err != nil {
-			xlog.Debug("Audio file copying error", "filename", file.Filename, "dst", dst, "error", err)
-			return err
+		if audioPath == "" {
+			return c.JSON(http.StatusBadRequest, schema.CambAIErrorResponse{
+				Detail: "Either a file upload or media_url is required.",
+			})
 		}
-		dstFile.Close()
 
-		xlog.Debug("CAMB AI transcription request", "file", dst, "language", language)
+		xlog.Debug("CAMB AI transcription request", "path", audioPath, "language", language)
 
-		tr, err := backend.ModelTranscription(dst, language, false, false, "", ml, *cfg, appConfig)
+		tr, err := backend.ModelTranscription(audioPath, language, false, false, "", ml, *cfg, appConfig)
 		if err != nil {
 			return err
 		}
 
 		taskID := uuid.New().String()
+		transcriptionTaskResults.Store(taskID, tr.Text)
 
-		return c.JSON(http.StatusOK, schema.CambAITaskStatusResponse{
+		return c.JSON(http.StatusOK, schema.CambAITaskResponse{
+			TaskID: taskID,
 			Status: "SUCCESS",
 			RunID:  taskID,
-			Output: schema.CambAITranscriptionResponse{
-				Text:     tr.Text,
-				Language: language,
-			},
 		})
 	}
 }
diff --git a/core/http/endpoints/cambai/translation.go b/core/http/endpoints/cambai/translation.go
index d62b41fca078..22903b3513ca 100644
--- a/core/http/endpoints/cambai/translation.go
+++ b/core/http/endpoints/cambai/translation.go
@@ -95,31 +95,24 @@ func TranslationStreamEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoad
 		targetLang := schema.CambAILanguageCodeFromID(input.TargetLanguageID)
 		prompt := buildTranslationPrompt(input.Text, sourceLang, targetLang)
 
-		c.Response().Header().Set("Content-Type", "text/plain; charset=utf-8")
-		c.Response().Header().Set("Transfer-Encoding", "chunked")
-		c.Response().Header().Set("Cache-Control", "no-cache")
-		c.Response().Header().Set("Connection", "keep-alive")
-
 		fn, err := backend.ModelInference(
 			context.Background(), prompt, nil, nil, nil, nil,
-			ml, cfg, cl, appConfig,
-			func(token string, _ backend.TokenUsage) bool {
-				_, writeErr := c.Response().Write([]byte(token))
-				if writeErr != nil {
-					return true
-				}
-				c.Response().Flush()
-				return true
-			},
-			"", "", nil, nil, nil,
+			ml, cfg, cl, appConfig, nil, "", "", nil, nil, nil,
 		)
 		if err != nil {
 			return err
 		}
 
-		// Call fn to complete inference
-		_, err = fn()
-		return err
+		resp, err := fn()
+		if err != nil {
+			return err
+		}
+
+		return c.JSON(http.StatusOK, map[string]any{
+			"translation":     strings.TrimSpace(resp.Response),
+			"source_language": input.SourceLanguageID,
+			"target_language": input.TargetLanguageID,
+		})
 	}
 }
 
@@ -178,14 +171,12 @@ func TranslatedTTSEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader,
 		}
 
 		taskID := uuid.New().String()
+		ttsTaskResults.Store(taskID, filePath)
 
-		return c.JSON(http.StatusOK, schema.CambAITaskStatusResponse{
+		return c.JSON(http.StatusOK, schema.CambAITaskResponse{
+			TaskID: taskID,
 			Status: "SUCCESS",
 			RunID:  taskID,
-			Output: map[string]string{
-				"translation": translatedText,
-				"audio_path":  filePath,
-			},
 		})
 	}
 }
diff --git a/core/http/endpoints/cambai/voice.go b/core/http/endpoints/cambai/voice.go
index 577dbc2f73d4..73943c675272 100644
--- a/core/http/endpoints/cambai/voice.go
+++ b/core/http/endpoints/cambai/voice.go
@@ -22,8 +22,8 @@ func ListVoicesEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, app
 		voices := make([]schema.CambAIVoice, 0)
 		for i, cfg := range ttsConfigs {
 			voice := schema.CambAIVoice{
-				VoiceID: i + 1,
-				Name:    cfg.Name,
+				ID:   i + 1,
+				Name: cfg.Name,
 			}
 			if cfg.Voice != "" {
 				voice.Name = fmt.Sprintf("%s (%s)", cfg.Name, cfg.Voice)
@@ -31,9 +31,7 @@ func ListVoicesEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, app
 			voices = append(voices, voice)
 		}
 
-		return c.JSON(http.StatusOK, schema.CambAIListVoicesResponse{
-			Voices: voices,
-		})
+		return c.JSON(http.StatusOK, voices)
 	}
 }
 
@@ -85,9 +83,8 @@ func CreateCustomVoiceEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoad
 
 		xlog.Info("Custom voice audio saved", "name", voiceName, "path", dstPath)
 
-		return c.JSON(http.StatusOK, schema.CambAIVoice{
+		return c.JSON(http.StatusOK, schema.CambAICreateCustomVoiceResponse{
 			VoiceID: 0,
-			Name:    voiceName,
 		})
 	}
 }
diff --git a/core/schema/cambai.go b/core/schema/cambai.go
index 09380ba36455..8abc09387b06 100644
--- a/core/schema/cambai.go
+++ b/core/schema/cambai.go
@@ -147,16 +147,20 @@ type CambAITaskStatusResponse struct {
 }
 
 type CambAIVoice struct {
-	VoiceID int    `json:"voice_id"`
-	Name    string `json:"voice_name"`
-	Gender  string `json:"gender,omitempty"`
-	Age     string `json:"age,omitempty"`
+	ID     int    `json:"id"`
+	Name   string `json:"voice_name"`
+	Gender string `json:"gender,omitempty"`
+	Age    string `json:"age,omitempty"`
 }
 
 type CambAIListVoicesResponse struct {
 	Voices []CambAIVoice `json:"voices"`
 }
 
+type CambAICreateCustomVoiceResponse struct {
+	VoiceID int `json:"voice_id"`
+}
+
 type CambAIErrorResponse struct {
 	Detail string `json:"detail"`
 }
diff --git a/tests/e2e/cambai_test.go b/tests/e2e/cambai_test.go
index 7bb30e5a703c..b8badfbd5d1e 100644
--- a/tests/e2e/cambai_test.go
+++ b/tests/e2e/cambai_test.go
@@ -160,7 +160,7 @@ var _ = Describe("CAMB AI API Compatibility Tests", Label("CambAI"), func() {
 			Expect(result.Output).ToNot(BeNil())
 		})
 
-		It("should stream translation via /apis/translation/stream", func() {
+		It("should translate via /apis/translation/stream", func() {
 			body := `{
 				"text": "Hello world",
 				"source_language": 1,
@@ -176,9 +176,10 @@ var _ = Describe("CAMB AI API Compatibility Tests", Label("CambAI"), func() {
 
 			Expect(resp.StatusCode).To(Equal(200))
 
-			data, err := io.ReadAll(resp.Body)
+			var result map[string]any
+			err = json.NewDecoder(resp.Body).Decode(&result)
 			Expect(err).ToNot(HaveOccurred())
-			Expect(len(data)).To(BeNumerically(">", 0), "Stream should return some text")
+			Expect(result["translation"]).ToNot(BeEmpty())
 		})
 	})
 
@@ -196,11 +197,12 @@ var _ = Describe("CAMB AI API Compatibility Tests", Label("CambAI"), func() {
 			defer resp.Body.Close()
 
 			Expect(resp.StatusCode).To(Equal(200))
-			Expect(resp.Header.Get("Content-Type")).To(HavePrefix("audio/"))
 
-			data, err := io.ReadAll(resp.Body)
+			var taskResp schema.CambAITaskResponse
+			err = json.NewDecoder(resp.Body).Decode(&taskResp)
 			Expect(err).ToNot(HaveOccurred())
-			Expect(len(data)).To(BeNumerically(">", 0))
+			Expect(taskResp.TaskID).ToNot(BeEmpty())
+			Expect(taskResp.Status).To(Equal("SUCCESS"))
 		})
 	})
 
@@ -215,11 +217,10 @@ var _ = Describe("CAMB AI API Compatibility Tests", Label("CambAI"), func() {
 
 			Expect(resp.StatusCode).To(Equal(200))
 
-			var result schema.CambAIListVoicesResponse
+			var result []schema.CambAIVoice
 			err = json.NewDecoder(resp.Body).Decode(&result)
 			Expect(err).ToNot(HaveOccurred())
-			// voices list may be empty if no TTS models are flagged, but the endpoint should work
-			Expect(result.Voices).ToNot(BeNil())
+			Expect(result).ToNot(BeNil())
 		})
 	})