From ddbe7dfe02751c4e0515673ef67dc756f168c0bc Mon Sep 17 00:00:00 2001 From: taoliang Date: Wed, 20 May 2026 19:47:10 +0800 Subject: [PATCH 1/2] fix(volcengine): route vision embedding models to multimodal endpoint Volcengine multimodal embedding models (e.g. doubao-embedding-vision-251215) require POST /api/v3/embeddings/multimodal and reject the standard /api/v3/embeddings path with a 400 InvalidParameter error: "the requested model doubao-embedding-vision-251215 does not support this api" Detect by model-name keywords (case-insensitive: contains "embedding" AND contains "vision" or "multimodal"). Both UpstreamModelName and OriginModelName are checked so model_mapping aliases still work. Pure-text embedding models continue to use the standard endpoint. --- relay/channel/volcengine/adaptor.go | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/relay/channel/volcengine/adaptor.go b/relay/channel/volcengine/adaptor.go index ba9f223bd2f..80b7bf42092 100644 --- a/relay/channel/volcengine/adaptor.go +++ b/relay/channel/volcengine/adaptor.go @@ -263,6 +263,15 @@ func (a *Adaptor) GetRequestURL(info *relaycommon.RelayInfo) (string, error) { } return fmt.Sprintf("%s/api/v3/chat/completions", baseUrl), nil case constant.RelayModeEmbeddings: + // Volcengine multimodal embedding models (e.g. doubao-embedding-vision-*) + // require the dedicated /api/v3/embeddings/multimodal endpoint and reject + // the standard /embeddings path with a 400 InvalidParameter error. + // Heuristic: route by model name keywords. Both the request model and the + // upstream model name are checked so model_mapping aliases still work. + if isVolcengineMultimodalEmbedding(info.UpstreamModelName) || + isVolcengineMultimodalEmbedding(info.OriginModelName) { + return fmt.Sprintf("%s/api/v3/embeddings/multimodal", baseUrl), nil + } return fmt.Sprintf("%s/api/v3/embeddings", baseUrl), nil //豆包的图生图也走generations接口: https://www.volcengine.com/docs/82379/1824121 case constant.RelayModeImagesGenerations, constant.RelayModeImagesEdits: @@ -400,3 +409,22 @@ func (a *Adaptor) GetModelList() []string { func (a *Adaptor) GetChannelName() string { return ChannelName } + +// isVolcengineMultimodalEmbedding reports whether the given model name targets +// Volcengine's multimodal (image+text) embedding endpoint, which requires the +// `/api/v3/embeddings/multimodal` path instead of the standard `/embeddings`. +// +// The detection is keyword-based on the model name. We accept both the original +// "vision" series (e.g. doubao-embedding-vision-241215, doubao-embedding-vision-251215) +// and any future "multimodal" naming. Matching is case-insensitive so user-supplied +// model_mapping aliases work even with mixed case. +func isVolcengineMultimodalEmbedding(modelName string) bool { + if modelName == "" { + return false + } + lower := strings.ToLower(modelName) + if !strings.Contains(lower, "embedding") { + return false + } + return strings.Contains(lower, "vision") || strings.Contains(lower, "multimodal") +} From bbbd8d8e630cefeff79c955c08c387f015434e5f Mon Sep 17 00:00:00 2001 From: taoliang Date: Wed, 20 May 2026 20:09:28 +0800 Subject: [PATCH 2/2] fix(volcengine): use multimodal input shape when testing vision embedding channels The follow-up fix for the multimodal endpoint routing exposed a second bug in the channel test path: controller/channel-test.go always emits Input=["hello world"] for any embedding model, but Volcengine's /api/v3/embeddings/multimodal endpoint rejects flat string arrays with: "we could not parse the JSON body of your request" So the dashboard "Test channel" button still failed for vision embedding models even after routing was corrected. Detect vision/multimodal embedding models by name keyword (case-insensitive) and emit the typed-parts shape that matches the multimodal endpoint: {"input": [{"type": "text", "text": "hello world"}]} Plain text embedding models (Doubao-embedding, doubao-embedding-text-*, m3e, bge-*, OpenAI text-embedding-3-*, etc.) keep using the legacy string array shape. --- controller/channel-test.go | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/controller/channel-test.go b/controller/channel-test.go index b225585ed7a..a16aa495947 100644 --- a/controller/channel-test.go +++ b/controller/channel-test.go @@ -758,6 +758,24 @@ func buildTestRequest(model string, endpointType string, channel *model.Channel, if strings.Contains(strings.ToLower(model), "embedding") || strings.HasPrefix(model, "m3e") || strings.Contains(model, "bge-") { + // Volcengine multimodal embedding endpoint (/api/v3/embeddings/multimodal) + // requires input to be an array of typed parts (e.g. [{type:"text",text:"..."}]), + // not a plain string array. Sending the standard `["hello world"]` shape + // against vision/multimodal models triggers a 400 from the upstream + // "we could not parse the JSON body of your request" — so emit the + // multimodal-compatible shape when the model name signals it. + lower := strings.ToLower(model) + if strings.Contains(lower, "vision") || strings.Contains(lower, "multimodal") { + return &dto.EmbeddingRequest{ + Model: model, + Input: []any{ + map[string]any{ + "type": "text", + "text": "hello world", + }, + }, + } + } // 返回 EmbeddingRequest return &dto.EmbeddingRequest{ Model: model,