diff --git a/controller/channel-test.go b/controller/channel-test.go index b225585ed7a..a16aa495947 100644 --- a/controller/channel-test.go +++ b/controller/channel-test.go @@ -758,6 +758,24 @@ func buildTestRequest(model string, endpointType string, channel *model.Channel, if strings.Contains(strings.ToLower(model), "embedding") || strings.HasPrefix(model, "m3e") || strings.Contains(model, "bge-") { + // Volcengine multimodal embedding endpoint (/api/v3/embeddings/multimodal) + // requires input to be an array of typed parts (e.g. [{type:"text",text:"..."}]), + // not a plain string array. Sending the standard `["hello world"]` shape + // against vision/multimodal models triggers a 400 from the upstream + // "we could not parse the JSON body of your request" — so emit the + // multimodal-compatible shape when the model name signals it. + lower := strings.ToLower(model) + if strings.Contains(lower, "vision") || strings.Contains(lower, "multimodal") { + return &dto.EmbeddingRequest{ + Model: model, + Input: []any{ + map[string]any{ + "type": "text", + "text": "hello world", + }, + }, + } + } // 返回 EmbeddingRequest return &dto.EmbeddingRequest{ Model: model, diff --git a/relay/channel/volcengine/adaptor.go b/relay/channel/volcengine/adaptor.go index ba9f223bd2f..80b7bf42092 100644 --- a/relay/channel/volcengine/adaptor.go +++ b/relay/channel/volcengine/adaptor.go @@ -263,6 +263,15 @@ func (a *Adaptor) GetRequestURL(info *relaycommon.RelayInfo) (string, error) { } return fmt.Sprintf("%s/api/v3/chat/completions", baseUrl), nil case constant.RelayModeEmbeddings: + // Volcengine multimodal embedding models (e.g. doubao-embedding-vision-*) + // require the dedicated /api/v3/embeddings/multimodal endpoint and reject + // the standard /embeddings path with a 400 InvalidParameter error. + // Heuristic: route by model name keywords. Both the request model and the + // upstream model name are checked so model_mapping aliases still work. 
// isVolcengineMultimodalEmbedding reports whether modelName should be routed to
// Volcengine's multimodal embedding endpoint (`/api/v3/embeddings/multimodal`)
// rather than the standard `/api/v3/embeddings` path.
//
// Detection is a case-insensitive keyword heuristic on the name, so mixed-case
// model_mapping aliases are treated the same way as upstream model names:
//
//   - the name must contain "embedding", and
//   - it must either contain "multimodal" or have a word that starts with
//     "vision" (e.g. doubao-embedding-vision-241215).
//
// "vision" is matched at the start of a run of letters instead of as a raw
// substring because it is also the tail of unrelated words such as "revision"
// or "provision"; a plain substring match would classify those text embedding
// models as multimodal.
//
// An empty name returns false.
func isVolcengineMultimodalEmbedding(modelName string) bool {
	lower := strings.ToLower(modelName)
	if !strings.Contains(lower, "embedding") {
		return false
	}
	if strings.Contains(lower, "multimodal") {
		return true
	}
	// Break the name into runs of ASCII letters; digits, '-', '_', '/', '.'
	// and any other rune act as separators. The input is already lowercased,
	// so checking 'a'..'z' is sufficient.
	words := strings.FieldsFunc(lower, func(r rune) bool {
		return r < 'a' || r > 'z'
	})
	for _, w := range words {
		if strings.HasPrefix(w, "vision") {
			return true
		}
	}
	return false
}