QuantumNous · rangerTao · May 20, 2026 · May 20, 2026 · coderabbitai · May 20, 2026
diff --git a/controller/channel-test.go b/controller/channel-test.go
@@ -758,6 +758,24 @@ func buildTestRequest(model string, endpointType string, channel *model.Channel,
 	if strings.Contains(strings.ToLower(model), "embedding") ||
 		strings.HasPrefix(model, "m3e") ||
 		strings.Contains(model, "bge-") {
+		// Volcengine multimodal embedding endpoint (/api/v3/embeddings/multimodal)
+		// requires input to be an array of typed parts (e.g. [{type:"text",text:"..."}]),
+		// not a plain string array. Sending the standard `["hello world"]` shape
+		// against vision/multimodal models triggers a 400 from the upstream
+		// ("we could not parse the JSON body of your request"), so emit the
+		// multimodal-compatible shape when the model name signals it.
+		lower := strings.ToLower(model)
+		if strings.Contains(lower, "vision") || strings.Contains(lower, "multimodal") {
+			return &dto.EmbeddingRequest{
+				Model: model,
+				Input: []any{
+					map[string]any{
+						"type": "text",
+						"text": "hello world",
+					},
+				},
+			}
+		}
 		// 返回 EmbeddingRequest
 		return &dto.EmbeddingRequest{
 			Model: model,

diff --git a/relay/channel/volcengine/adaptor.go b/relay/channel/volcengine/adaptor.go
@@ -263,6 +263,15 @@ func (a *Adaptor) GetRequestURL(info *relaycommon.RelayInfo) (string, error) {
 			}
 			return fmt.Sprintf("%s/api/v3/chat/completions", baseUrl), nil
 		case constant.RelayModeEmbeddings:
+			// Volcengine multimodal embedding models (e.g. doubao-embedding-vision-*)
+			// require the dedicated /api/v3/embeddings/multimodal endpoint and reject
+			// the standard /embeddings path with a 400 InvalidParameter error.
+			// Heuristic: route by model name keywords. Both the request model and the
+			// upstream model name are checked so model_mapping aliases still work.
+			if isVolcengineMultimodalEmbedding(info.UpstreamModelName) ||
+				isVolcengineMultimodalEmbedding(info.OriginModelName) {
+				return fmt.Sprintf("%s/api/v3/embeddings/multimodal", baseUrl), nil
+			}
 			return fmt.Sprintf("%s/api/v3/embeddings", baseUrl), nil
 		//豆包的图生图也走generations接口: https://www.volcengine.com/docs/82379/1824121
 		case constant.RelayModeImagesGenerations, constant.RelayModeImagesEdits:
@@ -400,3 +409,22 @@ func (a *Adaptor) GetModelList() []string {
 func (a *Adaptor) GetChannelName() string {
 	return ChannelName
 }
+
+// isVolcengineMultimodalEmbedding reports whether modelName should be routed to
+// Volcengine's multimodal embedding endpoint (`/api/v3/embeddings/multimodal`)
+// rather than the standard `/api/v3/embeddings` path.
+//
+// Detection is a case-insensitive keyword match: the name must contain
+// "embedding" and also "vision" or "multimodal" (for example,
+// doubao-embedding-vision-241215). An empty name, or one without "embedding",
+// returns false, so non-embedding vision models are not matched.
+func isVolcengineMultimodalEmbedding(modelName string) bool {
+	if modelName == "" {
+		return false
+	}
+	lower := strings.ToLower(modelName)
+	if !strings.Contains(lower, "embedding") {
+		return false
+	}
+	return strings.Contains(lower, "vision") || strings.Contains(lower, "multimodal")
+}