From aa7768652717b6fbf395a50fed84ceb623771bbb Mon Sep 17 00:00:00 2001
From: xemxx <xemxx@qq.com>
Date: Thu, 21 May 2026 15:29:21 +0800
Subject: [PATCH 1/4] feat: smart channel routing based on request API type

When multiple channels share the same priority for a model, prefer channels
whose native API type matches the client's request format (OpenAI/Claude/Gemini).
This avoids unnecessary request/response conversion and improves relay performance.

Changes:
- Add RelayFormatToAPIType() mapping in types/relay_format.go
- Pass RelayFormat through RetryParam to channel selection
- Smart routing in model/ability.go (DB query path, memory cache disabled)
- Smart routing in model/channel_cache.go (memory cache enabled)
- Default to OpenAI format in middleware/distributor.go, because the distributor runs before the relay format is detected

Fixes #4982
---
 controller/relay.go       | 18 ++++----
 middleware/distributor.go |  9 ++--
 model/ability.go          | 93 +++++++++++++++++++++++++++++++++++++--
 model/channel_cache.go    | 33 +++++++++++++-
 service/channel_select.go |  6 ++-
 types/relay_format.go     | 23 ++++++++++
 6 files changed, 162 insertions(+), 20 deletions(-)

diff --git a/controller/relay.go b/controller/relay.go
index 5e2db44c25a..538c27b9d61 100644
--- a/controller/relay.go
+++ b/controller/relay.go
@@ -179,10 +179,11 @@ func Relay(c *gin.Context, relayFormat types.RelayFormat) {
 	}()
 
 	retryParam := &service.RetryParam{
-		Ctx:        c,
-		TokenGroup: relayInfo.TokenGroup,
-		ModelName:  relayInfo.OriginModelName,
-		Retry:      common.GetPointer(0),
+		Ctx:         c,
+		TokenGroup:  relayInfo.TokenGroup,
+		ModelName:   relayInfo.OriginModelName,
+		RelayFormat: relayInfo.RelayFormat,
+		Retry:       common.GetPointer(0),
 	}
 	relayInfo.RetryIndex = 0
 	relayInfo.LastError = nil
@@ -507,10 +508,11 @@ func RelayTask(c *gin.Context) {
 	}()
 
 	retryParam := &service.RetryParam{
-		Ctx:        c,
-		TokenGroup: relayInfo.TokenGroup,
-		ModelName:  relayInfo.OriginModelName,
-		Retry:      common.GetPointer(0),
+		Ctx:         c,
+		TokenGroup:  relayInfo.TokenGroup,
+		ModelName:   relayInfo.OriginModelName,
+		RelayFormat: relayInfo.RelayFormat,
+		Retry:       common.GetPointer(0),
 	}
 
 	for ; retryParam.GetRetry() <= common.RetryTimes; retryParam.IncreaseRetry() {
diff --git a/middleware/distributor.go b/middleware/distributor.go
index 2263fae3fae..308ef276e10 100644
--- a/middleware/distributor.go
+++ b/middleware/distributor.go
@@ -129,10 +129,11 @@ func Distribute() func(c *gin.Context) {
 
 				if channel == nil {
 					channel, selectGroup, err = service.CacheGetRandomSatisfiedChannel(&service.RetryParam{
-						Ctx:        c,
-						ModelName:  modelRequest.Model,
-						TokenGroup: usingGroup,
-						Retry:      common.GetPointer(0),
+						Ctx:         c,
+						ModelName:   modelRequest.Model,
+						TokenGroup:  usingGroup,
+						RelayFormat: types.RelayFormatOpenAI, // distributor runs before format detection, default to OpenAI
+						Retry:       common.GetPointer(0),
 					})
 					if err != nil {
 						showGroup := usingGroup
diff --git a/model/ability.go b/model/ability.go
index 1d7c53fa580..ad913f61529 100644
--- a/model/ability.go
+++ b/model/ability.go
@@ -7,6 +7,8 @@ import (
 	"sync"
 
 	"github.com/QuantumNous/new-api/common"
+	"github.com/QuantumNous/new-api/constant"
+	"github.com/QuantumNous/new-api/types"
 
 	"github.com/samber/lo"
 	"gorm.io/gorm"
@@ -103,14 +105,97 @@ func getChannelQuery(group string, model string, retry int) (*gorm.DB, error) {
 	return channelQuery, nil
 }
 
-func GetChannel(group string, model string, retry int) (*Channel, error) {
-	var abilities []Ability
-
-	var err error = nil
+// getChannelQueryWithAPIType returns a query that filters channels by priority,
+// and when multiple channels share the same priority, prefers those whose
+// channel type matches the expected API type (for smart routing).
+func getChannelQueryWithAPIType(group string, model string, retry int, expectedAPIType int) (*gorm.DB, error) {
 	channelQuery, err := getChannelQuery(group, model, retry)
 	if err != nil {
 		return nil, err
 	}
+
+	// Join with channels table to access channel type for filtering
+	var abilities []AbilityWithChannel
+	err = channelQuery.Table("abilities").
+		Select("abilities.*, channels.type as channel_type").
+		Joins("left join channels on abilities.channel_id = channels.id").
+		Scan(&abilities).Error
+	if err != nil {
+		return channelQuery, nil // fall back to original query
+	}
+
+	if len(abilities) <= 1 {
+		return channelQuery, nil
+	}
+
+	// Check if any channel matches the expected API type
+	var hasMatch bool
+	for _, ab := range abilities {
+		channelAPIType, ok := common.ChannelType2APIType(ab.ChannelType)
+		if ok && channelAPIType == expectedAPIType {
+			hasMatch = true
+			break
+		}
+	}
+
+	if hasMatch {
+		// Rebuild query with API type filter
+		// We need to filter by channel type, so we rebuild with join
+		var priority interface{}
+		if retry == 0 {
+			priority = DB.Model(&Ability{}).Select("MAX(priority)").Where(commonGroupCol+" = ? and model = ? and enabled = ?", group, model, true)
+		} else {
+			p, err := getPriority(group, model, retry)
+			if err != nil {
+				return channelQuery, nil
+			}
+			priority = p
+		}
+		filteredQuery := DB.Table("abilities").
+			Joins("left join channels on abilities.channel_id = channels.id").
+			Where(commonGroupCol+" = ? and abilities.model = ? and abilities.enabled = ? and abilities.priority = ?", group, model, true, priority).
+			Where("channels.type IN (?)", getMatchingChannelTypes(expectedAPIType))
+		return filteredQuery, nil
+	}
+
+	return channelQuery, nil
+}
+
+// getMatchingChannelTypes returns channel types that map to the given API type.
+func getMatchingChannelTypes(expectedAPIType int) []int {
+	var types []int
+	for i := 1; i < constant.ChannelTypeDummy; i++ {
+		apiType, ok := common.ChannelType2APIType(i)
+		if ok && apiType == expectedAPIType {
+			types = append(types, i)
+		}
+	}
+	return types
+}
+
+func GetChannel(group string, model string, retry int, relayFormat types.RelayFormat) (*Channel, error) {
+	var abilities []Ability
+
+	var err error = nil
+	var channelQuery *gorm.DB
+
+	// Use smart routing when relayFormat is provided and memory cache is disabled
+	if relayFormat != "" {
+		if expectedAPIType, ok := types.RelayFormatToAPIType(relayFormat); ok {
+			channelQuery, err = getChannelQueryWithAPIType(group, model, retry, expectedAPIType)
+			if err != nil {
+				return nil, err
+			}
+		}
+	}
+
+	if channelQuery == nil {
+		channelQuery, err = getChannelQuery(group, model, retry)
+		if err != nil {
+			return nil, err
+		}
+	}
+
 	if common.UsingSQLite || common.UsingPostgreSQL {
 		err = channelQuery.Order("weight DESC").Find(&abilities).Error
 	} else {
diff --git a/model/channel_cache.go b/model/channel_cache.go
index 03740d2cd3a..0e9c95b9311 100644
--- a/model/channel_cache.go
+++ b/model/channel_cache.go
@@ -13,6 +13,7 @@ import (
 	"github.com/QuantumNous/new-api/constant"
 	"github.com/QuantumNous/new-api/logger"
 	"github.com/QuantumNous/new-api/setting/ratio_setting"
+	"github.com/QuantumNous/new-api/types"
 )
 
 var group2model2channels map[string]map[string][]int // enabled channel
@@ -94,10 +95,10 @@ func SyncChannelCache(frequency int) {
 	}
 }
 
-func GetRandomSatisfiedChannel(group string, model string, retry int) (*Channel, error) {
+func GetRandomSatisfiedChannel(group string, model string, retry int, relayFormat types.RelayFormat) (*Channel, error) {
 	// if memory cache is disabled, get channel directly from database
 	if !common.MemoryCacheEnabled {
-		return GetChannel(group, model, retry)
+		return GetChannel(group, model, retry, relayFormat)
 	}
 
 	channelSyncLock.RLock()
@@ -160,6 +161,34 @@ func GetRandomSatisfiedChannel(group string, model string, retry int) (*Channel,
 		return nil, errors.New(fmt.Sprintf("no channel found, group: %s, model: %s, priority: %d", group, model, targetPriority))
 	}
 
+	// Smart routing: when multiple channels share the same priority,
+	// prefer channels whose native API type matches the client request format.
+	// This avoids unnecessary request/response conversion.
+	if len(targetChannels) > 1 && relayFormat != "" {
+		if expectedAPIType, ok := types.RelayFormatToAPIType(relayFormat); ok {
+			var preferredChannels []*Channel
+			var fallbackChannels []*Channel
+			for _, ch := range targetChannels {
+				channelAPIType, typeOk := common.ChannelType2APIType(ch.Type)
+				if typeOk && channelAPIType == expectedAPIType {
+					preferredChannels = append(preferredChannels, ch)
+				} else {
+					fallbackChannels = append(fallbackChannels, ch)
+				}
+			}
+			// Only use preferred channels if at least one matches;
+			// otherwise fall back to the original set.
+			if len(preferredChannels) > 0 {
+				targetChannels = preferredChannels
+				// Recalculate sumWeight for the filtered set
+				sumWeight = 0
+				for _, ch := range targetChannels {
+					sumWeight += ch.GetWeight()
+				}
+			}
+		}
+	}
+
 	// smoothing factor and adjustment
 	smoothingFactor := 1
 	smoothingAdjustment := 0
diff --git a/service/channel_select.go b/service/channel_select.go
index a3710ef8cec..5228622c2c2 100644
--- a/service/channel_select.go
+++ b/service/channel_select.go
@@ -8,6 +8,7 @@ import (
 	"github.com/QuantumNous/new-api/logger"
 	"github.com/QuantumNous/new-api/model"
 	"github.com/QuantumNous/new-api/setting"
+	"github.com/QuantumNous/new-api/types"
 	"github.com/gin-gonic/gin"
 )
 
@@ -15,6 +16,7 @@ type RetryParam struct {
 	Ctx          *gin.Context
 	TokenGroup   string
 	ModelName    string
+	RelayFormat  types.RelayFormat // client request API format for smart channel routing
 	Retry        *int
 	resetNextTry bool
 }
@@ -115,7 +117,7 @@ func CacheGetRandomSatisfiedChannel(param *RetryParam) (*model.Channel, string,
 			}
 			logger.LogDebug(param.Ctx, "Auto selecting group: %s, priorityRetry: %d", autoGroup, priorityRetry)
 
-			channel, _ = model.GetRandomSatisfiedChannel(autoGroup, param.ModelName, priorityRetry)
+			channel, _ = model.GetRandomSatisfiedChannel(autoGroup, param.ModelName, priorityRetry, param.RelayFormat)
 			if channel == nil {
 				// Current group has no available channel for this model, try next group
 				// 当前分组没有该模型的可用渠道，尝试下一个分组
@@ -153,7 +155,7 @@ func CacheGetRandomSatisfiedChannel(param *RetryParam) (*model.Channel, string,
 			break
 		}
 	} else {
-		channel, err = model.GetRandomSatisfiedChannel(param.TokenGroup, param.ModelName, param.GetRetry())
+		channel, err = model.GetRandomSatisfiedChannel(param.TokenGroup, param.ModelName, param.GetRetry(), param.RelayFormat)
 		if err != nil {
 			return nil, param.TokenGroup, err
 		}
diff --git a/types/relay_format.go b/types/relay_format.go
index 9b4c86f2493..c835fbf5613 100644
--- a/types/relay_format.go
+++ b/types/relay_format.go
@@ -17,3 +17,26 @@ const (
 	RelayFormatTask    = "task"
 	RelayFormatMjProxy = "mj_proxy"
 )
+
+// RelayFormatToAPIType maps the client request relay format to the expected provider API type.
+// This is used for smart channel routing: prefer channels whose native API type matches
+// the client's request format, avoiding unnecessary request/response conversion.
+func RelayFormatToAPIType(relayFormat RelayFormat) (int, bool) {
+	switch relayFormat {
+	case RelayFormatOpenAI, RelayFormatOpenAIAudio, RelayFormatOpenAIImage, RelayFormatOpenAIRealtime, RelayFormatOpenAIResponses, RelayFormatOpenAIResponsesCompaction:
+		return 0, true // APITypeOpenAI
+	case RelayFormatClaude:
+		return 1, true // APITypeAnthropic
+	case RelayFormatGemini:
+		return 13, true // APITypeGemini
+	case RelayFormatEmbedding:
+		// Embedding requests can be handled by multiple provider types;
+		// return OpenAI as the most common format, but let the caller
+		// decide whether to enforce strict matching.
+		return 0, true // APITypeOpenAI
+	case RelayFormatRerank:
+		return 0, true // APITypeOpenAI
+	default:
+		return 0, false
+	}
+}

From 8967f535e45e1de57893a1c25aadf6d995066342 Mon Sep 17 00:00:00 2001
From: xem <xemxx@qq.com>
Date: Thu, 21 May 2026 17:32:07 +0800
Subject: [PATCH 2/4] fix(model): qualify ambiguous columns in smart channel
 routing query

When the smart channel routing query joins abilities with channels,
both tables expose 'group' and 'weight' columns, causing SQLite to
fail with 'ambiguous column name' errors.

- Qualify the group filter with the 'abilities.' prefix in JOINed queries
- Wrap the priority subquery in parentheses so the generated SQL is
  valid (priority = (SELECT MAX(priority) ...) instead of priority =
  SELECT ...)
- Order by abilities.weight DESC explicitly, and select abilities.*
  on the filtered query to avoid column collision when scanning rows
---
 model/ability.go | 43 ++++++++++++++++++++++++++-----------------
 1 file changed, 26 insertions(+), 17 deletions(-)

diff --git a/model/ability.go b/model/ability.go
index ad913f61529..29d1db43273 100644
--- a/model/ability.go
+++ b/model/ability.go
@@ -108,17 +108,37 @@ func getChannelQuery(group string, model string, retry int) (*gorm.DB, error) {
 // getChannelQueryWithAPIType returns a query that filters channels by priority,
 // and when multiple channels share the same priority, prefers those whose
 // channel type matches the expected API type (for smart routing).
+//
+// Both abilities and channels expose a `group` column, so any reference to
+// `group` inside JOINed queries must be qualified with `abilities.` to avoid
+// "ambiguous column name" errors (notably on SQLite).
 func getChannelQueryWithAPIType(group string, model string, retry int, expectedAPIType int) (*gorm.DB, error) {
 	channelQuery, err := getChannelQuery(group, model, retry)
 	if err != nil {
 		return nil, err
 	}
 
-	// Join with channels table to access channel type for filtering
+	abilitiesGroupCol := "abilities." + commonGroupCol
+
+	// Resolve the priority value once, shared by the probing query and the
+	// final filtered query. Reusing the existing channelQuery here would be
+	// unsafe because its WHERE clause references the bare `group` column.
+	var priorityValue interface{}
+	if retry == 0 {
+		priorityValue = DB.Model(&Ability{}).Select("MAX(priority)").Where(commonGroupCol+" = ? and model = ? and enabled = ?", group, model, true)
+	} else {
+		p, err := getPriority(group, model, retry)
+		if err != nil {
+			return channelQuery, nil
+		}
+		priorityValue = p
+	}
+
 	var abilities []AbilityWithChannel
-	err = channelQuery.Table("abilities").
+	err = DB.Table("abilities").
 		Select("abilities.*, channels.type as channel_type").
 		Joins("left join channels on abilities.channel_id = channels.id").
+		Where(abilitiesGroupCol+" = ? and abilities.model = ? and abilities.enabled = ? and abilities.priority = (?)", group, model, true, priorityValue).
 		Scan(&abilities).Error
 	if err != nil {
 		return channelQuery, nil // fall back to original query
@@ -139,21 +159,10 @@ func getChannelQueryWithAPIType(group string, model string, retry int, expectedA
 	}
 
 	if hasMatch {
-		// Rebuild query with API type filter
-		// We need to filter by channel type, so we rebuild with join
-		var priority interface{}
-		if retry == 0 {
-			priority = DB.Model(&Ability{}).Select("MAX(priority)").Where(commonGroupCol+" = ? and model = ? and enabled = ?", group, model, true)
-		} else {
-			p, err := getPriority(group, model, retry)
-			if err != nil {
-				return channelQuery, nil
-			}
-			priority = p
-		}
 		filteredQuery := DB.Table("abilities").
+			Select("abilities.*").
 			Joins("left join channels on abilities.channel_id = channels.id").
-			Where(commonGroupCol+" = ? and abilities.model = ? and abilities.enabled = ? and abilities.priority = ?", group, model, true, priority).
+			Where(abilitiesGroupCol+" = ? and abilities.model = ? and abilities.enabled = ? and abilities.priority = (?)", group, model, true, priorityValue).
 			Where("channels.type IN (?)", getMatchingChannelTypes(expectedAPIType))
 		return filteredQuery, nil
 	}
@@ -197,9 +206,9 @@ func GetChannel(group string, model string, retry int, relayFormat types.RelayFo
 	}
 
 	if common.UsingSQLite || common.UsingPostgreSQL {
-		err = channelQuery.Order("weight DESC").Find(&abilities).Error
+		err = channelQuery.Order("abilities.weight DESC").Find(&abilities).Error
 	} else {
-		err = channelQuery.Order("weight DESC").Find(&abilities).Error
+		err = channelQuery.Order("abilities.weight DESC").Find(&abilities).Error
 	}
 	if err != nil {
 		return nil, err

From e0fe3e4503835951e48414a9b1bad8b4d287195b Mon Sep 17 00:00:00 2001
From: xem <xemxx@qq.com>
Date: Thu, 21 May 2026 17:32:16 +0800
Subject: [PATCH 3/4] fix(middleware): infer relay format from request path for
 smart routing

The Distribute middleware runs before the per-route handler that
sets the actual RelayFormat, so smart channel routing was always
seeing RelayFormatOpenAI and could never prefer a Claude- or
Gemini-native channel for /v1/messages or Gemini paths.

Add InferRelayFormatFromPath, which mirrors the routing table in
router/relay-router.go, and use it in the distributor so requests
hitting /v1/messages get RelayFormatClaude (and so on) before
channel selection runs.
---
 middleware/distributor.go |  2 +-
 types/relay_format.go     | 35 +++++++++++++++++++++++++++++++++++
 2 files changed, 36 insertions(+), 1 deletion(-)

diff --git a/middleware/distributor.go b/middleware/distributor.go
index 308ef276e10..5fed4ca6011 100644
--- a/middleware/distributor.go
+++ b/middleware/distributor.go
@@ -132,7 +132,7 @@ func Distribute() func(c *gin.Context) {
 						Ctx:         c,
 						ModelName:   modelRequest.Model,
 						TokenGroup:  usingGroup,
-						RelayFormat: types.RelayFormatOpenAI, // distributor runs before format detection, default to OpenAI
+						RelayFormat: types.InferRelayFormatFromPath(c.Request.URL.Path),
 						Retry:       common.GetPointer(0),
 					})
 					if err != nil {
diff --git a/types/relay_format.go b/types/relay_format.go
index c835fbf5613..fab0a36d48d 100644
--- a/types/relay_format.go
+++ b/types/relay_format.go
@@ -1,5 +1,7 @@
 package types
 
+import "strings"
+
 type RelayFormat string
 
 const (
@@ -40,3 +42,36 @@ func RelayFormatToAPIType(relayFormat RelayFormat) (int, bool) {
 		return 0, false
 	}
 }
+
+// InferRelayFormatFromPath returns the RelayFormat that the request to the given URL path will
+// eventually be relayed as. The middleware Distribute() runs before the per-route handler that
+// sets the format explicitly, so smart channel routing has to peek at the path here.
+//
+// Keep this in sync with router/relay-router.go. Unknown paths fall back to RelayFormatOpenAI,
+// which preserves the previous behaviour (no API-type filtering for unknown formats).
+func InferRelayFormatFromPath(path string) RelayFormat {
+	switch {
+	case strings.HasPrefix(path, "/v1/messages"):
+		return RelayFormatClaude
+	case strings.HasPrefix(path, "/v1/responses/compact"):
+		return RelayFormatOpenAIResponsesCompaction
+	case strings.HasPrefix(path, "/v1/responses"):
+		return RelayFormatOpenAIResponses
+	case strings.HasPrefix(path, "/v1/realtime"):
+		return RelayFormatOpenAIRealtime
+	case strings.HasPrefix(path, "/v1/embeddings"):
+		return RelayFormatEmbedding
+	case strings.HasPrefix(path, "/v1/rerank"):
+		return RelayFormatRerank
+	case strings.HasPrefix(path, "/v1/audio/"):
+		return RelayFormatOpenAIAudio
+	case strings.HasPrefix(path, "/v1/images/"), strings.HasPrefix(path, "/v1/edits"):
+		return RelayFormatOpenAIImage
+	case strings.HasPrefix(path, "/v1/engines/") && strings.HasSuffix(path, "/embeddings"):
+		return RelayFormatGemini
+	case strings.HasPrefix(path, "/v1/models/"):
+		return RelayFormatGemini
+	default:
+		return RelayFormatOpenAI
+	}
+}

From 59dd3884f57154a3f99bae63c52e9da6f2a942b5 Mon Sep 17 00:00:00 2001
From: xem <xemxx@qq.com>
Date: Fri, 22 May 2026 11:42:51 +0800
Subject: [PATCH 4/4] fix(types): correct relay format inference for
 embeddings, gemini, and unknown paths

Address review feedback on smart channel routing:

- /v1/engines/*/embeddings is OpenAI's legacy embeddings endpoint, not
  Gemini. Return RelayFormatEmbedding so it routes to the correct backend.
- /v1beta/models/ is the Gemini API path used by upstream clients (see
  middleware/distributor.go handling); add it alongside /v1/models/ so
  Gemini routes are inferred consistently.
- Default branch now returns an empty RelayFormat instead of
  RelayFormatOpenAI. Downstream channel selection (model.GetChannel and
  the memory-cache path in model.GetRandomSatisfiedChannel) treats an
  empty format as 'no API-type hint' and falls back to the original
  priority/weight-based selection, which is the truly neutral behaviour
  for unknown paths.
---
 types/relay_format.go | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/types/relay_format.go b/types/relay_format.go
index fab0a36d48d..82d7fb8e628 100644
--- a/types/relay_format.go
+++ b/types/relay_format.go
@@ -47,8 +47,9 @@ func RelayFormatToAPIType(relayFormat RelayFormat) (int, bool) {
 // eventually be relayed as. The middleware Distribute() runs before the per-route handler that
 // sets the format explicitly, so smart channel routing has to peek at the path here.
 //
-// Keep this in sync with router/relay-router.go. Unknown paths fall back to RelayFormatOpenAI,
-// which preserves the previous behaviour (no API-type filtering for unknown formats).
+// Keep this in sync with router/relay-router.go. Unknown paths return an empty RelayFormat,
+// which downstream callers (e.g. model.GetChannel) treat as "no API-type hint" and fall back
+// to the original priority/weight-based selection.
 func InferRelayFormatFromPath(path string) RelayFormat {
 	switch {
 	case strings.HasPrefix(path, "/v1/messages"):
@@ -68,10 +69,10 @@ func InferRelayFormatFromPath(path string) RelayFormat {
 	case strings.HasPrefix(path, "/v1/images/"), strings.HasPrefix(path, "/v1/edits"):
 		return RelayFormatOpenAIImage
 	case strings.HasPrefix(path, "/v1/engines/") && strings.HasSuffix(path, "/embeddings"):
-		return RelayFormatGemini
-	case strings.HasPrefix(path, "/v1/models/"):
+		return RelayFormatEmbedding
+	case strings.HasPrefix(path, "/v1beta/models/"), strings.HasPrefix(path, "/v1/models/"):
 		return RelayFormatGemini
 	default:
-		return RelayFormatOpenAI
+		return ""
 	}
 }