diff --git a/controller/channel-test.go b/controller/channel-test.go index b225585ed7a..fb3388b88b6 100644 --- a/controller/channel-test.go +++ b/controller/channel-test.go @@ -527,17 +527,11 @@ func settleTestQuota(info *relaycommon.RelayInfo, priceData types.PriceData, usa } } - quota := 0 - if !priceData.UsePrice { - quota = usage.PromptTokens + int(math.Round(float64(usage.CompletionTokens)*priceData.CompletionRatio)) - quota = int(math.Round(float64(quota) * priceData.ModelRatio)) - if priceData.ModelRatio != 0 && quota <= 0 { - quota = 1 - } - return quota, nil + if info == nil { + info = &relaycommon.RelayInfo{} } - - return int(priceData.ModelPrice * common.QuotaPerUnit), nil + info.PriceData = priceData + return service.CalculateTextQuotaForUsage(&gin.Context{}, info, usage), nil } func buildTestLogOther(c *gin.Context, info *relaycommon.RelayInfo, priceData types.PriceData, usage *dto.Usage, tieredResult *billingexpr.TieredResult) map[string]interface{} { diff --git a/controller/channel_test_internal_test.go b/controller/channel_test_internal_test.go index 9c26d623efb..d6e2d8d306e 100644 --- a/controller/channel_test_internal_test.go +++ b/controller/channel_test_internal_test.go @@ -3,6 +3,7 @@ package controller import ( "net/http/httptest" "testing" + "time" "github.com/QuantumNous/new-api/common" "github.com/QuantumNous/new-api/dto" @@ -41,6 +42,31 @@ func TestSettleTestQuotaUsesTieredBilling(t *testing.T) { require.Equal(t, "stream", result.MatchedTier) } +func TestSettleTestQuotaIncludesCacheReadTokens(t *testing.T) { + info := &relaycommon.RelayInfo{ + OriginModelName: "gpt-test", + StartTime: time.Now(), + } + + quota, result := settleTestQuota(info, types.PriceData{ + ModelRatio: 1.5, + CompletionRatio: 5, + CacheRatio: 0.1, + GroupRatioInfo: types.GroupRatioInfo{GroupRatio: 1}, + }, &dto.Usage{ + PromptTokens: 335, + CompletionTokens: 15, + TotalTokens: 350, + PromptTokensDetails: dto.InputTokenDetails{ + CachedTokens: 2492, + }, + UsageSemantic: "anthropic", + }) + + require.Nil(t, result) + require.Equal(t, 989, quota) +} + func TestBuildTestLogOtherInjectsTieredInfo(t *testing.T) { gin.SetMode(gin.TestMode) ctx, _ := gin.CreateTestContext(httptest.NewRecorder()) diff --git a/service/text_quota.go b/service/text_quota.go index 3f344dc3e57..2810d743a19 100644 --- a/service/text_quota.go +++ b/service/text_quota.go @@ -81,6 +81,14 @@ func isLegacyClaudeDerivedOpenAIUsage(relayInfo *relaycommon.RelayInfo, usage *d return usage.ClaudeCacheCreation5mTokens > 0 || usage.ClaudeCacheCreation1hTokens > 0 } +func usageLooksLikeSeparatedInputCache(usage *dto.Usage) bool { + if usage == nil { + return false + } + return usage.PromptTokensDetails.CachedTokens > usage.PromptTokens || + usage.PromptTokensDetails.CachedCreationTokens > usage.PromptTokens +} + func calculateTextToolCallSurcharge(ctx *gin.Context, relayInfo *relaycommon.RelayInfo, summary *textQuotaSummary) decimal.Decimal { dGroupRatio := decimal.NewFromFloat(summary.GroupRatio) dQuotaPerUnit := decimal.NewFromFloat(common.QuotaPerUnit) @@ -309,6 +317,12 @@ func calculateTextQuotaSummary(ctx *gin.Context, relayInfo *relaycommon.RelayInf return summary } +// CalculateTextQuotaForUsage returns the same text quota used by normal relay settlement. +// It is intended for callers that need the quota value without writing consume logs. +func CalculateTextQuotaForUsage(ctx *gin.Context, relayInfo *relaycommon.RelayInfo, usage *dto.Usage) int { + return calculateTextQuotaSummary(ctx, relayInfo, usage).Quota +} + func usageSemanticFromUsage(relayInfo *relaycommon.RelayInfo, usage *dto.Usage) string { if usage != nil && usage.UsageSemantic != "" { return usage.UsageSemantic @@ -316,6 +330,9 @@ func usageSemanticFromUsage(relayInfo *relaycommon.RelayInfo, usage *dto.Usage) if relayInfo != nil && relayInfo.GetFinalRequestRelayFormat() == types.RelayFormatClaude { return "anthropic" } + if usageLooksLikeSeparatedInputCache(usage) { + return "anthropic" + } return "openai" } diff --git a/service/text_quota_test.go b/service/text_quota_test.go index 37ce1877482..c109c8edee7 100644 --- a/service/text_quota_test.go +++ b/service/text_quota_test.go @@ -148,6 +148,39 @@ func TestCalculateTextQuotaSummaryUsesAnthropicUsageSemanticFromUpstreamUsage(t require.Equal(t, 1488, summary.Quota) } +func TestCalculateTextQuotaSummaryInfersSeparatedCacheReadUsage(t *testing.T) { + gin.SetMode(gin.TestMode) + w := httptest.NewRecorder() + ctx, _ := gin.CreateTestContext(w) + + relayInfo := &relaycommon.RelayInfo{ + RelayFormat: types.RelayFormatOpenAI, + OriginModelName: "gpt-compatible-claude", + PriceData: types.PriceData{ + ModelRatio: 1.5, + CompletionRatio: 5, + CacheRatio: 0.1, + GroupRatioInfo: types.GroupRatioInfo{GroupRatio: 1}, + }, + StartTime: time.Now(), + } + + usage := &dto.Usage{ + PromptTokens: 335, + CompletionTokens: 15, + PromptTokensDetails: dto.InputTokenDetails{ + CachedTokens: 2492, + }, + } + + summary := calculateTextQuotaSummary(ctx, relayInfo, usage) + + require.True(t, summary.IsClaudeUsageSemantic) + require.Equal(t, "anthropic", summary.UsageSemantic) + require.Equal(t, 335, summary.PromptTokens) + require.Equal(t, 989, summary.Quota) +} + func TestCacheWriteTokensTotal(t *testing.T) { t.Run("split cache creation", func(t *testing.T) { summary := textQuotaSummary{