diff --git a/controller/channel-test.go b/controller/channel-test.go
index b225585ed7a..fb3388b88b6 100644
--- a/controller/channel-test.go
+++ b/controller/channel-test.go
@@ -527,17 +527,11 @@ func settleTestQuota(info *relaycommon.RelayInfo, priceData types.PriceData, usa
 		}
 	}
 
-	quota := 0
-	if !priceData.UsePrice {
-		quota = usage.PromptTokens + int(math.Round(float64(usage.CompletionTokens)*priceData.CompletionRatio))
-		quota = int(math.Round(float64(quota) * priceData.ModelRatio))
-		if priceData.ModelRatio != 0 && quota <= 0 {
-			quota = 1
-		}
-		return quota, nil
+	if info == nil {
+		info = &relaycommon.RelayInfo{}
 	}
-
-	return int(priceData.ModelPrice * common.QuotaPerUnit), nil
+	info.PriceData = priceData
+	return service.CalculateTextQuotaForUsage(&gin.Context{}, info, usage), nil
 }
 
 func buildTestLogOther(c *gin.Context, info *relaycommon.RelayInfo, priceData types.PriceData, usage *dto.Usage, tieredResult *billingexpr.TieredResult) map[string]interface{} {
diff --git a/controller/channel_test_internal_test.go b/controller/channel_test_internal_test.go
index 9c26d623efb..d6e2d8d306e 100644
--- a/controller/channel_test_internal_test.go
+++ b/controller/channel_test_internal_test.go
@@ -3,6 +3,7 @@ package controller
 import (
 	"net/http/httptest"
 	"testing"
+	"time"
 
 	"github.com/QuantumNous/new-api/common"
 	"github.com/QuantumNous/new-api/dto"
@@ -41,6 +42,31 @@ func TestSettleTestQuotaUsesTieredBilling(t *testing.T) {
 	require.Equal(t, "stream", result.MatchedTier)
 }
 
+// TestSettleTestQuotaIncludesCacheReadTokens verifies that settleTestQuota returns a nil
+// second result and a quota of 989 when CachedTokens (2492) exceed PromptTokens (335).
+func TestSettleTestQuotaIncludesCacheReadTokens(t *testing.T) {
+	priceData := types.PriceData{
+		ModelRatio:      1.5,
+		CompletionRatio: 5,
+		CacheRatio:      0.1,
+		GroupRatioInfo:  types.GroupRatioInfo{GroupRatio: 1},
+	}
+	usage := &dto.Usage{
+		PromptTokens:        335,
+		CompletionTokens:    15,
+		TotalTokens:         350,
+		PromptTokensDetails: dto.InputTokenDetails{CachedTokens: 2492},
+		UsageSemantic:       "anthropic",
+	}
+	info := &relaycommon.RelayInfo{OriginModelName: "gpt-test", StartTime: time.Now()}
+
+	// 989 presumably comes from (335 + 2492*0.1 + 15*5) * 1.5 = 988.8 rounded; confirm against the quota calculation.
+	quota, tiered := settleTestQuota(info, priceData, usage)
+
+	require.Nil(t, tiered)
+	require.Equal(t, 989, quota)
+}
+
 func TestBuildTestLogOtherInjectsTieredInfo(t *testing.T) {
 	gin.SetMode(gin.TestMode)
 	ctx, _ := gin.CreateTestContext(httptest.NewRecorder())
diff --git a/service/text_quota.go b/service/text_quota.go
index 3f344dc3e57..2810d743a19 100644
--- a/service/text_quota.go
+++ b/service/text_quota.go
@@ -81,6 +81,14 @@ func isLegacyClaudeDerivedOpenAIUsage(relayInfo *relaycommon.RelayInfo, usage *d
 	return usage.ClaudeCacheCreation5mTokens > 0 || usage.ClaudeCacheCreation1hTokens > 0
 }
 
+// usageLooksLikeSeparatedInputCache reports whether usage carries a cached or
+// cache-creation token count larger than its PromptTokens; a nil usage yields false.
+func usageLooksLikeSeparatedInputCache(usage *dto.Usage) bool {
+	return usage != nil &&
+		(usage.PromptTokensDetails.CachedTokens > usage.PromptTokens ||
+			usage.PromptTokensDetails.CachedCreationTokens > usage.PromptTokens)
+}
+
 func calculateTextToolCallSurcharge(ctx *gin.Context, relayInfo *relaycommon.RelayInfo, summary *textQuotaSummary) decimal.Decimal {
 	dGroupRatio := decimal.NewFromFloat(summary.GroupRatio)
 	dQuotaPerUnit := decimal.NewFromFloat(common.QuotaPerUnit)
@@ -309,6 +317,12 @@ func calculateTextQuotaSummary(ctx *gin.Context, relayInfo *relaycommon.RelayInf
 	return summary
 }
 
+// CalculateTextQuotaForUsage returns only the Quota of the summary from calculateTextQuotaSummary; intended for callers that want the value without writing consume logs.
+func CalculateTextQuotaForUsage(ctx *gin.Context, relayInfo *relaycommon.RelayInfo, usage *dto.Usage) int {
+	summary := calculateTextQuotaSummary(ctx, relayInfo, usage)
+	return summary.Quota
+}
+
 func usageSemanticFromUsage(relayInfo *relaycommon.RelayInfo, usage *dto.Usage) string {
 	if usage != nil && usage.UsageSemantic != "" {
 		return usage.UsageSemantic
@@ -316,6 +330,9 @@ func usageSemanticFromUsage(relayInfo *relaycommon.RelayInfo, usage *dto.Usage)
 	if relayInfo != nil && relayInfo.GetFinalRequestRelayFormat() == types.RelayFormatClaude {
 		return "anthropic"
 	}
+	if usageLooksLikeSeparatedInputCache(usage) {
+		return "anthropic"
+	}
 	return "openai"
 }
 
diff --git a/service/text_quota_test.go b/service/text_quota_test.go
index 37ce1877482..c109c8edee7 100644
--- a/service/text_quota_test.go
+++ b/service/text_quota_test.go
@@ -148,6 +148,39 @@ func TestCalculateTextQuotaSummaryUsesAnthropicUsageSemanticFromUpstreamUsage(t
 	require.Equal(t, 1488, summary.Quota)
 }
 
+// TestCalculateTextQuotaSummaryInfersSeparatedCacheReadUsage feeds an OpenAI-format
+// relay a usage with no UsageSemantic whose CachedTokens exceed PromptTokens, and
+// expects the "anthropic" semantic, PromptTokens of 335 and a Quota of 989.
+func TestCalculateTextQuotaSummaryInfersSeparatedCacheReadUsage(t *testing.T) {
+	gin.SetMode(gin.TestMode)
+	ctx, _ := gin.CreateTestContext(httptest.NewRecorder())
+
+	priceData := types.PriceData{
+		ModelRatio:      1.5,
+		CompletionRatio: 5,
+		CacheRatio:      0.1,
+		GroupRatioInfo:  types.GroupRatioInfo{GroupRatio: 1},
+	}
+	relayInfo := &relaycommon.RelayInfo{
+		RelayFormat:     types.RelayFormatOpenAI,
+		OriginModelName: "gpt-compatible-claude",
+		PriceData:       priceData,
+		StartTime:       time.Now(),
+	}
+	usage := &dto.Usage{
+		PromptTokens:        335,
+		CompletionTokens:    15,
+		PromptTokensDetails: dto.InputTokenDetails{CachedTokens: 2492},
+	}
+
+	got := calculateTextQuotaSummary(ctx, relayInfo, usage)
+
+	require.True(t, got.IsClaudeUsageSemantic)
+	require.Equal(t, "anthropic", got.UsageSemantic)
+	require.Equal(t, 335, got.PromptTokens)
+	require.Equal(t, 989, got.Quota)
+}
+
 func TestCacheWriteTokensTotal(t *testing.T) {
 	t.Run("split cache creation", func(t *testing.T) {
 		summary := textQuotaSummary{