Skip to content

Commit 5933f85

Browse files
committed
client: add pre-throttling demand RU/s metric
Add a new client-side Prometheus Gauge `resource_manager_client_resource_group_demand_ru_per_sec` that tracks the EMA of demanded RU/s before Resource Control throttling takes effect. The existing `avgRUPerSec` is based on post-throttling consumption: when a request is rejected by the token bucket, its RU cost is subtracted from the consumption counter. This means the consumption-based EMA underreports the true workload demand when the resource group is actively throttled. The new demand metric samples RU cost at every `onRequestWaitImpl`, `onResponseImpl`, `onResponseWaitImpl`, and `addRUConsumption` entry point, accumulating into a monotonically increasing `demandRUTotal` counter that is never subtracted on throttle failure. A demand EMA is then computed using the same `movingAvgFactor` as the consumption EMA and flushed to the Gauge on each `updateAvgRequestResourcePerSec` tick. This enables operators to: - See per-instance RU demand in Grafana (natural `instance` label). - Aggregate cluster-wide demand via `sum by (resource_group)`. - Identify the true workload peak via `max_over_time(...)`. Close #10581 Signed-off-by: JmPotato <ghzpotato@gmail.com> Signed-off-by: JmPotato <github@ipotato.me>
1 parent dca466b commit 5933f85

3 files changed

Lines changed: 115 additions & 5 deletions

File tree

client/resource_group/controller/group_controller.go

Lines changed: 42 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,9 @@ type groupCostController struct {
5151
consumption *rmpb.Consumption
5252
storeCounter map[uint64]*rmpb.Consumption
5353
globalCounter *rmpb.Consumption
54+
// demandRUTotal accumulates total demanded RU (pre-throttling).
55+
// Unlike consumption, this is never subtracted on throttle failure.
56+
demandRUTotal float64
5457
}
5558

5659
// fast path to make once token limit with un-limit burst.
@@ -75,6 +78,9 @@ type groupCostController struct {
7578
// last update.
7679
targetPeriod time.Duration
7780

81+
// demandRUTotal is a snapshot of mu.demandRUTotal, copied in updateRunState.
82+
demandRUTotal float64
83+
7884
// consumptions stores the last value of mu.consumption.
7985
// requestUnitConsumptions []*rmpb.RequestUnitItem
8086
// resourceConsumptions []*rmpb.ResourceItem
@@ -106,6 +112,7 @@ type groupMetricsCollection struct {
106112
tokenRequestCounter prometheus.Counter
107113
runningKVRequestCounter prometheus.Gauge
108114
consumeTokenHistogram prometheus.Observer
115+
demandRUPerSecGauge prometheus.Gauge
109116
}
110117

111118
func initMetrics(oldName, name string) *groupMetricsCollection {
@@ -122,6 +129,7 @@ func initMetrics(oldName, name string) *groupMetricsCollection {
122129
tokenRequestCounter: metrics.ResourceGroupTokenRequestCounter.WithLabelValues(oldName, name),
123130
runningKVRequestCounter: metrics.GroupRunningKVRequestCounter.WithLabelValues(name),
124131
consumeTokenHistogram: metrics.TokenConsumedHistogram.WithLabelValues(name),
132+
demandRUPerSecGauge: metrics.DemandRUPerSecGauge.WithLabelValues(name),
125133
}
126134
}
127135

@@ -136,6 +144,12 @@ type tokenCounter struct {
136144
avgRUPerSecLastRU float64
137145
avgLastTime time.Time
138146

147+
// avgDemandRUPerSec is an EMA of the demanded RU/s before throttling,
148+
// reflecting the true workload demand regardless of token bucket limits.
149+
avgDemandRUPerSec float64
150+
avgDemandRUPerSecLastRU float64
151+
avgDemandLastTime time.Time
152+
139153
notify struct {
140154
mu sync.Mutex
141155
setupNotificationCh <-chan time.Time
@@ -220,10 +234,11 @@ func (gc *groupCostController) initRunState() {
220234
defer gc.metaLock.RUnlock()
221235
limiter := NewLimiterWithCfg(gc.name, now, cfgFunc(gc.meta.RUSettings.RU), gc.lowRUNotifyChan)
222236
counter := &tokenCounter{
223-
limiter: limiter,
224-
avgRUPerSec: 0,
225-
avgLastTime: now,
226-
fillRate: gc.meta.RUSettings.RU.Settings.FillRate,
237+
limiter: limiter,
238+
avgRUPerSec: 0,
239+
avgLastTime: now,
240+
avgDemandLastTime: now,
241+
fillRate: gc.meta.RUSettings.RU.Settings.FillRate,
227242
}
228243
gc.run.requestUnitTokens = counter
229244
gc.burstable.Store(isBurstable)
@@ -257,6 +272,7 @@ func (gc *groupCostController) updateRunState() {
257272
calc.Trickle(gc.mu.consumption)
258273
}
259274
*gc.run.consumption = *gc.mu.consumption
275+
gc.run.demandRUTotal = gc.mu.demandRUTotal
260276
gc.mu.Unlock()
261277
logControllerTrace("[resource group controller] update run state", zap.String("name", gc.name), zap.Any("request-unit-consumption", gc.run.consumption), zap.Bool("is-throttled", gc.isThrottled.Load()))
262278
gc.run.now = newTime
@@ -271,7 +287,9 @@ func (gc *groupCostController) updateAvgRequestResourcePerSec() {
271287
if !gc.calcAvg(counter, getRUValueFromConsumption(gc.run.consumption)) {
272288
return
273289
}
274-
logControllerTrace("[resource group controller] update avg ru per sec", zap.String("name", gc.name), zap.Float64("avg-ru-per-sec", counter.avgRUPerSec), zap.Bool("is-throttled", gc.isThrottled.Load()))
290+
gc.calcDemandAvg(counter, gc.run.demandRUTotal)
291+
gc.metrics.demandRUPerSecGauge.Set(counter.avgDemandRUPerSec)
292+
logControllerTrace("[resource group controller] update avg ru per sec", zap.String("name", gc.name), zap.Float64("avg-ru-per-sec", counter.avgRUPerSec), zap.Float64("avg-demand-ru-per-sec", counter.avgDemandRUPerSec), zap.Bool("is-throttled", gc.isThrottled.Load()))
275293
gc.burstable.Store(isBurstable)
276294
}
277295

@@ -319,6 +337,20 @@ func (gc *groupCostController) calcAvg(counter *tokenCounter, new float64) bool
319337
return true
320338
}
321339

340+
func (gc *groupCostController) calcDemandAvg(counter *tokenCounter, new float64) {
341+
deltaDuration := gc.run.now.Sub(counter.avgDemandLastTime)
342+
if deltaDuration <= 0 {
343+
return
344+
}
345+
delta := (new - counter.avgDemandRUPerSecLastRU) / deltaDuration.Seconds()
346+
counter.avgDemandRUPerSec = movingAvgFactor*counter.avgDemandRUPerSec + (1-movingAvgFactor)*delta
347+
if counter.avgDemandRUPerSec < 0 {
348+
counter.avgDemandRUPerSec = 0
349+
}
350+
counter.avgDemandLastTime = gc.run.now
351+
counter.avgDemandRUPerSecLastRU = new
352+
}
353+
322354
func (gc *groupCostController) shouldReportConsumption() bool {
323355
if !gc.run.initialRequestCompleted {
324356
return true
@@ -554,6 +586,7 @@ func (gc *groupCostController) onRequestWaitImpl(
554586

555587
gc.mu.Lock()
556588
add(gc.mu.consumption, delta)
589+
gc.mu.demandRUTotal += getRUValueFromConsumption(delta)
557590
gc.mu.Unlock()
558591

559592
if !gc.burstable.Load() {
@@ -611,6 +644,8 @@ func (gc *groupCostController) onResponseImpl(
611644
gc.mu.Lock()
612645
// Record the consumption of the request
613646
add(gc.mu.consumption, delta)
647+
// Record the response-phase demand as well (actual read bytes, CPU, etc.)
648+
gc.mu.demandRUTotal += getRUValueFromConsumption(delta)
614649
// Record the consumption of the request by store
615650
count := &rmpb.Consumption{}
616651
*count = *delta
@@ -652,6 +687,7 @@ func (gc *groupCostController) onResponseWaitImpl(
652687
gc.mu.Lock()
653688
// Record the consumption of the request
654689
add(gc.mu.consumption, delta)
690+
gc.mu.demandRUTotal += getRUValueFromConsumption(delta)
655691
// Record the consumption of the request by store
656692
count := &rmpb.Consumption{}
657693
*count = *delta
@@ -669,6 +705,7 @@ func (gc *groupCostController) onResponseWaitImpl(
669705
// addRUConsumption records an externally reported RU consumption delta,
// folding it into both the consumption counter and the pre-throttling
// demand total under the group's mutex.
func (gc *groupCostController) addRUConsumption(consumption *rmpb.Consumption) {
	gc.mu.Lock()
	defer gc.mu.Unlock()
	add(gc.mu.consumption, consumption)
	gc.mu.demandRUTotal += getRUValueFromConsumption(consumption)
}
674711

client/resource_group/controller/group_controller_test.go

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -315,3 +315,64 @@ func TestAcquireTokensFallbackToTimer(t *testing.T) {
315315
// waitDuration should be roughly retryTimes * retryInterval.
316316
re.GreaterOrEqual(waitDuration, gc.mainCfg.WaitRetryInterval*time.Duration(gc.mainCfg.WaitRetryTimes))
317317
}
318+
319+
// TestDemandRUTracking verifies that demandRUTotal accumulates RU for both
// successful and throttled requests (unlike the consumption counter, which
// is decremented when a request is rejected), and that the demand EMA update
// path runs without producing a negative value.
func TestDemandRUTracking(t *testing.T) {
	re := require.New(t)
	gc := createTestGroupCostController(re)

	// Simulate requests arriving: demand should accumulate regardless of throttling.
	req := &TestRequestInfo{
		isWrite:    true,
		writeBytes: 100,
	}
	resp := &TestResponseInfo{
		readBytes: 100,
		succeed:   true,
	}

	// Issue several successful requests.
	for range 5 {
		consumption, _, _, _, err := gc.onRequestWaitImpl(context.TODO(), req)
		re.NoError(err)
		re.NotNil(consumption)
		_, err = gc.onResponseImpl(req, resp)
		re.NoError(err)
	}

	// demandRUTotal should have accumulated all pre-request and post-response RU.
	// Read under the mutex since the controller mutates it under gc.mu.
	gc.mu.Lock()
	demandTotal := gc.mu.demandRUTotal
	gc.mu.Unlock()
	re.Positive(demandTotal, "demand should be accumulated after requests")

	// Now issue a request that gets throttled (rejected). The write size is
	// presumably far above the test controller's token-bucket capacity —
	// confirm against createTestGroupCostController's settings.
	bigReq := &TestRequestInfo{
		isWrite:    true,
		writeBytes: 10000000,
	}
	_, _, _, _, err := gc.onRequestWaitImpl(context.TODO(), bigReq)
	re.Error(err)
	re.True(errs.ErrClientResourceGroupThrottled.Equal(err))

	// demandRUTotal should still include the throttled request's RU: this is
	// the key property distinguishing demand from consumption.
	gc.mu.Lock()
	demandAfterThrottle := gc.mu.demandRUTotal
	gc.mu.Unlock()
	re.Greater(demandAfterThrottle, demandTotal,
		"demand should increase even for throttled requests")

	// Verify that the demand EMA is computed correctly.
	// NOTE(review): updateRunState appears to reset gc.run.now from the wall
	// clock at its end, so these manual gc.run.now assignments may be
	// overwritten before calcDemandAvg sees them — confirm the time source.
	now := time.Now()
	gc.run.now = now
	gc.updateRunState()
	gc.updateAvgRequestResourcePerSec()

	// Advance time and update again so the EMA has two data points.
	gc.run.now = now.Add(time.Second)
	gc.updateRunState()
	gc.updateAvgRequestResourcePerSec()

	counter := gc.run.requestUnitTokens
	// NOTE(review): this assertion is vacuous — calcDemandAvg clamps the EMA
	// at zero, so >= 0 always holds. Consider asserting re.Positive once the
	// time-advance concern above is resolved.
	re.GreaterOrEqual(counter.avgDemandRUPerSec, 0.0,
		"demand EMA should be non-negative")
}

client/resource_group/controller/metrics/metrics.go

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,8 @@ const (
3131
var (
3232
// ResourceGroupStatusGauge comments placeholder
3333
ResourceGroupStatusGauge *prometheus.GaugeVec
34+
// DemandRUPerSecGauge is the EMA of demanded RU/s before throttling per resource group.
35+
DemandRUPerSecGauge *prometheus.GaugeVec
3436
// SuccessfulRequestDuration comments placeholder
3537
SuccessfulRequestDuration *prometheus.HistogramVec
3638
// FailedLimitReserveDuration comments placeholder
@@ -69,6 +71,15 @@ func initMetrics(constLabels prometheus.Labels) {
6971
ConstLabels: constLabels,
7072
}, []string{resourceGroupNameLabel, newResourceGroupNameLabel})
7173

74+
DemandRUPerSecGauge = prometheus.NewGaugeVec(
75+
prometheus.GaugeOpts{
76+
Namespace: namespace,
77+
Subsystem: "resource_group",
78+
Name: "demand_ru_per_sec",
79+
Help: "EMA of demanded RU/s before throttling for each resource group.",
80+
ConstLabels: constLabels,
81+
}, []string{newResourceGroupNameLabel})
82+
7283
SuccessfulRequestDuration = prometheus.NewHistogramVec(
7384
prometheus.HistogramOpts{
7485
Namespace: namespace,
@@ -162,6 +173,7 @@ func initMetrics(constLabels prometheus.Labels) {
162173
func InitAndRegisterMetrics(constLabels prometheus.Labels) {
163174
initMetrics(constLabels)
164175
prometheus.MustRegister(ResourceGroupStatusGauge)
176+
prometheus.MustRegister(DemandRUPerSecGauge)
165177
prometheus.MustRegister(SuccessfulRequestDuration)
166178
prometheus.MustRegister(FailedRequestCounter)
167179
prometheus.MustRegister(FailedLimitReserveDuration)

0 commit comments

Comments
 (0)