diff --git a/container/libcontainer/handler.go b/container/libcontainer/handler.go index 613e8260af..fa5b6f05e8 100644 --- a/container/libcontainer/handler.go +++ b/container/libcontainer/handler.go @@ -771,6 +771,8 @@ func setCPUStats(s *cgroups.Stats, ret *info.ContainerStats, withPerCPU bool) { ret.Cpu.CFS.Periods = s.CpuStats.ThrottlingData.Periods ret.Cpu.CFS.ThrottledPeriods = s.CpuStats.ThrottlingData.ThrottledPeriods ret.Cpu.CFS.ThrottledTime = s.CpuStats.ThrottlingData.ThrottledTime + ret.Cpu.CFS.BurstsPeriods = s.CpuStats.BurstData.BurstsPeriods + ret.Cpu.CFS.BurstTime = s.CpuStats.BurstData.BurstTime setPSIStats(s.CpuStats.PSI, &ret.Cpu.PSI) if !withPerCPU { diff --git a/info/v1/container.go b/info/v1/container.go index cd365fc785..3d2d282135 100644 --- a/info/v1/container.go +++ b/info/v1/container.go @@ -329,6 +329,13 @@ type CpuCFS struct { // Total time duration for which tasks in the cgroup have been throttled. // Unit: nanoseconds. ThrottledTime uint64 `json:"throttled_time"` + + // Total number of periods when CPU burst occurs. + BurstsPeriods uint64 `json:"bursts_periods"` + + // Total time duration when CPU burst occurs. + // Unit: nanoseconds. + BurstTime uint64 `json:"burst_time"` } // Cpu Aggregated scheduler statistics diff --git a/metrics/prometheus.go b/metrics/prometheus.go index bf8815a0d0..3702dd79cb 100644 --- a/metrics/prometheus.go +++ b/metrics/prometheus.go @@ -234,6 +234,30 @@ func NewPrometheusCollector(i infoProvider, f ContainerLabelsFunc, includedMetri timestamp: s.Timestamp, }} }, + }, { + name: "container_cpu_cfs_burst_periods_total", + help: "Number of periods when burst occurs.", + valueType: prometheus.CounterValue, + condition: func(s info.ContainerSpec) bool { return s.Cpu.Quota != 0 }, + getValues: func(s *info.ContainerStats) metricValues { + return metricValues{ + { + value: float64(s.Cpu.CFS.BurstsPeriods), + timestamp: s.Timestamp, + }} + }, + }, { + name: "container_cpu_cfs_burst_seconds_total", + help: "Total time duration the container has been bursted.", + valueType: prometheus.CounterValue, + condition: func(s info.ContainerSpec) bool { return s.Cpu.Quota != 0 }, + getValues: func(s *info.ContainerStats) metricValues { + return metricValues{ + { + value: float64(s.Cpu.CFS.BurstTime) / float64(time.Second), + timestamp: s.Timestamp, + }} + }, }, }...) } diff --git a/metrics/prometheus_fake.go b/metrics/prometheus_fake.go index f6bb281a47..ab48cce860 100644 --- a/metrics/prometheus_fake.go +++ b/metrics/prometheus_fake.go @@ -320,6 +320,8 @@ func (p testSubcontainersInfoProvider) GetRequestedContainersInfo(string, v2.Req Periods: 723, ThrottledPeriods: 18, ThrottledTime: 1724314000, + BurstsPeriods: 25, + BurstTime: 500000000, }, Schedstat: info.CpuSchedstat{ RunTime: 53643567, diff --git a/metrics/prometheus_test.go b/metrics/prometheus_test.go index 8379c8e078..928fbf1674 100644 --- a/metrics/prometheus_test.go +++ b/metrics/prometheus_test.go @@ -428,3 +428,39 @@ func TestIOCostMetrics(t *testing.T) { }) } } + +func TestCPUBurstMetrics(t *testing.T) { + containerStats := &info.ContainerStats{ + Timestamp: time.Unix(1395066363, 0), + Cpu: info.CpuStats{ + CFS: info.CpuCFS{ + BurstsPeriods: 25, + BurstTime: 500000000, + }, + }, + } + + testCases := []struct { + name string + getValue func() float64 + expectedValue float64 + }{ + { + name: "BurstsPeriods", + getValue: func() float64 { return float64(containerStats.Cpu.CFS.BurstsPeriods) }, + expectedValue: 25.0, + }, + { + name: "BurstTime", + getValue: func() float64 { return float64(containerStats.Cpu.CFS.BurstTime) / float64(time.Second) }, + expectedValue: 0.5, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + result := tc.getValue() + assert.Equal(t, tc.expectedValue, result) + }) + } +} diff --git a/metrics/testdata/prometheus_metrics b/metrics/testdata/prometheus_metrics index 62f05cea40..b29a01a177 100644 --- a/metrics/testdata/prometheus_metrics +++ b/metrics/testdata/prometheus_metrics @@ -18,6 +18,12 @@ container_cpu_cfs_throttled_periods_total{container_env_foo_env="prod",container # HELP container_cpu_cfs_throttled_seconds_total Total time duration the container has been throttled. # TYPE container_cpu_cfs_throttled_seconds_total counter container_cpu_cfs_throttled_seconds_total{container_env_foo_env="prod",container_label_foo_label="bar",id="testcontainer",image="test",name="testcontaineralias",zone_name="hello"} 1.724314 1395066363000 +# HELP container_cpu_cfs_burst_periods_total Number of periods when burst occurs. +# TYPE container_cpu_cfs_burst_periods_total counter +container_cpu_cfs_burst_periods_total{container_env_foo_env="prod",container_label_foo_label="bar",id="testcontainer",image="test",name="testcontaineralias",zone_name="hello"} 25 1395066363000 +# HELP container_cpu_cfs_burst_seconds_total Total time duration the container has been bursted. +# TYPE container_cpu_cfs_burst_seconds_total counter +container_cpu_cfs_burst_seconds_total{container_env_foo_env="prod",container_label_foo_label="bar",id="testcontainer",image="test",name="testcontaineralias",zone_name="hello"} 0.5 1395066363000 # HELP container_cpu_load_average_10s Value of container cpu load average over the last 10 seconds. # TYPE container_cpu_load_average_10s gauge container_cpu_load_average_10s{container_env_foo_env="prod",container_label_foo_label="bar",id="testcontainer",image="test",name="testcontaineralias",zone_name="hello"} 2 1395066363000 diff --git a/metrics/testdata/prometheus_metrics_whitelist_filtered b/metrics/testdata/prometheus_metrics_whitelist_filtered index 0bd2589c9d..0bab1a7dcf 100644 --- a/metrics/testdata/prometheus_metrics_whitelist_filtered +++ b/metrics/testdata/prometheus_metrics_whitelist_filtered @@ -18,6 +18,12 @@ container_cpu_cfs_throttled_periods_total{container_env_foo_env="prod",id="testc # HELP container_cpu_cfs_throttled_seconds_total Total time duration the container has been throttled. # TYPE container_cpu_cfs_throttled_seconds_total counter container_cpu_cfs_throttled_seconds_total{container_env_foo_env="prod",id="testcontainer",image="test",name="testcontaineralias",zone_name="hello"} 1.724314 1395066363000 +# HELP container_cpu_cfs_burst_periods_total Number of periods when burst occurs. +# TYPE container_cpu_cfs_burst_periods_total counter +container_cpu_cfs_burst_periods_total{container_env_foo_env="prod",id="testcontainer",image="test",name="testcontaineralias",zone_name="hello"} 25 1395066363000 +# HELP container_cpu_cfs_burst_seconds_total Total time duration the container has been bursted. +# TYPE container_cpu_cfs_burst_seconds_total counter +container_cpu_cfs_burst_seconds_total{container_env_foo_env="prod",id="testcontainer",image="test",name="testcontaineralias",zone_name="hello"} 0.5 1395066363000 # HELP container_cpu_load_average_10s Value of container cpu load average over the last 10 seconds. # TYPE container_cpu_load_average_10s gauge container_cpu_load_average_10s{container_env_foo_env="prod",id="testcontainer",image="test",name="testcontaineralias",zone_name="hello"} 2 1395066363000