From 97706870ed05d8052b2db488f2b2629684df8435 Mon Sep 17 00:00:00 2001
From: Danielle Maywood
Date: Fri, 7 Nov 2025 03:00:27 +0000
Subject: [PATCH 1/4] Fix cpu.max file not found error in init.scope cgroup

On systems running RKE2 with sysbox where /proc/self/cgroup reports
0::/init.scope, the code was failing because
/sys/fs/cgroup/init.scope/cpu.max doesn't exist.

The error occurred because cpuUsed() and cpuTotal() tried to read the
CPU period directly from the missing file without falling back to
parent cgroups.

Changes:
- Added cpuPeriod() helper method that follows the same pattern as
  cpuQuota()
- The new method handles fs.ErrNotExist and strconv.ErrSyntax errors
- Falls back to parent cgroup when cpu.max is missing (common in
  system-level cgroups like init.scope)
- Updated cpuUsed() to use cpuPeriod() instead of directly reading
- Updated cpuTotal() to use cpuPeriod() instead of directly reading
- Added test case fsContainerCgroupV2InitScope to verify the fix

The fix ensures consistent parent fallback behavior across all values
read from cpu.max (quota, period, and usage calculations).

Fixes issue where systems report: /sys/fs/cgroup/init.scope/cpu.max file does not exist --- cgroupv2.go | 35 ++++++++++++++++++++++++++++++----- stat_internal_test.go | 40 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 70 insertions(+), 5 deletions(-) diff --git a/cgroupv2.go b/cgroupv2.go index a9ae957..de7daf9 100644 --- a/cgroupv2.go +++ b/cgroupv2.go @@ -66,18 +66,17 @@ func newCgroupV2Statter(fs afero.Fs, path string, depth int) (*cgroupV2Statter, func (s cgroupV2Statter) cpuUsed() (used float64, err error) { cpuStatPath := filepath.Join(s.path, cgroupV2CPUStat) - cpuMaxPath := filepath.Join(s.path, cgroupV2CPUMax) usageUs, err := readInt64Prefix(s.fs, cpuStatPath, "usage_usec") if err != nil { return 0, xerrors.Errorf("get cgroupv2 cpu used: %w", err) } - periodUs, err := readInt64SepIdx(s.fs, cpuMaxPath, " ", 1) + periodUs, err := s.cpuPeriod() if err != nil { return 0, xerrors.Errorf("get cpu period: %w", err) } - return float64(usageUs) / float64(periodUs), nil + return float64(usageUs) / periodUs, nil } func (s cgroupV2Statter) cpuQuota() (float64, error) { @@ -106,10 +105,36 @@ func (s cgroupV2Statter) cpuQuota() (float64, error) { return float64(quotaUs), nil } -func (s cgroupV2Statter) cpuTotal() (total float64, err error) { +func (s cgroupV2Statter) cpuPeriod() (float64, error) { cpuMaxPath := filepath.Join(s.path, cgroupV2CPUMax) periodUs, err := readInt64SepIdx(s.fs, cpuMaxPath, " ", 1) + if err != nil { + if !errors.Is(err, strconv.ErrSyntax) && !errors.Is(err, fs.ErrNotExist) { + return 0, xerrors.Errorf("get cpu period: %w", err) + } + + // If the value is not a valid integer or the cpu.max file does + // not exist, we call the parent to find its period. This can happen + // in system-level cgroups like init.scope where cpu.max may not exist. 
+ if s.parent != nil { + period, err := s.parent.cpuPeriod() + if err != nil { + return 0, xerrors.Errorf("get parent cpu period: %w", err) + } + return period, nil + } + + // No parent and no period found. This should not happen in a properly + // configured cgroup hierarchy, but return an error to avoid division by zero. + return 0, xerrors.Errorf("cpu period not found in cgroup hierarchy") + } + + return float64(periodUs), nil +} + +func (s cgroupV2Statter) cpuTotal() (total float64, err error) { + periodUs, err := s.cpuPeriod() if err != nil { return 0, xerrors.Errorf("get cpu period: %w", err) } @@ -119,7 +144,7 @@ func (s cgroupV2Statter) cpuTotal() (total float64, err error) { return 0, xerrors.Errorf("get cpu quota: %w", err) } - return float64(quotaUs) / float64(periodUs), nil + return quotaUs / periodUs, nil } func (s cgroupV2Statter) memoryMaxBytes() (*float64, error) { diff --git a/stat_internal_test.go b/stat_internal_test.go index 8bc9a82..07ea639 100644 --- a/stat_internal_test.go +++ b/stat_internal_test.go @@ -396,6 +396,28 @@ func TestStatter(t *testing.T) { assert.Equal(t, "cores", cpu.Unit) }) + t.Run("CPU/InitScopeFallback", func(t *testing.T) { + t.Parallel() + + // Test RKE2/sysbox scenario where /init.scope cgroup doesn't have + // cpu.max but the root cgroup does. The period should be read from + // the parent (root) cgroup. 
+ fs := initFS(t, fsContainerCgroupV2InitScope) + fakeWait := func(time.Duration) { + mungeFS(t, fs, filepath.Join(cgroupRootPath, "init.scope", cgroupV2CPUStat), "usage_usec 100000") + } + s, err := New(WithFS(fs), withWait(fakeWait), withIsCgroupV2(true)) + require.NoError(t, err) + + cpu, err := s.ContainerCPU() + require.NoError(t, err) + + require.NotNil(t, cpu) + assert.Equal(t, 1.0, cpu.Used) + require.Nil(t, cpu.Total) // quota is "max" so no limit + assert.Equal(t, "cores", cpu.Unit) + }) + t.Run("Memory/Limit", func(t *testing.T) { t.Parallel() @@ -727,6 +749,24 @@ proc /proc/sys proc ro,nosuid,nodev,noexec,relatime 0 0`, filepath.Join(cgroupRootPath, fsContainerCgroupV2KubernetesPath, cgroupV2MemoryStat): "inactive_file 268435456", filepath.Join(cgroupRootPath, fsContainerCgroupV2KubernetesPath, cgroupV2MemoryUsageBytes): "536870912", } + // fsContainerCgroupV2InitScope simulates RKE2/sysbox environment where + // the cgroup path is /init.scope and cpu.max does not exist at that level + // but does exist at the root cgroup. This tests the parent fallback logic. 
+ fsContainerCgroupV2InitScope = map[string]string{ + procOneCgroup: "0::/", + procSelfCgroup: "0::/init.scope", + procMounts: `overlay / overlay rw,relatime,lowerdir=/some/path:/some/path,upperdir=/some/path:/some/path,workdir=/some/path:/some/path 0 0 +proc /proc/sys proc ro,nosuid,nodev,noexec,relatime 0 0 +sysboxfs /proc/sys sysboxfs rw,nosuid,nodev,noexec,relatime 0 0`, + sysCgroupType: "domain", + + // cpu.max purposefully missing at /init.scope level + filepath.Join(cgroupRootPath, cgroupV2CPUMax): "max 100000", + filepath.Join(cgroupRootPath, "init.scope", cgroupV2CPUStat): "usage_usec 0", + filepath.Join(cgroupRootPath, "init.scope", cgroupV2MemoryMaxBytes): "max", + filepath.Join(cgroupRootPath, "init.scope", cgroupV2MemoryStat): "inactive_file 268435456", + filepath.Join(cgroupRootPath, "init.scope", cgroupV2MemoryUsageBytes): "536870912", + } fsContainerCgroupV1 = map[string]string{ procOneCgroup: "0::/docker/aa86ac98959eeedeae0ecb6e0c9ddd8ae8b97a9d0fdccccf7ea7a474f4e0bb1f", procSelfCgroup: "0::/docker/aa86ac98959eeedeae0ecb6e0c9ddd8ae8b97a9d0fdccccf7ea7a474f4e0bb1f", From 8e6b24c41b7ef76f880f20f920fa663221f782a7 Mon Sep 17 00:00:00 2001 From: Danielle Maywood Date: Fri, 7 Nov 2025 03:12:07 +0000 Subject: [PATCH 2/4] Use default CPU period per kernel docs instead of error Per Linux kernel documentation, cpu.max has a default value of "max 100000" where 100000 microseconds (100ms) is the default period. The file exists on non-root cgroups, but in practice may be missing in certain scenarios. When cpu.max cannot be found anywhere in the cgroup hierarchy, instead of returning an error, we now use the documented default period of 100000 microseconds. This makes the code more robust and aligns with kernel behavior. 
Changes: - Added cgroupV2DefaultPeriodUs constant (100000) - Updated cpuPeriod() to return default period instead of error when no parent exists - Added test fixture fsContainerCgroupV2InitScopeNoCPUMax - Added test case CPU/InitScopeDefaultPeriod to verify default period behavior Ref: https://docs.kernel.org/admin-guide/cgroup-v2.html#cpu-interface-files --- cgroupv2.go | 11 ++++++++--- stat_internal_test.go | 38 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 46 insertions(+), 3 deletions(-) diff --git a/cgroupv2.go b/cgroupv2.go index de7daf9..17d9b20 100644 --- a/cgroupv2.go +++ b/cgroupv2.go @@ -25,6 +25,10 @@ const ( // Other memory stats - we are interested in total_inactive_file cgroupV2MemoryStat = "memory.stat" + // Default period for cpu.max as documented in the kernel docs. + // The default is "max 100000" where 100000 is 100ms in microseconds. + cgroupV2DefaultPeriodUs = 100000 + // What is the maximum cgroup depth we support? // We only expect to see a depth of around 3-4 at max, but we // allow 10 to give us some headroom. If this limit is reached @@ -125,9 +129,10 @@ func (s cgroupV2Statter) cpuPeriod() (float64, error) { return period, nil } - // No parent and no period found. This should not happen in a properly - // configured cgroup hierarchy, but return an error to avoid division by zero. - return 0, xerrors.Errorf("cpu period not found in cgroup hierarchy") + // No parent and no period found in the cgroup hierarchy. + // Per kernel docs, the default period is 100000 microseconds (100ms). 
+ // Ref: https://docs.kernel.org/admin-guide/cgroup-v2.html#cpu-interface-files + return cgroupV2DefaultPeriodUs, nil } return float64(periodUs), nil diff --git a/stat_internal_test.go b/stat_internal_test.go index 07ea639..745fa29 100644 --- a/stat_internal_test.go +++ b/stat_internal_test.go @@ -418,6 +418,28 @@ func TestStatter(t *testing.T) { assert.Equal(t, "cores", cpu.Unit) }) + t.Run("CPU/InitScopeDefaultPeriod", func(t *testing.T) { + t.Parallel() + + // Test scenario where cpu.max doesn't exist at any level in the + // hierarchy. Per kernel docs, the default period is 100000us (100ms). + fs := initFS(t, fsContainerCgroupV2InitScopeNoCPUMax) + fakeWait := func(time.Duration) { + mungeFS(t, fs, filepath.Join(cgroupRootPath, "init.scope", cgroupV2CPUStat), "usage_usec 100000") + } + s, err := New(WithFS(fs), withWait(fakeWait), withIsCgroupV2(true)) + require.NoError(t, err) + + cpu, err := s.ContainerCPU() + require.NoError(t, err) + + require.NotNil(t, cpu) + // With default period of 100000us, usage_usec 100000 = 1.0 core + assert.Equal(t, 1.0, cpu.Used) + require.Nil(t, cpu.Total) // no limit anywhere + assert.Equal(t, "cores", cpu.Unit) + }) + t.Run("Memory/Limit", func(t *testing.T) { t.Parallel() @@ -767,6 +789,22 @@ sysboxfs /proc/sys sysboxfs rw,nosuid,nodev,noexec,relatime 0 0`, filepath.Join(cgroupRootPath, "init.scope", cgroupV2MemoryStat): "inactive_file 268435456", filepath.Join(cgroupRootPath, "init.scope", cgroupV2MemoryUsageBytes): "536870912", } + // fsContainerCgroupV2InitScopeNoCPUMax simulates a scenario where cpu.max + // doesn't exist at any level in the hierarchy. Tests the default period fallback. 
+ fsContainerCgroupV2InitScopeNoCPUMax = map[string]string{ + procOneCgroup: "0::/", + procSelfCgroup: "0::/init.scope", + procMounts: `overlay / overlay rw,relatime,lowerdir=/some/path:/some/path,upperdir=/some/path:/some/path,workdir=/some/path:/some/path 0 0 +proc /proc/sys proc ro,nosuid,nodev,noexec,relatime 0 0 +sysboxfs /proc/sys sysboxfs rw,nosuid,nodev,noexec,relatime 0 0`, + sysCgroupType: "domain", + + // cpu.max purposefully missing at all levels to test default period + filepath.Join(cgroupRootPath, "init.scope", cgroupV2CPUStat): "usage_usec 0", + filepath.Join(cgroupRootPath, "init.scope", cgroupV2MemoryMaxBytes): "max", + filepath.Join(cgroupRootPath, "init.scope", cgroupV2MemoryStat): "inactive_file 268435456", + filepath.Join(cgroupRootPath, "init.scope", cgroupV2MemoryUsageBytes): "536870912", + } fsContainerCgroupV1 = map[string]string{ procOneCgroup: "0::/docker/aa86ac98959eeedeae0ecb6e0c9ddd8ae8b97a9d0fdccccf7ea7a474f4e0bb1f", procSelfCgroup: "0::/docker/aa86ac98959eeedeae0ecb6e0c9ddd8ae8b97a9d0fdccccf7ea7a474f4e0bb1f", From d558b1ad994bfe4125d1b585302a5a8e5a429e08 Mon Sep 17 00:00:00 2001 From: Danielle Maywood Date: Fri, 7 Nov 2025 03:16:31 +0000 Subject: [PATCH 3/4] tidy up a little --- cgroupv2.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cgroupv2.go b/cgroupv2.go index 17d9b20..28aab9f 100644 --- a/cgroupv2.go +++ b/cgroupv2.go @@ -26,7 +26,7 @@ const ( cgroupV2MemoryStat = "memory.stat" // Default period for cpu.max as documented in the kernel docs. - // The default is "max 100000" where 100000 is 100ms in microseconds. + // The default is "max 100000". cgroupV2DefaultPeriodUs = 100000 // What is the maximum cgroup depth we support? @@ -131,7 +131,7 @@ func (s cgroupV2Statter) cpuPeriod() (float64, error) { // No parent and no period found in the cgroup hierarchy. // Per kernel docs, the default period is 100000 microseconds (100ms). 
- // Ref: https://docs.kernel.org/admin-guide/cgroup-v2.html#cpu-interface-files + // Ref: https://docs.kernel.org/6.17/admin-guide/cgroup-v2.html#cpu-interface-files return cgroupV2DefaultPeriodUs, nil } From 18e34e3928a2410f01aebefbd739309b072d8e2d Mon Sep 17 00:00:00 2001 From: Danielle Maywood Date: Fri, 7 Nov 2025 13:55:42 +0000 Subject: [PATCH 4/4] Feedback --- cgroupv2.go | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/cgroupv2.go b/cgroupv2.go index 28aab9f..40e18fd 100644 --- a/cgroupv2.go +++ b/cgroupv2.go @@ -26,7 +26,8 @@ const ( cgroupV2MemoryStat = "memory.stat" // Default period for cpu.max as documented in the kernel docs. - // The default is "max 100000". + // The default period is 100000 microseconds (100ms). + // Ref: https://docs.kernel.org/6.17/admin-guide/cgroup-v2.html#cpu-interface-files cgroupV2DefaultPeriodUs = 100000 // What is the maximum cgroup depth we support? @@ -130,8 +131,6 @@ func (s cgroupV2Statter) cpuPeriod() (float64, error) { } // No parent and no period found in the cgroup hierarchy. - // Per kernel docs, the default period is 100000 microseconds (100ms). - // Ref: https://docs.kernel.org/6.17/admin-guide/cgroup-v2.html#cpu-interface-files return cgroupV2DefaultPeriodUs, nil }