Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 39 additions & 10 deletions collector/diskstats_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -272,9 +272,19 @@ func (c *diskstatsCollector) Update(ch chan<- prometheus.Metric) error {
continue
}

info, err := getUdevDeviceProperties(stats.MajorNumber, stats.MinorNumber)
if err != nil {
c.logger.Debug("Failed to parse udev info", "err", err)
// Only fetch udev device properties when udev is available. The
// getUdevDeviceProperties field is set to nil in NewDiskstatsCollector
// when the udev data directory is not accessible, so calling the bare
// package-level function directly (as was done before this fix) would
// bypass that guard and cause unnecessary file I/O on every scrape for
// every device, even on systems that do not have udev.
var info udevInfo
if c.getUdevDeviceProperties != nil {
var err error
info, err = c.getUdevDeviceProperties(stats.MajorNumber, stats.MinorNumber)
if err != nil {
c.logger.Debug("Failed to parse udev info", "err", err)
}
}

// This is usually the serial printed on the disk label.
Expand All @@ -290,12 +300,6 @@ func (c *diskstatsCollector) Update(ch chan<- prometheus.Metric) error {
serial = info[udevIDSerial]
}

queueStats, err := c.fs.SysBlockDeviceQueueStats(dev)
// Block Device Queue stats may not exist for all devices.
if err != nil && !os.IsNotExist(err) {
c.logger.Debug("Failed to get block device queue stats", "device", dev, "err", err)
}

ch <- c.infoDesc.mustNewConstMetric(1.0, dev,
fmt.Sprint(stats.MajorNumber),
fmt.Sprint(stats.MinorNumber),
Expand All @@ -304,7 +308,7 @@ func (c *diskstatsCollector) Update(ch chan<- prometheus.Metric) error {
info[udevIDModel],
serial,
info[udevIDRevision],
strconv.FormatUint(queueStats.Rotational, 2),
rotationalLabel(c.fs, dev),
)

statCount := stats.IoStatsCount - 3 // Total diskstats record count, less MajorNumber, MinorNumber and DeviceName
Expand Down Expand Up @@ -372,6 +376,31 @@ func (c *diskstatsCollector) Update(ch chan<- prometheus.Metric) error {
return nil
}

// rotationalLabel returns "1" for a rotational block device (HDD) or "0" for
// a non-rotational device (SSD/NVMe) by reading
// /sys/block/<dev>/queue/rotational via blockdevice.FS.
// On any error — for example when the device has no queue directory — it
// returns "0", preserving the backward-compatible behaviour of the previous
// code that zero-initialised BlockQueueStats on error (Rotational uint64
// default is 0).
//
// SysBlockDeviceRotational reads exactly 1 sysfs file per device, reducing
// per-device sysfs I/O from ~30 reads (SysBlockDeviceQueueStats) to 1 and
// fixing the 10× scrape regression on systems with many block devices (#3282).
func rotationalLabel(fs blockdevice.FS, dev string) string {
rot, err := fs.SysBlockDeviceRotational(dev)
if err != nil {
// Default to "0" (non-rotational) on error to preserve backward
// compatibility: the previous code zero-initialised the struct on
// error, which also produced rotational="0".
return "0"
}
if rot == 1 {
return "1"
}
return "0"
}

func getUdevDeviceProperties(major, minor uint32) (udevInfo, error) {
filename := udevDataFilePath(fmt.Sprintf("b%d:%d", major, minor))

Expand Down
98 changes: 98 additions & 0 deletions collector/diskstats_linux_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,14 @@ import (
"fmt"
"io"
"log/slog"
"os"
"path/filepath"
"strings"
"testing"

"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/testutil"
"github.com/prometheus/procfs/blockdevice"
)

type testDiskStatsCollector struct {
Expand Down Expand Up @@ -342,3 +345,98 @@ node_disk_written_bytes_total{device="vda"} 1.0938236928e+11
t.Fatal(err)
}
}

// TestRotationalLabel verifies that rotationalLabel returns the correct string
// value from the sysfs file via blockdevice.FS.SysBlockDeviceRotational,
// fails closed to "0" for missing files, and returns "0" when the file does
// not exist (preserving backward compatibility with the previous
// SysBlockDeviceQueueStats zero-value behaviour, issue #3282).
func TestRotationalLabel(t *testing.T) {
tempProc := t.TempDir()
tempSys := t.TempDir()

// Create a valid rotational=1 file (HDD).
err := os.MkdirAll(filepath.Join(tempSys, "block/sda/queue"), 0755)
if err != nil {
t.Fatal(err)
}
err = os.WriteFile(filepath.Join(tempSys, "block/sda/queue/rotational"), []byte("1\n"), 0644)
if err != nil {
t.Fatal(err)
}

// Create a rotational=0 file (SSD).
err = os.MkdirAll(filepath.Join(tempSys, "block/sdb/queue"), 0755)
if err != nil {
t.Fatal(err)
}
err = os.WriteFile(filepath.Join(tempSys, "block/sdb/queue/rotational"), []byte("0\n"), 0644)
if err != nil {
t.Fatal(err)
}

fs, err := blockdevice.NewFS(tempProc, tempSys)
if err != nil {
t.Fatal(err)
}

cases := []struct {
dev string
expected string
desc string
}{
{
dev: "sda",
expected: "1",
desc: "rotational HDD: file contains '1'",
},
{
dev: "sdb",
expected: "0",
desc: "non-rotational SSD: file contains '0'",
},
{
dev: "nonexistent_device_xyz",
expected: "0",
desc: "missing rotational file: defaults to '0' for backward compat",
},
}

for _, tc := range cases {
t.Run(tc.dev, func(t *testing.T) {
got := rotationalLabel(fs, tc.dev)
if got != tc.expected {
t.Errorf("rotationalLabel(%q): got %q, want %q (%s)",
tc.dev, got, tc.expected, tc.desc)
}
})
}
}

// BenchmarkDiskstatsUpdate measures the full Update() call so that future PRs
// introducing per-device sysfs I/O regressions are detectable before merge.
// Run with: go test -bench=BenchmarkDiskstatsUpdate -benchtime=5s ./collector/
func BenchmarkDiskstatsUpdate(b *testing.B) {
*sysPath = "fixtures/sys"
*procPath = "fixtures/proc"
*udevDataPath = "fixtures/udev/data"
*diskstatsDeviceExclude = "^(z?ram|loop|fd|(h|s|v|xv)d[a-z]|nvme\\d+n\\d+p)\\d+$"

logger := slog.New(slog.NewTextHandler(io.Discard, nil))
collector, err := NewDiskstatsCollector(logger)
if err != nil {
b.Fatal(err)
}

b.ResetTimer()
for b.Loop() {
ch := make(chan prometheus.Metric, 512)
if err := collector.Update(ch); err != nil {
b.Fatal(err)
}
// Drain channel so goroutine doesn't block.
for len(ch) > 0 {
<-ch
}
}
}
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ require (
github.com/prometheus/client_model v0.6.2
github.com/prometheus/common v0.68.1
github.com/prometheus/exporter-toolkit v0.16.0
github.com/prometheus/procfs v0.20.1
github.com/prometheus/procfs v0.20.2-0.20260618104242-66a9e6ebfb87
github.com/safchain/ethtool v0.7.0
golang.org/x/sys v0.45.0
howett.net/plist v1.0.1
Expand Down
4 changes: 2 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -84,8 +84,8 @@ github.com/prometheus/common v0.68.1 h1:omjRRl4QP4komogpXuhfeOiisQg7xdy8VM1UY+pS
github.com/prometheus/common v0.68.1/go.mod h1:ZzL3f6u94qUxh9p+tJTrF+FvBS1XXbbRAZCQkytAL0Y=
github.com/prometheus/exporter-toolkit v0.16.0 h1:xT/j7L2XKF+VJd6B4fpUw6xWabHrSmsUf6mYmFqyu0s=
github.com/prometheus/exporter-toolkit v0.16.0/go.mod h1:d1EL8Z9674xQe/iWhwP2wDyCEoBPbXVeqDbqAUsgJWY=
github.com/prometheus/procfs v0.20.1 h1:XwbrGOIplXW/AU3YhIhLODXMJYyC1isLFfYCsTEycfc=
github.com/prometheus/procfs v0.20.1/go.mod h1:o9EMBZGRyvDrSPH1RqdxhojkuXstoe4UlK79eF5TGGo=
github.com/prometheus/procfs v0.20.2-0.20260618104242-66a9e6ebfb87 h1:hSwLsC0pENha9vjrQKFKayki5mGDM39A2LUcOWhnALA=
github.com/prometheus/procfs v0.20.2-0.20260618104242-66a9e6ebfb87/go.mod h1:UOx4nf4IO0zAKdb6PSRQu4RtIKHJr7R7lrhTakKuHqs=
github.com/safchain/ethtool v0.7.0 h1:rlJzfDetsVvT61uz8x1YIcFn12akMfuPulHtZjtb7Is=
github.com/safchain/ethtool v0.7.0/go.mod h1:MenQKEjXdfkjD3mp2QdCk8B/hwvkrlOTm/FD4gTpFxQ=
github.com/siebenmann/go-kstat v0.0.0-20210513183136-173c9b0a9973 h1:GfSdC6wKfTGcgCS7BtzF5694Amne1pGCSTY252WhlEY=
Expand Down