From f9e6ce201f37cf6f3510479d25bfb8a15f6e2fd3 Mon Sep 17 00:00:00 2001 From: John Morales Date: Thu, 5 Mar 2026 22:43:16 -0500 Subject: [PATCH 01/12] WIP Signed-off-by: John Morales --- collector/diskstats_linux.go | 22 +++++++ collector/ext4_linux.go | 110 +++++++++++++++++++++++++++++++++++ 2 files changed, 132 insertions(+) create mode 100644 collector/ext4_linux.go diff --git a/collector/diskstats_linux.go b/collector/diskstats_linux.go index daca55d968..22d522a7b0 100644 --- a/collector/diskstats_linux.go +++ b/collector/diskstats_linux.go @@ -247,6 +247,20 @@ func NewDiskstatsCollector(logger *slog.Logger) (Collector, error) { ), valueType: prometheus.GaugeValue, }, }, + ioErrDesc: typedFactorDesc{ + desc: prometheus.NewDesc(prometheus.BuildFQName(namespace, diskSubsystem, "ioerr_total"), + "Number of IO commands that completed with an error.", + []string{"device"}, + nil, + ), valueType: prometheus.CounterValue, + }, + ioDoneDesc: typedFactorDesc{ + desc: prometheus.NewDesc(prometheus.BuildFQName(namespace, diskSubsystem, "iodone_total"), + "Number of completed or rejected IO commands.", + []string{"device"}, + nil, + ), valueType: prometheus.CounterValue, + }, logger: logger, } @@ -368,6 +382,14 @@ func (c *diskstatsCollector) Update(ch chan<- prometheus.Metric) error { } } } + + ioDeviceStats, err := c.fs.SysBlockDeviceIOStat(dev) + if err != nil && !os.IsNotExist(err) { + c.logger.Debug("Failed to get block device io stats", "device", dev, "err", err) + } + ch <- c.ioErrDesc.mustNewConstMetric(float64(ioDeviceStats.IOErrCount), dev) + ch <- c.ioDoneDesc.mustNewConstMetric(float64(ioDeviceStats.IODoneCount), dev) + } return nil } diff --git a/collector/ext4_linux.go b/collector/ext4_linux.go new file mode 100644 index 0000000000..6ae591b2b5 --- /dev/null +++ b/collector/ext4_linux.go @@ -0,0 +1,110 @@ +// Copyright 2017 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in 
compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//go:build !noext4 +// +build !noext4 + +package collector + +import ( + "fmt" + "log/slog" + + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/procfs/ext4" +) + +// An ext4Collector is a Collector which gathers metrics from ext4 filesystems. +type ext4Collector struct { + fs ext4.FS + logger *slog.Logger +} + +func init() { + registerCollector("ext4", defaultEnabled, NewExt4Collector) +} + +// NewExt4Collector returns a new Collector exposing ext4 statistics. +func NewExt4Collector(logger *slog.Logger) (Collector, error) { + fs, err := ext4.NewFS(*procPath, *sysPath) + if err != nil { + return nil, fmt.Errorf("failed to open sysfs: %w", err) + } + + return &ext4Collector{ + fs: fs, + logger: logger, + }, nil +} + +// Update implements Collector. +func (c *ext4Collector) Update(ch chan<- prometheus.Metric) error { + stats, err := c.fs.ProcStat() + if err != nil { + return fmt.Errorf("failed to retrieve ext4 stats: %w", err) + } + + for _, s := range stats { + c.updateExt4Stats(ch, s) + } + + return nil +} + +// updateExt4Stats collects statistics for a single ext4 filesystem. 
+func (c *ext4Collector) updateExt4Stats(ch chan<- prometheus.Metric, s *ext4.Stats) { + const ( + subsystem = "ext4" + ) + var ( + labels = []string{"device"} + ) + + metrics := []struct { + name string + desc string + value float64 + }{ + { + name: "errors", + desc: "Number of ext4 filesystem errors.", + value: float64(s.Errors), + }, + { + name: "warnings", + desc: "Number of ext4 filesystem warnings.", + value: float64(s.Warnings), + }, + { + name: "messages", + desc: "Number of ext4 filesystem log messages.", + value: float64(s.Messages), + }, + } + + for _, m := range metrics { + desc := prometheus.NewDesc( + prometheus.BuildFQName(namespace, subsystem, m.name), + m.desc, + labels, + nil, + ) + + ch <- prometheus.MustNewConstMetric( + desc, + prometheus.CounterValue, + m.value, + s.Name, + ) + } +} From dc5b0b546dad00268ab80c6dc7cce5389b4d74e5 Mon Sep 17 00:00:00 2001 From: John Morales Date: Fri, 6 Mar 2026 17:25:49 -0500 Subject: [PATCH 02/12] Update test cases and sync up definition for typedDesc Signed-off-by: John Morales --- collector/diskstats_linux.go | 6 ++++-- collector/diskstats_linux_test.go | 8 ++++++++ collector/fixtures/e2e-64k-page-output.txt | 9 +++++++++ collector/fixtures/e2e-output.txt | 9 +++++++++ end-to-end-test.sh | 1 + 5 files changed, 31 insertions(+), 2 deletions(-) diff --git a/collector/diskstats_linux.go b/collector/diskstats_linux.go index 22d522a7b0..45a3b1794b 100644 --- a/collector/diskstats_linux.go +++ b/collector/diskstats_linux.go @@ -75,6 +75,8 @@ type diskstatsCollector struct { filesystemInfoDesc typedDesc deviceMapperInfoDesc typedDesc ataDescs map[string]typedDesc + ioErrDesc typedDesc + ioDoneDesc typedDesc logger *slog.Logger getUdevDeviceProperties func(uint32, uint32) (udevInfo, error) } @@ -247,14 +249,14 @@ func NewDiskstatsCollector(logger *slog.Logger) (Collector, error) { ), valueType: prometheus.GaugeValue, }, }, - ioErrDesc: typedFactorDesc{ + ioErrDesc: typedDesc{ desc: 
prometheus.NewDesc(prometheus.BuildFQName(namespace, diskSubsystem, "ioerr_total"), "Number of IO commands that completed with an error.", []string{"device"}, nil, ), valueType: prometheus.CounterValue, }, - ioDoneDesc: typedFactorDesc{ + ioDoneDesc: typedDesc{ desc: prometheus.NewDesc(prometheus.BuildFQName(namespace, diskSubsystem, "iodone_total"), "Number of completed or rejected IO commands.", []string{"device"}, diff --git a/collector/diskstats_linux_test.go b/collector/diskstats_linux_test.go index 08a5024c8a..e7175fb7ab 100644 --- a/collector/diskstats_linux_test.go +++ b/collector/diskstats_linux_test.go @@ -178,6 +178,14 @@ node_disk_io_time_weighted_seconds_total{device="sdb"} 67.07000000000001 node_disk_io_time_weighted_seconds_total{device="sdc"} 17.07 node_disk_io_time_weighted_seconds_total{device="sr0"} 0 node_disk_io_time_weighted_seconds_total{device="vda"} 2.0778722280000001e+06 +# HELP node_disk_iodone_total Number of completed or rejected IO commands. +# TYPE node_disk_iodone_total counter +node_disk_iodone_total{device="sda"} 307 +node_disk_iodone_total{device="sr0"} 2767 +# HELP node_disk_ioerr_total Number of IO commands that completed with an error. +# TYPE node_disk_ioerr_total counter +node_disk_ioerr_total{device="sda"} 3 +node_disk_ioerr_total{device="sr0"} 29 # HELP node_disk_read_bytes_total The total number of bytes read successfully. 
# TYPE node_disk_read_bytes_total counter node_disk_read_bytes_total{device="dm-0"} 5.13708655616e+11 diff --git a/collector/fixtures/e2e-64k-page-output.txt b/collector/fixtures/e2e-64k-page-output.txt index 8065c5fee7..112072b5ed 100644 --- a/collector/fixtures/e2e-64k-page-output.txt +++ b/collector/fixtures/e2e-64k-page-output.txt @@ -1145,6 +1145,14 @@ node_disk_io_time_weighted_seconds_total{device="sdb"} 67.07000000000001 node_disk_io_time_weighted_seconds_total{device="sdc"} 17.07 node_disk_io_time_weighted_seconds_total{device="sr0"} 0 node_disk_io_time_weighted_seconds_total{device="vda"} 2.0778722280000001e+06 +# HELP node_disk_iodone_total Number of completed or rejected IO commands. +# TYPE node_disk_iodone_total counter +node_disk_iodone_total{device="sda"} 307 +node_disk_iodone_total{device="sr0"} 2767 +# HELP node_disk_ioerr_total Number of IO commands that completed with an error. +# TYPE node_disk_ioerr_total counter +node_disk_ioerr_total{device="sda"} 3 +node_disk_ioerr_total{device="sr0"} 29 # HELP node_disk_read_bytes_total The total number of bytes read successfully. 
# TYPE node_disk_read_bytes_total counter node_disk_read_bytes_total{device="dm-0"} 5.13708655616e+11 @@ -3654,6 +3662,7 @@ node_scrape_collector_success{collector="dmi"} 1 node_scrape_collector_success{collector="drbd"} 1 node_scrape_collector_success{collector="edac"} 1 node_scrape_collector_success{collector="entropy"} 1 +node_scrape_collector_success{collector="ext4"} 1 node_scrape_collector_success{collector="fibrechannel"} 1 node_scrape_collector_success{collector="filefd"} 1 node_scrape_collector_success{collector="hwmon"} 1 diff --git a/collector/fixtures/e2e-output.txt b/collector/fixtures/e2e-output.txt index 09a58cdc52..ccfea00f82 100644 --- a/collector/fixtures/e2e-output.txt +++ b/collector/fixtures/e2e-output.txt @@ -1177,6 +1177,14 @@ node_disk_io_time_weighted_seconds_total{device="sdb"} 67.07000000000001 node_disk_io_time_weighted_seconds_total{device="sdc"} 17.07 node_disk_io_time_weighted_seconds_total{device="sr0"} 0 node_disk_io_time_weighted_seconds_total{device="vda"} 2.0778722280000001e+06 +# HELP node_disk_iodone_total Number of completed or rejected IO commands. +# TYPE node_disk_iodone_total counter +node_disk_iodone_total{device="sda"} 307 +node_disk_iodone_total{device="sr0"} 2767 +# HELP node_disk_ioerr_total Number of IO commands that completed with an error. +# TYPE node_disk_ioerr_total counter +node_disk_ioerr_total{device="sda"} 3 +node_disk_ioerr_total{device="sr0"} 29 # HELP node_disk_read_bytes_total The total number of bytes read successfully. 
# TYPE node_disk_read_bytes_total counter node_disk_read_bytes_total{device="dm-0"} 5.13708655616e+11 @@ -3686,6 +3694,7 @@ node_scrape_collector_success{collector="dmi"} 1 node_scrape_collector_success{collector="drbd"} 1 node_scrape_collector_success{collector="edac"} 1 node_scrape_collector_success{collector="entropy"} 1 +node_scrape_collector_success{collector="ext4"} 1 node_scrape_collector_success{collector="fibrechannel"} 1 node_scrape_collector_success{collector="filefd"} 1 node_scrape_collector_success{collector="hwmon"} 1 diff --git a/end-to-end-test.sh b/end-to-end-test.sh index bd9679560b..9a9a11ffdf 100755 --- a/end-to-end-test.sh +++ b/end-to-end-test.sh @@ -51,6 +51,7 @@ enabled_collectors=$(cat << COLLECTORS drbd edac entropy + ext4 fibrechannel filefd hwmon From 8b77376fb159eb70ae5d1dd26dfadce76a2db4ea Mon Sep 17 00:00:00 2001 From: John Morales Date: Fri, 6 Mar 2026 19:24:45 -0500 Subject: [PATCH 03/12] ext4 test fixes Signed-off-by: John Morales --- collector/diskstats_linux.go | 11 ++--- collector/ext4_linux.go | 37 +++++++++-------- collector/ext4_linux_test.go | 69 +++++++++++++++++++++++++++++++ collector/fixtures/sys.ttar | 80 +++++++++++++++++++++++++++++------- 4 files changed, 161 insertions(+), 36 deletions(-) create mode 100644 collector/ext4_linux_test.go diff --git a/collector/diskstats_linux.go b/collector/diskstats_linux.go index 45a3b1794b..c9a31238e6 100644 --- a/collector/diskstats_linux.go +++ b/collector/diskstats_linux.go @@ -385,12 +385,13 @@ func (c *diskstatsCollector) Update(ch chan<- prometheus.Metric) error { } } - ioDeviceStats, err := c.fs.SysBlockDeviceIOStat(dev) - if err != nil && !os.IsNotExist(err) { - c.logger.Debug("Failed to get block device io stats", "device", dev, "err", err) + ioDeviceStats, ioErr := c.fs.SysBlockDeviceIOStat(dev) + if ioErr == nil { + ch <- c.ioErrDesc.mustNewConstMetric(float64(ioDeviceStats.IOErrCount), dev) + ch <- c.ioDoneDesc.mustNewConstMetric(float64(ioDeviceStats.IODoneCount), dev) 
+ } else if !os.IsNotExist(ioErr) { + c.logger.Info("Failed to get block device io stats", "device", dev, "err", ioErr) } - ch <- c.ioErrDesc.mustNewConstMetric(float64(ioDeviceStats.IOErrCount), dev) - ch <- c.ioDoneDesc.mustNewConstMetric(float64(ioDeviceStats.IODoneCount), dev) } return nil diff --git a/collector/ext4_linux.go b/collector/ext4_linux.go index 6ae591b2b5..a7b1563005 100644 --- a/collector/ext4_linux.go +++ b/collector/ext4_linux.go @@ -61,37 +61,42 @@ func (c *ext4Collector) Update(ch chan<- prometheus.Metric) error { return nil } -// updateExt4Stats collects statistics for a single ext4 filesystem. -func (c *ext4Collector) updateExt4Stats(ch chan<- prometheus.Metric, s *ext4.Stats) { - const ( - subsystem = "ext4" - ) - var ( - labels = []string{"device"} - ) +type ext4Metric struct { + name string + desc string + value float64 +} - metrics := []struct { - name string - desc string - value float64 - }{ +func (c *ext4Collector) getMetrics(s *ext4.Stats) []ext4Metric { + return []ext4Metric{ { - name: "errors", + name: "errors_total", desc: "Number of ext4 filesystem errors.", value: float64(s.Errors), }, { - name: "warnings", + name: "warnings_total", desc: "Number of ext4 filesystem warnings.", value: float64(s.Warnings), }, { - name: "messages", + name: "messages_total", desc: "Number of ext4 filesystem log messages.", value: float64(s.Messages), }, } +} + +// updateExt4Stats collects statistics for a single ext4 filesystem. 
+func (c *ext4Collector) updateExt4Stats(ch chan<- prometheus.Metric, s *ext4.Stats) { + const ( + subsystem = "ext4" + ) + var ( + labels = []string{"device"} + ) + metrics := c.getMetrics(s) for _, m := range metrics { desc := prometheus.NewDesc( prometheus.BuildFQName(namespace, subsystem, m.name), diff --git a/collector/ext4_linux_test.go b/collector/ext4_linux_test.go new file mode 100644 index 0000000000..8ec32232e1 --- /dev/null +++ b/collector/ext4_linux_test.go @@ -0,0 +1,69 @@ +// Copyright 2019 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +//go:build !noext4 + +package collector + +import ( + "testing" + + "github.com/prometheus/procfs" + "github.com/prometheus/procfs/ext4" +) + +var expectedExt4Metrics = [][]ext4Metric{ + { + {name: "errors_total", value: 12}, + {name: "warnings_total", value: 34}, + {name: "messages_total", value: 567}, + }, +} + +func checkExt4Metric(exp, got *ext4Metric) bool { + if exp.name != got.name || + exp.value != got.value { + return false + } + return true +} + +func TestExt4(t *testing.T) { + fs, err := ext4.NewFS(procfs.DefaultMountPoint, "fixtures/sys") + if err != nil { + t.Fatal(err) + } + collector := &ext4Collector{fs: fs} + + stats, err := collector.fs.ProcStat() + if err != nil { + t.Fatalf("Failed to retrieve ext4 stats: %v", err) + } + if len(stats) != len(expectedExt4Metrics) { + t.Fatalf("Unexpected number of ext4 stats: expected %v, got %v", len(expectedExt4Metrics), len(stats)) + } + + for i, s := range stats { + metrics := collector.getMetrics(s) + if len(metrics) != len(expectedExt4Metrics[i]) { + t.Fatalf("Unexpected number of ext4 metrics: expected %v, got %v", len(expectedExt4Metrics[i]), len(metrics)) + } + + for j, m := range metrics { + exp := expectedExt4Metrics[i][j] + if !checkExt4Metric(&exp, &m) { + t.Errorf("Incorrect ext4 metric: expected %#v, got: %#v", exp, m) + } + } + } +} diff --git a/collector/fixtures/sys.ttar b/collector/fixtures/sys.ttar index bc8744cbe7..624bb67a48 100644 --- a/collector/fixtures/sys.ttar +++ b/collector/fixtures/sys.ttar @@ -595,6 +595,19 @@ Mode: 644 Directory: sys/block/sda Mode: 755 # ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Directory: sys/block/sda/device +Mode: 775 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Path: sys/block/sda/device/iodone_cnt +Lines: 1 +307 +Mode: 664 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Path: sys/block/sda/device/ioerr_cnt +Lines: 1 +3 +Mode: 664 +# ttar - - - - - - - - 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - Directory: sys/block/sda/queue Mode: 755 # ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - @@ -826,6 +839,22 @@ Lines: 1 none Mode: 644 # ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Directory: sys/block/sr0 +Mode: 775 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Directory: sys/block/sr0/device +Mode: 775 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Path: sys/block/sr0/device/iodone_cnt +Lines: 1 +2767 +Mode: 664 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Path: sys/block/sr0/device/ioerr_cnt +Lines: 1 +29 +Mode: 664 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Directory: sys/bus Mode: 755 # ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - @@ -2219,16 +2248,6 @@ Lines: 1 Samsung SSD 970 PRO 512GB Mode: 444 # ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Path: sys/class/nvme/nvme0/serial -Lines: 1 -S680HF8N190894I -Mode: 444 -# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Path: sys/class/nvme/nvme0/state -Lines: 1 -live -Mode: 444 -# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Directory: sys/class/nvme/nvme0/nvme0c0n0 Mode: 755 # ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - @@ -2237,11 +2256,6 @@ Lines: 1 optimized Mode: 444 # ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Path: sys/class/nvme/nvme0/nvme0c0n0/size -Lines: 1 -3906250000 -Mode: 444 -# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Path: sys/class/nvme/nvme0/nvme0c0n0/nuse Lines: 1 488281250 @@ -2255,6 +2269,21 @@ Lines: 1 4096 Mode: 644 # ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Path: 
sys/class/nvme/nvme0/nvme0c0n0/size +Lines: 1 +3906250000 +Mode: 444 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Path: sys/class/nvme/nvme0/serial +Lines: 1 +S680HF8N190894I +Mode: 444 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Path: sys/class/nvme/nvme0/state +Lines: 1 +live +Mode: 444 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Directory: sys/class/power_supply Mode: 755 # ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - @@ -11413,6 +11442,27 @@ Lines: 1 4096 Mode: 444 # ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Directory: sys/fs/ext4 +Mode: 775 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Directory: sys/fs/ext4/sdb1 +Mode: 775 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Path: sys/fs/ext4/sdb1/errors_count +Lines: 1 +12 +Mode: 664 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Path: sys/fs/ext4/sdb1/msg_count +Lines: 1 +567 +Mode: 664 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Path: sys/fs/ext4/sdb1/warning_count +Lines: 1 +34 +Mode: 664 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Directory: sys/fs/xfs Mode: 755 # ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - From c4c2a1498cb242bb3eaac667c09671ddb34748b2 Mon Sep 17 00:00:00 2001 From: John Morales Date: Fri, 6 Mar 2026 19:33:27 -0500 Subject: [PATCH 04/12] Both ext4 and diskstats tests passing with hex encoded fixtures for diskstats Signed-off-by: John Morales --- collector/diskstats_linux_test.go | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/collector/diskstats_linux_test.go b/collector/diskstats_linux_test.go index e7175fb7ab..cbcbc8bf61 100644 --- a/collector/diskstats_linux_test.go +++ 
b/collector/diskstats_linux_test.go @@ -180,12 +180,12 @@ node_disk_io_time_weighted_seconds_total{device="sr0"} 0 node_disk_io_time_weighted_seconds_total{device="vda"} 2.0778722280000001e+06 # HELP node_disk_iodone_total Number of completed or rejected IO commands. # TYPE node_disk_iodone_total counter -node_disk_iodone_total{device="sda"} 307 -node_disk_iodone_total{device="sr0"} 2767 +node_disk_iodone_total{device="sda"} 775 +node_disk_iodone_total{device="sr0"} 1.29433517e+08 # HELP node_disk_ioerr_total Number of IO commands that completed with an error. # TYPE node_disk_ioerr_total counter -node_disk_ioerr_total{device="sda"} 3 -node_disk_ioerr_total{device="sr0"} 29 +node_disk_ioerr_total{device="sda"} 11 +node_disk_ioerr_total{device="sr0"} 41 # HELP node_disk_read_bytes_total The total number of bytes read successfully. # TYPE node_disk_read_bytes_total counter node_disk_read_bytes_total{device="dm-0"} 5.13708655616e+11 From 18938b59c78dd4efa22b5df819b209a43d28b525 Mon Sep 17 00:00:00 2001 From: John Morales Date: Fri, 6 Mar 2026 19:44:34 -0500 Subject: [PATCH 05/12] update sys.ttar for hex fixture values Signed-off-by: John Morales --- collector/fixtures/sys.ttar | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/collector/fixtures/sys.ttar b/collector/fixtures/sys.ttar index 624bb67a48..beb3860674 100644 --- a/collector/fixtures/sys.ttar +++ b/collector/fixtures/sys.ttar @@ -600,12 +600,12 @@ Mode: 775 # ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Path: sys/block/sda/device/iodone_cnt Lines: 1 -307 +0x307 Mode: 664 # ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Path: sys/block/sda/device/ioerr_cnt Lines: 1 -3 +0xb Mode: 664 # ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Directory: sys/block/sda/queue @@ -847,12 +847,12 @@ Mode: 775 # ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Path: 
sys/block/sr0/device/iodone_cnt Lines: 1 -2767 +0x7b6ffad Mode: 664 # ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Path: sys/block/sr0/device/ioerr_cnt Lines: 1 -29 +0x29 Mode: 664 # ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Directory: sys/bus From 127f26498e1053ddbdf0456c1bbf4b1e17fffa41 Mon Sep 17 00:00:00 2001 From: John Morales Date: Sat, 7 Mar 2026 21:32:21 -0500 Subject: [PATCH 06/12] Add device filters flags for ext4 Signed-off-by: John Morales --- collector/diskstats_linux.go | 2 +- collector/ext4_linux.go | 48 +++++++++++++++++++++++++++++++++--- 2 files changed, 45 insertions(+), 5 deletions(-) diff --git a/collector/diskstats_linux.go b/collector/diskstats_linux.go index c9a31238e6..ea74b1ea28 100644 --- a/collector/diskstats_linux.go +++ b/collector/diskstats_linux.go @@ -390,7 +390,7 @@ func (c *diskstatsCollector) Update(ch chan<- prometheus.Metric) error { ch <- c.ioErrDesc.mustNewConstMetric(float64(ioDeviceStats.IOErrCount), dev) ch <- c.ioDoneDesc.mustNewConstMetric(float64(ioDeviceStats.IODoneCount), dev) } else if !os.IsNotExist(ioErr) { - c.logger.Info("Failed to get block device io stats", "device", dev, "err", ioErr) + c.logger.Debug("Failed to get block device io stats", "device", dev, "err", ioErr) } } diff --git a/collector/ext4_linux.go b/collector/ext4_linux.go index a7b1563005..a403d5b233 100644 --- a/collector/ext4_linux.go +++ b/collector/ext4_linux.go @@ -17,33 +17,70 @@ package collector import ( + "errors" "fmt" "log/slog" + "github.com/alecthomas/kingpin/v2" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/procfs/ext4" ) +var ( + ext4DeviceExclude = kingpin.Flag( + "collector.ext4.device-exclude", + "Regexp of ext4 devices to exclude (mutually exclusive to device-include).", + ).String() + + ext4DeviceInclude = kingpin.Flag( + "collector.ext4.device-include", + "Regexp of ext4 devices to include (mutually exclusive to device-exclude).", + 
).String() +) + // An ext4Collector is a Collector which gathers metrics from ext4 filesystems. type ext4Collector struct { - fs ext4.FS - logger *slog.Logger + deviceFilter deviceFilter + fs ext4.FS + logger *slog.Logger } func init() { registerCollector("ext4", defaultEnabled, NewExt4Collector) } +func newExt4DeviceFilter(logger *slog.Logger) (deviceFilter, error) { + if *ext4DeviceExclude != "" && *ext4DeviceInclude != "" { + return deviceFilter{}, errors.New("device-exclude & device-include are mutually exclusive") + } + + if *ext4DeviceExclude != "" { + logger.Info("Parsed flag --collector.ext4.device-exclude", "flag", *ext4DeviceExclude) + } + + if *ext4DeviceInclude != "" { + logger.Info("Parsed Flag --collector.ext4.device-include", "flag", *ext4DeviceInclude) + } + + return newDeviceFilter(*ext4DeviceExclude, *ext4DeviceInclude), nil +} + // NewExt4Collector returns a new Collector exposing ext4 statistics. func NewExt4Collector(logger *slog.Logger) (Collector, error) { + ext4DeviceFilter, err := newExt4DeviceFilter(logger) + if err != nil { + return nil, fmt.Errorf("failed to parse device filter flags: %w", err) + } + fs, err := ext4.NewFS(*procPath, *sysPath) if err != nil { return nil, fmt.Errorf("failed to open sysfs: %w", err) } return &ext4Collector{ - fs: fs, - logger: logger, + deviceFilter: ext4DeviceFilter, + fs: fs, + logger: logger, }, nil } @@ -55,6 +92,9 @@ func (c *ext4Collector) Update(ch chan<- prometheus.Metric) error { } for _, s := range stats { + if c.deviceFilter.ignored(s.Name) { + continue + } c.updateExt4Stats(ch, s) } From 80ce4abcefeb53eb3666f0cc9d659cfd668902e7 Mon Sep 17 00:00:00 2001 From: John Morales Date: Sat, 7 Mar 2026 22:51:12 -0500 Subject: [PATCH 07/12] Default-exclude the ext4 'features' meta device Signed-off-by: John Morales --- collector/ext4_linux.go | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/collector/ext4_linux.go b/collector/ext4_linux.go index a403d5b233..96f45a05e1 100644 --- 
a/collector/ext4_linux.go +++ b/collector/ext4_linux.go @@ -26,11 +26,15 @@ import ( "github.com/prometheus/procfs/ext4" ) +const ( + ext4DefaultIgnoredDevices = "^features$" +) + var ( ext4DeviceExclude = kingpin.Flag( "collector.ext4.device-exclude", "Regexp of ext4 devices to exclude (mutually exclusive to device-include).", - ).String() + ).Default(ext4DefaultIgnoredDevices).String() ext4DeviceInclude = kingpin.Flag( "collector.ext4.device-include", From a98addee87fb26dabdfffbfffda2a5514cec757c Mon Sep 17 00:00:00 2001 From: John Morales Date: Sun, 8 Mar 2026 18:44:22 -0400 Subject: [PATCH 08/12] e2e test fixture fixes Signed-off-by: John Morales --- collector/fixtures/e2e-output.txt | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/collector/fixtures/e2e-output.txt b/collector/fixtures/e2e-output.txt index ccfea00f82..f1ddc9b0d6 100644 --- a/collector/fixtures/e2e-output.txt +++ b/collector/fixtures/e2e-output.txt @@ -1179,12 +1179,12 @@ node_disk_io_time_weighted_seconds_total{device="sr0"} 0 node_disk_io_time_weighted_seconds_total{device="vda"} 2.0778722280000001e+06 # HELP node_disk_iodone_total Number of completed or rejected IO commands. # TYPE node_disk_iodone_total counter -node_disk_iodone_total{device="sda"} 307 -node_disk_iodone_total{device="sr0"} 2767 +node_disk_iodone_total{device="sda"} 775 +node_disk_iodone_total{device="sr0"} 1.29433517e+08 # HELP node_disk_ioerr_total Number of IO commands that completed with an error. # TYPE node_disk_ioerr_total counter -node_disk_ioerr_total{device="sda"} 3 -node_disk_ioerr_total{device="sr0"} 29 +node_disk_ioerr_total{device="sda"} 11 +node_disk_ioerr_total{device="sr0"} 41 # HELP node_disk_read_bytes_total The total number of bytes read successfully. 
# TYPE node_disk_read_bytes_total counter node_disk_read_bytes_total{device="dm-0"} 5.13708655616e+11 @@ -1393,6 +1393,15 @@ node_entropy_available_bits 1337 node_entropy_pool_size_bits 4096 # HELP node_exporter_build_info A metric with a constant '1' value labeled by version, revision, branch, goversion from which node_exporter was built, and the goos and goarch for the build. # TYPE node_exporter_build_info gauge +# HELP node_ext4_errors_total Number of ext4 filesystem errors. +# TYPE node_ext4_errors_total counter +node_ext4_errors_total{device="sdb1"} 12 +# HELP node_ext4_messages_total Number of ext4 filesystem log messages. +# TYPE node_ext4_messages_total counter +node_ext4_messages_total{device="sdb1"} 567 +# HELP node_ext4_warnings_total Number of ext4 filesystem warnings. +# TYPE node_ext4_warnings_total counter +node_ext4_warnings_total{device="sdb1"} 34 # HELP node_fibrechannel_dumped_frames_total Number of dumped frames # TYPE node_fibrechannel_dumped_frames_total counter node_fibrechannel_dumped_frames_total{fc_host="host1"} 0 From b1a9a05a874b45342048dbde674b1c6bccb77cd5 Mon Sep 17 00:00:00 2001 From: John Morales Date: Sun, 8 Mar 2026 18:52:05 -0400 Subject: [PATCH 09/12] e2e-64k-page-output.txt fixture update Signed-off-by: John Morales --- collector/fixtures/e2e-64k-page-output.txt | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/collector/fixtures/e2e-64k-page-output.txt b/collector/fixtures/e2e-64k-page-output.txt index 112072b5ed..2c8fd71c2c 100644 --- a/collector/fixtures/e2e-64k-page-output.txt +++ b/collector/fixtures/e2e-64k-page-output.txt @@ -1147,12 +1147,12 @@ node_disk_io_time_weighted_seconds_total{device="sr0"} 0 node_disk_io_time_weighted_seconds_total{device="vda"} 2.0778722280000001e+06 # HELP node_disk_iodone_total Number of completed or rejected IO commands. 
# TYPE node_disk_iodone_total counter -node_disk_iodone_total{device="sda"} 307 -node_disk_iodone_total{device="sr0"} 2767 +node_disk_iodone_total{device="sda"} 775 +node_disk_iodone_total{device="sr0"} 1.29433517e+08 # HELP node_disk_ioerr_total Number of IO commands that completed with an error. # TYPE node_disk_ioerr_total counter -node_disk_ioerr_total{device="sda"} 3 -node_disk_ioerr_total{device="sr0"} 29 +node_disk_ioerr_total{device="sda"} 11 +node_disk_ioerr_total{device="sr0"} 41 # HELP node_disk_read_bytes_total The total number of bytes read successfully. # TYPE node_disk_read_bytes_total counter node_disk_read_bytes_total{device="dm-0"} 5.13708655616e+11 @@ -1361,6 +1361,15 @@ node_entropy_available_bits 1337 node_entropy_pool_size_bits 4096 # HELP node_exporter_build_info A metric with a constant '1' value labeled by version, revision, branch, goversion from which node_exporter was built, and the goos and goarch for the build. # TYPE node_exporter_build_info gauge +# HELP node_ext4_errors_total Number of ext4 filesystem errors. +# TYPE node_ext4_errors_total counter +node_ext4_errors_total{device="sdb1"} 12 +# HELP node_ext4_messages_total Number of ext4 filesystem log messages. +# TYPE node_ext4_messages_total counter +node_ext4_messages_total{device="sdb1"} 567 +# HELP node_ext4_warnings_total Number of ext4 filesystem warnings. 
+# TYPE node_ext4_warnings_total counter +node_ext4_warnings_total{device="sdb1"} 34 # HELP node_fibrechannel_dumped_frames_total Number of dumped frames # TYPE node_fibrechannel_dumped_frames_total counter node_fibrechannel_dumped_frames_total{fc_host="host1"} 0 From 58aca64ac3ea074ae62aa663dc4b968195484032 Mon Sep 17 00:00:00 2001 From: John Morales Date: Mon, 9 Mar 2026 15:22:52 -0400 Subject: [PATCH 10/12] feedback: update ext4 collector for more consistent label naming partition instead of device Signed-off-by: John Morales --- collector/ext4_linux.go | 54 +++++++++++----------- collector/fixtures/e2e-64k-page-output.txt | 6 +-- collector/fixtures/e2e-output.txt | 6 +-- 3 files changed, 33 insertions(+), 33 deletions(-) diff --git a/collector/ext4_linux.go b/collector/ext4_linux.go index 96f45a05e1..db418fb05a 100644 --- a/collector/ext4_linux.go +++ b/collector/ext4_linux.go @@ -27,53 +27,53 @@ import ( ) const ( - ext4DefaultIgnoredDevices = "^features$" + ext4DefaultIgnoredPartitions = "^features$" ) var ( - ext4DeviceExclude = kingpin.Flag( - "collector.ext4.device-exclude", - "Regexp of ext4 devices to exclude (mutually exclusive to device-include).", - ).Default(ext4DefaultIgnoredDevices).String() - - ext4DeviceInclude = kingpin.Flag( - "collector.ext4.device-include", - "Regexp of ext4 devices to include (mutually exclusive to device-exclude).", + ext4PartitionExclude = kingpin.Flag( + "collector.ext4.partition-exclude", + "Regexp of ext4 partitions to exclude (mutually exclusive to partition-include).", + ).Default(ext4DefaultIgnoredPartitions).String() + + ext4PartitionInclude = kingpin.Flag( + "collector.ext4.partition-include", + "Regexp of ext4 partitions to include (mutually exclusive to partition-exclude).", ).String() ) // An ext4Collector is a Collector which gathers metrics from ext4 filesystems. 
type ext4Collector struct { - deviceFilter deviceFilter - fs ext4.FS - logger *slog.Logger + partitionFilter deviceFilter + fs ext4.FS + logger *slog.Logger } func init() { registerCollector("ext4", defaultEnabled, NewExt4Collector) } -func newExt4DeviceFilter(logger *slog.Logger) (deviceFilter, error) { - if *ext4DeviceExclude != "" && *ext4DeviceInclude != "" { - return deviceFilter{}, errors.New("device-exclude & device-include are mutually exclusive") +func newExt4PartitionFilter(logger *slog.Logger) (deviceFilter, error) { + if *ext4PartitionExclude != "" && *ext4PartitionInclude != "" { + return deviceFilter{}, errors.New("partition-exclude & partition-include are mutually exclusive") } - if *ext4DeviceExclude != "" { - logger.Info("Parsed flag --collector.ext4.device-exclude", "flag", *ext4DeviceExclude) + if *ext4PartitionExclude != "" { + logger.Info("Parsed flag --collector.ext4.partition-exclude", "flag", *ext4PartitionExclude) } - if *ext4DeviceInclude != "" { - logger.Info("Parsed Flag --collector.ext4.device-include", "flag", *ext4DeviceInclude) + if *ext4PartitionInclude != "" { + logger.Info("Parsed Flag --collector.ext4.partition-include", "flag", *ext4PartitionInclude) } - return newDeviceFilter(*ext4DeviceExclude, *ext4DeviceInclude), nil + return newDeviceFilter(*ext4PartitionExclude, *ext4PartitionInclude), nil } // NewExt4Collector returns a new Collector exposing ext4 statistics. 
func NewExt4Collector(logger *slog.Logger) (Collector, error) { - ext4DeviceFilter, err := newExt4DeviceFilter(logger) + ext4PartitionFilter, err := newExt4PartitionFilter(logger) if err != nil { - return nil, fmt.Errorf("failed to parse device filter flags: %w", err) + return nil, fmt.Errorf("failed to parse partition filter flags: %w", err) } fs, err := ext4.NewFS(*procPath, *sysPath) @@ -82,9 +82,9 @@ func NewExt4Collector(logger *slog.Logger) (Collector, error) { } return &ext4Collector{ - deviceFilter: ext4DeviceFilter, - fs: fs, - logger: logger, + partitionFilter: ext4PartitionFilter, + fs: fs, + logger: logger, }, nil } @@ -96,7 +96,7 @@ func (c *ext4Collector) Update(ch chan<- prometheus.Metric) error { } for _, s := range stats { - if c.deviceFilter.ignored(s.Name) { + if c.partitionFilter.ignored(s.Name) { continue } c.updateExt4Stats(ch, s) @@ -137,7 +137,7 @@ func (c *ext4Collector) updateExt4Stats(ch chan<- prometheus.Metric, s *ext4.Sta subsystem = "ext4" ) var ( - labels = []string{"device"} + labels = []string{"partition"} ) metrics := c.getMetrics(s) diff --git a/collector/fixtures/e2e-64k-page-output.txt b/collector/fixtures/e2e-64k-page-output.txt index 2c8fd71c2c..a4221625b9 100644 --- a/collector/fixtures/e2e-64k-page-output.txt +++ b/collector/fixtures/e2e-64k-page-output.txt @@ -1363,13 +1363,13 @@ node_entropy_pool_size_bits 4096 # TYPE node_exporter_build_info gauge # HELP node_ext4_errors_total Number of ext4 filesystem errors. # TYPE node_ext4_errors_total counter -node_ext4_errors_total{device="sdb1"} 12 +node_ext4_errors_total{partition="sdb1"} 12 # HELP node_ext4_messages_total Number of ext4 filesystem log messages. # TYPE node_ext4_messages_total counter -node_ext4_messages_total{device="sdb1"} 567 +node_ext4_messages_total{partition="sdb1"} 567 # HELP node_ext4_warnings_total Number of ext4 filesystem warnings. 
# TYPE node_ext4_warnings_total counter -node_ext4_warnings_total{device="sdb1"} 34 +node_ext4_warnings_total{partition="sdb1"} 34 # HELP node_fibrechannel_dumped_frames_total Number of dumped frames # TYPE node_fibrechannel_dumped_frames_total counter node_fibrechannel_dumped_frames_total{fc_host="host1"} 0 diff --git a/collector/fixtures/e2e-output.txt b/collector/fixtures/e2e-output.txt index f1ddc9b0d6..7867d43457 100644 --- a/collector/fixtures/e2e-output.txt +++ b/collector/fixtures/e2e-output.txt @@ -1395,13 +1395,13 @@ node_entropy_pool_size_bits 4096 # TYPE node_exporter_build_info gauge # HELP node_ext4_errors_total Number of ext4 filesystem errors. # TYPE node_ext4_errors_total counter -node_ext4_errors_total{device="sdb1"} 12 +node_ext4_errors_total{partition="sdb1"} 12 # HELP node_ext4_messages_total Number of ext4 filesystem log messages. # TYPE node_ext4_messages_total counter -node_ext4_messages_total{device="sdb1"} 567 +node_ext4_messages_total{partition="sdb1"} 567 # HELP node_ext4_warnings_total Number of ext4 filesystem warnings. 
# TYPE node_ext4_warnings_total counter -node_ext4_warnings_total{device="sdb1"} 34 +node_ext4_warnings_total{partition="sdb1"} 34 # HELP node_fibrechannel_dumped_frames_total Number of dumped frames # TYPE node_fibrechannel_dumped_frames_total counter node_fibrechannel_dumped_frames_total{fc_host="host1"} 0 From 52e1b2f0f0232ad98a5528af6d34ba4f9e5a318b Mon Sep 17 00:00:00 2001 From: John Morales Date: Mon, 16 Mar 2026 20:24:53 -0400 Subject: [PATCH 11/12] Update README.md for new ext4 collector and partition include/exclude flags Signed-off-by: John Morales --- README.md | 2 ++ collector/ext4_linux.go | 2 ++ 2 files changed, 4 insertions(+) diff --git a/README.md b/README.md index ec5adbf7ac..107d271be4 100644 --- a/README.md +++ b/README.md @@ -100,6 +100,7 @@ cpu | flags | --collector.cpu.info.flags-include | N/A diskstats | device | --collector.diskstats.device-include | --collector.diskstats.device-exclude ethtool | device | --collector.ethtool.device-include | --collector.ethtool.device-exclude ethtool | metrics | --collector.ethtool.metrics-include | N/A +ext4 | partitions | --collector.ext4.partition-include | --collector.ext4.partition-exclude filesystem | fs-types | --collector.filesystem.fs-types-include | --collector.filesystem.fs-types-exclude filesystem | mount-points | --collector.filesystem.mount-points-include | --collector.filesystem.mount-points-exclude hwmon | chip | --collector.hwmon.chip-include | --collector.hwmon.chip-exclude @@ -128,6 +129,7 @@ dmi | Expose Desktop Management Interface (DMI) info from `/sys/class/dmi/id/` | edac | Exposes error detection and correction statistics. | Linux entropy | Exposes available entropy. | Linux exec | Exposes execution statistics. | Dragonfly, FreeBSD +ext4 | Exposes ext4 statistics by partition from `/sys/fs/ext4/` | Linux fibrechannel | Exposes fibre channel information and statistics from `/sys/class/fc_host/`. | Linux filefd | Exposes file descriptor statistics from `/proc/sys/fs/file-nr`. 
| Linux filesystem | Exposes filesystem statistics, such as disk space used. | Darwin, Dragonfly, FreeBSD, Linux, OpenBSD diff --git a/collector/ext4_linux.go b/collector/ext4_linux.go index db418fb05a..5a2ed23401 100644 --- a/collector/ext4_linux.go +++ b/collector/ext4_linux.go @@ -92,6 +92,8 @@ func NewExt4Collector(logger *slog.Logger) (Collector, error) { func (c *ext4Collector) Update(ch chan<- prometheus.Metric) error { stats, err := c.fs.ProcStat() if err != nil { + // ProcStat() returns an empty slice and a nil error when /sys/fs/ext4 does not exist, rather than os.ErrNotExist. + // Therefore any non-nil error is a genuine failure and is returned. return fmt.Errorf("failed to retrieve ext4 stats: %w", err) } From 786c30ee26de89bc1fb2f38868ef075686f9902f Mon Sep 17 00:00:00 2001 From: John Morales Date: Mon, 16 Mar 2026 20:38:17 -0400 Subject: [PATCH 12/12] Update README.md for naming consistency (singular) Signed-off-by: John Morales --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 107d271be4..180535c707 100644 --- a/README.md +++ b/README.md @@ -100,7 +100,7 @@ cpu | flags | --collector.cpu.info.flags-include | N/A diskstats | device | --collector.diskstats.device-include | --collector.diskstats.device-exclude ethtool | device | --collector.ethtool.device-include | --collector.ethtool.device-exclude ethtool | metrics | --collector.ethtool.metrics-include | N/A -ext4 | partitions | --collector.ext4.partition-include | --collector.ext4.partition-exclude +ext4 | partition | --collector.ext4.partition-include | --collector.ext4.partition-exclude filesystem | fs-types | --collector.filesystem.fs-types-include | --collector.filesystem.fs-types-exclude filesystem | mount-points | --collector.filesystem.mount-points-include | --collector.filesystem.mount-points-exclude hwmon | chip | --collector.hwmon.chip-include | --collector.hwmon.chip-exclude