Skip to content
25 changes: 25 additions & 0 deletions collector/diskstats_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,8 @@ type diskstatsCollector struct {
filesystemInfoDesc typedDesc
deviceMapperInfoDesc typedDesc
ataDescs map[string]typedDesc
ioErrDesc typedDesc
ioDoneDesc typedDesc
logger *slog.Logger
getUdevDeviceProperties func(uint32, uint32) (udevInfo, error)
}
Expand Down Expand Up @@ -247,6 +249,20 @@ func NewDiskstatsCollector(logger *slog.Logger) (Collector, error) {
), valueType: prometheus.GaugeValue,
},
},
ioErrDesc: typedDesc{
desc: prometheus.NewDesc(prometheus.BuildFQName(namespace, diskSubsystem, "ioerr_total"),
"Number of IO commands that completed with an error.",
[]string{"device"},
nil,
), valueType: prometheus.CounterValue,
},
ioDoneDesc: typedDesc{
desc: prometheus.NewDesc(prometheus.BuildFQName(namespace, diskSubsystem, "iodone_total"),
"Number of completed or rejected IO commands.",
[]string{"device"},
nil,
), valueType: prometheus.CounterValue,
},
logger: logger,
}

Expand Down Expand Up @@ -368,6 +384,15 @@ func (c *diskstatsCollector) Update(ch chan<- prometheus.Metric) error {
}
}
}

ioDeviceStats, ioErr := c.fs.SysBlockDeviceIOStat(dev)
if ioErr == nil {
ch <- c.ioErrDesc.mustNewConstMetric(float64(ioDeviceStats.IOErrCount), dev)
ch <- c.ioDoneDesc.mustNewConstMetric(float64(ioDeviceStats.IODoneCount), dev)
} else if !os.IsNotExist(ioErr) {
c.logger.Debug("Failed to get block device io stats", "device", dev, "err", ioErr)
}

}
return nil
}
Expand Down
8 changes: 8 additions & 0 deletions collector/diskstats_linux_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,14 @@ node_disk_io_time_weighted_seconds_total{device="sdb"} 67.07000000000001
node_disk_io_time_weighted_seconds_total{device="sdc"} 17.07
node_disk_io_time_weighted_seconds_total{device="sr0"} 0
node_disk_io_time_weighted_seconds_total{device="vda"} 2.0778722280000001e+06
# HELP node_disk_iodone_total Number of completed or rejected IO commands.
# TYPE node_disk_iodone_total counter
node_disk_iodone_total{device="sda"} 775
node_disk_iodone_total{device="sr0"} 1.29433517e+08
# HELP node_disk_ioerr_total Number of IO commands that completed with an error.
# TYPE node_disk_ioerr_total counter
node_disk_ioerr_total{device="sda"} 11
node_disk_ioerr_total{device="sr0"} 41
# HELP node_disk_read_bytes_total The total number of bytes read successfully.
# TYPE node_disk_read_bytes_total counter
node_disk_read_bytes_total{device="dm-0"} 5.13708655616e+11
Expand Down
159 changes: 159 additions & 0 deletions collector/ext4_linux.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,159 @@
// Copyright 2017 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//go:build !noext4
// +build !noext4

package collector

import (
"errors"
"fmt"
"log/slog"

"github.com/alecthomas/kingpin/v2"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/procfs/ext4"
)

// ext4DefaultIgnoredPartitions is the default value of the
// --collector.ext4.partition-exclude flag: a regexp matching only the
// name "features".
const ext4DefaultIgnoredPartitions = "^features$"

var (
	// ext4PartitionExcludeSet is flipped to true by the exclude flag's
	// PreAction, i.e. when --collector.ext4.partition-exclude is supplied by
	// the user rather than left at its default.
	ext4PartitionExcludeSet bool

	ext4PartitionExclude = kingpin.Flag(
		"collector.ext4.partition-exclude",
		"Regexp of ext4 partitions to exclude (mutually exclusive to partition-include).",
	).Default(ext4DefaultIgnoredPartitions).PreAction(func(*kingpin.ParseContext) error {
		ext4PartitionExcludeSet = true
		return nil
	}).String()

	ext4PartitionInclude = kingpin.Flag(
		"collector.ext4.partition-include",
		"Regexp of ext4 partitions to include (mutually exclusive to partition-exclude).",
	).String()
)

// An ext4Collector is a Collector which gathers metrics from ext4 filesystems.
type ext4Collector struct {
	// partitionFilter decides which filesystem names are skipped in Update.
	partitionFilter deviceFilter
	// fs is the handle used to read the per-filesystem ext4 statistics.
	fs ext4.FS
	// logger is the logger handed to NewExt4Collector.
	logger *slog.Logger
}

// init registers the ext4 collector under the name "ext4", enabled by default.
func init() {
	registerCollector("ext4", defaultEnabled, NewExt4Collector)
}

// newExt4PartitionFilter builds the partition filter from the
// --collector.ext4.partition-exclude and --collector.ext4.partition-include
// flags.
//
// The two flags are mutually exclusive, but the exclude flag carries a
// non-empty default. If only partition-include was given by the user, the
// default exclude pattern is dropped so that the include pattern can be used
// without also having to pass an empty partition-exclude. An error is returned
// only when both patterns end up non-empty.
func newExt4PartitionFilter(logger *slog.Logger) (deviceFilter, error) {
	exclude, include := *ext4PartitionExclude, *ext4PartitionInclude

	// Without this step the default exclude pattern would make every use of
	// partition-include fail the mutual-exclusion check below.
	if include != "" && !ext4PartitionExcludeSet {
		logger.Debug("partition-exclude flag not set when partition-include flag is set, assuming include is desired")
		exclude = ""
	}

	if exclude != "" && include != "" {
		return deviceFilter{}, errors.New("partition-exclude & partition-include are mutually exclusive")
	}

	if exclude != "" {
		logger.Info("Parsed flag --collector.ext4.partition-exclude", "flag", exclude)
	}

	if include != "" {
		logger.Info("Parsed flag --collector.ext4.partition-include", "flag", include)
	}

	return newDeviceFilter(exclude, include), nil
}

// NewExt4Collector returns a new Collector exposing ext4 statistics.
//
// It builds the partition filter from the command-line flags and opens the
// ext4 filesystem handle from the configured proc and sys paths; a failure of
// either step is returned wrapped.
func NewExt4Collector(logger *slog.Logger) (Collector, error) {
	filter, filterErr := newExt4PartitionFilter(logger)
	if filterErr != nil {
		return nil, fmt.Errorf("failed to parse partition filter flags: %w", filterErr)
	}

	ext4FS, fsErr := ext4.NewFS(*procPath, *sysPath)
	if fsErr != nil {
		return nil, fmt.Errorf("failed to open sysfs: %w", fsErr)
	}

	c := &ext4Collector{
		logger:          logger,
		fs:              ext4FS,
		partitionFilter: filter,
	}
	return c, nil
}

// Update implements Collector. It reads the statistics of every ext4
// filesystem and emits metrics for each one whose name is not rejected by the
// partition filter.
func (c *ext4Collector) Update(ch chan<- prometheus.Metric) error {
	allStats, err := c.fs.ProcStat()
	if err != nil {
		return fmt.Errorf("failed to retrieve ext4 stats: %w", err)
	}

	for i := range allStats {
		if fsStats := allStats[i]; !c.partitionFilter.ignored(fsStats.Name) {
			c.updateExt4Stats(ch, fsStats)
		}
	}

	return nil
}

// ext4Metric describes one sample to be exported for an ext4 filesystem.
type ext4Metric struct {
	// name is the metric name suffix passed to prometheus.BuildFQName.
	name string
	// desc is the help text of the metric.
	desc string
	// value is the sample value.
	value float64
}

// getMetrics converts the error, warning and message counts of a single ext4
// filesystem into the metrics to export, always in that order.
func (c *ext4Collector) getMetrics(s *ext4.Stats) []ext4Metric {
	errorsMetric := ext4Metric{
		name:  "errors_total",
		desc:  "Number of ext4 filesystem errors.",
		value: float64(s.Errors),
	}
	warningsMetric := ext4Metric{
		name:  "warnings_total",
		desc:  "Number of ext4 filesystem warnings.",
		value: float64(s.Warnings),
	}
	messagesMetric := ext4Metric{
		name:  "messages_total",
		desc:  "Number of ext4 filesystem log messages.",
		value: float64(s.Messages),
	}

	return []ext4Metric{errorsMetric, warningsMetric, messagesMetric}
}

// updateExt4Stats sends one counter per ext4 metric of the given filesystem
// to ch, labelled with the filesystem name as "partition".
func (c *ext4Collector) updateExt4Stats(ch chan<- prometheus.Metric, s *ext4.Stats) {
	labelNames := []string{"partition"}

	for _, metric := range c.getMetrics(s) {
		fqName := prometheus.BuildFQName(namespace, "ext4", metric.name)
		desc := prometheus.NewDesc(fqName, metric.desc, labelNames, nil)
		ch <- prometheus.MustNewConstMetric(desc, prometheus.CounterValue, metric.value, s.Name)
	}
}
69 changes: 69 additions & 0 deletions collector/ext4_linux_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
// Copyright 2019 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//go:build !noext4

package collector

import (
"testing"

"github.com/prometheus/procfs"
"github.com/prometheus/procfs/ext4"
)

// expectedExt4Metrics holds the metrics TestExt4 expects: one inner slice per
// filesystem returned by ProcStat, with entries in the order produced by
// getMetrics. Only name and value are compared.
var expectedExt4Metrics = [][]ext4Metric{
	{
		{name: "errors_total", value: 12},
		{name: "warnings_total", value: 34},
		{name: "messages_total", value: 567},
	},
}

// checkExt4Metric reports whether got carries the same name and value as exp.
// The desc field is not compared.
func checkExt4Metric(exp, got *ext4Metric) bool {
	return exp.name == got.name && exp.value == got.value
}

// TestExt4 reads the ext4 statistics from the fixtures/sys tree and checks
// that getMetrics yields exactly the names and values in expectedExt4Metrics.
func TestExt4(t *testing.T) {
	fs, err := ext4.NewFS(procfs.DefaultMountPoint, "fixtures/sys")
	if err != nil {
		t.Fatal(err)
	}
	c := &ext4Collector{fs: fs}

	stats, err := c.fs.ProcStat()
	if err != nil {
		t.Fatalf("Failed to retrieve ext4 stats: %v", err)
	}
	if want, got := len(expectedExt4Metrics), len(stats); got != want {
		t.Fatalf("Unexpected number of ext4 stats: expected %v, got %v", want, got)
	}

	for i := range stats {
		want := expectedExt4Metrics[i]
		got := c.getMetrics(stats[i])
		if len(got) != len(want) {
			t.Fatalf("Unexpected number of ext4 metrics: expected %v, got %v", len(want), len(got))
		}

		for j := range got {
			if !checkExt4Metric(&want[j], &got[j]) {
				t.Errorf("Incorrect ext4 metric: expected %#v, got: %#v", want[j], got[j])
			}
		}
	}
}
18 changes: 18 additions & 0 deletions collector/fixtures/e2e-64k-page-output.txt
Original file line number Diff line number Diff line change
Expand Up @@ -554,6 +554,14 @@ node_disk_io_time_weighted_seconds_total{device="sdb"} 67.07000000000001
node_disk_io_time_weighted_seconds_total{device="sdc"} 17.07
node_disk_io_time_weighted_seconds_total{device="sr0"} 0
node_disk_io_time_weighted_seconds_total{device="vda"} 2.0778722280000001e+06
# HELP node_disk_iodone_total Number of completed or rejected IO commands.
# TYPE node_disk_iodone_total counter
node_disk_iodone_total{device="sda"} 775
node_disk_iodone_total{device="sr0"} 1.29433517e+08
# HELP node_disk_ioerr_total Number of IO commands that completed with an error.
# TYPE node_disk_ioerr_total counter
node_disk_ioerr_total{device="sda"} 11
node_disk_ioerr_total{device="sr0"} 41
# HELP node_disk_read_bytes_total The total number of bytes read successfully.
# TYPE node_disk_read_bytes_total counter
node_disk_read_bytes_total{device="dm-0"} 5.13708655616e+11
Expand Down Expand Up @@ -762,6 +770,15 @@ node_entropy_available_bits 1337
node_entropy_pool_size_bits 4096
# HELP node_exporter_build_info A metric with a constant '1' value labeled by version, revision, branch, goversion from which node_exporter was built, and the goos and goarch for the build.
# TYPE node_exporter_build_info gauge
# HELP node_ext4_errors_total Number of ext4 filesystem errors.
# TYPE node_ext4_errors_total counter
node_ext4_errors_total{partition="sdb1"} 12
# HELP node_ext4_messages_total Number of ext4 filesystem log messages.
# TYPE node_ext4_messages_total counter
node_ext4_messages_total{partition="sdb1"} 567
# HELP node_ext4_warnings_total Number of ext4 filesystem warnings.
# TYPE node_ext4_warnings_total counter
node_ext4_warnings_total{partition="sdb1"} 34
# HELP node_fibrechannel_dumped_frames_total Number of dumped frames
# TYPE node_fibrechannel_dumped_frames_total counter
node_fibrechannel_dumped_frames_total{fc_host="host1"} 0
Expand Down Expand Up @@ -3062,6 +3079,7 @@ node_scrape_collector_success{collector="dmi"} 1
node_scrape_collector_success{collector="drbd"} 1
node_scrape_collector_success{collector="edac"} 1
node_scrape_collector_success{collector="entropy"} 1
node_scrape_collector_success{collector="ext4"} 1
node_scrape_collector_success{collector="fibrechannel"} 1
node_scrape_collector_success{collector="filefd"} 1
node_scrape_collector_success{collector="hwmon"} 1
Expand Down
18 changes: 18 additions & 0 deletions collector/fixtures/e2e-output.txt
Original file line number Diff line number Diff line change
Expand Up @@ -586,6 +586,14 @@ node_disk_io_time_weighted_seconds_total{device="sdb"} 67.07000000000001
node_disk_io_time_weighted_seconds_total{device="sdc"} 17.07
node_disk_io_time_weighted_seconds_total{device="sr0"} 0
node_disk_io_time_weighted_seconds_total{device="vda"} 2.0778722280000001e+06
# HELP node_disk_iodone_total Number of completed or rejected IO commands.
# TYPE node_disk_iodone_total counter
node_disk_iodone_total{device="sda"} 775
node_disk_iodone_total{device="sr0"} 1.29433517e+08
# HELP node_disk_ioerr_total Number of IO commands that completed with an error.
# TYPE node_disk_ioerr_total counter
node_disk_ioerr_total{device="sda"} 11
node_disk_ioerr_total{device="sr0"} 41
# HELP node_disk_read_bytes_total The total number of bytes read successfully.
# TYPE node_disk_read_bytes_total counter
node_disk_read_bytes_total{device="dm-0"} 5.13708655616e+11
Expand Down Expand Up @@ -794,6 +802,15 @@ node_entropy_available_bits 1337
node_entropy_pool_size_bits 4096
# HELP node_exporter_build_info A metric with a constant '1' value labeled by version, revision, branch, goversion from which node_exporter was built, and the goos and goarch for the build.
# TYPE node_exporter_build_info gauge
# HELP node_ext4_errors_total Number of ext4 filesystem errors.
# TYPE node_ext4_errors_total counter
node_ext4_errors_total{partition="sdb1"} 12
# HELP node_ext4_messages_total Number of ext4 filesystem log messages.
# TYPE node_ext4_messages_total counter
node_ext4_messages_total{partition="sdb1"} 567
# HELP node_ext4_warnings_total Number of ext4 filesystem warnings.
# TYPE node_ext4_warnings_total counter
node_ext4_warnings_total{partition="sdb1"} 34
# HELP node_fibrechannel_dumped_frames_total Number of dumped frames
# TYPE node_fibrechannel_dumped_frames_total counter
node_fibrechannel_dumped_frames_total{fc_host="host1"} 0
Expand Down Expand Up @@ -3094,6 +3111,7 @@ node_scrape_collector_success{collector="dmi"} 1
node_scrape_collector_success{collector="drbd"} 1
node_scrape_collector_success{collector="edac"} 1
node_scrape_collector_success{collector="entropy"} 1
node_scrape_collector_success{collector="ext4"} 1
node_scrape_collector_success{collector="fibrechannel"} 1
node_scrape_collector_success{collector="filefd"} 1
node_scrape_collector_success{collector="hwmon"} 1
Expand Down
Loading