diff --git a/CHANGELOG.md b/CHANGELOG.md index 5d819fc3..b4d6411c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,7 @@ * [ENHANCEMENT] Add node_softirqs_total metric #2221 * [ENHANCEMENT] Add device filter flags to arp collector #2254 * [ENHANCEMENT] Add rapl zone name label option #2401 +* [ENHANCEMENT] Add slabinfo collector #1799 * [BUGFIX] Sanitize rapl zone names #2299 ## 1.3.1 / 2021-12-01 diff --git a/README.md b/README.md index 2fd364ed..0c2a29cd 100644 --- a/README.md +++ b/README.md @@ -210,6 +210,7 @@ perf | Exposes perf based metrics (Warning: Metrics are dependent on kernel conf processes | Exposes aggregate process statistics from `/proc`. | Linux qdisc | Exposes [queuing discipline](https://en.wikipedia.org/wiki/Network_scheduler#Linux_kernel) statistics | Linux runit | Exposes service status from [runit](http://smarden.org/runit/). | _any_ +slabinfo | Exposes slab statistics from `/proc/slabinfo`. Note that the permissions of `/proc/slabinfo` are usually 0400, so adjust them so that the exporter can read the file. | Linux supervisord | Exposes service status from [supervisord](http://supervisord.org/). | _any_ systemd | Exposes service and system status from [systemd](http://www.freedesktop.org/wiki/Software/systemd/). | Linux tcpstat | Exposes TCP connection status information from `/proc/net/tcp` and `/proc/net/tcp6`. (Warning: the current version has potential performance issues in high load situations.) 
| Linux diff --git a/collector/fixtures/e2e-64k-page-output.txt b/collector/fixtures/e2e-64k-page-output.txt index 4477c970..0e7e0343 100644 --- a/collector/fixtures/e2e-64k-page-output.txt +++ b/collector/fixtures/e2e-64k-page-output.txt @@ -3013,6 +3013,7 @@ node_scrape_collector_success{collector="qdisc"} 1 node_scrape_collector_success{collector="rapl"} 1 node_scrape_collector_success{collector="schedstat"} 1 node_scrape_collector_success{collector="selinux"} 1 +node_scrape_collector_success{collector="slabinfo"} 1 node_scrape_collector_success{collector="sockstat"} 1 node_scrape_collector_success{collector="softnet"} 1 node_scrape_collector_success{collector="stat"} 1 @@ -3029,6 +3030,36 @@ node_scrape_collector_success{collector="zoneinfo"} 1 # HELP node_selinux_enabled SELinux is enabled, 1 is true, 0 is false # TYPE node_selinux_enabled gauge node_selinux_enabled 0 +# HELP node_slabinfo_active_objects The number of objects that are currently active (i.e., in use). +# TYPE node_slabinfo_active_objects gauge +node_slabinfo_active_objects{slab="dmaengine-unmap-128"} 1206 +node_slabinfo_active_objects{slab="kmalloc-8192"} 132 +node_slabinfo_active_objects{slab="kmem_cache"} 320 +node_slabinfo_active_objects{slab="tw_sock_TCP"} 704 +# HELP node_slabinfo_object_size_bytes The size of objects in this slab, in bytes. +# TYPE node_slabinfo_object_size_bytes gauge +node_slabinfo_object_size_bytes{slab="dmaengine-unmap-128"} 1088 +node_slabinfo_object_size_bytes{slab="kmalloc-8192"} 8192 +node_slabinfo_object_size_bytes{slab="kmem_cache"} 256 +node_slabinfo_object_size_bytes{slab="tw_sock_TCP"} 256 +# HELP node_slabinfo_objects The total number of allocated objects (i.e., objects that are both in use and not in use). 
+# TYPE node_slabinfo_objects gauge +node_slabinfo_objects{slab="dmaengine-unmap-128"} 1320 +node_slabinfo_objects{slab="kmalloc-8192"} 148 +node_slabinfo_objects{slab="kmem_cache"} 320 +node_slabinfo_objects{slab="tw_sock_TCP"} 864 +# HELP node_slabinfo_objects_per_slab The number of objects stored in each slab. +# TYPE node_slabinfo_objects_per_slab gauge +node_slabinfo_objects_per_slab{slab="dmaengine-unmap-128"} 30 +node_slabinfo_objects_per_slab{slab="kmalloc-8192"} 4 +node_slabinfo_objects_per_slab{slab="kmem_cache"} 32 +node_slabinfo_objects_per_slab{slab="tw_sock_TCP"} 32 +# HELP node_slabinfo_pages_per_slab The number of pages allocated for each slab. +# TYPE node_slabinfo_pages_per_slab gauge +node_slabinfo_pages_per_slab{slab="dmaengine-unmap-128"} 8 +node_slabinfo_pages_per_slab{slab="kmalloc-8192"} 8 +node_slabinfo_pages_per_slab{slab="kmem_cache"} 2 +node_slabinfo_pages_per_slab{slab="tw_sock_TCP"} 2 # HELP node_sockstat_FRAG6_inuse Number of FRAG6 sockets in state inuse. # TYPE node_sockstat_FRAG6_inuse gauge node_sockstat_FRAG6_inuse 0 diff --git a/collector/fixtures/e2e-output.txt b/collector/fixtures/e2e-output.txt index 73d4bfc9..7fe83642 100644 --- a/collector/fixtures/e2e-output.txt +++ b/collector/fixtures/e2e-output.txt @@ -3035,6 +3035,7 @@ node_scrape_collector_success{collector="qdisc"} 1 node_scrape_collector_success{collector="rapl"} 1 node_scrape_collector_success{collector="schedstat"} 1 node_scrape_collector_success{collector="selinux"} 1 +node_scrape_collector_success{collector="slabinfo"} 1 node_scrape_collector_success{collector="sockstat"} 1 node_scrape_collector_success{collector="softnet"} 1 node_scrape_collector_success{collector="stat"} 1 @@ -3051,6 +3052,36 @@ node_scrape_collector_success{collector="zoneinfo"} 1 # HELP node_selinux_enabled SELinux is enabled, 1 is true, 0 is false # TYPE node_selinux_enabled gauge node_selinux_enabled 0 +# HELP node_slabinfo_active_objects The number of objects that are currently active 
(i.e., in use). +# TYPE node_slabinfo_active_objects gauge +node_slabinfo_active_objects{slab="dmaengine-unmap-128"} 1206 +node_slabinfo_active_objects{slab="kmalloc-8192"} 132 +node_slabinfo_active_objects{slab="kmem_cache"} 320 +node_slabinfo_active_objects{slab="tw_sock_TCP"} 704 +# HELP node_slabinfo_object_size_bytes The size of objects in this slab, in bytes. +# TYPE node_slabinfo_object_size_bytes gauge +node_slabinfo_object_size_bytes{slab="dmaengine-unmap-128"} 1088 +node_slabinfo_object_size_bytes{slab="kmalloc-8192"} 8192 +node_slabinfo_object_size_bytes{slab="kmem_cache"} 256 +node_slabinfo_object_size_bytes{slab="tw_sock_TCP"} 256 +# HELP node_slabinfo_objects The total number of allocated objects (i.e., objects that are both in use and not in use). +# TYPE node_slabinfo_objects gauge +node_slabinfo_objects{slab="dmaengine-unmap-128"} 1320 +node_slabinfo_objects{slab="kmalloc-8192"} 148 +node_slabinfo_objects{slab="kmem_cache"} 320 +node_slabinfo_objects{slab="tw_sock_TCP"} 864 +# HELP node_slabinfo_objects_per_slab The number of objects stored in each slab. +# TYPE node_slabinfo_objects_per_slab gauge +node_slabinfo_objects_per_slab{slab="dmaengine-unmap-128"} 30 +node_slabinfo_objects_per_slab{slab="kmalloc-8192"} 4 +node_slabinfo_objects_per_slab{slab="kmem_cache"} 32 +node_slabinfo_objects_per_slab{slab="tw_sock_TCP"} 32 +# HELP node_slabinfo_pages_per_slab The number of pages allocated for each slab. +# TYPE node_slabinfo_pages_per_slab gauge +node_slabinfo_pages_per_slab{slab="dmaengine-unmap-128"} 8 +node_slabinfo_pages_per_slab{slab="kmalloc-8192"} 8 +node_slabinfo_pages_per_slab{slab="kmem_cache"} 2 +node_slabinfo_pages_per_slab{slab="tw_sock_TCP"} 2 # HELP node_sockstat_FRAG6_inuse Number of FRAG6 sockets in state inuse. 
# TYPE node_sockstat_FRAG6_inuse gauge node_sockstat_FRAG6_inuse 0 diff --git a/collector/fixtures/proc/slabinfo b/collector/fixtures/proc/slabinfo new file mode 100644 index 00000000..8f2de4ba --- /dev/null +++ b/collector/fixtures/proc/slabinfo @@ -0,0 +1,6 @@ +slabinfo - version: 2.1 +# name : tunables : slabdata +tw_sock_TCP 704 864 256 32 2 : tunables 0 0 0 : slabdata 27 27 0 +dmaengine-unmap-128 1206 1320 1088 30 8 : tunables 0 0 0 : slabdata 44 44 0 +kmalloc-8192 132 148 8192 4 8 : tunables 0 0 0 : slabdata 37 37 0 +kmem_cache 320 320 256 32 2 : tunables 0 0 0 : slabdata 10 10 0
diff --git a/collector/slabinfo_linux.go b/collector/slabinfo_linux.go
new file mode 100644
index 00000000..8c032cbb
--- /dev/null
+++ b/collector/slabinfo_linux.go
@@ -0,0 +1,106 @@
+// Copyright 2022 The Prometheus Authors
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+//go:build linux && !noslabinfo
+// +build linux,!noslabinfo
+
+package collector
+
+import (
+	"fmt"
+
+	"github.com/go-kit/log"
+	"github.com/prometheus/client_golang/prometheus"
+	"github.com/prometheus/procfs"
+)
+
+// slabinfoCollector exposes per-slab statistics read through
+// procfs.FS.SlabInfo. The metric descriptors are built once in
+// NewSlabinfoCollector and reused by every Update call.
+type slabinfoCollector struct {
+	fs        procfs.FS
+	logger    log.Logger
+	subsystem string
+
+	activeObjects   *prometheus.Desc
+	objects         *prometheus.Desc
+	objectSizeBytes *prometheus.Desc
+	objectsPerSlab  *prometheus.Desc
+	pagesPerSlab    *prometheus.Desc
+}
+
+func init() {
+	registerCollector("slabinfo", defaultDisabled, NewSlabinfoCollector)
+}
+
+// NewSlabinfoCollector returns a new Collector exposing slab statistics.
+// It returns an error if procfs.NewFS fails for *procPath.
+func NewSlabinfoCollector(logger log.Logger) (Collector, error) {
+	fs, err := procfs.NewFS(*procPath)
+	if err != nil {
+		return nil, fmt.Errorf("failed to open procfs: %w", err)
+	}
+
+	const subsystem = "slabinfo"
+	labels := []string{"slab"}
+	// newDesc builds the descriptor of one gauge carrying the "slab" label.
+	newDesc := func(name, help string) *prometheus.Desc {
+		return prometheus.NewDesc(
+			prometheus.BuildFQName(namespace, subsystem, name),
+			help, labels, nil,
+		)
+	}
+
+	return &slabinfoCollector{
+		fs:        fs,
+		logger:    logger,
+		subsystem: subsystem,
+
+		activeObjects: newDesc("active_objects",
+			"The number of objects that are currently active (i.e., in use)."),
+		objects: newDesc("objects",
+			"The total number of allocated objects (i.e., objects that are both in use and not in use)."),
+		objectSizeBytes: newDesc("object_size_bytes",
+			"The size of objects in this slab, in bytes."),
+		objectsPerSlab: newDesc("objects_per_slab",
+			"The number of objects stored in each slab."),
+		pagesPerSlab: newDesc("pages_per_slab",
+			"The number of pages allocated for each slab."),
+	}, nil
+}
+
+// Update reads the current slab statistics and sends five gauges per slab
+// on ch, each labelled with the slab name. A read failure is returned wrapped.
+func (c *slabinfoCollector) Update(ch chan<- prometheus.Metric) error {
+	slabinfo, err := c.fs.SlabInfo()
+	if err != nil {
+		return fmt.Errorf("couldn't get %s: %w", c.subsystem, err)
+	}
+
+	for _, slab := range slabinfo.Slabs {
+		ch <- c.gauge(c.activeObjects, slab.Name, slab.ObjActive)
+		ch <- c.gauge(c.objects, slab.Name, slab.ObjNum)
+		ch <- c.gauge(c.objectSizeBytes, slab.Name, slab.ObjSize)
+		ch <- c.gauge(c.objectsPerSlab, slab.Name, slab.ObjPerSlab)
+		ch <- c.gauge(c.pagesPerSlab, slab.Name, slab.PagesPerSlab)
+	}
+
+	return nil
+}
+
+// gauge wraps val as a gauge sample of desc with label as its "slab" value.
+func (c *slabinfoCollector) gauge(desc *prometheus.Desc, label string, val int64) prometheus.Metric {
+	return prometheus.MustNewConstMetric(
+		desc, prometheus.GaugeValue, float64(val), label,
+	)
+}
diff --git a/end-to-end-test.sh b/end-to-end-test.sh index ca677759..c1e610bb 100755 --- a/end-to-end-test.sh +++ b/end-to-end-test.sh @@ -38,6 +38,7 @@ enabled_collectors=$(cat << COLLECTORS rapl schedstat selinux + slabinfo sockstat stat thermal_zone