Add a collector for ZFS, currently focussed on ARC stats.

It is tested on FreeBSD 10.2-RELEASE and Linux (ZFS on Linux 0.6.5.4).

On FreeBSD, Solaris, etc. ZFS metrics are exposed through sysctls.
ZFS on Linux exposes the same metrics through procfs `/proc/spl/...`.

In addition to sysctl metrics, 'computed metrics' are exposed by
the collector, which are based on several sysctl values.
There is some conditional logic involved in computing these metrics
which cannot be easily mapped to PromQL.

Not all 92 ARC sysctls are exposed right now but this can be changed
with one additional LOC each.
This commit is contained in:
Christian Schwarz 2016-02-25 13:55:02 +01:00 committed by Joe Handzik
parent dde59014b8
commit f29f3873ea
10 changed files with 678 additions and 0 deletions

View file

@ -0,0 +1,93 @@
6 1 0x01 91 4368 5266997922 97951858082072
name type data
hits 4 8772612
misses 4 604635
demand_data_hits 4 7221032
demand_data_misses 4 73300
demand_metadata_hits 4 1464353
demand_metadata_misses 4 498170
prefetch_data_hits 4 3615
prefetch_data_misses 4 17094
prefetch_metadata_hits 4 83612
prefetch_metadata_misses 4 16071
mru_hits 4 855535
mru_ghost_hits 4 21100
mfu_hits 4 7829854
mfu_ghost_hits 4 821
deleted 4 60403
mutex_miss 4 2
evict_skip 4 2265729
evict_not_enough 4 680
evict_l2_cached 4 0
evict_l2_eligible 4 8992514560
evict_l2_ineligible 4 992552448
evict_l2_skip 4 0
hash_elements 4 42359
hash_elements_max 4 88245
hash_collisions 4 50564
hash_chains 4 412
hash_chain_max 4 3
p 4 516395305
c 4 1643208777
c_min 4 33554432
c_max 4 8367976448
size 4 1603939792
hdr_size 4 16361080
data_size 4 1295836160
metadata_size 4 175298560
other_size 4 116443992
anon_size 4 1917440
anon_evictable_data 4 0
anon_evictable_metadata 4 0
mru_size 4 402593792
mru_evictable_data 4 278091264
mru_evictable_metadata 4 18606592
mru_ghost_size 4 999728128
mru_ghost_evictable_data 4 883765248
mru_ghost_evictable_metadata 4 115962880
mfu_size 4 1066623488
mfu_evictable_data 4 1017613824
mfu_evictable_metadata 4 9163776
mfu_ghost_size 4 104936448
mfu_ghost_evictable_data 4 96731136
mfu_ghost_evictable_metadata 4 8205312
l2_hits 4 0
l2_misses 4 0
l2_feeds 4 0
l2_rw_clash 4 0
l2_read_bytes 4 0
l2_write_bytes 4 0
l2_writes_sent 4 0
l2_writes_done 4 0
l2_writes_error 4 0
l2_writes_lock_retry 4 0
l2_evict_lock_retry 4 0
l2_evict_reading 4 0
l2_evict_l1cached 4 0
l2_free_on_write 4 0
l2_cdata_free_on_write 4 0
l2_abort_lowmem 4 0
l2_cksum_bad 4 0
l2_io_error 4 0
l2_size 4 0
l2_asize 4 0
l2_hdr_size 4 0
l2_compress_successes 4 0
l2_compress_zeros 4 0
l2_compress_failures 4 0
memory_throttle_count 4 0
duplicate_buffers 4 0
duplicate_buffers_size 4 0
duplicate_reads 4 0
memory_direct_count 4 542
memory_indirect_count 4 3006
arc_no_grow 4 0
arc_tempreserve 4 0
arc_loaned_bytes 4 0
arc_prune 4 0
arc_meta_used 4 308103632
arc_meta_limit 4 6275982336
arc_meta_max 4 449286096
arc_meta_min 4 16777216
arc_need_free 4 0
arc_sys_free 4 261496832

View file

@ -0,0 +1,78 @@
kstat.zfs.misc.arcstats.arc_meta_max: 1503210048
kstat.zfs.misc.arcstats.arc_meta_limit: 393216000
kstat.zfs.misc.arcstats.arc_meta_used: 392649848
kstat.zfs.misc.arcstats.duplicate_reads: 0
kstat.zfs.misc.arcstats.duplicate_buffers_size: 0
kstat.zfs.misc.arcstats.duplicate_buffers: 0
kstat.zfs.misc.arcstats.memory_throttle_count: 0
kstat.zfs.misc.arcstats.l2_write_buffer_list_null_iter: 0
kstat.zfs.misc.arcstats.l2_write_buffer_list_iter: 0
kstat.zfs.misc.arcstats.l2_write_buffer_bytes_scanned: 0
kstat.zfs.misc.arcstats.l2_write_pios: 0
kstat.zfs.misc.arcstats.l2_write_buffer_iter: 0
kstat.zfs.misc.arcstats.l2_write_full: 0
kstat.zfs.misc.arcstats.l2_write_not_cacheable: 29425
kstat.zfs.misc.arcstats.l2_write_io_in_progress: 0
kstat.zfs.misc.arcstats.l2_write_in_l2: 0
kstat.zfs.misc.arcstats.l2_write_spa_mismatch: 0
kstat.zfs.misc.arcstats.l2_write_passed_headroom: 0
kstat.zfs.misc.arcstats.l2_write_trylock_fail: 0
kstat.zfs.misc.arcstats.l2_compress_failures: 0
kstat.zfs.misc.arcstats.l2_compress_zeros: 0
kstat.zfs.misc.arcstats.l2_compress_successes: 0
kstat.zfs.misc.arcstats.l2_hdr_size: 0
kstat.zfs.misc.arcstats.l2_asize: 0
kstat.zfs.misc.arcstats.l2_size: 0
kstat.zfs.misc.arcstats.l2_io_error: 0
kstat.zfs.misc.arcstats.l2_cksum_bad: 0
kstat.zfs.misc.arcstats.l2_abort_lowmem: 0
kstat.zfs.misc.arcstats.l2_cdata_free_on_write: 0
kstat.zfs.misc.arcstats.l2_free_on_write: 0
kstat.zfs.misc.arcstats.l2_evict_reading: 0
kstat.zfs.misc.arcstats.l2_evict_lock_retry: 0
kstat.zfs.misc.arcstats.l2_writes_hdr_miss: 0
kstat.zfs.misc.arcstats.l2_writes_error: 0
kstat.zfs.misc.arcstats.l2_writes_done: 0
kstat.zfs.misc.arcstats.l2_writes_sent: 0
kstat.zfs.misc.arcstats.l2_write_bytes: 0
kstat.zfs.misc.arcstats.l2_read_bytes: 0
kstat.zfs.misc.arcstats.l2_rw_clash: 0
kstat.zfs.misc.arcstats.l2_feeds: 0
kstat.zfs.misc.arcstats.l2_misses: 0
kstat.zfs.misc.arcstats.l2_hits: 0
kstat.zfs.misc.arcstats.other_size: 166832272
kstat.zfs.misc.arcstats.data_size: 1200779776
kstat.zfs.misc.arcstats.hdr_size: 27244008
kstat.zfs.misc.arcstats.size: 1394856056
kstat.zfs.misc.arcstats.c_max: 1572864000
kstat.zfs.misc.arcstats.c_min: 196608000
kstat.zfs.misc.arcstats.c: 1470553736
kstat.zfs.misc.arcstats.p: 665524427
kstat.zfs.misc.arcstats.hash_chain_max: 7
kstat.zfs.misc.arcstats.hash_chains: 14180
kstat.zfs.misc.arcstats.hash_collisions: 2180398
kstat.zfs.misc.arcstats.hash_elements_max: 238188
kstat.zfs.misc.arcstats.hash_elements: 111458
kstat.zfs.misc.arcstats.evict_l2_ineligible: 60262400
kstat.zfs.misc.arcstats.evict_l2_eligible: 35702978560
kstat.zfs.misc.arcstats.evict_l2_cached: 0
kstat.zfs.misc.arcstats.evict_skip: 21716568
kstat.zfs.misc.arcstats.mutex_miss: 873
kstat.zfs.misc.arcstats.recycle_miss: 5018771
kstat.zfs.misc.arcstats.stolen: 1327563
kstat.zfs.misc.arcstats.deleted: 1187256
kstat.zfs.misc.arcstats.allocated: 10150518
kstat.zfs.misc.arcstats.mfu_ghost_hits: 1408986
kstat.zfs.misc.arcstats.mfu_hits: 51952454
kstat.zfs.misc.arcstats.mru_ghost_hits: 696819
kstat.zfs.misc.arcstats.mru_hits: 11115835
kstat.zfs.misc.arcstats.prefetch_metadata_misses: 32
kstat.zfs.misc.arcstats.prefetch_metadata_hits: 2
kstat.zfs.misc.arcstats.prefetch_data_misses: 0
kstat.zfs.misc.arcstats.prefetch_data_hits: 0
kstat.zfs.misc.arcstats.demand_metadata_misses: 9231542
kstat.zfs.misc.arcstats.demand_metadata_hits: 40650947
kstat.zfs.misc.arcstats.demand_data_misses: 75230
kstat.zfs.misc.arcstats.demand_data_hits: 22417340
kstat.zfs.misc.arcstats.misses: 9306804
kstat.zfs.misc.arcstats.hits: 63068289

View file

@ -0,0 +1,12 @@
trout size 4294967296 -
trout free 1040117248 -
trout allocated 70144 -
trout capacity 0% -
trout dedupratio 1.00x -
trout fragmentation 0% -
zroot size 118111600640 -
zroot free 3990917120 -
zroot allocated 114120683520 -
zroot capacity 50% -
zroot dedupratio 1.00x -
zroot fragmentation 67% -

109
collector/zfs.go Normal file
View file

@ -0,0 +1,109 @@
// Build constraints must precede the package clause and be followed by a
// blank line; placed after the package clause they are silently ignored.

// +build linux freebsd
// +build !nozfs

package collector
import (
"errors"
"strings"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/log"
)
// zfsMetricValue is the integer value of a single ZFS statistic.
type zfsMetricValue int
// zfsErrorValue is a sentinel zfsMetricValue of -1.
const zfsErrorValue = zfsMetricValue(-1)
// zfsNotAvailableError is returned by the platform-specific availability
// checks. Update logs it at debug level and then returns nil.
var zfsNotAvailableError = errors.New("ZFS / ZFS statistics are not available")
// zfsSysctl is the full dotted name of a ZFS statistic, for example
// "kstat.zfs.misc.arcstats.hits".
type zfsSysctl string
// zfsSubsystemName is used as the subsystem part of a metric's
// fully-qualified name.
type zfsSubsystemName string
// Subsystem names used when building fully-qualified metric names.
const (
arc = zfsSubsystemName("zfsArc")
zpoolSubsystem = zfsSubsystemName("zfsPool")
)
// Metrics
// zfsMetric ties a ZFS sysctl to the subsystem and name it is exported under.
type zfsMetric struct {
subsystem zfsSubsystemName // The Prometheus subsystem name.
name string // The Prometheus name of the metric.
sysctl zfsSysctl // The sysctl of the ZFS metric.
}
// datasetMetric names a metric by subsystem and metric name.
// NOTE(review): not referenced by any code visible in this change; presumably
// reserved for per-dataset statistics - confirm before relying on it.
type datasetMetric struct {
subsystem zfsSubsystemName
name string
}
// Collector
// init registers NewZFSCollector in Factories under the key "zfs".
func init() {
Factories["zfs"] = NewZFSCollector
}
// zfsCollector collects ZFS statistics. Its Update method reports ARC
// statistics and then pool statistics.
type zfsCollector struct {
// zfsMetrics is left empty by NewZFSCollector.
zfsMetrics []zfsMetric
}
// NewZFSCollector returns a new, empty zfsCollector as a Collector.
// The returned error is always nil.
func NewZFSCollector() (Collector, error) {
return &zfsCollector{}, nil
}
// Update sends the ARC statistics followed by the pool statistics to ch.
//
// When ZFS is not available (zfsAvailable reports zfsNotAvailableError) the
// condition is logged at debug level and Update returns nil without emitting
// anything. Any other error is returned to the caller.
func (c *zfsCollector) Update(ch chan<- prometheus.Metric) (err error) {
	if err = c.zfsAvailable(); err != nil {
		if err == zfsNotAvailableError {
			log.Debug(err)
			return nil
		}
		return err
	}

	// ARC statistics come first; pool statistics are only attempted when
	// they could be gathered successfully.
	if err = c.updateArcstats(ch); err != nil {
		return err
	}
	return c.updatePoolStats(ch)
}
// metricName returns the last dot-separated component of the sysctl name,
// e.g. "hits" for "kstat.zfs.misc.arcstats.hits". A name without any dot is
// returned unchanged.
func (s zfsSysctl) metricName() string {
	full := string(s)
	if dot := strings.LastIndex(full, "."); dot >= 0 {
		return full[dot+1:]
	}
	return full
}
// ConstSysctlMetric builds an untyped constant metric for a single sysctl.
//
// The metric is named after the last component of sysctl within the given
// subsystem; the full sysctl name is used as the help text. The metric has no
// labels.
func (c *zfsCollector) ConstSysctlMetric(subsystem zfsSubsystemName, sysctl zfsSysctl, value zfsMetricValue) prometheus.Metric {
	fqName := prometheus.BuildFQName(Namespace, string(subsystem), sysctl.metricName())
	desc := prometheus.NewDesc(fqName, string(sysctl), nil, nil)
	return prometheus.MustNewConstMetric(desc, prometheus.UntypedValue, float64(value))
}
// ConstZpoolMetric builds an untyped constant metric for one pool property.
//
// The metric lives in the zpool subsystem, uses name both as metric name and
// as help text, and carries a single "pool" label set to pool.
func (c *zfsCollector) ConstZpoolMetric(pool, name string, value float64) prometheus.Metric {
	fqName := prometheus.BuildFQName(Namespace, string(zpoolSubsystem), name)
	desc := prometheus.NewDesc(fqName, name, []string{"pool"}, nil)
	return prometheus.MustNewConstMetric(desc, prometheus.UntypedValue, value, pool)
}

110
collector/zfs_freebsd.go Normal file
View file

@ -0,0 +1,110 @@
package collector
import (
"bufio"
"io"
"os/exec"
"strconv"
"strings"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/log"
)
/*
#cgo LDFLAGS:
#include <sys/param.h>
#include <sys/module.h>
// zfsModuleLoaded looks up the kernel module named "zfs" via modfind().
// Return convention (note: NOT a conventional boolean):
//   0  - the lookup failed (modid < 0), i.e. the module is treated as absent
//  -1  - the lookup succeeded, i.e. the module is treated as loaded
// The Go caller therefore compares the result against 0.
int zfsModuleLoaded() {
int modid = modfind("zfs");
return modid < 0 ? 0 : -1;
}
*/
import "C"
// zfsAvailable reports whether the ZFS kernel module is present. It returns
// zfsNotAvailableError when the C helper zfsModuleLoaded yields 0, and nil
// otherwise.
func (c *zfsCollector) zfsAvailable() error {
	if loaded := C.zfsModuleLoaded(); loaded != 0 {
		return nil
	}
	return zfsNotAvailableError
}
// zfsArcstatsSysctl is the sysctl node passed to the sysctl command to list
// the ARC statistics; it is also the prefix every parsed line must carry.
const zfsArcstatsSysctl = "kstat.zfs.misc.arcstats"
// RunOnStdout starts cmd, hands its standard output to handler and waits for
// the command to exit.
//
// If the pipe cannot be created or the command cannot be started, that error
// is returned and handler is never called. If handler returns an error, the
// command is killed and reaped before that error is returned, so neither the
// child process nor its pipe is leaked. Otherwise the result of cmd.Wait is
// returned.
func (c *zfsCollector) RunOnStdout(cmd *exec.Cmd, handler func(io.Reader) error) (err error) {
	stdout, err := cmd.StdoutPipe()
	if err != nil {
		return err
	}
	if err = cmd.Start(); err != nil {
		return err
	}
	if err = handler(stdout); err != nil {
		// The handler may have stopped reading early. Kill the child so it
		// cannot block forever on a full pipe, then reap it. Errors from
		// Kill and Wait are deliberately ignored: the handler's error is the
		// one worth reporting.
		_ = cmd.Process.Kill()
		_ = cmd.Wait()
		return err
	}
	return cmd.Wait()
}
// updateArcstats runs `sysctl kstat.zfs.misc.arcstats`, parses its output and
// sends one metric per parsed sysctl to ch.
func (c *zfsCollector) updateArcstats(ch chan<- prometheus.Metric) (err error) {
	emit := func(sysctl zfsSysctl, value zfsMetricValue) {
		ch <- c.ConstSysctlMetric(arc, sysctl, value)
	}
	parse := func(stdout io.Reader) error {
		return c.parseArcstatsSysctlOutput(stdout, emit)
	}
	return c.RunOnStdout(exec.Command("sysctl", zfsArcstatsSysctl), parse)
}
// parseArcstatsSysctlOutput reads `sysctl` output line by line and calls
// handler once for every "<sysctl>: <integer>" line.
//
// A line is accepted only if it consists of exactly two whitespace-separated
// fields and the first one starts with zfsArcstatsSysctl and ends in ":".
// Other lines are logged at debug level and skipped. A value that is not an
// integer aborts parsing with the conversion error; otherwise the scanner's
// error (if any) is returned.
func (c *zfsCollector) parseArcstatsSysctlOutput(reader io.Reader, handler func(zfsSysctl, zfsMetricValue)) (err error) {
	lines := bufio.NewScanner(reader)
	for lines.Scan() {
		line := lines.Text()
		fields := strings.Fields(line)

		wellFormed := len(fields) == 2 &&
			strings.HasPrefix(fields[0], zfsArcstatsSysctl) &&
			strings.HasSuffix(fields[0], ":")
		if !wellFormed {
			log.Debugf("Skipping line of unknown format: %q", line)
			continue
		}

		number, convErr := strconv.Atoi(fields[1])
		if convErr != nil {
			return convErr
		}
		name := strings.TrimSuffix(fields[0], ":")
		handler(zfsSysctl(name), zfsMetricValue(number))
	}
	return lines.Err()
}
// updatePoolStats runs `zpool get -pH` for a fixed list of pool properties,
// parses the output and sends one metric per pool and property to ch.
func (c *zfsCollector) updatePoolStats(ch chan<- prometheus.Metric) (err error) {
	properties := strings.Join(
		[]string{"size", "free", "allocated", "capacity", "dedupratio", "fragmentation"},
		",",
	)
	emit := func(pool, name string, value float64) {
		ch <- c.ConstZpoolMetric(pool, name, value)
	}
	parse := func(stdout io.Reader) error {
		return c.parseZpoolOutput(stdout, emit)
	}
	return c.RunOnStdout(exec.Command("zpool", "get", "-pH", properties), parse)
}

View file

@ -0,0 +1,44 @@
package collector
import (
"os"
"testing"
)
func TestArcstatsParsing(t *testing.T) {
arcstatsOutput, err := os.Open("fixtures/sysctl/freebsd/kstat.zfs.misc.arcstats.txt")
if err != nil {
t.Fatal(err)
}
defer arcstatsOutput.Close()
c := zfsCollector{}
if err != nil {
t.Fatal(err)
}
handlerCalled := false
err = c.parseArcstatsSysctlOutput(arcstatsOutput, func(s zfsSysctl, v zfsMetricValue) {
if s != zfsSysctl("kstat.zfs.misc.arcstats.hits") {
return
}
handlerCalled = true
if v != zfsMetricValue(63068289) {
t.Fatalf("Incorrect value parsed from sysctl output")
}
})
if err != nil {
t.Fatal(err)
}
if !handlerCalled {
t.Fatal("Arcstats parsing handler was not called for some expected sysctls")
}
}

88
collector/zfs_linux.go Normal file
View file

@ -0,0 +1,88 @@
package collector
import (
"bufio"
"errors"
"fmt"
"io"
"os"
"strconv"
"strings"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/log"
)
const (
// zfsArcstatsProcpath is the path of the ARC statistics file; it is
// resolved through procFilePath before being opened.
zfsArcstatsProcpath = "spl/kstat/zfs/arcstats"
)
// zfsAvailable reports whether ZFS statistics can be read by trying to open
// the arcstats file. It returns the error from openArcstatsFile when the file
// cannot be opened and nil otherwise.
func (c *zfsCollector) zfsAvailable() (err error) {
	file, err := c.openArcstatsFile()
	if err != nil {
		return err
	}
	// The file was opened only as a probe; release the descriptor right away.
	// A failure to close a read-only probe handle does not affect
	// availability, so the result is deliberately ignored.
	_ = file.Close()
	return nil
}
// openArcstatsFile opens the arcstats file for reading. If the file cannot be
// opened, the failure is logged at debug level and zfsNotAvailableError is
// returned in place of the underlying error.
func (c *zfsCollector) openArcstatsFile() (file *os.File, err error) {
	path := procFilePath(zfsArcstatsProcpath)
	if file, err = os.Open(path); err != nil {
		log.Debugf("Cannot open %q for reading. Is the kernel module loaded?", path)
		return file, zfsNotAvailableError
	}
	return file, nil
}
// updateArcstats opens the arcstats file, parses it and sends one metric per
// parsed statistic to ch. The file is closed before returning.
func (c *zfsCollector) updateArcstats(ch chan<- prometheus.Metric) (err error) {
	arcstats, openErr := c.openArcstatsFile()
	if openErr != nil {
		return openErr
	}
	defer arcstats.Close()

	emit := func(s zfsSysctl, v zfsMetricValue) {
		ch <- c.ConstSysctlMetric(arc, s, v)
	}
	return c.parseArcstatsProcfsFile(arcstats, emit)
}
// parseArcstatsProcfsFile parses the contents of the arcstats procfs file and
// calls handler once per statistic.
//
// Everything up to and including the "name type data" header line is skipped.
// After the header, every line with at least three whitespace-separated
// fields is treated as "<name> <type> <value>"; the name is reported with the
// "kstat.zfs.misc.arcstats." prefix so that it matches the sysctl naming.
//
// It returns an error if a value is not an integer (the message includes the
// underlying conversion error), if reading fails, or if the header line was
// never found. A read error takes precedence over the missing-header error so
// that I/O failures are not misreported as a format problem.
func (c *zfsCollector) parseArcstatsProcfsFile(reader io.Reader, handler func(zfsSysctl, zfsMetricValue)) (err error) {
	scanner := bufio.NewScanner(reader)
	headerSeen := false
	for scanner.Scan() {
		parts := strings.Fields(scanner.Text())
		if !headerSeen {
			if len(parts) == 3 && parts[0] == "name" && parts[1] == "type" && parts[2] == "data" {
				// Start parsing from the next line.
				headerSeen = true
			}
			continue
		}
		if len(parts) < 3 {
			continue
		}

		key := "kstat.zfs.misc.arcstats." + parts[0]
		value, convErr := strconv.Atoi(parts[2])
		if convErr != nil {
			return fmt.Errorf("could not parse expected integer value for %q: %v", key, convErr)
		}
		handler(zfsSysctl(key), zfsMetricValue(value))
	}
	if scanErr := scanner.Err(); scanErr != nil {
		return scanErr
	}
	if !headerSeen {
		return errors.New("did not parse a single arcstat metric")
	}
	return nil
}
// updatePoolStats is a no-op in this file: it sends nothing to ch and always
// returns nil.
func (c *zfsCollector) updatePoolStats(ch chan<- prometheus.Metric) (err error) {
return nil
}

View file

@ -0,0 +1,44 @@
package collector
import (
"os"
"testing"
)
func TestArcstatsParsing(t *testing.T) {
arcstatsFile, err := os.Open("fixtures/proc/spl/kstat/zfs/arcstats")
if err != nil {
t.Fatal(err)
}
defer arcstatsFile.Close()
c := zfsCollector{}
if err != nil {
t.Fatal(err)
}
handlerCalled := false
err = c.parseArcstatsProcfsFile(arcstatsFile, func(s zfsSysctl, v zfsMetricValue) {
if s != zfsSysctl("kstat.zfs.misc.arcstats.hits") {
return
}
handlerCalled = true
if v != zfsMetricValue(8772612) {
t.Fatalf("Incorrect value parsed from procfs data")
}
})
if err != nil {
t.Fatal(err)
}
if !handlerCalled {
t.Fatal("Arcstats parsing handler was not called for some expected sysctls")
}
}

41
collector/zfs_zpool.go Normal file
View file

@ -0,0 +1,41 @@
package collector
import (
"bufio"
"fmt"
"io"
"strconv"
"strings"
)
// zpool metrics
// parseZpoolOutput parses the output of `zpool get -H` and calls handler with
// (pool, property, value) for every line.
//
// Each line must consist of exactly four whitespace-separated fields, of which
// the first three are pool, property and value. Values are converted to
// float64 as follows:
//   - a trailing "%" is stripped and the number is divided by 100, so that
//     "5%" becomes 0.05 and "100%" becomes 1;
//   - a trailing "x" (ratios such as "1.00x") is stripped;
//   - anything else is parsed as is.
//
// It returns an error for a line with an unexpected number of fields, for a
// value that is not a number, or if reading fails.
func (c *zfsCollector) parseZpoolOutput(reader io.Reader, handler func(string, string, float64)) (err error) {
	scanner := bufio.NewScanner(reader)
	for scanner.Scan() {
		fields := strings.Fields(scanner.Text())
		if len(fields) != 4 {
			return fmt.Errorf("Unexpected output of zpool command")
		}
		pool, property, raw := fields[0], fields[1], fields[2]

		divisor := 1.0
		switch {
		case strings.HasSuffix(raw, "%"):
			// Scale numerically; prefixing "0." would turn "5%" into 0.5
			// and "100%" into 0.1.
			raw = strings.TrimSuffix(raw, "%")
			divisor = 100
		case strings.HasSuffix(raw, "x"):
			raw = strings.TrimSuffix(raw, "x")
		}

		value, parseErr := strconv.ParseFloat(raw, 64)
		if parseErr != nil {
			return parseErr
		}
		handler(pool, property, value/divisor)
	}
	return scanner.Err()
}

View file

@ -0,0 +1,59 @@
package collector
import (
"os"
"testing"
)
// TestZpoolParsing feeds recorded `zpool get` output through parseZpoolOutput
// and verifies that both pools of the fixture are reported and that plain,
// ratio ("x") and percentage ("%") values are converted correctly.
func TestZpoolParsing(t *testing.T) {
	zpoolOutput, err := os.Open("fixtures/zfs/zpool_stats_stdout.txt")
	if err != nil {
		t.Fatal(err)
	}
	defer zpoolOutput.Close()

	c := zfsCollector{}

	// The handler is invoked once per pool property, so pools are tracked
	// as a set, not by position.
	seenPools := make(map[string]bool, 2)
	troutSize := float64(-1)
	troutDedupratio := float64(-1)
	zrootCapacity := float64(-1)

	err = c.parseZpoolOutput(zpoolOutput, func(pool, name string, value float64) {
		seenPools[pool] = true
		switch {
		case pool == "trout" && name == "size":
			troutSize = value
		case pool == "trout" && name == "dedupratio":
			troutDedupratio = value
		case pool == "zroot" && name == "capacity":
			zrootCapacity = value
		}
	})
	if err != nil {
		t.Fatal(err)
	}

	if len(seenPools) != 2 || !seenPools["trout"] || !seenPools["zroot"] {
		t.Fatal("Did not parse all pools in fixture")
	}
	if troutSize != float64(4294967296) {
		t.Fatal("Unexpected value for pool 'trout's size value")
	}
	if troutDedupratio != float64(1.0) {
		t.Fatal("Unexpected value for pool 'trout's dedupratio value")
	}
	if zrootCapacity != float64(0.5) {
		t.Fatal("Unexpected value for pool 'zroot's capacity value")
	}
}