From 6269f7502a80558da5c6ea2f40ef0c0261664321 Mon Sep 17 00:00:00 2001
From: Ed Schouten
Date: Mon, 5 Dec 2016 11:37:35 +0100
Subject: [PATCH] Add a collector for DRBD.

This collector exposes most of the useful information that can be found
in /proc/drbd. Sizes are normalised to be in bytes, as /proc/drbd uses
kibibytes.
---
 README.md                         |   1 +
 collector/drbd_linux.go           | 239 ++++++++++++++++++++++++++++++
 collector/fixtures/e2e-output.txt |  47 ++++++
 collector/fixtures/proc/drbd      |   5 +
 end-to-end-test.sh                |   1 +
 5 files changed, 293 insertions(+)
 create mode 100644 collector/drbd_linux.go
 create mode 100644 collector/fixtures/proc/drbd

diff --git a/README.md b/README.md
index 507851c1..69b5d8f4 100644
--- a/README.md
+++ b/README.md
@@ -42,6 +42,7 @@ Name | Description | OS
 ---------|-------------|----
 bonding | Exposes the number of configured and active slaves of Linux bonding interfaces. | Linux
 devstat | Exposes device statistics | Dragonfly, FreeBSD
+drbd | Exposes Distributed Replicated Block Device statistics | Linux
 gmond | Exposes statistics from Ganglia. | _any_
 interrupts | Exposes detailed interrupts statistics. | Linux, OpenBSD
 ipvs | Exposes IPVS status from `/proc/net/ip_vs` and stats from `/proc/net/ip_vs_stats`. | Linux
diff --git a/collector/drbd_linux.go b/collector/drbd_linux.go
new file mode 100644
index 00000000..37cbe9cd
--- /dev/null
+++ b/collector/drbd_linux.go
@@ -0,0 +1,239 @@
+// Copyright 2016 The Prometheus Authors
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package collector
+
+import (
+	"bufio"
+	"fmt"
+	"os"
+	"strconv"
+	"strings"
+
+	"github.com/prometheus/client_golang/prometheus"
+	"github.com/prometheus/common/log"
+)
+
+// drbdNumericalMetric describes a numerical "key:value" field of /proc/drbd.
+// The parsed value is multiplied by multiplier before being exposed, which is
+// how sizes reported in kibibytes are normalised to bytes.
+type drbdNumericalMetric struct {
+	desc       *prometheus.Desc
+	valueType  prometheus.ValueType
+	multiplier float64
+}
+
+// newDrbdNumericalMetric builds a drbdNumericalMetric whose descriptor is named
+// <Namespace>_drbd_<name> and carries a single "device" label.
+func newDrbdNumericalMetric(name string, desc string, valueType prometheus.ValueType, multiplier float64) drbdNumericalMetric {
+	return drbdNumericalMetric{
+		desc: prometheus.NewDesc(
+			prometheus.BuildFQName(Namespace, "drbd", name),
+			desc,
+			[]string{"device"}, nil),
+		valueType:  valueType,
+		multiplier: multiplier,
+	}
+}
+
+// drbdStringPairMetric describes a "key:local/remote" field of /proc/drbd.
+// Each side is exposed as 1 when it equals valueOkay and as 0 otherwise.
+type drbdStringPairMetric struct {
+	desc      *prometheus.Desc
+	valueOkay string
+}
+
+// isOkay returns 1 if value matches the metric's expected state, 0 otherwise.
+func (metric *drbdStringPairMetric) isOkay(value string) float64 {
+	if value == metric.valueOkay {
+		return 1
+	}
+	return 0
+}
+
+// newDrbdStringPairMetric builds a drbdStringPairMetric whose descriptor is
+// named <Namespace>_drbd_<name> and carries "device" and "node" labels.
+func newDrbdStringPairMetric(name string, desc string, valueOkay string) drbdStringPairMetric {
+	return drbdStringPairMetric{
+		desc: prometheus.NewDesc(
+			prometheus.BuildFQName(Namespace, "drbd", name),
+			desc,
+			[]string{"device", "node"}, nil),
+		valueOkay: valueOkay,
+	}
+}
+
+var (
+	// drbdNumericalMetrics maps /proc/drbd keys to the metrics they feed.
+	// Size fields use a multiplier of 1024 because /proc/drbd reports
+	// them in kibibytes.
+	drbdNumericalMetrics = map[string]drbdNumericalMetric{
+		"ns": newDrbdNumericalMetric(
+			"network_sent_bytes",
+			"Volume of net data sent to the partner via the network connection.",
+			prometheus.CounterValue,
+			1024),
+		"nr": newDrbdNumericalMetric(
+			"network_received_bytes",
+			"Volume of net data received by the partner via the network connection.",
+			prometheus.CounterValue,
+			1024),
+		"dw": newDrbdNumericalMetric(
+			"disk_written_bytes",
+			"Net data written on local hard disk.",
+			prometheus.CounterValue,
+			1024),
+		"dr": newDrbdNumericalMetric(
+			"disk_read_bytes",
+			"Net data read from local hard disk.",
+			prometheus.CounterValue,
+			1024),
+		"al": newDrbdNumericalMetric(
+			"activitylog_writes",
+			"Number of updates of the activity log area of the meta data.",
+			prometheus.CounterValue,
+			1),
+		"bm": newDrbdNumericalMetric(
+			"bitmap_writes",
+			"Number of updates of the bitmap area of the meta data.",
+			prometheus.CounterValue,
+			1),
+		"lo": newDrbdNumericalMetric(
+			"local_pending",
+			"Number of open requests to the local I/O sub-system.",
+			prometheus.GaugeValue,
+			1),
+		"pe": newDrbdNumericalMetric(
+			"remote_pending",
+			"Number of requests sent to the partner, but that have not yet been answered by the latter.",
+			prometheus.GaugeValue,
+			1),
+		"ua": newDrbdNumericalMetric(
+			"remote_unacknowledged",
+			"Number of requests received by the partner via the network connection, but that have not yet been answered.",
+			prometheus.GaugeValue,
+			1),
+		"ap": newDrbdNumericalMetric(
+			"application_pending",
+			"Number of block I/O requests forwarded to DRBD, but not yet answered by DRBD.",
+			prometheus.GaugeValue,
+			1),
+		"ep": newDrbdNumericalMetric(
+			"epochs",
+			"Number of Epochs currently on the fly.",
+			prometheus.GaugeValue,
+			1),
+		"oos": newDrbdNumericalMetric(
+			"out_of_sync_bytes",
+			"Amount of data known to be out of sync.",
+			prometheus.GaugeValue,
+			1024),
+	}
+	// drbdStringPairMetrics maps /proc/drbd keys holding a "local/remote"
+	// pair to the metrics they feed.
+	drbdStringPairMetrics = map[string]drbdStringPairMetric{
+		"ro": newDrbdStringPairMetric(
+			"node_role_is_primary",
+			"Whether the role of the node is in the primary state.",
+			"Primary"),
+		"ds": newDrbdStringPairMetric(
+			"disk_state_is_up_to_date",
+			"Whether the disk of the node is up to date.",
+			"UpToDate"),
+	}
+
+	drbdConnected = prometheus.NewDesc(
+		prometheus.BuildFQName(Namespace, "drbd", "connected"),
+		"Whether DRBD is connected to the partner.",
+		[]string{"device"}, nil)
+)
+
+// drbdCollector exposes the contents of /proc/drbd as metrics.
+type drbdCollector struct{}
+
+func init() {
+	Factories["drbd"] = NewDrbdCollector
+}
+
+// NewDrbdCollector returns a new Collector exposing DRBD statistics.
+func NewDrbdCollector() (Collector, error) {
+	return &drbdCollector{}, nil
+}
+
+// Update implements Collector. It reads the drbd file under the proc mount
+// word by word and sends one metric per recognised "key:value" field on ch,
+// labelled with the most recently seen device. A missing file is not treated
+// as an error; malformed values are logged and skipped.
+func (c *drbdCollector) Update(ch chan<- prometheus.Metric) (err error) {
+	statsFile := procFilePath("drbd")
+	f, err := os.Open(statsFile)
+	if err != nil {
+		if os.IsNotExist(err) {
+			log.Debugf("Not collecting DRBD statistics, as %s does not exist: %s", statsFile, err)
+			return nil
+		}
+		return err
+	}
+	defer f.Close()
+
+	scanner := bufio.NewScanner(f)
+	scanner.Split(bufio.ScanWords)
+	device := "unknown"
+	for scanner.Scan() {
+		field := scanner.Text()
+		if kv := strings.Split(field, ":"); len(kv) == 2 {
+			if id, err := strconv.ParseUint(kv[0], 10, 64); err == nil && kv[1] == "" {
+				// A bare "<number>:" token starts a new device section.
+				device = fmt.Sprintf("drbd%d", id)
+			} else if metric, ok := drbdNumericalMetrics[kv[0]]; ok {
+				// Numerical value.
+				value, parseErr := strconv.ParseFloat(kv[1], 64)
+				if parseErr != nil {
+					log.Infof("Ignoring non-numerical value for key %s: %s", kv[0], kv[1])
+					continue
+				}
+				ch <- prometheus.MustNewConstMetric(
+					metric.desc, metric.valueType,
+					value*metric.multiplier, device)
+			} else if metric, ok := drbdStringPairMetrics[kv[0]]; ok {
+				// String pair value, formatted as "local/remote".
+				values := strings.Split(kv[1], "/")
+				if len(values) < 2 {
+					log.Infof("Ignoring malformed string pair for key %s: %s", kv[0], kv[1])
+					continue
+				}
+				ch <- prometheus.MustNewConstMetric(
+					metric.desc, prometheus.GaugeValue,
+					metric.isOkay(values[0]), device, "local")
+				ch <- prometheus.MustNewConstMetric(
+					metric.desc, prometheus.GaugeValue,
+					metric.isOkay(values[1]), device, "remote")
+			} else if kv[0] == "cs" {
+				// Connection state.
+				var connected float64
+				if kv[1] == "Connected" {
+					connected = 1
+				}
+				ch <- prometheus.MustNewConstMetric(
+					drbdConnected, prometheus.GaugeValue,
+					connected, device)
+			} else {
+				log.Infof("Don't know how to process key-value pair [%s: %s]", kv[0], kv[1])
+			}
+		} else {
+			log.Infof("Don't know how to process string %s", field)
+		}
+	}
+	// Surface read errors instead of silently reporting a partial scrape.
+	return scanner.Err()
+}
diff --git a/collector/fixtures/e2e-output.txt b/collector/fixtures/e2e-output.txt
index baf8f972..b7bf9641 100644
--- a/collector/fixtures/e2e-output.txt
+++ b/collector/fixtures/e2e-output.txt
@@ -375,6 +375,53 @@ node_disk_writes_merged{device="nvme0n1"} 43950
 node_disk_writes_merged{device="sda"} 1.1134226e+07
 node_disk_writes_merged{device="sr0"} 0
 node_disk_writes_merged{device="vda"} 2.0711856e+07
+# HELP node_drbd_activitylog_writes Number of updates of the activity log area of the meta data.
+# TYPE node_drbd_activitylog_writes counter
+node_drbd_activitylog_writes{device="drbd1"} 1100
+# HELP node_drbd_application_pending Number of block I/O requests forwarded to DRBD, but not yet answered by DRBD.
+# TYPE node_drbd_application_pending gauge
+node_drbd_application_pending{device="drbd1"} 12348
+# HELP node_drbd_bitmap_writes Number of updates of the bitmap area of the meta data.
+# TYPE node_drbd_bitmap_writes counter
+node_drbd_bitmap_writes{device="drbd1"} 221
+# HELP node_drbd_connected Whether DRBD is connected to the partner.
+# TYPE node_drbd_connected gauge
+node_drbd_connected{device="drbd1"} 1
+# HELP node_drbd_disk_read_bytes Net data read from local hard disk.
+# TYPE node_drbd_disk_read_bytes counter
+node_drbd_disk_read_bytes{device="drbd1"} 1.2154539008e+11
+# HELP node_drbd_disk_state_is_up_to_date Whether the disk of the node is up to date.
+# TYPE node_drbd_disk_state_is_up_to_date gauge
+node_drbd_disk_state_is_up_to_date{device="drbd1",node="local"} 1
+node_drbd_disk_state_is_up_to_date{device="drbd1",node="remote"} 1
+# HELP node_drbd_disk_written_bytes Net data written on local hard disk.
+# TYPE node_drbd_disk_written_bytes counter
+node_drbd_disk_written_bytes{device="drbd1"} 2.8941845504e+10
+# HELP node_drbd_epochs Number of Epochs currently on the fly.
+# TYPE node_drbd_epochs gauge
+node_drbd_epochs{device="drbd1"} 1
+# HELP node_drbd_local_pending Number of open requests to the local I/O sub-system.
+# TYPE node_drbd_local_pending gauge
+node_drbd_local_pending{device="drbd1"} 12345
+# HELP node_drbd_network_received_bytes Volume of net data received by the partner via the network connection.
+# TYPE node_drbd_network_received_bytes counter
+node_drbd_network_received_bytes{device="drbd1"} 1.1224075264e+10
+# HELP node_drbd_network_sent_bytes Volume of net data sent to the partner via the network connection.
+# TYPE node_drbd_network_sent_bytes counter
+node_drbd_network_sent_bytes{device="drbd1"} 1.7740228608e+10
+# HELP node_drbd_node_role_is_primary Whether the role of the node is in the primary state.
+# TYPE node_drbd_node_role_is_primary gauge
+node_drbd_node_role_is_primary{device="drbd1",node="local"} 1
+node_drbd_node_role_is_primary{device="drbd1",node="remote"} 1
+# HELP node_drbd_out_of_sync_bytes Amount of data known to be out of sync.
+# TYPE node_drbd_out_of_sync_bytes gauge
+node_drbd_out_of_sync_bytes{device="drbd1"} 1.2645376e+07
+# HELP node_drbd_remote_pending Number of requests sent to the partner, but that have not yet been answered by the latter.
+# TYPE node_drbd_remote_pending gauge
+node_drbd_remote_pending{device="drbd1"} 12346
+# HELP node_drbd_remote_unacknowledged Number of requests received by the partner via the network connection, but that have not yet been answered.
+# TYPE node_drbd_remote_unacknowledged gauge
+node_drbd_remote_unacknowledged{device="drbd1"} 12347
 # HELP node_entropy_available_bits Bits of available entropy.
 # TYPE node_entropy_available_bits gauge
 node_entropy_available_bits 1337
diff --git a/collector/fixtures/proc/drbd b/collector/fixtures/proc/drbd
new file mode 100644
index 00000000..77d16440
--- /dev/null
+++ b/collector/fixtures/proc/drbd
@@ -0,0 +1,5 @@
+version: 8.4.3 (api:1/proto:86-101)
+srcversion: 1A9F77B1CA5FF92235C2213
+
+ 1: cs:Connected ro:Primary/Primary ds:UpToDate/UpToDate C r-----
+    ns:17324442 nr:10961011 dw:28263521 dr:118696670 al:1100 bm:221 lo:12345 pe:12346 ua:12347 ap:12348 ep:1 wo:d oos:12349
diff --git a/end-to-end-test.sh b/end-to-end-test.sh
index ce6351e9..ecdab863 100755
--- a/end-to-end-test.sh
+++ b/end-to-end-test.sh
@@ -5,6 +5,7 @@ set -euf -o pipefail
 collectors=$(cat << COLLECTORS
   conntrack
   diskstats
+  drbd
   entropy
   filefd
   hwmon