Merge pull request #365 from EdSchouten/drbd

A collector for DRBD
This commit is contained in:
Johannes 'fish' Ziemke 2016-12-25 11:04:43 +01:00 committed by GitHub
commit 71ea37987f
6 changed files with 268 additions and 0 deletions

View file

@ -16,6 +16,7 @@ The following individuals have contributed code to this repository
* Björn Rabenstein <beorn@soundcloud.com>
* Brian Brazil <brian.brazil@boxever.com>
* Daniel Speichert <daniel@speichert.pro>
* Ed Schouten <ed@kumina.nl>
* Eric Ripa
* Fabian Reinartz <fabian@soundcloud.com>
* Franklin Wise <franklin@krave.io>

View file

@ -42,6 +42,7 @@ Name | Description | OS
---------|-------------|----
bonding | Exposes the number of configured and active slaves of Linux bonding interfaces. | Linux
devstat | Exposes device statistics | Dragonfly, FreeBSD
drbd | Exposes Distributed Replicated Block Device statistics | Linux
gmond | Exposes statistics from Ganglia. | _any_
interrupts | Exposes detailed interrupts statistics. | Linux, OpenBSD
ipvs | Exposes IPVS status from `/proc/net/ip_vs` and stats from `/proc/net/ip_vs_stats`. | Linux

213
collector/drbd_linux.go Normal file
View file

@ -0,0 +1,213 @@
// Copyright 2016 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package collector
import (
"bufio"
"fmt"
"os"
"strconv"
"strings"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/log"
)
// drbdNumericalMetric describes a numerical metric provided by /proc/drbd.
type drbdNumericalMetric struct {
// desc is the Prometheus descriptor the sample is emitted under.
desc *prometheus.Desc
// valueType is the Prometheus value type passed along with each sample.
valueType prometheus.ValueType
// multiplier scales the raw number parsed from the file before export.
multiplier float64
}
// newDRBDNumericalMetric builds the descriptor for a numerical DRBD metric.
//
// name is the metric name suffix placed under the "drbd" subsystem, desc is
// the help text, valueType is the Prometheus value type and multiplier is the
// factor stored alongside the descriptor for scaling raw values. The
// resulting descriptor carries a single variable label, "device".
func newDRBDNumericalMetric(name string, desc string, valueType prometheus.ValueType, multiplier float64) drbdNumericalMetric {
	fqName := prometheus.BuildFQName(Namespace, "drbd", name)
	labels := []string{"device"}

	var m drbdNumericalMetric
	m.desc = prometheus.NewDesc(fqName, desc, labels, nil)
	m.valueType = valueType
	m.multiplier = multiplier
	return m
}
// drbdStringPairMetric describes a string pair metric provided by /proc/drbd.
type drbdStringPairMetric struct {
// desc is the Prometheus descriptor the samples are emitted under.
desc *prometheus.Desc
// valueOkay is the string that isOkay compares against to report 1.
valueOkay string
}
// isOkay reports, as a gauge-friendly float, whether value is exactly the
// metric's valueOkay string: 1 on a match, 0 otherwise.
func (m *drbdStringPairMetric) isOkay(value string) float64 {
	if value != m.valueOkay {
		return 0
	}
	return 1
}
// newDRBDStringPairMetric builds the descriptor for a string pair DRBD metric.
//
// name is the metric name suffix placed under the "drbd" subsystem, desc is
// the help text and valueOkay is the string that counts as the "good" state.
// The resulting descriptor carries two variable labels, "device" and "node".
func newDRBDStringPairMetric(name string, desc string, valueOkay string) drbdStringPairMetric {
	fqName := prometheus.BuildFQName(Namespace, "drbd", name)
	labels := []string{"device", "node"}
	return drbdStringPairMetric{
		desc:      prometheus.NewDesc(fqName, desc, labels, nil),
		valueOkay: valueOkay,
	}
}
// Lookup tables and descriptors for the fields found in /proc/drbd. The map
// keys are the short field names that precede the ':' in each token.
var (
// drbdNumericalMetrics maps numeric field names to their descriptors.
// Update multiplies each parsed value by the entry's multiplier; the
// entries exported under *_bytes names use 1024.
drbdNumericalMetrics = map[string]drbdNumericalMetric{
"ns": newDRBDNumericalMetric(
"network_sent_bytes_total",
"Total number of bytes sent via the network.",
prometheus.CounterValue,
1024),
// NOTE(review): "nr" is exported as bytes but scaled by 1, whereas its
// counterpart "ns" is scaled by 1024. The DRBD documentation appears to
// list both in KiB - confirm, and update the e2e fixture if this changes.
"nr": newDRBDNumericalMetric(
"network_received_bytes_total",
"Total number of bytes received via the network.",
prometheus.CounterValue,
1),
"dw": newDRBDNumericalMetric(
"disk_written_bytes_total",
"Net data written on local hard disk; in bytes.",
prometheus.CounterValue,
1024),
"dr": newDRBDNumericalMetric(
"disk_read_bytes_total",
"Net data read from local hard disk; in bytes.",
prometheus.CounterValue,
1024),
// The remaining counters and gauges are plain counts, exported unscaled.
"al": newDRBDNumericalMetric(
"activitylog_writes_total",
"Number of updates of the activity log area of the meta data.",
prometheus.CounterValue,
1),
"bm": newDRBDNumericalMetric(
"bitmap_writes_total",
"Number of updates of the bitmap area of the meta data.",
prometheus.CounterValue,
1),
"lo": newDRBDNumericalMetric(
"local_pending",
"Number of open requests to the local I/O sub-system.",
prometheus.GaugeValue,
1),
"pe": newDRBDNumericalMetric(
"remote_pending",
"Number of requests sent to the peer, but that have not yet been answered by the latter.",
prometheus.GaugeValue,
1),
"ua": newDRBDNumericalMetric(
"remote_unacknowledged",
"Number of requests received by the peer via the network connection, but that have not yet been answered.",
prometheus.GaugeValue,
1),
"ap": newDRBDNumericalMetric(
"application_pending",
"Number of block I/O requests forwarded to DRBD, but not yet answered by DRBD.",
prometheus.GaugeValue,
1),
"ep": newDRBDNumericalMetric(
"epochs",
"Number of Epochs currently on the fly.",
prometheus.GaugeValue,
1),
"oos": newDRBDNumericalMetric(
"out_of_sync_bytes",
"Amount of data known to be out of sync; in bytes.",
prometheus.GaugeValue,
1024),
}
// drbdStringPairMetrics maps field names whose value has the form
// "<first>/<second>" to their descriptors. Update emits the first part
// with node="local" and the second with node="remote", each as 1 when it
// equals the entry's okay string and 0 otherwise.
drbdStringPairMetrics = map[string]drbdStringPairMetric{
"ro": newDRBDStringPairMetric(
"node_role_is_primary",
"Whether the role of the node is in the primary state.",
"Primary"),
"ds": newDRBDStringPairMetric(
"disk_state_is_up_to_date",
"Whether the disk of the node is up to date.",
"UpToDate"),
}
// drbdConnected describes the per-device gauge derived from the "cs"
// field: Update sets it to 1 when the value is "Connected", else 0.
drbdConnected = prometheus.NewDesc(
prometheus.BuildFQName(Namespace, "drbd", "connected"),
"Whether DRBD is connected to the peer.",
[]string{"device"}, nil)
)
// drbdCollector exports DRBD statistics parsed from the proc "drbd" file.
// It carries no state of its own.
type drbdCollector struct{}
// init registers newDRBDCollector in Factories under the name "drbd".
func init() {
Factories["drbd"] = newDRBDCollector
}
// newDRBDCollector returns a fresh, stateless DRBD collector. It never
// fails; the error result is always nil.
func newDRBDCollector() (Collector, error) {
	collector := new(drbdCollector)
	return collector, nil
}
// Update reads the proc "drbd" file and sends one metric per recognised
// field to ch.
//
// The file is scanned word by word. A token of the form "<number>:" starts a
// new device section and sets the "device" label ("drbd<number>") for every
// following metric; until one is seen the label is "unknown". Other
// "key:value" tokens are looked up in drbdNumericalMetrics and
// drbdStringPairMetrics, or handled as the connection state when the key is
// "cs". Anything else is logged at debug level and skipped.
//
// It returns nil when the file does not exist (DRBD not loaded), the open
// error for any other open failure, the parse error if a known numerical
// field holds a non-numeric value (metrics already sent are not retracted),
// and otherwise the scanner's error, if any.
func (c *drbdCollector) Update(ch chan<- prometheus.Metric) (err error) {
	statsFile := procFilePath("drbd")
	file, err := os.Open(statsFile)
	if err != nil {
		if os.IsNotExist(err) {
			log.Debugf("Not collecting DRBD statistics, as %s does not exist: %s", statsFile, err)
			return nil
		}
		return err
	}
	defer file.Close()

	scanner := bufio.NewScanner(file)
	scanner.Split(bufio.ScanWords)
	device := "unknown"
	for scanner.Scan() {
		field := scanner.Text()
		kv := strings.Split(field, ":")
		if len(kv) != 2 {
			log.Debugf("Don't know how to process string %q", field)
			continue
		}
		key, value := kv[0], kv[1]

		// "<minor>:" introduces the section for device drbd<minor>.
		if id, err := strconv.ParseUint(key, 10, 64); err == nil && value == "" {
			device = fmt.Sprintf("drbd%d", id)
			continue
		}

		if metric, ok := drbdNumericalMetrics[key]; ok {
			// Numerical value.
			number, err := strconv.ParseFloat(value, 64)
			if err != nil {
				return err
			}
			ch <- prometheus.MustNewConstMetric(
				metric.desc, metric.valueType,
				number*metric.multiplier, device)
			continue
		}

		if metric, ok := drbdStringPairMetrics[key]; ok {
			// String pair value, expected as "<local>/<remote>".
			values := strings.Split(value, "/")
			if len(values) < 2 {
				// Without this guard a token lacking '/' would
				// panic on values[1] and take the collector down.
				log.Debugf("Don't know how to process string pair [%s: %q]", key, value)
				continue
			}
			ch <- prometheus.MustNewConstMetric(
				metric.desc, prometheus.GaugeValue,
				metric.isOkay(values[0]), device, "local")
			ch <- prometheus.MustNewConstMetric(
				metric.desc, prometheus.GaugeValue,
				metric.isOkay(values[1]), device, "remote")
			continue
		}

		if key == "cs" {
			// Connection state.
			var connected float64
			if value == "Connected" {
				connected = 1
			}
			ch <- prometheus.MustNewConstMetric(
				drbdConnected, prometheus.GaugeValue,
				connected, device)
			continue
		}

		log.Debugf("Don't know how to process key-value pair [%s: %q]", key, value)
	}
	return scanner.Err()
}

View file

@ -375,6 +375,53 @@ node_disk_writes_merged{device="nvme0n1"} 43950
node_disk_writes_merged{device="sda"} 1.1134226e+07
node_disk_writes_merged{device="sr0"} 0
node_disk_writes_merged{device="vda"} 2.0711856e+07
# HELP node_drbd_activitylog_writes_total Number of updates of the activity log area of the meta data.
# TYPE node_drbd_activitylog_writes_total counter
node_drbd_activitylog_writes_total{device="drbd1"} 1100
# HELP node_drbd_application_pending Number of block I/O requests forwarded to DRBD, but not yet answered by DRBD.
# TYPE node_drbd_application_pending gauge
node_drbd_application_pending{device="drbd1"} 12348
# HELP node_drbd_bitmap_writes_total Number of updates of the bitmap area of the meta data.
# TYPE node_drbd_bitmap_writes_total counter
node_drbd_bitmap_writes_total{device="drbd1"} 221
# HELP node_drbd_connected Whether DRBD is connected to the peer.
# TYPE node_drbd_connected gauge
node_drbd_connected{device="drbd1"} 1
# HELP node_drbd_disk_read_bytes_total Net data read from local hard disk; in bytes.
# TYPE node_drbd_disk_read_bytes_total counter
node_drbd_disk_read_bytes_total{device="drbd1"} 1.2154539008e+11
# HELP node_drbd_disk_state_is_up_to_date Whether the disk of the node is up to date.
# TYPE node_drbd_disk_state_is_up_to_date gauge
node_drbd_disk_state_is_up_to_date{device="drbd1",node="local"} 1
node_drbd_disk_state_is_up_to_date{device="drbd1",node="remote"} 1
# HELP node_drbd_disk_written_bytes_total Net data written on local hard disk; in bytes.
# TYPE node_drbd_disk_written_bytes_total counter
node_drbd_disk_written_bytes_total{device="drbd1"} 2.8941845504e+10
# HELP node_drbd_epochs Number of Epochs currently on the fly.
# TYPE node_drbd_epochs gauge
node_drbd_epochs{device="drbd1"} 1
# HELP node_drbd_local_pending Number of open requests to the local I/O sub-system.
# TYPE node_drbd_local_pending gauge
node_drbd_local_pending{device="drbd1"} 12345
# HELP node_drbd_network_received_bytes_total Total number of bytes received via the network.
# TYPE node_drbd_network_received_bytes_total counter
node_drbd_network_received_bytes_total{device="drbd1"} 1.0961011e+07
# HELP node_drbd_network_sent_bytes_total Total number of bytes sent via the network.
# TYPE node_drbd_network_sent_bytes_total counter
node_drbd_network_sent_bytes_total{device="drbd1"} 1.7740228608e+10
# HELP node_drbd_node_role_is_primary Whether the role of the node is in the primary state.
# TYPE node_drbd_node_role_is_primary gauge
node_drbd_node_role_is_primary{device="drbd1",node="local"} 1
node_drbd_node_role_is_primary{device="drbd1",node="remote"} 1
# HELP node_drbd_out_of_sync_bytes Amount of data known to be out of sync; in bytes.
# TYPE node_drbd_out_of_sync_bytes gauge
node_drbd_out_of_sync_bytes{device="drbd1"} 1.2645376e+07
# HELP node_drbd_remote_pending Number of requests sent to the peer, but that have not yet been answered by the latter.
# TYPE node_drbd_remote_pending gauge
node_drbd_remote_pending{device="drbd1"} 12346
# HELP node_drbd_remote_unacknowledged Number of requests received by the peer via the network connection, but that have not yet been answered.
# TYPE node_drbd_remote_unacknowledged gauge
node_drbd_remote_unacknowledged{device="drbd1"} 12347
# HELP node_entropy_available_bits Bits of available entropy.
# TYPE node_entropy_available_bits gauge
node_entropy_available_bits 1337

View file

@ -0,0 +1,5 @@
version: 8.4.3 (api:1/proto:86-101)
srcversion: 1A9F77B1CA5FF92235C2213
1: cs:Connected ro:Primary/Primary ds:UpToDate/UpToDate C r-----
ns:17324442 nr:10961011 dw:28263521 dr:118696670 al:1100 bm:221 lo:12345 pe:12346 ua:12347 ap:12348 ep:1 wo:d oos:12349

View file

@ -5,6 +5,7 @@ set -euf -o pipefail
collectors=$(cat << COLLECTORS
conntrack
diskstats
drbd
entropy
filefd
hwmon