Add a sample btrfs stats collector script (#1200)

Signed-off-by: Anton Tolchanov <commits@knyar.net>
This commit is contained in:
Anton Tolchanov 2018-12-21 13:10:03 +00:00 committed by Ben Kochie
parent 97dab59e18
commit cf8b29d1fb

View file

@ -0,0 +1,112 @@
#!/usr/bin/env python
# Collect per-device btrfs filesystem errors.
# Designed to work on Debian and CentOS 6 (with python2.6).
import collections
import glob
import os
import re
import subprocess
def get_btrfs_mount_points(mounts_file="/proc/mounts"):
    """List all btrfs mount points.

    Args:
      mounts_file: (string) path to a mount table in /proc/mounts format.
        Defaults to the kernel's table of currently mounted filesystems.

    Yields:
      (string) filesystem mount points, with octal escapes decoded.
    """
    with open(mounts_file) as f:
        for line in f:
            parts = line.split()
            # Fields are: device, mount point, filesystem type, options, ...
            # Skip blank or truncated lines instead of raising IndexError.
            if len(parts) < 3 or parts[2] != "btrfs":
                continue
            # Whitespace and backslashes inside a path are stored as 3-digit
            # octal escapes (e.g. "\040" for a space); decode them so that the
            # result is the real path and can be passed to other commands.
            yield re.sub(r"\\([0-7]{3})",
                         lambda m: chr(int(m.group(1), 8)),
                         parts[1])
def get_btrfs_errors(mountpoint, command=("btrfs", "device", "stats")):
    """Get per-device errors for a btrfs mount point.

    Args:
      mountpoint: (string) path to a mount point.
      command: (sequence of strings) command line that prints device stats;
        the mount point is appended as the last argument. Override it to use
        an absolute path to the binary or a wrapper.

    Yields:
      (device, error_type, error_count) tuples, where:
        device: (string) path to block device.
        error_type: (string) type of btrfs error.
        error_count: (int) number of btrfs errors of a given type.

    Raises:
      RuntimeError: if the command exits with a non-zero status or prints a
        line that does not look like a device stats entry.
    """
    p = subprocess.Popen(list(command) + [mountpoint],
                         stdout=subprocess.PIPE)
    # stderr is not piped, so the second element is always None.
    stdout, _ = p.communicate()
    if p.returncode != 0:
        raise RuntimeError("btrfs returned exit code %d" % p.returncode)
    for raw_line in stdout.splitlines():
        # Decode before any comparison: on Python 3 the output is bytes, and
        # bytes never compare equal to a str such as ''.
        line = raw_line.decode("utf-8")
        if not line.strip():
            continue
        # Sample line:
        # [/dev/vdb1].flush_io_errs 0
        m = re.search(r"^\[([^\]]+)\]\.(\S+)\s+(\d+)$", line)
        if not m:
            raise RuntimeError("unexpected output from btrfs: '%s'" % line)
        yield m.group(1), m.group(2), int(m.group(3))
def _escape_label_value(value):
    """Escape backslash, double quote and newline for a Prometheus label."""
    return (value.replace("\\", "\\\\")
            .replace('"', '\\"')
            .replace("\n", "\\n"))


def btrfs_error_metrics():
    """Collect btrfs error metrics.

    Returns:
      a list of strings to be exposed as Prometheus metrics, or None when no
      btrfs error counters were found.

    Raises:
      RuntimeError: propagated from get_btrfs_errors().
    """
    metric = "node_btrfs_errors_total"
    contents = [
        "# TYPE %s counter" % metric,
        "# HELP %s number of btrfs errors" % metric,
    ]
    for mountpoint in get_btrfs_mount_points():
        for device, error_type, error_count in get_btrfs_errors(mountpoint):
            # Paths are arbitrary strings; escape them so that a quote or a
            # backslash cannot produce an unparseable sample line.
            contents.append(
                '%s{mountpoint="%s",device="%s",type="%s"} %d' %
                (metric,
                 _escape_label_value(mountpoint),
                 _escape_label_value(device),
                 _escape_label_value(error_type),
                 error_count))
    if len(contents) > 2:
        # return metrics if there are actual btrfs filesystems found
        # (i.e. `contents` contains more than just TYPE and HELP).
        return contents
    return None
def btrfs_allocation_metrics(sysfs_root="/sys/fs/btrfs"):
    """Collect btrfs allocation metrics.

    Args:
      sysfs_root: (string) directory holding one sub-directory per btrfs
        filesystem, each with an `allocation` directory inside.

    Returns:
      a list of strings to be exposed as Prometheus metrics, or None when no
      allocation directories were found.
    """
    prefix = 'node_btrfs_allocation'
    # (metric suffix, sysfs file name) pairs. A tuple keeps the output order
    # stable on every Python version.
    metric_to_filename = (
        ('size_bytes', 'total_bytes'),
        ('used_bytes', 'bytes_used'),
        ('reserved_bytes', 'bytes_reserved'),
        ('pinned_bytes', 'bytes_pinned'),
        ('disk_size_bytes', 'disk_total'),
        ('disk_used_bytes', 'disk_used'),
    )
    alloc_dirs = sorted(glob.glob(os.path.join(sysfs_root, "*", "allocation")))
    if not alloc_dirs:
        return None
    contents = []
    # The text exposition format wants all lines of one metric in a single
    # group, so iterate metric-first and emit TYPE/HELP right before samples.
    for metric, stat_name in metric_to_filename:
        contents.append("# TYPE %s_%s gauge" % (prefix, metric))
        contents.append("# HELP %s_%s btrfs allocation data (%s)" %
                        (prefix, metric, stat_name))
        for alloc in alloc_dirs:
            # The filesystem identifier is the directory holding `allocation`.
            fs = os.path.basename(os.path.dirname(alloc))
            for type_ in ('data', 'metadata', 'system'):
                filename = os.path.join(alloc, type_, stat_name)
                with open(filename) as stat_file:
                    value = int(stat_file.read().strip())
                contents.append('%s_%s{fs="%s",type="%s"} %d' % (
                    prefix, metric, fs, type_, value))
    return contents
if __name__ == "__main__":
    # Each collector returns either a list of exposition lines or None;
    # treat None as "nothing to report" and print everything in one go.
    lines = []
    for collect in (btrfs_error_metrics, btrfs_allocation_metrics):
        lines.extend(collect() or [])
    print("\n".join(lines))