#!/usr/bin/env python

# Collect per-device btrfs filesystem errors and allocation statistics,
# and print them as Prometheus metrics.
# Designed to work on Debian and Centos 6 (with python2.6).

import collections
import glob
import os
import re
import subprocess

# Matches one line of `btrfs device stats` output. Sample line:
#   [/dev/vdb1].flush_io_errs 0
_DEVICE_STAT_RE = re.compile(r"^\[([^\]]+)\]\.(\S+)\s+(\d+)$")

# Three-digit octal escapes (e.g. "\040" for a space) found in the fields of
# /proc/mounts.
_OCTAL_ESCAPE_RE = re.compile(r"\\([0-7]{3})")

# (metric suffix, sysfs file name) pairs. A tuple rather than a dict so the
# output order is the same on every run and every Python version.
_ALLOCATION_FILES = (
    ('size_bytes', 'total_bytes'),
    ('used_bytes', 'bytes_used'),
    ('reserved_bytes', 'bytes_reserved'),
    ('pinned_bytes', 'bytes_pinned'),
    ('disk_size_bytes', 'disk_total'),
    ('disk_used_bytes', 'disk_used'),
)


def _unescape_mount_field(field):
    """Decode the octal escapes used in a /proc/mounts field."""
    return _OCTAL_ESCAPE_RE.sub(lambda m: chr(int(m.group(1), 8)), field)


def _escape_label_value(value):
    """Escape backslash, newline and double quote for use in a label value."""
    return (value.replace("\\", "\\\\")
                 .replace("\n", "\\n")
                 .replace('"', '\\"'))


def get_btrfs_mount_points(mounts_path="/proc/mounts"):
    """List all btrfs mount points.

    Args:
      mounts_path: (string) path to a file in /proc/mounts format.

    Yields:
      (string) filesystem mount points, each one at most once.
    """
    seen = set()
    with open(mounts_path) as f:
        for line in f:
            parts = line.split()
            # Skip blank or truncated lines instead of failing on parts[2].
            if len(parts) < 3 or parts[2] != "btrfs":
                continue
            mountpoint = _unescape_mount_field(parts[1])
            # A mount point listed more than once would otherwise produce
            # duplicate samples in the output.
            if mountpoint in seen:
                continue
            seen.add(mountpoint)
            yield mountpoint


def parse_btrfs_device_stats(output):
    """Parse the output of `btrfs device stats`.

    Args:
      output: (bytes or string) raw output of the command.

    Yields:
      (device, error_type, error_count) tuples, where:
        device: (string) path to block device.
        error_type: (string) type of btrfs error.
        error_count: (int) number of btrfs errors of a given type.

    Raises:
      RuntimeError: if a non-blank line does not have the expected format.
    """
    # Decode once up front: comparing undecoded bytes against '' never
    # matches on Python 3, so blank lines were not being skipped there.
    if isinstance(output, bytes):
        output = output.decode("utf-8")
    for line in output.splitlines():
        if not line.strip():
            continue
        m = _DEVICE_STAT_RE.search(line)
        if not m:
            raise RuntimeError("unexpected output from btrfs: '%s'" % line)
        yield m.group(1), m.group(2), int(m.group(3))


def get_btrfs_errors(mountpoint):
    """Get per-device errors for a btrfs mount point.

    Args:
      mountpoint: (string) path to a mount point.

    Yields:
      (device, error_type, error_count) tuples, where:
        device: (string) path to block device.
        error_type: (string) type of btrfs error.
        error_count: (int) number of btrfs errors of a given type.

    Raises:
      RuntimeError: if btrfs exits with a non-zero code or prints a line
        that cannot be parsed.
    """
    p = subprocess.Popen(["btrfs", "device", "stats", mountpoint],
                         stdout=subprocess.PIPE)
    # stderr is not piped, so the second element is always None.
    stdout, _ = p.communicate()
    if p.returncode != 0:
        raise RuntimeError("btrfs returned exit code %d" % p.returncode)
    for entry in parse_btrfs_device_stats(stdout):
        yield entry


def btrfs_error_metrics():
    """Collect btrfs error metrics.

    Returns:
      a list of strings to be exposed as Prometheus metrics, or None if no
      btrfs error counters were found.
    """
    metric = "node_btrfs_errors_total"
    contents = [
        "# TYPE %s counter" % metric,
        "# HELP %s number of btrfs errors" % metric,
    ]
    header_len = len(contents)
    for mountpoint in get_btrfs_mount_points():
        for device, error_type, error_count in get_btrfs_errors(mountpoint):
            contents.append(
                '%s{mountpoint="%s",device="%s",type="%s"} %d' %
                (metric, _escape_label_value(mountpoint),
                 _escape_label_value(device),
                 _escape_label_value(error_type), error_count))

    # Return metrics only if there are actual btrfs filesystems found
    # (i.e. `contents` contains more than just TYPE and HELP).
    if len(contents) > header_len:
        return contents
    return None


def btrfs_allocation_metrics(sysfs_root="/sys/fs/btrfs"):
    """Collect btrfs allocation metrics.

    Args:
      sysfs_root: (string) directory holding one sub-directory per btrfs
        filesystem, each with an `allocation` directory inside.

    Returns:
      a list of strings to be exposed as Prometheus metrics, or None if no
      filesystem with allocation data was found.
    """
    prefix = 'node_btrfs_allocation'
    samples = collections.defaultdict(list)
    pattern = os.path.join(sysfs_root, "*", "allocation")
    for alloc in sorted(glob.glob(pattern)):
        # The filesystem identifier is the directory containing `allocation`.
        fs = os.path.basename(os.path.dirname(alloc))
        for type_ in ('data', 'metadata', 'system'):
            for metric, filename in _ALLOCATION_FILES:
                with open(os.path.join(alloc, type_, filename)) as fh:
                    value = int(fh.read().strip())
                samples[metric].append('%s_%s{fs="%s",type="%s"} %d' % (
                    prefix, metric, _escape_label_value(fs), type_, value))

    if not samples:
        return None

    # Emit each metric as one group (TYPE, HELP, then its samples): the text
    # exposition format requires all lines of a metric to be kept together.
    contents = []
    for metric, filename in _ALLOCATION_FILES:
        contents.append("# TYPE %s_%s gauge" % (prefix, metric))
        contents.append("# HELP %s_%s btrfs allocation data (%s)" % (
            prefix, metric, filename))
        contents.extend(samples[metric])
    return contents


def main():
    """Print error metrics followed by allocation metrics to stdout."""
    contents = ((btrfs_error_metrics() or []) +
                (btrfs_allocation_metrics() or []))

    print("\n".join(contents))


if __name__ == "__main__":
    main()