node_exporter/text_collector_examples/ntpd_metrics.py

114 lines
3 KiB
Python
Raw Normal View History

#!/usr/bin/env python3
#
# Description: Extract NTPd metrics from ntpq -np.
# Author: Ben Kochie <superq@gmail.com>
import re
import subprocess
import sys
# NTP peers status, with no DNS lookups.
ntpq_cmd = ['ntpq', '-np']

# Regex fragments, one per whitespace-separated column of `ntpq -np` output.
# Raw strings keep regex escapes such as \s and \d from being interpreted as
# (invalid) string escapes, which newer Python versions warn about.
metrics_fields = [
    # First character of the line is the peer status (tally) code.
    r'^(?P<status>.)(?P<remote>[\w\.]+)',
    r'(?P<refid>[\w\.]+)',
    r'(?P<stratum>\d+)',
    # remote_types below maps '-', so the column has to accept it as well.
    r'(?P<type>[\w-])',
    # ntpq prints '-' when no packet has been received yet and appends an
    # m/h/d unit once the interval is too long to show in seconds.
    r'(?P<when>-|\d+[mhd]?)',
    r'(?P<poll>\d+)',
    r'(?P<reach>\d+)',
    r'(?P<delay>\d+\.\d+)',
    r'(?P<offset>-?\d+\.\d+)',
    r'(?P<jitter>\d+\.\d+)',
]
# Columns are separated by one or more whitespace characters.
metrics_re = r'\s+'.join(metrics_fields)

# Remote types
# http://support.ntp.org/bin/view/Support/TroubleshootingNTP
# Maps the single-character "t" column to the label value that is exported.
remote_types = {
    'l': 'local',
    'u': 'unicast',
    'm': 'multicast',
    'b': 'broadcast',
    '-': 'netaddr',
}

# Status codes:
# http://www.eecis.udel.edu/~mills/ntp/html/decode.html#peer
# Maps the tally character in front of each peer to the numeric value that
# is exported as the peer_status gauge.
status_types = {
    ' ': 0,
    'x': 1,
    '.': 2,
    '-': 3,
    '+': 4,
    '#': 5,
    '*': 6,
    'o': 7,
}
# Run the ntpq command.
def get_ntpq():
    """Run ``ntpq_cmd`` and return its standard output as text.

    The command's stderr is discarded.

    Returns:
        The decoded output of the command, or ``None`` when the command
        could not be started (for example the binary is not installed) or
        exited with a non-zero status.
    """
    try:
        raw_output = subprocess.check_output(ntpq_cmd, stderr=subprocess.DEVNULL)
    except (subprocess.CalledProcessError, OSError):
        # A missing or unrunnable binary is reported the same way as a
        # failing one, so callers only have to check for None.
        return None
    return raw_output.decode()
# Print metrics in Prometheus format.
def print_prometheus(metric, values):
    """Write one gauge metric family to stdout in Prometheus text format.

    Args:
        metric: Metric name without the ``ntpd_`` prefix.
        values: Mapping of a pre-rendered label string (the text placed
            between the braces) to the numeric sample value.
    """
    # HELP and TYPE header lines come first, once per metric family.
    header_lines = (
        "# HELP ntpd_%s NTPd metric for %s" % (metric, metric),
        "# TYPE ntpd_%s gauge" % (metric),
    )
    for header in header_lines:
        print(header)

    # One sample line per label set.
    for label_text, sample in values.items():
        print("ntpd_%s{%s} %f" % (metric, label_text, sample))
# Parse raw ntpq lines.
def parse_line(line):
    """Match one line of ntpq output against the peer regex.

    Args:
        line: A single line of ntpq output, without its trailing newline.

    Returns:
        The ``re`` match object produced by ``metrics_re`` for a peer line,
        or ``None`` for lines that are skipped or do not match.
    """
    # Raw strings so the regex escapes are not treated as string escapes.
    skip_patterns = (
        r'\s+remote\s+refid',    # column header
        r'=+',                   # separator rule
        r'.+\.(LOCL|POOL)\.',    # lines carrying a .LOCL. or .POOL. token
        r'^$',                   # blank line
    )
    if any(re.match(pattern, line) for pattern in skip_patterns):
        return None
    return re.match(metrics_re, line)
# Main function
def main(argv):
    """Collect per-peer metrics from ntpq and print them for Prometheus.

    Args:
        argv: Command-line arguments; currently unused.

    Exits with status 1, without printing any metrics, when get_ntpq()
    returns no output.
    """
    ntpq = get_ntpq()
    if ntpq is None:
        # get_ntpq() signals failure with None; calling .split() on it would
        # only produce an unrelated AttributeError traceback.
        sys.stderr.write("ntpq command failed; no metrics collected\n")
        sys.exit(1)

    peer_status_metrics = {}
    delay_metrics = {}
    offset_metrics = {}
    jitter_metrics = {}

    for line in ntpq.splitlines():
        metric_match = parse_line(line)
        if metric_match is None:
            continue

        remote = metric_match.group('remote')
        refid = metric_match.group('refid')
        stratum = metric_match.group('stratum')
        # The regex accepts any word character in the type column, but
        # remote_types only names a few of them; fall back to a placeholder
        # instead of raising KeyError and losing every metric.
        remote_type = remote_types.get(metric_match.group('type'), 'unknown')

        common_labels = "remote=\"%s\",reference=\"%s\"" % (remote, refid)
        peer_labels = "%s,stratum=\"%s\",type=\"%s\"" % (common_labels, stratum, remote_type)

        peer_status_metrics[peer_labels] = float(status_types[metric_match.group('status')])
        delay_metrics[common_labels] = float(metric_match.group('delay'))
        offset_metrics[common_labels] = float(metric_match.group('offset'))
        jitter_metrics[common_labels] = float(metric_match.group('jitter'))

    print_prometheus('peer_status', peer_status_metrics)
    print_prometheus('delay_milliseconds', delay_metrics)
    print_prometheus('offset_milliseconds', offset_metrics)
    print_prometheus('jitter_milliseconds', jitter_metrics)
# Script entry point: run the collector only when executed directly,
# passing along the command-line arguments without the program name.
if __name__ == "__main__":
    main(sys.argv[1:])