From 4329b0a86ba3e9f22555c322947fe9291132f5d4 Mon Sep 17 00:00:00 2001 From: Sevag Hanssian Date: Thu, 4 Jan 2018 02:49:36 -0800 Subject: [PATCH] Add summary metrics for systemd exporter (#765) --- collector/systemd_linux.go | 40 ++++++++++++++++++++++++++++----- collector/systemd_linux_test.go | 21 +++++++++++++++++ 2 files changed, 56 insertions(+), 5 deletions(-) diff --git a/collector/systemd_linux.go b/collector/systemd_linux.go index 7817421f..cbb951d1 100644 --- a/collector/systemd_linux.go +++ b/collector/systemd_linux.go @@ -34,6 +34,7 @@ var ( type systemdCollector struct { unitDesc *prometheus.Desc systemRunningDesc *prometheus.Desc + summaryDesc *prometheus.Desc unitWhitelistPattern *regexp.Regexp unitBlacklistPattern *regexp.Regexp } @@ -57,22 +58,31 @@ func NewSystemdCollector() (Collector, error) { "Whether the system is operational (see 'systemctl is-system-running')", nil, nil, ) + summaryDesc := prometheus.NewDesc( + prometheus.BuildFQName(namespace, subsystem, "units"), + "Summary of systemd unit states", []string{"state"}, nil) unitWhitelistPattern := regexp.MustCompile(fmt.Sprintf("^(?:%s)$", *unitWhitelist)) unitBlacklistPattern := regexp.MustCompile(fmt.Sprintf("^(?:%s)$", *unitBlacklist)) return &systemdCollector{ unitDesc: unitDesc, systemRunningDesc: systemRunningDesc, + summaryDesc: summaryDesc, unitWhitelistPattern: unitWhitelistPattern, unitBlacklistPattern: unitBlacklistPattern, }, nil } func (c *systemdCollector) Update(ch chan<- prometheus.Metric) error { - units, err := c.listUnits() + allUnits, err := c.getAllUnits() if err != nil { - return fmt.Errorf("couldn't get units states: %s", err) + return fmt.Errorf("couldn't get units: %s", err) } + + summary := summarizeUnits(allUnits) + c.collectSummaryMetrics(ch, summary) + + units := filterUnits(allUnits, c.unitWhitelistPattern, c.unitBlacklistPattern) c.collectUnitStatusMetrics(ch, units) systemState, err := c.getSystemState() @@ -98,6 +108,13 @@ func (c *systemdCollector) collectUnitStatusMetrics(ch chan<- prometheus.Metric, } } +func (c *systemdCollector) collectSummaryMetrics(ch chan<- prometheus.Metric, summary map[string]float64) { + for stateName, count := range summary { + ch <- prometheus.MustNewConstMetric( + c.summaryDesc, prometheus.GaugeValue, count, stateName) + } +} + func (c *systemdCollector) collectSystemState(ch chan<- prometheus.Metric, systemState string) { isSystemRunning := 0.0 if systemState == `"running"` { @@ -113,7 +130,7 @@ func (c *systemdCollector) newDbus() (*dbus.Conn, error) { return dbus.New() } -func (c *systemdCollector) listUnits() ([]dbus.UnitStatus, error) { +func (c *systemdCollector) getAllUnits() ([]dbus.UnitStatus, error) { conn, err := c.newDbus() if err != nil { return nil, fmt.Errorf("couldn't get dbus connection: %s", err) @@ -125,8 +142,21 @@ func (c *systemdCollector) listUnits() ([]dbus.UnitStatus, error) { return []dbus.UnitStatus{}, err } - units := filterUnits(allUnits, c.unitWhitelistPattern, c.unitBlacklistPattern) - return units, nil + return allUnits, nil +} + +func summarizeUnits(units []dbus.UnitStatus) map[string]float64 { + summarized := make(map[string]float64) + + for _, unitStateName := range unitStatesName { + summarized[unitStateName] = 0.0 + } + + for _, unit := range units { + summarized[unit.ActiveState] += 1.0 + } + + return summarized } func filterUnits(units []dbus.UnitStatus, whitelistPattern, blacklistPattern *regexp.Regexp) []dbus.UnitStatus { diff --git a/collector/systemd_linux_test.go b/collector/systemd_linux_test.go index 2d9bdf66..0555d465 100644 --- a/collector/systemd_linux_test.go +++ b/collector/systemd_linux_test.go @@ -121,3 +121,24 @@ func TestSystemdIgnoreFilterDefaultKeepsAll(t *testing.T) { t.Error("Default filters removed units") } } + +func TestSystemdSummary(t *testing.T) { + fixtures := getUnitListFixtures() + summary := summarizeUnits(fixtures[0]) + + for _, state := range unitStatesName { + if state == "inactive" { + testSummaryHelper(t, state, summary[state], 3.0) + } else if state == "active" { + testSummaryHelper(t, state, summary[state], 1.0) + } else { + testSummaryHelper(t, state, summary[state], 0.0) + } + } +} + +func testSummaryHelper(t *testing.T, state string, actual float64, expected float64) { + if actual != expected { + t.Errorf("Summary mode didn't count %s jobs correctly. Actual: %f, expected: %f", state, actual, expected) + } +}