From b14168cf6ad2fd40bbe53b29eebea149dae31105 Mon Sep 17 00:00:00 2001 From: Daniel Hodges Date: Fri, 17 Apr 2020 06:02:08 -0400 Subject: [PATCH] Add perf tracepoint collection flag (#1664) * Add tracepoint collector option for perf collector Signed-off-by: Daniel Hodges --- README.md | 7 ++ collector/perf_linux.go | 185 ++++++++++++++++++++++++++++++++--- collector/perf_linux_test.go | 70 ++++++++++++- 3 files changed, 245 insertions(+), 17 deletions(-) diff --git a/README.md b/README.md index bed3a908..2860c850 100644 --- a/README.md +++ b/README.md @@ -96,6 +96,13 @@ configuration is zero indexed and can also take a stride value; e.g. `--collector.perf --collector.perf.cpus=1-10:5` would collect on CPUs 1, 5, and 10. +The perf collector is also able to collect +[tracepoint](https://www.kernel.org/doc/html/latest/core-api/tracepoint.html) +counts when using the `--collector.perf.tracepoint` flag. Tracepoints can be +found using [`perf list`](http://man7.org/linux/man-pages/man1/perf.1.html) or +from debugfs. An example usage of this would be +`--collector.perf.tracepoint="sched:sched_process_exec"`. 
+ Name | Description | OS ---------|-------------|---- diff --git a/collector/perf_linux.go b/collector/perf_linux.go index 6d19683f..e452754c 100644 --- a/collector/perf_linux.go +++ b/collector/perf_linux.go @@ -20,8 +20,10 @@ import ( "strings" "github.com/go-kit/kit/log" + "github.com/go-kit/kit/log/level" "github.com/hodgesds/perf-utils" "github.com/prometheus/client_golang/prometheus" + "golang.org/x/sys/unix" kingpin "gopkg.in/alecthomas/kingpin.v2" ) @@ -30,27 +32,29 @@ const ( ) var ( - perfCPUsFlag = kingpin.Flag("collector.perf.cpus", "List of CPUs from which perf metrics should be collected").Default("").String() + perfCPUsFlag = kingpin.Flag("collector.perf.cpus", "List of CPUs from which perf metrics should be collected").Default("").String() + perfTracepointFlag = kingpin.Flag("collector.perf.tracepoint", "perf tracepoint that should be collected").Strings() ) func init() { registerCollector(perfSubsystem, defaultDisabled, NewPerfCollector) } -// perfCollector is a Collector that uses the perf subsystem to collect -// metrics. It uses perf_event_open an ioctls for profiling. Due to the fact -// that the perf subsystem is highly dependent on kernel configuration and -// settings not all profiler values may be exposed on the target system at any -// given time. -type perfCollector struct { - hwProfilerCPUMap map[*perf.HardwareProfiler]int - swProfilerCPUMap map[*perf.SoftwareProfiler]int - cacheProfilerCPUMap map[*perf.CacheProfiler]int - perfHwProfilers map[int]*perf.HardwareProfiler - perfSwProfilers map[int]*perf.SoftwareProfiler - perfCacheProfilers map[int]*perf.CacheProfiler - desc map[string]*prometheus.Desc - logger log.Logger +// perfTracepointFlagToTracepoints parses subsystem:event flag values into tracepoints. 
+func perfTracepointFlagToTracepoints(tracepointsFlag []string) ([]*perfTracepoint, error) { + tracepoints := make([]*perfTracepoint, len(tracepointsFlag)) + + for i, tracepoint := range tracepointsFlag { + split := strings.Split(tracepoint, ":") + if len(split) != 2 || split[0] == "" || split[1] == "" { + return nil, fmt.Errorf("Invalid tracepoint config %v", tracepoint) + } + tracepoints[i] = &perfTracepoint{ + subsystem: split[0], + event: split[1], + } + } + return tracepoints, nil } // perfCPUFlagToCPUs returns a set of CPUs for the perf collectors to monitor. @@ -98,6 +102,144 @@ func perfCPUFlagToCPUs(cpuFlag string) ([]int, error) { return cpus, nil } +// perfTracepoint is a struct for holding tracepoint information. +type perfTracepoint struct { + subsystem string + event string +} + +// label returns the tracepoint name in the format of subsystem_tracepoint. +func (t *perfTracepoint) label() string { + return t.subsystem + "_" + t.event +} + +// tracepoint returns the tracepoint name in the format of subsystem:tracepoint. +func (t *perfTracepoint) tracepoint() string { + return t.subsystem + ":" + t.event +} + +// perfCollector is a Collector that uses the perf subsystem to collect +// metrics. It uses perf_event_open and ioctls for profiling. Due to the fact +// that the perf subsystem is highly dependent on kernel configuration and +// settings not all profiler values may be exposed on the target system at any +// given time. +type perfCollector struct { + hwProfilerCPUMap map[*perf.HardwareProfiler]int + swProfilerCPUMap map[*perf.SoftwareProfiler]int + cacheProfilerCPUMap map[*perf.CacheProfiler]int + perfHwProfilers map[int]*perf.HardwareProfiler + perfSwProfilers map[int]*perf.SoftwareProfiler + perfCacheProfilers map[int]*perf.CacheProfiler + desc map[string]*prometheus.Desc + logger log.Logger + tracepointCollector *perfTracepointCollector +} + +type perfTracepointCollector struct { + // descs maps subsystem to event name to the tracepoint's *prometheus.Desc. 
+ descs map[string]map[string]*prometheus.Desc + // collectionOrder holds the configured tracepoints as subsystem:event, in the order used to index profile values. + collectionOrder []string + + logger log.Logger + profilers map[int]perf.GroupProfiler +} + +// update is used to collect all tracepoints across all tracepoint profilers. +func (c *perfTracepointCollector) update(ch chan<- prometheus.Metric) error { + for cpu := range c.profilers { + if err := c.updateCPU(cpu, ch); err != nil { + return err + } + } + return nil +} + +// updateCPU profiles the given CPU and emits one counter metric per tracepoint value. +func (c *perfTracepointCollector) updateCPU(cpu int, ch chan<- prometheus.Metric) error { + cpuStr := fmt.Sprintf("%d", cpu) + profiler := c.profilers[cpu] + p, err := profiler.Profile() + if err != nil { + level.Error(c.logger).Log("msg", "Failed to collect tracepoint profile", "err", err) + return err + } + + for i, value := range p.Values { + // Get the Desc from the ordered group value. + descKey := c.collectionOrder[i] + descKeySlice := strings.Split(descKey, ":") + ch <- prometheus.MustNewConstMetric( + c.descs[descKeySlice[0]][descKeySlice[1]], + prometheus.CounterValue, + float64(value), + cpuStr, + ) + } + return nil +} + +// newPerfTracepointCollector builds and starts one group profiler per CPU for the given tracepoints. 
+func newPerfTracepointCollector( + logger log.Logger, + tracepointsFlag []string, + cpus []int, +) (*perfTracepointCollector, error) { + tracepoints, err := perfTracepointFlagToTracepoints(tracepointsFlag) + if err != nil { + return nil, err + } + + collectionOrder := make([]string, len(tracepoints)) + descs := map[string]map[string]*prometheus.Desc{} + eventAttrs := make([]unix.PerfEventAttr, len(tracepoints)) + + for i, tracepoint := range tracepoints { + eventAttr, err := perf.TracepointEventAttr(tracepoint.subsystem, tracepoint.event) + if err != nil { + return nil, err + } + eventAttrs[i] = *eventAttr + collectionOrder[i] = tracepoint.tracepoint() + if _, ok := descs[tracepoint.subsystem]; !ok { + descs[tracepoint.subsystem] = map[string]*prometheus.Desc{} + } + descs[tracepoint.subsystem][tracepoint.event] = prometheus.NewDesc( + prometheus.BuildFQName( + namespace, + perfSubsystem, + tracepoint.label(), + ), + "Perf tracepoint "+tracepoint.tracepoint(), + []string{"cpu"}, + nil, + ) + } + + profilers := make(map[int]perf.GroupProfiler, len(cpus)) + for _, cpu := range cpus { + profiler, err := perf.NewGroupProfiler(-1, cpu, 0, eventAttrs...) + if err != nil { + return nil, err + } + profilers[cpu] = profiler + } + + c := &perfTracepointCollector{ + descs: descs, + collectionOrder: collectionOrder, + profilers: profilers, + logger: logger, + } + + for _, profiler := range c.profilers { + if err := profiler.Start(); err != nil { + return nil, err + } + } + return c, nil +} + // NewPerfCollector returns a new perf based collector, it creates a profiler // per CPU. func NewPerfCollector(logger log.Logger) (Collector, error) { @@ -127,6 +269,16 @@ func NewPerfCollector(logger log.Logger) (Collector, error) { } } + // First configure any tracepoints. 
+ if *perfTracepointFlag != nil && len(*perfTracepointFlag) > 0 { + tracepointCollector, err := newPerfTracepointCollector(logger, *perfTracepointFlag, cpus) + if err != nil { + return nil, err + } + collector.tracepointCollector = tracepointCollector + } + + // Configure all profilers for the specified CPUs. for _, cpu := range cpus { // Use -1 to profile all processes on the CPU, see: // man perf_event_open @@ -411,6 +563,9 @@ func (c *perfCollector) Update(ch chan<- prometheus.Metric) error { if err := c.updateCacheStats(ch); err != nil { return err } + if c.tracepointCollector != nil { + return c.tracepointCollector.update(ch) + } return nil } diff --git a/collector/perf_linux_test.go b/collector/perf_linux_test.go index fca54558..b384a656 100644 --- a/collector/perf_linux_test.go +++ b/collector/perf_linux_test.go @@ -180,8 +180,74 @@ func TestPerfCPUFlagToCPUs(t *testing.T) { if test.exCpus[i] != cpus[i] { t.Fatalf( "expected cpus %v, got %v", - test.exCpus, - cpus, + test.exCpus[i], + cpus[i], + ) + } + } + }) + } +} + +func TestPerfTracepointFlagToTracepoints(t *testing.T) { + tests := []struct { + name string + flag []string + exTracepoints []*perfTracepoint + errStr string + }{ + { + name: "valid single tracepoint", + flag: []string{"sched:sched_kthread_stop"}, + exTracepoints: []*perfTracepoint{ + { + subsystem: "sched", + event: "sched_kthread_stop", + }, + }, + }, + { + name: "valid multiple tracepoints", + flag: []string{"sched:sched_kthread_stop", "sched:sched_process_fork"}, + exTracepoints: []*perfTracepoint{ + { + subsystem: "sched", + event: "sched_kthread_stop", + }, + { + subsystem: "sched", + event: "sched_process_fork", + }, + }, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + tracepoints, err := perfTracepointFlagToTracepoints(test.flag) + if test.errStr != "" { + if err == nil { + t.Fatal("expected error to not be nil") + } + if test.errStr != err.Error() { + t.Fatalf( + "expected error %q, got %q", 
test.errStr, + err.Error(), + ) + } + return + } + if err != nil { + t.Fatal(err) + } + for i := range tracepoints { + if test.exTracepoints[i].event != tracepoints[i].event || + test.exTracepoints[i].subsystem != tracepoints[i].subsystem { + t.Fatalf( + "expected tracepoint %v, got %v", + test.exTracepoints[i], + tracepoints[i], ) } }