mirror of
				https://github.com/prometheus/node_exporter.git
				synced 2025-08-20 18:33:52 -07:00 
			
		
		
		
	Add perf tracepoint collection flag (#1664)
* Add tracepoint collector option for perf collector Signed-off-by: Daniel Hodges <hodges.daniel.scott@gmail.com>
This commit is contained in:
		
							parent
							
								
									44357ed677
								
							
						
					
					
						commit
						b14168cf6a
					
				|  | @ -96,6 +96,13 @@ configuration is zero indexed and can also take a stride value; e.g. | ||||||
| `--collector.perf --collector.perf.cpus=1-10:5` would collect on CPUs | `--collector.perf --collector.perf.cpus=1-10:5` would collect on CPUs | ||||||
| 1, 5, and 10. | 1, 5, and 10. | ||||||
| 
 | 
 | ||||||
|  | The perf collector is also able to collect | ||||||
|  | [tracepoint](https://www.kernel.org/doc/html/latest/core-api/tracepoint.html) | ||||||
|  | counts when using the `--collector.perf.tracepoint` flag. Tracepoints can be | ||||||
|  | found using [`perf list`](http://man7.org/linux/man-pages/man1/perf.1.html) or | ||||||
|  | from debugfs. An example usage of this would be | ||||||
|  | `--collector.perf.tracepoint="sched:sched_process_exec"`. | ||||||
|  | 
 | ||||||
| 
 | 
 | ||||||
| Name     | Description | OS | Name     | Description | OS | ||||||
| ---------|-------------|---- | ---------|-------------|---- | ||||||
|  |  | ||||||
|  | @ -20,8 +20,10 @@ import ( | ||||||
| 	"strings" | 	"strings" | ||||||
| 
 | 
 | ||||||
| 	"github.com/go-kit/kit/log" | 	"github.com/go-kit/kit/log" | ||||||
|  | 	"github.com/go-kit/kit/log/level" | ||||||
| 	"github.com/hodgesds/perf-utils" | 	"github.com/hodgesds/perf-utils" | ||||||
| 	"github.com/prometheus/client_golang/prometheus" | 	"github.com/prometheus/client_golang/prometheus" | ||||||
|  | 	"golang.org/x/sys/unix" | ||||||
| 	kingpin "gopkg.in/alecthomas/kingpin.v2" | 	kingpin "gopkg.in/alecthomas/kingpin.v2" | ||||||
| ) | ) | ||||||
| 
 | 
 | ||||||
|  | @ -31,26 +33,28 @@ const ( | ||||||
| 
 | 
 | ||||||
| var ( | var ( | ||||||
| 	perfCPUsFlag       = kingpin.Flag("collector.perf.cpus", "List of CPUs from which perf metrics should be collected").Default("").String() | 	perfCPUsFlag       = kingpin.Flag("collector.perf.cpus", "List of CPUs from which perf metrics should be collected").Default("").String() | ||||||
|  | 	perfTracepointFlag = kingpin.Flag("collector.perf.tracepoint", "perf tracepoint that should be collected").Strings() | ||||||
| ) | ) | ||||||
| 
 | 
 | ||||||
| func init() { | func init() { | ||||||
| 	registerCollector(perfSubsystem, defaultDisabled, NewPerfCollector) | 	registerCollector(perfSubsystem, defaultDisabled, NewPerfCollector) | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| // perfCollector is a Collector that uses the perf subsystem to collect
 | // perfTracepointFlagToTracepoints returns the set of configured tracepoints.
 | ||||||
| // metrics. It uses perf_event_open an ioctls for profiling. Due to the fact
 | func perfTracepointFlagToTracepoints(tracepointsFlag []string) ([]*perfTracepoint, error) { | ||||||
| // that the perf subsystem is highly dependent on kernel configuration and
 | 	tracepoints := make([]*perfTracepoint, len(tracepointsFlag)) | ||||||
| // settings not all profiler values may be exposed on the target system at any
 | 
 | ||||||
| // given time.
 | 	for i, tracepoint := range tracepointsFlag { | ||||||
| type perfCollector struct { | 		split := strings.Split(tracepoint, ":") | ||||||
| 	hwProfilerCPUMap    map[*perf.HardwareProfiler]int | 		if len(split) != 2 { | ||||||
| 	swProfilerCPUMap    map[*perf.SoftwareProfiler]int | 			return nil, fmt.Errorf("Invalid tracepoint config %v", tracepoint) | ||||||
| 	cacheProfilerCPUMap map[*perf.CacheProfiler]int | 		} | ||||||
| 	perfHwProfilers     map[int]*perf.HardwareProfiler | 		tracepoints[i] = &perfTracepoint{ | ||||||
| 	perfSwProfilers     map[int]*perf.SoftwareProfiler | 			subsystem: split[0], | ||||||
| 	perfCacheProfilers  map[int]*perf.CacheProfiler | 			event:     split[1], | ||||||
| 	desc                map[string]*prometheus.Desc | 		} | ||||||
| 	logger              log.Logger | 	} | ||||||
|  | 	return tracepoints, nil | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| // perfCPUFlagToCPUs returns a set of CPUs for the perf collectors to monitor.
 | // perfCPUFlagToCPUs returns a set of CPUs for the perf collectors to monitor.
 | ||||||
|  | @ -98,6 +102,144 @@ func perfCPUFlagToCPUs(cpuFlag string) ([]int, error) { | ||||||
| 	return cpus, nil | 	return cpus, nil | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | // perfTracepoint is a struct for holding tracepoint information.
 | ||||||
|  | type perfTracepoint struct { | ||||||
|  | 	subsystem string | ||||||
|  | 	event     string | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | // label returns the tracepoint name in the format subsystem_event.
 | ||||||
|  | func (t *perfTracepoint) label() string { | ||||||
|  | 	return t.subsystem + "_" + t.event | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | // tracepoint returns the tracepoint name in the format subsystem:event.
 | ||||||
|  | func (t *perfTracepoint) tracepoint() string { | ||||||
|  | 	return t.subsystem + ":" + t.event | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | // perfCollector is a Collector that uses the perf subsystem to collect
 | ||||||
|  | // metrics. It uses perf_event_open and ioctls for profiling. Due to the fact
 | ||||||
|  | // that the perf subsystem is highly dependent on kernel configuration and
 | ||||||
|  | // settings not all profiler values may be exposed on the target system at any
 | ||||||
|  | // given time.
 | ||||||
|  | type perfCollector struct { | ||||||
|  | 	hwProfilerCPUMap    map[*perf.HardwareProfiler]int | ||||||
|  | 	swProfilerCPUMap    map[*perf.SoftwareProfiler]int | ||||||
|  | 	cacheProfilerCPUMap map[*perf.CacheProfiler]int | ||||||
|  | 	perfHwProfilers     map[int]*perf.HardwareProfiler | ||||||
|  | 	perfSwProfilers     map[int]*perf.SoftwareProfiler | ||||||
|  | 	perfCacheProfilers  map[int]*perf.CacheProfiler | ||||||
|  | 	desc                map[string]*prometheus.Desc | ||||||
|  | 	logger              log.Logger | ||||||
|  | 	tracepointCollector *perfTracepointCollector | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | type perfTracepointCollector struct { | ||||||
|  | 	// descs maps a tracepoint subsystem to its events and each event's *prometheus.Desc.
 | ||||||
|  | 	descs map[string]map[string]*prometheus.Desc | ||||||
|  | 	// collectionOrder lists the configured tracepoints (as subsystem:event) in flag order; updateCPU indexes the profiled values by this order.
 | ||||||
|  | 	collectionOrder []string | ||||||
|  | 
 | ||||||
|  | 	logger    log.Logger | ||||||
|  | 	profilers map[int]perf.GroupProfiler | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | // update is used to collect all tracepoints across all tracepoint profilers.
 | ||||||
|  | func (c *perfTracepointCollector) update(ch chan<- prometheus.Metric) error { | ||||||
|  | 	for cpu := range c.profilers { | ||||||
|  | 		if err := c.updateCPU(cpu, ch); err != nil { | ||||||
|  | 			return err | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  | 	return nil | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | // updateCPU is used to update metrics per CPU profiler.
 | ||||||
|  | func (c *perfTracepointCollector) updateCPU(cpu int, ch chan<- prometheus.Metric) error { | ||||||
|  | 	cpuStr := fmt.Sprintf("%d", cpu) | ||||||
|  | 	profiler := c.profilers[cpu] | ||||||
|  | 	p, err := profiler.Profile() | ||||||
|  | 	if err != nil { | ||||||
|  | 		level.Error(c.logger).Log("msg", "Failed to collect tracepoint profile", "err", err) | ||||||
|  | 		return err | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	for i, value := range p.Values { | ||||||
|  | 		// Get the Desc from the ordered group value.
 | ||||||
|  | 		descKey := c.collectionOrder[i] | ||||||
|  | 		descKeySlice := strings.Split(descKey, ":") | ||||||
|  | 		ch <- prometheus.MustNewConstMetric( | ||||||
|  | 			c.descs[descKeySlice[0]][descKeySlice[1]], | ||||||
|  | 			prometheus.CounterValue, | ||||||
|  | 			float64(value), | ||||||
|  | 			cpuStr, | ||||||
|  | 		) | ||||||
|  | 	} | ||||||
|  | 	return nil | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | // newPerfTracepointCollector returns a configured perfTracepointCollector.
 | ||||||
|  | func newPerfTracepointCollector( | ||||||
|  | 	logger log.Logger, | ||||||
|  | 	tracepointsFlag []string, | ||||||
|  | 	cpus []int, | ||||||
|  | ) (*perfTracepointCollector, error) { | ||||||
|  | 	tracepoints, err := perfTracepointFlagToTracepoints(tracepointsFlag) | ||||||
|  | 	if err != nil { | ||||||
|  | 		return nil, err | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	collectionOrder := make([]string, len(tracepoints)) | ||||||
|  | 	descs := map[string]map[string]*prometheus.Desc{} | ||||||
|  | 	eventAttrs := make([]unix.PerfEventAttr, len(tracepoints)) | ||||||
|  | 
 | ||||||
|  | 	for i, tracepoint := range tracepoints { | ||||||
|  | 		eventAttr, err := perf.TracepointEventAttr(tracepoint.subsystem, tracepoint.event) | ||||||
|  | 		if err != nil { | ||||||
|  | 			return nil, err | ||||||
|  | 		} | ||||||
|  | 		eventAttrs[i] = *eventAttr | ||||||
|  | 		collectionOrder[i] = tracepoint.tracepoint() | ||||||
|  | 		if _, ok := descs[tracepoint.subsystem]; !ok { | ||||||
|  | 			descs[tracepoint.subsystem] = map[string]*prometheus.Desc{} | ||||||
|  | 		} | ||||||
|  | 		descs[tracepoint.subsystem][tracepoint.event] = prometheus.NewDesc( | ||||||
|  | 			prometheus.BuildFQName( | ||||||
|  | 				namespace, | ||||||
|  | 				perfSubsystem, | ||||||
|  | 				tracepoint.label(), | ||||||
|  | 			), | ||||||
|  | 			"Perf tracepoint "+tracepoint.tracepoint(), | ||||||
|  | 			[]string{"cpu"}, | ||||||
|  | 			nil, | ||||||
|  | 		) | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	profilers := make(map[int]perf.GroupProfiler, len(cpus)) | ||||||
|  | 	for _, cpu := range cpus { | ||||||
|  | 		profiler, err := perf.NewGroupProfiler(-1, cpu, 0, eventAttrs...) | ||||||
|  | 		if err != nil { | ||||||
|  | 			return nil, err | ||||||
|  | 		} | ||||||
|  | 		profilers[cpu] = profiler | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	c := &perfTracepointCollector{ | ||||||
|  | 		descs:           descs, | ||||||
|  | 		collectionOrder: collectionOrder, | ||||||
|  | 		profilers:       profilers, | ||||||
|  | 		logger:          logger, | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	for _, profiler := range c.profilers { | ||||||
|  | 		if err := profiler.Start(); err != nil { | ||||||
|  | 			return nil, err | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  | 	return c, nil | ||||||
|  | } | ||||||
|  | 
 | ||||||
| // NewPerfCollector returns a new perf based collector, it creates a profiler
 | // NewPerfCollector returns a new perf based collector, it creates a profiler
 | ||||||
| // per CPU.
 | // per CPU.
 | ||||||
| func NewPerfCollector(logger log.Logger) (Collector, error) { | func NewPerfCollector(logger log.Logger) (Collector, error) { | ||||||
|  | @ -127,6 +269,16 @@ func NewPerfCollector(logger log.Logger) (Collector, error) { | ||||||
| 		} | 		} | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
|  | 	// First configure any tracepoints.
 | ||||||
|  | 	if *perfTracepointFlag != nil && len(*perfTracepointFlag) > 0 { | ||||||
|  | 		tracepointCollector, err := newPerfTracepointCollector(logger, *perfTracepointFlag, cpus) | ||||||
|  | 		if err != nil { | ||||||
|  | 			return nil, err | ||||||
|  | 		} | ||||||
|  | 		collector.tracepointCollector = tracepointCollector | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	// Configure all profilers for the specified CPUs.
 | ||||||
| 	for _, cpu := range cpus { | 	for _, cpu := range cpus { | ||||||
| 		// Use -1 to profile all processes on the CPU, see:
 | 		// Use -1 to profile all processes on the CPU, see:
 | ||||||
| 		// man perf_event_open
 | 		// man perf_event_open
 | ||||||
|  | @ -411,6 +563,9 @@ func (c *perfCollector) Update(ch chan<- prometheus.Metric) error { | ||||||
| 	if err := c.updateCacheStats(ch); err != nil { | 	if err := c.updateCacheStats(ch); err != nil { | ||||||
| 		return err | 		return err | ||||||
| 	} | 	} | ||||||
|  | 	if c.tracepointCollector != nil { | ||||||
|  | 		return c.tracepointCollector.update(ch) | ||||||
|  | 	} | ||||||
| 
 | 
 | ||||||
| 	return nil | 	return nil | ||||||
| } | } | ||||||
|  |  | ||||||
|  | @ -180,8 +180,74 @@ func TestPerfCPUFlagToCPUs(t *testing.T) { | ||||||
| 				if test.exCpus[i] != cpus[i] { | 				if test.exCpus[i] != cpus[i] { | ||||||
| 					t.Fatalf( | 					t.Fatalf( | ||||||
| 						"expected cpus %v, got %v", | 						"expected cpus %v, got %v", | ||||||
| 						test.exCpus, | 						test.exCpus[i], | ||||||
| 						cpus, | 						cpus[i], | ||||||
|  | 					) | ||||||
|  | 				} | ||||||
|  | 			} | ||||||
|  | 		}) | ||||||
|  | 	} | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | func TestPerfTracepointFlagToTracepoints(t *testing.T) { | ||||||
|  | 	tests := []struct { | ||||||
|  | 		name          string | ||||||
|  | 		flag          []string | ||||||
|  | 		exTracepoints []*perfTracepoint | ||||||
|  | 		errStr        string | ||||||
|  | 	}{ | ||||||
|  | 		{ | ||||||
|  | 			name: "valid single tracepoint", | ||||||
|  | 			flag: []string{"sched:sched_kthread_stop"}, | ||||||
|  | 			exTracepoints: []*perfTracepoint{ | ||||||
|  | 				{ | ||||||
|  | 					subsystem: "sched", | ||||||
|  | 					event:     "sched_kthread_stop", | ||||||
|  | 				}, | ||||||
|  | 			}, | ||||||
|  | 		}, | ||||||
|  | 		{ | ||||||
|  | 			name: "valid multiple tracepoints", | ||||||
|  | 			flag: []string{"sched:sched_kthread_stop", "sched:sched_process_fork"}, | ||||||
|  | 			exTracepoints: []*perfTracepoint{ | ||||||
|  | 				{ | ||||||
|  | 					subsystem: "sched", | ||||||
|  | 					event:     "sched_kthread_stop", | ||||||
|  | 				}, | ||||||
|  | 				{ | ||||||
|  | 					subsystem: "sched", | ||||||
|  | 					event:     "sched_process_fork", | ||||||
|  | 				}, | ||||||
|  | 			}, | ||||||
|  | 		}, | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	for _, test := range tests { | ||||||
|  | 		t.Run(test.name, func(t *testing.T) { | ||||||
|  | 			tracepoints, err := perfTracepointFlagToTracepoints(test.flag) | ||||||
|  | 			if test.errStr != "" { | ||||||
|  | 				if err != nil { | ||||||
|  | 					t.Fatal("expected error to not be nil") | ||||||
|  | 				} | ||||||
|  | 				if test.errStr != err.Error() { | ||||||
|  | 					t.Fatalf( | ||||||
|  | 						"expected error %q, got %q", | ||||||
|  | 						test.errStr, | ||||||
|  | 						err.Error(), | ||||||
|  | 					) | ||||||
|  | 				} | ||||||
|  | 				return | ||||||
|  | 			} | ||||||
|  | 			if err != nil { | ||||||
|  | 				t.Fatal(err) | ||||||
|  | 			} | ||||||
|  | 			for i := range tracepoints { | ||||||
|  | 				if test.exTracepoints[i].event != tracepoints[i].event && | ||||||
|  | 					test.exTracepoints[i].subsystem != tracepoints[i].subsystem { | ||||||
|  | 					t.Fatalf( | ||||||
|  | 						"expected tracepoint %v, got %v", | ||||||
|  | 						test.exTracepoints[i], | ||||||
|  | 						tracepoints[i], | ||||||
| 					) | 					) | ||||||
| 				} | 				} | ||||||
| 			} | 			} | ||||||
|  |  | ||||||
		Loading…
	
		Reference in a new issue