mirror of
https://github.com/prometheus/node_exporter.git
synced 2025-03-05 21:00:12 -08:00
Add perf tracepoint collection flag (#1664)
* Add tracepoint collector option for perf collector Signed-off-by: Daniel Hodges <hodges.daniel.scott@gmail.com>
This commit is contained in:
parent
44357ed677
commit
b14168cf6a
|
@ -96,6 +96,13 @@ configuration is zero indexed and can also take a stride value; e.g.
|
||||||
`--collector.perf --collector.perf.cpus=1-10:5` would collect on CPUs
|
`--collector.perf --collector.perf.cpus=1-10:5` would collect on CPUs
|
||||||
1, 5, and 10.
|
1, 5, and 10.
|
||||||
|
|
||||||
|
The perf collector is also able to collect
|
||||||
|
[tracepoint](https://www.kernel.org/doc/html/latest/core-api/tracepoint.html)
|
||||||
|
counts when using the `--collector.perf.tracepoint` flag. Tracepoints can be
|
||||||
|
found using [`perf list`](http://man7.org/linux/man-pages/man1/perf.1.html) or
|
||||||
|
from debugfs. An example usage of this would be
|
||||||
|
`--collector.perf.tracepoint="sched:sched_process_exec"`.
|
||||||
|
|
||||||
|
|
||||||
Name | Description | OS
|
Name | Description | OS
|
||||||
---------|-------------|----
|
---------|-------------|----
|
||||||
|
|
|
@ -20,8 +20,10 @@ import (
|
||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
"github.com/go-kit/kit/log"
|
"github.com/go-kit/kit/log"
|
||||||
|
"github.com/go-kit/kit/log/level"
|
||||||
"github.com/hodgesds/perf-utils"
|
"github.com/hodgesds/perf-utils"
|
||||||
"github.com/prometheus/client_golang/prometheus"
|
"github.com/prometheus/client_golang/prometheus"
|
||||||
|
"golang.org/x/sys/unix"
|
||||||
kingpin "gopkg.in/alecthomas/kingpin.v2"
|
kingpin "gopkg.in/alecthomas/kingpin.v2"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -31,26 +33,28 @@ const (
|
||||||
|
|
||||||
var (
|
var (
|
||||||
perfCPUsFlag = kingpin.Flag("collector.perf.cpus", "List of CPUs from which perf metrics should be collected").Default("").String()
|
perfCPUsFlag = kingpin.Flag("collector.perf.cpus", "List of CPUs from which perf metrics should be collected").Default("").String()
|
||||||
|
perfTracepointFlag = kingpin.Flag("collector.perf.tracepoint", "perf tracepoint that should be collected").Strings()
|
||||||
)
|
)
|
||||||
|
|
||||||
func init() {
|
func init() {
|
||||||
registerCollector(perfSubsystem, defaultDisabled, NewPerfCollector)
|
registerCollector(perfSubsystem, defaultDisabled, NewPerfCollector)
|
||||||
}
|
}
|
||||||
|
|
||||||
// perfCollector is a Collector that uses the perf subsystem to collect
|
// perfTracepointFlagToTracepoints returns the set of configured tracepoints.
|
||||||
// metrics. It uses perf_event_open an ioctls for profiling. Due to the fact
|
func perfTracepointFlagToTracepoints(tracepointsFlag []string) ([]*perfTracepoint, error) {
|
||||||
// that the perf subsystem is highly dependent on kernel configuration and
|
tracepoints := make([]*perfTracepoint, len(tracepointsFlag))
|
||||||
// settings not all profiler values may be exposed on the target system at any
|
|
||||||
// given time.
|
for i, tracepoint := range tracepointsFlag {
|
||||||
type perfCollector struct {
|
split := strings.Split(tracepoint, ":")
|
||||||
hwProfilerCPUMap map[*perf.HardwareProfiler]int
|
if len(split) != 2 {
|
||||||
swProfilerCPUMap map[*perf.SoftwareProfiler]int
|
return nil, fmt.Errorf("Invalid tracepoint config %v", tracepoint)
|
||||||
cacheProfilerCPUMap map[*perf.CacheProfiler]int
|
}
|
||||||
perfHwProfilers map[int]*perf.HardwareProfiler
|
tracepoints[i] = &perfTracepoint{
|
||||||
perfSwProfilers map[int]*perf.SoftwareProfiler
|
subsystem: split[0],
|
||||||
perfCacheProfilers map[int]*perf.CacheProfiler
|
event: split[1],
|
||||||
desc map[string]*prometheus.Desc
|
}
|
||||||
logger log.Logger
|
}
|
||||||
|
return tracepoints, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// perfCPUFlagToCPUs returns a set of CPUs for the perf collectors to monitor.
|
// perfCPUFlagToCPUs returns a set of CPUs for the perf collectors to monitor.
|
||||||
|
@ -98,6 +102,144 @@ func perfCPUFlagToCPUs(cpuFlag string) ([]int, error) {
|
||||||
return cpus, nil
|
return cpus, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// perfTracepoint identifies a single kernel tracepoint by the subsystem it
// belongs to and the name of the event inside that subsystem.
type perfTracepoint struct {
	subsystem, event string
}

// label renders the tracepoint as "subsystem_event".
func (t *perfTracepoint) label() string {
	return strings.Join([]string{t.subsystem, t.event}, "_")
}

// tracepoint renders the tracepoint as "subsystem:event", the same form that
// is accepted on the command line.
func (t *perfTracepoint) tracepoint() string {
	return strings.Join([]string{t.subsystem, t.event}, ":")
}
|
||||||
|
|
||||||
|
// perfCollector is a Collector that uses the perf subsystem to collect
// metrics. It uses perf_event_open and ioctls for profiling. Due to the fact
// that the perf subsystem is highly dependent on kernel configuration and
// settings not all profiler values may be exposed on the target system at any
// given time.
type perfCollector struct {
	// NOTE(review): the *CPUMap fields presumably map a profiler back to the
	// CPU it was opened on, and the perf*Profilers fields the reverse; the
	// code that fills them is outside this view, so confirm there.
	hwProfilerCPUMap    map[*perf.HardwareProfiler]int
	swProfilerCPUMap    map[*perf.SoftwareProfiler]int
	cacheProfilerCPUMap map[*perf.CacheProfiler]int
	perfHwProfilers     map[int]*perf.HardwareProfiler
	perfSwProfilers     map[int]*perf.SoftwareProfiler
	perfCacheProfilers  map[int]*perf.CacheProfiler
	desc                map[string]*prometheus.Desc
	logger              log.Logger
	// tracepointCollector is only set when at least one tracepoint was
	// configured via --collector.perf.tracepoint; it is nil otherwise and
	// Update skips tracepoint collection in that case.
	tracepointCollector *perfTracepointCollector
}
|
||||||
|
|
||||||
|
// perfTracepointCollector collects tracepoint counts using one group profiler
// per monitored CPU.
type perfTracepointCollector struct {
	// descs maps a subsystem name to a map from event name to the
	// *prometheus.Desc of the corresponding metric.
	descs map[string]map[string]*prometheus.Desc
	// collectionOrder holds the "subsystem:event" names in the order the
	// tracepoints were configured. updateCPU indexes it by the position of
	// each profiled value to find the matching Desc.
	collectionOrder []string

	logger log.Logger
	// profilers maps a CPU number to the group profiler opened for that CPU.
	profilers map[int]perf.GroupProfiler
}
|
||||||
|
|
||||||
|
// update is used collect all tracepoints across all tracepoint profilers.
|
||||||
|
func (c *perfTracepointCollector) update(ch chan<- prometheus.Metric) error {
|
||||||
|
for cpu := range c.profilers {
|
||||||
|
if err := c.updateCPU(cpu, ch); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// updateCPU is used to update metrics per CPU profiler.
|
||||||
|
func (c *perfTracepointCollector) updateCPU(cpu int, ch chan<- prometheus.Metric) error {
|
||||||
|
cpuStr := fmt.Sprintf("%d", cpu)
|
||||||
|
profiler := c.profilers[cpu]
|
||||||
|
p, err := profiler.Profile()
|
||||||
|
if err != nil {
|
||||||
|
level.Error(c.logger).Log("msg", "Failed to collect tracepoint profile", "err", err)
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
for i, value := range p.Values {
|
||||||
|
// Get the Desc from the ordered group value.
|
||||||
|
descKey := c.collectionOrder[i]
|
||||||
|
descKeySlice := strings.Split(descKey, ":")
|
||||||
|
ch <- prometheus.MustNewConstMetric(
|
||||||
|
c.descs[descKeySlice[0]][descKeySlice[1]],
|
||||||
|
prometheus.CounterValue,
|
||||||
|
float64(value),
|
||||||
|
cpuStr,
|
||||||
|
)
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// newPerfTracepointCollector returns a configured perfTracepointCollector.
|
||||||
|
func newPerfTracepointCollector(
|
||||||
|
logger log.Logger,
|
||||||
|
tracepointsFlag []string,
|
||||||
|
cpus []int,
|
||||||
|
) (*perfTracepointCollector, error) {
|
||||||
|
tracepoints, err := perfTracepointFlagToTracepoints(tracepointsFlag)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
collectionOrder := make([]string, len(tracepoints))
|
||||||
|
descs := map[string]map[string]*prometheus.Desc{}
|
||||||
|
eventAttrs := make([]unix.PerfEventAttr, len(tracepoints))
|
||||||
|
|
||||||
|
for i, tracepoint := range tracepoints {
|
||||||
|
eventAttr, err := perf.TracepointEventAttr(tracepoint.subsystem, tracepoint.event)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
eventAttrs[i] = *eventAttr
|
||||||
|
collectionOrder[i] = tracepoint.tracepoint()
|
||||||
|
if _, ok := descs[tracepoint.subsystem]; !ok {
|
||||||
|
descs[tracepoint.subsystem] = map[string]*prometheus.Desc{}
|
||||||
|
}
|
||||||
|
descs[tracepoint.subsystem][tracepoint.event] = prometheus.NewDesc(
|
||||||
|
prometheus.BuildFQName(
|
||||||
|
namespace,
|
||||||
|
perfSubsystem,
|
||||||
|
tracepoint.label(),
|
||||||
|
),
|
||||||
|
"Perf tracepoint "+tracepoint.tracepoint(),
|
||||||
|
[]string{"cpu"},
|
||||||
|
nil,
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
profilers := make(map[int]perf.GroupProfiler, len(cpus))
|
||||||
|
for _, cpu := range cpus {
|
||||||
|
profiler, err := perf.NewGroupProfiler(-1, cpu, 0, eventAttrs...)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
profilers[cpu] = profiler
|
||||||
|
}
|
||||||
|
|
||||||
|
c := &perfTracepointCollector{
|
||||||
|
descs: descs,
|
||||||
|
collectionOrder: collectionOrder,
|
||||||
|
profilers: profilers,
|
||||||
|
logger: logger,
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, profiler := range c.profilers {
|
||||||
|
if err := profiler.Start(); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return c, nil
|
||||||
|
}
|
||||||
|
|
||||||
// NewPerfCollector returns a new perf based collector, it creates a profiler
|
// NewPerfCollector returns a new perf based collector, it creates a profiler
|
||||||
// per CPU.
|
// per CPU.
|
||||||
func NewPerfCollector(logger log.Logger) (Collector, error) {
|
func NewPerfCollector(logger log.Logger) (Collector, error) {
|
||||||
|
@ -127,6 +269,16 @@ func NewPerfCollector(logger log.Logger) (Collector, error) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// First configure any tracepoints.
|
||||||
|
if *perfTracepointFlag != nil && len(*perfTracepointFlag) > 0 {
|
||||||
|
tracepointCollector, err := newPerfTracepointCollector(logger, *perfTracepointFlag, cpus)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
collector.tracepointCollector = tracepointCollector
|
||||||
|
}
|
||||||
|
|
||||||
|
// Configure all profilers for the specified CPUs.
|
||||||
for _, cpu := range cpus {
|
for _, cpu := range cpus {
|
||||||
// Use -1 to profile all processes on the CPU, see:
|
// Use -1 to profile all processes on the CPU, see:
|
||||||
// man perf_event_open
|
// man perf_event_open
|
||||||
|
@ -411,6 +563,9 @@ func (c *perfCollector) Update(ch chan<- prometheus.Metric) error {
|
||||||
if err := c.updateCacheStats(ch); err != nil {
|
if err := c.updateCacheStats(ch); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
if c.tracepointCollector != nil {
|
||||||
|
return c.tracepointCollector.update(ch)
|
||||||
|
}
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
|
@ -180,8 +180,74 @@ func TestPerfCPUFlagToCPUs(t *testing.T) {
|
||||||
if test.exCpus[i] != cpus[i] {
|
if test.exCpus[i] != cpus[i] {
|
||||||
t.Fatalf(
|
t.Fatalf(
|
||||||
"expected cpus %v, got %v",
|
"expected cpus %v, got %v",
|
||||||
test.exCpus,
|
test.exCpus[i],
|
||||||
cpus,
|
cpus[i],
|
||||||
|
)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestPerfTracepointFlagToTracepoints(t *testing.T) {
|
||||||
|
tests := []struct {
|
||||||
|
name string
|
||||||
|
flag []string
|
||||||
|
exTracepoints []*perfTracepoint
|
||||||
|
errStr string
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
name: "valid single tracepoint",
|
||||||
|
flag: []string{"sched:sched_kthread_stop"},
|
||||||
|
exTracepoints: []*perfTracepoint{
|
||||||
|
{
|
||||||
|
subsystem: "sched",
|
||||||
|
event: "sched_kthread_stop",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "valid multiple tracepoints",
|
||||||
|
flag: []string{"sched:sched_kthread_stop", "sched:sched_process_fork"},
|
||||||
|
exTracepoints: []*perfTracepoint{
|
||||||
|
{
|
||||||
|
subsystem: "sched",
|
||||||
|
event: "sched_kthread_stop",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
subsystem: "sched",
|
||||||
|
event: "sched_process_fork",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, test := range tests {
|
||||||
|
t.Run(test.name, func(t *testing.T) {
|
||||||
|
tracepoints, err := perfTracepointFlagToTracepoints(test.flag)
|
||||||
|
if test.errStr != "" {
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal("expected error to not be nil")
|
||||||
|
}
|
||||||
|
if test.errStr != err.Error() {
|
||||||
|
t.Fatalf(
|
||||||
|
"expected error %q, got %q",
|
||||||
|
test.errStr,
|
||||||
|
err.Error(),
|
||||||
|
)
|
||||||
|
}
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
for i := range tracepoints {
|
||||||
|
if test.exTracepoints[i].event != tracepoints[i].event &&
|
||||||
|
test.exTracepoints[i].subsystem != tracepoints[i].subsystem {
|
||||||
|
t.Fatalf(
|
||||||
|
"expected tracepoint %v, got %v",
|
||||||
|
test.exTracepoints[i],
|
||||||
|
tracepoints[i],
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue