Add flag to aggregate IPVS metrics to avoid high-cardinality metrics (#1709)

Fixes #1708

Signed-off-by: Wing924 <weihe924stephen@gmail.com>
This commit is contained in:
Wei He 2020-06-02 17:52:00 +09:00 committed by GitHub
parent b7cb72adeb
commit 0253277121
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 349 additions and 85 deletions

View file

@ -0,0 +1,33 @@
# HELP node_ipvs_backend_connections_active The current active connections by local and remote address.
# TYPE node_ipvs_backend_connections_active gauge
node_ipvs_backend_connections_active{local_address="",local_port="0"} 385
node_ipvs_backend_connections_active{local_address="192.168.0.22",local_port="3306"} 744
node_ipvs_backend_connections_active{local_address="192.168.0.55",local_port="3306"} 0
node_ipvs_backend_connections_active{local_address="192.168.0.57",local_port="3306"} 2997
# HELP node_ipvs_backend_connections_inactive The current inactive connections by local and remote address.
# TYPE node_ipvs_backend_connections_inactive gauge
node_ipvs_backend_connections_inactive{local_address="",local_port="0"} 6
node_ipvs_backend_connections_inactive{local_address="192.168.0.22",local_port="3306"} 5
node_ipvs_backend_connections_inactive{local_address="192.168.0.55",local_port="3306"} 0
node_ipvs_backend_connections_inactive{local_address="192.168.0.57",local_port="3306"} 0
# HELP node_ipvs_backend_weight The current backend weight by local and remote address.
# TYPE node_ipvs_backend_weight gauge
node_ipvs_backend_weight{local_address="",local_port="0"} 120
node_ipvs_backend_weight{local_address="192.168.0.22",local_port="3306"} 300
node_ipvs_backend_weight{local_address="192.168.0.55",local_port="3306"} 100
node_ipvs_backend_weight{local_address="192.168.0.57",local_port="3306"} 200
# HELP node_ipvs_connections_total The total number of connections made.
# TYPE node_ipvs_connections_total counter
node_ipvs_connections_total 2.3765872e+07
# HELP node_ipvs_incoming_bytes_total The total amount of incoming data.
# TYPE node_ipvs_incoming_bytes_total counter
node_ipvs_incoming_bytes_total 8.9991519156915e+13
# HELP node_ipvs_incoming_packets_total The total number of incoming packets.
# TYPE node_ipvs_incoming_packets_total counter
node_ipvs_incoming_packets_total 3.811989221e+09
# HELP node_ipvs_outgoing_bytes_total The total amount of outgoing data.
# TYPE node_ipvs_outgoing_bytes_total counter
node_ipvs_outgoing_bytes_total 0
# HELP node_ipvs_outgoing_packets_total The total number of outgoing packets.
# TYPE node_ipvs_outgoing_packets_total counter
node_ipvs_outgoing_packets_total 0

View file

@ -0,0 +1,27 @@
# HELP node_ipvs_backend_connections_active The current active connections by local and remote address.
# TYPE node_ipvs_backend_connections_active gauge
node_ipvs_backend_connections_active{local_port="0"} 385
node_ipvs_backend_connections_active{local_port="3306"} 3741
# HELP node_ipvs_backend_connections_inactive The current inactive connections by local and remote address.
# TYPE node_ipvs_backend_connections_inactive gauge
node_ipvs_backend_connections_inactive{local_port="0"} 6
node_ipvs_backend_connections_inactive{local_port="3306"} 5
# HELP node_ipvs_backend_weight The current backend weight by local and remote address.
# TYPE node_ipvs_backend_weight gauge
node_ipvs_backend_weight{local_port="0"} 120
node_ipvs_backend_weight{local_port="3306"} 600
# HELP node_ipvs_connections_total The total number of connections made.
# TYPE node_ipvs_connections_total counter
node_ipvs_connections_total 2.3765872e+07
# HELP node_ipvs_incoming_bytes_total The total amount of incoming data.
# TYPE node_ipvs_incoming_bytes_total counter
node_ipvs_incoming_bytes_total 8.9991519156915e+13
# HELP node_ipvs_incoming_packets_total The total number of incoming packets.
# TYPE node_ipvs_incoming_packets_total counter
node_ipvs_incoming_packets_total 3.811989221e+09
# HELP node_ipvs_outgoing_bytes_total The total amount of outgoing data.
# TYPE node_ipvs_outgoing_bytes_total counter
node_ipvs_outgoing_bytes_total 0
# HELP node_ipvs_outgoing_packets_total The total number of outgoing packets.
# TYPE node_ipvs_outgoing_packets_total counter
node_ipvs_outgoing_packets_total 0

View file

@ -0,0 +1,24 @@
# HELP node_ipvs_backend_connections_active The current active connections by local and remote address.
# TYPE node_ipvs_backend_connections_active gauge
node_ipvs_backend_connections_active 4126
# HELP node_ipvs_backend_connections_inactive The current inactive connections by local and remote address.
# TYPE node_ipvs_backend_connections_inactive gauge
node_ipvs_backend_connections_inactive 11
# HELP node_ipvs_backend_weight The current backend weight by local and remote address.
# TYPE node_ipvs_backend_weight gauge
node_ipvs_backend_weight 720
# HELP node_ipvs_connections_total The total number of connections made.
# TYPE node_ipvs_connections_total counter
node_ipvs_connections_total 2.3765872e+07
# HELP node_ipvs_incoming_bytes_total The total amount of incoming data.
# TYPE node_ipvs_incoming_bytes_total counter
node_ipvs_incoming_bytes_total 8.9991519156915e+13
# HELP node_ipvs_incoming_packets_total The total number of incoming packets.
# TYPE node_ipvs_incoming_packets_total counter
node_ipvs_incoming_packets_total 3.811989221e+09
# HELP node_ipvs_outgoing_bytes_total The total amount of outgoing data.
# TYPE node_ipvs_outgoing_bytes_total counter
node_ipvs_outgoing_bytes_total 0
# HELP node_ipvs_outgoing_packets_total The total number of outgoing packets.
# TYPE node_ipvs_outgoing_packets_total counter
node_ipvs_outgoing_packets_total 0

View file

@ -18,22 +18,53 @@ package collector
import (
"fmt"
"os"
"sort"
"strconv"
"strings"
"github.com/go-kit/kit/log"
"github.com/go-kit/kit/log/level"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/procfs"
kingpin "gopkg.in/alecthomas/kingpin.v2"
)
// ipvsCollector holds the state needed to export IPVS statistics: the procfs
// handle they are read from, the label names selected for the per-backend
// metrics, and one metric descriptor per exported series.
type ipvsCollector struct {
	Collector

	// fs is the procfs handle the IPVS statistics are read from.
	fs procfs.FS

	// backendLabels lists the label names attached to the backend_* metrics.
	backendLabels []string

	// Per-backend gauges, labelled with backendLabels.
	backendConnectionsActive typedDesc
	backendConnectionsInact  typedDesc
	backendWeight            typedDesc

	// Global IPVS counters (no labels).
	connections     typedDesc
	incomingPackets typedDesc
	outgoingPackets typedDesc
	incomingBytes   typedDesc
	outgoingBytes   typedDesc

	logger log.Logger
}
// ipvsBackendStatus accumulates the per-backend values that are summed up for
// all backends sharing the same set of label values.
type ipvsBackendStatus struct {
	// ActiveConn is the summed number of active connections,
	// InactConn the summed number of inactive connections and
	// Weight the summed backend weight.
	ActiveConn, InactConn, Weight uint64
}
// Names of the labels that can be attached to the IPVS backend metrics.
const (
	// Virtual service (local) side.
	ipvsLabelLocalAddress = "local_address"
	ipvsLabelLocalPort    = "local_port"

	// Real server (remote) side.
	ipvsLabelRemoteAddress = "remote_address"
	ipvsLabelRemotePort    = "remote_port"

	// Protocol and firewall mark of the virtual service.
	ipvsLabelProto     = "proto"
	ipvsLabelLocalMark = "local_mark"
)
// fullIpvsBackendLabels is the complete set of supported backend labels, in
// the order in which they appear on the exported metrics.
var fullIpvsBackendLabels = []string{
	ipvsLabelLocalAddress,
	ipvsLabelLocalPort,
	ipvsLabelRemoteAddress,
	ipvsLabelRemotePort,
	ipvsLabelProto,
	ipvsLabelLocalMark,
}

// ipvsLabels is the --collector.ipvs.backend-labels flag: a comma separated
// list of the labels to keep on the backend metrics. It defaults to every
// label in fullIpvsBackendLabels.
var ipvsLabels = kingpin.Flag(
	"collector.ipvs.backend-labels",
	"Comma separated list for IPVS backend stats labels.",
).Default(strings.Join(fullIpvsBackendLabels, ",")).String()
// init registers the IPVS collector under the name "ipvs", passing
// defaultEnabled as its default state and NewIPVSCollector as its constructor.
func init() {
registerCollector("ipvs", defaultEnabled, NewIPVSCollector)
}
@ -46,19 +77,15 @@ func NewIPVSCollector(logger log.Logger) (Collector, error) {
func newIPVSCollector(logger log.Logger) (*ipvsCollector, error) {
var (
ipvsBackendLabelNames = []string{
"local_address",
"local_port",
"remote_address",
"remote_port",
"proto",
"local_mark",
}
c ipvsCollector
err error
subsystem = "ipvs"
)
if c.backendLabels, err = c.parseIpvsLabels(*ipvsLabels); err != nil {
return nil, err
}
c.logger = logger
c.fs, err = procfs.NewFS(*procPath)
if err != nil {
@ -93,17 +120,17 @@ func newIPVSCollector(logger log.Logger) (*ipvsCollector, error) {
c.backendConnectionsActive = typedDesc{prometheus.NewDesc(
prometheus.BuildFQName(namespace, subsystem, "backend_connections_active"),
"The current active connections by local and remote address.",
ipvsBackendLabelNames, nil,
c.backendLabels, nil,
), prometheus.GaugeValue}
c.backendConnectionsInact = typedDesc{prometheus.NewDesc(
prometheus.BuildFQName(namespace, subsystem, "backend_connections_inactive"),
"The current inactive connections by local and remote address.",
ipvsBackendLabelNames, nil,
c.backendLabels, nil,
), prometheus.GaugeValue}
c.backendWeight = typedDesc{prometheus.NewDesc(
prometheus.BuildFQName(namespace, subsystem, "backend_weight"),
"The current backend weight by local and remote address.",
ipvsBackendLabelNames, nil,
c.backendLabels, nil,
), prometheus.GaugeValue}
return &c, nil
@ -130,22 +157,74 @@ func (c *ipvsCollector) Update(ch chan<- prometheus.Metric) error {
return fmt.Errorf("could not get backend status: %s", err)
}
sums := map[string]ipvsBackendStatus{}
labelValues := map[string][]string{}
for _, backend := range backendStats {
localAddress := ""
if backend.LocalAddress.String() != "<nil>" {
localAddress = backend.LocalAddress.String()
}
labelValues := []string{
localAddress,
strconv.FormatUint(uint64(backend.LocalPort), 10),
backend.RemoteAddress.String(),
strconv.FormatUint(uint64(backend.RemotePort), 10),
backend.Proto,
backend.LocalMark,
kv := make([]string, len(c.backendLabels))
for i, label := range c.backendLabels {
var labelValue string
switch label {
case ipvsLabelLocalAddress:
labelValue = localAddress
case ipvsLabelLocalPort:
labelValue = strconv.FormatUint(uint64(backend.LocalPort), 10)
case ipvsLabelRemoteAddress:
labelValue = backend.RemoteAddress.String()
case ipvsLabelRemotePort:
labelValue = strconv.FormatUint(uint64(backend.RemotePort), 10)
case ipvsLabelProto:
labelValue = backend.Proto
case ipvsLabelLocalMark:
labelValue = backend.LocalMark
}
kv[i] = labelValue
}
ch <- c.backendConnectionsActive.mustNewConstMetric(float64(backend.ActiveConn), labelValues...)
ch <- c.backendConnectionsInact.mustNewConstMetric(float64(backend.InactConn), labelValues...)
ch <- c.backendWeight.mustNewConstMetric(float64(backend.Weight), labelValues...)
key := strings.Join(kv, "-")
status := sums[key]
status.ActiveConn += backend.ActiveConn
status.InactConn += backend.InactConn
status.Weight += backend.Weight
sums[key] = status
labelValues[key] = kv
}
for key, status := range sums {
kv := labelValues[key]
ch <- c.backendConnectionsActive.mustNewConstMetric(float64(status.ActiveConn), kv...)
ch <- c.backendConnectionsInact.mustNewConstMetric(float64(status.InactConn), kv...)
ch <- c.backendWeight.mustNewConstMetric(float64(status.Weight), kv...)
}
return nil
}
// parseIpvsLabels turns a comma separated list of label names (the value of
// the --collector.ipvs.backend-labels flag) into the list of labels to attach
// to the IPVS backend metrics.
//
// Leading and trailing whitespace around each item is ignored, so
// "local_address, local_port" is accepted. Empty items are skipped, which
// means an empty string yields an empty, non-nil result. Duplicates are
// collapsed, and the result always follows the order of
// fullIpvsBackendLabels, regardless of the order given in labelString.
//
// If labelString contains names that are not in fullIpvsBackendLabels, a nil
// slice and an error listing the unknown names in sorted order are returned.
func (c *ipvsCollector) parseIpvsLabels(labelString string) ([]string, error) {
	labels := strings.Split(labelString, ",")
	labelSet := make(map[string]bool, len(labels))
	for _, label := range labels {
		// Tolerate "a, b" style lists; an all-whitespace item counts as empty.
		if label = strings.TrimSpace(label); label != "" {
			labelSet[label] = true
		}
	}

	// Walk the canonical list so the output order is stable, removing every
	// recognised label from the set; whatever remains afterwards is unknown.
	results := make([]string, 0, len(labelSet))
	for _, label := range fullIpvsBackendLabels {
		if labelSet[label] {
			results = append(results, label)
			delete(labelSet, label)
		}
	}

	if len(labelSet) > 0 {
		keys := make([]string, 0, len(labelSet))
		for label := range labelSet {
			keys = append(keys, label)
		}
		// Map iteration order is random; sort for a deterministic message.
		sort.Strings(keys)
		return nil, fmt.Errorf("unknown IPVS backend labels: %q", strings.Join(keys, ", "))
	}

	return results, nil
}

View file

@ -14,47 +14,131 @@
package collector
import (
"errors"
"fmt"
"github.com/go-kit/kit/log"
"io/ioutil"
"net/http"
"net/http/httptest"
"strings"
"testing"
"github.com/go-kit/kit/log"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promhttp"
"gopkg.in/alecthomas/kingpin.v2"
)
func TestIPVSCollector(t *testing.T) {
if _, err := kingpin.CommandLine.Parse([]string{"--path.procfs", "fixtures/proc"}); err != nil {
t.Fatal(err)
testcases := []struct {
labels string
expects []string
err error
}{
{
"<none>",
[]string{
prometheus.NewDesc("node_ipvs_connections_total", "The total number of connections made.", nil, nil).String(),
prometheus.NewDesc("node_ipvs_incoming_packets_total", "The total number of incoming packets.", nil, nil).String(),
prometheus.NewDesc("node_ipvs_outgoing_packets_total", "The total number of outgoing packets.", nil, nil).String(),
prometheus.NewDesc("node_ipvs_incoming_bytes_total", "The total amount of incoming data.", nil, nil).String(),
prometheus.NewDesc("node_ipvs_outgoing_bytes_total", "The total amount of outgoing data.", nil, nil).String(),
prometheus.NewDesc("node_ipvs_backend_connections_active", "The current active connections by local and remote address.", []string{"local_address", "local_port", "remote_address", "remote_port", "proto", "local_mark"}, nil).String(),
prometheus.NewDesc("node_ipvs_backend_connections_inactive", "The current inactive connections by local and remote address.", []string{"local_address", "local_port", "remote_address", "remote_port", "proto", "local_mark"}, nil).String(),
prometheus.NewDesc("node_ipvs_backend_weight", "The current backend weight by local and remote address.", []string{"local_address", "local_port", "remote_address", "remote_port", "proto", "local_mark"}, nil).String(),
},
nil,
},
{
"",
[]string{
prometheus.NewDesc("node_ipvs_connections_total", "The total number of connections made.", nil, nil).String(),
prometheus.NewDesc("node_ipvs_incoming_packets_total", "The total number of incoming packets.", nil, nil).String(),
prometheus.NewDesc("node_ipvs_outgoing_packets_total", "The total number of outgoing packets.", nil, nil).String(),
prometheus.NewDesc("node_ipvs_incoming_bytes_total", "The total amount of incoming data.", nil, nil).String(),
prometheus.NewDesc("node_ipvs_outgoing_bytes_total", "The total amount of outgoing data.", nil, nil).String(),
prometheus.NewDesc("node_ipvs_backend_connections_active", "The current active connections by local and remote address.", nil, nil).String(),
prometheus.NewDesc("node_ipvs_backend_connections_inactive", "The current inactive connections by local and remote address.", nil, nil).String(),
prometheus.NewDesc("node_ipvs_backend_weight", "The current backend weight by local and remote address.", nil, nil).String(),
},
nil,
},
{
"local_port",
[]string{
prometheus.NewDesc("node_ipvs_connections_total", "The total number of connections made.", nil, nil).String(),
prometheus.NewDesc("node_ipvs_incoming_packets_total", "The total number of incoming packets.", nil, nil).String(),
prometheus.NewDesc("node_ipvs_outgoing_packets_total", "The total number of outgoing packets.", nil, nil).String(),
prometheus.NewDesc("node_ipvs_incoming_bytes_total", "The total amount of incoming data.", nil, nil).String(),
prometheus.NewDesc("node_ipvs_outgoing_bytes_total", "The total amount of outgoing data.", nil, nil).String(),
prometheus.NewDesc("node_ipvs_backend_connections_active", "The current active connections by local and remote address.", []string{"local_port"}, nil).String(),
prometheus.NewDesc("node_ipvs_backend_connections_inactive", "The current inactive connections by local and remote address.", []string{"local_port"}, nil).String(),
prometheus.NewDesc("node_ipvs_backend_weight", "The current backend weight by local and remote address.", []string{"local_port"}, nil).String(),
},
nil,
},
{
"local_address,local_port",
[]string{
prometheus.NewDesc("node_ipvs_connections_total", "The total number of connections made.", nil, nil).String(),
prometheus.NewDesc("node_ipvs_incoming_packets_total", "The total number of incoming packets.", nil, nil).String(),
prometheus.NewDesc("node_ipvs_outgoing_packets_total", "The total number of outgoing packets.", nil, nil).String(),
prometheus.NewDesc("node_ipvs_incoming_bytes_total", "The total amount of incoming data.", nil, nil).String(),
prometheus.NewDesc("node_ipvs_outgoing_bytes_total", "The total amount of outgoing data.", nil, nil).String(),
prometheus.NewDesc("node_ipvs_backend_connections_active", "The current active connections by local and remote address.", []string{"local_address", "local_port"}, nil).String(),
prometheus.NewDesc("node_ipvs_backend_connections_inactive", "The current inactive connections by local and remote address.", []string{"local_address", "local_port"}, nil).String(),
prometheus.NewDesc("node_ipvs_backend_weight", "The current backend weight by local and remote address.", []string{"local_address", "local_port"}, nil).String(),
},
nil,
},
{
"invalid_label",
nil,
errors.New(`unknown IPVS backend labels: "invalid_label"`),
},
{
"invalid_label,bad_label",
nil,
errors.New(`unknown IPVS backend labels: "bad_label, invalid_label"`),
},
}
collector, err := newIPVSCollector(log.NewNopLogger())
if err != nil {
t.Fatal(err)
}
sink := make(chan prometheus.Metric)
go func() {
err = collector.Update(sink)
if err != nil {
panic(fmt.Sprintf("failed to update collector: %v", err))
}
}()
for expected, got := range map[string]string{
prometheus.NewDesc("node_ipvs_connections_total", "The total number of connections made.", nil, nil).String(): (<-sink).Desc().String(),
prometheus.NewDesc("node_ipvs_incoming_packets_total", "The total number of incoming packets.", nil, nil).String(): (<-sink).Desc().String(),
prometheus.NewDesc("node_ipvs_outgoing_packets_total", "The total number of outgoing packets.", nil, nil).String(): (<-sink).Desc().String(),
prometheus.NewDesc("node_ipvs_incoming_bytes_total", "The total amount of incoming data.", nil, nil).String(): (<-sink).Desc().String(),
prometheus.NewDesc("node_ipvs_outgoing_bytes_total", "The total amount of outgoing data.", nil, nil).String(): (<-sink).Desc().String(),
prometheus.NewDesc("node_ipvs_backend_connections_active", "The current active connections by local and remote address.", []string{"local_address", "local_port", "remote_address", "remote_port", "proto", "local_mark"}, nil).String(): (<-sink).Desc().String(),
prometheus.NewDesc("node_ipvs_backend_connections_inactive", "The current inactive connections by local and remote address.", []string{"local_address", "local_port", "remote_address", "remote_port", "proto", "local_mark"}, nil).String(): (<-sink).Desc().String(),
prometheus.NewDesc("node_ipvs_backend_weight", "The current backend weight by local and remote address.", []string{"local_address", "local_port", "remote_address", "remote_port", "proto", "local_mark"}, nil).String(): (<-sink).Desc().String(),
} {
if expected != got {
t.Fatalf("Expected '%s' but got '%s'", expected, got)
}
for _, test := range testcases {
t.Run(test.labels, func(t *testing.T) {
args := []string{"--path.procfs", "fixtures/proc"}
if test.labels != "<none>" {
args = append(args, "--collector.ipvs.backend-labels="+test.labels)
}
if _, err := kingpin.CommandLine.Parse(args); err != nil {
t.Fatal(err)
}
collector, err := newIPVSCollector(log.NewNopLogger())
if err != nil {
if test.err == nil {
t.Fatal(err)
}
if !strings.Contains(err.Error(), test.err.Error()) {
t.Fatalf("expect error: %v contains %v", err, test.err)
}
return
}
if test.err != nil {
t.Fatalf("expect error: %v but got no error", test.err)
}
sink := make(chan prometheus.Metric)
go func() {
err = collector.Update(sink)
if err != nil {
panic(fmt.Sprintf("failed to update collector: %v", err))
}
}()
for _, expected := range test.expects {
got := (<-sink).Desc().String()
if expected != got {
t.Fatalf("Expected '%s' but got '%s'", expected, got)
}
}
})
}
}
@ -77,44 +161,61 @@ func (c miniCollector) Describe(ch chan<- *prometheus.Desc) {
}
func TestIPVSCollectorResponse(t *testing.T) {
if _, err := kingpin.CommandLine.Parse([]string{"--path.procfs", "fixtures/proc"}); err != nil {
t.Fatal(err)
testcases := []struct {
labels string
metricsFile string
}{
{"<none>", "fixtures/ip_vs_result.txt"},
{"", "fixtures/ip_vs_result_lbs_none.txt"},
{"local_port", "fixtures/ip_vs_result_lbs_local_port.txt"},
{"local_address,local_port", "fixtures/ip_vs_result_lbs_local_address_local_port.txt"},
}
collector, err := NewIPVSCollector(log.NewNopLogger())
if err != nil {
t.Fatal(err)
}
prometheus.MustRegister(miniCollector{c: collector})
rw := httptest.NewRecorder()
promhttp.Handler().ServeHTTP(rw, &http.Request{})
metricsFile := "fixtures/ip_vs_result.txt"
wantMetrics, err := ioutil.ReadFile(metricsFile)
if err != nil {
t.Fatalf("unable to read input test file %s: %s", metricsFile, err)
}
wantLines := strings.Split(string(wantMetrics), "\n")
gotLines := strings.Split(string(rw.Body.String()), "\n")
gotLinesIdx := 0
// Until the Prometheus Go client library offers better testability
// (https://github.com/prometheus/client_golang/issues/58), we simply compare
// verbatim text-format metrics outputs, but ignore any lines we don't have
// in the fixture. Put differently, we are only testing that each line from
// the fixture is present, in the order given.
wantLoop:
for _, want := range wantLines {
for _, got := range gotLines[gotLinesIdx:] {
if want == got {
// this is a line we are interested in, and it is correct
continue wantLoop
} else {
gotLinesIdx++
for _, test := range testcases {
t.Run(test.labels, func(t *testing.T) {
args := []string{"--path.procfs", "fixtures/proc"}
if test.labels != "<none>" {
args = append(args, "--collector.ipvs.backend-labels="+test.labels)
}
}
// if this point is reached, the line we want was missing
t.Fatalf("Missing expected output line(s), first missing line is %s", want)
if _, err := kingpin.CommandLine.Parse(args); err != nil {
t.Fatal(err)
}
collector, err := NewIPVSCollector(log.NewNopLogger())
if err != nil {
t.Fatal(err)
}
registry := prometheus.NewRegistry()
registry.MustRegister(miniCollector{c: collector})
rw := httptest.NewRecorder()
promhttp.InstrumentMetricHandler(registry, promhttp.HandlerFor(registry, promhttp.HandlerOpts{})).ServeHTTP(rw, &http.Request{})
wantMetrics, err := ioutil.ReadFile(test.metricsFile)
if err != nil {
t.Fatalf("unable to read input test file %s: %s", test.metricsFile, err)
}
wantLines := strings.Split(string(wantMetrics), "\n")
gotLines := strings.Split(string(rw.Body.String()), "\n")
gotLinesIdx := 0
// Until the Prometheus Go client library offers better testability
// (https://github.com/prometheus/client_golang/issues/58), we simply compare
// verbatim text-format metrics outputs, but ignore any lines we don't have
// in the fixture. Put differently, we are only testing that each line from
// the fixture is present, in the order given.
wantLoop:
for _, want := range wantLines {
for _, got := range gotLines[gotLinesIdx:] {
if want == got {
// this is a line we are interested in, and it is correct
continue wantLoop
} else {
gotLinesIdx++
}
}
// if this point is reached, the line we want was missing
t.Fatalf("Missing expected output line(s), first missing line is %s", want)
}
})
}
}