Merge pull request #5254 from nevill/fix-4890

Change prometheus_sd_configs_failed_total to Gauge
This commit is contained in:
Björn Rabenstein 2019-09-24 12:10:40 +02:00 committed by GitHub
commit 52e0504f83
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 104 additions and 8 deletions

View file

@ -41,10 +41,10 @@ import (
) )
var ( var (
failedConfigs = prometheus.NewCounterVec( failedConfigs = prometheus.NewGaugeVec(
prometheus.CounterOpts{ prometheus.GaugeOpts{
Name: "prometheus_sd_configs_failed_total", Name: "prometheus_sd_failed_configs",
Help: "Total number of service discovery configurations that failed to load.", Help: "Current number of service discovery configurations that failed to load.",
}, },
[]string{"name"}, []string{"name"},
) )
@ -194,10 +194,14 @@ func (m *Manager) ApplyConfig(cfg map[string]sd_config.ServiceDiscoveryConfig) e
m.targets = make(map[poolKey]map[string]*targetgroup.Group) m.targets = make(map[poolKey]map[string]*targetgroup.Group)
m.providers = nil m.providers = nil
m.discoverCancel = nil m.discoverCancel = nil
failedCount := 0
for name, scfg := range cfg { for name, scfg := range cfg {
m.registerProviders(scfg, name) failedCount += m.registerProviders(scfg, name)
discoveredTargets.WithLabelValues(m.name, name).Set(0) discoveredTargets.WithLabelValues(m.name, name).Set(0)
} }
failedConfigs.WithLabelValues(m.name).Set(float64(failedCount))
for _, prov := range m.providers { for _, prov := range m.providers {
m.startProvider(m.ctx, prov) m.startProvider(m.ctx, prov)
} }
@ -317,8 +321,12 @@ func (m *Manager) allGroups() map[string][]*targetgroup.Group {
return tSets return tSets
} }
func (m *Manager) registerProviders(cfg sd_config.ServiceDiscoveryConfig, setName string) { // registerProviders returns the number of failed SD configs.
var added bool func (m *Manager) registerProviders(cfg sd_config.ServiceDiscoveryConfig, setName string) int {
var (
failedCount int
added bool
)
add := func(cfg interface{}, newDiscoverer func() (Discoverer, error)) { add := func(cfg interface{}, newDiscoverer func() (Discoverer, error)) {
t := reflect.TypeOf(cfg).String() t := reflect.TypeOf(cfg).String()
for _, p := range m.providers { for _, p := range m.providers {
@ -332,7 +340,7 @@ func (m *Manager) registerProviders(cfg sd_config.ServiceDiscoveryConfig, setNam
d, err := newDiscoverer() d, err := newDiscoverer()
if err != nil { if err != nil {
level.Error(m.logger).Log("msg", "Cannot create service discovery", "err", err, "type", t) level.Error(m.logger).Log("msg", "Cannot create service discovery", "err", err, "type", t)
failedConfigs.WithLabelValues(m.name).Inc() failedCount++
return return
} }
@ -421,6 +429,7 @@ func (m *Manager) registerProviders(cfg sd_config.ServiceDiscoveryConfig, setNam
return &StaticProvider{TargetGroups: []*targetgroup.Group{{}}}, nil return &StaticProvider{TargetGroups: []*targetgroup.Group{{}}}, nil
}) })
} }
return failedCount
} }
// StaticProvider holds a list of target groups that never change. // StaticProvider holds a list of target groups that never change.

View file

@ -25,6 +25,8 @@ import (
"time" "time"
"github.com/go-kit/kit/log" "github.com/go-kit/kit/log"
"github.com/prometheus/client_golang/prometheus"
dto "github.com/prometheus/client_model/go"
"github.com/prometheus/common/model" "github.com/prometheus/common/model"
"github.com/prometheus/prometheus/config" "github.com/prometheus/prometheus/config"
sd_config "github.com/prometheus/prometheus/discovery/config" sd_config "github.com/prometheus/prometheus/discovery/config"
@ -949,6 +951,91 @@ scrape_configs:
} }
} }
func TestGaugeFailedConfigs(t *testing.T) {
var (
fcGauge prometheus.Gauge
err error
)
cfgOneText := `
scrape_configs:
- job_name: prometheus
consul_sd_configs:
- server: "foo:8500"
tls_config:
cert_file: "/tmp/non_existent"
- server: "bar:8500"
tls_config:
cert_file: "/tmp/non_existent"
- server: "foo2:8500"
tls_config:
cert_file: "/tmp/non_existent"
`
cfgOne := &config.Config{}
err = yaml.UnmarshalStrict([]byte(cfgOneText), cfgOne)
if err != nil {
t.Fatalf("Unable to load YAML config cfgOne: %s", err)
}
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
discoveryManager := NewManager(ctx, log.NewNopLogger())
discoveryManager.updatert = 100 * time.Millisecond
go discoveryManager.Run()
c := make(map[string]sd_config.ServiceDiscoveryConfig)
for _, v := range cfgOne.ScrapeConfigs {
c[v.JobName] = v.ServiceDiscoveryConfig
}
discoveryManager.ApplyConfig(c)
<-discoveryManager.SyncCh()
metricOne := &dto.Metric{}
fcGauge, err = failedConfigs.GetMetricWithLabelValues(discoveryManager.name)
if err != nil {
t.Fatal(err)
}
fcGauge.Write(metricOne)
failedCount := metricOne.GetGauge().GetValue()
if failedCount != 3 {
t.Fatalf("Expected to have 3 failed configs, got: %v", failedCount)
}
cfgTwoText := `
scrape_configs:
- job_name: 'prometheus'
static_configs:
- targets: ["foo:9090"]
`
cfgTwo := &config.Config{}
if err := yaml.UnmarshalStrict([]byte(cfgTwoText), cfgTwo); err != nil {
t.Fatalf("Unable to load YAML config cfgTwo: %s", err)
}
c = make(map[string]sd_config.ServiceDiscoveryConfig)
for _, v := range cfgTwo.ScrapeConfigs {
c[v.JobName] = v.ServiceDiscoveryConfig
}
discoveryManager.ApplyConfig(c)
<-discoveryManager.SyncCh()
metricTwo := &dto.Metric{}
fcGauge, err = failedConfigs.GetMetricWithLabelValues(discoveryManager.name)
if err != nil {
t.Fatal(err)
}
fcGauge.Write(metricTwo)
failedCount = metricTwo.GetGauge().GetValue()
if failedCount != 0 {
t.Fatalf("Expected to get no failed config, got: %v", failedCount)
}
}
func TestCoordinationWithReceiver(t *testing.T) { func TestCoordinationWithReceiver(t *testing.T) {
updateDelay := 100 * time.Millisecond updateDelay := 100 * time.Millisecond