Merge pull request #5254 from nevill/fix-4890

Change prometheus_sd_configs_failed_total to Gauge
This commit is contained in:
Björn Rabenstein 2019-09-24 12:10:40 +02:00 committed by GitHub
commit 52e0504f83
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 104 additions and 8 deletions

View file

@ -41,10 +41,10 @@ import (
)
var (
failedConfigs = prometheus.NewCounterVec(
prometheus.CounterOpts{
Name: "prometheus_sd_configs_failed_total",
Help: "Total number of service discovery configurations that failed to load.",
failedConfigs = prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Name: "prometheus_sd_failed_configs",
Help: "Current number of service discovery configurations that failed to load.",
},
[]string{"name"},
)
@ -194,10 +194,14 @@ func (m *Manager) ApplyConfig(cfg map[string]sd_config.ServiceDiscoveryConfig) e
m.targets = make(map[poolKey]map[string]*targetgroup.Group)
m.providers = nil
m.discoverCancel = nil
failedCount := 0
for name, scfg := range cfg {
m.registerProviders(scfg, name)
failedCount += m.registerProviders(scfg, name)
discoveredTargets.WithLabelValues(m.name, name).Set(0)
}
failedConfigs.WithLabelValues(m.name).Set(float64(failedCount))
for _, prov := range m.providers {
m.startProvider(m.ctx, prov)
}
@ -317,8 +321,12 @@ func (m *Manager) allGroups() map[string][]*targetgroup.Group {
return tSets
}
func (m *Manager) registerProviders(cfg sd_config.ServiceDiscoveryConfig, setName string) {
var added bool
// registerProviders returns the number of failed SD configs.
func (m *Manager) registerProviders(cfg sd_config.ServiceDiscoveryConfig, setName string) int {
var (
failedCount int
added bool
)
add := func(cfg interface{}, newDiscoverer func() (Discoverer, error)) {
t := reflect.TypeOf(cfg).String()
for _, p := range m.providers {
@ -332,7 +340,7 @@ func (m *Manager) registerProviders(cfg sd_config.ServiceDiscoveryConfig, setNam
d, err := newDiscoverer()
if err != nil {
level.Error(m.logger).Log("msg", "Cannot create service discovery", "err", err, "type", t)
failedConfigs.WithLabelValues(m.name).Inc()
failedCount++
return
}
@ -421,6 +429,7 @@ func (m *Manager) registerProviders(cfg sd_config.ServiceDiscoveryConfig, setNam
return &StaticProvider{TargetGroups: []*targetgroup.Group{{}}}, nil
})
}
return failedCount
}
// StaticProvider holds a list of target groups that never change.

View file

@ -25,6 +25,8 @@ import (
"time"
"github.com/go-kit/kit/log"
"github.com/prometheus/client_golang/prometheus"
dto "github.com/prometheus/client_model/go"
"github.com/prometheus/common/model"
"github.com/prometheus/prometheus/config"
sd_config "github.com/prometheus/prometheus/discovery/config"
@ -949,6 +951,91 @@ scrape_configs:
}
}
func TestGaugeFailedConfigs(t *testing.T) {
var (
fcGauge prometheus.Gauge
err error
)
cfgOneText := `
scrape_configs:
- job_name: prometheus
consul_sd_configs:
- server: "foo:8500"
tls_config:
cert_file: "/tmp/non_existent"
- server: "bar:8500"
tls_config:
cert_file: "/tmp/non_existent"
- server: "foo2:8500"
tls_config:
cert_file: "/tmp/non_existent"
`
cfgOne := &config.Config{}
err = yaml.UnmarshalStrict([]byte(cfgOneText), cfgOne)
if err != nil {
t.Fatalf("Unable to load YAML config cfgOne: %s", err)
}
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
discoveryManager := NewManager(ctx, log.NewNopLogger())
discoveryManager.updatert = 100 * time.Millisecond
go discoveryManager.Run()
c := make(map[string]sd_config.ServiceDiscoveryConfig)
for _, v := range cfgOne.ScrapeConfigs {
c[v.JobName] = v.ServiceDiscoveryConfig
}
discoveryManager.ApplyConfig(c)
<-discoveryManager.SyncCh()
metricOne := &dto.Metric{}
fcGauge, err = failedConfigs.GetMetricWithLabelValues(discoveryManager.name)
if err != nil {
t.Fatal(err)
}
fcGauge.Write(metricOne)
failedCount := metricOne.GetGauge().GetValue()
if failedCount != 3 {
t.Fatalf("Expected to have 3 failed configs, got: %v", failedCount)
}
cfgTwoText := `
scrape_configs:
- job_name: 'prometheus'
static_configs:
- targets: ["foo:9090"]
`
cfgTwo := &config.Config{}
if err := yaml.UnmarshalStrict([]byte(cfgTwoText), cfgTwo); err != nil {
t.Fatalf("Unable to load YAML config cfgTwo: %s", err)
}
c = make(map[string]sd_config.ServiceDiscoveryConfig)
for _, v := range cfgTwo.ScrapeConfigs {
c[v.JobName] = v.ServiceDiscoveryConfig
}
discoveryManager.ApplyConfig(c)
<-discoveryManager.SyncCh()
metricTwo := &dto.Metric{}
fcGauge, err = failedConfigs.GetMetricWithLabelValues(discoveryManager.name)
if err != nil {
t.Fatal(err)
}
fcGauge.Write(metricTwo)
failedCount = metricTwo.GetGauge().GetValue()
if failedCount != 0 {
t.Fatalf("Expected to get no failed config, got: %v", failedCount)
}
}
func TestCoordinationWithReceiver(t *testing.T) {
updateDelay := 100 * time.Millisecond