mirror of
https://github.com/prometheus/prometheus.git
synced 2024-12-26 22:19:40 -08:00
Merge pull request #5254 from nevill/fix-4890
Change prometheus_sd_configs_failed_total to Gauge
This commit is contained in:
commit
52e0504f83
|
@ -41,10 +41,10 @@ import (
|
||||||
)
|
)
|
||||||
|
|
||||||
var (
|
var (
|
||||||
failedConfigs = prometheus.NewCounterVec(
|
failedConfigs = prometheus.NewGaugeVec(
|
||||||
prometheus.CounterOpts{
|
prometheus.GaugeOpts{
|
||||||
Name: "prometheus_sd_configs_failed_total",
|
Name: "prometheus_sd_failed_configs",
|
||||||
Help: "Total number of service discovery configurations that failed to load.",
|
Help: "Current number of service discovery configurations that failed to load.",
|
||||||
},
|
},
|
||||||
[]string{"name"},
|
[]string{"name"},
|
||||||
)
|
)
|
||||||
|
@ -194,10 +194,14 @@ func (m *Manager) ApplyConfig(cfg map[string]sd_config.ServiceDiscoveryConfig) e
|
||||||
m.targets = make(map[poolKey]map[string]*targetgroup.Group)
|
m.targets = make(map[poolKey]map[string]*targetgroup.Group)
|
||||||
m.providers = nil
|
m.providers = nil
|
||||||
m.discoverCancel = nil
|
m.discoverCancel = nil
|
||||||
|
|
||||||
|
failedCount := 0
|
||||||
for name, scfg := range cfg {
|
for name, scfg := range cfg {
|
||||||
m.registerProviders(scfg, name)
|
failedCount += m.registerProviders(scfg, name)
|
||||||
discoveredTargets.WithLabelValues(m.name, name).Set(0)
|
discoveredTargets.WithLabelValues(m.name, name).Set(0)
|
||||||
}
|
}
|
||||||
|
failedConfigs.WithLabelValues(m.name).Set(float64(failedCount))
|
||||||
|
|
||||||
for _, prov := range m.providers {
|
for _, prov := range m.providers {
|
||||||
m.startProvider(m.ctx, prov)
|
m.startProvider(m.ctx, prov)
|
||||||
}
|
}
|
||||||
|
@ -317,8 +321,12 @@ func (m *Manager) allGroups() map[string][]*targetgroup.Group {
|
||||||
return tSets
|
return tSets
|
||||||
}
|
}
|
||||||
|
|
||||||
func (m *Manager) registerProviders(cfg sd_config.ServiceDiscoveryConfig, setName string) {
|
// registerProviders returns the number of failed SD configs.
|
||||||
var added bool
|
func (m *Manager) registerProviders(cfg sd_config.ServiceDiscoveryConfig, setName string) int {
|
||||||
|
var (
|
||||||
|
failedCount int
|
||||||
|
added bool
|
||||||
|
)
|
||||||
add := func(cfg interface{}, newDiscoverer func() (Discoverer, error)) {
|
add := func(cfg interface{}, newDiscoverer func() (Discoverer, error)) {
|
||||||
t := reflect.TypeOf(cfg).String()
|
t := reflect.TypeOf(cfg).String()
|
||||||
for _, p := range m.providers {
|
for _, p := range m.providers {
|
||||||
|
@ -332,7 +340,7 @@ func (m *Manager) registerProviders(cfg sd_config.ServiceDiscoveryConfig, setNam
|
||||||
d, err := newDiscoverer()
|
d, err := newDiscoverer()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
level.Error(m.logger).Log("msg", "Cannot create service discovery", "err", err, "type", t)
|
level.Error(m.logger).Log("msg", "Cannot create service discovery", "err", err, "type", t)
|
||||||
failedConfigs.WithLabelValues(m.name).Inc()
|
failedCount++
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -421,6 +429,7 @@ func (m *Manager) registerProviders(cfg sd_config.ServiceDiscoveryConfig, setNam
|
||||||
return &StaticProvider{TargetGroups: []*targetgroup.Group{{}}}, nil
|
return &StaticProvider{TargetGroups: []*targetgroup.Group{{}}}, nil
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
return failedCount
|
||||||
}
|
}
|
||||||
|
|
||||||
// StaticProvider holds a list of target groups that never change.
|
// StaticProvider holds a list of target groups that never change.
|
||||||
|
|
|
@ -25,6 +25,8 @@ import (
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/go-kit/kit/log"
|
"github.com/go-kit/kit/log"
|
||||||
|
"github.com/prometheus/client_golang/prometheus"
|
||||||
|
dto "github.com/prometheus/client_model/go"
|
||||||
"github.com/prometheus/common/model"
|
"github.com/prometheus/common/model"
|
||||||
"github.com/prometheus/prometheus/config"
|
"github.com/prometheus/prometheus/config"
|
||||||
sd_config "github.com/prometheus/prometheus/discovery/config"
|
sd_config "github.com/prometheus/prometheus/discovery/config"
|
||||||
|
@ -949,6 +951,91 @@ scrape_configs:
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestGaugeFailedConfigs(t *testing.T) {
|
||||||
|
var (
|
||||||
|
fcGauge prometheus.Gauge
|
||||||
|
err error
|
||||||
|
)
|
||||||
|
|
||||||
|
cfgOneText := `
|
||||||
|
scrape_configs:
|
||||||
|
- job_name: prometheus
|
||||||
|
consul_sd_configs:
|
||||||
|
- server: "foo:8500"
|
||||||
|
tls_config:
|
||||||
|
cert_file: "/tmp/non_existent"
|
||||||
|
- server: "bar:8500"
|
||||||
|
tls_config:
|
||||||
|
cert_file: "/tmp/non_existent"
|
||||||
|
- server: "foo2:8500"
|
||||||
|
tls_config:
|
||||||
|
cert_file: "/tmp/non_existent"
|
||||||
|
`
|
||||||
|
cfgOne := &config.Config{}
|
||||||
|
|
||||||
|
err = yaml.UnmarshalStrict([]byte(cfgOneText), cfgOne)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("Unable to load YAML config cfgOne: %s", err)
|
||||||
|
}
|
||||||
|
ctx, cancel := context.WithCancel(context.Background())
|
||||||
|
defer cancel()
|
||||||
|
discoveryManager := NewManager(ctx, log.NewNopLogger())
|
||||||
|
discoveryManager.updatert = 100 * time.Millisecond
|
||||||
|
go discoveryManager.Run()
|
||||||
|
|
||||||
|
c := make(map[string]sd_config.ServiceDiscoveryConfig)
|
||||||
|
for _, v := range cfgOne.ScrapeConfigs {
|
||||||
|
c[v.JobName] = v.ServiceDiscoveryConfig
|
||||||
|
}
|
||||||
|
|
||||||
|
discoveryManager.ApplyConfig(c)
|
||||||
|
<-discoveryManager.SyncCh()
|
||||||
|
|
||||||
|
metricOne := &dto.Metric{}
|
||||||
|
fcGauge, err = failedConfigs.GetMetricWithLabelValues(discoveryManager.name)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
fcGauge.Write(metricOne)
|
||||||
|
|
||||||
|
failedCount := metricOne.GetGauge().GetValue()
|
||||||
|
if failedCount != 3 {
|
||||||
|
t.Fatalf("Expected to have 3 failed configs, got: %v", failedCount)
|
||||||
|
}
|
||||||
|
|
||||||
|
cfgTwoText := `
|
||||||
|
scrape_configs:
|
||||||
|
- job_name: 'prometheus'
|
||||||
|
static_configs:
|
||||||
|
- targets: ["foo:9090"]
|
||||||
|
`
|
||||||
|
cfgTwo := &config.Config{}
|
||||||
|
if err := yaml.UnmarshalStrict([]byte(cfgTwoText), cfgTwo); err != nil {
|
||||||
|
t.Fatalf("Unable to load YAML config cfgTwo: %s", err)
|
||||||
|
}
|
||||||
|
c = make(map[string]sd_config.ServiceDiscoveryConfig)
|
||||||
|
for _, v := range cfgTwo.ScrapeConfigs {
|
||||||
|
c[v.JobName] = v.ServiceDiscoveryConfig
|
||||||
|
}
|
||||||
|
|
||||||
|
discoveryManager.ApplyConfig(c)
|
||||||
|
<-discoveryManager.SyncCh()
|
||||||
|
|
||||||
|
metricTwo := &dto.Metric{}
|
||||||
|
fcGauge, err = failedConfigs.GetMetricWithLabelValues(discoveryManager.name)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
fcGauge.Write(metricTwo)
|
||||||
|
|
||||||
|
failedCount = metricTwo.GetGauge().GetValue()
|
||||||
|
if failedCount != 0 {
|
||||||
|
t.Fatalf("Expected to get no failed config, got: %v", failedCount)
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
func TestCoordinationWithReceiver(t *testing.T) {
|
func TestCoordinationWithReceiver(t *testing.T) {
|
||||||
updateDelay := 100 * time.Millisecond
|
updateDelay := 100 * time.Millisecond
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue