From 048f81218ddfc03bd33c9a7346bfd0ea72f498be Mon Sep 17 00:00:00 2001 From: Nevill Date: Fri, 22 Feb 2019 17:24:19 +0800 Subject: [PATCH 1/2] Change prometheus_sd_configs_failed_total to Gauge Signed-off-by: Nevill --- discovery/manager.go | 11 +++-- discovery/manager_test.go | 87 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 94 insertions(+), 4 deletions(-) diff --git a/discovery/manager.go b/discovery/manager.go index 4625e42a3..00293ca66 100644 --- a/discovery/manager.go +++ b/discovery/manager.go @@ -41,10 +41,10 @@ import ( ) var ( - failedConfigs = prometheus.NewCounterVec( - prometheus.CounterOpts{ - Name: "prometheus_sd_configs_failed_total", - Help: "Total number of service discovery configurations that failed to load.", + failedConfigs = prometheus.NewGaugeVec( + prometheus.GaugeOpts{ + Name: "prometheus_sd_failed_configs", + Help: "Current number of service discovery configurations that failed to load.", }, []string{"name"}, ) @@ -194,6 +194,9 @@ func (m *Manager) ApplyConfig(cfg map[string]sd_config.ServiceDiscoveryConfig) e m.targets = make(map[poolKey]map[string]*targetgroup.Group) m.providers = nil m.discoverCancel = nil + + failedConfigs.WithLabelValues(m.name).Set(0) + for name, scfg := range cfg { m.registerProviders(scfg, name) discoveredTargets.WithLabelValues(m.name, name).Set(0) diff --git a/discovery/manager_test.go b/discovery/manager_test.go index b2bff1fc4..9e5b229fb 100644 --- a/discovery/manager_test.go +++ b/discovery/manager_test.go @@ -25,6 +25,8 @@ import ( "time" "github.com/go-kit/kit/log" + "github.com/prometheus/client_golang/prometheus" + dto "github.com/prometheus/client_model/go" "github.com/prometheus/common/model" "github.com/prometheus/prometheus/config" sd_config "github.com/prometheus/prometheus/discovery/config" @@ -949,6 +951,91 @@ scrape_configs: } } +func TestGaugeFailedConfigs(t *testing.T) { + var ( + fcGauge prometheus.Gauge + err error + ) + + cfgOneText := ` +scrape_configs: +- job_name: prometheus + 
consul_sd_configs: + - server: "foo:8500" + tls_config: + cert_file: "/tmp/non_existent" + - server: "bar:8500" + tls_config: + cert_file: "/tmp/non_existent" + - server: "foo2:8500" + tls_config: + cert_file: "/tmp/non_existent" +` + cfgOne := &config.Config{} + + err = yaml.UnmarshalStrict([]byte(cfgOneText), cfgOne) + if err != nil { + t.Fatalf("Unable to load YAML config cfgOne: %s", err) + } + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + discoveryManager := NewManager(ctx, log.NewNopLogger()) + discoveryManager.updatert = 100 * time.Millisecond + go discoveryManager.Run() + + c := make(map[string]sd_config.ServiceDiscoveryConfig) + for _, v := range cfgOne.ScrapeConfigs { + c[v.JobName] = v.ServiceDiscoveryConfig + } + + discoveryManager.ApplyConfig(c) + <-discoveryManager.SyncCh() + + metricOne := &dto.Metric{} + fcGauge, err = failedConfigs.GetMetricWithLabelValues(discoveryManager.name) + if err != nil { + t.Fatal(err) + } + + fcGauge.Write(metricOne) + + failedCount := metricOne.GetGauge().GetValue() + if failedCount != 3 { + t.Fatalf("Expected to have 3 failed configs, got: %v", failedCount) + } + + cfgTwoText := ` +scrape_configs: + - job_name: 'prometheus' + static_configs: + - targets: ["foo:9090"] +` + cfgTwo := &config.Config{} + if err := yaml.UnmarshalStrict([]byte(cfgTwoText), cfgTwo); err != nil { + t.Fatalf("Unable to load YAML config cfgTwo: %s", err) + } + c = make(map[string]sd_config.ServiceDiscoveryConfig) + for _, v := range cfgTwo.ScrapeConfigs { + c[v.JobName] = v.ServiceDiscoveryConfig + } + + discoveryManager.ApplyConfig(c) + <-discoveryManager.SyncCh() + + metricTwo := &dto.Metric{} + fcGauge, err = failedConfigs.GetMetricWithLabelValues(discoveryManager.name) + if err != nil { + t.Fatal(err) + } + fcGauge.Write(metricTwo) + + failedCount = metricTwo.GetGauge().GetValue() + if failedCount != 0 { + t.Fatalf("Expected to get no failed config, got: %v", failedCount) + } + +} + func 
TestCoordinationWithReceiver(t *testing.T) { updateDelay := 100 * time.Millisecond From 55661ab004ca8e901c9bbd796816e5d4fde53d0e Mon Sep 17 00:00:00 2001 From: Nevill Date: Sat, 21 Sep 2019 12:01:57 +0800 Subject: [PATCH 2/2] Set failedConfigs only once right after registerProviders finished Signed-off-by: Nevill --- discovery/manager.go | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/discovery/manager.go b/discovery/manager.go index 00293ca66..5457bd9b2 100644 --- a/discovery/manager.go +++ b/discovery/manager.go @@ -195,12 +195,13 @@ func (m *Manager) ApplyConfig(cfg map[string]sd_config.ServiceDiscoveryConfig) e m.providers = nil m.discoverCancel = nil - failedConfigs.WithLabelValues(m.name).Set(0) - + failedCount := 0 for name, scfg := range cfg { - m.registerProviders(scfg, name) + failedCount += m.registerProviders(scfg, name) discoveredTargets.WithLabelValues(m.name, name).Set(0) } + failedConfigs.WithLabelValues(m.name).Set(float64(failedCount)) + for _, prov := range m.providers { m.startProvider(m.ctx, prov) } @@ -320,8 +321,12 @@ func (m *Manager) allGroups() map[string][]*targetgroup.Group { return tSets } -func (m *Manager) registerProviders(cfg sd_config.ServiceDiscoveryConfig, setName string) { - var added bool +// registerProviders returns the number of failed SD configs. 
+func (m *Manager) registerProviders(cfg sd_config.ServiceDiscoveryConfig, setName string) int { + var ( + failedCount int + added bool + ) add := func(cfg interface{}, newDiscoverer func() (Discoverer, error)) { t := reflect.TypeOf(cfg).String() for _, p := range m.providers { @@ -335,7 +340,7 @@ func (m *Manager) registerProviders(cfg sd_config.ServiceDiscoveryConfig, setNam d, err := newDiscoverer() if err != nil { level.Error(m.logger).Log("msg", "Cannot create service discovery", "err", err, "type", t) - failedConfigs.WithLabelValues(m.name).Inc() + failedCount++ return } @@ -424,6 +429,7 @@ func (m *Manager) registerProviders(cfg sd_config.ServiceDiscoveryConfig, setNam return &StaticProvider{TargetGroups: []*targetgroup.Group{{}}}, nil }) } + return failedCount } // StaticProvider holds a list of target groups that never change.