mirror of
https://github.com/prometheus/prometheus.git
synced 2024-12-25 05:34:05 -08:00
Added a failure counter to the HTTP service discovery (#10372)
* Added a failure counter to the HTTP service discovery.

Signed-off-by: David N Perkins <David.N.Perkins@ibm.com>
This commit is contained in:
parent
025528a5d6
commit
097b359b41
|
@ -28,6 +28,7 @@ import (
|
||||||
"github.com/go-kit/log"
|
"github.com/go-kit/log"
|
||||||
"github.com/grafana/regexp"
|
"github.com/grafana/regexp"
|
||||||
"github.com/pkg/errors"
|
"github.com/pkg/errors"
|
||||||
|
"github.com/prometheus/client_golang/prometheus"
|
||||||
"github.com/prometheus/common/config"
|
"github.com/prometheus/common/config"
|
||||||
"github.com/prometheus/common/model"
|
"github.com/prometheus/common/model"
|
||||||
"github.com/prometheus/common/version"
|
"github.com/prometheus/common/version"
|
||||||
|
@ -45,10 +46,17 @@ var (
|
||||||
}
|
}
|
||||||
userAgent = fmt.Sprintf("Prometheus/%s", version.Version)
|
userAgent = fmt.Sprintf("Prometheus/%s", version.Version)
|
||||||
matchContentType = regexp.MustCompile(`^(?i:application\/json(;\s*charset=("utf-8"|utf-8))?)$`)
|
matchContentType = regexp.MustCompile(`^(?i:application\/json(;\s*charset=("utf-8"|utf-8))?)$`)
|
||||||
|
|
||||||
|
failuresCount = prometheus.NewCounter(
|
||||||
|
prometheus.CounterOpts{
|
||||||
|
Name: "prometheus_sd_http_failures_total",
|
||||||
|
Help: "Number of HTTP service discovery refresh failures.",
|
||||||
|
})
|
||||||
)
|
)
|
||||||
|
|
||||||
// init registers SDConfig with the discovery package. In the updated revision
// shown here it also registers the failuresCount counter through
// prometheus.MustRegister, so the failure metric is exposed as soon as the
// package is imported.
func init() {
|
func init() {
|
||||||
discovery.RegisterConfig(&SDConfig{})
|
discovery.RegisterConfig(&SDConfig{})
|
||||||
|
prometheus.MustRegister(failuresCount)
|
||||||
}
|
}
|
||||||
|
|
||||||
// SDConfig is the configuration for HTTP based discovery.
|
// SDConfig is the configuration for HTTP based discovery.
|
||||||
|
@ -145,6 +153,7 @@ func (d *Discovery) refresh(ctx context.Context) ([]*targetgroup.Group, error) {
|
||||||
|
|
||||||
resp, err := d.client.Do(req.WithContext(ctx))
|
resp, err := d.client.Do(req.WithContext(ctx))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
failuresCount.Inc()
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
defer func() {
|
defer func() {
|
||||||
|
@ -153,26 +162,31 @@ func (d *Discovery) refresh(ctx context.Context) ([]*targetgroup.Group, error) {
|
||||||
}()
|
}()
|
||||||
|
|
||||||
if resp.StatusCode != http.StatusOK {
|
if resp.StatusCode != http.StatusOK {
|
||||||
|
failuresCount.Inc()
|
||||||
return nil, errors.Errorf("server returned HTTP status %s", resp.Status)
|
return nil, errors.Errorf("server returned HTTP status %s", resp.Status)
|
||||||
}
|
}
|
||||||
|
|
||||||
if !matchContentType.MatchString(strings.TrimSpace(resp.Header.Get("Content-Type"))) {
|
if !matchContentType.MatchString(strings.TrimSpace(resp.Header.Get("Content-Type"))) {
|
||||||
|
failuresCount.Inc()
|
||||||
return nil, errors.Errorf("unsupported content type %q", resp.Header.Get("Content-Type"))
|
return nil, errors.Errorf("unsupported content type %q", resp.Header.Get("Content-Type"))
|
||||||
}
|
}
|
||||||
|
|
||||||
b, err := ioutil.ReadAll(resp.Body)
|
b, err := ioutil.ReadAll(resp.Body)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
failuresCount.Inc()
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
var targetGroups []*targetgroup.Group
|
var targetGroups []*targetgroup.Group
|
||||||
|
|
||||||
if err := json.Unmarshal(b, &targetGroups); err != nil {
|
if err := json.Unmarshal(b, &targetGroups); err != nil {
|
||||||
|
failuresCount.Inc()
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
for i, tg := range targetGroups {
|
for i, tg := range targetGroups {
|
||||||
if tg == nil {
|
if tg == nil {
|
||||||
|
failuresCount.Inc()
|
||||||
err = errors.New("nil target group item found")
|
err = errors.New("nil target group item found")
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
|
@ -22,6 +22,8 @@ import (
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/go-kit/log"
|
"github.com/go-kit/log"
|
||||||
|
"github.com/prometheus/client_golang/prometheus"
|
||||||
|
dto "github.com/prometheus/client_model/go"
|
||||||
"github.com/prometheus/common/config"
|
"github.com/prometheus/common/config"
|
||||||
"github.com/prometheus/common/model"
|
"github.com/prometheus/common/model"
|
||||||
"github.com/stretchr/testify/require"
|
"github.com/stretchr/testify/require"
|
||||||
|
@ -61,6 +63,7 @@ func TestHTTPValidRefresh(t *testing.T) {
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
require.Equal(t, tgs, expectedTargets)
|
require.Equal(t, tgs, expectedTargets)
|
||||||
|
require.Equal(t, 0.0, getFailureCount())
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestHTTPInvalidCode(t *testing.T) {
|
func TestHTTPInvalidCode(t *testing.T) {
|
||||||
|
@ -82,6 +85,7 @@ func TestHTTPInvalidCode(t *testing.T) {
|
||||||
ctx := context.Background()
|
ctx := context.Background()
|
||||||
_, err = d.refresh(ctx)
|
_, err = d.refresh(ctx)
|
||||||
require.EqualError(t, err, "server returned HTTP status 400 Bad Request")
|
require.EqualError(t, err, "server returned HTTP status 400 Bad Request")
|
||||||
|
require.Equal(t, 1.0, getFailureCount())
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestHTTPInvalidFormat(t *testing.T) {
|
func TestHTTPInvalidFormat(t *testing.T) {
|
||||||
|
@ -103,6 +107,32 @@ func TestHTTPInvalidFormat(t *testing.T) {
|
||||||
ctx := context.Background()
|
ctx := context.Background()
|
||||||
_, err = d.refresh(ctx)
|
_, err = d.refresh(ctx)
|
||||||
require.EqualError(t, err, `unsupported content type "text/plain; charset=utf-8"`)
|
require.EqualError(t, err, `unsupported content type "text/plain; charset=utf-8"`)
|
||||||
|
require.Equal(t, 1.0, getFailureCount())
|
||||||
|
}
|
||||||
|
|
||||||
|
var lastFailureCount float64
|
||||||
|
|
||||||
|
func getFailureCount() float64 {
|
||||||
|
failureChan := make(chan prometheus.Metric)
|
||||||
|
|
||||||
|
go func() {
|
||||||
|
failuresCount.Collect(failureChan)
|
||||||
|
close(failureChan)
|
||||||
|
}()
|
||||||
|
|
||||||
|
var counter dto.Metric
|
||||||
|
for {
|
||||||
|
metric, ok := <-failureChan
|
||||||
|
if ok == false {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
metric.Write(&counter)
|
||||||
|
}
|
||||||
|
|
||||||
|
// account for failures in prior tests
|
||||||
|
count := *counter.Counter.Value - lastFailureCount
|
||||||
|
lastFailureCount = *counter.Counter.Value
|
||||||
|
return count
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestContentTypeRegex(t *testing.T) {
|
func TestContentTypeRegex(t *testing.T) {
|
||||||
|
|
|
@ -1448,8 +1448,9 @@ Example response body:
|
||||||
]
|
]
|
||||||
```
|
```
|
||||||
|
|
||||||
The endpoint is queried periodically at the specified
|
The endpoint is queried periodically at the specified refresh interval.
|
||||||
refresh interval.
|
The `prometheus_sd_http_failures_total` counter metric tracks the number of
|
||||||
|
refresh failures.
|
||||||
|
|
||||||
Each target has a meta label `__meta_url` during the
|
Each target has a meta label `__meta_url` during the
|
||||||
[relabeling phase](#relabel_config). Its value is set to the
|
[relabeling phase](#relabel_config). Its value is set to the
|
||||||
|
|
|
@ -40,7 +40,8 @@ an empty list `[]`. Target lists are unordered.
|
||||||
|
|
||||||
Prometheus caches target lists. If an error occurs while fetching an updated
|
Prometheus caches target lists. If an error occurs while fetching an updated
|
||||||
targets list, Prometheus keeps using the current targets list. The targets list
|
targets list, Prometheus keeps using the current targets list. The targets list
|
||||||
is not saved across restart.
|
is not saved across restarts. The `prometheus_sd_http_failures_total` counter
|
||||||
|
metric tracks the number of refresh failures.
|
||||||
|
|
||||||
The whole list of targets must be returned on every scrape. There is no support
|
The whole list of targets must be returned on every scrape. There is no support
|
||||||
for incremental updates. A Prometheus instance does not send its hostname and it
|
for incremental updates. A Prometheus instance does not send its hostname and it
|
||||||
|
|
Loading…
Reference in a new issue