mirror of
https://github.com/prometheus/prometheus.git
synced 2025-02-21 03:16:00 -08:00
Add Consul-SD metrics (#2097)
* Add Consul-SD metrics
* Remove unnecessary metric and add labels to summary.
* Do not stutter
This commit is contained in:
parent
5273b55a0c
commit
0c69227616
|
@ -21,6 +21,7 @@ import (
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
consul "github.com/hashicorp/consul/api"
|
consul "github.com/hashicorp/consul/api"
|
||||||
|
"github.com/prometheus/client_golang/prometheus"
|
||||||
"github.com/prometheus/common/log"
|
"github.com/prometheus/common/log"
|
||||||
"github.com/prometheus/common/model"
|
"github.com/prometheus/common/model"
|
||||||
"golang.org/x/net/context"
|
"golang.org/x/net/context"
|
||||||
|
@ -48,8 +49,37 @@ const (
|
||||||
datacenterLabel = model.MetaLabelPrefix + "consul_dc"
|
datacenterLabel = model.MetaLabelPrefix + "consul_dc"
|
||||||
// serviceIDLabel is the name of the label containing the service ID.
|
// serviceIDLabel is the name of the label containing the service ID.
|
||||||
serviceIDLabel = model.MetaLabelPrefix + "consul_service_id"
|
serviceIDLabel = model.MetaLabelPrefix + "consul_service_id"
|
||||||
|
|
||||||
|
// Constants for instrumentation.
|
||||||
|
namespace = "prometheus"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
var (
|
||||||
|
rpcFailuresCount = prometheus.NewCounter(
|
||||||
|
prometheus.CounterOpts{
|
||||||
|
Namespace: namespace,
|
||||||
|
Name: "sd_consul_rpc_failures_total",
|
||||||
|
Help: "The number of Consul RPC call failures.",
|
||||||
|
})
|
||||||
|
rpcDuration = prometheus.NewSummaryVec(
|
||||||
|
prometheus.SummaryOpts{
|
||||||
|
Namespace: namespace,
|
||||||
|
Name: "sd_consul_rpc_duration_seconds",
|
||||||
|
Help: "The duration of a Consul RPC call in seconds.",
|
||||||
|
},
|
||||||
|
[]string{"endpoint", "call"},
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
func init() {
|
||||||
|
prometheus.MustRegister(rpcFailuresCount)
|
||||||
|
prometheus.MustRegister(rpcDuration)
|
||||||
|
|
||||||
|
// Initialize metric vectors.
|
||||||
|
rpcDuration.WithLabelValues("catalog", "service")
|
||||||
|
rpcDuration.WithLabelValues("catalog", "services")
|
||||||
|
}
|
||||||
|
|
||||||
// Discovery retrieves target information from a Consul server
|
// Discovery retrieves target information from a Consul server
|
||||||
// and updates them via watches.
|
// and updates them via watches.
|
||||||
type Discovery struct {
|
type Discovery struct {
|
||||||
|
@ -110,10 +140,12 @@ func (cd *Discovery) Run(ctx context.Context, ch chan<- []*config.TargetGroup) {
|
||||||
var lastIndex uint64
|
var lastIndex uint64
|
||||||
for {
|
for {
|
||||||
catalog := cd.client.Catalog()
|
catalog := cd.client.Catalog()
|
||||||
|
t0 := time.Now()
|
||||||
srvs, meta, err := catalog.Services(&consul.QueryOptions{
|
srvs, meta, err := catalog.Services(&consul.QueryOptions{
|
||||||
WaitIndex: lastIndex,
|
WaitIndex: lastIndex,
|
||||||
WaitTime: watchTimeout,
|
WaitTime: watchTimeout,
|
||||||
})
|
})
|
||||||
|
rpcDuration.WithLabelValues("catalog", "services").Observe(time.Since(t0).Seconds())
|
||||||
|
|
||||||
// We have to check the context at least once. The checks during channel sends
|
// We have to check the context at least once. The checks during channel sends
|
||||||
// do not guarantee that.
|
// do not guarantee that.
|
||||||
|
@ -125,6 +157,7 @@ func (cd *Discovery) Run(ctx context.Context, ch chan<- []*config.TargetGroup) {
|
||||||
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Errorf("Error refreshing service list: %s", err)
|
log.Errorf("Error refreshing service list: %s", err)
|
||||||
|
rpcFailuresCount.Inc()
|
||||||
time.Sleep(retryInterval)
|
time.Sleep(retryInterval)
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
@ -202,10 +235,13 @@ func (srv *consulService) watch(ctx context.Context, ch chan<- []*config.TargetG
|
||||||
|
|
||||||
lastIndex := uint64(0)
|
lastIndex := uint64(0)
|
||||||
for {
|
for {
|
||||||
|
t0 := time.Now()
|
||||||
nodes, meta, err := catalog.Service(srv.name, "", &consul.QueryOptions{
|
nodes, meta, err := catalog.Service(srv.name, "", &consul.QueryOptions{
|
||||||
WaitIndex: lastIndex,
|
WaitIndex: lastIndex,
|
||||||
WaitTime: watchTimeout,
|
WaitTime: watchTimeout,
|
||||||
})
|
})
|
||||||
|
rpcDuration.WithLabelValues("catalog", "service").Observe(time.Since(t0).Seconds())
|
||||||
|
|
||||||
// Check the context before potentially falling in a continue-loop.
|
// Check the context before potentially falling in a continue-loop.
|
||||||
select {
|
select {
|
||||||
case <-ctx.Done():
|
case <-ctx.Done():
|
||||||
|
@ -216,6 +252,7 @@ func (srv *consulService) watch(ctx context.Context, ch chan<- []*config.TargetG
|
||||||
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Errorf("Error refreshing service %s: %s", srv.name, err)
|
log.Errorf("Error refreshing service %s: %s", srv.name, err)
|
||||||
|
rpcFailuresCount.Inc()
|
||||||
time.Sleep(retryInterval)
|
time.Sleep(retryInterval)
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue