From c40e269c3e514953299e9ba1f6265e067ab43e64 Mon Sep 17 00:00:00 2001 From: TJ Hoplock <33664289+tjhop@users.noreply.github.com> Date: Wed, 11 May 2022 10:54:08 -0400 Subject: [PATCH] feat: add linode SD failure count metric (#10673) This commit introduces a new metric to count the number of failed requests to Linode's API when using Linode SD. Resolves #10672, inspired by #10476. _Note_: this doens't count failures when polling the `/account/events` endpoint, as a `401` there is how we determine if the supplied token has the needed API scopes to do event polling vs full refreshes each interval. Signed-off-by: TJ Hoplock --- discovery/linode/linode.go | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/discovery/linode/linode.go b/discovery/linode/linode.go index a673b44c68..0fd0a2c370 100644 --- a/discovery/linode/linode.go +++ b/discovery/linode/linode.go @@ -25,6 +25,7 @@ import ( "github.com/go-kit/log" "github.com/linode/linodego" + "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/config" "github.com/prometheus/common/model" "github.com/prometheus/common/version" @@ -65,15 +66,24 @@ const ( ) // DefaultSDConfig is the default Linode SD configuration. -var DefaultSDConfig = SDConfig{ - TagSeparator: ",", - Port: 80, - RefreshInterval: model.Duration(60 * time.Second), - HTTPClientConfig: config.DefaultHTTPClientConfig, -} +var ( + DefaultSDConfig = SDConfig{ + TagSeparator: ",", + Port: 80, + RefreshInterval: model.Duration(60 * time.Second), + HTTPClientConfig: config.DefaultHTTPClientConfig, + } + + failuresCount = prometheus.NewCounter( + prometheus.CounterOpts{ + Name: "prometheus_sd_linode_failures_total", + Help: "Number of Linode service discovery refresh failures.", + }) +) func init() { discovery.RegisterConfig(&SDConfig{}) + prometheus.MustRegister(failuresCount) } // SDConfig is the configuration for Linode based service discovery. @@ -211,12 +221,14 @@ func (d *Discovery) refreshData(ctx context.Context) ([]*targetgroup.Group, erro // Gather all linode instances. instances, err := d.client.ListInstances(ctx, &linodego.ListOptions{PageSize: 500}) if err != nil { + failuresCount.Inc() return nil, err } // Gather detailed IP address info for all IPs on all linode instances. detailedIPs, err := d.client.ListIPAddresses(ctx, &linodego.ListOptions{PageSize: 500}) if err != nil { + failuresCount.Inc() return nil, err }