From 7baf08409256d21cefba59968e469318e3f4fdb9 Mon Sep 17 00:00:00 2001 From: TJ Hoplock <33664289+tjhop@users.noreply.github.com> Date: Wed, 4 Aug 2021 06:05:49 -0400 Subject: [PATCH] optimize Linode SD by polling for event changes during refresh (#8980) * optimize Linode SD by polling for event changes during refresh Most accounts are fairly "static", in the sense that they're not cycling through instances constantly. So rather than do a full refresh every interval and potentially make several behind-the-scenes paginated API calls, this will now poll the `/account/events/` endpoint every minute with a list of events that we care about. If a matching event is found, we then do a full refresh. Co-authored-by: William Smith Signed-off-by: TJ Hoplock Signed-off-by: William Smith --- discovery/linode/linode.go | 67 ++++++++++++++++++-- discovery/linode/linode_test.go | 1 + discovery/linode/mock_test.go | 39 ++++++++++++ docs/configuration/configuration.md | 2 +- documentation/examples/prometheus-linode.yml | 2 +- 5 files changed, 104 insertions(+), 7 deletions(-) diff --git a/discovery/linode/linode.go b/discovery/linode/linode.go index 9c22eb684..21c856857 100644 --- a/discovery/linode/linode.go +++ b/discovery/linode/linode.go @@ -15,6 +15,7 @@ package linode import ( "context" + "errors" "fmt" "net" "net/http" @@ -56,6 +57,11 @@ const ( linodeLabelSpecsVCPUs = linodeLabel + "specs_vcpus" linodeLabelSpecsTransferBytes = linodeLabel + "specs_transfer_bytes" linodeLabelExtraIPs = linodeLabel + "extra_ips" + + // This is our events filter; when polling for changes, we care only about + // events since our last refresh. + // Docs: https://www.linode.com/docs/api/account/#events-list + filterTemplate = `{"created": {"+gte": "%s"}}` ) // DefaultSDConfig is the default Linode SD configuration. @@ -107,16 +113,23 @@ func (c *SDConfig) UnmarshalYAML(unmarshal func(interface{}) error) error { // the Discoverer interface. 
type Discovery struct { *refresh.Discovery - client *linodego.Client - port int - tagSeparator string + client *linodego.Client + port int + tagSeparator string + lastRefreshTimestamp time.Time + pollCount int + lastResults []*targetgroup.Group + eventPollingEnabled bool } // NewDiscovery returns a new Discovery which periodically refreshes its targets. func NewDiscovery(conf *SDConfig, logger log.Logger) (*Discovery, error) { d := &Discovery{ - port: conf.Port, - tagSeparator: conf.TagSeparator, + port: conf.Port, + tagSeparator: conf.TagSeparator, + pollCount: 0, + lastRefreshTimestamp: time.Now().UTC(), + eventPollingEnabled: true, } rt, err := config.NewRoundTripperFromConfig(conf.HTTPClientConfig, "linode_sd", config.WithHTTP2Disabled()) @@ -143,6 +156,50 @@ func NewDiscovery(conf *SDConfig, logger log.Logger) (*Discovery, error) { } func (d *Discovery) refresh(ctx context.Context) ([]*targetgroup.Group, error) { + needsRefresh := true + ts := time.Now().UTC() + + if d.lastResults != nil && d.eventPollingEnabled { + // Check to see if there have been any events. If so, refresh our data. + opts := linodego.NewListOptions(1, fmt.Sprintf(filterTemplate, d.lastRefreshTimestamp.Format("2006-01-02T15:04:05"))) + events, err := d.client.ListEvents(ctx, opts) + if err != nil { + var e *linodego.Error + if errors.As(err, &e) && e.Code == http.StatusUnauthorized { + // If we get a 401, the token doesn't have `events:read_only` scope. + // Disable event polling and fall back to doing a full refresh every interval. + d.eventPollingEnabled = false + } else { + return nil, err + } + } else { + // Event polling tells us changes the Linode API is aware of. Actions issued outside of the Linode API, + // such as issuing a `shutdown` at the VM's console instead of using the API to power off an instance, + // can potentially cause us to return stale data. Just in case, trigger a full refresh after ~10 polling + // intervals of no events. 
+ d.pollCount++ + + if len(events) == 0 && d.pollCount < 10 { + needsRefresh = false + } + } + } + + if needsRefresh { + newData, err := d.refreshData(ctx) + if err != nil { + return nil, err + } + d.pollCount = 0 + d.lastResults = newData + } + + d.lastRefreshTimestamp = ts + + return d.lastResults, nil +} + +func (d *Discovery) refreshData(ctx context.Context) ([]*targetgroup.Group, error) { tg := &targetgroup.Group{ Source: "Linode", } diff --git a/discovery/linode/linode_test.go b/discovery/linode/linode_test.go index d222afffd..a201458a7 100644 --- a/discovery/linode/linode_test.go +++ b/discovery/linode/linode_test.go @@ -39,6 +39,7 @@ func (s *LinodeSDTestSuite) SetupTest(t *testing.T) { s.Mock.HandleLinodeInstancesList() s.Mock.HandleLinodeNeworkingIPs() + s.Mock.HandleLinodeAccountEvents() } func TestLinodeSDRefresh(t *testing.T) { diff --git a/discovery/linode/mock_test.go b/discovery/linode/mock_test.go index 62b9f7782..ade726ed7 100644 --- a/discovery/linode/mock_test.go +++ b/discovery/linode/mock_test.go @@ -413,3 +413,42 @@ func (m *SDMock) HandleLinodeNeworkingIPs() { ) }) } + +// HandleLinodeAccountEvents mocks the Linode account/events endpoint. +func (m *SDMock) HandleLinodeAccountEvents() { + m.Mux.HandleFunc("/v4/account/events", func(w http.ResponseWriter, r *http.Request) { + if r.Header.Get("Authorization") != fmt.Sprintf("Bearer %s", tokenID) { + w.WriteHeader(http.StatusUnauthorized) + return + } + + if r.Header.Get("X-Filter") == "" { + // This should never happen; if the client sends an events request without + // a filter, cause it to fail. The error below is not a real response from + // the API, but should aid in debugging failed tests. 
+ w.WriteHeader(http.StatusBadRequest) + fmt.Fprint(w, ` +{ + "errors": [ + { + "reason": "Request missing expected X-Filter headers" + } + ] +}`, + ) + return + } + + w.Header().Set("content-type", "application/json; charset=utf-8") + w.WriteHeader(http.StatusOK) + + fmt.Fprint(w, ` +{ + "data": [], + "results": 0, + "pages": 1, + "page": 1 +}`, + ) + }) +} diff --git a/docs/configuration/configuration.md b/docs/configuration/configuration.md index 5f90ad435..36a22d14b 100644 --- a/docs/configuration/configuration.md +++ b/docs/configuration/configuration.md @@ -1706,7 +1706,7 @@ The following meta labels are available on targets during [relabeling](#relabel_ # Note that `basic_auth` and `authorization` options are # mutually exclusive. # password and password_file are mutually exclusive. -# Note: Linode APIv4 Token must be created with scopes: 'linodes:read_only' and 'ips:read_only' +# Note: Linode APIv4 Token must be created with scopes: 'linodes:read_only', 'ips:read_only', and 'events:read_only' # Optional HTTP basic authentication information, not currently supported by Linode APIv4. basic_auth: diff --git a/documentation/examples/prometheus-linode.yml b/documentation/examples/prometheus-linode.yml index 315675071..a19b2c3a8 100644 --- a/documentation/examples/prometheus-linode.yml +++ b/documentation/examples/prometheus-linode.yml @@ -11,7 +11,7 @@ scrape_configs: - job_name: "node" linode_sd_configs: - authorization: - credentials: "" + credentials: "" relabel_configs: # Only scrape targets that have a tag 'monitoring'. - source_labels: [__meta_linode_tags]