optimize Linode SD by polling for event changes during refresh (#8980)

* optimize Linode SD by polling for event changes during refresh

Most accounts are fairly "static", in the sense that they're not cycling
through instances constantly. So rather than do a full refresh every
interval and potentially make several behind-the-scenes paginated API
calls, this will now poll the `/account/events/` endpoint every minute
with a list of events that we care about. If a matching event is found,
we then do a full refresh.

Co-authored-by: William Smith <wsmith@linode.com>
Signed-off-by: TJ Hoplock <t.hoplock@gmail.com>
Signed-off-by: William Smith <wsmith@linode.com>
This commit is contained in:
TJ Hoplock 2021-08-04 06:05:49 -04:00 committed by GitHub
parent 03bee3b5df
commit 7baf084092
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 104 additions and 7 deletions

View file

@ -15,6 +15,7 @@ package linode
import (
"context"
"errors"
"fmt"
"net"
"net/http"
@ -56,6 +57,11 @@ const (
linodeLabelSpecsVCPUs = linodeLabel + "specs_vcpus"
linodeLabelSpecsTransferBytes = linodeLabel + "specs_transfer_bytes"
linodeLabelExtraIPs = linodeLabel + "extra_ips"
// This is our events filter; when polling for changes, we care only about
// events since our last refresh.
// Docs: https://www.linode.com/docs/api/account/#events-list
filterTemplate = `{"created": {"+gte": "%s"}}`
)
// DefaultSDConfig is the default Linode SD configuration.
@ -107,16 +113,23 @@ func (c *SDConfig) UnmarshalYAML(unmarshal func(interface{}) error) error {
// the Discoverer interface.
type Discovery struct {
*refresh.Discovery
client *linodego.Client
port int
tagSeparator string
client *linodego.Client
port int
tagSeparator string
// lastRefreshTimestamp is the UTC time captured at the start of the last
// refresh call that completed without error (initialised to the creation
// time in NewDiscovery). It is formatted into the events filter so that the
// next poll only asks for events created at or after it.
lastRefreshTimestamp time.Time
// pollCount is incremented on every successful events poll and reset to 0
// after a full data refresh; once it reaches 10 a full refresh is forced
// even when no events were reported.
pollCount int
// lastResults holds the target groups produced by the most recent full
// refresh. It is returned as-is when a poll decides no refresh is needed,
// and while it is nil event polling is skipped.
lastResults []*targetgroup.Group
// eventPollingEnabled starts out true and is switched off when the events
// endpoint answers with 401; from then on every refresh is a full refresh.
eventPollingEnabled bool
}
// NewDiscovery returns a new Discovery which periodically refreshes its targets.
func NewDiscovery(conf *SDConfig, logger log.Logger) (*Discovery, error) {
d := &Discovery{
port: conf.Port,
tagSeparator: conf.TagSeparator,
port: conf.Port,
tagSeparator: conf.TagSeparator,
pollCount: 0,
lastRefreshTimestamp: time.Now().UTC(),
eventPollingEnabled: true,
}
rt, err := config.NewRoundTripperFromConfig(conf.HTTPClientConfig, "linode_sd", config.WithHTTP2Disabled())
@ -143,6 +156,50 @@ func NewDiscovery(conf *SDConfig, logger log.Logger) (*Discovery, error) {
}
// refresh returns the current target groups, doing a full data refresh only
// when one is required.
//
// Once a first result set exists and event polling is enabled, the account
// events endpoint is queried for events created since the previous refresh.
// The cached groups are reused when no events are reported and fewer than 10
// consecutive polls have happened since the last full refresh. Any error other
// than a 401 from the events endpoint is returned to the caller unchanged.
func (d *Discovery) refresh(ctx context.Context) ([]*targetgroup.Group, error) {
	now := time.Now().UTC()
	fullRefresh := true

	if d.eventPollingEnabled && d.lastResults != nil {
		// Ask only for events created at or after the previous refresh.
		since := d.lastRefreshTimestamp.Format("2006-01-02T15:04:05")
		listOpts := linodego.NewListOptions(1, fmt.Sprintf(filterTemplate, since))
		events, err := d.client.ListEvents(ctx, listOpts)

		var apiErr *linodego.Error
		switch {
		case err == nil:
			// The events feed only reflects changes made through the Linode
			// API. Something done outside of it (for example a `shutdown`
			// typed at the VM's console) never shows up, so a full refresh is
			// forced after ~10 quiet polls to avoid serving stale data.
			d.pollCount++
			fullRefresh = len(events) > 0 || d.pollCount >= 10
		case errors.As(err, &apiErr) && apiErr.Code == http.StatusUnauthorized:
			// A 401 means the token lacks the `events:read_only` scope. Stop
			// polling and fall back to a full refresh on every interval.
			d.eventPollingEnabled = false
		default:
			return nil, err
		}
	}

	if fullRefresh {
		groups, err := d.refreshData(ctx)
		if err != nil {
			return nil, err
		}
		d.pollCount = 0
		d.lastResults = groups
	}

	d.lastRefreshTimestamp = now

	return d.lastResults, nil
}
func (d *Discovery) refreshData(ctx context.Context) ([]*targetgroup.Group, error) {
tg := &targetgroup.Group{
Source: "Linode",
}

View file

@ -39,6 +39,7 @@ func (s *LinodeSDTestSuite) SetupTest(t *testing.T) {
s.Mock.HandleLinodeInstancesList()
s.Mock.HandleLinodeNeworkingIPs()
s.Mock.HandleLinodeAccountEvents()
}
func TestLinodeSDRefresh(t *testing.T) {

View file

@ -413,3 +413,42 @@ func (m *SDMock) HandleLinodeNeworkingIPs() {
)
})
}
// HandleLinodeAccountEvents mocks the Linode account/events endpoint.
//
// The registered handler answers:
//   - 401 with no body when the Authorization header does not carry the
//     expected bearer token;
//   - 400 with a JSON error body when the request has no X-Filter header;
//   - 200 with an empty JSON events page otherwise.
func (m *SDMock) HandleLinodeAccountEvents() {
	m.Mux.HandleFunc("/v4/account/events", func(w http.ResponseWriter, r *http.Request) {
		if r.Header.Get("Authorization") != fmt.Sprintf("Bearer %s", tokenID) {
			w.WriteHeader(http.StatusUnauthorized)
			return
		}

		// Every body written below is JSON. The header has to be set before
		// WriteHeader, otherwise net/http sniffs the payload and labels it as
		// plain text, which hides the error reason from JSON-aware clients.
		w.Header().Set("content-type", "application/json; charset=utf-8")

		if r.Header.Get("X-Filter") == "" {
			// This should never happen; if the client sends an events request without
			// a filter, cause it to fail. The error below is not a real response from
			// the API, but should aid in debugging failed tests.
			w.WriteHeader(http.StatusBadRequest)
			fmt.Fprint(w, `
{
"errors": [
{
"reason": "Request missing expected X-Filter headers"
}
]
}`,
			)
			return
		}

		w.WriteHeader(http.StatusOK)
		fmt.Fprint(w, `
{
"data": [],
"results": 0,
"pages": 1,
"page": 1
}`,
		)
	})
}

View file

@ -1706,7 +1706,7 @@ The following meta labels are available on targets during [relabeling](#relabel_
# Note that `basic_auth` and `authorization` options are
# mutually exclusive.
# password and password_file are mutually exclusive.
# Note: Linode APIv4 Token must be created with scopes: 'linodes:read_only' and 'ips:read_only'
# Note: Linode APIv4 Token must be created with scopes: 'linodes:read_only', 'ips:read_only', and 'events:read_only'
# Optional HTTP basic authentication information, not currently supported by Linode APIv4.
basic_auth:

View file

@ -11,7 +11,7 @@ scrape_configs:
- job_name: "node"
linode_sd_configs:
- authorization:
credentials: "<replace with a Personal Access Token with linodes:read_only + ips:read_only access>"
credentials: "<replace with a Personal Access Token with linodes:read_only, ips:read_only, and events:read_only access>"
relabel_configs:
# Only scrape targets that have a tag 'monitoring'.
- source_labels: [__meta_linode_tags]