diff --git a/discovery/azure/azure.go b/discovery/azure/azure.go index 23b3cb8c4d..675ff7c217 100644 --- a/discovery/azure/azure.go +++ b/discovery/azure/azure.go @@ -17,6 +17,7 @@ import ( "context" "errors" "fmt" + "math/rand" "net" "net/http" "strings" @@ -30,10 +31,13 @@ import ( "github.com/Azure/azure-sdk-for-go/sdk/azidentity" "github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/compute/armcompute/v4" "github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/network/armnetwork/v2" + cache "github.com/Code-Hex/go-generics-cache" + "github.com/Code-Hex/go-generics-cache/policy/lru" "github.com/go-kit/log" "github.com/go-kit/log/level" "github.com/prometheus/client_golang/prometheus" config_util "github.com/prometheus/common/config" + "github.com/prometheus/common/model" "github.com/prometheus/common/version" @@ -80,6 +84,11 @@ var ( Name: "prometheus_sd_azure_failures_total", Help: "Number of Azure service discovery refresh failures.", }) + cacheHitCount = prometheus.NewCounter( + prometheus.CounterOpts{ + Name: "prometheus_sd_azure_cache_hit_total", + Help: "Number of cache hit during refresh.", + }) ) var environments = map[string]cloud.Configuration{ @@ -105,6 +114,7 @@ func CloudConfigurationFromName(name string) (cloud.Configuration, error) { func init() { discovery.RegisterConfig(&SDConfig{}) prometheus.MustRegister(failuresCount) + prometheus.MustRegister(cacheHitCount) } // SDConfig is the configuration for Azure based service discovery. @@ -145,7 +155,6 @@ func (c *SDConfig) UnmarshalYAML(unmarshal func(interface{}) error) error { if err != nil { return err } - if err = validateAuthParam(c.SubscriptionID, "subscription_id"); err != nil { return err } @@ -174,6 +183,7 @@ type Discovery struct { logger log.Logger cfg *SDConfig port int + cache *cache.Cache[string, *armnetwork.Interface] } // NewDiscovery returns a new AzureDiscovery which periodically refreshes its targets. 
@@ -181,17 +191,21 @@ func NewDiscovery(cfg *SDConfig, logger log.Logger) *Discovery { if logger == nil { logger = log.NewNopLogger() } + l := cache.New(cache.AsLRU[string, *armnetwork.Interface](lru.WithCapacity(5000))) d := &Discovery{ cfg: cfg, port: cfg.Port, logger: logger, + cache: l, } + d.Discovery = refresh.NewDiscovery( logger, "azure", time.Duration(cfg.RefreshInterval), d.refresh, ) + return d } @@ -385,15 +399,22 @@ func (d *Discovery) refresh(ctx context.Context) ([]*targetgroup.Group, error) { // Get the IP address information via separate call to the network provider. for _, nicID := range vm.NetworkInterfaces { - networkInterface, err := client.getNetworkInterfaceByID(ctx, nicID) - if err != nil { - if errors.Is(err, errorNotFound) { - level.Warn(d.logger).Log("msg", "Network interface does not exist", "name", nicID, "err", err) - } else { - ch <- target{labelSet: nil, err: err} + var networkInterface *armnetwork.Interface + if v, ok := d.getFromCache(nicID); ok { + networkInterface = v + cacheHitCount.Add(1) + } else { + networkInterface, err = client.getNetworkInterfaceByID(ctx, nicID) + if err != nil { + if errors.Is(err, errorNotFound) { + level.Warn(d.logger).Log("msg", "Network interface does not exist", "name", nicID, "err", err) + } else { + ch <- target{labelSet: nil, err: err} + } + // Get out of this routine because we cannot continue without a network interface. + return } - // Get out of this routine because we cannot continue without a network interface. 
- return + d.addToCache(nicID, networkInterface) } if networkInterface.Properties == nil { @@ -628,3 +649,19 @@ func (client *azureClient) getNetworkInterfaceByID(ctx context.Context, networkI return &resp.Interface, nil } + +// addToCache caches the network interface for nicID, expiring it after 10 refresh intervals plus a random jitter of up to 3 more. +func (d *Discovery) addToCache(nicID string, netInt *armnetwork.Interface) { + random := rand.Int63n(int64(time.Duration(d.cfg.RefreshInterval * 3).Seconds())) + rs := time.Duration(random) * time.Second + exptime := time.Duration(d.cfg.RefreshInterval*10) + rs + d.cache.Set(nicID, netInt, cache.WithExpiration(exptime)) + level.Debug(d.logger).Log("msg", "Adding nic", "nic", nicID, "time", exptime.Seconds()) +} + +// getFromCache returns the cached network interface for the specified nicID +// and reports whether an entry was found. +func (d *Discovery) getFromCache(nicID string) (*armnetwork.Interface, bool) { + net, found := d.cache.Get(nicID) + return net, found +} diff --git a/go.mod b/go.mod index 629dd11472..6cc2d02370 100644 --- a/go.mod +++ b/go.mod @@ -110,6 +110,7 @@ require ( ) require ( + github.com/Code-Hex/go-generics-cache v1.3.1 github.com/Microsoft/go-winio v0.6.1 // indirect github.com/armon/go-metrics v0.4.1 // indirect github.com/asaskevich/govalidator v0.0.0-20230301143203-a9d515a09cc2 // indirect diff --git a/go.sum b/go.sum index e8f2013a70..01804516bb 100644 --- a/go.sum +++ b/go.sum @@ -54,6 +54,8 @@ github.com/AzureAD/microsoft-authentication-library-for-go v1.1.1 h1:WpB/QDNLpMw github.com/AzureAD/microsoft-authentication-library-for-go v1.1.1/go.mod h1:wP83P5OoQ5p6ip3ScPr0BAq0BvuPAvacpEuSzyouqAI= github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802/go.mod h1:IVnqGOEym/WlBOVXweHU+Q+/VP0lqqI8lqeDx9IjBqo= +github.com/Code-Hex/go-generics-cache v1.3.1 h1:i8rLwyhoyhaerr7JpjtYjJZUcCbWOdiYO3fZXLiEC4g= +github.com/Code-Hex/go-generics-cache 
v1.3.1/go.mod h1:qxcC9kRVrct9rHeiYpFWSoW1vxyillCVzX13KZG8dl4= github.com/DataDog/datadog-go v3.2.0+incompatible/go.mod h1:LButxg5PwREeZtORoXG3tL4fMGNddJ+vMq1mwgfaqoQ= github.com/Knetic/govaluate v3.0.1-0.20171022003610-9aa49832a739+incompatible/go.mod h1:r7JcOSlj0wfOMncg0iLm8Leh48TZaKVeNIfJntJ2wa0= github.com/Microsoft/go-winio v0.6.1 h1:9/kr64B9VUZrLm5YYwbGtUJnMgqWVOdUAXu6Migciow=