mirror of
https://github.com/prometheus/prometheus.git
synced 2025-01-12 22:37:27 -08:00
Add support for Azure discovery
This change adds the ability to do target discovery with Microsoft's Azure platform.
This commit is contained in:
parent
3683eaf205
commit
0988e3b937
|
@ -31,7 +31,7 @@ var (
|
|||
patJobName = regexp.MustCompile(`^[a-zA-Z_][a-zA-Z0-9_-]*$`)
|
||||
patFileSDName = regexp.MustCompile(`^[^*]*(\*[^/]*)?\.(json|yml|yaml|JSON|YML|YAML)$`)
|
||||
patRulePath = regexp.MustCompile(`^[^*]*(\*[^/]*)?$`)
|
||||
patAuthLine = regexp.MustCompile(`((?:password|bearer_token|secret_key):\s+)(".+"|'.+'|[^\s]+)`)
|
||||
patAuthLine = regexp.MustCompile(`((?:password|bearer_token|secret_key|client_secret):\s+)(".+"|'.+'|[^\s]+)`)
|
||||
)
|
||||
|
||||
// Load parses the YAML input s into a Config.
|
||||
|
@ -139,6 +139,12 @@ var (
|
|||
Port: 80,
|
||||
RefreshInterval: model.Duration(60 * time.Second),
|
||||
}
|
||||
|
||||
// DefaultAzureSDConfig is the default Azure SD configuration.
|
||||
DefaultAzureSDConfig = AzureSDConfig{
|
||||
Port: 80,
|
||||
RefreshInterval: model.Duration(5 * time.Minute),
|
||||
}
|
||||
)
|
||||
|
||||
// URL is a custom URL type that allows validation at configuration load time.
|
||||
|
@ -409,6 +415,8 @@ type ScrapeConfig struct {
|
|||
KubernetesSDConfigs []*KubernetesSDConfig `yaml:"kubernetes_sd_configs,omitempty"`
|
||||
// List of EC2 service discovery configurations.
|
||||
EC2SDConfigs []*EC2SDConfig `yaml:"ec2_sd_configs,omitempty"`
|
||||
// List of Azure service discovery configurations.
|
||||
AzureSDConfigs []*AzureSDConfig `yaml:"azure_sd_configs,omitempty"`
|
||||
|
||||
// List of target relabel configurations.
|
||||
RelabelConfigs []*RelabelConfig `yaml:"relabel_configs,omitempty"`
|
||||
|
@ -801,6 +809,30 @@ func (c *EC2SDConfig) UnmarshalYAML(unmarshal func(interface{}) error) error {
|
|||
return checkOverflow(c.XXX, "ec2_sd_config")
|
||||
}
|
||||
|
||||
// AzureSDConfig is the configuration for Azure based service discovery.
|
||||
type AzureSDConfig struct {
|
||||
Port int `yaml:"port"`
|
||||
SubscriptionID string `yaml:"subscription_id"`
|
||||
TenantID string `yaml:"tenant_id,omitempty"`
|
||||
ClientID string `yaml:"client_id,omitempty"`
|
||||
ClientSecret string `yaml:"client_secret,omitempty"`
|
||||
RefreshInterval model.Duration `yaml:"refresh_interval,omitempty"`
|
||||
// Catches all undefined fields and must be empty after parsing.
|
||||
XXX map[string]interface{} `yaml:",inline"`
|
||||
}
|
||||
|
||||
// UnmarshalYAML implements the yaml.Unmarshaler interface.
|
||||
func (c *AzureSDConfig) UnmarshalYAML(unmarshal func(interface{}) error) error {
|
||||
*c = DefaultAzureSDConfig
|
||||
type plain AzureSDConfig
|
||||
err := unmarshal((*plain)(c))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return checkOverflow(c.XXX, "azure_sd_config")
|
||||
}
|
||||
|
||||
// RelabelAction is the action to be performed on relabeling.
|
||||
type RelabelAction string
|
||||
|
||||
|
|
|
@ -270,6 +270,26 @@ var expectedConf = &Config{
|
|||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
JobName: "service-azure",
|
||||
|
||||
ScrapeInterval: model.Duration(15 * time.Second),
|
||||
ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
|
||||
|
||||
MetricsPath: DefaultScrapeConfig.MetricsPath,
|
||||
Scheme: DefaultScrapeConfig.Scheme,
|
||||
|
||||
AzureSDConfigs: []*AzureSDConfig{
|
||||
{
|
||||
SubscriptionID: "11AAAA11-A11A-111A-A111-1111A1111A11",
|
||||
TenantID: "BBBB222B-B2B2-2B22-B222-2BB2222BB2B2",
|
||||
ClientID: "333333CC-3C33-3333-CCC3-33C3CCCCC33C",
|
||||
ClientSecret: "nAdvAK2oBuVym4IXix",
|
||||
RefreshInterval: model.Duration(5 * time.Minute),
|
||||
Port: 9100,
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
JobName: "service-nerve",
|
||||
|
||||
|
|
8
config/testdata/conf.good.yml
vendored
8
config/testdata/conf.good.yml
vendored
|
@ -127,6 +127,14 @@ scrape_configs:
|
|||
access_key: access
|
||||
secret_key: secret
|
||||
|
||||
- job_name: service-azure
|
||||
azure_sd_configs:
|
||||
- subscription_id: 11AAAA11-A11A-111A-A111-1111A1111A11
|
||||
tenant_id: BBBB222B-B2B2-2B22-B222-2BB2222BB2B2
|
||||
client_id: 333333CC-3C33-3333-CCC3-33C3CCCCC33C
|
||||
client_secret: nAdvAK2oBuVym4IXix
|
||||
port: 9100
|
||||
|
||||
- job_name: service-nerve
|
||||
nerve_sd_configs:
|
||||
- servers:
|
||||
|
|
248
retrieval/discovery/azure.go
Normal file
248
retrieval/discovery/azure.go
Normal file
|
@ -0,0 +1,248 @@
|
|||
// Copyright 2015 The Prometheus Authors
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package discovery
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/Azure/azure-sdk-for-go/arm/compute"
|
||||
"github.com/Azure/azure-sdk-for-go/arm/network"
|
||||
"github.com/Azure/go-autorest/autorest/azure"
|
||||
|
||||
"github.com/prometheus/common/log"
|
||||
"github.com/prometheus/common/model"
|
||||
"golang.org/x/net/context"
|
||||
|
||||
"github.com/prometheus/prometheus/config"
|
||||
"github.com/prometheus/prometheus/util/strutil"
|
||||
)
|
||||
|
||||
const (
|
||||
azureLabel = model.MetaLabelPrefix + "azure_"
|
||||
azureLabelMachineID = azureLabel + "machine_id"
|
||||
azureLabelMachineResourceGroup = azureLabel + "machine_resource_group"
|
||||
azureLabelMachineName = azureLabel + "machine_name"
|
||||
azureLabelMachineLocation = azureLabel + "machine_location"
|
||||
azureLabelMachinePrivateIP = azureLabel + "machine_private_ip"
|
||||
azureLabelMachineTag = azureLabel + "machine_tag_"
|
||||
)
|
||||
|
||||
// AzureDiscovery periodically performs Azure-SD requests. It implements
|
||||
// the TargetProvider interface.
|
||||
type AzureDiscovery struct {
|
||||
cfg *config.AzureSDConfig
|
||||
interval time.Duration
|
||||
port int
|
||||
}
|
||||
|
||||
// NewAzureDiscovery returns a new AzureDiscovery which periodically refreshes its targets.
|
||||
func NewAzureDiscovery(cfg *config.AzureSDConfig) *AzureDiscovery {
|
||||
return &AzureDiscovery{
|
||||
cfg: cfg,
|
||||
interval: time.Duration(cfg.RefreshInterval),
|
||||
port: cfg.Port,
|
||||
}
|
||||
}
|
||||
|
||||
// Run implements the TargetProvider interface.
|
||||
func (ad *AzureDiscovery) Run(ctx context.Context, ch chan<- []*config.TargetGroup) {
|
||||
defer close(ch)
|
||||
ticker := time.NewTicker(ad.interval)
|
||||
defer ticker.Stop()
|
||||
|
||||
for {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return
|
||||
default:
|
||||
}
|
||||
|
||||
tg, err := ad.refresh()
|
||||
if err != nil {
|
||||
log.Errorf("unable to refresh during Azure discovery: %s", err)
|
||||
} else {
|
||||
ch <- []*config.TargetGroup{tg}
|
||||
}
|
||||
|
||||
select {
|
||||
case <-ticker.C:
|
||||
case <-ctx.Done():
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// azureClient represents multiple Azure Resource Manager providers.
|
||||
type azureClient struct {
|
||||
nic network.InterfacesClient
|
||||
vm compute.VirtualMachinesClient
|
||||
}
|
||||
|
||||
// createAzureClient is a helper function for creating an Azure compute client to ARM.
|
||||
func createAzureClient(cfg config.AzureSDConfig) (azureClient, error) {
|
||||
var c azureClient
|
||||
oauthConfig, err := azure.PublicCloud.OAuthConfigForTenant(cfg.TenantID)
|
||||
if err != nil {
|
||||
return azureClient{}, err
|
||||
}
|
||||
spt, err := azure.NewServicePrincipalToken(*oauthConfig, cfg.ClientID, cfg.ClientSecret, azure.PublicCloud.ResourceManagerEndpoint)
|
||||
if err != nil {
|
||||
return azureClient{}, err
|
||||
}
|
||||
|
||||
c.vm = compute.NewVirtualMachinesClient(cfg.SubscriptionID)
|
||||
c.vm.Authorizer = spt
|
||||
|
||||
c.nic = network.NewInterfacesClient(cfg.SubscriptionID)
|
||||
c.nic.Authorizer = spt
|
||||
|
||||
return c, nil
|
||||
}
|
||||
|
||||
// azureResource represents a resource identifier in Azure.
|
||||
type azureResource struct {
|
||||
Name string
|
||||
ResourceGroup string
|
||||
}
|
||||
|
||||
// Create a new azureResource object from an ID string.
|
||||
func newAzureResourceFromID(id string) (azureResource, error) {
|
||||
// Resource IDs have the following format.
|
||||
// /subscriptions/SUBSCRIPTION_ID/resourceGroups/RESOURCE_GROUP/providers/PROVIDER/TYPE/NAME
|
||||
s := strings.Split(id, "/")
|
||||
if len(s) != 9 {
|
||||
err := fmt.Errorf("invalid ID '%s'. Refusing to create azureResource", id)
|
||||
log.Error(err)
|
||||
return azureResource{}, err
|
||||
}
|
||||
return azureResource{
|
||||
Name: strings.ToLower(s[8]),
|
||||
ResourceGroup: strings.ToLower(s[4]),
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (ad *AzureDiscovery) refresh() (*config.TargetGroup, error) {
|
||||
tg := &config.TargetGroup{}
|
||||
client, err := createAzureClient(*ad.cfg)
|
||||
if err != nil {
|
||||
return tg, fmt.Errorf("could not create Azure client: %s", err)
|
||||
}
|
||||
|
||||
var machines []compute.VirtualMachine
|
||||
result, err := client.vm.ListAll()
|
||||
if err != nil {
|
||||
return tg, fmt.Errorf("could not list virtual machines: %s", err)
|
||||
}
|
||||
machines = append(machines, *result.Value...)
|
||||
|
||||
// If we still have results, keep going until we have no more.
|
||||
for result.NextLink != nil {
|
||||
result, err = client.vm.ListAllNextResults(result)
|
||||
if err != nil {
|
||||
return tg, fmt.Errorf("could not list virtual machines: %s", err)
|
||||
}
|
||||
machines = append(machines, *result.Value...)
|
||||
}
|
||||
log.Debugf("Found %d virtual machines during Azure discovery.", len(machines))
|
||||
|
||||
// We have the slice of machines. Now turn them into targets.
|
||||
// Doing them in go routines because the network interface calls are slow.
|
||||
type target struct {
|
||||
labelSet model.LabelSet
|
||||
err error
|
||||
}
|
||||
|
||||
ch := make(chan target, len(machines))
|
||||
for i, vm := range machines {
|
||||
go func(i int, vm compute.VirtualMachine) {
|
||||
r, err := newAzureResourceFromID(*vm.ID)
|
||||
if err != nil {
|
||||
ch <- target{labelSet: nil, err: err}
|
||||
return
|
||||
}
|
||||
|
||||
labels := model.LabelSet{
|
||||
azureLabelMachineID: model.LabelValue(*vm.ID),
|
||||
azureLabelMachineName: model.LabelValue(*vm.Name),
|
||||
azureLabelMachineLocation: model.LabelValue(*vm.Location),
|
||||
azureLabelMachineResourceGroup: model.LabelValue(r.ResourceGroup),
|
||||
}
|
||||
|
||||
if vm.Tags != nil {
|
||||
for k, v := range *vm.Tags {
|
||||
name := strutil.SanitizeLabelName(k)
|
||||
labels[azureLabelMachineTag+model.LabelName(name)] = model.LabelValue(*v)
|
||||
}
|
||||
}
|
||||
|
||||
// Get the IP address information via seperate call to the network provider.
|
||||
for _, nic := range *vm.Properties.NetworkProfile.NetworkInterfaces {
|
||||
r, err := newAzureResourceFromID(*nic.ID)
|
||||
if err != nil {
|
||||
ch <- target{labelSet: nil, err: err}
|
||||
return
|
||||
}
|
||||
networkInterface, err := client.nic.Get(r.ResourceGroup, r.Name, "")
|
||||
if err != nil {
|
||||
log.Errorf("Unable to get network interface %s: %s", r.Name, err)
|
||||
ch <- target{labelSet: nil, err: err}
|
||||
// Get out of this routine because we cannot continue without a network interface.
|
||||
return
|
||||
}
|
||||
|
||||
// Unfortunately Azure does not return information on whether a VM is deallocated.
|
||||
// This information is available via another API call however the Go SDK does not
|
||||
// yet support this. On deallocated machines, this value happens to be nil so it
|
||||
// is a cheap and easy way to determine if a machine is allocated or not.
|
||||
if networkInterface.Properties.Primary == nil {
|
||||
log.Debugf("Virtual machine %s is deallocated. Skipping during Azure SD.", *vm.Name)
|
||||
ch <- target{}
|
||||
return
|
||||
}
|
||||
|
||||
if *networkInterface.Properties.Primary {
|
||||
for _, ip := range *networkInterface.Properties.IPConfigurations {
|
||||
if ip.Properties.PrivateIPAddress != nil {
|
||||
labels[azureLabelMachinePrivateIP] = model.LabelValue(*ip.Properties.PrivateIPAddress)
|
||||
address := fmt.Sprintf("%s:%d", *ip.Properties.PrivateIPAddress, ad.port)
|
||||
labels[model.AddressLabel] = model.LabelValue(address)
|
||||
ch <- target{labelSet: labels, err: nil}
|
||||
return
|
||||
}
|
||||
// If we made it here, we don't have a private IP which should be impossible.
|
||||
// Return an empty target and error to ensure an all or nothing situation.
|
||||
err = fmt.Errorf("unable to find a private IP for VM %s", *vm.Name)
|
||||
ch <- target{labelSet: nil, err: err}
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
}(i, vm)
|
||||
}
|
||||
|
||||
for range machines {
|
||||
tgt := <-ch
|
||||
if tgt.err != nil {
|
||||
return nil, fmt.Errorf("unable to complete Azure service discovery: %s", err)
|
||||
}
|
||||
if tgt.labelSet != nil {
|
||||
tg.Targets = append(tg.Targets, tgt.labelSet)
|
||||
}
|
||||
}
|
||||
|
||||
log.Debugf("Azure discovery completed.")
|
||||
return tg, nil
|
||||
}
|
|
@ -393,6 +393,9 @@ func providersFromConfig(cfg *config.ScrapeConfig) map[string]TargetProvider {
|
|||
for i, c := range cfg.EC2SDConfigs {
|
||||
app("ec2", i, discovery.NewEC2Discovery(c))
|
||||
}
|
||||
for i, c := range cfg.AzureSDConfigs {
|
||||
app("azure", i, discovery.NewAzureDiscovery(c))
|
||||
}
|
||||
if len(cfg.TargetGroups) > 0 {
|
||||
app("static", 0, NewStaticProvider(cfg.TargetGroups))
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue