prometheus/discovery/ec2/ec2.go

236 lines
6.6 KiB
Go
Raw Normal View History

2015-09-21 11:49:19 -07:00
// Copyright 2015 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package ec2
2015-09-21 11:49:19 -07:00
import (
"context"
2015-09-21 11:49:19 -07:00
"fmt"
"net"
"strings"
2015-09-21 11:49:19 -07:00
"time"
"github.com/aws/aws-sdk-go/aws"
"github.com/aws/aws-sdk-go/aws/credentials"
"github.com/aws/aws-sdk-go/aws/credentials/ec2rolecreds"
"github.com/aws/aws-sdk-go/aws/credentials/stscreds"
"github.com/aws/aws-sdk-go/aws/ec2metadata"
"github.com/aws/aws-sdk-go/aws/session"
2017-08-11 11:45:52 -07:00
"github.com/go-kit/kit/log"
"github.com/go-kit/kit/log/level"
"github.com/prometheus/client_golang/prometheus"
2015-09-21 11:49:19 -07:00
"github.com/prometheus/common/model"
"github.com/aws/aws-sdk-go/service/ec2"
"github.com/prometheus/prometheus/config"
"github.com/prometheus/prometheus/util/strutil"
)
const (
2016-09-22 06:01:23 -07:00
ec2Label = model.MetaLabelPrefix + "ec2_"
ec2LabelAZ = ec2Label + "availability_zone"
ec2LabelInstanceID = ec2Label + "instance_id"
ec2LabelInstanceState = ec2Label + "instance_state"
2016-10-21 03:13:47 -07:00
ec2LabelInstanceType = ec2Label + "instance_type"
2016-09-22 06:01:23 -07:00
ec2LabelPublicDNS = ec2Label + "public_dns_name"
ec2LabelPublicIP = ec2Label + "public_ip"
ec2LabelPrivateIP = ec2Label + "private_ip"
ec2LabelSubnetID = ec2Label + "subnet_id"
ec2LabelTag = ec2Label + "tag_"
ec2LabelVPCID = ec2Label + "vpc_id"
subnetSeparator = ","
2015-09-21 11:49:19 -07:00
)
// Metrics tracking how long EC2 SD refreshes take and how often they fail.
var (
	ec2SDRefreshDuration = prometheus.NewSummary(prometheus.SummaryOpts{
		Name: "prometheus_sd_ec2_refresh_duration_seconds",
		Help: "The duration of a EC2-SD refresh in seconds.",
	})
	ec2SDRefreshFailuresCount = prometheus.NewCounter(prometheus.CounterOpts{
		Name: "prometheus_sd_ec2_refresh_failures_total",
		Help: "The number of EC2-SD scrape failures.",
	})
)
// init registers the EC2 SD refresh metrics. MustRegister panics if a
// collector cannot be registered.
func init() {
	prometheus.MustRegister(
		ec2SDRefreshFailuresCount,
		ec2SDRefreshDuration,
	)
}
// Discovery periodically performs EC2-SD requests. It implements
2015-09-21 11:49:19 -07:00
// the TargetProvider interface.
type Discovery struct {
2015-09-21 11:49:19 -07:00
aws *aws.Config
interval time.Duration
profile string
roleARN string
2015-09-21 11:49:19 -07:00
port int
logger log.Logger
2015-09-21 11:49:19 -07:00
}
// NewDiscovery returns a new EC2Discovery which periodically refreshes its targets.
func NewDiscovery(conf *config.EC2SDConfig, logger log.Logger) *Discovery {
creds := credentials.NewStaticCredentials(conf.AccessKey, string(conf.SecretKey), "")
2015-09-21 11:49:19 -07:00
if conf.AccessKey == "" && conf.SecretKey == "" {
creds = nil
2015-09-21 11:49:19 -07:00
}
2017-08-11 11:45:52 -07:00
if logger == nil {
logger = log.NewNopLogger()
}
return &Discovery{
2015-09-21 11:49:19 -07:00
aws: &aws.Config{
Region: &conf.Region,
Credentials: creds,
},
profile: conf.Profile,
roleARN: conf.RoleARN,
2015-09-21 11:49:19 -07:00
interval: time.Duration(conf.RefreshInterval),
port: conf.Port,
logger: logger,
2015-09-21 11:49:19 -07:00
}
}
// Run implements the TargetProvider interface.
func (d *Discovery) Run(ctx context.Context, ch chan<- []*config.TargetGroup) {
ticker := time.NewTicker(d.interval)
2015-09-21 11:49:19 -07:00
defer ticker.Stop()
// Get an initial set right away.
tg, err := d.refresh()
2015-09-21 11:49:19 -07:00
if err != nil {
2017-08-11 11:45:52 -07:00
level.Error(d.logger).Log("msg", "Refresh failed", "err", err)
2015-09-21 11:49:19 -07:00
} else {
select {
case ch <- []*config.TargetGroup{tg}:
case <-ctx.Done():
return
}
2015-09-21 11:49:19 -07:00
}
for {
select {
case <-ticker.C:
tg, err := d.refresh()
2015-09-21 11:49:19 -07:00
if err != nil {
2017-08-11 11:45:52 -07:00
level.Error(d.logger).Log("msg", "Refresh failed", "err", err)
continue
}
select {
case ch <- []*config.TargetGroup{tg}:
case <-ctx.Done():
return
2015-09-21 11:49:19 -07:00
}
case <-ctx.Done():
2015-09-21 11:49:19 -07:00
return
}
}
}
// ec2MetadataAvailable reports whether the EC2 metadata service is available,
// as determined by a metadata client built from sess with retries disabled.
func (d *Discovery) ec2MetadataAvailable(sess *session.Session) (isAvailable bool) {
	noRetry := &aws.Config{MaxRetries: aws.Int(0)}
	return ec2metadata.New(sess, noRetry).Available()
}
func (d *Discovery) refresh() (tg *config.TargetGroup, err error) {
t0 := time.Now()
defer func() {
ec2SDRefreshDuration.Observe(time.Since(t0).Seconds())
if err != nil {
ec2SDRefreshFailuresCount.Inc()
}
}()
sess, err := session.NewSessionWithOptions(session.Options{
Config: *d.aws,
Profile: d.profile,
})
if err != nil {
return nil, fmt.Errorf("could not create aws session: %s", err)
}
var ec2s *ec2.EC2
if d.roleARN != "" {
creds := stscreds.NewCredentials(sess, d.roleARN)
ec2s = ec2.New(sess, &aws.Config{Credentials: creds})
} else {
if d.aws.Credentials == nil && d.ec2MetadataAvailable(sess) {
creds := ec2rolecreds.NewCredentials(sess)
ec2s = ec2.New(sess, &aws.Config{Credentials: creds})
} else {
ec2s = ec2.New(sess)
}
}
tg = &config.TargetGroup{
Source: *d.aws.Region,
2015-09-21 11:49:19 -07:00
}
if err = ec2s.DescribeInstancesPages(nil, func(p *ec2.DescribeInstancesOutput, lastPage bool) bool {
2015-09-21 11:49:19 -07:00
for _, r := range p.Reservations {
for _, inst := range r.Instances {
if inst.PrivateIpAddress == nil {
continue
}
labels := model.LabelSet{
ec2LabelInstanceID: model.LabelValue(*inst.InstanceId),
}
labels[ec2LabelPrivateIP] = model.LabelValue(*inst.PrivateIpAddress)
addr := net.JoinHostPort(*inst.PrivateIpAddress, fmt.Sprintf("%d", d.port))
2015-09-21 11:49:19 -07:00
labels[model.AddressLabel] = model.LabelValue(addr)
if inst.PublicIpAddress != nil {
labels[ec2LabelPublicIP] = model.LabelValue(*inst.PublicIpAddress)
labels[ec2LabelPublicDNS] = model.LabelValue(*inst.PublicDnsName)
}
labels[ec2LabelAZ] = model.LabelValue(*inst.Placement.AvailabilityZone)
2016-09-22 06:01:23 -07:00
labels[ec2LabelInstanceState] = model.LabelValue(*inst.State.Name)
2016-10-21 03:13:47 -07:00
labels[ec2LabelInstanceType] = model.LabelValue(*inst.InstanceType)
if inst.VpcId != nil {
labels[ec2LabelVPCID] = model.LabelValue(*inst.VpcId)
subnetsMap := make(map[string]struct{})
for _, eni := range inst.NetworkInterfaces {
subnetsMap[*eni.SubnetId] = struct{}{}
}
subnets := []string{}
for k := range subnetsMap {
subnets = append(subnets, k)
}
labels[ec2LabelSubnetID] = model.LabelValue(
subnetSeparator +
strings.Join(subnets, subnetSeparator) +
subnetSeparator)
}
2015-09-21 11:49:19 -07:00
for _, t := range inst.Tags {
name := strutil.SanitizeLabelName(*t.Key)
labels[ec2LabelTag+model.LabelName(name)] = model.LabelValue(*t.Value)
}
tg.Targets = append(tg.Targets, labels)
}
}
return true
}); err != nil {
return nil, fmt.Errorf("could not describe instances: %s", err)
}
return tg, nil
}