prometheus/discovery/ec2/ec2.go
Jason Anderson 808f79f00a Feature: Allow getting credentials via EC2 role (#3343)
* Allow getting credentials via EC2 role

This is subtly different from the existing `role_arn` solution, which
allows Prometheus to assume an IAM role given some set of credentials
already in-scope. With EC2 roles, one specifies the role at instance
launch time (via an instance profile.) The instance then exposes
temporary credentials via its metadata. The AWS Go SDK exposes a
credential provider that polls the [instance metadata endpoint][1]
already, so we can simply use that and it will take care of renewing the
credentials when they expire.

Without this, if this is being used inside EC2, it is difficult to
cleanly allow the use of STS credentials. One has to set up a proxy role
that can assume the role you really want, and launch the EC2 instance
with the proxy role. This isn't very clean, and also doesn't seem to be
[supported very well][2].

[1]: http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/ec2-instance-metadata.html
[2]: https://github.com/aws/aws-cli/issues/1390

* Automatically try to detect EC2 role credentials

The `Available()` function exposed on ec2metadata returns a simple
true/false if the ec2 metadata is available. This is the best way to
know if we're actually running in EC2 (which is the only valid use-case
for this credential provider.)

This allows this to "just work" if you are using EC2 instance roles.
2017-10-25 14:15:39 +01:00

236 lines
6.6 KiB
Go

// Copyright 2015 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package ec2
import (
"context"
"fmt"
"net"
"strings"
"time"
"github.com/aws/aws-sdk-go/aws"
"github.com/aws/aws-sdk-go/aws/credentials"
"github.com/aws/aws-sdk-go/aws/credentials/ec2rolecreds"
"github.com/aws/aws-sdk-go/aws/credentials/stscreds"
"github.com/aws/aws-sdk-go/aws/ec2metadata"
"github.com/aws/aws-sdk-go/aws/session"
"github.com/go-kit/kit/log"
"github.com/go-kit/kit/log/level"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/model"
"github.com/aws/aws-sdk-go/service/ec2"
"github.com/prometheus/prometheus/config"
"github.com/prometheus/prometheus/util/strutil"
)
// Names of the meta labels attached to discovered EC2 targets. Every label
// name is built from the shared ec2Label prefix.
const (
// ec2Label is the common prefix of all EC2 meta labels.
ec2Label = model.MetaLabelPrefix + "ec2_"
ec2LabelAZ = ec2Label + "availability_zone"
ec2LabelInstanceID = ec2Label + "instance_id"
ec2LabelInstanceState = ec2Label + "instance_state"
ec2LabelInstanceType = ec2Label + "instance_type"
ec2LabelPublicDNS = ec2Label + "public_dns_name"
ec2LabelPublicIP = ec2Label + "public_ip"
ec2LabelPrivateIP = ec2Label + "private_ip"
ec2LabelSubnetID = ec2Label + "subnet_id"
// ec2LabelTag is itself a prefix: the sanitized instance tag key is
// appended to it to form the final label name.
ec2LabelTag = ec2Label + "tag_"
ec2LabelVPCID = ec2Label + "vpc_id"
// subnetSeparator joins the subnet IDs in the subnet_id label value and is
// also placed before the first and after the last ID.
subnetSeparator = ","
)
// Metrics describing the refreshes performed by EC2 service discovery.
var (
	// ec2SDRefreshFailuresCount is incremented once for every refresh that
	// ends with an error.
	ec2SDRefreshFailuresCount = prometheus.NewCounter(prometheus.CounterOpts{
		Name: "prometheus_sd_ec2_refresh_failures_total",
		Help: "The number of EC2-SD scrape failures.",
	})

	// ec2SDRefreshDuration observes the wall-clock duration of every
	// refresh, successful or not, in seconds.
	ec2SDRefreshDuration = prometheus.NewSummary(prometheus.SummaryOpts{
		Name: "prometheus_sd_ec2_refresh_duration_seconds",
		Help: "The duration of a EC2-SD refresh in seconds.",
	})
)
// init registers the EC2 service discovery metrics with the Prometheus
// client library. MustRegister panics if a registration fails.
func init() {
	prometheus.MustRegister(
		ec2SDRefreshFailuresCount,
		ec2SDRefreshDuration,
	)
}
// Discovery periodically performs EC2-SD requests. It implements
// the TargetProvider interface.
type Discovery struct {
// aws carries the region and the optional static credentials used when
// creating an AWS session.
aws *aws.Config
// interval is the time between two consecutive refreshes.
interval time.Duration
// profile is the named AWS profile handed to the session options.
profile string
// roleARN, when non-empty, is the role whose credentials are obtained
// through STS for the EC2 API calls.
roleARN string
// port is joined with each instance's private IP to form the target address.
port int
// logger receives refresh failures.
logger log.Logger
}
// NewDiscovery returns a new Discovery which periodically refreshes its
// targets according to conf.
//
// Static credentials are configured only when conf carries an access key or a
// secret key; otherwise the credentials are left nil. A nil logger is replaced
// by a no-op logger.
func NewDiscovery(conf *config.EC2SDConfig, logger log.Logger) *Discovery {
	var staticCreds *credentials.Credentials
	if conf.AccessKey != "" || conf.SecretKey != "" {
		staticCreds = credentials.NewStaticCredentials(conf.AccessKey, string(conf.SecretKey), "")
	}

	if logger == nil {
		logger = log.NewNopLogger()
	}

	awsConf := &aws.Config{
		Region:      &conf.Region,
		Credentials: staticCreds,
	}

	return &Discovery{
		aws:      awsConf,
		profile:  conf.Profile,
		roleARN:  conf.RoleARN,
		interval: time.Duration(conf.RefreshInterval),
		port:     conf.Port,
		logger:   logger,
	}
}
// Run implements the TargetProvider interface.
//
// It refreshes the target list once immediately and then on every tick of the
// configured interval, sending each successful result on ch. A failed refresh
// is logged and skipped. Run returns as soon as ctx is cancelled.
func (d *Discovery) Run(ctx context.Context, ch chan<- []*config.TargetGroup) {
	ticker := time.NewTicker(d.interval)
	defer ticker.Stop()

	// publish performs one refresh and delivers the result on ch. It reports
	// false only when ctx was cancelled while waiting to send; a refresh
	// error is logged and still reports true so the loop keeps going.
	publish := func() bool {
		group, refreshErr := d.refresh()
		if refreshErr != nil {
			level.Error(d.logger).Log("msg", "Refresh failed", "err", refreshErr)
			return true
		}
		select {
		case ch <- []*config.TargetGroup{group}:
			return true
		case <-ctx.Done():
			return false
		}
	}

	// Get an initial set right away.
	if !publish() {
		return
	}

	for {
		select {
		case <-ticker.C:
			if !publish() {
				return
			}
		case <-ctx.Done():
			return
		}
	}
}
// ec2MetadataAvailable reports whether the EC2 instance metadata service
// answers when probed through sess. The probe client is configured with zero
// retries.
func (d *Discovery) ec2MetadataAvailable(sess *session.Session) (isAvailable bool) {
	noRetries := &aws.Config{MaxRetries: aws.Int(0)}
	return ec2metadata.New(sess, noRetries).Available()
}
// refresh lists the EC2 instances visible to the configured credentials and
// returns them as a single target group whose Source is the configured region.
//
// Credentials are chosen in this order: credentials for roleARN obtained via
// STS when a role is configured; otherwise EC2 instance role credentials when
// no static credentials are set and the instance metadata service is
// available; otherwise whatever the session itself resolves.
//
// Instances without a private IP address are skipped. Every refresh is timed,
// and a failed one increments the failure counter. On error the returned
// target group is nil.
func (d *Discovery) refresh() (tg *config.TargetGroup, err error) {
	t0 := time.Now()
	// err is a named result so the deferred function observes the final
	// error regardless of which return statement produced it.
	defer func() {
		ec2SDRefreshDuration.Observe(time.Since(t0).Seconds())
		if err != nil {
			ec2SDRefreshFailuresCount.Inc()
		}
	}()

	sess, err := session.NewSessionWithOptions(session.Options{
		Config:  *d.aws,
		Profile: d.profile,
	})
	if err != nil {
		return nil, fmt.Errorf("could not create aws session: %s", err)
	}

	var ec2s *ec2.EC2
	switch {
	case d.roleARN != "":
		creds := stscreds.NewCredentials(sess, d.roleARN)
		ec2s = ec2.New(sess, &aws.Config{Credentials: creds})
	case d.aws.Credentials == nil && d.ec2MetadataAvailable(sess):
		creds := ec2rolecreds.NewCredentials(sess)
		ec2s = ec2.New(sess, &aws.Config{Credentials: creds})
	default:
		ec2s = ec2.New(sess)
	}

	tg = &config.TargetGroup{
		Source: *d.aws.Region,
	}

	// The port is the same for every instance; format it once.
	port := fmt.Sprintf("%d", d.port)

	if err = ec2s.DescribeInstancesPages(nil, func(p *ec2.DescribeInstancesOutput, lastPage bool) bool {
		for _, r := range p.Reservations {
			for _, inst := range r.Instances {
				// Without a private IP there is no address to scrape.
				if inst.PrivateIpAddress == nil {
					continue
				}

				addr := net.JoinHostPort(*inst.PrivateIpAddress, port)
				labels := model.LabelSet{
					ec2LabelInstanceID: model.LabelValue(*inst.InstanceId),
					ec2LabelPrivateIP:  model.LabelValue(*inst.PrivateIpAddress),
					model.AddressLabel: model.LabelValue(addr),
				}

				if inst.PublicIpAddress != nil {
					labels[ec2LabelPublicIP] = model.LabelValue(*inst.PublicIpAddress)
					// Guard the DNS name separately: it is an independent
					// optional field and must not be assumed to be set.
					if inst.PublicDnsName != nil {
						labels[ec2LabelPublicDNS] = model.LabelValue(*inst.PublicDnsName)
					}
				}

				labels[ec2LabelAZ] = model.LabelValue(*inst.Placement.AvailabilityZone)
				labels[ec2LabelInstanceState] = model.LabelValue(*inst.State.Name)
				labels[ec2LabelInstanceType] = model.LabelValue(*inst.InstanceType)

				if inst.VpcId != nil {
					labels[ec2LabelVPCID] = model.LabelValue(*inst.VpcId)

					// Deduplicate subnet IDs while keeping the order in which
					// the network interfaces were returned. Collecting them by
					// ranging over a map would make the label value depend on
					// Go's randomized map iteration order, so it could differ
					// from one refresh to the next for the same instance.
					var subnets []string
					seen := make(map[string]struct{}, len(inst.NetworkInterfaces))
					for _, eni := range inst.NetworkInterfaces {
						if eni == nil || eni.SubnetId == nil {
							continue
						}
						if _, dup := seen[*eni.SubnetId]; dup {
							continue
						}
						seen[*eni.SubnetId] = struct{}{}
						subnets = append(subnets, *eni.SubnetId)
					}
					// The value is wrapped in separators on both sides.
					labels[ec2LabelSubnetID] = model.LabelValue(
						subnetSeparator +
							strings.Join(subnets, subnetSeparator) +
							subnetSeparator)
				}

				for _, t := range inst.Tags {
					// Skip incomplete tags instead of dereferencing nil.
					if t == nil || t.Key == nil || t.Value == nil {
						continue
					}
					name := strutil.SanitizeLabelName(*t.Key)
					labels[ec2LabelTag+model.LabelName(name)] = model.LabelValue(*t.Value)
				}

				tg.Targets = append(tg.Targets, labels)
			}
		}
		// Always continue with the next page.
		return true
	}); err != nil {
		return nil, fmt.Errorf("could not describe instances: %s", err)
	}
	return tg, nil
}