2015-06-05 09:41:34 -07:00
|
|
|
// Copyright 2015 The Prometheus Authors
|
|
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
// you may not use this file except in compliance with the License.
|
|
|
|
// You may obtain a copy of the License at
|
|
|
|
//
|
|
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
//
|
|
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
// See the License for the specific language governing permissions and
|
2015-08-17 10:51:12 -07:00
|
|
|
// limitations under the License.
|
2015-06-05 09:41:34 -07:00
|
|
|
|
2015-05-14 04:32:27 -07:00
|
|
|
package discovery
|
|
|
|
|
|
|
|
import (
|
|
|
|
"fmt"
|
|
|
|
"net/http"
|
2015-07-31 09:18:25 -07:00
|
|
|
"strconv"
|
2015-05-14 04:32:27 -07:00
|
|
|
"strings"
|
|
|
|
"sync"
|
|
|
|
"time"
|
|
|
|
|
|
|
|
consul "github.com/hashicorp/consul/api"
|
2015-10-03 01:21:43 -07:00
|
|
|
"github.com/prometheus/common/log"
|
2015-08-20 08:18:46 -07:00
|
|
|
"github.com/prometheus/common/model"
|
2015-05-14 04:32:27 -07:00
|
|
|
|
|
|
|
"github.com/prometheus/prometheus/config"
|
|
|
|
)
|
|
|
|
|
|
|
|
const (
|
|
|
|
consulWatchTimeout = 30 * time.Second
|
|
|
|
consulRetryInterval = 15 * time.Second
|
|
|
|
|
2015-08-24 06:07:27 -07:00
|
|
|
// consulAddressLabel is the name for the label containing a target's address.
|
|
|
|
consulAddressLabel = model.MetaLabelPrefix + "consul_address"
|
|
|
|
// consulNodeLabel is the name for the label containing a target's node name.
|
|
|
|
consulNodeLabel = model.MetaLabelPrefix + "consul_node"
|
|
|
|
// consulTagsLabel is the name of the label containing the tags assigned to the target.
|
|
|
|
consulTagsLabel = model.MetaLabelPrefix + "consul_tags"
|
|
|
|
// consulServiceLabel is the name of the label containing the service name.
|
|
|
|
consulServiceLabel = model.MetaLabelPrefix + "consul_service"
|
|
|
|
// consulServiceAddressLabel is the name of the label containing the (optional) service address.
|
|
|
|
consulServiceAddressLabel = model.MetaLabelPrefix + "consul_service_address"
|
|
|
|
// consulServicePortLabel is the name of the label containing the service port.
|
|
|
|
consulServicePortLabel = model.MetaLabelPrefix + "consul_service_port"
|
|
|
|
// consulDCLabel is the name of the label containing the datacenter ID.
|
|
|
|
consulDCLabel = model.MetaLabelPrefix + "consul_dc"
|
|
|
|
// consulServiceIDLabel is the name of the label containing the service ID.
|
|
|
|
consulServiceIDLabel = model.MetaLabelPrefix + "consul_service_id"
|
2015-05-14 04:32:27 -07:00
|
|
|
)
|
|
|
|
|
|
|
|
// ConsulDiscovery retrieves target information from a Consul server
|
|
|
|
// and updates them via watches.
|
|
|
|
type ConsulDiscovery struct {
|
2015-08-16 15:02:02 -07:00
|
|
|
client *consul.Client
|
|
|
|
clientConf *consul.Config
|
|
|
|
clientDatacenter string
|
|
|
|
tagSeparator string
|
|
|
|
scrapedServices map[string]struct{}
|
2015-05-14 04:32:27 -07:00
|
|
|
|
2015-08-10 07:44:32 -07:00
|
|
|
mu sync.RWMutex
|
|
|
|
services map[string]*consulService
|
2015-05-14 04:32:27 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
// consulService contains data belonging to the same service.
|
|
|
|
type consulService struct {
|
|
|
|
name string
|
2015-10-08 09:06:58 -07:00
|
|
|
tgroup config.TargetGroup
|
2015-05-14 04:32:27 -07:00
|
|
|
lastIndex uint64
|
|
|
|
removed bool
|
|
|
|
running bool
|
|
|
|
done chan struct{}
|
|
|
|
}
|
|
|
|
|
|
|
|
// NewConsulDiscovery returns a new ConsulDiscovery for the given config.
|
|
|
|
func NewConsulDiscovery(conf *config.ConsulSDConfig) *ConsulDiscovery {
|
|
|
|
clientConf := &consul.Config{
|
|
|
|
Address: conf.Server,
|
|
|
|
Scheme: conf.Scheme,
|
|
|
|
Datacenter: conf.Datacenter,
|
|
|
|
Token: conf.Token,
|
|
|
|
HttpAuth: &consul.HttpBasicAuth{
|
|
|
|
Username: conf.Username,
|
|
|
|
Password: conf.Password,
|
|
|
|
},
|
|
|
|
}
|
|
|
|
client, err := consul.NewClient(clientConf)
|
|
|
|
if err != nil {
|
|
|
|
// NewClient always returns a nil error.
|
|
|
|
panic(fmt.Errorf("discovery.NewConsulDiscovery: %s", err))
|
|
|
|
}
|
|
|
|
cd := &ConsulDiscovery{
|
|
|
|
client: client,
|
|
|
|
clientConf: clientConf,
|
|
|
|
tagSeparator: conf.TagSeparator,
|
|
|
|
scrapedServices: map[string]struct{}{},
|
|
|
|
services: map[string]*consulService{},
|
|
|
|
}
|
2015-08-16 15:02:02 -07:00
|
|
|
// If the datacenter isn't set in the clientConf, let's get it from the local Consul agent
|
|
|
|
// (Consul default is to use local node's datacenter if one isn't given for a query).
|
|
|
|
if clientConf.Datacenter == "" {
|
|
|
|
info, err := client.Agent().Self()
|
|
|
|
if err != nil {
|
|
|
|
panic(fmt.Errorf("discovery.NewConsulDiscovery: %s", err))
|
|
|
|
}
|
|
|
|
cd.clientDatacenter = info["Config"]["Datacenter"].(string)
|
|
|
|
} else {
|
|
|
|
cd.clientDatacenter = clientConf.Datacenter
|
|
|
|
}
|
2015-05-14 04:32:27 -07:00
|
|
|
for _, name := range conf.Services {
|
|
|
|
cd.scrapedServices[name] = struct{}{}
|
|
|
|
}
|
|
|
|
return cd
|
|
|
|
}
|
|
|
|
|
|
|
|
// Sources implements the TargetProvider interface.
|
|
|
|
func (cd *ConsulDiscovery) Sources() []string {
|
|
|
|
clientConf := *cd.clientConf
|
|
|
|
clientConf.HttpClient = &http.Client{Timeout: 5 * time.Second}
|
|
|
|
|
|
|
|
client, err := consul.NewClient(&clientConf)
|
|
|
|
if err != nil {
|
|
|
|
// NewClient always returns a nil error.
|
|
|
|
panic(fmt.Errorf("discovery.ConsulDiscovery.Sources: %s", err))
|
|
|
|
}
|
|
|
|
|
|
|
|
srvs, _, err := client.Catalog().Services(nil)
|
|
|
|
if err != nil {
|
2015-05-20 09:10:29 -07:00
|
|
|
log.Errorf("Error refreshing service list: %s", err)
|
2015-05-14 04:32:27 -07:00
|
|
|
return nil
|
|
|
|
}
|
|
|
|
cd.mu.Lock()
|
|
|
|
defer cd.mu.Unlock()
|
|
|
|
|
|
|
|
srcs := make([]string, 0, len(srvs))
|
|
|
|
for name := range srvs {
|
2015-08-14 08:39:41 -07:00
|
|
|
if _, ok := cd.scrapedServices[name]; len(cd.scrapedServices) == 0 || ok {
|
2015-08-07 04:18:19 -07:00
|
|
|
srcs = append(srcs, name)
|
2015-05-14 04:32:27 -07:00
|
|
|
}
|
|
|
|
}
|
|
|
|
return srcs
|
|
|
|
}
|
|
|
|
|
|
|
|
// Run implements the TargetProvider interface.
|
2015-10-08 09:06:58 -07:00
|
|
|
func (cd *ConsulDiscovery) Run(ch chan<- config.TargetGroup, done <-chan struct{}) {
|
2015-05-14 04:32:27 -07:00
|
|
|
defer close(ch)
|
2015-08-10 07:44:32 -07:00
|
|
|
defer cd.stop()
|
2015-05-14 04:32:27 -07:00
|
|
|
|
|
|
|
update := make(chan *consulService, 10)
|
2015-08-10 07:44:32 -07:00
|
|
|
go cd.watchServices(update, done)
|
2015-05-14 04:32:27 -07:00
|
|
|
|
|
|
|
for {
|
|
|
|
select {
|
2015-08-10 07:44:32 -07:00
|
|
|
case <-done:
|
2015-05-14 04:32:27 -07:00
|
|
|
return
|
|
|
|
case srv := <-update:
|
|
|
|
if srv.removed {
|
2015-08-10 07:44:32 -07:00
|
|
|
close(srv.done)
|
|
|
|
|
|
|
|
// Send clearing update.
|
2015-10-08 09:06:58 -07:00
|
|
|
ch <- config.TargetGroup{Source: srv.name}
|
2015-05-14 04:32:27 -07:00
|
|
|
break
|
|
|
|
}
|
|
|
|
// Launch watcher for the service.
|
|
|
|
if !srv.running {
|
|
|
|
go cd.watchService(srv, ch)
|
|
|
|
srv.running = true
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-08-10 07:44:32 -07:00
|
|
|
func (cd *ConsulDiscovery) stop() {
|
2015-05-14 04:32:27 -07:00
|
|
|
// The lock prevents Run from terminating while the watchers attempt
|
|
|
|
// to send on their channels.
|
|
|
|
cd.mu.Lock()
|
|
|
|
defer cd.mu.Unlock()
|
|
|
|
|
|
|
|
for _, srv := range cd.services {
|
2015-08-10 07:44:32 -07:00
|
|
|
close(srv.done)
|
2015-05-14 04:32:27 -07:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// watchServices retrieves updates from Consul's services endpoint and sends
|
|
|
|
// potential updates to the update channel.
|
2015-08-10 07:44:32 -07:00
|
|
|
func (cd *ConsulDiscovery) watchServices(update chan<- *consulService, done <-chan struct{}) {
|
2015-05-14 04:32:27 -07:00
|
|
|
var lastIndex uint64
|
|
|
|
for {
|
|
|
|
catalog := cd.client.Catalog()
|
|
|
|
srvs, meta, err := catalog.Services(&consul.QueryOptions{
|
2015-06-24 08:46:11 -07:00
|
|
|
WaitIndex: lastIndex,
|
|
|
|
WaitTime: consulWatchTimeout,
|
2015-05-14 04:32:27 -07:00
|
|
|
})
|
|
|
|
if err != nil {
|
2015-05-20 09:10:29 -07:00
|
|
|
log.Errorf("Error refreshing service list: %s", err)
|
2015-08-10 07:44:32 -07:00
|
|
|
time.Sleep(consulRetryInterval)
|
2015-09-22 00:04:31 -07:00
|
|
|
continue
|
2015-05-14 04:32:27 -07:00
|
|
|
}
|
|
|
|
// If the index equals the previous one, the watch timed out with no update.
|
|
|
|
if meta.LastIndex == lastIndex {
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
lastIndex = meta.LastIndex
|
|
|
|
|
|
|
|
cd.mu.Lock()
|
|
|
|
select {
|
2015-08-10 07:44:32 -07:00
|
|
|
case <-done:
|
2015-06-24 08:46:11 -07:00
|
|
|
cd.mu.Unlock()
|
2015-05-14 04:32:27 -07:00
|
|
|
return
|
|
|
|
default:
|
|
|
|
// Continue.
|
|
|
|
}
|
|
|
|
// Check for new services.
|
|
|
|
for name := range srvs {
|
2015-08-14 08:39:41 -07:00
|
|
|
if _, ok := cd.scrapedServices[name]; len(cd.scrapedServices) > 0 && !ok {
|
2015-05-14 04:32:27 -07:00
|
|
|
continue
|
|
|
|
}
|
|
|
|
srv, ok := cd.services[name]
|
|
|
|
if !ok {
|
|
|
|
srv = &consulService{
|
2015-10-08 09:06:58 -07:00
|
|
|
name: name,
|
|
|
|
done: make(chan struct{}),
|
2015-05-14 04:32:27 -07:00
|
|
|
}
|
2015-08-07 04:18:19 -07:00
|
|
|
srv.tgroup.Source = name
|
2015-05-14 04:32:27 -07:00
|
|
|
cd.services[name] = srv
|
|
|
|
}
|
2015-08-20 08:18:46 -07:00
|
|
|
srv.tgroup.Labels = model.LabelSet{
|
2015-08-24 06:07:27 -07:00
|
|
|
consulServiceLabel: model.LabelValue(name),
|
|
|
|
consulDCLabel: model.LabelValue(cd.clientDatacenter),
|
2015-05-14 04:32:27 -07:00
|
|
|
}
|
|
|
|
update <- srv
|
|
|
|
}
|
|
|
|
// Check for removed services.
|
|
|
|
for name, srv := range cd.services {
|
|
|
|
if _, ok := srvs[name]; !ok {
|
|
|
|
srv.removed = true
|
|
|
|
update <- srv
|
|
|
|
delete(cd.services, name)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
cd.mu.Unlock()
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// watchService retrieves updates about srv from Consul's service endpoint.
|
|
|
|
// On a potential update the resulting target group is sent to ch.
|
2015-10-08 09:06:58 -07:00
|
|
|
func (cd *ConsulDiscovery) watchService(srv *consulService, ch chan<- config.TargetGroup) {
|
2015-05-14 04:32:27 -07:00
|
|
|
catalog := cd.client.Catalog()
|
|
|
|
for {
|
|
|
|
nodes, meta, err := catalog.Service(srv.name, "", &consul.QueryOptions{
|
|
|
|
WaitIndex: srv.lastIndex,
|
|
|
|
WaitTime: consulWatchTimeout,
|
|
|
|
})
|
|
|
|
if err != nil {
|
2015-05-20 09:10:29 -07:00
|
|
|
log.Errorf("Error refreshing service %s: %s", srv.name, err)
|
2015-08-10 07:44:32 -07:00
|
|
|
time.Sleep(consulRetryInterval)
|
2015-05-14 04:32:27 -07:00
|
|
|
continue
|
|
|
|
}
|
|
|
|
// If the index equals the previous one, the watch timed out with no update.
|
|
|
|
if meta.LastIndex == srv.lastIndex {
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
srv.lastIndex = meta.LastIndex
|
2015-08-20 08:18:46 -07:00
|
|
|
srv.tgroup.Targets = make([]model.LabelSet, 0, len(nodes))
|
2015-05-14 04:32:27 -07:00
|
|
|
|
|
|
|
for _, node := range nodes {
|
|
|
|
addr := fmt.Sprintf("%s:%d", node.Address, node.ServicePort)
|
2015-06-05 09:41:34 -07:00
|
|
|
// We surround the separated list with the separator as well. This way regular expressions
|
|
|
|
// in relabeling rules don't have to consider tag positions.
|
|
|
|
tags := cd.tagSeparator + strings.Join(node.ServiceTags, cd.tagSeparator) + cd.tagSeparator
|
2015-05-14 04:32:27 -07:00
|
|
|
|
2015-08-20 08:18:46 -07:00
|
|
|
srv.tgroup.Targets = append(srv.tgroup.Targets, model.LabelSet{
|
|
|
|
model.AddressLabel: model.LabelValue(addr),
|
2015-08-24 06:07:27 -07:00
|
|
|
consulAddressLabel: model.LabelValue(node.Address),
|
|
|
|
consulNodeLabel: model.LabelValue(node.Node),
|
|
|
|
consulTagsLabel: model.LabelValue(tags),
|
|
|
|
consulServiceAddressLabel: model.LabelValue(node.ServiceAddress),
|
|
|
|
consulServicePortLabel: model.LabelValue(strconv.Itoa(node.ServicePort)),
|
|
|
|
consulServiceIDLabel: model.LabelValue(node.ServiceID),
|
2015-05-14 04:32:27 -07:00
|
|
|
})
|
|
|
|
}
|
2015-06-24 08:46:11 -07:00
|
|
|
|
2015-05-14 04:32:27 -07:00
|
|
|
cd.mu.Lock()
|
|
|
|
select {
|
|
|
|
case <-srv.done:
|
2015-06-24 08:46:11 -07:00
|
|
|
cd.mu.Unlock()
|
2015-05-14 04:32:27 -07:00
|
|
|
return
|
|
|
|
default:
|
|
|
|
// Continue.
|
|
|
|
}
|
|
|
|
ch <- srv.tgroup
|
|
|
|
cd.mu.Unlock()
|
|
|
|
}
|
|
|
|
}
|