mirror of
https://github.com/prometheus/prometheus.git
synced 2024-12-25 13:44:05 -08:00
Add initial implementation for SD via Consul.
This commit adds service discovery using Consul's HTTP API and watches (long polling) to retrieve target updates.
This commit is contained in:
parent
b0c181dc0d
commit
7143dff02f
266
retrieval/discovery/consul.go
Normal file
266
retrieval/discovery/consul.go
Normal file
|
@ -0,0 +1,266 @@
|
|||
package discovery
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"net/http"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/golang/glog"
|
||||
|
||||
consul "github.com/hashicorp/consul/api"
|
||||
clientmodel "github.com/prometheus/client_golang/model"
|
||||
|
||||
"github.com/prometheus/prometheus/config"
|
||||
)
|
||||
|
||||
const (
|
||||
consulSourcePrefix = "consul"
|
||||
consulWatchTimeout = 30 * time.Second
|
||||
consulRetryInterval = 15 * time.Second
|
||||
|
||||
// ConsuleNodeLabel is the name for the label containing a target's node name.
|
||||
ConsulNodeLabel = clientmodel.MetaLabelPrefix + "consul_node"
|
||||
// ConsulTagsLabel is the name of the label containing the tags assigned to the target.
|
||||
ConsulTagsLabel = clientmodel.MetaLabelPrefix + "consul_tags"
|
||||
// ConsulServiceLabel is the name of the label containing the service name.
|
||||
ConsulServiceLabel = clientmodel.MetaLabelPrefix + "consul_service"
|
||||
// ConsulDCLabel is the name of the label containing the datacenter ID.
|
||||
ConsulDCLabel = clientmodel.MetaLabelPrefix + "consul_dc"
|
||||
)
|
||||
|
||||
// ConsulDiscovery retrieves target information from a Consul server
|
||||
// and updates them via watches.
|
||||
type ConsulDiscovery struct {
|
||||
client *consul.Client
|
||||
clientConf *consul.Config
|
||||
tagSeparator string
|
||||
scrapedServices map[string]struct{}
|
||||
|
||||
mu sync.RWMutex
|
||||
services map[string]*consulService
|
||||
runDone, srvsDone chan struct{}
|
||||
}
|
||||
|
||||
// consulService contains data belonging to the same service.
|
||||
type consulService struct {
|
||||
name string
|
||||
tgroup *config.TargetGroup
|
||||
lastIndex uint64
|
||||
removed bool
|
||||
running bool
|
||||
done chan struct{}
|
||||
}
|
||||
|
||||
// NewConsulDiscovery returns a new ConsulDiscovery for the given config.
|
||||
func NewConsulDiscovery(conf *config.ConsulSDConfig) *ConsulDiscovery {
|
||||
clientConf := &consul.Config{
|
||||
Address: conf.Server,
|
||||
Scheme: conf.Scheme,
|
||||
Datacenter: conf.Datacenter,
|
||||
Token: conf.Token,
|
||||
HttpAuth: &consul.HttpBasicAuth{
|
||||
Username: conf.Username,
|
||||
Password: conf.Password,
|
||||
},
|
||||
}
|
||||
client, err := consul.NewClient(clientConf)
|
||||
if err != nil {
|
||||
// NewClient always returns a nil error.
|
||||
panic(fmt.Errorf("discovery.NewConsulDiscovery: %s", err))
|
||||
}
|
||||
cd := &ConsulDiscovery{
|
||||
client: client,
|
||||
clientConf: clientConf,
|
||||
tagSeparator: conf.TagSeparator,
|
||||
runDone: make(chan struct{}),
|
||||
srvsDone: make(chan struct{}, 1),
|
||||
scrapedServices: map[string]struct{}{},
|
||||
services: map[string]*consulService{},
|
||||
}
|
||||
for _, name := range conf.Services {
|
||||
cd.scrapedServices[name] = struct{}{}
|
||||
}
|
||||
return cd
|
||||
}
|
||||
|
||||
// Sources implements the TargetProvider interface.
|
||||
func (cd *ConsulDiscovery) Sources() []string {
|
||||
clientConf := *cd.clientConf
|
||||
clientConf.HttpClient = &http.Client{Timeout: 5 * time.Second}
|
||||
|
||||
client, err := consul.NewClient(&clientConf)
|
||||
if err != nil {
|
||||
// NewClient always returns a nil error.
|
||||
panic(fmt.Errorf("discovery.ConsulDiscovery.Sources: %s", err))
|
||||
}
|
||||
|
||||
srvs, _, err := client.Catalog().Services(nil)
|
||||
if err != nil {
|
||||
glog.Errorf("Error refreshing service list: %s", err)
|
||||
return nil
|
||||
}
|
||||
cd.mu.Lock()
|
||||
defer cd.mu.Unlock()
|
||||
|
||||
srcs := make([]string, 0, len(srvs))
|
||||
for name := range srvs {
|
||||
if _, ok := cd.scrapedServices[name]; ok {
|
||||
srcs = append(srcs, consulSourcePrefix+":"+name)
|
||||
}
|
||||
}
|
||||
return srcs
|
||||
}
|
||||
|
||||
// Run implements the TargetProvider interface.
|
||||
func (cd *ConsulDiscovery) Run(ch chan<- *config.TargetGroup) {
|
||||
defer close(ch)
|
||||
|
||||
update := make(chan *consulService, 10)
|
||||
go cd.watchServices(update)
|
||||
|
||||
for {
|
||||
select {
|
||||
case <-cd.runDone:
|
||||
return
|
||||
case srv := <-update:
|
||||
if srv.removed {
|
||||
ch <- &config.TargetGroup{Source: consulSourcePrefix + ":" + srv.name}
|
||||
break
|
||||
}
|
||||
// Launch watcher for the service.
|
||||
if !srv.running {
|
||||
go cd.watchService(srv, ch)
|
||||
srv.running = true
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Stop implements the TargetProvider interface.
|
||||
func (cd *ConsulDiscovery) Stop() {
|
||||
glog.V(1).Infof("Stopping Consul service discovery for %s", cd.clientConf.Address)
|
||||
|
||||
// The lock prevents Run from terminating while the watchers attempt
|
||||
// to send on their channels.
|
||||
cd.mu.Lock()
|
||||
defer cd.mu.Unlock()
|
||||
|
||||
// The watching goroutines will terminate after their next watch timeout.
|
||||
// As this can take long, the channel is buffered and we do not wait.
|
||||
for _, srv := range cd.services {
|
||||
srv.done <- struct{}{}
|
||||
}
|
||||
cd.srvsDone <- struct{}{}
|
||||
|
||||
// Terminate Run.
|
||||
cd.runDone <- struct{}{}
|
||||
|
||||
glog.V(1).Infof("Consul service discovery for %s stopped.", cd.clientConf.Address)
|
||||
}
|
||||
|
||||
// watchServices retrieves updates from Consul's services endpoint and sends
|
||||
// potential updates to the update channel.
|
||||
func (cd *ConsulDiscovery) watchServices(update chan<- *consulService) {
|
||||
var lastIndex uint64
|
||||
for {
|
||||
catalog := cd.client.Catalog()
|
||||
srvs, meta, err := catalog.Services(&consul.QueryOptions{
|
||||
RequireConsistent: false,
|
||||
WaitIndex: lastIndex,
|
||||
})
|
||||
if err != nil {
|
||||
glog.Errorf("Error refreshing service list: %s", err)
|
||||
<-time.After(consulRetryInterval)
|
||||
continue
|
||||
}
|
||||
// If the index equals the previous one, the watch timed out with no update.
|
||||
if meta.LastIndex == lastIndex {
|
||||
continue
|
||||
}
|
||||
lastIndex = meta.LastIndex
|
||||
|
||||
cd.mu.Lock()
|
||||
select {
|
||||
case <-cd.srvsDone:
|
||||
return
|
||||
default:
|
||||
// Continue.
|
||||
}
|
||||
// Check for new services.
|
||||
for name := range srvs {
|
||||
if _, ok := cd.scrapedServices[name]; !ok {
|
||||
continue
|
||||
}
|
||||
srv, ok := cd.services[name]
|
||||
if !ok {
|
||||
srv = &consulService{
|
||||
name: name,
|
||||
tgroup: &config.TargetGroup{},
|
||||
done: make(chan struct{}, 1),
|
||||
}
|
||||
srv.tgroup.Source = consulSourcePrefix + ":" + name
|
||||
cd.services[name] = srv
|
||||
}
|
||||
srv.tgroup.Labels = clientmodel.LabelSet{
|
||||
ConsulServiceLabel: clientmodel.LabelValue(name),
|
||||
ConsulDCLabel: clientmodel.LabelValue(cd.clientConf.Datacenter),
|
||||
}
|
||||
update <- srv
|
||||
}
|
||||
// Check for removed services.
|
||||
for name, srv := range cd.services {
|
||||
if _, ok := srvs[name]; !ok {
|
||||
srv.removed = true
|
||||
update <- srv
|
||||
srv.done <- struct{}{}
|
||||
delete(cd.services, name)
|
||||
}
|
||||
}
|
||||
cd.mu.Unlock()
|
||||
}
|
||||
}
|
||||
|
||||
// watchService retrieves updates about srv from Consul's service endpoint.
|
||||
// On a potential update the resulting target group is sent to ch.
|
||||
func (cd *ConsulDiscovery) watchService(srv *consulService, ch chan<- *config.TargetGroup) {
|
||||
catalog := cd.client.Catalog()
|
||||
for {
|
||||
nodes, meta, err := catalog.Service(srv.name, "", &consul.QueryOptions{
|
||||
WaitIndex: srv.lastIndex,
|
||||
WaitTime: consulWatchTimeout,
|
||||
})
|
||||
if err != nil {
|
||||
glog.Errorf("Error refreshing service %s: %s", srv.name, err)
|
||||
<-time.After(consulRetryInterval)
|
||||
continue
|
||||
}
|
||||
// If the index equals the previous one, the watch timed out with no update.
|
||||
if meta.LastIndex == srv.lastIndex {
|
||||
continue
|
||||
}
|
||||
srv.lastIndex = meta.LastIndex
|
||||
srv.tgroup.Targets = make([]clientmodel.LabelSet, 0, len(nodes))
|
||||
|
||||
for _, node := range nodes {
|
||||
addr := fmt.Sprintf("%s:%d", node.Address, node.ServicePort)
|
||||
tags := strings.Join(node.ServiceTags, cd.tagSeparator)
|
||||
|
||||
srv.tgroup.Targets = append(srv.tgroup.Targets, clientmodel.LabelSet{
|
||||
clientmodel.AddressLabel: clientmodel.LabelValue(addr),
|
||||
ConsulNodeLabel: clientmodel.LabelValue(node.Node),
|
||||
ConsulTagsLabel: clientmodel.LabelValue(tags),
|
||||
})
|
||||
}
|
||||
cd.mu.Lock()
|
||||
select {
|
||||
case <-srv.done:
|
||||
return
|
||||
default:
|
||||
// Continue.
|
||||
}
|
||||
ch <- srv.tgroup
|
||||
cd.mu.Unlock()
|
||||
}
|
||||
}
|
Loading…
Reference in a new issue