Merge pull request #297 from prometheus/feature/dns-sd

Integrate DNS-SD support for discovering job targets. Pairing with Matt/Julius.
juliusv 2013-06-12 09:13:12 -07:00
commit 697d3139d0
12 changed files with 189 additions and 21 deletions

View file

@@ -11,7 +11,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-all: generated
+all: generated/config.pb.go
 
 SUFFIXES:

View file

@@ -72,11 +72,17 @@ func (c Config) Validate() error {
 		if _, err := utility.StringToDuration(job.GetScrapeInterval()); err != nil {
 			return fmt.Errorf("Invalid scrape interval for job '%s': %s", job.GetName(), err)
 		}
+		if _, err := utility.StringToDuration(job.GetSdRefreshInterval()); err != nil {
+			return fmt.Errorf("Invalid SD refresh interval for job '%s': %s", job.GetName(), err)
+		}
 		for _, targetGroup := range job.TargetGroup {
 			if err := c.validateLabels(targetGroup.Labels); err != nil {
 				return fmt.Errorf("Invalid labels for job '%s': %s", job.GetName(), err)
 			}
 		}
+		if job.SdName != nil && len(job.TargetGroup) > 0 {
+			return fmt.Errorf("Specified both DNS-SD name and target group for job: %s", job.GetName())
+		}
 	}
 	return nil

View file

@@ -55,8 +55,18 @@ message JobConfig {
   // How frequently to scrape targets from this job. Overrides the global
   // default.
   optional string scrape_interval = 2;
-  // List of labeled target groups for this job.
-  repeated TargetGroup target_group = 3;
+  // The DNS-SD service name pointing to SRV records containing endpoint
+  // information for a job. When this field is provided, no target_group
+  // elements may be set.
+  optional string sd_name = 3;
+  // Discovery refresh period when using DNS-SD to discover targets. Must be a
+  // valid Prometheus duration string in the form "[0-9]+[smhdwy]".
+  optional string sd_refresh_interval = 4 [default = "30s"];
+  // List of labeled target groups for this job. Only legal when DNS-SD isn't
+  // used for a job.
+  repeated TargetGroup target_group = 5;
+  // The HTTP resource path to fetch metrics from on targets.
+  optional string metrics_path = 6 [default = "/metrics.json"];
 }
 
 // The top-level Prometheus configuration.
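For illustration, a complete DNS-SD job stanza in the same ASCII protobuf configuration format as the test fixtures below might look like this (the job name, service name, and interval are made-up values, not part of this change):

job: <
  name: "api-server"
  sd_name: "telemetry.eng.example.org"
  sd_refresh_interval: "1m"
  metrics_path: "/metrics.json"
>

Because sd_name is set, no target_group may appear for this job; its scrape targets come from the SRV records the DNS-SD name resolves to, re-resolved every sd_refresh_interval.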

View file

@@ -32,6 +32,8 @@ var configTests = []struct {
 		inputFile: "sample.conf.input",
 	}, {
 		inputFile: "empty.conf.input",
+	}, {
+		inputFile: "sd_targets.conf.input",
 	},
 	{
 		inputFile: "invalid_proto_format.conf.input",
@@ -53,6 +55,11 @@ var configTests = []struct {
 		shouldFail:  true,
 		errContains: "Invalid label name",
 	},
+	{
+		inputFile:   "mixing_sd_and_manual_targets.conf.input",
+		shouldFail:  true,
+		errContains: "Specified both DNS-SD name and target group",
+	},
 }
 
 func TestConfigs(t *testing.T) {

View file

@@ -0,0 +1,7 @@
+job: <
+  name: "testjob"
+  sd_name: "sd_name"
+  target_group: <
+    target: "http://sampletarget:8080/metrics.json"
+  >
+>

View file

@@ -0,0 +1,4 @@
+job: <
+  name: "testjob"
+  sd_name: "sd_name"
+>

View file

@@ -121,16 +121,22 @@ func (m *TargetGroup) GetLabels() *LabelPairs {
 }
 
 type JobConfig struct {
 	Name             *string        `protobuf:"bytes,1,req,name=name" json:"name,omitempty"`
 	ScrapeInterval   *string        `protobuf:"bytes,2,opt,name=scrape_interval" json:"scrape_interval,omitempty"`
-	TargetGroup      []*TargetGroup `protobuf:"bytes,3,rep,name=target_group" json:"target_group,omitempty"`
-	XXX_unrecognized []byte         `json:"-"`
+	SdName            *string        `protobuf:"bytes,3,opt,name=sd_name" json:"sd_name,omitempty"`
+	SdRefreshInterval *string        `protobuf:"bytes,4,opt,name=sd_refresh_interval,def=30s" json:"sd_refresh_interval,omitempty"`
+	TargetGroup       []*TargetGroup `protobuf:"bytes,5,rep,name=target_group" json:"target_group,omitempty"`
+	MetricsPath       *string        `protobuf:"bytes,6,opt,name=metrics_path,def=/metrics.json" json:"metrics_path,omitempty"`
+	XXX_unrecognized  []byte         `json:"-"`
 }
 
 func (m *JobConfig) Reset()         { *m = JobConfig{} }
 func (m *JobConfig) String() string { return proto.CompactTextString(m) }
 func (*JobConfig) ProtoMessage()    {}
 
+const Default_JobConfig_SdRefreshInterval string = "30s"
+const Default_JobConfig_MetricsPath string = "/metrics.json"
+
 func (m *JobConfig) GetName() string {
 	if m != nil && m.Name != nil {
 		return *m.Name
@@ -145,6 +151,20 @@ func (m *JobConfig) GetScrapeInterval() string {
 	return ""
 }
 
+func (m *JobConfig) GetSdName() string {
+	if m != nil && m.SdName != nil {
+		return *m.SdName
+	}
+	return ""
+}
+
+func (m *JobConfig) GetSdRefreshInterval() string {
+	if m != nil && m.SdRefreshInterval != nil {
+		return *m.SdRefreshInterval
+	}
+	return Default_JobConfig_SdRefreshInterval
+}
+
 func (m *JobConfig) GetTargetGroup() []*TargetGroup {
 	if m != nil {
 		return m.TargetGroup
@@ -152,6 +172,13 @@ func (m *JobConfig) GetTargetGroup() []*TargetGroup {
 	return nil
 }
 
+func (m *JobConfig) GetMetricsPath() string {
+	if m != nil && m.MetricsPath != nil {
+		return *m.MetricsPath
+	}
+	return Default_JobConfig_MetricsPath
+}
+
 type PrometheusConfig struct {
 	Global *GlobalConfig `protobuf:"bytes,1,opt,name=global" json:"global,omitempty"`
 	Job    []*JobConfig  `protobuf:"bytes,2,rep,name=job" json:"job,omitempty"`

View file

@@ -0,0 +1,85 @@
+// Copyright 2013 Prometheus Team
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package retrieval
+
+import (
+	"fmt"
+	"net"
+	"net/url"
+	"time"
+
+	"github.com/prometheus/prometheus/config"
+	"github.com/prometheus/prometheus/model"
+	"github.com/prometheus/prometheus/utility"
+)
+
+// TargetProvider encapsulates retrieving all targets for a job.
+type TargetProvider interface {
+	// Retrieves the current list of targets for this provider.
+	Targets() ([]Target, error)
+}
+
+// sdTargetProvider discovers a job's targets by resolving the SRV records
+// behind the job's DNS-SD name, caching the result between refreshes.
+type sdTargetProvider struct {
+	job config.JobConfig
+
+	targets         []Target
+	lastRefresh     time.Time
+	refreshInterval time.Duration
+}
+
+// NewSdTargetProvider constructs a new sdTargetProvider for a job.
+func NewSdTargetProvider(job config.JobConfig) *sdTargetProvider {
+	i, err := utility.StringToDuration(job.GetSdRefreshInterval())
+	if err != nil {
+		panic(fmt.Sprintf("illegal refresh duration string %s: %s", job.GetSdRefreshInterval(), err))
+	}
+	return &sdTargetProvider{
+		job:             job,
+		refreshInterval: i,
+	}
+}
+
+func (p *sdTargetProvider) Targets() ([]Target, error) {
+	// Serve the cached target list while it is still fresh.
+	if time.Since(p.lastRefresh) < p.refreshInterval {
+		return p.targets, nil
+	}
+
+	_, addrs, err := net.LookupSRV("", "", p.job.GetSdName())
+	if err != nil {
+		return nil, err
+	}
+
+	baseLabels := model.LabelSet{
+		model.JobLabel: model.LabelValue(p.job.GetName()),
+	}
+
+	targets := make([]Target, 0, len(addrs))
+	endpoint := &url.URL{
+		Scheme: "http",
+		Path:   p.job.GetMetricsPath(),
+	}
+	for _, addr := range addrs {
+		// Remove the final dot from rooted DNS names to make them look more usual.
+		if addr.Target[len(addr.Target)-1] == '.' {
+			addr.Target = addr.Target[:len(addr.Target)-1]
+		}
+		endpoint.Host = fmt.Sprintf("%s:%d", addr.Target, addr.Port)
+		t := NewTarget(endpoint.String(), time.Second*5, baseLabels)
+		targets = append(targets, t)
+	}
+
+	p.targets = targets
+	p.lastRefresh = time.Now()
+	return targets, nil
+}

View file

@@ -24,8 +24,8 @@ import (
 type TargetManager interface {
 	acquire()
 	release()
-	AddTarget(job config.JobConfig, t Target, defaultScrapeInterval time.Duration)
-	ReplaceTargets(job config.JobConfig, newTargets []Target, defaultScrapeInterval time.Duration)
+	AddTarget(job config.JobConfig, t Target)
+	ReplaceTargets(job config.JobConfig, newTargets []Target)
 	Remove(t Target)
 	AddTargetsFromConfig(config config.Config)
 	Pools() map[string]*TargetPool
@@ -53,6 +53,11 @@ func (m *targetManager) release() {
 	<-m.requestAllowance
 }
 
-func (m *targetManager) TargetPoolForJob(job config.JobConfig, defaultScrapeInterval time.Duration) *TargetPool {
+func (m *targetManager) TargetPoolForJob(job config.JobConfig) *TargetPool {
 	targetPool, ok := m.poolsByJob[job.GetName()]
 	if !ok {
-		targetPool = NewTargetPool(m)
+		var provider TargetProvider = nil
+		if job.SdName != nil {
+			provider = NewSdTargetProvider(job)
+		}
+		targetPool = NewTargetPool(m, provider)
 		log.Printf("Pool for job %s does not exist; creating and starting...", job.GetName())
 		interval := job.ScrapeInterval()
@@ -69,14 +74,14 @@ func (m *targetManager) TargetPoolForJob(job config.JobConfig, defaultScrapeInte
 	return targetPool
 }
 
-func (m *targetManager) AddTarget(job config.JobConfig, t Target, defaultScrapeInterval time.Duration) {
-	targetPool := m.TargetPoolForJob(job, defaultScrapeInterval)
+func (m *targetManager) AddTarget(job config.JobConfig, t Target) {
+	targetPool := m.TargetPoolForJob(job)
 	targetPool.AddTarget(t)
 	m.poolsByJob[job.GetName()] = targetPool
 }
 
-func (m *targetManager) ReplaceTargets(job config.JobConfig, newTargets []Target, defaultScrapeInterval time.Duration) {
-	targetPool := m.TargetPoolForJob(job, defaultScrapeInterval)
+func (m *targetManager) ReplaceTargets(job config.JobConfig, newTargets []Target) {
+	targetPool := m.TargetPoolForJob(job)
 	targetPool.replaceTargets(newTargets)
 }
@@ -86,6 +91,11 @@ func (m targetManager) Remove(t Target) {
 func (m *targetManager) AddTargetsFromConfig(config config.Config) {
 	for _, job := range config.Jobs() {
+		if job.SdName != nil {
+			m.TargetPoolForJob(job)
+			continue
+		}
 		for _, targetGroup := range job.TargetGroup {
 			baseLabels := model.LabelSet{
 				model.JobLabel: model.LabelValue(job.GetName()),
@@ -98,7 +108,7 @@ func (m *targetManager) AddTargetsFromConfig(config config.Config) {
 			for _, endpoint := range targetGroup.Target {
 				target := NewTarget(endpoint, time.Second*5, baseLabels)
-				m.AddTarget(job, target, config.ScrapeInterval())
+				m.AddTarget(job, target)
 			}
 		}
 	}

View file

@@ -95,15 +95,15 @@ func testTargetManager(t test.Tester) {
 		interval:  time.Minute,
 	}
 
-	targetManager.AddTarget(testJob1, target1GroupA, 0)
-	targetManager.AddTarget(testJob1, target2GroupA, 0)
+	targetManager.AddTarget(testJob1, target1GroupA)
+	targetManager.AddTarget(testJob1, target2GroupA)
 
 	target1GroupB := &fakeTarget{
 		schedules: []time.Time{time.Now()},
 		interval:  time.Minute * 2,
 	}
 
-	targetManager.AddTarget(testJob2, target1GroupB, 0)
+	targetManager.AddTarget(testJob2, target1GroupB)
 }
 
 func TestTargetManager(t *testing.T) {

View file

@@ -37,13 +37,16 @@ type TargetPool struct {
 	targets             targets
 	addTargetQueue      chan Target
 	replaceTargetsQueue chan targets
+
+	targetProvider TargetProvider
 }
 
-func NewTargetPool(m TargetManager) *TargetPool {
+func NewTargetPool(m TargetManager, p TargetProvider) *TargetPool {
 	return &TargetPool{
 		manager:             m,
 		addTargetQueue:      make(chan Target, targetAddQueueSize),
 		replaceTargetsQueue: make(chan targets, targetReplaceQueueSize),
+		targetProvider:      p,
 	}
 }
@@ -121,6 +124,15 @@ func (p *TargetPool) runSingle(earliest time.Time, results chan format.Result, t
 }
 
 func (p *TargetPool) runIteration(results chan format.Result, interval time.Duration) {
+	if p.targetProvider != nil {
+		targets, err := p.targetProvider.Targets()
+		if err != nil {
+			log.Printf("Error looking up targets: %s", err)
+			return
+		}
+		p.ReplaceTargets(targets)
+	}
+
 	p.RLock()
 	defer p.RUnlock()
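Because the pool only depends on the TargetProvider interface, other discovery mechanisms could be plugged in the same way. As a rough sketch (hypothetical, not part of this change), a provider serving a fixed target list inside the retrieval package would just be:

// staticTargetProvider is a hypothetical TargetProvider that always returns
// the same target list; it only illustrates the interface TargetPool consumes.
type staticTargetProvider struct {
	targets []Target
}

func (p *staticTargetProvider) Targets() ([]Target, error) {
	return p.targets, nil
}

A pool created with NewTargetPool(m, &staticTargetProvider{targets: someTargets}) would then replace its target list with that fixed set on every scrape iteration, exactly as the DNS-SD provider does with freshly resolved SRV records.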

View file

@@ -52,5 +52,5 @@ func (serv MetricsService) SetTargets(targetGroups []TargetGroup, jobName string
 	// BUG(julius): Validate that this ScrapeInterval is in fact the proper one
 	// for the job.
-	serv.TargetManager.ReplaceTargets(*job, newTargets, serv.Config.ScrapeInterval())
+	serv.TargetManager.ReplaceTargets(*job, newTargets)
 }