Mirror of https://github.com/prometheus/prometheus.git (synced 2024-11-10 23:54:05 -08:00)
Merge "Stagger scrapes to spread out load."
Commit: e995cda75c
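This change moves scraping from a per-pool fan-out to one long-lived goroutine per target: each scraper first sleeps a random fraction of its scrape interval and only then starts a fixed-interval ticker, so targets sharing an interval no longer all fire at the same instant. The following is a minimal, self-contained sketch of that staggering idea using only the standard library; runScraper and the stop channel are illustrative names, not the code in this commit.

package main

import (
	"fmt"
	"math/rand"
	"time"
)

// runScraper sleeps a random fraction of interval (the stagger), then scrapes
// once per tick until stop is closed.
func runScraper(name string, interval time.Duration, stop chan struct{}) {
	jitter := time.Duration(float64(interval) * rand.Float64())
	select {
	case <-time.After(jitter):
	case <-stop:
		return
	}

	ticker := time.NewTicker(interval)
	defer ticker.Stop()
	for {
		fmt.Printf("%s scraping at %s\n", name, time.Now().Format("15:04:05.000"))
		select {
		case <-ticker.C:
		case <-stop:
			return
		}
	}
}

func main() {
	stop := make(chan struct{})
	for _, name := range []string{"target-a", "target-b", "target-c"} {
		go runScraper(name, time.Second, stop)
	}
	time.Sleep(3 * time.Second)
	close(stop)
}

The jitter only delays the first scrape; after that each target stays on its configured interval, just offset from the others.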
main.go

@@ -67,8 +67,6 @@ var (
 	notificationQueueCapacity = flag.Int("alertmanager.notificationQueueCapacity", 100, "The size of the queue for pending alert manager notifications.")
 
-	concurrentRetrievalAllowance = flag.Int("concurrentRetrievalAllowance", 15, "The number of concurrent metrics retrieval requests allowed.")
-
 	printVersion = flag.Bool("version", false, "print version information")
 
 	shutdownTimeout = flag.Duration("shutdownGracePeriod", 0*time.Second, "The amount of time Prometheus gives background services to finish running when shutdown is requested.")
@@ -269,7 +267,7 @@ func main() {
 	deletionTimer := time.NewTicker(*deleteInterval)
 
 	// Queue depth will need to be exposed
-	targetManager := retrieval.NewTargetManager(ingester, *concurrentRetrievalAllowance)
+	targetManager := retrieval.NewTargetManager(ingester)
 	targetManager.AddTargetsFromConfig(conf)
 
 	notifications := make(chan notification.NotificationReqs, *notificationQueueCapacity)
retrieval/target.go

@@ -15,6 +15,7 @@ package retrieval
 
 import (
 	"fmt"
+	"math/rand"
 	"net/http"
 	"os"
 	"strings"
@@ -41,6 +42,7 @@ const (
 	failure = "failure"
 	outcome = "outcome"
 	success = "success"
+	interval = "interval"
 )
 
 var (
@@ -55,10 +57,20 @@ var (
 		},
 		[]string{job, instance, outcome},
 	)
+	targetIntervalLength = prometheus.NewSummaryVec(
+		prometheus.SummaryOpts{
+			Namespace:  namespace,
+			Name:       "target_interval_length_seconds",
+			Help:       "Actual intervals between scrapes.",
+			Objectives: []float64{0.01, 0.05, 0.5, 0.90, 0.99},
+		},
+		[]string{interval},
+	)
 )
 
 func init() {
 	prometheus.MustRegister(targetOperationLatencies)
+	prometheus.MustRegister(targetIntervalLength)
 }
 
 // The state of the given Target.
@@ -99,8 +111,6 @@ const (
 // metrics are retrieved and deserialized from the given instance to which it
 // refers.
 type Target interface {
-	// Retrieve values from this target.
-	Scrape(ingester extraction.Ingester) error
 	// Return the last encountered scrape error, if any.
 	LastError() error
 	// Return the health of the target.
@@ -120,6 +130,12 @@ type Target interface {
 	// labels) into an old target definition for the same endpoint. Preserve
 	// remaining information - like health state - from the old target.
 	Merge(newTarget Target)
+	// Scrape target at the specified interval.
+	RunScraper(extraction.Ingester, time.Duration)
+	// Stop scraping, synchronous.
+	StopScraper()
+	// Do a single scrape.
+	scrape(ingester extraction.Ingester) error
 }
 
 // target is a Target that refers to a singular HTTP or HTTPS endpoint.
@@ -130,6 +146,9 @@ type target struct {
 	lastError error
 	// The last time a scrape was attempted.
 	lastScrape time.Time
+	// Channel to signal RunScraper should stop, holds a channel
+	// to notify once stopped.
+	stopScraper chan bool
 
 	address string
 	// What is the deadline for the HTTP or HTTPS against this endpoint.
@@ -147,6 +166,7 @@ func NewTarget(address string, deadline time.Duration, baseLabels clientmodel.La
 		Deadline:    deadline,
 		baseLabels:  baseLabels,
 		httpClient:  utility.NewDeadlineClient(deadline),
+		stopScraper: make(chan bool),
 	}
 
 	return target
@@ -177,24 +197,40 @@ func (t *target) recordScrapeHealth(ingester extraction.Ingester, timestamp clie
 	})
 }
 
-func (t *target) Scrape(ingester extraction.Ingester) error {
-	now := clientmodel.Now()
-	err := t.scrape(now, ingester)
-	if err == nil {
-		t.state = ALIVE
-		t.recordScrapeHealth(ingester, now, true)
-	} else {
-		t.state = UNREACHABLE
-		t.recordScrapeHealth(ingester, now, false)
+func (t *target) RunScraper(ingester extraction.Ingester, interval time.Duration) {
+	jitterTimer := time.NewTimer(time.Duration(float64(interval) * rand.Float64()))
+	select {
+	case <-jitterTimer.C:
+	case <-t.stopScraper:
+		return
 	}
+	jitterTimer.Stop()
+
+	ticker := time.NewTicker(interval)
+	defer ticker.Stop()
+
 	t.lastScrape = time.Now()
-	t.lastError = err
-	return err
+	t.scrape(ingester)
+	for {
+		select {
+		case <-ticker.C:
+			targetIntervalLength.WithLabelValues(interval.String()).Observe(float64(time.Since(t.lastScrape) / time.Second))
+			t.lastScrape = time.Now()
+			t.scrape(ingester)
+		case <-t.stopScraper:
+			return
+		}
+	}
+}
+
+func (t *target) StopScraper() {
+	t.stopScraper <- true
 }
 
 const acceptHeader = `application/vnd.google.protobuf;proto=io.prometheus.client.MetricFamily;encoding=delimited;q=0.7,text/plain;version=0.0.4;q=0.3,application/json;schema=prometheus/telemetry;version=0.0.2;q=0.2,*/*;q=0.1`
 
-func (t *target) scrape(timestamp clientmodel.Timestamp, ingester extraction.Ingester) (err error) {
+func (t *target) scrape(ingester extraction.Ingester) (err error) {
+	timestamp := clientmodel.Now()
 	defer func(start time.Time) {
 		ms := float64(time.Since(start)) / float64(time.Millisecond)
 		labels := prometheus.Labels{
@@ -202,11 +238,16 @@ func (t *target) scrape(timestamp clientmodel.Timestamp, ingester extraction.Ing
 			instance: t.Address(),
 			outcome:  success,
 		}
-		if err != nil {
+		if err == nil {
+			t.state = ALIVE
+			t.recordScrapeHealth(ingester, timestamp, true)
 			labels[outcome] = failure
+		} else {
+			t.state = UNREACHABLE
+			t.recordScrapeHealth(ingester, timestamp, false)
 		}
 
 		targetOperationLatencies.With(labels).Observe(ms)
+		t.lastError = err
 	}(time.Now())
 
 	req, err := http.NewRequest("GET", t.Address(), nil)
@@ -292,7 +333,3 @@ func (t *target) Merge(newTarget Target) {
 }
 
 type targets []Target
-
-func (t targets) Len() int {
-	return len(t)
-}
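The new targetIntervalLength summary records the gap actually observed between consecutive scrapes, labelled by the configured interval, which makes the staggering and any scheduling drift visible. A minimal sketch of registering and feeding such a summary vector with client_golang follows; it is illustrative only and omits the Objectives field, whose type has changed between client_golang versions.

package main

import (
	"fmt"
	"time"

	"github.com/prometheus/client_golang/prometheus"
)

// intervalLength mirrors the shape of the metric added in this commit.
var intervalLength = prometheus.NewSummaryVec(
	prometheus.SummaryOpts{
		Namespace: "prometheus",
		Name:      "target_interval_length_seconds",
		Help:      "Actual intervals between scrapes.",
	},
	[]string{"interval"},
)

func main() {
	prometheus.MustRegister(intervalLength)

	last := time.Now()
	for i := 0; i < 3; i++ {
		time.Sleep(100 * time.Millisecond)
		// Observe the realised gap, keyed by the configured interval.
		intervalLength.WithLabelValues("100ms").Observe(time.Since(last).Seconds())
		last = time.Now()
	}
	fmt.Println("recorded 3 observations")
}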
retrieval/target_test.go

@@ -42,7 +42,7 @@ func TestTargetScrapeUpdatesState(t *testing.T) {
 		address:    "bad schema",
 		httpClient: utility.NewDeadlineClient(0),
 	}
-	testTarget.Scrape(nopIngester{})
+	testTarget.scrape(nopIngester{})
 	if testTarget.state != UNREACHABLE {
 		t.Errorf("Expected target state %v, actual: %v", UNREACHABLE, testTarget.state)
 	}
@@ -100,7 +100,7 @@ func TestTargetScrapeTimeout(t *testing.T) {
 
 	// scrape once without timeout
 	signal <- true
-	if err := testTarget.Scrape(ingester); err != nil {
+	if err := testTarget.scrape(ingester); err != nil {
 		t.Fatal(err)
 	}
 
@@ -109,12 +109,12 @@ func TestTargetScrapeTimeout(t *testing.T) {
 
 	// now scrape again
 	signal <- true
-	if err := testTarget.Scrape(ingester); err != nil {
+	if err := testTarget.scrape(ingester); err != nil {
 		t.Fatal(err)
 	}
 
 	// now timeout
-	if err := testTarget.Scrape(ingester); err == nil {
+	if err := testTarget.scrape(ingester); err == nil {
 		t.Fatal("expected scrape to timeout")
 	} else {
 		signal <- true // let handler continue
@@ -122,7 +122,7 @@ func TestTargetScrapeTimeout(t *testing.T) {
 
 	// now scrape again without timeout
 	signal <- true
-	if err := testTarget.Scrape(ingester); err != nil {
+	if err := testTarget.scrape(ingester); err != nil {
 		t.Fatal(err)
 	}
 }
@@ -138,8 +138,34 @@ func TestTargetScrape404(t *testing.T) {
 	ingester := nopIngester{}
 
 	want := errors.New("server returned HTTP status 404 Not Found")
-	got := testTarget.Scrape(ingester)
+	got := testTarget.scrape(ingester)
 	if got == nil || want.Error() != got.Error() {
 		t.Fatalf("want err %q, got %q", want, got)
 	}
 }
+
+func TestTargetRunScraperScrapes(t *testing.T) {
+	testTarget := target{
+		state:       UNKNOWN,
+		address:     "bad schema",
+		httpClient:  utility.NewDeadlineClient(0),
+		stopScraper: make(chan bool, 1),
+	}
+	go testTarget.RunScraper(nopIngester{}, time.Duration(time.Millisecond))
+
+	// Enough time for a scrape to happen.
+	time.Sleep(2 * time.Millisecond)
+	if testTarget.lastScrape.IsZero() {
+		t.Errorf("Scrape hasn't occured.")
+	}
+
+	testTarget.StopScraper()
+	// Wait for it to take effect.
+	time.Sleep(2 * time.Millisecond)
+	last := testTarget.lastScrape
+	// Enough time for a scrape to happen.
+	time.Sleep(2 * time.Millisecond)
+	if testTarget.lastScrape != last {
+		t.Errorf("Scrape occured after it was stopped.")
+	}
+}
retrieval/targetmanager.go

@@ -23,8 +23,6 @@ import (
 )
 
 type TargetManager interface {
-	acquire()
-	release()
 	AddTarget(job config.JobConfig, t Target)
 	ReplaceTargets(job config.JobConfig, newTargets []Target)
 	Remove(t Target)
@@ -34,27 +32,17 @@ type TargetManager interface {
 }
 
 type targetManager struct {
-	requestAllowance chan bool
 	poolsByJob map[string]*TargetPool
 	ingester   extraction.Ingester
 }
 
-func NewTargetManager(ingester extraction.Ingester, requestAllowance int) TargetManager {
+func NewTargetManager(ingester extraction.Ingester) TargetManager {
 	return &targetManager{
-		requestAllowance: make(chan bool, requestAllowance),
 		ingester:   ingester,
 		poolsByJob: make(map[string]*TargetPool),
 	}
 }
 
-func (m *targetManager) acquire() {
-	m.requestAllowance <- true
-}
-
-func (m *targetManager) release() {
-	<-m.requestAllowance
-}
-
 func (m *targetManager) TargetPoolForJob(job config.JobConfig) *TargetPool {
 	targetPool, ok := m.poolsByJob[job.GetName()]
 
@@ -64,13 +52,13 @@ func (m *targetManager) TargetPoolForJob(job config.JobConfig) *TargetPool {
 			provider = NewSdTargetProvider(job)
 		}
 
-		targetPool = NewTargetPool(m, provider)
+		interval := job.ScrapeInterval()
+		targetPool = NewTargetPool(m, provider, m.ingester, interval)
 		glog.Infof("Pool for job %s does not exist; creating and starting...", job.GetName())
 
-		interval := job.ScrapeInterval()
 		m.poolsByJob[job.GetName()] = targetPool
 		// BUG(all): Investigate whether this auto-goroutine creation is desired.
-		go targetPool.Run(m.ingester, interval)
+		go targetPool.Run()
 	}
 
 	return targetPool
@@ -84,7 +72,7 @@ func (m *targetManager) AddTarget(job config.JobConfig, t Target) {
 
 func (m *targetManager) ReplaceTargets(job config.JobConfig, newTargets []Target) {
 	targetPool := m.TargetPoolForJob(job)
-	targetPool.replaceTargets(newTargets)
+	targetPool.ReplaceTargets(newTargets)
 }
 
 func (m targetManager) Remove(t Target) {
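The deleted acquire/release methods and the requestAllowance channel formed a counting semaphore that capped how many scrapes ran concurrently; with one goroutine per target driving its own ticker, that cap is no longer needed. For reference, a minimal sketch of the buffered-channel semaphore idiom being removed (illustrative names, not the original code):

package main

import (
	"fmt"
	"sync"
	"time"
)

func main() {
	allowance := make(chan bool, 3) // at most 3 workers run at once

	var wg sync.WaitGroup
	for i := 0; i < 10; i++ {
		wg.Add(1)
		go func(id int) {
			defer wg.Done()
			allowance <- true              // acquire a slot
			defer func() { <-allowance }() // release it
			fmt.Println("worker", id, "running")
			time.Sleep(50 * time.Millisecond)
		}(i)
	}
	wg.Wait()
}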
retrieval/targetmanager_test.go

@@ -30,9 +30,8 @@ import (
 
 type fakeTarget struct {
 	scrapeCount   int
-	schedules     []time.Time
+	lastScrape    time.Time
 	interval      time.Duration
-	scheduleIndex int
 }
 
 func (t fakeTarget) LastError() error {
@@ -55,33 +54,32 @@ func (t fakeTarget) Interval() time.Duration {
 	return t.interval
 }
 
-func (t *fakeTarget) Scrape(i extraction.Ingester) error {
+func (t fakeTarget) LastScrape() time.Time {
+	return t.lastScrape
+}
+
+func (t fakeTarget) scrape(i extraction.Ingester) error {
 	t.scrapeCount++
 
 	return nil
 }
 
+func (t fakeTarget) RunScraper(ingester extraction.Ingester, interval time.Duration) {
+	return
+}
+
+func (t fakeTarget) StopScraper() {
+	return
+}
+
 func (t fakeTarget) State() TargetState {
 	return ALIVE
 }
 
-func (t fakeTarget) LastScrape() time.Time {
-	return time.Now()
-}
-
-func (t *fakeTarget) ScheduledFor() (time time.Time) {
-	time = t.schedules[t.scheduleIndex]
-	t.scheduleIndex++
-
-	return
-}
-
 func (t *fakeTarget) Merge(newTarget Target) {}
 
-func (t *fakeTarget) EstimatedTimeToExecute() time.Duration { return 0 }
-
 func testTargetManager(t testing.TB) {
-	targetManager := NewTargetManager(nopIngester{}, 3)
+	targetManager := NewTargetManager(nopIngester{})
 	testJob1 := config.JobConfig{
 		JobConfig: pb.JobConfig{
 			Name: proto.String("test_job1"),
@@ -96,11 +94,9 @@ func testTargetManager(t testing.TB) {
 	}
 
 	target1GroupA := &fakeTarget{
-		schedules: []time.Time{time.Now()},
 		interval: time.Minute,
 	}
 	target2GroupA := &fakeTarget{
-		schedules: []time.Time{time.Now()},
 		interval: time.Minute,
 	}
 
@@ -108,7 +104,6 @@ func testTargetManager(t testing.TB) {
 	targetManager.AddTarget(testJob1, target2GroupA)
 
 	target1GroupB := &fakeTarget{
-		schedules: []time.Time{time.Now()},
 		interval: time.Minute * 2,
 	}
 
retrieval/targetpool.go

@@ -19,75 +19,69 @@ import (
 
 	"github.com/golang/glog"
 	"github.com/prometheus/client_golang/extraction"
-	"github.com/prometheus/client_golang/prometheus"
+	"github.com/prometheus/prometheus/utility"
 )
 
 const (
 	targetAddQueueSize     = 100
 	targetReplaceQueueSize = 1
-
-	intervalKey = "interval"
 )
 
-var (
-	retrievalDurations = prometheus.NewSummaryVec(
-		prometheus.SummaryOpts{
-			Namespace:  namespace,
-			Name:       "targetpool_retrieve_time_milliseconds",
-			Help:       "The time needed for each TargetPool to retrieve state from all included entities.",
-			Objectives: []float64{0.01, 0.05, 0.5, 0.90, 0.99},
-		},
-		[]string{intervalKey},
-	)
-)
-
-func init() {
-	prometheus.MustRegister(retrievalDurations)
-}
-
 type TargetPool struct {
 	sync.RWMutex
 
-	done    chan bool
+	done    chan chan bool
 	manager TargetManager
-	targets targets
+	targetsByAddress map[string]Target
+	interval         time.Duration
+	ingester         extraction.Ingester
 	addTargetQueue chan Target
-	replaceTargetsQueue chan targets
 
 	targetProvider TargetProvider
 }
 
-func NewTargetPool(m TargetManager, p TargetProvider) *TargetPool {
+func NewTargetPool(m TargetManager, p TargetProvider, ing extraction.Ingester, i time.Duration) *TargetPool {
 	return &TargetPool{
 		manager: m,
+		interval:         i,
+		ingester:         ing,
+		targetsByAddress: make(map[string]Target),
 		addTargetQueue: make(chan Target, targetAddQueueSize),
-		replaceTargetsQueue: make(chan targets, targetReplaceQueueSize),
 		targetProvider: p,
-		done: make(chan bool),
+		done: make(chan chan bool),
 	}
 }
 
-func (p *TargetPool) Run(ingester extraction.Ingester, interval time.Duration) {
-	ticker := time.NewTicker(interval)
+func (p *TargetPool) Run() {
+	ticker := time.NewTicker(p.interval)
 	defer ticker.Stop()
 
 	for {
 		select {
 		case <-ticker.C:
-			p.runIteration(ingester, interval)
+			if p.targetProvider != nil {
+				targets, err := p.targetProvider.Targets()
+				if err != nil {
+					glog.Warningf("Error looking up targets, keeping old list: %s", err)
+				} else {
+					p.ReplaceTargets(targets)
+				}
+			}
 		case newTarget := <-p.addTargetQueue:
 			p.addTarget(newTarget)
-		case newTargets := <-p.replaceTargetsQueue:
-			p.replaceTargets(newTargets)
-		case <-p.done:
+		case stopped := <-p.done:
+			p.ReplaceTargets([]Target{})
 			glog.Info("TargetPool exiting...")
+			stopped <- true
 			return
 		}
 	}
 }
 
 func (p TargetPool) Stop() {
-	p.done <- true
+	stopped := make(chan bool)
+	p.done <- stopped
+	<-stopped
 }
 
 func (p *TargetPool) AddTarget(target Target) {
@@ -98,85 +92,45 @@ func (p *TargetPool) addTarget(target Target) {
 	p.Lock()
 	defer p.Unlock()
 
-	p.targets = append(p.targets, target)
+	p.targetsByAddress[target.Address()] = target
+	go target.RunScraper(p.ingester, p.interval)
 }
 
 func (p *TargetPool) ReplaceTargets(newTargets []Target) {
 	p.Lock()
 	defer p.Unlock()
 
-	// If there is anything remaining in the queue for effectuation, clear it out,
-	// because the last mutation should win.
-	select {
-	case <-p.replaceTargetsQueue:
-	default:
-		p.replaceTargetsQueue <- newTargets
-	}
-}
-
-func (p *TargetPool) replaceTargets(newTargets []Target) {
-	p.Lock()
-	defer p.Unlock()
-
 	// Replace old target list by new one, but reuse those targets from the old
 	// list of targets which are also in the new list (to preserve scheduling and
 	// health state).
-	for j, newTarget := range newTargets {
-		for _, oldTarget := range p.targets {
-			if oldTarget.Address() == newTarget.Address() {
-				oldTarget.Merge(newTargets[j])
-				newTargets[j] = oldTarget
-			}
-		}
-	}
-
-	p.targets = newTargets
-}
-
-func (p *TargetPool) runSingle(ingester extraction.Ingester, t Target) {
-	p.manager.acquire()
-	defer p.manager.release()
-
-	t.Scrape(ingester)
-}
-
-func (p *TargetPool) runIteration(ingester extraction.Ingester, interval time.Duration) {
-	if p.targetProvider != nil {
-		targets, err := p.targetProvider.Targets()
-		if err != nil {
-			glog.Warningf("Error looking up targets, keeping old list: %s", err)
+	newTargetAddresses := make(utility.Set)
+	for _, newTarget := range newTargets {
+		newTargetAddresses.Add(newTarget.Address())
+		oldTarget, ok := p.targetsByAddress[newTarget.Address()]
+		if ok {
+			oldTarget.Merge(newTarget)
 		} else {
-			p.ReplaceTargets(targets)
+			p.targetsByAddress[newTarget.Address()] = newTarget
+			go newTarget.RunScraper(p.ingester, p.interval)
 		}
 	}
-
-	p.RLock()
-	defer p.RUnlock()
-
-	begin := time.Now()
-	wait := sync.WaitGroup{}
-
-	for _, target := range p.targets {
-		wait.Add(1)
-
-		go func(t Target) {
-			p.runSingle(ingester, t)
-			wait.Done()
-		}(target)
+	// Stop any targets no longer present.
+	for k, oldTarget := range p.targetsByAddress {
+		if !newTargetAddresses.Has(k) {
+			glog.V(1).Info("Stopping scraper for target ", k)
+			oldTarget.StopScraper()
+			delete(p.targetsByAddress, k)
+		}
 	}
 
-	wait.Wait()
-
-	duration := float64(time.Since(begin) / time.Millisecond)
-	retrievalDurations.WithLabelValues(interval.String()).Observe(duration)
 }
 
 func (p *TargetPool) Targets() []Target {
 	p.RLock()
 	defer p.RUnlock()
 
-	targets := make([]Target, len(p.targets))
-	copy(targets, p.targets)
+	targets := make([]Target, 0, len(p.targetsByAddress))
+	for _, v := range p.targetsByAddress {
+		targets = append(targets, v)
+	}
 	return targets
 }
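Stop() now blocks until the Run() loop has actually wound down: the caller sends a reply channel over done and waits for the acknowledgement, and the loop only acknowledges after ReplaceTargets([]Target{}) has stopped every scraper. A minimal sketch of this synchronous-stop handshake with illustrative names, outside the TargetPool context:

package main

import (
	"fmt"
	"time"
)

type worker struct {
	done chan chan bool
}

func (w *worker) run() {
	ticker := time.NewTicker(100 * time.Millisecond)
	defer ticker.Stop()
	for {
		select {
		case <-ticker.C:
			fmt.Println("tick")
		case stopped := <-w.done:
			// Acknowledge only after cleanup has finished.
			stopped <- true
			return
		}
	}
}

// stop returns once run() has acknowledged shutdown.
func (w *worker) stop() {
	stopped := make(chan bool)
	w.done <- stopped
	<-stopped
}

func main() {
	w := &worker{done: make(chan chan bool)}
	go w.run()
	time.Sleep(250 * time.Millisecond)
	w.stop()
	fmt.Println("stopped synchronously")
}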
retrieval/targetpool_test.go

@@ -77,7 +77,7 @@ func testTargetPool(t testing.TB) {
 	}
 
 	for i, scenario := range scenarios {
-		pool := TargetPool{}
+		pool := NewTargetPool(nil, nil, nopIngester{}, time.Duration(1))
 
 		for _, input := range scenario.inputs {
 			target := target{
@@ -87,11 +87,11 @@ func testTargetPool(t testing.TB) {
 			pool.addTarget(&target)
 		}
 
-		if pool.targets.Len() != len(scenario.outputs) {
-			t.Errorf("%s %d. expected TargetPool size to be %d but was %d", scenario.name, i, len(scenario.outputs), pool.targets.Len())
+		if len(pool.targetsByAddress) != len(scenario.outputs) {
+			t.Errorf("%s %d. expected TargetPool size to be %d but was %d", scenario.name, i, len(scenario.outputs), len(pool.targetsByAddress))
 		} else {
 			for j, output := range scenario.outputs {
-				target := pool.targets[j]
+				target := pool.Targets()[j]
 
 				if target.Address() != output.address {
 					t.Errorf("%s %d.%d. expected Target address to be %s but was %s", scenario.name, i, j, output.address, target.Address())
@@ -99,8 +99,8 @@ func testTargetPool(t testing.TB) {
 			}
 		}
 
-		if pool.targets.Len() != len(scenario.outputs) {
-			t.Errorf("%s %d. expected to repopulated with %d elements, got %d", scenario.name, i, len(scenario.outputs), pool.targets.Len())
+		if len(pool.targetsByAddress) != len(scenario.outputs) {
+			t.Errorf("%s %d. expected to repopulated with %d elements, got %d", scenario.name, i, len(scenario.outputs), len(pool.targetsByAddress))
 		}
 	}
 }
@@ -111,41 +111,48 @@ func TestTargetPool(t *testing.T) {
 }
 
 func TestTargetPoolReplaceTargets(t *testing.T) {
-	pool := TargetPool{}
+	pool := NewTargetPool(nil, nil, nopIngester{}, time.Duration(1))
 	oldTarget1 := &target{
-		address: "http://example1.com/metrics.json",
+		address:     "example1",
 		state:       UNREACHABLE,
+		stopScraper: make(chan bool, 1),
 	}
 	oldTarget2 := &target{
-		address: "http://example2.com/metrics.json",
+		address:     "example2",
 		state:       UNREACHABLE,
+		stopScraper: make(chan bool, 1),
 	}
 	newTarget1 := &target{
-		address: "http://example1.com/metrics.json",
+		address:     "example1",
 		state:       ALIVE,
+		stopScraper: make(chan bool, 1),
 	}
 	newTarget2 := &target{
-		address: "http://example3.com/metrics.json",
+		address:     "example3",
 		state:       ALIVE,
+		stopScraper: make(chan bool, 1),
 	}
 
+	oldTarget1.StopScraper()
+	oldTarget2.StopScraper()
+	newTarget2.StopScraper()
+
 	pool.addTarget(oldTarget1)
 	pool.addTarget(oldTarget2)
 
-	pool.replaceTargets([]Target{newTarget1, newTarget2})
+	pool.ReplaceTargets([]Target{newTarget1, newTarget2})
 
-	if pool.targets.Len() != 2 {
-		t.Errorf("Expected 2 elements in pool, had %d", pool.targets.Len())
+	if len(pool.targetsByAddress) != 2 {
+		t.Errorf("Expected 2 elements in pool, had %d", len(pool.targetsByAddress))
 	}
 
-	target1 := pool.targets[0].(*target)
-	if target1.state != oldTarget1.state {
-		t.Errorf("Wrong first target returned from pool, expected %v, got %v", oldTarget1, target1)
+	if pool.targetsByAddress["example1"].State() != oldTarget1.State() {
+		t.Errorf("target1 channel has changed")
 	}
-	target2 := pool.targets[1].(*target)
-	if target2.state != newTarget2.state {
-		t.Errorf("Wrong second target returned from pool, expected %v, got %v", newTarget2, target2)
+	if pool.targetsByAddress["example3"].State() == oldTarget2.State() {
+		t.Errorf("newTarget2 channel same as oldTarget2's")
 	}
-
 }
 
 func BenchmarkTargetPool(b *testing.B) {