Add per scrape-config targets limit (#7554)
* Add per scrape-config targets limit

Signed-off-by: Julien Pivotto <roidelapluie@inuits.eu>
This commit is contained in:
parent 01e3bfcd1a
commit f482c7bdd7
@@ -379,8 +379,11 @@ type ScrapeConfig struct {
 	MetricsPath string `yaml:"metrics_path,omitempty"`
 	// The URL scheme with which to fetch metrics from targets.
 	Scheme string `yaml:"scheme,omitempty"`
-	// More than this many samples post metric-relabelling will cause the scrape to fail.
+	// More than this many samples post metric-relabeling will cause the scrape to fail.
 	SampleLimit uint `yaml:"sample_limit,omitempty"`
+	// More than this many targets after the target relabeling will cause the
+	// scrapes to fail.
+	TargetLimit uint `yaml:"target_limit,omitempty"`

 	// We cannot do proper Go type embedding below as the parser will then parse
 	// values arbitrarily into the overflow maps of further-down types.
@@ -252,9 +252,16 @@ metric_relabel_configs:
 [ - <relabel_config> ... ]

 # Per-scrape limit on number of scraped samples that will be accepted.
-# If more than this number of samples are present after metric relabelling
+# If more than this number of samples are present after metric relabeling
 # the entire scrape will be treated as failed. 0 means no limit.
 [ sample_limit: <int> | default = 0 ]
+
+# Per-scrape config limit on number of unique targets that will be
+# accepted. If more than this number of targets are present after target
+# relabeling, Prometheus will mark the targets as failed without scraping them.
+# 0 means no limit. This is an experimental feature, this behaviour could
+# change in the future.
+[ target_limit: <int> | default = 0 ]
 ```

 Where `<job_name>` must be unique across all scrape configurations.
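For illustration, a minimal scrape configuration using the new option could look like the following sketch (the job name, port, and limit values are made up):

```yaml
scrape_configs:
  - job_name: node                # hypothetical job
    static_configs:
      - targets: ['localhost:9100']
    # Fail the scrapes of this job if relabeling produces more than 100 targets.
    target_limit: 100
    # Existing per-scrape sample limit, shown for comparison.
    sample_limit: 20000
```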
@@ -259,6 +259,20 @@
         description: 'Prometheus %(prometheusName)s has missed {{ printf "%%.0f" $value }} rule group evaluations in the last 5m.' % $._config,
       },
     },
+    {
+      alert: 'PrometheusTargetLimitHit',
+      expr: |||
+        increase(prometheus_target_scrape_pool_exceeded_target_limit_total{%(prometheusSelector)s}[5m]) > 0
+      ||| % $._config,
+      'for': '15m',
+      labels: {
+        severity: 'warning',
+      },
+      annotations: {
+        summary: 'Prometheus has dropped targets because some scrape configs have exceeded the targets limit.',
+        description: 'Prometheus %(prometheusName)s has dropped {{ printf "%%.0f" $value }} targets because the number of targets exceeded the configured target_limit.' % $._config,
+      },
+    },
   ],
 },
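For reference, once the mixin's templates are rendered with a concrete selector, the generated alerting rule would look roughly like this sketch (assuming a `job="prometheus"` selector; the actual output depends on `$._config`):

```yaml
groups:
  - name: prometheus
    rules:
      - alert: PrometheusTargetLimitHit
        expr: increase(prometheus_target_scrape_pool_exceeded_target_limit_total{job="prometheus"}[5m]) > 0
        for: 15m
        labels:
          severity: warning
        annotations:
          summary: Prometheus has dropped targets because some scrape configs have exceeded the targets limit.
```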
scrape/scrape.go (181 changed lines)
@@ -81,15 +81,35 @@ var (
 	targetScrapePoolReloads = prometheus.NewCounter(
 		prometheus.CounterOpts{
 			Name: "prometheus_target_scrape_pool_reloads_total",
-			Help: "Total number of scrape loop reloads.",
+			Help: "Total number of scrape pool reloads.",
 		},
 	)
 	targetScrapePoolReloadsFailed = prometheus.NewCounter(
 		prometheus.CounterOpts{
 			Name: "prometheus_target_scrape_pool_reloads_failed_total",
-			Help: "Total number of failed scrape loop reloads.",
+			Help: "Total number of failed scrape pool reloads.",
 		},
 	)
+	targetScrapePoolExceededTargetLimit = prometheus.NewCounter(
+		prometheus.CounterOpts{
+			Name: "prometheus_target_scrape_pool_exceeded_target_limit_total",
+			Help: "Total number of times scrape pools hit the target limit, during sync or config reload.",
+		},
+	)
+	targetScrapePoolTargetLimit = prometheus.NewGaugeVec(
+		prometheus.GaugeOpts{
+			Name: "prometheus_target_scrape_pool_target_limit",
+			Help: "Maximum number of targets allowed in this scrape pool.",
+		},
+		[]string{"scrape_job"},
+	)
+	targetScrapePoolTargetsAdded = prometheus.NewGaugeVec(
+		prometheus.GaugeOpts{
+			Name: "prometheus_target_scrape_pool_targets",
+			Help: "Current number of targets in this scrape pool.",
+		},
+		[]string{"scrape_job"},
+	)
 	targetSyncIntervalLength = prometheus.NewSummaryVec(
 		prometheus.SummaryOpts{
 			Name: "prometheus_target_sync_length_seconds",
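The two new gauges share the `scrape_job` label, so they can be compared directly to see how much headroom a pool has left. A recording rule along these lines is one way to track that (a sketch; the rule name is invented, and the inner `> 0` filter keeps only pools that actually have a limit configured):

```yaml
groups:
  - name: scrape-pool-headroom
    rules:
      - record: scrape_pool:target_limit_usage:ratio
        expr: |
          prometheus_target_scrape_pool_targets
            / (prometheus_target_scrape_pool_target_limit > 0)
```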
@@ -151,6 +171,9 @@ func init() {
 		targetScrapeSampleDuplicate,
 		targetScrapeSampleOutOfOrder,
 		targetScrapeSampleOutOfBounds,
+		targetScrapePoolExceededTargetLimit,
+		targetScrapePoolTargetLimit,
+		targetScrapePoolTargetsAdded,
 		targetScrapeCacheFlushForced,
 		targetMetadataCache,
 	)
@@ -170,6 +193,7 @@ type scrapePool struct {
 	droppedTargets []*Target
 	loops          map[uint64]loop
 	cancel         context.CancelFunc
+	targetLimitHit bool // Internal state to speed up the target_limit checks.

 	// Constructor for new scrape loops. This is settable for testing convenience.
 	newLoop func(scrapeLoopOptions) loop
@@ -278,6 +302,13 @@ func (sp *scrapePool) stop() {
 	}
 	wg.Wait()
 	sp.client.CloseIdleConnections()
+
+	if sp.config != nil {
+		targetScrapePoolSyncsCounter.DeleteLabelValues(sp.config.JobName)
+		targetScrapePoolTargetLimit.DeleteLabelValues(sp.config.JobName)
+		targetScrapePoolTargetsAdded.DeleteLabelValues(sp.config.JobName)
+		targetSyncIntervalLength.DeleteLabelValues(sp.config.JobName)
+	}
 }

 // reload the scrape pool with the given scrape configuration. The target state is preserved
@@ -301,6 +332,8 @@ func (sp *scrapePool) reload(cfg *config.ScrapeConfig) error {
 	oldClient := sp.client
 	sp.client = client

+	targetScrapePoolTargetLimit.WithLabelValues(sp.config.JobName).Set(float64(sp.config.TargetLimit))
+
 	var (
 		wg       sync.WaitGroup
 		interval = time.Duration(sp.config.ScrapeInterval)
@@ -311,6 +344,7 @@ func (sp *scrapePool) reload(cfg *config.ScrapeConfig) error {
 		mrc = sp.config.MetricRelabelConfigs
 	)

+	forcedErr := sp.refreshTargetLimitErr()
 	for fp, oldLoop := range sp.loops {
 		var cache *scrapeCache
 		if oc := oldLoop.getCache(); reuseCache && oc != nil {
@@ -338,6 +372,7 @@ func (sp *scrapePool) reload(cfg *config.ScrapeConfig) error {
 			oldLoop.stop()
 			wg.Done()

+			newLoop.setForcedError(forcedErr)
 			go newLoop.run(interval, timeout, nil)
 		}(oldLoop, newLoop)

@@ -355,10 +390,12 @@ func (sp *scrapePool) reload(cfg *config.ScrapeConfig) error {
 // Sync converts target groups into actual scrape targets and synchronizes
 // the currently running scraper with the resulting set and returns all scraped and dropped targets.
 func (sp *scrapePool) Sync(tgs []*targetgroup.Group) {
+	sp.mtx.Lock()
+	defer sp.mtx.Unlock()
+
 	start := time.Now()

 	var all []*Target
-	sp.mtx.Lock()
 	sp.droppedTargets = []*Target{}
 	for _, tg := range tgs {
 		targets, err := targetsFromGroup(tg, sp.config)
@@ -374,7 +411,6 @@ func (sp *scrapePool) Sync(tgs []*targetgroup.Group) {
 			}
 		}
 	}
-	sp.mtx.Unlock()
 	sp.sync(all)

 	targetSyncIntervalLength.WithLabelValues(sp.config.JobName).Observe(
@@ -387,11 +423,9 @@ func (sp *scrapePool) Sync(tgs []*targetgroup.Group) {
 // scrape loops for new targets, and stops scrape loops for disappeared targets.
 // It returns after all stopped scrape loops terminated.
 func (sp *scrapePool) sync(targets []*Target) {
-	sp.mtx.Lock()
-	defer sp.mtx.Unlock()
-
+	// This function expects that you have acquired the sp.mtx lock.
 	var (
-		uniqueTargets = map[uint64]struct{}{}
+		uniqueLoops   = make(map[uint64]loop)
 		interval      = time.Duration(sp.config.ScrapeInterval)
 		timeout       = time.Duration(sp.config.ScrapeTimeout)
 		limit         = int(sp.config.SampleLimit)
@@ -403,7 +437,6 @@ func (sp *scrapePool) sync(targets []*Target) {
 	for _, t := range targets {
 		t := t
 		hash := t.hash()
-		uniqueTargets[hash] = struct{}{}

 		if _, ok := sp.activeTargets[hash]; !ok {
 			s := &targetScraper{Target: t, client: sp.client, timeout: timeout}
@@ -419,8 +452,12 @@ func (sp *scrapePool) sync(targets []*Target) {
 			sp.activeTargets[hash] = t
 			sp.loops[hash] = l

-			go l.run(interval, timeout, nil)
+			uniqueLoops[hash] = l
 		} else {
+			// This might be a duplicated target.
+			if _, ok := uniqueLoops[hash]; !ok {
+				uniqueLoops[hash] = nil
+			}
 			// Need to keep the most updated labels information
 			// for displaying it in the Service Discovery web page.
 			sp.activeTargets[hash].SetDiscoveredLabels(t.DiscoveredLabels())
@@ -431,7 +468,7 @@ func (sp *scrapePool) sync(targets []*Target) {

 	// Stop and remove old targets and scraper loops.
 	for hash := range sp.activeTargets {
-		if _, ok := uniqueTargets[hash]; !ok {
+		if _, ok := uniqueLoops[hash]; !ok {
 			wg.Add(1)
 			go func(l loop) {
 				l.stop()
@@ -443,6 +480,16 @@ func (sp *scrapePool) sync(targets []*Target) {
 		}
 	}

+	targetScrapePoolTargetsAdded.WithLabelValues(sp.config.JobName).Set(float64(len(uniqueLoops)))
+	forcedErr := sp.refreshTargetLimitErr()
+	for _, l := range sp.loops {
+		l.setForcedError(forcedErr)
+	}
+	for _, l := range uniqueLoops {
+		if l != nil {
+			go l.run(interval, timeout, nil)
+		}
+	}
 	// Wait for all potentially stopped scrapers to terminate.
 	// This covers the case of flapping targets. If the server is under high load, a new scraper
 	// may be active and tries to insert. The old scraper that didn't terminate yet could still
@@ -450,6 +497,26 @@ func (sp *scrapePool) sync(targets []*Target) {
 	wg.Wait()
 }

+// refreshTargetLimitErr returns an error that can be passed to the scrape loops
+// if the number of targets exceeds the configured limit.
+func (sp *scrapePool) refreshTargetLimitErr() error {
+	// This function expects that you have acquired the sp.mtx lock.
+	if sp.config == nil || sp.config.TargetLimit == 0 && !sp.targetLimitHit {
+		return nil
+	}
+	l := len(sp.activeTargets)
+	if l <= int(sp.config.TargetLimit) && !sp.targetLimitHit {
+		return nil
+	}
+	var err error
+	sp.targetLimitHit = l > int(sp.config.TargetLimit)
+	if sp.targetLimitHit {
+		targetScrapePoolExceededTargetLimit.Inc()
+		err = fmt.Errorf("target_limit exceeded (number of targets: %d, limit: %d)", l, sp.config.TargetLimit)
+	}
+	return err
+}
+
 func mutateSampleLabels(lset labels.Labels, target *Target, honor bool, rc []*relabel.Config) labels.Labels {
 	lb := labels.NewBuilder(lset)

@@ -590,6 +657,7 @@ func (s *targetScraper) scrape(ctx context.Context, w io.Writer) (string, error)
 // A loop can run and be stopped again. It must not be reused after it was stopped.
 type loop interface {
 	run(interval, timeout time.Duration, errc chan<- error)
+	setForcedError(err error)
 	stop()
 	getCache() *scrapeCache
 	disableEndOfRunStalenessMarkers()
@@ -610,6 +678,8 @@ type scrapeLoop struct {
 	buffers         *pool.Pool
 	jitterSeed      uint64
 	honorTimestamps bool
+	forcedErr       error
+	forcedErrMtx    sync.Mutex

 	appender      func() storage.Appender
 	sampleMutator labelsMutator
@@ -953,10 +1023,7 @@ mainLoop:
 // together with reporting metrics, by using as few appenders as possible.
 // In the happy scenario, a single appender is used.
 func (sl *scrapeLoop) scrapeAndReport(interval, timeout time.Duration, last time.Time, errc chan<- error) time.Time {
-	var (
-		start             = time.Now()
-		scrapeCtx, cancel = context.WithTimeout(sl.ctx, timeout)
-	)
+	start := time.Now()

 	// Only record after the first scrape.
 	if !last.IsZero() {
@@ -968,26 +1035,9 @@ func (sl *scrapeLoop) scrapeAndReport(interval, timeout time.Duration, last time
 	b := sl.buffers.Get(sl.lastScrapeSize).([]byte)
 	buf := bytes.NewBuffer(b)

-	contentType, scrapeErr := sl.scraper.scrape(scrapeCtx, buf)
-	cancel()
-
-	if scrapeErr == nil {
-		b = buf.Bytes()
-		// NOTE: There were issues with misbehaving clients in the past
-		// that occasionally returned empty results. We don't want those
-		// to falsely reset our buffer size.
-		if len(b) > 0 {
-			sl.lastScrapeSize = len(b)
-		}
-	} else {
-		level.Debug(sl.l).Log("msg", "Scrape failed", "err", scrapeErr.Error())
-		if errc != nil {
-			errc <- scrapeErr
-		}
-	}
-
 	app := sl.appender()
-	var err error
+	var total, added, seriesAdded int
+	var err, appErr, scrapeErr error
 	defer func() {
 		if err != nil {
 			app.Rollback()
@@ -998,20 +1048,53 @@ func (sl *scrapeLoop) scrapeAndReport(interval, timeout time.Duration, last time
 			level.Error(sl.l).Log("msg", "Scrape commit failed", "err", err)
 		}
 	}()
-	// A failed scrape is the same as an empty scrape,
-	// we still call sl.append to trigger stale markers.
-	total, added, seriesAdded, appErr := sl.append(app, b, contentType, start)
-	if appErr != nil {
-		app.Rollback()
-		app = sl.appender()
-		level.Debug(sl.l).Log("msg", "Append failed", "err", appErr)
-		// The append failed, probably due to a parse error or sample limit.
-		// Call sl.append again with an empty scrape to trigger stale markers.
+	if forcedErr := sl.getForcedError(); forcedErr != nil {
+		appErr = forcedErr
+		// Add stale markers.
 		if _, _, _, err := sl.append(app, []byte{}, "", start); err != nil {
 			app.Rollback()
 			app = sl.appender()
 			level.Warn(sl.l).Log("msg", "Append failed", "err", err)
 		}
+		if errc != nil {
+			errc <- forcedErr
+		}
+	} else {
+		var contentType string
+		scrapeCtx, cancel := context.WithTimeout(sl.ctx, timeout)
+		contentType, scrapeErr = sl.scraper.scrape(scrapeCtx, buf)
+		cancel()
+
+		if scrapeErr == nil {
+			b = buf.Bytes()
+			// NOTE: There were issues with misbehaving clients in the past
+			// that occasionally returned empty results. We don't want those
+			// to falsely reset our buffer size.
+			if len(b) > 0 {
+				sl.lastScrapeSize = len(b)
+			}
+		} else {
+			level.Debug(sl.l).Log("msg", "Scrape failed", "err", scrapeErr.Error())
+			if errc != nil {
+				errc <- scrapeErr
+			}
+		}
+
+		// A failed scrape is the same as an empty scrape,
+		// we still call sl.append to trigger stale markers.
+		total, added, seriesAdded, appErr = sl.append(app, b, contentType, start)
+		if appErr != nil {
+			app.Rollback()
+			app = sl.appender()
+			level.Debug(sl.l).Log("msg", "Append failed", "err", appErr)
+			// The append failed, probably due to a parse error or sample limit.
+			// Call sl.append again with an empty scrape to trigger stale markers.
+			if _, _, _, err := sl.append(app, []byte{}, "", start); err != nil {
+				app.Rollback()
+				app = sl.appender()
+				level.Warn(sl.l).Log("msg", "Append failed", "err", err)
+			}
+		}
 	}

 	sl.buffers.Put(b)
@@ -1026,6 +1109,18 @@ func (sl *scrapeLoop) scrapeAndReport(interval, timeout time.Duration, last time
 	return start
 }

+func (sl *scrapeLoop) setForcedError(err error) {
+	sl.forcedErrMtx.Lock()
+	defer sl.forcedErrMtx.Unlock()
+	sl.forcedErr = err
+}
+
+func (sl *scrapeLoop) getForcedError() error {
+	sl.forcedErrMtx.Lock()
+	defer sl.forcedErrMtx.Unlock()
+	return sl.forcedErr
+}
+
 func (sl *scrapeLoop) endOfRunStaleness(last time.Time, ticker *time.Ticker, interval time.Duration) {
 	// Scraping has stopped. We want to write stale markers but
 	// the target may be recreated, so we wait just over 2 scrape intervals
@@ -138,8 +138,10 @@ func TestDiscoveredLabelsUpdate(t *testing.T) {
 }

 type testLoop struct {
 	startFunc    func(interval, timeout time.Duration, errc chan<- error)
 	stopFunc     func()
+	forcedErr    error
+	forcedErrMtx sync.Mutex
 }

 func (l *testLoop) run(interval, timeout time.Duration, errc chan<- error) {
@@ -149,6 +151,18 @@ func (l *testLoop) run(interval, timeout time.Duration, errc chan<- error) {
 func (l *testLoop) disableEndOfRunStalenessMarkers() {
 }

+func (l *testLoop) setForcedError(err error) {
+	l.forcedErrMtx.Lock()
+	defer l.forcedErrMtx.Unlock()
+	l.forcedErr = err
+}
+
+func (l *testLoop) getForcedError() error {
+	l.forcedErrMtx.Lock()
+	defer l.forcedErrMtx.Unlock()
+	return l.forcedErr
+}
+
 func (l *testLoop) stop() {
 	l.stopFunc()
 }
@@ -301,6 +315,92 @@ func TestScrapePoolReload(t *testing.T) {
 	testutil.Equals(t, numTargets, len(sp.loops), "Unexpected number of stopped loops after reload")
 }

+func TestScrapePoolTargetLimit(t *testing.T) {
+	// On starting to run, new loops created on reload check whether their preceding
+	// equivalents have been stopped.
+	newLoop := func(opts scrapeLoopOptions) loop {
+		l := &testLoop{
+			startFunc: func(interval, timeout time.Duration, errc chan<- error) {},
+			stopFunc:  func() {},
+		}
+		return l
+	}
+	sp := &scrapePool{
+		appendable:    &nopAppendable{},
+		activeTargets: map[uint64]*Target{},
+		loops:         map[uint64]loop{},
+		newLoop:       newLoop,
+		logger:        nil,
+		client:        http.DefaultClient,
+	}
+
+	var tgs = []*targetgroup.Group{}
+	for i := 0; i < 50; i++ {
+		tgs = append(tgs,
+			&targetgroup.Group{
+				Targets: []model.LabelSet{
+					{model.AddressLabel: model.LabelValue(fmt.Sprintf("127.0.0.1:%d", 9090+i))},
+				},
+			},
+		)
+	}
+
+	var limit uint
+	reloadWithLimit := func(l uint) {
+		limit = l
+		testutil.Ok(t, sp.reload(&config.ScrapeConfig{
+			ScrapeInterval: model.Duration(3 * time.Second),
+			ScrapeTimeout:  model.Duration(2 * time.Second),
+			TargetLimit:    l,
+		}))
+	}
+
+	var targets int
+	loadTargets := func(n int) {
+		targets = n
+		sp.Sync(tgs[:n])
+	}
+
+	validateErrorMessage := func(shouldErr bool) {
+		for _, l := range sp.loops {
+			lerr := l.(*testLoop).getForcedError()
+			if shouldErr {
+				testutil.Assert(t, lerr != nil, "error was expected for %d targets with a limit of %d", targets, limit)
+				testutil.Equals(t, fmt.Sprintf("target_limit exceeded (number of targets: %d, limit: %d)", targets, limit), lerr.Error())
+			} else {
+				testutil.Equals(t, nil, lerr)
+			}
+		}
+	}
+
+	reloadWithLimit(0)
+	loadTargets(50)
+
+	// Simulate an initial config with a limit.
+	sp.config.TargetLimit = 30
+	limit = 30
+	loadTargets(50)
+	validateErrorMessage(true)
+
+	reloadWithLimit(50)
+	validateErrorMessage(false)
+
+	reloadWithLimit(40)
+	validateErrorMessage(true)
+
+	loadTargets(30)
+	validateErrorMessage(false)
+
+	loadTargets(40)
+	validateErrorMessage(false)
+
+	loadTargets(41)
+	validateErrorMessage(true)
+
+	reloadWithLimit(51)
+	validateErrorMessage(false)
+}
+
 func TestScrapePoolAppender(t *testing.T) {
 	cfg := &config.ScrapeConfig{}
 	app := &nopAppendable{}
@@ -595,6 +695,57 @@ func TestScrapeLoopRun(t *testing.T) {
 	}
 }

+func TestScrapeLoopForcedErr(t *testing.T) {
+	var (
+		signal = make(chan struct{}, 1)
+		errc   = make(chan error)
+
+		scraper = &testScraper{}
+		app     = func() storage.Appender { return &nopAppender{} }
+	)
+
+	ctx, cancel := context.WithCancel(context.Background())
+	sl := newScrapeLoop(ctx,
+		scraper,
+		nil, nil,
+		nopMutator,
+		nopMutator,
+		app,
+		nil,
+		0,
+		true,
+	)
+
+	forcedErr := fmt.Errorf("forced err")
+	sl.setForcedError(forcedErr)
+
+	scraper.scrapeFunc = func(context.Context, io.Writer) error {
+		t.Fatalf("should not be scraped")
+		return nil
+	}
+
+	go func() {
+		sl.run(time.Second, time.Hour, errc)
+		signal <- struct{}{}
+	}()
+
+	select {
+	case err := <-errc:
+		if err != forcedErr {
+			t.Fatalf("Expected forced error but got: %s", err)
+		}
+	case <-time.After(3 * time.Second):
+		t.Fatalf("Expected forced error but got none")
+	}
+	cancel()
+
+	select {
+	case <-signal:
+	case <-time.After(5 * time.Second):
+		t.Fatalf("Scrape not stopped")
+	}
+}
+
 func TestScrapeLoopMetadata(t *testing.T) {
 	var (
 		signal = make(chan struct{})