Add scrape_sample_limit metric

This adds a new metric exposing the per-target scrape sample_limit value. The metric is only exposed if the extra-scrape-metrics feature flag is enabled.
scrape_sample_limit will make it easy to monitor and alert on targets getting close to the configured sample_limit, which is important given that exceeding sample_limit results in the entire scrape being rejected.

Signed-off-by: Łukasz Mierzwa <l.mierzwa@gmail.com>
This commit is contained in:
Łukasz Mierzwa 2021-09-03 14:37:42 +01:00
parent 9de62707b3
commit f0a26266c0
3 changed files with 46 additions and 3 deletions

View file

@ -34,7 +34,7 @@ that PromQL does not look ahead of the evaluation time for samples.
`--enable-feature=promql-negative-offset`
In contrast to the positive offset modifier, the negative offset modifier lets
one shift a vector selector into the future. An example in which one may want
one shift a vector selector into the future. An example in which one may want
to use a negative offset is reviewing past data and making temporal comparisons
with more recent data.
@ -59,7 +59,7 @@ Exemplar storage is implemented as a fixed size circular buffer that stores exem
`--enable-feature=memory-snapshot-on-shutdown`
This takes the snapshot of the chunks that are in memory along with the series information when shutting down and stores
it on disk. This will reduce the startup time since the memory state can be restored with this snapshot and m-mapped
it on disk. This will reduce the startup time since the memory state can be restored with this snapshot and m-mapped
chunks without the need of WAL replay.
## Extra Scrape Metrics
@ -68,4 +68,6 @@ chunks without the need of WAL replay.
When enabled, for each instance scrape, Prometheus stores a sample in the following additional time series:
* `scrape_timeout_seconds`. The configured `scrape_timeout` for a target. This allows you to measure each target to find out how close they are to timing out with `scrape_duration_seconds / scrape_timeout_seconds`.
- `scrape_timeout_seconds`. The configured `scrape_timeout` for a target. This allows you to measure each target to find out how close they are to timing out with `scrape_duration_seconds / scrape_timeout_seconds`.
- `scrape_sample_limit`. The configured `sample_limit` for a target. This allows you to measure each target
to find out how close they are to reaching the limit with `scrape_samples_post_metric_relabeling / scrape_sample_limit`. Note that `scrape_sample_limit` can be zero if there is no limit configured, which means that the query above can return `+Inf` for targets with no limit (as we divide by zero). If you want to query only for targets that do have a sample limit, use this query: `scrape_samples_post_metric_relabeling / (scrape_sample_limit > 0)`.

View file

@ -308,6 +308,7 @@ func newScrapePool(cfg *config.ScrapeConfig, app storage.Appendable, jitterSeed
cache,
jitterSeed,
opts.honorTimestamps,
opts.sampleLimit,
opts.labelLimits,
opts.interval,
opts.timeout,
@ -815,6 +816,7 @@ type scrapeLoop struct {
honorTimestamps bool
forcedErr error
forcedErrMtx sync.Mutex
sampleLimit int
labelLimits *labelLimits
interval time.Duration
timeout time.Duration
@ -1087,6 +1089,7 @@ func newScrapeLoop(ctx context.Context,
cache *scrapeCache,
jitterSeed uint64,
honorTimestamps bool,
sampleLimit int,
labelLimits *labelLimits,
interval time.Duration,
timeout time.Duration,
@ -1113,6 +1116,7 @@ func newScrapeLoop(ctx context.Context,
l: l,
parentCtx: ctx,
honorTimestamps: honorTimestamps,
sampleLimit: sampleLimit,
labelLimits: labelLimits,
interval: interval,
timeout: timeout,
@ -1610,6 +1614,7 @@ const (
samplesPostRelabelMetricName = "scrape_samples_post_metric_relabeling" + "\xff"
scrapeSeriesAddedMetricName = "scrape_series_added" + "\xff"
scrapeTimeoutMetricName = "scrape_timeout_seconds" + "\xff"
scrapeSampleLimitMetricName = "scrape_sample_limit" + "\xff"
)
func (sl *scrapeLoop) report(app storage.Appender, start time.Time, timeout, duration time.Duration, scraped, added, seriesAdded int, scrapeErr error) (err error) {
@ -1641,6 +1646,9 @@ func (sl *scrapeLoop) report(app storage.Appender, start time.Time, timeout, dur
if err = sl.addReportSample(app, scrapeTimeoutMetricName, ts, timeout.Seconds()); err != nil {
return
}
if err = sl.addReportSample(app, scrapeSampleLimitMetricName, ts, float64(sl.sampleLimit)); err != nil {
return
}
}
return
}
@ -1669,6 +1677,9 @@ func (sl *scrapeLoop) reportStale(app storage.Appender, start time.Time) (err er
if err = sl.addReportSample(app, scrapeTimeoutMetricName, ts, stale); err != nil {
return
}
if err = sl.addReportSample(app, scrapeSampleLimitMetricName, ts, stale); err != nil {
return
}
}
return
}

View file

@ -586,6 +586,7 @@ func TestScrapeLoopStopBeforeRun(t *testing.T) {
nopMutator,
nil, nil, 0,
true,
0,
nil,
1,
0,
@ -654,6 +655,7 @@ func TestScrapeLoopStop(t *testing.T) {
nil,
0,
true,
0,
nil,
10*time.Millisecond,
time.Hour,
@ -725,6 +727,7 @@ func TestScrapeLoopRun(t *testing.T) {
nil,
0,
true,
0,
nil,
time.Second,
time.Hour,
@ -776,6 +779,7 @@ func TestScrapeLoopRun(t *testing.T) {
nil,
0,
true,
0,
nil,
time.Second,
100*time.Millisecond,
@ -831,6 +835,7 @@ func TestScrapeLoopForcedErr(t *testing.T) {
nil,
0,
true,
0,
nil,
time.Second,
time.Hour,
@ -885,6 +890,7 @@ func TestScrapeLoopMetadata(t *testing.T) {
cache,
0,
true,
0,
nil,
0,
0,
@ -938,6 +944,7 @@ func TestScrapeLoopSeriesAdded(t *testing.T) {
nil,
0,
true,
0,
nil,
0,
0,
@ -980,6 +987,7 @@ func TestScrapeLoopRunCreatesStaleMarkersOnFailedScrape(t *testing.T) {
nil,
0,
true,
0,
nil,
10*time.Millisecond,
time.Hour,
@ -1038,6 +1046,7 @@ func TestScrapeLoopRunCreatesStaleMarkersOnParseFailure(t *testing.T) {
nil,
0,
true,
0,
nil,
10*time.Millisecond,
time.Hour,
@ -1100,6 +1109,7 @@ func TestScrapeLoopCache(t *testing.T) {
nil,
0,
true,
0,
nil,
10*time.Millisecond,
time.Hour,
@ -1178,6 +1188,7 @@ func TestScrapeLoopCacheMemoryExhaustionProtection(t *testing.T) {
nil,
0,
true,
0,
nil,
10*time.Millisecond,
time.Hour,
@ -1288,6 +1299,7 @@ func TestScrapeLoopAppend(t *testing.T) {
nil,
0,
true,
0,
nil,
0,
0,
@ -1333,6 +1345,7 @@ func TestScrapeLoopAppendCacheEntryButErrNotFound(t *testing.T) {
nil,
0,
true,
0,
nil,
0,
0,
@ -1386,6 +1399,7 @@ func TestScrapeLoopAppendSampleLimit(t *testing.T) {
nil,
0,
true,
app.limit,
nil,
0,
0,
@ -1459,6 +1473,7 @@ func TestScrapeLoop_ChangingMetricString(t *testing.T) {
nil,
0,
true,
0,
nil,
0,
0,
@ -1503,6 +1518,7 @@ func TestScrapeLoopAppendStaleness(t *testing.T) {
nil,
0,
true,
0,
nil,
0,
0,
@ -1550,6 +1566,7 @@ func TestScrapeLoopAppendNoStalenessIfTimestamp(t *testing.T) {
nil,
0,
true,
0,
nil,
0,
0,
@ -1655,6 +1672,7 @@ metric_total{n="2"} 2 # {t="2"} 2.0 20000
nil,
0,
true,
0,
nil,
0,
0,
@ -1716,6 +1734,7 @@ func TestScrapeLoopAppendExemplarSeries(t *testing.T) {
nil,
0,
true,
0,
nil,
0,
0,
@ -1764,6 +1783,7 @@ func TestScrapeLoopRunReportsTargetDownOnScrapeError(t *testing.T) {
nil,
0,
true,
0,
nil,
10*time.Millisecond,
time.Hour,
@ -1796,6 +1816,7 @@ func TestScrapeLoopRunReportsTargetDownOnInvalidUTF8(t *testing.T) {
nil,
0,
true,
0,
nil,
10*time.Millisecond,
time.Hour,
@ -1841,6 +1862,7 @@ func TestScrapeLoopAppendGracefullyIfAmendOrOutOfOrderOrOutOfBounds(t *testing.T
nil,
0,
true,
0,
nil,
0,
0,
@ -1882,6 +1904,7 @@ func TestScrapeLoopOutOfBoundsTimeError(t *testing.T) {
nil,
0,
true,
0,
nil,
0,
0,
@ -2136,6 +2159,7 @@ func TestScrapeLoop_RespectTimestamps(t *testing.T) {
func(ctx context.Context) storage.Appender { return capp },
nil, 0,
true,
0,
nil,
0,
0,
@ -2173,6 +2197,7 @@ func TestScrapeLoop_DiscardTimestamps(t *testing.T) {
func(ctx context.Context) storage.Appender { return capp },
nil, 0,
false,
0,
nil,
0,
0,
@ -2209,6 +2234,7 @@ func TestScrapeLoopDiscardDuplicateLabels(t *testing.T) {
nil,
0,
true,
0,
nil,
0,
0,
@ -2263,6 +2289,7 @@ func TestScrapeLoopDiscardUnnamedMetrics(t *testing.T) {
nil,
0,
true,
0,
nil,
0,
0,
@ -2484,6 +2511,7 @@ func TestScrapeAddFast(t *testing.T) {
nil,
0,
true,
0,
nil,
0,
0,
@ -2571,6 +2599,7 @@ func TestScrapeReportSingleAppender(t *testing.T) {
nil,
0,
true,
0,
nil,
10*time.Millisecond,
time.Hour,
@ -2703,6 +2732,7 @@ func TestScrapeLoopLabelLimit(t *testing.T) {
nil,
0,
true,
0,
&test.labelLimits,
0,
0,