Scraping: add scrape_count extra metric

This lets admins detect missing or delayed scrapes.
The metric is only emitted when the feature flag `extra-scrape-metrics` is enabled.

Extend `TestScrapeLoopStop` to check that the scrape counter was incremented.

Signed-off-by: Bryan Boreham <bjboreham@gmail.com>
This commit is contained in:
Bryan Boreham 2024-07-08 11:08:09 +01:00
parent 89608c69a7
commit c64ef7732a
2 changed files with 20 additions and 5 deletions

View file

@ -824,6 +824,7 @@ type scrapeLoop struct {
l log.Logger
cache *scrapeCache
lastScrapeSize int
scrapeCount uint64
buffers *pool.Pool
offsetSeed uint64
honorTimestamps bool
@ -1325,6 +1326,7 @@ func (sl *scrapeLoop) scrapeAndReport(last, appendTime time.Time, errc chan<- er
var b []byte
var buf *bytes.Buffer
scrapeCtx, cancel := context.WithTimeout(sl.parentCtx, sl.timeout)
sl.scrapeCount++
resp, scrapeErr = sl.scraper.scrape(scrapeCtx)
if scrapeErr == nil {
b = sl.buffers.Get(sl.lastScrapeSize).([]byte)
@ -1824,6 +1826,7 @@ var (
scrapeTimeoutMetricName = []byte("scrape_timeout_seconds" + "\xff")
scrapeSampleLimitMetricName = []byte("scrape_sample_limit" + "\xff")
scrapeBodySizeBytesMetricName = []byte("scrape_body_size_bytes" + "\xff")
scrapeCountMetricName = []byte("scrape_count" + "\xff")
)
func (sl *scrapeLoop) report(app storage.Appender, start time.Time, duration time.Duration, scraped, added, seriesAdded, bytes int, scrapeErr error) (err error) {
@ -1862,6 +1865,9 @@ func (sl *scrapeLoop) report(app storage.Appender, start time.Time, duration tim
if err = sl.addReportSample(app, scrapeBodySizeBytesMetricName, ts, float64(bytes), b); err != nil {
return
}
if err = sl.addReportSample(app, scrapeCountMetricName, ts, float64(sl.scrapeCount), b); err != nil {
return
}
}
return
}
@ -1897,6 +1903,9 @@ func (sl *scrapeLoop) reportStale(app storage.Appender, start time.Time) (err er
if err = sl.addReportSample(app, scrapeBodySizeBytesMetricName, ts, stale, b); err != nil {
return
}
if err = sl.addReportSample(app, scrapeCountMetricName, ts, stale, b); err != nil {
return
}
}
return
}

View file

@ -744,13 +744,15 @@ func TestScrapeLoopStop(t *testing.T) {
)
sl := newBasicScrapeLoop(t, context.Background(), scraper, app, 10*time.Millisecond)
sl.reportExtraMetrics = true // So we can check scrape_count.
// Terminate loop after 2 scrapes.
const expectedScrapes = 2
numScrapes := 0
scraper.scrapeFunc = func(ctx context.Context, w io.Writer) error {
numScrapes++
if numScrapes == 2 {
if numScrapes == expectedScrapes {
go sl.stop()
<-sl.ctx.Done()
}
@ -769,15 +771,16 @@ func TestScrapeLoopStop(t *testing.T) {
require.FailNow(t, "Scrape wasn't stopped.")
}
// We expected 1 actual sample for each scrape plus 5 for report samples.
// We expected 1 actual sample for each scrape plus 9 for report samples.
const expectedSamplesPerScrape = 10
// At least 2 scrapes were made, plus the final stale markers.
require.GreaterOrEqual(t, len(appender.resultFloats), 6*3, "Expected at least 3 scrapes with 6 samples each.")
require.Zero(t, len(appender.resultFloats)%6, "There is a scrape with missing samples.")
require.GreaterOrEqual(t, len(appender.resultFloats), expectedSamplesPerScrape*(expectedScrapes+1), "Expected at least 3 scrapes with 10 samples each.")
require.Zero(t, len(appender.resultFloats)%expectedSamplesPerScrape, "There is a scrape with missing samples.")
// All samples in a scrape must have the same timestamp.
var ts int64
for i, s := range appender.resultFloats {
switch {
case i%6 == 0:
case i%expectedSamplesPerScrape == 0:
ts = s.t
case s.t != ts:
t.Fatalf("Unexpected multiple timestamps within single scrape")
@ -787,6 +790,9 @@ func TestScrapeLoopStop(t *testing.T) {
for _, s := range appender.resultFloats[len(appender.resultFloats)-5:] {
require.True(t, value.IsStaleNaN(s.f), "Appended last sample not as expected. Wanted: stale NaN Got: %x", math.Float64bits(s.f))
}
scrapeCounts := getResultFloats(appender, "scrape_count")
require.Equal(t, float64(expectedScrapes), scrapeCounts[len(scrapeCounts)-2])
}
func TestScrapeLoopRun(t *testing.T) {