From c64ef7732a16d300a7b37c37ead23621259739d2 Mon Sep 17 00:00:00 2001 From: Bryan Boreham Date: Mon, 8 Jul 2024 11:08:09 +0100 Subject: [PATCH] Scraping: add scrape_count extra metric So admins can detect missing or delayed scrapes. Only enabled if feature-flag `extra-scrape-metrics` is turned on. Extend `TestScrapeLoopStop` to check it was incremented. Signed-off-by: Bryan Boreham --- scrape/scrape.go | 9 +++++++++ scrape/scrape_test.go | 16 +++++++++++----- 2 files changed, 20 insertions(+), 5 deletions(-) diff --git a/scrape/scrape.go b/scrape/scrape.go index 68411a62e..1c6bc5fce 100644 --- a/scrape/scrape.go +++ b/scrape/scrape.go @@ -824,6 +824,7 @@ type scrapeLoop struct { l log.Logger cache *scrapeCache lastScrapeSize int + scrapeCount uint64 buffers *pool.Pool offsetSeed uint64 honorTimestamps bool @@ -1325,6 +1326,7 @@ func (sl *scrapeLoop) scrapeAndReport(last, appendTime time.Time, errc chan<- er var b []byte var buf *bytes.Buffer scrapeCtx, cancel := context.WithTimeout(sl.parentCtx, sl.timeout) + sl.scrapeCount++ resp, scrapeErr = sl.scraper.scrape(scrapeCtx) if scrapeErr == nil { b = sl.buffers.Get(sl.lastScrapeSize).([]byte) @@ -1824,6 +1826,7 @@ var ( scrapeTimeoutMetricName = []byte("scrape_timeout_seconds" + "\xff") scrapeSampleLimitMetricName = []byte("scrape_sample_limit" + "\xff") scrapeBodySizeBytesMetricName = []byte("scrape_body_size_bytes" + "\xff") + scrapeCountMetricName = []byte("scrape_count" + "\xff") ) func (sl *scrapeLoop) report(app storage.Appender, start time.Time, duration time.Duration, scraped, added, seriesAdded, bytes int, scrapeErr error) (err error) { @@ -1862,6 +1865,9 @@ func (sl *scrapeLoop) report(app storage.Appender, start time.Time, duration tim if err = sl.addReportSample(app, scrapeBodySizeBytesMetricName, ts, float64(bytes), b); err != nil { return } + if err = sl.addReportSample(app, scrapeCountMetricName, ts, float64(sl.scrapeCount), b); err != nil { + return + } } return } @@ -1897,6 +1903,9 @@ func (sl *scrapeLoop) reportStale(app storage.Appender, start time.Time) (err er if err = sl.addReportSample(app, scrapeBodySizeBytesMetricName, ts, stale, b); err != nil { return } + if err = sl.addReportSample(app, scrapeCountMetricName, ts, stale, b); err != nil { + return + } } return } diff --git a/scrape/scrape_test.go b/scrape/scrape_test.go index a3fe6ac1a..a46d3519c 100644 --- a/scrape/scrape_test.go +++ b/scrape/scrape_test.go @@ -744,13 +744,15 @@ func TestScrapeLoopStop(t *testing.T) { ) sl := newBasicScrapeLoop(t, context.Background(), scraper, app, 10*time.Millisecond) + sl.reportExtraMetrics = true // So we can check scrape_count. // Terminate loop after 2 scrapes. + const expectedScrapes = 2 numScrapes := 0 scraper.scrapeFunc = func(ctx context.Context, w io.Writer) error { numScrapes++ - if numScrapes == 2 { + if numScrapes == expectedScrapes { go sl.stop() <-sl.ctx.Done() } @@ -769,15 +771,16 @@ func TestScrapeLoopStop(t *testing.T) { require.FailNow(t, "Scrape wasn't stopped.") } - // We expected 1 actual sample for each scrape plus 5 for report samples. + // We expected 1 actual sample for each scrape plus 9 for report samples. + const expectedSamplesPerScrape = 10 // At least 2 scrapes were made, plus the final stale markers. - require.GreaterOrEqual(t, len(appender.resultFloats), 6*3, "Expected at least 3 scrapes with 6 samples each.") - require.Zero(t, len(appender.resultFloats)%6, "There is a scrape with missing samples.") + require.GreaterOrEqual(t, len(appender.resultFloats), expectedSamplesPerScrape*(expectedScrapes+1), "Expected at least 3 scrapes with 10 samples each.") + require.Zero(t, len(appender.resultFloats)%expectedSamplesPerScrape, "There is a scrape with missing samples.") // All samples in a scrape must have the same timestamp. var ts int64 for i, s := range appender.resultFloats { switch { - case i%6 == 0: + case i%expectedSamplesPerScrape == 0: ts = s.t case s.t != ts: t.Fatalf("Unexpected multiple timestamps within single scrape") @@ -787,6 +790,9 @@ func TestScrapeLoopStop(t *testing.T) { for _, s := range appender.resultFloats[len(appender.resultFloats)-5:] { require.True(t, value.IsStaleNaN(s.f), "Appended last sample not as expected. Wanted: stale NaN Got: %x", math.Float64bits(s.f)) } + + scrapeCounts := getResultFloats(appender, "scrape_count") + require.Equal(t, float64(expectedScrapes), scrapeCounts[len(scrapeCounts)-2]) } func TestScrapeLoopRun(t *testing.T) {