Scraping: add scrape_count extra metric

Adds a `scrape_count` report sample per target so admins can detect missing or
delayed scrapes, e.g. by alerting when the counter increases more slowly than
the target's scrape interval implies.
Only emitted when the feature flag `extra-scrape-metrics` is turned on.

Extend `TestScrapeLoopStop` to check that the counter was incremented.

Signed-off-by: Bryan Boreham <bjboreham@gmail.com>
Bryan Boreham, 2024-07-08 11:08:09 +01:00
commit c64ef7732a, parent 89608c69a7
2 changed files with 20 additions and 5 deletions

scrape/scrape.go

@@ -824,6 +824,7 @@ type scrapeLoop struct {
 	l               log.Logger
 	cache           *scrapeCache
 	lastScrapeSize  int
+	scrapeCount     uint64
 	buffers         *pool.Pool
 	offsetSeed      uint64
 	honorTimestamps bool
@@ -1325,6 +1326,7 @@ func (sl *scrapeLoop) scrapeAndReport(last, appendTime time.Time, errc chan<- error)
 	var b []byte
 	var buf *bytes.Buffer
 	scrapeCtx, cancel := context.WithTimeout(sl.parentCtx, sl.timeout)
+	sl.scrapeCount++
 	resp, scrapeErr = sl.scraper.scrape(scrapeCtx)
 	if scrapeErr == nil {
 		b = sl.buffers.Get(sl.lastScrapeSize).([]byte)
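Note that `sl.scrapeCount++` runs before the scrape attempt, so the new series counts attempts, including scrapes that fail or time out, not just successful ones; that is what makes it usable for spotting delayed scrapes.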
@@ -1824,6 +1826,7 @@ var (
 	scrapeTimeoutMetricName       = []byte("scrape_timeout_seconds" + "\xff")
 	scrapeSampleLimitMetricName   = []byte("scrape_sample_limit" + "\xff")
 	scrapeBodySizeBytesMetricName = []byte("scrape_body_size_bytes" + "\xff")
+	scrapeCountMetricName         = []byte("scrape_count" + "\xff")
 )

 func (sl *scrapeLoop) report(app storage.Appender, start time.Time, duration time.Duration, scraped, added, seriesAdded, bytes int, scrapeErr error) (err error) {
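An aside on the trailing `"\xff"` in these constants: 0xff is not a valid UTF-8 byte, so it can never occur in a real scraped metric name. These byte slices double as keys into the per-target scrape cache, and the sentinel suffix keeps the synthetic report series from colliding with scraped metrics there; the byte is never part of the exposed series name. A minimal illustration:

```go
package main

import "fmt"

func main() {
	// The sentinel byte is part of the cache key only, never of the
	// series name that ends up in the TSDB.
	name := []byte("scrape_count" + "\xff")
	fmt.Printf("%q\n", name) // "scrape_count\xff"
}
```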
@@ -1862,6 +1865,9 @@ func (sl *scrapeLoop) report(app storage.Appender, start time.Time, duration time.Duration, scraped, added, seriesAdded, bytes int, scrapeErr error) (err error) {
 		if err = sl.addReportSample(app, scrapeBodySizeBytesMetricName, ts, float64(bytes), b); err != nil {
 			return
 		}
+		if err = sl.addReportSample(app, scrapeCountMetricName, ts, float64(sl.scrapeCount), b); err != nil {
+			return
+		}
 	}
 	return
 }
@@ -1897,6 +1903,9 @@ func (sl *scrapeLoop) reportStale(app storage.Appender, start time.Time) (err error) {
 		if err = sl.addReportSample(app, scrapeBodySizeBytesMetricName, ts, stale, b); err != nil {
 			return
 		}
+		if err = sl.addReportSample(app, scrapeCountMetricName, ts, stale, b); err != nil {
+			return
+		}
 	}
 	return
 }
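For context on the `stale` value written here: when a scrape loop stops, `reportStale` appends Prometheus's staleness-marker NaN (from `github.com/prometheus/prometheus/model/value`) to every report series, which is what the test below recognises with `value.IsStaleNaN`. A self-contained sketch of the marker's behaviour:

```go
package main

import (
	"fmt"
	"math"

	"github.com/prometheus/prometheus/model/value"
)

func main() {
	// The staleness marker is a specific NaN bit pattern; appending it
	// makes queries stop returning the series immediately.
	stale := math.Float64frombits(value.StaleNaN)
	fmt.Println(value.IsStaleNaN(stale)) // true
	// An ordinary NaN is not mistaken for a staleness marker.
	fmt.Println(value.IsStaleNaN(math.NaN())) // false
}
```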

scrape/scrape_test.go

@@ -744,13 +744,15 @@ func TestScrapeLoopStop(t *testing.T) {
 	)
 	sl := newBasicScrapeLoop(t, context.Background(), scraper, app, 10*time.Millisecond)
+	sl.reportExtraMetrics = true // So we can check scrape_count.
 	// Terminate loop after 2 scrapes.
+	const expectedScrapes = 2
 	numScrapes := 0
 	scraper.scrapeFunc = func(ctx context.Context, w io.Writer) error {
 		numScrapes++
-		if numScrapes == 2 {
+		if numScrapes == expectedScrapes {
 			go sl.stop()
 			<-sl.ctx.Done()
 		}
@@ -769,15 +771,16 @@ func TestScrapeLoopStop(t *testing.T) {
 		require.FailNow(t, "Scrape wasn't stopped.")
 	}
-	// We expected 1 actual sample for each scrape plus 5 for report samples.
+	// We expected 1 actual sample for each scrape plus 9 for report samples.
+	const expectedSamplesPerScrape = 10
 	// At least 2 scrapes were made, plus the final stale markers.
-	require.GreaterOrEqual(t, len(appender.resultFloats), 6*3, "Expected at least 3 scrapes with 6 samples each.")
+	require.GreaterOrEqual(t, len(appender.resultFloats), expectedSamplesPerScrape*(expectedScrapes+1), "Expected at least 3 scrapes with 10 samples each.")
-	require.Zero(t, len(appender.resultFloats)%6, "There is a scrape with missing samples.")
+	require.Zero(t, len(appender.resultFloats)%expectedSamplesPerScrape, "There is a scrape with missing samples.")
 	// All samples in a scrape must have the same timestamp.
 	var ts int64
 	for i, s := range appender.resultFloats {
 		switch {
-		case i%6 == 0:
+		case i%expectedSamplesPerScrape == 0:
 			ts = s.t
 		case s.t != ts:
 			t.Fatalf("Unexpected multiple timestamps within single scrape")
@@ -787,6 +790,9 @@ func TestScrapeLoopStop(t *testing.T) {
 	for _, s := range appender.resultFloats[len(appender.resultFloats)-5:] {
 		require.True(t, value.IsStaleNaN(s.f), "Appended last sample not as expected. Wanted: stale NaN Got: %x", math.Float64bits(s.f))
 	}
+	scrapeCounts := getResultFloats(appender, "scrape_count")
+	require.Equal(t, float64(expectedScrapes), scrapeCounts[len(scrapeCounts)-2])
 }

 func TestScrapeLoopRun(t *testing.T) {
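The `-2` index in the new assertion is deliberate: the very last `scrape_count` sample is the stale marker appended by `reportStale` on stop, so the second-to-last entry is the final real value, which after two scrapes must be 2. `getResultFloats` is a helper defined elsewhere in scrape_test.go; a plausible sketch of its shape is below, where `floatSample` and `collectResultAppender` are simplified stand-ins for the test's own types:

```go
package main

import (
	"fmt"

	"github.com/prometheus/prometheus/model/labels"
)

// Simplified stand-ins for the types used by the real test appender.
type floatSample struct {
	metric labels.Labels
	t      int64
	f      float64
}

type collectResultAppender struct {
	resultFloats []floatSample
}

// getResultFloats returns the values of every appended sample whose
// __name__ matches metricName, in append order.
func getResultFloats(app *collectResultAppender, metricName string) []float64 {
	var out []float64
	for _, s := range app.resultFloats {
		if s.metric.Get(labels.MetricName) == metricName {
			out = append(out, s.f)
		}
	}
	return out
}

func main() {
	app := &collectResultAppender{resultFloats: []floatSample{
		{metric: labels.FromStrings("__name__", "scrape_count"), f: 1},
		{metric: labels.FromStrings("__name__", "scrape_count"), f: 2},
	}}
	fmt.Println(getResultFloats(app, "scrape_count")) // [1 2]
}
```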