diff --git a/docs/feature_flags.md b/docs/feature_flags.md
index 8a8c9f70d4..adef7fc931 100644
--- a/docs/feature_flags.md
+++ b/docs/feature_flags.md
@@ -71,6 +71,7 @@ When enabled, for each instance scrape, Prometheus stores a sample in the follow
 
 - `scrape_timeout_seconds`. The configured `scrape_timeout` for a target. This allows you to measure each target to find out how close they are to timing out with `scrape_duration_seconds / scrape_timeout_seconds`.
 - `scrape_sample_limit`. The configured `sample_limit` for a target. This allows you to measure each target to find out how close they are to reaching the limit with `scrape_samples_post_metric_relabeling / scrape_sample_limit`. Note that `scrape_sample_limit` can be zero if there is no limit configured, which means that the query above can return `+Inf` for targets with no limit (as we divide by zero). If you want to query only for targets that do have a sample limit use this query: `scrape_samples_post_metric_relabeling / (scrape_sample_limit > 0)`.
+- `scrape_body_size_bytes`. The uncompressed size of the most recent scrape response, if successful. Scrapes failing because `body_size_limit` is exceeded report `-1`, other scrape failures report `0`.
 
 ## New service discovery manager
 
diff --git a/scrape/scrape.go b/scrape/scrape.go
index a6c4409737..e981ad36a3 100644
--- a/scrape/scrape.go
+++ b/scrape/scrape.go
@@ -264,7 +264,7 @@ const maxAheadTime = 10 * time.Minute
 
 type labelsMutator func(labels.Labels) labels.Labels
 
-func newScrapePool(cfg *config.ScrapeConfig, app storage.Appendable, jitterSeed uint64, logger log.Logger, reportScrapeTimeout bool) (*scrapePool, error) {
+func newScrapePool(cfg *config.ScrapeConfig, app storage.Appendable, jitterSeed uint64, logger log.Logger, reportExtraMetrics bool) (*scrapePool, error) {
 	targetScrapePools.Inc()
 	if logger == nil {
 		logger = log.NewNopLogger()
@@ -313,7 +313,7 @@ func newScrapePool(cfg *config.ScrapeConfig, app storage.Appendable, jitterSeed
 			opts.labelLimits,
 			opts.interval,
 			opts.timeout,
-			reportScrapeTimeout,
+			reportExtraMetrics,
 		)
 	}
 
@@ -861,7 +861,7 @@ type scrapeLoop struct {
 
 	disabledEndOfRunStalenessMarkers bool
 
-	reportScrapeTimeout bool
+	reportExtraMetrics bool
 }
 
 // scrapeCache tracks mappings of exposed metric strings to label sets and
@@ -1122,7 +1122,7 @@ func newScrapeLoop(ctx context.Context,
 	labelLimits *labelLimits,
 	interval time.Duration,
 	timeout time.Duration,
-	reportScrapeTimeout bool,
+	reportExtraMetrics bool,
 ) *scrapeLoop {
 	if l == nil {
 		l = log.NewNopLogger()
@@ -1149,7 +1149,7 @@ func newScrapeLoop(ctx context.Context,
 		labelLimits:         labelLimits,
 		interval:            interval,
 		timeout:             timeout,
-		reportScrapeTimeout: reportScrapeTimeout,
+		reportExtraMetrics:  reportExtraMetrics,
 	}
 	sl.ctx, sl.cancel = context.WithCancel(ctx)
 
@@ -1238,7 +1238,7 @@ func (sl *scrapeLoop) scrapeAndReport(last, appendTime time.Time, errc chan<- er
 	defer sl.buffers.Put(b)
 	buf := bytes.NewBuffer(b)
 
-	var total, added, seriesAdded int
+	var total, added, seriesAdded, bytes int
 	var err, appErr, scrapeErr error
 
 	app := sl.appender(sl.parentCtx)
@@ -1254,7 +1254,7 @@ func (sl *scrapeLoop) scrapeAndReport(last, appendTime time.Time, errc chan<- er
 	}()
 
 	defer func() {
-		if err = sl.report(app, appendTime, time.Since(start), total, added, seriesAdded, scrapeErr); err != nil {
+		if err = sl.report(app, appendTime, time.Since(start), total, added, seriesAdded, bytes, scrapeErr); err != nil {
 			level.Warn(sl.l).Log("msg", "Appending scrape report failed", "err", err)
 		}
 	}()
@@ -1287,11 +1287,15 @@ func (sl *scrapeLoop) scrapeAndReport(last, appendTime time.Time, errc chan<- er
 		if len(b) > 0 {
 			sl.lastScrapeSize = len(b)
 		}
+		bytes = len(b)
 	} else {
 		level.Debug(sl.l).Log("msg", "Scrape failed", "err", scrapeErr)
 		if errc != nil {
 			errc <- scrapeErr
 		}
+		if errors.Is(scrapeErr, errBodySizeLimit) {
+			bytes = -1
+		}
 	}
 
 	// A failed scrape is the same as an empty scrape,
@@ -1638,16 +1642,17 @@ func (sl *scrapeLoop) checkAddExemplarError(err error, e exemplar.Exemplar, appE
 // The constants are suffixed with the invalid \xff unicode rune to avoid collisions
 // with scraped metrics in the cache.
 const (
-	scrapeHealthMetricName       = "up" + "\xff"
-	scrapeDurationMetricName     = "scrape_duration_seconds" + "\xff"
-	scrapeSamplesMetricName      = "scrape_samples_scraped" + "\xff"
-	samplesPostRelabelMetricName = "scrape_samples_post_metric_relabeling" + "\xff"
-	scrapeSeriesAddedMetricName  = "scrape_series_added" + "\xff"
-	scrapeTimeoutMetricName      = "scrape_timeout_seconds" + "\xff"
-	scrapeSampleLimitMetricName  = "scrape_sample_limit" + "\xff"
+	scrapeHealthMetricName        = "up" + "\xff"
+	scrapeDurationMetricName      = "scrape_duration_seconds" + "\xff"
+	scrapeSamplesMetricName       = "scrape_samples_scraped" + "\xff"
+	samplesPostRelabelMetricName  = "scrape_samples_post_metric_relabeling" + "\xff"
+	scrapeSeriesAddedMetricName   = "scrape_series_added" + "\xff"
+	scrapeTimeoutMetricName       = "scrape_timeout_seconds" + "\xff"
+	scrapeSampleLimitMetricName   = "scrape_sample_limit" + "\xff"
+	scrapeBodySizeBytesMetricName = "scrape_body_size_bytes" + "\xff"
 )
 
-func (sl *scrapeLoop) report(app storage.Appender, start time.Time, duration time.Duration, scraped, added, seriesAdded int, scrapeErr error) (err error) {
+func (sl *scrapeLoop) report(app storage.Appender, start time.Time, duration time.Duration, scraped, added, seriesAdded, bytes int, scrapeErr error) (err error) {
 	sl.scraper.Report(start, duration, scrapeErr)
 
 	ts := timestamp.FromTime(start)
@@ -1672,13 +1677,16 @@ func (sl *scrapeLoop) report(app storage.Appender, start time.Time, duration tim
 	if err = sl.addReportSample(app, scrapeSeriesAddedMetricName, ts, float64(seriesAdded)); err != nil {
 		return
 	}
-	if sl.reportScrapeTimeout {
+	if sl.reportExtraMetrics {
 		if err = sl.addReportSample(app, scrapeTimeoutMetricName, ts, sl.timeout.Seconds()); err != nil {
 			return
 		}
		if err = sl.addReportSample(app, scrapeSampleLimitMetricName, ts, float64(sl.sampleLimit)); err != nil {
 			return
 		}
+		if err = sl.addReportSample(app, scrapeBodySizeBytesMetricName, ts, float64(bytes)); err != nil {
+			return
+		}
 	}
 	return
 }
@@ -1703,13 +1711,16 @@ func (sl *scrapeLoop) reportStale(app storage.Appender, start time.Time) (err er
 	if err = sl.addReportSample(app, scrapeSeriesAddedMetricName, ts, stale); err != nil {
 		return
 	}
-	if sl.reportScrapeTimeout {
+	if sl.reportExtraMetrics {
 		if err = sl.addReportSample(app, scrapeTimeoutMetricName, ts, stale); err != nil {
 			return
 		}
 		if err = sl.addReportSample(app, scrapeSampleLimitMetricName, ts, stale); err != nil {
 			return
 		}
+		if err = sl.addReportSample(app, scrapeBodySizeBytesMetricName, ts, stale); err != nil {
+			return
+		}
 	}
 	return
 }
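For context, the value reported as `scrape_body_size_bytes` follows a three-way convention: the uncompressed body length on a successful scrape, `-1` when the scrape fails because `body_size_limit` was exceeded, and `0` for any other failure. The standalone Go sketch below restates that mapping outside of the scrape loop; `bodySizeSample` and `errBodySizeExceeded` are illustrative names invented for this example, not identifiers introduced by the patch (the patch itself checks `errors.Is(scrapeErr, errBodySizeLimit)` inside `scrapeAndReport`).

```go
package main

import (
	"errors"
	"fmt"
)

// errBodySizeExceeded stands in for the errBodySizeLimit sentinel error used in
// scrape.go; the name here is purely illustrative.
var errBodySizeExceeded = errors.New("body size limit exceeded")

// bodySizeSample mirrors the value the patch reports as scrape_body_size_bytes:
// the uncompressed body length on success, -1 when the body size limit was hit,
// and 0 for any other scrape failure.
func bodySizeSample(body []byte, scrapeErr error) float64 {
	switch {
	case scrapeErr == nil:
		return float64(len(body))
	case errors.Is(scrapeErr, errBodySizeExceeded):
		return -1
	default:
		return 0
	}
}

func main() {
	fmt.Println(bodySizeSample([]byte("metric_a 1\n"), nil))           // 11
	fmt.Println(bodySizeSample(nil, errBodySizeExceeded))              // -1
	fmt.Println(bodySizeSample(nil, errors.New("connection refused"))) // 0
}
```

With the corresponding feature flag enabled, this convention means a query such as `scrape_body_size_bytes == -1` would select only targets whose last scrape was rejected for exceeding `body_size_limit`.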