Add scrape_body_size_bytes metric (#9569)

Fixes #9520

Signed-off-by: Furkan <furkan.turkal@trendyol.com>
This commit is contained in:
Furkan Türkal 2021-10-25 00:45:31 +03:00 committed by GitHub
parent 5d409b0637
commit a6e6011d55
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 29 additions and 17 deletions

View file

@ -71,6 +71,7 @@ When enabled, for each instance scrape, Prometheus stores a sample in the follow
- `scrape_timeout_seconds`. The configured `scrape_timeout` for a target. This allows you to measure each target to find out how close they are to timing out with `scrape_duration_seconds / scrape_timeout_seconds`.
- `scrape_sample_limit`. The configured `sample_limit` for a target. This allows you to measure each target
to find out how close they are to reaching the limit with `scrape_samples_post_metric_relabeling / scrape_sample_limit`. Note that `scrape_sample_limit` can be zero if there is no limit configured, which means that the query above can return `+Inf` for targets with no limit (as we divide by zero). If you want to query only for targets that do have a sample limit use this query: `scrape_samples_post_metric_relabeling / (scrape_sample_limit > 0)`.
- `scrape_body_size_bytes`. The uncompressed size of the most recent scrape response, if successful. Scrapes failing because `body_size_limit` is exceeded report `-1`, other scrape failures report `0`.
## New service discovery manager

View file

@ -264,7 +264,7 @@ const maxAheadTime = 10 * time.Minute
type labelsMutator func(labels.Labels) labels.Labels
func newScrapePool(cfg *config.ScrapeConfig, app storage.Appendable, jitterSeed uint64, logger log.Logger, reportScrapeTimeout bool) (*scrapePool, error) {
func newScrapePool(cfg *config.ScrapeConfig, app storage.Appendable, jitterSeed uint64, logger log.Logger, reportExtraMetrics bool) (*scrapePool, error) {
targetScrapePools.Inc()
if logger == nil {
logger = log.NewNopLogger()
@ -313,7 +313,7 @@ func newScrapePool(cfg *config.ScrapeConfig, app storage.Appendable, jitterSeed
opts.labelLimits,
opts.interval,
opts.timeout,
reportScrapeTimeout,
reportExtraMetrics,
)
}
@ -861,7 +861,7 @@ type scrapeLoop struct {
disabledEndOfRunStalenessMarkers bool
reportScrapeTimeout bool
reportExtraMetrics bool
}
// scrapeCache tracks mappings of exposed metric strings to label sets and
@ -1122,7 +1122,7 @@ func newScrapeLoop(ctx context.Context,
labelLimits *labelLimits,
interval time.Duration,
timeout time.Duration,
reportScrapeTimeout bool,
reportExtraMetrics bool,
) *scrapeLoop {
if l == nil {
l = log.NewNopLogger()
@ -1149,7 +1149,7 @@ func newScrapeLoop(ctx context.Context,
labelLimits: labelLimits,
interval: interval,
timeout: timeout,
reportScrapeTimeout: reportScrapeTimeout,
reportExtraMetrics: reportExtraMetrics,
}
sl.ctx, sl.cancel = context.WithCancel(ctx)
@ -1238,7 +1238,7 @@ func (sl *scrapeLoop) scrapeAndReport(last, appendTime time.Time, errc chan<- er
defer sl.buffers.Put(b)
buf := bytes.NewBuffer(b)
var total, added, seriesAdded int
var total, added, seriesAdded, bytes int
var err, appErr, scrapeErr error
app := sl.appender(sl.parentCtx)
@ -1254,7 +1254,7 @@ func (sl *scrapeLoop) scrapeAndReport(last, appendTime time.Time, errc chan<- er
}()
defer func() {
if err = sl.report(app, appendTime, time.Since(start), total, added, seriesAdded, scrapeErr); err != nil {
if err = sl.report(app, appendTime, time.Since(start), total, added, seriesAdded, bytes, scrapeErr); err != nil {
level.Warn(sl.l).Log("msg", "Appending scrape report failed", "err", err)
}
}()
@ -1287,11 +1287,15 @@ func (sl *scrapeLoop) scrapeAndReport(last, appendTime time.Time, errc chan<- er
if len(b) > 0 {
sl.lastScrapeSize = len(b)
}
bytes = len(b)
} else {
level.Debug(sl.l).Log("msg", "Scrape failed", "err", scrapeErr)
if errc != nil {
errc <- scrapeErr
}
if errors.Is(scrapeErr, errBodySizeLimit) {
bytes = -1
}
}
// A failed scrape is the same as an empty scrape,
@ -1638,16 +1642,17 @@ func (sl *scrapeLoop) checkAddExemplarError(err error, e exemplar.Exemplar, appE
// The constants are suffixed with the invalid \xff unicode rune to avoid collisions
// with scraped metrics in the cache.
const (
scrapeHealthMetricName = "up" + "\xff"
scrapeDurationMetricName = "scrape_duration_seconds" + "\xff"
scrapeSamplesMetricName = "scrape_samples_scraped" + "\xff"
samplesPostRelabelMetricName = "scrape_samples_post_metric_relabeling" + "\xff"
scrapeSeriesAddedMetricName = "scrape_series_added" + "\xff"
scrapeTimeoutMetricName = "scrape_timeout_seconds" + "\xff"
scrapeSampleLimitMetricName = "scrape_sample_limit" + "\xff"
scrapeHealthMetricName = "up" + "\xff"
scrapeDurationMetricName = "scrape_duration_seconds" + "\xff"
scrapeSamplesMetricName = "scrape_samples_scraped" + "\xff"
samplesPostRelabelMetricName = "scrape_samples_post_metric_relabeling" + "\xff"
scrapeSeriesAddedMetricName = "scrape_series_added" + "\xff"
scrapeTimeoutMetricName = "scrape_timeout_seconds" + "\xff"
scrapeSampleLimitMetricName = "scrape_sample_limit" + "\xff"
scrapeBodySizeBytesMetricName = "scrape_body_size_bytes" + "\xff"
)
func (sl *scrapeLoop) report(app storage.Appender, start time.Time, duration time.Duration, scraped, added, seriesAdded int, scrapeErr error) (err error) {
func (sl *scrapeLoop) report(app storage.Appender, start time.Time, duration time.Duration, scraped, added, seriesAdded, bytes int, scrapeErr error) (err error) {
sl.scraper.Report(start, duration, scrapeErr)
ts := timestamp.FromTime(start)
@ -1672,13 +1677,16 @@ func (sl *scrapeLoop) report(app storage.Appender, start time.Time, duration tim
if err = sl.addReportSample(app, scrapeSeriesAddedMetricName, ts, float64(seriesAdded)); err != nil {
return
}
if sl.reportScrapeTimeout {
if sl.reportExtraMetrics {
if err = sl.addReportSample(app, scrapeTimeoutMetricName, ts, sl.timeout.Seconds()); err != nil {
return
}
if err = sl.addReportSample(app, scrapeSampleLimitMetricName, ts, float64(sl.sampleLimit)); err != nil {
return
}
if err = sl.addReportSample(app, scrapeBodySizeBytesMetricName, ts, float64(bytes)); err != nil {
return
}
}
return
}
@ -1703,13 +1711,16 @@ func (sl *scrapeLoop) reportStale(app storage.Appender, start time.Time) (err er
if err = sl.addReportSample(app, scrapeSeriesAddedMetricName, ts, stale); err != nil {
return
}
if sl.reportScrapeTimeout {
if sl.reportExtraMetrics {
if err = sl.addReportSample(app, scrapeTimeoutMetricName, ts, stale); err != nil {
return
}
if err = sl.addReportSample(app, scrapeSampleLimitMetricName, ts, stale); err != nil {
return
}
if err = sl.addReportSample(app, scrapeBodySizeBytesMetricName, ts, stale); err != nil {
return
}
}
return
}