mirror of
https://github.com/prometheus/prometheus.git
synced 2024-11-09 23:24:05 -08:00
Allow to tune the scrape tolerance (#9283)
* Allow to tune the scrape tolerance In most classic monitoring use cases, a difference of a few milliseconds can be ignored. In Prometheus, however, a few milliseconds can make a big difference, because scrape timestamps that are not aligned to the intended schedule compress less well at the TSDB level. Currently, Prometheus ignores up to 2ms of difference when aligning scrape timestamps. It turns out that for users who can afford a 10ms difference, there are a lot of resources and disk space to be saved, as shown in this graph, which shows the bytes per sample on a production Prometheus server. You can clearly see the switch from a 2ms to a 10ms tolerance. This pull request makes the scrape timestamp alignment tolerance adjustable. Signed-off-by: Julien Pivotto <roidelapluie@inuits.eu> * Fix golint Signed-off-by: Julien Pivotto <roidelapluie@inuits.eu>
This commit is contained in:
parent
2327236bb5
commit
48a101be1b
|
@ -295,9 +295,12 @@ func main() {
|
||||||
a.Flag("rules.alert.resend-delay", "Minimum amount of time to wait before resending an alert to Alertmanager.").
|
a.Flag("rules.alert.resend-delay", "Minimum amount of time to wait before resending an alert to Alertmanager.").
|
||||||
Default("1m").SetValue(&cfg.resendDelay)
|
Default("1m").SetValue(&cfg.resendDelay)
|
||||||
|
|
||||||
a.Flag("scrape.adjust-timestamps", "Adjust scrape timestamps by up to 2ms to align them to the intended schedule. See https://github.com/prometheus/prometheus/issues/7846 for more context. Experimental. This flag will be removed in a future release.").
|
a.Flag("scrape.adjust-timestamps", "Adjust scrape timestamps by up to `scrape.timestamp-tolerance` to align them to the intended schedule. See https://github.com/prometheus/prometheus/issues/7846 for more context. Experimental. This flag will be removed in a future release.").
|
||||||
Hidden().Default("true").BoolVar(&scrape.AlignScrapeTimestamps)
|
Hidden().Default("true").BoolVar(&scrape.AlignScrapeTimestamps)
|
||||||
|
|
||||||
|
a.Flag("scrape.timestamp-tolerance", "Timestamp tolerance. See https://github.com/prometheus/prometheus/issues/7846 for more context. Experimental. This flag will be removed in a future release.").
|
||||||
|
Hidden().Default("2ms").DurationVar(&scrape.ScrapeTimestampTolerance)
|
||||||
|
|
||||||
a.Flag("alertmanager.notification-queue-capacity", "The capacity of the queue for pending Alertmanager notifications.").
|
a.Flag("alertmanager.notification-queue-capacity", "The capacity of the queue for pending Alertmanager notifications.").
|
||||||
Default("10000").IntVar(&cfg.notifier.QueueCapacity)
|
Default("10000").IntVar(&cfg.notifier.QueueCapacity)
|
||||||
|
|
||||||
|
|
|
@ -49,10 +49,10 @@ import (
|
||||||
"github.com/prometheus/prometheus/storage"
|
"github.com/prometheus/prometheus/storage"
|
||||||
)
|
)
|
||||||
|
|
||||||
// Temporary tolerance for scrape appends timestamps alignment, to enable better
|
// ScrapeTimestampTolerance is the tolerance for scrape appends timestamps
|
||||||
// compression at the TSDB level.
|
// alignment, to enable better compression at the TSDB level.
|
||||||
// See https://github.com/prometheus/prometheus/issues/7846
|
// See https://github.com/prometheus/prometheus/issues/7846
|
||||||
const scrapeTimestampTolerance = 2 * time.Millisecond
|
var ScrapeTimestampTolerance = 2 * time.Millisecond
|
||||||
|
|
||||||
// AlignScrapeTimestamps enables the tolerance for scrape appends timestamps described above.
|
// AlignScrapeTimestamps enables the tolerance for scrape appends timestamps described above.
|
||||||
var AlignScrapeTimestamps = true
|
var AlignScrapeTimestamps = true
|
||||||
|
@ -1159,14 +1159,14 @@ mainLoop:
|
||||||
// Calling Round ensures the time used is the wall clock, as otherwise .Sub
|
// Calling Round ensures the time used is the wall clock, as otherwise .Sub
|
||||||
// and .Add on time.Time behave differently (see time package docs).
|
// and .Add on time.Time behave differently (see time package docs).
|
||||||
scrapeTime := time.Now().Round(0)
|
scrapeTime := time.Now().Round(0)
|
||||||
if AlignScrapeTimestamps && sl.interval > 100*scrapeTimestampTolerance {
|
if AlignScrapeTimestamps && sl.interval > 100*ScrapeTimestampTolerance {
|
||||||
// For some reason, a tick might have been skipped, in which case we
|
// For some reason, a tick might have been skipped, in which case we
|
||||||
// would call alignedScrapeTime.Add(interval) multiple times.
|
// would call alignedScrapeTime.Add(interval) multiple times.
|
||||||
for scrapeTime.Sub(alignedScrapeTime) >= sl.interval {
|
for scrapeTime.Sub(alignedScrapeTime) >= sl.interval {
|
||||||
alignedScrapeTime = alignedScrapeTime.Add(sl.interval)
|
alignedScrapeTime = alignedScrapeTime.Add(sl.interval)
|
||||||
}
|
}
|
||||||
// Align the scrape time if we are in the tolerance boundaries.
|
// Align the scrape time if we are in the tolerance boundaries.
|
||||||
if scrapeTime.Sub(alignedScrapeTime) <= scrapeTimestampTolerance {
|
if scrapeTime.Sub(alignedScrapeTime) <= ScrapeTimestampTolerance {
|
||||||
scrapeTime = alignedScrapeTime
|
scrapeTime = alignedScrapeTime
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue