From bc9f9ee3aa5240646775c11028065dfc6a5d1de2 Mon Sep 17 00:00:00 2001 From: Julien Pivotto Date: Wed, 30 Dec 2020 02:06:41 +0100 Subject: [PATCH] Backfilling: fast-path for non-consecutive blocks (#8324) * Backfilling: optimize for non-consecutive blocks When you have missing data for > 2 hours, you spend a lot of time re-reading the complete file, which is not optimal. This introduces a fast path for this scenario. In addition, the metric was previously parsed even when its timestamp showed that the sample would not be used. The metric is now parsed only once its timestamp is known to be in range. Signed-off-by: Julien Pivotto --- cmd/promtool/backfill.go | 30 ++++++++++++--- cmd/promtool/backfill_test.go | 72 +++++++++++++++++++++++++++++++++++ 2 files changed, 97 insertions(+), 5 deletions(-) diff --git a/cmd/promtool/backfill.go b/cmd/promtool/backfill.go index b3c33208d..6e7a98c55 100644 --- a/cmd/promtool/backfill.go +++ b/cmd/promtool/backfill.go @@ -77,9 +77,21 @@ func createBlocks(input []byte, mint, maxt int64, maxSamplesInAppender int, outp returnErr = tsdb_errors.NewMulti(returnErr, db.Close()).Err() }() - var wroteHeader bool + var ( + wroteHeader bool + nextSampleTs int64 = math.MaxInt64 + ) for t := mint; t <= maxt; t = t + blockDuration { + tsUpper := t + blockDuration + if nextSampleTs != math.MaxInt64 && nextSampleTs >= tsUpper { + // The next sample is not in this timerange, we can avoid parsing + // the file for this timerange. 
+ continue + + } + nextSampleTs = math.MaxInt64 + err := func() error { w, err := tsdb.NewBlockWriter(log.NewNopLogger(), outputDir, blockDuration) if err != nil { @@ -92,7 +104,6 @@ func createBlocks(input []byte, mint, maxt int64, maxSamplesInAppender int, outp ctx := context.Background() app := w.Appender(ctx) p := textparse.NewOpenMetricsParser(input) - tsUpper := t + blockDuration samplesCount := 0 for { e, err := p.Next() @@ -106,15 +117,24 @@ func createBlocks(input []byte, mint, maxt int64, maxSamplesInAppender int, outp continue } - l := labels.Labels{} - p.Metric(&l) _, ts, v := p.Series() if ts == nil { + l := labels.Labels{} + p.Metric(&l) return errors.Errorf("expected timestamp for series %v, got none", l) } - if *ts < t || *ts >= tsUpper { + if *ts < t { continue } + if *ts >= tsUpper { + if *ts < nextSampleTs { + nextSampleTs = *ts + } + continue + } + + l := labels.Labels{} + p.Metric(&l) if _, err := app.Add(l, *ts, v); err != nil { return errors.Wrap(err, "add sample") diff --git a/cmd/promtool/backfill_test.go b/cmd/promtool/backfill_test.go index 87f18d64b..56c3cf70c 100644 --- a/cmd/promtool/backfill_test.go +++ b/cmd/promtool/backfill_test.go @@ -150,6 +150,78 @@ http_requests_total{code="400"} 1 1565133713.990 ToParse: `# HELP http_requests_total The total number of HTTP requests. 
# TYPE http_requests_total counter http_requests_total{code="200"} 1021 1565133713.989 +http_requests_total{code="200"} 1022 1565392913.989 +http_requests_total{code="200"} 1023 1565652113.989 +# EOF +`, + IsOk: true, + Description: "Multiple samples separated by 3 days.", + MaxSamplesInAppender: 5000, + Expected: struct { + MinTime int64 + MaxTime int64 + NumBlocks int + Samples []backfillSample + }{ + MinTime: 1565133713989, + MaxTime: 1565652113989, + NumBlocks: 3, + Samples: []backfillSample{ + { + Timestamp: 1565133713989, + Value: 1021, + Labels: labels.FromStrings("__name__", "http_requests_total", "code", "200"), + }, + { + Timestamp: 1565392913989, + Value: 1022, + Labels: labels.FromStrings("__name__", "http_requests_total", "code", "200"), + }, + { + Timestamp: 1565652113989, + Value: 1023, + Labels: labels.FromStrings("__name__", "http_requests_total", "code", "200"), + }, + }, + }, + }, + { + ToParse: `# TYPE go info +go_info{version="go1.15.3"} 1 1565392913.989 +# TYPE http_requests_total counter +http_requests_total{code="200"} 1021 1565133713.989 +# EOF +`, + IsOk: true, + Description: "Unordered samples from multiple series, which end in different blocks.", + MaxSamplesInAppender: 5000, + Expected: struct { + MinTime int64 + MaxTime int64 + NumBlocks int + Samples []backfillSample + }{ + MinTime: 1565133713989, + MaxTime: 1565392913989, + NumBlocks: 2, + Samples: []backfillSample{ + { + Timestamp: 1565133713989, + Value: 1021, + Labels: labels.FromStrings("__name__", "http_requests_total", "code", "200"), + }, + { + Timestamp: 1565392913989, + Value: 1, + Labels: labels.FromStrings("__name__", "go_info", "version", "go1.15.3"), + }, + }, + }, + }, + { + ToParse: `# HELP http_requests_total The total number of HTTP requests. +# TYPE http_requests_total counter +http_requests_total{code="200"} 1021 1565133713.989 http_requests_total{code="200"} 1 1565133714.989 http_requests_total{code="400"} 2 1565133715.989 # EOF