Backfilling: fast-path for non-consecutive blocks (#8324)

* Backfilling: optimize for non-consecutive blocks

When the input has gaps of more than 2 hours between samples, a lot of
time is spent re-reading the complete file for time ranges that contain
no samples. This is not optimal.

This introduces a fast path for this scenario: time ranges that end
before the next sample's timestamp are skipped without parsing the file.

In addition, the metric was previously parsed even when the sample's
timestamp showed that it would not be used. The metric is now only
parsed once the timestamp is known to fall within the current time
range, or when it is needed for an error message.

Signed-off-by: Julien Pivotto <roidelapluie@inuits.eu>
This commit is contained in:
Julien Pivotto 2020-12-30 02:06:41 +01:00 committed by GitHub
parent 0de83bfa14
commit bc9f9ee3aa
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 97 additions and 5 deletions

View file

@ -77,9 +77,21 @@ func createBlocks(input []byte, mint, maxt int64, maxSamplesInAppender int, outp
returnErr = tsdb_errors.NewMulti(returnErr, db.Close()).Err() returnErr = tsdb_errors.NewMulti(returnErr, db.Close()).Err()
}() }()
var wroteHeader bool var (
wroteHeader bool
nextSampleTs int64 = math.MaxInt64
)
for t := mint; t <= maxt; t = t + blockDuration { for t := mint; t <= maxt; t = t + blockDuration {
tsUpper := t + blockDuration
if nextSampleTs != math.MaxInt64 && nextSampleTs >= tsUpper {
// The next sample is not in this timerange, we can avoid parsing
// the file for this timerange.
continue
}
nextSampleTs = math.MaxInt64
err := func() error { err := func() error {
w, err := tsdb.NewBlockWriter(log.NewNopLogger(), outputDir, blockDuration) w, err := tsdb.NewBlockWriter(log.NewNopLogger(), outputDir, blockDuration)
if err != nil { if err != nil {
@ -92,7 +104,6 @@ func createBlocks(input []byte, mint, maxt int64, maxSamplesInAppender int, outp
ctx := context.Background() ctx := context.Background()
app := w.Appender(ctx) app := w.Appender(ctx)
p := textparse.NewOpenMetricsParser(input) p := textparse.NewOpenMetricsParser(input)
tsUpper := t + blockDuration
samplesCount := 0 samplesCount := 0
for { for {
e, err := p.Next() e, err := p.Next()
@ -106,15 +117,24 @@ func createBlocks(input []byte, mint, maxt int64, maxSamplesInAppender int, outp
continue continue
} }
l := labels.Labels{}
p.Metric(&l)
_, ts, v := p.Series() _, ts, v := p.Series()
if ts == nil { if ts == nil {
l := labels.Labels{}
p.Metric(&l)
return errors.Errorf("expected timestamp for series %v, got none", l) return errors.Errorf("expected timestamp for series %v, got none", l)
} }
if *ts < t || *ts >= tsUpper { if *ts < t {
continue continue
} }
if *ts >= tsUpper {
if *ts < nextSampleTs {
nextSampleTs = *ts
}
continue
}
l := labels.Labels{}
p.Metric(&l)
if _, err := app.Add(l, *ts, v); err != nil { if _, err := app.Add(l, *ts, v); err != nil {
return errors.Wrap(err, "add sample") return errors.Wrap(err, "add sample")

View file

@ -150,6 +150,78 @@ http_requests_total{code="400"} 1 1565133713.990
ToParse: `# HELP http_requests_total The total number of HTTP requests. ToParse: `# HELP http_requests_total The total number of HTTP requests.
# TYPE http_requests_total counter # TYPE http_requests_total counter
http_requests_total{code="200"} 1021 1565133713.989 http_requests_total{code="200"} 1021 1565133713.989
http_requests_total{code="200"} 1022 1565392913.989
http_requests_total{code="200"} 1023 1565652113.989
# EOF
`,
IsOk: true,
Description: "Multiple samples separated by 3 days.",
MaxSamplesInAppender: 5000,
Expected: struct {
MinTime int64
MaxTime int64
NumBlocks int
Samples []backfillSample
}{
MinTime: 1565133713989,
MaxTime: 1565652113989,
NumBlocks: 3,
Samples: []backfillSample{
{
Timestamp: 1565133713989,
Value: 1021,
Labels: labels.FromStrings("__name__", "http_requests_total", "code", "200"),
},
{
Timestamp: 1565392913989,
Value: 1022,
Labels: labels.FromStrings("__name__", "http_requests_total", "code", "200"),
},
{
Timestamp: 1565652113989,
Value: 1023,
Labels: labels.FromStrings("__name__", "http_requests_total", "code", "200"),
},
},
},
},
{
ToParse: `# TYPE go info
go_info{version="go1.15.3"} 1 1565392913.989
# TYPE http_requests_total counter
http_requests_total{code="200"} 1021 1565133713.989
# EOF
`,
IsOk: true,
Description: "Unordered samples from multiple series, which end in different blocks.",
MaxSamplesInAppender: 5000,
Expected: struct {
MinTime int64
MaxTime int64
NumBlocks int
Samples []backfillSample
}{
MinTime: 1565133713989,
MaxTime: 1565392913989,
NumBlocks: 2,
Samples: []backfillSample{
{
Timestamp: 1565133713989,
Value: 1021,
Labels: labels.FromStrings("__name__", "http_requests_total", "code", "200"),
},
{
Timestamp: 1565392913989,
Value: 1,
Labels: labels.FromStrings("__name__", "go_info", "version", "go1.15.3"),
},
},
},
},
{
ToParse: `# HELP http_requests_total The total number of HTTP requests.
# TYPE http_requests_total counter
http_requests_total{code="200"} 1021 1565133713.989
http_requests_total{code="200"} 1 1565133714.989 http_requests_total{code="200"} 1 1565133714.989
http_requests_total{code="400"} 2 1565133715.989 http_requests_total{code="400"} 2 1565133715.989
# EOF # EOF