prometheus/cmd/promtool/backfill.go
Julien Pivotto bc9f9ee3aa
Backfilling: fast-path for non-consecutive blocks (#8324)
* Backfilling: optimize for non-consecutive blocks

When you have missing data for > 2 hours, you spend a lot of time
re-reading the complete file. It is not optimal.

This introduces a fastpath for this scenario.

Next, we do parse the metric even when we know we will not use it, based
on its timestamp. This only computes the metric when we know its
timestamp is right.

Signed-off-by: Julien Pivotto <roidelapluie@inuits.eu>
2020-12-30 02:06:41 +01:00

201 lines
4.7 KiB
Go

// Copyright 2020 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package main
import (
"context"
"io"
"math"
"github.com/go-kit/kit/log"
"github.com/pkg/errors"
"github.com/prometheus/prometheus/pkg/labels"
"github.com/prometheus/prometheus/pkg/textparse"
"github.com/prometheus/prometheus/tsdb"
tsdb_errors "github.com/prometheus/prometheus/tsdb/errors"
)
func getMinAndMaxTimestamps(p textparse.Parser) (int64, int64, error) {
var maxt, mint int64 = math.MinInt64, math.MaxInt64
for {
entry, err := p.Next()
if err == io.EOF {
break
}
if err != nil {
return 0, 0, errors.Wrap(err, "next")
}
if entry != textparse.EntrySeries {
continue
}
_, ts, _ := p.Series()
if ts == nil {
return 0, 0, errors.Errorf("expected timestamp for series got none")
}
if *ts > maxt {
maxt = *ts
}
if *ts < mint {
mint = *ts
}
}
if maxt == math.MinInt64 {
maxt = 0
}
if mint == math.MaxInt64 {
mint = 0
}
return maxt, mint, nil
}
func createBlocks(input []byte, mint, maxt int64, maxSamplesInAppender int, outputDir string, humanReadable bool) (returnErr error) {
blockDuration := tsdb.DefaultBlockDuration
mint = blockDuration * (mint / blockDuration)
db, err := tsdb.OpenDBReadOnly(outputDir, nil)
if err != nil {
return err
}
defer func() {
returnErr = tsdb_errors.NewMulti(returnErr, db.Close()).Err()
}()
var (
wroteHeader bool
nextSampleTs int64 = math.MaxInt64
)
for t := mint; t <= maxt; t = t + blockDuration {
tsUpper := t + blockDuration
if nextSampleTs != math.MaxInt64 && nextSampleTs >= tsUpper {
// The next sample is not in this timerange, we can avoid parsing
// the file for this timerange.
continue
}
nextSampleTs = math.MaxInt64
err := func() error {
w, err := tsdb.NewBlockWriter(log.NewNopLogger(), outputDir, blockDuration)
if err != nil {
return errors.Wrap(err, "block writer")
}
defer func() {
err = tsdb_errors.NewMulti(err, w.Close()).Err()
}()
ctx := context.Background()
app := w.Appender(ctx)
p := textparse.NewOpenMetricsParser(input)
samplesCount := 0
for {
e, err := p.Next()
if err == io.EOF {
break
}
if err != nil {
return errors.Wrap(err, "parse")
}
if e != textparse.EntrySeries {
continue
}
_, ts, v := p.Series()
if ts == nil {
l := labels.Labels{}
p.Metric(&l)
return errors.Errorf("expected timestamp for series %v, got none", l)
}
if *ts < t {
continue
}
if *ts >= tsUpper {
if *ts < nextSampleTs {
nextSampleTs = *ts
}
continue
}
l := labels.Labels{}
p.Metric(&l)
if _, err := app.Add(l, *ts, v); err != nil {
return errors.Wrap(err, "add sample")
}
samplesCount++
if samplesCount < maxSamplesInAppender {
continue
}
// If we arrive here, the samples count is greater than the maxSamplesInAppender.
// Therefore the old appender is committed and a new one is created.
// This prevents keeping too many samples lined up in an appender and thus in RAM.
if err := app.Commit(); err != nil {
return errors.Wrap(err, "commit")
}
app = w.Appender(ctx)
samplesCount = 0
}
if err := app.Commit(); err != nil {
return errors.Wrap(err, "commit")
}
block, err := w.Flush(ctx)
switch err {
case nil:
blocks, err := db.Blocks()
if err != nil {
return errors.Wrap(err, "get blocks")
}
for _, b := range blocks {
if b.Meta().ULID == block {
printBlocks([]tsdb.BlockReader{b}, !wroteHeader, humanReadable)
wroteHeader = true
break
}
}
case tsdb.ErrNoSeriesAppended:
default:
return errors.Wrap(err, "flush")
}
return nil
}()
if err != nil {
return errors.Wrap(err, "process blocks")
}
}
return nil
}
func backfill(maxSamplesInAppender int, input []byte, outputDir string, humanReadable bool) (err error) {
p := textparse.NewOpenMetricsParser(input)
maxt, mint, err := getMinAndMaxTimestamps(p)
if err != nil {
return errors.Wrap(err, "getting min and max timestamp")
}
return errors.Wrap(createBlocks(input, mint, maxt, maxSamplesInAppender, outputDir, humanReadable), "block creation")
}