promtool: backfill: allow configuring block duration (#8919)

* promtool: backfill: allow configuring block duration

When backfilling large amounts of data across long periods of time, it
may in certain circumstances be useful to use a longer block duration to
increase the efficiency and speed of the backfilling process. This patch
adds a flag --max-block-duration that lets a user set the maximum duration
a created block may span; values below the 2h default are ignored.

Signed-off-by: Steve Kuznetsov <skuznets@redhat.com>

* promtool: use sub-tests in backfill testing

Signed-off-by: Steve Kuznetsov <skuznets@redhat.com>

* backfill: add messages to tests for clarity

When someone new breaks a test, seeing "expected: false, got: true" is
really not useful. A nice message helps here.

Signed-off-by: Steve Kuznetsov <skuznets@redhat.com>

* backfill: test long block durations

A test that uses a long block duration to write bigger blocks is added.
The check to make sure all blocks are the default duration is removed.

Signed-off-by: Steve Kuznetsov <skuznets@redhat.com>
This commit is contained in:
Steve Kuznetsov 2021-06-29 02:23:38 -07:00 committed by GitHub
parent 664b391573
commit fd6c852567
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 291 additions and 114 deletions

View file

@ -17,6 +17,7 @@ import (
"context"
"io"
"math"
"time"
"github.com/go-kit/log"
"github.com/pkg/errors"
@ -65,8 +66,19 @@ func getMinAndMaxTimestamps(p textparse.Parser) (int64, int64, error) {
return maxt, mint, nil
}
func createBlocks(input []byte, mint, maxt int64, maxSamplesInAppender int, outputDir string, humanReadable, quiet bool) (returnErr error) {
func createBlocks(input []byte, mint, maxt, maxBlockDuration int64, maxSamplesInAppender int, outputDir string, humanReadable, quiet bool) (returnErr error) {
blockDuration := tsdb.DefaultBlockDuration
if maxBlockDuration > tsdb.DefaultBlockDuration {
ranges := tsdb.ExponentialBlockRanges(tsdb.DefaultBlockDuration, 10, 3)
idx := len(ranges) - 1 // Use largest range if user asked for something enormous.
for i, v := range ranges {
if v > maxBlockDuration {
idx = i - 1
break
}
}
blockDuration = ranges[idx]
}
mint = blockDuration * (mint / blockDuration)
db, err := tsdb.OpenDBReadOnly(outputDir, nil)
@ -199,11 +211,11 @@ func createBlocks(input []byte, mint, maxt int64, maxSamplesInAppender int, outp
}
// backfill parses input as OpenMetrics text, asks getMinAndMaxTimestamps for
// the timestamp bounds of that input, and passes everything to createBlocks
// together with outputDir. A failure in either step is returned wrapped with
// a short description of the step ("getting min and max timestamp" or
// "block creation").
func backfill(maxSamplesInAppender int, input []byte, outputDir string, humanReadable, quiet bool) (err error) {
func backfill(maxSamplesInAppender int, input []byte, outputDir string, humanReadable, quiet bool, maxBlockDuration time.Duration) (err error) {
p := textparse.NewOpenMetricsParser(input)
// NOTE(review): the visible tail of getMinAndMaxTimestamps returns
// `maxt, mint, nil`, and the results are received in that same order here,
// despite the helper's name suggesting min first. Confirm that mint/maxt
// really hold the minimum/maximum before changing either side.
maxt, mint, err := getMinAndMaxTimestamps(p)
if err != nil {
return errors.Wrap(err, "getting min and max timestamp")
}
return errors.Wrap(createBlocks(input, mint, maxt, maxSamplesInAppender, outputDir, humanReadable, quiet), "block creation")
// maxBlockDuration is a time.Duration; dividing by time.Millisecond turns it
// into the int64 millisecond count that createBlocks takes.
return errors.Wrap(createBlocks(input, mint, maxt, int64(maxBlockDuration/time.Millisecond), maxSamplesInAppender, outputDir, humanReadable, quiet), "block creation")
}

View file

@ -20,6 +20,7 @@ import (
"os"
"sort"
"testing"
"time"
"github.com/prometheus/prometheus/pkg/labels"
"github.com/prometheus/prometheus/storage"
@ -58,12 +59,12 @@ func queryAllSeries(t testing.TB, q storage.Querier, expectedMinTime, expectedMa
return samples
}
func testBlocks(t *testing.T, db *tsdb.DB, expectedMinTime, expectedMaxTime int64, expectedSamples []backfillSample, expectedNumBlocks int) {
func testBlocks(t *testing.T, db *tsdb.DB, expectedMinTime, expectedMaxTime, expectedBlockDuration int64, expectedSamples []backfillSample, expectedNumBlocks int) {
blocks := db.Blocks()
require.Equal(t, expectedNumBlocks, len(blocks))
require.Equal(t, expectedNumBlocks, len(blocks), "did not create correct number of blocks")
for _, block := range blocks {
require.Equal(t, true, block.MinTime()/tsdb.DefaultBlockDuration == (block.MaxTime()-1)/tsdb.DefaultBlockDuration)
for i, block := range blocks {
require.Equal(t, block.MinTime()/expectedBlockDuration, (block.MaxTime()-1)/expectedBlockDuration, "block %d contains data outside of one aligned block duration", i)
}
q, err := db.Querier(context.Background(), math.MinInt64, math.MaxInt64)
@ -75,11 +76,11 @@ func testBlocks(t *testing.T, db *tsdb.DB, expectedMinTime, expectedMaxTime int6
allSamples := queryAllSeries(t, q, expectedMinTime, expectedMaxTime)
sortSamples(allSamples)
sortSamples(expectedSamples)
require.Equal(t, expectedSamples, allSamples)
require.Equal(t, expectedSamples, allSamples, "did not create correct samples")
if len(allSamples) > 0 {
require.Equal(t, expectedMinTime, allSamples[0].Timestamp)
require.Equal(t, expectedMaxTime, allSamples[len(allSamples)-1].Timestamp)
require.Equal(t, expectedMinTime, allSamples[0].Timestamp, "timestamp of first sample is not the expected minimum time")
require.Equal(t, expectedMaxTime, allSamples[len(allSamples)-1].Timestamp, "timestamp of last sample is not the expected maximum time")
}
}
@ -89,11 +90,13 @@ func TestBackfill(t *testing.T) {
IsOk bool
Description string
MaxSamplesInAppender int
MaxBlockDuration time.Duration
Expected struct {
MinTime int64
MaxTime int64
NumBlocks int
Samples []backfillSample
MinTime int64
MaxTime int64
NumBlocks int
BlockDuration int64
Samples []backfillSample
}
}{
{
@ -102,15 +105,17 @@ func TestBackfill(t *testing.T) {
Description: "Empty file.",
MaxSamplesInAppender: 5000,
Expected: struct {
MinTime int64
MaxTime int64
NumBlocks int
Samples []backfillSample
MinTime int64
MaxTime int64
NumBlocks int
BlockDuration int64
Samples []backfillSample
}{
MinTime: math.MaxInt64,
MaxTime: math.MinInt64,
NumBlocks: 0,
Samples: []backfillSample{},
MinTime: math.MaxInt64,
MaxTime: math.MinInt64,
NumBlocks: 0,
BlockDuration: tsdb.DefaultBlockDuration,
Samples: []backfillSample{},
},
},
{
@ -124,14 +129,16 @@ http_requests_total{code="400"} 1 1565133713.990
Description: "Multiple samples with different timestamp for different series.",
MaxSamplesInAppender: 5000,
Expected: struct {
MinTime int64
MaxTime int64
NumBlocks int
Samples []backfillSample
MinTime int64
MaxTime int64
NumBlocks int
BlockDuration int64
Samples []backfillSample
}{
MinTime: 1565133713989,
MaxTime: 1565133713990,
NumBlocks: 1,
MinTime: 1565133713989,
MaxTime: 1565133713990,
NumBlocks: 1,
BlockDuration: tsdb.DefaultBlockDuration,
Samples: []backfillSample{
{
Timestamp: 1565133713989,
@ -158,14 +165,16 @@ http_requests_total{code="200"} 1023 1565652113.989
Description: "Multiple samples separated by 3 days.",
MaxSamplesInAppender: 5000,
Expected: struct {
MinTime int64
MaxTime int64
NumBlocks int
Samples []backfillSample
MinTime int64
MaxTime int64
NumBlocks int
BlockDuration int64
Samples []backfillSample
}{
MinTime: 1565133713989,
MaxTime: 1565652113989,
NumBlocks: 3,
MinTime: 1565133713989,
MaxTime: 1565652113989,
NumBlocks: 3,
BlockDuration: tsdb.DefaultBlockDuration,
Samples: []backfillSample{
{
Timestamp: 1565133713989,
@ -196,14 +205,16 @@ http_requests_total{code="200"} 1021 1565133713.989
Description: "Unordered samples from multiple series, which end in different blocks.",
MaxSamplesInAppender: 5000,
Expected: struct {
MinTime int64
MaxTime int64
NumBlocks int
Samples []backfillSample
MinTime int64
MaxTime int64
NumBlocks int
BlockDuration int64
Samples []backfillSample
}{
MinTime: 1565133713989,
MaxTime: 1565392913989,
NumBlocks: 2,
MinTime: 1565133713989,
MaxTime: 1565392913989,
NumBlocks: 2,
BlockDuration: tsdb.DefaultBlockDuration,
Samples: []backfillSample{
{
Timestamp: 1565133713989,
@ -230,14 +241,16 @@ http_requests_total{code="400"} 2 1565133715.989
Description: "Multiple samples with different timestamp for the same series.",
MaxSamplesInAppender: 5000,
Expected: struct {
MinTime int64
MaxTime int64
NumBlocks int
Samples []backfillSample
MinTime int64
MaxTime int64
NumBlocks int
BlockDuration int64
Samples []backfillSample
}{
MinTime: 1565133713989,
MaxTime: 1565133715989,
NumBlocks: 1,
MinTime: 1565133713989,
MaxTime: 1565133715989,
NumBlocks: 1,
BlockDuration: tsdb.DefaultBlockDuration,
Samples: []backfillSample{
{
Timestamp: 1565133713989,
@ -260,6 +273,132 @@ http_requests_total{code="400"} 2 1565133715.989
{
ToParse: `# HELP http_requests_total The total number of HTTP requests.
# TYPE http_requests_total counter
http_requests_total{code="200"} 1021 1624463088.000
http_requests_total{code="200"} 1 1627055153.000
http_requests_total{code="400"} 2 1627056153.000
# EOF
`,
IsOk: true,
Description: "Long maximum block duration puts all data into one block.",
MaxSamplesInAppender: 5000,
MaxBlockDuration: 2048 * time.Hour,
Expected: struct {
MinTime int64
MaxTime int64
NumBlocks int
BlockDuration int64
Samples []backfillSample
}{
MinTime: 1624463088000,
MaxTime: 1627056153000,
NumBlocks: 1,
BlockDuration: int64(1458 * time.Hour / time.Millisecond),
Samples: []backfillSample{
{
Timestamp: 1624463088000,
Value: 1021,
Labels: labels.FromStrings("__name__", "http_requests_total", "code", "200"),
},
{
Timestamp: 1627055153000,
Value: 1,
Labels: labels.FromStrings("__name__", "http_requests_total", "code", "200"),
},
{
Timestamp: 1627056153000,
Value: 2,
Labels: labels.FromStrings("__name__", "http_requests_total", "code", "400"),
},
},
},
},
{
ToParse: `# HELP http_requests_total The total number of HTTP requests.
# TYPE http_requests_total counter
http_requests_total{code="200"} 1 1624463088.000
http_requests_total{code="200"} 2 1629503088.000
http_requests_total{code="200"} 3 1629863088.000
# EOF
`,
IsOk: true,
Description: "Long maximum block duration puts all data into two blocks.",
MaxSamplesInAppender: 5000,
MaxBlockDuration: 2048 * time.Hour,
Expected: struct {
MinTime int64
MaxTime int64
NumBlocks int
BlockDuration int64
Samples []backfillSample
}{
MinTime: 1624463088000,
MaxTime: 1629863088000,
NumBlocks: 2,
BlockDuration: int64(1458 * time.Hour / time.Millisecond),
Samples: []backfillSample{
{
Timestamp: 1624463088000,
Value: 1,
Labels: labels.FromStrings("__name__", "http_requests_total", "code", "200"),
},
{
Timestamp: 1629503088000,
Value: 2,
Labels: labels.FromStrings("__name__", "http_requests_total", "code", "200"),
},
{
Timestamp: 1629863088000,
Value: 3,
Labels: labels.FromStrings("__name__", "http_requests_total", "code", "200"),
},
},
},
},
{
ToParse: `# HELP http_requests_total The total number of HTTP requests.
# TYPE http_requests_total counter
http_requests_total{code="200"} 1 1624463088.000
http_requests_total{code="200"} 2 1765943088.000
http_requests_total{code="200"} 3 1768463088.000
# EOF
`,
IsOk: true,
Description: "Maximum block duration longer than longest possible duration, uses largest duration, puts all data into two blocks.",
MaxSamplesInAppender: 5000,
MaxBlockDuration: 200000 * time.Hour,
Expected: struct {
MinTime int64
MaxTime int64
NumBlocks int
BlockDuration int64
Samples []backfillSample
}{
MinTime: 1624463088000,
MaxTime: 1768463088000,
NumBlocks: 2,
BlockDuration: int64(39366 * time.Hour / time.Millisecond),
Samples: []backfillSample{
{
Timestamp: 1624463088000,
Value: 1,
Labels: labels.FromStrings("__name__", "http_requests_total", "code", "200"),
},
{
Timestamp: 1765943088000,
Value: 2,
Labels: labels.FromStrings("__name__", "http_requests_total", "code", "200"),
},
{
Timestamp: 1768463088000,
Value: 3,
Labels: labels.FromStrings("__name__", "http_requests_total", "code", "200"),
},
},
},
},
{
ToParse: `# HELP http_requests_total The total number of HTTP requests.
# TYPE http_requests_total counter
http_requests_total{code="200"} 1021 1565133713.989
http_requests_total{code="200"} 1022 1565144513.989
http_requests_total{code="400"} 2 1565155313.989
@ -270,14 +409,16 @@ http_requests_total{code="400"} 1 1565166113.989
Description: "Multiple samples that end up in different blocks.",
MaxSamplesInAppender: 5000,
Expected: struct {
MinTime int64
MaxTime int64
NumBlocks int
Samples []backfillSample
MinTime int64
MaxTime int64
NumBlocks int
BlockDuration int64
Samples []backfillSample
}{
MinTime: 1565133713989,
MaxTime: 1565166113989,
NumBlocks: 4,
MinTime: 1565133713989,
MaxTime: 1565166113989,
NumBlocks: 4,
BlockDuration: tsdb.DefaultBlockDuration,
Samples: []backfillSample{
{
Timestamp: 1565133713989,
@ -318,14 +459,16 @@ http_requests_total{code="400"} 1 1565166113.989
Description: "Number of samples are greater than the sample batch size.",
MaxSamplesInAppender: 2,
Expected: struct {
MinTime int64
MaxTime int64
NumBlocks int
Samples []backfillSample
MinTime int64
MaxTime int64
NumBlocks int
BlockDuration int64
Samples []backfillSample
}{
MinTime: 1565133713989,
MaxTime: 1565166113989,
NumBlocks: 4,
MinTime: 1565133713989,
MaxTime: 1565166113989,
NumBlocks: 4,
BlockDuration: tsdb.DefaultBlockDuration,
Samples: []backfillSample{
{
Timestamp: 1565133713989,
@ -378,14 +521,16 @@ http_requests_total{code="400"} 1024 7199
Description: "One series spanning 2h in same block should not cause problems to other series.",
MaxSamplesInAppender: 1,
Expected: struct {
MinTime int64
MaxTime int64
NumBlocks int
Samples []backfillSample
MinTime int64
MaxTime int64
NumBlocks int
BlockDuration int64
Samples []backfillSample
}{
MinTime: 0,
MaxTime: 7199000,
NumBlocks: 1,
MinTime: 0,
MaxTime: 7199000,
NumBlocks: 1,
BlockDuration: tsdb.DefaultBlockDuration,
Samples: []backfillSample{
{
Timestamp: 0,
@ -418,14 +563,16 @@ http_requests_total{code="400"} 1024 7199
Description: "Sample with no #HELP or #TYPE keyword.",
MaxSamplesInAppender: 5000,
Expected: struct {
MinTime int64
MaxTime int64
NumBlocks int
Samples []backfillSample
MinTime int64
MaxTime int64
NumBlocks int
BlockDuration int64
Samples []backfillSample
}{
MinTime: 6900000,
MaxTime: 6900000,
NumBlocks: 1,
MinTime: 6900000,
MaxTime: 6900000,
NumBlocks: 1,
BlockDuration: tsdb.DefaultBlockDuration,
Samples: []backfillSample{
{
Timestamp: 6900000,
@ -442,14 +589,16 @@ http_requests_total{code="400"} 1024 7199
Description: "Sample without newline after # EOF.",
MaxSamplesInAppender: 5000,
Expected: struct {
MinTime int64
MaxTime int64
NumBlocks int
Samples []backfillSample
MinTime int64
MaxTime int64
NumBlocks int
BlockDuration int64
Samples []backfillSample
}{
MinTime: 6900000,
MaxTime: 6900000,
NumBlocks: 1,
MinTime: 6900000,
MaxTime: 6900000,
NumBlocks: 1,
BlockDuration: tsdb.DefaultBlockDuration,
Samples: []backfillSample{
{
Timestamp: 6900000,
@ -467,14 +616,16 @@ http_requests_total{code="400"} 1024 7199
Description: "Bare sample.",
MaxSamplesInAppender: 5000,
Expected: struct {
MinTime int64
MaxTime int64
NumBlocks int
Samples []backfillSample
MinTime int64
MaxTime int64
NumBlocks int
BlockDuration int64
Samples []backfillSample
}{
MinTime: 1001000,
MaxTime: 1001000,
NumBlocks: 1,
MinTime: 1001000,
MaxTime: 1001000,
NumBlocks: 1,
BlockDuration: tsdb.DefaultBlockDuration,
Samples: []backfillSample{
{
Timestamp: 1001000,
@ -532,28 +683,32 @@ after_eof 1 2
},
}
for _, test := range tests {
t.Logf("Test:%s", test.Description)
t.Run(test.Description, func(t *testing.T) {
t.Logf("Test:%s", test.Description)
outputDir, err := ioutil.TempDir("", "myDir")
require.NoError(t, err)
defer func() {
require.NoError(t, os.RemoveAll(outputDir))
}()
outputDir, err := ioutil.TempDir("", "myDir")
require.NoError(t, err)
defer func() {
require.NoError(t, os.RemoveAll(outputDir))
}()
err = backfill(test.MaxSamplesInAppender, []byte(test.ToParse), outputDir, false, false)
err = backfill(test.MaxSamplesInAppender, []byte(test.ToParse), outputDir, false, false, test.MaxBlockDuration)
if !test.IsOk {
require.Error(t, err, test.Description)
continue
}
if !test.IsOk {
require.Error(t, err, test.Description)
return
}
require.NoError(t, err)
db, err := tsdb.Open(outputDir, nil, nil, tsdb.DefaultOptions(), nil)
require.NoError(t, err)
defer func() {
require.NoError(t, db.Close())
}()
require.NoError(t, err)
options := tsdb.DefaultOptions()
options.RetentionDuration = int64(10 * 365 * 24 * time.Hour / time.Millisecond) // maximum duration tests require a long retention
db, err := tsdb.Open(outputDir, nil, nil, options, nil)
require.NoError(t, err)
defer func() {
require.NoError(t, db.Close())
}()
testBlocks(t, db, test.Expected.MinTime, test.Expected.MaxTime, test.Expected.Samples, test.Expected.NumBlocks)
testBlocks(t, db, test.Expected.MinTime, test.Expected.MaxTime, test.Expected.BlockDuration, test.Expected.Samples, test.Expected.NumBlocks)
})
}
}

View file

@ -145,8 +145,8 @@ func main() {
importCmd := tsdbCmd.Command("create-blocks-from", "[Experimental] Import samples from input and produce TSDB blocks. Please refer to the storage docs for more details.")
importHumanReadable := importCmd.Flag("human-readable", "Print human readable values.").Short('r').Bool()
importQuiet := importCmd.Flag("quiet", "Do not print created blocks.").Short('q').Bool()
maxBlockDuration := importCmd.Flag("max-block-duration", "Maximum duration created blocks may span. Anything less than 2h is ignored.").Hidden().PlaceHolder("<duration>").Duration()
openMetricsImportCmd := importCmd.Command("openmetrics", "Import samples from OpenMetrics input and produce TSDB blocks. Please refer to the storage docs for more details.")
// TODO(aSquare14): add flag to set default block duration
importFilePath := openMetricsImportCmd.Arg("input file", "OpenMetrics file to read samples from.").Required().String()
importDBPath := openMetricsImportCmd.Arg("output directory", "Output directory for generated blocks.").Default(defaultDBPath).String()
importRulesCmd := importCmd.Command("rules", "Create blocks of data for new recording rules.")
@ -222,7 +222,7 @@ func main() {
os.Exit(checkErr(dumpSamples(*dumpPath, *dumpMinTime, *dumpMaxTime)))
//TODO(aSquare14): Work on adding support for custom block size.
case openMetricsImportCmd.FullCommand():
os.Exit(backfillOpenMetrics(*importFilePath, *importDBPath, *importHumanReadable, *importQuiet))
os.Exit(backfillOpenMetrics(*importFilePath, *importDBPath, *importHumanReadable, *importQuiet, *maxBlockDuration))
case importRulesCmd.FullCommand():
os.Exit(checkErr(importRules(*importRulesURL, *importRulesStart, *importRulesEnd, *importRulesOutputDir, *importRulesEvalInterval, *importRulesFiles...)))

View file

@ -611,7 +611,7 @@ func checkErr(err error) int {
return 0
}
func backfillOpenMetrics(path string, outputDir string, humanReadable, quiet bool) int {
func backfillOpenMetrics(path string, outputDir string, humanReadable, quiet bool, maxBlockDuration time.Duration) int {
inputFile, err := fileutil.OpenMmapFile(path)
if err != nil {
return checkErr(err)
@ -622,5 +622,5 @@ func backfillOpenMetrics(path string, outputDir string, humanReadable, quiet boo
return checkErr(errors.Wrap(err, "create output dir"))
}
return checkErr(backfill(5000, inputFile.Bytes(), outputDir, humanReadable, quiet))
return checkErr(backfill(5000, inputFile.Bytes(), outputDir, humanReadable, quiet, maxBlockDuration))
}

View file

@ -157,6 +157,16 @@ promtool tsdb create-blocks-from openmetrics <input file> [<output directory>]
After the creation of the blocks, move them to the data directory of Prometheus. If there is an overlap with the existing blocks in Prometheus, the flag `--storage.tsdb.allow-overlapping-blocks` needs to be set. Note that any backfilled data is subject to the retention configured for your Prometheus server (by time or size).
#### Longer Block Durations
By default, promtool uses the default block duration (2h) for the blocks; this behavior is the most generally applicable and correct. However, when backfilling data over a long range of times, it may be advantageous to use a larger value for the block duration to backfill faster and prevent additional compactions by TSDB later.
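The `--max-block-duration` flag allows the user to configure a maximum duration of blocks. The backfilling tool will pick the largest supported block duration no larger than this; supported durations grow by a factor of three from the default (2h, 6h, 18h, …) up to 39366h. Values smaller than the 2h default are ignored.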
While larger blocks may improve the performance of backfilling large datasets, drawbacks exist as well. Time-based retention policies must keep the entire block around if even one sample of the (potentially large) block is still within the retention policy. Conversely, size-based retention policies will remove the entire block even if the TSDB only goes over the size limit in a minor way.
Therefore, backfilling with few blocks, thereby choosing a larger block duration, must be done with care and is not recommended for any production instances.
## Backfilling for Recording Rules
### Overview