diff --git a/cmd/promtool/backfill.go b/cmd/promtool/backfill.go index 76e5a82df..3d05260e3 100644 --- a/cmd/promtool/backfill.go +++ b/cmd/promtool/backfill.go @@ -17,6 +17,7 @@ import ( "context" "io" "math" + "time" "github.com/go-kit/log" "github.com/pkg/errors" @@ -65,8 +66,19 @@ func getMinAndMaxTimestamps(p textparse.Parser) (int64, int64, error) { return maxt, mint, nil } -func createBlocks(input []byte, mint, maxt int64, maxSamplesInAppender int, outputDir string, humanReadable, quiet bool) (returnErr error) { +func createBlocks(input []byte, mint, maxt, maxBlockDuration int64, maxSamplesInAppender int, outputDir string, humanReadable, quiet bool) (returnErr error) { blockDuration := tsdb.DefaultBlockDuration + if maxBlockDuration > tsdb.DefaultBlockDuration { + ranges := tsdb.ExponentialBlockRanges(tsdb.DefaultBlockDuration, 10, 3) + idx := len(ranges) - 1 // Use largest range if user asked for something enormous. + for i, v := range ranges { + if v > maxBlockDuration { + idx = i - 1 + break + } + } + blockDuration = ranges[idx] + } mint = blockDuration * (mint / blockDuration) db, err := tsdb.OpenDBReadOnly(outputDir, nil) @@ -199,11 +211,11 @@ func createBlocks(input []byte, mint, maxt int64, maxSamplesInAppender int, outp } -func backfill(maxSamplesInAppender int, input []byte, outputDir string, humanReadable, quiet bool) (err error) { +func backfill(maxSamplesInAppender int, input []byte, outputDir string, humanReadable, quiet bool, maxBlockDuration time.Duration) (err error) { p := textparse.NewOpenMetricsParser(input) maxt, mint, err := getMinAndMaxTimestamps(p) if err != nil { return errors.Wrap(err, "getting min and max timestamp") } - return errors.Wrap(createBlocks(input, mint, maxt, maxSamplesInAppender, outputDir, humanReadable, quiet), "block creation") + return errors.Wrap(createBlocks(input, mint, maxt, int64(maxBlockDuration/time.Millisecond), maxSamplesInAppender, outputDir, humanReadable, quiet), "block creation") } diff --git 
a/cmd/promtool/backfill_test.go b/cmd/promtool/backfill_test.go index bd0109da1..4bbb5602c 100644 --- a/cmd/promtool/backfill_test.go +++ b/cmd/promtool/backfill_test.go @@ -20,6 +20,7 @@ import ( "os" "sort" "testing" + "time" "github.com/prometheus/prometheus/pkg/labels" "github.com/prometheus/prometheus/storage" @@ -58,12 +59,12 @@ func queryAllSeries(t testing.TB, q storage.Querier, expectedMinTime, expectedMa return samples } -func testBlocks(t *testing.T, db *tsdb.DB, expectedMinTime, expectedMaxTime int64, expectedSamples []backfillSample, expectedNumBlocks int) { +func testBlocks(t *testing.T, db *tsdb.DB, expectedMinTime, expectedMaxTime, expectedBlockDuration int64, expectedSamples []backfillSample, expectedNumBlocks int) { blocks := db.Blocks() - require.Equal(t, expectedNumBlocks, len(blocks)) + require.Equal(t, expectedNumBlocks, len(blocks), "did not create correct number of blocks") - for _, block := range blocks { - require.Equal(t, true, block.MinTime()/tsdb.DefaultBlockDuration == (block.MaxTime()-1)/tsdb.DefaultBlockDuration) + for i, block := range blocks { + require.Equal(t, block.MinTime()/expectedBlockDuration, (block.MaxTime()-1)/expectedBlockDuration, "block %d contains data outside of one aligned block duration", i) } q, err := db.Querier(context.Background(), math.MinInt64, math.MaxInt64) @@ -75,11 +76,11 @@ func testBlocks(t *testing.T, db *tsdb.DB, expectedMinTime, expectedMaxTime int6 allSamples := queryAllSeries(t, q, expectedMinTime, expectedMaxTime) sortSamples(allSamples) sortSamples(expectedSamples) - require.Equal(t, expectedSamples, allSamples) + require.Equal(t, expectedSamples, allSamples, "did not create correct samples") if len(allSamples) > 0 { - require.Equal(t, expectedMinTime, allSamples[0].Timestamp) - require.Equal(t, expectedMaxTime, allSamples[len(allSamples)-1].Timestamp) + require.Equal(t, expectedMinTime, allSamples[0].Timestamp, "timestamp of first sample is not the expected minimum time") + require.Equal(t, 
expectedMaxTime, allSamples[len(allSamples)-1].Timestamp, "timestamp of last sample is not the expected maximum time") } } @@ -89,11 +90,13 @@ func TestBackfill(t *testing.T) { IsOk bool Description string MaxSamplesInAppender int + MaxBlockDuration time.Duration Expected struct { - MinTime int64 - MaxTime int64 - NumBlocks int - Samples []backfillSample + MinTime int64 + MaxTime int64 + NumBlocks int + BlockDuration int64 + Samples []backfillSample } }{ { @@ -102,15 +105,17 @@ func TestBackfill(t *testing.T) { Description: "Empty file.", MaxSamplesInAppender: 5000, Expected: struct { - MinTime int64 - MaxTime int64 - NumBlocks int - Samples []backfillSample + MinTime int64 + MaxTime int64 + NumBlocks int + BlockDuration int64 + Samples []backfillSample }{ - MinTime: math.MaxInt64, - MaxTime: math.MinInt64, - NumBlocks: 0, - Samples: []backfillSample{}, + MinTime: math.MaxInt64, + MaxTime: math.MinInt64, + NumBlocks: 0, + BlockDuration: tsdb.DefaultBlockDuration, + Samples: []backfillSample{}, }, }, { @@ -124,14 +129,16 @@ http_requests_total{code="400"} 1 1565133713.990 Description: "Multiple samples with different timestamp for different series.", MaxSamplesInAppender: 5000, Expected: struct { - MinTime int64 - MaxTime int64 - NumBlocks int - Samples []backfillSample + MinTime int64 + MaxTime int64 + NumBlocks int + BlockDuration int64 + Samples []backfillSample }{ - MinTime: 1565133713989, - MaxTime: 1565133713990, - NumBlocks: 1, + MinTime: 1565133713989, + MaxTime: 1565133713990, + NumBlocks: 1, + BlockDuration: tsdb.DefaultBlockDuration, Samples: []backfillSample{ { Timestamp: 1565133713989, @@ -158,14 +165,16 @@ http_requests_total{code="200"} 1023 1565652113.989 Description: "Multiple samples separated by 3 days.", MaxSamplesInAppender: 5000, Expected: struct { - MinTime int64 - MaxTime int64 - NumBlocks int - Samples []backfillSample + MinTime int64 + MaxTime int64 + NumBlocks int + BlockDuration int64 + Samples []backfillSample }{ - MinTime: 
1565133713989, - MaxTime: 1565652113989, - NumBlocks: 3, + MinTime: 1565133713989, + MaxTime: 1565652113989, + NumBlocks: 3, + BlockDuration: tsdb.DefaultBlockDuration, Samples: []backfillSample{ { Timestamp: 1565133713989, @@ -196,14 +205,16 @@ http_requests_total{code="200"} 1021 1565133713.989 Description: "Unordered samples from multiple series, which end in different blocks.", MaxSamplesInAppender: 5000, Expected: struct { - MinTime int64 - MaxTime int64 - NumBlocks int - Samples []backfillSample + MinTime int64 + MaxTime int64 + NumBlocks int + BlockDuration int64 + Samples []backfillSample }{ - MinTime: 1565133713989, - MaxTime: 1565392913989, - NumBlocks: 2, + MinTime: 1565133713989, + MaxTime: 1565392913989, + NumBlocks: 2, + BlockDuration: tsdb.DefaultBlockDuration, Samples: []backfillSample{ { Timestamp: 1565133713989, @@ -230,14 +241,16 @@ http_requests_total{code="400"} 2 1565133715.989 Description: "Multiple samples with different timestamp for the same series.", MaxSamplesInAppender: 5000, Expected: struct { - MinTime int64 - MaxTime int64 - NumBlocks int - Samples []backfillSample + MinTime int64 + MaxTime int64 + NumBlocks int + BlockDuration int64 + Samples []backfillSample }{ - MinTime: 1565133713989, - MaxTime: 1565133715989, - NumBlocks: 1, + MinTime: 1565133713989, + MaxTime: 1565133715989, + NumBlocks: 1, + BlockDuration: tsdb.DefaultBlockDuration, Samples: []backfillSample{ { Timestamp: 1565133713989, @@ -260,6 +273,132 @@ http_requests_total{code="400"} 2 1565133715.989 { ToParse: `# HELP http_requests_total The total number of HTTP requests. 
# TYPE http_requests_total counter +http_requests_total{code="200"} 1021 1624463088.000 +http_requests_total{code="200"} 1 1627055153.000 +http_requests_total{code="400"} 2 1627056153.000 +# EOF +`, + IsOk: true, + Description: "Long maximum block duration puts all data into one block.", + MaxSamplesInAppender: 5000, + MaxBlockDuration: 2048 * time.Hour, + Expected: struct { + MinTime int64 + MaxTime int64 + NumBlocks int + BlockDuration int64 + Samples []backfillSample + }{ + MinTime: 1624463088000, + MaxTime: 1627056153000, + NumBlocks: 1, + BlockDuration: int64(1458 * time.Hour / time.Millisecond), + Samples: []backfillSample{ + { + Timestamp: 1624463088000, + Value: 1021, + Labels: labels.FromStrings("__name__", "http_requests_total", "code", "200"), + }, + { + Timestamp: 1627055153000, + Value: 1, + Labels: labels.FromStrings("__name__", "http_requests_total", "code", "200"), + }, + { + Timestamp: 1627056153000, + Value: 2, + Labels: labels.FromStrings("__name__", "http_requests_total", "code", "400"), + }, + }, + }, + }, + { + ToParse: `# HELP http_requests_total The total number of HTTP requests. 
+# TYPE http_requests_total counter +http_requests_total{code="200"} 1 1624463088.000 +http_requests_total{code="200"} 2 1629503088.000 +http_requests_total{code="200"} 3 1629863088.000 +# EOF +`, + IsOk: true, + Description: "Long maximum block duration puts all data into two blocks.", + MaxSamplesInAppender: 5000, + MaxBlockDuration: 2048 * time.Hour, + Expected: struct { + MinTime int64 + MaxTime int64 + NumBlocks int + BlockDuration int64 + Samples []backfillSample + }{ + MinTime: 1624463088000, + MaxTime: 1629863088000, + NumBlocks: 2, + BlockDuration: int64(1458 * time.Hour / time.Millisecond), + Samples: []backfillSample{ + { + Timestamp: 1624463088000, + Value: 1, + Labels: labels.FromStrings("__name__", "http_requests_total", "code", "200"), + }, + { + Timestamp: 1629503088000, + Value: 2, + Labels: labels.FromStrings("__name__", "http_requests_total", "code", "200"), + }, + { + Timestamp: 1629863088000, + Value: 3, + Labels: labels.FromStrings("__name__", "http_requests_total", "code", "200"), + }, + }, + }, + }, + { + ToParse: `# HELP http_requests_total The total number of HTTP requests. 
+# TYPE http_requests_total counter +http_requests_total{code="200"} 1 1624463088.000 +http_requests_total{code="200"} 2 1765943088.000 +http_requests_total{code="200"} 3 1768463088.000 +# EOF +`, + IsOk: true, + Description: "Maximum block duration longer than longest possible duration, uses largest duration, puts all data into two blocks.", + MaxSamplesInAppender: 5000, + MaxBlockDuration: 200000 * time.Hour, + Expected: struct { + MinTime int64 + MaxTime int64 + NumBlocks int + BlockDuration int64 + Samples []backfillSample + }{ + MinTime: 1624463088000, + MaxTime: 1768463088000, + NumBlocks: 2, + BlockDuration: int64(39366 * time.Hour / time.Millisecond), + Samples: []backfillSample{ + { + Timestamp: 1624463088000, + Value: 1, + Labels: labels.FromStrings("__name__", "http_requests_total", "code", "200"), + }, + { + Timestamp: 1765943088000, + Value: 2, + Labels: labels.FromStrings("__name__", "http_requests_total", "code", "200"), + }, + { + Timestamp: 1768463088000, + Value: 3, + Labels: labels.FromStrings("__name__", "http_requests_total", "code", "200"), + }, + }, + }, + }, + { + ToParse: `# HELP http_requests_total The total number of HTTP requests. 
+# TYPE http_requests_total counter http_requests_total{code="200"} 1021 1565133713.989 http_requests_total{code="200"} 1022 1565144513.989 http_requests_total{code="400"} 2 1565155313.989 @@ -270,14 +409,16 @@ http_requests_total{code="400"} 1 1565166113.989 Description: "Multiple samples that end up in different blocks.", MaxSamplesInAppender: 5000, Expected: struct { - MinTime int64 - MaxTime int64 - NumBlocks int - Samples []backfillSample + MinTime int64 + MaxTime int64 + NumBlocks int + BlockDuration int64 + Samples []backfillSample }{ - MinTime: 1565133713989, - MaxTime: 1565166113989, - NumBlocks: 4, + MinTime: 1565133713989, + MaxTime: 1565166113989, + NumBlocks: 4, + BlockDuration: tsdb.DefaultBlockDuration, Samples: []backfillSample{ { Timestamp: 1565133713989, @@ -318,14 +459,16 @@ http_requests_total{code="400"} 1 1565166113.989 Description: "Number of samples are greater than the sample batch size.", MaxSamplesInAppender: 2, Expected: struct { - MinTime int64 - MaxTime int64 - NumBlocks int - Samples []backfillSample + MinTime int64 + MaxTime int64 + NumBlocks int + BlockDuration int64 + Samples []backfillSample }{ - MinTime: 1565133713989, - MaxTime: 1565166113989, - NumBlocks: 4, + MinTime: 1565133713989, + MaxTime: 1565166113989, + NumBlocks: 4, + BlockDuration: tsdb.DefaultBlockDuration, Samples: []backfillSample{ { Timestamp: 1565133713989, @@ -378,14 +521,16 @@ http_requests_total{code="400"} 1024 7199 Description: "One series spanning 2h in same block should not cause problems to other series.", MaxSamplesInAppender: 1, Expected: struct { - MinTime int64 - MaxTime int64 - NumBlocks int - Samples []backfillSample + MinTime int64 + MaxTime int64 + NumBlocks int + BlockDuration int64 + Samples []backfillSample }{ - MinTime: 0, - MaxTime: 7199000, - NumBlocks: 1, + MinTime: 0, + MaxTime: 7199000, + NumBlocks: 1, + BlockDuration: tsdb.DefaultBlockDuration, Samples: []backfillSample{ { Timestamp: 0, @@ -418,14 +563,16 @@ 
http_requests_total{code="400"} 1024 7199 Description: "Sample with no #HELP or #TYPE keyword.", MaxSamplesInAppender: 5000, Expected: struct { - MinTime int64 - MaxTime int64 - NumBlocks int - Samples []backfillSample + MinTime int64 + MaxTime int64 + NumBlocks int + BlockDuration int64 + Samples []backfillSample }{ - MinTime: 6900000, - MaxTime: 6900000, - NumBlocks: 1, + MinTime: 6900000, + MaxTime: 6900000, + NumBlocks: 1, + BlockDuration: tsdb.DefaultBlockDuration, Samples: []backfillSample{ { Timestamp: 6900000, @@ -442,14 +589,16 @@ http_requests_total{code="400"} 1024 7199 Description: "Sample without newline after # EOF.", MaxSamplesInAppender: 5000, Expected: struct { - MinTime int64 - MaxTime int64 - NumBlocks int - Samples []backfillSample + MinTime int64 + MaxTime int64 + NumBlocks int + BlockDuration int64 + Samples []backfillSample }{ - MinTime: 6900000, - MaxTime: 6900000, - NumBlocks: 1, + MinTime: 6900000, + MaxTime: 6900000, + NumBlocks: 1, + BlockDuration: tsdb.DefaultBlockDuration, Samples: []backfillSample{ { Timestamp: 6900000, @@ -467,14 +616,16 @@ http_requests_total{code="400"} 1024 7199 Description: "Bare sample.", MaxSamplesInAppender: 5000, Expected: struct { - MinTime int64 - MaxTime int64 - NumBlocks int - Samples []backfillSample + MinTime int64 + MaxTime int64 + NumBlocks int + BlockDuration int64 + Samples []backfillSample }{ - MinTime: 1001000, - MaxTime: 1001000, - NumBlocks: 1, + MinTime: 1001000, + MaxTime: 1001000, + NumBlocks: 1, + BlockDuration: tsdb.DefaultBlockDuration, Samples: []backfillSample{ { Timestamp: 1001000, @@ -532,28 +683,32 @@ after_eof 1 2 }, } for _, test := range tests { - t.Logf("Test:%s", test.Description) + t.Run(test.Description, func(t *testing.T) { + t.Logf("Test:%s", test.Description) - outputDir, err := ioutil.TempDir("", "myDir") - require.NoError(t, err) - defer func() { - require.NoError(t, os.RemoveAll(outputDir)) - }() + outputDir, err := ioutil.TempDir("", "myDir") + require.NoError(t, err) + 
defer func() { + require.NoError(t, os.RemoveAll(outputDir)) + }() - err = backfill(test.MaxSamplesInAppender, []byte(test.ToParse), outputDir, false, false) + err = backfill(test.MaxSamplesInAppender, []byte(test.ToParse), outputDir, false, false, test.MaxBlockDuration) - if !test.IsOk { - require.Error(t, err, test.Description) - continue - } + if !test.IsOk { + require.Error(t, err, test.Description) + return + } - require.NoError(t, err) - db, err := tsdb.Open(outputDir, nil, nil, tsdb.DefaultOptions(), nil) - require.NoError(t, err) - defer func() { - require.NoError(t, db.Close()) - }() + require.NoError(t, err) + options := tsdb.DefaultOptions() + options.RetentionDuration = int64(10 * 365 * 24 * time.Hour / time.Millisecond) // maximum duration tests require a long retention + db, err := tsdb.Open(outputDir, nil, nil, options, nil) + require.NoError(t, err) + defer func() { + require.NoError(t, db.Close()) + }() - testBlocks(t, db, test.Expected.MinTime, test.Expected.MaxTime, test.Expected.Samples, test.Expected.NumBlocks) + testBlocks(t, db, test.Expected.MinTime, test.Expected.MaxTime, test.Expected.BlockDuration, test.Expected.Samples, test.Expected.NumBlocks) + }) } } diff --git a/cmd/promtool/main.go b/cmd/promtool/main.go index c9e0aebe8..a380a6c01 100644 --- a/cmd/promtool/main.go +++ b/cmd/promtool/main.go @@ -145,8 +145,8 @@ func main() { importCmd := tsdbCmd.Command("create-blocks-from", "[Experimental] Import samples from input and produce TSDB blocks. Please refer to the storage docs for more details.") importHumanReadable := importCmd.Flag("human-readable", "Print human readable values.").Short('r').Bool() importQuiet := importCmd.Flag("quiet", "Do not print created blocks.").Short('q').Bool() + maxBlockDuration := importCmd.Flag("max-block-duration", "Maximum duration created blocks may span. 
Anything less than 2h is ignored.").Hidden().PlaceHolder("").Duration() openMetricsImportCmd := importCmd.Command("openmetrics", "Import samples from OpenMetrics input and produce TSDB blocks. Please refer to the storage docs for more details.") - // TODO(aSquare14): add flag to set default block duration importFilePath := openMetricsImportCmd.Arg("input file", "OpenMetrics file to read samples from.").Required().String() importDBPath := openMetricsImportCmd.Arg("output directory", "Output directory for generated blocks.").Default(defaultDBPath).String() importRulesCmd := importCmd.Command("rules", "Create blocks of data for new recording rules.") @@ -222,7 +222,7 @@ func main() { os.Exit(checkErr(dumpSamples(*dumpPath, *dumpMinTime, *dumpMaxTime))) //TODO(aSquare14): Work on adding support for custom block size. case openMetricsImportCmd.FullCommand(): - os.Exit(backfillOpenMetrics(*importFilePath, *importDBPath, *importHumanReadable, *importQuiet)) + os.Exit(backfillOpenMetrics(*importFilePath, *importDBPath, *importHumanReadable, *importQuiet, *maxBlockDuration)) case importRulesCmd.FullCommand(): os.Exit(checkErr(importRules(*importRulesURL, *importRulesStart, *importRulesEnd, *importRulesOutputDir, *importRulesEvalInterval, *importRulesFiles...))) diff --git a/cmd/promtool/tsdb.go b/cmd/promtool/tsdb.go index bcf9f47e8..2135966c6 100644 --- a/cmd/promtool/tsdb.go +++ b/cmd/promtool/tsdb.go @@ -611,7 +611,7 @@ func checkErr(err error) int { return 0 } -func backfillOpenMetrics(path string, outputDir string, humanReadable, quiet bool) int { +func backfillOpenMetrics(path string, outputDir string, humanReadable, quiet bool, maxBlockDuration time.Duration) int { inputFile, err := fileutil.OpenMmapFile(path) if err != nil { return checkErr(err) @@ -622,5 +622,5 @@ func backfillOpenMetrics(path string, outputDir string, humanReadable, quiet boo return checkErr(errors.Wrap(err, "create output dir")) } - return checkErr(backfill(5000, inputFile.Bytes(), outputDir, 
humanReadable, quiet)) + return checkErr(backfill(5000, inputFile.Bytes(), outputDir, humanReadable, quiet, maxBlockDuration)) } diff --git a/docs/storage.md b/docs/storage.md index c4907959d..71688afdd 100644 --- a/docs/storage.md +++ b/docs/storage.md @@ -157,6 +157,16 @@ promtool tsdb create-blocks-from openmetrics [] After the creation of the blocks, move it to the data directory of Prometheus. If there is an overlap with the existing blocks in Prometheus, the flag `--storage.tsdb.allow-overlapping-blocks` needs to be set. Note that any backfilled data is subject to the retention configured for your Prometheus server (by time or size). +#### Longer Block Durations + +By default, promtool uses the default block duration (2h) for the blocks it creates; this behavior is the most generally applicable and correct. However, when backfilling data over a long time range, it may be advantageous to use a larger value for the block duration to backfill faster and prevent additional compactions by TSDB later. + +The `--max-block-duration` flag allows the user to configure a maximum duration of blocks. The backfilling tool will pick a suitable block duration no larger than this. Anything less than 2h is ignored and the default block duration is used. + +While larger blocks may improve the performance of backfilling large datasets, drawbacks exist as well. Time-based retention policies must keep the entire block around if even one sample of the (potentially large) block is still within the retention period. Conversely, size-based retention policies will remove the entire block even if the TSDB only goes over the size limit in a minor way. + +Therefore, backfilling with fewer, larger blocks (that is, choosing a larger block duration) must be done with care and is not recommended for any production instances. + ## Backfilling for Recording Rules ### Overview