promtool: backfill: allow configuring block duration (#8919)

* promtool: backfill: allow configuring block duration

When backfilling large amounts of data across long periods of time, it
may in certain circumstances be useful to use a longer block duration to
increase the efficiency and speed of the backfilling process. This patch
adds a flag --block-duration-power to allow a user to choose the power N
where the block duration is 2^(N+1)h.

Signed-off-by: Steve Kuznetsov <skuznets@redhat.com>

* promtool: use sub-tests in backfill testing

Signed-off-by: Steve Kuznetsov <skuznets@redhat.com>

* backfill: add messages to tests for clarity

When someone new breaks a test, seeing "expected: false, got: true" is
really not useful. A nice message helps here.

Signed-off-by: Steve Kuznetsov <skuznets@redhat.com>

* backfill: test long block durations

A test that uses a long block duration to write bigger blocks is added.
The check to make sure all blocks are the default duration is removed.

Signed-off-by: Steve Kuznetsov <skuznets@redhat.com>
This commit is contained in:
Steve Kuznetsov 2021-06-29 02:23:38 -07:00 committed by GitHub
parent 664b391573
commit fd6c852567
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 291 additions and 114 deletions

View file

@@ -17,6 +17,7 @@ import (
"context" "context"
"io" "io"
"math" "math"
"time"
"github.com/go-kit/log" "github.com/go-kit/log"
"github.com/pkg/errors" "github.com/pkg/errors"
@@ -65,8 +66,19 @@ func getMinAndMaxTimestamps(p textparse.Parser) (int64, int64, error) {
return maxt, mint, nil return maxt, mint, nil
} }
func createBlocks(input []byte, mint, maxt int64, maxSamplesInAppender int, outputDir string, humanReadable, quiet bool) (returnErr error) { func createBlocks(input []byte, mint, maxt, maxBlockDuration int64, maxSamplesInAppender int, outputDir string, humanReadable, quiet bool) (returnErr error) {
blockDuration := tsdb.DefaultBlockDuration blockDuration := tsdb.DefaultBlockDuration
if maxBlockDuration > tsdb.DefaultBlockDuration {
ranges := tsdb.ExponentialBlockRanges(tsdb.DefaultBlockDuration, 10, 3)
idx := len(ranges) - 1 // Use largest range if user asked for something enormous.
for i, v := range ranges {
if v > maxBlockDuration {
idx = i - 1
break
}
}
blockDuration = ranges[idx]
}
mint = blockDuration * (mint / blockDuration) mint = blockDuration * (mint / blockDuration)
db, err := tsdb.OpenDBReadOnly(outputDir, nil) db, err := tsdb.OpenDBReadOnly(outputDir, nil)
@@ -199,11 +211,11 @@ func createBlocks(input []byte, mint, maxt int64, maxSamplesInAppender int, outp
} }
func backfill(maxSamplesInAppender int, input []byte, outputDir string, humanReadable, quiet bool) (err error) { func backfill(maxSamplesInAppender int, input []byte, outputDir string, humanReadable, quiet bool, maxBlockDuration time.Duration) (err error) {
p := textparse.NewOpenMetricsParser(input) p := textparse.NewOpenMetricsParser(input)
maxt, mint, err := getMinAndMaxTimestamps(p) maxt, mint, err := getMinAndMaxTimestamps(p)
if err != nil { if err != nil {
return errors.Wrap(err, "getting min and max timestamp") return errors.Wrap(err, "getting min and max timestamp")
} }
return errors.Wrap(createBlocks(input, mint, maxt, maxSamplesInAppender, outputDir, humanReadable, quiet), "block creation") return errors.Wrap(createBlocks(input, mint, maxt, int64(maxBlockDuration/time.Millisecond), maxSamplesInAppender, outputDir, humanReadable, quiet), "block creation")
} }

View file

@@ -20,6 +20,7 @@ import (
"os" "os"
"sort" "sort"
"testing" "testing"
"time"
"github.com/prometheus/prometheus/pkg/labels" "github.com/prometheus/prometheus/pkg/labels"
"github.com/prometheus/prometheus/storage" "github.com/prometheus/prometheus/storage"
@@ -58,12 +59,12 @@ func queryAllSeries(t testing.TB, q storage.Querier, expectedMinTime, expectedMa
return samples return samples
} }
func testBlocks(t *testing.T, db *tsdb.DB, expectedMinTime, expectedMaxTime int64, expectedSamples []backfillSample, expectedNumBlocks int) { func testBlocks(t *testing.T, db *tsdb.DB, expectedMinTime, expectedMaxTime, expectedBlockDuration int64, expectedSamples []backfillSample, expectedNumBlocks int) {
blocks := db.Blocks() blocks := db.Blocks()
require.Equal(t, expectedNumBlocks, len(blocks)) require.Equal(t, expectedNumBlocks, len(blocks), "did not create correct number of blocks")
for _, block := range blocks { for i, block := range blocks {
require.Equal(t, true, block.MinTime()/tsdb.DefaultBlockDuration == (block.MaxTime()-1)/tsdb.DefaultBlockDuration) require.Equal(t, block.MinTime()/expectedBlockDuration, (block.MaxTime()-1)/expectedBlockDuration, "block %d contains data outside of one aligned block duration", i)
} }
q, err := db.Querier(context.Background(), math.MinInt64, math.MaxInt64) q, err := db.Querier(context.Background(), math.MinInt64, math.MaxInt64)
@@ -75,11 +76,11 @@ func testBlocks(t *testing.T, db *tsdb.DB, expectedMinTime, expectedMaxTime int6
allSamples := queryAllSeries(t, q, expectedMinTime, expectedMaxTime) allSamples := queryAllSeries(t, q, expectedMinTime, expectedMaxTime)
sortSamples(allSamples) sortSamples(allSamples)
sortSamples(expectedSamples) sortSamples(expectedSamples)
require.Equal(t, expectedSamples, allSamples) require.Equal(t, expectedSamples, allSamples, "did not create correct samples")
if len(allSamples) > 0 { if len(allSamples) > 0 {
require.Equal(t, expectedMinTime, allSamples[0].Timestamp) require.Equal(t, expectedMinTime, allSamples[0].Timestamp, "timestamp of first sample is not the expected minimum time")
require.Equal(t, expectedMaxTime, allSamples[len(allSamples)-1].Timestamp) require.Equal(t, expectedMaxTime, allSamples[len(allSamples)-1].Timestamp, "timestamp of last sample is not the expected maximum time")
} }
} }
@@ -89,11 +90,13 @@ func TestBackfill(t *testing.T) {
IsOk bool IsOk bool
Description string Description string
MaxSamplesInAppender int MaxSamplesInAppender int
MaxBlockDuration time.Duration
Expected struct { Expected struct {
MinTime int64 MinTime int64
MaxTime int64 MaxTime int64
NumBlocks int NumBlocks int
Samples []backfillSample BlockDuration int64
Samples []backfillSample
} }
}{ }{
{ {
@@ -102,15 +105,17 @@ func TestBackfill(t *testing.T) {
Description: "Empty file.", Description: "Empty file.",
MaxSamplesInAppender: 5000, MaxSamplesInAppender: 5000,
Expected: struct { Expected: struct {
MinTime int64 MinTime int64
MaxTime int64 MaxTime int64
NumBlocks int NumBlocks int
Samples []backfillSample BlockDuration int64
Samples []backfillSample
}{ }{
MinTime: math.MaxInt64, MinTime: math.MaxInt64,
MaxTime: math.MinInt64, MaxTime: math.MinInt64,
NumBlocks: 0, NumBlocks: 0,
Samples: []backfillSample{}, BlockDuration: tsdb.DefaultBlockDuration,
Samples: []backfillSample{},
}, },
}, },
{ {
@@ -124,14 +129,16 @@ http_requests_total{code="400"} 1 1565133713.990
Description: "Multiple samples with different timestamp for different series.", Description: "Multiple samples with different timestamp for different series.",
MaxSamplesInAppender: 5000, MaxSamplesInAppender: 5000,
Expected: struct { Expected: struct {
MinTime int64 MinTime int64
MaxTime int64 MaxTime int64
NumBlocks int NumBlocks int
Samples []backfillSample BlockDuration int64
Samples []backfillSample
}{ }{
MinTime: 1565133713989, MinTime: 1565133713989,
MaxTime: 1565133713990, MaxTime: 1565133713990,
NumBlocks: 1, NumBlocks: 1,
BlockDuration: tsdb.DefaultBlockDuration,
Samples: []backfillSample{ Samples: []backfillSample{
{ {
Timestamp: 1565133713989, Timestamp: 1565133713989,
@@ -158,14 +165,16 @@ http_requests_total{code="200"} 1023 1565652113.989
Description: "Multiple samples separated by 3 days.", Description: "Multiple samples separated by 3 days.",
MaxSamplesInAppender: 5000, MaxSamplesInAppender: 5000,
Expected: struct { Expected: struct {
MinTime int64 MinTime int64
MaxTime int64 MaxTime int64
NumBlocks int NumBlocks int
Samples []backfillSample BlockDuration int64
Samples []backfillSample
}{ }{
MinTime: 1565133713989, MinTime: 1565133713989,
MaxTime: 1565652113989, MaxTime: 1565652113989,
NumBlocks: 3, NumBlocks: 3,
BlockDuration: tsdb.DefaultBlockDuration,
Samples: []backfillSample{ Samples: []backfillSample{
{ {
Timestamp: 1565133713989, Timestamp: 1565133713989,
@@ -196,14 +205,16 @@ http_requests_total{code="200"} 1021 1565133713.989
Description: "Unordered samples from multiple series, which end in different blocks.", Description: "Unordered samples from multiple series, which end in different blocks.",
MaxSamplesInAppender: 5000, MaxSamplesInAppender: 5000,
Expected: struct { Expected: struct {
MinTime int64 MinTime int64
MaxTime int64 MaxTime int64
NumBlocks int NumBlocks int
Samples []backfillSample BlockDuration int64
Samples []backfillSample
}{ }{
MinTime: 1565133713989, MinTime: 1565133713989,
MaxTime: 1565392913989, MaxTime: 1565392913989,
NumBlocks: 2, NumBlocks: 2,
BlockDuration: tsdb.DefaultBlockDuration,
Samples: []backfillSample{ Samples: []backfillSample{
{ {
Timestamp: 1565133713989, Timestamp: 1565133713989,
@@ -230,14 +241,16 @@ http_requests_total{code="400"} 2 1565133715.989
Description: "Multiple samples with different timestamp for the same series.", Description: "Multiple samples with different timestamp for the same series.",
MaxSamplesInAppender: 5000, MaxSamplesInAppender: 5000,
Expected: struct { Expected: struct {
MinTime int64 MinTime int64
MaxTime int64 MaxTime int64
NumBlocks int NumBlocks int
Samples []backfillSample BlockDuration int64
Samples []backfillSample
}{ }{
MinTime: 1565133713989, MinTime: 1565133713989,
MaxTime: 1565133715989, MaxTime: 1565133715989,
NumBlocks: 1, NumBlocks: 1,
BlockDuration: tsdb.DefaultBlockDuration,
Samples: []backfillSample{ Samples: []backfillSample{
{ {
Timestamp: 1565133713989, Timestamp: 1565133713989,
@@ -260,6 +273,132 @@ http_requests_total{code="400"} 2 1565133715.989
{ {
ToParse: `# HELP http_requests_total The total number of HTTP requests. ToParse: `# HELP http_requests_total The total number of HTTP requests.
# TYPE http_requests_total counter # TYPE http_requests_total counter
http_requests_total{code="200"} 1021 1624463088.000
http_requests_total{code="200"} 1 1627055153.000
http_requests_total{code="400"} 2 1627056153.000
# EOF
`,
IsOk: true,
Description: "Long maximum block duration puts all data into one block.",
MaxSamplesInAppender: 5000,
MaxBlockDuration: 2048 * time.Hour,
Expected: struct {
MinTime int64
MaxTime int64
NumBlocks int
BlockDuration int64
Samples []backfillSample
}{
MinTime: 1624463088000,
MaxTime: 1627056153000,
NumBlocks: 1,
BlockDuration: int64(1458 * time.Hour / time.Millisecond),
Samples: []backfillSample{
{
Timestamp: 1624463088000,
Value: 1021,
Labels: labels.FromStrings("__name__", "http_requests_total", "code", "200"),
},
{
Timestamp: 1627055153000,
Value: 1,
Labels: labels.FromStrings("__name__", "http_requests_total", "code", "200"),
},
{
Timestamp: 1627056153000,
Value: 2,
Labels: labels.FromStrings("__name__", "http_requests_total", "code", "400"),
},
},
},
},
{
ToParse: `# HELP http_requests_total The total number of HTTP requests.
# TYPE http_requests_total counter
http_requests_total{code="200"} 1 1624463088.000
http_requests_total{code="200"} 2 1629503088.000
http_requests_total{code="200"} 3 1629863088.000
# EOF
`,
IsOk: true,
Description: "Long maximum block duration puts all data into two blocks.",
MaxSamplesInAppender: 5000,
MaxBlockDuration: 2048 * time.Hour,
Expected: struct {
MinTime int64
MaxTime int64
NumBlocks int
BlockDuration int64
Samples []backfillSample
}{
MinTime: 1624463088000,
MaxTime: 1629863088000,
NumBlocks: 2,
BlockDuration: int64(1458 * time.Hour / time.Millisecond),
Samples: []backfillSample{
{
Timestamp: 1624463088000,
Value: 1,
Labels: labels.FromStrings("__name__", "http_requests_total", "code", "200"),
},
{
Timestamp: 1629503088000,
Value: 2,
Labels: labels.FromStrings("__name__", "http_requests_total", "code", "200"),
},
{
Timestamp: 1629863088000,
Value: 3,
Labels: labels.FromStrings("__name__", "http_requests_total", "code", "200"),
},
},
},
},
{
ToParse: `# HELP http_requests_total The total number of HTTP requests.
# TYPE http_requests_total counter
http_requests_total{code="200"} 1 1624463088.000
http_requests_total{code="200"} 2 1765943088.000
http_requests_total{code="200"} 3 1768463088.000
# EOF
`,
IsOk: true,
Description: "Maximum block duration longer than longest possible duration, uses largest duration, puts all data into two blocks.",
MaxSamplesInAppender: 5000,
MaxBlockDuration: 200000 * time.Hour,
Expected: struct {
MinTime int64
MaxTime int64
NumBlocks int
BlockDuration int64
Samples []backfillSample
}{
MinTime: 1624463088000,
MaxTime: 1768463088000,
NumBlocks: 2,
BlockDuration: int64(39366 * time.Hour / time.Millisecond),
Samples: []backfillSample{
{
Timestamp: 1624463088000,
Value: 1,
Labels: labels.FromStrings("__name__", "http_requests_total", "code", "200"),
},
{
Timestamp: 1765943088000,
Value: 2,
Labels: labels.FromStrings("__name__", "http_requests_total", "code", "200"),
},
{
Timestamp: 1768463088000,
Value: 3,
Labels: labels.FromStrings("__name__", "http_requests_total", "code", "200"),
},
},
},
},
{
ToParse: `# HELP http_requests_total The total number of HTTP requests.
# TYPE http_requests_total counter
http_requests_total{code="200"} 1021 1565133713.989 http_requests_total{code="200"} 1021 1565133713.989
http_requests_total{code="200"} 1022 1565144513.989 http_requests_total{code="200"} 1022 1565144513.989
http_requests_total{code="400"} 2 1565155313.989 http_requests_total{code="400"} 2 1565155313.989
@@ -270,14 +409,16 @@ http_requests_total{code="400"} 1 1565166113.989
Description: "Multiple samples that end up in different blocks.", Description: "Multiple samples that end up in different blocks.",
MaxSamplesInAppender: 5000, MaxSamplesInAppender: 5000,
Expected: struct { Expected: struct {
MinTime int64 MinTime int64
MaxTime int64 MaxTime int64
NumBlocks int NumBlocks int
Samples []backfillSample BlockDuration int64
Samples []backfillSample
}{ }{
MinTime: 1565133713989, MinTime: 1565133713989,
MaxTime: 1565166113989, MaxTime: 1565166113989,
NumBlocks: 4, NumBlocks: 4,
BlockDuration: tsdb.DefaultBlockDuration,
Samples: []backfillSample{ Samples: []backfillSample{
{ {
Timestamp: 1565133713989, Timestamp: 1565133713989,
@@ -318,14 +459,16 @@ http_requests_total{code="400"} 1 1565166113.989
Description: "Number of samples are greater than the sample batch size.", Description: "Number of samples are greater than the sample batch size.",
MaxSamplesInAppender: 2, MaxSamplesInAppender: 2,
Expected: struct { Expected: struct {
MinTime int64 MinTime int64
MaxTime int64 MaxTime int64
NumBlocks int NumBlocks int
Samples []backfillSample BlockDuration int64
Samples []backfillSample
}{ }{
MinTime: 1565133713989, MinTime: 1565133713989,
MaxTime: 1565166113989, MaxTime: 1565166113989,
NumBlocks: 4, NumBlocks: 4,
BlockDuration: tsdb.DefaultBlockDuration,
Samples: []backfillSample{ Samples: []backfillSample{
{ {
Timestamp: 1565133713989, Timestamp: 1565133713989,
@@ -378,14 +521,16 @@ http_requests_total{code="400"} 1024 7199
Description: "One series spanning 2h in same block should not cause problems to other series.", Description: "One series spanning 2h in same block should not cause problems to other series.",
MaxSamplesInAppender: 1, MaxSamplesInAppender: 1,
Expected: struct { Expected: struct {
MinTime int64 MinTime int64
MaxTime int64 MaxTime int64
NumBlocks int NumBlocks int
Samples []backfillSample BlockDuration int64
Samples []backfillSample
}{ }{
MinTime: 0, MinTime: 0,
MaxTime: 7199000, MaxTime: 7199000,
NumBlocks: 1, NumBlocks: 1,
BlockDuration: tsdb.DefaultBlockDuration,
Samples: []backfillSample{ Samples: []backfillSample{
{ {
Timestamp: 0, Timestamp: 0,
@@ -418,14 +563,16 @@ http_requests_total{code="400"} 1024 7199
Description: "Sample with no #HELP or #TYPE keyword.", Description: "Sample with no #HELP or #TYPE keyword.",
MaxSamplesInAppender: 5000, MaxSamplesInAppender: 5000,
Expected: struct { Expected: struct {
MinTime int64 MinTime int64
MaxTime int64 MaxTime int64
NumBlocks int NumBlocks int
Samples []backfillSample BlockDuration int64
Samples []backfillSample
}{ }{
MinTime: 6900000, MinTime: 6900000,
MaxTime: 6900000, MaxTime: 6900000,
NumBlocks: 1, NumBlocks: 1,
BlockDuration: tsdb.DefaultBlockDuration,
Samples: []backfillSample{ Samples: []backfillSample{
{ {
Timestamp: 6900000, Timestamp: 6900000,
@@ -442,14 +589,16 @@ http_requests_total{code="400"} 1024 7199
Description: "Sample without newline after # EOF.", Description: "Sample without newline after # EOF.",
MaxSamplesInAppender: 5000, MaxSamplesInAppender: 5000,
Expected: struct { Expected: struct {
MinTime int64 MinTime int64
MaxTime int64 MaxTime int64
NumBlocks int NumBlocks int
Samples []backfillSample BlockDuration int64
Samples []backfillSample
}{ }{
MinTime: 6900000, MinTime: 6900000,
MaxTime: 6900000, MaxTime: 6900000,
NumBlocks: 1, NumBlocks: 1,
BlockDuration: tsdb.DefaultBlockDuration,
Samples: []backfillSample{ Samples: []backfillSample{
{ {
Timestamp: 6900000, Timestamp: 6900000,
@@ -467,14 +616,16 @@ http_requests_total{code="400"} 1024 7199
Description: "Bare sample.", Description: "Bare sample.",
MaxSamplesInAppender: 5000, MaxSamplesInAppender: 5000,
Expected: struct { Expected: struct {
MinTime int64 MinTime int64
MaxTime int64 MaxTime int64
NumBlocks int NumBlocks int
Samples []backfillSample BlockDuration int64
Samples []backfillSample
}{ }{
MinTime: 1001000, MinTime: 1001000,
MaxTime: 1001000, MaxTime: 1001000,
NumBlocks: 1, NumBlocks: 1,
BlockDuration: tsdb.DefaultBlockDuration,
Samples: []backfillSample{ Samples: []backfillSample{
{ {
Timestamp: 1001000, Timestamp: 1001000,
@@ -532,28 +683,32 @@ after_eof 1 2
}, },
} }
for _, test := range tests { for _, test := range tests {
t.Logf("Test:%s", test.Description) t.Run(test.Description, func(t *testing.T) {
t.Logf("Test:%s", test.Description)
outputDir, err := ioutil.TempDir("", "myDir") outputDir, err := ioutil.TempDir("", "myDir")
require.NoError(t, err) require.NoError(t, err)
defer func() { defer func() {
require.NoError(t, os.RemoveAll(outputDir)) require.NoError(t, os.RemoveAll(outputDir))
}() }()
err = backfill(test.MaxSamplesInAppender, []byte(test.ToParse), outputDir, false, false) err = backfill(test.MaxSamplesInAppender, []byte(test.ToParse), outputDir, false, false, test.MaxBlockDuration)
if !test.IsOk { if !test.IsOk {
require.Error(t, err, test.Description) require.Error(t, err, test.Description)
continue return
} }
require.NoError(t, err) require.NoError(t, err)
db, err := tsdb.Open(outputDir, nil, nil, tsdb.DefaultOptions(), nil) options := tsdb.DefaultOptions()
require.NoError(t, err) options.RetentionDuration = int64(10 * 365 * 24 * time.Hour / time.Millisecond) // maximum duration tests require a long retention
defer func() { db, err := tsdb.Open(outputDir, nil, nil, options, nil)
require.NoError(t, db.Close()) require.NoError(t, err)
}() defer func() {
require.NoError(t, db.Close())
}()
testBlocks(t, db, test.Expected.MinTime, test.Expected.MaxTime, test.Expected.Samples, test.Expected.NumBlocks) testBlocks(t, db, test.Expected.MinTime, test.Expected.MaxTime, test.Expected.BlockDuration, test.Expected.Samples, test.Expected.NumBlocks)
})
} }
} }

View file

@@ -145,8 +145,8 @@ func main() {
importCmd := tsdbCmd.Command("create-blocks-from", "[Experimental] Import samples from input and produce TSDB blocks. Please refer to the storage docs for more details.") importCmd := tsdbCmd.Command("create-blocks-from", "[Experimental] Import samples from input and produce TSDB blocks. Please refer to the storage docs for more details.")
importHumanReadable := importCmd.Flag("human-readable", "Print human readable values.").Short('r').Bool() importHumanReadable := importCmd.Flag("human-readable", "Print human readable values.").Short('r').Bool()
importQuiet := importCmd.Flag("quiet", "Do not print created blocks.").Short('q').Bool() importQuiet := importCmd.Flag("quiet", "Do not print created blocks.").Short('q').Bool()
maxBlockDuration := importCmd.Flag("max-block-duration", "Maximum duration created blocks may span. Anything less than 2h is ignored.").Hidden().PlaceHolder("<duration>").Duration()
openMetricsImportCmd := importCmd.Command("openmetrics", "Import samples from OpenMetrics input and produce TSDB blocks. Please refer to the storage docs for more details.") openMetricsImportCmd := importCmd.Command("openmetrics", "Import samples from OpenMetrics input and produce TSDB blocks. Please refer to the storage docs for more details.")
// TODO(aSquare14): add flag to set default block duration
importFilePath := openMetricsImportCmd.Arg("input file", "OpenMetrics file to read samples from.").Required().String() importFilePath := openMetricsImportCmd.Arg("input file", "OpenMetrics file to read samples from.").Required().String()
importDBPath := openMetricsImportCmd.Arg("output directory", "Output directory for generated blocks.").Default(defaultDBPath).String() importDBPath := openMetricsImportCmd.Arg("output directory", "Output directory for generated blocks.").Default(defaultDBPath).String()
importRulesCmd := importCmd.Command("rules", "Create blocks of data for new recording rules.") importRulesCmd := importCmd.Command("rules", "Create blocks of data for new recording rules.")
@@ -222,7 +222,7 @@ func main() {
os.Exit(checkErr(dumpSamples(*dumpPath, *dumpMinTime, *dumpMaxTime))) os.Exit(checkErr(dumpSamples(*dumpPath, *dumpMinTime, *dumpMaxTime)))
//TODO(aSquare14): Work on adding support for custom block size. //TODO(aSquare14): Work on adding support for custom block size.
case openMetricsImportCmd.FullCommand(): case openMetricsImportCmd.FullCommand():
os.Exit(backfillOpenMetrics(*importFilePath, *importDBPath, *importHumanReadable, *importQuiet)) os.Exit(backfillOpenMetrics(*importFilePath, *importDBPath, *importHumanReadable, *importQuiet, *maxBlockDuration))
case importRulesCmd.FullCommand(): case importRulesCmd.FullCommand():
os.Exit(checkErr(importRules(*importRulesURL, *importRulesStart, *importRulesEnd, *importRulesOutputDir, *importRulesEvalInterval, *importRulesFiles...))) os.Exit(checkErr(importRules(*importRulesURL, *importRulesStart, *importRulesEnd, *importRulesOutputDir, *importRulesEvalInterval, *importRulesFiles...)))

View file

@@ -611,7 +611,7 @@ func checkErr(err error) int {
return 0 return 0
} }
func backfillOpenMetrics(path string, outputDir string, humanReadable, quiet bool) int { func backfillOpenMetrics(path string, outputDir string, humanReadable, quiet bool, maxBlockDuration time.Duration) int {
inputFile, err := fileutil.OpenMmapFile(path) inputFile, err := fileutil.OpenMmapFile(path)
if err != nil { if err != nil {
return checkErr(err) return checkErr(err)
@@ -622,5 +622,5 @@ func backfillOpenMetrics(path string, outputDir string, humanReadable, quiet boo
return checkErr(errors.Wrap(err, "create output dir")) return checkErr(errors.Wrap(err, "create output dir"))
} }
return checkErr(backfill(5000, inputFile.Bytes(), outputDir, humanReadable, quiet)) return checkErr(backfill(5000, inputFile.Bytes(), outputDir, humanReadable, quiet, maxBlockDuration))
} }

View file

@@ -157,6 +157,16 @@ promtool tsdb create-blocks-from openmetrics <input file> [<output directory>]
After the creation of the blocks, move it to the data directory of Prometheus. If there is an overlap with the existing blocks in Prometheus, the flag `--storage.tsdb.allow-overlapping-blocks` needs to be set. Note that any backfilled data is subject to the retention configured for your Prometheus server (by time or size). After the creation of the blocks, move it to the data directory of Prometheus. If there is an overlap with the existing blocks in Prometheus, the flag `--storage.tsdb.allow-overlapping-blocks` needs to be set. Note that any backfilled data is subject to the retention configured for your Prometheus server (by time or size).
#### Longer Block Durations
By default, promtool will use the default block duration (2h) for the blocks; this behavior is the most generally applicable and correct. However, when backfilling data over a long range of times, it may be advantageous to use a larger value for the block duration to backfill faster and prevent additional compactions by TSDB later.
The `--max-block-duration` flag allows the user to configure a maximum duration of blocks. The backfilling tool will pick a suitable block duration no larger than this.
While larger blocks may improve the performance of backfilling large datasets, drawbacks exist as well. Time-based retention policies must keep the entire block around if even one sample of the (potentially large) block is still within the retention policy. Conversely, size-based retention policies will remove the entire block even if the TSDB only goes over the size limit in a minor way.
Therefore, backfilling with few blocks, thereby choosing a larger block duration, must be done with care and is not recommended for any production instances.
## Backfilling for Recording Rules ## Backfilling for Recording Rules
### Overview ### Overview