From 7c125aa5fb22c07a15831e0f79cafc58c2ebae0b Mon Sep 17 00:00:00 2001 From: Filip Petkovski Date: Fri, 2 Jul 2021 12:08:52 +0200 Subject: [PATCH] Promtool: Add support for compaction analysis (#8940) * Extend promtool to support compaction analysis This commit extends the promtool tsdb analyze command to help troubleshoot high Prometheus disk usage. The command now plots a distribution of how full chunks are relative to the maximum capacity of 120 samples per chunk. Signed-off-by: fpetkovski * Update cmd/promtool/tsdb.go Co-authored-by: Bartlomiej Plotka Co-authored-by: Bartlomiej Plotka --- cmd/promtool/main.go | 2 +- cmd/promtool/tsdb.go | 56 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 57 insertions(+), 1 deletion(-) diff --git a/cmd/promtool/main.go b/cmd/promtool/main.go index 8e39ca362d..bfc753140e 100644 --- a/cmd/promtool/main.go +++ b/cmd/promtool/main.go @@ -132,7 +132,7 @@ func main() { benchWriteNumScrapes := tsdbBenchWriteCmd.Flag("scrapes", "Number of scrapes to simulate.").Default("3000").Int() benchSamplesFile := tsdbBenchWriteCmd.Arg("file", "Input file with samples data, default is ("+filepath.Join("..", "..", "tsdb", "testdata", "20kseries.json")+").").Default(filepath.Join("..", "..", "tsdb", "testdata", "20kseries.json")).String() - tsdbAnalyzeCmd := tsdbCmd.Command("analyze", "Analyze churn, label pair cardinality.") + tsdbAnalyzeCmd := tsdbCmd.Command("analyze", "Analyze churn, label pair cardinality and compaction efficiency.") analyzePath := tsdbAnalyzeCmd.Arg("db path", "Database path (default is "+defaultDBPath+").").Default(defaultDBPath).String() analyzeBlockID := tsdbAnalyzeCmd.Arg("block id", "Block to analyze (default is the last block).").String() analyzeLimit := tsdbAnalyzeCmd.Flag("limit", "How many items to show in each list.").Default("20").Int() diff --git a/cmd/promtool/tsdb.go b/cmd/promtool/tsdb.go index 2135966c66..782ea7d54e 100644 --- a/cmd/promtool/tsdb.go +++ b/cmd/promtool/tsdb.go @@ -17,8 +17,10 @@ import ( "bufio" "context" "fmt" + "github.com/prometheus/prometheus/tsdb/index" "io" "io/ioutil" + "math" "os" "path/filepath" "runtime" @@ -561,6 +563,60 @@ func analyzeBlock(path, blockID string, limit int) error { } fmt.Printf("\nHighest cardinality metric names:\n") printInfo(postingInfos) + + return analyzeCompaction(block, ir) +} + +func analyzeCompaction(block tsdb.BlockReader, indexr tsdb.IndexReader) (err error) { + postingsr, err := indexr.Postings(index.AllPostingsKey()) + if err != nil { + return err + } + chunkr, err := block.Chunks() + if err != nil { + return err + } + defer func() { + err = tsdb_errors.NewMulti(err, chunkr.Close()).Err() + }() + + const maxSamplesPerChunk = 120 + nBuckets := 10 + histogram := make([]int, nBuckets) + totalChunks := 0 + for postingsr.Next() { + var lbsl = labels.Labels{} + var chks []chunks.Meta + if err := indexr.Series(postingsr.At(), &lbsl, &chks); err != nil { + return err + } + + for _, chk := range chks { + // Load the actual data of the chunk. + chk, err := chunkr.Chunk(chk.Ref) + if err != nil { + return err + } + chunkSize := math.Min(float64(chk.NumSamples()), maxSamplesPerChunk) + // Calculate the bucket for the chunk and increment it in the histogram. + bucket := int(math.Ceil(float64(nBuckets)*chunkSize/maxSamplesPerChunk)) - 1 + histogram[bucket]++ + totalChunks++ + } + } + + fmt.Printf("\nCompaction analysis:\n") + fmt.Println("Fullness: Amount of samples in chunks (100% is 120 samples)") + // Normalize absolute counts to percentages and print them out. + for bucket, count := range histogram { + percentage := 100.0 * count / totalChunks + fmt.Printf("%7d%%: ", (bucket+1)*10) + for j := 0; j < percentage; j++ { + fmt.Printf("#") + } + fmt.Println() + } + return nil }