mirror of https://github.com/prometheus/prometheus.git
Promtool: Add support for compaction analysis (#8940)
* Extend promtool to support compaction analysis

This commit extends the promtool tsdb analyze command to help troubleshoot high Prometheus disk usage. The command now plots a distribution of how full chunks are relative to the maximum capacity of 120 samples per chunk.

Signed-off-by: fpetkovski <filip.petkovsky@gmail.com>

* Update cmd/promtool/tsdb.go

Co-authored-by: Bartlomiej Plotka <bwplotka@gmail.com>
parent 179b2155d1
commit 7c125aa5fb
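For context when reviewing: a sketch of what the new section of promtool tsdb analyze output looks like. The two header lines come verbatim from the Printf calls added in the diff below; the database path, bucket percentages, and bar lengths are made up for illustration.

    $ promtool tsdb analyze /var/lib/prometheus
    ...
    Compaction analysis:
    Fullness: Amount of samples in chunks (100% is 120 samples)
         10%: ####################
         20%: ####
         30%: ##
         40%: #
         50%: #
         60%: #
         70%: #
         80%: ##
         90%: #####
        100%: ############################################################

A healthy block should skew toward the 100% bar; mass in the low buckets means many chunks were cut well short of 120 samples, which is the disk-usage symptom this command is meant to surface.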
cmd/promtool/main.go
@@ -132,7 +132,7 @@ func main() {
 	benchWriteNumScrapes := tsdbBenchWriteCmd.Flag("scrapes", "Number of scrapes to simulate.").Default("3000").Int()
 	benchSamplesFile := tsdbBenchWriteCmd.Arg("file", "Input file with samples data, default is ("+filepath.Join("..", "..", "tsdb", "testdata", "20kseries.json")+").").Default(filepath.Join("..", "..", "tsdb", "testdata", "20kseries.json")).String()
 
-	tsdbAnalyzeCmd := tsdbCmd.Command("analyze", "Analyze churn, label pair cardinality.")
+	tsdbAnalyzeCmd := tsdbCmd.Command("analyze", "Analyze churn, label pair cardinality and compaction efficiency.")
 	analyzePath := tsdbAnalyzeCmd.Arg("db path", "Database path (default is "+defaultDBPath+").").Default(defaultDBPath).String()
 	analyzeBlockID := tsdbAnalyzeCmd.Arg("block id", "Block to analyze (default is the last block).").String()
 	analyzeLimit := tsdbAnalyzeCmd.Flag("limit", "How many items to show in each list.").Default("20").Int()
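The arg and flag definitions above translate to invocations like the following; the data directory is a placeholder and the block ID stays optional:

    $ promtool tsdb analyze                                   # last block under the default db path
    $ promtool tsdb analyze --limit=10 /path/to/data <block id>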
cmd/promtool/tsdb.go
@@ -17,8 +17,10 @@ import (
 	"bufio"
 	"context"
 	"fmt"
+	"github.com/prometheus/prometheus/tsdb/index"
 	"io"
 	"io/ioutil"
+	"math"
 	"os"
 	"path/filepath"
 	"runtime"
@@ -561,6 +563,60 @@ func analyzeBlock(path, blockID string, limit int) error {
 	}
 	fmt.Printf("\nHighest cardinality metric names:\n")
 	printInfo(postingInfos)
 
+	return analyzeCompaction(block, ir)
+}
+
+func analyzeCompaction(block tsdb.BlockReader, indexr tsdb.IndexReader) (err error) {
+	postingsr, err := indexr.Postings(index.AllPostingsKey())
+	if err != nil {
+		return err
+	}
+	chunkr, err := block.Chunks()
+	if err != nil {
+		return err
+	}
+	defer func() {
+		err = tsdb_errors.NewMulti(err, chunkr.Close()).Err()
+	}()
+
+	const maxSamplesPerChunk = 120
+	nBuckets := 10
+	histogram := make([]int, nBuckets)
+	totalChunks := 0
+	for postingsr.Next() {
+		var lbsl = labels.Labels{}
+		var chks []chunks.Meta
+		if err := indexr.Series(postingsr.At(), &lbsl, &chks); err != nil {
+			return err
+		}
+
+		for _, chk := range chks {
+			// Load the actual data of the chunk.
+			chk, err := chunkr.Chunk(chk.Ref)
+			if err != nil {
+				return err
+			}
+			chunkSize := math.Min(float64(chk.NumSamples()), maxSamplesPerChunk)
+			// Calculate the bucket for the chunk and increment it in the histogram.
+			bucket := int(math.Ceil(float64(nBuckets)*chunkSize/maxSamplesPerChunk)) - 1
+			histogram[bucket]++
+			totalChunks++
+		}
+	}
+
+	fmt.Printf("\nCompaction analysis:\n")
+	fmt.Println("Fullness: Amount of samples in chunks (100% is 120 samples)")
+	// Normalize absolute counts to percentages and print them out.
+	for bucket, count := range histogram {
+		percentage := 100.0 * count / totalChunks
+		fmt.Printf("%7d%%: ", (bucket+1)*10)
+		for j := 0; j < percentage; j++ {
+			fmt.Printf("#")
+		}
+		fmt.Println()
+	}
+
 	return nil
 }
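As a sanity check on the bucketing math in analyzeCompaction, here is a small self-contained Go sketch (not part of the commit) that maps a few hypothetical per-chunk sample counts to their fullness buckets using the same formula:

    package main

    import (
    	"fmt"
    	"math"
    )

    func main() {
    	const maxSamplesPerChunk = 120
    	nBuckets := 10
    	// Hypothetical sample counts: a nearly empty chunk, a quarter-full
    	// one, a half-full one, a full one, and one clamped from above.
    	for _, numSamples := range []int{1, 30, 61, 120, 240} {
    		chunkSize := math.Min(float64(numSamples), maxSamplesPerChunk)
    		// Same formula as the diff: ceil(nBuckets * fullness) - 1.
    		bucket := int(math.Ceil(float64(nBuckets)*chunkSize/maxSamplesPerChunk)) - 1
    		fmt.Printf("%3d samples -> bucket %d, counted under the %d%% bar\n",
    			numSamples, bucket, (bucket+1)*10)
    	}
    }

This prints buckets 0, 2, 5, 9, and 9 respectively: a chunk lands in the 100% bar only when it holds the full 120 samples, and the ceiling means even a single-sample chunk registers in the 10% bar rather than disappearing.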