Mirror of https://github.com/prometheus/prometheus.git, synced 2024-12-24 13:14:05 -08:00
Promtool: Add support for compaction analysis (#8940)
* Extend promtool to support compaction analysis

This commit extends the promtool tsdb analyze command to help troubleshoot high Prometheus disk usage. The command now plots a distribution of how full chunks are relative to the maximum capacity of 120 samples per chunk.

Signed-off-by: fpetkovski <filip.petkovsky@gmail.com>

* Update cmd/promtool/tsdb.go

Co-authored-by: Bartlomiej Plotka <bwplotka@gmail.com>

Co-authored-by: Bartlomiej Plotka <bwplotka@gmail.com>
This commit is contained in:
parent 179b2155d1
commit 7c125aa5fb
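For reference, the "fullness" percentage in the new histogram comes from the bucketing arithmetic in the diff below: a chunk's sample count is capped at 120 and mapped into one of ten 10% buckets. The following is a minimal standalone sketch of that calculation; the sample counts 7, 37 and 120 are hypothetical values chosen purely for illustration.

package main

import (
	"fmt"
	"math"
)

func main() {
	// Same constants as in analyzeCompaction below: a chunk holds at most
	// 120 samples, and the histogram has ten buckets of 10% each.
	const maxSamplesPerChunk = 120
	const nBuckets = 10

	// Hypothetical chunk sizes, for illustration only.
	for _, numSamples := range []int{7, 37, 120} {
		chunkSize := math.Min(float64(numSamples), maxSamplesPerChunk)
		bucket := int(math.Ceil(float64(nBuckets)*chunkSize/maxSamplesPerChunk)) - 1
		fmt.Printf("%3d samples -> %d%% fullness bucket\n", numSamples, (bucket+1)*10)
	}
	// Prints: 7 samples -> 10%, 37 samples -> 40%, 120 samples -> 100%.
}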
@@ -132,7 +132,7 @@ func main() {
 	benchWriteNumScrapes := tsdbBenchWriteCmd.Flag("scrapes", "Number of scrapes to simulate.").Default("3000").Int()
 	benchSamplesFile := tsdbBenchWriteCmd.Arg("file", "Input file with samples data, default is ("+filepath.Join("..", "..", "tsdb", "testdata", "20kseries.json")+").").Default(filepath.Join("..", "..", "tsdb", "testdata", "20kseries.json")).String()
 
-	tsdbAnalyzeCmd := tsdbCmd.Command("analyze", "Analyze churn, label pair cardinality.")
+	tsdbAnalyzeCmd := tsdbCmd.Command("analyze", "Analyze churn, label pair cardinality and compaction efficiency.")
 	analyzePath := tsdbAnalyzeCmd.Arg("db path", "Database path (default is "+defaultDBPath+").").Default(defaultDBPath).String()
 	analyzeBlockID := tsdbAnalyzeCmd.Arg("block id", "Block to analyze (default is the last block).").String()
 	analyzeLimit := tsdbAnalyzeCmd.Flag("limit", "How many items to show in each list.").Default("20").Int()

@@ -17,8 +17,10 @@ import (
 	"bufio"
 	"context"
 	"fmt"
+	"github.com/prometheus/prometheus/tsdb/index"
 	"io"
 	"io/ioutil"
+	"math"
 	"os"
 	"path/filepath"
 	"runtime"

@@ -561,6 +563,60 @@ func analyzeBlock(path, blockID string, limit int) error {
 	}
 	fmt.Printf("\nHighest cardinality metric names:\n")
 	printInfo(postingInfos)
+
+	return analyzeCompaction(block, ir)
+}
+
+func analyzeCompaction(block tsdb.BlockReader, indexr tsdb.IndexReader) (err error) {
+	postingsr, err := indexr.Postings(index.AllPostingsKey())
+	if err != nil {
+		return err
+	}
+	chunkr, err := block.Chunks()
+	if err != nil {
+		return err
+	}
+	defer func() {
+		err = tsdb_errors.NewMulti(err, chunkr.Close()).Err()
+	}()
+
+	const maxSamplesPerChunk = 120
+	nBuckets := 10
+	histogram := make([]int, nBuckets)
+	totalChunks := 0
+	for postingsr.Next() {
+		var lbsl = labels.Labels{}
+		var chks []chunks.Meta
+		if err := indexr.Series(postingsr.At(), &lbsl, &chks); err != nil {
+			return err
+		}
+
+		for _, chk := range chks {
+			// Load the actual data of the chunk.
+			chk, err := chunkr.Chunk(chk.Ref)
+			if err != nil {
+				return err
+			}
+			chunkSize := math.Min(float64(chk.NumSamples()), maxSamplesPerChunk)
+			// Calculate the bucket for the chunk and increment it in the histogram.
+			bucket := int(math.Ceil(float64(nBuckets)*chunkSize/maxSamplesPerChunk)) - 1
+			histogram[bucket]++
+			totalChunks++
+		}
+	}
+
+	fmt.Printf("\nCompaction analysis:\n")
+	fmt.Println("Fullness: Amount of samples in chunks (100% is 120 samples)")
+	// Normalize absolute counts to percentages and print them out.
+	for bucket, count := range histogram {
+		percentage := 100.0 * count / totalChunks
+		fmt.Printf("%7d%%: ", (bucket+1)*10)
+		for j := 0; j < percentage; j++ {
+			fmt.Printf("#")
+		}
+		fmt.Println()
+	}
+
 	return nil
 }
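With this change, running promtool tsdb analyze against a database path (optionally with a specific block ID, and --limit to control list lengths, as defined in the first hunk above) prints a "Compaction analysis" section after the existing cardinality lists: one row per 10% fullness bucket, drawn as a bar of '#' characters whose length is the percentage of chunks that fall into that bucket.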