enhance promtool tsdb analyze command (#12869)

Improve promtool tsdb analyze

- Make it more suitable for variable size float chunks.
- Add support for histogram chunks.

---------

Signed-off-by: Ziqi Zhao <zhaoziqi9146@gmail.com>
Signed-off-by: Levi Harrison <git@leviharrison.dev>
This commit is contained in:
Ziqi Zhao 2023-10-15 02:34:50 +08:00 committed by Levi Harrison
parent 4284e6f2f7
commit 60e2bb0135
2 changed files with 131 additions and 18 deletions

View file

@ -18,7 +18,6 @@ import (
"context" "context"
"fmt" "fmt"
"io" "io"
"math"
"os" "os"
"path/filepath" "path/filepath"
"runtime" "runtime"
@ -620,10 +619,12 @@ func analyzeCompaction(ctx context.Context, block tsdb.BlockReader, indexr tsdb.
err = tsdb_errors.NewMulti(err, chunkr.Close()).Err() err = tsdb_errors.NewMulti(err, chunkr.Close()).Err()
}() }()
const maxSamplesPerChunk = 120
nBuckets := 10
histogram := make([]int, nBuckets)
totalChunks := 0 totalChunks := 0
floatChunkSamplesCount := make([]int, 0)
floatChunkSize := make([]int, 0)
histogramChunkSamplesCount := make([]int, 0)
histogramChunkSize := make([]int, 0)
histogramChunkBucketsCount := make([]int, 0)
var builder labels.ScratchBuilder var builder labels.ScratchBuilder
for postingsr.Next() { for postingsr.Next() {
var chks []chunks.Meta var chks []chunks.Meta
@ -637,26 +638,56 @@ func analyzeCompaction(ctx context.Context, block tsdb.BlockReader, indexr tsdb.
if err != nil { if err != nil {
return err return err
} }
chunkSize := math.Min(float64(chk.NumSamples()), maxSamplesPerChunk) switch chk.Encoding() {
// Calculate the bucket for the chunk and increment it in the histogram. case chunkenc.EncXOR:
bucket := int(math.Ceil(float64(nBuckets)*chunkSize/maxSamplesPerChunk)) - 1 floatChunkSamplesCount = append(floatChunkSamplesCount, chk.NumSamples())
histogram[bucket]++ floatChunkSize = append(floatChunkSize, len(chk.Bytes()))
case chunkenc.EncFloatHistogram:
histogramChunkSamplesCount = append(histogramChunkSamplesCount, chk.NumSamples())
histogramChunkSize = append(histogramChunkSize, len(chk.Bytes()))
fhchk, ok := chk.(*chunkenc.FloatHistogramChunk)
if !ok {
return fmt.Errorf("chunk is not FloatHistogramChunk")
}
it := fhchk.Iterator(nil)
bucketCount := 0
for it.Next() == chunkenc.ValFloatHistogram {
_, f := it.AtFloatHistogram()
bucketCount += len(f.PositiveBuckets)
bucketCount += len(f.NegativeBuckets)
}
histogramChunkBucketsCount = append(histogramChunkBucketsCount, bucketCount)
case chunkenc.EncHistogram:
histogramChunkSamplesCount = append(histogramChunkSamplesCount, chk.NumSamples())
histogramChunkSize = append(histogramChunkSize, len(chk.Bytes()))
hchk, ok := chk.(*chunkenc.HistogramChunk)
if !ok {
return fmt.Errorf("chunk is not HistogramChunk")
}
it := hchk.Iterator(nil)
bucketCount := 0
for it.Next() == chunkenc.ValHistogram {
_, f := it.AtHistogram()
bucketCount += len(f.PositiveBuckets)
bucketCount += len(f.NegativeBuckets)
}
histogramChunkBucketsCount = append(histogramChunkBucketsCount, bucketCount)
}
totalChunks++ totalChunks++
} }
} }
fmt.Printf("\nCompaction analysis:\n") fmt.Printf("\nCompaction analysis:\n")
fmt.Println("Fullness: Amount of samples in chunks (100% is 120 samples)") fmt.Println()
// Normalize absolute counts to percentages and print them out. displayHistogram("samples per float chunk", floatChunkSamplesCount, totalChunks)
for bucket, count := range histogram {
percentage := 100.0 * count / totalChunks
fmt.Printf("%7d%%: ", (bucket+1)*10)
for j := 0; j < percentage; j++ {
fmt.Printf("#")
}
fmt.Println()
}
displayHistogram("bytes per float chunk", floatChunkSize, totalChunks)
displayHistogram("samples per histogram chunk", histogramChunkSamplesCount, totalChunks)
displayHistogram("bytes per histogram chunk", histogramChunkSize, totalChunks)
displayHistogram("buckets per histogram chunk", histogramChunkBucketsCount, totalChunks)
return nil return nil
} }
@ -732,3 +763,42 @@ func backfillOpenMetrics(path, outputDir string, humanReadable, quiet bool, maxB
return checkErr(backfill(5000, inputFile.Bytes(), outputDir, humanReadable, quiet, maxBlockDuration)) return checkErr(backfill(5000, inputFile.Bytes(), outputDir, humanReadable, quiet, maxBlockDuration))
} }
func displayHistogram(dataType string, datas []int, total int) {
slices.Sort(datas)
start, end, step := generateBucket(datas[0], datas[len(datas)-1])
sum := 0
buckets := make([]int, (end-start)/step+1)
maxCount := 0
for _, c := range datas {
sum += c
buckets[(c-start)/step]++
if buckets[(c-start)/step] > maxCount {
maxCount = buckets[(c-start)/step]
}
}
avg := sum / len(datas)
fmt.Printf("%s (min/avg/max): %d/%d/%d\n", dataType, datas[0], avg, datas[len(datas)-1])
maxLeftLen := strconv.Itoa(len(fmt.Sprintf("%d", end)))
maxRightLen := strconv.Itoa(len(fmt.Sprintf("%d", end+step)))
maxCountLen := strconv.Itoa(len(fmt.Sprintf("%d", maxCount)))
for bucket, count := range buckets {
percentage := 100.0 * count / total
fmt.Printf("[%"+maxLeftLen+"d, %"+maxRightLen+"d]: %"+maxCountLen+"d %s\n", bucket*step+start+1, (bucket+1)*step+start, count, strings.Repeat("#", percentage))
}
fmt.Println()
}
func generateBucket(min, max int) (start, end, step int) {
s := (max - min) / 10
step = 10
for step < s && step <= 10000 {
step *= 10
}
start = min - min%step
end = max - max%step + step
return
}

43
cmd/promtool/tsdb_test.go Normal file
View file

@ -0,0 +1,43 @@
// Copyright 2017 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package main
import (
"testing"
"github.com/stretchr/testify/require"
)
func TestGenerateBucket(t *testing.T) {
tcs := []struct {
min, max int
start, end, step int
}{
{
min: 101,
max: 141,
start: 100,
end: 150,
step: 10,
},
}
for _, tc := range tcs {
start, end, step := generateBucket(tc.min, tc.max)
require.Equal(t, tc.start, start)
require.Equal(t, tc.end, end)
require.Equal(t, tc.step, step)
}
}