mirror of
https://github.com/prometheus/prometheus.git
synced 2025-01-24 20:21:35 -08:00
5bee0cfce2
The ChunkReader interface's Chunk() has been changed to ChunkOrIterable(). This is a precursor to OOO native histogram support - with OOO native histograms, the chunks.Meta passed to Chunk() can result in multiple chunks being returned rather than just a single chunk (e.g. if oooMergedChunk has a counter reset in the middle). To support this, ChunkOrIterable() requires either a single chunk or an iterable to be returned. If an iterable is returned, the caller has the responsibility of converting the samples from the iterable into possibly multiple chunks. The OOOHeadChunkReader now returns an iterable rather than a chunk to prepare for the native histograms case. Also as a beneficial side effect, oooMergedChunk and boundedChunk has been simplified as they only need to implement the Iterable interface now, not the full Chunk interface. --------- Signed-off-by: Fiona Liao <fiona.y.liao@gmail.com> Co-authored-by: George Krajcsovits <krajorama@users.noreply.github.com>
820 lines
20 KiB
Go
820 lines
20 KiB
Go
// Copyright 2017 The Prometheus Authors
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
|
|
package main
|
|
|
|
import (
|
|
"bufio"
|
|
"context"
|
|
"errors"
|
|
"fmt"
|
|
"io"
|
|
"os"
|
|
"path/filepath"
|
|
"runtime"
|
|
"runtime/pprof"
|
|
"strconv"
|
|
"strings"
|
|
"sync"
|
|
"text/tabwriter"
|
|
"time"
|
|
|
|
"github.com/alecthomas/units"
|
|
"github.com/go-kit/log"
|
|
"golang.org/x/exp/slices"
|
|
|
|
"github.com/prometheus/prometheus/model/labels"
|
|
"github.com/prometheus/prometheus/promql/parser"
|
|
"github.com/prometheus/prometheus/storage"
|
|
"github.com/prometheus/prometheus/tsdb"
|
|
"github.com/prometheus/prometheus/tsdb/chunkenc"
|
|
"github.com/prometheus/prometheus/tsdb/chunks"
|
|
tsdb_errors "github.com/prometheus/prometheus/tsdb/errors"
|
|
"github.com/prometheus/prometheus/tsdb/fileutil"
|
|
"github.com/prometheus/prometheus/tsdb/index"
|
|
)
|
|
|
|
const timeDelta = 30000
|
|
|
|
type writeBenchmark struct {
|
|
outPath string
|
|
samplesFile string
|
|
cleanup bool
|
|
numMetrics int
|
|
|
|
storage *tsdb.DB
|
|
|
|
cpuprof *os.File
|
|
memprof *os.File
|
|
blockprof *os.File
|
|
mtxprof *os.File
|
|
logger log.Logger
|
|
}
|
|
|
|
func benchmarkWrite(outPath, samplesFile string, numMetrics, numScrapes int) error {
|
|
b := &writeBenchmark{
|
|
outPath: outPath,
|
|
samplesFile: samplesFile,
|
|
numMetrics: numMetrics,
|
|
logger: log.NewLogfmtLogger(log.NewSyncWriter(os.Stderr)),
|
|
}
|
|
if b.outPath == "" {
|
|
dir, err := os.MkdirTemp("", "tsdb_bench")
|
|
if err != nil {
|
|
return err
|
|
}
|
|
b.outPath = dir
|
|
b.cleanup = true
|
|
}
|
|
if err := os.RemoveAll(b.outPath); err != nil {
|
|
return err
|
|
}
|
|
if err := os.MkdirAll(b.outPath, 0o777); err != nil {
|
|
return err
|
|
}
|
|
|
|
dir := filepath.Join(b.outPath, "storage")
|
|
|
|
l := log.With(b.logger, "ts", log.DefaultTimestampUTC, "caller", log.DefaultCaller)
|
|
|
|
st, err := tsdb.Open(dir, l, nil, &tsdb.Options{
|
|
RetentionDuration: int64(15 * 24 * time.Hour / time.Millisecond),
|
|
MinBlockDuration: int64(2 * time.Hour / time.Millisecond),
|
|
}, tsdb.NewDBStats())
|
|
if err != nil {
|
|
return err
|
|
}
|
|
st.DisableCompactions()
|
|
b.storage = st
|
|
|
|
var lbs []labels.Labels
|
|
|
|
if _, err = measureTime("readData", func() error {
|
|
f, err := os.Open(b.samplesFile)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
defer f.Close()
|
|
|
|
lbs, err = readPrometheusLabels(f, b.numMetrics)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
return nil
|
|
}); err != nil {
|
|
return err
|
|
}
|
|
|
|
var total uint64
|
|
|
|
dur, err := measureTime("ingestScrapes", func() error {
|
|
if err := b.startProfiling(); err != nil {
|
|
return err
|
|
}
|
|
total, err = b.ingestScrapes(lbs, numScrapes)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
return nil
|
|
})
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
fmt.Println(" > total samples:", total)
|
|
fmt.Println(" > samples/sec:", float64(total)/dur.Seconds())
|
|
|
|
if _, err = measureTime("stopStorage", func() error {
|
|
if err := b.storage.Close(); err != nil {
|
|
return err
|
|
}
|
|
|
|
return b.stopProfiling()
|
|
}); err != nil {
|
|
return err
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
func (b *writeBenchmark) ingestScrapes(lbls []labels.Labels, scrapeCount int) (uint64, error) {
|
|
var mu sync.Mutex
|
|
var total uint64
|
|
|
|
for i := 0; i < scrapeCount; i += 100 {
|
|
var wg sync.WaitGroup
|
|
lbls := lbls
|
|
for len(lbls) > 0 {
|
|
l := 1000
|
|
if len(lbls) < 1000 {
|
|
l = len(lbls)
|
|
}
|
|
batch := lbls[:l]
|
|
lbls = lbls[l:]
|
|
|
|
wg.Add(1)
|
|
go func() {
|
|
n, err := b.ingestScrapesShard(batch, 100, int64(timeDelta*i))
|
|
if err != nil {
|
|
// exitWithError(err)
|
|
fmt.Println(" err", err)
|
|
}
|
|
mu.Lock()
|
|
total += n
|
|
mu.Unlock()
|
|
wg.Done()
|
|
}()
|
|
}
|
|
wg.Wait()
|
|
}
|
|
fmt.Println("ingestion completed")
|
|
|
|
return total, nil
|
|
}
|
|
|
|
func (b *writeBenchmark) ingestScrapesShard(lbls []labels.Labels, scrapeCount int, baset int64) (uint64, error) {
|
|
ts := baset
|
|
|
|
type sample struct {
|
|
labels labels.Labels
|
|
value int64
|
|
ref *storage.SeriesRef
|
|
}
|
|
|
|
scrape := make([]*sample, 0, len(lbls))
|
|
|
|
for _, m := range lbls {
|
|
scrape = append(scrape, &sample{
|
|
labels: m,
|
|
value: 123456789,
|
|
})
|
|
}
|
|
total := uint64(0)
|
|
|
|
for i := 0; i < scrapeCount; i++ {
|
|
app := b.storage.Appender(context.TODO())
|
|
ts += timeDelta
|
|
|
|
for _, s := range scrape {
|
|
s.value += 1000
|
|
|
|
var ref storage.SeriesRef
|
|
if s.ref != nil {
|
|
ref = *s.ref
|
|
}
|
|
|
|
ref, err := app.Append(ref, s.labels, ts, float64(s.value))
|
|
if err != nil {
|
|
panic(err)
|
|
}
|
|
|
|
if s.ref == nil {
|
|
s.ref = &ref
|
|
}
|
|
total++
|
|
}
|
|
if err := app.Commit(); err != nil {
|
|
return total, err
|
|
}
|
|
}
|
|
return total, nil
|
|
}
|
|
|
|
func (b *writeBenchmark) startProfiling() error {
|
|
var err error
|
|
|
|
// Start CPU profiling.
|
|
b.cpuprof, err = os.Create(filepath.Join(b.outPath, "cpu.prof"))
|
|
if err != nil {
|
|
return fmt.Errorf("bench: could not create cpu profile: %w", err)
|
|
}
|
|
if err := pprof.StartCPUProfile(b.cpuprof); err != nil {
|
|
return fmt.Errorf("bench: could not start CPU profile: %w", err)
|
|
}
|
|
|
|
// Start memory profiling.
|
|
b.memprof, err = os.Create(filepath.Join(b.outPath, "mem.prof"))
|
|
if err != nil {
|
|
return fmt.Errorf("bench: could not create memory profile: %w", err)
|
|
}
|
|
runtime.MemProfileRate = 64 * 1024
|
|
|
|
// Start fatal profiling.
|
|
b.blockprof, err = os.Create(filepath.Join(b.outPath, "block.prof"))
|
|
if err != nil {
|
|
return fmt.Errorf("bench: could not create block profile: %w", err)
|
|
}
|
|
runtime.SetBlockProfileRate(20)
|
|
|
|
b.mtxprof, err = os.Create(filepath.Join(b.outPath, "mutex.prof"))
|
|
if err != nil {
|
|
return fmt.Errorf("bench: could not create mutex profile: %w", err)
|
|
}
|
|
runtime.SetMutexProfileFraction(20)
|
|
return nil
|
|
}
|
|
|
|
func (b *writeBenchmark) stopProfiling() error {
|
|
if b.cpuprof != nil {
|
|
pprof.StopCPUProfile()
|
|
b.cpuprof.Close()
|
|
b.cpuprof = nil
|
|
}
|
|
if b.memprof != nil {
|
|
if err := pprof.Lookup("heap").WriteTo(b.memprof, 0); err != nil {
|
|
return fmt.Errorf("error writing mem profile: %w", err)
|
|
}
|
|
b.memprof.Close()
|
|
b.memprof = nil
|
|
}
|
|
if b.blockprof != nil {
|
|
if err := pprof.Lookup("block").WriteTo(b.blockprof, 0); err != nil {
|
|
return fmt.Errorf("error writing block profile: %w", err)
|
|
}
|
|
b.blockprof.Close()
|
|
b.blockprof = nil
|
|
runtime.SetBlockProfileRate(0)
|
|
}
|
|
if b.mtxprof != nil {
|
|
if err := pprof.Lookup("mutex").WriteTo(b.mtxprof, 0); err != nil {
|
|
return fmt.Errorf("error writing mutex profile: %w", err)
|
|
}
|
|
b.mtxprof.Close()
|
|
b.mtxprof = nil
|
|
runtime.SetMutexProfileFraction(0)
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func measureTime(stage string, f func() error) (time.Duration, error) {
|
|
fmt.Printf(">> start stage=%s\n", stage)
|
|
start := time.Now()
|
|
if err := f(); err != nil {
|
|
return 0, err
|
|
}
|
|
|
|
fmt.Printf(">> completed stage=%s duration=%s\n", stage, time.Since(start))
|
|
return time.Since(start), nil
|
|
}
|
|
|
|
func readPrometheusLabels(r io.Reader, n int) ([]labels.Labels, error) {
|
|
scanner := bufio.NewScanner(r)
|
|
|
|
var mets []labels.Labels
|
|
hashes := map[uint64]struct{}{}
|
|
i := 0
|
|
|
|
for scanner.Scan() && i < n {
|
|
m := make([]labels.Label, 0, 10)
|
|
|
|
r := strings.NewReplacer("\"", "", "{", "", "}", "")
|
|
s := r.Replace(scanner.Text())
|
|
|
|
labelChunks := strings.Split(s, ",")
|
|
for _, labelChunk := range labelChunks {
|
|
split := strings.Split(labelChunk, ":")
|
|
m = append(m, labels.Label{Name: split[0], Value: split[1]})
|
|
}
|
|
ml := labels.New(m...) // This sorts by name - order of the k/v labels matters, don't assume we'll always receive them already sorted.
|
|
h := ml.Hash()
|
|
if _, ok := hashes[h]; ok {
|
|
continue
|
|
}
|
|
mets = append(mets, ml)
|
|
hashes[h] = struct{}{}
|
|
i++
|
|
}
|
|
return mets, nil
|
|
}
|
|
|
|
func listBlocks(path string, humanReadable bool) error {
|
|
db, err := tsdb.OpenDBReadOnly(path, nil)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
defer func() {
|
|
err = tsdb_errors.NewMulti(err, db.Close()).Err()
|
|
}()
|
|
blocks, err := db.Blocks()
|
|
if err != nil {
|
|
return err
|
|
}
|
|
printBlocks(blocks, true, humanReadable)
|
|
return nil
|
|
}
|
|
|
|
func printBlocks(blocks []tsdb.BlockReader, writeHeader, humanReadable bool) {
|
|
tw := tabwriter.NewWriter(os.Stdout, 13, 0, 2, ' ', 0)
|
|
defer tw.Flush()
|
|
|
|
if writeHeader {
|
|
fmt.Fprintln(tw, "BLOCK ULID\tMIN TIME\tMAX TIME\tDURATION\tNUM SAMPLES\tNUM CHUNKS\tNUM SERIES\tSIZE")
|
|
}
|
|
|
|
for _, b := range blocks {
|
|
meta := b.Meta()
|
|
|
|
fmt.Fprintf(tw,
|
|
"%v\t%v\t%v\t%v\t%v\t%v\t%v\t%v\n",
|
|
meta.ULID,
|
|
getFormatedTime(meta.MinTime, humanReadable),
|
|
getFormatedTime(meta.MaxTime, humanReadable),
|
|
time.Duration(meta.MaxTime-meta.MinTime)*time.Millisecond,
|
|
meta.Stats.NumSamples,
|
|
meta.Stats.NumChunks,
|
|
meta.Stats.NumSeries,
|
|
getFormatedBytes(b.Size(), humanReadable),
|
|
)
|
|
}
|
|
}
|
|
|
|
func getFormatedTime(timestamp int64, humanReadable bool) string {
|
|
if humanReadable {
|
|
return time.Unix(timestamp/1000, 0).UTC().String()
|
|
}
|
|
return strconv.FormatInt(timestamp, 10)
|
|
}
|
|
|
|
func getFormatedBytes(bytes int64, humanReadable bool) string {
|
|
if humanReadable {
|
|
return units.Base2Bytes(bytes).String()
|
|
}
|
|
return strconv.FormatInt(bytes, 10)
|
|
}
|
|
|
|
func openBlock(path, blockID string) (*tsdb.DBReadOnly, tsdb.BlockReader, error) {
|
|
db, err := tsdb.OpenDBReadOnly(path, nil)
|
|
if err != nil {
|
|
return nil, nil, err
|
|
}
|
|
|
|
if blockID == "" {
|
|
blockID, err = db.LastBlockID()
|
|
if err != nil {
|
|
return nil, nil, err
|
|
}
|
|
}
|
|
|
|
b, err := db.Block(blockID)
|
|
if err != nil {
|
|
return nil, nil, err
|
|
}
|
|
|
|
return db, b, nil
|
|
}
|
|
|
|
func analyzeBlock(ctx context.Context, path, blockID string, limit int, runExtended bool, matchers string) error {
|
|
var (
|
|
selectors []*labels.Matcher
|
|
err error
|
|
)
|
|
if len(matchers) > 0 {
|
|
selectors, err = parser.ParseMetricSelector(matchers)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
}
|
|
db, block, err := openBlock(path, blockID)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
defer func() {
|
|
err = tsdb_errors.NewMulti(err, db.Close()).Err()
|
|
}()
|
|
|
|
meta := block.Meta()
|
|
fmt.Printf("Block ID: %s\n", meta.ULID)
|
|
// Presume 1ms resolution that Prometheus uses.
|
|
fmt.Printf("Duration: %s\n", (time.Duration(meta.MaxTime-meta.MinTime) * 1e6).String())
|
|
fmt.Printf("Total Series: %d\n", meta.Stats.NumSeries)
|
|
if len(matchers) > 0 {
|
|
fmt.Printf("Matcher: %s\n", matchers)
|
|
}
|
|
ir, err := block.Index()
|
|
if err != nil {
|
|
return err
|
|
}
|
|
defer ir.Close()
|
|
|
|
allLabelNames, err := ir.LabelNames(ctx, selectors...)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
fmt.Printf("Label names: %d\n", len(allLabelNames))
|
|
|
|
type postingInfo struct {
|
|
key string
|
|
metric uint64
|
|
}
|
|
postingInfos := []postingInfo{}
|
|
|
|
printInfo := func(postingInfos []postingInfo) {
|
|
slices.SortFunc(postingInfos, func(a, b postingInfo) int {
|
|
switch {
|
|
case b.metric < a.metric:
|
|
return -1
|
|
case b.metric > a.metric:
|
|
return 1
|
|
default:
|
|
return 0
|
|
}
|
|
})
|
|
|
|
for i, pc := range postingInfos {
|
|
if i >= limit {
|
|
break
|
|
}
|
|
fmt.Printf("%d %s\n", pc.metric, pc.key)
|
|
}
|
|
}
|
|
|
|
labelsUncovered := map[string]uint64{}
|
|
labelpairsUncovered := map[string]uint64{}
|
|
labelpairsCount := map[string]uint64{}
|
|
entries := 0
|
|
var (
|
|
p index.Postings
|
|
refs []storage.SeriesRef
|
|
)
|
|
if len(matchers) > 0 {
|
|
p, err = tsdb.PostingsForMatchers(ctx, ir, selectors...)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
// Expand refs first and cache in memory.
|
|
// So later we don't have to expand again.
|
|
refs, err = index.ExpandPostings(p)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
fmt.Printf("Matched series: %d\n", len(refs))
|
|
p = index.NewListPostings(refs)
|
|
} else {
|
|
p, err = ir.Postings(ctx, "", "") // The special all key.
|
|
if err != nil {
|
|
return err
|
|
}
|
|
}
|
|
|
|
chks := []chunks.Meta{}
|
|
builder := labels.ScratchBuilder{}
|
|
for p.Next() {
|
|
if err = ir.Series(p.At(), &builder, &chks); err != nil {
|
|
return err
|
|
}
|
|
// Amount of the block time range not covered by this series.
|
|
uncovered := uint64(meta.MaxTime-meta.MinTime) - uint64(chks[len(chks)-1].MaxTime-chks[0].MinTime)
|
|
builder.Labels().Range(func(lbl labels.Label) {
|
|
key := lbl.Name + "=" + lbl.Value
|
|
labelsUncovered[lbl.Name] += uncovered
|
|
labelpairsUncovered[key] += uncovered
|
|
labelpairsCount[key]++
|
|
entries++
|
|
})
|
|
}
|
|
if p.Err() != nil {
|
|
return p.Err()
|
|
}
|
|
fmt.Printf("Postings (unique label pairs): %d\n", len(labelpairsUncovered))
|
|
fmt.Printf("Postings entries (total label pairs): %d\n", entries)
|
|
|
|
postingInfos = postingInfos[:0]
|
|
for k, m := range labelpairsUncovered {
|
|
postingInfos = append(postingInfos, postingInfo{k, uint64(float64(m) / float64(meta.MaxTime-meta.MinTime))})
|
|
}
|
|
|
|
fmt.Printf("\nLabel pairs most involved in churning:\n")
|
|
printInfo(postingInfos)
|
|
|
|
postingInfos = postingInfos[:0]
|
|
for k, m := range labelsUncovered {
|
|
postingInfos = append(postingInfos, postingInfo{k, uint64(float64(m) / float64(meta.MaxTime-meta.MinTime))})
|
|
}
|
|
|
|
fmt.Printf("\nLabel names most involved in churning:\n")
|
|
printInfo(postingInfos)
|
|
|
|
postingInfos = postingInfos[:0]
|
|
for k, m := range labelpairsCount {
|
|
postingInfos = append(postingInfos, postingInfo{k, m})
|
|
}
|
|
|
|
fmt.Printf("\nMost common label pairs:\n")
|
|
printInfo(postingInfos)
|
|
|
|
postingInfos = postingInfos[:0]
|
|
for _, n := range allLabelNames {
|
|
values, err := ir.SortedLabelValues(ctx, n, selectors...)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
var cumulativeLength uint64
|
|
for _, str := range values {
|
|
cumulativeLength += uint64(len(str))
|
|
}
|
|
postingInfos = append(postingInfos, postingInfo{n, cumulativeLength})
|
|
}
|
|
|
|
fmt.Printf("\nLabel names with highest cumulative label value length:\n")
|
|
printInfo(postingInfos)
|
|
|
|
postingInfos = postingInfos[:0]
|
|
for _, n := range allLabelNames {
|
|
lv, err := ir.SortedLabelValues(ctx, n, selectors...)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
postingInfos = append(postingInfos, postingInfo{n, uint64(len(lv))})
|
|
}
|
|
fmt.Printf("\nHighest cardinality labels:\n")
|
|
printInfo(postingInfos)
|
|
|
|
postingInfos = postingInfos[:0]
|
|
lv, err := ir.SortedLabelValues(ctx, "__name__", selectors...)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
for _, n := range lv {
|
|
postings, err := ir.Postings(ctx, "__name__", n)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
postings = index.Intersect(postings, index.NewListPostings(refs))
|
|
count := 0
|
|
for postings.Next() {
|
|
count++
|
|
}
|
|
if postings.Err() != nil {
|
|
return postings.Err()
|
|
}
|
|
postingInfos = append(postingInfos, postingInfo{n, uint64(count)})
|
|
}
|
|
fmt.Printf("\nHighest cardinality metric names:\n")
|
|
printInfo(postingInfos)
|
|
|
|
if runExtended {
|
|
return analyzeCompaction(ctx, block, ir, selectors)
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
func analyzeCompaction(ctx context.Context, block tsdb.BlockReader, indexr tsdb.IndexReader, matchers []*labels.Matcher) (err error) {
|
|
var postingsr index.Postings
|
|
if len(matchers) > 0 {
|
|
postingsr, err = tsdb.PostingsForMatchers(ctx, indexr, matchers...)
|
|
} else {
|
|
n, v := index.AllPostingsKey()
|
|
postingsr, err = indexr.Postings(ctx, n, v)
|
|
}
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
chunkr, err := block.Chunks()
|
|
if err != nil {
|
|
return err
|
|
}
|
|
defer func() {
|
|
err = tsdb_errors.NewMulti(err, chunkr.Close()).Err()
|
|
}()
|
|
|
|
totalChunks := 0
|
|
floatChunkSamplesCount := make([]int, 0)
|
|
floatChunkSize := make([]int, 0)
|
|
histogramChunkSamplesCount := make([]int, 0)
|
|
histogramChunkSize := make([]int, 0)
|
|
histogramChunkBucketsCount := make([]int, 0)
|
|
var builder labels.ScratchBuilder
|
|
for postingsr.Next() {
|
|
var chks []chunks.Meta
|
|
if err := indexr.Series(postingsr.At(), &builder, &chks); err != nil {
|
|
return err
|
|
}
|
|
|
|
for _, chk := range chks {
|
|
// Load the actual data of the chunk.
|
|
chk, iterable, err := chunkr.ChunkOrIterable(chk)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
// Chunks within blocks should not need to be re-written, so an
|
|
// iterable is not expected to be returned from the chunk reader.
|
|
if iterable != nil {
|
|
return errors.New("ChunkOrIterable should not return an iterable when reading a block")
|
|
}
|
|
switch chk.Encoding() {
|
|
case chunkenc.EncXOR:
|
|
floatChunkSamplesCount = append(floatChunkSamplesCount, chk.NumSamples())
|
|
floatChunkSize = append(floatChunkSize, len(chk.Bytes()))
|
|
case chunkenc.EncFloatHistogram:
|
|
histogramChunkSamplesCount = append(histogramChunkSamplesCount, chk.NumSamples())
|
|
histogramChunkSize = append(histogramChunkSize, len(chk.Bytes()))
|
|
fhchk, ok := chk.(*chunkenc.FloatHistogramChunk)
|
|
if !ok {
|
|
return fmt.Errorf("chunk is not FloatHistogramChunk")
|
|
}
|
|
it := fhchk.Iterator(nil)
|
|
bucketCount := 0
|
|
for it.Next() == chunkenc.ValFloatHistogram {
|
|
_, f := it.AtFloatHistogram()
|
|
bucketCount += len(f.PositiveBuckets)
|
|
bucketCount += len(f.NegativeBuckets)
|
|
}
|
|
histogramChunkBucketsCount = append(histogramChunkBucketsCount, bucketCount)
|
|
case chunkenc.EncHistogram:
|
|
histogramChunkSamplesCount = append(histogramChunkSamplesCount, chk.NumSamples())
|
|
histogramChunkSize = append(histogramChunkSize, len(chk.Bytes()))
|
|
hchk, ok := chk.(*chunkenc.HistogramChunk)
|
|
if !ok {
|
|
return fmt.Errorf("chunk is not HistogramChunk")
|
|
}
|
|
it := hchk.Iterator(nil)
|
|
bucketCount := 0
|
|
for it.Next() == chunkenc.ValHistogram {
|
|
_, f := it.AtHistogram()
|
|
bucketCount += len(f.PositiveBuckets)
|
|
bucketCount += len(f.NegativeBuckets)
|
|
}
|
|
histogramChunkBucketsCount = append(histogramChunkBucketsCount, bucketCount)
|
|
}
|
|
totalChunks++
|
|
}
|
|
}
|
|
|
|
fmt.Printf("\nCompaction analysis:\n")
|
|
fmt.Println()
|
|
displayHistogram("samples per float chunk", floatChunkSamplesCount, totalChunks)
|
|
|
|
displayHistogram("bytes per float chunk", floatChunkSize, totalChunks)
|
|
|
|
displayHistogram("samples per histogram chunk", histogramChunkSamplesCount, totalChunks)
|
|
|
|
displayHistogram("bytes per histogram chunk", histogramChunkSize, totalChunks)
|
|
|
|
displayHistogram("buckets per histogram chunk", histogramChunkBucketsCount, totalChunks)
|
|
return nil
|
|
}
|
|
|
|
func dumpSamples(ctx context.Context, path string, mint, maxt int64, match string) (err error) {
|
|
db, err := tsdb.OpenDBReadOnly(path, nil)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
defer func() {
|
|
err = tsdb_errors.NewMulti(err, db.Close()).Err()
|
|
}()
|
|
q, err := db.Querier(mint, maxt)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
defer q.Close()
|
|
|
|
matchers, err := parser.ParseMetricSelector(match)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
ss := q.Select(ctx, false, nil, matchers...)
|
|
|
|
for ss.Next() {
|
|
series := ss.At()
|
|
lbs := series.Labels()
|
|
it := series.Iterator(nil)
|
|
for it.Next() == chunkenc.ValFloat {
|
|
ts, val := it.At()
|
|
fmt.Printf("%s %g %d\n", lbs, val, ts)
|
|
}
|
|
for it.Next() == chunkenc.ValFloatHistogram {
|
|
ts, fh := it.AtFloatHistogram()
|
|
fmt.Printf("%s %s %d\n", lbs, fh.String(), ts)
|
|
}
|
|
for it.Next() == chunkenc.ValHistogram {
|
|
ts, h := it.AtHistogram()
|
|
fmt.Printf("%s %s %d\n", lbs, h.String(), ts)
|
|
}
|
|
if it.Err() != nil {
|
|
return ss.Err()
|
|
}
|
|
}
|
|
|
|
if ws := ss.Warnings(); len(ws) > 0 {
|
|
return tsdb_errors.NewMulti(ws.AsErrors()...).Err()
|
|
}
|
|
|
|
if ss.Err() != nil {
|
|
return ss.Err()
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func checkErr(err error) int {
|
|
if err != nil {
|
|
fmt.Fprintln(os.Stderr, err)
|
|
return 1
|
|
}
|
|
return 0
|
|
}
|
|
|
|
func backfillOpenMetrics(path, outputDir string, humanReadable, quiet bool, maxBlockDuration time.Duration) int {
|
|
inputFile, err := fileutil.OpenMmapFile(path)
|
|
if err != nil {
|
|
return checkErr(err)
|
|
}
|
|
defer inputFile.Close()
|
|
|
|
if err := os.MkdirAll(outputDir, 0o777); err != nil {
|
|
return checkErr(fmt.Errorf("create output dir: %w", err))
|
|
}
|
|
|
|
return checkErr(backfill(5000, inputFile.Bytes(), outputDir, humanReadable, quiet, maxBlockDuration))
|
|
}
|
|
|
|
func displayHistogram(dataType string, datas []int, total int) {
|
|
slices.Sort(datas)
|
|
start, end, step := generateBucket(datas[0], datas[len(datas)-1])
|
|
sum := 0
|
|
buckets := make([]int, (end-start)/step+1)
|
|
maxCount := 0
|
|
for _, c := range datas {
|
|
sum += c
|
|
buckets[(c-start)/step]++
|
|
if buckets[(c-start)/step] > maxCount {
|
|
maxCount = buckets[(c-start)/step]
|
|
}
|
|
}
|
|
avg := sum / len(datas)
|
|
fmt.Printf("%s (min/avg/max): %d/%d/%d\n", dataType, datas[0], avg, datas[len(datas)-1])
|
|
maxLeftLen := strconv.Itoa(len(fmt.Sprintf("%d", end)))
|
|
maxRightLen := strconv.Itoa(len(fmt.Sprintf("%d", end+step)))
|
|
maxCountLen := strconv.Itoa(len(fmt.Sprintf("%d", maxCount)))
|
|
for bucket, count := range buckets {
|
|
percentage := 100.0 * count / total
|
|
fmt.Printf("[%"+maxLeftLen+"d, %"+maxRightLen+"d]: %"+maxCountLen+"d %s\n", bucket*step+start+1, (bucket+1)*step+start, count, strings.Repeat("#", percentage))
|
|
}
|
|
fmt.Println()
|
|
}
|
|
|
|
func generateBucket(min, max int) (start, end, step int) {
|
|
s := (max - min) / 10
|
|
|
|
step = 10
|
|
for step < s && step <= 10000 {
|
|
step *= 10
|
|
}
|
|
|
|
start = min - min%step
|
|
end = max - max%step + step
|
|
|
|
return
|
|
}
|