Compare block sizes with sparse histograms (#9045)
Signed-off-by: Ganesh Vernekar <ganeshvern@gmail.com>
This commit is contained in:
parent 08258cc539
commit 79305e704b
@@ -18,9 +18,11 @@ import (
 	"fmt"
 	"io/ioutil"
 	"math"
+	"math/rand"
 	"os"
 	"path"
 	"path/filepath"
+	"sync"
 	"testing"
 	"time"
 
@@ -1377,3 +1379,314 @@ func TestHeadCompactionWithHistograms(t *testing.T) {
 	require.Equal(t, expHists, actHists)
 }
+
+// Depending on numSeriesPerSchema, this test can take a few gigs of memory;
+// the test adds all samples to the appender before committing, instead of
+// buffering the writes, to make it run faster.
+func TestSparseHistoSpaceSavings(t *testing.T) {
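+	// Skipped by default; remove the t.Skip() call below to run the comparison locally.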
+	t.Skip()
+
+	cases := []struct {
+		numSeriesPerSchema int
+		numBuckets         int
+		numSpans           int
+		gapBetweenSpans    int
+	}{
+		{1, 15, 1, 0},
+		{1, 50, 1, 0},
+		{1, 100, 1, 0},
+		{1, 15, 3, 5},
+		{1, 50, 3, 3},
+		{1, 100, 3, 2},
+		{100, 15, 1, 0},
+		{100, 50, 1, 0},
+		{100, 100, 1, 0},
+		{100, 15, 3, 5},
+		{100, 50, 3, 3},
+		{100, 100, 3, 2},
+		//{1000, 15, 1, 0},
+		//{1000, 50, 1, 0},
+		//{1000, 100, 1, 0},
+		//{1000, 15, 3, 5},
+		//{1000, 50, 3, 3},
+		//{1000, 100, 3, 2},
+	}
+
+	type testSummary struct {
+		oldBlockTotalSeries int
+		oldBlockIndexSize   int64
+		oldBlockChunksSize  int64
+		oldBlockTotalSize   int64
+
+		sparseBlockTotalSeries int
+		sparseBlockIndexSize   int64
+		sparseBlockChunksSize  int64
+		sparseBlockTotalSize   int64
+
+		numBuckets      int
+		numSpans        int
+		gapBetweenSpans int
+	}
+
+	var summaries []testSummary
+
+	allSchemas := []int{-4, -3, -2, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8}
+	schemaDescription := []string{"minus_4", "minus_3", "minus_2", "minus_1", "0", "1", "2", "3", "4", "5", "6", "7", "8"}
+	numHistograms := 120 * 4 // 15s scrape interval.
+	timeStep := DefaultBlockDuration / int64(numHistograms)
+	for _, c := range cases {
+		t.Run(
+			fmt.Sprintf("series=%d,span=%d,gap=%d,buckets=%d",
+				len(allSchemas)*c.numSeriesPerSchema,
+				c.numSpans,
+				c.gapBetweenSpans,
+				c.numBuckets,
+			),
+			func(t *testing.T) {
+				oldHead, _ := newTestHead(t, DefaultBlockDuration, false)
+				t.Cleanup(func() {
+					require.NoError(t, oldHead.Close())
+				})
+				sparseHead, _ := newTestHead(t, DefaultBlockDuration, false)
+				t.Cleanup(func() {
+					require.NoError(t, sparseHead.Close())
+				})
+
+				var allSparseSeries []struct {
+					baseLabels labels.Labels
+					hists      []histogram.SparseHistogram
+				}
+
+				for sid, schema := range allSchemas {
+					for i := 0; i < c.numSeriesPerSchema; i++ {
+						lbls := labels.Labels{
+							{Name: "__name__", Value: fmt.Sprintf("rpc_durations_%d_histogram_seconds", i)},
+							{Name: "instance", Value: "localhost:8080"},
+							{Name: "job", Value: fmt.Sprintf("sparse_histogram_schema_%s", schemaDescription[sid])},
+						}
+						allSparseSeries = append(allSparseSeries, struct {
+							baseLabels labels.Labels
+							hists      []histogram.SparseHistogram
+						}{baseLabels: lbls, hists: generateCustomHistograms(numHistograms, c.numBuckets, c.numSpans, c.gapBetweenSpans, schema)})
+					}
+				}
+
+				oldApp := oldHead.Appender(context.Background())
+				sparseApp := sparseHead.Appender(context.Background())
+				numOldSeriesPerHistogram := 0
+
+				var oldULID ulid.ULID
+				var sparseULID ulid.ULID
+
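+				// Ingest and compact the two representations concurrently, one goroutine per head.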
+				var wg sync.WaitGroup
+
+				wg.Add(1)
+				go func() {
+					defer wg.Done()
+
+					// Ingest sparse histograms.
+					for _, ah := range allSparseSeries {
+						var (
+							ref uint64
+							err error
+						)
+						for i := 0; i < numHistograms; i++ {
+							ts := int64(i) * timeStep
+							ref, err = sparseApp.AppendHistogram(ref, ah.baseLabels, ts, ah.hists[i])
+							require.NoError(t, err)
+						}
+					}
+					require.NoError(t, sparseApp.Commit())
+
+					// Sparse head compaction.
+					mint := sparseHead.MinTime()
+					maxt := sparseHead.MaxTime() + 1 // Block intervals are half-open: [b.MinTime, b.MaxTime).
+					compactor, err := NewLeveledCompactor(context.Background(), nil, nil, []int64{DefaultBlockDuration}, chunkenc.NewPool(), nil)
+					require.NoError(t, err)
+					sparseULID, err = compactor.Write(sparseHead.opts.ChunkDirRoot, sparseHead, mint, maxt, nil)
+					require.NoError(t, err)
+					require.NotEqual(t, ulid.ULID{}, sparseULID)
+				}()
+
+				wg.Add(1)
+				go func() {
+					defer wg.Done()
+
+					// Ingest histograms the old way.
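+					// Each sparse histogram is expanded into one cumulative series per le bucket,
+					// plus separate _count and _sum series, mirroring the classic histogram layout.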
+					for _, ah := range allSparseSeries {
+						refs := make([]uint64, c.numBuckets+((c.numSpans-1)*c.gapBetweenSpans))
+						for i := 0; i < numHistograms; i++ {
+							ts := int64(i) * timeStep
+
+							h := ah.hists[i]
+
+							numOldSeriesPerHistogram = 0
+							it := histogram.CumulativeExpandSparseHistogram(h)
+							itIdx := 0
+							var err error
+							for it.Next() {
+								numOldSeriesPerHistogram++
+								b := it.At()
+								lbls := append(ah.baseLabels, labels.Label{Name: "le", Value: fmt.Sprintf("%.16f", b.Le)})
+								refs[itIdx], err = oldApp.Append(refs[itIdx], lbls, ts, float64(b.Count))
+								require.NoError(t, err)
+								itIdx++
+							}
+							require.NoError(t, it.Err())
+							// _count metric.
+							countLbls := ah.baseLabels.Copy()
+							countLbls[0].Value = countLbls[0].Value + "_count"
+							_, err = oldApp.Append(0, countLbls, ts, float64(h.Count))
+							require.NoError(t, err)
+							numOldSeriesPerHistogram++
+
+							// _sum metric.
+							sumLbls := ah.baseLabels.Copy()
+							sumLbls[0].Value = sumLbls[0].Value + "_sum"
+							_, err = oldApp.Append(0, sumLbls, ts, h.Sum)
+							require.NoError(t, err)
+							numOldSeriesPerHistogram++
+						}
+					}
+
+					require.NoError(t, oldApp.Commit())
+
+					// Old head compaction.
+					mint := oldHead.MinTime()
+					maxt := oldHead.MaxTime() + 1 // Block intervals are half-open: [b.MinTime, b.MaxTime).
+					compactor, err := NewLeveledCompactor(context.Background(), nil, nil, []int64{DefaultBlockDuration}, chunkenc.NewPool(), nil)
+					require.NoError(t, err)
+					oldULID, err = compactor.Write(oldHead.opts.ChunkDirRoot, oldHead, mint, maxt, nil)
+					require.NoError(t, err)
+					require.NotEqual(t, ulid.ULID{}, oldULID)
+				}()
+
+				wg.Wait()
+
+				oldBlockDir := filepath.Join(oldHead.opts.ChunkDirRoot, oldULID.String())
+				sparseBlockDir := filepath.Join(sparseHead.opts.ChunkDirRoot, sparseULID.String())
+
+				oldSize, err := fileutil.DirSize(oldBlockDir)
+				require.NoError(t, err)
+				oldIndexSize, err := fileutil.DirSize(filepath.Join(oldBlockDir, "index"))
+				require.NoError(t, err)
+				oldChunksSize, err := fileutil.DirSize(filepath.Join(oldBlockDir, "chunks"))
+				require.NoError(t, err)
+
+				sparseSize, err := fileutil.DirSize(sparseBlockDir)
+				require.NoError(t, err)
+				sparseIndexSize, err := fileutil.DirSize(filepath.Join(sparseBlockDir, "index"))
+				require.NoError(t, err)
+				sparseChunksSize, err := fileutil.DirSize(filepath.Join(sparseBlockDir, "chunks"))
+				require.NoError(t, err)
+
+				summaries = append(summaries, testSummary{
+					oldBlockTotalSeries:    len(allSchemas) * c.numSeriesPerSchema * numOldSeriesPerHistogram,
+					oldBlockIndexSize:      oldIndexSize,
+					oldBlockChunksSize:     oldChunksSize,
+					oldBlockTotalSize:      oldSize,
+					sparseBlockTotalSeries: len(allSchemas) * c.numSeriesPerSchema,
+					sparseBlockIndexSize:   sparseIndexSize,
+					sparseBlockChunksSize:  sparseChunksSize,
+					sparseBlockTotalSize:   sparseSize,
+					numBuckets:             c.numBuckets,
+					numSpans:               c.numSpans,
+					gapBetweenSpans:        c.gapBetweenSpans,
+				})
+			})
+	}
+
+	for _, s := range summaries {
+		fmt.Printf(`
+Meta: NumBuckets=%d, NumSpans=%d, GapBetweenSpans=%d
+Old Block: NumSeries=%d, IndexSize=%d, ChunksSize=%d, TotalSize=%d
+Sparse Block: NumSeries=%d, IndexSize=%d, ChunksSize=%d, TotalSize=%d
+Savings: Index=%.2f%%, Chunks=%.2f%%, Total=%.2f%%
+`,
+			s.numBuckets, s.numSpans, s.gapBetweenSpans,
+			s.oldBlockTotalSeries, s.oldBlockIndexSize, s.oldBlockChunksSize, s.oldBlockTotalSize,
+			s.sparseBlockTotalSeries, s.sparseBlockIndexSize, s.sparseBlockChunksSize, s.sparseBlockTotalSize,
+			100*(1-float64(s.sparseBlockIndexSize)/float64(s.oldBlockIndexSize)),
+			100*(1-float64(s.sparseBlockChunksSize)/float64(s.oldBlockChunksSize)),
+			100*(1-float64(s.sparseBlockTotalSize)/float64(s.oldBlockTotalSize)),
+		)
+	}
+}
+
+func generateCustomHistograms(numHists, numBuckets, numSpans, gapBetweenSpans, schema int) (r []histogram.SparseHistogram) {
+	// First histogram with all the settings.
+	h := histogram.SparseHistogram{
+		Sum:    1000 * rand.Float64(),
+		Schema: int32(schema),
+	}
+
+	// Generate spans.
+	h.PositiveSpans = []histogram.Span{
+		{Offset: int32(rand.Intn(10)), Length: uint32(numBuckets)},
+	}
+	if numSpans > 1 {
+		spanWidth := numBuckets / numSpans
+		// First span gets those additional buckets.
+		h.PositiveSpans[0].Length = uint32(spanWidth + (numBuckets - spanWidth*numSpans))
+		for i := 0; i < numSpans-1; i++ {
+			h.PositiveSpans = append(h.PositiveSpans, histogram.Span{Offset: int32(rand.Intn(gapBetweenSpans) + 1), Length: uint32(spanWidth)})
+		}
+	}
+
+	// Generate buckets.
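+	// PositiveBuckets is delta encoded: the first entry is an absolute count and each
+	// following entry stores the difference from the previous bucket, while
+	// firstHistValues tracks the absolute counts for use by the later histograms.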
+	v := int64(rand.Intn(30) + 1)
+	h.PositiveBuckets = []int64{v}
+	count := v
+	firstHistValues := []int64{v}
+	for i := 0; i < numBuckets-1; i++ {
+		delta := int64(rand.Intn(20))
+		if rand.Int()%2 == 0 && firstHistValues[len(firstHistValues)-1] > delta {
+			// Randomly making delta negative such that curr value will be >0.
+			delta = -delta
+		}
+
+		currVal := firstHistValues[len(firstHistValues)-1] + delta
+		count += currVal
+		firstHistValues = append(firstHistValues, currVal)
+
+		h.PositiveBuckets = append(h.PositiveBuckets, delta)
+	}
+
+	h.Count = uint64(count)
+
+	r = append(r, h)
+
+	// Remaining histograms with same spans but changed bucket values.
+	for j := 0; j < numHists-1; j++ {
+		newH := h.Copy()
+		newH.Sum = float64(j+1) * 1000 * rand.Float64()
+
+		// Generate buckets.
+		count := int64(0)
+		currVal := int64(0)
+		for i := range newH.PositiveBuckets {
+			delta := int64(rand.Intn(10))
+			if i == 0 {
+				newH.PositiveBuckets[i] += delta
+				currVal = newH.PositiveBuckets[i]
+				continue
+			}
+			currVal += newH.PositiveBuckets[i]
+			if rand.Int()%2 == 0 && (currVal-delta) > firstHistValues[i] {
+				// Randomly making delta negative such that curr value will be >0
+				// and above the previous count since we are not doing resets here.
+				delta = -delta
+			}
+			newH.PositiveBuckets[i] += delta
+			currVal += delta
+			count += currVal
+		}
+
+		newH.Count = uint64(count)
+
+		r = append(r, newH)
+		h = newH
+	}
+
+	return r
+}