[ENHANCEMENT] TSDB: Improve calculation of space used by labels

The labels for each series in the Head take up some space in the
Postings index, but far more space in the `memSeries` structure.

Instead of having the Postings index calculate this overhead, which is
a layering violation, have the caller pass in a function to do it.

Provide three implementations of this function for the three Labels
versions.

Signed-off-by: Bryan Boreham <bjboreham@gmail.com>
This commit is contained in:
Bryan Boreham 2024-03-18 10:58:24 +00:00
parent 2898d5d715
commit bd4fd1d2f7
6 changed files with 22 additions and 5 deletions

View file

@ -19,6 +19,7 @@ import (
"bytes" "bytes"
"slices" "slices"
"strings" "strings"
"unsafe"
"github.com/cespare/xxhash/v2" "github.com/cespare/xxhash/v2"
) )
@ -487,3 +488,8 @@ func (b *ScratchBuilder) Labels() Labels {
func (b *ScratchBuilder) Overwrite(ls *Labels) { func (b *ScratchBuilder) Overwrite(ls *Labels) {
*ls = append((*ls)[:0], b.add...) *ls = append((*ls)[:0], b.add...)
} }
// SizeOfLabels estimates the number of bytes needed to hold n copies of the
// label string value. Every copy is charged for the bytes of the string
// itself plus the size of a Go string header (unsafe.Sizeof of a string).
func SizeOfLabels(value string, n uint64) uint64 {
	perCopy := uint64(unsafe.Sizeof(value)) + uint64(len(value))
	return n * perCopy
}

View file

@ -815,3 +815,8 @@ func (b *ScratchBuilder) Overwrite(ls *Labels) {
ls.syms = b.syms.nameTable ls.syms = b.syms.nameTable
ls.data = yoloString(b.overwriteBuffer) ls.data = yoloString(b.overwriteBuffer)
} }
// SizeOfLabels estimates the number of bytes needed to hold n copies of the
// label string value. The string's bytes are counted once, and every copy
// adds a fixed two bytes on top of that.
func SizeOfLabels(value string, n uint64) uint64 {
	// Assuming most symbol-table entries are 2 bytes long.
	const bytesPerCopy = 2
	return bytesPerCopy*n + uint64(len(value))
}

View file

@ -694,3 +694,8 @@ func NewScratchBuilderWithSymbolTable(_ *SymbolTable, n int) ScratchBuilder {
func (b *ScratchBuilder) SetSymbolTable(_ *SymbolTable) { func (b *ScratchBuilder) SetSymbolTable(_ *SymbolTable) {
// no-op // no-op
} }
// SizeOfLabels estimates the number of bytes needed to hold n copies of the
// label string value. Every copy is charged for the bytes of the string plus
// sizeVarint of its length (presumably the varint-encoded length prefix
// stored alongside each string; confirm against sizeVarint).
func SizeOfLabels(value string, n uint64) uint64 {
	valueLen := len(value)
	perCopy := valueLen + sizeVarint(uint64(valueLen))
	return uint64(perCopy) * n
}

View file

@ -1036,7 +1036,7 @@ func (h *Head) PostingsCardinalityStats(statsByLabelName string, limit int) *ind
return h.cardinalityCache return h.cardinalityCache
} }
h.cardinalityCacheKey = cacheKey h.cardinalityCacheKey = cacheKey
h.cardinalityCache = h.postings.Stats(statsByLabelName, limit) h.cardinalityCache = h.postings.Stats(statsByLabelName, limit, labels.SizeOfLabels)
h.lastPostingsStatsCall = time.Duration(time.Now().Unix()) * time.Second h.lastPostingsStatsCall = time.Duration(time.Now().Unix()) * time.Second
return h.cardinalityCache return h.cardinalityCache

View file

@ -163,7 +163,8 @@ type PostingsStats struct {
} }
// Stats calculates the cardinality statistics from postings. // Stats calculates the cardinality statistics from postings.
func (p *MemPostings) Stats(label string, limit int) *PostingsStats { // Caller can pass in a function which computes the space required for n series with a given label.
func (p *MemPostings) Stats(label string, limit int, bytes func(string, uint64) uint64) *PostingsStats {
var size uint64 var size uint64
p.mtx.RLock() p.mtx.RLock()
@ -191,7 +192,7 @@ func (p *MemPostings) Stats(label string, limit int) *PostingsStats {
} }
seriesCnt := uint64(len(values)) seriesCnt := uint64(len(values))
labelValuePairs.push(Stat{Name: n + "=" + name, Count: seriesCnt}) labelValuePairs.push(Stat{Name: n + "=" + name, Count: seriesCnt})
size += uint64(len(name)) * seriesCnt size += bytes(name, seriesCnt)
} }
labelValueLength.push(Stat{Name: n, Count: size}) labelValueLength.push(Stat{Name: n, Count: size})
} }

View file

@ -939,7 +939,7 @@ func BenchmarkPostings_Stats(b *testing.B) {
} }
b.ResetTimer() b.ResetTimer()
for n := 0; n < b.N; n++ { for n := 0; n < b.N; n++ {
p.Stats("__name__", 10) p.Stats("__name__", 10, labels.SizeOfLabels)
} }
} }
@ -954,7 +954,7 @@ func TestMemPostingsStats(t *testing.T) {
p.Add(2, labels.FromStrings("label", "value1")) p.Add(2, labels.FromStrings("label", "value1"))
// call the Stats method to calculate the cardinality statistics // call the Stats method to calculate the cardinality statistics
stats := p.Stats("label", 10) stats := p.Stats("label", 10, func(s string, n uint64) uint64 { return uint64(len(s)) * n })
// assert that the expected statistics were calculated // assert that the expected statistics were calculated
require.Equal(t, uint64(2), stats.CardinalityMetricsStats[0].Count) require.Equal(t, uint64(2), stats.CardinalityMetricsStats[0].Count)