[ENHANCEMENT] TSDB: Improve calculation of space used by labels

The labels for each series in the Head take up some space in the
Postings index, but far more space in the `memSeries` structure.

Instead of having the Postings index calculate this overhead, which is
a layering violation, have the caller pass in a function to do it.

Provide three implementations of this function for the three Labels
versions.

Signed-off-by: Bryan Boreham <bjboreham@gmail.com>
This commit is contained in:
Bryan Boreham 2024-03-18 10:58:24 +00:00
parent 302e151de8
commit f96d06a975
6 changed files with 22 additions and 5 deletions

View file

@ -19,6 +19,7 @@ import (
"bytes"
"slices"
"strings"
"unsafe"
"github.com/cespare/xxhash/v2"
)
@ -485,3 +486,8 @@ func (b *ScratchBuilder) Labels() Labels {
func (b *ScratchBuilder) Overwrite(ls *Labels) {
// Truncate *ls to length zero and append the contents of b.add to it, so
// the existing backing array of *ls is reused when it has enough capacity.
*ls = append((*ls)[:0], b.add...)
}
// SizeOfLabels estimates how many bytes n copies of the label string value
// occupy. Each copy is counted as the string's byte length plus the size of
// the string header itself (as reported by unsafe.Sizeof).
func SizeOfLabels(value string, n uint64) uint64 {
	// Cost of a single copy: header first, then the string's bytes.
	perCopy := uint64(unsafe.Sizeof(value)) + uint64(len(value))
	return n * perCopy
}

View file

@ -805,3 +805,8 @@ func (b *ScratchBuilder) Overwrite(ls *Labels) {
ls.syms = b.syms.nameTable
ls.data = yoloString(b.overwriteBuffer)
}
// SizeOfLabels estimates how many bytes n copies of the label string value
// occupy. The bytes of value are counted once, and each of the n copies adds
// a fixed per-reference cost.
func SizeOfLabels(value string, n uint64) uint64 {
	// Assuming most symbol-table entries are 2 bytes long.
	const bytesPerEntry = 2

	stringBytes := uint64(len(value))
	return stringBytes + bytesPerEntry*n
}

View file

@ -701,3 +701,8 @@ func NewScratchBuilderWithSymbolTable(_ *SymbolTable, n int) ScratchBuilder {
func (b *ScratchBuilder) SetSymbolTable(_ *SymbolTable) {
// no-op: the SymbolTable argument is bound to the blank identifier and ignored.
}
// SizeOfLabels estimates how many bytes n copies of the label string value
// occupy. Each copy is counted as the string's byte length plus whatever
// sizeVarint reports for that length (presumably the size of a varint
// length prefix; confirm against sizeVarint).
func SizeOfLabels(value string, n uint64) uint64 {
	valueLen := len(value)
	perCopy := valueLen + sizeVarint(uint64(valueLen))
	return n * uint64(perCopy)
}

View file

@ -1019,7 +1019,7 @@ func (h *Head) PostingsCardinalityStats(statsByLabelName string, limit int) *ind
return h.cardinalityCache
}
h.cardinalityCacheKey = cacheKey
h.cardinalityCache = h.postings.Stats(statsByLabelName, limit)
h.cardinalityCache = h.postings.Stats(statsByLabelName, limit, labels.SizeOfLabels)
h.lastPostingsStatsCall = time.Duration(time.Now().Unix()) * time.Second
return h.cardinalityCache

View file

@ -163,7 +163,8 @@ type PostingsStats struct {
}
// Stats calculates the cardinality statistics from postings.
func (p *MemPostings) Stats(label string, limit int) *PostingsStats {
// Caller can pass in a function which computes the space required for n series with a given label.
func (p *MemPostings) Stats(label string, limit int, bytes func(string, uint64) uint64) *PostingsStats {
var size uint64
p.mtx.RLock()
@ -191,7 +192,7 @@ func (p *MemPostings) Stats(label string, limit int) *PostingsStats {
}
seriesCnt := uint64(len(values))
labelValuePairs.push(Stat{Name: n + "=" + name, Count: seriesCnt})
size += uint64(len(name)) * seriesCnt
size += bytes(name, seriesCnt)
}
labelValueLength.push(Stat{Name: n, Count: size})
}

View file

@ -935,7 +935,7 @@ func BenchmarkPostings_Stats(b *testing.B) {
}
b.ResetTimer()
for n := 0; n < b.N; n++ {
p.Stats("__name__", 10)
p.Stats("__name__", 10, labels.SizeOfLabels)
}
}
@ -950,7 +950,7 @@ func TestMemPostingsStats(t *testing.T) {
p.Add(2, labels.FromStrings("label", "value1"))
// call the Stats method to calculate the cardinality statistics
stats := p.Stats("label", 10)
stats := p.Stats("label", 10, func(s string, n uint64) uint64 { return uint64(len(s)) * n })
// assert that the expected statistics were calculated
require.Equal(t, uint64(2), stats.CardinalityMetricsStats[0].Count)