tsdb: remove duplicate values set to reduce memory usage (map overhead) (#7915)

Signed-off-by: Xiaochao Dong (@damnever) <dxc.wolf@gmail.com>
2025-03-05 20:59:13 -08:00 · 2020-09-10 23:05:47 +08:00 · 2020-09-10 23:05:47 +08:00 · a282d25099
parent 90fc6be70f
commit a282d25099
4 changed files with 56 additions and 64 deletions
--- a/tsdb/head.go
+++ b/tsdb/head.go
@ -20,7 +20,6 @@ import (
 	"path/filepath"
 	"runtime"
 	"sort"
 	"strings"
 	"sync"
 	"time"
@ -73,7 +72,6 @@ type Head struct {
 	symMtx  sync.RWMutex
 	symbols map[string]struct{}
 	values  map[string]stringset // Label names to possible values.
 	deletedMtx sync.Mutex
 	deleted    map[uint64]int // Deleted series, and what WAL segment they must be kept until.
@ -303,7 +301,6 @@ func NewHead(r prometheus.Registerer, l log.Logger, wal *wal.WAL, chunkRange int
 		wal:        wal,
 		logger:     l,
 		series:     newStripeSeries(stripeSize, seriesCallback),
 		values:     map[string]stringset{},
 		symbols:    map[string]struct{}{},
 		postings:   index.NewUnorderedMemPostings(),
 		tombstones: tombstones.NewMemTombstones(),
@ -1345,24 +1342,15 @@ func (h *Head) gc() {
 	defer h.symMtx.Unlock()
 	symbols := make(map[string]struct{}, len(h.symbols))
-	values := make(map[string]stringset, len(h.values))
+	if err := h.postings.Iter(func(l labels.Label, _ index.Postings) error {
-	if err := h.postings.Iter(func(t labels.Label, _ index.Postings) error {
+		symbols[l.Name] = struct{}{}
-		symbols[t.Name] = struct{}{}
+		symbols[l.Value] = struct{}{}
 		symbols[t.Value] = struct{}{}
 		ss, ok := values[t.Name]
 		if !ok {
 			ss = stringset{}
 			values[t.Name] = ss
 		}
 		ss.set(t.Value)
 		return nil
 	}); err != nil {
 		// This should never happen, as the iteration function only returns nil.
 		panic(err)
 	}
 	h.symbols = symbols
 	h.values = values
 }
 // Tombstones returns a new reader over the head's tombstones
@ -1572,37 +1560,27 @@ func (h *headIndexReader) SortedLabelValues(name string) ([]string, error) {
 // specific label name that are within the time range mint to maxt.
 func (h *headIndexReader) LabelValues(name string) ([]string, error) {
 	h.head.symMtx.RLock()
-
+	defer h.head.symMtx.RUnlock()
 	if h.maxt < h.head.MinTime() || h.mint > h.head.MaxTime() {
 		h.head.symMtx.RUnlock()
 		return []string{}, nil
 	}
-	sl := make([]string, 0, len(h.head.values[name]))
+	values := h.head.postings.LabelValues(name)
-	for s := range h.head.values[name] {
+	return values, nil
 		sl = append(sl, s)
 	}
 	h.head.symMtx.RUnlock()
 	return sl, nil
 }
 // LabelNames returns all the unique label names present in the head
 // that are within the time range mint to maxt.
 func (h *headIndexReader) LabelNames() ([]string, error) {
 	h.head.symMtx.RLock()
 	defer h.head.symMtx.RUnlock()
 	if h.maxt < h.head.MinTime() || h.mint > h.head.MaxTime() {
 		h.head.symMtx.RUnlock()
 		return []string{}, nil
 	}
-	labelNames := make([]string, 0, len(h.head.values))
+	labelNames := h.head.postings.LabelNames()
-	for name := range h.head.values {
+	h.head.symMtx.RUnlock()
-		if name == "" {
+
 			continue
 		}
 		labelNames = append(labelNames, name)
 	}
 	sort.Strings(labelNames)
 	return labelNames, nil
 }
@ -1714,13 +1692,6 @@ func (h *Head) getOrCreateWithID(id, hash uint64, lset labels.Labels) (*memSerie
 	defer h.symMtx.Unlock()
 	for _, l := range lset {
 		valset, ok := h.values[l.Name]
 		if !ok {
 			valset = stringset{}
 			h.values[l.Name] = valset
 		}
 		valset.set(l.Value)
 		h.symbols[l.Name] = struct{}{}
 		h.symbols[l.Value] = struct{}{}
 	}
@ -2335,25 +2306,6 @@ func (it *memSafeIterator) At() (int64, float64) {
 	return s.t, s.v
 }
 // stringset is a set of strings, represented as a map with empty-struct values.
 type stringset map[string]struct{}
 // set inserts s into the set; inserting an existing member changes nothing.
 func (ss stringset) set(s string) {
 	ss[s] = struct{}{}
 }
 // String renders the members in sorted order, joined by commas.
 func (ss stringset) String() string {
 	members := ss.slice()
 	return strings.Join(members, ",")
 }
 // slice returns the members as a newly allocated, sorted slice.
 // The result is non-nil even when the set is empty.
 func (ss stringset) slice() []string {
 	members := make([]string, len(ss))
 	next := 0
 	for member := range ss {
 		members[next] = member
 		next++
 	}
 	// Map iteration order is random, so sort for a deterministic result.
 	sort.Strings(members)
 	return members
 }
 type mmappedChunk struct {
 	ref              uint64
 	numSamples       uint16
--- a/tsdb/head_test.go
+++ b/tsdb/head_test.go
@ -387,11 +387,21 @@ func TestHead_Truncate(t *testing.T) {
 		"2": {},
 	}, h.symbols)
-	testutil.Equals(t, map[string]stringset{
+	values := map[string]map[string]struct{}{}
 	for _, name := range h.postings.LabelNames() {
 		ss, ok := values[name]
 		if !ok {
 			ss = map[string]struct{}{}
 			values[name] = ss
 		}
 		for _, value := range h.postings.LabelValues(name) {
 			ss[value] = struct{}{}
 		}
 	}
 	testutil.Equals(t, map[string]map[string]struct{}{
 		"a": {"1": struct{}{}, "2": struct{}{}},
 		"b": {"1": struct{}{}},
-		"":  {"": struct{}{}},
+	}, values)
 	}, h.values)
 }
 // Validate various behaviors brought on by firstChunkID accounting for
--- a/tsdb/index/postings.go
+++ b/tsdb/index/postings.go
@ -79,6 +79,36 @@ func (p *MemPostings) SortedKeys() []labels.Label {
 	return keys
 }
 // LabelNames returns every label name held in the postings map, excluding
 // the name of allPostingsKey. It returns nil when the map is empty. The
 // names come out in map iteration order, i.e. unsorted.
 func (p *MemPostings) LabelNames() []string {
 	p.mtx.RLock()
 	defer p.mtx.RUnlock()
 	if len(p.m) == 0 {
 		return nil
 	}
 	// One slot fewer than the map size: presumably the all-postings key is
 	// expected to be present and is filtered out below.
 	out := make([]string, 0, len(p.m)-1)
 	for key := range p.m {
 		if key == allPostingsKey.Name {
 			continue
 		}
 		out = append(out, key)
 	}
 	return out
 }
 // LabelValues returns the values recorded for the given label name. The
 // values come out in map iteration order, i.e. unsorted. An unknown name
 // yields an empty, non-nil slice.
 func (p *MemPostings) LabelValues(name string) []string {
 	p.mtx.RLock()
 	defer p.mtx.RUnlock()
 	entries := p.m[name]
 	out := make([]string, 0, len(entries))
 	for value := range entries {
 		out = append(out, value)
 	}
 	return out
 }
 // PostingsStats contains cardinality based statistics for postings.
 type PostingsStats struct {
 	CardinalityMetricsStats []Stat
--- a/tsdb/querier_test.go
+++ b/tsdb/querier_test.go
@ -106,7 +106,7 @@ func createIdxChkReaders(t *testing.T, tc []seriesSamples) (IndexReader, ChunkRe
 	postings := index.NewMemPostings()
 	chkReader := mockChunkReader(make(map[uint64]chunkenc.Chunk))
-	lblIdx := make(map[string]stringset)
+	lblIdx := make(map[string]map[string]struct{})
 	mi := newMockIndex()
 	blockMint := int64(math.MaxInt64)
 	blockMaxt := int64(math.MinInt64)
@ -145,10 +145,10 @@ func createIdxChkReaders(t *testing.T, tc []seriesSamples) (IndexReader, ChunkRe
 		for _, l := range ls {
 			vs, present := lblIdx[l.Name]
 			if !present {
-				vs = stringset{}
+				vs = map[string]struct{}{}
 				lblIdx[l.Name] = vs
 			}
-			vs.set(l.Value)
+			vs[l.Value] = struct{}{}
 		}
 	}