tsdb: remove duplicate values set to reduce memory usage(map overhead) (#7915)

Signed-off-by: Xiaochao Dong (@damnever) <dxc.wolf@gmail.com>
2025-03-05 20:59:13 -08:00 · 2020-09-10 23:05:47 +08:00 · 2020-09-10 23:05:47 +08:00 · a282d25099
parent 90fc6be70f
commit a282d25099
4 changed files with 56 additions and 64 deletions
--- a/tsdb/head.go
+++ b/tsdb/head.go
@ -20,7 +20,6 @@ import (
 	"path/filepath"
 	"runtime"
 	"sort"
-	"strings"
 	"sync"
 	"time"

@ -73,7 +72,6 @@ type Head struct {

 	symMtx  sync.RWMutex
 	symbols map[string]struct{}
-	values  map[string]stringset // Label names to possible values.

 	deletedMtx sync.Mutex
 	deleted    map[uint64]int // Deleted series, and what WAL segment they must be kept until.
@ -303,7 +301,6 @@ func NewHead(r prometheus.Registerer, l log.Logger, wal *wal.WAL, chunkRange int
 		wal:        wal,
 		logger:     l,
 		series:     newStripeSeries(stripeSize, seriesCallback),
-		values:     map[string]stringset{},
 		symbols:    map[string]struct{}{},
 		postings:   index.NewUnorderedMemPostings(),
 		tombstones: tombstones.NewMemTombstones(),
@ -1345,24 +1342,15 @@ func (h *Head) gc() {
 	defer h.symMtx.Unlock()

 	symbols := make(map[string]struct{}, len(h.symbols))
-	values := make(map[string]stringset, len(h.values))
-	if err := h.postings.Iter(func(t labels.Label, _ index.Postings) error {
-		symbols[t.Name] = struct{}{}
-		symbols[t.Value] = struct{}{}
-
-		ss, ok := values[t.Name]
-		if !ok {
-			ss = stringset{}
-			values[t.Name] = ss
-		}
-		ss.set(t.Value)
+	if err := h.postings.Iter(func(l labels.Label, _ index.Postings) error {
+		symbols[l.Name] = struct{}{}
+		symbols[l.Value] = struct{}{}
 		return nil
 	}); err != nil {
 		// This should never happen, as the iteration function only returns nil.
 		panic(err)
 	}
 	h.symbols = symbols
-	h.values = values
 }

 // Tombstones returns a new reader over the head's tombstones
@ -1572,37 +1560,27 @@ func (h *headIndexReader) SortedLabelValues(name string) ([]string, error) {
 // specific label name that are within the time range mint to maxt.
 func (h *headIndexReader) LabelValues(name string) ([]string, error) {
 	h.head.symMtx.RLock()
-
+	defer h.head.symMtx.RUnlock()
 	if h.maxt < h.head.MinTime() || h.mint > h.head.MaxTime() {
-		h.head.symMtx.RUnlock()
 		return []string{}, nil
 	}

-	sl := make([]string, 0, len(h.head.values[name]))
-	for s := range h.head.values[name] {
-		sl = append(sl, s)
-	}
-	h.head.symMtx.RUnlock()
-	return sl, nil
+	values := h.head.postings.LabelValues(name)
+	return values, nil
 }

 // LabelNames returns all the unique label names present in the head
 // that are within the time range mint to maxt.
 func (h *headIndexReader) LabelNames() ([]string, error) {
 	h.head.symMtx.RLock()
-	defer h.head.symMtx.RUnlock()
-
 	if h.maxt < h.head.MinTime() || h.mint > h.head.MaxTime() {
+		h.head.symMtx.RUnlock()
 		return []string{}, nil
 	}

-	labelNames := make([]string, 0, len(h.head.values))
-	for name := range h.head.values {
-		if name == "" {
-			continue
-		}
-		labelNames = append(labelNames, name)
-	}
+	labelNames := h.head.postings.LabelNames()
+	h.head.symMtx.RUnlock()
+
 	sort.Strings(labelNames)
 	return labelNames, nil
 }
@ -1714,13 +1692,6 @@ func (h *Head) getOrCreateWithID(id, hash uint64, lset labels.Labels) (*memSerie
 	defer h.symMtx.Unlock()

 	for _, l := range lset {
-		valset, ok := h.values[l.Name]
-		if !ok {
-			valset = stringset{}
-			h.values[l.Name] = valset
-		}
-		valset.set(l.Value)
-
 		h.symbols[l.Name] = struct{}{}
 		h.symbols[l.Value] = struct{}{}
 	}
@ -2335,25 +2306,6 @@ func (it *memSafeIterator) At() (int64, float64) {
 	return s.t, s.v
 }

-type stringset map[string]struct{}
-
-func (ss stringset) set(s string) {
-	ss[s] = struct{}{}
-}
-
-func (ss stringset) String() string {
-	return strings.Join(ss.slice(), ",")
-}
-
-func (ss stringset) slice() []string {
-	slice := make([]string, 0, len(ss))
-	for k := range ss {
-		slice = append(slice, k)
-	}
-	sort.Strings(slice)
-	return slice
-}
-
 type mmappedChunk struct {
 	ref              uint64
 	numSamples       uint16
--- a/tsdb/head_test.go
+++ b/tsdb/head_test.go
@ -387,11 +387,21 @@ func TestHead_Truncate(t *testing.T) {
 		"2": {},
 	}, h.symbols)

-	testutil.Equals(t, map[string]stringset{
+	values := map[string]map[string]struct{}{}
+	for _, name := range h.postings.LabelNames() {
+		ss, ok := values[name]
+		if !ok {
+			ss = map[string]struct{}{}
+			values[name] = ss
+		}
+		for _, value := range h.postings.LabelValues(name) {
+			ss[value] = struct{}{}
+		}
+	}
+	testutil.Equals(t, map[string]map[string]struct{}{
 		"a": {"1": struct{}{}, "2": struct{}{}},
 		"b": {"1": struct{}{}},
-		"":  {"": struct{}{}},
-	}, h.values)
+	}, values)
 }

 // Validate various behaviors brought on by firstChunkID accounting for
--- a/tsdb/index/postings.go
+++ b/tsdb/index/postings.go
@ -79,6 +79,36 @@ func (p *MemPostings) SortedKeys() []labels.Label {
 	return keys
 }

+// LabelNames returns all the unique label names.
+func (p *MemPostings) LabelNames() []string {
+	p.mtx.RLock()
+	defer p.mtx.RUnlock()
+	n := len(p.m)
+	if n == 0 {
+		return nil
+	}
+
+	names := make([]string, 0, n-1)
+	for name := range p.m {
+		if name != allPostingsKey.Name {
+			names = append(names, name)
+		}
+	}
+	return names
+}
+
+// LabelValues returns label values for the given name.
+func (p *MemPostings) LabelValues(name string) []string {
+	p.mtx.RLock()
+	defer p.mtx.RUnlock()
+
+	values := make([]string, 0, len(p.m[name]))
+	for v := range p.m[name] {
+		values = append(values, v)
+	}
+	return values
+}
+
 // PostingsStats contains cardinality based statistics for postings.
 type PostingsStats struct {
 	CardinalityMetricsStats []Stat
--- a/tsdb/querier_test.go
+++ b/tsdb/querier_test.go
@ -106,7 +106,7 @@ func createIdxChkReaders(t *testing.T, tc []seriesSamples) (IndexReader, ChunkRe

 	postings := index.NewMemPostings()
 	chkReader := mockChunkReader(make(map[uint64]chunkenc.Chunk))
-	lblIdx := make(map[string]stringset)
+	lblIdx := make(map[string]map[string]struct{})
 	mi := newMockIndex()
 	blockMint := int64(math.MaxInt64)
 	blockMaxt := int64(math.MinInt64)
@ -145,10 +145,10 @@ func createIdxChkReaders(t *testing.T, tc []seriesSamples) (IndexReader, ChunkRe
 		for _, l := range ls {
 			vs, present := lblIdx[l.Name]
 			if !present {
-				vs = stringset{}
+				vs = map[string]struct{}{}
 				lblIdx[l.Name] = vs
 			}
-			vs.set(l.Value)
+			vs[l.Value] = struct{}{}
 		}
 	}