tsdb: remove duplicate values set to reduce memory usage (map overhead) (#7915)

Signed-off-by: Xiaochao Dong (@damnever) <dxc.wolf@gmail.com>
This commit is contained in:
Xiaochao Dong 2020-09-10 23:05:47 +08:00 committed by GitHub
parent 90fc6be70f
commit a282d25099
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 56 additions and 64 deletions

View file

@ -20,7 +20,6 @@ import (
"path/filepath"
"runtime"
"sort"
"strings"
"sync"
"time"
@ -73,7 +72,6 @@ type Head struct {
symMtx sync.RWMutex
symbols map[string]struct{}
values map[string]stringset // Label names to possible values.
deletedMtx sync.Mutex
deleted map[uint64]int // Deleted series, and what WAL segment they must be kept until.
@ -303,7 +301,6 @@ func NewHead(r prometheus.Registerer, l log.Logger, wal *wal.WAL, chunkRange int
wal: wal,
logger: l,
series: newStripeSeries(stripeSize, seriesCallback),
values: map[string]stringset{},
symbols: map[string]struct{}{},
postings: index.NewUnorderedMemPostings(),
tombstones: tombstones.NewMemTombstones(),
@ -1345,24 +1342,15 @@ func (h *Head) gc() {
defer h.symMtx.Unlock()
symbols := make(map[string]struct{}, len(h.symbols))
values := make(map[string]stringset, len(h.values))
if err := h.postings.Iter(func(t labels.Label, _ index.Postings) error {
symbols[t.Name] = struct{}{}
symbols[t.Value] = struct{}{}
ss, ok := values[t.Name]
if !ok {
ss = stringset{}
values[t.Name] = ss
}
ss.set(t.Value)
if err := h.postings.Iter(func(l labels.Label, _ index.Postings) error {
symbols[l.Name] = struct{}{}
symbols[l.Value] = struct{}{}
return nil
}); err != nil {
// This should never happen, as the iteration function only returns nil.
panic(err)
}
h.symbols = symbols
h.values = values
}
// Tombstones returns a new reader over the head's tombstones
@ -1572,37 +1560,27 @@ func (h *headIndexReader) SortedLabelValues(name string) ([]string, error) {
// specific label name that are within the time range mint to maxt.
func (h *headIndexReader) LabelValues(name string) ([]string, error) {
	// The deferred RUnlock is the only release of symMtx. No return path may
	// unlock explicitly as well: releasing an RWMutex that is not read-locked
	// is a fatal runtime error.
	h.head.symMtx.RLock()
	defer h.head.symMtx.RUnlock()

	// The reader's [mint, maxt] window lies entirely outside the head's time
	// range, so report an empty (non-nil) result.
	if h.maxt < h.head.MinTime() || h.mint > h.head.MaxTime() {
		return []string{}, nil
	}

	// The values are taken from the head's postings rather than from a
	// separate per-name value set. They are returned in the order the
	// postings hand them back; no sorting is done here.
	return h.head.postings.LabelValues(name), nil
}
// LabelNames returns all the unique label names present in the head
// that are within the time range mint to maxt. The names are returned in
// sorted order; when the reader's time range does not overlap the head's,
// an empty non-nil slice is returned. The error is always nil.
func (h *headIndexReader) LabelNames() ([]string, error) {
	// The deferred RUnlock is the only release of symMtx. No return path may
	// unlock explicitly as well: releasing an RWMutex that is not read-locked
	// is a fatal runtime error.
	h.head.symMtx.RLock()
	defer h.head.symMtx.RUnlock()

	if h.maxt < h.head.MinTime() || h.mint > h.head.MaxTime() {
		return []string{}, nil
	}

	// The names come from the head's postings, which hand them back in no
	// particular order, so sort before returning.
	labelNames := h.head.postings.LabelNames()
	sort.Strings(labelNames)
	return labelNames, nil
}
@ -1714,13 +1692,6 @@ func (h *Head) getOrCreateWithID(id, hash uint64, lset labels.Labels) (*memSerie
defer h.symMtx.Unlock()
for _, l := range lset {
valset, ok := h.values[l.Name]
if !ok {
valset = stringset{}
h.values[l.Name] = valset
}
valset.set(l.Value)
h.symbols[l.Name] = struct{}{}
h.symbols[l.Value] = struct{}{}
}
@ -2335,25 +2306,6 @@ func (it *memSafeIterator) At() (int64, float64) {
return s.t, s.v
}
// stringset is a set of strings, represented as a map with empty-struct values.
type stringset map[string]struct{}

// set inserts s into the set; inserting an existing member changes nothing.
func (ss stringset) set(s string) {
	ss[s] = struct{}{}
}

// String renders the members in sorted order, separated by commas.
func (ss stringset) String() string {
	members := ss.slice()
	return strings.Join(members, ",")
}

// slice returns the members as a newly allocated, sorted slice.
func (ss stringset) slice() []string {
	members := make([]string, len(ss))
	i := 0
	for member := range ss {
		members[i] = member
		i++
	}
	sort.Strings(members)
	return members
}
type mmappedChunk struct {
ref uint64
numSamples uint16

View file

@ -387,11 +387,21 @@ func TestHead_Truncate(t *testing.T) {
"2": {},
}, h.symbols)
testutil.Equals(t, map[string]stringset{
values := map[string]map[string]struct{}{}
for _, name := range h.postings.LabelNames() {
ss, ok := values[name]
if !ok {
ss = map[string]struct{}{}
values[name] = ss
}
for _, value := range h.postings.LabelValues(name) {
ss[value] = struct{}{}
}
}
testutil.Equals(t, map[string]map[string]struct{}{
"a": {"1": struct{}{}, "2": struct{}{}},
"b": {"1": struct{}{}},
"": {"": struct{}{}},
}, h.values)
}, values)
}
// Validate various behaviors brought on by firstChunkID accounting for

View file

@ -79,6 +79,36 @@ func (p *MemPostings) SortedKeys() []labels.Label {
return keys
}
// LabelNames returns every label name that has an entry in the postings map,
// leaving out the name of the all-postings key. The result is nil when the
// map is empty and is otherwise in map-iteration (unspecified) order.
func (p *MemPostings) LabelNames() []string {
	p.mtx.RLock()
	defer p.mtx.RUnlock()

	total := len(p.m)
	if total == 0 {
		return nil
	}

	// Reserve one slot fewer than the number of keys, since the all-postings
	// name is skipped below; append still grows the slice if that name is
	// not among the keys.
	result := make([]string, 0, total-1)
	for key := range p.m {
		if key == allPostingsKey.Name {
			continue
		}
		result = append(result, key)
	}
	return result
}
// LabelValues returns the values recorded for the given label name. The
// result is always non-nil (empty when the name has no entries) and is in
// map-iteration (unspecified) order.
func (p *MemPostings) LabelValues(name string) []string {
	p.mtx.RLock()
	defer p.mtx.RUnlock()

	entries := p.m[name]
	out := make([]string, 0, len(entries))
	for value := range entries {
		out = append(out, value)
	}
	return out
}
// PostingsStats contains cardinality based statistics for postings.
type PostingsStats struct {
CardinalityMetricsStats []Stat

View file

@ -106,7 +106,7 @@ func createIdxChkReaders(t *testing.T, tc []seriesSamples) (IndexReader, ChunkRe
postings := index.NewMemPostings()
chkReader := mockChunkReader(make(map[uint64]chunkenc.Chunk))
lblIdx := make(map[string]stringset)
lblIdx := make(map[string]map[string]struct{})
mi := newMockIndex()
blockMint := int64(math.MaxInt64)
blockMaxt := int64(math.MinInt64)
@ -145,10 +145,10 @@ func createIdxChkReaders(t *testing.T, tc []seriesSamples) (IndexReader, ChunkRe
for _, l := range ls {
vs, present := lblIdx[l.Name]
if !present {
vs = stringset{}
vs = map[string]struct{}{}
lblIdx[l.Name] = vs
}
vs.set(l.Value)
vs[l.Value] = struct{}{}
}
}