From 082c17b691e2b1926b8968bed47476a6b686c47b Mon Sep 17 00:00:00 2001 From: Ganesh Vernekar <15064823+codesome@users.noreply.github.com> Date: Thu, 25 Jun 2020 18:40:29 +0530 Subject: [PATCH] Introduce SortedLabelValues/LabelValues to speedup queries for high cardinality (#7448) * Introduce LabelValuesUnsorted to speedup queries for high cardinality Signed-off-by: Ganesh Vernekar * Add sort check Signed-off-by: Ganesh Vernekar --- tsdb/block.go | 10 +++++++++- tsdb/cmd/tsdb/main.go | 6 +++--- tsdb/head.go | 11 ++++++++++- tsdb/head_test.go | 2 +- tsdb/index/index.go | 12 +++++++++++- tsdb/index/index_test.go | 2 +- tsdb/querier.go | 18 +++++++++++++++++- tsdb/querier_test.go | 12 +++++++++++- 8 files changed, 63 insertions(+), 10 deletions(-) diff --git a/tsdb/block.go b/tsdb/block.go index 2d26f9038..a4b03f07f 100644 --- a/tsdb/block.go +++ b/tsdb/block.go @@ -62,7 +62,10 @@ type IndexReader interface { // beyond the lifetime of the index reader. Symbols() index.StringIter - // LabelValues returns sorted possible label values. + // SortedLabelValues returns sorted possible label values. + SortedLabelValues(name string) ([]string, error) + + // LabelValues returns possible label values which may not be sorted. LabelValues(name string) ([]string, error) // Postings returns the postings list iterator for the label pairs. 
@@ -419,6 +422,11 @@ func (r blockIndexReader) Symbols() index.StringIter { return r.ir.Symbols() } +func (r blockIndexReader) SortedLabelValues(name string) ([]string, error) { + st, err := r.ir.SortedLabelValues(name) + return st, errors.Wrapf(err, "block: %s", r.b.Meta().ULID) +} + func (r blockIndexReader) LabelValues(name string) ([]string, error) { st, err := r.ir.LabelValues(name) return st, errors.Wrapf(err, "block: %s", r.b.Meta().ULID) diff --git a/tsdb/cmd/tsdb/main.go b/tsdb/cmd/tsdb/main.go index 1286c3a8f..238244f1d 100644 --- a/tsdb/cmd/tsdb/main.go +++ b/tsdb/cmd/tsdb/main.go @@ -556,7 +556,7 @@ func analyzeBlock(b tsdb.BlockReader, limit int) error { postingInfos = postingInfos[:0] for _, n := range allLabelNames { - values, err := ir.LabelValues(n) + values, err := ir.SortedLabelValues(n) if err != nil { return err } @@ -572,7 +572,7 @@ func analyzeBlock(b tsdb.BlockReader, limit int) error { postingInfos = postingInfos[:0] for _, n := range allLabelNames { - lv, err := ir.LabelValues(n) + lv, err := ir.SortedLabelValues(n) if err != nil { return err } @@ -582,7 +582,7 @@ func analyzeBlock(b tsdb.BlockReader, limit int) error { printInfo(postingInfos) postingInfos = postingInfos[:0] - lv, err := ir.LabelValues("__name__") + lv, err := ir.SortedLabelValues("__name__") if err != nil { return err } diff --git a/tsdb/head.go b/tsdb/head.go index 0a16671ea..f02ef767f 100644 --- a/tsdb/head.go +++ b/tsdb/head.go @@ -1537,6 +1537,16 @@ func (h *headIndexReader) Symbols() index.StringIter { return index.NewStringListIter(res) } +// SortedLabelValues returns the sorted label values present in the head for the +// specific label name that are within the time range mint to maxt. 
+func (h *headIndexReader) SortedLabelValues(name string) ([]string, error) { + values, err := h.LabelValues(name) + if err == nil { + sort.Strings(values) + } + return values, err +} + // LabelValues returns label values present in the head for the // specific label name that are within the time range mint to maxt. func (h *headIndexReader) LabelValues(name string) ([]string, error) { @@ -1552,7 +1562,6 @@ func (h *headIndexReader) LabelValues(name string) ([]string, error) { sl = append(sl, s) } h.head.symMtx.RUnlock() - sort.Strings(sl) return sl, nil } diff --git a/tsdb/head_test.go b/tsdb/head_test.go index f3071a568..6c5e46658 100644 --- a/tsdb/head_test.go +++ b/tsdb/head_test.go @@ -1867,7 +1867,7 @@ func TestHeadLabelNamesValuesWithMinMaxRange(t *testing.T) { testutil.Equals(t, tt.expectedNames, actualLabelNames) if len(tt.expectedValues) > 0 { for i, name := range expectedLabelNames { - actualLabelValue, err := headIdxReader.LabelValues(name) + actualLabelValue, err := headIdxReader.SortedLabelValues(name) testutil.Ok(t, err) testutil.Equals(t, []string{tt.expectedValues[i]}, actualLabelValue) } diff --git a/tsdb/index/index.go b/tsdb/index/index.go index 744f24f88..20f94610c 100644 --- a/tsdb/index/index.go +++ b/tsdb/index/index.go @@ -1418,6 +1418,17 @@ func (r *Reader) SymbolTableSize() uint64 { return uint64(r.symbols.Size()) } +// SortedLabelValues returns the sorted value tuples that exist for the given label name. +// It is not safe to use the return value beyond the lifetime of the byte slice +// passed into the Reader. +func (r *Reader) SortedLabelValues(name string) ([]string, error) { + values, err := r.LabelValues(name) + if err == nil && r.version == FormatV1 { + sort.Strings(values) + } + return values, err +} + // LabelValues returns value tuples that exist for the given label name. // It is not safe to use the return value beyond the lifetime of the byte slice // passed into the Reader. 
@@ -1431,7 +1442,6 @@ func (r *Reader) LabelValues(name string) ([]string, error) { for k := range e { values = append(values, k) } - sort.Strings(values) return values, nil } diff --git a/tsdb/index/index_test.go b/tsdb/index/index_test.go index e0a3c1967..969b3627c 100644 --- a/tsdb/index/index_test.go +++ b/tsdb/index/index_test.go @@ -453,7 +453,7 @@ func TestPersistence_index_e2e(t *testing.T) { for k, v := range labelPairs { sort.Strings(v) - res, err := ir.LabelValues(k) + res, err := ir.SortedLabelValues(k) testutil.Ok(t, err) testutil.Equals(t, len(v), len(res)) diff --git a/tsdb/querier.go b/tsdb/querier.go index 9d7ac8c46..70a65a570 100644 --- a/tsdb/querier.go +++ b/tsdb/querier.go @@ -208,7 +208,7 @@ func (q *blockQuerier) Select(sortSeries bool, hints *storage.SelectHints, ms .. } func (q *blockQuerier) LabelValues(name string) ([]string, storage.Warnings, error) { - res, err := q.index.LabelValues(name) + res, err := q.index.SortedLabelValues(name) return res, nil, err } @@ -393,9 +393,14 @@ func postingsForMatcher(ix IndexReader, m *labels.Matcher) (index.Postings, erro } var res []string + lastVal, isSorted := "", true for _, val := range vals { if m.Matches(val) { res = append(res, val) + if isSorted && val < lastVal { + isSorted = false + } + lastVal = val } } @@ -403,6 +408,9 @@ func postingsForMatcher(ix IndexReader, m *labels.Matcher) (index.Postings, erro return index.EmptyPostings(), nil } + if !isSorted { + sort.Strings(res) + } return ix.Postings(m.Name, res...) } @@ -414,12 +422,20 @@ func inversePostingsForMatcher(ix IndexReader, m *labels.Matcher) (index.Posting } var res []string + lastVal, isSorted := "", true for _, val := range vals { if !m.Matches(val) { res = append(res, val) + if isSorted && val < lastVal { + isSorted = false + } + lastVal = val } } + if !isSorted { + sort.Strings(res) + } return ix.Postings(m.Name, res...) 
} diff --git a/tsdb/querier_test.go b/tsdb/querier_test.go index 795fbb93f..743255eb1 100644 --- a/tsdb/querier_test.go +++ b/tsdb/querier_test.go @@ -1400,6 +1400,12 @@ func (m mockIndex) Close() error { return nil } +func (m mockIndex) SortedLabelValues(name string) ([]string, error) { + values, _ := m.LabelValues(name) + sort.Strings(values) + return values, nil +} + func (m mockIndex) LabelValues(name string) ([]string, error) { values := []string{} for l := range m.postings { @@ -1407,7 +1413,6 @@ func (m mockIndex) LabelValues(name string) ([]string, error) { values = append(values, l.Value) } } - sort.Strings(values) return values, nil } @@ -2278,6 +2283,11 @@ func (m mockMatcherIndex) Symbols() index.StringIter { return nil } func (m mockMatcherIndex) Close() error { return nil } +// SortedLabelValues will return error if it is called. +func (m mockMatcherIndex) SortedLabelValues(name string) ([]string, error) { + return []string{}, errors.New("sorted label values called") +} + // LabelValues will return error if it is called. func (m mockMatcherIndex) LabelValues(name string) ([]string, error) { return []string{}, errors.New("label values called")