change labelset comparison in promql engine to avoid false positive during detection of duplicates (#7058)

* Use go1.14 new hash/maphash to hash both RHS and LHS instead of XOR'ing
which has been resulting in hash collisions.

Signed-off-by: Callum Styan <callumstyan@gmail.com>

* Refactor engine labelset signature generation, just use labels.Labels
instead of hashes.

Signed-off-by: Callum Styan <callumstyan@gmail.com>

* Address review comments; function comments + store result of
lhs.String+rhs.String as key.

Signed-off-by: Callum Styan <callumstyan@gmail.com>

* Replace all signatureFunc usage with signatureFuncString.

Signed-off-by: Callum Styan <callumstyan@gmail.com>

* Make optimizations to labels String function and generation of rhs+lhs
as string in resultMetric.

Signed-off-by: Callum Styan <callumstyan@gmail.com>

* Use separate string functions that don't use strconv just for engine
maps.

Signed-off-by: Callum Styan <callumstyan@gmail.com>

* Use a byte invalid separator instead of quoting and have a buffer
attached to EvalNodeHelper instead of using a global pool in the labels
package.

Signed-off-by: Callum Styan <callumstyan@gmail.com>

* Address review comments.

Signed-off-by: Callum Styan <callumstyan@gmail.com>

* Address more review comments, labels has a function that now builds a
byte slice without turning it into a string.

Signed-off-by: Callum Styan <callumstyan@gmail.com>

* Use two different non-ascii hex codes as byte separators between labels
and between sets of labels when building bytes of a Labels struct.

Signed-off-by: Callum Styan <callumstyan@gmail.com>

* We only need the 2nd byte invalid sep. at the beginning of a
labels.Bytes

Signed-off-by: Callum Styan <callumstyan@gmail.com>
This commit is contained in:
Callum Styan 2020-05-12 14:03:15 -07:00 committed by GitHub
parent da217cbde2
commit 5bb7f00d00
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 124 additions and 53 deletions

View file

@ -22,14 +22,15 @@ import (
"github.com/cespare/xxhash" "github.com/cespare/xxhash"
) )
const sep = '\xff'
// Well-known label names used by Prometheus components. // Well-known label names used by Prometheus components.
const ( const (
MetricName = "__name__" MetricName = "__name__"
AlertName = "alertname" AlertName = "alertname"
BucketLabel = "le" BucketLabel = "le"
InstanceName = "instance" InstanceName = "instance"
sep = '\xff'
labelSep = '\xfe'
) )
// Label is a key/value pair of strings. // Label is a key/value pair of strings.
@ -59,10 +60,25 @@ func (ls Labels) String() string {
b.WriteString(strconv.Quote(l.Value)) b.WriteString(strconv.Quote(l.Value))
} }
b.WriteByte('}') b.WriteByte('}')
return b.String() return b.String()
} }
// Bytes returns ls as a byte slice.
// It uses an byte invalid character as a separator and so should not be used for printing.
func (ls Labels) Bytes(buf []byte) []byte {
b := bytes.NewBuffer(buf[:0])
b.WriteByte(labelSep)
for i, l := range ls {
if i > 0 {
b.WriteByte(sep)
}
b.WriteString(l.Name)
b.WriteByte(sep)
b.WriteString(l.Value)
}
return b.Bytes()
}
// MarshalJSON implements json.Marshaler. // MarshalJSON implements json.Marshaler.
func (ls Labels) MarshalJSON() ([]byte, error) { func (ls Labels) MarshalJSON() ([]byte, error) {
return json.Marshal(ls.Map()) return json.Marshal(ls.Map())
@ -172,6 +188,44 @@ func (ls Labels) HashWithoutLabels(b []byte, names ...string) (uint64, []byte) {
return xxhash.Sum64(b), b return xxhash.Sum64(b), b
} }
// WithLabels returns a new labels.Labels from ls that only contains labels matching names.
// 'names' have to be sorted in ascending order.
func (ls Labels) WithLabels(names ...string) Labels {
ret := make([]Label, 0, len(ls))
i, j := 0, 0
for i < len(ls) && j < len(names) {
if names[j] < ls[i].Name {
j++
} else if ls[i].Name < names[j] {
i++
} else {
ret = append(ret, ls[i])
i++
j++
}
}
return ret
}
// WithLabels returns a new labels.Labels from ls that contains labels not matching names.
// 'names' have to be sorted in ascending order.
func (ls Labels) WithoutLabels(names ...string) Labels {
ret := make([]Label, 0, len(ls))
j := 0
for i := range ls {
for j < len(names) && names[j] < ls[i].Name {
j++
}
if ls[i].Name == MetricName || (j < len(names) && ls[i].Name == names[j]) {
continue
}
ret = append(ret, ls[i])
}
return ret
}
// Copy returns a copy of the labels. // Copy returns a copy of the labels.
func (ls Labels) Copy() Labels { func (ls Labels) Copy() Labels {
res := make(Labels, len(ls)) res := make(Labels, len(ls))

View file

@ -14,6 +14,7 @@
package promql package promql
import ( import (
"bytes"
"container/heap" "container/heap"
"context" "context"
"fmt" "fmt"
@ -831,16 +832,20 @@ type EvalNodeHelper struct {
// dropMetricName and label_*. // dropMetricName and label_*.
dmn map[uint64]labels.Labels dmn map[uint64]labels.Labels
// signatureFunc. // signatureFunc.
sigf map[uint64]uint64 sigf map[string]string
// funcHistogramQuantile. // funcHistogramQuantile.
signatureToMetricWithBuckets map[uint64]*metricWithBuckets signatureToMetricWithBuckets map[string]*metricWithBuckets
// label_replace. // label_replace.
regex *regexp.Regexp regex *regexp.Regexp
lb *labels.Builder
lblBuf []byte
lblResultBuf []byte
// For binary vector matching. // For binary vector matching.
rightSigs map[uint64]Sample rightSigs map[string]Sample
matchedSigs map[uint64]map[uint64]struct{} matchedSigs map[string]map[uint64]struct{}
resultMetric map[uint64]labels.Labels resultMetric map[string]labels.Labels
} }
// dropMetricName is a cached version of dropMetricName. // dropMetricName is a cached version of dropMetricName.
@ -858,20 +863,19 @@ func (enh *EvalNodeHelper) dropMetricName(l labels.Labels) labels.Labels {
return ret return ret
} }
// signatureFunc is a cached version of signatureFunc. func (enh *EvalNodeHelper) signatureFunc(on bool, names ...string) func(labels.Labels) string {
func (enh *EvalNodeHelper) signatureFunc(on bool, names ...string) func(labels.Labels) uint64 {
if enh.sigf == nil { if enh.sigf == nil {
enh.sigf = make(map[uint64]uint64, len(enh.out)) enh.sigf = make(map[string]string, len(enh.out))
} }
f := signatureFunc(on, names...) f := signatureFunc(on, enh.lblBuf, names...)
return func(l labels.Labels) uint64 { return func(l labels.Labels) string {
h := l.Hash() enh.lblBuf = l.Bytes(enh.lblBuf)
ret, ok := enh.sigf[h] ret, ok := enh.sigf[string(enh.lblBuf)]
if ok { if ok {
return ret return ret
} }
ret = f(l) ret = f(l)
enh.sigf[h] = ret enh.sigf[string(enh.lblBuf)] = ret
return ret return ret
} }
} }
@ -1527,7 +1531,7 @@ func (ev *evaluator) VectorAnd(lhs, rhs Vector, matching *parser.VectorMatching,
sigf := enh.signatureFunc(matching.On, matching.MatchingLabels...) sigf := enh.signatureFunc(matching.On, matching.MatchingLabels...)
// The set of signatures for the right-hand side Vector. // The set of signatures for the right-hand side Vector.
rightSigs := map[uint64]struct{}{} rightSigs := map[string]struct{}{}
// Add all rhs samples to a map so we can easily find matches later. // Add all rhs samples to a map so we can easily find matches later.
for _, rs := range rhs { for _, rs := range rhs {
rightSigs[sigf(rs.Metric)] = struct{}{} rightSigs[sigf(rs.Metric)] = struct{}{}
@ -1548,7 +1552,7 @@ func (ev *evaluator) VectorOr(lhs, rhs Vector, matching *parser.VectorMatching,
} }
sigf := enh.signatureFunc(matching.On, matching.MatchingLabels...) sigf := enh.signatureFunc(matching.On, matching.MatchingLabels...)
leftSigs := map[uint64]struct{}{} leftSigs := map[string]struct{}{}
// Add everything from the left-hand-side Vector. // Add everything from the left-hand-side Vector.
for _, ls := range lhs { for _, ls := range lhs {
leftSigs[sigf(ls.Metric)] = struct{}{} leftSigs[sigf(ls.Metric)] = struct{}{}
@ -1569,7 +1573,7 @@ func (ev *evaluator) VectorUnless(lhs, rhs Vector, matching *parser.VectorMatchi
} }
sigf := enh.signatureFunc(matching.On, matching.MatchingLabels...) sigf := enh.signatureFunc(matching.On, matching.MatchingLabels...)
rightSigs := map[uint64]struct{}{} rightSigs := map[string]struct{}{}
for _, rs := range rhs { for _, rs := range rhs {
rightSigs[sigf(rs.Metric)] = struct{}{} rightSigs[sigf(rs.Metric)] = struct{}{}
} }
@ -1598,7 +1602,7 @@ func (ev *evaluator) VectorBinop(op parser.ItemType, lhs, rhs Vector, matching *
// All samples from the rhs hashed by the matching label/values. // All samples from the rhs hashed by the matching label/values.
if enh.rightSigs == nil { if enh.rightSigs == nil {
enh.rightSigs = make(map[uint64]Sample, len(enh.out)) enh.rightSigs = make(map[string]Sample, len(enh.out))
} else { } else {
for k := range enh.rightSigs { for k := range enh.rightSigs {
delete(enh.rightSigs, k) delete(enh.rightSigs, k)
@ -1628,7 +1632,7 @@ func (ev *evaluator) VectorBinop(op parser.ItemType, lhs, rhs Vector, matching *
// Tracks the match-signature. For one-to-one operations the value is nil. For many-to-one // Tracks the match-signature. For one-to-one operations the value is nil. For many-to-one
// the value is a set of signatures to detect duplicated result elements. // the value is a set of signatures to detect duplicated result elements.
if enh.matchedSigs == nil { if enh.matchedSigs == nil {
enh.matchedSigs = make(map[uint64]map[uint64]struct{}, len(rightSigs)) enh.matchedSigs = make(map[string]map[uint64]struct{}, len(rightSigs))
} else { } else {
for k := range enh.matchedSigs { for k := range enh.matchedSigs {
delete(enh.matchedSigs, k) delete(enh.matchedSigs, k)
@ -1662,7 +1666,6 @@ func (ev *evaluator) VectorBinop(op parser.ItemType, lhs, rhs Vector, matching *
continue continue
} }
metric := resultMetric(ls.Metric, rs.Metric, op, matching, enh) metric := resultMetric(ls.Metric, rs.Metric, op, matching, enh)
insertedSigs, exists := matchedSigs[sig] insertedSigs, exists := matchedSigs[sig]
if matching.Card == parser.CardOneToOne { if matching.Card == parser.CardOneToOne {
if exists { if exists {
@ -1692,19 +1695,15 @@ func (ev *evaluator) VectorBinop(op parser.ItemType, lhs, rhs Vector, matching *
return enh.out return enh.out
} }
// signatureFunc returns a function that calculates the signature for a metric func signatureFunc(on bool, b []byte, names ...string) func(labels.Labels) string {
// ignoring the provided labels. If on, then the given labels are only used instead.
func signatureFunc(on bool, names ...string) func(labels.Labels) uint64 {
sort.Strings(names) sort.Strings(names)
if on { if on {
return func(lset labels.Labels) uint64 { return func(lset labels.Labels) string {
h, _ := lset.HashForLabels(make([]byte, 0, 1024), names...) return string(lset.WithLabels(names...).Bytes(b))
return h
} }
} }
return func(lset labels.Labels) uint64 { return func(lset labels.Labels) string {
h, _ := lset.HashWithoutLabels(make([]byte, 0, 1024), names...) return string(lset.WithoutLabels(names...).Bytes(b))
return h
} }
} }
@ -1712,22 +1711,29 @@ func signatureFunc(on bool, names ...string) func(labels.Labels) uint64 {
// binary operation and the matching options. // binary operation and the matching options.
func resultMetric(lhs, rhs labels.Labels, op parser.ItemType, matching *parser.VectorMatching, enh *EvalNodeHelper) labels.Labels { func resultMetric(lhs, rhs labels.Labels, op parser.ItemType, matching *parser.VectorMatching, enh *EvalNodeHelper) labels.Labels {
if enh.resultMetric == nil { if enh.resultMetric == nil {
enh.resultMetric = make(map[uint64]labels.Labels, len(enh.out)) enh.resultMetric = make(map[string]labels.Labels, len(enh.out))
} }
// op and matching are always the same for a given node, so
// there's no need to include them in the hash key. if enh.lb == nil {
// If the lhs and rhs are the same then the xor would be 0, enh.lb = labels.NewBuilder(lhs)
// so add in one side to protect against that. } else {
lh := lhs.Hash() enh.lb.Reset(lhs)
h := (lh ^ rhs.Hash()) + lh }
if ret, ok := enh.resultMetric[h]; ok {
buf := bytes.NewBuffer(enh.lblResultBuf[:0])
enh.lblBuf = lhs.Bytes(enh.lblBuf)
buf.Write(enh.lblBuf)
enh.lblBuf = rhs.Bytes(enh.lblBuf)
buf.Write(enh.lblBuf)
enh.lblResultBuf = buf.Bytes()
if ret, ok := enh.resultMetric[string(enh.lblResultBuf)]; ok {
return ret return ret
} }
str := string(enh.lblResultBuf)
lb := labels.NewBuilder(lhs)
if shouldDropMetricName(op) { if shouldDropMetricName(op) {
lb.Del(labels.MetricName) enh.lb.Del(labels.MetricName)
} }
if matching.Card == parser.CardOneToOne { if matching.Card == parser.CardOneToOne {
@ -1739,23 +1745,23 @@ func resultMetric(lhs, rhs labels.Labels, op parser.ItemType, matching *parser.V
continue Outer continue Outer
} }
} }
lb.Del(l.Name) enh.lb.Del(l.Name)
} }
} else { } else {
lb.Del(matching.MatchingLabels...) enh.lb.Del(matching.MatchingLabels...)
} }
} }
for _, ln := range matching.Include { for _, ln := range matching.Include {
// Included labels from the `group_x` modifier are taken from the "one"-side. // Included labels from the `group_x` modifier are taken from the "one"-side.
if v := rhs.Get(ln); v != "" { if v := rhs.Get(ln); v != "" {
lb.Set(ln, v) enh.lb.Set(ln, v)
} else { } else {
lb.Del(ln) enh.lb.Del(ln)
} }
} }
ret := lb.Labels() ret := enh.lb.Labels()
enh.resultMetric[h] = ret enh.resultMetric[str] = ret
return ret return ret
} }

View file

@ -598,10 +598,10 @@ func funcPredictLinear(vals []parser.Value, args parser.Expressions, enh *EvalNo
func funcHistogramQuantile(vals []parser.Value, args parser.Expressions, enh *EvalNodeHelper) Vector { func funcHistogramQuantile(vals []parser.Value, args parser.Expressions, enh *EvalNodeHelper) Vector {
q := vals[0].(Vector)[0].V q := vals[0].(Vector)[0].V
inVec := vals[1].(Vector) inVec := vals[1].(Vector)
sigf := enh.signatureFunc(false, excludedLabels...) sigf := signatureFunc(false, enh.lblBuf, excludedLabels...)
if enh.signatureToMetricWithBuckets == nil { if enh.signatureToMetricWithBuckets == nil {
enh.signatureToMetricWithBuckets = map[uint64]*metricWithBuckets{} enh.signatureToMetricWithBuckets = map[string]*metricWithBuckets{}
} else { } else {
for _, v := range enh.signatureToMetricWithBuckets { for _, v := range enh.signatureToMetricWithBuckets {
v.buckets = v.buckets[:0] v.buckets = v.buckets[:0]
@ -616,16 +616,16 @@ func funcHistogramQuantile(vals []parser.Value, args parser.Expressions, enh *Ev
// TODO(beorn7): Issue a warning somehow. // TODO(beorn7): Issue a warning somehow.
continue continue
} }
hash := sigf(el.Metric) l := sigf(el.Metric)
mb, ok := enh.signatureToMetricWithBuckets[hash] mb, ok := enh.signatureToMetricWithBuckets[l]
if !ok { if !ok {
el.Metric = labels.NewBuilder(el.Metric). el.Metric = labels.NewBuilder(el.Metric).
Del(labels.BucketLabel, labels.MetricName). Del(labels.BucketLabel, labels.MetricName).
Labels() Labels()
mb = &metricWithBuckets{el.Metric, nil} mb = &metricWithBuckets{el.Metric, nil}
enh.signatureToMetricWithBuckets[hash] = mb enh.signatureToMetricWithBuckets[l] = mb
} }
mb.buckets = append(mb.buckets, bucket{upperBound, el.V}) mb.buckets = append(mb.buckets, bucket{upperBound, el.V})
} }

11
promql/testdata/collision.test vendored Normal file
View file

@ -0,0 +1,11 @@
load 1s
node_namespace_pod:kube_pod_info:{namespace="observability",node="gke-search-infra-custom-96-253440-fli-d135b119-jx00",pod="node-exporter-l454v"} 1
node_cpu_seconds_total{cpu="10",endpoint="https",instance="10.253.57.87:9100",job="node-exporter",mode="idle",namespace="observability",pod="node-exporter-l454v",service="node-exporter"} 449
node_cpu_seconds_total{cpu="35",endpoint="https",instance="10.253.57.87:9100",job="node-exporter",mode="idle",namespace="observability",pod="node-exporter-l454v",service="node-exporter"} 449
node_cpu_seconds_total{cpu="89",endpoint="https",instance="10.253.57.87:9100",job="node-exporter",mode="idle",namespace="observability",pod="node-exporter-l454v",service="node-exporter"} 449
eval instant at 4s count by(namespace, pod, cpu) (node_cpu_seconds_total{cpu=~".*",job="node-exporter",mode="idle",namespace="observability",pod="node-exporter-l454v"}) * on(namespace, pod) group_left(node) node_namespace_pod:kube_pod_info:{namespace="observability",pod="node-exporter-l454v"}
{cpu="10",namespace="observability",node="gke-search-infra-custom-96-253440-fli-d135b119-jx00",pod="node-exporter-l454v"} 1
{cpu="35",namespace="observability",node="gke-search-infra-custom-96-253440-fli-d135b119-jx00",pod="node-exporter-l454v"} 1
{cpu="89",namespace="observability",node="gke-search-infra-custom-96-253440-fli-d135b119-jx00",pod="node-exporter-l454v"} 1