change labelset comparison in promql engine to avoid false positive during detection of duplicates (#7058)

* Use go1.14 new hash/maphash to hash both RHS and LHS instead of XOR'ing
which has been resulting in hash collisions.

Signed-off-by: Callum Styan <callumstyan@gmail.com>

* Refactor engine labelset signature generation, just use labels.Labels
instead of hashes.

Signed-off-by: Callum Styan <callumstyan@gmail.com>

* Address review comments; function comments + store result of
lhs.String+rhs.String as key.

Signed-off-by: Callum Styan <callumstyan@gmail.com>

* Replace all signatureFunc usage with signatureFuncString.

Signed-off-by: Callum Styan <callumstyan@gmail.com>

* Make optimizations to labels String function and generation of rhs+lhs
as string in resultMetric.

Signed-off-by: Callum Styan <callumstyan@gmail.com>

* Use separate string functions that don't use strconv just for engine
maps.

Signed-off-by: Callum Styan <callumstyan@gmail.com>

* Use a byte invalid separator instead of quoting and have a buffer
attached to EvalNodeHelper instead of using a global pool in the labels
package.

Signed-off-by: Callum Styan <callumstyan@gmail.com>

* Address review comments.

Signed-off-by: Callum Styan <callumstyan@gmail.com>

* Address more review comments, labels has a function that now builds a
byte slice without turning it into a string.

Signed-off-by: Callum Styan <callumstyan@gmail.com>

* Use two different non-ascii hex codes as byte separators between labels
and between sets of labels when building bytes of a Labels struct.

Signed-off-by: Callum Styan <callumstyan@gmail.com>

* We only need the 2nd byte invalid sep. at the beginning of a
labels.Bytes

Signed-off-by: Callum Styan <callumstyan@gmail.com>
This commit is contained in:
Callum Styan 2020-05-12 14:03:15 -07:00 committed by GitHub
parent da217cbde2
commit 5bb7f00d00
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 124 additions and 53 deletions

View file

@ -22,14 +22,15 @@ import (
"github.com/cespare/xxhash"
)
const sep = '\xff'
// Well-known label names used by Prometheus components.
const (
MetricName = "__name__"
AlertName = "alertname"
BucketLabel = "le"
InstanceName = "instance"
sep = '\xff'
labelSep = '\xfe'
)
// Label is a key/value pair of strings.
@ -59,10 +60,25 @@ func (ls Labels) String() string {
b.WriteString(strconv.Quote(l.Value))
}
b.WriteByte('}')
return b.String()
}
// Bytes returns ls as a byte slice.
// It uses an byte invalid character as a separator and so should not be used for printing.
func (ls Labels) Bytes(buf []byte) []byte {
b := bytes.NewBuffer(buf[:0])
b.WriteByte(labelSep)
for i, l := range ls {
if i > 0 {
b.WriteByte(sep)
}
b.WriteString(l.Name)
b.WriteByte(sep)
b.WriteString(l.Value)
}
return b.Bytes()
}
// MarshalJSON implements json.Marshaler.
func (ls Labels) MarshalJSON() ([]byte, error) {
return json.Marshal(ls.Map())
@ -172,6 +188,44 @@ func (ls Labels) HashWithoutLabels(b []byte, names ...string) (uint64, []byte) {
return xxhash.Sum64(b), b
}
// WithLabels returns a new labels.Labels from ls that only contains labels matching names.
// 'names' have to be sorted in ascending order.
func (ls Labels) WithLabels(names ...string) Labels {
ret := make([]Label, 0, len(ls))
i, j := 0, 0
for i < len(ls) && j < len(names) {
if names[j] < ls[i].Name {
j++
} else if ls[i].Name < names[j] {
i++
} else {
ret = append(ret, ls[i])
i++
j++
}
}
return ret
}
// WithLabels returns a new labels.Labels from ls that contains labels not matching names.
// 'names' have to be sorted in ascending order.
func (ls Labels) WithoutLabels(names ...string) Labels {
ret := make([]Label, 0, len(ls))
j := 0
for i := range ls {
for j < len(names) && names[j] < ls[i].Name {
j++
}
if ls[i].Name == MetricName || (j < len(names) && ls[i].Name == names[j]) {
continue
}
ret = append(ret, ls[i])
}
return ret
}
// Copy returns a copy of the labels.
func (ls Labels) Copy() Labels {
res := make(Labels, len(ls))

View file

@ -14,6 +14,7 @@
package promql
import (
"bytes"
"container/heap"
"context"
"fmt"
@ -831,16 +832,20 @@ type EvalNodeHelper struct {
// dropMetricName and label_*.
dmn map[uint64]labels.Labels
// signatureFunc.
sigf map[uint64]uint64
sigf map[string]string
// funcHistogramQuantile.
signatureToMetricWithBuckets map[uint64]*metricWithBuckets
signatureToMetricWithBuckets map[string]*metricWithBuckets
// label_replace.
regex *regexp.Regexp
lb *labels.Builder
lblBuf []byte
lblResultBuf []byte
// For binary vector matching.
rightSigs map[uint64]Sample
matchedSigs map[uint64]map[uint64]struct{}
resultMetric map[uint64]labels.Labels
rightSigs map[string]Sample
matchedSigs map[string]map[uint64]struct{}
resultMetric map[string]labels.Labels
}
// dropMetricName is a cached version of dropMetricName.
@ -858,20 +863,19 @@ func (enh *EvalNodeHelper) dropMetricName(l labels.Labels) labels.Labels {
return ret
}
// signatureFunc is a cached version of signatureFunc.
func (enh *EvalNodeHelper) signatureFunc(on bool, names ...string) func(labels.Labels) uint64 {
func (enh *EvalNodeHelper) signatureFunc(on bool, names ...string) func(labels.Labels) string {
if enh.sigf == nil {
enh.sigf = make(map[uint64]uint64, len(enh.out))
enh.sigf = make(map[string]string, len(enh.out))
}
f := signatureFunc(on, names...)
return func(l labels.Labels) uint64 {
h := l.Hash()
ret, ok := enh.sigf[h]
f := signatureFunc(on, enh.lblBuf, names...)
return func(l labels.Labels) string {
enh.lblBuf = l.Bytes(enh.lblBuf)
ret, ok := enh.sigf[string(enh.lblBuf)]
if ok {
return ret
}
ret = f(l)
enh.sigf[h] = ret
enh.sigf[string(enh.lblBuf)] = ret
return ret
}
}
@ -1527,7 +1531,7 @@ func (ev *evaluator) VectorAnd(lhs, rhs Vector, matching *parser.VectorMatching,
sigf := enh.signatureFunc(matching.On, matching.MatchingLabels...)
// The set of signatures for the right-hand side Vector.
rightSigs := map[uint64]struct{}{}
rightSigs := map[string]struct{}{}
// Add all rhs samples to a map so we can easily find matches later.
for _, rs := range rhs {
rightSigs[sigf(rs.Metric)] = struct{}{}
@ -1548,7 +1552,7 @@ func (ev *evaluator) VectorOr(lhs, rhs Vector, matching *parser.VectorMatching,
}
sigf := enh.signatureFunc(matching.On, matching.MatchingLabels...)
leftSigs := map[uint64]struct{}{}
leftSigs := map[string]struct{}{}
// Add everything from the left-hand-side Vector.
for _, ls := range lhs {
leftSigs[sigf(ls.Metric)] = struct{}{}
@ -1569,7 +1573,7 @@ func (ev *evaluator) VectorUnless(lhs, rhs Vector, matching *parser.VectorMatchi
}
sigf := enh.signatureFunc(matching.On, matching.MatchingLabels...)
rightSigs := map[uint64]struct{}{}
rightSigs := map[string]struct{}{}
for _, rs := range rhs {
rightSigs[sigf(rs.Metric)] = struct{}{}
}
@ -1598,7 +1602,7 @@ func (ev *evaluator) VectorBinop(op parser.ItemType, lhs, rhs Vector, matching *
// All samples from the rhs hashed by the matching label/values.
if enh.rightSigs == nil {
enh.rightSigs = make(map[uint64]Sample, len(enh.out))
enh.rightSigs = make(map[string]Sample, len(enh.out))
} else {
for k := range enh.rightSigs {
delete(enh.rightSigs, k)
@ -1628,7 +1632,7 @@ func (ev *evaluator) VectorBinop(op parser.ItemType, lhs, rhs Vector, matching *
// Tracks the match-signature. For one-to-one operations the value is nil. For many-to-one
// the value is a set of signatures to detect duplicated result elements.
if enh.matchedSigs == nil {
enh.matchedSigs = make(map[uint64]map[uint64]struct{}, len(rightSigs))
enh.matchedSigs = make(map[string]map[uint64]struct{}, len(rightSigs))
} else {
for k := range enh.matchedSigs {
delete(enh.matchedSigs, k)
@ -1662,7 +1666,6 @@ func (ev *evaluator) VectorBinop(op parser.ItemType, lhs, rhs Vector, matching *
continue
}
metric := resultMetric(ls.Metric, rs.Metric, op, matching, enh)
insertedSigs, exists := matchedSigs[sig]
if matching.Card == parser.CardOneToOne {
if exists {
@ -1692,19 +1695,15 @@ func (ev *evaluator) VectorBinop(op parser.ItemType, lhs, rhs Vector, matching *
return enh.out
}
// signatureFunc returns a function that calculates the signature for a metric
// ignoring the provided labels. If on, then the given labels are only used instead.
func signatureFunc(on bool, names ...string) func(labels.Labels) uint64 {
func signatureFunc(on bool, b []byte, names ...string) func(labels.Labels) string {
sort.Strings(names)
if on {
return func(lset labels.Labels) uint64 {
h, _ := lset.HashForLabels(make([]byte, 0, 1024), names...)
return h
return func(lset labels.Labels) string {
return string(lset.WithLabels(names...).Bytes(b))
}
}
return func(lset labels.Labels) uint64 {
h, _ := lset.HashWithoutLabels(make([]byte, 0, 1024), names...)
return h
return func(lset labels.Labels) string {
return string(lset.WithoutLabels(names...).Bytes(b))
}
}
@ -1712,22 +1711,29 @@ func signatureFunc(on bool, names ...string) func(labels.Labels) uint64 {
// binary operation and the matching options.
func resultMetric(lhs, rhs labels.Labels, op parser.ItemType, matching *parser.VectorMatching, enh *EvalNodeHelper) labels.Labels {
if enh.resultMetric == nil {
enh.resultMetric = make(map[uint64]labels.Labels, len(enh.out))
enh.resultMetric = make(map[string]labels.Labels, len(enh.out))
}
// op and matching are always the same for a given node, so
// there's no need to include them in the hash key.
// If the lhs and rhs are the same then the xor would be 0,
// so add in one side to protect against that.
lh := lhs.Hash()
h := (lh ^ rhs.Hash()) + lh
if ret, ok := enh.resultMetric[h]; ok {
if enh.lb == nil {
enh.lb = labels.NewBuilder(lhs)
} else {
enh.lb.Reset(lhs)
}
buf := bytes.NewBuffer(enh.lblResultBuf[:0])
enh.lblBuf = lhs.Bytes(enh.lblBuf)
buf.Write(enh.lblBuf)
enh.lblBuf = rhs.Bytes(enh.lblBuf)
buf.Write(enh.lblBuf)
enh.lblResultBuf = buf.Bytes()
if ret, ok := enh.resultMetric[string(enh.lblResultBuf)]; ok {
return ret
}
lb := labels.NewBuilder(lhs)
str := string(enh.lblResultBuf)
if shouldDropMetricName(op) {
lb.Del(labels.MetricName)
enh.lb.Del(labels.MetricName)
}
if matching.Card == parser.CardOneToOne {
@ -1739,23 +1745,23 @@ func resultMetric(lhs, rhs labels.Labels, op parser.ItemType, matching *parser.V
continue Outer
}
}
lb.Del(l.Name)
enh.lb.Del(l.Name)
}
} else {
lb.Del(matching.MatchingLabels...)
enh.lb.Del(matching.MatchingLabels...)
}
}
for _, ln := range matching.Include {
// Included labels from the `group_x` modifier are taken from the "one"-side.
if v := rhs.Get(ln); v != "" {
lb.Set(ln, v)
enh.lb.Set(ln, v)
} else {
lb.Del(ln)
enh.lb.Del(ln)
}
}
ret := lb.Labels()
enh.resultMetric[h] = ret
ret := enh.lb.Labels()
enh.resultMetric[str] = ret
return ret
}

View file

@ -598,10 +598,10 @@ func funcPredictLinear(vals []parser.Value, args parser.Expressions, enh *EvalNo
func funcHistogramQuantile(vals []parser.Value, args parser.Expressions, enh *EvalNodeHelper) Vector {
q := vals[0].(Vector)[0].V
inVec := vals[1].(Vector)
sigf := enh.signatureFunc(false, excludedLabels...)
sigf := signatureFunc(false, enh.lblBuf, excludedLabels...)
if enh.signatureToMetricWithBuckets == nil {
enh.signatureToMetricWithBuckets = map[uint64]*metricWithBuckets{}
enh.signatureToMetricWithBuckets = map[string]*metricWithBuckets{}
} else {
for _, v := range enh.signatureToMetricWithBuckets {
v.buckets = v.buckets[:0]
@ -616,16 +616,16 @@ func funcHistogramQuantile(vals []parser.Value, args parser.Expressions, enh *Ev
// TODO(beorn7): Issue a warning somehow.
continue
}
hash := sigf(el.Metric)
l := sigf(el.Metric)
mb, ok := enh.signatureToMetricWithBuckets[hash]
mb, ok := enh.signatureToMetricWithBuckets[l]
if !ok {
el.Metric = labels.NewBuilder(el.Metric).
Del(labels.BucketLabel, labels.MetricName).
Labels()
mb = &metricWithBuckets{el.Metric, nil}
enh.signatureToMetricWithBuckets[hash] = mb
enh.signatureToMetricWithBuckets[l] = mb
}
mb.buckets = append(mb.buckets, bucket{upperBound, el.V})
}

11
promql/testdata/collision.test vendored Normal file
View file

@ -0,0 +1,11 @@
load 1s
node_namespace_pod:kube_pod_info:{namespace="observability",node="gke-search-infra-custom-96-253440-fli-d135b119-jx00",pod="node-exporter-l454v"} 1
node_cpu_seconds_total{cpu="10",endpoint="https",instance="10.253.57.87:9100",job="node-exporter",mode="idle",namespace="observability",pod="node-exporter-l454v",service="node-exporter"} 449
node_cpu_seconds_total{cpu="35",endpoint="https",instance="10.253.57.87:9100",job="node-exporter",mode="idle",namespace="observability",pod="node-exporter-l454v",service="node-exporter"} 449
node_cpu_seconds_total{cpu="89",endpoint="https",instance="10.253.57.87:9100",job="node-exporter",mode="idle",namespace="observability",pod="node-exporter-l454v",service="node-exporter"} 449
eval instant at 4s count by(namespace, pod, cpu) (node_cpu_seconds_total{cpu=~".*",job="node-exporter",mode="idle",namespace="observability",pod="node-exporter-l454v"}) * on(namespace, pod) group_left(node) node_namespace_pod:kube_pod_info:{namespace="observability",pod="node-exporter-l454v"}
{cpu="10",namespace="observability",node="gke-search-infra-custom-96-253440-fli-d135b119-jx00",pod="node-exporter-l454v"} 1
{cpu="35",namespace="observability",node="gke-search-infra-custom-96-253440-fli-d135b119-jx00",pod="node-exporter-l454v"} 1
{cpu="89",namespace="observability",node="gke-search-infra-custom-96-253440-fli-d135b119-jx00",pod="node-exporter-l454v"} 1