Merge pull request #1788 from prometheus/topk

Make topk/bottomk aggregators.
This commit is contained in:
Fabian Reinartz 2016-07-05 11:32:17 +02:00 committed by GitHub
commit abdf3536e4
10 changed files with 181 additions and 128 deletions

View file

@ -103,6 +103,7 @@ type Expressions []Expr
type AggregateExpr struct { type AggregateExpr struct {
Op itemType // The used aggregation operation. Op itemType // The used aggregation operation.
Expr Expr // The vector expression over which is aggregated. Expr Expr // The vector expression over which is aggregated.
Param Expr // Parameter used by some aggregators.
Grouping model.LabelNames // The labels by which to group the vector. Grouping model.LabelNames // The labels by which to group the vector.
Without bool // Whether to drop the given labels rather than keep them. Without bool // Whether to drop the given labels rather than keep them.
KeepCommonLabels bool // Whether to keep common labels among result elements. KeepCommonLabels bool // Whether to keep common labels among result elements.

View file

@ -14,6 +14,7 @@
package promql package promql
import ( import (
"container/heap"
"fmt" "fmt"
"math" "math"
"runtime" "runtime"
@ -610,7 +611,7 @@ func (ev *evaluator) eval(expr Expr) model.Value {
switch e := expr.(type) { switch e := expr.(type) {
case *AggregateExpr: case *AggregateExpr:
vector := ev.evalVector(e.Expr) vector := ev.evalVector(e.Expr)
return ev.aggregation(e.Op, e.Grouping, e.Without, e.KeepCommonLabels, vector) return ev.aggregation(e.Op, e.Grouping, e.Without, e.KeepCommonLabels, e.Param, vector)
case *BinaryExpr: case *BinaryExpr:
lhs := ev.evalOneOf(e.LHS, model.ValScalar, model.ValVector) lhs := ev.evalOneOf(e.LHS, model.ValScalar, model.ValVector)
@ -1060,15 +1061,24 @@ type groupedAggregation struct {
value model.SampleValue value model.SampleValue
valuesSquaredSum model.SampleValue valuesSquaredSum model.SampleValue
groupCount int groupCount int
heap vectorByValueHeap
reverseHeap vectorByReverseValueHeap
} }
// aggregation evaluates an aggregation operation on a vector. // aggregation evaluates an aggregation operation on a vector.
func (ev *evaluator) aggregation(op itemType, grouping model.LabelNames, without bool, keepCommon bool, vec vector) vector { func (ev *evaluator) aggregation(op itemType, grouping model.LabelNames, without bool, keepCommon bool, param Expr, vec vector) vector {
result := map[uint64]*groupedAggregation{} result := map[uint64]*groupedAggregation{}
var k int
if op == itemTopK || op == itemBottomK {
k = ev.evalInt(param)
if k < 1 {
return vector{}
}
}
for _, sample := range vec { for _, s := range vec {
withoutMetric := sample.Metric withoutMetric := s.Metric
if without { if without {
for _, l := range grouping { for _, l := range grouping {
withoutMetric.Del(l) withoutMetric.Del(l)
@ -1080,7 +1090,7 @@ func (ev *evaluator) aggregation(op itemType, grouping model.LabelNames, without
if without { if without {
groupingKey = uint64(withoutMetric.Metric.Fingerprint()) groupingKey = uint64(withoutMetric.Metric.Fingerprint())
} else { } else {
groupingKey = model.SignatureForLabels(sample.Metric.Metric, grouping...) groupingKey = model.SignatureForLabels(s.Metric.Metric, grouping...)
} }
groupedResult, ok := result[groupingKey] groupedResult, ok := result[groupingKey]
@ -1088,7 +1098,7 @@ func (ev *evaluator) aggregation(op itemType, grouping model.LabelNames, without
if !ok { if !ok {
var m metric.Metric var m metric.Metric
if keepCommon { if keepCommon {
m = sample.Metric m = s.Metric
m.Del(model.MetricNameLabel) m.Del(model.MetricNameLabel)
} else if without { } else if without {
m = withoutMetric m = withoutMetric
@ -1098,44 +1108,65 @@ func (ev *evaluator) aggregation(op itemType, grouping model.LabelNames, without
Copied: true, Copied: true,
} }
for _, l := range grouping { for _, l := range grouping {
if v, ok := sample.Metric.Metric[l]; ok { if v, ok := s.Metric.Metric[l]; ok {
m.Set(l, v) m.Set(l, v)
} }
} }
} }
result[groupingKey] = &groupedAggregation{ result[groupingKey] = &groupedAggregation{
labels: m, labels: m,
value: sample.Value, value: s.Value,
valuesSquaredSum: sample.Value * sample.Value, valuesSquaredSum: s.Value * s.Value,
groupCount: 1, groupCount: 1,
} }
if op == itemTopK {
result[groupingKey].heap = make(vectorByValueHeap, 0, k)
heap.Push(&result[groupingKey].heap, &sample{Value: s.Value, Metric: s.Metric})
} else if op == itemBottomK {
result[groupingKey].reverseHeap = make(vectorByReverseValueHeap, 0, k)
heap.Push(&result[groupingKey].reverseHeap, &sample{Value: s.Value, Metric: s.Metric})
}
continue continue
} }
// Add the sample to the existing group. // Add the sample to the existing group.
if keepCommon { if keepCommon {
groupedResult.labels = labelIntersection(groupedResult.labels, sample.Metric) groupedResult.labels = labelIntersection(groupedResult.labels, s.Metric)
} }
switch op { switch op {
case itemSum: case itemSum:
groupedResult.value += sample.Value groupedResult.value += s.Value
case itemAvg: case itemAvg:
groupedResult.value += sample.Value groupedResult.value += s.Value
groupedResult.groupCount++ groupedResult.groupCount++
case itemMax: case itemMax:
if groupedResult.value < sample.Value || math.IsNaN(float64(groupedResult.value)) { if groupedResult.value < s.Value || math.IsNaN(float64(groupedResult.value)) {
groupedResult.value = sample.Value groupedResult.value = s.Value
} }
case itemMin: case itemMin:
if groupedResult.value > sample.Value || math.IsNaN(float64(groupedResult.value)) { if groupedResult.value > s.Value || math.IsNaN(float64(groupedResult.value)) {
groupedResult.value = sample.Value groupedResult.value = s.Value
} }
case itemCount: case itemCount:
groupedResult.groupCount++ groupedResult.groupCount++
case itemStdvar, itemStddev: case itemStdvar, itemStddev:
groupedResult.value += sample.Value groupedResult.value += s.Value
groupedResult.valuesSquaredSum += sample.Value * sample.Value groupedResult.valuesSquaredSum += s.Value * s.Value
groupedResult.groupCount++ groupedResult.groupCount++
case itemTopK:
if len(groupedResult.heap) < k || groupedResult.heap[0].Value < s.Value || math.IsNaN(float64(groupedResult.heap[0].Value)) {
if len(groupedResult.heap) == k {
heap.Pop(&groupedResult.heap)
}
heap.Push(&groupedResult.heap, &sample{Value: s.Value, Metric: s.Metric})
}
case itemBottomK:
if len(groupedResult.reverseHeap) < k || groupedResult.reverseHeap[0].Value > s.Value || math.IsNaN(float64(groupedResult.reverseHeap[0].Value)) {
if len(groupedResult.reverseHeap) == k {
heap.Pop(&groupedResult.reverseHeap)
}
heap.Push(&groupedResult.reverseHeap, &sample{Value: s.Value, Metric: s.Metric})
}
default: default:
panic(fmt.Errorf("expected aggregation operator but got %q", op)) panic(fmt.Errorf("expected aggregation operator but got %q", op))
} }
@ -1156,6 +1187,28 @@ func (ev *evaluator) aggregation(op itemType, grouping model.LabelNames, without
case itemStddev: case itemStddev:
avg := float64(aggr.value) / float64(aggr.groupCount) avg := float64(aggr.value) / float64(aggr.groupCount)
aggr.value = model.SampleValue(math.Sqrt(float64(aggr.valuesSquaredSum)/float64(aggr.groupCount) - avg*avg)) aggr.value = model.SampleValue(math.Sqrt(float64(aggr.valuesSquaredSum)/float64(aggr.groupCount) - avg*avg))
case itemTopK:
// The heap keeps the lowest value on top, so reverse it.
sort.Sort(sort.Reverse(aggr.heap))
for _, v := range aggr.heap {
resultVector = append(resultVector, &sample{
Metric: v.Metric,
Value: v.Value,
Timestamp: ev.Timestamp,
})
}
continue // Bypass default append.
case itemBottomK:
// The heap keeps the highest value on top, so reverse it.
sort.Sort(sort.Reverse(aggr.reverseHeap))
for _, v := range aggr.reverseHeap {
resultVector = append(resultVector, &sample{
Metric: v.Metric,
Value: v.Value,
Timestamp: ev.Timestamp,
})
}
continue // Bypass default append.
default: default:
// For other aggregations, we already have the right value. // For other aggregations, we already have the right value.
} }

View file

@ -14,7 +14,6 @@
package promql package promql
import ( import (
"container/heap"
"math" "math"
"regexp" "regexp"
"sort" "sort"
@ -298,52 +297,6 @@ func funcSortDesc(ev *evaluator, args Expressions) model.Value {
return vector(byValueSorter) return vector(byValueSorter)
} }
// === topk(k model.ValScalar, node model.ValVector) Vector ===
func funcTopk(ev *evaluator, args Expressions) model.Value {
	k := ev.evalInt(args[0])
	if k < 1 {
		return vector{}
	}
	vec := ev.evalVector(args[1])

	// Min-heap of the k largest samples seen so far; the smallest of
	// the current winners sits at index 0.
	winners := make(vectorByValueHeap, 0, k)
	for _, el := range vec {
		full := len(winners) == k
		// Skip the sample only when the heap is full and the current
		// minimum neither loses to it nor is NaN (NaN is always evicted).
		if full && !(winners[0].Value < el.Value || math.IsNaN(float64(winners[0].Value))) {
			continue
		}
		if full {
			heap.Pop(&winners)
		}
		heap.Push(&winners, el)
	}
	// The heap keeps the lowest value on top, so reverse it.
	sort.Sort(sort.Reverse(winners))
	return vector(winners)
}
// === bottomk(k model.ValScalar, node model.ValVector) Vector ===
func funcBottomk(ev *evaluator, args Expressions) model.Value {
	k := ev.evalInt(args[0])
	if k < 1 {
		return vector{}
	}
	vec := ev.evalVector(args[1])

	// Max-heap (via the reversed comparator) of the k smallest samples
	// seen so far; the largest of the current keepers sits at index 0.
	keepers := make(vectorByReverseValueHeap, 0, k)
	for _, el := range vec {
		full := len(keepers) == k
		// Skip the sample only when the heap is full and the current
		// maximum neither loses to it nor is NaN (NaN is always evicted).
		if full && !(keepers[0].Value > el.Value || math.IsNaN(float64(keepers[0].Value))) {
			continue
		}
		if full {
			heap.Pop(&keepers)
		}
		heap.Push(&keepers, el)
	}
	// The heap keeps the highest value on top, so reverse it.
	sort.Sort(sort.Reverse(keepers))
	return vector(keepers)
}
// === clamp_max(vector model.ValVector, max Scalar) Vector === // === clamp_max(vector model.ValVector, max Scalar) Vector ===
func funcClampMax(ev *evaluator, args Expressions) model.Value { func funcClampMax(ev *evaluator, args Expressions) model.Value {
vec := ev.evalVector(args[0]) vec := ev.evalVector(args[0])
@ -866,12 +819,6 @@ var functions = map[string]*Function{
ReturnType: model.ValVector, ReturnType: model.ValVector,
Call: funcAvgOverTime, Call: funcAvgOverTime,
}, },
"bottomk": {
Name: "bottomk",
ArgTypes: []model.ValueType{model.ValScalar, model.ValVector},
ReturnType: model.ValVector,
Call: funcBottomk,
},
"ceil": { "ceil": {
Name: "ceil", Name: "ceil",
ArgTypes: []model.ValueType{model.ValVector}, ArgTypes: []model.ValueType{model.ValVector},
@ -1053,12 +1000,6 @@ var functions = map[string]*Function{
ReturnType: model.ValScalar, ReturnType: model.ValScalar,
Call: funcTime, Call: funcTime,
}, },
"topk": {
Name: "topk",
ArgTypes: []model.ValueType{model.ValScalar, model.ValVector},
ReturnType: model.ValVector,
Call: funcTopk,
},
"vector": { "vector": {
Name: "vector", Name: "vector",
ArgTypes: []model.ValueType{model.ValScalar}, ArgTypes: []model.ValueType{model.ValScalar},

View file

@ -58,6 +58,10 @@ func (i itemType) isOperator() bool { return i > operatorsStart && i < operators
// Returns false otherwise // Returns false otherwise
func (i itemType) isAggregator() bool { return i > aggregatorsStart && i < aggregatorsEnd } func (i itemType) isAggregator() bool { return i > aggregatorsStart && i < aggregatorsEnd }
// isAggregatorWithParam returns true if the item is an aggregator that takes a parameter.
// Returns false otherwise.
func (i itemType) isAggregatorWithParam() bool { return i == itemTopK || i == itemBottomK }
// isKeyword returns true if the item corresponds to a keyword. // isKeyword returns true if the item corresponds to a keyword.
// Returns false otherwise. // Returns false otherwise.
func (i itemType) isKeyword() bool { return i > keywordsStart && i < keywordsEnd } func (i itemType) isKeyword() bool { return i > keywordsStart && i < keywordsEnd }
@ -170,6 +174,8 @@ const (
itemMax itemMax
itemStddev itemStddev
itemStdvar itemStdvar
itemTopK
itemBottomK
aggregatorsEnd aggregatorsEnd
keywordsStart keywordsStart
@ -203,13 +209,15 @@ var key = map[string]itemType{
"unless": itemLUnless, "unless": itemLUnless,
// Aggregators. // Aggregators.
"sum": itemSum, "sum": itemSum,
"avg": itemAvg, "avg": itemAvg,
"count": itemCount, "count": itemCount,
"min": itemMin, "min": itemMin,
"max": itemMax, "max": itemMax,
"stddev": itemStddev, "stddev": itemStddev,
"stdvar": itemStdvar, "stdvar": itemStdvar,
"topk": itemTopK,
"bottomk": itemBottomK,
// Keywords. // Keywords.
"alert": itemAlert, "alert": itemAlert,

View file

@ -719,6 +719,11 @@ func (p *parser) aggrExpr() *AggregateExpr {
} }
p.expect(itemLeftParen, ctx) p.expect(itemLeftParen, ctx)
var param Expr
if agop.typ.isAggregatorWithParam() {
param = p.expr()
p.expect(itemComma, ctx)
}
e := p.expr() e := p.expr()
p.expect(itemRightParen, ctx) p.expect(itemRightParen, ctx)
@ -746,6 +751,7 @@ func (p *parser) aggrExpr() *AggregateExpr {
return &AggregateExpr{ return &AggregateExpr{
Op: agop.typ, Op: agop.typ,
Expr: e, Expr: e,
Param: param,
Grouping: grouping, Grouping: grouping,
Without: without, Without: without,
KeepCommonLabels: keepCommon, KeepCommonLabels: keepCommon,
@ -1043,6 +1049,9 @@ func (p *parser) checkType(node Node) (typ model.ValueType) {
p.errorf("aggregation operator expected in aggregation expression but got %q", n.Op) p.errorf("aggregation operator expected in aggregation expression but got %q", n.Op)
} }
p.expectType(n.Expr, model.ValVector, "aggregation expression") p.expectType(n.Expr, model.ValVector, "aggregation expression")
if n.Op == itemTopK || n.Op == itemBottomK {
p.expectType(n.Param, model.ValScalar, "aggregation parameter")
}
case *BinaryExpr: case *BinaryExpr:
lt := p.checkType(n.LHS) lt := p.checkType(n.LHS)

View file

@ -1201,6 +1201,18 @@ var testExpr = []struct {
}, },
Grouping: model.LabelNames{}, Grouping: model.LabelNames{},
}, },
}, {
input: "topk(5, some_metric)",
expected: &AggregateExpr{
Op: itemTopK,
Expr: &VectorSelector{
Name: "some_metric",
LabelMatchers: metric.LabelMatchers{
{Type: metric.Equal, Name: model.MetricNameLabel, Value: "some_metric"},
},
},
Param: &NumberLiteral{5},
},
}, { }, {
input: `sum some_metric by (test)`, input: `sum some_metric by (test)`,
fail: true, fail: true,
@ -1237,6 +1249,14 @@ var testExpr = []struct {
input: `sum without (test) (some_metric) by (test)`, input: `sum without (test) (some_metric) by (test)`,
fail: true, fail: true,
errMsg: "could not parse remaining input \"by (test)\"...", errMsg: "could not parse remaining input \"by (test)\"...",
}, {
input: `topk(some_metric)`,
fail: true,
errMsg: "parse error at char 17: unexpected \")\" in aggregation, expected \",\"",
}, {
input: `topk(some_metric, other_metric)`,
fail: true,
errMsg: "parse error at char 32: expected type scalar in aggregation parameter, got vector",
}, },
// Test function calls. // Test function calls.
{ {

View file

@ -135,7 +135,11 @@ func (es Expressions) String() (s string) {
} }
func (node *AggregateExpr) String() string { func (node *AggregateExpr) String() string {
aggrString := fmt.Sprintf("%s(%s)", node.Op, node.Expr) aggrString := fmt.Sprintf("%s(", node.Op)
if node.Op.isAggregatorWithParam() {
aggrString += fmt.Sprintf("%s, ", node.Param)
}
aggrString += fmt.Sprintf("%s)", node.Expr)
if len(node.Grouping) > 0 { if len(node.Grouping) > 0 {
var format string var format string
if node.Without { if node.Without {

View file

@ -71,6 +71,9 @@ func TestExprString(t *testing.T) {
{ {
in: `sum(task:errors:rate10s{job="s"}) WITHOUT (instance)`, in: `sum(task:errors:rate10s{job="s"}) WITHOUT (instance)`,
}, },
{
in: `topk(5, task:errors:rate10s{job="s"})`,
},
{ {
in: `a - ON(b) c`, in: `a - ON(b) c`,
}, },

View file

@ -126,3 +126,60 @@ eval instant at 0m max by (group) (http_requests)
eval instant at 0m min by (group) (http_requests) eval instant at 0m min by (group) (http_requests)
{group="production"} 1 {group="production"} 1
{group="canary"} 3 {group="canary"} 3
clear
# Tests for topk/bottomk.
load 5m
http_requests{job="api-server", instance="0", group="production"} 0+10x10
http_requests{job="api-server", instance="1", group="production"} 0+20x10
http_requests{job="api-server", instance="2", group="production"} NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
http_requests{job="api-server", instance="0", group="canary"} 0+30x10
http_requests{job="api-server", instance="1", group="canary"} 0+40x10
http_requests{job="app-server", instance="0", group="production"} 0+50x10
http_requests{job="app-server", instance="1", group="production"} 0+60x10
http_requests{job="app-server", instance="0", group="canary"} 0+70x10
http_requests{job="app-server", instance="1", group="canary"} 0+80x10
eval_ordered instant at 50m topk(3, http_requests)
http_requests{group="canary", instance="1", job="app-server"} 800
http_requests{group="canary", instance="0", job="app-server"} 700
http_requests{group="production", instance="1", job="app-server"} 600
eval_ordered instant at 50m topk(5, http_requests{group="canary",job="app-server"})
http_requests{group="canary", instance="1", job="app-server"} 800
http_requests{group="canary", instance="0", job="app-server"} 700
eval_ordered instant at 50m bottomk(3, http_requests)
http_requests{group="production", instance="0", job="api-server"} 100
http_requests{group="production", instance="1", job="api-server"} 200
http_requests{group="canary", instance="0", job="api-server"} 300
eval_ordered instant at 50m bottomk(5, http_requests{group="canary",job="app-server"})
http_requests{group="canary", instance="0", job="app-server"} 700
http_requests{group="canary", instance="1", job="app-server"} 800
eval instant at 50m topk by (group) (1, http_requests)
http_requests{group="production", instance="1", job="app-server"} 600
http_requests{group="canary", instance="1", job="app-server"} 800
eval instant at 50m bottomk by (group) (2, http_requests)
http_requests{group="canary", instance="0", job="api-server"} 300
http_requests{group="canary", instance="1", job="api-server"} 400
http_requests{group="production", instance="0", job="api-server"} 100
http_requests{group="production", instance="1", job="api-server"} 200
eval_ordered instant at 50m bottomk by (group) (2, http_requests{group="production"})
http_requests{group="production", instance="0", job="api-server"} 100
http_requests{group="production", instance="1", job="api-server"} 200
# Test NaN is sorted away from the top/bottom.
eval_ordered instant at 50m topk(3, http_requests{job="api-server",group="production"})
http_requests{job="api-server", instance="1", group="production"} 200
http_requests{job="api-server", instance="0", group="production"} 100
http_requests{job="api-server", instance="2", group="production"} NaN
eval_ordered instant at 50m bottomk(3, http_requests{job="api-server",group="production"})
http_requests{job="api-server", instance="0", group="production"} 100
http_requests{job="api-server", instance="1", group="production"} 200
http_requests{job="api-server", instance="2", group="production"} NaN

View file

@ -210,49 +210,6 @@ eval instant at 0m clamp_max(clamp_min(test_clamp, -20), 70)
{src="clamp-b"} 0 {src="clamp-b"} 0
{src="clamp-c"} 70 {src="clamp-c"} 70
clear
# Tests for topk/bottomk.
load 5m
http_requests{job="api-server", instance="0", group="production"} 0+10x10
http_requests{job="api-server", instance="1", group="production"} 0+20x10
http_requests{job="api-server", instance="2", group="production"} NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
http_requests{job="api-server", instance="0", group="canary"} 0+30x10
http_requests{job="api-server", instance="1", group="canary"} 0+40x10
http_requests{job="app-server", instance="0", group="production"} 0+50x10
http_requests{job="app-server", instance="1", group="production"} 0+60x10
http_requests{job="app-server", instance="0", group="canary"} 0+70x10
http_requests{job="app-server", instance="1", group="canary"} 0+80x10
eval_ordered instant at 50m topk(3, http_requests)
http_requests{group="canary", instance="1", job="app-server"} 800
http_requests{group="canary", instance="0", job="app-server"} 700
http_requests{group="production", instance="1", job="app-server"} 600
eval_ordered instant at 50m topk(5, http_requests{group="canary",job="app-server"})
http_requests{group="canary", instance="1", job="app-server"} 800
http_requests{group="canary", instance="0", job="app-server"} 700
eval_ordered instant at 50m bottomk(3, http_requests)
http_requests{group="production", instance="0", job="api-server"} 100
http_requests{group="production", instance="1", job="api-server"} 200
http_requests{group="canary", instance="0", job="api-server"} 300
eval_ordered instant at 50m bottomk(5, http_requests{group="canary",job="app-server"})
http_requests{group="canary", instance="0", job="app-server"} 700
http_requests{group="canary", instance="1", job="app-server"} 800
# Test NaN is sorted away from the top/bottom.
eval_ordered instant at 50m topk(3, http_requests{job="api-server",group="production"})
http_requests{job="api-server", instance="1", group="production"} 200
http_requests{job="api-server", instance="0", group="production"} 100
http_requests{job="api-server", instance="2", group="production"} NaN
eval_ordered instant at 50m bottomk(3, http_requests{job="api-server",group="production"})
http_requests{job="api-server", instance="0", group="production"} 100
http_requests{job="api-server", instance="1", group="production"} 200
http_requests{job="api-server", instance="2", group="production"} NaN
# Tests for sort/sort_desc. # Tests for sort/sort_desc.
clear clear