prometheus/rules/ast/functions.go
beorn7 9e7c3e3bcd Add the histogram_quantile function.
Since we are now getting really deep into floating point calculation,
the tests had to take into account the precision loss. Since the rule
tests are based on direct line matching in the output, implementing
the "almost equal" semantics was pretty cumbersome, but here we are.
2015-02-22 01:04:51 +01:00

686 lines
19 KiB
Go

// Copyright 2013 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package ast
import (
"container/heap"
"fmt"
"math"
"sort"
"strconv"
"time"
clientmodel "github.com/prometheus/client_golang/model"
"github.com/prometheus/prometheus/storage/metric"
)
// Function represents a function of the expression language and is
// used by function nodes.
type Function struct {
// name is the function's name as reported in argument-check error messages.
name string
// argTypes lists the expected type of every argument position, including
// the optional ones.
argTypes []ExprType
// optionalArgs is the number of arguments that may be omitted; CheckArgTypes
// requires at least len(argTypes)-optionalArgs arguments.
optionalArgs int
// returnType is the declared expression type of the function's result.
// NOTE(review): presumably the dynamic type returned by callFn matches
// this; that is not enforced in this file.
returnType ExprType
// callFn is the implementation. It receives the evaluation timestamp and the
// argument nodes and returns the result as an untyped value.
callFn func(timestamp clientmodel.Timestamp, args []Node) interface{}
}
// CheckArgTypes returns a non-nil error if the number or types of
// passed in arg nodes do not match the function's expectations.
//
// The argument count must lie between len(argTypes)-optionalArgs and
// len(argTypes), inclusive. Each supplied argument must then implement the
// node interface that corresponds to the declared type at its position.
// Positions whose declared type is none of scalar, vector, matrix or string
// are not checked.
func (function *Function) CheckArgTypes(args []Node) error {
	maxArgs := len(function.argTypes)
	minArgs := maxArgs - function.optionalArgs
	given := len(args)

	if given > maxArgs {
		return fmt.Errorf(
			"too many arguments to function %v(): %v expected at most, %v given",
			function.name, maxArgs, given,
		)
	}
	if given < minArgs {
		return fmt.Errorf(
			"too few arguments to function %v(): %v expected at least, %v given",
			function.name, minArgs, given,
		)
	}

	for idx, arg := range args {
		var (
			matches      bool
			expectedType string
		)
		switch function.argTypes[idx] {
		case ScalarType:
			_, matches = arg.(ScalarNode)
			expectedType = "scalar"
		case VectorType:
			_, matches = arg.(VectorNode)
			expectedType = "vector"
		case MatrixType:
			_, matches = arg.(MatrixNode)
			expectedType = "matrix"
		case StringType:
			_, matches = arg.(StringNode)
			expectedType = "string"
		default:
			// Unknown expected type: nothing to verify for this position.
			continue
		}
		if !matches {
			return fmt.Errorf(
				"wrong type for argument %v in function %v(), expected %v",
				idx, function.name, expectedType,
			)
		}
	}
	return nil
}
// === time() clientmodel.SampleValue ===
//
// timeImpl returns the evaluation timestamp, as reported by
// timestamp.Unix(), converted to a sample value. It takes no arguments;
// args is ignored.
func timeImpl(timestamp clientmodel.Timestamp, args []Node) interface{} {
	now := timestamp.Unix()
	return clientmodel.SampleValue(now)
}
// === delta(matrix MatrixNode, isCounter=0 ScalarNode) Vector ===
//
// deltaImpl computes, for every series in the matrix, the difference between
// its last and first sample, scaled to the matrix selector's full interval.
// When the optional second argument evaluates to a value > 0, the series are
// treated as counters and drops in value (counter resets) are compensated.
// Series with fewer than two samples, or whose first and last samples share
// a timestamp, are dropped. The metric name label is removed from the
// results.
func deltaImpl(timestamp clientmodel.Timestamp, args []Node) interface{} {
	matrixNode := args[0].(MatrixNode)
	isCounter := false
	if len(args) >= 2 {
		isCounter = args[1].(ScalarNode).Eval(timestamp) > 0
	}

	// For counters every value in the interval is needed to detect breaks in
	// the series' monotonicity (resets must be ignored). Otherwise the
	// boundary samples are sufficient.
	var matrixValue Matrix
	if isCounter {
		matrixValue = matrixNode.Eval(timestamp)
	} else {
		matrixValue = matrixNode.EvalBoundaries(timestamp)
	}

	resultVector := Vector{}
	for _, series := range matrixValue {
		points := series.Values
		// A delta needs at least two points; otherwise drop this element.
		if len(points) < 2 {
			continue
		}
		first, last := points[0], points[len(points)-1]

		// Accumulate the amount lost at each counter reset while walking
		// through the samples; prev ends up holding the last sample's value.
		var correction, prev clientmodel.SampleValue
		for _, point := range points {
			if isCounter && point.Value < prev {
				correction += prev - point.Value
			}
			prev = point.Value
		}
		delta := prev - first.Value + correction

		targetInterval := args[0].(*MatrixSelector).interval
		sampledInterval := last.Timestamp.Sub(first.Timestamp)
		if sampledInterval == 0 {
			// Only found one sample. Cannot compute a rate from this.
			continue
		}

		// Correct for differences in target vs. actual delta interval.
		//
		// The delta above spans only the time between the first and last
		// samples found, which is usually shorter than the requested
		// interval. Depending on how many samples fall into the interval,
		// this distorts the result and causes temporal aliasing (ugly
		// bumps). Scaling by the ratio of the two intervals corrects that.
		delta *= clientmodel.SampleValue(targetInterval) / clientmodel.SampleValue(sampledInterval)

		out := &Sample{
			Metric:    series.Metric,
			Value:     delta,
			Timestamp: timestamp,
		}
		out.Metric.Delete(clientmodel.MetricNameLabel)
		resultVector = append(resultVector, out)
	}
	return resultVector
}
// === rate(node MatrixNode) Vector ===
//
// rateImpl returns the per-second rate of increase of every series in the
// matrix. It calls deltaImpl in counter mode (second argument set to 1) and
// divides each resulting value by the matrix selector's interval in seconds.
func rateImpl(timestamp clientmodel.Timestamp, args []Node) interface{} {
	// Cap the slice at its length so that append allocates a fresh backing
	// array instead of writing the extra argument into spare capacity that
	// is shared with the caller's slice.
	deltaArgs := append(args[:len(args):len(args)], &ScalarLiteral{value: 1})
	vector := deltaImpl(timestamp, deltaArgs).(Vector)

	// TODO: could be other type of MatrixNode in the future (right now, only
	// MatrixSelector exists). Find a better way of getting the duration of a
	// matrix, such as looking at the samples themselves.
	interval := args[0].(*MatrixSelector).interval
	// Divide as floating point values: an integer division of the durations
	// would truncate fractional seconds and yield a zero divisor for
	// intervals shorter than one second.
	seconds := clientmodel.SampleValue(interval) / clientmodel.SampleValue(time.Second)
	for i := range vector {
		vector[i].Value /= seconds
	}
	return vector
}
// vectorByValueHeap orders the samples of a Vector by ascending value. It
// provides the Len, Less, Swap, Push and Pop methods used with the sort and
// container/heap packages.
type vectorByValueHeap Vector

// Len returns the number of samples.
func (s vectorByValueHeap) Len() int {
	return len(s)
}

// Less reports whether the sample at index i has a smaller value than the
// sample at index j.
func (s vectorByValueHeap) Less(i, j int) bool {
	left, right := s[i], s[j]
	return left.Value < right.Value
}

// Swap exchanges the samples at indexes i and j.
func (s vectorByValueHeap) Swap(i, j int) {
	tmp := s[i]
	s[i] = s[j]
	s[j] = tmp
}

// Push appends x, which must be a *Sample, to the end of the slice.
func (s *vectorByValueHeap) Push(x interface{}) {
	sample := x.(*Sample)
	*s = append(*s, sample)
}

// Pop removes the last element of the slice and returns it.
func (s *vectorByValueHeap) Pop() interface{} {
	last := len(*s) - 1
	el := (*s)[last]
	*s = (*s)[:last]
	return el
}
// reverseHeap wraps a heap.Interface and inverts its ordering. All other
// methods are promoted from the embedded value.
type reverseHeap struct {
	heap.Interface
}

// Less reports the opposite order of the embedded implementation by
// swapping the two indexes before delegating.
func (r reverseHeap) Less(i, j int) bool {
	inner := r.Interface
	return inner.Less(j, i)
}
// === sort(node VectorNode) Vector ===
//
// sortImpl evaluates the vector argument and returns its samples ordered by
// ascending value. The evaluated vector is sorted in place.
func sortImpl(timestamp clientmodel.Timestamp, args []Node) interface{} {
	vector := args[0].(VectorNode).Eval(timestamp)
	ascending := vectorByValueHeap(vector)
	sort.Sort(ascending)
	return Vector(ascending)
}
// === sort_desc(node VectorNode) Vector ===
//
// sortDescImpl evaluates the vector argument and returns its samples ordered
// by descending value. The evaluated vector is sorted in place.
func sortDescImpl(timestamp clientmodel.Timestamp, args []Node) interface{} {
	vector := args[0].(VectorNode).Eval(timestamp)
	byValue := vectorByValueHeap(vector)
	descending := sort.Reverse(byValue)
	sort.Sort(descending)
	return Vector(byValue)
}
// === topk(k ScalarNode, node VectorNode) Vector ===
//
// topkImpl returns the (at most) k samples with the largest values from the
// vector argument, ordered by descending value. k is truncated towards zero;
// if it is NaN or smaller than 1, an empty vector is returned without
// evaluating the vector argument.
func topkImpl(timestamp clientmodel.Timestamp, args []Node) interface{} {
	kVal := float64(args[0].(ScalarNode).Eval(timestamp))
	// Reject NaN explicitly: converting it to an integer has no defined
	// result.
	if math.IsNaN(kVal) || kVal < 1 {
		return Vector{}
	}

	vector := args[1].(VectorNode).Eval(timestamp)
	// Never keep more elements than the input has. This bounds the heap
	// allocation by the input size rather than by the user-supplied k (a
	// huge k would otherwise panic or exhaust memory), and avoids converting
	// an out-of-range float to int.
	k := len(vector)
	if kVal < float64(k) {
		k = int(kVal)
	}

	// Min-heap of the k largest values seen so far; topk[0] is the smallest
	// of them and is replaced whenever a larger value shows up.
	topk := make(vectorByValueHeap, 0, k)
	for _, el := range vector {
		if len(topk) < k || topk[0].Value < el.Value {
			if len(topk) == k {
				heap.Pop(&topk)
			}
			heap.Push(&topk, el)
		}
	}
	sort.Sort(sort.Reverse(topk))
	return Vector(topk)
}
// === bottomk(k ScalarNode, node VectorNode) Vector ===
//
// bottomkImpl returns the (at most) k samples with the smallest values from
// the vector argument, ordered by ascending value. k is truncated towards
// zero; if it is NaN or smaller than 1, an empty vector is returned without
// evaluating the vector argument.
func bottomkImpl(timestamp clientmodel.Timestamp, args []Node) interface{} {
	kVal := float64(args[0].(ScalarNode).Eval(timestamp))
	// Reject NaN explicitly: converting it to an integer has no defined
	// result.
	if math.IsNaN(kVal) || kVal < 1 {
		return Vector{}
	}

	vector := args[1].(VectorNode).Eval(timestamp)
	// Never keep more elements than the input has. This bounds the heap
	// allocation by the input size rather than by the user-supplied k (a
	// huge k would otherwise panic or exhaust memory), and avoids converting
	// an out-of-range float to int.
	k := len(vector)
	if kVal < float64(k) {
		k = int(kVal)
	}

	// Max-heap (via reverseHeap) of the k smallest values seen so far;
	// bottomk[0] is the largest of them and is replaced whenever a smaller
	// value shows up.
	bottomk := make(vectorByValueHeap, 0, k)
	bkHeap := reverseHeap{Interface: &bottomk}
	for _, el := range vector {
		if len(bottomk) < k || bottomk[0].Value > el.Value {
			if len(bottomk) == k {
				heap.Pop(&bkHeap)
			}
			heap.Push(&bkHeap, el)
		}
	}
	sort.Sort(bottomk)
	return Vector(bottomk)
}
// === drop_common_labels(node VectorNode) Vector ===
//
// dropCommonLabelsImpl removes from every sample of the vector those labels
// that carry the same value in all samples. The metric name label is never
// treated as common. An empty input yields an empty vector.
func dropCommonLabelsImpl(timestamp clientmodel.Timestamp, args []Node) interface{} {
	vector := args[0].(VectorNode).Eval(timestamp)
	if len(vector) == 0 {
		return Vector{}
	}
	first, rest := vector[0], vector[1:]

	// Start with all labels of the first sample as candidates.
	common := clientmodel.LabelSet{}
	for name, value := range first.Metric.Metric {
		// TODO(julius): Should we also drop common metric names?
		if name != clientmodel.MetricNameLabel {
			common[name] = value
		}
	}

	// Discard every candidate that some other sample disagrees on.
	for _, el := range rest {
		for name, value := range common {
			if el.Metric.Metric[name] == value {
				continue
			}
			// Deletion of map entries while iterating over them is safe.
			// From http://golang.org/ref/spec#For_statements:
			// "If map entries that have not yet been reached are deleted during
			// iteration, the corresponding iteration values will not be produced."
			delete(common, name)
		}
	}

	// Strip the remaining common labels from all samples.
	for _, el := range vector {
		for name := range el.Metric.Metric {
			if _, isCommon := common[name]; isCommon {
				el.Metric.Delete(name)
			}
		}
	}
	return vector
}
// === round(vector VectorNode, toNearest=1 Scalar) Vector ===
//
// roundImpl rounds the value of every sample to the nearest multiple of
// toNearest (1 if the second argument is omitted). Ties are resolved by
// rounding up. The metric name label is removed from the results.
func roundImpl(timestamp clientmodel.Timestamp, args []Node) interface{} {
	step := 1.0
	if len(args) >= 2 {
		step = float64(args[1].(ScalarNode).Eval(timestamp))
	}
	// Work with the inverse, as it seems to cause fewer floating point
	// accuracy issues.
	inverse := 1.0 / step

	vector := args[0].(VectorNode).Eval(timestamp)
	for i := range vector {
		el := vector[i]
		el.Metric.Delete(clientmodel.MetricNameLabel)
		el.Value = clientmodel.SampleValue(math.Floor(float64(el.Value)*inverse+0.5) / inverse)
	}
	return vector
}
// === scalar(node VectorNode) Scalar ===
//
// scalarImpl returns the value of the only sample in the vector. If the
// vector does not contain exactly one sample, NaN is returned.
func scalarImpl(timestamp clientmodel.Timestamp, args []Node) interface{} {
	vector := args[0].(VectorNode).Eval(timestamp)
	if len(vector) == 1 {
		return clientmodel.SampleValue(vector[0].Value)
	}
	return clientmodel.SampleValue(math.NaN())
}
// === count_scalar(vector VectorNode) model.SampleValue ===
//
// countScalarImpl returns the number of samples in the vector as a scalar.
func countScalarImpl(timestamp clientmodel.Timestamp, args []Node) interface{} {
	vector := args[0].(VectorNode).Eval(timestamp)
	return clientmodel.SampleValue(len(vector))
}
// aggrOverTime evaluates the matrix in args[0] and reduces the values of
// each series to a single sample by applying aggrFn. Series without any
// values are skipped. The metric name label is removed from the results,
// which all carry the evaluation timestamp.
func aggrOverTime(timestamp clientmodel.Timestamp, args []Node, aggrFn func(metric.Values) clientmodel.SampleValue) interface{} {
	matrixVal := args[0].(MatrixNode).Eval(timestamp)

	resultVector := Vector{}
	for _, series := range matrixVal {
		if len(series.Values) == 0 {
			continue
		}
		series.Metric.Delete(clientmodel.MetricNameLabel)
		aggregated := &Sample{
			Metric:    series.Metric,
			Value:     aggrFn(series.Values),
			Timestamp: timestamp,
		}
		resultVector = append(resultVector, aggregated)
	}
	return resultVector
}
// === avg_over_time(matrix MatrixNode) Vector ===
//
// avgOverTimeImpl returns, per series, the arithmetic mean of all values in
// the matrix interval.
func avgOverTimeImpl(timestamp clientmodel.Timestamp, args []Node) interface{} {
	mean := func(values metric.Values) clientmodel.SampleValue {
		total := clientmodel.SampleValue(0)
		for i := range values {
			total += values[i].Value
		}
		return total / clientmodel.SampleValue(len(values))
	}
	return aggrOverTime(timestamp, args, mean)
}
// === count_over_time(matrix MatrixNode) Vector ===
//
// countOverTimeImpl returns, per series, the number of values in the matrix
// interval.
func countOverTimeImpl(timestamp clientmodel.Timestamp, args []Node) interface{} {
	count := func(values metric.Values) clientmodel.SampleValue {
		return clientmodel.SampleValue(len(values))
	}
	return aggrOverTime(timestamp, args, count)
}
// === floor(vector VectorNode) Vector ===
//
// floorImpl rounds the value of every sample down to the nearest integer
// and removes the metric name label.
func floorImpl(timestamp clientmodel.Timestamp, args []Node) interface{} {
	vector := args[0].(VectorNode).Eval(timestamp)
	for i := range vector {
		el := vector[i]
		el.Metric.Delete(clientmodel.MetricNameLabel)
		floored := math.Floor(float64(el.Value))
		el.Value = clientmodel.SampleValue(floored)
	}
	return vector
}
// === max_over_time(matrix MatrixNode) Vector ===
//
// maxOverTimeImpl returns, per series, the largest value in the matrix
// interval, as determined by math.Max.
func maxOverTimeImpl(timestamp clientmodel.Timestamp, args []Node) interface{} {
	largest := func(values metric.Values) clientmodel.SampleValue {
		// Start from -Inf so that the first value always replaces it.
		highest := math.Inf(-1)
		for i := range values {
			highest = math.Max(highest, float64(values[i].Value))
		}
		return clientmodel.SampleValue(highest)
	}
	return aggrOverTime(timestamp, args, largest)
}
// === min_over_time(matrix MatrixNode) Vector ===
//
// minOverTimeImpl returns, per series, the smallest value in the matrix
// interval, as determined by math.Min.
func minOverTimeImpl(timestamp clientmodel.Timestamp, args []Node) interface{} {
	smallest := func(values metric.Values) clientmodel.SampleValue {
		// Start from +Inf so that the first value always replaces it.
		lowest := math.Inf(1)
		for i := range values {
			lowest = math.Min(lowest, float64(values[i].Value))
		}
		return clientmodel.SampleValue(lowest)
	}
	return aggrOverTime(timestamp, args, smallest)
}
// === sum_over_time(matrix MatrixNode) Vector ===
//
// sumOverTimeImpl returns, per series, the sum of all values in the matrix
// interval.
func sumOverTimeImpl(timestamp clientmodel.Timestamp, args []Node) interface{} {
	sum := func(values metric.Values) clientmodel.SampleValue {
		total := clientmodel.SampleValue(0)
		for i := range values {
			total += values[i].Value
		}
		return total
	}
	return aggrOverTime(timestamp, args, sum)
}
// === abs(vector VectorNode) Vector ===
//
// absImpl replaces the value of every sample by its absolute value and
// removes the metric name label.
func absImpl(timestamp clientmodel.Timestamp, args []Node) interface{} {
	vector := args[0].(VectorNode).Eval(timestamp)
	for i := range vector {
		el := vector[i]
		el.Metric.Delete(clientmodel.MetricNameLabel)
		magnitude := math.Abs(float64(el.Value))
		el.Value = clientmodel.SampleValue(magnitude)
	}
	return vector
}
// === absent(vector VectorNode) Vector ===
//
// absentImpl returns an empty vector if the vector argument has any
// elements. Otherwise it returns a single sample with the value 1. If the
// argument is a plain vector selector, that sample carries the labels of
// all equality matchers of the selector, except for the metric name.
func absentImpl(timestamp clientmodel.Timestamp, args []Node) interface{} {
	node := args[0].(VectorNode)
	if len(node.Eval(timestamp)) > 0 {
		return Vector{}
	}

	labels := clientmodel.Metric{}
	if selector, isSelector := node.(*VectorSelector); isSelector {
		for _, matcher := range selector.labelMatchers {
			if matcher.Type != metric.Equal || matcher.Name == clientmodel.MetricNameLabel {
				continue
			}
			labels[matcher.Name] = matcher.Value
		}
	}

	absent := &Sample{
		Metric: clientmodel.COWMetric{
			Metric: labels,
			Copied: true,
		},
		Value:     1,
		Timestamp: timestamp,
	}
	return Vector{absent}
}
// === ceil(vector VectorNode) Vector ===
//
// ceilImpl rounds the value of every sample up to the nearest integer and
// removes the metric name label.
func ceilImpl(timestamp clientmodel.Timestamp, args []Node) interface{} {
	vector := args[0].(VectorNode).Eval(timestamp)
	for i := range vector {
		el := vector[i]
		el.Metric.Delete(clientmodel.MetricNameLabel)
		ceiled := math.Ceil(float64(el.Value))
		el.Value = clientmodel.SampleValue(ceiled)
	}
	return vector
}
// === deriv(node MatrixNode) Vector ===
//
// derivImpl returns, per series, the slope (value change per second) of the
// least-squares regression line through all samples in the matrix interval.
// Series with fewer than two samples are dropped. The metric name label is
// removed from the results.
func derivImpl(timestamp clientmodel.Timestamp, args []Node) interface{} {
	matrixNode := args[0].(MatrixNode)
	resultVector := Vector{}

	matrixValue := matrixNode.Eval(timestamp)
	for _, samples := range matrixValue {
		// No sense in trying to compute a derivative without at least two points.
		// Drop this vector element.
		if len(samples.Values) < 2 {
			continue
		}

		// Least squares.
		//
		// Time is measured in (fractional) seconds relative to the first
		// sample. The slope does not depend on where the time axis starts,
		// but small x values avoid the loss of precision that absolute epoch
		// seconds cause in the sums and in the subtractions below.
		origin := samples.Values[0].Timestamp.UnixNano()
		var n, sumY, sumX, sumXY, sumX2 clientmodel.SampleValue
		for _, sample := range samples.Values {
			// Convert to float before dividing so that the sub-second part
			// of the timestamp is kept.
			x := clientmodel.SampleValue(float64(sample.Timestamp.UnixNano()-origin) / 1e9)
			n += 1.0
			sumY += sample.Value
			sumX += x
			sumXY += x * sample.Value
			sumX2 += x * x
		}
		numerator := sumXY - sumX*sumY/n
		denominator := sumX2 - (sumX*sumX)/n

		resultValue := numerator / denominator

		resultSample := &Sample{
			Metric:    samples.Metric,
			Value:     resultValue,
			Timestamp: timestamp,
		}
		resultSample.Metric.Delete(clientmodel.MetricNameLabel)
		resultVector = append(resultVector, resultSample)
	}
	return resultVector
}
// === histogram_quantile(q ScalarNode, vector VectorNode) Vector ===
//
// histogramQuantileImpl groups the samples of the vector by their labels,
// ignoring the bucket label and the metric name, and interprets each group
// as the buckets of one histogram. For every group it emits one sample
// whose value is the result of quantile(q, buckets). Samples whose bucket
// label is missing or not parseable as a float are skipped.
func histogramQuantileImpl(timestamp clientmodel.Timestamp, args []Node) interface{} {
	q := args[0].(ScalarNode).Eval(timestamp)
	inVec := args[1].(VectorNode).Eval(timestamp)

	histograms := map[clientmodel.Fingerprint]*metricWithBuckets{}
	for _, el := range inVec {
		boundLabel := string(el.Metric.Metric[clientmodel.BucketLabel])
		upperBound, err := strconv.ParseFloat(boundLabel, 64)
		if err != nil {
			// Oops, no bucket label or malformed label value. Skip.
			// TODO(beorn7): Issue a warning somehow.
			continue
		}
		// TODO avoid copying each time by using a custom fingerprint
		el.Metric.Delete(clientmodel.BucketLabel)
		el.Metric.Delete(clientmodel.MetricNameLabel)

		// All buckets of one histogram share the fingerprint of the
		// remaining labels.
		fp := el.Metric.Metric.Fingerprint()
		mb, known := histograms[fp]
		if !known {
			mb = &metricWithBuckets{el.Metric, nil}
			histograms[fp] = mb
		}
		mb.buckets = append(mb.buckets, bucket{upperBound, el.Value})
	}

	outVec := Vector{}
	for _, mb := range histograms {
		result := &Sample{
			Metric:    mb.metric,
			Value:     clientmodel.SampleValue(quantile(q, mb.buckets)),
			Timestamp: timestamp,
		}
		outVec = append(outVec, result)
	}
	return outVec
}
// functions is the registry of all functions of the expression language,
// keyed by function name. Each entry declares the function's argument types,
// the number of trailing optional arguments (if any), its return type and
// the implementation to call. GetFunction looks functions up in this map.
var functions = map[string]*Function{
"abs": {
name: "abs",
argTypes: []ExprType{VectorType},
returnType: VectorType,
callFn: absImpl,
},
"absent": {
name: "absent",
argTypes: []ExprType{VectorType},
returnType: VectorType,
callFn: absentImpl,
},
"avg_over_time": {
name: "avg_over_time",
argTypes: []ExprType{MatrixType},
returnType: VectorType,
callFn: avgOverTimeImpl,
},
"bottomk": {
name: "bottomk",
argTypes: []ExprType{ScalarType, VectorType},
returnType: VectorType,
callFn: bottomkImpl,
},
"ceil": {
name: "ceil",
argTypes: []ExprType{VectorType},
returnType: VectorType,
callFn: ceilImpl,
},
"count_over_time": {
name: "count_over_time",
argTypes: []ExprType{MatrixType},
returnType: VectorType,
callFn: countOverTimeImpl,
},
"count_scalar": {
name: "count_scalar",
argTypes: []ExprType{VectorType},
returnType: ScalarType,
callFn: countScalarImpl,
},
"delta": {
name: "delta",
argTypes: []ExprType{MatrixType, ScalarType},
optionalArgs: 1, // The 2nd argument is deprecated.
returnType: VectorType,
callFn: deltaImpl,
},
"deriv": {
name: "deriv",
argTypes: []ExprType{MatrixType},
returnType: VectorType,
callFn: derivImpl,
},
"drop_common_labels": {
name: "drop_common_labels",
argTypes: []ExprType{VectorType},
returnType: VectorType,
callFn: dropCommonLabelsImpl,
},
"floor": {
name: "floor",
argTypes: []ExprType{VectorType},
returnType: VectorType,
callFn: floorImpl,
},
"histogram_quantile": {
name: "histogram_quantile",
argTypes: []ExprType{ScalarType, VectorType},
returnType: VectorType,
callFn: histogramQuantileImpl,
},
"max_over_time": {
name: "max_over_time",
argTypes: []ExprType{MatrixType},
returnType: VectorType,
callFn: maxOverTimeImpl,
},
"min_over_time": {
name: "min_over_time",
argTypes: []ExprType{MatrixType},
returnType: VectorType,
callFn: minOverTimeImpl,
},
"rate": {
name: "rate",
argTypes: []ExprType{MatrixType},
returnType: VectorType,
callFn: rateImpl,
},
"round": {
name: "round",
argTypes: []ExprType{VectorType, ScalarType},
// The 2nd argument (toNearest) may be omitted; roundImpl then uses 1.
optionalArgs: 1,
returnType: VectorType,
callFn: roundImpl,
},
"scalar": {
name: "scalar",
argTypes: []ExprType{VectorType},
returnType: ScalarType,
callFn: scalarImpl,
},
"sort": {
name: "sort",
argTypes: []ExprType{VectorType},
returnType: VectorType,
callFn: sortImpl,
},
"sort_desc": {
name: "sort_desc",
argTypes: []ExprType{VectorType},
returnType: VectorType,
callFn: sortDescImpl,
},
"sum_over_time": {
name: "sum_over_time",
argTypes: []ExprType{MatrixType},
returnType: VectorType,
callFn: sumOverTimeImpl,
},
"time": {
name: "time",
argTypes: []ExprType{},
returnType: ScalarType,
callFn: timeImpl,
},
"topk": {
name: "topk",
argTypes: []ExprType{ScalarType, VectorType},
returnType: VectorType,
callFn: topkImpl,
},
}
// GetFunction returns a predefined Function object for the given
// name. If no function is registered under that name, it returns nil and
// a non-nil error.
func GetFunction(name string) (*Function, error) {
	if function, found := functions[name]; found {
		return function, nil
	}
	return nil, fmt.Errorf("couldn't find function %v()", name)
}