From 9e7c3e3bcd5767e64848e94c91458d978ab3f5b7 Mon Sep 17 00:00:00 2001 From: beorn7 Date: Thu, 19 Feb 2015 19:56:45 +0100 Subject: [PATCH 1/4] Add the histogram_quantile function. Since we are now getting really deep into floating point calculation, the tests had to take into account the precision loss. Since the rule tests are based on direct line matching in the output, implementing the "almost equal" semantics was pretty cumbersome, but here we are. --- rules/ast/functions.go | 57 +++++++++- rules/ast/quantile.go | 99 ++++++++++++++++ rules/helpers_test.go | 218 +++++++++++++++++++++++++++++++++++ rules/rules_test.go | 253 ++++++++++++++++++++++++++++++++++++++++- 4 files changed, 620 insertions(+), 7 deletions(-) create mode 100644 rules/ast/quantile.go diff --git a/rules/ast/functions.go b/rules/ast/functions.go index dafa3c99b..0c5fcafd9 100644 --- a/rules/ast/functions.go +++ b/rules/ast/functions.go @@ -18,6 +18,7 @@ import ( "fmt" "math" "sort" + "strconv" "time" clientmodel "github.com/prometheus/client_golang/model" @@ -498,6 +499,44 @@ func derivImpl(timestamp clientmodel.Timestamp, args []Node) interface{} { return resultVector } +// === histogram_quantile(k ScalarNode, vector VectorNode) Vector === +func histogramQuantileImpl(timestamp clientmodel.Timestamp, args []Node) interface{} { + q := args[0].(ScalarNode).Eval(timestamp) + inVec := args[1].(VectorNode).Eval(timestamp) + outVec := Vector{} + fpToMetricWithBuckets := map[clientmodel.Fingerprint]*metricWithBuckets{} + for _, el := range inVec { + upperBound, err := strconv.ParseFloat( + string(el.Metric.Metric[clientmodel.BucketLabel]), 64, + ) + if err != nil { + // Oops, no bucket label or malformed label value. Skip. + // TODO(beorn7): Issue a warning somehow. + continue + } + // TODO avoid copying each time by using a custom fingerprint + el.Metric.Delete(clientmodel.BucketLabel) + el.Metric.Delete(clientmodel.MetricNameLabel) + fp := el.Metric.Metric.Fingerprint() + mb, ok := fpToMetricWithBuckets[fp] + if !ok { + mb = &metricWithBuckets{el.Metric, nil} + fpToMetricWithBuckets[fp] = mb + } + mb.buckets = append(mb.buckets, bucket{upperBound, el.Value}) + } + + for _, mb := range fpToMetricWithBuckets { + outVec = append(outVec, &Sample{ + Metric: mb.metric, + Value: clientmodel.SampleValue(quantile(q, mb.buckets)), + Timestamp: timestamp, + }) + } + + return outVec +} + var functions = map[string]*Function{ "abs": { name: "abs", @@ -548,6 +587,12 @@ var functions = map[string]*Function{ returnType: VectorType, callFn: deltaImpl, }, + "deriv": { + name: "deriv", + argTypes: []ExprType{MatrixType}, + returnType: VectorType, + callFn: derivImpl, + }, "drop_common_labels": { name: "drop_common_labels", argTypes: []ExprType{VectorType}, @@ -560,6 +605,12 @@ var functions = map[string]*Function{ returnType: VectorType, callFn: floorImpl, }, + "histogram_quantile": { + name: "histogram_quantile", + argTypes: []ExprType{ScalarType, VectorType}, + returnType: VectorType, + callFn: histogramQuantileImpl, + }, "max_over_time": { name: "max_over_time", argTypes: []ExprType{MatrixType}, @@ -621,12 +672,6 @@ var functions = map[string]*Function{ returnType: VectorType, callFn: topkImpl, }, - "deriv": { - name: "deriv", - argTypes: []ExprType{MatrixType}, - returnType: VectorType, - callFn: derivImpl, - }, } // GetFunction returns a predefined Function object for the given diff --git a/rules/ast/quantile.go b/rules/ast/quantile.go new file mode 100644 index 000000000..f38313313 --- /dev/null +++ b/rules/ast/quantile.go @@ -0,0 
+1,99 @@ +// Copyright 2015 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package ast + +import ( + "math" + "sort" + + clientmodel "github.com/prometheus/client_golang/model" +) + +// Helpers to calculate quantiles. + +type bucket struct { + upperBound float64 + count clientmodel.SampleValue +} + +// buckets implements sort.Interface. +type buckets []bucket + +func (b buckets) Len() int { return len(b) } +func (b buckets) Swap(i, j int) { b[i], b[j] = b[j], b[i] } +func (b buckets) Less(i, j int) bool { return b[i].upperBound < b[j].upperBound } + +type metricWithBuckets struct { + metric clientmodel.COWMetric + buckets buckets +} + +// quantile calculates the quantile 'q' based on the given buckets. The buckets +// will be sorted by upperBound by this function (i.e. no sorting needed before +// calling this function). The quantile value is interpolated assuming a linear +// distribution within a bucket. However, if the quantile falls into the highest +// bucket, the upper bound of the 2nd highest bucket is returned. A natural +// lower bound of 0 is assumed if the upper bound of the lowest bucket is +// greater 0. In that case, interpolation in the lowest bucket happens linearly +// between 0 and the upper bound of the lowest bucket. However, if the lowest +// bucket has an upper bound less or equal 0, this upper bound is returned if +// the quantile falls into the lowest bucket. +// +// There are a number of special cases (once we have a way to report errors +// happening during evaluations of AST functions, we should report those +// explicitly): +// +// If 'buckets' has fewer than 2 elements, NaN is returned. +// +// If the highest bucket is not +Inf, NaN is returned. +// +// If q<0, -Inf is returned. +// +// If q>1, +Inf is returned. +func quantile(q clientmodel.SampleValue, buckets buckets) float64 { + if q < 0 { + return math.Inf(-1) + } + if q > 1 { + return math.Inf(+1) + } + if len(buckets) < 2 { + return math.NaN() + } + sort.Sort(buckets) + if !math.IsInf(buckets[len(buckets)-1].upperBound, +1) { + return math.NaN() + } + + rank := q * buckets[len(buckets)-1].count + b := sort.Search(len(buckets)-1, func(i int) bool { return buckets[i].count >= rank }) + + if b == len(buckets)-1 { + return buckets[len(buckets)-2].upperBound + } + if b == 0 && buckets[0].upperBound <= 0 { + return buckets[0].upperBound + } + var ( + bucketStart float64 + bucketEnd = buckets[b].upperBound + count = buckets[b].count + ) + if b > 0 { + bucketStart = buckets[b-1].upperBound + count -= buckets[b-1].count + rank -= buckets[b-1].count + } + return bucketStart + (bucketEnd-bucketStart)*float64(rank/count) +} diff --git a/rules/helpers_test.go b/rules/helpers_test.go index 867f66891..eb22ea53f 100644 --- a/rules/helpers_test.go +++ b/rules/helpers_test.go @@ -205,6 +205,224 @@ var testMatrix = ast.Matrix{ }, Values: getTestValueStream(0, 200, 20, testStartTime), }, + // Two histogram with 4 buckets each (*_sum and *_count not included, + // only buckets). 
Lowest bucket for one histogram < 0, for the other > + // 0. They have the same name, just separated by label. Not useful in + // practice, but can happen (if clients change bucketing), and the + // server has to cope with it. + { + Metric: clientmodel.COWMetric{ + Metric: clientmodel.Metric{ + clientmodel.MetricNameLabel: "testhistogram_bucket", + "le": "0.1", + "start": "positive", + }, + }, + Values: getTestValueStream(0, 50, 5, testStartTime), + }, + { + Metric: clientmodel.COWMetric{ + Metric: clientmodel.Metric{ + clientmodel.MetricNameLabel: "testhistogram_bucket", + "le": ".2", + "start": "positive", + }, + }, + Values: getTestValueStream(0, 70, 7, testStartTime), + }, + { + Metric: clientmodel.COWMetric{ + Metric: clientmodel.Metric{ + clientmodel.MetricNameLabel: "testhistogram_bucket", + "le": "1e0", + "start": "positive", + }, + }, + Values: getTestValueStream(0, 110, 11, testStartTime), + }, + { + Metric: clientmodel.COWMetric{ + Metric: clientmodel.Metric{ + clientmodel.MetricNameLabel: "testhistogram_bucket", + "le": "+Inf", + "start": "positive", + }, + }, + Values: getTestValueStream(0, 120, 12, testStartTime), + }, + { + Metric: clientmodel.COWMetric{ + Metric: clientmodel.Metric{ + clientmodel.MetricNameLabel: "testhistogram_bucket", + "le": "-.2", + "start": "negative", + }, + }, + Values: getTestValueStream(0, 10, 1, testStartTime), + }, + { + Metric: clientmodel.COWMetric{ + Metric: clientmodel.Metric{ + clientmodel.MetricNameLabel: "testhistogram_bucket", + "le": "-0.1", + "start": "negative", + }, + }, + Values: getTestValueStream(0, 20, 2, testStartTime), + }, + { + Metric: clientmodel.COWMetric{ + Metric: clientmodel.Metric{ + clientmodel.MetricNameLabel: "testhistogram_bucket", + "le": "0.3", + "start": "negative", + }, + }, + Values: getTestValueStream(0, 20, 2, testStartTime), + }, + { + Metric: clientmodel.COWMetric{ + Metric: clientmodel.Metric{ + clientmodel.MetricNameLabel: "testhistogram_bucket", + "le": "+Inf", + "start": "negative", + }, + }, + Values: getTestValueStream(0, 30, 3, testStartTime), + }, + // Now a more realistic histogram per job and instance to test aggregation. 
+ { + Metric: clientmodel.COWMetric{ + Metric: clientmodel.Metric{ + clientmodel.MetricNameLabel: "request_duration_seconds_bucket", + clientmodel.JobLabel: "job1", + "instance": "ins1", + "le": "0.1", + }, + }, + Values: getTestValueStream(0, 10, 1, testStartTime), + }, + { + Metric: clientmodel.COWMetric{ + Metric: clientmodel.Metric{ + clientmodel.MetricNameLabel: "request_duration_seconds_bucket", + clientmodel.JobLabel: "job1", + "instance": "ins1", + "le": "0.2", + }, + }, + Values: getTestValueStream(0, 30, 3, testStartTime), + }, + { + Metric: clientmodel.COWMetric{ + Metric: clientmodel.Metric{ + clientmodel.MetricNameLabel: "request_duration_seconds_bucket", + clientmodel.JobLabel: "job1", + "instance": "ins1", + "le": "+Inf", + }, + }, + Values: getTestValueStream(0, 40, 4, testStartTime), + }, + { + Metric: clientmodel.COWMetric{ + Metric: clientmodel.Metric{ + clientmodel.MetricNameLabel: "request_duration_seconds_bucket", + clientmodel.JobLabel: "job1", + "instance": "ins2", + "le": "0.1", + }, + }, + Values: getTestValueStream(0, 20, 2, testStartTime), + }, + { + Metric: clientmodel.COWMetric{ + Metric: clientmodel.Metric{ + clientmodel.MetricNameLabel: "request_duration_seconds_bucket", + clientmodel.JobLabel: "job1", + "instance": "ins2", + "le": "0.2", + }, + }, + Values: getTestValueStream(0, 50, 5, testStartTime), + }, + { + Metric: clientmodel.COWMetric{ + Metric: clientmodel.Metric{ + clientmodel.MetricNameLabel: "request_duration_seconds_bucket", + clientmodel.JobLabel: "job1", + "instance": "ins2", + "le": "+Inf", + }, + }, + Values: getTestValueStream(0, 60, 6, testStartTime), + }, + { + Metric: clientmodel.COWMetric{ + Metric: clientmodel.Metric{ + clientmodel.MetricNameLabel: "request_duration_seconds_bucket", + clientmodel.JobLabel: "job2", + "instance": "ins1", + "le": "0.1", + }, + }, + Values: getTestValueStream(0, 30, 3, testStartTime), + }, + { + Metric: clientmodel.COWMetric{ + Metric: clientmodel.Metric{ + clientmodel.MetricNameLabel: "request_duration_seconds_bucket", + clientmodel.JobLabel: "job2", + "instance": "ins1", + "le": "0.2", + }, + }, + Values: getTestValueStream(0, 40, 4, testStartTime), + }, + { + Metric: clientmodel.COWMetric{ + Metric: clientmodel.Metric{ + clientmodel.MetricNameLabel: "request_duration_seconds_bucket", + clientmodel.JobLabel: "job2", + "instance": "ins1", + "le": "+Inf", + }, + }, + Values: getTestValueStream(0, 60, 6, testStartTime), + }, + { + Metric: clientmodel.COWMetric{ + Metric: clientmodel.Metric{ + clientmodel.MetricNameLabel: "request_duration_seconds_bucket", + clientmodel.JobLabel: "job2", + "instance": "ins2", + "le": "0.1", + }, + }, + Values: getTestValueStream(0, 40, 4, testStartTime), + }, + { + Metric: clientmodel.COWMetric{ + Metric: clientmodel.Metric{ + clientmodel.MetricNameLabel: "request_duration_seconds_bucket", + clientmodel.JobLabel: "job2", + "instance": "ins2", + "le": "0.2", + }, + }, + Values: getTestValueStream(0, 70, 7, testStartTime), + }, + { + Metric: clientmodel.COWMetric{ + Metric: clientmodel.Metric{ + clientmodel.MetricNameLabel: "request_duration_seconds_bucket", + clientmodel.JobLabel: "job2", + "instance": "ins2", + "le": "+Inf", + }, + }, + Values: getTestValueStream(0, 90, 9, testStartTime), + }, } var testVector = getTestVectorFromTestMatrix(testMatrix) diff --git a/rules/rules_test.go b/rules/rules_test.go index d25cd2501..9d85dcd10 100644 --- a/rules/rules_test.go +++ b/rules/rules_test.go @@ -15,7 +15,10 @@ package rules import ( "fmt" + "math" "path" + "regexp" + "strconv" 
"strings" "testing" "time" @@ -32,6 +35,13 @@ import ( var ( testEvalTime = testStartTime.Add(testSampleInterval * 10) fixturesPath = "fixtures" + + reSample = regexp.MustCompile(`^(.*) \=\> (\-?\d+\.?\d*e?\d*|[+-]Inf|NaN) \@\[(\d+)\]$`) + minNormal = math.Float64frombits(0x0010000000000000) // The smallest positive normal value of type float64. +) + +const ( + epsilon = 0.000001 // Relative error allowed for sample values. ) func annotateWithTime(lines []string, timestamp clientmodel.Timestamp) []string { @@ -53,6 +63,51 @@ func vectorComparisonString(expected []string, actual []string) string { separator) } +// samplesAlmostEqual returns true if the two sample lines only differ by a +// small relative error in their sample value. +func samplesAlmostEqual(a, b string) bool { + if a == b { + // Fast path if strings are equal. + return true + } + aMatches := reSample.FindStringSubmatch(a) + if aMatches == nil { + panic(fmt.Errorf("sample %q did not match regular expression", a)) + } + bMatches := reSample.FindStringSubmatch(b) + if bMatches == nil { + panic(fmt.Errorf("sample %q did not match regular expression", b)) + } + if aMatches[1] != bMatches[1] { + return false // Labels don't match. + } + if aMatches[3] != bMatches[3] { + return false // Timestamps don't match. + } + // If we are here, we have the diff in the floats. + // We have to check if they are almost equal. + aVal, err := strconv.ParseFloat(aMatches[2], 64) + if err != nil { + panic(err) + } + bVal, err := strconv.ParseFloat(bMatches[2], 64) + if err != nil { + panic(err) + } + + // Cf. http://floating-point-gui.de/errors/comparison/ + if aVal == bVal { + return true + } + + diff := math.Abs(aVal - bVal) + + if aVal == 0 || bVal == 0 || diff < minNormal { + return diff < epsilon*minNormal + } + return diff/(math.Abs(aVal)+math.Abs(bVal)) < epsilon +} + func newTestStorage(t testing.TB) (storage local.Storage, closer test.Closer) { storage, closer = local.NewTestStorage(t) storeMatrix(storage, testMatrix) @@ -555,6 +610,26 @@ func TestExpressions(t *testing.T) { `x{y="testvalue"} => 100 @[%v]`, `label_grouping_test{a="a", b="abb"} => 200 @[%v]`, `label_grouping_test{a="aa", b="bb"} => 100 @[%v]`, + `testhistogram_bucket{le="0.1", start="positive"} => 50 @[%v]`, + `testhistogram_bucket{le=".2", start="positive"} => 70 @[%v]`, + `testhistogram_bucket{le="1e0", start="positive"} => 110 @[%v]`, + `testhistogram_bucket{le="+Inf", start="positive"} => 120 @[%v]`, + `testhistogram_bucket{le="-.2", start="negative"} => 10 @[%v]`, + `testhistogram_bucket{le="-0.1", start="negative"} => 20 @[%v]`, + `testhistogram_bucket{le="0.3", start="negative"} => 20 @[%v]`, + `testhistogram_bucket{le="+Inf", start="negative"} => 30 @[%v]`, + `request_duration_seconds_bucket{instance="ins1", job="job1", le="0.1"} => 10 @[%v]`, + `request_duration_seconds_bucket{instance="ins1", job="job1", le="0.2"} => 30 @[%v]`, + `request_duration_seconds_bucket{instance="ins1", job="job1", le="+Inf"} => 40 @[%v]`, + `request_duration_seconds_bucket{instance="ins2", job="job1", le="0.1"} => 20 @[%v]`, + `request_duration_seconds_bucket{instance="ins2", job="job1", le="0.2"} => 50 @[%v]`, + `request_duration_seconds_bucket{instance="ins2", job="job1", le="+Inf"} => 60 @[%v]`, + `request_duration_seconds_bucket{instance="ins1", job="job2", le="0.1"} => 30 @[%v]`, + `request_duration_seconds_bucket{instance="ins1", job="job2", le="0.2"} => 40 @[%v]`, + `request_duration_seconds_bucket{instance="ins1", job="job2", le="+Inf"} => 60 @[%v]`, + 
`request_duration_seconds_bucket{instance="ins2", job="job2", le="0.1"} => 40 @[%v]`, + `request_duration_seconds_bucket{instance="ins2", job="job2", le="0.2"} => 70 @[%v]`, + `request_duration_seconds_bucket{instance="ins2", job="job2", le="+Inf"} => 90 @[%v]`, }, }, { @@ -651,6 +726,182 @@ func TestExpressions(t *testing.T) { `{a="aa", b="bb"} => 100 @[%v]`, }, }, + // Quantile too low. + { + expr: `histogram_quantile(-0.1, testhistogram_bucket)`, + output: []string{ + `{start="positive"} => -Inf @[%v]`, + `{start="negative"} => -Inf @[%v]`, + }, + }, + // Quantile too high. + { + expr: `histogram_quantile(1.01, testhistogram_bucket)`, + output: []string{ + `{start="positive"} => +Inf @[%v]`, + `{start="negative"} => +Inf @[%v]`, + }, + }, + // Quantile value in lowest bucket, which is positive. + { + expr: `histogram_quantile(0, testhistogram_bucket{start="positive"})`, + output: []string{ + `{start="positive"} => 0 @[%v]`, + }, + }, + // Quantile value in lowest bucket, which is negative. + { + expr: `histogram_quantile(0, testhistogram_bucket{start="negative"})`, + output: []string{ + `{start="negative"} => -0.2 @[%v]`, + }, + }, + // Quantile value in highest bucket. + { + expr: `histogram_quantile(1, testhistogram_bucket)`, + output: []string{ + `{start="positive"} => 1 @[%v]`, + `{start="negative"} => 0.3 @[%v]`, + }, + }, + // Finally some useful quantiles. + { + expr: `histogram_quantile(0.2, testhistogram_bucket)`, + output: []string{ + `{start="positive"} => 0.048 @[%v]`, + `{start="negative"} => -0.2 @[%v]`, + }, + }, + { + expr: `histogram_quantile(0.5, testhistogram_bucket)`, + output: []string{ + `{start="positive"} => 0.15 @[%v]`, + `{start="negative"} => -0.15 @[%v]`, + }, + }, + { + expr: `histogram_quantile(0.8, testhistogram_bucket)`, + output: []string{ + `{start="positive"} => 0.72 @[%v]`, + `{start="negative"} => 0.3 @[%v]`, + }, + }, + // More realistic with rates. + { + expr: `histogram_quantile(0.2, rate(testhistogram_bucket[5m]))`, + output: []string{ + `{start="positive"} => 0.048 @[%v]`, + `{start="negative"} => -0.2 @[%v]`, + }, + }, + { + expr: `histogram_quantile(0.5, rate(testhistogram_bucket[5m]))`, + output: []string{ + `{start="positive"} => 0.15 @[%v]`, + `{start="negative"} => -0.15 @[%v]`, + }, + }, + { + expr: `histogram_quantile(0.8, rate(testhistogram_bucket[5m]))`, + output: []string{ + `{start="positive"} => 0.72 @[%v]`, + `{start="negative"} => 0.3 @[%v]`, + }, + }, + // Aggregated histogram: Everything in one. + { + expr: `histogram_quantile(0.3, sum(rate(request_duration_seconds_bucket[5m])) by (le))`, + output: []string{ + `{} => 0.075 @[%v]`, + }, + }, + { + expr: `histogram_quantile(0.5, sum(rate(request_duration_seconds_bucket[5m])) by (le))`, + output: []string{ + `{} => 0.1277777777777778 @[%v]`, + }, + }, + // Aggregated histogram: Everything in one. Now with avg, which does not change anything. + { + expr: `histogram_quantile(0.3, avg(rate(request_duration_seconds_bucket[5m])) by (le))`, + output: []string{ + `{} => 0.075 @[%v]`, + }, + }, + { + expr: `histogram_quantile(0.5, avg(rate(request_duration_seconds_bucket[5m])) by (le))`, + output: []string{ + `{} => 0.12777777777777778 @[%v]`, + }, + }, + // Aggregated histogram: By job. 
+ { + expr: `histogram_quantile(0.3, sum(rate(request_duration_seconds_bucket[5m])) by (le, instance))`, + output: []string{ + `{instance="ins1"} => 0.075 @[%v]`, + `{instance="ins2"} => 0.075 @[%v]`, + }, + }, + { + expr: `histogram_quantile(0.5, sum(rate(request_duration_seconds_bucket[5m])) by (le, instance))`, + output: []string{ + `{instance="ins1"} => 0.1333333333 @[%v]`, + `{instance="ins2"} => 0.125 @[%v]`, + }, + }, + // Aggregated histogram: By instance. + { + expr: `histogram_quantile(0.3, sum(rate(request_duration_seconds_bucket[5m])) by (le, job))`, + output: []string{ + `{job="job1"} => 0.1 @[%v]`, + `{job="job2"} => 0.0642857142857143 @[%v]`, + }, + }, + { + expr: `histogram_quantile(0.5, sum(rate(request_duration_seconds_bucket[5m])) by (le, job))`, + output: []string{ + `{job="job1"} => 0.14 @[%v]`, + `{job="job2"} => 0.1125 @[%v]`, + }, + }, + // Aggregated histogram: By job and instance. + { + expr: `histogram_quantile(0.3, sum(rate(request_duration_seconds_bucket[5m])) by (le, job, instance))`, + output: []string{ + `{instance="ins1", job="job1"} => 0.11 @[%v]`, + `{instance="ins2", job="job1"} => 0.09 @[%v]`, + `{instance="ins1", job="job2"} => 0.06 @[%v]`, + `{instance="ins2", job="job2"} => 0.0675 @[%v]`, + }, + }, + { + expr: `histogram_quantile(0.5, sum(rate(request_duration_seconds_bucket[5m])) by (le, job, instance))`, + output: []string{ + `{instance="ins1", job="job1"} => 0.15 @[%v]`, + `{instance="ins2", job="job1"} => 0.1333333333333333 @[%v]`, + `{instance="ins1", job="job2"} => 0.1 @[%v]`, + `{instance="ins2", job="job2"} => 0.1166666666666667 @[%v]`, + }, + }, + // The unaggregated histogram for comparison. Same result as the previous one. + { + expr: `histogram_quantile(0.3, rate(request_duration_seconds_bucket[5m]))`, + output: []string{ + `{instance="ins1", job="job1"} => 0.11 @[%v]`, + `{instance="ins2", job="job1"} => 0.09 @[%v]`, + `{instance="ins1", job="job2"} => 0.06 @[%v]`, + `{instance="ins2", job="job2"} => 0.0675 @[%v]`, + }, + }, + { + expr: `histogram_quantile(0.5, rate(request_duration_seconds_bucket[5m]))`, + output: []string{ + `{instance="ins1", job="job1"} => 0.15 @[%v]`, + `{instance="ins2", job="job1"} => 0.13333333333333333 @[%v]`, + `{instance="ins1", job="job2"} => 0.1 @[%v]`, + `{instance="ins2", job="job2"} => 0.11666666666666667 @[%v]`, + }, + }, } storage, closer := newTestStorage(t) @@ -691,7 +942,7 @@ func TestExpressions(t *testing.T) { for j, expectedSample := range expectedLines { found := false for _, actualSample := range resultLines { - if actualSample == expectedSample { + if samplesAlmostEqual(actualSample, expectedSample) { found = true } } From 17443d288bcf7b0d704ccc3ad1759fd938a8e433 Mon Sep 17 00:00:00 2001 From: beorn7 Date: Sun, 22 Feb 2015 00:25:02 +0100 Subject: [PATCH 2/4] Avoid copying of the COWMetric if we already have the metric available. --- rules/ast/functions.go | 7 +++---- rules/ast/quantile.go | 45 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 48 insertions(+), 4 deletions(-) diff --git a/rules/ast/functions.go b/rules/ast/functions.go index 0c5fcafd9..3a207b976 100644 --- a/rules/ast/functions.go +++ b/rules/ast/functions.go @@ -514,12 +514,11 @@ func histogramQuantileImpl(timestamp clientmodel.Timestamp, args []Node) interfa // TODO(beorn7): Issue a warning somehow. 
continue } - // TODO avoid copying each time by using a custom fingerprint - el.Metric.Delete(clientmodel.BucketLabel) - el.Metric.Delete(clientmodel.MetricNameLabel) - fp := el.Metric.Metric.Fingerprint() + fp := bucketFingerprint(el.Metric.Metric) mb, ok := fpToMetricWithBuckets[fp] if !ok { + el.Metric.Delete(clientmodel.BucketLabel) + el.Metric.Delete(clientmodel.MetricNameLabel) mb = &metricWithBuckets{el.Metric, nil} fpToMetricWithBuckets[fp] = mb } diff --git a/rules/ast/quantile.go b/rules/ast/quantile.go index f38313313..0f628153f 100644 --- a/rules/ast/quantile.go +++ b/rules/ast/quantile.go @@ -14,6 +14,8 @@ package ast import ( + "encoding/binary" + "hash/fnv" "math" "sort" @@ -97,3 +99,46 @@ func quantile(q clientmodel.SampleValue, buckets buckets) float64 { } return bucketStart + (bucketEnd-bucketStart)*float64(rank/count) } + +// bucketFingerprint works like the Fingerprint method of Metric, but ignores +// the name and the bucket label. +func bucketFingerprint(m clientmodel.Metric) clientmodel.Fingerprint { + numLabels := 0 + if len(m) > 2 { + numLabels = len(m) - 2 + } + labelNames := make([]string, 0, numLabels) + maxLength := 0 + + for labelName, labelValue := range m { + if labelName == clientmodel.MetricNameLabel || labelName == clientmodel.BucketLabel { + continue + } + labelNames = append(labelNames, string(labelName)) + if len(labelName) > maxLength { + maxLength = len(labelName) + } + if len(labelValue) > maxLength { + maxLength = len(labelValue) + } + } + + sort.Strings(labelNames) + + summer := fnv.New64a() + buf := make([]byte, maxLength) + + for _, labelName := range labelNames { + labelValue := m[clientmodel.LabelName(labelName)] + + copy(buf, labelName) + summer.Write(buf[:len(labelName)]) + summer.Write([]byte{clientmodel.SeparatorByte}) + + copy(buf, labelValue) + summer.Write(buf[:len(labelValue)]) + summer.Write([]byte{clientmodel.SeparatorByte}) + } + + return clientmodel.Fingerprint(binary.LittleEndian.Uint64(summer.Sum(nil))) +} From 9827e1ccec54810874899759ca3522146982caa4 Mon Sep 17 00:00:00 2001 From: beorn7 Date: Sun, 22 Feb 2015 00:39:50 +0100 Subject: [PATCH 3/4] Update vendoring of client_golang. 
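
This pulls in the client-side histogram support that the new histogram_quantile function relies on: the BucketLabel ("le") and QuantileLabel ("quantile") constants in the model package, histogram handling in the registry and the text format, and extraction code that synthesizes a missing "+Inf" bucket from the sample count so that consumers always see a complete cumulative bucket set.

What follows is only a minimal, self-contained sketch of that "+Inf" synthesis step; the bucket type and the ensureInfBucket helper are illustrative stand-ins, not the vendored client_golang API.

package main

import (
	"fmt"
	"math"
)

// bucket is an illustrative stand-in for one cumulative histogram bucket:
// count is the number of observations with a value <= upperBound.
type bucket struct {
	upperBound float64
	count      uint64
}

// ensureInfBucket returns the buckets unchanged if a "+Inf" bucket is already
// present; otherwise it appends one carrying the total sample count, which is
// what the updated extraction and text-format code do for scraped histograms.
func ensureInfBucket(buckets []bucket, sampleCount uint64) []bucket {
	for _, b := range buckets {
		if math.IsInf(b.upperBound, +1) {
			return buckets
		}
	}
	return append(buckets, bucket{upperBound: math.Inf(+1), count: sampleCount})
}

func main() {
	// Bucket counts taken from the text-format test fixture below
	// (request_duration_microseconds with a missing +Inf bucket).
	scraped := []bucket{{100, 123}, {120, 412}, {144, 592}, {172.8, 1524}}
	for _, b := range ensureInfBucket(scraped, 2693) {
		fmt.Printf("request_duration_microseconds_bucket{le=%q} %d\n", fmt.Sprint(b.upperBound), b.count)
	}
}

Run against that fixture, the sketch prints the same bucket lines the updated MetricFamilyToText now emits, including the synthesized le="+Inf" bucket with the total count of 2693.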
--- Godeps/Godeps.json | 24 +-- .../extraction/metricfamilyprocessor.go | 139 ++++++++++-------- .../client_golang/model/labelname.go | 8 + .../prometheus/benchmark_test.go | 28 ++++ .../client_golang/prometheus/counter.go | 2 +- .../client_golang/prometheus/examples_test.go | 52 ++++++- .../client_golang/prometheus/http.go | 2 +- .../client_golang/prometheus/registry.go | 4 +- .../client_golang/prometheus/summary.go | 39 +++-- .../client_golang/prometheus/summary_test.go | 8 + .../prometheus/client_golang/text/create.go | 23 ++- .../client_golang/text/create_test.go | 44 ++++++ .../prometheus/client_golang/text/parse.go | 8 +- 13 files changed, 289 insertions(+), 92 deletions(-) diff --git a/Godeps/Godeps.json b/Godeps/Godeps.json index 078e4b4b1..dfc8c581e 100644 --- a/Godeps/Godeps.json +++ b/Godeps/Godeps.json @@ -20,33 +20,33 @@ }, { "ImportPath": "github.com/prometheus/client_golang/_vendor/goautoneg", - "Comment": "0.1.0-24-g4627d59", - "Rev": "4627d59e8a09c330c5ccfe7414baca28d8df847d" + "Comment": "0.1.0-37-g54238de", + "Rev": "54238dea8fa4dfaa411b8c9ef0680ba441fb7b1c" }, { "ImportPath": "github.com/prometheus/client_golang/_vendor/perks/quantile", - "Comment": "0.1.0-24-g4627d59", - "Rev": "4627d59e8a09c330c5ccfe7414baca28d8df847d" + "Comment": "0.1.0-37-g54238de", + "Rev": "54238dea8fa4dfaa411b8c9ef0680ba441fb7b1c" }, { "ImportPath": "github.com/prometheus/client_golang/extraction", - "Comment": "0.1.0-24-g4627d59", - "Rev": "4627d59e8a09c330c5ccfe7414baca28d8df847d" + "Comment": "0.1.0-37-g54238de", + "Rev": "54238dea8fa4dfaa411b8c9ef0680ba441fb7b1c" }, { "ImportPath": "github.com/prometheus/client_golang/model", - "Comment": "0.1.0-24-g4627d59", - "Rev": "4627d59e8a09c330c5ccfe7414baca28d8df847d" + "Comment": "0.1.0-37-g54238de", + "Rev": "54238dea8fa4dfaa411b8c9ef0680ba441fb7b1c" }, { "ImportPath": "github.com/prometheus/client_golang/prometheus", - "Comment": "0.1.0-24-g4627d59", - "Rev": "4627d59e8a09c330c5ccfe7414baca28d8df847d" + "Comment": "0.1.0-37-g54238de", + "Rev": "54238dea8fa4dfaa411b8c9ef0680ba441fb7b1c" }, { "ImportPath": "github.com/prometheus/client_golang/text", - "Comment": "0.1.0-24-g4627d59", - "Rev": "4627d59e8a09c330c5ccfe7414baca28d8df847d" + "Comment": "0.1.0-37-g54238de", + "Rev": "54238dea8fa4dfaa411b8c9ef0680ba441fb7b1c" }, { "ImportPath": "github.com/prometheus/client_model/go", diff --git a/Godeps/_workspace/src/github.com/prometheus/client_golang/extraction/metricfamilyprocessor.go b/Godeps/_workspace/src/github.com/prometheus/client_golang/extraction/metricfamilyprocessor.go index 5ed534348..5edb49c24 100644 --- a/Godeps/_workspace/src/github.com/prometheus/client_golang/extraction/metricfamilyprocessor.go +++ b/Godeps/_workspace/src/github.com/prometheus/client_golang/extraction/metricfamilyprocessor.go @@ -16,6 +16,7 @@ package extraction import ( "fmt" "io" + "math" dto "github.com/prometheus/client_model/go" @@ -85,7 +86,10 @@ func extractCounter(out Ingester, o *ProcessOptions, f *dto.MetricFamily) error continue } - sample := new(model.Sample) + sample := &model.Sample{ + Metric: model.Metric{}, + Value: model.SampleValue(m.Counter.GetValue()), + } samples = append(samples, sample) if m.TimestampMs != nil { @@ -93,16 +97,12 @@ func extractCounter(out Ingester, o *ProcessOptions, f *dto.MetricFamily) error } else { sample.Timestamp = o.Timestamp } - sample.Metric = model.Metric{} - metric := sample.Metric + metric := sample.Metric for _, p := range m.Label { metric[model.LabelName(p.GetName())] = model.LabelValue(p.GetValue()) } - 
metric[model.MetricNameLabel] = model.LabelValue(f.GetName()) - - sample.Value = model.SampleValue(m.Counter.GetValue()) } return out.Ingest(samples) @@ -116,7 +116,10 @@ func extractGauge(out Ingester, o *ProcessOptions, f *dto.MetricFamily) error { continue } - sample := new(model.Sample) + sample := &model.Sample{ + Metric: model.Metric{}, + Value: model.SampleValue(m.Gauge.GetValue()), + } samples = append(samples, sample) if m.TimestampMs != nil { @@ -124,16 +127,12 @@ func extractGauge(out Ingester, o *ProcessOptions, f *dto.MetricFamily) error { } else { sample.Timestamp = o.Timestamp } - sample.Metric = model.Metric{} - metric := sample.Metric + metric := sample.Metric for _, p := range m.Label { metric[model.LabelName(p.GetName())] = model.LabelValue(p.GetValue()) } - metric[model.MetricNameLabel] = model.LabelValue(f.GetName()) - - sample.Value = model.SampleValue(m.Gauge.GetValue()) } return out.Ingest(samples) @@ -153,48 +152,50 @@ func extractSummary(out Ingester, o *ProcessOptions, f *dto.MetricFamily) error } for _, q := range m.Summary.Quantile { - sample := new(model.Sample) + sample := &model.Sample{ + Metric: model.Metric{}, + Value: model.SampleValue(q.GetValue()), + Timestamp: timestamp, + } samples = append(samples, sample) - sample.Timestamp = timestamp - sample.Metric = model.Metric{} metric := sample.Metric - for _, p := range m.Label { metric[model.LabelName(p.GetName())] = model.LabelValue(p.GetValue()) } // BUG(matt): Update other names to "quantile". - metric[model.LabelName("quantile")] = model.LabelValue(fmt.Sprint(q.GetQuantile())) - + metric[model.LabelName(model.QuantileLabel)] = model.LabelValue(fmt.Sprint(q.GetQuantile())) metric[model.MetricNameLabel] = model.LabelValue(f.GetName()) - - sample.Value = model.SampleValue(q.GetValue()) } if m.Summary.SampleSum != nil { - sum := new(model.Sample) - sum.Timestamp = timestamp - metric := model.Metric{} + sum := &model.Sample{ + Metric: model.Metric{}, + Value: model.SampleValue(m.Summary.GetSampleSum()), + Timestamp: timestamp, + } + samples = append(samples, sum) + + metric := sum.Metric for _, p := range m.Label { metric[model.LabelName(p.GetName())] = model.LabelValue(p.GetValue()) } metric[model.MetricNameLabel] = model.LabelValue(f.GetName() + "_sum") - sum.Metric = metric - sum.Value = model.SampleValue(m.Summary.GetSampleSum()) - samples = append(samples, sum) } if m.Summary.SampleCount != nil { - count := new(model.Sample) - count.Timestamp = timestamp - metric := model.Metric{} + count := &model.Sample{ + Metric: model.Metric{}, + Value: model.SampleValue(m.Summary.GetSampleCount()), + Timestamp: timestamp, + } + samples = append(samples, count) + + metric := count.Metric for _, p := range m.Label { metric[model.LabelName(p.GetName())] = model.LabelValue(p.GetValue()) } metric[model.MetricNameLabel] = model.LabelValue(f.GetName() + "_count") - count.Metric = metric - count.Value = model.SampleValue(m.Summary.GetSampleCount()) - samples = append(samples, count) } } @@ -209,7 +210,10 @@ func extractUntyped(out Ingester, o *ProcessOptions, f *dto.MetricFamily) error continue } - sample := new(model.Sample) + sample := &model.Sample{ + Metric: model.Metric{}, + Value: model.SampleValue(m.Untyped.GetValue()), + } samples = append(samples, sample) if m.TimestampMs != nil { @@ -217,16 +221,12 @@ func extractUntyped(out Ingester, o *ProcessOptions, f *dto.MetricFamily) error } else { sample.Timestamp = o.Timestamp } - sample.Metric = model.Metric{} - metric := sample.Metric + metric := sample.Metric for _, p 
:= range m.Label { metric[model.LabelName(p.GetName())] = model.LabelValue(p.GetValue()) } - metric[model.MetricNameLabel] = model.LabelValue(f.GetName()) - - sample.Value = model.SampleValue(m.Untyped.GetValue()) } return out.Ingest(samples) @@ -245,49 +245,72 @@ func extractHistogram(out Ingester, o *ProcessOptions, f *dto.MetricFamily) erro timestamp = model.TimestampFromUnixNano(*m.TimestampMs * 1000000) } + infSeen := false + for _, q := range m.Histogram.Bucket { - sample := new(model.Sample) + sample := &model.Sample{ + Metric: model.Metric{}, + Value: model.SampleValue(q.GetCumulativeCount()), + Timestamp: timestamp, + } samples = append(samples, sample) - sample.Timestamp = timestamp - sample.Metric = model.Metric{} metric := sample.Metric - for _, p := range m.Label { metric[model.LabelName(p.GetName())] = model.LabelValue(p.GetValue()) } - metric[model.LabelName("le")] = model.LabelValue(fmt.Sprint(q.GetUpperBound())) - + metric[model.LabelName(model.BucketLabel)] = model.LabelValue(fmt.Sprint(q.GetUpperBound())) metric[model.MetricNameLabel] = model.LabelValue(f.GetName() + "_bucket") - sample.Value = model.SampleValue(q.GetCumulativeCount()) + if math.IsInf(q.GetUpperBound(), +1) { + infSeen = true + } } - // TODO: If +Inf bucket is missing, add it. if m.Histogram.SampleSum != nil { - sum := new(model.Sample) - sum.Timestamp = timestamp - metric := model.Metric{} + sum := &model.Sample{ + Metric: model.Metric{}, + Value: model.SampleValue(m.Histogram.GetSampleSum()), + Timestamp: timestamp, + } + samples = append(samples, sum) + + metric := sum.Metric for _, p := range m.Label { metric[model.LabelName(p.GetName())] = model.LabelValue(p.GetValue()) } metric[model.MetricNameLabel] = model.LabelValue(f.GetName() + "_sum") - sum.Metric = metric - sum.Value = model.SampleValue(m.Histogram.GetSampleSum()) - samples = append(samples, sum) } if m.Histogram.SampleCount != nil { - count := new(model.Sample) - count.Timestamp = timestamp - metric := model.Metric{} + count := &model.Sample{ + Metric: model.Metric{}, + Value: model.SampleValue(m.Histogram.GetSampleCount()), + Timestamp: timestamp, + } + samples = append(samples, count) + + metric := count.Metric for _, p := range m.Label { metric[model.LabelName(p.GetName())] = model.LabelValue(p.GetValue()) } metric[model.MetricNameLabel] = model.LabelValue(f.GetName() + "_count") - count.Metric = metric - count.Value = model.SampleValue(m.Histogram.GetSampleCount()) - samples = append(samples, count) + + if !infSeen { + infBucket := &model.Sample{ + Metric: model.Metric{}, + Value: count.Value, + Timestamp: timestamp, + } + samples = append(samples, infBucket) + + metric := infBucket.Metric + for _, p := range m.Label { + metric[model.LabelName(p.GetName())] = model.LabelValue(p.GetValue()) + } + metric[model.LabelName(model.BucketLabel)] = model.LabelValue("+Inf") + metric[model.MetricNameLabel] = model.LabelValue(f.GetName() + "_bucket") + } } } diff --git a/Godeps/_workspace/src/github.com/prometheus/client_golang/model/labelname.go b/Godeps/_workspace/src/github.com/prometheus/client_golang/model/labelname.go index 047e75655..75b2e79da 100644 --- a/Godeps/_workspace/src/github.com/prometheus/client_golang/model/labelname.go +++ b/Godeps/_workspace/src/github.com/prometheus/client_golang/model/labelname.go @@ -33,6 +33,14 @@ const ( // JobLabel is the label name indicating the job from which a timeseries // was scraped. 
JobLabel LabelName = "job" + + // BucketLabel is used for the label that defines the upper bound of a + // bucket of a histogram ("le" -> "less or equal"). + BucketLabel = "le" + + // QuantileLabel is used for the label that defines the quantile in a + // summary. + QuantileLabel = "quantile" ) // A LabelName is a key for a LabelSet or Metric. It has a value associated diff --git a/Godeps/_workspace/src/github.com/prometheus/client_golang/prometheus/benchmark_test.go b/Godeps/_workspace/src/github.com/prometheus/client_golang/prometheus/benchmark_test.go index d43a857e6..6ae7333fc 100644 --- a/Godeps/_workspace/src/github.com/prometheus/client_golang/prometheus/benchmark_test.go +++ b/Godeps/_workspace/src/github.com/prometheus/client_golang/prometheus/benchmark_test.go @@ -129,3 +129,31 @@ func BenchmarkSummaryNoLabels(b *testing.B) { m.Observe(3.1415) } } + +func BenchmarkHistogramWithLabelValues(b *testing.B) { + m := NewHistogramVec( + HistogramOpts{ + Name: "benchmark_histogram", + Help: "A histogram to benchmark it.", + }, + []string{"one", "two", "three"}, + ) + b.ReportAllocs() + b.ResetTimer() + for i := 0; i < b.N; i++ { + m.WithLabelValues("eins", "zwei", "drei").Observe(3.1415) + } +} + +func BenchmarkHistogramNoLabels(b *testing.B) { + m := NewHistogram(HistogramOpts{ + Name: "benchmark_histogram", + Help: "A histogram to benchmark it.", + }, + ) + b.ReportAllocs() + b.ResetTimer() + for i := 0; i < b.N; i++ { + m.Observe(3.1415) + } +} diff --git a/Godeps/_workspace/src/github.com/prometheus/client_golang/prometheus/counter.go b/Godeps/_workspace/src/github.com/prometheus/client_golang/prometheus/counter.go index d715ee0bb..f8d633fbd 100644 --- a/Godeps/_workspace/src/github.com/prometheus/client_golang/prometheus/counter.go +++ b/Godeps/_workspace/src/github.com/prometheus/client_golang/prometheus/counter.go @@ -74,7 +74,7 @@ func (c *counter) Add(v float64) { // CounterVec is a Collector that bundles a set of Counters that all share the // same Desc, but have different values for their variable labels. This is used // if you want to count the same thing partitioned by various dimensions -// (e.g. number of http requests, partitioned by response code and +// (e.g. number of HTTP requests, partitioned by response code and // method). Create instances with NewCounterVec. // // CounterVec embeds MetricVec. See there for a full list of methods with diff --git a/Godeps/_workspace/src/github.com/prometheus/client_golang/prometheus/examples_test.go b/Godeps/_workspace/src/github.com/prometheus/client_golang/prometheus/examples_test.go index 78b1b81f2..5e62967b0 100644 --- a/Godeps/_workspace/src/github.com/prometheus/client_golang/prometheus/examples_test.go +++ b/Godeps/_workspace/src/github.com/prometheus/client_golang/prometheus/examples_test.go @@ -129,7 +129,7 @@ func ExampleCounterVec() { httpReqs := prometheus.NewCounterVec( prometheus.CounterOpts{ Name: "http_requests_total", - Help: "How many HTTP requests processed, partitioned by status code and http method.", + Help: "How many HTTP requests processed, partitioned by status code and HTTP method.", ConstLabels: prometheus.Labels{"env": *binaryVersion}, }, []string{"code", "method"}, @@ -200,7 +200,7 @@ func ExampleRegister() { fmt.Println("taskCounter registered.") } // Don't forget to tell the HTTP server about the Prometheus handler. - // (In a real program, you still need to start the http server...) + // (In a real program, you still need to start the HTTP server...) 
http.Handle("/metrics", prometheus.Handler()) // Now you can start workers and give every one of them a pointer to @@ -240,7 +240,7 @@ func ExampleRegister() { // Prometheus will not allow you to ever export metrics with // inconsistent help strings or label names. After unregistering, the - // unregistered metrics will cease to show up in the /metrics http + // unregistered metrics will cease to show up in the /metrics HTTP // response, but the registry still remembers that those metrics had // been exported before. For this example, we will now choose a // different name. (In a real program, you would obviously not export @@ -452,3 +452,49 @@ func ExampleSummaryVec() { // > // ] } + +func ExampleHistogram() { + temps := prometheus.NewHistogram(prometheus.HistogramOpts{ + Name: "pond_temperature_celsius", + Help: "The temperature of the frog pond.", // Sorry, we can't measure how badly it smells. + Buckets: prometheus.LinearBuckets(20, 5, 5), // 5 buckets, each 5 centigrade wide. + }) + + // Simulate some observations. + for i := 0; i < 1000; i++ { + temps.Observe(30 + math.Floor(120*math.Sin(float64(i)*0.1))/10) + } + + // Just for demonstration, let's check the state of the histogram by + // (ab)using its Write method (which is usually only used by Prometheus + // internally). + metric := &dto.Metric{} + temps.Write(metric) + fmt.Println(proto.MarshalTextString(metric)) + + // Output: + // histogram: < + // sample_count: 1000 + // sample_sum: 29969.50000000001 + // bucket: < + // cumulative_count: 192 + // upper_bound: 20 + // > + // bucket: < + // cumulative_count: 366 + // upper_bound: 25 + // > + // bucket: < + // cumulative_count: 501 + // upper_bound: 30 + // > + // bucket: < + // cumulative_count: 638 + // upper_bound: 35 + // > + // bucket: < + // cumulative_count: 816 + // upper_bound: 40 + // > + // > +} diff --git a/Godeps/_workspace/src/github.com/prometheus/client_golang/prometheus/http.go b/Godeps/_workspace/src/github.com/prometheus/client_golang/prometheus/http.go index 818c90fb6..dac92fd90 100644 --- a/Godeps/_workspace/src/github.com/prometheus/client_golang/prometheus/http.go +++ b/Godeps/_workspace/src/github.com/prometheus/client_golang/prometheus/http.go @@ -47,7 +47,7 @@ func nowSeries(t ...time.Time) nower { } // InstrumentHandler wraps the given HTTP handler for instrumentation. It -// registers four metric collectors (if not already done) and reports http +// registers four metric collectors (if not already done) and reports HTTP // metrics to the (newly or already) registered collectors: http_requests_total // (CounterVec), http_request_duration_microseconds (Summary), // http_request_size_bytes (Summary), http_response_size_bytes (Summary). Each diff --git a/Godeps/_workspace/src/github.com/prometheus/client_golang/prometheus/registry.go b/Godeps/_workspace/src/github.com/prometheus/client_golang/prometheus/registry.go index 505a4aa81..6709f2578 100644 --- a/Godeps/_workspace/src/github.com/prometheus/client_golang/prometheus/registry.go +++ b/Godeps/_workspace/src/github.com/prometheus/client_golang/prometheus/registry.go @@ -171,7 +171,7 @@ func SetMetricFamilyInjectionHook(hook func() []*dto.MetricFamily) { } // PanicOnCollectError sets the behavior whether a panic is caused upon an error -// while metrics are collected and served to the http endpoint. By default, an +// while metrics are collected and served to the HTTP endpoint. By default, an // internal server error (status code 500) is served with an error message. 
func PanicOnCollectError(b bool) { defRegistry.panicOnCollectError = b @@ -464,6 +464,8 @@ func (r *registry) writePB(w io.Writer, writeEncoded encoder) (int, error) { metricFamily.Type = dto.MetricType_SUMMARY.Enum() case dtoMetric.Untyped != nil: metricFamily.Type = dto.MetricType_UNTYPED.Enum() + case dtoMetric.Histogram != nil: + metricFamily.Type = dto.MetricType_HISTOGRAM.Enum() default: return 0, fmt.Errorf("empty metric collected: %s", dtoMetric) } diff --git a/Godeps/_workspace/src/github.com/prometheus/client_golang/prometheus/summary.go b/Godeps/_workspace/src/github.com/prometheus/client_golang/prometheus/summary.go index 5d8b82f59..93e7b6bb1 100644 --- a/Godeps/_workspace/src/github.com/prometheus/client_golang/prometheus/summary.go +++ b/Godeps/_workspace/src/github.com/prometheus/client_golang/prometheus/summary.go @@ -25,6 +25,7 @@ import ( dto "github.com/prometheus/client_model/go" "github.com/prometheus/client_golang/_vendor/perks/quantile" + "github.com/prometheus/client_golang/model" ) // A Summary captures individual observations from an event or sample stream and @@ -35,6 +36,12 @@ import ( // Summary provides the median, the 90th and the 99th percentile of the latency // as rank estimations. // +// Note that the rank estimations cannot be aggregated in a meaningful way with +// the Prometheus query language (i.e. you cannot average or add them). If you +// need aggregatable quantiles (e.g. you want the 99th percentile latency of all +// queries served across all instances of a service), consider the Histogram +// metric type. See the Prometheus documentation for more details. +// // To create Summary instances, use NewSummary. type Summary interface { Metric @@ -44,9 +51,13 @@ type Summary interface { Observe(float64) } -// DefObjectives are the default Summary quantile values. var ( + // DefObjectives are the default Summary quantile values. DefObjectives = map[float64]float64{0.5: 0.05, 0.9: 0.01, 0.99: 0.001} + + errQuantileLabelNotAllowed = fmt.Errorf( + "%q is not allowed as label name in summaries", model.QuantileLabel, + ) ) // Default values for SummaryOpts. @@ -110,7 +121,10 @@ type SummaryOpts struct { // AgeBuckets is the number of buckets used to exclude observations that // are older than MaxAge from the summary. A higher number has a // resource penalty, so only increase it if the higher resolution is - // really required. The default value is DefAgeBuckets. + // really required. For very high observation rates, you might want to + // reduce the number of age buckets. With only one age bucket, you will + // effectively see a complete reset of the summary each time MaxAge has + // passed. The default value is DefAgeBuckets. AgeBuckets uint32 // BufCap defines the default sample stream buffer size. The default @@ -119,10 +133,6 @@ type SummaryOpts struct { // is the internal buffer size of the underlying package // "github.com/bmizerany/perks/quantile"). BufCap uint32 - - // Epsilon is the error epsilon for the quantile rank estimate. Must be - // positive. The default is DefEpsilon. - Epsilon float64 } // TODO: Great fuck-up with the sliding-window decay algorithm... 
The Merge @@ -158,6 +168,17 @@ func newSummary(desc *Desc, opts SummaryOpts, labelValues ...string) Summary { panic(errInconsistentCardinality) } + for _, n := range desc.variableLabels { + if n == model.QuantileLabel { + panic(errQuantileLabelNotAllowed) + } + } + for _, lp := range desc.constLabelPairs { + if lp.GetName() == model.QuantileLabel { + panic(errQuantileLabelNotAllowed) + } + } + if len(opts.Objectives) == 0 { opts.Objectives = DefObjectives } @@ -358,7 +379,7 @@ func (s quantSort) Less(i, j int) bool { // SummaryVec is a Collector that bundles a set of Summaries that all share the // same Desc, but have different values for their variable labels. This is used // if you want to count the same thing partitioned by various dimensions -// (e.g. http request latencies, partitioned by status code and method). Create +// (e.g. HTTP request latencies, partitioned by status code and method). Create // instances with NewSummaryVec. type SummaryVec struct { MetricVec @@ -411,14 +432,14 @@ func (m *SummaryVec) GetMetricWith(labels Labels) (Summary, error) { // WithLabelValues works as GetMetricWithLabelValues, but panics where // GetMetricWithLabelValues would have returned an error. By not returning an // error, WithLabelValues allows shortcuts like -// myVec.WithLabelValues("404", "GET").Add(42) +// myVec.WithLabelValues("404", "GET").Observe(42.21) func (m *SummaryVec) WithLabelValues(lvs ...string) Summary { return m.MetricVec.WithLabelValues(lvs...).(Summary) } // With works as GetMetricWith, but panics where GetMetricWithLabels would have // returned an error. By not returning an error, With allows shortcuts like -// myVec.With(Labels{"code": "404", "method": "GET"}).Add(42) +// myVec.With(Labels{"code": "404", "method": "GET"}).Observe(42.21) func (m *SummaryVec) With(labels Labels) Summary { return m.MetricVec.With(labels).(Summary) } diff --git a/Godeps/_workspace/src/github.com/prometheus/client_golang/prometheus/summary_test.go b/Godeps/_workspace/src/github.com/prometheus/client_golang/prometheus/summary_test.go index 3f83796ea..40d05fae5 100644 --- a/Godeps/_workspace/src/github.com/prometheus/client_golang/prometheus/summary_test.go +++ b/Godeps/_workspace/src/github.com/prometheus/client_golang/prometheus/summary_test.go @@ -120,6 +120,10 @@ func BenchmarkSummaryWrite8(b *testing.B) { } func TestSummaryConcurrency(t *testing.T) { + if testing.Short() { + t.Skip("Skipping test in short mode.") + } + rand.Seed(42) it := func(n uint32) bool { @@ -195,6 +199,10 @@ func TestSummaryConcurrency(t *testing.T) { } func TestSummaryVecConcurrency(t *testing.T) { + if testing.Short() { + t.Skip("Skipping test in short mode.") + } + rand.Seed(42) objectives := make([]float64, 0, len(DefObjectives)) diff --git a/Godeps/_workspace/src/github.com/prometheus/client_golang/text/create.go b/Godeps/_workspace/src/github.com/prometheus/client_golang/text/create.go index 64e372c0c..443045994 100644 --- a/Godeps/_workspace/src/github.com/prometheus/client_golang/text/create.go +++ b/Godeps/_workspace/src/github.com/prometheus/client_golang/text/create.go @@ -24,8 +24,10 @@ import ( "bytes" "fmt" "io" + "math" "strings" + "github.com/prometheus/client_golang/model" dto "github.com/prometheus/client_model/go" ) @@ -116,7 +118,7 @@ func MetricFamilyToText(out io.Writer, in *dto.MetricFamily) (int, error) { for _, q := range metric.Summary.Quantile { n, err = writeSample( name, metric, - "quantile", fmt.Sprint(q.GetQuantile()), + model.QuantileLabel, fmt.Sprint(q.GetQuantile()), q.GetValue(), out, 
) @@ -145,10 +147,11 @@ func MetricFamilyToText(out io.Writer, in *dto.MetricFamily) (int, error) { "expected summary in metric %s", metric, ) } + infSeen := false for _, q := range metric.Histogram.Bucket { n, err = writeSample( name+"_bucket", metric, - "le", fmt.Sprint(q.GetUpperBound()), + model.BucketLabel, fmt.Sprint(q.GetUpperBound()), float64(q.GetCumulativeCount()), out, ) @@ -156,7 +159,21 @@ func MetricFamilyToText(out io.Writer, in *dto.MetricFamily) (int, error) { if err != nil { return written, err } - // TODO: Add +inf bucket if it's missing. + if math.IsInf(q.GetUpperBound(), +1) { + infSeen = true + } + } + if !infSeen { + n, err = writeSample( + name+"_bucket", metric, + model.BucketLabel, "+Inf", + float64(metric.Histogram.GetSampleCount()), + out, + ) + if err != nil { + return written, err + } + written += n } n, err = writeSample( name+"_sum", metric, "", "", diff --git a/Godeps/_workspace/src/github.com/prometheus/client_golang/text/create_test.go b/Godeps/_workspace/src/github.com/prometheus/client_golang/text/create_test.go index 9326e6e5b..fe938de80 100644 --- a/Godeps/_workspace/src/github.com/prometheus/client_golang/text/create_test.go +++ b/Godeps/_workspace/src/github.com/prometheus/client_golang/text/create_test.go @@ -267,6 +267,50 @@ request_duration_microseconds_bucket{le="172.8"} 1524 request_duration_microseconds_bucket{le="+Inf"} 2693 request_duration_microseconds_sum 1.7560473e+06 request_duration_microseconds_count 2693 +`, + }, + // 5: Histogram with missing +Inf bucket. + { + in: &dto.MetricFamily{ + Name: proto.String("request_duration_microseconds"), + Help: proto.String("The response latency."), + Type: dto.MetricType_HISTOGRAM.Enum(), + Metric: []*dto.Metric{ + &dto.Metric{ + Histogram: &dto.Histogram{ + SampleCount: proto.Uint64(2693), + SampleSum: proto.Float64(1756047.3), + Bucket: []*dto.Bucket{ + &dto.Bucket{ + UpperBound: proto.Float64(100), + CumulativeCount: proto.Uint64(123), + }, + &dto.Bucket{ + UpperBound: proto.Float64(120), + CumulativeCount: proto.Uint64(412), + }, + &dto.Bucket{ + UpperBound: proto.Float64(144), + CumulativeCount: proto.Uint64(592), + }, + &dto.Bucket{ + UpperBound: proto.Float64(172.8), + CumulativeCount: proto.Uint64(1524), + }, + }, + }, + }, + }, + }, + out: `# HELP request_duration_microseconds The response latency. +# TYPE request_duration_microseconds histogram +request_duration_microseconds_bucket{le="100"} 123 +request_duration_microseconds_bucket{le="120"} 412 +request_duration_microseconds_bucket{le="144"} 592 +request_duration_microseconds_bucket{le="172.8"} 1524 +request_duration_microseconds_bucket{le="+Inf"} 2693 +request_duration_microseconds_sum 1.7560473e+06 +request_duration_microseconds_count 2693 `, }, } diff --git a/Godeps/_workspace/src/github.com/prometheus/client_golang/text/parse.go b/Godeps/_workspace/src/github.com/prometheus/client_golang/text/parse.go index eaff59274..e317d6850 100644 --- a/Godeps/_workspace/src/github.com/prometheus/client_golang/text/parse.go +++ b/Godeps/_workspace/src/github.com/prometheus/client_golang/text/parse.go @@ -274,8 +274,8 @@ func (p *Parser) startLabelName() stateFn { } // Special summary/histogram treatment. Don't add 'quantile' and 'le' // labels to 'real' labels. 
- if !(p.currentMF.GetType() == dto.MetricType_SUMMARY && p.currentLabelPair.GetName() == "quantile") && - !(p.currentMF.GetType() == dto.MetricType_HISTOGRAM && p.currentLabelPair.GetName() == "le") { + if !(p.currentMF.GetType() == dto.MetricType_SUMMARY && p.currentLabelPair.GetName() == model.QuantileLabel) && + !(p.currentMF.GetType() == dto.MetricType_HISTOGRAM && p.currentLabelPair.GetName() == model.BucketLabel) { p.currentMetric.Label = append(p.currentMetric.Label, p.currentLabelPair) } if p.skipBlankTabIfCurrentBlankTab(); p.err != nil { @@ -306,7 +306,7 @@ func (p *Parser) startLabelValue() stateFn { // - Quantile labels are special, will result in dto.Quantile later. // - Other labels have to be added to currentLabels for signature calculation. if p.currentMF.GetType() == dto.MetricType_SUMMARY { - if p.currentLabelPair.GetName() == "quantile" { + if p.currentLabelPair.GetName() == model.QuantileLabel { if p.currentQuantile, p.err = strconv.ParseFloat(p.currentLabelPair.GetValue(), 64); p.err != nil { // Create a more helpful error message. p.parseError(fmt.Sprintf("expected float as value for 'quantile' label, got %q", p.currentLabelPair.GetValue())) @@ -318,7 +318,7 @@ func (p *Parser) startLabelValue() stateFn { } // Similar special treatment of histograms. if p.currentMF.GetType() == dto.MetricType_HISTOGRAM { - if p.currentLabelPair.GetName() == "le" { + if p.currentLabelPair.GetName() == model.BucketLabel { if p.currentBucket, p.err = strconv.ParseFloat(p.currentLabelPair.GetValue(), 64); p.err != nil { // Create a more helpful error message. p.parseError(fmt.Sprintf("expected float as value for 'le' label, got %q", p.currentLabelPair.GetValue())) From 1a61bcae07d5e4c7fcdac53053f917f04d424a72 Mon Sep 17 00:00:00 2001 From: beorn7 Date: Mon, 23 Feb 2015 15:29:26 +0100 Subject: [PATCH 4/4] Fix plural of 'histogram'. Actually, 'histogram' is Ancient Greek and 3rd declension... ;-) --- rules/helpers_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rules/helpers_test.go b/rules/helpers_test.go index eb22ea53f..fe5b81921 100644 --- a/rules/helpers_test.go +++ b/rules/helpers_test.go @@ -205,7 +205,7 @@ var testMatrix = ast.Matrix{ }, Values: getTestValueStream(0, 200, 20, testStartTime), }, - // Two histogram with 4 buckets each (*_sum and *_count not included, + // Two histograms with 4 buckets each (*_sum and *_count not included, // only buckets). Lowest bucket for one histogram < 0, for the other > // 0. They have the same name, just separated by label. Not useful in // practice, but can happen (if clients change bucketing), and the