mirror of
https://github.com/prometheus/prometheus.git
synced 2025-01-12 14:27:27 -08:00
Merge pull request #14677 from prometheus/beorn7/histogram
promql(native histograms): Introduce exponential interpolation
This commit is contained in:
commit
df9916ef66
|
@ -326,45 +326,70 @@ With native histograms, aggregating everything works as usual without any `by` c
|
|||
|
||||
histogram_quantile(0.9, sum(rate(http_request_duration_seconds[10m])))
|
||||
|
||||
The `histogram_quantile()` function interpolates quantile values by
|
||||
assuming a linear distribution within a bucket.
|
||||
In the (common) case that a quantile value does not coincide with a bucket
|
||||
boundary, the `histogram_quantile()` function interpolates the quantile value
|
||||
within the bucket the quantile value falls into. For classic histograms, for
|
||||
native histograms with custom bucket boundaries, and for the zero bucket of
|
||||
other native histograms, it assumes a uniform distribution of observations
|
||||
within the bucket (also called _linear interpolation_). For the
|
||||
non-zero buckets of native histograms with a standard exponential bucketing
|
||||
schema, the interpolation is done under the assumption that the samples within
|
||||
the bucket are distributed in a way that they would uniformly populate the
|
||||
buckets in a hypothetical histogram with higher resolution. (This is also
|
||||
called _exponential interpolation_.)
|
||||
|
||||
If `b` has 0 observations, `NaN` is returned. For φ < 0, `-Inf` is
|
||||
returned. For φ > 1, `+Inf` is returned. For φ = `NaN`, `NaN` is returned.
|
||||
|
||||
The following is only relevant for classic histograms: If `b` contains
|
||||
fewer than two buckets, `NaN` is returned. The highest bucket must have an
|
||||
upper bound of `+Inf`. (Otherwise, `NaN` is returned.) If a quantile is located
|
||||
in the highest bucket, the upper bound of the second highest bucket is
|
||||
returned. A lower limit of the lowest bucket is assumed to be 0 if the upper
|
||||
bound of that bucket is greater than
|
||||
0. In that case, the usual linear interpolation is applied within that
|
||||
bucket. Otherwise, the upper bound of the lowest bucket is returned for
|
||||
quantiles located in the lowest bucket.
|
||||
Special cases for classic histograms:
|
||||
|
||||
You can use `histogram_quantile(0, v instant-vector)` to get the estimated minimum value stored in
|
||||
a histogram.
|
||||
* If `b` contains fewer than two buckets, `NaN` is returned.
|
||||
* The highest bucket must have an upper bound of `+Inf`. (Otherwise, `NaN` is
|
||||
returned.)
|
||||
* If a quantile is located in the highest bucket, the upper bound of the second
|
||||
highest bucket is returned.
|
||||
* The lower limit of the lowest bucket is assumed to be 0 if the upper bound of
|
||||
that bucket is greater than 0. In that case, the usual linear interpolation
|
||||
is applied within that bucket. Otherwise, the upper bound of the lowest
|
||||
bucket is returned for quantiles located in the lowest bucket.
|
||||
|
||||
You can use `histogram_quantile(1, v instant-vector)` to get the estimated maximum value stored in
|
||||
a histogram.
|
||||
Special cases for native histograms (relevant for the exact interpolation
|
||||
happening within the zero bucket):
|
||||
|
||||
Buckets of classic histograms are cumulative. Therefore, the following should always be the case:
|
||||
* A zero bucket with finite width is assumed to contain no negative
|
||||
observations if the histogram has observations in positive buckets, but none
|
||||
in negative buckets.
|
||||
* A zero bucket with finite width is assumed to contain no positive
|
||||
observations if the histogram has observations in negative buckets, but none
|
||||
in positive buckets.
|
||||
|
||||
* The counts in the buckets are monotonically increasing (strictly non-decreasing).
|
||||
* A lack of observations between the upper limits of two consecutive buckets results in equal counts
|
||||
in those two buckets.
|
||||
You can use `histogram_quantile(0, v instant-vector)` to get the estimated
|
||||
minimum value stored in a histogram.
|
||||
|
||||
However, floating point precision issues (e.g. small discrepancies introduced by computing of buckets
|
||||
with `sum(rate(...))`) or invalid data might violate these assumptions. In that case,
|
||||
`histogram_quantile` would be unable to return meaningful results. To mitigate the issue,
|
||||
`histogram_quantile` assumes that tiny relative differences between consecutive buckets are happening
|
||||
because of floating point precision errors and ignores them. (The threshold to ignore a difference
|
||||
between two buckets is a trillionth (1e-12) of the sum of both buckets.) Furthermore, if there are
|
||||
non-monotonic bucket counts even after this adjustment, they are increased to the value of the
|
||||
previous buckets to enforce monotonicity. The latter is evidence for an actual issue with the input
|
||||
data and is therefore flagged with an informational annotation reading `input to histogram_quantile
|
||||
needed to be fixed for monotonicity`. If you encounter this annotation, you should find and remove
|
||||
the source of the invalid data.
|
||||
You can use `histogram_quantile(1, v instant-vector)` to get the estimated
|
||||
maximum value stored in a histogram.
|
||||
|
||||
Buckets of classic histograms are cumulative. Therefore, the following should
|
||||
always be the case:
|
||||
|
||||
* The counts in the buckets are monotonically increasing (that is,
|
||||
non-decreasing).
|
||||
* A lack of observations between the upper limits of two consecutive buckets
|
||||
results in equal counts in those two buckets.
|
||||
|
||||
However, floating point precision issues (e.g. small discrepancies introduced
|
||||
by computing buckets with `sum(rate(...))`) or invalid data might violate
|
||||
these assumptions. In that case, `histogram_quantile` would be unable to return
|
||||
meaningful results. To mitigate the issue, `histogram_quantile` assumes that
|
||||
tiny relative differences between consecutive buckets are happening because of
|
||||
floating point precision errors and ignores them. (The threshold to ignore a
|
||||
difference between two buckets is a trillionth (1e-12) of the sum of both
|
||||
buckets.) Furthermore, if there are non-monotonic bucket counts even after this
|
||||
adjustment, they are increased to the value of the previous buckets to enforce
|
||||
monotonicity. The latter is evidence for an actual issue with the input data
|
||||
and is therefore flagged with an informational annotation reading `input to
|
||||
histogram_quantile needed to be fixed for monotonicity`. If you encounter this
|
||||
annotation, you should find and remove the source of the invalid data.
|
||||
|
||||
## `histogram_stddev()` and `histogram_stdvar()`
|
||||
|
||||
|
|
232
promql/promqltest/testdata/native_histograms.test
vendored
232
promql/promqltest/testdata/native_histograms.test
vendored
|
@ -46,9 +46,12 @@ eval instant at 1m histogram_fraction(1, 2, single_histogram)
|
|||
eval instant at 1m histogram_fraction(0, 8, single_histogram)
|
||||
{} 1
|
||||
|
||||
# Median is 1.5 due to linear estimation of the midpoint of the middle bucket, whose values are within range 1 < x <= 2.
|
||||
# Median is 1.414213562373095 (2**2**-1, or sqrt(2)) due to
|
||||
# exponential interpolation, i.e. the "midpoint" within range 1 < x <=
|
||||
# 2 is assumed where the bucket boundary would be if we increased the
|
||||
# resolution of the histogram by one step.
|
||||
eval instant at 1m histogram_quantile(0.5, single_histogram)
|
||||
{} 1.5
|
||||
{} 1.414213562373095
|
||||
|
||||
clear
|
||||
|
||||
|
@ -68,8 +71,9 @@ eval instant at 5m histogram_avg(multi_histogram)
|
|||
eval instant at 5m histogram_fraction(1, 2, multi_histogram)
|
||||
{} 0.5
|
||||
|
||||
# See explanation for exponential interpolation above.
|
||||
eval instant at 5m histogram_quantile(0.5, multi_histogram)
|
||||
{} 1.5
|
||||
{} 1.414213562373095
|
||||
|
||||
|
||||
# Each entry should look the same as the first.
|
||||
|
@ -85,8 +89,9 @@ eval instant at 50m histogram_avg(multi_histogram)
|
|||
eval instant at 50m histogram_fraction(1, 2, multi_histogram)
|
||||
{} 0.5
|
||||
|
||||
# See explanation for exponential interpolation above.
|
||||
eval instant at 50m histogram_quantile(0.5, multi_histogram)
|
||||
{} 1.5
|
||||
{} 1.414213562373095
|
||||
|
||||
clear
|
||||
|
||||
|
@ -109,8 +114,9 @@ eval instant at 5m histogram_avg(incr_histogram)
|
|||
eval instant at 5m histogram_fraction(1, 2, incr_histogram)
|
||||
{} 0.6
|
||||
|
||||
# See explanation for exponential interpolation above.
|
||||
eval instant at 5m histogram_quantile(0.5, incr_histogram)
|
||||
{} 1.5
|
||||
{} 1.414213562373095
|
||||
|
||||
|
||||
eval instant at 50m incr_histogram
|
||||
|
@ -129,16 +135,18 @@ eval instant at 50m histogram_avg(incr_histogram)
|
|||
eval instant at 50m histogram_fraction(1, 2, incr_histogram)
|
||||
{} 0.8571428571428571
|
||||
|
||||
# See explanation for exponential interpolation above.
|
||||
eval instant at 50m histogram_quantile(0.5, incr_histogram)
|
||||
{} 1.5
|
||||
{} 1.414213562373095
|
||||
|
||||
# Per-second average rate of increase should be 1/(5*60) for count and buckets, then 2/(5*60) for sum.
|
||||
eval instant at 50m rate(incr_histogram[10m])
|
||||
{} {{count:0.0033333333333333335 sum:0.006666666666666667 offset:1 buckets:[0.0033333333333333335]}}
|
||||
|
||||
# Calculate the 50th percentile of observations over the last 10m.
|
||||
# See explanation for exponential interpolation above.
|
||||
eval instant at 50m histogram_quantile(0.5, rate(incr_histogram[10m]))
|
||||
{} 1.5
|
||||
{} 1.414213562373095
|
||||
|
||||
clear
|
||||
|
||||
|
@ -211,8 +219,9 @@ eval instant at 1m histogram_avg(negative_histogram)
|
|||
eval instant at 1m histogram_fraction(-2, -1, negative_histogram)
|
||||
{} 0.5
|
||||
|
||||
# Exponential interpolation works the same as for positive buckets, just mirrored.
|
||||
eval instant at 1m histogram_quantile(0.5, negative_histogram)
|
||||
{} -1.5
|
||||
{} -1.414213562373095
|
||||
|
||||
clear
|
||||
|
||||
|
@ -233,8 +242,9 @@ eval instant at 5m histogram_avg(two_samples_histogram)
|
|||
eval instant at 5m histogram_fraction(-2, -1, two_samples_histogram)
|
||||
{} 0.5
|
||||
|
||||
# See explanation for exponential interpolation above.
|
||||
eval instant at 5m histogram_quantile(0.5, two_samples_histogram)
|
||||
{} -1.5
|
||||
{} -1.414213562373095
|
||||
|
||||
clear
|
||||
|
||||
|
@ -392,20 +402,24 @@ eval_warn instant at 10m histogram_quantile(1.001, histogram_quantile_1)
|
|||
eval instant at 10m histogram_quantile(1, histogram_quantile_1)
|
||||
{} 16
|
||||
|
||||
# The following quantiles are within a bucket. Exponential
|
||||
# interpolation is applied (rather than linear, as it is done for
|
||||
# classic histograms), leading to slightly different quantile values.
|
||||
eval instant at 10m histogram_quantile(0.99, histogram_quantile_1)
|
||||
{} 15.759999999999998
|
||||
{} 15.67072476139083
|
||||
|
||||
eval instant at 10m histogram_quantile(0.9, histogram_quantile_1)
|
||||
{} 13.600000000000001
|
||||
{} 12.99603834169977
|
||||
|
||||
eval instant at 10m histogram_quantile(0.6, histogram_quantile_1)
|
||||
{} 4.799999999999997
|
||||
{} 4.594793419988138
|
||||
|
||||
eval instant at 10m histogram_quantile(0.5, histogram_quantile_1)
|
||||
{} 1.6666666666666665
|
||||
{} 1.5874010519681994
|
||||
|
||||
# Linear interpolation within the zero bucket after all.
|
||||
eval instant at 10m histogram_quantile(0.1, histogram_quantile_1)
|
||||
{} 0.0006000000000000001
|
||||
{} 0.0006
|
||||
|
||||
eval instant at 10m histogram_quantile(0, histogram_quantile_1)
|
||||
{} 0
|
||||
|
@ -425,17 +439,20 @@ eval_warn instant at 10m histogram_quantile(1.001, histogram_quantile_2)
|
|||
eval instant at 10m histogram_quantile(1, histogram_quantile_2)
|
||||
{} 0
|
||||
|
||||
# Again, the quantile values here are slightly different from what
|
||||
# they would be with linear interpolation. Note that quantiles
|
||||
# ending up in the zero bucket are linearly interpolated after all.
|
||||
eval instant at 10m histogram_quantile(0.99, histogram_quantile_2)
|
||||
{} -6.000000000000048e-05
|
||||
{} -0.00006
|
||||
|
||||
eval instant at 10m histogram_quantile(0.9, histogram_quantile_2)
|
||||
{} -0.0005999999999999996
|
||||
{} -0.0006
|
||||
|
||||
eval instant at 10m histogram_quantile(0.5, histogram_quantile_2)
|
||||
{} -1.6666666666666667
|
||||
{} -1.5874010519681996
|
||||
|
||||
eval instant at 10m histogram_quantile(0.1, histogram_quantile_2)
|
||||
{} -13.6
|
||||
{} -12.996038341699768
|
||||
|
||||
eval instant at 10m histogram_quantile(0, histogram_quantile_2)
|
||||
{} -16
|
||||
|
@ -445,7 +462,9 @@ eval_warn instant at 10m histogram_quantile(-1, histogram_quantile_2)
|
|||
|
||||
clear
|
||||
|
||||
# Apply quantile function to histogram with both positive and negative buckets with zero bucket.
|
||||
# Apply quantile function to histogram with both positive and negative
|
||||
# buckets with zero bucket.
|
||||
# First positive buckets with exponential interpolation.
|
||||
load 10m
|
||||
histogram_quantile_3 {{schema:0 count:24 sum:100 z_bucket:4 z_bucket_w:0.001 buckets:[2 3 0 1 4] n_buckets:[2 3 0 1 4]}}x1
|
||||
|
||||
|
@ -456,31 +475,34 @@ eval instant at 10m histogram_quantile(1, histogram_quantile_3)
|
|||
{} 16
|
||||
|
||||
eval instant at 10m histogram_quantile(0.99, histogram_quantile_3)
|
||||
{} 15.519999999999996
|
||||
{} 15.34822590920423
|
||||
|
||||
eval instant at 10m histogram_quantile(0.9, histogram_quantile_3)
|
||||
{} 11.200000000000003
|
||||
{} 10.556063286183155
|
||||
|
||||
eval instant at 10m histogram_quantile(0.7, histogram_quantile_3)
|
||||
{} 1.2666666666666657
|
||||
{} 1.2030250360821164
|
||||
|
||||
# Linear interpolation in the zero bucket, symmetrically centered around
|
||||
# the zero point.
|
||||
eval instant at 10m histogram_quantile(0.55, histogram_quantile_3)
|
||||
{} 0.0006000000000000005
|
||||
{} 0.0006
|
||||
|
||||
eval instant at 10m histogram_quantile(0.5, histogram_quantile_3)
|
||||
{} 0
|
||||
|
||||
eval instant at 10m histogram_quantile(0.45, histogram_quantile_3)
|
||||
{} -0.0005999999999999996
|
||||
{} -0.0006
|
||||
|
||||
# Finally negative buckets with mirrored exponential interpolation.
|
||||
eval instant at 10m histogram_quantile(0.3, histogram_quantile_3)
|
||||
{} -1.266666666666667
|
||||
{} -1.2030250360821169
|
||||
|
||||
eval instant at 10m histogram_quantile(0.1, histogram_quantile_3)
|
||||
{} -11.2
|
||||
{} -10.556063286183155
|
||||
|
||||
eval instant at 10m histogram_quantile(0.01, histogram_quantile_3)
|
||||
{} -15.52
|
||||
{} -15.34822590920423
|
||||
|
||||
eval instant at 10m histogram_quantile(0, histogram_quantile_3)
|
||||
{} -16
|
||||
|
@ -490,6 +512,90 @@ eval_warn instant at 10m histogram_quantile(-1, histogram_quantile_3)
|
|||
|
||||
clear
|
||||
|
||||
# Try different schemas. (The interpolation logic must not depend on the schema.)
|
||||
clear
|
||||
load 1m
|
||||
var_res_histogram{schema="-1"} {{schema:-1 sum:6 count:5 buckets:[0 5]}}
|
||||
var_res_histogram{schema="0"} {{schema:0 sum:4 count:5 buckets:[0 5]}}
|
||||
var_res_histogram{schema="+1"} {{schema:1 sum:4 count:5 buckets:[0 5]}}
|
||||
|
||||
eval instant at 1m histogram_quantile(0.5, var_res_histogram)
|
||||
{schema="-1"} 2.0
|
||||
{schema="0"} 1.4142135623730951
|
||||
{schema="+1"} 1.189207
|
||||
|
||||
eval instant at 1m histogram_fraction(0, 2, var_res_histogram{schema="-1"})
|
||||
{schema="-1"} 0.5
|
||||
|
||||
eval instant at 1m histogram_fraction(0, 1.4142135623730951, var_res_histogram{schema="0"})
|
||||
{schema="0"} 0.5
|
||||
|
||||
eval instant at 1m histogram_fraction(0, 1.189207, var_res_histogram{schema="+1"})
|
||||
{schema="+1"} 0.5
|
||||
|
||||
# The same as above, but one bucket "further to the right".
|
||||
clear
|
||||
load 1m
|
||||
var_res_histogram{schema="-1"} {{schema:-1 sum:6 count:5 buckets:[0 0 5]}}
|
||||
var_res_histogram{schema="0"} {{schema:0 sum:4 count:5 buckets:[0 0 5]}}
|
||||
var_res_histogram{schema="+1"} {{schema:1 sum:4 count:5 buckets:[0 0 5]}}
|
||||
|
||||
eval instant at 1m histogram_quantile(0.5, var_res_histogram)
|
||||
{schema="-1"} 8.0
|
||||
{schema="0"} 2.82842712474619
|
||||
{schema="+1"} 1.6817928305074292
|
||||
|
||||
eval instant at 1m histogram_fraction(0, 8, var_res_histogram{schema="-1"})
|
||||
{schema="-1"} 0.5
|
||||
|
||||
eval instant at 1m histogram_fraction(0, 2.82842712474619, var_res_histogram{schema="0"})
|
||||
{schema="0"} 0.5
|
||||
|
||||
eval instant at 1m histogram_fraction(0, 1.6817928305074292, var_res_histogram{schema="+1"})
|
||||
{schema="+1"} 0.5
|
||||
|
||||
# And everything again but for negative buckets.
|
||||
clear
|
||||
load 1m
|
||||
var_res_histogram{schema="-1"} {{schema:-1 sum:6 count:5 n_buckets:[0 5]}}
|
||||
var_res_histogram{schema="0"} {{schema:0 sum:4 count:5 n_buckets:[0 5]}}
|
||||
var_res_histogram{schema="+1"} {{schema:1 sum:4 count:5 n_buckets:[0 5]}}
|
||||
|
||||
eval instant at 1m histogram_quantile(0.5, var_res_histogram)
|
||||
{schema="-1"} -2.0
|
||||
{schema="0"} -1.4142135623730951
|
||||
{schema="+1"} -1.189207
|
||||
|
||||
eval instant at 1m histogram_fraction(-2, 0, var_res_histogram{schema="-1"})
|
||||
{schema="-1"} 0.5
|
||||
|
||||
eval instant at 1m histogram_fraction(-1.4142135623730951, 0, var_res_histogram{schema="0"})
|
||||
{schema="0"} 0.5
|
||||
|
||||
eval instant at 1m histogram_fraction(-1.189207, 0, var_res_histogram{schema="+1"})
|
||||
{schema="+1"} 0.5
|
||||
|
||||
clear
|
||||
load 1m
|
||||
var_res_histogram{schema="-1"} {{schema:-1 sum:6 count:5 n_buckets:[0 0 5]}}
|
||||
var_res_histogram{schema="0"} {{schema:0 sum:4 count:5 n_buckets:[0 0 5]}}
|
||||
var_res_histogram{schema="+1"} {{schema:1 sum:4 count:5 n_buckets:[0 0 5]}}
|
||||
|
||||
eval instant at 1m histogram_quantile(0.5, var_res_histogram)
|
||||
{schema="-1"} -8.0
|
||||
{schema="0"} -2.82842712474619
|
||||
{schema="+1"} -1.6817928305074292
|
||||
|
||||
eval instant at 1m histogram_fraction(-8, 0, var_res_histogram{schema="-1"})
|
||||
{schema="-1"} 0.5
|
||||
|
||||
eval instant at 1m histogram_fraction(-2.82842712474619, 0, var_res_histogram{schema="0"})
|
||||
{schema="0"} 0.5
|
||||
|
||||
eval instant at 1m histogram_fraction(-1.6817928305074292, 0, var_res_histogram{schema="+1"})
|
||||
{schema="+1"} 0.5
|
||||
|
||||
|
||||
# Apply fraction function to empty histogram.
|
||||
load 10m
|
||||
histogram_fraction_1 {{}}x1
|
||||
|
@ -515,11 +621,18 @@ eval instant at 10m histogram_fraction(-0.001, 0, histogram_fraction_2)
|
|||
eval instant at 10m histogram_fraction(0, 0.001, histogram_fraction_2)
|
||||
{} 0.16666666666666666
|
||||
|
||||
# Note that this result and the one above add up to 1.
|
||||
eval instant at 10m histogram_fraction(0.001, inf, histogram_fraction_2)
|
||||
{} 0.8333333333333334
|
||||
|
||||
# We are in the zero bucket, resulting in linear interpolation.
|
||||
eval instant at 10m histogram_fraction(0, 0.0005, histogram_fraction_2)
|
||||
{} 0.08333333333333333
|
||||
|
||||
eval instant at 10m histogram_fraction(0.001, inf, histogram_fraction_2)
|
||||
{} 0.8333333333333334
|
||||
# Demonstrate that the inverse operation with histogram_quantile yields
|
||||
# the original value with the non-trivial result above.
|
||||
eval instant at 10m histogram_quantile(0.08333333333333333, histogram_fraction_2)
|
||||
{} 0.0005
|
||||
|
||||
eval instant at 10m histogram_fraction(-inf, -0.001, histogram_fraction_2)
|
||||
{} 0
|
||||
|
@ -527,17 +640,30 @@ eval instant at 10m histogram_fraction(-inf, -0.001, histogram_fraction_2)
|
|||
eval instant at 10m histogram_fraction(1, 2, histogram_fraction_2)
|
||||
{} 0.25
|
||||
|
||||
# More non-trivial results with interpolation involved below, including
|
||||
# some round-trips via histogram_quantile to prove that the inverse
|
||||
# operation leads to the same results.
|
||||
|
||||
eval instant at 10m histogram_fraction(0, 1.5, histogram_fraction_2)
|
||||
{} 0.4795739585136224
|
||||
|
||||
eval instant at 10m histogram_fraction(1.5, 2, histogram_fraction_2)
|
||||
{} 0.125
|
||||
{} 0.10375937481971091
|
||||
|
||||
eval instant at 10m histogram_fraction(1, 8, histogram_fraction_2)
|
||||
{} 0.3333333333333333
|
||||
|
||||
eval instant at 10m histogram_fraction(0, 6, histogram_fraction_2)
|
||||
{} 0.6320802083934297
|
||||
|
||||
eval instant at 10m histogram_quantile(0.6320802083934297, histogram_fraction_2)
|
||||
{} 6
|
||||
|
||||
eval instant at 10m histogram_fraction(1, 6, histogram_fraction_2)
|
||||
{} 0.2916666666666667
|
||||
{} 0.29874687506009634
|
||||
|
||||
eval instant at 10m histogram_fraction(1.5, 6, histogram_fraction_2)
|
||||
{} 0.16666666666666666
|
||||
{} 0.15250624987980724
|
||||
|
||||
eval instant at 10m histogram_fraction(-2, -1, histogram_fraction_2)
|
||||
{} 0
|
||||
|
@ -600,6 +726,12 @@ eval instant at 10m histogram_fraction(0, 0.001, histogram_fraction_3)
|
|||
eval instant at 10m histogram_fraction(-0.0005, 0, histogram_fraction_3)
|
||||
{} 0.08333333333333333
|
||||
|
||||
eval instant at 10m histogram_fraction(-inf, -0.0005, histogram_fraction_3)
|
||||
{} 0.9166666666666666
|
||||
|
||||
eval instant at 10m histogram_quantile(0.9166666666666666, histogram_fraction_3)
|
||||
{} -0.0005
|
||||
|
||||
eval instant at 10m histogram_fraction(0.001, inf, histogram_fraction_3)
|
||||
{} 0
|
||||
|
||||
|
@ -625,16 +757,22 @@ eval instant at 10m histogram_fraction(-2, -1, histogram_fraction_3)
|
|||
{} 0.25
|
||||
|
||||
eval instant at 10m histogram_fraction(-2, -1.5, histogram_fraction_3)
|
||||
{} 0.125
|
||||
{} 0.10375937481971091
|
||||
|
||||
eval instant at 10m histogram_fraction(-8, -1, histogram_fraction_3)
|
||||
{} 0.3333333333333333
|
||||
|
||||
eval instant at 10m histogram_fraction(-inf, -6, histogram_fraction_3)
|
||||
{} 0.36791979160657035
|
||||
|
||||
eval instant at 10m histogram_quantile(0.36791979160657035, histogram_fraction_3)
|
||||
{} -6
|
||||
|
||||
eval instant at 10m histogram_fraction(-6, -1, histogram_fraction_3)
|
||||
{} 0.2916666666666667
|
||||
{} 0.29874687506009634
|
||||
|
||||
eval instant at 10m histogram_fraction(-6, -1.5, histogram_fraction_3)
|
||||
{} 0.16666666666666666
|
||||
{} 0.15250624987980724
|
||||
|
||||
eval instant at 10m histogram_fraction(42, 3.1415, histogram_fraction_3)
|
||||
{} 0
|
||||
|
@ -684,6 +822,18 @@ eval instant at 10m histogram_fraction(0, 0.001, histogram_fraction_4)
|
|||
eval instant at 10m histogram_fraction(-0.0005, 0.0005, histogram_fraction_4)
|
||||
{} 0.08333333333333333
|
||||
|
||||
eval instant at 10m histogram_fraction(-inf, 0.0005, histogram_fraction_4)
|
||||
{} 0.5416666666666666
|
||||
|
||||
eval instant at 10m histogram_quantile(0.5416666666666666, histogram_fraction_4)
|
||||
{} 0.0005
|
||||
|
||||
eval instant at 10m histogram_fraction(-inf, -0.0005, histogram_fraction_4)
|
||||
{} 0.4583333333333333
|
||||
|
||||
eval instant at 10m histogram_quantile(0.4583333333333333, histogram_fraction_4)
|
||||
{} -0.0005
|
||||
|
||||
eval instant at 10m histogram_fraction(0.001, inf, histogram_fraction_4)
|
||||
{} 0.4166666666666667
|
||||
|
||||
|
@ -694,31 +844,31 @@ eval instant at 10m histogram_fraction(1, 2, histogram_fraction_4)
|
|||
{} 0.125
|
||||
|
||||
eval instant at 10m histogram_fraction(1.5, 2, histogram_fraction_4)
|
||||
{} 0.0625
|
||||
{} 0.051879687409855414
|
||||
|
||||
eval instant at 10m histogram_fraction(1, 8, histogram_fraction_4)
|
||||
{} 0.16666666666666666
|
||||
|
||||
eval instant at 10m histogram_fraction(1, 6, histogram_fraction_4)
|
||||
{} 0.14583333333333334
|
||||
{} 0.14937343753004825
|
||||
|
||||
eval instant at 10m histogram_fraction(1.5, 6, histogram_fraction_4)
|
||||
{} 0.08333333333333333
|
||||
{} 0.07625312493990366
|
||||
|
||||
eval instant at 10m histogram_fraction(-2, -1, histogram_fraction_4)
|
||||
{} 0.125
|
||||
|
||||
eval instant at 10m histogram_fraction(-2, -1.5, histogram_fraction_4)
|
||||
{} 0.0625
|
||||
{} 0.051879687409855456
|
||||
|
||||
eval instant at 10m histogram_fraction(-8, -1, histogram_fraction_4)
|
||||
{} 0.16666666666666666
|
||||
|
||||
eval instant at 10m histogram_fraction(-6, -1, histogram_fraction_4)
|
||||
{} 0.14583333333333334
|
||||
{} 0.14937343753004817
|
||||
|
||||
eval instant at 10m histogram_fraction(-6, -1.5, histogram_fraction_4)
|
||||
{} 0.08333333333333333
|
||||
{} 0.07625312493990362
|
||||
|
||||
eval instant at 10m histogram_fraction(42, 3.1415, histogram_fraction_4)
|
||||
{} 0
|
||||
|
|
|
@ -153,19 +153,31 @@ func bucketQuantile(q float64, buckets buckets) (float64, bool, bool) {
|
|||
|
||||
// histogramQuantile calculates the quantile 'q' based on the given histogram.
|
||||
//
|
||||
// The quantile value is interpolated assuming a linear distribution within a
|
||||
// bucket.
|
||||
// TODO(beorn7): Find an interpolation method that is a better fit for
|
||||
// exponential buckets (and think about configurable interpolation).
|
||||
// For custom buckets, the result is interpolated linearly, i.e. it is assumed
|
||||
// the observations are uniformly distributed within each bucket. (This is a
|
||||
// quite blunt assumption, but it is consistent with the interpolation method
|
||||
// used for classic histograms so far.)
|
||||
//
|
||||
// For exponential buckets, the interpolation is done under the assumption that
|
||||
// the samples within each bucket are distributed in a way that they would
|
||||
// uniformly populate the buckets in a hypothetical histogram with higher
|
||||
// resolution. For example, if the rank calculation suggests that the requested
|
||||
// quantile is right in the middle of the population of the (1,2] bucket, we
|
||||
// assume the quantile would be right at the bucket boundary between the two
|
||||
// buckets the (1,2] bucket would be divided into if the histogram had double
|
||||
// the resolution, which is 2**2**-1 = 1.4142... We call this exponential
|
||||
// interpolation.
|
||||
//
|
||||
// However, for a quantile that ends up in the zero bucket, this method isn't
|
||||
// very helpful (because there is an infinite number of buckets close to zero,
|
||||
// so we would have to assume zero as the result). Therefore, we return to
|
||||
// linear interpolation in the zero bucket.
|
||||
//
|
||||
// A natural lower bound of 0 is assumed if the histogram has only positive
|
||||
// buckets. Likewise, a natural upper bound of 0 is assumed if the histogram has
|
||||
// only negative buckets.
|
||||
// TODO(beorn7): Come to terms if we want that.
|
||||
//
|
||||
// There are a number of special cases (once we have a way to report errors
|
||||
// happening during evaluations of AST functions, we should report those
|
||||
// explicitly):
|
||||
// There are a number of special cases:
|
||||
//
|
||||
// If the histogram has 0 observations, NaN is returned.
|
||||
//
|
||||
|
@ -193,9 +205,9 @@ func histogramQuantile(q float64, h *histogram.FloatHistogram) float64 {
|
|||
rank float64
|
||||
)
|
||||
|
||||
// if there are NaN observations in the histogram (h.Sum is NaN), use the forward iterator
|
||||
// if the q < 0.5, use the forward iterator
|
||||
// if the q >= 0.5, use the reverse iterator
|
||||
// If there are NaN observations in the histogram (h.Sum is NaN), use the forward iterator.
|
||||
// If q < 0.5, use the forward iterator.
|
||||
// If q >= 0.5, use the reverse iterator.
|
||||
if math.IsNaN(h.Sum) || q < 0.5 {
|
||||
it = h.AllBucketIterator()
|
||||
rank = q * h.Count
|
||||
|
@ -260,8 +272,29 @@ func histogramQuantile(q float64, h *histogram.FloatHistogram) float64 {
|
|||
rank = count - rank
|
||||
}
|
||||
|
||||
// TODO(codesome): Use a better estimation than linear.
|
||||
return bucket.Lower + (bucket.Upper-bucket.Lower)*(rank/bucket.Count)
|
||||
// The fraction of how far we are into the current bucket.
|
||||
fraction := rank / bucket.Count
|
||||
|
||||
// Return linear interpolation for custom buckets and for quantiles that
|
||||
// end up in the zero bucket.
|
||||
if h.UsesCustomBuckets() || (bucket.Lower <= 0 && bucket.Upper >= 0) {
|
||||
return bucket.Lower + (bucket.Upper-bucket.Lower)*fraction
|
||||
}
|
||||
|
||||
// For exponential buckets, we interpolate on a logarithmic scale. On a
|
||||
// logarithmic scale, the exponential bucket boundaries (for any schema)
|
||||
// become linear (every bucket has the same width). Therefore, after
|
||||
// taking the logarithm of both bucket boundaries, we can use the
|
||||
// calculated fraction in the same way as for linear interpolation (see
|
||||
// above). Finally, we return to the normal scale by applying the
|
||||
// exponential function to the result.
|
||||
logLower := math.Log2(math.Abs(bucket.Lower))
|
||||
logUpper := math.Log2(math.Abs(bucket.Upper))
|
||||
if bucket.Lower > 0 { // Positive bucket.
|
||||
return math.Exp2(logLower + (logUpper-logLower)*fraction)
|
||||
}
|
||||
// Otherwise, we are in a negative bucket and have to mirror things.
|
||||
return -math.Exp2(logUpper + (logLower-logUpper)*(1-fraction))
|
||||
}
|
||||
|
||||
// histogramFraction calculates the fraction of observations between the
|
||||
|
@ -271,8 +304,8 @@ func histogramQuantile(q float64, h *histogram.FloatHistogram) float64 {
|
|||
// histogramQuantile(0.9, h) returns 123.4, then histogramFraction(-Inf, 123.4, h)
|
||||
// returns 0.9.
|
||||
//
|
||||
// The same notes (and TODOs) with regard to interpolation and assumptions about
|
||||
// the zero bucket boundaries apply as for histogramQuantile.
|
||||
// The same notes with regard to interpolation and assumptions about the zero
|
||||
// bucket boundaries apply as for histogramQuantile.
|
||||
//
|
||||
// Whether either boundary is inclusive or exclusive doesn’t actually matter as
|
||||
// long as interpolation has to be performed anyway. In the case of a boundary
|
||||
|
@ -310,7 +343,35 @@ func histogramFraction(lower, upper float64, h *histogram.FloatHistogram) float6
|
|||
)
|
||||
for it.Next() {
|
||||
b := it.At()
|
||||
if b.Lower < 0 && b.Upper > 0 {
|
||||
zeroBucket := false
|
||||
|
||||
// interpolateLinearly is used for custom buckets to be
|
||||
// consistent with the linear interpolation known from classic
|
||||
// histograms. It is also used for the zero bucket.
|
||||
interpolateLinearly := func(v float64) float64 {
|
||||
return rank + b.Count*(v-b.Lower)/(b.Upper-b.Lower)
|
||||
}
|
||||
|
||||
// interpolateExponentially uses the same exponential
|
||||
// interpolation method as above for histogramQuantile. This
|
||||
// method is a better fit for exponential bucketing.
|
||||
interpolateExponentially := func(v float64) float64 {
|
||||
var (
|
||||
logLower = math.Log2(math.Abs(b.Lower))
|
||||
logUpper = math.Log2(math.Abs(b.Upper))
|
||||
logV = math.Log2(math.Abs(v))
|
||||
fraction float64
|
||||
)
|
||||
if v > 0 {
|
||||
fraction = (logV - logLower) / (logUpper - logLower)
|
||||
} else {
|
||||
fraction = 1 - ((logV - logUpper) / (logLower - logUpper))
|
||||
}
|
||||
return rank + b.Count*fraction
|
||||
}
|
||||
|
||||
if b.Lower <= 0 && b.Upper >= 0 {
|
||||
zeroBucket = true
|
||||
switch {
|
||||
case len(h.NegativeBuckets) == 0 && len(h.PositiveBuckets) > 0:
|
||||
// This is the zero bucket and the histogram has only
|
||||
|
@ -325,10 +386,12 @@ func histogramFraction(lower, upper float64, h *histogram.FloatHistogram) float6
|
|||
}
|
||||
}
|
||||
if !lowerSet && b.Lower >= lower {
|
||||
// We have hit the lower value at the lower bucket boundary.
|
||||
lowerRank = rank
|
||||
lowerSet = true
|
||||
}
|
||||
if !upperSet && b.Lower >= upper {
|
||||
// We have hit the upper value at the lower bucket boundary.
|
||||
upperRank = rank
|
||||
upperSet = true
|
||||
}
|
||||
|
@ -336,11 +399,21 @@ func histogramFraction(lower, upper float64, h *histogram.FloatHistogram) float6
|
|||
break
|
||||
}
|
||||
if !lowerSet && b.Lower < lower && b.Upper > lower {
|
||||
lowerRank = rank + b.Count*(lower-b.Lower)/(b.Upper-b.Lower)
|
||||
// The lower value is in this bucket.
|
||||
if h.UsesCustomBuckets() || zeroBucket {
|
||||
lowerRank = interpolateLinearly(lower)
|
||||
} else {
|
||||
lowerRank = interpolateExponentially(lower)
|
||||
}
|
||||
lowerSet = true
|
||||
}
|
||||
if !upperSet && b.Lower < upper && b.Upper > upper {
|
||||
upperRank = rank + b.Count*(upper-b.Lower)/(b.Upper-b.Lower)
|
||||
// The upper value is in this bucket.
|
||||
if h.UsesCustomBuckets() || zeroBucket {
|
||||
upperRank = interpolateLinearly(upper)
|
||||
} else {
|
||||
upperRank = interpolateExponentially(upper)
|
||||
}
|
||||
upperSet = true
|
||||
}
|
||||
if lowerSet && upperSet {
|
||||
|
|
Loading…
Reference in a new issue