Merge pull request #14677 from prometheus/beorn7/histogram

promql(native histograms): Introduce exponential interpolation
2025-03-05 20:59:13 -08:00 · 2024-09-19 18:08:59 +02:00 · 2024-09-19 18:08:59 +02:00 · df9916ef66
parent c7fb6188b4 6fcd225aee
commit df9916ef66
3 changed files with 337 additions and 89 deletions
--- a/docs/querying/functions.md
+++ b/docs/querying/functions.md
@ -326,45 +326,70 @@ With native histograms, aggregating everything works as usual without any `by` c

    histogram_quantile(0.9, sum(rate(http_request_duration_seconds[10m])))

-The `histogram_quantile()` function interpolates quantile values by
-assuming a linear distribution within a bucket. 
+In the (common) case that a quantile value does not coincide with a bucket
+boundary, the `histogram_quantile()` function interpolates the quantile value
+within the bucket the quantile value falls into. For classic histograms, for
+native histograms with custom bucket boundaries, and for the zero bucket of
+other native histograms, it assumes a uniform distribution of observations
+within the bucket (also called _linear interpolation_). For the
+non-zero-buckets of native histograms with a standard exponential bucketing
+schema, the interpolation is done under the assumption that the samples within
+the bucket are distributed in a way that they would uniformly populate the
+buckets in a hypothetical histogram with higher resolution. (This is also
+called _exponential interpolation_.)

 If `b` has 0 observations, `NaN` is returned. For φ < 0, `-Inf` is
 returned. For φ > 1, `+Inf` is returned. For φ = `NaN`, `NaN` is returned.

-The following is only relevant for classic histograms: If `b` contains
-fewer than two buckets, `NaN` is returned. The highest bucket must have an
-upper bound of `+Inf`. (Otherwise, `NaN` is returned.) If a quantile is located
-in the highest bucket, the upper bound of the second highest bucket is
-returned. A lower limit of the lowest bucket is assumed to be 0 if the upper
-bound of that bucket is greater than
-0. In that case, the usual linear interpolation is applied within that
-bucket. Otherwise, the upper bound of the lowest bucket is returned for
-quantiles located in the lowest bucket. 
+Special cases for classic histograms:

-You can use `histogram_quantile(0, v instant-vector)` to get the estimated minimum value stored in
-a histogram.
+* If `b` contains fewer than two buckets, `NaN` is returned.
+* The highest bucket must have an upper bound of `+Inf`. (Otherwise, `NaN` is
+  returned.)
+* If a quantile is located in the highest bucket, the upper bound of the second
+  highest bucket is returned.
+* The lower limit of the lowest bucket is assumed to be 0 if the upper bound of
+  that bucket is greater than 0. In that case, the usual linear interpolation
+  is applied within that bucket. Otherwise, the upper bound of the lowest
+  bucket is returned for quantiles located in the lowest bucket.

-You can use `histogram_quantile(1, v instant-vector)` to get the estimated maximum value stored in
-a histogram.
+Special cases for native histograms (relevant for how exactly the
+interpolation happens within the zero bucket):

-Buckets of classic histograms are cumulative. Therefore, the following should always be the case:
+* A zero bucket with finite width is assumed to contain no negative
+  observations if the histogram has observations in positive buckets, but none
+  in negative buckets.
+* A zero bucket with finite width is assumed to contain no positive
+  observations if the histogram has observations in negative buckets, but none
+  in positive buckets.

-* The counts in the buckets are monotonically increasing (strictly non-decreasing).
-* A lack of observations between the upper limits of two consecutive buckets results in equal counts
-in those two buckets.
+You can use `histogram_quantile(0, v instant-vector)` to get the estimated
+minimum value stored in a histogram.

-However, floating point precision issues (e.g. small discrepancies introduced by computing of buckets
-with `sum(rate(...))`) or invalid data might violate these assumptions. In that case,
-`histogram_quantile` would be unable to return meaningful results. To mitigate the issue,
-`histogram_quantile` assumes that tiny relative differences between consecutive buckets are happening
-because of floating point precision errors and ignores them. (The threshold to ignore a difference
-between two buckets is a trillionth (1e-12) of the sum of both buckets.) Furthermore, if there are
-non-monotonic bucket counts even after this adjustment, they are increased to the value of the
-previous buckets to enforce monotonicity. The latter is evidence for an actual issue with the input
-data and is therefore flagged with an informational annotation reading `input to histogram_quantile
-needed to be fixed for monotonicity`. If you encounter this annotation, you should find and remove
-the source of the invalid data.
+You can use `histogram_quantile(1, v instant-vector)` to get the estimated
+maximum value stored in a histogram.
+
+Buckets of classic histograms are cumulative. Therefore, the following should
+always be the case:
+
+* The counts in the buckets are monotonically non-decreasing, i.e. each count
+  is greater than or equal to the count of the previous bucket.
+* A lack of observations between the upper limits of two consecutive buckets
+  results in equal counts in those two buckets.
+
+However, floating point precision issues (e.g. small discrepancies introduced
+by computing of buckets with `sum(rate(...))`) or invalid data might violate
+these assumptions. In that case, `histogram_quantile` would be unable to return
+meaningful results. To mitigate the issue, `histogram_quantile` assumes that
+tiny relative differences between consecutive buckets are happening because of
+floating point precision errors and ignores them. (The threshold to ignore a
+difference between two buckets is a trillionth (1e-12) of the sum of both
+buckets.) Furthermore, if there are non-monotonic bucket counts even after this
+adjustment, they are increased to the value of the previous buckets to enforce
+monotonicity. The latter is evidence for an actual issue with the input data
+and is therefore flagged with an informational annotation reading `input to
+histogram_quantile needed to be fixed for monotonicity`. If you encounter this
+annotation, you should find and remove the source of the invalid data.

 ## `histogram_stddev()` and `histogram_stdvar()`

--- a/promql/promqltest/testdata/native_histograms.test
+++ b/promql/promqltest/testdata/native_histograms.test
@ -46,9 +46,12 @@ eval instant at 1m histogram_fraction(1, 2, single_histogram)
 eval instant at 1m histogram_fraction(0, 8, single_histogram)
 	{} 1

-# Median is 1.5 due to linear estimation of the midpoint of the middle bucket, whose values are within range 1 < x <= 2.
+# Median is 1.414213562373095 (2**2**-1, or sqrt(2)) due to
+# exponential interpolation, i.e. the "midpoint" within range 1 < x <=
+# 2 is assumed where the bucket boundary would be if we increased the
+# resolution of the histogram by one step.
 eval instant at 1m histogram_quantile(0.5, single_histogram)
-	{} 1.5
+	{} 1.414213562373095

 clear

@ -68,8 +71,9 @@ eval instant at 5m histogram_avg(multi_histogram)
 eval instant at 5m histogram_fraction(1, 2, multi_histogram)
 	{} 0.5

+# See explanation for exponential interpolation above.
 eval instant at 5m histogram_quantile(0.5, multi_histogram)
-	{} 1.5
+	{} 1.414213562373095


 # Each entry should look the same as the first.
@ -85,8 +89,9 @@ eval instant at 50m histogram_avg(multi_histogram)
 eval instant at 50m histogram_fraction(1, 2, multi_histogram)
 	{} 0.5

+# See explanation for exponential interpolation above.
 eval instant at 50m histogram_quantile(0.5, multi_histogram)
-	{} 1.5
+	{} 1.414213562373095

 clear

@ -109,8 +114,9 @@ eval instant at 5m histogram_avg(incr_histogram)
 eval instant at 5m histogram_fraction(1, 2, incr_histogram)
 	{} 0.6

+# See explanation for exponential interpolation above.
 eval instant at 5m histogram_quantile(0.5, incr_histogram)
-	{} 1.5
+	{} 1.414213562373095


 eval instant at 50m incr_histogram
@ -129,16 +135,18 @@ eval instant at 50m histogram_avg(incr_histogram)
 eval instant at 50m histogram_fraction(1, 2, incr_histogram)
 	{} 0.8571428571428571

+# See explanation for exponential interpolation above.
 eval instant at 50m histogram_quantile(0.5, incr_histogram)
-	{} 1.5
+	{} 1.414213562373095

 # Per-second average rate of increase should be 1/(5*60) for count and buckets, then 2/(5*60) for sum.
 eval instant at 50m rate(incr_histogram[10m])
    {} {{count:0.0033333333333333335 sum:0.006666666666666667 offset:1 buckets:[0.0033333333333333335]}}

 # Calculate the 50th percentile of observations over the last 10m.
+# See explanation for exponential interpolation above.
 eval instant at 50m histogram_quantile(0.5, rate(incr_histogram[10m]))
-	{} 1.5
+	{} 1.414213562373095

 clear

@ -211,8 +219,9 @@ eval instant at 1m histogram_avg(negative_histogram)
 eval instant at 1m histogram_fraction(-2, -1, negative_histogram)
 	{} 0.5

+# Exponential interpolation works the same as for positive buckets, just mirrored.
 eval instant at 1m histogram_quantile(0.5, negative_histogram)
-	{} -1.5
+	{} -1.414213562373095

 clear

@ -233,8 +242,9 @@ eval instant at 5m histogram_avg(two_samples_histogram)
 eval instant at 5m histogram_fraction(-2, -1, two_samples_histogram)
 	{} 0.5

+# See explanation for exponential interpolation above.
 eval instant at 5m histogram_quantile(0.5, two_samples_histogram)
-	{} -1.5
+	{} -1.414213562373095

 clear

@ -392,20 +402,24 @@ eval_warn instant at 10m histogram_quantile(1.001, histogram_quantile_1)
 eval instant at 10m histogram_quantile(1, histogram_quantile_1)
    {} 16

+# The following quantiles are within a bucket. Exponential
+# interpolation is applied (rather than linear, as it is done for
+# classic histograms), leading to slightly different quantile values.
 eval instant at 10m histogram_quantile(0.99, histogram_quantile_1)
-    {} 15.759999999999998
+    {} 15.67072476139083

 eval instant at 10m histogram_quantile(0.9, histogram_quantile_1)
-    {} 13.600000000000001
+    {} 12.99603834169977

 eval instant at 10m histogram_quantile(0.6, histogram_quantile_1)
-    {} 4.799999999999997
+    {} 4.594793419988138

 eval instant at 10m histogram_quantile(0.5, histogram_quantile_1)
-    {} 1.6666666666666665
+    {} 1.5874010519681994

+# Linear interpolation within the zero bucket after all.
 eval instant at 10m histogram_quantile(0.1, histogram_quantile_1)
-    {} 0.0006000000000000001
+    {} 0.0006

 eval instant at 10m histogram_quantile(0, histogram_quantile_1)
    {} 0
@ -425,17 +439,20 @@ eval_warn instant at 10m histogram_quantile(1.001, histogram_quantile_2)
 eval instant at 10m histogram_quantile(1, histogram_quantile_2)
    {} 0

+# Again, the quantile values here are slightly different from what
+# they would be with linear interpolation. Note that quantiles
+# ending up in the zero bucket are linearly interpolated after all.
 eval instant at 10m histogram_quantile(0.99, histogram_quantile_2)
-    {} -6.000000000000048e-05
+    {} -0.00006

 eval instant at 10m histogram_quantile(0.9, histogram_quantile_2)
-    {} -0.0005999999999999996
+    {} -0.0006

 eval instant at 10m histogram_quantile(0.5, histogram_quantile_2)
-    {} -1.6666666666666667
+    {} -1.5874010519681996

 eval instant at 10m histogram_quantile(0.1, histogram_quantile_2)
-    {} -13.6
+    {} -12.996038341699768

 eval instant at 10m histogram_quantile(0, histogram_quantile_2)
    {} -16
@ -445,7 +462,9 @@ eval_warn instant at 10m histogram_quantile(-1, histogram_quantile_2)

 clear

-# Apply quantile function to histogram with both positive and negative buckets with zero bucket.
+# Apply quantile function to histogram with both positive and negative
+# buckets with zero bucket.
+# First positive buckets with exponential interpolation.
 load 10m
    histogram_quantile_3 {{schema:0 count:24 sum:100 z_bucket:4 z_bucket_w:0.001 buckets:[2 3 0 1 4] n_buckets:[2 3 0 1 4]}}x1

@ -456,31 +475,34 @@ eval instant at 10m histogram_quantile(1, histogram_quantile_3)
    {} 16

 eval instant at 10m histogram_quantile(0.99, histogram_quantile_3)
-    {} 15.519999999999996
+    {} 15.34822590920423

 eval instant at 10m histogram_quantile(0.9, histogram_quantile_3)
-    {} 11.200000000000003
+    {} 10.556063286183155

 eval instant at 10m histogram_quantile(0.7, histogram_quantile_3)
-    {} 1.2666666666666657
+    {} 1.2030250360821164

+# Linear interpolation in the zero bucket, symmetrically centered around
+# the zero point.
 eval instant at 10m histogram_quantile(0.55, histogram_quantile_3)
-    {} 0.0006000000000000005
+    {} 0.0006

 eval instant at 10m histogram_quantile(0.5, histogram_quantile_3)
    {} 0

 eval instant at 10m histogram_quantile(0.45, histogram_quantile_3)
-    {} -0.0005999999999999996
+    {} -0.0006

+# Finally negative buckets with mirrored exponential interpolation.
 eval instant at 10m histogram_quantile(0.3, histogram_quantile_3)
-    {} -1.266666666666667
+    {} -1.2030250360821169

 eval instant at 10m histogram_quantile(0.1, histogram_quantile_3)
-    {} -11.2
+    {} -10.556063286183155

 eval instant at 10m histogram_quantile(0.01, histogram_quantile_3)
-    {} -15.52
+    {} -15.34822590920423

 eval instant at 10m histogram_quantile(0, histogram_quantile_3)
    {} -16
@ -490,6 +512,90 @@ eval_warn instant at 10m histogram_quantile(-1, histogram_quantile_3)

 clear

+# Try different schemas. (The interpolation logic must not depend on the schema.)
+clear
+load 1m
+    var_res_histogram{schema="-1"} {{schema:-1 sum:6 count:5 buckets:[0 5]}}
+    var_res_histogram{schema="0"}  {{schema:0 sum:4 count:5 buckets:[0 5]}}
+    var_res_histogram{schema="+1"} {{schema:1 sum:4 count:5 buckets:[0 5]}}
+
+eval instant at 1m histogram_quantile(0.5, var_res_histogram)
+    {schema="-1"}  2.0
+    {schema="0"}   1.4142135623730951
+    {schema="+1"}  1.189207
+
+eval instant at 1m histogram_fraction(0, 2, var_res_histogram{schema="-1"})
+    {schema="-1"}  0.5
+
+eval instant at 1m histogram_fraction(0, 1.4142135623730951, var_res_histogram{schema="0"})
+    {schema="0"}  0.5
+
+eval instant at 1m histogram_fraction(0, 1.189207, var_res_histogram{schema="+1"})
+    {schema="+1"}  0.5
+
+# The same as above, but one bucket "further to the right".
+clear
+load 1m
+    var_res_histogram{schema="-1"} {{schema:-1 sum:6 count:5 buckets:[0 0 5]}}
+    var_res_histogram{schema="0"}  {{schema:0 sum:4 count:5 buckets:[0 0 5]}}
+    var_res_histogram{schema="+1"} {{schema:1 sum:4 count:5 buckets:[0 0 5]}}
+
+eval instant at 1m histogram_quantile(0.5, var_res_histogram)
+    {schema="-1"}  8.0
+    {schema="0"}   2.82842712474619
+    {schema="+1"}  1.6817928305074292
+
+eval instant at 1m histogram_fraction(0, 8, var_res_histogram{schema="-1"})
+    {schema="-1"}  0.5
+
+eval instant at 1m histogram_fraction(0, 2.82842712474619, var_res_histogram{schema="0"})
+    {schema="0"}  0.5
+
+eval instant at 1m histogram_fraction(0, 1.6817928305074292, var_res_histogram{schema="+1"})
+    {schema="+1"}  0.5
+
+# And everything again but for negative buckets.
+clear
+load 1m
+    var_res_histogram{schema="-1"} {{schema:-1 sum:6 count:5 n_buckets:[0 5]}}
+    var_res_histogram{schema="0"}  {{schema:0 sum:4 count:5 n_buckets:[0 5]}}
+    var_res_histogram{schema="+1"} {{schema:1 sum:4 count:5 n_buckets:[0 5]}}
+
+eval instant at 1m histogram_quantile(0.5, var_res_histogram)
+    {schema="-1"}  -2.0
+    {schema="0"}   -1.4142135623730951
+    {schema="+1"}  -1.189207
+
+eval instant at 1m histogram_fraction(-2, 0, var_res_histogram{schema="-1"})
+    {schema="-1"}  0.5
+
+eval instant at 1m histogram_fraction(-1.4142135623730951, 0, var_res_histogram{schema="0"})
+    {schema="0"}  0.5
+
+eval instant at 1m histogram_fraction(-1.189207, 0, var_res_histogram{schema="+1"})
+    {schema="+1"}  0.5
+
+clear
+load 1m
+    var_res_histogram{schema="-1"} {{schema:-1 sum:6 count:5 n_buckets:[0 0 5]}}
+    var_res_histogram{schema="0"}  {{schema:0 sum:4 count:5 n_buckets:[0 0 5]}}
+    var_res_histogram{schema="+1"} {{schema:1 sum:4 count:5 n_buckets:[0 0 5]}}
+
+eval instant at 1m histogram_quantile(0.5, var_res_histogram)
+    {schema="-1"}  -8.0
+    {schema="0"}   -2.82842712474619
+    {schema="+1"}  -1.6817928305074292
+
+eval instant at 1m histogram_fraction(-8, 0, var_res_histogram{schema="-1"})
+    {schema="-1"}  0.5
+
+eval instant at 1m histogram_fraction(-2.82842712474619, 0, var_res_histogram{schema="0"})
+    {schema="0"}  0.5
+
+eval instant at 1m histogram_fraction(-1.6817928305074292, 0, var_res_histogram{schema="+1"})
+    {schema="+1"}  0.5
+
+
 # Apply fraction function to empty histogram.
 load 10m
    histogram_fraction_1 {{}}x1
@ -515,11 +621,18 @@ eval instant at 10m histogram_fraction(-0.001, 0, histogram_fraction_2)
 eval instant at 10m histogram_fraction(0, 0.001, histogram_fraction_2)
    {} 0.16666666666666666

+# Note that this result and the one above add up to 1.
+eval instant at 10m histogram_fraction(0.001, inf, histogram_fraction_2)
+    {} 0.8333333333333334
+
+# We are in the zero bucket, resulting in linear interpolation.
 eval instant at 10m histogram_fraction(0, 0.0005, histogram_fraction_2)
    {} 0.08333333333333333

-eval instant at 10m histogram_fraction(0.001, inf, histogram_fraction_2)
-    {} 0.8333333333333334
+# Demonstrate that the inverse operation with histogram_quantile yields
+# the original value with the non-trivial result above.
+eval instant at 10m histogram_quantile(0.08333333333333333, histogram_fraction_2)
+    {} 0.0005

 eval instant at 10m histogram_fraction(-inf, -0.001, histogram_fraction_2)
    {} 0
@ -527,17 +640,30 @@ eval instant at 10m histogram_fraction(-inf, -0.001, histogram_fraction_2)
 eval instant at 10m histogram_fraction(1, 2, histogram_fraction_2)
    {} 0.25

+# More non-trivial results with interpolation involved below, including
+# some round-trips via histogram_quantile to prove that the inverse
+# operation leads to the same results.
+
+eval instant at 10m histogram_fraction(0, 1.5, histogram_fraction_2)
+    {} 0.4795739585136224
+
 eval instant at 10m histogram_fraction(1.5, 2, histogram_fraction_2)
-    {} 0.125
+    {} 0.10375937481971091

 eval instant at 10m histogram_fraction(1, 8, histogram_fraction_2)
    {} 0.3333333333333333

+eval instant at 10m histogram_fraction(0, 6, histogram_fraction_2)
+    {} 0.6320802083934297
+
+eval instant at 10m histogram_quantile(0.6320802083934297, histogram_fraction_2)
+    {} 6
+
 eval instant at 10m histogram_fraction(1, 6, histogram_fraction_2)
-    {} 0.2916666666666667
+    {} 0.29874687506009634

 eval instant at 10m histogram_fraction(1.5, 6, histogram_fraction_2)
-    {} 0.16666666666666666
+    {} 0.15250624987980724

 eval instant at 10m histogram_fraction(-2, -1, histogram_fraction_2)
    {} 0
@ -600,6 +726,12 @@ eval instant at 10m histogram_fraction(0, 0.001, histogram_fraction_3)
 eval instant at 10m histogram_fraction(-0.0005, 0, histogram_fraction_3)
    {} 0.08333333333333333

+eval instant at 10m histogram_fraction(-inf, -0.0005, histogram_fraction_3)
+    {} 0.9166666666666666
+
+eval instant at 10m histogram_quantile(0.9166666666666666, histogram_fraction_3)
+    {} -0.0005
+
 eval instant at 10m histogram_fraction(0.001, inf, histogram_fraction_3)
    {} 0

@ -625,16 +757,22 @@ eval instant at 10m histogram_fraction(-2, -1, histogram_fraction_3)
    {} 0.25

 eval instant at 10m histogram_fraction(-2, -1.5, histogram_fraction_3)
-    {} 0.125
+    {} 0.10375937481971091

 eval instant at 10m histogram_fraction(-8, -1, histogram_fraction_3)
    {} 0.3333333333333333

+eval instant at 10m histogram_fraction(-inf, -6, histogram_fraction_3)
+    {} 0.36791979160657035
+
+eval instant at 10m histogram_quantile(0.36791979160657035, histogram_fraction_3)
+    {} -6
+
 eval instant at 10m histogram_fraction(-6, -1, histogram_fraction_3)
-    {} 0.2916666666666667
+    {} 0.29874687506009634

 eval instant at 10m histogram_fraction(-6, -1.5, histogram_fraction_3)
-    {} 0.16666666666666666
+    {} 0.15250624987980724

 eval instant at 10m histogram_fraction(42, 3.1415, histogram_fraction_3)
    {} 0
@ -684,6 +822,18 @@ eval instant at 10m histogram_fraction(0, 0.001, histogram_fraction_4)
 eval instant at 10m histogram_fraction(-0.0005, 0.0005, histogram_fraction_4)
    {} 0.08333333333333333

+eval instant at 10m histogram_fraction(-inf, 0.0005, histogram_fraction_4)
+    {} 0.5416666666666666
+
+eval instant at 10m histogram_quantile(0.5416666666666666, histogram_fraction_4)
+    {} 0.0005
+
+eval instant at 10m histogram_fraction(-inf, -0.0005, histogram_fraction_4)
+    {} 0.4583333333333333
+
+eval instant at 10m histogram_quantile(0.4583333333333333, histogram_fraction_4)
+    {} -0.0005
+
 eval instant at 10m histogram_fraction(0.001, inf, histogram_fraction_4)
    {} 0.4166666666666667

@ -694,31 +844,31 @@ eval instant at 10m histogram_fraction(1, 2, histogram_fraction_4)
    {} 0.125

 eval instant at 10m histogram_fraction(1.5, 2, histogram_fraction_4)
-    {} 0.0625
+    {} 0.051879687409855414

 eval instant at 10m histogram_fraction(1, 8, histogram_fraction_4)
    {} 0.16666666666666666

 eval instant at 10m histogram_fraction(1, 6, histogram_fraction_4)
-    {} 0.14583333333333334
+    {} 0.14937343753004825

 eval instant at 10m histogram_fraction(1.5, 6, histogram_fraction_4)
-    {} 0.08333333333333333
+    {} 0.07625312493990366

 eval instant at 10m histogram_fraction(-2, -1, histogram_fraction_4)
    {} 0.125

 eval instant at 10m histogram_fraction(-2, -1.5, histogram_fraction_4)
-    {} 0.0625
+    {} 0.051879687409855456

 eval instant at 10m histogram_fraction(-8, -1, histogram_fraction_4)
    {} 0.16666666666666666

 eval instant at 10m histogram_fraction(-6, -1, histogram_fraction_4)
-    {} 0.14583333333333334
+    {} 0.14937343753004817

 eval instant at 10m histogram_fraction(-6, -1.5, histogram_fraction_4)
-    {} 0.08333333333333333
+    {} 0.07625312493990362

 eval instant at 10m histogram_fraction(42, 3.1415, histogram_fraction_4)
    {} 0
--- a/promql/quantile.go
+++ b/promql/quantile.go
@ -153,19 +153,31 @@ func bucketQuantile(q float64, buckets buckets) (float64, bool, bool) {

 // histogramQuantile calculates the quantile 'q' based on the given histogram.
 //
-// The quantile value is interpolated assuming a linear distribution within a
-// bucket.
-// TODO(beorn7): Find an interpolation method that is a better fit for
-// exponential buckets (and think about configurable interpolation).
+// For custom buckets, the result is interpolated linearly, i.e. it is assumed
+// the observations are uniformly distributed within each bucket. (This is a
+// quite blunt assumption, but it is consistent with the interpolation method
+// used for classic histograms so far.)
+//
+// For exponential buckets, the interpolation is done under the assumption that
+// the samples within each bucket are distributed in a way that they would
+// uniformly populate the buckets in a hypothetical histogram with higher
+// resolution. For example, if the rank calculation suggests that the requested
+// quantile is right in the middle of the population of the (1,2] bucket, we
+// assume the quantile would be right at the bucket boundary between the two
+// buckets the (1,2] bucket would be divided into if the histogram had double
+// the resolution, which is 2**2**-1 = 1.4142... We call this exponential
+// interpolation.
+//
+// However, for a quantile that ends up in the zero bucket, this method isn't
+// very helpful (because there is an infinite number of buckets close to zero,
+// so we would have to assume zero as the result). Therefore, we return to
+// linear interpolation in the zero bucket.
 //
 // A natural lower bound of 0 is assumed if the histogram has only positive
 // buckets. Likewise, a natural upper bound of 0 is assumed if the histogram has
 // only negative buckets.
-// TODO(beorn7): Come to terms if we want that.
 //
-// There are a number of special cases (once we have a way to report errors
-// happening during evaluations of AST functions, we should report those
-// explicitly):
+// There are a number of special cases:
 //
 // If the histogram has 0 observations, NaN is returned.
 //
@ -193,9 +205,9 @@ func histogramQuantile(q float64, h *histogram.FloatHistogram) float64 {
 		rank   float64
 	)

-	// if there are NaN observations in the histogram (h.Sum is NaN), use the forward iterator
-	// if the q < 0.5, use the forward iterator
-	// if the q >= 0.5, use the reverse iterator
+	// If there are NaN observations in the histogram (h.Sum is NaN), use the forward iterator.
+	// If q < 0.5, use the forward iterator.
+	// If q >= 0.5, use the reverse iterator.
 	if math.IsNaN(h.Sum) || q < 0.5 {
 		it = h.AllBucketIterator()
 		rank = q * h.Count
@ -260,8 +272,29 @@ func histogramQuantile(q float64, h *histogram.FloatHistogram) float64 {
 		rank = count - rank
 	}

-	// TODO(codesome): Use a better estimation than linear.
-	return bucket.Lower + (bucket.Upper-bucket.Lower)*(rank/bucket.Count)
+	// The fraction of how far we are into the current bucket.
+	fraction := rank / bucket.Count
+
+	// Return linear interpolation for custom buckets and for quantiles that
+	// end up in the zero bucket.
+	if h.UsesCustomBuckets() || (bucket.Lower <= 0 && bucket.Upper >= 0) {
+		return bucket.Lower + (bucket.Upper-bucket.Lower)*fraction
+	}
+
+	// For exponential buckets, we interpolate on a logarithmic scale. On a
+	// logarithmic scale, the exponential bucket boundaries (for any schema)
+	// become linear (every bucket has the same width). Therefore, after
+	// taking the logarithm of both bucket boundaries, we can use the
+	// calculated fraction in the same way as for linear interpolation (see
+	// above). Finally, we return to the normal scale by applying the
+	// exponential function to the result.
+	logLower := math.Log2(math.Abs(bucket.Lower))
+	logUpper := math.Log2(math.Abs(bucket.Upper))
+	if bucket.Lower > 0 { // Positive bucket.
+		return math.Exp2(logLower + (logUpper-logLower)*fraction)
+	}
+	// Otherwise, we are in a negative bucket and have to mirror things.
+	return -math.Exp2(logUpper + (logLower-logUpper)*(1-fraction))
 }

 // histogramFraction calculates the fraction of observations between the
@ -271,8 +304,8 @@ func histogramQuantile(q float64, h *histogram.FloatHistogram) float64 {
 // histogramQuantile(0.9, h) returns 123.4, then histogramFraction(-Inf, 123.4, h)
 // returns 0.9.
 //
-// The same notes (and TODOs) with regard to interpolation and assumptions about
-// the zero bucket boundaries apply as for histogramQuantile.
+// The same notes with regard to interpolation and assumptions about the zero
+// bucket boundaries apply as for histogramQuantile.
 //
 // Whether either boundary is inclusive or exclusive doesn’t actually matter as
 // long as interpolation has to be performed anyway. In the case of a boundary
@ -310,7 +343,35 @@ func histogramFraction(lower, upper float64, h *histogram.FloatHistogram) float6
 	)
 	for it.Next() {
 		b := it.At()
-		if b.Lower < 0 && b.Upper > 0 {
+		zeroBucket := false
+
+		// interpolateLinearly is used for custom buckets to be
+		// consistent with the linear interpolation known from classic
+		// histograms. It is also used for the zero bucket.
+		interpolateLinearly := func(v float64) float64 {
+			return rank + b.Count*(v-b.Lower)/(b.Upper-b.Lower)
+		}
+
+		// interpolateExponentially is using the same exponential
+		// interpolation method as above for histogramQuantile. This
+		// method is a better fit for exponential bucketing.
+		interpolateExponentially := func(v float64) float64 {
+			var (
+				logLower = math.Log2(math.Abs(b.Lower))
+				logUpper = math.Log2(math.Abs(b.Upper))
+				logV     = math.Log2(math.Abs(v))
+				fraction float64
+			)
+			if v > 0 {
+				fraction = (logV - logLower) / (logUpper - logLower)
+			} else {
+				fraction = 1 - ((logV - logUpper) / (logLower - logUpper))
+			}
+			return rank + b.Count*fraction
+		}
+
+		if b.Lower <= 0 && b.Upper >= 0 {
+			zeroBucket = true
 			switch {
 			case len(h.NegativeBuckets) == 0 && len(h.PositiveBuckets) > 0:
 				// This is the zero bucket and the histogram has only
@ -325,10 +386,12 @@ func histogramFraction(lower, upper float64, h *histogram.FloatHistogram) float6
 			}
 		}
 		if !lowerSet && b.Lower >= lower {
+			// We have hit the lower value at the lower bucket boundary.
 			lowerRank = rank
 			lowerSet = true
 		}
 		if !upperSet && b.Lower >= upper {
+			// We have hit the upper value at the lower bucket boundary.
 			upperRank = rank
 			upperSet = true
 		}
@ -336,11 +399,21 @@ func histogramFraction(lower, upper float64, h *histogram.FloatHistogram) float6
 			break
 		}
 		if !lowerSet && b.Lower < lower && b.Upper > lower {
-			lowerRank = rank + b.Count*(lower-b.Lower)/(b.Upper-b.Lower)
+			// The lower value is in this bucket.
+			if h.UsesCustomBuckets() || zeroBucket {
+				lowerRank = interpolateLinearly(lower)
+			} else {
+				lowerRank = interpolateExponentially(lower)
+			}
 			lowerSet = true
 		}
 		if !upperSet && b.Lower < upper && b.Upper > upper {
-			upperRank = rank + b.Count*(upper-b.Lower)/(b.Upper-b.Lower)
+			// The upper value is in this bucket.
+			if h.UsesCustomBuckets() || zeroBucket {
+				upperRank = interpolateLinearly(upper)
+			} else {
+				upperRank = interpolateExponentially(upper)
+			}
 			upperSet = true
 		}
 		if lowerSet && upperSet {