Merge pull request #10886 from prometheus/beorn7/sparsehistogram

Add histogram_fraction function
This commit is contained in:
Björn Rabenstein 2022-06-28 16:34:13 +02:00 committed by GitHub
commit f9f2e2855d
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
9 changed files with 602 additions and 10 deletions

View file

@ -152,8 +152,14 @@ Special cases are:
`floor(v instant-vector)` rounds the sample values of all elements in `v` down
to the nearest integer.
## `histogram_fraction()`
TODO(beorn7): Add documentation.
## `histogram_quantile()`
TODO(beorn7): This needs a lot of updates for Histograms as sample value types.
`histogram_quantile(φ scalar, b instant-vector)` calculates the φ-quantile (0 ≤ φ
≤ 1) from the buckets `b` of a
[histogram](https://prometheus.io/docs/concepts/metric_types/#histogram). (See

View file

@ -3244,15 +3244,15 @@ func TestSparseHistogram_HistogramQuantile(t *testing.T) {
},
{ // Zero bucket.
quantile: "1",
value: 0.001,
value: 0,
},
{ // Zero bucket.
quantile: "0.99",
value: 0.0008799999999999991,
value: -6.000000000000048e-05,
},
{ // Zero bucket.
quantile: "0.9",
value: -0.00019999999999999933,
value: -0.0005999999999999996,
},
{
quantile: "0.5",
@ -3394,6 +3394,434 @@ func TestSparseHistogram_HistogramQuantile(t *testing.T) {
}
}
// TestSparseHistogram_HistogramFraction stores one sparse histogram per test
// case and evaluates histogram_fraction on it for a list of lower/upper bound
// pairs, comparing each result with the expected fraction.
func TestSparseHistogram_HistogramFraction(t *testing.T) {
	// TODO(codesome): Integrate histograms into the PromQL testing framework
	// and write more tests there.
	const seriesName = "sparse_histogram_series"

	// subCase is one lower/upper pair, given verbatim as it appears in the
	// query string, together with the expected result.
	type subCase struct {
		lower, upper string
		value        float64
	}

	// These ranges are appended to the sub-cases of every non-empty histogram
	// below and are expected to yield the same result for each of them.
	invariantCases := []subCase{
		{lower: "42", upper: "3.1415", value: 0},
		{lower: "0", upper: "0", value: 0},
		{lower: "0.000001", upper: "0.000001", value: 0},
		{lower: "42", upper: "42", value: 0},
		{lower: "-3.1", upper: "-3.1", value: 0},
		{lower: "3.1415", upper: "NaN", value: math.NaN()},
		{lower: "NaN", upper: "42", value: math.NaN()},
		{lower: "NaN", upper: "NaN", value: math.NaN()},
		{lower: "-Inf", upper: "+Inf", value: 1},
	}

	testCases := []struct {
		name string
		// Histogram to test.
		hist *histogram.Histogram
		// Different ranges to test for this histogram.
		ranges []subCase
	}{
		{
			name: "empty histogram",
			hist: &histogram.Histogram{},
			ranges: []subCase{
				{lower: "3.1415", upper: "42", value: math.NaN()},
			},
		},
		{
			name: "all positive buckets with zero bucket",
			hist: &histogram.Histogram{
				Count:         12,
				ZeroCount:     2,
				ZeroThreshold: 0.001,
				Sum:           100, // Does not matter.
				Schema:        0,
				PositiveSpans: []histogram.Span{
					{Offset: 0, Length: 2},
					{Offset: 1, Length: 2},
				},
				PositiveBuckets: []int64{2, 1, -2, 3}, // Abs: 2, 3, 1, 4
			},
			ranges: append([]subCase{
				{lower: "0", upper: "+Inf", value: 1},
				{lower: "-Inf", upper: "0", value: 0},
				{lower: "-0.001", upper: "0", value: 0},
				{lower: "0", upper: "0.001", value: 2. / 12.},
				{lower: "0", upper: "0.0005", value: 1. / 12.},
				{lower: "0.001", upper: "inf", value: 10. / 12.},
				{lower: "-inf", upper: "-0.001", value: 0},
				{lower: "1", upper: "2", value: 3. / 12.},
				{lower: "1.5", upper: "2", value: 1.5 / 12.},
				{lower: "1", upper: "8", value: 4. / 12.},
				{lower: "1", upper: "6", value: 3.5 / 12.},
				{lower: "1.5", upper: "6", value: 2. / 12.},
				{lower: "-2", upper: "-1", value: 0},
				{lower: "-2", upper: "-1.5", value: 0},
				{lower: "-8", upper: "-1", value: 0},
				{lower: "-6", upper: "-1", value: 0},
				{lower: "-6", upper: "-1.5", value: 0},
			}, invariantCases...),
		},
		{
			name: "all negative buckets with zero bucket",
			hist: &histogram.Histogram{
				Count:         12,
				ZeroCount:     2,
				ZeroThreshold: 0.001,
				Sum:           100, // Does not matter.
				Schema:        0,
				NegativeSpans: []histogram.Span{
					{Offset: 0, Length: 2},
					{Offset: 1, Length: 2},
				},
				NegativeBuckets: []int64{2, 1, -2, 3},
			},
			ranges: append([]subCase{
				{lower: "0", upper: "+Inf", value: 0},
				{lower: "-Inf", upper: "0", value: 1},
				{lower: "-0.001", upper: "0", value: 2. / 12.},
				{lower: "0", upper: "0.001", value: 0},
				{lower: "-0.0005", upper: "0", value: 1. / 12.},
				{lower: "0.001", upper: "inf", value: 0},
				{lower: "-inf", upper: "-0.001", value: 10. / 12.},
				{lower: "1", upper: "2", value: 0},
				{lower: "1.5", upper: "2", value: 0},
				{lower: "1", upper: "8", value: 0},
				{lower: "1", upper: "6", value: 0},
				{lower: "1.5", upper: "6", value: 0},
				{lower: "-2", upper: "-1", value: 3. / 12.},
				{lower: "-2", upper: "-1.5", value: 1.5 / 12.},
				{lower: "-8", upper: "-1", value: 4. / 12.},
				{lower: "-6", upper: "-1", value: 3.5 / 12.},
				{lower: "-6", upper: "-1.5", value: 2. / 12.},
			}, invariantCases...),
		},
		{
			name: "both positive and negative buckets with zero bucket",
			hist: &histogram.Histogram{
				Count:         24,
				ZeroCount:     4,
				ZeroThreshold: 0.001,
				Sum:           100, // Does not matter.
				Schema:        0,
				PositiveSpans: []histogram.Span{
					{Offset: 0, Length: 2},
					{Offset: 1, Length: 2},
				},
				PositiveBuckets: []int64{2, 1, -2, 3},
				NegativeSpans: []histogram.Span{
					{Offset: 0, Length: 2},
					{Offset: 1, Length: 2},
				},
				NegativeBuckets: []int64{2, 1, -2, 3},
			},
			ranges: append([]subCase{
				{lower: "0", upper: "+Inf", value: 0.5},
				{lower: "-Inf", upper: "0", value: 0.5},
				{lower: "-0.001", upper: "0", value: 2. / 24},
				{lower: "0", upper: "0.001", value: 2. / 24.},
				{lower: "-0.0005", upper: "0.0005", value: 2. / 24.},
				{lower: "0.001", upper: "inf", value: 10. / 24.},
				{lower: "-inf", upper: "-0.001", value: 10. / 24.},
				{lower: "1", upper: "2", value: 3. / 24.},
				{lower: "1.5", upper: "2", value: 1.5 / 24.},
				{lower: "1", upper: "8", value: 4. / 24.},
				{lower: "1", upper: "6", value: 3.5 / 24.},
				{lower: "1.5", upper: "6", value: 2. / 24.},
				{lower: "-2", upper: "-1", value: 3. / 24.},
				{lower: "-2", upper: "-1.5", value: 1.5 / 24.},
				{lower: "-8", upper: "-1", value: 4. / 24.},
				{lower: "-6", upper: "-1", value: 3.5 / 24.},
				{lower: "-6", upper: "-1.5", value: 2. / 24.},
			}, invariantCases...),
		},
	}

	for idx, tc := range testCases {
		t.Run(tc.name, func(t *testing.T) {
			test, err := NewTest(t, "")
			require.NoError(t, err)
			t.Cleanup(test.Close)

			lbls := labels.FromStrings("__name__", seriesName)
			engine := test.QueryEngine()
			// Each case gets its own timestamp, 10 minutes (in milliseconds)
			// after the one of the previous case.
			ts := int64(idx+1) * int64(10*time.Minute/time.Millisecond)

			app := test.Storage().Appender(context.TODO())
			_, err = app.AppendHistogram(0, lbls, ts, tc.hist)
			require.NoError(t, err)
			require.NoError(t, app.Commit())

			for n, r := range tc.ranges {
				subName := fmt.Sprintf("%d %s %s", n, r.lower, r.upper)
				t.Run(subName, func(t *testing.T) {
					query := fmt.Sprintf("histogram_fraction(%s, %s, %s)", r.lower, r.upper, seriesName)
					qry, err := engine.NewInstantQuery(test.Queryable(), nil, query, timestamp.Time(ts))
					require.NoError(t, err)

					res := qry.Exec(test.Context())
					require.NoError(t, res.Err)

					vector, err := res.Vector()
					require.NoError(t, err)
					require.Len(t, vector, 1)
					// The result has to be a float sample, not a histogram.
					require.Nil(t, vector[0].H)

					got := vector[0].V
					if math.IsNaN(r.value) {
						// NaN never compares equal, so it is checked separately.
						require.True(t, math.IsNaN(got))
						return
					}
					require.Equal(t, r.value, got)
				})
			}
		})
	}
}
func TestSparseHistogram_Sum_AddOperator(t *testing.T) {
// TODO(codesome): Integrate histograms into the PromQL testing framework
// and write more tests there.

View file

@ -864,6 +864,25 @@ func funcPredictLinear(vals []parser.Value, args parser.Expressions, enh *EvalNo
})
}
// === histogram_fraction(lower, upper parser.ValueTypeScalar, Vector parser.ValueTypeVector) Vector ===
//
// For each histogram sample in the input vector, a float sample with the
// result of histogramFraction for the given bounds is appended to enh.Out,
// with its metric passed through enh.DropMetricName. Samples that carry no
// histogram are left out of the result.
func funcHistogramFraction(vals []parser.Value, args parser.Expressions, enh *EvalNodeHelper) Vector {
	var (
		lo      = vals[0].(Vector)[0].V
		hi      = vals[1].(Vector)[0].V
		samples = vals[2].(Vector)
	)
	for _, s := range samples {
		// Only histogram samples can be evaluated.
		if s.H != nil {
			metric := enh.DropMetricName(s.Metric)
			fraction := histogramFraction(lo, hi, s.H)
			enh.Out = append(enh.Out, Sample{
				Metric: metric,
				Point:  Point{V: fraction},
			})
		}
	}
	return enh.Out
}
// === histogram_quantile(k parser.ValueTypeScalar, Vector parser.ValueTypeVector) Vector ===
func funcHistogramQuantile(vals []parser.Value, args parser.Expressions, enh *EvalNodeHelper) Vector {
q := vals[0].(Vector)[0].V
@ -1205,6 +1224,7 @@ var FunctionCalls = map[string]FunctionCall{
"deriv": funcDeriv,
"exp": funcExp,
"floor": funcFloor,
"histogram_fraction": funcHistogramFraction,
"histogram_quantile": funcHistogramQuantile,
"holt_winters": funcHoltWinters,
"hour": funcHour,

View file

@ -163,6 +163,11 @@ var Functions = map[string]*Function{
ArgTypes: []ValueType{ValueTypeVector},
ReturnType: ValueTypeVector,
},
"histogram_fraction": {
Name: "histogram_fraction",
ArgTypes: []ValueType{ValueTypeScalar, ValueTypeScalar, ValueTypeVector},
ReturnType: ValueTypeVector,
},
"histogram_quantile": {
Name: "histogram_quantile",
ArgTypes: []ValueType{ValueTypeScalar, ValueTypeVector},

View file

@ -127,7 +127,10 @@ func bucketQuantile(q float64, buckets buckets) float64 {
// TODO(beorn7): Find an interpolation method that is a better fit for
// exponential buckets (and think about configurable interpolation).
//
// A natural lower bound of 0 is assumed if the histogram has no negative buckets.
// A natural lower bound of 0 is assumed if the histogram has only positive
// buckets. Likewise, a natural upper bound of 0 is assumed if the histogram has
// only negative buckets.
// TODO(beorn7): Come to terms if we want that.
//
// There are a number of special cases (once we have a way to report errors
// happening during evaluations of AST functions, we should report those
@ -138,6 +141,8 @@ func bucketQuantile(q float64, buckets buckets) float64 {
// If q<0, -Inf is returned.
//
// If q>1, +Inf is returned.
//
// If q is NaN, NaN is returned.
func histogramQuantile(q float64, h *histogram.FloatHistogram) float64 {
if q < 0 {
return math.Inf(-1)
@ -146,7 +151,7 @@ func histogramQuantile(q float64, h *histogram.FloatHistogram) float64 {
return math.Inf(+1)
}
if h.Count == 0 {
if h.Count == 0 || math.IsNaN(q) {
return math.NaN()
}
@ -163,10 +168,16 @@ func histogramQuantile(q float64, h *histogram.FloatHistogram) float64 {
break
}
}
if bucket.Lower < 0 && bucket.Upper > 0 && len(h.NegativeBuckets) == 0 {
// The result is in the zero bucket and the histogram has no
// negative buckets. So we consider 0 to be the lower bound.
bucket.Lower = 0
if bucket.Lower < 0 && bucket.Upper > 0 {
if len(h.NegativeBuckets) == 0 && len(h.PositiveBuckets) > 0 {
// The result is in the zero bucket and the histogram has only
// positive buckets. So we consider 0 to be the lower bound.
bucket.Lower = 0
} else if len(h.PositiveBuckets) == 0 && len(h.NegativeBuckets) > 0 {
// The result is in the zero bucket and the histogram has only
// negative buckets. So we consider 0 to be the upper bound.
bucket.Upper = 0
}
}
// Due to numerical inaccuracies, we could end up with a higher count
// than h.Count. Thus, make sure count is never higher than h.Count.
@ -186,6 +197,99 @@ func histogramQuantile(q float64, h *histogram.FloatHistogram) float64 {
return bucket.Lower + (bucket.Upper-bucket.Lower)*(rank/bucket.Count)
}
// histogramFraction estimates which fraction of the observations in the
// provided histogram falls between the provided lower and upper bounds.
//
// In a certain way, histogramFraction is the inverse of histogramQuantile: if
// histogramQuantile(0.9, h) returns 123.4, then
// histogramFraction(-Inf, 123.4, h) returns 0.9.
//
// Interpolation and the assumptions about the zero bucket boundaries are
// handled in the same way as in histogramQuantile, so the notes (and TODOs)
// there apply here, too.
//
// As long as a bound falls inside a bucket, interpolation has to be performed
// anyway, and it doesn't actually matter whether that bound is inclusive or
// exclusive. Only if a bound coincides with a bucket boundary, the inclusive
// or exclusive nature of the bound determines the exact behavior of the
// threshold. With the current implementation, that means that lower is
// exclusive for positive values and inclusive for negative values, while upper
// is inclusive for positive values and exclusive for negative values.
//
// Special cases:
//
// If the histogram has 0 observations, NaN is returned.
//
// Use a lower bound of -Inf to get the fraction of all observations below the
// upper bound.
//
// Use an upper bound of +Inf to get the fraction of all observations above the
// lower bound.
//
// If lower or upper is NaN, NaN is returned.
//
// If lower >= upper and the histogram has at least 1 observation, zero is returned.
func histogramFraction(lower, upper float64, h *histogram.FloatHistogram) float64 {
	switch {
	case h.Count == 0, math.IsNaN(lower), math.IsNaN(upper):
		return math.NaN()
	case lower >= upper:
		return 0
	}

	var (
		seen                   float64 // Observations in all buckets iterated so far.
		lowerRank, upperRank   float64
		lowerFound, upperFound bool
	)
	onlyPositive := len(h.NegativeBuckets) == 0 && len(h.PositiveBuckets) > 0
	onlyNegative := len(h.PositiveBuckets) == 0 && len(h.NegativeBuckets) > 0

	for it := h.AllBucketIterator(); it.Next(); {
		bucket := it.At()
		if bucket.Lower < 0 && bucket.Upper > 0 {
			// This is the zero bucket.
			switch {
			case onlyPositive:
				// The histogram has only positive buckets, so 0 is
				// considered to be the lower bound.
				bucket.Lower = 0
			case onlyNegative:
				// The histogram has only negative buckets, so 0 is
				// considered to be the upper bound.
				bucket.Upper = 0
			}
		}

		if !lowerFound {
			switch {
			case bucket.Lower >= lower:
				// The bucket starts at or after lower, so none of its
				// observations are below lower.
				lowerRank, lowerFound = seen, true
			case bucket.Lower < lower && bucket.Upper > lower:
				// lower is inside the bucket, so interpolate linearly.
				lowerRank = seen + bucket.Count*(lower-bucket.Lower)/(bucket.Upper-bucket.Lower)
				lowerFound = true
			}
		}
		if !upperFound {
			switch {
			case bucket.Lower >= upper:
				// The bucket starts at or after upper, so none of its
				// observations are below upper.
				upperRank, upperFound = seen, true
			case bucket.Lower < upper && bucket.Upper > upper:
				// upper is inside the bucket, so interpolate linearly.
				upperRank = seen + bucket.Count*(upper-bucket.Lower)/(bucket.Upper-bucket.Lower)
				upperFound = true
			}
		}
		if lowerFound && upperFound {
			break
		}
		seen += bucket.Count
	}

	// A rank that has not been determined during the iteration, or that
	// exceeds the total count, is set to the total count.
	total := h.Count
	if !lowerFound || lowerRank > total {
		lowerRank = total
	}
	if !upperFound || upperRank > total {
		upperRank = total
	}
	return (upperRank - lowerRank) / total
}
// coalesceBuckets merges buckets with the same upper bound.
//
// The input buckets must be sorted.

View file

@ -215,10 +215,16 @@ export const functionIdentifierTerms = [
info: 'Round down values of input series to nearest integer',
type: 'function',
},
{
label: 'histogram_fraction',
detail: 'function',
info: 'Calculate fractions of observations within an interval from a native histogram',
type: 'function',
},
{
label: 'histogram_quantile',
detail: 'function',
info: 'Calculate quantiles from histogram buckets',
info: 'Calculate quantiles from native histograms and from legacy histogram buckets',
type: 'function',
},
{

View file

@ -714,6 +714,20 @@ describe('promql operations', () => {
expectedValueType: ValueType.vector,
expectedDiag: [],
},
{
expr:
'histogram_fraction( # Root of the query, final result, approximates a fraction of observations within an interval.\n' +
' -Inf, # 1st argument to histogram_fraction(), start of the interval.\n' +
' 123.4, # 2nd argument to histogram_fraction(), end of the interval.\n' +
' sum by(method, path) ( # 3rd argument to histogram_fraction(), an aggregated histogram.\n' +
' rate( # Argument to sum(), the per-second increase of a histogram over 5m.\n' +
' demo_api_request_duration_seconds{job="demo"}[5m] # Argument to rate(), a vector of sparse histogram series over the last 5m.\n' +
' )\n' +
' )\n' +
')',
expectedValueType: ValueType.vector,
expectedDiag: [],
},
{
expr: '1 @ start()',
expectedValueType: ValueType.scalar,

View file

@ -39,6 +39,7 @@ import {
Deriv,
Exp,
Floor,
HistogramFraction,
HistogramQuantile,
HoltWinters,
Hour,
@ -261,6 +262,12 @@ const promqlFunctions: { [key: number]: PromQLFunction } = {
variadic: 0,
returnType: ValueType.vector,
},
[HistogramFraction]: {
name: 'histogram_fraction',
argTypes: [ValueType.scalar, ValueType.scalar, ValueType.vector],
variadic: 0,
returnType: ValueType.vector,
},
[HistogramQuantile]: {
name: 'histogram_quantile',
argTypes: [ValueType.scalar, ValueType.vector],

View file

@ -146,6 +146,7 @@ FunctionIdentifier {
Deriv |
Exp |
Floor |
HistogramFraction |
HistogramQuantile |
HoltWinters |
Hour |
@ -387,6 +388,7 @@ NumberLiteral {
Deriv { condFn<"deriv"> }
Exp { condFn<"exp"> }
Floor { condFn<"floor"> }
HistogramFraction { condFn<"histogram_fraction"> }
HistogramQuantile { condFn<"histogram_quantile"> }
HoltWinters { condFn<"holt_winters"> }
Hour { condFn<"hour"> }