promql: Add histogram_count and histogram_sum

This follows a simple function-based approach to access the count and
sum fields of a native Histogram. It might be more elegant to
implement “accessors” via the dot operator, as considered in the
brainstorming doc [1]. However, that would require the introduction of
a whole new concept in PromQL. For the PoC, we should be fine with the
function-based approach. Even the obvious inefficiencies (rate'ing a
whole histogram twice when we only want to rate each of the count and the
sum once) could be optimized behind the scenes.

Note that the function-based approach elegantly solves the problem of
detecting counter resets in the sum of observations in the case of
negative observations. (Since the whole native Histogram is rate'd,
the counter reset is detected for the Histogram as a whole.)

We will decide later if an “accessor” approach is really needed. It
would change the example expression for average duration in
functions.md from

      histogram_sum(rate(http_request_duration_seconds[10m]))
	/
      histogram_count(rate(http_request_duration_seconds[10m]))

to

      rate(http_request_duration_seconds.sum[10m])
	/
      rate(http_request_duration_seconds.count[10m])

[1]: https://docs.google.com/document/d/1ch6ru8GKg03N02jRjYriurt-CZqUVY09evPg6yKTA1s/edit

Signed-off-by: beorn7 <beorn@grafana.com>
This commit is contained in:
beorn7 2022-06-28 16:43:58 +02:00
parent f9f2e2855d
commit 9eafed0f79
8 changed files with 185 additions and 0 deletions

View file

@ -152,6 +152,27 @@ Special cases are:
`floor(v instant-vector)` rounds the sample values of all elements in `v` down
to the nearest integer.
## `histogram_count()` and `histogram_sum()`
`histogram_count(v instant-vector)` returns the count of observations stored in
a native Histogram. Samples that are not native Histograms are ignored and do
not show up in the returned vector.
Similarly, `histogram_sum(v instant-vector)` returns the sum of observations
stored in a native Histogram.
Use `histogram_count` in the following way to calculate a rate of observations
(in this case corresponding to “requests per second”) from a native Histogram:
histogram_count(rate(http_request_duration_seconds[10m]))
The additional use of `histogram_sum` enables the calculation of the average of
observed values (in this case corresponding to “average request duration”):
histogram_sum(rate(http_request_duration_seconds[10m]))
/
histogram_count(rate(http_request_duration_seconds[10m]))
## `histogram_fraction()`
TODO(beorn7): Add documentation.

View file

@ -3155,6 +3155,70 @@ func TestSparseHistogramRate(t *testing.T) {
require.Equal(t, expectedHistogram, actualHistogram)
}
// TestSparseHistogram_HistogramCountAndSum appends a single integer histogram
// to the test storage and checks that histogram_count() and histogram_sum(),
// evaluated as instant queries at the sample's timestamp, each return exactly
// one float sample equal to the histogram's Count and Sum respectively.
func TestSparseHistogram_HistogramCountAndSum(t *testing.T) {
	// TODO(codesome): Integrate histograms into the PromQL testing framework
	// and write more tests there.
	const seriesName = "sparse_histogram_series"

	h := &histogram.Histogram{
		Count:         24,
		ZeroCount:     4,
		ZeroThreshold: 0.001,
		Sum:           100,
		Schema:        0,
		PositiveSpans: []histogram.Span{
			{Offset: 0, Length: 2},
			{Offset: 1, Length: 2},
		},
		PositiveBuckets: []int64{2, 1, -2, 3},
		NegativeSpans: []histogram.Span{
			{Offset: 0, Length: 2},
			{Offset: 1, Length: 2},
		},
		NegativeBuckets: []int64{2, 1, -2, 3},
	}

	test, err := NewTest(t, "")
	require.NoError(t, err)
	t.Cleanup(test.Close)

	lbls := labels.FromStrings("__name__", seriesName)
	engine := test.QueryEngine()

	// Sample timestamp in milliseconds (10 minutes).
	ts := (10 * time.Minute).Milliseconds()

	app := test.Storage().Appender(context.TODO())
	_, err = app.AppendHistogram(0, lbls, ts, h)
	require.NoError(t, err)
	require.NoError(t, app.Commit())

	// evalSingle runs expr as an instant query at ts and returns the float
	// value of the only resulting sample. Along the way it asserts that the
	// query succeeds, that the result is a vector of length one, and that the
	// sample's histogram field is nil.
	evalSingle := func(expr string) float64 {
		q, err := engine.NewInstantQuery(test.Queryable(), nil, expr, timestamp.Time(ts))
		require.NoError(t, err)

		result := q.Exec(test.Context())
		require.NoError(t, result.Err)

		vec, err := result.Vector()
		require.NoError(t, err)
		require.Len(t, vec, 1)
		require.Nil(t, vec[0].H)
		return vec[0].V
	}

	gotCount := evalSingle(fmt.Sprintf("histogram_count(%s)", seriesName))
	require.Equal(t, float64(h.Count), gotCount)

	gotSum := evalSingle(fmt.Sprintf("histogram_sum(%s)", seriesName))
	require.Equal(t, h.Sum, gotSum)
}
func TestSparseHistogram_HistogramQuantile(t *testing.T) {
// TODO(codesome): Integrate histograms into the PromQL testing framework
// and write more tests there.

View file

@ -864,6 +864,40 @@ func funcPredictLinear(vals []parser.Value, args parser.Expressions, enh *EvalNo
})
}
// === histogram_count(Vector parser.ValueTypeVector) Vector ===
//
// funcHistogramCount walks the input vector in vals[0] and, for every sample
// that carries a histogram (H != nil), appends a float sample to enh.Out whose
// value is that histogram's Count and whose labels are the result of
// enh.DropMetricName on the input labels. Samples without a histogram are
// left out of the result. args is not used. The returned vector is enh.Out.
func funcHistogramCount(vals []parser.Value, args parser.Expressions, enh *EvalNodeHelper) Vector {
	for _, el := range vals[0].(Vector) {
		if el.H != nil {
			out := Sample{
				Metric: enh.DropMetricName(el.Metric),
				Point:  Point{V: el.H.Count},
			}
			enh.Out = append(enh.Out, out)
		}
	}
	return enh.Out
}
// === histogram_sum(Vector parser.ValueTypeVector) Vector ===
//
// funcHistogramSum walks the input vector in vals[0] and, for every sample
// that carries a histogram (H != nil), appends a float sample to enh.Out whose
// value is that histogram's Sum and whose labels are the result of
// enh.DropMetricName on the input labels. Samples without a histogram are
// left out of the result. args is not used. The returned vector is enh.Out.
func funcHistogramSum(vals []parser.Value, args parser.Expressions, enh *EvalNodeHelper) Vector {
	for _, el := range vals[0].(Vector) {
		if el.H != nil {
			out := Sample{
				Metric: enh.DropMetricName(el.Metric),
				Point:  Point{V: el.H.Sum},
			}
			enh.Out = append(enh.Out, out)
		}
	}
	return enh.Out
}
// === histogram_fraction(lower, upper parser.ValueTypeScalar, Vector parser.ValueTypeVector) Vector ===
func funcHistogramFraction(vals []parser.Value, args parser.Expressions, enh *EvalNodeHelper) Vector {
lower := vals[0].(Vector)[0].V
@ -1224,8 +1258,10 @@ var FunctionCalls = map[string]FunctionCall{
"deriv": funcDeriv,
"exp": funcExp,
"floor": funcFloor,
"histogram_count": funcHistogramCount,
"histogram_fraction": funcHistogramFraction,
"histogram_quantile": funcHistogramQuantile,
"histogram_sum": funcHistogramSum,
"holt_winters": funcHoltWinters,
"hour": funcHour,
"idelta": funcIdelta,

View file

@ -163,6 +163,16 @@ var Functions = map[string]*Function{
ArgTypes: []ValueType{ValueTypeVector},
ReturnType: ValueTypeVector,
},
"histogram_count": {
Name: "histogram_count",
ArgTypes: []ValueType{ValueTypeVector},
ReturnType: ValueTypeVector,
},
"histogram_sum": {
Name: "histogram_sum",
ArgTypes: []ValueType{ValueTypeVector},
ReturnType: ValueTypeVector,
},
"histogram_fraction": {
Name: "histogram_fraction",
ArgTypes: []ValueType{ValueTypeScalar, ValueTypeScalar, ValueTypeVector},

View file

@ -215,6 +215,12 @@ export const functionIdentifierTerms = [
info: 'Round down values of input series to nearest integer',
type: 'function',
},
{
label: 'histogram_count',
detail: 'function',
info: 'Return the count of observations from a native histogram',
type: 'function',
},
{
label: 'histogram_fraction',
detail: 'function',
@ -227,6 +233,12 @@ export const functionIdentifierTerms = [
info: 'Calculate quantiles from native histograms and from legacy histogram buckets',
type: 'function',
},
{
label: 'histogram_sum',
detail: 'function',
info: 'Return the sum of observations from a native histogram',
type: 'function',
},
{
label: 'holt_winters',
detail: 'function',

View file

@ -728,6 +728,30 @@ describe('promql operations', () => {
expectedValueType: ValueType.vector,
expectedDiag: [],
},
{
expr:
'histogram_count( # Root of the query, final result, returns the count of observations.\n' +
' sum by(method, path) ( # Argument to histogram_count(), an aggregated histogram.\n' +
' rate( # Argument to sum(), the per-second increase of a histogram over 5m.\n' +
' demo_api_request_duration_seconds{job="demo"}[5m] # Argument to rate(), a vector of sparse histogram series over the last 5m.\n' +
' )\n' +
' )\n' +
')',
expectedValueType: ValueType.vector,
expectedDiag: [],
},
{
expr:
'histogram_sum( # Root of the query, final result, returns the sum of observations.\n' +
' sum by(method, path) ( # Argument to histogram_sum(), an aggregated histogram.\n' +
' rate( # Argument to sum(), the per-second increase of a histogram over 5m.\n' +
' demo_api_request_duration_seconds{job="demo"}[5m] # Argument to rate(), a vector of sparse histogram series over the last 5m.\n' +
' )\n' +
' )\n' +
')',
expectedValueType: ValueType.vector,
expectedDiag: [],
},
{
expr: '1 @ start()',
expectedValueType: ValueType.scalar,

View file

@ -39,8 +39,10 @@ import {
Deriv,
Exp,
Floor,
HistogramCount,
HistogramFraction,
HistogramQuantile,
HistogramSum,
HoltWinters,
Hour,
Idelta,
@ -262,6 +264,12 @@ const promqlFunctions: { [key: number]: PromQLFunction } = {
variadic: 0,
returnType: ValueType.vector,
},
[HistogramCount]: {
name: 'histogram_count',
argTypes: [ValueType.vector],
variadic: 0,
returnType: ValueType.vector,
},
[HistogramFraction]: {
name: 'histogram_fraction',
argTypes: [ValueType.scalar, ValueType.scalar, ValueType.vector],
@ -274,6 +282,12 @@ const promqlFunctions: { [key: number]: PromQLFunction } = {
variadic: 0,
returnType: ValueType.vector,
},
[HistogramSum]: {
name: 'histogram_sum',
argTypes: [ValueType.vector],
variadic: 0,
returnType: ValueType.vector,
},
[HoltWinters]: {
name: 'holt_winters',
argTypes: [ValueType.matrix, ValueType.scalar, ValueType.scalar],

View file

@ -146,8 +146,10 @@ FunctionIdentifier {
Deriv |
Exp |
Floor |
HistogramCount |
HistogramFraction |
HistogramQuantile |
HistogramSum |
HoltWinters |
Hour |
Idelta |
@ -388,8 +390,10 @@ NumberLiteral {
Deriv { condFn<"deriv"> }
Exp { condFn<"exp"> }
Floor { condFn<"floor"> }
HistogramCount { condFn<"histogram_count"> }
HistogramFraction { condFn<"histogram_fraction"> }
HistogramQuantile { condFn<"histogram_quantile"> }
HistogramSum { condFn<"histogram_sum"> }
HoltWinters { condFn<"holt_winters"> }
Hour { condFn<"hour"> }
Idelta { condFn<"idelta"> }