promql: Add histogram_fraction function

Signed-off-by: beorn7 <beorn@grafana.com>
2025-03-05 20:59:13 -08:00 · 2022-06-16 20:44:12 +02:00 · 2022-06-16 20:44:12 +02:00 · a3a8f58bb3
parent ffaabea91a
commit a3a8f58bb3
9 changed files with 585 additions and 2 deletions
--- a/docs/querying/functions.md
+++ b/docs/querying/functions.md
@@ -152,8 +152,14 @@ Special cases are:
 `floor(v instant-vector)` rounds the sample values of all elements in `v` down
 to the nearest integer.

+## `histogram_fraction()`
+
+TODO(beorn7): Add documentation.
+
 ## `histogram_quantile()`

+TODO(beorn7): This needs a lot of updates for Histograms as sample value types.
+
 `histogram_quantile(φ scalar, b instant-vector)` calculates the φ-quantile (0 ≤ φ
 ≤ 1) from the buckets `b` of a
 [histogram](https://prometheus.io/docs/concepts/metric_types/#histogram). (See
--- a/promql/engine_test.go
+++ b/promql/engine_test.go
@@ -3394,6 +3394,434 @@ func TestSparseHistogram_HistogramQuantile(t *testing.T) {
 	}
 }

+func TestSparseHistogram_HistogramFraction(t *testing.T) {
+	// TODO(codesome): Integrate histograms into the PromQL testing framework
+	// and write more tests there.
+	type subCase struct {
+		lower, upper string
+		value        float64
+	}
+
+	invariantCases := []subCase{
+		{
+			lower: "42",
+			upper: "3.1415",
+			value: 0,
+		},
+		{
+			lower: "0",
+			upper: "0",
+			value: 0,
+		},
+		{
+			lower: "0.000001",
+			upper: "0.000001",
+			value: 0,
+		},
+		{
+			lower: "42",
+			upper: "42",
+			value: 0,
+		},
+		{
+			lower: "-3.1",
+			upper: "-3.1",
+			value: 0,
+		},
+		{
+			lower: "3.1415",
+			upper: "NaN",
+			value: math.NaN(),
+		},
+		{
+			lower: "NaN",
+			upper: "42",
+			value: math.NaN(),
+		},
+		{
+			lower: "NaN",
+			upper: "NaN",
+			value: math.NaN(),
+		},
+		{
+			lower: "-Inf",
+			upper: "+Inf",
+			value: 1,
+		},
+	}
+
+	cases := []struct {
+		text string
+		// Histogram that is appended to storage and then queried via histogram_fraction.
+		h *histogram.Histogram
+		// Lower/upper bound pairs to query for this histogram, each with the expected fraction.
+		subCases []subCase
+	}{
+		{
+			text: "empty histogram",
+			h:    &histogram.Histogram{},
+			subCases: []subCase{
+				{
+					lower: "3.1415",
+					upper: "42",
+					value: math.NaN(),
+				},
+			},
+		},
+		{
+			text: "all positive buckets with zero bucket",
+			h: &histogram.Histogram{
+				Count:         12,
+				ZeroCount:     2,
+				ZeroThreshold: 0.001,
+				Sum:           100, // Does not matter for the expected fractions.
+				Schema:        0,
+				PositiveSpans: []histogram.Span{
+					{Offset: 0, Length: 2},
+					{Offset: 1, Length: 2},
+				},
+				PositiveBuckets: []int64{2, 1, -2, 3}, // Deltas; absolute bucket counts: 2, 3, 1, 4
+			},
+			subCases: append([]subCase{
+				{
+					lower: "0",
+					upper: "+Inf",
+					value: 1,
+				},
+				{
+					lower: "-Inf",
+					upper: "0",
+					value: 0,
+				},
+				{
+					lower: "-0.001",
+					upper: "0",
+					value: 0,
+				},
+				{
+					lower: "0",
+					upper: "0.001",
+					value: 2. / 12.,
+				},
+				{
+					lower: "0",
+					upper: "0.0005",
+					value: 1. / 12.,
+				},
+				{
+					lower: "0.001",
+					upper: "inf",
+					value: 10. / 12.,
+				},
+				{
+					lower: "-inf",
+					upper: "-0.001",
+					value: 0,
+				},
+				{
+					lower: "1",
+					upper: "2",
+					value: 3. / 12.,
+				},
+				{
+					lower: "1.5",
+					upper: "2",
+					value: 1.5 / 12.,
+				},
+				{
+					lower: "1",
+					upper: "8",
+					value: 4. / 12.,
+				},
+				{
+					lower: "1",
+					upper: "6",
+					value: 3.5 / 12.,
+				},
+				{
+					lower: "1.5",
+					upper: "6",
+					value: 2. / 12.,
+				},
+				{
+					lower: "-2",
+					upper: "-1",
+					value: 0,
+				},
+				{
+					lower: "-2",
+					upper: "-1.5",
+					value: 0,
+				},
+				{
+					lower: "-8",
+					upper: "-1",
+					value: 0,
+				},
+				{
+					lower: "-6",
+					upper: "-1",
+					value: 0,
+				},
+				{
+					lower: "-6",
+					upper: "-1.5",
+					value: 0,
+				},
+			}, invariantCases...),
+		},
+		{
+			text: "all negative buckets with zero bucket",
+			h: &histogram.Histogram{
+				Count:         12,
+				ZeroCount:     2,
+				ZeroThreshold: 0.001,
+				Sum:           100, // Does not matter for the expected fractions.
+				Schema:        0,
+				NegativeSpans: []histogram.Span{
+					{Offset: 0, Length: 2},
+					{Offset: 1, Length: 2},
+				},
+				NegativeBuckets: []int64{2, 1, -2, 3},
+			},
+			subCases: append([]subCase{
+				{
+					lower: "0",
+					upper: "+Inf",
+					value: 0,
+				},
+				{
+					lower: "-Inf",
+					upper: "0",
+					value: 1,
+				},
+				{
+					lower: "-0.001",
+					upper: "0",
+					value: 2. / 12.,
+				},
+				{
+					lower: "0",
+					upper: "0.001",
+					value: 0,
+				},
+				{
+					lower: "-0.0005",
+					upper: "0",
+					value: 1. / 12.,
+				},
+				{
+					lower: "0.001",
+					upper: "inf",
+					value: 0,
+				},
+				{
+					lower: "-inf",
+					upper: "-0.001",
+					value: 10. / 12.,
+				},
+				{
+					lower: "1",
+					upper: "2",
+					value: 0,
+				},
+				{
+					lower: "1.5",
+					upper: "2",
+					value: 0,
+				},
+				{
+					lower: "1",
+					upper: "8",
+					value: 0,
+				},
+				{
+					lower: "1",
+					upper: "6",
+					value: 0,
+				},
+				{
+					lower: "1.5",
+					upper: "6",
+					value: 0,
+				},
+				{
+					lower: "-2",
+					upper: "-1",
+					value: 3. / 12.,
+				},
+				{
+					lower: "-2",
+					upper: "-1.5",
+					value: 1.5 / 12.,
+				},
+				{
+					lower: "-8",
+					upper: "-1",
+					value: 4. / 12.,
+				},
+				{
+					lower: "-6",
+					upper: "-1",
+					value: 3.5 / 12.,
+				},
+				{
+					lower: "-6",
+					upper: "-1.5",
+					value: 2. / 12.,
+				},
+			}, invariantCases...),
+		},
+		{
+			text: "both positive and negative buckets with zero bucket",
+			h: &histogram.Histogram{
+				Count:         24,
+				ZeroCount:     4,
+				ZeroThreshold: 0.001,
+				Sum:           100, // Does not matter for the expected fractions.
+				Schema:        0,
+				PositiveSpans: []histogram.Span{
+					{Offset: 0, Length: 2},
+					{Offset: 1, Length: 2},
+				},
+				PositiveBuckets: []int64{2, 1, -2, 3},
+				NegativeSpans: []histogram.Span{
+					{Offset: 0, Length: 2},
+					{Offset: 1, Length: 2},
+				},
+				NegativeBuckets: []int64{2, 1, -2, 3},
+			},
+			subCases: append([]subCase{
+				{
+					lower: "0",
+					upper: "+Inf",
+					value: 0.5,
+				},
+				{
+					lower: "-Inf",
+					upper: "0",
+					value: 0.5,
+				},
+				{
+					lower: "-0.001",
+					upper: "0",
+					value: 2. / 24,
+				},
+				{
+					lower: "0",
+					upper: "0.001",
+					value: 2. / 24.,
+				},
+				{
+					lower: "-0.0005",
+					upper: "0.0005",
+					value: 2. / 24.,
+				},
+				{
+					lower: "0.001",
+					upper: "inf",
+					value: 10. / 24.,
+				},
+				{
+					lower: "-inf",
+					upper: "-0.001",
+					value: 10. / 24.,
+				},
+				{
+					lower: "1",
+					upper: "2",
+					value: 3. / 24.,
+				},
+				{
+					lower: "1.5",
+					upper: "2",
+					value: 1.5 / 24.,
+				},
+				{
+					lower: "1",
+					upper: "8",
+					value: 4. / 24.,
+				},
+				{
+					lower: "1",
+					upper: "6",
+					value: 3.5 / 24.,
+				},
+				{
+					lower: "1.5",
+					upper: "6",
+					value: 2. / 24.,
+				},
+				{
+					lower: "-2",
+					upper: "-1",
+					value: 3. / 24.,
+				},
+				{
+					lower: "-2",
+					upper: "-1.5",
+					value: 1.5 / 24.,
+				},
+				{
+					lower: "-8",
+					upper: "-1",
+					value: 4. / 24.,
+				},
+				{
+					lower: "-6",
+					upper: "-1",
+					value: 3.5 / 24.,
+				},
+				{
+					lower: "-6",
+					upper: "-1.5",
+					value: 2. / 24.,
+				},
+			}, invariantCases...),
+		},
+	}
+
+	for i, c := range cases {
+		t.Run(c.text, func(t *testing.T) {
+			test, err := NewTest(t, "")
+			require.NoError(t, err)
+			t.Cleanup(test.Close)
+
+			seriesName := "sparse_histogram_series"
+			lbls := labels.FromStrings("__name__", seriesName)
+			engine := test.QueryEngine()
+
+			ts := int64(i+1) * int64(10*time.Minute/time.Millisecond)
+			app := test.Storage().Appender(context.TODO())
+			_, err = app.AppendHistogram(0, lbls, ts, c.h)
+			require.NoError(t, err)
+			require.NoError(t, app.Commit())
+
+			for j, sc := range c.subCases {
+				t.Run(fmt.Sprintf("%d %s %s", j, sc.lower, sc.upper), func(t *testing.T) {
+					queryString := fmt.Sprintf("histogram_fraction(%s, %s, %s)", sc.lower, sc.upper, seriesName)
+					qry, err := engine.NewInstantQuery(test.Queryable(), nil, queryString, timestamp.Time(ts))
+					require.NoError(t, err)
+
+					res := qry.Exec(test.Context())
+					require.NoError(t, res.Err)
+
+					vector, err := res.Vector()
+					require.NoError(t, err)
+
+					require.Len(t, vector, 1)
+					require.Nil(t, vector[0].H)
+					if math.IsNaN(sc.value) {
+						require.True(t, math.IsNaN(vector[0].V))
+						return
+					}
+					require.Equal(t, sc.value, vector[0].V)
+				})
+			}
+		})
+	}
+}
+
 func TestSparseHistogram_Sum_AddOperator(t *testing.T) {
 	// TODO(codesome): Integrate histograms into the PromQL testing framework
 	// and write more tests there.
--- a/promql/functions.go
+++ b/promql/functions.go
@@ -864,6 +864,25 @@ func funcPredictLinear(vals []parser.Value, args parser.Expressions, enh *EvalNo
 	})
 }

+// === histogram_fraction(lower, upper parser.ValueTypeScalar, Vector parser.ValueTypeVector) Vector ===
+func funcHistogramFraction(vals []parser.Value, args parser.Expressions, enh *EvalNodeHelper) Vector {
+	lower := vals[0].(Vector)[0].V
+	upper := vals[1].(Vector)[0].V
+	inVec := vals[2].(Vector)
+
+	for _, sample := range inVec {
+		// Skip non-histogram samples (H == nil); they contribute no element to the output vector.
+		if sample.H == nil {
+			continue
+		}
+		enh.Out = append(enh.Out, Sample{
+			Metric: enh.DropMetricName(sample.Metric),
+			Point:  Point{V: histogramFraction(lower, upper, sample.H)},
+		})
+	}
+	return enh.Out
+}
+
 // === histogram_quantile(k parser.ValueTypeScalar, Vector parser.ValueTypeVector) Vector ===
 func funcHistogramQuantile(vals []parser.Value, args parser.Expressions, enh *EvalNodeHelper) Vector {
 	q := vals[0].(Vector)[0].V
@@ -1205,6 +1224,7 @@ var FunctionCalls = map[string]FunctionCall{
 	"deriv":              funcDeriv,
 	"exp":                funcExp,
 	"floor":              funcFloor,
+	"histogram_fraction": funcHistogramFraction,
 	"histogram_quantile": funcHistogramQuantile,
 	"holt_winters":       funcHoltWinters,
 	"hour":               funcHour,
--- a/promql/parser/functions.go
+++ b/promql/parser/functions.go
@@ -163,6 +163,11 @@ var Functions = map[string]*Function{
 		ArgTypes:   []ValueType{ValueTypeVector},
 		ReturnType: ValueTypeVector,
 	},
+	"histogram_fraction": {
+		Name:       "histogram_fraction",
+		ArgTypes:   []ValueType{ValueTypeScalar, ValueTypeScalar, ValueTypeVector},
+		ReturnType: ValueTypeVector,
+	},
 	"histogram_quantile": {
 		Name:       "histogram_quantile",
 		ArgTypes:   []ValueType{ValueTypeScalar, ValueTypeVector},
--- a/promql/quantile.go
+++ b/promql/quantile.go
@@ -141,6 +141,8 @@ func bucketQuantile(q float64, buckets buckets) float64 {
 // If q<0, -Inf is returned.
 //
 // If q>1, +Inf is returned.
+//
+// If q is NaN, NaN is returned.
 func histogramQuantile(q float64, h *histogram.FloatHistogram) float64 {
 	if q < 0 {
 		return math.Inf(-1)
@@ -149,7 +151,7 @@ func histogramQuantile(q float64, h *histogram.FloatHistogram) float64 {
 		return math.Inf(+1)
 	}

-	if h.Count == 0 {
+	if h.Count == 0 || math.IsNaN(q) {
 		return math.NaN()
 	}

@@ -195,6 +197,99 @@ func histogramQuantile(q float64, h *histogram.FloatHistogram) float64 {
 	return bucket.Lower + (bucket.Upper-bucket.Lower)*(rank/bucket.Count)
 }

+// histogramFraction calculates the fraction of observations between the
+// provided lower and upper bounds, based on the provided histogram.
+//
+// histogramFraction is in a certain way the inverse of histogramQuantile. If
+// histogramQuantile(0.9, h) returns 123.4, then histogramFraction(-Inf, 123.4, h)
+// returns 0.9.
+//
+// The same notes (and TODOs) with regard to interpolation and assumptions about
+// the zero bucket boundaries apply as for histogramQuantile.
+//
+// Whether either boundary is inclusive or exclusive doesn't actually matter as
+// long as interpolation has to be performed anyway. In the case of a boundary
+// coinciding with a bucket boundary, the inclusive or exclusive nature of the
+// boundary determines the exact behavior of the threshold. With the current
+// implementation, that means that lower is exclusive for positive values and
+// inclusive for negative values, while upper is inclusive for positive values
+// and exclusive for negative values.
+//
+// Special cases:
+//
+// If the histogram has 0 observations, NaN is returned.
+//
+// Use a lower bound of -Inf to get the fraction of all observations below the
+// upper bound.
+//
+// Use an upper bound of +Inf to get the fraction of all observations above the
+// lower bound.
+//
+// If lower or upper is NaN, NaN is returned.
+//
+// If lower >= upper and the histogram has at least 1 observation, zero is returned.
+func histogramFraction(lower, upper float64, h *histogram.FloatHistogram) float64 {
+	if h.Count == 0 || math.IsNaN(lower) || math.IsNaN(upper) {
+		return math.NaN()
+	}
+	if lower >= upper {
+		return 0
+	}
+
+	var (
+		rank, lowerRank, upperRank float64
+		lowerSet, upperSet         bool
+		it                         = h.AllBucketIterator()
+	)
+	for it.Next() {
+		b := it.At()
+		if b.Lower < 0 && b.Upper > 0 {
+			if len(h.NegativeBuckets) == 0 && len(h.PositiveBuckets) > 0 {
+				// This is the zero bucket and the histogram has only
+				// positive buckets. So we consider 0 to be the lower
+				// bound.
+				b.Lower = 0
+			} else if len(h.PositiveBuckets) == 0 && len(h.NegativeBuckets) > 0 {
+				// This is the zero bucket and the histogram has only
+				// negative buckets. So we consider 0 to be the upper
+				// bound.
+				b.Upper = 0
+			}
+		}
+		if !lowerSet && b.Lower >= lower {
+			lowerRank = rank
+			lowerSet = true
+		}
+		if !upperSet && b.Lower >= upper {
+			upperRank = rank
+			upperSet = true
+		}
+		if lowerSet && upperSet {
+			break
+		}
+		if !lowerSet && b.Lower < lower && b.Upper > lower {
+			lowerRank = rank + b.Count*(lower-b.Lower)/(b.Upper-b.Lower)
+			lowerSet = true
+		}
+		if !upperSet && b.Lower < upper && b.Upper > upper {
+			upperRank = rank + b.Count*(upper-b.Lower)/(b.Upper-b.Lower)
+			upperSet = true
+		}
+		if lowerSet && upperSet {
+			break
+		}
+		rank += b.Count
+	}
+	if !lowerSet || lowerRank > h.Count {
+		lowerRank = h.Count
+	}
+	if !upperSet || upperRank > h.Count {
+		upperRank = h.Count
+	}
+
+	return (upperRank - lowerRank) / h.Count
+}
+
 // coalesceBuckets merges buckets with the same upper bound.
 //
 // The input buckets must be sorted.
--- a/web/ui/module/codemirror-promql/src/complete/promql.terms.ts
+++ b/web/ui/module/codemirror-promql/src/complete/promql.terms.ts
@@ -215,10 +215,16 @@ export const functionIdentifierTerms = [
    info: 'Round down values of input series to nearest integer',
    type: 'function',
  },
+  {
+    label: 'histogram_fraction',
+    detail: 'function',
+    info: 'Calculate fractions of observations within an interval from a native histogram',
+    type: 'function',
+  },
  {
    label: 'histogram_quantile',
    detail: 'function',
-    info: 'Calculate quantiles from histogram buckets',
+    info: 'Calculate quantiles from native histograms and from legacy histogram buckets',
    type: 'function',
  },
  {
--- a/web/ui/module/codemirror-promql/src/parser/parser.test.ts
+++ b/web/ui/module/codemirror-promql/src/parser/parser.test.ts
@@ -714,6 +714,20 @@ describe('promql operations', () => {
      expectedValueType: ValueType.vector,
      expectedDiag: [],
    },
+    {
+      expr:
+        'histogram_fraction(                                      # Root of the query, final result, approximates a fraction of observations within an interval.\n' +
+        '  -Inf,                                                  # 1st argument to histogram_fraction(), start of the interval.\n' +
+        '  123.4,                                                 # 2nd argument to histogram_fraction(), end of the interval.\n' +
+        '  sum by(method, path) (                                 # 3rd argument to histogram_fraction(), an aggregated histogram.\n' +
+        '    rate(                                                # Argument to sum(), the per-second increase of a histogram over 5m.\n' +
+        '      demo_api_request_duration_seconds{job="demo"}[5m]  # Argument to rate(), a vector of sparse histogram series over the last 5m.\n' +
+        '    )\n' +
+        '  )\n' +
+        ')',
+      expectedValueType: ValueType.vector,
+      expectedDiag: [],
+    },
    {
      expr: '1 @ start()',
      expectedValueType: ValueType.scalar,
--- a/web/ui/module/codemirror-promql/src/types/function.ts
+++ b/web/ui/module/codemirror-promql/src/types/function.ts
@@ -39,6 +39,7 @@ import {
  Deriv,
  Exp,
  Floor,
+  HistogramFraction,
  HistogramQuantile,
  HoltWinters,
  Hour,
@@ -261,6 +262,12 @@ const promqlFunctions: { [key: number]: PromQLFunction } = {
    variadic: 0,
    returnType: ValueType.vector,
  },
+  [HistogramFraction]: {
+    name: 'histogram_fraction',
+    argTypes: [ValueType.scalar, ValueType.scalar, ValueType.vector],
+    variadic: 0,
+    returnType: ValueType.vector,
+  },
  [HistogramQuantile]: {
    name: 'histogram_quantile',
    argTypes: [ValueType.scalar, ValueType.vector],
--- a/web/ui/module/lezer-promql/src/promql.grammar
+++ b/web/ui/module/lezer-promql/src/promql.grammar
@@ -146,6 +146,7 @@ FunctionIdentifier {
  Deriv |
  Exp |
  Floor |
+  HistogramFraction |
  HistogramQuantile |
  HoltWinters |
  Hour |
@@ -387,6 +388,7 @@ NumberLiteral  {
  Deriv { condFn<"deriv"> }
  Exp { condFn<"exp"> }
  Floor { condFn<"floor"> }
+  HistogramFraction { condFn<"histogram_fraction"> }
  HistogramQuantile { condFn<"histogram_quantile"> }
  HoltWinters { condFn<"holt_winters"> }
  Hour { condFn<"hour"> }