promql: fix rate calculation with a counter reset after the 1st histogram

If a rate (or increase) is calculated on native histograms, and there
is a counter reset between the 1st and 2nd histogram, we never have to
touch the 1st histogram, so it doesn't even matter if it has an
incompatible bucket layout. So we should not error out in that case.

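This simply nulls out the 1st histogram in that case, i.e. it is
replaced by an empty histogram that uses the same schema and custom
values as the 2nd histogram.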

Signed-off-by: beorn7 <beorn@grafana.com>
This commit is contained in:
beorn7 2025-01-29 14:47:44 +01:00
parent a8235d5dfd
commit 2581c7d057
2 changed files with 77 additions and 29 deletions

View file

@ -187,35 +187,48 @@ func extrapolatedRate(vals []parser.Value, args parser.Expressions, enh *EvalNod
// not a histogram, and a warning wrapped in an annotation in that case.
// Otherwise, it returns the calculated histogram and an empty annotation.
func histogramRate(points []HPoint, isCounter bool, metricName string, pos posrange.PositionRange) (*histogram.FloatHistogram, annotations.Annotations) {
prev := points[0].H
usingCustomBuckets := prev.UsesCustomBuckets()
last := points[len(points)-1].H
var (
prev = points[0].H
usingCustomBuckets = prev.UsesCustomBuckets()
last = points[len(points)-1].H
annos annotations.Annotations
)
if last == nil {
return nil, annotations.New().Add(annotations.NewMixedFloatsHistogramsWarning(metricName, pos))
return nil, annos.Add(annotations.NewMixedFloatsHistogramsWarning(metricName, pos))
}
minSchema := prev.Schema
if last.Schema < minSchema {
minSchema = last.Schema
// We check for gauge type histograms in the loop below, but the loop
// below does not run on the first and last point, so check the first
// and last point now.
if isCounter && (prev.CounterResetHint == histogram.GaugeType || last.CounterResetHint == histogram.GaugeType) {
annos.Add(annotations.NewNativeHistogramNotCounterWarning(metricName, pos))
}
// Null out the 1st sample if there is a counter reset between the 1st
// and 2nd. In this case, we want to ignore any incompatibility in the
// bucket layout of the 1st sample because we do not need to look at it.
if isCounter && len(points) > 1 {
second := points[1].H
if second != nil && second.DetectReset(prev) {
prev = &histogram.FloatHistogram{}
prev.Schema = second.Schema
prev.CustomValues = second.CustomValues
usingCustomBuckets = second.UsesCustomBuckets()
}
}
if last.UsesCustomBuckets() != usingCustomBuckets {
return nil, annotations.New().Add(annotations.NewMixedExponentialCustomHistogramsWarning(metricName, pos))
}
var annos annotations.Annotations
// We check for gauge type histograms in the loop below, but the loop below does not run on the first and last point,
// so check the first and last point now.
if isCounter && (prev.CounterResetHint == histogram.GaugeType || last.CounterResetHint == histogram.GaugeType) {
annos.Add(annotations.NewNativeHistogramNotCounterWarning(metricName, pos))
return nil, annos.Add(annotations.NewMixedExponentialCustomHistogramsWarning(metricName, pos))
}
// First iteration to find out two things:
// - What's the smallest relevant schema?
// - Are all data points histograms?
// TODO(beorn7): Find a way to check that earlier, e.g. by handing in a
// []FloatPoint and a []HistogramPoint separately.
minSchema := prev.Schema
if last.Schema < minSchema {
minSchema = last.Schema
}
for _, currPoint := range points[1 : len(points)-1] {
curr := currPoint.H
if curr == nil {

View file

@ -1013,7 +1013,7 @@ eval instant at 5m sum(custom_buckets_histogram)
clear
# Test 'this native histogram metric is not a gauge' warning for rate
# Test 'this native histogram metric is not a counter' warning for rate
load 30s
some_metric {{schema:0 sum:1 count:1 buckets:[1] counter_reset_hint:gauge}} {{schema:0 sum:2 count:2 buckets:[2] counter_reset_hint:gauge}} {{schema:0 sum:3 count:3 buckets:[3] counter_reset_hint:gauge}}
@ -1022,7 +1022,7 @@ eval_warn instant at 30s rate(some_metric[1m])
{} {{count:0.03333333333333333 sum:0.03333333333333333 buckets:[0.03333333333333333]}}
# Test the case where we have more than two points for rate
eval_warn instant at 1m rate(some_metric[1m])
eval_warn instant at 1m rate(some_metric[1m30s])
{} {{count:0.03333333333333333 sum:0.03333333333333333 buckets:[0.03333333333333333]}}
clear
@ -1032,20 +1032,20 @@ load 30s
some_metric {{schema:0 sum:1 count:1 buckets:[1]}} {{schema:-53 sum:1 count:1 custom_values:[5 10] buckets:[1]}} {{schema:0 sum:5 count:4 buckets:[1 2 1]}} {{schema:-53 sum:1 count:1 custom_values:[5 10] buckets:[1]}}
# Start and end with exponential, with custom in the middle.
eval_warn instant at 1m rate(some_metric[1m])
eval_warn instant at 1m rate(some_metric[1m30s])
# Should produce no results.
# Start and end with custom, with exponential in the middle.
eval_warn instant at 1m30s rate(some_metric[1m])
eval_warn instant at 1m30s rate(some_metric[1m30s])
# Should produce no results.
# Start with custom, end with exponential.
eval_warn instant at 1m rate(some_metric[1m])
# Should produce no results.
# Start with custom, end with exponential. Return the exponential histogram divided by 30.
eval instant at 1m rate(some_metric[1m])
{} {{schema:0 sum:0.16666666666666666 count:0.13333333333333333 buckets:[0.03333333333333333 0.06666666666666667 0.03333333333333333]}}
# Start with exponential, end with custom.
eval_warn instant at 30s rate(some_metric[1m])
# Should produce no results.
# Start with exponential, end with custom. Return the custom buckets histogram divided by 30.
eval instant at 30s rate(some_metric[1m])
{} {{schema:-53 sum:0.03333333333333333 count:0.03333333333333333 custom_values:[5 10] buckets:[0.03333333333333333]}}
clear
@ -1179,7 +1179,10 @@ eval_info range from 0 to 6m step 6m metric2 > metric2
clear
load 6m
nhcb_metric {{schema:-53 sum:1 count:1 custom_values:[2] buckets:[1]}} {{schema:-53 sum:1 count:1 custom_values:[5 10] buckets:[1]}} {{schema:-53 sum:1 count:1 custom_values:[5 10] buckets:[1]}}
nhcb_metric {{schema:-53 sum:1 count:1 custom_values:[2] buckets:[1]}} {{schema:-53 sum:1 count:1 custom_values:[2] buckets:[1]}} {{schema:-53 sum:1 count:1 custom_values:[5 10] buckets:[1]}} {{schema:-53 sum:1 count:1 custom_values:[5 10] buckets:[1]}}
# If evaluating at 12m, the first two NHCBs have the same custom values
# while the 3rd one has different ones.
eval_warn instant at 12m sum_over_time(nhcb_metric[13m])
@ -1206,6 +1209,38 @@ eval_warn instant at 12m rate(nhcb_metric[13m])
eval instant at 12m resets(nhcb_metric[13m])
{} 1
# Now doing the same again, but at 18m, where the first NHCB has
# different custom_values compared to the other two. This now
# works with no warning for increase() and rate(). No change
# otherwise.
eval_warn instant at 18m sum_over_time(nhcb_metric[13m])
eval_warn instant at 18m avg_over_time(nhcb_metric[13m])
eval instant at 18m last_over_time(nhcb_metric[13m])
nhcb_metric{} {{schema:-53 sum:1 count:1 custom_values:[5 10] buckets:[1]}}
eval instant at 18m count_over_time(nhcb_metric[13m])
{} 3
eval instant at 18m present_over_time(nhcb_metric[13m])
{} 1
eval instant at 18m changes(nhcb_metric[13m])
{} 1
eval_warn instant at 18m delta(nhcb_metric[13m])
eval instant at 18m increase(nhcb_metric[13m])
{} {{schema:-53 count:1.0833333333333333 sum:1.0833333333333333 custom_values:[5 10] buckets:[1.0833333333333333]}}
eval instant at 18m rate(nhcb_metric[13m])
{} {{schema:-53 count:0.0013888888888888887 sum:0.0013888888888888887 custom_values:[5 10] buckets:[0.0013888888888888887]}}
eval instant at 18m resets(nhcb_metric[13m])
{} 1
clear
load 1m