Ignore stale histograms for counter reset detection

The histogram stats decoder keeps track of the last seen histogram sample
in order to properly detect counter resets. We are seeing an issue where
a histogram with the UnknownCounterReset hint gets treated as a counter reset
when it follows a stale histogram sample.

I believe that this is incorrect, since stale samples should be completely ignored
in PromQL. As a result, they should not be stored in the histogram stats iterator,
and counter reset detection needs to be done against the last non-stale sample.

Signed-off-by: Filip Petkovski <filip.petkovsky@gmail.com>
This commit is contained in:
Filip Petkovski 2024-07-26 09:49:57 +02:00 committed by György Krajcsovits
parent 6d7ed08850
commit 6100e756a8
3 changed files with 84 additions and 51 deletions

View file

@ -48,7 +48,6 @@ func (f *histogramStatsIterator) AtHistogram(h *histogram.Histogram) (int64, *hi
var t int64 var t int64
t, f.currentH = f.Iterator.AtHistogram(f.currentH) t, f.currentH = f.Iterator.AtHistogram(f.currentH)
if value.IsStaleNaN(f.currentH.Sum) { if value.IsStaleNaN(f.currentH.Sum) {
f.setLastH(f.currentH)
h = &histogram.Histogram{Sum: f.currentH.Sum} h = &histogram.Histogram{Sum: f.currentH.Sum}
return t, h return t, h
} }
@ -77,7 +76,6 @@ func (f *histogramStatsIterator) AtFloatHistogram(fh *histogram.FloatHistogram)
var t int64 var t int64
t, f.currentFH = f.Iterator.AtFloatHistogram(f.currentFH) t, f.currentFH = f.Iterator.AtFloatHistogram(f.currentFH)
if value.IsStaleNaN(f.currentFH.Sum) { if value.IsStaleNaN(f.currentFH.Sum) {
f.setLastFH(f.currentFH)
return t, &histogram.FloatHistogram{Sum: f.currentFH.Sum} return t, &histogram.FloatHistogram{Sum: f.currentFH.Sum}
} }

View file

@ -14,62 +14,99 @@
package promql package promql
import ( import (
"fmt"
"math"
"testing" "testing"
"github.com/stretchr/testify/require" "github.com/stretchr/testify/require"
"github.com/prometheus/prometheus/model/histogram" "github.com/prometheus/prometheus/model/histogram"
"github.com/prometheus/prometheus/model/labels" "github.com/prometheus/prometheus/model/labels"
"github.com/prometheus/prometheus/model/value"
"github.com/prometheus/prometheus/tsdb/chunkenc" "github.com/prometheus/prometheus/tsdb/chunkenc"
"github.com/prometheus/prometheus/tsdb/tsdbutil" "github.com/prometheus/prometheus/tsdb/tsdbutil"
) )
func TestHistogramStatsDecoding(t *testing.T) { func TestHistogramStatsDecoding(t *testing.T) {
histograms := []*histogram.Histogram{ cases := []struct {
tsdbutil.GenerateTestHistogram(0), name string
tsdbutil.GenerateTestHistogram(1), histograms []*histogram.Histogram
tsdbutil.GenerateTestHistogram(2), expectedHints []histogram.CounterResetHint
tsdbutil.GenerateTestHistogram(2), }{
} {
histograms[0].CounterResetHint = histogram.NotCounterReset name: "unknown counter reset triggers detection",
histograms[1].CounterResetHint = histogram.UnknownCounterReset histograms: []*histogram.Histogram{
histograms[2].CounterResetHint = histogram.CounterReset tsdbutil.GenerateTestHistogramWithHint(0, histogram.NotCounterReset),
histograms[3].CounterResetHint = histogram.UnknownCounterReset tsdbutil.GenerateTestHistogramWithHint(1, histogram.UnknownCounterReset),
tsdbutil.GenerateTestHistogramWithHint(2, histogram.CounterReset),
expectedHints := []histogram.CounterResetHint{ tsdbutil.GenerateTestHistogramWithHint(2, histogram.UnknownCounterReset),
histogram.NotCounterReset, },
histogram.NotCounterReset, expectedHints: []histogram.CounterResetHint{
histogram.CounterReset, histogram.NotCounterReset,
histogram.NotCounterReset, histogram.NotCounterReset,
histogram.CounterReset,
histogram.NotCounterReset,
},
},
{
name: "stale sample before unknown reset hint",
histograms: []*histogram.Histogram{
tsdbutil.GenerateTestHistogramWithHint(0, histogram.NotCounterReset),
tsdbutil.GenerateTestHistogramWithHint(1, histogram.UnknownCounterReset),
{Sum: math.Float64frombits(value.StaleNaN)},
tsdbutil.GenerateTestHistogramWithHint(1, histogram.UnknownCounterReset),
},
expectedHints: []histogram.CounterResetHint{
histogram.NotCounterReset,
histogram.NotCounterReset,
histogram.UnknownCounterReset,
histogram.NotCounterReset,
},
},
} }
t.Run("histogram_stats", func(t *testing.T) { for _, tc := range cases {
decodedStats := make([]*histogram.Histogram, 0) t.Run(tc.name, func(t *testing.T) {
statsIterator := NewHistogramStatsIterator(newHistogramSeries(histograms).Iterator(nil)) t.Run("histogram_stats", func(t *testing.T) {
for statsIterator.Next() != chunkenc.ValNone { decodedStats := make([]*histogram.Histogram, 0)
_, h := statsIterator.AtHistogram(nil) statsIterator := NewHistogramStatsIterator(newHistogramSeries(tc.histograms).Iterator(nil))
decodedStats = append(decodedStats, h) for statsIterator.Next() != chunkenc.ValNone {
} _, h := statsIterator.AtHistogram(nil)
for i := 0; i < len(histograms); i++ { decodedStats = append(decodedStats, h)
require.Equal(t, expectedHints[i], decodedStats[i].CounterResetHint) }
require.Equal(t, histograms[i].Count, decodedStats[i].Count) for i := 0; i < len(tc.histograms); i++ {
require.Equal(t, histograms[i].Sum, decodedStats[i].Sum) require.Equal(t, tc.expectedHints[i], decodedStats[i].CounterResetHint, fmt.Sprintf("mismatch in counter reset hint for histogram %d", i))
} h := tc.histograms[i]
}) if value.IsStaleNaN(h.Sum) {
t.Run("float_histogram_stats", func(t *testing.T) { require.True(t, value.IsStaleNaN(decodedStats[i].Sum))
decodedStats := make([]*histogram.FloatHistogram, 0) require.Equal(t, uint64(0), decodedStats[i].Count)
statsIterator := NewHistogramStatsIterator(newHistogramSeries(histograms).Iterator(nil)) } else {
for statsIterator.Next() != chunkenc.ValNone { require.Equal(t, tc.histograms[i].Count, decodedStats[i].Count)
_, h := statsIterator.AtFloatHistogram(nil) require.Equal(t, tc.histograms[i].Sum, decodedStats[i].Sum)
decodedStats = append(decodedStats, h) }
} }
for i := 0; i < len(histograms); i++ { })
fh := histograms[i].ToFloat(nil) t.Run("float_histogram_stats", func(t *testing.T) {
require.Equal(t, expectedHints[i], decodedStats[i].CounterResetHint) decodedStats := make([]*histogram.FloatHistogram, 0)
require.Equal(t, fh.Count, decodedStats[i].Count) statsIterator := NewHistogramStatsIterator(newHistogramSeries(tc.histograms).Iterator(nil))
require.Equal(t, fh.Sum, decodedStats[i].Sum) for statsIterator.Next() != chunkenc.ValNone {
} _, h := statsIterator.AtFloatHistogram(nil)
}) decodedStats = append(decodedStats, h)
}
for i := 0; i < len(tc.histograms); i++ {
require.Equal(t, tc.expectedHints[i], decodedStats[i].CounterResetHint)
fh := tc.histograms[i].ToFloat(nil)
if value.IsStaleNaN(fh.Sum) {
require.True(t, value.IsStaleNaN(decodedStats[i].Sum))
require.Equal(t, float64(0), decodedStats[i].Count)
} else {
require.Equal(t, fh.Count, decodedStats[i].Count)
require.Equal(t, fh.Sum, decodedStats[i].Sum)
}
}
})
})
}
} }
type histogramSeries struct { type histogramSeries struct {

View file

@ -30,12 +30,10 @@ func GenerateTestHistograms(n int) (r []*histogram.Histogram) {
return r return r
} }
func GenerateTestHistogramsWithUnknownResetHint(n int) []*histogram.Histogram { func GenerateTestHistogramWithHint(n int, hint histogram.CounterResetHint) *histogram.Histogram {
hs := GenerateTestHistograms(n) h := GenerateTestHistogram(n)
for i := range hs { h.CounterResetHint = hint
hs[i].CounterResetHint = histogram.UnknownCounterReset return h
}
return hs
} }
// GenerateTestHistogram but it is up to the user to set any known counter reset hint. // GenerateTestHistogram but it is up to the user to set any known counter reset hint.