Ignore stale histograms for counter reset detection

The histogram stats decoder keeps track of the last seen histogram sample
in order to properly detect counter resets. We are seeing an issue where
a histogram with UnknownResetHint gets treated as a counter reset when it follows
a stale histogram sample.

I believe that this is incorrect since stale samples should be completely ignored
in PromQL. As a result, they should not be stored in the histogram stats iterator
and the counter reset detection needs to be done against the last non-stale sample.

Signed-off-by: Filip Petkovski <filip.petkovsky@gmail.com>
This commit is contained in:
Filip Petkovski 2024-07-26 09:49:57 +02:00
parent 71c90c71d4
commit be7a4c9b83
No known key found for this signature in database
GPG key ID: 88DE577D81202846
3 changed files with 84 additions and 51 deletions

View file

@ -48,7 +48,6 @@ func (f *histogramStatsIterator) AtHistogram(h *histogram.Histogram) (int64, *hi
var t int64
t, f.currentH = f.Iterator.AtHistogram(f.currentH)
if value.IsStaleNaN(f.currentH.Sum) {
f.setLastH(f.currentH)
h = &histogram.Histogram{Sum: f.currentH.Sum}
return t, h
}
@ -77,7 +76,6 @@ func (f *histogramStatsIterator) AtFloatHistogram(fh *histogram.FloatHistogram)
var t int64
t, f.currentFH = f.Iterator.AtFloatHistogram(f.currentFH)
if value.IsStaleNaN(f.currentFH.Sum) {
f.setLastFH(f.currentFH)
return t, &histogram.FloatHistogram{Sum: f.currentFH.Sum}
}

View file

@ -14,62 +14,99 @@
package promql
import (
"fmt"
"math"
"testing"
"github.com/stretchr/testify/require"
"github.com/prometheus/prometheus/model/histogram"
"github.com/prometheus/prometheus/model/labels"
"github.com/prometheus/prometheus/model/value"
"github.com/prometheus/prometheus/tsdb/chunkenc"
"github.com/prometheus/prometheus/tsdb/tsdbutil"
)
func TestHistogramStatsDecoding(t *testing.T) {
histograms := []*histogram.Histogram{
tsdbutil.GenerateTestHistogram(0),
tsdbutil.GenerateTestHistogram(1),
tsdbutil.GenerateTestHistogram(2),
tsdbutil.GenerateTestHistogram(2),
}
histograms[0].CounterResetHint = histogram.NotCounterReset
histograms[1].CounterResetHint = histogram.UnknownCounterReset
histograms[2].CounterResetHint = histogram.CounterReset
histograms[3].CounterResetHint = histogram.UnknownCounterReset
expectedHints := []histogram.CounterResetHint{
cases := []struct {
name string
histograms []*histogram.Histogram
expectedHints []histogram.CounterResetHint
}{
{
name: "unknown counter reset triggers detection",
histograms: []*histogram.Histogram{
tsdbutil.GenerateTestHistogramWithHint(0, histogram.NotCounterReset),
tsdbutil.GenerateTestHistogramWithHint(1, histogram.UnknownCounterReset),
tsdbutil.GenerateTestHistogramWithHint(2, histogram.CounterReset),
tsdbutil.GenerateTestHistogramWithHint(2, histogram.UnknownCounterReset),
},
expectedHints: []histogram.CounterResetHint{
histogram.NotCounterReset,
histogram.NotCounterReset,
histogram.CounterReset,
histogram.NotCounterReset,
},
},
{
name: "stale sample before unknown reset hint",
histograms: []*histogram.Histogram{
tsdbutil.GenerateTestHistogramWithHint(0, histogram.NotCounterReset),
tsdbutil.GenerateTestHistogramWithHint(1, histogram.UnknownCounterReset),
{Sum: math.Float64frombits(value.StaleNaN)},
tsdbutil.GenerateTestHistogramWithHint(1, histogram.UnknownCounterReset),
},
expectedHints: []histogram.CounterResetHint{
histogram.NotCounterReset,
histogram.NotCounterReset,
histogram.UnknownCounterReset,
histogram.NotCounterReset,
},
},
}
for _, tc := range cases {
t.Run(tc.name, func(t *testing.T) {
t.Run("histogram_stats", func(t *testing.T) {
decodedStats := make([]*histogram.Histogram, 0)
statsIterator := NewHistogramStatsIterator(newHistogramSeries(histograms).Iterator(nil))
statsIterator := NewHistogramStatsIterator(newHistogramSeries(tc.histograms).Iterator(nil))
for statsIterator.Next() != chunkenc.ValNone {
_, h := statsIterator.AtHistogram(nil)
decodedStats = append(decodedStats, h)
}
for i := 0; i < len(histograms); i++ {
require.Equal(t, expectedHints[i], decodedStats[i].CounterResetHint)
require.Equal(t, histograms[i].Count, decodedStats[i].Count)
require.Equal(t, histograms[i].Sum, decodedStats[i].Sum)
for i := 0; i < len(tc.histograms); i++ {
require.Equal(t, tc.expectedHints[i], decodedStats[i].CounterResetHint, fmt.Sprintf("mismatch in counter reset hint for histogram %d", i))
h := tc.histograms[i]
if value.IsStaleNaN(h.Sum) {
require.True(t, value.IsStaleNaN(decodedStats[i].Sum))
require.Equal(t, uint64(0), decodedStats[i].Count)
} else {
require.Equal(t, tc.histograms[i].Count, decodedStats[i].Count)
require.Equal(t, tc.histograms[i].Sum, decodedStats[i].Sum)
}
}
})
t.Run("float_histogram_stats", func(t *testing.T) {
decodedStats := make([]*histogram.FloatHistogram, 0)
statsIterator := NewHistogramStatsIterator(newHistogramSeries(histograms).Iterator(nil))
statsIterator := NewHistogramStatsIterator(newHistogramSeries(tc.histograms).Iterator(nil))
for statsIterator.Next() != chunkenc.ValNone {
_, h := statsIterator.AtFloatHistogram(nil)
decodedStats = append(decodedStats, h)
}
for i := 0; i < len(histograms); i++ {
fh := histograms[i].ToFloat(nil)
require.Equal(t, expectedHints[i], decodedStats[i].CounterResetHint)
for i := 0; i < len(tc.histograms); i++ {
require.Equal(t, tc.expectedHints[i], decodedStats[i].CounterResetHint)
fh := tc.histograms[i].ToFloat(nil)
if value.IsStaleNaN(fh.Sum) {
require.True(t, value.IsStaleNaN(decodedStats[i].Sum))
require.Equal(t, float64(0), decodedStats[i].Count)
} else {
require.Equal(t, fh.Count, decodedStats[i].Count)
require.Equal(t, fh.Sum, decodedStats[i].Sum)
}
}
})
})
}
}
type histogramSeries struct {

View file

@ -30,12 +30,10 @@ func GenerateTestHistograms(n int) (r []*histogram.Histogram) {
return r
}
func GenerateTestHistogramsWithUnknownResetHint(n int) []*histogram.Histogram {
hs := GenerateTestHistograms(n)
for i := range hs {
hs[i].CounterResetHint = histogram.UnknownCounterReset
}
return hs
func GenerateTestHistogramWithHint(n int, hint histogram.CounterResetHint) *histogram.Histogram {
h := GenerateTestHistogram(n)
h.CounterResetHint = hint
return h
}
// GenerateTestHistogram but it is up to the user to set any known counter reset hint.