Ignore stale histograms for counter reset detection

The histogram stats decoder keeps track of the last seen histogram sample
in order to properly detect counter resets. We are seeing an issue where
a histogram with the UnknownCounterReset hint gets treated as a counter reset
when it follows a stale histogram sample.

I believe this is incorrect, since stale samples should be completely ignored
in PromQL. They should therefore not be stored in the histogram stats iterator,
and counter reset detection should be done against the last non-stale sample.

Signed-off-by: Filip Petkovski <filip.petkovsky@gmail.com>
This commit is contained in:
Filip Petkovski 2024-07-26 09:49:57 +02:00
parent 71c90c71d4
commit be7a4c9b83
No known key found for this signature in database
GPG key ID: 88DE577D81202846
3 changed files with 84 additions and 51 deletions

View file

@ -48,7 +48,6 @@ func (f *histogramStatsIterator) AtHistogram(h *histogram.Histogram) (int64, *hi
var t int64 var t int64
t, f.currentH = f.Iterator.AtHistogram(f.currentH) t, f.currentH = f.Iterator.AtHistogram(f.currentH)
if value.IsStaleNaN(f.currentH.Sum) { if value.IsStaleNaN(f.currentH.Sum) {
f.setLastH(f.currentH)
h = &histogram.Histogram{Sum: f.currentH.Sum} h = &histogram.Histogram{Sum: f.currentH.Sum}
return t, h return t, h
} }
@ -77,7 +76,6 @@ func (f *histogramStatsIterator) AtFloatHistogram(fh *histogram.FloatHistogram)
var t int64 var t int64
t, f.currentFH = f.Iterator.AtFloatHistogram(f.currentFH) t, f.currentFH = f.Iterator.AtFloatHistogram(f.currentFH)
if value.IsStaleNaN(f.currentFH.Sum) { if value.IsStaleNaN(f.currentFH.Sum) {
f.setLastFH(f.currentFH)
return t, &histogram.FloatHistogram{Sum: f.currentFH.Sum} return t, &histogram.FloatHistogram{Sum: f.currentFH.Sum}
} }

View file

@ -14,62 +14,99 @@
package promql package promql
import ( import (
"fmt"
"math"
"testing" "testing"
"github.com/stretchr/testify/require" "github.com/stretchr/testify/require"
"github.com/prometheus/prometheus/model/histogram" "github.com/prometheus/prometheus/model/histogram"
"github.com/prometheus/prometheus/model/labels" "github.com/prometheus/prometheus/model/labels"
"github.com/prometheus/prometheus/model/value"
"github.com/prometheus/prometheus/tsdb/chunkenc" "github.com/prometheus/prometheus/tsdb/chunkenc"
"github.com/prometheus/prometheus/tsdb/tsdbutil" "github.com/prometheus/prometheus/tsdb/tsdbutil"
) )
func TestHistogramStatsDecoding(t *testing.T) { func TestHistogramStatsDecoding(t *testing.T) {
histograms := []*histogram.Histogram{ cases := []struct {
tsdbutil.GenerateTestHistogram(0), name string
tsdbutil.GenerateTestHistogram(1), histograms []*histogram.Histogram
tsdbutil.GenerateTestHistogram(2), expectedHints []histogram.CounterResetHint
tsdbutil.GenerateTestHistogram(2), }{
} {
histograms[0].CounterResetHint = histogram.NotCounterReset name: "unknown counter reset triggers detection",
histograms[1].CounterResetHint = histogram.UnknownCounterReset histograms: []*histogram.Histogram{
histograms[2].CounterResetHint = histogram.CounterReset tsdbutil.GenerateTestHistogramWithHint(0, histogram.NotCounterReset),
histograms[3].CounterResetHint = histogram.UnknownCounterReset tsdbutil.GenerateTestHistogramWithHint(1, histogram.UnknownCounterReset),
tsdbutil.GenerateTestHistogramWithHint(2, histogram.CounterReset),
expectedHints := []histogram.CounterResetHint{ tsdbutil.GenerateTestHistogramWithHint(2, histogram.UnknownCounterReset),
histogram.NotCounterReset, },
histogram.NotCounterReset, expectedHints: []histogram.CounterResetHint{
histogram.CounterReset, histogram.NotCounterReset,
histogram.NotCounterReset, histogram.NotCounterReset,
histogram.CounterReset,
histogram.NotCounterReset,
},
},
{
name: "stale sample before unknown reset hint",
histograms: []*histogram.Histogram{
tsdbutil.GenerateTestHistogramWithHint(0, histogram.NotCounterReset),
tsdbutil.GenerateTestHistogramWithHint(1, histogram.UnknownCounterReset),
{Sum: math.Float64frombits(value.StaleNaN)},
tsdbutil.GenerateTestHistogramWithHint(1, histogram.UnknownCounterReset),
},
expectedHints: []histogram.CounterResetHint{
histogram.NotCounterReset,
histogram.NotCounterReset,
histogram.UnknownCounterReset,
histogram.NotCounterReset,
},
},
} }
t.Run("histogram_stats", func(t *testing.T) { for _, tc := range cases {
decodedStats := make([]*histogram.Histogram, 0) t.Run(tc.name, func(t *testing.T) {
statsIterator := NewHistogramStatsIterator(newHistogramSeries(histograms).Iterator(nil)) t.Run("histogram_stats", func(t *testing.T) {
for statsIterator.Next() != chunkenc.ValNone { decodedStats := make([]*histogram.Histogram, 0)
_, h := statsIterator.AtHistogram(nil) statsIterator := NewHistogramStatsIterator(newHistogramSeries(tc.histograms).Iterator(nil))
decodedStats = append(decodedStats, h) for statsIterator.Next() != chunkenc.ValNone {
} _, h := statsIterator.AtHistogram(nil)
for i := 0; i < len(histograms); i++ { decodedStats = append(decodedStats, h)
require.Equal(t, expectedHints[i], decodedStats[i].CounterResetHint) }
require.Equal(t, histograms[i].Count, decodedStats[i].Count) for i := 0; i < len(tc.histograms); i++ {
require.Equal(t, histograms[i].Sum, decodedStats[i].Sum) require.Equal(t, tc.expectedHints[i], decodedStats[i].CounterResetHint, fmt.Sprintf("mismatch in counter reset hint for histogram %d", i))
} h := tc.histograms[i]
}) if value.IsStaleNaN(h.Sum) {
t.Run("float_histogram_stats", func(t *testing.T) { require.True(t, value.IsStaleNaN(decodedStats[i].Sum))
decodedStats := make([]*histogram.FloatHistogram, 0) require.Equal(t, uint64(0), decodedStats[i].Count)
statsIterator := NewHistogramStatsIterator(newHistogramSeries(histograms).Iterator(nil)) } else {
for statsIterator.Next() != chunkenc.ValNone { require.Equal(t, tc.histograms[i].Count, decodedStats[i].Count)
_, h := statsIterator.AtFloatHistogram(nil) require.Equal(t, tc.histograms[i].Sum, decodedStats[i].Sum)
decodedStats = append(decodedStats, h) }
} }
for i := 0; i < len(histograms); i++ { })
fh := histograms[i].ToFloat(nil) t.Run("float_histogram_stats", func(t *testing.T) {
require.Equal(t, expectedHints[i], decodedStats[i].CounterResetHint) decodedStats := make([]*histogram.FloatHistogram, 0)
require.Equal(t, fh.Count, decodedStats[i].Count) statsIterator := NewHistogramStatsIterator(newHistogramSeries(tc.histograms).Iterator(nil))
require.Equal(t, fh.Sum, decodedStats[i].Sum) for statsIterator.Next() != chunkenc.ValNone {
} _, h := statsIterator.AtFloatHistogram(nil)
}) decodedStats = append(decodedStats, h)
}
for i := 0; i < len(tc.histograms); i++ {
require.Equal(t, tc.expectedHints[i], decodedStats[i].CounterResetHint)
fh := tc.histograms[i].ToFloat(nil)
if value.IsStaleNaN(fh.Sum) {
require.True(t, value.IsStaleNaN(decodedStats[i].Sum))
require.Equal(t, float64(0), decodedStats[i].Count)
} else {
require.Equal(t, fh.Count, decodedStats[i].Count)
require.Equal(t, fh.Sum, decodedStats[i].Sum)
}
}
})
})
}
} }
type histogramSeries struct { type histogramSeries struct {

View file

@ -30,12 +30,10 @@ func GenerateTestHistograms(n int) (r []*histogram.Histogram) {
return r return r
} }
func GenerateTestHistogramsWithUnknownResetHint(n int) []*histogram.Histogram { func GenerateTestHistogramWithHint(n int, hint histogram.CounterResetHint) *histogram.Histogram {
hs := GenerateTestHistograms(n) h := GenerateTestHistogram(n)
for i := range hs { h.CounterResetHint = hint
hs[i].CounterResetHint = histogram.UnknownCounterReset return h
}
return hs
} }
// GenerateTestHistogram but it is up to the user to set any known counter reset hint. // GenerateTestHistogram but it is up to the user to set any known counter reset hint.