ValidateHistogram: strict Count check in absence of NaNs

Signed-off-by: Linas Medziunas <linas.medziunas@gmail.com>
This commit is contained in:
Linas Medziunas 2023-10-13 10:58:26 +03:00
parent fae4561369
commit 1cd6c1cde5
5 changed files with 41 additions and 23 deletions

View file

@ -3399,7 +3399,7 @@ func TestNativeHistogram_HistogramStdDevVar(t *testing.T) {
{
name: "-50, -8, 0, 3, 8, 9, 100, +Inf",
h: &histogram.Histogram{
Count: 8,
Count: 7,
ZeroCount: 1,
Sum: math.Inf(1),
Schema: 3,

View file

@ -44,6 +44,7 @@ var (
ErrExemplarsDisabled = fmt.Errorf("exemplar storage is disabled or max exemplars is less than or equal to 0")
ErrNativeHistogramsDisabled = fmt.Errorf("native histograms are disabled")
ErrHistogramCountNotBigEnough = errors.New("histogram's observation count should be at least the number of observations found in the buckets")
ErrHistogramCountMismatch = errors.New("histogram's observation count should equal the number of observations found in the buckets (in absence of NaN)")
ErrHistogramNegativeBucketCount = errors.New("histogram has a bucket whose observation count is negative")
ErrHistogramSpanNegativeOffset = errors.New("histogram has a span whose offset is negative")
ErrHistogramSpansBucketsMismatch = errors.New("histogram spans specify different number of buckets than provided")

View file

@ -508,7 +508,7 @@ func TestAmendHistogramDatapointCausesError(t *testing.T) {
h := histogram.Histogram{
Schema: 3,
Count: 61,
Count: 52,
Sum: 2.7,
ZeroThreshold: 0.1,
ZeroCount: 42,
@ -6314,6 +6314,7 @@ func testHistogramAppendAndQueryHelper(t *testing.T, floatHistogram bool) {
t.Run("buckets disappearing", func(t *testing.T) {
h.PositiveSpans[1].Length--
h.PositiveBuckets = h.PositiveBuckets[:len(h.PositiveBuckets)-1]
h.Count -= 3
appendHistogram(series1, 110, h, &exp1, histogram.CounterReset)
testQuery("foo", "bar1", map[string][]chunks.Sample{series1.String(): exp1})
})
@ -6533,7 +6534,7 @@ func TestNativeHistogramFlag(t *testing.T) {
require.NoError(t, db.Close())
})
h := &histogram.Histogram{
Count: 10,
Count: 9,
ZeroCount: 4,
ZeroThreshold: 0.001,
Sum: 35.5,

View file

@ -659,11 +659,21 @@ func ValidateHistogram(h *histogram.Histogram) error {
return errors.Wrap(err, "positive side")
}
if c := nCount + pCount + h.ZeroCount; c > h.Count {
return errors.Wrap(
storage.ErrHistogramCountNotBigEnough,
fmt.Sprintf("%d observations found in buckets, but the Count field is %d", c, h.Count),
)
sumOfBuckets := nCount + pCount + h.ZeroCount
if math.IsNaN(h.Sum) {
if sumOfBuckets > h.Count {
return errors.Wrap(
storage.ErrHistogramCountNotBigEnough,
fmt.Sprintf("%d observations found in buckets, but the Count field is %d", sumOfBuckets, h.Count),
)
}
} else {
if sumOfBuckets != h.Count {
return errors.Wrap(
storage.ErrHistogramCountMismatch,
fmt.Sprintf("%d observations found in buckets, but the Count field is %d", sumOfBuckets, h.Count),
)
}
}
return nil

View file

@ -3419,7 +3419,6 @@ func TestHistogramInWALAndMmapChunk(t *testing.T) {
hists = tsdbutil.GenerateTestHistograms(numHistograms)
}
for _, h := range hists {
h.Count *= 2
h.NegativeSpans = h.PositiveSpans
h.NegativeBuckets = h.PositiveBuckets
_, err := app.AppendHistogram(0, s1, ts, h, nil)
@ -3442,7 +3441,6 @@ func TestHistogramInWALAndMmapChunk(t *testing.T) {
hists = tsdbutil.GenerateTestFloatHistograms(numHistograms)
}
for _, h := range hists {
h.Count *= 2
h.NegativeSpans = h.PositiveSpans
h.NegativeBuckets = h.PositiveBuckets
_, err := app.AppendHistogram(0, s1, ts, nil, h)
@ -3484,7 +3482,6 @@ func TestHistogramInWALAndMmapChunk(t *testing.T) {
}
for _, h := range hists {
ts++
h.Count *= 2
h.NegativeSpans = h.PositiveSpans
h.NegativeBuckets = h.PositiveBuckets
_, err := app.AppendHistogram(0, s2, ts, h, nil)
@ -3521,7 +3518,6 @@ func TestHistogramInWALAndMmapChunk(t *testing.T) {
}
for _, h := range hists {
ts++
h.Count *= 2
h.NegativeSpans = h.PositiveSpans
h.NegativeBuckets = h.PositiveBuckets
_, err := app.AppendHistogram(0, s2, ts, nil, h)
@ -4907,7 +4903,7 @@ func TestHistogramValidation(t *testing.T) {
"valid histogram": {
h: tsdbutil.GenerateTestHistograms(1)[0],
},
"valid histogram that has its Count (4) higher than the actual total of buckets (2 + 1)": {
"valid histogram with NaN observations that has its Count (4) higher than the actual total of buckets (2 + 1)": {
// This case is possible if NaN values (which do not fall into any bucket) are observed.
h: &histogram.Histogram{
ZeroCount: 2,
@ -4917,6 +4913,17 @@ func TestHistogramValidation(t *testing.T) {
PositiveBuckets: []int64{1},
},
},
"rejects histogram without NaN observations that has its Count (4) higher than the actual total of buckets (2 + 1)": {
h: &histogram.Histogram{
ZeroCount: 2,
Count: 4,
Sum: 333,
PositiveSpans: []histogram.Span{{Offset: 0, Length: 1}},
PositiveBuckets: []int64{1},
},
errMsg: `3 observations found in buckets, but the Count field is 4: histogram's observation count should equal the number of observations found in the buckets (in absence of NaN)`,
skipFloat: true,
},
"rejects histogram that has too few negative buckets": {
h: &histogram.Histogram{
NegativeSpans: []histogram.Span{{Offset: 0, Length: 1}},
@ -4981,7 +4988,7 @@ func TestHistogramValidation(t *testing.T) {
NegativeBuckets: []int64{1},
PositiveBuckets: []int64{1},
},
errMsg: `2 observations found in buckets, but the Count field is 0: histogram's observation count should be at least the number of observations found in the buckets`,
errMsg: `2 observations found in buckets, but the Count field is 0: histogram's observation count should equal the number of observations found in the buckets (in absence of NaN)`,
skipFloat: true,
},
"rejects a histogram that doesn't count the zero bucket in its count": {
@ -4993,7 +5000,7 @@ func TestHistogramValidation(t *testing.T) {
NegativeBuckets: []int64{1},
PositiveBuckets: []int64{1},
},
errMsg: `3 observations found in buckets, but the Count field is 2: histogram's observation count should be at least the number of observations found in the buckets`,
errMsg: `3 observations found in buckets, but the Count field is 2: histogram's observation count should equal the number of observations found in the buckets (in absence of NaN)`,
skipFloat: true,
},
}
@ -5029,8 +5036,8 @@ func generateBigTestHistograms(numHistograms, numBuckets int) []*histogram.Histo
numSpans := numBuckets / 10
bucketsPerSide := numBuckets / 2
spanLength := uint32(bucketsPerSide / numSpans)
// Given all bucket deltas are 1, sum numHistograms + 1.
observationCount := numBuckets / 2 * (1 + numBuckets)
// Given all bucket deltas are 1, sum bucketsPerSide + 1.
observationCount := bucketsPerSide * (1 + bucketsPerSide)
var histograms []*histogram.Histogram
for i := 0; i < numHistograms; i++ {
@ -5491,14 +5498,13 @@ func TestCuttingNewHeadChunks(t *testing.T) {
numSamples int
numBytes int
}{
{30, 696},
{30, 700},
{30, 708},
{30, 693},
{40, 896},
{40, 899},
{40, 896},
{30, 690},
{30, 691},
{30, 692},
{30, 695},
{30, 694},
{30, 693},
},
},
"really large histograms": {