Merge pull request #12977 from linasm/linasm/strict-validation-of-histogram-without-nans

ValidateHistogram: strict Count check in absence of NaNs
This commit is contained in:
Björn Rabenstein 2023-11-05 23:13:29 +01:00 committed by GitHub
commit 69c9c29061
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
11 changed files with 359 additions and 292 deletions

View file

@ -17,6 +17,8 @@ import (
"fmt" "fmt"
"math" "math"
"strings" "strings"
"github.com/pkg/errors"
) )
// FloatHistogram is similar to Histogram but uses float64 for all // FloatHistogram is similar to Histogram but uses float64 for all
@ -593,6 +595,31 @@ func (h *FloatHistogram) AllReverseBucketIterator() BucketIterator[float64] {
} }
} }
// Validate reports whether the span and bucket slices of the histogram are
// consistent with each other and whether any bucket holds a negative value.
// The checks run in a fixed order (negative spans, positive spans, negative
// buckets, positive buckets) and the first failure is returned, prefixed with
// the side it was found on.
//
// h.Count is deliberately not compared against the sum of the bucket counts:
// floating point precision issues can create false positives for such a check.
func (h *FloatHistogram) Validate() error {
	// errors.Wrap passes a nil error through as nil, so every check can be
	// wrapped unconditionally and tested afterwards.
	if err := errors.Wrap(checkHistogramSpans(h.NegativeSpans, len(h.NegativeBuckets)), "negative side"); err != nil {
		return err
	}
	if err := errors.Wrap(checkHistogramSpans(h.PositiveSpans, len(h.PositiveBuckets)), "positive side"); err != nil {
		return err
	}

	// The totals exist only because checkHistogramBuckets needs an
	// accumulator; they are not evaluated any further here.
	var negTotal, posTotal float64
	if err := errors.Wrap(checkHistogramBuckets(h.NegativeBuckets, &negTotal, false), "negative side"); err != nil {
		return err
	}
	return errors.Wrap(checkHistogramBuckets(h.PositiveBuckets, &posTotal, false), "positive side")
}
// zeroCountForLargerThreshold returns what the histogram's zero count would be // zeroCountForLargerThreshold returns what the histogram's zero count would be
// if the ZeroThreshold had the provided larger (or equal) value. If the // if the ZeroThreshold had the provided larger (or equal) value. If the
// provided value is less than the histogram's ZeroThreshold, the method panics. // provided value is less than the histogram's ZeroThreshold, the method panics.

View file

@ -17,6 +17,16 @@ import (
"fmt" "fmt"
"math" "math"
"strings" "strings"
"github.com/pkg/errors"
)
var (
// ErrHistogramCountNotBigEnough is returned by Histogram.Validate when Sum is
// NaN and the buckets hold more observations than the Count field.
ErrHistogramCountNotBigEnough = errors.New("histogram's observation count should be at least the number of observations found in the buckets")
// ErrHistogramCountMismatch is returned by Histogram.Validate when Sum is not
// NaN and the Count field differs from the observations found in the buckets.
ErrHistogramCountMismatch = errors.New("histogram's observation count should equal the number of observations found in the buckets (in absence of NaN)")
// ErrHistogramNegativeBucketCount is returned when a bucket's observation
// count is below zero.
ErrHistogramNegativeBucketCount = errors.New("histogram has a bucket whose observation count is negative")
// ErrHistogramSpanNegativeOffset is returned when any span other than the
// first one has a negative offset.
ErrHistogramSpanNegativeOffset = errors.New("histogram has a span whose offset is negative")
// ErrHistogramSpansBucketsMismatch is returned when the span lengths do not
// add up to the number of buckets provided.
ErrHistogramSpansBucketsMismatch = errors.New("histogram spans specify different number of buckets than provided")
) )
// BucketCount is a type constraint for the count in a bucket, which can be // BucketCount is a type constraint for the count in a bucket, which can be
@ -347,6 +357,52 @@ func compactBuckets[IBC InternalBucketCount](buckets []IBC, spans []Span, maxEmp
return buckets, spans return buckets, spans
} }
// checkHistogramSpans verifies the span layout of one side of a histogram
// against the number of buckets provided for that side. Every span except the
// first must have a non-negative offset, and the span lengths must add up to
// exactly numBuckets. It returns ErrHistogramSpanNegativeOffset or
// ErrHistogramSpansBucketsMismatch wrapped with details, or nil if the layout
// is consistent.
func checkHistogramSpans(spans []Span, numBuckets int) error {
	needed := 0
	for idx := range spans {
		s := spans[idx]
		// Only the very first span may carry a negative offset.
		if idx != 0 && s.Offset < 0 {
			return errors.Wrapf(ErrHistogramSpanNegativeOffset, "span number %d with offset %d", idx+1, s.Offset)
		}
		needed += int(s.Length)
	}
	if needed == numBuckets {
		return nil
	}
	return errors.Wrapf(ErrHistogramSpansBucketsMismatch, "spans need %d buckets, have %d buckets", needed, numBuckets)
}
// checkHistogramBuckets walks the buckets of one side of a histogram, rejects
// any bucket whose observation count is negative and adds every bucket's count
// to *count.
//
// If deltas is true, each element of buckets is interpreted as the difference
// to the previous bucket's count (the first one relative to zero); otherwise
// each element is the count itself. The returned error wraps
// ErrHistogramNegativeBucketCount and names the 1-based bucket number. *count
// keeps whatever was added before the offending bucket was reached.
func checkHistogramBuckets[BC BucketCount, IBC InternalBucketCount](buckets []IBC, count *BC, deltas bool) error {
	var prev IBC
	for idx, raw := range buckets {
		abs := raw
		if deltas {
			abs += prev
		}
		if abs < 0 {
			return errors.Wrapf(ErrHistogramNegativeBucketCount, "bucket number %d has observation count of %v", idx+1, abs)
		}
		*count += BC(abs)
		prev = abs
	}
	return nil
}
func getBound(idx, schema int32) float64 { func getBound(idx, schema int32) float64 {
// Here a bit of context about the behavior for the last bucket counting // Here a bit of context about the behavior for the last bucket counting
// regular numbers (called simply "last bucket" below) and the bucket // regular numbers (called simply "last bucket" below) and the bucket

View file

@ -18,6 +18,7 @@ import (
"math" "math"
"strings" "strings"
"github.com/pkg/errors"
"golang.org/x/exp/slices" "golang.org/x/exp/slices"
) )
@ -328,6 +329,50 @@ func (h *Histogram) ToFloat() *FloatHistogram {
} }
} }
// Validate reports whether the span and bucket slices of the histogram are
// consistent with each other, whether any bucket holds a negative count, and
// whether the Count field fits the observations found in the buckets.
//
// A NaN Sum is taken as the sign that NaN values have been observed. Such
// observations increment h.Count but no bucket, so in that case only a lower
// bound is enforced (h.Count >= negative + positive + zero bucket counts).
// For every other Sum the two numbers have to be exactly equal.
func (h *Histogram) Validate() error {
	// errors.Wrap passes a nil error through as nil, so every check can be
	// wrapped unconditionally and tested afterwards.
	if err := errors.Wrap(checkHistogramSpans(h.NegativeSpans, len(h.NegativeBuckets)), "negative side"); err != nil {
		return err
	}
	if err := errors.Wrap(checkHistogramSpans(h.PositiveSpans, len(h.PositiveBuckets)), "positive side"); err != nil {
		return err
	}

	// Integer buckets are delta-encoded, hence deltas=true.
	var negTotal, posTotal uint64
	if err := errors.Wrap(checkHistogramBuckets(h.NegativeBuckets, &negTotal, true), "negative side"); err != nil {
		return err
	}
	if err := errors.Wrap(checkHistogramBuckets(h.PositiveBuckets, &posTotal, true), "positive side"); err != nil {
		return err
	}

	inBuckets := negTotal + posTotal + h.ZeroCount
	sumIsNaN := math.IsNaN(h.Sum)
	switch {
	case sumIsNaN && inBuckets > h.Count:
		return errors.Wrapf(ErrHistogramCountNotBigEnough, "%d observations found in buckets, but the Count field is %d", inBuckets, h.Count)
	case !sumIsNaN && inBuckets != h.Count:
		return errors.Wrapf(ErrHistogramCountMismatch, "%d observations found in buckets, but the Count field is %d", inBuckets, h.Count)
	}
	return nil
}
type regularBucketIterator struct { type regularBucketIterator struct {
baseBucketIterator[uint64, int64] baseBucketIterator[uint64, int64]
} }

View file

@ -811,3 +811,159 @@ func TestHistogramCompact(t *testing.T) {
}) })
} }
} }
// TestHistogramValidation runs Histogram.Validate over a table of valid and
// invalid histograms and compares the returned error message exactly. Unless a
// case sets skipFloat, the same expectation is also checked against the
// FloatHistogram obtained from the case via ToFloat.
func TestHistogramValidation(t *testing.T) {
tests := map[string]struct {
h *Histogram
// errMsg is the exact error text expected from Validate; an empty string
// means that validation must succeed.
errMsg string
// skipFloat skips the FloatHistogram check. It is set on the cases that
// expect a Count error, which FloatHistogram.Validate does not produce.
skipFloat bool
}{
"valid histogram": {
// The bucket deltas 1, 1, -1, 0 stand for the absolute counts 1, 2, 1, 1,
// i.e. 5 observations per side; with ZeroCount 2 that equals Count 12.
h: &Histogram{
Count: 12,
ZeroCount: 2,
ZeroThreshold: 0.001,
Sum: 19.4,
Schema: 1,
PositiveSpans: []Span{
{Offset: 0, Length: 2},
{Offset: 1, Length: 2},
},
PositiveBuckets: []int64{1, 1, -1, 0},
NegativeSpans: []Span{
{Offset: 0, Length: 2},
{Offset: 1, Length: 2},
},
NegativeBuckets: []int64{1, 1, -1, 0},
},
},
"valid histogram with NaN observations that has its Count (4) higher than the actual total of buckets (2 + 1)": {
// This case is possible if NaN values (which do not fall into any bucket) are observed.
h: &Histogram{
ZeroCount: 2,
Count: 4,
Sum: math.NaN(),
PositiveSpans: []Span{{Offset: 0, Length: 1}},
PositiveBuckets: []int64{1},
},
},
"rejects histogram without NaN observations that has its Count (4) higher than the actual total of buckets (2 + 1)": {
// Same shape as the previous case, but Sum is not NaN, so Count has to
// match the buckets exactly.
h: &Histogram{
ZeroCount: 2,
Count: 4,
Sum: 333,
PositiveSpans: []Span{{Offset: 0, Length: 1}},
PositiveBuckets: []int64{1},
},
errMsg: `3 observations found in buckets, but the Count field is 4: histogram's observation count should equal the number of observations found in the buckets (in absence of NaN)`,
skipFloat: true,
},
"rejects histogram that has too few negative buckets": {
h: &Histogram{
NegativeSpans: []Span{{Offset: 0, Length: 1}},
NegativeBuckets: []int64{},
},
errMsg: `negative side: spans need 1 buckets, have 0 buckets: histogram spans specify different number of buckets than provided`,
},
"rejects histogram that has too few positive buckets": {
h: &Histogram{
PositiveSpans: []Span{{Offset: 0, Length: 1}},
PositiveBuckets: []int64{},
},
errMsg: `positive side: spans need 1 buckets, have 0 buckets: histogram spans specify different number of buckets than provided`,
},
"rejects histogram that has too many negative buckets": {
h: &Histogram{
NegativeSpans: []Span{{Offset: 0, Length: 1}},
NegativeBuckets: []int64{1, 2},
},
errMsg: `negative side: spans need 1 buckets, have 2 buckets: histogram spans specify different number of buckets than provided`,
},
"rejects histogram that has too many positive buckets": {
h: &Histogram{
PositiveSpans: []Span{{Offset: 0, Length: 1}},
PositiveBuckets: []int64{1, 2},
},
errMsg: `positive side: spans need 1 buckets, have 2 buckets: histogram spans specify different number of buckets than provided`,
},
"rejects a histogram that has a negative span with a negative offset": {
// Only the second span is reported; the first span's negative offset is
// not an error.
h: &Histogram{
NegativeSpans: []Span{{Offset: -1, Length: 1}, {Offset: -1, Length: 1}},
NegativeBuckets: []int64{1, 2},
},
errMsg: `negative side: span number 2 with offset -1: histogram has a span whose offset is negative`,
},
"rejects a histogram which has a positive span with a negative offset": {
h: &Histogram{
PositiveSpans: []Span{{Offset: -1, Length: 1}, {Offset: -1, Length: 1}},
PositiveBuckets: []int64{1, 2},
},
errMsg: `positive side: span number 2 with offset -1: histogram has a span whose offset is negative`,
},
"rejects a histogram that has a negative bucket with a negative count": {
h: &Histogram{
NegativeSpans: []Span{{Offset: -1, Length: 1}},
NegativeBuckets: []int64{-1},
},
errMsg: `negative side: bucket number 1 has observation count of -1: histogram has a bucket whose observation count is negative`,
},
"rejects a histogram that has a positive bucket with a negative count": {
h: &Histogram{
PositiveSpans: []Span{{Offset: -1, Length: 1}},
PositiveBuckets: []int64{-1},
},
errMsg: `positive side: bucket number 1 has observation count of -1: histogram has a bucket whose observation count is negative`,
},
"rejects a histogram that has a lower count than count in buckets": {
h: &Histogram{
Count: 0,
NegativeSpans: []Span{{Offset: -1, Length: 1}},
PositiveSpans: []Span{{Offset: -1, Length: 1}},
NegativeBuckets: []int64{1},
PositiveBuckets: []int64{1},
},
errMsg: `2 observations found in buckets, but the Count field is 0: histogram's observation count should equal the number of observations found in the buckets (in absence of NaN)`,
skipFloat: true,
},
"rejects a histogram that doesn't count the zero bucket in its count": {
h: &Histogram{
Count: 2,
ZeroCount: 1,
NegativeSpans: []Span{{Offset: -1, Length: 1}},
PositiveSpans: []Span{{Offset: -1, Length: 1}},
NegativeBuckets: []int64{1},
PositiveBuckets: []int64{1},
},
errMsg: `3 observations found in buckets, but the Count field is 2: histogram's observation count should equal the number of observations found in the buckets (in absence of NaN)`,
skipFloat: true,
},
}
for testName, tc := range tests {
t.Run(testName, func(t *testing.T) {
// First validate the integer histogram as given.
if err := tc.h.Validate(); tc.errMsg != "" {
require.EqualError(t, err, tc.errMsg)
} else {
require.NoError(t, err)
}
if tc.skipFloat {
return
}
// Then expect the very same outcome from the float conversion.
fh := tc.h.ToFloat()
if err := fh.Validate(); tc.errMsg != "" {
require.EqualError(t, err, tc.errMsg)
} else {
require.NoError(t, err)
}
})
}
}
// BenchmarkHistogramValidation measures Histogram.Validate on generated
// histograms with 500 buckets each. One histogram is generated per benchmark
// iteration before the timer is reset, so generation is not part of the
// measurement.
func BenchmarkHistogramValidation(b *testing.B) {
	hs := GenerateBigTestHistograms(b.N, 500)
	b.ResetTimer()
	for i := range hs {
		require.NoError(b, hs[i].Validate())
	}
}

View file

@ -0,0 +1,52 @@
// Copyright 2023 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package histogram
// GenerateBigTestHistograms generates a slice of numHistograms histograms,
// each with numBuckets/2 buckets on the negative and on the positive side.
//
// Every bucket delta is 1, so the absolute bucket counts on each side are
// 1, 2, ..., bucketsPerSide. The i-th histogram has a ZeroCount of i, and its
// Count is set to exactly the number of observations held by its buckets
// (including the zero bucket), so the result passes Histogram.Validate.
//
// The buckets of each side are spread over numBuckets/10 spans, but at least
// one span as soon as there is a bucket to describe. If the buckets cannot be
// divided evenly, the last span absorbs the remainder, so the spans always
// describe exactly as many buckets as are provided. For a numBuckets that is a
// multiple of 10 the layout is the same as it always was.
//
// A non-positive numHistograms yields a nil slice. numBuckets must not be
// negative.
func GenerateBigTestHistograms(numHistograms, numBuckets int) []*Histogram {
	if numHistograms <= 0 {
		return nil
	}

	bucketsPerSide := numBuckets / 2
	numSpans := numBuckets / 10
	// With fewer than 10 buckets the plain division yields zero spans, which
	// used to end in a division by zero below.
	if numSpans < 1 && bucketsPerSide > 0 {
		numSpans = 1
	}
	var spanLength, lastSpanLength uint32
	if numSpans > 0 {
		spanLength = uint32(bucketsPerSide / numSpans)
		// Whatever does not divide evenly goes into the last span.
		lastSpanLength = spanLength + uint32(bucketsPerSide%numSpans)
	}
	// With all deltas being 1, each side holds 1+2+...+bucketsPerSide
	// observations; both sides together hold bucketsPerSide*(bucketsPerSide+1).
	observationCount := bucketsPerSide * (1 + bucketsPerSide)

	histograms := make([]*Histogram, 0, numHistograms)
	for i := 0; i < numHistograms; i++ {
		h := &Histogram{
			Count:           uint64(i + observationCount),
			ZeroCount:       uint64(i),
			ZeroThreshold:   1e-128,
			Sum:             18.4 * float64(i+1),
			Schema:          2,
			NegativeSpans:   make([]Span, numSpans),
			PositiveSpans:   make([]Span, numSpans),
			NegativeBuckets: make([]int64, bucketsPerSide),
			PositiveBuckets: make([]int64, bucketsPerSide),
		}
		for j := range h.PositiveSpans {
			s := Span{Offset: 1, Length: spanLength}
			if j == numSpans-1 {
				s.Length = lastSpanLength
			}
			h.NegativeSpans[j] = s
			h.PositiveSpans[j] = s
		}
		for j := range h.PositiveBuckets {
			h.NegativeBuckets[j] = 1
			h.PositiveBuckets[j] = 1
		}
		histograms = append(histograms, h)
	}
	return histograms
}

View file

@ -3399,7 +3399,7 @@ func TestNativeHistogram_HistogramStdDevVar(t *testing.T) {
{ {
name: "-50, -8, 0, 3, 8, 9, 100, +Inf", name: "-50, -8, 0, 3, 8, 9, 100, +Inf",
h: &histogram.Histogram{ h: &histogram.Histogram{
Count: 8, Count: 7,
ZeroCount: 1, ZeroCount: 1,
Sum: math.Inf(1), Sum: math.Inf(1),
Schema: 3, Schema: 3,

View file

@ -37,16 +37,12 @@ var (
// ErrTooOldSample is when out of order support is enabled but the sample is outside the time window allowed. // ErrTooOldSample is when out of order support is enabled but the sample is outside the time window allowed.
ErrTooOldSample = errors.New("too old sample") ErrTooOldSample = errors.New("too old sample")
// ErrDuplicateSampleForTimestamp is when the sample has same timestamp but different value. // ErrDuplicateSampleForTimestamp is when the sample has same timestamp but different value.
ErrDuplicateSampleForTimestamp = errors.New("duplicate sample for timestamp") ErrDuplicateSampleForTimestamp = errors.New("duplicate sample for timestamp")
ErrOutOfOrderExemplar = errors.New("out of order exemplar") ErrOutOfOrderExemplar = errors.New("out of order exemplar")
ErrDuplicateExemplar = errors.New("duplicate exemplar") ErrDuplicateExemplar = errors.New("duplicate exemplar")
ErrExemplarLabelLength = fmt.Errorf("label length for exemplar exceeds maximum of %d UTF-8 characters", exemplar.ExemplarMaxLabelSetLength) ErrExemplarLabelLength = fmt.Errorf("label length for exemplar exceeds maximum of %d UTF-8 characters", exemplar.ExemplarMaxLabelSetLength)
ErrExemplarsDisabled = fmt.Errorf("exemplar storage is disabled or max exemplars is less than or equal to 0") ErrExemplarsDisabled = fmt.Errorf("exemplar storage is disabled or max exemplars is less than or equal to 0")
ErrNativeHistogramsDisabled = fmt.Errorf("native histograms are disabled") ErrNativeHistogramsDisabled = fmt.Errorf("native histograms are disabled")
ErrHistogramCountNotBigEnough = errors.New("histogram's observation count should be at least the number of observations found in the buckets")
ErrHistogramNegativeBucketCount = errors.New("histogram has a bucket whose observation count is negative")
ErrHistogramSpanNegativeOffset = errors.New("histogram has a span whose offset is negative")
ErrHistogramSpansBucketsMismatch = errors.New("histogram spans specify different number of buckets than provided")
) )
// SeriesRef is a generic series reference. In prometheus it is either a // SeriesRef is a generic series reference. In prometheus it is either a

View file

@ -883,13 +883,13 @@ func (a *appender) AppendExemplar(ref storage.SeriesRef, _ labels.Labels, e exem
func (a *appender) AppendHistogram(ref storage.SeriesRef, l labels.Labels, t int64, h *histogram.Histogram, fh *histogram.FloatHistogram) (storage.SeriesRef, error) { func (a *appender) AppendHistogram(ref storage.SeriesRef, l labels.Labels, t int64, h *histogram.Histogram, fh *histogram.FloatHistogram) (storage.SeriesRef, error) {
if h != nil { if h != nil {
if err := tsdb.ValidateHistogram(h); err != nil { if err := h.Validate(); err != nil {
return 0, err return 0, err
} }
} }
if fh != nil { if fh != nil {
if err := tsdb.ValidateFloatHistogram(fh); err != nil { if err := fh.Validate(); err != nil {
return 0, err return 0, err
} }
} }

View file

@ -508,7 +508,7 @@ func TestAmendHistogramDatapointCausesError(t *testing.T) {
h := histogram.Histogram{ h := histogram.Histogram{
Schema: 3, Schema: 3,
Count: 61, Count: 52,
Sum: 2.7, Sum: 2.7,
ZeroThreshold: 0.1, ZeroThreshold: 0.1,
ZeroCount: 42, ZeroCount: 42,
@ -6314,6 +6314,7 @@ func testHistogramAppendAndQueryHelper(t *testing.T, floatHistogram bool) {
t.Run("buckets disappearing", func(t *testing.T) { t.Run("buckets disappearing", func(t *testing.T) {
h.PositiveSpans[1].Length-- h.PositiveSpans[1].Length--
h.PositiveBuckets = h.PositiveBuckets[:len(h.PositiveBuckets)-1] h.PositiveBuckets = h.PositiveBuckets[:len(h.PositiveBuckets)-1]
h.Count -= 3
appendHistogram(series1, 110, h, &exp1, histogram.CounterReset) appendHistogram(series1, 110, h, &exp1, histogram.CounterReset)
testQuery("foo", "bar1", map[string][]chunks.Sample{series1.String(): exp1}) testQuery("foo", "bar1", map[string][]chunks.Sample{series1.String(): exp1})
}) })
@ -6533,7 +6534,7 @@ func TestNativeHistogramFlag(t *testing.T) {
require.NoError(t, db.Close()) require.NoError(t, db.Close())
}) })
h := &histogram.Histogram{ h := &histogram.Histogram{
Count: 10, Count: 9,
ZeroCount: 4, ZeroCount: 4,
ZeroThreshold: 0.001, ZeroThreshold: 0.001,
Sum: 35.5, Sum: 35.5,

View file

@ -521,13 +521,13 @@ func (a *headAppender) AppendHistogram(ref storage.SeriesRef, lset labels.Labels
} }
if h != nil { if h != nil {
if err := ValidateHistogram(h); err != nil { if err := h.Validate(); err != nil {
return 0, err return 0, err
} }
} }
if fh != nil { if fh != nil {
if err := ValidateFloatHistogram(fh); err != nil { if err := fh.Validate(); err != nil {
return 0, err return 0, err
} }
} }
@ -642,103 +642,6 @@ func (a *headAppender) UpdateMetadata(ref storage.SeriesRef, lset labels.Labels,
return ref, nil return ref, nil
} }
// ValidateHistogram reports whether the span and bucket slices of h are
// consistent with each other, whether any bucket holds a negative count, and
// whether h.Count is at least as large as the number of observations found in
// the buckets (negative, positive and zero bucket). The first failure is
// returned; span and bucket failures are prefixed with the side they were
// found on.
func ValidateHistogram(h *histogram.Histogram) error {
	// errors.Wrap passes a nil error through as nil, so every check can be
	// wrapped unconditionally and tested afterwards.
	if err := errors.Wrap(checkHistogramSpans(h.NegativeSpans, len(h.NegativeBuckets)), "negative side"); err != nil {
		return err
	}
	if err := errors.Wrap(checkHistogramSpans(h.PositiveSpans, len(h.PositiveBuckets)), "positive side"); err != nil {
		return err
	}

	// Integer buckets are delta-encoded, hence deltas=true.
	var negTotal, posTotal uint64
	if err := errors.Wrap(checkHistogramBuckets(h.NegativeBuckets, &negTotal, true), "negative side"); err != nil {
		return err
	}
	if err := errors.Wrap(checkHistogramBuckets(h.PositiveBuckets, &posTotal, true), "positive side"); err != nil {
		return err
	}

	inBuckets := negTotal + posTotal + h.ZeroCount
	if inBuckets <= h.Count {
		return nil
	}
	return errors.Wrapf(storage.ErrHistogramCountNotBigEnough, "%d observations found in buckets, but the Count field is %d", inBuckets, h.Count)
}
// ValidateFloatHistogram reports whether the span and bucket slices of h are
// consistent with each other and whether any bucket holds a negative value.
// The first failure is returned, prefixed with the side it was found on.
//
// h.Count is deliberately not compared against the sum of the bucket counts:
// floating point precision issues can create false positives for such a check.
func ValidateFloatHistogram(h *histogram.FloatHistogram) error {
	// errors.Wrap passes a nil error through as nil, so every check can be
	// wrapped unconditionally and tested afterwards.
	if err := errors.Wrap(checkHistogramSpans(h.NegativeSpans, len(h.NegativeBuckets)), "negative side"); err != nil {
		return err
	}
	if err := errors.Wrap(checkHistogramSpans(h.PositiveSpans, len(h.PositiveBuckets)), "positive side"); err != nil {
		return err
	}

	// The totals exist only because checkHistogramBuckets needs an
	// accumulator; they are not evaluated any further here.
	var negTotal, posTotal float64
	if err := errors.Wrap(checkHistogramBuckets(h.NegativeBuckets, &negTotal, false), "negative side"); err != nil {
		return err
	}
	return errors.Wrap(checkHistogramBuckets(h.PositiveBuckets, &posTotal, false), "positive side")
}
// checkHistogramSpans verifies the span layout of one side of a histogram
// against the number of buckets provided for that side. Every span except the
// first must have a non-negative offset, and the span lengths must add up to
// exactly numBuckets. It returns storage.ErrHistogramSpanNegativeOffset or
// storage.ErrHistogramSpansBucketsMismatch wrapped with details, or nil if the
// layout is consistent.
func checkHistogramSpans(spans []histogram.Span, numBuckets int) error {
	needed := 0
	for idx := range spans {
		s := spans[idx]
		// Only the very first span may carry a negative offset.
		if idx != 0 && s.Offset < 0 {
			return errors.Wrapf(storage.ErrHistogramSpanNegativeOffset, "span number %d with offset %d", idx+1, s.Offset)
		}
		needed += int(s.Length)
	}
	if needed == numBuckets {
		return nil
	}
	return errors.Wrapf(storage.ErrHistogramSpansBucketsMismatch, "spans need %d buckets, have %d buckets", needed, numBuckets)
}
// checkHistogramBuckets walks the buckets of one side of a histogram, rejects
// any bucket whose observation count is negative and adds every bucket's count
// to *count.
//
// If deltas is true, each element of buckets is interpreted as the difference
// to the previous bucket's count (the first one relative to zero); otherwise
// each element is the count itself. The returned error wraps
// storage.ErrHistogramNegativeBucketCount and names the 1-based bucket number.
// *count keeps whatever was added before the offending bucket was reached.
func checkHistogramBuckets[BC histogram.BucketCount, IBC histogram.InternalBucketCount](buckets []IBC, count *BC, deltas bool) error {
	var prev IBC
	for idx, raw := range buckets {
		abs := raw
		if deltas {
			abs += prev
		}
		if abs < 0 {
			return errors.Wrapf(storage.ErrHistogramNegativeBucketCount, "bucket number %d has observation count of %v", idx+1, abs)
		}
		*count += BC(abs)
		prev = abs
	}
	return nil
}
var _ storage.GetRef = &headAppender{} var _ storage.GetRef = &headAppender{}
func (a *headAppender) GetRef(lset labels.Labels, hash uint64) (storage.SeriesRef, labels.Labels) { func (a *headAppender) GetRef(lset labels.Labels, hash uint64) (storage.SeriesRef, labels.Labels) {

View file

@ -3419,7 +3419,6 @@ func TestHistogramInWALAndMmapChunk(t *testing.T) {
hists = tsdbutil.GenerateTestHistograms(numHistograms) hists = tsdbutil.GenerateTestHistograms(numHistograms)
} }
for _, h := range hists { for _, h := range hists {
h.Count *= 2
h.NegativeSpans = h.PositiveSpans h.NegativeSpans = h.PositiveSpans
h.NegativeBuckets = h.PositiveBuckets h.NegativeBuckets = h.PositiveBuckets
_, err := app.AppendHistogram(0, s1, ts, h, nil) _, err := app.AppendHistogram(0, s1, ts, h, nil)
@ -3442,7 +3441,6 @@ func TestHistogramInWALAndMmapChunk(t *testing.T) {
hists = tsdbutil.GenerateTestFloatHistograms(numHistograms) hists = tsdbutil.GenerateTestFloatHistograms(numHistograms)
} }
for _, h := range hists { for _, h := range hists {
h.Count *= 2
h.NegativeSpans = h.PositiveSpans h.NegativeSpans = h.PositiveSpans
h.NegativeBuckets = h.PositiveBuckets h.NegativeBuckets = h.PositiveBuckets
_, err := app.AppendHistogram(0, s1, ts, nil, h) _, err := app.AppendHistogram(0, s1, ts, nil, h)
@ -3484,7 +3482,6 @@ func TestHistogramInWALAndMmapChunk(t *testing.T) {
} }
for _, h := range hists { for _, h := range hists {
ts++ ts++
h.Count *= 2
h.NegativeSpans = h.PositiveSpans h.NegativeSpans = h.PositiveSpans
h.NegativeBuckets = h.PositiveBuckets h.NegativeBuckets = h.PositiveBuckets
_, err := app.AppendHistogram(0, s2, ts, h, nil) _, err := app.AppendHistogram(0, s2, ts, h, nil)
@ -3521,7 +3518,6 @@ func TestHistogramInWALAndMmapChunk(t *testing.T) {
} }
for _, h := range hists { for _, h := range hists {
ts++ ts++
h.Count *= 2
h.NegativeSpans = h.PositiveSpans h.NegativeSpans = h.PositiveSpans
h.NegativeBuckets = h.PositiveBuckets h.NegativeBuckets = h.PositiveBuckets
_, err := app.AppendHistogram(0, s2, ts, nil, h) _, err := app.AppendHistogram(0, s2, ts, nil, h)
@ -4898,170 +4894,6 @@ func TestReplayAfterMmapReplayError(t *testing.T) {
require.NoError(t, h.Close()) require.NoError(t, h.Close())
} }
// TestHistogramValidation runs ValidateHistogram over a table of valid and
// invalid histograms and compares the returned error message exactly. Unless a
// case sets skipFloat, the same expectation is also checked with
// ValidateFloatHistogram on the float conversion of the case.
func TestHistogramValidation(t *testing.T) {
tests := map[string]struct {
h *histogram.Histogram
// errMsg is the exact error text expected from validation; an empty string
// means that validation must succeed.
errMsg string
// skipFloat skips the float histogram check. It is set on the cases that
// expect a Count error, which ValidateFloatHistogram does not produce.
skipFloat bool
}{
"valid histogram": {
h: tsdbutil.GenerateTestHistograms(1)[0],
},
"valid histogram that has its Count (4) higher than the actual total of buckets (2 + 1)": {
// This case is possible if NaN values (which do not fall into any bucket) are observed.
h: &histogram.Histogram{
ZeroCount: 2,
Count: 4,
Sum: math.NaN(),
PositiveSpans: []histogram.Span{{Offset: 0, Length: 1}},
PositiveBuckets: []int64{1},
},
},
"rejects histogram that has too few negative buckets": {
h: &histogram.Histogram{
NegativeSpans: []histogram.Span{{Offset: 0, Length: 1}},
NegativeBuckets: []int64{},
},
errMsg: `negative side: spans need 1 buckets, have 0 buckets: histogram spans specify different number of buckets than provided`,
},
"rejects histogram that has too few positive buckets": {
h: &histogram.Histogram{
PositiveSpans: []histogram.Span{{Offset: 0, Length: 1}},
PositiveBuckets: []int64{},
},
errMsg: `positive side: spans need 1 buckets, have 0 buckets: histogram spans specify different number of buckets than provided`,
},
"rejects histogram that has too many negative buckets": {
h: &histogram.Histogram{
NegativeSpans: []histogram.Span{{Offset: 0, Length: 1}},
NegativeBuckets: []int64{1, 2},
},
errMsg: `negative side: spans need 1 buckets, have 2 buckets: histogram spans specify different number of buckets than provided`,
},
"rejects histogram that has too many positive buckets": {
h: &histogram.Histogram{
PositiveSpans: []histogram.Span{{Offset: 0, Length: 1}},
PositiveBuckets: []int64{1, 2},
},
errMsg: `positive side: spans need 1 buckets, have 2 buckets: histogram spans specify different number of buckets than provided`,
},
"rejects a histogram that has a negative span with a negative offset": {
// Only the second span is reported; the first span's negative offset is
// not an error.
h: &histogram.Histogram{
NegativeSpans: []histogram.Span{{Offset: -1, Length: 1}, {Offset: -1, Length: 1}},
NegativeBuckets: []int64{1, 2},
},
errMsg: `negative side: span number 2 with offset -1: histogram has a span whose offset is negative`,
},
"rejects a histogram which has a positive span with a negative offset": {
h: &histogram.Histogram{
PositiveSpans: []histogram.Span{{Offset: -1, Length: 1}, {Offset: -1, Length: 1}},
PositiveBuckets: []int64{1, 2},
},
errMsg: `positive side: span number 2 with offset -1: histogram has a span whose offset is negative`,
},
"rejects a histogram that has a negative bucket with a negative count": {
h: &histogram.Histogram{
NegativeSpans: []histogram.Span{{Offset: -1, Length: 1}},
NegativeBuckets: []int64{-1},
},
errMsg: `negative side: bucket number 1 has observation count of -1: histogram has a bucket whose observation count is negative`,
},
"rejects a histogram that has a positive bucket with a negative count": {
h: &histogram.Histogram{
PositiveSpans: []histogram.Span{{Offset: -1, Length: 1}},
PositiveBuckets: []int64{-1},
},
errMsg: `positive side: bucket number 1 has observation count of -1: histogram has a bucket whose observation count is negative`,
},
"rejects a histogram that has a lower count than count in buckets": {
h: &histogram.Histogram{
Count: 0,
NegativeSpans: []histogram.Span{{Offset: -1, Length: 1}},
PositiveSpans: []histogram.Span{{Offset: -1, Length: 1}},
NegativeBuckets: []int64{1},
PositiveBuckets: []int64{1},
},
errMsg: `2 observations found in buckets, but the Count field is 0: histogram's observation count should be at least the number of observations found in the buckets`,
skipFloat: true,
},
"rejects a histogram that doesn't count the zero bucket in its count": {
h: &histogram.Histogram{
Count: 2,
ZeroCount: 1,
NegativeSpans: []histogram.Span{{Offset: -1, Length: 1}},
PositiveSpans: []histogram.Span{{Offset: -1, Length: 1}},
NegativeBuckets: []int64{1},
PositiveBuckets: []int64{1},
},
errMsg: `3 observations found in buckets, but the Count field is 2: histogram's observation count should be at least the number of observations found in the buckets`,
skipFloat: true,
},
}
for testName, tc := range tests {
t.Run(testName, func(t *testing.T) {
// First validate the integer histogram as given.
if err := ValidateHistogram(tc.h); tc.errMsg != "" {
require.EqualError(t, err, tc.errMsg)
} else {
require.NoError(t, err)
}
if tc.skipFloat {
return
}
// Then expect the very same outcome from the float conversion.
if err := ValidateFloatHistogram(tc.h.ToFloat()); tc.errMsg != "" {
require.EqualError(t, err, tc.errMsg)
} else {
require.NoError(t, err)
}
})
}
}
// BenchmarkHistogramValidation measures ValidateHistogram on generated
// histograms with 500 buckets each. One histogram is generated per benchmark
// iteration before the timer is reset, so generation is not part of the
// measurement.
func BenchmarkHistogramValidation(b *testing.B) {
	hs := generateBigTestHistograms(b.N, 500)
	b.ResetTimer()
	for i := range hs {
		require.NoError(b, ValidateHistogram(hs[i]))
	}
}
// generateBigTestHistograms returns numHistograms histograms, each with
// numBuckets/2 buckets on the negative and on the positive side, spread over
// numBuckets/10 spans of equal length per side. Every bucket delta is 1. The
// i-th histogram has a ZeroCount of i and a Count of
// i + (numBuckets/2)*(1+numBuckets), which is larger than the number of
// observations actually held by its buckets. A numBuckets below 10 results in
// zero spans and a division by zero.
func generateBigTestHistograms(numHistograms, numBuckets int) []*histogram.Histogram {
	var (
		spanCount = numBuckets / 10
		perSide   = numBuckets / 2
		perSpan   = uint32(perSide / spanCount)
		total     = perSide * (1 + numBuckets)
	)

	// Every histogram gets its own freshly allocated slices.
	newSpans := func() []histogram.Span {
		spans := make([]histogram.Span, spanCount)
		for j := range spans {
			spans[j] = histogram.Span{Offset: 1, Length: perSpan}
		}
		return spans
	}
	newBuckets := func() []int64 {
		deltas := make([]int64, perSide)
		for j := range deltas {
			deltas[j] = 1
		}
		return deltas
	}

	var out []*histogram.Histogram
	for i := 0; i < numHistograms; i++ {
		out = append(out, &histogram.Histogram{
			Count:           uint64(i + total),
			ZeroCount:       uint64(i),
			ZeroThreshold:   1e-128,
			Sum:             18.4 * float64(i+1),
			Schema:          2,
			NegativeSpans:   newSpans(),
			PositiveSpans:   newSpans(),
			NegativeBuckets: newBuckets(),
			PositiveBuckets: newBuckets(),
		})
	}
	return out
}
func TestOOOAppendWithNoSeries(t *testing.T) { func TestOOOAppendWithNoSeries(t *testing.T) {
dir := t.TempDir() dir := t.TempDir()
wal, err := wlog.NewSize(nil, nil, filepath.Join(dir, "wal"), 32768, wlog.CompressionSnappy) wal, err := wlog.NewSize(nil, nil, filepath.Join(dir, "wal"), 32768, wlog.CompressionSnappy)
@ -5402,7 +5234,7 @@ func BenchmarkCuttingHeadHistogramChunks(b *testing.B) {
numSamples = 50000 numSamples = 50000
numBuckets = 100 numBuckets = 100
) )
samples := generateBigTestHistograms(numSamples, numBuckets) samples := histogram.GenerateBigTestHistograms(numSamples, numBuckets)
h, _ := newTestHead(b, DefaultBlockDuration, wlog.CompressionNone, false) h, _ := newTestHead(b, DefaultBlockDuration, wlog.CompressionNone, false)
defer func() { defer func() {
@ -5466,7 +5298,7 @@ func TestCuttingNewHeadChunks(t *testing.T) {
"small histograms": { "small histograms": {
numTotalSamples: 240, numTotalSamples: 240,
histValFunc: func() func(i int) *histogram.Histogram { histValFunc: func() func(i int) *histogram.Histogram {
hists := generateBigTestHistograms(240, 10) hists := histogram.GenerateBigTestHistograms(240, 10)
return func(i int) *histogram.Histogram { return func(i int) *histogram.Histogram {
return hists[i] return hists[i]
} }
@ -5482,7 +5314,7 @@ func TestCuttingNewHeadChunks(t *testing.T) {
"large histograms": { "large histograms": {
numTotalSamples: 240, numTotalSamples: 240,
histValFunc: func() func(i int) *histogram.Histogram { histValFunc: func() func(i int) *histogram.Histogram {
hists := generateBigTestHistograms(240, 100) hists := histogram.GenerateBigTestHistograms(240, 100)
return func(i int) *histogram.Histogram { return func(i int) *histogram.Histogram {
return hists[i] return hists[i]
} }
@ -5491,14 +5323,13 @@ func TestCuttingNewHeadChunks(t *testing.T) {
numSamples int numSamples int
numBytes int numBytes int
}{ }{
{30, 696}, {40, 896},
{30, 700}, {40, 899},
{30, 708}, {40, 896},
{30, 693}, {30, 690},
{30, 691}, {30, 691},
{30, 692},
{30, 695},
{30, 694}, {30, 694},
{30, 693},
}, },
}, },
"really large histograms": { "really large histograms": {
@ -5506,7 +5337,7 @@ func TestCuttingNewHeadChunks(t *testing.T) {
// per chunk. // per chunk.
numTotalSamples: 11, numTotalSamples: 11,
histValFunc: func() func(i int) *histogram.Histogram { histValFunc: func() func(i int) *histogram.Histogram {
hists := generateBigTestHistograms(11, 100000) hists := histogram.GenerateBigTestHistograms(11, 100000)
return func(i int) *histogram.Histogram { return func(i int) *histogram.Histogram {
return hists[i] return hists[i]
} }