mirror of
https://github.com/prometheus/prometheus.git
synced 2025-01-11 13:57:36 -08:00
Merge pull request #12977 from linasm/linasm/strict-validation-of-histogram-without-nans
ValidateHistogram: strict Count check in absence of NaNs
This commit is contained in:
commit
69c9c29061
|
@ -17,6 +17,8 @@ import (
|
|||
"fmt"
|
||||
"math"
|
||||
"strings"
|
||||
|
||||
"github.com/pkg/errors"
|
||||
)
|
||||
|
||||
// FloatHistogram is similar to Histogram but uses float64 for all
|
||||
|
@ -593,6 +595,31 @@ func (h *FloatHistogram) AllReverseBucketIterator() BucketIterator[float64] {
|
|||
}
|
||||
}
|
||||
|
||||
// Validate validates consistency between span and bucket slices. Also, buckets are checked
|
||||
// against negative values.
|
||||
// We do not check for h.Count being at least as large as the sum of the
|
||||
// counts in the buckets because floating point precision issues can
|
||||
// create false positives here.
|
||||
func (h *FloatHistogram) Validate() error {
|
||||
if err := checkHistogramSpans(h.NegativeSpans, len(h.NegativeBuckets)); err != nil {
|
||||
return errors.Wrap(err, "negative side")
|
||||
}
|
||||
if err := checkHistogramSpans(h.PositiveSpans, len(h.PositiveBuckets)); err != nil {
|
||||
return errors.Wrap(err, "positive side")
|
||||
}
|
||||
var nCount, pCount float64
|
||||
err := checkHistogramBuckets(h.NegativeBuckets, &nCount, false)
|
||||
if err != nil {
|
||||
return errors.Wrap(err, "negative side")
|
||||
}
|
||||
err = checkHistogramBuckets(h.PositiveBuckets, &pCount, false)
|
||||
if err != nil {
|
||||
return errors.Wrap(err, "positive side")
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// zeroCountForLargerThreshold returns what the histogram's zero count would be
|
||||
// if the ZeroThreshold had the provided larger (or equal) value. If the
|
||||
// provided value is less than the histogram's ZeroThreshold, the method panics.
|
||||
|
|
|
@ -17,6 +17,16 @@ import (
|
|||
"fmt"
|
||||
"math"
|
||||
"strings"
|
||||
|
||||
"github.com/pkg/errors"
|
||||
)
|
||||
|
||||
var (
|
||||
ErrHistogramCountNotBigEnough = errors.New("histogram's observation count should be at least the number of observations found in the buckets")
|
||||
ErrHistogramCountMismatch = errors.New("histogram's observation count should equal the number of observations found in the buckets (in absence of NaN)")
|
||||
ErrHistogramNegativeBucketCount = errors.New("histogram has a bucket whose observation count is negative")
|
||||
ErrHistogramSpanNegativeOffset = errors.New("histogram has a span whose offset is negative")
|
||||
ErrHistogramSpansBucketsMismatch = errors.New("histogram spans specify different number of buckets than provided")
|
||||
)
|
||||
|
||||
// BucketCount is a type constraint for the count in a bucket, which can be
|
||||
|
@ -347,6 +357,52 @@ func compactBuckets[IBC InternalBucketCount](buckets []IBC, spans []Span, maxEmp
|
|||
return buckets, spans
|
||||
}
|
||||
|
||||
func checkHistogramSpans(spans []Span, numBuckets int) error {
|
||||
var spanBuckets int
|
||||
for n, span := range spans {
|
||||
if n > 0 && span.Offset < 0 {
|
||||
return errors.Wrap(
|
||||
ErrHistogramSpanNegativeOffset,
|
||||
fmt.Sprintf("span number %d with offset %d", n+1, span.Offset),
|
||||
)
|
||||
}
|
||||
spanBuckets += int(span.Length)
|
||||
}
|
||||
if spanBuckets != numBuckets {
|
||||
return errors.Wrap(
|
||||
ErrHistogramSpansBucketsMismatch,
|
||||
fmt.Sprintf("spans need %d buckets, have %d buckets", spanBuckets, numBuckets),
|
||||
)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func checkHistogramBuckets[BC BucketCount, IBC InternalBucketCount](buckets []IBC, count *BC, deltas bool) error {
|
||||
if len(buckets) == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
var last IBC
|
||||
for i := 0; i < len(buckets); i++ {
|
||||
var c IBC
|
||||
if deltas {
|
||||
c = last + buckets[i]
|
||||
} else {
|
||||
c = buckets[i]
|
||||
}
|
||||
if c < 0 {
|
||||
return errors.Wrap(
|
||||
ErrHistogramNegativeBucketCount,
|
||||
fmt.Sprintf("bucket number %d has observation count of %v", i+1, c),
|
||||
)
|
||||
}
|
||||
last = c
|
||||
*count += BC(c)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func getBound(idx, schema int32) float64 {
|
||||
// Here a bit of context about the behavior for the last bucket counting
|
||||
// regular numbers (called simply "last bucket" below) and the bucket
|
||||
|
|
|
@ -18,6 +18,7 @@ import (
|
|||
"math"
|
||||
"strings"
|
||||
|
||||
"github.com/pkg/errors"
|
||||
"golang.org/x/exp/slices"
|
||||
)
|
||||
|
||||
|
@ -328,6 +329,50 @@ func (h *Histogram) ToFloat() *FloatHistogram {
|
|||
}
|
||||
}
|
||||
|
||||
// Validate validates consistency between span and bucket slices. Also, buckets are checked
|
||||
// against negative values.
|
||||
// For histograms that have not observed any NaN values (based on IsNaN(h.Sum) check), a
|
||||
// strict h.Count = nCount + pCount + h.ZeroCount check is performed.
|
||||
// Otherwise, only a lower bound check will be done (h.Count >= nCount + pCount + h.ZeroCount),
|
||||
// because NaN observations do not increment the values of buckets (but they do increment
|
||||
// the total h.Count).
|
||||
func (h *Histogram) Validate() error {
|
||||
if err := checkHistogramSpans(h.NegativeSpans, len(h.NegativeBuckets)); err != nil {
|
||||
return errors.Wrap(err, "negative side")
|
||||
}
|
||||
if err := checkHistogramSpans(h.PositiveSpans, len(h.PositiveBuckets)); err != nil {
|
||||
return errors.Wrap(err, "positive side")
|
||||
}
|
||||
var nCount, pCount uint64
|
||||
err := checkHistogramBuckets(h.NegativeBuckets, &nCount, true)
|
||||
if err != nil {
|
||||
return errors.Wrap(err, "negative side")
|
||||
}
|
||||
err = checkHistogramBuckets(h.PositiveBuckets, &pCount, true)
|
||||
if err != nil {
|
||||
return errors.Wrap(err, "positive side")
|
||||
}
|
||||
|
||||
sumOfBuckets := nCount + pCount + h.ZeroCount
|
||||
if math.IsNaN(h.Sum) {
|
||||
if sumOfBuckets > h.Count {
|
||||
return errors.Wrap(
|
||||
ErrHistogramCountNotBigEnough,
|
||||
fmt.Sprintf("%d observations found in buckets, but the Count field is %d", sumOfBuckets, h.Count),
|
||||
)
|
||||
}
|
||||
} else {
|
||||
if sumOfBuckets != h.Count {
|
||||
return errors.Wrap(
|
||||
ErrHistogramCountMismatch,
|
||||
fmt.Sprintf("%d observations found in buckets, but the Count field is %d", sumOfBuckets, h.Count),
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
type regularBucketIterator struct {
|
||||
baseBucketIterator[uint64, int64]
|
||||
}
|
||||
|
|
|
@ -811,3 +811,159 @@ func TestHistogramCompact(t *testing.T) {
|
|||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestHistogramValidation(t *testing.T) {
|
||||
tests := map[string]struct {
|
||||
h *Histogram
|
||||
errMsg string
|
||||
skipFloat bool
|
||||
}{
|
||||
"valid histogram": {
|
||||
h: &Histogram{
|
||||
Count: 12,
|
||||
ZeroCount: 2,
|
||||
ZeroThreshold: 0.001,
|
||||
Sum: 19.4,
|
||||
Schema: 1,
|
||||
PositiveSpans: []Span{
|
||||
{Offset: 0, Length: 2},
|
||||
{Offset: 1, Length: 2},
|
||||
},
|
||||
PositiveBuckets: []int64{1, 1, -1, 0},
|
||||
NegativeSpans: []Span{
|
||||
{Offset: 0, Length: 2},
|
||||
{Offset: 1, Length: 2},
|
||||
},
|
||||
NegativeBuckets: []int64{1, 1, -1, 0},
|
||||
},
|
||||
},
|
||||
"valid histogram with NaN observations that has its Count (4) higher than the actual total of buckets (2 + 1)": {
|
||||
// This case is possible if NaN values (which do not fall into any bucket) are observed.
|
||||
h: &Histogram{
|
||||
ZeroCount: 2,
|
||||
Count: 4,
|
||||
Sum: math.NaN(),
|
||||
PositiveSpans: []Span{{Offset: 0, Length: 1}},
|
||||
PositiveBuckets: []int64{1},
|
||||
},
|
||||
},
|
||||
"rejects histogram without NaN observations that has its Count (4) higher than the actual total of buckets (2 + 1)": {
|
||||
h: &Histogram{
|
||||
ZeroCount: 2,
|
||||
Count: 4,
|
||||
Sum: 333,
|
||||
PositiveSpans: []Span{{Offset: 0, Length: 1}},
|
||||
PositiveBuckets: []int64{1},
|
||||
},
|
||||
errMsg: `3 observations found in buckets, but the Count field is 4: histogram's observation count should equal the number of observations found in the buckets (in absence of NaN)`,
|
||||
skipFloat: true,
|
||||
},
|
||||
"rejects histogram that has too few negative buckets": {
|
||||
h: &Histogram{
|
||||
NegativeSpans: []Span{{Offset: 0, Length: 1}},
|
||||
NegativeBuckets: []int64{},
|
||||
},
|
||||
errMsg: `negative side: spans need 1 buckets, have 0 buckets: histogram spans specify different number of buckets than provided`,
|
||||
},
|
||||
"rejects histogram that has too few positive buckets": {
|
||||
h: &Histogram{
|
||||
PositiveSpans: []Span{{Offset: 0, Length: 1}},
|
||||
PositiveBuckets: []int64{},
|
||||
},
|
||||
errMsg: `positive side: spans need 1 buckets, have 0 buckets: histogram spans specify different number of buckets than provided`,
|
||||
},
|
||||
"rejects histogram that has too many negative buckets": {
|
||||
h: &Histogram{
|
||||
NegativeSpans: []Span{{Offset: 0, Length: 1}},
|
||||
NegativeBuckets: []int64{1, 2},
|
||||
},
|
||||
errMsg: `negative side: spans need 1 buckets, have 2 buckets: histogram spans specify different number of buckets than provided`,
|
||||
},
|
||||
"rejects histogram that has too many positive buckets": {
|
||||
h: &Histogram{
|
||||
PositiveSpans: []Span{{Offset: 0, Length: 1}},
|
||||
PositiveBuckets: []int64{1, 2},
|
||||
},
|
||||
errMsg: `positive side: spans need 1 buckets, have 2 buckets: histogram spans specify different number of buckets than provided`,
|
||||
},
|
||||
"rejects a histogram that has a negative span with a negative offset": {
|
||||
h: &Histogram{
|
||||
NegativeSpans: []Span{{Offset: -1, Length: 1}, {Offset: -1, Length: 1}},
|
||||
NegativeBuckets: []int64{1, 2},
|
||||
},
|
||||
errMsg: `negative side: span number 2 with offset -1: histogram has a span whose offset is negative`,
|
||||
},
|
||||
"rejects a histogram which has a positive span with a negative offset": {
|
||||
h: &Histogram{
|
||||
PositiveSpans: []Span{{Offset: -1, Length: 1}, {Offset: -1, Length: 1}},
|
||||
PositiveBuckets: []int64{1, 2},
|
||||
},
|
||||
errMsg: `positive side: span number 2 with offset -1: histogram has a span whose offset is negative`,
|
||||
},
|
||||
"rejects a histogram that has a negative bucket with a negative count": {
|
||||
h: &Histogram{
|
||||
NegativeSpans: []Span{{Offset: -1, Length: 1}},
|
||||
NegativeBuckets: []int64{-1},
|
||||
},
|
||||
errMsg: `negative side: bucket number 1 has observation count of -1: histogram has a bucket whose observation count is negative`,
|
||||
},
|
||||
"rejects a histogram that has a positive bucket with a negative count": {
|
||||
h: &Histogram{
|
||||
PositiveSpans: []Span{{Offset: -1, Length: 1}},
|
||||
PositiveBuckets: []int64{-1},
|
||||
},
|
||||
errMsg: `positive side: bucket number 1 has observation count of -1: histogram has a bucket whose observation count is negative`,
|
||||
},
|
||||
"rejects a histogram that has a lower count than count in buckets": {
|
||||
h: &Histogram{
|
||||
Count: 0,
|
||||
NegativeSpans: []Span{{Offset: -1, Length: 1}},
|
||||
PositiveSpans: []Span{{Offset: -1, Length: 1}},
|
||||
NegativeBuckets: []int64{1},
|
||||
PositiveBuckets: []int64{1},
|
||||
},
|
||||
errMsg: `2 observations found in buckets, but the Count field is 0: histogram's observation count should equal the number of observations found in the buckets (in absence of NaN)`,
|
||||
skipFloat: true,
|
||||
},
|
||||
"rejects a histogram that doesn't count the zero bucket in its count": {
|
||||
h: &Histogram{
|
||||
Count: 2,
|
||||
ZeroCount: 1,
|
||||
NegativeSpans: []Span{{Offset: -1, Length: 1}},
|
||||
PositiveSpans: []Span{{Offset: -1, Length: 1}},
|
||||
NegativeBuckets: []int64{1},
|
||||
PositiveBuckets: []int64{1},
|
||||
},
|
||||
errMsg: `3 observations found in buckets, but the Count field is 2: histogram's observation count should equal the number of observations found in the buckets (in absence of NaN)`,
|
||||
skipFloat: true,
|
||||
},
|
||||
}
|
||||
|
||||
for testName, tc := range tests {
|
||||
t.Run(testName, func(t *testing.T) {
|
||||
if err := tc.h.Validate(); tc.errMsg != "" {
|
||||
require.EqualError(t, err, tc.errMsg)
|
||||
} else {
|
||||
require.NoError(t, err)
|
||||
}
|
||||
if tc.skipFloat {
|
||||
return
|
||||
}
|
||||
|
||||
fh := tc.h.ToFloat()
|
||||
if err := fh.Validate(); tc.errMsg != "" {
|
||||
require.EqualError(t, err, tc.errMsg)
|
||||
} else {
|
||||
require.NoError(t, err)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkHistogramValidation(b *testing.B) {
|
||||
histograms := GenerateBigTestHistograms(b.N, 500)
|
||||
b.ResetTimer()
|
||||
for _, h := range histograms {
|
||||
require.NoError(b, h.Validate())
|
||||
}
|
||||
}
|
||||
|
|
52
model/histogram/test_utils.go
Normal file
52
model/histogram/test_utils.go
Normal file
|
@ -0,0 +1,52 @@
|
|||
// Copyright 2023 The Prometheus Authors
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package histogram
|
||||
|
||||
// GenerateBigTestHistograms generates a slice of histograms with given number of buckets each.
|
||||
func GenerateBigTestHistograms(numHistograms, numBuckets int) []*Histogram {
|
||||
numSpans := numBuckets / 10
|
||||
bucketsPerSide := numBuckets / 2
|
||||
spanLength := uint32(bucketsPerSide / numSpans)
|
||||
// Given all bucket deltas are 1, sum bucketsPerSide + 1.
|
||||
observationCount := bucketsPerSide * (1 + bucketsPerSide)
|
||||
|
||||
var histograms []*Histogram
|
||||
for i := 0; i < numHistograms; i++ {
|
||||
h := &Histogram{
|
||||
Count: uint64(i + observationCount),
|
||||
ZeroCount: uint64(i),
|
||||
ZeroThreshold: 1e-128,
|
||||
Sum: 18.4 * float64(i+1),
|
||||
Schema: 2,
|
||||
NegativeSpans: make([]Span, numSpans),
|
||||
PositiveSpans: make([]Span, numSpans),
|
||||
NegativeBuckets: make([]int64, bucketsPerSide),
|
||||
PositiveBuckets: make([]int64, bucketsPerSide),
|
||||
}
|
||||
|
||||
for j := 0; j < numSpans; j++ {
|
||||
s := Span{Offset: 1, Length: spanLength}
|
||||
h.NegativeSpans[j] = s
|
||||
h.PositiveSpans[j] = s
|
||||
}
|
||||
|
||||
for j := 0; j < bucketsPerSide; j++ {
|
||||
h.NegativeBuckets[j] = 1
|
||||
h.PositiveBuckets[j] = 1
|
||||
}
|
||||
|
||||
histograms = append(histograms, h)
|
||||
}
|
||||
return histograms
|
||||
}
|
|
@ -3399,7 +3399,7 @@ func TestNativeHistogram_HistogramStdDevVar(t *testing.T) {
|
|||
{
|
||||
name: "-50, -8, 0, 3, 8, 9, 100, +Inf",
|
||||
h: &histogram.Histogram{
|
||||
Count: 8,
|
||||
Count: 7,
|
||||
ZeroCount: 1,
|
||||
Sum: math.Inf(1),
|
||||
Schema: 3,
|
||||
|
|
|
@ -37,16 +37,12 @@ var (
|
|||
// ErrTooOldSample is when out of order support is enabled but the sample is outside the time window allowed.
|
||||
ErrTooOldSample = errors.New("too old sample")
|
||||
// ErrDuplicateSampleForTimestamp is when the sample has same timestamp but different value.
|
||||
ErrDuplicateSampleForTimestamp = errors.New("duplicate sample for timestamp")
|
||||
ErrOutOfOrderExemplar = errors.New("out of order exemplar")
|
||||
ErrDuplicateExemplar = errors.New("duplicate exemplar")
|
||||
ErrExemplarLabelLength = fmt.Errorf("label length for exemplar exceeds maximum of %d UTF-8 characters", exemplar.ExemplarMaxLabelSetLength)
|
||||
ErrExemplarsDisabled = fmt.Errorf("exemplar storage is disabled or max exemplars is less than or equal to 0")
|
||||
ErrNativeHistogramsDisabled = fmt.Errorf("native histograms are disabled")
|
||||
ErrHistogramCountNotBigEnough = errors.New("histogram's observation count should be at least the number of observations found in the buckets")
|
||||
ErrHistogramNegativeBucketCount = errors.New("histogram has a bucket whose observation count is negative")
|
||||
ErrHistogramSpanNegativeOffset = errors.New("histogram has a span whose offset is negative")
|
||||
ErrHistogramSpansBucketsMismatch = errors.New("histogram spans specify different number of buckets than provided")
|
||||
ErrDuplicateSampleForTimestamp = errors.New("duplicate sample for timestamp")
|
||||
ErrOutOfOrderExemplar = errors.New("out of order exemplar")
|
||||
ErrDuplicateExemplar = errors.New("duplicate exemplar")
|
||||
ErrExemplarLabelLength = fmt.Errorf("label length for exemplar exceeds maximum of %d UTF-8 characters", exemplar.ExemplarMaxLabelSetLength)
|
||||
ErrExemplarsDisabled = fmt.Errorf("exemplar storage is disabled or max exemplars is less than or equal to 0")
|
||||
ErrNativeHistogramsDisabled = fmt.Errorf("native histograms are disabled")
|
||||
)
|
||||
|
||||
// SeriesRef is a generic series reference. In prometheus it is either a
|
||||
|
|
|
@ -883,13 +883,13 @@ func (a *appender) AppendExemplar(ref storage.SeriesRef, _ labels.Labels, e exem
|
|||
|
||||
func (a *appender) AppendHistogram(ref storage.SeriesRef, l labels.Labels, t int64, h *histogram.Histogram, fh *histogram.FloatHistogram) (storage.SeriesRef, error) {
|
||||
if h != nil {
|
||||
if err := tsdb.ValidateHistogram(h); err != nil {
|
||||
if err := h.Validate(); err != nil {
|
||||
return 0, err
|
||||
}
|
||||
}
|
||||
|
||||
if fh != nil {
|
||||
if err := tsdb.ValidateFloatHistogram(fh); err != nil {
|
||||
if err := fh.Validate(); err != nil {
|
||||
return 0, err
|
||||
}
|
||||
}
|
||||
|
|
|
@ -508,7 +508,7 @@ func TestAmendHistogramDatapointCausesError(t *testing.T) {
|
|||
|
||||
h := histogram.Histogram{
|
||||
Schema: 3,
|
||||
Count: 61,
|
||||
Count: 52,
|
||||
Sum: 2.7,
|
||||
ZeroThreshold: 0.1,
|
||||
ZeroCount: 42,
|
||||
|
@ -6314,6 +6314,7 @@ func testHistogramAppendAndQueryHelper(t *testing.T, floatHistogram bool) {
|
|||
t.Run("buckets disappearing", func(t *testing.T) {
|
||||
h.PositiveSpans[1].Length--
|
||||
h.PositiveBuckets = h.PositiveBuckets[:len(h.PositiveBuckets)-1]
|
||||
h.Count -= 3
|
||||
appendHistogram(series1, 110, h, &exp1, histogram.CounterReset)
|
||||
testQuery("foo", "bar1", map[string][]chunks.Sample{series1.String(): exp1})
|
||||
})
|
||||
|
@ -6533,7 +6534,7 @@ func TestNativeHistogramFlag(t *testing.T) {
|
|||
require.NoError(t, db.Close())
|
||||
})
|
||||
h := &histogram.Histogram{
|
||||
Count: 10,
|
||||
Count: 9,
|
||||
ZeroCount: 4,
|
||||
ZeroThreshold: 0.001,
|
||||
Sum: 35.5,
|
||||
|
|
|
@ -521,13 +521,13 @@ func (a *headAppender) AppendHistogram(ref storage.SeriesRef, lset labels.Labels
|
|||
}
|
||||
|
||||
if h != nil {
|
||||
if err := ValidateHistogram(h); err != nil {
|
||||
if err := h.Validate(); err != nil {
|
||||
return 0, err
|
||||
}
|
||||
}
|
||||
|
||||
if fh != nil {
|
||||
if err := ValidateFloatHistogram(fh); err != nil {
|
||||
if err := fh.Validate(); err != nil {
|
||||
return 0, err
|
||||
}
|
||||
}
|
||||
|
@ -642,103 +642,6 @@ func (a *headAppender) UpdateMetadata(ref storage.SeriesRef, lset labels.Labels,
|
|||
return ref, nil
|
||||
}
|
||||
|
||||
func ValidateHistogram(h *histogram.Histogram) error {
|
||||
if err := checkHistogramSpans(h.NegativeSpans, len(h.NegativeBuckets)); err != nil {
|
||||
return errors.Wrap(err, "negative side")
|
||||
}
|
||||
if err := checkHistogramSpans(h.PositiveSpans, len(h.PositiveBuckets)); err != nil {
|
||||
return errors.Wrap(err, "positive side")
|
||||
}
|
||||
var nCount, pCount uint64
|
||||
err := checkHistogramBuckets(h.NegativeBuckets, &nCount, true)
|
||||
if err != nil {
|
||||
return errors.Wrap(err, "negative side")
|
||||
}
|
||||
err = checkHistogramBuckets(h.PositiveBuckets, &pCount, true)
|
||||
if err != nil {
|
||||
return errors.Wrap(err, "positive side")
|
||||
}
|
||||
|
||||
if c := nCount + pCount + h.ZeroCount; c > h.Count {
|
||||
return errors.Wrap(
|
||||
storage.ErrHistogramCountNotBigEnough,
|
||||
fmt.Sprintf("%d observations found in buckets, but the Count field is %d", c, h.Count),
|
||||
)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func ValidateFloatHistogram(h *histogram.FloatHistogram) error {
|
||||
if err := checkHistogramSpans(h.NegativeSpans, len(h.NegativeBuckets)); err != nil {
|
||||
return errors.Wrap(err, "negative side")
|
||||
}
|
||||
if err := checkHistogramSpans(h.PositiveSpans, len(h.PositiveBuckets)); err != nil {
|
||||
return errors.Wrap(err, "positive side")
|
||||
}
|
||||
var nCount, pCount float64
|
||||
err := checkHistogramBuckets(h.NegativeBuckets, &nCount, false)
|
||||
if err != nil {
|
||||
return errors.Wrap(err, "negative side")
|
||||
}
|
||||
err = checkHistogramBuckets(h.PositiveBuckets, &pCount, false)
|
||||
if err != nil {
|
||||
return errors.Wrap(err, "positive side")
|
||||
}
|
||||
|
||||
// We do not check for h.Count being at least as large as the sum of the
|
||||
// counts in the buckets because floating point precision issues can
|
||||
// create false positives here.
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func checkHistogramSpans(spans []histogram.Span, numBuckets int) error {
|
||||
var spanBuckets int
|
||||
for n, span := range spans {
|
||||
if n > 0 && span.Offset < 0 {
|
||||
return errors.Wrap(
|
||||
storage.ErrHistogramSpanNegativeOffset,
|
||||
fmt.Sprintf("span number %d with offset %d", n+1, span.Offset),
|
||||
)
|
||||
}
|
||||
spanBuckets += int(span.Length)
|
||||
}
|
||||
if spanBuckets != numBuckets {
|
||||
return errors.Wrap(
|
||||
storage.ErrHistogramSpansBucketsMismatch,
|
||||
fmt.Sprintf("spans need %d buckets, have %d buckets", spanBuckets, numBuckets),
|
||||
)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func checkHistogramBuckets[BC histogram.BucketCount, IBC histogram.InternalBucketCount](buckets []IBC, count *BC, deltas bool) error {
|
||||
if len(buckets) == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
var last IBC
|
||||
for i := 0; i < len(buckets); i++ {
|
||||
var c IBC
|
||||
if deltas {
|
||||
c = last + buckets[i]
|
||||
} else {
|
||||
c = buckets[i]
|
||||
}
|
||||
if c < 0 {
|
||||
return errors.Wrap(
|
||||
storage.ErrHistogramNegativeBucketCount,
|
||||
fmt.Sprintf("bucket number %d has observation count of %v", i+1, c),
|
||||
)
|
||||
}
|
||||
last = c
|
||||
*count += BC(c)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
var _ storage.GetRef = &headAppender{}
|
||||
|
||||
func (a *headAppender) GetRef(lset labels.Labels, hash uint64) (storage.SeriesRef, labels.Labels) {
|
||||
|
|
|
@ -3419,7 +3419,6 @@ func TestHistogramInWALAndMmapChunk(t *testing.T) {
|
|||
hists = tsdbutil.GenerateTestHistograms(numHistograms)
|
||||
}
|
||||
for _, h := range hists {
|
||||
h.Count *= 2
|
||||
h.NegativeSpans = h.PositiveSpans
|
||||
h.NegativeBuckets = h.PositiveBuckets
|
||||
_, err := app.AppendHistogram(0, s1, ts, h, nil)
|
||||
|
@ -3442,7 +3441,6 @@ func TestHistogramInWALAndMmapChunk(t *testing.T) {
|
|||
hists = tsdbutil.GenerateTestFloatHistograms(numHistograms)
|
||||
}
|
||||
for _, h := range hists {
|
||||
h.Count *= 2
|
||||
h.NegativeSpans = h.PositiveSpans
|
||||
h.NegativeBuckets = h.PositiveBuckets
|
||||
_, err := app.AppendHistogram(0, s1, ts, nil, h)
|
||||
|
@ -3484,7 +3482,6 @@ func TestHistogramInWALAndMmapChunk(t *testing.T) {
|
|||
}
|
||||
for _, h := range hists {
|
||||
ts++
|
||||
h.Count *= 2
|
||||
h.NegativeSpans = h.PositiveSpans
|
||||
h.NegativeBuckets = h.PositiveBuckets
|
||||
_, err := app.AppendHistogram(0, s2, ts, h, nil)
|
||||
|
@ -3521,7 +3518,6 @@ func TestHistogramInWALAndMmapChunk(t *testing.T) {
|
|||
}
|
||||
for _, h := range hists {
|
||||
ts++
|
||||
h.Count *= 2
|
||||
h.NegativeSpans = h.PositiveSpans
|
||||
h.NegativeBuckets = h.PositiveBuckets
|
||||
_, err := app.AppendHistogram(0, s2, ts, nil, h)
|
||||
|
@ -4898,170 +4894,6 @@ func TestReplayAfterMmapReplayError(t *testing.T) {
|
|||
require.NoError(t, h.Close())
|
||||
}
|
||||
|
||||
func TestHistogramValidation(t *testing.T) {
|
||||
tests := map[string]struct {
|
||||
h *histogram.Histogram
|
||||
errMsg string
|
||||
skipFloat bool
|
||||
}{
|
||||
"valid histogram": {
|
||||
h: tsdbutil.GenerateTestHistograms(1)[0],
|
||||
},
|
||||
"valid histogram that has its Count (4) higher than the actual total of buckets (2 + 1)": {
|
||||
// This case is possible if NaN values (which do not fall into any bucket) are observed.
|
||||
h: &histogram.Histogram{
|
||||
ZeroCount: 2,
|
||||
Count: 4,
|
||||
Sum: math.NaN(),
|
||||
PositiveSpans: []histogram.Span{{Offset: 0, Length: 1}},
|
||||
PositiveBuckets: []int64{1},
|
||||
},
|
||||
},
|
||||
"rejects histogram that has too few negative buckets": {
|
||||
h: &histogram.Histogram{
|
||||
NegativeSpans: []histogram.Span{{Offset: 0, Length: 1}},
|
||||
NegativeBuckets: []int64{},
|
||||
},
|
||||
errMsg: `negative side: spans need 1 buckets, have 0 buckets: histogram spans specify different number of buckets than provided`,
|
||||
},
|
||||
"rejects histogram that has too few positive buckets": {
|
||||
h: &histogram.Histogram{
|
||||
PositiveSpans: []histogram.Span{{Offset: 0, Length: 1}},
|
||||
PositiveBuckets: []int64{},
|
||||
},
|
||||
errMsg: `positive side: spans need 1 buckets, have 0 buckets: histogram spans specify different number of buckets than provided`,
|
||||
},
|
||||
"rejects histogram that has too many negative buckets": {
|
||||
h: &histogram.Histogram{
|
||||
NegativeSpans: []histogram.Span{{Offset: 0, Length: 1}},
|
||||
NegativeBuckets: []int64{1, 2},
|
||||
},
|
||||
errMsg: `negative side: spans need 1 buckets, have 2 buckets: histogram spans specify different number of buckets than provided`,
|
||||
},
|
||||
"rejects histogram that has too many positive buckets": {
|
||||
h: &histogram.Histogram{
|
||||
PositiveSpans: []histogram.Span{{Offset: 0, Length: 1}},
|
||||
PositiveBuckets: []int64{1, 2},
|
||||
},
|
||||
errMsg: `positive side: spans need 1 buckets, have 2 buckets: histogram spans specify different number of buckets than provided`,
|
||||
},
|
||||
"rejects a histogram that has a negative span with a negative offset": {
|
||||
h: &histogram.Histogram{
|
||||
NegativeSpans: []histogram.Span{{Offset: -1, Length: 1}, {Offset: -1, Length: 1}},
|
||||
NegativeBuckets: []int64{1, 2},
|
||||
},
|
||||
errMsg: `negative side: span number 2 with offset -1: histogram has a span whose offset is negative`,
|
||||
},
|
||||
"rejects a histogram which has a positive span with a negative offset": {
|
||||
h: &histogram.Histogram{
|
||||
PositiveSpans: []histogram.Span{{Offset: -1, Length: 1}, {Offset: -1, Length: 1}},
|
||||
PositiveBuckets: []int64{1, 2},
|
||||
},
|
||||
errMsg: `positive side: span number 2 with offset -1: histogram has a span whose offset is negative`,
|
||||
},
|
||||
"rejects a histogram that has a negative bucket with a negative count": {
|
||||
h: &histogram.Histogram{
|
||||
NegativeSpans: []histogram.Span{{Offset: -1, Length: 1}},
|
||||
NegativeBuckets: []int64{-1},
|
||||
},
|
||||
errMsg: `negative side: bucket number 1 has observation count of -1: histogram has a bucket whose observation count is negative`,
|
||||
},
|
||||
"rejects a histogram that has a positive bucket with a negative count": {
|
||||
h: &histogram.Histogram{
|
||||
PositiveSpans: []histogram.Span{{Offset: -1, Length: 1}},
|
||||
PositiveBuckets: []int64{-1},
|
||||
},
|
||||
errMsg: `positive side: bucket number 1 has observation count of -1: histogram has a bucket whose observation count is negative`,
|
||||
},
|
||||
"rejects a histogram that has a lower count than count in buckets": {
|
||||
h: &histogram.Histogram{
|
||||
Count: 0,
|
||||
NegativeSpans: []histogram.Span{{Offset: -1, Length: 1}},
|
||||
PositiveSpans: []histogram.Span{{Offset: -1, Length: 1}},
|
||||
NegativeBuckets: []int64{1},
|
||||
PositiveBuckets: []int64{1},
|
||||
},
|
||||
errMsg: `2 observations found in buckets, but the Count field is 0: histogram's observation count should be at least the number of observations found in the buckets`,
|
||||
skipFloat: true,
|
||||
},
|
||||
"rejects a histogram that doesn't count the zero bucket in its count": {
|
||||
h: &histogram.Histogram{
|
||||
Count: 2,
|
||||
ZeroCount: 1,
|
||||
NegativeSpans: []histogram.Span{{Offset: -1, Length: 1}},
|
||||
PositiveSpans: []histogram.Span{{Offset: -1, Length: 1}},
|
||||
NegativeBuckets: []int64{1},
|
||||
PositiveBuckets: []int64{1},
|
||||
},
|
||||
errMsg: `3 observations found in buckets, but the Count field is 2: histogram's observation count should be at least the number of observations found in the buckets`,
|
||||
skipFloat: true,
|
||||
},
|
||||
}
|
||||
|
||||
for testName, tc := range tests {
|
||||
t.Run(testName, func(t *testing.T) {
|
||||
if err := ValidateHistogram(tc.h); tc.errMsg != "" {
|
||||
require.EqualError(t, err, tc.errMsg)
|
||||
} else {
|
||||
require.NoError(t, err)
|
||||
}
|
||||
if tc.skipFloat {
|
||||
return
|
||||
}
|
||||
if err := ValidateFloatHistogram(tc.h.ToFloat()); tc.errMsg != "" {
|
||||
require.EqualError(t, err, tc.errMsg)
|
||||
} else {
|
||||
require.NoError(t, err)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkHistogramValidation(b *testing.B) {
|
||||
histograms := generateBigTestHistograms(b.N, 500)
|
||||
b.ResetTimer()
|
||||
for _, h := range histograms {
|
||||
require.NoError(b, ValidateHistogram(h))
|
||||
}
|
||||
}
|
||||
|
||||
func generateBigTestHistograms(numHistograms, numBuckets int) []*histogram.Histogram {
|
||||
numSpans := numBuckets / 10
|
||||
bucketsPerSide := numBuckets / 2
|
||||
spanLength := uint32(bucketsPerSide / numSpans)
|
||||
// Given all bucket deltas are 1, sum numHistograms + 1.
|
||||
observationCount := numBuckets / 2 * (1 + numBuckets)
|
||||
|
||||
var histograms []*histogram.Histogram
|
||||
for i := 0; i < numHistograms; i++ {
|
||||
h := &histogram.Histogram{
|
||||
Count: uint64(i + observationCount),
|
||||
ZeroCount: uint64(i),
|
||||
ZeroThreshold: 1e-128,
|
||||
Sum: 18.4 * float64(i+1),
|
||||
Schema: 2,
|
||||
NegativeSpans: make([]histogram.Span, numSpans),
|
||||
PositiveSpans: make([]histogram.Span, numSpans),
|
||||
NegativeBuckets: make([]int64, bucketsPerSide),
|
||||
PositiveBuckets: make([]int64, bucketsPerSide),
|
||||
}
|
||||
|
||||
for j := 0; j < numSpans; j++ {
|
||||
s := histogram.Span{Offset: 1, Length: spanLength}
|
||||
h.NegativeSpans[j] = s
|
||||
h.PositiveSpans[j] = s
|
||||
}
|
||||
|
||||
for j := 0; j < bucketsPerSide; j++ {
|
||||
h.NegativeBuckets[j] = 1
|
||||
h.PositiveBuckets[j] = 1
|
||||
}
|
||||
|
||||
histograms = append(histograms, h)
|
||||
}
|
||||
return histograms
|
||||
}
|
||||
|
||||
func TestOOOAppendWithNoSeries(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
wal, err := wlog.NewSize(nil, nil, filepath.Join(dir, "wal"), 32768, wlog.CompressionSnappy)
|
||||
|
@ -5402,7 +5234,7 @@ func BenchmarkCuttingHeadHistogramChunks(b *testing.B) {
|
|||
numSamples = 50000
|
||||
numBuckets = 100
|
||||
)
|
||||
samples := generateBigTestHistograms(numSamples, numBuckets)
|
||||
samples := histogram.GenerateBigTestHistograms(numSamples, numBuckets)
|
||||
|
||||
h, _ := newTestHead(b, DefaultBlockDuration, wlog.CompressionNone, false)
|
||||
defer func() {
|
||||
|
@ -5466,7 +5298,7 @@ func TestCuttingNewHeadChunks(t *testing.T) {
|
|||
"small histograms": {
|
||||
numTotalSamples: 240,
|
||||
histValFunc: func() func(i int) *histogram.Histogram {
|
||||
hists := generateBigTestHistograms(240, 10)
|
||||
hists := histogram.GenerateBigTestHistograms(240, 10)
|
||||
return func(i int) *histogram.Histogram {
|
||||
return hists[i]
|
||||
}
|
||||
|
@ -5482,7 +5314,7 @@ func TestCuttingNewHeadChunks(t *testing.T) {
|
|||
"large histograms": {
|
||||
numTotalSamples: 240,
|
||||
histValFunc: func() func(i int) *histogram.Histogram {
|
||||
hists := generateBigTestHistograms(240, 100)
|
||||
hists := histogram.GenerateBigTestHistograms(240, 100)
|
||||
return func(i int) *histogram.Histogram {
|
||||
return hists[i]
|
||||
}
|
||||
|
@ -5491,14 +5323,13 @@ func TestCuttingNewHeadChunks(t *testing.T) {
|
|||
numSamples int
|
||||
numBytes int
|
||||
}{
|
||||
{30, 696},
|
||||
{30, 700},
|
||||
{30, 708},
|
||||
{30, 693},
|
||||
{40, 896},
|
||||
{40, 899},
|
||||
{40, 896},
|
||||
{30, 690},
|
||||
{30, 691},
|
||||
{30, 692},
|
||||
{30, 695},
|
||||
{30, 694},
|
||||
{30, 693},
|
||||
},
|
||||
},
|
||||
"really large histograms": {
|
||||
|
@ -5506,7 +5337,7 @@ func TestCuttingNewHeadChunks(t *testing.T) {
|
|||
// per chunk.
|
||||
numTotalSamples: 11,
|
||||
histValFunc: func() func(i int) *histogram.Histogram {
|
||||
hists := generateBigTestHistograms(11, 100000)
|
||||
hists := histogram.GenerateBigTestHistograms(11, 100000)
|
||||
return func(i int) *histogram.Histogram {
|
||||
return hists[i]
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue