Add histogram validation (#11052)

* Add histogram validation

Signed-off-by: Levi Harrison <git@leviharrison.dev>

* Correct negative offset validation

Signed-off-by: Levi Harrison <git@leviharrison.dev>

* Address review comments

Signed-off-by: Levi Harrison <git@leviharrison.dev>

* Validation benchmark

Signed-off-by: Levi Harrison <git@leviharrison.dev>

* Add more checks

Signed-off-by: Levi Harrison <git@leviharrison.dev>

* Attempt to fix tests

Signed-off-by: Levi Harrison <git@leviharrison.dev>

* Fix stuff

Signed-off-by: Levi Harrison <git@leviharrison.dev>
This commit is contained in:
Levi Harrison 2022-07-29 09:52:49 -05:00 committed by GitHub
parent cb8582637a
commit 77a7af4461
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 228 additions and 15 deletions

View file

@ -3897,7 +3897,7 @@ func TestSparseHistogram_Sum_AddOperator(t *testing.T) {
histograms: []histogram.Histogram{
{
Schema: 0,
Count: 9,
Count: 21,
Sum: 1234.5,
ZeroThreshold: 0.001,
ZeroCount: 4,
@ -3914,7 +3914,7 @@ func TestSparseHistogram_Sum_AddOperator(t *testing.T) {
},
{
Schema: 0,
Count: 15,
Count: 36,
Sum: 2345.6,
ZeroThreshold: 0.001,
ZeroCount: 5,
@ -3933,7 +3933,7 @@ func TestSparseHistogram_Sum_AddOperator(t *testing.T) {
},
{
Schema: 0,
Count: 15,
Count: 36,
Sum: 1111.1,
ZeroThreshold: 0.001,
ZeroCount: 5,
@ -3955,7 +3955,7 @@ func TestSparseHistogram_Sum_AddOperator(t *testing.T) {
Schema: 0,
ZeroThreshold: 0.001,
ZeroCount: 14,
Count: 39,
Count: 93,
Sum: 4691.2,
PositiveSpans: []histogram.Span{
{Offset: 0, Length: 3},
@ -3988,8 +3988,8 @@ func TestSparseHistogram_Sum_AddOperator(t *testing.T) {
lbls := labels.FromStrings("__name__", seriesName, "idx", fmt.Sprintf("%d", idx))
// Since we mutate h later, we need to create a copy here.
_, err = app.AppendHistogram(0, lbls, ts, h.Copy())
require.NoError(t, err)
}
require.NoError(t, err)
require.NoError(t, app.Commit())
queryAndCheck := func(queryString string) {

View file

@ -27,14 +27,18 @@ import (
// The errors exposed.
var (
ErrNotFound = errors.New("not found")
ErrOutOfOrderSample = errors.New("out of order sample")
ErrDuplicateSampleForTimestamp = errors.New("duplicate sample for timestamp")
ErrOutOfBounds = errors.New("out of bounds")
ErrOutOfOrderExemplar = errors.New("out of order exemplar")
ErrDuplicateExemplar = errors.New("duplicate exemplar")
ErrExemplarLabelLength = fmt.Errorf("label length for exemplar exceeds maximum of %d UTF-8 characters", exemplar.ExemplarMaxLabelSetLength)
ErrExemplarsDisabled = fmt.Errorf("exemplar storage is disabled or max exemplars is less than or equal to 0")
ErrNotFound = errors.New("not found")
ErrOutOfOrderSample = errors.New("out of order sample")
ErrDuplicateSampleForTimestamp = errors.New("duplicate sample for timestamp")
ErrOutOfBounds = errors.New("out of bounds")
ErrOutOfOrderExemplar = errors.New("out of order exemplar")
ErrDuplicateExemplar = errors.New("duplicate exemplar")
ErrExemplarLabelLength = fmt.Errorf("label length for exemplar exceeds maximum of %d UTF-8 characters", exemplar.ExemplarMaxLabelSetLength)
ErrExemplarsDisabled = fmt.Errorf("exemplar storage is disabled or max exemplars is less than or equal to 0")
ErrHistogramSpanNegativeOffset = errors.New("histogram has a span whose offset is negative")
ErrHistogramSpansBucketsMismatch = errors.New("histogram spans specify different number of buckets than provided")
ErrHistogramNegativeBucketCount = errors.New("histogram has a bucket whose observation count is negative")
ErrHistogramCountNotBigEnough = errors.New("histogram's observation count should be at least the number of observations found in the buckets")
)
// SeriesRef is a generic series reference. In prometheus it is either a

View file

@ -418,6 +418,10 @@ func (a *headAppender) AppendHistogram(ref storage.SeriesRef, lset labels.Labels
return 0, storage.ErrOutOfBounds
}
if err := ValidateHistogram(h); err != nil {
return 0, err
}
s := a.head.series.getByID(chunks.HeadSeriesRef(ref))
if s == nil {
// Ensure no empty labels have gotten through.
@ -472,6 +476,76 @@ func (a *headAppender) AppendHistogram(ref storage.SeriesRef, lset labels.Labels
return storage.SeriesRef(s.ref), nil
}
func ValidateHistogram(h *histogram.Histogram) error {
if err := checkHistogramSpans(h.NegativeSpans, len(h.NegativeBuckets)); err != nil {
return errors.Wrap(err, "negative side")
}
if err := checkHistogramSpans(h.PositiveSpans, len(h.PositiveBuckets)); err != nil {
return errors.Wrap(err, "positive side")
}
negativeCount, err := checkHistogramBuckets(h.NegativeBuckets)
if err != nil {
return errors.Wrap(err, "negative side")
}
positiveCount, err := checkHistogramBuckets(h.PositiveBuckets)
if err != nil {
return errors.Wrap(err, "positive side")
}
if c := negativeCount + positiveCount; c > h.Count {
return errors.Wrap(
storage.ErrHistogramCountNotBigEnough,
fmt.Sprintf("%d observations found in buckets, but overall count is %d", c, h.Count),
)
}
return nil
}
func checkHistogramSpans(spans []histogram.Span, numBuckets int) error {
var spanBuckets int
for n, span := range spans {
if n > 0 && span.Offset < 0 {
return errors.Wrap(
storage.ErrHistogramSpanNegativeOffset,
fmt.Sprintf("span number %d with offset %d", n+1, span.Offset),
)
}
spanBuckets += int(span.Length)
}
if spanBuckets != numBuckets {
return errors.Wrap(
storage.ErrHistogramSpansBucketsMismatch,
fmt.Sprintf("spans need %d buckets, have %d buckets", spanBuckets, numBuckets),
)
}
return nil
}
func checkHistogramBuckets(buckets []int64) (uint64, error) {
if len(buckets) == 0 {
return 0, nil
}
var count uint64
var last int64
for i := 0; i < len(buckets); i++ {
c := last + buckets[i]
if c < 0 {
return 0, errors.Wrap(
storage.ErrHistogramNegativeBucketCount,
fmt.Sprintf("bucket number %d has observation count of %d", i+1, c),
)
}
last = c
count += uint64(c)
}
return count, nil
}
var _ storage.GetRef = &headAppender{}
func (a *headAppender) GetRef(lset labels.Labels) (storage.SeriesRef, labels.Labels) {

View file

@ -2865,6 +2865,7 @@ func TestHistogramInWAL(t *testing.T) {
}
expHistograms := make([]timedHistogram, 0, numHistograms)
for i, h := range GenerateTestHistograms(numHistograms) {
h.Count = h.Count * 2
h.NegativeSpans = h.PositiveSpans
h.NegativeBuckets = h.PositiveBuckets
_, err := app.AppendHistogram(0, l, int64(i), h)
@ -3369,8 +3370,9 @@ func TestHistogramCounterResetHeader(t *testing.T) {
h.NegativeSpans = append([]histogram.Span{}, h.PositiveSpans...)
h.NegativeBuckets = append([]int64{}, h.PositiveBuckets...)
}
h.PositiveBuckets[0] = 100
h.NegativeBuckets[0] = 100
h.PositiveBuckets = []int64{100, 1, 1, 1}
h.NegativeBuckets = []int64{100, 1, 1, 1}
h.Count = 1000
// First histogram is UnknownCounterReset.
appendHistogram(h)
@ -3804,3 +3806,136 @@ func TestReplayAfterMmapReplayError(t *testing.T) {
require.NoError(t, h.Close())
}
func TestHistogramValidation(t *testing.T) {
tests := map[string]struct {
h *histogram.Histogram
errMsg string
}{
"valid histogram": {
h: GenerateTestHistograms(1)[0],
},
"rejects histogram who has too few negative buckets": {
h: &histogram.Histogram{
NegativeSpans: []histogram.Span{{Offset: 0, Length: 1}},
NegativeBuckets: []int64{},
},
errMsg: `negative side: spans need 1 buckets, have 0 buckets`,
},
"rejects histogram who has too few positive buckets": {
h: &histogram.Histogram{
PositiveSpans: []histogram.Span{{Offset: 0, Length: 1}},
PositiveBuckets: []int64{},
},
errMsg: `positive side: spans need 1 buckets, have 0 buckets`,
},
"rejects histogram who has too many negative buckets": {
h: &histogram.Histogram{
NegativeSpans: []histogram.Span{{Offset: 0, Length: 1}},
NegativeBuckets: []int64{1, 2},
},
errMsg: `negative side: spans need 1 buckets, have 2 buckets`,
},
"rejects histogram who has too many positive buckets": {
h: &histogram.Histogram{
PositiveSpans: []histogram.Span{{Offset: 0, Length: 1}},
PositiveBuckets: []int64{1, 2},
},
errMsg: `positive side: spans need 1 buckets, have 2 buckets`,
},
"rejects a histogram which has a negative span with a negative offset": {
h: &histogram.Histogram{
NegativeSpans: []histogram.Span{{Offset: -1, Length: 1}, {Offset: -1, Length: 1}},
NegativeBuckets: []int64{1, 2},
},
errMsg: `negative side: span number 2 with offset -1`,
},
"rejects a histogram which has a positive span with a negative offset": {
h: &histogram.Histogram{
PositiveSpans: []histogram.Span{{Offset: -1, Length: 1}, {Offset: -1, Length: 1}},
PositiveBuckets: []int64{1, 2},
},
errMsg: `positive side: span number 2 with offset -1`,
},
"rejects a histogram which has a negative bucket with a negative count": {
h: &histogram.Histogram{
NegativeSpans: []histogram.Span{{Offset: -1, Length: 1}},
NegativeBuckets: []int64{-1},
},
errMsg: `negative side: bucket number 1 has observation count of -1`,
},
"rejects a histogram which has a positive bucket with a negative count": {
h: &histogram.Histogram{
PositiveSpans: []histogram.Span{{Offset: -1, Length: 1}},
PositiveBuckets: []int64{-1},
},
errMsg: `positive side: bucket number 1 has observation count of -1`,
},
"rejects a histogram which which has a lower count than count in buckets": {
h: &histogram.Histogram{
Count: 0,
NegativeSpans: []histogram.Span{{Offset: -1, Length: 1}},
PositiveSpans: []histogram.Span{{Offset: -1, Length: 1}},
NegativeBuckets: []int64{1},
PositiveBuckets: []int64{1},
},
errMsg: `2 observations found in buckets, but overall count is 0`,
},
}
for testName, tc := range tests {
t.Run(testName, func(t *testing.T) {
err := ValidateHistogram(tc.h)
if tc.errMsg != "" {
require.ErrorContains(t, err, tc.errMsg)
} else {
require.NoError(t, err)
}
})
}
}
func BenchmarkHistogramValidation(b *testing.B) {
histograms := generateBigTestHistograms(b.N)
for _, h := range histograms {
require.NoError(b, ValidateHistogram(h))
}
}
func generateBigTestHistograms(n int) []*histogram.Histogram {
const numBuckets = 500
numSpans := numBuckets / 10
bucketsPerSide := numBuckets / 2
spanLength := uint32(bucketsPerSide / numSpans)
// Given all bucket deltas are 1, sum n + 1.
observationCount := numBuckets / 2 * (1 + numBuckets)
var histograms []*histogram.Histogram
for i := 0; i < n; i++ {
h := &histogram.Histogram{
Count: uint64(i + observationCount),
ZeroCount: uint64(i),
ZeroThreshold: 1e-128,
Sum: 18.4 * float64(i+1),
Schema: 2,
NegativeSpans: make([]histogram.Span, numSpans),
PositiveSpans: make([]histogram.Span, numSpans),
NegativeBuckets: make([]int64, bucketsPerSide),
PositiveBuckets: make([]int64, bucketsPerSide),
}
for j := 0; j < numSpans; j++ {
s := histogram.Span{Offset: 1 + int32(i), Length: spanLength}
h.NegativeSpans[j] = s
h.PositiveSpans[j] = s
}
for j := 0; j < bucketsPerSide; j++ {
h.NegativeBuckets[j] = 1
h.PositiveBuckets[j] = 1
}
histograms = append(histograms, h)
}
return histograms
}