Change Validate to be a method on histogram structs

Signed-off-by: Linas Medziunas <linas.medziunas@gmail.com>
This commit is contained in:
Linas Medziunas 2023-11-03 16:47:59 +02:00
parent 1f8aea11d6
commit ebed7d0612
8 changed files with 288 additions and 315 deletions

View file

@ -17,6 +17,8 @@ import (
"fmt"
"math"
"strings"
"github.com/pkg/errors"
)
// FloatHistogram is similar to Histogram but uses float64 for all
@ -593,6 +595,31 @@ func (h *FloatHistogram) AllReverseBucketIterator() BucketIterator[float64] {
}
}
// Validate reports whether the histogram is internally consistent. On each
// side the spans must account for exactly as many buckets as are stored, and
// no bucket may hold a negative value. Both span checks run before either
// bucket check, and within each phase the negative side is examined first.
//
// h.Count is deliberately not compared against the sum of the bucket counts:
// floating point precision issues can create false positives there.
func (h *FloatHistogram) Validate() error {
	spanErr := checkHistogramSpans(h.NegativeSpans, len(h.NegativeBuckets))
	if spanErr != nil {
		return errors.Wrap(spanErr, "negative side")
	}
	spanErr = checkHistogramSpans(h.PositiveSpans, len(h.PositiveBuckets))
	if spanErr != nil {
		return errors.Wrap(spanErr, "positive side")
	}

	// checkHistogramBuckets needs somewhere to accumulate the totals, but
	// they are not consulted afterwards (see the note on h.Count above).
	var negTotal, posTotal float64
	if bucketErr := checkHistogramBuckets(h.NegativeBuckets, &negTotal, false); bucketErr != nil {
		return errors.Wrap(bucketErr, "negative side")
	}
	if bucketErr := checkHistogramBuckets(h.PositiveBuckets, &posTotal, false); bucketErr != nil {
		return errors.Wrap(bucketErr, "positive side")
	}
	return nil
}
// zeroCountForLargerThreshold returns what the histogram's zero count would be
// if the ZeroThreshold had the provided larger (or equal) value. If the
// provided value is less than the histogram's ZeroThreshold, the method panics.

View file

@ -17,6 +17,16 @@ import (
"fmt"
"math"
"strings"
"github.com/pkg/errors"
)
// Sentinel errors describing why a histogram failed validation. The
// validation code wraps them with additional context before returning them.
var (
// ErrHistogramCountNotBigEnough is used when Sum is NaN and the buckets
// (including the zero bucket) hold more observations than Count.
ErrHistogramCountNotBigEnough = errors.New("histogram's observation count should be at least the number of observations found in the buckets")
// ErrHistogramCountMismatch is used when Sum is not NaN and Count differs
// from the observations held by the buckets (including the zero bucket).
ErrHistogramCountMismatch = errors.New("histogram's observation count should equal the number of observations found in the buckets (in absence of NaN)")
// ErrHistogramNegativeBucketCount is used when a bucket's count is below zero.
ErrHistogramNegativeBucketCount = errors.New("histogram has a bucket whose observation count is negative")
// ErrHistogramSpanNegativeOffset is used when a span other than the first
// one has a negative offset.
ErrHistogramSpanNegativeOffset = errors.New("histogram has a span whose offset is negative")
// ErrHistogramSpansBucketsMismatch is used when the span lengths do not add
// up to the number of buckets provided.
ErrHistogramSpansBucketsMismatch = errors.New("histogram spans specify different number of buckets than provided")
)
// BucketCount is a type constraint for the count in a bucket, which can be
@ -347,6 +357,52 @@ func compactBuckets[IBC InternalBucketCount](buckets []IBC, spans []Span, maxEmp
return buckets, spans
}
// checkHistogramSpans verifies a span list against the number of buckets that
// accompany it. It returns a wrapped ErrHistogramSpanNegativeOffset if any
// span other than the first has a negative offset, and a wrapped
// ErrHistogramSpansBucketsMismatch if the span lengths do not add up to
// numBuckets. The offset of the first span is not inspected.
func checkHistogramSpans(spans []Span, numBuckets int) error {
	required := 0
	for idx := range spans {
		s := spans[idx]
		if idx != 0 && s.Offset < 0 {
			// Span numbers in the message are 1-based.
			return errors.Wrapf(ErrHistogramSpanNegativeOffset, "span number %d with offset %d", idx+1, s.Offset)
		}
		required += int(s.Length)
	}
	if required == numBuckets {
		return nil
	}
	return errors.Wrapf(ErrHistogramSpansBucketsMismatch, "spans need %d buckets, have %d buckets", required, numBuckets)
}
// checkHistogramBuckets walks the buckets in order, rejects the first one whose
// observation count is negative, and adds every accepted count to *count.
//
// With deltas set, each element is interpreted as the difference from the
// previous bucket's count (the first one relative to zero); otherwise each
// element is the count itself. On failure a wrapped
// ErrHistogramNegativeBucketCount is returned and *count retains whatever was
// accumulated from the buckets preceding the offending one.
func checkHistogramBuckets[BC BucketCount, IBC InternalBucketCount](buckets []IBC, count *BC, deltas bool) error {
	var prev IBC
	for pos, raw := range buckets {
		value := raw
		if deltas {
			value = prev + raw
		}
		if value < 0 {
			// Bucket numbers in the message are 1-based.
			return errors.Wrapf(ErrHistogramNegativeBucketCount, "bucket number %d has observation count of %v", pos+1, value)
		}
		prev = value
		*count += BC(value)
	}
	return nil
}
func getBound(idx, schema int32) float64 {
// Here a bit of context about the behavior for the last bucket counting
// regular numbers (called simply "last bucket" below) and the bucket

View file

@ -18,6 +18,7 @@ import (
"math"
"strings"
"github.com/pkg/errors"
"golang.org/x/exp/slices"
)
@ -328,6 +329,50 @@ func (h *Histogram) ToFloat() *FloatHistogram {
}
}
// Validate reports whether the histogram is internally consistent. On each
// side the spans must account for exactly as many buckets as are stored, and
// no bucket (after resolving the delta encoding) may have a negative count.
//
// The Count field is then compared with the observations found in the
// negative, positive and zero buckets. A NaN Sum is taken as the sign that NaN
// values were observed; such observations increment Count but land in no
// bucket, so in that case Count only has to be at least the bucket total.
// Otherwise Count has to equal the bucket total exactly.
func (h *Histogram) Validate() error {
	spanErr := checkHistogramSpans(h.NegativeSpans, len(h.NegativeBuckets))
	if spanErr != nil {
		return errors.Wrap(spanErr, "negative side")
	}
	spanErr = checkHistogramSpans(h.PositiveSpans, len(h.PositiveBuckets))
	if spanErr != nil {
		return errors.Wrap(spanErr, "positive side")
	}

	var negTotal, posTotal uint64
	if bucketErr := checkHistogramBuckets(h.NegativeBuckets, &negTotal, true); bucketErr != nil {
		return errors.Wrap(bucketErr, "negative side")
	}
	if bucketErr := checkHistogramBuckets(h.PositiveBuckets, &posTotal, true); bucketErr != nil {
		return errors.Wrap(bucketErr, "positive side")
	}

	inBuckets := negTotal + posTotal + h.ZeroCount
	sawNaN := math.IsNaN(h.Sum)
	switch {
	case sawNaN && inBuckets > h.Count:
		return errors.Wrapf(ErrHistogramCountNotBigEnough, "%d observations found in buckets, but the Count field is %d", inBuckets, h.Count)
	case !sawNaN && inBuckets != h.Count:
		return errors.Wrapf(ErrHistogramCountMismatch, "%d observations found in buckets, but the Count field is %d", inBuckets, h.Count)
	}
	return nil
}
type regularBucketIterator struct {
baseBucketIterator[uint64, int64]
}

View file

@ -811,3 +811,159 @@ func TestHistogramCompact(t *testing.T) {
})
}
}
// TestHistogramValidation runs every table entry through Histogram.Validate
// and, unless skipFloat is set, through FloatHistogram.Validate on the result
// of ToFloat, expecting the same outcome from both. An empty errMsg means the
// histogram must be accepted.
func TestHistogramValidation(t *testing.T) {
	cases := map[string]struct {
		h         *Histogram
		errMsg    string
		skipFloat bool
	}{
		"valid histogram": {
			h: &Histogram{
				Count:         12,
				ZeroCount:     2,
				ZeroThreshold: 0.001,
				Sum:           19.4,
				Schema:        1,
				PositiveSpans: []Span{
					{Offset: 0, Length: 2},
					{Offset: 1, Length: 2},
				},
				PositiveBuckets: []int64{1, 1, -1, 0},
				NegativeSpans: []Span{
					{Offset: 0, Length: 2},
					{Offset: 1, Length: 2},
				},
				NegativeBuckets: []int64{1, 1, -1, 0},
			},
		},
		"valid histogram with NaN observations that has its Count (4) higher than the actual total of buckets (2 + 1)": {
			// NaN observations fall into no bucket, so Count may exceed the bucket total.
			h: &Histogram{
				ZeroCount:       2,
				Count:           4,
				Sum:             math.NaN(),
				PositiveSpans:   []Span{{Offset: 0, Length: 1}},
				PositiveBuckets: []int64{1},
			},
		},
		"rejects histogram without NaN observations that has its Count (4) higher than the actual total of buckets (2 + 1)": {
			h: &Histogram{
				ZeroCount:       2,
				Count:           4,
				Sum:             333,
				PositiveSpans:   []Span{{Offset: 0, Length: 1}},
				PositiveBuckets: []int64{1},
			},
			errMsg:    `3 observations found in buckets, but the Count field is 4: histogram's observation count should equal the number of observations found in the buckets (in absence of NaN)`,
			skipFloat: true,
		},
		"rejects histogram that has too few negative buckets": {
			h: &Histogram{
				NegativeSpans:   []Span{{Offset: 0, Length: 1}},
				NegativeBuckets: []int64{},
			},
			errMsg: `negative side: spans need 1 buckets, have 0 buckets: histogram spans specify different number of buckets than provided`,
		},
		"rejects histogram that has too few positive buckets": {
			h: &Histogram{
				PositiveSpans:   []Span{{Offset: 0, Length: 1}},
				PositiveBuckets: []int64{},
			},
			errMsg: `positive side: spans need 1 buckets, have 0 buckets: histogram spans specify different number of buckets than provided`,
		},
		"rejects histogram that has too many negative buckets": {
			h: &Histogram{
				NegativeSpans:   []Span{{Offset: 0, Length: 1}},
				NegativeBuckets: []int64{1, 2},
			},
			errMsg: `negative side: spans need 1 buckets, have 2 buckets: histogram spans specify different number of buckets than provided`,
		},
		"rejects histogram that has too many positive buckets": {
			h: &Histogram{
				PositiveSpans:   []Span{{Offset: 0, Length: 1}},
				PositiveBuckets: []int64{1, 2},
			},
			errMsg: `positive side: spans need 1 buckets, have 2 buckets: histogram spans specify different number of buckets than provided`,
		},
		"rejects a histogram that has a negative span with a negative offset": {
			h: &Histogram{
				NegativeSpans:   []Span{{Offset: -1, Length: 1}, {Offset: -1, Length: 1}},
				NegativeBuckets: []int64{1, 2},
			},
			errMsg: `negative side: span number 2 with offset -1: histogram has a span whose offset is negative`,
		},
		"rejects a histogram which has a positive span with a negative offset": {
			h: &Histogram{
				PositiveSpans:   []Span{{Offset: -1, Length: 1}, {Offset: -1, Length: 1}},
				PositiveBuckets: []int64{1, 2},
			},
			errMsg: `positive side: span number 2 with offset -1: histogram has a span whose offset is negative`,
		},
		"rejects a histogram that has a negative bucket with a negative count": {
			h: &Histogram{
				NegativeSpans:   []Span{{Offset: -1, Length: 1}},
				NegativeBuckets: []int64{-1},
			},
			errMsg: `negative side: bucket number 1 has observation count of -1: histogram has a bucket whose observation count is negative`,
		},
		"rejects a histogram that has a positive bucket with a negative count": {
			h: &Histogram{
				PositiveSpans:   []Span{{Offset: -1, Length: 1}},
				PositiveBuckets: []int64{-1},
			},
			errMsg: `positive side: bucket number 1 has observation count of -1: histogram has a bucket whose observation count is negative`,
		},
		"rejects a histogram that has a lower count than count in buckets": {
			h: &Histogram{
				Count:           0,
				NegativeSpans:   []Span{{Offset: -1, Length: 1}},
				PositiveSpans:   []Span{{Offset: -1, Length: 1}},
				NegativeBuckets: []int64{1},
				PositiveBuckets: []int64{1},
			},
			errMsg:    `2 observations found in buckets, but the Count field is 0: histogram's observation count should equal the number of observations found in the buckets (in absence of NaN)`,
			skipFloat: true,
		},
		"rejects a histogram that doesn't count the zero bucket in its count": {
			h: &Histogram{
				Count:           2,
				ZeroCount:       1,
				NegativeSpans:   []Span{{Offset: -1, Length: 1}},
				PositiveSpans:   []Span{{Offset: -1, Length: 1}},
				NegativeBuckets: []int64{1},
				PositiveBuckets: []int64{1},
			},
			errMsg:    `3 observations found in buckets, but the Count field is 2: histogram's observation count should equal the number of observations found in the buckets (in absence of NaN)`,
			skipFloat: true,
		},
	}

	// expectOutcome asserts that err is nil when want is empty, and that its
	// message equals want otherwise.
	expectOutcome := func(t *testing.T, err error, want string) {
		t.Helper()
		if want == "" {
			require.NoError(t, err)
			return
		}
		require.EqualError(t, err, want)
	}

	for name, tc := range cases {
		t.Run(name, func(t *testing.T) {
			expectOutcome(t, tc.h.Validate(), tc.errMsg)
			if tc.skipFloat {
				return
			}
			floatHist := tc.h.ToFloat()
			expectOutcome(t, floatHist.Validate(), tc.errMsg)
		})
	}
}
// BenchmarkHistogramValidation times Validate across the b.N histograms
// returned by GenerateBigTestHistograms(b.N, 500). The timer is reset after
// generation so that only validation is measured, and every histogram is
// required to validate without error.
func BenchmarkHistogramValidation(b *testing.B) {
	inputs := GenerateBigTestHistograms(b.N, 500)
	b.ResetTimer()
	for i := range inputs {
		err := inputs[i].Validate()
		require.NoError(b, err)
	}
}

View file

@ -1,136 +0,0 @@
// Copyright 2023 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package histogram
import (
"fmt"
"math"
"github.com/pkg/errors"
)
// Sentinel errors describing why a histogram failed validation. The
// validation code wraps them with additional context before returning them.
var (
// ErrHistogramCountNotBigEnough is used when Sum is NaN and the buckets
// (including the zero bucket) hold more observations than Count.
ErrHistogramCountNotBigEnough = errors.New("histogram's observation count should be at least the number of observations found in the buckets")
// ErrHistogramCountMismatch is used when Sum is not NaN and Count differs
// from the observations held by the buckets (including the zero bucket).
ErrHistogramCountMismatch = errors.New("histogram's observation count should equal the number of observations found in the buckets (in absence of NaN)")
// ErrHistogramNegativeBucketCount is used when a bucket's count is below zero.
ErrHistogramNegativeBucketCount = errors.New("histogram has a bucket whose observation count is negative")
// ErrHistogramSpanNegativeOffset is used when a span other than the first
// one has a negative offset.
ErrHistogramSpanNegativeOffset = errors.New("histogram has a span whose offset is negative")
// ErrHistogramSpansBucketsMismatch is used when the span lengths do not add
// up to the number of buckets provided.
ErrHistogramSpansBucketsMismatch = errors.New("histogram spans specify different number of buckets than provided")
)
// ValidateHistogram reports whether h is internally consistent. On each side
// the spans must account for exactly as many buckets as are stored, and no
// bucket (after resolving the delta encoding) may have a negative count.
//
// The Count field is then compared with the observations found in the
// negative, positive and zero buckets. When Sum is NaN, Count only has to be
// at least the bucket total; otherwise it has to equal the bucket total.
func ValidateHistogram(h *Histogram) error {
	spanErr := checkHistogramSpans(h.NegativeSpans, len(h.NegativeBuckets))
	if spanErr != nil {
		return errors.Wrap(spanErr, "negative side")
	}
	spanErr = checkHistogramSpans(h.PositiveSpans, len(h.PositiveBuckets))
	if spanErr != nil {
		return errors.Wrap(spanErr, "positive side")
	}

	var negTotal, posTotal uint64
	if bucketErr := checkHistogramBuckets(h.NegativeBuckets, &negTotal, true); bucketErr != nil {
		return errors.Wrap(bucketErr, "negative side")
	}
	if bucketErr := checkHistogramBuckets(h.PositiveBuckets, &posTotal, true); bucketErr != nil {
		return errors.Wrap(bucketErr, "positive side")
	}

	inBuckets := negTotal + posTotal + h.ZeroCount
	sumIsNaN := math.IsNaN(h.Sum)
	switch {
	case sumIsNaN && inBuckets > h.Count:
		return errors.Wrapf(ErrHistogramCountNotBigEnough, "%d observations found in buckets, but the Count field is %d", inBuckets, h.Count)
	case !sumIsNaN && inBuckets != h.Count:
		return errors.Wrapf(ErrHistogramCountMismatch, "%d observations found in buckets, but the Count field is %d", inBuckets, h.Count)
	}
	return nil
}
// ValidateFloatHistogram reports whether h is internally consistent. On each
// side the spans must account for exactly as many buckets as are stored, and
// no bucket may hold a negative value. Both span checks run before either
// bucket check, and within each phase the negative side is examined first.
func ValidateFloatHistogram(h *FloatHistogram) error {
	spanErr := checkHistogramSpans(h.NegativeSpans, len(h.NegativeBuckets))
	if spanErr != nil {
		return errors.Wrap(spanErr, "negative side")
	}
	spanErr = checkHistogramSpans(h.PositiveSpans, len(h.PositiveBuckets))
	if spanErr != nil {
		return errors.Wrap(spanErr, "positive side")
	}

	var negTotal, posTotal float64
	if bucketErr := checkHistogramBuckets(h.NegativeBuckets, &negTotal, false); bucketErr != nil {
		return errors.Wrap(bucketErr, "negative side")
	}
	if bucketErr := checkHistogramBuckets(h.PositiveBuckets, &posTotal, false); bucketErr != nil {
		return errors.Wrap(bucketErr, "positive side")
	}

	// h.Count is intentionally not compared against the bucket totals:
	// floating point precision issues can create false positives there.
	return nil
}
// checkHistogramSpans verifies a span list against the number of buckets that
// accompany it. It returns a wrapped ErrHistogramSpanNegativeOffset if any
// span other than the first has a negative offset, and a wrapped
// ErrHistogramSpansBucketsMismatch if the span lengths do not add up to
// numBuckets. The offset of the first span is not inspected.
func checkHistogramSpans(spans []Span, numBuckets int) error {
	required := 0
	for idx := range spans {
		s := spans[idx]
		if idx != 0 && s.Offset < 0 {
			// Span numbers in the message are 1-based.
			return errors.Wrapf(ErrHistogramSpanNegativeOffset, "span number %d with offset %d", idx+1, s.Offset)
		}
		required += int(s.Length)
	}
	if required == numBuckets {
		return nil
	}
	return errors.Wrapf(ErrHistogramSpansBucketsMismatch, "spans need %d buckets, have %d buckets", required, numBuckets)
}
// checkHistogramBuckets walks the buckets in order, rejects the first one whose
// observation count is negative, and adds every accepted count to *count.
//
// With deltas set, each element is interpreted as the difference from the
// previous bucket's count (the first one relative to zero); otherwise each
// element is the count itself. On failure a wrapped
// ErrHistogramNegativeBucketCount is returned and *count retains whatever was
// accumulated from the buckets preceding the offending one.
func checkHistogramBuckets[BC BucketCount, IBC InternalBucketCount](buckets []IBC, count *BC, deltas bool) error {
	var prev IBC
	for pos, raw := range buckets {
		value := raw
		if deltas {
			value = prev + raw
		}
		if value < 0 {
			// Bucket numbers in the message are 1-based.
			return errors.Wrapf(ErrHistogramNegativeBucketCount, "bucket number %d has observation count of %v", pos+1, value)
		}
		prev = value
		*count += BC(value)
	}
	return nil
}

View file

@ -1,175 +0,0 @@
// Copyright 2023 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package histogram
import (
"math"
"testing"
"github.com/stretchr/testify/require"
)
// TestHistogramValidation runs every table entry through ValidateHistogram
// and, unless skipFloat is set, through ValidateFloatHistogram on the result
// of ToFloat, expecting the same outcome from both. An empty errMsg means the
// histogram must be accepted.
func TestHistogramValidation(t *testing.T) {
	cases := map[string]struct {
		h         *Histogram
		errMsg    string
		skipFloat bool
	}{
		"valid histogram": {
			h: &Histogram{
				Count:         12,
				ZeroCount:     2,
				ZeroThreshold: 0.001,
				Sum:           19.4,
				Schema:        1,
				PositiveSpans: []Span{
					{Offset: 0, Length: 2},
					{Offset: 1, Length: 2},
				},
				PositiveBuckets: []int64{1, 1, -1, 0},
				NegativeSpans: []Span{
					{Offset: 0, Length: 2},
					{Offset: 1, Length: 2},
				},
				NegativeBuckets: []int64{1, 1, -1, 0},
			},
		},
		"valid histogram with NaN observations that has its Count (4) higher than the actual total of buckets (2 + 1)": {
			// NaN observations fall into no bucket, so Count may exceed the bucket total.
			h: &Histogram{
				ZeroCount:       2,
				Count:           4,
				Sum:             math.NaN(),
				PositiveSpans:   []Span{{Offset: 0, Length: 1}},
				PositiveBuckets: []int64{1},
			},
		},
		"rejects histogram without NaN observations that has its Count (4) higher than the actual total of buckets (2 + 1)": {
			h: &Histogram{
				ZeroCount:       2,
				Count:           4,
				Sum:             333,
				PositiveSpans:   []Span{{Offset: 0, Length: 1}},
				PositiveBuckets: []int64{1},
			},
			errMsg:    `3 observations found in buckets, but the Count field is 4: histogram's observation count should equal the number of observations found in the buckets (in absence of NaN)`,
			skipFloat: true,
		},
		"rejects histogram that has too few negative buckets": {
			h: &Histogram{
				NegativeSpans:   []Span{{Offset: 0, Length: 1}},
				NegativeBuckets: []int64{},
			},
			errMsg: `negative side: spans need 1 buckets, have 0 buckets: histogram spans specify different number of buckets than provided`,
		},
		"rejects histogram that has too few positive buckets": {
			h: &Histogram{
				PositiveSpans:   []Span{{Offset: 0, Length: 1}},
				PositiveBuckets: []int64{},
			},
			errMsg: `positive side: spans need 1 buckets, have 0 buckets: histogram spans specify different number of buckets than provided`,
		},
		"rejects histogram that has too many negative buckets": {
			h: &Histogram{
				NegativeSpans:   []Span{{Offset: 0, Length: 1}},
				NegativeBuckets: []int64{1, 2},
			},
			errMsg: `negative side: spans need 1 buckets, have 2 buckets: histogram spans specify different number of buckets than provided`,
		},
		"rejects histogram that has too many positive buckets": {
			h: &Histogram{
				PositiveSpans:   []Span{{Offset: 0, Length: 1}},
				PositiveBuckets: []int64{1, 2},
			},
			errMsg: `positive side: spans need 1 buckets, have 2 buckets: histogram spans specify different number of buckets than provided`,
		},
		"rejects a histogram that has a negative span with a negative offset": {
			h: &Histogram{
				NegativeSpans:   []Span{{Offset: -1, Length: 1}, {Offset: -1, Length: 1}},
				NegativeBuckets: []int64{1, 2},
			},
			errMsg: `negative side: span number 2 with offset -1: histogram has a span whose offset is negative`,
		},
		"rejects a histogram which has a positive span with a negative offset": {
			h: &Histogram{
				PositiveSpans:   []Span{{Offset: -1, Length: 1}, {Offset: -1, Length: 1}},
				PositiveBuckets: []int64{1, 2},
			},
			errMsg: `positive side: span number 2 with offset -1: histogram has a span whose offset is negative`,
		},
		"rejects a histogram that has a negative bucket with a negative count": {
			h: &Histogram{
				NegativeSpans:   []Span{{Offset: -1, Length: 1}},
				NegativeBuckets: []int64{-1},
			},
			errMsg: `negative side: bucket number 1 has observation count of -1: histogram has a bucket whose observation count is negative`,
		},
		"rejects a histogram that has a positive bucket with a negative count": {
			h: &Histogram{
				PositiveSpans:   []Span{{Offset: -1, Length: 1}},
				PositiveBuckets: []int64{-1},
			},
			errMsg: `positive side: bucket number 1 has observation count of -1: histogram has a bucket whose observation count is negative`,
		},
		"rejects a histogram that has a lower count than count in buckets": {
			h: &Histogram{
				Count:           0,
				NegativeSpans:   []Span{{Offset: -1, Length: 1}},
				PositiveSpans:   []Span{{Offset: -1, Length: 1}},
				NegativeBuckets: []int64{1},
				PositiveBuckets: []int64{1},
			},
			errMsg:    `2 observations found in buckets, but the Count field is 0: histogram's observation count should equal the number of observations found in the buckets (in absence of NaN)`,
			skipFloat: true,
		},
		"rejects a histogram that doesn't count the zero bucket in its count": {
			h: &Histogram{
				Count:           2,
				ZeroCount:       1,
				NegativeSpans:   []Span{{Offset: -1, Length: 1}},
				PositiveSpans:   []Span{{Offset: -1, Length: 1}},
				NegativeBuckets: []int64{1},
				PositiveBuckets: []int64{1},
			},
			errMsg:    `3 observations found in buckets, but the Count field is 2: histogram's observation count should equal the number of observations found in the buckets (in absence of NaN)`,
			skipFloat: true,
		},
	}

	// expectOutcome asserts that err is nil when want is empty, and that its
	// message equals want otherwise.
	expectOutcome := func(t *testing.T, err error, want string) {
		t.Helper()
		if want == "" {
			require.NoError(t, err)
			return
		}
		require.EqualError(t, err, want)
	}

	for name, tc := range cases {
		t.Run(name, func(t *testing.T) {
			expectOutcome(t, ValidateHistogram(tc.h), tc.errMsg)
			if tc.skipFloat {
				return
			}
			floatHist := tc.h.ToFloat()
			expectOutcome(t, ValidateFloatHistogram(floatHist), tc.errMsg)
		})
	}
}
// BenchmarkHistogramValidation times ValidateHistogram across the b.N
// histograms returned by GenerateBigTestHistograms(b.N, 500). The timer is
// reset after generation so that only validation is measured, and every
// histogram is required to validate without error.
func BenchmarkHistogramValidation(b *testing.B) {
	inputs := GenerateBigTestHistograms(b.N, 500)
	b.ResetTimer()
	for i := range inputs {
		err := ValidateHistogram(inputs[i])
		require.NoError(b, err)
	}
}

View file

@ -883,13 +883,13 @@ func (a *appender) AppendExemplar(ref storage.SeriesRef, _ labels.Labels, e exem
func (a *appender) AppendHistogram(ref storage.SeriesRef, l labels.Labels, t int64, h *histogram.Histogram, fh *histogram.FloatHistogram) (storage.SeriesRef, error) {
if h != nil {
if err := histogram.ValidateHistogram(h); err != nil {
if err := h.Validate(); err != nil {
return 0, err
}
}
if fh != nil {
if err := histogram.ValidateFloatHistogram(fh); err != nil {
if err := fh.Validate(); err != nil {
return 0, err
}
}

View file

@ -521,13 +521,13 @@ func (a *headAppender) AppendHistogram(ref storage.SeriesRef, lset labels.Labels
}
if h != nil {
if err := histogram.ValidateHistogram(h); err != nil {
if err := h.Validate(); err != nil {
return 0, err
}
}
if fh != nil {
if err := histogram.ValidateFloatHistogram(fh); err != nil {
if err := fh.Validate(); err != nil {
return 0, err
}
}