Merge pull request #12977 from linasm/linasm/strict-validation-of-histogram-without-nans

ValidateHistogram: strict Count check in absence of NaNs
2025-03-05 20:59:13 -08:00 · 2023-11-05 23:13:29 +01:00 · 2023-11-05 23:13:29 +01:00 · 69c9c29061
parent 7a07a279c9 ebed7d0612
commit 69c9c29061
11 changed files with 359 additions and 292 deletions
--- a/model/histogram/float_histogram.go
+++ b/model/histogram/float_histogram.go
@ -17,6 +17,8 @@ import (
 	"fmt"
 	"math"
 	"strings"
+
+	"github.com/pkg/errors"
 )

 // FloatHistogram is similar to Histogram but uses float64 for all
@ -593,6 +595,31 @@ func (h *FloatHistogram) AllReverseBucketIterator() BucketIterator[float64] {
 	}
 }

+// Validate validates consistency between span and bucket slices. Also, buckets are checked
+// against negative values.
+// We do not check for h.Count being at least as large as the sum of the
+// counts in the buckets because floating point precision issues can
+// create false positives here.
+func (h *FloatHistogram) Validate() error {
+	if err := checkHistogramSpans(h.NegativeSpans, len(h.NegativeBuckets)); err != nil {
+		return errors.Wrap(err, "negative side")
+	}
+	if err := checkHistogramSpans(h.PositiveSpans, len(h.PositiveBuckets)); err != nil {
+		return errors.Wrap(err, "positive side")
+	}
+	var nCount, pCount float64
+	err := checkHistogramBuckets(h.NegativeBuckets, &nCount, false)
+	if err != nil {
+		return errors.Wrap(err, "negative side")
+	}
+	err = checkHistogramBuckets(h.PositiveBuckets, &pCount, false)
+	if err != nil {
+		return errors.Wrap(err, "positive side")
+	}
+
+	return nil
+}
+
 // zeroCountForLargerThreshold returns what the histogram's zero count would be
 // if the ZeroThreshold had the provided larger (or equal) value. If the
 // provided value is less than the histogram's ZeroThreshold, the method panics.
--- a/model/histogram/generic.go
+++ b/model/histogram/generic.go
@ -17,6 +17,16 @@ import (
 	"fmt"
 	"math"
 	"strings"
+
+	"github.com/pkg/errors"
+)
+
+var (
+	ErrHistogramCountNotBigEnough    = errors.New("histogram's observation count should be at least the number of observations found in the buckets")
+	ErrHistogramCountMismatch        = errors.New("histogram's observation count should equal the number of observations found in the buckets (in absence of NaN)")
+	ErrHistogramNegativeBucketCount  = errors.New("histogram has a bucket whose observation count is negative")
+	ErrHistogramSpanNegativeOffset   = errors.New("histogram has a span whose offset is negative")
+	ErrHistogramSpansBucketsMismatch = errors.New("histogram spans specify different number of buckets than provided")
 )

 // BucketCount is a type constraint for the count in a bucket, which can be
@ -347,6 +357,52 @@ func compactBuckets[IBC InternalBucketCount](buckets []IBC, spans []Span, maxEmp
 	return buckets, spans
 }

+func checkHistogramSpans(spans []Span, numBuckets int) error {
+	var spanBuckets int
+	for n, span := range spans {
+		if n > 0 && span.Offset < 0 {
+			return errors.Wrap(
+				ErrHistogramSpanNegativeOffset,
+				fmt.Sprintf("span number %d with offset %d", n+1, span.Offset),
+			)
+		}
+		spanBuckets += int(span.Length)
+	}
+	if spanBuckets != numBuckets {
+		return errors.Wrap(
+			ErrHistogramSpansBucketsMismatch,
+			fmt.Sprintf("spans need %d buckets, have %d buckets", spanBuckets, numBuckets),
+		)
+	}
+	return nil
+}
+
+func checkHistogramBuckets[BC BucketCount, IBC InternalBucketCount](buckets []IBC, count *BC, deltas bool) error {
+	if len(buckets) == 0 {
+		return nil
+	}
+
+	var last IBC
+	for i := 0; i < len(buckets); i++ {
+		var c IBC
+		if deltas {
+			c = last + buckets[i]
+		} else {
+			c = buckets[i]
+		}
+		if c < 0 {
+			return errors.Wrap(
+				ErrHistogramNegativeBucketCount,
+				fmt.Sprintf("bucket number %d has observation count of %v", i+1, c),
+			)
+		}
+		last = c
+		*count += BC(c)
+	}
+
+	return nil
+}
+
 func getBound(idx, schema int32) float64 {
 	// Here a bit of context about the behavior for the last bucket counting
 	// regular numbers (called simply "last bucket" below) and the bucket
--- a/model/histogram/histogram.go
+++ b/model/histogram/histogram.go
@ -18,6 +18,7 @@ import (
 	"math"
 	"strings"

+	"github.com/pkg/errors"
 	"golang.org/x/exp/slices"
 )

@ -328,6 +329,50 @@ func (h *Histogram) ToFloat() *FloatHistogram {
 	}
 }

+// Validate validates consistency between span and bucket slices. Also, buckets are checked
+// against negative values.
+// For histograms that have not observed any NaN values (based on IsNaN(h.Sum) check), a
+// strict h.Count = nCount + pCount + h.ZeroCount check is performed.
+// Otherwise, only a lower bound check will be done (h.Count >= nCount + pCount + h.ZeroCount),
+// because NaN observations do not increment the values of buckets (but they do increment
+// the total h.Count).
+func (h *Histogram) Validate() error {
+	if err := checkHistogramSpans(h.NegativeSpans, len(h.NegativeBuckets)); err != nil {
+		return errors.Wrap(err, "negative side")
+	}
+	if err := checkHistogramSpans(h.PositiveSpans, len(h.PositiveBuckets)); err != nil {
+		return errors.Wrap(err, "positive side")
+	}
+	var nCount, pCount uint64
+	err := checkHistogramBuckets(h.NegativeBuckets, &nCount, true)
+	if err != nil {
+		return errors.Wrap(err, "negative side")
+	}
+	err = checkHistogramBuckets(h.PositiveBuckets, &pCount, true)
+	if err != nil {
+		return errors.Wrap(err, "positive side")
+	}
+
+	sumOfBuckets := nCount + pCount + h.ZeroCount
+	if math.IsNaN(h.Sum) {
+		if sumOfBuckets > h.Count {
+			return errors.Wrap(
+				ErrHistogramCountNotBigEnough,
+				fmt.Sprintf("%d observations found in buckets, but the Count field is %d", sumOfBuckets, h.Count),
+			)
+		}
+	} else {
+		if sumOfBuckets != h.Count {
+			return errors.Wrap(
+				ErrHistogramCountMismatch,
+				fmt.Sprintf("%d observations found in buckets, but the Count field is %d", sumOfBuckets, h.Count),
+			)
+		}
+	}
+
+	return nil
+}
+
 type regularBucketIterator struct {
 	baseBucketIterator[uint64, int64]
 }
--- a/model/histogram/histogram_test.go
+++ b/model/histogram/histogram_test.go
@ -811,3 +811,159 @@ func TestHistogramCompact(t *testing.T) {
 		})
 	}
 }
+
+func TestHistogramValidation(t *testing.T) {
+	tests := map[string]struct {
+		h         *Histogram
+		errMsg    string
+		skipFloat bool
+	}{
+		"valid histogram": {
+			h: &Histogram{
+				Count:         12,
+				ZeroCount:     2,
+				ZeroThreshold: 0.001,
+				Sum:           19.4,
+				Schema:        1,
+				PositiveSpans: []Span{
+					{Offset: 0, Length: 2},
+					{Offset: 1, Length: 2},
+				},
+				PositiveBuckets: []int64{1, 1, -1, 0},
+				NegativeSpans: []Span{
+					{Offset: 0, Length: 2},
+					{Offset: 1, Length: 2},
+				},
+				NegativeBuckets: []int64{1, 1, -1, 0},
+			},
+		},
+		"valid histogram with NaN observations that has its Count (4) higher than the actual total of buckets (2 + 1)": {
+			// This case is possible if NaN values (which do not fall into any bucket) are observed.
+			h: &Histogram{
+				ZeroCount:       2,
+				Count:           4,
+				Sum:             math.NaN(),
+				PositiveSpans:   []Span{{Offset: 0, Length: 1}},
+				PositiveBuckets: []int64{1},
+			},
+		},
+		"rejects histogram without NaN observations that has its Count (4) higher than the actual total of buckets (2 + 1)": {
+			h: &Histogram{
+				ZeroCount:       2,
+				Count:           4,
+				Sum:             333,
+				PositiveSpans:   []Span{{Offset: 0, Length: 1}},
+				PositiveBuckets: []int64{1},
+			},
+			errMsg:    `3 observations found in buckets, but the Count field is 4: histogram's observation count should equal the number of observations found in the buckets (in absence of NaN)`,
+			skipFloat: true,
+		},
+		"rejects histogram that has too few negative buckets": {
+			h: &Histogram{
+				NegativeSpans:   []Span{{Offset: 0, Length: 1}},
+				NegativeBuckets: []int64{},
+			},
+			errMsg: `negative side: spans need 1 buckets, have 0 buckets: histogram spans specify different number of buckets than provided`,
+		},
+		"rejects histogram that has too few positive buckets": {
+			h: &Histogram{
+				PositiveSpans:   []Span{{Offset: 0, Length: 1}},
+				PositiveBuckets: []int64{},
+			},
+			errMsg: `positive side: spans need 1 buckets, have 0 buckets: histogram spans specify different number of buckets than provided`,
+		},
+		"rejects histogram that has too many negative buckets": {
+			h: &Histogram{
+				NegativeSpans:   []Span{{Offset: 0, Length: 1}},
+				NegativeBuckets: []int64{1, 2},
+			},
+			errMsg: `negative side: spans need 1 buckets, have 2 buckets: histogram spans specify different number of buckets than provided`,
+		},
+		"rejects histogram that has too many positive buckets": {
+			h: &Histogram{
+				PositiveSpans:   []Span{{Offset: 0, Length: 1}},
+				PositiveBuckets: []int64{1, 2},
+			},
+			errMsg: `positive side: spans need 1 buckets, have 2 buckets: histogram spans specify different number of buckets than provided`,
+		},
+		"rejects a histogram that has a negative span with a negative offset": {
+			h: &Histogram{
+				NegativeSpans:   []Span{{Offset: -1, Length: 1}, {Offset: -1, Length: 1}},
+				NegativeBuckets: []int64{1, 2},
+			},
+			errMsg: `negative side: span number 2 with offset -1: histogram has a span whose offset is negative`,
+		},
+		"rejects a histogram which has a positive span with a negative offset": {
+			h: &Histogram{
+				PositiveSpans:   []Span{{Offset: -1, Length: 1}, {Offset: -1, Length: 1}},
+				PositiveBuckets: []int64{1, 2},
+			},
+			errMsg: `positive side: span number 2 with offset -1: histogram has a span whose offset is negative`,
+		},
+		"rejects a histogram that has a negative bucket with a negative count": {
+			h: &Histogram{
+				NegativeSpans:   []Span{{Offset: -1, Length: 1}},
+				NegativeBuckets: []int64{-1},
+			},
+			errMsg: `negative side: bucket number 1 has observation count of -1: histogram has a bucket whose observation count is negative`,
+		},
+		"rejects a histogram that has a positive bucket with a negative count": {
+			h: &Histogram{
+				PositiveSpans:   []Span{{Offset: -1, Length: 1}},
+				PositiveBuckets: []int64{-1},
+			},
+			errMsg: `positive side: bucket number 1 has observation count of -1: histogram has a bucket whose observation count is negative`,
+		},
+		"rejects a histogram that has a lower count than count in buckets": {
+			h: &Histogram{
+				Count:           0,
+				NegativeSpans:   []Span{{Offset: -1, Length: 1}},
+				PositiveSpans:   []Span{{Offset: -1, Length: 1}},
+				NegativeBuckets: []int64{1},
+				PositiveBuckets: []int64{1},
+			},
+			errMsg:    `2 observations found in buckets, but the Count field is 0: histogram's observation count should equal the number of observations found in the buckets (in absence of NaN)`,
+			skipFloat: true,
+		},
+		"rejects a histogram that doesn't count the zero bucket in its count": {
+			h: &Histogram{
+				Count:           2,
+				ZeroCount:       1,
+				NegativeSpans:   []Span{{Offset: -1, Length: 1}},
+				PositiveSpans:   []Span{{Offset: -1, Length: 1}},
+				NegativeBuckets: []int64{1},
+				PositiveBuckets: []int64{1},
+			},
+			errMsg:    `3 observations found in buckets, but the Count field is 2: histogram's observation count should equal the number of observations found in the buckets (in absence of NaN)`,
+			skipFloat: true,
+		},
+	}
+
+	for testName, tc := range tests {
+		t.Run(testName, func(t *testing.T) {
+			if err := tc.h.Validate(); tc.errMsg != "" {
+				require.EqualError(t, err, tc.errMsg)
+			} else {
+				require.NoError(t, err)
+			}
+			if tc.skipFloat {
+				return
+			}
+
+			fh := tc.h.ToFloat()
+			if err := fh.Validate(); tc.errMsg != "" {
+				require.EqualError(t, err, tc.errMsg)
+			} else {
+				require.NoError(t, err)
+			}
+		})
+	}
+}
+
+func BenchmarkHistogramValidation(b *testing.B) {
+	histograms := GenerateBigTestHistograms(b.N, 500)
+	b.ResetTimer()
+	for _, h := range histograms {
+		require.NoError(b, h.Validate())
+	}
+}
--- a/model/histogram/test_utils.go
+++ b/model/histogram/test_utils.go
@ -0,0 +1,52 @@
+// Copyright 2023 The Prometheus Authors
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package histogram
+
+// GenerateBigTestHistograms generates a slice of histograms with given number of buckets each.
+func GenerateBigTestHistograms(numHistograms, numBuckets int) []*Histogram {
+	numSpans := numBuckets / 10
+	bucketsPerSide := numBuckets / 2
+	spanLength := uint32(bucketsPerSide / numSpans)
+	// Given all bucket deltas are 1, sum bucketsPerSide + 1.
+	observationCount := bucketsPerSide * (1 + bucketsPerSide)
+
+	var histograms []*Histogram
+	for i := 0; i < numHistograms; i++ {
+		h := &Histogram{
+			Count:           uint64(i + observationCount),
+			ZeroCount:       uint64(i),
+			ZeroThreshold:   1e-128,
+			Sum:             18.4 * float64(i+1),
+			Schema:          2,
+			NegativeSpans:   make([]Span, numSpans),
+			PositiveSpans:   make([]Span, numSpans),
+			NegativeBuckets: make([]int64, bucketsPerSide),
+			PositiveBuckets: make([]int64, bucketsPerSide),
+		}
+
+		for j := 0; j < numSpans; j++ {
+			s := Span{Offset: 1, Length: spanLength}
+			h.NegativeSpans[j] = s
+			h.PositiveSpans[j] = s
+		}
+
+		for j := 0; j < bucketsPerSide; j++ {
+			h.NegativeBuckets[j] = 1
+			h.PositiveBuckets[j] = 1
+		}
+
+		histograms = append(histograms, h)
+	}
+	return histograms
+}
--- a/promql/engine_test.go
+++ b/promql/engine_test.go
@ -3399,7 +3399,7 @@ func TestNativeHistogram_HistogramStdDevVar(t *testing.T) {
 		{
 			name: "-50, -8, 0, 3, 8, 9, 100, +Inf",
 			h: &histogram.Histogram{
-				Count:     8,
+				Count:     7,
 				ZeroCount: 1,
 				Sum:       math.Inf(1),
 				Schema:    3,
--- a/storage/interface.go
+++ b/storage/interface.go
@ -37,16 +37,12 @@ var (
 	// ErrTooOldSample is when out of order support is enabled but the sample is outside the time window allowed.
 	ErrTooOldSample = errors.New("too old sample")
 	// ErrDuplicateSampleForTimestamp is when the sample has same timestamp but different value.
-	ErrDuplicateSampleForTimestamp   = errors.New("duplicate sample for timestamp")
-	ErrOutOfOrderExemplar            = errors.New("out of order exemplar")
-	ErrDuplicateExemplar             = errors.New("duplicate exemplar")
-	ErrExemplarLabelLength           = fmt.Errorf("label length for exemplar exceeds maximum of %d UTF-8 characters", exemplar.ExemplarMaxLabelSetLength)
-	ErrExemplarsDisabled             = fmt.Errorf("exemplar storage is disabled or max exemplars is less than or equal to 0")
-	ErrNativeHistogramsDisabled      = fmt.Errorf("native histograms are disabled")
-	ErrHistogramCountNotBigEnough    = errors.New("histogram's observation count should be at least the number of observations found in the buckets")
-	ErrHistogramNegativeBucketCount  = errors.New("histogram has a bucket whose observation count is negative")
-	ErrHistogramSpanNegativeOffset   = errors.New("histogram has a span whose offset is negative")
-	ErrHistogramSpansBucketsMismatch = errors.New("histogram spans specify different number of buckets than provided")
+	ErrDuplicateSampleForTimestamp = errors.New("duplicate sample for timestamp")
+	ErrOutOfOrderExemplar          = errors.New("out of order exemplar")
+	ErrDuplicateExemplar           = errors.New("duplicate exemplar")
+	ErrExemplarLabelLength         = fmt.Errorf("label length for exemplar exceeds maximum of %d UTF-8 characters", exemplar.ExemplarMaxLabelSetLength)
+	ErrExemplarsDisabled           = fmt.Errorf("exemplar storage is disabled or max exemplars is less than or equal to 0")
+	ErrNativeHistogramsDisabled    = fmt.Errorf("native histograms are disabled")
 )

 // SeriesRef is a generic series reference. In prometheus it is either a
--- a/tsdb/agent/db.go
+++ b/tsdb/agent/db.go
@ -883,13 +883,13 @@ func (a *appender) AppendExemplar(ref storage.SeriesRef, _ labels.Labels, e exem

 func (a *appender) AppendHistogram(ref storage.SeriesRef, l labels.Labels, t int64, h *histogram.Histogram, fh *histogram.FloatHistogram) (storage.SeriesRef, error) {
 	if h != nil {
-		if err := tsdb.ValidateHistogram(h); err != nil {
+		if err := h.Validate(); err != nil {
 			return 0, err
 		}
 	}

 	if fh != nil {
-		if err := tsdb.ValidateFloatHistogram(fh); err != nil {
+		if err := fh.Validate(); err != nil {
 			return 0, err
 		}
 	}
--- a/tsdb/db_test.go
+++ b/tsdb/db_test.go
@ -508,7 +508,7 @@ func TestAmendHistogramDatapointCausesError(t *testing.T) {

 	h := histogram.Histogram{
 		Schema:        3,
-		Count:         61,
+		Count:         52,
 		Sum:           2.7,
 		ZeroThreshold: 0.1,
 		ZeroCount:     42,
@ -6314,6 +6314,7 @@ func testHistogramAppendAndQueryHelper(t *testing.T, floatHistogram bool) {
 		t.Run("buckets disappearing", func(t *testing.T) {
 			h.PositiveSpans[1].Length--
 			h.PositiveBuckets = h.PositiveBuckets[:len(h.PositiveBuckets)-1]
+			h.Count -= 3
 			appendHistogram(series1, 110, h, &exp1, histogram.CounterReset)
 			testQuery("foo", "bar1", map[string][]chunks.Sample{series1.String(): exp1})
 		})
@ -6533,7 +6534,7 @@ func TestNativeHistogramFlag(t *testing.T) {
 		require.NoError(t, db.Close())
 	})
 	h := &histogram.Histogram{
-		Count:         10,
+		Count:         9,
 		ZeroCount:     4,
 		ZeroThreshold: 0.001,
 		Sum:           35.5,
--- a/tsdb/head_append.go
+++ b/tsdb/head_append.go
@ -521,13 +521,13 @@ func (a *headAppender) AppendHistogram(ref storage.SeriesRef, lset labels.Labels
 	}

 	if h != nil {
-		if err := ValidateHistogram(h); err != nil {
+		if err := h.Validate(); err != nil {
 			return 0, err
 		}
 	}

 	if fh != nil {
-		if err := ValidateFloatHistogram(fh); err != nil {
+		if err := fh.Validate(); err != nil {
 			return 0, err
 		}
 	}
@ -642,103 +642,6 @@ func (a *headAppender) UpdateMetadata(ref storage.SeriesRef, lset labels.Labels,
 	return ref, nil
 }

-func ValidateHistogram(h *histogram.Histogram) error {
-	if err := checkHistogramSpans(h.NegativeSpans, len(h.NegativeBuckets)); err != nil {
-		return errors.Wrap(err, "negative side")
-	}
-	if err := checkHistogramSpans(h.PositiveSpans, len(h.PositiveBuckets)); err != nil {
-		return errors.Wrap(err, "positive side")
-	}
-	var nCount, pCount uint64
-	err := checkHistogramBuckets(h.NegativeBuckets, &nCount, true)
-	if err != nil {
-		return errors.Wrap(err, "negative side")
-	}
-	err = checkHistogramBuckets(h.PositiveBuckets, &pCount, true)
-	if err != nil {
-		return errors.Wrap(err, "positive side")
-	}
-
-	if c := nCount + pCount + h.ZeroCount; c > h.Count {
-		return errors.Wrap(
-			storage.ErrHistogramCountNotBigEnough,
-			fmt.Sprintf("%d observations found in buckets, but the Count field is %d", c, h.Count),
-		)
-	}
-
-	return nil
-}
-
-func ValidateFloatHistogram(h *histogram.FloatHistogram) error {
-	if err := checkHistogramSpans(h.NegativeSpans, len(h.NegativeBuckets)); err != nil {
-		return errors.Wrap(err, "negative side")
-	}
-	if err := checkHistogramSpans(h.PositiveSpans, len(h.PositiveBuckets)); err != nil {
-		return errors.Wrap(err, "positive side")
-	}
-	var nCount, pCount float64
-	err := checkHistogramBuckets(h.NegativeBuckets, &nCount, false)
-	if err != nil {
-		return errors.Wrap(err, "negative side")
-	}
-	err = checkHistogramBuckets(h.PositiveBuckets, &pCount, false)
-	if err != nil {
-		return errors.Wrap(err, "positive side")
-	}
-
-	// We do not check for h.Count being at least as large as the sum of the
-	// counts in the buckets because floating point precision issues can
-	// create false positives here.
-
-	return nil
-}
-
-func checkHistogramSpans(spans []histogram.Span, numBuckets int) error {
-	var spanBuckets int
-	for n, span := range spans {
-		if n > 0 && span.Offset < 0 {
-			return errors.Wrap(
-				storage.ErrHistogramSpanNegativeOffset,
-				fmt.Sprintf("span number %d with offset %d", n+1, span.Offset),
-			)
-		}
-		spanBuckets += int(span.Length)
-	}
-	if spanBuckets != numBuckets {
-		return errors.Wrap(
-			storage.ErrHistogramSpansBucketsMismatch,
-			fmt.Sprintf("spans need %d buckets, have %d buckets", spanBuckets, numBuckets),
-		)
-	}
-	return nil
-}
-
-func checkHistogramBuckets[BC histogram.BucketCount, IBC histogram.InternalBucketCount](buckets []IBC, count *BC, deltas bool) error {
-	if len(buckets) == 0 {
-		return nil
-	}
-
-	var last IBC
-	for i := 0; i < len(buckets); i++ {
-		var c IBC
-		if deltas {
-			c = last + buckets[i]
-		} else {
-			c = buckets[i]
-		}
-		if c < 0 {
-			return errors.Wrap(
-				storage.ErrHistogramNegativeBucketCount,
-				fmt.Sprintf("bucket number %d has observation count of %v", i+1, c),
-			)
-		}
-		last = c
-		*count += BC(c)
-	}
-
-	return nil
-}
-
 var _ storage.GetRef = &headAppender{}

 func (a *headAppender) GetRef(lset labels.Labels, hash uint64) (storage.SeriesRef, labels.Labels) {
--- a/tsdb/head_test.go
+++ b/tsdb/head_test.go
@ -3419,7 +3419,6 @@ func TestHistogramInWALAndMmapChunk(t *testing.T) {
 			hists = tsdbutil.GenerateTestHistograms(numHistograms)
 		}
 		for _, h := range hists {
-			h.Count *= 2
 			h.NegativeSpans = h.PositiveSpans
 			h.NegativeBuckets = h.PositiveBuckets
 			_, err := app.AppendHistogram(0, s1, ts, h, nil)
@ -3442,7 +3441,6 @@ func TestHistogramInWALAndMmapChunk(t *testing.T) {
 			hists = tsdbutil.GenerateTestFloatHistograms(numHistograms)
 		}
 		for _, h := range hists {
-			h.Count *= 2
 			h.NegativeSpans = h.PositiveSpans
 			h.NegativeBuckets = h.PositiveBuckets
 			_, err := app.AppendHistogram(0, s1, ts, nil, h)
@ -3484,7 +3482,6 @@ func TestHistogramInWALAndMmapChunk(t *testing.T) {
 		}
 		for _, h := range hists {
 			ts++
-			h.Count *= 2
 			h.NegativeSpans = h.PositiveSpans
 			h.NegativeBuckets = h.PositiveBuckets
 			_, err := app.AppendHistogram(0, s2, ts, h, nil)
@ -3521,7 +3518,6 @@ func TestHistogramInWALAndMmapChunk(t *testing.T) {
 		}
 		for _, h := range hists {
 			ts++
-			h.Count *= 2
 			h.NegativeSpans = h.PositiveSpans
 			h.NegativeBuckets = h.PositiveBuckets
 			_, err := app.AppendHistogram(0, s2, ts, nil, h)
@ -4898,170 +4894,6 @@ func TestReplayAfterMmapReplayError(t *testing.T) {
 	require.NoError(t, h.Close())
 }

-func TestHistogramValidation(t *testing.T) {
-	tests := map[string]struct {
-		h         *histogram.Histogram
-		errMsg    string
-		skipFloat bool
-	}{
-		"valid histogram": {
-			h: tsdbutil.GenerateTestHistograms(1)[0],
-		},
-		"valid histogram that has its Count (4) higher than the actual total of buckets (2 + 1)": {
-			// This case is possible if NaN values (which do not fall into any bucket) are observed.
-			h: &histogram.Histogram{
-				ZeroCount:       2,
-				Count:           4,
-				Sum:             math.NaN(),
-				PositiveSpans:   []histogram.Span{{Offset: 0, Length: 1}},
-				PositiveBuckets: []int64{1},
-			},
-		},
-		"rejects histogram that has too few negative buckets": {
-			h: &histogram.Histogram{
-				NegativeSpans:   []histogram.Span{{Offset: 0, Length: 1}},
-				NegativeBuckets: []int64{},
-			},
-			errMsg: `negative side: spans need 1 buckets, have 0 buckets: histogram spans specify different number of buckets than provided`,
-		},
-		"rejects histogram that has too few positive buckets": {
-			h: &histogram.Histogram{
-				PositiveSpans:   []histogram.Span{{Offset: 0, Length: 1}},
-				PositiveBuckets: []int64{},
-			},
-			errMsg: `positive side: spans need 1 buckets, have 0 buckets: histogram spans specify different number of buckets than provided`,
-		},
-		"rejects histogram that has too many negative buckets": {
-			h: &histogram.Histogram{
-				NegativeSpans:   []histogram.Span{{Offset: 0, Length: 1}},
-				NegativeBuckets: []int64{1, 2},
-			},
-			errMsg: `negative side: spans need 1 buckets, have 2 buckets: histogram spans specify different number of buckets than provided`,
-		},
-		"rejects histogram that has too many positive buckets": {
-			h: &histogram.Histogram{
-				PositiveSpans:   []histogram.Span{{Offset: 0, Length: 1}},
-				PositiveBuckets: []int64{1, 2},
-			},
-			errMsg: `positive side: spans need 1 buckets, have 2 buckets: histogram spans specify different number of buckets than provided`,
-		},
-		"rejects a histogram that has a negative span with a negative offset": {
-			h: &histogram.Histogram{
-				NegativeSpans:   []histogram.Span{{Offset: -1, Length: 1}, {Offset: -1, Length: 1}},
-				NegativeBuckets: []int64{1, 2},
-			},
-			errMsg: `negative side: span number 2 with offset -1: histogram has a span whose offset is negative`,
-		},
-		"rejects a histogram which has a positive span with a negative offset": {
-			h: &histogram.Histogram{
-				PositiveSpans:   []histogram.Span{{Offset: -1, Length: 1}, {Offset: -1, Length: 1}},
-				PositiveBuckets: []int64{1, 2},
-			},
-			errMsg: `positive side: span number 2 with offset -1: histogram has a span whose offset is negative`,
-		},
-		"rejects a histogram that has a negative bucket with a negative count": {
-			h: &histogram.Histogram{
-				NegativeSpans:   []histogram.Span{{Offset: -1, Length: 1}},
-				NegativeBuckets: []int64{-1},
-			},
-			errMsg: `negative side: bucket number 1 has observation count of -1: histogram has a bucket whose observation count is negative`,
-		},
-		"rejects a histogram that has a positive bucket with a negative count": {
-			h: &histogram.Histogram{
-				PositiveSpans:   []histogram.Span{{Offset: -1, Length: 1}},
-				PositiveBuckets: []int64{-1},
-			},
-			errMsg: `positive side: bucket number 1 has observation count of -1: histogram has a bucket whose observation count is negative`,
-		},
-		"rejects a histogram that has a lower count than count in buckets": {
-			h: &histogram.Histogram{
-				Count:           0,
-				NegativeSpans:   []histogram.Span{{Offset: -1, Length: 1}},
-				PositiveSpans:   []histogram.Span{{Offset: -1, Length: 1}},
-				NegativeBuckets: []int64{1},
-				PositiveBuckets: []int64{1},
-			},
-			errMsg:    `2 observations found in buckets, but the Count field is 0: histogram's observation count should be at least the number of observations found in the buckets`,
-			skipFloat: true,
-		},
-		"rejects a histogram that doesn't count the zero bucket in its count": {
-			h: &histogram.Histogram{
-				Count:           2,
-				ZeroCount:       1,
-				NegativeSpans:   []histogram.Span{{Offset: -1, Length: 1}},
-				PositiveSpans:   []histogram.Span{{Offset: -1, Length: 1}},
-				NegativeBuckets: []int64{1},
-				PositiveBuckets: []int64{1},
-			},
-			errMsg:    `3 observations found in buckets, but the Count field is 2: histogram's observation count should be at least the number of observations found in the buckets`,
-			skipFloat: true,
-		},
-	}
-
-	for testName, tc := range tests {
-		t.Run(testName, func(t *testing.T) {
-			if err := ValidateHistogram(tc.h); tc.errMsg != "" {
-				require.EqualError(t, err, tc.errMsg)
-			} else {
-				require.NoError(t, err)
-			}
-			if tc.skipFloat {
-				return
-			}
-			if err := ValidateFloatHistogram(tc.h.ToFloat()); tc.errMsg != "" {
-				require.EqualError(t, err, tc.errMsg)
-			} else {
-				require.NoError(t, err)
-			}
-		})
-	}
-}
-
-func BenchmarkHistogramValidation(b *testing.B) {
-	histograms := generateBigTestHistograms(b.N, 500)
-	b.ResetTimer()
-	for _, h := range histograms {
-		require.NoError(b, ValidateHistogram(h))
-	}
-}
-
-func generateBigTestHistograms(numHistograms, numBuckets int) []*histogram.Histogram {
-	numSpans := numBuckets / 10
-	bucketsPerSide := numBuckets / 2
-	spanLength := uint32(bucketsPerSide / numSpans)
-	// Given all bucket deltas are 1, sum numHistograms + 1.
-	observationCount := numBuckets / 2 * (1 + numBuckets)
-
-	var histograms []*histogram.Histogram
-	for i := 0; i < numHistograms; i++ {
-		h := &histogram.Histogram{
-			Count:           uint64(i + observationCount),
-			ZeroCount:       uint64(i),
-			ZeroThreshold:   1e-128,
-			Sum:             18.4 * float64(i+1),
-			Schema:          2,
-			NegativeSpans:   make([]histogram.Span, numSpans),
-			PositiveSpans:   make([]histogram.Span, numSpans),
-			NegativeBuckets: make([]int64, bucketsPerSide),
-			PositiveBuckets: make([]int64, bucketsPerSide),
-		}
-
-		for j := 0; j < numSpans; j++ {
-			s := histogram.Span{Offset: 1, Length: spanLength}
-			h.NegativeSpans[j] = s
-			h.PositiveSpans[j] = s
-		}
-
-		for j := 0; j < bucketsPerSide; j++ {
-			h.NegativeBuckets[j] = 1
-			h.PositiveBuckets[j] = 1
-		}
-
-		histograms = append(histograms, h)
-	}
-	return histograms
-}
-
 func TestOOOAppendWithNoSeries(t *testing.T) {
 	dir := t.TempDir()
 	wal, err := wlog.NewSize(nil, nil, filepath.Join(dir, "wal"), 32768, wlog.CompressionSnappy)
@ -5402,7 +5234,7 @@ func BenchmarkCuttingHeadHistogramChunks(b *testing.B) {
 		numSamples = 50000
 		numBuckets = 100
 	)
-	samples := generateBigTestHistograms(numSamples, numBuckets)
+	samples := histogram.GenerateBigTestHistograms(numSamples, numBuckets)

 	h, _ := newTestHead(b, DefaultBlockDuration, wlog.CompressionNone, false)
 	defer func() {
@ -5466,7 +5298,7 @@ func TestCuttingNewHeadChunks(t *testing.T) {
 		"small histograms": {
 			numTotalSamples: 240,
 			histValFunc: func() func(i int) *histogram.Histogram {
-				hists := generateBigTestHistograms(240, 10)
+				hists := histogram.GenerateBigTestHistograms(240, 10)
 				return func(i int) *histogram.Histogram {
 					return hists[i]
 				}
@ -5482,7 +5314,7 @@ func TestCuttingNewHeadChunks(t *testing.T) {
 		"large histograms": {
 			numTotalSamples: 240,
 			histValFunc: func() func(i int) *histogram.Histogram {
-				hists := generateBigTestHistograms(240, 100)
+				hists := histogram.GenerateBigTestHistograms(240, 100)
 				return func(i int) *histogram.Histogram {
 					return hists[i]
 				}
@ -5491,14 +5323,13 @@ func TestCuttingNewHeadChunks(t *testing.T) {
 				numSamples int
 				numBytes   int
 			}{
-				{30, 696},
-				{30, 700},
-				{30, 708},
-				{30, 693},
+				{40, 896},
+				{40, 899},
+				{40, 896},
+				{30, 690},
 				{30, 691},
-				{30, 692},
-				{30, 695},
 				{30, 694},
+				{30, 693},
 			},
 		},
 		"really large histograms": {
@ -5506,7 +5337,7 @@ func TestCuttingNewHeadChunks(t *testing.T) {
 			// per chunk.
 			numTotalSamples: 11,
 			histValFunc: func() func(i int) *histogram.Histogram {
-				hists := generateBigTestHistograms(11, 100000)
+				hists := histogram.GenerateBigTestHistograms(11, 100000)
 				return func(i int) *histogram.Histogram {
 					return hists[i]
 				}