histograms: Move to new exposition protobuf format

This is an incompatible protobuf change. Instrumented targets must include https://github.com/prometheus/client_golang/pull/1092 to make this work. Signed-off-by: beorn7 <beorn@grafana.com>
2025-03-05 20:59:13 -08:00 · 2022-07-19 18:11:33 +02:00 · 2022-07-19 18:11:33 +02:00 · c40b105efd
parent 5937b4f5d4
commit c40b105efd
4 changed files with 728 additions and 719 deletions
--- a/model/textparse/protobufparse.go
+++ b/model/textparse/protobufparse.go
@ -38,7 +38,7 @@ import (
 // protobuf format and then present it as it if were parsed by a
 // Prometheus-2-style text parser. This is only done so that we can easily plug
 // in the protobuf format into Prometheus 2. For future use (with the final
-// format that will be used for sparse histograms), we have to revisit the
+// format that will be used for native histograms), we have to revisit the
 // parsing. A lot of the efficiency tricks of the Prometheus-2-style parsing
 // could be used in a similar fashion (byte-slice pointers into the raw
 // payload), which requires some hand-coded protobuf handling. But the current
@ -132,8 +132,8 @@ func (p *ProtobufParser) Series() ([]byte, *int64, float64) {
 	return p.metricBytes.Bytes(), nil, v
 }

-// Histogram returns the bytes of a series with a sparse histogram as a
-// value, the timestamp if set, and the sparse histogram in the current
+// Histogram returns the bytes of a series with a native histogram as a
+// value, the timestamp if set, and the native histogram in the current
 // sample.
 func (p *ProtobufParser) Histogram() ([]byte, *int64, *histogram.Histogram) {
 	var (
@ -144,19 +144,19 @@ func (p *ProtobufParser) Histogram() ([]byte, *int64, *histogram.Histogram) {
 	sh := histogram.Histogram{
 		Count:           h.GetSampleCount(),
 		Sum:             h.GetSampleSum(),
-		ZeroThreshold:   h.GetSbZeroThreshold(),
-		ZeroCount:       h.GetSbZeroCount(),
-		Schema:          h.GetSbSchema(),
-		PositiveSpans:   make([]histogram.Span, len(h.GetSbPositive().GetSpan())),
-		PositiveBuckets: h.GetSbPositive().GetDelta(),
-		NegativeSpans:   make([]histogram.Span, len(h.GetSbNegative().GetSpan())),
-		NegativeBuckets: h.GetSbNegative().GetDelta(),
+		ZeroThreshold:   h.GetZeroThreshold(),
+		ZeroCount:       h.GetZeroCount(),
+		Schema:          h.GetSchema(),
+		PositiveSpans:   make([]histogram.Span, len(h.GetPositiveSpan())),
+		PositiveBuckets: h.GetPositiveDelta(),
+		NegativeSpans:   make([]histogram.Span, len(h.GetNegativeSpan())),
+		NegativeBuckets: h.GetNegativeDelta(),
 	}
-	for i, span := range h.GetSbPositive().GetSpan() {
+	for i, span := range h.GetPositiveSpan() {
 		sh.PositiveSpans[i].Offset = span.GetOffset()
 		sh.PositiveSpans[i].Length = span.GetLength()
 	}
-	for i, span := range h.GetSbNegative().GetSpan() {
+	for i, span := range h.GetNegativeSpan() {
 		sh.NegativeSpans[i].Offset = span.GetOffset()
 		sh.NegativeSpans[i].Length = span.GetLength()
 	}
@ -231,7 +231,7 @@ func (p *ProtobufParser) Metric(l *labels.Labels) string {
 }

 // Exemplar writes the exemplar of the current sample into the passed
-// exemplar. It returns if an exemplar exists or not. In case of a sparse
+// exemplar. It returns if an exemplar exists or not. In case of a native
 // histogram, the legacy bucket section is still used for exemplars. To ingest
 // all examplars, call the Exemplar method repeatedly until it returns false.
 func (p *ProtobufParser) Exemplar(ex *exemplar.Exemplar) bool {
@ -246,7 +246,7 @@ func (p *ProtobufParser) Exemplar(ex *exemplar.Exemplar) bool {
 			if p.state == EntrySeries {
 				return false // At _count or _sum.
 			}
-			p.fieldPos = 0 // Start at 1st bucket for sparse histograms.
+			p.fieldPos = 0 // Start at 1st bucket for native histograms.
 		}
 		for p.fieldPos < len(bb) {
 			exProto = bb[p.fieldPos].GetExemplar()
@ -314,7 +314,7 @@ func (p *ProtobufParser) Next() (Entry, error) {
 		p.state = EntryType
 	case EntryType:
 		if p.mf.GetType() == dto.MetricType_HISTOGRAM &&
-			isSparseHistogram(p.mf.GetMetric()[0].GetHistogram()) {
+			isNativeHistogram(p.mf.GetMetric()[0].GetHistogram()) {
 			p.state = EntryHistogram
 		} else {
 			p.state = EntrySeries
@ -465,18 +465,18 @@ func formatOpenMetricsFloat(f float64) string {
 	return s + ".0"
 }

-// isSparseHistogram returns false iff the provided histograms has no
-// SparseBuckets and a zero threshold of 0 and a zero count of 0. In principle,
-// this could still be meant to be a sparse histgram (with a zero threshold of 0
-// and no observations yet), but for now, we'll treat this case as a conventional
+// isNativeHistogram returns false iff the provided histogram has no sparse
+// buckets and a zero threshold of 0 and a zero count of 0. In principle, this
+// could still be meant to be a native histogram (with a zero threshold of 0 and
+// no observations yet), but for now, we'll treat this case as a conventional
 // histogram.
 //
 // TODO(beorn7): In the final format, there should be an unambiguous way of
-// deciding if a histogram should be ingested as a conventional one or a sparse
+// deciding if a histogram should be ingested as a conventional one or a native
 // one.
-func isSparseHistogram(h *dto.Histogram) bool {
-	return len(h.GetSbNegative().GetDelta()) > 0 ||
-		len(h.GetSbPositive().GetDelta()) > 0 ||
-		h.GetSbZeroCount() > 0 ||
-		h.GetSbZeroThreshold() > 0
+func isNativeHistogram(h *dto.Histogram) bool {
+	return len(h.GetNegativeDelta()) > 0 ||
+		len(h.GetPositiveDelta()) > 0 ||
+		h.GetZeroCount() > 0 ||
+		h.GetZeroThreshold() > 0
 }
--- a/model/textparse/protobufparse_test.go
+++ b/model/textparse/protobufparse_test.go
@ -122,38 +122,34 @@ metric: <
        value: -0.00029
      >
    >
-    sb_schema: 3
-    sb_zero_threshold: 2.938735877055719e-39
-    sb_zero_count: 2
-    sb_negative: <
-      span: <
-        offset: -162
-        length: 1
-      >
-      span: <
-        offset: 23
-        length: 4
-      >
-      delta: 1
-      delta: 3
-      delta: -2
-      delta: -1
-      delta: 1
+    schema: 3
+    zero_threshold: 2.938735877055719e-39
+    zero_count: 2
+    negative_span: <
+      offset: -162
+      length: 1
    >
-    sb_positive: <
-      span: <
-        offset: -161
-        length: 1
-      >
-      span: <
-        offset: 8
-        length: 3
-      >
-      delta: 1
-      delta: 2
-      delta: -1
-      delta: -1
+    negative_span: <
+      offset: 23
+      length: 4
    >
+    negative_delta: 1
+    negative_delta: 3
+    negative_delta: -2
+    negative_delta: -1
+    negative_delta: 1
+    positive_span: <
+      offset: -161
+      length: 1
+    >
+    positive_span: <
+      offset: 8
+      length: 3
+    >
+    positive_delta: 1
+    positive_delta: 2
+    positive_delta: -1
+    positive_delta: -1
  >
  timestamp_ms: 1234568
 >
@ -196,8 +192,8 @@ metric: <
        value: -0.000295
      >
    >
-    sb_schema: 0
-    sb_zero_threshold: 0
+    schema: 0
+    zero_threshold: 0
  >
 >

--- a/prompb/io/prometheus/client/metrics.pb.go
+++ b/prompb/io/prometheus/client/metrics.pb.go
--- a/prompb/io/prometheus/client/metrics.proto
+++ b/prompb/io/prometheus/client/metrics.proto
@ -68,22 +68,39 @@ message Untyped {
 }

 message Histogram {
-  uint64 sample_count = 1;
-  double sample_count_float = 9; // Overrides sample_count if > 0.
-  double sample_sum   = 2;
-  repeated Bucket bucket       = 3; // Ordered in increasing order of upper_bound, +Inf bucket is optional.
-  // Sparse bucket (sb) stuff:
-  // The sb_schema defines the bucket schema. Currently, valid numbers are -4 <= n <= 8.
+  uint64 sample_count       = 1;
+  double sample_count_float = 4; // Overrides sample_count if > 0.
+  double sample_sum         = 2;
+  // Buckets for the conventional histogram.
+  repeated Bucket bucket    = 3; // Ordered in increasing order of upper_bound, +Inf bucket is optional.
+
+  // Everything below here is for native histograms (also known as sparse histograms).
+
+  // schema defines the bucket schema. Currently, valid numbers are -4 <= n <= 8.
  // They are all for base-2 bucket schemas, where 1 is a bucket boundary in each case, and
  // then each power of two is divided into 2^n logarithmic buckets.
  // Or in other words, each bucket boundary is the previous boundary times 2^(2^-n).
  // In the future, more bucket schemas may be added using numbers < -4 or > 8.
-  sint32 sb_schema           = 4;
-  double sb_zero_threshold   = 5;  // Breadth of the zero bucket.
-  uint64 sb_zero_count       = 6;  // Count in zero bucket.
-  double sb_zero_count_float = 10; // Overrides sb_zero_count if > 0.
-  SparseBuckets sb_negative  = 7;  // Negative sparse buckets.
-  SparseBuckets sb_positive  = 8;  // Positive sparse buckets.
+  sint32 schema           = 5;
+  double zero_threshold   = 6; // Breadth of the zero bucket.
+  uint64 zero_count       = 7; // Count in zero bucket.
+  double zero_count_float = 8; // Overrides zero_count if > 0.
+
+  // Negative buckets for the native histogram.
+  repeated BucketSpan negative_span =  9;
+  // Use either "negative_delta" or "negative_count", the former for
+  // regular histograms with integer counts, the latter for float
+  // histograms.
+  repeated sint64 negative_delta    = 10; // Count delta of each bucket compared to previous one (or to zero for 1st bucket).
+  repeated double negative_count    = 11; // Absolute count of each bucket.
+
+  // Positive buckets for the native histogram.
+  repeated BucketSpan positive_span = 12;
+  // Use either "positive_delta" or "positive_count", the former for
+  // regular histograms with integer counts, the latter for float
+  // histograms.
+  repeated sint64 positive_delta    = 13; // Count delta of each bucket compared to previous one (or to zero for 1st bucket).
+  repeated double positive_count    = 14; // Absolute count of each bucket.
 }

 message Bucket {
@ -93,22 +110,15 @@ message Bucket {
  Exemplar exemplar               = 3;
 }

-message SparseBuckets {
-  // A Span is a given number of consecutive buckets at a given
-  // offset. Logically, it would be more straightforward to include
-  // the bucket counts in the Span. However, the protobuf
-  // representation is more compact in the way the data is structured
-  // here (with all the buckets in a single array separate from the
-  // Spans).
-  message Span {
-    sint32 offset = 1; // Gap to previous span, or starting point for 1st span (which can be negative).
-    uint32 length = 2; // Length of consecutive buckets.
-  }
-  repeated Span span = 1;
-  // Only one of "delta" or "count" may be used, the former for regular
-  // histograms with integer counts, the latter for float histograms.
-  repeated sint64 delta = 2; // Count delta of each bucket compared to previous one (or to zero for 1st bucket).
-  repeated double count = 3; // Absolute count of each bucket.
+// A BucketSpan defines a number of consecutive buckets in a native
+// histogram with their offset. Logically, it would be more
+// straightforward to include the bucket counts in the Span. However,
+// the protobuf representation is more compact in the way the data is
+// structured here (with all the buckets in a single array separate
+// from the Spans).
+message BucketSpan {
+  sint32 offset = 1; // Gap to previous span, or starting point for 1st span (which can be negative).
+  uint32 length = 2; // Length of consecutive buckets.
 }

 message Exemplar {