From b2cff372f2eb88ced17a7a754471650e5ce9fa47 Mon Sep 17 00:00:00 2001
From: Chris Marchbanks <csmarchbanks@gmail.com>
Date: Tue, 2 Jan 2024 14:09:04 -0700
Subject: [PATCH] Parse a verbose text based native histogram representation

---
 model/textparse/openmetricsparse.go      | 140 +++++++++++++++++++++++
 model/textparse/openmetricsparse_test.go |  32 +++++-
 2 files changed, 171 insertions(+), 1 deletion(-)

diff --git a/model/textparse/openmetricsparse.go b/model/textparse/openmetricsparse.go
index ddfbe4fc5c..167f9efe7c 100644
--- a/model/textparse/openmetricsparse.go
+++ b/model/textparse/openmetricsparse.go
@@ -21,6 +21,7 @@ import (
 	"fmt"
 	"io"
 	"math"
+	"strconv"
 	"strings"
 	"unicode/utf8"
 
@@ -83,6 +84,12 @@ type OpenMetricsParser struct {
 	start   int
 	offsets []int
 
+	h           *histogram.Histogram
+	hLabels     labels.Labels
+	removeH     bool
+	cachedEntry Entry
+	cachedErr   error
+
 	eOffsets      []int
 	exemplar      []byte
 	exemplarVal   float64
@@ -176,6 +183,57 @@ func (p *OpenMetricsParser) Metric(l *labels.Labels) string {
 	return s
 }
 
+// withoutHistLabels builds the sorted label set of the current series, leaving
+// out the "le", "offset" and "i" labels. The metric name is not added either.
+func (p *OpenMetricsParser) withoutHistLabels() labels.Labels {
+	p.builder.Reset()
+	series := string(p.series)
+	// After the first entry, p.offsets holds four positions per label pair:
+	// name start, name end, value start and value end.
+	for i := 1; i < len(p.offsets); i += 4 {
+		nameStart, nameEnd := p.offsets[i]-p.start, p.offsets[i+1]-p.start
+		valStart, valEnd := p.offsets[i+2]-p.start, p.offsets[i+3]-p.start
+
+		name := series[nameStart:nameEnd]
+		if name == "le" || name == "offset" || name == "i" {
+			continue
+		}
+
+		val := series[valStart:valEnd]
+		// The replacer allocates, so it only runs on values that contain a
+		// backslash.
+		if strings.Contains(val, `\`) {
+			val = lvalReplacer.Replace(val)
+		}
+		p.builder.Add(name, val)
+	}
+	p.builder.Sort()
+	return p.builder.Labels()
+}
+
+// offset parses the "offset" and "i" labels of the current series as base-10
+// integers. A label that is absent leaves its result at zero. Values must fit
+// into an int32, which is what callers store span offsets as.
+func (p *OpenMetricsParser) offset() (offset int, index int, err error) {
+	for i := 1; i < len(p.offsets); i += 4 {
+		name := string(p.series[p.offsets[i]-p.start : p.offsets[i+1]-p.start])
+		if name != "offset" && name != "i" {
+			continue
+		}
+		value := string(p.series[p.offsets[i+2]-p.start : p.offsets[i+3]-p.start])
+		n, parseErr := strconv.ParseInt(value, 10, 32)
+		if parseErr != nil {
+			return 0, 0, fmt.Errorf("parsing label %q: %w", name, parseErr)
+		}
+		if name == "offset" {
+			offset = int(n)
+		} else {
+			index = int(n)
+		}
+	}
+	return offset, index, nil
+}
+
 // Exemplar writes the exemplar of the current sample into the passed exemplar.
 // It returns whether an exemplar exists. As OpenMetrics only ever has one
 // exemplar per sample, every call after the first (for the same sample) will
@@ -236,6 +294,88 @@ func (p *OpenMetricsParser) parseError(exp string, got token) error {
 // Next advances the parser to the next sample. It returns false if no
 // more samples were read or an error occurred.
 func (p *OpenMetricsParser) Next() (Entry, error) {
+	// Hand out the entry that was held back while the finished histogram was
+	// emitted, and clear the flag so parsing resumes on the following call.
+	if p.removeH {
+		p.h, p.removeH = nil, false
+		return p.cachedEntry, p.cachedErr
+	}
+	entry, err := p.next()
+	if err != nil {
+		return entry, err
+	}
+	// A pending histogram with a non-zero zero threshold is emitted once the
+	// entry just read is not a series carrying the same labels.
+	if (p.h != nil && p.h.ZeroThreshold != 0.0) &&
+		(entry != EntrySeries ||
+			!labels.Equal(p.hLabels, p.withoutHistLabels())) {
+		p.cachedEntry = entry
+		p.cachedErr = err
+		p.removeH = true
+		return EntryHistogram, nil
+	}
+
+	if entry != EntrySeries ||
+		!(p.mtype == model.MetricTypeHistogram || p.mtype == model.MetricTypeGaugeHistogram) {
+		return entry, err
+	}
+	if p.h == nil {
+		p.h = &histogram.Histogram{}
+		p.hLabels = p.withoutHistLabels()
+	}
+	name := string(p.series[:p.offsets[0]-p.start])
+	switch {
+	// "zero_count" also ends in "count", so it has to be matched first.
+	case strings.HasSuffix(name, "zero_count"):
+		p.h.ZeroCount = uint64(p.val)
+	case strings.HasSuffix(name, "zero_threshold"):
+		p.h.ZeroThreshold = p.val
+	case strings.HasSuffix(name, "schema"):
+		p.h.Schema = int32(p.val)
+	case strings.HasSuffix(name, "bucket"):
+		return EntrySeries, nil
+	case strings.HasSuffix(name, "count"):
+		p.h.Count = uint64(p.val)
+		return EntrySeries, nil
+	case strings.HasSuffix(name, "sum"):
+		p.h.Sum = p.val
+		return EntrySeries, nil
+	case strings.HasSuffix(name, "created"):
+		return EntrySeries, nil
+	case strings.HasSuffix(name, "positive_span"):
+		offset, _, err := p.offset()
+		if err != nil {
+			return EntryInvalid, fmt.Errorf("could not parse offset: %w", err)
+		}
+		// A sample with the same offset as the last span extends that span.
+		if n := len(p.h.PositiveSpans); n == 0 || p.h.PositiveSpans[n-1].Offset != int32(offset) {
+			p.h.PositiveSpans = append(p.h.PositiveSpans, histogram.Span{Offset: int32(offset), Length: 1})
+		} else {
+			p.h.PositiveSpans[n-1].Length++
+		}
+		p.h.PositiveBuckets = append(p.h.PositiveBuckets, int64(p.val))
+	case strings.HasSuffix(name, "negative_span"):
+		offset, _, err := p.offset()
+		if err != nil {
+			return EntryInvalid, fmt.Errorf("could not parse offset: %w", err)
+		}
+		// Index by the number of negative spans; the positive side may hold
+		// a different number of spans.
+		if n := len(p.h.NegativeSpans); n == 0 || p.h.NegativeSpans[n-1].Offset != int32(offset) {
+			p.h.NegativeSpans = append(p.h.NegativeSpans, histogram.Span{Offset: int32(offset), Length: 1})
+		} else {
+			p.h.NegativeSpans[n-1].Length++
+		}
+		p.h.NegativeBuckets = append(p.h.NegativeBuckets, int64(p.val))
+	default:
+		return EntryInvalid, fmt.Errorf("unexpected histogram suffix encountered for: %s", name)
+	}
+	// The sample was folded into the pending histogram and is not surfaced on
+	// its own; continue with the next entry.
+	return p.Next()
+}
+
+func (p *OpenMetricsParser) next() (Entry, error) {
 	var err error
 
 	p.start = p.l.i
diff --git a/model/textparse/openmetricsparse_test.go b/model/textparse/openmetricsparse_test.go
index 2b1d909f38..b6a36527a7 100644
--- a/model/textparse/openmetricsparse_test.go
+++ b/model/textparse/openmetricsparse_test.go
@@ -22,6 +22,7 @@ import (
 	"github.com/stretchr/testify/require"
 
 	"github.com/prometheus/prometheus/model/exemplar"
+	"github.com/prometheus/prometheus/model/histogram"
 	"github.com/prometheus/prometheus/model/labels"
 )
 
@@ -65,7 +66,22 @@ _metric_starting_with_underscore 1
 testmetric{_label_starting_with_underscore="foo"} 1
 testmetric{label="\"bar\""} 1
 # TYPE foo counter
-foo_total 17.0 1520879607.789 # {id="counter-test"} 5`
+foo_total 17.0 1520879607.789 # {id="counter-test"} 5
+# TYPE nativehistogram histogram
+nativehistogram_count 24
+nativehistogram_sum 100
+nativehistogram_created 1520430000.123
+nativehistogram_schema 0
+nativehistogram_zerothreshold 0.001
+nativehistogram_zerocount 4
+nativehistogram_positive_span{offset="0",i="0"} 2
+nativehistogram_positive_span{offset="0",i="1"} 1
+nativehistogram_positive_span{offset="1",i="0"} -2
+nativehistogram_positive_span{offset="1",i="1"} 3
+nativehistogram_negative_span{offset="0",i="0"} 2
+nativehistogram_negative_span{offset="0",i="1"} 1
+nativehistogram_negative_span{offset="1",i="0"} -2
+nativehistogram_negative_span{offset="1",i="1"} 3`
 
 	input += "\n# HELP metric foo\x00bar"
 	input += "\nnull_byte_metric{a=\"abc\x00\"} 1"
@@ -79,6 +95,7 @@ foo_total 17.0 1520879607.789 # {id="counter-test"} 5`
 		t       *int64
 		v       float64
 		typ     model.MetricType
+		h       *histogram.Histogram
 		help    string
 		unit    string
 		comment string
@@ -236,6 +253,10 @@ foo_total 17.0 1520879607.789 # {id="counter-test"} 5`
 			lset: labels.FromStrings("__name__", "foo_total"),
 			t:    int64p(1520879607789),
 			e:    &exemplar.Exemplar{Labels: labels.FromStrings("id", "counter-test"), Value: 5},
+		}, {
+			m:    "nativehistogram",
+			typ:  model.MetricTypeHistogram,
+			lset: labels.FromStrings("__name__", "nativehistogram"),
 		}, {
 			m:    "metric",
 			help: "foo\x00bar",
@@ -276,6 +297,15 @@ foo_total 17.0 1520879607.789 # {id="counter-test"} 5`
 				require.Equal(t, *exp[i].e, e)
 			}
 
+		case EntryHistogram:
+			m, ts, h, _ := p.Histogram()
+
+			p.Metric(&res)
+			require.Equal(t, exp[i].m, string(m))
+			require.Equal(t, exp[i].t, ts)
+			require.Equal(t, exp[i].h, h)
+			require.Equal(t, exp[i].lset, res)
+
 		case EntryType:
 			m, typ := p.Type()
 			require.Equal(t, exp[i].m, string(m))