From 9b619e77ee3437862c75547caac64a9f409a56d6 Mon Sep 17 00:00:00 2001
From: bwplotka <bwplotka@gmail.com>
Date: Fri, 4 Oct 2024 14:04:27 +0100
Subject: [PATCH] Added more tests; some changes/optimizations when
 pair-programming with Manik.

Signed-off-by: bwplotka <bwplotka@gmail.com>
---
 model/textparse/openmetricsparse.go      |  78 ++++++++-----
 model/textparse/openmetricsparse_test.go | 135 ++++++++++++++++++-----
 2 files changed, 157 insertions(+), 56 deletions(-)

diff --git a/model/textparse/openmetricsparse.go b/model/textparse/openmetricsparse.go
index 0e82dc9f5c..bf2d8027f9 100644
--- a/model/textparse/openmetricsparse.go
+++ b/model/textparse/openmetricsparse.go
@@ -17,6 +17,7 @@
 package textparse
 
 import (
+	"bytes"
 	"errors"
 	"fmt"
 	"io"
@@ -72,15 +73,16 @@ func (l *openMetricsLexer) Error(es string) {
 // OpenMetrics text exposition format.
 // This is based on the working draft https://docs.google.com/document/u/1/d/1KwV0mAXwwbvvifBvDKH_LU1YjyXE_wxCkHNoCGq1GX0/edit
 type OpenMetricsParser struct {
-	l       *openMetricsLexer
-	builder labels.ScratchBuilder
-	series  []byte
-	text    []byte
-	mtype   model.MetricType
-	val     float64
-	ts      int64
-	hasTS   bool
-	start   int
+	l         *openMetricsLexer
+	builder   labels.ScratchBuilder
+	series    []byte
+	mfNameLen int // length of metric family name to get from series.
+	text      []byte
+	mtype     model.MetricType
+	val       float64
+	ts        int64
+	hasTS     bool
+	start     int
 	// offsets is a list of offsets into series that describe the positions
 	// of the metric name and label names and values for this series.
 	// p.offsets[0] is the start character of the metric name.
@@ -98,10 +100,10 @@ type OpenMetricsParser struct {
 	// Created timestamp parsing state.
 	ct        int64
 	ctHashSet uint64
-	// visitedName is the metric name of the last visited metric when peeking ahead
+	// visitedMFName is the metric family name of the last visited metric when peeking ahead
 	// for _created series during the execution of the CreatedTimestamp method.
-	visitedName  string
-	skipCTSeries bool
+	visitedMFName []byte
+	skipCTSeries  bool
 }
 
 type openMetricsParserOptions struct {
@@ -255,14 +257,37 @@ func (p *OpenMetricsParser) Exemplar(e *exemplar.Exemplar) bool {
 	return true
 }
 
+//func (p *OpenMetricsParser) metricFamilyID(mtype model.MetricType) string {
+//	// Copy the buffer to a string: this is only necessary for the return value.
+//	s := string(p.series) // TODO: double check
+//
+//	p.builder.Reset()
+//	metricName := unreplace(s[p.offsets[0]-p.start : p.offsets[1]-p.start])
+//	p.builder.Add(labels.MetricName, metricName)
+//
+//	for i := 2; i < len(p.offsets); i += 4 {
+//		a := p.offsets[i] - p.start
+//		b := p.offsets[i+1] - p.start
+//		label := unreplace(s[a:b])
+//		c := p.offsets[i+2] - p.start
+//		d := p.offsets[i+3] - p.start
+//		value := unreplace(s[c:d])
+//
+//		p.builder.Add(label, value)
+//	}
+//
+//	p.builder.Sort()
+//	*l = p.builder.Labels()
+//
+//	return s
+//}
+
 // CreatedTimestamp returns the created timestamp for a current Metric if exists or nil.
 // NOTE(Maniktherana): Might use additional CPU/mem resources due to deep copy of parser required for peeking given 1.0 OM specification on _created series.
 func (p *OpenMetricsParser) CreatedTimestamp() *int64 {
 	if !typeRequiresCT(p.mtype) {
 		// Not a CT supported metric type, fast path.
-		p.ct = 0
-		p.visitedName = ""
-		p.ctHashSet = 0
+		p.ctHashSet = 0 // Use ctHashSet as a single way of telling "empty cache"
 		return nil
 	}
 
@@ -271,14 +296,14 @@ func (p *OpenMetricsParser) CreatedTimestamp() *int64 {
 		buf                     []byte
 		peekWithoutNameLsetHash uint64
 	)
-	p.Metric(&currLset)
-	currFamilyLsetHash, buf := currLset.HashWithoutLabels(buf, labels.MetricName, "le", "quantile")
-	currName := currLset.Get(model.MetricNameLabel)
-	currName = findBaseMetricName(currName)
+	p.Metric(&currLset) // Avoid/optimize (probably similar techique Metric is using, parsing only bits we need e.g.
+	// take relevant bytes from {...} (excluding le ONLY for histograms, quantile for summaries).
 
-	// make sure we're on a new metric before returning
-	if currName == p.visitedName && currFamilyLsetHash == p.ctHashSet && p.visitedName != "" && p.ctHashSet > 0 && p.ct > 0 {
-		// CT is already known, fast path.
+	currFamilyLsetHash, buf := currLset.HashWithoutLabels(buf, labels.MetricName, "le", "quantile")
+	currName := p.series[0:p.mfNameLen]
+
+	// Check cache, perhaps we fetched something already.
+	if p.ctHashSet > 0 && bytes.Equal(currName, p.visitedMFName) && currFamilyLsetHash == p.ctHashSet && p.ct > 0 {
 		return &p.ct
 	}
 
@@ -335,20 +360,18 @@ func (p *OpenMetricsParser) CreatedTimestamp() *int64 {
 
 // setCTParseValues sets the parser to the state after CreatedTimestamp method was called and CT was found.
 // This is useful to prevent re-parsing the same series again and early return the CT value.
-func (p *OpenMetricsParser) setCTParseValues(ct int64, ctHashSet uint64, visitedName string, skipCTSeries bool, resetLexer *openMetricsLexer) {
+func (p *OpenMetricsParser) setCTParseValues(ct int64, ctHashSet uint64, mfName []byte, skipCTSeries bool, resetLexer *openMetricsLexer) {
 	p.ct = ct
 	p.l = resetLexer
 	p.ctHashSet = ctHashSet
-	p.visitedName = visitedName
-	p.skipCTSeries = skipCTSeries
+	p.visitedMFName = mfName
+	p.skipCTSeries = skipCTSeries // Do we need to set it?
 }
 
 // resetCtParseValues resets the parser to the state before CreatedTimestamp method was called.
 func (p *OpenMetricsParser) resetCTParseValues(resetLexer *openMetricsLexer) {
 	p.l = resetLexer
-	p.ct = 0
 	p.ctHashSet = 0
-	p.visitedName = ""
 	p.skipCTSeries = true
 }
 
@@ -419,6 +442,7 @@ func (p *OpenMetricsParser) Next() (Entry, error) {
 				mStart++
 				mEnd--
 			}
+			p.mfNameLen = mEnd - mStart
 			p.offsets = append(p.offsets, mStart, mEnd)
 		default:
 			return EntryInvalid, p.parseError("expected metric name after "+t.String(), t2)
diff --git a/model/textparse/openmetricsparse_test.go b/model/textparse/openmetricsparse_test.go
index b6089bd960..da1174d2b4 100644
--- a/model/textparse/openmetricsparse_test.go
+++ b/model/textparse/openmetricsparse_test.go
@@ -15,6 +15,7 @@ package textparse
 
 import (
 	"errors"
+	"fmt"
 	"io"
 	"os"
 	"testing"
@@ -901,42 +902,118 @@ func TestOMNullByteHandling(t *testing.T) {
 // current OM spec limitations or clients with broken OM format.
 // TODO(maniktherana): Make sure OM 1.1/2.0 pass CT via metadata or exemplar-like to avoid this.
 func TestCTParseFailures(t *testing.T) {
-	input := `# HELP thing Histogram with _created as first line
+	for _, tcase := range []struct {
+		name     string
+		input    string
+		expected []parsedEntry
+	}{
+		{
+			name: "_created line is a first one",
+			input: `# HELP thing histogram with _created as first line
 # TYPE thing histogram
 thing_created 1520872607.123
 thing_count 17
 thing_sum 324789.3
 thing_bucket{le="0.0"} 0
-thing_bucket{le="+Inf"} 17`
-
-	input += "\n# EOF\n"
-
-	exp := []parsedEntry{
-		{
-			m:    "thing",
-			help: "Histogram with _created as first line",
-		}, {
-			m:   "thing",
-			typ: model.MetricTypeHistogram,
-		}, {
-			m:  `thing_count`,
-			ct: nil, // Should be int64p(1520872607123).
-		}, {
-			m:  `thing_sum`,
-			ct: nil, // Should be int64p(1520872607123).
-		}, {
-			m:  `thing_bucket{le="0.0"}`,
-			ct: nil, // Should be int64p(1520872607123).
-		}, {
-			m:  `thing_bucket{le="+Inf"}`,
-			ct: nil, // Should be int64p(1520872607123),
+thing_bucket{le="+Inf"} 17
+# HELP thing_c counter with _created as first line
+# TYPE thing_c counter
+thing_c_created 1520872607.123
+thing_c_total 14123.232
+# EOF
+`,
+			expected: []parsedEntry{
+				{
+					m:    "thing",
+					help: "histogram with _created as first line",
+				}, {
+					m:   "thing",
+					typ: model.MetricTypeHistogram,
+				}, {
+					m:  `thing_count`,
+					ct: nil, // Should be int64p(1520872607123).
+				}, {
+					m:  `thing_sum`,
+					ct: nil, // Should be int64p(1520872607123).
+				}, {
+					m:  `thing_bucket{le="0.0"}`,
+					ct: nil, // Should be int64p(1520872607123).
+				}, {
+					m:  `thing_bucket{le="+Inf"}`,
+					ct: nil, // Should be int64p(1520872607123),
+				},
+				{
+					m:    "thing_c",
+					help: "counter with _created as first line",
+				}, {
+					m:   "thing_c",
+					typ: model.MetricTypeCounter,
+				}, {
+					m:  `thing_c_total`,
+					ct: nil, // Should be int64p(1520872607123).
+				},
+			},
 		},
+		{
+			name: "counter with le label",
+			input: `# HELP foo good counter
+# TYPE foo counter
+foo_total 17.0
+foo_created 1520872607.123
+foo_total{le="b"} 17.0
+foo_created{le="b"} 1520872608.123
+# EOF
+`,
+			expected: []parsedEntry{
+				{
+					m:    "foo",
+					help: "good counter",
+				}, {
+					m:   "foo",
+					typ: model.MetricTypeCounter,
+				},
+				{
+					m:  `foo_total`,
+					ct: int64p(1520872607123),
+				},
+				{
+					m:  `foo_total{le="b"}`,
+					ct: int64p(1520872607123), // Wrong, should 1520872608123
+				},
+			},
+		},
+		{
+			// TODO(bwplotka): Kind of correct bevaviour? If yes, let's move to the OK tests above.
+			name: "maybe counter with no meta",
+			input: `foo_total 17.0
+foo_created 1520872607.123
+foo_total{a="b"} 17.0
+foo_created{a="b"} 1520872608.123
+# EOF
+`,
+			expected: []parsedEntry{
+				{
+					m: `foo_total`,
+				},
+				{
+					m: `foo_created`,
+				},
+				{
+					m: `foo_total{a="b"}`,
+				},
+				{
+					m: `foo_created{a="b"}`,
+				},
+			},
+		},
+	} {
+		t.Run(fmt.Sprintf("case=%v", tcase.name), func(t *testing.T) {
+			p := NewOpenMetricsParser([]byte(tcase.input), labels.NewSymbolTable(), WithOMParserCTSeriesSkipped())
+			got := testParse(t, p)
+			resetValAndLset(got) // Keep this test focused on metric, basic entries and CT only.
+			requireEntries(t, tcase.expected, got)
+		})
 	}
-
-	p := NewOpenMetricsParser([]byte(input), labels.NewSymbolTable(), WithOMParserCTSeriesSkipped())
-	got := testParse(t, p)
-	resetValAndLset(got) // Keep this test focused on metric, basic entries and CT only.
-	requireEntries(t, exp, got)
 }
 
 func resetValAndLset(e []parsedEntry) {