diff --git a/model/textparse/openmetricsparse.go b/model/textparse/openmetricsparse.go index 0e82dc9f5..13629e66d 100644 --- a/model/textparse/openmetricsparse.go +++ b/model/textparse/openmetricsparse.go @@ -17,6 +17,7 @@ package textparse import ( + "bytes" "errors" "fmt" "io" @@ -24,6 +25,7 @@ import ( "strings" "unicode/utf8" + "github.com/cespare/xxhash/v2" "github.com/prometheus/common/model" "github.com/prometheus/prometheus/model/exemplar" @@ -72,15 +74,16 @@ func (l *openMetricsLexer) Error(es string) { // OpenMetrics text exposition format. // This is based on the working draft https://docs.google.com/document/u/1/d/1KwV0mAXwwbvvifBvDKH_LU1YjyXE_wxCkHNoCGq1GX0/edit type OpenMetricsParser struct { - l *openMetricsLexer - builder labels.ScratchBuilder - series []byte - text []byte - mtype model.MetricType - val float64 - ts int64 - hasTS bool - start int + l *openMetricsLexer + builder labels.ScratchBuilder + series []byte + mfNameLen int // length of metric family name to get from series. + text []byte + mtype model.MetricType + val float64 + ts int64 + hasTS bool + start int // offsets is a list of offsets into series that describe the positions // of the metric name and label names and values for this series. // p.offsets[0] is the start character of the metric name. @@ -98,10 +101,10 @@ type OpenMetricsParser struct { // Created timestamp parsing state. ct int64 ctHashSet uint64 - // visitedName is the metric name of the last visited metric when peeking ahead + // visitedMFName is the metric family name of the last visited metric when peeking ahead // for _created series during the execution of the CreatedTimestamp method. - visitedName string - skipCTSeries bool + visitedMFName []byte + skipCTSeries bool } type openMetricsParserOptions struct { @@ -260,25 +263,24 @@ func (p *OpenMetricsParser) Exemplar(e *exemplar.Exemplar) bool { func (p *OpenMetricsParser) CreatedTimestamp() *int64 { if !typeRequiresCT(p.mtype) { // Not a CT supported metric type, fast path. - p.ct = 0 - p.visitedName = "" - p.ctHashSet = 0 + p.ctHashSet = 0 // Use ctHashSet as a single way of telling "empty cache" return nil } var ( - currLset labels.Labels - buf []byte - peekWithoutNameLsetHash uint64 + buf []byte + currName []byte ) - p.Metric(&currLset) - currFamilyLsetHash, buf := currLset.HashWithoutLabels(buf, labels.MetricName, "le", "quantile") - currName := currLset.Get(model.MetricNameLabel) - currName = findBaseMetricName(currName) + if len(p.series) > 1 && p.series[0] == '{' && p.series[1] == '"' { + // special case for UTF-8 encoded metric family names. + currName = p.series[p.offsets[0]-p.start : p.mfNameLen+2] + } else { + currName = p.series[p.offsets[0]-p.start : p.mfNameLen] + } - // make sure we're on a new metric before returning - if currName == p.visitedName && currFamilyLsetHash == p.ctHashSet && p.visitedName != "" && p.ctHashSet > 0 && p.ct > 0 { - // CT is already known, fast path. + currHash := p.seriesHash(&buf, currName) + // Check cache, perhaps we fetched something already. + if currHash == p.ctHashSet && p.ct > 0 { return &p.ct } @@ -309,17 +311,15 @@ func (p *OpenMetricsParser) CreatedTimestamp() *int64 { return nil } - var peekedLset labels.Labels - p.Metric(&peekedLset) - peekedName := peekedLset.Get(model.MetricNameLabel) - if !strings.HasSuffix(peekedName, "_created") { + peekedName := p.series[p.offsets[0]-p.start : p.offsets[1]-p.start] + if len(peekedName) < 8 || string(peekedName[len(peekedName)-8:]) != "_created" { // Not a CT line, search more. continue } - // We got a CT line here, but let's search if CT line is actually for our series, edge case. - peekWithoutNameLsetHash, _ = peekedLset.HashWithoutLabels(buf, labels.MetricName, "le", "quantile") - if peekWithoutNameLsetHash != currFamilyLsetHash { + // Remove _created suffix. + peekedHash := p.seriesHash(&buf, peekedName[:len(peekedName)-8]) + if peekedHash != currHash { // Found CT line for a different series, for our series no CT. p.resetCTParseValues(resetLexer) return nil @@ -328,44 +328,63 @@ func (p *OpenMetricsParser) CreatedTimestamp() *int64 { // All timestamps in OpenMetrics are Unix Epoch in seconds. Convert to milliseconds. // https://github.com/OpenObservability/OpenMetrics/blob/main/specification/OpenMetrics.md#timestamps ct := int64(p.val * 1000.0) - p.setCTParseValues(ct, currFamilyLsetHash, currName, true, resetLexer) + p.setCTParseValues(ct, currHash, currName, true, resetLexer) return &ct } } +var ( + leBytes = []byte{108, 101} + quantileBytes = []byte{113, 117, 97, 110, 116, 105, 108, 101} +) + +// seriesHash generates a hash based on the metric family name and the offsets +// of label names and values from the parsed OpenMetrics data. It skips quantile +// and le labels for summaries and histograms respectively. +func (p *OpenMetricsParser) seriesHash(offsetsArr *[]byte, metricFamilyName []byte) uint64 { + // Iterate through p.offsets to find the label names and values. + for i := 2; i < len(p.offsets); i += 4 { + lStart := p.offsets[i] - p.start + lEnd := p.offsets[i+1] - p.start + label := p.series[lStart:lEnd] + // Skip quantile and le labels for summaries and histograms. + if p.mtype == model.MetricTypeSummary && bytes.Equal(label, quantileBytes) { + continue + } + if p.mtype == model.MetricTypeHistogram && bytes.Equal(label, leBytes) { + continue + } + *offsetsArr = append(*offsetsArr, p.series[lStart:lEnd]...) + vStart := p.offsets[i+2] - p.start + vEnd := p.offsets[i+3] - p.start + *offsetsArr = append(*offsetsArr, p.series[vStart:vEnd]...) + } + + *offsetsArr = append(*offsetsArr, metricFamilyName...) + hashedOffsets := xxhash.Sum64(*offsetsArr) + + // Reset the offsets array for later reuse. + *offsetsArr = (*offsetsArr)[:0] + return hashedOffsets +} + // setCTParseValues sets the parser to the state after CreatedTimestamp method was called and CT was found. // This is useful to prevent re-parsing the same series again and early return the CT value. -func (p *OpenMetricsParser) setCTParseValues(ct int64, ctHashSet uint64, visitedName string, skipCTSeries bool, resetLexer *openMetricsLexer) { +func (p *OpenMetricsParser) setCTParseValues(ct int64, ctHashSet uint64, mfName []byte, skipCTSeries bool, resetLexer *openMetricsLexer) { p.ct = ct p.l = resetLexer p.ctHashSet = ctHashSet - p.visitedName = visitedName - p.skipCTSeries = skipCTSeries + p.visitedMFName = mfName + p.skipCTSeries = skipCTSeries // Do we need to set it? } // resetCtParseValues resets the parser to the state before CreatedTimestamp method was called. func (p *OpenMetricsParser) resetCTParseValues(resetLexer *openMetricsLexer) { p.l = resetLexer - p.ct = 0 p.ctHashSet = 0 - p.visitedName = "" p.skipCTSeries = true } -// findBaseMetricName returns the metric name without reserved suffixes such as "_created", -// "_sum", etc. based on the OpenMetrics specification found at -// https://github.com/OpenObservability/OpenMetrics/blob/main/specification/OpenMetrics.md. -// If no suffix is found, the original name is returned. -func findBaseMetricName(name string) string { - suffixes := []string{"_created", "_count", "_sum", "_bucket", "_total", "_gcount", "_gsum", "_info"} - for _, suffix := range suffixes { - if strings.HasSuffix(name, suffix) { - return strings.TrimSuffix(name, suffix) - } - } - return name -} - // typeRequiresCT returns true if the metric type requires a _created timestamp. func typeRequiresCT(t model.MetricType) bool { switch t { @@ -419,6 +438,7 @@ func (p *OpenMetricsParser) Next() (Entry, error) { mStart++ mEnd-- } + p.mfNameLen = mEnd - mStart p.offsets = append(p.offsets, mStart, mEnd) default: return EntryInvalid, p.parseError("expected metric name after "+t.String(), t2) diff --git a/model/textparse/openmetricsparse_test.go b/model/textparse/openmetricsparse_test.go index bcb25a253..467a23771 100644 --- a/model/textparse/openmetricsparse_test.go +++ b/model/textparse/openmetricsparse_test.go @@ -14,6 +14,7 @@ package textparse import ( + "fmt" "io" "testing" @@ -71,6 +72,8 @@ foo_total 17.0 1520879607.789 # {id="counter-test"} 5 foo_created 1520872607.123 foo_total{a="b"} 17.0 1520879607.789 # {id="counter-test"} 5 foo_created{a="b"} 1520872607.123 +foo_total{le="c"} 21.0 +foo_created{le="c"} 1520872621.123 # HELP bar Summary with CT at the end, making sure we find CT even if it's multiple lines a far # TYPE bar summary bar_count 17.0 @@ -294,6 +297,11 @@ foobar{quantile="0.99"} 150.1` {Labels: labels.FromStrings("id", "counter-test"), Value: 5}, }, ct: int64p(1520872607123), + }, { + m: `foo_total{le="c"}`, + v: 21.0, + lset: labels.FromStrings("__name__", "foo_total", "le", "c"), + ct: int64p(1520872621123), }, { m: "bar", help: "Summary with CT at the end, making sure we find CT even if it's multiple lines a far", @@ -820,7 +828,7 @@ func TestOpenMetricsParseErrors(t *testing.T) { for err == nil { _, err = p.Next() } - require.EqualError(t, err, c.err, "test %d: %s", i, c.input) + require.Equal(t, c.err, err.Error(), "test %d: %s", i, c.input) } } @@ -899,42 +907,97 @@ func TestOMNullByteHandling(t *testing.T) { // current OM spec limitations or clients with broken OM format. // TODO(maniktherana): Make sure OM 1.1/2.0 pass CT via metadata or exemplar-like to avoid this. func TestCTParseFailures(t *testing.T) { - input := `# HELP thing Histogram with _created as first line + for _, tcase := range []struct { + name string + input string + expected []parsedEntry + }{ + { + name: "_created line is a first one", + input: `# HELP thing histogram with _created as first line # TYPE thing histogram thing_created 1520872607.123 thing_count 17 thing_sum 324789.3 thing_bucket{le="0.0"} 0 -thing_bucket{le="+Inf"} 17` - - input += "\n# EOF\n" - - exp := []parsedEntry{ - { - m: "thing", - help: "Histogram with _created as first line", - }, { - m: "thing", - typ: model.MetricTypeHistogram, - }, { - m: `thing_count`, - ct: nil, // Should be int64p(1520872607123). - }, { - m: `thing_sum`, - ct: nil, // Should be int64p(1520872607123). - }, { - m: `thing_bucket{le="0.0"}`, - ct: nil, // Should be int64p(1520872607123). - }, { - m: `thing_bucket{le="+Inf"}`, - ct: nil, // Should be int64p(1520872607123), +thing_bucket{le="+Inf"} 17 +# HELP thing_c counter with _created as first line +# TYPE thing_c counter +thing_c_created 1520872607.123 +thing_c_total 14123.232 +# EOF +`, + expected: []parsedEntry{ + { + m: "thing", + help: "histogram with _created as first line", + }, + { + m: "thing", + typ: model.MetricTypeHistogram, + }, + { + m: `thing_count`, + ct: nil, // Should be int64p(1520872607123). + }, + { + m: `thing_sum`, + ct: nil, // Should be int64p(1520872607123). + }, + { + m: `thing_bucket{le="0.0"}`, + ct: nil, // Should be int64p(1520872607123). + }, + { + m: `thing_bucket{le="+Inf"}`, + ct: nil, // Should be int64p(1520872607123), + }, + { + m: "thing_c", + help: "counter with _created as first line", + }, + { + m: "thing_c", + typ: model.MetricTypeCounter, + }, + { + m: `thing_c_total`, + ct: nil, // Should be int64p(1520872607123). + }, + }, }, + { + // TODO(bwplotka): Kind of correct bevaviour? If yes, let's move to the OK tests above. + name: "maybe counter with no meta", + input: `foo_total 17.0 +foo_created 1520872607.123 +foo_total{a="b"} 17.0 +foo_created{a="b"} 1520872608.123 +# EOF +`, + expected: []parsedEntry{ + { + m: `foo_total`, + }, + { + m: `foo_created`, + }, + { + m: `foo_total{a="b"}`, + }, + { + m: `foo_created{a="b"}`, + }, + }, + }, + } { + t.Run(fmt.Sprintf("case=%v", tcase.name), func(t *testing.T) { + p := NewOpenMetricsParser([]byte(tcase.input), labels.NewSymbolTable(), WithOMParserCTSeriesSkipped()) + got := testParse(t, p) + resetValAndLset(got) // Keep this test focused on metric, basic entries and CT only. + requireEntries(t, tcase.expected, got) + }) } - - p := NewOpenMetricsParser([]byte(input), labels.NewSymbolTable(), WithOMParserCTSeriesSkipped()) - got := testParse(t, p) - resetValAndLset(got) // Keep this test focused on metric, basic entries and CT only. - requireEntries(t, exp, got) } func resetValAndLset(e []parsedEntry) {