mirror of
https://github.com/prometheus/prometheus.git
synced 2024-12-31 16:37:26 -08:00
2e30f1231b
The docs suggest the Next method returns a bool, but that's not the case (`Entry` is an int). ``` // Next advances the parser to the next sample. It returns false if no // more samples were read or an error occurred. Next() (Entry, error) ``` The docs were first added ind80a3de235
in 2017. Back then the signature was indeed `func (p *Parser) Next() bool`. But then it got refactored in76a4a46cb0
and the signature changed with it, yet docs stayed the same - and eventually made their way into the `Parser` interface. However, the Protobuf parser does have the right wording:5de2df752f
``` // Next advances the parser to the next "sample" (emulating the behavior of a // text format parser). It returns (EntryInvalid, io.EOF) if no samples were // read. ``` Changing all other implementations (and the interface itself) to match this doc. Signed-off-by: Ondrej Kokes <ondrej.kokes@gmail.com>
515 lines
13 KiB
Go
515 lines
13 KiB
Go
// Copyright 2017 The Prometheus Authors
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
|
|
//go:generate go get -u modernc.org/golex
|
|
//go:generate golex -o=promlex.l.go promlex.l
|
|
|
|
package textparse
|
|
|
|
import (
|
|
"errors"
|
|
"fmt"
|
|
"io"
|
|
"math"
|
|
"strconv"
|
|
"strings"
|
|
"unicode/utf8"
|
|
"unsafe"
|
|
|
|
"github.com/prometheus/common/model"
|
|
|
|
"github.com/prometheus/prometheus/model/exemplar"
|
|
"github.com/prometheus/prometheus/model/histogram"
|
|
"github.com/prometheus/prometheus/model/labels"
|
|
"github.com/prometheus/prometheus/model/value"
|
|
)
|
|
|
|
type promlexer struct {
|
|
b []byte
|
|
i int
|
|
start int
|
|
err error
|
|
state int
|
|
}
|
|
|
|
type token int
|
|
|
|
const (
|
|
tInvalid token = -1
|
|
tEOF token = 0
|
|
tLinebreak token = iota
|
|
tWhitespace
|
|
tHelp
|
|
tType
|
|
tUnit
|
|
tEOFWord
|
|
tText
|
|
tComment
|
|
tBlank
|
|
tMName
|
|
tQString
|
|
tBraceOpen
|
|
tBraceClose
|
|
tLName
|
|
tLValue
|
|
tComma
|
|
tEqual
|
|
tTimestamp
|
|
tValue
|
|
)
|
|
|
|
func (t token) String() string {
|
|
switch t {
|
|
case tInvalid:
|
|
return "INVALID"
|
|
case tEOF:
|
|
return "EOF"
|
|
case tLinebreak:
|
|
return "LINEBREAK"
|
|
case tWhitespace:
|
|
return "WHITESPACE"
|
|
case tHelp:
|
|
return "HELP"
|
|
case tType:
|
|
return "TYPE"
|
|
case tUnit:
|
|
return "UNIT"
|
|
case tEOFWord:
|
|
return "EOFWORD"
|
|
case tText:
|
|
return "TEXT"
|
|
case tComment:
|
|
return "COMMENT"
|
|
case tBlank:
|
|
return "BLANK"
|
|
case tMName:
|
|
return "MNAME"
|
|
case tQString:
|
|
return "QSTRING"
|
|
case tBraceOpen:
|
|
return "BOPEN"
|
|
case tBraceClose:
|
|
return "BCLOSE"
|
|
case tLName:
|
|
return "LNAME"
|
|
case tLValue:
|
|
return "LVALUE"
|
|
case tEqual:
|
|
return "EQUAL"
|
|
case tComma:
|
|
return "COMMA"
|
|
case tTimestamp:
|
|
return "TIMESTAMP"
|
|
case tValue:
|
|
return "VALUE"
|
|
}
|
|
return fmt.Sprintf("<invalid: %d>", t)
|
|
}
|
|
|
|
// buf returns the buffer of the current token.
|
|
func (l *promlexer) buf() []byte {
|
|
return l.b[l.start:l.i]
|
|
}
|
|
|
|
func (l *promlexer) cur() byte {
|
|
return l.b[l.i]
|
|
}
|
|
|
|
// next advances the promlexer to the next character.
|
|
func (l *promlexer) next() byte {
|
|
l.i++
|
|
if l.i >= len(l.b) {
|
|
l.err = io.EOF
|
|
return byte(tEOF)
|
|
}
|
|
// Lex struggles with null bytes. If we are in a label value or help string, where
|
|
// they are allowed, consume them here immediately.
|
|
for l.b[l.i] == 0 && (l.state == sLValue || l.state == sMeta2 || l.state == sComment) {
|
|
l.i++
|
|
}
|
|
return l.b[l.i]
|
|
}
|
|
|
|
func (l *promlexer) Error(es string) {
|
|
l.err = errors.New(es)
|
|
}
|
|
|
|
// PromParser parses samples from a byte slice of samples in the official
|
|
// Prometheus text exposition format.
|
|
type PromParser struct {
|
|
l *promlexer
|
|
builder labels.ScratchBuilder
|
|
series []byte
|
|
text []byte
|
|
mtype model.MetricType
|
|
val float64
|
|
ts int64
|
|
hasTS bool
|
|
start int
|
|
// offsets is a list of offsets into series that describe the positions
|
|
// of the metric name and label names and values for this series.
|
|
// p.offsets[0] is the start character of the metric name.
|
|
// p.offsets[1] is the end of the metric name.
|
|
// Subsequently, p.offsets is a pair of pair of offsets for the positions
|
|
// of the label name and value start and end characters.
|
|
offsets []int
|
|
}
|
|
|
|
// NewPromParser returns a new parser of the byte slice.
|
|
func NewPromParser(b []byte, st *labels.SymbolTable) Parser {
|
|
return &PromParser{
|
|
l: &promlexer{b: append(b, '\n')},
|
|
builder: labels.NewScratchBuilderWithSymbolTable(st, 16),
|
|
}
|
|
}
|
|
|
|
// Series returns the bytes of the series, the timestamp if set, and the value
|
|
// of the current sample.
|
|
func (p *PromParser) Series() ([]byte, *int64, float64) {
|
|
if p.hasTS {
|
|
return p.series, &p.ts, p.val
|
|
}
|
|
return p.series, nil, p.val
|
|
}
|
|
|
|
// Histogram returns (nil, nil, nil, nil) for now because the Prometheus text
|
|
// format does not support sparse histograms yet.
|
|
func (p *PromParser) Histogram() ([]byte, *int64, *histogram.Histogram, *histogram.FloatHistogram) {
|
|
return nil, nil, nil, nil
|
|
}
|
|
|
|
// Help returns the metric name and help text in the current entry.
|
|
// Must only be called after Next returned a help entry.
|
|
// The returned byte slices become invalid after the next call to Next.
|
|
func (p *PromParser) Help() ([]byte, []byte) {
|
|
m := p.l.b[p.offsets[0]:p.offsets[1]]
|
|
|
|
// Replacer causes allocations. Replace only when necessary.
|
|
if strings.IndexByte(yoloString(p.text), byte('\\')) >= 0 {
|
|
return m, []byte(helpReplacer.Replace(string(p.text)))
|
|
}
|
|
return m, p.text
|
|
}
|
|
|
|
// Type returns the metric name and type in the current entry.
|
|
// Must only be called after Next returned a type entry.
|
|
// The returned byte slices become invalid after the next call to Next.
|
|
func (p *PromParser) Type() ([]byte, model.MetricType) {
|
|
return p.l.b[p.offsets[0]:p.offsets[1]], p.mtype
|
|
}
|
|
|
|
// Unit returns the metric name and unit in the current entry.
|
|
// Must only be called after Next returned a unit entry.
|
|
// The returned byte slices become invalid after the next call to Next.
|
|
func (p *PromParser) Unit() ([]byte, []byte) {
|
|
// The Prometheus format does not have units.
|
|
return nil, nil
|
|
}
|
|
|
|
// Comment returns the text of the current comment.
|
|
// Must only be called after Next returned a comment entry.
|
|
// The returned byte slice becomes invalid after the next call to Next.
|
|
func (p *PromParser) Comment() []byte {
|
|
return p.text
|
|
}
|
|
|
|
// Metric writes the labels of the current sample into the passed labels.
|
|
// It returns the string from which the metric was parsed.
|
|
func (p *PromParser) Metric(l *labels.Labels) string {
|
|
// Copy the buffer to a string: this is only necessary for the return value.
|
|
s := string(p.series)
|
|
|
|
p.builder.Reset()
|
|
metricName := unreplace(s[p.offsets[0]-p.start : p.offsets[1]-p.start])
|
|
p.builder.Add(labels.MetricName, metricName)
|
|
|
|
for i := 2; i < len(p.offsets); i += 4 {
|
|
a := p.offsets[i] - p.start
|
|
b := p.offsets[i+1] - p.start
|
|
label := unreplace(s[a:b])
|
|
c := p.offsets[i+2] - p.start
|
|
d := p.offsets[i+3] - p.start
|
|
value := unreplace(s[c:d])
|
|
p.builder.Add(label, value)
|
|
}
|
|
|
|
p.builder.Sort()
|
|
*l = p.builder.Labels()
|
|
|
|
return s
|
|
}
|
|
|
|
// Exemplar implements the Parser interface. However, since the classic
|
|
// Prometheus text format does not support exemplars, this implementation simply
|
|
// returns false and does nothing else.
|
|
func (p *PromParser) Exemplar(*exemplar.Exemplar) bool {
|
|
return false
|
|
}
|
|
|
|
// CreatedTimestamp returns nil as it's not implemented yet.
|
|
// TODO(bwplotka): https://github.com/prometheus/prometheus/issues/12980
|
|
func (p *PromParser) CreatedTimestamp() *int64 {
|
|
return nil
|
|
}
|
|
|
|
// nextToken returns the next token from the promlexer. It skips over tabs
|
|
// and spaces.
|
|
func (p *PromParser) nextToken() token {
|
|
for {
|
|
if tok := p.l.Lex(); tok != tWhitespace {
|
|
return tok
|
|
}
|
|
}
|
|
}
|
|
|
|
func (p *PromParser) parseError(exp string, got token) error {
|
|
e := p.l.i + 1
|
|
if len(p.l.b) < e {
|
|
e = len(p.l.b)
|
|
}
|
|
return fmt.Errorf("%s, got %q (%q) while parsing: %q", exp, p.l.b[p.l.start:e], got, p.l.b[p.start:e])
|
|
}
|
|
|
|
// Next advances the parser to the next sample.
|
|
// It returns (EntryInvalid, io.EOF) if no samples were read.
|
|
func (p *PromParser) Next() (Entry, error) {
|
|
var err error
|
|
|
|
p.start = p.l.i
|
|
p.offsets = p.offsets[:0]
|
|
|
|
switch t := p.nextToken(); t {
|
|
case tEOF:
|
|
return EntryInvalid, io.EOF
|
|
case tLinebreak:
|
|
// Allow full blank lines.
|
|
return p.Next()
|
|
|
|
case tHelp, tType:
|
|
switch t2 := p.nextToken(); t2 {
|
|
case tMName:
|
|
mStart := p.l.start
|
|
mEnd := p.l.i
|
|
if p.l.b[mStart] == '"' && p.l.b[mEnd-1] == '"' {
|
|
mStart++
|
|
mEnd--
|
|
}
|
|
p.offsets = append(p.offsets, mStart, mEnd)
|
|
default:
|
|
return EntryInvalid, p.parseError("expected metric name after "+t.String(), t2)
|
|
}
|
|
switch t2 := p.nextToken(); t2 {
|
|
case tText:
|
|
if len(p.l.buf()) > 1 {
|
|
p.text = p.l.buf()[1:]
|
|
} else {
|
|
p.text = []byte{}
|
|
}
|
|
default:
|
|
return EntryInvalid, fmt.Errorf("expected text in %s, got %v", t.String(), t2.String())
|
|
}
|
|
switch t {
|
|
case tType:
|
|
switch s := yoloString(p.text); s {
|
|
case "counter":
|
|
p.mtype = model.MetricTypeCounter
|
|
case "gauge":
|
|
p.mtype = model.MetricTypeGauge
|
|
case "histogram":
|
|
p.mtype = model.MetricTypeHistogram
|
|
case "summary":
|
|
p.mtype = model.MetricTypeSummary
|
|
case "untyped":
|
|
p.mtype = model.MetricTypeUnknown
|
|
default:
|
|
return EntryInvalid, fmt.Errorf("invalid metric type %q", s)
|
|
}
|
|
case tHelp:
|
|
if !utf8.Valid(p.text) {
|
|
return EntryInvalid, fmt.Errorf("help text %q is not a valid utf8 string", p.text)
|
|
}
|
|
}
|
|
if t := p.nextToken(); t != tLinebreak {
|
|
return EntryInvalid, p.parseError("linebreak expected after metadata", t)
|
|
}
|
|
switch t {
|
|
case tHelp:
|
|
return EntryHelp, nil
|
|
case tType:
|
|
return EntryType, nil
|
|
}
|
|
case tComment:
|
|
p.text = p.l.buf()
|
|
if t := p.nextToken(); t != tLinebreak {
|
|
return EntryInvalid, p.parseError("linebreak expected after comment", t)
|
|
}
|
|
return EntryComment, nil
|
|
case tBraceOpen:
|
|
// We found a brace, so make room for the eventual metric name. If these
|
|
// values aren't updated, then the metric name was not set inside the
|
|
// braces and we can return an error.
|
|
if len(p.offsets) == 0 {
|
|
p.offsets = []int{-1, -1}
|
|
}
|
|
if err := p.parseLVals(); err != nil {
|
|
return EntryInvalid, err
|
|
}
|
|
|
|
p.series = p.l.b[p.start:p.l.i]
|
|
return p.parseMetricSuffix(p.nextToken())
|
|
case tMName:
|
|
p.offsets = append(p.offsets, p.start, p.l.i)
|
|
p.series = p.l.b[p.start:p.l.i]
|
|
t2 := p.nextToken()
|
|
// If there's a brace, consume and parse the label values.
|
|
if t2 == tBraceOpen {
|
|
if err := p.parseLVals(); err != nil {
|
|
return EntryInvalid, err
|
|
}
|
|
p.series = p.l.b[p.start:p.l.i]
|
|
t2 = p.nextToken()
|
|
}
|
|
return p.parseMetricSuffix(t2)
|
|
|
|
default:
|
|
err = p.parseError("expected a valid start token", t)
|
|
}
|
|
return EntryInvalid, err
|
|
}
|
|
|
|
// parseLVals parses the contents inside the braces.
|
|
func (p *PromParser) parseLVals() error {
|
|
t := p.nextToken()
|
|
for {
|
|
curTStart := p.l.start
|
|
curTI := p.l.i
|
|
switch t {
|
|
case tBraceClose:
|
|
return nil
|
|
case tLName:
|
|
case tQString:
|
|
default:
|
|
return p.parseError("expected label name", t)
|
|
}
|
|
|
|
t = p.nextToken()
|
|
// A quoted string followed by a comma or brace is a metric name. Set the
|
|
// offsets and continue processing.
|
|
if t == tComma || t == tBraceClose {
|
|
if p.offsets[0] != -1 || p.offsets[1] != -1 {
|
|
return fmt.Errorf("metric name already set while parsing: %q", p.l.b[p.start:p.l.i])
|
|
}
|
|
p.offsets[0] = curTStart + 1
|
|
p.offsets[1] = curTI - 1
|
|
if t == tBraceClose {
|
|
return nil
|
|
}
|
|
t = p.nextToken()
|
|
continue
|
|
}
|
|
// We have a label name, and it might be quoted.
|
|
if p.l.b[curTStart] == '"' {
|
|
curTStart++
|
|
curTI--
|
|
}
|
|
p.offsets = append(p.offsets, curTStart, curTI)
|
|
if t != tEqual {
|
|
return p.parseError("expected equal", t)
|
|
}
|
|
if t := p.nextToken(); t != tLValue {
|
|
return p.parseError("expected label value", t)
|
|
}
|
|
if !utf8.Valid(p.l.buf()) {
|
|
return fmt.Errorf("invalid UTF-8 label value: %q", p.l.buf())
|
|
}
|
|
|
|
// The promlexer ensures the value string is quoted. Strip first
|
|
// and last character.
|
|
p.offsets = append(p.offsets, p.l.start+1, p.l.i-1)
|
|
|
|
// Free trailing commas are allowed. NOTE: this allows spaces between label
|
|
// names, unlike in OpenMetrics. It is not clear if this is intended or an
|
|
// accidental bug.
|
|
if t = p.nextToken(); t == tComma {
|
|
t = p.nextToken()
|
|
}
|
|
}
|
|
}
|
|
|
|
// parseMetricSuffix parses the end of the line after the metric name and
|
|
// labels. It starts parsing with the provided token.
|
|
func (p *PromParser) parseMetricSuffix(t token) (Entry, error) {
|
|
if p.offsets[0] == -1 {
|
|
return EntryInvalid, fmt.Errorf("metric name not set while parsing: %q", p.l.b[p.start:p.l.i])
|
|
}
|
|
if t != tValue {
|
|
return EntryInvalid, p.parseError("expected value after metric", t)
|
|
}
|
|
var err error
|
|
if p.val, err = parseFloat(yoloString(p.l.buf())); err != nil {
|
|
return EntryInvalid, fmt.Errorf("%w while parsing: %q", err, p.l.b[p.start:p.l.i])
|
|
}
|
|
// Ensure canonical NaN value.
|
|
if math.IsNaN(p.val) {
|
|
p.val = math.Float64frombits(value.NormalNaN)
|
|
}
|
|
p.hasTS = false
|
|
switch t := p.nextToken(); t {
|
|
case tLinebreak:
|
|
break
|
|
case tTimestamp:
|
|
p.hasTS = true
|
|
if p.ts, err = strconv.ParseInt(yoloString(p.l.buf()), 10, 64); err != nil {
|
|
return EntryInvalid, fmt.Errorf("%w while parsing: %q", err, p.l.b[p.start:p.l.i])
|
|
}
|
|
if t2 := p.nextToken(); t2 != tLinebreak {
|
|
return EntryInvalid, p.parseError("expected next entry after timestamp", t2)
|
|
}
|
|
default:
|
|
return EntryInvalid, p.parseError("expected timestamp or new record", t)
|
|
}
|
|
|
|
return EntrySeries, nil
|
|
}
|
|
|
|
var lvalReplacer = strings.NewReplacer(
|
|
`\"`, "\"",
|
|
`\\`, "\\",
|
|
`\n`, "\n",
|
|
)
|
|
|
|
var helpReplacer = strings.NewReplacer(
|
|
`\\`, "\\",
|
|
`\n`, "\n",
|
|
)
|
|
|
|
func unreplace(s string) string {
|
|
// Replacer causes allocations. Replace only when necessary.
|
|
if strings.IndexByte(s, byte('\\')) >= 0 {
|
|
return lvalReplacer.Replace(s)
|
|
}
|
|
return s
|
|
}
|
|
|
|
func yoloString(b []byte) string {
|
|
return *((*string)(unsafe.Pointer(&b)))
|
|
}
|
|
|
|
func parseFloat(s string) (float64, error) {
|
|
// Keep to pre-Go 1.13 float formats.
|
|
if strings.ContainsAny(s, "pP_") {
|
|
return 0, fmt.Errorf("unsupported character in float")
|
|
}
|
|
return strconv.ParseFloat(s, 64)
|
|
}
|