Merge pull request #13271 from ywwg/owilliams/utf8

UTF-8: Add partial support for parsing UTF-8 metric and label names
This commit is contained in:
Björn Rabenstein 2024-02-16 11:42:55 +01:00 committed by GitHub
commit ac10cd4d99
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
17 changed files with 1652 additions and 1095 deletions

2
go.mod
View file

@ -52,7 +52,7 @@ require (
github.com/prometheus/alertmanager v0.26.0 github.com/prometheus/alertmanager v0.26.0
github.com/prometheus/client_golang v1.18.0 github.com/prometheus/client_golang v1.18.0
github.com/prometheus/client_model v0.5.0 github.com/prometheus/client_model v0.5.0
github.com/prometheus/common v0.46.0 github.com/prometheus/common v0.47.0
github.com/prometheus/common/assets v0.2.0 github.com/prometheus/common/assets v0.2.0
github.com/prometheus/common/sigv4 v0.1.0 github.com/prometheus/common/sigv4 v0.1.0
github.com/prometheus/exporter-toolkit v0.11.0 github.com/prometheus/exporter-toolkit v0.11.0

4
go.sum
View file

@ -670,8 +670,8 @@ github.com/prometheus/common v0.9.1/go.mod h1:yhUN8i9wzaXS3w1O07YhxHEBxD+W35wd8b
github.com/prometheus/common v0.10.0/go.mod h1:Tlit/dnDKsSWFlCLTWaA1cyBgKHSMdTB80sz/V91rCo= github.com/prometheus/common v0.10.0/go.mod h1:Tlit/dnDKsSWFlCLTWaA1cyBgKHSMdTB80sz/V91rCo=
github.com/prometheus/common v0.26.0/go.mod h1:M7rCNAaPfAosfx8veZJCuw84e35h3Cfd9VFqTh1DIvc= github.com/prometheus/common v0.26.0/go.mod h1:M7rCNAaPfAosfx8veZJCuw84e35h3Cfd9VFqTh1DIvc=
github.com/prometheus/common v0.29.0/go.mod h1:vu+V0TpY+O6vW9J44gczi3Ap/oXXR10b+M/gUGO4Hls= github.com/prometheus/common v0.29.0/go.mod h1:vu+V0TpY+O6vW9J44gczi3Ap/oXXR10b+M/gUGO4Hls=
github.com/prometheus/common v0.46.0 h1:doXzt5ybi1HBKpsZOL0sSkaNHJJqkyfEWZGGqqScV0Y= github.com/prometheus/common v0.47.0 h1:p5Cz0FNHo7SnWOmWmoRozVcjEp0bIVU8cV7OShpjL1k=
github.com/prometheus/common v0.46.0/go.mod h1:Tp0qkxpb9Jsg54QMe+EAmqXkSV7Evdy1BTn+g2pa/hQ= github.com/prometheus/common v0.47.0/go.mod h1:0/KsvlIEfPQCQ5I2iNSAWKPZziNCvRs5EC6ILDTlAPc=
github.com/prometheus/common/assets v0.2.0 h1:0P5OrzoHrYBOSM1OigWL3mY8ZvV2N4zIE/5AahrSrfM= github.com/prometheus/common/assets v0.2.0 h1:0P5OrzoHrYBOSM1OigWL3mY8ZvV2N4zIE/5AahrSrfM=
github.com/prometheus/common/assets v0.2.0/go.mod h1:D17UVUE12bHbim7HzwUvtqm6gwBEaDQ0F+hIGbFbccI= github.com/prometheus/common/assets v0.2.0/go.mod h1:D17UVUE12bHbim7HzwUvtqm6gwBEaDQ0F+hIGbFbccI=
github.com/prometheus/common/sigv4 v0.1.0 h1:qoVebwtwwEhS85Czm2dSROY5fTo2PAPEVdDeppTwGX4= github.com/prometheus/common/sigv4 v0.1.0 h1:qoVebwtwwEhS85Czm2dSROY5fTo2PAPEVdDeppTwGX4=

View file

@ -50,12 +50,15 @@ S [ ]
<sComment>TYPE{S} l.state = sMeta1; return tType <sComment>TYPE{S} l.state = sMeta1; return tType
<sComment>UNIT{S} l.state = sMeta1; return tUnit <sComment>UNIT{S} l.state = sMeta1; return tUnit
<sComment>"EOF"\n? l.state = sInit; return tEOFWord <sComment>"EOF"\n? l.state = sInit; return tEOFWord
<sMeta1>\"(\\.|[^\\"])*\" l.state = sMeta2; return tMName
<sMeta1>{M}({M}|{D})* l.state = sMeta2; return tMName <sMeta1>{M}({M}|{D})* l.state = sMeta2; return tMName
<sMeta2>{S}{C}*\n l.state = sInit; return tText <sMeta2>{S}{C}*\n l.state = sInit; return tText
{M}({M}|{D})* l.state = sValue; return tMName {M}({M}|{D})* l.state = sValue; return tMName
<sValue>\{ l.state = sLabels; return tBraceOpen <sValue>\{ l.state = sLabels; return tBraceOpen
\{ l.state = sLabels; return tBraceOpen
<sLabels>{L}({L}|{D})* return tLName <sLabels>{L}({L}|{D})* return tLName
<sLabels>\"(\\.|[^\\"])*\" l.state = sLabels; return tQString
<sLabels>\} l.state = sValue; return tBraceClose <sLabels>\} l.state = sValue; return tBraceClose
<sLabels>= l.state = sLValue; return tEqual <sLabels>= l.state = sLValue; return tEqual
<sLabels>, return tComma <sLabels>, return tComma

File diff suppressed because it is too large Load diff

View file

@ -81,6 +81,12 @@ type OpenMetricsParser struct {
ts int64 ts int64
hasTS bool hasTS bool
start int start int
// offsets is a list of offsets into series that describe the positions
// of the metric name and label names and values for this series.
// p.offsets[0] is the start character of the metric name.
// p.offsets[1] is the end of the metric name.
// Subsequently, p.offsets holds groups of four offsets: the start and end
// characters of each label name, followed by those of its value.
offsets []int offsets []int
eOffsets []int eOffsets []int
@ -153,20 +159,18 @@ func (p *OpenMetricsParser) Metric(l *labels.Labels) string {
s := string(p.series) s := string(p.series)
p.builder.Reset() p.builder.Reset()
p.builder.Add(labels.MetricName, s[:p.offsets[0]-p.start]) metricName := unreplace(s[p.offsets[0]-p.start : p.offsets[1]-p.start])
p.builder.Add(labels.MetricName, metricName)
for i := 1; i < len(p.offsets); i += 4 { for i := 2; i < len(p.offsets); i += 4 {
a := p.offsets[i] - p.start a := p.offsets[i] - p.start
b := p.offsets[i+1] - p.start b := p.offsets[i+1] - p.start
label := unreplace(s[a:b])
c := p.offsets[i+2] - p.start c := p.offsets[i+2] - p.start
d := p.offsets[i+3] - p.start d := p.offsets[i+3] - p.start
value := unreplace(s[c:d])
value := s[c:d] p.builder.Add(label, value)
// Replacer causes allocations. Replace only when necessary.
if strings.IndexByte(s[c:d], byte('\\')) >= 0 {
value = lvalReplacer.Replace(value)
}
p.builder.Add(s[a:b], value)
} }
p.builder.Sort() p.builder.Sort()
@ -255,7 +259,13 @@ func (p *OpenMetricsParser) Next() (Entry, error) {
case tHelp, tType, tUnit: case tHelp, tType, tUnit:
switch t2 := p.nextToken(); t2 { switch t2 := p.nextToken(); t2 {
case tMName: case tMName:
p.offsets = append(p.offsets, p.l.start, p.l.i) mStart := p.l.start
mEnd := p.l.i
if p.l.b[mStart] == '"' && p.l.b[mEnd-1] == '"' {
mStart++
mEnd--
}
p.offsets = append(p.offsets, mStart, mEnd)
default: default:
return EntryInvalid, p.parseError("expected metric name after "+t.String(), t2) return EntryInvalid, p.parseError("expected metric name after "+t.String(), t2)
} }
@ -312,58 +322,33 @@ func (p *OpenMetricsParser) Next() (Entry, error) {
return EntryUnit, nil return EntryUnit, nil
} }
case tBraceOpen:
// We found a brace, so make room for the eventual metric name. If these
// values aren't updated, then the metric name was not set inside the
// braces and we can return an error.
if len(p.offsets) == 0 {
p.offsets = []int{-1, -1}
}
if p.offsets, err = p.parseLVals(p.offsets, false); err != nil {
return EntryInvalid, err
}
p.series = p.l.b[p.start:p.l.i]
return p.parseMetricSuffix(p.nextToken())
case tMName: case tMName:
p.offsets = append(p.offsets, p.l.i) p.offsets = append(p.offsets, p.start, p.l.i)
p.series = p.l.b[p.start:p.l.i] p.series = p.l.b[p.start:p.l.i]
t2 := p.nextToken() t2 := p.nextToken()
if t2 == tBraceOpen { if t2 == tBraceOpen {
p.offsets, err = p.parseLVals(p.offsets) p.offsets, err = p.parseLVals(p.offsets, false)
if err != nil { if err != nil {
return EntryInvalid, err return EntryInvalid, err
} }
p.series = p.l.b[p.start:p.l.i] p.series = p.l.b[p.start:p.l.i]
t2 = p.nextToken() t2 = p.nextToken()
} }
p.val, err = p.getFloatValue(t2, "metric") return p.parseMetricSuffix(t2)
if err != nil {
return EntryInvalid, err
}
p.hasTS = false
switch t2 := p.nextToken(); t2 {
case tEOF:
return EntryInvalid, errors.New("data does not end with # EOF")
case tLinebreak:
break
case tComment:
if err := p.parseComment(); err != nil {
return EntryInvalid, err
}
case tTimestamp:
p.hasTS = true
var ts float64
// A float is enough to hold what we need for millisecond resolution.
if ts, err = parseFloat(yoloString(p.l.buf()[1:])); err != nil {
return EntryInvalid, fmt.Errorf("%w while parsing: %q", err, p.l.b[p.start:p.l.i])
}
if math.IsNaN(ts) || math.IsInf(ts, 0) {
return EntryInvalid, fmt.Errorf("invalid timestamp %f", ts)
}
p.ts = int64(ts * 1000)
switch t3 := p.nextToken(); t3 {
case tLinebreak:
case tComment:
if err := p.parseComment(); err != nil {
return EntryInvalid, err
}
default:
return EntryInvalid, p.parseError("expected next entry after timestamp", t3)
}
default:
return EntryInvalid, p.parseError("expected timestamp or # symbol", t2)
}
return EntrySeries, nil
default: default:
err = p.parseError("expected a valid start token", t) err = p.parseError("expected a valid start token", t)
@ -374,7 +359,7 @@ func (p *OpenMetricsParser) Next() (Entry, error) {
func (p *OpenMetricsParser) parseComment() error { func (p *OpenMetricsParser) parseComment() error {
var err error var err error
// Parse the labels. // Parse the labels.
p.eOffsets, err = p.parseLVals(p.eOffsets) p.eOffsets, err = p.parseLVals(p.eOffsets, true)
if err != nil { if err != nil {
return err return err
} }
@ -415,38 +400,47 @@ func (p *OpenMetricsParser) parseComment() error {
return nil return nil
} }
func (p *OpenMetricsParser) parseLVals(offsets []int) ([]int, error) { func (p *OpenMetricsParser) parseLVals(offsets []int, isExemplar bool) ([]int, error) {
first := true
for {
t := p.nextToken() t := p.nextToken()
for {
curTStart := p.l.start
curTI := p.l.i
switch t { switch t {
case tBraceClose: case tBraceClose:
return offsets, nil return offsets, nil
case tComma: case tLName:
if first { case tQString:
return nil, p.parseError("expected label name or left brace", t) default:
}
t = p.nextToken()
if t != tLName {
return nil, p.parseError("expected label name", t) return nil, p.parseError("expected label name", t)
} }
case tLName:
if !first {
return nil, p.parseError("expected comma", t)
}
default:
if first {
return nil, p.parseError("expected label name or left brace", t)
}
return nil, p.parseError("expected comma or left brace", t)
t = p.nextToken()
// A quoted string followed by a comma or brace is a metric name. Set the
// offsets and continue processing. If this is an exemplar, this format
// is not allowed.
if t == tComma || t == tBraceClose {
if isExemplar {
return nil, p.parseError("expected label name", t)
} }
first = false if offsets[0] != -1 || offsets[1] != -1 {
// t is now a label name. return nil, fmt.Errorf("metric name already set while parsing: %q", p.l.b[p.start:p.l.i])
}
offsets[0] = curTStart + 1
offsets[1] = curTI - 1
if t == tBraceClose {
return offsets, nil
}
t = p.nextToken()
continue
}
// We have a label name, and it might be quoted.
if p.l.b[curTStart] == '"' {
curTStart++
curTI--
}
offsets = append(offsets, curTStart, curTI)
offsets = append(offsets, p.l.start, p.l.i) if t != tEqual {
if t := p.nextToken(); t != tEqual {
return nil, p.parseError("expected equal", t) return nil, p.parseError("expected equal", t)
} }
if t := p.nextToken(); t != tLValue { if t := p.nextToken(); t != tLValue {
@ -459,7 +453,62 @@ func (p *OpenMetricsParser) parseLVals(offsets []int) ([]int, error) {
// The openMetricsLexer ensures the value string is quoted. Strip first // The openMetricsLexer ensures the value string is quoted. Strip first
// and last character. // and last character.
offsets = append(offsets, p.l.start+1, p.l.i-1) offsets = append(offsets, p.l.start+1, p.l.i-1)
// Free trailing commas are allowed.
t = p.nextToken()
if t == tComma {
t = p.nextToken()
} else if t != tBraceClose {
return nil, p.parseError("expected comma or brace close", t)
} }
}
}
// parseMetricSuffix parses the end of the line after the metric name and
// labels. It starts parsing with the provided token.
//
// t is the token that follows the metric name / label set; it is handed to
// getFloatValue and the result is stored in p.val. The function then reads
// further tokens from the lexer to pick up an optional timestamp (stored in
// p.ts, with p.hasTS set) and an optional trailing comment, which is passed
// to parseComment.
//
// It returns EntrySeries on success. It returns EntryInvalid with an error
// when the metric name offset is still unset, when the value or timestamp
// cannot be parsed, when the timestamp is NaN or infinite, when the input
// ends right after the value, or when an unexpected token follows the
// timestamp.
func (p *OpenMetricsParser) parseMetricSuffix(t token) (Entry, error) {
// offsets[0] still holding -1 means no metric name start was recorded for
// this series.
if p.offsets[0] == -1 {
return EntryInvalid, fmt.Errorf("metric name not set while parsing: %q", p.l.b[p.start:p.l.i])
}
var err error
p.val, err = p.getFloatValue(t, "metric")
if err != nil {
return EntryInvalid, err
}
// Assume no timestamp until a tTimestamp token is seen below.
p.hasTS = false
// NOTE(review): this switch has no default case, so any token other than
// the four handled here falls through and the entry is still reported as
// EntrySeries. Confirm this is intended.
switch t2 := p.nextToken(); t2 {
case tEOF:
return EntryInvalid, errors.New("data does not end with # EOF")
case tLinebreak:
break
case tComment:
if err := p.parseComment(); err != nil {
return EntryInvalid, err
}
case tTimestamp:
p.hasTS = true
var ts float64
// A float is enough to hold what we need for millisecond resolution.
// The first byte of the token buffer is skipped (presumably the
// separator before the timestamp; verify against the lexer).
if ts, err = parseFloat(yoloString(p.l.buf()[1:])); err != nil {
return EntryInvalid, fmt.Errorf("%w while parsing: %q", err, p.l.b[p.start:p.l.i])
}
if math.IsNaN(ts) || math.IsInf(ts, 0) {
return EntryInvalid, fmt.Errorf("invalid timestamp %f", ts)
}
// Scale by 1000 and truncate to an integer for storage in p.ts.
p.ts = int64(ts * 1000)
// After a timestamp only a line break or a comment may follow.
switch t3 := p.nextToken(); t3 {
case tLinebreak:
case tComment:
if err := p.parseComment(); err != nil {
return EntryInvalid, err
}
default:
return EntryInvalid, p.parseError("expected next entry after timestamp", t3)
}
}
return EntrySeries, nil
} }
func (p *OpenMetricsParser) getFloatValue(t token, after string) (float64, error) { func (p *OpenMetricsParser) getFloatValue(t token, after string) (float64, error) {

View file

@ -301,6 +301,137 @@ foo_total 17.0 1520879607.789 # {id="counter-test"} 5`
require.Len(t, exp, i) require.Len(t, exp, i)
} }
// TestUTF8OpenMetricsParse feeds the OpenMetrics parser an exposition that
// uses quoted metric and label names and compares every entry returned by
// Next against the expected table, in order.
//
// model.NameValidationScheme is set to model.UTF8Validation for the duration
// of the test and restored afterwards.
func TestUTF8OpenMetricsParse(t *testing.T) {
	oldValidationScheme := model.NameValidationScheme
	model.NameValidationScheme = model.UTF8Validation
	defer func() {
		model.NameValidationScheme = oldValidationScheme
	}()

	// The quoted metric name appears inside the braces, both before and
	// after ordinary labels, and on its own without any labels.
	input := `# HELP "go.gc_duration_seconds" A summary of the GC invocation durations.
# TYPE "go.gc_duration_seconds" summary
# UNIT "go.gc_duration_seconds" seconds
{"go.gc_duration_seconds",quantile="0"} 4.9351e-05
{"go.gc_duration_seconds",quantile="0.25"} 7.424100000000001e-05
{"go.gc_duration_seconds",quantile="0.5",a="b"} 8.3835e-05
{"http.status",q="0.9",a="b"} 8.3835e-05
{"http.status",q="0.9",a="b"} 8.3835e-05
{q="0.9","http.status",a="b"} 8.3835e-05
{"go.gc_duration_seconds_sum"} 0.004304266
{"Heizölrückstoßabdämpfung 10€ metric with \"interesting\" {character\nchoices}","strange©™\n'quoted' \"name\""="6"} 10.0`

	input += "\n# EOF\n"

	// One expectation per entry returned by the parser, in input order.
	exp := []struct {
		lset    labels.Labels
		m       string
		t       *int64
		v       float64
		typ     model.MetricType
		help    string
		unit    string
		comment string
		e       *exemplar.Exemplar
	}{
		{
			m:    "go.gc_duration_seconds",
			help: "A summary of the GC invocation durations.",
		}, {
			m:   "go.gc_duration_seconds",
			typ: model.MetricTypeSummary,
		}, {
			m:    "go.gc_duration_seconds",
			unit: "seconds",
		}, {
			m:    `{"go.gc_duration_seconds",quantile="0"}`,
			v:    4.9351e-05,
			lset: labels.FromStrings("__name__", "go.gc_duration_seconds", "quantile", "0"),
		}, {
			m:    `{"go.gc_duration_seconds",quantile="0.25"}`,
			v:    7.424100000000001e-05,
			lset: labels.FromStrings("__name__", "go.gc_duration_seconds", "quantile", "0.25"),
		}, {
			m:    `{"go.gc_duration_seconds",quantile="0.5",a="b"}`,
			v:    8.3835e-05,
			lset: labels.FromStrings("__name__", "go.gc_duration_seconds", "quantile", "0.5", "a", "b"),
		}, {
			m:    `{"http.status",q="0.9",a="b"}`,
			v:    8.3835e-05,
			lset: labels.FromStrings("__name__", "http.status", "q", "0.9", "a", "b"),
		}, {
			m:    `{"http.status",q="0.9",a="b"}`,
			v:    8.3835e-05,
			lset: labels.FromStrings("__name__", "http.status", "q", "0.9", "a", "b"),
		}, {
			m:    `{q="0.9","http.status",a="b"}`,
			v:    8.3835e-05,
			lset: labels.FromStrings("__name__", "http.status", "q", "0.9", "a", "b"),
		}, {
			m:    `{"go.gc_duration_seconds_sum"}`,
			v:    0.004304266,
			lset: labels.FromStrings("__name__", "go.gc_duration_seconds_sum"),
		}, {
			// The series text keeps the escapes exactly as written in the
			// input, while the expected labels carry the unescaped quote
			// and newline characters. The metric name must match the input
			// character for character, including the euro sign.
			m: `{"Heizölrückstoßabdämpfung 10€ metric with \"interesting\" {character\nchoices}","strange©™\n'quoted' \"name\""="6"}`,
			v: 10.0,
			lset: labels.FromStrings("__name__", `Heizölrückstoßabdämpfung 10€ metric with "interesting" {character
choices}`, "strange©™\n'quoted' \"name\"", "6"),
		},
	}

	p := NewOpenMetricsParser([]byte(input))
	i := 0

	var res labels.Labels

	for {
		et, err := p.Next()
		if errors.Is(err, io.EOF) {
			break
		}
		require.NoError(t, err)

		switch et {
		case EntrySeries:
			m, ts, v := p.Series()

			var e exemplar.Exemplar
			p.Metric(&res)
			found := p.Exemplar(&e)
			require.Equal(t, exp[i].m, string(m))
			require.Equal(t, exp[i].t, ts)
			require.Equal(t, exp[i].v, v)
			require.Equal(t, exp[i].lset, res)
			if exp[i].e == nil {
				require.False(t, found)
			} else {
				require.True(t, found)
				require.Equal(t, *exp[i].e, e)
			}

		case EntryType:
			m, typ := p.Type()
			require.Equal(t, exp[i].m, string(m))
			require.Equal(t, exp[i].typ, typ)

		case EntryHelp:
			m, h := p.Help()
			require.Equal(t, exp[i].m, string(m))
			require.Equal(t, exp[i].help, string(h))

		case EntryUnit:
			m, u := p.Unit()
			require.Equal(t, exp[i].m, string(m))
			require.Equal(t, exp[i].unit, string(u))

		case EntryComment:
			require.Equal(t, exp[i].comment, string(p.Comment()))
		}

		i++
	}
	// Every expectation must have been consumed.
	require.Len(t, exp, i)
}
func TestOpenMetricsParseErrors(t *testing.T) { func TestOpenMetricsParseErrors(t *testing.T) {
cases := []struct { cases := []struct {
input string input string
@ -457,17 +588,13 @@ func TestOpenMetricsParseErrors(t *testing.T) {
input: "a{b='c'} 1\n# EOF\n", input: "a{b='c'} 1\n# EOF\n",
err: "expected label value, got \"'\" (\"INVALID\") while parsing: \"a{b='\"", err: "expected label value, got \"'\" (\"INVALID\") while parsing: \"a{b='\"",
}, },
{
input: "a{b=\"c\",} 1\n# EOF\n",
err: "expected label name, got \"} \" (\"BCLOSE\") while parsing: \"a{b=\\\"c\\\",} \"",
},
{ {
input: "a{,b=\"c\"} 1\n# EOF\n", input: "a{,b=\"c\"} 1\n# EOF\n",
err: "expected label name or left brace, got \",b\" (\"COMMA\") while parsing: \"a{,b\"", err: "expected label name, got \",b\" (\"COMMA\") while parsing: \"a{,b\"",
}, },
{ {
input: "a{b=\"c\"d=\"e\"} 1\n# EOF\n", input: "a{b=\"c\"d=\"e\"} 1\n# EOF\n",
err: "expected comma, got \"d=\" (\"LNAME\") while parsing: \"a{b=\\\"c\\\"d=\"", err: "expected comma or brace close, got \"d=\" (\"LNAME\") while parsing: \"a{b=\\\"c\\\"d=\"",
}, },
{ {
input: "a{b=\"c\",,d=\"e\"} 1\n# EOF\n", input: "a{b=\"c\",,d=\"e\"} 1\n# EOF\n",
@ -479,12 +606,24 @@ func TestOpenMetricsParseErrors(t *testing.T) {
}, },
{ {
input: "a{\xff=\"foo\"} 1\n# EOF\n", input: "a{\xff=\"foo\"} 1\n# EOF\n",
err: "expected label name or left brace, got \"\\xff\" (\"INVALID\") while parsing: \"a{\\xff\"", err: "expected label name, got \"\\xff\" (\"INVALID\") while parsing: \"a{\\xff\"",
}, },
{ {
input: "a{b=\"\xff\"} 1\n# EOF\n", input: "a{b=\"\xff\"} 1\n# EOF\n",
err: "invalid UTF-8 label value: \"\\\"\\xff\\\"\"", err: "invalid UTF-8 label value: \"\\\"\\xff\\\"\"",
}, },
{
input: `{"a","b = "c"}
# EOF
`,
err: "expected equal, got \"c\\\"\" (\"LNAME\") while parsing: \"{\\\"a\\\",\\\"b = \\\"c\\\"\"",
},
{
input: `{"a",b\nc="d"} 1
# EOF
`,
err: "expected equal, got \"\\\\\" (\"INVALID\") while parsing: \"{\\\"a\\\",b\\\\\"",
},
{ {
input: "a true\n", input: "a true\n",
err: "strconv.ParseFloat: parsing \"true\": invalid syntax while parsing: \"a true\"", err: "strconv.ParseFloat: parsing \"true\": invalid syntax while parsing: \"a true\"",
@ -495,7 +634,7 @@ func TestOpenMetricsParseErrors(t *testing.T) {
}, },
{ {
input: "empty_label_name{=\"\"} 0\n# EOF\n", input: "empty_label_name{=\"\"} 0\n# EOF\n",
err: "expected label name or left brace, got \"=\\\"\" (\"EQUAL\") while parsing: \"empty_label_name{=\\\"\"", err: "expected label name, got \"=\\\"\" (\"EQUAL\") while parsing: \"empty_label_name{=\\\"\"",
}, },
{ {
input: "foo 1_2\n\n# EOF\n", input: "foo 1_2\n\n# EOF\n",
@ -525,6 +664,14 @@ func TestOpenMetricsParseErrors(t *testing.T) {
input: `custom_metric_total 1 # {aa="bb"}`, input: `custom_metric_total 1 # {aa="bb"}`,
err: "expected value after exemplar labels, got \"}\" (\"EOF\") while parsing: \"custom_metric_total 1 # {aa=\\\"bb\\\"}\"", err: "expected value after exemplar labels, got \"}\" (\"EOF\") while parsing: \"custom_metric_total 1 # {aa=\\\"bb\\\"}\"",
}, },
{
input: `custom_metric_total 1 # {bb}`,
err: "expected label name, got \"}\" (\"BCLOSE\") while parsing: \"custom_metric_total 1 # {bb}\"",
},
{
input: `custom_metric_total 1 # {bb, a="dd"}`,
err: "expected label name, got \", \" (\"COMMA\") while parsing: \"custom_metric_total 1 # {bb, \"",
},
{ {
input: `custom_metric_total 1 # {aa="bb",,cc="dd"} 1`, input: `custom_metric_total 1 # {aa="bb",,cc="dd"} 1`,
err: "expected label name, got \",c\" (\"COMMA\") while parsing: \"custom_metric_total 1 # {aa=\\\"bb\\\",,c\"", err: "expected label name, got \",c\" (\"COMMA\") while parsing: \"custom_metric_total 1 # {aa=\\\"bb\\\",,c\"",
@ -551,7 +698,7 @@ func TestOpenMetricsParseErrors(t *testing.T) {
}, },
{ {
input: `{b="c",} 1`, input: `{b="c",} 1`,
err: "expected a valid start token, got \"{\" (\"INVALID\") while parsing: \"{\"", err: "metric name not set while parsing: \"{b=\\\"c\\\",} 1\"",
}, },
{ {
input: `a 1 NaN`, input: `a 1 NaN`,

View file

@ -66,12 +66,15 @@ C [^\n]
# return l.consumeComment() # return l.consumeComment()
<sComment>HELP[\t ]+ l.state = sMeta1; return tHelp <sComment>HELP[\t ]+ l.state = sMeta1; return tHelp
<sComment>TYPE[\t ]+ l.state = sMeta1; return tType <sComment>TYPE[\t ]+ l.state = sMeta1; return tType
<sMeta1>\"(\\.|[^\\"])*\" l.state = sMeta2; return tMName
<sMeta1>{M}({M}|{D})* l.state = sMeta2; return tMName <sMeta1>{M}({M}|{D})* l.state = sMeta2; return tMName
<sMeta2>{C}* l.state = sInit; return tText <sMeta2>{C}* l.state = sInit; return tText
{M}({M}|{D})* l.state = sValue; return tMName {M}({M}|{D})* l.state = sValue; return tMName
<sValue>\{ l.state = sLabels; return tBraceOpen <sValue>\{ l.state = sLabels; return tBraceOpen
\{ l.state = sLabels; return tBraceOpen
<sLabels>{L}({L}|{D})* return tLName <sLabels>{L}({L}|{D})* return tLName
<sLabels>\"(\\.|[^\\"])*\" l.state = sLabels; return tQString
<sLabels>\} l.state = sValue; return tBraceClose <sLabels>\} l.state = sValue; return tBraceClose
<sLabels>= l.state = sLValue; return tEqual <sLabels>= l.state = sLValue; return tEqual
<sLabels>, return tComma <sLabels>, return tComma

View file

@ -51,19 +51,19 @@ yystate0:
case 0: // start condition: INITIAL case 0: // start condition: INITIAL
goto yystart1 goto yystart1
case 1: // start condition: sComment case 1: // start condition: sComment
goto yystart8 goto yystart9
case 2: // start condition: sMeta1 case 2: // start condition: sMeta1
goto yystart19 goto yystart20
case 3: // start condition: sMeta2 case 3: // start condition: sMeta2
goto yystart21 goto yystart25
case 4: // start condition: sLabels case 4: // start condition: sLabels
goto yystart24 goto yystart28
case 5: // start condition: sLValue case 5: // start condition: sLValue
goto yystart29
case 6: // start condition: sValue
goto yystart33
case 7: // start condition: sTimestamp
goto yystart36 goto yystart36
case 6: // start condition: sValue
goto yystart40
case 7: // start condition: sTimestamp
goto yystart43
} }
yystate1: yystate1:
@ -82,6 +82,8 @@ yystart1:
goto yystate3 goto yystate3
case c == '\x00': case c == '\x00':
goto yystate2 goto yystate2
case c == '{':
goto yystate8
} }
yystate2: yystate2:
@ -123,40 +125,35 @@ yystate7:
c = l.next() c = l.next()
switch { switch {
default: default:
goto yyrule10 goto yyrule11
case c >= '0' && c <= ':' || c >= 'A' && c <= 'Z' || c == '_' || c >= 'a' && c <= 'z': case c >= '0' && c <= ':' || c >= 'A' && c <= 'Z' || c == '_' || c >= 'a' && c <= 'z':
goto yystate7 goto yystate7
} }
yystate8: yystate8:
c = l.next() c = l.next()
yystart8: goto yyrule13
yystate9:
c = l.next()
yystart9:
switch { switch {
default: default:
goto yyabort goto yyabort
case c == 'H': case c == 'H':
goto yystate9 goto yystate10
case c == 'T': case c == 'T':
goto yystate14 goto yystate15
case c == '\t' || c == ' ': case c == '\t' || c == ' ':
goto yystate3 goto yystate3
} }
yystate9:
c = l.next()
switch {
default:
goto yyabort
case c == 'E':
goto yystate10
}
yystate10: yystate10:
c = l.next() c = l.next()
switch { switch {
default: default:
goto yyabort goto yyabort
case c == 'L': case c == 'E':
goto yystate11 goto yystate11
} }
@ -165,7 +162,7 @@ yystate11:
switch { switch {
default: default:
goto yyabort goto yyabort
case c == 'P': case c == 'L':
goto yystate12 goto yystate12
} }
@ -174,7 +171,7 @@ yystate12:
switch { switch {
default: default:
goto yyabort goto yyabort
case c == '\t' || c == ' ': case c == 'P':
goto yystate13 goto yystate13
} }
@ -182,18 +179,18 @@ yystate13:
c = l.next() c = l.next()
switch { switch {
default: default:
goto yyrule6 goto yyabort
case c == '\t' || c == ' ': case c == '\t' || c == ' ':
goto yystate13 goto yystate14
} }
yystate14: yystate14:
c = l.next() c = l.next()
switch { switch {
default: default:
goto yyabort goto yyrule6
case c == 'Y': case c == '\t' || c == ' ':
goto yystate15 goto yystate14
} }
yystate15: yystate15:
@ -201,7 +198,7 @@ yystate15:
switch { switch {
default: default:
goto yyabort goto yyabort
case c == 'P': case c == 'Y':
goto yystate16 goto yystate16
} }
@ -210,7 +207,7 @@ yystate16:
switch { switch {
default: default:
goto yyabort goto yyabort
case c == 'E': case c == 'P':
goto yystate17 goto yystate17
} }
@ -219,7 +216,7 @@ yystate17:
switch { switch {
default: default:
goto yyabort goto yyabort
case c == '\t' || c == ' ': case c == 'E':
goto yystate18 goto yystate18
} }
@ -227,167 +224,167 @@ yystate18:
c = l.next() c = l.next()
switch { switch {
default: default:
goto yyrule7 goto yyabort
case c == '\t' || c == ' ': case c == '\t' || c == ' ':
goto yystate18 goto yystate19
} }
yystate19: yystate19:
c = l.next() c = l.next()
yystart19:
switch { switch {
default: default:
goto yyabort goto yyrule7
case c == ':' || c >= 'A' && c <= 'Z' || c == '_' || c >= 'a' && c <= 'z':
goto yystate20
case c == '\t' || c == ' ': case c == '\t' || c == ' ':
goto yystate3 goto yystate19
} }
yystate20: yystate20:
c = l.next() c = l.next()
yystart20:
switch { switch {
default: default:
goto yyrule8 goto yyabort
case c >= '0' && c <= ':' || c >= 'A' && c <= 'Z' || c == '_' || c >= 'a' && c <= 'z': case c == '"':
goto yystate20 goto yystate21
case c == ':' || c >= 'A' && c <= 'Z' || c == '_' || c >= 'a' && c <= 'z':
goto yystate24
case c == '\t' || c == ' ':
goto yystate3
} }
yystate21: yystate21:
c = l.next() c = l.next()
yystart21:
switch { switch {
default: default:
goto yyrule9 goto yyabort
case c == '\t' || c == ' ': case c == '"':
goto yystate23
case c >= '\x01' && c <= '\b' || c >= '\v' && c <= '\x1f' || c >= '!' && c <= 'ÿ':
goto yystate22 goto yystate22
case c == '\\':
goto yystate23
case c >= '\x01' && c <= '!' || c >= '#' && c <= '[' || c >= ']' && c <= 'ÿ':
goto yystate21
} }
yystate22: yystate22:
c = l.next() c = l.next()
switch { goto yyrule8
default:
goto yyrule9
case c >= '\x01' && c <= '\t' || c >= '\v' && c <= 'ÿ':
goto yystate22
}
yystate23: yystate23:
c = l.next() c = l.next()
switch { switch {
default: default:
goto yyrule3 goto yyabort
case c == '\t' || c == ' ': case c >= '\x01' && c <= '\t' || c >= '\v' && c <= 'ÿ':
goto yystate23 goto yystate21
case c >= '\x01' && c <= '\b' || c >= '\v' && c <= '\x1f' || c >= '!' && c <= 'ÿ':
goto yystate22
} }
yystate24: yystate24:
c = l.next() c = l.next()
yystart24:
switch { switch {
default: default:
goto yyabort goto yyrule9
case c == ',': case c >= '0' && c <= ':' || c >= 'A' && c <= 'Z' || c == '_' || c >= 'a' && c <= 'z':
goto yystate25 goto yystate24
case c == '=':
goto yystate26
case c == '\t' || c == ' ':
goto yystate3
case c == '}':
goto yystate28
case c >= 'A' && c <= 'Z' || c == '_' || c >= 'a' && c <= 'z':
goto yystate27
} }
yystate25: yystate25:
c = l.next() c = l.next()
goto yyrule15 yystart25:
switch {
default:
goto yyrule10
case c == '\t' || c == ' ':
goto yystate27
case c >= '\x01' && c <= '\b' || c >= '\v' && c <= '\x1f' || c >= '!' && c <= 'ÿ':
goto yystate26
}
yystate26: yystate26:
c = l.next() c = l.next()
goto yyrule14 switch {
default:
goto yyrule10
case c >= '\x01' && c <= '\t' || c >= '\v' && c <= 'ÿ':
goto yystate26
}
yystate27: yystate27:
c = l.next() c = l.next()
switch { switch {
default: default:
goto yyrule12 goto yyrule3
case c >= '0' && c <= '9' || c >= 'A' && c <= 'Z' || c == '_' || c >= 'a' && c <= 'z': case c == '\t' || c == ' ':
goto yystate27 goto yystate27
case c >= '\x01' && c <= '\b' || c >= '\v' && c <= '\x1f' || c >= '!' && c <= 'ÿ':
goto yystate26
} }
yystate28: yystate28:
c = l.next() c = l.next()
goto yyrule13 yystart28:
switch {
default:
goto yyabort
case c == '"':
goto yystate29
case c == ',':
goto yystate32
case c == '=':
goto yystate33
case c == '\t' || c == ' ':
goto yystate3
case c == '}':
goto yystate35
case c >= 'A' && c <= 'Z' || c == '_' || c >= 'a' && c <= 'z':
goto yystate34
}
yystate29: yystate29:
c = l.next() c = l.next()
yystart29:
switch { switch {
default: default:
goto yyabort goto yyabort
case c == '"': case c == '"':
goto yystate30 goto yystate30
case c == '\t' || c == ' ': case c == '\\':
goto yystate3 goto yystate31
case c >= '\x01' && c <= '!' || c >= '#' && c <= '[' || c >= ']' && c <= 'ÿ':
goto yystate29
} }
yystate30: yystate30:
c = l.next() c = l.next()
switch { goto yyrule15
default:
goto yyabort
case c == '"':
goto yystate31
case c == '\\':
goto yystate32
case c >= '\x01' && c <= '!' || c >= '#' && c <= '[' || c >= ']' && c <= 'ÿ':
goto yystate30
}
yystate31: yystate31:
c = l.next()
goto yyrule16
yystate32:
c = l.next() c = l.next()
switch { switch {
default: default:
goto yyabort goto yyabort
case c >= '\x01' && c <= '\t' || c >= '\v' && c <= 'ÿ': case c >= '\x01' && c <= '\t' || c >= '\v' && c <= 'ÿ':
goto yystate30 goto yystate29
} }
yystate32:
c = l.next()
goto yyrule18
yystate33: yystate33:
c = l.next() c = l.next()
yystart33: goto yyrule17
switch {
default:
goto yyabort
case c == '\t' || c == ' ':
goto yystate3
case c == '{':
goto yystate35
case c >= '\x01' && c <= '\b' || c >= '\v' && c <= '\x1f' || c >= '!' && c <= 'z' || c >= '|' && c <= 'ÿ':
goto yystate34
}
yystate34: yystate34:
c = l.next() c = l.next()
switch { switch {
default: default:
goto yyrule17 goto yyrule14
case c >= '\x01' && c <= '\b' || c >= '\v' && c <= '\x1f' || c >= '!' && c <= 'z' || c >= '|' && c <= 'ÿ': case c >= '0' && c <= '9' || c >= 'A' && c <= 'Z' || c == '_' || c >= 'a' && c <= 'z':
goto yystate34 goto yystate34
} }
yystate35: yystate35:
c = l.next() c = l.next()
goto yyrule11 goto yyrule16
yystate36: yystate36:
c = l.next() c = l.next()
@ -395,25 +392,90 @@ yystart36:
switch { switch {
default: default:
goto yyabort goto yyabort
case c == '\n': case c == '"':
goto yystate37 goto yystate37
case c == '\t' || c == ' ': case c == '\t' || c == ' ':
goto yystate3 goto yystate3
case c >= '0' && c <= '9':
goto yystate38
} }
yystate37: yystate37:
c = l.next() c = l.next()
goto yyrule19 switch {
default:
goto yyabort
case c == '"':
goto yystate38
case c == '\\':
goto yystate39
case c >= '\x01' && c <= '!' || c >= '#' && c <= '[' || c >= ']' && c <= 'ÿ':
goto yystate37
}
yystate38: yystate38:
c = l.next()
goto yyrule19
yystate39:
c = l.next() c = l.next()
switch { switch {
default: default:
goto yyrule18 goto yyabort
case c >= '\x01' && c <= '\t' || c >= '\v' && c <= 'ÿ':
goto yystate37
}
yystate40:
c = l.next()
yystart40:
switch {
default:
goto yyabort
case c == '\t' || c == ' ':
goto yystate3
case c == '{':
goto yystate42
case c >= '\x01' && c <= '\b' || c >= '\v' && c <= '\x1f' || c >= '!' && c <= 'z' || c >= '|' && c <= 'ÿ':
goto yystate41
}
yystate41:
c = l.next()
switch {
default:
goto yyrule20
case c >= '\x01' && c <= '\b' || c >= '\v' && c <= '\x1f' || c >= '!' && c <= 'z' || c >= '|' && c <= 'ÿ':
goto yystate41
}
yystate42:
c = l.next()
goto yyrule12
yystate43:
c = l.next()
yystart43:
switch {
default:
goto yyabort
case c == '\n':
goto yystate44
case c == '\t' || c == ' ':
goto yystate3
case c >= '0' && c <= '9': case c >= '0' && c <= '9':
goto yystate38 goto yystate45
}
yystate44:
c = l.next()
goto yyrule22
yystate45:
c = l.next()
switch {
default:
goto yyrule21
case c >= '0' && c <= '9':
goto yystate45
} }
yyrule1: // \0 yyrule1: // \0
@ -451,67 +513,85 @@ yyrule7: // TYPE[\t ]+
return tType return tType
goto yystate0 goto yystate0
} }
yyrule8: // {M}({M}|{D})* yyrule8: // \"(\\.|[^\\"])*\"
{ {
l.state = sMeta2 l.state = sMeta2
return tMName return tMName
goto yystate0 goto yystate0
} }
yyrule9: // {C}* yyrule9: // {M}({M}|{D})*
{
l.state = sMeta2
return tMName
goto yystate0
}
yyrule10: // {C}*
{ {
l.state = sInit l.state = sInit
return tText return tText
goto yystate0 goto yystate0
} }
yyrule10: // {M}({M}|{D})* yyrule11: // {M}({M}|{D})*
{ {
l.state = sValue l.state = sValue
return tMName return tMName
goto yystate0 goto yystate0
} }
yyrule11: // \{ yyrule12: // \{
{ {
l.state = sLabels l.state = sLabels
return tBraceOpen return tBraceOpen
goto yystate0 goto yystate0
} }
yyrule12: // {L}({L}|{D})* yyrule13: // \{
{
l.state = sLabels
return tBraceOpen
goto yystate0
}
yyrule14: // {L}({L}|{D})*
{ {
return tLName return tLName
} }
yyrule13: // \} yyrule15: // \"(\\.|[^\\"])*\"
{
l.state = sLabels
return tQString
goto yystate0
}
yyrule16: // \}
{ {
l.state = sValue l.state = sValue
return tBraceClose return tBraceClose
goto yystate0 goto yystate0
} }
yyrule14: // = yyrule17: // =
{ {
l.state = sLValue l.state = sLValue
return tEqual return tEqual
goto yystate0 goto yystate0
} }
yyrule15: // , yyrule18: // ,
{ {
return tComma return tComma
} }
yyrule16: // \"(\\.|[^\\"])*\" yyrule19: // \"(\\.|[^\\"])*\"
{ {
l.state = sLabels l.state = sLabels
return tLValue return tLValue
goto yystate0 goto yystate0
} }
yyrule17: // [^{ \t\n]+ yyrule20: // [^{ \t\n]+
{ {
l.state = sTimestamp l.state = sTimestamp
return tValue return tValue
goto yystate0 goto yystate0
} }
yyrule18: // {D}+ yyrule21: // {D}+
{ {
return tTimestamp return tTimestamp
} }
yyrule19: // \n yyrule22: // \n
if true { // avoid go vet determining the below panic will not be reached if true { // avoid go vet determining the below panic will not be reached
l.state = sInit l.state = sInit
return tLinebreak return tLinebreak
@ -520,9 +600,7 @@ yyrule19: // \n
panic("unreachable") panic("unreachable")
yyabort: // no lexem recognized yyabort: // no lexem recognized
//
// silence unused label errors for build and satisfy go vet reachability analysis // silence unused label errors for build and satisfy go vet reachability analysis
//
{ {
if false { if false {
goto yyabort goto yyabort
@ -534,26 +612,26 @@ yyabort: // no lexem recognized
goto yystate1 goto yystate1
} }
if false { if false {
goto yystate8 goto yystate9
} }
if false { if false {
goto yystate19 goto yystate20
} }
if false { if false {
goto yystate21 goto yystate25
} }
if false { if false {
goto yystate24 goto yystate28
}
if false {
goto yystate29
}
if false {
goto yystate33
} }
if false { if false {
goto yystate36 goto yystate36
} }
if false {
goto yystate40
}
if false {
goto yystate43
}
} }
// Workaround to gobble up comments that started with a HELP or TYPE // Workaround to gobble up comments that started with a HELP or TYPE

View file

@ -57,6 +57,7 @@ const (
tComment tComment
tBlank tBlank
tMName tMName
tQString
tBraceOpen tBraceOpen
tBraceClose tBraceClose
tLName tLName
@ -93,6 +94,8 @@ func (t token) String() string {
return "BLANK" return "BLANK"
case tMName: case tMName:
return "MNAME" return "MNAME"
case tQString:
return "QSTRING"
case tBraceOpen: case tBraceOpen:
return "BOPEN" return "BOPEN"
case tBraceClose: case tBraceClose:
@ -153,6 +156,12 @@ type PromParser struct {
ts int64 ts int64
hasTS bool hasTS bool
start int start int
// offsets is a list of offsets into series that describe the positions
// of the metric name and label names and values for this series.
// p.offsets[0] is the start character of the metric name.
// p.offsets[1] is the end of the metric name.
// Subsequently, p.offsets is a pair of pair of offsets for the positions
// of the label name and value start and end characters.
offsets []int offsets []int
} }
@ -218,20 +227,17 @@ func (p *PromParser) Metric(l *labels.Labels) string {
s := string(p.series) s := string(p.series)
p.builder.Reset() p.builder.Reset()
p.builder.Add(labels.MetricName, s[:p.offsets[0]-p.start]) metricName := unreplace(s[p.offsets[0]-p.start : p.offsets[1]-p.start])
p.builder.Add(labels.MetricName, metricName)
for i := 1; i < len(p.offsets); i += 4 { for i := 2; i < len(p.offsets); i += 4 {
a := p.offsets[i] - p.start a := p.offsets[i] - p.start
b := p.offsets[i+1] - p.start b := p.offsets[i+1] - p.start
label := unreplace(s[a:b])
c := p.offsets[i+2] - p.start c := p.offsets[i+2] - p.start
d := p.offsets[i+3] - p.start d := p.offsets[i+3] - p.start
value := unreplace(s[c:d])
value := s[c:d] p.builder.Add(label, value)
// Replacer causes allocations. Replace only when necessary.
if strings.IndexByte(s[c:d], byte('\\')) >= 0 {
value = lvalReplacer.Replace(value)
}
p.builder.Add(s[a:b], value)
} }
p.builder.Sort() p.builder.Sort()
@ -289,7 +295,13 @@ func (p *PromParser) Next() (Entry, error) {
case tHelp, tType: case tHelp, tType:
switch t2 := p.nextToken(); t2 { switch t2 := p.nextToken(); t2 {
case tMName: case tMName:
p.offsets = append(p.offsets, p.l.start, p.l.i) mStart := p.l.start
mEnd := p.l.i
if p.l.b[mStart] == '"' && p.l.b[mEnd-1] == '"' {
mStart++
mEnd--
}
p.offsets = append(p.offsets, mStart, mEnd)
default: default:
return EntryInvalid, p.parseError("expected metric name after "+t.String(), t2) return EntryInvalid, p.parseError("expected metric name after "+t.String(), t2)
} }
@ -301,7 +313,7 @@ func (p *PromParser) Next() (Entry, error) {
p.text = []byte{} p.text = []byte{}
} }
default: default:
return EntryInvalid, fmt.Errorf("expected text in %s", t.String()) return EntryInvalid, fmt.Errorf("expected text in %s, got %v", t.String(), t2.String())
} }
switch t { switch t {
case tType: case tType:
@ -339,12 +351,24 @@ func (p *PromParser) Next() (Entry, error) {
return EntryInvalid, p.parseError("linebreak expected after comment", t) return EntryInvalid, p.parseError("linebreak expected after comment", t)
} }
return EntryComment, nil return EntryComment, nil
case tBraceOpen:
// We found a brace, so make room for the eventual metric name. If these
// values aren't updated, then the metric name was not set inside the
// braces and we can return an error.
if len(p.offsets) == 0 {
p.offsets = []int{-1, -1}
}
if err := p.parseLVals(); err != nil {
return EntryInvalid, err
}
case tMName:
p.offsets = append(p.offsets, p.l.i)
p.series = p.l.b[p.start:p.l.i] p.series = p.l.b[p.start:p.l.i]
return p.parseMetricSuffix(p.nextToken())
case tMName:
p.offsets = append(p.offsets, p.start, p.l.i)
p.series = p.l.b[p.start:p.l.i]
t2 := p.nextToken() t2 := p.nextToken()
// If there's a brace, consume and parse the label values.
if t2 == tBraceOpen { if t2 == tBraceOpen {
if err := p.parseLVals(); err != nil { if err := p.parseLVals(); err != nil {
return EntryInvalid, err return EntryInvalid, err
@ -352,9 +376,83 @@ func (p *PromParser) Next() (Entry, error) {
p.series = p.l.b[p.start:p.l.i] p.series = p.l.b[p.start:p.l.i]
t2 = p.nextToken() t2 = p.nextToken()
} }
if t2 != tValue { return p.parseMetricSuffix(t2)
return EntryInvalid, p.parseError("expected value after metric", t2)
default:
err = p.parseError("expected a valid start token", t)
} }
return EntryInvalid, err
}
// parseLVals parses the contents inside the braces.
func (p *PromParser) parseLVals() error {
t := p.nextToken()
for {
curTStart := p.l.start
curTI := p.l.i
switch t {
case tBraceClose:
return nil
case tLName:
case tQString:
default:
return p.parseError("expected label name", t)
}
t = p.nextToken()
// A quoted string followed by a comma or brace is a metric name. Set the
// offsets and continue processing.
if t == tComma || t == tBraceClose {
if p.offsets[0] != -1 || p.offsets[1] != -1 {
return fmt.Errorf("metric name already set while parsing: %q", p.l.b[p.start:p.l.i])
}
p.offsets[0] = curTStart + 1
p.offsets[1] = curTI - 1
if t == tBraceClose {
return nil
}
t = p.nextToken()
continue
}
// We have a label name, and it might be quoted.
if p.l.b[curTStart] == '"' {
curTStart++
curTI--
}
p.offsets = append(p.offsets, curTStart, curTI)
if t != tEqual {
return p.parseError("expected equal", t)
}
if t := p.nextToken(); t != tLValue {
return p.parseError("expected label value", t)
}
if !utf8.Valid(p.l.buf()) {
return fmt.Errorf("invalid UTF-8 label value: %q", p.l.buf())
}
// The promlexer ensures the value string is quoted. Strip first
// and last character.
p.offsets = append(p.offsets, p.l.start+1, p.l.i-1)
// Free trailing commas are allowed. NOTE: this allows spaces between label
// names, unlike in OpenMetrics. It is not clear if this is intended or an
// accidental bug.
if t = p.nextToken(); t == tComma {
t = p.nextToken()
}
}
}
// parseMetricSuffix parses the end of the line after the metric name and
// labels. It starts parsing with the provided token.
func (p *PromParser) parseMetricSuffix(t token) (Entry, error) {
if p.offsets[0] == -1 {
return EntryInvalid, fmt.Errorf("metric name not set while parsing: %q", p.l.b[p.start:p.l.i])
}
if t != tValue {
return EntryInvalid, p.parseError("expected value after metric", t)
}
var err error
if p.val, err = parseFloat(yoloString(p.l.buf())); err != nil { if p.val, err = parseFloat(yoloString(p.l.buf())); err != nil {
return EntryInvalid, fmt.Errorf("%w while parsing: %q", err, p.l.b[p.start:p.l.i]) return EntryInvalid, fmt.Errorf("%w while parsing: %q", err, p.l.b[p.start:p.l.i])
} }
@ -377,45 +475,8 @@ func (p *PromParser) Next() (Entry, error) {
default: default:
return EntryInvalid, p.parseError("expected timestamp or new record", t) return EntryInvalid, p.parseError("expected timestamp or new record", t)
} }
return EntrySeries, nil return EntrySeries, nil
default:
err = p.parseError("expected a valid start token", t)
}
return EntryInvalid, err
}
func (p *PromParser) parseLVals() error {
t := p.nextToken()
for {
switch t {
case tBraceClose:
return nil
case tLName:
default:
return p.parseError("expected label name", t)
}
p.offsets = append(p.offsets, p.l.start, p.l.i)
if t := p.nextToken(); t != tEqual {
return p.parseError("expected equal", t)
}
if t := p.nextToken(); t != tLValue {
return p.parseError("expected label value", t)
}
if !utf8.Valid(p.l.buf()) {
return fmt.Errorf("invalid UTF-8 label value: %q", p.l.buf())
}
// The promlexer ensures the value string is quoted. Strip first
// and last character.
p.offsets = append(p.offsets, p.l.start+1, p.l.i-1)
// Free trailing commas are allowed.
if t = p.nextToken(); t == tComma {
t = p.nextToken()
}
}
} }
var lvalReplacer = strings.NewReplacer( var lvalReplacer = strings.NewReplacer(
@ -429,6 +490,14 @@ var helpReplacer = strings.NewReplacer(
`\n`, "\n", `\n`, "\n",
) )
func unreplace(s string) string {
// Replacer causes allocations. Replace only when necessary.
if strings.IndexByte(s, byte('\\')) >= 0 {
return lvalReplacer.Replace(s)
}
return s
}
func yoloString(b []byte) string { func yoloString(b []byte) string {
return *((*string)(unsafe.Pointer(&b))) return *((*string)(unsafe.Pointer(&b)))
} }

View file

@ -48,6 +48,7 @@ go_gc_duration_seconds{ quantile="1.0", a="b" } 8.3835e-05
go_gc_duration_seconds { quantile="1.0", a="b" } 8.3835e-05 go_gc_duration_seconds { quantile="1.0", a="b" } 8.3835e-05
go_gc_duration_seconds { quantile= "1.0", a= "b", } 8.3835e-05 go_gc_duration_seconds { quantile= "1.0", a= "b", } 8.3835e-05
go_gc_duration_seconds { quantile = "1.0", a = "b" } 8.3835e-05 go_gc_duration_seconds { quantile = "1.0", a = "b" } 8.3835e-05
go_gc_duration_seconds { quantile = "2.0" a = "b" } 8.3835e-05
go_gc_duration_seconds_count 99 go_gc_duration_seconds_count 99
some:aggregate:rate5m{a_b="c"} 1 some:aggregate:rate5m{a_b="c"} 1
# HELP go_goroutines Number of goroutines that currently exist. # HELP go_goroutines Number of goroutines that currently exist.
@ -130,6 +131,11 @@ testmetric{label="\"bar\""} 1`
m: `go_gc_duration_seconds { quantile = "1.0", a = "b" }`, m: `go_gc_duration_seconds { quantile = "1.0", a = "b" }`,
v: 8.3835e-05, v: 8.3835e-05,
lset: labels.FromStrings("__name__", "go_gc_duration_seconds", "quantile", "1.0", "a", "b"), lset: labels.FromStrings("__name__", "go_gc_duration_seconds", "quantile", "1.0", "a", "b"),
}, {
// NOTE: Unlike OpenMetrics, Promparse allows spaces between label terms. This appears to be unintended and should probably be fixed.
m: `go_gc_duration_seconds { quantile = "2.0" a = "b" }`,
v: 8.3835e-05,
lset: labels.FromStrings("__name__", "go_gc_duration_seconds", "quantile", "2.0", "a", "b"),
}, { }, {
m: `go_gc_duration_seconds_count`, m: `go_gc_duration_seconds_count`,
v: 99, v: 99,
@ -213,6 +219,132 @@ testmetric{label="\"bar\""} 1`
require.Len(t, exp, i) require.Len(t, exp, i)
} }
func TestUTF8PromParse(t *testing.T) {
oldValidationScheme := model.NameValidationScheme
model.NameValidationScheme = model.UTF8Validation
defer func() {
model.NameValidationScheme = oldValidationScheme
}()
input := `# HELP "go.gc_duration_seconds" A summary of the GC invocation durations.
# TYPE "go.gc_duration_seconds" summary
{"go.gc_duration_seconds",quantile="0"} 4.9351e-05
{"go.gc_duration_seconds",quantile="0.25",} 7.424100000000001e-05
{"go.gc_duration_seconds",quantile="0.5",a="b"} 8.3835e-05
{"go.gc_duration_seconds",quantile="0.8", a="b"} 8.3835e-05
{"go.gc_duration_seconds", quantile="0.9", a="b"} 8.3835e-05
{"go.gc_duration_seconds", quantile="1.0", a="b" } 8.3835e-05
{ "go.gc_duration_seconds", quantile="1.0", a="b" } 8.3835e-05
{ "go.gc_duration_seconds", quantile= "1.0", a= "b", } 8.3835e-05
{ "go.gc_duration_seconds", quantile = "1.0", a = "b" } 8.3835e-05
{"go.gc_duration_seconds_count"} 99
{"Heizölrückstoßabdämpfung 10€ metric with \"interesting\" {character\nchoices}","strange©™\n'quoted' \"name\""="6"} 10.0`
exp := []struct {
lset labels.Labels
m string
t *int64
v float64
typ model.MetricType
help string
comment string
}{
{
m: "go.gc_duration_seconds",
help: "A summary of the GC invocation durations.",
}, {
m: "go.gc_duration_seconds",
typ: model.MetricTypeSummary,
}, {
m: `{"go.gc_duration_seconds",quantile="0"}`,
v: 4.9351e-05,
lset: labels.FromStrings("__name__", "go.gc_duration_seconds", "quantile", "0"),
}, {
m: `{"go.gc_duration_seconds",quantile="0.25",}`,
v: 7.424100000000001e-05,
lset: labels.FromStrings("__name__", "go.gc_duration_seconds", "quantile", "0.25"),
}, {
m: `{"go.gc_duration_seconds",quantile="0.5",a="b"}`,
v: 8.3835e-05,
lset: labels.FromStrings("__name__", "go.gc_duration_seconds", "quantile", "0.5", "a", "b"),
}, {
m: `{"go.gc_duration_seconds",quantile="0.8", a="b"}`,
v: 8.3835e-05,
lset: labels.FromStrings("__name__", "go.gc_duration_seconds", "quantile", "0.8", "a", "b"),
}, {
m: `{"go.gc_duration_seconds", quantile="0.9", a="b"}`,
v: 8.3835e-05,
lset: labels.FromStrings("__name__", "go.gc_duration_seconds", "quantile", "0.9", "a", "b"),
}, {
m: `{"go.gc_duration_seconds", quantile="1.0", a="b" }`,
v: 8.3835e-05,
lset: labels.FromStrings("__name__", "go.gc_duration_seconds", "quantile", "1.0", "a", "b"),
}, {
m: `{ "go.gc_duration_seconds", quantile="1.0", a="b" }`,
v: 8.3835e-05,
lset: labels.FromStrings("__name__", "go.gc_duration_seconds", "quantile", "1.0", "a", "b"),
}, {
m: `{ "go.gc_duration_seconds", quantile= "1.0", a= "b", }`,
v: 8.3835e-05,
lset: labels.FromStrings("__name__", "go.gc_duration_seconds", "quantile", "1.0", "a", "b"),
}, {
m: `{ "go.gc_duration_seconds", quantile = "1.0", a = "b" }`,
v: 8.3835e-05,
lset: labels.FromStrings("__name__", "go.gc_duration_seconds", "quantile", "1.0", "a", "b"),
}, {
m: `{"go.gc_duration_seconds_count"}`,
v: 99,
lset: labels.FromStrings("__name__", "go.gc_duration_seconds_count"),
}, {
m: `{"Heizölrückstoßabdämpfung 10€ metric with \"interesting\" {character\nchoices}","strange©™\n'quoted' \"name\""="6"}`,
v: 10.0,
lset: labels.FromStrings("__name__", `Heizölrückstoßabdämpfung 10 metric with "interesting" {character
choices}`, "strange©™\n'quoted' \"name\"", "6"),
},
}
p := NewPromParser([]byte(input))
i := 0
var res labels.Labels
for {
et, err := p.Next()
if errors.Is(err, io.EOF) {
break
}
require.NoError(t, err)
switch et {
case EntrySeries:
m, ts, v := p.Series()
p.Metric(&res)
require.Equal(t, exp[i].m, string(m))
require.Equal(t, exp[i].t, ts)
require.Equal(t, exp[i].v, v)
require.Equal(t, exp[i].lset, res)
case EntryType:
m, typ := p.Type()
require.Equal(t, exp[i].m, string(m))
require.Equal(t, exp[i].typ, typ)
case EntryHelp:
m, h := p.Help()
require.Equal(t, exp[i].m, string(m))
require.Equal(t, exp[i].help, string(h))
case EntryComment:
require.Equal(t, exp[i].comment, string(p.Comment()))
}
i++
}
require.Len(t, exp, i)
}
func TestPromParseErrors(t *testing.T) { func TestPromParseErrors(t *testing.T) {
cases := []struct { cases := []struct {
input string input string
@ -238,6 +370,14 @@ func TestPromParseErrors(t *testing.T) {
input: "a{b=\"\xff\"} 1\n", input: "a{b=\"\xff\"} 1\n",
err: "invalid UTF-8 label value: \"\\\"\\xff\\\"\"", err: "invalid UTF-8 label value: \"\\\"\\xff\\\"\"",
}, },
{
input: `{"a", "b = "c"}`,
err: "expected equal, got \"c\\\"\" (\"LNAME\") while parsing: \"{\\\"a\\\", \\\"b = \\\"c\\\"\"",
},
{
input: `{"a",b\nc="d"} 1`,
err: "expected equal, got \"\\\\\" (\"INVALID\") while parsing: \"{\\\"a\\\",b\\\\\"",
},
{ {
input: "a true\n", input: "a true\n",
err: "strconv.ParseFloat: parsing \"true\": invalid syntax while parsing: \"a true\"", err: "strconv.ParseFloat: parsing \"true\": invalid syntax while parsing: \"a true\"",
@ -268,7 +408,7 @@ func TestPromParseErrors(t *testing.T) {
}, },
{ {
input: `{a="ok"} 1`, input: `{a="ok"} 1`,
err: "expected a valid start token, got \"{\" (\"INVALID\") while parsing: \"{\"", err: "metric name not set while parsing: \"{a=\\\"ok\\\"} 1\"",
}, },
{ {
input: "# TYPE #\n#EOF\n", input: "# TYPE #\n#EOF\n",

View file

@ -161,7 +161,7 @@ START_METRIC_SELECTOR
// Type definitions for grammar rules. // Type definitions for grammar rules.
%type <matchers> label_match_list %type <matchers> label_match_list
%type <matcher> label_matcher %type <matcher> label_matcher
%type <item> aggregate_op grouping_label match_op maybe_label metric_identifier unary_op at_modifier_preprocessors %type <item> aggregate_op grouping_label match_op maybe_label metric_identifier unary_op at_modifier_preprocessors string_identifier
%type <labels> label_set metric %type <labels> label_set metric
%type <lblList> label_set_list %type <lblList> label_set_list
%type <label> label_set_item %type <label> label_set_item
@ -583,6 +583,12 @@ label_match_list: label_match_list COMMA label_matcher
label_matcher : IDENTIFIER match_op STRING label_matcher : IDENTIFIER match_op STRING
{ $$ = yylex.(*parser).newLabelMatcher($1, $2, $3); } { $$ = yylex.(*parser).newLabelMatcher($1, $2, $3); }
| string_identifier match_op STRING
{ $$ = yylex.(*parser).newLabelMatcher($1, $2, $3); }
| string_identifier
{ $$ = yylex.(*parser).newMetricNameMatcher($1); }
| string_identifier match_op error
{ yylex.(*parser).unexpected("label matching", "string"); $$ = nil}
| IDENTIFIER match_op error | IDENTIFIER match_op error
{ yylex.(*parser).unexpected("label matching", "string"); $$ = nil} { yylex.(*parser).unexpected("label matching", "string"); $$ = nil}
| IDENTIFIER error | IDENTIFIER error
@ -903,6 +909,16 @@ string_literal : STRING
} }
; ;
string_identifier : STRING
{
$$ = Item{
Typ: METRIC_IDENTIFIER,
Pos: $1.PositionRange().Start,
Val: yylex.(*parser).unquoteString($1.Val),
}
}
;
/* /*
* Wrappers for optional arguments. * Wrappers for optional arguments.
*/ */

File diff suppressed because it is too large Load diff

View file

@ -417,6 +417,8 @@ func (p *parser) newBinaryExpression(lhs Node, op Item, modifiers, rhs Node) *Bi
} }
func (p *parser) assembleVectorSelector(vs *VectorSelector) { func (p *parser) assembleVectorSelector(vs *VectorSelector) {
// If the metric name was set outside the braces, add a matcher for it.
// If the metric name was inside the braces we don't need to do anything.
if vs.Name != "" { if vs.Name != "" {
nameMatcher, err := labels.NewMatcher(labels.MatchEqual, labels.MetricName, vs.Name) nameMatcher, err := labels.NewMatcher(labels.MatchEqual, labels.MetricName, vs.Name)
if err != nil { if err != nil {
@ -789,6 +791,18 @@ func (p *parser) checkAST(node Node) (typ ValueType) {
// Skip the check for non-empty matchers because an explicit // Skip the check for non-empty matchers because an explicit
// metric name is a non-empty matcher. // metric name is a non-empty matcher.
break break
} else {
// We also have to make sure a metric name was not set twice inside the
// braces.
foundMetricName := ""
for _, m := range n.LabelMatchers {
if m != nil && m.Name == labels.MetricName {
if foundMetricName != "" {
p.addParseErrf(n.PositionRange(), "metric name must not be set twice: %q or %q", foundMetricName, m.Value)
}
foundMetricName = m.Value
}
}
} }
// A Vector selector must contain at least one non-empty matcher to prevent // A Vector selector must contain at least one non-empty matcher to prevent
@ -872,6 +886,15 @@ func (p *parser) newLabelMatcher(label, operator, value Item) *labels.Matcher {
return m return m
} }
func (p *parser) newMetricNameMatcher(value Item) *labels.Matcher {
m, err := labels.NewMatcher(labels.MatchEqual, labels.MetricName, value.Val)
if err != nil {
p.addParseErr(value.PositionRange(), err)
}
return m
}
// addOffset is used to set the offset in the generated parser. // addOffset is used to set the offset in the generated parser.
func (p *parser) addOffset(e Node, offset time.Duration) { func (p *parser) addOffset(e Node, offset time.Duration) {
var orgoffsetp *time.Duration var orgoffsetp *time.Duration

View file

@ -474,6 +474,22 @@ var testExpr = []struct {
StartPos: 1, StartPos: 1,
}, },
}, },
{
input: ` +{"some_metric"}`,
expected: &UnaryExpr{
Op: ADD,
Expr: &VectorSelector{
LabelMatchers: []*labels.Matcher{
MustLabelMatcher(labels.MatchEqual, model.MetricNameLabel, "some_metric"),
},
PosRange: posrange.PositionRange{
Start: 2,
End: 17,
},
},
StartPos: 1,
},
},
{ {
input: "", input: "",
fail: true, fail: true,
@ -1702,6 +1718,33 @@ var testExpr = []struct {
}, },
}, },
}, },
{
input: `{"foo"}`,
expected: &VectorSelector{
// When a metric is named inside the braces, the Name field is not set.
LabelMatchers: []*labels.Matcher{
MustLabelMatcher(labels.MatchEqual, model.MetricNameLabel, "foo"),
},
PosRange: posrange.PositionRange{
Start: 0,
End: 7,
},
},
},
{
input: `{"foo", a="bc"}`,
expected: &VectorSelector{
// When a metric is named inside the braces, the Name field is not set.
LabelMatchers: []*labels.Matcher{
MustLabelMatcher(labels.MatchEqual, model.MetricNameLabel, "foo"),
MustLabelMatcher(labels.MatchEqual, "a", "bc"),
},
PosRange: posrange.PositionRange{
Start: 0,
End: 15,
},
},
},
{ {
input: `foo{NaN='bc'}`, input: `foo{NaN='bc'}`,
expected: &VectorSelector{ expected: &VectorSelector{
@ -1747,6 +1790,23 @@ var testExpr = []struct {
}, },
}, },
}, },
{
// Metric name in the middle of selector list is fine.
input: `{a="b", foo!="bar", "foo", test=~"test", bar!~"baz"}`,
expected: &VectorSelector{
LabelMatchers: []*labels.Matcher{
MustLabelMatcher(labels.MatchEqual, "a", "b"),
MustLabelMatcher(labels.MatchNotEqual, "foo", "bar"),
MustLabelMatcher(labels.MatchEqual, model.MetricNameLabel, "foo"),
MustLabelMatcher(labels.MatchRegexp, "test", "test"),
MustLabelMatcher(labels.MatchNotRegexp, "bar", "baz"),
},
PosRange: posrange.PositionRange{
Start: 0,
End: 52,
},
},
},
{ {
input: `foo{a="b", foo!="bar", test=~"test", bar!~"baz",}`, input: `foo{a="b", foo!="bar", test=~"test", bar!~"baz",}`,
expected: &VectorSelector{ expected: &VectorSelector{
@ -1871,6 +1931,11 @@ var testExpr = []struct {
fail: true, fail: true,
errMsg: `unexpected identifier "lol" in label matching, expected "," or "}"`, errMsg: `unexpected identifier "lol" in label matching, expected "," or "}"`,
}, },
{
input: `foo{"a"=}`,
fail: true,
errMsg: `unexpected "}" in label matching, expected string`,
},
// Test matrix selector. // Test matrix selector.
{ {
input: "test[5s]", input: "test[5s]",

View file

@ -1332,7 +1332,7 @@ func TestScrapeLoopAppend(t *testing.T) {
// Honor Labels should ignore labels with the same name. // Honor Labels should ignore labels with the same name.
title: "Honor Labels", title: "Honor Labels",
honorLabels: true, honorLabels: true,
scrapeLabels: `metric{n1="1" n2="2"} 0`, scrapeLabels: `metric{n1="1", n2="2"} 0`,
discoveryLabels: []string{"n1", "0"}, discoveryLabels: []string{"n1", "0"},
expLset: labels.FromStrings("__name__", "metric", "n1", "1", "n2", "2"), expLset: labels.FromStrings("__name__", "metric", "n1", "1", "n2", "2"),
expValue: 0, expValue: 0,
@ -1406,7 +1406,7 @@ func TestScrapeLoopAppendForConflictingPrefixedLabels(t *testing.T) {
}, },
"One target label collides with existing label, plus existing label already with prefix 'exported": { "One target label collides with existing label, plus existing label already with prefix 'exported": {
targetLabels: []string{"foo", "3"}, targetLabels: []string{"foo", "3"},
exposedLabels: `metric{foo="1" exported_foo="2"} 0`, exposedLabels: `metric{foo="1", exported_foo="2"} 0`,
expected: []string{"__name__", "metric", "exported_exported_foo", "1", "exported_foo", "2", "foo", "3"}, expected: []string{"__name__", "metric", "exported_exported_foo", "1", "exported_foo", "2", "foo", "3"},
}, },
"One target label collides with existing label, both already with prefix 'exported'": { "One target label collides with existing label, both already with prefix 'exported'": {
@ -1416,7 +1416,7 @@ func TestScrapeLoopAppendForConflictingPrefixedLabels(t *testing.T) {
}, },
"Two target labels collide with existing labels, both with and without prefix 'exported'": { "Two target labels collide with existing labels, both with and without prefix 'exported'": {
targetLabels: []string{"foo", "3", "exported_foo", "4"}, targetLabels: []string{"foo", "3", "exported_foo", "4"},
exposedLabels: `metric{foo="1" exported_foo="2"} 0`, exposedLabels: `metric{foo="1", exported_foo="2"} 0`,
expected: []string{ expected: []string{
"__name__", "metric", "exported_exported_foo", "1", "exported_exported_exported_foo", "__name__", "metric", "exported_exported_foo", "1", "exported_exported_exported_foo",
"2", "exported_foo", "4", "foo", "3", "2", "exported_foo", "4", "foo", "3",
@ -1424,7 +1424,7 @@ func TestScrapeLoopAppendForConflictingPrefixedLabels(t *testing.T) {
}, },
"Extreme example": { "Extreme example": {
targetLabels: []string{"foo", "0", "exported_exported_foo", "1", "exported_exported_exported_foo", "2"}, targetLabels: []string{"foo", "0", "exported_exported_foo", "1", "exported_exported_exported_foo", "2"},
exposedLabels: `metric{foo="3" exported_foo="4" exported_exported_exported_foo="5"} 0`, exposedLabels: `metric{foo="3", exported_foo="4", exported_exported_exported_foo="5"} 0`,
expected: []string{ expected: []string{
"__name__", "metric", "__name__", "metric",
"exported_exported_exported_exported_exported_foo", "5", "exported_exported_exported_exported_exported_foo", "5",

View file

@ -81,7 +81,7 @@ func (h *writeHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
w.WriteHeader(http.StatusNoContent) w.WriteHeader(http.StatusNoContent)
} }
// checkAppendExemplarError modifies the AppendExamplar's returned error based on the error cause. // checkAppendExemplarError modifies the AppendExemplar's returned error based on the error cause.
func (h *writeHandler) checkAppendExemplarError(err error, e exemplar.Exemplar, outOfOrderErrs *int) error { func (h *writeHandler) checkAppendExemplarError(err error, e exemplar.Exemplar, outOfOrderErrs *int) error {
unwrappedErr := errors.Unwrap(err) unwrappedErr := errors.Unwrap(err)
if unwrappedErr == nil { if unwrappedErr == nil {

View file

@ -189,8 +189,9 @@ Loop:
) )
for _, s := range vec { for _, s := range vec {
isHistogram := s.H != nil isHistogram := s.H != nil
formatType := format.FormatType()
if isHistogram && if isHistogram &&
format != expfmt.FmtProtoDelim && format != expfmt.FmtProtoText && format != expfmt.FmtProtoCompact { formatType != expfmt.TypeProtoDelim && formatType != expfmt.TypeProtoText && formatType != expfmt.TypeProtoCompact {
// Can't serve the native histogram. // Can't serve the native histogram.
// TODO(codesome): Serve them when other protocols get the native histogram support. // TODO(codesome): Serve them when other protocols get the native histogram support.
continue continue