feat: advance parser and parse histograms + summary

Signed-off-by: Manik Rana <manikrana54@gmail.com>
This commit is contained in:
Manik Rana 2024-07-09 20:16:20 +05:30
parent ea9c8cb5a3
commit 96e850df7e
4 changed files with 313 additions and 208 deletions

View file

@ -31,6 +31,23 @@ func (ls Labels) Len() int { return len(ls) }
// Swap exchanges the labels stored at indices i and j of ls in place.
func (ls Labels) Swap(i, j int) {
	atI, atJ := ls[i], ls[j]
	ls[i] = atJ
	ls[j] = atI
}
// Less reports whether the name of the label at index i orders strictly
// before the name of the label at index j. Label values are not compared.
func (ls Labels) Less(i, j int) bool {
	left, right := ls[i].Name, ls[j].Name
	return left < right
}
// ExtractNames returns the names of all labels in ls, in the same order in
// which the labels appear in ls. The returned slice is freshly allocated and
// always has exactly len(ls) entries, so it is empty (but non-nil) when ls
// holds no labels.
func (ls Labels) ExtractNames() []string {
	names := make([]string, len(ls))
	for i := range ls {
		// Index into ls rather than ranging by value so each label is not copied.
		names[i] = ls[i].Name
	}
	return names
}
// Contains reports whether ls holds a label whose name is exactly name.
// Only label names are compared; label values are not inspected.
//
// NOTE(review): if the Labels type already offers a name-lookup helper
// elsewhere in this package, this method may duplicate it — confirm and
// reuse the existing helper if so.
func (ls Labels) Contains(name string) bool {
	for i := range ls {
		if ls[i].Name == name {
			return true
		}
	}
	return false
}
// Bytes returns ls as a byte slice.
// It uses an invalid byte character as a separator and so should not be used for printing.
func (ls Labels) Bytes(buf []byte) []byte {

View file

@ -95,6 +95,7 @@ type OpenMetricsParser struct {
exemplarVal float64
exemplarTs int64
hasExemplarTs bool
skipCT bool
}
// NewOpenMetricsParser returns a new parser of the byte slice.
@ -102,6 +103,7 @@ func NewOpenMetricsParser(b []byte, st *labels.SymbolTable) Parser {
return &OpenMetricsParser{
l: &openMetricsLexer{b: b},
builder: labels.NewScratchBuilderWithSymbolTable(st, 16),
skipCT: true,
}
}
@ -232,6 +234,11 @@ loop:
for {
switch t, _ := newParser.Next(); t {
case EntrySeries:
// TODO: potentially broken? Missing type?
if newParser.mName != p.mName {
return nil
}
// Continue instead of returning nil until we reach a new series/label set; this can happen for histograms and summaries.
// Check _created suffix
var newLbs labels.Labels
@ -241,15 +248,29 @@ loop:
continue
}
// TODO: potentially broken? Missing type?
if newParser.mName != p.mName {
return nil
}
// Edge case: if gauge_created is of unknown type, skip parsing.
newLbs = newLbs.DropMetricName()
if !labels.Equal(lbs, newLbs) {
return nil
switch p.mtype {
case model.MetricTypeCounter:
if !labels.Equal(lbs, newLbs) {
return nil
}
case model.MetricTypeSummary:
labelDiffs := lbs.MatchLabels(false, newLbs.ExtractNames()...)
if labelDiffs.Len() != 0 {
if !labelDiffs.Contains("quantile") || labelDiffs.Len() != 1 {
return nil
}
}
case model.MetricTypeHistogram:
labelDiffs := lbs.MatchLabels(false, newLbs.ExtractNames()...)
if labelDiffs.Len() != 0 {
if !labelDiffs.Contains("le") || labelDiffs.Len() != 1 {
return nil
}
}
default:
break
}
// TODO: for histograms
@ -259,10 +280,6 @@ loop:
// gauge_created is a metric
ct := int64(newParser.val)
_, err := p.Next()
if err != nil {
return nil
}
return &ct
default:
break loop
@ -312,6 +329,7 @@ func deepCopyParser(p *OpenMetricsParser) OpenMetricsParser {
exemplarVal: p.exemplarVal,
exemplarTs: p.exemplarTs,
hasExemplarTs: p.hasExemplarTs,
skipCT: false,
}
return newParser
}
@ -603,6 +621,13 @@ func (p *OpenMetricsParser) parseMetricSuffix(t token) (Entry, error) {
return EntryInvalid, p.parseError("expected next entry after timestamp", t3)
}
}
var newLbs labels.Labels
p.Metric(&newLbs)
name := newLbs.Get(model.MetricNameLabel)
if strings.HasSuffix(name, "_created") && p.skipCT {
return p.Next()
}
return EntrySeries, nil
}

View file

@ -24,50 +24,68 @@ import (
"github.com/prometheus/prometheus/model/labels"
)
// # HELP go_gc_duration_seconds A summary of the GC invocation durations.
// # TYPE go_gc_duration_seconds summary
// # UNIT go_gc_duration_seconds seconds
// go_gc_duration_seconds{quantile="0"} 4.9351e-05
// go_gc_duration_seconds{quantile="0.25"} 7.424100000000001e-05
// go_gc_duration_seconds{quantile="0.5",a="b"} 8.3835e-05
// # HELP nohelp1
// # HELP help2 escape \ \n \\ \" \x chars
// # UNIT nounit
// go_gc_duration_seconds{quantile="1.0",a="b"} 8.3835e-05
// go_gc_duration_seconds_count 99
// some:aggregate:rate5m{a_b="c"} 1
// # HELP go_goroutines Number of goroutines that currently exist.
// # TYPE go_goroutines gauge
// go_goroutines 33 123.123
// # TYPE hh histogram
// hh_bucket{le="+Inf"} 1
// # TYPE gh gaugehistogram
// gh_bucket{le="+Inf"} 1
// # TYPE hhh histogram
// hhh_bucket{le="+Inf"} 1 # {id="histogram-bucket-test"} 4
// hhh_count 1 # {id="histogram-count-test"} 4
// # TYPE ggh gaugehistogram
// ggh_bucket{le="+Inf"} 1 # {id="gaugehistogram-bucket-test",xx="yy"} 4 123.123
// ggh_count 1 # {id="gaugehistogram-count-test",xx="yy"} 4 123.123
// # TYPE smr_seconds summary
// smr_seconds_count 2.0 # {id="summary-count-test"} 1 123.321
// smr_seconds_sum 42.0 # {id="summary-sum-test"} 1 123.321
// # TYPE ii info
// ii{foo="bar"} 1
// # TYPE ss stateset
// ss{ss="foo"} 1
// ss{ss="bar"} 0
// ss{A="a"} 0
// # TYPE un unknown
// _metric_starting_with_underscore 1
// testmetric{_label_starting_with_underscore="foo"} 1
// testmetric{label="\"bar\""} 1
// Summary case: we find the _created line, but then p.Next() is called and we end up parsing bar_created again.
// Histogram case: bucket labels such as {le="0.0"} end up being compared against baz_created, which is not what we want.
// The same issue might happen in summaries if the quantile series come before the _created line.
// TODO: figure out the ordering of _created, quantiles, etc. within a summary.
func TestOpenMetricsParse(t *testing.T) {
input := `# HELP go_gc_duration_seconds A summary of the GC invocation durations.
# TYPE go_gc_duration_seconds summary
# UNIT go_gc_duration_seconds seconds
go_gc_duration_seconds{quantile="0"} 4.9351e-05
go_gc_duration_seconds{quantile="0.25"} 7.424100000000001e-05
go_gc_duration_seconds{quantile="0.5",a="b"} 8.3835e-05
# HELP nohelp1
# HELP help2 escape \ \n \\ \" \x chars
# UNIT nounit
go_gc_duration_seconds{quantile="1.0",a="b"} 8.3835e-05
go_gc_duration_seconds_count 99
some:aggregate:rate5m{a_b="c"} 1
# HELP go_goroutines Number of goroutines that currently exist.
# TYPE go_goroutines gauge
go_goroutines 33 123.123
# TYPE hh histogram
hh_bucket{le="+Inf"} 1
# TYPE gh gaugehistogram
gh_bucket{le="+Inf"} 1
# TYPE hhh histogram
hhh_bucket{le="+Inf"} 1 # {id="histogram-bucket-test"} 4
hhh_count 1 # {id="histogram-count-test"} 4
# TYPE ggh gaugehistogram
ggh_bucket{le="+Inf"} 1 # {id="gaugehistogram-bucket-test",xx="yy"} 4 123.123
ggh_count 1 # {id="gaugehistogram-count-test",xx="yy"} 4 123.123
# TYPE smr_seconds summary
smr_seconds_count 2.0 # {id="summary-count-test"} 1 123.321
smr_seconds_sum 42.0 # {id="summary-sum-test"} 1 123.321
# TYPE ii info
ii{foo="bar"} 1
# TYPE ss stateset
ss{ss="foo"} 1
ss{ss="bar"} 0
ss{A="a"} 0
# TYPE un unknown
_metric_starting_with_underscore 1
testmetric{_label_starting_with_underscore="foo"} 1
testmetric{label="\"bar\""} 1
# TYPE foo counter
input := `# TYPE foo counter
foo_total 17.0 1520879607.789 # {id="counter-test"} 5
foo_created 1000
foo_total{a="b"} 17.0 1520879607.789 # {id="counter-test"} 5
foo_created{a="b"} 1000`
foo_created{a="b"} 1000
# TYPE bar summary
bar_count 17.0
bar_sum 324789.3
bar{quantile="0.95"} 123.7
bar{quantile="0.99"} 150.0
bar_created 1520430000
# TYPE baz histogram
baz_bucket{le="0.0"} 0
baz_bucket{le="+Inf"} 17
baz_count 17
baz_sum 324789.3
baz_created 1520430000`
input += "\n# HELP metric foo\x00bar"
input += "\nnull_byte_metric{a=\"abc\x00\"} 1"
@ -76,170 +94,215 @@ foo_created{a="b"} 1000`
int64p := func(x int64) *int64 { return &x }
exp := []expectedParse{
// {
// m: "go_gc_duration_seconds",
// help: "A summary of the GC invocation durations.",
// }, {
// m: "go_gc_duration_seconds",
// typ: model.MetricTypeSummary,
// }, {
// m: "go_gc_duration_seconds",
// unit: "seconds",
// }, {
// m: `go_gc_duration_seconds{quantile="0"}`,
// v: 4.9351e-05,
// lset: labels.FromStrings("__name__", "go_gc_duration_seconds", "quantile", "0"),
// }, {
// m: `go_gc_duration_seconds{quantile="0.25"}`,
// v: 7.424100000000001e-05,
// lset: labels.FromStrings("__name__", "go_gc_duration_seconds", "quantile", "0.25"),
// }, {
// m: `go_gc_duration_seconds{quantile="0.5",a="b"}`,
// v: 8.3835e-05,
// lset: labels.FromStrings("__name__", "go_gc_duration_seconds", "quantile", "0.5", "a", "b"),
// }, {
// m: "nohelp1",
// help: "",
// }, {
// m: "help2",
// help: "escape \\ \n \\ \" \\x chars",
// }, {
// m: "nounit",
// unit: "",
// }, {
// m: `go_gc_duration_seconds{quantile="1.0",a="b"}`,
// v: 8.3835e-05,
// lset: labels.FromStrings("__name__", "go_gc_duration_seconds", "quantile", "1.0", "a", "b"),
// }, {
// m: `go_gc_duration_seconds_count`,
// v: 99,
// lset: labels.FromStrings("__name__", "go_gc_duration_seconds_count"),
// }, {
// m: `some:aggregate:rate5m{a_b="c"}`,
// v: 1,
// lset: labels.FromStrings("__name__", "some:aggregate:rate5m", "a_b", "c"),
// }, {
// m: "go_goroutines",
// help: "Number of goroutines that currently exist.",
// }, {
// m: "go_goroutines",
// typ: model.MetricTypeGauge,
// }, {
// m: `go_goroutines`,
// v: 33,
// t: int64p(123123),
// lset: labels.FromStrings("__name__", "go_goroutines"),
// }, {
// m: "hh",
// typ: model.MetricTypeHistogram,
// }, {
// m: `hh_bucket{le="+Inf"}`,
// v: 1,
// lset: labels.FromStrings("__name__", "hh_bucket", "le", "+Inf"),
// }, {
// m: "gh",
// typ: model.MetricTypeGaugeHistogram,
// }, {
// m: `gh_bucket{le="+Inf"}`,
// v: 1,
// lset: labels.FromStrings("__name__", "gh_bucket", "le", "+Inf"),
// }, {
// m: "hhh",
// typ: model.MetricTypeHistogram,
// }, {
// m: `hhh_bucket{le="+Inf"}`,
// v: 1,
// lset: labels.FromStrings("__name__", "hhh_bucket", "le", "+Inf"),
// e: &exemplar.Exemplar{Labels: labels.FromStrings("id", "histogram-bucket-test"), Value: 4},
// }, {
// m: `hhh_count`,
// v: 1,
// lset: labels.FromStrings("__name__", "hhh_count"),
// e: &exemplar.Exemplar{Labels: labels.FromStrings("id", "histogram-count-test"), Value: 4},
// }, {
// m: "ggh",
// typ: model.MetricTypeGaugeHistogram,
// }, {
// m: `ggh_bucket{le="+Inf"}`,
// v: 1,
// lset: labels.FromStrings("__name__", "ggh_bucket", "le", "+Inf"),
// e: &exemplar.Exemplar{Labels: labels.FromStrings("id", "gaugehistogram-bucket-test", "xx", "yy"), Value: 4, HasTs: true, Ts: 123123},
// }, {
// m: `ggh_count`,
// v: 1,
// lset: labels.FromStrings("__name__", "ggh_count"),
// e: &exemplar.Exemplar{Labels: labels.FromStrings("id", "gaugehistogram-count-test", "xx", "yy"), Value: 4, HasTs: true, Ts: 123123},
// }, {
// m: "smr_seconds",
// typ: model.MetricTypeSummary,
// }, {
// m: `smr_seconds_count`,
// v: 2,
// lset: labels.FromStrings("__name__", "smr_seconds_count"),
// e: &exemplar.Exemplar{Labels: labels.FromStrings("id", "summary-count-test"), Value: 1, HasTs: true, Ts: 123321},
// }, {
// m: `smr_seconds_sum`,
// v: 42,
// lset: labels.FromStrings("__name__", "smr_seconds_sum"),
// e: &exemplar.Exemplar{Labels: labels.FromStrings("id", "summary-sum-test"), Value: 1, HasTs: true, Ts: 123321},
// }, {
// m: "ii",
// typ: model.MetricTypeInfo,
// }, {
// m: `ii{foo="bar"}`,
// v: 1,
// lset: labels.FromStrings("__name__", "ii", "foo", "bar"),
// }, {
// m: "ss",
// typ: model.MetricTypeStateset,
// }, {
// m: `ss{ss="foo"}`,
// v: 1,
// lset: labels.FromStrings("__name__", "ss", "ss", "foo"),
// }, {
// m: `ss{ss="bar"}`,
// v: 0,
// lset: labels.FromStrings("__name__", "ss", "ss", "bar"),
// }, {
// m: `ss{A="a"}`,
// v: 0,
// lset: labels.FromStrings("A", "a", "__name__", "ss"),
// }, {
// m: "un",
// typ: model.MetricTypeUnknown,
// }, {
// m: "_metric_starting_with_underscore",
// v: 1,
// lset: labels.FromStrings("__name__", "_metric_starting_with_underscore"),
// }, {
// m: "testmetric{_label_starting_with_underscore=\"foo\"}",
// v: 1,
// lset: labels.FromStrings("__name__", "testmetric", "_label_starting_with_underscore", "foo"),
// }, {
// m: "testmetric{label=\"\\\"bar\\\"\"}",
// v: 1,
// lset: labels.FromStrings("__name__", "testmetric", "label", `"bar"`),
// }, {
{
m: "go_gc_duration_seconds",
help: "A summary of the GC invocation durations.",
}, {
m: "go_gc_duration_seconds",
typ: model.MetricTypeSummary,
}, {
m: "go_gc_duration_seconds",
unit: "seconds",
}, {
m: `go_gc_duration_seconds{quantile="0"}`,
v: 4.9351e-05,
lset: labels.FromStrings("__name__", "go_gc_duration_seconds", "quantile", "0"),
}, {
m: `go_gc_duration_seconds{quantile="0.25"}`,
v: 7.424100000000001e-05,
lset: labels.FromStrings("__name__", "go_gc_duration_seconds", "quantile", "0.25"),
}, {
m: `go_gc_duration_seconds{quantile="0.5",a="b"}`,
v: 8.3835e-05,
lset: labels.FromStrings("__name__", "go_gc_duration_seconds", "quantile", "0.5", "a", "b"),
}, {
m: "nohelp1",
help: "",
}, {
m: "help2",
help: "escape \\ \n \\ \" \\x chars",
}, {
m: "nounit",
unit: "",
}, {
m: `go_gc_duration_seconds{quantile="1.0",a="b"}`,
v: 8.3835e-05,
lset: labels.FromStrings("__name__", "go_gc_duration_seconds", "quantile", "1.0", "a", "b"),
}, {
m: `go_gc_duration_seconds_count`,
v: 99,
lset: labels.FromStrings("__name__", "go_gc_duration_seconds_count"),
}, {
m: `some:aggregate:rate5m{a_b="c"}`,
v: 1,
lset: labels.FromStrings("__name__", "some:aggregate:rate5m", "a_b", "c"),
}, {
m: "go_goroutines",
help: "Number of goroutines that currently exist.",
}, {
m: "go_goroutines",
typ: model.MetricTypeGauge,
}, {
m: `go_goroutines`,
v: 33,
t: int64p(123123),
lset: labels.FromStrings("__name__", "go_goroutines"),
}, {
m: "hh",
typ: model.MetricTypeHistogram,
}, {
m: `hh_bucket{le="+Inf"}`,
v: 1,
lset: labels.FromStrings("__name__", "hh_bucket", "le", "+Inf"),
}, {
m: "gh",
typ: model.MetricTypeGaugeHistogram,
}, {
m: `gh_bucket{le="+Inf"}`,
v: 1,
lset: labels.FromStrings("__name__", "gh_bucket", "le", "+Inf"),
}, {
m: "hhh",
typ: model.MetricTypeHistogram,
}, {
m: `hhh_bucket{le="+Inf"}`,
v: 1,
lset: labels.FromStrings("__name__", "hhh_bucket", "le", "+Inf"),
e: &exemplar.Exemplar{Labels: labels.FromStrings("id", "histogram-bucket-test"), Value: 4},
}, {
m: `hhh_count`,
v: 1,
lset: labels.FromStrings("__name__", "hhh_count"),
e: &exemplar.Exemplar{Labels: labels.FromStrings("id", "histogram-count-test"), Value: 4},
}, {
m: "ggh",
typ: model.MetricTypeGaugeHistogram,
}, {
m: `ggh_bucket{le="+Inf"}`,
v: 1,
lset: labels.FromStrings("__name__", "ggh_bucket", "le", "+Inf"),
e: &exemplar.Exemplar{Labels: labels.FromStrings("id", "gaugehistogram-bucket-test", "xx", "yy"), Value: 4, HasTs: true, Ts: 123123},
}, {
m: `ggh_count`,
v: 1,
lset: labels.FromStrings("__name__", "ggh_count"),
e: &exemplar.Exemplar{Labels: labels.FromStrings("id", "gaugehistogram-count-test", "xx", "yy"), Value: 4, HasTs: true, Ts: 123123},
}, {
m: "smr_seconds",
typ: model.MetricTypeSummary,
}, {
m: `smr_seconds_count`,
v: 2,
lset: labels.FromStrings("__name__", "smr_seconds_count"),
e: &exemplar.Exemplar{Labels: labels.FromStrings("id", "summary-count-test"), Value: 1, HasTs: true, Ts: 123321},
}, {
m: `smr_seconds_sum`,
v: 42,
lset: labels.FromStrings("__name__", "smr_seconds_sum"),
e: &exemplar.Exemplar{Labels: labels.FromStrings("id", "summary-sum-test"), Value: 1, HasTs: true, Ts: 123321},
}, {
m: "ii",
typ: model.MetricTypeInfo,
}, {
m: `ii{foo="bar"}`,
v: 1,
lset: labels.FromStrings("__name__", "ii", "foo", "bar"),
}, {
m: "ss",
typ: model.MetricTypeStateset,
}, {
m: `ss{ss="foo"}`,
v: 1,
lset: labels.FromStrings("__name__", "ss", "ss", "foo"),
}, {
m: `ss{ss="bar"}`,
v: 0,
lset: labels.FromStrings("__name__", "ss", "ss", "bar"),
}, {
m: `ss{A="a"}`,
v: 0,
lset: labels.FromStrings("A", "a", "__name__", "ss"),
}, {
m: "un",
typ: model.MetricTypeUnknown,
}, {
m: "_metric_starting_with_underscore",
v: 1,
lset: labels.FromStrings("__name__", "_metric_starting_with_underscore"),
}, {
m: "testmetric{_label_starting_with_underscore=\"foo\"}",
v: 1,
lset: labels.FromStrings("__name__", "testmetric", "_label_starting_with_underscore", "foo"),
}, {
m: "testmetric{label=\"\\\"bar\\\"\"}",
v: 1,
lset: labels.FromStrings("__name__", "testmetric", "label", `"bar"`),
}, {
m: "foo",
typ: model.MetricTypeCounter,
}, {
},
{
m: "foo_total",
v: 17,
lset: labels.FromStrings("__name__", "foo_total"),
t: int64p(1520879607789),
e: &exemplar.Exemplar{Labels: labels.FromStrings("id", "counter-test"), Value: 5},
ct: int64p(1000),
},{
m: `foo_total{a="b"}`,
v: 17.0,
ct: int64p(1000),
}, {
m: `foo_total{a="b"}`,
v: 17.0,
lset: labels.FromStrings("__name__", "foo_total", "a", "b"),
t: int64p(1520879607789),
e: &exemplar.Exemplar{Labels: labels.FromStrings("id", "counter-test"), Value: 5},
ct: int64p(1000),
},{
m: "rpc_durations_histogram_seconds_bucket",
ct: int64p(1000),
},{
t: int64p(1520879607789),
e: &exemplar.Exemplar{Labels: labels.FromStrings("id", "counter-test"), Value: 5},
ct: int64p(1000),
}, {
m: "bar",
typ: model.MetricTypeSummary,
}, {
m: "bar_count",
v: 17.0,
lset: labels.FromStrings("__name__", "bar_count"),
ct: int64p(1520430000),
}, {
m: "bar_sum",
v: 324789.3,
lset: labels.FromStrings("__name__", "bar_sum"),
ct: int64p(1520430000),
}, {
m: `bar{quantile="0.95"}`,
v: 123.7,
lset: labels.FromStrings("__name__", "bar", "quantile", "0.95"),
ct: int64p(1520430000),
}, {
m: `bar{quantile="0.99"}`,
v: 150.0,
lset: labels.FromStrings("__name__", "bar", "quantile", "0.99"),
ct: int64p(1520430000),
}, {
m: "baz",
typ: model.MetricTypeHistogram,
}, {
m: `baz_bucket{le="0.0"}`,
v: 0,
lset: labels.FromStrings("__name__", "baz_bucket", "le", "0.0"),
ct: int64p(1520430000),
}, {
m: `baz_bucket{le="+Inf"}`,
v: 17,
lset: labels.FromStrings("__name__", "baz_bucket", "le", "+Inf"),
ct: int64p(1520430000),
}, {
m: `baz_count`,
v: 17,
lset: labels.FromStrings("__name__", "baz_count"),
ct: int64p(1520430000),
}, {
m: `baz_sum`,
v: 324789.3,
lset: labels.FromStrings("__name__", "baz_sum"),
ct: int64p(1520430000),
}, {
m: "metric",
help: "foo\x00bar",
}, {
@ -697,4 +760,4 @@ func TestOMNullByteHandling(t *testing.T) {
require.Equal(t, c.err, err.Error(), "test %d", i)
}
}
}

View file

@ -41,7 +41,7 @@ type expectedParse struct {
unit string
comment string
e *exemplar.Exemplar
ct *int64
ct *int64
}
func TestPromParse(t *testing.T) {